.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/plot_2_example_cmrc.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_plot_2_example_cmrc.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_plot_2_example_cmrc.py:


.. _ex2:

Example: Use of CMRC with different settings
============================================

Example of using CMRC with some of the common classification datasets with
different loss and feature mapping settings. We load the different datasets
and use stratified 5-fold cross-validation to generate the partitions for
train and test. We separate 1 partition each time for testing and use the
others for training. On each iteration we calculate the classification error.
We also calculate the mean training time.

Note that we set the parameter ``solver='adam'``. In the case of CMRC
classifiers and random Fourier feature mapping this means that we will use a
stochastic gradient-based approach (Adam) to perform the optimization.

You can check a more elaborate example in :ref:`ex_comp`.

.. GENERATED FROM PYTHON SOURCE LINES 23-119

.. code-block:: default


    import time

    import numpy as np
    import pandas as pd
    from sklearn import preprocessing
    from sklearn.model_selection import StratifiedKFold

    from MRCpy import CMRC
    # Import the datasets
    from MRCpy.datasets import *

    # Dataset loader callables; each one is invoked as ``load()`` and
    # returns the pair ``(X, Y)``
    loaders = [
        load_mammographic,
        load_haberman,
        load_indian_liver,
        load_diabetes,
        load_credit,
    ]
    # Display names of the datasets, in the same order as ``loaders``
    dataName = [
        "mammographic",
        "haberman",
        "indian_liver",
        "diabetes",
        "credit",
    ]


    def runCMRC(phi, loss):
        """Cross-validate a CMRC classifier on every dataset in ``loaders``.

        For each dataset a ``CMRC`` classifier is trained and evaluated with
        a stratified 5-fold cross-validation. The features are standardized
        using statistics computed on the training folds only.

        Parameters
        ----------
        phi
            Feature mapping forwarded to ``CMRC`` as ``phi``
            (e.g. ``'fourier'``).
        loss
            Loss forwarded to ``CMRC`` as ``loss``
            (e.g. ``'0-1'`` or ``'log'``).

        Returns
        -------
        pandas.DataFrame
            One row per dataset with the (string formatted) columns
            ``dataset``, ``n_samples``, ``n_attributes``, ``n_classes``,
            ``upper`` (mean upper bound over the folds), ``error``
            (mean +/- standard deviation of the test error over the folds)
            and ``avg_train_time (s)`` (mean training time per fold).
        """
        # We fix the random seed so that the stratified k-fold performed
        # is the same through the different executions
        random_seed = 0

        # Number of folds of the cross-validation (same for every dataset)
        n_splits = 5

        # The rows are collected in a list and the DataFrame is built once
        # at the end. This avoids the private ``DataFrame._append`` method
        # (the public ``DataFrame.append`` was removed in pandas 2.0).
        rows = []

        # Iterate through each of the datasets and fit the CMRC classifier.
        for j, load in enumerate(loaders):

            # Loading the dataset
            X, Y = load()
            r = len(np.unique(Y))
            n, d = X.shape

            # Create the CMRC object initialized with the corresponding
            # parameters
            clf = CMRC(phi=phi,
                       loss=loss,
                       random_state=random_seed,
                       solver='adam')

            # Generate the partitions of the stratified cross-validation
            cv = StratifiedKFold(n_splits=n_splits, random_state=random_seed,
                                 shuffle=True)

            cvError = []
            upper = 0
            auxTime = 0

            # Stratified cross-validation
            for train_index, test_index in cv.split(X, Y):

                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = Y[train_index], Y[test_index]

                # Normalizing the data: the scaler is fitted on the training
                # fold only and then applied to both folds
                std_scale = preprocessing.StandardScaler().fit(X_train, y_train)
                X_train = std_scale.transform(X_train)
                X_test = std_scale.transform(X_test)

                # Save start time for computing training time
                startTime = time.time()

                # Train the model
                clf.fit(X_train, y_train)
                upper += clf.get_upper_bound()

                # Save the training time (it includes the computation of
                # the upper bound above)
                auxTime += time.time() - startTime

                # Predict the class for test instances
                y_pred = clf.predict(X_test)

                # Calculate the error made by the CMRC classifier
                cvError.append(np.average(y_pred != y_test))

            upper = upper / n_splits
            res_mean = np.average(cvError)
            res_std = np.std(cvError)

            # Calculating the mean training time
            auxTime = auxTime / n_splits

            rows.append({'dataset': dataName[j],
                         'n_samples': '%d' % n,
                         'n_attributes': '%d' % d,
                         'n_classes': '%d' % r,
                         "upper": "%1.2g" % upper,
                         'error': '%1.2g' % res_mean + " +/- " +
                         '%1.2g' % res_std,
                         'avg_train_time (s)': '%1.2g' % auxTime})

        return pd.DataFrame(rows)


.. GENERATED FROM PYTHON SOURCE LINES 120-124

.. code-block:: default


    # Run the experiment with the 0-1 loss and the fourier feature mapping,
    # then show the resulting table with a caption.
    r1 = runCMRC(phi='fourier', loss='0-1')
    r1.style.set_caption('Using 0-1 loss and fourier feature mapping')


.. raw:: html

    <div class="output_subarea output_html rendered_html output_result">
    <style type="text/css">
    </style>
    <table id="T_76434">
      <caption>Using 0-1 loss and fourier feature mapping</caption>
      <thead>
        <tr>
          <th class="blank level0" >&nbsp;</th>
          <th id="T_76434_level0_col0" class="col_heading level0 col0" >dataset</th>
          <th id="T_76434_level0_col1" class="col_heading level0 col1" >n_samples</th>
          <th id="T_76434_level0_col2" class="col_heading level0 col2" >n_attributes</th>
          <th id="T_76434_level0_col3" class="col_heading level0 col3" >n_classes</th>
          <th id="T_76434_level0_col4" class="col_heading level0 col4" >upper</th>
          <th id="T_76434_level0_col5" class="col_heading level0 col5" >error</th>
          <th id="T_76434_level0_col6" class="col_heading level0 col6" >avg_train_time (s)</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <th id="T_76434_level0_row0" class="row_heading level0 row0" >0</th>
          <td id="T_76434_row0_col0" class="data row0 col0" >mammographic</td>
          <td id="T_76434_row0_col1" class="data row0 col1" >961</td>
          <td id="T_76434_row0_col2" class="data row0 col2" >5</td>
          <td id="T_76434_row0_col3" class="data row0 col3" >2</td>
          <td id="T_76434_row0_col4" class="data row0 col4" >0.2</td>
          <td id="T_76434_row0_col5" class="data row0 col5" >0.17 +/- 0.009</td>
          <td id="T_76434_row0_col6" class="data row0 col6" >1.9</td>
        </tr>
        <tr>
          <th id="T_76434_level0_row1" class="row_heading level0 row1" >1</th>
          <td id="T_76434_row1_col0" class="data row1 col0" >haberman</td>
          <td id="T_76434_row1_col1" class="data row1 col1" >306</td>
          <td id="T_76434_row1_col2" class="data row1 col2" >3</td>
          <td id="T_76434_row1_col3" class="data row1 col3" >2</td>
          <td id="T_76434_row1_col4" class="data row1 col4" >0.26</td>
          <td id="T_76434_row1_col5" class="data row1 col5" >0.26 +/- 0.034</td>
          <td id="T_76434_row1_col6" class="data row1 col6" >1.8</td>
        </tr>
        <tr>
          <th id="T_76434_level0_row2" class="row_heading level0 row2" >2</th>
          <td id="T_76434_row2_col0" class="data row2 col0" >indian_liver</td>
          <td id="T_76434_row2_col1" class="data row2 col1" >583</td>
          <td id="T_76434_row2_col2" class="data row2 col2" >10</td>
          <td id="T_76434_row2_col3" class="data row2 col3" >2</td>
          <td id="T_76434_row2_col4" class="data row2 col4" >0.3</td>
          <td id="T_76434_row2_col5" class="data row2 col5" >0.29 +/- 0.024</td>
          <td id="T_76434_row2_col6" class="data row2 col6" >1.9</td>
        </tr>
        <tr>
          <th id="T_76434_level0_row3" class="row_heading level0 row3" >3</th>
          <td id="T_76434_row3_col0" class="data row3 col0" >diabetes</td>
          <td id="T_76434_row3_col1" class="data row3 col1" >768</td>
          <td id="T_76434_row3_col2" class="data row3 col2" >8</td>
          <td id="T_76434_row3_col3" class="data row3 col3" >2</td>
          <td id="T_76434_row3_col4" class="data row3 col4" >0.23</td>
          <td id="T_76434_row3_col5" class="data row3 col5" >0.25 +/- 0.025</td>
          <td id="T_76434_row3_col6" class="data row3 col6" >2</td>
        </tr>
        <tr>
          <th id="T_76434_level0_row4" class="row_heading level0 row4" >4</th>
          <td id="T_76434_row4_col0" class="data row4 col0" >credit</td>
          <td id="T_76434_row4_col1" class="data row4 col1" >690</td>
          <td id="T_76434_row4_col2" class="data row4 col2" >15</td>
          <td id="T_76434_row4_col3" class="data row4 col3" >2</td>
          <td id="T_76434_row4_col4" class="data row4 col4" >0.14</td>
          <td id="T_76434_row4_col5" class="data row4 col5" >0.13 +/- 0.02</td>
          <td id="T_76434_row4_col6" class="data row4 col6" >1.6</td>
        </tr>
      </tbody>
    </table>

    </div>
    <br />
    <br />

.. GENERATED FROM PYTHON SOURCE LINES 125-128

.. code-block:: default


    # Run the experiment with the log loss and the fourier feature mapping,
    # then show the resulting table with a caption.
    r2 = runCMRC(phi='fourier', loss='log')
    r2.style.set_caption('Using log loss and fourier feature mapping')


.. raw:: html

    <div class="output_subarea output_html rendered_html output_result">
    <style type="text/css">
    </style>
    <table id="T_a5e74">
      <caption>Using log loss and fourier feature mapping</caption>
      <thead>
        <tr>
          <th class="blank level0" >&nbsp;</th>
          <th id="T_a5e74_level0_col0" class="col_heading level0 col0" >dataset</th>
          <th id="T_a5e74_level0_col1" class="col_heading level0 col1" >n_samples</th>
          <th id="T_a5e74_level0_col2" class="col_heading level0 col2" >n_attributes</th>
          <th id="T_a5e74_level0_col3" class="col_heading level0 col3" >n_classes</th>
          <th id="T_a5e74_level0_col4" class="col_heading level0 col4" >upper</th>
          <th id="T_a5e74_level0_col5" class="col_heading level0 col5" >error</th>
          <th id="T_a5e74_level0_col6" class="col_heading level0 col6" >avg_train_time (s)</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <th id="T_a5e74_level0_row0" class="row_heading level0 row0" >0</th>
          <td id="T_a5e74_row0_col0" class="data row0 col0" >mammographic</td>
          <td id="T_a5e74_row0_col1" class="data row0 col1" >961</td>
          <td id="T_a5e74_row0_col2" class="data row0 col2" >5</td>
          <td id="T_a5e74_row0_col3" class="data row0 col3" >2</td>
          <td id="T_a5e74_row0_col4" class="data row0 col4" >0.39</td>
          <td id="T_a5e74_row0_col5" class="data row0 col5" >0.18 +/- 0.0057</td>
          <td id="T_a5e74_row0_col6" class="data row0 col6" >2.8</td>
        </tr>
        <tr>
          <th id="T_a5e74_level0_row1" class="row_heading level0 row1" >1</th>
          <td id="T_a5e74_row1_col0" class="data row1 col0" >haberman</td>
          <td id="T_a5e74_row1_col1" class="data row1 col1" >306</td>
          <td id="T_a5e74_row1_col2" class="data row1 col2" >3</td>
          <td id="T_a5e74_row1_col3" class="data row1 col3" >2</td>
          <td id="T_a5e74_row1_col4" class="data row1 col4" >0.49</td>
          <td id="T_a5e74_row1_col5" class="data row1 col5" >0.26 +/- 0.043</td>
          <td id="T_a5e74_row1_col6" class="data row1 col6" >2.5</td>
        </tr>
        <tr>
          <th id="T_a5e74_level0_row2" class="row_heading level0 row2" >2</th>
          <td id="T_a5e74_row2_col0" class="data row2 col0" >indian_liver</td>
          <td id="T_a5e74_row2_col1" class="data row2 col1" >583</td>
          <td id="T_a5e74_row2_col2" class="data row2 col2" >10</td>
          <td id="T_a5e74_row2_col3" class="data row2 col3" >2</td>
          <td id="T_a5e74_row2_col4" class="data row2 col4" >0.5</td>
          <td id="T_a5e74_row2_col5" class="data row2 col5" >0.28 +/- 0.039</td>
          <td id="T_a5e74_row2_col6" class="data row2 col6" >2.7</td>
        </tr>
        <tr>
          <th id="T_a5e74_level0_row3" class="row_heading level0 row3" >3</th>
          <td id="T_a5e74_row3_col0" class="data row3 col0" >diabetes</td>
          <td id="T_a5e74_row3_col1" class="data row3 col1" >768</td>
          <td id="T_a5e74_row3_col2" class="data row3 col2" >8</td>
          <td id="T_a5e74_row3_col3" class="data row3 col3" >2</td>
          <td id="T_a5e74_row3_col4" class="data row3 col4" >0.41</td>
          <td id="T_a5e74_row3_col5" class="data row3 col5" >0.24 +/- 0.037</td>
          <td id="T_a5e74_row3_col6" class="data row3 col6" >3</td>
        </tr>
        <tr>
          <th id="T_a5e74_level0_row4" class="row_heading level0 row4" >4</th>
          <td id="T_a5e74_row4_col0" class="data row4 col0" >credit</td>
          <td id="T_a5e74_row4_col1" class="data row4 col1" >690</td>
          <td id="T_a5e74_row4_col2" class="data row4 col2" >15</td>
          <td id="T_a5e74_row4_col3" class="data row4 col3" >2</td>
          <td id="T_a5e74_row4_col4" class="data row4 col4" >0.29</td>
          <td id="T_a5e74_row4_col5" class="data row4 col5" >0.14 +/- 0.027</td>
          <td id="T_a5e74_row4_col6" class="data row4 col6" >2.9</td>
        </tr>
      </tbody>
    </table>

    </div>
    <br />
    <br />


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** ( 1 minutes  55.998 seconds)


.. _sphx_glr_download_auto_examples_plot_2_example_cmrc.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example


    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: plot_2_example_cmrc.py <plot_2_example_cmrc.py>`

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: plot_2_example_cmrc.ipynb <plot_2_example_cmrc.ipynb>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_