Source code for skbold.estimators.roi_voting_classifier

# Class to implement a voting classifier from the output of
# classifiers trained on different feature sets from different ROIs.

# Author: Lukas Snoek [lukassnoek.github.io]
# Contact: lukassnoek@gmail.com
# License: 3 clause BSD

# Note: this implementation was inspired by the code of S. Rashka
# (http://sebastianraschka.com/Articles/2014_ensemble_classifier.html)

from __future__ import division, print_function, absolute_import

import glob
import numpy as np
import os.path as op
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.svm import SVC
from skbold.feature_extraction import RoiIndexer
from ..feature_selection import fisher_criterion_score, SelectAboveCutoff
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from copy import copy, deepcopy

import skbold
roi_dir = op.join(op.dirname(skbold.__file__), 'data', 'ROIs',
                  'harvard_oxford')


[docs]class RoiVotingClassifier(BaseEstimator, ClassifierMixin):
    """
    This classifier fits a base-estimator (by default a linear SVM) on
    different feature sets (i.e. voxels) from different regions of interest
    (which are drawn from the Harvard-Oxford Cortical atlas), and subsequently
    the final prediction is derived through a max-voting rule, which can be
    either 'soft' (argmax of mean class probability) or 'hard' (max of class
    prediction).

    Notes
    -----
    This classifier has not been tested!

    Parameters
    ----------
    mvp : mvp-object
        An custom object from the skbold package containing data (X, y)
        and corresponding meta-data (e.g. mask info)
    preproc_pipeline : object
        A scikit-learn Pipeline object with desired preprocessing steps
        (e.g. scaling, additional feature selection)
    clf : object
        A scikit-learn style classifier (implementing fit(), predict(),
        and predict_proba()), that is able to be used in Pipelines.
    mask_type : str
        Can be 'unilateral' or 'bilateral', which will use all masks from
        the corresponding Harvard-Oxford Cortical (lateralized) atlas.
        Alternatively, it may be an absolute path to a directory containing
        a custom set of masks as nifti-files (default: 'unilateral').
    voting : str
        Either 'hard' or 'soft' (default: 'soft').
    weights : list (or ndarray)
        List/array of shape [n_rois] with a relative weighting factor to be
        used in the voting procedure.
    """

    def __init__(self, mvp, preproc_pipeline=None, clf=None,
                 mask_type='unilateral', voting='soft', weights=None):

        self.mvp = mvp
        self.voting = voting
        self.mask_type = mask_type

        if clf is None:
            clf = SVC(C=1.0, kernel='linear', probability=True,
                      decision_function_shape='ovo')
        self.clf = clf

        # If no preprocessing pipeline is defined, we'll assume that at least
        # scaling and minor (univariate) feature selection is desired.
        if preproc_pipeline is None:
            scaler = StandardScaler()
            transformer = SelectAboveCutoff(1, fisher_criterion_score)
            preproc_pipeline = Pipeline([('transformer', transformer),
                                         ('scaler', scaler)])

        self.preproc_pipeline = preproc_pipeline

        # Glob masks
        if mask_type not in ['unilateral', 'bilateral']:
            self.masks = glob.glob(op.join(mask_type, '*nii.gz'))
        else:
            mask_dir = op.join(op.dirname(roi_dir), mask_type)
            self.masks = glob.glob(op.join(mask_dir, '*nii.gz'))

        self.pipes = []  # This will gather all roi-specific pipelines
        self.clf = clf  # base-classifier

        # If no weights are specified, use equal weights
        if weights is None:
            weights = np.ones(len(self.masks))

        self.weights = weights

[docs]    def fit(self, X=None, y=None):
        """ Fits RoiVotingClassifier.

        Parameters
        ----------
        X : ndarray
            Array of shape = [n_samples, n_features].
        y : list or ndarray of int or float
            List or ndarray with floats/ints corresponding to labels.

        Returns
        -------
        self : object
            RoiStackingClassifier instance with fitted parameters.
        """
        if X is None:
            X = self.mvp.X

        if y is None:
            y = self.mvp.y

        for i, mask in enumerate(self.masks):
            roiindexer = RoiIndexer(self.mvp, mask, mask_threshold=0)
            pipeline = [('roiindexer', roiindexer)]
            tmp_preproc = deepcopy(self.preproc_pipeline)
            pipeline.extend(tmp_preproc.steps)
            pipeline.extend([('clf', copy(self.clf))])
            pipeline = Pipeline(pipeline)

            pipeline.fit(X, y)
            self.pipes.append(pipeline)

        return self

[docs]    def predict(self, X):
        """ Predict class given fitted RoiVotingClassifier.

        Parameters
        ----------
        X : ndarray
            Array of shape = [n_samples, n_features].

        Returns
        -------
        maxvotes : ndarray
            Array with class predictions for all classes of X.
        """

        if self.voting == 'hard':
            votes = np.asarray([p.predict(X) for p in self.pipes])

            # Credits to Sebastian Rashka:
            # http://sebastianraschka.com/Articles/2014_ensemble_classifier.html
            maxvotes = np.apply_along_axis(lambda x: np.argmax(np.bincount(x,
                                           weights=self.weights)), axis=0,
                                           arr=votes)

        elif self.voting == 'soft':
            votes = np.asarray([p.predict_proba(X) for p in self.pipes])
            maxvotes = np.average(votes, axis=0,
                                  weights=self.weights).argmax(axis=-1)

        return maxvotes