Source code for skbold.preproc.confounds

"""
The confounds module contains code to handle and account for
confounds in pattern analyses.
"""

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin


class ConfoundRegressor(BaseEstimator, TransformerMixin):
    """ Fits a confound onto each feature in X and returns their residuals.

    Every feature is modelled as a linear combination of the confound(s)
    (plus, optionally, an intercept) using ordinary least squares;
    ``transform`` returns what is left after subtracting that model's
    prediction from the data.

    Because ``fit`` and ``transform`` only receive (a subset of) X, the
    confound rows belonging to those samples are looked up by matching the
    values of the passed samples against the full array given at
    construction.
    """

    def __init__(self, confound, X, cross_validate=True,
                 stack_intercept=True):
        """ Regresses out a variable (confound) from each feature in X.

        Parameters
        ----------
        confound : numpy array
            Array of shape (n_samples,) or (n_samples, n_confounds) to
            regress out of each feature; may have multiple columns for
            multiple confounds.
        X : numpy array
            Array of shape (n_samples, n_features), from which the confound
            will be regressed. This is used to determine which rows of
            ``confound`` belong to the samples passed to ``fit`` and
            ``transform`` (which is necessary to use this class in
            scikit-learn Pipelines).
        cross_validate : bool
            Whether to cross-validate the confound-parameters (y~confound)
            estimated from the train-set to the test set (cross_validate=True)
            or whether to fit the confound regressor separately on the test-set
            (cross_validate=False); we recommend setting this to True to get
            an unbiased estimate.
        stack_intercept : bool
            Whether to stack an intercept to the confound (default is True).

        Attributes
        ----------
        weights_ : numpy array
            Array of shape (n_features, n_columns) with the least-squares
            weights for each feature, where n_columns is the number of
            confounds plus one if an intercept is stacked (the intercept
            weight comes first). None until ``fit`` has been called.
        """

        # Constructor arguments are stored untouched; the intercept is added
        # to a copy inside _design_matrix so self.confound is never mutated.
        self.confound = confound
        self.cross_validate = cross_validate
        self.X = X
        self.stack_intercept = stack_intercept
        self.weights_ = None

    def _design_matrix(self, X):
        """ Returns the confound rows (plus optional intercept) matching X.

        A row of ``self.X`` is selected when every one of its values occurs
        somewhere in ``X`` (element-wise membership, not an exact row
        comparison). The selected rows are returned in the order in which
        they appear in ``self.X``.

        NOTE: this assumes that sample values are practically unique (e.g.
        continuous data) and that the rows of ``X`` are ordered as in
        ``self.X``; neither is verified here.

        Raises
        ------
        ValueError
            If the number of matched rows differs from the number of rows
            in ``X``.
        """
        confound = np.asarray(self.confound)
        if confound.ndim == 1:
            # A single confound given as a flat vector becomes one column.
            confound = confound[:, np.newaxis]

        if self.stack_intercept:
            intercept = np.ones(confound.shape[0])
            confound = np.column_stack((intercept, confound))

        matched = np.isin(np.asarray(self.X), X).all(axis=1)
        design = confound[matched]

        if design.shape[0] != X.shape[0]:
            raise ValueError(
                "Could not match the samples in X to the X given at "
                "initialization: found {0} matching rows for {1} "
                "samples.".format(design.shape[0], X.shape[0]))

        return design

    def fit(self, X, y=None):
        """ Fits the confound-regressor to X.

        Parameters
        ----------
        X : numpy array
            An array of shape (n_samples, n_features), which should correspond
            to your train-set only!
        y : None
            Included for compatibility; does nothing.

        Returns
        -------
        self : ConfoundRegressor
            The fitted instance, with ``weights_`` set.
        """
        X = np.asarray(X)
        design = self._design_matrix(X)

        # One least-squares solve for all features at once; the solution has
        # shape (n_columns, n_features) and is stored transposed.
        coefs = np.linalg.lstsq(design, X, rcond=None)[0]
        self.weights_ = coefs.T
        return self

    def transform(self, X):
        """ Regresses out confound from X.

        Parameters
        ----------
        X : numpy array
            An array of shape (n_samples, n_features); these samples must
            be part of the X given at initialization. If
            ``cross_validate`` is False, the weights are re-estimated on
            this X before the confound is removed.

        Returns
        -------
        X_new : ndarray
            ndarray with confound-regressed features

        Raises
        ------
        ValueError
            If ``cross_validate`` is True and ``fit`` has not been called,
            or if the samples in X cannot be matched to the X given at
            initialization.
        """
        X = np.asarray(X)

        if not self.cross_validate:
            self.fit(X)

        if self.weights_ is None:
            raise ValueError("This ConfoundRegressor has not been fitted "
                             "yet; call 'fit' before 'transform'.")

        design = self._design_matrix(X)
        residuals = X - design.dot(self.weights_.T)

        # Keep the input's floating dtype, but never cast the residuals back
        # to an integer dtype (that would silently truncate them).
        if np.issubdtype(X.dtype, np.inexact):
            residuals = residuals.astype(X.dtype, copy=False)

        return residuals