Source code for skbold.utils.misc_transformers

# Author: Lukas Snoek [lukassnoek.github.io]
# Contact: lukassnoek@gmail.com
# License: 3 clause BSD

from __future__ import absolute_import, division, print_function
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin


class ArrayPermuter(BaseEstimator, TransformerMixin):
    """ Transformer that shuffles the rows of an array.

    The transformer is stateless: ``fit`` learns nothing and ``transform``
    delegates to ``np.random.permutation``, which draws from NumPy's global
    random state.
    """

    def __init__(self):
        """ Creates an ArrayPermuter; there is nothing to configure. """

    def fit(self, X=None, y=None):
        """ No-op, present so the object can be a step in sklearn's Pipeline.

        Parameters
        ----------
        X : ignored
        y : ignored

        Returns
        -------
        self : ArrayPermuter
            The (unchanged) transformer itself.
        """
        return self

    def transform(self, X):
        """ Returns a row-shuffled copy of the input.

        Parameters
        ----------
        X : ndarray
            Numeric (float) array of shape = [n_samples, n_features]

        Returns
        -------
        X_new : ndarray
            ndarray with permuted rows
        """
        shuffled = np.random.permutation(X)
        return shuffled
class RowIndexer(object):
    """ Splits the rows of an Mvp object into a selected and a remaining part.

    Notes
    -----
    NOT a scikit-learn style transformer: ``transform`` takes no data
    argument and overwrites ``X`` and ``y`` of the wrapped Mvp-object in
    place.

    Parameters
    ----------
    mvp : mvp-object
        Object whose ``X`` and ``y`` attributes are indexed.
    train_idx : ndarray
        Array with the row indices to keep (stored as ``self.idx``).
    """

    def __init__(self, mvp, train_idx):
        self.mvp = mvp
        self.idx = train_idx

    def transform(self):
        """ Keeps the indexed rows in the mvp and returns the rest separately.

        Returns
        -------
        mvp : mvp-object
            The same mvp-object that was passed in, with ``X`` and ``y``
            reduced to the selected rows.
        X_not_selected : ndarray
            Data which has not been selected.
        y_not_selected : ndarray
            Labels which have not been selected.
        """
        target = self.mvp
        n_rows = target.X.shape[0]

        # Boolean row mask: True for every row named in self.idx.
        keep = np.zeros(n_rows, dtype=bool)
        keep[self.idx] = True
        leave_out = np.logical_not(keep)

        # Take the left-out rows first, before X and y are overwritten below.
        X_rest = target.X[leave_out, :]
        y_rest = target.y[leave_out]

        target.X = target.X[keep, :]
        target.y = target.y[keep]

        return target, X_rest, y_rest
class SelectFeatureset(BaseEstimator, TransformerMixin):
    """ Selects only columns of a certain featureset.

    CANNOT be used in a scikit-learn pipeline!

    Notes
    -----
    ``transform`` ignores its ``X`` argument and instead modifies the
    wrapped Mvp-object in place.

    Parameters
    ----------
    mvp : mvp-object
        Used to extract meta-data. Its ``X``, ``voxel_idx``,
        ``featureset_id``, ``data_shape``, ``data_name`` and ``affine``
        attributes are read and overwritten by ``transform``.
    featureset_idx : int
        Id of the feature-set to keep; it is matched against the values in
        ``mvp.featureset_id``. Exactly one feature-set must match.
    """

    def __init__(self, mvp, featureset_idx):
        self.mvp = mvp
        self.featureset_idx = featureset_idx

    def fit(self, X=None, y=None):
        """ Does nothing, but included due to scikit-learn API.

        ``X`` and ``y`` are accepted (and ignored) so that ``fit(X, y)`` and
        the inherited ``fit_transform`` can be called without a TypeError.

        Returns
        -------
        self : SelectFeatureset
        """
        return self

    def transform(self, X=None):
        """ Reduces the wrapped mvp to the columns of one feature-set.

        Parameters
        ----------
        X : ignored
            Present for API compatibility only.

        Returns
        -------
        mvp : mvp-object
            The wrapped mvp-object (modified in place) holding only the
            columns and meta-data of the requested feature-set.

        Raises
        ------
        ValueError
            If the requested id matches no feature-set, or more than one.
        """
        mvp = self.mvp

        # Column mask: True for every feature belonging to the requested set.
        col_idx = np.isin(mvp.featureset_id, self.featureset_idx)

        # Position of the requested set among the (sorted) unique ids. This
        # must be looked up in the unique ids, not in the column mask: a
        # feature-set normally spans many columns, but it has exactly one
        # entry in the per-feature-set lists below.
        # NOTE(review): assumes data_shape/data_name/affine are ordered like
        # the sorted unique feature-set ids -- confirm against the Mvp class.
        fids = np.unique(mvp.featureset_id)
        pos_idx = np.where(np.isin(fids, self.featureset_idx))[0]

        # '%s' (rather than '%i') keeps the message printable when an
        # array-like id was passed, so the ValueError is actually raised.
        if len(pos_idx) > 1:
            msg = ("Found more than one positional index when selecting "
                   "feature-set %s" % (self.featureset_idx,))
            raise ValueError(msg)

        if len(pos_idx) == 0:
            msg = ("Didn't find a feature-set with id '%s'."
                   % (self.featureset_idx,))
            raise ValueError(msg)

        pos = pos_idx[0]

        mvp.X = mvp.X[:, col_idx]
        mvp.voxel_idx = mvp.voxel_idx[col_idx]

        # Only one feature-set remains, so every remaining column gets id 0.
        mvp.featureset_id = np.zeros_like(mvp.featureset_id[col_idx])

        # Per-feature-set meta-data collapses to a single-element list.
        mvp.data_shape = [mvp.data_shape[pos]]
        mvp.data_name = [mvp.data_name[pos]]
        mvp.affine = [mvp.affine[pos]]

        self.mvp = mvp
        return mvp