# Source code for skbold.core.mvp

# Basic Mvp class, from which first-level specific (e.g. FSL or, perhaps in the
# future, SPM) containers/converters are subclassed.

# Author: Lukas Snoek [lukassnoek.github.io]
# Contact: lukassnoek@gmail.com
# License: 3 clause BSD

from __future__ import print_function, absolute_import, division

import os
import nibabel as nib
import os.path as op
import numpy as np
from glob import glob
from copy import copy
from sklearn.externals import joblib


class Mvp(object):
    """ Mvp (multiVoxel Pattern) class.

    Creates an object, specialized for storing fMRI data that will be
    analyzed using machine learning or RSA-like analyses, that stores both
    the data (X: an array of samples by features, y: numeric labels
    corresponding to X's classes/conditions) and the corresponding
    meta-data (e.g. nifti header, mask info, etc.).

    Parameters
    ----------
    X : ndarray
        A 2D numpy-array with rows indicating samples and columns indicating
        features.
    y : list or ndarray
        Array/list with labels/targets corresponding to samples in X.
    mask : str
        Absolute path to nifti-file that will mask (index) the patterns.
    mask_thres : int or float
        Minimum value for mask (in cases of probabilistic masks).

    Attributes
    ----------
    common_mask : dict or None
        ``None`` when no mask was given; otherwise a dict with the keys
        'path', 'threshold', 'idx' (flattened boolean array of voxels whose
        value exceeds the threshold), 'shape' and 'affine'.
    nifti_header : Nifti1Header object
        Nifti-header from corresponding mask. ``None`` after construction;
        filled in by ``_update_mask_info``.
    affine : ndarray
        Affine corresponding to nifti-mask. ``None`` after construction;
        filled in by ``_update_mask_info``.
    voxel_idx : ndarray
        Array with integer-indices indicating which voxels are used in the
        patterns relative to whole-brain space. In other words, it allows to
        map back the patterns to a whole-brain orientation.
    X : ndarray
        The actual patterns (2D: samples X features)
    y : list or ndarray
        Array/list with labels/targets corresponding to samples in X.

    Notes
    -----
    This class is mainly meant to serve as a parent-class for ``MvpWithin``
    and ``MvpBetween``, but it can alternatively be used as an object to
    store a 'custom' multivariate-pattern set with meta-data.
    """

    # ``unicode`` only exists on Python 2; referencing it unguarded raises a
    # NameError on Python 3, so resolve the accepted string types once here.
    try:
        _STRING_TYPES = (str, unicode)  # noqa: F821
    except NameError:
        _STRING_TYPES = (str,)

    def __init__(self, X=None, y=None, mask=None, mask_thres=0):

        if isinstance(mask, list):
            msg = 'You can only pass one mask! To use custom masks for each ' \
                  'source entry, specify the mask-key in source.'
            raise ValueError(msg)

        if mask is None:
            self.common_mask = None
            self.voxel_idx = None
        else:
            self._set_common_mask(mask, mask_thres)

        # Header/affine are deliberately left empty here; they are only
        # filled in by ``_update_mask_info``.
        self.nifti_header = None
        self.affine = None
        self.X = X
        self.y = y

    @staticmethod
    def _image_data(img):
        """ Returns the data-array of a loaded nibabel image.

        Equivalent to the deprecated ``img.get_data()`` (removed in recent
        nibabel releases).
        """
        return np.asanyarray(img.dataobj)

    def _set_common_mask(self, mask, threshold):
        """ Loads a mask and stores it in ``common_mask``/``voxel_idx``.

        Parameters
        ----------
        mask : str
            Path to the nifti-file containing the mask.
        threshold : int or float
            Voxels with a value strictly larger than this are kept.

        Returns
        -------
        img : nibabel image
            The loaded mask image, so callers can read further meta-data.
        """
        img = nib.load(mask)
        in_mask = (self._image_data(img) > threshold).ravel()
        self.common_mask = {'path': mask,
                            'threshold': threshold,
                            'idx': in_mask,
                            'shape': img.shape,
                            'affine': img.affine}
        # Flat indices (relative to the full mask volume) of retained voxels.
        self.voxel_idx = np.arange(np.prod(img.shape))[in_mask]
        return img

    def write(self, path=None, name='mvp', backend='joblib'):
        """ Writes the Mvp-object to disk.

        Parameters
        ----------
        path : str
            Absolute path where the file will be written to. Defaults to the
            current working directory.
        name : str
            Name of to-be-written file.
        backend : str
            Which format will be used to save the files. Default is 'joblib',
            which conveniently saves the Mvp-object as one file.
            Alternatively, and if the Mvp-object is too large to be saved
            with joblib, a data-header format will be used, in which the data
            (``X``) will be saved using Numpy and the meta-data (everything
            except ``X``) will be saved using joblib.

        Raises
        ------
        ValueError
            If ``backend`` is neither 'joblib' nor 'numpy'.
        """
        if backend not in ('joblib', 'numpy'):
            # Previously an unknown backend silently wrote nothing.
            raise ValueError("Unknown backend '%s'; choose 'joblib' or "
                             "'numpy'." % backend)

        if path is None:
            path = os.getcwd()

        fn = op.join(path, name)
        print("Saving file '%s' to disk." % fn)

        if backend == 'joblib':
            try:
                joblib.dump(self, fn + '.jl', compress=3)
            except Exception:
                # Any failure of the single-file dump triggers the
                # data/header fallback (but Ctrl-C is no longer swallowed).
                msg = "Array too large to save with joblib; using Numpy ... "
                print(msg)
                backend = 'numpy'
                # Remove the '*npy.z' files found in the target directory
                # (presumably leftovers of the failed dump).
                for leftover in glob(op.join(path, '*npy.z')):
                    os.remove(leftover)

        if backend == 'numpy':
            np.save(fn + '_data.npy', self.X)
            # Detach X only while the header is dumped, then restore it so
            # that saving does not wipe the in-memory data.
            data = self.X
            self.X = None
            try:
                joblib.dump(self, fn + '_header.jl', compress=3)
            finally:
                self.X = data

    def update_mask(self, mask, threshold=0):
        """ Restricts the patterns to the voxels inside a (new) mask.

        For external use. ``X``, ``featureset_id`` and ``voxel_idx`` are
        subset in-place to the features that fall inside the mask. Requires
        the attributes ``featureset_id`` and ``data_shape`` to be set on the
        object (``__init__`` of this class does not set them).

        Parameters
        ----------
        mask : str, ndarray or list
            Path to a nifti-file, an array that can index the flattened data
            volume (e.g. a boolean array), or a list with one such
            path/array per feature set. A single mask is applied to every
            feature set; list entries are paired with the sorted unique
            values of ``featureset_id``.
        threshold : int, float or list
            Minimum value for mask(s) loaded from file; a scalar is used for
            all masks in a list. Not applied to masks passed as arrays.
        """
        string_types = self._STRING_TYPES

        if isinstance(mask, string_types):
            mask = self._image_data(nib.load(mask)) > threshold

        featureset_ids = np.unique(self.featureset_id)

        if isinstance(mask, list):
            if not isinstance(threshold, list):
                threshold = [threshold] * len(mask)

            if all(isinstance(m, string_types) for m in mask):
                mask = [self._image_data(nib.load(m)) > threshold[i]
                        for i, m in enumerate(mask)]
            masks = list(mask)
        else:
            masks = [mask] * len(featureset_ids)

        keep = []
        # ``posidx`` is the position of the feature set among the sorted
        # unique ids, which is also how ``data_shape`` is indexed.
        for posidx, (this_mask, fid) in enumerate(zip(masks, featureset_ids)):
            in_set = self.featureset_id == fid
            voxels = self.voxel_idx[in_set]

            # Mark current voxels with 1, add 1 for voxels in the mask;
            # a value of 2 thus means "present in both".
            volume = np.zeros(self.data_shape[posidx]).ravel()
            volume[voxels] = 1
            volume[this_mask.ravel()] += 1
            keep.append((volume == 2)[voxels])

        # NOTE(review): concatenating per feature set assumes the columns of
        # X are grouped by (sorted) featureset_id -- confirm against callers.
        keep = np.concatenate(keep, axis=0)
        self.X = self.X[:, keep]
        self.featureset_id = self.featureset_id[keep]
        self.voxel_idx = self.voxel_idx[keep]

    def _update_mask_info(self, mask, threshold=None):
        """ Replaces the common mask and the derived meta-data.

        Sets ``common_mask`` and ``voxel_idx`` from the given mask and copies
        the mask's affine and header to ``affine`` and ``nifti_header``.

        Parameters
        ----------
        mask : str
            Path to the nifti-file containing the mask.
        threshold : int or float, optional
            Minimum value for mask; ``None`` is treated as 0.
        """
        thr = 0 if threshold is None else threshold
        img = self._set_common_mask(mask, thr)
        self.affine = img.affine
        self.nifti_header = img.header