# Source code for skbold.core.mvp

# Basic Mvp class, from which first-level specific (e.g. FSL or, perhaps in the
# future, SPM) containers/converters are subclassed.

# Author: Lukas Snoek [lukassnoek.github.io]
# Contact: lukassnoek@gmail.com
# License: 3 clause BSD

from __future__ import print_function, absolute_import, division

import os
import nibabel as nib
import os.path as op
import numpy as np
from glob import glob
from copy import copy
from sklearn.externals import joblib


class Mvp(object):
    """
    Mvp (multiVoxel Pattern) class.

    Creates an object, specialized for storing fMRI data that will be analyzed
    using machine learning or RSA-like analyses, that stores both the data
    (X: an array of samples by features, y: numeric labels corresponding to
    X's classes/conditions) and the corresponding meta-data (e.g. nifti header,
    mask info, etc.).

    Parameters
    ----------
    X : ndarray
        A 2D numpy-array with rows indicating samples and
        columns indicating features.
    y : list or ndarray
        Array/list with labels/targets corresponding to samples in X.
    mask : str
        Absolute path to nifti-file that will mask (index) the patterns.
        Passing a list raises a ``ValueError``.
    mask_thres : int or float
        Minimum value for mask (in cases of probabilistic masks).

    Attributes
    ----------
    common_mask : dict or None
        ``None`` when no mask was given. Otherwise a dict with the keys
        'path' (the mask file), 'threshold', 'idx' (flattened boolean array,
        True where the mask exceeds the threshold), 'shape' and 'affine'.
    nifti_header : Nifti1Header object
        Nifti-header of the mask. ``None`` after construction; filled in by
        ``_update_mask_info``.
    affine : ndarray
        Affine of the nifti-mask. ``None`` after construction; filled in by
        ``_update_mask_info``.
    voxel_idx : ndarray
        Array with integer-indices indicating which voxels are used in the
        patterns relative to whole-brain space. In other words, it allows to
        map back the patterns to a whole-brain orientation. ``None`` when no
        mask was given.
    X : ndarray
        The actual patterns (2D: samples X features)
    y : list or ndarray
        Array/list with labels/targets corresponding to samples in X.

    Notes
    -----
    This class is mainly meant to serve as a parent-class for ``MvpWithin``
    and ``MvpBetween``, but it can alternatively be used as an object to store
    a 'custom' multivariate-pattern set with meta-data.
    """

    # ``str`` plus the type of a unicode literal: (str, unicode) on Python 2
    # and (str, str) on Python 3, where the ``unicode`` builtin is gone.
    _string_types = (str, type(u''))

    def __init__(self, X=None, y=None, mask=None, mask_thres=0):

        if isinstance(mask, list):
            msg = 'You can only pass one mask! To use custom masks for each ' \
                  'source entry, specify the mask-key in source.'
            raise ValueError(msg)

        if mask is None:
            self.common_mask = None
            self.voxel_idx = None
        else:
            self._set_common_mask(mask, mask_thres)

        # Header and affine are only filled in by ``_update_mask_info``.
        self.nifti_header = None
        self.affine = None

        self.X = X
        self.y = y

    def write(self, path=None, name='mvp', backend='joblib'):
        """ Writes the Mvp-object to disk.

        Parameters
        ----------
        path : str
            Absolute path where the file will be written to. Defaults to the
            current working directory.
        name : str
            Name of to-be-written file (without extension).
        backend : str
            Which format will be used to save the files. Default is 'joblib',
            which conveniently saves the Mvp-object as one file
            (``<name>.jl``). Alternatively, and if saving with joblib fails
            (e.g. because the Mvp-object is too large), a data-header format
            will be used, in which the data (``X``) will be saved using Numpy
            (``<name>_data.npy``) and the meta-data (everything except ``X``)
            will be saved using joblib (``<name>_header.jl``).

        Raises
        ------
        ValueError
            If ``backend`` is neither 'joblib' nor 'numpy'.
        """
        if backend not in ('joblib', 'numpy'):
            # Previously an unknown backend announced a save and wrote nothing.
            raise ValueError("Unknown backend '%s'; use 'joblib' or 'numpy'."
                             % backend)

        if path is None:
            path = os.getcwd()

        fn = op.join(path, name)

        print("Saving file '%s' to disk." % fn)

        if backend == 'joblib':
            try:
                joblib.dump(self, fn + '.jl', compress=3)
            except Exception:
                # Not a bare ``except``: KeyboardInterrupt/SystemExit must
                # still propagate instead of triggering the fallback.
                msg = "Array too large to save with joblib; using Numpy ... "
                print(msg)
                backend = 'numpy'
                # Presumably leftovers of the failed joblib dump; note that
                # this matches every '*npy.z' file in ``path``.
                for leftover in glob(op.join(path, '*npy.z')):
                    os.remove(leftover)

        if backend == 'numpy':
            np.save(fn + '_data.npy', self.X)
            data = self.X
            # Temporarily drop the data so only the meta-data is pickled.
            self.X = None
            try:
                joblib.dump(self, fn + '_header.jl', compress=3)
            finally:
                # Saving must not destroy the in-memory patterns.
                self.X = data

    def update_mask(self, mask, threshold=0):
        """ Restricts the patterns to the voxels inside a (new) mask.

        For external use. Relies on ``self.featureset_id`` and
        ``self.data_shape``, which are not set by ``Mvp.__init__`` itself
        (presumably by subclasses -- verify before calling on a bare Mvp).
        ``X``, ``featureset_id`` and ``voxel_idx`` are updated in place.

        Parameters
        ----------
        mask : str, ndarray or list
            Path to a nifti-file, a boolean array, or a list with one
            path/boolean array per unique feature-set id (paired with the
            sorted unique ids). A single mask is applied to every feature-set.
        threshold : int, float or list
            Lower bound applied to masks that are loaded from disk; masks
            passed as arrays are used as-is. May be a list with one value
            per mask.

        Raises
        ------
        ValueError
            If a list of masks (or thresholds) does not match the number of
            feature-sets (or masks).
        """
        if isinstance(mask, self._string_types):
            mask = self._load_bool_mask(mask, threshold)

        fids = np.unique(self.featureset_id)

        if isinstance(mask, list):

            if not isinstance(threshold, list):
                threshold = [threshold] * len(mask)

            if len(threshold) != len(mask):
                raise ValueError('Got %i thresholds for %i masks.'
                                 % (len(threshold), len(mask)))

            if len(mask) != len(fids):
                # zip() would silently drop the surplus masks/feature-sets.
                raise ValueError('Got %i masks for %i feature-sets.'
                                 % (len(mask), len(fids)))

            masks = [self._load_bool_mask(m, thr)
                     if isinstance(m, self._string_types) else m
                     for m, thr in zip(mask, threshold)]
        else:
            masks = [mask] * len(fids)

        indices = []

        for posidx, (fid, this_mask) in enumerate(zip(fids, masks)):
            voxels = self.voxel_idx[self.featureset_id == fid]
            # Count per voxel: +1 if currently in the patterns, +1 if inside
            # the new mask; a total of 2 marks voxels that are kept.
            overlap = np.zeros(self.data_shape[posidx]).ravel()
            overlap[voxels] = 1
            overlap[this_mask.ravel()] += 1
            indices.append((overlap == 2)[voxels])

        indices = np.concatenate(indices, axis=0)
        self.X = self.X[:, indices]
        self.featureset_id = self.featureset_id[indices]
        self.voxel_idx = self.voxel_idx[indices]

    def _update_mask_info(self, mask, threshold=None):
        """ Sets mask-related attributes from the nifti-file ``mask``.

        Updates ``common_mask``, ``voxel_idx``, ``affine`` and
        ``nifti_header``. A ``threshold`` of None is treated as 0.
        """
        thr = 0 if threshold is None else threshold
        maskl = self._set_common_mask(mask, thr)
        self.affine = maskl.affine
        self.nifti_header = maskl.header

    def _set_common_mask(self, mask, threshold):
        """ Loads ``mask``, sets ``common_mask``/``voxel_idx``; returns image.
        """
        maskl = nib.load(mask)
        # np.asanyarray(img.dataobj) is nibabel's documented replacement for
        # the deprecated img.get_data().
        in_mask = (np.asanyarray(maskl.dataobj) > threshold).ravel()
        self.common_mask = {'path': mask, 'threshold': threshold,
                            'idx': in_mask,
                            'shape': maskl.shape, 'affine': maskl.affine}
        self.voxel_idx = np.arange(np.prod(maskl.shape))[in_mask]
        return maskl

    @staticmethod
    def _load_bool_mask(path, threshold):
        """ Loads nifti-file ``path``; returns boolean array (> threshold).
        """
        return np.asanyarray(nib.load(path).dataobj) > threshold