# Source code for skbold.core.mvp_within

from __future__ import division, print_function, absolute_import

import os.path as op
import pandas as pd
import numpy as np
import nibabel as nib
from ..core import Mvp, convert2epi, convert2mni
from ..utils import sort_numbered_list
from sklearn.preprocessing import LabelEncoder
from glob import glob


class MvpWithin(Mvp):
    """ Extracts and stores subject-specific single-trial multivoxel-patterns

    The MvpWithin class allows for the extraction of subject-specific
    single-trial, multivoxel fMRI patterns from a FSL feat-directory.

    Parameters
    ----------
    source : str
        An absolute path to a subject-specific first-level FEAT directory.
    read_labels : bool
        Whether to read the labels/targets (i.e. ``y``) from the contrast
        names defined in the design.con file.
    remove_contrast : list
        Given that all contrasts (COPEs) are loaded from the FEAT-directory,
        this argument can be used to remove irrelevant contrasts (e.g.
        contrasts of nuisance predictors). Entries in remove_contrast do not
        have to be literal; they may be a substring of the full name of the
        contrast.
    invert_selection : bool
        Sometimes, instead of loading in all contrasts and excluding some,
        you might want to load only a single or a couple contrasts, and
        exclude all other. By setting invert_selection to True, it treats
        the remove_contrast variable as a list of contrasts to include.
    ref_space : str
        Indicates in which 'space' the patterns will be stored. The default
        is 'epi', indicating that the patterns will be loaded and stored in
        subject-specific (native) functional space. The other option is
        'mni', which indicates that MvpWithin will first transform contrasts
        to MNI152 (2mm) space before it loads them. This option assumes that
        a 'reg' directory is present in the .feat-directory, including
        warp-files from functional to mni space (i.e.
        example_func2standard.nii.gz).
    statistic : str
        Which statistic (beta = (CO)PE, tstat, zstat, etc.) from FEAT
        directories to use as patterns.
    remove_zeros : bool
        Whether to remove features (i.e. voxels) which are 0 across all
        trials (due to, e.g., being located outside the brain).
    X : ndarray
        Not necessary to pass MvpWithin, but needs to be defined as it is
        needed in the super-constructor.
    y : ndarray or list
        Labels or targets corresponding to the samples in ``X``. This can be
        used when read_labels is set to False.
    mask : str
        Absolute path to nifti-file that will be used as mask.
    mask_threshold : int or float
        Minimum value to binarize the mask when it's probabilistic.

    Attributes
    ----------
    mask_shape : tuple
        Shape of mask that patterns will be indexed with.
    nifti_header : Nifti1Header object
        Nifti-header from corresponding mask.
    affine : ndarray
        Affine corresponding to nifti-mask.
    voxel_idx : ndarray
        Array with integer-indices indicating which voxels are used in the
        patterns relative to whole-brain space. In other words, it allows to
        map back the patterns to a whole-brain orientation.
    X : ndarray
        The actual patterns (2D: samples X features)
    y : list or ndarray
        Array/list with labels/targets corresponding to samples in X.
    contrast_labels : list
        List of names corresponding to the y-values.
    """

    def __init__(self, source, read_labels=True, remove_contrast=[],
                 invert_selection=None, ref_space='epi', statistic='tstat',
                 remove_zeros=True, X=None, y=None, mask=None,
                 mask_threshold=0):

        super(MvpWithin, self).__init__(X=X, y=y, mask=mask,
                                        mask_thres=mask_threshold)

        # Copy list input so the instance never aliases the shared default
        # list (or a list the caller keeps mutating afterwards).
        if isinstance(remove_contrast, list):
            remove_contrast = list(remove_contrast)

        self.source = source
        self.read_labels = read_labels
        self.ref_space = ref_space
        self.statistic = statistic
        self.invert_selection = invert_selection
        self.remove_zeros = remove_zeros
        self.remove_contrast = remove_contrast
        self.remove_idx = None
        self.data_shape = None
        self.directories = []
        # Labels passed by the caller are only meaningful when they are not
        # going to be re-derived from the contrast names in ``create``.
        self.y = y if (y is not None and not read_labels) else []
        self.contrast_labels = []
        # Filled with one 2D array per source by ``_load_fsl``.
        self.X = []

    def create(self):
        """ Extracts (meta-)data from FEAT-directory given appropriate
        settings during initialization.

        Raises
        ------
        ValueError
            If the 'source'-directory doesn't exist.
        ValueError
            If the number of loaded contrasts does not equal the number of
            extracted labels.
        """
        if isinstance(self.source, str):
            self.source = [self.source]

        # Loop over sources
        for src in self.source:
            if '.feat' not in src:
                msg = "Loading 'within-data' from other sources than " \
                      "FSL-feat directories is not yet implemented!"
                raise ValueError(msg)
            self._load_fsl(src)

        # If only one featureset, just index; otherwise, concatenate
        if len(self.X) == 1:
            self.X = self.X[0]
        else:
            self.X = np.concatenate(self.X, axis=0)

        if self.read_labels:
            self.y = LabelEncoder().fit_transform(self.contrast_labels)

        if self.remove_zeros:
            # Drop only the voxels that are zero in *every* trial; a voxel
            # that is non-zero in at least one trial is kept.
            keep = ~(self.X == 0).all(axis=0)
            self.X = self.X[:, keep]
            self.voxel_idx = self.voxel_idx[keep]
            self.featureset_id = self.featureset_id[keep]

    def _load_fsl(self, src):
        """ Loads the statistic images of one FEAT-directory into ``self.X``.

        Parameters
        ----------
        src : str
            Path to a FEAT-directory.

        Raises
        ------
        ValueError
            If ``src`` is not a directory, ``ref_space`` is invalid, no
            statistic files remain to be loaded, or the number of statistic
            files does not match the number of labels read for ``src``.
        """
        if not op.isdir(src):
            msg = "The feat-directory '%s' doesn't seem to exist." % src
            raise ValueError(msg)

        reg_dir = op.join(src, 'reg')
        if not op.isdir(reg_dir):
            print("WARNING: no reg-dir found in '%s'; cannot perform any "
                  "mni-to-epi (or vice versa) transformations" % src)

        # Only defined when labels are read; guards the count check below.
        n_labels = None
        if self.read_labels:
            design = op.join(src, 'design.con')
            contrast_labels_current = self._extract_labels(design_file=design)
            self.contrast_labels.extend(contrast_labels_current)
            n_labels = len(contrast_labels_current)

        if self.common_mask is not None:
            already_epi_mask = '_epi.nii.gz' in self.common_mask['path']
            if self.ref_space == 'epi' and not already_epi_mask:
                new_mask = convert2epi(self.common_mask['path'],
                                       reg_dir=reg_dir, out_dir=reg_dir)
                self._update_mask_info(new_mask,
                                       self.common_mask['threshold'])

        if self.ref_space == 'epi':
            stat_dir = op.join(src, 'stats')
        elif self.ref_space == 'mni':
            stat_dir = op.join(src, 'reg_standard')
        else:
            raise ValueError('Specify valid reference-space (ref_space)')

        # If mni-space is requested but no 'reg_standard' directory exists
        # yet, start from the native-space files and transform them below.
        transform2mni = self.ref_space == 'mni' and not op.isdir(stat_dir)
        if transform2mni:
            stat_dir = op.join(src, 'stats')

        stat_query = op.join(stat_dir, '%s*.nii.gz' % self.statistic)
        stat_files = sort_numbered_list(glob(stat_query))

        # Transform stat-files if ref_space is 'mni' but files are in 'epi'.
        if transform2mni:
            out_dir = op.join(src, 'reg_standard')
            stat_files = convert2mni(stat_files, reg_dir, out_dir)

        # remove_idx is only set by _extract_labels; it stays None when
        # read_labels is False, in which case every file is kept.
        if self.remove_idx is not None:
            drop = {int(idx) for idx in self.remove_idx}
            stat_files = [path for i, path in enumerate(stat_files)
                          if i not in drop]

        n_stat = len(stat_files)
        if n_stat == 0:
            msg = "No '%s' files left to load from '%s'." % (self.statistic,
                                                             stat_dir)
            raise ValueError(msg)

        if self.read_labels and n_stat != n_labels:
            msg = 'The number of trials (%i) do not match the number of ' \
                  'class labels (%i)' % (n_stat, n_labels)
            raise ValueError(msg)

        if self.common_mask is None:  # set attributes if no mask was given
            tmp = nib.load(stat_files[0])
            self.affine = tmp.affine
            self.nifti_header = tmp.header
            # maybe a call to _update_mask_info here?
            self.voxel_idx = np.arange(np.prod(tmp.shape))

        # Pre-allocate
        mvp_data = np.zeros((n_stat, self.voxel_idx.size))

        # Load in data (stat_files)
        for i, path in enumerate(stat_files):
            stat_img = nib.load(path)
            # np.asanyarray(img.dataobj) is the drop-in replacement for the
            # get_data() method that newer nibabel releases removed.
            volume = np.asanyarray(stat_img.dataobj)
            mvp_data[i, :] = volume.ravel()[self.voxel_idx]
            # NOTE(review): one entry per loaded file; the whitespace of the
            # original was lost, so confirm this belongs inside the loop.
            self.directories.append(src)

        mvp_data[np.isnan(mvp_data)] = 0
        self.X.append(mvp_data)

        # The following attributes are added for compatibility with MvpResults
        self.data_shape = stat_img.shape
        self.data_name = ['MvpWithin']
        self.featureset_id = np.zeros(mvp_data.shape[1], dtype=np.uint32)

    def _read_design(self, design_file):
        """ Reads the contrast names from a design.con file.

        Parameters
        ----------
        design_file : str
            Path to the design.con file.

        Returns
        -------
        labels : list
            Contrast names with a trailing numeric part (e.g. the '_003' in
            'positive_003') removed.

        Raises
        ------
        IOError
            If ``design_file`` does not exist.
        """
        if not op.isfile(design_file):
            raise IOError('There is no design.con file for %s' % design_file)

        # Find number of contrasts and read in accordingly
        with open(design_file, 'r') as dfile:
            lines = dfile.readlines()

        n_contrasts = sum('ContrastName' in line for line in lines)

        # Everything after the 'ContrastName' lines is skipped as footer.
        df = pd.read_csv(design_file, delimiter='\t', header=None,
                         skipfooter=len(lines) - n_contrasts, engine='python')

        cope_labels = list(df[1].str.strip())  # remove spaces

        # Here, numeric extensions of labels (e.g. 'positive_003') are removed
        labels = []
        for c in cope_labels:
            parts = [x.strip() for x in c.split('_')]
            if parts[-1].isdigit():
                labels.append('_'.join(parts[:-1]))
            else:
                labels.append(c)

        return labels

    def _extract_labels(self, design_file):
        """ Reads the labels from design.con and applies remove_contrast.

        Sets ``self.remove_idx`` to the positions (in design.con order) of
        the contrasts that are dropped, so that ``_load_fsl`` can drop the
        corresponding statistic files.

        Parameters
        ----------
        design_file : str
            Path to the design.con file.

        Returns
        -------
        cope_labels : list
            Labels of the contrasts that are kept.
        """
        cope_labels = self._read_design(design_file)

        if isinstance(self.remove_contrast, str):
            self.remove_contrast = [self.remove_contrast]

        remove_contrast = self.remove_contrast
        if remove_contrast is None:
            self.remove_idx = []
            return cope_labels

        # A label is selected when any remove_contrast entry is a substring
        # of it (e.g. to drop cues).
        selected = np.array([any(name in lab for name in remove_contrast)
                             for lab in cope_labels], dtype=bool)

        # With invert_selection the selected labels are the ones to keep.
        drop = ~selected if self.invert_selection else selected
        self.remove_idx = np.where(drop)[0]

        return [lab for lab, dropped in zip(cope_labels, drop)
                if not dropped]