Source code for skbold.postproc.extract_roi_info

# Extract region-specific info for a given statistic-file (nifti) and
# outputs a csv-file which can be copy-pasted directly in a Word (ugh..) file.

# Note: this is quite likely the ugliest code I've ever written.
# My apologies.

# Author: Lukas Snoek [lukassnoek.github.io]
# Contact: lukassnoek@gmail.com
# License: 3 clause BSD

from __future__ import division, print_function, absolute_import
from builtins import range
import numpy as np
import nibabel as nib
from glob import glob
import os.path as op
import pandas as pd
from scipy.ndimage.measurements import label

import skbold
roi_dir = op.join(op.dirname(skbold.__file__), 'data', 'ROIs',
                  'harvard_oxford')


[docs]def extract_roi_info(statfile, stat_name=None, roi_type='unilateral', per_cluster=True, cluster_engine='scipy', min_clust_size=20, stat_threshold=None, mask_threshold=20, save_indices=True, verbose=True): """ Extracts information per ROI for a given statistics-file. Reads in a thresholded (!) statistics-file (such as a thresholded z- or t-stat from a FSL first-level directory) and calculates for a set of ROIs the number of significant voxels included and its maximum value (+ coordinates). Saves a csv-file in the same directory as the statistics-file. Assumes that the statistics file is in MNI152 2mm space. Parameters ---------- statfile : str Absolute path to statistics-file (nifti) that needs to be evaluated. stat_name : str Name for the contrast/stat-file that is being analyzed. roi_type : str Whether to use unilateral or bilateral masks (thus far, only Harvard- Oxford atlas masks are supported.) per_cluster : bool Whether to evaluate the statistics-file as a whole (per_cluster=False) or per cluster separately (per_cluster=True). cluster_engine : str Which 'engine' to use for clustering; can be 'scipy' (default), using scipy.ndimage.measurements.label, or 'fsl' (using FSL's cluster commmand). min_clust_size : int Minimum cluster size (i.e. clusters with fewer voxels than this number are discarded; also, ROIs containing fewer voxels than this will not be listed on the CSV. stat_threshold : int or float If the stat-file contains uncorrected data, stat_threshold can be used to set a lower bound. mask_threshold : bool Threshold for probabilistics masks, such as the Harvard-Oxford masks. Default of 25 is chosen as this minimizes overlap between adjacent masks while still covering most of the entire brain. save_indices : bool Whether to save the indices (coordinates) of peaks of clusters. verbose : bool Whether to print some output regarding the parsing process. Returns ------- df : Dataframe Dataframe corresponding to the written csv-file. """ if isinstance(statfile, (str, unicode)): data = nib.load(statfile).get_data() else: data = statfile sign_mask = np.ones(shape=data.shape) sign_mask[data < 0] = -1 # data = np.abs(data) if stat_threshold: data[data < stat_threshold] = 0 masks = glob(op.join(roi_dir, roi_type, '*.nii.gz')) df_list = [] if per_cluster: # Start clustering of data if cluster_engine == 'scipy': clustered, num_clust = label(data > 0) values, counts = np.unique(clustered.ravel(), return_counts=True) n_clust = np.argmax(np.sort(counts)[::-1] < min_clust_size) if n_clust == 0: print('No (sufficiently large) clusters!') return 0 # Sort and trim cluster_nrs = values[counts.argsort()[::-1][:n_clust]] cluster_nrs = np.delete(cluster_nrs, 0) elif cluster_engine == 'fsl': # Not yet implemented pass print('Analyzing %i clusters for %s' % (len(cluster_nrs), stat_name)) cluster_list = [] # Looping over clusters for i, cluster_id in enumerate(cluster_nrs): if verbose: print('Processing cluster %i' % (i+1)) cl_mask = clustered == cluster_id k = cl_mask.sum() mx = data[cl_mask].max() tmp = np.zeros(data.shape) tmp[cl_mask] = data[cl_mask] == mx # in case of multiple voxels with same stat / weight if np.sum(tmp == 1) > 1: X = np.where(tmp == 1)[0][0] Y = np.where(tmp == 1)[1][0] Z = np.where(tmp == 1)[2][0] else: X = np.where(tmp == 1)[0] Y = np.where(tmp == 1)[1] Z = np.where(tmp == 1)[2] # if weight / stat is negative, change sign of mx if sign_mask[X, Y, Z] < 0: mx = -mx # convert to MNI-coordinates X = 90 - X * 2 Y = -126 + Y * 2 Z = -72 + Z * 2 # This is purely for formatting issues if i == 0: if stat_name is None: stat = op.basename(statfile).split('.')[0].split('_')[-1] c = op.basename(op.dirname(statfile)).split('.')[0] c += '_%s' % stat else: c = stat_name else: c = '' to_append = {'Contrast': c, 'cluster': (i+1), 'k cluster': str(k), 'max cluster': mx, 'x': str(X[0]), 'y': str(Y[0]), 'z': str(Z[0]), 'Region': '', 'k region': '', 'max region': ''} cluster_list.append(to_append) roi_list = [] # Loop over ROIs for imask, mask in enumerate(masks): mask_name = op.basename(mask).split('.')[0].split('_') if roi_type == 'unilateral': mask_name[0] += '.' mask_name = ' '.join(mask_name) roi_mask = nib.load(mask).get_data() > mask_threshold overlap = cl_mask.astype(int) + roi_mask.astype(int) == 2 k = overlap.sum() if k > 0: mx = data[overlap].max() tmp = np.zeros(data.shape) tmp[overlap] = data[overlap] == mx # in case of multiple voxels with same stat / weight if np.sum(tmp == 1) > 1: X = np.where(tmp == 1)[0][0] Y = np.where(tmp == 1)[1][0] Z = np.where(tmp == 1)[2][0] else: X = np.where(tmp == 1)[0] Y = np.where(tmp == 1)[1] Z = np.where(tmp == 1)[2] # if sign of weight / stat is negative, change sign of mx if sign_mask[X, Y, Z] < 0: mx = -mx # convert to MNI-coordinates X = 90 - X * 2 Y = -126 + Y * 2 Z = -72 + Z * 2 else: # If no voxels, write some default values mx = 0 X, Y, Z = 0, 0, 0 if imask == 0: cluster_list[-1]['Region'] = mask_name cluster_list[-1]['k region'] = k cluster_list[-1]['max region'] = mx else: to_append = {'Contrast': '', 'cluster': (i + 1 + 0.1), 'k cluster': '', 'max cluster': '', 'x': '', 'y': '', 'z': '', 'Region': mask_name, 'k region': k, 'max region': mx} if k > cluster_list[-1]['k region']: to_append['Region'] = cluster_list[-1]['Region'] to_append['k region'] = cluster_list[-1]['k region'] maxr = cluster_list[-1]['max region'] to_append['max region'] = maxr cluster_list[-1]['Region'] = mask_name cluster_list[-1]['k region'] = k cluster_list[-1]['max region'] = mx roi_list.append(to_append) roi_dfs = pd.concat([pd.DataFrame(tmp, [0]) for tmp in roi_list]) roi_dfs = roi_dfs[roi_dfs['k region'] > min_clust_size] roi_dfs = roi_dfs.sort_values(by=['cluster', 'k region'], ascending=[True, False]) whiteline = {key: '' if key != 'cluster' else (i + 1 + 0.1) for key in roi_dfs.keys()} roi_dfs = roi_dfs.append(whiteline, ignore_index=True) df_list.append(pd.DataFrame(cluster_list[-1], [0])) df_list.append(roi_dfs) # Concatenate dataframes! df = pd.concat(df_list) # Some cleaning / processing cols_ordered = ['Contrast', 'cluster', 'k cluster', 'max cluster', 'x', 'y', 'z', 'Region', 'k region', 'max region'] df = df[cols_ordered] df['cluster'] = ['' if (val % 1) != 0 else str(val) for val in df['cluster']] else: # If not extracting info per cluster, but wholebrain print('Analyzing %s' % statfile) col_names = ['roi', 'k', 'max', 'mean', 'sd', 'X', 'Y', 'Z'] results = pd.DataFrame(columns=col_names) # Only loop over masks for mask in masks: mask_name = op.basename(mask).split('.')[0].split('_') if roi_type == 'unilateral': mask_name[0] += '.' mask_name = ' '.join(mask_name) roi_mask = nib.load(mask).get_data() > mask_threshold sig_mask = data > 0 overlap = sig_mask.astype(int) + roi_mask.astype(int) == 2 k = overlap.sum() X, Y, Z = 0, 0, 0 if k > 0: mx = data[overlap].max() mean, std = data[overlap].mean(), data[overlap].std() tmp = np.zeros(data.shape) tmp[overlap] = data[overlap] == mx if save_indices: X = 90 - np.where(tmp == 1)[0] * 2 Y = -126 + np.where(tmp == 1)[1] * 2 Z = -72 + np.where(tmp == 1)[2] * 2 else: mx, mean, std = 0, 0, 0 to_append = {'roi': mask_name, 'k': k, 'max': mx, 'mean': mean, 'sd': std, 'X': X, 'Y': Y, 'Z': Z} to_append = pd.DataFrame(to_append, index=[0]) df_list.append(to_append) df = pd.concat(df_list) df = df[df.k != 0].sort_values(by='k', ascending=False) df = df[['roi', 'k', 'max', 'mean', 'sd', 'X', 'Y', 'Z']] df = df[df['k'] > min_clust_size] filename = op.join(op.dirname(statfile), 'roi_info_%s.csv' % stat_name) df.to_csv(filename, index=False, header=True, sep='\t') return df