Source code for skbold.postproc.prevalence

from __future__ import division, print_function, absolute_import

from builtins import range
import os.path as op
import numpy as np
import nibabel as nib
import joblib
from scipy import stats
from tqdm import trange
from warnings import filterwarnings

filterwarnings(action='ignore', category=RuntimeWarning)

class PrevalenceInference(object):
    """ Class that performs prevalence inference based on the paper by
    Allefeld, Görgen, & Haynes (2016), NeuroImage.

    Parameters
    ----------
    obs : numpy ndarray
        A 2D array of shape [N (subjects) x K (voxels)], or a 1D array of
        shape [N,].
    perms : numpy ndarray
        A 3D array of shape [N (subjects) x K (voxels) x P1 (first-level
        permutations)], or a 2D array of shape [N x P1].
    P2 : int
        Number of second-level permutations to run.
    gamma0 : float
        Prevalence null (gamma < gamma0) to test.
    alpha : float
        Significance level for hypothesis testing.

    Examples
    --------
    >>> from skbold.postproc import PrevalenceInference
    >>> import numpy as np
    >>> N, K, P1 = 20, (40, 40, 38), 15
    >>> obs = np.random.normal(loc=0.55, scale=0.05, size=(N, np.prod(K)))
    >>> perms = np.random.normal(loc=0.5, scale=0.05,
    ...                          size=(N, np.prod(K), P1))
    >>> pvi = PrevalenceInference(obs=obs, perms=perms, P2=100000,
    ...                           gamma0=0.5, alpha=0.05)
    >>> pvi.run()
    Running with parameters:
        N = 20
        K = 60800
        P1 = 15
        P2 = 100000
    """
    def __init__(self, obs, perms, P2=100000, gamma0=0.5, alpha=0.05):
        """ Initializes PrevalenceInference object. """
        self.obs = obs
        self.perms = perms
        self.P2 = P2
        self.gamma0 = gamma0
        self.alpha = alpha
        self.N = None
        self.K = None
        self.P1 = None
        print("This is experimental functionality! (i.e., not yet fully "
              "tested)")
    def _check_inputs(self):
        """ Checks and validates input data and extracts parameters. """
        # Remove singleton dimensions
        self.obs = self.obs.squeeze()
        self.perms = self.perms.squeeze()

        if self.perms.ndim > 3:
            raise ValueError("Your perms array should be 2D or 3D!")

        if self.obs.ndim > 2:
            msg = "Observed values (obs) should be 1 or 2 dimensional!"
            raise ValueError(msg)

        if self.obs.ndim == 2:
            # Assume we're dealing with multiple voxels
            self._create_mask()  # To remove NaNs and such
            self.K = self.obs.shape[1]  # set after masking, so K matches
        else:
            # We just have a single score
            self.K = 1
            # Add singleton axis to make calculations more parsimonious
            self.perms = self.perms[:, np.newaxis, :]

        self.N = self.obs.shape[0]
        self.P1 = self.perms.shape[-1]

        print("Running with parameters:\n\tN = %i\n\tK = %i\n\tP1 = %i"
              "\n\tP2 = %i" % (self.N, self.K, self.P1, self.P2))

    def _create_mask(self):
        """ Removes all voxels that are zero or NaN in any subject. """
        # Stack observed and permuted scores together: [N x K x (P1 + 1)]
        all_data = np.dstack((self.obs[:, :, np.newaxis], self.perms))

        # Voxels should be neither zero nor NaN in any subject/permutation
        mask = np.logical_and(~(np.isnan(all_data).sum(axis=(0, 2)) > 0),
                              ~((all_data == 0.0).sum(axis=(0, 2)) > 0))
        all_data_nonzero = all_data[:, mask, :]  # index with mask
        self.mask = mask  # keep for mapping results back to voxel space

        # Unstack obs and perms
        self.obs = all_data_nonzero[:, :, 0]
        self.perms = all_data_nonzero[:, :, 1:]
        print("Found %i non-zero voxels" % mask.sum())
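    # Shape illustration: for obs of shape (20, 60800) and perms of shape
    # (20, 60800, 15), _create_mask() stacks them into all_data of shape
    # (20, 60800, 16); a voxel survives the mask only if none of its
    # 20 * 16 = 320 values is zero or NaN.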
    def run(self):
        """ Runs the actual prevalence inference algorithm. """
        self._check_inputs()

        # Shorten parameters for clarity
        N, K, P1, P2, alpha, gamma0 = (self.N, self.K, self.P1, self.P2,
                                       self.alpha, self.gamma0)

        # Minimum observed score across subjects (per voxel)
        m = np.min(self.obs, axis=0)
        u_rank = np.zeros(K)

        if K > 1:
            c_rank = np.zeros(K)

        for j in trange(P2):  # Loop over second-level permutations
            # Draw one random first-level permutation per subject
            these_perms = np.vstack(
                [self.perms[k, :, np.random.choice(np.arange(P1))]
                 for k in range(N)])
            min_vals = these_perms.min(axis=0)
            u_rank += m <= min_vals  # Update uncorrected ranks

            if K > 1:  # Update corrected ranks
                c_rank += m <= min_vals.max()

        # Calculate statistics!
        # - pu_GN = p-value uncorrected Global Null
        # - pu_MN = p-value uncorrected Majority Null
        # - gamma0_u = largest gamma0 given alpha (uncorrected)
        # - gamma0_max_u = largest possible gamma0 given N, P2, and alpha
        #   (uncorrected)
        pu_GN = (1 + u_rank) / (P2 + 1)
        pu_MN = ((1 - gamma0) * pu_GN ** (1 / N) + gamma0) ** N
        gamma0_u = ((alpha ** (1 / N) - pu_GN ** (1 / N)) /
                    (1 - pu_GN ** (1 / N)))
        gamma0_u[alpha < pu_GN] = 0
        gamma0_max_u = ((alpha ** (1 / N) - (1 / P2) ** (1 / N)) /
                        (1 - (1 / P2) ** (1 / N)))

        self.pu_GN = pu_GN
        self.pu_MN = pu_MN
        self.gamma0_u = gamma0_u
        self.gamma0_max_u = gamma0_max_u

        # Some extra corrected statistics
        # - pc_GN = p-value corrected Global Null
        # - pc_MN = p-value corrected Majority Null
        # - gamma0_c = largest gamma0 given alpha (corrected)
        # - gamma0_max_c = largest possible gamma0 given N, P2, and alpha
        #   (corrected)
        if K > 1:
            pc_GN = (1 + c_rank) / (P2 + 1)
            pc_MN = pc_GN + (1 - pc_GN) * pu_MN
            alpha_c = (alpha - pc_GN) / (1 - pc_GN)
            alpha_c[np.isinf(alpha_c)] = 0
            alpha_c[alpha_c <= 0.0] = 0
            gamma0_c = ((alpha_c ** (1 / N) - pu_GN ** (1 / N)) /
                        (1 - pu_GN ** (1 / N)))
            gamma0_c[alpha_c < pu_GN] = 0
            alpha_max_c = (alpha - 1 / P2) / (1 - 1 / P2)
            gamma0_max_c = ((alpha_max_c ** (1 / N) - (1 / P2) ** (1 / N)) /
                            (1 - (1 / P2) ** (1 / N)))

            self.pc_GN = pc_GN
            self.pc_MN = pc_MN
            self.gamma0_c = gamma0_c
            self.gamma0_max_c = gamma0_max_c

        # Median ("typical") score of observed values
        self.score_typical = np.median(self.obs, axis=0)
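    # Worked example of the uncorrected prevalence bound computed above:
    # with N = 20, alpha = 0.05, and a voxel reaching the smallest possible
    # p-value pu_GN ~= 1e-5 (for P2 = 100000):
    #
    #   gamma0_u = (0.05 ** (1 / 20) - 1e-5 ** (1 / 20))
    #              / (1 - 1e-5 ** (1 / 20))
    #            = (0.8609 - 0.5623) / (1 - 0.5623)
    #            ~= 0.68
    #
    # i.e., the prevalence null "gamma < 0.68" can be rejected for that
    # voxel at alpha = 0.05 (cf. Allefeld et al., 2016).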
    def write(self, path):
        """ Writes results from the prevalence inference procedure to disk.

        Parameters
        ----------
        path : str
            Directory to write the results to.
        """
        pass  # Not yet implemented
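The write method is currently a stub. A minimal sketch of one way it could be
implemented is given below; the helper name write_results is an assumption
for illustration, not part of skbold, and it relies on run() having been
called (and on the mask stored by _create_mask) to map masked statistics
back to the original voxel positions before saving them.

# Hypothetical sketch of what write() might do; NOT part of skbold.
# Assumes pvi.run() has been called and that _create_mask stored pvi.mask
# (a boolean array over the original K voxels).
import os.path as op
import numpy as np


def write_results(pvi, path):
    """ Saves prevalence-inference result arrays as .npy files in path. """
    results = {'pu_GN': pvi.pu_GN, 'pu_MN': pvi.pu_MN,
               'gamma0_u': pvi.gamma0_u,
               'score_typical': pvi.score_typical}

    # Corrected statistics are only computed when K > 1
    for name in ('pc_GN', 'pc_MN', 'gamma0_c'):
        if hasattr(pvi, name):
            results[name] = getattr(pvi, name)

    for name, values in results.items():
        if hasattr(pvi, 'mask') and values.shape == (pvi.mask.sum(),):
            # Scatter masked voxels back to the original K positions
            full = np.zeros(pvi.mask.size)
            full[pvi.mask] = values
            values = full
        np.save(op.join(path, name + '.npy'), values)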