# Source code for skbold.feature_selection.selectors

# Class to implement sklearn's f_classif function, but with a minimum
# cutoff instead of an absolute or proportional amount of features.

# Author: Lukas Snoek [lukassnoek.github.io]
# Contact: lukassnoek@gmail.com
# License: 3 clause BSD

from __future__ import print_function, division, absolute_import
from builtins import range
import numpy as np
from itertools import combinations


def fisher_criterion_score(X, y, norm='l1', balance=False):
    """Compute a Fisher-criterion-style score for every feature.

    The mean pattern of each class (per-feature mean over that class's
    samples) is computed first. Every unordered pair of classes is then
    scored per feature from the difference between the two mean patterns,
    scaled by a scalar spread term. See [1]_ for background on the Fisher
    criterion.

    Parameters
    ----------
    X : array, shape = (n_samples, n_features)
        Data matrix. It must expose ``.shape`` and support boolean row
        indexing. (The original documentation also lists sparse matrices;
        that is not verified here.)
    y : array-like, shape = (n_samples,)
        Class label of each sample.
    norm : str, default 'l1'
        ``'l1'`` scores a pair as ``|a - b| / (std(a) + std(b))``.
        Any other value (conventionally ``'l2'``) scores it as
        ``(a - b) ** 2 / (std(a) ** 2 + std(b) ** 2)``.
    balance : bool, default False
        If True, return the per-pair scores without averaging them.

    Returns
    -------
    scores_ : ndarray
        If ``balance`` is False, shape = (n_features,): the mean score over
        all class pairs. If ``balance`` is True, shape =
        (n_class_pairs, n_features), with rows ordered like
        ``itertools.combinations`` over the sorted unique labels.

    Notes
    -----
    ``std(a)`` is the standard deviation of class mean pattern ``a`` taken
    *across features*, i.e. one scalar per class; it is not the
    within-class, per-feature variance of the textbook Fisher criterion.
    NOTE(review): kept as-is to preserve existing results -- confirm this
    is the intended definition.

    With fewer than two classes there are no pairs; the averaged result is
    then NaN for every feature.

    References
    ----------
    .. [1] R. O. Duda, P. E. Hart and D. G. Stork. Pattern Classification.
       Wiley-Interscience Publication, 2001.
    """
    y = np.asarray(y)
    labels = np.unique(y)  # sorted unique class labels
    n_class = labels.shape[0]
    n_features = X.shape[1]

    # Mean pattern per class.
    av_patterns = np.zeros((n_class, n_features))
    for i, label in enumerate(labels):
        av_patterns[i, :] = X[y == label, :].mean(axis=0)
    # NaN means (e.g. caused by NaNs in X) are replaced by zero.
    av_patterns[np.isnan(av_patterns)] = 0

    # Spread of each mean pattern across features: one scalar per class,
    # computed once instead of once per class pair.
    class_std = [pattern.std() for pattern in av_patterns]

    # Score every unordered pair of classes.
    pairs = list(combinations(range(n_class), 2))
    diff_patterns = np.zeros((len(pairs), n_features))
    for row, (i, j) in enumerate(pairs):
        delta = av_patterns[i] - av_patterns[j]
        if norm == 'l1':
            diff_patterns[row, :] = np.abs(
                delta / (class_std[i] + class_std[j]))
        else:
            diff_patterns[row, :] = (delta ** 2) / (
                class_std[i] ** 2 + class_std[j] ** 2)

    if balance:
        return diff_patterns
    return np.mean(diff_patterns, axis=0)