def _SelectKHighest_from_mask(self, X, y, roi_path):
    print('Selecting K Highest from mask: %s' % roi_path)
    '''
    roi_mask = masking.apply_mask(
        roi_path, self.mask_non_brain, k_features=self.k_features
    )
    '''
    roi_mask = masking.apply_mask(roi_path, self.mask_non_brain)
    print('SelectKHighest ROI mask size: %d' % roi_mask.astype(bool).sum())

    # keep only the k_features highest-valued voxels within the ROI
    from pymri.utils.masking import separate_k_highest
    roi_mask = separate_k_highest(self.k_features, roi_mask)
    roi_mask = roi_mask.astype(bool)

    # project the selected voxels back into brain-volume space
    import nibabel
    mask_brain_img = nibabel.load(self.mask_non_brain).get_data()
    mask_brain = mask_brain_img.flatten().astype(bool)

    roi = np.zeros(mask_brain.flatten().shape)
    roi[mask_brain] = roi_mask
    roi = roi.reshape(mask_brain_img.shape)

    # save the selected ROI for inspection
    img = nibabel.Nifti1Image(roi, np.eye(4))
    img.to_filename('/tmp/best_roi.nii.gz')

    print('SelectKHighest data reduction from: %s' % str(X.shape))
    X = X[..., roi_mask]
    print('SelectKHighest data reduction to: %s' % str(X.shape))

    self.feature_reduction_method = roi_path

    return X
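# A minimal, self-contained sketch of the selection rule assumed above:
# separate_k_highest (from pymri.utils.masking) presumably keeps the k
# highest-valued voxels of the masked ROI vector and zeroes the rest, so that
# .astype(bool) afterwards yields exactly k selected voxels. The helper below
# is illustrative only, not the pymri implementation.
import numpy as np

def keep_k_highest(k, values):
    # indices of the k largest entries in the 1-D ROI vector
    top_idx = np.argsort(values)[-k:]
    selected = np.zeros_like(values)
    selected[top_idx] = values[top_idx]
    return selected

# keep_k_highest(2, np.array([0.1, 0.9, 0.3, 0.7])) -> array([0. , 0.9, 0. , 0.7])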
def _roi_mask_apply(self, X, roi_path):
    print('Applying mask: %s' % roi_path)

    roi_mask = masking.apply_mask(roi_path, self.mask_non_brain)
    roi_mask = roi_mask.astype(bool)
    print('ROI mask apply ROI mask size: %s' % str(roi_mask.sum()))

    X = X[..., roi_mask]
    self.feature_reduction_method = roi_path
    print('Masked data has shape: %s' % str(X.shape))

    return X
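# Minimal illustration of the masking step above (assumption: X has already
# been reduced to 2-D, samples x voxels, by the non-brain mask, so a boolean
# ROI vector indexes the last axis):
import numpy as np

X_demo = np.arange(12).reshape(3, 4)             # 3 samples x 4 voxels
roi_demo = np.array([True, False, True, False])  # 2 ROI voxels
print(X_demo[..., roi_demo].shape)               # (3, 2)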
nibabel.load('/git/pymri/examples/MNI152_T1_2mm_brain.nii.gz').get_data()

# X.shape is (91, 109, 91, 216)
# and mask.shape is (91, 109, 91)
affine = np.array([[-2., 0., 0., 90.],
                   [0., 2., 0., -126.],
                   [0., 0., 2., -72.],
                   [0., 0., 0., 1.]])

usage_print()

# ### Masking step
from pymri.utils import masking, signal
from nibabel import Nifti1Image

# Mask data
X0_img = Nifti1Image(X0, affine)
X0 = masking.apply_mask(X0_img, mask, smoothing_fwhm=4)
X1_img = Nifti1Image(X1, affine)
X1 = masking.apply_mask(X1_img, mask, smoothing_fwhm=4)

# # Standardize data
# from sklearn import preprocessing
# for sample in range(len(X0)):
#     X0[sample] = preprocessing.scale(X0[sample])
# for sample in range(len(X1)):
#     X1[sample] = preprocessing.scale(X1[sample])

X = np.zeros(shape=((len(X0) + len(X1)), X0.shape[-1]))
y = np.zeros(shape=((len(y0) + len(y1)),))
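# Presumably the two masked condition blocks are then written into the
# pre-allocated arrays; a hedged sketch of that step (np.concatenate over the
# sample axis would be equivalent):
X[:len(X0)] = X0
X[len(X0):] = X1
y[:len(y0)] = y0
y[len(y0):] = y1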
def load_data(self):
    # create sklearn's Bunch of data
    dataset_files = Bunch(
        func=self.bold,
        session_target=self.attr,
        mask=self.mask_brain,
        conditions_target=self.attr_lit
    )

    # fmri_data and mask are copied to break reference to
    # the original object
    bold_img = nibabel.load(dataset_files.func)
    fmri_data = bold_img.get_data().astype(float)
    affine = bold_img.get_affine()

    y, session = np.loadtxt(dataset_files.session_target).astype("int").T
    conditions = np.recfromtxt(dataset_files.conditions_target)['f0']

    mask = dataset_files.mask
    self.mask_non_brain = mask

    # ### Restrict to specified conditions
    condition_mask = np.zeros(shape=(conditions.shape[0]))

    # unify contrasts, e.g. contrast=(('face', 'table'), 'house'):
    # face:0, table:1, house:2
    # [0, 0, 2, 1, 2, 0, 1, 1] ==> [0, 0, 2, 0, 2, 0, 0, 0]
    for n in self.contrast:
        if type(n) == tuple:
            # first label
            k_uniform = n[0]
            k_uniform = y[conditions == k_uniform][0]
            for k in n:
                condition_mask += conditions == k
                # unifying subclasses into one class
                # (string label doesn't matter)
                y[conditions == k] = k_uniform
        else:
            condition_mask += conditions == n

    condition_mask = np.array(condition_mask, dtype=bool)

    X = fmri_data[..., condition_mask]
    y = y[condition_mask]

    # adjust all (target) labels to the range 0..n_classes-1,
    # e.g. [4, 4, 2, 8, 9] ==> [1, 1, 0, 2, 3]
    # e.g. [0, 0, 2, 0, 2, 0, 0, 0] ==> [0, 0, 1, 0, 1, 0, 0, 0]
    cnt = 0
    for val in np.unique(y):
        if val > cnt:
            y[y == val] = cnt
        cnt += 1

    # ### Masking step
    # Mask data using brain mask (remove non-brain regions)
    X_img = Nifti1Image(X, affine)
    X = masking.apply_mask(X_img, mask, smoothing_fwhm=4)
    # X = signal.clean(X, standardize=True, detrend=False)

    print('##############################')
    print('# dataset loaded successfully ')
    print('# X shape: %s' % str(X.shape))
    print('# y shape: %s' % str(y.shape))
    print('##############################')

    self.condition_mask = condition_mask
    self.X_raw, self.y = X, y
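# Self-contained check of the label-adjustment loop used above: it maps the
# surviving labels onto 0..n_classes-1 while preserving their order.
import numpy as np

y_demo = np.array([0, 0, 2, 0, 2, 0, 0, 0])
cnt = 0
for val in np.unique(y_demo):
    if val > cnt:
        y_demo[y_demo == val] = cnt
    cnt += 1
print(y_demo)  # [0 0 1 0 1 0 0 0]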
# condition_mask = np.logical_or.reduce((conditions == 'face',
#                                        conditions == 'house',
#                                        conditions == 'cat'))
# X = fmri_data[..., condition_mask]
# y = y[condition_mask]
X = fmri_data
y = y
# session = session[condition_mask]
# conditions = conditions[condition_mask]

# ### Masking step
# from utils import masking, signal
from pymri.utils import masking
from nibabel import Nifti1Image

# Mask data
X_img = Nifti1Image(X, affine)
X = masking.apply_mask(X_img, mask, smoothing_fwhm=4)
# X = signal.clean(X, standardize=True, detrend=False)

# ### Sampling ################################################################
from sklearn.cross_validation import train_test_split

# split original dataset into training and testing datasets
X, X_t, y, y_t = train_test_split(X, y, test_size=0.4, random_state=42)

###############################################################################
#
# F-score
#
###############################################################################
from sklearn.feature_selection import f_classif

f_values, p_values = f_classif(X, y)
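# The F-values computed above can be turned into an explicit top-k voxel
# selection; a minimal sketch (k is an arbitrary choice here), equivalent in
# spirit to the SelectKBest(f_classif, k=...) reduction used elsewhere in this
# code:
import numpy as np

k = 784
top_voxels = np.argsort(f_values)[-k:]
X_reduced = X[:, top_voxels]
X_t_reduced = X_t[:, top_voxels]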
# ### Restrict to faces and houses
condition_mask = np.logical_or(conditions == 'face', conditions == 'house')

X = fmri_data[..., condition_mask]
y = y[condition_mask]
# session = session[condition_mask]
# conditions = conditions[condition_mask]

# ### Masking step
# from utils import masking, signal
from pymri.utils import masking
from nibabel import Nifti1Image

# Mask data
X_img = Nifti1Image(X, affine)
X = masking.apply_mask(X_img, mask, smoothing_fwhm=4)
# X = signal.clean(X, standardize=True, detrend=False)

# ### Sampling ################################################################
from sklearn.cross_validation import train_test_split

# split original dataset into training and testing datasets
X, X_t, y, y_t = train_test_split(
    X, y, test_size=0.4, random_state=42
)

###############################################################################
#
# F-score
#
###############################################################################
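# Presumably the same univariate F-test as in the variant above follows here:
from sklearn.feature_selection import f_classif

f_values, p_values = f_classif(X, y)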
def load_nifti(data_dir, Y, k_features=784,
               normalize=True, scale_0_1=False,
               vectorize_target=False, reshape=False):
    '''
    Parameters
    ----------
    data_dir : string.
        Location of the data files
        (bold.nii.gz, attributes.txt, attributes_literal.txt).

    Y : tuple of strings.
        Classes. Label space will be reduced to specified conditions.

    Returns
    -------
    X : numpy array.
        Samples containing features.

    y : numpy array.
        Labels, targets, classes.

    conditions:
        Y = {Y_1, Y_2, ..., Y_n}
        Y_n = S = {S_1, S_2, ..., S_k}

        where:
            Y - set of classes
            S - set of subclasses
            n - number of classes
            k - number of subclasses

        classes consist of subclasses
    '''
    import numpy as np
    import nibabel
    from sklearn.datasets.base import Bunch

    # create sklearn's Bunch of data
    dataset_files = Bunch(
        func=data_dir + 'bold.nii.gz',
        session_target=data_dir + 'attributes.txt',
        mask=data_dir + 'mask.nii.gz',
        conditions_target=data_dir + 'attributes_literal.txt'
    )

    # fmri_data and mask are copied to break reference to the original object
    bold_img = nibabel.load(dataset_files.func)
    fmri_data = bold_img.get_data().astype(float)
    affine = bold_img.get_affine()

    y, session = np.loadtxt(dataset_files.session_target).astype("int").T
    conditions = np.recfromtxt(dataset_files.conditions_target)['f0']
    mask = dataset_files.mask

    # ### Restrict to specified conditions
    condition_mask = np.zeros(shape=(conditions.shape[0]))
    for n in Y:
        if type(n) == tuple:
            # first label
            k_uniform = n[0]
            k_uniform = y[conditions == k_uniform][0]
            for k in n:
                condition_mask += conditions == k
                # unifying subclasses into one class
                # (string label doesn't matter)
                y[conditions == k] = k_uniform
        else:
            condition_mask += conditions == n
    condition_mask = np.array(condition_mask, dtype=bool)

    X = fmri_data[..., condition_mask]
    y = y[condition_mask]

    # adjust all labels to the range 0..n_classes-1,
    # e.g. [4, 4, 2, 8, 9] ==> [1, 1, 0, 2, 3]
    cnt = 0
    for val in np.unique(y):
        if val > cnt:
            y[y == val] = cnt
        cnt += 1

    # ### Masking step
    from pymri.utils import masking
    from nibabel import Nifti1Image

    # Mask data
    X_img = Nifti1Image(X, affine)
    X = masking.apply_mask(X_img, mask, smoothing_fwhm=4)

    from sklearn.feature_selection import SelectKBest, f_classif

    # ### Define the dimension reduction to be used.
    # Here we use a classical univariate feature selection based on F-test,
    # namely Anova. The number of features to select is k_features
    # (784 by default).
    feature_selection = SelectKBest(f_classif, k=k_features)
    feature_selection.fit(X, y)
    X = feature_selection.transform(X)

    # normalize data
    if normalize:
        from sklearn import preprocessing
        X = preprocessing.normalize(X)

    if scale_0_1:
        # scale data into the range (0, 1)
        X = (X - X.min()) / (X.max() - X.min())

    if vectorize_target:
        # how many classes do we have?
        n_classes = len(Y)
        # [0, 1, 1] ==> [[1, 0], [0, 1], [0, 1]]
        y = vectorize(y, n_classes)

    if reshape:
        # reshape is needed for nnadl acceptable format
        X = np.reshape(X, (X.shape[0], X.shape[1], 1))
        y = np.reshape(y, (y.shape[0], y.shape[1], 1))

    return X, y
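# Hedged usage sketch for load_nifti: the data directory is hypothetical and
# needs bold.nii.gz, attributes.txt, attributes_literal.txt and mask.nii.gz
# inside it (trailing slash required, since paths are built by plain string
# concatenation); the contrast mirrors the (('face', 'table'), 'house')
# example from the comments elsewhere in this code.
if __name__ == '__main__':
    X, y = load_nifti(
        '/path/to/subject01/', (('face', 'table'), 'house'), k_features=784
    )
    print(X.shape, y.shape)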
def load_theano_dataset(data_dir):
    """
    00. Load data from file (X and y).
    01. Split into training phase (train dataset) and validation phase.
    02. Split validation phase into validation dataset and test dataset.

    Datasets proportions: (train/validation/test) (0.5/0.25/0.25)
    """
    import numpy as np
    import nibabel
    from sklearn.datasets.base import Bunch

    data_dir = data_dir

    # create sklearn's Bunch of data
    dataset_files = Bunch(
        func=data_dir + 'bold.nii.gz',
        session_target=data_dir + 'attributes.txt',
        mask=data_dir + 'mask.nii.gz',
        conditions_target=data_dir + 'attributes_literal.txt'
    )

    # fmri_data and mask are copied to break reference to the original object
    bold_img = nibabel.load(dataset_files.func)
    fmri_data = bold_img.get_data().astype(float)
    affine = bold_img.get_affine()

    y, session = np.loadtxt(dataset_files.session_target).astype("int").T
    conditions = np.recfromtxt(dataset_files.conditions_target)['f0']
    mask = dataset_files.mask

    # ### Restrict to specified conditions
    condition_mask = np.logical_or(
        np.logical_or(
            conditions == 'ExeCtrl_5',
            conditions == 'ExeCtrl_0'
        ),
        conditions == 'Rest'
    )

    X = fmri_data[..., condition_mask]
    y = y[condition_mask]

    from sklearn.preprocessing import binarize
    y = binarize(y, threshold=2.0)[0]

    # ### Masking step
    from pymri.utils import masking
    from nibabel import Nifti1Image

    # Mask data
    X_img = Nifti1Image(X, affine)
    X = masking.apply_mask(X_img, mask, smoothing_fwhm=4)

    from sklearn.feature_selection import SelectKBest, f_classif

    # ### Define the dimension reduction to be used.
    # Here we use a classical univariate feature selection based on F-test,
    # namely Anova. We set the number of features to be selected to 784.
    feature_selection = SelectKBest(f_classif, k=784)
    feature_selection.fit(X, y)
    X = feature_selection.transform(X)
    print(X.shape)

    # ### Splitting ###########################################################
    from sklearn.cross_validation import train_test_split

    # split original dataset into training phase (dataset) and validation phase
    X, X_v, y, y_v = train_test_split(
        X, y, test_size=0.5, random_state=42
    )
    # split validation phase in half, so validation and test each hold 25%
    # of the full dataset, as documented above
    X_v, X_t, y_v, y_t = train_test_split(
        X_v, y_v, test_size=0.5, random_state=42
    )

    # X, y - training dataset
    # X_v, y_v - validation dataset
    # X_t, y_t - test dataset

    return (X, y), (X_v, y_v), (X_t, y_t)
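# Hedged usage sketch (hypothetical data directory; trailing slash required
# because paths are built by plain string concatenation):
if __name__ == '__main__':
    (X, y), (X_v, y_v), (X_t, y_t) = load_theano_dataset('/path/to/subject01/')
    print(X.shape, X_v.shape, X_t.shape)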