def test_sensitivity_based_feature_selection(self, clf):
    """Select features via a sensitivity map and verify dataset shapes.

    Builds a ``SensitivityBasedFeatureSelection`` from *clf*'s sensitivity
    analyzer, then checks that the original dataset is left untouched and
    that exactly ``Nremove`` features were discarded.
    """
    # sensitivity analyser and transfer error quantifier use the SAME clf!
    sens_ana = clf.get_sensitivity_analyzer(postproc=maxofabs_sample())

    # number of features to remove
    Nremove = 2

    # because the clf is already trained when computing the sensitivity
    # map, prevent retraining for transfer error calculation
    # Use absolute of the svm weights as sensitivity
    fe = SensitivityBasedFeatureSelection(
        sens_ana,
        # use Nremove here (not a duplicated literal) so the selector and
        # the assertions below cannot drift apart
        feature_selector=FixedNElementTailSelector(Nremove),
        enable_ca=["sensitivity", "selected_ids"])

    data = self.get_data()
    data_nfeatures = data.nfeatures

    fe.train(data)
    resds = fe(data)

    # fail if orig datasets are changed
    self.assertTrue(data.nfeatures == data_nfeatures)

    # check that exactly Nremove features got removed
    self.assertEqual(data.nfeatures, resds.nfeatures + Nremove,
                     msg="We had to remove exactly %d features" % Nremove)
    self.assertEqual(
        fe.ca.sensitivity.nfeatures, data_nfeatures,
        msg="Sensitivity have to have # of features equal to original")
def test_sensitivity_based_feature_selection(self, clf):
    """Select features via a sensitivity map and verify dataset shapes.

    Builds a ``SensitivityBasedFeatureSelection`` from *clf*'s sensitivity
    analyzer, then checks that the original dataset is left untouched and
    that exactly ``Nremove`` features were discarded.
    """
    # sensitivity analyser and transfer error quantifier use the SAME clf!
    sens_ana = clf.get_sensitivity_analyzer(postproc=maxofabs_sample())

    # number of features to remove
    Nremove = 2

    # because the clf is already trained when computing the sensitivity
    # map, prevent retraining for transfer error calculation
    # Use absolute of the svm weights as sensitivity
    fe = SensitivityBasedFeatureSelection(
        sens_ana,
        # use Nremove here (not a duplicated literal) so the selector and
        # the assertions below cannot drift apart
        feature_selector=FixedNElementTailSelector(Nremove),
        enable_ca=["sensitivity", "selected_ids"])

    data = self.get_data()
    data_nfeatures = data.nfeatures

    fe.train(data)
    resds = fe(data)

    # fail if orig datasets are changed
    self.assertTrue(data.nfeatures == data_nfeatures)

    # check that exactly Nremove features got removed
    self.assertEqual(data.nfeatures, resds.nfeatures + Nremove,
                     msg="We had to remove exactly %d features" % Nremove)
    self.assertEqual(
        fe.ca.sensitivity.nfeatures, data_nfeatures,
        msg="Sensitivity have to have # of features equal to original")
def test_custom_combined_selectors(self):
    """Test combination of the selectors in a single function
    """
    def custom_tail_selector(seq):
        # discard the top 1%, select the top 5%; their intersection is
        # the 1%..5% band of the upper tail
        after_discard = FractionTailSelector(
            0.01, mode='discard', tail='upper')(seq)
        top_selected = FractionTailSelector(
            0.05, mode='select', tail='upper')(seq)
        return list(set(after_discard) & set(top_selected))

    scores = np.arange(100)
    picked = custom_tail_selector(scores)
    assert_array_equal(sorted(picked), [95, 96, 97, 98])

    # verify that this function could be used in place of the selector
    fs = SensitivityBasedFeatureSelection(OneWayAnova(),
                                          custom_tail_selector)
    ds = datasets['3dsmall']
    # XXX: why needs to be trained here explicitly?
    fs.train(ds)
    ds_ = fs(ds)
    assert_equal(ds_.nfeatures, int(ds.nfeatures * 0.04))
def test_custom_combined_selectors(self):
    """Test combination of the selectors in a single function
    """
    def custom_tail_selector(seq):
        # keep only indices that survive BOTH tail criteria: not in the
        # top 1% (discarded) yet within the top 5% (selected)
        kept = set(FractionTailSelector(0.01, mode='discard',
                                        tail='upper')(seq))
        kept &= set(FractionTailSelector(0.05, mode='select',
                                         tail='upper')(seq))
        return list(kept)

    full_range = np.arange(100)
    chosen = custom_tail_selector(full_range)
    assert_array_equal(sorted(chosen), [95, 96, 97, 98])

    # verify that this function could be used in place of the selector
    fs = SensitivityBasedFeatureSelection(OneWayAnova(),
                                          custom_tail_selector)
    ds = datasets['3dsmall']
    fs.train(ds)  # XXX: why needs to be trained here explicitly?
    ds_ = fs(ds)
    assert_equal(ds_.nfeatures, int(ds.nfeatures * 0.04))
data = np.concatenate(data) labels = np.concatenate(labels) return data, labels.astype(np.int) rois = ['aSTG', 'HG', 'pSTG'] for sub_id in range(1, 21): data = [] for roi in rois: data_path = os.path.join(data_dir, roi) tmp_data, label = load_data(data_path, sub_id) data.append(tmp_data) data = np.concatenate(data, axis=1) data = np.concatenate([data[i,:,:].T for i in range(len(data))]) ds = Dataset(data) ds.sa['time_coords'] = np.linspace(0, len(ds)-1, len(ds)) events = [{'onset': i*5, 'duration': 5, 'targets':label[i], 'chunks':i+1} for i in range(int(len(ds)/5))] hrf_estimates = fit_event_hrf_model(ds, events, time_attr='time_coords', condition_attr=('targets', 'chunks'), design_kwargs=dict(drift_model='blank'), glmfit_kwargs=dict(model='ols'), return_model=True) fsel = SensitivityBasedFeatureSelection(OneWayAnova(), FixedNElementTailSelector(5000, mode='select', tail='upper')) fsel.train(hrf_estimates) ds_p = fsel(hrf_estimates) np.save('feat_sub{:03d}'.format(sub_id), ds_p.samples)