def test_split_samples_probability_mapper(self):
    """Train a SplitSamplesProbabilityMapper on random data and check its output.

    A 100 x 10 dataset of normally distributed samples is built with
    ``targets``, ``chunks`` and ``subjects`` sample attributes.  The mapper
    is trained with 'subjects' as the splitting attribute and must then
    produce the same samples whether it is fed the dataset or the bare
    sample array, with an output shape of (100, 4).
    """
    skip_if_no_external('scipy')
    nf = 10
    ns = 100
    nsubj = 5
    nchunks = 5
    data = np.random.normal(size=(ns, nf))

    # Floor division keeps the subject labels integral (0 .. nsubj - 1).
    # With true division (the meaning of `/` on Python 3) the labels become
    # fractional, yielding 20 distinct values instead of nsubj.
    block_len = ns // nsubj // nchunks
    ds = AttrDataset(
        data,
        sa=dict(sidx=np.arange(ns),
                targets=np.arange(ns) % nchunks,
                chunks=np.floor(np.arange(ns) * nchunks / ns),
                subjects=np.arange(ns) // block_len % nsubj),
        fa=dict(fidx=np.arange(nf)))

    analyzer = OneWayAnova()
    element_selector = FractionTailSelector(.4, mode='select', tail='upper')
    common = True
    m = SplitSamplesProbabilityMapper(analyzer, 'subjects',
                                      probability_label='fprob',
                                      select_common_features=common,
                                      selector=element_selector)

    m.train(ds)
    y = m(ds)
    z = m(ds.samples)

    # Mapping a plain array must agree with mapping the dataset.
    assert_array_equal(z, y.samples)
    # presumably 4 == 0.4 * nf features kept by the selector -- TODO confirm
    assert_equal(y.shape, (100, 4))
def test_compound_node(self):
    """Check FxNode on its own, chained, and combined vertically/horizontally.

    Two function nodes (add 2, multiply by 3) are applied to a 4 x 1 column
    of floats; the chained and the stacked results are compared against the
    equivalent NumPy expressions.
    """
    # np.float64 is the type formerly aliased as np.float_ (the alias was
    # removed in NumPy 2.0).
    data = np.asarray([[1, 2, 3, 4]], dtype=np.float64).T
    ds = AttrDataset(data, sa=dict(targets=[0, 0, 1, 1]))

    # Curried helpers: add(2) returns a function adding 2 to its argument.
    add = lambda x: lambda y: x + y
    mul = lambda x: lambda y: x * y

    add2 = FxNode(add(2))
    mul3 = FxNode(mul(3))

    assert_array_equal(add2(ds).samples, data + 2)

    # Chain: output of add2 is fed into mul3.
    add2mul3 = ChainNode([add2, mul3])
    assert_array_equal(add2mul3(ds), (data + 2) * 3)

    # Combined: both nodes see the same input, outputs are stacked.
    add2_mul3v = CombinedNode([add2, mul3], 'v')
    add2_mul3h = CombinedNode([add2, mul3], 'h')
    assert_array_equal(
        add2_mul3v(ds).samples, np.vstack((data + 2, data * 3)))
    assert_array_equal(
        add2_mul3h(ds).samples, np.hstack((data + 2, data * 3)))
def test_compound_learner(self):
    """Check FxyLearner alone and inside ChainLearner / CombinedLearner.

    The dataset is split by target into a training and a testing part.
    Learners that subtract / divide the test samples from / into the
    training samples are trained, untrained and retrained, and their
    outputs are compared against the equivalent NumPy expressions.
    """
    # np.float64 is the type formerly aliased as np.float_ (the alias was
    # removed in NumPy 2.0).
    data = np.asarray([[1, 2, 3, 4]], dtype=np.float64).T
    ds = AttrDataset(data, sa=dict(targets=[0, 0, 1, 1]))
    train = ds[ds.sa.targets == 0]
    test = ds[ds.sa.targets == 1]
    dtrain = train.samples
    dtest = test.samples

    # Single learner: train-minus-test.
    sub = FxyLearner(lambda x: lambda y: x - y)
    assert_false(sub.is_trained)
    sub.train(train)
    assert_array_equal(sub(test).samples, dtrain - dtest)

    # Single learner: train-divided-by-test.
    div = FxyLearner(lambda x: lambda y: x / y)
    div.train(train)
    assert_array_almost_equal(div(test).samples, dtrain / dtest)
    div.untrain()

    # Chain of both learners; calling it while untrained must raise.
    subdiv = ChainLearner((sub, div))
    assert_false(subdiv.is_trained)
    subdiv.train(train)
    assert_true(subdiv.is_trained)
    subdiv.untrain()
    assert_raises(RuntimeError, subdiv, test)
    subdiv.train(train)

    assert_array_almost_equal(
        subdiv(test).samples, dtrain / (dtrain - dtest))

    # Combined learner built from the same `sub` and `div` instances.
    sub_div = CombinedLearner((sub, div), 'v')
    assert_true(sub_div.is_trained)
    sub_div.untrain()
    # NOTE(review): the chain (subdiv) is trained here, not sub_div; this
    # presumably relies on both compounds sharing `sub` and `div` -- confirm
    # this is intended rather than a typo for sub_div.train(train).
    subdiv.train(train)
    assert_true(sub_div.is_trained)
    assert_array_almost_equal(
        sub_div(test).samples, np.vstack((dtrain - dtest, dtrain / dtest)))
def load_dataset(dataset_file):
    """Return the dataset obtained from ``AttrDataset.from_hdf5(dataset_file)``."""
    return AttrDataset.from_hdf5(dataset_file)