Example no. 1
0
def test_icamapper():
    # Build a noisy 1-D manifold embedded in 2-D: 40 samples x 2 features,
    # a diagonal line with small gaussian jitter around a zero mean.
    line = np.vstack([np.arange(40.) for _ in range(2)]).T
    line -= line.mean()
    line += np.random.normal(size=line.shape, scale=0.1)
    ds = Dataset(line)

    mapper = ICAMapper()
    try:
        mapper.train(ds.copy())
        assert_equal(mapper.proj.shape, (2, 2))
        projected = mapper.forward(ds.copy())
        assert_equal(projected.shape, (40, 2))
        # the projection must be invertible: reverse() recovers the input
        assert_array_almost_equal(mapper.reverse(projected), ds)
    except mdp.NodeException:
        # ICA convergence failures originate inside MDP, not in the
        # mapper wrapper under test, so tolerate them silently
        pass
Example no. 2
0
def test_mean_removal():
    data = np.array([[0, 0.5, 1, 1.5],
                     [2, 2.5, 3, 3.5],
                     [3, 3.5, 4, 4.5],
                     [5, 5.5, 6, 6.5],
                     [7, 7.5, 8, 8.5]])
    data_ds = Dataset(data)
    # every row has its own mean subtracted, leaving identical rows
    expected = np.tile([-0.75, -0.25, 0.25, 0.75], (5, 1))

    mr = MeanRemoval(in_place=False)
    mr_inplace = MeanRemoval(in_place=True)
    mr_fx = subtract_mean_feature()
    demeaners = (mr, mr_inplace, mr_fx)

    # plain ndarray input
    for demean in demeaners:
        assert_true(np.array_equal(demean(data.copy()), expected), demean)

    # Dataset input
    for demean in demeaners:
        assert_true(np.array_equal(demean(data_ds.copy()).samples, expected))

    # all three implementations must agree on random data
    rand = np.random.rand(50, 1000)
    assert_true(np.array_equal(mr_fx(rand.copy()), mr(rand.copy())))
    assert_true(np.array_equal(mr_fx(rand.copy()), mr_inplace(rand.copy())))

    # corner cases
    ints = np.array([1, 2, 3, 4, 5])
    demeaned = ints.astype(float) - ints.mean()
    assert_array_equal(mr.forward1(ints), demeaned)
    # a plain list works too
    assert_array_equal(mr.forward1(list(ints)), demeaned)
    # a NaN poisons the whole result, mirroring what mean() does
    with_nan = np.array([1, 2, np.nan, 4, 5])
    assert_array_equal(mr.forward1(with_nan), [np.nan] * len(ints))
    # masking the NaN restores mean() semantics for the valid entries
    masked = np.ma.array(with_nan, mask=np.isnan(with_nan))
    expected_masked = demeaned.copy()
    expected_masked[2] = np.nan
    assert_array_equal(mr.forward1(masked), expected_masked)
Example no. 3
0
def test_mean_removal():
    samples = np.array([[0, 0.5, 1, 1.5],
                        [2, 2.5, 3, 3.5],
                        [3, 3.5, 4, 4.5],
                        [5, 5.5, 6, 6.5],
                        [7, 7.5, 8, 8.5]])
    ds = Dataset(samples)
    # per-row mean removal turns every row into the same centered pattern
    centered_row = [-0.75, -0.25, 0.25, 0.75]
    desired = np.array([centered_row] * 5)

    out_of_place = MeanRemoval(in_place=False)
    in_place = MeanRemoval(in_place=True)
    fx_based = subtract_mean_feature()
    variants = (out_of_place, in_place, fx_based)

    # each variant handles a raw ndarray ...
    for variant in variants:
        assert_true(np.array_equal(variant(samples.copy()), desired),
                    variant)
    # ... as well as a Dataset
    for variant in variants:
        assert_true(np.array_equal(variant(ds.copy()).samples, desired))

    # the three variants must be mutually consistent on arbitrary data
    noise = np.random.rand(50, 1000)
    assert_true(np.array_equal(fx_based(noise.copy()),
                               out_of_place(noise.copy())))
    assert_true(np.array_equal(fx_based(noise.copy()),
                               in_place(noise.copy())))

    # corner cases
    int_vec = np.array([1, 2, 3, 4, 5])
    centered_vec = int_vec.astype(float) - int_vec.mean()
    assert_array_equal(out_of_place.forward1(int_vec), centered_vec)
    # lists are accepted as well
    assert_array_equal(out_of_place.forward1(list(int_vec)), centered_vec)
    # a missing value yields all-NaN output, just like mean() would
    nan_vec = np.array([1, 2, np.nan, 4, 5])
    assert_array_equal(out_of_place.forward1(nan_vec),
                       [np.nan] * len(int_vec))
    # a masked array behaves like mean() over the unmasked entries
    nan_vec = np.ma.array(nan_vec, mask=np.isnan(nan_vec))
    desired_masked = centered_vec.copy()
    desired_masked[2] = np.nan
    assert_array_equal(out_of_place.forward1(nan_vec), desired_masked)
Example no. 4
0
def test_pcamapper():
    # data: 40 samples x 20 features, every feature is the same 0..39
    # integer ramp (so there is variance along exactly one direction)
    ndlin = Dataset(np.tile(np.arange(40), (20, 1)).T)

    pm = PCAMapper()
    # training on the raw integer data is expected to fail inside MDP
    assert_raises(mdp.NodeException, pm.train, ndlin)
    ndlin.samples = ndlin.samples.astype('float')
    noisy = ndlin.copy()
    noisy.samples += np.random.random(size=ndlin.samples.shape)
    # the noise-free data has only a single non-zero eigenvalue, so
    # training still fails; the jittered copy trains fine
    assert_raises(mdp.NodeException, pm.train, ndlin)
    pm.train(noisy)
    assert_equal(pm.proj.shape, (20, 20))
    # project the clean data into PCA space
    projected = pm.forward(ndlin.samples)
    assert_equal(projected.shape, (40, 20))
    # round-tripping through reverse() must recover the input
    assert_array_almost_equal(pm.reverse(projected), ndlin)
Example no. 5
0
def test_from_wizard():
    data = np.arange(12).reshape((4, 3)).view(myarray)
    targets = range(4)
    chunk_ids = [1, 1, 2, 2]

    ds = Dataset(data, sa={'targets': targets, 'chunks': chunk_ids})
    ds.init_origids('both')
    ids_before = ds.sa.origids
    # a second call must regenerate a fresh (but equal) origid array
    ds.init_origids('both')
    assert_false(ids_before is ds.sa.origids)
    assert_array_equal(ids_before, ds.sa.origids)

    ok_(is_datasetlike(ds))
    ok_(not is_datasetlike(targets))

    # the ndarray subclass must survive dataset construction
    ok_(isinstance(ds.samples, myarray))

    ## XXX stuff that needs thought:

    # ds.sa (empty) has this in the public namespace:
    #   add, get, getvalue, has_key, is_set, items, listing, name, names
    #   owner, remove, reset, setvalue, which_set
    # maybe we need some form of lightweight Collection?

    assert_array_equal(ds.samples, data)
    assert_array_equal(ds.sa.targets, targets)
    assert_array_equal(ds.sa.chunks, chunk_ids)

    # the attribute shortcuts mirror the collections
    assert_array_equal(ds.targets, targets)
    assert_array_equal(ds.chunks, chunk_ids)

    ok_(sorted(ds.sa.keys()) == ['chunks', 'origids', 'targets'])
    ok_(sorted(ds.fa.keys()) == ['origids'])
    # stash a dataset attribute for the copy tests below
    ds.a['random'] = 'blurb'

    # a full copy keeps every attribute collection intact
    clone = ds.copy()
    ok_(sorted(clone.sa.keys()) == ['chunks', 'origids', 'targets'])
    ok_(sorted(clone.fa.keys()) == ['origids'])
    ok_(sorted(clone.a.keys()) == ['random'])
    # a plain copy strips all attributes
    clone = ds.copy(sa=[], fa=[], a=[])
    ok_(clone.sa.keys() == [])
    ok_(clone.fa.keys() == [])
    ok_(clone.a.keys() == [])
    # a partial copy keeps only what was requested (None keeps everything)
    clone = ds.copy(sa=['targets'], fa=None, a=['random'])
    ok_(clone.sa.keys() == ['targets'])
    ok_(clone.fa.keys() == ['origids'])
    ok_(clone.a.keys() == ['random'])

    # no mapper was involved in construction, so none must be present
    ok_(not ds.a.has_key('mapper'))

    # misshaped sample attributes must be rejected
    assert_raises(ValueError, Dataset.from_wizard, data, targets + targets)

    # attributes are wrapped in the expected collectable type
    ok_(isinstance(ds.sa['targets'], ArrayCollectable))

    # shape-mismatched attributes can neither be added ...
    assert_raises(ValueError, ds.sa.__setitem__, 'stupid', np.arange(3))
    assert_raises(ValueError, ds.fa.__setitem__, 'stupid', np.arange(4))
    # ... nor assigned over existing ones
    try:
        ds.sa.targets = np.arange(3)
    except ValueError:
        pass
    else:
        ok_(False, msg="Assigning value with improper shape to attribute "
                       "did not raise exception.")
Example no. 6
0
def test_from_wizard():
    arr = np.arange(12).reshape((4, 3)).view(myarray)
    lbls = range(4)
    chks = [1, 1, 2, 2]

    ds = Dataset(arr, sa={'targets': lbls, 'chunks': chks})
    ds.init_origids('both')
    original_ids = ds.sa.origids
    # re-running init_origids must produce a new, equal-valued array
    ds.init_origids('both')
    assert_false(original_ids is ds.sa.origids)
    assert_array_equal(original_ids, ds.sa.origids)

    ok_(is_datasetlike(ds))
    ok_(not is_datasetlike(lbls))

    # dataset construction must not discard the ndarray subclass
    ok_(isinstance(ds.samples, myarray))

    ## XXX stuff that needs thought:

    # ds.sa (empty) has this in the public namespace:
    #   add, get, getvalue, has_key, is_set, items, listing, name, names
    #   owner, remove, reset, setvalue, which_set
    # maybe we need some form of lightweight Collection?

    assert_array_equal(ds.samples, arr)
    assert_array_equal(ds.sa.targets, lbls)
    assert_array_equal(ds.sa.chunks, chks)

    # shortcut properties must agree with the collections
    assert_array_equal(ds.targets, lbls)
    assert_array_equal(ds.chunks, chks)

    ok_(sorted(ds.sa.keys()) == ['chunks', 'origids', 'targets'])
    ok_(sorted(ds.fa.keys()) == ['origids'])
    # add a dataset attribute to exercise the copy variants below
    ds.a['random'] = 'blurb'

    # full copy: everything survives
    dup = ds.copy()
    ok_(sorted(dup.sa.keys()) == ['chunks', 'origids', 'targets'])
    ok_(sorted(dup.fa.keys()) == ['origids'])
    ok_(sorted(dup.a.keys()) == ['random'])
    # plain copy: all attributes stripped
    dup = ds.copy(sa=[], fa=[], a=[])
    ok_(dup.sa.keys() == [])
    ok_(dup.fa.keys() == [])
    ok_(dup.a.keys() == [])
    # partial copy: explicit selections plus None meaning "keep all"
    dup = ds.copy(sa=['targets'], fa=None, a=['random'])
    ok_(dup.sa.keys() == ['targets'])
    ok_(dup.fa.keys() == ['origids'])
    ok_(dup.a.keys() == ['random'])

    # construction without a mapper must not invent one
    ok_(not ds.a.has_key('mapper'))

    # sample attributes of the wrong length are rejected
    assert_raises(ValueError, Dataset.from_wizard, arr, lbls + lbls)

    # stored attributes carry the expected collectable type
    ok_(isinstance(ds.sa['targets'], ArrayCollectable))

    # adding attributes with mismatched shapes must fail ...
    assert_raises(ValueError, ds.sa.__setitem__, 'stupid', np.arange(3))
    assert_raises(ValueError, ds.fa.__setitem__, 'stupid', np.arange(4))
    # ... and so must reassigning an existing attribute to a bad shape
    try:
        ds.sa.targets = np.arange(3)
    except ValueError:
        pass
    else:
        ok_(False,
            msg="Assigning value with improper shape to attribute "
            "did not raise exception.")