Exemplo n.º 1
0
 def test_tsne_plot_abclabs(self):
     """Draw a t-SNE plot with per-sample string labels.

     An 8x10 random matrix is built from a fixed seed, its rows are
     sorted by the value of column 5, and that column is passed as the
     gradient argument of ``tsne_plot`` together with the labels
     'a'..'h'.

     Returns:
         Whatever ``tsne_plot`` returns (presumably a figure consumed by
         an image-comparison decorator outside this view -- confirm).
     """
     sids = list(range(8))
     fids = [str(i) for i in range(10)]
     labs = list(range(8))
     # Seed once: a second identical seed call right after the first
     # leaves the generator in the same state and is redundant.
     np.random.seed(123)
     x = np.random.ranf(80).reshape(8, -1)
     # Order the samples by feature column 5 so the gradient is monotonic
     # along the sample axis.
     x_sorted = x[np.argsort(x[:, 5])]
     g = x_sorted[:, 5]
     slab_csamples = eda.SingleLabelClassifiedSamples(
         x_sorted, labs, sids=sids, fids=fids)
     return slab_csamples.tsne_plot(g, labels=list('abcdefgh'),
                                    figsize=(10, 10), s=50)
Exemplo n.º 2
0
 def test_swarm_a(self):
     """Draw a swarm plot of feature 'a' for labels 0, 2 and 3.

     The plotted values are shifted by 200 through ``transform`` and the
     title and axis labels are set explicitly.

     Returns:
         Whatever ``feature_swarm_plot`` returns.
     """
     # The data matrix is
     # array([[0, 1],
     #        [2, 3],
     #        [4, 5],
     #        [6, 7],
     #        [8, 9]])
     data = np.arange(10).reshape(5, 2)
     sample_labs = [0, 0, 1, 2, 3]
     sample_ids = ['1', '2', '3', '4', '5']
     feature_ids = ['a', 'z']
     tslcs = eda.SingleLabelClassifiedSamples(
         data, sample_labs, sample_ids, feature_ids)
     return tslcs.feature_swarm_plot(
         'a',
         transform=lambda v: v + 200,
         selected_labels=[0, 2, 3],
         title='test',
         xlab='x',
         ylab='y')
Exemplo n.º 3
0
    def test_lab_sorted_sids(self):
        """Check ``lab_sorted_sids`` with and without a reference order.

        Six samples with labels [0, 0, 2, 1, 1, 1] are queried twice: once
        with an explicit reference list of sample ids, once with the
        default argument.
        """
        query_sids = [0, 1, 5, 3, 2, 4]
        query_labs = [0, 0, 2, 1, 1, 1]
        ref_sids = [3, 4, 2, 5, 1, 0]
        data = np.random.ranf(60).reshape(6, -1)
        slab_csamples = eda.SingleLabelClassifiedSamples(
            data, query_labs, query_sids)

        # Explicit reference ordering.
        sorted_sids, sorted_labs = slab_csamples.lab_sorted_sids(ref_sids)
        np.testing.assert_equal(sorted_sids, np.array([3, 4, 2, 5, 1, 0]))
        np.testing.assert_equal(sorted_labs, np.array([1, 1, 1, 2, 0, 0]))

        # Default ordering (no reference given).
        sorted_sids, sorted_labs = slab_csamples.lab_sorted_sids()
        np.testing.assert_equal(sorted_sids, np.array([0, 1, 3, 2, 4, 5]))
        np.testing.assert_equal(sorted_labs, np.array([0, 0, 1, 1, 1, 2]))
Exemplo n.º 4
0
 def test_feature_importance_across_labs(self):
     """Check feature importance ranking across labels on synthetic data.

     Three gaussian clusters (500, 200 and 300 samples, 8 features) are
     generated around ``x_centers``. Classes 0 and 1 differ most in
     feature 3 (centers 1 vs 5), so that feature is expected to be
     ranked first in every configuration exercised below: binary,
     multi-class, user-provided xgboost parameters, shuffled features,
     and bootstrapping.
     """
     # Generate simple dataset with gaussian noise
     x_centers = np.array([[0, 0,   1,  1, 5, 50, 10, 37],
                           [0, 0, 1.5,  5, 5, 50, 10, 35],
                           [0, 0,  10, 10, 5, 50, 10, 33]])
     np.random.seed(1920)
     c1x = np.array(x_centers[0]) + np.random.normal(size=(500, 8))
     c2x = np.array(x_centers[1]) + np.random.normal(size=(200, 8))
     c3x = np.array(x_centers[2]) + np.random.normal(size=(300, 8))
     x = np.vstack((c1x, c2x, c3x))
     labs = [0] * 500 + [1] * 200 + [2] * 300
     slcs = eda.SingleLabelClassifiedSamples(x, labs=labs)
     # binary logistic regression
     f_importance_list, bst = slcs.feature_importance_across_labs(
         [0, 1], silent=0)
     assert f_importance_list[0][0] == 3
     # multi class softmax
     f_importance_list2, bst2 = slcs.feature_importance_across_labs(
         [0, 1, 2], random_state=123, silent=1)
     assert f_importance_list2[0][0] == 3
     assert f_importance_list2 != f_importance_list
     # multiclass with provided params
     xgb_params = {
         'eta': 0.3,
         'max_depth': 6,
         'silent': 0,
         'nthread': 1,
         'alpha': 1,
         'lambda': 0,
         'seed': 0,
         'objective': 'multi:softmax',
         'eval_metric': 'merror',
         'num_class': 3
     }
     f_importance_list3, bst3 = slcs.feature_importance_across_labs(
         [0, 1, 2], random_state=123, xgb_params=xgb_params)
     # The explicit parameters are expected to reproduce the result of
     # the multi-class run above.
     assert f_importance_list3 == f_importance_list2
     # shuffle features
     f_importance_list4, bst4 = slcs.feature_importance_across_labs(
         [0, 1], random_state=123, shuffle_features=True)
     # Verify the shuffled run itself; previously this re-checked
     # f_importance_list2, leaving the shuffled result untested.
     assert f_importance_list4[0][0] == 3
     # bootstrapping
     f_importance_list5, bst5 = slcs.feature_importance_across_labs(
         [0, 1], random_state=123, shuffle_features=True,
         num_bootstrap_round=10)
     f_importance_list6, bst6 = slcs.feature_importance_across_labs(
         [0, 1], random_state=123, shuffle_features=True,
         num_bootstrap_round=10)
     # Same random_state must give identical bootstrapped results.
     assert f_importance_list5 == f_importance_list6
     assert f_importance_list5[0][0] == 3
Exemplo n.º 5
0
 def test_feature_importance_across_labs_bootstrap_resample(self):
     """Smoke-test bootstrapping with a heavily imbalanced label.

     Label 1 has a single sample (against 500 for label 0), so the test
     only checks that the bootstrapped call completes and yields a pair;
     no values are asserted.
     """
     centers = np.array([[0, 0,   1,  1, 5, 50, 10, 37],
                         [0, 0, 1.5,  5, 5, 50, 10, 35],
                         [0, 0,  10, 10, 5, 50, 10, 33]])
     class_sizes = (500, 1, 30)
     np.random.seed(1920)
     # Noise is drawn class by class, in label order, to keep the random
     # stream identical for a given seed.
     clusters = [center + np.random.normal(size=(n_samples, 8))
                 for center, n_samples in zip(centers, class_sizes)]
     x = np.vstack(clusters)
     labs = [lab for lab, n_samples in enumerate(class_sizes)
             for _ in range(n_samples)]
     slcs = eda.SingleLabelClassifiedSamples(x, labs=labs)
     # bootstrapping
     _importances, _booster = slcs.feature_importance_across_labs(
         [0, 1],
         random_state=123,
         shuffle_features=True,
         num_bootstrap_round=10)
Exemplo n.º 6
0
    def test_lab_x_empty(self):
        """Selecting an empty list of labels yields an empty sample set.

        The result is expected to keep all 10 features while holding zero
        samples, zero labels and an empty distance matrix.
        """
        sids = list('abcdef')
        fids = list(range(10, 20))
        labs = [0, 0, 0, 1, 2, 2]

        slcs = eda.SingleLabelClassifiedSamples(
            np.random.ranf(60).reshape(6, -1), labs=labs,
            sids=sids, fids=fids)
        # select no labels at all
        empty_s = slcs.lab_x([])
        assert empty_s._x.shape == (0, 10)
        assert empty_s._d.shape == (0, 0)
        assert empty_s._sids.shape == (0,)
        assert empty_s._labs.shape == (0,)
        assert empty_s._fids.shape == (10,)
Exemplo n.º 7
0
 def test_feature_importance_distintuishing_labs(self):
     """Check the top feature reported for labels [0, 1] vs the rest.

     Uses three gaussian clusters of 500, 200 and 300 samples with 8
     features; the first-ranked feature index is expected to be 2.
     """
     # Generate simple dataset with gaussian noise
     centers = np.array([[0, 0,   1,  1, 5, 50, 10, 37],
                         [0, 0, 1.5,  5, 5, 50, 10, 35],
                         [0, 0,  10, 10, 5, 50, 10, 33]])
     class_sizes = (500, 200, 300)
     np.random.seed(1920)
     # Noise is drawn class by class, in label order, to keep the random
     # stream identical for a given seed.
     clusters = [center + np.random.normal(size=(n_samples, 8))
                 for center, n_samples in zip(centers, class_sizes)]
     x = np.vstack(clusters)
     labs = [lab for lab, n_samples in enumerate(class_sizes)
             for _ in range(n_samples)]
     slcs = eda.SingleLabelClassifiedSamples(x, labs=labs)
     # binary logistic regression
     ranked_features, _booster = (
         slcs.feature_importance_distintuishing_labs([0, 1], silent=0))
     top_feature = ranked_features[0][0]
     assert top_feature == 2
Exemplo n.º 8
0
    def test_getters(self):
        """Check the public getters' values and that they return copies.

        ``x``, ``sids``, ``fids`` and ``labs`` must match the constructor
        input, and each must be a different object from the corresponding
        underscore-prefixed private attribute.
        """
        in_labs = [0, 0, 1, 2, 3]
        in_sids = ['a', 'b', 'c', '1', '2']
        in_fids = ['a', 'z']
        tslcs = eda.SingleLabelClassifiedSamples(
            np.arange(10).reshape(5, 2), in_labs, in_sids, in_fids)

        # x is compared against a float64 copy of the input data.
        expected_x = np.arange(10, dtype='float64').reshape(5, 2)
        np.testing.assert_equal(tslcs.x, expected_x)
        np.testing.assert_equal(tslcs.sids, np.array(in_sids))
        np.testing.assert_equal(tslcs.fids, np.array(in_fids))
        np.testing.assert_equal(tslcs.labs, np.array(in_labs))

        # Each getter must hand out a different object than the private
        # attribute behind it.
        for attr in ('x', 'sids', 'fids', 'labs'):
            assert getattr(tslcs, attr) is not getattr(tslcs, '_' + attr)
Exemplo n.º 9
0
 def test_feature_importance_each_lab(self):
     """Check the per-label feature importance lookup on synthetic data.

     Uses three gaussian clusters of 500, 200 and 300 samples with 8
     features. For the first two entries of the returned lookup, the
     last item's first element is expected to be 3 and 2 respectively.
     """
     # Generate simple dataset with gaussian noise
     centers = np.array([[0, 0,   1,  1, 5, 50, 10, 37],
                         [0, 0, 1.5,  5, 5, 50, 10, 35],
                         [0, 0,  10, 10, 5, 50, 10, 33]])
     class_sizes = (500, 200, 300)
     np.random.seed(1920)
     # Noise is drawn class by class, in label order, to keep the random
     # stream identical for a given seed.
     clusters = [center + np.random.normal(size=(n_samples, 8))
                 for center, n_samples in zip(centers, class_sizes)]
     x = np.vstack(clusters)
     labs = [lab for lab, n_samples in enumerate(class_sizes)
             for _ in range(n_samples)]
     slcs = eda.SingleLabelClassifiedSamples(x, labs=labs)
     # one importance result per unique label, with default arguments
     ulab_fi_lut = slcs.feature_importance_each_lab()
     assert ulab_fi_lut[0][-1][0] == 3
     # Dump the lookup so it shows up in captured output if the next
     # assertion fails.
     print(ulab_fi_lut)
     assert ulab_fi_lut[1][-1][0] == 2
Exemplo n.º 10
0
    def test_merge_labels(self):
        """Merge labels 1, 2 and 3 into a new label 5 and check the state.

        After the in-place merge, labels, label/sample-id lookups and the
        cached unique-label statistics must reflect the merged labelling,
        while sample and feature ids stay untouched.
        """
        sids = list('abcdef')
        fids = list(range(10, 20))
        labs = [0, 0, 1, 1, 2, 3]
        data = np.random.ranf(60).reshape(6, -1)
        slcs = eda.SingleLabelClassifiedSamples(
            data, labs=labs, sids=sids, fids=fids)

        merged_lab = 5
        slcs.merge_labels([1, 2, 3], merged_lab)
        expected_labs = [0, 0] + [merged_lab] * 4

        assert slcs.labs == expected_labs
        # ids are not affected by merging labels
        assert slcs.sids == sids
        assert slcs.fids == fids
        # lookups in both directions agree with the merged labels
        assert slcs.labs_to_sids([5]) == (tuple('cdef'),)
        assert slcs.sids_to_labs(sids).tolist() == expected_labs
        # cached unique labels and their counts are refreshed
        assert slcs._uniq_labs.tolist() == [0, 5]
        assert slcs._uniq_lab_cnts.tolist() == [2, 4]
Exemplo n.º 11
0
 def test_filter_min_class_n(self):
     """Filter out labels that have fewer than 2 samples.

     Label 1 has a single sample (index 3), so the filtered object is
     expected to keep the samples at indices 0, 1, 2, 4 and 5, with
     features unchanged and data / distance matrices subset accordingly.
     """
     sids = [0, 1, 2, 3, 4, 5]
     labs = [0, 0, 0, 1, 2, 2]
     data = np.random.ranf(60).reshape(6, -1)
     slab_csamples = eda.SingleLabelClassifiedSamples(
         data, labs, sids, None)
     # Row indices of the samples expected to survive the filter.
     kept_inds = np.array([0, 1, 2, 4, 5])

     filtered = slab_csamples.filter_min_class_n(2)

     np.testing.assert_equal(filtered.sids, np.array([0, 1, 2, 4, 5]))
     np.testing.assert_equal(filtered.labs, np.array([0, 0, 0, 2, 2]))
     np.testing.assert_equal(filtered._x.shape, (5, 10))
     np.testing.assert_equal(filtered.fids, slab_csamples.fids)
     np.testing.assert_equal(filtered._x, slab_csamples._x[kept_inds])
     # The distance matrix is subset on both rows and columns.
     np.testing.assert_equal(
         filtered._d, slab_csamples._d[kept_inds][:, kept_inds])
Exemplo n.º 12
0
    def test_relabel(self):
        """Relabel samples and check the result is an independent copy.

        The relabelled object must carry the new labels, while its data,
        distance matrix, sample ids and feature ids are equal in value to
        the original's but are distinct objects.
        """
        sids = list('abcdef')
        fids = list(range(10, 20))
        labs = [0, 0, 0, 1, 2, 2]
        data = np.random.ranf(60).reshape(6, -1)
        slcs = eda.SingleLabelClassifiedSamples(
            data, labs=labs, sids=sids, fids=fids)

        new_labs = list('abcdef')
        slcs_rl = slcs.relabel(new_labs)
        assert slcs_rl.labs == new_labs

        copied_attrs = ('_x', '_d', '_sids', '_fids')
        # no underlying array is shared with the original ...
        for attr in copied_attrs:
            assert getattr(slcs_rl, attr) is not getattr(slcs, attr)
        # ... yet the contents are the same
        for attr in copied_attrs:
            np.testing.assert_equal(getattr(slcs_rl, attr),
                                    getattr(slcs, attr))
Exemplo n.º 13
0
    def test_merge_labels_wrong_args(self):
        """``merge_labels`` must raise ValueError on invalid arguments."""
        sids = list('abcdef')
        fids = list(range(10, 20))
        labs = [0, 0, 1, 1, 2, 3]
        data = np.random.ranf(60).reshape(6, -1)
        slcs = eda.SingleLabelClassifiedSamples(
            data, labs=labs, sids=sids, fids=fids)

        # (labels to merge, new label) pairs that must each be rejected
        invalid_calls = (
            # wrong new lab type
            ([1, 2, 3], [5]),
            # wrong m lab type
            ([[], [1]], 1),
            # duplicated m labs
            ([1, 1, 2], 1),
            # m lab not in original lab
            ([0, 1, 5], 1),
        )
        for merge_labs, new_lab in invalid_calls:
            with pytest.raises(ValueError):
                slcs.merge_labels(merge_labs, new_lab)
Exemplo n.º 14
0
 def test_tsne_feature_gradient_plot_wrong_args(self):
     """``tsne_feature_gradient_plot`` must reject invalid feature ids.

     Feature ids are the strings '0'..'9', so a list, integers (inside
     or outside the 0-9 range) and an unknown string are all expected to
     raise ValueError.
     """
     sids = list(range(8))
     fids = [str(i) for i in range(10)]
     labs = list(range(8))
     np.random.seed(123)
     x = np.random.ranf(80).reshape(8, -1)
     slab_csamples = eda.SingleLabelClassifiedSamples(
         x, labs, sids=sids, fids=fids)
     # a list instead of a single feature id
     with pytest.raises(ValueError):
         slab_csamples.tsne_feature_gradient_plot([0, 1])
     # integers are not valid ids here because fids are strings
     with pytest.raises(ValueError):
         slab_csamples.tsne_feature_gradient_plot(11)
     with pytest.raises(ValueError):
         slab_csamples.tsne_feature_gradient_plot(-1)
     with pytest.raises(ValueError):
         slab_csamples.tsne_feature_gradient_plot(5)
     # a string that is not among the feature ids
     with pytest.raises(ValueError):
         slab_csamples.tsne_feature_gradient_plot('123')
Exemplo n.º 15
0
    def test_lab_x(self):
        """Check ``lab_x`` selection by label list, single label and None.

        Also verifies that asking for labels that do not exist raises
        ValueError.
        """
        sids = list('abcdef')
        fids = list(range(10, 20))
        labs = [0, 0, 0, 1, 2, 2]
        data = np.random.ranf(60).reshape(6, -1)
        slcs = eda.SingleLabelClassifiedSamples(
            data, labs=labs, sids=sids, fids=fids)

        # (query, row indices of the samples expected to be selected)
        selections = (
            # a list of labels
            ([0, 2], [0, 1, 2, 4, 5]),
            # a single scalar label
            (0, [0, 1, 2]),
        )
        for query, kept_inds in selections:
            subset = slcs.lab_x(query)
            assert subset._x.shape == (len(kept_inds), 10)
            assert subset.sids == [sids[i] for i in kept_inds]
            assert subset.labs == [labs[i] for i in kept_inds]
            assert subset.fids == list(range(10, 20))
            np.testing.assert_equal(
                subset.d, slcs._d[np.ix_(kept_inds, kept_inds)])

        # select with None: everything must compare equal to the original
        slcs_n = slcs.lab_x(None)
        for attr in ('_x', '_d', '_sids', '_fids', '_labs'):
            np.testing.assert_equal(getattr(slcs_n, attr),
                                    getattr(slcs, attr))

        # select non-existent labs
        for missing in ([-1], [0, 3], [0, -3]):
            with pytest.raises(ValueError):
                slcs.lab_x(missing)
Exemplo n.º 16
0
    def test_id_x(self):
        """Check ``id_x`` selection by sample ids and feature ids.

        Covers an explicit subset, the default call, an explicit
        ``(None, None)`` call, and unknown ids raising ValueError.
        """
        sids = list('abcdef')
        fids = list(range(10, 20))
        labs = [0, 0, 0, 1, 2, 2]
        data = np.random.ranf(60).reshape(6, -1)
        slcs = eda.SingleLabelClassifiedSamples(
            data, labs=labs, sids=sids, fids=fids)

        # select two samples and the first five features
        subset = slcs.id_x(['a', 'f'], list(range(10, 15)))
        assert subset._x.shape == (2, 5)
        assert subset.sids == ['a', 'f']
        assert subset.labs == [0, 2]
        assert subset.fids == list(range(10, 15))
        # samples 'a' and 'f' sit at rows 0 and 5 of the original
        picked_rows = (0, 5)
        np.testing.assert_equal(
            subset.d, slcs._d[np.ix_(picked_rows, picked_rows)])

        # select with defaults, then with explicit None for both
        # arguments: each must return the full data set
        for select_args in ((), (None, None)):
            full = slcs.id_x(*select_args)
            assert full._x.shape == (6, 10)
            assert full.sids == list('abcdef')
            assert full.labs == labs
            assert full.fids == list(range(10, 20))
            np.testing.assert_equal(full.d, slcs._d)

        # select non-existent ids
        # id lookup raises ValueError
        with pytest.raises(ValueError):
            slcs.id_x([6])

        with pytest.raises(ValueError):
            slcs.id_x(None, ['a'])
Exemplo n.º 17
0
def test_labs_to_cmap():
    """Check ``eda.plot.labs_to_cmap`` with and without lookup tables.

    With ``return_lut=True`` the call yields a colormap, a norm, a label
    index array and two lookup tables; with ``return_lut=False`` it
    yields only the colormap and norm, which must match the first call.
    """
    sids = list(range(8))
    labs = [str(i) for i in (3, 0, 1, 0, 0, 1, 2, 2)]
    data = np.random.ranf(80).reshape(8, -1)
    slab_csamples = eda.SingleLabelClassifiedSamples(data, labs, sids)

    lut_result = eda.plot.labs_to_cmap(slab_csamples.labs, return_lut=True)
    lab_cmap, lab_norm, lab_ind_arr, lab_col_lut, uniq_lab_lut = lut_result

    n_uniq_labs = len(set(labs))
    palette = sns.hls_palette(n_uniq_labs)

    # one colour per unique label, taken from the seaborn HLS palette
    assert lab_cmap.N == n_uniq_labs
    assert lab_cmap.colors == palette
    # labels here are the strings '0'..'3', so the index array matches
    # their integer values
    np.testing.assert_equal(lab_ind_arr, np.array([3, 0, 1, 0, 0, 1, 2, 2]))
    # index -> label lookup reconstructs the original label list
    assert labs == [uniq_lab_lut[ind] for ind in lab_ind_arr]
    assert len(uniq_lab_lut) == n_uniq_labs
    assert len(lab_col_lut) == n_uniq_labs
    # label -> colour lookup follows the palette order
    lut_colors = [lab_col_lut[uniq_lab_lut[i]] for i in range(n_uniq_labs)]
    assert lut_colors == palette

    # without lookup tables, only the colormap and norm are returned
    lab_cmap2, lab_norm2 = eda.plot.labs_to_cmap(
        slab_csamples.labs, return_lut=False)
    assert lab_cmap2.N == n_uniq_labs
    assert lab_cmap2.colors == lab_cmap.colors
    np.testing.assert_equal(lab_norm2.boundaries, lab_norm.boundaries)
Exemplo n.º 18
0
 def test_swarm_minimal_z(self):
     """Draw a swarm plot of feature 'z' using only default options.

     Returns:
         Whatever ``feature_swarm_plot`` returns.
     """
     data = np.arange(10).reshape(5, 2)
     sample_labs = [0, 0, 1, 2, 3]
     sample_ids = ['1', '2', '3', '4', '5']
     feature_ids = ['a', 'z']
     tslcs = eda.SingleLabelClassifiedSamples(
         data, sample_labs, sample_ids, feature_ids)
     return tslcs.feature_swarm_plot('z')
Exemplo n.º 19
0
 def test_dmat_heatmap(self):
     """Draw a distance-matrix heatmap for labels 0 and 1.

     Seven 2-D points in three labelled groups are used with the
     euclidean metric; the plotted values are shifted by 100 through
     ``transform``.

     Returns:
         Whatever ``dmat_heatmap`` returns.
     """
     points = [[0, 0], [1, 1], [2, 2],
               [10, 10], [12, 12], [11, 11],
               [100, 100]]
     point_labs = [0, 0, 0, 1, 1, 1, 2]
     tslcs = eda.SingleLabelClassifiedSamples(
         points, point_labs, metric='euclidean')
     return tslcs.dmat_heatmap(
         selected_labels=[0, 1],
         transform=lambda v: v + 100)
Exemplo n.º 20
0
 def test_init_wrong_lab_len(self):
     """Constructing with a mismatched number of labels must raise."""
     # Two labels are supplied for the fixture matrix ``sfm3x3_arr``
     # (presumably 3 samples, judging by its name -- confirm against the
     # fixture definition).
     too_few_labs = [0, 1]
     with pytest.raises(Exception):
         eda.SingleLabelClassifiedSamples(
             self.sfm3x3_arr, too_few_labs, None, None)