示例#1
0
文件: mshi.py 项目: pyongjoo/ende
    def _test_size(self):
        '''
        Report how the error of an MS2 estimate changes with the size of the
        training set.

        For every training size in 100, 200, ..., 900 a new
        MS2(LogisticRegression) object is fitted on a subset of that size
        taken from the data returned by nc_rna_reader.toNumpy(). It is then
        evaluated on a test set produced by SetGen.with_pos_ratio with
        r = 0.05 and pos_label=1. For each size the predicted distribution
        and the value of rms(dist_est, dist_true) are printed.
        '''
        pos_ratio = 0.05

        # Load once instead of once per size: the call takes no argument that
        # depends on the loop variable.
        # NOTE(review): assumes toNumpy() returns the same data on every
        # call -- confirm against nc_rna_reader.
        X_train_full, y_train_full, X_test, y_test = nc_rna_reader.toNumpy()
        test_set_original = (X_test, y_test)

        for set_size in numpy.arange(100, 1000, 100):
            X_train, y_train = self.get_sub_set_with_size(
                [X_train_full, y_train_full], set_size)
            assert len(y_train) == set_size

            ms = MS2(LogisticRegression)
            ms.fit((X_train, y_train))

            # Regenerated on every iteration, exactly as before; whether
            # with_pos_ratio is deterministic is not visible from here.
            X_test_new, y_test_new = SetGen.with_pos_ratio(
                test_set_original, pos_ratio, pos_label=1)

            dist_true = DE.arrayToDist(y_test_new)
            dist_est = ms.predict(X_test_new)
            err = rms(dist_est, dist_true)

            # Single-argument print(...) prints the same text under Python 2
            # and also parses under Python 3.
            print(dist_est)
            print("size: %d, err: %f" % (set_size, err))
示例#2
0
文件: ac2.py 项目: pyongjoo/ende
    def _test_ac_forest(self):
        '''
        Print the estimates obtained from 500 AC2 fits on sub-samples of one
        training set.

        A 200-example training subset is taken from the data returned by
        rcv1_binary_reader.toNumpy(), and a test set is produced by
        SetGen.with_pos_ratio with r = 0.8 and pos_label=1. In each round an
        AC2(LogisticRegression) object is fitted on
        get_sub_set_with_size(train_set, 0.5) and entry 1 of its predicted
        distribution for the test set is collected. The collected values,
        their mean and their median are printed.
        '''
        n_rounds = 500
        # NOTE(review): passed as the "size" argument; presumably read as a
        # fraction of train_set -- confirm in get_sub_set_with_size.
        sub_size = 0.5

        X_train_full, y_train_full, X_test, y_test = rcv1_binary_reader.toNumpy()
        set_size = 200     # an arbitrary number
        X_train, y_train = self.get_sub_set_with_size(
            [X_train_full, y_train_full], set_size)
        assert len(y_train) == set_size

        train_set = (X_train, y_train)
        test_set_original = (X_test, y_test)

        r = 0.8
        X_test_new, y_test_new = SetGen.with_pos_ratio(
            test_set_original, r, pos_label=1)

        # NOTE(review): dist_true is never read below; presumably meant as
        # the reference for the printed estimates. The call is kept because
        # its side effects, if any, are not visible from here.
        dist_true = DE.arrayToDist(y_test_new)

        pos = []
        for _ in range(n_rounds):
            train_sub = self.get_sub_set_with_size(train_set, sub_size)
            ac = AC2(LogisticRegression)
            ac.fit(train_sub)
            dist_est = ac.predict(X_test_new)
            pos.append(dist_est[1])

        # Single-argument print(...) prints the same text under Python 2 and
        # also parses under Python 3.
        print(pos)
        print(numpy.mean(pos))
        print(numpy.median(pos))
示例#3
0
文件: mshi.py 项目: pyongjoo/ende
    def test_class_ratio(self):
        '''
        Print the MSHI estimate for test sets generated with different
        positive-class ratios.

        An MSHI(LinearSVC) object is fitted once on a 1000-example subset of
        the training data returned by news_20_reader.toNumpy(). For every r
        in numpy.arange(0.05, 1.0, 0.05) a test set is produced by
        SetGen.with_pos_ratio with pos_label=1, and r is printed together
        with entry 1 of the predicted distribution.
        '''
        # nc_rna_reader.toNumpy() was the data source used here previously.
        X_train_full, y_train_full, X_test, y_test = news_20_reader.toNumpy()
        set_size = 1000
        X_train, y_train = self.get_sub_set_with_size(
            [X_train_full, y_train_full], set_size)

        test_set_original = (X_test, y_test)

        ms = MSHI(LinearSVC)
        ms.fit((X_train, y_train))

        # Single-argument print(...) prints the same text under Python 2 and
        # also parses under Python 3.
        print('Done training')

        for r in numpy.arange(0.05, 1.0, 0.05):
            X_test_new, y_test_new = SetGen.with_pos_ratio(
                test_set_original, r, pos_label=1)

            dist_true = DE.arrayToDist(y_test_new)
            dist_est = ms.predict(X_test_new)

            # NOTE(review): err is computed but never reported. The call is
            # kept because its side effects, if any, are not visible here.
            err = rms(dist_est, dist_true)

            print("r: %f, pos: %f" % (r, dist_est[1]))
示例#4
0
文件: cc2.py 项目: pyongjoo/ende
    def test_ratio(self):
        '''
        Print true versus estimated values of distribution entry 1 for CC2
        over test sets generated with different positive-class ratios.

        A CC2(KNeighborsClassifier) object is fitted once on a 500-example
        subset of the training data returned by
        synthetic_reader.toNumpy(0.3, n_class=2). For every r in
        numpy.arange(0.05, 1.0, 0.05) a test set is produced by
        SetGen.with_pos_ratio with pos_label=1, and one tab-separated line
        "<dist_true[1]>\t<dist_est[1]>" is printed.
        '''
        # nc_rna_reader and rcv1_binary_reader were the data sources used
        # here previously.
        X_train_full, y_train_full, X_test, y_test = synthetic_reader.toNumpy(
            0.3, n_class=2)
        set_size = 500     # an arbitrary number
        X_train, y_train = self.get_sub_set_with_size(
            [X_train_full, y_train_full], set_size)
        assert len(y_train) == set_size

        test_set_original = (X_test, y_test)

        cc = CC2(KNeighborsClassifier)
        cc.fit((X_train, y_train))

        for r in numpy.arange(0.05, 1.0, 0.05):
            X_test_new, y_test_new = SetGen.with_pos_ratio(
                test_set_original, r, pos_label=1)

            dist_true = DE.arrayToDist(y_test_new)
            dist_est = cc.predict(X_test_new)

            # NOTE(review): err is computed but never reported. The call is
            # kept because its side effects, if any, are not visible here.
            err = rms(dist_est, dist_true)

            # Single-argument print(...) prints the same text under Python 2
            # and also parses under Python 3.
            print("%f\t%f" % (dist_true[1], dist_est[1]))