示例#1
0
    def run_training_size(self, pos_ratio):
        """Print a table of per-estimator results for growing training sizes.

        The test split returned by ``synthetic_reader.toNumpy()`` is resampled
        once with ``SetGen.with_pos_ratio`` using ``pos_ratio`` (and
        ``pos_label=1``). For every training-set size in 50..90 (step 10) and
        100..1000 (step 100), seven estimators are built, fitted on a subset
        of that size, and scored with ``self.run_for_estimator``. One
        tab-separated row is printed per size.

        Args:
            pos_ratio: value forwarded to ``SetGen.with_pos_ratio`` and echoed
                in the header as the positive class ratio.
        """
        X_train_full, y_train_full, X_test, y_test = synthetic_reader.toNumpy()
        X_test_new, y_test_new = SetGen.with_pos_ratio(
                [X_test, y_test], pos_ratio, pos_label=1)
        test_set = [X_test_new, y_test_new]

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("We compare performance as chaning the training set size.")
        print("Positive class ratio is %f" % pos_ratio)
        print("size\tcc\tac\tms\tra\trc\trb\trd")

        set_sizes = (numpy.arange(50, 100, 10).tolist()
                + numpy.arange(100, 1100, 100).tolist())
        for set_size in set_sizes:
            # Estimators are rebuilt for every size, in header-column order.
            ests = [
                CC2(LinearSVC),
                AC2(LinearSVC),
                MS2(LogisticRegression),
                RA(LinearSVC, ac_method='ac'),
                RA(LinearSVC, ac_method='cac'),
                RA(LinearSVC, ac_method='bac'),
                RA(LinearSVC, ac_method='dac'),
            ]

            X_train_sub, y_train_sub = self.get_sub_set_with_size(
                    [X_train_full, y_train_full], set_size)
            train_set = [X_train_sub, y_train_sub]

            # Explicit loop instead of map(): fitting is a side effect, and a
            # lazy map (Python 3) would never execute it.
            for est in ests:
                est.fit(train_set)

            # A real list so the row can be formatted regardless of whether
            # map() is eager or lazy.
            errs = [self.run_for_estimator(est, test_set) for est in ests]
            row_format = "%d" + "\t%.4f" * len(errs)
            print(row_format % tuple([set_size] + errs))
示例#2
0
文件: cc2.py 项目: pyongjoo/ende
    def test_ratio(self):
        """Print true vs. estimated class-1 proportion over a ratio sweep.

        A ``CC2`` estimator wrapping ``KNeighborsClassifier`` is fitted once
        on a 500-example subset of the synthetic training data. For each
        ratio in ``numpy.arange(0.05, 1.0, 0.05)`` the test split is
        regenerated with ``SetGen.with_pos_ratio`` and one line
        ``"<true>\\t<estimated>"`` is printed, taken from entry ``1`` of the
        true and predicted distributions.
        """
        # Other readers (nc_rna_reader, rcv1_binary_reader) were swapped in
        # here during experimentation with the same four-way unpacking.
        X_train_full, y_train_full, X_test, y_test = synthetic_reader.toNumpy(0.3, n_class=2)
        set_size = 500     # an arbitrary number
        X_train, y_train = self.get_sub_set_with_size(
                [X_train_full, y_train_full], set_size)
        assert len(y_train) == set_size

        test_set_original = (X_test, y_test)

        cc = CC2(KNeighborsClassifier)
        cc.fit((X_train, y_train))

        for r in numpy.arange(0.05, 1.0, 0.05):
            X_test_new, y_test_new = SetGen.with_pos_ratio(
                    test_set_original, r, pos_label=1)

            dist_true = DE.arrayToDist(y_test_new)
            dist_est = cc.predict(X_test_new)

            # NOTE(review): the RMS error is computed but never reported; the
            # call is kept in case rms() validates its inputs - confirm and
            # either print the value or drop the call.
            err = rms(dist_est, dist_true)

            # Entry 1 is presumably the positive class (pos_label=1 above).
            print("%f\t%f" % (dist_true[1], dist_est[1]))
示例#3
0
文件: cc2.py 项目: pyongjoo/ende
    def _test_basic(self):
        """Fit a plain ``LinearSVC`` on synthetic data and dump diagnostics.

        Trains on a 1000-example subset of the synthetic training data,
        predicts the full test split, and prints the confusion matrix followed
        by the fitted ``coef_`` and ``intercept_``. The test features and
        labels are then written with ``numpy.savetxt`` to the files
        ``synthetic_X_test`` and ``synthetic_y_test`` (relative paths, so they
        land in the current working directory).
        """
        X_train_full, y_train_full, X_test, y_test = synthetic_reader.toNumpy(0.3, n_class=2)
        set_size = 1000     # an arbitrary number
        X_train, y_train = self.get_sub_set_with_size(
                [X_train_full, y_train_full], set_size)
        assert len(y_train) == set_size

        clf = LinearSVC()
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(confusion_matrix(y_test, y_pred))
        print(clf.coef_)
        print(clf.intercept_)

        numpy.savetxt('synthetic_X_test', X_test)
        numpy.savetxt('synthetic_y_test', y_test)
示例#4
0
文件: ac2.py 项目: pyongjoo/ende
    def test_class_ratio(self):
        '''
        Compare several competing methods while changing the ratio of the
        positive class in the dataset. A binary-class dataset is used for
        ease of interpretation.

        NOTE(review): the code visible here only prepares a 100-example
        training subset and fits a single AC2 estimator; the comparison
        described above is not performed in this excerpt - confirm whether
        the method continues elsewhere.
        '''
        # Alternative data sources, kept for quick switching; each is unpacked
        # into the same four values.
        #X_train_full, y_train_full, X_test, y_test = nc_rna_reader.toNumpy()
        #X_train_full, y_train_full, X_test, y_test = rcv1_binary_reader.toNumpy()
        X_train_full, y_train_full, X_test, y_test = synthetic_reader.toNumpy(0, n_class=2)
        #X_train_full, y_train_full, X_test, y_test = snippet_reader.toNumpy()
        set_size = 100     # an arbitrary number
        X_train, y_train= self.get_sub_set_with_size([X_train_full, y_train_full], set_size)
        # Sanity check: the helper must return exactly set_size labels.
        assert(len(y_train) == set_size)

        train_set = (X_train, y_train)
        # Not used in the visible code; presumably consumed by the comparison
        # the docstring describes - verify before removing.
        test_set_original = (X_test, y_test)

        # AC2 wrapping LogisticRegression was the earlier choice; the
        # SyntheticClassifier variant is the one currently exercised.
        #ac = AC2(LogisticRegression)
        ac = AC2(SyntheticClassifier)
        ac.fit(train_set)
示例#5
0
 def test_ratio(self):
     """Call ``self.run_ratio`` on synthetic data for each training size.

     The dataset comes from ``synthetic_reader.toNumpy(err=0, n_class=2)``.
     Only size 100 is currently exercised; an empty line is printed after
     each run.
     """
     data = synthetic_reader.toNumpy(err = 0, n_class=2)
     # Wider sweep kept for reference: numpy.arange(100, 2100, 100)
     sizes = [100]
     for size in sizes:
         self.run_ratio(data, size)
         # Emits the same lone newline as a bare Python 2 ``print``.
         print("")