Пример #1
0
def run(n_feature, train, validation, test):
    """Run the RBF, polynomial and linear experiments for the 'aws' target.

    ``train``, ``validation`` and ``test`` are each unpacked as an
    ``(x, y)`` pair.  The value returned by every experiment is printed,
    followed by the number of training labels and the elapsed seconds
    (``%d`` truncates the elapsed time to an integer).
    """
    train_x, train_y = train
    val_x, val_y = validation
    test_x, test_y = test

    started = time.time()

    rbf = RBFKernelRetraining('aws', train_x, train_y, val_x, val_y,
                              test_x, test_y, n_feature,
                              OfflineMethods.RT_in_F)
    print(rbf.grid_retrain_in_x())

    # Disabled alternative:
    # print rbf.grid_retrain_in_f(100)

    def quadratic_map(x):
        """Feature map for the polynomial kernel (gamma * u'v + c)^2.

        Assumes gamma = 1 and c = 0.  The squared terms come first, highest
        index first, followed by sqrt(2) * x[i] * x[j] for every pair i > j
        (again from the highest indices down).
        """
        top_down = list(range(len(x) - 1, -1, -1))
        squares = [x[k] * x[k] for k in top_down]
        crosses = [sqrt(2) * x[hi] * x[lo]
                   for hi in top_down
                   for lo in range(hi - 1, -1, -1)]
        return squares + crosses

    # TODO retrain directly and plot into plots
    poly = PolySolver('aws', train_x, train_y, val_x, val_y,
                      test_x, test_y, quadratic_map, n_feature)
    print(poly.grid_retrain_in_f())

    linear = LinearTrainer('aws', train_x, train_y, val_x, val_y,
                           test_x, test_y, n_feature)
    print(linear.grid_search())

    elapsed = time.time() - started
    print('time: %d %d' % (len(train_y), elapsed))
Пример #2
0
def run(n_feature, train, validation, test):
    """Run the RBF, polynomial and linear experiments for the 'aws' target.

    ``train``, ``validation`` and ``test`` are each unpacked as an
    ``(x, y)`` pair.  The value returned by every experiment is printed,
    followed by the number of training labels and the elapsed seconds
    (``%d`` truncates the elapsed time to an integer).
    """
    train_x, train_y = train
    val_x, val_y = validation
    test_x, test_y = test

    started = time.time()

    rbf = RBFKernelRetraining('aws', train_x, train_y, val_x, val_y,
                              test_x, test_y, n_feature,
                              OfflineMethods.RT_in_F)
    print(rbf.grid_retrain_in_x())
    # Disabled alternative:
    # print rbf.grid_retrain_in_f(100)

    def quadratic_map(x):
        """Feature map for the polynomial kernel (gamma * u'v + c)^2.

        Assumes gamma = 1 and c = 0.  The squared terms come first, highest
        index first, followed by sqrt(2) * x[i] * x[j] for every pair i > j
        (again from the highest indices down).
        """
        top_down = list(range(len(x) - 1, -1, -1))
        squares = [x[k] * x[k] for k in top_down]
        crosses = [sqrt(2) * x[hi] * x[lo]
                   for hi in top_down
                   for lo in range(hi - 1, -1, -1)]
        return squares + crosses

    # TODO retrain directly and plot into plots
    poly = PolySolver('aws', train_x, train_y, val_x, val_y,
                      test_x, test_y, quadratic_map, n_feature)
    print(poly.grid_retrain_in_f())

    linear = LinearTrainer('aws', train_x, train_y, val_x, val_y,
                           test_x, test_y, n_feature)
    print(linear.grid_search())

    elapsed = time.time() - started
    print('time: %d %d' % (len(train_y), elapsed))
Пример #3
0
def run(name, n_features, train, validation, test, q):
    """Run the RBF and linear experiments on one dataset and print the results.

    ``train``, ``validation`` and ``test`` are each unpacked as an
    ``(x, y)`` pair; the six parts are checked with ``any_none`` in an
    assertion before anything else happens.  ``q`` is only printed.
    """
    (train_x, train_y), (val_x, val_y), (test_x, test_y) = (
        train, validation, test)

    parts = (train_x, train_y, val_x, val_y, test_x, test_y)
    assert not any_none(parts)

    print(q)

    rbf = RBFKernelRetraining(name, train_x, train_y, val_x, val_y,
                              test_x, test_y, n_features)
    print(rbf.grid_retrain_in_x())

    # Polynomial run is disabled:
    # poly = PolyTrainer(name, train_x, train_y, val_x, val_y, test_x, test_y, n_features)
    # print poly.grid_search()

    linear = LinearTrainer(name, train_x, train_y, val_x, val_y,
                           test_x, test_y, n_features)
    print(linear.grid_search())
Пример #4
0
def retrain_in_f_with_grid(name, label_p, label_n, oracle, n_features, ftype,
                           test_x, test_y, benchmark):
    print '--------------- retrain in F with grid -----------------'
    for n_pts in xrange(50, 601, 50):

        online = OnlineBase(name,
                            label_p,
                            label_n,
                            oracle,
                            n_features,
                            ftype,
                            error=.1)
        online.collect_pts(n_pts, -1)
        ex = RBFKernelRetraining(
            name,
            online.get_QSV(),
            online.get_QSV_labels(),  # training data
            online.get_QSV(),
            online.get_QSV_labels(),  # validation data
            test_x,
            test_y,  # test data
            n_features)

        print 'nQSV=%d, Q=%d, dim=100,' % (
            n_pts, online.get_n_query()), ex.grid_retrain_in_f(100)
Пример #5
0
def run(dataset):
    """Run the active-learning, online-retraining and baseline experiments.

    ``dataset`` selects the entry of the module-level ``meta`` mapping and
    the directory the test split is loaded from.  The procedure is repeated
    ``n_repeat`` times (module-level); three ``Result`` objects collect
    ``L_unif``, ``L_test`` and the query count per budget index, and are
    printed at the end.
    """
    n_features = len(meta[dataset]['val_name'])

    def new_result(suffix):
        # One Result per experiment kind, named '<dataset>-<suffix>'.
        return Result('%s-%s' % (dataset, suffix), aws=True)

    def new_oracle():
        # A fresh AWSOnline for the dataset's model.
        entry = meta[dataset]
        return AWSOnline(entry['model_id'], 1, 0, n_features,
                         entry['val_name'], ftype='uniform', error=.1)

    def record(result, slot, l_unif, l_test, oracle):
        # Append one measurement; the query count is read last.
        result.L_unif[slot].append(l_unif)
        result.L_test[slot].append(l_test)
        result.nquery[slot].append(oracle.get_n_query())

    result_online = new_result('aws-online')
    result_baseline = new_result('aws-baseline')
    result_active = new_result('aws-active')

    for repeat in range(n_repeat):
        print('Round %d of %d' % (repeat, n_repeat - 1))

        ex = new_oracle()

        test_x, test_y = load_svmlight_file(
            '/Users/Fan/dev/ML/code/binary-classifiers/targets/%s/test.scale'
            % dataset,
            n_features)
        test_x = test_x.todense()
        # Labels equal to 1 are kept; every other label becomes 0.
        test_y = [label if label == 1 else 0 for label in test_y]
        train_x, train_y = [], []

        # --- active learning, one run per budget --------------------------
        for i in result_active.index:
            q_by_u = result_active.Q_by_U[i]
            budget = q_by_u * (n_features + 1)
            print('Active learning with budget %d / %d' % (q_by_u, budget))

            learner = ActiveLearning(ex, (None, None), (test_x, test_y),
                                     n_features, budget, 5)
            L_unif, L_test = learner.do()
            record(result_active, i, L_unif, L_test, ex)

        # A new oracle is created for the online/baseline part.
        ex = new_oracle()

        # --- online retraining and baseline, sharing the collected points --
        for i in result_online.index:
            q_by_u = result_online.Q_by_U[i]
            budget = q_by_u * (n_features + 1)
            print('collecting up to budget %d / %d' % (q_by_u, budget))

            ex.collect_up_to_budget(budget)
            train_x.extend(ex.pts_near_b)
            train_y.extend(ex.pts_near_b_labels)

            print('retraining with %d points' % len(train_y))

            # online
            retrainer = RBFKernelRetraining(ex.batch_predict,
                                            (train_x, train_y),
                                            (test_x, test_y), n_features)
            L_unif, L_test = retrainer.grid_retrain_in_x()
            record(result_online, i, L_unif, L_test, ex)

            # baseline
            baseline = Baseline(ex.batch_predict, (train_x, train_y),
                                (test_x, test_y), n_features)
            L_unif, L_test = baseline.do()
            record(result_baseline, i, L_unif, L_test, ex)

    print(result_online)
    print(result_baseline)
    print(result_active)
Пример #6
0
def rbf_auto(ex, name, n_features, step):
    train_x, train_y = [], []
    val_x, val_y = [], []
    try:
        while True:
            ex.collect_pts(step)
            train_x.extend(ex.pts_near_b)
            train_y.extend(ex.pts_near_b_labels)
            val_x.extend(ex.support_pts)
            val_y.extend(ex.support_labels)
            e = RBFKernelRetraining(name, train_x, train_y, val_x, val_y,
                                    train_x, train_y, n_features)
            print ex.get_n_query(), e.grid_retrain_in_x()
    except KeyboardInterrupt:
        print 'Done'
Пример #7
0
def run(name, n_features, train, validation, test, q):
    """Run the RBF and linear experiments on one dataset and print the results.

    ``train``, ``validation`` and ``test`` are each unpacked as an
    ``(x, y)`` pair; the six parts are checked with ``any_none`` in an
    assertion before anything else happens.  ``q`` is only printed.
    """
    (train_x, train_y), (val_x, val_y), (test_x, test_y) = (
        train, validation, test)

    parts = (train_x, train_y, val_x, val_y, test_x, test_y)
    assert not any_none(parts)

    print(q)

    rbf = RBFKernelRetraining(name, train_x, train_y, val_x, val_y,
                              test_x, test_y, n_features)
    print(rbf.grid_retrain_in_x())

    # Polynomial run is disabled:
    # poly = PolyTrainer(name, train_x, train_y, val_x, val_y, test_x, test_y, n_features)
    # print poly.grid_search()

    linear = LinearTrainer(name, train_x, train_y, val_x, val_y,
                           test_x, test_y, n_features)
    print(linear.grid_search())
Пример #8
0
    # load model and collect QSV
    ex = LibSVMOnline(dataset_name, model_file, (1, -1), n_features, 'uniform',
                      1e-1)
    # generate test score
    X_test, y_test = load_svmlight_file(os.path.join(base_dir, 'test.scale'),
                                        n_features)
    X_test = X_test.todense()

    train_x, train_y = [], []

    for i in range(0, 10):
        ex.collect_up_to_budget(50)
        train_x.extend(ex.pts_near_b)
        train_y.extend(ex.pts_near_b_labels)

        rbf = RBFKernelRetraining(dataset_name, ex.batch_predict, train_x,
                                  train_y, X_test, y_test, n_features)
        print 'Q = ', ex.get_n_query()
        rbf.grid_retrain_in_x()


def run(dataset_name, n_features):
    base_dir = os.path.join(os.getcwd(), '../targets/%s/' % dataset_name)
    model_file = os.path.join(base_dir, 'train.scale.model')

    result = Result('baseline')
    n_repeat = 10
    for repeat in range(0, n_repeat):
        print 'Round %d of %d' % (repeat, n_repeat - 1)

        # load model and collect QSV
        ex = LibSVMOnline(dataset_name, model_file, (1, -1), n_features,
Пример #9
0
def main():
    """Query an SVC trained on two noisy circles and retrain from the collected points.

    Steps:
      1. Generate the two-circle dataset and plot it (left subplot).
      2. Fit an ``svm.SVC`` on it and print its accuracy on the training data.
      3. Use ``clf.predict`` as the oracle of an ``OnlineBase`` and, until
         interrupted with Ctrl-C, keep collecting points and building a new
         ``RBFKernelRetraining`` from everything collected so far.
      4. Plot the collected points (right subplot).
    """
    n_samples = 800

    # make_circles is defined in sklearn.datasets.  It generates a data set
    # X1 and labels Y1 with data from two circles, an inner and an outer one.
    # n_samples is the number of data points, noise is the noise on the data,
    # factor is the ratio between the radius of the inner circle and the
    # radius of the outer circle.
    # NOTE(review): in scikit-learn, label 0 appears to mark the outer circle
    # and label 1 the inner one -- confirm against the make_circles docs.
    X1, Y1 = make_circles(n_samples=n_samples, noise=0.07, factor=0.4)

    # Fraction (not count) of the samples carrying each label.
    frac0 = len(np.where(Y1 == 0)[0]) / float(len(Y1))
    frac1 = len(np.where(Y1 == 1)[0]) / float(len(Y1))

    print("Percentage of '0' labels:", frac0)
    print("Percentage of '1' labels:", frac1)

    plt.figure()
    plt.subplot(121)
    # The sample count in the title is taken from n_samples so it cannot
    # drift from the real dataset size (it used to be hard-coded as 200).
    plt.title(
        "Our Dataset: N={0}, '0': {1} '1': {2} ".format(
            n_samples, frac0, frac1),
        fontsize="large")

    plt.scatter(X1[:, 0], X1[:, 1], marker='o', c=Y1)
    plt.xlim((-2, 2))
    plt.ylim((-2, 2))

    clf = svm.SVC()  # creates a support vector classification object
    clf.fit(X1, Y1)  # fits the SVC to the data given

    # accuracy of the model on the training data
    print(accuracy_score(Y1, clf.predict(X1)))

    ex = OnlineBase('circle', 1, 0, clf.predict, 2, 'uniform', .1)
    step = 6
    train_x, train_y = [], []

    # TODO: add a real stop condition; for now only Ctrl-C ends the loop.
    # The try block wraps the whole loop so that an interrupt during
    # collect_pts() also falls through to the final plot instead of
    # aborting the program.
    try:
        while True:
            ex.collect_pts(step)
            train_x.extend(ex.pts_near_b)
            train_y.extend(ex.pts_near_b_labels)

            # TODO(review): a new RBFKernelRetraining is built on every pass
            # from all points collected so far; the same [x, y] pair is
            # passed twice.  Confirm this is the intended way to retrain.
            e = RBFKernelRetraining(
                'circle', [train_x, train_y], [train_x, train_y], n_features=2
            )
            print(ex.get_n_query(), e.grid_retrain_in_x())
    except KeyboardInterrupt:
        print('Done')

    train_x = np.array(train_x)
    plt.subplot(122)
    # Nothing to scatter if the loop was interrupted before any point was
    # collected; indexing an empty array with [:, 0] would raise.
    if train_x.size:
        plt.scatter(train_x[:, 0], train_x[:, 1], c=train_y)
    plt.xlim((-2, 2))
    plt.ylim((-2, 2))
    plt.show()
Пример #10
0
# Script body: query an AWSOnline oracle in a loop and build a new
# RBFKernelRetraining from the accumulated points on every pass.
# The outer loop runs exactly once (range(0, 1)) and `i` is not used inside.
for i in range(0, 1):
    val_name = ['v1', 'v2', 'v3', 'v4', 'v5', 'v6', 'v7', 'v8', 'v9', 'v10']
    n_features = 10

    ex = AWSOnline(model_id='ml-lkYRYeldcrH',
                   label_p=1,
                   label_n=0,
                   n_features=n_features,
                   val_name=val_name,
                   ftype='uniform',
                   error=.1)

    step = 6  # argument passed to ex.collect_pts() on every pass
    train_x, train_y = [], []
    val_x, val_y = [], []
    # 'test.scale' is a relative path, resolved against the current
    # working directory.
    test_x, test_y = load_svmlight_file('test.scale', n_features)
    test_x = test_x.todense()
    # Labels equal to 1 are kept as loaded; every other label becomes 0.
    test_y = [a if a == 1 else 0 for a in test_y]
    try:
        # No stop condition: the loop only ends on Ctrl-C.
        while True:
            ex.collect_pts(step)
            train_x.extend(ex.pts_near_b)
            train_y.extend(ex.pts_near_b_labels)
            val_x.extend(ex.support_pts)
            val_y.extend(ex.support_labels)
            # NOTE(review): the first argument is 'circle' although the
            # oracle above is an AWS model -- confirm this label is intended.
            e = RBFKernelRetraining('circle', train_x, train_y, val_x, val_y,
                                    test_x, test_y, n_features)
            # Python 2 print statement: the query count is written before
            # grid_retrain_in_x() is evaluated.
            print ex.get_n_query(), e.grid_retrain_in_x()
    except KeyboardInterrupt:
        print 'Done'
Пример #11
0
    model_file = os.path.join(base_dir, 'train.scale.model')

    # load model and collect QSV
    ex = LibSVMOnline(dataset_name, model_file, (1, -1), n_features, 'uniform', 1e-1)
    # generate test score
    X_test, y_test = load_svmlight_file(os.path.join(base_dir, 'test.scale'), n_features)
    X_test = X_test.todense()

    train_x, train_y = [], []

    for i in range(0, 10):
        ex.collect_up_to_budget(50)
        train_x.extend(ex.pts_near_b)
        train_y.extend(ex.pts_near_b_labels)

        rbf = RBFKernelRetraining(dataset_name, ex.batch_predict, train_x, train_y, X_test, y_test, n_features)
        print 'Q = ', ex.get_n_query()
        rbf.grid_retrain_in_x()


def run(dataset_name, n_features):
    base_dir = os.path.join(os.getcwd(), '../targets/%s/' % dataset_name)
    model_file = os.path.join(base_dir, 'train.scale.model')

    result = Result('baseline')
    n_repeat = 10
    for repeat in range(0, n_repeat):
        print 'Round %d of %d'% (repeat, n_repeat - 1)

        # load model and collect QSV
        ex = LibSVMOnline(dataset_name, model_file, (1, -1), n_features, 'uniform', 1e-1)