def msrc():
    """Train a fully supervised model on the MSRC train split and test it.

    An ``EdgeCRF`` is wrapped in a ``OneSlackSSVM`` and fitted on
    ``load_msrc('train')``.  The learned weight vector is written as CSV and
    the fitted learner is pickled, both under ``models/msrc/``.  The learner
    is then evaluated on ``load_msrc('test')`` and the error, score, weight
    norm and training time are printed.

    Returns
    -------
    clf : OneSlackSSVM
        The fitted learner.
    """
    models_basedir = 'models/msrc/'
    crf = EdgeCRF(n_states=24, n_features=2028, n_edge_features=4,
                  inference_method='gco')
    clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=2,
                       tol=0.1, n_jobs=4,
                       inference_cache=100)

    X, Y = load_msrc('train')
    # remove_areas is defined elsewhere; presumably it strips the area
    # column from the raw labels -- TODO confirm.
    Y = remove_areas(Y)

    # Only the fit itself is timed.
    start = time()
    clf.fit(X, Y)
    stop = time()

    np.savetxt(models_basedir + 'msrc_full.csv', clf.w)
    # Binary mode: pickle streams must not go through newline translation,
    # otherwise the file may not load back when read in binary mode.
    with open(models_basedir + 'msrc_full' + '.pickle', 'wb') as f:
        cPickle.dump(clf, f)

    X, Y = load_msrc('test')
    Y = remove_areas(Y)

    Y_pred = clf.predict(X)

    # Single-argument parenthesised form prints identically on Python 2
    # and is also valid Python 3 syntax.
    print('Error on test set: %f' % compute_error(Y, Y_pred))
    print('Score on test set: %f' % clf.score(X, Y))
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % (stop - start))

    return clf
示例#2
0
def msrc_weak(n_full=20, n_train=276, C=100, latent_iter=25,
              max_iter=500, inner_tol=0.001, outer_tol=0.01, min_changes=0,
              initialize=True, alpha=0.1, n_inference_iter=5):
    """Train a latent SSVM on MSRC with a mix of full and weak labels.

    A ``LatentSSVM`` wrapping a ``OneSlackSSVM`` over an ``HCRF`` is fitted
    on the train split.  The mask read from ``trainMaskX<n_full>.txt``
    decides, per training sample, whether it keeps its full labelling or
    only the set of labels that occur in it.

    Parameters
    ----------
    n_full : int
        Selects the mask file ``trainMaskX%d.txt``.
    n_train : int
        Number of leading mask entries to use.
    C, max_iter, inner_tol :
        Passed to the inner ``OneSlackSSVM`` (``tol=inner_tol``).
    latent_iter, outer_tol, min_changes :
        Passed to the outer ``LatentSSVM`` (``tol=outer_tol``).
    initialize :
        Forwarded to ``clf.fit``.
    alpha, n_inference_iter :
        Passed to ``HCRF`` (``n_iter=n_inference_iter``).

    Returns
    -------
    result : ExperimentResult
        Staged test scores, the learner's recorded histories, final
        train/test scores, elapsed time and all hyper-parameters.
    """
    crf = HCRF(n_states=24, n_features=2028, n_edge_features=4, alpha=alpha,
               inference_method='gco', n_iter=n_inference_iter)
    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=0,
                            tol=inner_tol, n_jobs=4, inference_cache=10)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    # Column 0 of each raw label array holds the labels (cast to int32);
    # column 1 is passed through unchanged -- presumably region areas,
    # TODO confirm against Label.
    Xtest, Ytest = load_msrc('test')
    Ytest = [Label(y[:, 0].astype(np.int32), None, y[:, 1], True)
             for y in Ytest]

    train_mask = np.genfromtxt(
        '../data/msrc/trainmasks/trainMaskX%d.txt' % n_full)
    # Builtin bool instead of np.bool: the alias was removed in NumPy 1.24
    # and always referred to the builtin, so the result is the same.
    train_mask = train_mask[0:n_train].astype(bool)

    Xtrain, Ytrain_raw = load_msrc('train')
    # Fully labelled version of every training sample, used for scoring.
    Ytrain_full = [Label(y[:, 0].astype(np.int32), None, y[:, 1], True)
                   for y in Ytrain_raw]

    # NOTE(review): zip stops at the shorter input, so if the mask has fewer
    # entries than Ytrain_raw, Ytrain ends up shorter than Xtrain -- confirm
    # that this is intended / handled by fit.
    Ytrain = []
    for y, is_full in zip(Ytrain_raw, train_mask):
        labels = y[:, 0].astype(np.int32)
        if is_full:
            Ytrain.append(Label(labels, None, y[:, 1], True))
        else:
            # Weak sample: keep only which labels are present.
            Ytrain.append(Label(None, np.unique(labels), y[:, 1], False))

    # Only the fit itself is timed.
    start = time()
    clf.fit(Xtrain, Ytrain, initialize=initialize)
    stop = time()

    train_score = clf.score(Xtrain, Ytrain_full)
    test_score = clf.score(Xtest, Ytest)
    time_elapsed = stop - start

    # Single-argument parenthesised form prints identically on Python 2
    # and is also valid Python 3 syntax.
    print('Score on train set: %f' % train_score)
    print('Score on test set: %f' % test_score)
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % time_elapsed)

    test_scores = list(clf.staged_score(Xtest, Ytest))

    result = ExperimentResult(np.array(test_scores), clf.changes_,
                              clf.w_history_, clf.delta_history_,
                              clf.primal_objective_curve_,
                              clf.objective_curve_, clf.timestamps_,
                              clf.base_iter_history_,
                              train_score=train_score, test_score=test_score,
                              time_elapsed=time_elapsed,
                              n_inference_iter=n_inference_iter,
                              n_full=n_full, n_train=n_train, C=C,
                              latent_iter=latent_iter, max_iter=max_iter,
                              inner_tol=inner_tol, outer_tol=outer_tol,
                              alpha=alpha,
                              min_changes=min_changes, initialize=initialize,
                              dataset_name='msrc',
                              annotation_type='image-level labelling',
                              label_type='full+weak')
    return result
def msrc_test():
    """Train the fully supervised model on several train-set sizes.

    For each ``n_train`` in ``[20, 40, 80, 160, 276]`` a fresh
    ``OneSlackSSVM`` over an ``EdgeCRF`` is fitted on the training samples
    selected by the corresponding mask file (all samples for 276).  The
    weights (CSV) and the fitted learner (pickle) are stored under
    ``models/msrc/``, and ``1 - compute_error`` on the test split is
    recorded.  The list of qualities is finally written to
    ``results/msrc/msrc_full.txt``.
    """
    # test model on different train set sizes
    basedir = '../data/msrc/trainmasks/'
    models_basedir = 'models/msrc/'
    quality = []

    Xtest, Ytest = load_msrc('test')
    Ytest = remove_areas(Ytest)
    Xtrain, Ytrain = load_msrc('train')
    Ytrain = remove_areas(Ytrain)

    for n_train in [20, 40, 80, 160, 276]:
        crf = EdgeCRF(n_states=24, n_features=2028, n_edge_features=4,
                      inference_method='gco')
        clf = OneSlackSSVM(crf, max_iter=1000, C=0.01, verbose=0,
                           tol=0.1, n_jobs=4, inference_cache=100)

        # Builtin bool instead of np.bool: the alias was removed in
        # NumPy 1.24 and always referred to the builtin.
        if n_train != 276:
            train_mask = np.genfromtxt(basedir + 'trainMaskX%d.txt' % n_train)
            # NOTE(review): 277 here vs. 276 in the other branch; harmless
            # while zip below truncates to the shortest input -- confirm.
            train_mask = train_mask[:277].astype(bool)
        else:
            train_mask = np.ones(276, dtype=bool)

        # Keep only the samples whose mask entry is set.
        curX = [x for s, x in zip(train_mask, Xtrain) if s]
        curY = [y for s, y in zip(train_mask, Ytrain) if s]

        # Only the fit itself is timed.
        start = time()
        clf.fit(curX, curY)
        stop = time()

        model_stem = models_basedir + 'test_model_%d' % n_train
        np.savetxt(model_stem + '.csv', clf.w)
        # Binary mode: pickle streams must not go through newline
        # translation.
        with open(model_stem + '.pickle', 'wb') as f:
            cPickle.dump(clf, f)

        Ypred = clf.predict(Xtest)

        q = 1 - compute_error(Ytest, Ypred)

        # Single-argument parenthesised form prints identically on
        # Python 2 and is also valid Python 3 syntax.
        print('n_train=%d, quality=%f, time=%f' % (n_train, q, stop - start))
        quality.append(q)

    np.savetxt('results/msrc/msrc_full.txt', quality)
def msrc_load(n_full, n_train):
    """Load MSRC train/test data and build full and weak label objects.

    Parameters
    ----------
    n_full : int
        Selects the mask file ``trainMaskX%d.txt``.
    n_train : int
        Number of leading mask entries to use.

    Returns
    -------
    Xtrain, Ytrain, Ytrain_full, Xtest, Ytest
        ``Ytrain`` holds a fully labelled ``Label`` where the mask entry is
        set and a weak one (only the unique labels present) otherwise;
        ``Ytrain_full`` and ``Ytest`` are fully labelled throughout.
    """
    # Column 0 of each raw label array holds the labels (cast to int32);
    # column 1 is passed through unchanged -- presumably region areas,
    # TODO confirm against Label.
    Xtest, Ytest = load_msrc('test')
    Ytest = [Label(y[:, 0].astype(np.int32), None, y[:, 1], True)
             for y in Ytest]

    train_mask = np.genfromtxt(
        '../data/msrc/trainmasks/trainMaskX%d.txt' % n_full)
    # Builtin bool instead of np.bool: the alias was removed in NumPy 1.24
    # and always referred to the builtin, so the result is the same.
    train_mask = train_mask[0:n_train].astype(bool)

    Xtrain, Ytrain_raw = load_msrc('train')
    Ytrain_full = [Label(y[:, 0].astype(np.int32), None, y[:, 1], True)
                   for y in Ytrain_raw]

    # NOTE(review): zip stops at the shorter input, so if the mask has fewer
    # entries than Ytrain_raw, Ytrain ends up shorter than Xtrain -- confirm
    # that callers expect this.
    Ytrain = []
    for y, is_full in zip(Ytrain_raw, train_mask):
        labels = y[:, 0].astype(np.int32)
        if is_full:
            Ytrain.append(Label(labels, None, y[:, 1], True))
        else:
            # Weak sample: keep only which labels are present.
            Ytrain.append(Label(None, np.unique(labels), y[:, 1], False))

    return Xtrain, Ytrain, Ytrain_full, Xtest, Ytest