示例#1
0
def test_larcqy_logistic(verbose=False):
    """Sweep the regularization of ``EmbeddingUpdater`` and print test metrics.

    For every (regularization, max_epochs) pair a fresh ``EmbeddingUpdater``
    is fitted on the training split and evaluated on the test split. The AUC
    computed by ``eval_auc`` and the NLL value returned by ``predict`` are
    printed. Nothing is asserted, so this is an experiment / smoke run rather
    than a pass-fail check.

    Args:
        verbose: Forwarded to ``EmbeddingUpdater`` so that the caller decides
            whether the model emits its own output.
    """
    # factorization of the parameters with multiple linear regressions

    toy = False  # flip to True to run on the toy data instead of the urban data
    np.random.seed(1)  # seed NumPy's global RNG before any data is loaded

    if toy:
        # load toy data (the third return value is not needed here)
        data, rank, _ = toy_dual_supervision_data()
        data_train, data_test = train_test_split(data)
        # One more than the largest index found in either split.
        # NOTE(review): presumably t[0] holds entity indices -- confirm against
        # toy_dual_supervision_data.
        n_ents = 1 + np.max([np.max([np.max(t[0]) for t in qc[0]]) for qc in data_train] +
                            [np.max([np.max(t[0]) for t in qc[0]]) for qc in data_test])
    else:
        from factorix.demos.urban.urban_data_loading import load_area_aspects_data
        # aspects = {'multicultural'}
        # aspects = {'posh'}
        aspects = {'waterside'}
        nw, na = 1000000, 300
        data_train, voc = load_area_aspects_data(aspects, 'train', max_n_words_per_area=nw, max_n_words_per_aspect=na)
        # The vocabulary obtained from the training split is passed to the test
        # loader, and the vocabulary it returns is the one used for n_ents.
        data_test, voc = load_area_aspects_data(aspects, 'test', max_n_words_per_area=nw, max_n_words_per_aspect=na,
                                                vocab=voc)
        rank = 1
        n_ents = len(voc.index)

    regs = np.linspace(0.1, 20, 10)
    max_epochs_list = [500]  # range(25, 500, 25)

    for reg in regs:
        for max_epochs in max_epochs_list:
            model = EmbeddingUpdater(rank, n_ents, reg, max_epochs=max_epochs, verbose=verbose)
            model.fit(data_train)
            pred, y, test_nll = model.predict(data_test)
            test_auc = eval_auc(pred, y)
            print('reg: ', reg, 'niter: ', max_epochs, ', auc: ', test_auc, ', nll: ', test_nll)
示例#2
0
def test_logistic_regression_equivalence_prediction(verbose=False):
    """Check that EmbeddingUpdater reproduces logistic-regression scores.

    A logistic regression is fitted on the mean-centered iris features,
    restricted to the samples whose class is not 2. Its coefficients and
    intercept are loaded into two separately built ``EmbeddingUpdater``
    instances through ``logistic2embeddings``. The predictions of each
    instance must lie within 1e-2 (Euclidean norm) of the linear scores
    ``x . coef + intercept``.

    Args:
        verbose: When true, print the first three predictions of every model,
            mapped through the logistic function, for visual comparison.
    """
    def sigmoid(z):
        # logistic function, only used for the verbose report
        return 1.0 / (np.exp(-z) + 1.0)

    iris = datasets.load_iris()
    binary = iris.target != 2  # drop every sample of class 2
    x_mat = iris.data[binary]
    y = iris.target[binary]
    x_mat -= x_mat.mean(axis=0)
    n_ents = x_mat.shape[1] + 3  # number of entities is the dimension with 2 slots and 1 intercept

    reference = linear_model.LogisticRegression(C=1.0, penalty='l2', tol=1e-6)
    reference.fit(x_mat, y)
    prob = reference.predict_proba(x_mat)
    scores = x_mat.dot(reference.coef_.T) + reference.intercept_

    # Build the same model twice; every instance stays referenced until the
    # end of the test.
    updaters = []
    updater_preds = []
    for _ in range(2):
        updater = EmbeddingUpdater(rank=1, n_ents=n_ents, n_slots=2, reg=1.0, max_epochs=500, verbose=False,
                                   preprocessing=multitask_to_tuples)
        updater.logistic2embeddings(coefs=reference.coef_, intercept=reference.intercept_)
        # The second value returned by predict is bound back to y, so the next
        # predict call receives it.
        current_pred, y, _ = updater.predict(x_mat, y)
        updaters.append(updater)
        updater_preds.append(current_pred)
    pred2, pred3 = updater_preds

    if verbose:
        head = slice(0, 3)
        print('linear_model.LogisticRegression predictions:', prob[head])
        print('logistic regression prediction: ', sigmoid(scores[head]))
        print('EmbeddingUpdater predictions:', sigmoid(pred2[head]))
        print('same:', sigmoid(pred3[head]))

    assert np.linalg.norm(scores - pred2) < 1e-2
    assert np.linalg.norm(scores - pred3) < 1e-2
示例#3
0
def test_logistic_regression_equivalence_prediction(verbose=False):
    """Verify EmbeddingUpdater predictions against a fitted logistic regression.

    The iris samples whose class is not 2 are mean-centered and used to fit a
    logistic regression. Two ``EmbeddingUpdater`` models are then initialized
    from that regression's coefficients and intercept via
    ``logistic2embeddings``. Both must predict values whose Euclidean distance
    to the regression's linear scores is below 1e-2.

    Args:
        verbose: When true, print the first three outputs of each model after
            applying the logistic function.
    """
    iris = datasets.load_iris()
    keep = iris.target != 2  # samples of class 2 are discarded
    x_mat, y = iris.data[keep], iris.target[keep]
    x_mat -= np.mean(x_mat, 0)
    # number of entities is the dimension with 2 slots and 1 intercept
    n_ents = x_mat.shape[1] + 3

    clf1 = linear_model.LogisticRegression(C=1.0, penalty='l2', tol=1e-6)
    clf1.fit(x_mat, y)
    prob = clf1.predict_proba(x_mat)
    scores = x_mat.dot(clf1.coef_.T) + clf1.intercept_

    def updater_from_logistic():
        # Fresh EmbeddingUpdater whose embeddings are set from clf1.
        model = EmbeddingUpdater(rank=1,
                                 n_ents=n_ents,
                                 n_slots=2,
                                 reg=1.0,
                                 max_epochs=500,
                                 verbose=False,
                                 preprocessing=multitask_to_tuples)
        model.logistic2embeddings(coefs=clf1.coef_,
                                  intercept=clf1.intercept_)
        return model

    clf2 = updater_from_logistic()
    pred2, y, _ = clf2.predict(x_mat, y)
    clf3 = updater_from_logistic()
    pred3, y, _ = clf3.predict(x_mat, y)

    if verbose:
        n_show = 3
        print('linear_model.LogisticRegression predictions:', prob[0:n_show])
        report = (
            ('logistic regression prediction: ', scores),
            ('EmbeddingUpdater predictions:', pred2),
            ('same:', pred3),
        )
        for label, values in report:
            print(label, 1.0 / (np.exp(-values[0:n_show]) + 1.0))

    for candidate in (pred2, pred3):
        assert np.linalg.norm(scores - candidate) < 1e-2
示例#4
0
def test_logistic_regression_equivalence_learning(verbose=False):
    """Check that a trained EmbeddingUpdater matches logistic regression.

    A logistic regression with ``C=1e10`` is fitted on the first, mean-centered
    iris feature, using only the samples whose class is not 2. Its linear
    scores are compared (Euclidean norm below 1e-1) with ``EmbeddingUpdater``
    predictions in three settings:

    1. embeddings set from the regression parameters, before any ``fit``;
    2. the same model after ``fit`` ("oracle initialization" in the output);
    3. a newly constructed model after ``fit`` ("random initialization" in
       the output).

    Args:
        verbose: Forwarded to ``EmbeddingUpdater``; when true the predictions
            at sample indices 0 and 99 are also printed after applying the
            logistic function.
    """
    def sigmoid(z):
        # logistic function, only used for the verbose report
        return 1.0 / (np.exp(-z) + 1.0)

    iris = datasets.load_iris()
    two_class = iris.target != 2  # samples of class 2 are discarded
    x_mat = iris.data[two_class, 0:1]  # keep a single feature column
    y = iris.target[two_class]
    x_mat -= x_mat.mean(axis=0)
    n_ents = x_mat.shape[1] + 3

    c = 1e10  # no regularization
    baseline = linear_model.LogisticRegression(C=c, penalty='l2', tol=1e-6)
    baseline.fit(x_mat, y)
    scores = x_mat.dot(baseline.coef_.T) + baseline.intercept_

    def new_updater():
        # Every model in this test shares the same hyper-parameters.
        return EmbeddingUpdater(rank=1,
                                n_ents=n_ents,
                                n_slots=2,
                                reg=1e-10,
                                max_epochs=500,
                                verbose=verbose,
                                preprocessing=multitask_to_tuples)

    # Model started from the logistic-regression parameters.
    oracle = new_updater()
    oracle.logistic2embeddings(coefs=baseline.coef_,
                               intercept=baseline.intercept_)
    # The second value returned by predict replaces y for the calls below.
    pred, y, _ = oracle.predict(x_mat, y)
    oracle.fit(x_mat, y)
    pred2, _, _ = oracle.predict(x_mat, y)

    # Model trained without loading the regression parameters.
    scratch = new_updater()
    scratch.fit(x_mat, y)
    pred3, _, _ = scratch.predict(x_mat, y)

    if verbose:
        idx_show = [0, 99]
        report = (
            ('logistic regression prediction:\n', scores),
            ('EmbeddingUpdater predictions before learning:\n', pred),
            ('EmbeddingUpdater predictions after learning (oracle initialization): \n',
             pred2),
            ('EmbeddingUpdater predictions after learning (random initialization): \n',
             pred3),
        )
        for label, values in report:
            print(label, sigmoid(values[idx_show]))

    assert np.linalg.norm(scores - pred) < 1e-1
    assert np.linalg.norm(scores - pred2) < 1e-1
    assert np.linalg.norm(scores - pred3) < 1e-1
示例#5
0
def test_logistic_regression_equivalence_learning(verbose=False):
    """Verify that fitting EmbeddingUpdater agrees with logistic regression.

    Data: the first iris feature, mean-centered, for the samples whose class
    is not 2. A logistic regression with ``C=1e10`` provides the target linear
    scores. Three sets of ``EmbeddingUpdater`` predictions must each be within
    1e-1 (Euclidean norm) of those scores: a model whose embeddings were set
    through ``logistic2embeddings``, that same model after ``fit``, and a
    second model that was only fitted.

    Args:
        verbose: Passed on to ``EmbeddingUpdater``; when true, predictions for
            sample indices 0 and 99 are printed after applying the logistic
            function.
    """
    iris = datasets.load_iris()
    mask = iris.target != 2  # samples of class 2 are discarded
    x_mat, y = iris.data[mask, 0:1], iris.target[mask]
    x_mat -= np.mean(x_mat, 0)

    c = 1e10  # no regularization
    clf1 = linear_model.LogisticRegression(C=c, penalty='l2', tol=1e-6)
    clf1.fit(x_mat, y)
    scores = x_mat.dot(clf1.coef_.T) + clf1.intercept_

    # Hyper-parameters shared by both EmbeddingUpdater instances.
    updater_kwargs = dict(rank=1, n_ents=x_mat.shape[1] + 3, n_slots=2, reg=1e-10, max_epochs=500,
                          verbose=verbose, preprocessing=multitask_to_tuples)

    clf2 = EmbeddingUpdater(**updater_kwargs)
    clf2.logistic2embeddings(coefs=clf1.coef_, intercept=clf1.intercept_)
    # y is rebound to the second value returned by predict and reused below.
    pred, y, _ = clf2.predict(x_mat, y)
    clf2.fit(x_mat, y)
    pred2 = clf2.predict(x_mat, y)[0]

    clf3 = EmbeddingUpdater(**updater_kwargs)
    clf3.fit(x_mat, y)
    pred3 = clf3.predict(x_mat, y)[0]

    if verbose:
        idx_show = [0, 99]

        def as_probability(values):
            # logistic function applied to the rows selected for display
            return 1.0 / (np.exp(-values[idx_show]) + 1.0)

        print('logistic regression prediction:\n', as_probability(scores))
        print('EmbeddingUpdater predictions before learning:\n', as_probability(pred))
        print('EmbeddingUpdater predictions after learning (oracle initialization): \n',
              as_probability(pred2))
        print('EmbeddingUpdater predictions after learning (random initialization): \n',
              as_probability(pred3))

    for candidate in (pred, pred2, pred3):
        assert np.linalg.norm(scores - candidate) < 1e-1