Exemplo n.º 1
0
def main():
    """Train a RecursiveNN on a small PTB sample and print its accuracy.

    Loads the data with ``get_ptb_data()``, runs ``add_idx_to_tree`` and
    ``tree2list(..., is_binary=True)`` over both splits, drops every entry
    whose ``t[3][-1]`` is negative, shuffles, and keeps the first 1000
    training and 500 test entries.  A ``RecursiveNN(V, 80, 5)`` is then
    fitted with ``reg=0`` and ReLU activation, and the train/test scores
    are printed.
    """
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary=True) for t in train]
    train = [t for t in train if t[3][-1] >= 0]  # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary=True) for t in test]
    test = [t for t in test if t[3][-1] >= 0]  # for filtering binary labels

    # Shuffle first, then truncate, so the kept subset is a random sample.
    train = shuffle(train)
    train = train[:1000]
    test = shuffle(test)
    test = test[:500]

    V = len(word2idx)
    # Single-argument print(...) calls emit the same text on Python 2 and
    # Python 3; the former print statements were a SyntaxError on Python 3.
    print("vocab size: {}".format(V))
    D = 80
    # NOTE(review): the data above is filtered as binary, yet K stays 5 —
    # presumably K is the number of output classes; confirm this is intended.
    K = 5

    model = RecursiveNN(V, D, K)
    model.fit(train, reg=0, activation=T.nnet.relu)
    print("train accuracy: {}".format(model.score(train)))
    print("test accuracy: {}".format(model.score(test)))
def main():
    """Fit a RecursiveNN on binary-filtered PTB data and report accuracy.

    Both splits returned by ``get_ptb_data()`` go through the same
    preparation (``add_idx_to_tree`` on every tree, ``tree2list`` with
    ``is_binary=True``, then dropping entries whose ``t[3][-1]`` is
    negative).  After shuffling, 1000 training and 500 test entries are
    kept, a ``RecursiveNN(V, 80, 5)`` is trained with ``reg=0`` and ReLU
    activation, and train/test scores are printed.
    """
    def _prepare(trees):
        # Shared preparation for both splits (previously duplicated inline).
        for tree in trees:
            add_idx_to_tree(tree, 0)
        flat = [tree2list(tree, -1, is_binary=True) for tree in trees]
        # for filtering binary labels
        return [entry for entry in flat if entry[3][-1] >= 0]

    train, test, word2idx = get_ptb_data()
    train = _prepare(train)
    test = _prepare(test)

    # Shuffle before slicing so the retained subset is random.
    train = shuffle(train)[:1000]
    test = shuffle(test)[:500]

    V = len(word2idx)
    # print(...) with one pre-formatted string behaves identically on
    # Python 2 and 3; the old print statements did not parse on Python 3.
    print("vocab size: {}".format(V))
    D = 80
    # NOTE(review): labels are filtered as binary above but K is 5 —
    # presumably the number of output classes; confirm intended.
    K = 5

    model = RecursiveNN(V, D, K)
    model.fit(train, reg=0, activation=T.nnet.relu)
    print("train accuracy: {}".format(model.score(train)))
    print("test accuracy: {}".format(model.score(test)))
def main(is_binary=True):
    """Train a RecursiveNN on PTB data and print accuracy and F1.

    Args:
        is_binary: forwarded to ``tree2list``; when true, entries whose
            ``t[3][-1]`` is negative are dropped and the model is built
            with ``K = 2`` instead of ``K = 5``.
    """
    def preprocess(trees):
        # Run add_idx_to_tree on each tree, then convert with tree2list.
        for tree in trees:
            add_idx_to_tree(tree, 0)
        converted = [tree2list(tree, -1, is_binary) for tree in trees]
        if not is_binary:
            return converted
        # for filtering binary labels
        return [item for item in converted if item[3][-1] >= 0]

    raw_train, raw_test, word2idx = get_ptb_data()
    train = preprocess(raw_train)
    test = preprocess(raw_test)

    # Shuffle, then cap the split sizes (5000 train / 1000 test).
    train = shuffle(train)[:5000]
    test = shuffle(test)[:1000]

    V = len(word2idx)
    print("vocab size:", V)
    D = 20
    if is_binary:
        K = 2
    else:
        K = 5

    model = RecursiveNN(V, D, K)
    model.fit(train)
    for label, dataset in (("train", train), ("test", test)):
        print(label + " accuracy:", model.score(dataset))
    for label, dataset in (("train", train), ("test", test)):
        print(label + " f1:", model.f1_score(dataset))
def main(is_binary=True):
    """Train a RecursiveNN on PTB data and print accuracy and F1 scores.

    Args:
        is_binary: forwarded to ``tree2list``; when true, entries whose
            ``t[3][-1]`` is negative are removed and ``K`` is 2 rather
            than 5.
    """
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary) for t in train]
    if is_binary:
        train = [t for t in train if t[3][-1] >= 0]  # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary) for t in test]
    if is_binary:
        test = [t for t in test if t[3][-1] >= 0]  # for filtering binary labels

    # Shuffle, then keep at most 5000 training and 1000 test entries.
    train = shuffle(train)
    train = train[:5000]
    test = shuffle(test)
    test = test[:1000]

    V = len(word2idx)
    # Pre-formatted single-argument print calls give identical output on
    # Python 2 and Python 3; the original print statements were
    # Python-2-only syntax.
    print("vocab size: {}".format(V))
    D = 20
    K = 2 if is_binary else 5

    model = RecursiveNN(V, D, K)
    model.fit(train)
    print("train accuracy: {}".format(model.score(train)))
    print("test accuracy: {}".format(model.score(test)))
    print("train f1: {}".format(model.f1_score(train)))
    print("test f1: {}".format(model.f1_score(test)))
def main():
    """Run a short (3-epoch) RecursiveNN experiment and print its metrics.

    Both PTB splits are passed through ``add_idx_to_tree`` and
    ``tree2list(t, -1, True)``, entries whose ``t[3][-1]`` is negative are
    dropped, and after shuffling 1000 training and 100 test entries are
    kept.  A ``RecursiveNN(V, 80, 5)`` is fitted with ReLU activation and
    accuracy plus F1 are printed for both splits.
    """
    def _prepare(trees):
        # Same preparation for train and test (previously written twice).
        for tree in trees:
            add_idx_to_tree(tree, 0)
        flat = [tree2list(tree, -1, True) for tree in trees]
        # for filtering binary labels
        return [entry for entry in flat if entry[3][-1] >= 0]

    train, test, word2idx = get_ptb_data()
    train = _prepare(train)
    test = _prepare(test)

    train = shuffle(train)[:1000]
    test = shuffle(test)[:100]

    V = len(word2idx)
    # The original used Python 2 print statements, which do not parse on
    # Python 3; one formatted string per call prints the same on both.
    print("vocab size: {}".format(V))
    D = 80
    # NOTE(review): labels are filtered as binary above but K is 5 —
    # presumably the number of output classes; confirm intended.
    K = 5

    model = RecursiveNN(V, D, K)
    model.fit(train, epochs=3, activation=T.nnet.relu)
    print("train accuracy: {}".format(model.score(train)))
    print("test accuracy: {}".format(model.score(test)))
    print("train f1: {}".format(model.f1_score(train)))
    print("test f1: {}".format(model.f1_score(test)))
Exemplo n.º 6
0
def main(is_binary=True):
    """Train a RecursiveNN on the full training split with a validation subset.

    Args:
        is_binary: forwarded to ``tree2list``; when true, entries whose
            ``t[3][-1]`` is negative are dropped and ``K`` is 2 instead
            of 5.

    The first 1000 shuffled test entries are handed to ``fit`` as its
    second argument, while the final scores use the whole test split.
    """
    def as_filtered_lists(trees):
        # add_idx_to_tree is applied to every tree before conversion.
        for node in trees:
            add_idx_to_tree(node, 0)
        rows = [tree2list(node, -1, is_binary) for node in trees]
        if is_binary:
            # for filtering binary labels
            rows = [row for row in rows if row[3][-1] >= 0]
        return rows

    train_trees, test_trees, word2idx = get_ptb_data()
    train = as_filtered_lists(train_trees)
    test = as_filtered_lists(test_trees)

    # (A label-imbalance check used to live here as commented-out code.)

    train = shuffle(train)
    test = shuffle(test)
    smalltest = test[:1000]

    V = len(word2idx)
    print("vocab size:", V)
    D = 20
    K = 5
    if is_binary:
        K = 2

    model = RecursiveNN(V, D, K)
    model.fit(train, smalltest, epochs=20, train_inner_nodes=True)

    train_acc = model.score(train)
    print("train accuracy:", train_acc)
    test_acc = model.score(test)
    print("test accuracy:", test_acc)
    train_f1 = model.f1_score(train)
    print("train f1:", train_f1)
    test_f1 = model.f1_score(test)
    print("test f1:", test_f1)
def main(is_binary=True):
    """Fit a RecursiveNN for 20 epochs (inner nodes included) and report metrics.

    Args:
        is_binary: passed to ``tree2list``; when true, only entries with a
            non-negative ``t[3][-1]`` are kept and the model uses ``K = 2``
            (otherwise ``K = 5``).
    """
    splits = {}
    splits["train"], splits["test"], word2idx = get_ptb_data()

    # Identical preparation for each split, train first and then test.
    for name in ("train", "test"):
        trees = splits[name]
        for tree in trees:
            add_idx_to_tree(tree, 0)
        listed = [tree2list(tree, -1, is_binary) for tree in trees]
        if is_binary:
            # for filtering binary labels
            listed = [rec for rec in listed if rec[3][-1] >= 0]
        splits[name] = listed

    # (The commented-out class-imbalance diagnostics were dropped here.)

    train = shuffle(splits["train"])
    test = shuffle(splits["test"])
    # Only a 1000-entry slice is given to fit(); scoring uses all of test.
    smalltest = test[:1000]

    V = len(word2idx)
    print("vocab size:", V)
    D = 20
    K = 2 if is_binary else 5

    model = RecursiveNN(V, D, K)
    model.fit(train, smalltest, epochs=20, train_inner_nodes=True)
    print("train accuracy:", model.score(train))
    print("test accuracy:", model.score(test))
    print("train f1:", model.f1_score(train))
    print("test f1:", model.f1_score(test))
Exemplo n.º 8
0
def main():
    """Fit a TNN on the first 100 training trees from ``get_ptb_data()``."""
    train, test, word2idx = get_ptb_data()

    # Keep only the first 100 entries of each split.
    train, test = train[:100], test[:100]

    vocab_size = len(word2idx)
    embedding_dim = 80
    n_outputs = 5

    net = TNN(vocab_size, embedding_dim, n_outputs, tf.nn.relu)
    net.fit(train)
Exemplo n.º 9
0
def main(is_binary=True):
    """Train a RecursiveNN with tanh activation and print train/test accuracy.

    Args:
        is_binary: forwarded to ``tree2list``; when true, entries whose
            ``t[3][-1]`` is negative are dropped and ``K`` is 2 instead
            of 5.

    The whole (shuffled) training split is used; the test split is cut to
    1000 entries.  ``plt.show()`` is called at the end so the program does
    not end until the plot is closed.
    """
    def _prepare(trees):
        # Shared preparation for both splits (was duplicated inline).
        for tree in trees:
            add_idx_to_tree(tree, 0)
        flat = [tree2list(tree, -1, is_binary) for tree in trees]
        if is_binary:
            # for filtering binary labels
            flat = [entry for entry in flat if entry[3][-1] >= 0]
        return flat

    train, test, word2idx = get_ptb_data()
    train = _prepare(train)
    test = _prepare(test)

    # The unused ``n_pos`` tally (a full pass over train whose result was
    # never read) has been removed.
    train = shuffle(train)
    test = shuffle(test)
    test = test[:1000]

    V = len(word2idx)
    print("vocab size:", V)
    D = 10
    K = 2 if is_binary else 5

    model = RecursiveNN(V, D, K)
    model.fit(train,
              learning_rate=1e-2,
              reg=1e-2,
              mu=0,
              epochs=20,
              activation=T.tanh,
              train_inner_nodes=False)
    print("train accuracy:", model.score(train))
    print("test accuracy:", model.score(test))

    # make sure program doesn't end until we close the plot
    plt.show()
def main():
    """Fit a TNN on 100 PTB training trees and print train/test accuracy."""
    train, test, word2idx = get_ptb_data()

    # Keep only the first 100 entries of each split.
    train = train[:100]
    test = test[:100]

    V = len(word2idx)
    D = 80
    K = 5

    model = TNN(V, D, K, tf.nn.relu)
    model.fit(train)
    # Single-argument print(...) calls emit the same text on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    # NOTE(review): score(None) is used for the training figure — presumably
    # TNN.score falls back to what fit() already built; confirm.
    print("train accuracy: {}".format(model.score(None)))
    print("test accuracy: {}".format(model.score(test)))
Exemplo n.º 11
0
def main():
    """Train a TNN on a 100-tree PTB subset and report its accuracy."""
    train, test, word2idx = get_ptb_data()

    # Both splits are truncated to their first 100 entries.
    train, test = train[:100], test[:100]

    V = len(word2idx)
    D = 80
    K = 5

    model = TNN(V, D, K, tf.nn.relu)
    model.fit(train)

    # NOTE(review): the training score is requested with score(None);
    # presumably TNN.score then reuses state from fit() — confirm.
    train_score = model.score(None)
    test_score = model.score(test)
    # The Python 2 print statements did not parse on Python 3; one
    # formatted string per call prints identically on both versions.
    print("train accuracy: {}".format(train_score))
    print("test accuracy: {}".format(test_score))
Exemplo n.º 12
0
def main():
    """Fit a TNN on the first 5 PTB training trees.

    A stray debugging ``exit()`` used to sit right after the training
    slice, which ended the interpreter before the model was ever built and
    left the rest of the function unreachable.  It has been removed so the
    function performs the training its body describes.
    """
    train, test, word2idx = get_ptb_data()

    # Tiny subsets: five entries per split.
    train = train[:5]
    test = test[:5]

    V = len(word2idx)
    D = 20
    K = 5

    model = TNN(V, D, K, tf.nn.relu)
    model.fit(train)
Exemplo n.º 13
0
def main(is_binary=True):
    """Prepare both PTB splits and shuffle the training entries.

    Args:
        is_binary: forwarded to ``tree2list``; when true, entries whose
            ``t[3][-1]`` is negative are dropped.

    NOTE(review): the body stops after shuffling and returns nothing —
    this snippet looks truncated; confirm against the full source.
    """
    def convert(trees):
        # add_idx_to_tree runs on every tree before tree2list conversion.
        for tree in trees:
            add_idx_to_tree(tree, 0)
        records = [tree2list(tree, -1, is_binary) for tree in trees]
        if not is_binary:
            return records
        # for filtering binary labels
        return [rec for rec in records if rec[3][-1] >= 0]

    raw_train, raw_test, word2idx = get_ptb_data()
    train = convert(raw_train)
    test = convert(raw_test)

    train = shuffle(train)
def main(is_binary=True):
    """Train a RecursiveNN (tanh, 20 epochs) and print train/test accuracy.

    Args:
        is_binary: forwarded to ``tree2list``; when true, entries whose
            ``t[3][-1]`` is negative are dropped and ``K`` is 2 instead
            of 5.

    Training uses the whole shuffled training split; the test split is cut
    to 1000 entries.  ``plt.show()`` is called last so the program does not
    end until the plot is closed.
    """
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary) for t in train]
    if is_binary:
        train = [t for t in train if t[3][-1] >= 0]  # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary) for t in test]
    if is_binary:
        test = [t for t in test if t[3][-1] >= 0]  # for filtering binary labels

    # The ``n_pos`` sum that used to follow the shuffle was never read, so
    # that extra pass over the training data has been dropped.
    train = shuffle(train)
    test = shuffle(test)
    test = test[:1000]

    V = len(word2idx)
    print("vocab size:", V)
    D = 10
    K = 2 if is_binary else 5

    model = RecursiveNN(V, D, K)
    model.fit(
        train,
        learning_rate=1e-2,
        reg=1e-2,
        mu=0,
        epochs=20,
        activation=T.tanh,
        train_inner_nodes=False,
    )
    print("train accuracy:", model.score(train))
    print("test accuracy:", model.score(test))

    # make sure program doesn't end until we close the plot
    plt.show()
def main(is_binary=True):
    """Convert both PTB splits to list form and shuffle the training split.

    Args:
        is_binary: passed through to ``tree2list``; when true, only entries
            with a non-negative ``t[3][-1]`` are kept.

    NOTE(review): nothing is returned and no model is built — the snippet
    appears cut off after the shuffle; confirm against the full source.
    """
    data = {}
    data["train"], data["test"], word2idx = get_ptb_data()

    # Same steps for each split, train first and then test.
    for split in ("train", "test"):
        for tree in data[split]:
            add_idx_to_tree(tree, 0)
        rows = [tree2list(tree, -1, is_binary) for tree in data[split]]
        if is_binary:
            # for filtering binary labels
            rows = [row for row in rows if row[3][-1] >= 0]
        data[split] = rows

    train = shuffle(data["train"])
Exemplo n.º 16
0
            wordSequence.append(reverseLookUp[element])
        else:
            wordSequence.append('.')
    return wordSequence


# Script section: build the lookup tables and sentences from the tree dataset,
# load the PTB splits, and set up constants for the embedding defined below.
datasetPath = "trees/treeSentences.txt"
lookUp, reverseLookUp = getLookUps(datasetPath)

sentences = getSentences(datasetPath, lookUp)
#print(lookUp)
#print(reverseLookUp)
#print(sentences)
print("hello")  # NOTE(review): looks like a leftover progress/debug marker
# Vocabulary size is taken from lookUp (the word2idx alternative is disabled below).
vocabsize = len(lookUp)
train, test, word2idx = get_ptb_data()
# vocabsize = len(word2idx)
ans = []

print(word_yield(train[0]))
# NOTE(review): this exit() stops the script here, so everything below is
# unreachable while it remains — presumably leftover debugging; confirm.
exit()
#print("vocab size: "+str(len(lookUp))+" number of sentences: "+str(len(sentences)))

# word vector dimensions
dime = 200

# word embedding

# all variables will get changed during back propagation

embeddings = tf.Variable(tf.random_uniform([vocabsize, dime], -1.0, 1.0),