Пример #1
0
def q2_5():
    gold_words = load_gold('test')
    test_size = len(gold_words)
    test_chars_size = 0
    word_acc = 0.0
    char_acc = 0.0
    for i in range(1, test_size + 1):
        data_fname = "../data/test_img%d.txt" % (i)
        data = load_data(data_fname)
        f_weights = load_feature_weights()
        t_weights = load_transition_weights()
        node_potentials = crf.compute_node_potentials(data, f_weights)
        clique_potentials = crf.compute_clique_potentials(
            node_potentials, t_weights)
        forward, backward = crf.compute_messages(clique_potentials)
        beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
        pos_probs, _ = crf.compute_marginals(clique_potentials, beliefs)
        word = crf.classify(pos_probs)
        gold = ''.join(map(lambda c: char_map[c], gold_words[i - 1]))
        word_acc += 1.0 if word == gold else 0.0
        chars_correct = sum(
            [1 if word[j] == gold[j] else 0 for j in range(len(word))])
        char_acc += chars_correct
        test_chars_size += len(word)
        if i < 6:  # only print first 5 test words
            print word, gold
    word_acc /= test_size
    char_acc /= test_chars_size
    print "Word accuracy:", word_acc
    print "Character accuracy:", char_acc
Пример #2
0
def q2_5(): 
    gold_words = load_gold('test')
    test_size = len(gold_words)
    test_chars_size = 0
    word_acc = 0.0
    char_acc = 0.0
    for i in range(1,test_size+1):
        data_fname = "../data/test_img%d.txt" % (i)
        data = load_data(data_fname)
        f_weights = load_feature_weights()
        t_weights = load_transition_weights()
        node_potentials = crf.compute_node_potentials(data, f_weights)
        clique_potentials = crf.compute_clique_potentials(node_potentials, t_weights)
        forward, backward = crf.compute_messages(clique_potentials)
        beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
        pos_probs, _ = crf.compute_marginals(clique_potentials, beliefs)
        word = crf.classify(pos_probs)
        gold = ''.join(map(lambda c: char_map[c], gold_words[i-1]))
        word_acc += 1.0 if word==gold else 0.0
        chars_correct = sum([1 if word[j]==gold[j] else 0 for j in range(len(word))])
        char_acc += chars_correct
        test_chars_size += len(word)
        if i < 6 : # only print first 5 test words
            print word, gold
    word_acc /= test_size
    char_acc /= test_chars_size
    print "Word accuracy:", word_acc
    print "Character accuracy:", char_acc
Пример #3
0
def q2_3(): 
    data_fname = "../data/test_img1.txt"
    data = load_data(data_fname)
    f_weights = load_feature_weights()
    t_weights = load_transition_weights()
    node_potentials = crf.compute_node_potentials(data, f_weights)
    clique_potentials = crf.compute_clique_potentials(node_potentials, t_weights)
    forward, backward = crf.compute_messages(clique_potentials)
    beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
    print "beliefs:\n", beliefs[:,:2,:2] # only want e, t
Пример #4
0
def q2_2(): 
    data_fname = "../data/test_img1.txt"
    data = load_data(data_fname)
    f_weights = load_feature_weights()
    t_weights = load_transition_weights()
    node_potentials = crf.compute_node_potentials(data, f_weights)
    clique_potentials = crf.compute_clique_potentials(node_potentials, t_weights)
    forward, backward = crf.compute_messages(clique_potentials)
    print "forward:\n", forward
    print "backward:\n", backward
Пример #5
0
def q2_3():
    data_fname = "../data/test_img1.txt"
    data = load_data(data_fname)
    f_weights = load_feature_weights()
    t_weights = load_transition_weights()
    node_potentials = crf.compute_node_potentials(data, f_weights)
    clique_potentials = crf.compute_clique_potentials(node_potentials,
                                                      t_weights)
    forward, backward = crf.compute_messages(clique_potentials)
    beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
    print "beliefs:\n", beliefs[:, :2, :2]  # only want e, t
Пример #6
0
def q2_2():
    data_fname = "../data/test_img1.txt"
    data = load_data(data_fname)
    f_weights = load_feature_weights()
    t_weights = load_transition_weights()
    node_potentials = crf.compute_node_potentials(data, f_weights)
    clique_potentials = crf.compute_clique_potentials(node_potentials,
                                                      t_weights)
    forward, backward = crf.compute_messages(clique_potentials)
    print "forward:\n", forward
    print "backward:\n", backward
Пример #7
0
def q2_4(): 
    data_fname = "../data/test_img1.txt"
    data = load_data(data_fname)
    f_weights = load_feature_weights()
    t_weights = load_transition_weights()
    node_potentials = crf.compute_node_potentials(data, f_weights)
    clique_potentials = crf.compute_clique_potentials(node_potentials, t_weights)
    forward, backward = crf.compute_messages(clique_potentials)
    beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
    pos_probs, trans_probs = crf.compute_marginals(clique_potentials, beliefs)
    print "marginals:\n", pos_probs
    # uncomment to print LaTeX tabular version
    #print_marginals_table(pos_probs)
    indices = (0,1,8) # only want e, t, r
    print "pairwise:\n", trans_probs[...,indices][:,indices]
Пример #8
0
def q2_4():
    data_fname = "../data/test_img1.txt"
    data = load_data(data_fname)
    f_weights = load_feature_weights()
    t_weights = load_transition_weights()
    node_potentials = crf.compute_node_potentials(data, f_weights)
    clique_potentials = crf.compute_clique_potentials(node_potentials,
                                                      t_weights)
    forward, backward = crf.compute_messages(clique_potentials)
    beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
    pos_probs, trans_probs = crf.compute_marginals(clique_potentials, beliefs)
    print "marginals:\n", pos_probs
    # uncomment to print LaTeX tabular version
    #print_marginals_table(pos_probs)
    indices = (0, 1, 8)  # only want e, t, r
    print "pairwise:\n", trans_probs[..., indices][:, indices]
Пример #9
0
def test_model(f_weights, t_weights):
    """Return the character-level error rate of the given weights.

    Runs inference on the first ``test_size`` entries of the module-level
    ``test_data`` and compares every decoded word with the matching entry of
    ``gold_test_words`` (label indices translated through ``char_map``).

    Args:
        f_weights: feature weights handed to ``crf.compute_node_potentials``.
        t_weights: transition weights handed to
            ``crf.compute_clique_potentials``.

    Returns:
        ``1 - (matching characters / predicted characters)``.

    Raises:
        ZeroDivisionError: if no characters were predicted (e.g. when
            ``test_size`` is 0).
    """
    # Word-level accuracy used to be accumulated here as well, but it was
    # never returned or read, so that dead bookkeeping has been dropped.
    chars_correct = 0.0  # float so the final division is true division
    test_chars_size = 0
    for i in range(test_size):
        node_potentials = crf.compute_node_potentials(test_data[i], f_weights)
        clique_potentials = crf.compute_clique_potentials(node_potentials, t_weights)
        forward, backward = crf.compute_messages(clique_potentials)
        beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
        pos_probs, _ = crf.compute_marginals(clique_potentials, beliefs)
        word = crf.classify(pos_probs)
        gold = ''.join(char_map[c] for c in gold_test_words[i])
        # zip() stops at the shorter string, so a length mismatch between
        # prediction and gold cannot raise IndexError.
        chars_correct += sum(1 for p, g in zip(word, gold) if p == g)
        test_chars_size += len(word)
    return 1 - chars_correct / test_chars_size
Пример #10
0
def get_sum_prod_marginals(num_examples, f_weights, t_weights):
    """Compute sum-product marginals for the leading training examples.

    Args:
        num_examples: how many entries of the module-level ``training_data``
            to process, counted from the start.
        f_weights: feature weights handed to ``crf.compute_node_potentials``.
        t_weights: transition weights handed to
            ``crf.compute_clique_potentials``.

    Returns:
        A NumPy array built from the list of ``crf.compute_marginals``
        results, one entry per example, in training order.
    """
    # Stage 1: node potentials for each selected example.
    node_stage = []
    for sample in training_data[:num_examples]:
        node_stage.append(crf.compute_node_potentials(sample, f_weights))

    # Stage 2: clique potentials derived from the node potentials.
    clique_stage = []
    for node_pots in node_stage:
        clique_stage.append(crf.compute_clique_potentials(node_pots, t_weights))

    # Stage 3: message-passing results, one per example.
    message_stage = []
    for clique_pots in clique_stage:
        message_stage.append(crf.compute_messages(clique_pots))

    # Stage 4: beliefs from the clique potentials and the first two
    # components of each message-passing result.
    belief_stage = []
    for idx in range(num_examples):
        clique_pots = clique_stage[idx]
        msgs = message_stage[idx]
        belief_stage.append(crf.compute_beliefs(clique_pots, msgs[0], msgs[1]))

    # Stage 5: marginals from the clique potentials and beliefs.
    marginals = []
    for idx in range(num_examples):
        marginals.append(
            crf.compute_marginals(clique_stage[idx], belief_stage[idx]))
    return np.array(marginals)
Пример #11
0
def test_model(f_weights, t_weights):
    """Compute the character error rate of a weight setting on the test data.

    The first ``test_size`` entries of the module-level ``test_data`` are
    decoded with the supplied weights, and each decoded word is compared
    character by character with the corresponding ``gold_test_words`` entry
    (whose label indices are mapped to characters through ``char_map``).

    Args:
        f_weights: feature weights passed to ``crf.compute_node_potentials``.
        t_weights: transition weights passed to
            ``crf.compute_clique_potentials``.

    Returns:
        One minus the fraction of predicted characters that match gold.

    Raises:
        ZeroDivisionError: if no characters were predicted (for instance
            when ``test_size`` is 0).
    """
    # The original also tracked word-level accuracy, but the value was never
    # used, so only the character statistics are kept.
    chars_correct = 0.0  # float keeps the final "/" a true division
    test_chars_size = 0
    for i in range(test_size):
        node_potentials = crf.compute_node_potentials(test_data[i], f_weights)
        clique_potentials = crf.compute_clique_potentials(
            node_potentials, t_weights)
        forward, backward = crf.compute_messages(clique_potentials)
        beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
        pos_probs, _ = crf.compute_marginals(clique_potentials, beliefs)
        word = crf.classify(pos_probs)
        gold = ''.join(char_map[c] for c in gold_test_words[i])
        # Compare pairwise with zip() so that a prediction longer than the
        # gold word cannot trigger an IndexError.
        chars_correct += sum(
            1 for predicted, expected in zip(word, gold)
            if predicted == expected)
        test_chars_size += len(word)
    return 1 - chars_correct / test_chars_size
Пример #12
0
def get_sum_prod_marginals(num_examples, f_weights, t_weights):
    """Run sum-product inference on the first ``num_examples`` training items.

    Args:
        num_examples: number of leading ``training_data`` entries to use.
        f_weights: feature weights for ``crf.compute_node_potentials``.
        t_weights: transition weights for ``crf.compute_clique_potentials``.

    Returns:
        ``np.array`` over the per-example ``crf.compute_marginals`` results.
    """
    # Node potentials, one per selected training example.
    per_example_nodes = []
    for sample in training_data[:num_examples]:
        per_example_nodes.append(
            crf.compute_node_potentials(sample, f_weights))

    # Clique potentials built on top of the node potentials.
    per_example_cliques = []
    for nodes in per_example_nodes:
        per_example_cliques.append(
            crf.compute_clique_potentials(nodes, t_weights))

    # Message-passing output for every example.
    per_example_messages = []
    for cliques in per_example_cliques:
        per_example_messages.append(crf.compute_messages(cliques))

    # Beliefs use components 0 and 1 of each message-passing result.
    per_example_beliefs = []
    for k in range(num_examples):
        cliques = per_example_cliques[k]
        first = per_example_messages[k][0]
        second = per_example_messages[k][1]
        per_example_beliefs.append(crf.compute_beliefs(cliques, first, second))

    # Finally, the marginals for every example.
    results = []
    for k in range(num_examples):
        results.append(crf.compute_marginals(per_example_cliques[k],
                                             per_example_beliefs[k]))
    return np.array(results)
Пример #13
0
'''
CS688 HW02: Chain CRF

Question 3: Maximum Log Likelihood Learning Derivation

@author: Emma Strubell
'''

from data_loader import *
import chain_crf as crf

# compute log likelihood over first 50 training examples
gold_words = load_gold('train')
train_size = 50
f_weights = load_feature_weights()
t_weights = load_transition_weights()
total = 0.0
for i in range(train_size):
    data_fname = "../data/train_img%d.txt" % (i+1)
    example = load_data(data_fname)
    node_potentials = crf.compute_node_potentials(example, f_weights)
    clique_potentials = crf.compute_clique_potentials(node_potentials, t_weights)
    forward, backward = crf.compute_messages(clique_potentials)
    beliefs = crf.compute_beliefs(clique_potentials, forward, backward)
    pos_probs, _ = crf.compute_marginals(clique_potentials, beliefs) 
    total += np.sum(np.log([pos_probs[i,j] for i,j in enumerate(gold_words[i])]))
total /= train_size
print "Log likelihood:", total