Exemplo n.º 1
0
from treeBank import TreeBank

# Word frequencies observed in the training treebank.
tb_train = TreeBank.from_file("./data/train.trees")
word_freq = tb_train.count_word_freq()

# Split the training words in one pass: words seen more than once form the
# working vocabulary, words seen exactly once are tracked as "rare".
vocab_train = set()
rare_train = set()
for word, count in word_freq.items():
    if count > 1:
        vocab_train.add(word)
    elif count == 1:
        rare_train.add(word)

# Vocabulary of the evaluation treebank (loaded from the test split).
tb_dev = TreeBank.from_file("./data/test.trees")
vocab_dev = tb_dev.collect_vocabulary()

# Print, one per line, every evaluation word that is not in the
# (frequency > 1) training vocabulary.
diff = vocab_dev.difference(vocab_train)
for unseen in diff:
    print(unseen)

# Print the training singletons that also occur in the evaluation data.
print(rare_train.intersection(vocab_dev))
Exemplo n.º 2
0
def train_on(filename):
    """Build a rule set from the treebank stored in *filename*.

    The treebank is loaded with ``TreeBank.from_file``, normalized in place
    via ``normalize()``, and then asked for its rule set with
    ``vertical_markov=True`` and ``vertical_markov_factor=0.8``.

    Returns the object produced by ``collect_rule_set``.
    """
    treebank = TreeBank.from_file(filename)
    treebank.normalize()
    return treebank.collect_rule_set(
        vertical_markov=True,
        vertical_markov_factor=0.8,
    )
Exemplo n.º 3
0
def train_on(filename):
    """Load, normalize and extract the rule set of a treebank file.

    Args:
        filename: Path handed to ``TreeBank.from_file``.

    Returns:
        The result of ``collect_rule_set`` called with
        ``vertical_markov=True`` and ``vertical_markov_factor=0.8``.
    """
    bank = TreeBank.from_file(filename)
    # Normalization must happen before the rules are collected.
    bank.normalize()
    rules = bank.collect_rule_set(
        vertical_markov=True,
        vertical_markov_factor=0.8,
    )
    return rules
Exemplo n.º 4
0
#!/usr/bin/env python

from treeBank import TreeBank

# Vocabulary of the training treebank.
train_bank = TreeBank.from_file('./data/train.trees')
train_words = train_bank.collect_vocabulary()

# Vocabulary of the development treebank.
dev_bank = TreeBank.from_file('./data/dev.trees')
dev_words = dev_bank.collect_vocabulary()

# Words that occur in dev but never in train, followed by how many there are.
diff = dev_words.difference(train_words)
print(diff)
print(len(diff))
Exemplo n.º 5
0
from treeBank import TreeBank

tb_train = TreeBank.from_file("./data/train.trees")
word_freq = tb_train.count_word_freq()

# Training vocabulary deliberately excludes singletons: only words with a
# frequency above one are kept; words seen exactly once go to `rare_train`.
vocab_train = {token for token, freq in word_freq.items() if freq > 1}
rare_train = {token for token, freq in word_freq.items() if freq == 1}

tb_dev = TreeBank.from_file("./data/test.trees")
vocab_dev = tb_dev.collect_vocabulary()

# Report each evaluation word missing from the training vocabulary,
# one word per line.
diff = vocab_dev.difference(vocab_train)
for missing in diff:
    print(missing)

# Report which training singletons reappear in the evaluation vocabulary.
print(rare_train.intersection(vocab_dev))
Exemplo n.º 6
0
#!/usr/bin/env python

from treeBank import TreeBank

# Gather the number of children of every node, visiting each tree of the
# training treebank in bottom-up order.
child_counts = []
for tree in TreeBank.from_file('./data/train.trees').trees:
    for node in tree.bottom_up():
        child_counts.append(len(node.children))

# Largest branching factor found (max() raises ValueError if no node exists).
ans = max(child_counts)

print(ans)
Exemplo n.º 7
0
#!/usr/bin/env python

from treeBank import TreeBank

train_bank = TreeBank.from_file('./data/train.trees')

# Maximum number of children over all nodes of all training trees; an empty
# treebank makes max() raise ValueError.
ans = max(
    len(node.children)
    for tree in train_bank.trees
    for node in tree.bottom_up()
)

print(ans)
Exemplo n.º 8
0
#!/usr/bin/env python

from treeBank import TreeBank

# Load the training treebank and normalize it before extracting rules.
tb = TreeBank.from_file("./data/train.trees")
tb.normalize()

# Print the `size` attribute of the rule set collected with default options.
rule_set = tb.collect_rule_set()
print(rule_set.size)