def test6(ntrees=10, steps=10):
    """Train a ``DMV`` on a truncated ``wsj10.WSJ10`` treebank and return it.

    Args:
        ntrees: how many leading entries of the treebank's ``trees`` to keep.
        steps: value forwarded to the model's ``train`` method.

    Returns:
        The ``DMV`` instance after ``train(steps)`` has been called.
    """
    import wsj10

    treebank = wsj10.WSJ10()
    # Truncate before the model is constructed so it only sees the prefix.
    treebank.trees = treebank.trees[:ntrees]

    model = DMV(treebank)
    model.train(steps)
    return model
def test0(ntrees=10, steps=1, tb=None, clazz=None): if tb is None: import wsj10 tb = wsj10.WSJ10() if clazz is None: clazz = DMVCCM tb.trees = tb.trees[:ntrees] m = clazz(tb) t0 = time.clock() m.train(steps) t = time.clock() - t0 print('Tiempo (seg.):', t) return m
def main1():
    """Build a ``UBound`` over a default ``wsj10.WSJ10`` and evaluate it.

    Returns:
        The ``UBound`` instance after its ``eval()`` method has been called.
    """
    import wsj10

    treebank = wsj10.WSJ10()
    ubound = UBound(treebank)
    ubound.eval()
    return ubound
def _get_treebank(self, treebank=None): if treebank is None: treebank = wsj10.WSJ10() return treebank
# Disabled experiments, kept for reference:
# print m
# m.train(2)
# m.train(10)
# m.test()
# can't do - presumably not tagged
# m = ccm.CCM(dependency_treebank)
# from nltk.corpus import treebank
# t = treebank.parsed_sents('wsj_0001.mrg')[0]

# Load the treebank from CORPUS_DIR and print its statistics.
tb = wsj10.WSJ10(basedir=CORPUS_DIR)
# print tb.get_trees()
# print tb.raw()
tb.print_stats()

# Build a CCM over the treebank, show it, train it, then run its test.
m = ccm.CCM(tb)
# Parenthesized form: identical output for a single argument on Python 2,
# and valid syntax on Python 3 (where the bare `print m` statement is not).
print(m)
m.train(40)
m.test()

# Recorded results.
# NOTE(review): the figures below are labelled "with 10 iterations" while the
# call above passes 40 -- presumably they come from an earlier run; confirm.
# with 10 iterations
# Sentences: 7422
# Micro-averaged measures:
#   Precision: 60.5
#   Recall: 76.8
#   Harmonic mean F1: 67.6
def main1():
    """Build an ``LBranch`` over a default ``wsj10.WSJ10`` and call ``eval()``.

    Nothing is returned; the instance is discarded after evaluation.
    """
    import wsj10

    model = LBranch(wsj10.WSJ10())
    model.eval()