def length_vector_ftb(input_file):
    """Return the yield length of every tree read from ``input_file``.

    ``input_file`` is handed unchanged to ``XmlReader.read_dir_xml``
    (presumably a directory of XML treebank files; confirm against
    the reader).  For each tree, ``merge_num()`` and then ``merge_cpds()``
    are called before the length of ``tree_yield()`` is recorded.

    Returns a list of integers, one per tree, in the order the reader
    produced them.
    """
    xml_reader = XmlReader()
    trees = xml_reader.read_dir_xml(input_file)

    def _merged_yield_length(tree):
        # The merges run before the yield is measured, in this order.
        tree.merge_num()
        tree.merge_cpds()
        return len(tree.tree_yield())

    return [_merged_yield_length(tree) for tree in trees]
# Coerce the option attributes carried by `opts` to plain bool/int values.
# NOTE(review): `all` shadows the builtin of the same name; the name is kept
# because code outside this view may read it.
all = bool(opts.all)
raw = bool(opts.raw)
pos = bool(opts.pos)
sym = bool(opts.sym)
subcat = bool(opts.subcat)
fun = bool(opts.fun)
pcfg = bool(opts.pcfg)
top = int(opts.top)
traces = bool(opts.traces)
lncky = bool(opts.lncky)

# Load the treebank according to the requested source format.
if source == 'xml':
    reader = XmlReader()
    if input_file is not None:
        if os.path.isdir(input_file):
            # A directory is read through the directory-level reader entry point.
            treebank = reader.read_dir_xml(input_file)
        else:
            # NOTE(review): this handle is not closed in the visible code.
            # It is left open deliberately here because read_xml may consume
            # the stream lazily -- confirm before wrapping it in `with`.
            instream = open(input_file)
            treebank = reader.read_xml(instream)
    else:
        # No input path given: read the XML treebank from standard input.
        treebank = reader.read_xml(sys.stdin)
elif source == 'penn':
    # The Penn-style input is always taken from standard input, read whole.
    stream = sys.stdin.read()
    reader = PtbReader(drparser=True)
    treebank = reader.parse_treebank(stream)
else:
    # Single-argument parenthesised form: same output as the Python 2
    # print statement (message, its embedded newline, then a newline).
    print("invalid source format\n")
    sys.exit(1)

# The second argument is the negation of the `traces` option.
freq = FrequencyTable(subcat, not traces)