示例#1
0
文件: sample.py 项目: Camille31/Swip
def length_vector_ftb(input_file):
    """Return the yield length of every tree read from *input_file*.

    The trees are obtained with ``XmlReader().read_dir_xml(input_file)``.
    ``merge_num()`` and ``merge_cpds()`` are called on each tree before its
    yield is measured (presumably merging numbers and compounds into single
    units -- confirm against the tree class).

    Returns a list holding ``len(tree.tree_yield())`` for each tree, in the
    order the reader produced them.
    """
    def _yield_size(parsed):
        # Apply both merges first, so the yield is measured afterwards.
        parsed.merge_num()
        parsed.merge_cpds()
        return len(parsed.tree_yield())

    return [_yield_size(parsed)
            for parsed in XmlReader().read_dir_xml(input_file)]
示例#2
0
文件: twc.py 项目: Camille31/Swip
# Command-line switches, normalised to plain bool/int values.
# NOTE: `all` shadows the builtin of the same name; the name is kept because
# code outside this excerpt may refer to it.
all = bool(opts.all)
raw = bool(opts.raw)
pos = bool(opts.pos)
sym = bool(opts.sym)
subcat = bool(opts.subcat)
fun = bool(opts.fun)
pcfg = bool(opts.pcfg)
top = int(opts.top)
traces = bool(opts.traces)
lncky = bool(opts.lncky)

# Load the treebank according to the requested source format.
if source == 'xml':
    reader = XmlReader()
    if input_file is not None:
        if os.path.isdir(input_file):
            # A directory is handed to the reader's directory-level entry point.
            treebank = reader.read_dir_xml(input_file)
        else:
            # NOTE(review): the handle is never closed, as in the original.
            # It is not visible here whether read_xml consumes the stream
            # eagerly, so it is not wrapped in a `with` block -- confirm.
            instream = open(input_file)
            treebank = reader.read_xml(instream)
    else:
        # No input path given: read the XML from standard input.
        treebank = reader.read_xml(sys.stdin)
elif source == 'penn':
    # The whole of standard input is read before parsing.
    stream = sys.stdin.read()
    reader = PtbReader(drparser=True)
    treebank = reader.parse_treebank(stream)
else:
    # Parenthesised single-argument form prints the same text under
    # Python 2 and Python 3.
    print("invalid source format\n")
    sys.exit(1)


# The second argument is the negation of the `traces` switch.
freq = FrequencyTable(subcat, not traces)