def find_tree(treebankdir, treebank_re, interesting_tree_p=lambda tree: True):
    """Interactively browse the treebank trees accepted by a predicate.

    Every file returned by lx.findfiles(treebankdir, treebank_re) is read
    with read_file, and interesting_tree_p is called on each tree in turn.
    Whenever the predicate returns a true value, the tree is drawn with
    drawtree.tb_tk and the user is prompted for what to do next:

      * an empty reply, or one starting with Y/y, moves on to the next
        interesting tree;
      * a reply starting with P/p asks for a file name and, unless that
        name is blank, saves a postscript image through
        drawtree.tktb_postscript before moving on;
      * any other reply ends the search and returns.

    With the default predicate every tree is displayed.
    """
    for path in lx.findfiles(treebankdir, treebank_re):
        # Trees are numbered from 1 within each file; trees rejected by the
        # predicate still consume a number.
        for index, entry in enumerate(read_file(path), 1):
            # The tree is the second element of each item read_file returns.
            candidate = entry[1]
            if not interesting_tree_p(candidate):
                continue

            drawtree.tb_tk(candidate)
            reply = raw_input(path+":"+str(index)+" find next tree? [Y/n/p] ").strip()
            if not reply:
                # Just pressing return means "yes, keep going".
                continue

            first = reply[0]
            if first in ("P", "p"):
                ps_name = raw_input("Saving postscript image of tree; enter filename for image file? ").strip()
                if ps_name:
                    drawtree.tktb_postscript(ps_name)
            elif first not in ("Y", "y"):
                return
def trees(treebankdir, treebank_re):
    """Generate, one at a time, every tree stored in a treebank.

    treebankdir names the directory holding the treebank files and
    treebank_re is the regular expression a file must match to be included;
    both are handed unchanged to lx.findfiles.  Each matching file is loaded
    with read_file and its trees are yielded in the order they were read.
    """
    for path in lx.findfiles(treebankdir, treebank_re):
        for entry in read_file(path):
            # The tree is the second element of each item read_file returns.
            yield entry[1]
"""Print the verb entries found in parents' utterances in CHILDES files.

Every .xml file under childes_dir is read with xmlchat.readfile.  For each
utterance whose speaker has the role "Mother" or "Father", every entry of
the utterance's 'mor' list whose 'c' field equals 'v' is printed.
Utterances without a 'mor' key are skipped.
"""

import re

import lx
import xmlchat

childes_dir = "/usr/local/data/CHILDES/data-xml/English-USA/Brent"

for xmlfile in lx.findfiles(childes_dir, re.compile(r".*\.xml")):
    roles, utterances = xmlchat.readfile(xmlfile)
    for utt in utterances:
        # Deliberately not called `id`: at module level that would rebind
        # the builtin id() for everything else in this module.
        speaker_id = utt['who']
        role = roles[speaker_id]['role']
        if role not in ("Mother", "Father"):
            continue
        # NOTE(review): 'c' is presumably the part-of-speech category and
        # 'v' the verb tag -- confirm against xmlchat.
        for morph in utt.get('mor', []):
            if morph['c'] == 'v':
                # Parenthesised single argument: same output under the
                # Python 2 print statement, and valid as a Python 3 call.
                print(morph)