#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert a directory of Gate lexicon files to LT-TTT format.

Usage: <script> ARG1 ARG2

ARG1 -- directory holding the Gate lexicon (read through Gazetteer)
ARG2 -- directory receiving the LT-TTT export; it must already exist
"""
import os
import sys

from Lookup import Gazetteer

# Fail with a readable usage line instead of an IndexError traceback when
# an argument is missing.  Extra arguments are ignored, as before.
if len(sys.argv) < 3:
    sys.exit("usage: %s GATE_LEXICON_DIR LT_TTT_OUTPUT_DIR" % sys.argv[0])

indir = sys.argv[1]
outdir = sys.argv[2]

# The output directory is a documented precondition ("ARG2 must already
# exist"); check it before spending time loading the lexicon.
if not os.path.isdir(outdir):
    sys.exit("%s: output directory does not exist: %s" % (sys.argv[0], outdir))

lexicon = Gazetteer(indir)
lexicon.export(directory=outdir, target="LT-TTT")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Export a Gate lexicon directory (ARG1) as LT-TTT files in ARG2.

ARG1 is the directory the Gate lexicon is read from.
ARG2 is the directory the LT-TTT files are written to; it must already
exist, and the script stops early if it does not.
"""
import os
import sys

from Lookup import Gazetteer

# Both positional arguments are required; report usage rather than letting
# sys.argv indexing raise IndexError.  Surplus arguments are still ignored.
if len(sys.argv) < 3:
    sys.exit("usage: %s GATE_LEXICON_DIR LT_TTT_OUTPUT_DIR" % sys.argv[0])

indir, outdir = sys.argv[1], sys.argv[2]

# Enforce the documented "ARG2 must already exist" precondition up front,
# before the lexicon is loaded.
if not os.path.isdir(outdir):
    sys.exit("%s: output directory does not exist: %s" % (sys.argv[0], outdir))

lexicon = Gazetteer(indir)
lexicon.export(directory=outdir, target="LT-TTT")
if __name__=="__main__": try: doc=annodisAnnot(sys.argv[1]) except: print "ERROR reading file:", sys.argv[1] sys.exit(0) try: prep=Preprocess(sys.argv[1].split(".xml")[0]+".txt.prep.xml") doc.add_preprocess(prep) except: print "ERROR reading prepocessed file for", sys.argv[1] sys.exit(0) lexicon=Gazetteer(sys.argv[2]) txt=doc.text() lookup=lexicon.tag(txt) for one in doc.edus(): all=[] span=int(one.attrib["start"]),int(one.attrib["end"]) #print >> sys.stderr, "edu", span tokens=doc.get_edu_tokens(one.attrib["id"]) txt = " ".join([x.lemma() for x in tokens]) #print >> sys.stderr, txt verb_class= lexicon.tag(txt).get("verb_classes",[]) if verb_class!=[]: verb_class=set([z.val() for (x,y,z) in verb_class]) one.attrib["verb_class"]="+".join(verb_class)