Exemplo n.º 1
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Export a Gate lexicon directory to LT-TTT format.

ARG1 is the directory holding the Gate lexicon; ARG2 is the directory
that receives the LT-TTT output.

ARG2 must already exist.
"""

from Lookup import Gazetteer
import sys

# Positional arguments: input directory first, output directory second.
indir, outdir = sys.argv[1], sys.argv[2]

# Build the Gazetteer from the input directory and export it with the
# LT-TTT target into the output directory.
lexicon = Gazetteer(indir)
lexicon.export(target="LT-TTT", directory=outdir)
Exemplo n.º 2
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert a Gate lexicon directory into LT-TTT format.

Arguments:
    ARG1 -- directory containing the Gate lexicon (input)
    ARG2 -- directory the LT-TTT output is exported to; it must already
            exist
"""

from Lookup import Gazetteer
import sys

# Both directories come straight from the command line, in this order.
indir, outdir = sys.argv[1], sys.argv[2]

# Build the Gazetteer from the input directory, then export it using the
# "LT-TTT" target.
lexicon = Gazetteer(indir)
lexicon.export(target="LT-TTT", directory=outdir)
Exemplo n.º 3
0
# Script entry point (Python 2 syntax: uses the print statement).
# Command-line arguments, as used below:
#   sys.argv[1] -- path passed to annodisAnnot(); the preprocessed file name
#                  is derived from it (text before the first ".xml" plus
#                  ".txt.prep.xml")
#   sys.argv[2] -- argument passed to Gazetteer()
if __name__=="__main__":
    # Build the annotated document from the first argument.
    # NOTE(review): the bare except hides the real cause of any failure (it
    # also catches KeyboardInterrupt/SystemExit), and exit status 0 reports
    # success to the calling shell even though an error was printed --
    # confirm this is intended.
    try:
        doc=annodisAnnot(sys.argv[1]) 
    except:
        print "ERROR reading file:", sys.argv[1]
        sys.exit(0)
    # Build the Preprocess object from the derived file name and attach it to
    # the document. Same bare-except / exit-status-0 caveat as above.
    try:
        prep=Preprocess(sys.argv[1].split(".xml")[0]+".txt.prep.xml")
        doc.add_preprocess(prep)
    except:
        print "ERROR reading prepocessed file for", sys.argv[1]
        sys.exit(0)


    # Gazetteer built from the second command-line argument.
    lexicon=Gazetteer(sys.argv[2])

    # Tag the whole document text.
    # NOTE(review): `lookup` is not read anywhere in the lines visible here --
    # confirm whether it is still needed.
    txt=doc.text()
    lookup=lexicon.tag(txt)

    # For each EDU returned by doc.edus(), tag the space-joined lemmas of its
    # tokens and, when "verb_classes" entries are found, store them on the EDU
    # in its "verb_class" attribute.
    for one in doc.edus():
        # NOTE(review): `all` shadows the builtin of the same name and is not
        # used in the lines visible here; `span` is only referenced by the
        # commented-out debug print below -- confirm both are still needed.
        all=[]
        span=int(one.attrib["start"]),int(one.attrib["end"])
        #print >> sys.stderr, "edu", span
        tokens=doc.get_edu_tokens(one.attrib["id"])
        # Rebinds `txt` (previously the whole document text) to the lemmas of
        # this EDU's tokens joined by single spaces.
        txt = " ".join([x.lemma() for x in tokens])
        #print >> sys.stderr, txt
        # tag() result is queried like a dict; a missing "verb_classes" key
        # yields an empty list.
        verb_class= lexicon.tag(txt).get("verb_classes",[])
        if verb_class!=[]:
            # Each entry is unpacked as a triple; only the third element's
            # val() is kept. The set removes duplicates, and because set
            # iteration order is unspecified the order of the "+"-joined
            # values is not guaranteed.
            # presumably val() returns a string (required by join) -- verify
            verb_class=set([z.val() for (x,y,z) in verb_class])
            one.attrib["verb_class"]="+".join(verb_class)