Exemplo n.º 1
0
    # Set up `feats` (and, when needed, `doc`/`prep`) from the command-line
    # options, then index the feature map.
    #
    # Earlier unconditional loading sequence, kept commented out; the
    # branches below replace it.
    # basename = args[0]
    # doc      = annodisAnnot(basename+".xml")
    # feats    = FeatureMap(basename+".features")
    # prep     = Preprocess( basename+".txt.prep.xml")

    if options.merge:
        # Merge mode: create an empty FeatureMap and fill it through
        # init_from_dir on ".", passing options.merge as the suffix.
        feats = FeatureMap("", empty = True)
        feats.init_from_dir(".", suffix = options.merge)
        # Placeholder text: no single base file is named in merge mode.
        basename = "no base file to consider"
    else:
        # NOTE(review): the condition is the literal True, so the usage
        # branch below can never run; presumably this once validated
        # `args` -- confirm.
        if True:
            basename = args[0]
            feats = FeatureMap(basename + ".features", weird = options.weird)
            # NOTE(review): the same index call is issued again after this
            # if/else for the non-merge case.
            feats.index(["m#%s" % INDEX1, "m#%s" % INDEX2])
            # The annotation and its preprocessing are loaded only when
            # allfuncs is not an empty dict.
            if allfuncs != {}:
                doc = annodisAnnot(basename + ".xml")
                prep = Preprocess(basename + ".txt.prep.xml")
                doc.add_preprocess(prep)
            # NOTE(review): within the visible code `doc` is bound only in
            # the allfuncs branch above; if allfuncs is empty and
            # options.voisins is set this raises NameError unless `doc` is
            # defined earlier, outside this view -- confirm.
            if options.voisins:
                doc._voisins = get_voisins_dict(table, doc._vocab)
                # Rebuild every entry's value as a plain dict.
                for entry in doc._voisins:
                    doc._voisins[entry] = dict(doc._voisins[entry])
        else:
            # Unreachable while the condition above stays hard-coded.
            print >> sys.stderr, "Usage: script file-basename ?", args
            sys.exit(0)

    # Index the feature map; merge mode adds "m#FILE" as a third key.
    if options.merge:
        feats.index(["m#%s" % INDEX1, "m#%s" % INDEX2, "m#FILE"])
    else:
        feats.index(["m#%s" % INDEX1, "m#%s" % INDEX2])
Exemplo n.º 2
0
    # Build the feature map `feats` either by merging feature files or by
    # loading the single file named on the command line, then index it.
    #
    # Previous straight-line version of the loading code, left commented
    # out; the conditional code below is what actually runs.
    # basename = args[0]
    # doc      = annodisAnnot(basename+".xml")
    # feats    = FeatureMap(basename+".features")
    # prep     = Preprocess( basename+".txt.prep.xml")

    if options.merge:
        # With options.merge set, an empty FeatureMap is created and
        # init_from_dir is called on "." with options.merge as suffix.
        feats = FeatureMap("", empty=True)
        feats.init_from_dir(".", suffix=options.merge)
        # No individual input file applies here, so basename is a marker.
        basename = "no base file to consider"
    else:
        # NOTE(review): `if True:` makes the else branch dead code; it looks
        # like an argument check was disabled here -- confirm intent.
        if True:
            basename = args[0]
            feats = FeatureMap(basename + ".features", weird=options.weird)
            # NOTE(review): this index call is duplicated by the non-merge
            # branch at the end of this fragment.
            feats.index(["m#%s" % INDEX1, "m#%s" % INDEX2])
            # Only load the annotated document and attach its preprocessing
            # when allfuncs differs from the empty dict.
            if allfuncs != {}:
                doc = annodisAnnot(basename + ".xml")
                prep = Preprocess(basename + ".txt.prep.xml")
                doc.add_preprocess(prep)
            # NOTE(review): `doc` is assigned only inside the branch above
            # as far as this fragment shows; with an empty allfuncs and
            # options.voisins set, the next line would fail with NameError
            # unless `doc` exists from code outside this view -- confirm.
            if options.voisins:
                doc._voisins = get_voisins_dict(table, doc._vocab)
                # Convert each stored value into a plain dict.
                for entry in doc._voisins:
                    doc._voisins[entry] = dict(doc._voisins[entry])
        else:
            # Never executed as long as the condition above is True.
            print >> sys.stderr, "Usage: script file-basename ?", args
            sys.exit(0)

    # Final indexing step: merge mode indexes on "m#FILE" in addition to
    # the two INDEX keys.
    if options.merge:
        feats.index(["m#%s" % INDEX1, "m#%s" % INDEX2, "m#FILE"])
    else:
        feats.index(["m#%s" % INDEX1, "m#%s" % INDEX2])
Exemplo n.º 3
0
import codecs
import re
import xml.etree.ElementTree as ET

from AnnodisReader import annodisAnnot, Preprocess
from Lookup import Gazetteer

def includes(pos1,pos2):
    """Tell whether the span ``pos1`` fully contains the span ``pos2``.

    Both arguments are two-item sequences ``(start, end)``. The result is
    true when ``pos2`` starts at or after the start of ``pos1`` and ends at
    or before the end of ``pos1``; identical spans include each other.
    """
    outer_start, outer_end = pos1
    inner_start, inner_end = pos2
    starts_inside = outer_start <= inner_start
    if not starts_inside:
        # Same short-circuit as `and`: hand back the failed comparison.
        return starts_inside
    return inner_end <= outer_end


if __name__=="__main__":
    # Script entry point: load an annotated document plus its preprocessing,
    # then tag the document text with a gazetteer lexicon.
    #
    # Command line:
    #   argv[1]  annotation file passed to annodisAnnot
    #   argv[2]  file passed to Gazetteer
    #
    # Failures are reported on stderr and end the script with a non-zero
    # exit status so that calling shells and pipelines can detect them.

    # Imported locally so the entry point does not depend on `sys` having
    # been imported elsewhere in the module.
    import sys

    if len(sys.argv) < 3:
        sys.stderr.write("Usage: %s annotation-file.xml lexicon-file\n"
                         % sys.argv[0])
        sys.exit(1)

    annot_path = sys.argv[1]

    # `except Exception` rather than a bare `except` so that Ctrl-C and
    # sys.exit() are not reported as read errors.
    try:
        doc = annodisAnnot(annot_path)
    except Exception:
        sys.stderr.write("ERROR reading file: %s\n" % annot_path)
        sys.exit(1)

    # The preprocessed file name is derived from the part of the annotation
    # path that precedes the first ".xml".
    prep_path = annot_path.split(".xml")[0] + ".txt.prep.xml"
    try:
        prep = Preprocess(prep_path)
        doc.add_preprocess(prep)
    except Exception:
        sys.stderr.write("ERROR reading prepocessed file for %s\n" % annot_path)
        sys.exit(1)

    lexicon = Gazetteer(sys.argv[2])

    txt = doc.text()
    lookup = lexicon.tag(txt)