Exemplo n.º 1
0
def ner(fin):
    """Run the named-entity-recognition pipeline for the input path *fin*.

    Steps, in order:

    1. Call ``AMIRA(fin)``.
    2. Read ``fin + ".bw.TOK.NORM.POS.bpcOut"`` (presumably produced by the
       AMIRA step -- confirm), pass it through ``tools.get_columns`` with a
       tab separator and indices 0, 1, 10 and 11, and write the result to
       ``fin + ".bw.pre.NER"``.
    3. Call ``yamcha`` on that file with the model
       ``settings.amira_dir + "/SVMmodel.model"``, targeting
       ``fin + ".bw.post.NER"``.
    4. Read the yamcha result, pass it through ``tools.get_columns`` with
       indices 0 and 4, and write it to ``fin + ".bw.ner"``.

    Progress messages are printed along the way. Nothing is returned.
    """
    AMIRA(fin)

    # Stage 1: build the yamcha input from the selected columns.
    amira_text = getText(fin + ".bw.TOK.NORM.POS.bpcOut")
    print("Preparing data for NER")
    yamcha_in_path = fin + ".bw.pre.NER"
    writeText(yamcha_in_path,
              tools.get_columns(amira_text, '\t', [0, 1, 10, 11]))

    # Stage 2: run yamcha with the SVM model from the AMIRA directory.
    yamcha_out_path = fin + ".bw.post.NER"
    print("Runnin yamcha for NER")
    model_path = settings.amira_dir + "/SVMmodel.model"
    yamcha(yamcha_in_path, yamcha_out_path, model_path)

    # Stage 3: reduce the yamcha result to the two columns of interest.
    tagged_text = getText(yamcha_out_path)
    print("Producing formatted NER output")
    writeText(fin + ".bw.ner", tools.get_columns(tagged_text, '\t', [0, 4]))
    print("")
Exemplo n.º 2
0
def make_gazet(fin):
    """Build a ``.dict`` file from the first ``->``-separated column of *fin*.

    The text of *fin* is read with ``getText`` and passed through
    ``tools.get_columns`` with separator ``'->'`` and index list ``[0]``
    (presumably keeping only the text before the arrow -- confirm against
    ``tools``).  Every match of the pattern "one or more word characters,
    optional whitespace, then a newline" is then deleted, and the remaining
    text is written to ``fin + '.dict'``.

    Args:
        fin: Path of the input file; also the prefix of the output path.

    Returns:
        None. The result is written via ``writeText``.
    """
    source_text = getText(fin)
    first_column = tools.get_columns(source_text, '->', [0])
    # Raw string: the same pattern as before, but without relying on the
    # invalid string escapes "\w" and "\s" that newer Python versions warn
    # about.  The regex engine itself interprets "\n" as a newline.
    filtered_text = re.sub(r'\w+\s*\n', '', first_column)
    writeText(fin + '.dict', filtered_text)