def ner(fin):
    """Run the named-entity-recognition pipeline for the input file ``fin``.

    The steps, in order:

    1. ``AMIRA(fin)`` is invoked, then ``fin + ".bw.TOK.NORM.POS.bpcOut"``
       is read (presumably the file AMIRA produces -- confirm against that
       helper).
    2. Columns 0, 1, 10 and 11 of that text (tab delimiter, selected through
       ``tools.get_columns``) are written to ``fin + ".bw.pre.NER"``.
    3. ``yamcha`` is called with the pre-NER file, the output path
       ``fin + ".bw.post.NER"`` and the model
       ``settings.amira_dir + "/SVMmodel.model"``.
    4. Columns 0 and 4 of the yamcha output are written to
       ``fin + ".bw.ner"``.

    Progress messages are printed along the way; nothing is returned.
    """
    AMIRA(fin)

    # Every intermediate file shares the same "<fin>.bw" prefix.
    prefix = fin + ".bw"
    bpc_text = getText(prefix + ".TOK.NORM.POS.bpcOut")

    print("Preparing data for NER")
    pre_ner_path = prefix + ".pre.NER"
    writeText(pre_ner_path, tools.get_columns(bpc_text, '\t', [0, 1, 10, 11]))

    post_ner_path = prefix + ".post.NER"
    print("Runnin yamcha for NER")
    model_path = settings.amira_dir + "/SVMmodel.model"
    yamcha(pre_ner_path, post_ner_path, model_path)
    tagged_text = getText(post_ner_path)

    print("Producing formatted NER output")
    writeText(prefix + ".ner", tools.get_columns(tagged_text, '\t', [0, 4]))
    print("")
def make_gazet(fin):
    """Build a ``.dict`` gazetteer file from ``fin``.

    Reads ``fin``, keeps column 0 using ``'->'`` as the delimiter (through
    ``tools.get_columns``), deletes every regex match of "word characters,
    optional whitespace, newline" from the result, and writes what is left
    to ``fin + '.dict'``.  Nothing is returned.
    """
    source_text = getText(fin)
    first_column = tools.get_columns(source_text, '->', [0])

    # Raw string literal: this is the same regular expression as before, but
    # it no longer depends on Python leaving the unrecognised string escapes
    # \w and \s untouched (deprecated; a SyntaxWarning on recent Python 3).
    # NOTE(review): whether \w is ASCII-only or Unicode-aware here depends on
    # the Python version and on the string type getText returns -- confirm
    # before porting.
    # Blank-line runs are left as they are; no newline collapsing is applied.
    cleaned = re.sub(r'\w+\s*\n', '', first_column)

    writeText(fin + '.dict', cleaned)