def parseFile(infile, lemodel, tagmodel, parsemodel, folderpref="mate/parses/"):
    """
    Parse infile, retokenise the result, and parse the retokenised file again.

    Steps, in order:
      1. parsing() is run on infile with the given models; its output
         folder is the "_prelim/" folder described below.
      2. retokeniser() is run on the file returned by step 1
         (addtoout="_retok").
      3. createNonExistingFolders() is called on the final output folder
         and makeEmpty() is applied to the retokenised file.
      4. parsing() is run on the file returned by makeEmpty().

    infile: handed unchanged to the first parsing() call.
    lemodel, tagmodel, parsemodel: models for the FIRST pass only.
    folderpref: folder prefix. When truthy, the folders are
        folderpref + "_prelim/" and folderpref + "/". When empty or None,
        the current timestamp takes its place.

    Returns whatever the second parsing() call returned.

    NOTE(review): `memory`, `modeldir` and `args` are not defined in this
    function; presumably they are module-level globals -- confirm.
    NOTE(review): the second pass builds its model paths from `args`
    rather than from the lemodel/tagmodel/parsemodel parameters, and
    modeldir + None raises TypeError if a key is missing from `args`.
    Confirm this is intended.
    """
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M')

    # A falsy prefix ("" or None) falls back to the timestamp alone; using
    # `or` avoids the TypeError that None + str would raise.
    base = folderpref or timestamp
    prelimfolder = base + "_prelim/"
    outfolder = base + "/"

    parsefile = parsing(
        infile=infile,
        lemodel=lemodel,
        tagmodel=tagmodel,
        parsemodel=parsemodel,
        outfolder=prelimfolder,
        memory=memory,
    )

    # A parenthesised single-argument print gives the same output whether
    # it is read as a Python 2 statement or a Python 3 call.
    print("retokenizing...")
    newname = retokeniser(parsefile, addtoout="_retok")
    print("retokenization done")

    createNonExistingFolders(outfolder)
    emptyname = makeEmpty(newname, outfolder=outfolder)

    parsefile = parsing(
        infile=emptyname,
        lemodel=modeldir + args.get("lemmodel", None),
        tagmodel=modeldir + args.get("tagmodel", None),
        parsemodel=modeldir + args.get("parsemodel", None),
        outfolder=outfolder,
        memory="40G",
    )
    # Hand the final parse back to the caller instead of discarding it.
    return parsefile
def parseSentenceFile(sentencefile, modelfolder="mate/platinum.2016-10-20_01:34/models/", lemodel="LemModel", tagmodel="TagModel", parsemodel="ParseModel", outfolder=".", memory="4G", removePunct=False, specialCharWords=None, degrade=False):
    """
    Build an empty CoNLL file from sentencefile and parse it.

    removed parsertype from options!

    sentencefile: passed to emptyFromSentence().
    modelfolder: prefix prepended to lemodel, tagmodel and parsemodel to
        form the model arguments given to parsing().
    outfolder: passed to both emptyFromSentence() and parsing().
    memory: passed to parsing().
    removePunct: when true, removePuncsFromConllfile() is called on the
        parsed file.
    specialCharWords: passed to emptyFromSentence(); None (the default)
        means a fresh empty list for this call.
    degrade: when true, degradeConllfile() is called on the parsed file.

    Returns the value returned by parsing().

    NOTE(review): another definition named parseSentenceFile follows this
    one in the file; if both live in the same scope, the later one
    replaces this one at import time -- confirm which is meant to be used.
    """
    # A list literal as default would be shared by every call; create a
    # new one per call instead.
    if specialCharWords is None:
        specialCharWords = []

    # Single-argument parenthesised prints produce the same output as the
    # comma-separated Python 2 print statements they replace.
    print("%s %s" % ("\n\nparseSentenceFile", sentencefile))
    emptyConll = emptyFromSentence(
        sentencefile,
        specialCharWords=specialCharWords,
        outfolder=outfolder,
    )
    print("%s %s" % ("made empty", emptyConll))

    parsedfile = parsing(
        emptyConll,
        lemodel=modelfolder + lemodel,
        tagmodel=modelfolder + tagmodel,
        parsemodel=modelfolder + parsemodel,
        outfolder=outfolder,
        memory=memory,
    )
    print(parsedfile)

    if removePunct:
        removePuncsFromConllfile(parsedfile)
    if degrade:
        degradeConllfile(parsedfile)
    return parsedfile
def parseSentenceFile(sentencefile, modelfolder="mate/platinum.2016-10-20_01:34/models/", lemodel="LemModel", tagmodel="TagModel", parsemodel="ParseModel", outfolder=".", memory="40G", removePunct=False, useTokDic=False, degrade=False):
    """
    Build an empty CoNLL file from sentencefile and parse it.

    removed parsertype from options!

    sentencefile: passed to emptyFromSentence().
    modelfolder: prefix prepended to lemodel, tagmodel and parsemodel to
        form the model arguments given to parsing().
    outfolder: passed to parsing() only.
    memory: passed to parsing().
    removePunct: when true, removePuncsFromConllfile() is called on the
        parsed file.
    useTokDic: forwarded to emptyFromSentence().
    degrade: when true, degradeConllfile() is called on the parsed file.

    Returns the value returned by parsing().

    NOTE(review): an earlier definition named parseSentenceFile exists in
    this file with a different signature (specialCharWords instead of
    useTokDic, memory="4G"); if both live in the same scope, this one
    wins -- confirm that is intended.
    """
    emptyConll = emptyFromSentence(sentencefile, useTokDic=useTokDic)

    parsedfile = parsing(
        emptyConll,
        lemodel=modelfolder + lemodel,
        tagmodel=modelfolder + tagmodel,
        parsemodel=modelfolder + parsemodel,
        outfolder=outfolder,
        memory=memory,
    )
    # A parenthesised single-argument print gives the same output whether
    # it is read as a Python 2 statement or a Python 3 call.
    print(parsedfile)

    if removePunct:
        removePuncsFromConllfile(parsedfile)
    if degrade:
        degradeConllfile(parsedfile)
    # Return the parsed file path so callers can use the result.
    return parsedfile