Пример #1
0
def parseFile(infile,
              lemodel,
              tagmodel,
              parsemodel,
              folderpref="mate/parses/"):
    """Parse ``infile``, retokenize the result, and parse it a second time.

    The work is done in four steps:

    1. ``parsing`` is run on ``infile`` with the models given as arguments;
       its output goes to a ``<base>_prelim/`` folder.
    2. ``retokeniser`` is run on that preliminary parse.
    3. ``makeEmpty`` is applied to the retokenized file, writing into the
       final ``<base>/`` folder (created first if it does not exist).
    4. ``parsing`` is run again on the file produced by ``makeEmpty``.

    Args:
        infile: Input handed to the first ``parsing`` call.
        lemodel: Lemmatizer model for the first pass.
        tagmodel: Tagger model for the first pass.
        parsemodel: Parser model for the first pass.
        folderpref: Base of the output folder names.  A truthy value is used
            as is; an empty value or ``None`` is replaced by a
            ``YYYY-MM-DD_HH:MM`` timestamp.

    Returns:
        The value returned by the second ``parsing`` call.
    """
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M')
    # ``or`` yields the same paths as before for every string prefix and
    # additionally keeps folderpref=None from raising a TypeError.
    base = folderpref or timestamp
    prelimfolder = base + "_prelim/"
    outfolder = base + "/"

    # NOTE(review): ``memory`` is not a parameter of this function, so it
    # must be a module-level name defined elsewhere - confirm it exists.
    prelimparse = parsing(infile=infile,
                          lemodel=lemodel,
                          tagmodel=tagmodel,
                          parsemodel=parsemodel,
                          outfolder=prelimfolder,
                          memory=memory)

    print("retokenizing...")
    newname = retokeniser(prelimparse, addtoout="_retok")
    print("retokenization done")

    createNonExistingFolders(outfolder)
    emptyname = makeEmpty(newname, outfolder=outfolder)

    # NOTE(review): the second pass takes its models from the module-level
    # ``modeldir`` and ``args`` rather than from this function's arguments,
    # and hard-codes the memory setting - verify that this is intended.
    parsefile = parsing(infile=emptyname,
                        lemodel=modeldir + args.get("lemmodel", None),
                        tagmodel=modeldir + args.get("tagmodel", None),
                        parsemodel=modeldir + args.get("parsemodel", None),
                        outfolder=outfolder,
                        memory="40G")
    # Hand the final result back to the caller instead of discarding it.
    return parsefile
Пример #2
0
def parseSentenceFile(sentencefile,
                      modelfolder="mate/platinum.2016-10-20_01:34/models/",
                      lemodel="LemModel",
                      tagmodel="TagModel",
                      parsemodel="ParseModel",
                      outfolder=".",
                      memory="4G",
                      removePunct=False,
                      specialCharWords=[],
                      degrade=False):
    """
	removed parsertype from options!
	"""
    print "\n\nparseSentenceFile", sentencefile
    emptyConll = emptyFromSentence(sentencefile,
                                   specialCharWords=specialCharWords,
                                   outfolder=outfolder)
    print "made empty", emptyConll
    parsedfile = parsing(emptyConll,
                         lemodel=modelfolder + lemodel,
                         tagmodel=modelfolder + tagmodel,
                         parsemodel=modelfolder + parsemodel,
                         outfolder=outfolder,
                         memory=memory)
    print parsedfile
    if removePunct: removePuncsFromConllfile(parsedfile)
    if degrade: degradeConllfile(parsedfile)
    return parsedfile
Пример #3
0
def parseSentenceFile(
        sentencefile,
        modelfolder="mate/platinum.2016-10-20_01:34/models/",
        lemodel="LemModel",
        tagmodel="TagModel",
        parsemodel="ParseModel",
        outfolder=".",
        memory="40G",
        removePunct=False,
        useTokDic=False,
        degrade=False):
    """Build an empty CoNLL file from ``sentencefile`` and parse it.

    Note: the former ``parsertype`` option has been removed.

    ``emptyFromSentence`` is called with ``sentencefile`` and ``useTokDic``;
    its result is handed to ``parsing`` together with the three model paths
    (each one is ``modelfolder`` concatenated with the model name),
    ``outfolder`` and ``memory``.  The parse result is printed, then
    optionally post-processed with ``removePuncsFromConllfile`` (when
    ``removePunct`` is true) and ``degradeConllfile`` (when ``degrade`` is
    true).
    """
    blank_conll = emptyFromSentence(sentencefile, useTokDic=useTokDic)

    # Model locations are built by plain string concatenation.
    lem_path = modelfolder + lemodel
    tag_path = modelfolder + tagmodel
    parse_path = modelfolder + parsemodel

    parsedfile = parsing(
        blank_conll,
        lemodel=lem_path,
        tagmodel=tag_path,
        parsemodel=parse_path,
        outfolder=outfolder,
        memory=memory,
    )
    print(parsedfile)

    if removePunct:
        removePuncsFromConllfile(parsedfile)
    if degrade:
        degradeConllfile(parsedfile)