at=AlignmentTemplate()
         at.parse(line)
         ruleList.add(at)
     gfile.close()
 
 # Load the sentences file, if one was given. A path ending in ".gz"
 # (case-insensitive) is opened with gzip, anything else as a plain file.
 # Every line is stripped, decoded from UTF-8 and parsed into a
 # ParallelSentence that is appended to `sentences`.
 sentences=list()
 if args.sentences:
     if args.sentences.lower().endswith('.gz'):
         gfile=gzip.open(args.sentences)
     else:
         gfile=open(args.sentences)
     # try/finally guarantees the handle is released even when decoding or
     # parsing one of the lines raises.
     try:
         for line in gfile:
             line=line.strip().decode('utf-8')
             parallelSentence=ParallelSentence()
             parallelSentence.parse(line, parseTlLemmasFromDic=True,forRBPE=args.rbpe)
             # Explicit empty tags are only added when tt1_beam is not set.
             if not args.tt1_beam:
                 parallelSentence.add_explicit_empty_tags()
             sentences.append(parallelSentence)
     finally:
         gfile.close()
 
 # Build boxesDic from the tab-separated final boxes index file: the
 # stripped second column is the key and the first column, converted to
 # int, is the value.
 boxesDic=dict()
 if args.final_boxes_index:
     # "with" closes the index file as soon as it has been read, also when a
     # malformed line makes the loop raise.
     with open(args.final_boxes_index) as boxesIndexFile:
         for line in boxesIndexFile:
             parts=line.split("\t")
             boxesDic[parts[1].strip()]=int(parts[0])
 
 # State used while reading the best rule application for each sentence;
 # all three start out empty / at zero.
 numLine = 0
 emptyIndexes = set()
 bestHypothesisForEachSentence = []
示例#2
0
'''
Created on 12/02/2014

@author: vitaka
'''
#stdinput: sentences to be translated
from beamSearchLib import RuleList, ParallelSentence, RuleApplicationHypothesis
from ruleLearningLib import AlignmentTemplate
import ruleLearningLib
import argparse
import gzip
import sys

if __name__ == "__main__":
    # Both tag definition files are mandatory command-line options.
    parser = argparse.ArgumentParser(description='')
    for option_name in ('--tag_groups_file_name', '--tag_sequences_file_name'):
        parser.add_argument(option_name, required=True)
    args = parser.parse_args(sys.argv[1:])

    ruleLearningLib.AT_LexicalTagsProcessor.initialize(
        args.tag_groups_file_name,
        args.tag_sequences_file_name)

    # Every stdin line is echoed and then parsed. The parsed object is not
    # kept, so the loop only stops early if parsing raises.
    for raw_line in sys.stdin:
        text = raw_line.rstrip('\n').decode('utf-8')
        print("Parsing ...")
        print(text.encode('utf-8'))
        sentence = ParallelSentence()
        sentence.parse(text, parseTlLemmasFromDic=True)
        sentence.add_explicit_empty_tags()
    print("Everything OK")
'''
Created on 12/02/2014

@author: vitaka
'''
#stdinput: sentences to be translated
from beamSearchLib import RuleList, ParallelSentence, RuleApplicationHypothesis
from ruleLearningLib import AlignmentTemplate
import ruleLearningLib
import argparse
import gzip
import sys

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--tag_groups_file_name', required=True)
    parser.add_argument('--tag_sequences_file_name', required=True)
    # Called without arguments, parse_args() reads sys.argv[1:].
    args = parser.parse_args()

    tag_groups_path = args.tag_groups_file_name
    tag_sequences_path = args.tag_sequences_file_name
    ruleLearningLib.AT_LexicalTagsProcessor.initialize(tag_groups_path, tag_sequences_path)

    # Read stdin line by line: echo the line, then parse it. Nothing is
    # stored; reaching the final message means no line raised.
    for stdin_line in sys.stdin:
        decoded = stdin_line.rstrip('\n').decode('utf-8')
        print("Parsing ...")
        print(decoded.encode('utf-8'))
        parsed = ParallelSentence()
        parsed.parse(decoded, parseTlLemmasFromDic=True)
        parsed.add_explicit_empty_tags()
    print("Everything OK")
示例#4
0
        gfile.close()
    print >> sys.stderr, "... done"

    print >> sys.stderr, "Loading sentences ..."
    # Load the sentences file, if one was given. A path ending in ".gz"
    # (case-insensitive) is opened with gzip, anything else as a plain file.
    # Every line is stripped, decoded from UTF-8, parsed into a
    # ParallelSentence, given explicit empty tags and appended to `sentences`.
    sentences = list()
    if args.sentences:
        if args.sentences.lower().endswith(".gz"):
            gfile = gzip.open(args.sentences)
        else:
            gfile = open(args.sentences)
        # try/finally guarantees the handle is released even when decoding
        # or parsing one of the lines raises.
        try:
            for line in gfile:
                line = line.strip().decode('utf-8')
                parallelSentence = ParallelSentence()
                parallelSentence.parse(line,
                                       parseTlLemmasFromDic=True,
                                       forRBPE=args.rbpe)
                parallelSentence.add_explicit_empty_tags()
                sentences.append(parallelSentence)
        finally:
            gfile.close()
    print >> sys.stderr, "... done"

    # Two lookups built from the tab-separated final boxes index file:
    #   boxesInvDic: int(first column) -> stripped second column
    #   boxesDic:    stripped second column -> int(first column)
    boxesInvDic = dict()
    boxesDic = dict()
    if args.final_boxes_index:
        # "with" closes the index file as soon as it has been read, also
        # when a malformed line makes the loop raise.
        with open(args.final_boxes_index) as boxesIndexFile:
            for line in boxesIndexFile:
                parts = line.split("\t")
                # The second column is read before the first is converted,
                # so a line without a tab raises IndexError rather than
                # ValueError.
                boxLabel = parts[1].strip()
                boxNumber = int(parts[0])
                boxesInvDic[boxNumber] = boxLabel
                boxesDic[boxLabel] = boxNumber

    nfirst = None