# NOTE(original author): this call may be unnecessary ("this is useless, I think").
# NOTE(review): the flag is hard-coded to True here — confirm whether the call can be dropped.
 RuleApplicationHypothesis.set_minimum_covered_words(True)
 
 ruleList=RuleList()
 #load alignment templates
 # Each line of the file is stripped, decoded as UTF-8, parsed into an
 # AlignmentTemplate and added to ruleList. Nothing is loaded when the
 # option is empty.
 if args.alignment_templates:
     # Extension check is case-insensitive so that ".GZ" files are also
     # read through gzip.
     if args.alignment_templates.lower().endswith(".gz"):
         gfile=gzip.open(args.alignment_templates)
     else:
         gfile=open(args.alignment_templates)
     # try/finally: release the file handle even if decoding or parsing a
     # line raises.
     try:
         for line in gfile:
             line=line.strip().decode('utf-8')
             at=AlignmentTemplate()
             at.parse(line)
             ruleList.add(at)
     finally:
         gfile.close()
 
 #load sentences
 # Reads args.sentences line by line; the file is opened through gzip when
 # its name ends in ".gz" (case-insensitive), otherwise as a plain file.
 sentences=list()
 if args.sentences:
     if args.sentences.lower().endswith('.gz'):
         gfile=gzip.open(args.sentences)
     else:
         gfile=open(args.sentences)
     for line in gfile:
         # strip surrounding whitespace, then decode the byte string as UTF-8
         line=line.strip().decode('utf-8')
         parallelSentence=ParallelSentence()
         parallelSentence.parse(line, parseTlLemmasFromDic=True,forRBPE=args.rbpe)
         # explicit empty tags are added only when the tt1_beam option is off
         if not args.tt1_beam:
             parallelSentence.add_explicit_empty_tags()
Exemplo n.º 2
0
    # Configure the hypothesis class from the command-line options.
    RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
    RuleApplicationHypothesis.set_target_language(args.target_language)

    print >> sys.stderr, "Loading ATs ..."
    ruleList = RuleList()
    #load alignment templates
    # Each line is stripped, decoded as UTF-8, parsed into an
    # AlignmentTemplate and added to ruleList. Nothing is loaded when the
    # option is empty.
    if args.alignment_templates:
        if args.alignment_templates.lower().endswith(".gz"):
            gfile = gzip.open(args.alignment_templates)
        else:
            gfile = open(args.alignment_templates)
        # try/finally: release the file handle even if decoding or parsing
        # a line raises.
        try:
            for line in gfile:
                line = line.strip().decode('utf-8')
                at = AlignmentTemplate()
                at.parse(line)
                ruleList.add(at)
        finally:
            gfile.close()
    print >> sys.stderr, "... done"

    print >> sys.stderr, "Loading sentences ..."
    #load sentences
    # Reads args.sentences line by line; the file is opened through gzip
    # when its name ends in ".gz" (case-insensitive), otherwise as a plain
    # file.
    sentences = list()
    if args.sentences:
        if args.sentences.lower().endswith(".gz"):
            gfile = gzip.open(args.sentences)
        else:
            gfile = open(args.sentences)
        for line in gfile:
            # strip surrounding whitespace, then decode the byte string as UTF-8
            line = line.strip().decode('utf-8')
            parallelSentence = ParallelSentence()
            parallelSentence.parse(line,
 ruleList=RuleList()
 
 # Configure the hypothesis class from the command-line options.
 RuleApplicationHypothesis.set_target_language(args.target_language)
 RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
 RuleApplicationHypothesis.set_minimum_covered_words(args.minimum_covered_words)
 
 #load alignment templates
 # The file is read through gzip when its name ends in ".gz", otherwise as
 # a plain file. Each line is stripped, decoded as UTF-8, parsed into an
 # AlignmentTemplate and added to ruleList.
 if args.alignment_templates.endswith(".gz"):
     gfile=gzip.open(args.alignment_templates)
 else:
     gfile=open(args.alignment_templates)
 # try/finally: release the file handle even if decoding or parsing a line
 # raises.
 try:
     for line in gfile:
         line=line.strip().decode('utf-8')
         at=AlignmentTemplate()
         at.parse(line)
         ruleList.add(at)
 finally:
     gfile.close()
 ruleLists=[ruleList]
 
 # Optional second rule list: when given, ruleLists becomes
 # [ruleList, altRuleList].
 if args.alternative_alignment_templates:
     altRuleList=RuleList()
     # NOTE(review): this file is always opened with gzip.open, whatever its
     # extension — confirm that plain-text files are never passed here.
     gfile=gzip.open(args.alternative_alignment_templates)
     # try/finally: release the file handle even if decoding or parsing a
     # line raises.
     try:
         for line in gfile:
             line=line.strip().decode('utf-8')
             at=AlignmentTemplate()
             at.parse(line)
             altRuleList.add(at)
     finally:
         gfile.close()
     ruleLists=[ruleList,altRuleList]
 
 boxesCoverage=False
Exemplo n.º 4
0
    # Configure the hypothesis class from the command-line options.
    RuleApplicationHypothesis.set_target_language(args.target_language)
    RuleApplicationHypothesis.set_apertium_data_dir(args.apertium_data_dir)
    RuleApplicationHypothesis.set_minimum_covered_words(
        args.minimum_covered_words)

    #load alignment templates
    # The file is read through gzip when its name ends in ".gz", otherwise
    # as a plain file. Each line is stripped, decoded as UTF-8, parsed into
    # an AlignmentTemplate and added to ruleList.
    if args.alignment_templates.endswith(".gz"):
        gfile = gzip.open(args.alignment_templates)
    else:
        gfile = open(args.alignment_templates)
    # try/finally: release the file handle even if decoding or parsing a
    # line raises.
    try:
        for line in gfile:
            line = line.strip().decode('utf-8')
            at = AlignmentTemplate()
            at.parse(line)
            ruleList.add(at)
    finally:
        gfile.close()
    ruleLists = [ruleList]

    # Optional second rule list: when given, ruleLists becomes
    # [ruleList, altRuleList].
    if args.alternative_alignment_templates:
        altRuleList = RuleList()
        # NOTE(review): this file is always opened with gzip.open, whatever
        # its extension — confirm that plain-text files are never passed here.
        gfile = gzip.open(args.alternative_alignment_templates)
        # try/finally: release the file handle even if decoding or parsing
        # a line raises.
        try:
            for line in gfile:
                line = line.strip().decode('utf-8')
                at = AlignmentTemplate()
                at.parse(line)
                altRuleList.add(at)
        finally:
            gfile.close()
        ruleLists = [ruleList, altRuleList]

    boxesCoverage = False