def genGHKMfiles(self, args, derivations):
    """Write GHKM input files, run the Java rule extractor, convert the rules.

    For each ``(s, d)`` pair in ``derivations`` one line is written to each
    of three files:

    * ``args.parse_path`` -- first element returned by
      ``DerivationTree.fromDerivation(d).getGHKMtriple_Java()``
    * ``args.align_path`` -- second element of that same pair
    * ``args.text_path``  -- ``s["nl"]`` with surrounding spaces, tabs and
      newlines stripped

    The Stanford ``RuleExtractor`` class is then run via ``java`` with its
    standard output redirected to ``args.ghkm_path``, and finally
    ``ghkm2tib(args.ghkm_path, args.tib_path)`` is called.

    Args:
        args: object providing ``parse_path``, ``align_path``, ``text_path``,
            ``ghkm_path``, ``tib_path`` and ``ghkmDir`` attributes.
        derivations: iterable of ``(s, d)`` pairs; ``s`` must support
            ``s["nl"]`` and ``d`` is passed to
            ``DerivationTree.fromDerivation``.

    NOTE(review): another definition named ``genGHKMfiles`` appears later in
    this file; if both live in the same scope the later one is the one that
    takes effect.
    """
    # Context managers guarantee all three files are flushed and closed
    # before the Java process reads them, even if a derivation raises.
    with open(args.parse_path, 'w') as parse_file, \
            open(args.align_path, 'w') as align_file, \
            open(args.text_path, 'w') as text_file:
        for s, d in derivations:
            x = DerivationTree.fromDerivation(d)
            parse, align = x.getGHKMtriple_Java()
            text = s["nl"].strip(' \t\n\r')
            parse_file.write("%s\n" % parse)
            align_file.write("%s\n" % align)
            text_file.write("%s\n" % text)

    print("Running GHKM Java rule extraction")
    mem = "2g"  # used for both -Xmx and -Xms below
    ghkm_opts = (
        "-fCorpus %s -eParsedCorpus %s -align %s "
        "-joshuaFormat false -maxLHS 200 -maxRHS 15 -MaxUnalignedRHS 15"
        % (args.text_path, args.parse_path, args.align_path)
    )
    java_opts = (
        "-Xmx%s -Xms%s -cp %s/ghkm.jar:%s/lib/fastutil.jar "
        "-XX:+UseCompressedOops"
        % (mem, mem, args.ghkmDir, args.ghkmDir)
    )
    # NOTE(review): the command is interpreted by the shell, so paths that
    # contain spaces or shell metacharacters will not survive unquoted.
    status = os.system(
        "java %s edu.stanford.nlp.mt.syntax.ghkm.RuleExtractor %s > %s"
        % (java_opts, ghkm_opts, args.ghkm_path)
    )
    if status != 0:
        # Surface the failure instead of silently converting whatever
        # (possibly empty) output the extractor left behind.
        print("WARNING: GHKM rule extraction returned non-zero status %s"
              % status)

    print("Converting GHKM rules to Tiburon format")
    ghkm2tib(args.ghkm_path, args.tib_path)
def genGHKMfiles(self, args, derivations):
    """Write GHKM input files, run the Java rule extractor, convert the rules.

    For each ``(s, d)`` pair in ``derivations`` one line is written to each
    of three files:

    * ``args.parse_path`` -- first element returned by
      ``DerivationTree.fromDerivation(d).getGHKMtriple_Java()``
    * ``args.align_path`` -- second element of that same pair
    * ``args.text_path``  -- ``s["nl"]`` with surrounding spaces, tabs and
      newlines stripped

    The Stanford ``RuleExtractor`` class is then run via ``java`` with its
    standard output redirected to ``args.ghkm_path``, and finally
    ``ghkm2tib(args.ghkm_path, args.tib_path)`` is called.

    Args:
        args: object providing ``parse_path``, ``align_path``, ``text_path``,
            ``ghkm_path``, ``tib_path`` and ``ghkmDir`` attributes.
        derivations: iterable of ``(s, d)`` pairs; ``s`` must support
            ``s["nl"]`` and ``d`` is passed to
            ``DerivationTree.fromDerivation``.

    NOTE(review): an earlier definition named ``genGHKMfiles`` also appears
    in this file; if both live in the same scope this later one replaces it.
    """
    # Context managers guarantee all three files are flushed and closed
    # before the Java process reads them, even if a derivation raises.
    with open(args.parse_path, 'w') as parse_file, \
            open(args.align_path, 'w') as align_file, \
            open(args.text_path, 'w') as text_file:
        for s, d in derivations:
            x = DerivationTree.fromDerivation(d)
            parse, align = x.getGHKMtriple_Java()
            text = s["nl"].strip(' \t\n\r')
            parse_file.write("%s\n" % parse)
            align_file.write("%s\n" % align)
            text_file.write("%s\n" % text)

    print("Running GHKM Java rule extraction")
    mem = "2g"  # used for both -Xmx and -Xms below
    ghkm_opts = (
        "-fCorpus %s -eParsedCorpus %s -align %s "
        "-joshuaFormat false -maxLHS 200 -maxRHS 15 -MaxUnalignedRHS 15"
        % (args.text_path, args.parse_path, args.align_path)
    )
    java_opts = (
        "-Xmx%s -Xms%s -cp %s/ghkm.jar:%s/lib/fastutil.jar "
        "-XX:+UseCompressedOops"
        % (mem, mem, args.ghkmDir, args.ghkmDir)
    )
    # NOTE(review): the command is interpreted by the shell, so paths that
    # contain spaces or shell metacharacters will not survive unquoted.
    status = os.system(
        "java %s edu.stanford.nlp.mt.syntax.ghkm.RuleExtractor %s > %s"
        % (java_opts, ghkm_opts, args.ghkm_path)
    )
    if status != 0:
        # Surface the failure instead of silently converting whatever
        # (possibly empty) output the extractor left behind.
        print("WARNING: GHKM rule extraction returned non-zero status %s"
              % status)

    print("Converting GHKM rules to Tiburon format")
    ghkm2tib(args.ghkm_path, args.tib_path)
def parseMRfiles(self, args, derivations):
    """Convert each derivation to a string and write the results to a file.

    Every ``(s, d)`` pair in ``derivations`` is turned into a Tiburon tree
    via ``DerivationTree.fromDerivation(d).getTiburonTree()`` and passed to
    ``tibTree2String(args, tree)``. Each truthy result is written as one
    line to ``args.output_path`` and echoed to standard output; falsy
    results are skipped. The ``s`` half of each pair is not used here.

    Args:
        args: object providing ``output_path``; also forwarded unchanged to
            ``tibTree2String``.
        derivations: iterable of ``(s, d)`` pairs.
    """
    # The context manager closes the output file even if a conversion raises
    # part-way through the derivations.
    with open(args.output_path, 'w') as output_file:
        for _s, d in derivations:
            x = DerivationTree.fromDerivation(d)
            tibTree = x.getTiburonTree()
            genString = tibTree2String(args, tibTree)
            if not genString:
                continue
            output_file.write("%s\n" % genString)
            print(genString)