def calculate_e(node, logic_map,aligned_word,start=True):
    """Fill ``node.e`` / ``node.eorigin`` for ``node`` and its whole subtree.

    For a node whose label is not an ``int``, the words stored in
    ``logic_map`` under ``str_logical_rule(node.label, node.id)`` become
    both ``node.e`` and ``node.eorigin`` and are added to ``aligned_word``.
    The ``e`` list of every (recursively processed) child is then appended
    to ``node.e``.  Both lists are sorted before the node is returned.

    ``start`` is accepted for interface compatibility; it is passed as
    ``False`` on recursion and is not otherwise read here.

    NOTE(review): nodes with an ``int`` label are not reset here, so they
    must already carry ``e`` and ``eorigin`` attributes -- confirm against
    the node class.
    """
    if type(node.label) is not int:
        rule_key = str_logical_rule(node.label, node.id)
        own_words = list(logic_map[rule_key])
        node.e = list(own_words)
        node.eorigin = list(own_words)
        for own_word in own_words:
            aligned_word.add(own_word)

    for index, child in enumerate(node.childs):
        processed = calculate_e(child, logic_map, aligned_word, False)
        node.childs[index] = processed
        # A parent's e also includes everything its children contributed.
        for inherited in processed.e:
            node.e.append(inherited)

    node.e = sorted(node.e)
    node.eorigin = sorted(node.eorigin)
    return node
def main():
    """Generate sentence / logical-form alignment input files.

    Command-line arguments:
      --input   file read line by line; each line is parsed with ``extract``
      --osent   path of the sentence output (also the prefix for the
                ".gin" and ".word" files)
      --ologic  path of the logical-form output (also the prefix for the
                ".gin" and ".parse" files)
      --output  optional path for a verbose per-rule dump
      --manual  optional file of whitespace-separated pairs, loaded into
                ``manual_align``

    Relies on ``words``, ``manual_align`` and ``many_literals``, which are
    defined outside this function, plus the helper functions it calls.

    Every file opened here is closed in a ``finally`` block, so the
    ".parse" and ".word" outputs are flushed and nothing is leaked when
    processing raises part-way through the input.
    """
    parser = argparse.ArgumentParser(description="Run Geoparse Alignment Input Generator")
    parser.add_argument('--input', type=str, required=True, help="Input file of geoparse")
    parser.add_argument('--osent', type=str, required=True, help="Directory where sentence is outputed")
    parser.add_argument('--ologic', type=str, required=True, help="Directory where logical-form is outputed")
    parser.add_argument('--output', type=str, help="Directory where verbosed output is generated")
    parser.add_argument('--manual', type=str)
    args = parser.parse_args()

    if args.manual:
        with open(args.manual) as fp:
            for line in fp:
                a, b = line.strip().split()
                manual_align[a] = b

    linecount = 0
    opened = []

    def open_tracked(path, mode):
        # Remember every handle so the finally block can close all of them.
        handle = open(path, mode)
        opened.append(handle)
        return handle

    try:
        inp = open_tracked(args.input, "r")
        out_sent = open_tracked(args.osent, "w")
        out_sent_g = open_tracked(args.osent + ".gin", "w")
        out_log = open_tracked(args.ologic, "w")
        out_log_g = open_tracked(args.ologic + ".gin", "w")
        out_log_p = open_tracked(args.ologic + ".parse", "w")
        out_w = open_tracked(args.osent + ".word", "w")
        out = open_tracked(args.output, "w") if args.output else None

        #### For every well formed query in file extract the rule!
        for line in inp:
            line = line.strip()
            (sentence_node, query_node) = extract(line, 0, "")[0][0].childs

            #### Sentence and node
            sentence = [node.label for node in sentence_node.childs]
            # Drop a trailing full stop / question mark token.
            if sentence[-1] == "'.'" or sentence[-1] == "?":
                sentence = sentence[:-1]

            for word in sentence:
                words.add(word)

            #### logical rule extraction
            # Each previously unseen key gets the next 1-based number.
            var_map = defaultdict(lambda: len(var_map) + 1)
            query_node = construct_query_node(query_node, [])
            query_node = change_var_to_x(query_node, var_map)

            rules = transform_into_rule([], query_node, start=True)

            #### Printing
            sentence_line = " ".join(sentence) + "\n"
            out_sent.write(sentence_line)
            out_sent_g.write(sentence_line)

            logical_rule = [str_logical_rule(rule[1], rule[4]) for rule in rules]
            logical_rule_giza = [str_giza_in_rule(rule) for rule in rules]
            if len(logical_rule) != len(logical_rule_giza):
                # sys.stderr.write emits the same text as the former
                # Python-2-only ``print >> sys.stderr`` statement.
                sys.stderr.write("Rule size doesn't match %s %s\n"
                                 % (logical_rule_giza, logical_rule))

            out_log.write(" ".join(logical_rule) + "\n")
            out_log_g.write(" ".join(logical_rule_giza) + "\n")
            out_log_p.write(query_representation(query_node, {value: key for key, value in var_map.items()}, input_generator=True) + "\n")

            if out is not None:
                out.write(sentence_line)
                for rule in rules:
                    out.write(str_logical_rule(rule[1], rule[4]) + " ||| " + str_giza_in_rule(rule) + "\n")
                out.write("------------------------------------\n")
            linecount += 1

        #### ADDITIONAL information for alignment
        #### Every word is aligned to itself
        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source -- confirm the manual-alignment loop belongs inside the
        # ten-fold repetition.
        for _ in range(10):
            for word in sorted(words):
                out_sent_g.write(word + "\n")
                out_log_g.write(word + "\n")
                out_w.write(word + "\n")

            for word1, word2 in manual_align.items():
                out_sent_g.write(word1 + "\n")
                out_log_g.write(word2 + "\n")
                out_w.write(word1 + "\n")

        #### Handle something like 'south dakota' so add alignment south -> south_dakota and dakota -> south_dakota
        for literals in many_literals:
            literals = literals.split(' ')
            joined = '_'.join(literals)
            for word in literals:
                # One line per word on each side keeps the two .gin files parallel.
                out_sent_g.write(word + "\n")
                out_log_g.write(joined + "\n")
    finally:
        for handle in opened:
            handle.close()

    sys.stderr.write("Successfully extracting : %d pair(s).\n" % linecount)