def process_binaryrule(parent): List = [] List.append("binary") List.append(parent.attrib["type"] + " " + parent.attrib["description"]) i = 0 while i < len(parent): if parent[i].tag == "cat": List.append(process_cat(parent[i][0])) break i += 1 while i < len(parent): if parent[i].tag == "sem": supertag = DRSnode() supertag.init_from_xml(parent[i][0]) add_pointer(supertag, 1000) List.append(json.dumps(supertag.serialization())) break i += 1 print filename, der print List[0], List[1] print List[2] print List[3].encode("UTF-8") print
def process_drs(parent, out):
    """Serialize one ``drs`` element as a single JSON line written to *out*.

    parent -- element whose tag must be "drs" (checked with an assertion).
    out    -- object with a ``write`` method that receives the JSON text
              followed by a newline.
    """
    assert parent.tag == "drs"

    drs = DRSnode()
    drs.init_from_xml(parent)
    add_pointer(drs, 1000)

    serialized = json.dumps(drs.serialization())
    out.write(serialized + "\n")
def process_rule(parent): for child in parent: if child.tag == "binaryrule": process_rule(child) elif child.tag == "unaryrule": process_rule(child) elif child.tag == "lex": d = {} for cc in child: if cc.tag == "token": d["token"] = cc.text if cc.tag == "tag": d[cc.attrib["type"]] = cc.text if cc.tag == "cat": d["cat"] = process_cat(cc[0]) if cc.tag == "sem": find = True supertag = DRSnode() supertag.init_from_xml(cc[0]) d["sem"] = supertag.serialization() d["sem"] = general(d["lemma"], d["sem"]) print "\t".join([ d["token"].encode("utf-8"), d["lemma"].encode("utf-8"), d["pos"], d["cat"], json.dumps(d["sem"]).encode("utf-8") ])
def process_lex(parent): print filename, der print "lex lex" for child in parent: if child.tag == "cat": print process_cat(child[0]) if child.tag == "sem": find = True supertag = DRSnode() supertag.init_from_xml(child[0]) print json.dumps(supertag.serialization()) break assert find
def process_unaryrule(parent): List = [] List.append("unary") List.append(parent.attrib["type"] + " " + parent.attrib["description"]) i = 0 while i < len(parent): if parent[i].tag == "cat": List.append(process_cat(parent[i][0])) break i += 1 while i < len(parent): if parent[i].tag == "sem": supertag = DRSnode() supertag.init_from_xml(parent[i][0]) List.append(json.dumps(supertag.serialization())) break i += 1 i = 0 cnt = 0 while i < len(parent): if parent[i].tag in ["binaryrule", "unaryrule", "lex"]: find = False for child in parent[i]: if child.tag == "cat": List.append(process_cat(child[0])) if child.tag == "sem": find = True supertag = DRSnode() supertag.init_from_xml(child[0]) List.append(json.dumps(supertag.serialization())) break assert find cnt += 1 i += 1 assert cnt == 1 print filename, der print List[0], List[1] print List[2] print List[3].encode("UTF-8") print List[4] print List[5].encode("UTF-8") print for child in parent: if child.tag == "binaryrule": process_binaryrule(child) elif child.tag == "unaryrule": process_unaryrule(child)
def process_lex(parent): d = {} find = False for child in parent: if child.tag == "token": d["token"] = child.text if child.tag == "tag": d[child.attrib["type"]] = child.text if child.tag == "cat": d["cat"] = process_cat(child[0]) if child.tag == "sem": find = True supertag = DRSnode() supertag.init_from_xml(child[0]) d["sem"] = supertag.serialization() d["sem"] = general(d["lemma"], d["sem"]) print "\t".join([ d["token"].encode("utf-8"), d["lemma"].encode("utf-8"), d["pos"], d["cat"], json.dumps(d["sem"]).encode("utf-8") ]) assert find
if __name__ == "__main__": L = [] eq = 0 total = 0 print "#"," ".join(sys.argv) for line in open(sys.argv[1]): line = line.strip() if line == "": if L[0] == "illegal": L = [] continue total += 1 target = json.loads(L[3], object_hook=ascii_encode_dict) target_DRSnode = DRSnode() target_DRSnode.unserialization(target) simplify_temporal(target_DRSnode) #normal_variables(target_DRSnode) L[3] = json.dumps(target_DRSnode.serialization()) print "\n".join(L) print L = [] else: if line[0] == "#": continue L.append(line)
travel4(node) if __name__ == "__main__": L = [] cnt = 0 total = 0 print "#", " ".join(sys.argv) for line in open(sys.argv[1]): line = line.strip() if line == "": if L[0] == "illegal": L = [] continue total += 1 target = json.loads(L[3], object_hook=ascii_encode_dict) target_DRSnode = DRSnode() target_DRSnode.unserialization(target) #print "\n".join(L) #print dummy_node = DRSnode() dummy_node.expression.append(target_DRSnode) merge(dummy_node) L[3] = json.dumps(dummy_node.expression[0].serialization()) print "\n".join(L) print #print cnt, total L = [] else: if line[0] == "#": continue
def tc2(node, start):
    """Apply the N -> NP type change, wrapping an expression in a "def" alfa.

    A fresh variable name is formed as ``start + "0"``.  The expression slot
    returned by ``modify_attrib(node, "v1", <fresh>)`` is replaced with an
    "alfa" node (attribute ``type`` = "def") that holds the slot's previous
    content followed by an "app" node applying variable "v1" to the fresh
    variable.  The fresh variable is registered on *node* via
    ``add_variable`` and variables are renormalized.

    Returns *node*, which is modified in place.
    """
    # N->NP. alfa def
    fresh = start + "0"

    application = DRSnode()
    application.type = "app"

    functor = DRSnode()
    functor.type = "var"
    functor.text = "v1"

    argument = DRSnode()
    argument.type = "var"
    argument.text = fresh

    for operand in (functor, argument):
        application.expression.append(operand)

    alfa = DRSnode()
    alfa.type = "alfa"
    alfa.attrib = {"type": "def"}

    # modify_attrib yields the node owning the target slot and the slot index.
    holder, slot = modify_attrib(node, "v1", fresh)
    node.add_variable(fresh, True)

    previous = holder.expression[slot]
    alfa.expression.append(previous)
    alfa.expression.append(application)
    holder.expression[slot] = alfa

    normal_variables(node)
    return node
node_lam = get_app_lam(node) #vi. @ lam_vj -> lam_vj node_merge = get_merge(node_lam) node_d if __name__ == "__main__": L = [] eq = 0 total = 0 for line in open(sys.argv[1]): line = line.strip() if line == "": #print "\n".join(L) #print total += 1 target = json.loads(L[3], object_hook=ascii_encode_dict) target_DRSnode = DRSnode() target_DRSnode.unserialization(target) normal_variables(target_DRSnode) target = json.dumps(target_DRSnode.serialization()) source = json.loads(L[5], object_hook=ascii_encode_dict) source_DRSnode = DRSnode() source_DRSnode.unserialization(source) source_DRSnode = tc1(source_DRSnode, "x") change = json.dumps(source_DRSnode.serialization()) #l = "" #for i in range(len(target)): # if target[i] == change[i]: # l += target[i] # else:
def ftr(node):
    """Wrap *node* in a lambda abstraction that applies its variable to it.

    Builds a "lam" node whose expressions are a variable "v0" and an "app"
    node applying another "v0" variable to *node*, runs
    ``normal_variables`` on the result, and returns the new "lam" node.
    """
    abstraction = DRSnode()
    abstraction.type = "lam"

    bound = DRSnode()
    bound.type = "var"
    bound.text = "v0"
    abstraction.expression.append(bound)

    application = DRSnode()
    application.type = "app"

    # A separate node object is used for the occurrence inside the body.
    occurrence = DRSnode()
    occurrence.type = "var"
    occurrence.text = "v0"

    for operand in (occurrence, node):
        application.expression.append(operand)

    abstraction.expression.append(application)
    normal_variables(abstraction)
    return abstraction