def test():
    """Smoke-test turning two CCG parses into semantic trees.

    Builds two CCG XML derivations that differ only in the object noun
    ("guitar" vs. "instrument"), parses each with
    ``SyntacticTree.parse_tree_from_xml_string``, loads a lexical space from
    a fixed path on disk, converts both syntactic trees to semantic trees
    with a ``WeightedAdditive`` composition model, and prints the two
    semantic trees followed by the node labels of the first one.

    Takes no arguments and returns nothing; all results go to stdout.
    """
    # Earlier bracketed-string form of the same inputs, kept for reference:
    # syntactic_tree1 = SyntacticTree.read_tree("VP (VBZ play-v) (NP (N guitar-n))")
    # syntactic_tree2 = SyntacticTree.read_tree("VP (VBZ play-v) (NP (N instrument-n))")

    # Raw literals on purpose: the CCG categories contain a backslash followed
    # by "N".  A plain Python 2 byte string keeps that pair verbatim, but
    # Python 3 treats it as the start of a named-unicode escape and refuses to
    # compile.  Raw strings yield the same bytes on both.  The pieces are
    # joined by implicit literal concatenation, so the resulting text is the
    # single-line XML with single-space separators.
    ccg_template = (
        r' <ccg>'
        r' <rule type="fa" cat="S[dcl]\NP">'
        r' <lf start="1" span="1" word="play-v" lemma="play" pos="VBZ"'
        r' chunk="I-VP" entity="O" cat="(S[dcl]\NP)/NP" />'
        r' <rule type="lex" cat="NP">'
        r' <lf start="2" span="1" word="%(noun)s-n" lemma="%(noun)s" pos="NN"'
        r' chunk="I-NP" entity="O" cat="N" />'
        r' </rule>'
        r' </rule>'
        r' </ccg>'
    )
    xml_string1 = ccg_template % {"noun": "guitar"}
    xml_string2 = ccg_template % {"noun": "instrument"}

    syntactic_tree1 = SyntacticTree.parse_tree_from_xml_string(xml_string1)
    syntactic_tree2 = SyntacticTree.parse_tree_from_xml_string(xml_string2)

    lexical_space = io_utils.load(
        "/home/thenghiapham/work/project/tree_kernel/spaces/lexical_ppmi_svd300.pkl"
    )
    # NOTE(review): the kernel is constructed but never used below --
    # presumably a leftover from a kernel computation; confirm before removing.
    kernel = SemanticTreeKernel(1.0)
    composition_model = WeightedAdditive()

    semantic_tree1 = syntactic_tree_2_semantic_tree(
        syntactic_tree1, lexical_space, composition_model
    )
    semantic_tree2 = syntactic_tree_2_semantic_tree(
        syntactic_tree2, lexical_space, composition_model
    )

    # Single-argument, parenthesised print: identical output whether print is
    # the Python 2 statement or the Python 3 function.
    print(semantic_tree1)
    print(semantic_tree2)
    print([node._label for node in semantic_tree1.get_nodes()])
def test():
    """Print a greeting, two bracketed parse trees and the first tree's labels.

    Reads the trees for "kill man" and "murder man" with
    ``SyntacticTree.read_tree``, constructs a ``SyntacticTreeKernel`` with
    parameter 0.4, then prints both trees and the ``_label`` of every node
    returned by ``get_nodes()`` on the first tree.
    """
    print("hello")

    kill_tree = SyntacticTree.read_tree("(VP (VBZ kill) (NP (N man)))")
    murder_tree = SyntacticTree.read_tree("(VP (VBZ murder) (NP (N man)))")

    # Constructed here, but not referenced again in this function.
    kernel = SyntacticTreeKernel(0.4)

    for tree in (kill_tree, murder_tree):
        print(tree)

    labels = [node._label for node in kill_tree.get_nodes()]
    print(labels)
def test():
    """Print a greeting, two bracketed parse trees and the first tree's labels.

    Reads the trees for "play guitar" and "play instrument" with
    ``SyntacticTree.read_tree``, loads a lexical space from a fixed path,
    constructs a ``SemanticSyntacticTreeKernel`` from it, then prints both
    trees and the ``_label`` of every node returned by ``get_nodes()`` on the
    first tree.
    """
    print("hello")

    guitar_tree = SyntacticTree.read_tree("VP (VBZ play-v) (NP (N guitar-n))")
    instrument_tree = SyntacticTree.read_tree("VP (VBZ play-v) (NP (N instrument-n))")

    space_path = "/home/thenghiapham/work/project/tree_kernel/spaces/lexical_ppmi_svd300.pkl"
    lexical_space = io_utils.load(space_path)

    # Constructed here, but not referenced again in this function.
    kernel = SemanticSyntacticTreeKernel(1.0, lexical_space)

    for tree in (guitar_tree, instrument_tree):
        print(tree)

    labels = [node._label for node in guitar_tree.get_nodes()]
    print(labels)
def test_read_tree(self):
    """Reading a bracketed tree string and printing it back must round-trip.

    Each fixture (``self.tree_string1``, ``self.tree_string2``) is parsed
    with ``SyntacticTree.read_tree`` and the ``str()`` of the result is
    compared with the input string.
    """
    for expected in (self.tree_string1, self.tree_string2):
        round_tripped = str(SyntacticTree.read_tree(expected))
        self.assertEqual(expected, round_tripped,
                         "tree strings must be the same")
def test_parse_tree(self):
    """Parsing an XML derivation must print as the matching bracketed tree.

    Each XML fixture is parsed with
    ``SyntacticTree.parse_tree_from_xml_string`` and the ``str()`` of the
    result is compared with the tree-string fixture paired with it.
    """
    fixtures = (
        (self.xml_string1, self.tree_string1),
        (self.xml_string2, self.tree_string2),
    )
    for xml_source, expected in fixtures:
        parsed = SyntacticTree.parse_tree_from_xml_string(xml_source)
        self.assertEqual(expected, str(parsed),
                         "tree strings must be the same")
from tree.papfunc import Papfunc_SemanticNode
from tree.semantic_node import SemanticNode
from tree.syntactic_tree import SyntacticTree
from composes.semantic_space.space import Space
from examples import test_vector_file_prefix, test_matrix_file_prefix

# FIRST TEST
# A one-word CCG derivation ("dog").  The literal is split over several lines
# with implicit concatenation; the resulting text is unchanged.
xml_string = (
    ' <ccg>'
    ' <lf start="0" span="1" word="dog" lemma="dog" pos="NN"'
    ' chunk="I-NP" entity="O" cat="N" />'
    ' </ccg> '
)
syntactic_tree = SyntacticTree.parse_tree_from_xml_string(xml_string)

# Both spaces are built from a "<prefix>.dm" data file and a "<prefix>.rows"
# row file, in "dm" format.
vecfilepref = test_vector_file_prefix
matfilepref = test_matrix_file_prefix
vecspace = Space.build(
    data=vecfilepref + ".dm",
    rows=vecfilepref + ".rows",
    format="dm",
)
matspace = Space.build(
    data=matfilepref + ".dm",
    rows=matfilepref + ".rows",
    format="dm",
)

# Wrap the tree root in a semantic node, then derive the papfunc node from it.
semnode = SemanticNode.create_semantic_node(syntactic_tree.root, None)
papnode = Papfunc_SemanticNode.create_papfunc_node(semnode, vecspace, matspace)

# Single-argument prints: "label value" is formatted up front so the emitted
# text is the same as printing the label and the value separated by a space.
print("*****")
print("Syntactic tree: %s" % (semnode,))
print("Symbolic representation: %s" % (papnode._matrep,))
print("Numeric representation:")
for x in papnode._numrep:
    print(x)
# Accumulator for the XML text of the tree currently being read, and a flag
# that is 1 between an opening <ccg> line and its closing </ccg> line.
currtree = ""
intree = 0
# Process each xml file in the input directory.
# NOTE(review): `infile`, the counters `sent` / `succ` / `vecs`, the output
# handles `outfile` / `matfile` and the spaces `vecspace` / `matspace` are
# defined outside this fragment.
# NOTE(review): nothing visible here appends the lines *between* the <ccg>
# and </ccg> tags to `currtree`, nor clears `currtree` after a tree has been
# handled -- presumably that happens in code outside this view; confirm.
with open(sys.argv[1] + "/" + infile) as data:
    for line in data:
        if line.startswith("<ccg>"):
            # Opening tag: one more sentence seen.
            currtree = currtree + line
            intree = 1
            sent += 1
        elif line.startswith("</ccg>"):
            # Closing tag: the accumulated text is handed to the parser.
            currtree = currtree + line
            intree = 0
            try:
                syntactic_tree = SyntacticTree.parse_tree_from_xml_string(currtree)
                succ += 1
                semnode = SemanticNode.create_semantic_node(syntactic_tree.root, None)
                papnode = Papfunc_SemanticNode.create_papfunc_node(
                    semnode, vecspace, matspace
                )
                try:
                    # Fetch the vector row once instead of once per dimension.
                    dims = papnode.get_vector().mat.A[0].tolist()
                    # Tab-separated values, newline deliberately held back:
                    # it is written only after the matrix representation has
                    # been emitted to `matfile`.
                    print("\t".join(str(dim) for dim in dims), end='', file=outfile)
                    print(papnode.get_matrep(), file=matfile)
                    print("", file=outfile)
                    vecs += 1
                except AttributeError:
                    print("Papnode %s doesn't have a vector representation" % papnode)
            except ParseError:
                # This message used to be a bare string expression and was
                # therefore never shown; report it the same way as the
                # missing-vector message above.
                print("THIS TREE WAS NOT XML-PARSABLE: %s" % currtree)