-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.py
119 lines (94 loc) · 2.76 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from Tree import Node
import utils
from Train_LBFGS import Train_LBFGS
def stringToTree(s, loc):
    """Parse one subtree of a bracketed tree string, starting at ``loc[0]``.

    The text is read as ``label(left)(right)``: a label made of every
    character up to the next parenthesis, optionally followed by a
    parenthesised left subtree and then a parenthesised right subtree.

    ``loc`` is a one-element list used as a mutable cursor into ``s``; it is
    advanced past everything this call consumes so that the caller can
    continue from there.

    Returns the ``Node`` built for the label (with ``left``/``right`` set when
    the corresponding subtree is present), or ``None`` when the cursor is
    already at or beyond the end of ``s``.
    """
    end = len(s)
    if loc[0] >= end:
        return None

    # The label runs up to the next parenthesis or the end of the input.
    start = loc[0]
    stop = start
    while stop < end and s[stop] not in ('(', ')'):
        stop += 1
    res = Node(s[start:stop])
    loc[0] = stop

    # Every look-ahead is bounds-checked: a label that ends the string
    # ("A") or a subtree whose closing bracket is the last character
    # ("A(B)") must not index past the end of ``s``.
    if loc[0] < end and s[loc[0]] == '(':
        loc[0] += 1
        res.left = stringToTree(s, loc)
        loc[0] += 1  # step over the ')' that closes the left subtree
        if loc[0] < end and s[loc[0]] == '(':
            loc[0] += 1
            res.right = stringToTree(s, loc)
            loc[0] += 1  # step over the ')' that closes the right subtree
    return res
def stringToTreeWrapper(s):
    """Parse the bracketed tree string ``s`` from its first character.

    ``stringToTree`` tracks its position through a one-element list, so a
    fresh cursor starting at index 0 is created for every call.
    """
    cursor = [0]
    tree = stringToTree(s, cursor)
    return tree
def printInorder(root):
    """Return the ``data`` of the leaves under ``root``, left to right.

    A missing node (``None``) contributes an empty list and a leaf
    contributes a one-element list. Every non-leaf node additionally prints
    the list gathered for its own subtree before returning it.
    """
    if root is None:
        return []

    left_child, right_child = root.left, root.right
    if left_child is None and right_child is None:
        return [root.data]

    gathered = printInorder(left_child) + printInorder(right_child)
    print(gathered)
    return gathered
#my_string = "(ROOT (S (NP (PRP I)) (@S (VP (VBD shot) (NP (@NP (@NP (@NP (NP (DT an) (NN elephant)) (, ,)) (NP (NN rabbit))) (CC and)) (NP (NP (NN goat)) (PP (IN in) (NP (PRP$ my) (NNS pajamas)))))) (. .))))"
#my_string=my_string.replace("(","").replace(")","").replace("ROOT","")
#my_string="(ROOT (S (NP (PRP He)) (VP (VBD jumped) (PP (IN over) (NP (DT the) (NN wall))))))"
#s=my_string.split()
#content=''
# Load the parses to train on, one bracketed parse per line of out.txt.
# The context manager closes the file even if reading it fails.
with open('out.txt', "r") as f:
    lines = list(f)

trees_train = []
for my_string in lines:
    s = my_string.split()
    if not s:
        # A blank line would parse to None, which the tree passes further
        # down cannot handle, so it is skipped here.
        continue
    # Give every token that closes a bracket its own pair of parentheses
    # (e.g. "He))" becomes "(He)))") so it is parsed as a child node.
    for i, token in enumerate(s):
        if token.endswith(')'):
            s[i] = "(" + token + ")"
    pre = "".join(s)
    # Drop the first six characters and the last one; this assumes each
    # line opens with "(ROOT (" as in the sample strings kept in this file.
    pre = pre[6:-1]
    print(pre)
    root = stringToTreeWrapper(pre)
    trees_train.append(root)
def getMeWord(root):
    """Return the ``word`` of the node reached by following ``left`` links.

    Starting at ``root``, descend through ``left`` children until a node
    without a left child is found, then return that node's ``word``.
    """
    node = root
    while node.left is not None:
        node = node.left
    return node.word
def preOrder(root):
    """Collapse single-child chains below ``root`` in place.

    A node that has a left child but no right child takes the ``word``
    found by ``getMeWord`` on that child and then drops the child, which
    turns the node into a leaf. Nodes of any other shape are left alone and
    both of their children are visited.

    ``None`` is accepted and ignored, so leaves (whose children are both
    ``None``) and empty trees no longer raise ``AttributeError``.
    """
    if root is None:
        return
    if root.left is not None and root.right is None:
        root.word = getMeWord(root.left)
        root.left = None
        return
    preOrder(root.left)
    preOrder(root.right)
# Collapse the single-child chains of every parsed tree, then print what
# the tree's word_yield() returns.
for tree in trees_train:
    preOrder(tree)
    print(tree.word_yield())
def check(root):
    """Report every node that has a left child but no right child.

    Prints "WRONG!!!!!!!!!!!" once for each such node in the tree rooted at
    ``root``. The whole tree is walked, not just the root, so a leftover
    single-child node deeper down is reported as well. ``None`` is accepted
    and ignored.
    """
    if root is None:
        return
    if root.left is not None and root.right is None:
        print("WRONG!!!!!!!!!!!")
    check(root.left)
    check(root.right)
# Run the shape check over every tree before training starts.
for tree in trees_train:
    check(tree)

# Build the dictionary from the training trees and hand both to the
# trainer (presumably an L-BFGS optimiser, going by the class name).
dictionary = utils.constructCompactDictionary(trees_train)
trainObj = Train_LBFGS(dictionary, trees_train)
optResult = trainObj.train()
#for i in range(len(s)):
# if s[i].endswith(')'):
# s[i]="("+s[i]+")"
#pre="".join(s)
#pre=pre[6:(len(pre)-1)]
#print pre
#root=stringToTreeWrapper(pre)
#print root
#print root.word_yield()
#print "Preorder traversal of the constructed tree:"
#printInorder(root)
#print "######Copied#####"
#printInorder(root.clone())