-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.py
68 lines (59 loc) · 1.75 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import common as c
from common import Logger as Log
import eisner
def parse(sentences, weights):
    """Parse each sentence with ``eisner.parse`` and score the predicted heads.

    Args:
        sentences: Sized sequence of sentences. Each sentence is a sequence
            of word objects exposing ``id``, ``form``, ``lemma``,
            ``cpostag``, ``postag``, ``feats``, ``head`` and ``deprel``.
            The element at index 0 of every sentence is skipped
            (presumably an artificial ROOT token -- confirm against the
            reader in ``common``).
        weights: Model weights, passed through unchanged to ``eisner.parse``.

    Returns:
        A ``(results, accuracy)`` tuple. ``results`` contains one list per
        sentence, each holding tab-separated token lines in which the head
        column is the predicted head. ``accuracy`` is the fraction of
        emitted tokens whose predicted head equals ``word.head``; it is
        ``0.0`` when no token was emitted at all.
    """
    results = []
    # Only the per-sentence edge features are used; the second value returned
    # by c.extract is not needed here.
    edge_feats_list, _ = c.extract(sentences)
    size = len(sentences)
    correct = 0
    total = 0

    for count, (sentence, edge_feats) in enumerate(
            zip(sentences, edge_feats_list), 1):
        Log.i("Parsing sentence: %d of %d" % (count, size))
        heads = eisner.parse(sentence, weights, edge_feats)

        result = []
        for index, word in enumerate(sentence):
            if index == 0:
                continue
            token = [
                str(word.id),
                word.form,
                word.lemma,
                word.cpostag,
                word.postag,
                word.feats,
                str(heads[index]),
                word.deprel,
            ]
            if heads[index] == word.head:
                correct += 1
            total += 1
            result.append("\t".join(token))
        results.append(result)

    # Convert an operand (not the quotient) so the ratio is a true division on
    # every interpreter, and avoid dividing by zero when nothing was scored.
    accuracy = float(correct) / total if total else 0.0
    return results, accuracy
def main(args):
    """Parse a CoNLL file with a stored model and print the predictions.

    Args:
        args: Sequence of command-line values. ``args[0]`` is the path
            handed to ``c.readconllfile`` and ``args[1]`` is the model file
            handed to ``c.load``. When fewer than two values are supplied
            a warning is logged and the function returns without doing
            anything else.

    The token lines of each parsed sentence are printed one per line,
    followed by an empty line, and the accuracy is logged at the end.
    """
    if len(args) < 2:
        Log.w("Arguments Error")
        return

    conll_path, model_path = args[0], args[1]

    Log.i("Training file: %s" % conll_path)
    Log.i("----------------")
    corpus = c.readconllfile(conll_path)

    Log.i("Model file: %s" % model_path)
    Log.i("----------------")
    # c.load returns a pair; only the weights are needed for parsing.
    _featdict, model_weights = c.load(model_path)

    parsed, accuracy = parse(corpus, model_weights)
    for sentence_lines in parsed:
        for line in sentence_lines:
            print(line)
        # Empty line separating consecutive sentences in the output.
        print()

    Log.i("[DONE] accuracy: {:.2%}".format(accuracy))
if __name__ == "__main__":
    # Script entry point: configure logging, run main() on the arguments
    # returned by c.readargs(), then finalize the logger.
    Log.setConfig(loglevel=Log.DEBUG, verbose=True)
    try:
        main(c.readargs())
    finally:
        # Run the logger's finalize step even if argument reading or main()
        # raises (presumably it flushes/closes log output -- confirm in
        # common.Logger).
        Log.finalize()