-
Notifications
You must be signed in to change notification settings - Fork 0
/
dd_two_taggers.py
70 lines (53 loc) · 1.72 KB
/
dd_two_taggers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/python
'''
Dual decomposition for two tagger models.
Created on Sep 21, 2013
@author: swabha
'''
from collections import defaultdict
import utils, cky, viterbi
'''
Executes the dual decomposition algorithm
Note here that the nonterminals are more in number than the tags
'''
def run(sentence, tagset, hmm_prob, max_iterations=200, step_size=15):
    '''
    Run dual decomposition between two Viterbi taggers until they agree.

    Both taggers are decoded with viterbi.run on the same sentence, tagset
    and hmm_prob; they differ only in the dual parameters (u1 vs. u2) they
    are given. After every round in which the two tag sequences differ,
    the dual parameters are adjusted by update().

    sentence       -- sequence of tokens; only its length is used here
    tagset         -- iterable of tags; one dual entry is created per tag
    hmm_prob       -- model parameters, passed through to viterbi.run
    max_iterations -- maximum number of decoding rounds (default 200)
    step_size      -- amount applied by each dual update (default 15)

    Returns (k, tags1, tags2). k is the zero-based round in which the two
    taggers produced matching output, or -1 if they never matched within
    max_iterations. tags1/tags2 are the most recently decoded sequences
    (None if no round was run).
    '''
    n = len(sentence)

    # Dual parameters, indexed as u[position][tag], all starting at zero.
    # defaultdict() without a factory behaves like a plain dict; the type is
    # kept because these objects are handed to viterbi.run.
    u1 = defaultdict()
    u2 = defaultdict()
    for i in range(n):
        u1[i] = defaultdict()
        u2[i] = defaultdict()
        for t in tagset:
            u1[i][t] = 0
            u2[i][t] = 0

    tags1 = tags2 = None
    for k in range(max_iterations):
        tags1 = viterbi.run(sentence, tagset, hmm_prob, u1)
        tags2 = viterbi.run(sentence, tagset, hmm_prob, u2)
        if k == 0:
            # Single-argument print calls: same output on Python 2 and 3.
            print("initial tags:")
            print("%s :tagger1" % (tags1,))
            print("%s :tagger2" % (tags2,))
        # Convergence means the two taggers agree. The previous code
        # returned on *disagreement*, which stopped at the first mismatch
        # and kept looping while the outputs were already identical.
        if not disagree(tags1, tags2):
            return k, tags1, tags2
        update(tags1, tags2, u1, u2, step_size)
    return -1, tags1, tags2  # did not converge within max_iterations
def update(tags1, tags2, u1, u2, step_size):
    '''
    Adjust the dual parameters at every position where the taggers differ.

    tags1, tags2 -- tag sequences produced by the two taggers
    u1, u2       -- dual parameters indexed as u[position][tag]; modified
                    in place
    step_size    -- amount added to u1 and subtracted from u2

    Positions where both taggers chose the same tag are left untouched:
    there is nothing to correct there. Returns None.
    '''
    for i, (t1, t2) in enumerate(zip(tags1, tags2)):
        # Only a mismatch yields an update; the previous `==` test changed
        # the duals exactly where the taggers already agreed.
        if t1 != t2:
            # NOTE(review): the sign convention (+ for u1, - for u2) is
            # kept from the original; whether it rewards or penalises a
            # tag depends on how viterbi.run applies u -- confirm.
            u1[i][t1] += step_size
            u2[i][t2] -= step_size
def disagree(tags1, tags2):
    '''
    Return True if the two tag sequences differ, False if they are identical.

    Sequences of different lengths are always reported as disagreeing
    (previously this either raised IndexError or silently ignored the
    extra tags in tags2). Two empty sequences do not disagree.
    '''
    if len(tags1) != len(tags2):
        return True
    return any(a != b for a, b in zip(tags1, tags2))
if __name__ == "__main__":
    # Small demo: check that the POS tags embedded in a bracketed parse
    # match an independently supplied tag sequence.
    # The original called agree(parse, tags), which is neither defined nor
    # imported in this file and failed with NameError; the comparison is
    # now done with disagree().
    # NOTE(review): this reconstructs what agree() presumably did -- confirm.
    import re

    parse = "(S (NP (NNP Ms.) (NNP Haag)) (VP (VBZ plays) (NP (NNP Elianti))) (. .))"
    tags = ["NNP", "NNP", "VBZ", "NNP", "."]
    # A preterminal is "(TAG word)": a bracket pair with no nested brackets.
    parse_tags = re.findall(r"\(([^\s()]+) [^\s()]+\)", parse)
    print(not disagree(parse_tags, tags))