-
Notifications
You must be signed in to change notification settings - Fork 0
/
senseval.py
executable file
·41 lines (37 loc) · 1.19 KB
/
senseval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import re, sys, os
from nltk.corpus import wordnet as wn
def readAnswers(ifile="corpora/answers+misc/tasks/english-all-words/key"):
    """Read an answer key file and map instance ids to WordNet lemmas.

    The whole file is scanned for pairs of an id token (a
    whitespace-delimited token starting with ``d``) followed by a sense
    key token (a token containing ``:``). Each sense key is resolved with
    ``wn.lemma_from_key``.

    Args:
        ifile: Path of the answer key file.

    Returns:
        A dict mapping each id to the object returned by
        ``wn.lemma_from_key`` for its sense key. Pairs whose sense key
        cannot be resolved are left out.
    """
    pattern = re.compile(r"(?P<id>d\S*)\s+(?P<sense>\S+:\S+)")

    # Read everything up front so the handle is closed before the
    # (potentially slow) WordNet lookups start.
    with open(ifile) as key_file:
        contents = key_file.read()

    answers = {}
    for match in pattern.finditer(contents):
        try:
            lemma = wn.lemma_from_key(match.group("sense"))
        except Exception:
            # Best effort: a sense key that WordNet cannot resolve is
            # skipped instead of aborting the whole read. Unlike a bare
            # ``except``, this lets KeyboardInterrupt/SystemExit through.
            continue
        answers[match.group("id")] = lemma
    return answers
def readTests(ifile="corpora/english-all-words/test/eng-all-words.test.xml"):
    """Read a test file into a token list and an index-to-id table.

    The file is scanned line by line. A stripped line contributes a token
    when it starts with either a ``<head id="...">form</head>`` element
    (the token is ``form``) or a run of word characters (the token is
    that run). Every other line is skipped.

    Args:
        ifile: Path of the test file.

    Returns:
        A ``(text, idtable)`` tuple. ``text`` is the list of tokens in
        file order. ``idtable`` maps the position in ``text`` of each
        ``<head>`` token with a non-empty id to that id.
    """
    pattern = re.compile(
        r"""(<head id="(?P<id>\S*)">(?P<form>\S*)</head>)|(?P<simpleword>\w+)"""
    )
    idtable = {}
    text = []
    with open(ifile) as test_file:
        for line in test_file:
            match = pattern.match(line.strip())
            if not match:
                continue
            head_id = match.group("id")
            if head_id is None:
                # The plain-word alternative matched.
                text.append(match.group("simpleword"))
                continue
            if head_id:
                # len(text) is the index the form is about to occupy.
                idtable[len(text)] = head_id
            # A <head> with an empty id still contributes its form; it is
            # simply not registered in idtable.
            text.append(match.group("form"))
    return text, idtable
def canonicalAnswers(text, answers, idtable):
    """Pair every token with its answer, or ``"ignore"`` if it has none.

    Args:
        text: Sequence of tokens.
        answers: Mapping from instance id to answer.
        idtable: Mapping from a position in ``text`` to an instance id.

    Returns:
        A list of ``(token, answer)`` tuples, one per token and in the
        same order as ``text``. ``answer`` is ``answers[idtable[i]]``
        when both lookups succeed, otherwise the string ``"ignore"``.
    """
    markedup = []
    for index, word in enumerate(text):
        try:
            answer = answers[idtable[index]]
        except LookupError:
            # Either the position has no id or the id has no answer.
            answer = "ignore"
        markedup.append((word, answer))
    return markedup