-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_baseline.py
98 lines (71 loc) · 3.24 KB
/
test_baseline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import baseline
import globoquotes
import unittest
class TestBaseline(unittest.TestCase):
    """Tests for the token-level helpers exposed by the ``baseline`` module.

    ``test_detoken``, ``test_firstLetterUpperCase`` and
    ``test_verbSpeechNeighb`` compare results against hand-written expected
    values.  The remaining tests are smoke tests: they only check that the
    function under test runs without raising, and some of them print the
    result next to each token for manual inspection.
    """

    # Corpus returned by globoquotes.load(); populated once in setUpClass and
    # shared (read-only) by every test in this class.
    _corpus = None

    # Corpus sentence indices used as fixtures by the quotation tests: the
    # first fits the first regular-expression rule, the second fits the
    # second rule.
    _SAMPLE_SENTENCES = (0, 231)

    @classmethod
    def setUpClass(cls):
        """Load the GloboQuotes corpus once for the whole test class."""
        # NOTE(review): relative path -- presumably resolved against the
        # current working directory, so run the tests from the directory
        # that contains ``GloboQuotes/`` (TODO confirm).
        cls._corpus = globoquotes.load("GloboQuotes/corpus-globocom-cv.txt")

    def test_detoken(self):
        """``detoken`` returns the joined text and a per-character token map."""
        tokens = ["O", "cachorro", "abanou", "o", "rabo", "."]
        expected_text = " O cachorro abanou o rabo .\n"
        # One entry per character of expected_text, holding the index of the
        # token that character is attributed to.
        expected_map = [
            0, 0, 0,
            1, 1, 1, 1, 1, 1, 1, 1, 1,
            2, 2, 2, 2, 2, 2, 2,
            3, 3,
            4, 4, 4, 4, 4,
            5, 5,
        ]

        text, token_map = baseline.detoken(tokens)

        # Separate assertEqual calls so a failure reports which value
        # differs and how, instead of a bare "False is not true".
        self.assertEqual(text, expected_text)
        self.assertEqual(token_map, expected_map)

    def test_boundedChunk(self):
        """Smoke test: ``boundedChunk`` runs; output is printed per token."""
        words = [
            "'", "Basta", "'", ",", "disse", "o", "guarda", ".",
            "\"", "Agora", ",", "só", "nos", "resta", "esperar", "\"", ",",
            "falou", "o", "sol.",
            "-", "A", "vida", "é", "-", "afirmou", "a", "presidente", ".",
        ]
        # Each token is a single-element list holding only the word form.
        s = [[word] for word in words]

        bc = baseline.boundedChunk(s)

        for i, chunk in enumerate(bc):
            print(s[i][0], chunk)

    def test_firstLetterUpperCase(self):
        """``firstLetterUpperCase`` flags tokens that start with a capital."""
        s = [["Guilherme"], ["disse"], ["a"], ["Maria"], [":"], ["olha"], ["lá"], ["!"]]
        resp = [1, 0, 0, 1, 0, 0, 0, 0]

        uc = baseline.firstLetterUpperCase(s)

        self.assertEqual(uc, resp)

    def test_verbSpeechNeighb(self):
        """``verbSpeechNeighb`` returns the expected flags for a VSAY sentence."""
        # Tokens are [word, tag] pairs; only the first one is tagged VSAY.
        s = [["disse", "VSAY"], ["o", "XX"], ["juiz", "ABC"], ["de", "PREP"], ["o", "ART"]]
        resp = [1, 1, 1, 0, 0]

        vsn = baseline.verbSpeechNeighb(s)

        self.assertEqual(resp, vsn)

    def test_quotationStart(self):
        """Smoke test: ``quotationStart`` runs on both sample sentences."""
        for index in self._SAMPLE_SENTENCES:
            baseline.quotationStart(self._corpus[index])

    def test_quotationEnd(self):
        """Smoke test: ``quotationEnd`` runs on both sample sentences."""
        for index in self._SAMPLE_SENTENCES:
            sentence = self._corpus[index]
            qs = baseline.quotationStart(sentence)
            baseline.quotationEnd(sentence, qs)

    def test_quoteBounds(self):
        """Smoke test: ``quoteBounds`` runs; results are printed per token."""
        for index in self._SAMPLE_SENTENCES:
            sentence = self._corpus[index]
            qs = baseline.quotationStart(sentence)
            qe = baseline.quotationEnd(sentence, qs)
            qb = baseline.quoteBounds(qs, qe)
            # Word form, then start flag, end flag and bounds value for
            # that token position.
            for i, end in enumerate(qe):
                print(sentence[i][0], "\t", qs[i], end, qb[i])
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()