-
Notifications
You must be signed in to change notification settings - Fork 0
/
POS_tagger.py
91 lines (74 loc) · 2.06 KB
/
POS_tagger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#cfg = Context-free grammar
#https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
#This should be the guide to the tagset.
import nltk
import en
def rearrange(sent):
    """Move runs of modifiers behind the word that follows them.

    ``sent`` is a sequence of ``(word, tag)`` pairs using Penn Treebank
    tags.  A new list is returned; ``sent`` itself is not modified.

    Processing happens in two passes:

    1. Every pair whose tag is one of the punctuation-like tags
       (``$ ' ( ) , -- . : SYM``) is replaced by ``(word, '.')`` so that
       later code only has to recognise a single punctuation tag.
    2. Pairs tagged as cardinal numbers, adjectives or adverbs
       (``CD JJ JJR JJS RB RBR``) are held back.  Conjunctions (``CC``)
       and punctuation (``.``) that appear while modifiers are being held
       join that held run.  The next pair of any other kind is emitted
       first and the held run is emitted right after it.  Whatever is
       still held when the input ends is emitted last, so no pair is
       ever dropped.
    """
    punctuation_tags = ('$', "'", '(', ')', ',', '--', '.', ':', 'SYM')
    modifier_tags = ('CD', 'JJ', 'JJR', 'JJS', 'RB', 'RBR')
    joiner_tags = ('CC', '.')

    # Build a normalised copy: assigning to the loop variable alone would
    # leave the sequence untouched, so the replacement pair is stored.
    normalized = []
    for pair in sent:
        if pair[1] in punctuation_tags:
            normalized.append((pair[0], '.'))
        else:
            normalized.append(pair)

    reordered = []
    pending = []  # modifiers (plus joiners between them) awaiting a head word
    for pair in normalized:
        # Read the tag directly from the pair; parsing repr(pair) breaks as
        # soon as the word itself contains an apostrophe ("n't", "'s").
        tag = pair[1]
        if tag in modifier_tags or (tag in joiner_tags and pending):
            pending.append(pair)
        else:
            reordered.append(pair)
            reordered.extend(pending)
            pending = []

    # A sentence may end on a modifier ("The sky is blue."); emit the
    # leftover run instead of silently discarding it.
    reordered.extend(pending)
    return reordered
def convert(text1):
    """Tag ``text1``, reorder it with ``rearrange`` and print the result.

    The text is tokenised with ``nltk.word_tokenize`` and tagged with
    ``nltk.pos_tag``; each token and tag is coerced to ``str`` before the
    pairs are handed to ``rearrange``.  The reordered words are then glued
    into one line: a space is inserted before a word unless nothing has
    been written yet or the word's tag is ``'.'``.  The line is printed;
    nothing is returned.
    """
    tagged = nltk.pos_tag(nltk.word_tokenize(text1))
    pairs = rearrange([(str(item[0]), str(item[1])) for item in tagged])

    line = ''
    for pair in pairs:
        # Words tagged '.' attach directly to the text written so far.
        gap = ' ' if (line and pair[1] != '.') else ''
        line += gap
        line += pair[0]
    # Single-argument print(...) behaves the same as the print statement.
    print(line)
# Interactive driver (Python 2: relies on raw_input).
#
# Lines are read from standard input until a line consisting of a single
# space is entered.  Until the marker line '***' has been seen, every line
# (the marker included) is echoed unchanged.  Afterwards, lines starting
# with '_' and empty lines are echoed, and every other line is split on
# spaces, each piece is passed through en.number.spoken, and the re-joined
# text is handed to convert().
started = 0
text = raw_input('')
while text != ' ':
    if not started:
        print(text)
        if text == '***':
            started = 1
    elif not text or text.startswith('_'):
        # startswith() is safe on an empty line, where text[0] would raise
        # IndexError; an empty line is simply echoed.
        print(text)
    else:
        spoken = []
        for piece in text.split(' '):
            try:
                piece = int(piece)
            except ValueError:
                # Not an integer literal: keep the piece as the original
                # string.  Only ValueError is swallowed so that Ctrl-C and
                # genuine errors still surface.
                pass
            # en.number.spoken is applied to every piece, numeric or not.
            # NOTE(review): presumably it spells out integers and passes
            # other words through - confirm against the `en` library.
            spoken.append(en.number.spoken(piece))
        convert(' '.join(spoken))
    text = raw_input()
# After the loop ends, print the results of two `en` library queries.
print(en.is_number("twelve"))
print(en.is_basic_emotion("cheerful"))