/
topic.py
79 lines (67 loc) · 1.47 KB
/
topic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import sys
from ngram import NGram
class TopicDetector:
    """Guess which known topic a text file is about using n-gram matching.

    Topics are loaded from a definitions file; each one is paired with an
    ``NGram`` index built from its keywords.  ``verify`` counts, per topic,
    how many (keyword, text word) search hits score above a threshold and
    reports the topic with the most hits.

    Attributes (parallel lists, same index = same topic):
        topic: topic names, in file order.
        ng:    one ``NGram`` index of keywords per topic.
    """

    # A search hit only counts when its score (second element of the hit)
    # is strictly greater than this value.
    SIMILARITY_THRESHOLD = 0.3
    # A topic is only reported when its hit count is strictly greater
    # than this value.
    MIN_MATCHES = 2

    def __init__(self, text):
        """Load topic definitions from the file at path ``text``.

        Each non-blank line is split on whitespace: field 0 is the topic
        name and field 2 is a comma-separated keyword list.  Field 1 is
        ignored (presumably a separator token -- confirm against the data
        file).

        Raises:
            IOError / OSError: if the file cannot be opened.
            IndexError: if a non-blank line has fewer than three fields.
        """
        self.ng = []
        self.topic = []
        # 'with' guarantees the file is closed even if parsing fails.
        with open(text, "r") as topics_file:
            for line in topics_file:
                # Whitespace split (instead of split(" ")) drops the
                # trailing newline that used to stick to the last keyword.
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines
                # Parse the keywords first so a malformed line cannot leave
                # 'topic' and 'ng' with different lengths.
                keywords = fields[2].split(",")
                self.topic.append(fields[0])
                self.ng.append(NGram(keywords))

    def verify(self, text_compare):
        """Report which loaded topic best matches the file ``text_compare``.

        Prints the list of per-topic hit counts, then either the best topic
        or a "could not determine" message, then an empty line.

        Returns:
            The name of the best-matching topic, or ``None`` when no topic
            scores more than ``MIN_MATCHES`` hits (this includes the case
            where no topics are loaded).

        Raises:
            IOError / OSError: if topics are loaded and the text file
            cannot be opened.
        """
        scores = []
        if self.ng:
            # With no topics there is nothing to compare against, so the
            # text is only read and indexed when it will actually be used.
            text_index = NGram(self._read_words(text_compare))
            scores = [self._count_matches(topic_index, text_index)
                      for topic_index in self.ng]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(scores)

        best_topic = None
        if scores:
            # max() returns the first maximal index, so ties go to the
            # topic listed earliest, matching the original strict '<' scan.
            best = max(range(len(scores)), key=scores.__getitem__)
            if scores[best] > self.MIN_MATCHES:
                best_topic = self.topic[best]

        if best_topic is not None:
            print("Tema mas preciso del texto: " + repr(best_topic))
        else:
            print("No se ha podido precisar de que trata")
        print("")
        return best_topic

    def _count_matches(self, topic_index, text_index):
        """Count search hits of topic keywords in the text above threshold."""
        return sum(
            1
            for keyword in topic_index
            for hit in text_index.search(keyword)
            if hit[1] > self.SIMILARITY_THRESHOLD
        )

    @staticmethod
    def _read_words(path):
        """Return every whitespace-separated word in the file at ``path``."""
        with open(path, "r") as text_file:
            # split() with no argument also discards newlines and the empty
            # strings that split(" ") produced for repeated spaces.
            return text_file.read().split()
if __name__ == '__main__':
    # Load the topic definitions once, then classify each sample text in turn.
    detector = TopicDetector("topicos")
    for sample_path in ("texto1.txt", "texto2.txt", "texto3.txt"):
        detector.verify(sample_path)