/
reviewTipsExtraction.py
66 lines (50 loc) · 1.86 KB
/
reviewTipsExtraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import sys
import pymongo
import TopicModeler as tm
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure
def main():
    """Build a topic model from the negative-sentiment reviews in MongoDB.

    Connects to the MongoDB server on localhost:27017, collects the ``text``
    field of every document in ``projectDB.reviewsSentiment`` whose
    ``sentiment`` value is below -0.05 while reporting progress on stdout,
    and passes the collected texts to ``tm.buildTopicModel``.

    Raises:
        SystemExit: with status 1, after printing the error, when the
            MongoDB server cannot be reached.
    """
    c = None
    try:
        try:
            c = MongoClient('localhost', 27017)
            # Recent PyMongo releases connect lazily, so the constructor
            # alone does not prove the server is reachable; force one round
            # trip so an unreachable server is reported here.
            c.admin.command('ping')
        except ConnectionFailure as e:
            print("Could not connect to MongoDB: %s" % e)
            # Nothing can be queried without a connection; stop here instead
            # of failing later on an unusable client.
            sys.exit(1)

        # getting a handle to the database
        db = c["projectDB"]
        print("Businesses database connected successfully")

        # Only reviews with a sentiment score below -0.05 are modelled.
        query = {"sentiment": {"$lt": -0.05}}
        reviewsSet = []
        for count, review in enumerate(db.reviewsSentiment.find(query), 1):
            reviewsSet.append(review["text"])
            # "\r" rewrites the same console line to act as a live counter.
            sys.stdout.write("\r" + "%d reviews processed" % count)
            sys.stdout.flush()
        print("\nReviews processing done!")

        tm.buildTopicModel(reviewsSet)

        # NOTE: an analogous pass over db.tips (collecting tip["text"] and
        # topic-modelling the result) was present only as commented-out code
        # and is intentionally not performed here.
    finally:
        # Release the connection on every exit path, including errors raised
        # while reading the reviews or building the model.
        if c is not None:
            c.close()
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()