-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
85 lines (63 loc) · 2.38 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from analysing_data import mc_difference_logic
from analysing_data.booster import db_mc_handler
from analysing_data.markov_chain_machine import markov_chain
import loggers
from model.db import db_handler
from search_engine import twitter_engine
from search_engine.twitter_engine import tweepy_engine
import tools
from visualise import vis_machine
__author__ = '4ikist'
# Shared collaborators, built once when this module is imported.
# Storage handler; truncate=False is passed explicitly
# (presumably keeps already stored data - TODO confirm against db_handler).
db_ = db_handler(truncate=False)
# Twitter scraping engine; receives db_ as its `out` argument.
api_engine = twitter_engine.tweepy_engine(out=db_)
# Handler handed to every markov_chain that this module builds or loads.
booster = db_mc_handler(truncate=False)
# Plain alias for the imported vis_machine object; nothing is called on it here.
vis_processor = vis_machine
# Logger used by the functions in this module.
log = loggers.logger
def model_splitter(message):
    """
    Break a message into its whitespace-separated tokens.

    Returns the list produced by ``message.split()``; an empty or
    whitespace-only message gives an empty list.
    """
    return message.split()
def process_names(file_name, class_name):
    """
    Read user names from a file, scrape each user, and build a markov chain
    from that user's timeline.

    file_name  -- path to a text file with one user name per line
    class_name -- class label stored for every name via db_.set_class

    For each line the name is normalised with tools.imply_dog(..., with_dog=True)
    and stripped, the user is scraped through api_engine, the class label is
    saved, and every truthy timeline element contributes its 'text' (split into
    tokens) to the chain. Each chain is saved once its timeline is consumed.

    Returns the list of markov_chain objects, in file order.
    """
    # Read all lines up front and close the file before the slow scraping starts.
    with open(file_name) as names_file:
        names = names_file.readlines()

    result = []
    for name in names:
        name = tools.imply_dog(name, with_dog=True).strip()
        log.info("start processing name %s" % name)
        user = api_engine.scrap(name)
        db_.set_class(name, class_name)
        mc = markov_chain(name, booster)
        for t_el in user.timeline:
            log.debug('>>>>%s' % t_el)
            # Falsy timeline elements carry no text to learn from; skip them.
            if t_el:
                mc.add_message(model_splitter(t_el['text']))
        mc.save()
        result.append(mc)
    return result
def get_models(model_ids):
    """
    Load one markov chain per identifier.

    Each id is passed to ``markov_chain.create`` together with the shared
    ``booster``; the results are returned as a list in the order of model_ids.
    """
    return [markov_chain.create(identifier, booster) for identifier in model_ids]
def process_models(models):
    """
    Compare every ordered pair of unequal models.

    For each (left, right) drawn from models with ``left != right`` the result
    of ``mc_difference_logic.diff_markov_chains(left, right)`` is collected.
    The sum of the 'content' entries of all collected results is logged at
    info level, and the collected list is returned.
    """
    differences = [
        mc_difference_logic.diff_markov_chains(left, right)
        for left in models
        for right in models
        if left != right
    ]
    log.info(sum(item['content'] for item in differences))
    return differences
def create_one_big_model(models):
    """
    Fold all given models into a single one by summing them left to right.

    models -- indexable sequence of at least two objects exposing ``model_id_``

    The first two models are summed with ``booster.sum_models``; every further
    model is then summed into the running result. Returns the chain obtained
    from ``markov_chain.create`` for the id of the final sum.

    Raises IndexError when fewer than two models are supplied.
    """
    log.info('create big model')
    if len(models) < 2:
        # Same exception type the bare models[0] / models[1] lookups raise,
        # but with a message that says what is actually wrong.
        raise IndexError(
            'create_one_big_model needs at least two models, got %d' % len(models)
        )

    merged_id = booster.sum_models(models[0].model_id_, models[1].model_id_)
    for model in models[2:]:
        log.info('difference between: %s < -- > %s' % (merged_id, model.model_id_))
        merged_id = booster.sum_models(merged_id, model.model_id_)

    # %-formatting (as in the log line above) so a non-string id cannot raise
    # TypeError the way string concatenation would.
    log.info('is win! : %s' % merged_id)
    return markov_chain.create(merged_id, booster)
if __name__ == '__main__':
    # Manual run when the file is executed as a script: scrape a single account.
    # NOTE(review): this engine is created without `out=`, unlike the
    # module-level api_engine - confirm that is intended.
    engine = tweepy_engine()
    # The meaning of the second positional argument (0) is defined by
    # tweepy_engine.scrap - TODO confirm.
    navalny = engine.scrap('navalny',0)