def get_user_timeline(user_name):
    """Fetch a user's info from Twitter, persist it locally, and return it.

    :param user_name: screen name to look up
    :return: the scraped user object (with ``_id`` set to its name)
    """
    storage = db_handler(host_='localhost', port_=27017, db_name_='ttr_exp')
    engine = tweepy_engine(out=storage)
    fetched = engine.get_user_info(user_name)
    # key the stored document by the user's name
    fetched._id = fetched.name_
    storage.save_user(fetched.serialise())
    return fetched
# Sentinel + lazy cache for the default message sink.
# BUG FIX: the original signature used ``to_what=db_handler(messages_truncate=True)``,
# which is evaluated once at *import* time — opening (and truncating!) the message
# store as a side effect of merely importing this module.  The shared handler is now
# created lazily on the first call that relies on the default; passing ``to_what=None``
# still disables saving, exactly as before.
_DEFAULT_SINK = object()
_default_sink_cache = []


def _get_default_sink():
    # Create the shared default handler once, on first use (mirrors the
    # original single-instance behavior, minus the import-time side effect).
    if not _default_sink_cache:
        _default_sink_cache.append(db_handler(messages_truncate=True))
    return _default_sink_cache[0]


def extract_messages(file, to_what=_DEFAULT_SINK, limit=10):
    """Parse a tweet-dump file into messages and collect the user names seen.

    The file is expected to contain tagged element lines ('T' = time,
    'U' = user URL, 'W' = words); every complete (time, user, words) triple
    whose words are not the placeholder 'No Post Title' is saved to ``to_what``.

    :param file: path of the dump file to parse (name kept for interface
        compatibility, although it shadows the builtin)
    :param to_what: message sink with a ``save_message`` dict -> None method;
        defaults to a shared ``db_handler(messages_truncate=True)``; pass a
        falsy value (e.g. ``None``) to parse without saving
    :param limit: accepted but unused — kept for backward compatibility
    :return: set of user names encountered in saved-eligible messages
    """
    if to_what is _DEFAULT_SINK:
        to_what = _get_default_sink()
    users = set()
    message = None
    # BUG FIX: the file handle was never closed; use a context manager.
    with open(file) as f:
        for line in f:
            if _is_message_element(line):
                if not message:
                    message = {}
                element = _get_element(line)
                if element[0] == 'T':
                    message['time'] = element[1]
                elif element[0] == 'U':
                    user = element[1]
                    # keep only the path segment after 'twitter.com/'
                    message['user'] = user[user.index('twitter.com') + len('twitter.com') + 1:]
                elif element[0] == 'W':
                    message['words'] = element[1]
            if message and len(message) == 3:
                if message['words'] != 'No Post Title':
                    if to_what:
                        log.debug('save message > %s' % message)
                        to_what.save_message(message)
                    users.add(message['user'])
                message = None
    return users
def visualise_users(): db = db_handler() users = db.users.find() for user in users: user = m_user.create(user) print user.name_ nodes,edges = form_graph(user) add_nodes(nodes) add_edges(edges)
def __init__(self, inited=None, out=None):
    """Set up the tweepy API wrapper with OAuth credentials.

    :param inited: whether the account credentials are already initialised;
        if ``None`` (the default), resolved via ``props.is_inited()`` at call
        time.  BUG FIX: the original signature used
        ``inited=props.is_inited()``, which is evaluated once at import time
        and freezes the init state for the life of the process.
    :param out: storage backend; a fresh ``db_handler()`` when falsy
    """
    if inited is None:
        inited = props.is_inited()
    if not out:
        out = db_handler()
    self.auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    if not inited:
        # first run: walk the OAuth dance and remember the tokens
        self.access_token, self.access_token_secret = self.init_account()
        self.auth.set_access_token(self.access_token, self.access_token_secret)
    else:
        # tokens already stored in the props module
        self.auth.set_access_token(props.access_token, props.access_token_secret)
    self.api = tweepy.API(self.auth)
    self._count_requests = 1  # request counter (was the redundant ``int(1)``)
    log.debug("auth +1")
    self.out = out
    self.relations_cache = {}  # memoises relation lookups per user
def __init__(self):
    """Create the wrapper around a local MongoDB handler."""
    # NOTE(review): port 27107 looks like a transposition of MongoDB's
    # default 27017, which every other handler in this project uses —
    # confirm before changing.
    self.handler = db_handler(host_='localhost', port_=27107)
from differences import diff_machine from model.db import db_handler __author__ = 'Alesha' #todo create test for difference difference_factory = diff_machine.difference_factory() db = db_handler() linoleum = db.get_user_by_name('@linoleum2k12') print linoleum chikist = db.get_user_by_name('@4ikist_') print chikist
import tools from text_proc.text_processing import get_words from analysing_data.mc_difference_logic import diff_markov_chains import visualise.vis2d_machine as vis __author__ = '4ikist' __doc__ = """ Эксперимент 1. 1) Создание общей модели определенного класса людей. 2) Нахождение весов людей на основе принадлежности ленты определенного человека к общей модели. 3) Класстеризация на людей на основе разницы """ log = loggers.logger main_db = db_handler() engine = engines.tweepy_engine(out=main_db) booster = db_mc_handler() def get_users(filename): """ forming users some from db or scrapping from ttr """ result = [] users = open(filename).readlines() for user in users: name_ = tools.imply_dog(user, with_dog=True).strip() log.info('find user by name "%s"' % name_) m_user = main_db.get_user({'name_': name_})
from analysing_data import markov_chain_machine from analysing_data.markov_chain_machine import markov_chain import text_proc.text_processing as tp from analysing_data.booster import db_mc_handler from model.db import db_handler from search_engine.twitter_engine import tweepy_engine from analysing_data.mc_difference_logic import diff_markov_chains import tools __author__ = '4ikist' db = db_handler(host_='localhost', port_=27017, db_name_='ttr_exp') boost = db_mc_handler() engine = tweepy_engine(out=db) def get_users_data(user_name1, user_name2): user1 = engine.get_user_info(user_name1) user2 = engine.get_user_info(user_name2) db.save_user(user1.serialise()) db.save_user(user2.serialise()) timeline1 = tools.flush(user1.timeline, by_what=lambda x: tp.get_words(x['text'], is_normalise=True))[:10] timeline2 = tools.flush(user2.timeline, by_what=lambda x: tp.get_words(x['text'], is_normalise=True))[:10] print len(timeline1) print len(timeline2) mc1 = markov_chain_machine.create_model(timeline1, user_name1, boost) mc2 = markov_chain_machine.create_model(timeline2, user_name2, boost) return mc1, mc2
def load_users_by_star_friend(star_name):
    """Scrape and store the relation graph of ``star_name``'s account."""
    # NOTE(review): port 27027 is non-standard for MongoDB (default 27017) —
    # presumably a dedicated test instance; confirm.
    storage = db_handler(host_='localhost', port_=27027, db_name_='ttr_test')
    tweepy_engine(out=storage).get_relations_of_user(star_name)
def get_user(user_name):
    """Look up a stored user by name in the 'ttr_tl' database.

    :param user_name: screen name; presumably normalised by ``imply_dog``
        to carry the leading '@' — confirm against tools.imply_dog
    :return: whatever the handler's ``get_user`` returns for the query
    """
    storage = db_handler(host_='localhost', port_=27017, db_name_='ttr_tl')
    canonical_name = tools.imply_dog(user_name, with_dog=True)
    return storage.get_user({'name_': canonical_name})
from analysing_data import mc_difference_logic from analysing_data.booster import db_mc_handler from analysing_data.markov_chain_machine import markov_chain import loggers from model.db import db_handler from search_engine import twitter_engine from search_engine.twitter_engine import tweepy_engine import tools from visualise import vis_machine __author__ = '4ikist' db_ = db_handler(truncate=False) api_engine = twitter_engine.tweepy_engine(out=db_) booster = db_mc_handler(truncate=False) vis_processor = vis_machine log = loggers.logger def model_splitter(message): message_ = message.split() return message_ def process_names(file_name, class_name): """ get from file ser names, scrapping saving and forming markov chains for any user timeline """