def _get_name_from_user(self, user):
    """
    Return the name to use for ``user``.

    * an ``m_user`` instance yields its stored ``name_`` unchanged;
    * a ``tweepy.User`` yields its ``screen_name`` passed through
      ``tools.imply_dog(..., with_dog=True)``;
    * any other value is itself passed through
      ``tools.imply_dog(..., with_dog=True)``.
    """
    if isinstance(user, m_user):
        return user.name_
    # tweepy users keep the name in screen_name; anything else is used as-is
    raw_name = user.screen_name if isinstance(user, tweepy.User) else user
    return tools.imply_dog(raw_name, with_dog=True)
def _is_name_in_cache(self, user_name, r_type, name):
    """
    Tell whether ``name`` is already cached as an ``r_type`` relation of
    ``user_name``.

    ``self.relations_cache`` maps a user name to a mapping of relation type
    to a list of objects exposing ``screen_name``. Names are compared after
    passing both sides through ``tools.imply_dog``.

    Returns False when the user, or the requested relation type, is not
    cached at all.
    """
    # `in` replaces dict.has_key(), which no longer exists in Python 3
    if user_name not in self.relations_cache:
        return False
    user_relations = self.relations_cache[user_name]
    # a relation type that was never cached cannot contain the name
    if r_type not in user_relations:
        return False
    wanted = tools.imply_dog(name)
    return any(tools.imply_dog(relation.screen_name) == wanted
               for relation in user_relations[r_type])
def _get_data(self, t_user):
    """
    Build the project's ``m_user`` model from the tweepy user ``t_user``.

    Copies the real name, lists, follower/friend/favourite counters,
    timeline, status count and creation time into a new ``m_user``.
    Fetching the lists costs one API request, which is added to
    ``self._count_requests``.

    Returns the filled ``m_user``, or None when the user is protected or
    when tweepy reports an error other than a rate limit. On
    'Rate limit exceeded' the method sleeps 360 seconds and starts over.
    """
    while True:
        try:
            result = m_user(tools.imply_dog(t_user.screen_name, with_dog=True))
            if t_user.protected:
                log.debug('user %s is protected... skip him' % t_user.screen_name)
                return None

            result.real_name = t_user.name

            lists = t_user.lists()
            self._count_requests += 1
            log.debug("get lists +1")
            result.set_lists(tools.flush(lists, lambda x: x.name), len(lists))

            result.followers_count = t_user.followers_count
            result.friends_count = t_user.friends_count
            result.favorites_count = t_user.favourites_count

            result.timeline = self._get_time_line(t_user)
            result.timeline_count = t_user.statuses_count
            result.inited_ = t_user.created_at.strftime(props.time_format)
            return result
        except tweepy.TweepError as e:
            if 'Rate limit exceeded' not in str(e):
                # previously this error vanished silently; keep returning None
                # for callers but leave a trace of what went wrong
                log.warn('can not form data for user %s: %s' % (t_user.screen_name, e))
                return None
            log.info('oook wil be sleep...')
            time.sleep(360)
def _form_relations(self, user_name, relation_types, relations_names):
    """
    Complete the cached relations of ``user_name`` and return them.

    input:
        user_name        - name of the user whose relations are formed
        relation_types   - iterable of relation types to process
                           (e.g. mentions, followers, friends)
        relations_names  - mapping: relation type -> names of related users

    For every requested type:
      * if the type is missing from the cached relations, all of its users
        are loaded through ``_load_users_by_names``;
      * otherwise only the names that ``_is_name_in_cache`` does not find
        are loaded, one by one, through ``_get_user_by_name`` and appended.

    Returns the relations mapping obtained from ``_get_relations_cache``,
    updated in place.
    """
    log.info('start forming relations for user %s' % user_name)
    relations = self._get_relations_cache(user_name)
    for r_type in relation_types:
        # `not in` replaces dict.has_key(), which no longer exists in Python 3
        if r_type not in relations:
            # getting tweepy model users which are in relations
            log.info("cache have not contain this relations type [%s]" % r_type)
            relations[r_type] = self._load_users_by_names(relations_names[r_type])
            continue
        for r_name in relations_names[r_type]:
            if not self._is_name_in_cache(user_name, r_type, r_name):
                relations[r_type].append(
                    self._get_user_by_name(tools.imply_dog(r_name, with_dog=True)))
    return relations
def _load_users_by_names(self, list):
    """
    Load a user object for every name in ``list`` via ``_get_user_by_name``.

    Loading is best effort: a name whose lookup raises is logged (together
    with the error) and skipped, and falsy lookup results are dropped.

    Returns the list of successfully loaded user objects, in input order.
    """
    res = []
    for user in list:
        try:
            user_obj = self._get_user_by_name(tools.imply_dog(user))
        except Exception as e:
            # report why the lookup failed instead of discarding the error
            log.warn('problems with user %s : %s' % (user, e))
            continue
        if user_obj:
            res.append(user_obj)
    return res
def _get_user_relations(self, t_user):
    """
    Fetch followers and friends of the tweepy user ``t_user``.

    The fetched objects are stored in ``self.relations_cache`` under the
    user's name (``screen_name`` passed through
    ``tools.imply_dog(..., with_dog=True)``), and ``self._count_requests``
    grows by two.

    Returns ``{'followers': [...], 'friends': [...]}`` holding the names of
    the related users, or None when an error other than a rate limit
    occurs. On 'Rate limit exceeded' the method sleeps 3600 seconds and
    starts over.
    """
    while True:
        try:
            followers = t_user.followers()
            friends = t_user.friends()
            cache_key = tools.imply_dog(t_user.screen_name, with_dog=True)
            self.relations_cache[cache_key] = {'followers': followers, 'friends': friends}
            self._count_requests += 2
            log.debug("get followers and friends +2")
            return {
                'followers': [tools.imply_dog(user.screen_name, with_dog=True) for user in followers],
                'friends': [tools.imply_dog(user.screen_name, with_dog=True) for user in friends],
            }
        except Exception as e:
            if 'Rate limit exceeded' not in str(e):
                # previously this error vanished silently; keep returning None
                # for callers but leave a trace of what went wrong
                log.warn('can not get relations of user %s: %s' % (t_user.screen_name, e))
                return None
            log.info('oook wil be sleep...')
            time.sleep(3600)
def get_users(filename):
    """
    Form users listed in ``filename`` (one name per line).

    Every name is looked up in ``main_db`` first; a user that is not found
    there is scraped with ``engine.scrap(name_, neighbourhood=0)``.
    Blank lines are skipped.

    Returns the list of user objects in file order.
    """
    result = []
    # context manager so the file handle is closed even if reading fails
    with open(filename) as names_file:
        lines = names_file.readlines()
    for line in lines:
        if not line.strip():
            # a blank line carries no user name
            continue
        name_ = tools.imply_dog(line, with_dog=True).strip()
        log.info('find user by name "%s"' % name_)
        found = main_db.get_user({'name_': name_})
        if found:
            log.info('user found %s' % found.name_)
        else:
            log.info('user will load %s' % name_)
            found = engine.scrap(name_, neighbourhood=0)
        result.append(found)
    return result
def _get_user_by_name(self, user_name):
    """
    Return the user named ``user_name`` as fetched by ``self.api.get_user``.

    The name is first passed through ``tools.imply_dog(..., with_dog=True)``.
    A successful request adds one to ``self._count_requests``.

    Every failure is logged. On 'Rate limit exceeded' the method sleeps
    3600 seconds and tries again; any other exception is re-raised to the
    caller.
    """
    user_name = tools.imply_dog(user_name, with_dog=True)
    while True:
        try:
            t_user = self.api.get_user(user_name)
            self._count_requests += 1
            log.debug("get user +1")
            return t_user
        except Exception as e:
            log.warn('in getting user by name exceptions is: %s \n with username: %s' % (e, user_name))
            if 'Rate limit exceeded' not in str(e):
                # bare raise keeps the original traceback intact
                raise
            log.info('oook wil be sleep...')
            time.sleep(3600)
def process_names(file_name, class_name):
    """
    Scrape every user listed in ``file_name`` (one name per line), store
    ``class_name`` for it and build a markov chain from its timeline.

    For each name the user is scraped with ``api_engine.scrap``, the class
    is saved through ``db_.set_class``, and every non-empty timeline
    element has its ``'text'`` split by ``model_splitter`` and added to a
    ``markov_chain``, which is then saved. Blank lines are skipped.

    Returns the list of saved markov chains in file order.
    """
    # context manager so the file handle is closed even if reading fails
    with open(file_name) as names_file:
        names = names_file.readlines()
    result = []
    for name in names:
        if not name.strip():
            # a blank line carries no user name
            continue
        name = tools.imply_dog(name, with_dog=True).strip()
        log.info("start processing name %s" % name)
        user = api_engine.scrap(name)
        db_.set_class(name, class_name)
        mc = markov_chain(name, booster)
        for t_el in user.timeline:
            log.debug('>>>>%s' % t_el)
            if t_el:
                mc.add_message(model_splitter(t_el['text']))
        mc.save()
        result.append(mc)
    return result
def get_user(user_name):
    """
    Look up ``user_name`` in the 'ttr_tl' database on localhost:27017.

    The name is passed through ``tools.imply_dog(..., with_dog=True)`` and
    matched against the ``name_`` field. Returns whatever the handler's
    ``get_user`` returns.
    """
    storage = db_handler(host_='localhost', port_=27017, db_name_='ttr_tl')
    query = {'name_': tools.imply_dog(user_name, with_dog=True)}
    return storage.get_user(query)
def get_messages_by_user(self, user):
    """
    Return, as a list, every document of ``self.messages`` whose ``user``
    field equals '******' followed by ``tools.imply_dog(str(user))``.
    """
    # original example of a user reference: http://twitter.com/mrletemkno
    query = {'user': '******' + tools.imply_dog(str(user))}
    return list(self.messages.find(query))