# NOTE(review): this chunk was collapsed onto a single line, so the original
# nesting is lost.  The statements down to the print call are the tail of a
# definition whose header is outside this view (they read `self` and `user`);
# their logic is reproduced unchanged and they must be re-indented to their
# enclosing scope when spliced back.
if user:
    logging.exception("exception for user %s" % user.to_d())
else:
    logging.exception("exception and user is None")
logging.info("api calls remaining: %d", self.twitter.remaining)
# Parenthesised so the line parses on Python 2 and 3 alike; output is the same.
print("slave is done")


# NOTE(review): `crawl` takes `self`, so it is presumably a method of a class
# whose header is outside this view -- confirm its indentation when splicing.
def crawl(self, user):
    """Save the user's timeline and schedule the user's next crawl.

    Calls ``self.twitter.save_timeline(user._id, user.last_tid)``, records the
    id of the first returned tweet in ``user.last_tid``, folds the observed
    tweets-per-hour into ``user.tweets_per_hour`` (plain average of the old
    and the new value), sets ``user.next_crawl_date`` and
    ``user.last_crawl_date`` and finally calls ``user.save()``.

    The next crawl is scheduled ``settings.tweets_per_crawl / rate`` hours
    ahead, capped at ``settings.max_hours``.
    """
    logging.debug("visiting %s - %s", user._id, user.screen_name)
    tweets = self.twitter.save_timeline(user._id, user.last_tid)
    if tweets:
        user.last_tid = tweets[0]._id

    now = datetime.utcnow()
    last = user.last_crawl_date
    if last is None:
        # Fixed fallback date used for users that were never crawled.
        last = datetime(2010, 11, 12)
    delta = now - last
    # Floor at one second: two crawls inside the same second (or a stored
    # date that is not in the past) would otherwise divide by zero or by a
    # negative number below.
    seconds = max(delta.seconds + delta.days * 24 * 3600, 1)

    tph = (3600.0 * len(tweets) / seconds + user.tweets_per_hour) / 2
    user.tweets_per_hour = tph

    if tph > 0:
        hours = min(settings.tweets_per_crawl / tph, settings.max_hours)
    else:
        # A rate of zero means "no tweets expected": wait the maximum
        # interval instead of raising ZeroDivisionError and never
        # rescheduling the user.
        hours = settings.max_hours
    user.next_crawl_date = now + timedelta(hours=hours)
    user.last_crawl_date = now
    user.save()


if __name__ == '__main__':
    User.database = CouchDB(settings.couchdb_root + "houtx_user", True)
    proc = CrawlMaster()
    create_slaves(CrawlSlave, proc.todo, proc.done)
    proc.run()
except Exception as ex: if user: logging.exception("exception for user %s"%user.to_d()) else: logging.exception("exception and user is None") logging.info("api calls remaining: %d",self.twitter.remaining) print "slave is done" def crawl(self, user): logging.debug("visiting %s - %s",user._id,user.screen_name) tweets = self.twitter.save_timeline(user._id, user.last_tid) if tweets: user.last_tid = tweets[0]._id now = datetime.utcnow() last = user.last_crawl_date if user.last_crawl_date is not None else datetime(2010,11,12) delta = now - last seconds = delta.seconds + delta.days*24*3600 tph = (3600.0*len(tweets)/seconds + user.tweets_per_hour)/2 user.tweets_per_hour = tph hours = min(settings.tweets_per_crawl/tph, settings.max_hours) user.next_crawl_date = now+timedelta(hours=hours) user.last_crawl_date = now user.save() if __name__ == '__main__': User.database = CouchDB(settings.couchdb_root+"houtx_user",True) proc = CrawlMaster() create_slaves(CrawlSlave, proc.todo, proc.done) proc.run()
# NOTE(review): this chunk was collapsed onto a single line, so the original
# nesting is lost.  The statements before the entry-point guard are the tail
# of a definition whose header is outside this view (the first one is the body
# of a loop over `u` that starts earlier); their logic is reproduced unchanged
# and they must be re-indented to their enclosing scope when spliced back.
jobs[u].rfriends_score = RFRIEND_POINTS / len(rfriends)
if tweets:
    # Count how often each user id is mentioned across the tweets.
    ats = defaultdict(int)
    for tweet in tweets:
        for uid in tweet.mentions:
            ats[uid] += 1
    for u, c in ats.iteritems():
        points = c * MENTION_POINTS
        if points > 0:
            jobs[u].mention_score = points
for k, j in jobs.iteritems():
    j._id = k
    j.put(self.stalk)


# Entry point.  With an argument: 'm' runs a master, 's' runs a single slave
# with prefix 'x', 'c' only creates slaves.  Without an argument: create the
# slaves and run a master.
# NOTE(review): pairing of the final `else` with the argv-length check and the
# unconditional run are reconstructed from the collapsed source -- confirm.
if __name__ == '__main__':
    # Start unbound-safe: the 'c' mode creates slaves but builds no process,
    # which used to end in a NameError on `proc.run()`.
    proc = None
    if len(sys.argv) > 1:
        mode = sys.argv[1]
        if mode == 'm':
            proc = LookupMaster()
        elif mode == 's':
            proc = LookupSlave('x')
        elif mode == 'c':
            create_slaves(LookupSlave, prefix="x")
        else:
            # Previously an unknown mode also crashed with NameError.
            sys.exit("unknown mode %r (expected 'm', 's' or 'c')" % mode)
    else:
        print("spawning minions!")
        create_slaves(LookupSlave)
        proc = LookupMaster()
    if proc is not None:
        proc.run()
# NOTE(review): this chunk was collapsed onto a single line, so the original
# nesting is lost.  The statements before the entry-point guard are the tail
# of a definition whose header is outside this view (they read `self`, `jobs`,
# `rfriends` and `tweets`); their logic is reproduced unchanged and they must
# be re-indented to their enclosing scope when spliced back.
if len(rfriends) < RFRIEND_POINTS:
    for u in rfriends:
        jobs[u].rfriends_score = RFRIEND_POINTS / len(rfriends)
if tweets:
    # Count how often each user id is mentioned across the tweets.
    ats = defaultdict(int)
    for tweet in tweets:
        for uid in tweet.mentions:
            ats[uid] += 1
    for u, c in ats.iteritems():
        points = c * MENTION_POINTS
        if points > 0:
            jobs[u].mention_score = points
for k, j in jobs.iteritems():
    j._id = k
    j.put(self.stalk)


# Entry point.  With an argument: 'm' runs a master, 's' runs a single slave
# with prefix 'x'.  Without an argument: create the slaves and run a master.
# NOTE(review): pairing of the final `else` with the argv-length check and the
# unconditional run are reconstructed from the collapsed source -- confirm.
if __name__ == '__main__':
    # Start unbound-safe: an unrecognised argument used to leave `proc`
    # undefined and end in a NameError on `proc.run()`.
    proc = None
    if len(sys.argv) > 1:
        mode = sys.argv[1]
        if mode == 'm':
            proc = LookupMaster()
        elif mode == 's':
            proc = LookupSlave('x')
        else:
            sys.exit("unknown mode %r (expected 'm' or 's')" % mode)
    else:
        print("spawning minions!")
        create_slaves(LookupSlave)
        proc = LookupMaster()
    if proc is not None:
        proc.run()
# NOTE(review): this chunk was collapsed onto a single line, so the original
# nesting is lost.  The statements before the entry-point guard are the tail
# of a definition whose header is outside this view (they read `self`, `jobs`,
# `rfriends` and `tweets`); their logic is reproduced unchanged and they must
# be re-indented to their enclosing scope when spliced back.
if len(rfriends) < RFRIEND_POINTS:
    for u in rfriends:
        jobs[u].rfriends_score = RFRIEND_POINTS / len(rfriends)
if tweets:
    # Count how often each user id is mentioned across the tweets.
    ats = defaultdict(int)
    for tweet in tweets:
        for uid in tweet.mentions:
            ats[uid] += 1
    for u, c in ats.iteritems():
        points = c * MENTION_POINTS
        if points > 0:
            jobs[u].mention_score = points
for k, j in jobs.iteritems():
    j._id = k
    j.put(self.stalk)


# Entry point.  With an argument: 'm' runs a master, 's' runs a single slave
# with prefix 'x'.  Without an argument: create the slaves and run a master.
# NOTE(review): pairing of the final `else` with the argv-length check and the
# unconditional run are reconstructed from the collapsed source -- confirm.
if __name__ == '__main__':
    # Start unbound-safe: an unrecognised argument used to leave `proc`
    # undefined and end in a NameError on `proc.run()`.
    proc = None
    if len(sys.argv) > 1:
        mode = sys.argv[1]
        if mode == 'm':
            proc = LookupMaster()
        elif mode == 's':
            proc = LookupSlave('x')
        else:
            sys.exit("unknown mode %r (expected 'm' or 's')" % mode)
    else:
        print("spawning minions!")
        create_slaves(LookupSlave)
        proc = LookupMaster()
    if proc is not None:
        proc.run()