def _send_job(self, uid, rfs, ats, force=None):
    """Queue a lookup job for ``uid`` and mark that id as being looked up.

    A ``LookupJobBody`` is built from the arguments (``rfs`` becomes its
    ``rfriends_score``, ``ats`` its ``mention_score``, ``force`` is passed
    through unchanged), put on ``self.stalk``, and the id's state in
    ``self.scores`` is then set to ``scoredict.LOOKUP``.
    """
    fields = {
        '_id': uid,
        'rfriends_score': rfs,
        'mention_score': ats,
        'force': force,
    }
    LookupJobBody(**fields).put(self.stalk)
    self.scores.set_state(uid, scoredict.LOOKUP)
def read_scores(self):
    """Drain score jobs from ``self.stalk`` into ``self.scores``.

    Reserves up to 100000 jobs (10000000 when ``self.halt`` is already set),
    passing 35 to each ``reserve`` call (a timeout in seconds if
    ``self.stalk`` is a beanstalkc connection -- confirm). For every job:

    * a body equal to ``"halt"`` sets ``self.halt``, deletes the job and
      returns;
    * a body whose ``done`` attribute is true sets that id's state to
      ``scoredict.DONE``;
    * any other body adds its ``rfriends_score`` and ``mention_score`` through
      ``self.scores.increment``.

    The method returns when ``reserve`` yields ``None``, after a halt job,
    when the job limit is reached, or after any exception. On an exception
    the error is logged, ``self.halt`` is set and the job that was being
    processed (if one was reserved in this pass) is buried.
    """
    limit = 10000000 if self.halt else 100000
    for count in xrange(limit):
        # Reset on every pass: if reserve() itself fails, the handler below
        # must not try to bury a job that an earlier pass already deleted.
        job = None
        try:
            job = self.stalk.reserve(35)
            if job is None:
                logging.info("loaded %d scores", count)
                return
            if job.body == "halt":
                self.halt = True
                # Parenthesised single-argument form prints the same text
                # under the Python 2 print statement.
                print("starting to halt...")
                logging.info("starting to halt...")
                job.delete()
                return
            body = LookupJobBody.from_job(job)
            if body.done:
                self.scores.set_state(body._id, scoredict.DONE)
            else:
                self.scores.increment(
                    body._id,
                    body.rfriends_score,
                    body.mention_score
                )
            job.delete()
        except:
            # Catch-all kept on purpose: every failure, including an
            # interrupt, flips the halt flag instead of propagating.
            logging.exception("exception in read_scores caused HALT")
            self.halt = True
            if job is not None:
                try:
                    job.bury()
                except Exception:
                    # A failing bury must not escape and defeat the halt
                    # path above.
                    logging.exception("could not bury job in read_scores")
            return
def run(self):
    """Loop forever: reserve lookup jobs in batches and crawl each user.

    Each pass reserves up to 100 jobs from ``self.stalk``, resolves their ids
    with a single ``self.twitter.user_lookup`` call, and then handles every
    job in turn:

    * no profile returned for the id -> the job is deleted;
    * the job is not forced and ``User.in_db`` reports the user -> the job is
      deleted without crawling;
    * otherwise the user is crawled with ``self.crawl_user``, saved, and the
      job is deleted;
    * an ``Exception`` while handling a job is logged and the job is buried.

    If the whole lookup raises ``ResourceNotFound`` every job of the batch is
    deleted, matching the per-user "no profile" case. This method never
    returns; ``KeyboardInterrupt`` and ``SystemExit`` propagate to the caller.
    """
    while True:
        jobs = []
        for x in xrange(100):
            try:
                # The first reserve passes None and blocks until a job
                # arrives; the following ones pass 0 and come back with None
                # as soon as nothing more is ready.
                j = self.stalk.reserve(0 if x else None)
            except beanstalkc.DeadlineSoon:
                break
            if j is None:
                break
            jobs.append(j)

        if not jobs:
            # Nothing was reserved (e.g. DeadlineSoon on the first attempt):
            # do not spend a lookup call on an empty id list.
            continue

        bodies = [LookupJobBody.from_job(j) for j in jobs]
        ids = [b._id for b in bodies]
        try:
            users = self.twitter.user_lookup(ids)
        except ResourceNotFound:
            logging.info("no profile for %r", ids)
            # Release the reservations; otherwise these jobs stay reserved
            # until their TTR runs out and are then retried indefinitely.
            for job in jobs:
                job.delete()
            continue

        logging.info(
            "looking at %r", [getattr(u, 'screen_name', '') for u in users]
        )
        # NOTE(review): assumes user_lookup returns one entry per requested
        # id, in request order, with None for missing profiles -- confirm;
        # zip would otherwise pair jobs with the wrong user.
        for job, body, user in zip(jobs, bodies, users):
            if user is None:
                logging.info("no profile for %d", body._id)
                job.delete()
                continue
            try:
                self.twitter.sleep_if_needed()
                logging.info("look at %s", user.screen_name)
                if (not body.force) and User.in_db(user._id):
                    job.delete()
                    continue
                self.crawl_user(user, body.force)
                user.save()
                job.delete()
            except Exception:
                # Narrower than a bare except so that an interrupt or
                # sys.exit() can actually stop this endless loop.
                logging.exception("exception for job %s", job.body)
                job.bury()
            # NOTE(review): logged once per crawled or buried job; nesting
            # was reconstructed from a collapsed source line -- confirm.
            logging.info("api calls remaining: %d", self.twitter.remaining)