def notifyProgress(self, client, result, status, attributes):
    """Update the job counters and enqueue the users found by the analyzer.

    The "ongoing" redis counter matching ``result.operation`` is decremented
    and, when ``status`` is ``STATUS_COMPLETED``, the matching "completed"
    counter is incremented. The statistics are then refreshed through
    ``updateStatistics``. Finally, for analyzer results, every entry of
    ``attributes['analyzer.target_users']`` that is not yet a member of
    ``USERS_SELECTED`` is turned into a timeline job and queued.

    :param client: worker the result comes from (only used to get its nickname)
    :param result: the job that was executed; ``operation`` and ``user_id`` are read
    :param status: completion status reported for the job
    :param attributes: mapping of extra data returned by the job
    """
    # (operation, "ongoing" key, "completed" key), listed in the order the
    # values are handed to updateStatistics: timeline, follower, analyzer, update.
    counters = (
        (TwitterJob.TIMELINE_OP, Stats.TIMELINE_ONGOING, Stats.TIMELINE_COMPLETED),
        (TwitterJob.FOLLOWER_OP, Stats.FOLLOWER_ONGOING, Stats.FOLLOWER_COMPLETED),
        (TwitterJob.ANALYZER_OP, Stats.ANALYZER_ONGOING, Stats.ANALYZER_COMPLETED),
        (TwitterJob.UPDATE_OP, Stats.UPDATE_ONGOING, Stats.UPDATE_COMPLETED),
    )

    ongoing = [0, 0, 0, 0]
    for slot, (operation, ongoing_key, completed_key) in enumerate(counters):
        if result.operation == operation:
            ongoing[slot] = self.redis.decr(ongoing_key)
            if status == STATUS_COMPLETED:
                self.redis.incr(completed_key)
            break

    # NOTE(review): every slot is passed incremented by one, including the
    # ones left at 0 -- presumably updateStatistics wants the value from
    # before the decrement; confirm against updateStatistics.
    self.updateStatistics(self.getNickname(client), result, attributes,
                          ongoing[0] + 1, ongoing[1] + 1,
                          ongoing[2] + 1, ongoing[3] + 1)

    if not result.operation == result.ANALYZER_OP:
        return

    counter = 0

    for target_user in attributes.get('analyzer.target_users', []):
        # The handler is deliberately broad: whatever goes wrong while
        # handling one user is logged and the loop moves on to the next one.
        try:
            newjob = TwitterJob(TwitterJob.TIMELINE_OP, int(target_user), 0)

            # NOTE: This is the point you may want to customize. BFS appends
            # the job at the end of STREAM (rpush), DFS inserts it in front
            # of STREAM (lpush); any other value of settings.TRAVERSING sends
            # the job to FRONTIER_NAME.
            # If you need to implement a priority queue use ZSETs
            if not self.redis.sismember(self.USERS_SELECTED, target_user):
                traversing = settings.TRAVERSING.upper()
                payload = TwitterJob.serialize(newjob)

                if traversing == 'BFS':
                    self.redis.rpush(self.STREAM, payload)
                elif traversing == 'DFS':
                    self.redis.lpush(self.STREAM, payload)
                else:
                    self.redis.rpush(self.FRONTIER_NAME, payload)

                self.redis.sadd(self.USERS_SELECTED, target_user)
                counter += 1
        except Exception:
            log.msg("Bogus data from the analyzer: %s is not a user_id" % target_user)

    if counter > 0:
        log.msg("Adding a total of %d users. These were %d 's friends" % (counter, result.user_id))
    else:
        log.msg("No admissible friends were discovered while traversing %d 's followers list" % (result.user_id))
def onJobReturned(self, response):
    """Dispatch the value returned by the job that was running.

    Clears ``self.factory.thread_working`` and then:

    * a ``response`` that is not a ``TwitterResponse`` is reported through
      ``onJobFailed``;
    * ``STATUS_COMPLETED`` / ``STATUS_BANNED`` report a new ``TwitterJob`` for
      the same operation and user, carrying ``response.state``
      (``STATUS_BANNED`` also stores ``response.sleep_time`` on
      ``self.sleep_time``);
    * ``STATUS_UNAUTHORIZED`` / ``STATUS_ERROR`` report the current job
      unchanged;
    * any other status is reported through ``onJobFailed``.

    :param response: the object produced by the job
    :return: whatever ``onJobCompleted`` / ``onJobFailed`` returns
    """
    self.factory.thread_working = False

    if not isinstance(response, TwitterResponse):
        reason = failure.Failure(
            Exception("I was expecting a TwitterResponse object. Got %s" % str(response)),
            Exception)
        return self.onJobFailed(reason)

    log.msg("Twitter job executed. Response is %s" % str(response))

    job = self.current_job
    self.current_job = None

    if response.status in (STATUS_COMPLETED, STATUS_BANNED):
        if response.status == STATUS_BANNED:
            self.sleep_time = response.sleep_time

        next_job = TwitterJob(job.operation, job.user_id, response.state)
        return self.onJobCompleted(response.status, next_job, response.attributes)

    if response.status in (STATUS_UNAUTHORIZED, STATUS_ERROR):
        return self.onJobCompleted(response.status, job, response.attributes)

    # The status is unexpected here, so it may not even be a number: format
    # it with %s (wrapped in a tuple) so that building the message cannot
    # raise and hide the real problem. Integers render exactly as with %d.
    reason = failure.Failure(
        Exception("Unknown status %s. Don't know how to proceed" % (response.status,)),
        Exception)
    return self.onJobFailed(reason)
def transformJob(self, result):
    """Return the follow-up job for ``result``, if the transformation asks for one.

    :param result: the job that just finished; ``operation`` and ``user_id`` are read
    :return: ``(False, TwitterJob)`` when a follow-up job is produced,
             ``None`` otherwise
    """
    selected = self.transformation

    # Exactly one transformation selected: nothing to chain.
    # NOTE(review): an earlier comment called this check useless ("it will
    # fall back to None anyway"); whether that holds depends on the
    # TRANSFORM_* values, which are not visible here, so the check stays.
    if selected in (TRANSFORM_TIMELINE, TRANSFORM_FOLLOWER, TRANSFORM_ANALYZER):
        return None

    # If you specify just -t and -a -> -f is implied
    if result.operation == result.TIMELINE_OP and \
       selected & (TRANSFORM_FOLLOWER | TRANSFORM_ANALYZER):
        follower_job = TwitterJob(TwitterJob.FOLLOWER_OP, result.user_id, -1)
        return False, follower_job

    if result.operation == result.FOLLOWER_OP and \
       selected & (TRANSFORM_ANALYZER):
        analyzer_job = TwitterJob(TwitterJob.ANALYZER_OP, result.user_id, 0)
        return False, analyzer_job

    return None
def manageLostClient(self, client):
    """Release the "ongoing" counter held by a lost worker, then delegate.

    When ``client`` is marked ``WORKER_WORKING`` its assigned job is
    deserialized and the "ongoing" counter of that job's operation is
    decremented. The call is then forwarded to
    ``JobTrackerFactory.manageLostClient``.

    :param client: the worker that went away
    :return: whatever ``JobTrackerFactory.manageLostClient`` returns
    """
    if self.clients[client] == WORKER_WORKING:
        lost_job = TwitterJob.deserialize(self.assigned_jobs[client])

        # Checked in order; the first matching operation wins.
        ongoing_keys = (
            (TwitterJob.TIMELINE_OP, Stats.TIMELINE_ONGOING),
            (TwitterJob.FOLLOWER_OP, Stats.FOLLOWER_ONGOING),
            (TwitterJob.ANALYZER_OP, Stats.ANALYZER_ONGOING),
            (TwitterJob.UPDATE_OP, Stats.UPDATE_ONGOING),
        )
        for operation, key in ongoing_keys:
            if lost_job.operation == operation:
                self.redis.decr(key)
                break

    return JobTrackerFactory.manageLostClient(self, client)
def assignJobTo(self, client):
    """Assign a job through the base class and count it as "ongoing".

    The assignment itself is done by ``JobTrackerFactory.assignJobTo``. When
    it yields ``TYPE_JOB`` the job is deserialized and the "ongoing" counter
    of its operation is incremented.

    :param client: the worker receiving the assignment
    :return: the ``(type, job)`` pair obtained from the base class, unchanged
    """
    assignment, job = JobTrackerFactory.assignJobTo(self, client)

    # Just used to keep track of some general statistics
    if assignment == TYPE_JOB:
        assigned_job = TwitterJob.deserialize(job)

        # Checked in order; the first matching operation wins.
        ongoing_keys = (
            (TwitterJob.TIMELINE_OP, Stats.TIMELINE_ONGOING),
            (TwitterJob.FOLLOWER_OP, Stats.FOLLOWER_ONGOING),
            (TwitterJob.ANALYZER_OP, Stats.ANALYZER_ONGOING),
            (TwitterJob.UPDATE_OP, Stats.UPDATE_ONGOING),
        )
        for operation, key in ongoing_keys:
            if assigned_job.operation == operation:
                self.redis.incr(key)
                break

    return assignment, job
def notifyProgress(self, client, result, status, attributes):
    """Update the job counters and enqueue the users found by the analyzer.

    The "ongoing" redis counter matching ``result.operation`` is decremented
    and, when ``status`` is ``STATUS_COMPLETED``, the matching "completed"
    counter is incremented. The statistics are then refreshed through
    ``updateStatistics``. Finally, for analyzer results, every entry of
    ``attributes['analyzer.target_users']`` that is not yet a member of
    ``USERS_SELECTED`` is turned into a timeline job and queued.

    :param client: worker the result comes from (only used to get its nickname)
    :param result: the job that was executed; ``operation`` and ``user_id`` are read
    :param status: completion status reported for the job
    :param attributes: mapping of extra data returned by the job
    """
    # (operation, "ongoing" key, "completed" key), listed in the order the
    # values are handed to updateStatistics: timeline, follower, analyzer, update.
    counters = (
        (TwitterJob.TIMELINE_OP, Stats.TIMELINE_ONGOING, Stats.TIMELINE_COMPLETED),
        (TwitterJob.FOLLOWER_OP, Stats.FOLLOWER_ONGOING, Stats.FOLLOWER_COMPLETED),
        (TwitterJob.ANALYZER_OP, Stats.ANALYZER_ONGOING, Stats.ANALYZER_COMPLETED),
        (TwitterJob.UPDATE_OP, Stats.UPDATE_ONGOING, Stats.UPDATE_COMPLETED),
    )

    ongoing = [0, 0, 0, 0]
    for slot, (operation, ongoing_key, completed_key) in enumerate(counters):
        if result.operation == operation:
            ongoing[slot] = self.redis.decr(ongoing_key)
            if status == STATUS_COMPLETED:
                self.redis.incr(completed_key)
            break

    # NOTE(review): every slot is passed incremented by one, including the
    # ones left at 0 -- presumably updateStatistics wants the value from
    # before the decrement; confirm against updateStatistics.
    self.updateStatistics(self.getNickname(client), result, attributes,
                          ongoing[0] + 1, ongoing[1] + 1,
                          ongoing[2] + 1, ongoing[3] + 1)

    if not result.operation == result.ANALYZER_OP:
        return

    counter = 0

    for target_user in attributes.get('analyzer.target_users', []):
        # The handler is deliberately broad: whatever goes wrong while
        # handling one user is logged and the loop moves on to the next one.
        try:
            newjob = TwitterJob(TwitterJob.TIMELINE_OP, int(target_user), 0)

            # NOTE: This is the point you may want to customize. BFS appends
            # the job at the end of STREAM (rpush), DFS inserts it in front
            # of STREAM (lpush); any other value of settings.TRAVERSING sends
            # the job to FRONTIER_NAME.
            # If you need to implement a priority queue use ZSETs
            if not self.redis.sismember(self.USERS_SELECTED, target_user):
                traversing = settings.TRAVERSING.upper()
                payload = TwitterJob.serialize(newjob)

                if traversing == 'BFS':
                    self.redis.rpush(self.STREAM, payload)
                elif traversing == 'DFS':
                    self.redis.lpush(self.STREAM, payload)
                else:
                    self.redis.rpush(self.FRONTIER_NAME, payload)

                self.redis.sadd(self.USERS_SELECTED, target_user)
                counter += 1
        except Exception:
            log.msg(
                "Bogus data from the analyzer: %s is not a user_id" % target_user)

    if counter > 0:
        log.msg(
            "Adding a total of %d users. These were %d 's friends" % (counter, result.user_id))
    else:
        log.msg(
            "No admissible friends were discovered while traversing %d 's followers list" % (result.user_id))