def run(self):
    """Process search tasks taken from ``self.task_queue`` in an endless loop.

    This method never returns normally. For every raw task it:

    1. resets ``self.child_count`` to 0 and builds a ``Task`` via
       ``Task.from_dict``;
    2. obtains ``(api, cred_id)`` from ``self._ensure_api()``;
    3. runs ``api.search_future`` when ``task.since_id`` is truthy, otherwise
       ``api.search_past``, and puts each yielded message on
       ``self.process_queue`` as ``{"message": ..., "task": ...}``;
    4. reports the outcome on ``self.result_queue``: state ``DEFERRED`` when
       nothing was retrieved, ``WORK`` otherwise, or an error record holding
       the formatted traceback when an unexpected exception occurred;
    5. always passes ``cred_id``, ``api.iterations`` and ``api.token`` to
       ``self.cred_handler.set_credentials_used_info``.

    A ``TwitterApiError`` raised while iterating the search result is only
    logged: iteration stops and the task is still reported as ``WORK`` or
    ``DEFERRED``.

    Fetching the task, ``Task.from_dict`` and ``self._ensure_api()`` run
    outside the ``try`` block, so an exception from any of them propagates
    out of this method.
    """
    while True:
        raw_task = self.task_queue.get()
        self.child_count = 0
        task = Task.from_dict(raw_task)
        log.info("will process task: [%s] [%s]" % (task.id, task.q))

        api, cred_id = self._ensure_api()
        try:
            log.info("work for task: [%s] started" % (task.id))
            retrieved = 0
            started_at = time()

            if not task.since_id:
                found = api.search_past(task.q)
            else:
                found = api.search_future(task.q, task.since_id, task.max_id)

            try:
                # enumerate(..., 1) keeps `retrieved` equal to the number of
                # messages yielded so far; it stays 0 for an empty result.
                for retrieved, tweet in enumerate(found, 1):
                    self.process_queue.put({"message": tweet, "task": task})

                    if retrieved % 100 == 0:
                        log.info(
                            "was retrieve %s tweets\nprocess queue len: %s" % (
                                retrieved, self.process_queue.qsize()))

                    # NOTE(review): the original layout did not show whether
                    # this check was nested under the progress-log branch
                    # above; it is kept at loop-body level here -- confirm.
                    # NOTE(review): `<=` lets a worker start when child_count
                    # already equals the configured maximum -- verify against
                    # __start_process_worker that this is intended.
                    queue_over_half = (
                        self.process_queue.qsize() > properties.process_queue_size / 2)
                    if queue_over_half and self.child_count <= properties.max_process_workers_count:
                        self.__start_process_worker(properties.process_queue_size)
            except TwitterApiError as api_error:
                # Deliberately not re-raised: whatever was retrieved before
                # the failure is still reported below.
                log.error(
                    "i have twitter api error %s \ni have since_id = [%s], max_id = [%s]" % (
                        api_error, api.since_id, api.current_max_id))

            finished_at = time()
            log.info(
                "work for task: [%s] ended!\nResults: \ntime: \t[%s], \nprocessed: \t[%s] elements\nlast since_id = [%s]\nlast max_id = [%s]" % (
                    task.id, (finished_at - started_at), retrieved, api.since_id, api.current_max_id))

            # An empty result defers the task; anything else marks it as worked.
            self.result_queue.put({
                'task_id': task.id,
                'since_id': api.since_id or task.since_id,
                'state': DEFERRED if retrieved == 0 else WORK,
            })
        except Exception as err:
            log.exception(err)
            failure_report = {
                'task_id': task.id,
                'since_id': api.since_id or task.since_id,
                'max_id': api.current_max_id,
                'error': True,
                'error_detail': traceback.format_exc(),
            }
            self.result_queue.put(failure_report)
        finally:
            # Runs on success and on failure alike, before the next task.
            self.cred_handler.set_credentials_used_info(cred_id, api.iterations, api.token)