class Consumer():
    """Beanstalkd worker: reserves jobs from a tube and dispatches them
    to process_job(), deleting successes and burying failures."""

    def __init__(self, beanstalk_host, beanstalk_port):
        # One connection, reused for the consumer's lifetime.
        self.beanstalk = Connection(host=beanstalk_host, port=beanstalk_port)

    def consume(self, max_jobs, src_queue):
        """consume from the incoming job queue."""
        # Narrow the watch list down to just the source tube.
        self.beanstalk.watch(src_queue)
        self.beanstalk.ignore('default')
        print("now watching", self.beanstalk.watching())

        # Non-negative cap: poll with a 10s timeout and stop when the
        # queue runs dry. Negative cap: block on reserve() forever.
        queue_timeout = 10 if max_jobs >= 0 else None

        handled = 0
        while max_jobs <= 0 or handled < max_jobs:
            job = self.beanstalk.reserve(timeout=queue_timeout)
            if job is None:
                # reserve() timed out -- nothing left to do.
                break
            try:
                succeeded = self.process_job(job.body) is True
                if succeeded:
                    job.delete()
                else:
                    job.bury()
            except Exception as e:
                # A crashing handler buries the job for later inspection
                # instead of killing the consumer loop.
                print("error from process_job()", e)
                job.bury()
            finally:
                # Counts attempts, successful or not.
                handled += 1

        self.beanstalk.close()

    def process_job(self, json_job):
        """default: do nothing."""
        return False
def push_city_jobs(city, sample_order):
    """get image download jobs for a city from job api then push them to image download queue.

    Fetches jobs for `city` from the job API and puts each one (as JSON)
    onto the city's backlog tube. Returns the number of jobs pushed, or
    0 when the API call reports failure.
    """
    # Tube name is derived from the city, e.g. "New York" -> "backlog_new_york".
    dst_tube = 'backlog_' + city.replace(' ', '_').lower()
    beanstalk = Connection(host='localhost', port=11300)
    # FIX: the original leaked the connection on the early `return 0`
    # path (and on any exception); try/finally closes it on every exit.
    try:
        print("tubes:", beanstalk.tubes())
        print("switching to", beanstalk.use(dst_tube))
        print("now using", beanstalk.using())
        job_api = API()
        ok, jobs = job_api.jobs(city, sample_order)
        if not ok:
            return 0
        for job in jobs:
            beanstalk.put(json.dumps(job))
            print("pushed {}_{}_{}_{}".format(job['city'], job['osm_way_id'],
                                              job['sequence'], job['cam_dir']))
        return len(jobs)
    finally:
        beanstalk.close()
class BeanstalkdBroker(BaseBroker):
    """Context-managed broker over a single beanstalkd tube: the same
    tube is both watched (for reserve) and used (for put)."""

    def __init__(self, queue_name: str):
        self.queue_name = queue_name
        conn = Connection(host=settings.beanstalkd_host,
                          port=settings.beanstalkd_port)
        # Read and write the same tube.
        conn.watch(name=queue_name)
        conn.use(name=queue_name)
        self.connection = conn

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always release the socket, even when the body raised.
        self.connection.close()

    def add_to_queue(self, task_uuid: uuid.UUID):
        """Enqueue a task by its UUID (stored as the job body)."""
        self.connection.put(body=str(task_uuid))

    def reserve(self, timeout: int = None) -> pystalkd.Job:
        """Reserve the next job, blocking up to `timeout` seconds
        (forever when None)."""
        return self.connection.reserve(timeout=timeout)

    def delete(self, job: pystalkd.Job):
        """Permanently remove a finished job from the queue."""
        job.delete()
class Shovel():
    """Moves jobs between beanstalkd tubes; drain() redistributes a
    budget of jobs from per-city backlog tubes into one target tube."""

    def __init__(self, beanstalk_host, beanstalk_port):
        self.logger = logging.getLogger(__name__)
        self.beanstalk = Connection(host=beanstalk_host, port=beanstalk_port)
        self.logger.info("host: {} {}".format(beanstalk_host, beanstalk_port))

    def watch_single_tube(self, tube):
        """watch a single tube."""
        # todo: is this necessary?
        # Watch the target first so the watch list never goes empty,
        # then ignore every other tube currently being watched.
        self.beanstalk.watch(tube)
        watching = [x for x in self.beanstalk.watching() if x != tube]
        for x in watching:
            self.beanstalk.ignore(x)
        self.logger.info("now watching {}".format(self.beanstalk.watching()))

    def move_jobs(self, src_tube, dst_tube, n=0):
        """move n jobs from one tube to another.

        Reserves up to n jobs from src_tube into an in-memory stack,
        then re-puts each body into dst_tube and deletes the original.
        With n=0 this is a no-op apart from tube bookkeeping.
        """
        self.watch_single_tube(src_tube)
        # NOTE(review): watch_single_tube() already watched src_tube;
        # this second watch() looks redundant -- confirm before removing.
        self.beanstalk.watch(src_tube)
        self.beanstalk.use(dst_tube)
        # BATCH DRAIN INTO THIS (note that this bit is not persistent!)
        lifo = []
        while (n > 0):
            job = self.beanstalk.reserve(timeout=60)
            if job is None:
                # NOTE(review): returning here abandons any jobs already
                # reserved into `lifo`; they stay reserved until their
                # TTR expires and beanstalkd releases them.
                print("timed out. nothing to do?!")
                return
            lifo.append(job)
            n -= 1
        stack_len = len(lifo)
        # dump stack into destination work queue.
        while (len(lifo) > 0):
            job = lifo.pop()
            # Re-put the payload into dst_tube, then delete the source
            # job only after the copy succeeded.
            self.beanstalk.put(job.body)
            job.delete()
        self.logger.info("drained {} jobs".format(stack_len))

    def drain(self, total_shovel, target_queue, queue_prefix="backlog"):
        """Shovel up to total_shovel jobs from every `<queue_prefix>_*`
        tube into target_queue, splitting the budget across tubes in
        proportion to each tube's ready-job count."""
        self.logger.info(
            "total_shovel: [{}] target_queue: [{}] queue_prefix: [{}]".format(
                total_shovel, target_queue, queue_prefix))
        # Stats for every backlog tube (dicts with at least
        # 'name' and 'current-jobs-ready').
        backlog = [
            self.beanstalk.stats_tube(x) for x in self.beanstalk.tubes()
            if x.startswith(queue_prefix + "_")
        ]
        # shuffle cities.
        # we do this because there is a chance that some of the jobs in the last
        # city to be processed may be left on backlog if number of jobs shoveled
        # so far exceeds maximum processing limit. this happens due to
        # accumulation of rounding error.
        shuffle(backlog)
        # NOTE(review): if backlog is non-empty but every tube reports 0
        # ready jobs, total_jobs is 0 and the weight division below
        # raises ZeroDivisionError -- confirm whether that can happen.
        total_jobs = sum(city['current-jobs-ready'] for city in backlog)
        # Never try to shovel more than actually exists.
        total_shovel = min(total_jobs, total_shovel)
        self.logger.info("jobs remaining: [{}] jobs to shovel: [{}]".format(
            total_jobs, total_shovel))
        done = 0
        for city in backlog:
            name, jobs = city['name'], city['current-jobs-ready']
            # Proportional share of the budget; ceil() rounds up, so the
            # running total `done` can overshoot and is clamped below.
            weight = jobs / total_jobs
            shovel = ceil(weight * total_shovel)
            done += shovel
            if done > total_shovel:
                # Trim this city's share so the overall budget holds.
                excess = done - total_shovel
                shovel = max(0, shovel - excess)
            self.logger.info(
                "tube: {} jobs: {} weight: {:0.1f}%, shovel: {}".format(
                    name, jobs, 100 * weight, shovel))
            self.move_jobs(name, target_queue, shovel)
        self.beanstalk.close()