def __init__(self, client, group, topic,
             partitions=None,
             auto_commit=True,
             auto_commit_every_n=AUTO_COMMIT_MSG_COUNT,
             auto_commit_every_t=AUTO_COMMIT_INTERVAL,
             num_procs=1,
             partitions_per_proc=0,
             **simple_consumer_options):

    # Initialize the base consumer class
    super(MultiProcessConsumer, self).__init__(
        client, group, topic,
        partitions=partitions,
        auto_commit=auto_commit,
        auto_commit_every_n=auto_commit_every_n,
        auto_commit_every_t=auto_commit_every_t)

    # Variables for managing and controlling the data flow from
    # consumer child processes to the master
    manager = MPManager()
    self.queue = manager.Queue(1024)   # Child consumers dump messages into this
    self.events = Events(
        start=manager.Event(),   # Signals the consumers to start fetching
        exit=manager.Event(),    # Requests the consumers to shut down
        pause=manager.Event())   # Requests the consumers to pause fetching
    self.size = manager.Value('i', 0)  # Number of messages to fetch

    # dict.keys() returns a view in py3 and is not a thread-safe operation:
    # http://blog.labix.org/2008/06/27/watch-out-for-listdictkeys-in-python-3
    # It's safer to copy the dict; this only runs during init.
    partitions = list(self.offsets.copy().keys())

    # By default, start one consumer process for all partitions.
    # The logic below ensures that
    # * we do not cross the num_procs limit
    # * partitions are distributed evenly among processes
    if partitions_per_proc:
        num_procs = len(partitions) // partitions_per_proc
        if num_procs * partitions_per_proc < len(partitions):
            num_procs += 1

    # The final set of chunks
    chunks = [partitions[proc::num_procs] for proc in range(num_procs)]

    self.procs = []
    for chunk in chunks:
        options = {'partitions': list(chunk)}
        if simple_consumer_options:
            simple_consumer_options.pop('partitions', None)
            options.update(simple_consumer_options)

        args = (client.copy(), self.group, self.topic,
                self.queue, self.size, self.events)
        proc = Process(target=_mp_consume, args=args, kwargs=options)
        proc.daemon = True
        proc.start()
        self.procs.append(proc)
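
# --- Illustrative sketch (not part of the original source) ---
# A minimal, standalone example of the round-robin chunking used above:
# partitions[proc::num_procs] picks every num_procs-th partition starting
# at `proc`, so partitions are spread evenly across consumer processes.
# The partition ids below are invented purely for the example.
partitions = list(range(10))      # e.g. 10 partitions: 0..9
num_procs = 3
chunks = [partitions[proc::num_procs] for proc in range(num_procs)]
assert chunks == [[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]]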
def __init__(self, use_confluent_kafka=False, num_procs=1,
             report_interval=5, json_logging=False, log_level='INFO',
             verbose=False, **consumer_options):

    # Variables for managing and controlling the data flow from
    # consumer child processes to the master
    manager = MPManager()
    self.queue = manager.Queue(1024)  # Child consumers dump messages into this
    self.events = Events(
        start=manager.Event(),  # Signals the consumers to start fetching
        stop=manager.Event(),   # Stop fetching and pushing data until start is set again
        exit=manager.Event())   # Requests the consumers to shut down

    self.report_interval = report_interval
    self.num_procs = num_procs
    self.consumer_options = consumer_options
    self.json_logging = json_logging
    self.verbose = verbose
    self.log_level = log_level

    self.procs = []
    for _ in range(self.num_procs):
        args = (self.queue, self.report_interval, self.json_logging,
                self.log_level, self.verbose, self.events)
        proc = Process(target=_mp_consume_confluent_kafka
                       if use_confluent_kafka else _mp_consume_kafka_python,
                       args=args, kwargs=consumer_options)
        proc.daemon = True
        proc.start()
        self.procs.append(proc)
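
# --- Illustrative sketch (not part of the original source) ---
# One plausible way the parent side could drive the events defined above:
# set `start` to let the children fetch, drain the shared queue, and set
# `exit` on shutdown so the children stop. The `drain` function, its
# arguments, and the `handle` callback are hypothetical; only the Events
# fields (start/stop/exit) come from the code above.
import queue as queue_module

def drain(msg_queue, events, handle, poll_timeout=1.0):
    events.start.set()                      # allow child consumers to fetch
    try:
        while not events.exit.is_set():
            try:
                msg = msg_queue.get(timeout=poll_timeout)
            except queue_module.Empty:
                continue
            handle(msg)
    finally:
        events.exit.set()                   # ask child consumers to shut down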
def __init__(self, *args, **kwargs):
    """
    .. note::

       All args are optional unless otherwise noted.

    Args:
        name (str): unique name of this instance. By default a uuid will
            be generated.
        queues (tuple): List of queue names to listen on.
        skip_signal (bool): Don't register the signal handlers. Useful
            for testing.
    """
    super(JobManager, self).__init__(*args, **kwargs)
    setup_logger("eventmq")

    #: Define the name of this JobManager instance. Useful to know when
    #: referring to the logs.
    self.name = kwargs.pop('name', None) or generate_device_name()
    logger.info('EventMQ Version {}'.format(__version__))
    logger.info('Initializing JobManager {}...'.format(self.name))

    #: Keep track of workers
    concurrent_jobs = kwargs.pop('concurrent_jobs', None)
    if concurrent_jobs is not None:
        conf.CONCURRENT_JOBS = concurrent_jobs

    #: List of queues that this job manager is listening on
    self.queues = kwargs.pop('queues', None)

    if not kwargs.pop('skip_signal', False):
        # Handle any SIGHUPs by reloading the config
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)
        signal.signal(signal.SIGINT, self.sigterm_handler)
        signal.signal(signal.SIGQUIT, self.sigterm_handler)
        signal.signal(signal.SIGUSR1, self.handle_pdb)

    #: JobManager starts out by INFORMing the router of its existence,
    #: then telling the router that it is READY. The reply will be the
    #: unit of work.
    # Despite the name, jobs are received on this socket
    self.outgoing = Sender(name=self.name)

    self.poller = Poller()

    #: Stats and monitoring information

    #: Jobs in flight tracks all jobs currently executing.
    #: Key: msgid, Value: The message with all the details of the job
    self.jobs_in_flight = {}

    #: Running total number of REQUEST messages received from the broker
    self.total_requests = 0
    #: Running total number of READY messages sent to the broker
    self.total_ready_sent = 0

    #: Keep track of which pids are servicing our requests
    #: Key: pid, Value: # of jobs completed on the process with that pid
    self.pid_distribution = {}

    #: Set up worker queues
    self._mp_manager = MPManager()
    self.request_queue = self._mp_manager.Queue()
    self.finished_queue = self._mp_manager.Queue()

    self._setup()
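
# --- Illustrative sketch (not part of the original source) ---
# One plausible shape for the worker side of the two queues created above:
# a child process pulls jobs off request_queue, runs them, and reports the
# msgid back on finished_queue so the manager can clear jobs_in_flight.
# `worker_loop` and the (msgid, func, args) job format are hypothetical and
# not taken from EventMQ itself.
import os

def worker_loop(request_queue, finished_queue):
    while True:
        job = request_queue.get()        # blocks until the manager enqueues work
        if job is None:                  # sentinel: shut the worker down
            break
        msgid, func, args = job
        result = func(*args)
        finished_queue.put((msgid, os.getpid(), result))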