def get_venues(config, timestamp, logger_queue):
    """Run the "second step": fan venue grabbing out over a process pool.

    Builds the category dictionary, starts the threaded writer and the
    threaded id reader, then maps ``second_step_grabber`` over one argument
    tuple per worker process. When the pool is done (or fails), one
    ``("die", None)`` message per worker is put on the writer queue and the
    elapsed time is logged.

    Args:
        config: Mapping read for ``'auth_keys'``, ``'max_threads_count'``,
            ``'auth_per_thread'`` and
            ``config['steps']['get_venues']['batch_size']``; it is also
            passed through to the writer and id-reader initialisers.
        timestamp: Passed unchanged to ``Common.init_threaded_writer`` and
            ``init_threaded_get_ids``.
        logger_queue: Queue handed to ``MultiProcessLogger.get_logger`` and
            to every initialiser / worker process.

    Raises:
        ValueError: From ``multiprocessing.Pool`` if fewer than
            ``auth_per_thread`` keys are configured (worker count of 0).
    """
    connection = Common.ConnectionTo4sq(config['auth_keys'], None)
    categories = Common.get_categories_dict_with_full_inheritance(
        connection.get_categories())

    auth_keys = config['auth_keys']
    batch_size = config['steps']['get_venues']['batch_size']
    auth_per_thread = config["auth_per_thread"]
    # Floor division keeps the worker count an int on Python 3 as well;
    # range() and multiprocessing.Pool() both reject a float.
    threads_count = min(len(auth_keys) // auth_per_thread,
                        config['max_threads_count'])

    logger = MultiProcessLogger.get_logger("Main", logger_queue)
    writer_queue = Common.init_threaded_writer(
        config, logger_queue, threads_count, batch_size, timestamp)
    task_queue = init_threaded_get_ids(
        config, batch_size, logger_queue, threads_count, timestamp)

    # Each worker gets the shared categories plus its own disjoint slice of
    # auth_per_thread keys.
    args = [
        (categories, auth_keys[start:start + auth_per_thread])
        for start in range(0, threads_count * auth_per_thread, auth_per_thread)
    ]
    pool = multiprocessing.Pool(
        threads_count,
        second_step_grabber_init,
        [logger_queue, writer_queue, task_queue],
    )
    logger.info("Starting {} processes for second step...".format(threads_count))

    start_time = time.time()
    try:
        pool.map(second_step_grabber, args)
    except KeyboardInterrupt:
        # Drain the pending tasks so that nothing is left on the queue for
        # the workers to pick up.
        while not task_queue.empty():
            try:
                task_queue.get_nowait()
            except Empty:
                # Another consumer emptied the queue between the check and
                # the get; the loop condition ends the drain.
                pass
    except Exception as e:
        logger.error('{} \n {}'.format(e.args, traceback.format_exc()))

    # One "die" message per worker is queued for the writer side on every
    # path, including after an interrupt or an error.
    for _ in range(threads_count):
        try:
            writer_queue.put(("die", None))
        except Empty:
            # NOTE(review): a blocking put() on a standard queue does not
            # raise Empty; kept because writer_queue's type is not visible
            # here - confirm and remove if it is a multiprocessing.Queue.
            pass

    end_time = time.time()
    logger.warn('Program finished execution. It took: {} seconds'.format(end_time - start_time))
def search_venues(config, timestamp, logger_queue):
    """Run the "first step": fan venue searching out over a process pool.

    Splits the configured search parameter into many smaller ones, puts
    them on a task queue, and maps ``firstStepGrabber`` over one argument
    tuple per worker process. The elapsed time is logged at the end
    regardless of how the run finished.

    Args:
        config: Mapping read for ``'auth_keys'``, ``'max_threads_count'``,
            ``'auth_per_thread'`` and ``config['steps']['search_venues']``
            (used both to build the ``Common.SearchParameter`` and for its
            ``'batch_size'``); it is also passed to the writer initialiser.
        timestamp: Passed unchanged to ``Common.init_threaded_writer``.
        logger_queue: Queue handed to ``MultiProcessLogger.get_logger`` and
            to every initialiser / worker process.
    """
    auth_keys = config['auth_keys']
    search_parameter = Common.SearchParameter(config['steps']['search_venues'])
    batch_size = config['steps']['search_venues']['batch_size']

    connection = Common.ConnectionTo4sq(config['auth_keys'], None)
    categories = Common.get_categories_dict_with_full_inheritance(
        connection.get_categories())

    auth_per_thread = config["auth_per_thread"]
    # Floor division keeps the worker count an int on Python 3 as well;
    # range() and multiprocessing.Pool() both reject a float.
    threads_count = min(len(auth_keys) // auth_per_thread,
                        config['max_threads_count'])

    start_time = time.time()
    task_queue = multiprocessing.Queue()
    logger = MultiProcessLogger.get_logger("Main", logger_queue)

    try:
        writer_queue = Common.init_threaded_writer(
            config, logger_queue, threads_count, batch_size, timestamp)

        # Two-stage split: first with the flag True, then every resulting
        # piece again with the flag False.
        # NOTE(review): presumably the flag selects the axis being split -
        # confirm against Common.SearchParameter.split.
        split_rate = 32
        search_parameters = [
            piece
            for part in search_parameter.split(True, split_rate)
            for piece in part.split(False, split_rate)
        ]
        logger.warn("Search parameters count: {}.".format(len(search_parameters)))
        for param in search_parameters:
            task_queue.put_nowait(param)

        # Each worker gets its own disjoint slice of auth_per_thread keys
        # plus the shared categories.
        args = [
            (auth_keys[start:start + auth_per_thread], categories)
            for start in range(0, threads_count * auth_per_thread, auth_per_thread)
        ]
        pool = multiprocessing.Pool(
            threads_count,
            first_step_grabber_init,
            [logger_queue, writer_queue, task_queue],
        )
        logger.info("Starting {} processes for first step...".format(threads_count))

        try:
            pool.map(firstStepGrabber, args)
        except KeyboardInterrupt:
            # Drain the pending tasks so that nothing is left on the queue
            # for the workers to pick up.
            while not task_queue.empty():
                try:
                    task_queue.get_nowait()
                except Empty:
                    # Another consumer emptied the queue between the check
                    # and the get; the loop condition ends the drain.
                    pass
    except KeyboardInterrupt:
        # An interrupt during setup, or a second one while draining, is
        # swallowed so the timing message below is still logged.
        pass
    except Exception as e:
        logger.error('{} \n {}'.format(e.args, traceback.format_exc()))

    end_time = time.time()
    logger.warn('Program finished execution. It took: {} seconds'.format(end_time - start_time))