示例#1
0
def get_venues(config, timestamp, logger_queue):
    """Run the "second step" grabbers in a process pool and time the run.

    Reads ``auth_keys``, ``max_threads_count``, ``auth_per_thread`` and
    ``steps.get_venues.batch_size`` from ``config``, starts the writer and
    the id-producing task queue, then maps ``second_step_grabber`` over one
    argument tuple per worker process.

    Args:
        config: Mapping holding the settings listed above.
        timestamp: Passed through unchanged to ``Common.init_threaded_writer``
            and ``init_threaded_get_ids``.
        logger_queue: Queue handed to the logger, the writer, the id producer
            and the pool initializer.

    Returns:
        None. Progress, errors and the elapsed time are reported via the logger.
    """
    auth_keys = config['auth_keys']
    connection = Common.ConnectionTo4sq(auth_keys, None)
    categories = Common.get_categories_dict_with_full_inheritance(connection.get_categories())
    max_threads_count = config['max_threads_count']
    batch_size = config['steps']['get_venues']['batch_size']
    auth_per_thread = config["auth_per_thread"]
    # Floor division keeps the count an integer on Python 3 as well; a float
    # here would break both range() and multiprocessing.Pool() below.
    threads_count = min(len(auth_keys) // auth_per_thread, max_threads_count)

    logger = MultiProcessLogger.get_logger("Main", logger_queue)
    writer_queue = Common.init_threaded_writer(config, logger_queue, threads_count, batch_size, timestamp)
    task_queue = init_threaded_get_ids(config, batch_size, logger_queue, threads_count, timestamp)

    # Each worker gets the categories plus its own slice of `auth_per_thread` keys.
    args = []
    for i in range(threads_count):
        first_key = auth_per_thread * i
        args.append((categories, auth_keys[first_key:first_key + auth_per_thread]))

    pool = multiprocessing.Pool(threads_count, second_step_grabber_init, [logger_queue, writer_queue, task_queue])
    logger.info("Starting {} processes for second step...".format(threads_count))
    start_time = time.time()
    try:
        pool.map(second_step_grabber, args)
    except KeyboardInterrupt:
        # Discard the pending tasks so nothing further is picked up from the queue.
        while not task_queue.empty():
            try:
                task_queue.get_nowait()
            except Empty:
                # empty() and get_nowait() can race with other consumers.
                pass
    except Exception as e:
        logger.error('{} \n {}'.format(e.args, traceback.format_exc()))
    finally:
        # No more work will be submitted; close() does not block and lets the
        # worker processes exit once they are idle.
        pool.close()

    # One ("die", None) message is queued per worker process.
    for _ in range(threads_count):
        try:
            writer_queue.put(("die", None))
        except Empty:
            # NOTE(review): a blocking put() is not expected to raise Empty;
            # guard kept from the original - confirm against the writer queue type.
            pass
    end_time = time.time()
    logger.warn('Program finished execution. It took: {} seconds'.format(end_time - start_time))
示例#2
0
def search_venues(config, timestamp, logger_queue):
    """Run the "first step" grabbers in a process pool and time the run.

    Builds a ``Common.SearchParameter`` from ``config['steps']['search_venues']``,
    splits it into smaller search parameters, puts them all on a task queue
    and maps ``firstStepGrabber`` over one argument tuple per worker process.

    Args:
        config: Mapping providing ``auth_keys``, ``max_threads_count``,
            ``auth_per_thread`` and the ``steps.search_venues`` section
            (including ``batch_size``).
        timestamp: Passed through unchanged to ``Common.init_threaded_writer``.
        logger_queue: Queue handed to the logger, the writer and the pool
            initializer.

    Returns:
        None. Progress, errors and the elapsed time are reported via the logger.
    """
    auth_keys = config['auth_keys']
    search_parameter = Common.SearchParameter(config['steps']['search_venues'])
    batch_size = config['steps']['search_venues']['batch_size']
    max_threads_count = config['max_threads_count']
    connection = Common.ConnectionTo4sq(auth_keys, None)
    categories = Common.get_categories_dict_with_full_inheritance(connection.get_categories())
    auth_per_thread = config["auth_per_thread"]
    # Floor division keeps the count an integer on Python 3 as well; a float
    # here would break both range() and multiprocessing.Pool() below.
    threads_count = min(len(auth_keys) // auth_per_thread, max_threads_count)

    start_time = time.time()
    task_queue = multiprocessing.Queue()
    logger = MultiProcessLogger.get_logger("Main", logger_queue)
    try:
        writer_queue = Common.init_threaded_writer(config, logger_queue, threads_count, batch_size, timestamp)

        # Split twice (first with the flag set to True, then each piece with
        # False) so the work is cut into many small tasks.
        split_rate = 32
        search_parameters = [
            piece
            for param in search_parameter.split(True, split_rate)
            for piece in param.split(False, split_rate)
        ]
        logger.warn("Search parameters count: {}.".format(len(search_parameters)))
        for param in search_parameters:
            task_queue.put_nowait(param)

        # Each worker gets its own slice of `auth_per_thread` keys plus the categories.
        args = []
        for i in range(threads_count):
            first_key = auth_per_thread * i
            args.append((auth_keys[first_key:first_key + auth_per_thread], categories))

        pool = multiprocessing.Pool(threads_count, first_step_grabber_init,
                                    [logger_queue, writer_queue, task_queue])
        logger.info("Starting {} processes for first step...".format(threads_count))
        try:
            pool.map(firstStepGrabber, args)
        except KeyboardInterrupt:
            # Discard the pending tasks so nothing further is picked up from the queue.
            while not task_queue.empty():
                try:
                    task_queue.get_nowait()
                except Empty:
                    # empty() and get_nowait() can race with other consumers.
                    pass
        finally:
            # No more work will be submitted; close() does not block and lets
            # the worker processes exit once they are idle.
            pool.close()
        # NOTE(review): no ("die", None) message is sent to writer_queue here;
        # confirm the writer is shut down elsewhere.
    except KeyboardInterrupt:
        # An interrupt during setup is not an error; fall through to the timing log.
        pass
    except Exception as e:
        logger.error('{} \n {}'.format(e.args, traceback.format_exc()))

    end_time = time.time()
    logger.warn('Program finished execution. It took: {} seconds'.format(end_time - start_time))