Пример #1
0
    def run_loader(self):
        """Run the loader end to end: fetch input files, then run the ETL groups.

        Steps, in order:

        1. Start the file transactor threads, download and validate the
           data files, check for thread errors, wait for the queues and
           shut the file transactor down.
        2. Start the Neo4j transactor threads and, unless the
           ``USING_PICKLE`` entry of ``self.context_info.env`` is truthy,
           create indices.
        3. Run the ETL groups, shut the Neo4j transactor down, then log
           every entry returned by ``run_etl_groups`` followed by the
           total elapsed time measured from ``self.start_time``.

        Reads ``self.args``, ``self.logger``, ``self.context_info`` and
        ``self.start_time``. Returns ``None``.
        """
        log = self.logger

        if self.args.verbose:
            # Logger.warn() is a deprecated alias; warning() is the
            # supported spelling and emits the same record.
            log.warning('DEBUG mode enabled!')
            # Presumably a pause so the operator can notice the warning
            # before the run starts -- confirm intent.
            time.sleep(3)

        data_manager = DataFileManager(self.context_info.config_file_location)

        # --- Phase 1: file download / validation -------------------------
        file_transactor = FileTransactor()
        file_transactor.start_threads(
            data_manager.get_file_transactor_thread_settings())

        data_manager.download_and_validate()
        log.debug("finished downloading now doing thread")

        file_transactor.check_for_thread_errors()
        log.debug("finished threads waiting for queues")

        file_transactor.wait_for_queues()
        log.debug("finished queues waiting for shutdown")
        file_transactor.shutdown()

        # --- Phase 2: Neo4j transactor + ETL ------------------------------
        neo_transactor = Neo4jTransactor()
        neo_transactor.start_threads(
            data_manager.get_neo_transactor_thread_settings())

        log.debug("finished starting neo threads ")

        if not self.context_info.env["USING_PICKLE"]:
            log.info("Creating indices.")
            Neo4jHelper.create_indices()

        etl_time_tracker_list = self.run_etl_groups(log, data_manager,
                                                    neo_transactor)

        neo_transactor.shutdown()

        # --- Phase 3: reporting -------------------------------------------
        elapsed_time = time.time() - self.start_time

        for time_item in etl_time_tracker_list:
            log.info(time_item)

        # Lazy %-style arguments: formatting is deferred to the logging
        # framework instead of being done eagerly with the % operator.
        log.info('Loader finished. Elapsed time: %s',
                 time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
Пример #2
0
    def run_preprocessor(self):
        """Download the input files, then run the interaction pre-processors.

        Steps, in order:

        1. Start the file transactor threads, download and validate the
           data files, check for thread errors, wait for the queues and
           shut the file transactor down.
        2. For each processor group, look up the configs of every
           processor in the group via ``data_manager.get_config``. Each
           processor with at least one config is started in its own
           ``multiprocessing.Process``; the group is then awaited with
           ``Processor.wait_for_threads``.
        3. Log the timing message of every group and the total elapsed
           time.

        NOTE(review): ``args``, ``logger`` and ``context_info`` are read as
        bare names, not as attributes of ``self``; they are presumably
        module-level globals -- confirm against the rest of the file.

        Returns ``None``.
        """
        if args.verbose:
            # Logger.warn() is a deprecated alias; warning() is the
            # supported spelling and emits the same record.
            logger.warning('DEBUG mode enabled!')
            # Presumably a pause so the operator can notice the warning
            # before the run starts -- confirm intent.
            time.sleep(3)

        start_time = time.time()
        data_manager = DataFileManager(context_info.config_file_location)
        logger.info("config_file_location %s",
                    context_info.config_file_location)

        # --- Phase 1: file download / validation -------------------------
        ft = FileTransactor()

        ft.start_threads(data_manager.get_FT_thread_settings())
        data_manager.download_and_validate()
        logger.info("finished downloading now doing thread")
        ft.check_for_thread_errors()
        logger.info("finished threads waiting for queues")
        ft.wait_for_queues()

        logger.info("finished queues waiting for shutdown")
        ft.shutdown()

        # --- Phase 2: pre-processors --------------------------------------
        # Config types that are looked up for each processor.
        configs_dict = {
            'INTERACTION-SOURCE-MOL': ['INTERACTION-SOURCE', 'BGI'],
            'INTERACTION-SOURCE-GEN': ['INTERACTION-SOURCE', 'BGI'],
        }

        # Processor class instantiated for each processor name.
        processor_dispatch = {
            'INTERACTION-SOURCE-MOL': InteractionMolecularProcessor,
            'INTERACTION-SOURCE-GEN': InteractionGeneticProcessor,
        }

        # Processors inside one group are started together; groups are
        # handled one after another.
        list_of_processor_groups = [[
            'INTERACTION-SOURCE-MOL', 'INTERACTION-SOURCE-GEN'
        ]]

        processor_time_tracker_list = []

        for processor_group in list_of_processor_groups:
            processor_group_start_time = time.time()
            logger.info("Starting Processor group: %s", processor_group)
            processes = []

            for processor_name in processor_group:
                logger.info("Processor Name: %s", processor_name)

                configs = []
                for config_type in configs_dict[processor_name]:
                    config = data_manager.get_config(config_type)
                    if config is not None:
                        configs.append(config)
                    else:
                        logger.info("No Config found for: %s %s",
                                    processor_name, config_type)

                if not configs:
                    # Nothing to process for this processor; skip it.
                    logger.info("No Configs found for: %s", processor_name)
                    continue

                processor = processor_dispatch[processor_name](configs)
                process = multiprocessing.Process(
                    target=processor.run_processor)
                process.start()
                processes.append(process)

            Processor.wait_for_threads(processes)

            logger.info("Waiting for Queues to sync up")
            processor_elapsed_time = time.time() - processor_group_start_time
            # Built eagerly because the same text is both logged now and
            # replayed in the summary at the end.
            processor_time_message = (
                "Finished Processor group: %s, Elapsed time: %s" %
                (processor_group,
                 time.strftime("%H:%M:%S",
                               time.gmtime(processor_elapsed_time))))
            logger.info(processor_time_message)
            processor_time_tracker_list.append(processor_time_message)

        # --- Phase 3: reporting -------------------------------------------
        elapsed_time = time.time() - start_time

        for time_item in processor_time_tracker_list:
            logger.info(time_item)
        logger.info('PreProcess finished. Elapsed time: %s',
                    time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))