Example #1
    def requires(self):
        register_tasks()
        tasks = list()
        pipeline_ids = data_access.query_pipeline_ids(int(self.phenotype),
                                                      util.conn_string)
        phenotype_config = data_access.query_phenotype(int(self.phenotype),
                                                       util.conn_string)
        phenotype_config['phenotype_id'] = int(self.phenotype)

        log("getting ready to execute pipelines...")
        log(pipeline_ids)
        if len(pipeline_ids) > 0:
            configs = dict()
            for pipeline_id in pipeline_ids:
                pipeline_config = data_access.get_pipeline_config(
                    pipeline_id, util.conn_string)
                pipeline_config['pipeline_id'] = pipeline_id
                configs[pipeline_config['name']] = pipeline_config

            update_phenotype_model(phenotype_config, util.conn_string)
            for pipeline_config in configs.values():
                pipeline_id = pipeline_config['pipeline_id']
                tasks.append(
                    PipelineTask(pipeline=pipeline_id,
                                 job=self.job,
                                 owner=self.owner,
                                 pipelinetype=pipeline_config.config_type))
        log(tasks)

        return tasks
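
These requires() overrides are Luigi hooks: the returned tasks become upstream dependencies that the scheduler runs before the task's own run(). A minimal self-contained sketch of the same shape, assuming Luigi; the class and parameter names mirror the snippet, but the bodies and the pipeline ids are placeholders:

import luigi

class PipelineTask(luigi.Task):
    pipeline = luigi.IntParameter()
    job = luigi.IntParameter()
    owner = luigi.Parameter()
    pipelinetype = luigi.Parameter()

    def run(self):
        print('running pipeline %d (%s)' % (self.pipeline, self.pipelinetype))

class PhenotypeTask(luigi.Task):
    phenotype = luigi.IntParameter()
    job = luigi.IntParameter()
    owner = luigi.Parameter()

    def requires(self):
        # Luigi calls requires() to build the dependency graph; every task
        # in the returned list runs before this task's own run()
        return [PipelineTask(pipeline=pid, job=self.job, owner=self.owner,
                             pipelinetype='default')
                for pid in (1, 2)]  # placeholder pipeline ids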
Example #2
def run_pipeline(pipeline, pipelinetype, job, owner):
    pipeline_config = data_access.get_pipeline_config(pipeline, util.conn_string)

    print('get collector')
    collector_name = str(pipelinetype)
    if collector_name in registered_collectors:
        collector_class = registered_collectors[collector_name]
        if collector_class:
            print('run collector')
            collector = collector_class()
            collector.run(pipeline, job, owner, pipelinetype, pipeline_config)
            collector.cleanup(pipeline, job, owner, pipelinetype, pipeline_config)

    jobs.update_job_status(str(job), util.conn_string, jobs.COMPLETED, "Finished %s Pipeline" % pipelinetype)
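
run_pipeline() resolves its collector through a name-to-class registry. A minimal sketch of how such a registry could be populated, assuming registered_collectors is a plain dict; register_collector and TermFinderCollector are hypothetical names, not from the snippet:

registered_collectors = {}

def register_collector(name, collector_class):
    # map a pipeline-type string to the class that post-processes its results
    registered_collectors[name] = collector_class

class TermFinderCollector:
    def run(self, pipeline, job, owner, pipelinetype, pipeline_config):
        print('aggregating results for job %s' % job)

    def cleanup(self, pipeline, job, owner, pipelinetype, pipeline_config):
        print('removing intermediate results for job %s' % job)

register_collector('TermFinder', TermFinderCollector)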
Example #3
    def requires(self):
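        # simplest variant: build one PipelineTask per pipeline id, with no
        # phenotype model update and no query chaining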
        tasks = list()
        pipeline_ids = data_access.query_pipeline_ids(int(self.phenotype),
                                                      util.conn_string)
        print("getting ready to execute pipelines...")
        print(pipeline_ids)
        if len(pipeline_ids) > 0:
            for pipeline_id in pipeline_ids:
                pipeline_config = data_access.get_pipeline_config(
                    pipeline_id, util.conn_string)
                tasks.append(
                    PipelineTask(pipeline=pipeline_id,
                                 job=self.job,
                                 owner=self.owner,
                                 pipelinetype=pipeline_config.config_type))
        print(tasks)

        return tasks
Example #4
    def requires(self):

        try:

            self.solr_query, total_docs, doc_limit, ranges = initialize_task_and_get_documents(
                self.pipeline, self.job, self.owner)

            self.dependent_tasks.append(PipelineTask(pipeline=self.pipeline, job=self.job, owner=self.owner,
                                                     pipelinetype=self.pipelinetype))

            for sde in self.dependent_pipeline_ids:
                pipeline_config = data_access.get_pipeline_config(sde, util.conn_string)
                self.dependent_tasks.append(PipelineTask(pipeline=sde, job=self.job, owner=self.owner,
                                                         pipelinetype=pipeline_config.config_type))
            self.done_requires = True
            return self.dependent_tasks

        except Exception as ex:
            traceback.print_exc(file=sys.stderr)
            jobs.update_job_status(str(self.job), util.conn_string, jobs.WARNING, ''.join(traceback.format_stack()))
            print(ex)
        return list()
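
The try/except here is a fail-soft guard: if dependency construction fails, the job is flagged with a WARNING status and Luigi receives an empty dependency list instead of a crash. A minimal sketch of that pattern with stand-in names (safe_requires and the stub update_job_status are not from the snippet):

import sys
import traceback

def update_job_status(job_id, conn_string, status, message):
    # stand-in for jobs.update_job_status(); just echoes the transition
    print('job %s -> %s' % (job_id, status))

def safe_requires(build_tasks, job_id, conn_string):
    # run the real dependency builder; on any failure, record a WARNING
    # on the job and return no dependencies so the scheduler can proceed
    try:
        return build_tasks()
    except Exception:
        traceback.print_exc(file=sys.stderr)
        update_job_status(job_id, conn_string, 'WARNING',
                          ''.join(traceback.format_stack()))
        return []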
Example #5
    def requires(self):
        register_tasks()
        tasks = list()
        pipeline_ids = data_access.query_pipeline_ids(int(self.phenotype), util.conn_string)
        phenotype_config = data_access.query_phenotype(int(self.phenotype), util.conn_string)
        phenotype_config['phenotype_id'] = int(self.phenotype)

        print("getting ready to execute pipelines...")
        actually_use_chaining = False
        print(pipeline_ids)
        if len(pipeline_ids) > 0:
            configs = dict()
            for pipeline_id in pipeline_ids:
                pipeline_config = data_access.get_pipeline_config(pipeline_id, util.conn_string)
                pipeline_config['pipeline_id'] = pipeline_id
                configs[pipeline_config['name']] = pipeline_config

            n = 0
            first_de = None
            secondary_des = list()
            if util.use_chained_queries == 'true':
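                # each AND operation is rewritten so the first data entity's
                # pipeline runs alone and every remaining pipeline is chained
                # to (downselected by) its result cohort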
                for op in phenotype_config['operations']:
                    if op['action'] == 'AND':
                        actually_use_chaining = True
                        first_de = op['data_entities'][0]
                        first_pipeline = configs[first_de]
                        secondary_des = op['data_entities'][1:]
                        name = "DownselectedCohort" + str(n)

                        cohort = dict()
                        cohort['name'] = name
                        cohort['named_arguments'] = dict()
                        cohort['named_arguments']['pipeline_id'] = first_pipeline['pipeline_id']
                        cohort['declaration'] = 'cohort'
                        cohort['funct'] = 'getJobResults'
                        cohort['library'] = 'Clarity'

                        found = False
                        for c in phenotype_config['cohorts']:
                            if name == c['name']:
                                found = True
                        if not found:
                            phenotype_config['cohorts'].append(cohort)
                        for de in secondary_des:
                            secondary_pipeline = configs[de]
                            job_res_config = dict()
                            job_res_config['context'] = 'document'
                            job_res_config['pipeline_id'] = secondary_pipeline['pipeline_id']
                            secondary_pipeline['job_results'][name] = job_res_config
                            secondary_pipeline['chained_query'] = name
                            configs[de] = secondary_pipeline
                            update_pipeline_config(secondary_pipeline, util.conn_string)
                            for de2 in phenotype_config['data_entities']:
                                if de == de2['name']:
                                    # de2 is the entry itself, so mutate it in
                                    # place; guard the 'cohort' key before
                                    # reading it, and avoid duplicate names
                                    named_args = de2['named_arguments']
                                    if 'cohort' not in named_args:
                                        named_args['cohort'] = [name]
                                    elif name not in named_args['cohort']:
                                        named_args['cohort'].append(name)
                        n += 1

                phenotype_config.chained_queries = actually_use_chaining

            update_phenotype_model(phenotype_config, util.conn_string)
            for pipeline_config in configs.values():
                pipeline_id = pipeline_config['pipeline_id']
                if actually_use_chaining and first_de:
                    if first_de == pipeline_config['name']:
                        tasks.append(PipelineTask(pipeline=pipeline_id, job=self.job, owner=self.owner,
                                                  pipelinetype=pipeline_config.config_type))
                        dependent_pipeline_ids = list()
                        for de in secondary_des:
                            secondary_pipeline = configs[de]
                            dependent_pipeline_ids.append(secondary_pipeline['pipeline_id'])
                        # tasks.append(ChainedPipelineTask(pipeline=pipeline_id, job=self.job, owner=self.owner,
                        #                                  pipelinetype=pipeline_config.config_type, first_de=first_de,
                        #                                  dependent_pipeline_ids=dependent_pipeline_ids))
                            tasks.append(PipelineTask(pipeline=secondary_pipeline['pipeline_id'], job=self.job,
                                                      owner=self.owner,
                                                      pipelinetype=secondary_pipeline.config_type))

                else:
                    tasks.append(PipelineTask(pipeline=pipeline_id, job=self.job, owner=self.owner,
                                              pipelinetype=pipeline_config.config_type))
        print(tasks)

        return tasks
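
For reference, this is roughly what the chaining branch writes into the phenotype model, based on the assignments above; a sketch with illustrative values (pipeline ids 42 and 43 are made up):

# cohort entry appended to phenotype_config['cohorts'] for the first
# data entity of an AND operation:
cohort = {
    'name': 'DownselectedCohort0',
    'named_arguments': {'pipeline_id': 42},  # the first pipeline's id
    'declaration': 'cohort',
    'funct': 'getJobResults',
    'library': 'Clarity',
}

# each secondary pipeline is then pointed at that cohort:
job_res_config = {'context': 'document', 'pipeline_id': 43}
# secondary_pipeline['job_results']['DownselectedCohort0'] = job_res_config
# secondary_pipeline['chained_query'] = 'DownselectedCohort0'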