def requires(self):
    # Build one PipelineTask per pipeline attached to this phenotype.
    register_tasks()
    tasks = list()
    pipeline_ids = data_access.query_pipeline_ids(int(self.phenotype), util.conn_string)
    phenotype_config = data_access.query_phenotype(int(self.phenotype), util.conn_string)
    phenotype_config['phenotype_id'] = int(self.phenotype)

    log("getting ready to execute pipelines...")
    log(pipeline_ids)
    if len(pipeline_ids) > 0:
        # Cache each pipeline config by name so it can be looked up when tasks are created.
        configs = dict()
        for pipeline_id in pipeline_ids:
            pipeline_config = data_access.get_pipeline_config(pipeline_id, util.conn_string)
            pipeline_config['pipeline_id'] = pipeline_id
            configs[pipeline_config['name']] = pipeline_config

        update_phenotype_model(phenotype_config, util.conn_string)
        for pipeline_config in configs.values():
            pipeline_id = pipeline_config['pipeline_id']
            tasks.append(PipelineTask(pipeline=pipeline_id, job=self.job, owner=self.owner,
                                      pipelinetype=pipeline_config.config_type))
    log(tasks)

    return tasks
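# Minimal sketch (not from the original source) of the Luigi task this requires()
# method is assumed to live on. The parameter names come from the self.phenotype,
# self.job, and self.owner references above; the class name and parameter types
# are assumptions for illustration only.
import luigi


class PhenotypeTaskSketch(luigi.Task):
    phenotype = luigi.Parameter()
    job = luigi.Parameter()
    owner = luigi.Parameter()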
def run_pipeline(pipeline, pipelinetype, job, owner):
    # Look up the collector registered for this pipeline type, run it, then mark the job complete.
    pipeline_config = data_access.get_pipeline_config(pipeline, util.conn_string)

    print('get collector')
    collector_name = str(pipelinetype)
    if collector_name in registered_collectors:
        collector_class = registered_collectors[collector_name]
        if collector_class:
            print('run collector')
            collector = collector_class()
            collector.run(pipeline, job, owner, pipelinetype, pipeline_config)
            collector.cleanup(pipeline, job, owner, pipelinetype, pipeline_config)

    jobs.update_job_status(str(job), util.conn_string, jobs.COMPLETED,
                           "Finished %s Pipeline" % pipelinetype)
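# Minimal sketch (not from the original source) of the collector interface that
# run_pipeline() above relies on: a registered class with a no-argument constructor
# plus run() and cleanup() methods taking the same five arguments. The class name
# and registration key below are hypothetical.
class ExampleCollector(object):

    def run(self, pipeline, job, owner, pipelinetype, pipeline_config):
        # Gather and persist results for this pipeline/job combination.
        pass

    def cleanup(self, pipeline, job, owner, pipelinetype, pipeline_config):
        # Release resources or flush any buffered output once run() has finished.
        pass


# register_tasks() is assumed to populate the registered_collectors mapping, e.g.:
# registered_collectors['ExampleType'] = ExampleCollector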
def requires(self):
    tasks = list()
    pipeline_ids = data_access.query_pipeline_ids(int(self.phenotype), util.conn_string)
    print("getting ready to execute pipelines...")
    print(pipeline_ids)
    if len(pipeline_ids) > 0:
        for pipeline_id in pipeline_ids:
            pipeline_config = data_access.get_pipeline_config(pipeline_id, util.conn_string)
            tasks.append(PipelineTask(pipeline=pipeline_id, job=self.job, owner=self.owner,
                                      pipelinetype=pipeline_config.config_type))
    print(tasks)

    return tasks
def requires(self):
    try:
        self.solr_query, total_docs, doc_limit, ranges = initialize_task_and_get_documents(
            self.pipeline, self.job, self.owner)

        self.dependent_tasks.append(PipelineTask(pipeline=self.pipeline, job=self.job, owner=self.owner,
                                                 pipelinetype=self.pipelinetype))
        for sde in self.dependent_pipeline_ids:
            pipeline_config = data_access.get_pipeline_config(sde, util.conn_string)
            self.dependent_tasks.append(PipelineTask(pipeline=sde, job=self.job, owner=self.owner,
                                                     pipelinetype=pipeline_config.config_type))
        self.done_requires = True
        return self.dependent_tasks
    except Exception as ex:
        traceback.print_exc(file=sys.stderr)
        jobs.update_job_status(str(self.job), util.conn_string, jobs.WARNING,
                               ''.join(traceback.format_stack()))
        print(ex)
        return list()
def requires(self):
    register_tasks()
    tasks = list()
    pipeline_ids = data_access.query_pipeline_ids(int(self.phenotype), util.conn_string)
    phenotype_config = data_access.query_phenotype(int(self.phenotype), util.conn_string)
    phenotype_config['phenotype_id'] = int(self.phenotype)

    print("getting ready to execute pipelines...")
    actually_use_chaining = False
    print(pipeline_ids)
    if len(pipeline_ids) > 0:
        configs = dict()
        for pipeline_id in pipeline_ids:
            pipeline_config = data_access.get_pipeline_config(pipeline_id, util.conn_string)
            pipeline_config['pipeline_id'] = pipeline_id
            configs[pipeline_config['name']] = pipeline_config

        n = 0
        first_de = None
        secondary_des = list()
        if util.use_chained_queries == 'true':
            # Rewrite each AND operation as a chained query: the first data entity runs as a
            # normal pipeline and the remaining entities are down-selected to its result cohort.
            for op in phenotype_config['operations']:
                if op['action'] == 'AND':
                    actually_use_chaining = True
                    first_de = op['data_entities'][0]
                    first_pipeline = configs[first_de]
                    secondary_des = op['data_entities'][1:]

                    name = "DownselectedCohort" + str(n)
                    cohort = dict()
                    cohort['name'] = name
                    cohort['named_arguments'] = dict()
                    cohort['named_arguments']['pipeline_id'] = first_pipeline['pipeline_id']
                    cohort['declaration'] = 'cohort'
                    cohort['funct'] = 'getJobResults'
                    cohort['library'] = 'Clarity'

                    found = False
                    for c in phenotype_config['cohorts']:
                        if name == c['name']:
                            found = True
                    if not found:
                        phenotype_config['cohorts'].append(cohort)

                    for de in secondary_des:
                        # Point each secondary pipeline at the first pipeline's job results.
                        secondary_pipeline = configs[de]
                        job_res_config = dict()
                        job_res_config['context'] = 'document'
                        job_res_config['pipeline_id'] = secondary_pipeline['pipeline_id']
                        secondary_pipeline['job_results'][name] = job_res_config
                        secondary_pipeline['chained_query'] = name
                        configs[de] = secondary_pipeline
                        update_pipeline_config(secondary_pipeline, util.conn_string)

                        o = 0
                        for de2 in phenotype_config['data_entities']:
                            if de == de2['name']:
                                named_arguments = phenotype_config['data_entities'][o]['named_arguments']
                                # Guard against a missing 'cohort' entry before reading it.
                                cohorts = named_arguments.get('cohort', list())
                                if name not in cohorts:
                                    if 'cohort' not in named_arguments:
                                        named_arguments['cohort'] = [name]
                                    else:
                                        named_arguments['cohort'].append(name)
                            o += 1
                    n += 1

        phenotype_config.chained_queries = actually_use_chaining
        update_phenotype_model(phenotype_config, util.conn_string)

        for pipeline_config in configs.values():
            pipeline_id = pipeline_config['pipeline_id']
            if actually_use_chaining and first_de:
                # Only the first data entity's pipeline (and its secondaries) are scheduled here.
                if first_de == pipeline_config['name']:
                    tasks.append(PipelineTask(pipeline=pipeline_id, job=self.job, owner=self.owner,
                                              pipelinetype=pipeline_config.config_type))
                    dependent_pipeline_ids = list()
                    for de in secondary_des:
                        secondary_pipeline = configs[de]
                        dependent_pipeline_ids.append(secondary_pipeline['pipeline_id'])
                        # tasks.append(ChainedPipelineTask(pipeline=pipeline_id, job=self.job, owner=self.owner,
                        #                                  pipelinetype=pipeline_config.config_type,
                        #                                  first_de=first_de,
                        #                                  dependent_pipeline_ids=dependent_pipeline_ids))
                        tasks.append(PipelineTask(pipeline=secondary_pipeline.pipeline_id, job=self.job,
                                                  owner=self.owner,
                                                  pipelinetype=secondary_pipeline.config_type))
            else:
                tasks.append(PipelineTask(pipeline=pipeline_id, job=self.job, owner=self.owner,
                                          pipelinetype=pipeline_config.config_type))
    print(tasks)

    return tasks
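# Illustration only (not from the original source): when chained queries are enabled,
# each AND operation injects a synthetic cohort like the one below into
# phenotype_config['cohorts'], and each secondary data entity is pointed at it through
# named_arguments['cohort']. The pipeline_id value here is a placeholder.
example_cohort = {
    'name': 'DownselectedCohort0',
    'named_arguments': {'pipeline_id': 42},
    'declaration': 'cohort',
    'funct': 'getJobResults',
    'library': 'Clarity',
}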