def test_reset_pipeline_seed_for_rerun(self):
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    sra = SeqrunAdaptor(**{'session': base.session})
    seqrun = \
        sra.fetch_seqrun_records_igf_id(
            seqrun_igf_id='171003_M00001_0089_000000000-TEST')
    pp = PipelineAdaptor(**{'session': base.session})
    pipeline = \
        pp.fetch_pipeline_records_pipeline_name('demultiplexing_fastq')
    pipe_seed = \
        pp.fetch_pipeline_seed(
            pipeline_id=pipeline.pipeline_id,
            seed_id=seqrun.seqrun_id,
            seed_table='seqrun')
    self.assertEqual(pipe_seed.status, 'SEEDED')              # a fresh seed starts as SEEDED
    pp.update_pipeline_seed(
        data=[{
            'pipeline_id': pipeline.pipeline_id,
            'seed_id': seqrun.seqrun_id,
            'seed_table': 'seqrun',
            'status': 'FINISHED'}])                           # mark the seed as FINISHED
    pipe_seed2 = \
        pp.fetch_pipeline_seed(
            pipeline_id=pipeline.pipeline_id,
            seed_id=seqrun.seqrun_id,
            seed_table='seqrun')
    self.assertEqual(pipe_seed2.status, 'FINISHED')
    base.close_session()
    with open(self.seqrun_input_list, 'w') as fp:
        fp.write('171003_M00001_0089_000000000-TEST')         # write the id list for rerun
    mps = \
        Modify_pipeline_seed(
            igf_id_list=self.seqrun_input_list,
            table_name='seqrun',
            pipeline_name='demultiplexing_fastq',
            dbconfig_file=self.dbconfig,
            log_slack=False,
            log_asana=False,
            clean_up=True)
    mps.reset_pipeline_seed_for_rerun(seeded_label='SEEDED')  # reset the FINISHED seed for rerun
    base.start_session()
    sra = SeqrunAdaptor(**{'session': base.session})
    seqrun = \
        sra.fetch_seqrun_records_igf_id(
            seqrun_igf_id='171003_M00001_0089_000000000-TEST')
    pp = PipelineAdaptor(**{'session': base.session})
    pipeline = \
        pp.fetch_pipeline_records_pipeline_name('demultiplexing_fastq')
    pipe_seed = \
        pp.fetch_pipeline_seed(
            pipeline_id=pipeline.pipeline_id,
            seed_id=seqrun.seqrun_id,
            seed_table='seqrun')
    self.assertEqual(pipe_seed.status, 'SEEDED')              # seed is back to SEEDED
    base.close_session()
def run(self):
    try:
        igf_session_class = self.param_required('igf_session_class')  # set by base class
        pipeline_name = self.param_required('pipeline_name')
        igf_id = self.param_required('igf_id')
        task_id = self.param_required('task_id')
        seed_id = self.param_required('seed_id')
        seed_table = self.param_required('seed_table')
        new_status = self.param_required('new_status')
        pa = PipelineAdaptor(**{'session_class': igf_session_class})
        pa.start_session()                                            # connect to db
        pa.update_pipeline_seed(
            data=[{
                'pipeline_name': pipeline_name,
                'seed_id': int(seed_id),
                'seed_table': seed_table,
                'status': new_status.upper()}])                       # update seed record in db
        pa.close_session()                                            # close db connection
        message = \
            'changing status in {0} for seed {1} as {2}'.format(
                pipeline_name, seed_id, new_status.upper())           # format message
        self.post_message_to_slack(message, reaction='pass')          # send message to slack
        self.comment_asana_task(task_name=task_id, comment=message)   # send message to asana
    except Exception as e:
        message = \
            'seqrun: {2}, Error in {0}: {1}'.format(
                self.__class__.__name__, e, igf_id)
        self.warning(message)
        self.post_message_to_slack(message, reaction='fail')          # post msg to slack for failed jobs
        raise
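# A minimal standalone sketch of the same seed-status update performed
# outside the eHive runnable above, using only calls that appear elsewhere
# in this section. The import paths and the helper name are assumptions
# based on the package layout, not confirmed by this excerpt.
from igf_data.igfdb.baseadaptor import BaseAdaptor
from igf_data.igfdb.pipelineadaptor import PipelineAdaptor
from igf_data.utils.dbutils import read_dbconf_json

def mark_pipeline_seed_status(dbconfig_file, pipeline_name, seed_id,
                              seed_table, new_status):
    '''A hypothetical helper: set one pipeline_seed record to new_status'''
    dbparam = read_dbconf_json(dbconfig_file)                  # read db connection params
    base = BaseAdaptor(**dbparam)
    pa = PipelineAdaptor(**{'session_class': base.get_session_class()})
    pa.start_session()                                         # connect to db
    try:
        pa.update_pipeline_seed(
            data=[{'pipeline_name': pipeline_name,
                   'seed_id': int(seed_id),
                   'seed_table': seed_table,
                   'status': new_status.upper()}])             # update one seed record
    finally:
        pa.close_session()                                     # always close db connection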
def test_update_pipeline_seed(self):
    pl = PipelineAdaptor(**{'session_class': self.session_class})
    pl.start_session()
    pipeline_seed_data1 = [{
        'pipeline_name': 'demultiplexing_fastq',
        'seed_id': '2',
        'seed_table': 'seqrun'}]
    with self.assertRaises(ValueError):
        pl.update_pipeline_seed(data=pipeline_seed_data1)      # update without a status column should fail
    pipeline_seed_data2 = [{
        'pipeline_name': 'demultiplexing_fastq',
        'seed_id': '2',
        'seed_table': 'seqrun',
        'status': 'RUNNING'}]
    pl.update_pipeline_seed(data=pipeline_seed_data2)
    (pipe_seed1, table_data1) = \
        pl.fetch_pipeline_seed_with_table_data(
            pipeline_name='demultiplexing_fastq')
    self.assertEqual(
        len(table_data1.to_dict(orient='records')),
        len(pipe_seed1.to_dict(orient='records')))             # one table row per seed record
    pipeline_seed_data3 = [{
        'pipeline_name': 'demultiplexing_fastq',
        'seed_id': '1',
        'seed_table': 'seqrun',
        'status': 'RUNNING'}]
    pl.update_pipeline_seed(data=pipeline_seed_data3)
    (pipe_seed2, _) = \
        pl.fetch_pipeline_seed_with_table_data(
            pipeline_name='demultiplexing_fastq',
            status='RUNNING')
    pl.close_session()
    self.assertEqual(
        pipe_seed2.loc[pipe_seed2.seed_id == 1]['status'].values[0],
        'RUNNING')                                             # seed 1 is now RUNNING
def reset_pipeline_seed_for_rerun(
        self,
        seeded_label='SEEDED',
        restricted_status_list=('SEEDED', 'RUNNING')):
    '''
    A method for setting up a pipeline for re-run if the first run has failed
    or was aborted. It resets pipeline_seed.status to 'SEEDED' only if the
    current status is not already 'SEEDED' or 'RUNNING'.

    :param seeded_label: A text label for the seeded status, default SEEDED
    :param restricted_status_list: A list of pipeline seed status values to
                                   exclude from the search, default ('SEEDED', 'RUNNING')
    '''
    try:
        db_connected = False
        restricted_status_list = list(restricted_status_list)
        input_id_list = \
            self._read_input_list(igf_id_list=self.igf_id_list)     # get input ids from file
        failed_ids = list()                                         # define empty list of failed ids
        pass_list = list()                                          # required for logging in asana
        base = self.base_adaptor
        base.start_session()                                        # connect to database
        db_connected = True
        for igf_id in input_id_list:
            pipe_seed_data = \
                self._fetch_pipeline_seed_entry(
                    igf_id=igf_id,
                    restrict_seed_status=restricted_status_list)    # get pipe seed data for igf id
            if pipe_seed_data is None:
                failed_ids.append(igf_id)                           # add igf id to failed list
            else:
                pl = PipelineAdaptor(**{'session': base.session})   # connect to pipeline adaptor
                updated_seed_data = [{
                    'pipeline_id': pipe_seed_data.pipeline_id,
                    'seed_id': pipe_seed_data.seed_id,
                    'seed_table': pipe_seed_data.seed_table,
                    'status': seeded_label}]                        # set data for seed update
                pl.update_pipeline_seed(
                    data=updated_seed_data,
                    autosave=False)                                 # update data to pipeline seed table
                pass_list.append(igf_id)
        base.commit_session()                                       # save data to database after all changes
        base.close_session()                                        # close database connection
        db_connected = False
        if self.clean_up:
            self._clear_input_list(
                file_path=self.igf_id_list,
                igf_list=failed_ids)                                # overwrite input list with failed ids for next try
            message = \
                'Overwriting pipeseed input list {0}'.format(self.igf_id_list)
            if self.log_slack:
                self.igf_slack.post_message_to_channel(
                    message,
                    reaction='pass')                                # comment to slack for file overwriting
        if len(pass_list) > 0:
            for id_line in pass_list:
                message = \
                    'Changed pipeline seed for id {0}, pipeline {1}, to {2}'.format(
                        id_line, self.pipeline_name, seeded_label)
                if self.log_slack:
                    self.igf_slack.post_message_to_channel(
                        message,
                        reaction='pass')                            # comment to slack channel
                if self.log_asana:
                    self.igf_asana.comment_asana_task(
                        task_name=id_line,
                        comment=message)                            # comment on asana task
    except Exception as e:
        if db_connected:
            base.rollback_session()
            base.close_session()
        message = \
            'Failed to update pipeline seed, Error: {0}'.format(e)
        warnings.warn(message)
        if self.log_slack:
            self.igf_slack.post_message_to_channel(message, reaction='fail')
        raise
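# A usage sketch for reset_pipeline_seed_for_rerun, mirroring the unit test
# at the top of this section. The id-list file and dbconfig path are
# placeholders, and the import path is an assumption; adjust it to wherever
# Modify_pipeline_seed lives in this codebase.
from igf_data.process.pipeline.modify_pipeline_seed import Modify_pipeline_seed

mps = \
    Modify_pipeline_seed(
        igf_id_list='seqrun_id_list.txt',                      # placeholder: one igf id per line
        table_name='seqrun',
        pipeline_name='demultiplexing_fastq',
        dbconfig_file='data/dbconfig.json',                    # placeholder dbconfig path
        log_slack=False,                                       # skip slack logging
        log_asana=False,                                       # skip asana logging
        clean_up=True)                                         # rewrite input list with failed ids only
mps.reset_pipeline_seed_for_rerun(seeded_label='SEEDED')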
def run(self):
    '''
    Run method for the seed job factory class of all the pipelines

    :param igf_session_class: A database session class
    :param pipeline_name: Name of the pipeline
    :param seed_id_label: A text label for the seed_id, default seed_id
    :param seqrun_id_label: A text label for the seqrun_id column name, default seqrun_id
    :param seqrun_date_label: A text label for the seqrun date, default seqrun_date
    :param seqrun_igf_id_label: A text label for the sequencing run igf id, default seqrun_igf_id
    :param seeded_label: A text label for the status seeded in the pipeline_seed table, default SEEDED
    :param running_label: A text label for the status running in the pipeline_seed table, default RUNNING
    :param seed_status_label: A text label for the pipeline_seed status column name, default status
    :param experiment_id_label: A text label for the experiment_id, default experiment_id
    :param pipeseed_mode: A text label for the pipeline mode, default demultiplexing.
                          Allowed values are demultiplexing and alignment
    :returns: A list of dictionaries containing the seqrun ids or experiment_igf_ids seeded for analysis
    '''
    try:
        dbconnected = False
        igf_session_class = self.param_required('igf_session_class')   # set by base class
        pipeline_name = self.param_required('pipeline_name')
        seed_id_label = self.param_required('seed_id_label')
        seqrun_id_label = self.param_required('seqrun_id_label')
        seeded_label = self.param_required('seeded_label')
        running_label = self.param_required('running_label')
        seqrun_date_label = self.param_required('seqrun_date_label')
        seqrun_igf_id_label = self.param_required('seqrun_igf_id_label')
        seed_status_label = self.param_required('seed_status_label')
        experiment_id_label = self.param_required('experiment_id_label')
        pipeseed_mode = self.param_required('pipeseed_mode')
        if pipeseed_mode not in ('demultiplexing', 'alignment'):
            raise ValueError('Pipeseed_mode {0} not supported'.format(pipeseed_mode))
        (pipeseeds_data, seed_data) = \
            get_pipeline_seeds(
                pipeseed_mode=pipeseed_mode,
                pipeline_name=pipeline_name,
                igf_session_class=igf_session_class)                   # fetch pipeseed data from db
        if len(seed_data.index) > 0:
            seed_data = seed_data.to_dict(orient='records')            # convert dataframe to list of dictionaries
            self.param('sub_tasks', seed_data)                         # set sub_tasks param for the data flow
            pipeseeds_data[seed_status_label] = \
                pipeseeds_data[seed_status_label].map(
                    {seeded_label: running_label})                     # update seed records in pipeseed table, change status to RUNNING
            pa = PipelineAdaptor(**{'session_class': igf_session_class})  # get db adaptor
            pa.start_session()                                         # connect to db
            dbconnected = True
            pa.update_pipeline_seed(
                data=pipeseeds_data.to_dict(orient='records'),
                autosave=False)                                        # set pipeline seeds as running
            pa.commit_session()                                        # save changes to db
            pa.close_session()                                         # close db connection
            dbconnected = False
            message = \
                'Total {0} new jobs found for {1}, pipeline: {2}'.format(
                    len(seed_data), self.__class__.__name__, pipeline_name)  # format msg for slack
            self.post_message_to_slack(message, reaction='pass')       # send update to slack
        else:
            message = \
                '{0}, {1}: no new job created'.format(
                    self.__class__.__name__, pipeline_name)            # format msg for failed jobs
            self.warning(message)
            self.post_message_to_slack(message, reaction='sleep')      # post about failed job to slack
    except Exception as e:
        message = \
            'Error in {0},{1}: {2}'.format(
                self.__class__.__name__, pipeline_name, e)             # format slack msg
        self.warning(message)
        self.post_message_to_slack(message, reaction='fail')           # send msg to slack
        if dbconnected:
            pa.rollback_session()                                      # remove changes from db
            pa.close_session()
        raise                                                          # mark worker as failed
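# A hedged sketch of calling get_pipeline_seeds directly, outside the job
# factory above. The import paths are assumptions from the package layout;
# the call signature is copied from run(), and the two return values are
# treated as pandas DataFrames, as run()'s .index and .to_dict usage implies.
from igf_data.igfdb.baseadaptor import BaseAdaptor
from igf_data.utils.dbutils import read_dbconf_json
from igf_data.utils.ehive_utils.pipeseedfactory_utils import get_pipeline_seeds

dbparam = read_dbconf_json('data/dbconfig.json')               # placeholder dbconfig path
session_class = BaseAdaptor(**dbparam).get_session_class()
(pipeseeds_data, seed_data) = \
    get_pipeline_seeds(
        pipeseed_mode='demultiplexing',
        pipeline_name='DemultiplexingFastq',
        igf_session_class=session_class)                       # fetch pipeseed data from db
if len(seed_data.index) > 0:
    for entry in seed_data.to_dict(orient='records'):
        print(entry)                                           # one dict per seeded seqrun or experiment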
def setUp(self):
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()
    # PLATFORM
    platform_data = [{
        "platform_igf_id": "M03291",
        "model_name": "MISEQ",
        "vendor_name": "ILLUMINA",
        "software_name": "RTA",
        "software_version": "RTA1.18.54"}]
    flowcell_rule_data = [{
        "platform_igf_id": "M03291",
        "flowcell_type": "MISEQ",
        "index_1": "NO_CHANGE",
        "index_2": "NO_CHANGE"}]
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=platform_data)
    pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
    # SEQRUN
    seqrun_data = [{
        'seqrun_igf_id': '180416_M03291_0139_000000000-TEST',
        'flowcell_id': '000000000-TEST',
        'platform_igf_id': 'M03291',
        'flowcell': 'MISEQ'
    }, {
        'seqrun_igf_id': '180416_M03291_0140_000000000-TEST',
        'flowcell_id': '000000000-TEST',
        'platform_igf_id': 'M03291',
        'flowcell': 'MISEQ'}]
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)
    # PROJECT
    project_data = [{'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'}]
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    # SAMPLE
    sample_data = [{
        'sample_igf_id': 'IGF00123',
        'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'
    }, {
        'sample_igf_id': 'IGF00124',
        'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'}]
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)
    # EXPERIMENT
    experiment_data = [{
        'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq',
        'sample_igf_id': 'IGF00123',
        'experiment_igf_id': 'IGF00123_MISEQ',
        'library_name': 'IGF00123',
        'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
        'library_strategy': 'RNA-SEQ',
        'experiment_type': 'POLYA-RNA',
        'library_layout': 'PAIRED',
        'platform_name': 'MISEQ',
        'singlecell_chemistry': 'TENX'
    }, {
        'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq',
        'sample_igf_id': 'IGF00124',
        'experiment_igf_id': 'IGF00124_MISEQ',
        'library_name': 'IGF00124',
        'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
        'library_strategy': 'RNA-SEQ',
        'experiment_type': 'POLYA-RNA',
        'library_layout': 'PAIRED',
        'platform_name': 'MISEQ',
        'singlecell_chemistry': 'TENX'}]
    ea = ExperimentAdaptor(**{'session': base.session})
    ea.store_project_and_attribute_data(data=experiment_data)
    # RUN
    run_data = [{
        'experiment_igf_id': 'IGF00123_MISEQ',
        'seqrun_igf_id': '180416_M03291_0139_000000000-TEST',
        'run_igf_id': 'IGF00123_MISEQ_000000000-TEST_1',
        'lane_number': '1'}]
    ra = RunAdaptor(**{'session': base.session})
    ra.store_run_and_attribute_data(data=run_data)
    # PIPELINE
    pipeline_data = [{
        "pipeline_name": "PrimaryAnalysis",
        "pipeline_db": "sqlite:////aln.db"
    }, {
        "pipeline_name": "DemultiplexingFastq",
        "pipeline_db": "sqlite:////fastq.db"}]
    pipeline_seed_data = [
        {'pipeline_name': 'PrimaryAnalysis', 'seed_id': 1, 'seed_table': 'experiment'},
        {'pipeline_name': 'PrimaryAnalysis', 'seed_id': 2, 'seed_table': 'experiment'},
        {'pipeline_name': 'DemultiplexingFastq', 'seed_id': 1, 'seed_table': 'seqrun'},
        {'pipeline_name': 'DemultiplexingFastq', 'seed_id': 2, 'seed_table': 'seqrun'}]
    update_data = [{
        'pipeline_name': 'PrimaryAnalysis',
        'seed_id': 2,
        'seed_table': 'experiment',
        'status': 'FINISHED'
    }, {
        'pipeline_name': 'DemultiplexingFastq',
        'seed_id': 2,
        'seed_table': 'seqrun',
        'status': 'FINISHED'}]
    pla = PipelineAdaptor(**{'session': base.session})
    pla.store_pipeline_data(data=pipeline_data)
    pla.create_pipeline_seed(data=pipeline_seed_data)
    pla.update_pipeline_seed(update_data)
    base.close_session()
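# A tearDown counterpart to the setUp above, not present in the original
# excerpt: a minimal cleanup sketch that drops the schema and removes the
# sqlite file, reusing the attributes that setUp stores on self.
def tearDown(self):
    Base.metadata.drop_all(self.engine)                        # drop all tables
    if os.path.exists(self.dbname):
        os.remove(self.dbname)                                 # remove the sqlite db file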