def run(self):
  '''
  A method for running picard commands

  :param project_igf_id: A project igf id
  :param sample_igf_id: A sample igf id
  :param experiment_igf_id: An experiment igf id
  :param igf_session_class: A database session class
  :param reference_type: Reference genome collection type, default GENOME_FASTA
  :param reference_refFlat: Reference refFlat collection type, default GENE_REFFLAT
  :param ribosomal_interval_type: Collection type for ribosomal interval list, default RIBOSOMAL_INTERVAL
  :param species_name: Species name
  :param java_exe: Java path
  :param java_param: Java run parameters
  :param picard_jar: Picard jar path
  :param picard_command: Picard command
  :param base_work_dir: Base work directory
  :param copy_input: A toggle for copying input file to temp, 1 for True, default 0 for False
  :param use_ephemeral_space: A toggle for temp dir setting, default 0
  :param patterned_flowcell_list: A list of patterned flowcells, default ['HISEQ4000','NEXTSEQ']
  '''
  try:
    temp_output_dir = False
    project_igf_id = self.param_required('project_igf_id')
    experiment_igf_id = self.param_required('experiment_igf_id')
    sample_igf_id = self.param_required('sample_igf_id')
    java_exe = self.param_required('java_exe')
    java_param = self.param_required('java_param')
    picard_jar = self.param_required('picard_jar')
    input_files = self.param_required('input_files')
    picard_command = self.param_required('picard_command')
    igf_session_class = self.param_required('igf_session_class')
    species_name = self.param('species_name')
    reference_type = self.param('reference_type')
    reference_refFlat = self.param('reference_refFlat')
    ribosomal_interval_type = self.param('ribosomal_interval_type')
    base_work_dir = self.param_required('base_work_dir')
    analysis_files = self.param_required('analysis_files')
    picard_option = self.param('picard_option')
    patterned_flowcell_list = self.param('patterned_flowcell_list')
    platform_name = self.param_required('platform_name')
    output_prefix = self.param('output_prefix')
    load_metrics_to_cram = self.param('load_metrics_to_cram')
    cram_collection_type = self.param('cram_collection_type')
    seed_date_stamp = self.param_required('date_stamp')
    use_ephemeral_space = self.param('use_ephemeral_space')
    seed_date_stamp = get_datestamp_label(seed_date_stamp)
    if output_prefix is not None:
      output_prefix = \
        '{0}_{1}'.format(output_prefix, seed_date_stamp)          # add seed datestamp to output prefix
    work_dir_prefix = \
      os.path.join(
        base_work_dir,
        project_igf_id,
        sample_igf_id,
        experiment_igf_id)
    work_dir = \
      self.get_job_work_dir(work_dir=work_dir_prefix)             # get a run work dir
    temp_output_dir = \
      get_temp_dir(use_ephemeral_space=use_ephemeral_space)       # get temp work dir
    ref_genome = \
      Reference_genome_utils(
        genome_tag=species_name,
        dbsession_class=igf_session_class,
        genome_fasta_type=reference_type,
        gene_reflat_type=reference_refFlat,
        ribosomal_interval_type=ribosomal_interval_type)          # set up ref genome utils
    genome_fasta = ref_genome.get_genome_fasta()                  # get genome fasta
    ref_flat_file = ref_genome.get_gene_reflat()                  # get refFlat file
    ribosomal_interval_file = ref_genome.get_ribosomal_interval() # get ribosomal interval file
    patterned_flowcell = False
    if platform_name in patterned_flowcell_list:                  # check for patterned flowcell
      patterned_flowcell = True
    if load_metrics_to_cram and \
       not cram_collection_type:
      raise ValueError(
        'Cram file collection type is required for loading picard metrics to db')
    picard = \
      Picard_tools(
        java_exe=java_exe,
        java_param=java_param,
        picard_jar=picard_jar,
        input_files=input_files,
        output_dir=temp_output_dir,
        ref_fasta=genome_fasta,
        patterned_flowcell=patterned_flowcell,
        ref_flat_file=ref_flat_file,
        picard_option=picard_option,
        output_prefix=output_prefix,
        use_ephemeral_space=use_ephemeral_space,
        ribisomal_interval=ribosomal_interval_file)               # set up picard tool; keyword spelling follows the library API
    temp_output_files, picard_command_line, picard_metrics = \
      picard.run_picard_command(command_name=picard_command)      # run picard command
    output_file_list = list()
    for source_path in temp_output_files:
      dest_path = \
        os.path.join(
          work_dir,
          os.path.basename(source_path))                          # get destination filepath
      move_file(
        source_path=source_path,
        destinationa_path=dest_path,
        force=True)                                               # move files to work dir
      output_file_list.append(dest_path)
    remove_dir(temp_output_dir)
    analysis_files.extend(output_file_list)
    bam_files = list()
    for filepath in output_file_list:
      if filepath.endswith('.bam'):
        bam_files.append(filepath)
    if load_metrics_to_cram and \
       len(picard_metrics) > 0:
      ca = CollectionAdaptor(**{'session_class': igf_session_class})
      attribute_data = \
        ca.prepare_data_for_collection_attribute(
          collection_name=experiment_igf_id,
          collection_type=cram_collection_type,
          data_list=picard_metrics)                               # format data for collection attribute table
      ca.start_session()
      try:
        ca.create_or_update_collection_attributes(
          data=attribute_data,
          autosave=False)                                         # load data to collection attribute table
        ca.commit_session()
        ca.close_session()
      except:
        ca.rollback_session()
        ca.close_session()
        raise
    self.param(
      'dataflow_params',
      {'analysis_files': analysis_files,
       'bam_files': bam_files,
       'seed_date_stamp': seed_date_stamp})                       # pass on picard output list
    message = \
      'finished picard {0} for {1} {2}'.format(
        picard_command,
        project_igf_id,
        sample_igf_id)
    self.post_message_to_slack(message, reaction='pass')          # send log to slack
    message = \
      'Picard {0} command: {1}'.format(
        picard_command,
        picard_command_line)
    #self.comment_asana_task(task_name=project_igf_id, comment=message) # send commandline to Asana
  except Exception as e:
    if temp_output_dir and \
       os.path.exists(temp_output_dir):
      remove_dir(temp_output_dir)
    message = \
      'project: {2}, sample: {3}, Error in {0}: {1}'.format(
        self.__class__.__name__,
        e,
        project_igf_id,
        sample_igf_id)
    self.warning(message)
    self.post_message_to_slack(message, reaction='fail')          # post msg to slack for failed jobs
    raise
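# A minimal, self-contained sketch of the scratch-dir pattern used by run()
# above: execute a command inside a temporary directory, move every output to
# the final work dir, and always clean up the scratch space. The function name
# and arguments are illustrative only and not part of this module's API; the
# real pipeline uses the get_temp_dir(), move_file() and remove_dir() helpers.
import os
import shutil
import subprocess
import tempfile

def example_run_in_scratch_and_collect(cmd_args, work_dir):
  '''Run cmd_args with outputs in a temp dir, then move outputs to work_dir'''
  temp_dir = tempfile.mkdtemp()                                   # scratch space, like get_temp_dir()
  try:
    subprocess.check_call(cmd_args, cwd=temp_dir)                 # tool writes its outputs to temp_dir
    output_list = list()
    for filename in os.listdir(temp_dir):
      source_path = os.path.join(temp_dir, filename)
      dest_path = os.path.join(work_dir, filename)
      shutil.move(source_path, dest_path)                         # like move_file(..., force=True)
      output_list.append(dest_path)
    return output_list
  finally:
    shutil.rmtree(temp_dir, ignore_errors=True)                   # like remove_dir(temp_output_dir)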
def load_file_to_disk_and_db(
      self,
      input_file_list,
      withdraw_exisitng_collection=True,
      autosave_db=True,
      file_suffix=None,
      force=True,
      remove_file=False):
  '''
  A method for loading analysis results to disk and database. Files will be
  moved to a new path if base_path is present. The directory structure of the
  final path is based on the collection_table information.

  Following will be the final directory structure if base_path is present

    project - base_path/project_igf_id/analysis_name
    sample - base_path/project_igf_id/sample_igf_id/analysis_name
    experiment - base_path/project_igf_id/sample_igf_id/experiment_igf_id/analysis_name
    run - base_path/project_igf_id/sample_igf_id/experiment_igf_id/run_igf_id/analysis_name

  :param input_file_list: A list of input files to load, all using the same collection info
  :param withdraw_exisitng_collection: Withdraw existing collection group, DO NOT use this while loading a list of files
  :param autosave_db: Save changes to database, default True
  :param file_suffix: Use a specific file suffix, use None to keep the original suffix, e.g. input.vcf.gz to output.vcf.gz
  :param force: Toggle for removing existing file, default True
  :param remove_file: A toggle for removing existing file from disk, default False
  :returns: A list of final filepaths
  '''
  try:
    project_igf_id = None
    sample_igf_id = None
    experiment_igf_id = None
    run_igf_id = None
    output_path_list = list()                                     # define empty output list
    dbconnected = False
    if self.collection_name is None or \
       self.collection_type is None or \
       self.collection_table is None:
      raise ValueError('File collection information is incomplete') # check for collection information
    base = BaseAdaptor(**{'session_class': self.dbsession_class})
    base.start_session()                                          # connect to db
    dbconnected = True
    if self.base_path is not None:
      if self.collection_table == 'sample':
        sa = SampleAdaptor(**{'session': base.session})
        sample_igf_id = self.collection_name
        sample_exists = \
          sa.check_sample_records_igf_id(sample_igf_id=sample_igf_id)
        if not sample_exists:
          raise ValueError('Sample {0} not found in db'.format(sample_igf_id))
        project_igf_id = \
          sa.fetch_sample_project(sample_igf_id=sample_igf_id)    # fetch project id for sample
      elif self.collection_table == 'experiment':
        ea = ExperimentAdaptor(**{'session': base.session})
        experiment_igf_id = self.collection_name
        experiment_exists = \
          ea.check_experiment_records_id(
            experiment_igf_id=experiment_igf_id)
        if not experiment_exists:
          raise ValueError('Experiment {0} not present in database'.format(experiment_igf_id))
        (project_igf_id, sample_igf_id) = \
          ea.fetch_project_and_sample_for_experiment(
            experiment_igf_id=experiment_igf_id)                  # fetch project and sample id for experiment
      elif self.collection_table == 'run':
        ra = RunAdaptor(**{'session': base.session})
        run_igf_id = self.collection_name
        run_exists = \
          ra.check_run_records_igf_id(run_igf_id=run_igf_id)
        if not run_exists:
          raise ValueError('Run {0} not found in database'.format(run_igf_id))
        (project_igf_id, sample_igf_id, experiment_igf_id) = \
          ra.fetch_project_sample_and_experiment_for_run(
            run_igf_id=run_igf_id)                                # fetch project, sample and experiment id for run
      elif self.collection_table == 'project':
        pa = ProjectAdaptor(**{'session': base.session})
        project_igf_id = self.collection_name
        project_exists = \
          pa.check_project_records_igf_id(project_igf_id=project_igf_id)
        if not project_exists:
          raise ValueError('Project {0} not found in database'.format(project_igf_id))
    if self.rename_file and self.analysis_name is None:
      raise ValueError('Analysis name is required for renaming file') # check analysis name for renaming
    for input_file in input_file_list:
      final_path = ''
      if self.base_path is None:                                  # do not move file if base_path is absent
        final_path = os.path.dirname(input_file)
      else:                                                       # move file path
        if self.collection_table == 'project':
          if project_igf_id is None:
            raise ValueError('Missing project id for collection {0}'.format(self.collection_name))
          final_path = \
            os.path.join(
              self.base_path,
              project_igf_id,
              self.analysis_name)                                 # final path for project
        elif self.collection_table == 'sample':
          if project_igf_id is None or \
             sample_igf_id is None:
            raise ValueError('Missing project and sample id for collection {0}'.format(self.collection_name))
          final_path = \
            os.path.join(
              self.base_path,
              project_igf_id,
              sample_igf_id,
              self.analysis_name)                                 # final path for sample
        elif self.collection_table == 'experiment':
          if project_igf_id is None or \
             sample_igf_id is None or \
             experiment_igf_id is None:
            raise ValueError('Missing project, sample and experiment id for collection {0}'.format(self.collection_name))
          final_path = \
            os.path.join(
              self.base_path,
              project_igf_id,
              sample_igf_id,
              experiment_igf_id,
              self.analysis_name)                                 # final path for experiment
        elif self.collection_table == 'run':
          if project_igf_id is None or \
             sample_igf_id is None or \
             experiment_igf_id is None or \
             run_igf_id is None:
            raise ValueError('Missing project, sample, experiment and run id for collection {0}'.format(self.collection_name))
          final_path = \
            os.path.join(
              self.base_path,
              project_igf_id,
              sample_igf_id,
              experiment_igf_id,
              run_igf_id,
              self.analysis_name)                                 # final path for run
      if self.rename_file:
        new_filename = \
          self.get_new_file_name(
            input_file=input_file,
            file_suffix=file_suffix)
        final_path = \
          os.path.join(
            final_path,
            new_filename)                                         # get new filepath
      else:
        final_path = \
          os.path.join(
            final_path,
            os.path.basename(input_file))
      if final_path != input_file:                                # move file if required
        final_path = \
          preprocess_path_name(input_path=final_path)             # remove unexpected characters from file path
        move_file(
          source_path=input_file,
          destinationa_path=final_path,
          force=force)                                            # move or overwrite file to destination dir
      output_path_list.append(final_path)                         # add final path to the output list
      self.create_or_update_analysis_collection(
        file_path=final_path,
        dbsession=base.session,
        withdraw_exisitng_collection=withdraw_exisitng_collection,
        remove_file=remove_file,
        autosave_db=autosave_db)                                  # load new file collection in db
      if autosave_db:
        base.commit_session()                                     # save changes to db for each file
    base.commit_session()                                         # save changes to db
    base.close_session()                                          # close db connection
    return output_path_list
  except:
    if dbconnected:
      base.rollback_session()
      base.close_session()
    raise
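# A standalone sketch of the directory layout described in the docstring
# above, assuming the same four collection tables; it computes the expected
# final path without touching the database. `example_build_final_path` is
# illustrative only and not part of this class.
import os

def example_build_final_path(
      base_path, collection_table, analysis_name,
      project_igf_id, sample_igf_id=None,
      experiment_igf_id=None, run_igf_id=None):
  '''Map a collection table name to its analysis output directory'''
  path_components = {
    'project': [project_igf_id],
    'sample': [project_igf_id, sample_igf_id],
    'experiment': [project_igf_id, sample_igf_id, experiment_igf_id],
    'run': [project_igf_id, sample_igf_id, experiment_igf_id, run_igf_id]}
  components = path_components.get(collection_table)
  if components is None or None in components:
    raise ValueError(
      'Incomplete id list for collection table {0}'.format(collection_table))
  return os.path.join(base_path, *components, analysis_name)

# e.g. example_build_final_path('/results', 'sample', 'AlignmentQC',
#                               'IGFP001', 'IGFS001')
# returns '/results/IGFP001/IGFS001/AlignmentQC'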
def run(self):
  '''
  A method for resetting md5 values in the samplesheet json files for all seqrun ids
  '''
  try:
    db_connected = False
    seqrun_list = \
      self._read_seqrun_list(self.seqrun_igf_list)                # fetch list of seqrun ids from input file
    if len(seqrun_list) > 0:
      base = self.base_adaptor
      base.start_session()                                        # connect to database
      db_connected = True
      ca = CollectionAdaptor(**{'session': base.session})         # connect to collection table
      fa = FileAdaptor(**{'session': base.session})               # connect to file table
      for seqrun_id in seqrun_list:
        try:
          files_data = \
            ca.get_collection_files(
              collection_name=seqrun_id,
              collection_type=self.json_collection_type,
              output_mode='one_or_none')                          # check for existing md5 json file in db
          # TODO: skip seqrun_id if pipeline is still running
          if files_data is not None:
            json_file_path = \
              [element.file_path
                 for element in files_data
                   if isinstance(element, File)][0]               # get md5 json file path from sqlalchemy collection results
            samplesheet_md5 = \
              self._get_samplesheet_md5(seqrun_id)                # get md5 value for new samplesheet file
            new_json_path = \
              self._get_updated_json_file(
                json_file_path,
                samplesheet_md5,
                self.samplesheet_name)                            # get updated md5 json file if samplesheet has changed
            if new_json_path is not None:
              new_json_file_md5 = \
                calculate_file_checksum(
                  filepath=new_json_path,
                  hasher='md5')
              fa.update_file_table_for_file_path(
                file_path=json_file_path,
                tag='md5',
                value=new_json_file_md5,
                autosave=False)                                   # update json file md5 in db, don't commit yet
              move_file(
                source_path=new_json_path,
                destinationa_path=json_file_path,
                force=True)                                       # overwrite json file
              base.commit_session()                               # save changes in db
              message = \
                'Setting new Samplesheet info for run {0}'.format(seqrun_id)
              if self.log_slack:
                self.igf_slack.post_message_to_channel(
                  message, reaction='pass')                       # send log to slack
              if self.log_asana:
                self.igf_asana.comment_asana_task(
                  task_name=seqrun_id, comment=message)           # send log to asana
            else:
              message = \
                'No change in samplesheet for seqrun {0}'.format(seqrun_id)
              warnings.warn(message)
              if self.log_slack:
                self.igf_slack.post_message_to_channel(
                  message, reaction='pass')
          else:
            message = \
              'No md5 json file found for seqrun_igf_id: {0}'.format(seqrun_id)
            warnings.warn(message)                                # not raising any exception if seqrun id is not found
            if self.log_slack:
              self.igf_slack.post_message_to_channel(
                message, reaction='fail')
        except Exception as e:
          base.rollback_session()
          message = \
            'Failed to update json file for seqrun id {0}, error: {1}'.format(seqrun_id, e)
          warnings.warn(message)
          if self.log_slack:
            self.igf_slack.post_message_to_channel(
              message, reaction='fail')
      base.close_session()                                        # close db connection
      if self.clean_up:
        self._clear_seqrun_list(self.seqrun_igf_list)             # clear input file
    else:
      message = 'No new seqrun id found for changing samplesheet md5'
      warnings.warn(message)
      if self.log_slack:
        self.igf_slack.post_message_to_channel(
          message, reaction='sleep')
  except:
    if db_connected:
      base.rollback_session()
      base.close_session()
    raise
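# A minimal sketch of the md5-reset step performed above: recompute a
# samplesheet checksum and patch the matching entry in an md5 json file. The
# json layout (a list of {'file_name': ..., 'file_md5': ...} records) is an
# assumption for illustration; the real file format is defined elsewhere in
# this package, as are the _get_samplesheet_md5 and _get_updated_json_file
# helpers this approximates.
import hashlib
import json

def example_reset_samplesheet_md5(json_path, samplesheet_name, samplesheet_path):
  '''Update the stored md5 of samplesheet_name in json_path, return True on change'''
  md5_hash = hashlib.md5()
  with open(samplesheet_path, 'rb') as fp:
    for chunk in iter(lambda: fp.read(8192), b''):                # stream the file in chunks
      md5_hash.update(chunk)
  new_md5 = md5_hash.hexdigest()
  with open(json_path, 'r') as fp:
    records = json.load(fp)
  changed = False
  for record in records:
    if record.get('file_name') == samplesheet_name and \
       record.get('file_md5') != new_md5:
      record['file_md5'] = new_md5                                # reset the stored md5 value
      changed = True
  if changed:
    with open(json_path, 'w') as fp:
      json.dump(records, fp, indent=2)
  return changed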
def run(self):
  '''
  An ehive runnable method for cellranger count output processing for a given sample

  :param project_igf_id: A project igf id
  :param experiment_igf_id: An experiment igf id
  :param sample_igf_id: A sample igf id
  :param igf_session_class: A database session class
  :param cellranger_output: Cellranger output path
  :param base_work_dir: Base work directory path
  :param fastq_collection_type: Collection type name for input fastq files, default demultiplexed_fastq
  :param species_name: Reference genome collection name
  :param reference_type: Reference genome collection type, default TRANSCRIPTOME_TENX
  :param use_ephemeral_space: A toggle for temp dir settings, default 0
  :returns: None, adds cellranger_output to the dataflow_params
  '''
  try:
    project_igf_id = self.param_required('project_igf_id')
    experiment_igf_id = self.param_required('experiment_igf_id')
    sample_igf_id = self.param_required('sample_igf_id')
    igf_session_class = self.param_required('igf_session_class')
    cellranger_output = self.param_required('cellranger_output')
    base_result_dir = self.param_required('base_results_dir')
    species_name = self.param('species_name')
    manifest_filename = self.param('manifest_filename')
    analysis_name = self.param('analysis_name')
    collection_type = self.param('collection_type')
    collection_table = self.param('collection_table')
    use_ephemeral_space = self.param('use_ephemeral_space')
    # prepare manifest file for the results dir
    manifest_file = \
      os.path.join(
        cellranger_output,
        manifest_filename)                                        # get name of the manifest file
    create_file_manifest_for_dir(
      results_dirpath=cellranger_output,
      output_file=manifest_file,
      md5_label='md5',
      exclude_list=['*.bam', '*.bai', '*.cram'])                  # create manifest for output dir
    # create archive for the results dir
    temp_archive_name = \
      os.path.join(
        get_temp_dir(use_ephemeral_space=use_ephemeral_space),
        '{0}.tar.gz'.format(experiment_igf_id))                   # get the name of temp archive file
    prepare_file_archive(
      results_dirpath=cellranger_output,
      output_file=temp_archive_name,
      exclude_list=['*.bam', '*.bai', '*.cram'])                  # archive cellranger output
    # load archive file to db collection and results dir
    au = \
      Analysis_collection_utils(
        dbsession_class=igf_session_class,
        analysis_name=analysis_name,
        tag_name=species_name,
        collection_name=experiment_igf_id,
        collection_type=collection_type,
        collection_table=collection_table,
        base_path=base_result_dir)                                # initiate loading of archive file
    output_file_list = \
      au.load_file_to_disk_and_db(
        input_file_list=[temp_archive_name],
        withdraw_exisitng_collection=True)                        # load file to db and disk
    # find bam path for the data flow
    bam_list = list()                                             # define empty bamfile list
    for filename in os.listdir(cellranger_output):
      if fnmatch(filename, '*.bam'):
        bam_list.append(
          os.path.join(
            cellranger_output,
            filename))                                            # add all bams to bam_list
    if len(bam_list) > 1:
      raise ValueError(
        'More than one bam found for cellranger count run: {0}'.format(cellranger_output)) # check bam count; presence of one bam is already validated by the check method
    bam_file = bam_list[0]
    au = \
      Analysis_collection_utils(
        dbsession_class=igf_session_class,
        analysis_name=analysis_name,
        tag_name=species_name,
        collection_name=experiment_igf_id,
        collection_type=collection_type,
        collection_table=collection_table)                        # initiate bam file rename
    new_bam_name = \
      au.get_new_file_name(input_file=bam_file)
    if os.path.basename(bam_file) != new_bam_name:
      new_bam_name = \
        os.path.join(
          os.path.dirname(bam_file),
          new_bam_name)                                           # get new bam path
      move_file(
        source_path=bam_file,
        destinationa_path=new_bam_name,
        force=True)                                               # move bam file
      bam_file = new_bam_name                                     # update bam file path
    self.param(
      'dataflow_params',
      {'cellranger_output': cellranger_output,
       'bam_file': bam_file,
       'analysis_output_list': output_file_list})                 # pass on cellranger output path
  except Exception as e:
    message = \
      'project: {2}, sample: {3}, Error in {0}: {1}'.format(
        self.__class__.__name__,
        e,
        project_igf_id,
        sample_igf_id)
    self.warning(message)
    self.post_message_to_slack(message, reaction='fail')          # post msg to slack for failed jobs
    raise
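# A self-contained sketch of the manifest-and-archive steps used above: walk a
# results dir, record md5 checksums for everything except the excluded
# patterns, then pack the same selection into a tar.gz. This approximates
# create_file_manifest_for_dir() and prepare_file_archive() with stdlib calls;
# the function name and the manifest layout are assumptions for illustration.
import fnmatch
import hashlib
import os
import tarfile

def example_manifest_and_archive(results_dir, manifest_file, archive_file,
                                 exclude_list=('*.bam', '*.bai', '*.cram')):
  def is_excluded(filename):
    return any(fnmatch.fnmatch(filename, pattern) for pattern in exclude_list)
  with open(manifest_file, 'w') as manifest:
    manifest.write('file_path\tmd5\n')
    for root, _dirs, files in os.walk(results_dir):
      for filename in files:
        if is_excluded(filename):
          continue
        file_path = os.path.join(root, filename)
        with open(file_path, 'rb') as fp:
          md5 = hashlib.md5(fp.read()).hexdigest()                # fine for modest file sizes
        rel_path = os.path.relpath(file_path, results_dir)
        manifest.write('{0}\t{1}\n'.format(rel_path, md5))
  with tarfile.open(archive_file, 'w:gz') as tar:
    tar.add(
      results_dir,
      arcname=os.path.basename(results_dir),
      filter=lambda info: None if is_excluded(os.path.basename(info.name)) else info)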
def run(self):
  '''
  A method for running samtools commands

  :param project_igf_id: A project igf id
  :param sample_igf_id: A sample igf id
  :param experiment_igf_id: An experiment igf id
  :param igf_session_class: A database session class
  :param reference_type: Reference genome collection type, default GENOME_FASTA
  :param threads: Number of threads to use for Bam to Cram conversion, default 4
  :param base_work_dir: Base work directory
  :param samtools_command: Samtools command
  :param samFlagInclude: Sam flags to include in filtered bam, default None
  :param samFlagExclude: Sam flags to exclude from the filtered bam, default None
  :param mapq_threshold: Skip alignments with MAPQ smaller than this value, default None
  :param use_encode_filter: For samtools filter, use Encode epigenome filter, i.e. samFlagExclude 1804 (PE) / 1796 (SE), default False
  :param encodePeExcludeFlag: For samtools filter, Encode exclude flag for PE reads, default 1804
  :param encodeSeExcludeFlag: For samtools filter, Encode exclude flag for SE reads, default 1796
  :param use_ephemeral_space: A toggle for temp dir settings, default 0
  :param copy_input: A toggle for copying input file to temp, 1 for True, default 0 for False
  '''
  try:
    temp_output_dir = False
    project_igf_id = self.param_required('project_igf_id')
    sample_igf_id = self.param_required('sample_igf_id')
    experiment_igf_id = self.param_required('experiment_igf_id')
    igf_session_class = self.param_required('igf_session_class')
    input_files = self.param_required('input_files')
    samtools_exe = self.param_required('samtools_exe')
    reference_type = self.param('reference_type')
    threads = self.param('threads')
    base_work_dir = self.param_required('base_work_dir')
    samtools_command = self.param_required('samtools_command')
    analysis_files = self.param_required('analysis_files')
    output_prefix = self.param_required('output_prefix')
    load_metrics_to_cram = self.param('load_metrics_to_cram')
    cram_collection_type = self.param('cram_collection_type')
    collection_table = self.param('collection_table')
    base_result_dir = self.param('base_result_dir')
    analysis_name = self.param('analysis_name')
    force_overwrite = self.param('force_overwrite')
    samFlagInclude = self.param('samFlagInclude')
    samFlagExclude = self.param('samFlagExclude')
    mapq_threshold = self.param('mapq_threshold')
    library_layout = self.param_required('library_layout')
    use_encode_filter = self.param('use_encode_filter')
    species_name = self.param_required('species_name')
    seed_date_stamp = self.param_required('date_stamp')
    use_ephemeral_space = self.param('use_ephemeral_space')
    seed_date_stamp = get_datestamp_label(seed_date_stamp)
    if output_prefix is not None:
      output_prefix = \
        '{0}_{1}'.format(output_prefix, seed_date_stamp)          # add datestamp to the output file prefix
    if use_encode_filter:
      samFlagInclude = None
      if library_layout == 'PAIRED':
        samFlagExclude = 1804
      else:
        samFlagExclude = 1796
    if not isinstance(input_files, list) or \
       len(input_files) == 0:
      raise ValueError('No input file found')
    if len(input_files) > 1:
      raise ValueError('More than one input file found: {0}'.format(input_files))
    output_bam_cram_list = list()
    input_file = input_files[0]
    temp_output_dir = \
      get_temp_dir(use_ephemeral_space=use_ephemeral_space)       # get temp work dir
    work_dir_prefix = \
      os.path.join(
        base_work_dir,
        project_igf_id,
        sample_igf_id,
        experiment_igf_id)
    work_dir = \
      self.get_job_work_dir(work_dir=work_dir_prefix)             # get a run work dir
    samtools_cmdline = ''
    temp_output = None
    if samtools_command == 'idxstats':
      temp_output, samtools_cmdline = \
        run_bam_idxstat(
          samtools_exe=samtools_exe,
          bam_file=input_file,
          output_dir=temp_output_dir,
          output_prefix=output_prefix,
          force=True)                                             # run samtools idxstats
    elif samtools_command == 'flagstat':
      temp_output, samtools_cmdline = \
        run_bam_flagstat(
          samtools_exe=samtools_exe,
          bam_file=input_file,
          output_dir=temp_output_dir,
          output_prefix=output_prefix,
          threads=threads,
          force=True)                                             # run samtools flagstat
    elif samtools_command == 'stats':
      temp_output, samtools_cmdline, stats_metrics = \
        run_bam_stats(
          samtools_exe=samtools_exe,
          bam_file=input_file,
          output_dir=temp_output_dir,
          output_prefix=output_prefix,
          threads=threads,
          force=True)                                             # run samtools stats
      if load_metrics_to_cram and \
         len(stats_metrics) > 0:
        ca = CollectionAdaptor(**{'session_class': igf_session_class})
        attribute_data = \
          ca.prepare_data_for_collection_attribute(
            collection_name=experiment_igf_id,
            collection_type=cram_collection_type,
            data_list=stats_metrics)
        ca.start_session()
        try:
          ca.create_or_update_collection_attributes(
            data=attribute_data,
            autosave=False)
          ca.commit_session()
          ca.close_session()
        except Exception as e:
          ca.rollback_session()
          ca.close_session()
          raise ValueError('Failed to load data to db: {0}'.format(e))
    elif samtools_command == 'merge':
      if output_prefix is None:
        raise ValueError('Missing output filename prefix for merged bam')
      sorted_by_name = self.param('sorted_by_name')
      temp_output = \
        os.path.join(
          work_dir,
          '{0}_merged.bam'.format(output_prefix))
      samtools_cmdline = \
        merge_multiple_bam(
          samtools_exe=samtools_exe,
          input_bam_list=input_file,
          output_bam_path=temp_output,
          sorted_by_name=sorted_by_name,
          threads=threads,
          use_ephemeral_space=use_ephemeral_space,
          force=True)
    elif samtools_command == 'view_bamToCram':
      if base_result_dir is None:
        raise ValueError('base_result_dir is required for CRAM file loading')
      if analysis_name is None:
        raise ValueError('analysis_name is required for CRAM file loading')
      ref_genome = \
        Reference_genome_utils(
          genome_tag=species_name,
          dbsession_class=igf_session_class,
          genome_fasta_type=reference_type)
      genome_fasta = ref_genome.get_genome_fasta()                # get genome fasta
      cram_file = \
        os.path.basename(input_file).replace('.bam', '.cram')     # get base cram file name
      cram_file = \
        os.path.join(
          temp_output_dir,
          cram_file)                                              # get cram file path in work dir
      samtools_cmdline = \
        convert_bam_to_cram(
          samtools_exe=samtools_exe,
          bam_file=input_file,
          reference_file=genome_fasta,
          cram_path=cram_file,
          use_ephemeral_space=use_ephemeral_space,
          threads=threads,
          force=True,
          dry_run=False)
      au = \
        Analysis_collection_utils(
          dbsession_class=igf_session_class,
          analysis_name=analysis_name,
          tag_name=species_name,
          collection_name=experiment_igf_id,
          collection_type=cram_collection_type,
          collection_table=collection_table,
          base_path=base_result_dir)
      temp_output_bam_cram_list = \
        au.load_file_to_disk_and_db(
          input_file_list=[cram_file],
          file_suffix='cram',
          withdraw_exisitng_collection=force_overwrite)           # load file to db and disk
      for cram in temp_output_bam_cram_list:
        index_bam_or_cram(
          samtools_exe=samtools_exe,
          input_path=cram,
          threads=threads,
          dry_run=False)
        index_path = '{0}.crai'.format(cram)
        output_bam_cram_list.append(cram)
        output_bam_cram_list.append(index_path)
      if len(output_bam_cram_list) == 0:
        raise ValueError('No output cram file found')
    elif samtools_command == 'view_filterBam':
      temp_output_bam = \
        os.path.join(
          temp_output_dir,
          os.path.basename(input_file).replace('.bam', '.filtered.bam'))
      samtools_cmdline = \
        filter_bam_file(
          samtools_exe=samtools_exe,
          input_bam=input_file,
          output_bam=temp_output_bam,
          samFlagInclude=samFlagInclude,
          samFlagExclude=samFlagExclude,
          threads=threads,
          mapq_threshold=mapq_threshold,
          index_output=False,
          dry_run=False)
      dest_path = \
        os.path.join(
          work_dir,
          os.path.basename(temp_output_bam))
      move_file(
        source_path=temp_output_bam,
        destinationa_path=dest_path,
        force=True)
      index_bam_or_cram(
        samtools_exe=samtools_exe,
        input_path=dest_path,
        threads=threads,
        dry_run=False)
      index_path = '{0}.bai'.format(dest_path)
      output_bam_cram_list.append(dest_path)
      output_bam_cram_list.append(index_path)
    else:
      raise ValueError('Samtools command {0} not supported'.format(samtools_command))
    if temp_output is not None:
      dest_path = \
        os.path.join(
          work_dir,
          os.path.basename(temp_output))
      if dest_path != temp_output:
        move_file(
          source_path=temp_output,
          destinationa_path=dest_path,
          force=True)
      analysis_files.append(dest_path)
    self.param(
      'dataflow_params',
      {'analysis_files': analysis_files,
       'output_bam_cram_list': output_bam_cram_list})             # pass on samtools output list
    message = \
      'finished samtools {0} for {1} {2}'.format(
        samtools_command,
        project_igf_id,
        sample_igf_id)
    self.post_message_to_slack(message, reaction='pass')          # send log to slack
    message = \
      'finished samtools {0} for {1} {2}: {3}'.format(
        samtools_command,
        project_igf_id,
        sample_igf_id,
        samtools_cmdline)
    #self.comment_asana_task(task_name=project_igf_id, comment=message) # send comment to Asana
  except Exception as e:
    message = \
      'project: {2}, sample: {3}, Error in {0}: {1}'.format(
        self.__class__.__name__,
        e,
        project_igf_id,
        sample_igf_id)
    self.warning(message)
    self.post_message_to_slack(message, reaction='fail')          # post msg to slack for failed jobs
    raise
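# A small sketch of the ENCODE filter logic applied in run() above: when
# use_encode_filter is set, the include flag is dropped and the exclude flag
# is chosen by library layout (1804 for paired-end, 1796 for single-end). The
# returned tuple mirrors the (samFlagInclude, samFlagExclude) pair passed to
# filter_bam_file(); the helper name is illustrative only.
def example_encode_filter_flags(library_layout,
                                sam_flag_include=None,
                                sam_flag_exclude=None,
                                use_encode_filter=False):
  '''Return (samFlagInclude, samFlagExclude) for samtools view -f/-F'''
  if use_encode_filter:
    sam_flag_include = None                                       # ENCODE filter drops the -f include flag
    if library_layout == 'PAIRED':
      sam_flag_exclude = 1804  # unmapped + mate unmapped + secondary + QC fail + duplicate
    else:
      sam_flag_exclude = 1796  # same set minus mate-unmapped, for single-end reads
  return (sam_flag_include, sam_flag_exclude)

# e.g. example_encode_filter_flags('PAIRED', use_encode_filter=True)
# returns (None, 1804), i.e. samtools view -F 1804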