"Unable to download shock file , {0} or {1}".format( read1_name, read2_name)) try: logger.info("Executing: bowtie2 {0}".format(bowtie2_cmd)) cmdline_output = script_util.runProgram(logger, "bowtie2", bowtie2_cmd, None, directory) #print cmdline_output except Exception, e: #logger.exception("Failed to upload the index") raise Exception("Failed to upload the index") try: #stats_obj_name = params['output_obj_name']+"_"+str(hex(uuid.getnode()))+"_AlignmentStats" stats_data = {} stats_data = script_util.extractAlignmentStatsInfo( logger, "bowtie2", ws_client, ws_id, None, cmdline_output['stderr'], None) bam_file = os.path.join(output_dir, "accepted_hits_unsorted.bam") logger.info("Executing: sam_to_bam {0}".format(bam_file)) sam_to_bam = "view -bS -o {0} {1}".format(bam_file, out_file) script_util.runProgram(logger, "samtools", sam_to_bam, None, directory) final_bam_prefix = os.path.join(output_dir, "accepted_hits") logger.info("Executing: Sorting bam file {0}".format(bam_file)) sort_bam_cmd = "sort {0} {1}".format(bam_file, final_bam_prefix) script_util.runProgram(logger, "samtools", sort_bam_cmd, None, directory) except Exception, e: #logger.exception("Error Running the bowtie2 command {0},{1} {2}".format(bowtie2_cmd,directory," ".join(traceback.print_exc()))) raise Exception( "Error Running the bowtie2 command {0},{1} {2}".format(
class HiSat2(ExecutionBase): def __init__(self, logger, directory, urls, max_cores): pprint(self.__class__) super(HiSat2, self).__init__(logger, directory, urls, max_cores) # user defined shared variables across methods #self.sample = None #self.sampleset_info = None self.num_threads = None def runEach(self, task_params): ws_client = self.common_params['ws_client'] hs = self.common_params['hs_client'] params = self.method_params logger = self.logger token = self.common_params['user_token'] read_sample = task_params['job_id'] condition = task_params['label'] directory = task_params['hisat2_dir'] ws_id = task_params['ws_id'] genome_id = task_params['annotation_id'] sampleset_id = task_params['sampleset_id'] print "Downloading Read Sample{0}".format(read_sample) logger.info("Downloading Read Sample{0}".format(read_sample)) try: #r_sample = ws_client.get_objects( # [{ 'name' : read_sample, 'workspace' : ws_id}])[0] #r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0] #sample_type = r_sample_info[2].split('-')[0] r_sample = script_util.ws_get_obj(self.logger, ws_client, ws_id, read_sample)[0] sample_type = script_util.ws_get_type_name(self.logger, ws_client, ws_id, read_sample) sample_name = script_util.ws_get_obj_name4file( self.logger, ws_client, ws_id, read_sample) input_direc = os.path.join( directory, sample_name.split('.')[0] + "_hisat2_input") if not os.path.exists(input_direc): os.mkdir(input_direc) output_name = sample_name.split('.')[0] + "_hisat2_alignment" output_dir = os.path.join(directory, output_name) if not os.path.exists(output_dir): os.mkdir(output_dir) print directory base = handler_util.get_file_with_suffix(directory, ".1.ht2") print base hisat2_base = os.path.join(directory, base) ### Adding advanced options to Bowtie2Call hisat2_cmd = '' hisat2_cmd += (' -p {0}'.format(self.num_threads)) if ('quality_score' in params and params['quality_score'] is not None): hisat2_cmd += (' --' + params['quality_score']) if ('alignment_type' in params and params['alignment_type'] is not None): hisat2_cmd += (' --' + params['alignment_type']) if ('trim5' in params and params['trim5'] is not None): hisat2_cmd += (' --trim5 ' + str(params['trim5'])) if ('trim3' in params and params['trim3'] is not None): hisat2_cmd += (' --trim3 ' + str(params['trim3'])) if ('np' in params and params['np'] is not None): hisat2_cmd += (' --np ' + str(params['np'])) if ('minins' in params and params['minins'] is not None): hisat2_cmd += (' --minins ' + str(params['minins'])) if ('maxins' in params and params['maxins'] is not None): hisat2_cmd += (' --maxins ' + str(params['maxins'])) #if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation']) if ('min_intron_length' in params and params['min_intron_length'] is not None): hisat2_cmd += (' --min-intronlen ' + str(params['min_intron_length'])) if ('max_intron_length' in params and params['max_intron_length'] is not None): hisat2_cmd += (' --max-intronlen ' + str(params['max_intron_length'])) if ('no_spliced_alignment' in params and params['no_spliced_alignment'] != 0): hisat2_cmd += (' --no-spliced-alignment') if ('transcriptome_mapping_only' in params and params['transcriptome_mapping_only'] != 0): hisat2_cmd += (' --transcriptome-mapping-only') if ('tailor_alignments' in params and params['tailor_alignments'] is not None): hisat2_cmd += (' --' + params['tailor_alignments']) out_file = output_dir + "/accepted_hits.sam" #### try: sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample) ds = script_util.ru_reads_download(self.logger, sample_ref, input_direc, token) self.logger.info(ds) except Exception, e: self.logger.exception(e) raise Exception( "Unable to download reads file , {0}".format(read_sample)) if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary': lib_type = 'SingleEnd' hisat2_cmd += " -U {0} -x {1} -S {2}".format( ds['fwd'], hisat2_base, out_file) if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary': lib_type = 'PairedEnd' if sample_type == 'KBaseAssembly.PairedEndLibrary': if ('orientation' in params and params['orientation'] is not None): hisat2_cmd += (' --' + params['orientation']) else: # TODO: the following can be read from PEL object if ('orientation' in params and params['orientation'] is not None): hisat2_cmd += (' --' + params['orientation']) hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format( ds['fwd'], ds['rev'], hisat2_base, out_file) #if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary': # lib_type = 'SingleEnd' # if sample_type == 'KBaseAssembly.SingleEndLibrary': # read_id = r_sample['data']['handle']['id'] # read_name = r_sample['data']['handle']['file_name'] # else: # read_id = r_sample['data']['lib']['file']['id'] # read_name = r_sample['data']['lib']['file']['file_name'] # try: # script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=input_direc,token=token) # hisat2_cmd += " -U {0} -x {1} -S {2}".format(os.path.join(input_direc,read_name),hisat2_base,out_file) # except Exception,e: # self.logger.exception(e) # raise Exception( "Unable to download shock file , {0}".format(read_name)) #if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary': # lib_type = 'PairedEnd' # if sample_type == 'KBaseAssembly.PairedEndLibrary': # if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation']) # read1_id = r_sample['data']['handle_1']['id'] # read1_name = r_sample['data']['handle_1']['file_name'] # read2_id = r_sample['data']['handle_2']['id'] # read2_name = r_sample['data']['handle_2']['file_name'] # else: # # TODO: the following can be read from PEL object # if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation']) # read1_id = r_sample['data']['lib1']['file']['id'] # read1_name = r_sample['data']['lib1']['file']['file_name'] # read2_id = r_sample['data']['lib2']['file']['id'] # read2_name = r_sample['data']['lib2']['file']['file_name'] # try: # script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=input_direc,token=token) # script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=input_direc,token=token) # hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc,read1_name),os.path.join(input_direc,read2_name),hisat2_base,out_file) # except Exception,e: # logger.exception(e) # raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name)) try: self.logger.info("Executing: hisat2 {0}".format(hisat2_cmd)) cmdline_output = script_util.runProgram( self.logger, "hisat2", hisat2_cmd, None, directory) except Exception, e: logger.exception(e) raise Exception("Failed to run command {0}".format(hisat2_cmd)) try: stats_data = {} stats_data = script_util.extractAlignmentStatsInfo( self.logger, "bowtie2", ws_client, ws_id, None, cmdline_output['stderr'], None) bam_file = os.path.join(output_dir, "accepted_hits_unsorted.bam") logger.info("Executing: sam_to_bam {0}".format(bam_file)) sam_to_bam = "view -bS -o {0} {1}".format(bam_file, out_file) script_util.runProgram(self.logger, "samtools", sam_to_bam, None, directory) final_bam_prefix = os.path.join(output_dir, "accepted_hits") logger.info( "Executing: Sorting bam file {0}".format(bam_file)) sort_bam_cmd = "sort {0} {1}".format(bam_file, final_bam_prefix) script_util.runProgram(self.logger, "samtools", sort_bam_cmd, None, directory) except Exception, e: logger.exception(e) raise Exception( "Error Running the hisat2 command {0},{1} {2}".format( hisat2_cmd, directory, " ".join(traceback.print_exc())))
except Exception,e: raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name)) try: self.logger.info("Executing: tophat {0}".format(tophat_cmd)) cmdline_output, cmd_err = script_util.runProgram(self.logger,"tophat",tophat_cmd,None,directory) except Exception,e: raise Exception("Failed to run command {0}\n{1}\n{2}".format(tophat_cmd,cmdline_output,cmd_err)) try: bam_file = output_dir+"/accepted_hits.bam" align_stats_cmd="flagstat {0}".format(bam_file) stats = script_util.runProgram(logger,"samtools",align_stats_cmd,None,directory) #print stats stats_data = {} # Pass it to the stats['result'] #stats_obj_name = params['output_obj_name']+"_"+str(hex(uuid.getnode()))+"_AlignmentStats" stats_data =script_util.extractAlignmentStatsInfo(logger,"samtools",ws_client,ws_id,None,stats['result'],None) except Exception , e : raise Exception("Failed to create RNASeqAlignmentStats: {0}".format(bam_file)) # Zip tophat folder out_file_path = os.path.join(directory,"%s.zip" % output_name) try: logger.info("Zipping the output files".format(out_file_path)) script_util.zip_files(logger, output_dir,out_file_path) except Exception, e: raise Exception("Failed to compress the index: {0}".format(out_file_path)) ## Upload the file using handle service try: tophat_handle = hs.upload(out_file_path) except Exception, e: raise Exception("Failed to upload zipped output file".format(out_file_path)) #### Replace version with get_version command#####
) except Exception, e: # logger.Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name)) raise Exception("Unable to download shock file , {0} or {1}".format(read1_name, read2_name)) try: logger.info("Executing: bowtie2 {0}".format(bowtie2_cmd)) cmdline_output = script_util.runProgram(logger, "bowtie2", bowtie2_cmd, None, directory) # print cmdline_output except Exception, e: # logger.exception("Failed to upload the index") raise Exception("Failed to upload the index") try: # stats_obj_name = params['output_obj_name']+"_"+str(hex(uuid.getnode()))+"_AlignmentStats" stats_data = {} stats_data = script_util.extractAlignmentStatsInfo( logger, "bowtie2", ws_client, ws_id, None, cmdline_output["stderr"], None ) bam_file = os.path.join(output_dir, "accepted_hits_unsorted.bam") logger.info("Executing: sam_to_bam {0}".format(bam_file)) sam_to_bam = "view -bS -o {0} {1}".format(bam_file, out_file) script_util.runProgram(logger, "samtools", sam_to_bam, None, directory) final_bam_prefix = os.path.join(output_dir, "accepted_hits") logger.info("Executing: Sorting bam file {0}".format(bam_file)) sort_bam_cmd = "sort {0} {1}".format(bam_file, final_bam_prefix) script_util.runProgram(logger, "samtools", sort_bam_cmd, None, directory) except Exception, e: # logger.exception("Error Running the bowtie2 command {0},{1} {2}".format(bowtie2_cmd,directory," ".join(traceback.print_exc()))) raise Exception( "Error Running the bowtie2 command {0},{1} {2}".format( bowtie2_cmd, directory, " ".join(traceback.print_exc()) )
read2_id = r_sample['data']['handle_2']['id'] read2_name = r_sample['data']['handle_2']['file_name'] try: script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=input_direc,token=token) script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=input_direc,token=token) hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc,read1_name),os.path.join(output_dir,read2_name),hisat2_base,out_file) except Exception,e: raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name)) try: self.logger.info("Executing: hisat2 {0}".format(hisat2_cmd)) cmdline_output = script_util.runProgram(self.logger,"hisat2",hisat2_cmd,None,directory) except Exception,e: raise Exception("Failed to run command {0}".format(hisat2_cmd)) try: stats_data = {} stats_data = script_util.extractAlignmentStatsInfo(self.logger,"bowtie2",ws_client,ws_id,None,cmdline_output['stderr'],None) bam_file = os.path.join(output_dir,"accepted_hits_unsorted.bam") logger.info("Executing: sam_to_bam {0}".format(bam_file)) sam_to_bam = "view -bS -o {0} {1}".format(bam_file,out_file) script_util.runProgram(self.logger,"samtools",sam_to_bam,None,directory) final_bam_prefix = os.path.join(output_dir,"accepted_hits") logger.info("Executing: Sorting bam file {0}".format(bam_file)) sort_bam_cmd = "sort {0} {1}".format(bam_file,final_bam_prefix) script_util.runProgram(self.logger,"samtools",sort_bam_cmd,None,directory) except Exception,e: raise Exception("Error Running the hisat2 command {0},{1} {2}".format(hisat2_cmd,directory," ".join(traceback.print_exc()))) # Zip tophat folder out_file_path = os.path.join(directory,"%s.zip" % output_name) try: logger.info("Zipping the output files".format(out_file_path))
class Tophat(ExecutionBase): def __init__(self, logger, directory, urls, max_cores): pprint(self.__class__) super(Tophat, self).__init__(logger, directory, urls, max_cores) # user defined shared variables across methods #self.sample = None #self.sampleset_info = None self.num_threads = None def runEach(self, task_params): ws_client = self.common_params['ws_client'] hs = self.common_params['hs_client'] params = self.method_params logger = self.logger token = self.common_params['user_token'] read_sample = task_params['job_id'] condition = task_params['label'] directory = task_params['tophat_dir'] ws_id = task_params['ws_id'] genome_id = task_params['annotation_id'] sampleset_id = task_params['sampleset_id'] gtf_file = task_params['gtf_file'] print "Downloading Read Sample{0}".format(read_sample) logger.info("Downloading Read Sample{0}".format(read_sample)) try: #r_sample = ws_client.get_objects( # [{ 'name' : read_sample, 'workspace' : ws_id}])[0] r_sample = script_util.ws_get_obj(logger, ws_client, ws_id, read_sample)[0] #r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0] #sample_type = r_sample_info[2].split('-')[0] sample_type = script_util.ws_get_type_name(logger, ws_client, ws_id, read_sample) sample_name = script_util.ws_get_obj_name4file( self.logger, ws_client, ws_id, read_sample) output_name = sample_name.split('.')[0] + "_tophat_alignment" output_dir = os.path.join(directory, output_name) #if not os.path.exists(output_dir): os.makedirs(output_dir) #out_file = output_dir +"/accepted_hits.sam" bowtie2_base = os.path.join( directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2")) ### Adding advanced options to Bowtie2Call tophat_cmd = (' -p ' + str(self.num_threads)) if ('max_intron_length' in params and params['max_intron_length'] is not None): tophat_cmd += (' -I ' + str(params['max_intron_length'])) if ('min_intron_length' in params and params['min_intron_length'] is not None): tophat_cmd += (' -i ' + str(params['min_intron_length'])) if ('min_anchor_length' in params and params['min_anchor_length'] is not None): tophat_cmd += (' -a ' + str(params['min_anchor_length'])) if ('read_edit_dist' in params and params['read_edit_dist'] is not None): tophat_cmd += (' --read-edit-dist ' + str(params['read_edit_dist'])) if ('read_gap_length' in params and params['read_gap_length'] is not None): tophat_cmd += (' --read-gap-length ' + str(params['read_gap_length'])) if ('read_mismatches' in params and params['read_mismatches'] is not None): tophat_cmd += (' -N ' + str(params['read_mismatches'])) if ('library_type' in params and params['library_type'] is not None): tophat_cmd += (' --library-type ' + params['library_type']) if ('report_secondary_alignments' in params and int(params['report_secondary_alignments']) == 1): tophat_cmd += ' --report-secondary-alignments' if ('no_coverage_search' in params and int(params['no_coverage_search']) == 1): tophat_cmd += ' --no-coverage-search' if ('preset_options' in params and params['preset_options'] is not None): tophat_cmd += ' --' + params['preset_options'] #out_file = output_dir +"/accepted_hits.sam" try: sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample) ds = script_util.ru_reads_download(self.logger, sample_ref, directory, token) except Exception, e: self.logger.exception(e) raise Exception( "Unable to download reads file , {0}".format(read_sample)) if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary': lib_type = 'SingleEnd' tophat_cmd += ' -o {0} -G {1} {2} {3}'.format( output_dir, gtf_file, bowtie2_base, ds['fwd']) if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary': lib_type = 'PairedEnd' if sample_type == 'KBaseAssembly.PairedEndLibrary': if ('orientation' in params and params['orientation'] is not None): tophat_cmd += (' --' + params['orientation']) else: # TODO: the following can be read from PEL object if ('orientation' in params and params['orientation'] is not None): tophat_cmd += (' --' + params['orientation']) tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format( output_dir, gtf_file, bowtie2_base, ds['fwd'], ds['rev']) # if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary': # lib_type = 'SingleEnd' # if sample_type == 'KBaseAssembly.SingleEndLibrary': # read_id = r_sample['data']['handle']['id'] # read_name = r_sample['data']['handle']['file_name'] # else: # read_id = r_sample['data']['lib']['file']['id'] # read_name = r_sample['data']['lib']['file']['file_name'] # try: # script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=directory,token=token) # tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read_name)) # except Exception,e: # self.logger.exception(e) # raise Exception( "Unable to download shock file , {0}".format(read_name)) # if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary': # lib_type = 'PairedEnd' # if sample_type == 'KBaseAssembly.PairedEndLibrary': # if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation']) # read1_id = r_sample['data']['handle_1']['id'] # read1_name = r_sample['data']['handle_1']['file_name'] # read2_id = r_sample['data']['handle_2']['id'] # read2_name = r_sample['data']['handle_2']['file_name'] # else: # # TODO: the following can be read from PEL object # if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation']) # read1_id = r_sample['data']['lib1']['file']['id'] # read1_name = r_sample['data']['lib1']['file']['file_name'] # read2_id = r_sample['data']['lib2']['file']['id'] # read2_name = r_sample['data']['lib2']['file']['file_name'] # try: # script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=directory,token=token) # script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=directory,token=token) # tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read1_name),os.path.join(directory,read2_name)) # except Exception,e: # raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name)) try: self.logger.info("Executing: tophat {0}".format(tophat_cmd)) cmdline_output, cmd_err = script_util.runProgram( self.logger, "tophat", tophat_cmd, None, directory) except Exception, e: raise Exception("Failed to run command {0}\n{1}\n{2}".format( tophat_cmd, cmdline_output, cmd_err)) try: bam_file = output_dir + "/accepted_hits.bam" align_stats_cmd = "flagstat {0}".format(bam_file) stats = script_util.runProgram(logger, "samtools", align_stats_cmd, None, directory) #print stats stats_data = {} # Pass it to the stats['result'] #stats_obj_name = params['output_obj_name']+"_"+str(hex(uuid.getnode()))+"_AlignmentStats" stats_data = script_util.extractAlignmentStatsInfo( logger, "samtools", ws_client, ws_id, None, stats['result'], None) except Exception, e: raise Exception( "Failed to create RNASeqAlignmentStats: {0}".format( bam_file))