Example No. 1
    print "Executing: stringtie {0}".format(stringtie_command)
    ret = script_util.runProgram(None,"stringtie",stringtie_command,None,directory)
    ##Parse output files
    try:
         exp_dict = rnaseq_util.parse_FPKMtracking( g_output_file, 'StringTie', 'FPKM' )
         tpm_exp_dict = rnaseq_util.parse_FPKMtracking( g_output_file, 'StringTie', 'TPM' )
    except Exception,e:
         logger.exception("".join(traceback.format_exc()))
         raise Exception("Error parsing FPKMtracking: {0}".format(e))
    ## compress and upload to shock
    try:
         logger.info("Zipping Stringtie output")
         print "Zipping Stringtie output"
         out_file_path = os.path.join(directory,"%s.zip" % output_name)
         script_util.zip_files(logger,output_dir,out_file_path)
    except Exception,e:
         logger.exception("".join(traceback.format_exc()))
         raise Exception("Error zipping Stringtie output: {0}".format(e))
    try:
         handle = script_util.upload_file_to_shock(logger,out_file_path)['handle']
    except Exception, e:
         logger.exception("".join(traceback.format_exc()))
         raise Exception("Error uploading the zipped output to Shock: {0}".format(out_file_path))
    ## Save object to workspace
    try:
         logger.info("Saving Stringtie object to workspace")
         es_obj = { 'id'                       : output_name,
                    'type'                     : 'RNA-Seq',
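The pattern these examples share is: zip an output directory, upload the archive to Shock, and raise a single descriptive exception if either step fails. Below is a minimal sketch of that flow, assuming the script_util helpers behave as they are called above (zip_files(logger, src_dir, zip_path) and upload_file_to_shock(logger, path) returning a dict with a 'handle' key); zip_and_upload itself is a hypothetical wrapper, not part of the original module.

import logging
import os

logger = logging.getLogger(__name__)

def zip_and_upload(script_util, output_dir, work_dir, output_name):
    # Hypothetical wrapper around the zip-and-upload step shown in the examples.
    out_file_path = os.path.join(work_dir, "%s.zip" % output_name)
    try:
        logger.info("Zipping output to %s", out_file_path)
        script_util.zip_files(logger, output_dir, out_file_path)
    except Exception:
        # Log the traceback, then raise once; a raise placed before the logging call is never reached.
        logger.exception("Error zipping %s", output_dir)
        raise Exception("Error zipping {0}".format(output_dir))
    try:
        handle = script_util.upload_file_to_shock(logger, out_file_path)['handle']
    except Exception:
        logger.exception("Error uploading %s to Shock", out_file_path)
        raise Exception("Error uploading {0} to Shock".format(out_file_path))
    return handle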
Example No. 2
                    "Error Creating  FASTA object from the workspace {0},{1},{2}".format(
                        params["reference"], os.getcwd(), e
                    )
                )

            ## Run the bowtie_indexing on the command line
            try:
                bowtie_index_cmd = "{0} {1}".format(params["reference"], params["reference"])
                script_util.runProgram(self.__LOGGER, "bowtie2-build", bowtie_index_cmd, None, bowtie_dir)
            except Exception, e:
                raise KBaseRNASeqException("Error while running BowtieIndex {0},{1}".format(params["reference"], e))

            ## Zip the Index files
            try:
                script_util.zip_files(self.__LOGGER, bowtie_dir, "%s.zip" % params["output_obj_name"])
            except Exception, e:
                raise KBaseRNASeqException("Failed to compress the index: {0}".format(e))
            ## Upload the file using handle service
            try:
                bowtie_handle = script_util.create_shock_handle(
                    self.__LOGGER,
                    "%s.zip" % params["output_obj_name"],
                    self.__SHOCK_URL,
                    self.__HS_URL,
                    "Zip",
                    user_token,
                )
            except Exception, e:
                raise KBaseRNASeqException("Failed to upload the index: {0}".format(e))
            bowtie2index = {"handle": bowtie_handle}
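Outside the script_util.runProgram wrapper, the same bowtie2-build call can be issued directly with subprocess. A rough sketch, assuming bowtie2-build is on PATH and takes the reference FASTA followed by the index basename, as in the command string built above; build_bowtie2_index is an illustrative helper, not part of the original code.

import subprocess

def build_bowtie2_index(reference_fasta, index_basename, work_dir):
    # Equivalent of: bowtie2-build <reference_fasta> <index_basename>, run inside work_dir.
    proc = subprocess.Popen(["bowtie2-build", reference_fasta, index_basename],
                            cwd=work_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError("bowtie2-build failed: {0}".format(err))
    return out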
Example No. 3
        ## Run the bowtie_indexing on the command line
        try:
            if outfile_ref_name:
                bowtie_index_cmd = "{0} {1}".format(outfile_ref_name, params['reference'])
            else:
                bowtie_index_cmd = "{0} {1}".format(params['reference'], params['reference'])
            self.__LOGGER.info("Executing: bowtie2-build {0}".format(bowtie_index_cmd))
            cmdline_output = script_util.runProgram(self.__LOGGER, "bowtie2-build", bowtie_index_cmd, None, bowtie_dir)
            if 'result' in cmdline_output:
                report = cmdline_output['result']
        except Exception, e:
            raise KBaseRNASeqException("Error while running BowtieIndex {0},{1}".format(params['reference'], e))

        ## Zip the Index files
        try:
            out_file_path = os.path.join(self.__SCRATCH, "%s.zip" % params['output_obj_name'])
            script_util.zip_files(self.__LOGGER, bowtie_dir, out_file_path)
        except Exception, e:
            raise KBaseRNASeqException("Failed to compress the index: {0}".format(e))

        ## Upload the file using handle service
        try:
            bowtie_handle = hs.upload(out_file_path)
        except Exception, e:
            raise KBaseRNASeqException("Failed to upload the Zipped Bowtie2Indexes file: {0}".format(e))
        bowtie2index = {"handle": bowtie_handle, "size": os.path.getsize(out_file_path), 'genome_id': ref_id}

        ## Save object to workspace
        self.__LOGGER.info("Saving bowtie indexes object to workspace")
        res = ws_client.save_objects(
            {"workspace": params['ws_id'],
             "objects": [{
Example No. 4
                        self.__LOGGER.info(stdout)
                    
                    if stderr is not None and len(stderr) > 0:
                        self.__LOGGER.error("Indexing error: " + stderr)
                        raise KBaseGenomeUtilException("Indexing error: " + stderr)
                except Exception, e:
                    raise KBaseGenomeUtilException("Failed to run indexing program (%s) : %s " %(self.__INDEX_CMD, e))
                if index_type == 'nucleotide': index_type = 'both'
                else: index_type = 'protein'
            
            #os.remove(target_nt_fn)
            #os.remove(target_aa_fn)
         
            # compress
            try: 
                script_util.zip_files(self.__LOGGER, blast_dir, "%s.zip" % params['blastindex_name'])
            except Exception, e:
                raise KBaseGenomeUtilException("Failed to compress the index: %s" %(e))
               
            try: 
                handle = hs.upload("%s.zip" % (params['blastindex_name']))
            except Exception, e:
                raise KBaseGenomeUtilException("Failed to upload the index: %s" %(e))

            bi = {'handle' : handle, 'genome_set' : gs, 'index_type' : index_type, 'index_program' : params['index_program']}
            if 'description' in params: bi['description'] = params['description']
         
            if index_type == 'none': 
                err_msg = 'No sequences were indexed'
                bi['description'] = err_msg
                res= ws_client.save_objects(
Example No. 5
                        self.__LOGGER.info(stdout)
                    
                    if stderr is not None and len(stderr) > 0:
                        self.__LOGGER.error("Indexing error: " + stderr)
                        raise KBaseGenomeUtilException("Indexing error: " + stderr)
                except Exception, e:
                    raise KBaseGenomeUtilException("Failed to run indexing program (%s) : %s " %(self.__INDEX_CMD, e))
                if index_type == 'nucleotide': index_type = 'both'
                else: index_type = 'protein'
            
            #os.remove(target_nt_fn)
            #os.remove(target_aa_fn)
         
            # compress
            try: 
                script_util.zip_files(self.__LOGGER, blast_dir, "%s.zip" % params['blastindex_name'])
            except Exception, e:
                raise KBaseGenomeUtilException("Failed to compress the index: %s" %(e))
               
            try: 
                handle = hs.upload("%s.zip" % (params['blastindex_name']))
            except Exception, e:
                raise KBaseGenomeUtilException("Failed to upload the index: %s" %(e))

            bi = {'handle' : handle, 'genome_set' : gs, 'index_type' : index_type, 'index_program' : params['index_program']}
            if 'description' in params: bi['description'] = params['description']
         
            if index_type == 'none': 
                err_msg = 'No sequences were indexed'
                bi['description'] = err_msg
                res= ws_client.save_objects(
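Examples Nos. 4 and 5 drive an unspecified indexing program (self.__INDEX_CMD) before zipping and uploading the result. As an illustration only, since the original tool is not shown in the snippet, building a nucleotide BLAST database with NCBI makeblastdb would look like this; build_blast_db is a hypothetical stand-in.

import subprocess

def build_blast_db(fasta_path, db_basename):
    # Illustrative stand-in for self.__INDEX_CMD: makeblastdb from the NCBI BLAST+ suite.
    cmd = ["makeblastdb", "-in", fasta_path, "-dbtype", "nucl", "-out", db_basename]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError("makeblastdb failed: {0}".format(err))
    return out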
Example No. 6
            final_bam_prefix = os.path.join(output_dir, "accepted_hits")
            logger.info("Executing: Sorting bam file  {0}".format(bam_file))
            sort_bam_cmd = "sort {0} {1}".format(bam_file, final_bam_prefix)
            script_util.runProgram(logger, "samtools", sort_bam_cmd, None,
                                   directory)
        except Exception, e:
            logger.exception("Error running the samtools sort command {0},{1}".format(sort_bam_cmd, directory))
            raise Exception(
                "Error running the samtools sort command {0},{1} {2}".format(
                    sort_bam_cmd, directory, traceback.format_exc()))

        ## Zip the bowtie2 output folder
        try:
            out_file_path = os.path.join(directory, "%s.zip" % output_name)
            logger.info("Zipping the output files".format(out_file_path))
            script_util.zip_files(logger, output_dir, out_file_path)
        except Exception, e:
            #logger.exception("Failed to compress the index: {0}".format(out_file_path))
            raise Exception(
                "Failed to compress the index: {0}".format(out_file_path))

        ## Upload the file using handle service
        try:
            bowtie2_handle = hs.upload(out_file_path)
        except Exception, e:
            logger.exception(
                "Failed to upload zipped output file {0}".format(out_file_path))
            raise Exception(
                "Failed to upload zipped output file {0}".format(out_file_path))
        #### Replace version with get_version command#####
        bowtie2_out = {
            "file": bowtie2_handle,
Example No. 7
class DiffExpforBallgown(ExecutionBase):
    def __init__(self, logger, directory, urls):
        logger.info("in DiffExprforBallgown, type logger is " +
                    pformat(type(logger)))
        logger.info(" urls are " + pformat(urls))
        pprint(self.__class__)
        super(self.__class__, self).__init__(logger, directory, urls)

        # user defined shared variables across methods
        #self.num_threads = None
        self.num_threads = 1
        self.num_cores = 1
        self.tool_used = None
        self.tool_version = None

    def prepare(self):
        # for quick testing, we recover parameters here

        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        token = self.common_params['user_token']
        diffexp_dir = self.directory
        logger = self.logger
        logger.info('in DiffExpforBallgown.prepare(), method params are')
        logger.info(pformat(self.method_params))

        #self.details = rnaseq_util.get_details_for_diff_exp(logger,ws_client,hs,params['ws_id'],self.urls,diffexp_dir,params['expressionset_id'],token)
        #logger.info( 'back from get_details_for_diff_exp(), details are')
        #logger.info( pformat( self.details ) )
        self.num_threads = mp.cpu_count()
        self.num_jobs = 1

        self.details = {}
        self.details[
            "used_tool"] = "Ballgown (Bioconductor)"  # Question: where does this really get set?
        self.details["tool_version"] = "3.4"
        #als = []
        #for l in self.details['labels']:
        #        rep_files=[ (os.path.join(diffexp_dir+'/'+l,sub+'/accepted_hits.bam'), os.path.join(diffexp_dir+'/'+l,sub+'/transcripts.gtf')) for sub in os.listdir(os.path.join(diffexp_dir,l)) if os.path.isdir(os.path.join(diffexp_dir,l+'/'+sub))]
        #        #rep_files=",".join([ os.path.join(diffexp_dir+'/'+l,sub+'/accepted_hits.bam') for sub in os.listdir(os.path.join(diffexp_dir,l)) if os.path.isdir(os.path.join(diffexp_dir,l+'/'+sub))])
        #        als += rep_files
        #### Call Cuffmerge function
        #used_tool = self.details['used_tool']
        #merge_dir = os.path.join(diffexp_dir,"merge")
        #if used_tool == 'StringTie':
        #   run_tool =  "StringTie"
        #   tool_version = "1.2.3"
        #   #merged_gtf = rnaseq_util.call_stringtiemerge(diffexp_dir,merge_dir,self.num_threads,self.details['gtf_file'],self.details['gtf_list_file'])
        #elif used_tool == 'Cufflinks':
        #   merged_gtf = rnaseq_util.call_cuffmerge(diffexp_dir,merge_dir,num_threads,gtf_file,self.details['gtf_list_file'])
        #   run_tool = "Tablemaker"
        #   tool_version = '2.0.9'
        #   merged_gtf = rnaseq_util.call_cuffmerge(diffexp_dir,merge_dir,self.num_threads,self.details['gtf_file'],self.details['gtf_list_file'])
        #
        #self.bam_files = " ".join([i for i in als])
        #self.t_labels = ",".join(self.details['labels'])
        #ballgown_dir = os.path.join(diffexp_dir,"ballgown")
        #if not os.path.exists(ballgown_dir): os.mkdir(ballgown_dir)
        #### Make Input_dir from expression_file_name

        self.task_list = [self.__class__]
        logger.info('exiting ')

    def runEach(self, task_list):
        logger = self.logger
        ### Call Cuffmerge function
        used_tool = self.details['used_tool']
        logger.info('in DiffExpforBallgown.runEach()')
        if used_tool == "Ballgown (Bioconductor)":
            #merged_gtf = rnaseq_util.call_stringtiemerge(diffexp_dir,merge_dir,num_threads,self.details['gtf_file'],assembly_file)
            #run_tool =  "StringTie"
            #tool_version = "1.2.3"
            # For now, take no action for StringTie processing
            logger.info('Exiting immediately - StringTie case')
            return
        elif used_tool == 'Cufflinks':
            # merged_gtf is rebuilt below from self.* attributes; the call that used to sit
            # here referenced names (diffexp_dir, merge_dir, gtf_file, assembly_file) that are
            # not defined in this method.
            run_tool = "Tablemaker"
            tool_version = '2.0.9'
        cuffmerge_dir = os.path.join(self.directory, "cuffmerge")
        merged_gtf = rnaseq_util.call_cuffmerge(self.directory, cuffmerge_dir,
                                                self.num_threads,
                                                self.details['gtf_file'],
                                                self.details['gtf_list_file'])
        ### Run DiffExpforBallgown
        output_dir = os.path.join(self.directory,
                                  self.method_params['output_obj_name'])
        diffexp_command = (' -p ' + str(self.num_threads))

        ### Setting Advanced parameters for DiffExpforBallgown

        if ('time_series' in self.method_params
                and self.method_params['time_series'] != 0):
            diffexp_command += (' -T ')
        if ('min_alignment_count' in self.method_params
                and self.method_params['min_alignment_count'] is not None):
            diffexp_command += (' -c ' +
                                str(self.method_params['min_alignment_count']))
        if ('multi_read_correct' in self.method_params
                and self.method_params['multi_read_correct'] != 0):
            diffexp_command += (' --multi-read-correct ')
        if ('library_type' in self.method_params
                and self.method_params['library_type'] is not None):
            diffexp_command += (' --library-type ' +
                                self.method_params['library_type'])
        if ('library_norm_method' in self.method_params
                and self.method_params['library_norm_method'] is not None):
            diffexp_command += (' --library-norm-method ' +
                                self.method_params['library_norm_method'])
        try:
            diffexp_command += " -o {0} -L {1} -u {2} {3}".format(
                output_dir, self.t_labels, merged_gtf, self.bam_files)
            logger.info("Executing: diffexp {0}".format(diffexp_command))
            ret = script_util.runProgram(None, "diffexp", diffexp_command,
                                         None, self.directory)
            result = ret["result"]
            #error =  ret['stderr']
            #print result
            for line in result.splitlines(False):
                logger.info(line)
                stderr = ret["stderr"]
                prev_value = ''
                for line in stderr.splitlines(False):
                    if line.startswith('> Processing Locus'):
                        words = line.split()
                        cur_value = words[len(words) - 1]
                        if prev_value != cur_value:
                            prev_value = cur_value
                            logger.info(line)
                        else:
                            prev_value = ''
                            logger.info(line)
        except Exception, e:
            raise Exception("Error executing diffexp {0},{1}".format(
                diffexp_command, e))
        try:
            logger.info("Zipping DiffExpforBallgown output")
            out_file_path = os.path.join(
                self.directory,
                "{0}.zip".format(self.method_params['output_obj_name']))
            script_util.zip_files(logger, output_dir, out_file_path)
        except Exception, e:
            raise Exception("Error zipping DiffExpforBallgown output: {0}".format(e))
Example No. 8
            results = run_diffexp_for_ballgown_in_parallel(b_tasks)
            print results
            #expr_file, single_ballgown_dir = results
            #### Check if all the jobs passed
            ballgownobject_name = params['expressionset_id']+"_DifferentialExpression_Ballgown"
            ballgown_dir = os.path.join(diffexp_dir,"ballgown")
            #reportObj=script_util.create_RNASeq_ExpressionSet_and_build_report(logger,ws_client,TOOL_USED, TOOL_VERSION,tool_opts,params['ws_id'],align_names,expressionset_id,annotation_id,sampleset_id,results,expressionSet_name)
            ### Save Ballgown differential Expression object to workspace
            #except Exception,e:
            #    raise Exception("Error executing diffexp {0},{1}".format(cuffdiff_command,directory))

            ## compress and upload to shock
            try:
                 logger.info("Zipping differential expression output for ballgown")
                 out_file_path = os.path.join(diffexp_dir,"{0}.zip".format(params['output_obj_name']))
                 script_util.zip_files(logger,ballgown_dir,out_file_path)
            except Exception,e:
                 raise Exception("Error zipping dir {0}".format(ballgown_dir))
            try:
                 handle = hs.upload(out_file_path)
            except Exception, e:
                 print traceback.format_exc()
                 raise Exception("Failed to upload the diffexp output files: {0}".format(out_file_path))
            output_name = params['output_obj_name']
            ## Save object to workspace
            try:
                 logger.info("Saving diffexp object to workspace")
                 cm_obj = { "tool_used" : run_tool,
                            "tool_version" : tool_version,
                            "condition" : condition,
                            "genome_id" : genome_id,
Example No. 9
    #expr_file, single_ballgown_dir = results
    #### Check if all the jobs passed
    ballgownobject_name = params[
        'expressionset_id'] + "_DifferentialExpression_Ballgown"
    ballgown_dir = os.path.join(diffexp_dir, "ballgown")
    #reportObj=script_util.create_RNASeq_ExpressionSet_and_build_report(logger,ws_client,TOOL_USED, TOOL_VERSION,tool_opts,params['ws_id'],align_names,expressionset_id,annotation_id,sampleset_id,results,expressionSet_name)
    ### Save Ballgown differential Expression object to workspace
    #except Exception,e:
    #    raise Exception("Error executing diffexp {0},{1}".format(cuffdiff_command,directory))

    ##  compress and upload to shock
    try:
        logger.info("Zipping differential expression output for ballgown")
        out_file_path = os.path.join(
            diffexp_dir, "{0}.zip".format(params['output_obj_name']))
        script_util.zip_files(logger, ballgown_dir, out_file_path)
    except Exception, e:
        raise Exception("Error zipping dir {0}".format(ballgown_dir))
    try:
        handle = hs.upload(out_file_path)
    except Exception, e:
        print " ".join(traceback.print_exc())
        raise Exception(
            "Failed to upload the diffexp output files: {0}".format(
                out_file_path))
    output_name = params['output_obj_name']
    ## Save object to workspace
    try:
        logger.info("Saving diffexp object to workspace")
        cm_obj = {
            "tool_used": run_tool,
Example No. 10
class Cuffdiff(ExecutionBase):
    def __init__(self, logger, directory, urls, max_cores):
        super(self.__class__, self).__init__(logger, directory, urls,
                                             max_cores)

        # user defined shared variables across methods
        self.num_threads = None
        self.tool_used = "Cuffdiff"
        self.tool_version = "1.2.3"

    def prepare(self):
        # for quick testing, we recover parameters here
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        cuffdiff_dir = self.directory

        self.details = rnaseq_util.get_details_for_diff_exp(
            logger, ws_client, hs, params['ws_id'], self.urls, cuffdiff_dir,
            params['expressionset_id'], token)
        self.num_threads = mp.cpu_count()
        self.num_jobs = 1
        als = []
        for l in self.details['labels']:
            rep_files = ",".join([
                os.path.join(cuffdiff_dir + '/' + l,
                             sub + '/accepted_hits.bam')
                for sub in os.listdir(os.path.join(cuffdiff_dir, l))
                if os.path.isdir(os.path.join(cuffdiff_dir, l + '/' + sub))
            ])
            als.append(rep_files)

        self.bam_files = " ".join([i for i in als])
        self.t_labels = ",".join(self.details['labels'])
        self.task_list = [self.__class__]

    def runEach(self, task_list):
        ### Call Cuffmerge function
        cuffmerge_dir = os.path.join(self.directory, "cuffmerge")
        merged_gtf = rnaseq_util.call_cuffmerge(self.directory, cuffmerge_dir,
                                                self.num_threads,
                                                self.details['gtf_file'],
                                                self.details['gtf_list_file'])
        ### Run Cuffdiff
        output_dir = os.path.join(self.directory,
                                  self.method_params['output_obj_name'])
        cuffdiff_command = (' -p ' + str(self.num_threads))

        ### Setting Advanced parameters for Cuffdiff

        if ('time_series' in self.method_params
                and self.method_params['time_series'] != 0):
            cuffdiff_command += (' -T ')
        if ('min_alignment_count' in self.method_params
                and self.method_params['min_alignment_count'] is not None):
            cuffdiff_command += (
                ' -c ' + str(self.method_params['min_alignment_count']))
        if ('multi_read_correct' in self.method_params
                and self.method_params['multi_read_correct'] != 0):
            cuffdiff_command += (' --multi-read-correct ')
        if ('library_type' in self.method_params
                and self.method_params['library_type'] is not None):
            cuffdiff_command += (' --library-type ' +
                                 self.method_params['library_type'])
        if ('library_norm_method' in self.method_params
                and self.method_params['library_norm_method'] is not None):
            cuffdiff_command += (' --library-norm-method ' +
                                 self.method_params['library_norm_method'])
        try:
            cuffdiff_command += " -o {0} -L {1} -u {2} {3}".format(
                output_dir, self.t_labels, merged_gtf, self.bam_files)
            self.logger.info(
                "Executing: cuffdiff {0}".format(cuffdiff_command))
            ret = script_util.runProgram(None, "cuffdiff", cuffdiff_command,
                                         None, self.directory)
            result = ret["result"]
            for line in result.splitlines(False):
                self.logger.info(line)
                stderr = ret["stderr"]
                prev_value = ''
                for line in stderr.splitlines(False):
                    if line.startswith('> Processing Locus'):
                        words = line.split()
                        cur_value = words[len(words) - 1]
                        if prev_value != cur_value:
                            prev_value = cur_value
                            self.logger.info(line)
                        else:
                            prev_value = ''
                            self.logger.info(line)
        except Exception, e:
            raise Exception("Error executing cuffdiff {0},{1}".format(
                cuffdiff_command, e))
        try:
            self.logger.info("Zipping Cuffdiff output")
            out_file_path = os.path.join(
                self.directory,
                "{0}.zip".format(self.method_params['output_obj_name']))
            script_util.zip_files(self.logger, output_dir, out_file_path)
        except Exception, e:
            raise Exception("Error zipping Cuffdiff output: {0}".format(e))
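After all the optional flags are appended, the string handed to script_util.runProgram(None, "cuffdiff", ...) takes the general shape sketched below. The values are illustrative only, not taken from an actual run; the point is that the merged GTF follows the boolean -u flag as cuffdiff's positional transcripts argument, and each condition's replicate BAMs are comma-joined.

# Illustrative values only -- the shape of the assembled cuffdiff argument string:
cuffdiff_command = (" -p 8 -T -c 10 --multi-read-correct"
                    " --library-type fr-unstranded"
                    " --library-norm-method classic-fpkm"
                    " -o ./cuffdiff_out -L WT,Mutant -u merged.gtf"
                    " WT_r1.bam,WT_r2.bam Mutant_r1.bam,Mutant_r2.bam")
# i.e. cuffdiff [options] <merged.gtf> <condition1 BAMs> <condition2 BAMs>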