Example #1
def _CallCufflinks(logger,services,ws_client,hs,ws_id,num_threads,s_alignment,gtf_file,directory,genome_id,annotation_id,sample_id,alignmentset_id,params,token):
    print "Downloading Read Sample {0}".format(s_alignment)
    alignment_name = ws_client.get_object_info([{"ref" :s_alignment}],includeMetadata=None)[0][1]
    if not logger:
        logger = create_logger(directory,"run_Cufflinks_"+alignment_name)
    try:
        alignment = ws_client.get_objects(
                                    [{ 'ref' : s_alignment }])[0]
        #alignment_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
        #sample_type = r_sample_info[2].split('-')[0]
        output_name = alignment_name.split('_alignment')[0]+"_cufflinks_expression"
        output_dir = os.path.join(directory,output_name)
        #Download Alignment from shock
        a_file_id = alignment['data']['file']['id']
        a_filename = alignment['data']['file']['file_name']
        condition = alignment['data']['condition']
        #i_name = alignment_name+"_"+a_filename
        #if replicate_id in alignment['data'] : replicate_id = alignment['data']['replicate_id']
        try:
            script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=a_file_id,filename=a_filename,directory=directory,token=token)
        except Exception,e:
            raise Exception( "Unable to download shock file, {0}".format(a_filename))
        try:
            input_dir = os.path.join(directory,alignment_name)
            if not os.path.exists(input_dir): os.mkdir(input_dir)
            script_util.unzip_files(logger,os.path.join(directory,a_filename), input_dir)
        except Exception, e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip alignment files error")
Example #2
def _CallStringtie(logger,services,ws_client,hs,ws_id,num_threads,s_alignment,gtf_file,directory,genome_id,annotation_id,sample_id,alignmentset_id,params,token):
        print "Downloading Read Sample{0}".format(s_alignment)
        alignment_name = ws_client.get_object_info([{"ref" :s_alignment}],includeMetadata=None)[0][1]
        if not logger:
                logger = handler_util.create_logger(directory,"run_Stringtie_"+alignment_name)
        try:
                alignment = ws_client.get_objects(
                                        [{ 'ref' : s_alignment }])[0]
                input_direc = os.path.join(directory,alignment_name.split('_alignment')[0]+"_stringtie_input")
                if not os.path.exists(input_direc) : os.mkdir(input_direc)
                output_name = alignment_name.split('_alignment')[0]+"_stringtie_expression"
                output_dir = os.path.join(directory,output_name)
                #Download Alignment from shock
                a_file_id = alignment['data']['file']['id']
                a_filename = alignment['data']['file']['file_name']
                condition = alignment['data']['condition']
                try:
                     script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=a_file_id,filename=a_filename,directory=input_direc,token=token)
                except Exception,e:
                        raise Exception( "Unable to download shock file, {0},{1}".format(a_filename,"".join.tracback.format_exc()))
                try:
                    input_dir = os.path.join(input_direc,alignment_name)
                    if not os.path.exists(input_dir): os.mkdir(input_dir)
                    script_util.unzip_files(logger,os.path.join(input_direc,a_filename), input_dir)
                except Exception, e:
                       logger.error("".join(traceback.format_exc()))
                       raise Exception("Unzip alignment files  error")
Example #3
    def runEach(self,task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        
        s_alignment = task_params['job_id']
        gtf_file = task_params['gtf_file']
        directory = task_params['stringtie_dir']
        genome_id = task_params['genome_id']
        annotation_id = task_params['annotation_id']
        sample_id = task_params['sample_id']
        alignmentset_id = task_params['alignmentset_id']
        ws_id = task_params['ws_id']

        print "Downloading Sample Alignment from workspace {0}".format(s_alignment)
        logger.info("Downloading Sample Alignment from workspace {0}".format(s_alignment))
        alignment_name = ws_client.get_object_info([{"ref" :s_alignment}],includeMetadata=None)[0][1]
        if not logger:
           logger = handler_util.create_logger(directory,"run_Stringtie_"+alignment_name)
        try:
           alignment = ws_client.get_objects(
                                        [{ 'ref' : s_alignment }])[0]
           input_direc = os.path.join(directory,alignment_name.split('_alignment')[0]+"_stringtie_input")
           if not os.path.exists(input_direc) : os.mkdir(input_direc)
           output_name = alignment_name.split('_alignment')[0]+"_stringtie_expression"
           output_dir = os.path.join(directory,output_name)
           #Download Alignment from shock
           a_file_id = alignment['data']['file']['id']
           a_filename = alignment['data']['file']['file_name']
           condition = alignment['data']['condition']
           try:
                script_util.download_file_from_shock(logger, shock_service_url=self.urls['shock_service_url'], shock_id=a_file_id,filename=a_filename,directory=input_direc,token=token)
           except Exception,e:
                raise Exception( "Unable to download shock file, {0},{1}".format(a_filename,"".join(traceback.format_exc())))
           try:
                input_dir = os.path.join(input_direc,alignment_name)
                if not os.path.exists(input_dir): os.mkdir(input_dir)
                script_util.unzip_files(logger,os.path.join(input_direc,a_filename), input_dir)
           except Exception, e:
                logger.error("".join(traceback.format_exc()))
                raise Exception("Unzip alignment files error")
Example #4
def _CallCufflinks(logger, services, ws_client, hs, ws_id, num_threads,
                   s_alignment, gtf_file, directory, genome_id, annotation_id,
                   sample_id, alignmentset_id, params, token):
    print "Downloading Read Sample{0}".format(s_alignment)
    alignment_name = ws_client.get_object_info([{
        "ref": s_alignment
    }],
                                               includeMetadata=None)[0][1]
    if not logger:
        logger = create_logger(directory, "run_Cufflinks_" + alignment_name)
    try:
        alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
        #alignment_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
        #sample_type = r_sample_info[2].split('-')[0]
        output_name = alignment_name.split(
            '_alignment')[0] + "_cufflinks_expression"
        output_dir = os.path.join(directory, output_name)
        #Download Alignment from shock
        a_file_id = alignment['data']['file']['id']
        a_filename = alignment['data']['file']['file_name']
        condition = alignment['data']['condition']
        #i_name = alignment_name+"_"+a_filename
        #if replicate_id in alignment['data'] : replicate_id = alignment['data']['replicate_id']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=services['shock_service_url'],
                shock_id=a_file_id,
                filename=a_filename,
                directory=directory,
                token=token)
        except Exception, e:
            raise Exception(
                "Unable to download shock file, {0}".format(a_filename))
        try:
            input_dir = os.path.join(directory, alignment_name)
            if not os.path.exists(input_dir): os.mkdir(input_dir)
            script_util.unzip_files(logger,
                                    os.path.join(directory,
                                                 a_filename), input_dir)
        except Exception, e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip alignment files")
Example #5
def extract_cuffdiff_data(logger, shock_url, scratch, s_res, user_token):

        returnVal = False
        # Get input data Shock Id and Filename.
        cuffdiff_shock_id = s_res[0]['data']['file']['id']
        cuffdiff_file_name = s_res[0]['data']['file']['file_name']


        filesize = None

        dx = script_util.download_file_from_shock( logger,
            shock_url, cuffdiff_shock_id, cuffdiff_file_name,
            scratch, filesize, user_token)

        #cuffdiff_file_name =None

        #Decompress tar file and keep it in a directory
        zipfile = join(scratch, cuffdiff_file_name)
        dstnExtractFolder1 = join(scratch, "cuffdiffData")
        dstnExtractFolder = join(dstnExtractFolder1, "cuffdiff")

        if not os.path.exists(dstnExtractFolder):
            os.makedirs(dstnExtractFolder)

        #untarStatus = untar_files(logger, tarfile, dstnExtractFolder)
        #if untarStatus == False:
        #    logger.info("Problem extracting the archive")
        #    return returnVal
        unzipStatus = script_util.unzip_files(logger, zipfile, dstnExtractFolder)
        if unzipStatus == False:
            logger.info("Problem extracting the archive")
            return returnVal


        foldersinExtractFolder = os.listdir(dstnExtractFolder)

        if len(foldersinExtractFolder) == 0:
            logger.info("Problem extracting the archive")
            return returnVal

        # Run R script to run cummerbund json and update the cummerbund output json file
        cuffdiff_dir = dstnExtractFolder

        return cuffdiff_dir
Example #6
def extract_cuffdiff_data(logger, shock_url, scratch, s_res, user_token):

    returnVal = False
    # Get input data Shock Id and Filename.
    cuffdiff_shock_id = s_res[0]['data']['file']['id']
    cuffdiff_file_name = s_res[0]['data']['file']['file_name']

    filesize = None

    dx = script_util.download_file_from_shock(logger, shock_url,
                                              cuffdiff_shock_id,
                                              cuffdiff_file_name, scratch,
                                              filesize, user_token)

    #cuffdiff_file_name =None

    #Decompress tar file and keep it in a directory
    zipfile = join(scratch, cuffdiff_file_name)
    dstnExtractFolder1 = join(scratch, "cuffdiffData")
    dstnExtractFolder = join(dstnExtractFolder1, "cuffdiff")

    if not os.path.exists(dstnExtractFolder):
        os.makedirs(dstnExtractFolder)

    #untarStatus = untar_files(logger, tarfile, dstnExtractFolder)
    #if untarStatus == False:
    #    logger.info("Problem extracting the archive")
    #    return returnVal
    unzipStatus = script_util.unzip_files(logger, zipfile, dstnExtractFolder)
    if unzipStatus == False:
        logger.info("Problem extracting the archive")
        return returnVal

    foldersinExtractFolder = os.listdir(dstnExtractFolder)

    if len(foldersinExtractFolder) == 0:
        logger.info("Problem extracting the archive")
        return returnVal

    # Run R script to run cummerbund json and update the cummerbund output json file
    cuffdiff_dir = dstnExtractFolder

    return cuffdiff_dir
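Note the mixed return type: extract_cuffdiff_data hands back False when extraction fails and the extraction directory on success, so callers must check which one they received. A short usage sketch:

cuffdiff_dir = extract_cuffdiff_data(logger, shock_url, scratch, s_res, user_token)
if cuffdiff_dir is False:
    logger.info("could not extract the cuffdiff archive")
else:
    logger.info("cuffdiff tables available under {0}".format(cuffdiff_dir))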
Example #7
class Bowtie2SampleSet(Bowtie2):
    def __init__(self, logger, directory, urls, max_cores):
        super(self.__class__, self).__init__(logger, directory, urls,
                                             max_cores)

        # user defined shared variables across methods
        self.sample = None
        self.sampleset_info = None
        #self.num_threads = None

    def prepare(self):
        # for quick testing, we recover parameters here
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        bowtie2_dir = self.directory

        try:
            #sample,bowtie_index = ws_client.get_objects(
            #                         [{ 'name' : params['sampleset_id'], 'workspace' : params['ws_id']},
            #                         { 'name' : params['bowtie_index'], 'workspace' : params['ws_id']}])
            sample = script_util.ws_get_obj(logger, ws_client, params['ws_id'],
                                            params['sampleset_id'])[0]
            bowtie_index = script_util.ws_get_obj(logger, ws_client,
                                                  params['ws_id'],
                                                  params['bowtie_index'])[0]
            self.sample = sample
        except Exception, e:
            logger.exception("".join(traceback.format_exc()))
            raise ValueError(" Error Downloading objects from the workspace ")
        ### Get object IDs
        #sampleset_info,bowtie_index_info = ws_client.get_object_info_new({"objects": [
        #                                   {'name': params['sampleset_id'], 'workspace': params['ws_id']},
        #                                   {'name': params['bowtie_index'], 'workspace': params['ws_id']}
        #                                   ]})
        sampleset_info = script_util.ws_get_obj_info(logger, ws_client,
                                                     params['ws_id'],
                                                     params['sampleset_id'])[0]
        #bowtie_index_info = script_util.ws_get_obj_info(logger, ws_client, params['ws_id'], params['bowtie_index'])[0]

        ### Get the workspace object ids for the objects ###
        sampleset_id = str(sampleset_info[6]) + '/' + str(
            sampleset_info[0]) + '/' + str(sampleset_info[4])
        #bowtie_index_id = str(bowtie_index_info[6]) + '/' + str(bowtie_index_info[0]) + '/' + str(bowtie_index_info[4])
        self.sampleset_info = sampleset_info
        ### Get the workspace object ids for the objects ###
        sample_type = sampleset_info[2].split('-')[0]

        ### Check if the Library objects exist in the same workspace
        if not (sample_type == 'KBaseRNASeq.RNASeqSampleSet'
                or sample_type == 'KBaseSets.ReadsSet'):
            raise Bowtie2SampleSetException(
                'RNASeqSampleSet or ReadsSet is required')
        #logger.info("Check if the Library objects do exist in the current workspace")
        (reads,
         r_label) = rnaseq_util.get_reads_conditions(logger, sample,
                                                     sample_type)
        #reads = sample['data']['sample_ids']
        #r_label = sample['data']['condition']
        #reads_type= sample['data']['Library_type']
        #e_ws_objs = script_util.if_ws_obj_exists_notype(None,ws_client,params['ws_id'],reads) # removed read type as it will be added only if it satisfies input types
        #missing_objs = [i for i in reads if not i in e_ws_objs]
        #if len(e_ws_objs) != len(reads):
        #    raise Bowtie2SampleSetException('Missing Library objects {0} in the {1}. please copy them and run this method'.format(",".join(missing_objs),params['ws_id']))

        self.num_jobs = len(reads)
        bw_index_files = script_util.check_and_download_existing_handle_obj(
            logger, ws_client, self.urls, params['ws_id'],
            params['bowtie_index'], "KBaseRNASeq.Bowtie2Indexes", bowtie2_dir,
            token)
        try:
            logger.info("Unzipping Bowtie2 Indices")
            script_util.unzip_files(logger,
                                    os.path.join(bowtie2_dir, bw_index_files),
                                    bowtie2_dir)
            mv_dir = handler_util.get_dir(bowtie2_dir)
            if mv_dir is not None:
                script_util.move_files(logger, mv_dir, bowtie2_dir)
        except Exception, e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip indexfile error")
Example #8
    def blast_against_genome(self, ctx, params):
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN blast_against_genome

        # TODO: Rename blast_search

        try:
           self.__LOGGER.info( "Preparing FA")
           if len(params['query']) > 5:
               sequence=params['query']
           else:
               self.__LOGGER.error("The input sequence is too short!")
               raise KBaseGenomeUtilException("The input sequence is too short!")
        
           if not os.path.exists(self.__TEMP_DIR): os.makedirs(self.__TEMP_DIR)
         
           #print "generate input file for query sequence\n"
           query_fn = "%s/%s" %(self.__TEMP_DIR, self.__QUERY_FA)
           target=open(query_fn,'w')
           if sequence.startswith(">"):
             target.write(sequence)
           else:
             seqes = sequence.split("\n")
             for i in range(len(seqes)):
               target.write(">query_seq_%d\n" %(i))
               target.write(seqes[i])
           target.close()
         
           user_token=ctx['token']
           svc_token = Token(user_id=self.__SVC_USER, password=self.__SVC_PASS).token
           ws_client=Workspace(url=self.__WS_URL, token=user_token)
        
        
           err_msg = ""
        
           blast_dir =self.__BLAST_DIR
           if os.path.exists(blast_dir):
               files=glob.glob("%s/*" % blast_dir)
               for f in files: os.remove(f)
           if not os.path.exists(blast_dir): os.makedirs(blast_dir)
           target_fn = "%s/%s" %( blast_dir, self.__GENOME_FA)
           if 'target_seqs' in params:
               # let's build index directly and throw away
               sequence = params['target_seqs']
        
               target=open(target_fn,'w')
               if sequence.startswith(">"):
                 target.write(sequence)
               else:
                 seqes = sequence.split("\n")
                 for i in range(len(seqes)):
                   target.write(">target_seq_%d\n" %(i))
                   target.write(seqes[i])
               target.close()
            
               if(self.__INDEX_TYPE[params['blast_program']]  == 'protein_db'):
                   formatdb_type='T'
               elif(self.__INDEX_TYPE[params['blast_program']]  == 'transcript_db'):
                   formatdb_type='F'
               else:
                   self.__LOGGER.error("{0} is not yet supported".format(params['blast_program']))
                   raise KBaseGenomeUtilException("{0} is not yet supported".format(params['blast_program']))
               cmdstring="%s -i %s -p %s -o T" %(self.__INDEX_CMD, target_fn, formatdb_type)
               # TODO: build the command as an argument list and drop shell=True
               tool_process = subprocess.Popen(cmdstring, stderr=subprocess.PIPE, shell=True)
               stdout, stderr = tool_process.communicate()
   
               if stdout is not None and len(stdout) > 0:
                   self.__LOGGER.info(stdout)
   
               if stderr is not None and len(stderr) > 0:
                   self.__LOGGER.error("Index error: " + stderr)
                   raise KBaseGenomeUtilException("Index error: " + stderr)
        
           else:
               try:
                   blast_indexes=ws_client.get_object_subset([{'name':params['blastindex_name'],
                                                             'workspace': params['ws_id'], 
                                                             'included':['handle', 'index_type']}])
               except:
                   self.__LOGGER.error("Couldn't find %s:%s from the workspace" %(params['ws_id'],params['blastindex_name']))
                   raise KBaseGenomeUtilException("Couldn't find %s:%s from the workspace" %(params['ws_id'],params['blastindex_name']))

               if len(blast_indexes) < 1:
                   self.__LOGGER.error("Couldn't find %s:%s from the workspace" %(params['ws_id'],params['blastindex_name']))
                   raise KBaseGenomeUtilException("Couldn't find %s:%s from the workspace" %(params['ws_id'],params['blastindex_name']))
        
               
               # TODO: Add err handling
               zip_fn = blast_indexes[0]['data']['handle']['file_name']
               target_fn = "%s/%s" %(blast_dir, zip_fn[:-4]) # remove '.zip'
        
               if(self.__INDEX_TYPE[params['blast_program']] == 'protein_db'):
                   target_fn += '_aa.fa'
                   if blast_indexes[0]['data']['index_type'] == 'none' or blast_indexes[0]['data']['index_type'] == "nucleotide":
                       self.__LOGGER.error("The index object does not contain amino acid sequence indexes")
                       raise KBaseGenomeUtilException("The index object does not contain amino acid sequence indexes. This index will only work with blastn (nucleotide query, nucleotide index), tblastn (protein query, nucleotide index) and tblastx (nucleotide query, nucleotide index)")
               elif(self.__INDEX_TYPE[params['blast_program']] == 'transcript_db'):
                   target_fn += '_nt.fa'
                   if blast_indexes[0]['data']['index_type'] == 'none' or blast_indexes[0]['data']['index_type'] == "protein":
                       self.__LOGGER.error("The index object does not contain nucleotide sequence indexes")
                       raise KBaseGenomeUtilException("The index object does not contain nucleotide sequence indexes. This index will only work with blastp (protein query, protein index) and blastx (nucleotide query, protein index)")
               else:
                   self.__LOGGER.error("{0} is not yet supported".format(params['blast_program']))
                   raise KBaseGenomeUtilException("{0} is not yet supported".format(params['blast_program']))
        
               # TODO: Add err handling
               zip_fn = blast_indexes[0]['data']['handle']['file_name']
               #pprint(blast_indexes[0])
              
               self.__LOGGER.info("Downloading the genome index")
               #hs = HandleService(url=self.__HS_URL, token=user_token)
               try:
                   script_util.download_file_from_shock(self.__LOGGER,
                                   shock_service_url= blast_indexes[0]['data']['handle']['url'],
                                   shock_id= blast_indexes[0]['data']['handle']['id'],
                                   filename= blast_indexes[0]['data']['handle']['file_name'],
                                   directory= '.',
                                   token = user_token)
               except Exception, e:
                   self.__LOGGER.error("Downloading error from shock: Please contact [email protected]")
                   raise KBaseGenomeUtilException("Downloading error from shock: Please contact [email protected]")
               try:
                   script_util.unzip_files(self.__LOGGER, zip_fn, blast_dir)
               except Exception, e:
                   self.__LOGGER.error("Unzip indexfile error: Please contact [email protected]")
                   raise KBaseGenomeUtilException("Unzip indexfile error: Please contact [email protected]")
                                        [{'ref' : a_id},{'ref': e_id}])
                        ### Get the condition name, replicate_id , shock_id and shock_filename
                        condition = a_obj['data']['condition']
                        if 'replicate_id' in a_obj['data'] : replicate_id = a_obj['data']['replicate_id']
                        files[a_obj['data']['file']['file_name']] = a_obj['data']['file']['id']
                        files[e_obj['data']['file']['file_name']] = e_obj['data']['file']['id']
                        if not condition in labels: labels.append(condition)
                        else :  counter += 1 #### comment it when replicate_id is available from methods
                        s_path = os.path.join(diffexp_dir,condition+"/"+str(counter)) ### Comment this line when replicate_id is available from the methods
                        if not os.path.exists(s_path): os.makedirs(s_path)
                        try:
                                script_util.download_shock_files(logger,services['shock_service_url'],s_path,files,token)
                        except Exception,e:
                                raise Exception( "Unable to download shock file, {0}".format(e))
                        try:
                                script_util.unzip_files(logger,os.path.join(s_path,a_obj['data']['file']['file_name']),s_path)
                                script_util.unzip_files(logger,os.path.join(s_path,e_obj['data']['file']['file_name']),s_path)
                                e_file_path = os.path.join(s_path,"transcripts.gtf")
                                a_file_path = os.path.join(s_path,"accepted_hits.bam")
                                if os.path.exists(a_file_path) : print a_file_path
                                if os.path.exists(e_file_path) :
                                        print e_file_path
                                        list_file.write("{0}\n".format(e_file_path))
                        except Exception, e:
                                logger.exception("".join(traceback.format_exc()))
                                raise Exception("Unzip file error")
            list_file.close()
            print labels
            #output_dir = os.path.join(cuffdiff_dir, params['output_obj_name'])
            for l in labels:
                  #rep_files=",".join([ os.path.join(diffexp_dir+'/'+l,sub+'/accepted_hits.bam') for sub in os.listdir(os.path.join(diffexp_dir,l)) if os.path.isdir(os.path.join(diffexp_dir,l+'/'+sub))])
Example #11
class Bowtie2Sample(Bowtie2):
    def __init__(self, logger, directory, urls, max_cores):
        #super(Bowtie2Sample, self).__init__(logger, directory, urls)
        super(self.__class__, self).__init__(logger, directory, urls,
                                             max_cores)
        # user defined shared variables across methods
        self.sample_info = None
        #self.sampleset_info = None
        self.num_threads = 1

    def prepare(self):
        # for quick testing, we recover parameters here
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        bowtie2_dir = self.directory

        try:
            #sample,bowtie_index = ws_client.get_objects(
            #                         [{ 'name' : params['sampleset_id'], 'workspace' : params['ws_id']},
            #                         { 'name' : params['bowtie_index'], 'workspace' : params['ws_id']}])
            sample = script_util.ws_get_obj(logger, ws_client, params['ws_id'],
                                            params['sampleset_id'])[0]
            bowtie_index = script_util.ws_get_obj(logger, ws_client,
                                                  params['ws_id'],
                                                  params['bowtie_index'])[0]
            self.sample = sample
        except Exception, e:
            logger.exception("".join(traceback.format_exc()))
            raise ValueError(" Error Downloading objects from the workspace ")
        ### Get object IDs
        #sample_info,bowtie_index_info = ws_client.get_object_info_new({"objects": [
        #                                   {'name': params['sampleset_id'], 'workspace': params['ws_id']},
        #                                   {'name': params['bowtie_index'], 'workspace': params['ws_id']}
        #                                   ]})
        sample_info = script_util.ws_get_obj_info(logger, ws_client,
                                                  params['ws_id'],
                                                  params['sampleset_id'])[0]
        bowtie_index_info = script_util.ws_get_obj_info(
            logger, ws_client, params['ws_id'], params['bowtie_index'])[0]
        self.sample_info = sample_info
        ### Get the workspace object ids for the objects ###
        sample_id = str(sample_info[6]) + '/' + str(
            sample_info[0]) + '/' + str(sample_info[4])
        bowtie_index_id = str(bowtie_index_info[6]) + '/' + str(
            bowtie_index_info[0]) + '/' + str(bowtie_index_info[4])
        sample_type = sample_info[2].split('-')[0]
        lib_types = [
            'KBaseAssembly.SingleEndLibrary', 'KBaseAssembly.PairedEndLibrary',
            'KBaseFile.SingleEndLibrary', 'KBaseFile.PairedEndLibrary'
        ]
        ### Check if the Library objects exist in the same workspace
        if not sample_type in lib_types:  #'KBaseAssembly.SingleEndLibrary' or sample_type != 'KBaseAssembly.PairedEndLibrary':
            raise Bowtie2SampleException(
                'Either of the Library typed objects SingleEndLibrary or PairedEndLibrary is required'
            )
        r_label = 'Single'
        ### Get the Bw index file

        bw_index_files = script_util.check_and_download_existing_handle_obj(
            logger, ws_client, self.urls, params['ws_id'],
            params['bowtie_index'], "KBaseRNASeq.Bowtie2Indexes", bowtie2_dir,
            token)
        try:
            logger.info("Unzipping Bowtie2 Indices")
            script_util.unzip_files(logger,
                                    os.path.join(bowtie2_dir, bw_index_files),
                                    bowtie2_dir)
            mv_dir = handler_util.get_dir(bowtie2_dir)
            if mv_dir is not None:
                script_util.move_files(logger, mv_dir, bowtie2_dir)
        except Exception, e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip indexfile error")
Example #12
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        s_alignment = task_params['job_id']
        gtf_file = task_params['gtf_file']
        directory = task_params['stringtie_dir']
        genome_id = task_params['genome_id']
        annotation_id = task_params['annotation_id']
        sample_id = task_params['sample_id']
        alignmentset_id = task_params['alignmentset_id']
        ws_id = task_params['ws_id']

        print "Downloading Sample Alignment from workspace {0}".format(
            s_alignment)
        logger.info("Downloading Sample Alignment from workspace {0}".format(
            s_alignment))
        alignment_name = ws_client.get_object_info([{
            "ref": s_alignment
        }],
                                                   includeMetadata=None)[0][1]
        if not logger:
            logger = handler_util.create_logger(
                directory, "run_Stringtie_" + alignment_name)
        try:
            alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
            input_direc = os.path.join(
                directory,
                alignment_name.split('_alignment')[0] + "_stringtie_input")
            if not os.path.exists(input_direc): os.mkdir(input_direc)
            output_name = alignment_name.split(
                '_alignment')[0] + "_stringtie_expression"
            output_dir = os.path.join(directory, output_name)
            #Download Alignment from shock
            a_file_id = alignment['data']['file']['id']
            a_filename = alignment['data']['file']['file_name']
            condition = alignment['data']['condition']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=self.urls['shock_service_url'],
                    shock_id=a_file_id,
                    filename=a_filename,
                    directory=input_direc,
                    token=token)
            except Exception, e:
                raise Exception(
                    "Unable to download shock file, {0},{1}".format(
                        a_filename, "".join(traceback.format_exc())))
            try:
                input_dir = os.path.join(input_direc, alignment_name)
                if not os.path.exists(input_dir): os.mkdir(input_dir)
                script_util.unzip_files(logger,
                                        os.path.join(input_direc, a_filename),
                                        input_dir)
            except Exception, e:
                logger.error("".join(traceback.format_exc()))
                raise Exception("Unzip alignment files error")
Example #13
         if not condition in labels: labels.append(condition)
         else:
             counter += 1  #### comment it when replicate_id is available from methods
         s_path = os.path.join(
             diffexp_dir, condition + "/" + str(counter)
         )  ### Comment this line when replicate_id is available from the methods
         if not os.path.exists(s_path): os.makedirs(s_path)
         try:
             script_util.download_shock_files(logger,
                                              services['shock_service_url'],
                                              s_path, files, token)
         except Exception, e:
             raise Exception("Unable to download shock file, {0}".format(e))
         try:
             script_util.unzip_files(
                 logger,
                 os.path.join(s_path, a_obj['data']['file']['file_name']),
                 s_path)
             script_util.unzip_files(
                 logger,
                 os.path.join(s_path, e_obj['data']['file']['file_name']),
                 s_path)
             e_file_path = os.path.join(s_path, "transcripts.gtf")
             a_file_path = os.path.join(s_path, "accepted_hits.bam")
             if os.path.exists(a_file_path): print a_file_path
             if os.path.exists(e_file_path):
                 print e_file_path
                 list_file.write("{0}\n".format(e_file_path))
         except Exception, e:
             logger.exception("".join(traceback.format_exc()))
             raise Exception("Unzip file error")
 list_file.close()
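The condition/counter bookkeeping above decides where each replicate lands on disk; restated as a small standalone helper (a sketch mirroring the loop, with names borrowed from it):

import os

def replicate_path(diffexp_dir, condition, labels, counter):
    # First occurrence of a condition keeps the current counter; a repeated
    # condition bumps it, giving one numbered folder per replicate.
    if condition not in labels:
        labels.append(condition)
    else:
        counter += 1
    s_path = os.path.join(diffexp_dir, condition, str(counter))
    if not os.path.exists(s_path):
        os.makedirs(s_path)
    return s_path, counter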
Example #14
class TophatSampleSet(Tophat): 

    def __init__(self, logger, directory, urls, max_cores):
        super(TophatSampleSet, self).__init__(logger, directory, urls, max_cores)

        # user defined shared variables across methods
        self.sample = None
        self.bowtie2index_id = None
        #self.num_threads = None


    def prepare(self): 
        # for quick testing, we recover parameters here
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        tophat_dir = self.directory

        try:
               #sample ,bowtie_index = ws_client.get_objects(
               #                         [{'name' : params['sampleset_id'],'workspace' : params['ws_id']},
               #                          { 'name' : params['bowtie_index'], 'workspace' : params['ws_id']}])
               sample = script_util.ws_get_obj(logger, ws_client, params['ws_id'],params['sampleset_id'])[0]
               bowtie_index = script_util.ws_get_obj(logger, ws_client, params['ws_id'],params['bowtie_index'])[0]
               self.sample = sample
        except Exception,e:
               logger.exception("".join(traceback.format_exc()))
               raise ValueError(" Error Downloading objects from the workspace ")
        ### Get object Info and IDs
        sample_info = script_util.ws_get_obj_info(logger, ws_client, params['ws_id'], params['sampleset_id'])[0]
        sample_type = sample_info[2].split('-')[0]

        # SampleSet
        if not (sample_type == 'KBaseRNASeq.RNASeqSampleSet' or sample_type == 'KBaseSets.ReadsSet'):
            raise TophatSampleSetException('RNASeqSampleSet or ReadsSet is required')
        (reads, r_label) = rnaseq_util.get_reads_conditions(logger, sample, sample_type)
        #reads = sample['data']['sample_ids']
        #reads_type= sample['data']['Library_type']
        # Note: do not need the following as we support ws reference
        #e_ws_objs = script_util.if_ws_obj_exists_notype(None,ws_client,params['ws_id'],reads)
        #missing_objs = [i for i in reads if not i in e_ws_objs]
        #if len(e_ws_objs) != len(reads):
        #   raise ValueError('Missing Library objects {0} in the {1}. please copy them and run this method'.format(",".join(missing_objs),params['ws_id']))



        ### Get object IDs
        #bowtie2_index_info,sampleset_info = ws_client.get_object_info_new({"objects": [{'name': params['bowtie_index'], 'workspace': params['ws_id']},{'name': params['sampleset_id'], 'workspace': params['ws_id']}]})
        #self.bowtie2index_id = str(bowtie2_index_info[6]) + '/' + str(bowtie2_index_info[0]) + '/' + str(bowtie2_index_info[4])  
        #sampleset_id = str(sampleset_info[6]) + '/' + str(sampleset_info[0]) + '/' + str(sampleset_info[4]) 
        self.bowtie2index_id = script_util.ws_get_ref(logger, ws_client, params['ws_id'], params['bowtie_index'])
        sampleset_id = script_util.ws_get_ref(logger, ws_client, params['ws_id'], params['sampleset_id'])
        bw_id = bowtie_index['data']['handle']['id'] 
        bw_name =  bowtie_index['data']['handle']['file_name']
        genome_id = bowtie_index['data']['genome_id']
        annotation_gtf = ws_client.get_object_info([{"ref" :genome_id}],includeMetadata=None)[0][1]
        shared_files={}
        shared_files[bw_name] = bw_id
        script_util.download_shock_files(logger,self.urls['shock_service_url'],tophat_dir,shared_files,token)
        try:
            logger.info("Unzipping Bowtie2 Indices")
            script_util.unzip_files(logger,os.path.join(tophat_dir,bw_name),tophat_dir)
            mv_dir= handler_util.get_dir(tophat_dir)
            if mv_dir is not None:
                    script_util.move_files(logger,mv_dir,tophat_dir)
        except Exception, e:
               logger.error("".join(traceback.format_exc()))
               raise Exception("Unzip indexfile error")
Example #15
class TophatSample(Tophat):
    def __init__(self, logger, directory, urls, max_cores):
        super(TophatSample, self).__init__(logger, directory, urls, max_cores)
        # user defined shared variables across methods
        self.bowtie2index_id = None
        self.num_threads = 1

    def prepare(self):
        # for quick testing, we recover parameters here
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        tophat_dir = self.directory

        try:
            #sample ,bowtie_index = ws_client.get_objects(
            #                         [{'name' : params['sampleset_id'],'workspace' : params['ws_id']},
            #                          { 'name' : params['bowtie_index'], 'workspace' : params['ws_id']}])
            sample = script_util.ws_get_obj(logger, ws_client, params['ws_id'],
                                            params['sampleset_id'])[0]
            bowtie_index = script_util.ws_get_obj(logger, ws_client,
                                                  params['ws_id'],
                                                  params['bowtie_index'])[0]
            self.sample = sample
        except Exception, e:
            logger.exception("".join(traceback.format_exc()))
            raise ValueError(" Error Downloading objects from the workspace ")
        ### Get object Info and IDs
        #sample_info = ws_client.get_object_info_new({"objects": [{'name': params['sampleset_id'], 'workspace': params['ws_id']}]})[0]
        #sample_type = sample_info[2].split('-')[0]
        sample_type = script_util.ws_get_type_name(logger, ws_client,
                                                   params['ws_id'],
                                                   params['sampleset_id'])

        ### Get object IDs
        #bowtie2_index_info,sampleset_info = ws_client.get_object_info_new({"objects": [{'name': params['bowtie_index'], 'workspace': params['ws_id']},{'name': params['sampleset_id'], 'workspace': params['ws_id']}]})
        #self.bowtie2index_id = str(bowtie2_index_info[6]) + '/' + str(bowtie2_index_info[0]) + '/' + str(bowtie2_index_info[4])
        #sampleset_id = str(sampleset_info[6]) + '/' + str(sampleset_info[0]) + '/' + str(sampleset_info[4])
        self.bowtie2index_id = script_util.ws_get_ref(logger, ws_client,
                                                      params['ws_id'],
                                                      params['bowtie_index'])
        sampleset_id = script_util.ws_get_ref(logger, ws_client,
                                              params['ws_id'],
                                              params['sampleset_id'])
        bw_id = bowtie_index['data']['handle']['id']
        bw_name = bowtie_index['data']['handle']['file_name']
        genome_id = bowtie_index['data']['genome_id']
        annotation_gtf = ws_client.get_object_info([{
            "ref": genome_id
        }],
                                                   includeMetadata=None)[0][1]
        shared_files = {}
        shared_files[bw_name] = bw_id
        script_util.download_shock_files(logger,
                                         self.urls['shock_service_url'],
                                         tophat_dir, shared_files, token)
        try:
            logger.info("Unzipping Bowtie2 Indices")
            script_util.unzip_files(logger, os.path.join(tophat_dir, bw_name),
                                    tophat_dir)
            mv_dir = handler_util.get_dir(tophat_dir)
            if mv_dir is not None:
                script_util.move_files(logger, mv_dir, tophat_dir)
        except Exception, e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip indexfile error")