示例#1
0
        # Read-library names listed on the sample set, and the workspace type
        # those libraries are expected to have.
        reads = sample['data']['sample_ids']
        reads_type = sample['data']['Library_type']
        # Anything other than 'PairedEnd' is treated as a single-end library.
        r_type = ('KBaseAssembly.PairedEndLibrary'
                  if reads_type == 'PairedEnd'
                  else 'KBaseAssembly.SingleEndLibrary')
        # Look the libraries up in the target workspace; the result is compared
        # with `reads` below, both by membership and by length.
        e_ws_objs = script_util.if_ws_obj_exists(
            None, ws_client, params['ws_id'], r_type, reads)
        missing_objs = [i for i in reads if i not in e_ws_objs]
        # Stop if the lookup did not return one entry per requested library.
        if len(e_ws_objs) != len(reads):
            raise Exception(
                ('Missing Library objects {0} in the {1}. '
                 'please copy them and run this method').format(
                     ",".join(missing_objs), params['ws_id']))

    ### Build Hisat2 index
    # Step 1: produce the genome FASTA for annotation_id, passing hisat2_dir and
    # the genome id to the helper. The return value is used below as a file
    # path (presumably the FASTA written into hisat2_dir -- confirm in
    # script_util.generate_fasta).
    fasta_file = script_util.generate_fasta(logger, services, token,
                                            annotation_id, hisat2_dir,
                                            params['genome_id'])
    logger.info("Sanitizing the fasta file to correct id names {}".format(
        datetime.datetime.utcnow()))
    # Step 2: build a contig-id mapping for the FASTA and apply it to the file
    # (to_modified=True presumably selects the original -> sanitized direction;
    # verify against c_mapping).
    mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file)
    c_mapping.replace_fasta_contig_ids(fasta_file,
                                       mapping_filename,
                                       to_modified=True)
    # NOTE: this "completed" message is logged after the sanitizing step, not
    # directly after generate_fasta().
    logger.info("Generating FASTA file completed successfully : {}".format(
        datetime.datetime.utcnow()))
    # Step 3: the index base is hisat2_dir joined with whatever
    # get_file_with_suffix() returns for ".fasta".
    hisat2base = os.path.join(
        hisat2_dir, handler_util.get_file_with_suffix(hisat2_dir, ".fasta"))
    # Argument string "<fasta_file> <hisat2base>" handed to hisat2-build below.
    hisat2base_cmd = '{0} {1}'.format(fasta_file, hisat2base)
    try:
        logger.info("Building Index for Hisat2 {0}".format(hisat2base_cmd))
        cmdline_output = script_util.runProgram(logger, "hisat2-build",
示例#2
0
     # File name recorded on the GTF object's handle; the file is downloaded
     # under this name into diffexp_dir.
     gtf_name = gtf_obj['data']['handle']['file_name']
     try:
         script_util.download_file_from_shock(
             logger,
             shock_service_url=services['shock_service_url'],
             shock_id=gtf_id,
             filename=gtf_name,
             directory=diffexp_dir,
             token=token)
         gtf_file = os.path.join(diffexp_dir, gtf_name)
     except Exception as e:
         # `except ... as` is valid on Python 2.6+ and Python 3. The original
         # message prefix is kept and the underlying error is appended so the
         # real cause of the failure is not lost.
         raise Exception(
             "Unable to download shock file, {0}: {1}".format(gtf_name, e))
 else:
     # This branch builds the GTF: generate the genome FASTA for annotation_id
     # (the helper also receives diffexp_dir and annotation_name), sanitize its
     # contig ids, then create the GTF annotation from it.
     fasta_file = script_util.generate_fasta(logger, services, token,
                                             annotation_id, diffexp_dir,
                                             annotation_name)
     logger.info("Sanitizing the fasta file to correct id names {}".format(
         datetime.datetime.utcnow()))
     # Build a contig-id mapping for the FASTA and apply it to the file
     # (to_modified=True presumably means original -> sanitized ids; verify
     # against c_mapping).
     mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file)
     c_mapping.replace_fasta_contig_ids(fasta_file,
                                        mapping_filename,
                                        to_modified=True)
     # NOTE: this "completed" message is logged after the sanitizing step.
     logger.info("Generating FASTA file completed successfully : {}".format(
         datetime.datetime.utcnow()))
     # The result is bound to gtf_file, the same name the download branch sets
     # (presumably a local path to the GTF -- confirm in
     # script_util.create_gtf_annotation). gtf_obj_name is assumed to be
     # defined earlier in the enclosing function.
     gtf_file = script_util.create_gtf_annotation(logger, ws_client, hs,
                                                  services, params['ws_id'],
                                                  annotation_id,
                                                  gtf_obj_name, fasta_file,
                                                  diffexp_dir, token)
 m_expr_ids = e_sample['data']['mapped_expression_ids']
            # Name under which the GTF annotation for this genome annotation is
            # looked up in the workspace.
            gtf_obj_name = annotation_name + "_GTF_Annotation"
            ret = script_util.if_obj_exists(None, ws_client, params['ws_id'],
                                            "KBaseRNASeq.GFFAnnotation",
                                            [gtf_obj_name])
            if ret is not None:
                # An annotation object already exists: fetch it and download
                # its file instead of regenerating it.
                logger.info(
                    "GFF Annotation Exist for Genome Annotation {0}.... Skipping step "
                    .format(annotation_name))
                gtf_obj = ws_client.get_objects(
                    [{'name': gtf_obj_name, 'workspace': params['ws_id']}])[0]
                gtf_info = ws_client.get_object_info_new(
                    {"objects": [{'name': gtf_obj_name,
                                  'workspace': params['ws_id']}]})[0]
                # Reference string built from info fields 6, 0 and 4
                # (presumably workspace id / object id / version -- confirm
                # against the workspace object_info layout).
                gtf_annotation_id = (str(gtf_info[6]) + '/' +
                                     str(gtf_info[0]) + '/' +
                                     str(gtf_info[4]))
                gtf_id = gtf_obj['data']['handle']['id']
                gtf_name = gtf_obj['data']['handle']['file_name']
                try:
                    script_util.download_file_from_shock(
                        logger,
                        shock_service_url=services['shock_service_url'],
                        shock_id=gtf_id,
                        filename=gtf_name,
                        directory=diffexp_dir,
                        token=token)
                    gtf_file = os.path.join(diffexp_dir, gtf_name)
                except Exception as e:
                    # `except ... as` works on Python 2.6+ and 3; the original
                    # message prefix is kept and the cause is appended.
                    raise Exception(
                        "Unable to download shock file, {0}: {1}".format(
                            gtf_name, e))
            else:
                # No annotation object yet: generate the FASTA, sanitize its
                # contig ids, and create the GTF annotation from it.
                fasta_file = script_util.generate_fasta(
                    logger, services, token, annotation_id, diffexp_dir,
                    annotation_name)
                logger.info(
                    "Sanitizing the fasta file to correct id names {}".format(
                        datetime.datetime.utcnow()))
                mapping_filename = c_mapping.create_sanitized_contig_ids(
                    fasta_file)
                c_mapping.replace_fasta_contig_ids(fasta_file,
                                                   mapping_filename,
                                                   to_modified=True)
                logger.info(
                    "Generating FASTA file completed successfully : {}".format(
                        datetime.datetime.utcnow()))
                gtf_file = script_util.create_gtf_annotation(
                    logger, ws_client, hs, services, params['ws_id'],
                    annotation_id, gtf_obj_name, fasta_file, diffexp_dir,
                    token)
            # Entries from the sample's 'mapped_expression_ids'; the loop that
            # follows iterates them as dicts and unpacks each item as
            # (a_id, e_id) -- presumably alignment id and expression id.
            m_expr_ids = e_sample['data']['mapped_expression_ids']
            # NOTE(review): the next line is indented with a tab followed by
            # four spaces, unlike its space-indented neighbours.
	    m_align_exp = []
            labels = []
            expressions = []
            counter = 0
            # Path inside diffexp_dir named by the ASSEMBLY_GTF_FN constant.
            assembly_file = os.path.join(diffexp_dir,ASSEMBLY_GTF_FN)
            # NOTE(review): opened for writing without a `with` block; the
            # matching close() is not visible in this part of the function --
            # confirm it is closed on every path.
            list_file = open(assembly_file,'w')
            for i in m_expr_ids:
                for a_id ,e_id in i.items():
                        #print a_id  + ":" + e_id
示例#4
0
        # Reference string built from info fields 6, 0 and 4 (presumably
        # workspace id / object id / version -- confirm against the workspace
        # object_info layout).
        annotation_id = (str(annotation_info[6]) + '/' +
                         str(annotation_info[0]) + '/' +
                         str(annotation_info[4]))
        # Type name with everything from the first '-' onwards dropped.
        sample_type = sampleset_info[2].split('-')[0]
        ### Check if the Library objects exist in the same workspace
        logger.info("Check if the Library objects do exist in the current workspace")
        # Indentation below uses spaces only; the columns are the same ones
        # Python 2 derived from the previous tab/space mix.
        if sample_type == 'KBaseRNASeq.RNASeqSampleSet':
            reads = sample['data']['sample_ids']
            reads_type = sample['data']['Library_type']
            # Anything other than 'PairedEnd' is treated as single-end.
            if reads_type == 'PairedEnd':
                r_type = 'KBaseAssembly.PairedEndLibrary'
            else:
                r_type = 'KBaseAssembly.SingleEndLibrary'
            # Look the libraries up in the target workspace; the result is
            # compared with `reads` by membership and by length.
            e_ws_objs = script_util.if_ws_obj_exists(
                None, ws_client, params['ws_id'], r_type, reads)
            missing_objs = [i for i in reads if i not in e_ws_objs]
            if len(e_ws_objs) != len(reads):
                raise Exception(
                    'Missing Library objects {0} in the {1}. please copy them and run this method'
                    .format(",".join(missing_objs), params['ws_id']))

	### Build Hisat2 index
	fasta_file = script_util.generate_fasta(logger,services,token,annotation_id,hisat2_dir,params['genome_id'])
        logger.info("Sanitizing the fasta file to correct id names {}".format(datetime.datetime.utcnow()))
        mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file)
        c_mapping.replace_fasta_contig_ids(fasta_file, mapping_filename, to_modified=True)
        logger.info("Generating FASTA file completed successfully : {}".format(datetime.datetime.utcnow()))
        hisat2base =os.path.join(hisat2_dir,handler_util.get_file_with_suffix(hisat2_dir,".fasta"))
        hisat2base_cmd = '{0} {1}'.format(fasta_file,hisat2base)
	try:
            logger.info("Building Index for Hisat2 {0}".format(hisat2base_cmd))
            cmdline_output = script_util.runProgram(logger,"hisat2-build",hisat2base_cmd,None,hisat2_dir)
        except Exception,e:
            raise Exception("Failed to run command {0}".format(hisat2base_cmd))
        ws_gtf = params['genome_id']+"_GTF"
        ret = script_util.if_obj_exists(None,ws_client,params['ws_id'],"KBaseRNASeq.GFFAnnotation",[ws_gtf])
        print ret
        if not ret is None: