# NOTE(review): statement nesting below was reconstructed from a
# whitespace-collapsed source -- confirm against the upstream file.

# Fetch the alignment-set object named by params['alignmentset_id'] from
# workspace params['ws_id']. Any failure is logged with its traceback and
# re-raised as a generic Exception.
try:
    a_sample = ws_client.get_objects(
        [{'name': params['alignmentset_id'],
          'workspace': params['ws_id']}])[0]
except Exception:
    # format_exc() already returns a single string; no join is required.
    logger.exception(traceback.format_exc())
    raise Exception("Error Downloading objects from the workspace ")

## Get the Input object type ##
a_sample_info = ws_client.get_object_info_new(
    {"objects": [{'name': params['alignmentset_id'],
                  'workspace': params['ws_id']}]})[0]
# Only the part of the type string before the first '-' is kept.
a_sample_type = a_sample_info[2].split('-')[0]
# Reference string assembled from info fields 6, 0 and 4
# (presumably workspace id / object id / version -- TODO confirm).
alignmentset_id = (str(a_sample_info[6]) + '/' + str(a_sample_info[0]) +
                   '/' + str(a_sample_info[4]))

## Check if the Alignment objects exist in the same workspace
logger.info("Check if the Alignment objects do exist in the current workspace")
if a_sample_type == 'KBaseRNASeq.RNASeqAlignmentSet':
    # Collect every value of every mapping in row order. A plain nested
    # comprehension is used instead of numpy flattening so that mappings
    # with differing numbers of values are still flattened correctly.
    a_names = [name
               for mapping in a_sample['data']['mapped_rnaseq_alignments']
               for name in mapping.values()]
    a_type = 'KBaseRNASeq.RNASeqAlignment'
    e_ws_objs = script_util.if_ws_obj_exists(
        None, ws_client, params['ws_id'], a_type, a_names)
    missing_objs = [i for i in a_names if i not in e_ws_objs]
    if len(e_ws_objs) != len(a_names):
        raise ValueError(
            'Missing Alignment objects {0} in the {1}. please copy them and run this method'.format(
                ",".join(missing_objs), params['ws_id']))

### Check if the gtf file exists in the workspace. if exists download the file from that
annotation_id = a_sample['data']['genome_id']
# Element [1] of the first info tuple is used as the annotation's name.
annotation_name = ws_client.get_object_info(
    [{"ref": annotation_id}], includeMetadata=None)[0][1]
gtf_obj_name = annotation_name + "_GTF_Annotation"
ret = script_util.if_obj_exists(
    None, ws_client, params['ws_id'],
    "KBaseRNASeq.GFFAnnotation", [gtf_obj_name])
if ret is not None:
    # An existing GFF annotation object is reused: fetch it and its info,
    # then derive its reference string and the id of its data handle.
    logger.info("GFF Annotation Exist for Genome Annotation {0}.... Skipping step ".format(annotation_name))
    gtf_obj = ws_client.get_objects(
        [{'name': gtf_obj_name, 'workspace': params['ws_id']}])[0]
    gtf_info = ws_client.get_object_info_new(
        {"objects": [{'name': gtf_obj_name,
                      'workspace': params['ws_id']}]})[0]
    gtf_annotation_id = (str(gtf_info[6]) + '/' + str(gtf_info[0]) +
                         '/' + str(gtf_info[4]))
    gtf_id = gtf_obj['data']['handle']['id']
# NOTE(review): statement nesting below was reconstructed from a
# whitespace-collapsed source -- confirm against the upstream file.

### Get the workspace object ids for the objects ###
# Each reference string joins info fields 6, 0 and 4 with '/'
# (presumably workspace id / object id / version -- TODO confirm).
sampleset_id = '/'.join(str(sampleset_info[k]) for k in (6, 0, 4))
annotation_id = '/'.join(str(annotation_info[k]) for k in (6, 0, 4))
# Only the part of the type string before the first '-' is kept.
sample_type = sampleset_info[2].split('-')[0]

### Check if the Library objects exist in the same workspace
logger.info("Check if the Library objects do exist in the current workspace")
if sample_type == 'KBaseRNASeq.RNASeqSampleSet':
    reads = sample['data']['sample_ids']
    reads_type = sample['data']['Library_type']
    # Anything other than 'PairedEnd' is treated as a single-end library.
    r_type = ('KBaseAssembly.PairedEndLibrary' if reads_type == 'PairedEnd'
              else 'KBaseAssembly.SingleEndLibrary')
    e_ws_objs = script_util.if_ws_obj_exists(
        None, ws_client, params['ws_id'], r_type, reads)
    missing_objs = [i for i in reads if i not in e_ws_objs]
    if len(e_ws_objs) != len(reads):
        raise Exception(
            'Missing Library objects {0} in the {1}. please copy them and run this method'
            .format(",".join(missing_objs), params['ws_id']))

### Build Hisat2 index
fasta_file = script_util.generate_fasta(
    logger, services, token, annotation_id, hisat2_dir, params['genome_id'])
logger.info("Sanitizing the fasta file to correct id names {}".format(
    datetime.datetime.utcnow()))
mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file)
# NOTE(review): this call was cut off after its first argument in the
# source; the remaining arguments mirror the identical call that appears
# later in this file -- confirm.
c_mapping.replace_fasta_contig_ids(fasta_file,
                                   mapping_filename,
                                   to_modified=True)
sampleset_info,annotation_info = ws_client.get_object_info_new({"objects": [ {'name': params['sampleset_id'], 'workspace': params['ws_id']}, {'name': params['genome_id'], 'workspace': params['ws_id']} ]}) ### Get the workspace object ids for the objects ### sampleset_id = str(sampleset_info[6]) + '/' + str(sampleset_info[0]) + '/' + str(sampleset_info[4]) annotation_id = str(annotation_info[6]) + '/' + str(annotation_info[0]) + '/' + str(annotation_info[4]) sample_type = sampleset_info[2].split('-')[0] ### Check if the Library objects exist in the same workspace logger.info("Check if the Library objects do exist in the current workspace") if sample_type == 'KBaseRNASeq.RNASeqSampleSet': reads = sample['data']['sample_ids'] reads_type= sample['data']['Library_type'] if reads_type == 'PairedEnd': r_type = 'KBaseAssembly.PairedEndLibrary' else: r_type = 'KBaseAssembly.SingleEndLibrary' e_ws_objs = script_util.if_ws_obj_exists(None,ws_client,params['ws_id'],r_type,reads) missing_objs = [i for i in reads if not i in e_ws_objs] if len(e_ws_objs) != len(reads): raise Exception('Missing Library objects {0} in the {1}. 
please copy them and run this method'.format(",".join(missing_objs),params['ws_id'])) ### Build Hisat2 index fasta_file = script_util.generate_fasta(logger,services,token,annotation_id,hisat2_dir,params['genome_id']) logger.info("Sanitizing the fasta file to correct id names {}".format(datetime.datetime.utcnow())) mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file) c_mapping.replace_fasta_contig_ids(fasta_file, mapping_filename, to_modified=True) logger.info("Generating FASTA file completed successfully : {}".format(datetime.datetime.utcnow())) hisat2base =os.path.join(hisat2_dir,handler_util.get_file_with_suffix(hisat2_dir,".fasta")) hisat2base_cmd = '{0} {1}'.format(fasta_file,hisat2base) try: logger.info("Building Index for Hisat2 {0}".format(hisat2base_cmd)) cmdline_output = script_util.runProgram(logger,"hisat2-build",hisat2base_cmd,None,hisat2_dir)