def __init__(self, scratch_dir, ws_url, callback_url, service_wizard_url, provenance):
    """Store the service configuration and create the clients used to build indexes.

    Keeps the scratch directory, workspace URL, callback URL, service wizard
    URL and provenance on the instance, then creates a Workspace client for
    ``ws_url`` and a BwaRunner for ``scratch_dir``.
    """
    # plain configuration values
    self.scratch_dir = scratch_dir
    self.ws_url = ws_url
    self.callback_url = callback_url
    self.service_wizard_url = service_wizard_url
    self.provenance = provenance

    # clients (workspace first, then the bwa runner)
    self.ws = Workspace(ws_url)
    self.bwa = BwaRunner(scratch_dir)
def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url, provenance):
    """Store the service configuration, pick the module version and create clients.

    The version used for sub-jobs defaults to 'release'; when the first
    provenance action carries 'subactions', it is resolved through
    get_version_from_subactions for the 'kb_Bwa' module.
    """
    self.scratch_dir = scratch_dir
    self.workspace_url = workspace_url
    self.callback_url = callback_url
    self.srv_wiz_url = srv_wiz_url
    self.provenance = provenance

    # from the provenance, extract the version to run (exact hash if possible)
    self.my_version = 'release'
    if len(provenance) > 0 and 'subactions' in provenance[0]:
        subactions = provenance[0]['subactions']
        self.my_version = self.get_version_from_subactions('kb_Bwa', subactions)
    print('Running kb_Bwa version = ' + self.my_version)

    # service clients
    self.ws = Workspace(workspace_url)
    self.bwa = BwaRunner(scratch_dir)
    self.parallel_runner = KBParallel(callback_url)
    self.qualimap = kb_QualiMap(callback_url)
class BwaIndexBuilder:
    """Build a bwa index for an Assembly, ContigSet or Genome, with workspace caching.

    An index is first looked up among the workspace objects that reference the
    assembly; on a miss it is built with ``bwa index`` and, when a cache
    workspace is given, saved back for later reuse.
    """

    def __init__(self, scratch_dir, ws_url, callback_url, service_wizard_url, provenance):
        """Store the configuration and create the Workspace and BwaRunner clients."""
        self.scratch_dir = scratch_dir
        self.ws_url = ws_url
        self.ws = Workspace(self.ws_url)
        self.callback_url = callback_url
        self.service_wizard_url = service_wizard_url
        self.bwa = BwaRunner(self.scratch_dir)
        self.provenance = provenance

    def get_index(self, params):
        """Return index info for ``params['ref']``, from the cache or freshly built.

        The returned dict contains 'output_dir', 'index_files_basename',
        'from_cache', 'pushed_to_cache', 'assembly_ref' and 'genome_ref'.

        Raises:
            ValueError: if the parameters are invalid or the referenced object
                is not an Assembly, ContigSet or Genome.
        """
        # validate the parameters and fetch assembly_info
        validated_params = self._validate_params(params)
        assembly_info = self._get_assembly_info(validated_params['ref'])

        # check the cache (keyed off of assembly_info)
        index_info = self._get_cached_index(assembly_info, validated_params)
        if index_info:
            index_info['from_cache'] = 1
            index_info['pushed_to_cache'] = 0
        else:
            # on a cache miss, build the index;
            # 'pushed_to_cache' is set by _build_index
            index_info = self._build_index(assembly_info, validated_params)
            index_info['from_cache'] = 0

        index_info['assembly_ref'] = assembly_info['ref']
        index_info['genome_ref'] = assembly_info['genome_ref']
        return index_info

    def _validate_params(self, params):
        """Validate ``params`` and return a dict with 'ref', 'output_dir' and 'ws_for_cache'.

        Raises:
            ValueError: if 'ref' is missing/empty or the output directory
                already exists.
        """
        validated_params = {'ref': None}
        if 'ref' in params and params['ref']:
            validated_params['ref'] = params['ref']
        else:
            raise ValueError('"ref" field indicating either an assembly or genome is required.')

        if 'output_dir' in params:
            validated_params['output_dir'] = params['output_dir']
        else:
            validated_params['output_dir'] = os.path.join(
                self.scratch_dir, 'bwa_index_' + str(int(time.time() * 100)))

        if os.path.exists(validated_params['output_dir']):
            # a bare string cannot be raised in Python 3; use a real exception
            raise ValueError('Output directory name specified (' +
                             validated_params['output_dir'] +
                             ') already exists. Will not overwrite, so aborting.')

        if 'ws_for_cache' in params and params['ws_for_cache']:
            validated_params['ws_for_cache'] = params['ws_for_cache']
        else:
            print('WARNING: bwa index if created will not be cached because '
                  '"ws_for_cache" field not set')
            validated_params['ws_for_cache'] = None

        return validated_params

    def _get_assembly_info(self, ref):
        """Given a ref to an assembly or genome, return the assembly's info.

        Returns a dict with 'info' (workspace object info), 'ref' (the ref or
        ref path of the assembly) and 'genome_ref' (None unless ``ref`` was a
        genome).

        Raises:
            ValueError: if the object is not an Assembly, ContigSet or Genome.
        """
        info = self.ws.get_object_info3({'objects': [{'ref': ref}]})['infos'][0]
        obj_type = info[2]
        if obj_type.startswith(('KBaseGenomeAnnotations.Assembly', 'KBaseGenomes.ContigSet')):
            return {'info': info, 'ref': ref, 'genome_ref': None}

        if obj_type.startswith('KBaseGenomes.Genome'):
            # we need to get the assembly for this genome
            ga = GenomeAnnotationAPI(self.service_wizard_url)
            assembly_ref = ga.get_assembly({'ref': ref})
            # using the path ensures we can access the assembly even if we
            # don't have direct access
            ref_path = ref + ';' + assembly_ref
            info = self.ws.get_object_info3({'objects': [{'ref': ref_path}]})['infos'][0]
            return {'info': info, 'ref': ref_path, 'genome_ref': ref}

        raise ValueError('Input object was not of type: Assembly, ContigSet or Genome.  '
                         'Cannot get bwa Index.')

    def _get_cached_index(self, assembly_info, validated_params):
        """Try to download a previously cached index for the assembly.

        Returns a dict with 'output_dir' and 'index_files_basename' on a hit,
        False when nothing is cached, and None when the lookup failed. Any
        failure is logged and treated as a cache miss.
        """
        output_dir = validated_params['output_dir']
        created_output_dir = False
        try:
            # note: list_referencing_objects does not yet support reference paths, so we
            # need to call with the direct reference.  So we won't get a cache hit if you
            # don't have direct access to the assembly object right now (although you can
            # still always build the assembly object).  Once this call supports paths,
            # this should be changed to set ref = assembly_info['ref']
            info = assembly_info['info']
            ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
            objs = self.ws.list_referencing_objects([{'ref': ref}])[0]

            # keep only the referencing objects of the cached-index type
            # NOTE(review): the cache type is 'KBaseRNASeq.Bowtie2IndexV2'; objects of that
            # type saved by another tool would also match here -- confirm this is intended.
            bwa_indexes = [o for o in objs if o[2].startswith('KBaseRNASeq.Bowtie2IndexV2')]

            # Nothing refs this assembly, so cache miss
            if len(bwa_indexes) == 0:
                return False

            # if there is more than one hit, get the most recent one
            # (obj_info[3] is the save_date timestamp (eg 2017-05-30T22:56:49+0000),
            # so we can sort on that)
            bwa_indexes.sort(key=lambda x: x[3])
            bwa_index_info = bwa_indexes[-1]
            index_ref = (str(bwa_index_info[6]) + '/' + str(bwa_index_info[0]) + '/' +
                         str(bwa_index_info[4]))

            # get the object data
            index_obj_data = self.ws.get_objects2(
                {'objects': [{'ref': index_ref}]})['data'][0]['data']

            # download the handle object
            os.makedirs(output_dir)
            created_output_dir = True
            dfu = DataFileUtil(self.callback_url)
            dfu.shock_to_file({'file_path': os.path.join(output_dir, 'bt2_index.tar.gz'),
                               'handle_id': index_obj_data['handle']['hid'],
                               'unpack': 'unpack'})
            print('Cache hit: ')
            pprint(index_obj_data)
            return {'output_dir': output_dir,
                    'index_files_basename': index_obj_data['index_files_basename']}

        except Exception:
            # a failed cache lookup must never abort the run; fall back to building
            print('WARNING: exception encountered when trying to lookup in cache:')
            print(traceback.format_exc())
            print('END WARNING: exception encountered when trying to lookup in cache.')
            if created_output_dir:
                # remove the partially filled directory, otherwise the fallback
                # _build_index would refuse to run because it already exists
                shutil.rmtree(output_dir, ignore_errors=True)

        return None

    def _put_cached_index(self, assembly_info, index_files_basename, output_dir, ws_for_cache):
        """Upload the index directory and save a cache object in ``ws_for_cache``.

        ``ws_for_cache`` may be a workspace name or a numeric workspace id
        (int or digit string). Returns True on success and False otherwise;
        failures are logged, never raised.
        """
        if not ws_for_cache:
            print('WARNING: bwa index cannot be cached because "ws_for_cache" field not set')
            return False

        try:
            dfu = DataFileUtil(self.callback_url)
            result = dfu.file_to_shock({'file_path': output_dir,
                                        'make_handle': 1,
                                        'pack': 'targz'})

            bwa_index = {'handle': result['handle'],
                         'size': result['size'],
                         'assembly_ref': assembly_info['ref'],
                         'index_files_basename': index_files_basename}

            save_params = {'objects': [{'hidden': 1,
                                        'provenance': self.provenance,
                                        'name': os.path.basename(output_dir),
                                        'data': bwa_index,
                                        'type': 'KBaseRNASeq.Bowtie2IndexV2'}]}
            # accept an int id as well as a string name/id
            ws_target = str(ws_for_cache).strip()
            if ws_target.isdigit():
                save_params['id'] = int(ws_target)
            else:
                save_params['workspace'] = ws_target
            save_result = self.ws.save_objects(save_params)
            print('Bowtie2IndexV2 cached to: ')
            pprint(save_result[0])
            return True

        except Exception:
            # if we fail in saving the cached object, don't worry
            print('WARNING: exception encountered when trying to cache the index files:')
            print(traceback.format_exc())
            print('END WARNING: exception encountered when trying to cache the index files')

        return False

    def _build_index(self, assembly_info, validated_params):
        """Run ``bwa index`` on the assembly and copy the index files to the output dir.

        Returns a dict with 'output_dir', 'index_files_basename' and
        'pushed_to_cache' (1 if the index was saved to the cache, else 0).

        Raises:
            ValueError: if the output directory already exists.
        """
        # get the assembly as a fasta file using AssemblyUtil
        au = AssemblyUtil(self.callback_url)
        fasta_info = au.get_assembly_as_fasta({'ref': assembly_info['ref']})

        output_dir = validated_params['output_dir']
        basename = fasta_info['assembly_name']

        # make the target destination folder (check again it wasn't created yet)
        if os.path.exists(output_dir):
            raise ValueError('Output directory name specified (' + output_dir +
                             ') already exists. Will not overwrite, so aborting.')
        os.makedirs(output_dir)

        # configure the command line args and run it
        cli_params = self._build_cli_params(fasta_info['path'], basename, validated_params)
        self.bwa.run('index', cli_params)

        # NOTE(review): the index files are collected from a hard-coded directory;
        # presumably that is where the runner writes them -- confirm against BwaRunner.
        for index_file in glob.glob(r'/kb/module/work/tmp/' + basename + '.*'):
            print(index_file)
            shutil.copy(index_file, output_dir)

        index_info = {'output_dir': output_dir,
                      'index_files_basename': basename}

        # cache the result, mark if it worked or not
        cache_success = self._put_cached_index(assembly_info,
                                               basename,
                                               output_dir,
                                               validated_params['ws_for_cache'])
        index_info['pushed_to_cache'] = 1 if cache_success else 0

        return index_info

    def _build_cli_params(self, fasta_file_path, index_files_basename, validated_params):
        """Return the ``bwa index`` argument list: fasta path, then ``-p <basename>``."""
        return [fasta_file_path, "-p", index_files_basename]
class BwaAligner:
    """Align a reads library, or every library in a reads/sample set, with bwa.

    A single library is aligned in-process; a set is fanned out through
    KBParallel, one task per library, and the results are collected into a
    ReadsAlignmentSet with a report.
    """

    def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url, provenance):
        """Store the configuration, resolve the module version and create clients."""
        self.scratch_dir = scratch_dir
        self.workspace_url = workspace_url
        self.callback_url = callback_url
        self.srv_wiz_url = srv_wiz_url
        self.provenance = provenance

        # from the provenance, extract out the version to run by exact hash if possible
        self.my_version = 'release'
        if len(provenance) > 0:
            if 'subactions' in provenance[0]:
                self.my_version = self.get_version_from_subactions(
                    'kb_Bwa', provenance[0]['subactions'])
        print('Running kb_Bwa version = ' + self.my_version)

        self.ws = Workspace(self.workspace_url)
        self.bwa = BwaRunner(self.scratch_dir)
        self.parallel_runner = KBParallel(self.callback_url)
        self.qualimap = kb_QualiMap(self.callback_url)

    def get_version_from_subactions(self, module_name, subactions):
        """Return the version to run for ``module_name`` based on provenance subactions.

        Returns 'dev' for a 'local-docker-image' commit, the commit itself when
        it looks like a 40-character hex hash, and 'release' in every other
        case (including a missing or empty 'commit').
        """
        if not subactions:
            return 'release'  # default to release if we can't find anything
        for sa in subactions:
            if sa.get('name') != module_name:
                continue
            commit = sa.get('commit')
            # local-docker-image implies that we are running in kb-test, so return 'dev'
            if commit == 'local-docker-image':
                return 'dev'
            # to check that it is a valid hash, make sure it is the right
            # length and made up of valid hash characters
            if commit and re.match(r'[a-fA-F0-9]{40}$', commit):
                return commit
        # again, default to setting this to release
        return 'release'

    def align(self, params):
        """Validate ``params`` and run the alignment on a single library or a set.

        Returns the result of single_reads_lib_run for a single library, or of
        process_batch_result for a set.

        Raises:
            ValueError: if the parameters or the input object type are invalid.
        """
        validated_params = self.validate_params(params)
        input_info = self.determine_input_info(validated_params)
        # input info provides information on the input and tells us if we should
        # run as a single_library or as a set:
        #     input_info = {'run_mode': '', 'info': [..], 'ref': '55/1/2'}

        assembly_or_genome_ref = validated_params['assembly_or_genome_ref']

        if input_info['run_mode'] == 'single_library':
            if 'output_alignment_name' not in validated_params:
                suffix = validated_params.get('output_alignment_suffix', '_alignment')
                validated_params['output_alignment_name'] = input_info['info'][1] + suffix
            return self.single_reads_lib_run(
                input_info,
                assembly_or_genome_ref,
                validated_params,
                create_report=validated_params['create_report'])

        if input_info['run_mode'] == 'sample_set':
            reads = self.fetch_reads_refs_from_sampleset(
                input_info['ref'], input_info['info'], validated_params)
            # build (and cache) the index once, before the sub-jobs are started
            self.build_bwa_index(assembly_or_genome_ref,
                                 validated_params['output_workspace'])

            print('Running on set of reads=')
            pprint(reads)

            tasks = [
                self.build_single_execution_task(
                    r['ref'], params, r['alignment_output_name'], r['condition'])
                for r in reads
            ]

            batch_run_params = {
                'tasks': tasks,
                'runner': 'parallel',
                'max_retries': 2
            }
            if validated_params['concurrent_local_tasks'] is not None:
                batch_run_params['concurrent_local_tasks'] = validated_params[
                    'concurrent_local_tasks']
            if validated_params['concurrent_njsw_tasks'] is not None:
                batch_run_params['concurrent_njsw_tasks'] = validated_params[
                    'concurrent_njsw_tasks']
            results = self.parallel_runner.run_batch(batch_run_params)
            print('Batch run results=')
            pprint(results)
            return self.process_batch_result(results, validated_params, reads,
                                             input_info['info'])

        # a bare string cannot be raised in Python 3; use a real exception
        raise ValueError('Improper run mode')

    def build_single_execution_task(self, reads_lib_ref, params, output_name, condition):
        """Return the KBParallel task dict that aligns one reads library.

        ``params`` is deep-copied; the copy gets the library ref, output name
        and condition label, and has report creation switched off.
        """
        task_params = copy.deepcopy(params)

        task_params['input_ref'] = reads_lib_ref
        task_params['output_alignment_name'] = output_name
        task_params['create_report'] = 0
        task_params['condition_label'] = condition

        return {
            'module_name': 'kb_Bwa',
            'function_name': 'align_reads_to_assembly_app',
            'version': self.my_version,
            'parameters': task_params
        }

    def single_reads_lib_run(self,
                             read_lib_info,
                             assembly_or_genome_ref,
                             validated_params,
                             create_report=False,
                             bwa_index_info=None):
        """Align one reads library, upload the alignment and optionally make a report.

        Returns a dict with 'output_info' (run output plus 'upload_results')
        and 'report_info' (None when ``create_report`` is false).
        """
        # download reads and prepare any bwa index files
        input_configuration = self.prepare_single_run(
            read_lib_info, assembly_or_genome_ref, bwa_index_info,
            validated_params['output_workspace'])

        # run the actual program
        run_output_info = self.run_bwa_align_cli(input_configuration,
                                                 validated_params)

        # process the result and save the output
        upload_results = self.save_read_alignment_output(
            run_output_info, input_configuration, validated_params)
        run_output_info['upload_results'] = upload_results

        report_info = None
        if create_report:
            report_info = self.create_report_for_single_run(
                run_output_info, input_configuration, validated_params)

        self.clean(run_output_info)

        return {'output_info': run_output_info, 'report_info': report_info}

    def build_bwa_index(self, assembly_or_genome_ref, ws_for_cache):
        """Get (from cache) or build the bwa index for the given assembly/genome ref."""
        bwa_index_builder = BwaIndexBuilder(self.scratch_dir, self.workspace_url,
                                            self.callback_url, self.srv_wiz_url,
                                            self.provenance)

        return bwa_index_builder.get_index({
            'ref': assembly_or_genome_ref,
            'ws_for_cache': ws_for_cache
        })

    def prepare_single_run(self, input_info, assembly_or_genome_ref,
                           bwa_index_info, ws_for_cache):
        """Set up the bwa index (unless one is supplied) and download the reads.

        Returns a dict with 'bwa_index_info', 'reads_lib_type', 'reads_files'
        and 'reads_lib_ref'.
        """
        # first setup the bwa index of the assembly
        input_configuration = {'bwa_index_info': bwa_index_info}
        if not bwa_index_info:
            input_configuration['bwa_index_info'] = self.build_bwa_index(
                assembly_or_genome_ref, ws_for_cache)

        # next download the reads
        read_lib_ref = input_info['ref']
        read_lib_info = input_info['info']
        reads_params = {
            'read_libraries': [read_lib_ref],
            'interleaved': 'false',
            'gzipped': None
        }
        ru = ReadsUtils(self.callback_url)
        reads = ru.download_reads(reads_params)['files']

        # the type looks like 'KBaseFile.PairedEndLibrary'; keep the part after the module
        input_configuration['reads_lib_type'] = self.get_type_from_obj_info(
            read_lib_info).split('.')[1]
        input_configuration['reads_files'] = reads[read_lib_ref]
        input_configuration['reads_lib_ref'] = read_lib_ref

        return input_configuration

    def run_bwa_align_cli(self, input_configuration, validated_params):
        """Run ``bwa aln`` followed by ``bwa samse``/``sampe`` for the downloaded reads.

        Returns a dict with 'library_type', 'output_sam_file' and 'output_dir'.

        Raises:
            ValueError: if the reads library type is neither SingleEndLibrary
                nor PairedEndLibrary.
        """
        lib_type = input_configuration['reads_lib_type']
        if lib_type not in ('SingleEndLibrary', 'PairedEndLibrary'):
            # without this guard nothing would be run and a path to a SAM file
            # that was never written would be returned
            raise ValueError('Unsupported reads library type: ' + str(lib_type))

        run_output_info = {}

        # set the bwa index location
        index_dir = input_configuration['bwa_index_info']['output_dir']
        index_basename = input_configuration['bwa_index_info']['index_files_basename']
        reference = os.path.join(index_dir, index_basename)

        output_dir = os.path.join(
            self.scratch_dir,
            'bwa_alignment_output_' + str(int(time.time() * 10000)))
        output_sam_file = os.path.join(output_dir, 'reads_alignment.sam')
        os.makedirs(output_dir)

        read_files = input_configuration['reads_files']['files']

        if lib_type == 'SingleEndLibrary':
            fwd = read_files['fwd']
            run_output_info['library_type'] = 'single_end'
            sai_file = os.path.join(output_dir, index_basename) + ".sai"

            self.bwa.run('aln', [reference, '-0', fwd, "-f", sai_file], cwd=index_dir)
            self.bwa.run('samse',
                         [reference, sai_file, fwd, "-f", output_sam_file],
                         cwd=index_dir)
        else:
            fwd = read_files['fwd']
            rev = read_files['rev']
            l_sai_file = os.path.join(output_dir, index_basename) + "_l.sai"
            r_sai_file = os.path.join(output_dir, index_basename) + "_r.sai"

            self.bwa.run('aln', [reference, '-1', fwd, "-f", l_sai_file], cwd=index_dir)
            self.bwa.run('aln', [reference, '-2', rev, "-f", r_sai_file], cwd=index_dir)

            # NOTE(review): the reverse reads are passed before the forward reads here;
            # verify against the `bwa sampe` usage that this mate order is intended.
            self.bwa.run('sampe',
                         [reference, r_sai_file, l_sai_file, rev, fwd,
                          "-f", output_sam_file],
                         cwd=index_dir)
            run_output_info['library_type'] = 'paired_end'

        run_output_info['output_sam_file'] = output_sam_file
        run_output_info['output_dir'] = output_dir

        return run_output_info

    def save_read_alignment_output(self, run_output_info, input_configuration,
                                   validated_params):
        """Upload the SAM file as a ReadsAlignment and return the upload result."""
        rau = ReadsAlignmentUtils(self.callback_url)
        destination_ref = (validated_params['output_workspace'] + '/' +
                           validated_params['output_alignment_name'])
        condition = validated_params.get('condition_label', 'unknown')
        upload_params = {
            'file_path': run_output_info['output_sam_file'],
            'destination_ref': destination_ref,
            'read_library_ref': input_configuration['reads_lib_ref'],
            'assembly_or_genome_ref': validated_params['assembly_or_genome_ref'],
            'condition': condition
        }
        return rau.upload_alignment(upload_params)

    def clean(self, run_output_info):
        """Placeholder for cleaning up local files of a finished run (currently a no-op).

        Not really necessary on a single run, but if we are running multiple
        local subjobs, we should clean up files that have already been saved
        back up to kbase.
        """
        pass

    def _single_run_report_text(self, alignment_name, alignment_ref):
        """Return the report message for a single-library run."""
        return ('Ran on a single reads library.\n\n' +
                'Created ReadsAlignment:  ' + str(alignment_name) + '\n' +
                '                        ' + alignment_ref + '\n')

    def _batch_report_text(self, set_name, n_jobs, n_success, n_error,
                           ran_locally, ran_njsw):
        """Return the report message summarising a batch run."""
        return ('Ran on SampleSet or ReadsSet.\n\n' +
                'Created ReadsAlignmentSet: ' + str(set_name) + '\n\n' +
                'Total ReadsLibraries = ' + str(n_jobs) + '\n' +
                '        Successful runs = ' + str(n_success) + '\n' +
                '            Failed runs = ' + str(n_error) + '\n' +
                '       Ran on main node = ' + str(ran_locally) + '\n' +
                '   Ran on remote worker = ' + str(ran_njsw) + '\n\n')

    def create_report_for_single_run(self, run_output_info,
                                     input_configuration, validated_params):
        """Run QualiMap on the uploaded alignment and create an extended report.

        Returns a dict with 'report_name' and 'report_ref'.
        """
        alignment_ref = run_output_info['upload_results']['obj_ref']

        # first run qualimap
        qualimap_report = self.qualimap.run_bamqc({'input_ref': alignment_ref})
        qc_result_zip_info = qualimap_report['qc_result_zip_info']

        # create report; the lines are accumulated (the text used to be
        # overwritten, leaving only the last line in the report)
        alignment_info = self.get_obj_info(alignment_ref)
        report_text = self._single_run_report_text(alignment_info[1], alignment_ref)

        kbr = KBaseReport(self.callback_url)
        report_info = kbr.create_extended_report({
            'message': report_text,
            'objects_created': [{
                'ref': alignment_ref,
                'description': 'ReadsAlignment'
            }],
            'report_object_name': 'kb_Bwa_' + str(uuid.uuid4()),
            'direct_html_link_index': 0,
            'html_links': [{
                'shock_id': qc_result_zip_info['shock_id'],
                'name': qc_result_zip_info['index_html_file_name'],
                'label': qc_result_zip_info['name']
            }],
            'workspace_name': validated_params['output_workspace']
        })
        return {
            'report_name': report_info['name'],
            'report_ref': report_info['ref']
        }

    def process_batch_result(self, batch_result, validated_params, reads,
                             input_set_info):
        """Save a ReadsAlignmentSet from the batch results and create a report.

        Returns a dict with 'report_info' ('report_name', 'report_ref') and
        'batch_output_info' (the unmodified ``batch_result``).
        """
        n_jobs = len(batch_result['results'])
        n_success = 0
        n_error = 0
        ran_locally = 0
        ran_njsw = 0

        # reads alignment set items
        items = []
        objects_created = []

        for k, job in enumerate(batch_result['results']):
            result_package = job['result_package']
            if job['is_error']:
                n_error += 1
            else:
                n_success += 1
                print(result_package['result'])
                print(result_package['result'][0])
                print(result_package['result'][0]['output_info'])
                output_info = result_package['result'][0]['output_info']
                ra_ref = output_info['upload_results']['obj_ref']
                # results are in task order, so reads[k] is the library of job k
                items.append({'ref': ra_ref, 'label': reads[k]['condition']})
                objects_created.append({'ref': ra_ref})

            location = result_package['run_context']['location']
            if location == 'local':
                ran_locally += 1
            if location == 'njsw':
                ran_njsw += 1

        # Save the alignment set
        alignment_set_data = {'description': '', 'items': items}
        alignment_set_save_params = {
            'data': alignment_set_data,
            'workspace': validated_params['output_workspace'],
            'output_object_name':
            str(input_set_info[1]) + validated_params['output_obj_name_suffix']
        }

        set_api = SetAPI(self.srv_wiz_url)
        save_result = set_api.save_reads_alignment_set_v1(
            alignment_set_save_params)
        print('Saved ReadsAlignment=')
        pprint(save_result)
        objects_created.append({
            'ref': save_result['set_ref'],
            'description': 'Set of all reads alignments generated'
        })
        set_name = save_result['set_info'][1]

        # run qualimap
        qualimap_report = self.qualimap.run_bamqc(
            {'input_ref': save_result['set_ref']})
        qc_result_zip_info = qualimap_report['qc_result_zip_info']

        # create the report
        report_text = self._batch_report_text(set_name, n_jobs, n_success, n_error,
                                              ran_locally, ran_njsw)

        print('Report text=')
        print(report_text)

        kbr = KBaseReport(self.callback_url)
        report_info = kbr.create_extended_report({
            'message': report_text,
            'objects_created': objects_created,
            'report_object_name': 'kb_Bwa_' + str(uuid.uuid4()),
            'direct_html_link_index': 0,
            'html_links': [{
                'shock_id': qc_result_zip_info['shock_id'],
                'name': qc_result_zip_info['index_html_file_name'],
                'label': qc_result_zip_info['name']
            }],
            'workspace_name': validated_params['output_workspace']
        })

        result = {
            'report_info': {
                'report_name': report_info['name'],
                'report_ref': report_info['ref']
            }
        }
        result['batch_output_info'] = batch_result

        return result

    def validate_params(self, params):
        """Check required fields, copy optional ones and normalise flags/counters.

        Returns a new dict; 'create_report' is a bool (default True) and the
        'concurrent_*_tasks' entries are ints or None.

        Raises:
            ValueError: if a required field is missing/empty or 'create_report'
                is neither 0 nor 1.
        """
        validated_params = {}

        required_string_fields = [
            'input_ref', 'assembly_or_genome_ref', 'output_obj_name_suffix',
            'output_workspace'
        ]
        for field in required_string_fields:
            if field in params and params[field]:
                validated_params[field] = params[field]
            else:
                raise ValueError('"' + field +
                                 '" field required to run bwa aligner app')

        optional_fields = [
            'quality_score', 'alignment_type', 'preset_options', 'trim5',
            'trim3', 'condition_label', 'np', 'minins', 'maxins',
            'output_alignment_suffix', 'output_alignment_name'
        ]
        for field in optional_fields:
            if field in params and params[field] is not None:
                validated_params[field] = params[field]

        validated_params['create_report'] = True
        if 'create_report' in params and params['create_report'] is not None:
            create_report = int(params['create_report'])
            if create_report == 1:
                validated_params['create_report'] = True
            elif create_report == 0:
                validated_params['create_report'] = False
            else:
                raise ValueError(
                    '"create_report" field, if present, should be set to a boolean value: 0 or 1'
                )

        for field in ('concurrent_local_tasks', 'concurrent_njsw_tasks'):
            validated_params[field] = None
            if field in params and params[field] is not None:
                validated_params[field] = int(params[field])

        return validated_params

    def fetch_reads_refs_from_sampleset(self, ref, info, validated_params):
        """
        Note: adapted from kbaseapps/kb_hisat2 - file_util.py

        From the given ReadsSet or RNASeqSampleSet ref, return a list with one
        dictionary per reads object in the set:
        {
            "ref": reads object reference path,
            "condition": label associated with that reads object in the set,
            "info": workspace object info of the reads object,
            "alignment_output_name": unique name to use for its alignment
        }

        Raises ValueError if the object is not a ReadsSet or RNASeqSampleSet.
        """
        obj_type = self.get_type_from_obj_info(info)
        refs = list()
        refs_for_ws_info = list()
        if "KBaseSets.ReadsSet" in obj_type or "KBaseRNASeq.RNASeqSampleSet" in obj_type:
            print("Looking up reads references in ReadsSet object")
            set_api = SetAPI(self.srv_wiz_url)
            reads_set = set_api.get_reads_set_v1({
                'ref': ref,
                'include_item_info': 0,
                'include_set_item_ref_paths': 1
            })

            for reads in reads_set["data"]["items"]:
                refs.append({
                    'ref': reads['ref_path'],
                    'condition': reads['label']
                })
                refs_for_ws_info.append({'ref': reads['ref_path']})
        else:
            raise ValueError("Unable to fetch reads reference from object {} "
                             "which is a {}".format(ref, obj_type))

        # get object info so we can name things properly
        infos = self.ws.get_object_info3({'objects': refs_for_ws_info})['infos']

        name_ext = '_alignment'
        if validated_params.get('output_alignment_suffix') is not None:
            ext = validated_params['output_alignment_suffix'].replace(' ', '')
            if ext:
                name_ext = ext

        # repeated library names get a numeric suffix so output names stay unique
        unique_name_lookup = {}
        for k in range(0, len(refs)):
            refs[k]['info'] = infos[k]
            name = infos[k][1]
            if name not in unique_name_lookup:
                unique_name_lookup[name] = 1
            else:
                unique_name_lookup[name] += 1
                name = name + '_' + str(unique_name_lookup[name])
            name = name + name_ext
            refs[k]['alignment_output_name'] = name

        return refs

    def determine_input_info(self, validated_params):
        """Get info on the input_ref object and determine if we run once or on a set.

        Returns a dict with 'run_mode' ('single_library' or 'sample_set'),
        'info' and 'ref'.

        Raises:
            ValueError: if the object type of input_ref is not supported.
        """
        info = self.get_obj_info(validated_params['input_ref'])
        obj_type = self.get_type_from_obj_info(info)
        if obj_type in [
                'KBaseAssembly.PairedEndLibrary',
                'KBaseAssembly.SingleEndLibrary', 'KBaseFile.PairedEndLibrary',
                'KBaseFile.SingleEndLibrary'
        ]:
            return {
                'run_mode': 'single_library',
                'info': info,
                'ref': validated_params['input_ref']
            }
        if obj_type in ['KBaseRNASeq.RNASeqSampleSet', 'KBaseSets.ReadsSet']:
            return {
                'run_mode': 'sample_set',
                'info': info,
                'ref': validated_params['input_ref']
            }

        raise ValueError('Object type of input_ref is not valid, was: ' +
                         str(obj_type))

    def get_type_from_obj_info(self, info):
        """Return the type string from an object info, without the '-version' part."""
        return info[2].split('-')[0]

    def get_obj_info(self, ref):
        """Return the workspace object info for ``ref``."""
        return self.ws.get_object_info3({'objects': [{
            'ref': ref
        }]})['infos'][0]