Example #1
    def __init__(self, scratch_dir, ws_url, callback_url, service_wizard_url, provenance):
        self.scratch_dir = scratch_dir
        self.ws_url = ws_url
        self.ws = Workspace(self.ws_url)
        self.callback_url = callback_url
        self.service_wizard_url = service_wizard_url
        self.bwa = BwaRunner(self.scratch_dir)
        self.provenance = provenance
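This signature matches the BwaIndexBuilder constructor in Example #3. A minimal instantiation sketch follows; the scratch path, service URLs and provenance value are assumptions, not taken from the snippet.

import os

builder = BwaIndexBuilder(
    scratch_dir='/kb/module/work/tmp',                      # assumed SDK scratch directory
    ws_url='https://kbase.us/services/ws',                  # assumed Workspace service URL
    callback_url=os.environ.get('SDK_CALLBACK_URL'),        # callback URL provided to the SDK job (assumed)
    service_wizard_url='https://kbase.us/services/service_wizard',  # assumed
    provenance=[]                                           # provenance list from the method context (assumed)
)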
Example #2
    def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url,
                 provenance):
        self.scratch_dir = scratch_dir
        self.workspace_url = workspace_url
        self.callback_url = callback_url
        self.srv_wiz_url = srv_wiz_url
        self.provenance = provenance

        # from the provenance, extract out the version to run by exact hash if possible
        self.my_version = 'release'
        if len(provenance) > 0:
            if 'subactions' in provenance[0]:
                self.my_version = self.get_version_from_subactions(
                    'kb_Bwa', provenance[0]['subactions'])
        print('Running kb_Bwa version = ' + self.my_version)

        self.ws = Workspace(self.workspace_url)
        self.bwa = BwaRunner(self.scratch_dir)
        self.parallel_runner = KBParallel(self.callback_url)
        self.qualimap = kb_QualiMap(self.callback_url)
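The constructor pins the module version from job provenance. Below is a hedged sketch of the provenance structure it inspects; the field names follow the snippet, but the commit hash is made up.

provenance = [{
    'subactions': [{
        'name': 'kb_Bwa',
        'commit': '9a1b2c3d4e5f60718293a4b5c6d7e8f901234567'  # 40-character git hash (made up)
    }]
}]
# With this input, get_version_from_subactions('kb_Bwa', provenance[0]['subactions'])
# returns the commit hash, so batch subtasks are pinned to that exact version;
# otherwise my_version stays 'release'.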
Example #3
class BwaIndexBuilder:
    def __init__(self, scratch_dir, ws_url, callback_url, service_wizard_url, provenance):
        self.scratch_dir = scratch_dir
        self.ws_url = ws_url
        self.ws = Workspace(self.ws_url)
        self.callback_url = callback_url
        self.service_wizard_url = service_wizard_url
        self.bwa = BwaRunner(self.scratch_dir)
        self.provenance = provenance

    def get_index(self, params):
        ''' The key function of this module: get a bwa index for the specified input '''

        # validate the parameters and fetch assembly_info
        validated_params = self._validate_params(params)
        assembly_info = self._get_assembly_info(validated_params['ref'])

        # check the cache (keyed off of assembly_info)
        index_info = self._get_cached_index(assembly_info, validated_params)
        if index_info:
            index_info['from_cache'] = 1
            index_info['pushed_to_cache'] = 0
        else:
            # on a cache miss, build the index
            index_info = self._build_index(assembly_info, validated_params)
            index_info['from_cache'] = 0
            # pushed_to_cache will be set in return from _build_index

        index_info['assembly_ref'] = assembly_info['ref']
        index_info['genome_ref'] = assembly_info['genome_ref']

        return index_info

    def _validate_params(self, params):
        ''' validate parameters; can do some processing here to produce validated params '''
        # params['ref'] = params['assembly_or_genome_ref']
        validated_params = {'ref': None}
        if 'ref' in params and params['ref']:
            validated_params['ref'] = params['ref']
        else:
            raise ValueError('"ref" field indicating either an assembly or genome is required.')

        if 'output_dir' in params:
            validated_params['output_dir'] = params['output_dir']
        else:
            validated_params['output_dir'] = os.path.join(self.scratch_dir,
                                                          'bwa_index_' + str(int(time.time() * 100)))

        if os.path.exists(validated_params['output_dir']):
            raise ValueError('Output directory name specified (' + validated_params['output_dir'] +
                             ') already exists. Will not overwrite, so aborting.')

        if 'ws_for_cache' in params and params['ws_for_cache']:
            validated_params['ws_for_cache'] = params['ws_for_cache']
        else:
            print('WARNING: if a bwa index is created, it will not be cached because the "ws_for_cache" field is not set')
            validated_params['ws_for_cache'] = None

        return validated_params

    def _get_assembly_info(self, ref):
        ''' given a ref to an assembly or genome, figure out the assembly and return its info '''
        info = self.ws.get_object_info3({'objects': [{'ref': ref}]})['infos'][0]
        obj_type = info[2]
        if obj_type.startswith('KBaseGenomeAnnotations.Assembly') or obj_type.startswith('KBaseGenomes.ContigSet'):
            return {'info': info, 'ref': ref, 'genome_ref': None}

        if obj_type.startswith('KBaseGenomes.Genome'):
            # we need to get the assembly for this genome
            ga = GenomeAnnotationAPI(self.service_wizard_url)
            assembly_ref = ga.get_assembly({'ref': ref})
            # using the path ensures we can access the assembly even if we don't have direct access
            ref_path = ref + ';' + assembly_ref
            info = self.ws.get_object_info3({'objects': [{'ref': ref_path}]})['infos'][0]
            return {'info': info, 'ref': ref_path, 'genome_ref': ref}

        raise ValueError('Input object was not of type: Assembly, ContigSet or Genome.  Cannot get bwa Index.')

    def _get_cached_index(self, assembly_info, validated_params):

        try:
            # note: list_referencing_objects does not yet support reference paths, so we need to call
            # it with the direct reference.  So we won't get a cache hit if we don't have direct access
            # to the assembly object right now (although we can still always build the index).
            # Once this call supports paths, this should be changed to set ref = assembly_info['ref']
            info = assembly_info['info']
            ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
            objs = self.ws.list_referencing_objects([{'ref': ref}])[0]

            # iterate through each of the objects that reference the assembly
            bwa_indexes = []
            for o in objs:
                if o[2].startswith('KBaseRNASeq.Bowtie2IndexV2'):
                    bwa_indexes.append(o)

            # Nothing refs this assembly, so cache miss
            if len(bwa_indexes) == 0:
                return False

            # if there is more than one hit, get the most recent one
            # (obj_info[3] is the save_date timestamp (eg 2017-05-30T22:56:49+0000), so we can sort on that)
            bwa_indexes.sort(key=lambda x: x[3])
            bwa_index_info = bwa_indexes[-1]
            index_ref = str(bwa_index_info[6]) + '/' + str(bwa_index_info[0]) + '/' + str(bwa_index_info[4])

            # get the object data
            index_obj_data = self.ws.get_objects2({'objects': [{'ref': index_ref}]})['data'][0]['data']

            # download the handle object
            os.makedirs(validated_params['output_dir'])

            dfu = DataFileUtil(self.callback_url)
            dfu.shock_to_file({'file_path': os.path.join(validated_params['output_dir'], 'bt2_index.tar.gz'),
                               'handle_id': index_obj_data['handle']['hid'],
                               'unpack': 'unpack'})
            print('Cache hit: ')
            pprint(index_obj_data)
            return {'output_dir': validated_params['output_dir'],
                    'index_files_basename': index_obj_data['index_files_basename']}

        except Exception:
            # if the cache lookup fails, don't worry; we can always rebuild the index
            print('WARNING: exception encountered when trying to lookup in cache:')
            print(traceback.format_exc())
            print('END WARNING: exception encountered when trying to lookup in cache.')

        return None

    def _put_cached_index(self, assembly_info, index_files_basename, output_dir, ws_for_cache):

        if not ws_for_cache:
            print('WARNING: bwa index cannot be cached because "ws_for_cache" field not set')
            return False

        try:
            dfu = DataFileUtil(self.callback_url)
            result = dfu.file_to_shock({'file_path': output_dir,
                                        'make_handle': 1,
                                        'pack': 'targz'})

            bwa_index = {'handle': result['handle'], 'size': result['size'],
                         'assembly_ref': assembly_info['ref'],
                         'index_files_basename': index_files_basename}

            ws = Workspace(self.ws_url)
            save_params = {'objects': [{'hidden': 1,
                                        'provenance': self.provenance,
                                        'name': os.path.basename(output_dir),
                                        'data': bwa_index,
                                        'type': 'KBaseRNASeq.Bowtie2IndexV2'
                                        }]
                           }
            if ws_for_cache.strip().isdigit():
                save_params['id'] = int(ws_for_cache)
            else:
                save_params['workspace'] = ws_for_cache.strip()
            save_result = ws.save_objects(save_params)
            print('Bowtie2IndexV2 cached to: ')
            pprint(save_result[0])
            return True

        except Exception:
            # if we fail in saving the cached object, don't worry
            print('WARNING: exception encountered when trying to cache the index files:')
            print(traceback.format_exc())
            print('END WARNING: exception encountered when trying to cache the index files')

        return False

    def _build_index(self, assembly_info, validated_params):
        # get the assembly as a fasta file using AssemblyUtil
        au = AssemblyUtil(self.callback_url)
        fasta_info = au.get_assembly_as_fasta({'ref': assembly_info['ref']})

        # make the target destination folder (check again it wasn't created yet)
        if os.path.exists(validated_params['output_dir']):
            raise ValueError('Output directory name specified (' + validated_params['output_dir'] +
                             ') already exists. Will not overwrite, so aborting.')
        os.makedirs(validated_params['output_dir'])

        # configure the command line args and run it
        cli_params = self._build_cli_params(fasta_info['path'], fasta_info['assembly_name'], validated_params)
        self.bwa.run('index', cli_params)
        for file in glob.glob(r'/kb/module/work/tmp/' + fasta_info['assembly_name'] + '.*'):
            print(file)
            shutil.copy(file, validated_params['output_dir'])

        index_info = {'output_dir': validated_params['output_dir'],
                      'index_files_basename': fasta_info['assembly_name']}

        # cache the result, mark if it worked or not
        cache_success = self._put_cached_index(assembly_info,
                                               fasta_info['assembly_name'],
                                               validated_params['output_dir'],
                                               validated_params['ws_for_cache'])
        if cache_success:
            index_info['pushed_to_cache'] = 1
        else:
            index_info['pushed_to_cache'] = 0

        return index_info

    def _build_cli_params(self, fasta_file_path, index_files_basename, validated_params):
        cli_params = []

        # positional arg: the fasta path; the base name used for the index files is passed via -p
        cli_params.append(fasta_file_path)
        cli_params.append("-p")
        cli_params.append(index_files_basename)

        return cli_params
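A hedged usage sketch of the index builder; the constructor arguments are assumed to come from the SDK context as in Example #1, and the object reference and workspace name below are made up.

builder = BwaIndexBuilder(scratch_dir, ws_url, callback_url, service_wizard_url, provenance)
index_info = builder.get_index({
    'ref': '55/1/2',                 # assembly or genome reference (made up)
    'ws_for_cache': 'my_workspace'   # workspace in which the index object may be cached (made up)
})
# Per get_index above, index_info contains output_dir, index_files_basename,
# from_cache, pushed_to_cache, assembly_ref and genome_ref.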
Example #4
class BwaAligner:
    def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url,
                 provenance):
        self.scratch_dir = scratch_dir
        self.workspace_url = workspace_url
        self.callback_url = callback_url
        self.srv_wiz_url = srv_wiz_url
        self.provenance = provenance

        # from the provenance, extract out the version to run by exact hash if possible
        self.my_version = 'release'
        if len(provenance) > 0:
            if 'subactions' in provenance[0]:
                self.my_version = self.get_version_from_subactions(
                    'kb_Bwa', provenance[0]['subactions'])
        print('Running kb_Bwa version = ' + self.my_version)

        self.ws = Workspace(self.workspace_url)
        self.bwa = BwaRunner(self.scratch_dir)
        self.parallel_runner = KBParallel(self.callback_url)
        self.qualimap = kb_QualiMap(self.callback_url)

    def get_version_from_subactions(self, module_name, subactions):
        # go through each subaction looking for the module of interest
        if not subactions:
            return 'release'  # default to release if we can't find anything
        for sa in subactions:
            if 'name' in sa:
                if sa['name'] == module_name:
                    # local-docker-image implies that we are running in kb-test, so return 'dev'
                    if sa['commit'] == 'local-docker-image':
                        return 'dev'
                    # to check that it is a valid hash, make sure it is the right
                    # length and made up of valid hash characters
                    if re.match('[a-fA-F0-9]{40}$', sa['commit']):
                        return sa['commit']
        # again, default to setting this to release
        return 'release'

    def align(self, params):
        validated_params = self.validate_params(params)
        input_info = self.determine_input_info(validated_params)
        # input info provides information on the input and tells us if we should
        # run as a single_library or as a set:
        #     input_info = {'run_mode': '', 'info': [..], 'ref': '55/1/2'}

        assembly_or_genome_ref = validated_params['assembly_or_genome_ref']

        if input_info['run_mode'] == 'single_library':
            if 'output_alignment_name' not in validated_params:
                suffix = '_alignment'
                if 'output_alignment_suffix' in validated_params:
                    suffix = validated_params['output_alignment_suffix']
                validated_params[
                    'output_alignment_name'] = input_info['info'][1] + suffix
            single_lib_result = self.single_reads_lib_run(
                input_info,
                assembly_or_genome_ref,
                validated_params,
                create_report=validated_params['create_report'])

            return single_lib_result

        if input_info['run_mode'] == 'sample_set':
            reads = self.fetch_reads_refs_from_sampleset(
                input_info['ref'], input_info['info'], validated_params)
            self.build_bwa_index(assembly_or_genome_ref,
                                 validated_params['output_workspace'])

            print('Running on set of reads=')
            pprint(reads)

            tasks = []
            for r in reads:
                tasks.append(
                    self.build_single_execution_task(
                        r['ref'], params, r['alignment_output_name'],
                        r['condition']))

            batch_run_params = {
                'tasks': tasks,
                'runner': 'parallel',
                'max_retries': 2
            }
            if validated_params['concurrent_local_tasks'] is not None:
                batch_run_params['concurrent_local_tasks'] = validated_params[
                    'concurrent_local_tasks']
            if validated_params['concurrent_njsw_tasks'] is not None:
                batch_run_params['concurrent_njsw_tasks'] = validated_params[
                    'concurrent_njsw_tasks']
            results = self.parallel_runner.run_batch(batch_run_params)
            print('Batch run results=')
            pprint(results)
            batch_result = self.process_batch_result(results, validated_params,
                                                     reads, input_info['info'])
            return batch_result

        raise ValueError('Improper run mode')

    def build_single_execution_task(self, reads_lib_ref, params, output_name,
                                    condition):
        task_params = copy.deepcopy(params)

        task_params['input_ref'] = reads_lib_ref
        task_params['output_alignment_name'] = output_name
        task_params['create_report'] = 0
        task_params['condition_label'] = condition

        return {
            'module_name': 'kb_Bwa',
            'function_name': 'align_reads_to_assembly_app',
            'version': self.my_version,
            'parameters': task_params
        }

    def single_reads_lib_run(self,
                             read_lib_info,
                             assembly_or_genome_ref,
                             validated_params,
                             create_report=False,
                             bwa_index_info=None):
        ''' run on a single reads library '''

        # download reads and prepare any bwa index files
        input_configuration = self.prepare_single_run(
            read_lib_info, assembly_or_genome_ref, bwa_index_info,
            validated_params['output_workspace'])

        # run the actual program
        run_output_info = self.run_bwa_align_cli(input_configuration,
                                                 validated_params)

        # process the result and save the output
        upload_results = self.save_read_alignment_output(
            run_output_info, input_configuration, validated_params)
        run_output_info['upload_results'] = upload_results

        report_info = None
        if create_report:
            report_info = self.create_report_for_single_run(
                run_output_info, input_configuration, validated_params)

        self.clean(run_output_info)

        return {'output_info': run_output_info, 'report_info': report_info}

    def build_bwa_index(self, assembly_or_genome_ref, ws_for_cache):
        bwaIndexBuilder = BwaIndexBuilder(self.scratch_dir, self.workspace_url,
                                          self.callback_url, self.srv_wiz_url,
                                          self.provenance)

        return bwaIndexBuilder.get_index({
            'ref': assembly_or_genome_ref,
            'ws_for_cache': ws_for_cache
        })

    def prepare_single_run(self, input_info, assembly_or_genome_ref,
                           bwa_index_info, ws_for_cache):
        ''' Given a reads ref and an assembly, set up the bwa index and download the reads '''
        # first setup the bwa index of the assembly
        input_configuration = {'bwa_index_info': bwa_index_info}
        if not bwa_index_info:
            bwaIndexBuilder = BwaIndexBuilder(self.scratch_dir,
                                              self.workspace_url,
                                              self.callback_url,
                                              self.srv_wiz_url,
                                              self.provenance)

            index_result = bwaIndexBuilder.get_index({
                'ref':
                assembly_or_genome_ref,
                'ws_for_cache':
                ws_for_cache
            })
            input_configuration['bwa_index_info'] = index_result

        # next download the reads
        read_lib_ref = input_info['ref']
        read_lib_info = input_info['info']
        reads_params = {
            'read_libraries': [read_lib_ref],
            'interleaved': 'false',
            'gzipped': None
        }
        ru = ReadsUtils(self.callback_url)
        reads = ru.download_reads(reads_params)['files']

        input_configuration['reads_lib_type'] = self.get_type_from_obj_info(
            read_lib_info).split('.')[1]
        input_configuration['reads_files'] = reads[read_lib_ref]
        input_configuration['reads_lib_ref'] = read_lib_ref

        return input_configuration

    def run_bwa_align_cli(self, input_configuration, validated_params):
        # pprint('======== input_configuration =====')
        # pprint(input_configuration)
        options = []
        run_output_info = {}

        # set the bwa index location
        bt2_index_dir = input_configuration['bwa_index_info']['output_dir']

        bt2_index_basename = input_configuration['bwa_index_info'][
            'index_files_basename']
        #options.extend(['-x', bt2_index_basename])

        reference = os.path.join(bt2_index_dir, bt2_index_basename)

        options_r = []
        options_l = []

        options.append(reference)
        options_r.append(reference)
        options_l.append(reference)

        output_dir = os.path.join(
            self.scratch_dir,
            'bwa_alignment_output_' + str(int(time.time() * 10000)))
        output_sam_file = os.path.join(output_dir, 'reads_alignment.sam')
        os.makedirs(output_dir)

        # set the input reads
        sam_parameter = ''
        if input_configuration['reads_lib_type'] == 'SingleEndLibrary':
            options.extend(
                ['-0', input_configuration['reads_files']['files']['fwd']])
            run_output_info['library_type'] = 'single_end'
            output_sai_file = os.path.join(output_dir,
                                           bt2_index_basename) + ".sai"
            options.extend(["-f", output_sai_file])
            self.bwa.run('aln', options, cwd=bt2_index_dir)
            sam_parameter = 'samse'
            options2 = []
            options2.append(reference)
            options2.append(output_sai_file)
            options2.append(input_configuration['reads_files']['files']['fwd'])
            options2.extend(["-f", output_sam_file])
            self.bwa.run(sam_parameter, options2, cwd=bt2_index_dir)
        elif input_configuration['reads_lib_type'] == 'PairedEndLibrary':
            options_l.extend(
                ['-1', input_configuration['reads_files']['files']['fwd']])
            output_l_sai_file = os.path.join(output_dir,
                                             bt2_index_basename) + "_l.sai"
            options_l.extend(["-f", output_l_sai_file])
            self.bwa.run('aln', options_l, cwd=bt2_index_dir)
            options_r.extend(
                ['-2', input_configuration['reads_files']['files']['rev']])
            output_r_sai_file = os.path.join(output_dir,
                                             bt2_index_basename) + "_r.sai"
            options_r.extend(["-f", output_r_sai_file])
            self.bwa.run('aln', options_r, cwd=bt2_index_dir)
            sam_parameter = 'sampe'
            options2 = []
            options2.append(reference)
            options2.append(output_r_sai_file)
            options2.append(output_l_sai_file)
            options2.append(input_configuration['reads_files']['files']['rev'])
            options2.append(input_configuration['reads_files']['files']['fwd'])
            options2.extend(["-f", output_sam_file])
            self.bwa.run(sam_parameter, options2, cwd=bt2_index_dir)
            run_output_info['library_type'] = 'paired_end'
        # equivalent shell commands for reference:
        #   bwa aln -I -t 8 reference.fa reads.txt > out.sai
        #   bwa samse reference.fa out.sai reads.txt > out.sam

        # record the output locations
        run_output_info['output_sam_file'] = output_sam_file
        run_output_info['output_dir'] = output_dir

        return run_output_info

    def save_read_alignment_output(self, run_output_info, input_configuration,
                                   validated_params):
        rau = ReadsAlignmentUtils(self.callback_url)
        destination_ref = validated_params[
            'output_workspace'] + '/' + validated_params[
                'output_alignment_name']
        condition = 'unknown'
        if 'condition_label' in validated_params:
            condition = validated_params['condition_label']
        upload_params = {
            'file_path': run_output_info['output_sam_file'],
            'destination_ref': destination_ref,
            'read_library_ref': input_configuration['reads_lib_ref'],
            'assembly_or_genome_ref':
            validated_params['assembly_or_genome_ref'],
            'condition': condition
        }
        upload_results = rau.upload_alignment(upload_params)
        return upload_results

    def clean(self, run_output_info):
        ''' Not really necessary on a single run, but if we are running multiple local subjobs, we
        should clean up files that have already been saved back to KBase '''
        pass

    def create_report_for_single_run(self, run_output_info,
                                     input_configuration, validated_params):
        # first run qualimap
        qualimap_report = self.qualimap.run_bamqc(
            {'input_ref': run_output_info['upload_results']['obj_ref']})
        qc_result_zip_info = qualimap_report['qc_result_zip_info']

        # create report
        report_text = 'Ran on a single reads library.\n\n'
        alignment_info = self.get_obj_info(
            run_output_info['upload_results']['obj_ref'])
        report_text += 'Created ReadsAlignment: ' + str(
            alignment_info[1]) + '\n'
        report_text += '                        ' + run_output_info[
            'upload_results']['obj_ref'] + '\n'
        kbr = KBaseReport(self.callback_url)
        report_info = kbr.create_extended_report({
            'message':
            report_text,
            'objects_created': [{
                'ref':
                run_output_info['upload_results']['obj_ref'],
                'description':
                'ReadsAlignment'
            }],
            'report_object_name':
            'kb_Bwa_' + str(uuid.uuid4()),
            'direct_html_link_index':
            0,
            'html_links': [{
                'shock_id': qc_result_zip_info['shock_id'],
                'name': qc_result_zip_info['index_html_file_name'],
                'label': qc_result_zip_info['name']
            }],
            'workspace_name':
            validated_params['output_workspace']
        })
        return {
            'report_name': report_info['name'],
            'report_ref': report_info['ref']
        }

    def process_batch_result(self, batch_result, validated_params, reads,
                             input_set_info):

        n_jobs = len(batch_result['results'])
        n_success = 0
        n_error = 0
        ran_locally = 0
        ran_njsw = 0

        # reads alignment set items
        items = []
        objects_created = []

        for k in range(0, len(batch_result['results'])):
            job = batch_result['results'][k]
            result_package = job['result_package']
            if job['is_error']:
                n_error += 1
            else:
                n_success += 1
                print(result_package['result'])
                print(result_package['result'][0])
                print(result_package['result'][0]['output_info'])
                output_info = result_package['result'][0]['output_info']
                ra_ref = output_info['upload_results']['obj_ref']
                # Note: could add a label to the alignment here?
                items.append({'ref': ra_ref, 'label': reads[k]['condition']})
                objects_created.append({'ref': ra_ref})

            if result_package['run_context']['location'] == 'local':
                ran_locally += 1
            if result_package['run_context']['location'] == 'njsw':
                ran_njsw += 1

        # Save the alignment set
        alignment_set_data = {'description': '', 'items': items}
        alignment_set_save_params = {
            'data':
            alignment_set_data,
            'workspace':
            validated_params['output_workspace'],
            'output_object_name':
            str(input_set_info[1]) + validated_params['output_obj_name_suffix']
        }

        set_api = SetAPI(self.srv_wiz_url)
        save_result = set_api.save_reads_alignment_set_v1(
            alignment_set_save_params)
        print('Saved ReadsAlignment=')
        pprint(save_result)
        objects_created.append({
            'ref':
            save_result['set_ref'],
            'description':
            'Set of all reads alignments generated'
        })
        set_name = save_result['set_info'][1]

        # run qualimap
        qualimap_report = self.qualimap.run_bamqc(
            {'input_ref': save_result['set_ref']})
        qc_result_zip_info = qualimap_report['qc_result_zip_info']

        # create the report
        report_text = 'Ran on SampleSet or ReadsSet.\n\n'
        report_text += 'Created ReadsAlignmentSet: ' + str(set_name) + '\n\n'
        report_text += 'Total ReadsLibraries = ' + str(n_jobs) + '\n'
        report_text += '        Successful runs = ' + str(n_success) + '\n'
        report_text += '            Failed runs = ' + str(n_error) + '\n'
        report_text += '       Ran on main node = ' + str(ran_locally) + '\n'
        report_text += '   Ran on remote worker = ' + str(ran_njsw) + '\n\n'

        print('Report text=')
        print(report_text)

        kbr = KBaseReport(self.callback_url)
        report_info = kbr.create_extended_report({
            'message':
            report_text,
            'objects_created':
            objects_created,
            'report_object_name':
            'kb_Bwa_' + str(uuid.uuid4()),
            'direct_html_link_index':
            0,
            'html_links': [{
                'shock_id': qc_result_zip_info['shock_id'],
                'name': qc_result_zip_info['index_html_file_name'],
                'label': qc_result_zip_info['name']
            }],
            'workspace_name':
            validated_params['output_workspace']
        })

        result = {
            'report_info': {
                'report_name': report_info['name'],
                'report_ref': report_info['ref']
            }
        }
        result['batch_output_info'] = batch_result

        return result

    def validate_params(self, params):
        validated_params = {}

        required_string_fields = [
            'input_ref', 'assembly_or_genome_ref', 'output_obj_name_suffix',
            'output_workspace'
        ]
        for field in required_string_fields:
            if field in params and params[field]:
                validated_params[field] = params[field]
            else:
                raise ValueError('"' + field +
                                 '" field required to run bwa aligner app')

        optional_fields = [
            'quality_score', 'alignment_type', 'preset_options', 'trim5',
            'trim3', 'condition_label', 'np', 'minins', 'maxins',
            'output_alignment_suffix', 'output_alignment_name'
        ]
        for field in optional_fields:
            if field in params:
                if params[field] is not None:
                    validated_params[field] = params[field]

        validated_params['create_report'] = True
        if 'create_report' in params and params['create_report'] is not None:
            if int(params['create_report']) == 1:
                validated_params['create_report'] = True
            elif int(params['create_report']) == 0:
                validated_params['create_report'] = False
            else:
                raise ValueError(
                    '"create_report" field, if present, should be set to a boolean value: 0 or 1'
                )

        validated_params['concurrent_local_tasks'] = None
        validated_params['concurrent_njsw_tasks'] = None

        if 'concurrent_local_tasks' in params and params[
                'concurrent_local_tasks'] is not None:
            validated_params['concurrent_local_tasks'] = int(
                params['concurrent_local_tasks'])
        if 'concurrent_njsw_tasks' in params and params[
                'concurrent_njsw_tasks'] is not None:
            validated_params['concurrent_njsw_tasks'] = int(
                params['concurrent_njsw_tasks'])

        return validated_params

    def fetch_reads_refs_from_sampleset(self, ref, info, validated_params):
        """
        Note: adapted from kbaseapps/kb_hisat2 - file_util.py
        From the given object ref, return a list of all reads objects that are a part of that
        object. E.g., if ref is a ReadsSet, return a list of all PairedEndLibrary or SingleEndLibrary
        refs that are a member of that ReadsSet. This is returned as a list of dictionaries as follows:
        {
            "ref": reads object reference,
            "condition": condition string associated with that reads object
        }
        The only required key is "ref"; all other keys may or may not be present, depending on the reads
        object or the object type of the initial ref. E.g., an RNASeqSampleSet might have condition info
        for each reads object, but a single PairedEndLibrary may not have that info.
        Note that this implementation raises a ValueError if ref is not a ReadsSet or RNASeqSampleSet.
        """
        obj_type = self.get_type_from_obj_info(info)
        refs = list()
        refs_for_ws_info = list()
        if "KBaseSets.ReadsSet" in obj_type or "KBaseRNASeq.RNASeqSampleSet" in obj_type:
            print("Looking up reads references in ReadsSet object")
            set_api = SetAPI(self.srv_wiz_url)
            reads_set = set_api.get_reads_set_v1({
                'ref':
                ref,
                'include_item_info':
                0,
                'include_set_item_ref_paths':
                1
            })

            for reads in reads_set["data"]["items"]:
                refs.append({
                    'ref': reads['ref_path'],
                    'condition': reads['label']
                })
                refs_for_ws_info.append({'ref': reads['ref_path']})
        else:
            raise ValueError("Unable to fetch reads reference from object {} "
                             "which is a {}".format(ref, obj_type))

        # get object info so we can name things properly
        infos = self.ws.get_object_info3({'objects':
                                          refs_for_ws_info})['infos']

        name_ext = '_alignment'
        if 'output_alignment_suffix' in validated_params \
                and validated_params['output_alignment_suffix'] is not None:
            ext = validated_params['output_alignment_suffix'].replace(' ', '')
            if ext:
                name_ext = ext

        unique_name_lookup = {}
        for k in range(0, len(refs)):
            refs[k]['info'] = infos[k]
            name = infos[k][1]
            if name not in unique_name_lookup:
                unique_name_lookup[name] = 1
            else:
                unique_name_lookup[name] += 1
                name = name + '_' + str(unique_name_lookup[name])
            name = name + name_ext
            refs[k]['alignment_output_name'] = name

        return refs

    def determine_input_info(self, validated_params):
        ''' get info on the input_ref object and determine if we run once or run on a set '''
        info = self.get_obj_info(validated_params['input_ref'])
        obj_type = self.get_type_from_obj_info(info)
        if obj_type in [
                'KBaseAssembly.PairedEndLibrary',
                'KBaseAssembly.SingleEndLibrary', 'KBaseFile.PairedEndLibrary',
                'KBaseFile.SingleEndLibrary'
        ]:
            return {
                'run_mode': 'single_library',
                'info': info,
                'ref': validated_params['input_ref']
            }
        if obj_type == 'KBaseRNASeq.RNASeqSampleSet':
            return {
                'run_mode': 'sample_set',
                'info': info,
                'ref': validated_params['input_ref']
            }
        if obj_type == 'KBaseSets.ReadsSet':
            return {
                'run_mode': 'sample_set',
                'info': info,
                'ref': validated_params['input_ref']
            }

        raise ValueError('Object type of input_ref is not valid, was: ' +
                         str(obj_type))

    def get_type_from_obj_info(self, info):
        return info[2].split('-')[0]

    def get_obj_info(self, ref):
        return self.ws.get_object_info3({'objects': [{
            'ref': ref
        }]})['infos'][0]
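A hedged end-to-end sketch of the aligner above; the constructor arguments, references and names are made up, and only the fields required by validate_params are shown.

aligner = BwaAligner(scratch_dir, workspace_url, callback_url, srv_wiz_url, provenance)
result = aligner.align({
    'input_ref': '55/3/1',                  # reads library, ReadsSet or RNASeqSampleSet (made up)
    'assembly_or_genome_ref': '55/1/2',     # assembly or genome to align against (made up)
    'output_obj_name_suffix': '_alignment',
    'output_workspace': 'my_workspace',     # made up
    'create_report': 1
})
# For a single library this returns {'output_info': ..., 'report_info': ...};
# for a set it returns report_info plus batch_output_info from the parallel run.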