示例#1
0
    def upload_fastq(self, ctx, params):
        """
        Upload a FASTQ library via the ReadsUtils service and return the
        (placeholder) object reference.

        :param ctx: KBase call context; ctx['token'] is forwarded to the
            ReadsUtils client for authentication.
        :param params: instance of type "UploadFastqParams" (testing
           invocation of ReadsUtils) -> structure: parameter "fwd_id" of
           String, parameter "wsid" of Long, parameter "wsname" of String,
           parameter "objid" of Long, parameter "name" of String, parameter
           "rev_id" of String, parameter "sequencing_tech" of String
        :returns: instance of type "UploadFastqObjref"
        """
        # ctx is the context object
        # return variables are: objref
        #BEGIN upload_fastq
        print("hai this is upload_fastq here, params are")
        pprint.pprint(params)
        ReadsUtils_instance = ReadsUtils(url=self.callbackURL,
                                         token=ctx['token'],
                                         service_ver='dev')
        print("got ReadsUtilsinstance")
        method_retVal = ReadsUtils_instance.upload_reads(params)
        print("back from ReadsUtils_instance.upload_reads")
        # BUG FIX: `pprint` is the module (see pprint.pprint(params) above),
        # so calling it directly raised TypeError; use pprint.pprint().
        pprint.pprint(method_retVal)
        # NOTE(review): objref is a hard-coded placeholder, not the real
        # ref from method_retVal — presumably leftover scaffolding; confirm.
        objref = "Vooch"
        #END upload_fastq

        # At some point might do deeper type checking...
        if not isinstance(objref, basestring):
            raise ValueError('Method upload_fastq return value ' +
                             'objref is not type basestring as required.')
        # return the results
        return [objref]
示例#2
0
    def _stage_input_file(self, cutadapt_runner, ref, reads_type):
        """
        Download the reads object `ref` via ReadsUtils and configure the
        cutadapt runner with the resulting FASTQ file.

        :param cutadapt_runner: runner to receive the input file/interleave flag
        :param ref: workspace reference of the reads object
        :param reads_type: workspace type name of the reads object
        :returns: the ReadsUtils per-file info dict, with 'input_ref' added
        :raises ValueError: if reads_type is not a supported library type
        """
        ru = ReadsUtils(self.callbackURL)
        # BUG FIX: the original conditions were `reads_type == X or 'Y'`,
        # which is always truthy (a non-empty string literal is True), so
        # every type — including single-end — took the paired-end branch and
        # the error branch was unreachable. Compare against both names.
        if reads_type in ('KBaseFile.PairedEndLibrary',
                          'KBaseAssembly.PairedEndLibrary'):
            input_file_info = ru.download_reads({
                'read_libraries': [ref],
                'interleaved': 'true'
            })['files'][ref]
        elif reads_type in ('KBaseFile.SingleEndLibrary',
                            'KBaseAssembly.SingleEndLibrary'):
            input_file_info = ru.download_reads({'read_libraries':
                                                 [ref]})['files'][ref]
        else:
            raise ValueError("Can't download_reads() for object type: '" +
                             str(reads_type) + "'")
        input_file_info['input_ref'] = ref
        file_location = input_file_info['files']['fwd']

        interleaved = False
        if input_file_info['files']['type'] == 'interleaved':
            interleaved = True
        cutadapt_runner.set_interleaved(interleaved)
        cutadapt_runner.set_input_file(file_location)
        return input_file_info
    def test_fastqc_app(self):
        """Happy path: upload a small reads object, run FastQC, check report."""
        # create ws, and load test reads
        ws_name = self.getWsName()
        reads_util = ReadsUtils(os.environ['SDK_CALLBACK_URL'])
        upload_params = {
            'fwd_file': self.small_fq_test_file2,
            'sequencing_tech': 'tech1',
            'wsname': ws_name,
            'name': 'reads1',
            'interleaved': 1,
        }
        input_file_ref = reads_util.upload_reads(upload_params)['obj_ref']

        run_params = {'input_ws': ws_name, 'input_file_ref': input_file_ref}
        output = self.getImpl().runFastQC(self.getContext(), run_params)[0]
        for key in ('report_name', 'report_ref'):
            self.assertIn(key, output)

        # Fetch the generated report object and verify its structure.
        report = self.getWsClient().get_objects2(
            {'objects': [{'ref': output['report_ref']}]})['data'][0]['data']
        for key in ('direct_html', 'file_links', 'html_links',
                    'objects_created', 'text_message'):
            self.assertIn(key, report)
示例#4
0
    def prepare_single_run(self, input_info, assembly_or_genome_ref,
                           bowtie2_index_info, ws_for_cache):
        ''' Given a reads ref and an assembly, setup the bowtie2 index '''
        input_configuration = {'bowtie2_index_info': bowtie2_index_info}
        # Build (or fetch a cached) index only when none was supplied.
        if not bowtie2_index_info:
            index_builder = Bowtie2IndexBuilder(self.scratch_dir, self.workspace_url,
                                                self.callback_url, self.srv_wiz_url,
                                                self.provenance)
            built_index = index_builder.get_index({'ref': assembly_or_genome_ref,
                                                   'ws_for_cache': ws_for_cache})
            input_configuration['bowtie2_index_info'] = built_index

        # Download the reads library as separate (non-interleaved) files.
        lib_ref = input_info['ref']
        lib_info = input_info['info']
        download_params = {'read_libraries': [lib_ref],
                           'interleaved': 'false',
                           'gzipped': None}
        files_by_ref = ReadsUtils(self.callback_url).download_reads(
            download_params)['files']

        lib_type = self.get_type_from_obj_info(lib_info).split('.')[1]
        input_configuration['reads_lib_type'] = lib_type
        input_configuration['reads_files'] = files_by_ref[lib_ref]
        input_configuration['reads_lib_ref'] = lib_ref

        return input_configuration
示例#5
0
    def get_reads_RU(self, ctx, refs, console):
        """
        Download the given reads refs (interleaved) via ReadsUtils and
        return the resulting files structure. Unsupported-type failures
        from the service are converted into a clearer ValueError.
        """
        readcli = ReadsUtils(self.callbackURL,
                             token=ctx['token'],
                             service_ver='dev')

        # Marker text present in the server error when the object type is
        # not a supported reads library.
        typeerr = ('Supported types: KBaseFile.SingleEndLibrary ' +
                   'KBaseFile.PairedEndLibrary ' +
                   'KBaseAssembly.SingleEndLibrary ' +
                   'KBaseAssembly.PairedEndLibrary')
        download_params = {
            'read_libraries': refs,
            'interleaved': 'true',
            'gzipped': None
        }
        try:
            reads = readcli.download_reads(download_params)['files']
        except ServerError as se:
            self.log(console, 'logging stacktrace from dynamic client error')
            self.log(console, se.data)
            if typeerr not in se.message:
                raise
            prefix = se.message.split('.')[0]
            raise ValueError(
                prefix + '. Only the types ' +
                'KBaseAssembly.PairedEndLibrary ' +
                'and KBaseFile.PairedEndLibrary are supported')

        self.log(console, 'Got reads data from converter:\n' + pformat(reads))
        return reads
示例#6
0
    def get_ea_utils_result(self, refid, input_params):
        """
        Download the reads object `refid` and build a stats report string
        covering every FASTQ file in the library (fwd, plus rev if paired).
        """
        ref = [refid]
        downloader = ReadsUtils(self.callbackURL)
        download = downloader.download_reads({'read_libraries': ref})
        files = download['files'][ref[0]]['files']

        report = ''
        fwd_file = files['fwd']
        otype = files['otype']

        # interleaved library: one file carries both mates
        if (otype == 'interleaved'):
            report += self.get_report_string(fwd_file)

        # separate pair: report on forward and reverse files
        if (otype == 'paired'):
            report += self.get_report_string(fwd_file)
            rev_file = files['rev']
            report += self.get_report_string(rev_file)

        # single-end library
        if (otype == 'single'):
            report += self.get_report_string(fwd_file)
        return report
    def __init__(self, config):
        """
        Cache service endpoints/credentials from `config` and construct the
        KBase service clients used by this utility class.

        :param config: dict with 'SDK_CALLBACK_URL', 'KB_AUTH_TOKEN' and
            'scratch' entries.
        """
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.scratch = config['scratch']

        # Both clients talk to the local SDK callback server.
        self.dfu = DataFileUtil(self.callback_url)
        self.ru = ReadsUtils(self.callback_url)
示例#8
0
def fetch_reads_from_reference(ref, callback_url):
    """
    Fetch a FASTQ file (or 2 for paired-end) from a reads reference.
    Returns the following structure:
    {
        "style": "paired", "single", or "interleaved",
        "file_fwd": path_to_file,
        "file_rev": path_to_file, only if paired end,
        "object_ref": reads reference for downstream convenience.
    }

    :raises: re-raises whatever the download raised, after logging the ref.
    """
    try:
        print("Fetching reads from object {}".format(ref))
        reads_client = ReadsUtils(callback_url)
        reads_dl = reads_client.download_reads({
            "read_libraries": [ref],
            "interleaved": "false"
        })
        pprint(reads_dl)
        reads_files = reads_dl['files'][ref]['files']
        ret_reads = {
            "object_ref": ref,
            "style": reads_files["type"],
            "file_fwd": reads_files["fwd"]
        }
        # Only paired-end downloads produce a reverse file.
        if reads_files.get("rev", None) is not None:
            ret_reads["file_rev"] = reads_files["rev"]
        return ret_reads
    # BUG FIX: narrowed from a bare `except:` — we still log-and-reraise,
    # but no longer intercept KeyboardInterrupt/SystemExit.
    except Exception:
        print("Unable to fetch a file from expected reads object {}".format(ref))
        raise
示例#9
0
    def __init__(self, config, ctx):
        """
        Store config-derived endpoints, ensure the scratch dir exists and
        construct the ReadsUtils and SetAPI clients.

        :raises ValueError: if either client cannot be instantiated.
        """
        self.ctx = ctx
        self.scratch = os.path.abspath(config['scratch'])
        self.ws_url = config['workspace-url']
        self.serviceWizardURL = config['srv-wiz-url']
        self.callbackURL = config['SDK_CALLBACK_URL']
        if not os.path.exists(self.scratch):
            os.makedirs(self.scratch)

        # Flags used elsewhere to tag single-/paired-end libraries.
        self.SE_flag = 'SE'
        self.PE_flag = 'PE'

        SERVICE_VER = 'release'

        # readsUtils_Client
        try:
            self.readsUtils_Client = ReadsUtils(
                self.callbackURL,
                token=self.ctx['token'],
                service_ver=SERVICE_VER)
        except Exception as err:
            raise ValueError(
                'Unable to instantiate readsUtils_Client with callbackURL: ' +
                self.callbackURL + ' ERROR: ' + str(err))

        # setAPI_Client (dynamic service — must go through the service
        # wizard; the local callback URL does not work for SetAPI)
        try:
            self.setAPI_Client = SetAPI(
                url=self.serviceWizardURL,
                token=self.ctx['token'])
        except Exception as err:
            raise ValueError(
                'Unable to instantiate setAPI_Client with serviceWizardURL: ' +
                self.serviceWizardURL + ' ERROR: ' + str(err))
示例#10
0
    def loadPairedEndReads(self):
        """Upload a small paired-end library once per test class; returns its ref."""
        # NOTE(review): when the class-level cache exists this returns the
        # object-info tuple, while a fresh upload returns the ref string —
        # preserved as-is, but callers should confirm which they expect.
        if hasattr(self.__class__, 'pairedEndLibInfo'):
            return self.__class__.pairedEndLibInfo
        # 1) stage both FASTQ files into the shared scratch dir
        shared_dir = "/kb/module/work/tmp"
        fwd_src = '../work/testReads/small.forward.fq'
        fwd_dst = os.path.join(shared_dir, os.path.basename(fwd_src))
        shutil.copy(fwd_src, fwd_dst)
        rev_src = '../work/testReads/small.reverse.fq'
        rev_dst = os.path.join(shared_dir, os.path.basename(rev_src))
        shutil.copy(rev_src, rev_dst)

        # 2) upload the pair as a PairedEndLibrary
        ru = ReadsUtils(os.environ['SDK_CALLBACK_URL'])
        pe_reads_ref = ru.upload_reads({
            'fwd_file': fwd_dst,
            'rev_file': rev_dst,
            'sequencing_tech': 'artificial reads',
            'interleaved': 0,
            'wsname': self.getWsName(),
            'name': 'test_pe_reads'
        })['obj_ref']

        self.__class__.pe_reads_ref = pe_reads_ref
        print('Loaded PairedEndReads: ' + pe_reads_ref)
        new_obj_info = self.wsClient.get_object_info_new(
            {'objects': [{'ref': pe_reads_ref}]})
        self.__class__.pairedEndLibInfo = new_obj_info[0]
        pprint(pformat(new_obj_info))
        return pe_reads_ref
示例#11
0
    def _package_result(self, output_file, output_name, ws_name_or_id,
                        data_info, report):
        """
        Upload a processed reads file as a new workspace object, carrying
        over metadata from the source library.

        :param output_file: local path of the FASTQ file to upload
        :param output_name: object name for the uploaded reads
        :param ws_name_or_id: workspace name, or a purely-numeric workspace id
        :param data_info: metadata from the source reads download (fields,
            'input_ref', 'files' structure)
        :param report: report text passed through unchanged to the result
        :returns: dict with 'report' and 'output_reads_ref'
        """
        upload_params = {'fwd_file': output_file, 'name': output_name}

        # A purely numeric target is treated as a workspace id, else a name.
        if str(ws_name_or_id).isdigit():
            upload_params['wsid'] = int(ws_name_or_id)
        else:
            upload_params['wsname'] = str(ws_name_or_id)

        fields = [
            'sequencing_tech', 'strain', 'source', 'read_orientation_outward',
            'insert_size_mean', 'insert_size_std_dev'
        ]

        # If the source object ref is known (and it had a sequencing tech),
        # let ReadsUtils copy metadata from it; otherwise carry fields over
        # explicitly.  BUG FIX: `!= None` replaced with `is not None` (PEP 8;
        # avoids invoking a custom __ne__).
        if ('input_ref' in data_info and
                data_info['input_ref'] is not None and
                data_info['sequencing_tech']):
            upload_params['source_reads_ref'] = data_info['input_ref']
        else:
            for f in fields:
                if f in data_info:
                    upload_params[f] = data_info[f]
            # single_genome arrives as the strings 'true'/'false'; the
            # upload API wants 1/0.
            if 'single_genome' in data_info:
                if data_info['single_genome'] == 'true':
                    upload_params['single_genome'] = 1
                elif data_info['single_genome'] == 'false':
                    upload_params['single_genome'] = 0
            if 'sequencing_tech' not in upload_params:
                upload_params['sequencing_tech'] = 'unknown'
            if not upload_params['sequencing_tech']:
                upload_params['sequencing_tech'] = 'unknown'

        if data_info['files']['type'] == 'interleaved':
            upload_params['interleaved'] = 1

        ru = ReadsUtils(self.callbackURL)
        result = ru.upload_reads(upload_params)

        # THE REPORT MUST BE CREATED OUTSIDE SO THAT LIBS AND SETS ARE HANDLED
        return {'report': report, 'output_reads_ref': result['obj_ref']}
示例#12
0
    def get_input_reads(self, params, token):
        """
        Resolve the library arguments in `params` to workspace refs,
        download every referenced reads library (interleaved) and return
        the ReadsUtils files structure.
        """
        print('in get input reads')

        wsname = params[self.PARAM_IN_WS]
        libfile_args = params[self.PARAM_IN_LIBFILE_ARGS]

        # Build the list of objects to look up; bare names are qualified
        # with the workspace name.
        obj_ids = []
        for libarg in libfile_args:
            lib_name = libarg[self.PARAM_IN_LIBRARY]
            lib_ref = lib_name if '/' in lib_name else (wsname + '/' + lib_name)
            obj_ids.append({'ref': lib_ref})
            libarg['ref_library'] = lib_ref

            unpaired_name = libarg.get(self.PARAM_IN_UNPAIRED)
            if unpaired_name is not None:
                unpaired_ref = (unpaired_name if '/' in unpaired_name
                                else (wsname + '/' + unpaired_name))
                obj_ids.append({'ref': unpaired_ref})
                libarg['ref_unpaired'] = unpaired_ref

        ws = workspaceService(self.workspaceURL, token=token)
        ws_info = ws.get_object_info_new({'objects': obj_ids})

        reads_params = []
        reftoname = {}
        for info, oid in zip(ws_info, obj_ids):
            ref = oid['ref']
            reads_params.append(ref)
            # info[1] is the object name, info[7] the workspace name.
            reftoname[ref] = info[7] + '/' + info[1]

        readcli = ReadsUtils(self.callbackURL, token=token,
                             service_ver='dev')

        # Marker text present in the server error for unsupported types.
        typeerr = ('Supported types: KBaseFile.SingleEndLibrary ' +
                   'KBaseFile.PairedEndLibrary ' +
                   'KBaseAssembly.SingleEndLibrary ' +
                   'KBaseAssembly.PairedEndLibrary')

        download_params = {'read_libraries': reads_params,
                           'interleaved': 'true',
                           'gzipped': None}
        try:
            reads = readcli.download_reads(download_params)['files']
        except ServerError as se:
            self.log('logging stacktrace from dynamic client error')
            self.log(se.data)
            if typeerr not in se.message:
                raise
            prefix = se.message.split('.')[0]
            raise ValueError(
                prefix + '. Only the types ' +
                'KBaseAssembly.PairedEndLibrary ' +
                'and KBaseFile.PairedEndLibrary are supported')

        self.log('Got reads data from converter:\n' + pformat(reads))
        print("READS:")
        pprint(reads)
        return reads
示例#13
0
def download_interleaved_reads(callback_url, reads_upa):
    """Download `reads_upa` as a single interleaved, uncompressed FASTQ; returns its files info."""
    download_params = {
        'read_libraries': [reads_upa],
        'interleaved': 'true',
        'gzipped': None
    }
    result = ReadsUtils(callback_url).download_reads(download_params)
    return result['files'][reads_upa]
示例#14
0
    def setUpClass(cls):
        """
        One-time fixture: build an authenticated call context from the
        environment, load the SetAPI deployment config, create a fresh
        workspace, and upload two interleaved reads objects
        (cls.read1ref / cls.read2ref) for the tests to share.
        """
        token = environ.get('KB_AUTH_TOKEN', None)
        # NOTE(review): auth endpoint is hard-coded to production kbase.us —
        # confirm this is intended for non-production environments.
        user_id = requests.post(
            'https://kbase.us/services/authorization/Sessions/Login',
            data='token={}&fields=user_id'.format(token)).json()['user_id']
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'user_id': user_id,
                        'provenance': [
                            {'service': 'SetAPI',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        # Flatten the [SetAPI] section of the deployment config into a dict.
        for nameval in config.items('SetAPI'):
            cls.cfg[nameval[0]] = nameval[1]
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL, token=token)
        cls.serviceImpl = SetAPI(cls.cfg)
        cls.serviceWizardURL = cls.cfg['service-wizard']
        cls.dataPaletteServiceVersion = cls.cfg['datapaletteservice-version']


        # setup data at the class level for now (so that the code is run
        # once for all tests, not before each test case.  Not sure how to
        # do that outside this function..)
        # Millisecond timestamp keeps concurrent runs from colliding on names.
        suffix = int(time.time() * 1000)
        wsName = "test_SetAPI_" + str(suffix)
        ret = cls.wsClient.create_workspace({'workspace': wsName})
        cls.wsName = wsName

        # copy test file to scratch area
        fq_filename = "interleaved.fastq"
        fq_path = os.path.join(cls.cfg['scratch'], fq_filename)
        shutil.copy(os.path.join("data", fq_filename), fq_path)

        # Upload the same interleaved FASTQ twice, as two distinct objects.
        ru = ReadsUtils(os.environ['SDK_CALLBACK_URL'])
        cls.read1ref = ru.upload_reads({
                'fwd_file': fq_path,
                'sequencing_tech': 'tech1',
                'wsname': wsName,
                'name': 'reads1',
                'interleaved':1
            })['obj_ref']
        cls.read2ref = ru.upload_reads({
                'fwd_file': fq_path,
                'sequencing_tech': 'tech2',
                'wsname': wsName,
                'name': 'reads2',
                'interleaved':1
            })['obj_ref']
示例#15
0
 def __init__(self, config):
     """
     Cache endpoints/credentials from `config`, create a unique per-run
     scratch directory, and build the service clients used for SRA import.

     :param config: dict with 'SDK_CALLBACK_URL', 'KB_AUTH_TOKEN' and
         'scratch' entries.
     """
     self.callback_url = config['SDK_CALLBACK_URL']
     self.token = config['KB_AUTH_TOKEN']
     # Unique subdirectory so parallel imports don't collide in scratch.
     self.scratch = os.path.join(config['scratch'],
                                 'import_SRA_' + str(uuid.uuid4()))
     handler_utils._mkdir_p(self.scratch)
     self.dfu = DataFileUtil(self.callback_url)
     self.ru = ReadsUtils(self.callback_url)
     self.uploader_utils = UploaderUtil(config)
示例#16
0
    def run_mash_sketch(self, ctx, params):
        """
        Generate a sketch file from a fasta/fastq file
        :param params: instance of type "MashSketchParams" (* * Pass in **one
           of** input_path, assembly_ref, or reads_ref *   input_path -
           string - local file path to an input fasta/fastq *   assembly_ref
           - string - workspace reference to an Assembly type *   reads_ref -
           string - workspace reference to a Reads type * Optionally, pass in
           a boolean indicating whether you are using paired-end reads. *
           paired_ends - boolean - whether you are passing in paired ends) ->
           structure: parameter "input_path" of String, parameter
           "assembly_ref" of String, parameter "reads_ref" of String,
           parameter "paired_ends" of type "boolean" (params:
           input_upa: workspace reference to an assembly object
           workspace_name: name of current workspace search_db: database to
           search n_max_results: number of results to return, integer between
           1 and 100)
        :returns: instance of type "MashSketchResults" (* * Returns the local
           scratch file path of the generated sketch file. * Will have the
           extension '.msh') -> structure: parameter "sketch_path" of String
        """
        # ctx is the context object
        # return variables are: results
        #BEGIN run_mash_sketch
        # Resolve the input to a local fasta/fastq path from whichever of
        # the three mutually-exclusive params was supplied.
        if 'reads_ref' in params:
            reads_ref = params['reads_ref']
            download = ReadsUtils(self.callbackURL).download_reads({
                'read_libraries': [reads_ref],
                'interleaved': 'true'
            })
            input_path = download['files'][reads_ref]['files']['fwd']
        elif 'assembly_ref' in params:
            fasta = AssemblyUtil(self.callbackURL).get_assembly_as_fasta(
                {'ref': params['assembly_ref']})
            input_path = fasta['path']
        elif 'input_path' in params:
            input_path = params['input_path']
        else:
            raise ValueError(
                'Invalid params; must provide one of `reads_ref`, `assembly_ref`, or `input_path`.'
            )
        mash_utils = MashUtils(self.config, self.auth_token)
        sketch_path = mash_utils.mash_sketch(
            input_path, paired_ends=params.get('paired_ends'))
        results = {'sketch_path': sketch_path}
        #END run_mash_sketch

        # At some point might do deeper type checking...
        if not isinstance(results, dict):
            raise ValueError('Method run_mash_sketch return value ' +
                             'results is not type dict as required.')
        # return the results
        return [results]
    def upload_test_reads(cls):
        """
        Seeding an initial SE and PE Reads objects to test filtering

        Uploads one single-end and one paired-end library from the bundled
        test FASTQ files, records the refs on the class, and returns the
        shock node ids of the uploaded files so tearDown can delete them.
        """
        header = dict()
        header["Authorization"] = "Oauth {0}".format(cls.token)
        # readsUtils_Client = ReadsUtils(url=self.callback_url, token=ctx['token'])  # SDK local
        readsUtils_Client = ReadsUtils(os.environ['SDK_CALLBACK_URL'],
                                       token=cls.token)

        temp_nodes = []
        fwdtf = 'small_forward.fq'
        revtf = 'small_reverse.fq'
        fwdtarget = os.path.join(cls.scratch, fwdtf)
        revtarget = os.path.join(cls.scratch, revtf)
        print "CWD: " + str(os.getcwd())
        # Stage the test FASTQ files into scratch before uploading.
        shutil.copy('/kb/module/test/data/' + fwdtf, fwdtarget)
        shutil.copy('/kb/module/test/data/' + revtf, revtarget)

        # Upload single end reads
        cls.se_reads_reference = \
            readsUtils_Client.upload_reads({'wsname': cls.getWsName(),
                                            'name': "se_reads",
                                            'sequencing_tech': 'Illumina',
                                            'fwd_file': fwdtarget}
                                           )['obj_ref']

        # Fetch the stored object to extract its underlying shock node id.
        se_data = cls.dfu.get_objects(
            {'object_refs':
             [cls.getWsName() + '/se_reads']})['data'][0]['data']

        temp_nodes.append(se_data['lib']['file']['id'])

        # Upload paired end reads
        cls.pe_reads_reference = \
            readsUtils_Client.upload_reads({'wsname': cls.getWsName(),
                                            'name': "pe_reads",
                                            'sequencing_tech': 'Illumina',
                                            'fwd_file': fwdtarget,
                                            'rev_file': revtarget,
                                            'insert_size_mean': 42,
                                            'insert_size_std_dev': 10,
                                            }
                                           )['obj_ref']
        pe_data = cls.dfu.get_objects(
            {'object_refs':
             [cls.getWsName() + '/pe_reads']})['data'][0]['data']
        # Paired-end objects store the forward file under 'lib1'.
        temp_nodes.append(pe_data['lib1']['file']['id'])

        return temp_nodes
示例#18
0
    def setUpClass(cls):
        """
        One-time fixture for the kb_cufflinks tests: build an authenticated
        context from the deployment config, obtain (reuse, undelete, or
        create) a test workspace, construct all service clients, and stage
        shared test data via cls.prepare_data().
        """
        token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)

        # Flatten the [kb_cufflinks] config section into a plain dict.
        for nameval in config.items('kb_cufflinks'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({'token': token,
                        'user_id': user_id,
                        'provenance': [
                            {'service': 'kb_cufflinks',
                             'method': 'please_never_use_it_in_production',
                             'method_params': []
                             }],
                        'authenticated': 1})
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = Workspace(url=cls.wsURL, token=token)
        cls.serviceImpl = kb_cufflinks(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = environ.get('SDK_CALLBACK_URL')
        cls.srv_wiz_url = cls.cfg['srv-wiz-url']

        # cls.wsName = 'cufflinks_test_' + user_id  # reuse existing workspace
        suffix = int(time.time() * 1000)
        cls.wsName = "test_kb_cufflinks_" + str(suffix)
        print('workspace_name: ' + cls.wsName)

        # Three-way workspace acquisition: undelete a previously torn-down
        # workspace, else create it, else fetch the still-live one.  The
        # broad BaseException catches are deliberate control flow here.
        try:
            # reuse existing (previously torn down) workspace
            cls.wsClient.undelete_workspace({'workspace': cls.wsName})
            print('reusing old workspace...')
        except BaseException:
            try:
                # create if workspace does not exist
                cls.wsClient.create_workspace({'workspace': cls.wsName})
            except BaseException:
                # get workspace if it exists and was not previously deleted (previously
                # not torn down)
                ws_info = cls.wsClient.get_workspace_info({'workspace': cls.wsName})
                print("creating new workspace: " + str(ws_info))

        cls.dfu = DataFileUtil(cls.callback_url)

        cls.gfu = GenomeFileUtil(cls.callback_url)
        cls.ru = ReadsUtils(cls.callback_url)
        cls.rau = ReadsAlignmentUtils(cls.callback_url)
        cls.set_api = SetAPI(cls.srv_wiz_url, service_ver='dev')

        cls.cufflinks_runner = CufflinksUtils(cls.cfg)

        cls.prepare_data()
 def setUpClass(cls):
     """
     One-time fixture for the AlignmentSetEditor tests: build an
     authenticated context from the deployment config, construct service
     clients, and create a uniquely-named test workspace.
     """
     token = environ.get('KB_AUTH_TOKEN', None)
     config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
     cls.cfg = {}
     config = ConfigParser()
     config.read(config_file)
     # Flatten the [AlignmentSetEditor] config section into a plain dict.
     for nameval in config.items('AlignmentSetEditor'):
         cls.cfg[nameval[0]] = nameval[1]
     # Getting username from Auth profile for token
     authServiceUrl = cls.cfg['auth-service-url']
     auth_client = _KBaseAuth(authServiceUrl)
     user_id = auth_client.get_user(token)
     # WARNING: don't call any logging methods on the context object,
     # it'll result in a NoneType error
     cls.ctx = MethodContext(None)
     cls.ctx.update({'token': token,
                     'user_id': user_id,
                     'provenance': [
                         {'service': 'AlignmentSetEditor',
                          'method': 'please_never_use_it_in_production',
                          'method_params': []
                          }],
                     'authenticated': 1})
     cls.wsURL = cls.cfg['workspace-url']
     cls.wsClient = workspaceService(cls.wsURL)
     cls.serviceImpl = AlignmentSetEditor(cls.cfg)
     cls.scratch = cls.cfg['scratch']
     cls.callback_url = os.environ['SDK_CALLBACK_URL']
     cls.setAPI = SetAPI(cls.callback_url)
     cls.gfu = GenomeFileUtil(cls.callback_url)
     cls.ru = ReadsUtils(cls.callback_url)
     cls.rau = ReadsAlignmentUtils(cls.callback_url)
     # Millisecond timestamp keeps concurrent runs from colliding on names.
     suffix = int(time.time() * 1000)
     cls.wsName = "test_AlignmentSetEditor_" + str(suffix)
     cls.wsClient.create_workspace({'workspace': cls.wsName})
示例#20
0
 def load_reads_file(self, tech, file_fwd, file_rev, target_name):
     """
     Loads FASTQ files as either SingleEndLibrary or PairedEndLibrary. If file_rev is None,
     then we get a single end, otherwise, paired.
     """
     params = {
         "wsname": self.ws_name,
         "fwd_file": file_fwd,
         "name": target_name,
         "sequencing_tech": tech
     }
     # Supplying a reverse file switches the upload to paired-end.
     if file_rev is not None:
         params["rev_file"] = file_rev
     result = ReadsUtils(self.callback_url).upload_reads(params)
     return result["obj_ref"]
示例#21
0
def load_pe_reads(fwd_file, rev_file):
    """
    Copies from given dir to scratch. Then calls ReadsUtils to upload from scratch.
    """
    callback_url = os.environ['SDK_CALLBACK_URL']
    fwd_scratch = file_to_scratch(fwd_file, overwrite=True)
    rev_scratch = file_to_scratch(rev_file, overwrite=True)
    upload_params = {
        'fwd_file': fwd_scratch,
        'rev_file': rev_scratch,
        'sequencing_tech': 'Illumina',
        'wsname': get_ws_name(),
        'name': 'MyPairedEndLibrary'
    }
    reads_util = ReadsUtils(callback_url)
    return reads_util.upload_reads(upload_params)['obj_ref']
    def loadSingleEndReads(self):
        """Upload (and cache at class level) a small single-end library; returns its ref."""
        cls = self.__class__
        if hasattr(cls, 'se_reads_ref'):
            return cls.se_reads_ref
        # Stage the test FASTQ into scratch before uploading.
        fq_path = os.path.join(self.scratch, 'reads_1_se.fq')
        shutil.copy(os.path.join('data', 'reads_1.fq'), fq_path)

        upload_params = {
            'fwd_file': fq_path,
            'wsname': self.getWsName(),
            'name': 'test_readsSE',
            'sequencing_tech': 'artificial reads'
        }
        se_reads_ref = ReadsUtils(self.callback_url).upload_reads(
            upload_params)['obj_ref']
        cls.se_reads_ref = se_reads_ref
        print('Loaded SingleEndReads: ' + se_reads_ref)
        return se_reads_ref
示例#23
0
    def loadSEReads(self, reads_file_path):
        """Upload the given FASTQ as a single-end reads object; returns its ref."""
        se_reads_name = os.path.basename(reads_file_path)
        # Stage the file into scratch so the callback service can see it.
        fq_path = os.path.join(self.scratch, se_reads_name)
        shutil.copy(reads_file_path, fq_path)

        upload_params = {
            'fwd_file': fq_path,
            'wsname': self.getWsName(),
            'name': se_reads_name.split('.')[0],
            'sequencing_tech': 'rnaseq reads'
        }
        return ReadsUtils(self.callback_url).upload_reads(
            upload_params)['obj_ref']
    def _upload_reads(self, refid, callbackURL, input_params):
        """
        Download the reads library `refid` and re-upload it under the name
        in input_params['output']; returns the new object ref.
        """
        ru = ReadsUtils(callbackURL)
        download = ru.download_reads({'read_libraries': [refid]})
        files = download['files'][refid]['files']

        fwd_file = files['fwd']
        otype = files['otype']

        # Parameters shared by every library shape.
        base_params = {
            'fwd_file': fwd_file,
            'wsname': input_params['workspace_name'],
            'name': input_params['output'],
            'sequencing_tech': input_params['sequencing_tech'],
            'single_genome': input_params['single_genome']
        }

        uploadReadParams = {}
        # case of interleaved
        if (otype == 'interleaved'):
            uploadReadParams = dict(base_params, rev_file='', interleaved=1)
        # case of separate pair
        if (otype == 'paired'):
            uploadReadParams = dict(base_params, rev_file=files['rev'])
        # case of single end
        if (otype == 'single'):
            uploadReadParams = dict(base_params, rev_file='')

        return ru.upload_reads(uploadReadParams)['obj_ref']
示例#25
0
def ru_reads_download(logger, ref, tdir, token):
    """
    Download a reads object via ReadsUtils and move its FASTQ file(s) into
    `tdir`.

    :param logger: logger used for progress messages and disk-space checks.
    :param ref: workspace reference of the reads object to download.
    :param tdir: target directory for the downloaded file(s).
    :param token: auth token passed to the ReadsUtils client.
    :returns: the ReadsUtils download result dict, augmented with 'fwd' (and
        'rev' for paired-end data) keys holding the final file paths in tdir.
    """
    check_disk_space(logger)
    logger.info("{0} will be downloaded and transferred to {1}".format(ref, tdir))
    ru = ReadsUtils(url=os.environ['SDK_CALLBACK_URL'], token=token)
    ds = ru.download_reads({"read_libraries": [ref], "interleaved": "false"})
    # NOTE: the original logged the same "will be downloaded" message three
    # times; the later messages now reflect what is actually happening.
    logger.info("{0} downloaded; moving files into {1}".format(ref, tdir))

    ds['fwd'] = os.path.join(tdir, os.path.basename(ds['files'][ref]['files']['fwd']))
    os.rename(ds['files'][ref]['files']['fwd'], ds['fwd'])
    if ds['files'][ref]['files']['type'] == 'paired':
        if ds['files'][ref]['files']['rev_name'] is None:
            # no original reverse filename available; use a fixed fallback
            ds['rev'] = os.path.join(tdir, 'rev.fastq')
        else:
            ds['rev'] = os.path.join(tdir, os.path.basename(ds['files'][ref]['files']['rev']))
        os.rename(ds['files'][ref]['files']['rev'], ds['rev'])
    logger.info("{0} download and transfer to {1} complete".format(ref, tdir))
    return ds
    def setUpClass(cls):
        """
        One-time test-class setup: read the deploy config, build an
        authenticated method context, create a fresh workspace, and construct
        every service/utility client the tests use, then stage test data.
        """
        # Credentials and callback server come from the environment.
        cls.token = environ.get('KB_AUTH_TOKEN', None)
        cls.callbackURL = environ.get('SDK_CALLBACK_URL')
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        # Flatten the [ExpressionUtils] section of the deploy config into a dict.
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('ExpressionUtils'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(cls.token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token':
            cls.token,
            'user_id':
            user_id,
            'provenance': [{
                'service': 'ExpressionUtils',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated':
            1
        })
        cls.shockURL = cls.cfg['shock-url']
        cls.wsURL = cls.cfg['workspace-url']
        cls.service_wizard_url = cls.cfg['srv-wiz-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.ws = Workspace(cls.wsURL, token=cls.token)
        cls.hs = HandleService(url=cls.cfg['handle-service-url'],
                               token=cls.token)
        # create workspace
        # Millisecond-timestamp suffix keeps concurrent test runs from colliding.
        wssuffix = int(time.time() * 1000)
        wsname = "test_expression_" + str(wssuffix)
        cls.wsinfo = cls.wsClient.create_workspace({'workspace': wsname})
        print('created workspace ' + cls.getWsName())

        # Service under test plus all utility clients used by the tests.
        cls.serviceImpl = ExpressionUtils(cls.cfg)
        cls.readUtils = ReadsUtils(cls.callbackURL)
        cls.dfu = DataFileUtil(cls.callbackURL, service_ver='dev')
        cls.dfu.ws_name_to_id(wsname)
        cls.assemblyUtil = AssemblyUtil(cls.callbackURL)
        cls.gfu = GenomeFileUtil(cls.callbackURL)
        cls.gaAPI = GenomeAnnotationAPI(cls.service_wizard_url)
        cls.rau = ReadsAlignmentUtils(cls.callbackURL)
        cls.scratch = cls.cfg['scratch']

        # Bookkeeping for data created during tests (cleaned up in tearDown).
        cls.staged = {}
        cls.nodes_to_delete = []
        cls.handles_to_delete = []
        cls.setupTestData()
示例#27
0
 def fetch_reads_files(self, reads_upas):
     """
     Download every reads object in `reads_upas` as an interleaved FASTQ file
     via ReadsUtils.

     :param reads_upas: list of reads object UPAs to fetch.
     :returns: dict mapping each reads UPA to the local path of its forward
         (interleaved) reads file.
     :raises ValueError: if `reads_upas` is None or empty.
     """
     if reads_upas is None:
         raise ValueError("reads_upas must be a list of UPAs")
     if len(reads_upas) == 0:
         raise ValueError("reads_upas must contain at least one UPA")
     reads_util = ReadsUtils(self.callback_url)
     download_result = reads_util.download_reads({
         'read_libraries': reads_upas,
         'interleaved': 'true',
         'gzipped': None
     })['files']
     # one forward-file path per requested UPA
     return {upa: info['files']['fwd'] for upa, info in download_result.items()}
示例#28
0
    def __init__(self, config):
        """
        Pull service URLs/credentials out of `config` and construct the SDK
        utility clients this class delegates to.

        :param config: dict with 'workspace-url', 'SDK_CALLBACK_URL',
            'KB_AUTH_TOKEN' and 'scratch' entries.
        """
        # Connection settings straight from the deploy config.
        self.ws_url = config["workspace-url"]
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.scratch = config['scratch']

        # One client per SDK utility, all pointed at the callback server.
        callback = self.callback_url
        self.dfu = DataFileUtil(callback)
        self.ru = ReadsUtils(callback)
        self.au = AssemblyUtil(callback)
        self.gfu = GenomeFileUtil(callback)
        self.rau = ReadsAlignmentUtils(callback)
示例#29
0
    def getPairedEndLibInfo(self):
        """
        Upload the small paired-end test reads once per test class and return
        the workspace object-info tuple for the resulting reads object.

        The result is cached on the class so repeated calls reuse the same
        uploaded object.
        """
        if hasattr(self.__class__, 'pairedEndLibInfo'):
            return self.__class__.pairedEndLibInfo

        # 1) stage the test FASTQ files into the shared scratch directory so
        #    the ReadsUtils container can see them
        shared_dir = "/kb/module/work/tmp"
        staged = []
        for src in ('data/small.forward.fq', 'data/small.reverse.fq'):
            dst = os.path.join(shared_dir, os.path.basename(src))
            shutil.copy(src, dst)
            staged.append(dst)
        forward_file, reverse_file = staged

        # 2) upload them as a (non-interleaved) paired-end library
        reads_util = ReadsUtils(os.environ['SDK_CALLBACK_URL'])
        upload_result = reads_util.upload_reads({'fwd_file': forward_file,
                                                 'rev_file': reverse_file,
                                                 'sequencing_tech': 'artificial reads',
                                                 'interleaved': 0,
                                                 'wsname': self.getWsName(),
                                                 'name': 'test.pe.reads'})
        paired_end_ref = upload_result['obj_ref']

        # 3) cache and return the object info for the new test dataset
        obj_info = self.ws.get_object_info_new({'objects': [{'ref': paired_end_ref}]})[0]
        self.__class__.pairedEndLibInfo = obj_info
        return obj_info
示例#30
0
    def setUpClass(cls):
        """
        One-time test-class setup: read the deploy config, build an
        authenticated method context, construct the service/utility clients,
        create a fresh test workspace, and stage shared test data.
        """
        cls.token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        # Flatten the [kb_ballgown] section of the deploy config into a dict.
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('kb_ballgown'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(cls.token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token':
            cls.token,
            'user_id':
            user_id,
            'provenance': [{
                'service': 'kb_ballgown',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated':
            1
        })
        cls.hs = HandleService(url=cls.cfg['handle-service-url'],
                               token=cls.token)
        cls.shockURL = cls.cfg['shock-url']
        cls.wsURL = cls.cfg['workspace-url']
        cls.wsClient = workspaceService(cls.wsURL, token=cls.token)
        cls.serviceImpl = kb_ballgown(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']

        # Utility clients used by the tests, all via the callback server.
        cls.gfu = GenomeFileUtil(cls.callback_url)
        cls.dfu = DataFileUtil(cls.callback_url)
        cls.ru = ReadsUtils(cls.callback_url)
        cls.rau = ReadsAlignmentUtils(cls.callback_url, service_ver='dev')
        cls.eu = ExpressionUtils(cls.callback_url, service_ver='dev')
        cls.set_api = SetAPI(cls.callback_url)

        # Millisecond-timestamp suffix keeps concurrent test runs from colliding.
        suffix = int(time.time() * 1000)
        cls.wsName = "test_kb_ballgown_" + str(suffix)
        #cls.wsName = "test_kb_ballgown_1004"
        cls.wsClient.create_workspace({'workspace': cls.wsName})

        # Bookkeeping for data created during tests (cleaned up in tearDown).
        cls.nodes_to_delete = []
        cls.handles_to_delete = []

        cls.prepare_data()
示例#31
0
    def getPairedEndLibInfo(self):
        """
        Upload the interleaved test FASTQ once per test class and return the
        workspace object-info tuple for the resulting paired-end reads object.

        The result is cached on the class so repeated calls reuse the same
        uploaded object.
        """
        if hasattr(self.__class__, 'pairedEndLibInfo'):
            return self.__class__.pairedEndLibInfo

        # Stage the local test file into the shared scratch space so the
        # ReadsUtils container can see it.
        source_fastq = 'data/interleaved.fastq'
        staged_fastq = os.path.join(self.scratch, os.path.basename(source_fastq))
        shutil.copy(source_fastq, staged_fastq)

        # Upload the staged file to KBase as an interleaved paired-end library.
        reads_util = ReadsUtils(os.environ['SDK_CALLBACK_URL'])
        upload_result = reads_util.upload_reads({'fwd_file': staged_fastq,
                                                 'sequencing_tech': 'artificial reads',
                                                 'interleaved': 1,
                                                 'wsname': self.getWsName(),
                                                 'name': 'test.pe.reads'})
        paired_end_ref = upload_result['obj_ref']

        # Cache and return the object metadata for the new test dataset.
        obj_info = self.ws.get_object_info_new({'objects': [{'ref': paired_end_ref}]})[0]
        self.__class__.pairedEndLibInfo = obj_info
        return obj_info
示例#32
0
    def run_megahit(self, ctx, params):
        """
        :param params: instance of type "MegaHitParams" (Run MEGAHIT.  Most
           parameters here are just passed forward to MEGAHIT workspace_name
           - the name of the workspace for input/output read_library_ref -
           the name of the PE read library (SE library support in the future)
           output_contig_set_name - the name of the output contigset
           megahit_parameter_preset - override a group of parameters;
           possible values: meta            '--min-count 2 --k-list
           21,41,61,81,99' (generic metagenomes, default) meta-sensitive 
           '--min-count 2 --k-list 21,31,41,51,61,71,81,91,99' (more
           sensitive but slower) meta-large      '--min-count 2 --k-list
           27,37,47,57,67,77,87' (large & complex metagenomes, like soil)
           bulk            '--min-count 3 --k-list 31,51,71,91,99 --no-mercy'
           (experimental, standard bulk sequencing with >= 30x depth)
           single-cell     '--min-count 3 --k-list 21,33,55,77,99,121
           --merge_level 20,0.96' (experimental, single cell data) min_count
           - minimum multiplicity for filtering (k_min+1)-mers, default 2
           min_k - minimum kmer size (<= 127), must be odd number, default 21
           max_k - maximum kmer size (<= 127), must be odd number, default 99
           k_step - increment of kmer size of each iteration (<= 28), must be
           even number, default 10 k_list - list of kmer size (all must be
           odd, in the range 15-127, increment <= 28); override `--k-min',
           `--k-max' and `--k-step' min_contig_length - minimum length of
           contigs to output, default is 2000 @optional
           megahit_parameter_preset @optional min_count @optional k_min
           @optional k_max @optional k_step @optional k_list @optional
           min_contig_length) -> structure: parameter "workspace_name" of
           String, parameter "read_library_ref" of String, parameter
           "output_contigset_name" of String, parameter
           "megahit_parameter_preset" of String, parameter "min_count" of
           Long, parameter "k_min" of Long, parameter "k_max" of Long,
           parameter "k_step" of Long, parameter "k_list" of list of Long,
           parameter "min_contig_length" of Long
        :returns: instance of type "MegaHitOutput" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN run_megahit
        print('Running run_megahit with params=')
        pprint(params)

        # STEP 1: basic parameter checks + parsing
        for required in ('workspace_name', 'read_library_ref', 'output_contigset_name'):
            if required not in params:
                raise ValueError(required + ' parameter is required')

        # STEP 2: get the read library as deinterleaved fastq files
        input_ref = params['read_library_ref']
        reads_params = {'read_libraries': [input_ref],
                        'interleaved': 'false',
                        'gzipped': None
                        }
        ru = ReadsUtils(self.callbackURL)
        reads = ru.download_reads(reads_params)['files']

        print('Input reads files:')
        fwd = reads[input_ref]['files']['fwd']
        rev = reads[input_ref]['files']['rev']
        pprint('forward: ' + fwd)
        pprint('reverse: ' + rev)

        # STEP 3: run megahit
        # construct the command; only PE reads are supported, so -1/-2 always go in
        megahit_cmd = [self.MEGAHIT, '-1', fwd, '-2', rev]

        # if a preset is defined, it overrides a whole group of parameters
        if params.get('megahit_parameter_preset'):
            megahit_cmd.extend(['--presets', params['megahit_parameter_preset']])

        # individual optional tuning parameters (falsy values are skipped,
        # matching the original behavior)
        if params.get('min_count'):
            megahit_cmd.extend(['--min-count', str(params['min_count'])])
        if params.get('k_min'):
            megahit_cmd.extend(['--k-min', str(params['k_min'])])
        if params.get('k_max'):
            megahit_cmd.extend(['--k-max', str(params['k_max'])])
        if params.get('k_step'):
            megahit_cmd.extend(['--k-step', str(params['k_step'])])
        if params.get('k_list'):
            megahit_cmd.extend(['--k-list',
                                ','.join(str(k_val) for k_val in params['k_list'])])

        min_contig_length = self.DEFAULT_MIN_CONTIG_LENGTH
        if params.get('min_contig_length'):
            if str(params['min_contig_length']).isdigit():
                min_contig_length = params['min_contig_length']
            else:
                raise ValueError('min_contig_length parameter must be a non-negative integer')
        megahit_cmd.extend(['--min-contig-len', str(min_contig_length)])

        # set the output location; timestamped so repeat runs don't collide
        timestamp = int((datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds() * 1000)
        output_dir = os.path.join(self.scratch, 'output.' + str(timestamp))
        megahit_cmd.extend(['-o', output_dir])

        # run megahit
        print('running megahit:')
        print('    ' + ' '.join(megahit_cmd))
        p = subprocess.Popen(megahit_cmd, cwd=self.scratch, shell=False)
        retcode = p.wait()

        print('Return code: ' + str(retcode))
        if retcode != 0:
            raise ValueError('Error running MEGAHIT, return code: ' +
                             str(retcode) + '\n')

        output_contigs = os.path.join(output_dir, 'final.contigs.fa')

        # on macs, we cannot run megahit in the shared host scratch space, so we need to move the file there
        if self.mac_mode:
            shutil.move(output_contigs, os.path.join(self.host_scratch, 'final.contigs.fa'))
            output_contigs = os.path.join(self.host_scratch, 'final.contigs.fa')

        # STEP 4: save the resulting assembly
        assemblyUtil = AssemblyUtil(self.callbackURL)
        output_data_ref = assemblyUtil.save_assembly_from_fasta({
                                                                'file': {'path': output_contigs},
                                                                'workspace_name': params['workspace_name'],
                                                                'assembly_name': params['output_contigset_name']
                                                                })


        # STEP 5: generate and save the report

        # compute a simple contig length distribution for the report
        lengths = [len(seq_record.seq) for seq_record in SeqIO.parse(output_contigs, 'fasta')]

        report = ''
        report += 'ContigSet saved to: ' + params['workspace_name'] + '/' + params['output_contigset_name'] + '\n'
        report += 'Assembled into ' + str(len(lengths)) + ' contigs.\n'
        # BUG FIX: with zero contigs the original divided by len(lengths) and
        # histogrammed an empty list, both of which raise; guard that case.
        if lengths:
            report += 'Avg Length: ' + str(sum(lengths) / float(len(lengths))) + ' bp.\n'

            bins = 10
            counts, edges = np.histogram(lengths, bins)
            report += 'Contig Length Distribution (# of contigs -- min to max basepairs):\n'
            for c in range(bins):
                report += '   ' + str(counts[c]) + '\t--\t' + str(edges[c]) + ' to ' + str(edges[c + 1]) + ' bp\n'
        else:
            report += 'No contigs were produced at the requested minimum contig length.\n'

        print('Running QUAST')
        kbq = kb_quast(self.callbackURL)
        try:
            quastret = kbq.run_QUAST({'files': [{'path': output_contigs,
                                                 'label': params['output_contigset_name']}]})
        except QUASTError as qe:
            # not really any way to test this, all inputs have been checked earlier and should be
            # ok 
            print('Logging exception from running QUAST')
            print(str(qe))
            # TODO delete shock node
            raise

        print('Saving report')
        kbr = KBaseReport(self.callbackURL)
        try:
            report_info = kbr.create_extended_report(
                {'message': report,
                 'objects_created': [{'ref': output_data_ref, 'description': 'Assembled contigs'}],
                 'direct_html_link_index': 0,
                 'html_links': [{'shock_id': quastret['shock_id'],
                                 'name': 'report.html',
                                 'label': 'QUAST report'}
                                ],
                 'report_object_name': 'kb_megahit_report_' + str(uuid.uuid4()),
                 'workspace_name': params['workspace_name']
                 })
        except _RepError as re:
            # not really any way to test this, all inputs have been checked earlier and should be
            # ok 
            print('Logging exception from creating report object')
            print(str(re))
            # TODO delete shock node
            raise

        # STEP 6: contruct the output to send back
        output = {'report_name': report_info['name'], 'report_ref': report_info['ref']}

        #END run_megahit

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method run_megahit return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
示例#33
0
    def fastqutils_stats(self, ctx, params):
        """
        :param params: instance of type "FastqUtilsStatsParams" -> structure:
           parameter "workspace_name" of type "workspace_name" (A string
           representing a workspace name.), parameter "read_library_ref" of
           type "read_library_ref" (A string representing a ContigSet id.)
        :returns: instance of type "FastqUtilsStatsResult" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN fastqutils_stats

        print('Running fastqutils_stats with params=')
        print(pformat(params))

        if 'workspace_name' not in params:
            raise ValueError('workspace_name parameter is required')
        if 'read_library_ref' not in params:
            raise ValueError('read_library_ref parameter is required')

        # Get the read library as deinterleaved fastq files
        input_ref = params['read_library_ref']
        reads_params = {'read_libraries': [input_ref],
                        'interleaved': 'false',
                        'gzipped': None
                        }
        ru = ReadsUtils(self.callbackURL, token=ctx['token'])
        reads = ru.download_reads(reads_params)['files']
        files = [reads[input_ref]['files']['fwd']]
        if reads[input_ref]['files']['rev']:
            files.append(reads[input_ref]['files']['rev'])
        print('running on files:')
        for f in files:
            print(f)

        # base command; a fresh per-file copy is made inside the loop
        stats_cmd = [self.FASTQUTILS, 'stats']

        report = ''
        for f in files:
            # BUG FIX: the original did `cmd = stats_cmd` (an alias) and then
            # appended, so the command for each later file also carried every
            # previous file. Build a fresh list per iteration instead.
            cmd = stats_cmd + [f]

            report += '============== ' + f + ' ==============\n'
            print('running: ' + ' '.join(cmd))
            p = subprocess.Popen(cmd,
                                 cwd=self.scratch,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 shell=False)

            # stream the tool's combined stdout/stderr into the report
            while True:
                line = p.stdout.readline()
                if not line:
                    break
                report += line
                print(line.replace('\n', ''))

            p.stdout.close()
            p.wait()
            report += "\n\n"
            print('return code: ' + str(p.returncode))
            if p.returncode != 0:
                raise ValueError('Error running ' + self.FASTQUTILS + ', return code: ' + str(p.returncode))


        reportObj = {
            'objects_created': [],
            'text_message': report
        }
        # distinct name so the textual `report` above is not shadowed
        kbr = KBaseReport(self.callbackURL)
        report_info = kbr.create({'report': reportObj, 'workspace_name': params['workspace_name']})
        returnVal = {'report_name': report_info['name'], 'report_ref': report_info['ref']}

        #END fastqutils_stats

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method fastqutils_stats return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]