def upload_fastq(self, ctx, params):
    """
    :param params: instance of type "UploadFastqParams" (testing
       invocation of ReadsUtils) -> structure: parameter "fwd_id" of
       String, parameter "wsid" of Long, parameter "wsname" of String,
       parameter "objid" of Long, parameter "name" of String, parameter
       "rev_id" of String, parameter "sequencing_tech" of String
    :returns: instance of type "UploadFastqObjref"
    """
    # ctx is the context object
    # return variables are: objref
    #BEGIN upload_fastq
    print("upload_fastq params:")
    pprint.pprint(params)

    ReadsUtils_instance = ReadsUtils(url=self.callbackURL,
                                     token=ctx['token'],
                                     service_ver='dev')
    print("got ReadsUtils instance")

    method_retVal = ReadsUtils_instance.upload_reads(params)
    print("back from ReadsUtils_instance.upload_reads")
    # pprint is imported as a module here, so call pprint.pprint()
    pprint.pprint(method_retVal)

    objref = "Vooch"  # placeholder return value
    #END upload_fastq

    # At some point might do deeper type checking...
    if not isinstance(objref, basestring):
        raise ValueError('Method upload_fastq return value ' +
                         'objref is not type basestring as required.')
    # return the results
    return [objref]
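# Usage sketch (an assumption, not part of the original module): upload_fastq
# passes `params` straight through to ReadsUtils.upload_reads, so a minimal
# call needs a Shock node ID plus workspace/object naming fields. `impl`,
# `ctx`, and all values below are placeholders.
def example_upload_fastq_call(impl, ctx):
    params = {
        'fwd_id': '<shock_node_id>',   # placeholder Shock node holding the FASTQ
        'wsname': 'my_workspace',      # placeholder target workspace name
        'name': 'my_reads',            # placeholder output object name
        'sequencing_tech': 'Illumina',
    }
    return impl.upload_fastq(ctx, params)[0]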
def _stage_input_file(self, cutadapt_runner, ref, reads_type):
    ru = ReadsUtils(self.callbackURL)
    # NOTE: the original `reads_type == X or 'Y'` comparisons were always
    # truthy; membership tests give the intended behavior.
    if reads_type in ('KBaseFile.PairedEndLibrary',
                      'KBaseAssembly.PairedEndLibrary'):
        input_file_info = ru.download_reads({
            'read_libraries': [ref],
            'interleaved': 'true'
        })['files'][ref]
    elif reads_type in ('KBaseFile.SingleEndLibrary',
                        'KBaseAssembly.SingleEndLibrary'):
        input_file_info = ru.download_reads({
            'read_libraries': [ref]
        })['files'][ref]
    else:
        raise ValueError("Can't download_reads() for object type: '" +
                         str(reads_type) + "'")
    input_file_info['input_ref'] = ref
    file_location = input_file_info['files']['fwd']

    # DEBUG
    # with open(file_location, 'r', 0) as fasta_file:
    #     for line in fasta_file.readlines():
    #         print("LINE: '" + line + "'\n")

    interleaved = False
    if input_file_info['files']['type'] == 'interleaved':
        interleaved = True
    cutadapt_runner.set_interleaved(interleaved)
    cutadapt_runner.set_input_file(file_location)
    return input_file_info
def test_fastqc_app(self):
    # create ws, and load test reads
    wsName = self.getWsName()
    ru = ReadsUtils(os.environ['SDK_CALLBACK_URL'])
    input_file_ref = ru.upload_reads({
        'fwd_file': self.small_fq_test_file2,
        'sequencing_tech': 'tech1',
        'wsname': wsName,
        'name': 'reads1',
        'interleaved': 1
    })['obj_ref']

    input_params = {'input_ws': wsName, 'input_file_ref': input_file_ref}
    output = self.getImpl().runFastQC(self.getContext(), input_params)[0]
    self.assertIn('report_name', output)
    self.assertIn('report_ref', output)
    # pprint(output)

    report = self.getWsClient().get_objects2(
        {'objects': [{'ref': output['report_ref']}]})['data'][0]['data']
    # pprint(report)
    self.assertIn('direct_html', report)
    self.assertIn('file_links', report)
    self.assertIn('html_links', report)
    self.assertIn('objects_created', report)
    self.assertIn('text_message', report)
def prepare_single_run(self, input_info, assembly_or_genome_ref,
                       bowtie2_index_info, ws_for_cache):
    ''' Given a reads ref and an assembly, setup the bowtie2 index '''
    # first setup the bowtie2 index of the assembly
    input_configuration = {'bowtie2_index_info': bowtie2_index_info}
    if not bowtie2_index_info:
        bowtie2IndexBuilder = Bowtie2IndexBuilder(self.scratch_dir,
                                                  self.workspace_url,
                                                  self.callback_url,
                                                  self.srv_wiz_url,
                                                  self.provenance)
        index_result = bowtie2IndexBuilder.get_index({'ref': assembly_or_genome_ref,
                                                      'ws_for_cache': ws_for_cache})
        input_configuration['bowtie2_index_info'] = index_result

    # next download the reads
    read_lib_ref = input_info['ref']
    read_lib_info = input_info['info']
    reads_params = {'read_libraries': [read_lib_ref],
                    'interleaved': 'false',
                    'gzipped': None}
    ru = ReadsUtils(self.callback_url)
    reads = ru.download_reads(reads_params)['files']

    input_configuration['reads_lib_type'] = self.get_type_from_obj_info(read_lib_info).split('.')[1]
    input_configuration['reads_files'] = reads[read_lib_ref]
    input_configuration['reads_lib_ref'] = read_lib_ref

    return input_configuration
def get_reads_RU(self, ctx, refs, console):
    readcli = ReadsUtils(self.callbackURL, token=ctx['token'],
                         service_ver='dev')

    typeerr = ('Supported types: KBaseFile.SingleEndLibrary ' +
               'KBaseFile.PairedEndLibrary ' +
               'KBaseAssembly.SingleEndLibrary ' +
               'KBaseAssembly.PairedEndLibrary')
    try:
        reads = readcli.download_reads({
            'read_libraries': refs,
            'interleaved': 'true',
            'gzipped': None
        })['files']
    except ServerError as se:
        self.log(console, 'logging stacktrace from dynamic client error')
        self.log(console, se.data)
        if typeerr in se.message:
            prefix = se.message.split('.')[0]
            raise ValueError(
                prefix + '. Only the types ' +
                'KBaseAssembly.PairedEndLibrary ' +
                'and KBaseFile.PairedEndLibrary are supported')
        else:
            raise

    self.log(console, 'Got reads data from converter:\n' + pformat(reads))
    return reads
def get_ea_utils_result(self, refid, input_params):
    ref = [refid]
    DownloadReadsParams = {'read_libraries': ref}
    dfUtil = ReadsUtils(self.callbackURL)
    x = dfUtil.download_reads(DownloadReadsParams)

    report = ''
    fwd_file = x['files'][ref[0]]['files']['fwd']
    otype = x['files'][ref[0]]['files']['otype']

    # case of interleaved
    if otype == 'interleaved':
        report += self.get_report_string(fwd_file)
    # case of separate pair
    if otype == 'paired':
        report += self.get_report_string(fwd_file)
        rev_file = x['files'][ref[0]]['files']['rev']
        report += self.get_report_string(rev_file)
    # case of single end
    if otype == 'single':
        report += self.get_report_string(fwd_file)
    # print(report)
    return report
def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.scratch = config['scratch']
    self.dfu = DataFileUtil(self.callback_url)
    self.ru = ReadsUtils(self.callback_url)
def fetch_reads_from_reference(ref, callback_url):
    """
    Fetch a FASTQ file (or 2 for paired-end) from a reads reference.
    Returns the following structure:
    {
        "style": "paired", "single", or "interleaved",
        "file_fwd": path_to_file,
        "file_rev": path_to_file,  # only present if paired end
        "object_ref": reads reference for downstream convenience
    }
    """
    try:
        print("Fetching reads from object {}".format(ref))
        reads_client = ReadsUtils(callback_url)
        reads_dl = reads_client.download_reads({
            "read_libraries": [ref],
            "interleaved": "false"
        })
        pprint(reads_dl)
        reads_files = reads_dl['files'][ref]['files']
        ret_reads = {
            "object_ref": ref,
            "style": reads_files["type"],
            "file_fwd": reads_files["fwd"]
        }
        if reads_files.get("rev", None) is not None:
            ret_reads["file_rev"] = reads_files["rev"]
        return ret_reads
    except Exception:
        print("Unable to fetch a file from expected reads object {}".format(ref))
        raise
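# For reference (a sketch inferred from the keys these snippets use, not an
# authoritative spec): the download_reads() result unpacked above is shaped
# roughly like this, keyed by the requested reads reference. All paths and
# the UPA are placeholders.
EXAMPLE_READS_DL = {
    'files': {
        '12345/6/7': {                          # placeholder reads reference (UPA)
            'files': {
                'type': 'paired',               # 'single', 'paired', or 'interleaved'
                'otype': 'paired',              # type of the originally stored data
                'fwd': '/scratch/reads.fwd.fastq',  # placeholder local path
                'rev': '/scratch/reads.rev.fastq',  # absent/None for single-end data
            },
        },
    },
}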
def __init__(self, config, ctx):
    self.ctx = ctx
    self.scratch = os.path.abspath(config['scratch'])
    self.ws_url = config['workspace-url']
    self.serviceWizardURL = config['srv-wiz-url']
    self.callbackURL = config['SDK_CALLBACK_URL']
    if not os.path.exists(self.scratch):
        os.makedirs(self.scratch)

    self.SE_flag = 'SE'
    self.PE_flag = 'PE'

    SERVICE_VER = 'release'

    # readsUtils_Client
    try:
        self.readsUtils_Client = ReadsUtils(self.callbackURL,
                                            token=self.ctx['token'],
                                            service_ver=SERVICE_VER)
    except Exception as e:
        raise ValueError('Unable to instantiate readsUtils_Client with callbackURL: ' +
                         self.callbackURL + ' ERROR: ' + str(e))

    # setAPI_Client
    try:
        # setAPI_Client = SetAPI(url=self.callbackURL, token=self.ctx['token'])  # for SDK local; local doesn't work for SetAPI
        self.setAPI_Client = SetAPI(url=self.serviceWizardURL,
                                    token=self.ctx['token'])  # for dynamic service
    except Exception as e:
        raise ValueError('Unable to instantiate setAPI_Client with serviceWizardURL: ' +
                         self.serviceWizardURL + ' ERROR: ' + str(e))
def loadPairedEndReads(self):
    if hasattr(self.__class__, 'pairedEndLibInfo'):
        return self.__class__.pairedEndLibInfo
    # 1) upload files to shock
    shared_dir = "/kb/module/work/tmp"
    forward_data_file = '../work/testReads/small.forward.fq'
    forward_file = os.path.join(shared_dir, os.path.basename(forward_data_file))
    shutil.copy(forward_data_file, forward_file)
    reverse_data_file = '../work/testReads/small.reverse.fq'
    reverse_file = os.path.join(shared_dir, os.path.basename(reverse_data_file))
    shutil.copy(reverse_data_file, reverse_file)

    ru = ReadsUtils(os.environ['SDK_CALLBACK_URL'])
    pe_reads_ref = ru.upload_reads({
        'fwd_file': forward_file,
        'rev_file': reverse_file,
        'sequencing_tech': 'artificial reads',
        'interleaved': 0,
        'wsname': self.getWsName(),
        'name': 'test_pe_reads'
    })['obj_ref']

    self.__class__.pe_reads_ref = pe_reads_ref
    print('Loaded PairedEndReads: ' + pe_reads_ref)
    new_obj_info = self.wsClient.get_object_info_new(
        {'objects': [{'ref': pe_reads_ref}]})
    self.__class__.pairedEndLibInfo = new_obj_info[0]
    pprint(pformat(new_obj_info))
    # return new_obj_info[0]
    return pe_reads_ref
def _package_result(self, output_file, output_name, ws_name_or_id,
                    data_info, report):
    upload_params = {'fwd_file': output_file, 'name': output_name}

    if str(ws_name_or_id).isdigit():
        upload_params['wsid'] = int(ws_name_or_id)
    else:
        upload_params['wsname'] = str(ws_name_or_id)

    fields = ['sequencing_tech',
              'strain',
              'source',
              'read_orientation_outward',
              'insert_size_mean',
              'insert_size_std_dev']

    if ('input_ref' in data_info and data_info['input_ref'] is not None
            and data_info['sequencing_tech']):
        upload_params['source_reads_ref'] = data_info['input_ref']
    else:
        for f in fields:
            if f in data_info:
                upload_params[f] = data_info[f]
        if 'single_genome' in data_info:
            if data_info['single_genome'] == 'true':
                upload_params['single_genome'] = 1
            elif data_info['single_genome'] == 'false':
                upload_params['single_genome'] = 0
        if 'sequencing_tech' not in upload_params:
            upload_params['sequencing_tech'] = 'unknown'
        if not upload_params['sequencing_tech']:
            upload_params['sequencing_tech'] = 'unknown'

    if data_info['files']['type'] == 'interleaved':
        upload_params['interleaved'] = 1

    ru = ReadsUtils(self.callbackURL)
    result = ru.upload_reads(upload_params)

    # THE REPORT MUST BE CREATED OUTSIDE SO THAT LIBS AND SETS ARE HANDLED
    """
    # create report
    kbreport = KBaseReport(self.callbackURL)
    rep = kbreport.create({
        'report': {
            'text_message': report,
            'objects_created': [{
                "ref": str(ws_name_or_id) + '/' + upload_params['name'],
                "description": ''
            }]
        },
        "workspace_name": str(ws_name_or_id)
    })
    return {
        'report_ref': rep['ref'],
        'report_name': rep['name'],
        'output_reads_ref': result['obj_ref']
    }
    """
    return {'report': report, 'output_reads_ref': result['obj_ref']}
def get_input_reads(self, params, token):
    print('in get input reads')

    wsname = params[self.PARAM_IN_WS]
    libfile_args = params[self.PARAM_IN_LIBFILE_ARGS]

    obj_ids = []
    for libarg in libfile_args:
        read_name = libarg[self.PARAM_IN_LIBRARY]
        r = read_name if '/' in read_name else (wsname + '/' + read_name)
        obj_ids.append({'ref': r})
        libarg['ref_library'] = r

        if self.PARAM_IN_UNPAIRED in libarg and libarg[self.PARAM_IN_UNPAIRED] is not None:
            read_name = libarg[self.PARAM_IN_UNPAIRED]
            r = read_name if '/' in read_name else (wsname + '/' + read_name)
            obj_ids.append({'ref': r})
            libarg['ref_unpaired'] = r

    ws = workspaceService(self.workspaceURL, token=token)
    ws_info = ws.get_object_info_new({'objects': obj_ids})
    reads_params = []
    reftoname = {}
    for wsi, oid in zip(ws_info, obj_ids):
        ref = oid['ref']
        reads_params.append(ref)
        obj_name = wsi[1]
        reftoname[ref] = wsi[7] + '/' + obj_name

    readcli = ReadsUtils(self.callbackURL, token=token, service_ver='dev')

    typeerr = ('Supported types: KBaseFile.SingleEndLibrary ' +
               'KBaseFile.PairedEndLibrary ' +
               'KBaseAssembly.SingleEndLibrary ' +
               'KBaseAssembly.PairedEndLibrary')
    try:
        reads = readcli.download_reads({'read_libraries': reads_params,
                                        'interleaved': 'true',
                                        'gzipped': None
                                        })['files']
    except ServerError as se:
        self.log('logging stacktrace from dynamic client error')
        self.log(se.data)
        if typeerr in se.message:
            prefix = se.message.split('.')[0]
            raise ValueError(
                prefix + '. Only the types ' +
                'KBaseAssembly.PairedEndLibrary ' +
                'and KBaseFile.PairedEndLibrary are supported')
        else:
            raise

    self.log('Got reads data from converter:\n' + pformat(reads))
    print("READS:")
    pprint(reads)
    return reads
def download_interleaved_reads(callback_url, reads_upa):
    ru = ReadsUtils(callback_url)
    reads_info = ru.download_reads({
        'read_libraries': [reads_upa],
        'interleaved': 'true',
        'gzipped': None
    })['files'][reads_upa]
    return reads_info
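# Usage sketch for download_interleaved_reads above (the UPA is a
# placeholder): the interleaved FASTQ lands on local disk at the 'fwd' path.
def example_download_interleaved(callback_url):
    reads_info = download_interleaved_reads(callback_url, '12345/6/7')
    return reads_info['files']['fwd']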
def setUpClass(cls):
    token = environ.get('KB_AUTH_TOKEN', None)
    user_id = requests.post(
        'https://kbase.us/services/authorization/Sessions/Login',
        data='token={}&fields=user_id'.format(token)).json()['user_id']
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'user_id': user_id,
                    'provenance': [
                        {'service': 'SetAPI',
                         'method': 'please_never_use_it_in_production',
                         'method_params': []
                         }],
                    'authenticated': 1})

    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('SetAPI'):
        cls.cfg[nameval[0]] = nameval[1]
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = workspaceService(cls.wsURL, token=token)
    cls.serviceImpl = SetAPI(cls.cfg)
    cls.serviceWizardURL = cls.cfg['service-wizard']
    cls.dataPaletteServiceVersion = cls.cfg['datapaletteservice-version']

    # setup data at the class level for now (so that the code is run
    # once for all tests, not before each test case. Not sure how to
    # do that outside this function..)
    suffix = int(time.time() * 1000)
    wsName = "test_SetAPI_" + str(suffix)
    ret = cls.wsClient.create_workspace({'workspace': wsName})
    cls.wsName = wsName

    # copy test file to scratch area
    fq_filename = "interleaved.fastq"
    fq_path = os.path.join(cls.cfg['scratch'], fq_filename)
    shutil.copy(os.path.join("data", fq_filename), fq_path)

    ru = ReadsUtils(os.environ['SDK_CALLBACK_URL'])
    cls.read1ref = ru.upload_reads({
        'fwd_file': fq_path,
        'sequencing_tech': 'tech1',
        'wsname': wsName,
        'name': 'reads1',
        'interleaved': 1
    })['obj_ref']
    cls.read2ref = ru.upload_reads({
        'fwd_file': fq_path,
        'sequencing_tech': 'tech2',
        'wsname': wsName,
        'name': 'reads2',
        'interleaved': 1
    })['obj_ref']
def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.scratch = os.path.join(config['scratch'],
                                'import_SRA_' + str(uuid.uuid4()))
    handler_utils._mkdir_p(self.scratch)
    self.dfu = DataFileUtil(self.callback_url)
    self.ru = ReadsUtils(self.callback_url)
    self.uploader_utils = UploaderUtil(config)
def run_mash_sketch(self, ctx, params):
    """
    Generate a sketch file from a fasta/fastq file
    :param params: instance of type "MashSketchParams" (* * Pass in
       **one of** input_path, assembly_ref, or reads_ref * input_path -
       string - local file path to an input fasta/fastq * assembly_ref -
       string - workspace reference to an Assembly type * reads_ref -
       string - workspace reference to a Reads type * Optionally, pass
       in a boolean indicating whether you are using paired-end reads. *
       paired_ends - boolean - whether you are passing in paired ends)
       -> structure: parameter "input_path" of String, parameter
       "assembly_ref" of String, parameter "reads_ref" of String,
       parameter "paired_ends" of type "boolean" (params: input_upa:
       workspace reference to an assembly object workspace_name: name of
       current workspace search_db: database to search n_max_results:
       number of results to return, integer between 1 and 100)
    :returns: instance of type "MashSketchResults" (* * Returns the
       local scratch file path of the generated sketch file. * Will have
       the extension '.msh') -> structure: parameter "sketch_path" of
       String
    """
    # ctx is the context object
    # return variables are: results
    #BEGIN run_mash_sketch
    if 'reads_ref' in params:
        reads_utils = ReadsUtils(self.callbackURL)
        result = reads_utils.download_reads({
            'read_libraries': [params['reads_ref']],
            'interleaved': 'true'
        })
        input_path = result['files'][params['reads_ref']]['files']['fwd']
    elif 'assembly_ref' in params:
        assembly_util = AssemblyUtil(self.callbackURL)
        result = assembly_util.get_assembly_as_fasta({'ref': params['assembly_ref']})
        input_path = result['path']
    elif 'input_path' in params:
        input_path = params['input_path']
    else:
        raise ValueError(
            'Invalid params; must provide one of `reads_ref`, `assembly_ref`, or `input_path`.'
        )
    mash_utils = MashUtils(self.config, self.auth_token)
    output_file_path = mash_utils.mash_sketch(input_path,
                                              paired_ends=params.get('paired_ends'))
    results = {'sketch_path': output_file_path}
    #END run_mash_sketch

    # At some point might do deeper type checking...
    if not isinstance(results, dict):
        raise ValueError('Method run_mash_sketch return value ' +
                         'results is not type dict as required.')
    # return the results
    return [results]
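# Illustrative run_mash_sketch call (a sketch; `impl`, `ctx`, and the reads
# ref are placeholders). Per the docstring above, exactly one of reads_ref,
# assembly_ref, or input_path should be supplied.
def example_mash_sketch(impl, ctx):
    params = {'reads_ref': '12345/6/7', 'paired_ends': True}
    results = impl.run_mash_sketch(ctx, params)[0]
    return results['sketch_path']   # local scratch path ending in '.msh'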
def upload_test_reads(cls):
    """
    Seeding an initial SE and PE Reads objects to test filtering
    """
    header = dict()
    header["Authorization"] = "Oauth {0}".format(cls.token)

    # readsUtils_Client = ReadsUtils(url=self.callback_url, token=ctx['token'])  # SDK local
    readsUtils_Client = ReadsUtils(os.environ['SDK_CALLBACK_URL'], token=cls.token)
    temp_nodes = []

    fwdtf = 'small_forward.fq'
    revtf = 'small_reverse.fq'
    fwdtarget = os.path.join(cls.scratch, fwdtf)
    revtarget = os.path.join(cls.scratch, revtf)
    print("CWD: " + str(os.getcwd()))
    shutil.copy('/kb/module/test/data/' + fwdtf, fwdtarget)
    shutil.copy('/kb/module/test/data/' + revtf, revtarget)

    # Upload single end reads
    cls.se_reads_reference = \
        readsUtils_Client.upload_reads({'wsname': cls.getWsName(),
                                        'name': "se_reads",
                                        'sequencing_tech': 'Illumina',
                                        'fwd_file': fwdtarget}
                                       )['obj_ref']
    se_data = cls.dfu.get_objects(
        {'object_refs': [cls.getWsName() + '/se_reads']})['data'][0]['data']
    temp_nodes.append(se_data['lib']['file']['id'])

    # Upload paired end reads
    cls.pe_reads_reference = \
        readsUtils_Client.upload_reads({'wsname': cls.getWsName(),
                                        'name': "pe_reads",
                                        'sequencing_tech': 'Illumina',
                                        'fwd_file': fwdtarget,
                                        'rev_file': revtarget,
                                        'insert_size_mean': 42,
                                        'insert_size_std_dev': 10,
                                        }
                                       )['obj_ref']
    pe_data = cls.dfu.get_objects(
        {'object_refs': [cls.getWsName() + '/pe_reads']})['data'][0]['data']
    temp_nodes.append(pe_data['lib1']['file']['id'])

    return temp_nodes
def setUpClass(cls):
    token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('kb_cufflinks'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'user_id': user_id,
                    'provenance': [
                        {'service': 'kb_cufflinks',
                         'method': 'please_never_use_it_in_production',
                         'method_params': []
                         }],
                    'authenticated': 1})
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = Workspace(url=cls.wsURL, token=token)
    cls.serviceImpl = kb_cufflinks(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = environ.get('SDK_CALLBACK_URL')
    cls.srv_wiz_url = cls.cfg['srv-wiz-url']

    # cls.wsName = 'cufflinks_test_' + user_id  # reuse existing workspace
    suffix = int(time.time() * 1000)
    cls.wsName = "test_kb_cufflinks_" + str(suffix)
    print('workspace_name: ' + cls.wsName)

    try:
        # reuse existing (previously torn down) workspace
        cls.wsClient.undelete_workspace({'workspace': cls.wsName})
        print('reusing old workspace...')
    except BaseException:
        try:
            # create if workspace does not exist
            cls.wsClient.create_workspace({'workspace': cls.wsName})
        except BaseException:
            # get workspace if it exists and was not previously deleted
            # (previously not torn down)
            ws_info = cls.wsClient.get_workspace_info({'workspace': cls.wsName})
            print("creating new workspace: " + str(ws_info))

    cls.dfu = DataFileUtil(cls.callback_url)
    cls.gfu = GenomeFileUtil(cls.callback_url)
    cls.ru = ReadsUtils(cls.callback_url)
    cls.rau = ReadsAlignmentUtils(cls.callback_url)
    cls.set_api = SetAPI(cls.srv_wiz_url, service_ver='dev')

    cls.cufflinks_runner = CufflinksUtils(cls.cfg)

    cls.prepare_data()
def setUpClass(cls):
    token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('AlignmentSetEditor'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'user_id': user_id,
                    'provenance': [
                        {'service': 'AlignmentSetEditor',
                         'method': 'please_never_use_it_in_production',
                         'method_params': []
                         }],
                    'authenticated': 1})
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = workspaceService(cls.wsURL)
    cls.serviceImpl = AlignmentSetEditor(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    cls.setAPI = SetAPI(cls.callback_url)
    cls.gfu = GenomeFileUtil(cls.callback_url)
    cls.ru = ReadsUtils(cls.callback_url)
    cls.rau = ReadsAlignmentUtils(cls.callback_url)
    suffix = int(time.time() * 1000)
    cls.wsName = "test_AlignmentSetEditor_" + str(suffix)
    cls.wsClient.create_workspace({'workspace': cls.wsName})
def load_reads_file(self, tech, file_fwd, file_rev, target_name):
    """
    Loads FASTQ files as either SingleEndLibrary or PairedEndLibrary.
    If file_rev is None, then we get a single end, otherwise, paired.
    """
    reads_util = ReadsUtils(self.callback_url)
    upload_params = {
        "wsname": self.ws_name,
        "fwd_file": file_fwd,
        "name": target_name,
        "sequencing_tech": tech
    }
    if file_rev is not None:
        upload_params["rev_file"] = file_rev
    reads_ref = reads_util.upload_reads(upload_params)
    return reads_ref["obj_ref"]
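# Hypothetical calls to load_reads_file (assumes `loader` is an instance of
# the class above with callback_url and ws_name configured; file paths and
# names are placeholders). Passing file_rev=None yields a SingleEndLibrary.
def example_load_reads(loader):
    pe_ref = loader.load_reads_file('Illumina', '/scratch/r1.fastq',
                                    '/scratch/r2.fastq', 'my_pe_lib')
    se_ref = loader.load_reads_file('Illumina', '/scratch/r1.fastq',
                                    None, 'my_se_lib')
    return pe_ref, se_ref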
def load_pe_reads(fwd_file, rev_file):
    """
    Copies from given dir to scratch. Then calls ReadsUtils to upload from scratch.
    """
    callback_url = os.environ['SDK_CALLBACK_URL']
    fwd_file_path = file_to_scratch(fwd_file, overwrite=True)
    rev_file_path = file_to_scratch(rev_file, overwrite=True)
    ru = ReadsUtils(callback_url)
    pe_reads_params = {
        'fwd_file': fwd_file_path,
        'rev_file': rev_file_path,
        'sequencing_tech': 'Illumina',
        'wsname': get_ws_name(),
        'name': 'MyPairedEndLibrary'
    }
    return ru.upload_reads(pe_reads_params)['obj_ref']
def loadSingleEndReads(self):
    if hasattr(self.__class__, 'se_reads_ref'):
        return self.__class__.se_reads_ref
    # return '23735/2/1'
    fq_path = os.path.join(self.scratch, 'reads_1_se.fq')
    shutil.copy(os.path.join('data', 'reads_1.fq'), fq_path)

    ru = ReadsUtils(self.callback_url)
    se_reads_ref = ru.upload_reads({
        'fwd_file': fq_path,
        'wsname': self.getWsName(),
        'name': 'test_readsSE',
        'sequencing_tech': 'artificial reads'
    })['obj_ref']
    self.__class__.se_reads_ref = se_reads_ref
    print('Loaded SingleEndReads: ' + se_reads_ref)
    return se_reads_ref
def loadSEReads(self, reads_file_path):
    # if hasattr(self.__class__, 'reads_ref'):
    #     return self.__class__.reads_ref
    se_reads_name = os.path.basename(reads_file_path)
    fq_path = os.path.join(self.scratch, se_reads_name)  # e.g. 'star_test_reads.fastq'
    shutil.copy(reads_file_path, fq_path)

    ru = ReadsUtils(self.callback_url)
    reads_ref = ru.upload_reads({
        'fwd_file': fq_path,
        'wsname': self.getWsName(),
        'name': se_reads_name.split('.')[0],
        'sequencing_tech': 'rnaseq reads'
    })['obj_ref']
    # self.__class__.reads_ref = reads_ref
    return reads_ref
def _upload_reads(self, refid, callbackURL, input_params):
    ref = [refid]
    DownloadReadsParams = {'read_libraries': ref}
    dfUtil = ReadsUtils(callbackURL)
    x = dfUtil.download_reads(DownloadReadsParams)

    uploadReadParams = {}
    fwd_file = x['files'][ref[0]]['files']['fwd']
    otype = x['files'][ref[0]]['files']['otype']

    # case of interleaved
    if otype == 'interleaved':
        uploadReadParams = {'fwd_file': fwd_file,
                            'wsname': input_params['workspace_name'],
                            'name': input_params['output'],
                            'rev_file': '',
                            'sequencing_tech': input_params['sequencing_tech'],
                            'single_genome': input_params['single_genome'],
                            'interleaved': 1}
    # case of separate pair
    if otype == 'paired':
        rev_file = x['files'][ref[0]]['files']['rev']
        uploadReadParams = {'fwd_file': fwd_file,
                            'wsname': input_params['workspace_name'],
                            'name': input_params['output'],
                            'rev_file': rev_file,
                            'sequencing_tech': input_params['sequencing_tech'],
                            'single_genome': input_params['single_genome']}
    # case of single end
    if otype == 'single':
        uploadReadParams = {'fwd_file': fwd_file,
                            'wsname': input_params['workspace_name'],
                            'name': input_params['output'],
                            'rev_file': '',
                            'sequencing_tech': input_params['sequencing_tech'],
                            'single_genome': input_params['single_genome']}

    y = dfUtil.upload_reads(uploadReadParams)
    return y['obj_ref']
def ru_reads_download(logger, ref, tdir, token):
    check_disk_space(logger)
    logger.info("{0} will be downloaded and transferred to {1}".format(ref, tdir))
    ru = ReadsUtils(url=os.environ['SDK_CALLBACK_URL'], token=token)
    ds = ru.download_reads({"read_libraries": [ref], "interleaved": "false"})

    ds['fwd'] = os.path.join(tdir, os.path.basename(ds['files'][ref]['files']['fwd']))
    os.rename(ds['files'][ref]['files']['fwd'], ds['fwd'])
    if ds['files'][ref]['files']['type'] == 'paired':
        if ds['files'][ref]['files']['rev_name'] is None:
            ds['rev'] = os.path.join(tdir, 'rev.fastq')
        else:
            ds['rev'] = os.path.join(tdir, os.path.basename(ds['files'][ref]['files']['rev']))
        # move the reverse file in both cases, not just when rev_name is set
        os.rename(ds['files'][ref]['files']['rev'], ds['rev'])
    logger.info("{0} has been downloaded and transferred to {1}".format(ref, tdir))
    return ds
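# Consumption sketch for ru_reads_download (logger, token, the UPA, and the
# target dir are placeholders): 'fwd' is always set on the returned dict,
# 'rev' only for paired-end data.
def example_ru_reads_download(logger, token):
    ds = ru_reads_download(logger, '12345/6/7', '/mnt/tmp', token)
    fwd_path = ds['fwd']
    rev_path = ds.get('rev')   # None for single-end libraries
    return fwd_path, rev_path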
def setUpClass(cls):
    cls.token = environ.get('KB_AUTH_TOKEN', None)
    cls.callbackURL = environ.get('SDK_CALLBACK_URL')
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('ExpressionUtils'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(cls.token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({
        'token': cls.token,
        'user_id': user_id,
        'provenance': [{
            'service': 'ExpressionUtils',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    cls.shockURL = cls.cfg['shock-url']
    cls.wsURL = cls.cfg['workspace-url']
    cls.service_wizard_url = cls.cfg['srv-wiz-url']
    cls.wsClient = workspaceService(cls.wsURL)
    cls.ws = Workspace(cls.wsURL, token=cls.token)
    cls.hs = HandleService(url=cls.cfg['handle-service-url'], token=cls.token)
    # create workspace
    wssuffix = int(time.time() * 1000)
    wsname = "test_expression_" + str(wssuffix)
    cls.wsinfo = cls.wsClient.create_workspace({'workspace': wsname})
    print('created workspace ' + cls.getWsName())

    cls.serviceImpl = ExpressionUtils(cls.cfg)
    cls.readUtils = ReadsUtils(cls.callbackURL)
    cls.dfu = DataFileUtil(cls.callbackURL, service_ver='dev')
    cls.dfu.ws_name_to_id(wsname)
    cls.assemblyUtil = AssemblyUtil(cls.callbackURL)
    cls.gfu = GenomeFileUtil(cls.callbackURL)
    cls.gaAPI = GenomeAnnotationAPI(cls.service_wizard_url)
    cls.rau = ReadsAlignmentUtils(cls.callbackURL)
    cls.scratch = cls.cfg['scratch']

    cls.staged = {}
    cls.nodes_to_delete = []
    cls.handles_to_delete = []
    cls.setupTestData()
def fetch_reads_files(self, reads_upas):
    """
    From a list of reads UPAs, uses ReadsUtils to fetch the reads as files.
    Returns them as a dictionary from reads_upa -> filename
    """
    if reads_upas is None:
        raise ValueError("reads_upas must be a list of UPAs")
    if len(reads_upas) == 0:
        raise ValueError("reads_upas must contain at least one UPA")
    ru = ReadsUtils(self.callback_url)
    reads_info = ru.download_reads({
        'read_libraries': reads_upas,
        'interleaved': 'true',
        'gzipped': None
    })['files']
    file_set = dict()
    for reads in reads_info:
        file_set[reads] = reads_info[reads]['files']['fwd']
    return file_set
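# Usage sketch for fetch_reads_files (assumes `utils` is an instance of the
# class above; the UPAs are placeholders): maps each reads UPA to a local,
# interleaved FASTQ path.
def example_fetch_reads_files(utils):
    upa_to_fastq = utils.fetch_reads_files(['12345/6/7', '12345/8/1'])
    for upa, path in upa_to_fastq.items():
        print('{} -> {}'.format(upa, path))
    return upa_to_fastq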
def __init__(self, config):
    self.ws_url = config["workspace-url"]
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.scratch = config['scratch']
    self.dfu = DataFileUtil(self.callback_url)
    self.ru = ReadsUtils(self.callback_url)
    self.au = AssemblyUtil(self.callback_url)
    self.gfu = GenomeFileUtil(self.callback_url)
    self.rau = ReadsAlignmentUtils(self.callback_url)
def getPairedEndLibInfo(self):
    if hasattr(self.__class__, 'pairedEndLibInfo'):
        return self.__class__.pairedEndLibInfo
    # 1) upload files to shock
    shared_dir = "/kb/module/work/tmp"
    forward_data_file = 'data/small.forward.fq'
    forward_file = os.path.join(shared_dir, os.path.basename(forward_data_file))
    shutil.copy(forward_data_file, forward_file)
    reverse_data_file = 'data/small.reverse.fq'
    reverse_file = os.path.join(shared_dir, os.path.basename(reverse_data_file))
    shutil.copy(reverse_data_file, reverse_file)

    ru = ReadsUtils(os.environ['SDK_CALLBACK_URL'])
    paired_end_ref = ru.upload_reads({'fwd_file': forward_file,
                                      'rev_file': reverse_file,
                                      'sequencing_tech': 'artificial reads',
                                      'interleaved': 0,
                                      'wsname': self.getWsName(),
                                      'name': 'test.pe.reads'})['obj_ref']

    new_obj_info = self.ws.get_object_info_new({'objects': [{'ref': paired_end_ref}]})
    self.__class__.pairedEndLibInfo = new_obj_info[0]
    return new_obj_info[0]
def setUpClass(cls):
    cls.token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('kb_ballgown'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(cls.token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({
        'token': cls.token,
        'user_id': user_id,
        'provenance': [{
            'service': 'kb_ballgown',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    cls.hs = HandleService(url=cls.cfg['handle-service-url'], token=cls.token)
    cls.shockURL = cls.cfg['shock-url']
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = workspaceService(cls.wsURL, token=cls.token)
    cls.serviceImpl = kb_ballgown(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']

    cls.gfu = GenomeFileUtil(cls.callback_url)
    cls.dfu = DataFileUtil(cls.callback_url)
    cls.ru = ReadsUtils(cls.callback_url)
    cls.rau = ReadsAlignmentUtils(cls.callback_url, service_ver='dev')
    cls.eu = ExpressionUtils(cls.callback_url, service_ver='dev')
    cls.set_api = SetAPI(cls.callback_url)

    suffix = int(time.time() * 1000)
    cls.wsName = "test_kb_ballgown_" + str(suffix)
    # cls.wsName = "test_kb_ballgown_1004"
    cls.wsClient.create_workspace({'workspace': cls.wsName})

    cls.nodes_to_delete = []
    cls.handles_to_delete = []

    cls.prepare_data()
def getPairedEndLibInfo(self):
    if hasattr(self.__class__, 'pairedEndLibInfo'):
        return self.__class__.pairedEndLibInfo

    # copy the local test file to the shared scratch space so that the
    # ReadsUtils container can see it.
    test_fastq_file_local = 'data/interleaved.fastq'
    test_fastq_file_scratch = os.path.join(self.scratch,
                                           os.path.basename(test_fastq_file_local))
    shutil.copy(test_fastq_file_local, test_fastq_file_scratch)

    # call the ReadsUtils library to upload the test data to KBase
    ru = ReadsUtils(os.environ['SDK_CALLBACK_URL'])
    paired_end_ref = ru.upload_reads({'fwd_file': test_fastq_file_scratch,
                                      'sequencing_tech': 'artificial reads',
                                      'interleaved': 1,
                                      'wsname': self.getWsName(),
                                      'name': 'test.pe.reads'})['obj_ref']

    # get the object metadata for the new test dataset
    new_obj_info = self.ws.get_object_info_new({'objects': [{'ref': paired_end_ref}]})
    self.__class__.pairedEndLibInfo = new_obj_info[0]
    return new_obj_info[0]
def run_megahit(self, ctx, params):
    """
    :param params: instance of type "MegaHitParams" (Run MEGAHIT. Most
       parameters here are just passed forward to MEGAHIT
       workspace_name - the name of the workspace for input/output
       read_library_ref - the name of the PE read library (SE library
       support in the future)
       output_contig_set_name - the name of the output contigset
       megahit_parameter_preset - override a group of parameters;
       possible values:
           meta           '--min-count 2 --k-list 21,41,61,81,99'
                          (generic metagenomes, default)
           meta-sensitive '--min-count 2 --k-list 21,31,41,51,61,71,81,91,99'
                          (more sensitive but slower)
           meta-large     '--min-count 2 --k-list 27,37,47,57,67,77,87'
                          (large & complex metagenomes, like soil)
           bulk           '--min-count 3 --k-list 31,51,71,91,99 --no-mercy'
                          (experimental, standard bulk sequencing with >= 30x depth)
           single-cell    '--min-count 3 --k-list 21,33,55,77,99,121 --merge_level 20,0.96'
                          (experimental, single cell data)
       min_count - minimum multiplicity for filtering (k_min+1)-mers, default 2
       min_k - minimum kmer size (<= 127), must be odd number, default 21
       max_k - maximum kmer size (<= 127), must be odd number, default 99
       k_step - increment of kmer size of each iteration (<= 28), must be
                even number, default 10
       k_list - list of kmer size (all must be odd, in the range 15-127,
                increment <= 28); overrides `--k-min', `--k-max' and `--k-step'
       min_contig_length - minimum length of contigs to output, default is 2000
       @optional megahit_parameter_preset
       @optional min_count @optional k_min @optional k_max @optional k_step
       @optional k_list @optional min_contig_length) -> structure:
       parameter "workspace_name" of String, parameter "read_library_ref"
       of String, parameter "output_contigset_name" of String, parameter
       "megahit_parameter_preset" of String, parameter "min_count" of
       Long, parameter "k_min" of Long, parameter "k_max" of Long,
       parameter "k_step" of Long, parameter "k_list" of list of Long,
       parameter "min_contig_length" of Long
    :returns: instance of type "MegaHitOutput" -> structure: parameter
       "report_name" of String, parameter "report_ref" of String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN run_megahit
    print('Running run_megahit with params=')
    pprint(params)

    # STEP 1: basic parameter checks + parsing
    if 'workspace_name' not in params:
        raise ValueError('workspace_name parameter is required')
    if 'read_library_ref' not in params:
        raise ValueError('read_library_ref parameter is required')
    if 'output_contigset_name' not in params:
        raise ValueError('output_contigset_name parameter is required')

    # STEP 2: get the read library as deinterleaved fastq files
    input_ref = params['read_library_ref']
    reads_params = {'read_libraries': [input_ref],
                    'interleaved': 'false',
                    'gzipped': None
                    }
    ru = ReadsUtils(self.callbackURL)
    reads = ru.download_reads(reads_params)['files']

    print('Input reads files:')
    fwd = reads[input_ref]['files']['fwd']
    rev = reads[input_ref]['files']['rev']
    pprint('forward: ' + fwd)
    pprint('reverse: ' + rev)

    # STEP 3: run megahit
    # construct the command
    megahit_cmd = [self.MEGAHIT]

    # we only support PE reads, so add that
    megahit_cmd.append('-1')
    megahit_cmd.append(fwd)
    megahit_cmd.append('-2')
    megahit_cmd.append(rev)

    # if a preset is defined, use that:
    if 'megahit_parameter_preset' in params:
        if params['megahit_parameter_preset']:
            megahit_cmd.append('--presets')
            megahit_cmd.append(params['megahit_parameter_preset'])

    if 'min_count' in params:
        if params['min_count']:
            megahit_cmd.append('--min-count')
            megahit_cmd.append(str(params['min_count']))
    if 'k_min' in params:
        if params['k_min']:
            megahit_cmd.append('--k-min')
            megahit_cmd.append(str(params['k_min']))
    if 'k_max' in params:
        if params['k_max']:
            megahit_cmd.append('--k-max')
            megahit_cmd.append(str(params['k_max']))
    if 'k_step' in params:
        if params['k_step']:
            megahit_cmd.append('--k-step')
            megahit_cmd.append(str(params['k_step']))
    if 'k_list' in params:
        if params['k_list']:
            k_list = []
            for k_val in params['k_list']:
                k_list.append(str(k_val))
            megahit_cmd.append('--k-list')
            megahit_cmd.append(','.join(k_list))

    min_contig_length = self.DEFAULT_MIN_CONTIG_LENGTH
    if 'min_contig_length' in params:
        if params['min_contig_length']:
            if str(params['min_contig_length']).isdigit():
                min_contig_length = params['min_contig_length']
            else:
                raise ValueError('min_contig_length parameter must be a non-negative integer')

    megahit_cmd.append('--min-contig-len')
    megahit_cmd.append(str(min_contig_length))

    # set the output location
    timestamp = int((datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds() * 1000)
    output_dir = os.path.join(self.scratch, 'output.' + str(timestamp))
    megahit_cmd.append('-o')
    megahit_cmd.append(output_dir)

    # run megahit
    print('running megahit:')
    print('    ' + ' '.join(megahit_cmd))
    p = subprocess.Popen(megahit_cmd, cwd=self.scratch, shell=False)
    retcode = p.wait()

    print('Return code: ' + str(retcode))
    if p.returncode != 0:
        raise ValueError('Error running MEGAHIT, return code: ' + str(retcode) + '\n')

    output_contigs = os.path.join(output_dir, 'final.contigs.fa')

    # on macs, we cannot run megahit in the shared host scratch space,
    # so we need to move the file there
    if self.mac_mode:
        shutil.move(output_contigs, os.path.join(self.host_scratch, 'final.contigs.fa'))
        output_contigs = os.path.join(self.host_scratch, 'final.contigs.fa')

    # STEP 4: save the resulting assembly
    assemblyUtil = AssemblyUtil(self.callbackURL)
    output_data_ref = assemblyUtil.save_assembly_from_fasta({
        'file': {'path': output_contigs},
        'workspace_name': params['workspace_name'],
        'assembly_name': params['output_contigset_name']
    })

    # STEP 5: generate and save the report
    # compute a simple contig length distribution for the report
    lengths = []
    for seq_record in SeqIO.parse(output_contigs, 'fasta'):
        lengths.append(len(seq_record.seq))

    report = ''
    report += 'ContigSet saved to: ' + params['workspace_name'] + '/' + \
              params['output_contigset_name'] + '\n'
    report += 'Assembled into ' + str(len(lengths)) + ' contigs.\n'
    report += 'Avg Length: ' + str(sum(lengths) / float(len(lengths))) + ' bp.\n'

    bins = 10
    counts, edges = np.histogram(lengths, bins)
    report += 'Contig Length Distribution (# of contigs -- min to max basepairs):\n'
    for c in range(bins):
        report += '   ' + str(counts[c]) + '\t--\t' + str(edges[c]) + \
                  ' to ' + str(edges[c + 1]) + ' bp\n'

    print('Running QUAST')
    kbq = kb_quast(self.callbackURL)
    try:
        quastret = kbq.run_QUAST({'files': [{'path': output_contigs,
                                             'label': params['output_contigset_name']}]})
    except QUASTError as qe:
        # not really any way to test this, all inputs have been checked
        # earlier and should be ok
        print('Logging exception from running QUAST')
        print(str(qe))
        # TODO delete shock node
        raise

    print('Saving report')
    kbr = KBaseReport(self.callbackURL)
    try:
        report_info = kbr.create_extended_report(
            {'message': report,
             'objects_created': [{'ref': output_data_ref,
                                  'description': 'Assembled contigs'}],
             'direct_html_link_index': 0,
             'html_links': [{'shock_id': quastret['shock_id'],
                             'name': 'report.html',
                             'label': 'QUAST report'}
                            ],
             'report_object_name': 'kb_megahit_report_' + str(uuid.uuid4()),
             'workspace_name': params['workspace_name']
             })
    except _RepError as re:
        # not really any way to test this, all inputs have been checked
        # earlier and should be ok
        print('Logging exception from creating report object')
        print(str(re))
        # TODO delete shock node
        raise

    # STEP 6: construct the output to send back
    output = {'report_name': report_info['name'],
              'report_ref': report_info['ref']}
    #END run_megahit

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method run_megahit return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
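# Illustrative run_megahit params (a sketch; values are placeholders and the
# preset string comes from the docstring above):
EXAMPLE_MEGAHIT_PARAMS = {
    'workspace_name': 'my_workspace',
    'read_library_ref': '12345/6/7',               # paired-end reads library
    'output_contigset_name': 'megahit.contigs',
    'megahit_parameter_preset': 'meta-sensitive',  # overrides min_count/k_* flags
    'min_contig_length': 2000,
}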
def fastqutils_stats(self, ctx, params):
    """
    :param params: instance of type "FastqUtilsStatsParams" ->
       structure: parameter "workspace_name" of type "workspace_name" (A
       string representing a workspace name.), parameter
       "read_library_ref" of type "read_library_ref" (A string
       representing a ContigSet id.)
    :returns: instance of type "FastqUtilsStatsResult" -> structure:
       parameter "report_name" of String, parameter "report_ref" of String
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN fastqutils_stats

    print('Running fastqutils_stats with params=')
    print(pformat(params))

    if 'workspace_name' not in params:
        raise ValueError('workspace_name parameter is required')
    if 'read_library_ref' not in params:
        raise ValueError('read_library_ref parameter is required')

    # Get the read library as deinterleaved fastq files
    input_ref = params['read_library_ref']
    reads_params = {'read_libraries': [input_ref],
                    'interleaved': 'false',
                    'gzipped': None
                    }
    ru = ReadsUtils(self.callbackURL, token=ctx['token'])
    reads = ru.download_reads(reads_params)['files']

    files = [reads[input_ref]['files']['fwd']]
    if reads[input_ref]['files']['rev']:
        files.append(reads[input_ref]['files']['rev'])
    print('running on files:')
    for f in files:
        print(f)

    # construct the command
    stats_cmd = [self.FASTQUTILS, 'stats']

    report = ''
    for f in files:
        # copy the base command so each file gets its own argument list
        # (appending to stats_cmd directly would accumulate files across
        # iterations)
        cmd = list(stats_cmd)
        cmd.append(f)

        report += '============== ' + f + ' ==============\n'
        print('running: ' + ' '.join(cmd))
        p = subprocess.Popen(cmd,
                             cwd=self.scratch,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
                             shell=False)

        while True:
            line = p.stdout.readline()
            if not line:
                break
            report += line
            print(line.replace('\n', ''))

        p.stdout.close()
        p.wait()
        report += "\n\n"
        print('return code: ' + str(p.returncode))
        if p.returncode != 0:
            raise ValueError('Error running ' + self.FASTQUTILS +
                             ', return code: ' + str(p.returncode))

    reportObj = {
        'objects_created': [],
        'text_message': report
    }

    # use a distinct name for the report client so it does not shadow the
    # report text accumulated above
    kbreport = KBaseReport(self.callbackURL)
    report_info = kbreport.create({'report': reportObj,
                                   'workspace_name': params['workspace_name']})

    returnVal = {'report_name': report_info['name'],
                 'report_ref': report_info['ref']}
    #END fastqutils_stats

    # At some point might do deeper type checking...
    if not isinstance(returnVal, dict):
        raise ValueError('Method fastqutils_stats return value ' +
                         'returnVal is not type dict as required.')
    # return the results
    return [returnVal]