def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.scratch = os.path.join(config['scratch'], 'import_SRA_' + str(uuid.uuid4()))
    handler_utils._mkdir_p(self.scratch)
    self.dfu = DataFileUtil(self.callback_url)
    self.ru = ReadsUtils(self.callback_url)
    self.uploader_utils = UploaderUtil(config)
def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.dfu = DataFileUtil(self.callback_url)
    self.fba = fba_tools(self.callback_url)
    self.SBMLTools = SBMLTools(self.callback_url)
    self.uploader_utils = UploaderUtil(config)
def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.scratch = os.path.join(config['scratch'], 'import_GenBank_' + str(uuid.uuid4()))
    handler_utils._mkdir_p(self.scratch)
    self.dfu = DataFileUtil(self.callback_url)
    self.gfu = GenomeFileUtil(self.callback_url, service_ver='dev')
    self.uploader_utils = UploaderUtil(config)
def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.scratch = os.path.join(config['scratch'], 'import_assembly_' + str(uuid.uuid4()))
    handler_utils._mkdir_p(self.scratch)
    self.token = config['KB_AUTH_TOKEN']
    self.dfu = DataFileUtil(self.callback_url)
    self.au = AssemblyUtil(self.callback_url)
    self.uploader_utils = UploaderUtil(config)
    self.max_contigs_for_report = 200
class ImportSRAUtil:

    SRA_TOOLKIT_PATH = '/kb/deployment/bin/fastq-dump'

    def _run_command(self, command):
        """
        _run_command: run command and print result
        """
        log('Start executing command:\n{}'.format(command))
        pipe = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True)
        output = pipe.communicate()[0]
        exitCode = pipe.returncode

        if (exitCode == 0):
            log('Executed command:\n{}\n'.format(command) +
                'Exit Code: {}\nOutput:\n{}'.format(exitCode, output))
        else:
            error_msg = 'Error running command:\n{}\n'.format(command)
            error_msg += 'Exit Code: {}\nOutput:\n{}'.format(exitCode, output)
            raise ValueError(error_msg)

    def _check_fastq_dump_result(self, tmp_dir, sra_name):
        """
        _check_fastq_dump_result: check whether the fastq_dump result is PE or SE
        """
        return os.path.exists(tmp_dir + '/' + sra_name + '/1')

    def _sra_to_fastq(self, scratch_sra_file_path, params):
        """
        _sra_to_fastq: convert SRA file to FASTQ file(s)
        """
        tmp_dir = os.path.join(self.scratch, str(uuid.uuid4()))
        handler_utils._mkdir_p(tmp_dir)

        command = self.SRA_TOOLKIT_PATH + ' --split-3 -T -O '
        command += tmp_dir + ' ' + scratch_sra_file_path

        self._run_command(command)

        sra_name = os.path.basename(scratch_sra_file_path).partition('.')[0]
        paired_end = self._check_fastq_dump_result(tmp_dir, sra_name)

        if paired_end:
            self._validate_paired_end_advanced_params(params)
            fwd_file = os.path.join(tmp_dir, sra_name, '1', 'fastq')
            os.rename(fwd_file, fwd_file + '.fastq')
            fwd_file = fwd_file + '.fastq'

            rev_file = os.path.join(tmp_dir, sra_name, '2', 'fastq')
            os.rename(rev_file, rev_file + '.fastq')
            rev_file = rev_file + '.fastq'
        else:
            self._validate_single_end_advanced_params(params)
            fwd_file = os.path.join(tmp_dir, sra_name, 'fastq')
            os.rename(fwd_file, fwd_file + '.fastq')
            fwd_file = fwd_file + '.fastq'
            rev_file = None

        fastq_file_path = {
            'fwd_file': fwd_file,
            'rev_file': rev_file
        }
        return fastq_file_path

    def _validate_single_end_advanced_params(self, params):
        """
        _validate_single_end_advanced_params: validate advanced params for single end reads
        """
        if (params.get('insert_size_mean')
                or params.get('insert_size_std_dev')
                or params.get('read_orientation_outward')):
            error_msg = 'Advanced params "Mean Insert Size", "St. Dev. of Insert Size" or '
            error_msg += '"Reads Orientation Outward" are specific to Paired End Reads'
            raise ValueError(error_msg)

        if 'interleaved' in params:
            del params['interleaved']

    def _validate_paired_end_advanced_params(self, params):
        """
        _validate_paired_end_advanced_params: validate advanced params for paired end reads
        """
        sequencing_tech = params.get('sequencing_tech')

        if sequencing_tech in ['PacBio CCS', 'PacBio CLR']:
            error_msg = 'Sequencing Technology: "PacBio CCS" or "PacBio CLR" '
            error_msg += 'is specific to Single End Reads'
            raise ValueError(error_msg)

    def _validate_upload_staging_file_availability(self, staging_file_subdir_path):
        """
        _validate_upload_staging_file_availability: validates file availability in user's staging area
        """
        pass
        # TODO ftp_server needs to be fixed for subdir
        # list = ftp_service(self.callback_url).list_files()
        # if staging_file_subdir_path not in list:
        #     error_msg = 'Target file: {} is NOT available.\n'.format(
        #         staging_file_subdir_path.rpartition('/')[-1])
        #     error_msg += 'Available files:\n {}'.format("\n".join(list))
        #     raise ValueError(error_msg)

    def __init__(self, config):
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.scratch = os.path.join(config['scratch'], 'import_SRA_' + str(uuid.uuid4()))
        handler_utils._mkdir_p(self.scratch)
        self.dfu = DataFileUtil(self.callback_url)
        self.ru = ReadsUtils(self.callback_url)
        self.uploader_utils = UploaderUtil(config)

    def import_sra_from_staging(self, params):
        '''
        import_sra_from_staging: imports an SRA file from the user's staging area,
        converts it to FASTQ and uploads the reads via ReadsUtils.upload_reads

        required params:
        staging_file_subdir_path: subdirectory file path
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name
        sequencing_tech: sequencing technology
        name: output reads file name
        workspace_name: workspace name/ID of the object

        Optional Params:
        single_genome: whether the reads are from a single genome or a metagenome.
        insert_size_mean: mean (average) insert length
        insert_size_std_dev: standard deviation of insert lengths
        read_orientation_outward: whether reads in a pair point outward

        return:
        obj_ref: return object reference
        '''
        log('--->\nrunning ImportSRAUtil.import_sra_from_staging\n' +
            'params:\n{}'.format(json.dumps(params, indent=1)))

        self.validate_import_sra_from_staging_params(params)

        download_staging_file_params = {
            'staging_file_subdir_path': params.get('staging_file_subdir_path')
        }
        scratch_sra_file_path = self.dfu.download_staging_file(
            download_staging_file_params).get('copy_file_path')
        log('Downloaded staging file to: {}'.format(scratch_sra_file_path))

        fastq_file_path = self._sra_to_fastq(scratch_sra_file_path, params)

        import_sra_reads_params = params
        import_sra_reads_params.update(fastq_file_path)

        workspace_name_or_id = params.get('workspace_name')
        if str(workspace_name_or_id).isdigit():
            import_sra_reads_params['wsid'] = int(workspace_name_or_id)
        else:
            import_sra_reads_params['wsname'] = str(workspace_name_or_id)

        log('--->\nrunning ReadsUtils.upload_reads\nparams:\n{}'.format(
            json.dumps(import_sra_reads_params, indent=1)))
        returnVal = self.ru.upload_reads(import_sra_reads_params)

        """
        Update the workspace object related meta-data for staged file
        """
        self.uploader_utils.update_staging_service(params.get('staging_file_subdir_path'),
                                                   returnVal['obj_ref'])
        return returnVal

    def import_sra_from_web(self, params):
        '''
        import_sra_from_web: downloads SRA file(s) from a web source, converts them
        to FASTQ and uploads the reads via ReadsUtils.upload_reads

        required params:
        download_type: download type for web source fastq file
                       ('Direct Download', 'FTP', 'DropBox', 'Google Drive')
        workspace_name: workspace name/ID of the object

        sra_urls_to_add: list of dicts, one per SRA file URL
            required params:
            file_url: SRA file URL
            sequencing_tech: sequencing technology
            name: output reads file name

            Optional Params:
            single_genome: whether the reads are from a single genome or a metagenome.
            insert_size_mean: mean (average) insert length
            insert_size_std_dev: standard deviation of insert lengths
            read_orientation_outward: whether reads in a pair point outward

        return:
        obj_ref: return object reference
        '''
        log('--->\nrunning ImportSRAUtil.import_sra_from_web\n' +
            'params:\n{}'.format(json.dumps(params, indent=1)))

        self.validate_import_sra_from_web_params(params)

        download_type = params.get('download_type')
        workspace_name = params.get('workspace_name')

        obj_refs = []
        uploaded_files = []

        for sra_url_to_add in params.get('sra_urls_to_add'):
            download_web_file_params = {
                'download_type': download_type,
                'file_url': sra_url_to_add.get('file_url')
            }
            scratch_sra_file_path = self.dfu.download_web_file(
                download_web_file_params).get('copy_file_path')
            log('Downloaded web file to: {}'.format(scratch_sra_file_path))

            fastq_file_path = self._sra_to_fastq(scratch_sra_file_path, sra_url_to_add)

            import_sra_reads_params = sra_url_to_add
            import_sra_reads_params.update(fastq_file_path)

            workspace_name_or_id = workspace_name
            if str(workspace_name_or_id).isdigit():
                import_sra_reads_params['wsid'] = int(workspace_name_or_id)
            else:
                import_sra_reads_params['wsname'] = str(workspace_name_or_id)

            log('--->\nrunning ReadsUtils.upload_reads\nparams:\n{}'.format(
                json.dumps(import_sra_reads_params, indent=1)))
            obj_ref = self.ru.upload_reads(import_sra_reads_params).get('obj_ref')
            obj_refs.append(obj_ref)
            uploaded_files.append(sra_url_to_add.get('file_url'))

        return {'obj_refs': obj_refs, 'uploaded_files': uploaded_files}

    def validate_import_sra_from_staging_params(self, params):
        """
        validate_import_sra_from_staging_params:
                    validates params passed to import_sra_from_staging method
        """
        # check for required parameters
        for p in ['staging_file_subdir_path', 'sequencing_tech', 'name', 'workspace_name']:
            if p not in params:
                raise ValueError('"' + p + '" parameter is required, but missing')

        self._validate_upload_staging_file_availability(params.get('staging_file_subdir_path'))

    def validate_import_sra_from_web_params(self, params):
        """
        validate_import_sra_from_web_params:
                    validates params passed to import_sra_from_web method
        """
        # check for required parameters
        for p in ['download_type', 'workspace_name', 'sra_urls_to_add']:
            if p not in params:
                raise ValueError('"{}" parameter is required, but missing'.format(p))

        if not isinstance(params.get('sra_urls_to_add'), list):
            raise ValueError('sra_urls_to_add is not type list as required')

        for sra_url_to_add in params.get('sra_urls_to_add'):
            for p in ['file_url', 'sequencing_tech', 'name']:
                if p not in sra_url_to_add:
                    raise ValueError('"{}" parameter is required, but missing'.format(p))

    def generate_report(self, obj_refs_list, params):
        """
        generate_report: generate summary report

        obj_refs: generated workspace object references. (return of import_sra_from_staging/web)
        params:
        staging_file_subdir_path: subdirectory file path
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name
        workspace_name: workspace name/ID that reads will be stored to
        """
        uuid_string = str(uuid.uuid4())

        objects_created = list()
        objects_data = list()

        for obj_ref in obj_refs_list:
            get_objects_params = {
                'object_refs': [obj_ref],
                'ignore_errors': False
            }
            objects_data.append(self.dfu.get_objects(get_objects_params))
            objects_created.append({'ref': obj_ref, 'description': 'Imported Reads'})

        output_html_files = self.generate_html_report(objects_data, params, uuid_string)

        report_params = {
            'message': '',
            'workspace_name': params.get('workspace_name'),
            'objects_created': objects_created,
            'html_links': output_html_files,
            'direct_html_link_index': 0,
            'html_window_height': 460,
            'report_object_name': 'kb_sra_upload_report_' + uuid_string}

        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {'report_name': output['name'], 'report_ref': output['ref']}

        return report_output

    def generate_html_report(self, reads_objs, params, uuid_string):
        """
        _generate_html_report: generate html summary report
        """
        log('Start generating html report')
        pprint(params)

        tmp_dir = os.path.join(self.scratch, uuid_string)
        handler_utils._mkdir_p(tmp_dir)
        result_file_path = os.path.join(tmp_dir, 'report.html')
        html_report = list()
        objects_content = ''

        for index, reads_obj in enumerate(reads_objs):
            idx = str(index)
            reads_data = reads_obj.get('data')[0].get('data')
            reads_info = reads_obj.get('data')[0].get('info')
            reads_ref = str(reads_info[6]) + '/' + str(reads_info[0]) + '/' + str(reads_info[4])
            reads_obj_name = str(reads_info[1])

            with open(os.path.join(os.path.dirname(__file__),
                                   'report_template_sra/table_panel.html'),
                      'r') as object_content_file:
                report_template = object_content_file.read()
                report_template = report_template.replace('_NUM', str(idx))
                report_template = report_template.replace('OBJECT_NAME', reads_obj_name)
                if index == 0:
                    report_template = report_template.replace('panel-collapse collapse',
                                                              'panel-collapse collapse in')

            objects_content += report_template

            base_percentages = ''
            for key, val in reads_data.get('base_percentages').items():
                base_percentages += '{}({}%) '.format(key, val)

            reads_overview_data = collections.OrderedDict()

            reads_overview_data['Name'] = '{} ({})'.format(reads_obj_name, reads_ref)
            reads_overview_data['Uploaded File'] = params.get('uploaded_files')[index]
            reads_overview_data['Date Uploaded'] = time.strftime("%c")
            reads_overview_data['Number of Reads'] = '{:,}'.format(reads_data.get('read_count'))

            reads_type = reads_info[2].lower()
            if 'single' in reads_type:
                reads_overview_data['Type'] = 'Single End'
            elif 'paired' in reads_type:
                reads_overview_data['Type'] = 'Paired End'
            else:
                reads_overview_data['Type'] = 'Unknown'

            reads_overview_data['Platform'] = reads_data.get('sequencing_tech', 'Unknown')

            reads_single_genome = str(reads_data.get('single_genome', 'Unknown'))
            if '0' in reads_single_genome:
                reads_overview_data['Single Genome'] = 'No'
            elif '1' in reads_single_genome:
                reads_overview_data['Single Genome'] = 'Yes'
            else:
                reads_overview_data['Single Genome'] = 'Unknown'

            insert_size_mean = params.get('insert_size_mean', 'Not Specified')
            if insert_size_mean is not None:
                reads_overview_data['Insert Size Mean'] = str(insert_size_mean)
            else:
                reads_overview_data['Insert Size Mean'] = 'Not Specified'

            insert_size_std_dev = params.get('insert_size_std_dev', 'Not Specified')
            if insert_size_std_dev is not None:
                reads_overview_data['Insert Size Std Dev'] = str(insert_size_std_dev)
            else:
                reads_overview_data['Insert Size Std Dev'] = 'Not Specified'

            reads_outward_orientation = str(reads_data.get('read_orientation_outward', 'Unknown'))
            if '0' in reads_outward_orientation:
                reads_overview_data['Outward Read Orientation'] = 'No'
            elif '1' in reads_outward_orientation:
                reads_overview_data['Outward Read Orientation'] = 'Yes'
            else:
                reads_overview_data['Outward Read Orientation'] = 'Unknown'

            reads_stats_data = collections.OrderedDict()

            reads_stats_data['Number of Reads'] = '{:,}'.format(reads_data.get('read_count'))
            reads_stats_data['Total Number of Bases'] = '{:,}'.format(reads_data.get('total_bases'))
            reads_stats_data['Mean Read Length'] = str(reads_data.get('read_length_mean'))
            reads_stats_data['Read Length Std Dev'] = str(reads_data.get('read_length_stdev'))
            dup_reads_percent = '{:.2f}'.format(float(reads_data.get('number_of_duplicates') * 100) /
                                                reads_data.get('read_count'))
            reads_stats_data['Number of Duplicate Reads(%)'] = '{} ({}%)'.format(
                str(reads_data.get('number_of_duplicates')), dup_reads_percent)
            reads_stats_data['Phred Type'] = str(reads_data.get('phred_type'))
            reads_stats_data['Quality Score Mean'] = '{0:.2f}'.format(reads_data.get('qual_mean'))
            reads_stats_data['Quality Score (Min/Max)'] = '{}/{}'.format(str(reads_data.get('qual_min')),
                                                                         str(reads_data.get('qual_max')))
            reads_stats_data['GC Percentage'] = str(round(reads_data.get('gc_content') * 100, 2)) + '%'
            reads_stats_data['Base Percentages'] = base_percentages

            overview_content = ''
            for key, val in reads_overview_data.items():
                overview_content += '<tr><td><b>{}</b></td>'.format(key)
                overview_content += '<td>{}</td>'.format(val)
                overview_content += '</tr>'

            stats_content = ''
            for key, val in reads_stats_data.items():
                stats_content += '<tr><td><b>{}</b></td>'.format(key)
                stats_content += '<td>{}</td>'.format(val)
                stats_content += '</tr>'

            objects_content = objects_content.replace('###OVERVIEW_CONTENT###', overview_content)
            objects_content = objects_content.replace('###STATS_CONTENT###', stats_content)

        with open(result_file_path, 'w') as result_file:
            with open(os.path.join(os.path.dirname(__file__),
                                   'report_template_sra/report_head.html'),
                      'r') as report_template_file:
                report_template = report_template_file.read()
                report_template = report_template.replace('###TABLE_PANELS_CONTENT###',
                                                          objects_content)
                result_file.write(report_template)
            result_file.close()

        shutil.copytree(os.path.join(os.path.dirname(__file__),
                                     'report_template_sra/bootstrap-3.3.7'),
                        os.path.join(tmp_dir, 'bootstrap-3.3.7'))
        shutil.copy(os.path.join(os.path.dirname(__file__),
                                 'report_template_sra/jquery-3.2.1.min.js'),
                    os.path.join(tmp_dir, 'jquery-3.2.1.min.js'))

        matched_files = []
        for root, dirnames, filenames in os.walk(tmp_dir):
            for filename in fnmatch.filter(filenames, '*.gz'):
                matched_files.append(os.path.join(root, filename))

        for gz_file in matched_files:
            print(('Removing ' + gz_file))
            os.remove(gz_file)

        report_shock_id = self.dfu.file_to_shock({'file_path': tmp_dir,
                                                  'pack': 'zip'})['shock_id']

        html_report.append({'shock_id': report_shock_id,
                            'name': os.path.basename(result_file_path),
                            'label': os.path.basename(result_file_path),
                            'description': 'HTML summary report for Imported Reads'})
        return html_report
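# --- Example (not from the original source): a minimal, hypothetical driver showing
# how ImportSRAUtil above is typically exercised. The config values, staging path,
# and workspace name are placeholders; real values come from the KBase SDK runtime.
def _example_import_sra_from_staging():
    config = {
        'SDK_CALLBACK_URL': 'http://localhost:9999',  # placeholder callback URL
        'KB_AUTH_TOKEN': 'dummy-token',               # placeholder auth token
        'scratch': '/kb/module/work/tmp',             # conventional SDK scratch dir
    }
    importer = ImportSRAUtil(config)
    params = {
        'staging_file_subdir_path': 'subdir/sample.sra',  # hypothetical staged file
        'sequencing_tech': 'Illumina',
        'name': 'sample_reads',
        'workspace_name': 'my_workspace',
    }
    result = importer.import_sra_from_staging(params)
    # generate_report expects a list of object refs plus 'uploaded_files' in params
    report_params = dict(params, uploaded_files=[params['staging_file_subdir_path']])
    return importer.generate_report([result['obj_ref']], report_params)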
def __init__(self, config):
    self.uploader_utils = UploaderUtil(config)
    self.sra_importer = ImportSRAUtil(config)
class ImportReadsUtil:
    def __init__(self, config):
        self.uploader_utils = UploaderUtil(config)
        self.sra_importer = ImportSRAUtil(config)

    def import_reads_from_staging(self, params):
        self._validate_import_reads_from_staging_params(params)

        if params.get('import_type') == 'FASTQ/FASTA':
            fastq_importer_params = params
            fastq_importer_params['fwd_staging_file_name'] = params.get(
                'fastq_fwd_staging_file_name')
            fastq_importer_params['rev_staging_file_name'] = params.get(
                'fastq_rev_staging_file_name')

            returnVal = self.uploader_utils.upload_fastq_file(fastq_importer_params)

            uploaded_file = params.get('fastq_fwd_staging_file_name')
            if params.get('fastq_rev_staging_file_name') is not None:
                uploaded_file += '\n' + params.get('fastq_rev_staging_file_name')
            fastq_importer_params['uploaded_files'] = [uploaded_file]

            """
            Update the workspace object related meta-data for staged file
            """
            self.uploader_utils.update_staging_service(
                params.get('fastq_fwd_staging_file_name'), returnVal['obj_ref'])
            if params.get('fastq_rev_staging_file_name') is not None:
                self.uploader_utils.update_staging_service(
                    params.get('fastq_rev_staging_file_name'), returnVal['obj_ref'])

            reportVal = self.sra_importer.generate_report([returnVal['obj_ref']],
                                                          fastq_importer_params)
            returnVal.update(reportVal)
        elif params.get('import_type') == 'SRA':
            sra_importer_params = params
            sra_importer_params['staging_file_subdir_path'] = params.get(
                'sra_staging_file_name')

            returnVal = self.sra_importer.import_sra_from_staging(sra_importer_params)

            sra_importer_params['uploaded_files'] = [params.get('sra_staging_file_name')]

            """
            Update the workspace object related meta-data for staged file
            """
            self.uploader_utils.update_staging_service(
                params.get('sra_staging_file_name'), returnVal['obj_ref'])

            reportVal = self.sra_importer.generate_report([returnVal['obj_ref']],
                                                          sra_importer_params)
            returnVal.update(reportVal)

        return returnVal

    def _validate_import_reads_from_staging_params(self, params):
        """
        _validate_import_reads_from_staging_params:
                    validates params passed to import_reads_from_staging method
        """
        # check for required parameters
        for p in ['import_type', 'sequencing_tech', 'name', 'workspace_name']:
            if p not in params:
                raise ValueError('"{}" parameter is required, but missing'.format(p))

        valid_import_types = ['FASTQ/FASTA', 'SRA']
        if params.get('import_type') not in valid_import_types:
            error_msg = 'Import file type [{}] is not supported. '.format(
                params.get('import_type'))
            error_msg += 'Please select one of {}'.format(valid_import_types)
            raise ValueError(error_msg)

        if (params.get('import_type') == 'FASTQ/FASTA' and
                not params.get('fastq_fwd_staging_file_name')):
            error_msg = 'FASTQ/FASTA input file type selected. But missing FASTQ/FASTA file.'
            raise ValueError(error_msg)

        if (params.get('import_type') == 'SRA' and
                not params.get('sra_staging_file_name')):
            error_msg = 'SRA input file type selected. But missing SRA file.'
            raise ValueError(error_msg)

        if ((params.get('fastq_fwd_staging_file_name') and
                params.get('sra_staging_file_name')) or
                (params.get('fastq_rev_staging_file_name') and
                 params.get('sra_staging_file_name'))):
            error_msg = 'Both SRA and FASTQ/FASTA file given. Please provide one file type only.'
            raise ValueError(error_msg)
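# --- Example (not from the original source): hypothetical parameter dictionaries
# illustrating the two import_type branches that ImportReadsUtil.import_reads_from_staging
# accepts. File names and the workspace name are placeholders.
def _example_import_reads_params():
    fastq_params = {
        'import_type': 'FASTQ/FASTA',
        'fastq_fwd_staging_file_name': 'reads_R1.fastq',  # hypothetical forward reads
        'fastq_rev_staging_file_name': 'reads_R2.fastq',  # optional reverse reads
        'sequencing_tech': 'Illumina',
        'name': 'my_paired_reads',
        'workspace_name': 'my_workspace',
    }
    sra_params = {
        'import_type': 'SRA',
        'sra_staging_file_name': 'sample.sra',  # hypothetical staged SRA file
        'sequencing_tech': 'Illumina',
        'name': 'my_sra_reads',
        'workspace_name': 'my_workspace',
    }
    return fastq_params, sra_params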
class ImportPhenotypeSetUtil:

    def __init__(self, config):
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.dfu = DataFileUtil(self.callback_url)
        self.fba = fba_tools(self.callback_url)
        self.uploader_utils = UploaderUtil(config)

    def import_phenotype_set_from_staging(self, params):
        '''
        import_phenotype_set_from_staging: wrapper method for
                                    fba_tools.tsv_file_to_phenotype_set

        required params:
        staging_file_subdir_path - subdirectory file path
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name
        phenotype_set_name: output PhenotypeSet object name
        workspace_name: workspace name/ID of the object
        genome: Genome object that contains features referenced by the Phenotype Set

        return:
        obj_ref: return object reference
        '''
        log('--->\nrunning ImportPhenotypeSetUtil.import_phenotype_set_from_staging\n' +
            'params:\n{}'.format(json.dumps(params, indent=1)))

        self.validate_import_phenotype_set_from_staging_params(params)

        download_staging_file_params = {
            'staging_file_subdir_path': params.get('staging_file_subdir_path')
        }
        scratch_file_path = self.dfu.download_staging_file(
            download_staging_file_params).get('copy_file_path')
        file = {
            'path': scratch_file_path
        }
        import_phenotype_set_params = params.copy()
        import_phenotype_set_params['phenotype_set_file'] = file

        ref = self.fba.tsv_file_to_phenotype_set(import_phenotype_set_params)

        """
        Update the workspace object related meta-data for staged file
        """
        self.uploader_utils.update_staging_service(params.get('staging_file_subdir_path'),
                                                   ref.get('ref'))

        returnVal = {'obj_ref': ref.get('ref')}

        return returnVal

    def validate_import_phenotype_set_from_staging_params(self, params):
        """
        validate_import_phenotype_set_from_staging_params:
                    validates params passed to import_phenotype_set_from_staging method
        """
        # check for required parameters
        for p in ['staging_file_subdir_path', 'workspace_name', 'phenotype_set_name', 'genome']:
            if p not in params:
                raise ValueError('"{}" parameter is required, but missing'.format(p))

    def generate_report(self, obj_ref, params):
        """
        generate_report: generate summary report

        obj_ref: generated workspace object reference. (return of
                                                import_phenotype_set_from_staging)
        params:
        staging_file_subdir_path: subdirectory file path
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name
        workspace_name: workspace name/ID that the object will be stored to
        """
        uuid_string = str(uuid.uuid4())
        upload_message = 'Import Finished\n'

        get_objects_params = {
            'object_refs': [obj_ref],
            'ignore_errors': False
        }

        object_data = self.dfu.get_objects(get_objects_params)

        upload_message += "Phenotype Set Name: "
        upload_message += str(object_data.get('data')[0].get('info')[1]) + '\n'
        upload_message += 'Imported File: {}\n'.format(params.get('staging_file_subdir_path'))

        report_params = {'message': upload_message,
                         'objects_created': [{'ref': obj_ref,
                                              'description': 'Imported Phenotype Set'}],
                         'workspace_name': params.get('workspace_name'),
                         'report_object_name': 'kb_upload_methods_report_' + uuid_string}

        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {'report_name': output['name'], 'report_ref': output['ref']}

        return report_output
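# --- Example (not from the original source): a hypothetical call into
# ImportPhenotypeSetUtil, assuming a KBase SDK config dict is available. The staging
# path, genome reference, and workspace name are placeholders.
def _example_import_phenotype_set(config):
    importer = ImportPhenotypeSetUtil(config)
    params = {
        'staging_file_subdir_path': 'phenotypes.tsv',  # hypothetical staged TSV
        'phenotype_set_name': 'my_phenotype_set',
        'workspace_name': 'my_workspace',
        'genome': 'my_workspace/my_genome',            # genome referenced by the set
    }
    result = importer.import_phenotype_set_from_staging(params)
    return importer.generate_report(result['obj_ref'], params)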
class ImportFBAModelUtil:

    def __init__(self, config):
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.dfu = DataFileUtil(self.callback_url)
        self.fba = fba_tools(self.callback_url)
        self.uploader_utils = UploaderUtil(config)

    def import_fbamodel_from_staging(self, params):

        log('--->\nrunning {}.{}\n params:\n{}'.format(
            self.__class__.__name__, sys._getframe().f_code.co_name,
            json.dumps(params, indent=1)))

        self._check_param(params, ['model_file', 'file_type', 'workspace_name',
                                   'model_name', 'biomass'],
                          ['genome', 'compounds_file'])
        if params['file_type'] == 'tsv' and not params.get('compounds_file', None):
            raise ValueError('A compound file is required for tsv upload.')

        fba_tools_params = params.copy()
        for infile in ['model_file', 'compounds_file']:
            if not params.get(infile, None):
                continue
            download_staging_file_params = {
                'staging_file_subdir_path': params[infile]
            }
            scratch_file_path = self.dfu.download_staging_file(
                download_staging_file_params).get('copy_file_path')
            fba_tools_params[infile] = {'path': scratch_file_path}

        if params['file_type'] == 'sbml':
            res = self.fba.sbml_file_to_model(fba_tools_params)
        elif params['file_type'] == 'excel':
            res = self.fba.excel_file_to_model(fba_tools_params)
        elif params['file_type'] == 'tsv':
            res = self.fba.tsv_file_to_model(fba_tools_params)
        else:
            raise ValueError('"{}" is not a valid import file_type'.format(
                params['file_type']))

        """
        Update the workspace object related meta-data for staged file
        """
        self.uploader_utils.update_staging_service(
            download_staging_file_params.get('staging_file_subdir_path'),
            res['ref'])
        return {'obj_ref': res['ref']}

    @staticmethod
    def _check_param(in_params, req_param, opt_param=list()):
        """
        Check if each of the params in the list are in the input params
        """
        for param in req_param:
            if param not in in_params:
                raise ValueError('Required parameter "{}" is missing'.format(param))
        defined_param = set(req_param + opt_param)
        for param in in_params:
            if param not in defined_param:
                print('WARNING: received unexpected parameter "{}"'.format(param))

    def generate_report(self, obj_ref, params):
        """
        generate_report: generate summary report

        obj_ref: generated workspace object reference. (return of
                                                import_fbamodel_from_staging)
        params:
        staging_file_subdir_path: subdirectory file path
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name
        workspace_name: workspace name/ID that the object will be stored to
        """
        uuid_string = str(uuid.uuid4())
        upload_message = 'Import Finished\n'

        upload_message += "FBAModel Object Name: "
        upload_message += params['model_name'] + '\n'
        upload_message += 'Imported File: {}\n'.format(params.get('model_file'))

        report_params = {'message': upload_message,
                         'objects_created': [{'ref': obj_ref,
                                              'description': 'Imported FBAModel'}],
                         'workspace_name': params.get('workspace_name'),
                         'report_object_name': 'kb_upload_methods_report_' + uuid_string}

        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {'report_name': output['name'], 'report_ref': output['ref']}

        return report_output
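# --- Example (not from the original source): a hypothetical ImportFBAModelUtil call
# for an SBML upload. The file name, biomass id, and workspace are placeholders (the
# exact format expected for 'biomass' depends on fba_tools); a 'compounds_file' would
# additionally be required for file_type 'tsv'.
def _example_import_fbamodel(config):
    importer = ImportFBAModelUtil(config)
    params = {
        'model_file': 'my_model.sbml',   # hypothetical staged model file
        'file_type': 'sbml',             # one of 'sbml', 'excel', 'tsv'
        'model_name': 'my_fba_model',
        'biomass': 'bio1',               # placeholder biomass reaction id
        'workspace_name': 'my_workspace',
    }
    result = importer.import_fbamodel_from_staging(params)
    return importer.generate_report(result['obj_ref'], params)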
def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.dfu = DataFileUtil(self.callback_url)
    self.fv = KBaseFeatureValues(self.callback_url)
    self.uploader_utils = UploaderUtil(config)
class ImportExpressionMatrixUtil:

    def __init__(self, config):
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.dfu = DataFileUtil(self.callback_url)
        self.fv = KBaseFeatureValues(self.callback_url)
        self.uploader_utils = UploaderUtil(config)

    def import_tsv_as_expression_matrix_from_staging(self, params):
        '''
        import_tsv_as_expression_matrix_from_staging: wrapper method for
                                    KBaseFeatureValues.tsv_file_to_matrix

        required params:
        staging_file_subdir_path: subdirectory file path
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name
        matrix_name: output ExpressionMatrix object name
        workspace_name: workspace name/ID of the object

        optional params:
        genome_ref: optional reference to a Genome object that will be
                    used for mapping feature IDs
        fill_missing_values: optional flag for filling in missing values
                             in matrix (default value is false)
        data_type: optional field, value is one of 'untransformed', 'log2_level',
                   'log10_level', 'log2_ratio', 'log10_ratio' or 'unknown'
                   (last one is default value)
        data_scale: optional parameter (default value is '1.0')

        return:
        obj_ref: return object reference
        '''
        log('--->\nrunning ImportExpressionMatrixUtil.import_tsv_as_expression_matrix_from_staging\n' +
            'params:\n{}'.format(json.dumps(params, indent=1)))

        self.validate_import_tsv_as_expression_matrix_from_staging_params(params)

        download_staging_file_params = {
            'staging_file_subdir_path': params.get('staging_file_subdir_path')
        }
        scratch_file_path = self.dfu.download_staging_file(
            download_staging_file_params).get('copy_file_path')

        import_matrix_params = params
        import_matrix_params['input_file_path'] = scratch_file_path
        import_matrix_params['output_ws_name'] = params.get('workspace_name')
        import_matrix_params['output_obj_name'] = params.get('matrix_name')

        ref = self.fv.tsv_file_to_matrix(import_matrix_params)

        """
        Update the workspace object related meta-data for staged file
        """
        self.uploader_utils.update_staging_service(params.get('staging_file_subdir_path'),
                                                   ref.get('output_matrix_ref'))

        returnVal = {'obj_ref': ref.get('output_matrix_ref')}

        return returnVal

    def validate_import_tsv_as_expression_matrix_from_staging_params(self, params):
        """
        validate_import_tsv_as_expression_matrix_from_staging_params:
                    validates params passed to import_tsv_as_expression_matrix_from_staging method
        """
        # check for required parameters
        for p in ['staging_file_subdir_path', 'workspace_name', 'matrix_name']:
            if p not in params:
                raise ValueError('"' + p + '" parameter is required, but missing')

    def generate_report(self, obj_ref, params):
        """
        generate_report: generate summary report

        obj_ref: generated workspace object reference. (return of
                                        import_tsv_as_expression_matrix_from_staging)
        params:
        staging_file_subdir_path: subdirectory file path
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name
        workspace_name: workspace name/ID that the object will be stored to
        """
        uuid_string = str(uuid.uuid4())
        upload_message = 'Import Finished\n'

        get_objects_params = {
            'object_refs': [obj_ref],
            'ignore_errors': False
        }

        object_data = self.dfu.get_objects(get_objects_params)

        upload_message += "Expression Matrix Object Name: "
        upload_message += str(object_data.get('data')[0].get('info')[1]) + '\n'
        upload_message += 'Imported TSV File: {}\n'.format(
            params.get('staging_file_subdir_path'))

        report_params = {'message': upload_message,
                         'workspace_name': params.get('workspace_name'),
                         'report_object_name': 'kb_upload_methods_report_' + uuid_string}

        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {'report_name': output['name'], 'report_ref': output['ref']}

        return report_output
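# --- Example (not from the original source): a hypothetical ImportExpressionMatrixUtil
# call. The TSV path, matrix name, and workspace are placeholders; optional fields
# follow the docstring above.
def _example_import_expression_matrix(config):
    importer = ImportExpressionMatrixUtil(config)
    params = {
        'staging_file_subdir_path': 'expression.tsv',  # hypothetical staged TSV
        'matrix_name': 'my_expression_matrix',
        'workspace_name': 'my_workspace',
        'fill_missing_values': 0,                      # optional, off by default
    }
    result = importer.import_tsv_as_expression_matrix_from_staging(params)
    return importer.generate_report(result['obj_ref'], params)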
def upload_fastq_file(self, ctx, params):
    """
    :param params: instance of type "UploadMethodParams" (sequencing_tech:
       sequencing technology name: output reads file name workspace_name:
       workspace name/ID of the object For files in user's staging area:
       fwd_staging_file_name: single-end fastq file name or forward/left
       paired-end fastq file name from user's staging area
       rev_staging_file_name: reverse/right paired-end fastq file name
       user's staging area For files from web: download_type: download type
       for web source fastq file ('Direct Download', 'FTP', 'DropBox',
       'Google Drive') fwd_file_url: single-end fastq file URL or
       forward/left paired-end fastq file URL rev_file_url: reverse/right
       paired-end fastq file URL urls_to_add: used for parameter-groups.
       dict of {fwd_file_url, rev_file_url, name, single_genome,
       interleaved, insert_size_mean and read_orientation_outward} Optional
       Params: single_genome: whether the reads are from a single genome or
       a metagenome. interleaved: whether reads is interleaved
       insert_size_mean: mean (average) insert length insert_size_std_dev:
       standard deviation of insert lengths read_orientation_outward:
       whether reads in a pair point outward) -> structure: parameter
       "workspace_name" of type "workspace_name" (workspace name of the
       object), parameter "fwd_staging_file_name" of type
       "fwd_staging_file_name" (input and output file path/url), parameter
       "rev_staging_file_name" of type "rev_staging_file_name", parameter
       "download_type" of type "download_type", parameter "fwd_file_url" of
       type "fwd_file_url", parameter "rev_file_url" of type "rev_file_url",
       parameter "sequencing_tech" of type "sequencing_tech", parameter
       "name" of type "name", parameter "urls_to_add" of type "urls_to_add"
       -> structure: parameter "fwd_file_url" of type "fwd_file_url",
       parameter "rev_file_url" of type "rev_file_url", parameter "name" of
       type "name", parameter "single_genome" of type "single_genome",
       parameter "interleaved" of type "interleaved", parameter
       "insert_size_mean" of type "insert_size_mean", parameter
       "insert_size_std_dev" of type "insert_size_std_dev", parameter
       "read_orientation_outward" of type "read_orientation_outward",
       parameter "single_genome" of type "single_genome", parameter
       "interleaved" of type "interleaved", parameter "insert_size_mean" of
       type "insert_size_mean", parameter "insert_size_std_dev" of type
       "insert_size_std_dev", parameter "read_orientation_outward" of type
       "read_orientation_outward"
    :returns: instance of type "UploadMethodResult" -> structure: parameter
       "obj_ref" of type "obj_ref", parameter "report_name" of type
       "report_name", parameter "report_ref" of type "report_ref"
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN upload_fastq_file
    print '--->\nRunning uploadmethods.upload_fastq_file\nparams:'
    print json.dumps(params, indent=1)

    if params.get('urls_to_add'):
        returnVal = {'obj_ref': ''}
        for params_item in params.get('urls_to_add'):
            params_item['workspace_name'] = params.get('workspace_name')
            params_item['download_type'] = params.get('download_type')
            params_item['sequencing_tech'] = params.get('sequencing_tech')
            params_item['interleaved'] = params.get('interleaved')
            for key, value in params_item.iteritems():
                if isinstance(value, basestring):
                    params_item[key] = value.strip()
            fastqUploader = UploaderUtil(self.config)
            itemReturnVal = fastqUploader.upload_fastq_file(params_item)
            returnVal['obj_ref'] += itemReturnVal['obj_ref'] + ','
        returnVal['obj_ref'] = returnVal['obj_ref'][:-1]
    else:
        for key, value in params.iteritems():
            if isinstance(value, basestring):
                params[key] = value.strip()
        fastqUploader = UploaderUtil(self.config)
        returnVal = fastqUploader.upload_fastq_file(params)

    reportVal = fastqUploader.generate_report(returnVal['obj_ref'], params)
    returnVal.update(reportVal)
    #END upload_fastq_file

    # At some point might do deeper type checking...
    if not isinstance(returnVal, dict):
        raise ValueError('Method upload_fastq_file return value ' +
                         'returnVal is not type dict as required.')
    # return the results
    return [returnVal]
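# --- Example (not from the original source): a hypothetical params structure for
# upload_fastq_file when importing from web URLs via 'urls_to_add'. The URLs and
# names are placeholders; each item inherits workspace_name/download_type/
# sequencing_tech/interleaved from the top level, as the method above shows.
EXAMPLE_UPLOAD_FASTQ_WEB_PARAMS = {
    'workspace_name': 'my_workspace',
    'download_type': 'Direct Download',
    'sequencing_tech': 'Illumina',
    'interleaved': 0,
    'urls_to_add': [
        {'fwd_file_url': 'https://example.org/reads_R1.fastq.gz',  # placeholder URL
         'rev_file_url': 'https://example.org/reads_R2.fastq.gz',  # placeholder URL
         'name': 'web_paired_reads',
         'insert_size_mean': 350,
         'read_orientation_outward': 0},
    ],
}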
class ImportAssemblyUtil:

    def __init__(self, config):
        self.callback_url = config['SDK_CALLBACK_URL']
        self.scratch = os.path.join(config['scratch'], 'import_assembly_' + str(uuid.uuid4()))
        handler_utils._mkdir_p(self.scratch)
        self.token = config['KB_AUTH_TOKEN']
        self.dfu = DataFileUtil(self.callback_url)
        self.au = AssemblyUtil(self.callback_url)
        self.uploader_utils = UploaderUtil(config)

    def import_fasta_as_assembly_from_staging(self, params):
        '''
        import_fasta_as_assembly_from_staging: wrapper method for
                                    AssemblyUtil.save_assembly_from_fasta

        required params:
        staging_file_subdir_path - subdirectory file path
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name
        assembly_name - output Assembly file name
        workspace_name - the name of the workspace it gets saved to.

        return:
        obj_ref: return object reference
        '''
        log('--->\nrunning ImportAssemblyUtil.import_fasta_as_assembly_from_staging\n' +
            'params:\n{}'.format(json.dumps(params, indent=1)))

        self.validate_import_fasta_as_assembly_from_staging(params)

        download_staging_file_params = {
            'staging_file_subdir_path': params.get('staging_file_subdir_path')
        }
        scratch_file_path = self.dfu.download_staging_file(
            download_staging_file_params).get('copy_file_path')
        file = {'path': scratch_file_path}
        import_assembly_params = params
        import_assembly_params['file'] = file

        ref = self.au.save_assembly_from_fasta(import_assembly_params)

        """
        Update the workspace object related meta-data for staged file
        """
        self.uploader_utils.update_staging_service(params.get('staging_file_subdir_path'), ref)

        returnVal = {'obj_ref': ref}
        return returnVal

    def validate_import_fasta_as_assembly_from_staging(self, params):
        """
        validate_import_fasta_as_assembly_from_staging:
                    validates params passed to import_fasta_as_assembly_from_staging method
        """
        # check for required parameters
        for p in ['staging_file_subdir_path', 'workspace_name', 'assembly_name']:
            if p not in params:
                raise ValueError('"' + p + '" parameter is required, but missing')

    def generate_html_report(self, assembly_ref, assembly_object, params):
        """
        _generate_html_report: generate html summary report
        """
        log('start generating html report')
        html_report = list()

        assembly_data = assembly_object.get('data')[0].get('data')
        assembly_info = assembly_object.get('data')[0].get('info')

        result_file_path = os.path.join(self.scratch, 'report.html')

        assembly_name = str(assembly_info[1])
        assembly_file = params.get('staging_file_subdir_path')
        dna_size = assembly_data.get('dna_size')
        num_contigs = assembly_data.get('num_contigs')

        assembly_overview_data = collections.OrderedDict()

        assembly_overview_data['Name'] = '{} ({})'.format(assembly_name, assembly_ref)
        assembly_overview_data['Uploaded File'] = assembly_file
        assembly_overview_data['Date Uploaded'] = time.strftime("%c")
        assembly_overview_data['DNA Size'] = dna_size
        assembly_overview_data['Number of Contigs'] = num_contigs

        overview_content = ''
        overview_content += '<br/><table>\n'
        for key, val in assembly_overview_data.iteritems():
            overview_content += '<tr><td><b>{}</b></td>'.format(key)
            overview_content += '<td>{}</td>'.format(val)
            overview_content += '</tr>\n'
        overview_content += '</table>'

        contig_data = assembly_data.get('contigs').values()
        contig_content = str([[str(e['contig_id']), e['length']] for e in contig_data])

        with open(result_file_path, 'w') as result_file:
            with open(os.path.join(os.path.dirname(__file__),
                                   'report_template_assembly.html'),
                      'r') as report_template_file:
                report_template = report_template_file.read()
                report_template = report_template.replace('<p>*Overview_Content*</p>',
                                                          overview_content)
                report_template = report_template.replace('*CONTIG_DATA*',
                                                          contig_content)
                result_file.write(report_template)
            result_file.close()

        report_shock_id = self.dfu.file_to_shock({'file_path': self.scratch,
                                                  'pack': 'zip'})['shock_id']

        html_report.append({'shock_id': report_shock_id,
                            'name': os.path.basename(result_file_path),
                            'label': os.path.basename(result_file_path),
                            'description': 'HTML summary report for Imported Assembly'})
        return html_report

    def generate_report(self, obj_ref, params):
        """
        generate_report: generate summary report

        obj_ref: generated workspace object reference. (return of
                                                import_fasta_as_assembly_from_staging)
        params:
        staging_file_subdir_path: subdirectory file path
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name
        workspace_name: workspace name/ID that the object will be stored to
        """
        uuid_string = str(uuid.uuid4())

        get_objects_params = {
            'object_refs': [obj_ref],
            'ignore_errors': False
        }
        object_data = self.dfu.get_objects(get_objects_params)

        objects_created = [{'ref': obj_ref,
                            'description': 'Imported Assembly'}]

        output_html_files = self.generate_html_report(obj_ref, object_data, params)

        report_params = {
            'message': '',
            'workspace_name': params.get('workspace_name'),
            'objects_created': objects_created,
            'html_links': output_html_files,
            'direct_html_link_index': 0,
            'html_window_height': 270,
            'report_object_name': 'kb_upload_assembly_report_' + uuid_string}

        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {'report_name': output['name'], 'report_ref': output['ref']}

        return report_output
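# --- Example (not from the original source): a hypothetical ImportAssemblyUtil call.
# The FASTA path, assembly name, and workspace are placeholders supplied by the app
# cell in a real run.
def _example_import_assembly(config):
    importer = ImportAssemblyUtil(config)
    params = {
        'staging_file_subdir_path': 'contigs.fasta',  # hypothetical staged FASTA
        'assembly_name': 'my_assembly',
        'workspace_name': 'my_workspace',
    }
    result = importer.import_fasta_as_assembly_from_staging(params)
    return importer.generate_report(result['obj_ref'], params)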
def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.dfu = DataFileUtil(self.callback_url)
    self.gfu = GenomeFileUtil(self.callback_url, service_ver='beta')
    self.uploader_utils = UploaderUtil(config)
class ImportGFFFastaUtil:

    def __init__(self, config):
        self.callback_url = config['SDK_CALLBACK_URL']
        self.dfu = DataFileUtil(self.callback_url)
        self.gfu = GenomeFileUtil(self.callback_url)
        self.uploader_utils = UploaderUtil(config)

    def import_gff_fasta_from_staging(self, params):
        """
        import_gff_fasta_from_staging: wrapper method for GenomeFileUtil.fasta_gff_to_genome

        required params:
        fasta_file: fasta file from user's staging area
        gff_file: gff file from user's staging area
        genome_name: output genome object name
        workspace_name: workspace name that genome will be stored to

        file paths for both fasta and gff files must be subdirectory file paths in staging area
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name

        optional params:
        scientific_name: proper name for species, key for taxonomy lookup. Defaults to 'unknown_taxon'
        source: source of the GFF/FASTA file. Defaults to 'User'
        taxon_wsname - where the reference taxons are. Defaults to 'ReferenceTaxons'
        taxon_reference - if defined, will try to link the Genome to the specified taxonomy object
        release: release or version of the source data
        genetic_code: genetic code for the organism
        type: 'Reference', 'User upload', 'Representative'

        return:
        genome_ref: return object reference
        report_name: name of generated report (if any)
        report_ref: report reference (if any)
        """
        log('--->\nrunning ImportGFFFastaUtil.import_gff_fasta_from_staging\n' +
            'params:\n{}'.format(json.dumps(params, indent=1)))

        self.validate_import_gff_fasta_from_staging_params(params)

        for key in ('fasta_file', 'gff_file'):
            file_path = params[key]
            download_staging_file_params = {
                'staging_file_subdir_path': file_path
            }
            dfu_returnVal = self.dfu.download_staging_file(download_staging_file_params)
            params[key] = {'path': dfu_returnVal['copy_file_path']}

        returnVal = self.gfu.fasta_gff_to_genome(params)

        """
        Update the workspace object related meta-data for staged file
        """
        self.uploader_utils.update_staging_service(
            download_staging_file_params.get('staging_file_subdir_path'),
            returnVal['genome_ref'])
        return returnVal

    def validate_import_gff_fasta_from_staging_params(self, params):
        """
        validate_import_gff_fasta_from_staging_params:
                    validates params passed to import_gff_fasta_from_staging method
        """
        # check for required parameters
        for p in ['genome_name', 'workspace_name', 'fasta_file', 'gff_file']:
            if p not in params:
                raise ValueError('"' + p + '" parameter is required, but missing')

        # for now must use workspace name, but no ws_id_to_name() function available
        if str(params["workspace_name"]).isdigit():
            error_msg = '"{}" parameter is a workspace id and workspace name is required'.format(
                params["workspace_name"])
            raise ValueError(error_msg)
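# --- Example (not from the original source): a hypothetical ImportGFFFastaUtil call.
# File paths, genome name, and workspace are placeholders; note the validator above
# rejects numeric workspace ids, so a workspace *name* must be given.
def _example_import_gff_fasta(config):
    importer = ImportGFFFastaUtil(config)
    params = {
        'fasta_file': 'genome/assembly.fasta',  # hypothetical staged FASTA
        'gff_file': 'genome/features.gff',      # hypothetical staged GFF3
        'genome_name': 'my_genome',
        'workspace_name': 'my_workspace',
        'scientific_name': 'Escherichia coli',  # optional
    }
    return importer.import_gff_fasta_from_staging(params)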
def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.dfu = DataFileUtil(self.callback_url)
    self.genapi = GenericsAPI(self.callback_url)
    self.uploader_utils = UploaderUtil(config)