def __init__(self, config):
    """Create the service clients and helpers used by the module's methods.

    :param config: deployment configuration mapping; only
        ``config['scratch']`` is read here and kept as the shared
        scratch folder.
    """
    #BEGIN_CONSTRUCTOR
    callback_url = os.environ['SDK_CALLBACK_URL']
    self.callback_url = callback_url
    self.shared_folder = config['scratch']
    # The SDK clients below all receive the same callback URL.
    self.SSU = SequenceSetUtils(callback_url)
    self.MOU = MotifUtils(callback_url)
    self.MEU = MemeUtil(self.shared_folder)
    self.dfu = DataFileUtil(callback_url)
    self.GR = GenerateReport()
    #END_CONSTRUCTOR
def find_motifs(self, ctx, params):
    """
    Run HOMER motif discovery on a promoter FASTA file, upload the
    resulting motif set and create an HTML report for it.

    :param params: instance of type "find_motifs_params" (SS_ref -
       optional, used for exact genome locations if possible) ->
       structure: parameter "workspace_name" of String, parameter
       "fastapath" of String, parameter "motif_min_length" of Long,
       parameter "motif_max_length" of Long, parameter "SS_ref" of
       String, parameter "obj_name" of String
    :returns: instance of type "extract_output_params" -> structure:
       parameter "report_name" of String, parameter "report_ref" of String
    :raises KeyError: if "workspace_name", "fastapath", "obj_name" or
       "background" is missing from params ("background" is read by the
       code although it is not listed above).
    :raises ValueError: if the HTML report directory cannot be uploaded.
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN find_motifs
    import html  # local import: only used to escape the FASTA text below

    # Defaults are written back into params, as callers may inspect them.
    params.setdefault('motif_min_length', 8)
    params.setdefault('motif_max_length', 16)
    motMin = params['motif_min_length']
    motMax = params['motif_max_length']
    promoterFastaFilePath = params['fastapath']

    HU = HomerUtil()
    HomerMotifCommand = HU.build_homer_motif_command(
        promoterFastaFilePath, motMin, motMax, params['background'])
    HomerLocationCommand = HU.build_homer_location_command(
        promoterFastaFilePath)
    HU.run_homer_command(HomerMotifCommand)
    HU.run_homer_command(HomerLocationCommand)

    homer_out_path = '/kb/module/work/tmp/homer_out'
    homer_params = {
        'ws_name': params['workspace_name'],
        'path': homer_out_path + '/homerMotifs.all.motifs',
        'location_path': homer_out_path + '/homer_locations.txt',
        'obj_name': params['obj_name']
    }
    MOU = MotifUtils(self.callback_url)
    dfu = DataFileUtil(self.callback_url)

    locDict = {}
    if 'SS_ref' in params:
        get_ss_params = {'object_refs': [params['SS_ref']]}
        SS = dfu.get_objects(get_ss_params)['data'][0]['data']
        for s in SS['sequences']:
            source = s['source']
            if source['assembly_id'] != '' and source['location'] != []:
                # NOTE(review): the key is the literal string
                # 'sequence_id', so every qualifying sequence overwrites
                # the previous entry. It looks like the sequence's own id
                # was intended -- confirm against what MotifUtils expects
                # before changing it.
                locDict['sequence_id'] = {
                    'contig': source['location'][0][0],
                    'start': str(source['location'][0][1])
                }
    if locDict:
        homer_params['absolute_locations'] = locDict
    homer_params['min_len'] = motMin
    homer_params['max_len'] = motMax

    obj_ref = MOU.UploadFromHomer(homer_params)['obj_ref']
    HU.write_obj_ref(homer_out_path, obj_ref)

    # Millisecond timestamp keeps the report directory name unique per run.
    timestamp = str(int(
        (datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()
        * 1000))
    htmlDir = self.shared_folder + '/html' + timestamp
    os.mkdir(htmlDir)

    with open(promoterFastaFilePath, 'r') as pFile:
        fileStr = pFile.read()
    # Escape the FASTA text so '<', '>' and '&' in headers cannot break
    # (or inject markup into) the generated page.
    promHtmlStr = '<html><body> ' + html.escape(fileStr) + ' </body></html>'
    with open(htmlDir + '/promoters.html', 'w') as promHTML:
        promHTML.write(promHtmlStr)

    get_obj_params = {'object_refs': [obj_ref]}
    homerMotifSet = dfu.get_objects(get_obj_params)['data'][0]['data']
    mr = MakeNewReport()
    mr.MakeReport(htmlDir, homerMotifSet)

    try:
        html_upload_ret = dfu.file_to_shock({
            'file_path': htmlDir,
            'make_handle': 0,
            'pack': 'zip'
        })
    except Exception as err:
        # Keep the original failure attached as the cause.
        raise ValueError('error uploading HTML file to shock') from err

    reportName = 'HomerMotifFinder_report_' + str(uuid.uuid4())
    reportObj = {
        'objects_created': [{
            'ref': obj_ref,
            'description': 'Motif Set generated by Homer'
        }],
        'message': '',
        'direct_html': '',
        'direct_html_link_index': 0,
        'file_links': [],
        'html_links': [{
            'shock_id': html_upload_ret['shock_id'],
            'name': 'index.html',
            'label': 'Save promoter_download.zip'
        }],
        'html_window_height': 220,
        'workspace_name': params['workspace_name'],
        'report_object_name': reportName
    }

    report = KBaseReport(self.callback_url, token=ctx['token'])
    report_info = report.create_extended_report(reportObj)
    output = {
        'report_name': report_info['name'],
        'report_ref': report_info['ref']
    }
    #END find_motifs

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method find_motifs return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def find_motifs(self, ctx, params):
    """
    Run MEME motif discovery on a promoter FASTA file, upload the
    resulting motif set and create an HTML report for it.

    :param params: instance of type "find_motifs_params" (SS_ref -
       optional, used for exact genome locations if possible) ->
       structure: parameter "workspace_name" of String, parameter
       "fastapath" of String, parameter "motif_min_length" of Long,
       parameter "motif_max_length" of Long, parameter "SS_ref" of
       String, parameter "obj_name" of String, parameter "background" of
       Long
    :returns: instance of type "extract_output_params" -> structure:
       parameter "report_name" of String, parameter "report_ref" of String
    :raises KeyError: if "workspace_name", "fastapath", "obj_name" or
       "background" is missing from params.
    :raises ValueError: if the HTML report directory cannot be uploaded.
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN find_motifs
    #TODO: Things to fix in here...
    # Use MotifUtils to parse output and create object
    # create new function for report ?

    # Defaults are written back into params, as callers may inspect them.
    params.setdefault('motif_min_length', 8)
    params.setdefault('motif_max_length', 16)
    motMin = params['motif_min_length']
    motMax = params['motif_max_length']
    promoterFastaFilePath = params['fastapath']

    MEU = MemeUtil()
    MEMEMotifCommand = MEU.build_meme_command(
        promoterFastaFilePath, motMin, motMax, params['background'])
    MEU.run_meme_command(MEMEMotifCommand)

    meme_out_dir = '/kb/module/work/tmp/meme_out'
    meme_out_path = meme_out_dir + '/meme.txt'
    meme_params = {
        'ws_name': params['workspace_name'],
        'path': meme_out_path,
        'obj_name': params['obj_name']
    }
    MOU = MotifUtils(self.callback_url)
    dfu = DataFileUtil(self.callback_url)

    locDict = {}
    if 'SS_ref' in params:
        get_ss_params = {'object_refs': [params['SS_ref']]}
        SS = dfu.get_objects(get_ss_params)['data'][0]['data']
        for s in SS['sequences']:
            source = s['source']
            if source['assembly_id'] != '' and source['location'] != []:
                # NOTE(review): the key is the literal string
                # 'sequence_id', so every qualifying sequence overwrites
                # the previous entry. It looks like the sequence's own id
                # was intended -- confirm against what MotifUtils expects
                # before changing it.
                locDict['sequence_id'] = {
                    'contig': source['location'][0][0],
                    'start': str(source['location'][0][1])
                }
    if locDict:
        meme_params['absolute_locations'] = locDict
    meme_params['min_len'] = motMin
    meme_params['max_len'] = motMax

    obj_ref = MOU.UploadFromMEME(meme_params)['obj_ref']
    MEU.write_obj_ref(meme_out_dir, obj_ref)

    # Millisecond timestamp keeps the report directory name unique per run.
    timestamp = str(int(
        (datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()
        * 1000))
    htmlDir = self.shared_folder + '/html' + timestamp
    os.mkdir(htmlDir)

    get_obj_params = {'object_refs': [obj_ref]}
    memeMotifSet = dfu.get_objects(get_obj_params)['data'][0]['data']
    GR = GenerateReport()
    GR.MakeMotifReport(htmlDir, memeMotifSet)

    try:
        html_upload_ret = dfu.file_to_shock({
            'file_path': htmlDir,
            'make_handle': 0,
            'pack': 'zip'
        })
    except Exception as err:
        # Keep the original failure attached as the cause.
        raise ValueError('error uploading HTML file to shock') from err

    #TODO set parameters correctly
    reportName = 'MEMEMotifFinder_report_' + str(uuid.uuid4())
    reportObj = {
        'objects_created': [{
            'ref': obj_ref,
            'description': 'Motif Set generated by MEME'
        }],
        'message': '',
        'direct_html': '',
        'direct_html_link_index': 0,
        'file_links': [],
        'html_links': [{
            'shock_id': html_upload_ret['shock_id'],
            'name': 'index.html',
            'label': 'Save promoter_download.zip'
        }],
        'html_window_height': 220,
        'workspace_name': params['workspace_name'],
        'report_object_name': reportName
    }

    report = KBaseReport(self.callback_url, token=ctx['token'])
    report_info = report.create_extended_report(reportObj)
    output = {
        'report_name': report_info['name'],
        'report_ref': report_info['ref']
    }
    #END find_motifs

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method find_motifs return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
class MotifFinderMEME:
    '''
    Module Name:
    MotifFinderMEME

    Module Description:
    A KBase module: MotifFinderMEME
    '''

    ######## WARNING FOR GEVENT USERS ####### noqa
    # Since asynchronous IO can lead to methods - even the same method -
    # interrupting each other, you must be *very* careful when using global
    # state. A method could easily clobber the state set by another while
    # the latter method is running.
    ######################################### noqa
    VERSION = "0.0.1"
    GIT_URL = "https://github.com/kbasecollaborations/MotifFinderMEME.git"
    GIT_COMMIT_HASH = "ed2e967cf8aa213388e4166525cd8ce2918d4fae"

    #BEGIN_CLASS_HEADER
    #END_CLASS_HEADER

    # config contains contents of config file in a hash or None if it couldn't
    # be found
    def __init__(self, config):
        #BEGIN_CONSTRUCTOR
        self.callback_url = os.environ['SDK_CALLBACK_URL']
        self.shared_folder = config['scratch']
        # The SDK clients below all receive the same callback URL.
        self.SSU = SequenceSetUtils(self.callback_url)
        self.MOU = MotifUtils(self.callback_url)
        self.MEU = MemeUtil(self.shared_folder)
        self.dfu = DataFileUtil(self.callback_url)
        self.GR = GenerateReport()
        #END_CONSTRUCTOR
        pass

    def find_motifs(self, ctx, params):
        """
        Export a SequenceSet to FASTA, run MEME on it, save the motif set
        and create an HTML report for it.

        :param params: instance of type "find_motifs_params" (SS_ref -
           optional, used for exact genome locations if possible) ->
           structure: parameter "workspace_name" of String, parameter
           "fastapath" of String, parameter "motif_min_length" of Long,
           parameter "motif_max_length" of Long, parameter "SS_ref" of
           String, parameter "obj_name" of String, parameter "background" of
           Long
        :returns: instance of type "extract_output_params" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        :raises KeyError: if "workspace_name", "SS_ref" or "obj_name" is
           missing from params.
        :raises ValueError: if the HTML report directory cannot be uploaded.
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN find_motifs
        # Defaults are written back into params, as callers may inspect them.
        params.setdefault('motif_min_length', 8)
        params.setdefault('motif_max_length', 16)
        motMin = params['motif_min_length']
        motMax = params['motif_max_length']
        # 0 is the value DiscoverMotifsFromSequenceSet passes when no
        # background is requested, so use it when the key is absent.
        background = params.get('background', 0)

        promoterFastaFilePath = self.SSU.SeqSetToFasta({
            'ws_name': params['workspace_name'],
            'SS_ref': params['SS_ref']
        })['path']

        MEMEMotifCommand = self.MEU.build_meme_command(
            promoterFastaFilePath, motMin, motMax, background)
        meme_out_path = self.MEU.run_meme_command(MEMEMotifCommand)

        meme_params = {
            'ws_name': params['workspace_name'],
            'format': 'MEME',
            'file': {
                'path': meme_out_path
            },
            'obj_name': params['obj_name'],
            'seq_set_ref': params['SS_ref']
        }

        # MOU.parseMotifSet with the same parameters will
        # return a dictionary of the motifset object that you save on
        # your own
        #
        # MOU.saveMotifSet will save the object with DataFileUtils to
        # whatever workspace you specify in ws_name
        #
        # This function will also download the sequence set as a fasta to
        # unique (uuid4) file name in the scratch directory
        obj_ref = self.MOU.saveMotifSet(meme_params)

        # Millisecond timestamp keeps the report directory name unique.
        timestamp = str(int(
            (datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()
            * 1000))
        htmlDir = self.shared_folder + '/html' + timestamp
        os.mkdir(htmlDir)

        get_obj_params = {'object_refs': [obj_ref]}
        memeMotifSet = self.dfu.get_objects(get_obj_params)['data'][0]['data']
        self.GR.MakeMotifReport(htmlDir, memeMotifSet)

        try:
            html_upload_ret = self.dfu.file_to_shock({
                'file_path': htmlDir,
                'make_handle': 0,
                'pack': 'zip'
            })
        except Exception as err:
            # Keep the original failure attached as the cause.
            raise ValueError('Error uploading HTML file: ' + str(htmlDir) +
                             ' to shock') from err

        reportname = 'MEMEMotifFinder_report_' + str(uuid.uuid4())
        reportobj = {
            'objects_created': [{
                'ref': obj_ref,
                'description': 'Motif Set generated by MEME'
            }],
            'message': '',
            'direct_html': '',
            'direct_html_link_index': 0,
            'file_links': [],
            'html_links': [{
                'shock_id': html_upload_ret['shock_id'],
                'name': 'index.html',
                'label': 'Save promoter_download.zip'
            }],
            'html_window_height': 220,
            'workspace_name': params['workspace_name'],
            'report_object_name': reportname
        }

        report = KBaseReport(self.callback_url, token=ctx['token'])
        report_info = report.create_extended_report(reportobj)
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref']
        }
        #END find_motifs

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method find_motifs return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]

    def ExtractPromotersFromFeatureSetandDiscoverMotifs(self, ctx, params):
        """
        Build a SequenceSet of upstream regions from a FeatureSet and run
        find_motifs on it.

        :param params: instance of type "extract_input" -> structure:
           parameter "workspace_name" of String, parameter "genome_ref" of
           String, parameter "featureSet_ref" of String, parameter
           "promoter_length" of Long, parameter "motif_min_length" of Long,
           parameter "motif_max_length" of Long, parameter "obj_name" of
           String
        :returns: instance of type "extract_output_params" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN ExtractPromotersFromFeatureSetandDiscoverMotifs
        BuildParams = {
            'ws_name': params['workspace_name'],
            'FeatureSet_ref': params['featureSet_ref'],
            'genome_ref': params['genome_ref'],
            'upstream_length': params['promoter_length']
        }
        SSref = self.SSU.buildFromFeatureSet(BuildParams)['SequenceSet_ref']

        # find_motifs exports the SequenceSet to FASTA itself (via
        # SSU.SeqSetToFasta), so no separate FASTA step is needed here.
        # The previous call to self.BuildFastaFromSequenceSet targeted a
        # method this class does not define.
        findmotifsparams = {
            'workspace_name': params['workspace_name'],
            'motif_min_length': params['motif_min_length'],
            'motif_max_length': params['motif_max_length'],
            'SS_ref': SSref,
            'obj_name': params['obj_name'],
            # Forward an explicit background flag; default to "none" (0).
            'background': params.get('background', 0)
        }
        output = self.find_motifs(ctx, findmotifsparams)[0]
        #END ExtractPromotersFromFeatureSetandDiscoverMotifs

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError(
                'Method ExtractPromotersFromFeatureSetandDiscoverMotifs return value '
                + 'output is not type dict as required.')
        # return the results
        return [output]

    def DiscoverMotifsFromFasta(self, ctx, params):
        """
        Placeholder: always raises NotImplementedError.

        :param params: instance of type "discover_fasta_input" -> structure:
           parameter "workspace_name" of String, parameter "fasta_path" of
           String
        :returns: instance of type "extract_output_params" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        :raises NotImplementedError: always.
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN DiscoverMotifsFromFasta
        raise NotImplementedError(
            'Discovery of motifs from a FASTA file is not yet implemented.')
        #END DiscoverMotifsFromFasta

        # Unreachable until the method is implemented; kept because the
        # code outside the BEGIN/END markers is the generated wrapper.
        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method DiscoverMotifsFromFasta return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]

    def DiscoverMotifsFromSequenceSet(self, ctx, params):
        """
        Translate the app parameters into find_motifs parameters and run
        find_motifs on the given SequenceSet.

        :param params: instance of type "discover_seq_input" -> structure:
           parameter "workspace_name" of String, parameter "genome_ref" of
           String, parameter "SS_ref" of String, parameter "promoter_length"
           of Long, parameter "motif_min_length" of Long, parameter
           "motif_max_length" of Long, parameter "obj_name" of String,
           parameter "background" of Long, parameter "mask_repeats" of Long,
           parameter "background_group" of mapping from String to String
        :returns: instance of type "extract_output_params" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN DiscoverMotifsFromSequenceSet
        # A missing or null background group means "no background".
        if params.get('background_group') is None:
            params['background_group'] = {'background': 0}
        use_background = params['background_group'].get('background') == 1

        findmotifsparams = {
            'workspace_name': params['workspace_name'],
            'motif_min_length': params['motif_min_length'],
            'motif_max_length': params['motif_max_length'],
            'SS_ref': params['SS_ref'],
            'obj_name': params['obj_name'],
            'background': 1 if use_background else 0
        }

        output = self.find_motifs(ctx, findmotifsparams)[0]
        #END DiscoverMotifsFromSequenceSet

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError(
                'Method DiscoverMotifsFromSequenceSet return value ' +
                'output is not type dict as required.')
        # return the results
        return [output]

    def status(self, ctx):
        """Return the module's state and version metadata."""
        #BEGIN_STATUS
        returnVal = {
            'state': "OK",
            'message': "",
            'version': self.VERSION,
            'git_url': self.GIT_URL,
            'git_commit_hash': self.GIT_COMMIT_HASH
        }
        #END_STATUS
        return [returnVal]
def find_motifs(self, ctx, params):
    """
    Run the Gibbs sampler on a promoter FASTA file for a range of motif
    lengths, upload the resulting motif set and create an HTML report.

    :param params: instance of type "find_motifs_params" (SS_ref -
       optional, used for exact genome locations if possible) ->
       structure: parameter "workspace_name" of String, parameter
       "fastapath" of String, parameter "motif_min_length" of Long,
       parameter "motif_max_length" of Long, parameter "SS_ref" of String
    :returns: instance of type "extract_output_params" -> structure:
       parameter "report_name" of String, parameter "report_ref" of String
    :raises KeyError: if "workspace_name", "fastapath" or "obj_name" is
       missing from params ("obj_name" is read by the code although it is
       not listed above).
    :raises ValueError: if the HTML report directory cannot be uploaded.
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN find_motifs
    import html  # local import: only used to escape the FASTA text below

    # Defaults are written back into params, as callers may inspect them.
    params.setdefault('motif_min_length', 8)
    params.setdefault('motif_max_length', 16)
    motMin = params['motif_min_length']
    motMax = params['motif_max_length']
    promoterFastaFilePath = params['fastapath']

    # NOTE(review): GU and MakeReport are not defined in this method;
    # presumably they are module-level imports -- confirm.
    # One Gibbs run per motif length, stepping by 2 from min to max.
    gibbsCommandList = [
        GU.build_gibbs_command(promoterFastaFilePath, length)
        for length in range(motMin, motMax + 1, 2)
    ]
    for g in gibbsCommandList:
        GU.run_gibbs_command(g)

    gibbs_out_path = '/kb/module/work/tmp/gibbs'
    gibbs_params = {
        'ws_name': params['workspace_name'],
        'path': gibbs_out_path,
        'obj_name': params['obj_name']
    }
    MOU = MotifUtils(self.callback_url)
    dfu = DataFileUtil(self.callback_url)

    locDict = {}
    if 'SS_ref' in params:
        get_ss_params = {'object_refs': [params['SS_ref']]}
        SS = dfu.get_objects(get_ss_params)['data'][0]['data']
        for s in SS['sequences']:
            if s['source'] is not None:
                # NOTE(review): the key is the literal string
                # 'sequence_id', so every qualifying sequence overwrites
                # the previous entry. It looks like the sequence's own id
                # was intended -- confirm against what MotifUtils expects
                # before changing it.
                locDict['sequence_id'] = {
                    'contig': s['source']['location'][0][0],
                    'start': str(s['source']['location'][0][1])
                }
    if locDict:
        gibbs_params['absolute_locations'] = locDict
    gibbs_params['min_len'] = motMin
    gibbs_params['max_len'] = motMax

    obj_ref = MOU.UploadFromGibbs(gibbs_params)['obj_ref']

    # Millisecond timestamp keeps the report directory name unique per run.
    timestamp = str(int(
        (datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()
        * 1000))
    htmlDir = self.shared_folder + '/html' + timestamp
    os.mkdir(htmlDir)

    with open(promoterFastaFilePath, 'r') as pFile:
        fileStr = pFile.read()
    # Escape the FASTA text so '<', '>' and '&' in headers cannot break
    # (or inject markup into) the generated page.
    promHtmlStr = '<html><body> ' + html.escape(fileStr) + ' </body></html>'
    with open(htmlDir + '/promoters.html', 'w') as promHTML:
        promHTML.write(promHtmlStr)

    get_obj_params = {'object_refs': [obj_ref]}
    gibbsMotifSet = dfu.get_objects(get_obj_params)['data'][0]['data']
    #TODO: Here replace the makereport with a call to motifset utils
    MakeReport(htmlDir, gibbsMotifSet)

    try:
        html_upload_ret = dfu.file_to_shock({
            'file_path': htmlDir,
            'make_handle': 0,
            'pack': 'zip'
        })
    except Exception as err:
        # Keep the original failure attached as the cause.
        raise ValueError('error uploading HTML file to shock') from err

    #TODO set parameters correctly
    reportName = 'GibbsMotifFinder_report_' + str(uuid.uuid4())
    reportObj = {
        'objects_created': [{
            'ref': obj_ref,
            'description': 'Motif Set generated by Gibbs'
        }],
        'message': '',
        'direct_html': '',
        'direct_html_link_index': 0,
        'file_links': [],
        'html_links': [{
            'shock_id': html_upload_ret['shock_id'],
            'name': 'index.html',
            'label': 'Save promoter_download.zip'
        }],
        'html_window_height': 220,
        'workspace_name': params['workspace_name'],
        'report_object_name': reportName
    }

    report = KBaseReport(self.callback_url, token=ctx['token'])
    report_info = report.create_extended_report(reportObj)
    output = {
        'report_name': report_info['name'],
        'report_ref': report_info['ref']
    }
    #END find_motifs

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method find_motifs return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def find_motifs(self, ctx, params):
    """
    Run MDscan motif discovery on a promoter FASTA file, upload the
    resulting motif set and create an HTML report for it.

    :param params: instance of type "find_motifs_params" -> structure:
       parameter "workspace_name" of String, parameter "fastapath" of
       String, parameter "motif_min_length" of Long, parameter
       "motif_max_length" of Long, parameter "SS_ref" of String,
       parameter "obj_name" of String
    :returns: instance of type "extract_output_params" -> structure:
       parameter "report_name" of String, parameter "report_ref" of String
    :raises KeyError: if "workspace_name", "fastapath", "obj_name" or
       "background" is missing from params. The code reads "motif_length"
       (default 8) and "background", not the min/max lengths listed above.
    :raises ValueError: if the HTML report directory cannot be uploaded.
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN find_motifs
    import html  # local import: only used to escape the FASTA text below

    # Default is written back into params, as callers may inspect it.
    params.setdefault('motif_length', 8)
    motLen = params['motif_length']
    promoterFastaFilePath = params['fastapath']

    MDU = MdscanUtil()
    MdscanMotifCommand = MDU.build_mdscan_motif_command(
        promoterFastaFilePath, motLen, params['background'])
    MDU.run_mdscan_command(MdscanMotifCommand)

    mdscan_out_path = '/kb/module/work/tmp/mdscan_out'
    mdscan_params = {
        'ws_name': params['workspace_name'],
        'path': mdscan_out_path,
        'obj_name': params['obj_name']
    }
    dfu = DataFileUtil(self.callback_url)

    obj_ref = MDU.UploadFromMdscan(self.callback_url,
                                   mdscan_params)[0]['obj_ref']
    MDU.write_obj_ref(mdscan_out_path, obj_ref)

    # Millisecond timestamp keeps the report directory name unique per run.
    timestamp = str(int(
        (datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()
        * 1000))
    htmlDir = self.shared_folder + '/html' + timestamp
    os.mkdir(htmlDir)

    with open(promoterFastaFilePath, 'r') as pFile:
        fileStr = pFile.read()
    # Escape the FASTA text so '<', '>' and '&' in headers cannot break
    # (or inject markup into) the generated page.
    promHtmlStr = '<html><body> ' + html.escape(fileStr) + ' </body></html>'
    with open(htmlDir + '/promoters.html', 'w') as promHTML:
        promHTML.write(promHtmlStr)

    get_obj_params = {'object_refs': [obj_ref]}
    mdscanMotifSet = dfu.get_objects(get_obj_params)['data'][0]['data']
    mr = MakeNewReport()
    mr.MakeReport(htmlDir, mdscanMotifSet)

    try:
        html_upload_ret = dfu.file_to_shock({
            'file_path': htmlDir,
            'make_handle': 0,
            'pack': 'zip'
        })
    except Exception as err:
        # Keep the original failure attached as the cause.
        raise ValueError('error uploading HTML file to shock') from err

    reportName = 'MdscanMotifFinder_report_' + str(uuid.uuid4())
    reportObj = {
        'objects_created': [{
            'ref': obj_ref,
            'description': 'Motif Set generated by Mdscan'
        }],
        'message': '',
        'direct_html': '',
        'direct_html_link_index': 0,
        'file_links': [],
        'html_links': [{
            'shock_id': html_upload_ret['shock_id'],
            'name': 'index.html',
            'label': 'Save promoter_download.zip'
        }],
        'html_window_height': 220,
        'workspace_name': params['workspace_name'],
        'report_object_name': reportName
    }

    report = KBaseReport(self.callback_url, token=ctx['token'])
    report_info = report.create_extended_report(reportObj)
    output = {
        'report_name': report_info['name'],
        'report_ref': report_info['ref']
    }
    #END find_motifs

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method find_motifs return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def find_motifs(self, ctx, params):
    """
    Run mfmd motif discovery on a FASTA file, upload the resulting motif
    set and create an HTML report for it.

    :param params: instance of type "find_motifs_params" -> structure:
       parameter "workspace_name" of String, parameter "fastapath" of
       String, parameter "prb" of Double, parameter "motif_length" of
       Long, parameter "obj_name" of String, parameter "mask_repeats" of
       Long
    :returns: instance of type "extract_output_params" -> structure:
       parameter "report_name" of String, parameter "report_ref" of String
    :raises KeyError: if "workspace_name", "fastapath" or "obj_name" is
       missing from params.
    :raises ValueError: if the HTML report directory cannot be uploaded.
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN find_motifs
    # Defaults are written back into params, as callers may inspect them.
    params.setdefault('motif_length', 20)
    params.setdefault('prb', 0.05)
    motlen = params['motif_length']
    prb = params['prb']
    FastaFilePath = params['fastapath']

    mfu = mfmdUtil()
    # NOTE(review): relies on self.config being set by the constructor,
    # which is not visible here -- confirm.
    mfmdMotifCommand = mfu.build_mfmd_command(FastaFilePath, motlen, prb,
                                              self.config)
    mfu.run_mfmd_command(mfmdMotifCommand)

    mfmd_out_path = '/kb/module/work/tmp/mfmd_out'
    mfmd_params = {
        'ws_name': params['workspace_name'],
        'path': mfmd_out_path,
        'location_path': mfmd_out_path + '/mfmd_out.txt',
        'obj_name': params['obj_name']
    }
    dfu = DataFileUtil(self.callback_url)

    locDict = {}
    if 'SS_ref' in params:
        get_ss_params = {'object_refs': [params['SS_ref']]}
        SS = dfu.get_objects(get_ss_params)['data'][0]['data']
        for s in SS['sequences']:
            if s['source'] is not None:
                # NOTE(review): the key is the literal string
                # 'sequence_id', so every qualifying sequence overwrites
                # the previous entry. It looks like the sequence's own id
                # was intended -- confirm against what the uploader
                # expects before changing it.
                locDict['sequence_id'] = {
                    'contig': s['source']['location'][0][0],
                    'start': str(s['source']['location'][0][1])
                }
    if locDict:
        mfmd_params['absolute_locations'] = locDict
    mfmd_params['motlen'] = motlen
    mfmd_params['prb'] = prb

    obj_ref = mfu.UploadFrommfmd(self.callback_url,
                                 mfmd_params)[0]['obj_ref']
    mfu.write_obj_ref(mfmd_out_path, obj_ref)

    # Millisecond timestamp keeps the report directory name unique per run.
    timestamp = str(int(
        (datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()
        * 1000))
    htmlDir = self.shared_folder + '/html' + timestamp
    os.mkdir(htmlDir)

    get_obj_params = {'object_refs': [obj_ref]}
    mfmdMotifSet = dfu.get_objects(get_obj_params)['data'][0]['data']
    mr = MakeNewReport()
    mr.MakeReport(htmlDir, mfmdMotifSet)

    try:
        html_upload_ret = dfu.file_to_shock({
            'file_path': htmlDir,
            'make_handle': 0,
            'pack': 'zip'
        })
    except Exception as err:
        # Keep the original failure attached as the cause.
        raise ValueError('error uploading HTML file to shock') from err

    reportName = 'mfmdMotifFinder_report_' + str(uuid.uuid4())
    reportObj = {
        'objects_created': [{
            'ref': obj_ref,
            'description': 'Motif Set generated by mfmd'
        }],
        'message': '',
        'direct_html': '',
        'direct_html_link_index': 0,
        'file_links': [],
        'html_links': [{
            'shock_id': html_upload_ret['shock_id'],
            'name': 'index.html',
            'label': 'Save promoter_download.zip'
        }],
        'html_window_height': 220,
        'workspace_name': params['workspace_name'],
        'report_object_name': reportName
    }

    report = KBaseReport(self.callback_url, token=ctx['token'])
    report_info = report.create_extended_report(reportObj)
    output = {
        'report_name': report_info['name'],
        'report_ref': report_info['ref']
    }
    #END find_motifs

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method find_motifs return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
class MotifFinderGibbs:
    '''
    Module Name:
    MotifFinderGibbs

    Module Description:
    A KBase module: MotifFinderGibbs
    '''

    ######## WARNING FOR GEVENT USERS ####### noqa
    # Since asynchronous IO can lead to methods - even the same method -
    # interrupting each other, you must be *very* careful when using global
    # state. A method could easily clobber the state set by another while
    # the latter method is running.
    ######################################### noqa
    VERSION = "0.0.1"
    GIT_URL = ""
    GIT_COMMIT_HASH = ""

    #BEGIN_CLASS_HEADER
    #END_CLASS_HEADER

    # config contains contents of config file in a hash or None if it couldn't
    # be found
    def __init__(self, config):
        #BEGIN_CONSTRUCTOR
        # Service clients are all built against the SDK callback URL taken
        # from the environment; config['scratch'] is the working directory
        # used for report output.
        self.callback_url = os.environ['SDK_CALLBACK_URL']
        self.shared_folder = config['scratch']
        self.SSU = SequenceSetUtils(os.environ['SDK_CALLBACK_URL'])
        self.MOU = MotifUtils(os.environ['SDK_CALLBACK_URL'])
        self.dfu = DataFileUtil(self.callback_url)
        self.GR = GenerateReport()
        #END_CONSTRUCTOR
        pass

    def find_motifs(self, ctx, params):
        """
        Run one Gibbs command per motif length (motif_min_length through
        motif_max_length inclusive, in steps of 2) on a FASTA file, save
        the result as a motif set, and create an extended HTML report.

        When absent from params, "motif_min_length" defaults to 8 and
        "motif_max_length" to 16. "obj_name" is also read from params,
        although it is not part of the declared structure below.

        :param params: instance of type "find_motifs_params" (SS_ref -
           optional, used for exact genome locations if possible) ->
           structure: parameter "workspace_name" of String, parameter
           "fastapath" of String, parameter "motif_min_length" of Long,
           parameter "motif_max_length" of Long, parameter "SS_ref" of String
        :returns: instance of type "extract_output_params" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        :raises ValueError: if uploading the HTML directory to shock fails,
           or if the produced output is not a dict
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN find_motifs
        # Read defaults without mutating the caller's dict.
        motMin = params.get('motif_min_length', 8)
        motMax = params.get('motif_max_length', 16)
        promoterFastaFilePath = params['fastapath']

        # Build every command first, then run them in order.
        GU = GibbsUtil()
        gibbsCommandList = [
            GU.build_gibbs_command(promoterFastaFilePath, length)
            for length in range(motMin, motMax + 1, 2)
        ]
        for command in gibbsCommandList:
            GU.run_gibbs_command(command)

        gibbs_out_path = '/kb/module/work/tmp/gibbs'
        gibbs_params = {
            'ws_name': params['workspace_name'],
            'format': 'GIBBS',
            'file': {'path': gibbs_out_path},
            'obj_name': params['obj_name']
        }

        locDict = {}
        if 'SS_ref' in params:
            # SS_ref is optional: only reference it when supplied, instead
            # of raising KeyError while building gibbs_params.
            gibbs_params['seq_set_ref'] = params['SS_ref']
            get_ss_params = {'object_refs': [params['SS_ref']]}
            SS = self.dfu.get_objects(get_ss_params)['data'][0]['data']
            for s in SS['sequences']:
                if s['source'] is not None:
                    # NOTE(review): the key is the literal string
                    # 'sequence_id', so each sequence overwrites the
                    # previous entry. This looks like it was meant to be
                    # s['sequence_id']; left unchanged because the consumer
                    # of 'absolute_locations' is not visible here.
                    locDict['sequence_id'] = {
                        'contig': s['source']['location'][0][0],
                        'start': str(s['source']['location'][0][1])
                    }
        if locDict:
            gibbs_params['absolute_locations'] = locDict
        gibbs_params['min_len'] = motMin
        gibbs_params['max_len'] = motMax

        obj_ref = self.MOU.saveMotifSet(gibbs_params)
        GU.write_obj_ref(gibbs_out_path, obj_ref)

        # Millisecond UTC timestamp used as a suffix for the report
        # directory.
        timestamp = int(
            (datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()
            * 1000)
        htmlDir = self.shared_folder + '/html' + str(timestamp)
        os.mkdir(htmlDir)

        # Wrap the raw FASTA text in a minimal HTML page alongside the
        # report.
        with open(promoterFastaFilePath, 'r') as pFile:
            fileStr = pFile.read()
        promHtmlStr = '<html><body> ' + fileStr + ' </body></html>'
        with open(htmlDir + '/promoters.html', 'w') as promHTML:
            promHTML.write(promHtmlStr)

        get_obj_params = {'object_refs': [obj_ref]}
        gibbsMotifSet = self.dfu.get_objects(get_obj_params)['data'][0]['data']
        # The report generator lives on the instance; a bare `GR` is not
        # defined in this scope.
        self.GR.GenerateMotifReport(htmlDir, gibbsMotifSet)

        try:
            html_upload_ret = self.dfu.file_to_shock({
                'file_path': htmlDir,
                'make_handle': 0,
                'pack': 'zip'
            })
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are no longer converted into a ValueError.
            raise ValueError('error uploading HTML file to shock')

        reportName = 'GibbsMotifFinder_report_' + str(uuid.uuid4())
        reportObj = {
            'objects_created': [{
                'ref': obj_ref,
                'description': 'Motif Set generated by Gibbs'
            }],
            'message': '',
            'direct_html': '',
            'direct_html_link_index': 0,
            'file_links': [],
            'html_links': [{
                'shock_id': html_upload_ret['shock_id'],
                'name': 'index.html',
                'label': 'Save promoter_download.zip'
            }],
            'html_window_height': 220,
            'workspace_name': params['workspace_name'],
            'report_object_name': reportName
        }

        report = KBaseReport(self.callback_url, token=ctx['token'])
        report_info = report.create_extended_report(reportObj)
        output = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref']
        }
        #END find_motifs

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method find_motifs return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]

    def BuildFastaFromSequenceSet(self, ctx, params):
        """
        Fetch the referenced sequence set object and write each of its
        sequences to params['fasta_outpath'] as a '>' + sequence_id header
        line followed by the sequence line.

        :param params: instance of type "BuildSeqIn" -> structure: parameter
           "workspace_name" of String, parameter "SequenceSetRef" of String,
           parameter "fasta_outpath" of String
        :returns: instance of type "BuildSeqOut" -> structure: parameter
           "fasta_outpath" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN BuildFastaFromSequenceSet
        get_objects_params = {'object_refs': [params['SequenceSetRef']]}
        SeqSet = self.dfu.get_objects(get_objects_params)['data'][0]['data']
        # `with` guarantees the handle is closed even if a write fails.
        with open(params['fasta_outpath'], 'w') as outFile:
            for s in SeqSet['sequences']:
                outFile.write('>' + s['sequence_id'] + '\n')
                outFile.write(s['sequence'] + '\n')
        output = {'fasta_outpath': params['fasta_outpath']}
        #END BuildFastaFromSequenceSet

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method BuildFastaFromSequenceSet return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]

    def ExtractPromotersFromFeatureSetandDiscoverMotifs(self, ctx, params):
        """
        Build a sequence set from a feature set, write it out as FASTA, and
        run find_motifs on it.

        "obj_name" is also read from params and forwarded to find_motifs,
        although it is not part of the declared structure below.

        :param params: instance of type "extract_input" -> structure:
           parameter "workspace_name" of String, parameter "genome_ref" of
           String, parameter "featureSet_ref" of String, parameter
           "promoter_length" of Long, parameter "motif_min_length" of Long,
           parameter "motif_max_length" of Long
        :returns: instance of type "extract_output_params" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN ExtractPromotersFromFeatureSetandDiscoverMotifs
        BuildParams = {
            'ws_name': params['workspace_name'],
            'FeatureSet_ref': params['featureSet_ref'],
            'genome_ref': params['genome_ref'],
            'upstream_length': params['promoter_length']
        }
        SSret = self.SSU.buildFromFeatureSet(BuildParams)
        SSref = SSret['SequenceSet_ref']

        fastapath = '/kb/module/work/tmp/tmpSeqSet.fa'
        FastaParams = {
            'workspace_name': params['workspace_name'],
            'SequenceSetRef': SSref,
            'fasta_outpath': fastapath
        }
        self.BuildFastaFromSequenceSet(ctx, FastaParams)

        newfastapath = '/kb/module/work/tmp/SeqSet.fa'
        fu = FastaUtils()
        fu.RemoveRepeats(fastapath, newfastapath)

        # NOTE(review): find_motifs is given the original fastapath, not
        # the newfastapath just passed to RemoveRepeats. Presumably the
        # de-duplicated file was intended; left unchanged pending
        # confirmation.
        findmotifsparams = {
            'workspace_name': params['workspace_name'],
            'fastapath': fastapath,
            'motif_min_length': params['motif_min_length'],
            'motif_max_length': params['motif_max_length'],
            'SS_ref': SSref,
            'obj_name': params['obj_name']
        }
        output = self.find_motifs(ctx, findmotifsparams)[0]
        #END ExtractPromotersFromFeatureSetandDiscoverMotifs

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method ExtractPromotersFromFeatureSetandDiscoverMotifs return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]