import utility.referenceUtil as ref # Parse the input data text file data = io.readInputDataFiles('input_data.txt') datatypes = data.keys() data_dict = {} print datatypes if 'fasta_file' in datatypes: # Read in the amino acid sequence and the Secondary structure assignment handle, aa_seq = io.readFasta(data['fasta_file']) ss_seq = ru.matchSeq2SS(aa_seq, data['ss_file']) # Generate fuzzy +/-2 SSE combinations ss_def, ss_combi = ss.genSSCombinations(ss_seq) io.dumpPickle("ss_profiles.pickle", ss_combi) data_dict['ss_seq'] = ss_seq data_dict['aa_seq'] = aa_seq print ss_def print ss_seq else: pass if 'native_pdbs' in datatypes: # Read the native pdbs that you can exclude from the smotif search native_pdbs = data['native_pdbs'] native_pdbs = native_pdbs.lower() native_pdbs = native_pdbs.split() data_dict['natives'] = native_pdbs print native_pdbs else:
# Parse the input data text file; the result is indexed by string keys below.
data = io.readInputDataFiles('input_data.txt')
datatypes = data.keys()

# Read in the amino acid sequence and match the secondary structure
# assignment file against it.
handle, aa_seq = io.readFasta(data['fasta_file'])
ss_seq = matchSeq2SS(aa_seq, data['ss_file'])
print(ss_seq)
# ss_seq = io.readPsiPred(psipred_file)

# Generate the SSE combinations and save them for later stages.
ss_def, ss_combi = ss.genSSCombinations(ss_seq)
# ss_def, ss_combi = ss.genSSCombinations2(ss_seq)
print(ss_combi)
io.dumpPickle("ss_profiles.pickle", ss_combi)

# Read in contacts at a given confidence level
if 'contacts_file' in datatypes:
    # The guard above guarantees this key exists. The previous code passed a
    # bare name `contactsfile` that is never assigned in this script.
    # NOTE(review): confirm no earlier part of the file defined `contactsfile`
    # with a different path.
    contacts, contacts_seq = io.readContacts(data['contacts_file'],
                                             probability=0.7)

# Native PDB ids: lower-cased and split on whitespace into a list.
native_pdbs = data['native_pdbs']
native_pdbs = native_pdbs.lower()
native_pdbs = native_pdbs.split()
print(native_pdbs)

# Cutoff values arrive as one whitespace-separated string; keep them as a
# list of string tokens (no numeric conversion happens here).
axrh_cutoff = data['axrh_cutoff']
axrh_cutoff = axrh_cutoff.split()
def SmotifSearch(index_array):
    """
    Screen library Smotifs against the previous Smotif and pickle the hits.

    Every candidate returned by getfromDB is passed through the native
    exclusion, the QCP RMSD cutoff, the loop constraint and the clash check.
    Survivors are scored by the sequence filter and by whichever of the PCS,
    NOE and RDC filters have data in "exp_data.pickle". A candidate is kept
    only when the PCS or RDC filter returns a non-empty result.

    :param index_array: two-item sequence. index_array[0] is passed to
        uts2.getPreviousSmotif and index_array[1] to uts2.getSS2; both are
        also used in the output file name "tx_<0>_<1>.pickle".
    :return: True in every case (empty library, no hits, or hits dumped).
    """
    exp_data = io.readPickle("exp_data.pickle")
    exp_data_types = exp_data.keys()  # e.g. ['ss_seq', 'pcs_data', 'aa_seq', 'contacts']

    psmotif = uts2.getPreviousSmotif(index_array[0])
    current_ss, direction = uts2.getSS2(index_array[1])
    csmotif_data, smotif_def = getfromDB(psmotif, current_ss, direction,
                                         exp_data['database_cutoff'])

    if not csmotif_data:
        # If the smotif library doesn't exist, terminate further execution.
        return True

    # Always narrow down to the previous SSE and the current SSE and operate
    # on them individually.
    sse_ordered = orderSSE(psmotif, current_ss, direction)

    # An empty tuple means "exclude nothing" when no natives were supplied.
    natives = exp_data['natives'] if 'natives' in exp_data_types else ()
    dump_log = []

    # ************************************************************************
    # Main
    # The loop below iterates over all of the Smotifs and applies the filters.
    # This is the place to add new filters; start from the Sequence filter.
    # ************************************************************************
    for csmotif in csmotif_data:

        # Bound for every candidate because it is printed when a hit is found,
        # whether or not native exclusion is active.
        tpdbid = csmotif[0][0]

        # Exclude natives if needed: the first four characters are compared.
        if tpdbid[0:4] in natives:
            continue

        # ************************************************
        # RMSD filter using QCP method
        # quickly filters non-overlapping smotifs
        # ************************************************
        rmsd, transformed_coos = qcp.rmsdQCP(psmotif[0], csmotif, direction)
        if not rmsd <= exp_data['rmsd_cutoff'][1]:
            continue

        # Loop constraint: reject overlapping smotifs that have drifted too
        # far apart.
        if not llc.loopConstraint(transformed_coos, sse_ordered, direction,
                                  smotif_def):
            continue

        # Reject assemblies whose SSEs clash with one another.
        if not qcp.clahses(transformed_coos, exp_data['clash_distance']):
            continue

        # Prepare temp log array to save data at the end
        tlog, noe_fmeasure, pcs_tensor_fits, rdc_tensor_fits = [], [], [], []
        tlog.append(['smotif', csmotif])
        tlog.append(['smotif_def', sse_ordered])
        tlog.append(['qcp_rmsd', transformed_coos, sse_ordered, rmsd])
        cathcodes = sm.orderCATH(psmotif, csmotif[0], direction)
        tlog.append(['cathcodes', cathcodes])

        # ************************************************
        # Sequence filter
        # Aligns the smotif seq to target seq and calculates
        # sequence identity and the alignment score
        # ************************************************
        csse_seq, seq_identity, blosum62_score = Sfilter.S2SequenceSimilarity(
            current_ss, csmotif, direction, exp_data)
        # concat current to previous seq
        concat_seq = sm.orderSeq(psmotif, csse_seq, direction)
        tlog.append(['seq_filter', concat_seq, csse_seq, seq_identity,
                     blosum62_score])

        # ************************************************
        # Pseudocontact Shift filter
        # uses experimental PCS data to filter Smotifs
        # ************************************************
        if 'pcs_data' in exp_data_types:
            pcs_tensor_fits = Pfilter.PCSAxRhFit2(transformed_coos,
                                                  sse_ordered, exp_data,
                                                  stage=2)
            tlog.append(['PCS_filter', pcs_tensor_fits])

        # ************************************************
        # Ambiguous NOE score filter
        # uses experimental ambiguous noe data to filter Smotifs
        # ************************************************
        if 'noe_data' in exp_data_types:
            noe_fmeasure = Nfilter.s2NOEfit(transformed_coos, sse_ordered,
                                            exp_data)
            tlog.append(['NOE_filter', noe_fmeasure])

        # ************************************************
        # Residual dipolar coupling filter
        # only attempted when the NOE score is above 0.5
        # ************************************************
        if 'rdc_data' in exp_data_types:
            if noe_fmeasure and noe_fmeasure > 0.5:
                rdc_tensor_fits = Rfilter.RDCAxRhFit2(transformed_coos,
                                                      sse_ordered, exp_data,
                                                      stage=2)
                tlog.append(['RDC_filter', rdc_tensor_fits])

        if pcs_tensor_fits or rdc_tensor_fits:
            # Keep the hit; it is written to disk after the loop.
            print("%s %s %s" % (tpdbid, noe_fmeasure, rdc_tensor_fits))
            dump_log.append(tlog)

    # prevent dumping empty arrays with no data
    if dump_log:
        print("num of hits %s" % len(dump_log))
        io.dumpPickle(
            "tx_" + str(index_array[0]) + "_" + str(index_array[1]) + ".pickle",
            dump_log)

    return True
print smotif_db[i][45:] for j in range(0, len(smotif)): # for j in range(0, 1): # print smotif[j][0] # print smotif[j][0][0] #['1pp9P00', '172', '203', '222', '245'] # print smotif[j][0][1] #172- [203, 'THR', 'H', 49.366, 87.846, 102.07]] # print smotif[j][0][2] #222- [245, 'PHE', 'H', 40.016, 57.964, 78.166]] ss1 = rms.getcoo( smotif[j][0] [1]) # x,y,z, atom_type, res_no, res = [], [], [], [], [], [] ss2 = rms.getcoo( smotif[j][0][2]) # return [x,y,z, atom_type, res_no, res] # dumpPDBCoo2(ss1, 5) # dumpPDBCoo2(ss2, 6) # concatenate coordinate arrys of both SSEs to identify their center of mass ss1_plus_ss2 = conc_sss(ss1, ss2) cen_ss1, cm_ss1 = rms.centerCoo(ss1_plus_ss2) # dumpPDBCoo2(cen_ss1, 7) # Translate the original SSEs to the new center of mass tr_ss1 = translateSSE(cm_ss1, smotif[j][0][1]) tr_ss2 = translateSSE(cm_ss1, smotif[j][0][2]) smotif_def = smotif[j][0][0] # add the modified Smotifs to temp array temp_smotif.append([smotif_def, tr_ss1, tr_ss2]) io.dumpPickle('./smotif_cen_db/' + smotif_db[i][45:], temp_smotif)
def SmotifSearch(index_array):
    """
    Screen every Smotif of one definition against the experimental data.

    The Smotif definition is derived from index_array via getSSdef and
    sm.getSmotif. Each library entry is scored by the sequence filter and by
    whichever of the PCS, NOE and RDC filters have data in "exp_data.pickle".
    An entry is kept only when the PCS or RDC filter returns a non-empty
    result.

    :param index_array: passed to getSSdef; items 0 and 1 are also used in the
        output file name "0_<0>_<1>.pickle".
    :return: True in every case (empty library, no hits, or hits dumped).
    """
    s1_def, s2_def = getSSdef(index_array)
    smotif_def = sm.getSmotif(s1_def, s2_def)

    exp_data = io.readPickle("exp_data.pickle")
    exp_data_types = exp_data.keys()  # e.g. ['ss_seq', 'pcs_data', 'aa_seq', 'contacts']

    smotif_data = sm.readSmotifDatabase(smotif_def,
                                        exp_data['database_cutoff'])
    if not smotif_data:
        # If the smotif library doesn't exist, terminate further execution.
        return True

    # An empty tuple means "exclude nothing" when no natives were supplied.
    natives = exp_data['natives'] if 'natives' in exp_data_types else ()
    dump_log = []

    # ************************************************************************
    # Main
    # The loop below iterates over all of the Smotifs and applies the filters.
    # This is the place to add new filters; start from the Sequence filter.
    # ************************************************************************
    for smotif in smotif_data:

        # Bound for every entry because it is printed when a hit is found,
        # whether or not native exclusion is active.
        tpdbid = smotif[0][0]

        # ************************************************
        # Excluding the natives (first four characters are compared)
        # ************************************************
        if tpdbid[0:4] in natives:
            continue

        # ************************************************
        # Applying different filters to Smotifs
        # Prepare temp log array to save data at the end
        # ************************************************
        tlog, pcs_tensor_fits, rdc_tensor_fits, noe_fmeasure = [], [], [], []
        tlog.append(['smotif', smotif])
        tlog.append(['smotif_def', [s1_def, s2_def]])
        tlog.append(['cathcodes', [smotif[0]]])

        # ************************************************
        # Sequence filter
        # Aligns the smotif seq to target seq and calculates
        # sequence identity and the alignment score
        # ************************************************
        smotif_seq, seq_identity, blosum62_score = Sfilter.SequenceSimilarity(
            s1_def, s2_def, smotif, exp_data)
        tlog.append(['seq_filter', smotif_seq, seq_identity, blosum62_score])

        # ************************************************
        # Pseudocontact Shift filter
        # uses experimental PCS data to filter Smotifs
        # ************************************************
        if 'pcs_data' in exp_data_types:
            pcs_tensor_fits = Pfilter.PCSAxRhFit(s1_def, s2_def, smotif,
                                                 exp_data)
            tlog.append(['PCS_filter', pcs_tensor_fits])

        # ************************************************
        # Ambiguous NOE score filter
        # uses experimental ambiguous noe data to filter Smotifs
        # ************************************************
        if 'noe_data' in exp_data_types:
            noe_fmeasure = Nfilter.s1NOEfit(s1_def, s2_def, smotif, exp_data)
            tlog.append(['NOE_filter', noe_fmeasure])

        # ************************************************
        # Residual dipolar coupling filter
        # only attempted when the NOE score is above 0.5
        # ************************************************
        if 'rdc_data' in exp_data_types:
            if noe_fmeasure and noe_fmeasure > 0.5:
                rdc_tensor_fits = Rfilter.RDCAxRhFit(s1_def, s2_def, smotif,
                                                     exp_data)
                tlog.append(['RDC_filter', rdc_tensor_fits])

        # Keep the hit; it is written to disk after the loop.
        if pcs_tensor_fits or rdc_tensor_fits:
            print("%s %s %s" % (tpdbid, noe_fmeasure, rdc_tensor_fits))
            dump_log.append(tlog)

    # Save all of the hits in pickled arrays
    if dump_log:
        print("num of hits %s" % len(dump_log))
        io.dumpPickle(
            '0_' + str(index_array[0]) + "_" + str(index_array[1]) + ".pickle",
            dump_log)

    return True