def createBoundModesFunction(config, setting):
    """Write the bound-minus-unbound coordinate difference as a mode file.

    The ``protein_bound`` and ``protein_unbound`` input files of *setting* are
    read, their coordinates are subtracted (bound - unbound) and the first
    three rows of the transposed difference (presumably the x, y and z
    components -- confirm against ``utils.getCoordinatesFromPDBlines``) are
    handed to ``utils.writeModeFile`` together with a constant factor of 1.0.

    Args:
        config: Pipeline configuration providing ``getInputFile``,
            ``getOutputFile`` and ``getSetting``.
        setting: Name of the setting whose files and flags are used.

    When the setting's ``verbose`` flag is truthy a summary line is printed.
    When its ``dryRun`` flag is truthy no file is read or written.
    """
    bound_path = config.getInputFile(setting, 'protein_bound')
    unbound_path = config.getInputFile(setting, 'protein_unbound')
    target_path = config.getOutputFile(setting, 'out')

    if config.getSetting(setting)['verbose']:
        summary = (" Create bound modes for " + unbound_path + " and " +
                   bound_path + " and output to " + target_path)
        print("SETTING: ", setting.upper(), summary)

    if config.getSetting(setting)["dryRun"]:
        return

    bound_lines = utils.readFileToList(bound_path)
    unbound_lines = utils.readFileToList(unbound_path)
    bound_xyz = utils.getCoordinatesFromPDBlines(bound_lines)
    unbound_xyz = utils.getCoordinatesFromPDBlines(unbound_lines)
    displacement = bound_xyz - unbound_xyz

    # The norm is still computed but deliberately not applied: the scaling by
    # 1/norm**2 is disabled and a constant 1.0 is written instead.
    utils.getModeNorm(displacement)

    utils.writeModeFile(target_path, displacement.T[0], displacement.T[1],
                        displacement.T[2], 1.0)
def _consecutive_distances(coords):
    """Return the Euclidean distances between successive entries of *coords*."""
    distances = []
    for i in range(len(coords) - 1):
        step = coords[i] - coords[i + 1]
        distances.append(np.sqrt(step.dot(step)))
    return distances


def _stack_distance_lists(per_protein):
    """Pack per-protein distance arrays into a single array.

    Equal-length inputs give a regular 2-D array, exactly as ``np.asarray``
    does. Inputs of unequal length give a 1-D object array holding one
    distance array per protein; recent NumPy versions refuse to build such a
    ragged array implicitly and raise ``ValueError`` instead.
    """
    if len({len(entry) for entry in per_protein}) <= 1:
        return np.asarray(per_protein)
    ragged = np.empty(len(per_protein), dtype=object)
    for i, entry in enumerate(per_protein):
        ragged[i] = entry
    return ragged


def isConnected(base_path, protein_list):
    """Measure distances between successive CA atoms of unbound structures.

    Despite its name the function returns no boolean; it returns distance
    statistics from which chain connectivity can be judged by the caller.

    For every name in *protein_list* the files
    ``<base_path>/<name>/<name>A-unbound.pdb`` (receptor) and
    ``<base_path>/<name>/<name>B-unbound.pdb`` (ligand) are read, reduced to
    their CA lines, and the distance between each pair of successive
    coordinates is computed.

    Args:
        base_path: Directory containing one sub-directory per protein.
        protein_list: Iterable of protein names.

    Returns:
        A 4-tuple ``(mean_receptor, mean_ligand, receptor_distances,
        ligand_distances)``. The means are taken over all proteins; they are
        ``nan`` when no distance could be computed (empty *protein_list* or
        fewer than two CA atoms everywhere). The last two items hold one
        distance array per protein: a 2-D array when all proteins yield the
        same number of distances, otherwise a 1-D object array of arrays.
    """
    total_receptor = 0
    total_ligand = 0
    count_receptor = 0
    count_ligand = 0
    receptor_per_protein = []
    ligand_per_protein = []

    for protein in protein_list:
        protein_dir = os.path.join(base_path, protein)
        receptor = protein_dir + "/{}A-unbound.pdb".format(protein)
        ligand = protein_dir + "/{}B-unbound.pdb".format(protein)
        receptor_capos = utils.getCoordinatesFromPDBlines(
            utils.getCAOnlyFromPDBLines(utils.readFileToList(receptor)))
        ligand_capos = utils.getCoordinatesFromPDBlines(
            utils.getCAOnlyFromPDBLines(utils.readFileToList(ligand)))

        receptor_distances = _consecutive_distances(receptor_capos)
        ligand_distances = _consecutive_distances(ligand_capos)

        # Accumulate one value at a time so the running totals are summed in
        # the same order as before.
        for distance in receptor_distances:
            total_receptor += distance
        for distance in ligand_distances:
            total_ligand += distance
        count_receptor += len(receptor_distances)
        count_ligand += len(ligand_distances)

        receptor_per_protein.append(np.asarray(receptor_distances))
        ligand_per_protein.append(np.asarray(ligand_distances))

    # A mean over zero distances is undefined; report nan instead of raising
    # ZeroDivisionError.
    mean_receptor = (total_receptor / count_receptor
                     if count_receptor else float('nan'))
    mean_ligand = (total_ligand / count_ligand
                   if count_ligand else float('nan'))

    return (mean_receptor, mean_ligand,
            _stack_distance_lists(receptor_per_protein),
            _stack_distance_lists(ligand_per_protein))
def cutTermini(config, setting):
    """Cut the residues listed in a cut log out of a PDB file.

    The ``cutlog`` input of *setting* is loaded as JSON; the residues under
    its ``looseTerminiFront`` and ``looseTerminiBack`` keys are concatenated
    and passed, together with the lines of the ``pdb`` input, to
    ``utils.cutTerminiAndWriteToPdb``, which writes to the ``out`` file.

    Args:
        config: Pipeline configuration providing ``getInputFile``,
            ``getOutputFile`` and ``getSetting``.
        setting: Name of the setting whose files and flags are used.

    When the setting's ``verbose`` flag is truthy a summary line is printed.
    When its ``dryRun`` flag is truthy no file is read or written.
    """
    # Looked up first, as before; the result is not consulted below.
    cut_settings = config.getSetting(setting)
    source_pdb = config.getInputFile(setting, "pdb")
    log_path = config.getInputFile(setting, "cutlog")
    target_pdb = config.getOutputFile(setting, "out")

    if config.getSetting(setting)['verbose']:
        print("Cut Termini from " + source_pdb + " and output to " + target_pdb)

    if config.getSetting(setting)["dryRun"]:
        return

    termini = utils.loadFromJson(log_path)
    loose_residues = termini['looseTerminiFront'] + termini['looseTerminiBack']
    utils.cutTerminiAndWriteToPdb(loose_residues,
                                  utils.readFileToList(source_pdb), target_pdb)
def manipulateModesFunction(config, setting):
    """Zero mode vectors of residues in selected secondary-structure classes.

    Reads the ``protein`` PDB file, the ``modes`` file and the ``sec``
    secondary-structure file of *setting*. For every mode, entry ``i`` of its
    ``'evec'`` is replaced by a zero 3-vector when the secondary-structure
    value looked up through ``resIndices[i]`` is contained in the setting's
    ``'manipulate'`` collection. The modified modes are written to ``out``.

    Args:
        config: Pipeline configuration providing ``getInputFile``,
            ``getOutputFile`` and ``getSetting``.
        setting: Name of the setting whose files and options are used.

    Raises:
        KeyError: If the setting has no ``'manipulate'`` entry.

    NOTE(review): unlike the sibling pipeline steps, this one does not consult
    the ``verbose`` / ``dryRun`` flags -- confirm whether that is intended.
    """
    secondary_file = config.getInputFile(setting, 'sec')
    pdb = config.getInputFile(setting, 'protein')
    mode_file = config.getInputFile(setting, 'modes')
    output = config.getOutputFile(setting, 'out')

    pdb_list = utils.readFileToList(pdb)
    resIndices = utils.getResidueIndicesFromPDBLines(pdb_list)
    modes = utils.read_modes(mode_file)
    sec = readSecondaryStructure(secondary_file)
    manipulate = config.getSetting(setting)['manipulate']

    # Iterate over the modes that were just read (and are written below); the
    # previous code looped over an undefined name, ``d_modes``.
    for mode in modes.values():
        evec = mode['evec']
        for i in range(len(evec)):
            if sec[resIndices[i]] in manipulate:
                evec[i] = np.zeros(3)

    utils.writeModeFileFromDict(modes, output)
def evalProtein(config, setting):
    """Summarise composition and area of a protein from its secondary file.

    Each record returned by ``utils.getSecLines`` contributes field 1 (used
    as amino-acid key), field 5 (used as secondary-structure key) and field 9
    (converted to float and treated as an area). The JSON written to the
    ``out`` file contains:

    * ``secondary`` / ``aminoAcids``: share of records per key,
    * ``sec_area`` / ``aa_area``: share of the summed area per key,
    * ``area``: the summed area, ``size``: the number of records (float).

    Args:
        config: Pipeline configuration providing ``getInputFile``,
            ``getOutputFile`` and ``getSetting``.
        setting: Name of the setting whose files and flags are used.

    When the setting's ``verbose`` flag is truthy a summary line is printed.
    When its ``dryRun`` flag is truthy no file is read or written.
    """
    secondary_file = config.getInputFile(setting, 'secondary')
    output = config.getOutputFile(setting, 'out')

    if config.getSetting(setting)['verbose']:
        print("SETTING: ", setting.upper(), " evaluating protein for",
              secondary_file)

    if config.getSetting(setting)["dryRun"]:
        return

    # Keys that are always present in the output, in output order.
    residue_names = ('LYS', 'PRO', 'ILE', 'TRP', 'GLU', 'GLN', 'GLY', 'SER',
                     'PHE', 'HIS', 'TYR', 'LEU', 'ASP', 'ASN', 'ARG', 'THR',
                     'ALA', 'CYS', 'VAL', 'MET')
    structure_codes = ('C', 'E', 'B', 'T', 'H', 'G', 'b')

    records = utils.getSecLines(utils.readFileToList(secondary_file))

    total_area = 0
    structure_seq = []
    residue_seq = []
    area_by_residue = dict.fromkeys(residue_names, 0)
    area_by_structure = dict.fromkeys(structure_codes, 0)
    for record in records:
        value = float(record[9])
        total_area += value
        structure_seq.append(record[5])
        residue_seq.append(record[1])
        area_by_residue[record[1]] += value
        area_by_structure[record[5]] += value
    total_area = np.asarray(total_area).sum()

    record_count = float(len(structure_seq))
    residue_share = dict.fromkeys(residue_names, 0)
    structure_share = dict.fromkeys(structure_codes, 0)
    structure_share.update(
        (code, hits / record_count)
        for code, hits in Counter(structure_seq).items())
    residue_share.update(
        (name, hits / record_count)
        for name, hits in Counter(residue_seq).items())

    # Turn absolute areas into shares of the total area.
    area_by_residue = {
        name: part / total_area for name, part in area_by_residue.items()
    }
    area_by_structure = {
        code: part / total_area for code, part in area_by_structure.items()
    }

    summary = {
        'secondary': structure_share,
        'aminoAcids': residue_share,
        'area': total_area,
        'size': record_count,
        'sec_area': area_by_structure,
        'aa_area': area_by_residue
    }
    utils.saveToJson(output, summary)
# Keys that always appear in the per-interface composition dictionaries.
_INTERFACE_AA_NAMES = ('LYS', 'PRO', 'ILE', 'TRP', 'GLU', 'GLN', 'GLY', 'SER',
                       'PHE', 'HIS', 'TYR', 'LEU', 'ASP', 'ASN', 'ARG', 'THR',
                       'ALA', 'CYS', 'VAL', 'MET')
_INTERFACE_SEC_CODES = ('C', 'E', 'B', 'T', 'H', 'G', 'b')


def _interface_side_summary(indices, sec_lines):
    """Collect fields 5 and 1 and the summed field 9 of the selected lines."""
    sec_codes, residue_names, area = [], [], 0
    for i in indices:
        line = sec_lines[i]
        sec_codes.append(line[5])
        residue_names.append(line[1])
        area += float(line[9])
    return sec_codes, residue_names, area


def _interface_label_fractions(labels, known_labels):
    """Map each label to its share of *labels*; *known_labels* default to 0."""
    fractions = dict.fromkeys(known_labels, 0)
    total = float(len(labels))
    for label, hits in Counter(labels).items():
        fractions[label] = hits / total
    return fractions


def GetInterface(config, setting):
    """Extract receptor/ligand interface residues and write them as JSON.

    For every structure parsed from the ``pdb`` input, the entries ``'A'``
    (receptor) and ``'B'`` (ligand) are passed to
    ``utils.getInterfaceResidues`` with the setting's ``cutoff``. When either
    side has contact residues, their ids, coordinates, the matching fields of
    the ``receptorSec`` / ``ligandSec`` records (field 1, field 5 and the
    summed field 9) and the relative frequencies of those fields are stored.
    The result is written to the ``out`` file under the keys ``file``,
    ``cutoff`` and ``interfaces``.

    Args:
        config: Pipeline configuration providing ``getInputFile``,
            ``getOutputFile`` and ``getSetting``.
        setting: Name of the setting whose files and options are used.

    The evaluation is best effort: any ``Exception`` raised while processing
    is reported on stdout together with its cause and then suppressed, so one
    broken input does not stop the caller. ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed.

    When the setting's ``verbose`` flag is truthy a summary line is printed.
    When its ``dryRun`` flag is truthy no file is read or written.
    """
    pdb = config.getInputFile(setting, 'pdb')
    interfaceFile = config.getOutputFile(setting, 'out')
    receptor_filename = config.getInputFile(setting, 'receptor')
    ligand_filename = config.getInputFile(setting, 'ligand')
    receptorSec_filename = config.getInputFile(setting, 'receptorSec')
    ligandSec_filename = config.getInputFile(setting, 'ligandSec')
    cutoff = config.getSetting(setting)['cutoff']

    if config.getSetting(setting)['verbose']:
        print("SETTING: ", setting.upper(), " Get interface from pdb " + pdb)

    if config.getSetting(setting)["dryRun"]:
        return

    try:
        receptorSec = utils.getSecLines(
            utils.readFileToList(receptorSec_filename))
        ligandSec = utils.getSecLines(utils.readFileToList(ligandSec_filename))
        # Residue id -> index into the corresponding secondary records.
        recmap = utils.getUniqueResIds(
            utils.getResidueFromPDBlines(
                utils.readFileToList(receptor_filename)))
        ligmap = utils.getUniqueResIds(
            utils.getResidueFromPDBlines(
                utils.readFileToList(ligand_filename)))

        interfaces = []
        for struct in utils.parseBIOPdbToStructure(pdb):
            receptor = struct['A']
            ligand = struct['B']
            contactResiduesRec, contactResiduesLig = utils.getInterfaceResidues(
                receptor, ligand, cutoff)
            if len(contactResiduesRec) == 0 and len(contactResiduesLig) == 0:
                continue

            recinterfaceResidues = utils.getResidueIds(contactResiduesRec)
            liginterfaceResidues = utils.getResidueIds(contactResiduesLig)
            # Transposed so that rows 0..2 can be stored as separate lists.
            interfacePosRec = utils.getResidueCoordinates(contactResiduesRec).T
            interfacePosLig = utils.getResidueCoordinates(contactResiduesLig).T

            isecRec, AARec, areaRec = _interface_side_summary(
                [recmap[key] for key in recinterfaceResidues], receptorSec)
            isecLig, AALig, areaLig = _interface_side_summary(
                [ligmap[key] for key in liginterfaceResidues], ligandSec)

            interfaces.append({
                'model': struct.id,
                "recInterfaceResidues": recinterfaceResidues,
                "ligInterfaceResidues": liginterfaceResidues,
                "recAA": AARec,
                'ligAA': AALig,
                'rec_x': list(interfacePosRec[0]),
                'rec_y': list(interfacePosRec[1]),
                'rec_z': list(interfacePosRec[2]),
                'lig_x': list(interfacePosLig[0]),
                'lig_y': list(interfacePosLig[1]),
                'lig_z': list(interfacePosLig[2]),
                'rec_sec': isecRec,
                'lig_sec': isecLig,
                'countSecRec': _interface_label_fractions(
                    isecRec, _INTERFACE_SEC_CODES),
                'countSecLig': _interface_label_fractions(
                    isecLig, _INTERFACE_SEC_CODES),
                'AALigCount': _interface_label_fractions(
                    AALig, _INTERFACE_AA_NAMES),
                'AARecCount': _interface_label_fractions(
                    AARec, _INTERFACE_AA_NAMES),
                'areaRec': areaRec,
                'areaLig': areaLig
            })

        utils.saveToJson(interfaceFile, {
            'file': pdb,
            'cutoff': cutoff,
            'interfaces': interfaces
        })
    except Exception as err:
        # Best effort: report which output failed and why, then carry on.
        print("eval interface: FAILED", interfaceFile,
              "({}: {})".format(type(err).__name__, err))
def modeEvalFunction(config, setting):
    """Evaluate how well each mode describes the unbound-to-bound change.

    For every mode read from ``mode_file`` the following is stored under the
    mode's index: overlap and cumulative overlap with the bound/unbound CA
    coordinates, eigenvalue, norm, contribution, the maxima of the mode
    magnitude and the squared amplitude split by residue name (``area_aa``)
    and by field 5 of the secondary records (``area_sec``), each divided by
    the total squared amplitude. The result is written to the ``out`` file
    together with the names of the input files.

    Args:
        config: Pipeline configuration providing ``getInputFile``,
            ``getOutputFile`` and ``getSetting``.
        setting: Name of the setting whose files and flags are used.

    The evaluation is best effort: any ``Exception`` raised while processing
    is reported on stdout together with its cause and then suppressed.
    ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed.

    When the setting's ``verbose`` flag is truthy a summary line is printed.
    When its ``dryRun`` flag is truthy no file is read or written.
    """
    pdb_bound = config.getInputFile(setting, 'protein_bound')
    pdb_unbound = config.getInputFile(setting, 'protein_unbound')
    mode_file = config.getInputFile(setting, 'mode_file')
    secondary_file = config.getInputFile(setting, 'secondary')
    output = config.getOutputFile(setting, 'out')

    if config.getSetting(setting)['verbose']:
        print("SETTING: ", setting.upper(), " evaluating modes for", mode_file,
              " and output to ", output)

    if config.getSetting(setting)["dryRun"]:
        return

    amino_acids = ('LYS', 'PRO', 'ILE', 'TRP', 'GLU', 'GLN', 'GLY', 'SER',
                   'PHE', 'HIS', 'TYR', 'LEU', 'ASP', 'ASN', 'ARG', 'THR',
                   'ALA', 'CYS', 'VAL', 'MET')
    sec_codes = ('C', 'E', 'B', 'T', 'H', 'G', 'b')

    try:
        bound_list = utils.readFileToList(pdb_bound)
        unbound_list = utils.readFileToList(pdb_unbound)
        secondary = [
            line[5]
            for line in utils.getSecLines(utils.readFileToList(secondary_file))
        ]

        # Position of the first entry of every run of equal residue ids.
        # NOTE(review): positions count entries of the full unbound file, so
        # mode['evec'] is assumed to have one vector per such entry -- confirm.
        indices = []
        currid = None
        for count, rid in enumerate(
                utils.getResidueFromPDBlines(unbound_list)):
            if rid != currid:
                indices.append(count)
                currid = rid

        bound_CA = utils.getCAOnlyFromPDBLines(bound_list)
        unbound_CA = utils.getCAOnlyFromPDBLines(unbound_list)
        unbound_residues = utils.getResidueNamesFromPDBlines(unbound_CA)
        bound_CA_pos = utils.getCoordinatesFromPDBlines(bound_CA)
        unbound_CA_pos = utils.getCoordinatesFromPDBlines(unbound_CA)
        modes = utils.read_modes(mode_file)

        cumulative_overlap = 0
        eval_dict = {}
        for modeIdx, mode in modes.items():
            ca_modes = [mode['evec'][idx] for idx in indices]

            # Squared amplitude per residue name / secondary code.
            area_aa = dict.fromkeys(amino_acids, 0)
            area_sec = dict.fromkeys(sec_codes, 0)
            integral = 0
            for i, vec in enumerate(ca_modes):
                ampl = vec[0]**2 + vec[1]**2 + vec[2]**2
                integral += ampl
                area_aa[unbound_residues[i]] += ampl
                area_sec[secondary[i]] += ampl
            for key in area_aa:
                area_aa[key] /= integral
            for key in area_sec:
                area_sec[key] /= integral

            overlap = utils.getOverlap(unbound_CA_pos, bound_CA_pos, ca_modes)
            cumulative_overlap += overlap**2
            contributionCA = utils.getModeContribution(
                bound_CA_pos - unbound_CA_pos, ca_modes).tolist()
            norm = utils.getModeNorm(mode['evec'])
            contribution = contributionCA * norm
            magnitude = utils.getModeMagnitude(ca_modes)
            maximaIndices = utils.getIndexMaxima(magnitude)
            maxima = magnitude[maximaIndices]

            eval_dict[modeIdx] = {
                'overlap': overlap,
                'cum_overlap': np.sqrt(cumulative_overlap),
                'eigenvalue': mode['eval'],
                'norm': norm,
                'contribution': contribution,
                'contribution_ca': contributionCA,
                'maxima_indices': maximaIndices.tolist(),
                'maxima_values': maxima.tolist(),
                'area_aa': area_aa,
                'area_sec': area_sec
            }

        utils.saveToJson(
            output, {
                'bound': pdb_bound,
                'unbound': pdb_unbound,
                'mode_file': mode_file,
                'modes': eval_dict
            })
    except Exception as err:
        # Best effort: report which protein failed and why, then carry on.
        print("failed to evaluate protein", pdb_unbound,
              "({}: {})".format(type(err).__name__, err))