def createBoundModesFunction(config, setting):
    """Write the bound-minus-unbound coordinate difference as a mode file.

    Coordinates are extracted from both PDB files, the unbound coordinates
    are subtracted from the bound ones, and the three components of the
    difference are passed to ``utils.writeModeFile`` together with a final
    argument of 1.0.

    Args:
        config: Object providing ``getInputFile``, ``getOutputFile`` and
            ``getSetting`` for a setting name.
        setting: Name of the setting. Its 'verbose' entry enables a log
            line; its 'dryRun' entry skips all file reading and writing.
    """
    bound_path = config.getInputFile(setting, 'protein_bound')
    unbound_path = config.getInputFile(setting, 'protein_unbound')
    target = config.getOutputFile(setting, 'out')

    if config.getSetting(setting)['verbose']:
        description = (" Create bound modes for " + unbound_path + " and " +
                       bound_path + " and output to " + target)
        print("SETTING: ", setting.upper(), description)
    if config.getSetting(setting)["dryRun"]:
        return

    bound_lines = utils.readFileToList(bound_path)
    unbound_lines = utils.readFileToList(unbound_path)

    displacement = (utils.getCoordinatesFromPDBlines(bound_lines) -
                    utils.getCoordinatesFromPDBlines(unbound_lines))

    # The norm is still computed, but its value is not applied: the mode is
    # written with a fixed last argument of 1.0 (a "/norm**2" variant was
    # left commented out by the original author).
    unused_norm = utils.getModeNorm(displacement)

    components = displacement.T
    utils.writeModeFile(target, components[0], components[1], components[2],
                        1.0)
# Example #2
# 0
def _consecutiveDistances(positions):
    """Return the distances between consecutive rows of *positions* (1-D array)."""
    coords = np.asarray(positions, dtype=float)
    if len(coords) < 2:
        # Fewer than two points: there is no consecutive pair to measure.
        return np.asarray([])
    steps = coords[:-1] - coords[1:]
    return np.sqrt((steps * steps).sum(axis=1))


def _stackPerProtein(arrays):
    """Combine per-protein distance arrays into one array.

    Arrays of equal length are stacked as before. Arrays of different
    lengths are stored in a 1-D object array, because recent NumPy versions
    refuse to build an array from ragged input implicitly.
    """
    if len({len(a) for a in arrays}) <= 1:
        return np.asarray(arrays)
    ragged = np.empty(len(arrays), dtype=object)
    for i, arr in enumerate(arrays):
        ragged[i] = arr
    return ragged


def isConnected(base_path, protein_list):
    """Measure distances between consecutive CA positions of unbound proteins.

    For every name in *protein_list* the files
    ``<base_path>/<name>/<name>A-unbound.pdb`` (receptor) and
    ``<base_path>/<name>/<name>B-unbound.pdb`` (ligand) are read and reduced
    to their CA lines. The distance between each pair of consecutive
    coordinates is then computed.

    Args:
        base_path: Directory that contains one sub-directory per protein.
        protein_list: Iterable of protein names.

    Returns:
        A 4-tuple ``(mean_receptor, mean_ligand, receptor_distances,
        ligand_distances)``. The means are taken over all consecutive
        distances of all proteins and are NaN when there is no distance at
        all (e.g. an empty *protein_list*). The last two entries hold one
        distance array per protein.
    """
    receptor_distances = []
    ligand_distances = []
    receptor_total = 0
    ligand_total = 0
    receptor_count = 0
    ligand_count = 0

    for protein in protein_list:
        receptor = os.path.join(base_path,
                                protein) + "/{}A-unbound.pdb".format(protein)
        ligand = os.path.join(base_path,
                              protein) + "/{}B-unbound.pdb".format(protein)
        receptor_capos = utils.getCoordinatesFromPDBlines(
            utils.getCAOnlyFromPDBLines(utils.readFileToList(receptor)))
        ligand_capos = utils.getCoordinatesFromPDBlines(
            utils.getCAOnlyFromPDBLines(utils.readFileToList(ligand)))

        rec = _consecutiveDistances(receptor_capos)
        lig = _consecutiveDistances(ligand_capos)

        receptor_total += rec.sum()
        ligand_total += lig.sum()
        receptor_count += rec.size
        ligand_count += lig.size
        receptor_distances.append(rec)
        ligand_distances.append(lig)

    # Avoid 0/0 when no consecutive pair was found at all.
    mean_receptor = (receptor_total /
                     receptor_count if receptor_count else float("nan"))
    mean_ligand = ligand_total / ligand_count if ligand_count else float("nan")

    return (mean_receptor, mean_ligand, _stackPerProtein(receptor_distances),
            _stackPerProtein(ligand_distances))
def cutTermini(config, setting):
    """Cut the loose termini listed in a cut log out of a PDB file.

    The cut log is loaded as JSON; its 'looseTerminiFront' and
    'looseTerminiBack' entries are concatenated and passed, together with
    the lines of the input PDB, to ``utils.cutTerminiAndWriteToPdb``.

    Args:
        config: Object providing ``getInputFile``, ``getOutputFile`` and
            ``getSetting`` for a setting name.
        setting: Name of the setting. Its 'verbose' entry enables a log
            line; its 'dryRun' entry skips all file reading and writing.
    """
    options = config.getSetting(setting)
    source_pdb = config.getInputFile(setting, "pdb")
    log_path = config.getInputFile(setting, "cutlog")
    target_pdb = config.getOutputFile(setting, "out")

    if options['verbose']:
        print("Cut Termini from  " + source_pdb + " and output to " +
              target_pdb)
    if options["dryRun"]:
        return

    cut_info = utils.loadFromJson(log_path)
    loose_residues = (cut_info['looseTerminiFront'] +
                      cut_info['looseTerminiBack'])
    utils.cutTerminiAndWriteToPdb(loose_residues,
                                  utils.readFileToList(source_pdb),
                                  target_pdb)
def manipulateModesFunction(config, setting):
    """Zero the mode vectors of entries in selected secondary-structure classes.

    For every mode read from the mode file, each entry ``i`` of its 'evec'
    is replaced by a zero 3-vector when the secondary-structure code looked
    up through ``sec[resIndices[i]]`` is contained in the setting's
    'manipulate' entry. The modified modes are written to the output file.

    Args:
        config: Object providing ``getInputFile``, ``getOutputFile`` and
            ``getSetting`` for a setting name.
        setting: Name of the setting; its 'manipulate' entry holds the
            secondary-structure codes whose vectors are zeroed.
    """
    secondary_file = config.getInputFile(setting, 'sec')
    pdb = config.getInputFile(setting, 'protein')
    mode_file = config.getInputFile(setting, 'modes')

    output = config.getOutputFile(setting, 'out')

    pdb_list = utils.readFileToList(pdb)
    resIndices = utils.getResidueIndicesFromPDBLines(pdb_list)

    modes = utils.read_modes(mode_file)
    sec = readSecondaryStructure(secondary_file)
    settings = config.getSetting(setting)

    # Iterate over the modes that were actually loaded; the previous code
    # referenced an undefined name here and failed with a NameError.
    for mode in modes.values():
        evec = mode['evec']
        for i in range(len(evec)):
            if sec[resIndices[i]] in settings['manipulate']:
                evec[i] = np.zeros(3)

    utils.writeModeFileFromDict(modes, output)
def evalProtein(config, setting):
    """Summarise composition and area of a protein from its secondary file.

    Each parsed line contributes its residue name (index 1), its secondary
    code (index 5) and an area value (index 9). The function stores, as
    JSON, the relative frequency of every residue name and secondary code,
    the total area, the number of lines, and the share of the total area
    per residue name and per secondary code.

    Args:
        config: Object providing ``getInputFile``, ``getOutputFile`` and
            ``getSetting`` for a setting name.
        setting: Name of the setting. Its 'verbose' entry enables a log
            line; its 'dryRun' entry skips all file reading and writing.
    """
    secondary_file = config.getInputFile(setting, 'secondary')
    output = config.getOutputFile(setting, 'out')

    if config.getSetting(setting)['verbose']:
        print("SETTING: ", setting.upper(), " evaluating protein for",
              secondary_file)
    if config.getSetting(setting)["dryRun"]:
        return

    # Keys that are always present in the result, in output order.
    residue_names = ('LYS', 'PRO', 'ILE', 'TRP', 'GLU', 'GLN', 'GLY', 'SER',
                     'PHE', 'HIS', 'TYR', 'LEU', 'ASP', 'ASN', 'ARG', 'THR',
                     'ALA', 'CYS', 'VAL', 'MET')
    structure_codes = ('C', 'E', 'B', 'T', 'H', 'G', 'b')

    sec_lines = utils.getSecLines(utils.readFileToList(secondary_file))

    total_area = 0
    seen_codes = []
    seen_residues = []
    area_by_residue = dict.fromkeys(residue_names, 0)
    area_by_code = dict.fromkeys(structure_codes, 0)
    for entry in sec_lines:
        exposed = float(entry[9])
        residue = entry[1]
        code = entry[5]
        total_area += exposed
        seen_codes.append(code)
        seen_residues.append(residue)
        area_by_residue[residue] += exposed
        area_by_code[code] += exposed

    total_area = np.asarray(total_area).sum()

    count = float(len(seen_codes))
    residue_share = dict.fromkeys(residue_names, 0)
    code_share = dict.fromkeys(structure_codes, 0)
    code_share.update(
        (code, hits / count) for code, hits in Counter(seen_codes).items())
    residue_share.update(
        (name, hits / count) for name, hits in Counter(seen_residues).items())

    # Turn absolute areas into fractions of the total area.
    area_by_residue = {
        name: value / total_area
        for name, value in area_by_residue.items()
    }
    area_by_code = {
        code: value / total_area
        for code, value in area_by_code.items()
    }

    summary = {
        'secondary': code_share,
        'aminoAcids': residue_share,
        'area': total_area,
        'size': count,
        'sec_area': area_by_code,
        'aa_area': area_by_residue
    }
    utils.saveToJson(output, summary)
def _interfaceFractions(labels, known_keys):
    """Return the relative frequency of each label; unseen known keys stay 0."""
    fractions = dict.fromkeys(known_keys, 0)
    total = float(len(labels))
    for key, count in Counter(labels).items():
        fractions[key] = count / total
    return fractions


def _interfaceSecInfo(indices, sec_lines):
    """Collect secondary codes, residue names and summed area for the given rows."""
    codes = []
    names = []
    area = 0
    for i in indices:
        line = sec_lines[i]
        codes.append(line[5])
        names.append(line[1])
        area += float(line[9])
    return codes, names, area


def GetInterface(config, setting):
    """Extract receptor/ligand interface statistics from a complex PDB file.

    For every structure parsed from the 'pdb' input, the entries keyed 'A'
    (receptor) and 'B' (ligand) are passed to ``utils.getInterfaceResidues``
    with the configured cutoff. When either side has contact residues, the
    residue ids, coordinates, residue names, secondary codes, their relative
    frequencies and the summed area of the contact residues are recorded.
    The collected list is saved as JSON to the 'out' file.

    Any exception raised while evaluating is caught and reported on stdout
    together with the output file name; the function then returns normally.

    Args:
        config: Object providing ``getInputFile``, ``getOutputFile`` and
            ``getSetting`` for a setting name.
        setting: Name of the setting. Its 'cutoff' entry is forwarded to the
            contact search, 'verbose' enables a log line and 'dryRun' skips
            all file reading and writing.
    """
    pdb = config.getInputFile(setting, 'pdb')
    interfaceFile = config.getOutputFile(setting, 'out')
    receptor_filename = config.getInputFile(setting, 'receptor')
    ligand_filename = config.getInputFile(setting, 'ligand')
    receptorSec_filename = config.getInputFile(setting, 'receptorSec')
    ligandSec_filename = config.getInputFile(setting, 'ligandSec')

    cutoff = config.getSetting(setting)['cutoff']

    if config.getSetting(setting)['verbose']:
        print("SETTING: ", setting.upper(), " Get interface from pdb " + pdb)
    if config.getSetting(setting)["dryRun"]:
        return

    # Keys that are always present in the frequency tables, in output order.
    amino_acids = ('LYS', 'PRO', 'ILE', 'TRP', 'GLU', 'GLN', 'GLY', 'SER',
                   'PHE', 'HIS', 'TYR', 'LEU', 'ASP', 'ASN', 'ARG', 'THR',
                   'ALA', 'CYS', 'VAL', 'MET')
    sec_codes = ('C', 'E', 'B', 'T', 'H', 'G', 'b')

    try:
        receptorSec = utils.getSecLines(
            utils.readFileToList(receptorSec_filename))
        ligandSec = utils.getSecLines(utils.readFileToList(ligandSec_filename))

        # Maps from residue id to the row index in the secondary lines.
        recmap = utils.getUniqueResIds(
            utils.getResidueFromPDBlines(
                utils.readFileToList(receptor_filename)))
        ligmap = utils.getUniqueResIds(
            utils.getResidueFromPDBlines(
                utils.readFileToList(ligand_filename)))
        structures = utils.parseBIOPdbToStructure(pdb)

        interfaces = []
        if len(structures) > 0:
            for struct in structures:
                receptor = struct['A']
                ligand = struct['B']

                contactResiduesRec, contactResiduesLig = utils.getInterfaceResidues(
                    receptor, ligand, cutoff)
                if len(contactResiduesRec) > 0 or len(contactResiduesLig) > 0:
                    recinterfaceResidues = utils.getResidueIds(
                        contactResiduesRec)
                    liginterfaceResidues = utils.getResidueIds(
                        contactResiduesLig)

                    interfacePosRec = utils.getResidueCoordinates(
                        contactResiduesRec).T
                    interfacePosLig = utils.getResidueCoordinates(
                        contactResiduesLig).T

                    interfaceRecIndices = [
                        recmap[key] for key in recinterfaceResidues
                    ]
                    interfaceLigIndices = [
                        ligmap[key] for key in liginterfaceResidues
                    ]

                    isecRec, AARec, areaRec = _interfaceSecInfo(
                        interfaceRecIndices, receptorSec)
                    isecLig, AALig, areaLig = _interfaceSecInfo(
                        interfaceLigIndices, ligandSec)

                    interfaces.append({
                        'model': struct.id,
                        "recInterfaceResidues": recinterfaceResidues,
                        "ligInterfaceResidues": liginterfaceResidues,
                        "recAA": AARec,
                        'ligAA': AALig,
                        'rec_x': list(interfacePosRec[0]),
                        'rec_y': list(interfacePosRec[1]),
                        'rec_z': list(interfacePosRec[2]),
                        'lig_x': list(interfacePosLig[0]),
                        'lig_y': list(interfacePosLig[1]),
                        'lig_z': list(interfacePosLig[2]),
                        'rec_sec': isecRec,
                        'lig_sec': isecLig,
                        'countSecRec': _interfaceFractions(isecRec, sec_codes),
                        'countSecLig': _interfaceFractions(isecLig, sec_codes),
                        'AALigCount': _interfaceFractions(AALig, amino_acids),
                        'AARecCount': _interfaceFractions(AARec, amino_acids),
                        'areaRec': areaRec,
                        'areaLig': areaLig
                    })

                # NOTE(review): the file is rewritten after every structure,
                # so results collected so far are on disk if a later
                # structure fails -- confirm this is intended before moving
                # the call out of the loop.
                utils.saveToJson(interfaceFile, {
                    'file': pdb,
                    'cutoff': cutoff,
                    'interfaces': interfaces
                })
    except Exception as err:
        # Best-effort step: report the failure (with its cause) and go on.
        # KeyboardInterrupt/SystemExit are deliberately not intercepted.
        print("eval interface: FAILED", interfaceFile, repr(err))
def _modeResidueStartIndices(residue_ids):
    """Return the position of the first entry of every run of equal residue ids."""
    starts = []
    current = None
    for position, rid in enumerate(residue_ids):
        if rid != current:
            starts.append(position)
            current = rid
    return starts


def _modeAmplitudeShares(ca_modes, residue_names, secondary):
    """Split the squared amplitude of a mode by residue name and secondary code.

    Returns two dicts (per residue name, per secondary code) whose values are
    fractions of the summed squared amplitude of all vectors in *ca_modes*.
    """
    area_aa = dict.fromkeys(
        ('LYS', 'PRO', 'ILE', 'TRP', 'GLU', 'GLN', 'GLY', 'SER', 'PHE', 'HIS',
         'TYR', 'LEU', 'ASP', 'ASN', 'ARG', 'THR', 'ALA', 'CYS', 'VAL',
         'MET'), 0)
    area_sec = dict.fromkeys(('C', 'E', 'B', 'T', 'H', 'G', 'b'), 0)
    integral = 0
    for i, vec in enumerate(ca_modes):
        ampl = vec[0]**2 + vec[1]**2 + vec[2]**2
        integral += ampl
        area_aa[residue_names[i]] += ampl
        area_sec[secondary[i]] += ampl

    for key in area_aa:
        area_aa[key] /= integral
    for key in area_sec:
        area_sec[key] /= integral
    return area_aa, area_sec


def modeEvalFunction(config, setting):
    """Evaluate how well each mode describes the unbound-to-bound change.

    From every mode in the mode file, one vector per residue is selected
    (the vector at the first atom of each residue of the unbound PDB). For
    each mode the overlap with the bound-minus-unbound CA displacement, the
    running cumulative overlap, the contribution, the norm, the magnitude
    maxima and the share of squared amplitude per residue name and per
    secondary code are computed. All results are saved as JSON.

    Any exception raised while evaluating is caught and reported on stdout
    together with the unbound PDB name; the function then returns normally.

    Args:
        config: Object providing ``getInputFile``, ``getOutputFile`` and
            ``getSetting`` for a setting name.
        setting: Name of the setting. Its 'verbose' entry enables a log
            line; its 'dryRun' entry skips all file reading and writing.
    """
    pdb_bound = config.getInputFile(setting, 'protein_bound')
    pdb_unbound = config.getInputFile(setting, 'protein_unbound')
    mode_file = config.getInputFile(setting, 'mode_file')
    secondary_file = config.getInputFile(setting, 'secondary')

    output = config.getOutputFile(setting, 'out')

    if config.getSetting(setting)['verbose']:
        print("SETTING: ", setting.upper(), " evaluating modes for", mode_file,
              " and output to ", output)
    if config.getSetting(setting)["dryRun"]:
        return

    try:
        bound_list = utils.readFileToList(pdb_bound)
        unbound_list = utils.readFileToList(pdb_unbound)

        secondary = [
            line[5]
            for line in utils.getSecLines(utils.readFileToList(secondary_file))
        ]

        # Index of the first atom line of every residue in the unbound file;
        # used below to pick one mode vector per residue.
        indices = _modeResidueStartIndices(
            utils.getResidueFromPDBlines(unbound_list))

        bound_CA = utils.getCAOnlyFromPDBLines(bound_list)
        unbound_CA = utils.getCAOnlyFromPDBLines(unbound_list)
        unbound_residues = utils.getResidueNamesFromPDBlines(unbound_CA)

        bound_CA_pos = utils.getCoordinatesFromPDBlines(bound_CA)
        unbound_CA_pos = utils.getCoordinatesFromPDBlines(unbound_CA)

        modes = utils.read_modes(mode_file)
        cumulative_overlap = 0
        eval_dict = {}
        for modeIdx, mode in modes.items():
            ca_modes = [mode['evec'][idx] for idx in indices]
            area_aa, area_sec = _modeAmplitudeShares(ca_modes,
                                                     unbound_residues,
                                                     secondary)

            overlap = utils.getOverlap(unbound_CA_pos, bound_CA_pos, ca_modes)
            cumulative_overlap += overlap**2
            contributionCA = utils.getModeContribution(
                bound_CA_pos - unbound_CA_pos, ca_modes).tolist()
            norm = utils.getModeNorm(mode['evec'])
            # NOTE(review): assumes contributionCA is a scalar after
            # .tolist(); verify against utils.getModeContribution.
            contribution = contributionCA * norm
            magnitude = utils.getModeMagnitude(ca_modes)
            maximaIndices = utils.getIndexMaxima(magnitude)
            maxima = magnitude[maximaIndices]
            eval_dict[modeIdx] = {
                'overlap': overlap,
                'cum_overlap': np.sqrt(cumulative_overlap),
                'eigenvalue': mode['eval'],
                'norm': norm,
                'contribution': contribution,
                'contribution_ca': contributionCA,
                'maxima_indices': maximaIndices.tolist(),
                'maxima_values': maxima.tolist(),
                'area_aa': area_aa,
                'area_sec': area_sec
            }

        utils.saveToJson(
            output, {
                'bound': pdb_bound,
                'unbound': pdb_unbound,
                'mode_file': mode_file,
                'modes': eval_dict
            })
    except Exception as err:
        # Best-effort step: report the failure (with its cause) and go on.
        # KeyboardInterrupt/SystemExit are deliberately not intercepted.
        print("failed to evaluate protein", pdb_unbound, repr(err))