Exemplo n.º 1
0
def get_RW(fn, inpro):
    """Write contacts between receptor waters and protein atoms to RW_info.txt.

    The structure in ``inpro`` is parsed with ``get_pdbinfo``; the lines
    returned by ``getPolarAtoms()`` are split by ``getProteinWaters()`` into
    protein lines and water lines.  A water is selected when the distance to
    its nearest protein atom lies strictly between 2.0 and 3.5 (coordinate
    units of the input file -- presumably angstroms, confirm).  For every
    selected water, each protein atom whose distance is also strictly inside
    (2.0, 3.5) produces one CSV row in ``RW_info.txt`` (current directory)::

        fn,resname,resid[.chain],atomname,water_resid[.chain],distance

    Args:
        fn: Identifier passed as the first argument to ``pdbinfo`` and
            written as the first CSV column.
        inpro: Path of the structure file handed to ``pdbinfo(file=...)``.

    Returns:
        None.  When no water lines are found, "No Receptor Water" is printed
        and ``RW_info.txt`` is left empty.
    """

    def residue_label(line):
        # "<resid>" alone, or "<resid>.<chain>" when the chain field is not
        # a single blank.
        number = str(int(get_pdbinfo.resi(line)))
        chain = get_pdbinfo.chid(line)
        return number if chain == " " else number + "." + chain

    # The context manager closes the report on every path, including when
    # parsing or the distance computation raises.
    with open("RW_info.txt", "w") as outfile:
        pro = get_pdbinfo.pdbinfo(fn, file=inpro)
        pro_atoms = pro.getPolarAtoms()
        protein, waters = get_pdbinfo.pdbinfo(
            fn, lines=pro_atoms).getProteinWaters()
        waters_coord = get_pdbinfo.pdbinfo(fn, lines=waters).getCoords()
        protein_coord = get_pdbinfo.pdbinfo(fn, lines=protein).getCoords()

        # Insert axes so the subtraction below broadcasts every water against
        # every protein atom.
        waters_coord = np.expand_dims(waters_coord, 1)
        protein_coord = np.expand_dims(protein_coord, 0)
        if waters_coord.shape[0] == 0:
            print("No Receptor Water")
            return

        # distance[w, p] = distance between water w and protein atom p.
        distance = np.linalg.norm(waters_coord - protein_coord, axis=2)
        distance_min = np.min(distance, axis=1)

        for water_idx, nearest in enumerate(distance_min):
            # Select on the *nearest* protein atom: a water with any protein
            # atom at 2.0 or closer is therefore excluded entirely.
            if not (nearest > 2.0 and nearest < 3.5):
                continue
            rw_name = residue_label(waters[water_idx])

            for atom_idx, d in enumerate(distance[water_idx]):
                if not (d < 3.5 and d > 2.0):
                    continue
                pro_line = protein[atom_idx]
                fields = [
                    fn,
                    get_pdbinfo.resn(pro_line),
                    residue_label(pro_line),
                    get_pdbinfo.atmn(pro_line).strip(),
                    rw_name,
                    str(round(d, 2)),
                ]
                outfile.write(",".join(fields) + "\n")
Exemplo n.º 2
0
def get_Ions(fn, lig, pro, infile):
    """Write ion / ligand-atom contacts closer than 3.5 to a CSV file.

    Ligand lines come from ``getPolarAtoms()`` on the ligand file and ion
    lines from ``getIons()`` on the protein file (both via ``get_pdbinfo``).
    An ion is selected when its nearest ligand atom is closer than 3.5
    (coordinate units of the input files -- presumably angstroms, confirm).
    For each selected ion, every ligand atom closer than 3.5 yields one row::

        fn,ion_resid[.chain],ion_atomname,ligand_atomname,distance

    Args:
        fn: Identifier passed as the first argument to ``pdbinfo`` and
            written as the first CSV column.
        lig: Path of the ligand structure file.
        pro: Path of the protein structure file.
        infile: Path of the file to WRITE; despite its name this is the
            output, and it is truncated on entry.

    Returns:
        None.  When no ion lines are found, "No Ion" is printed and the
        output file is left empty.
    """
    # The context manager closes the output on every path, including when
    # parsing or the distance computation raises.
    with open(infile, "w") as outfile:
        lig_atoms = get_pdbinfo.pdbinfo(fn, file=lig).getPolarAtoms()
        pro_ions = get_pdbinfo.pdbinfo(fn, file=pro).getIons()

        ion_coord = get_pdbinfo.pdbinfo(fn, lines=pro_ions).getCoords()
        lig_coord = get_pdbinfo.pdbinfo(fn, lines=lig_atoms).getCoords()

        # Insert axes so the subtraction below broadcasts every ion against
        # every ligand atom.
        ion_coord = np.expand_dims(ion_coord, 1)
        lig_coord = np.expand_dims(lig_coord, 0)

        if ion_coord.shape[0] == 0:
            print("No Ion")
            return

        # distance[i, l] = distance between ion i and ligand atom l.
        distance = np.linalg.norm(ion_coord - lig_coord, axis=2)
        distance_min = np.min(distance, axis=1)

        for ion_pos, nearest in enumerate(distance_min):
            if not nearest < 3.5:
                continue
            ion_line = pro_ions[ion_pos]
            ion_name = get_pdbinfo.atmn(ion_line).strip()

            # "<resid>" alone, or "<resid>.<chain>" when the chain field is
            # not a single blank.
            ion_chain = get_pdbinfo.chid(ion_line)
            ion_idx = str(int(get_pdbinfo.resi(ion_line)))
            if ion_chain != " ":
                ion_idx = ion_idx + "." + ion_chain

            for lig_pos, d in enumerate(distance[ion_pos]):
                if not d < 3.5:
                    continue
                lig_name = get_pdbinfo.atmn(lig_atoms[lig_pos]).strip()
                fields = [fn, ion_idx, ion_name, lig_name, str(round(d, 2))]
                outfile.write(",".join(fields) + "\n")
Exemplo n.º 3
0
def addH(fn):
    """Add hydrogens to every ``RW*.pdb`` water file in the current directory.

    Each file whose name starts with "RW" and ends with ".pdb" is inspected
    with ``get_pdbinfo``; files that already contain exactly three atoms are
    skipped.  The others are run through ``obabel ... -h``, the record type
    (first six columns) of the 2nd and 3rd atom records is rewritten to match
    the 1st, and the result replaces the original file in place.  The
    original docstring notes that Vina needs these hydrogens.

    Args:
        fn: Unused; kept so existing callers keep working.

    Returns:
        None.

    Raises:
        IndexError: If obabel's output holds fewer than three ATOM/HETATM
            records.
        OSError: If obabel cannot be started or its output file is missing.
    """
    # Imported locally so this block does not depend on the file's import
    # header, which is not visible here.
    import subprocess

    for filename in os.listdir("."):
        if not (filename.startswith("RW") and filename.endswith(".pdb")):
            continue
        atoms = get_pdbinfo.pdbinfo(file=filename).getAtoms()
        if len(atoms) == 3:
            continue

        stem = filename.split(".")[0]
        newfilename = stem + "_addh.pdb"
        # Argument list instead of a shell string: file names containing
        # spaces or shell metacharacters are passed through verbatim.
        subprocess.run(["obabel", filename, "-O", newfilename, "-h"])

        with open(newfilename) as handle:
            lines = [
                line for line in handle
                if line[0:6] in ["ATOM  ", "HETATM"]
            ]

        # Force the records at index 1 and 2 to carry the same record type
        # as the first one (presumably the hydrogens obabel adds can come
        # out with a different record type than the oxygen -- confirm).
        header = "ATOM  " if "ATOM  " in lines[0] else "HETATM"
        for i in range(1, 3):
            lines[i] = header + lines[i][6:]

        corrected = stem + "_addh_correct.pdb"
        with open(corrected, "w") as out:
            out.write("".join(lines))

        # Portable replacements for the former `mv` / `rm` shell calls.
        os.replace(corrected, filename)
        os.remove(newfilename)
Exemplo n.º 4
0
def renumber(fmt, infile, outfile):
    """Rename atoms to ``<ELEMENT><n>`` in order of appearance.

    Every atom gets the upper-cased element followed by a per-element running
    counter starting at 1 (C1, C2, O1, ...).  The new name is substituted for
    the first textual occurrence of the old name in the atom line; the
    shorter of the two names is padded with spaces so the line length is
    preserved when enough trailing blanks follow the old name.

    Args:
        fmt: "mol2" or "pdb".  Any other value makes the call a no-op and no
            file is read or written.
        infile: Path of the file to read.
        outfile: Path of the file to write.

    For "mol2" the element is the part of the atom-type column (6th
    whitespace field) before the first "."; the lines between
    ``@<TRIPOS>ATOM`` and ``@<TRIPOS>BOND`` are rewritten and everything else
    is copied through.  For "pdb" the atom lines come from
    ``get_pdbinfo...getAtoms()`` and the elements from RDKit's reading of the
    same file; file lines lying between the first and last atom line that are
    not themselves atom lines are not copied to the output.

    Raises:
        ValueError: If a mol2 section header, or a pdb atom line, cannot be
            located among the file's lines.
        IndexError: If there are no atom lines, a mol2 atom line has fewer
            than six fields, or RDKit reports fewer atoms than there are
            atom lines.
    """
    if fmt == "mol2":
        with open(infile) as handle:
            lines = handle.readlines()
        atom_index = lines.index("@<TRIPOS>ATOM\n")
        bond_index = lines.index("@<TRIPOS>BOND\n")
        atom_lines = lines[atom_index + 1:bond_index]
        elements = [line.split()[5].split(".")[0] for line in atom_lines]
        old_names = [line.split()[1] for line in atom_lines]
        head = lines[0:atom_index + 1]
        tail = lines[bond_index:]
    elif fmt == "pdb":
        with open(infile) as handle:
            lines = handle.readlines()
        atom_lines = get_pdbinfo.pdbinfo(file=infile).getAtoms()
        atom_index = lines.index(atom_lines[0])
        bond_index = lines.index(atom_lines[-1]) + 1
        # NOTE(review): MolFromPDBFile returns None for unparsable input,
        # which surfaces below as an AttributeError.
        mol = Chem.MolFromPDBFile(infile, removeHs=False)
        elements = [atom.GetSymbol() for atom in mol.GetAtoms()]
        old_names = [get_pdbinfo.atmn(line).strip() for line in atom_lines]
        head = lines[0:atom_index]
        tail = lines[bond_index:]
    else:
        return

    atom_lines_new = _renumber_atom_names(atom_lines, elements, old_names)

    with open(outfile, "w") as out:
        out.write("".join(head))
        out.write("".join(atom_lines_new))
        out.write("".join(tail))


def _renumber_atom_names(atom_lines, elements, old_names):
    """Return copies of *atom_lines* with each name replaced by element+count."""
    counters = {}
    renamed = []
    for idx, line in enumerate(atom_lines):
        # Indexing (not zip) so a too-short element list raises instead of
        # silently dropping atom lines.
        element = elements[idx]
        counters[element] = counters.get(element, 0) + 1
        atom_new = element.upper() + str(counters[element])
        atom_old = old_names[idx]
        # Pad the shorter name so the replacement keeps the line length.
        width = max(len(atom_old), len(atom_new))
        renamed.append(
            line.replace(atom_old.ljust(width), atom_new.ljust(width), 1))
    return renamed
Exemplo n.º 5
0
def get_box(fn, inlig, padding=10):
    """Write a bounding box around the ligand atoms to ``box.txt``.

    The ligand is read from ``"../" + inlig`` (relative to the current
    directory).  For each axis the box centre is the midpoint of the minimum
    and maximum coordinate, and the box size is the coordinate range plus
    ``padding``.  The six values are written to ``box.txt`` in the current
    directory as ``center_x = ...`` through ``size_z = ...`` lines.

    Args:
        fn: Identifier forwarded to ``get_pdbinfo.pdbinfo(name=...)`` for pdb
            input; unused for mol2 input.
        inlig: Ligand file name; its extension selects the parser ("mol2" or
            "pdb").
        padding: Total amount added to each axis range (default 10, the
            previously hard-coded value), in the input's coordinate units.

    Returns:
        None.

    Raises:
        ValueError: If the extension is not supported or no atom coordinates
            were found.
    """
    extension = inlig.split(".")[-1]
    x, y, z = [], [], []

    if extension == "mol2":
        with open("../" + inlig) as inputfile:
            in_atoms = False
            for line in inputfile:
                if line.startswith("@<TRIPOS>"):
                    # Any section header closes the atom section; only the
                    # ATOM header opens it.
                    in_atoms = line.startswith("@<TRIPOS>ATOM")
                    continue
                if not in_atoms:
                    continue
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines inside the section
                # Atom records are whitespace separated
                # (id, name, x, y, z, type, ...), so read fields rather than
                # fixed character columns that shift with long ids or names.
                x.append(float(fields[2]))
                y.append(float(fields[3]))
                z.append(float(fields[4]))
    elif extension == "pdb":
        lines = get_pdbinfo.pdbinfo(name=fn, file="../" + inlig).getAtoms()
        # One call for all atom lines; getCoords() is expected to yield one
        # (x, y, z) row per line.
        coords = get_pdbinfo.pdbinfo(name=fn, lines=lines).getCoords()
        for row in coords:
            x.append(float(row[0]))
            y.append(float(row[1]))
            z.append(float(row[2]))
    else:
        raise ValueError("Unsupported ligand format: " + inlig)

    if not x:
        raise ValueError("No atom coordinates found in " + inlig)

    with open("box.txt", "w") as new_file:
        for axis, values in (("x", x), ("y", y), ("z", z)):
            center = (max(values) + min(values)) / 2
            new_file.write("center_" + axis + " = " + str(center) + "\n")
        for axis, values in (("x", x), ("y", y), ("z", z)):
            size = max(values) - min(values) + padding
            new_file.write("size_" + axis + " = " + str(size) + "\n")
Exemplo n.º 6
0
def get_input(datadir, fn):
    """Locate, and where needed generate, the input files for ``fn``.

    Works inside ``datadir`` and always restores the previous working
    directory before returning or exiting.

    Expected inputs in ``datadir``:
        ``<fn>_ligand.mol2`` and/or ``<fn>_ligand.sdf`` (optionally
        ``<fn>_ligand.pdb``), and ``<fn>_protein_all.pdb`` (protein with
        waters); ``<fn>_protein.pdb`` is generated from the latter when
        missing.

    Files that may be created:
        ``<fn>_ligand.pdb``, ``<fn>_ligand_rename.mol2``,
        ``<fn>_ligand_rename.pdb`` and ``<fn>_protein.pdb``.

    Args:
        datadir: Directory holding the input files.
        fn: File-name prefix (the original docstring calls it the pdbid).

    Returns:
        Tuple ``(inlig_rdkit, inlig3, inpro1, inpro2)``:
            inlig_rdkit: the mol2 or sdf ligand file RDKit could parse, or
                None when neither could be parsed.
            inlig3: ``<fn>_ligand_rename.pdb`` (ligand with renamed atoms).
            inpro1: ``<fn>_protein.pdb`` (protein only).
            inpro2: ``<fn>_protein_all.pdb`` (protein and waters).

    Raises:
        SystemExit: If the renamed ligand pdb or either protein file is
            missing or empty.
    """
    olddir = os.getcwd()
    os.chdir(datadir)
    try:
        ### get ligand ###
        ### input should be provided as either sdf (best choice) or mol2 ###
        inlig1 = fn + "_ligand.mol2"
        inlig2 = fn + "_ligand.sdf"
        inlig3 = fn + "_ligand.pdb"
        renamed_mol2 = fn + "_ligand_rename.mol2"
        renamed_pdb = fn + "_ligand_rename.pdb"

        ### check ligand input file ###
        present = os.listdir(".")
        inlig_rdkit = None
        if inlig1 in present:
            inlig = inlig1
            if _rdkit_can_read(inlig1):
                inlig_rdkit = inlig1
            elif inlig2 in present:
                # mol2 unusable: fall back to the sdf file.
                inlig = inlig2
                if _rdkit_can_read(inlig2):
                    inlig_rdkit = inlig2
        else:
            inlig = inlig2
            if _rdkit_can_read(inlig2):
                inlig_rdkit = inlig2

        ### correcting atom name for sasa calculation ###
        if inlig3 not in os.listdir(".") and inlig_rdkit is None:
            # Neither sdf nor mol2 could be processed by RDKit: generate the
            # pdb with obabel so the other calculations can continue.  This
            # only helps when the problem lies with RDKit; a badly broken
            # molecule may convert wrongly.  The input is the mol2/sdf file,
            # not the pdb that does not exist yet.
            _obabel_to_pdb(inlig.split(".")[-1], inlig, inlig3)

        if inlig3 in os.listdir("."):
            renumber("pdb", inlig3, renamed_pdb)
        elif inlig_rdkit is not None:
            infmt = inlig_rdkit.split(".")[-1]
            if infmt == "mol2":
                # Rename in mol2 first, then convert the renamed file.
                renumber(infmt, inlig_rdkit, renamed_mol2)
                _obabel_to_pdb(infmt, renamed_mol2, renamed_pdb)
            elif infmt == "sdf":
                # Convert first, then rename in the resulting pdb.
                _obabel_to_pdb(infmt, inlig_rdkit, inlig3)
                renumber("pdb", inlig3, renamed_pdb)
        # else: no pdb and nothing RDKit can read -- the ligand check below
        # exits with its error message.

        inlig3 = renamed_pdb

        ### get protein ###
        ### at least one protein structure should be provided with all waters ###
        inpro1 = fn + "_protein.pdb"
        inpro2 = fn + "_protein_all.pdb"
        if inpro1 not in os.listdir("."):
            # Parse before opening the output so a parse failure does not
            # leave an empty protein file behind.
            protein_lines = get_pdbinfo.pdbinfo(
                fn, file=inpro2).getProteinWaters()[0]
            with open(inpro1, "w") as outpro:
                outpro.write("".join(protein_lines))

        ### check input structures ###
        if inlig_rdkit is not None and os.path.isfile(
                inlig_rdkit) and os.stat(inlig_rdkit).st_size != 0:
            print("Ligand for conformation stability:" + inlig_rdkit)
        else:
            print(
                "Warning:input ligand should be checked, skip ligand stability calculation, use default(dE:-300, RMSD:300)"
            )

        if os.path.isfile(inlig3) and os.stat(inlig3).st_size != 0:
            print("Ligand for Vina, SASA, BA, ION:" + inlig3)
        else:
            sys.exit("Error: ligand input (pdb)")
        if os.path.isfile(inpro1) and os.stat(inpro1).st_size != 0:
            print("Protein without water molecules:" + inpro1)
        else:
            sys.exit("Error: protein input without water")
        if os.path.isfile(inpro2) and os.stat(inpro2).st_size != 0:
            print("Protein with water molecules:" + inpro2)
        else:
            sys.exit("Error: protein input with water")
    finally:
        # Restore the caller's directory on success, error and sys.exit.
        os.chdir(olddir)

    return inlig_rdkit, inlig3, inpro1, inpro2


def _rdkit_can_read(path):
    """Return True when RDKit parses the mol2/sdf file *path* into a molecule."""
    try:
        if path.endswith(".mol2"):
            mol = Chem.MolFromMol2File(path, removeHs=False)
        else:
            mol = Chem.SDMolSupplier(path, removeHs=False)[0]
    except Exception:
        # Best effort: any failure (e.g. a missing file) means "not usable".
        return False
    return mol is not None


def _obabel_to_pdb(infmt, src, dst):
    """Convert *src* (format *infmt*) to the pdb file *dst* with obabel."""
    # Imported locally so this block does not depend on the file's import
    # header, which is not visible here.
    import subprocess

    # Argument list instead of a shell string so file names are passed
    # through verbatim.
    subprocess.run(["obabel", "-i" + infmt, src, "-opdb", "-O", dst])