示例#1
0
    def pqr2pdb(line, counter):
        """Rebuild one atom record as a PDB line under a fresh serial number.

        The record is parsed with ``read_pdb_line``; the serial number it
        carried is discarded and replaced by ``counter + 1``. Residue names
        found in ``RNA_RESIDUES`` are mapped through that table, and for those
        residues only, the atom names ``OP1``/``OP2`` are rewritten to
        ``O1P``/``O2P``.

        Returns:
            tuple: ``(pdb_line, counter)`` where ``pdb_line`` is whatever
            ``new_pdb_line`` returns and ``counter`` is the incremented
            serial number.
        """
        serial = counter + 1
        aname, _old_serial, resname, chain, resnumb, x, y, z = read_pdb_line(
            line)

        if resname in RNA_RESIDUES:
            resname = RNA_RESIDUES[resname]
            # Atom renaming is applied to RNA residues only.
            aname = {"OP1": "O1P", "OP2": "O2P"}.get(aname, aname)

        pdb_line = new_pdb_line(
            serial, aname, resname, resnumb, x, y, z, chain=chain)
        return pdb_line, serial
示例#2
0
def storeResidues(filename):
    """Collect the atoms of every ``ATOM `` record of a PDB file, per residue.

    Returns:
        dict: ``{resnumb: {aname: (resname, x, y, z)}}``. Residues are keyed
        by residue number only, so a later atom with the same residue number
        and atom name replaces the earlier entry.
    """
    residues = {}
    with open(filename) as pdb_file:
        atom_rows = (row for row in pdb_file if row.startswith("ATOM "))
        for row in atom_rows:
            aname, _, resname, _, resnumb, x, y, z = read_pdb_line(row)
            residues.setdefault(resnumb, {})[aname] = (resname, x, y, z)
    return residues
示例#3
0
def remove_membrane_n_rna(pdbfile, outfile):
    """Write a filtered copy of the first model of ``pdbfile`` to ``outfile``.

    Only lines starting with ``ATOM`` are considered, and reading stops at
    the first ``ENDMDL`` line. A record is dropped when

    * column 27 of the line (index 26) is not blank,
    * the atom name starts with ``H`` and ``remove_hs`` is enabled, or
    * its residue name is in ``LIPID_RESIDUES`` or among the values of
      ``Config.pypka_params.LIPIDS``.

    With ``ffinput == "CHARMM"`` the residue names ``HSD``/``HSE`` become
    ``HSP`` before the lipid filter is applied. Residue names present in
    ``PDB_RNA_RESIDUES`` are mapped through that table. Kept records are
    re-emitted through ``new_pdb_line`` with the atom number they were read
    with.

    Returns:
        None
    """
    excluded_resnames = LIPID_RESIDUES + list(
        Config.pypka_params.LIPIDS.values())
    kept_records = []

    with open(pdbfile) as pdb_in:
        for record in pdb_in:
            if not record.startswith("ATOM"):
                if record.startswith("ENDMDL"):
                    # Only the first model is processed.
                    break
                continue

            (aname, anumb, resname, chain, resnumb, x, y,
             z) = read_pdb_line(record)

            has_insertion_code = bool(record[26].strip())
            if has_insertion_code:
                continue
            if aname[0] == "H" and Config.pypka_params["remove_hs"]:
                continue

            if (Config.pypka_params["ffinput"] == "CHARMM"
                    and resname in ("HSD", "HSE")):
                resname = "HSP"

            if resname in excluded_resnames:
                continue

            if resname in PDB_RNA_RESIDUES:
                resname = PDB_RNA_RESIDUES[resname]

            kept_records.append(
                new_pdb_line(anumb,
                             aname,
                             resname,
                             resnumb,
                             x,
                             y,
                             z,
                             chain=chain))

    with open(outfile, "w") as pdb_out:
        pdb_out.write("".join(kept_records))

    return
示例#4
0
def fix_fixed_sites(molecules, fixed_sites, fname):
    """Freeze user-fixed sites: stop titrating them and rename them in the PDB.

    Three steps are performed, in order:

    1. Terminal keys of ``fixed_sites`` (strings ending in ``N`` or ``C``)
       are replaced in place by the integer
       ``int(key[:-1]) + TERMINAL_OFFSET``.
    2. Every site whose number is fixed for its molecule's chain is removed
       from ``molecule.sites`` and ``molecule.sites_order``.
    3. ``fname`` is rewritten in place. For ``ATOM `` records of fixed
       residues the last character of the residue name is replaced by the
       fixed state; every ``ATOM `` record is re-emitted through
       ``new_pdb_line`` and all other lines are copied verbatim.

    Chains that have no entry in ``fixed_sites`` are treated as having no
    fixed sites (a direct ``fixed_sites[chain]`` lookup would raise
    ``KeyError`` for them).

    Args:
        molecules: mapping whose values expose ``chain``, ``sites`` (dict)
            and ``sites_order`` (list).
        fixed_sites: ``{chain: {site: state}}``; mutated in place (step 1).
        fname: path of the PDB file to rewrite.

    Returns:
        None
    """
    # Step 1: normalise terminal keys such as "1N" / "129C" to offset integers.
    for chain_sites in fixed_sites.values():
        for site, state in list(chain_sites.items()):
            # endswith() also tolerates an empty-string key.
            if isinstance(site, str) and site.endswith(("N", "C")):
                del chain_sites[site]
                chain_sites[int(site[:-1]) + TERMINAL_OFFSET] = state

    # Step 2: fixed sites are no longer titrated.
    for molecule in molecules.values():
        chain_fixed = fixed_sites.get(molecule.chain, {})
        for sitenumb, site in list(molecule.sites.items()):
            if sitenumb in chain_fixed:
                del molecule.sites[sitenumb]
                molecule.sites_order.remove(site)

    # Step 3: encode the fixed state in the residue names of the PDB file.
    rewritten = []
    with open(fname) as f:
        for line in f:
            if not line.startswith("ATOM "):
                rewritten.append(line)
                continue

            (aname, anumb, resname, chain, resnumb, x, y,
             z) = read_pdb_line(line)
            chain_fixed = fixed_sites.get(chain, {})
            if resnumb in chain_fixed:
                resname = "{}{}".format(resname[:-1],
                                        str(chain_fixed[resnumb]))
            rewritten.append(
                new_pdb_line(anumb,
                             aname,
                             resname,
                             resnumb,
                             x,
                             y,
                             z,
                             chain=chain))

    with open(fname, "w") as f:
        f.write("".join(rewritten))
示例#5
0
def make_delphi_inputfile(f_in, f_out, molecules):
    """Write the DelPhi input PDB and register atoms/centers on the sites.

    ``f_in`` is read line by line. Every ``ATOM`` record is renumbered with a
    running 0-based position, has its residue/atom names corrected, and is
    written to ``f_out`` (followed by ``TER`` and ``ENDMDL``). For chains
    present in ``molecules`` the atoms are also registered on the molecule
    and, when they belong to a site's reference tautomer, on the site itself.
    After the file is written, each site receives a focus center (midpoint
    of the bounding box of its atoms) and the mean position of its hydrogen
    atoms.

    The box is taken from the ``CRYST1`` record. When that record is missing
    or is ``1.0 1.0 1.0`` the running per-axis coordinate maximum is used
    instead. A box configured in ``Config.pypka_params["box"]`` overrides
    both; otherwise the box is stored with ``Config.pypka_params.setBox``.

    Args:
        f_in: path of the PDB file to read.
        f_out: path of the PDB file to write.
        molecules: mapping ``chain -> molecule``. The molecule attributes
            used here are ``NTR``, ``CTR``, ``sites``, ``correct_names``,
            ``correct_atoms`` and ``addAtom``.

    Returns:
        dict: ``{chain: {resnumb: resname}}`` holding, for every residue, the
        residue name of its first ``ATOM`` record as read from ``f_in``
        (before any termini/name correction).

    Raises:
        Exception: if a site ends up with no hydrogen atoms.
    """
    def getMaxCoords(coords, max_coords):
        """Return the per-axis maximum of ``coords`` and ``max_coords``."""
        x, y, z = coords
        max_x, max_y, max_z = max_coords
        if max_x < x:
            max_x = x
        if max_y < y:
            max_y = y
        if max_z < z:
            max_z = z
        return max_x, max_y, max_z

    def correct_termini(resnumb, resname, aname, ntr_res, ctr_res):
        """Move terminal atoms to the NTR/CTR pseudo-residue.

        Atoms of the N-/C-terminal residue whose name is listed in
        ``NTR_atoms``/``CTR_atoms`` get the residue name ``NTR``/``CTR`` and
        a residue number shifted by ``TERMINAL_OFFSET``.
        """
        if resnumb == ntr_res and aname in Config.pypka_params["NTR_atoms"]:
            resname = "NTR"
            resnumb += TERMINAL_OFFSET
        elif resnumb == ctr_res and aname in Config.pypka_params["CTR_atoms"]:
            resname = "CTR"
            resnumb += TERMINAL_OFFSET
            # if aname == "C":
            #    aname = "CT"
        return resnumb, resname, aname

    def correct_res_names(molecule, resnumb, resname, aname):
        """Apply the molecule's stored residue/atom name corrections."""
        if resnumb in list(molecule.correct_names.keys()):
            resname = molecule.correct_names[resnumb]
        if (resnumb in list(molecule.correct_atoms.keys())
                and aname in molecule.correct_atoms[resnumb]):
            aname = molecule.correct_atoms[resnumb][aname]

        return resnumb, resname, aname

    def assign_atoms(sites, resnumb, aname, site_Hs, site_positions):
        """Register the current atom on its site, if it belongs to one.

        Besides its arguments this helper reads ``resname``, ``anumb``,
        ``chain``, ``x``, ``y`` and ``z`` from the enclosing loop. It returns
        the (updated) ``site_Hs``, the residue name to write (the reference
        tautomer name for site atoms, the current ``resname`` otherwise) and
        the (updated) ``site_positions``.
        """
        ref_tau_name = resname
        if resnumb in list(sites.keys()) and aname in list(
                sites[resnumb].getRefTautomer().charge_set.keys()):
            # ( aname not in ('N', 'H', 'C', 'O', 'CA') or
            # (aname in ('N', 'H', 'C', 'O', 'CA') and resname == 'NTR')):
            # change res name to reference tautomer
            ref_tau_name = sites[resnumb].getRefTautomerName()

            # add atom to corresponding site
            sites[resnumb].addAtom(aname, anumb)

            if chain not in site_positions:
                site_positions[chain] = {}
                site_Hs[chain] = {}
            if resnumb not in site_positions[chain]:
                site_positions[chain][resnumb] = []
                site_Hs[chain][resnumb] = []

            if resnumb in site_positions[chain]:
                site_positions[chain][resnumb].append((x, y, z))
                if aname[0] == "H":
                    site_Hs[chain][resnumb].append((x, y, z))

        return site_Hs, ref_tau_name, site_positions

    new_pdb_content = ""
    site_positions = {}
    site_Hs = {}
    max_box = [0.0, 0.0, 0.0]
    aposition = -1
    sequence = {}
    # Stays None when the input has no CRYST1 record; without this binding
    # the box checks after the loop would hit an unbound local.
    box = None
    with open(f_in) as f:
        for line in f:
            if line.startswith("ATOM"):
                aposition += 1
                (aname, anumb, resname, chain, resnumb, x, y,
                 z) = read_pdb_line(line)

                max_box = getMaxCoords([x, y, z], max_box)

                if chain not in sequence:
                    sequence[chain] = {}
                if resnumb not in sequence[chain]:
                    sequence[chain][resnumb] = resname

                if chain in molecules:
                    molecule = molecules[chain]
                    ntr_res = molecule.NTR
                    ctr_res = molecule.CTR
                    sites = molecule.sites

                    # HD1 of a HIS that is not a site is dropped and its
                    # position is given back to the next atom.
                    if (resname == "HIS" and aname == "HD1"
                            and resnumb not in sites.keys()):
                        aposition -= 1
                        continue

                    resnumb, resname, aname = correct_termini(
                        resnumb, resname, aname, ntr_res, ctr_res)

                    resnumb, resname, aname = correct_res_names(
                        molecule, resnumb, resname, aname)

                    titrable_res = False
                    if resnumb in sites.keys():
                        titrable_res = True

                    molecule.addAtom(aname, anumb, aposition, titrable_res)

                    (site_Hs, resname,
                     site_positions) = assign_atoms(sites, resnumb, aname,
                                                    site_Hs, site_positions)

                else:
                    if resname == "HIS" and aname == "HD1":
                        aposition -= 1
                        continue
                    # NOTE(review): `molecule` is not rebound in this branch,
                    # so the corrections of the last chain found in
                    # `molecules` are applied (NameError if none was seen
                    # yet). Left unchanged - confirm whether this reuse is
                    # intended.
                    resnumb, resname, aname = correct_res_names(
                        molecule, resnumb, resname, aname)

                new_pdb_content += new_pdb_line(aposition,
                                                aname,
                                                resname,
                                                resnumb,
                                                x,
                                                y,
                                                z,
                                                chain=chain)

            elif line.startswith("CRYST1"):
                parts = line.split()
                box = [float(i) for i in parts[1:4]]

    # A missing CRYST1 record is handled like the 1.0 1.0 1.0 unit cell:
    # fall back to the per-axis coordinate maximum.
    if box is None or box == [1.0, 1.0, 1.0]:
        box = max_box

    if Config.pypka_params["box"]:
        box = Config.pypka_params["box"]
    else:
        Config.pypka_params.setBox(box)

    if Config.delphi_params["pbc_dim"] == 2:
        Config.delphi_params.redefineScale()

    new_pdb_content += "TER\nENDMDL\n"
    with open(f_out, "w") as f_new:
        f_new.write(new_pdb_content)

    # TODO: check terminal_offset has to be bigger than the total number of residues
    # TODO: delete terminal_offset and use another approach to distinguish between N- and C-ter
    # TODO: check size xy > config.cutoff * 2
    # if so, raise Exception, and ask to change cutoff value

    # TODO: check if pbc_dim -> set gsizes from pdb size xy and ignore perfil

    for chain in site_positions.keys():
        molecule = molecules[chain]
        for site in site_positions[chain]:
            if site in list(molecule.sites.keys()):
                # Focus center = midpoint of the bounding box of the site atoms
                pos_max = [-9999990, -999999, -999999]
                pos_min = [999999, 999999, 999999]
                focus_center = [0, 0, 0]
                for atom in site_positions[chain][site]:
                    for i in range(3):
                        if pos_max[i] < atom[i]:
                            pos_max[i] = atom[i]
                        if pos_min[i] > atom[i]:
                            pos_min[i] = atom[i]
                focus_center[0] = (pos_max[0] + pos_min[0]) / 2
                focus_center[1] = (pos_max[1] + pos_min[1]) / 2
                focus_center[2] = (pos_max[2] + pos_min[2]) / 2

                if Config.delphi_params["pbc_dim"] == 2:
                    molecule.sites[site].addCenter(focus_center,
                                                   boxsize=box[0],
                                                   box_z=box[2])
                else:
                    molecule.sites[site].addCenter(focus_center)

                # Mean position of the hydrogens registered on the site
                hx, hy, hz = 0, 0, 0
                nHs = len(site_Hs[chain][site])
                if nHs == 0:
                    sitename = molecule.sites[site].getName()
                    raise Exception("Site {1}{0} appears "
                                    "to have no Hydrogen atoms".format(
                                        site, sitename))
                for h in site_Hs[chain][site]:
                    hx += h[0]
                    hy += h[1]
                    hz += h[2]
                hx /= nHs
                hy /= nHs
                hz /= nHs
                Hcenter = (hx, hy, hz)
                molecule.sites[site].addCenterH(Hcenter)

    return sequence
示例#6
0
def check_sites_integrity(filename, molecules, chains_res):
    """Identify titrable residues and check the integrity of their atom blocks.

    The PDB file is read residue by residue. Each time a residue block ends,
    the atom names collected for it are passed to ``check_integrity``;
    blocks that pass are registered as sites on the corresponding molecule
    (``addSite``/``addTautomers``), and the N-/C-terminus residue numbers are
    stored in ``molecule.NTR``/``molecule.CTR``. Failures are reported
    through ``logger.warning``, except for some CYS cases that are recorded
    in ``molecule.correct_names``/``molecule.correct_atoms`` instead.
    Finally every molecule gets its reference tautomers and charge sets.

    Args:
        filename: path of the PDB file to scan.
        molecules: mapping ``chain -> molecule``.
        chains_res: mapping ``chain -> container of residue numbers``; a
            residue is handled as a regular site only if its number is in
            ``chains_res[chain]``.

    Returns:
        None. Results are stored on the objects in ``molecules``.
    """
    def check_site(prev_resname, cur_atoms, ter=None):
        """Check one finished residue block and register its site(s).

        Besides its arguments this helper reads ``molecule``, ``sites`` and
        ``prev_resnumb`` from the enclosing function at call time.
        ``ter`` is ``"NTR"``, ``"CTR"`` or ``None``.
        """
        def correctResName(resname):
            """Map a residue name to the regular titrating name it matches.

            Names in ``PROTEIN_RESIDUES`` are returned unchanged; otherwise
            the first entry of ``REGULARTITRATINGRES`` sharing the first two
            characters is returned, or the input if there is none.
            """
            if resname in PROTEIN_RESIDUES:
                return resname
            for res in REGULARTITRATINGRES:
                if res[0:2] == resname[0:2]:
                    return res
            return resname

        def makeSite(molecule, resnumb, resname, termini_resname=None):
            """Create the site ``resnumb`` on ``molecule`` with its tautomers."""
            if resname in TITRABLETAUTOMERS:
                ntautomers = TITRABLETAUTOMERS[resname]
            else:
                # NOTE(review): there is no `break`, so the last matching
                # entry wins, and `ntautomers` stays unbound when nothing
                # matches.
                for res in REGULARTITRATINGRES:
                    if res[0:2] == resname[0:2]:
                        ntautomers = TITRABLETAUTOMERS[res]
            sID = molecule.addSite(resnumb)
            molecule.addTautomers(sID,
                                  ntautomers,
                                  resname,
                                  termini_resname=termini_resname)
            # print('added', molecule.chain, resnumb, resname)

        prev_resname = correctResName(prev_resname)

        # res_tits is passed to check_integrity as `site`. It is only
        # re-evaluated for terminal residues; otherwise it stays True.
        res_tits = True
        if ter:
            if not Config.pypka_params[
                    "ser_thr_titration"] and prev_resname in (
                        "SER",
                        "THR",
                    ):
                res_tits = False
            else:
                res_tits = bool(prev_resname in TITRABLERESIDUES)

        # check_integrity receives a copy of the atom list
        res_atoms = copy(cur_atoms)
        (integrity_terminal, integrity_site) = check_integrity(prev_resname,
                                                               res_atoms,
                                                               ter=ter,
                                                               site=res_tits)

        if integrity_terminal:
            # Terminus sites live at residue number + TERMINAL_OFFSET
            ter_resnumb = prev_resnumb + TERMINAL_OFFSET
            makeSite(molecule, ter_resnumb, ter, termini_resname=prev_resname)
            if ter == "NTR":
                molecule.NTR = prev_resnumb
            elif ter == "CTR":
                molecule.CTR = prev_resnumb
        else:
            # `ter` is passed as the residue name here (it may be None)
            warning(molecule, prev_resnumb, ter, "")

        if prev_resnumb in sites:
            if integrity_site:
                makeSite(molecule, prev_resnumb, prev_resname)
            else:
                warning(molecule, prev_resnumb, prev_resname, cur_atoms)
        elif prev_resname == "CYS":  # dealing with a CYS that is not in sites
            if not integrity_site:
                # Swap the terminal atom names for the plain backbone names
                # before comparing against the CYS templates in warning().
                if ter == "NTR":
                    cur_atoms = set(cur_atoms) - set(["H1", "H2", "H3"])
                    cur_atoms.add("H")
                elif ter == "CTR":
                    cur_atoms = set(cur_atoms) - set(
                        ["HO11", "HO12", "HO21", "HO22", "O1", "O2"])
                    cur_atoms.update(("H", "O"))
                warning(molecule,
                        prev_resnumb,
                        prev_resname,
                        cur_atoms,
                        mode="CYS")

    def warning(molecule, resnumb, resname, res_atoms, mode=None):
        """Report a failed block, or record a CYS name correction instead.

        For CYS the atom set is compared against three exact templates:
        no thiol hydrogen (a warning about an assumed SS-bond is logged),
        ``HG1`` (residue recorded as ``CY0``) and ``HG`` (recorded as ``CY0``
        with ``HG`` mapped to ``HG1``). Any other CYS atom set, and any
        other residue in ``TITRABLERESIDUES``, logs a failed-integrity
        warning. Other residues are ignored.
        """
        if mode == "CYS" or resname == "CYS":
            CYS_atoms = ["N", "CA", "CB", "SG", "C", "O", "H"]
            # issubset in both directions == the two atom sets are equal
            if set(res_atoms).issubset(CYS_atoms) and set(CYS_atoms).issubset(
                    res_atoms):
                # no need to correct residue name
                warn = "{0} {1} is assumed to be participating " "in a SS-bond".format(
                    resnumb, resname)
                logger.warning(warn)
                return
            CY0_atoms = ["N", "CA", "CB", "SG", "C", "O", "H", "HG1"]
            if set(res_atoms).issubset(CY0_atoms) and set(CY0_atoms).issubset(
                    res_atoms):
                molecule.correct_names[resnumb] = "CY0"
                return
            CY0_atoms = ["N", "CA", "CB", "SG", "C", "O", "H", "HG"]
            if set(res_atoms).issubset(CY0_atoms) and set(CY0_atoms).issubset(
                    res_atoms):
                molecule.correct_names[resnumb] = "CY0"
                molecule.correct_atoms[resnumb] = {"HG": "HG1"}
                return
            else:
                warn = "{0} {1} failed integrity check".format(
                    resnumb, resname)
                logger.warning(warn)
        elif resname not in TITRABLERESIDUES:
            return
        else:
            warn = "{0} {1} failed integrity check".format(resnumb, resname)
            logger.warning(warn)

    # State of the residue currently being accumulated (prev_*) and of the
    # line just read (resnumb/chain).
    resnumb = None
    cur_atoms = []
    prev_resnumb = None
    prev_resname = None
    prev_chain = None
    last_chain = None
    chain = None
    with open(filename) as f:
        nline = 0
        f_lines = f.readlines()
        maxnlines = len(f_lines)
        for line in f_lines:
            # resname is reset for every line; resnumb/chain/aname keep the
            # value of the last ATOM line.
            resname = None
            nline += 1

            if "ATOM " == line[0:5]:
                (aname, anumb, resname, chain, resnumb, x, y,
                 z) = read_pdb_line(line)

                if chain in molecules:
                    if not last_chain:
                        last_chain = chain
                    # Indexed by last_chain (the chain of the residue being
                    # accumulated), not by the chain of the current line.
                    molecule = molecules[last_chain]
                    sites = chains_res[last_chain]
                    last_molecule = molecule

                if nline == maxnlines:
                    # The last line also triggers the flush below, so its
                    # atom has to be added beforehand.
                    cur_atoms.append(aname)

            if line == "TER\n":
                # Bumping resnumb makes the residue-change test below fire.
                # NOTE(review): raises TypeError if no ATOM line has been
                # read yet (resnumb is still None).
                resnumb += 1

            # A residue block is finished when the residue number changes,
            # the file ends or the chain changes.
            if (prev_resnumb != resnumb or nline == maxnlines
                    or chain != last_chain) and prev_resnumb is not None:

                if nline == maxnlines:
                    # On the last line the current residue is the one to
                    # flush; resnumb becomes the string "None" so that it
                    # differs from any stored residue number.
                    prev_resnumb = copy(resnumb)
                    resnumb = "None"

                if last_chain in molecules:
                    if (prev_resname in TITRABLERESIDUES
                            or (prev_resnumb == molecule.NTR
                                or resnumb == molecule.NTR)
                            or (prev_resnumb == molecule.CTR
                                or resnumb == molecule.CTR)):
                        if prev_resnumb == molecule.NTR and resnumb != molecule.NTR:
                            check_site(prev_resname, cur_atoms, ter="NTR")
                            prev_resnumb = None
                        # Dealing with the last residue and CTR
                        elif (prev_resnumb == molecule.CTR and resnumb !=
                              molecule.CTR) or (prev_resnumb == molecule.CTR
                                                and chain != last_chain):
                            check_site(prev_resname, cur_atoms, ter="CTR")
                            prev_resnumb = None
                            last_chain = None
                        # Dealing with the previous residue
                        elif (prev_resnumb is not None
                              and prev_resname in TITRABLERESIDUES):
                            # SER/THR are skipped unless ser_thr_titration
                            # is enabled
                            if not (not Config.pypka_params["ser_thr_titration"]
                                    and prev_resname in ("SER", "THR")):
                                check_site(prev_resname, cur_atoms)

                    elif prev_resname == "ALA":
                        # TODO: check residue block integrity for other non titrating residues
                        pass
                # NOTE(review): `last_molecule` is only bound once a chain
                # present in `molecules` has been read; before that this
                # test raises NameError.
                elif (last_molecule and prev_resnumb == last_molecule.CTR
                      and resnumb != last_molecule.CTR):
                    check_site(prev_resname, cur_atoms, ter="CTR")
                elif prev_resname in TITRABLERESIDUES and prev_resnumb is not None:
                    check_site(prev_resname, cur_atoms)

            # Dealing with the new residue
            if prev_resnumb != resnumb:
                cur_atoms = [aname]
                prev_resnumb = resnumb
                prev_resname = resname
                last_chain = chain
            elif resnumb is not None:
                cur_atoms.append(aname)
                # If the block started as NTR/CTR, take over the residue
                # name of the current line once it differs.
                if prev_resname in ("NTR", "CTR") and prev_resname != resname:
                    prev_resname = resname

    for molecule in molecules.values():
        # Adding the reference tautomer to each site
        molecule.addReferenceTautomers()
        # Assigning a charge set to each tautomer
        molecule.addTautomersChargeSets()

    # TODO: report blocks that failed the check (in .log file with
    # numbering reference to stepwise scheme)
    # TODO: add lipid residues
    if Config.debug:
        print("exiting check_sites_integrity")
示例#7
0
文件: pdb_out.py 项目: mms-fcul/PypKa
def write_output_structure(sites, molecules, delphi_input_content):
    """Write a PDB with the most probable protonation state of every site.

    The output pH, file name and force field are read from
    ``Config.pypka_params`` (``f_structure_out_pH``, ``f_structure_out``,
    ``ff_structure_out``). The file starts with REMARK lines listing, per
    site, the average protonation and the state/tautomer probabilities,
    followed by the atoms of ``delphi_input_content`` renamed to the chosen
    protomer, and finally the atoms returned by ``mend_pdb`` for chains that
    are not in ``mainchain_Hs``.

    Args:
        sites: iterable of site objects.
        molecules: mapping whose values expose ``atoms_tit_res``, ``sites``
            and ``getCYS_bridges``.
        delphi_input_content: iterable of PDB lines; it is iterated twice.

    Returns:
        None. The structure is written to ``f_structure_out`` and the
        temporary files (log, ``leftovers.pqr`` and ``pdb2pqr_inputfile``)
        are removed.

    NOTE(review): ``mainchain_Hs`` is used below but never assigned in this
    function - presumably a module-level name; confirm.
    """
    def getProtomerResname(pdb_content, site, pH, ff_protomers):
        """Pick the force-field residue name for the site's likeliest state.

        Returns ``(pdb_content, new_state_i, new_resname, remove_hs)`` where
        ``pdb_content`` has one REMARK line appended per matching entry of
        ``ff_protomers[resname]`` and ``new_state_i`` is the 0-based state.

        NOTE(review): ``new_resname``/``remove_hs`` stay unbound when no
        entry contains ``new_state_i``; with several matches the last wins.
        """
        resnumb = site.getResNumber()
        resname = site.getName()
        new_state, (state_prob, taut_prob) = site.getMostProbTaut(pH)
        new_state_i = new_state - 1
        for ff_resname, protomers in ff_protomers[resname].items():
            if new_state_i in protomers.keys():
                new_resname = ff_resname
                remove_hs = protomers[new_state_i]
                average_prot = site.getTitrationCurve()[pH]
                # Flag states that are not clearly dominant
                if state_prob < 0.75:
                    warn = ("{0}{1} "
                            "protonation state probability: {2}, "
                            "tautomer probability: {3}".format(
                                resname, resnumb, state_prob, taut_prob))
                    logger.warning(warn)

                rounded_sprob = round(state_prob, 2)
                rounded_tprob = round(taut_prob, 2)
                rounded_avgprot = round(average_prot, 2)
                remark_line = ("{0: <5}{1: <6}    {2: >1.2f}         "
                               "{3: >1.2f}         {4: >1.2f}".format(
                                   resname,
                                   resnumb,
                                   rounded_avgprot,
                                   rounded_sprob,
                                   rounded_tprob,
                               ))
                pdb_content += "REMARK     {text}\n".format(text=remark_line)
        # print(resnumb, new_state, new_resname, remove_hs, state_prob, taut_prob)
        return pdb_content, new_state_i, new_resname, remove_hs

    outputname = Config.pypka_params["f_structure_out"]
    pH = float(Config.pypka_params["f_structure_out_pH"])
    ff_out = Config.pypka_params["ff_structure_out"]
    # Only these two output force fields are supported; anything else
    # raises KeyError here.
    ff_protomer = {
        "amber": AMBER_protomers,
        "gromos_cph": GROMOS_protomers
    }[ff_out]
    pdb_content = (
        f"REMARK     PypKa assigned protonation states @ pH {pH}\n"
        "REMARK     Residue    Avg Prot   State Prob    Taut Prob\n")
    # new_states[chain][resnumb] = (site name, state index,
    #                               new residue name, hydrogens to remove)
    new_states = {}
    for site in sites:
        resname = site.getName()
        resnumb = site.res_number
        molecule = site.molecule
        chain = molecule.chain
        (pdb_content, new_state, new_resname,
         remove_hs) = getProtomerResname(pdb_content, site, pH, ff_protomer)
        # Termini keep the name of the residue they belong to
        if resname in ("NTR", "CTR"):
            new_resname = site.termini_resname
        if chain not in new_states:
            new_states[chain] = {}
        new_states[chain][resnumb] = (resname, new_state, new_resname,
                                      remove_hs)
    new_pdb = pdb_content
    counter = 0
    # Atom number -> molecule, split by the flag stored in atoms_tit_res
    tit_atoms = {}
    other_atoms = {}
    for molecule in molecules.values():
        for atom_numb in molecule.atoms_tit_res:
            if molecule.atoms_tit_res[atom_numb]:
                tit_atoms[atom_numb] = molecule
            else:
                other_atoms[atom_numb] = molecule

    # First pass: atom names present in the DelPhi input, per chain/residue
    in_delphi_pdb = {}
    for line in delphi_input_content:
        if line.startswith("ATOM "):
            (aname, anumb, resname, chain, resnumb, x, y,
             z) = read_pdb_line(line)
            if chain not in in_delphi_pdb:
                in_delphi_pdb[chain] = {}
            if resnumb not in in_delphi_pdb[chain]:
                in_delphi_pdb[chain][resnumb] = []
            in_delphi_pdb[chain][resnumb].append(aname)

    # Second pass: emit the atoms with their new names and numbering
    for line in delphi_input_content:
        if line.startswith("ATOM "):
            (aname, anumb, resname, chain, resnumb, x, y,
             z) = read_pdb_line(line)

            if anumb in tit_atoms.keys():
                molecule = tit_atoms[anumb]
                (oldresname, new_state, resname,
                 removeHs) = new_states[chain][resnumb]
                # Hydrogens absent from the chosen protomer are dropped
                if aname in removeHs:
                    continue
                if (ff_out == "amber" and oldresname in gromos2amber
                        and new_state in gromos2amber[oldresname]
                        and aname in gromos2amber[oldresname][new_state]):
                    aname = gromos2amber[oldresname][new_state][aname]
            elif anumb in other_atoms:
                molecule = other_atoms[anumb]
            else:
                # Atoms unknown to every molecule are not written
                continue
            # Terminus atoms are mapped back to their real residue number
            if resnumb > TERMINAL_OFFSET:
                termini_site = molecule.sites[resnumb]
                resnumb -= TERMINAL_OFFSET
                if resnumb in molecule.sites.keys():
                    _, ter_new_state, resname, ter_removeHs = new_states[
                        chain][resnumb]
                else:
                    resname = termini_site.termini_resname
                # print(new_pdb_line(anumb, aname, resname, resnumb, x, y, z).strip())
            if resnumb in molecule.getCYS_bridges():
                resname = "CYX"
            counter += 1
            new_pdb += new_pdb_line(counter,
                                    aname,
                                    resname,
                                    resnumb,
                                    x,
                                    y,
                                    z,
                                    chain=chain)
            # Add the stored main-chain hydrogens of this residue that are
            # not already part of the DelPhi input.
            if chain in mainchain_Hs and resnumb in mainchain_Hs[chain]:
                while len(mainchain_Hs[chain][resnumb]) > 0:
                    # The counter advances even when the hydrogen is not
                    # written below.
                    counter += 1
                    (aname, anumb, oldresname, chain, x, y,
                     z) = mainchain_Hs[chain][resnumb].pop()
                    if (resnumb not in in_delphi_pdb[chain]
                            or aname not in in_delphi_pdb[chain][resnumb]):
                        new_pdb += new_pdb_line(counter,
                                                aname,
                                                resname,
                                                resnumb,
                                                x,
                                                y,
                                                z,
                                                chain=chain)
                del mainchain_Hs[chain][resnumb]
        elif not line.startswith("ENDMDL"):
            # Non-ATOM lines other than ENDMDL are copied through
            new_pdb += line

    outputpqr = "leftovers.pqr"
    logfile = "LOG_pdb2pqr_nontitrating"
    if ff_out == "gromos_cph":
        ff_out = "GROMOS"
    mend_pdb(
        Config.pypka_params["pdb2pqr_inputfile"],
        outputpqr,
        ff_out,
        ff_out,
        logfile=logfile,
    )
    os.system("rm -f input_clean_fixed.pdb")

    # Append the atoms of chains that have no entry in mainchain_Hs
    with open(outputpqr) as f:
        for line in f:
            if line.startswith("ATOM "):
                (
                    aname,
                    anumb,
                    resname,
                    chain,
                    resnumb,
                    x,
                    y,
                    z,
                    charge,
                    radius,
                ) = read_pqr_line(line)
                if chain not in mainchain_Hs:
                    counter += 1
                    new_pdb += new_pdb_line(counter,
                                            aname,
                                            resname,
                                            resnumb,
                                            x,
                                            y,
                                            z,
                                            chain=chain)
    to_remove = (logfile, outputpqr, Config.pypka_params["pdb2pqr_inputfile"])
    for f in to_remove:
        os.remove(f)
    with open(outputname, "w") as f_new:
        f_new.write(new_pdb)
示例#8
0
def add_non_protein(pdbfile_origin,
                    add_to_pdb,
                    keep_membrane=False,
                    keep_ions=False):
    """Append membrane and/or ion atoms of the original PDB to another PDB.

    Atom numbering continues after the last ``ATOM `` record of
    ``add_to_pdb``. If that file has no ``ATOM `` record, numbering starts
    from 0 (the counters would otherwise be unbound when first used).

    With ``keep_membrane``:

    * atoms whose residue name is in ``LIPID_RESIDUES`` are appended with a
      blank chain and their original residue number;
    * atoms whose residue name is among the values of
      ``Config.pypka_params.LIPIDS`` are passed through
      ``convert_FF_atomnames`` and, if flagged for inclusion, appended with
      a blank chain and a residue number shifted by the last residue number
      of ``add_to_pdb``.

    With ``keep_ions``, atoms whose name is in ``IONS`` and equals their
    residue name are appended with their own chain and a residue number
    shifted the same way.

    Args:
        pdbfile_origin: path of the PDB holding the membrane/ion atoms.
        add_to_pdb: path of the PDB to append to (opened in append mode).
        keep_membrane: append lipid atoms.
        keep_ions: append ion atoms.

    Returns:
        None
    """
    # Defaults for a target file without any ATOM record.
    last_anumb = 0
    last_resnumb = 0
    with open(add_to_pdb) as f:
        for line in f:
            if line.startswith("ATOM "):
                (aname, anumb, resname, chain, resnumb, x, y,
                 z) = read_pdb_line(line)
                last_anumb = anumb
                last_resnumb = resnumb

    appended = []

    # Read the original pdb with the membrane
    with open(pdbfile_origin) as f:
        for line in f:
            if not line.startswith("ATOM "):
                continue

            (aname, anumb, resname, chain, resnumb, x, y,
             z) = read_pdb_line(line)

            if keep_membrane:
                if resname in LIPID_RESIDUES:
                    last_anumb += 1
                    appended.append(
                        new_pdb_line(last_anumb,
                                     aname,
                                     resname,
                                     resnumb,
                                     x,
                                     y,
                                     z,
                                     chain=" "))

                if resname in list(Config.pypka_params.LIPIDS.values()):
                    aname, resname, to_include = convert_FF_atomnames(
                        aname, resname)
                    if to_include:
                        last_anumb += 1
                        resnumb += last_resnumb
                        appended.append(
                            new_pdb_line(last_anumb,
                                         aname,
                                         resname,
                                         resnumb,
                                         x,
                                         y,
                                         z,
                                         chain=" "))

            # Uses the (possibly converted) names from the membrane branch.
            if keep_ions and aname in IONS and resname == aname:
                last_anumb += 1
                resnumb += last_resnumb
                appended.append(
                    new_pdb_line(last_anumb,
                                 aname,
                                 resname,
                                 resnumb,
                                 x,
                                 y,
                                 z,
                                 chain=chain))

    with open(add_to_pdb, "a") as f_new:
        f_new.write("".join(appended))
示例#9
0
def inputPDBCheck(filename, sites, clean_pdb):
    """Scan a pdb/pqr/gro file and collect chain lengths and requested sites.

    Args:
        filename: input path; its last three characters select the parser
            (``pdb``/``pqr`` -> PDB, ``gro`` -> GRO).
        sites: mapping ``chain -> iterable of site labels`` (strings).
            Labels ending in ``N``/``C`` denote the N-/C-terminus of the
            residue number that precedes the letter.
        clean_pdb: when false and the input is a PDB, GRO-formatted lines
            are also built while reading.

    Returns:
        tuple: ``(chains_length, chains_res)``.
        ``chains_length`` maps chain -> atom counter value (see the notes
        in the loop). ``chains_res`` maps chain -> ``{resnumb: resname}``
        for the requested residues found in the file, plus
        ``{str residue number: "NTR" | "CTR"}`` for requested termini of
        chains present in the file.

    Raises:
        Exception: if the extension is not supported or no site was found.
    """
    if filename[-3:] in ("pdb", "pqr"):
        filetype = "pdb"
    elif filename[-3:] == "gro":
        filetype = "gro"
    else:
        raise Exception("Input file must be either a pdb or a gro.")

    chains_length = {}
    chains_res = {chain: {} for chain in sites.keys()}
    chains = []
    # NOTE(review): the GRO text assembled below is never written out in
    # this function (the code doing so is commented out further down).
    if filetype == "pdb" and not clean_pdb:
        new_gro_header = "CREATED within PyPka\n"
        new_gro_body = ""
    with open(filename) as f:
        last_chain = ""
        chain_length = 0

        nline = 0
        maxnlines = 0
        atom_number = 0
        for line in f:
            nline += 1
            atom_line = False
            if filetype == "pdb":
                if line.startswith("ATOM "):
                    atom_line = True
                    chain_length += 1
                    (aname, anumb, resname, chain, resnumb, x, y,
                     z) = read_pdb_line(line)
                    if chain not in chains:
                        chains.append(chain)
                    atom_number += 1
                    if not clean_pdb:
                        # Hydrogen names such as "1HB" become "HB1"
                        if (len(aname) > 2 and aname[1] == "H"
                                and aname[0] in ("1", "2")):
                            aname = aname[1:] + aname[0]
                        # Coordinates are divided by 10 for the GRO line
                        new_gro_body += new_gro_line(anumb, aname, resname,
                                                     resnumb, x / 10.0, y / 10,
                                                     z / 10)
                elif line.startswith("CRYST1"):
                    tmp = line.split()[1:4]
                    box = (float(tmp[0]), float(tmp[1]), float(tmp[2]))
                    new_gro_footer = "{0:10.5f}{1:10.5f}{2:10.5f}\n".format(
                        box[0] / 10.0, box[1] / 10.0, box[2] / 10.0)

            elif filetype == "gro":
                # Line 2 holds the atom count; atom records are lines 3 to
                # natoms + 2. Every GRO atom is assigned to chain "A".
                # NOTE(review): `chain_length` and `chains` are only updated
                # in the pdb branch above.
                if nline > 2 and nline < maxnlines:
                    (aname, anumb, resname, resnumb, x, y,
                     z) = read_gro_line(line)
                    chain = "A"
                    atom_line = True
                elif nline == 2:
                    natoms = int(line.strip())
                    maxnlines = natoms + 3

            if atom_line:
                if chain_length == 1:
                    last_chain = chain

                # On a chain change the counter of the previous chain is
                # stored and reset.
                # NOTE(review): for PDB input the counter was already
                # incremented for the current atom, so the stored value
                # includes the first atom of the new chain and the new
                # chain's own count restarts without it - confirm intended.
                if chain != last_chain and chain_length != 1:
                    chains_length[last_chain] = chain_length
                    # chains_res[chain] = done[chain]
                    chain_length = 0
                    last_chain = chain

                # Record the residue name the first time a requested residue
                # is seen (site labels are strings, resnumb is compared via
                # str()).
                if (chain in sites and resnumb not in chains_res[chain]
                        and str(resnumb) in sites[chain]):
                    if Config.pypka_params["ffinput"] == "CHARMM":
                        if resname in ("HSD", "HSE", "HSP"):
                            resname = "HIS"
                    chains_res[chain][resnumb] = resname

    # if filetype == 'pdb' and not clean_pdb:
    #    new_gro_header += '{0}\n'.format(atom_number)
    #    with open('TMP.gro', 'w') as f:
    #        f.write(new_gro_header + new_gro_body + new_gro_footer)

    chains_length[last_chain] = chain_length
    # chains_res[chain] = done[chain]

    # tmp_chains_res is an ugly hack so that test cases hold
    # TODO: remove tmp_chains_res and update tests
    tmp_chains_res = {}
    for chain in sites.keys():
        if chain not in chains:
            continue

        tmp_chains_res[chain] = {}
        for site in sites[chain]:
            # Termini are keyed by the string residue number (label without
            # its trailing letter).
            if site[-1] == "C":
                resnumb = site[:-1]
                tmp_chains_res[chain][resnumb] = "CTR"
            elif site[-1] == "N":
                resnumb = site[:-1]
                tmp_chains_res[chain][resnumb] = "NTR"
        # Entries already in chains_res win over the termini entries
        chains_res[chain] = {**tmp_chains_res[chain], **chains_res[chain]}

    # Report requested sites that were not found.
    # NOTE(review): `skipped_sites` and `skipped` are assigned but not read
    # anywhere else in this function.
    skipped_sites = {}
    for chain, resnumbs in sites.items():
        if chain not in chains_res:
            skipped_sites[chain] = resnumbs
            continue
        for resnumb in resnumbs:
            skipped = False
            if resnumb[-1] in "NC":
                termini = "NTR" if resnumb[-1] == "N" else "CTR"
                resnumb = resnumb[:-1]
                if (resnumb not in chains_res[chain]
                        or chains_res[chain][resnumb] != termini):
                    print("{1} in chain '{0}' not found or not titratable.".
                          format(chain, termini))
            else:
                resnumb = int(resnumb)
                if resnumb not in chains_res[chain].keys():
                    print(
                        "Residue #{1} in chain '{0}' not found or not titratable."
                        .format(chain, resnumb))

    nsites = sum([len(res) for res in chains_res.values()])
    if not nsites:
        raise Exception(
            "No titrable residues found. Please check the residue number and chain."
        )

    return chains_length, chains_res