Пример #1
0
def add_non_protein(pdbfile_origin,
                    add_to_pdb,
                    keep_membrane=False,
                    keep_ions=False):
    """Append membrane and/or ion atoms of one PDB file to another.

    The last atom and residue numbers found in the ATOM records of
    ``add_to_pdb`` are used as the starting point for numbering the
    atoms copied over from ``pdbfile_origin``.

    Args:
        pdbfile_origin: path of the PDB file that still holds the
            membrane and ions.
        add_to_pdb: path of the PDB file the selected atoms are
            appended to (opened in append mode).
        keep_membrane: when true, copy atoms whose residue name is in
            ``LIPID_RESIDUES`` or in ``Config.pypka_params.LIPIDS``.
        keep_ions: when true, copy atoms whose atom name is in ``IONS``
            and equals the residue name.
    """
    # Start from zero so that a target file without any ATOM record no
    # longer triggers an UnboundLocalError when atoms are appended.
    last_anumb = 0
    last_resnumb = 0

    with open(add_to_pdb) as f:
        for line in f:
            if line.startswith('ATOM '):
                (aname, anumb, resname, chain, resnumb, x, y,
                 z) = read_pdb_line(line)
                last_anumb = anumb
                last_resnumb = resnumb

    new_lines = []

    # Read the original pdb with the membrane
    with open(pdbfile_origin) as f:
        for line in f:
            if not line.startswith('ATOM '):
                continue

            (aname, anumb, resname, chain, resnumb, x, y,
             z) = read_pdb_line(line)

            if keep_membrane:
                if resname in LIPID_RESIDUES:
                    # Residue number is kept as found in the origin file.
                    last_anumb += 1
                    new_lines.append(
                        new_pdb_line(last_anumb, aname, resname, resnumb,
                                     x, y, z, chain=chain))

                if resname in Config.pypka_params.LIPIDS.values():
                    aname, resname, to_include = convert_FF_atomnames(
                        aname, resname)
                    if to_include:
                        last_anumb += 1
                        # Shift past the residues already in the target.
                        resnumb += last_resnumb
                        new_lines.append(
                            new_pdb_line(last_anumb, aname, resname,
                                         resnumb, x, y, z, chain=chain))

            if keep_ions and aname in IONS and resname == aname:
                last_anumb += 1
                resnumb += last_resnumb
                new_lines.append(
                    new_pdb_line(last_anumb, aname, resname, resnumb,
                                 x, y, z, chain=chain))

    with open(add_to_pdb, 'a') as f_new:
        f_new.write(''.join(new_lines))
Пример #2
0
def removeMembrane(pdbfile):
    """Write a copy of ``pdbfile`` without lipid atoms.

    ATOM records whose residue name is in ``LIPID_RESIDUES`` are
    dropped; the remaining ATOM records are re-emitted through
    ``new_pdb_line`` and every other line is copied verbatim.  The
    result is written to ``input_clean.pdb`` in the current directory.

    Returns:
        The name of the written file, ``'input_clean.pdb'``.
    """
    kept = []
    with open(pdbfile) as pdb:
        for record in pdb:
            if not record.startswith('ATOM '):
                kept.append(record)
                continue

            (aname, anumb, resname, chain, resnumb, x, y,
             z) = read_pdb_line(record)

            # workaround to deal with pdb2pqr
            if chain == ' ':
                chain = '_'

            if resname in LIPID_RESIDUES:
                continue

            kept.append(
                new_pdb_line(anumb, aname, resname, resnumb, x, y, z,
                             chain=chain))

    # Write to a scratch file first, then move it into place.
    with open('tmp.tmp', 'w') as f_new:
        f_new.write(''.join(kept))
    os.rename('tmp.tmp', 'input_clean.pdb')
    return 'input_clean.pdb'
Пример #3
0
def get_chains_from_file(f_in):
    """Return the chain identifiers found in the ATOM records of ``f_in``.

    Each identifier appears once, in order of first appearance.
    """
    seen_chains = []
    with open(f_in) as pdb:
        atom_records = (rec for rec in pdb if rec.startswith('ATOM '))
        for rec in atom_records:
            _, _, _, chain, _, _, _, _ = read_pdb_line(rec)
            if chain in seen_chains:
                continue
            seen_chains.append(chain)

    return seen_chains
Пример #4
0
def storeResidues(filename):
    """Index the ATOM records of ``filename`` by residue number.

    Returns:
        A dict mapping residue number to a dict that maps atom name to
        the tuple ``(resname, x, y, z)`` read from that record.
    """
    by_residue = {}
    with open(filename) as pdb:
        for record in pdb:
            if record[0:5] != 'ATOM ':
                continue
            (aname, anumb, resname, chain, resnumb, x, y,
             z) = read_pdb_line(record)
            atoms = by_residue.setdefault(resnumb, {})
            atoms[aname] = (resname, x, y, z)
    return by_residue
Пример #5
0
def removeMembrane(pdbfile):
    """Strip lipid atoms from ``pdbfile`` and save the result.

    Non-ATOM lines are passed through untouched.  ATOM records with a
    residue name in ``LIPID_RESIDUES`` are discarded and the others are
    rebuilt with ``new_pdb_line``.  Output goes to ``input_clean.pdb``
    in the current working directory.

    Returns:
        The output file name, ``"input_clean.pdb"``.
    """

    def surviving_records(handle):
        # Yield, in file order, every line that belongs in the output.
        for record in handle:
            if record[0:5] != "ATOM ":
                yield record
                continue

            (aname, anumb, resname, chain, resnumb, x, y, z) = read_pdb_line(record)
            if resname in LIPID_RESIDUES:
                continue

            # workaround to deal with pdb2pqr: blank chain becomes "_"
            chain_id = "_" if chain == " " else chain
            yield new_pdb_line(
                anumb, aname, resname, resnumb, x, y, z, chain=chain_id
            )

    with open(pdbfile) as f:
        cleaned = "".join(surviving_records(f))

    # Stage the output in a scratch file before renaming it.
    with open("tmp.tmp", "w") as f_new:
        f_new.write(cleaned)
    os.rename("tmp.tmp", "input_clean.pdb")
    return "input_clean.pdb"
Пример #6
0
    def writeOutputStructure(self):
        """Write a PDB file with residues renamed to their most probable protomer.

        The output path, the pH and the target force field are read from
        ``Config.pypka_params`` (keys ``f_structure_out``,
        ``f_structure_out_pH`` and ``ff_structure_out``).  The file starts
        with REMARK lines listing, per site, the protonation-state and
        tautomer probabilities, followed by the lines of
        ``self.delphi_input_content`` with ATOM records renumbered and
        rewritten through ``new_pdb_line``; non-ATOM lines are copied as is.

        Raises:
            KeyError: if ``ff_structure_out`` is neither ``"amber"`` nor
                ``"gromos_cph"``.
        """
        def getProtomerResname(pdb_content, site, pH, ff_protomers):
            """Pick the force-field residue name for the site's most probable state.

            Appends one REMARK line to ``pdb_content`` for every entry of
            ``ff_protomers[resname]`` that contains the selected state, and
            reports a warning when the state probability is below 0.75.

            Returns:
                ``(pdb_content, new_state_i, new_resname, remove_hs)``.
            """
            resnumb = site.getResNumber()
            resname = site.getName()
            new_state, new_state_prob = site.getMostProbTaut(pH)
            # The protomer tables are looked up with new_state - 1.
            new_state_i = new_state - 1
            for ff_resname, protomers in ff_protomers[resname].items():
                if new_state_i in protomers.keys():
                    new_resname = ff_resname
                    remove_hs = protomers[new_state_i]

                    state_prob, taut_prob = site.getTautProb(new_state, pH)

                    if state_prob < 0.75:
                        warn = ("{0}{1} "
                                "protonation state probability: {2}, "
                                "tautomer probability: {3}".format(
                                    resname, resnumb, state_prob, taut_prob))
                        Config.log.report_warning(warn)

                        print(warn)
                    rounded_sprob = round(state_prob, 2)
                    rounded_tprob = round(taut_prob, 2)
                    remark_line = ("{0: <5}{1: <10}{2: ^7}"
                                   "{3: >1.2f}{4: ^13}{5: >1.2f}".format(
                                       resname, resnumb, "", rounded_sprob, "",
                                       rounded_tprob))

                    pdb_content += "REMARK     {text}\n".format(
                        text=remark_line)

            # NOTE(review): new_resname / remove_hs are only bound inside the
            # loop above; if no protomer entry matches, the return below
            # raises UnboundLocalError -- confirm a match is guaranteed.
            # print(resnumb, new_state, new_resname, remove_hs, state_prob, taut_prob)
            return pdb_content, new_state_i, new_resname, remove_hs

        outputname = Config.pypka_params["f_structure_out"]
        pH = float(Config.pypka_params["f_structure_out_pH"])
        ff_out = Config.pypka_params["ff_structure_out"]

        # Protomer table of the requested output force field.
        ff_protomer = {
            "amber": AMBER_protomers,
            "gromos_cph": GROMOS_protomers
        }[ff_out]

        pdb_content = (
            "REMARK     Protonation states assigned according to PypKa\n"
            "REMARK     Residue    Prot State Prob    Tautomer Prob\n")

        sites = self.get_all_sites(get_list=True)
        # Chosen state per site: (resname, new_state, new_resname, remove_hs).
        new_states = {}
        for site in sites:
            resname = site.getName()
            resnumb = site.res_number
            molecule = site.molecule
            chain = molecule.chain

            (pdb_content, new_state, new_resname,
             remove_hs) = getProtomerResname(pdb_content, site, pH,
                                             ff_protomer)

            # Termini take the residue name stored on the site instead.
            if resname in ("NTR", "CTR"):
                new_resname = site.termini_resname

            if chain not in new_states:
                new_states[chain] = {}

            # NOTE(review): a per-chain dict is created just above, but the
            # entry is stored at the top level keyed by resnumb only, so
            # equal residue numbers in different chains overwrite each other
            # -- confirm whether new_states[chain][resnumb] was intended.
            new_states[resnumb] = (resname, new_state, new_resname, remove_hs)

        new_pdb = pdb_content
        # Running atom serial number of the output file.
        counter = 0

        # Map atom number -> molecule, split by the truthiness of the
        # molecule.atoms_tit_res flag for that atom.
        tit_atoms = {}
        other_atoms = {}
        for molecule in self.molecules.values():
            for atom_numb in molecule.atoms_tit_res:
                if molecule.atoms_tit_res[atom_numb]:
                    tit_atoms[atom_numb] = molecule
                else:
                    other_atoms[atom_numb] = molecule

        for line in self.delphi_input_content:
            if line.startswith("ATOM "):
                (aname, anumb, resname, chain, resnumb, x, y,
                 z) = read_pdb_line(line)

                if anumb in tit_atoms.keys():
                    molecule = tit_atoms[anumb]

                    (oldresname, new_state, resname,
                     removeHs) = new_states[resnumb]

                    # Atoms listed for removal in the chosen protomer are
                    # left out of the output.
                    if aname in removeHs:
                        continue

                    # Translate the atom name through gromos2amber when an
                    # entry exists for this residue, state and atom.
                    if (ff_out == "amber" and oldresname in gromos2amber
                            and new_state in gromos2amber[oldresname]
                            and aname in gromos2amber[oldresname][new_state]):
                        aname = gromos2amber[oldresname][new_state][aname]
                else:
                    molecule = other_atoms[anumb]

                # Residue numbers above TERMINAL_OFFSET are mapped back by
                # subtracting the offset.
                if resnumb > TERMINAL_OFFSET:
                    termini_site = molecule.sites[resnumb]
                    resnumb -= TERMINAL_OFFSET
                    if resnumb in molecule.sites.keys():
                        ter_resname, ter_new_state, resname, ter_removeHs = new_states[
                            resnumb]
                    else:
                        resname = termini_site.termini_resname

                    # print(new_pdb_line(anumb, aname, resname, resnumb, x, y, z).strip())
                if resnumb in molecule.getCYS_bridges():
                    resname = "CYX"

                counter += 1
                new_pdb += new_pdb_line(counter, aname, resname, resnumb, x, y,
                                        z)
                # NOTE(review): mainchain_Hs is not defined in this method;
                # presumably a module-level or imported mapping of residue
                # number -> list of atom tuples -- confirm.  Its entries are
                # consumed (popped and deleted) here.
                if resnumb in mainchain_Hs:
                    while len(mainchain_Hs[resnumb]) > 0:
                        counter += 1
                        (aname, anumb, oldresname, chain, x, y,
                         z) = mainchain_Hs[resnumb].pop()
                        new_pdb += new_pdb_line(counter, aname, resname,
                                                resnumb, x, y, z)
                    del mainchain_Hs[resnumb]
            else:
                new_pdb += line

        with open(outputname, "w") as f_new:
            f_new.write(new_pdb)
Пример #7
0
def inputPDBCheck(filename, sites, clean_pdb):
    """Scan a pdb/pqr/gro file and collect per-chain information.

    Args:
        filename: path whose last three characters select the parser
            ('pdb' and 'pqr' are read as PDB, 'gro' as GRO).
        sites: mapping of chain id to an iterable of site strings; a
            trailing 'N' or 'C' marks a terminus.
        clean_pdb: when false and the input is a PDB, atom names and
            coordinates are additionally converted through
            ``new_gro_line`` (the converted text is not written out).

    Returns: chains_length, chains_res

    Raises:
        Exception: if the extension is not pdb, pqr or gro.
    """
    extension = filename[-3:]
    if extension in ('pdb', 'pqr'):
        filetype = 'pdb'
    elif extension == 'gro':
        filetype = 'gro'
    else:
        raise Exception('Input file must be either a pdb or a gro.')

    chains_length = {}

    # Pre-register the termini requested in `sites`, keyed by the site
    # string without its trailing letter.
    terminus_names = {'C': 'CTR', 'N': 'NTR'}
    chains_res = {}
    for chain_id, chain_sites in sites.items():
        chains_res[chain_id] = {
            site[:-1]: terminus_names[site[-1]]
            for site in chain_sites if site[-1] in terminus_names
        }

    if filetype == 'pdb' and not clean_pdb:
        new_gro_body = ''

    last_chain = ''
    chain_length = 0
    nline = 0
    maxnlines = 0
    with open(filename) as f:
        for line in f:
            nline += 1

            if filetype == 'pdb':
                if not line.startswith('ATOM '):
                    if 'CRYST1' in line:
                        # Box edges are parsed (and scaled by 1/10) but
                        # the resulting footer is currently unused.
                        edges = line.split()[1:4]
                        box = (float(edges[0]), float(edges[1]),
                               float(edges[2]))
                        new_gro_footer = '{0:10.5f}{1:10.5f}{2:10.5f}\n'.format(
                            box[0] / 10.0, box[1] / 10.0, box[2] / 10.0)
                    continue

                chain_length += 1
                (aname, anumb, resname, chain, resnumb, x, y,
                 z) = read_pdb_line(line)
                if not clean_pdb:
                    # Move a leading '1'/'2' of hydrogen names to the end.
                    leading_digit = (len(aname) > 2 and aname[1] == 'H'
                                     and aname[0] in ('1', '2'))
                    if leading_digit:
                        aname = aname[1:] + aname[0]
                    new_gro_body += new_gro_line(anumb, aname, resname,
                                                 resnumb, x / 10.0, y / 10,
                                                 z / 10)
            else:
                # gro: line 2 holds the atom count, atoms follow it.
                if nline == 2:
                    maxnlines = int(line.strip()) + 3
                    continue
                if not 2 < nline < maxnlines:
                    continue
                (aname, anumb, resname, resnumb, x, y,
                 z) = read_gro_line(line)
                chain = 'A'

            # Only atom lines reach this point.
            if chain_length == 1:
                last_chain = chain

            if chain != last_chain and chain_length != 1:
                chains_length[last_chain] = chain_length
                chain_length = 0
                last_chain = chain

            if (chain in sites and resnumb not in chains_res[chain]
                    and str(resnumb) in sites[chain]):
                chains_res[chain][resnumb] = resname

    chains_length[last_chain] = chain_length

    return chains_length, chains_res
Пример #8
0
def inputPDBCheck(filename, sites, clean_pdb):
    """Read a structure file and gather chain lengths and site residues.

    The parser is chosen from the last three characters of ``filename``
    ("pdb"/"pqr" or "gro").  Site strings in ``sites`` ending in "N" or
    "C" are registered up front as "NTR"/"CTR"; other requested sites
    get the residue name found in the file.

    Returns: chains_length, chains_res

    Raises:
        Exception: for any other file extension.
    """
    suffix = filename[-3:]
    if suffix == "gro":
        filetype = "gro"
    elif suffix in ("pdb", "pqr"):
        filetype = "pdb"
    else:
        raise Exception("Input file must be either a pdb or a gro.")

    chains_length = {}
    chains_res = {}

    for chain_id in sites:
        termini = {}
        for site in sites[chain_id]:
            marker = site[-1]
            if marker == "C":
                termini[site[:-1]] = "CTR"
            elif marker == "N":
                termini[site[:-1]] = "NTR"
        chains_res[chain_id] = termini

    convert_to_gro = filetype == "pdb" and not clean_pdb
    if convert_to_gro:
        # Accumulated but not written anywhere at present.
        new_gro_body = ""

    last_chain = ""
    chain_length = 0
    maxnlines = 0
    with open(filename) as f:
        for nline, line in enumerate(f, start=1):
            is_atom = False

            if filetype == "pdb":
                if line[0:5] == "ATOM ":
                    is_atom = True
                    chain_length += 1
                    fields = read_pdb_line(line)
                    (aname, anumb, resname, chain, resnumb, x, y, z) = fields
                    if convert_to_gro:
                        # Hydrogen names such as "1HB" become "HB1".
                        if len(aname) > 2 and aname[1] == "H":
                            if aname[0] in ("1", "2"):
                                aname = aname[1:] + aname[0]
                        new_gro_body += new_gro_line(
                            anumb, aname, resname, resnumb, x / 10.0, y / 10, z / 10
                        )
                elif "CRYST1" in line:
                    tmp = line.split()[1:4]
                    box = (float(tmp[0]), float(tmp[1]), float(tmp[2]))
                    new_gro_footer = "{0:10.5f}{1:10.5f}{2:10.5f}\n".format(
                        box[0] / 10.0, box[1] / 10.0, box[2] / 10.0
                    )
            elif filetype == "gro":
                if 2 < nline < maxnlines:
                    (aname, anumb, resname, resnumb, x, y, z) = read_gro_line(line)
                    chain = "A"
                    is_atom = True
                elif nline == 2:
                    # Second line of a gro file carries the atom count.
                    natoms = int(line.strip())
                    maxnlines = natoms + 3

            if not is_atom:
                continue

            if chain_length == 1:
                last_chain = chain

            chain_changed = chain != last_chain and chain_length != 1
            if chain_changed:
                chains_length[last_chain] = chain_length
                chain_length = 0
                last_chain = chain

            if chain not in sites:
                continue
            if resnumb in chains_res[chain]:
                continue
            if str(resnumb) in sites[chain]:
                chains_res[chain][resnumb] = resname

    chains_length[last_chain] = chain_length

    return chains_length, chains_res
Пример #9
0
def identify_tit_sites(molecules, instanciate_sites=True):
    """Find the titrable sites in the input file and register them.

    Reads the ATOM records of ``Config.pypka_params['f_in']`` and, for
    every chain present in ``molecules``, records N-terminus, C-terminus
    and titrable residues.  When ``instanciate_sites`` is true the sites
    and their tautomers are also added to the matching molecule, and
    reference tautomers and charge sets are assigned at the end.  In
    debug mode the collected lines are written to ``tmp.sites``.

    Args:
        molecules: mapping of chain id to molecule object.
        instanciate_sites: whether to create the sites on the molecules.

    Returns:
        ``chain_res``: mapping of chain id to ``{resnumb: resname}``.
        Termini are keyed by ``str(resnumb)``, other residues by the
        residue number as returned by ``read_pdb_line``.

    Raises:
        Exception: when ``chain_res`` is empty and ``instanciate_sites``
            is true.
    """
    def SitesFileLine(resnumb, resname):
        """Build the .sites line for one residue and optionally create the site."""
        # Normalise the name to the REGULARTITRATINGRES entry sharing its
        # first two letters.
        for res in REGULARTITRATINGRES:
            if res[0:2] == resname[0:2]:
                resname = res
        if resname in TITRABLETAUTOMERS:
            ntautomers = TITRABLETAUTOMERS[resname]
        else:
            # NOTE(review): ntautomers stays unbound if nothing matches.
            for res in REGULARTITRATINGRES:
                if res[0:2] == resname[0:2]:
                    ntautomers = TITRABLETAUTOMERS[res]
        # In debug mode, a .sites file is created
        text = '{0} '.format(resnumb)
        for i in range(ntautomers):
            text += '{0}tau{1} '.format(resname, i + 1)

        # NOTE(review): `res` is the leftover loop variable from the loops
        # above (the last REGULARTITRATINGRES entry visited), not the
        # residue being processed -- `resname` was probably intended;
        # confirm before changing, since it decides whether the
        # TERMINAL_OFFSET is applied.
        if res in ('NTR', 'CTR'):
            resnumb += TERMINAL_OFFSET

        if instanciate_sites:
            instanciate_site(resnumb, resname, ntautomers)
        return text + '\n'

    def instanciate_site(resnumb, resname, ntautomers):
        """Add the site and its tautomers to the current molecule.

        ``molecule`` is read from the enclosing function's scope.
        """
        sID = molecule.addSite(resnumb)
        molecule.addTautomers(sID, ntautomers, resname)

    def add2chain(chain, chain_res, resnumb, resname):
        """Record ``resname`` under ``resnumb`` for the given chain."""
        chain_res[chain][resnumb] = resname

    sites_file = ''
    # NOTE(review): `sites` is never filled after this point; it is only
    # used in the `'CTR' not in sites` membership tests below.
    sites = {chain: [] for chain in molecules.keys()}
    chain_res = {chain: {} for chain in molecules.keys()}
    last_res = None
    with open(Config.pypka_params['f_in']) as f:
        nline = 0
        resnumb = None
        resname = None
        for line in f:
            nline += 1
            if 'ATOM ' == line[0:5]:
                (aname, anumb, resname, chain, resnumb, x, y,
                 z) = read_pdb_line(line)

                # Skip records with a non-blank character at index 26
                # (presumably the PDB insertion-code column -- confirm).
                if line[26] != ' ':
                    continue
                #if chain == ' ':
                #    chain = 'A'
                last_res = resnumb
                last_chain = chain
                chain_sites = []

                if chain in molecules.keys():
                    molecule = molecules[chain]
                    chain_sites = chain_res[chain]

                    if resname in PROTEIN_RESIDUES or \
                       resname in TITRABLERESIDUES:
                        if resnumb not in chain_sites:
                            # First residue seen for this chain: register
                            # it as the N-terminus.
                            if not chain_res[chain]:
                                sites_file += SitesFileLine(resnumb, 'NTR')
                                add2chain(chain, chain_res, str(resnumb),
                                          'NTR')
                                if instanciate_sites:
                                    molecule.NTR = resnumb

                            if resname in TITRABLERESIDUES and \
                               resname != 'NTR' and resname != 'CTR':
                                # This `continue` also skips the C-terminus
                                # check below for the current line.
                                if Config.pypka_params['ser_thr_titration'] == False and \
                                   resname in ('SER', 'THR'):
                                    continue
                                sites_file += SitesFileLine(resnumb, resname)
                                add2chain(chain, chain_res, resnumb, resname)

                        # NOTE(review): `sites` is keyed by chain id, so
                        # `'CTR' not in sites` holds unless a chain is
                        # literally named 'CTR' -- confirm intent.
                        if 'CTR' not in sites and \
                           aname in ('CT', 'OT', 'OT1', 'OT2', 'O1', 'O2', 'OXT'):
                            sites_file += SitesFileLine(resnumb, 'CTR')
                            add2chain(chain, chain_res, str(resnumb), 'CTR')
                            if instanciate_sites:
                                molecule.CTR = resnumb


    # NOTE(review): this block relies on `aname`, `molecule`, `chain` and
    # `last_chain` leaking from the loop above; they are unbound when the
    # file has no matching ATOM record.  `molecule.sites == 'all'` compares
    # the sites attribute with a string -- confirm when that can be true.
    if 'CTR' not in sites and \
       aname in ('CT', 'OT', 'OT1', 'OT2', 'O1', 'O2', 'OXT') and molecule.sites == 'all':
        sites_file += SitesFileLine(last_res, 'CTR')
        add2chain(last_chain, chain_res, str(last_res), 'CTR')
        if instanciate_sites:
            molecule = molecules[chain]
            molecule.CTR = resnumb

    # NOTE(review): chain_res is created with one key per chain, so it is
    # only empty when `molecules` itself is empty.
    if not chain_res and instanciate_sites:
        f_in = Config.pypka_params['f_in']
        raise Exception(
            'Not one titrable residue was found in {}'.format(f_in))

    if instanciate_sites:
        for molecule in molecules.values():
            # Adding the reference tautomer to each site
            molecule.addReferenceTautomers()
            # Assigning a charge set to each tautomer
            molecule.addTautomersChargeSets()

    if Config.debug:
        with open('tmp.sites', 'w') as f_new:
            f_new.write(sites_file)

    return chain_res
Пример #10
0
def make_delphi_inputfile(f_in, f_out, molecules):
    """Rewrite ``f_in`` as the PDB used for DelPhi and set up site centers.

    ATOM records are renumbered, termini and residue/atom names are
    corrected, atoms are registered on their molecule and titrable
    sites, and the result is written to ``f_out`` followed by
    ``TER``/``ENDMDL``.  The box is taken from the CRYST1 record (or from
    ``Config.pypka_params['box']`` when set).  Finally every site
    receives a focus center (midpoint of the bounding box of its atoms)
    and a hydrogen center (mean position of its hydrogens).

    Args:
        f_in: path of the input PDB file.
        f_out: path of the PDB file to write.
        molecules: mapping of chain id to molecule object.

    Raises:
        Exception: if a site has no atom whose name starts with 'H'.
    """
    def getMaxCoords(coords, max_coords):
        """Return the component-wise maximum of the two coordinate triples."""
        x, y, z = coords
        max_x, max_y, max_z = max_coords
        if max_x < x:
            max_x = x
        if max_y < y:
            max_y = y
        if max_z < z:
            max_z = z
        return max_x, max_y, max_z

    def correct_termini(resnumb, resname, aname, ntr_res, ctr_res):
        """Rename terminal atoms to NTR/CTR and shift their residue number.

        Atoms of the N-/C-terminal residue listed in the ``NTR_atoms`` /
        ``CTR_atoms`` settings get the terminus residue name and
        ``TERMINAL_OFFSET`` added to the residue number; the C-terminal
        'C' atom is renamed 'CT'.
        """
        if resnumb == ntr_res and \
           aname in Config.pypka_params['NTR_atoms']:
            resname = 'NTR'
            resnumb += TERMINAL_OFFSET
        elif resnumb == ctr_res and \
             aname in Config.pypka_params['CTR_atoms']:
            resname = 'CTR'
            resnumb += TERMINAL_OFFSET
            if aname == 'C':
                aname = 'CT'
        return resnumb, resname, aname

    def correct_res_names(molecule, resnumb, resname, aname):
        """Apply the molecule's residue-name and atom-name corrections."""
        if resnumb in list(molecule.correct_names.keys()):
            resname = molecule.correct_names[resnumb]
        if resnumb in list(molecule.correct_atoms.keys()) and \
           aname in molecule.correct_atoms[resnumb]:
            aname = molecule.correct_atoms[resnumb][aname]

        return resnumb, resname, aname

    def assign_atoms(sites, resnumb, aname, site_Hs, site_positions):
        """Attach the current atom to its site and collect its position.

        Reads ``resname``, ``anumb``, ``chain``, ``x``, ``y`` and ``z``
        from the enclosing loop rather than from its parameters.

        Returns:
            ``(site_Hs, ref_tau_name, site_positions)`` where
            ``ref_tau_name`` is the reference tautomer name when the atom
            belongs to a site, otherwise the current ``resname``.
        """
        ref_tau_name = resname
        if resnumb in list(sites.keys()) and \
           aname in list(sites[resnumb].getRefTautomer().charge_set.keys()):
            #( aname not in ('N', 'H', 'C', 'O', 'CA') or
            #(aname in ('N', 'H', 'C', 'O', 'CA') and resname == 'NTR')):
            # change res name to reference tautomer
            ref_tau_name = sites[resnumb].getRefTautomerName()

            # add atom to corresponding site
            sites[resnumb].addAtom(aname, anumb)

            if chain not in site_positions:
                site_positions[chain] = {}
                site_Hs[chain] = {}
            if resnumb not in site_positions[chain]:
                site_positions[chain][resnumb] = []
                site_Hs[chain][resnumb] = []

            if resnumb in site_positions[chain]:
                site_positions[chain][resnumb].append((x, y, z))
                # Atoms whose name starts with 'H' are also tracked
                # separately for the hydrogen center.
                if aname[0] == 'H':
                    site_Hs[chain][resnumb].append((x, y, z))

        return site_Hs, ref_tau_name, site_positions

    new_pdb_content = ""
    site_positions = {}
    site_Hs = {}
    max_box = [0.0, 0.0, 0.0]
    # Index of the atom in the output; starts at -1 so the first is 0.
    aposition = -1
    with open(f_in) as f:
        for line in f:
            # Note: matches 'ATOM' without the trailing space used elsewhere.
            if line.startswith('ATOM'):
                aposition += 1
                (aname, anumb, resname, chain, resnumb, x, y,
                 z) = read_pdb_line(line)

                max_box = getMaxCoords([x, y, z], max_box)

                if chain in molecules:
                    molecule = molecules[chain]
                    ntr_res = molecule.NTR
                    ctr_res = molecule.CTR
                    sites = molecule.sites

                    # HD1 of a HIS that is not a site is dropped; undo the
                    # position increment so numbering stays contiguous.
                    if (resname == 'HIS' and aname == 'HD1'
                            and resnumb not in sites.keys()):
                        aposition -= 1
                        continue

                    resnumb, resname, aname = correct_termini(
                        resnumb, resname, aname, ntr_res, ctr_res)

                    resnumb, resname, aname = correct_res_names(
                        molecule, resnumb, resname, aname)

                    titrable_res = False
                    if resnumb in sites.keys():
                        titrable_res = True

                    molecule.addAtom(aname, anumb, aposition, titrable_res)

                    (site_Hs, resname,
                     site_positions) = assign_atoms(sites, resnumb, aname,
                                                    site_Hs, site_positions)

                else:
                    if (resname == 'HIS' and aname == 'HD1'):
                        aposition -= 1
                        continue
                    # NOTE(review): `molecule` here is whatever an earlier
                    # iteration left behind (unbound if no earlier atom
                    # belonged to a known chain) -- confirm intent.
                    resnumb, resname, aname = correct_res_names(
                        molecule, resnumb, resname, aname)

                new_pdb_content += new_pdb_line(aposition, aname, resname,
                                                resnumb, x, y, z)

            elif line.startswith('CRYST1'):
                parts = line.split()
                box = [float(i) for i in parts[1:4]]

    # NOTE(review): `box` is only bound when a CRYST1 record was read; a
    # file without one raises NameError here.  A box of [0.1, 0.1, 0.1]
    # is replaced by the largest coordinates seen (presumably a
    # placeholder value -- confirm).
    if box == [0.1, 0.1, 0.1]:
        box = max_box

    # A box configured in pypka_params overrides the one from the file;
    # otherwise the file's box is stored in the configuration.
    if Config.pypka_params['box']:
        box = Config.pypka_params['box']
    else:
        Config.pypka_params.setBox(box)

    if Config.delphi_params['pbc_dim'] == 2:
        Config.delphi_params.redefineScale()

    new_pdb_content += 'TER\nENDMDL\n'
    with open(f_out, 'w') as f_new:
        f_new.write(new_pdb_content)

    # TODO: check Terminal_offset has to be bigger than the total number of residues
    # TODO: delete terminal_offset and use another approach to distinguish between N- and C-ter
    # TODO: check size xy > config.cutoff * 2
    # if so, raise Exception, and ask to change cutoff value

    # TODO: check if pbc_dim -> set gsizes from pdb size xy and ignore perfil

    for chain in site_positions.keys():
        molecule = molecules[chain]
        for site in site_positions[chain]:
            if site in list(molecule.sites.keys()):
                # Sentinels for the bounding-box search.
                # NOTE(review): the first pos_max entry (-9999990) differs
                # from the other two (-999999); looks like a typo.
                pos_max = [-9999990, -999999, -999999]
                pos_min = [999999, 999999, 999999]
                focus_center = [0, 0, 0]
                for atom in site_positions[chain][site]:
                    for i in range(3):
                        if pos_max[i] < atom[i]:
                            pos_max[i] = atom[i]
                        if pos_min[i] > atom[i]:
                            pos_min[i] = atom[i]
                # Focus center: midpoint of the site's bounding box.
                focus_center[0] = (pos_max[0] + pos_min[0]) / 2
                focus_center[1] = (pos_max[1] + pos_min[1]) / 2
                focus_center[2] = (pos_max[2] + pos_min[2]) / 2

                if Config.delphi_params['pbc_dim'] == 2:
                    molecule.sites[site].addCenter(focus_center,
                                                   boxsize=box[0],
                                                   box_z=box[2])
                else:
                    molecule.sites[site].addCenter(focus_center)
                # Hydrogen center: mean position of the site's hydrogens.
                hx, hy, hz = 0, 0, 0
                nHs = len(site_Hs[chain][site])
                if nHs == 0:
                    sitename = molecule.sites[site].getName()
                    raise Exception('Site {1}{0} appears '
                                    'to have no Hydrogen atoms'.format(
                                        site, sitename))
                for h in site_Hs[chain][site]:
                    hx += h[0]
                    hy += h[1]
                    hz += h[2]
                hx /= nHs
                hy /= nHs
                hz /= nHs
                Hcenter = (hx, hy, hz)
                molecule.sites[site].addCenterH(Hcenter)
Пример #11
0
def check_sites_integrity(molecules, chains_res, useTMPpdb=False):
    """Identify titrable residues and check the integrity of their residue
    blocks (excluding Hydrogens).

    The PDB file is read line by line.  Atom names are accumulated per
    residue and, every time the residue number changes (or the last line of
    the file is reached), the residue that has just been completed is passed
    to ``check_integrity``.  Residues that pass are registered as sites on
    their molecule; the others are reported through ``Config.log``.  Once the
    whole file has been read, reference tautomers and charge sets are added
    to every molecule.

    Args:
        molecules: mapping indexed by chain identifier; its values are the
            molecule objects on which sites are created.
        chains_res: mapping indexed by chain identifier; each value is
            tested with ``in`` against residue numbers to decide whether a
            residue was requested as a site.
        useTMPpdb: when true, read "TMP.pdb" even if
            ``Config.pypka_params['f_in']`` is set.
    """
    def check_site(prev_resname, cur_atoms, ter=None):
        """Check the residue that has just been read and register its sites.

        ``molecule``, ``sites`` and ``prev_resnumb`` are read from the
        enclosing scope, so this must only be called once a chain present in
        ``molecules`` has been seen.

        Args:
            prev_resname: name of the completed residue.
            cur_atoms: atom names collected for that residue.
            ter: 'NTR' or 'CTR' when the residue is a chain terminus.
        """
        def correctResName(resname):
            # Map a residue name onto the first regular titrating residue
            # sharing its first two characters; unknown names pass through.
            for res in REGULARTITRATINGRES:
                if res[0:2] == resname[0:2]:
                    return res
            return resname

        def makeSite(molecule, resnumb, resname, termini_resname=None):
            # Look the number of tautomers up directly, or through the
            # regular titrating residue with the same two-letter prefix.
            # NOTE(review): ntautomers stays unbound if neither lookup
            # matches -- callers are presumably expected to pass only
            # known residue names; confirm.
            if resname in TITRABLETAUTOMERS:
                ntautomers = TITRABLETAUTOMERS[resname]
            else:
                for res in REGULARTITRATINGRES:
                    if res[0:2] == resname[0:2]:
                        ntautomers = TITRABLETAUTOMERS[res]
            sID = molecule.addSite(resnumb)
            molecule.addTautomers(sID,
                                  ntautomers,
                                  resname,
                                  termini_resname=termini_resname)

        prev_resname = correctResName(prev_resname)

        # For a terminus, the side chain is only checked as a site when the
        # residue itself is titrable (and SER/THR titration is enabled).
        res_tits = True
        if ter:
            if (not Config.pypka_params['ser_thr_titration']
                    and prev_resname in ('SER', 'THR')):
                res_tits = False
            else:
                res_tits = bool(prev_resname in TITRABLERESIDUES)

        # check_integrity receives a copy so cur_atoms is left untouched
        # for the warning messages below.
        res_atoms = copy(cur_atoms)
        (integrity_terminal, integrity_site) = check_integrity(prev_resname,
                                                               res_atoms,
                                                               ter=ter,
                                                               site=res_tits)

        if integrity_terminal:
            # Terminal sites are numbered with a fixed offset from the
            # residue they belong to.
            ter_resnumb = prev_resnumb + TERMINAL_OFFSET
            makeSite(molecule, ter_resnumb, ter, termini_resname=prev_resname)
            if ter == 'NTR':
                molecule.NTR = prev_resnumb
            elif ter == 'CTR':
                molecule.CTR = prev_resnumb
        else:
            warning(molecule, prev_resnumb, ter, '')

        if prev_resnumb in sites:
            if integrity_site:
                makeSite(molecule, prev_resnumb, prev_resname)
            else:
                warning(molecule, prev_resnumb, prev_resname, cur_atoms)
        elif prev_resname == 'CYS':  # dealing with a CYS that is not in sites
            if not integrity_site:
                warning(molecule,
                        prev_resnumb,
                        prev_resname,
                        cur_atoms,
                        mode='CYS')

    def warning(molecule, resnumb, resname, res_atoms, mode=None):
        """Report a residue that failed the integrity check.

        CYS residues get special treatment: depending on the exact set of
        atoms found, they are logged as SS-bonded, or recorded on
        ``molecule`` to be renamed to CY0 (optionally renaming HG to HG1).
        Non-titrable residues are silently ignored.
        """
        if mode == 'CYS' or resname == 'CYS':
            atoms = set(res_atoms)
            if atoms == {'N', 'CA', 'CB', 'SG', 'C', 'O', 'H'}:
                # no need to correct residue name
                warn = '{0} {1} is assumed to be participating '\
                       'in a SS-bond'.format(resnumb, resname)
                Config.log.report2log(warn)
                return
            if atoms == {'N', 'CA', 'CB', 'SG', 'C', 'O', 'H', 'HG1'}:
                molecule.correct_names[resnumb] = 'CY0'
                return
            if atoms == {'N', 'CA', 'CB', 'SG', 'C', 'O', 'H', 'HG'}:
                molecule.correct_names[resnumb] = 'CY0'
                molecule.correct_atoms[resnumb] = {'HG': 'HG1'}
                return
            warn = '{0} {1} failed integrity check'.format(resnumb, resname)
            Config.log.report2log(warn)
        elif resname in TITRABLERESIDUES:
            warn = '{0} {1} failed integrity check'.format(resnumb, resname)
            Config.log.report2log(warn)

    if Config.pypka_params['f_in'] and not useTMPpdb:
        filename = Config.pypka_params['f_in']
    else:
        filename = "TMP.pdb"

    with open(filename) as f:
        f_lines = f.readlines()
    maxnlines = len(f_lines)

    resnumb = None
    cur_atoms = []
    prev_resnumb = None
    prev_resname = None
    last_chain = None
    # Stays None until an ATOM record of a chain present in `molecules` has
    # been read; `molecule` and `sites` are bound at that same moment.
    last_molecule = None
    for nline, line in enumerate(f_lines, 1):
        resname = None
        chain = None

        if 'ATOM ' == line[0:5]:
            (aname, anumb, resname, chain, resnumb, x, y,
             z) = read_pdb_line(line)

            if chain in molecules:
                if not last_chain:
                    last_chain = chain
                # The residue about to be checked is the previous one, so
                # the molecule/sites of the previous chain are selected
                # before last_chain is advanced.
                molecule = molecules[last_chain]
                sites = chains_res[last_chain]
                last_molecule = molecule
                last_chain = chain

            if nline == maxnlines:
                cur_atoms.append(aname)

        # A TER record forces a residue change; it is ignored when no ATOM
        # record has been read yet (resnumb would still be None).
        if line == 'TER\n' and resnumb is not None:
            resnumb += 1

        if (prev_resnumb != resnumb or nline == maxnlines) and \
           prev_resnumb is not None:
            if nline == maxnlines:
                # Last line of the file: flush the current residue.
                prev_resnumb = copy(resnumb)
                resnumb = 'None'

            if chain in molecules:
                if prev_resname in TITRABLERESIDUES or \
                   (prev_resnumb == molecule.NTR or resnumb == molecule.NTR) or \
                   (prev_resnumb == molecule.CTR or resnumb == molecule.CTR):
                    if prev_resnumb == molecule.NTR and resnumb != molecule.NTR:
                        check_site(prev_resname, cur_atoms, ter='NTR')
                        prev_resnumb = None
                    # Dealing with the last residue and CTR
                    elif prev_resnumb == molecule.CTR and resnumb != molecule.CTR:
                        check_site(prev_resname, cur_atoms, ter='CTR')
                    # Dealing with the previous residue
                    elif prev_resnumb is not None and \
                         prev_resname in TITRABLERESIDUES:
                        if not (not Config.pypka_params['ser_thr_titration'] and \
                            prev_resname in ('SER', 'THR')):
                            check_site(prev_resname, cur_atoms)

                elif prev_resname == 'ALA':
                    # TODO: check residue block integrity for other non titrating residues
                    pass
            elif last_molecule is None:
                # No chain from `molecules` has been read so far, so the
                # completed residue cannot belong to any of them and
                # check_site has no molecule to work on.
                pass
            elif last_molecule and prev_resnumb == last_molecule.CTR and resnumb != last_molecule.CTR:
                check_site(prev_resname, cur_atoms, ter='CTR')
            elif prev_resname in TITRABLERESIDUES and prev_resnumb is not None:
                check_site(prev_resname, cur_atoms)

        # Dealing with the new residue
        if prev_resnumb != resnumb:
            cur_atoms = [aname]
            prev_resnumb = resnumb
            prev_resname = resname
        elif resnumb is not None:
            cur_atoms.append(aname)
            if prev_resname in ('NTR', 'CTR') and \
               prev_resname != resname:
                prev_resname = resname

    for molecule in molecules.values():
        # Adding the reference tautomer to each site
        molecule.addReferenceTautomers()
        # Assigning a charge set to each tautomer
        molecule.addTautomersChargeSets()

    # TODO: report blocks that failed the check (in .log file with
    # numbering reference to stepwise scheme)
    # TODO: add lipid residues
    if Config.debug:
        print('exiting check_sites_integrity')