Exemplo n.º 1
0
def align_structs(id1, chain1, id2, chain2):
    """
    Fetch two entries, superimpose the second onto the first and save both.

    Both files are retrieved through ``PDBList.retrieve_pdb_file`` and parsed
    with ``MMCIFParser``. One atom list per requested chain is built by
    ``create_atoms_list``; when the two lists differ in length, ``bonus_9_2``
    supplies a replacement pair. The RMSD of the fit is printed and both
    structures are passed to ``saving_file``.

    :param id1: the first file id
    :param chain1: the first protein's chain
    :param id2: the second file id
    :param chain2: the second protein's chain
    """
    # Retrieve both files first, then parse them with a single parser.
    downloader = pdb.PDBList()
    path_one = downloader.retrieve_pdb_file(id1)
    path_two = downloader.retrieve_pdb_file(id2)
    cif_parser = pdb.MMCIFParser()
    struct1 = cif_parser.get_structure("p1", path_one)
    struct2 = cif_parser.get_structure("p2", path_two)

    # Atom lists used for the fit (fixed first, moving second).
    fixed_atoms = create_atoms_list(struct1, chain1)
    moving_atoms = create_atoms_list(struct2, chain2)
    same_size = len(fixed_atoms) == len(moving_atoms)
    if not same_size:
        # Lists of unequal length are replaced by the pair bonus_9_2 returns.
        fixed_atoms, moving_atoms = bonus_9_2(chain1, chain2, struct1, struct2)

    # Fit on the selected atoms, then move every atom of struct2's first model.
    aligner = pdb.Superimposer()
    aligner.set_atoms(fixed_atoms, moving_atoms)
    aligner.apply(struct2[0].get_atoms())
    print(aligner.rms)

    # Persist both structures (struct2 now carries the transformed coordinates).
    for entry_id, structure in ((id1, struct1), (id2, struct2)):
        saving_file(entry_id, structure)
Exemplo n.º 2
0
class Protein:
    """One protein: its parsed structure, its chain of modules and its CA atoms."""

    # The modules-length table is read once, while the class body executes.
    # Every line contributes one entry: first field -> key, second field -> int.
    modules_path = './data/modules-length'
    with open(modules_path, 'r') as f:
        modules_length = {}
        for line in f:
            words = line.split()
            modules_length[words[0]] = int(words[1])

    # Biopython PDB parser shared by all instances.
    parser = PDB.PDBParser()
    # Biopython Superimposer shared by all kabsch() calls.
    sup = PDB.Superimposer()

    @staticmethod
    def kabsch(q, p, modules_range=None):
        """Return the Superimposer RMSD between two proteins, whole or per module.

        :param q: First instance of Protein.
        :param p: Second instance of Protein.
        :param modules_range: Iterable of module indices to evaluate one by one.
        If not given, the complete residue chains are compared.
        :return: A single RMSD when ``modules_range`` is None, otherwise a list
        holding one RMSD per index of ``modules_range``.
        """
        assert len(q.residue_chain) == len(p.residue_chain)

        fitter = Protein.sup
        if modules_range is None:
            fitter.set_atoms(q.residue_chain, p.residue_chain)
            return fitter.rms

        per_module = []
        for index in modules_range:
            # A module's slice starts after the summed lengths of all the
            # modules preceding it in q's section list.
            first = sum(q.modules_sections[:index])
            last = first + q.modules_sections[index]
            fitter.set_atoms(q.residue_chain[first:last],
                             p.residue_chain[first:last])
            per_module.append(fitter.rms)
        return per_module

    def __init__(self, structure_name, pdb_path, json_path, strict=True):
        """Parse the PDB file, load the module chain and collect the CA atoms.

        :param structure_name: Name given to the parsed structure.
        :param pdb_path: Path of the PDB file to parse.
        :param json_path: Path of a JSON file whose 'nodes' entry lists the modules.
        :param strict: When true, assert that the summed module lengths equal
        the number of collected CA atoms.
        """
        self.name = structure_name
        self.path = pdb_path
        self.structure = Protein.parser.get_structure(self.name, self.path)

        with open(json_path, 'r') as f:
            self.modules_chain = json.load(f)['nodes']

        # Length of every module of the chain, looked up as '<module>.pdb'.
        self.modules_sections = []
        for module in self.modules_chain:
            self.modules_sections.append(
                Protein.modules_length[module + '.pdb'])

        # One CA atom per residue, in the order the structure yields residues.
        self.residue_chain = [
            residue['CA'] for residue in self.structure.get_residues()
        ]

        if strict:
            assert sum(self.modules_sections) == len(self.residue_chain)

    def __call__(self):
        """Return a new Superimposer fitted on ``ref_atoms`` and ``alt_atoms``.

        NOTE(review): ``ref_atoms`` and ``alt_atoms`` are not assigned anywhere
        in the visible class; they must be set on the instance beforehand.
        """
        fitted = PDB.Superimposer()
        fitted.set_atoms(self.ref_atoms, self.alt_atoms)
        return fitted
Exemplo n.º 4
0
def superimpose_and_rotate(eq_chain1, eq_chain2, moving_chain, curr_struct,
                           rec_level_complex):
    """Superimpose two equivalent chains and try to add a third chain to the complex.

    The transformation that fits ``eq_chain2`` onto ``eq_chain1`` is applied
    to ``moving_chain`` (in place). The moved chain is added to the first
    model of ``curr_struct`` only when it produces no steric clash and the
    RMSD of the fit is at most 3.0.

    Keyword arguments:
    eq_chain1 -- common chain in the current structure (curr_struct)
    eq_chain2 -- common chain in the structure the moving chain comes from
    moving_chain -- chain that may be added to the current complex
    curr_struct -- structure being built; its first model receives the chain
    rec_level_complex -- recursion level of building the complex

    Returns a 5-tuple ``(curr_struct, added, clash, clashing_chains,
    moving_chain)`` where ``added`` is 1 if the chain was added and 0
    otherwise. When the two common-atom lists differ in length nothing is
    superimposed and ``(curr_struct, 0, False, set(), moving_chain)`` is
    returned.
    """
    # All residues of the common chain, taken from each of the two structures.
    res_chain1 = list(eq_chain1.get_residues())
    res_chain2 = list(eq_chain2.get_residues())

    # Only residues present in both chains can be used for the fit.
    common_res_s1 = get_list_of_common_res(res_chain1, res_chain2)
    common_res_s2 = get_list_of_common_res(res_chain2, res_chain1)

    # Atom objects of those common residues.
    common_atoms_s1 = get_atom_list_from_res_list(common_res_s1)
    common_atoms_s2 = get_atom_list_from_res_list(common_res_s2)

    # The Superimposer needs lists of equal length; give up otherwise.
    if len(common_atoms_s1) != len(common_atoms_s2):
        return curr_struct, 0, False, set(), moving_chain

    # First argument is fixed, second is moving; both are lists of atoms.
    sup = pdb.Superimposer()
    sup.set_atoms(common_atoms_s1, common_atoms_s2)
    rms = sup.rms

    # Move the candidate chain with the rotation/translation just obtained.
    sup.apply(list(moving_chain.get_atoms()))

    added = 0
    clash, clashing_chains = is_steric_clash(curr_struct, moving_chain)

    # The chain is added only without clashes and with a low RMSD, i.e. when
    # the two superimposed chains are actually the same.
    if not clash and rms <= 3.0:
        my_id = moving_chain.id
        chain_names = [x.id for x in curr_struct[0].get_chains()]
        while added == 0:
            # Random ID + the recursion level at which this chain is added.
            rand = (create_random_chars_id(6), str(rec_level_complex))
            # Build the candidate id once and use the same value for the
            # uniqueness check and the assignment. The previous check
            # (`my_id + rand`) raised TypeError for plain string ids.
            new_id = tuple(list(my_id) + list(rand))
            if new_id not in chain_names:
                moving_chain.id = new_id
                curr_struct[0].add(moving_chain)
                added = 1

    return curr_struct, added, clash, clashing_chains, moving_chain
Exemplo n.º 5
0
def Superimpose_Chain(reference, interaction, target, pairs,
                      collisions_accepted, radius, percent):
    """Move the partner chain of an interaction onto the model and check collisions.

    Input:
    -reference = chain object used as reference
    -interaction = string with the names of the two related chains
    -target = string chain name of the common chain
    -pairs = Pairs dictionary:
        -Keys = interaction pair
        -Values = dictionary:
            -Keys = Chain name
            -Values = Chain object
    -collisions_accepted = number of atom collisions allowed in each join, user input
    -radius = empty radius (in Angstroms) around each atom, user input
    -percent = similarity percentage accepted, user input
    Output:
    -mobile_chain = deep copy of the partner chain with the superimposition
     transformation applied, or None when the number of collisions exceeds
     collisions_accepted
    """
    chains_of_pair = pairs[interaction]

    # Atom lists to fit: the reference chain against its counterpart in the pair.
    fixed_atoms, mobile_atoms = Check_Similarity(reference,
                                                 chains_of_pair[target],
                                                 percent)

    # Fit the counterpart on the reference to obtain rotation and translation.
    sup = pdb.Superimposer()
    sup.set_atoms(fixed_atoms, mobile_atoms)
    rotation, translation = sup.rotran

    # The mobile chain is what remains of the pair name without the target.
    mobile_chain_name = interaction.replace(target, "", 1)

    # Work on a copy so the chain stored in `pairs` keeps its coordinates.
    mobile_chain = cp.deepcopy(chains_of_pair[mobile_chain_name])
    mobile_chain.transform(rotation, translation)

    # Atoms already in the model versus atoms of the chain being added.
    model_atom_list = [
        atom for chain in reference.get_parent() for atom in chain.get_atoms()
    ]
    addition_atom_list = list(mobile_chain.get_atoms())

    collisions = Collision_Check(model_atom_list, addition_atom_list, radius)
    collision_count = len(collisions)

    if collision_count > 0:
        s.argprint("Number of collisions:", s.options.verbose, s.options.quiet,
                   2)
        s.argprint(collision_count, s.options.verbose, s.options.quiet, 2)

    if collision_count <= collisions_accepted:
        return mobile_chain

    # Too many collisions: report them and reject the chain.
    e = s.CollisionAppears(target, mobile_chain_name, collisions)
    s.argprint(e, s.options.verbose, s.options.quiet, 2)
    s.argprint(e.Get_Collisions(), s.options.verbose, s.options.quiet, 3)
    return None
Exemplo n.º 6
0
    def superimpose(self, hetid=None, within=8.0):
        """
        Superimpose ``other`` onto ``reference`` and store the RMSD in ``self.rms``.

        Only non-hydrogen atoms of residues whose number is in
        ``self.selected_index`` take part in the fit. If the optional `hetid`
        is supplied, ``self.selected_index`` is first narrowed to its
        intersection with the binding site reported for that ligand. The
        resulting transformation is applied to every atom of
        ``self.other_struc``.

        :param hetid: Ligand identifier
        :type hetid: str
        :param within: binding site cutoff distance
        :type within: float

        .. code-block:: python

            # Example usage
            from pdb_superimposer import ChainSuperimposer, Helper

            ref = Helper.protein_from_file("2VTA", "2VTA.pdb")
            ref_chain = [c for c in ref[0]][0]

            other = Helper.protein_from_file("6YLK", "6YLK.pdb")
            other_chain = [c for c in other[0]][0]

            cs = ChainSuperimposer(reference=ref_chain, other=other_chain, other_struc=other)
            cs.superimpose()

        """
        if hetid:
            # Restrict the selection to the detected binding site.
            site = self.binding_site(chain=self.other,
                                     hetid=hetid,
                                     within=within)
            self.selected_index = self.selected_index.intersection(site)

        super_imposer = PDB.Superimposer()

        def active_atoms(residues):
            """Collect the non-hydrogen atoms of the selected residues."""
            chosen = []
            for resi in residues:
                for atm in resi:
                    if (resi.get_id()[1] in self.selected_index
                            and atm.element != "H"):
                        chosen.append(atm)
            return chosen

        # Atoms used for the fit: reference first (fixed), other second (moving).
        reference_atms = active_atoms(self.reference_seq.values())
        other_atms = active_atoms(self.other_seq.values())
        super_imposer.set_atoms(reference_atms, other_atms)

        # Move the whole other structure, not only the atoms used for the fit.
        super_imposer.apply(self.other_struc.get_atoms())
        self.rms = super_imposer.rms
Exemplo n.º 7
0
def superimpose(first, second):
    """Fit ``second`` onto ``first``, move the second model and save its structure.

    The transformation is applied to the atoms of the module-level
    ``second_model``; the RMSD is printed; the module-level
    ``second_structure`` is written to a file whose name is asked
    interactively.

    :param first: atoms used as the fixed set.
    :param second: atoms used as the moving set.
    :return: the output file name typed by the user.
    """
    aligner = PDB.Superimposer()
    aligner.set_atoms(first, second)

    # Apply the fitted transformation to every atom of the second model.
    moving_model = second_model
    aligner.apply(moving_model.get_atoms())

    print("The RMSD is: ")
    print(aligner.rms)

    # Write the (now transformed) second structure to the requested file.
    output_structure = second_structure
    writer = PDB.PDBIO()
    writer.set_structure(output_structure)
    saved_file = input("Name of output file (.pdb)?")
    writer.save(saved_file)
    return saved_file
Exemplo n.º 8
0
def superimpose(fixed_vector, moving_vector, moving_atom_list):
    """
    Fit a moving vector onto a fixed one and apply the transformation to a list of atoms.
    :param fixed_vector: atoms used as reference (fixed set).
    :param moving_vector: atoms fitted onto the fixed set (moving set).
    :param moving_atom_list: atoms that receive the rotation and translation obtained from the fit.
    :return: the value returned by the superimposer's ``apply`` call.
        NOTE(review): Bio.PDB's ``Superimposer.apply`` is understood to modify the
        atoms in place; confirm what callers expect as a return value.
    """
    superimposer = bio.Superimposer()
    # Fixed set first (bond of the core), moving set second (bond of the fragment).
    superimposer.set_atoms(fixed_vector, moving_vector)
    # Translate and rotate the atoms of the fragment.
    outcome = superimposer.apply(moving_atom_list)
    return outcome
Exemplo n.º 9
0
def align(ref_structure, mobile_structure):
    """Return a Superimposer fitted on the atom pairs shared by two structures.

    The paired atom lists come from ``common_set``; the first list is used
    as the fixed set and the second as the moving set. No coordinates are
    modified here: the caller decides what to apply the returned
    superimposer to.

    :param ref_structure: structure used as reference.
    :param mobile_structure: structure to be fitted onto the reference.
    :return: the fitted ``PDB.Superimposer`` instance.
    """
    paired_atoms = common_set(ref_structure, mobile_structure)
    ref_atoms, alt_atoms = paired_atoms

    fitted = PDB.Superimposer()
    fitted.set_atoms(ref_atoms, alt_atoms)
    return fitted
Exemplo n.º 10
0
def residuelist_rmsd(c1_list, c2_list, sidechains=False, superimpose=True):
    """Compute the RMSD between two equally long lists of residues.

    Residues are paired by position. For every pair, each atom name of
    interest that is present in BOTH residues contributes one atom pair;
    names missing from either residue are skipped.

    :param c1_list: first list of residues.
    :param c2_list: second list of residues, same length as ``c1_list``.
    :param sidechains: when true, the side-chain atom names registered for
        the first residue's name are considered in addition to
        ``nonsidechain_atoms``.
    :param superimpose: when true, the RMSD is taken from a Superimposer
        fitted on the atom pairs; otherwise the coordinates are compared as
        they are.
    :return: tuple ``(number of atom pairs, rmsd, rotran or None,
        dev_per_res)`` where ``dev_per_res`` maps each residue of
        ``c1_list`` to the list of per-atom deviations of its atoms.
    :raises Exception: if the two lists differ in length.
    """
    # Validate before doing anything else, including the lazy import below.
    if len(c1_list) != len(c2_list):
        raise Exception(
            "Chains of different length. (Maybe an RNA-DNA hybrid?)")

    import forgi.threedee.model.similarity as ftms

    to_residues = []
    crds1 = []
    crds2 = []
    all_atoms1 = []
    all_atoms2 = []
    for r1, r2 in zip(c1_list, c2_list):
        if sidechains:
            anames = nonsidechain_atoms + \
                side_chain_atoms[r1.resname.strip()]
        else:
            anames = nonsidechain_atoms

        for a in anames:
            try:
                at1 = r1[a]
                at2 = r2[a]
            except KeyError:
                # The atom is absent from one of the two residues: skip the
                # pair. Only the lookup failure is caught, so unrelated
                # errors are no longer silently swallowed.
                continue
            else:
                all_atoms1.append(at1)
                all_atoms2.append(at2)
                crds1.append(at1.coord)
                crds2.append(at2.coord)
                to_residues.append(r1)

    # Deviations are computed on the coordinates as given (before any fit)
    # and grouped by the residue of the first list they belong to.
    diff_vecs = ftms._pointwise_deviation(crds1, crds2)
    dev_per_res = defaultdict(list)
    for i, res in enumerate(to_residues):
        dev_per_res[res].append(diff_vecs[i])

    if superimpose:
        sup = bpdb.Superimposer()
        sup.set_atoms(all_atoms1, all_atoms2)

        return (len(all_atoms1), sup.rms, sup.rotran, dev_per_res)
    else:
        return (len(all_atoms1), ftuv._vector_set_rmsd(crds1, crds2), None,
                dev_per_res)
Exemplo n.º 11
0
def superimpose_chains_test(chain_one_real, chain_two_real):
    """
    Fit a Superimposer on deep copies of two chains and return it.

    The atoms of each copy are sorted and both lists are truncated to the
    length of the shorter one before the fit. Nothing is applied to the
    inputs; the returned superimposer carries the result of the fit.
    """
    copy_one = copy.deepcopy(chain_one_real)
    copy_two = copy.deepcopy(chain_two_real)
    super_imposer = pdb.Superimposer()
    sorted_one = sorted(copy_one.get_atoms())
    sorted_two = sorted(copy_two.get_atoms())
    # Both lists must have the same length for set_atoms.
    shared = min(len(sorted_one), len(sorted_two))
    super_imposer.set_atoms(sorted_one[:shared], sorted_two[:shared])
    return super_imposer
Exemplo n.º 12
0
def superimpose_chains(chain_structure_one, chain_structure_two):
    """
    Return a Superimposer fitted on two chains or structures.

    The inputs are deep-copied, so they are left untouched. The sorted atoms
    of both copies are cut to a common length and used for the fit (first
    argument fixed, second moving).
    """
    duplicates = (copy.deepcopy(chain_structure_one),
                  copy.deepcopy(chain_structure_two))
    super_imposer = pdb.Superimposer()

    fixed, moving = (sorted(entity.get_atoms()) for entity in duplicates)

    # Keep only as many atoms as the shorter list provides.
    limit = min(len(fixed), len(moving))
    del fixed[limit:]
    del moving[limit:]
    super_imposer.set_atoms(fixed, moving)

    return super_imposer
Exemplo n.º 13
0
def superimpose(structure_one_real, structure_two_real):
    """
    Superimpose a copy of the second structure onto a copy of the first.

    Both inputs are deep-copied. The atom lists of the copies are truncated
    to the length of the shorter one for the fit, and the transformation is
    then applied to every atom of the first model of the second copy.

    Returns a tuple (moved copy of the second structure, rmsd of the fit).
    """
    fixed_copy = copy.deepcopy(structure_one_real)
    moving_copy = copy.deepcopy(structure_two_real)
    super_imposer = pdb.Superimposer()
    fixed_atoms = list(fixed_copy.get_atoms())
    moving_atoms = list(moving_copy.get_atoms())
    # set_atoms needs two lists of equal length.
    common = min(len(fixed_atoms), len(moving_atoms))
    super_imposer.set_atoms(fixed_atoms[:common], moving_atoms[:common])
    first_model_atoms = list(moving_copy[0].get_atoms())
    super_imposer.apply(first_model_atoms)
    return (moving_copy, super_imposer.rms)
Exemplo n.º 14
0
def align_structures(residues, structures, bead_name=atom_name, quiet=False):
    """Superimpose every model of every structure onto one target model.

    The target is the first model of the first structure. For the target and
    for each model, the beads to fit on are obtained from
    ``beads_from_model(residues, model, bead_name)``; the fitted
    transformation is applied to all atoms of the model.

    Unless ``quiet`` is true, a progress line is printed per structure and a
    summary line at the end. Returns the target model.
    """
    target_model = structures[0][0]

    # Beads of the target, in the order the helper's dictionary yields them.
    target_beads = list(
        beads_from_model(residues, target_model, bead_name).values())

    widest_line = 0
    n = 0
    conf = "open"
    for structure in structures:
        if not quiet:
            progress = "Aligning {}...".format(str(structure))
            print(progress, end="\r")
            widest_line = max(widest_line, len(progress))

        for model in structure:
            # Beads of this model, fitted onto the beads of the target.
            mobile_beads = list(
                beads_from_model(residues, model, bead_name).values())
            sup = PDB.Superimposer()
            sup.set_atoms(target_beads, mobile_beads)
            sup.apply(model.get_atoms())

        # File-name bookkeeping: the counter restarts and the prefix switches
        # to "closed" when the counter reaches half the structures plus one.
        n += 1
        if n == (len(structures) / 2.0) + 1:
            conf = "closed"
            n = 1
        structure_filename = conf + str(n) + ".pdb"
        # if not os.path.isfile(structure_filename):
        #     save_structure(structure, structure_filename)

    if not quiet:
        summary = ("Performed " + str(len(structures)) +
                   " structure alignments.")
        # Pad with spaces so leftovers of the longest progress line are erased.
        padding = max(0, widest_line - len(summary) + 12)
        print(summary + " " * padding)

    return target_model
Exemplo n.º 15
0
    def generateResults(self):
        """Build a ResultSet holding RMSD, INF and DI for every solution.

        After the common atoms are calculated, one Result per solution is
        appended to the set. Three passes then fill in:
        - rmsd: RMSD of superimposing the solution's common atoms onto those
          of the real solution (left unset on ZeroDivisionError);
        - inf: common atoms divided by (atoms of the solution + atoms of the
          main solution - common atoms);
        - di: rmsd / inf (left unset on ZeroDivisionError or TypeError).
        """
        self._calculateCommonAtoms()
        resultSet = ResultSet(self.problemId, self.currentMainSolution)

        # One Result per solution.
        for candidate in self.solutions:
            entry = Result(resultSet, candidate)
            resultSet.results.append(entry)

        # RMSD pass.
        for entry in resultSet.results:
            candidate = entry.solution
            try:
                fitter = PDB.Superimposer()
                fitter.set_atoms(candidate.commonAtomRealSolution,
                                 candidate.commonAtomThisSolution)
                fitter.apply(candidate.commonAtomThisSolution)
                entry.rmsd = fitter.rms
            except ZeroDivisionError:
                continue

        # INF pass.
        for entry in resultSet.results:
            candidate = entry.solution

            own_atoms = len(list(candidate.structure.get_atoms()))
            main_atoms = len(
                list(self.currentMainSolution.structure.get_atoms()))
            shared_atoms = len(candidate.commonAtomRealSolution)

            union_atoms = own_atoms + main_atoms - shared_atoms
            entry.inf = shared_atoms / union_atoms

        # DI pass.
        for entry in resultSet.results:
            try:
                entry.di = entry.rmsd / entry.inf
            except (ZeroDivisionError, TypeError):
                continue

        return resultSet
Exemplo n.º 16
0
    def superimpose_models(self, reference_model=0):
        """Superimpose every model of ``self.structure`` onto a reference model.

        Superimpose two or more structures by using the Bio.PDB.Superimposer
        class. Chains and residues of the reference and of each model are
        paired by position (``zip``), and atoms are collected only from pairs
        whose reference residue has a 'CA' atom. The fitted transformation is
        applied to all atoms of the model, except when the model has the same
        full id as the reference; in that case the fit is only asserted to be
        (numerically) the identity.

        Args:

            reference_model is an int with the reference model (default=0).
            It is ignored when ``self.reference`` is not None: the first model
            of ``self.reference`` is used instead.

        Raises:

            KeyError if one of the selected atom names is missing from a
            paired residue.
        """

        # Reference: a model of this structure, unless an external reference
        # structure was provided.
        ref_model = self.structure[reference_model]
        if self.reference is not None:
            ref_model = self.reference[0]

        for alt_model in self.structure:
            ref_atoms = []
            alt_atoms = []
            # Iterate over the structure method to obtain all atoms of interest
            # for the analysis and superimposes the structures using them
            for (ref_chain, alt_chain) in zip(ref_model, alt_model):
                for ref_res, alt_res in \
                        zip(ref_chain, alt_chain):
                    # assert ref_res.resname == alt_res.resname, \
                        # "{:s} is not equal to {:s}".format(ref_res.resname,
                                                            # alt_res.resname)
                    # assert ref_res.id == alt_res.id, \
                        # "{:s} is not equal to {:s}".format(ref_res.id,
                                                            # alt_res.id)
                    # CA = alpha carbon; residues without one are skipped.
                    if ref_res.has_id('CA'):
                        if self.__atom == []:
                            # No atom selection: use every atom of both residues.
                            ref_atoms.extend(list(ref_res.get_atom()))
                            alt_atoms.extend(list(alt_res.get_atom()))
                        else:
                            # Use only the selected atom names; each must exist
                            # in both residues of the pair.
                            for atoms in self.__atom:
                                try:
                                    ref_atoms.append(ref_res[atoms])
                                    alt_atoms.append(alt_res[atoms])
                                except KeyError:
                                    raise KeyError(('Your input data is '
                                                    'misssing information for '
                                                    '{:s} atoms. Input more '
                                                    'complete data or select a'
                                                    ' smaller set of '
                                                    'atoms').format(atoms))

            # Align these paired atom lists:
            super_imposer = pdb.Superimposer()
            super_imposer.set_atoms(ref_atoms, alt_atoms)

            if ref_model.get_full_id() == alt_model.get_full_id():
                # Check for self/self get zero RMS, zero translation
                # and identity matrix for the rotation.
                assert np.abs(super_imposer.rms) < 0.0000001
                assert np.max(np.abs(super_imposer.rotran[1])) < 0.000001
                # NOTE(review): abs() is taken of the rotation matrix BEFORE the
                # identity is subtracted, so this is not max|R - I|; confirm
                # whether np.abs(R - I) was intended.
                assert np.max(np.abs(super_imposer.rotran[0]) -
                              np.identity(3)) < 0.000001
            else:
                # Update the structure by moving all the atoms in
                # this model (not just the ones used for the alignment)
                super_imposer.apply(alt_model.get_atoms())
Exemplo n.º 17
0
def align_pdbs(referencePath,
               fitPath,
               optionsrefatomsA,
               optionsfitatomsA,
               optionsoutA,
               optionsaddatomsA=""):
    """Fit one PDB/CIF structure onto a reference and save the fitted model.

    The first model of each file is used. The selected atoms of the fit
    model are superimposed on the selected atoms of the reference model, and
    the transformation is applied to every atom of the fit model. Optionally
    a residue segment of a reference chain is spliced into a chain of the
    fit model. The RMSD is printed and the fit model is saved.

    :param referencePath: path of the reference PDB or CIF file.
    :param fitPath: path of the PDB or CIF file to fit onto the reference.
    :param optionsrefatomsA: atom selection for the reference; entries are
        separated by ':' and each entry is 'atom,chain,first,last' (atom
        name, chain id, inclusive residue-number range).
    :param optionsfitatomsA: atom selection for the fit structure, same
        format as ``optionsrefatomsA``.
    :param optionsoutA: output file name, appended to the directory path
        returned when reading the fit file.
    :param optionsaddatomsA: optional 'chain,first,last:chain,first,last'
        (reference entry first, fit entry second). Residues first..last of
        the reference chain replace that range in the fit chain. The
        residue-number bounds of the reference entry are used for both
        chains.

    Exits the process with status 1 when either input path does not exist or
    when ``optionsaddatomsA`` does not hold exactly two entries.
    """
    # print() calls with a single argument behave the same on Python 2 and 3;
    # the former print statements were a syntax error on Python 3.
    if not os.path.exists(referencePath):
        print("Error: File path for reference PDB or CIF file does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    if not os.path.exists(fitPath):
        print("Error: File path for PDB or CIF file to fit to reference does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)

    # Parse the selections into tuples of (atom, chain, first, last) strings.
    ref_al = []
    for i in optionsrefatomsA.split(':'):
        ref_al.append(tuple(i.split(',')))
    fit_al = []
    for i in optionsfitatomsA.split(':'):
        fit_al.append(tuple(i.split(',')))

    ref_structure, dir_path, _ = _read_structure(referencePath, 'reference',
                                                 'reference')
    # dir_path is overwritten here: the output goes next to the fit file.
    fit_structure, dir_path, _ = _read_structure(fitPath, 'fit', 'fit')

    # Use the first model in the pdb-files for alignment
    # Change the number 0 if you want to align to another structure
    ref_model = ref_structure[0]
    fit_model = fit_structure[0]
    # Make a list of the atoms (in the structures) you wish to align:
    # the named atom of every residue whose number is in the specified range.
    ref_atoms = []
    fit_atoms = []
    # Iterate of all chains in the model in order to find all residues
    for ref_chain in ref_model:
        for r_a in ref_al:
            if ref_chain.get_id() == r_a[1]:
                for ref_res in ref_chain:
                    # Check if residue number ( .get_id() ) is in the list
                    if ref_res.get_id()[1] in range(int(r_a[2]),
                                                    int(r_a[3]) + 1):
                        # Append the selected atom to the list
                        ref_atoms.append(ref_res[r_a[0]])
    # Do the same for the sample structure
    for fit_chain in fit_model:
        for r_a in fit_al:
            if fit_chain.get_id() == r_a[1]:
                for fit_res in fit_chain:
                    if fit_res.get_id()[1] in range(int(r_a[2]),
                                                    int(r_a[3]) + 1):
                        fit_atoms.append(fit_res[r_a[0]])
    # Now we initiate the superimposer:
    super_imposer = struct.Superimposer()
    super_imposer.set_atoms(ref_atoms, fit_atoms)
    super_imposer.apply(fit_model.get_atoms())
    if optionsaddatomsA != "":
        # A region is defined by a chain, the residue number where the region
        # starts and the residue number where it ends, e.g. B,3,10;
        # both beginning and end are included.
        add_region = []
        for i in optionsaddatomsA.split(':'):
            add_region.append(tuple(i.split(',')))
        if len(add_region) != 2:
            print(
                "ERROR: Only two entries in the addatom option. One for reference, and one for fit."
            )
            sys.exit(1)
        print("Adding residues from (" + referencePath + "," +
              add_region[0][0] + "," + add_region[0][1] + "," +
              add_region[0][2] + ") ")
        print(" to (" + fitPath + "," + add_region[1][0] + "," +
              add_region[1][1] + "," + add_region[1][2] + ")")
        add_res = []
        # Add residues before missing segment from incomplete chain (fit)
        for i in fit_model:
            if i.get_id() == add_region[1][0]:
                for j in i.get_residues():
                    if j.get_id()[1] < int(add_region[0][1]):
                        add_res.append(j)
        # add residues in missing segment from complete chain (reference)
        for i in ref_model:
            if i.get_id() == add_region[0][0]:
                for j in i.get_residues():
                    if j.get_id()[1] in range(int(add_region[0][1]),
                                              int(add_region[0][2]) + 1):
                        add_res.append(j)
        # add residues after missing segment from incomplete chain (fit)
        for i in fit_model:
            if i.get_id() == add_region[1][0]:
                for j in i.get_residues():
                    if j.get_id()[1] > int(add_region[0][2]):
                        add_res.append(j)
        newChain = struct.Chain.Chain(add_region[1][0])
        for i in add_res:
            newChain.add(i)
        # put chains in a list
        chain_order = []
        for i in fit_model:
            chain_order.append(i)
        # delete all chains from the fit model
        for i in chain_order:
            fit_model.detach_child(i.get_id())
        # Add chains back in fit_model making sure that newChain replaces the incomplete chain
        for i in chain_order:
            if i.get_id() == add_region[1][0]:
                fit_model.add(newChain)
            else:
                fit_model.add(i)
        # TODO it is possible that a section with different resids is added to
        #      a fitted section. In this case the residues of the chain that
        #      was just completed need to be renumbered appropriately so that
        #      residue numbers increase monotonically.
    # Print RMSD:
    print("Fiting " + fitPath + " by " + optionsrefatomsA + " to " +
          referencePath + " by " + optionsfitatomsA + " RMSD=" +
          str(super_imposer.rms))
    # Save the aligned version
    _save_structure(fit_model, dir_path + optionsoutA)
Exemplo n.º 18
0
    def get_simrna_ready(self, renumber_residues=True):
        """Rebuild this structure as a cleaned-up, canonical RNA PDB.

        The structure is written to a temporary PDB file, re-parsed with
        Biopython and rebuilt chain by chain:

        - only the first model of the parsed structure is used,
        - terminal / AMBER-style residue names (``RC3``, ``C5``, ``RG``, ...)
          are mapped to the one-letter names ``A``, ``C``, ``G``, ``U``,
          and the result is written back onto the parsed residue as well,
        - residues are renumbered from 1 within every chain when
          ``renumber_residues`` is true,
        - if the first residue of a chain has no ``P`` atom, a phosphate
          group read from ``data/PO3_inner.pdb`` (located next to this
          module) is superimposed on ``O4'``, ``C4'``, ``C3'`` and its
          ``P``, ``OP1``, ``OP2`` and ``O5'`` atoms are added to it,
        - for each residue only the atoms listed for its base in the
          ``*_ATOMS`` tables are copied; atoms that cannot be found are
          collected and reported as ``REMARK 000`` lines on stdout.

        Residues whose normalised name is not ``G``, ``A``, ``C`` or ``U``
        are not copied to the output, but they still advance the
        renumbering counter.

        The rebuilt structure is saved to a second temporary file, loaded
        with ``StrucFile`` and its lines replace ``self.lines``.

        :param renumber_residues: renumber residues from 1 in each chain.

        .. warning:: requires: Biopython
        """
        try:
            from Bio import PDB
            from Bio.PDB import PDBIO
        except:
            # NOTE(review): bare except -- exits on any failure during the
            # import, not only on ImportError.
            sys.exit(
                'Error: Install biopython to use this function (pip biopython)'
            )

        import warnings

        # Silence Biopython warnings whose message matches these patterns.
        warnings.filterwarnings(
            'ignore',
            '.*Invalid or missing.*',
        )
        warnings.filterwarnings(
            'ignore',
            '.*with given element *',
        )

        import copy

        # Atom names copied for each base, in output order.
        G_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split(
        )
        A_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split(
        )
        U_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 O4 C5 C6".split(
        )
        C_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 N4 C5 C6".split(
        )

        # Round-trip through a file so that Biopython can parse the structure.
        # NOTE(review): fixed path in /tmp -- concurrent runs would overwrite
        # each other's file.
        ftmp = '/tmp/out.pdb'
        self.write(ftmp, v=False)

        parser = PDB.PDBParser()
        struct = parser.get_structure('', ftmp)
        model = struct[0]  # only the first model is processed

        # Empty containers for the rebuilt structure; chains are attached
        # at the end.
        s2 = PDB.Structure.Structure(struct.id)
        m2 = PDB.Model.Model(model.id)

        chains2 = []  # rebuilt chains, in input order

        missing = []  # [atom name, chain id, residue, new residue number]

        for chain in model.get_list():
            res = []
            for r in chain:
                res.append(r)

            # Shallow copy of the residue list (the residues are shared).
            res = copy.copy(res)

            c2 = PDB.Chain.Chain(chain.id)

            c = 1  # new chain, goes from 1 if renumber True
            for r in res:
                # hack for amber/qrna
                # Map terminal / prefixed residue names to one-letter names.
                r.resname = r.resname.strip()
                if r.resname == 'RC3': r.resname = 'C'
                if r.resname == 'RU3': r.resname = 'U'
                if r.resname == 'RG3': r.resname = 'G'
                if r.resname == 'RA3': r.resname = 'A'

                if r.resname == 'C3': r.resname = 'C'
                if r.resname == 'U3': r.resname = 'U'
                if r.resname == 'G3': r.resname = 'G'
                if r.resname == 'A3': r.resname = 'A'

                if r.resname == 'RC5': r.resname = 'C'
                if r.resname == 'RU5': r.resname = 'U'
                if r.resname == 'RG5': r.resname = 'G'
                if r.resname == 'RA5': r.resname = 'A'

                if r.resname == 'C5': r.resname = 'C'
                if r.resname == 'U5': r.resname = 'U'
                if r.resname == 'G5': r.resname = 'G'
                if r.resname == 'A5': r.resname = 'A'

                if r.resname.strip() == 'RC': r.resname = 'C'
                if r.resname.strip() == 'RU': r.resname = 'U'
                if r.resname.strip() == 'RG': r.resname = 'G'
                if r.resname.strip() == 'RA': r.resname = 'A'

                # New, empty residue that receives the selected atoms.
                r2 = PDB.Residue.Residue(r.id, r.resname.strip(), r.segid)
                if renumber_residues:
                    r2.id = (r2.id[0], c, r2.id[2])  ## renumber residues
                if c == 1:
                    # First residue of the chain: make sure it has a phosphate.
                    p_missing = True
                    #if p_missing:
                    #    try:
                    #        x = r["O5'"]
                    #        x.id =       ' P'
                    #        x.name =     ' P'
                    #        x.fullname = ' P'
                    #        print "REMARK 000 FIX O5' -> P fix in chain ", chain.id
                    #    except:
                    #        pass
                    for a in r:
                        if a.id == 'P':
                            p_missing = False

                    if p_missing:
                        # Locate the data directory next to this module,
                        # following the symlink if the module is one.
                        currfn = __file__
                        if currfn == '':
                            path = '.'
                        else:
                            path = os.path.dirname(currfn)
                        if os.path.islink(
                                currfn
                        ):  #path + os.sep + os.path.basename(__file__)):
                            path = os.path.dirname(
                                os.readlink(path + os.sep +
                                            os.path.basename(currfn)))

                        # Template phosphate group; its first residue is used.
                        po3_struc = PDB.PDBParser().get_structure(
                            '', path + '/data/PO3_inner.pdb')
                        po3 = [
                            po3_atom
                            for po3_atom in po3_struc[0].get_residues()
                        ][0]

                        # Fit the template onto this residue using three
                        # sugar atoms (residue = fixed, template = moving).
                        r_atoms = [r["O4'"], r["C4'"], r["C3'"]]
                        po3_atoms = [po3["O4'"], po3["C4'"], po3["C3'"]]

                        sup = PDB.Superimposer()
                        sup.set_atoms(r_atoms, po3_atoms)
                        rms = round(sup.rms, 3)  # not used further in this method

                        sup.apply(po3_struc.get_atoms())  # to all atoms of po3

                        # Graft the fitted phosphate atoms onto the residue.
                        r.add(po3['P'])
                        r.add(po3['OP1'])
                        r.add(po3['OP2'])
                        try:
                            r.add(po3["O5'"])
                        except:
                            # presumably the add fails because the residue
                            # already has an O5' -- replace it with the
                            # template's atom. TODO confirm
                            del r["O5'"]
                            r.add(po3["O5'"])

                    # Always False from here on, so the `r2.add(x)` branches
                    # below are never taken.
                    p_missing = False  # off this function

                    # save it
                    #io = PDB.PDBIO()
                    #io.set_structure( po3_struc )
                    #io.save("po3.pdb")

                # Copy the atoms listed for this base into the new residue.
                # NOTE(review): `ignore_op3` is not defined in this method
                # (presumably a module-level name -- verify); it is only
                # evaluated for the first residue of a chain. `x` is not
                # defined here either, but its branch is unreachable
                # because p_missing is False at this point.
                if str(r.get_resname()).strip() == "G":
                    for an in G_ATOMS:
                        if c == 1 and ignore_op3:
                            if an in ['P', 'OP1', 'OP2']:
                                continue
                        try:
                            if c == 1 and an == "O5'" and p_missing:
                                r2.add(x)
                            else:
                                r2.add(r[an])
                        except KeyError:
                            #print 'Missing:', an, r, ' new resi', c
                            missing.append([an, chain.id, r, c])
                    c2.add(r2)

                elif str(r.get_resname()).strip() == "A":
                    for an in A_ATOMS:
                        if c == 1 and ignore_op3:
                            if an in ['P', 'OP1', 'OP2']:
                                continue
                        try:
                            if c == 1 and an == "O5'" and p_missing:
                                r2.add(x)
                            else:
                                r2.add(r[an])
                        except KeyError:
                            #print 'Missing:', an, r, ' new resi', c
                            missing.append([an, chain.id, r, c])
                    c2.add(r2)

                elif str(r.get_resname()).strip() == "C":
                    for an in C_ATOMS:
                        if c == 1 and ignore_op3:
                            if an in ['P', 'OP1', 'OP2']:
                                continue
                        try:
                            if c == 1 and an == "O5'" and p_missing:
                                r2.add(x)
                            else:
                                r2.add(r[an])
                        except:
                            # NOTE(review): bare except here, unlike the
                            # KeyError handlers of the other three bases.
                            #print 'Missing:', an, r, ' new resi', c
                            missing.append([an, chain.id, r, c])
                    c2.add(r2)

                elif str(r.get_resname()).strip() == "U":
                    for an in U_ATOMS:
                        if c == 1 and ignore_op3:
                            if an in ['P', 'OP1', 'OP2']:
                                continue
                        try:
                            if c == 1 and an == "O5'" and p_missing:
                                r2.add(x)
                            else:
                                r2.add(r[an])
                        except KeyError:
                            #print 'Missing:', an, r,' new resi', c
                            missing.append([an, chain.id, r, c])
                    c2.add(r2)

                # The counter advances for every residue, including those
                # that were not copied (name other than G/A/C/U).
                c += 1
            chains2.append(c2)

        # Assemble structure -> model -> chains and write it out.
        io = PDBIO()
        s2.add(m2)
        for chain2 in chains2:
            m2.add(chain2)
        #print c2
        #print m2
        io.set_structure(s2)
        #fout = fn.replace('.pdb', '_fx.pdb')
        fout = '/tmp/outout.pdb'  # hack
        io.save(fout)

        # Report atoms that were expected for a base but not found.
        if missing:
            print 'REMARK 000 Missing atoms:'
            for i in missing:
                print 'REMARK 000  +', i[0], i[1], i[2], 'residue #', i[3]
            #raise Exception('Missing atoms in %s' % self.fn)
        # Reload the rewritten file and adopt its lines as this object's content.
        s = StrucFile(fout)
        self.lines = s.lines
Exemplo n.º 19
0
def _collect_ca_atoms(model, chain_id, residue_numbers):
    """Return the CA atoms of chain ``chain_id`` in ``model``.

    When ``residue_numbers`` is None or empty every residue of the chain is
    used, otherwise only residues whose sequence number is listed.
    """
    use_all = residue_numbers is None or len(residue_numbers) == 0
    selected = []
    for chain in model.get_chains():
        if chain.id != chain_id:
            continue
        for res in chain.get_residues():
            if not use_all and res.get_id()[1] not in residue_numbers:
                continue
            for atom in res:
                if atom.get_name() == 'CA':
                    selected.append(atom)
    return selected


def mp_superpose_opm(reference_chain,
                     target,
                     filename,
                     target_chain='A',
                     ref_model_id=0,
                     target_model_id=0,
                     ref_align_atoms=None,
                     target_align_atoms=None,
                     write_opm=False):
    """Superimpose a target structure onto a reference fetched from OPM.

    The reference is obtained with ``extract_from_opm``; the CA atoms of the
    selected chains are used to compute the superposition, which is then
    applied to the selected target model and saved to ``filename``.

    :param reference_chain: ``"<opm id>_<chain id>"`` of the reference.
    :param target: path or handle of the target PDB file.
    :param filename: path the superimposed target model is written to.
    :param target_chain: chain id of the target used for the fit.
    :param ref_model_id: model of the reference used for the fit.
    :param target_model_id: model of the target that is fitted and saved.
    :param ref_align_atoms: residue numbers of the reference to fit on;
        None or empty means all residues of the chain.
    :param target_align_atoms: residue numbers of the target to fit on;
        None or empty means all residues of the chain.
    :param write_opm: also write the raw reference to ``ref_opm.pdb`` in the
        directory of ``filename``.
    :raises ValueError: if the reference cannot be obtained from OPM.
    """
    # Adapted from https://gist.github.com/andersx/6354971
    # Copyright (c) 2010-2016 Anders S. Christensen

    id_parts = reference_chain.split('_')
    reference = id_parts[0]
    ref_chain = id_parts[1]

    # Get reference structure e.g. from OPM. Fail here with a clear message
    # instead of passing None on to the parser and the file writer.
    try:
        ref_struc = extract_from_opm(reference)
    except Exception as err:
        logger.error("no OPM - id found - add a workaround, e.g. PDBTM")
        raise ValueError(
            "could not obtain reference structure %r from OPM" %
            reference) from err
    if not ref_struc:
        logger.error("no OPM - id found - add a workaround, e.g. PDBTM")
        raise ValueError(
            "could not obtain reference structure %r from OPM" % reference)
    logger.info("Obtain structure from OPM: successful")

    # Parse reference (from string) and target structure (from file)
    parser = PDB.PDBParser(QUIET=True)
    bio_ref_struc_raw = parser.get_structure("reference",
                                             io.StringIO(ref_struc))
    bio_target_struc_raw = parser.get_structure("target", target)
    # Select the model number - normally always the first
    bio_ref_struc = bio_ref_struc_raw[ref_model_id]
    bio_target_struc = bio_target_struc_raw[target_model_id]

    # Atoms to align. Both selections are taken from the *selected* model;
    # walking the whole reference structure would pull in the CA atoms of
    # every model and break the one-to-one pairing.
    align_ref_atoms = _collect_ca_atoms(bio_ref_struc, ref_chain,
                                        ref_align_atoms)
    align_target_atoms = _collect_ca_atoms(bio_target_struc, target_chain,
                                           target_align_atoms)

    # Superposer: reference atoms are fixed, target atoms are moved.
    super_imposer = PDB.Superimposer()
    super_imposer.set_atoms(align_ref_atoms, align_target_atoms)
    super_imposer.apply(bio_target_struc)

    logger.info(f"RMSD of superimposed structures: {super_imposer.rms}")

    bioio = PDB.PDBIO()
    bioio.set_structure(bio_target_struc)
    bioio.save(filename)
    logger.info("Aligned structure saved")

    if write_opm:
        with open(os.path.join(os.path.dirname(filename), 'ref_opm.pdb'),
                  'w') as fp:
            fp.write(ref_struc)
        logger.info("write_opm set to true - OPM structure saved")
Exemplo n.º 20
0
def align_structures(ref_structure, mobile_structure, mobile_xmap):
    """Resample the mobile map into the frame of the reference structure.

    Models, chains and residues of the two structures are paired
    positionally; every residue pair in which *both* residues have a CA atom
    contributes one atom pair to the superposition. The resulting
    rotation/translation is applied to ``mobile_xmap.xmap`` with clipper.

    :param ref_structure: reference (fixed) Bio.PDB structure.
    :param mobile_structure: Bio.PDB structure fitted onto the reference.
    :param mobile_xmap: object whose ``xmap`` attribute is the clipper map
        belonging to the mobile structure.
    :return: the transformed map as exported by ``Xmap_float.export_numpy``.
    """
    ref_atoms = []
    alt_atoms = []
    for ref_model, alt_model in zip(ref_structure, mobile_structure):
        for ref_chain, alt_chain in zip(ref_model, alt_model):
            for ref_res, alt_res in zip(ref_chain, alt_chain):
                # Look both atoms up before storing either one: appending
                # the reference atom first and then failing on the mobile
                # residue would leave the two lists with different lengths.
                try:
                    ref_ca = ref_res['CA']  # CA = alpha carbon
                    alt_ca = alt_res['CA']
                except KeyError:
                    continue  # residue pair without a CA on both sides
                ref_atoms.append(ref_ca)
                alt_atoms.append(alt_ca)

    super_imposer = PDB.Superimposer()
    super_imposer.set_atoms(
        ref_atoms,
        alt_atoms,
    )

    rotation_mobile_to_ref = super_imposer.rotran[0]
    translation_mobile_to_ref = super_imposer.rotran[1]

    # The rotation is handed to clipper transposed.
    # NOTE(review): presumably because Bio.PDB's rotation is meant for
    # right-multiplication of coordinates -- confirm against clipper's
    # RTop_orth convention.
    rtop = clipper_python.RTop_orth(
        clipper_python.Mat33_double(np.transpose(rotation_mobile_to_ref)),
        clipper_python.Vec3_double(translation_mobile_to_ref),
    )

    # Target map with the same space group, cell and sampling as the input.
    xmap_new = clipper_python.Xmap_float(
        mobile_xmap.xmap.spacegroup,
        mobile_xmap.xmap.cell,
        mobile_xmap.xmap.grid_sampling,
    )

    clipper_python.rotate_translate(
        mobile_xmap.xmap,
        xmap_new,
        rtop,
    )

    return xmap_new.export_numpy()
Exemplo n.º 21
0
def pdb_rmsd(c1, c2, sidechains=False, superimpose=True, apply_sup=False):
    '''
    Calculate the rmsd between two RNA chains.

    Residues of both chains are filtered to the accepted nucleotide names
    and paired positionally; for every pair, each requested atom that is
    present in both residues contributes one atom pair.

    :param c1: A Bio.PDB.Chain
    :param c2: Another Bio.PDB.Chain
    :param sidechains: Also use the base atoms of each residue, not only the
                       atoms named in the module-level ``backbone_atoms``.
    :param superimpose: Superimpose the atoms before measuring the rmsd.
    :param apply_sup: Apply the superposition to all atoms of c2
                      (only used together with ``superimpose``).
    :return: ``(number of atom pairs, rmsd, rotran)``; ``rotran`` is None
             when ``superimpose`` is false.
    :raises Exception: If the filtered chains differ in length.
    '''

    a_5_names = ['P', 'O5*', 'C5*', 'C4*', 'O4*', 'O2*']
    a_5_names += ['P', "O5'", "C5'", "C4'", "O4'", "O2'"]
    a_3_names = ["C1*", "C2*", "C3*", "O3*"]
    a_3_names += ["C1'", "C2'", "C3'", "O3'"]

    # Atom names per base: sugar/phosphate names in both the old ("*") and
    # the new ("'") spelling, plus the atoms of the base itself.
    a_names = {
        'U': a_5_names + ['N1', 'C2', 'O2', 'N3', 'C4', 'O4', 'C5', 'C6'] +
        a_3_names,
        'C': a_5_names + ['N1', 'C2', 'O2', 'N3', 'C4', 'N4', 'C5', 'C6'] +
        a_3_names,
        'A': a_5_names +
        ['N1', 'C2', 'N3', 'C4', 'C5', 'C6', 'N6', 'N7', 'C8', 'N9'] +
        a_3_names,
        'G': a_5_names +
        ['N1', 'C2', 'N2', 'N3', 'C4', 'C5', 'C6', 'O6', 'N7', 'C8', 'N9'] +
        a_3_names,
    }

    all_atoms1 = []
    all_atoms2 = []

    acceptable_residues = ['A', 'C', 'G', 'U', 'rA', 'rC', 'rG', 'rU', 'DG']
    c1_list = [
        cr for cr in c1.get_list() if cr.resname.strip() in acceptable_residues
    ]
    c2_list = [
        cr for cr in c2.get_list() if cr.resname.strip() in acceptable_residues
    ]

    if len(c1_list) != len(c2_list):
        raise Exception("Chains of different length.")

    for r1, r2 in zip(c1_list, c2_list):
        if sidechains:
            # Every accepted residue name ends in its base letter
            # ('rA' -> 'A', 'DG' -> 'G'), which is the key of a_names.
            base = r1.resname.strip()[-1:]
            # The name lists overlap ('P' is listed twice, backbone names
            # may repeat); keep each name once so no atom is counted twice.
            anames = []
            for name in backbone_atoms + a_names.get(base, []):
                if name not in anames:
                    anames.append(name)
        else:
            anames = backbone_atoms

        for a in anames:
            try:
                at1 = r1[a]
                at2 = r2[a]
            except KeyError:
                # Atom absent from one of the residues: skip the pair.
                continue
            all_atoms1.append(at1)
            all_atoms2.append(at2)

    if superimpose:
        sup = bpdb.Superimposer()
        sup.set_atoms(all_atoms1, all_atoms2)

        if apply_sup:
            sup.apply(c2.get_atoms())

        return (len(all_atoms1), sup.rms, sup.rotran)
    else:
        crvs1 = np.array([a.get_vector().get_array() for a in all_atoms1])
        crvs2 = np.array([a.get_vector().get_array() for a in all_atoms2])

        return (len(all_atoms1), ftuv.vector_set_rmsd(crvs1, crvs2), None)
Exemplo n.º 22
0
def pdb_rmsd(c1, c2, sidechains=False, superimpose=True, apply_sup=False):
    '''
    Calculate the rmsd between two RNA chains and per-residue deviations.

    Residues of both chains are filtered to the accepted nucleotide names
    and paired positionally; for every pair, each requested atom that is
    present in both residues contributes one atom pair.

    :param c1: A Bio.PDB.Chain
    :param c2: Another Bio.PDB.Chain
    :param sidechains: Also use the base atoms of each residue, not only the
                       atoms named in the module-level ``backbone_atoms``.
    :param superimpose: Superimpose the atoms before measuring the rmsd.
    :param apply_sup: Apply the superposition to all atoms of c2
                      (only used together with ``superimpose``).
    :return: ``(number of atom pairs, rmsd, rotran, dev_per_res)``.
             ``rotran`` is None when ``superimpose`` is false.
             ``dev_per_res`` maps each residue of c1 to the list of
             deviations of its atoms, computed from the coordinates as they
             are before any superposition in this function.
    :raises Exception: If the filtered chains differ in length.
    '''
    import forgi.threedee.model.similarity as ftms
    a_5_names = ['P', 'O5*', 'C5*', 'C4*', 'O4*', 'O2*']
    a_5_names += ['P', "O5'", "C5'", "C4'", "O4'", "O2'"]
    a_3_names = ["C1*", "C2*", "C3*", "O3*"]
    a_3_names += ["C1'", "C2'", "C3'", "O3'"]

    # Atom names per base: sugar/phosphate names in both the old ("*") and
    # the new ("'") spelling, plus the atoms of the base itself.
    a_names = {
        'U': a_5_names + ['N1', 'C2', 'O2', 'N3', 'C4', 'O4', 'C5', 'C6'] +
        a_3_names,
        'C': a_5_names + ['N1', 'C2', 'O2', 'N3', 'C4', 'N4', 'C5', 'C6'] +
        a_3_names,
        'A': a_5_names +
        ['N1', 'C2', 'N3', 'C4', 'C5', 'C6', 'N6', 'N7', 'C8', 'N9'] +
        a_3_names,
        'G': a_5_names +
        ['N1', 'C2', 'N2', 'N3', 'C4', 'C5', 'C6', 'O6', 'N7', 'C8', 'N9'] +
        a_3_names,
    }

    all_atoms1 = []
    all_atoms2 = []

    acceptable_residues = ['A', 'C', 'G', 'U', 'rA', 'rC', 'rG', 'rU', 'DG']
    c1_list = [
        cr for cr in c1.get_list() if cr.resname.strip() in acceptable_residues
    ]
    c2_list = [
        cr for cr in c2.get_list() if cr.resname.strip() in acceptable_residues
    ]

    if len(c1_list) != len(c2_list):
        raise Exception(
            "Chains of different length. (Maybe an RNA-DNA hybrid?)")

    to_residues = []  # residue of c1 that each atom pair belongs to
    crds1 = []
    crds2 = []
    for r1, r2 in zip(c1_list, c2_list):
        if sidechains:
            # Every accepted residue name ends in its base letter
            # ('rA' -> 'A', 'DG' -> 'G'), which is the key of a_names.
            base = r1.resname.strip()[-1:]
            # The name lists overlap ('P' is listed twice, backbone names
            # may repeat); keep each name once so no atom is counted twice.
            anames = []
            for name in backbone_atoms + a_names.get(base, []):
                if name not in anames:
                    anames.append(name)
        else:
            anames = backbone_atoms

        for a in anames:
            try:
                at1 = r1[a]
                at2 = r2[a]
            except KeyError:
                # Atom absent from one of the residues: skip the pair.
                continue
            all_atoms1.append(at1)
            all_atoms2.append(at2)
            crds1.append(at1.coord)
            crds2.append(at2.coord)
            to_residues.append(r1)

    # Group the per-atom deviations by the residue they belong to.
    diff_vecs = ftms._pointwise_deviation(crds1, crds2)
    dev_per_res = defaultdict(list)
    for index, res in enumerate(to_residues):
        dev_per_res[res].append(diff_vecs[index])

    if superimpose:
        sup = bpdb.Superimposer()
        sup.set_atoms(all_atoms1, all_atoms2)

        if apply_sup:
            sup.apply(c2.get_atoms())

        return (len(all_atoms1), sup.rms, sup.rotran, dev_per_res)
    else:
        crvs1 = np.array([a.get_vector().get_array() for a in all_atoms1])
        crvs2 = np.array([a.get_vector().get_array() for a in all_atoms2])

        return (len(all_atoms1), ftuv.vector_set_rmsd(crvs1, crvs2), None,
                dev_per_res)
Exemplo n.º 23
0
def structure_in_created_structures(structure, created_structures):
    """Tell whether ``structure`` duplicates one of ``created_structures``.

    A stored structure counts as the same complex when it has the same
    multiset of chain ids and, after superimposing one chain of
    ``structure`` onto an equally named chain of it (RMSD <= 3.0), every
    one of its chains can be paired with a distinct, equally named chain of
    ``structure`` at a Kabsch RMSD <= 3.0.

    Both arguments are copied with their ``copy()`` method first, so the
    superposition is applied to the copy of ``structure``.

    Returns True for the first stored structure that matches, else False.
    """

    def coords_of(residues):
        # Coordinates of all atoms of the given residues as a 2-D array.
        return np.array([
            list(atom.get_coord())
            for atom in get_atom_list_from_res_list(residues)
        ])

    structure = structure.copy()
    created_structures = created_structures.copy()

    own_ids = tuple(sorted(ch.id[1] for ch in structure.get_chains()))

    for candidate in created_structures:
        candidate_ids = tuple(
            sorted(ch.id[1] for ch in candidate.get_chains()))

        # Different chain composition: cannot be the same complex.
        if own_ids != candidate_ids:
            continue

        # The first chain of the structure is the probe used for the fit.
        probe = list(structure.get_chains())[0]
        probe_id = probe.id[1]

        for anchor in candidate.get_chains():
            # Only an equally named chain can be the probe's counterpart.
            if anchor.id[1] != probe_id:
                continue

            probe_residues = list(probe.get_residues())
            anchor_residues = list(anchor.get_residues())

            # Restrict both sides to their common residues so that the
            # atom lists can be superimposed.
            shared_probe = get_list_of_common_res(probe_residues,
                                                  anchor_residues)
            shared_anchor = get_list_of_common_res(anchor_residues,
                                                   probe_residues)
            probe_atoms = get_atom_list_from_res_list(shared_probe)
            anchor_atoms = get_atom_list_from_res_list(shared_anchor)

            if len(probe_atoms) == 0:
                continue

            # First list is fixed, second is moving.
            sup = pdb.Superimposer()
            sup.set_atoms(anchor_atoms, probe_atoms)

            # Same id but a different fold: try the next anchor chain.
            if sup.rms > 3.0:
                continue

            # Move the whole structure into the candidate's frame.
            sup.apply(list(structure.get_atoms()))

            # Every chain of the candidate needs its own partner chain.
            matched = set()
            for wanted in candidate.get_chains():
                for option in structure.get_chains():
                    if option.id[1] != wanted.id[1] or option in matched:
                        continue

                    wanted_residues = list(wanted.get_residues())
                    option_residues = list(option.get_residues())

                    shared_wanted = get_list_of_common_res(
                        wanted_residues, option_residues)
                    shared_option = get_list_of_common_res(
                        option_residues, wanted_residues)

                    wanted_coords = coords_of(shared_wanted)
                    option_coords = coords_of(shared_option)

                    deviation = rmsd.kabsch_rmsd(option_coords,
                                                 wanted_coords)
                    if deviation <= 3.0:
                        matched.add(option)
                        break  # partner found for this chain

            if len(matched) == len(list(candidate.get_chains())):
                # All chains are paired: the structure already exists.
                return True

    # No stored structure matched.
    return False
Exemplo n.º 24
0
def _print_pending_interactions(pending, separator=False):
    """Print, per chain id, how many interactions are pending and their ids."""
    for key, values in pending.items():
        print("{}:{} tuples".format(key, len(values)))
        if separator:
            print("----------")
        for interaction in values:
            print("{}:{}".format(key, [element.id for element in interaction]))
        print("\n")


def complex_builder(interaction_dict, pdb_models, num_models, max_chains,
                    verbose):
    """Build up to ``num_models`` macrocomplexes from pairwise interactions.

    Each model starts from ``starting_model(pdb_models, verbose)`` and is
    grown by walking its chains: for every pending interaction of a chain,
    the chain is superimposed on its counterpart in the interaction and the
    partner chain is added to the complex unless ``has_clashes`` reports a
    clash. An interaction that was used is removed from the pending lists,
    together with the entries of the added chain that come from the same
    source model.

    Growth of a model stops when ``max_chains`` is reached or when a full
    pass over the chains adds nothing (which includes the case that no
    chain has interactions left).

    :param interaction_dict: chain id -> list of
        ``(model, chain matching the key, interacting chain)`` tuples.
        It is not modified; every model works on its own copy.
    :param pdb_models: models handed to ``starting_model``.
    :param num_models: number of macrocomplexes to build.
    :param max_chains: maximum number of chains per macrocomplex.
    :param verbose: report progress on stderr/stdout.
    :return: list with the built macrocomplex models.
    """
    output_objects = []
    for i in range(1, num_models + 1):
        if verbose:
            sys.stderr.write("Building Macrocomplex " + str(i) + " ...\n")
        macrocomplex = starting_model(pdb_models, verbose).copy()
        if verbose:
            start_ids = [chain.id for chain in macrocomplex.get_chains()]
            sys.stderr.write(
                "Building it from model {}, which contains chains {} and {} \n\n"
                .format(macrocomplex.id, start_ids[0], start_ids[1]))

        # Private copy of the pending interactions: consuming the caller's
        # lists would leave nothing to build the following models from.
        pending = {
            key: list(values)
            for key, values in interaction_dict.items()
        }
        if verbose:
            _print_pending_interactions(pending)

        model_stech = generate_model_profile(macrocomplex)
        macrocomplex.id = "Model_" + str(i)
        num_of_chains = 2  # The model starts with 2 chains already

        # Keep making passes over the chains while a pass still adds one.
        # A pass without additions cannot be followed by a successful one:
        # the complex did not change, so the same candidates would clash.
        progress = True
        while progress and num_of_chains < max_chains:
            progress = False
            # NOTE(review): chains added during a pass are presumably
            # visited later in the same pass because Model iteration walks
            # its live child list -- confirm for the Bio.PDB version in use.
            for chain in macrocomplex:
                if num_of_chains >= max_chains:
                    break
                candidates = pending.get(chain.id, [])
                if not candidates:
                    if verbose:
                        print("Chain " + chain.id + " empty")
                    continue
                if verbose:
                    sys.stderr.write(
                        "*** Adding interactions from chain {} ***\n\n"
                        .format(chain.id))
                # Shuffle the interactions list (to avoid repetitive behaviour)
                random.shuffle(candidates)
                # Walk a snapshot: the pending list shrinks while we go.
                for interaction in list(candidates):
                    if num_of_chains >= max_chains:
                        break
                    # Skip entries already dropped as redundant below.
                    if not any(item is interaction for item in candidates):
                        continue
                    source_model = interaction[0]
                    fix = interaction[1]  # same chain as `chain`, in the source model
                    to_move = interaction[2]  # chain interacting with `fix`

                    sup = PDB.Superimposer()
                    chain_atoms, fix_atoms = chain.get_common_atoms(fix)
                    sup.set_atoms(chain_atoms, fix_atoms)
                    move = to_move.copy()  # never move the original chain
                    sup.apply(move)
                    move_atoms = sorted(move.get_atoms())

                    if has_clashes(move_atoms, macrocomplex):
                        if verbose:
                            sys.stderr.write(
                                "->  Unsuccesful superposition between "
                                + str(chain.id) +
                                " from macrocomplex and chain " +
                                fix.id + " from model " + source_model.id +
                                ".")
                            sys.stderr.write("  Chain " + move.id +
                                             " from model " +
                                             source_model.id +
                                             " NOT ADDED.\n\n")
                    else:
                        if verbose:
                            sys.stderr.write(
                                "-> Succesful superposition between " +
                                str(chain.id) +
                                " from macrocomplex and chain " +
                                fix.id + " from model " + source_model.id +
                                ".")
                            sys.stderr.write("  Chain " +
                                             str(num_of_chains) +
                                             " added: Chain " +
                                             move.id + " from model " +
                                             source_model.id + ".\n\n")
                        move.parent = None  # Sets the parent to none to evade biopython's strict id policy
                        macrocomplex.add(move)
                        model_stech.setdefault(move.id, 0)
                        model_stech[move.id] += 1
                        num_of_chains += 1
                        progress = True
                        # Drop the interaction just used ...
                        candidates[:] = [
                            item for item in candidates
                            if item is not interaction
                        ]
                        # ... and the added chain's entries that stem from
                        # the same source model.
                        partner_entries = pending.get(move.id)
                        if partner_entries is not None:
                            partner_entries[:] = [
                                item for item in partner_entries
                                if item[0].id != source_model.id
                            ]
                    if verbose:
                        _print_pending_interactions(pending, separator=True)

        if verbose:
            # Print the model's stoichiometry
            stechometry_string = ",".join(
                key + ":" + str(model_stech[key])
                for key in sorted(model_stech.keys()))
            print("Macrocomplex's" + str(i) + " Stoichiometry is: " +
                  stechometry_string)
        print("Macrocomplex " + str(i) + " finished")
        output_objects.append(macrocomplex)  # Add model to the models list
    return output_objects
Exemplo n.º 25
0
# Number of passes over the complex that may end without adding a chain
# before the build of one model is abandoned.
_MAX_STALLED_PASSES = 2


def _remove_model_interactions(interactions, model_id):
    """Drop, in place, every interaction tuple whose model (item 0) has id *model_id*."""
    # Slice assignment keeps the same list object, so every other reference
    # to this list sees the filtered content.
    interactions[:] = [
        interaction for interaction in interactions
        if interaction[0].id != model_id
    ]


def _format_stoichiometry(model_stoich):
    """Return the stoichiometry as "A:2,B:1", sorted by chain id."""
    return ",".join(key + ":" + str(model_stoich[key])
                    for key in sorted(model_stoich))


def complex_builder(interaction_dict, pdb_models, num_models, max_chains,
                    stoich_dict, clash_dist, verbose):
    """Iteratively build macrocomplex models from pairs of interacting chains.

    For each requested model a starting model is obtained from
    ``starting_model``. The chains of the growing macrocomplex are then
    visited; for every pending interaction tuple ``(model, fix, move)`` of a
    chain, ``fix`` is superimposed onto that chain and the resulting
    transformation is applied to ``move``. If ``has_clashes`` reports no clash,
    ``move`` is added to the macrocomplex and the interactions of its source
    model are removed from the pending lists; otherwise the tuple stays pending.

    Keyword arguments:
    interaction_dict -- dictionary mapping a chain id to a list of
                        (model, fix, move) tuples. MUTATED IN PLACE: consumed
                        interactions are removed, also across models.
    pdb_models -- PDB models handed to ``starting_model``
    num_models -- number of models to build (converted with int())
    max_chains -- maximum number of chains allowed in a model (converted with int())
    stoich_dict -- dictionary with the stoichiometry requested by the user; may
                   be empty/None. Missing chain ids are added to it with count 0.
    clash_dist -- clash distance, passed through to ``has_clashes``
    verbose -- boolean, writes the progress of the build to stderr

    Returns:
    list with one macrocomplex per requested model.

    Stop conditions for each model:
    - the requested stoichiometry equals the stoichiometry of the macrocomplex;
    - the maximum number of chains is reached;
    - in a single pass, no chain of the macrocomplex has pending interactions;
    - two passes over the chains finished without adding any chain.
    """
    output_objects = []
    for i in range(1, int(num_models) + 1):
        max_chain_count = int(max_chains)
        if verbose:
            sys.stderr.write("Building Macrocomplex " + str(i) + " ...\n")
        macrocomplex = starting_model(pdb_models, verbose)

        # The starting model is already part of the complex, so none of its
        # interactions may be used again. Filtering (instead of deleting while
        # iterating) guarantees that consecutive matches are all removed.
        start_model_id = str(macrocomplex.id)
        for interactions in interaction_dict.values():
            _remove_model_interactions(interactions, start_model_id)

        if verbose:
            chain_ids = [chain.id for chain in macrocomplex.get_chains()]
            sys.stderr.write(
                "Building it from model {}, which contains chains {} and {}.\n\n"
                .format(macrocomplex.id, chain_ids[0], chain_ids[1]))
        model_stoich = get_model_stoichiometry(macrocomplex)
        macrocomplex.id = "Model_" + str(i)

        run = True  # While true, keep trying to add chains to the macrocomplex
        stalled_passes = 0  # Passes that ended without adding any chain
        num_of_chains = 2  # The starting model is taken to contribute two chains
        while run:
            chain_added = False
            num_empty_chains = 0  # Counted per pass, so it is comparable to len(macrocomplex)
            # NOTE(review): chains added below become children of macrocomplex
            # while it is being iterated; presumably they are visited in this
            # same pass (list-backed container) - verify against the model class.
            for chain in macrocomplex:
                if num_of_chains >= max_chain_count:
                    run = False  # Maximum chain threshold reached, stop running
                    break

                pending = interaction_dict[chain.id]
                if not pending:
                    if verbose:
                        sys.stderr.write(
                            "No possible interactions with chain " +
                            chain.id + " left.\n")
                    num_empty_chains += 1
                else:
                    # Shuffle the interactions list (avoiding repetitive behaviour)
                    random.shuffle(pending)
                    # Iterate over a snapshot: `pending` shrinks when a chain is added.
                    for interaction in list(pending):
                        if num_of_chains >= max_chain_count:
                            break  # Never grow beyond the allowed number of chains
                        if interaction not in pending:
                            # Already removed as redundant by an earlier addition.
                            continue
                        model = interaction[0]
                        fix = interaction[1]  # Chain equivalent to `chain` in the macrocomplex
                        move = interaction[2]  # Chain that interacts with `fix`

                        if stoich_dict:
                            model_stoich.setdefault(move.id, 0)
                            stoich_dict.setdefault(move.id, 0)
                            if stoich_dict[move.id] <= model_stoich[move.id]:
                                # Adding the chain would surpass the given stoichiometry
                                if verbose:
                                    sys.stderr.write(
                                        "  The current chain already fulfills the stoichiometry of the complex.\n"
                                    )
                                    sys.stderr.write("  Chain " + move.id +
                                                     " from model " +
                                                     model.id +
                                                     " NOT ADDED.\n\n")
                                continue  # Go to the next interaction tuple

                        sup = PDB.Superimposer()
                        chain_atoms, fix_atoms = chain.get_common_atoms(fix)
                        sup.set_atoms(chain_atoms, fix_atoms)  # Generate the superposition
                        # NOTE(review): `move` is transformed in place even when the
                        # clash test below rejects it, while `fix` is left untouched;
                        # a later retry of this pair may start from inconsistent
                        # coordinates - confirm whether a copy should be used.
                        sup.apply(move)
                        move_atoms = sorted(move.get_atoms())

                        if has_clashes(move_atoms, macrocomplex, clash_dist):
                            if verbose:
                                sys.stderr.write(
                                    "  Unsuccesful superposition between "
                                    + str(chain.id) +
                                    " from macrocomplex and chain " +
                                    fix.id + " from model " + model.id +
                                    ".\n")
                                sys.stderr.write("  Chain " + move.id +
                                                 " from model " +
                                                 model.id +
                                                 " NOT ADDED.\n\n")
                            continue

                        if verbose:
                            sys.stderr.write(
                                "  Succesful superposition between " +
                                str(chain.id) +
                                " from macrocomplex and chain " +
                                fix.id + " from model " + model.id +
                                ".\n")
                            sys.stderr.write("  Chain " +
                                             str(num_of_chains) +
                                             " added: Chain " +
                                             move.id + " from model " +
                                             model.id + ".\n\n")
                        move.parent = None
                        macrocomplex.add(move)
                        model_stoich.setdefault(move.id, 0)
                        model_stoich[move.id] += 1
                        num_of_chains += 1
                        chain_added = True
                        # The interaction that has just been used is no longer pending ...
                        pending.remove(interaction)
                        # ... and neither are the tuples describing the same model
                        # from the point of view of the added chain.
                        _remove_model_interactions(interaction_dict[move.id],
                                                   model.id)

                if stoich_dict and stoich_dict == model_stoich:
                    run = False  # Requested stoichiometry fulfilled
                    break

            if num_empty_chains >= len(macrocomplex):
                # No chain of the macrocomplex has interactions left to make
                run = False
            if not chain_added:
                # An explicit flag replaces the comparison against a shallow
                # dict copy, which shared its lists and was therefore always equal.
                stalled_passes += 1
                if stalled_passes >= _MAX_STALLED_PASSES:
                    run = False

        stoichiometry_string = _format_stoichiometry(model_stoich)
        sys.stderr.write("\nStoichiometry of macrocomplex " + str(i) +
                         " is: " + stoichiometry_string + ".\n")
        sys.stderr.write("Macrocomplex " + str(i) + " finished.\n")
        output_objects.append(macrocomplex)
    return output_objects
Exemplo n.º 26
0
def _ca_atoms(structure):
    """Return the atoms of *structure* whose id is 'CA', in iteration order."""
    return [atom for atom in structure.get_atoms() if atom.get_id() == 'CA']


def main():
    """Compare a list of PDB structures against a reference structure.

    Reads the file names listed (one per line) in the ``-p`` file, loads each
    one from the ``-i`` directory, superimposes its CA atoms onto the CA atoms
    of the ``-r`` reference and records the resulting RMS value. Two CSV files
    with the columns ``pdb`` and ``rms`` are written to the ``-o`` directory:
    ``all_<name>.csv`` with every structure and ``good_<name>.csv`` with the
    structures whose RMS is at most 10.

    NOTE: Bio.PDB's Superimposer.set_atoms requires both atom lists to have
    the same length, so every listed structure must contain as many CA atoms
    as the reference; otherwise the run is aborted by the library's exception.
    """
    import os  # Local import: only needed for portable path handling here

    parser = argparse.ArgumentParser(description='Compares a list of pdb \
                                     files to a reference pdb structure')
    # Both inputs are mandatory: without them there is nothing to compare, and
    # argparse now reports that instead of failing later on a None value.
    parser.add_argument('-p',
                        '--pdb_file_list',
                        action='store',
                        nargs=1,
                        dest='pdb',
                        required=True,
                        help='List of pdb files (.txt) - provide \
                        full pathname if not in current directory')
    parser.add_argument('-r',
                        '--reference_pdb',
                        action='store',
                        nargs=1,
                        dest='ref',
                        required=True,
                        help='Full path to reference .pdb file')
    # The suffix used to default to './', which put the output into a
    # non-existent 'good_.' / 'all_.' directory; an empty suffix is used instead.
    parser.add_argument('-n',
                        '--name',
                        action='store',
                        nargs=1,
                        dest='name',
                        default=[''],
                        help='Output file name \
                        suffix to be used')
    parser.add_argument('-i',
                        '--input_directory',
                        action='store',
                        nargs=1,
                        dest='input',
                        default=['./'],
                        help='Directory where \
                        input pdb files are stored')
    parser.add_argument('-o',
                        '--output_directory',
                        action='store',
                        nargs=1,
                        dest='output',
                        default=['./'],
                        help='Directory where \
                        output log should be written')
    args = vars(parser.parse_args())
    input_dir = args['input'][0]
    output_dir = args['output'][0]
    name = args['name'][0]

    # Get the list of pdb files; blank lines are not file names
    with open(args['pdb'][0], 'r') as pdb_file_list:
        pdb_files = [
            line for line in pdb_file_list.read().splitlines() if line.strip()
        ]

    pdb_parser = PDB.PDBParser()
    ref_atoms = _ca_atoms(pdb_parser.get_structure('ref', args['ref'][0]))

    rms_cutoff = 10  # Structures with an RMS up to this value count as "good"
    good_pdb = []
    good_rms = []
    all_pdb = []
    all_rms = []
    sup = PDB.Superimposer()
    for pdb_file in pdb_files:
        # os.path.join works whether or not the directory ends with a separator
        structure = pdb_parser.get_structure(
            pdb_file, os.path.join(input_dir, pdb_file))
        sup.set_atoms(ref_atoms, _ca_atoms(structure))
        all_pdb.append(pdb_file)
        all_rms.append(sup.rms)
        if sup.rms <= rms_cutoff:
            good_pdb.append(pdb_file)
            good_rms.append(sup.rms)

    good_struct = pd.DataFrame({'pdb': good_pdb, 'rms': good_rms})
    good_struct.to_csv(os.path.join(output_dir, 'good_' + name + '.csv'),
                       index=False)

    all_struct = pd.DataFrame({'pdb': all_pdb, 'rms': all_rms})
    all_struct.to_csv(os.path.join(output_dir, 'all_' + name + '.csv'),
                      index=False)
Exemplo n.º 27
0
from Bio import PDB
from Bio.PDB import PDBIO

# Superimpose chain A of 2DN1 onto chain B using three CA atoms per chain,
# then save the structure with the transformed chain A as a PDB file.
parser = PDB.MMCIFParser()
structure = parser.get_structure("2DN1", "dn\\2dn1.cif")
model = structure[0]

# Residue numbers whose CA atoms anchor the superposition in both chains.
anchor_residues = (10, 20, 30)
moving = [model["A"][number]["CA"] for number in anchor_residues]
fixed = [model["B"][number]["CA"] for number in anchor_residues]

# Compute the rotation/translation that maps the moving atoms onto the fixed ones.
sup = PDB.Superimposer()
sup.set_atoms(fixed, moving)
rotation, translation = sup.rotran
print(sup.rotran)
print('RMS:', sup.rms)

# Apply that transformation to the whole of chain A, not just the anchors.
chainA = model['A']
chainA.transform(rotation, translation)

# Write the modified structure to disk.
io = PDBIO()
io.set_structure(structure)
io.save('my_structure_temp.pdb')