示例#1
0
def _ask_user_for_chain(chains):
    """Prompt for a chain letter and return the matching chain descriptor.

    The first character typed by the user is compared with the id of every
    descriptor's underlying chain (``descriptor.chain.get_id()``).

    Raises:
        ValueError: if nothing was entered or no chain has the entered id.
    """
    print('please enter a letter of chain to work with: ', end='')
    answer = input()
    if not answer:
        raise ValueError('no chain letter was entered')
    chain_letter = answer[0]

    # Look the chain up by its id instead of computing an offset from 'A':
    # chain ids are not guaranteed to be consecutive letters starting at 'A'.
    for candidate in chains:
        if candidate.chain.get_id() == chain_letter:
            return candidate

    available = ', '.join(str(c.chain.get_id()) for c in chains)
    raise ValueError(
        f'no chain {chain_letter!r} in structure (available: {available})'
    )


def get_chain(structure: Structure, return_a_chain: bool = False) -> ChainDesc:
    """Select the chain of ``structure`` to work with.

    Selection order:

    1. If ``return_a_chain`` is true and a chain with id ``'A'`` exists, that
       chain is returned immediately.  NOTE(review): this path returns the raw
       object yielded by ``structure.get_chains()``, not a ``ChainDesc`` -
       confirm callers expect that.  If there is no chain ``'A'`` the normal
       selection below is used.
    2. If the structure has exactly one chain, its ``ChainDesc`` is returned.
    3. Otherwise the shortened sequences of all chains that have ligands are
       aligned pairwise.  As soon as one pair scores below the threshold the
       user is asked which chain to use (by its letter).
    4. If every pair scores at or above the threshold, the chain with the
       longest shortened sequence is returned (first one wins on ties).

    Args:
        structure: object providing ``get_chains()``.
        return_a_chain: prefer chain ``'A'`` when it exists.

    Returns:
        The selected chain (see the note on ``return_a_chain`` above).

    Raises:
        ValueError: if no chain with ligands is available to choose from, or
            if the user's answer does not name a chain of the structure.
    """
    if return_a_chain:
        for chain in structure.get_chains():
            if chain.get_id() == 'A':
                return chain
    chains = [ChainDesc(chain=c) for c in structure.get_chains()]

    # fast return if structure contains a single chain
    if len(chains) == 1:
        print('structure contains a single chain')
        return chains[0]
    print(f'structure contains {len(chains)} chains')

    chains_with_ligands = [c for c in chains if c.if_has_ligands()]
    print(f'chains with ligands: {len(chains_with_ligands)}')
    if not chains_with_ligands:
        # Fail with a clear message instead of an IndexError further down.
        raise ValueError('structure contains no chains with ligands')

    # Computed once and reused for both the alignments and the length ranking
    # (assumes get_shortened_seq() returns the same value on every call).
    shortened_chain_sequences = [
        c.get_shortened_seq() for c in chains_with_ligands
    ]

    # NOTE(review): the value compared below is the raw alignment score
    # returned by pairwise2, while 0.95 reads like a normalised similarity -
    # confirm the intended scale of this threshold.
    equality_threshold = 0.95

    for first_index, first_sequence in enumerate(shortened_chain_sequences):
        for second_sequence in shortened_chain_sequences[first_index + 1:]:
            alignments = pairwise2.align.globalds(
                first_sequence, second_sequence, blosum62, -10, -0.5)
            # Each alignment is (seqA, seqB, score, begin, end); only the
            # score of the best (first) alignment is needed.
            score = alignments[0][2]
            if score < equality_threshold:
                print(f'two different sequences found (score={score})')
                return _ask_user_for_chain(chains)

    # As we are here, no different chains were found - choose the longest
    # one.  max() keeps the first of several equally long chains.
    longest_index = max(
        range(len(shortened_chain_sequences)),
        key=lambda index: len(shortened_chain_sequences[index]),
    )
    longest_chain = chains_with_ligands[longest_index]
    print(f'chain selected: {longest_chain.chain.get_id()}')

    return longest_chain
示例#2
0
class PyRyStructure(object):
    """
        Represents a structure as an entity (very wide definition).

        Used for storing information about structures, creating Bio.PDB
        structures, saving structure files etc.
    """
    def __init__(self, structure=None):
        """
        Parameters:
        -----------
            structure   :   optional structure object to wrap
        """
        # Stored as given.  A truthiness test must not be used here: it would
        # silently replace any structure object that evaluates as false
        # (e.g. a container that has no children yet) with None.
        self.struct = structure
        self.sequence = ''  # sequence taken from structure
        #----------------will decide on one of these 3 ----------------------------
        self.center_of_mass = []  # [x,y,z] coords of center of mass
        self.geometric_center = []  # geometric centre
        self.center = None  # actual center of given complex component
        #--------------------------------------------------------------------------
        self.chain = ''  # chain name from structure file
        self.moltype = ''  # protein, DNA, RNA

    def __str__(self):
        return "%s %s %s %s %s" % (self.struct, self.chain,
                                   self.center_of_mass, self.moltype,
                                   self.sequence)

    def add_chain_to_struct(self, chain_id):
        """
            adds a new, empty chain to the first model (index 0) of
            self.struct
        Parameters:
        -----------
            chain_id    :   chain name
        """
        chain = Chain(chain_id)
        self.struct[0].add(chain)

    def add_residues_to_structure(self, struct, chain_id, chain2_id):
        """
            adds residues from struct to a given structure (self.struct)
        Parameters:
        -----------
            struct      :   template structure object with residues which will
                            be added to self.struct
            chain_id    :   name of template chain
            chain2_id   :   name of the (already existing) target chain in
                            self.struct
        """
        target_chain = self.struct[0][chain2_id]
        # Plain loop: the calls are made for their side effect only.
        for res in struct[0][chain_id].child_list:
            target_chain.add(res)

    def calculate_atom_atom_distance(self, atom1, atom2):
        """
            calculates distance between two atoms
        Parameters:
        -----------
            atom1, atom2    :   atom objects supporting subtraction
                                (presumably Bio.PDB.Atom, where ``a - b``
                                is the distance - verify against caller)
        Returns:
        ---------
            result of ``atom1 - atom2``
        Raises:
        -------
            PyRyStructureError if self.struct is not a Bio.PDB entity
        """
        # is_structure() is a method; it returns True or raises.
        if self.is_structure():
            return atom1 - atom2

    def calculate_centre_of_mass(self, entity=None, geometric=False):
        """
            calculates centre of mass for given structure

            Atoms are taken from self.struct when it is a Bio.PDB entity,
            otherwise from ``entity`` when that is an iterable of atoms.
        Parameters:
        -----------
            entity      :   optional iterable of atoms, used only when
                            self.struct is not a Bio.PDB entity
            geometric   :   optional; when True all atoms get the same weight
                            (geometric centre), otherwise each atom is
                            weighted by its ``molweight`` (default)
        Returns:
        ---------
            centre coordinates as an array of x, y, z; the value is also
            stored in self.center_of_mass (and in self.geometric_center when
            geometric=True)
        Raises:
        -------
            ValueError  :   if wrong object is given as a target, or if there
                            is nothing to average (no atoms / zero total
                            weight)
        """
        if isinstance(self.struct,
                      Entity.Entity):  # Structure, Model, Chain, Residue
            atom_list = self.struct.get_atoms()
        elif hasattr(entity, '__iter__') and any(
                x.level == 'A' for x in entity):  # List of Atoms
            # any() is used on purpose: a bare filter(...) object is always
            # true on Python 3, which made this test meaningless.
            atom_list = entity
        else:  # Some other weirdo object
            raise ValueError('Center of Mass can only be calculated from \n\
        the following objects:Structure, Model, Chain, Residue, list of Atoms.'
                             )

        # The accumulator has to be an array: ``list += array`` would extend
        # the list with three more items instead of adding element-wise.
        weighted_sum = array([0., 0., 0.])
        total_weight = 0.

        for atom in atom_list:
            weight = 1. if geometric else atom.molweight
            position = array([
                float(atom.coord[0]),
                float(atom.coord[1]),
                float(atom.coord[2])
            ])
            total_weight += weight
            weighted_sum += position * weight

        if total_weight == 0:
            raise ValueError('Center of Mass cannot be calculated: '
                             'no atoms or zero total mass.')

        new_centre = weighted_sum / total_weight

        if geometric:
            self.geometric_center = new_centre
        # Kept for backward compatibility: the result has always been
        # published through center_of_mass, whatever the weighting.
        self.center_of_mass = new_centre
        return self.center_of_mass

    def create_PDB_obj(self, id, filename):
        """
            creates Bio.PDB object from pdb file and stores it in self.struct
        Parameters:
        -----------
            id          : name of structure
            filename    : file name
        """
        parser = PDBParser()
        self.struct = parser.get_structure(str(id), filename)

    def create_new_structure(self, name, chain_id):
        """
            creates new Bio.PDB structure object and stores it in self.struct
        Parameters:
        -----------
            name        :   structure name
            chain_id    :   chain name (e.g. A, B, C)
        """
        self.struct = Structure(name)
        my_model = Model(0)
        my_chain = Chain(chain_id)
        self.struct.add(my_model)
        self.struct[0].add(my_chain)

    def get_chainname(self):
        """
            stores the name of the first chain of self.struct in self.chain
        Returns:
        ---------
            self.chain  :   id of the first chain
        """
        self.chain = list(self.struct.get_chains())[0].id
        return self.chain

    def get_mol_sequence(self):
        """
            retrieves struct sequence as one letter code

            The sequence is rebuilt from scratch on every call, so calling
            the method repeatedly does not duplicate it.
        Returns:
        ---------
            self.sequence : sequence of given structure in one letter code
        """
        ##----must be included in tests!!!--------------------

        one_letter_names = RESNAMES.values()  # loop invariant
        letters = []

        for resi in self.struct.get_residues():
            resi_name = resi.resname.strip().upper()

            if len(resi_name) == 1 and resi_name in one_letter_names:
                # one letter nucleotide names are used as they are
                letters.append(resi_name)
            elif resi_name in to_one_letter_code:
                # names with a known one letter equivalent (incl. hetatms
                # with modifications)
                letters.append(to_one_letter_code[resi_name])
            elif resi_name in LIGANDS:
                # do not add ions and ligands into sequence
                continue
            else:
                # if anything else appeared include as X
                letters.append("X")

        self.sequence = ''.join(letters)
        return self.sequence

    def get_moltype(self):
        """
            determines if a certain component is DNA, RNA or protein, based
            on the FIRST residue of self.struct only: its three letter name
            when it has one, its atom names otherwise
        Returns:
        ---------
            self.moltype (left unchanged when nothing could be decided)
        Raises:
        -------
            PyRyStructureError if a 3 letter residue name is neither a known
            amino acid nor a key of RESNAMES
        """
        res = list(self.struct.get_residues())[0]
        resname = res.resname.strip()

        if len(resname) == 3:
            if resname in AMINOACIDS.values():
                self.moltype = 'protein'
            elif resname not in RESNAMES.keys():
                raise PyRyStructureError("Wrong 3letter name", resname)
            # a 3 letter name known to RESNAMES leaves moltype untouched
        else:
            for at in res:
                atom_name = at.fullname.strip()
                if atom_name == "CA":
                    self.moltype = 'protein'
                    break
                elif atom_name == "C4'" or atom_name == "C4*":
                    # sugar atom found: an O2' atom in the same residue
                    # means RNA, its absence DNA
                    for atom in res.child_list:
                        if atom.fullname.strip() == "O2'":
                            self.moltype = "RNA"
                            break
                    if self.moltype == "":
                        self.moltype = "DNA"
        return self.moltype

    def is_structure(self):
        """
            checks if a given structure is Bio.PDB structure object
        Returns:
        ---------
            True if self.struct is a Bio.PDB entity
        Raises:
        ------
            PyRyStructureError  : if self.struct is not Bio.PDB object
        """
        if isinstance(self.struct,
                      Entity.Entity):  # Structure, Model, Chain, Residue
            return True
        else:
            raise PyRyStructureError('%s should be one of\n\
                     the following objects:Structure, Model, Chain, Residue, \n\
                                                  list of Atoms.' %
                                     (self.struct))

    def set_chain_name(self, chain):
        """
            sets chain name
        """
        self.chain = chain

    def set_moltype(self, moltype):
        """
            sets molecule type
        """
        self.moltype = moltype

    def set_structure(self, struct):
        """
            sets the wrapped structure object
        """
        self.struct = struct

    def set_pyrystructure(self, structure=None):
        """
            sets structure as PyRyStructure attribute (only when none is set
            yet) and derives sequence (only when still empty), chain name and
            molecule type from it

        Parameters:
        -----------
            structure   :   Bio.PDB structure object
        """
        if self.struct is None:
            self.struct = structure
        if self.sequence == '':
            self.get_mol_sequence()
        self.get_chainname()
        self.get_moltype()

    def set_sequence(self, seq):
        """
            sets sequence
        """
        self.sequence = seq

    def write_structure(self, filename):
        """
            writes structure to a pdb file, saving changed coordinates
        Parameters:
        -----------
            filename    :   final name of structure file
        """
        out = PDBIO()
        out.set_structure(self.struct)
        out.save(filename)