Пример #1
0
def getLigandResIds(ligchemid: str, struct: Structure) -> List[Residue]:
    """Collect every residue of @struct whose residue name equals @ligchemid.

    The residues are returned in the order in which
    ``struct.get_residues()`` yields them; the list is empty when nothing
    matches.
    """
    # ligchemids are of type https://www.rcsb.org/ligand/IDS
    matching: List[Residue] = [
        residue for residue in struct.get_residues()
        if residue.get_resname() == ligchemid
    ]
    return matching
Пример #2
0
def featurize(structure: Structure) -> list[Any]:
    """
    Assemble a small feature vector for `structure`.

    The returned list holds, in this order:
      * a one-element list with the total area reported by freesasa,
      * a list with one value per class returned by
        ``freesasa.classifyResults``,
      * ``residues[1]["CA"] - residues[-2]["CA"]`` (presumably the
        distance between those two atoms -- confirm against the atom type),
      * a one-element list with the number of residues,
      * the dihedral computed by ``calc_dihedral`` from the CA vectors of
        residues 1, 2, -3 and -2.
    """
    # NOTE(review): `pdbpath` is not defined in this function; presumably it
    # is a module-level name pointing at the file `structure` came from --
    # confirm against the rest of the file.
    sasa_structure = freesasa.Structure(pdbpath)
    sasa_result = freesasa.calc(sasa_structure)
    classified_areas = freesasa.classifyResults(sasa_result, sasa_structure)

    Total_area = [sasa_result.totalArea()]
    Polar_Apolar = [classified_areas[name] for name in classified_areas]

    # every residue of the structure, in iteration order
    all_residues = list(structure.get_residues())
    seq_length = [len(all_residues)]

    # the first and last residues are skipped on purpose by the indices below
    protein_length = all_residues[1]["CA"] - all_residues[-2]["CA"]
    ca_vectors = [
        all_residues[index]["CA"].get_vector() for index in (1, 2, -3, -2)
    ]
    angle = calc_dihedral(*ca_vectors)

    return [Total_area, Polar_Apolar, protein_length, seq_length, angle]
Пример #3
0
def get_ptc_residues(struct: Structure, pdbid: str,
                     conserved_nucleotides: List[int]) -> List[Residue]:
    """Return the residues of @struct whose sequence number is listed in
    @conserved_nucleotides.

    A residue is selected when ``int(residue.get_id()[1])`` is a member of
    @conserved_nucleotides. Residues are returned in the order in which
    ``struct.get_residues()`` yields them.

    @pdbid is accepted for interface compatibility but is not used here.
    The positions have to be supplied as integers (e.g. 2055, 2451, 2452,
    2504, 2505, 2506, 2507); string entries never match because the residue
    number is converted with ``int`` before the comparison.
    """
    # Build the lookup once so each residue costs a hash probe rather than
    # a scan of the whole list.
    wanted_positions = frozenset(conserved_nucleotides)

    return [
        residue for residue in struct.get_residues()
        if int(residue.get_id()[1]) in wanted_positions
    ]
Пример #4
0
def featurize(structure: Structure) -> list[Any]:
    """
    Derive two simple 3D features from `structure`.

    Returns ``[protein_length, angle]`` where ``protein_length`` is
    ``residues[1]["CA"] - residues[-2]["CA"]`` (presumably the distance
    between those atoms -- confirm against the atom type) and ``angle`` is
    the result of ``calc_dihedral`` on the CA vectors of residues 1, 2, -3
    and -2.
    """
    # every residue of the structure, in iteration order
    all_residues = list(structure.get_residues())

    # the first and last residues are skipped on purpose by the indices below
    protein_length = all_residues[1]["CA"] - all_residues[-2]["CA"]
    ca_vectors = [
        all_residues[index]["CA"].get_vector() for index in (1, 2, -3, -2)
    ]
    angle = calc_dihedral(*ca_vectors)

    return [protein_length, angle]
Пример #5
0
def getLigandResIds(ligchemid: str, struct: Structure, dict_or_res: str = 'dict') -> List[ResidueFullIdDict]:
    """Return every residue of @struct whose residue name equals @ligchemid.

    @dict_or_res is a string flag (not a boolean): with the default
    ``'dict'`` each matching residue is wrapped in ``ResidueFullIdDict``;
    with any other value the matching residue objects themselves are
    returned, so in that mode the result is a list of residues rather than
    what the return annotation states.

    Matches keep the order in which ``struct.get_residues()`` yields them.
    """
    ligandResidues: List[Residue] = [
        residue for residue in struct.get_residues()
        if residue.get_resname() == ligchemid
    ]
    if dict_or_res == 'dict':
        return [ResidueFullIdDict(res) for res in ligandResidues]
    return ligandResidues
Пример #6
0
def getLigandsResIds(ligChemIds:List[str], struct: Structure)->List[ResidueFullIdDict]:
    """Wrap every residue of @struct whose residue name occurs in @ligChemIds.

    All residues are filtered first; the matches are then converted to
    ``ResidueFullIdDict`` objects, in the order in which
    ``struct.get_residues()`` yields them.
    """
    matching: List[Residue] = [
        residue for residue in struct.get_residues()
        if residue.get_resname() in ligChemIds
    ]
    wrapped = []
    for residue in matching:
        wrapped.append(ResidueFullIdDict(residue))
    return wrapped
Пример #7
0
class PyRyStructure(object):
    """
        Represents a structure as an entity (very wide definition).

        Used for storing information about structures, creating Bio.PDB
        structures, saving structure files etc.
    """
    def __init__(self, structure=None):
        # Stored exactly as given. A truthiness test must not be used here:
        # a container-like structure object that happens to be empty would
        # evaluate as false and be silently replaced by None.
        self.struct = structure
        self.sequence = ''  # sequence taken from structure
        #----------------will decide on one of these 3 ----------------------------
        self.center_of_mass = []  # [x,y,z] coords of center of mass
        self.geometric_center = []  # geometric centre
        self.center = None  # actual center of given complex component
        #--------------------------------------------------------------------------
        self.chain = ''  # chain name from structure file
        self.moltype = ''  # protein, DNA, RNA

    def __str__(self):
        return "%s %s %s %s %s" % (self.struct, self.chain,
                                   self.center_of_mass, self.moltype,
                                   self.sequence)

    def add_chain_to_struct(self, chain_id):
        """
            adds a new, empty chain to the first model (index 0) of
            self.struct
        Parameters:
        -----------
            chain_id    :   chain name
        Returns:
        ---------
            nothing; self.struct is modified in place
        """
        chain = Chain(chain_id)
        self.struct[0].add(chain)

    def add_residues_to_structure(self, struct, chain_id, chain2_id):
        """
            adds residues from struct to a given structure (self.struct)
        Parameters:
        -----------
            struct      :   template structure object with residues which will
                            be added to self.struct
            chain_id    :   name of template chain (looked up in model 0)
            chain2_id   :   name of the receiving chain in self.struct
                            (looked up in model 0; it must already exist)
        Returns:
        ---------
            nothing; self.struct is modified in place
        """
        target_chain = self.struct[0][chain2_id]
        # plain loop: the calls are made for their side effect only
        for res in struct[0][chain_id].child_list:
            target_chain.add(res)

    def calculate_atom_atom_distance(self, atom1, atom2):
        """
            calculates distance between two atoms
        Parameters:
        -----------
            atom1, atom2    :   Bio.PDB.Atom entities
        Returns:
        ---------
            the result of atom1 - atom2 (for Bio.PDB atoms this is the
            distance between them in 3D space)
        Raises:
        -------
            PyRyStructureError if self.struct is not a Bio.PDB entity
            (raised by self.is_structure())
        """
        # is_structure is a method of this class, so it has to be reached
        # through self; it either returns True or raises.
        if self.is_structure():
            return atom1 - atom2

    def calculate_centre_of_mass(self, entity=None, geometric=False):
        """
            calculates the mass-weighted centre of a set of atoms, stores it
            in self.center_of_mass and returns it

            The atoms are taken from self.struct when it is a Bio.PDB
            entity; only otherwise is the `entity` argument consulted, and
            it must then be a non-empty iterable of atoms (objects whose
            `level` attribute is 'A').
            Every atom is expected to provide `coord` and `molweight`.
        Parameters:
        -----------
            entity      : optional iterable of atoms, used only when
                          self.struct is not a Bio.PDB entity
            geometric   : optional
                          NOTE(review): accepted but currently not used; the
                          result is always mass weighted -- confirm whether
                          an unweighted centre should be supported
        Returns:
        ---------
            centre of mass coordinates [x,y,z] as an array
        Raises:
        -------
            ValueError  :   if wrong object is given as a target, or if the
                            total mass of the atoms is zero
        """
        #if self.struct == None: raise PyRyStructureError("You haven't provided \
        #                                any structure to PyRyStructure class")

        if isinstance(self.struct,
                      Entity.Entity):  # Structure, Model, Chain, Residue
            atom_list = self.struct.get_atoms()
        else:
            # Materialise once so that validating a generator does not use
            # it up before the summation below. A bare filter(...) cannot be
            # used as the test: on Python 3 it is an iterator object and
            # therefore always true.
            candidates = list(entity) if hasattr(entity, '__iter__') else []
            if not candidates or not all(
                    getattr(item, 'level', None) == 'A'
                    for item in candidates):  # not a list of Atoms
                raise ValueError('Center of Mass can only be calculated from \n\
        the following objects:Structure, Model, Chain, Residue, list of Atoms.'
                                 )
            atom_list = candidates

        # The accumulator has to be an array: "+=" on a plain list would
        # append the coordinates instead of adding them element-wise.
        # NOTE(review): `array` is presumably numpy's array -- confirm.
        weighted_sum = array([0., 0., 0.])
        whole_mass = 0

        for atom in atom_list:
            atom_centre = array([
                float(atom.coord[0]),
                float(atom.coord[1]),
                float(atom.coord[2])
            ])
            whole_mass += atom.molweight
            weighted_sum += atom_centre * atom.molweight

        if whole_mass == 0:
            # no atoms at all, or only atoms without mass
            raise ValueError('Center of Mass is undefined: the total mass '
                             'of the given atoms is zero.')

        self.center_of_mass = weighted_sum / whole_mass
        return self.center_of_mass

    def create_PDB_obj(self, id, filename):
        """
            creates Bio.PDB object from pdb file and stores it in
            self.struct
        Parameters:
        -----------
            id          : name of structure (converted with str())
            filename    : file name
        """
        parser = PDBParser()
        self.struct = parser.get_structure(str(id), filename)

    def create_new_structure(self, name, chain_id):
        """
            creates new Bio.PDB structure object and stores it in
            self.struct
        Parameters:
        -----------
            name        :   structure name
            chain_id    :   chain name (e.g. A, B, C)
        Returns:
        ---------
            nothing; self.struct becomes a structure holding model 0 with
            one empty chain inside
        """
        self.struct = Structure(name)
        my_model = Model(0)
        my_chain = Chain(chain_id)
        self.struct.add(my_model)
        self.struct[0].add(my_chain)

    def get_chainname(self):
        """
            stores the id of the first chain of self.struct in self.chain
        Returns:
        ---------
            self.chain  :   id of the first chain
        Raises:
        -------
            IndexError if self.struct has no chains
        """
        self.chain = list(self.struct.get_chains())[0].id
        return self.chain

    def get_mol_sequence(self):
        """
            retrieves struct sequence as one letter code

            The sequence is rebuilt from scratch on every call, so calling
            the method repeatedly (or after set_sequence) does not append to
            a previously stored value.
        Parameters:
        -----------
            self.struct : structure object
        Returns:
        ---------
            self.sequence : sequence of given structure in one letter code
        """
        ##----must be included in tests!!!--------------------

        one_letter_names = RESNAMES.values()
        letters = []

        for resi in self.struct.get_residues():
            resi_name = resi.resname.strip().upper()

            if len(resi_name) == 1 and resi_name in one_letter_names:
                # one letter nucleotide names are taken as they are
                letters.append(resi_name)
            elif resi_name in to_one_letter_code:
                # names (incl. hetatms with modifications) with a known
                # one letter equivalent
                letters.append(to_one_letter_code[resi_name])
            elif resi_name in LIGANDS:
                # ions and ligands are not part of the sequence
                continue
            else:
                # anything else is included as X
                letters.append("X")

        self.sequence = "".join(letters)
        return self.sequence

    def get_moltype(self):
        """
            determines from the FIRST residue of self.struct whether the
            component is DNA, RNA or protein and stores the answer in
            self.moltype

            * residue name of 3 characters: 'protein' if the name is one of
              AMINOACIDS' values; if it is a key of RESNAMES instead,
              self.moltype is left unchanged.
            * any other name length: the atoms are scanned. A "CA" atom
              means 'protein'. A "C4'" / "C4*" atom means "RNA" when the
              residue also has an "O2'" atom; otherwise "DNA", but only if
              self.moltype is still empty at that point.
        Returns:
        ---------
            self.moltype
        Raises:
        -------
            PyRyStructureError if a 3 letter resname is neither an amino
            acid nor a known nucleotide name
        """

        res = list(self.struct.get_residues())[0]
        resname = res.resname.strip()
        if len(resname) == 3:
            if resname in AMINOACIDS.values():
                self.moltype = 'protein'
            elif resname not in RESNAMES.keys():
                raise PyRyStructureError("Wrong 3letter name", resname)
        else:
            for at in res:
                atom_name = at.fullname.strip()
                if atom_name == "CA":
                    self.moltype = 'protein'
                    break
                elif atom_name == "C4'" or atom_name == "C4*":
                    # a sugar atom: the 2' oxygen tells RNA from DNA
                    for atom in res.child_list:
                        if atom.fullname.strip() == "O2'":
                            self.moltype = "RNA"
                            break
                    if self.moltype == "":
                        self.moltype = "DNA"
        return self.moltype

    def is_structure(self):
        """
            checks if a given structure is Bio.PDB structure object
        Returns:
        ---------
            True if self.struct is a Bio.PDB entity
        Raises:
        ------
            PyRyStructureError  : if self.struct is not Bio.PDB object
        """
        if isinstance(self.struct,
                      Entity.Entity):  # Structure, Model, Chain, Residue
            return True
        else:
            raise PyRyStructureError('%s should be one of\n\
                     the following objects:Structure, Model, Chain, Residue, \n\
                                                  list of Atoms.' %
                                     (self.struct))

    def set_chain_name(self, chain):
        """
            stores chain as self.chain
        """
        self.chain = chain

    def set_moltype(self, moltype):
        """
            stores moltype as self.moltype
        """
        self.moltype = moltype

    def set_structure(self, struct):
        """
            stores struct as self.struct
        """
        self.struct = struct

    def set_pyrystructure(self, structure=None):
        """
        sets structure as PyRyStructure attribute and fills in the derived
        attributes (sequence, chain name, molecule type)

        The given structure is used only when no structure has been stored
        yet, and the sequence is computed only when it is still empty.

        Parameters:
        -----------
            structure   :   Bio.PDB structure object
        """
        if self.struct is None: self.struct = structure
        if self.sequence == '': self.get_mol_sequence()
        self.get_chainname()
        self.get_moltype()

    def set_sequence(self, seq):
        """
            stores seq as self.sequence
        """
        self.sequence = seq

    def write_structure(self, filename):
        """
            Writes self.struct (with its current coordinates) to a pdb file
        Parameters:
        -----------
            filename    :   final name of structure file
        """
        out = PDBIO()
        out.set_structure(self.struct)
        out.save(filename)