示例#1
0
def parse(structure, aa):
    """Build a per-atom backbone table (N, CA, C) with internal coordinates.

    Only the first model of ``structure`` is read. Hetero residues (those
    whose id does not start with ``' '``) are skipped. Every remaining residue
    contributes three rows, in the order N, CA, C.

    Parameters
    ----------
    structure
        Indexable structure whose element ``0`` is the model to parse; the
        model iterates over chains, and chains iterate over residues.
    aa
        Collection of accepted residue names.

    Returns
    -------
    DataFrame
        One row per backbone atom with columns ``chain``, ``aa``, ``atom``,
        ``res`` (running residue index across all chains), ``x``, ``y``,
        ``z``, ``bond_length`` (distance to the next atom), ``bond_angle``
        and ``torsion_angle`` (both in degrees). Entries that cannot be
        computed at the ends of the atom sequence are 0.
        An empty DataFrame is returned when an unrecognised residue is found
        or when the model contains no standard residues.
    """
    # Only the first model is considered.
    model = structure[0]
    print('Number of chains ' + str(len(model)))

    # Rows and coordinate vectors are collected in plain lists and turned into
    # a DataFrame once; growing a DataFrame row by row is quadratic and
    # DataFrame.append no longer exists in pandas 2.x.
    rows = []
    vectors = []
    res = 0

    for chain in model:
        for residue in chain:
            if residue.get_id()[0] != ' ':  # ignore all hetero atoms
                continue
            name = residue.get_resname()
            if not isin(name, aa):
                print('Non recognized residue ' + name)
                return DataFrame([])
            for atom_name in ('N', 'CA', 'C'):
                vectors.append(residue[atom_name].get_vector())
                rows.append({'chain': chain.id, 'aa': name,
                             'atom': atom_name, 'res': res})
            res += 1

    if not rows:
        # Nothing to measure; same empty result as the unrecognised-residue case.
        return DataFrame([])

    # Every residue adds three atoms, so there are at least three vectors here.
    last = len(vectors) - 1

    # Distance from each atom to its successor; the last atom has none.
    bond_length = [(vectors[i + 1] - vectors[i]).norm() for i in range(last)]
    bond_length.append(0)

    # Angle at atom i formed with its two neighbours; undefined at both ends.
    bond_angle = [0]
    bond_angle.extend(
        calc_angle(vectors[i - 1], vectors[i], vectors[i + 1]) * 180 / pi
        for i in range(1, last))
    bond_angle.append(0)

    # Dihedral over atoms i-2 .. i+1; undefined for the first two and last atom.
    torsion_angle = [0, 0]
    torsion_angle.extend(
        calc_dihedral(vectors[i - 2], vectors[i - 1], vectors[i],
                      vectors[i + 1]) * 180 / pi
        for i in range(2, last))
    torsion_angle.append(0)

    coord = array([vector.get_array() for vector in vectors])

    df_new = DataFrame(rows)
    df_new['x'] = coord[:, 0]
    df_new['y'] = coord[:, 1]
    df_new['z'] = coord[:, 2]
    df_new['bond_length'] = bond_length
    df_new['bond_angle'] = bond_angle
    df_new['torsion_angle'] = torsion_angle
    return df_new
示例#2
0
    def test_Vector(self):
        """Test Vector object.

        Covers calc_angle / calc_dihedral on four fixed points, the ``-``,
        ``+``, ``*`` and ``**`` operators with a Vector, a scalar and a tuple
        operand, ``norm`` / ``normsq``, and item assignment.
        """
        def assert_components(vector, expected):
            # Compare the coordinates of a Vector with the expected values.
            self.assertTrue(
                numpy.array_equal(vector.get_array(), numpy.array(expected))
            )

        v1 = Vector(0, 0, 1)
        v2 = Vector(0, 0, 0)
        v3 = Vector(0, 1, 0)
        v4 = Vector(1, 1, 0)

        # Angles are the result of floating-point trigonometry, so compare
        # with a tolerance instead of demanding a bit-identical value.
        self.assertAlmostEqual(calc_angle(v1, v2, v3), 1.5707963267948966)
        self.assertAlmostEqual(
            calc_dihedral(v1, v2, v3, v4), 1.5707963267948966
        )

        # Subtraction: Vector, scalar and tuple operands.
        assert_components(v1 - v2, [0.0, 0.0, 1.0])
        assert_components(v1 - 1, [-1.0, -1.0, 0.0])
        assert_components(v1 - (1, 2, 3), [-1.0, -2.0, -2.0])

        # Addition: Vector, scalar and tuple operands.
        assert_components(v1 + v2, [0.0, 0.0, 1.0])
        assert_components(v1 + 3, [3.0, 3.0, 4.0])
        assert_components(v1 + (1, 2, 3), [1.0, 2.0, 4.0])

        # Division is checked on the underlying array, not on the Vector.
        self.assertTrue(
            numpy.array_equal(v1.get_array() / 2, numpy.array([0, 0, 0.5]))
        )

        # ``*`` between two Vectors yields a scalar.
        self.assertEqual(v1 * v2, 0.0)

        # ``**``: Vector, scalar and tuple operands.
        assert_components(v1 ** v2, [0.0, -0.0, 0.0])
        assert_components(v1 ** 2, [0.0, 0.0, 2.0])
        assert_components(v1 ** (1, 2, 3), [0.0, 0.0, 3.0])

        self.assertEqual(v1.norm(), 1.0)
        self.assertEqual(v1.normsq(), 1.0)

        # Item assignment must be visible through item access.
        v1[2] = 10
        self.assertEqual(v1[2], 10)
示例#3
0
文件: sco.py 项目: volkamerlab/kissim
    def _calculate_vertex_angle(self, vector1, vector2, vector3):
        """
        Calculate a vertex angle between three vectors (vertex = second vector).

        Parameters
        ----------
        vector1 : Bio.PDB.Vector.Vector or None
            Coordinates.
        vector2 : Bio.PDB.Vector.Vector or None
            Coordinates (defined as vertex of angle).
        vector2 : Bio.PDB.Vector.Vector or None
            Coordinates.

        Returns
        -------
        float or np.nan
            Vertex angle between the three points. None if any of the input vectors are None.
        """
        if all([vector1, vector2, vector2]):
            vertex_angle = np.degrees(calc_angle(vector1, vector2, vector3))
            vertex_angles = vertex_angle.round(2)
            return vertex_angle
        else:
            return np.nan
def generate_node_features(protein_chains,
                           surface,
                           ns: NeighborSearch,
                           only_ca=Constants.GET_ONLY_CA_ATOMS):
    """Attach node features to the atoms of the given protein chains.

    Walks every residue of every chain with a sliding window of
    (previous, current, next) residue and stores the computed features on
    the atom objects with ``setattr``: names of the neighbouring residues,
    residue / atom / CA depth, distance to the CA atom, angles involving the
    nearest surface point, the DSSP feature tuple, and neighbour counts per
    radius (overall, per element, and "above plane").

    Parameters
    ----------
    protein_chains
        Sequence of chain objects. The PDB id is derived from the parent of
        the first chain (last four characters of ``full_id[0]``).
    surface
        Indexable collection of surface points, passed to ``min_dist`` and
        ``residue_depth`` (assumed to be an array of 3D coordinates - TODO
        confirm).
    ns : NeighborSearch
        Used for radius queries around each atom.
    only_ca
        When true, only the CA atom of each residue is annotated.

    Notes
    -----
    No return statement is present in the visible part of the function; the
    results live on the atom objects. The ``*_t`` variables accumulate
    wall-clock time per stage and are not read in the visible part.
    """
    pdb_id = protein_chains[0].get_parent().full_id[0]
    # Keep only the last four characters as the PDB id.
    pdb_id = pdb_id[-4:]
    dssp = make_dssp_dict(os.path.join(Constants.DSSP_PATH, pdb_id + '.dssp'))
    # Per-stage timing accumulators (seconds).
    get_residues_t = dssp_key_t = min_dist_t = residue_depth_t = atom_d_t = settattr_t = 0

    for chain in protein_chains:
        start = time.time()
        residue_generator = chain.get_residues()
        get_residues_t += time.time() - start

        # Sliding window [previous, current, next]; the first residue has no
        # predecessor, hence the leading None.
        # NOTE(review): next() without a default raises StopIteration when
        # the chain yields no residues.
        last_n_residues = deque(
            [None,
             next(residue_generator),
             next(residue_generator, None)])
        # The slot at index 1 holds the residue processed in this iteration
        # (it moves to index 0 after the popleft below).
        while last_n_residues[1] is not None:
            prev_res = last_n_residues.popleft()
            prev_res_name = Constants.EMPTY_STR_FEATURE
            if prev_res is not None:
                prev_res_name = prev_res.resname
            res = last_n_residues[0]

            next_res = last_n_residues[1]
            next_res_name = Constants.EMPTY_STR_FEATURE
            if next_res is not None:
                next_res_name = next_res.resname

            start = time.time()
            # DSSP lookup with three attempts: the exact key, the key with
            # blank first and third id fields, and finally any DSSP entry with
            # the same first key element and the same residue number.
            is_key = True
            key = res.full_id[2:]
            if key not in dssp[0]:
                key = (key[0], (' ', key[1][1], ' '))
                if key not in dssp[0]:
                    for dssp_key in dssp[0]:
                        if dssp_key[0] == key[0] and dssp_key[1][1] == key[1][
                                1]:
                            key = dssp_key
                            break

                    if key not in dssp[0]:
                        is_key = False
                        # raise Exception(f'DSSP key not found for {key}, model {res.full_id[0]}')
            if is_key:
                dssp_features = dssp[0][key]
            else:
                # Placeholder tuple used when no DSSP entry could be matched.
                dssp_features = ('', '-', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                                 0.0, 0.0, 0.0, 0.0, 0.0)
            dssp_key_t += time.time() - start

            start = time.time()
            is_cb = 'CB' in res
            # Both angles stay 0 for residues without a CB atom.
            cb_ca_surf_angle = 0
            ca_cb_surf_angle = 0

            ca_atom = res['CA']
            ca_d, ca_surf_idx = min_dist(ca_atom.get_coord(), surface)
            ca_vec = ca_atom.get_vector()
            if not is_cb:
                # print('there is no CB ..... :(((((((')
                pass
            else:
                cb_vec = res['CB'].get_vector()
                # cb_d is not read anywhere in the visible part of the function.
                cb_d, cb_surf_idx = min_dist(res['CB'].get_coord(), surface)
                # Angle at CA between CB and the surface point found for CA.
                cb_ca_surf_angle = calc_angle(cb_vec, ca_vec,
                                              Vector(surface[ca_surf_idx]))
                # Angle at CB between CA and the surface point found for CB.
                ca_cb_surf_angle = calc_angle(ca_vec, cb_vec,
                                              Vector(surface[cb_surf_idx]))
            min_dist_t += time.time() - start

            start = time.time()
            res_d, dist_list = residue_depth(res, surface)
            # Missing depths are replaced by the fallback value 5.0.
            if res_d is None:
                res_d = 5.0
                print("Nan values!!!")

            if ca_d is None:
                ca_d = 5.0
                print("Nan values!!!")
            residue_depth_t += time.time() - start

            for idx, atom in enumerate(res.get_atoms()):
                if only_ca:
                    # Annotate the CA atom only; the loop is left after the
                    # first pass (see the break at the end of the body).
                    atom = ca_atom

                start = time.time()
                # NOTE(review): with only_ca the entry dist_list[0] is used
                # while atom is CA; whether that entry belongs to CA depends
                # on the ordering returned by residue_depth - confirm.
                atom_d, s_idx = dist_list[idx]
                atom_coord = atom.get_coord()
                ca_atom_coord = ca_atom.get_coord()

                # Euclidean distance between this atom and the CA atom.
                d = atom_coord - ca_atom_coord
                ca_atom_dist = np.sqrt(np.sum(d * d))
                atom_ca_surf_angle = 0
                ca_atom_surf_angle = 0
                # The angles are only computed when the atom does not share
                # its coordinates with CA; otherwise they stay 0.
                if not np.array_equal(atom_coord, ca_atom_coord):
                    atom_ca_surf_angle = calc_angle(atom.get_vector(), ca_vec,
                                                    Vector(surface[s_idx]))
                    ca_atom_surf_angle = calc_angle(ca_vec, atom.get_vector(),
                                                    Vector(surface[s_idx]))

                if atom_d is None:
                    atom_d = 5.0
                    # atom_d has already been replaced, so this prints 5.0.
                    print(f"Nan valuess!! {atom_d}, {atom}")
                atom_d_t += time.time() - start

                start = time.time()
                # Store the scalar features on the atom under the attribute
                # names configured in Constants.NODE_APPENDED_FEATURES.
                setattr(atom,
                        Constants.NODE_APPENDED_FEATURES['prev_res_name'],
                        prev_res_name)
                setattr(atom,
                        Constants.NODE_APPENDED_FEATURES['next_res_name'],
                        next_res_name)
                setattr(atom,
                        Constants.NODE_APPENDED_FEATURES['residue_depth'],
                        res_d)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['atom_depth'],
                        atom_d)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['ca_depth'],
                        ca_d)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['ca_atom_dist'],
                        ca_atom_dist)
                setattr(atom,
                        Constants.NODE_APPENDED_FEATURES['cb_ca_surf_angle'],
                        cb_ca_surf_angle)
                setattr(atom,
                        Constants.NODE_APPENDED_FEATURES['ca_cb_surf_angle'],
                        ca_cb_surf_angle)
                setattr(atom,
                        Constants.NODE_APPENDED_FEATURES['atom_ca_surf_angle'],
                        atom_ca_surf_angle)
                setattr(atom,
                        Constants.NODE_APPENDED_FEATURES['ca_atom_surf_angle'],
                        ca_atom_surf_angle)
                setattr(atom, Constants.DSSP_FEATURES_NAME, dssp_features)
                settattr_t += time.time() - start

                # Running totals subtracted from each radius' count before it
                # is stored; they are increased by the full count of every
                # radius processed so far.
                cumsum_main = 0
                cumsum_plane = 0

                # Same running totals, kept separately per element.
                cumsum_atom_main = [0] * len(
                    Constants.NEIGHBOUR_SUM_RADIUS_ATOMS)
                cumsum_atom_plane = [0] * len(
                    Constants.NEIGHBOUR_SUM_RADIUS_ATOMS)
                for num, radius in enumerate(Constants.NEIGHBOUR_SUM_RADIUS):
                    atoms = ns.search(atom_coord, radius)
                    setattr(
                        atom, Constants.NODE_APPENDED_FEATURES[
                            Constants.neighbour_sum_radius_name(num)],
                        len(atoms) - cumsum_main)

                    # The first argument is the vector from the atom to its
                    # surface point; presumably the plane normal - confirm
                    # against num_of_atoms_above_plane.
                    num_above_plane = num_of_atoms_above_plane(
                        surface[s_idx] - atom_coord, atom_coord, atoms)
                    setattr(
                        atom, Constants.NODE_APPENDED_FEATURES[
                            Constants.neighbour_sum_above_plane_radius_name(
                                num)], num_above_plane - cumsum_plane)
                    cumsum_main += len(atoms)
                    cumsum_plane += num_above_plane

                    for i, atom_element in enumerate(
                            Constants.NEIGHBOUR_SUM_RADIUS_ATOMS):
                        # Neighbours of this element (case-insensitive match).
                        atoms_one_element = list(
                            filter(
                                lambda a: a.element.upper() == atom_element.
                                upper(), atoms))
                        setattr(
                            atom, Constants.NODE_APPENDED_FEATURES[
                                Constants.neighbour_sum_radius_name(
                                    num, atom_element)],
                            len(atoms_one_element) - cumsum_atom_main[i])

                        num_above_plane = num_of_atoms_above_plane(
                            surface[s_idx] - atom_coord, atom_coord,
                            atoms_one_element)
                        setattr(
                            atom, Constants.NODE_APPENDED_FEATURES[
                                Constants.
                                neighbour_sum_above_plane_radius_name(
                                    num, atom_element)],
                            num_above_plane - cumsum_atom_plane[i])
                        cumsum_atom_main[i] += len(atoms_one_element)
                        cumsum_atom_plane[i] += num_above_plane
                if only_ca:
                    break
            # Advance the window; None marks the end of the chain.
            last_n_residues.append(next(residue_generator, None))