def parse(structure, aa):
    """Tabulate backbone geometry (N, CA, C) for the first model of a structure.

    Every non-hetero residue contributes three rows, in the order N, CA, C.
    The rows of all chains are concatenated into one flat sequence, so the
    geometric quantities between the last atom of one chain and the first atom
    of the next chain are computed as well.

    Parameters
    ----------
    structure
        Indexable container of models; only ``structure[0]`` is read. The model
        is iterated as chains, each chain as residues. Residues must provide
        ``get_id()``, ``get_resname()`` and item access to the ``'N'``, ``'CA'``
        and ``'C'`` atoms, each exposing ``get_vector()``.
    aa
        Collection of accepted residue names (tested with ``isin``).

    Returns
    -------
    DataFrame
        One row per backbone atom with the columns ``chain``, ``aa``, ``atom``,
        ``res``, ``x``, ``y``, ``z``, ``bond_length``, ``bond_angle`` and
        ``torsion_angle``. For row ``i``:

        * ``bond_length`` is the distance between atoms ``i`` and ``i + 1``
          (0 for the last row);
        * ``bond_angle`` is the angle at atom ``i`` formed with atoms ``i - 1``
          and ``i + 1``, in degrees (0 for the first and the last row);
        * ``torsion_angle`` is the dihedral of atoms ``i - 2 .. i + 1``, in
          degrees (0 for the first two rows and the last row).

        An empty DataFrame is returned when a residue name is not in ``aa`` or
        when the model holds no non-hetero residue at all.

    Raises
    ------
    KeyError
        Propagated from ``residue[...]`` when a backbone atom is missing.
    """
    # Only the first model is considered.
    model = structure[0]
    num_chains = len(model)
    print('Number of chains ' + str(num_chains))

    # Rows and coordinate vectors are gathered in plain lists and turned into
    # a DataFrame once: DataFrame.append copied the whole frame for every atom
    # and no longer exists in pandas >= 2.0.
    records = []
    vectors = []
    res = 0
    for chain in model:
        for residue in chain:
            if residue.get_id()[0] != ' ':
                continue  # ignore all hetero atoms
            name = residue.get_resname()
            if not isin(name, aa):
                print('Non recognized residue ' + name)
                return DataFrame([])
            for atom_name in ('N', 'CA', 'C'):
                vectors.append(residue[atom_name].get_vector())
                records.append({'chain': chain.id, 'aa': name,
                                'atom': atom_name, 'res': res})
            res += 1

    n_atoms = len(vectors)
    if n_atoms == 0:
        # Nothing to measure; mirror the "unrecognized residue" result instead
        # of failing on the first positional lookup.
        return DataFrame([])

    # n_atoms is a positive multiple of three here, so every range below is
    # well defined and each list ends up with exactly n_atoms entries.
    bond_length = [(vectors[i + 1] - vectors[i]).norm()
                   for i in range(n_atoms - 1)]
    bond_length.append(0)

    bond_angle = [0]
    bond_angle.extend(
        calc_angle(vectors[i - 1], vectors[i], vectors[i + 1]) * 180 / pi
        for i in range(1, n_atoms - 1))
    bond_angle.append(0)

    torsion_angle = [0, 0]
    torsion_angle.extend(
        calc_dihedral(vectors[i - 2], vectors[i - 1],
                      vectors[i], vectors[i + 1]) * 180 / pi
        for i in range(2, n_atoms - 1))
    torsion_angle.append(0)

    coord = array([vector.get_array() for vector in vectors])

    df_new = DataFrame(records)
    df_new['x'] = coord[:, 0]
    df_new['y'] = coord[:, 1]
    df_new['z'] = coord[:, 2]
    df_new['bond_length'] = bond_length
    df_new['bond_angle'] = bond_angle
    df_new['torsion_angle'] = torsion_angle
    return df_new
def test_Vector(self):
    """Check Vector angles, arithmetic operators, norms and item assignment."""
    quarter_turn = 1.5707963267948966

    def expect_array(actual, expected):
        # Exact element-wise comparison against the expected components.
        self.assertTrue(numpy.array_equal(actual, numpy.array(expected)))

    v1 = Vector(0, 0, 1)
    v2 = Vector(0, 0, 0)
    v3 = Vector(0, 1, 0)
    v4 = Vector(1, 1, 0)

    self.assertEqual(calc_angle(v1, v2, v3), quarter_turn)
    self.assertEqual(calc_dihedral(v1, v2, v3, v4), quarter_turn)

    # Subtraction with a Vector, a scalar and a tuple operand.
    expect_array((v1 - v2).get_array(), [0.0, 0.0, 1.0])
    expect_array((v1 - 1).get_array(), [-1.0, -1.0, 0.0])
    expect_array((v1 - (1, 2, 3)).get_array(), [-1.0, -2.0, -2.0])

    # Addition with a Vector, a scalar and a tuple operand.
    expect_array((v1 + v2).get_array(), [0.0, 0.0, 1.0])
    expect_array((v1 + 3).get_array(), [3.0, 3.0, 4.0])
    expect_array((v1 + (1, 2, 3)).get_array(), [1.0, 2.0, 4.0])

    # Division of the underlying array. The check is performed twice, exactly
    # as before.
    # NOTE(review): one of the two was possibly meant to exercise `v1 / 2`
    # on the Vector itself - confirm.
    for _ in range(2):
        expect_array(v1.get_array() / 2, [0, 0, 0.5])

    # `*` and `**` operators with Vector, scalar and tuple operands.
    self.assertEqual(v1 * v2, 0.0)
    expect_array((v1 ** v2).get_array(), [0.0, -0.0, 0.0])
    expect_array((v1 ** 2).get_array(), [0.0, 0.0, 2.0])
    expect_array((v1 ** (1, 2, 3)).get_array(), [0.0, 0.0, 3.0])

    self.assertEqual(v1.norm(), 1.0)
    self.assertEqual(v1.normsq(), 1.0)

    # Item assignment must be visible through item access.
    v1[2] = 10
    self.assertEqual(v1[2], 10)
def _calculate_vertex_angle(self, vector1, vector2, vector3):
    """
    Calculate a vertex angle between three vectors (vertex = second vector).

    Parameters
    ----------
    vector1 : Bio.PDB.Vector.Vector or None
        Coordinates.
    vector2 : Bio.PDB.Vector.Vector or None
        Coordinates (defined as vertex of angle).
    vector3 : Bio.PDB.Vector.Vector or None
        Coordinates.

    Returns
    -------
    float or np.nan
        Vertex angle between the three points in degrees, rounded to two
        decimals. np.nan if any of the input vectors is None.
    """
    # All three inputs are checked explicitly against None (the former guard
    # tested vector2 twice and never looked at vector3).
    if any(vector is None for vector in (vector1, vector2, vector3)):
        return np.nan

    # calc_angle's result is converted with np.degrees; the rounded value is
    # what gets returned (it used to be computed and then discarded).
    vertex_angle = np.degrees(calc_angle(vector1, vector2, vector3))
    return vertex_angle.round(2)
def generate_node_features(protein_chains, surface, ns: NeighborSearch, only_ca=Constants.GET_ONLY_CA_ATOMS):
    """Attach node features to the atoms of the given chains via ``setattr``.

    For every residue of every chain the function gathers the names of the
    previous and next residue, the DSSP record of the residue, depth and angle
    values relative to ``surface``, and neighbour counts from ``ns``. The
    values are stored as attributes on the atom objects, using the attribute
    names found in ``Constants.NODE_APPENDED_FEATURES`` and
    ``Constants.DSSP_FEATURES_NAME``.

    Parameters
    ----------
    protein_chains
        Sequence of chains. The parent of the first chain supplies the id
        whose last four characters name the ``.dssp`` file that is read from
        ``Constants.DSSP_PATH``. Every chain must yield at least one residue
        (the first ``next`` call has no default).
    surface
        Surface points; indexed with the indices returned by ``min_dist`` and
        ``residue_depth``.
        # assumes an array of 3D points - TODO confirm
    ns : NeighborSearch
        Queried with ``ns.search(atom_coord, radius)`` for each radius in
        ``Constants.NEIGHBOUR_SUM_RADIUS``.
    only_ca
        When truthy, only the CA atom of each residue receives features.

    Returns
    -------
    None
        Nothing is returned in the visible code; results are side effects on
        the atom objects.
    """
    pdb_id = protein_chains[0].get_parent().full_id[0]
    pdb_id = pdb_id[-4:]
    dssp = make_dssp_dict(os.path.join(Constants.DSSP_PATH, pdb_id + '.dssp'))
    # Accumulated wall-clock time per stage. They are only written in the
    # visible code, never read.
    get_residues_t = dssp_key_t = min_dist_t = residue_depth_t = atom_d_t = settattr_t = 0
    for chain in protein_chains:
        start = time.time()
        residue_generator = chain.get_residues()
        get_residues_t += time.time() - start
        # Sliding window [previous, current, next]; after the popleft below,
        # index 0 is the current residue and index 1 the next one.
        last_n_residues = deque(
            [None, next(residue_generator), next(residue_generator, None)])
        while last_n_residues[1] is not None:
            prev_res = last_n_residues.popleft()
            prev_res_name = Constants.EMPTY_STR_FEATURE
            if prev_res is not None:
                prev_res_name = prev_res.resname
            res = last_n_residues[0]
            next_res = last_n_residues[1]
            next_res_name = Constants.EMPTY_STR_FEATURE
            if next_res is not None:
                next_res_name = next_res.resname
            start = time.time()
            # Locate the DSSP record: first by the exact key, then with the
            # first and third fields of the residue id blanked, and finally by
            # matching only the chain id and the residue number.
            is_key = True
            key = res.full_id[2:]
            if key not in dssp[0]:
                key = (key[0], (' ', key[1][1], ' '))
                if key not in dssp[0]:
                    for dssp_key in dssp[0]:
                        if dssp_key[0] == key[0] and dssp_key[1][1] == key[1][
                                1]:
                            key = dssp_key
                            break
                    if key not in dssp[0]:
                        is_key = False
                        # raise Exception(f'DSSP key not found for {key}, model {res.full_id[0]}')
            if is_key:
                dssp_features = dssp[0][key]
            else:
                # Placeholder record used when no DSSP entry matches.
                dssp_features = ('', '-', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
            dssp_key_t += time.time() - start
            start = time.time()
            is_cb = 'CB' in res
            # Both CB-related angles stay 0 for residues without a CB atom.
            cb_ca_surf_angle = 0
            ca_cb_surf_angle = 0
            ca_atom = res['CA']
            ca_d, ca_surf_idx = min_dist(ca_atom.get_coord(), surface)
            ca_vec = ca_atom.get_vector()
            if not is_cb:
                # print('there is no CB ..... :(((((((')
                pass
            else:
                cb_vec = res['CB'].get_vector()
                # cb_d is not used afterwards; only the surface index is.
                cb_d, cb_surf_idx = min_dist(res['CB'].get_coord(), surface)
                cb_ca_surf_angle = calc_angle(cb_vec, ca_vec, Vector(surface[ca_surf_idx]))
                ca_cb_surf_angle = calc_angle(ca_vec, cb_vec, Vector(surface[cb_surf_idx]))
            min_dist_t += time.time() - start
            start = time.time()
            res_d, dist_list = residue_depth(res, surface)
            # A missing depth is replaced by the fixed value 5.0.
            if res_d is None:
                res_d = 5.0
                print("Nan values!!!")
            if ca_d is None:
                ca_d = 5.0
                print("Nan values!!!")
            residue_depth_t += time.time() - start
            for idx, atom in enumerate(res.get_atoms()):
                if only_ca:
                    # Only the CA atom is annotated; the loop is left after
                    # this first iteration (see the break at the bottom).
                    atom = ca_atom
                start = time.time()
                # NOTE(review): in only_ca mode idx is 0, so this reads the
                # dist_list entry at position 0 although `atom` is CA -
                # confirm that this is the intended entry.
                atom_d, s_idx = dist_list[idx]
                atom_coord = atom.get_coord()
                ca_atom_coord = ca_atom.get_coord()
                d = atom_coord - ca_atom_coord
                ca_atom_dist = np.sqrt(np.sum(d * d))
                atom_ca_surf_angle = 0
                ca_atom_surf_angle = 0
                # The angles are left at 0 when the atom coincides with CA.
                if not np.array_equal(atom_coord, ca_atom_coord):
                    atom_ca_surf_angle = calc_angle(atom.get_vector(), ca_vec, Vector(surface[s_idx]))
                    ca_atom_surf_angle = calc_angle(ca_vec, atom.get_vector(), Vector(surface[s_idx]))
                if atom_d is None:
                    atom_d = 5.0
                    # The message shows the substituted value, not the
                    # original None.
                    print(f"Nan valuess!! {atom_d}, {atom}")
                atom_d_t += time.time() - start
                start = time.time()
                setattr(atom, Constants.NODE_APPENDED_FEATURES['prev_res_name'], prev_res_name)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['next_res_name'], next_res_name)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['residue_depth'], res_d)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['atom_depth'], atom_d)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['ca_depth'], ca_d)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['ca_atom_dist'], ca_atom_dist)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['cb_ca_surf_angle'], cb_ca_surf_angle)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['ca_cb_surf_angle'], ca_cb_surf_angle)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['atom_ca_surf_angle'], atom_ca_surf_angle)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['ca_atom_surf_angle'], ca_atom_surf_angle)
                setattr(atom, Constants.DSSP_FEATURES_NAME, dssp_features)
                settattr_t += time.time() - start
                # Running totals subtracted from the counts at each radius:
                # one pair for all atoms, one pair per configured element.
                # NOTE(review): the totals grow by the full count found at
                # every radius (+=). If the counts at successive radii already
                # include the smaller radii, more than the previous radius'
                # count is subtracted from the third radius on - confirm that
                # this is intended.
                cumsum_main = 0
                cumsum_plane = 0
                cumsum_atom_main = [0] * len(
                    Constants.NEIGHBOUR_SUM_RADIUS_ATOMS)
                cumsum_atom_plane = [0] * len(
                    Constants.NEIGHBOUR_SUM_RADIUS_ATOMS)
                for num, radius in enumerate(Constants.NEIGHBOUR_SUM_RADIUS):
                    atoms = ns.search(atom_coord, radius)
                    setattr(
                        atom, Constants.NODE_APPENDED_FEATURES[
                            Constants.neighbour_sum_radius_name(num)],
                        len(atoms) - cumsum_main)
                    # The first argument is the offset from the atom to the
                    # surface point at s_idx.
                    num_above_plane = num_of_atoms_above_plane(
                        surface[s_idx] - atom_coord, atom_coord, atoms)
                    setattr(
                        atom, Constants.NODE_APPENDED_FEATURES[
                            Constants.neighbour_sum_above_plane_radius_name(
                                num)], num_above_plane - cumsum_plane)
                    cumsum_main += len(atoms)
                    cumsum_plane += num_above_plane
                    for i, atom_element in enumerate(
                            Constants.NEIGHBOUR_SUM_RADIUS_ATOMS):
                        # Same two features, restricted to neighbours of one
                        # element (case-insensitive match).
                        atoms_one_element = list(
                            filter(
                                lambda a: a.element.upper() == atom_element.
                                upper(), atoms))
                        setattr(
                            atom, Constants.NODE_APPENDED_FEATURES[
                                Constants.neighbour_sum_radius_name(
                                    num, atom_element)],
                            len(atoms_one_element) - cumsum_atom_main[i])
                        num_above_plane = num_of_atoms_above_plane(
                            surface[s_idx] - atom_coord, atom_coord,
                            atoms_one_element)
                        setattr(
                            atom, Constants.NODE_APPENDED_FEATURES[
                                Constants.
                                neighbour_sum_above_plane_radius_name(
                                    num, atom_element)],
                            num_above_plane - cumsum_atom_plane[i])
                        cumsum_atom_main[i] += len(atoms_one_element)
                        cumsum_atom_plane[i] += num_above_plane
                if only_ca:
                    break
            # Advance the window; None marks the end of the chain.
            last_n_residues.append(next(residue_generator, None))