def ramachandran():
    """Collect phi/psi backbone dihedrals for the module-level structure ``struc``.

    Every run of three residues whose sequence numbers are strictly
    consecutive (prev + 1 == current, current + 1 == next) contributes one
    phi and one psi value for the middle residue.  Triplets with a missing
    backbone atom are skipped.

    Returns:
        tuple: ``(phi_angles, psi_angles)``, two equally long lists holding
        the values exactly as returned by ``calc_dihedral`` (no unit
        conversion is applied here).
    """
    phi_angles = []
    psi_angles = []
    residues = list(struc.get_residues())
    for res_prev, res, res_next in zip(residues, residues[1:], residues[2:]):
        # Check residues have sequential residue numbers
        resnum = res.get_id()[1]
        if resnum != res_prev.get_id()[1] + 1 or res_next.get_id()[1] != resnum + 1:
            continue
        try:
            phi_angle = calc_dihedral(res_prev["C"].get_vector(),
                                      res["N"].get_vector(),
                                      res["CA"].get_vector(),
                                      res["C"].get_vector())
            psi_angle = calc_dihedral(res["N"].get_vector(),
                                      res["CA"].get_vector(),
                                      res["C"].get_vector(),
                                      res_next["N"].get_vector())
        except KeyError:
            # A backbone atom is absent from one of the residues; only this
            # lookup failure is tolerated so that real errors (and Ctrl-C)
            # are no longer silently discarded.
            continue
        # Append only after both angles exist so the lists stay aligned.
        phi_angles.append(phi_angle)
        psi_angles.append(psi_angle)
    return phi_angles, psi_angles
def build_all_angles_model(pdb_filename):
    """Build one ``Geometry`` object per recognised residue of chain "A".

    The file ``PDBdir/pdb_filename`` is parsed, and for each residue of
    model 0, chain "A" whose name is a key of ``resdict`` a geometry is
    created.  For every such residue after the first, the inter-residue
    angles and dihedrals measured against the previous recognised residue
    are stored on the geometry (converted with the factor 180/pi).

    Returns:
        list: the geometry objects, in chain order.
    """
    structure = PDBParser().get_structure("sample", Path(PDBdir, pdb_filename))
    chain = structure[0]["A"]
    to_degrees = 180.0 / math.pi

    geometries = []
    previous_backbone = None  # (N, CA, C) atoms of the last recognised residue
    for residue in chain:
        resname = residue.get_resname()
        if resname not in resdict:
            continue

        geo = Geometry.geometry(resdict[resname])
        if previous_backbone is None:
            # First recognised residue: nothing to measure against yet.
            previous_backbone = (residue["N"], residue["CA"], residue["C"])
        else:
            prev_n, prev_ca, prev_c = previous_backbone
            n1 = prev_n.get_vector()
            ca1 = prev_ca.get_vector()
            c1 = prev_c.get_vector()

            c_atom = residue["C"]
            n_atom = residue["N"]
            ca_atom = residue["CA"]
            c = c_atom.get_vector()
            n = n_atom.get_vector()
            ca = ca_atom.get_vector()

            geo.CA_C_N_angle = calc_angle(ca1, c1, n) * to_degrees
            geo.C_N_CA_angle = calc_angle(c1, n, ca) * to_degrees

            # All three dihedrals are stored on the current residue.
            psi = calc_dihedral(n1, ca1, c1, n)
            omega = calc_dihedral(ca1, c1, n, ca)
            phi = calc_dihedral(c1, n, ca, c)

            geo.psi_im1 = psi * to_degrees
            geo.omega = omega * to_degrees
            geo.phi = phi * to_degrees
            geo.N_CA_C_angle = calc_angle(n, ca, c) * to_degrees

            previous_backbone = (n_atom, ca_atom, c_atom)

        geometries.append(geo)
    return geometries
def calculate_dihedrals(self):
    """
    Calculates the dihedral angles gamma, omega5 and omega3.

    Each angle is computed with ``calc_dihedral`` from four of the vectors
    stored on the instance (``vP5``, ``vC5``, ``vC3``, ``vO3``, ``vN5``,
    ``vN3``) and stored on ``self``.  Negative results are shifted up by
    2*pi so that no stored angle is negative.
    """

    def positive(angle):
        # A conditional expression instead of ``cond and a or b``: the
        # and/or form falls through to ``angle`` whenever ``2 * pi + angle``
        # is falsy.
        return 2 * pi + angle if angle < 0 else angle

    self.gamma = positive(
        calc_dihedral(self.vP5, self.vC5, self.vC3, self.vO3))
    self.omega5 = positive(
        calc_dihedral(self.vO3, self.vC3, self.vC5, self.vN5))
    self.omega3 = positive(
        calc_dihedral(self.vP5, self.vC5, self.vC3, self.vN3))
def build_phi_psi_model(pdb_filename):
    """Rebuild chain "A" of a PDB file from its measured backbone dihedrals.

    The residues of model 0, chain "A" whose names are keys of ``resdict``
    form the sequence.  For each such residue after the first, psi, omega
    and phi are measured against the previous recognised residue and
    converted to degrees.

    Returns:
        tuple: ``(model_with_omega, model_phi_psi_only)``, the results of
        ``PeptideBuilder.make_structure`` with and without the omega list.
    """
    structure = PDBParser().get_structure("sample", Path(PDBdir, pdb_filename))
    chain = structure[0]["A"]

    seq = ""
    phi_diangle, psi_diangle, omega_diangle = [], [], []

    for residue in chain:
        resname = residue.get_resname()
        if resname not in resdict:
            continue

        seq += resdict[resname]
        if len(seq) == 1:
            # First recognised residue only seeds the "previous" atoms.
            prev_n = residue["N"]
            prev_ca = residue["CA"]
            prev_c = residue["C"]
            continue

        n1 = prev_n.get_vector()
        ca1 = prev_ca.get_vector()
        c1 = prev_c.get_vector()

        c_atom = residue["C"]
        n_atom = residue["N"]
        ca_atom = residue["CA"]
        c = c_atom.get_vector()
        n = n_atom.get_vector()
        ca = ca_atom.get_vector()

        # psi and phi are attributed to the current residue.
        psi = calc_dihedral(n1, ca1, c1, n)
        omega = calc_dihedral(ca1, c1, n, ca)
        phi = calc_dihedral(c1, n, ca, c)

        phi_diangle.append(phi * 180.0 / math.pi)
        psi_diangle.append(psi * 180.0 / math.pi)
        omega_diangle.append(omega * 180.0 / math.pi)

        prev_n, prev_ca, prev_c = n_atom, ca_atom, c_atom

    model_structure_omega = PeptideBuilder.make_structure(
        seq, phi_diangle, psi_diangle, omega_diangle
    )
    model_structure_phi_psi = PeptideBuilder.make_structure(
        seq, phi_diangle, psi_diangle
    )
    return model_structure_omega, model_structure_phi_psi
def featurize(structure: Structure) -> list[Any]:
    """
    Calculates 3D ML features from the `structure`.

    The surface-area features come from freesasa run on the module-level
    ``pdbpath`` (not on ``structure``); the remaining features come from the
    residues of ``structure``.

    Returns a list ``[Total_area, Polar_Apolar, protein_length, seq_length,
    angle]`` where the area and length entries are themselves lists.
    """
    sasa_structure = freesasa.Structure(pdbpath)
    sasa_result = freesasa.calc(sasa_structure)
    area_by_class = freesasa.classifyResults(sasa_result, sasa_structure)

    total_area = [sasa_result.totalArea()]
    polar_apolar = [area_by_class[name] for name in area_by_class]

    # get all the residues
    residues = list(structure.get_residues())
    seq_length = [len(residues)]

    # calculate some random 3D features (you should be smarter here!)
    second_ca = residues[1]["CA"]
    penultimate_ca = residues[-2]["CA"]
    protein_length = second_ca - penultimate_ca
    angle = calc_dihedral(
        second_ca.get_vector(),
        residues[2]["CA"].get_vector(),
        residues[-3]["CA"].get_vector(),
        penultimate_ca.get_vector(),
    )

    # create the feature vector
    return [total_area, polar_apolar, protein_length, seq_length, angle]
def get_disulfide_bridges(self):
    """Find SG-SG pairs in the first model that pass the disulfide filters.

    A pair is kept when its SG-SG distance lies within
    ``[disulfide_min_sg_distance, disulfide_max_sg_distance]`` and the
    absolute CB-SG-SG-CB dihedral (chi3) lies within
    ``[min_chi3_dihedral_value, max_chi3_dihedral_value]``.

    The result is stored in ``self.list_of_disulfides`` and returned; each
    entry is a dict with the measurements, the two atoms, and their residue
    and chain ids.
    """
    first_model = list(self.parsed_biopython_structure.get_list())[0]
    sg_atoms = [atom for atom in first_model.get_atoms() if atom.id == "SG"]

    self.list_of_disulfides = []
    for start, sg_a in enumerate(sg_atoms):
        # Starting the slice at `start` visits each unordered pair once;
        # the atom itself is dropped by the equality test below.
        for sg_b in sg_atoms[start:]:
            if sg_a == sg_b:
                continue

            # Subtracting two atoms yields their distance.
            separation = sg_a - sg_b
            if not (self.disulfide_min_sg_distance <= separation
                    <= self.disulfide_max_sg_distance):
                continue

            # Cbi-Sgi-Sgj-Cbj dihedral angle (also called the chi3 angle).
            residue_a = sg_a.get_parent()
            cb_a_vec = residue_a["CB"].get_vector()
            sg_a_vec = sg_a.get_vector()
            sg_b_vec = sg_b.get_vector()
            residue_b = sg_b.get_parent()
            cb_b_vec = residue_b["CB"].get_vector()
            chi3 = abs(calc_dihedral(cb_a_vec, sg_a_vec, sg_b_vec, cb_b_vec))

            if not (self.min_chi3_dihedral_value <= chi3
                    <= self.max_chi3_dihedral_value):
                continue

            self.list_of_disulfides.append({
                # Measurements.
                "distance": separation,
                "chi3_dihedral": chi3,
                # Atoms.
                "atom_i": sg_a,
                "atom_j": sg_b,
                # Residues ids.
                "residue_i": sg_a.get_parent().id,
                "residue_j": sg_b.get_parent().id,
                # Chain ids.
                "chain_i": sg_a.get_parent().get_parent().id,
                "chain_j": sg_b.get_parent().get_parent().id,
            })
    return self.list_of_disulfides
def CalcOrientationOf4CAs(CAi1, CAi2, CAj1, CAj2):
    """Return two angles and two dihedrals defined by four CA positions.

    ``Ca1Ca2Ca3`` (CAi1-CAi2-CAj1) is always computed.  When ``CAj2`` is
    None the three quantities that need it are set to ``InvalidDegree``.
    Computed values are scaled by 180/pi.

    Returns:
        dict with keys 'Ca1Ca2Ca3Ca4', 'Ca1Ca2Ca3', 'Ca1Ca2Ca4Ca3',
        'Ca1Ca2Ca4'.
    """
    angle_to_j1 = calc_angle(CAi1, CAi2, CAj1) * 180 / np.pi

    if CAj2 is not None:
        angle_to_j2 = calc_angle(CAi1, CAi2, CAj2) * 180 / np.pi
        dihedral_j1_j2 = calc_dihedral(CAi1, CAi2, CAj1, CAj2) * 180 / np.pi
        dihedral_j2_j1 = calc_dihedral(CAi1, CAi2, CAj2, CAj1) * 180 / np.pi
    else:
        angle_to_j2 = dihedral_j1_j2 = dihedral_j2_j1 = InvalidDegree

    return {
        'Ca1Ca2Ca3Ca4': dihedral_j1_j2,
        'Ca1Ca2Ca3': angle_to_j1,
        'Ca1Ca2Ca4Ca3': dihedral_j2_j1,
        'Ca1Ca2Ca4': angle_to_j2
    }
def get_phi(self, previous, source_res):
    """Return the C(prev)-N-CA-C dihedral of ``source_res`` via ``degrees``.

    Any exception raised while looking up the atoms or computing the angle
    is swallowed and 0.0 is returned instead, so a result of 0.0 does not
    distinguish a failure from a genuine zero angle.
    """
    try:
        backbone = (
            previous['C'],
            source_res['N'],
            source_res['CA'],
            source_res['C'],
        )
        vectors = [atom.get_vector() for atom in backbone]
        return degrees(calc_dihedral(*vectors))
    except Exception:
        return 0.0
def get_dihedrals(vectors):
    """
    Return the dihedral for every window of four consecutive vectors.

    The result has ``len(vectors) - 3`` entries (none when fewer than four
    vectors are given), each computed by ``calc_dihedral`` on
    ``vectors[k] .. vectors[k + 3]``.
    """
    window_count = len(vectors) - 3
    return [
        calc_dihedral(vectors[k], vectors[k + 1], vectors[k + 2], vectors[k + 3])
        for k in range(window_count)
    ]
def get_psi(self, target_res, next_res):
    """Return the N-CA-C-N(next) dihedral of ``target_res`` via ``degrees``.

    Any exception raised while looking up the atoms or computing the angle
    is swallowed and 0.0 is returned instead, so a result of 0.0 does not
    distinguish a failure from a genuine zero angle.
    """
    try:
        backbone = (
            target_res['N'],
            target_res['CA'],
            target_res['C'],
            next_res['N'],
        )
        vectors = [atom.get_vector() for atom in backbone]
        return degrees(calc_dihedral(*vectors))
    except Exception:
        return 0.0
def get_phi_psi_list(self):
    """Return the list of phi/psi dihedral angles.

    One ``(phi, psi)`` tuple is produced per residue and also stored in the
    residue's ``xtra`` dict under "PHI" and "PSI".  An entry is None when it
    cannot be computed: phi for the first residue, psi for the last one, or
    either when a needed atom lookup fails.
    """
    angles = []
    last_index = len(self) - 1
    for idx in range(last_index + 1):
        residue = self[idx]
        try:
            n_vec = residue["N"].get_vector()
            ca_vec = residue["CA"].get_vector()
            c_vec = residue["C"].get_vector()
        except Exception:
            # Own backbone incomplete: neither angle can be calculated.
            angles.append((None, None))
            residue.xtra["PHI"] = None
            residue.xtra["PSI"] = None
            continue

        # Phi needs the C of the preceding residue (none for residue 0).
        phi = None
        if idx > 0:
            before = self[idx - 1]
            try:
                phi = calc_dihedral(before["C"].get_vector(), n_vec, ca_vec, c_vec)
            except Exception:
                phi = None

        # Psi needs the N of the following residue (none for the last one).
        psi = None
        if idx < last_index:
            after = self[idx + 1]
            try:
                psi = calc_dihedral(n_vec, ca_vec, c_vec, after["N"].get_vector())
            except Exception:
                psi = None

        angles.append((phi, psi))
        # Add Phi/Psi to xtra dict of residue
        residue.xtra["PHI"] = phi
        residue.xtra["PSI"] = psi
    return angles
def get_phi_psi_list(self):
    """Return the list of phi/psi dihedral angles.

    The returned list holds one ``(phi, psi)`` tuple per residue; the same
    values are written to ``res.xtra["PHI"]`` / ``res.xtra["PSI"]``.  None
    marks an angle that is undefined (phi at index 0, psi at the last index)
    or whose atom lookup raised.
    """
    result = []
    count = len(self)
    for position in range(count):
        current = self[position]
        try:
            own_n = current['N'].get_vector()
            own_ca = current['CA'].get_vector()
            own_c = current['C'].get_vector()
        except Exception:
            # Some atoms are missing: no phi/psi for this residue.
            phi = psi = None
            result.append((phi, psi))
            current.xtra["PHI"] = phi
            current.xtra["PSI"] = psi
            continue

        if position == 0:
            # No phi for residue 0!
            phi = None
        else:
            neighbour = self[position - 1]
            try:
                phi = calc_dihedral(neighbour['C'].get_vector(), own_n, own_ca, own_c)
            except Exception:
                phi = None

        if position >= count - 1:
            # No psi for last residue!
            psi = None
        else:
            neighbour = self[position + 1]
            try:
                psi = calc_dihedral(own_n, own_ca, own_c, neighbour['N'].get_vector())
            except Exception:
                psi = None

        result.append((phi, psi))
        current.xtra["PHI"] = phi
        current.xtra["PSI"] = psi
    return result
def get_tau_list(self):
    """List of tau torsions angles for all 4 consecutive Calpha atoms.

    Each tau is also stored under "TAU" in the ``xtra`` dict of the residue
    owning the third atom of its window.
    """
    calphas = self.get_ca_list()
    taus = []
    for start in range(len(calphas) - 3):
        window = (
            calphas[start],
            calphas[start + 1],
            calphas[start + 2],
            calphas[start + 3],
        )
        tau = calc_dihedral(*[atom.get_vector() for atom in window])
        taus.append(tau)
        # Put tau in xtra dict of residue
        window[2].get_parent().xtra["TAU"] = tau
    return taus
def get_tau_list(self):
    """List of tau torsions angles for all 4 consecutive Calpha atoms.

    Besides being returned, every tau is written to ``xtra["TAU"]`` of the
    parent residue of the window's third Calpha.
    """
    ca_atoms = self.get_ca_list()
    tau_values = []
    first = 0
    while first < len(ca_atoms) - 3:
        a1 = ca_atoms[first]
        a2 = ca_atoms[first + 1]
        a3 = ca_atoms[first + 2]
        a4 = ca_atoms[first + 3]
        tau = calc_dihedral(
            a1.get_vector(), a2.get_vector(), a3.get_vector(), a4.get_vector()
        )
        tau_values.append(tau)
        # Put tau in xtra dict of residue
        a3.get_parent().xtra["TAU"] = tau
        first += 1
    return tau_values
def featurize(structure: Structure) -> list[Any]:
    """
    Calculates 3D ML features from the `structure`.

    Returns ``[protein_length, angle]``: the result of subtracting the CA
    atom of the second-to-last residue from the CA atom of the second
    residue, and the dihedral through the CA atoms of residues 1, 2, -3
    and -2.
    """
    # get all the residues
    residues = list(structure.get_residues())

    # calculate some random 3D features (you should be smarter here!)
    second_ca = residues[1]["CA"]
    penultimate_ca = residues[-2]["CA"]
    protein_length = second_ca - penultimate_ca
    angle = calc_dihedral(
        second_ca.get_vector(),
        residues[2]["CA"].get_vector(),
        residues[-3]["CA"].get_vector(),
        penultimate_ca.get_vector(),
    )

    # create the feature vector
    return [protein_length, angle]
def add_terminal_OXT(structure: Structure, C_OXT_length: float = 1.23) -> Structure:
    """Adds a terminal oxygen atom ('OXT') to the last residue of chain A
    model 0 of the given structure, and returns the new structure. The OXT
    atom object will be contained in the last residue object of the
    structure.

    The OXT is placed at ``C_OXT_length`` from C, at the same CA-C-O angle as
    the existing O, with its N-CA-C-OXT dihedral offset by 180 degrees from
    that of O.  If residue 1 of chain "A" is named "ACE", its N atom is
    deleted.

    This function should be used only when the structure object is completed
    and no further residues need to be appended."""
    to_degrees = 180.0 / math.pi

    # obtain last residue information
    last_residue = getReferenceResidue(structure, -1)
    n_atom = last_residue["N"]
    ca_atom = last_residue["CA"]
    c_atom = last_residue["C"]
    o_atom = last_residue["O"]

    n_vec = n_atom.get_vector()
    ca_vec = ca_atom.get_vector()
    c_vec = c_atom.get_vector()
    o_vec = o_atom.get_vector()

    # geometry to bring together residue
    CA_C_OXT_angle = calc_angle(ca_vec, c_vec, o_vec) * to_degrees
    N_CA_C_O_diangle = calc_dihedral(n_vec, ca_vec, c_vec, o_vec) * to_degrees
    # Shift by half a turn towards zero.
    if N_CA_C_O_diangle < 0:
        N_CA_C_OXT_diangle = N_CA_C_O_diangle + 180.0
    else:
        N_CA_C_OXT_diangle = N_CA_C_O_diangle - 180.0

    # OXT atom creation
    OXT_coord = calculateCoordinates(
        n_atom, ca_atom, c_atom, C_OXT_length, CA_C_OXT_angle, N_CA_C_OXT_diangle
    )
    # modify last residue of the structure to contain the OXT atom
    last_residue.add(Atom("OXT", OXT_coord, 0.0, 1.0, " ", "OXT", 0, "O"))

    first_residue = structure[0]["A"][1]
    if first_residue.get_resname() == "ACE":
        del first_residue["N"]
    return structure
def main():
    """Extract phi/psi backbone dihedrals (degrees) from structure files.

    Command-line driver: parses the arguments, reads each structure file,
    and for every chain of every model computes phi/psi for each residue
    that has both neighbours in the list returned by
    ``chain_sequence_aaselect``.  Values are stored at index ``resseq - 1``
    of a per-chain list, and all lists are finally copied into two
    zero-initialised ``(Nstruct, maxres)`` arrays.  With ``--output`` the
    arrays are saved as ``<output>_phi.npy`` and ``<output>_psi.npy``.

    Rows that cannot be copied (wrong length or unassigned ``None`` slots)
    are reported through ``eprint`` and left as zeros.
    """
    arg_parser = argparse.ArgumentParser(
        usage='Extract data from set of structures')
    arg_parser.add_argument('struct',
                            type=str,
                            help='structures in pdb or mmcif format',
                            nargs='+')
    arg_parser.add_argument('-o',
                            '--output',
                            type=str,
                            help='Output file name')
    arg_parser.add_argument('-r',
                            '--recursive',
                            action='store_true',
                            help='Recursive search of structures in folders',
                            default=False)
    arg_parser.add_argument('-w',
                            '--pdb-warnings',
                            action='store_true',
                            help='show structure parsing warnings',
                            default=False)
    arg_parser.add_argument('--phipsi',
                            action='store_true',
                            help='Print phi/psi for residues',
                            default=False)
    arg_parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            help='Verbose output',
                            default=False)
    args = arg_parser.parse_args()

    if not args.pdb_warnings:
        warnings.simplefilter("ignore", PDBConstructionWarning)

    if args.recursive:
        struct_list = recursive_expand(args.struct)
        print("Recursive expansion: {} -> {} structures".format(
            len(args.struct), len(struct_list)))
    else:
        struct_list = args.struct
    nfiles = len(struct_list)
    print("nfiles: {}".format(nfiles))

    phi_array, psi_array, info = [], [], []
    maxres = 0
    for structf in struct_list:
        try:
            structure = get_structure_from_file(structf)
            if not structure:
                eprint("No structure found in file {}".format(structf))
                continue
            print("Read structure(s) from file {}".format(structf))
            s = 0
            for model in structure:
                for chain in model:
                    res_list = chain_sequence_aaselect(chain)
                    # The first and last residue have no phi/psi pair.
                    Nres = len(res_list) - 2
                    if Nres > maxres:
                        maxres = Nres
                        print("maxres = {} in file {}, structure {}".format(
                            maxres, structf, s))
                    phi_list, psi_list = [None] * maxres, [None] * maxres
                    info_lines = []
                    for res1, res2, res3 in zip(res_list[:-2],
                                                res_list[1:-1],
                                                res_list[2:]):
                        v1 = res1['C'].get_vector()
                        v2 = res2['N'].get_vector()
                        v3 = res2['CA'].get_vector()
                        v4 = res2['C'].get_vector()
                        v5 = res3['N'].get_vector()
                        phi = calc_dihedral(v1, v2, v3, v4) / np.pi * 180
                        psi = calc_dihedral(v2, v3, v4, v5) / np.pi * 180
                        phi_psi_str = "{:6} {:3} {:1} {:4} {:4} PHI= {:8.3f} PSI= {:8.3f}".format(
                            structf, model.get_id(), chain.get_id(),
                            res2.get_resname(),
                            bio_resid_to_str(res2.get_id()), phi, psi)
                        slot = res2.get_id()[1] - 1
                        # Explicit bounds check: a negative index would not
                        # raise but silently overwrite a slot counted from
                        # the end of the list.
                        if 0 <= slot < len(phi_list):
                            phi_list[slot] = phi
                            psi_list[slot] = psi
                        else:
                            eprint(
                                "Can't assign phi,psi: resseq is out of range")
                            eprint(phi_psi_str)
                        info_lines.append(phi_psi_str + '\n')
                        if args.verbose:
                            print(phi_psi_str)
                    info_str = ''.join(info_lines)
                    phi_array.append(phi_list)
                    psi_array.append(psi_list)
                    info.append(info_str)
                    if Nres != maxres:
                        eprint(
                            "Data array size mispatch: Nres = {}, maxres = {}".
                            format(Nres, maxres))
                        eprint("phi_list = {}".format(phi_list))
                        eprint("psi_list = {}".format(psi_list))
                        eprint(info_str)
                    s += 1
        except FileNotFoundError:
            eprint("File not found: {}".format(structf))

    Nstruct = len(phi_array)
    print("All {} structures read, maxres = {}".format(Nstruct, maxres))
    phi_np = np.zeros((Nstruct, maxres))
    psi_np = np.zeros((Nstruct, maxres))
    for s in range(Nstruct):
        try:
            phi_np[s, :] = phi_array[s]
            psi_np[s, :] = psi_array[s]
        except (TypeError, ValueError):
            # TypeError: a None slot cannot become a float;
            # ValueError: the list length differs from maxres.
            eprint("Can't copy phi,psi from structure {}:".format(s))
            eprint(info[s])
            eprint(sys.exc_info()[0])
    print("PHI shape = {}".format(phi_np.shape))
    print("PSI shape = {}".format(psi_np.shape))
    if args.output:
        np.save(args.output + '_phi.npy', phi_np)
        np.save(args.output + '_psi.npy', psi_np)
def process_tertiary(tertiary):
    '''Compute the bond lengths, bond angles, and dihedral angles.

    ``tertiary[0]``, ``tertiary[1]`` and ``tertiary[2]`` are zipped into one
    Vector per point, and the points are consumed three at a time.  Judging
    by the names of the quantities computed below, each triple is
    N, Ca, C of one residue (assumption, confirm against the caller).

    A point whose norm is zero counts as missing.  Bond lengths and bond
    angles are appended only when all of their points are present, so those
    lists may be shorter than the number of residues; phi, psi and omega
    get exactly one entry per residue, INVALID_ANGLE when a point is
    missing.

    Returns (phi, psi, omega, bond_angle_NCaC, bond_angle_CaCN,
    bond_angle_CNCa, bond_len_CN, bond_len_NCa, bond_len_CaC).
    '''
    phi, psi, omega = [], [], []
    bond_angle_CNCa, bond_angle_NCaC, bond_angle_CaCN = [], [], []
    bond_len_NCa, bond_len_CaC, bond_len_CN = [], [], []

    # convert tertiary coords into Vectors
    pV = [Vector(x, y, z)
          for x, y, z in zip(tertiary[0], tertiary[1], tertiary[2])]
    total = len(pV)

    for i in range(0, total, 3):
        # Presence flags: a point with zero norm is treated as absent.
        has_next = i + 3 < total
        ok_prev_c = i > 0 and pV[i-1].norm() > 0
        ok_n = pV[i].norm() > 0
        ok_ca = pV[i+1].norm() > 0
        ok_c = pV[i+2].norm() > 0
        ok_next_n = has_next and pV[i+3].norm() > 0
        ok_next_ca = has_next and pV[i+4].norm() > 0

        # compute bond lengths
        if ok_prev_c and ok_n:
            bond_len_CN.append((pV[i-1]-pV[i]).norm())
        if ok_n and ok_ca:
            bond_len_NCa.append((pV[i]-pV[i+1]).norm())
        if ok_ca and ok_c:
            bond_len_CaC.append((pV[i+1]-pV[i+2]).norm())

        # compute bond angles
        if ok_prev_c and ok_n and ok_ca:
            bond_angle_CNCa.append(calc_angle(pV[i-1], pV[i], pV[i+1]))  # C-N-Ca
        if ok_n and ok_ca and ok_c:
            bond_angle_NCaC.append(calc_angle(pV[i], pV[i+1], pV[i+2]))  # N-Ca-C
        if ok_ca and ok_c and ok_next_n:
            bond_angle_CaCN.append(calc_angle(pV[i+1], pV[i+2], pV[i+3]))  # Ca-C-N

        # compute dihedral angles
        if ok_prev_c and ok_n and ok_ca and ok_c:
            # points i-1 .. i+2, i.e. C-N-Ca-C under the naming above
            phi.append(calc_dihedral(pV[i-1], pV[i], pV[i+1], pV[i+2]))
        else:
            phi.append(INVALID_ANGLE)

        if ok_n and ok_ca and ok_c and ok_next_n:
            # points i .. i+3, i.e. N-Ca-C-N under the naming above
            psi.append(calc_dihedral(pV[i], pV[i+1], pV[i+2], pV[i+3]))
        else:
            psi.append(INVALID_ANGLE)

        if ok_ca and ok_c and ok_next_n and ok_next_ca:
            # points i+1 .. i+4, i.e. Ca-C-N-Ca under the naming above
            omega.append(calc_dihedral(pV[i+1], pV[i+2], pV[i+3], pV[i+4]))
        else:
            omega.append(INVALID_ANGLE)

    return (phi, psi, omega, bond_angle_NCaC, bond_angle_CaCN,
            bond_angle_CNCa, bond_len_CN, bond_len_NCa, bond_len_CaC)
def CalcTwoROriMatrix(coordinates):
    """Compute pairwise two-residue orientation matrices.

    ``coordinates`` is a sequence with one entry per residue; an entry is
    either None or a mapping with 'N', 'CA', 'CB' values (each possibly
    None) and optionally 'vCB'.

    For every ordered pair (i, j), i != j, three quantities are written,
    scaled by 180/pi:

    * 'Ca1Cb1Cb2Ca2': dihedral CA_i - CB_i - CB_j - CA_j
    * 'N1Ca1Cb1Cb2':  dihedral N_i - CA_i - CB_i - CB_j
    * 'Ca1Cb1Cb2':    angle    CA_i - CB_i - CB_j

    A CB counts as valid when it is not None and lies at least 0.1 away from
    the CA.  Residue i falls back to 'vCB' when its CB is not valid.  For
    residue j, the dihedral 'Ca1Cb1Cb2Ca2' falls back to 'vCB' only, while
    the other two fall back to CA first and then 'vCB'.  Entries that cannot
    be computed keep ``InvalidDegree``; the diagonal is set to
    ``ValueOfSelf``.

    Returns:
        dict mapping the three names above to (seqLen, seqLen) float16
        arrays.
    """
    seqLen = len(coordinates)
    apts = ['Ca1Cb1Cb2Ca2', 'N1Ca1Cb1Cb2', 'Ca1Cb1Cb2']
    oriMatrix = {
        apt: np.full((seqLen, seqLen), InvalidDegree, dtype=np.float16)
        for apt in apts
    }

    def ValidCB(c):
        if c['CB'] is None:
            return False
        ## when CB is copied from CA, we do not use it to calculate some angles
        if c['CA'] is not None and np.linalg.norm(c['CB'] - c['CA']) < 0.1:
            return False
        return True

    def VirtualCB(c):
        # `in` replaces dict.has_key(), which does not exist in Python 3.
        return c['vCB'] if 'vCB' in c else None

    for i in range(seqLen):
        ci = coordinates[i]
        if ci is None:
            continue

        ## we cannot replace ci CB by its CA atom since CA itself is needed for the three angles
        if ValidCB(ci):
            cicb = ci['CB']
        else:
            cicb = VirtualCB(ci)
            if cicb is None:
                continue

        # Every quantity below needs CA of residue i.
        if ci['CA'] is None:
            continue

        for j in range(seqLen):
            if i == j:
                continue
            cj = coordinates[j]
            if cj is None:
                continue

            cjHasCB = ValidCB(cj)
            cjvcb = None if cjHasCB else VirtualCB(cj)

            ## CA of cj is part of this dihedral, so it cannot stand in for CB
            if cj['CA'] is not None:
                if cjHasCB:
                    oriMatrix['Ca1Cb1Cb2Ca2'][i, j] = calc_dihedral(
                        ci['CA'], cicb, cj['CB'], cj['CA']) * 180 / np.pi
                elif cjvcb is not None:
                    oriMatrix['Ca1Cb1Cb2Ca2'][i, j] = calc_dihedral(
                        ci['CA'], cicb, cjvcb, cj['CA']) * 180 / np.pi

            ## for the other two: CB, else CA if it exists, else vCB
            if cjHasCB:
                cjRef = cj['CB']
            elif cj['CA'] is not None:
                cjRef = cj['CA']
            else:
                cjRef = cjvcb
            if cjRef is None:
                continue

            if ci['N'] is not None:
                oriMatrix['N1Ca1Cb1Cb2'][i, j] = calc_dihedral(
                    ci['N'], ci['CA'], cicb, cjRef) * 180 / np.pi
            oriMatrix['Ca1Cb1Cb2'][i, j] = calc_angle(
                ci['CA'], cicb, cjRef) * 180 / np.pi

    for apt in apts:
        np.fill_diagonal(oriMatrix[apt], ValueOfSelf)

    return oriMatrix
def calculateCoordinates(refA: Residue, refB: Residue, refC: Residue, L: float, ang: float, di: float) -> np.ndarray:
    """Return the coordinates of a new point D placed relative to A, B and C.

    Only ``get_vector()`` is called on the three references.
    NOTE(review): they are annotated as ``Residue``, but ``add_terminal_OXT``
    passes atom objects -- confirm the intended type.

    Parameters:
        refA, refB, refC: reference objects providing ``get_vector()``.
        L: length of the vector from C to the new point.
        ang: angle in degrees; enters through ``cos(ang)`` in the dot
            product of (B - C) with the new vector.
        di: target dihedral A-B-C-D in degrees.

    Returns:
        The array from ``get_array()`` of the final position vector.

    The point is first solved in closed form for ``L`` and ``ang``, then
    rotated about the B->C axis by the difference between ``di`` and the
    dihedral that the closed-form solution happens to have.
    """
    AV = refA.get_vector()
    BV = refB.get_vector()
    CV = refC.get_vector()

    # Work relative to C: CA = A - C, CB = B - C.
    CA = AV - CV
    CB = BV - CV

    ##CA vector
    AX = CA[0]
    AY = CA[1]
    AZ = CA[2]

    ##CB vector
    BX = CB[0]
    BY = CB[1]
    BZ = CB[2]

    ##Plane Parameters
    # (A, B, G) is the cross product CA x CB.
    A = (AY * BZ) - (AZ * BY)
    B = (AZ * BX) - (AX * BZ)
    G = (AX * BY) - (AY * BX)

    ##Dot Product Constant
    # |CB| * L * cos(ang), with ang converted from degrees to radians.
    F = math.sqrt(BX * BX + BY * BY + BZ * BZ) * L * math.cos(
        ang * (math.pi / 180.0))

    ##Constants
    # Square-root term and denominator shared by the X, Y and Z solutions.
    const = math.sqrt(
        math.pow((B * BZ - BY * G), 2) *
        (-(F * F) * (A * A + B * B + G * G) +
         (B * B * (BX * BX + BZ * BZ) + A * A * (BY * BY + BZ * BZ) -
          (2 * A * BX * BZ * G) + (BX * BX + BY * BY) * G * G -
          (2 * B * BY) * (A * BX + BZ * G)) * L * L))
    denom = ((B * B) * (BX * BX + BZ * BZ) + (A * A) * (BY * BY + BZ * BZ) -
             (2 * A * BX * BZ * G) + (BX * BX + BY * BY) * (G * G) -
             (2 * B * BY) * (A * BX + BZ * G))

    X = ((B * B * BX * F) - (A * B * BY * F) + (F * G) * (-A * BZ + BX * G) +
         const) / denom

    # The general Y/Z formulas divide by (B * BZ - BY * G); this condition
    # selects the inputs for which both products are zero and uses
    # alternative formulas instead.
    if (B == 0 or BZ == 0) and (BY == 0 or G == 0):
        const1 = math.sqrt(G * G * (-A * A * X * X + (B * B + G * G) *
                                    (L - X) * (L + X)))
        Y = ((-A * B * X) + const1) / (B * B + G * G)
        Z = -(A * G * G * X + B * const1) / (G * (B * B + G * G))
    else:
        Y = ((A * A * BY * F) * (B * BZ - BY * G) + G *
             (-F * math.pow(B * BZ - BY * G, 2) + BX * const) - A *
             (B * B * BX * BZ * F - B * BX * BY * F * G + BZ * const)) / (
                 (B * BZ - BY * G) * denom)
        Z = ((A * A * BZ * F) * (B * BZ - BY * G) +
             (B * F) * math.pow(B * BZ - BY * G, 2) + (A * BX * F * G) *
             (-B * BZ + BY * G) - B * BX * const + A * BY * const) / (
                 (B * BZ - BY * G) * denom)

    # Get the new Vector from the origin
    D = Vector(X, Y, Z) + CV
    with warnings.catch_warnings():
        # ignore inconsequential warning
        warnings.simplefilter("ignore")
        # Dihedral (degrees) of the closed-form solution.
        temp = calc_dihedral(AV, BV, CV, D) * (180.0 / math.pi)

    # Rotate about the B->C axis (through B) by the remaining difference so
    # the dihedral becomes the requested one.
    di = di - temp
    rot = rotaxis(math.pi * (di / 180.0), CV - BV)
    D = (D - BV).left_multiply(rot) + BV

    return D.get_array()
vector2 = atom2.get_vector() vector3 = atom3.get_vector() angle = calc_angle(vector1, vector2, vector3) print(angle) # 각(별)도 # 0.5872530070961592 atom1 = structure[0]["A"][415]["CA"] atom2 = structure[0]["A"][423]["CA"] atom3 = structure[0]["A"][431]["CA"] atom4 = structure[0]["A"][439]["CA"] vector1 = atom1.get_vector() vector2 = atom2.get_vector() vector3 = atom3.get_vector() vector4 = atom4.get_vector() torsion = calc_dihedral(vector1, vector2, vector3, vector4) print(torsion) # Torsion angle 계산 # 1.28386400091194 model = structure[0] model.atom_to_internal_coordinates() for r in model.get_residues(): if r.internal_coord: print( r, r.internal_coord.get_angle("psi"), r.internal_coord.get_angle("phi"), r.internal_coord.get_angle("omega"), # or "omg" r.internal_coord.get_angle("chi2"), r.internal_coord.get_angle("CB:CA:C"),
def pdb_to_npz(npz_name, pdb_file=False, mmCIF_file=False, std=1):
    """ Convert a pdb/mcif to trRosetta distances/angles

    Parses model 0 of the given structure and, for every ordered pair of
    amino-acid residues (i, j), fills four binned matrices that are written
    to ``npz_name`` with ``np.savez_compressed`` under the keys ``dist``,
    ``omega``, ``theta`` and ``phi``.

    Parameters:
        npz_name: output target passed to ``np.savez_compressed``.
        pdb_file: file object for a PDB file; takes precedence when truthy.
            Its ``.name`` minus the last four characters is the structure id.
        mmCIF_file: file object for an mmCIF file, used when ``pdb_file``
            is falsy.
        std: divided by the distance bin width (0.5) to obtain the z-score
            step used when spreading a distance over neighbouring bins.

    If neither file is given, a message is printed and ``sys.exit()`` is
    called.

    Matrix layout (last axis): bin 0 receives ``1 - minvalue`` for the
    diagonal and for pairs further apart than 20; the remaining bins start
    at a small uniform value and receive normal-CDF slices centred on the
    observed bin (see the ``while`` loops).
    """
    if pdb_file:
        from Bio.PDB.PDBParser import PDBParser
        bio_parser = PDBParser(PERMISSIVE=1)
        structure_file = pdb_file
        structure_id = pdb_file.name[:-4]
    elif mmCIF_file:
        from Bio.PDB.MMCIFParser import MMCIFParser
        bio_parser = MMCIFParser()
        structure_file = mmCIF_file
        structure_id = mmCIF_file.name[:-4]
    else:
        print("No file given: one pdb or one mmCIF file has to be definied")
        sys.exit()

    # Load structure
    structure = bio_parser.get_structure(structure_id, structure_file)

    # Get residues and length of protein
    # (only residues accepted by is_aa are counted, across all chains)
    residues = []
    for chain in structure[0]:
        for residue1 in structure[0][chain.id]:
            if not is_aa(residue1):
                continue
            residues.append(residue1.get_resname())
    plen = len(residues)

    # Setup bins and step for the final matrix
    DIST_STEP = 0.5
    OMEGA_STEP = 15
    THETA_STEP = 15
    PHI_STEP = 15

    # z-score width of half a distance bin; angles use a fixed step of 1.
    z_per_bin = std/DIST_STEP
    z_step = z_per_bin/2
    angle_z_step = 1
    minvalue = 0.01

    dist_wanted_bins = (20 - 2)/DIST_STEP
    omega_wanted_bins = 360/OMEGA_STEP
    theta_wanted_bins = 360/THETA_STEP
    phi_wanted_bins = 180/PHI_STEP
    # Spreading stops once this much probability mass has been distributed.
    cumm_cutoff = 0.899

    # cb_lst = get_cb_coordinates(open(args.pdb_file, 'r'), "A")
    # contact_mat = get_cb_contacts(len(residues))
    # Every bin starts at a small uniform value.
    dist_mat = np.full((plen, plen, 37), minvalue/36)
    omega_mat = np.full((plen, plen, 25), minvalue/24)
    theta_mat = np.full((plen, plen, 25), minvalue/24)
    phi_mat = np.full((plen, plen, 13), minvalue/12)

    # Bin edges: distances from 2 in steps of 0.5, omega/theta from -180 and
    # phi from 0 in steps of 15.
    dist_bins = [i for i in np.arange(2, 2 + (dist_wanted_bins)*0.5, 0.5)]
    omega_bins = [i for i in np.arange(-180, -180 + (omega_wanted_bins)*OMEGA_STEP, OMEGA_STEP)]
    theta_bins = [i for i in np.arange(-180, -180 + (theta_wanted_bins)*THETA_STEP, THETA_STEP)]
    phi_bins = [i for i in np.arange(0, (phi_wanted_bins)*PHI_STEP, PHI_STEP)]

    dist_num_bins = len(dist_bins)
    omega_num_bins = len(omega_bins)
    theta_num_bins = len(theta_bins)
    phi_num_bins = len(phi_bins)

    # Iterate over all residues and calculate distances
    # i and j index the amino-acid residues only (non-aa entries are skipped
    # without advancing the counters).
    i = 0
    j = 0
    for chain in structure[0]:
        for residue1 in structure[0][chain.id]:
            # Only use real atoms, not HET or water
            if not is_aa(residue1):
                continue

            # If the residue lacks CB (Glycine etc), create a virtual
            if residue1.has_id('CB'):
                c1B = residue1['CB'].get_vector()
            else:
                c1B = _virtual_cb_vector(residue1)

            j = 0
            # NOTE: this inner loop rebinds the name `chain`; the outer
            # residue iterator was already created, so iteration continues.
            for chain in structure[0]:
                for residue2 in structure[0][chain.id]:
                    symm = False
                    if not is_aa(residue2):
                        continue
                    # print(i,j)
                    # Diagonal: all mass in bin 0.
                    if i == j:
                        dist_mat[i, j, 0] = (1-minvalue)
                        omega_mat[i, j, 0] = (1-minvalue)
                        theta_mat[i, j, 0] = (1-minvalue)
                        phi_mat[i, j, 0] = (1-minvalue)
                        j += 1
                        continue

                    # Lower triangle: reuse the already filled (j, i) entry
                    # for the two symmetric quantities.
                    if i > j:
                        dist_mat[i, j] = dist_mat[j, i]
                        omega_mat[i, j] = omega_mat[j, i]
                        symm = True

                    # If the residue lacks CB (Glycine etc), create a virtual
                    if residue2.has_id('CB'):
                        c2B = residue2['CB'].get_vector()
                    else:
                        c2B = _virtual_cb_vector(residue2)
                    ###############################################
                    dist = (c2B-c1B).norm()

                    # Beyond the cutoff of 20: all mass in bin 0.
                    if dist > 20:
                        dist_mat[i, j, 0] = (1-minvalue)
                        omega_mat[i, j, 0] = (1-minvalue)
                        theta_mat[i, j, 0] = (1-minvalue)
                        phi_mat[i, j, 0] = (1-minvalue)
                    else:
                        # Dist and omega are symmetrical and have already been copied
                        if not symm:
                            ix = np.digitize(dist, dist_bins)
                            cum_prob = 0
                            b_step = 0
                            # Spread mass outwards from bin ix: step b adds
                            # the normal-CDF slice between b and b+1 z-steps
                            # to the bins b above and b below ix (clamped to
                            # the valid range), so at b_step 0 the centre bin
                            # is incremented twice.
                            while cum_prob < cumm_cutoff:
                                bin_prob = st.norm.cdf(b_step*-z_step) -\
                                    st.norm.cdf(-z_step*(1+b_step))
                                dist_mat[i, j, np.min([ix+b_step, dist_num_bins])]\
                                    += bin_prob
                                dist_mat[i, j, np.max([ix-b_step, 1])] += bin_prob
                                cum_prob += bin_prob*2
                                b_step += 1

                        ###############################################
                        #
                        # Omega: dihedral CA1 - CB1 - CB2 - CA2, in degrees
                        c1A = residue1['CA'].get_vector()
                        c2A = residue2['CA'].get_vector()

                        if not symm:
                            raw_omega = calc_dihedral(c1A, c1B, c2B, c2A)
                            omega = (raw_omega*180)/pi
                            ix = np.digitize(omega, omega_bins)
                            cum_prob = 0
                            b_step = 0
                            while cum_prob < cumm_cutoff:
                                bin_prob = st.norm.cdf(b_step*-angle_z_step) -\
                                    st.norm.cdf(-angle_z_step*(1+b_step))
                                omega_mat[i, j, np.min([ix+b_step, omega_num_bins])]\
                                    += bin_prob
                                omega_mat[i, j, np.max([ix-b_step, 1])] += bin_prob
                                cum_prob += bin_prob*2
                                b_step += 1

                        ###############################################
                        #
                        # Theta: dihedral N1 - CA1 - CB1 - CB2, in degrees;
                        # computed for every pair (it is never copied from
                        # the (j, i) entry)
                        N1 = residue1['N'].get_vector()
                        raw_theta = calc_dihedral(N1, c1A, c1B, c2B)
                        theta = (raw_theta*180)/pi
                        ix = np.digitize(theta, theta_bins)
                        cum_prob = 0
                        b_step = 0
                        while cum_prob < cumm_cutoff:
                            bin_prob = st.norm.cdf(b_step*-angle_z_step) -\
                                st.norm.cdf(-angle_z_step*(1+b_step))
                            theta_mat[i, j, np.min([ix+b_step, theta_num_bins])]\
                                += bin_prob
                            theta_mat[i, j, np.max([ix-b_step, 1])] += bin_prob
                            cum_prob += bin_prob*2
                            b_step += 1

                        ###############################################
                        #
                        # Phi: angle CA1 - CB1 - CB2, in degrees; also
                        # computed for every pair
                        raw_phi = calc_angle(c1A, c1B, c2B)
                        phi = (raw_phi*180)/pi
                        ix = np.digitize(phi, phi_bins)
                        cum_prob = 0
                        b_step = 0
                        while cum_prob < cumm_cutoff:
                            bin_prob = st.norm.cdf(b_step*-angle_z_step) -\
                                st.norm.cdf(-angle_z_step*(1+b_step))
                            phi_mat[i, j, np.min([ix+b_step, phi_num_bins])]\
                                += bin_prob
                            phi_mat[i, j, np.max([ix-b_step, 1])] += bin_prob
                            cum_prob += bin_prob*2
                            b_step += 1
                    j += 1
            i += 1
    np.savez_compressed(npz_name, dist=dist_mat, omega=omega_mat, theta=theta_mat, phi=phi_mat)
def calc_dihedral(self,atom1,atom2,atom3,atom4):
    """Return the dihedral through four atoms, scaled by 180/pi.

    The module-level ``calc_dihedral`` is applied to the atoms' vectors; a
    negative result is shifted up by 360 so the returned value is never
    negative.
    """
    vectors = [atom.get_vector() for atom in (atom1, atom2, atom3, atom4)]
    scaled = calc_dihedral(*vectors) * (180/math.pi)
    return scaled + 360 if scaled < 0 else scaled