def get_side_chain_vector(residue):
    """
    Average the unit vectors pointing from C-alpha to the side-chain atoms.

    Every atom after the first four entries of
    ``residue.get_unpacked_list()`` is treated as a side-chain atom
    (presumably the first four are the backbone atoms -- confirm against the
    atom order produced by the parser).  When there is no such atom (e.g.
    glycine) the C and N atoms are used instead and their directions are
    negated.

    Returns a tuple ``(Vector(C-alpha coordinates), Vector(mean unit vector))``
    or None when the residue is not an amino acid, lacks a CA atom, or has
    neither side-chain atoms nor both N and C.
    """
    if not (is_aa(residue) and residue.has_id('CA')):
        return None

    ca_coord = residue['CA'].get_coord()
    offsets = np.array(
        [atom.get_coord() for atom in residue.get_unpacked_list()[4:]])

    # Fall back to the backbone neighbours when no side-chain atom exists.
    use_backbone = len(offsets) < 1
    if use_backbone:
        if not (residue.has_id('N') and residue.has_id('C')):
            return None
        offsets = np.array(
            [residue['C'].get_coord(), residue['N'].get_coord()])

    offsets = offsets - ca_coord
    if use_backbone:
        # Point away from the backbone atoms rather than towards them.
        offsets = -offsets

    lengths = np.sum(abs(offsets) ** 2, axis=-1) ** (1. / 2)
    unit_vectors = offsets / lengths[:, np.newaxis]
    return (Vector(ca_coord), Vector(unit_vectors.mean(axis=0)))
def fromDihedral(A, B, C, bond_BC, bond_CD, angle_BCD, torsion_BC):
    """
    Compute a new point D from three points A, B, C, a dihedral angle
    (torsion_BC) and a bond angle (angle_BCD).

    D is first written in spherical coordinates in a local frame attached to
    C, then rotated into the global frame and translated to C.  See Parsons
    et al. for details.

    ``bond_BC`` is used to normalise the B->C direction, so it has to be the
    actual B-C distance.  The angles are passed straight to ``cos``/``sin``.
    """
    # The construction works with the supplement of the bond angle.
    polar = pi - angle_BCD
    sin_polar = sin(polar)

    # D in the local frame, scaled to the C-D bond length.
    # (assumes a Bio.PDB-style Vector: ``**`` with a scalar scales, ``**``
    # with another Vector is the cross product -- TODO confirm)
    local_D = Vector(cos(polar),
                     cos(torsion_BC) * sin_polar,
                     sin(torsion_BC) * sin_polar) ** bond_CD

    # Axes of the local frame expressed in global coordinates.
    axis_bc = (C - B) / float(bond_BC)  # unit length thanks to bond_BC
    normal = (B - A) ** axis_bc
    normal.normalize()
    third_axis = normal ** axis_bc  # already unit length
    frame = (array([axis_bc.get_array(),
                    third_axis.get_array(),
                    normal.get_array()])).T

    # Rotate into the global frame, then translate to C.
    return local_D.left_multiply(frame) + C
def COM(object):
    """
    Return the mass-weighted centre of an entity as a Vector.

    ``object`` is anything exposing ``get_atoms()`` whose atoms provide
    ``get_vector()`` and ``mass`` (the parameter name shadows the builtin but
    is kept for keyword-argument compatibility).

    Raises ValueError when the accumulated mass is zero (for example an
    entity without atoms); previously this produced a NaN vector or failed
    inside the division.
    """
    weighted_sum = np.zeros(3)
    total_mass = 0.0
    for atom in object.get_atoms():
        # get_array() is the public accessor for the coordinates that the
        # original read through the private ``_ar`` attribute.
        weighted_sum += atom.get_vector().get_array() * atom.mass
        total_mass += atom.mass

    if total_mass == 0.0:
        raise ValueError("cannot compute a centre of mass: total mass is zero")

    # Plain division replaces the explicit ``__div__`` call, which is only
    # available where the Python 2 division hook is defined.
    return Vector(weighted_sum / total_mass)
def test_transform(self):
    """Transform entities (rotation and translation)."""
    # The same rotation and translation are applied to every entity level.
    rotation = rotmat(Vector(1, 3, 5), Vector(1, 0, 0))
    translation = numpy.array((2.4, 0, 1), 'f')

    for entity in (self.s, self.m, self.c, self.r, self.a):
        before = self.get_pos(entity)
        entity.transform(rotation, translation)
        after = self.get_pos(entity)

        # Expected position computed independently of entity.transform().
        expected = numpy.dot(before, rotation) + translation
        for axis in (0, 1, 2):
            self.assertAlmostEqual(after[axis], expected[axis])
def calculateCoordinates(refA, refB, refC, L, ang, di):
    """
    Compute the coordinates of a new point D from three reference atoms.

    refA, refB, refC -- objects exposing ``get_vector()``
    L   -- length of the vector from refC to the new point
    ang -- angle in degrees (converted with ``math.pi/180.0`` below)
    di  -- target dihedral A-B-C-D in degrees

    A candidate point is obtained from a closed-form solution and then
    rotated about the B->C axis by the difference between ``di`` and the
    dihedral of the candidate.

    Returns the result of ``get_array()`` on the final Vector.
    """
    AV=refA.get_vector()
    BV=refB.get_vector()
    CV=refC.get_vector()
    # Work relative to C: CA and CB point from C to A and from C to B.
    CA=AV-CV
    CB=BV-CV
    # Components of CA
    AX=CA[0]
    AY=CA[1]
    AZ=CA[2]
    # Components of CB
    BX=CB[0]
    BY=CB[1]
    BZ=CB[2]
    # Plane parameters: (A, B, G) are the components of the cross product CA x CB.
    A=(AY*BZ)-(AZ*BY)
    B=(AZ*BX)-(AX*BZ)
    G=(AX*BY)-(AY*BX)
    # Dot-product constant: |CB| * L * cos(ang)
    F= math.sqrt(BX*BX + BY*BY + BZ*BZ) * L * math.cos(ang*(math.pi/180.0))
    # Shared terms of the closed-form solution.
    # NOTE(review): math.sqrt raises ValueError if the radicand is negative
    # and denom can be zero for degenerate input -- neither is guarded here.
    const=math.sqrt( math.pow((B*BZ-BY*G),2) *(-(F*F)*(A*A+B*B+G*G)+(B*B*(BX*BX+BZ*BZ) + A*A*(BY*BY+BZ*BZ)- (2*A*BX*BZ*G) + (BX*BX+ BY*BY)*G*G - (2*B*BY)*(A*BX+BZ*G))*L*L))
    denom= (B*B)*(BX*BX+BZ*BZ)+ (A*A)*(BY*BY+BZ*BZ) - (2*A*BX*BZ*G) + (BX*BX+BY*BY)*(G*G) - (2*B*BY)*(A*BX+BZ*G)
    X= ((B*B*BX*F)-(A*B*BY*F)+(F*G)*(-A*BZ+BX*G)+const)/denom
    # When (B*BZ - BY*G) is exactly zero the general formulas below would
    # divide by zero, so a separate set of formulas is used.
    if((B==0 or BZ==0) and (BY==0 or G==0)):
        const1=math.sqrt( G*G*(-A*A*X*X+(B*B+G*G)*(L-X)*(L+X)))
        Y= ((-A*B*X)+const1)/(B*B+G*G)
        Z= -(A*G*G*X+B*const1)/(G*(B*B+G*G))
    else:
        Y= ((A*A*BY*F)*(B*BZ-BY*G)+ G*( -F*math.pow(B*BZ-BY*G,2) + BX*const) - A*( B*B*BX*BZ*F- B*BX*BY*F*G + BZ*const)) / ((B*BZ-BY*G)*denom)
        Z= ((A*A*BZ*F)*(B*BZ-BY*G) + (B*F)*math.pow(B*BZ-BY*G,2) + (A*BX*F*G)*(-B*BZ+BY*G) - B*BX*const + A*BY*const) / ((B*BZ-BY*G)*denom)
    # (X, Y, Z) is relative to C; shift it back to absolute coordinates.
    D=Vector(X, Y, Z) + CV
    with warnings.catch_warnings():
        # ignore inconsequential warning
        warnings.simplefilter("ignore")
        # Dihedral of the candidate point, converted to degrees.
        temp=calc_dihedral(AV, BV, CV, D)*(180.0/math.pi)
    # Rotate about the B->C axis by the remaining dihedral difference.
    di=di-temp
    rot= rotaxis(math.pi*(di/180.0), CV-BV)
    D=(D-BV).left_multiply(rot)+BV
    return D.get_array()
def calculateCoordinates(refA, refB, refC, L, ang, di):
    """
    Compute a new point D from three reference atoms and return it as a Vector.

    refA, refB, refC -- objects exposing ``get_vector()``
    L   -- length of the vector from refC to the new point
    ang -- angle in degrees (converted with ``math.pi/180.0`` below)
    di  -- target dihedral A-B-C-D in degrees

    A candidate point is obtained from a closed-form solution and then
    rotated about the B->C axis by the difference between ``di`` and the
    dihedral of the candidate.
    """
    AV=refA.get_vector(); BV=refB.get_vector(); CV=refC.get_vector()
    # Work relative to C: CA and CB point from C to A and from C to B.
    CA=AV-CV; CB=BV-CV
    # Components of CA
    AX=CA[0]; AY=CA[1]; AZ=CA[2]
    # Components of CB
    BX=CB[0]; BY=CB[1]; BZ=CB[2]
    # Plane parameters: (A, B, G) are the components of the cross product CA x CB.
    A=(AY*BZ)-(AZ*BY); B=(AZ*BX)-(AX*BZ); G=(AX*BY)-(AY*BX)
    # Dot-product constant: |CB| * L * cos(ang)
    F= math.sqrt(BX*BX + BY*BY + BZ*BZ) * L * math.cos(ang*(math.pi/180.0))
    # Shared terms of the closed-form solution.
    # NOTE(review): math.sqrt raises ValueError if the radicand is negative
    # and denom can be zero for degenerate input -- neither is guarded here.
    const=math.sqrt( math.pow((B*BZ-BY*G),2) *(-(F*F)*(A*A+B*B+G*G)+(B*B*(BX*BX+BZ*BZ) + A*A*(BY*BY+BZ*BZ)- (2*A*BX*BZ*G) + (BX*BX+ BY*BY)*G*G - (2*B*BY)*(A*BX+BZ*G))*L*L))
    denom= (B*B)*(BX*BX+BZ*BZ)+ (A*A)*(BY*BY+BZ*BZ) - (2*A*BX*BZ*G) + (BX*BX+BY*BY)*(G*G) - (2*B*BY)*(A*BX+BZ*G)
    X= ((B*B*BX*F)-(A*B*BY*F)+(F*G)*(-A*BZ+BX*G)+const)/denom
    # When (B*BZ - BY*G) is exactly zero the general formulas below would
    # divide by zero, so a separate set of formulas is used.
    if((B==0 or BZ==0) and (BY==0 or G==0)):
        const1=math.sqrt( G*G*(-A*A*X*X+(B*B+G*G)*(L-X)*(L+X)))
        Y= ((-A*B*X)+const1)/(B*B+G*G)
        Z= -(A*G*G*X+B*const1)/(G*(B*B+G*G))
    else:
        Y= ((A*A*BY*F)*(B*BZ-BY*G)+ G*( -F*math.pow(B*BZ-BY*G,2) + BX*const) - A*( B*B*BX*BZ*F- B*BX*BY*F*G + BZ*const)) / ((B*BZ-BY*G)*denom)
        Z= ((A*A*BZ*F)*(B*BZ-BY*G) + (B*F)*math.pow(B*BZ-BY*G,2) + (A*BX*F*G)*(-B*BZ+BY*G) - B*BX*const + A*BY*const) / ((B*BZ-BY*G)*denom)
    # (X, Y, Z) is relative to C; shift it back to absolute coordinates.
    D=Vector(X, Y, Z) + CV
    with warnings.catch_warnings():
        # ignore inconsequential warning
        warnings.simplefilter("ignore")
        # Dihedral of the candidate point, converted to degrees.
        temp=calc_dihedral(AV, BV, CV, D)*(180.0/math.pi)
    # Rotate about the B->C axis by the remaining dihedral difference.
    di=di-temp
    rot= rotaxis(math.pi*(di/180.0), CV-BV)
    D=(D-BV).left_multiply(rot)+BV
    return D
def dihedral_calcul(self, others):
    """
    KC - dihedral score calculated

    For every residue index of ``self.res`` the C-CA-CB-CG dihedral (in
    degrees) is computed in each model of ``others`` plus ``self``; the
    spread returned by ``stat_ecart_type`` is stored in
    ``self.angle_dihedres``.  Residues for which the computation fails (for
    instance a missing CB or CG atom) get the placeholder value 0.01.

    Side effects: plots the per-residue values to "angle_dihedres.png",
    then calls ``set_angles_dihedres`` and ``create_bfactor_file`` (with the
    values rescaled to 0-100) on every model, ``self`` included.

    The caller's ``others`` list is no longer modified: the original aliased
    it and appended ``self`` to it on every call.
    """
    models = list(others) + [self]

    def dihedral_degrees(residue):
        return calc_dihedral(
            Vector(residue['C'].get_coord()),
            Vector(residue['CA'].get_coord()),
            Vector(residue['CB'].get_coord()),
            Vector(residue['CG'].get_coord())) / math.pi * 180

    self.angle_dihedres = []
    for n in range(len(self.res)):
        try:
            ecart_type = stat_ecart_type(
                [dihedral_degrees(model.get_res()[n]) for model in models])
        except Exception:
            # Best effort, as before, but KeyboardInterrupt/SystemExit are
            # no longer swallowed by a bare ``except``.
            ecart_type = 0.01
        self.angle_dihedres.append(ecart_type)

    plot([x + 1 for x in range(len(self.angle_dihedres))],
         self.angle_dihedres)
    savefig("angle_dihedres.png")
    clf()

    mini = min(self.angle_dihedres)
    maxi = max(self.angle_dihedres)
    spread = maxi - mini
    if spread == 0:
        # All residues share the same value: avoid the division by zero and
        # map everything to the bottom of the scale.
        bfactor_angle = [0.0 for _ in self.angle_dihedres]
    else:
        bfactor_angle = [(var - mini) * 100 / spread
                         for var in self.angle_dihedres]

    assert max(bfactor_angle) <= 100, \
        "maximum de bfactor trop haut apres normalisation: " + str(
            max(bfactor_angle))
    assert min(bfactor_angle) >= 0, \
        "minimum de bfactor trop bas apres normalisation: " + str(
            min(bfactor_angle))

    # ``self`` is included here because the original iterated over the list
    # it had just extended with ``self``.
    for model in models:
        model.set_angles_dihedres(self.angle_dihedres)
        model.create_bfactor_file(bfactor_angle, "_dihedre")
def add_frame(img):
    """
    Read the next frame from the global ``infile`` and accumulate the
    pairwise distances between the two chains into ``img``.

    Lines are skipped up to the next 'ITEM: ATOMS' header.  Each following
    line is split on whitespace; only rows whose second field is '1' are
    used.  Rows whose first field is <= the global ``chain_boundary`` go to
    the first chain, the others to the second.  The last three fields are
    the coordinates, mapped with ``value * 400 - 200``.  Reading stops at
    the next 'ITEM:' line, at an empty line, or at end of file (which sets
    the global ``end`` flag).

    ``img[i][j]`` is incremented in place by the distance between atom i of
    the first chain and atom j of the second chain.
    """
    global infile
    global chain_boundary
    global end

    first_chain = []
    second_chain = []

    # Advance to the atom section of the next frame.
    current = []
    while current[:11] != 'ITEM: ATOMS':
        current = next(infile)
    current = next(infile)

    while current[:5] not in ['ITEM:', '\n']:
        fields = current.split()
        if fields[1] == '1':
            if int(fields[0]) <= chain_boundary:
                target = first_chain
            else:
                target = second_chain
            position = Vector(fields[-3:])
            position = Vector(
                (position._ar * np.array(400.0)) + np.array(-200.0))
            target.append(position)
        try:
            current = next(infile)
        except StopIteration:
            end = True
            break

    for i, atom_a in enumerate(first_chain):
        for j, atom_b in enumerate(second_chain):
            img[i][j] += (atom_a - atom_b).norm()
def center(self):
    """
    Pocket centroid.

    Computed by ``self.center_of_mass`` (called with ``geometric=False``)
    from the non-None entries of the "ca.atom" column of ``self.ca_atoms``.

    Returns
    -------
    Bio.PDB.vectors.Vector
        Coordinates for the pocket centroid.
    """
    vectors = [
        vector for vector in self.ca_atoms["ca.atom"].to_list()
        if vector is not None
    ]
    return Vector(self.center_of_mass(vectors, geometric=False))
def __init__(self, symbol, name, atomid, coords, bfactor, load_json=False):
    """
    Initialise the atom record.

    With ``load_json`` set, every attribute is created as None (the other
    arguments are ignored) so that it can be filled in afterwards.
    Otherwise the attributes are taken from the arguments: ``symbol`` is
    capitalised, ``mc_sc`` is True for the names CA, C, N and O, the atomic
    mass is looked up in (and cached into) ``element_mass``, and ``vector``
    is built from the first three entries of ``coords``.
    """
    if load_json:
        # Placeholders only; the real values are assigned later.
        self.symbol = None
        self.name = None
        self.atomid = None
        self.coords = None
        self.mc_sc = None
        self.atomic_mass = None
        self.vector = None
        self.bfactor = None
        return

    self.symbol = symbol.capitalize()
    self.name = name
    self.atomid = atomid
    self.coords = coords  # (), for consistency save everything as np.array()
    self.mc_sc = self.name in ("CA", "C", "N", "O")

    # Look the mass up once per element symbol and cache it.
    if element_mass.get(self.symbol) is None:
        element_mass[self.symbol] = element(self.symbol).atomic_weight
    self.atomic_mass = element_mass[self.symbol]

    self.vector = Vector(x=self.coords[0],
                         y=self.coords[1],
                         z=self.coords[2])
    self.bfactor = bfactor
def create_random_pdb(separation_distance, move_chain_id, fix_chain_id, input_file_name, output_pdb_name, model_number = 0):
    """
    Write a copy of a two-chain structure with both chains re-oriented and
    the moved chain placed at a chosen distance from the fixed chain.

    Steps:
      1. translate both chains so the centre of mass (COM) of the fixed
         chain is the origin;
      2. rotate the moved chain about its own COM with ``generate_3d()`` and
         put that COM at ``separation_distance`` from the origin along its
         original direction;
      3. rotate the fixed chain about the origin with a second
         ``generate_3d()`` matrix;
      4. save the structure to ``output_pdb_name``.

    Returns a dict with the input/output separations, the chain COMs before
    and after, and "Max_distance", the largest atom distance from the origin.

    Differences from the previous implementation: coordinates are stored on
    the atoms as arrays rather than Vector objects, the final COMs are
    obtained from ``COM()`` instead of an inline copy that relied on
    ``Vector.__div__``, and the loop-invariant target position is computed
    once.
    """
    results = {}
    structure = PDBParser(PERMISSIVE=1).get_structure('whatever', input_file_name)
    chain_moved = structure[model_number][move_chain_id]
    chain_fixed = structure[model_number][fix_chain_id]

    old_fixed_centre = COM(chain_fixed)
    old_moved_centre = COM(chain_moved)

    # Step 1: make the COM of the fixed chain the origin for both chains.
    for chain in (chain_moved, chain_fixed):
        for atom in chain.get_atoms():
            shifted = atom.get_vector() - old_fixed_centre
            atom.set_coord(shifted.get_array())

    # COM of the moved chain in the new frame (the fixed COM is the origin).
    moved_centre = old_moved_centre - old_fixed_centre

    results["1_Input_Separation"] = (old_fixed_centre - old_moved_centre).norm()
    results["1_Old_fixed_chain_com"] = old_fixed_centre
    results["1_Old_moved_chain_com"] = old_moved_centre
    results["0_Intended_Output_Separation"] = separation_distance

    # Two draws, in the same order as before: R1 for the fixed chain, R2 for
    # the moved chain.
    R1 = generate_3d()
    R2 = generate_3d()
    max_distance = 0.0

    # Step 2: rotate the moved chain about its COM and place that COM at the
    # requested distance along its original direction.
    # NOTE(review): if both chains share the same COM, moved_centre is the
    # zero vector and this direction is undefined.
    target_centre = Vector(
        moved_centre.normalized().get_array() * separation_distance)
    for atom in chain_moved.get_atoms():
        new_position = (atom.get_vector() - moved_centre).left_multiply(R2) + target_centre
        atom.set_coord(new_position.get_array())
        max_distance = max(max_distance, new_position.norm())
    final_moved_centre = COM(chain_moved)

    # Step 3: rotate the fixed chain about the origin.
    for atom in chain_fixed.get_atoms():
        new_position = atom.get_vector().left_multiply(R1)
        atom.set_coord(new_position.get_array())
        max_distance = max(max_distance, new_position.norm())
    final_fixed_centre = COM(chain_fixed)

    # Step 4: write the modified structure.
    w = PDBIO()
    w.set_structure(structure)
    w.save(output_pdb_name)

    results["2_Output_Separation"] = (final_fixed_centre - final_moved_centre).norm()
    results["2_fixed_chain_com"] = final_fixed_centre
    results["2_moved_chain_com"] = final_moved_centre
    results["Max_distance"] = max_distance
    return results
def add_water(refinement_input, ligand_chain, n_waters=2, test=False):
    """
    Insert ``n_waters`` water molecules into every PDB file of
    ``refinement_input`` (the files are rewritten in place).

    Each water gets a random integer oxygen position in [0, 100) per axis
    plus two hydrogens at fixed offsets.  After insertion all waters are
    shifted by -5 along x until a 5.0 neighbour search around the water
    atoms returns no non-water atom.  The final file is written as protein
    lines, TER, water lines, ligand lines, TER.

    refinement_input -- list of PDB file paths
    ligand_chain     -- compared with the residue-name columns
                        (``line[17:20]``), despite the parameter name
    n_waters         -- waters per input; nothing is done (None is returned)
                        when smaller than 1
    test             -- seeds numpy's global RNG with 42

    Returns the list of processed file paths.

    NOTE(review): this block was recovered from whitespace-collapsed source;
    the nesting below is a reconstruction and should be compared with the
    repository version.
    """
    if test:
        np.random.seed(42)
    if n_waters < 1:
        return
    else:
        output = []
        n_inputs = len(refinement_input)
        water_coords = []
        resnums = []
        atomnums = []
        chains = []
        resnames = []
        # get maximum residue and atom numbers
        with open(refinement_input[0], "r") as file:
            protein = file.readlines()
            for line in protein:
                if line.startswith("ATOM") or line.startswith(
                        "HETATM") or line.startswith("TER"):
                    try:
                        # NOTE(review): slices never raise; only line[21] can,
                        # and by then resnums/atomnums were already appended.
                        # The PDB resSeq field starts one column earlier
                        # (index 22) -- confirm 4-digit numbers are handled.
                        resnums.append(line[23:27].strip())
                        atomnums.append(line[7:11].strip())
                        chains.append(line[21])
                        resnames.append(line[17:20])
                    except:
                        # NOTE(review): the exception object is built but
                        # never raised, so short lines are silently ignored.
                        IndexError("Line '{}' is too short".format(line))
        # Number of lines whose residue name equals ligand_chain.
        lig_length = resnames.count(ligand_chain)
        resnums = [int(num) for num in resnums if num]
        max_resnum = max(resnums)
        water_resnums = []
        water_chain = chains[0]  # water chain = 1st protein chain
        # First water atom number: past the largest existing number, leaving
        # room for the ligand lines.
        atomnum = max([int(num) for num in atomnums if num]) + 1 + lig_length
        # One template entry per water atom over all inputs
        # (presumably cs.water holds the O/H/H line templates -- verify).
        water = cs.water * n_waters * n_inputs
        for inp in range(n_inputs):
            for n in range(n_waters):
                O_coords = Vector(
                    [np.random.randint(0, 100) for i in range(3)])
                H1_coords = O_coords + Vector(0.757, 0.586, 0.0)
                H2_coords = O_coords + Vector(-0.757, 0.586, 0.0)
                water_coords = water_coords + [list(O_coords)] + [
                    list(H1_coords)
                ] + [list(H2_coords)]
                max_resnum += 1  # each water must have a different residue number
                water_resnums = water_resnums + [max_resnum] * 3
            # NOTE(review): extra increment per input; its nesting level is
            # not recoverable from the collapsed source -- confirm.
            max_resnum += 1
        water_atomnums = [atomnum + j for j in range(n_waters * 3 * n_inputs)]
        # PDB lines - water
        water_output = []
        for atom, num, resnum, coord in zip(water, water_atomnums,
                                            water_resnums, water_coords):
            coord = ["{:7.4f}".format(c) for c in coord]
            coord = " ".join(coord)
            water_output.append(atom.format(num, water_chain, resnum, coord))
        # Split the water lines into one chunk per input file.
        sliced_water_output = []
        for i in range(0, len(water_output), n_waters * 3):
            sliced_water_output.append(water_output[i:i + n_waters * 3])
        # loop over minimisation inputs
        for inp, w in zip(refinement_input, sliced_water_output):
            new_protein_file = inp
            protein = []
            ligand = []
            # read in protein and ligand lines
            # NOTE(review): ``inp`` is rebound from the path to the file
            # object here; ``inp.name`` further down relies on that.
            with open(inp, "r") as inp:
                lines = inp.readlines()
                for line in lines:
                    if line.startswith("ATOM") or line.startswith("HETATM"):
                        if line[17:20].strip() == ligand_chain:
                            ligand.append(line)
                        else:
                            protein.append(line)
            # add water to PDB
            with open(new_protein_file, "w+") as file:
                for line in protein:
                    file.write(line)
                file.write("\n")
                for line in w:
                    file.write(line)
                file.write("\n")
                for line in ligand:
                    file.write(line)
            # load again with Biopython
            parser = PDBParser()
            structure = parser.get_structure("complex", new_protein_file)
            water_list = []
            protein_list = Selection.unfold_entities(structure, "A")
            temp_protein_file = os.path.join(
                os.path.dirname(inp.name),
                os.path.basename(inp.name).replace(".pdb", "_temp.pdb"))
            # Collect the atoms of every HOH residue (pre-existing waters
            # included).
            for res in structure.get_residues():
                if res.resname == 'HOH':
                    water_list = water_list + Selection.unfold_entities(
                        res, "A")
            # check for water contacts
            contacts5 = []
            for w in water_list:
                contacts5 = contacts5 + NeighborSearch(protein_list).search(
                    w.coord, 5.0, "A")
            contacts5 = [c for c in contacts5 if c not in water_list]  # exclude "self" contacts
            # translate water, if needed
            while contacts5:
                contacts5 = []
                for w in water_list:
                    x, y, z = w.coord
                    w.set_coord([x - 5, y, z])
                    contacts5 = contacts5 + NeighborSearch(
                        protein_list).search(w.coord, 5.0, "A")
                    contacts5 = [c for c in contacts5 if c not in water_list]
            # save final output
            io = PDBIO()
            io.set_structure(structure)
            io.save(temp_protein_file)
            output.append(new_protein_file)
            # Re-read the Biopython output, keep only the water lines and
            # offset their atom numbers by the ligand length.
            new_water_lines = []
            with open(temp_protein_file, "r") as temp:
                temp_lines = temp.readlines()
                for line in temp_lines:
                    if line[17:20].strip() == "HOH":
                        # NOTE(review): str.replace substitutes every
                        # occurrence of that 4-character text in the line,
                        # not only the atom-number columns.
                        line = line.replace(line[7:11],
                                            str(int(line[7:11]) + lig_length))
                        if line[12:15] == "2HW":
                            line = line + "\nTER\n"
                        new_water_lines.append(line)
            # NOTE(review): strips the TER from the second-to-last water
            # line; raises IndexError with fewer than two water lines.
            new_water_lines[-2] = new_water_lines[-2].replace("\nTER\n", "")
            with open(new_protein_file, "w+") as file:
                for line in protein:
                    file.write(line)
                file.write("\nTER\n")
                for line in new_water_lines:
                    file.write(line)
                file.write("\n")
                for line in ligand:
                    file.write(line)
                file.write("TER")
            os.remove(temp_protein_file)
        return output
Atom1.append(atom) elif atom.name == 'SG': Atom2.append(atom) i = 0 for i in range(len(Atom1)): resid.append([Atom1[i], Atom2[i]]) i += 1 j = 0 for j in range(len(list)): dict[list[j]] = resid[j] j += 1 #print(atom1, atom1) for resi in resid: for res in resid: atom1 = resi[0] atom2 = resi[1] atom3 = res[1] atom4 = res[0] distance = atom3 - atom2 v1 = atom1.get_vector() v2 = atom2.get_vector() v3 = atom3.get_vector() v4 = atom4.get_vector() vector = Vector.calc_dihedral(v1, v2, v3, v4) if 85 < vector < 95 : if 1.9 < distance < 2.1: print('S-S:')
def cluster(parametersobject): number_of_orientations = parametersobject.parameterdic['Number_of_orientations'] skip = parametersobject.parameterdic['Skip_initial_frames'] data = [] f_framelist = open('analysis/frames_read.txt', 'w+') framelist = [] for i in range(1, 1+number_of_orientations): with open('analysis/coord_matrix_'+str(i).zfill(3)+'.txt', 'r') as f: l = f.readlines() data.extend([[float(p) for p in line.strip().split()[1:10]] for line in l[skip:]]) f_framelist.write(str(len(l))+'\n') framelist.append(len(l)-skip) f_framelist.close() ms = MeanShift(n_jobs = -2, cluster_all = True) ms.fit(data) labels = ms.labels_ names, numbers = np.unique(labels, return_counts = True) cluster_centers = ms.cluster_centers_ fout = open('analysis/clusters.txt', 'w+') fout.write('label\tcount\ttheta\tphi\ttheta_x\ttheta_y\ttheta_z\tx\ty\tz\tR[0][0]\tR[0][1]\tR[0][2]\tR[1][0]\tR[1][1]\tR[1][2]\tR[2][0]\tR[2][1]\tR[2][2]\n') phi = 0 for i, line in enumerate(cluster_centers): R = [_[:] for _ in [[]]*9] x, y, z = line[0], line[1], line[2] R[0] = line[3:6] R[1] = line[6:9] R[2] = np.cross(R[0], R[1]) V = Vector(x, y, z) if V.norm() > 1e-6: theta = V.angle(Vector(0,0,1)) norm = np.sqrt(x*x + y*y) if norm > 1e-6: phi = np.arctan2(y,x) #otherwise phi isn't updated and the previous value is copied. Keeps it from jumping near the poles. 
else: theta = 0.0 theta_x = np.arctan2(R[2][1], R[2][2]) theta_y = np.arctan2(-R[2][0], np.sqrt(R[2][1]*R[2][1]+R[2][2]*R[2][2])) theta_z = np.arctan2(R[1][0], R[0][0]) fout.write(str(names[i])+'\t'+str(numbers[i])+'\t') fout.write(str(theta)+'\t'+str(phi)+'\t') fout.write(str(theta_x)+'\t'+str(theta_y)+'\t'+str(theta_z)+'\t') for value in line: fout.write(str(value)+'\t') fout.write(str(R[2][0])+'\t'+str(R[2][1])+'\t'+str(R[2][2])+'\t') fout.write('\n') fout.close() n_clusters_ = len(np.unique(labels)) print("Number of estimated clusters:", n_clusters_) classification = open('analysis/frame_cluster_types.txt', 'w+') frame_counter = 0 framelist_counter = 0 for label in labels: if frame_counter<framelist[framelist_counter]: classification.write(str(label)+'\t') frame_counter+=1 else: framelist_counter+=1 classification.write('\n'+str(label)+'\t') frame_counter=1 '''
def cross(v1, v2):
    """Return the cross product v1 x v2 as a Vector.

    Both arguments only need to support indexing with 0, 1 and 2.
    """
    ax, ay, az = v1[0], v1[1], v1[2]
    bx, by, bz = v2[0], v2[1], v2[2]
    return Vector(ay * bz - az * by,
                  az * bx - ax * bz,
                  ax * by - ay * bx)
def find_geometry(metals, structure, permissive=False, all_metals=False, external=None):
    """
    Build distance-constraint strings for the atoms coordinating each metal.

    metals     -- iterable of (atom, residue, chain) triples
    structure  -- structure searched for atoms around each metal
    permissive -- retry ``angle_classification`` with its second argument
                  set to True when the first attempt finds no geometry
    all_metals -- when no geometry is found, constrain every contact within
                  the search distance instead of raising
    external   -- optional iterable of strings; a metal whose
                  "chain:resnum:atomname" text occurs in any of them is
                  skipped.  ``None`` (the default) means no exclusions; it
                  used to raise TypeError.

    Returns a de-duplicated list of strings formatted as
    "spring-distance-chain:resnum:atom-chain:resnum:atom".

    Raises ce.NoGeometryAroundMetal when no geometry is found and
    ``all_metals`` is False.
    """
    if external is None:
        external = []

    output = []
    checked_metals = []
    structure_list = Selection.unfold_entities(structure, "A")
    # Built on first use and shared: the original rebuilt the search
    # structure and repeated the identical query once per residue.
    neighbor_search = None
    # exclude self-contacts, carbons and hydrogens
    excluded_contacts = cs.metals + ['C', 'H']

    for metal in metals:
        # search distance based on metal type
        if metal[0].element == "YB":
            dist = 3.5
        elif metal[0].element == "K":
            dist = 3.3
        else:
            dist = 2.9

        metal_str = "{}:{}:{}".format(metal[2].id, metal[1].get_id()[1],
                                      metal[0].name)
        in_ext = any(metal_str in i for i in external)

        if not in_ext and list(metal[0].coord) not in checked_metals:
            coords = metal[0].coord

            if neighbor_search is None:
                neighbor_search = NeighborSearch(structure_list)
            contacts_atoms = [
                c for c in neighbor_search.search(coords, dist, "A")
                if c.element not in excluded_contacts
            ]

            # Attach residue and chain to every contact atom, in structure
            # order (same ordering as before).
            contacts = []
            for chain in structure.get_chains():
                for residue in chain.get_residues():
                    for atom in contacts_atoms:
                        if atom in residue.get_atoms():
                            contacts.append([atom, residue, chain])

            combinations = list(itertools.combinations(contacts, 2))
            combinations = [list(c) for c in combinations]

            # get all atom - metal - atom angles
            for c in combinations:
                vi = Vector(c[0][0].coord)
                vj = Vector(c[1][0].coord)
                angle = vectors.calc_angle(vi, coords, vj) * 180 / np.pi
                c.append(angle)

            # The decision tree below is reproduced unchanged.
            geo, coordinated_atoms = angle_classification(combinations, False)

            if geo is None and permissive:
                geo, coordinated_atoms = angle_classification(
                    combinations, True)
                if geo is None and all_metals and combinations:
                    geo, coordinated_atoms = angle_classification(
                        combinations, True)
                    if geo:
                        print(
                            "Found {} geometry around {} (residue {}). Adding constraints."
                            .format(geo, metal[0].name, metal[1].get_id()[1]))
                        checked_metals.append(list(metal[0].coord))
                    else:
                        coordinated_atoms = combinations
                        checked_metals.append(list(metal[0].coord))
                        geo = "no"
                        print(
                            "Found {} geometry around {} (residue {}). Adding constraints to all atoms within {}A of the metal."
                            .format(geo, metal[0].name,
                                    metal[1].get_id()[1], dist))
                elif geo is None and not all_metals:
                    raise ce.NoGeometryAroundMetal(
                        "Failed to determine geometry around {} (residue {}). Add constraints manually or set 'constrain_all_metals: true' to constrain all atoms within {}A of the metal."
                        .format(metal[0].name, metal[1].get_id()[1], dist))
                elif geo is None and all_metals and not combinations:
                    print("No atoms coordinated to {} (residue {}).".format(
                        metal[0].name, metal[1].get_id()[1]))
                elif geo:
                    checked_metals.append(list(metal[0].coord))
                    print(
                        "Found {} geometry around {} (residue {}). Adding constraints."
                        .format(geo, metal[0].name, metal[1].get_id()[1]))
            elif geo is None and all_metals and combinations:
                geo, coordinated_atoms = angle_classification(
                    combinations, True)
                if geo is None:
                    geo = "no"
                    coordinated_atoms = combinations
                    checked_metals.append(list(metal[0].coord))
                    print(
                        "Found {} geometry around {} (residue {}). Adding constraints to all atoms within {}A of the metal."
                        .format(geo, metal[0].name, metal[1].get_id()[1],
                                dist))
                else:
                    print(
                        "Found {} geometry around {} (residue {}). Adding constraints."
                        .format(geo, metal[0].name, metal[1].get_id()[1]))
            elif geo is None and all_metals and not combinations:
                print("No atoms coordinated to {} (residue {}).".format(
                    metal[0].name, metal[1].get_id()[1]))
            elif geo is None and not all_metals and not permissive:
                raise ce.NoGeometryAroundMetal(
                    "Failed to determine geometry around {} (residue {}). Add constraints manually or set 'constrain_all_metals: true' to constrain all atoms within {}A of the metal."
                    .format(metal[0].name, metal[1].get_id()[1], dist))
            else:
                checked_metals.append(list(metal[0].coord))
                print(
                    "Found {} geometry around {} (residue {}). Adding constraints."
                    .format(geo, metal[0].name, metal[1].get_id()[1]))

            # format string
            yaml_string = "{}-{}-{}:{}:{}-{}:{}:{}"
            spring_const = 50

            # Unique coordinating atoms, in first-seen order.  ``or []``
            # covers the "no atoms coordinated" paths in case
            # angle_classification returned None for the atoms.
            string_atoms = []
            for c in coordinated_atoms or []:
                atom1, atom2, angle = c
                if atom1 not in string_atoms:
                    string_atoms.append(atom1)
                if atom2 not in string_atoms:
                    string_atoms.append(atom2)

            for atom in string_atoms:
                atomname1 = atom[0].name
                resnum1 = atom[1].get_id()[1]
                chain1 = atom[2].get_id()
                atomname2 = metal[0].name
                resnum2 = metal[1].get_id()[1]
                chain2 = metal[2].get_id()
                atom_dist = atom[0] - metal[0]
                out = yaml_string.format(spring_const, atom_dist, chain1,
                                         resnum1, atomname1, chain2,
                                         resnum2, atomname2)
                output.append(out)

    output = list(set(output))
    if output:
        output = ['{}'.format(o) for o in output]
    return output
def generate_node_features(protein_chains, surface, ns: NeighborSearch, only_ca=Constants.GET_ONLY_CA_ATOMS):
    """
    Attach per-atom node features to the atoms of ``protein_chains``.

    For every residue the function looks up its DSSP record, measures the
    distance of CA/CB to ``surface`` and the residue depth, and then stores
    on each atom (via ``setattr``): neighbouring residue names, depths,
    distance to CA, several angles involving the nearest surface point, the
    DSSP tuple, and neighbour counts (total and per element, overall and
    "above plane") for every radius in ``Constants.NEIGHBOUR_SUM_RADIUS``.

    protein_chains -- chains of one structure; the DSSP file name is derived
                      from the last four characters of the structure id
    surface        -- indexable collection of surface points
    ns             -- neighbour search used for the radius queries
    only_ca        -- when true only the CA atom of each residue is annotated

    Nothing is returned; the atoms are modified in place.

    NOTE(review): this block was recovered from whitespace-collapsed source;
    the nesting below is a reconstruction.
    """
    pdb_id = protein_chains[0].get_parent().full_id[0]
    pdb_id = pdb_id[-4:]
    dssp = make_dssp_dict(os.path.join(Constants.DSSP_PATH, pdb_id + '.dssp'))
    # Timing accumulators; they are only accumulated, never reported, within
    # this function.
    get_residues_t = dssp_key_t = min_dist_t = residue_depth_t = atom_d_t = settattr_t = 0
    for chain in protein_chains:
        start = time.time()
        residue_generator = chain.get_residues()
        get_residues_t += time.time() - start
        # Sliding window (previous, current, next) over the residues.
        # NOTE(review): next() without a default raises StopIteration for a
        # chain without residues.
        last_n_residues = deque(
            [None, next(residue_generator),
             next(residue_generator, None)])
        while last_n_residues[1] is not None:
            prev_res = last_n_residues.popleft()
            prev_res_name = Constants.EMPTY_STR_FEATURE
            if prev_res is not None:
                prev_res_name = prev_res.resname
            # After popleft the current residue is at index 0, the next at 1.
            res = last_n_residues[0]
            next_res = last_n_residues[1]
            next_res_name = Constants.EMPTY_STR_FEATURE
            if next_res is not None:
                next_res_name = next_res.resname
            start = time.time()
            # DSSP lookup: exact key, then blank hetero flag / insertion
            # code, then any key with the same chain and residue number.
            is_key = True
            key = res.full_id[2:]
            if key not in dssp[0]:
                key = (key[0], (' ', key[1][1], ' '))
                if key not in dssp[0]:
                    for dssp_key in dssp[0]:
                        if dssp_key[0] == key[0] and dssp_key[1][1] == key[1][
                                1]:
                            key = dssp_key
                            break
                    if key not in dssp[0]:
                        is_key = False
                        # A missing DSSP record is tolerated, not an error.
            if is_key:
                dssp_features = dssp[0][key]
            else:
                # Placeholder record used when DSSP has no entry.
                dssp_features = ('', '-', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                                 0.0, 0.0, 0.0, 0.0, 0.0)
            dssp_key_t += time.time() - start
            start = time.time()
            is_cb = 'CB' in res
            cb_ca_surf_angle = 0
            ca_cb_surf_angle = 0
            # NOTE(review): raises KeyError for residues without a CA atom.
            ca_atom = res['CA']
            ca_d, ca_surf_idx = min_dist(ca_atom.get_coord(), surface)
            ca_vec = ca_atom.get_vector()
            if not is_cb:
                # No CB atom: both CB angles stay at 0.
                pass
            else:
                cb_vec = res['CB'].get_vector()
                cb_d, cb_surf_idx = min_dist(res['CB'].get_coord(), surface)
                cb_ca_surf_angle = calc_angle(cb_vec, ca_vec,
                                              Vector(surface[ca_surf_idx]))
                ca_cb_surf_angle = calc_angle(ca_vec, cb_vec,
                                              Vector(surface[cb_surf_idx]))
            min_dist_t += time.time() - start
            start = time.time()
            res_d, dist_list = residue_depth(res, surface)
            # Missing depths are replaced by the default value 5.0.
            if res_d is None:
                res_d = 5.0
                print("Nan values!!!")
            if ca_d is None:
                ca_d = 5.0
                print("Nan values!!!")
            residue_depth_t += time.time() - start
            for idx, atom in enumerate(res.get_atoms()):
                if only_ca:
                    # NOTE(review): dist_list[idx] below is still indexed by
                    # the position of the first atom yielded, not by CA --
                    # confirm this is intended.
                    atom = ca_atom
                start = time.time()
                atom_d, s_idx = dist_list[idx]
                atom_coord = atom.get_coord()
                ca_atom_coord = ca_atom.get_coord()
                d = atom_coord - ca_atom_coord
                ca_atom_dist = np.sqrt(np.sum(d * d))
                atom_ca_surf_angle = 0
                ca_atom_surf_angle = 0
                # The angles are left at 0 when the atom coincides with CA.
                if not np.array_equal(atom_coord, ca_atom_coord):
                    atom_ca_surf_angle = calc_angle(atom.get_vector(), ca_vec,
                                                    Vector(surface[s_idx]))
                    ca_atom_surf_angle = calc_angle(ca_vec, atom.get_vector(),
                                                    Vector(surface[s_idx]))
                if atom_d is None:
                    atom_d = 5.0
                    print(f"Nan valuess!! {atom_d}, {atom}")
                atom_d_t += time.time() - start
                start = time.time()
                setattr(atom,
                        Constants.NODE_APPENDED_FEATURES['prev_res_name'],
                        prev_res_name)
                setattr(atom,
                        Constants.NODE_APPENDED_FEATURES['next_res_name'],
                        next_res_name)
                setattr(atom,
                        Constants.NODE_APPENDED_FEATURES['residue_depth'],
                        res_d)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['atom_depth'],
                        atom_d)
                setattr(atom, Constants.NODE_APPENDED_FEATURES['ca_depth'],
                        ca_d)
                setattr(atom,
                        Constants.NODE_APPENDED_FEATURES['ca_atom_dist'],
                        ca_atom_dist)
                setattr(atom,
                        Constants.NODE_APPENDED_FEATURES['cb_ca_surf_angle'],
                        cb_ca_surf_angle)
                setattr(atom,
                        Constants.NODE_APPENDED_FEATURES['ca_cb_surf_angle'],
                        ca_cb_surf_angle)
                setattr(
                    atom,
                    Constants.NODE_APPENDED_FEATURES['atom_ca_surf_angle'],
                    atom_ca_surf_angle)
                setattr(
                    atom,
                    Constants.NODE_APPENDED_FEATURES['ca_atom_surf_angle'],
                    ca_atom_surf_angle)
                setattr(atom, Constants.DSSP_FEATURES_NAME, dssp_features)
                settattr_t += time.time() - start
                # Neighbour counts per radius: each stored value is the count
                # minus the running sum of the previous counts.
                # NOTE(review): the running sums add every earlier count, so
                # from the third radius on the stored value is not simply
                # "new neighbours in this shell" -- confirm intent.
                cumsum_main = 0
                cumsum_plane = 0
                cumsum_atom_main = [0] * len(
                    Constants.NEIGHBOUR_SUM_RADIUS_ATOMS)
                cumsum_atom_plane = [0] * len(
                    Constants.NEIGHBOUR_SUM_RADIUS_ATOMS)
                for num, radius in enumerate(Constants.NEIGHBOUR_SUM_RADIUS):
                    atoms = ns.search(atom_coord, radius)
                    setattr(
                        atom, Constants.NODE_APPENDED_FEATURES[
                            Constants.neighbour_sum_radius_name(num)],
                        len(atoms) - cumsum_main)
                    # The direction from the atom to its nearest surface
                    # point is passed as the first argument.
                    num_above_plane = num_of_atoms_above_plane(
                        surface[s_idx] - atom_coord, atom_coord, atoms)
                    setattr(
                        atom, Constants.NODE_APPENDED_FEATURES[
                            Constants.neighbour_sum_above_plane_radius_name(
                                num)], num_above_plane - cumsum_plane)
                    cumsum_main += len(atoms)
                    cumsum_plane += num_above_plane
                    # Same two counts, restricted to one element at a time.
                    for i, atom_element in enumerate(
                            Constants.NEIGHBOUR_SUM_RADIUS_ATOMS):
                        atoms_one_element = list(
                            filter(
                                lambda a: a.element.upper() == atom_element.
                                upper(), atoms))
                        setattr(
                            atom, Constants.NODE_APPENDED_FEATURES[
                                Constants.neighbour_sum_radius_name(
                                    num, atom_element)],
                            len(atoms_one_element) - cumsum_atom_main[i])
                        num_above_plane = num_of_atoms_above_plane(
                            surface[s_idx] - atom_coord, atom_coord,
                            atoms_one_element)
                        setattr(
                            atom, Constants.NODE_APPENDED_FEATURES[
                                Constants.
                                neighbour_sum_above_plane_radius_name(
                                    num, atom_element)],
                            num_above_plane - cumsum_atom_plane[i])
                        cumsum_atom_main[i] += len(atoms_one_element)
                        cumsum_atom_plane[i] += num_above_plane
                if only_ca:
                    # CA has been annotated; skip the remaining atoms.
                    break
            # Slide the window forward by one residue.
            last_n_residues.append(next(residue_generator, None))
def to_vector(t):
    """Build a Vector from the first three items of the indexable ``t``."""
    x, y, z = t[0], t[1], t[2]
    return Vector(x, y, z)
def from4atoms(OA, OB, OC, OD, u, v, l, m, accept_no_solution=True):
    """
    Find a point E from four points and two angle/length pairs.

    With a = OA - OC and b = OB - OD, the vector c = E - OA is sought such
    that its dot products with a and b match the values derived from the
    angles ``u`` and ``v`` and the lengths ``l`` and ``m``, and such that
    |c| = l.  The two linear conditions are reduced by Gaussian elimination
    to a one-parameter family c = t * direction + offset, and the length
    condition yields a quadratic in t.  Of the two roots, the point whose
    distance to OB is closest to ``m`` is returned.

    u, v -- angles in radians (their supplements are used)
    l, m -- target lengths of E - OA and E - OB
    accept_no_solution -- when the quadratic has no real root, use the
        double root (discriminant clamped to 0) instead of raising

    Raises ValueError when no real solution exists and
    ``accept_no_solution`` is False.

    Fixes: the right-hand sides are now really swapped together with the
    rows when pivoting (the old three-line swap overwrote ``ry`` and left
    ``rx`` unchanged), and the ``print``/``raise`` statements use syntax
    valid on both Python 2 and Python 3.
    """
    # (assumes a Bio.PDB-style Vector: ``*`` between Vectors is the dot
    # product and ``**`` with a scalar scales -- TODO confirm)
    u = pi - u
    v = pi - v

    a = OA - OC
    b = OB - OD
    ddiff = OC - OD + a - b  # algebraically equal to OA - OB

    # Augmented system:  r1 . c = rx,  r2 . c = ry
    rx = a.norm() * l * cos(u)
    ry = b.norm() * m * cos(v) - b * ddiff
    r1 = a
    r2 = b

    if abs(r1[0]) < 0.000001:
        if abs(r2[0]) < 0.000001:
            print("WARNING: r1 = r2 = 0.0")
        # Swap rows (and their right-hand sides) to get a usable pivot.
        r1, r2 = r2, r1
        rx, ry = ry, rx

    # Reduce the matrix
    factor = r1[0]
    r1 = r1 / factor
    rx = rx / factor

    factor = r2[0]
    r2 = r2 - (r1 ** factor)
    ry = ry - rx * factor

    factor = r2[1]
    r2 = r2 / factor
    ry = ry / factor

    factor = r1[1]
    r1 = r1 - r2 ** factor
    rx = rx - ry * factor

    # Solution space: c = t * direction + offset
    direction = Vector(-r1[2], -r2[2], 1.0)
    offset = Vector(rx, ry, 0)

    # Solve the quadratic |c|^2 = l^2 for t
    acoef = (direction.norm())**2
    bcoef = 2*(direction*offset)
    ccoef = (offset.norm())**2 - l**2
    disc = bcoef**2 - 4*acoef*ccoef
    if disc < 0.0:
        # This is the sick case where we can't find _any_ solution
        if not accept_no_solution:
            raise ValueError(
                "from4atoms: no solution found (disc=%f)" % disc)
        disc = 0

    t1 = (-bcoef - sqrt(disc))/(2*acoef)
    t2 = (-bcoef + sqrt(disc))/(2*acoef)

    # The two candidate E-points
    E1 = OA + (direction ** t1 + offset)
    E2 = OA + (direction ** t2 + offset)

    # Pick the candidate whose distance to OB is closest to m
    diff1 = abs((E1 - OB).norm() - m)
    diff2 = abs((E2 - OB).norm() - m)
    if diff1 < diff2:
        return E1
    return E2
def _mass_weighted_centre(chain):
    """Return the mass-weighted centre of the atoms of ``chain`` as a Vector."""
    weighted_sum = Vector(0, 0, 0)
    total_mass = 0.0
    for atom in chain.get_atoms():
        weighted_sum += Vector(
            atom.get_vector().get_array() * np.array(atom.mass))
        total_mass += atom.mass
    # Plain division instead of an explicit ``__div__`` call, which only
    # exists where the Python 2 division hook is defined.
    return weighted_sum / total_mass


def _chain_coordinates(chain):
    """Return the atom coordinates of ``chain`` as one array, one row per atom."""
    return np.asarray([atom.get_coord() for atom in chain.get_atoms()])


def analyse(input_file_name, refer_file_name, moved_chain_id, fixed_chain_id, r_moved_chain_id, r_fixed_chain_id, output_file1, output_file2, r_model_number=0):
    """
    Describe the placement of the moved chain in every model of a structure.

    Each model is superimposed on the reference via its fixed chain.  The
    centre of mass of the moved chain then gives the distance to the fixed
    chain, the polar angle ``theta`` (against the z axis) and the azimuth
    ``phi``.  A second superposition of the moved chain on the reference
    moved chain gives a rotation matrix and three angles derived from it.

    output_file1 -- per model: index, distance, theta, phi, theta_x,
                    theta_y, theta_z (tab separated)
    output_file2 -- per model: index, x, y, z of the moved-chain centre and
                    the nine rotation-matrix entries, column by column

    Fix: ``phi`` now gets exactly one entry per model.  When the azimuth is
    undefined (centre on the z axis or at the origin) the previous value is
    repeated, starting from 0.0.  Before, such models appended nothing, so
    the columns were shifted and writing ended in an IndexError.  The
    unused centre-of-mass pass before the superposition was removed and
    both output files are written inside ``with`` blocks.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure('to_analyse', input_file_name)
    reference = PDBParser(PERMISSIVE=1).get_structure('reference', refer_file_name)
    r_chain_moved = reference[r_model_number][r_moved_chain_id]
    r_chain_fixed = reference[r_model_number][r_fixed_chain_id]

    # The reference is never transformed, so its coordinates are read once.
    reference_fixed = _chain_coordinates(r_chain_fixed)
    reference_moved = _chain_coordinates(r_chain_moved)

    theta = []
    phi = []
    theta_x = []
    theta_y = []
    theta_z = []
    d = []
    coords_x = []
    coords_y = []
    coords_z = []
    matrix_entries = [[] for _ in range(9)]
    last_phi = 0.0

    for model_number, model in enumerate(structure):
        chain_moved = model[moved_chain_id]
        chain_fixed = model[fixed_chain_id]

        # Bring the whole model into the reference frame via the fixed chain.
        sup = SVDSuperimposer()
        sup.set(reference_fixed, _chain_coordinates(chain_fixed))
        sup.run()
        R, V = sup.get_rotran()
        for atom in model.get_atoms():
            atom.transform(R, V)

        moved_centre = _mass_weighted_centre(chain_moved)
        fixed_centre = _mass_weighted_centre(chain_fixed)
        if fixed_centre.norm() > 0.5:
            print("Fixed chain norm is " + str(fixed_centre.norm()) +
                  " in model " + str(model_number) +
                  ". Should have been at the origin. Check code...")

        x, y, z = moved_centre.get_array()
        coords_x.append(x)
        coords_y.append(y)
        coords_z.append(z)
        d.append((moved_centre - fixed_centre).norm())

        if moved_centre.norm() > 1e-6:
            theta.append(moved_centre.angle(Vector(0, 0, 1)))
            if np.sqrt(x * x + y * y) > 1e-6:
                last_phi = np.arctan2(y, x)
        else:
            theta.append(0.0)
        phi.append(last_phi)

        # Orientation of the moved chain relative to the reference one.
        sup = SVDSuperimposer()
        sup.set(reference_moved, _chain_coordinates(chain_moved))
        sup.run()
        R, V = sup.get_rotran()
        theta_x.append(np.arctan2(R[2][1], R[2][2]))
        theta_y.append(
            np.arctan2(-R[2][0],
                       np.sqrt(R[2][1] * R[2][1] + R[2][2] * R[2][2])))
        theta_z.append(np.arctan2(R[1][0], R[0][0]))
        for column in range(3):
            matrix_entries[column].append(R[0][column])
            matrix_entries[column + 3].append(R[1][column])
            matrix_entries[column + 6].append(R[2][column])

    with open(output_file1, "w+") as f_results1:
        for frame in range(0, len(structure)):
            f_results1.write(
                str(frame) + '\t' + str(d[frame]) + '\t' +
                str(theta[frame]) + '\t' + str(phi[frame]) + '\t' +
                str(theta_x[frame]) + '\t' + str(theta_y[frame]) + '\t' +
                str(theta_z[frame]) + '\n')

    with open(output_file2, "w+") as f_results2:
        for frame in range(0, len(structure)):
            f_results2.write(
                str(frame) + '\t' + str(coords_x[frame]) + '\t' +
                str(coords_y[frame]) + '\t' + str(coords_z[frame]) + '\t')
            for column in range(3):
                f_results2.write(str(matrix_entries[column][frame]) + '\t')
                f_results2.write(
                    str(matrix_entries[column + 3][frame]) + '\t')
                f_results2.write(
                    str(matrix_entries[column + 6][frame]) + '\t')
            f_results2.write('\n')
def add_water(self):
    """
    Add ``self.n_waters`` water molecules to every PDB file in ``self.input_pdbs``.

    The first input PDB is read as the template: its structure lines and CONECT
    records are kept, every line containing "END" is dropped. Waters are created
    at random integer coordinates in [0, 100), written next to the structure,
    and then shifted by -5 along x until none of their atoms has a non-added
    atom within 5.0 angstrom.

    Reads ``self.test``, ``self.input_pdbs``, ``self.user_waters``,
    ``self.n_waters`` and ``self.ligand_residue``.

    Side effects:
        * Existing "HOH" residues that are not listed in ``self.user_waters``
          are appended to ``self.water_to_exclude`` (as "chain:resnum").
        * Every file in ``self.input_pdbs`` is overwritten with the template
          lines, the added waters and the original CONECT records.
        * The added water lines are appended to "added_waters.txt" in the
          current working directory.
        * A "*_temp.pdb" file is created next to each input and removed again.

    Returns None, both when ``self.n_waters < 1`` (only the bookkeeping of
    existing waters is done) and after the waters have been added.
    """
    if self.test:
        # Fixed seed, so the random water placement is reproducible
        np.random.seed(42)

    # NOTE(review): filled below but never returned by the visible code.
    output = []
    n_inputs = len(self.input_pdbs)
    resnums = []
    atomnums = []
    chains = []
    resnames = []

    # Open the original PDB file; only the first input is used as a template
    with open(self.input_pdbs[0], "r") as file:
        lines = file.readlines()

    # Figure out which lines refer to the actual structure and CONECTs, drop everything else
    conect = [line for line in lines if "CONECT" in line]
    pdb_lines = [
        line for line in lines if "END" not in line and "CONECT" not in line
    ]

    for line in pdb_lines:
        if not line.startswith(("ATOM", "HETATM", "TER")):
            continue

        try:
            chain = line[21]
        except IndexError:
            # Line too short (e.g. a bare TER record) - nothing to extract
            continue

        # Extract atom information (slices never raise, even on short lines)
        resnum = line[22:27].strip()
        # NOTE(review): only 4 characters are read here, so a serial of 10000
        # or more loses its leading digit. Left as is, because a wider value
        # may not fit the cs.water template - confirm.
        atomnum = line[7:11].strip()
        resname = line[17:20]

        resnums.append(resnum)
        atomnums.append(atomnum)
        chains.append(chain)
        resnames.append(resname)

        # If there are already waters in the system but were not selected to be
        # perturbed, we exclude them
        if resname == "HOH":
            existing_water = f"{chain}:{resnum}"
            if (existing_water not in self.user_waters
                    and existing_water not in self.water_to_exclude):
                self.water_to_exclude.append(existing_water)

    # Return if no waters are supposed to be added
    if self.n_waters < 1:
        return

    # Check the maximum existing residue number, so we know where to introduce the waters
    lig_length = resnames.count(self.ligand_residue)
    resnums = [int(num) for num in resnums if num]
    existing_resnums = set(resnums)  # O(1) membership tests further down
    max_resnum = max(resnums)

    # Figure out the chain ID and atom numbers to introduce the waters
    water_chain = chains[0]  # water chain = 1st protein chain
    first_atomnum = max(int(num) for num in atomnums if num) + 1 + lig_length

    # Enumerate enough water templates to add n_waters to each input
    water_templates = cs.water * self.n_waters * n_inputs

    water_coords = []
    water_resnums = []
    for _ in range(n_inputs):
        for _ in range(self.n_waters):
            # Randomize oxygen coordinates - create an [x, y, z] vector
            O_coords = Vector([np.random.randint(0, 100) for _ in range(3)])

            # Add hydrogens to the oxygen
            H1_coords = O_coords + Vector(0.757, 0.586, 0.0)
            H2_coords = O_coords + Vector(-0.757, 0.586, 0.0)
            water_coords.extend(
                [list(O_coords), list(H1_coords), list(H2_coords)])

            # Increment residue number, so each added water has a different one
            max_resnum += 1
            water_resnums.extend([max_resnum] * 3)
        # NOTE(review): the collapsed source does not show whether this second
        # increment belongs to the inner or the outer loop; it is placed at
        # input level here (one unused residue number between inputs) - confirm.
        max_resnum += 1

    # Calculate atom numbers of all waters
    water_atomnums = [
        first_atomnum + j for j in range(self.n_waters * 3 * n_inputs)
    ]

    # Create water PDB lines based on calculated atom numbers, residues, etc.
    water_output = []
    for template, num, water_resnum, coord in zip(
            water_templates, water_atomnums, water_resnums, water_coords):
        # Format coordinates, so they fit into the PDB format
        formatted_coord = " ".join("{:7.4f}".format(c) for c in coord)
        water_output.append(
            template.format(num, water_chain, water_resnum, formatted_coord))

    # Slice created water PDB lines and split between different input PDBs
    atoms_per_input = self.n_waters * 3
    sliced_water_output = [
        water_output[i:i + atoms_per_input]
        for i in range(0, len(water_output), atoms_per_input)
    ]

    # Loop over PDB inputs and add the corresponding waters to each of them
    for input_pdb, input_water_lines in zip(self.input_pdbs,
                                            sliced_water_output):
        new_protein_file = input_pdb

        # Write PDB lines followed by created water lines
        with open(input_pdb, "w+") as file:
            file.writelines(pdb_lines)
            file.write("\n")
            file.writelines(input_water_lines)
            file.write("END")

        # Load the input PDB file again with Biopython to check for contacts
        parser = PDBParser()
        structure = parser.get_structure("complex", new_protein_file)

        # Get all atoms of the structure to check for clashes
        protein_list = Selection.unfold_entities(structure, "A")

        # Get all relevant (i.e. newly added) water atoms to check for clashes
        water_list = []
        for res in structure.get_residues():
            if res.resname == "HOH" and res._id[1] not in existing_resnums:
                water_list.extend(Selection.unfold_entities(res, "A"))

        # Built once: only atoms from water_list are moved below, and those are
        # filtered out of the hits, so a search structure holding the initial
        # coordinates gives the same contacts as one rebuilt for every query.
        neighbour_search = NeighborSearch(protein_list)

        # Check contacts between added waters and the protein at 5.0 angstrom
        contacts5 = []
        for water_atom in water_list:
            contacts5.extend(
                neighbour_search.search(water_atom.coord, 5.0, "A"))
        contacts5 = [c for c in contacts5 if c not in water_list]  # exclude "self" contacts

        # Keep on translating the water molecules as long as there are clashes at 5.0 A
        while contacts5:
            contacts5 = []
            for water_atom in water_list:
                x, y, z = water_atom.coord
                # Set new coordinates and check contacts again
                water_atom.set_coord([x - 5, y, z])
                contacts5.extend(
                    neighbour_search.search(water_atom.coord, 5.0, "A"))
            contacts5 = [c for c in contacts5 if c not in water_list]

        # Save final output with translated water as a temporary file
        temp_protein_file = os.path.join(
            os.path.dirname(input_pdb),
            os.path.basename(input_pdb).replace(".pdb", "_temp.pdb"),
        )

        io = PDBIO()
        io.set_structure(structure)
        io.save(temp_protein_file)
        output.append(new_protein_file)

        # Open the temporary file created with biopython
        with open(temp_protein_file, "r") as temp:
            temp_lines = temp.readlines()

        # Iterate over lines created with biopython and keep the added waters
        new_water_lines = []
        for line in temp_lines:
            if (line[17:20].strip() == "HOH"
                    and int(line[22:27].strip()) not in existing_resnums):
                # Shift the atom serial by the number of ligand atoms. The
                # serial field (columns 7-11) is rewritten in place at its
                # fixed width; a substring replace would also hit identical
                # digits elsewhere in the record and could shift the columns.
                shifted_serial = int(line[6:11]) + lig_length
                line = f"{line[:6]}{shifted_serial:>5}{line[11:]}"

                # Close every water molecule with a TER after its last hydrogen
                if line[12:15] == "2HW":
                    line = line.strip("\n") + "\nTER\n"

                new_water_lines.append(line)

        del new_water_lines[-1]  # Last biopython line is a not needed TER

        # Save new water lines, so they are not duplicated in the next run
        with open("added_waters.txt", "a+") as water_file:
            water_file.writelines(new_water_lines)

        # Overwrite the original input PDB, save original PDB lines, added water
        # lines and the original CONECTs.
        with open(new_protein_file, "w+") as file:
            file.writelines(pdb_lines)
            # Terminate the original structure unless it already ends with TER
            if not pdb_lines[-1].startswith("TER"):
                file.write("TER\n")
            file.writelines(new_water_lines)
            file.writelines(conect)
            file.write("\n")
            file.write("END")

        # Remove temporary biopython file
        os.remove(temp_protein_file)