def GetUniqueChains(self, pdir, pdbID, chains_to_check):
    """Group chains whose C-alpha derived residue strings are identical.

    Structure based, not sequence based: for every requested chain of the
    first model, one one-letter code is collected per atom named 'CA', the
    codes not contained in ``amino_dict.amino`` are dropped, and the rest is
    joined into a string. Chains producing the same string form one group.

    Args:
        pdir: Prefix of the entry file. It is concatenated directly with
            ``'pdb' + self.pdbID + '.ent'``, so it has to end with a path
            separator.
        pdbID: Not read by this method; the entry is located through
            ``self.pdbID``.
            NOTE(review): confirm whether this argument should be honoured.
        chains_to_check: Iterable of chain identifiers looked up in the
            first model of the parsed structure.

    Returns:
        A tuple ``(representatives, groups)``:

        * ``representatives`` -- sorted list holding the first label of
          every group.
        * ``groups`` -- sorted list of lists, one list of labels per group.

        A label is ``<prefix>_<chain id>`` where ``<prefix>`` is characters
        3..6 of the entry file name (the first four characters of
        ``self.pdbID`` when the ID has at least four characters).
    """
    ent_name = 'pdb' + self.pdbID + '.ent'
    parser = PDBParser(PERMISSIVE=True, QUIET=True)
    structure = parser.get_structure(self.pdbID, pdir + ent_name)
    model = structure[0]

    label_prefix = ent_name[3:7]
    valid_codes = amino_dict.amino

    # Maps residue string -> labels of the chains that produced it. Labels
    # are appended in the order of `chains_to_check`, so the representative
    # of a group is always the first requested chain of that group.
    groups = {}
    seen_labels = set()
    for chain_id in chains_to_check:
        label = label_prefix + '_' + chain_id
        if label in seen_labels:
            # A chain requested twice keeps its first entry only.
            continue
        seen_labels.add(label)

        chain = model[chain_id]
        codes = [
            amino_dict.replace_all(residue.resname, amino_dict.one_letter)
            for residue in chain
            for atom in residue
            if atom.name == 'CA'
        ]
        ca_string = "".join([code for code in codes if code in valid_codes])
        groups.setdefault(ca_string, []).append(label)

    list_of_matches = list(groups.values())
    req_matches = [members[0] for members in list_of_matches]
    return sorted(req_matches), sorted(list_of_matches)
def AEI(AAAB14):
    """Insert two reduced-alphabet columns right after the 'QuadB' column.

    The first row of ``AAAB14`` is treated as the header and receives the
    names 'AEI_Group' and 'Sort_AEI'. Every other row receives, at the same
    positions, the upper-cased result of
    ``amino_dict.replace_all(<first three items of the QuadB value>,
    amino_dict.Red_Wang)`` and that same string with its characters sorted.
    Specific for the Ion Environments project.

    The rows are modified IN PLACE: the returned list contains the very same
    row objects that were passed in, so calling this twice on one table
    inserts the columns twice.

    Args:
        AAAB14: Sequence of list rows; row 0 is the header.
            (presumably the 'QuadB' values are strings -- confirm with
            the caller)

    Returns:
        A new outer list holding the (mutated) header followed by the
        (mutated) data rows.
    """
    header = AAAB14[0]
    rows = AAAB14[1:]

    # Position of the 'QuadB' column as reported by the GetInd helper.
    quad_col = GetInd(header, 'QuadB')
    group_col = quad_col + 1
    sorted_col = quad_col + 2

    header.insert(group_col, 'AEI_Group')
    header.insert(sorted_col, 'Sort_AEI')

    table = [header]
    for row in rows:
        reduced = amino_dict.replace_all(
            row[quad_col][0:3], amino_dict.Red_Wang).upper()
        row.insert(group_col, reduced)
        row.insert(sorted_col, ''.join(sorted(reduced)))
        table.append(row)
    return table
# NOTE(review): this name is defined a second time further down; if both
# definitions live in the same module scope the later one replaces this one.
def ProteinDelaunay(pdbid, chain):
    """Tessellate a chain's C-alpha point cloud and tabulate every simplex.

    The point cloud is the set of C-alpha coordinates of a three dimensional
    protein, obtained from ``PointCloudData(pdbid, chain)`` together with
    the per-point temperature factors and three letter residue names. One
    table row is produced per Delaunay simplex (tetrahedron). After the
    table is built the tessellation is rendered with Mayavi, saved to
    ``<pdbid>_MayaviViz.x3d`` and shown in an interactive window
    (``mlab.show()``).

    Args:
        pdbid: PDB identifier; stored in every row and used for the name of
            the exported X3D file.
        chain: Chain identifier forwarded to ``PointCloudData``.

    Returns:
        A list of rows. Row 0 is the header; every following row holds:

        * ``PDBID``
        * ``Quad`` / ``SortedQuad`` -- one letter codes of the four
          vertices, in vertex order and sorted.
        * ``RedAlpha`` / ``SortRedAlpha`` -- upper-cased
          ``amino_dict.FLP`` replacements of those codes.
        * ``V1``..``V4`` -- point indices of the vertices.
        * ``L1``..``L6`` -- edge lengths AB, AC, AD, BC, BD, CD.
        * ``SumL``, ``AvgL``, ``DevL`` -- sum, mean and root mean squared
          deviation of the edge lengths (rounded to 4 decimals).
        * ``DevTetra`` -- value returned by ``DevTetra`` for the six edges.
        * ``Vol`` -- tetrahedron volume, ``|det| / 6``.
        * ``TF1``..``TF4``, ``SumTF``, ``AvgTF`` -- temperature factors.
        * ``hullArea``, ``hullVolume`` -- convex hull of the whole cloud
          (identical in every row).
    """
    Head = [
        'PDBID', 'Quad', 'SortedQuad', 'RedAlpha', 'SortRedAlpha',
        'V1', 'V2', 'V3', 'V4',
        'L1', 'L2', 'L3', 'L4', 'L5', 'L6',
        'SumL', 'AvgL', 'DevL', 'DevTetra', 'Vol',
        'TF1', 'TF2', 'TF3', 'TF4', 'SumTF', 'AvgTF',
        'hullArea', 'hullVolume',
    ]
    Data = [Head]

    pointcloud, bf, resname = PointCloudData(pdbid, chain)

    # Single-argument print calls produce the same text on Python 2 and 3.
    print("Given PDB ID:  %s" % (pdbid,))
    print("Given Chain ID: %s" % (chain,))
    print("Number of C-alpha points:  %s" % (len(pointcloud),))

    # Convex Hull.
    hull = ConvexHull(pointcloud)
    hullArea = round(hull.area, 4)
    hullVolume = round(hull.volume, 4)

    # Delaunay Tessellation
    delaunay_hull = Delaunay(pointcloud, furthest_site=False,
                             incremental=False, qhull_options='Qc')
    delaunay_points = delaunay_hull.points
    # `simplices` replaces the deprecated `vertices` alias.
    delaunay_simplices = delaunay_hull.simplices
    print("Number of Delaunay Simplices:  %s" % (len(delaunay_simplices),))

    for simplex in delaunay_simplices:
        # Obtain the indices of the vertices (fixed order: 2, 1, 3, 0).
        one, two, three, four = simplex[2], simplex[1], simplex[3], simplex[0]

        # Obtain the coordinates based on the indices.
        cordA = pointcloud[one]
        cordB = pointcloud[two]
        cordC = pointcloud[three]
        cordD = pointcloud[four]

        # Get the temperature factors for the amino acids.
        a_tf = bf[one]
        b_tf = bf[two]
        c_tf = bf[three]
        d_tf = bf[four]

        # Get one letter code of the amino acids from the three letter names.
        oneLet = [
            amino_dict.replace_all(resname[idx], amino_dict.one_letter)
            for idx in (one, two, three, four)
        ]

        # Get Reduced Amino Acid Representations. Sorting happens before
        # upper-casing, exactly as the raw replacement values compare.
        flp = [amino_dict.replace_all(code, amino_dict.FLP) for code in oneLet]
        sortflp = ''.join(sorted(flp)).upper()
        flp = ''.join(flp).upper()

        sortOneLet = ''.join(sorted(oneLet))
        oneLet = ''.join(oneLet)

        # Calculate distances between the tetrahedra vertices.
        AB = np.linalg.norm(cordA - cordB)
        AC = np.linalg.norm(cordA - cordC)
        AD = np.linalg.norm(cordA - cordD)
        BC = np.linalg.norm(cordB - cordC)
        BD = np.linalg.norm(cordB - cordD)
        CD = np.linalg.norm(cordC - cordD)
        lenArr = [AB, AC, AD, BC, BD, CD]

        # Calculate the tetrahedra Volume: |det of edge vectors from D| / 6.
        primes = np.asarray([cordA - cordD, cordB - cordD, cordC - cordD])
        Vol = round(abs(np.linalg.det(primes)) / 6, 4)

        # Sum of Edge Lengths.
        SumL = round(AB + AC + AD + BC + BD + CD, 4)
        # Average Edge Lengths (derived from the already rounded sum).
        AvgL = round(SumL / 6, 4)
        # Deviation in Edge Lengths.
        sumDevL = sum([(length - AvgL) ** 2 for length in lenArr])
        DevL = round(math.sqrt(sumDevL / 6.0), 4)

        # Deviation in Tetrahedrality
        DevT = DevTetra(lenArr)

        # Sum and Average Temperature Factors.
        SumTF = round(a_tf + b_tf + c_tf + d_tf, 4)
        AvgTF = round(SumTF / 4, 4)

        # Data List
        Data.append([
            pdbid, oneLet, sortOneLet, flp, sortflp,
            one, two, three, four,
            AB, AC, AD, BC, BD, CD,
            SumL, AvgL, DevL, DevT, Vol,
            a_tf, b_tf, c_tf, d_tf, SumTF, AvgTF,
            hullArea, hullVolume,
        ])

    # delaunay_indices store the indices for the delaunay_points.
    delaunay_indices = np.arange(len(delaunay_points))

    # Get ready for mayavi plot.
    fig = mlab.figure(1, bgcolor=(0, 0, 0))
    fig.scene.disable_render = True

    # Get a 3d scatter plot for the delaunay_points.
    xs = delaunay_points[:, 0]
    ys = delaunay_points[:, 1]
    zs = delaunay_points[:, 2]
    mlab.points3d(xs, ys, zs, scale_factor=0.40, color=(0.99, 0.00, 0.00))

    ion_c_alpha_scatter = mlab.pipeline.scalar_scatter(
        xs, ys, zs, delaunay_indices)
    ion_c_alpha_delaunay = mlab.pipeline.delaunay3d(ion_c_alpha_scatter)
    ion_c_alpha_edges = mlab.pipeline.extract_edges(ion_c_alpha_delaunay)
    mlab.pipeline.surface(ion_c_alpha_edges, colormap='winter', opacity=0.4)
    mlab.savefig(pdbid + '_MayaviViz.x3d')
    mlab.show()
    return Data
# NOTE(review): this name is also defined earlier; if both definitions live
# in the same module scope this later one is the one callers get.
def ProteinDelaunay(pdbid, chain):
    """Tessellate a chain's C-alpha point cloud and tabulate every simplex.

    The point cloud is the set of C-alpha coordinates of a three dimensional
    protein, obtained from ``PointCloudData(pdbid, chain)`` together with
    the per-point temperature factors and three letter residue names. One
    table row is produced per Delaunay simplex (tetrahedron).

    Args:
        pdbid: PDB identifier; stored in the first column of every row.
        chain: Chain identifier forwarded to ``PointCloudData``.

    Returns:
        A list of rows. Row 0 is the header; every following row holds:

        * ``PDBID``
        * ``Quad`` / ``SortedQuad`` -- one letter codes of the four
          vertices, in vertex order and sorted.
        * ``RedAlpha`` / ``SortRedAlpha`` -- upper-cased
          ``amino_dict.FLP`` replacements of those codes.
        * ``V1``..``V4`` -- point indices of the vertices.
        * ``L1``..``L6`` -- edge lengths AB, AC, AD, BC, BD, CD.
        * ``SumL``, ``AvgL``, ``DevL`` -- sum, mean and root mean squared
          deviation of the edge lengths (rounded to 4 decimals).
        * ``DevTetra`` -- value returned by ``DevTetra`` for the six edges.
        * ``Vol`` -- tetrahedron volume, ``|det| / 6``.
        * ``TF1``..``TF4``, ``SumTF``, ``AvgTF`` -- temperature factors.
    """
    Head = [
        'PDBID', 'Quad', 'SortedQuad', 'RedAlpha', 'SortRedAlpha',
        'V1', 'V2', 'V3', 'V4',
        'L1', 'L2', 'L3', 'L4', 'L5', 'L6',
        'SumL', 'AvgL', 'DevL', 'DevTetra', 'Vol',
        'TF1', 'TF2', 'TF3', 'TF4', 'SumTF', 'AvgTF',
    ]
    Data = [Head]

    pointcloud, bf, resname = PointCloudData(pdbid, chain)

    # Delaunay Tessellation
    delaunay_hull = Delaunay(pointcloud, furthest_site=False,
                             incremental=False, qhull_options='Qc')

    # `simplices` replaces the deprecated `vertices` alias.
    for simplex in delaunay_hull.simplices:
        # Obtain the indices of the vertices (fixed order: 2, 1, 3, 0).
        one, two, three, four = simplex[2], simplex[1], simplex[3], simplex[0]

        # Obtain the coordinates based on the indices.
        cordA = pointcloud[one]
        cordB = pointcloud[two]
        cordC = pointcloud[three]
        cordD = pointcloud[four]

        # Get the temperature factors for the amino acids.
        a_tf = bf[one]
        b_tf = bf[two]
        c_tf = bf[three]
        d_tf = bf[four]

        # Get one letter code of the amino acids from the three letter names.
        oneLet = [
            amino_dict.replace_all(resname[idx], amino_dict.one_letter)
            for idx in (one, two, three, four)
        ]

        # Get Reduced Amino Acid Representations. Sorting happens before
        # upper-casing, exactly as the raw replacement values compare.
        flp = [amino_dict.replace_all(code, amino_dict.FLP) for code in oneLet]
        sortflp = ''.join(sorted(flp)).upper()
        flp = ''.join(flp).upper()

        sortOneLet = ''.join(sorted(oneLet))
        oneLet = ''.join(oneLet)

        # Calculate distances between the tetrahedra vertices.
        AB = np.linalg.norm(cordA - cordB)
        AC = np.linalg.norm(cordA - cordC)
        AD = np.linalg.norm(cordA - cordD)
        BC = np.linalg.norm(cordB - cordC)
        BD = np.linalg.norm(cordB - cordD)
        CD = np.linalg.norm(cordC - cordD)
        lenArr = [AB, AC, AD, BC, BD, CD]

        # Calculate the tetrahedra Volume: |det of edge vectors from D| / 6.
        primes = np.asarray([cordA - cordD, cordB - cordD, cordC - cordD])
        Vol = round(abs(np.linalg.det(primes)) / 6, 4)

        # Sum of Edge Lengths.
        SumL = round(AB + AC + AD + BC + BD + CD, 4)
        # Average Edge Lengths (derived from the already rounded sum).
        AvgL = round(SumL / 6, 4)
        # Deviation in Edge Lengths.
        sumDevL = sum([(length - AvgL) ** 2 for length in lenArr])
        DevL = round(math.sqrt(sumDevL / 6.0), 4)

        # Deviation in Tetrahedrality
        DevT = DevTetra(lenArr)

        # Sum and Average Temperature Factors.
        SumTF = round(a_tf + b_tf + c_tf + d_tf, 4)
        AvgTF = round(SumTF / 4, 4)

        # Data List
        Data.append([
            pdbid, oneLet, sortOneLet, flp, sortflp,
            one, two, three, four,
            AB, AC, AD, BC, BD, CD,
            SumL, AvgL, DevL, DevT, Vol,
            a_tf, b_tf, c_tf, d_tf, SumTF, AvgTF,
        ])
    return Data