def run(self):
    """Run DSSP on ``self.struct`` and emit the first chain's assignments.

    The structure is written to a Qt temporary file, ``mkdssp`` is run on the
    first model, and for every residue belonging to the first chain reported
    by DSSP the value at tuple position 2 (secondary structure) is stored
    under ``tuple[0] + self.offset``.

    Emits:
        ``self.finished`` with ``[result, self.seq, self.node]`` on success,
        or with ``[None, None, None]`` when the temporary file cannot be
        opened or the DSSP run raises.
    """
    tmp = QTemporaryFile()
    if not tmp.open():
        # Without a file there is nothing to hand to DSSP. Still notify
        # listeners so they are not left waiting for a signal.
        self.finished.emit([None, None, None])
        return

    io = PDBIO()
    io.set_structure(self.struct)
    io.save(tmp.fileName())

    try:
        dssp = DSSP(self.struct[0], tmp.fileName(), dssp='mkdssp')
        keys = dssp.keys()
        # Only the chain of the first DSSP key is reported.
        first_chain = next(iter(keys))[0]
        result = {}
        for key in keys:
            if key[0] == first_chain:
                # NOTE(review): the original author flagged this indexing as
                # uncertain. Element 0 is presumably DSSP's own running index
                # rather than the PDB residue number -- confirm.
                result[dssp[key][0] + self.offset] = dssp[key][2]
        self.finished.emit([result, self.seq, self.node])
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
        # still propagate. Any other failure is reported as "no result".
        traceback.print_exc()
        print("SORRY, DSSP WAS NOT FOUND")
        self.finished.emit([None, None, None])
def calculate_solvent_access_score(self, threshold):
    """Score how many buried residues the predicted model shares with the template.

    Both the Modeller model and the re-indexed template are parsed, DSSP is
    run on each, and the relative accessibility values (DSSP tuple position
    3) are attached to the alignment positions that are not gaps.
    ``keep_accessible_residues`` then selects positions according to
    ``threshold``, and the size of the intersection is normalised by the
    number of non-gap query positions.

    Args:
        threshold (int): Cutoff for relative accessibility values, passed
            unchanged to ``keep_accessible_residues``.

    Returns:
        float: Number of common selected positions divided by the number of
        non-gap query positions.
    """
    dssp_binary = "bin/dssp-2.0.4-linux-amd64"

    # Locations of the predicted and template structures
    pred_model_pdb = "data/templates/" + self.template.name + "/" + self.template.modeller_pdb + ".atm"
    template_pdb = "data/templates/" + self.template.name + "/" + self.template.reindexed_pdb + ".atm"

    # First model of each parsed structure
    pred_structure = PDBParser(QUIET=True).get_structure("pred_model", pred_model_pdb)
    pred_model = pred_structure[0]
    template_structure = PDBParser(QUIET=True).get_structure("template_model", template_pdb)
    template_model = template_structure[0]

    # DSSP on the Modeller model, then on the template
    dssp_pred_model = DSSP(pred_model, pred_model_pdb, dssp=dssp_binary)
    dssp_template_model = DSSP(template_model, template_pdb, dssp=dssp_binary)

    # Alignment columns that hold an actual residue (not a gap)
    query_index_ali = []
    for position, residue in enumerate(self.query.residues):
        if str(residue) != "-":
            query_index_ali.append(position)
    template_index_ali = []
    for position, residue in enumerate(self.template.residues):
        if str(residue) != "-":
            template_index_ali.append(position)

    # Pair each alignment column with the matching DSSP relative accessibility
    pred_values = [dssp_pred_model[key][3] for key in dssp_pred_model.keys()]
    rsa_pred_model = dict(zip(query_index_ali, pred_values))
    template_values = [dssp_template_model[key][3] for key in dssp_template_model.keys()]
    rsa_template_model = dict(zip(template_index_ali, template_values))

    # Positions selected by the threshold in each structure
    pred_selected = keep_accessible_residues(rsa_pred_model, threshold)
    template_selected = keep_accessible_residues(rsa_template_model, threshold)

    # Shared positions, normalised by the query length without gaps
    shared = set(pred_selected).intersection(template_selected)
    return len(shared) / len(query_index_ali)
def run_DSSP(self, corresponding_gene_call, pdb_filepath):
    """Run DSSP on a structure file and return the converted annotation.

    DSSP is run through the Biopython API, so the result is a Biopython
    object rather than DSSP's text output.

    Args:
        corresponding_gene_call: Identifier given to the parsed structure.
        pdb_filepath: Path of the structure file handed to the parser and DSSP.

    Returns:
        Whatever ``self.convert_DSSP_output_from_biopython_to_dataframe``
        returns for the DSSP result.

    Raises:
        ConfigError: If DSSP ran but produced no residue annotations.
    """
    # Load the structure file; PDB files can hold several models and DSSP
    # is given the first one.
    structure = PDBParser().get_structure(corresponding_gene_call, pdb_filepath)
    model = structure[0]

    # run DSSP
    residue_annotation = DSSP(model,
                              pdb_filepath,
                              dssp=self.DSSP_executable,
                              acc_array="Wilke")

    if not len(residue_annotation.keys()):
        # Adjacent literals are used instead of backslash continuations so
        # the message no longer carries the source file's indentation.
        raise ConfigError(
            "Your executable of DSSP, `{}`, exists but didn't return any meaningful output. This "
            "is a known issue with certain distributions of DSSP. For information on how to test "
            "that your version is working correctly, please visit "
            "http://merenlab.org/2016/06/18/installing-third-party-software/#dssp"
            .format(self.DSSP_executable))

    # convert to a digestible format
    return self.convert_DSSP_output_from_biopython_to_dataframe(
        residue_annotation)
def make_dssp(pdb_id, chain, chain_delimiter=':'):
    """
    Retrieve the DSSP amino-acid and secondary-structure strings of one chain.

    The PDB entry is downloaded into the working directory under the name
    ``pdb_id``, analysed with ``mkdssp`` and removed again, even when
    parsing or DSSP fails.

    :param pdb_id: pdb id
    :param chain: pdb chain id
    :param chain_delimiter: delimiter between id and chain id
    :return: id, aa sequence, ss sequence
    """
    url = PDB_URL + pdb_id + '.pdb'
    try:
        urllib.request.urlretrieve(url, pdb_id)
        structure = PDBParser().get_structure(pdb_id, pdb_id)
        dssp = DSSP(structure[0], pdb_id, dssp='mkdssp')
        # Materialise the chain's DSSP tuples before the file is deleted.
        chain_records = [dssp[key] for key in dssp.keys() if key[0] == chain]
    finally:
        # Clean up the download on every path. The existence check keeps a
        # failed download from being masked by a FileNotFoundError.
        if os.path.exists(pdb_id):
            os.remove(pdb_id)

    aa = ''.join(record[1] for record in chain_records)
    ss = ''.join(SS_MAP[record[2]] for record in chain_records)
    return pdb_id + chain_delimiter + chain, aa, ss
def test_dssp_with_mmcif_file_and_different_chain_ids(self):
    """Check DSSP on an mmCIF file whose label and author chain IDs differ."""
    # Skipped on DSSP releases older than 2.2.0.
    if self.dssp_version < StrictVersion("2.2.0"):
        self.skipTest("Test requires DSSP version 2.2.0 or greater")

    cif_path = "PDB/1A7G.cif"
    structure = self.cifparser.get_structure("1A7G", cif_path)
    first_model = structure[0]
    dssp = DSSP(first_model, cif_path)

    self.assertEqual(len(dssp), 82)
    # The chain ID is the first element of a DSSP key.
    first_key = dssp.keys()[0]
    self.assertEqual(first_key[0], "E")
def ss_map_creator(self, struc_to_aln_index_mapping):
    '''
    Map alignment indices to the translated DSSP secondary-structure code.

    Each DSSP key's residue number is looked up in the inverse map returned
    by ``structure_loader``; the DSSP code (tuple position 2) is translated
    through ``self.DSSP_code_mycode``.
    '''
    inv_map, model = self.structure_loader(struc_to_aln_index_mapping)
    dssp = DSSP(model, self.struc_path)

    ss_by_aln_index = {}
    for residue_key in dssp.keys():
        translated = self.DSSP_code_mycode[dssp[residue_key][2]]
        # NOTE(review): unlike both_map_creator there is no membership check
        # here, so a residue number missing from inv_map raises KeyError.
        aln_index = inv_map[residue_key[1][1]]
        ss_by_aln_index[aln_index] = translated
    return ss_by_aln_index
def depth_map_creator(self, struc_to_aln_index_mapping):
    '''
    Map alignment indices to an exposed/buried flag.

    A residue is labelled 'E' when its DSSP value at tuple position 3 is
    above 0.2 and 'B' otherwise. Despite the method name, no ResidueDepth
    calculation is performed here.
    '''
    inv_map, model = self.structure_loader(struc_to_aln_index_mapping)
    dssp = DSSP(model, self.struc_path)

    flag_by_aln_index = {}
    for residue_key in dssp.keys():
        flag = 'E' if dssp[residue_key][3] > 0.2 else 'B'
        # NOTE(review): unlike both_map_creator there is no membership check
        # here, so a residue number missing from inv_map raises KeyError.
        flag_by_aln_index[inv_map[residue_key[1][1]]] = flag
    return flag_by_aln_index
def both_map_creator(self, struc_to_aln_index_mapping):
    '''
    Map alignment indices to a combined exposure + secondary-structure code.

    For every DSSP residue whose residue number is present in the inverse
    map from ``structure_loader``, the value is 'E' (DSSP tuple position 3
    above 0.2) or 'B' (otherwise) followed by the DSSP code at tuple position
    2 translated through ``self.DSSP_code_mycode``. Residues missing from the
    inverse map are skipped.

    Raises OSError, chained to the original error, when running DSSP fails
    with an OSError.
    '''
    sda = {}
    inv_map, model = self.structure_loader(struc_to_aln_index_mapping)
    try:
        dssp = DSSP(model, self.struc_path)
    except OSError as e:
        # str() is required: concatenating the exception object itself raised
        # TypeError and hid the real failure.
        raise OSError("DSSP failed with the following error:\n" + str(e)) from e

    for a_key in dssp.keys():
        residue_number = a_key[1][1]
        if residue_number not in inv_map:
            continue
        exposure = 'E' if dssp[a_key][3] > 0.2 else 'B'
        sda[inv_map[residue_number]] = exposure + self.DSSP_code_mycode[dssp[a_key][2]]
    return sda
def get_DSSPList(file):
    """Collect per-residue DSSP values for a PDB file.

    DSSP is invoked twice with the executable ``fileExe``: once through the
    ``DSSP`` class (using the ``asaName`` accessibility table) and once
    through ``dssp_dict_from_pdb_file``.

    Returns a list of tuples
    ``(residue number, amino acid, acc value, RASA value)``, one per key of
    the ``DSSP`` object.
    """
    # Parse the PDB file and take its first model
    structure = PDBParser().get_structure('X', file)
    first_model = structure[0]

    # DSSP object: supplies the RASA value at tuple position 3
    dssp_object = DSSP(first_model, file, dssp=fileExe, acc_array=asaName)
    # DSSP dictionary: supplies the amino acid (position 0) and acc (position 2)
    dssp_dict, _ = dssp_dict_from_pdb_file(file, DSSP=fileExe)

    records = []
    for residue_key in dssp_object.keys():
        rasa_entry = dssp_object[residue_key]
        acc_entry = dssp_dict[residue_key]
        # residue number, amino acid, acc value and RASA value
        records.append(
            (residue_key[1][1], acc_entry[0], acc_entry[2], rasa_entry[3]))
    return records
def run_DSSP(self, corresponding_gene_call, pdb_filepath):
    """Run DSSP on a structure file and return the converted annotation.

    DSSP is run through the Biopython API, so the result is a Biopython
    object rather than DSSP's text output.

    Args:
        corresponding_gene_call: Identifier given to the parsed structure.
        pdb_filepath: Path of the structure file handed to the parser and DSSP.

    Returns:
        Whatever ``self.convert_DSSP_output_from_biopython_to_dataframe``
        returns for the DSSP result.

    Raises:
        ConfigError: If DSSP ran but produced no residue annotations.
    """
    # Load the structure file; PDB files can hold several models and DSSP
    # is given the first one.
    structure = PDBParser().get_structure(corresponding_gene_call, pdb_filepath)
    model = structure[0]

    # run DSSP
    residue_annotation = DSSP(model,
                              pdb_filepath,
                              dssp=self.DSSP_executable,
                              acc_array="Wilke")

    if not len(residue_annotation.keys()):
        # Adjacent literals are used instead of backslash continuations so
        # the message no longer carries the source file's indentation.
        raise ConfigError(
            "Your executable of DSSP, `{}`, exists but didn't return any meaningful output. This "
            "is a known issue with certain distributions of DSSP. For information on how to test "
            "that your version is working correctly, please visit "
            "http://merenlab.org/2016/06/18/installing-third-party-software/#dssp"
            .format(self.DSSP_executable))

    # convert to a digestible format
    return self.convert_DSSP_output_from_biopython_to_dataframe(
        residue_annotation)
# The Model in the PDB file consists of multiple Chains. # Calling DSSP with the current Model will concatenate # all Chains, and this is unavoidable because it's # external to Python. The DSSP result will need to be # truncated after the fact. warnings.simplefilter("ignore") structure = PDBParser().get_structure("tmp", path) multiple_chains = True model = next(structure.get_models()) # I always work with Model 1 output = DSSP(model, path) # DSSP expects a Model, why? if multiple_chains: # A DSSP object is dictionary-like, but with ordered keys. # The keys are nested tuples which are structured like PDB # Residue identifiers. The first element of the tuple is # the Chain name (typically, "A"). chain_a = output.keys()[0][0] subset_keys = [k for k in output.keys() if k[0] == chain_a] # Now use the keys in chain "A" to truncate output. The # output object need not be a DSSP object, it just needs to # yield the right values when iterated below. output = [output[k] for k in subset_keys] nr, aa, hh, st, exp = [], [], [], [], [] for n in output: nr.append(n[0]) aa.append(n[1]) exp.append(n[3]) if n[2] == "H": hh.append(1.0) else:
def parse(pdb, seq_len):
    """Build per-residue DSSP feature arrays for a structure file.

    Args:
        pdb: Path of the structure file, parsed with the module-level
            parser ``p`` and passed to DSSP.
        seq_len: Number of rows in the returned arrays.

    Returns:
        ``None`` when DSSP raises; otherwise a tuple ``(dssp_arr, valid)``:

        * ``dssp_arr`` -- ``(seq_len, 14)`` array made of, in order, the
          3-state one-hot, the 6-state one-hot, the relative accessibility
          and sin/cos of phi and psi.
        * ``valid`` -- boolean mask of the rows that were filled from DSSP.
    """
    structure = p.get_structure('', pdb)
    model = structure[0]
    try:
        dssp = DSSP(model, pdb, acc_array='Wilke')
    except Exception:
        # Best effort: a failed DSSP run yields no features. Narrowed from a
        # bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        return None

    six_state = np.zeros((seq_len, 6), dtype=np.float32)
    dihedrals_sc = np.zeros((seq_len, 4), dtype=np.float32)
    rsa = np.zeros((seq_len, 1))
    valid = np.zeros(seq_len, dtype=np.bool_)
    done = set()

    for k in dssp.keys():
        # Residue numbers are 1-based; rows are 0-based.
        index = k[1][1] - 1
        if index in done:
            # A repeated row is only overwritten when the last element of
            # the residue id (presumably the insertion code) is non-blank.
            if not k[1][-1].strip():
                continue
        if index < 0 or index >= seq_len:
            # Out-of-range numbers are folded onto the highest row seen so far.
            index = max(done, default=0)
        done.add(index)

        dssp_content = dssp[k]
        ss = dssp_content[2]
        rsa_ = dssp_content[3]
        if rsa_ == 'NA':
            rsa_ = np.nan
        phi = math.radians(dssp_content[4])
        psi = math.radians(dssp_content[5])

        six_state[index, :] = 0.
        six_state[index, ss_dict[ss]] = 1.
        rsa[index, 0] = rsa_
        dihedrals_sc[index, 0] = math.sin(phi)
        dihedrals_sc[index, 1] = math.cos(phi)
        dihedrals_sc[index, 2] = math.sin(psi)
        dihedrals_sc[index, 3] = math.cos(psi)
        valid[index] = True

    # Fix rsa NaNs by linear interpolation over the row index
    if np.isnan(rsa).any():
        nans = np.isnan(rsa)
        x = lambda z: z.nonzero()[0]
        rsa[nans] = np.interp(x(nans), x(~nans), rsa[~nans])
    assert not np.isnan(rsa).any()

    # Collapse the six columns pairwise into three: (0, 3), (1, 2), (4, 5)
    three_state = np.zeros((six_state.shape[0], 3), dtype=np.float32)
    three_state[:, 0] = six_state[:, 0] + six_state[:, 3]
    three_state[:, 1] = six_state[:, 1] + six_state[:, 2]
    three_state[:, 2] = six_state[:, 4] + six_state[:, 5]
    assert three_state.sum(axis=1).max() == 1., three_state.sum(axis=1)
    assert six_state.sum(axis=1).max() == 1., six_state.sum(axis=1)

    dssp_arr = np.concatenate((three_state, six_state, rsa, dihedrals_sc),
                              axis=1)
    return dssp_arr, valid
]) can_coord -= center chain_coords.append(can_coord) chain_length = len(can_coord) chain_color = np.random.rand(1, 3) chain_colors.append(chain_color) color.append(np.tile(chain_color, (chain_length, 1))) chain_radius.append([ vrad(atom.get_id()) for atom in chain.get_atoms() if atom.get_name() == 'CA' or atom.get_name() == 'N' ]) if len(chains) > 1: color = np.concatenate(color) #dssp color2 = [] struct3 = [dssp[key][2] for key in list(dssp.keys())] residues = [ residue for residue in structure.get_residues() if residue.get_resname() in resdict.keys() ] for i in range(len(struct3)): dsspcolor = crgbaDSSP(struct3[i])[0:3] n_atoms = len([ atom for atom in residues[i] if atom.get_name() == 'CA' or atom.get_name() == 'N' ]) color2.append(np.tile(dsspcolor, (n_atoms, 1))) if len(struct3) > 1: color2 = np.concatenate(color2) #atom radius radius = [
class MatViewer(object):
    """Matplotlib 3D viewer for a PDB structure.

    Constructing the viewer parses the file, runs DSSP on the first model and
    immediately draws the plot for the requested mode.

    The colour tables ``colors``, ``resdict`` and ``colorsDSSP`` and the
    helper ``colorrgba`` are module-level names defined outside this class.
    """

    visualization_modes = ['cpk', 'backbone', 'aminoacid', 'dssp']

    def __init__(self, pdbdata, mode='cpk'):
        """Parse ``pdbdata``, run DSSP and draw the plot for ``mode``.

        Raises:
            Exception: If ``mode`` is not one of ``visualization_modes``.
        """
        # Analyze pdb file
        self.parser = PDBParser(QUIET=True, PERMISSIVE=True)
        self.structure = self.parser.get_structure('model', pdbdata)

        # DSSP prediction
        self.model = self.structure[0]
        self.dssp = DSSP(self.model, pdbdata)

        # Mode selection
        if mode not in MatViewer.visualization_modes:
            raise Exception('Not recognized visualization mode %s' % mode)
        self.mode = mode

        # Make the plot
        if self.mode == 'cpk':
            self.cpk2d()
        elif self.mode == 'backbone':
            self.bb2d()
        elif self.mode == 'aminoacid':
            self.aa2d()
        elif self.mode == 'dssp':
            self.dssp2d()

    @staticmethod
    def _new_axes():
        """Create a new figure and return its 3D axes."""
        fig = plt.figure()
        return fig.add_subplot(111, projection='3d')

    @staticmethod
    def _can_coordinates(entity):
        """Return the coordinates of the CA and N atoms of ``entity``."""
        return [atom.coord for atom in entity.get_atoms()
                if atom.get_name() in ('CA', 'N')]

    @staticmethod
    def _random_rgb():
        """Return a random RGB tuple accepted by matplotlib as one colour."""
        # A (3, 1) array is not a valid matplotlib colour; a flat
        # 3-tuple passed through ``color=`` is unambiguous.
        return tuple(np.random.rand(3))

    def cpk2d(self):
        """Draws atoms in a CPK colour palette"""
        ax = self._new_axes()
        atoms = list(self.structure.get_atoms())

        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for atom_type, color in colors.items():
            coordinates = [atom.coord for atom in atoms
                           if atom.get_id() == atom_type]
            if coordinates:
                x, y, z = zip(*coordinates)
                ax.scatter(x, y, z, c=color, marker='o')

        # Atoms without an entry in the colour table are drawn in pink.
        coordinates_pink = [atom.coord for atom in atoms
                            if atom.get_id() not in colors]
        if coordinates_pink:
            # Guarded: unpacking zip() of an empty list raises ValueError.
            xp, yp, zp = zip(*coordinates_pink)
            ax.scatter(xp, yp, zp, c='pink', marker='o')

        ax.axis("off")
        plt.show()

    def bb2d(self):
        """Draws CA-N atoms linked by lines"""
        ax = self._new_axes()
        for chain in self.structure.get_chains():
            can_coordinates = self._can_coordinates(chain)
            if not can_coordinates:
                # A chain with no CA/N atoms has nothing to draw.
                continue
            x, y, z = zip(*can_coordinates)
            ccolor = self._random_rgb()
            ax.plot(x, y, z, color=ccolor, linewidth=2)
            ax.scatter(x, y, z, color=ccolor, marker='o')
        ax.axis("off")
        plt.show()

    def aa2d(self):
        """Draws atoms using a colour palette depending on the type of
        residue"""
        ax = self._new_axes()
        all_residues = list(self.structure.get_residues())

        # Select residues only if they're aminoacids
        for resname, residuetype in resdict.items():
            color = colorrgba(residuetype)
            # One flat list of coordinates. The previous per-residue arrays
            # were only concatenated when there was more than one residue,
            # so a single residue was unpacked incorrectly.
            rescoord = [atom.coord
                        for residue in all_residues
                        if residue.get_resname() == resname
                        for atom in residue.get_atoms()]
            if rescoord:
                x, y, z = zip(*rescoord)
                ax.scatter(x, y, z, c=color, marker='o')

        # Select residues that are not aminoacids, skipping water
        rescoordpink = [atom.coord
                        for residue in all_residues
                        if residue.get_resname() != 'HOH'
                        and residue.get_resname() not in resdict
                        for atom in residue.get_atoms()]
        if rescoordpink:
            xp, yp, zp = zip(*rescoordpink)
            ax.scatter(xp, yp, zp, c='pink', marker='o')

        ax.axis("off")
        plt.show()

    def dssp2d(self):
        """Draw CA-N atoms linked by lines, coloured by their DSSP
        secondary structure assignment"""
        ax = self._new_axes()

        # Create the residue lists linked to their predictions
        residues = [residue for residue in self.structure.get_residues()
                    if residue.get_resname() in resdict]
        struct3 = [self.dssp[key][2] for key in self.dssp.keys()]
        # NOTE(review): pairing is purely positional; it assumes DSSP reports
        # exactly the residues kept above, in the same order -- confirm.
        # list() is required: on Python 3 a bare zip() is exhausted after the
        # first colour, leaving every later class empty.
        respred = list(zip(struct3, residues))

        # Create the point cloud depending on the prediction
        for prediction, color in colorsDSSP.items():
            predcoord_can = [coord
                             for assigned, residue in respred
                             if assigned == prediction
                             for coord in self._can_coordinates(residue)]
            if predcoord_can:
                x, y, z = zip(*predcoord_can)
                ax.scatter(x, y, z, c=color, marker='o')

        # Create the chains linking the aminoacids
        for chain in self.structure.get_chains():
            can_coordinates = self._can_coordinates(chain)
            if not can_coordinates:
                continue
            x, y, z = zip(*can_coordinates)
            ax.plot(x, y, z, color=self._random_rgb(), linewidth=1)

        ax.axis("off")
        plt.show()
def __init__(self):
    """Build the Panda3D scene for the PDB file named in ``sys.argv[1]``.

    The structure is parsed, DSSP is run on its first model, and five scene
    nodes are populated with sphere models: CPK, amino acids, backbone,
    DSSP and a translucent cloud. Only the CPK node is left visible; the
    others are hidden. Lighting, antialiasing and the keyboard bindings
    are then set up.
    """
    ShowBase.__init__(self)
    self.cloud = False
    self.help = False
    self.screen_text = []
    # Parse the PDB file given on the command line
    pdbdata = sys.argv[1]
    parser = PDBParser(QUIET=True, PERMISSIVE=True)
    structure = parser.get_structure('model', pdbdata)
    # Run the DSSP prediction on the first model
    model = structure[0]
    dssp = DSSP(model, pdbdata)
    # Create the scene nodes, one per representation
    self.cpknode = render.attachNewNode("CPK")
    self.aanode = render.attachNewNode("Aminoacids")
    self.bbnode = render.attachNewNode("BackBone")
    self.dsspnode = render.attachNewNode("DSSP")
    self.nnode = render.attachNewNode("Cloud")
    # CPK: one sphere per atom, coloured and scaled by atom id
    for atom in structure.get_atoms():
        x, y, z = atom.coord
        atomid = atom.get_id()
        a = loader.loadModel("data/atom_sphere")
        a.setPos(x, y, z)
        a.reparentTo(self.cpknode)
        a.setColor(colorrgba(atomid))
        a.setScale(vrad(atomid))
    self.cpknode.flattenStrong()
    # Amino acids: residues whose name is in resdict, coloured by residue type
    self.residues = [
        residue for residue in structure.get_residues()
        if residue.get_resname() in resdict.keys()
    ]
    for residue in self.residues:
        resid = residue.get_resname()
        color = colorrgba(restype(resid))
        atoms = [atom for atom in residue.get_atoms()]
        for atom in atoms:
            x, y, z = atom.coord
            atomid = atom.get_id()
            a = loader.loadModel("data/atom_sphere")
            a.setPos(x, y, z)
            a.setColor(color)
            a.setScale(vrad(atomid))
            a.reparentTo(self.aanode)
    # Remaining residues except water ('HOH'), coloured per atom id
    self.residues2 = [
        residue for residue in structure.get_residues()
        if not residue in self.residues and residue.get_resname() != 'HOH'
    ]
    for residue in self.residues2:
        atoms = [atom for atom in residue.get_atoms()]
        for atom in atoms:
            x, y, z = atom.coord
            atomid = atom.get_id()
            a = loader.loadModel("data/atom_sphere")
            a.setPos(x, y, z)
            a.setColor(colorrgba(atomid))
            a.setScale(vrad(atomid))
            a.reparentTo(self.aanode)
    self.aanode.flattenStrong()
    self.aanode.hide()
    # Backbone: CA and N atoms of each chain in one random opaque colour
    for chain in structure.get_chains():
        carr = np.random.rand(3, 1)
        ccolor = float(carr[0]), float(carr[1]), float(carr[2]), 1.0
        can_atoms = [
            atom for atom in chain.get_atoms()
            if atom.get_name() == 'CA' or atom.get_name() == 'N'
        ]
        can_coordinates = [atom.coord for atom in can_atoms]
        for atom in can_atoms:
            x, y, z = atom.coord
            atomid = atom.get_id()
            a = loader.loadModel("data/atom_sphere")
            a.setPos(x, y, z)
            a.reparentTo(self.bbnode)
            a.setColor(ccolor)
            a.setScale(vrad(atomid) / 2.5)
        # Polyline through the CA/N atoms of the chain.
        # NOTE(review): can_coordinates[0] raises IndexError for a chain
        # that has no CA/N atoms -- confirm such chains cannot occur.
        lines = LineSegs()
        lines.setColor(ccolor)
        lines.moveTo(can_coordinates[0][0], can_coordinates[0][1],
                     can_coordinates[0][2])
        for i in range(len(can_atoms))[1:]:
            lines.drawTo(can_coordinates[i][0], can_coordinates[i][1],
                         can_coordinates[i][2])
        lines.setThickness(6)
        lnode = lines.create()
        self.linenp = NodePath(lnode)
        self.linenp.instanceTo(self.bbnode)
        # Cloud: every atom of the chain, slightly enlarged, in the chain colour
        catoms = [atom for atom in chain.get_atoms()]
        for atom in catoms:
            x, y, z = atom.coord
            atomid = atom.get_id()
            a = loader.loadModel("data/atom_sphere")
            a.setPos(x, y, z)
            a.reparentTo(self.nnode)
            a.setColor(ccolor)
            a.setScale(vrad(atomid) * 1.1)
    self.bbnode.flattenStrong()
    self.bbnode.hide()
    self.nnode.setTransparency(TransparencyAttrib.MAlpha)
    self.nnode.setAlphaScale(0.3)
    self.nnode.hide()
    # DSSP: CA/N spheres coloured by the DSSP value at tuple position 2.
    # NOTE(review): self.linenp holds only the polyline of the last chain
    # processed above, so only that chain's line is instanced here -- confirm
    # this is intended for multi-chain structures.
    self.linenp.instanceTo(self.dsspnode)
    self.struct3 = [dssp[key][2] for key in list(dssp.keys())]
    # NOTE(review): DSSP entries are matched to self.residues by position;
    # this assumes both sequences have the same length and order -- confirm.
    for i in range(len(self.struct3)):
        dsspcolor = crgbaDSSP(self.struct3[i])
        can_atoms = [
            atom for atom in self.residues[i]
            if atom.get_name() == 'CA' or atom.get_name() == 'N'
        ]
        for atom in can_atoms:
            x, y, z = atom.coord
            atomid = atom.get_id()
            a = loader.loadModel("data/atom_sphere")
            a.setPos(x, y, z)
            a.reparentTo(self.dsspnode)
            a.setColor(dsspcolor)
            a.setScale(vrad(atomid) / 2.5)
    self.dsspnode.flattenStrong()
    self.dsspnode.hide()
    # Place the protein nodes at the origin
    self.cpknode.setPos(0, 0, 0)
    self.bbnode.setPos(0, 0, 0)
    self.aanode.setPos(0, 0, 0)
    self.nnode.setPos(0, 0, 0)
    # Centre the camera on the bounds of the CPK node
    xc, yc, zc = self.cpknode.getBounds().getCenter()
    self.center = xc, yc, zc
    self.pradius = self.cpknode.getBounds().getRadius()
    self.center_camera()
    # Create the ambient light
    self.ambient = AmbientLight('alight')
    self.ambient.setColor(LVecBase4f(0.16, 0.16, 0.17, 1.0))
    self.alight = render.attachNewNode(self.ambient)
    render.setLight(self.alight)
    # Create the directional light (shadow-casting) aimed at the model centre
    self.directional = DirectionalLight('dlight')
    self.directional.setColor(LVecBase4f(0.8, 0.7, 0.75, 1.0))
    self.directional.setShadowCaster(True, 512, 512)
    render.setShaderAuto()
    self.dlight = render.attachNewNode(self.directional)
    self.dlight.setPos(0, -50, 0)
    render.setLight(self.dlight)
    self.dlight.lookAt(self.cpknode.getBounds().getCenter())
    # Post-processing
    render.setAntialias(AntialiasAttrib.MAuto)
    # Keyboard bindings
    self.accept('c', self.toggle_cloud)
    self.accept('1', self.showmodel, [self.cpknode])
    self.accept('2', self.showmodel, [self.aanode])
    self.accept('3', self.showmodel, [self.bbnode])
    self.accept('4', self.showmodel, [self.dsspnode])
    self.accept('x', self.center_camera)
    self.accept('arrow_left', self.taskMgr.add,
                [self.spinCameraTaskX, "SpinCameraTaskX"])
    self.accept('arrow_up', self.taskMgr.add,
                [self.spinCameraTaskY, "SpinCameraTaskY"])
    self.accept('arrow_down', self.stop_camera)
    self.accept('escape', sys.exit)
class Canvas(app.Canvas):
    """VisPy canvas that renders a PDB structure as shaded points.

    Constructing the canvas parses the file, runs DSSP on the first model,
    computes per-atom position/colour/radius for the requested mode, uploads
    them to the shader program and shows the window.
    """

    visualization_modes = ['cpk', 'backbone', 'aminoacid', 'dssp']

    def __init__(self, pdbdata, mode='cpk'):
        """Parse ``pdbdata``, prepare the GL program and show the canvas.

        Raises:
            Exception: If ``mode`` is not one of ``visualization_modes``.
        """
        # Startup
        app.Canvas.__init__(self, keys='interactive', size=(W, H))

        # Loading shaders
        self.program = gloo.Program(vertex, fragment)

        # Analyze pdb file
        self.parser = PDBParser(QUIET=True, PERMISSIVE=True)
        self.structure = self.parser.get_structure('model', pdbdata)

        # DSSP prediction
        self.pmodel = self.structure[0]
        self.dssp = DSSP(self.pmodel, pdbdata)

        # Mode selection
        if mode not in Canvas.visualization_modes:
            raise Exception('Not recognized visualization mode %s' % mode)
        self.mode = mode

        # Camera settings
        self.translate = 50
        self.translate = max(-1, self.translate)
        self.view = translate((0, 0, -self.translate), dtype=np.float32)
        self.model = np.eye(4, dtype=np.float32)
        self.projection = np.eye(4, dtype=np.float32)
        self.program['u_projection'] = self.projection
        self.quaternion = Quaternion()

        # Load data depending on the mode
        self.apply_zoom()
        self.atom_information()
        self.load_data()
        self.show()

    def atom_information(self):
        """Determines the coordinates, colors and sizes of the atoms depending on the mode"""
        if self.mode == 'cpk':
            # list of atoms
            self.atoms = [atom for atom in self.structure.get_atoms()]
            self.natoms = len(self.atoms)
            # atom coordinates, centred on their centroid
            self.coordinates = np.array([atom.coord for atom in self.atoms])
            self.center = centroid(self.coordinates)
            self.coordinates -= self.center
            # atom color
            self.color = [
                np.array(colorrgba(atom.get_id())[0:3]) for atom in self.atoms
            ]
            # atom radius
            self.radius = [vrad(atom.get_id()) for atom in self.atoms]

        elif self.mode == 'aminoacid':
            # list of atoms, water residues excluded
            self.atoms = [
                atom for atom in self.structure.get_atoms()
                if atom.get_parent().resname != 'HOH'
            ]
            self.natoms = len(self.atoms)
            # atom coordinates, centred on their centroid
            self.coordinates = np.array([atom.coord for atom in self.atoms])
            self.center = centroid(self.coordinates)
            self.coordinates -= self.center
            # atom color, by residue type
            self.color = [
                colorrgba(restype(atom.get_parent().resname))[0:3]
                for atom in self.atoms
            ]
            # atom radius
            self.radius = [vrad(atom.get_id()) for atom in self.atoms]

        elif self.mode == 'backbone':
            # list of atoms: CA and N only
            self.atoms = [
                atom for atom in self.structure.get_atoms()
                if atom.get_name() == 'CA' or atom.get_name() == 'N'
            ]
            self.natoms = len(self.atoms)
            # atom coordinates, centred on their centroid
            self.coordinates = np.array([atom.coord for atom in self.atoms])
            self.center = centroid(self.coordinates)
            self.coordinates -= self.center
            # atom color: one random colour per chain
            self.color = []
            self.chains = []
            for chain in self.structure.get_chains():
                self.chains.append(chain)
                self.chain_length = len([
                    atom for atom in chain.get_atoms()
                    if atom.get_name() == 'CA' or atom.get_name() == 'N'
                ])
                self.chain_color = np.random.rand(1, 3)
                self.color.append(
                    np.tile(self.chain_color, (self.chain_length, 1)))
            if len(self.chains) > 1:
                self.color = np.concatenate(self.color)
            # atom radius
            self.radius = [vrad(atom.get_id()) for atom in self.atoms]

        elif self.mode == 'dssp':
            # list of atoms: CA and N only
            self.atoms = [
                atom for atom in self.structure.get_atoms()
                if atom.get_name() == 'CA' or atom.get_name() == 'N'
            ]
            self.natoms = len(self.atoms)
            # atom coordinates, centred on their centroid
            self.coordinates = np.array([atom.coord for atom in self.atoms])
            self.center = centroid(self.coordinates)
            self.coordinates -= self.center
            # atom color, from the DSSP value at tuple position 2
            self.struct3 = [
                self.dssp[key][2] for key in list(self.dssp.keys())
            ]
            self.residues = [
                residue for residue in self.structure.get_residues()
                if residue.get_resname() in resdict.keys()
            ]
            self.color = []
            # NOTE(review): DSSP entries are matched to self.residues by
            # position; this assumes equal length and order -- confirm.
            for i in range(len(self.struct3)):
                self.dsspcolor = crgbaDSSP(self.struct3[i])[0:3]
                self.n_atoms = len([
                    atom for atom in self.residues[i]
                    if atom.get_name() == 'CA' or atom.get_name() == 'N'
                ])
                self.color.append(np.tile(self.dsspcolor, (self.n_atoms, 1)))
            if len(self.struct3) > 1:
                self.color = np.concatenate(self.color)
            # atom radius
            self.radius = [vrad(atom.get_id()) for atom in self.atoms]

    def load_data(self):
        """Make an array with all the data and load it into VisPy
        Gloo"""
        # 'a_radius' is declared as a plain scalar field: the former
        # (np.float32, 1) spelling is deprecated in NumPy.
        data = np.zeros(self.natoms, [('a_position', np.float32, 3),
                                      ('a_color', np.float32, 3),
                                      ('a_radius', np.float32)])
        data['a_position'] = self.coordinates
        data['a_color'] = self.color
        data['a_radius'] = self.radius  #*self.pixel_scale

        self.program.bind(gloo.VertexBuffer(data))
        self.program['u_model'] = self.model
        self.program['u_view'] = self.view
        self.program['u_light_position'] = 0., 0., 2.
        self.program['u_light_spec_position'] = -5., 5., -5.
        # Function-call form: the Python 2 print statement is a SyntaxError
        # on Python 3 and made the whole module unimportable.
        print('Data loaded')

    def on_resize(self, event):
        """Handle a resize event.

        NOTE(review): the new size is read but not used; the viewport and
        projection are only set in ``apply_zoom`` -- confirm this is intended.
        """
        width, height = event.size

    def apply_zoom(self):
        """Set the GL viewport and perspective projection from the canvas size."""
        width, height = self.physical_size
        gloo.set_viewport(0, 0, width, height)
        self.projection = perspective(95.0, width / float(height), 1.0, 400.0)
        self.program['u_projection'] = self.projection

    def on_draw(self, event):
        """Clear the buffer and draw the atoms as points."""
        gloo.clear()
        self.program.draw('points')

    def on_mouse_move(self, event):
        """Rotate the model with button 1, move the camera with button 2."""
        if event.button == 1 and event.last_event is not None:
            x0, y0 = event.last_event.pos
            x1, y1 = event.pos
            w, h = self.size
            self.quaternion = (self.quaternion *
                               Quaternion(*_arcball(x0, y0, w, h)) *
                               Quaternion(*_arcball(x1, y1, w, h)))
            self.model = self.quaternion.get_matrix()
            self.program['u_model'] = self.model
            self.update()
        elif event.button == 2 and event.last_event is not None:
            x0, y0 = event.last_event.pos
            x1, y1 = event.pos
            # Vertical drag changes the camera distance, clamped at -1.
            self.translate += (y1 - y0)
            self.translate = max(-1, self.translate)
            self.view = translate((0, 0, -self.translate), dtype=np.float32)
            self.program['u_view'] = self.view
            self.update()
def atom_information(pdbdata, mode):
    """Compute the module-level drawing data for a PDB file.

    The file is parsed and DSSP is run on its first model regardless of
    ``mode``. Depending on ``mode`` the following globals are (re)assigned:

    * 'cpk', 'aminoacid': ``coordinates``, ``color``, ``radius``
    * 'backbone', 'dssp': the three above plus ``chains``,
      ``chain_coords`` and ``chain_colors``

    Any other ``mode`` leaves every global untouched.
    """
    global coordinates
    global color
    global radius
    global chains
    global chain_coords
    global chain_colors

    # Parse the file, then run DSSP on the first model
    structure = PDBParser(QUIET=True, PERMISSIVE=True).get_structure(
        'model', pdbdata)
    dssp = DSSP(structure[0], pdbdata)

    def backbone_only(atom_source):
        # Keep the CA and N atoms of whatever yields atoms
        return [atom for atom in atom_source
                if atom.get_name() in ('CA', 'N')]

    def centered_trace(chain):
        # CA/N coordinates of one chain, shifted by the global centroid
        trace = np.array(
            [atom.coord for atom in backbone_only(chain.get_atoms())])
        trace -= center
        return trace

    def random_opaque_rgba():
        # Three random channels followed by alpha 1.0
        return np.append(np.random.rand(1, 3), [1.0])

    # Atom selection per mode
    if mode == 'cpk':
        atoms = list(structure.get_atoms())
    elif mode == 'aminoacid':
        atoms = [atom for atom in structure.get_atoms()
                 if atom.get_parent().resname != 'HOH']
    elif mode == 'backbone' or mode == 'dssp':
        atoms = backbone_only(structure.get_atoms())
    else:
        return

    # Coordinates centred on their centroid (shared by every mode)
    coordinates = np.array([atom.coord for atom in atoms])
    center = centroid(coordinates)
    coordinates -= center

    # Colours (and per-chain data where the mode needs it)
    if mode == 'cpk':
        color = [colorrgba(atom.get_id()) for atom in atoms]

    elif mode == 'aminoacid':
        color = [colorrgba(restype(atom.get_parent().resname))
                 for atom in atoms]

    elif mode == 'backbone':
        color = []
        chains = []
        chain_colors = []
        chain_coords = []
        for chain in structure.get_chains():
            chains.append(chain)
            trace = centered_trace(chain)
            chain_coords.append(trace)
            rgba = random_opaque_rgba()
            chain_colors.append(rgba)
            color.append(np.tile(rgba, (len(trace), 1)))
        if len(chains) > 1:
            color = np.concatenate(color)

    else:  # mode == 'dssp'
        struct3 = [dssp[key][2] for key in dssp.keys()]
        residues = [residue for residue in structure.get_residues()
                    if residue.get_resname() in resdict.keys()]
        color = []
        for position, ss_code in enumerate(struct3):
            dsspcolor = crgbaDSSP(ss_code)
            n_atoms = len(backbone_only(residues[position]))
            color.append(np.tile(dsspcolor, (n_atoms, 1)))
        if len(struct3) > 1:
            color = np.concatenate(color)

        chains = []
        chain_colors = []
        chain_coords = []
        for chain in structure.get_chains():
            chains.append(chain)
            chain_colors.append(random_opaque_rgba())
            chain_coords.append(centered_trace(chain))

    # Atom radii (shared by every mode)
    radius = np.array([vrad(atom.get_id()) for atom in atoms])
import numpy as np
import tables as tb

###############################################################################
################################# PARSE DSSP ##################################
###############################################################################
# Command line: argv[1] is read as lines into ``fa``; argv[2] is the PDB file
# that DSSP is run on.

# Context managers close both input files; they were previously left open.
with open(sys.argv[1]) as fasta_handle:
    fa = fasta_handle.read().splitlines()

filename = sys.argv[2]  # this will eventually become the pdb file that we run dssp on

p = PDBParser()
with open(filename) as pdb_handle:
    structure = p.get_structure(filename, pdb_handle)

# There is only one structure for dssp (NMR for example has more) and the
# dssp parser can only take one structure.
model = structure[0]
dssp = DSSP(model, filename)
a_key = list(dssp.keys())

# Integer label for each DSSP secondary-structure code
statedic = {'H': 1, 'I': 2, 'G': 3, 'E': 4, 'B': 5, 'T': 6, 'S': 7, '-': 8}

dsspAA = []
states = []
for line in a_key:
    dsspAA.append(dssp[line][1])
    states.append(statedic[dssp[line][2]])

###############################################################################
############################## ONE HOT ENCODING ###############################
###############################################################################
seq = fa[1]
# Command line: argv[1] is read as lines into ``fa``; argv[2] is the PDB file
# that DSSP is run on.

# Context managers close both input files; they were previously left open.
with open(sys.argv[1]) as fasta_handle:
    fa = fasta_handle.read().splitlines()

filename = sys.argv[2]  # this will eventually become the pdb file that we run dssp on

# Unused reference table kept from the original script:
#max_val= {'A':106, 'C':135, 'D':163, 'E':194, 'F':197, 'G':84, 'H':184, 'I':169, 'K':205, 'L':164, 'M':188, 'N':157, 'P':136, 'R':248, \
#'S':130, 'T':142, 'V':142, 'W':227, 'Y':222, 'Z':196} #'B':160, 'Q':194, 'X':222

p = PDBParser()
with open(filename) as pdb_handle:
    structure = p.get_structure(filename, pdb_handle)

# There is only one structure for dssp (NMR for example has more) and the
# dssp parser can only take one structure.
model = structure[0]
dssp = DSSP(model, filename)
a_key = list(dssp.keys())

rsa = []
dsspAA = []
for line in a_key:
    rsa.append(dssp[line][3])
    dsspAA.append(dssp[line][1])

# 'NA' entries become NaN directly, instead of going through an
# intermediate string array and a string/float comparison.
rsa = np.array([np.nan if value == 'NA' else value for value in rsa],
               dtype=float)

###############################################################################
############################# MAKING PADDED ARRAY #############################
###############################################################################