def score(self, multi=False, details=None, **kwargs):
    """Reset the ensembles to their original coordinates and score them.

    Both ensembles get their reference coordinates reset from
    ``self.ref_original`` / ``self.this_original``.  When
    ``kwargs["exclude_sites"]`` is truthy, the atoms whose residue index
    matches one of ``kwargs["this_score_idxs"]`` are removed from the
    stored originals and from both ensembles, and the cached ``ref_cds``
    is cleared, before scoring.

    :arg multi: selects which private scorer is called (see note below)
    :arg details: accepted for interface compatibility; not read here
    :arg kwargs: must contain ``"exclude_sites"``; ``"this_score_idxs"``
        is read when sites are excluded.  The whole dict is forwarded to
        the scorer as its ``kwargs`` keyword argument.
    :returns: whatever the selected private scorer returns
    """
    self.ref_structures.setCoords(self.ref_original)
    self.structures.setCoords(self.this_original)

    if kwargs["exclude_sites"]:
        resindices = self.ref_original.getResindices()
        site_idxs = kwargs["this_score_idxs"]

        # Residue indices that differ from every perturbed site; used to
        # subset the stored original atoms.
        atom_mask = resindices[np.logical_and.reduce(
            [resindices != site[0] for site in site_idxs])]
        self.ref_original = self.ref_original[atom_mask]
        self.this_original = self.this_original[atom_mask]

        trimmed_ref = prody.Ensemble()
        trimmed_this = prody.Ensemble()
        trimmed_ref.setAtoms(self.ref_original)
        trimmed_this.setAtoms(self.this_original)

        # Same filter, rebuilt from the flattened site indices, applied
        # along the atom axis of the coordinate sets.
        coord_mask = resindices[np.logical_and.reduce(
            [resindices != site
             for site in np.array(site_idxs).T[0, 0, :]])]
        trimmed_ref.addCoordset(
            self.ref_structures.getCoordsets()[:, coord_mask, :])
        trimmed_this.addCoordset(
            self.structures.getCoordsets()[:, coord_mask, :])

        self.ref_structures = trimmed_ref
        self.structures = trimmed_this
        self.ref_cds = None

    # NOTE(review): multi=True dispatches to _score_single and multi=False
    # to _score_multi; the naming looks inverted -- confirm against callers.
    scorer = self._score_single if multi else self._score_multi
    return scorer(kwargs=kwargs)
def get_scaling_factors(original, runs, cd_function,
                        exclude_application_site=True,
                        fitting_operation=None, fitting_string=None,
                        target_cd=1.0):
    """Compute ``cd_function`` scores for the displaced structures of each run.

    For every run, each row of ``run.results`` is reshaped to ``(-1, 3)``
    and added to the coordinates of *original*.  The displaced structure is
    optionally fitted back onto *original*, and the resulting coordinate
    sets are compared with the undisplaced ones through *cd_function*.

    :arg original: atoms providing the starting coordinates
    :arg runs: sequence of objects exposing ``results`` (one flattened
        displacement vector per row) and ``force_sets`` (each with an
        ``origin`` atom)
    :arg cd_function: callable taking ``(reference_coordsets,
        displaced_coordsets)``; its result is stored in the rows of the
        output that correspond to the run's force application sites
    :arg exclude_application_site: when true, the application sites are
        removed from both coordinate arrays before calling *cd_function*
    :arg fitting_operation: optional callable used to fit the displaced
        structure; it is first called on the *fitting_string* selections
        to obtain ``.transformation``, then applied to the full structure
    :arg fitting_string: selection string used for the fit
    :arg target_cd: accepted for interface compatibility; not used here
    :returns: ``(scores, scores, coords)`` -- the score array is returned
        twice, followed by an array of the fitted (unfiltered) coordinate
        sets of every run
    """
    out = np.zeros((len(original), runs[0].results.shape[0]))
    coords_out = []
    atoms = original
    resindices = atoms.getResindices()

    # The undisplaced fitting selection does not change between frames.
    if fitting_operation is not None:
        fitting_original_atoms = atoms.select(fitting_string)

    for r in runs:
        this_original = original.copy()

        # Needed to address rows of ``out`` whether or not the sites are
        # excluded from the comparison.
        this_idxs = [
            np.where(resindices == fs.origin.getResindex())[0][0]
            for fs in r.force_sets
        ]

        if exclude_application_site:
            ref_original = this_original.select(
                " and ".join(["(not resindex %s)" % s for s in this_idxs]))
            ref_original_c = ref_original.getCoordsets()
        else:
            ref_original_c = this_original.getCoordsets()
        ref_original_c = np.repeat(ref_original_c, r.results.shape[0], axis=0)

        fitted_ref_structures = prody.Ensemble(
            title="displaced_fitted_structures")
        fitted_ref_structures.setAtoms(atoms)

        for displacements in r.results:
            displaced_atoms = original.copy()
            # Floor division keeps the reshape dimension an integer on
            # both Python 2 and Python 3.
            displaced_atoms.setCoords(
                atoms.getCoords()
                + displacements.reshape((displacements.shape[0] // 3, 3)))

            if fitting_operation is not None:
                fitting_displaced_atoms = displaced_atoms.select(
                    fitting_string)
                transformation = fitting_operation(
                    fitting_displaced_atoms,
                    fitting_original_atoms).transformation
                this_ref_fit = fitting_operation(
                    displaced_atoms, atoms, transformation=transformation)
                fitted_ref_structures.addCoordset(this_ref_fit.transformed())
            else:
                fitted_ref_structures.addCoordset(displaced_atoms)

        if exclude_application_site:
            filtered_fitted_ref_structures = prody.Ensemble(
                title="displaced_fitted_structures")
            filtered_fitted_ref_structures.setAtoms(ref_original)
            filtered_fitted_ref_structures.addCoordset(
                np.delete(fitted_ref_structures.getCoordsets(),
                          this_idxs, axis=1))
        else:
            filtered_fitted_ref_structures = fitted_ref_structures

        coords_out.append(fitted_ref_structures.getCoordsets())
        out[this_idxs, :] = cd_function(
            ref_original_c, filtered_fitted_ref_structures.getCoordsets())

    return out, out, np.array(coords_out)
def assign_pcs(args):
    """Project the structures in a PDB or DCD file onto selected modes.

    *args* is a single tuple ``(fn, topf, eda, pcs, sel, outf)`` unpacked
    here: the coordinate file name, the topology PDB used for DCD input,
    the mode container, the index into it, the atom selection string and
    an optional output path.

    * File names ending in ``"pdb"``: the projection is printed and the
      function returns ``None``.
      NOTE(review): this branch neither returns nor saves the projection,
      which looks like leftover debugging -- confirm.
    * File names ending in ``".dcd"``: the projection is returned and, if
      *outf* is not ``None``, also written with ``np.savetxt``.
    * Anything else: a message is printed and ``None`` is returned.
    """
    fn, topf, eda, pcs, sel, outf = args

    is_pdb = fn.endswith("pdb")
    if not is_pdb and not fn.endswith(".dcd"):
        print("Unsupport file type: %s" % fn)
        return None

    if is_pdb:
        atoms = prody.parsePDB(fn)
        atoms = atoms.select(sel).copy()
        ensemble = prody.Ensemble('A single pdb file ensemble')
        ensemble.setCoords(atoms.getCoords())
        ensemble.addCoordset(atoms.getCoordsets())
        ensemble.iterpose()
        projection = prody.calcProjection(ensemble, eda[pcs])
        print(projection)
        return None

    topology = prody.parsePDB(topf)
    selected = topology.select(sel)
    trajectory = prody.Trajectory(fn)
    trajectory.link(topology)
    trajectory.setCoords(topology)
    trajectory.setAtoms(selected)
    projection = prody.calcProjection(trajectory, eda[pcs])

    if outf is not None:
        # One column label per requested mode, numbered from 1.
        labels = ["PC%d" % (i + 1) for i in pcs]
        np.savetxt(outf, projection, fmt="%.4f",
                   header=" ".join(labels), comments="")
    return projection
def get_traj_rmsds(reference, trajectory):
    """Return the RMSDs of *trajectory* frames after superposition.

    Atoms matching ``'backbone or name OC2'`` are selected from both
    inputs.  The reference selection supplies the ensemble's reference
    coordinates, the trajectory selection supplies its coordinate sets,
    and the ensemble is superposed before the RMSDs are read.
    """
    selection = 'backbone or name OC2'
    ref_atoms = reference.select(selection)
    traj_atoms = trajectory.select(selection)

    frames = prody.Ensemble('trajectory ensemble')
    frames.setCoords(ref_atoms.getCoords())
    frames.addCoordset(traj_atoms.getCoordsets())
    frames.superpose()

    rmsds = frames.getRMSDs()
    return rmsds
def calcPCA(filenameEnsemble):
    """Build a PCA from the C-alpha atoms of a multi-model PDB file.

    The file is parsed with ``subset='ca'``, its coordinate sets are
    iteratively superposed in an ensemble, and the covariance matrix of
    that ensemble is used to calculate the modes.

    :arg filenameEnsemble: path (or identifier) handed to ``parsePDB``
    :returns: the PCA object after ``calcModes()``
    """
    structure = dy.parsePDB(filenameEnsemble, subset='ca')

    models = dy.Ensemble('ensemble')
    models.setCoords(structure.getCoords())
    models.addCoordset(structure.getCoordsets())
    models.iterpose()

    result = dy.PCA('Ubiquitin')
    result.buildCovariance(models)
    result.calcModes()
    return result
def find_close(native_name, traj_name, skip_frames):
    """Return an RMS of the closest per-atom approach to the native structure.

    Both files are read with ``prody.parsePDB``.  The trajectory frames
    from index *skip_frames* onward are superposed onto the native
    coordinates.  For every atom, the smallest squared distance to its
    native position over those frames is taken, and the square root of
    the mean of these minima over atoms is returned.
    """
    native = prody.parsePDB(native_name)
    trajectory = prody.parsePDB(traj_name)

    frames = prody.Ensemble('ensemble')
    frames.setCoords(native.getCoords())
    # Drop the first `skip_frames` frames of the trajectory.
    frames.addCoordset(trajectory.getCoordsets()[skip_frames:, ...])
    frames.superpose()

    reference = native.getCoords()
    # Squared distance of every atom in every frame from its native position.
    sq_dist = numpy.sum((frames.getCoordsets() - reference) ** 2, axis=2)
    # Best (smallest) squared distance reached by each atom in any frame.
    closest = numpy.min(sq_dist, axis=0)
    n_atoms = float(closest.shape[0])
    return numpy.sqrt(numpy.sum(closest) / n_atoms)
def setUpClass(cls):
    """Write the test PDB to disk and prepare the iterposed ensemble.

    Sets on *cls*: ``pdb_path``, ``pdb`` (C-alpha subset of the file),
    ``not_iterposed_coordsets`` (a copy of the coordinate sets taken
    before superposition), ``ensemble`` and ``coordsets`` (the ensemble's
    coordinate sets after ``iterpose()``).
    """
    # Generate and read the pdb
    cls.pdb_path = "tmp_pdb.pdb"
    # Close the file explicitly so its contents are flushed before ProDy
    # reads it back.
    with open(cls.pdb_path, "w") as handle:
        handle.write(amber_short_ca_contents)

    try:
        prody.confProDy(verbosity='none')
    except Exception:
        # Best effort only: the tests still run with ProDy logging enabled.
        print("Impossible to silent prody")

    cls.pdb = prody.parsePDB(cls.pdb_path, subset='calpha')

    # Save coordsets before superposition
    cls.not_iterposed_coordsets = numpy.array(cls.pdb.getCoordsets())

    # Do Prody iterposition
    cls.ensemble = prody.Ensemble('pca_test_ensemble')
    cls.ensemble.setCoords(cls.pdb.getCoords())
    cls.ensemble.addCoordset(cls.pdb.getCoordsets())
    cls.ensemble.iterpose()
    cls.coordsets = cls.ensemble.getCoordsets()
def run(self):
    """Run the reference job and every perturbation job, then score them.

    The reference run is executed first and its displaced (optionally
    fitted) structures are collected in an ensemble.  Each job in
    ``self.runs`` is then executed in turn, paired with the next element
    of the product of ``self.refs`` and ``self.sets``.  Its displaced
    structures are scored against the reference ensemble with
    ``self.scoring_function``.  Average, minimum and maximum scores are
    stored in ``self.scores``, ``self.min_scores`` and ``self.max_scores``.

    When ``self.do_write`` is not ``None``, the quantities it names are
    pushed onto ``self.write_queue`` as ``(kind, indices, data)`` tuples.
    The ``results`` and ``F`` attributes of each run are deleted once they
    have been consumed.
    """
    writing = self.do_write is not None
    write_DF = writing and DF in self.do_write
    write_DR = writing and DR in self.do_write
    write_F_DR = writing and F_DR in self.do_write
    write_P_XYZ = writing and P_XYZ in self.do_write
    write_FP_XYZ = writing and FP_XYZ in self.do_write
    write_RAW_SCORES = writing and RAW_SCORES in self.do_write
    write_CD = writing and CD in self.do_write

    ref_idxs = [ref.origin.getResindex() for ref in self.refs]
    ref_combinations = [[j] for j in self.refs]
    combinations = itertools.product(*(ref_combinations + self.sets))

    score_shape = [len(self.indices) for _ in self.sets]
    self.scores = np.zeros(score_shape)
    self.max_scores = np.zeros(score_shape)
    self.min_scores = np.zeros(score_shape)

    displaced_atoms = self.atoms.copy()

    self.ref_run.prepare()
    self.ref_run.run()

    fitted_ref_structures = prody.Ensemble(
        title="displaced_fitted_structures")
    fitted_ref_structures.setAtoms(self.atoms)

    # Output buffers are sized from the reference results and reused for
    # every run.  Floor division keeps the atom count an integer on
    # Python 3 as well.
    if write_F_DR:
        F_DR_values = np.zeros(self.ref_run.results.shape)
    if write_FP_XYZ:
        FP_XYZ_values = np.zeros((self.ref_run.results.shape[0],
                                  self.ref_run.results.shape[1] // 3, 3))
    if write_P_XYZ:
        P_XYZ_values = np.zeros((self.ref_run.results.shape[0],
                                 self.ref_run.results.shape[1] // 3, 3))

    for d, displacements in enumerate(self.ref_run.results):
        displaced_atoms.setCoords(
            self.atoms.getCoords()
            + displacements.reshape((displacements.shape[0] // 3, 3)))

        if self.fitting_operation is not None:
            fitting_original_atoms = self.atoms.select(self.fitting_string)
            fitting_displaced_atoms = displaced_atoms.select(
                self.fitting_string)
            transformation = self.fitting_operation(
                fitting_displaced_atoms,
                fitting_original_atoms).transformation
            this_ref_fit = self.fitting_operation(
                displaced_atoms, self.atoms, transformation=transformation)
            fitted_ref_structures.addCoordset(this_ref_fit.transformed())

            if write_F_DR:
                F_DR_values[d, :] = (
                    this_ref_fit.transformed().getCoords()
                    - self.atoms.getCoords()).flatten()
            if write_FP_XYZ:
                FP_XYZ_values[d, :] = this_ref_fit.transformed().getCoords()
        else:
            fitted_ref_structures.addCoordset(displaced_atoms)

        if write_P_XYZ:
            P_XYZ_values[d, :] = displaced_atoms.getCoords()

    if writing:
        if write_DR:
            self.write_queue.put((DR, ref_idxs, self.ref_run.results))
        if write_DF:
            self.write_queue.put((DF, ref_idxs, self.ref_run.F))
        if write_FP_XYZ:
            self.write_queue.put((FP_XYZ, ref_idxs, FP_XYZ_values))
        if write_F_DR:
            self.write_queue.put((F_DR, ref_idxs, F_DR_values))
        if write_P_XYZ:
            self.write_queue.put((P_XYZ, ref_idxs, P_XYZ_values))

    del self.ref_run.results
    del self.ref_run.F

    for r, run in enumerate(self.runs):
        # next() works on both Python 2 and 3, unlike the .next() method.
        this_comb = next(combinations)
        this_ref = this_comb[:len(self.refs)]
        this_perturbation = this_comb[-len(self.sets):]

        fitted_structures = prody.Ensemble(
            title="displaced_fitted_structures")
        fitted_structures.setAtoms(self.atoms)

        this_idxs = [this.origin.getResindex() for this in this_comb]

        run.prepare()
        run.run()

        for d, displacements in enumerate(run.results):
            displaced_atoms.setCoords(
                self.atoms.getCoords()
                + displacements.reshape((displacements.shape[0] // 3, 3)))

            if self.fitting_operation is not None:
                fitting_original_atoms = self.atoms.select(
                    self.fitting_string)
                fitting_displaced_atoms = displaced_atoms.select(
                    self.fitting_string)
                transformation = self.fitting_operation(
                    fitting_displaced_atoms,
                    fitting_original_atoms).transformation
                this_fit = self.fitting_operation(
                    displaced_atoms, self.atoms,
                    transformation=transformation)
                fitted_structures.addCoordset(this_fit.transformed())

                if write_F_DR:
                    # Use the fit of *this* frame; the reference fit left
                    # over from the first loop would write the same stale
                    # values for every frame.
                    F_DR_values[d, :] = (
                        this_fit.transformed().getCoords()
                        - self.atoms.getCoords()).flatten()
                if write_FP_XYZ:
                    FP_XYZ_values[d, :] = this_fit.transformed().getCoords()
            else:
                fitted_structures.addCoordset(displaced_atoms)

            if write_P_XYZ:
                P_XYZ_values[d, :] = displaced_atoms.getCoords()

        ref_score_idxs = [
            np.where(c.origin.getResindex() == self.indices)
            for c in this_ref
        ]
        this_score_idxs = [
            np.where(c.origin.getResindex() == self.indices)
            for c in this_comb
        ]

        self.score_kwargs["ref_idxs"] = ref_idxs
        self.score_kwargs["this_idxs"] = this_idxs
        self.score_kwargs["ref_score_idxs"] = ref_score_idxs
        self.score_kwargs["this_score_idxs"] = this_score_idxs

        score_idxs = [
            np.where(c.origin.getResindex() == self.indices)
            for c in this_perturbation
        ]

        score = self.scoring_function(
            self.atoms,
            fitted_ref_structures,
            fitted_structures,
            metadata=[fs.metadata for fs in run.force_sets],
        )

        # The ``ref_cds`` attribute of the first score object is handed to
        # every later one instead of being left at its initial value.
        if r == 0:
            ref_cds = score.ref_cds
        else:
            score.ref_cds = ref_cds

        avg_s, min_s, max_s, details = score.get(**self.score_kwargs)

        self.scores[np.array(score_idxs)] = avg_s
        self.min_scores[np.array(score_idxs)] = min_s
        self.max_scores[np.array(score_idxs)] = max_s

        if writing:
            if write_DR:
                self.write_queue.put((DR, this_idxs, run.results))
            if write_DF:
                self.write_queue.put((DF, this_idxs, run.F))
            if write_FP_XYZ:
                self.write_queue.put((FP_XYZ, this_idxs, FP_XYZ_values))
            if write_F_DR:
                self.write_queue.put((F_DR, this_idxs, F_DR_values))
            if write_P_XYZ:
                self.write_queue.put((P_XYZ, this_idxs, P_XYZ_values))
            if write_RAW_SCORES:
                self.write_queue.put(
                    (RAW_SCORES, this_idxs, details[RAW_SCORES]))
            if write_CD:
                self.write_queue.put((ref_CD, this_idxs, details[CD][0]))
                self.write_queue.put((dfs_CD, this_idxs, details[CD][1]))

        del run.F
        del run.results
def _pca_from_trajectory(pca, dcd, nmodes, aligned, quiet):
    """Fill *pca* from *dcd*; return the ensemble used and the mode count."""
    if len(dcd) > 1000:
        # Long trajectories: build the covariance frame by frame.
        pca.buildCovariance(dcd, aligned=aligned, quiet=quiet)
        pca.calcModes(nmodes)
        ensemble = dcd
    else:
        ensemble = dcd[:]
        if not aligned:
            ensemble.iterpose(quiet=quiet)
        pca.performSVD(ensemble)
    return ensemble, pca.numModes()


def prody_pca(coords, **kwargs):
    """Perform PCA calculations for PDB or DCD format *coords* file.

    Missing options are filled in from ``DEFAULTS``.  For ``.dcd`` input
    (optionally ``.gz``-compressed name) an accompanying ``psf``/``pdb``
    option supplies the atoms; any other input is parsed as a multi-model
    PDB file.  Numerical results and, when requested, figures are written
    into ``outdir`` using ``prefix`` as the file name stem.

    :raises IOError: if ``outdir`` is not an existing directory
    :raises ValueError: if the input has fewer than two frames/models, if
        the selection matches no atoms, or if the number of selected atoms
        does not match the number of atoms in the DCD file
    :raises ImportError: if ``nproc`` is given but ``threadpoolctl`` is
        not installed
    """
    for key in DEFAULTS:
        if key not in kwargs:
            kwargs[key] = DEFAULTS[key]

    from os.path import isdir, splitext, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import prody
    LOGGER = prody.LOGGER

    prefix = kwargs.get('prefix')
    nmodes = kwargs.get('nmodes')
    selstr = kwargs.get('select')
    quiet = kwargs.pop('quiet', False)
    altloc = kwargs.get('altloc')
    aligned = kwargs.get('aligned')
    nproc = kwargs.get('nproc')

    if nproc:
        try:
            from threadpoolctl import threadpool_limits
        except ImportError:
            raise ImportError(
                'Please install threadpoolctl to control threads')

    ext = splitext(coords)[1].lower()
    if ext == '.gz':
        ext = splitext(coords[:-3])[1].lower()

    if ext == '.dcd':
        pdb = kwargs.get('psf') or kwargs.get('pdb')
        if pdb:
            if splitext(pdb)[1].lower() == '.psf':
                pdb = prody.parsePSF(pdb)
            else:
                # parsePDB's keyword is ``altloc``; only forward it when an
                # option value is actually available.
                pdb_options = {} if altloc is None else {'altloc': altloc}
                pdb = prody.parsePDB(pdb, **pdb_options)

        dcd = prody.DCDFile(coords)
        if prefix == '_pca' or prefix == '_eda':
            prefix = dcd.getTitle() + prefix
        if len(dcd) < 2:
            raise ValueError('DCD file must have multiple frames')

        if pdb:
            select = pdb.select(selstr)
            if select is None:
                raise ValueError('selection {0} do not match any atoms'.format(
                                 repr(selstr)))
            if pdb.numAtoms() == dcd.numAtoms():
                dcd.setAtoms(select)
                LOGGER.info('{0} atoms are selected for calculations.'.format(
                            len(select)))
            elif select.numAtoms() != dcd.numAtoms():
                raise ValueError('number of selected atoms ({0}) does '
                                 'not match number of atoms in the DCD '
                                 'file ({1})'.format(
                                     select.numAtoms(), dcd.numAtoms()))
            if pdb.numCoordsets():
                dcd.setCoords(select.getCoords())
        else:
            select = prody.AtomGroup()
            select.setCoords(dcd.getCoords())

        pca = prody.PCA(dcd.getTitle())

        if nproc:
            with threadpool_limits(limits=nproc, user_api="blas"):
                ensemble, nmodes = _pca_from_trajectory(
                    pca, dcd, nmodes, aligned, quiet)
        else:
            ensemble, nmodes = _pca_from_trajectory(
                pca, dcd, nmodes, aligned, quiet)

    else:
        pdb = prody.parsePDB(coords)
        if pdb.numCoordsets() < 2:
            raise ValueError('PDB file must contain multiple models')

        if prefix == '_pca' or prefix == '_eda':
            prefix = pdb.getTitle() + prefix

        select = pdb.select(selstr)
        # Check for an empty selection before calling len() on it, so the
        # caller gets the descriptive ValueError rather than a TypeError.
        if select is None:
            raise ValueError('selection {0} do not match any atoms'.format(
                             repr(selstr)))
        LOGGER.info('{0} atoms are selected for calculations.'.format(
                    len(select)))
        LOGGER.info('{0} atoms will be used for PCA calculations.'.format(
                    len(select)))

        ensemble = prody.Ensemble(select)
        pca = prody.PCA(pdb.getTitle())
        if not aligned:
            ensemble.iterpose()

        if nproc:
            with threadpool_limits(limits=nproc, user_api="blas"):
                pca.performSVD(ensemble)
        else:
            pca.performSVD(ensemble)

    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(pca, join(outdir, prefix))

    if kwargs.get('outscipion'):
        prody.writeScipionModes(outdir, pca)

    prody.writeNMD(join(outdir, prefix + '.nmd'), pca[:nmodes], select)

    extend = kwargs.get('extend')
    if extend:
        if pdb:
            if extend == 'all':
                extended = prody.extendModel(pca[:nmodes], select, pdb)
            else:
                extended = prody.extendModel(pca[:nmodes], select,
                                             select | pdb.bb)
            prody.writeNMD(
                join(outdir, prefix + '_extended_' + extend + '.nmd'),
                *extended)
        else:
            prody.LOGGER.warn('Model could not be extended, provide a PDB or '
                              'PSF file.')

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    num_ext = kwargs.get('numext')
    num_format = kwargs.get('numformat')

    if outall or kwargs.get('outeig'):
        prody.writeArray(join(outdir, prefix + '_evectors' + num_ext),
                         pca.getArray(), delimiter=delim, format=num_format)
        prody.writeArray(join(outdir, prefix + '_evalues' + num_ext),
                         pca.getEigvals(), delimiter=delim, format=num_format)

    if outall or kwargs.get('outcov'):
        prody.writeArray(join(outdir, prefix + '_covariance' + num_ext),
                         pca.getCovariance(), delimiter=delim,
                         format=num_format)

    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        cross_corr = prody.calcCrossCorr(pca)
        if outall or kwargs.get('outcc'):
            prody.writeArray(
                join(outdir, prefix + '_cross-correlations' + num_ext),
                cross_corr, delimiter=delim, format=num_format)
        if outall or kwargs.get('outhm'):
            resnums = select.getResnums()
            hmargs = {} if resnums is None else {'resnums': resnums}
            prody.writeHeatmap(
                join(outdir, prefix + '_cross-correlations.hm'),
                cross_corr, xlabel='Residue', ylabel='Residue',
                title=pca.getTitle() + ' cross-correlations', **hmargs)

    if outall or kwargs.get('outsf'):
        prody.writeArray(join(outdir, prefix + '_sqfluct' + num_ext),
                         prody.calcSqFlucts(pca), delimiter=delim,
                         format=num_format)

    if outall or kwargs.get('outproj'):
        prody.writeArray(join(outdir, prefix + '_proj' + num_ext),
                         prody.calcProjection(ensemble, pca),
                         delimiter=delim, format=num_format)

    figall = kwargs.get('figall')
    cc = kwargs.get('figcc')
    sf = kwargs.get('figsf')
    sp = kwargs.get('figproj')

    if figall or cc or sf or sp:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            fig_format = kwargs.get('figformat').lower()
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')

            if figall or cc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(pca)
                plt.savefig(join(outdir, prefix + '_cc.' + fig_format),
                            dpi=dpi, format=fig_format)
                plt.close('all')

            if figall or sf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(pca)
                plt.savefig(join(outdir, prefix + '_sf.' + fig_format),
                            dpi=dpi, format=fig_format)
                plt.close('all')

            if figall or sp:
                # Each whitespace-separated item is "a-b" (range), "a,b,c"
                # (list) or "a" (single mode), all 1-based.  ``figall``
                # without ``figproj`` simply yields no projection figures.
                indices = []
                for item in (sp or '').split():
                    try:
                        if '-' in item:
                            bounds = item.split('-')
                            if len(bounds) == 2:
                                indices.append(list(range(
                                    int(bounds[0]) - 1, int(bounds[1]))))
                        elif ',' in item:
                            indices.append(
                                [int(i) - 1 for i in item.split(',')])
                        else:
                            indices.append(int(item) - 1)
                    except ValueError:
                        # Skip items that are not valid mode numbers.
                        pass

                for index in indices:
                    plt.figure(figsize=(width, height))
                    prody.showProjection(ensemble, pca[index])
                    if isinstance(index, Integral):
                        index = [index]
                    labels = [str(i + 1) for i in index]
                    plt.savefig(join(
                        outdir,
                        prefix + '_proj_' + '_'.join(labels) + '.'
                        + fig_format),
                        dpi=dpi, format=fig_format)
                    plt.close('all')