def initialize(self):
    """
    Put the model into its starting state.

    - every rotation stored in ``self.R`` is set to the ``D x D`` identity,
    - ``self.Y`` becomes ``K`` independent copies of the average structure
      of ``self.X``,
    - memberships are re-drawn through ``self.random_membership()``.
    """
    # Build an (M, K, D, D) stack whose every [m, k] slice is eye(D).
    # The previous kron(ones((M, K)), eye(D)).reshape(M, K, D, D) only did
    # that for K == 1: kron lays the identities out as an (M*D, K*D) block
    # matrix, and reshaping that row-major buffer interleaves rows of
    # neighbouring blocks, producing singular non-identity matrices.
    identities = np.tile(np.eye(self.D), (self.M, self.K, 1, 1))

    mean_structure = average_structure(self.X)
    self.Y = np.array([mean_structure.copy() for _ in range(self.K)])

    # In-place assignment keeps the existing self.R buffer.
    self.R[...] = identities

    self.random_membership()
def testEnsemble(self):
    """
    The posterior of a Gaussian scale mixture with a gamma prior is a
    Student's t distribution with parameters alpha and beta. Given enough
    samples, we should be able to estimate these parameters.
    """
    from csb.bio.utils import fit, wfit

    pdb_path = self.config.getTestFile('ake-xray-ensemble-ca.pdb')
    models = LegacyStructureParser(pdb_path).parse_models()

    X = numpy.array([model.get_coordinates(['CA'], True) for model in models])
    x_mu = average_structure(X)
    n_models, n_scales = X.shape[0], X.shape[1]

    rotations = numpy.zeros((n_models, 3, 3))
    shifts = numpy.ones((n_models, 3))

    mixture = ScaleMixture(scales=n_scales, prior=GammaPrior(), d=3)

    # initial, unweighted fit of every model against the average structure
    for idx, coords in enumerate(X):
        rotations[idx], shifts[idx] = fit(x_mu, coords)

    # Gibbs sampling cycle
    for _ in range(200):
        # distances between the average and each transformed model;
        # transposed before being handed to the mixture
        residuals = numpy.array([
            numpy.sum((x_mu - numpy.dot(coords, rot.T) - shift) ** 2, -1) ** 0.5
            for coords, rot, shift in zip(X, rotations, shifts)
        ]).T

        # sample scales
        mixture.estimate(residuals)

        # sample rotations
        for idx, coords in enumerate(X):
            rotations[idx], shifts[idx] = wfit(x_mu, coords, mixture.scales)

    self.assertEqual(mixture.scales.shape, (211,))

    expected_rotation = numpy.eye(3)
    expected_shift = numpy.zeros((3,))

    for rot, shift in zip(rotations, shifts):
        for row in range(3):
            self.assertAlmostEqual(shift[row], expected_shift[row], delta=2.)
            for col in range(3):
                self.assertAlmostEqual(abs(rot[row, col]),
                                       expected_rotation[row, col], delta=0.15)
def main(self):
    """
    Superimpose the models of a PDB ensemble using sampled weights.

    Reads ``self.args.pdb``, extracts the CA coordinates of chain
    ``self.args.chain`` from every model, and alternates
    ``self.args.niter`` times between estimating one scale per CA position
    (``ScaleMixture`` with a ``GammaPrior``) and re-fitting every model
    onto the average structure with those scales as weights. The
    transformed models are written to ``self.args.outfile``.

    Exits through ``self.exit`` with ``ExitCodes.IO_ERROR`` when the PDB
    file cannot be parsed and with ``ExitCodes.USAGE_ERROR`` when it holds
    fewer than two models.
    """
    try:
        parser = LegacyStructureParser(self.args.pdb)
        models = parser.models()
    except IOError as e:
        # IOError carries no ``value`` attribute; reading it raised an
        # AttributeError here and hid the real parsing problem.
        self.exit('PDB file parsing failed\n' + str(e), ExitCodes.IO_ERROR)

    if len(models) < 2:
        self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR)

    ensemble = parser.parse_models(models)
    X = numpy.array([model[self.args.chain].get_coordinates(['CA'], True)
                     for model in ensemble])
    x_mu = average_structure(X)
    m = X.shape[0]

    R = numpy.zeros((m, 3, 3))
    t = numpy.ones((m, 3))

    prior = GammaPrior()
    mixture = ScaleMixture(scales=X.shape[1], prior=prior, d=3)

    # initial unweighted fit; every R[i], t[i] placeholder is overwritten
    for i in range(m):
        R[i, :, :], t[i, :] = fit(x_mu, X[i])

    # gibbs sampling cycle
    for _ in range(self.args.niter):
        # apply rotation: distance of every position to the average,
        # one row per model before the transpose
        data = numpy.array([
            numpy.sum((x_mu - numpy.dot(X[i], numpy.transpose(R[i])) - t[i]) ** 2, -1) ** 0.5
            for i in range(m)
        ]).T

        # sample scales
        mixture.estimate(data)

        # sample rotations
        for i in range(m):
            R[i, :, :], t[i, :] = wfit(x_mu, X[i], mixture.scales)

    out_ensemble = csb.bio.structure.Ensemble()

    for i, model in enumerate(ensemble):
        model.transform(R[i], t[i])
        out_ensemble.models.append(model)

    out_ensemble.to_pdb(self.args.outfile)
def intra_xfit(selection, load_b=0, cycles=20, guide=1, seed=0, quiet=1,
        bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of all states of an object to the intermediate
    structure over all states. The weights are estimated with maximum
    likelihood.

    The result should be very similar to "intra_theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    selection = string: atom selection

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

    cycles = int: number of weight estimation / superposition cycles
    {default: 20}

    guide = 0 or 1: restrict the selection to guide atoms {default: 1}

    seed = 0 or 1: use the current coordinates as the starting ensemble
    instead of first fitting every state to the first state (not used
    with bfit=1) {default: 0}

    quiet = 0 or 1: suppress the summary message {default: 1}

    bfit = 0 or 1: estimate the weights with a CSB scale mixture (adapted
    from csb.apps.bfite) instead of from the per-atom variance {default: 0}

    distribution = string: prior selector handed to the bfit prior lookup,
    only used with bfit=1 {default: student}

NOTE

    Assumes all states to have identical number of CA-atoms.

SEE ALSO

    xfit, intra_fit, intra_theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import wfit, fit
    from .querying import get_ensemble_coords, get_object_name

    cycles, quiet = int(cycles), int(quiet)

    mobile_obj = get_object_name(selection, 1)
    # use the session handed in via _self rather than the global cmd
    n_models = _self.count_states(mobile_obj)

    if int(guide):
        selection = '(%s) and guide' % (selection)

    X = asarray(get_ensemble_coords(selection))

    # The placeholders are shared objects, which is harmless: entries are
    # only ever rebound (R[i] = ...), never modified in place.
    R, t = [identity(3)] * n_models, [zeros(3)] * n_models

    if int(bfit):
        # adapted from csb.apps.bfite
        from csb.bio.utils import average_structure, distance
        from csb.statistics.scalemixture import ScaleMixture

        average = average_structure(X)
        mixture = ScaleMixture(scales=X.shape[1],
                prior=_bfit_get_prior(distribution), d=3)

        for i in range(n_models):
            R[i], t[i] = fit(X[i], average)

        for _ in range(cycles):
            data = asarray([distance(average, dot(X[i] - t[i], R[i]))
                for i in range(n_models)])
            mixture.estimate(data.T)
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, mixture.scales)

        scales = mixture.scales

    else:
        if int(seed):
            ensemble = X
        else:
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = fit(X[i], X[0])
                ensemble.append(dot(X[i] - t[i], R[i]))

        for _ in range(cycles):
            ensemble = asarray(ensemble)
            average = ensemble.mean(0)
            # summed coordinate variance per atom; clipped so that the
            # inverse stays finite for atoms that do not move
            data = ensemble.var(0).sum(1)
            scales = 1.0 / data.clip(1e-3)
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, scales)
                ensemble.append(dot(X[i] - t[i], R[i]))

    m = identity(4)
    back = identity(4)
    back[0:3, 0:3] = R[0]
    back[0:3, 3] = t[0]

    for i in range(n_models):
        # mirrors dot(X[i] - t[i], R[i]): -t in the bottom row, R.T as the
        # 3x3 part (presumably PyMOL's translate-then-rotate matrix layout)
        m[0:3, 0:3] = R[i].T
        m[3, 0:3] = -t[i]
        _self.transform_object(mobile_obj, list(m.flat), state=i + 1)

    # fit back to first state
    _self.transform_object(mobile_obj, list(back.flat), state=0)

    if int(load_b):
        b_iter = iter(-log(scales))
        # "mm.mobile" was never defined in this function (NameError);
        # the fitted atoms are the ones matched by the selection.
        _self.alter(selection, 'b = next(b_iter)',
                space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' intra_xfit: %d atoms in %d states aligned' % (len(X[0]), n_models))
def intra_xfit(selection, load_b=0, cycles=20, guide=1, seed=0, quiet=1,
        bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of all states of all objects in the selection
    to the intermediate structure over all states. The weights are
    estimated with maximum likelihood.

    The result should be very similar to "intra_theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    selection = string: atom selection

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

    cycles = int: number of weight estimation / superposition cycles
    {default: 20}

    guide = 0 or 1: restrict the selection to guide atoms {default: 1}

    seed = 0 or 1: use the current coordinates as the starting ensemble
    instead of first fitting every state to the first state (not used
    with bfit=1) {default: 0}

    quiet = 0 or 1: suppress the summary message {default: 1}

    bfit = 0 or 1: estimate the weights with a CSB scale mixture (adapted
    from csb.apps.bfite) instead of from the per-atom variance {default: 0}

    distribution = string: prior selector handed to the bfit prior lookup,
    only used with bfit=1 {default: student}

NOTE

    Assumes all states to have identical number of CA-atoms. Raises an
    error if objects differ in atom count or if nothing is selected.

SEE ALSO

    xfit, intra_fit, intra_theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import wfit, fit
    from .querying import get_ensemble_coords

    cycles, quiet = int(cycles), int(quiet)

    if int(guide):
        selection = '(%s) and guide' % (selection)

    mobile_objs = _self.get_object_list(selection)
    n_states_objs = []

    # coordinates of every state of every object, in object order
    X = []
    for obj in mobile_objs:
        X_obj = get_ensemble_coords('({}) & {}'.format(selection, obj), _self=_self)

        if X and len(X_obj) and len(X[0]) != len(X_obj[0]):
            raise CmdException('objects have different number of atoms')

        X.extend(X_obj)
        n_states_objs.append(len(X_obj))

    n_models = len(X)

    # Without this guard an empty selection fails much later with an
    # unrelated IndexError (or propagates NaNs into the transformations).
    if n_models == 0 or len(X[0]) == 0:
        raise CmdException('selection contains no atoms to superpose')

    X = asarray(X)

    # The placeholders are shared objects, which is harmless: entries are
    # only ever rebound (R[i] = ...), never modified in place.
    R, t = [identity(3)] * n_models, [zeros(3)] * n_models

    if int(bfit):
        # adapted from csb.apps.bfite
        from csb.bio.utils import average_structure, distance
        from csb.statistics.scalemixture import ScaleMixture

        average = average_structure(X)
        mixture = ScaleMixture(scales=X.shape[1],
                prior=_bfit_get_prior(distribution), d=3)

        for i in range(n_models):
            R[i], t[i] = fit(X[i], average)

        for _ in range(cycles):
            data = asarray([distance(average, dot(X[i] - t[i], R[i]))
                for i in range(n_models)])
            mixture.estimate(data.T)
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, mixture.scales)

        scales = mixture.scales

    else:
        if int(seed):
            ensemble = X
        else:
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = fit(X[i], X[0])
                ensemble.append(dot(X[i] - t[i], R[i]))

        for _ in range(cycles):
            ensemble = asarray(ensemble)
            average = ensemble.mean(0)
            # summed coordinate variance per atom; clipped so that the
            # inverse stays finite for atoms that do not move
            data = ensemble.var(0).sum(1)
            scales = 1.0 / data.clip(1e-3)
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, scales)
                ensemble.append(dot(X[i] - t[i], R[i]))

    m = identity(4)
    back = identity(4)
    back[0:3, 0:3] = R[0]
    back[0:3, 3] = t[0]

    # R and t are indexed over the concatenated states of all objects
    transformation_i = 0

    for mobile_obj, n_states in zip(mobile_objs, n_states_objs):
        for state_i in range(n_states):
            # mirrors dot(X - t, R): -t in the bottom row, R.T as the 3x3
            # part (presumably PyMOL's translate-then-rotate matrix layout)
            m[0:3, 0:3] = R[transformation_i].T
            m[3, 0:3] = -t[transformation_i]
            _self.transform_object(mobile_obj, list(m.flat), state=state_i + 1)
            transformation_i += 1

        # fit back to first state; done for every object so that all of
        # them stay in the same frame
        _self.transform_object(mobile_obj, list(back.flat), state=0)

        if int(load_b):
            # fresh iterator per object: each object receives the full
            # set of per-atom values
            b_iter = iter(-log(scales))
            _self.alter('({}) & {} & state 1'.format(selection, mobile_obj),
                    'b = next(b_iter)',
                    space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' intra_xfit: %d atoms in %d states aligned' % (len(X[0]), n_models))
def intra_xfit(selection, load_b=0, cycles=20, guide=1, seed=0, quiet=1,
        bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of all states of an object to the intermediate
    structure over all states. The weights are estimated with maximum
    likelihood.

    The result should be very similar to "intra_theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    selection = string: atom selection

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

    cycles = int: number of weight estimation / superposition cycles
    {default: 20}

    guide = 0 or 1: restrict the selection to guide atoms {default: 1}

    seed = 0 or 1: use the current coordinates as the starting ensemble
    instead of first fitting every state to the first state (not used
    with bfit=1) {default: 0}

    quiet = 0 or 1: suppress the summary message {default: 1}

    bfit = 0 or 1: estimate the weights with a CSB scale mixture (adapted
    from csb.apps.bfite) instead of from the per-atom variance {default: 0}

    distribution = string: prior selector handed to the bfit prior lookup,
    only used with bfit=1 {default: student}

NOTE

    Assumes all states to have identical number of CA-atoms.

SEE ALSO

    xfit, intra_fit, intra_theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import wfit, fit
    from .querying import get_ensemble_coords, get_object_name

    cycles, quiet = int(cycles), int(quiet)

    mobile_obj = get_object_name(selection, 1)
    # use the session handed in via _self rather than the global cmd
    n_models = _self.count_states(mobile_obj)

    if int(guide):
        selection = '(%s) and guide' % (selection)

    X = asarray(get_ensemble_coords(selection))

    # The placeholders are shared objects, which is harmless: entries are
    # only ever rebound (R[i] = ...), never modified in place.
    R, t = [identity(3)] * n_models, [zeros(3)] * n_models

    if int(bfit):
        # adapted from csb.apps.bfite
        from csb.bio.utils import average_structure, distance
        from csb.statistics.scalemixture import ScaleMixture

        average = average_structure(X)
        mixture = ScaleMixture(scales=X.shape[1],
                prior=_bfit_get_prior(distribution), d=3)

        for i in range(n_models):
            R[i], t[i] = fit(X[i], average)

        for _ in range(cycles):
            data = asarray([
                distance(average, dot(X[i] - t[i], R[i]))
                for i in range(n_models)
            ])
            mixture.estimate(data.T)
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, mixture.scales)

        scales = mixture.scales

    else:
        if int(seed):
            ensemble = X
        else:
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = fit(X[i], X[0])
                ensemble.append(dot(X[i] - t[i], R[i]))

        for _ in range(cycles):
            ensemble = asarray(ensemble)
            average = ensemble.mean(0)
            # summed coordinate variance per atom; clipped so that the
            # inverse stays finite for atoms that do not move
            data = ensemble.var(0).sum(1)
            scales = 1.0 / data.clip(1e-3)
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, scales)
                ensemble.append(dot(X[i] - t[i], R[i]))

    m = identity(4)
    back = identity(4)
    back[0:3, 0:3] = R[0]
    back[0:3, 3] = t[0]

    for i in range(n_models):
        # mirrors dot(X[i] - t[i], R[i]): -t in the bottom row, R.T as the
        # 3x3 part (presumably PyMOL's translate-then-rotate matrix layout)
        m[0:3, 0:3] = R[i].T
        m[3, 0:3] = -t[i]
        _self.transform_object(mobile_obj, list(m.flat), state=i + 1)

    # fit back to first state
    _self.transform_object(mobile_obj, list(back.flat), state=0)

    if int(load_b):
        b_iter = iter(-log(scales))
        # next(b_iter) works on Python 2.6+ and 3; the former
        # b_iter.next() method does not exist on Python 3 iterators.
        # Only the names the expression needs are exposed.
        _self.alter(selection, 'b = next(b_iter)',
                space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' intra_xfit: %d atoms in %d states aligned' % (len(X[0]), n_models))