def testRandom(self):
    """
    Sample from a scale mixture with four unit scales and a gamma prior
    and check the first two moments of the pooled samples: the mean has
    to be within 0.1 of zero and the variance within 0.1 of one.
    """
    unit_scales = [1., 1., 1., 1.]
    mixture = ScaleMixture(scales=unit_scales, prior=GammaPrior(), d=3)

    # drawing without an explicit sample size has to work as well
    mixture.random()

    samples = mixture.random(10000)
    sample_mean = numpy.mean(samples)
    sample_var = numpy.var(samples)

    self.assertAlmostEqual(0.0, sample_mean, delta=1e-1)
    self.assertAlmostEqual(1., sample_var, delta=1e-1)
def testEnsemble(self):
    """
    Superimpose every model of the AKE X-ray ensemble onto the average
    structure, alternating 200 times between estimating the mixture
    scales from the per-atom deviations and recomputing weighted fits.

    Afterwards the scales vector must have shape (211,), every
    translation component must lie within 2 of zero and the absolute
    value of every rotation matrix entry must lie within 0.15 of the
    corresponding identity matrix entry.
    """
    from csb.bio.utils import fit, wfit

    pdbfile = self.config.getTestFile('ake-xray-ensemble-ca.pdb')
    ensemble = LegacyStructureParser(pdbfile).parse_models()

    coords = numpy.array([model.get_coordinates(['CA'], True)
                          for model in ensemble])
    mean_structure = average_structure(coords)
    n_models, n_atoms = coords.shape[0], coords.shape[1]

    rotations = numpy.zeros((n_models, 3, 3))
    translations = numpy.ones((n_models, 3))

    mixture = ScaleMixture(scales=n_atoms, prior=GammaPrior(), d=3)

    # initial, unweighted superposition of each model
    for k in range(n_models):
        rotations[k], translations[k] = fit(mean_structure, coords[k])

    # gibbs sampling cycle
    for _ in range(200):
        # apply the current transformations and measure per-atom deviations
        deviations = []
        for k in range(n_models):
            moved = numpy.dot(coords[k], numpy.transpose(rotations[k]))
            diff = mean_structure - moved - translations[k]
            deviations.append(numpy.sum(diff ** 2, -1) ** 0.5)
        data = numpy.array(deviations).T

        # sample scales
        mixture.estimate(data)

        # sample rotations
        for k in range(n_models):
            rotations[k], translations[k] = wfit(mean_structure, coords[k],
                                                 mixture.scales)

    self.assertEqual(mixture.scales.shape, (211,))

    expected_rotation = numpy.eye(3)
    expected_translation = numpy.zeros((3,))

    for k in range(n_models):
        for row in range(3):
            self.assertAlmostEqual(translations[k, row],
                                   expected_translation[row], delta=2.)
            for col in range(3):
                self.assertAlmostEqual(abs(rotations[k, row, col]),
                                       expected_rotation[row, col],
                                       delta=0.15)
def main(self):
    """
    Superimpose all models of a PDB ensemble onto their average structure
    with a Gaussian scale mixture (gamma prior) and write the result.

    Reads ``self.args.pdb`` (input file), ``self.args.chain`` (chain whose
    CA coordinates are used), ``self.args.niter`` (number of sampling
    cycles) and ``self.args.outfile`` (output PDB file).

    Exits through ``self.exit`` with ``ExitCodes.IO_ERROR`` when the input
    cannot be parsed and with ``ExitCodes.USAGE_ERROR`` when the file
    holds fewer than two models.
    """
    try:
        parser = LegacyStructureParser(self.args.pdb)
        models = parser.models()
    except IOError as e:
        # IOError has no ``value`` attribute; reading it here would raise
        # AttributeError and hide the real problem, so format the
        # exception itself.
        # NOTE(review): the code below relies on self.exit() not
        # returning -- confirm against the application base class.
        self.exit('PDB file parsing failed\n' + str(e), ExitCodes.IO_ERROR)

    if len(models) < 2:
        self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR)

    ensemble = parser.parse_models(models)
    X = numpy.array([model[self.args.chain].get_coordinates(['CA'], True)
                     for model in ensemble])
    x_mu = average_structure(X)
    m = X.shape[0]

    R = numpy.zeros((m, 3, 3))
    t = numpy.ones((m, 3))

    prior = GammaPrior()
    mixture = ScaleMixture(scales=X.shape[1], prior=prior, d=3)

    # initial, unweighted superposition of each model onto the average
    for i in range(m):
        R[i, :, :], t[i, :] = fit(x_mu, X[i])

    # gibbs sampling cycle
    for _ in range(self.args.niter):
        # apply rotation: per-atom distance of each model to the average
        data = numpy.array([
            numpy.sum((x_mu - numpy.dot(X[i], numpy.transpose(R[i])) - t[i]) ** 2, -1) ** 0.5
            for i in range(m)
        ]).T
        # sample scales
        mixture.estimate(data)
        # sample rotations
        for i in range(m):
            R[i, :, :], t[i, :] = wfit(x_mu, X[i], mixture.scales)

    out_ensemble = csb.bio.structure.Ensemble()

    for i, model in enumerate(ensemble):
        model.transform(R[i], t[i])
        out_ensemble.models.append(model)

    out_ensemble.to_pdb(self.args.outfile)
def testInvGammaMAP(self):
    """
    Fit model 13 of the AKE X-ray ensemble onto model 0 using a scale
    mixture with an InvGammaPrior whose estimator is InvGammaPosteriorMAP.

    After 200 rounds of scale estimation followed by a weighted fit, the
    scales vector must have shape (211,), the translation must lie within
    2 of zero and the rotation within 0.1 of the identity matrix.
    """
    from csb.bio.utils import fit, wfit

    pdbfile = self.config.getTestFile('ake-xray-ensemble-ca.pdb')
    ensemble = LegacyStructureParser(pdbfile).parse_models()

    reference = numpy.array(ensemble[0].get_coordinates(['CA'], True))
    mobile = numpy.array(ensemble[13].get_coordinates(['CA'], True))

    prior = InvGammaPrior()
    prior.estimator = InvGammaPosteriorMAP()
    mixture = ScaleMixture(scales=reference.shape[0], prior=prior, d=3)

    rotation, translation = fit(reference, mobile)

    # gibbs sampling cycle (no random seed is set)
    for _ in range(200):
        # apply rotation
        moved = numpy.dot(mobile, numpy.transpose(rotation))
        diff = reference - moved - translation
        data = numpy.sum(diff ** 2, axis=-1) ** 0.5
        # sample scales
        mixture.estimate(data)
        # sample rotations
        rotation, translation = wfit(reference, mobile, mixture.scales)

    self.assertEqual(mixture.scales.shape, (211,))

    expected_rotation = numpy.eye(3)
    expected_translation = numpy.zeros((3,))

    for row in range(3):
        self.assertAlmostEqual(translation[row], expected_translation[row],
                               delta=2.)
        for col in range(3):
            self.assertAlmostEqual(expected_rotation[row, col],
                                   rotation[row, col], delta=1e-1)
def testLogProb(self):
    """
    Evaluate a prior-less, one-dimensional scale mixture with four unit
    scales on a grid over [-5, 5] and check that each value is within 0.1
    of four times the value of csb.statistics.pdf.Normal() at that point.
    """
    grid = numpy.linspace(-5, 5, 1000)

    normal = csb.statistics.pdf.Normal()
    mixture = ScaleMixture(scales=[1., 1., 1., 1.], prior=None, d=1)

    mixture_values = mixture(grid)
    normal_values = normal(grid)

    for idx, value in enumerate(mixture_values):
        self.assertAlmostEqual(value, 4 * normal_values[idx], delta=1e-1)
def intra_xfit(selection, load_b=0, cycles=20, guide=1, seed=0, quiet=1,
               bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of all states of an object to the intermediate
    structure over all states. The weights are estimated with maximum
    likelihood.

    The result should be very similar to "intra_theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    selection = string: atom selection

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

    cycles = int: number of weight estimation / refitting rounds
    {default: 20}

    guide = 0 or 1: restrict the selection to guide atoms {default: 1}

    seed = 0 or 1: take the current coordinates as the starting ensemble
    instead of first fitting every state to the first one (only used with
    bfit=0) {default: 0}

    quiet = 0 or 1: suppress the summary line {default: 1}

    bfit = 0 or 1: estimate the weights with a CSB ScaleMixture
    {default: 0}

    distribution = string: handed to _bfit_get_prior to pick the prior
    (only used with bfit=1) {default: student}

NOTE

    Assumes all states to have identical number of CA-atoms.

    _self is accepted but not used here; all PyMOL calls go through the
    module level "cmd".

SEE ALSO

    xfit, intra_fit, intra_theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import wfit, fit
    from .querying import get_ensemble_coords, get_object_name

    cycles, quiet = int(cycles), int(quiet)
    mobile_obj = get_object_name(selection, 1)
    n_models = cmd.count_states(mobile_obj)

    if int(guide):
        selection = '(%s) and guide' % (selection)

    X = asarray(get_ensemble_coords(selection))
    # entries are replaced per state below, never modified in place
    R, t = [identity(3)] * n_models, [zeros(3)] * n_models

    if int(bfit):
        # adapted from csb.apps.bfite
        from csb.bio.utils import average_structure, distance
        from csb.statistics.scalemixture import ScaleMixture

        average = average_structure(X)
        mixture = ScaleMixture(scales=X.shape[1],
                               prior=_bfit_get_prior(distribution), d=3)

        for i in range(n_models):
            R[i], t[i] = fit(X[i], average)

        for _ in range(cycles):
            data = asarray([distance(average, dot(X[i] - t[i], R[i]))
                            for i in range(n_models)])
            mixture.estimate(data.T)
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, mixture.scales)

        scales = mixture.scales

    else:
        if int(seed):
            ensemble = X
        else:
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = fit(X[i], X[0])
                ensemble.append(dot(X[i] - t[i], R[i]))

        for _ in range(cycles):
            ensemble = asarray(ensemble)
            average = ensemble.mean(0)
            # weight of an atom: inverse of its positional variance over
            # all states, with the variance clipped from below at 1e-3
            data = ensemble.var(0).sum(1)
            scales = 1.0 / data.clip(1e-3)
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, scales)
                ensemble.append(dot(X[i] - t[i], R[i]))

    m = identity(4)
    back = identity(4)
    back[0:3, 0:3] = R[0]
    back[0:3, 3] = t[0]

    for i in range(n_models):
        m[0:3, 0:3] = R[i].T
        m[3, 0:3] = -t[i]
        cmd.transform_object(mobile_obj, list(m.flat), state=i + 1)

    # fit back to first state
    cmd.transform_object(mobile_obj, list(back.flat), state=0)

    if int(load_b):
        b_iter = iter(-log(scales))
        # The original referenced "mm.mobile" here, but no "mm" exists in
        # this function (NameError); the atoms to alter are those of
        # "selection".
        cmd.alter(selection, 'b = next(b_iter)',
                  space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' intra_xfit: %d atoms in %d states aligned' % (len(X[0]), n_models))
def xfit(mobile, target, mobile_state=-1, target_state=-1, load_b=0,
         cycles=10, match='align', guide=1, seed=0, quiet=1,
         bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of the model in the first selection on to the
    model in the second selection. The weights are estimated with maximum
    likelihood.

    The result should be very similar to "theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    mobile = string: atom selection

    target = string: atom selection

    mobile_state = int: object state of mobile selection {default: current}

    target_state = int: object state of target selection {default: current}

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

    cycles = int: number of weight estimation / refitting rounds
    {default: 10}

    match = string: handed to MatchMaker to pair up the atoms
    {default: align}

    guide = 0 or 1: restrict both selections to guide atoms {default: 1}

    seed = 0 or 1: start from the current placement instead of an initial
    unweighted fit {default: 0}

    quiet = 0 or 1: suppress the summary line {default: 1}

    bfit = 0 or 1: estimate the weights with a CSB ScaleMixture
    {default: 0}

    distribution = string: handed to _bfit_get_prior to pick the prior
    (only used with bfit=1) {default: student}

SEE ALSO

    intra_xfit, align, super, fit, cealign, theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import distance_sq, wfit, fit
    from . import querying

    cycles, quiet = int(cycles), int(quiet)
    mobile_state, target_state = int(mobile_state), int(target_state)

    mobile_obj = querying.get_object_name(mobile, 1)

    # states below 1 mean "use the current one"
    if mobile_state < 1:
        mobile_state = querying.get_object_state(mobile_obj)
    if target_state < 1:
        target_state = querying.get_selection_state(target)

    if int(guide):
        mobile = '(%s) and guide' % (mobile)
        target = '(%s) and guide' % (target)

    matched = MatchMaker(mobile, target, match)

    mobile_xyz = asarray(querying.get_coords(matched.mobile, mobile_state))
    target_xyz = asarray(querying.get_coords(matched.target, target_state))

    if not int(seed):
        rotation, shift = fit(target_xyz, mobile_xyz)
    else:
        rotation, shift = identity(3), zeros(3)

    if not int(bfit):
        for _ in range(cycles):
            # weight of an atom: inverse squared distance, with the squared
            # distance clipped from below at 1e-3
            sq_dist = distance_sq(mobile_xyz, dot(target_xyz - shift, rotation))
            scales = 1.0 / sq_dist.clip(1e-3)
            rotation, shift = wfit(target_xyz, mobile_xyz, scales)
    else:
        # adapted from csb.apps.bfit
        from csb.bio.utils import distance, probabilistic_fit
        from csb.statistics.scalemixture import ScaleMixture

        mixture = ScaleMixture(scales=target_xyz.shape[0],
                               prior=_bfit_get_prior(distribution), d=3)

        for _ in range(cycles):
            dist = distance(mobile_xyz, dot(target_xyz - shift, rotation))
            mixture.estimate(dist)
            rotation, shift = probabilistic_fit(target_xyz, mobile_xyz,
                                                mixture.scales)

        scales = mixture.scales

    # 4x4 matrix: rotation in the upper left block, shift in the last column
    matrix = identity(4)
    matrix[:3, :3] = rotation
    matrix[:3, 3] = shift
    cmd.transform_object(mobile_obj, list(matrix.flat))

    if int(load_b):
        b_iter = iter(-log(scales))
        cmd.alter(matched.mobile, 'b = next(b_iter)',
                  space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' xfit: %d atoms aligned' % (len(target_xyz)))
def intra_xfit(selection, load_b=0, cycles=20, guide=1, seed=0, quiet=1,
               bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of all states of an object to the intermediate
    structure over all states. The weights are estimated with maximum
    likelihood.

    The states of all objects in the selection are pooled; the objects
    have to agree in their number of selected atoms.

    The result should be very similar to "intra_theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    selection = string: atom selection

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

    cycles = int: number of weight estimation / refitting rounds
    {default: 20}

    guide = 0 or 1: restrict the selection to guide atoms {default: 1}

    seed = 0 or 1: take the current coordinates as the starting ensemble
    instead of first fitting every state to the first one (only used with
    bfit=0) {default: 0}

    quiet = 0 or 1: suppress the summary line {default: 1}

    bfit = 0 or 1: estimate the weights with a CSB ScaleMixture
    {default: 0}

    distribution = string: handed to _bfit_get_prior to pick the prior
    (only used with bfit=1) {default: student}

NOTE

    Assumes all states to have identical number of CA-atoms.

SEE ALSO

    xfit, intra_fit, intra_theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import wfit, fit
    from .querying import get_ensemble_coords, get_object_name

    cycles, quiet = int(cycles), int(quiet)

    if int(guide):
        selection = '(%s) and guide' % (selection)

    mobile_objs = _self.get_object_list(selection)

    # pool the coordinate sets of all states of all objects
    n_states_objs = []
    pooled = []
    for obj in mobile_objs:
        obj_coords = get_ensemble_coords('({}) & {}'.format(selection, obj),
                                         _self=_self)

        if pooled and len(obj_coords) and len(pooled[0]) != len(obj_coords[0]):
            raise CmdException('objects have different number of atoms')

        pooled.extend(obj_coords)
        n_states_objs.append(len(obj_coords))

    n_models = len(pooled)
    X = asarray(pooled)

    # entries are replaced per model below, never modified in place
    R, t = [identity(3)] * n_models, [zeros(3)] * n_models

    if not int(bfit):
        if not int(seed):
            ensemble = []
            for k in range(n_models):
                R[k], t[k] = fit(X[k], X[0])
                ensemble.append(dot(X[k] - t[k], R[k]))
        else:
            ensemble = X

        for _ in range(cycles):
            ensemble = asarray(ensemble)
            average = ensemble.mean(0)
            # weight of an atom: inverse of its positional variance over
            # all models, with the variance clipped from below at 1e-3
            variance = ensemble.var(0).sum(1)
            scales = 1.0 / variance.clip(1e-3)
            ensemble = []
            for k in range(n_models):
                R[k], t[k] = wfit(X[k], average, scales)
                ensemble.append(dot(X[k] - t[k], R[k]))
    else:
        # adapted from csb.apps.bfite
        from csb.bio.utils import average_structure, distance
        from csb.statistics.scalemixture import ScaleMixture

        average = average_structure(X)
        mixture = ScaleMixture(scales=X.shape[1],
                               prior=_bfit_get_prior(distribution), d=3)

        for k in range(n_models):
            R[k], t[k] = fit(X[k], average)

        for _ in range(cycles):
            deviations = asarray([distance(average, dot(X[k] - t[k], R[k]))
                                  for k in range(n_models)])
            mixture.estimate(deviations.T)
            for k in range(n_models):
                R[k], t[k] = wfit(X[k], average, mixture.scales)

        scales = mixture.scales

    back = identity(4)
    back[:3, :3] = R[0]
    back[:3, 3] = t[0]

    # the transformations are stored in the order the states were pooled
    transforms = iter(zip(R, t))

    # NOTE(review): the nesting of the last two steps inside this loop was
    # reconstructed from their use of the loop variable -- confirm.
    for mobile_obj, n_states in zip(mobile_objs, n_states_objs):
        for state in range(1, n_states + 1):
            rotation, shift = next(transforms)
            ttt = identity(4)
            ttt[:3, :3] = rotation.T
            ttt[3, :3] = -shift
            _self.transform_object(mobile_obj, list(ttt.flat), state=state)

        # fit back to first state
        _self.transform_object(mobile_obj, list(back.flat), state=0)

        if int(load_b):
            b_iter = iter(-log(scales))
            _self.alter('({}) & {} & state 1'.format(selection, mobile_obj),
                        'b = next(b_iter)',
                        space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' intra_xfit: %d atoms in %d states aligned' % (len(X[0]), n_models))
def xfit(mobile, target, mobile_state=-1, target_state=-1, load_b=0,
         cycles=10, match='align', guide=1, seed=0, quiet=1,
         bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of the model in the first selection on to the
    model in the second selection. The weights are estimated with maximum
    likelihood.

    The result should be very similar to "theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    mobile = string: atom selection

    target = string: atom selection

    mobile_state = int: object state of mobile selection {default: current}

    target_state = int: object state of target selection {default: current}

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

    cycles = int: number of weight estimation / refitting rounds
    {default: 10}

    match = string: handed to MatchMaker to pair up the atoms
    {default: align}

    guide = 0 or 1: restrict both selections to guide atoms {default: 1}

    seed = 0 or 1: start from the current placement instead of an initial
    unweighted fit {default: 0}

    quiet = 0 or 1: suppress the summary line {default: 1}

    bfit = 0 or 1: estimate the weights with a CSB ScaleMixture
    {default: 0}

    distribution = string: handed to _bfit_get_prior to pick the prior
    (only used with bfit=1) {default: student}

SEE ALSO

    intra_xfit, align, super, fit, cealign, theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import distance_sq, wfit, fit
    from . import querying

    cycles, quiet = int(cycles), int(quiet)
    mobile_state, target_state = int(mobile_state), int(target_state)

    mobile_obj = querying.get_object_name(mobile, 1, _self=_self)

    # states below 1 mean "use the current one"
    if mobile_state < 1:
        mobile_state = querying.get_object_state(mobile_obj, _self=_self)
    if target_state < 1:
        target_state = querying.get_selection_state(target, _self=_self)

    if int(guide):
        mobile = '(%s) and guide' % (mobile)
        target = '(%s) and guide' % (target)

    matched = MatchMaker(mobile, target, match, _self=_self)

    mobile_xyz = asarray(_self.get_coords(matched.mobile, mobile_state))
    target_xyz = asarray(_self.get_coords(matched.target, target_state))

    if not int(seed):
        rotation, shift = fit(target_xyz, mobile_xyz)
    else:
        rotation, shift = identity(3), zeros(3)

    if not int(bfit):
        for _ in range(cycles):
            # weight of an atom: inverse squared distance, with the squared
            # distance clipped from below at 1e-3
            sq_dist = distance_sq(mobile_xyz, dot(target_xyz - shift, rotation))
            scales = 1.0 / sq_dist.clip(1e-3)
            rotation, shift = wfit(target_xyz, mobile_xyz, scales)
    else:
        # adapted from csb.apps.bfit
        from csb.bio.utils import distance, probabilistic_fit
        from csb.statistics.scalemixture import ScaleMixture

        mixture = ScaleMixture(scales=target_xyz.shape[0],
                               prior=_bfit_get_prior(distribution), d=3)

        for _ in range(cycles):
            dist = distance(mobile_xyz, dot(target_xyz - shift, rotation))
            mixture.estimate(dist)
            rotation, shift = probabilistic_fit(target_xyz, mobile_xyz,
                                                mixture.scales)

        scales = mixture.scales

    # 4x4 matrix: rotation in the upper left block, shift in the last column
    matrix = identity(4)
    matrix[:3, :3] = rotation
    matrix[:3, 3] = shift
    _self.transform_object(mobile_obj, list(matrix.flat))

    if int(load_b):
        b_iter = iter(-log(scales))
        _self.alter(matched.mobile, 'b = next(b_iter)',
                    space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' xfit: %d atoms aligned' % (len(target_xyz)))
def intra_xfit(selection, load_b=0, cycles=20, guide=1, seed=0, quiet=1,
               bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of all states of an object to the intermediate
    structure over all states. The weights are estimated with maximum
    likelihood.

    The result should be very similar to "intra_theseus".

    Requires CSB, http://csb.codeplex.com

ARGUMENTS

    selection = string: atom selection

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

    cycles = int: number of weight estimation / refitting rounds
    {default: 20}

    guide = 0 or 1: restrict the selection to guide atoms {default: 1}

    seed = 0 or 1: take the current coordinates as the starting ensemble
    instead of first fitting every state to the first one (only used with
    bfit=0) {default: 0}

    quiet = 0 or 1: suppress the summary line {default: 1}

    bfit = 0 or 1: estimate the weights with a CSB ScaleMixture
    {default: 0}

    distribution = string: handed to _bfit_get_prior to pick the prior
    (only used with bfit=1) {default: student}

NOTE

    Assumes all states to have identical number of CA-atoms.

    _self is accepted but not used here; all PyMOL calls go through the
    module level "cmd".

SEE ALSO

    xfit, intra_fit, intra_theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import wfit, fit
    from .querying import get_ensemble_coords, get_object_name

    cycles, quiet = int(cycles), int(quiet)
    mobile_obj = get_object_name(selection, 1)
    n_models = cmd.count_states(mobile_obj)

    if int(guide):
        selection = '(%s) and guide' % (selection)

    X = asarray(get_ensemble_coords(selection))
    # entries are replaced per state below, never modified in place
    R, t = [identity(3)] * n_models, [zeros(3)] * n_models

    if int(bfit):
        # adapted from csb.apps.bfite
        from csb.bio.utils import average_structure, distance
        from csb.statistics.scalemixture import ScaleMixture

        average = average_structure(X)
        mixture = ScaleMixture(scales=X.shape[1],
                               prior=_bfit_get_prior(distribution), d=3)

        for i in range(n_models):
            R[i], t[i] = fit(X[i], average)

        for _ in range(cycles):
            data = asarray([
                distance(average, dot(X[i] - t[i], R[i]))
                for i in range(n_models)
            ])
            mixture.estimate(data.T)
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, mixture.scales)

        scales = mixture.scales

    else:
        if int(seed):
            ensemble = X
        else:
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = fit(X[i], X[0])
                ensemble.append(dot(X[i] - t[i], R[i]))

        for _ in range(cycles):
            ensemble = asarray(ensemble)
            average = ensemble.mean(0)
            # weight of an atom: inverse of its positional variance over
            # all states, with the variance clipped from below at 1e-3
            data = ensemble.var(0).sum(1)
            scales = 1.0 / data.clip(1e-3)
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, scales)
                ensemble.append(dot(X[i] - t[i], R[i]))

    m = identity(4)
    back = identity(4)
    back[0:3, 0:3] = R[0]
    back[0:3, 3] = t[0]

    for i in range(n_models):
        m[0:3, 0:3] = R[i].T
        m[3, 0:3] = -t[i]
        cmd.transform_object(mobile_obj, list(m.flat), state=i + 1)

    # fit back to first state
    cmd.transform_object(mobile_obj, list(back.flat), state=0)

    if int(load_b):
        b_iter = iter(-log(scales))
        # "b_iter.next()" only exists on Python 2 iterators; the next()
        # builtin works on Python 2.6+ and 3. It is passed in explicitly
        # together with the iterator instead of exposing all locals.
        cmd.alter(selection, 'b = next(b_iter)',
                  space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' intra_xfit: %d atoms in %d states aligned' % (len(X[0]), n_models))