Exemplo n.º 1
0
    def testRandom(self):
        """
        Draw samples from a scale mixture with four unit scales and a
        gamma prior, and check that the empirical mean and variance of the
        samples are within 0.1 of 0 and 1 respectively.
        """
        unit_scales = [1., 1., 1., 1.]
        mixture = ScaleMixture(scales=unit_scales, prior=GammaPrior(), d=3)

        # One call without a size argument, then a large batch.
        mixture.random()
        draws = mixture.random(10000)

        self.assertAlmostEqual(0.0, numpy.mean(draws), delta=1e-1)
        self.assertAlmostEqual(1., numpy.var(draws), delta=1e-1)
Exemplo n.º 2
0
    def testEnsemble(self):
        """
        Superimpose every model found in 'ake-xray-ensemble-ca.pdb' onto the
        average structure, alternating between ``mixture.estimate`` (scales)
        and ``wfit`` (rotations and translations) for 200 cycles.

        Afterwards the mixture must hold 211 scales, every translation
        component must be within 2.0 of zero, and the absolute value of every
        rotation entry must be within 0.15 of the identity matrix.
        """
        from csb.bio.utils import fit, wfit

        pdbfile = self.config.getTestFile('ake-xray-ensemble-ca.pdb')
        ensemble = LegacyStructureParser(pdbfile).parse_models()

        coords = numpy.array([model.get_coordinates(['CA'], True) for model in ensemble])
        mean_structure = average_structure(coords)
        n_models, n_atoms = coords.shape[0], coords.shape[1]

        # Placeholders; every entry is overwritten by the initial fit below.
        rotations = numpy.zeros((n_models, 3, 3))
        translations = numpy.ones((n_models, 3))

        mixture = ScaleMixture(scales=n_atoms, prior=GammaPrior(), d=3)

        # initial unweighted fit of each model
        for k, xyz in enumerate(coords):
            rotations[k, :, :], translations[k, :] = fit(mean_structure, xyz)

        # gibbs sampling cycle
        for _ in range(200):
            # per-atom distance between the average structure and each
            # transformed model; transposed before being handed to estimate()
            data = numpy.array([
                numpy.sum((mean_structure - numpy.dot(xyz, rot.T) - shift) ** 2, axis=-1) ** 0.5
                for xyz, rot, shift in zip(coords, rotations, translations)
            ]).T

            # sample scales
            mixture.estimate(data)

            # sample rotations
            for k, xyz in enumerate(coords):
                rotations[k, :, :], translations[k, :] = wfit(mean_structure, xyz, mixture.scales)

        self.assertEqual(mixture.scales.shape, (211,))

        expected_rotation = numpy.eye(3)
        expected_translation = numpy.zeros((3,))

        for k in range(n_models):
            for row in range(3):
                self.assertAlmostEqual(translations[k, row], expected_translation[row], delta=2.)
                for col in range(3):
                    self.assertAlmostEqual(abs(rotations[k, row, col]),
                                           expected_rotation[row, col], delta=0.15)
Exemplo n.º 3
0
    def main(self):
        """
        Superimpose all models of the input PDB file onto their average
        structure and write the transformed ensemble to ``self.args.outfile``.

        Reads ``self.args.pdb``, ``self.args.chain``, ``self.args.niter`` and
        ``self.args.outfile``. Calls ``self.exit`` with
        ``ExitCodes.IO_ERROR`` when parsing raises IOError, and with
        ``ExitCodes.USAGE_ERROR`` when fewer than two models are found.

        NOTE(review): the code following each ``self.exit`` call assumes that
        ``self.exit`` does not return -- confirm.
        """
        try:
            parser = LegacyStructureParser(self.args.pdb)
            models = parser.models()

        except IOError as e:
            # IOError has no ``value`` attribute; format the exception itself.
            self.exit('PDB file parsing failed\n' + str(e), ExitCodes.IO_ERROR)

        if len(models) < 2:
            self.exit('PDB file contains only one model', ExitCodes.USAGE_ERROR)

        ensemble = parser.parse_models(models)
        X = numpy.array([model[self.args.chain].get_coordinates(['CA'], True) for model in ensemble])
        x_mu = average_structure(X)
        m = X.shape[0]

        # Placeholders; every entry is overwritten by the initial fit below.
        R = numpy.zeros((m, 3, 3))
        t = numpy.ones((m, 3))

        prior = GammaPrior()
        mixture = ScaleMixture(scales=X.shape[1],
                               prior=prior, d=3)

        # initial unweighted fit of each model
        for i in range(m):
            R[i, :, :], t[i, :] = fit(x_mu, X[i])

        # gibbs sampling cycle
        for _ in range(self.args.niter):
            # apply rotation
            data = numpy.array([numpy.sum((x_mu - numpy.dot(X[i], numpy.transpose(R[i])) - t[i]) ** 2, -1) ** 0.5
                                for i in range(m)]).T
            # sample scales
            mixture.estimate(data)
            # sample rotations
            for i in range(m):
                R[i, :, :], t[i, :] = wfit(x_mu, X[i], mixture.scales)

        out_ensemble = csb.bio.structure.Ensemble()

        # transform each model with its final rotation/translation
        for i, model in enumerate(ensemble):
            model.transform(R[i], t[i])
            out_ensemble.models.append(model)

        out_ensemble.to_pdb(self.args.outfile)
Exemplo n.º 4
0
    def testInvGammaMAP(self):
        """
        Superimpose model 13 of 'ake-xray-ensemble-ca.pdb' onto model 0 using
        a scale mixture with an ``InvGammaPrior`` whose estimator is
        ``InvGammaPosteriorMAP``, alternating ``mixture.estimate`` and
        ``wfit`` for 200 cycles.

        Afterwards the mixture must hold 211 scales, the translation must be
        within 2.0 of zero and the rotation within 0.1 of the identity.
        """
        from csb.bio.utils import fit, wfit

        pdbfile = self.config.getTestFile('ake-xray-ensemble-ca.pdb')
        ensemble = LegacyStructureParser(pdbfile).parse_models()
        reference = numpy.array(ensemble[0].get_coordinates(['CA'], True))
        mobile = numpy.array(ensemble[13].get_coordinates(['CA'], True))

        prior = InvGammaPrior()
        prior.estimator = InvGammaPosteriorMAP()
        mixture = ScaleMixture(scales=reference.shape[0], prior=prior, d=3)

        # initial unweighted fit
        rotation, translation = fit(reference, mobile)

        # gibbs sampling cycle
        for _ in range(200):
            # apply rotation
            residual = reference - numpy.dot(mobile, numpy.transpose(rotation)) - translation
            data = numpy.sum(residual ** 2, axis=-1) ** (1. / 2)
            # sample scales
            mixture.estimate(data)
            # sample rotations
            rotation, translation = wfit(reference, mobile, mixture.scales)

        self.assertEqual(mixture.scales.shape, (211,))

        expected_rotation = numpy.eye(3)
        expected_translation = numpy.zeros((3,))

        for row in range(3):
            self.assertAlmostEqual(translation[row], expected_translation[row], delta=2.)
            for col in range(3):
                self.assertAlmostEqual(expected_rotation[row, col], rotation[row, col], delta=1e-1)
Exemplo n.º 5
0
    def testLogProb(self):
        """
        Evaluate a four-component unit-scale mixture (no prior, d=1) and a
        ``csb.statistics.pdf.Normal`` on the same 1000-point grid over
        [-5, 5], and check that each mixture value is within 0.1 of four
        times the corresponding normal value.
        """
        grid = numpy.linspace(-5, 5, 1000)

        normal = csb.statistics.pdf.Normal()
        mixture = ScaleMixture(scales=[1., 1., 1., 1.], prior=None, d=1)

        px = mixture(grid)
        gx = normal(grid)

        for i, mixture_value in enumerate(px):
            self.assertAlmostEqual(mixture_value, 4 * gx[i], delta=1e-1)
Exemplo n.º 6
0
def intra_xfit(selection, load_b=0, cycles=20, guide=1, seed=0, quiet=1,
        bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of all states of an object to the intermediate
    structure over all states. The weights are estimated with maximum
    likelihood.

    The result should be very similar to "intra_theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    selection = string: atom selection

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

    cycles = int: number of weight estimation / refit iterations
    {default: 20}

    guide = 0 or 1: restrict the selection to "guide" atoms {default: 1}

    seed = 0 or 1: use the input coordinates as the starting ensemble
    instead of first fitting every state to the first state; only read
    when bfit=0 {default: 0}

    quiet = 0 or 1: suppress the summary line {default: 1}

    bfit = 0 or 1: estimate the weights with a CSB ScaleMixture instead of
    the inverse coordinate variance over all states {default: 0}

    distribution = string: passed to _bfit_get_prior, only read when
    bfit=1 {default: student}

NOTE

    Assumes all states to have identical number of CA-atoms.

SEE ALSO

    xfit, intra_fit, intra_theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import wfit, fit
    from .querying import get_ensemble_coords, get_object_name

    # NOTE(review): _self is accepted but every call below goes through the
    # global cmd and the querying helpers without _self -- confirm intent.
    cycles, quiet = int(cycles), int(quiet)
    mobile_obj = get_object_name(selection, 1)
    n_models = cmd.count_states(mobile_obj)

    if int(guide):
        selection = '(%s) and guide' % (selection)

    X = asarray(get_ensemble_coords(selection))

    # One rotation / translation per state; entries are rebound, never
    # mutated in place, so sharing the initial objects is harmless.
    R, t = [identity(3)] * n_models, [zeros(3)] * n_models

    if int(bfit):
        # adapted from csb.apps.bfite

        from csb.bio.utils import average_structure, distance
        from csb.statistics.scalemixture import ScaleMixture

        average = average_structure(X)

        mixture = ScaleMixture(scales=X.shape[1],
                prior=_bfit_get_prior(distribution), d=3)

        # initial unweighted fit of every state to the average
        for i in range(n_models):
            R[i], t[i] = fit(X[i], average)

        for _ in range(cycles):
            # distances of every back-transformed state to the average
            data = asarray([distance(average, dot(X[i] - t[i], R[i])) for i in range(n_models)])
            mixture.estimate(data.T)
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, mixture.scales)

        scales = mixture.scales

    else:
        if int(seed):
            ensemble = X
        else:
            # start from an unweighted fit of every state to the first one
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = fit(X[i], X[0])
                ensemble.append(dot(X[i] - t[i], R[i]))

        for _ in range(cycles):
            ensemble = asarray(ensemble)
            average = ensemble.mean(0)
            data = ensemble.var(0).sum(1)
            # clip to avoid division by (near) zero variance
            scales = 1.0 / data.clip(1e-3)

            ensemble = []
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, scales)
                ensemble.append(dot(X[i] - t[i], R[i]))

    m = identity(4)
    back = identity(4)
    back[0:3,0:3] = R[0]
    back[0:3,3] = t[0]

    for i in range(n_models):
        # transposed rotation in the upper-left block, negated translation
        # in the last row
        m[0:3,0:3] = R[i].T
        m[3,0:3] = -t[i]
        cmd.transform_object(mobile_obj, list(m.flat), state=i+1)

    # fit back to first state
    cmd.transform_object(mobile_obj, list(back.flat), state=0)

    if int(load_b):
        b_iter = iter(-log(scales))
        # Alter the fitted selection; no MatchMaker ("mm") exists in this
        # function, so referring to mm.mobile raised NameError.
        cmd.alter(selection, 'b = next(b_iter)', space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' intra_xfit: %d atoms in %d states aligned' % (len(X[0]), n_models))
Exemplo n.º 7
0
def xfit(mobile, target, mobile_state=-1, target_state=-1, load_b=0,
        cycles=10, match='align', guide=1, seed=0, quiet=1,
        bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of the model in the first selection on to the model
    in the second selection. The weights are estimated with maximum likelihood.

    The result should be very similar to "theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    mobile = string: atom selection

    target = string: atom selection

    mobile_state = int: object state of mobile selection {default: current}

    target_state = int: object state of target selection {default: current}

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

    cycles = int: number of weight estimation / refit iterations
    {default: 10}

    match = string: passed to MatchMaker {default: align}

    guide = 0 or 1: restrict both selections to "guide" atoms {default: 1}

    seed = 0 or 1: start from the identity transformation instead of an
    initial unweighted fit {default: 0}

    quiet = 0 or 1: suppress the summary line {default: 1}

    bfit = 0 or 1: estimate the weights with a CSB ScaleMixture instead of
    inverse squared distances {default: 0}

    distribution = string: passed to _bfit_get_prior, only read when
    bfit=1 {default: student}

SEE ALSO

    intra_xfit, align, super, fit, cealign, theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import distance_sq, wfit, fit
    from . import querying

    cycles = int(cycles)
    quiet = int(quiet)
    mobile_state = int(mobile_state)
    target_state = int(target_state)

    mobile_obj = querying.get_object_name(mobile, 1)

    # states below 1 are resolved through the querying helpers
    if mobile_state < 1:
        mobile_state = querying.get_object_state(mobile_obj)
    if target_state < 1:
        target_state = querying.get_selection_state(target)

    if int(guide):
        guide_template = '(%s) and guide'
        mobile = guide_template % mobile
        target = guide_template % target

    mm = MatchMaker(mobile, target, match)

    # Y: mobile coordinates, X: target coordinates
    Y = asarray(querying.get_coords(mm.mobile, mobile_state))
    X = asarray(querying.get_coords(mm.target, target_state))

    # starting transformation
    R, t = (identity(3), zeros(3)) if int(seed) else fit(X, Y)

    if not int(bfit):
        for _cycle in range(cycles):
            sq_dist = distance_sq(Y, dot(X - t, R))
            # clip to avoid division by (near) zero distances
            scales = 1.0 / sq_dist.clip(1e-3)
            R, t = wfit(X, Y, scales)

    else:
        # adapted from csb.apps.bfit

        from csb.bio.utils import distance, probabilistic_fit
        from csb.statistics.scalemixture import ScaleMixture

        prior = _bfit_get_prior(distribution)
        mixture = ScaleMixture(scales=X.shape[0], prior=prior, d=3)

        for _cycle in range(cycles):
            mixture.estimate(distance(Y, dot(X - t, R)))
            R, t = probabilistic_fit(X, Y, mixture.scales)

        scales = mixture.scales

    # 4x4 matrix: rotation in the upper-left block, translation in the
    # last column
    matrix = identity(4)
    matrix[:3, :3] = R
    matrix[:3, 3] = t
    cmd.transform_object(mobile_obj, list(matrix.flat))

    if int(load_b):
        b_iter = iter(-log(scales))
        cmd.alter(mm.mobile, 'b = next(b_iter)', space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' xfit: %d atoms aligned' % (len(X)))
Exemplo n.º 8
0
def intra_xfit(selection, load_b=0, cycles=20, guide=1, seed=0, quiet=1,
        bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of all states of all objects in the selection
    to the intermediate structure over all states. The weights are
    estimated with maximum likelihood.

    The result should be very similar to "intra_theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    selection = string: atom selection

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

    cycles = int: number of weight estimation / refit iterations
    {default: 20}

    guide = 0 or 1: restrict the selection to "guide" atoms {default: 1}

    seed = 0 or 1: use the input coordinates as the starting ensemble
    instead of first fitting every state to the first state; only read
    when bfit=0 {default: 0}

    quiet = 0 or 1: suppress the summary line {default: 1}

    bfit = 0 or 1: estimate the weights with a CSB ScaleMixture instead of
    the inverse coordinate variance over all states {default: 0}

    distribution = string: passed to _bfit_get_prior, only read when
    bfit=1 {default: student}

NOTE

    Assumes all states to have identical number of CA-atoms.

    Raises CmdException if the objects differ in their number of atoms or
    if no coordinates are found for the selection.

SEE ALSO

    xfit, intra_fit, intra_theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import wfit, fit
    from .querying import get_ensemble_coords

    cycles, quiet = int(cycles), int(quiet)

    if int(guide):
        selection = '(%s) and guide' % (selection)

    mobile_objs = _self.get_object_list(selection)
    n_states_objs = []
    X = []

    # collect the coordinates of all states of all objects into one list
    for obj in mobile_objs:
        X_obj = get_ensemble_coords('({}) & {}'.format(selection, obj), _self=_self)

        if X and len(X_obj) and len(X[0]) != len(X_obj[0]):
            raise CmdException('objects have different number of atoms')

        X.extend(X_obj)
        n_states_objs.append(len(X_obj))

    n_models = len(X)

    # Without any state the fitting below cannot work (R[0] does not exist);
    # fail early with a clear message instead.
    if n_models == 0:
        raise CmdException('no coordinates found for selection')

    X = asarray(X)

    # One rotation / translation per state; entries are rebound, never
    # mutated in place, so sharing the initial objects is harmless.
    R, t = [identity(3)] * n_models, [zeros(3)] * n_models

    if int(bfit):
        # adapted from csb.apps.bfite

        from csb.bio.utils import average_structure, distance
        from csb.statistics.scalemixture import ScaleMixture

        average = average_structure(X)

        mixture = ScaleMixture(scales=X.shape[1],
                prior=_bfit_get_prior(distribution), d=3)

        # initial unweighted fit of every state to the average
        for i in range(n_models):
            R[i], t[i] = fit(X[i], average)

        for _ in range(cycles):
            # distances of every back-transformed state to the average
            data = asarray([distance(average, dot(X[i] - t[i], R[i])) for i in range(n_models)])
            mixture.estimate(data.T)
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, mixture.scales)

        scales = mixture.scales

    else:
        if int(seed):
            ensemble = X
        else:
            # start from an unweighted fit of every state to the first one
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = fit(X[i], X[0])
                ensemble.append(dot(X[i] - t[i], R[i]))

        for _ in range(cycles):
            ensemble = asarray(ensemble)
            average = ensemble.mean(0)
            data = ensemble.var(0).sum(1)
            # clip to avoid division by (near) zero variance
            scales = 1.0 / data.clip(1e-3)

            ensemble = []
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, scales)
                ensemble.append(dot(X[i] - t[i], R[i]))

    m = identity(4)
    back = identity(4)
    back[0:3,0:3] = R[0]
    back[0:3,3] = t[0]

    # R and t are indexed over all states of all objects, in collection order
    transformation_i = 0
    for mobile_obj, n_states in zip(mobile_objs, n_states_objs):
        for state_i in range(n_states):
            # transposed rotation in the upper-left block, negated
            # translation in the last row
            m[0:3, 0:3] = R[transformation_i].T
            m[3, 0:3] = -t[transformation_i]
            _self.transform_object(mobile_obj, list(m.flat), state=state_i + 1)
            transformation_i += 1

        # fit back to first state
        _self.transform_object(mobile_obj, list(back.flat), state=0)

        if int(load_b):
            b_iter = iter(-log(scales))
            _self.alter('({}) & {} & state 1'.format(selection, mobile_obj),
                      'b = next(b_iter)',
                      space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' intra_xfit: %d atoms in %d states aligned' % (len(X[0]), n_models))
Exemplo n.º 9
0
def xfit(mobile, target, mobile_state=-1, target_state=-1, load_b=0,
        cycles=10, match='align', guide=1, seed=0, quiet=1,
        bfit=0, distribution='student', _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of the model in the first selection on to the model
    in the second selection. The weights are estimated with maximum likelihood.

    The result should be very similar to "theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    mobile = string: atom selection

    target = string: atom selection

    mobile_state = int: object state of mobile selection {default: current}

    target_state = int: object state of target selection {default: current}

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

    cycles = int: number of weight estimation / refit iterations
    {default: 10}

    match = string: passed to MatchMaker {default: align}

    guide = 0 or 1: restrict both selections to "guide" atoms {default: 1}

    seed = 0 or 1: start from the identity transformation instead of an
    initial unweighted fit {default: 0}

    quiet = 0 or 1: suppress the summary line {default: 1}

    bfit = 0 or 1: estimate the weights with a CSB ScaleMixture instead of
    inverse squared distances {default: 0}

    distribution = string: passed to _bfit_get_prior, only read when
    bfit=1 {default: student}

SEE ALSO

    intra_xfit, align, super, fit, cealign, theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import distance_sq, wfit, fit
    from . import querying

    cycles = int(cycles)
    quiet = int(quiet)
    mobile_state = int(mobile_state)
    target_state = int(target_state)

    mobile_obj = querying.get_object_name(mobile, 1, _self=_self)

    # states below 1 are resolved through the querying helpers
    if mobile_state < 1:
        mobile_state = querying.get_object_state(mobile_obj, _self=_self)
    if target_state < 1:
        target_state = querying.get_selection_state(target, _self=_self)

    if int(guide):
        guide_template = '(%s) and guide'
        mobile = guide_template % mobile
        target = guide_template % target

    mm = MatchMaker(mobile, target, match, _self=_self)

    # Y: mobile coordinates, X: target coordinates
    Y = asarray(_self.get_coords(mm.mobile, mobile_state))
    X = asarray(_self.get_coords(mm.target, target_state))

    # starting transformation
    R, t = (identity(3), zeros(3)) if int(seed) else fit(X, Y)

    if not int(bfit):
        for _cycle in range(cycles):
            sq_dist = distance_sq(Y, dot(X - t, R))
            # clip to avoid division by (near) zero distances
            scales = 1.0 / sq_dist.clip(1e-3)
            R, t = wfit(X, Y, scales)

    else:
        # adapted from csb.apps.bfit

        from csb.bio.utils import distance, probabilistic_fit
        from csb.statistics.scalemixture import ScaleMixture

        prior = _bfit_get_prior(distribution)
        mixture = ScaleMixture(scales=X.shape[0], prior=prior, d=3)

        for _cycle in range(cycles):
            mixture.estimate(distance(Y, dot(X - t, R)))
            R, t = probabilistic_fit(X, Y, mixture.scales)

        scales = mixture.scales

    # 4x4 matrix: rotation in the upper-left block, translation in the
    # last column
    matrix = identity(4)
    matrix[:3, :3] = R
    matrix[:3, 3] = t
    _self.transform_object(mobile_obj, list(matrix.flat))

    if int(load_b):
        b_iter = iter(-log(scales))
        _self.alter(mm.mobile, 'b = next(b_iter)', space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' xfit: %d atoms aligned' % (len(X)))
Exemplo n.º 10
0
def intra_xfit(selection,
               load_b=0,
               cycles=20,
               guide=1,
               seed=0,
               quiet=1,
               bfit=0,
               distribution='student',
               _self=cmd):
    '''
DESCRIPTION

    Weighted superposition of all states of an object to the intermediate
    structure over all states. The weights are estimated with maximum
    likelihood.

    The result should be very similar to "intra_theseus".

    Requires CSB, https://github.com/csb-toolbox/CSB

ARGUMENTS

    selection = string: atom selection

    load_b = 0 or 1: save -log(weights) into B-factor column {default: 0}

    cycles = int: number of weight estimation / refit iterations
    {default: 20}

    guide = 0 or 1: restrict the selection to "guide" atoms {default: 1}

    seed = 0 or 1: use the input coordinates as the starting ensemble
    instead of first fitting every state to the first state; only read
    when bfit=0 {default: 0}

    quiet = 0 or 1: suppress the summary line {default: 1}

    bfit = 0 or 1: estimate the weights with a CSB ScaleMixture instead of
    the inverse coordinate variance over all states {default: 0}

    distribution = string: passed to _bfit_get_prior, only read when
    bfit=1 {default: student}

NOTE

    Assumes all states to have identical number of CA-atoms.

SEE ALSO

    xfit, intra_fit, intra_theseus
    '''
    from numpy import asarray, identity, log, dot, zeros
    from csb.bio.utils import wfit, fit
    from .querying import get_ensemble_coords, get_object_name

    # NOTE(review): _self is accepted but every call below goes through the
    # global cmd and the querying helpers without _self -- confirm intent.
    cycles, quiet = int(cycles), int(quiet)
    mobile_obj = get_object_name(selection, 1)
    n_models = cmd.count_states(mobile_obj)

    if int(guide):
        selection = '(%s) and guide' % (selection)

    X = asarray(get_ensemble_coords(selection))

    # One rotation / translation per state; entries are rebound, never
    # mutated in place, so sharing the initial objects is harmless.
    R, t = [identity(3)] * n_models, [zeros(3)] * n_models

    if int(bfit):
        # adapted from csb.apps.bfite

        from csb.bio.utils import average_structure, distance
        from csb.statistics.scalemixture import ScaleMixture

        average = average_structure(X)

        mixture = ScaleMixture(scales=X.shape[1],
                               prior=_bfit_get_prior(distribution),
                               d=3)

        # initial unweighted fit of every state to the average
        for i in range(n_models):
            R[i], t[i] = fit(X[i], average)

        for _ in range(cycles):
            # distances of every back-transformed state to the average
            data = asarray([
                distance(average, dot(X[i] - t[i], R[i]))
                for i in range(n_models)
            ])
            mixture.estimate(data.T)
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, mixture.scales)

        scales = mixture.scales

    else:
        if int(seed):
            ensemble = X
        else:
            # start from an unweighted fit of every state to the first one
            ensemble = []
            for i in range(n_models):
                R[i], t[i] = fit(X[i], X[0])
                ensemble.append(dot(X[i] - t[i], R[i]))

        for _ in range(cycles):
            ensemble = asarray(ensemble)
            average = ensemble.mean(0)
            data = ensemble.var(0).sum(1)
            # clip to avoid division by (near) zero variance
            scales = 1.0 / data.clip(1e-3)

            ensemble = []
            for i in range(n_models):
                R[i], t[i] = wfit(X[i], average, scales)
                ensemble.append(dot(X[i] - t[i], R[i]))

    m = identity(4)
    back = identity(4)
    back[0:3, 0:3] = R[0]
    back[0:3, 3] = t[0]

    for i in range(n_models):
        # transposed rotation in the upper-left block, negated translation
        # in the last row
        m[0:3, 0:3] = R[i].T
        m[3, 0:3] = -t[i]
        cmd.transform_object(mobile_obj, list(m.flat), state=i + 1)

    # fit back to first state
    cmd.transform_object(mobile_obj, list(back.flat), state=0)

    if int(load_b):
        b_iter = iter(-log(scales))
        # Use the next() builtin: iterators have no .next() method on
        # Python 3. Only the names the expression needs are exposed.
        cmd.alter(selection,
                  'b = next(b_iter)',
                  space={'b_iter': b_iter, 'next': next})

    if not quiet:
        print(' intra_xfit: %d atoms in %d states aligned' %
              (len(X[0]), n_models))