def test_loglik(self):
    # importance-sampling check: if self.mog is properly normalized, the
    # weights p_mog(x)/q_Gauss(x) under x ~ q_Gauss average to one
    nsamples = 1000000
    Gauss = Gaussian(n=1, mu=array([0]), sigma=array([[4]]))
    dat = Gauss.sample(nsamples)
    logWeights = self.mog.loglik(dat) - Gauss.loglik(dat)
    Z = logsumexp(logWeights) - log(nsamples)
    print "test_loglik: z: ", exp(Z)
    self.assertTrue(abs(exp(Z) - 1) < 1e-01)
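
The check above is a generic importance-sampling test of the normalization constant: with samples x drawn from a proposal q, exp(logsumexp(log p(x) - log q(x)) - log N) estimates the integral of p, which should be close to 1. A minimal self-contained sketch of the same idea, assuming only numpy and scipy (the standard normal stands in for the model under test, a wide Laplace serves as the proposal; all names are illustrative):

import numpy as np
from scipy.stats import norm, laplace
from scipy.special import logsumexp

nsamples = 1000000
proposal = laplace(loc=0.0, scale=2.0)        # heavy-tailed proposal
x = proposal.rvs(size=nsamples)
log_weights = norm.logpdf(x) - proposal.logpdf(x)
logZ = logsumexp(log_weights) - np.log(nsamples)
print(np.exp(logZ))   # close to 1 for a properly normalized target density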
Example #2
def fill_dict_with_defaults(dic):
    """
    Helper function that fills in missing values in a given dictionary describing the distribution to test.

    Argument:
    :param dic: dictionary containing information about the distribution to test.
    :type dic:  dictionary

    Output:
    :returns: dictionary with missing values filled in.
    :rtype:   dictionary
    """
    RD = cp(dic)
    try:
        if isinstance(RD['dist'](), RD['dist']):
            RD['dist'] = RD['dist']()
    except TypeError:
        pass
    except:
        raise AssertionError(
            'Distribution %s does not seem to support a default parameterization (construction with no arguments)'
            % (RD['dist']))
    if 'n' in RD['dist'].param:
        n = RD['dist']['n']
    else:
        n = 1
    if 'nsamples' not in dic:
        RD['nsamples'] = 500000
    if 'tolerance' not in dic:
        RD['tolerance'] = 1e-01
    if 'support' not in dic:
        RD['support'] = (-np.inf, np.inf)
    if 'proposal_low' not in dic:
        if RD['support'][1] - RD['support'][0] < np.inf:
            RD['proposal_low'] = Uniform({
                'n': n,
                'low': RD['support'][0],
                'high': RD['support'][1]
            })
        elif RD['support'][0] == 0 and RD['support'][1] == np.inf:
            RD['proposal_low'] = Gamma({'u': 1., 's': 2.})
        else:
            RD['proposal_low'] = Gaussian({'n': n, 'sigma': np.eye(n) * 0.8})
    if 'proposal_high' not in dic:
        if RD['support'][1] - RD['support'][0] < np.inf:
            RD['proposal_high'] = Uniform({
                'n': n,
                'low': RD['support'][0],
                'high': RD['support'][1]
            })
        elif RD['support'][0] == 0 and RD['support'][1] == np.inf:
            RD['proposal_high'] = Gamma({'u': 3., 's': 2.})
        else:
            RD['proposal_high'] = Gaussian({'n': n, 'sigma': np.eye(n) * 10})

    return RD
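
A hypothetical usage sketch (the spec contents below are illustrative, not taken from the original test suite); it assumes natter's Gaussian supports construction with no arguments, so the helper instantiates the class and fills in the remaining defaults:

from natter.Distributions import Gaussian

spec = {'dist': Gaussian, 'tolerance': 5e-02}
spec = fill_dict_with_defaults(spec)
print(spec['nsamples'])    # 500000, filled in by default
print(spec['support'])     # (-inf, inf), filled in by default
print(spec['tolerance'])   # 5e-02, kept from the input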
Example #3
def laplaceApproximation(listOfDist, initPoint=None):
    """Function that calculates the Laplace approximation for the product
    of likelihood functions associated with the the given list of
    Distributions. It returns a Gaussian Distribution with the mean
    set to the mode of the product of the likelihoods given in the
    input list and the variance to the negative, inverse Hessian of
    that product. Note that the resulting distribution is a
    distribution over parameters, whereas the input list is a
    distribution over datapoints. Therefore, to include a prior
    distribution over parameters within the product using this
    implementation, a particularly designed data-distribution has to
    be implemented.


    The distributions in the list are assumed to have implemented dldx
    as well as dldx2 method (first and second derivative)

    :param listOfDist: List of distributions from which the product is build as a product over the corresponding likelihood functions.
    :type listOfDist: list of natter.Distributions
    :param initPoint: Initial point for the mean of the laplace Approximation (optional argument). If None is given, then the parameters from the first distribution in the list are taken as an initial point.
    :type initPoint: numpy.ndarray
    :returns: laplace approximated distribution
    :rtype: natter.Distributions.Gaussian

    """

    # check that every distribution provides the required derivatives
    for dist in listOfDist:
        OK = hasattr(dist, 'dldx')
        OK = OK and hasattr(dist, 'dldx2')
        if not OK:
            raise ValueError('Distribution has not implemented dldx or dldx2')
    if initPoint is None:
        initPoint = listOfDist[0].primary2array()

    def f(x):
        # negative log-likelihood of the product (sum over the list)
        fx = 0.0
        for dist in listOfDist:
            fx -= dist.loglik(x)
        return fx

    def df(x):
        # gradient of the negative log-likelihood
        grad = zeros(len(initPoint.flatten()))
        for dist in listOfDist:
            grad = grad - dist.dldx(Data(x))
        return grad

    def ddf(x):
        # Hessian of the negative log-likelihood
        H = zeros((len(initPoint.flatten()), len(initPoint.flatten())))
        for dist in listOfDist:
            H = H - dist.dldx2(Data(x))
        return H

    # without full_output=True, fmin_bfgs returns only the minimizer;
    # the Hessian at the optimum is recomputed exactly via ddf below
    xmin = fmin_bfgs(f, initPoint, fprime=df)
    Hopt = ddf(xmin)
    laplaceApprox = Gaussian({'n': len(xmin.flatten())})
    laplaceApprox['mu'] = xmin
    laplaceApprox['sigma'] = inv(Hopt)
    return laplaceApprox
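
For orientation, here is a self-contained sketch of the same construction with plain scipy and a single one-dimensional density (a Student-t with 5 degrees of freedom; all names are illustrative): minimize the negative log-density, take a finite-difference second derivative at the mode, and read off the Gaussian approximation.

import numpy as np
from scipy.optimize import fmin_bfgs
from scipy.stats import t as student_t, norm

nu = 5.0

def neg_logpdf(x):
    # scalar negative log-density of a Student-t with nu degrees of freedom
    return -student_t.logpdf(x[0], nu)

xmin = fmin_bfgs(neg_logpdf, np.array([1.0]), disp=False)
# finite-difference second derivative of the negative log-density at the mode
h = 1e-4
hess = (neg_logpdf(xmin + h) - 2 * neg_logpdf(xmin) + neg_logpdf(xmin - h)) / h ** 2
approx = norm(loc=xmin[0], scale=np.sqrt(1.0 / hess))
print("mean %.3f, std %.3f" % (approx.mean(), approx.std()))  # ~0 and ~0.913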
Example #4
def MarginalHistogramEqualization(psource, ptarget=None):
    """
    Creates a non-linear filter that changes the marginal distribution
    of each single data dimension independently. To this end, it takes
    two ISA models and performs a histogram equalization between each
    pair of corresponding marginal distributions.

    *Important*: The ISA models must have one-dimensional subspaces!

    If ptarget is omitted, it will be set to a N(0,I) Gaussian by default.

    :param psource: Source distribution which must be a natter.Distributions.ISA model with one-dimensional subspaces
    :type psource: natter.Distributions.ISA
    :param ptarget: Target distribution which must be a natter.Distributions.ISA model with one-dimensional subspaces
    :type ptarget: natter.Distributions.ISA
    :returns: A non-linear filter that maps each marginal distribution of the data from the respective psource marginal into the respective ptarget marginal
    :rtype: natter.Transforms.NonlinearTransform

    """
    from natter.Distributions import ISA, Gaussian

    if not isinstance(psource, ISA):
        raise TypeError(
            'Transform.TransformFactory.MarginalHistogramEqualization: psource must be an ISA model'
        )
    else:
        psource = psource.copy()

    if ptarget is not None and not isinstance(ptarget, ISA):
        raise TypeError(
            'Transform.TransformFactory.MarginalHistogramEqualization: ptarget must be an ISA model'
        )

    for ss in psource['S']:
        if len(ss) != 1:
            raise Errors.DimensionalityError(
                'Transform.TransformFactory.MarginalHistogramEqualization: psource must have one-dimensional subspaces'
            )

    if ptarget is None:
        ptarget = ISA(S=[(k, ) for k in range(psource['n'])],
                      P=[Gaussian(n=1) for k in range(psource['n'])])
    else:
        ptarget = ptarget.copy()

    # per-dimension histogram equalization: map through the source CDF,
    # then through the target inverse CDF (ppf)
    g = lambda dat: reduce(lambda x, y: x.stack(y), [
        ptarget['P'][k].ppf(psource['P'][k].cdf(dat[k, :]))
        for k in range(psource['n'])
    ])
    # log-determinant of the Jacobian of g, via the change-of-variables formula
    gdet = lambda y: psource.loglik(y) - ptarget.loglik(g(y))

    name = 'Marginal Histogram Equalization Transform: %s --> %s' % (
        psource['P'][0].name, ptarget['P'][0].name)
    return NonlinearTransform(g, name, logdetJ=gdet)
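
The core operation is composing each target inverse CDF with the corresponding source CDF. A one-dimensional sketch with plain scipy.stats (no natter; names are illustrative), mapping Laplace samples to approximately standard-normal samples:

import numpy as np
from scipy.stats import laplace, norm

x = laplace.rvs(size=10000)
y = norm.ppf(laplace.cdf(x))       # histogram-equalized samples
print("mean %.3f, std %.3f" % (y.mean(), y.std()))  # close to 0 and 1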
Example #5
def test_estimate(self):
    sigm1 = symrand(2)
    sigm1 = np.dot(sigm1, sigm1.T)
    sigm2 = symrand(2)
    sigm2 = np.dot(sigm2, sigm2.T)
    P = [
        Gaussian(n=2, mu=2 * randn(2), sigma=sigm1),
        Gaussian(n=2, mu=2 * randn(2), sigma=sigm2)
    ]
    # P = [Gamma(n=1,u=3*rand(),s=3*rand()) for k in xrange(self.K)]
    # P = [TruncatedGaussian(a=0.1,b=10,mu=3*randn(1),sigma=3*rand(1,1)) for k in xrange(self.K)]
    alpha = rand(2)
    alpha = alpha / sum(alpha)
    mog = FiniteMixtureDistribution(P=P, alpha=alpha)
    mog.primary = ['alpha', 'P']
    dat = mog.sample(500)
    arr0 = mog.primary2array()
    print mog
    # perturb the true parameters slightly and check that estimation recovers them
    mog.array2primary(arr0 + np.random.rand(len(arr0)) * 1e-04)
    mog.estimate(dat, method='hybrid')
    err = np.sum(np.abs(arr0 - mog.primary2array()))
    print mog
    print "error: ", err
    self.assertTrue(err < 1.0)
Example #6
def setUp(self):
    self.n = 1
    self.K = 3
    self.nsamples = 10000
    self.a = 0.1
    self.b = 10
    alpha = rand(self.K)
    alpha = alpha / sum(alpha)
    P = [
        Gaussian(n=1, mu=3 * randn(1), sigma=3 * rand(1, 1))
        for k in xrange(self.K)
    ]
    # P = [TruncatedGaussian(a=self.a,b=self.b,mu=3*randn(1),sigma=3*rand(1,1)+1) for k in xrange(self.K)]
    # P = [Gamma(n=1,u=3*rand(),s=3*rand()) for k in xrange(self.K)]
    self.mog = FiniteMixtureDistribution(P=P, alpha=alpha)
    self.mog.primary = ['alpha', 'P']
    self.dat = self.mog.sample(self.nsamples)
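
As background, sampling from a finite mixture means first drawing a component index according to alpha and then sampling from that component; a plain-numpy sketch (an illustrative stand-in for FiniteMixtureDistribution.sample, not its actual implementation):

import numpy as np

alpha = np.array([0.2, 0.5, 0.3])        # mixture weights, must sum to one
mus = np.array([-3.0, 0.0, 3.0])         # per-component means
sigmas = np.array([1.0, 0.5, 2.0])       # per-component standard deviations
k = np.random.choice(len(alpha), size=10000, p=alpha)  # draw component indices
samples = np.random.randn(10000) * sigmas[k] + mus[k]  # sample from chosen components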
Example #7
class TestGaussian(unittest.TestCase):
    def setUp(self):
        self.Gauss = Gaussian({'n': 1})
        self.entropy = 0.5 * (1 + log(2 * pi))
        self.Gauss2D = Gaussian({'n': 2})
        self.Gauss2D.primary = ['mu', 'sigma']

    def test_init(self):
        pass

    def test_sample(self):
        # smoke test: sampling should run without errors
        d = self.Gauss.sample(10)

    def test_loglik(self):
        # the Monte Carlo entropy estimate must match the analytic entropy
        nsamples = 1000000
        dat = self.Gauss.sample(nsamples)
        lv = self.Gauss.loglik(dat)
        hs = sum(-lv) / nsamples  # sampled entropy
        self.assertTrue(abs(hs - self.entropy) <= 1e-02)

    def test_array2primary(self):
        # a round trip through the array representation must preserve parameters
        arr = self.Gauss.primary2array()
        pbefore = self.Gauss.param.copy()
        arrr = randn(len(arr))
        self.Gauss.array2primary(arrr)
        self.Gauss.array2primary(arr)
        pafter = self.Gauss.param.copy()
        diff = 0.0
        for key in pbefore.keys():
            diff += norm(pbefore[key] - pafter[key])
        self.assertTrue(diff <= 1e-05)

    def test_dldtheta(self):
        # compare the analytic parameter gradient against finite differences
        d = self.Gauss2D.sample(10)

        def f(X):
            self.Gauss2D.array2primary(X)
            lv = self.Gauss2D.loglik(d)
            slv = sum(lv)
            return slv

        def df(X):
            self.Gauss2D.array2primary(X)
            gv = self.Gauss2D.dldtheta(d)
            sgv = sum(gv, axis=1)
            return sgv

        theta0 = self.Gauss2D.primary2array()
        theta0 = abs(randn(len(theta0))) + 1
        err = check_grad(f, df, theta0)
        print "error in gradient: ", err
        self.assertTrue(err < 1e-02)

    def test_estimate(self):
        data = self.Gauss2D.sample(500)
        self.Gauss2D.primary = ['sigma']
        thetaOrig = self.Gauss2D.primary2array()
        theta0 = abs(randn(len(thetaOrig)))
        self.Gauss2D.array2primary(theta0)
        self.Gauss2D.estimate(data, method="gradient")
        thetaOpt = self.Gauss2D.primary2array()
        err1 = thetaOpt - thetaOrig
        print "Error in thetas with gradient: ", norm(err1)
        data = self.Gauss2D.sample(1000000)
        theta0 = abs(randn(len(thetaOrig)))
        self.Gauss2D.array2primary(theta0)
        self.Gauss2D.estimate(data, method="analytic")
        thetaOpt = self.Gauss2D.primary2array()
        err2 = thetaOpt - thetaOrig
        print "Error in thetas maxim likelihood: ", norm(err2)
        self.assertTrue((norm(err2) < 1e-01))
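
test_dldtheta above relies on scipy.optimize.check_grad, which returns the norm of the difference between an analytic gradient and a finite-difference approximation; a tiny standalone illustration (all names illustrative):

import numpy as np
from scipy.optimize import check_grad

f = lambda x: np.sum(x ** 2)   # scalar objective
df = lambda x: 2 * x           # its analytic gradient
print(check_grad(f, df, np.array([1.0, -2.0, 3.0])))  # close to 0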
Example #8
class TestEllipticallyContourGamma(unittest.TestCase):
    def setUp(self):
        self.n = 2
        self.W = LinearTransform(eye(self.n))
        self.ECG = EllipticallyContourGamma(n=self.n, W=self.W)
        self.Gaussian = Gaussian({'n': self.n})
        self.data = self.Gaussian.sample(1000)

    def test_init(self):
        pass


    def test_array2primary(self):
        # round-trip check currently disabled; this only exercises primary2array
        abefore = self.ECG.primary2array()
        #arrr = randn(len(abefore))
        #abefore = arrr
        #self.ECG.array2primary(arrr)
        #aafter = self.ECG.primary2array()
        #diff = norm(abefore - aafter)
        #self.assertTrue(diff <= 1e-05)

    def test_loglik(self):
        # importance-sampling check of the normalization constant
        nsamples = 100000
        dataImportance = self.Gaussian.sample(nsamples)
        logweights = self.ECG.loglik(dataImportance) - self.Gaussian.loglik(dataImportance)
        Z = logsumexp(logweights) - log(nsamples)
        self.assertTrue(abs(exp(Z) - 1) < 1e-01)

    def test_sample(self):
        nsamples = 10000
        data = self.ECG.sample(nsamples)
        logWeights = self.Gaussian.loglik(data) - self.ECG.loglik(data)
        Z = logsumexp(logWeights) - log(nsamples)
        self.assertTrue(abs(exp(Z) - 1) < 1e-01)


    def test_dldtheta(self):
        # check the gradient w.r.t. the primary parameter 'q' ...
        self.ECG.primary = ['q']

        def f(X):
            self.ECG.array2primary(X)
            lv = self.ECG.loglik(self.data)
            return sum(lv)

        def df(X):
            self.ECG.array2primary(X)
            gv = self.ECG.dldtheta(self.data)
            return sum(gv, axis=1)

        theta0 = self.ECG.primary2array()
        theta0 = abs(randn(len(theta0))) + 1
        err = check_grad(f, df, theta0)
        print "error in gradient: ", err
        # ... and again w.r.t. 'W'; f and df pick up the new primary on re-evaluation
        self.ECG.primary = ['W']
        theta0 = self.ECG.primary2array()
        theta0 = abs(randn(len(theta0))) + 1
        err = check_grad(f, df, theta0)
        print "error in gradient: ", err
        self.assertTrue(err < 1e-02)