Example #1
def test_null_constrained():

    # Create a mixed population of Z-scores: 1000 standard normal and
    # 20 uniformly distributed between 3 and 4.
    grid = np.linspace(0.001, 0.999, 1000)
    z0 = norm.ppf(grid)
    z1 = np.linspace(3, 4, 20)
    zs = np.concatenate((z0, z1))

    for estimate_mean in False,True:
        for estimate_scale in False,True:
            for estimate_prob in False,True:

                emp_null = NullDistribution(zs, estimate_mean=estimate_mean,
                                            estimate_scale=estimate_scale,
                                            estimate_null_proportion=estimate_prob)

                if not estimate_mean:
                    assert_allclose(emp_null.mean, 0, atol=1e-5, rtol=1e-5)
                if not estimate_scale:
                    assert_allclose(emp_null.sd, 1, atol=1e-5, rtol=1e-2)
                if not estimate_prob:
                    assert_allclose(emp_null.null_proportion, 1, atol=1e-5, rtol=1e-2)

                # consistency check
                assert_allclose(emp_null.pdf(np.r_[-1, 0, 1]),
                                norm.pdf(np.r_[-1, 0, 1], loc=emp_null.mean,
                                         scale=emp_null.sd),
                                rtol=1e-13)
Example #2
def smooth_PSTHs(PSTHs, smooth_sd=0.5):
    n_psths, n_bins = PSTHs.shape
    kern_x = np.linspace(0, n_bins, n_bins, endpoint=False) - n_bins/2. + 1
    #... plus one seems to remove offsets in convolution
    kern = norm.pdf(kern_x, scale=smooth_sd)
    kern /= kern.sum()
    return np.apply_along_axis(np.convolve, 1, PSTHs, kern, 'same')
Example #3
def arma_likelihood(time_series, phis=array([]), thetas=array([]), mu=0.,
        sigma=1.):
    """
    Return the log-likelihood of the ARMA model parameters, given the time
    series.

    Parameters
    ----------
    time_series : ndarray of shape (n,1)
        The time series in question
    phis : ndarray of shape (p,)
        The phi parameters
    thetas : ndarray of shape (q,)
        The theta parameters
    mu : float
        The parameter mu
    sigma : float
        The parameter sigma

    Returns
    -------
    log_likelihood : float
        The log-likelihood of the model
    """
    F, Q, H, dim_states, dim_time_series = state_space_rep(phis, thetas, mu,
            sigma)
    mus, covs = kalman(F, Q, H, time_series - mu)
    likelihood = 0.
    for i in range(len(mus)):
        cond_mu = H.dot(mus[i])
        cond_sigma = H.dot(covs[i].dot(H.T))
        likelihood += log(norm.pdf(time_series[i] - mu, loc=cond_mu,
            scale=sqrt(cond_sigma)))
    return float(likelihood)
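For reference, the loop above is the prediction-error decomposition of the Gaussian log-likelihood. Assuming `kalman` returns the one-step-ahead predicted state means and covariances for the demeaned series, each term is the log of a univariate normal density:

$$
\log L \;=\; \sum_{i=1}^{n} \log \mathcal{N}\!\left(y_i - \mu \,\middle|\, H\hat{\mu}_i,\; H P_i H^{T}\right).
$$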
Example #4
def post_l(l,x,d,alpha,sigma):
    #normal = [norm.pdf(np.cos(alpha)*(x[1]-d[1]) + np.sin(alpha)*(x[0]-d[0])) for d in data]\
    mean=(np.cos(alpha)*(d[1]) + np.sin(alpha)*(d[0])).mean()
    std=np.sqrt(np.cos(alpha)*(x[1]) + np.sin(alpha)*(x[0]))
    normal = norm.pdf(l,mean,std) # sigma?
    #normal = norm.pdf(np.cos(alpha)*(x[1]-d[:,1]) + np.sin(alpha)*(x[0]-d[:,0])) 
    return np.prod(normal)
Example #5
def eval_fun(abc, pop, *args):
    # errors = []
    # for indiv in pop:
    #     # inverse exponential with offset, y = a * exp(b/x) + c
    #     predicted = (indiv[0] * np.exp(indiv[1] / args[0]) + indiv[2])
    #     errors.append(predicted - args[1])

    # evaluate the population with some broadcasting
    pred = (pop[:,0][:,np.newaxis] *
            np.exp(pop[:,1][:,np.newaxis]/args[0][np.newaxis,:]) +
            pop[:,2][:,np.newaxis])
    errors = pred - args[1][np.newaxis,:]

    # sum of squared error
    #errors = np.asarray(errors)
    sse = np.sum(errors*errors,1)
    #sae = np.sum(np.abs(errors),1)

    # calculate the weight with a normal kernel
    weights = np.log(norm.pdf(sse,scale=pop[:,3]))
    #weights = np.log(norm.pdf(sse,scale=.1))

    # see if return both weights and predicted vals
    if abc._save_posts:
        return weights,pred
    else:
        return weights
Example #6
 def _updateInternalSamplingScore(self, amount, location, width):
     ''' The updating process works as follows: a normal probability
     density curve is computed with its peak at `location` and its
     standard deviation given by `width`, scaled so that the (signed)
     area under the curve equals `amount`. The resulting curve is then
     added to the score.
     '''
     values = norm.pdf(self.x, location, width) * amount
     self._scores = [v1 + v2 for (v1, v2) in zip(self._scores, values)]
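For intuition, the scaling relies on the normal PDF integrating to one, so multiplying it by `amount` makes the signed area under the added curve equal to `amount`. A minimal standalone check of that identity (using only numpy and scipy, independent of the class above):

import numpy as np
from scipy.stats import norm

x = np.linspace(-50, 50, 20001)              # dense grid; the peak sits well inside it
amount, location, width = -3.0, 2.0, 1.5
curve = norm.pdf(x, location, width) * amount
print(np.trapz(curve, x))                    # ~ -3.0, i.e. the signed area equals `amount`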
Example #7
 def _kern(self, y, x = 0.0, h = 1.0):
   '''Gaussian kernel, gives the weight ascribed to points in y relative to x with scale parameter h
   
   Parameters
   ----------
   y : 1-dim length m, or 2 dimensional (n*m) numpy.array of floats containing coordinates of n, m-dimensional feature points 
   x : float or 1-dim length m numpy.array of floats containing the coordinates of point relative to which kernel weights are calculated
   h : float or 1-dim length m numpy.array of floats containing the scale parameter for each dimension
   NOTE - the final division through by h is present in the original R package, but I'm fairly sure it's an error  
   '''
   return gauss.pdf((y-x)/h) / h
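On the docstring's note about the final division by h: for a scalar bandwidth this is the usual kernel-density normalization, since the rescaled Gaussian kernel integrates to one in y,

$$
K_h(y - x) \;=\; \frac{1}{h}\,\varphi\!\left(\frac{y - x}{h}\right), \qquad \int_{-\infty}^{\infty} K_h(y - x)\,\mathrm{d}y = 1 .
$$

Whether that normalization is wanted here depends on how the weights are combined downstream.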
Example #8
def sample_pa(x,d,sigma):
    first=(x[1]-d[1]).mean()
    second=(x[0]-d[0]).mean()
    print "x, d, sigma ", x, d, sigma
    cnt = 0
    L = []
    z = first / np.sqrt(first**2+second**2) 
    L.append(-np.arccos(-z))
    L.append(-np.arccos( z))
    L.append( np.arccos(-z))
    L.append( np.arccos( z))
    Lmax = max([norm.pdf(np.cos(a)*first + np.sin(a)*second,0,sigma) for a in L])
    while True:
        u=rnd.uniform(0,Lmax)
        a=rnd.uniform(-np.pi/2.,np.pi/2.)
        normal = norm.pdf(np.cos(a)*first + np.sin(a)*second,0,sigma) # right?
        if u < normal:
            break
        cnt += 1
    print(cnt)
    return a
Example #9
def estimate_params_for_normal(x, low_bound , mu_initial, sigma_initial):
	"""
		Takes a vector x of truncated data with a known lower
		truncation bound and estimates the parameters of the 
		fit of an untruncated normal distribution.
		code from Chris Fonnesbeck's Python data analysis tutorial on Sense
		https://sense.io/prometheus2305/data-analysis-in-python/files/Statistical%20Data%20Modeling.py
	"""


	# normalize vector
	mu_initial = float(mu_initial)
	sigma_initial = float(sigma_initial)
	#x = np.random.normal(size=10000,loc=2000,scale= 2000)

	x = np.asarray([(y - mu_initial) / sigma_initial for y in x])
	a =  (low_bound - mu_initial)/sigma_initial # normalize lower bound
	

	#_ = plt.hist(x, bins=100)
	#plt.show()
	#plt.close()

	# We can construct a log likelihood for this function using the conditional
	# form	
	trunc_norm = lambda theta, a, x: -(np.log(norm.pdf(x, theta[0], theta[1])) - 
	                                      np.log(1 - norm.cdf(a, theta[0], theta[1]))).sum()

	# For this example, we will use another optimization algorithm, the
	# **Nelder-Mead simplex algorithm**. It has a couple of advantages: 
	# 
	# - it does not require derivatives
	# - it can optimize (minimize) a vector of parameters
	# 
	# SciPy implements this algorithm in its `fmin` function:

	# we have normalized the data; given that the lower truncation point a
	# is pretty far out in the tail, the standard normal parameters (0, 1)
	# are a good first guess
	initial_guess = np.array([0,1]) 
	sol = fmin(trunc_norm, initial_guess, args=(a, x))
	print(sol)
	mean_normalized, stddev_normalized = sol[0], sol[1]
	# undo the normalization applied above: x was scaled as (y - mu_initial) / sigma_initial
	mean_est = mu_initial + mean_normalized * sigma_initial
	stddev_est = stddev_normalized * sigma_initial
	print(mean_est, stddev_est)
	return mean_est, stddev_est
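The `trunc_norm` lambda above is the negative log-likelihood of a normal distribution truncated below at a: each observation contributes the normal density renormalized by the probability mass above the truncation point,

$$
-\ell(\mu, \sigma) \;=\; -\sum_{i}\Bigl[\log \varphi(x_i;\,\mu,\sigma) \;-\; \log\bigl(1 - \Phi(a;\,\mu,\sigma)\bigr)\Bigr],
$$

with $\varphi$ and $\Phi$ the normal PDF and CDF.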
Example #10
def test_null_distribution():

    # Create a mixed population of Z-scores: 1000 standard normal and
    # 20 uniformly distributed between 3 and 4.
    grid = np.linspace(0.001, 0.999, 1000)
    z0 = norm.ppf(grid)
    z1 = np.linspace(3, 4, 20)
    zs = np.concatenate((z0, z1))
    emp_null = NullDistribution(zs, estimate_null_proportion=True)

    assert_allclose(emp_null.mean, 0, atol=1e-5, rtol=1e-5)
    assert_allclose(emp_null.sd, 1, atol=1e-5, rtol=1e-2)
    assert_allclose(emp_null.null_proportion, 0.98, atol=1e-5, rtol=1e-2)

    # consistency check
    assert_allclose(emp_null.pdf(np.r_[-1, 0, 1]),
                    norm.pdf(np.r_[-1, 0, 1],
                             loc=emp_null.mean,
                             scale=emp_null.sd),
                    rtol=1e-13)
Example #11
 def __init__(self,
              num_target_qubits: int,
              mu: float = 0,
              sigma: float = 1,
              low: float = -1,
              high: float = 1) -> None:
     r"""
     Args:
         num_target_qubits: Number of qubits it acts on, has a minimum value of 1.
         mu: Expected value of considered normal distribution
         sigma: standard deviation of considered normal distribution
         low: Lower bound, i.e., the value corresponding to \|0...0>
             (assuming an equidistant grid)
         high: Upper bound, i.e., the value corresponding to \|1...1>
             (assuming an equidistant grid)
     """
     validate_min('num_target_qubits', num_target_qubits, 1)
     probabilities, _ = UnivariateDistribution.\
         pdf_to_probabilities(
             lambda x: norm.pdf(x, mu, sigma), low, high, 2 ** num_target_qubits)
     super().__init__(num_target_qubits, probabilities, low, high)
Example #12
def arma_likelihood(time_series,
                    phis=array([]),
                    thetas=array([]),
                    mu=0.,
                    sigma=1.):
    """
    Return the log-likelihood of the ARMA model parameters, given the time
    series.

    Parameters
    ----------
    time_series : ndarray of shape (n,1)
        The time series in question
    phis : ndarray of shape (p,)
        The phi parameters
    thetas : ndarray of shape (q,)
        The theta parameters
    mu : float
        The parameter mu
    sigma : float
        The parameter sigma

    Returns
    -------
    log_likelihood : float
        The log-likelihood of the model
    """
    F, Q, H, dim_states, dim_time_series = state_space_rep(
        phis, thetas, mu, sigma)
    mus, covs = kalman(F, Q, H, time_series - mu)
    likelihood = 0.
    for i in range(len(mus)):
        cond_mu = H.dot(mus[i])
        cond_sigma = H.dot(covs[i].dot(H.T))
        likelihood += log(
            norm.pdf(time_series[i] - mu, loc=cond_mu, scale=sqrt(cond_sigma)))
    return float(likelihood)
Example #13
def run():
    """
    <Description>

    Args:
        param1: This is the first param.
    
    Returns:
        This is a description of what is returned.
    """
    nSamples = [10,50,150,300,400,500,1000]
    distArr = []
    np.random.seed(42)
    for n in nSamples:
        dist = np.random.normal(loc=0.0, scale=1.0, size=n)
        distArr.append(dist)
    # make a figure showing the different distributions for
    # different number of samples
    nBins = 30
    bins = np.linspace(-3,3,nBins)
    nGraphs = len(nSamples)
    fig = plt.figure(figsize=(8,24))
    plt.subplot(nGraphs,1,1)
    plt.title("Recommend at least 300 points from N(0,1)",
              fontsize=25)
    for i,n in enumerate(nSamples):
        plt.subplot(nGraphs,1,i+1)
        plt.hist(distArr[i],bins=bins,label="N={:d}".format(n),normed=True)
        labelPDF = "N(0,1) Dist." if (i == nGraphs-1) else ""
        plt.plot(bins,norm.pdf(bins),label=labelPDF,
                 linewidth=3,linestyle='--',color='r')
        plt.legend()
    plt.ylabel("Weighted Proportion",fontsize=20)
    plt.xlabel("Value of Standard Normal",fontsize=20)
    plt.tight_layout()
    fig.savefig("CompareDist.png")
Example #14
        def f(x): return norm.pdf(x)

        # set low/high values
        low = [-normal_max_value] + [0]*self.K
Example #15
 def f(x):
     return norm.pdf(x)
Example #16
def pl_no_gamma(l, p, sigma):
    u, v = p
    if u > l:
        return 1 / (l + 1.0) * norm.pdf(np.linalg.norm([u - l, v]), 0, sigma)
    return 1 / (l + 1.0) * norm.pdf(v, 0, sigma)
Example #18
 def _get_class_posterior(self,x,c) :
     # calculate log probs for feature
     log_probs = zeros(x.size)
     for i,f in enumerate(x) :
         log_probs[i] = log10(norm.pdf(f,self.means[c,i],self.stdvs[c,i]))
     return log_probs
Example #19
S_X1 = np.std(X1, ddof=1)

X2 = OI_Data['Mean tBMD']
SortedValues2 = np.sort(X2.values)
N2 = len(X2)
X2_Bar = np.mean(X2)
S_X2 = np.std(X2, ddof=1)

## Kernel density estimation (Gaussian kernel)
NormalIQR = np.sum(np.abs(norm.ppf(np.array([0.25,0.75]), 0, 1)))

KernelEstimator1 = np.zeros(N1)
DataIQR1 = np.abs(X1.quantile(0.75)) - np.abs(X1.quantile(0.25))
KernelHalfWidth1 = 0.9*N1**(-1/5) * min(S_X1,DataIQR1/NormalIQR)
for Value in SortedValues1:
    KernelEstimator1 += norm.pdf(SortedValues1-Value,0,KernelHalfWidth1*2)
KernelEstimator1 = KernelEstimator1/KernelEstimator1.sum()

KernelEstimator2 = np.zeros(N2)
DataIQR2 = np.abs(X2.quantile(0.75)) - np.abs(X2.quantile(0.25))
KernelHalfWidth2 = 0.9*N2**(-1/5) * min(S_X2,DataIQR2/NormalIQR)
for Value in SortedValues2:
    KernelEstimator2 += norm.pdf(SortedValues2-Value,0,KernelHalfWidth2*2)
KernelEstimator2 = KernelEstimator2/KernelEstimator2.sum()

## Prepare histogram and store data
BinsValues = np.linspace(450,700,21)
Counts1, Bins = np.histogram(Healthy_Data['Mean tBMD'],BinsValues)
RelativeWeights1 = Counts1/Counts1.sum()
Counts2, Bins = np.histogram(OI_Data['Mean tBMD'],BinsValues)
RelativeWeights2 = Counts2/Counts2.sum()
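The bandwidth formula above is Silverman's rule of thumb, 0.9 n^(-1/5) min(s, IQR/1.349). As a rough cross-check, the sketch below (on synthetic data, not the tBMD values) computes a rule-of-thumb kernel density estimate directly and compares it with scipy.stats.gaussian_kde, which uses a covariance-based bandwidth, so the two curves are close but not identical; note the original code instead scales the kernel width by a factor of two and normalizes the estimator to sum to one.

import numpy as np
from scipy.stats import norm, gaussian_kde

rng = np.random.default_rng(0)
x = rng.normal(550, 40, size=200)                 # synthetic stand-in for the tBMD values
s = np.std(x, ddof=1)
iqr = np.quantile(x, 0.75) - np.quantile(x, 0.25)
normal_iqr = norm.ppf(0.75) - norm.ppf(0.25)      # ~1.349
h = 0.9 * len(x) ** (-1 / 5) * min(s, iqr / normal_iqr)

grid = np.linspace(x.min(), x.max(), 200)
manual = norm.pdf(grid[:, None] - x[None, :], 0, h).mean(axis=1)  # rule-of-thumb KDE
scipy_kde = gaussian_kde(x)(grid)                 # SciPy's estimate, for comparison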
Example #20
def segment(image, n_segments=2, burn_in=1000, samples=1000, lag=5):
    """
    Return image segment samples.

    Parameters
    ----------
    image : (N,M) ndarray
        Pixel array with single-dimension values (e.g. hue)

    Returns
    -------
    labels : (samples,N,M) ndarray
        The image segment label array
    emission_params: (samples,K,2) ndarray
        The Gaussian emission distribution parameters (mean, precision)
    log_probs : (samples,) ndarray
    """

    # allocate arrays
    res_labels = zeros((samples, image.shape[0], image.shape[1]), dtype=int)
    res_emission_params = zeros((samples, n_segments, 6))
    res_log_prob = zeros((samples,))

    padded_labels = ones((image.shape[0] + 2, image.shape[1] + 2), dtype=int)*-1
    labels = padded_labels[1:-1, 1:-1]
    emission_params = zeros((n_segments, 6))
    log_prob = None

    conditional = zeros((n_segments,))


    # init emission_params
    sample_mean_r = image[:,:,0].mean()
    sample_mean_g = image[:,:,1].mean()
    sample_mean_b = image[:,:,2].mean()
    sample_var_r = image[:,:,0].var()
    sample_var_g = image[:,:,1].var()
    sample_var_b = image[:,:,2].var()
    sample_prec_r = 1./sample_var_r
    sample_prec_g = 1./sample_var_g
    sample_prec_b = 1./sample_var_b
    for k in range(n_segments):
        """
        emission_params[k,0] = norm.rvs(sample_mean_r,
                sqrt(sample_var_r/n_segments))
        emission_params[k,1] = sample_prec_r
        emission_params[k,2] = norm.rvs(sample_mean_g,
                sqrt(sample_var_g/n_segments))
        emission_params[k,3] = sample_prec_g
        emission_params[k,4] = norm.rvs(sample_mean_b,
                sqrt(sample_var_b/n_segments))
        emission_params[k,5] = sample_prec_b
        """
        emission_params[k,0] = norm.rvs(0.5, 0.1)
        emission_params[k,1] = 1/(0.25**2)
        emission_params[k,2] = norm.rvs(0.5, 0.1)
        emission_params[k,3] = 1/(0.25**2)
        emission_params[k,4] = norm.rvs(0.5, 0.1)
        emission_params[k,5] = 1/(0.25**2)

    # init labels
    for n in range(image.shape[0]):
        for m in range(image.shape[1]):
            labels[n,m] = randint(0, n_segments)

    try:
        # gibbs
        for i in range(burn_in + samples*lag - (lag - 1)):

            for n in range(image.shape[0]):
                for m in range(image.shape[1]):
                    # resample label
                    for k in range(n_segments):
                        labels[n,m] = k
                        conditional[k] = 0.
                        conditional[k] += phi_blanket(
                                memoryview(padded_labels), n, m,
                                memoryview(FS))
                        """
                        for x in xrange(max(n-2,0), min(n+3,image.shape[0])):
                            for y in xrange(max(m-2,0), min(m+3,
                                    image.shape[1])):
                                clique = padded_labels[x:x+3,y:y+3]
                                conditional[k] += phi(clique)
                        """
                        mean_r = emission_params[k, 0]
                        var_r = 1./emission_params[k, 1]
                        mean_g = emission_params[k, 2]
                        var_g = 1./emission_params[k, 3]
                        mean_b = emission_params[k, 4]
                        var_b = 1./emission_params[k, 5]
                        conditional[k] += log(norm.pdf(image[n,m,0], mean_r,
                            sqrt(var_r)))
                        conditional[k] += log(norm.pdf(image[n,m,1], mean_g,
                            sqrt(var_g)))
                        conditional[k] += log(norm.pdf(image[n,m,2], mean_b,
                            sqrt(var_b)))

                    labels[n,m] = sample_categorical(conditional)

            for k in range(n_segments):
                mask = (labels == k)

                # resample label mean red
                mean_r = emission_params[k, 0]
                prec_r = emission_params[k, 1]
                numer_r = TAU_0*MU_0 + prec_r*sum(image[mask][:, 0])
                denom_r = TAU_0 + prec_r*sum(mask)
                post_mean_r = numer_r/denom_r
                post_var_r = 1./(denom_r)
                emission_params[k, 0] = norm.rvs(post_mean_r, sqrt(post_var_r))

                # resample label var red
                post_alpha_r = ALPHA_0 + sum(mask)/2.
                post_beta_r = BETA_0 + sum((image[mask][:, 0] - emission_params[k,0])**2)/2.
                post_r = gamma(post_alpha_r, scale=1./post_beta_r)
                emission_params[k, 1] = post_r.rvs()

                # resample label mean green
                mean_g = emission_params[k, 2]
                prec_g = emission_params[k, 3]
                numer_g = TAU_0*MU_0 + prec_g*sum(image[mask][:, 1])
                denom_g = TAU_0 + prec_g*sum(mask)
                post_mean_g = numer_g/denom_g
                post_var_g = 1./(denom_g)
                emission_params[k, 2] = norm.rvs(post_mean_g, sqrt(post_var_g))

                # resample label var green
                post_alpha_g = ALPHA_0 + sum(mask)/2.
                post_beta_g = BETA_0 + sum((image[mask][:, 1] - emission_params[k,2])**2)/2.
                post_g = gamma(post_alpha_g, scale=1./post_beta_g)
                emission_params[k, 3] = post_g.rvs()

                # resample label mean blue
                mean_b = emission_params[k, 4]
                prec_b = emission_params[k, 5]
                numer_b = TAU_0*MU_0 + prec_b*sum(image[mask][:, 2])
                denom_b = TAU_0 + prec_b*sum(mask)
                post_mean_b = numer_b/denom_b
                post_var_b = 1./(denom_b)
                emission_params[k, 4] = norm.rvs(post_mean_b, sqrt(post_var_b))

                # resample label var blue
                post_alpha_b = ALPHA_0 + sum(mask)/2.
                post_beta_b = BETA_0 + sum((image[mask][:, 2] - emission_params[k,4])**2)/2.
                post_b = gamma(post_alpha_b, scale=1./post_beta_b)
                emission_params[k, 5] = post_b.rvs()
                
            log_prob = 0.
            for n in range(image.shape[0]):
                for m in range(image.shape[1]):
                    #clique = padded_labels[n:n+3,m:m+3]
                    label = labels[n,m]
                    mean_r = emission_params[label, 0]
                    var_r = 1./emission_params[label, 1]
                    mean_g = emission_params[label, 2]
                    var_g = 1./emission_params[label, 3]
                    mean_b = emission_params[label, 4]
                    var_b = 1./emission_params[label, 5]
                    #log_prob += phi(clique)
                    log_prob += log(norm.pdf(image[n,m,0], mean_r, sqrt(var_r)))
                    log_prob += log(norm.pdf(image[n,m,1], mean_g, sqrt(var_g)))
                    log_prob += log(norm.pdf(image[n,m,2], mean_b, sqrt(var_b)))
                    # prior on theta?
            log_prob += phi_all(memoryview(padded_labels), memoryview(FS))

            sys.stdout.write('\riter {} log_prob {}'.format(i, log_prob))
            sys.stdout.flush()

            if i < burn_in:
                pass
            elif not (i - burn_in)%lag:
                res_i = (i - burn_in)//lag
                res_emission_params[res_i] = emission_params[:]
                res_labels[res_i] = labels
                res_log_prob[res_i] = log_prob

        sys.stdout.write('\n')
        return res_labels, res_emission_params, res_log_prob
    except KeyboardInterrupt:
        return res_labels, res_emission_params, res_log_prob
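The per-channel resampling steps above are standard conjugate updates. Assuming MU_0, TAU_0, ALPHA_0 and BETA_0 are the prior mean, prior precision and Gamma shape/rate hyperparameters, and the n pixels x_1..x_n currently assigned to segment k are used with the other parameter held at its current value, the full conditionals being drawn from are

$$
\mu \mid \cdot \;\sim\; \mathcal{N}\!\left(\frac{\tau_0\mu_0 + \tau\sum_i x_i}{\tau_0 + n\tau},\; \frac{1}{\tau_0 + n\tau}\right),
\qquad
\tau \mid \cdot \;\sim\; \mathrm{Gamma}\!\left(\alpha_0 + \tfrac{n}{2},\; \beta_0 + \tfrac{1}{2}\sum_i (x_i - \mu)^2\right)
$$

(shape/rate parameterization, which matches gamma(post_alpha, scale=1/post_beta) in the code).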
Example #21
    def __init__(self,
                 n_normal,
                 normal_max_value,
                 p_zeros,
                 rhos,
                 i_normal=None,
                 i_ps=None):
        """
        Constructor.

        The Gaussian Conditional Independence Model for Credit Risk
        Reference: https://arxiv.org/abs/1412.1183

        Args:
            n_normal (int): number of qubits to represent the latent normal random variable Z
            normal_max_value (float): min/max value to truncate the latent normal random variable Z
            p_zeros (list or array): standard default probabilities for each asset
            rhos (list or array): sensitivities of default probability of assets with respect to latent variable Z
            i_normal (list or array): indices of qubits to represent normal variable
            i_ps (list or array): indices of qubits to represent asset defaults
        """
        self.n_normal = n_normal
        self.normal_max_value = normal_max_value
        self.p_zeros = p_zeros
        self.rhos = rhos
        self.K = len(p_zeros)
        num_qubits = [n_normal] + [1] * self.K

        # set and store indices
        if i_normal is not None:
            self.i_normal = i_normal
        else:
            self.i_normal = range(n_normal)

        if i_ps is not None:
            self.i_ps = i_ps
        else:
            self.i_ps = range(n_normal, n_normal + self.K)

        # get normal (inverse) CDF and pdf
        F = lambda x: norm.cdf(x)
        F_inv = lambda q: norm.ppf(q)
        f = lambda x: norm.pdf(x)

        # set low/high values
        low = [-normal_max_value] + [0] * self.K
        high = [normal_max_value] + [1] * self.K

        # call super constructor
        super().__init__(num_qubits, low=low, high=high)

        # create normal distribution
        self._normal = NormalDistribution(n_normal, 0, 1, -normal_max_value,
                                          normal_max_value)

        # create linear rotations for conditional defaults
        self._slopes = np.zeros(self.K)
        self._offsets = np.zeros(self.K)
        self._rotations = []
        for k in range(self.K):

            psi = F_inv(p_zeros[k]) / np.sqrt(1 - rhos[k])

            # compute slope / offset
            slope = -np.sqrt(rhos[k]) / np.sqrt(1 - rhos[k])
            slope *= f(psi) / np.sqrt(1 - F(psi)) / np.sqrt(F(psi))
            offset = 2 * np.arcsin(np.sqrt(F(psi)))

            # adjust for integer to normal range mapping
            offset += slope * (-normal_max_value)
            slope *= 2 * normal_max_value / (2**n_normal - 1)

            self._offsets[k] = offset
            self._slopes[k] = slope

            lry = LinearYRotation(slope,
                                  offset,
                                  n_normal,
                                  i_state=self.i_normal,
                                  i_target=self.i_ps[k])
            self._rotations += [lry]
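For context, the slope/offset pair computed in the loop is a first-order Taylor approximation (around z = 0) of the conditional-default rotation angle. Assuming the paper's convention p_k(z) = F((F^{-1}(p_k) - sqrt(rho_k) z) / sqrt(1 - rho_k)), the Y-rotation angle and its linearization are

$$
\theta_k(z) = 2\arcsin\!\sqrt{p_k(z)} \;\approx\; \theta_k(0) + \theta_k'(0)\,z,
\qquad
\theta_k'(0) = -\frac{\sqrt{\rho_k}}{\sqrt{1-\rho_k}}\,
\frac{f(\psi_k)}{\sqrt{F(\psi_k)\bigl(1 - F(\psi_k)\bigr)}},
\qquad
\psi_k = \frac{F^{-1}(p_k)}{\sqrt{1-\rho_k}},
$$

after which the remaining lines map the integer-encoded grid for z onto [-normal_max_value, normal_max_value], which rescales the slope and shifts the offset.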
Example #22
 def __init__(self, num_target_qubits, mu=0, sigma=1, low=-1, high=1):
     self.validate(locals())
     probabilities, _ = UnivariateDistribution.\
         pdf_to_probabilities(lambda x: norm.pdf(x, mu, sigma), low, high, 2 ** num_target_qubits)
     super().__init__(num_target_qubits, probabilities, low, high)
Example #23
def PermutationTest(x,y,NRepetition=45**2,SignificanceLevel=0.05):

    # Analyze data
    x_bar = np.mean(x)
    y_bar = np.mean(y)

    d = x_bar - y_bar

    XData = pd.DataFrame({'Values':x,'Group':'Control'},index=range(len(x)))
    YData = pd.DataFrame({'Values':y,'Group':'Test'},index=range(len(y)))

    Pool = XData.append(YData,ignore_index=True)
    N = len(Pool)

    D = np.zeros(NRepetition)
    for i in range(NRepetition):

        n = np.random.randint(1,N-1)

        SampleA = Pool.sample(n)
        SampleB = Pool.drop(SampleA.index)

        D[i] = SampleA['Values'].mean() - SampleB['Values'].mean()

    # Analyze distribution of D
    from scipy.stats.distributions import norm
    D.sort()
    D_bar = np.mean(D)
    S_D = np.std(D,ddof=1)
    N_D = len(D)

    # Kernel density estimation (Gaussian kernel)
    KernelEstimator = np.zeros(N_D)
    NormalIQR = np.abs(norm.interval(0.25,0,1)).sum()
    DataIQR = np.abs(np.quantile(D,0.75)) - np.abs(np.quantile(D,0.25))
    KernelHalfWidth = 0.9 * N_D ** (-1 / 5) * S_D
    for Value in D:
        KernelEstimator += norm.pdf(D - Value, 0, KernelHalfWidth * 2)
    KernelEstimator = KernelEstimator / N_D

    ## Histogram and density distribution
    TheoreticalDistribution = norm.pdf(D, D_bar, S_D)
    Figure, Axes = plt.subplots(1, 1, figsize=(5.5, 4.5), dpi=100)
    Histogram = Axes.hist(D, density=True, bins=20, edgecolor=(0, 0, 1), color=(1, 1, 1), label='Histogram')
    Axes.plot(D, KernelEstimator, color=(1, 0, 0), label='Kernel Density')
    Axes.plot(D, TheoreticalDistribution, linestyle='--', color=(0, 0, 0), label='Normal Distribution')
    plt.xlabel('D values')
    plt.ylabel('Density (-)')
    plt.legend(loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.15), prop={'size':10})
    plt.show()
    plt.close(Figure)

    EmpiricalQuantiles = np.arange(0.5, N_D + 0.5) / N_D
    MinValue = np.quantile(D,SignificanceLevel / 2)
    MaxValue = np.quantile(D,1 - SignificanceLevel / 2)
    RejectionRange = np.array([[-np.inf,MinValue],[MaxValue,np.inf]])

    Figure, Axes = plt.subplots(1, 1, figsize=(5.5, 4.5), dpi=100)
    Histogram = Axes.hist(D, density=True, bins=20, edgecolor=(0, 0, 1), color=(1, 1, 1), label='Histogram')
    Axes.fill_between([min(D),MinValue], [max(Histogram[0]),max(Histogram[0])], color=(0, 0, 0), alpha=0.1)
    Axes.fill_between([max(D),MaxValue], [max(Histogram[0]),max(Histogram[0])], color=(0, 0, 0), alpha=0.1, label='Rejection range')
    Axes.plot([d,d], [0,max(Histogram[0])], color=(1, 0, 0), label='Actual difference')
    plt.xlabel('D values')
    plt.ylabel('Density (-)')
    plt.legend(loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.15), prop={'size':10})
    plt.show()
    plt.close(Figure)

    p = len(D[abs(D)>=abs(d)]) / len(D)

    # Z = (D - D_bar) / S_D
    # z_d = (d - D_bar) / S_D
    # TheoreticalQuantiles = norm.cdf(Z)
    #
    # # Compute range of Z values
    # D_zmin = D_bar - 10 * S_D
    # D_zmax = D_bar + 10 * S_D
    #
    # Step = 0.001
    # x = np.arange(D.min(), D.max(), Step)  # range of x in spec
    # y = norm.pdf(x, D_bar, S_D)
    #
    # x_all = np.arange(D_zmin, D_zmax, Step)  # entire range of x, both in and out of spec
    # # y_all = norm.pdf(x_all, D_bar, S_D)
    #
    # x_all = np.arange(-10, 10, Step)  # entire range of x, both in and out of spec
    # y_all = norm.pdf(x_all, 0, 1)
    # y_d = norm.pdf(z_d, 0, 1)
    #
    # y_sorted = np.zeros(len(y_all))
    # y_sorted += y_all
    # y_sorted.sort()
    #
    # CI = 0.95
    # y_area = 0
    # i = 1
    # while y_area / y_all.sum() < CI:
    #     y_area += y_sorted[-i]
    #     i += 1
    # z_CI = i / 2 * Step
    #
    # # Entire range of x, both in and out of spec
    # x_CI = np.arange(-z_CI, z_CI, Step)
    # y_CI = norm.pdf(x_CI, 0, 1)
    #
    # # Plot in data space
    # Figure, Axes = plt.subplots(1, 1, figsize=(5.5, 4.5), dpi=100)
    # Axes.fill_between(x_CI, y_CI, 0, alpha=0.15, color=(0, 0, 0), label=str(int(0.95 * 100)) + '% CI')
    # Axes.plot([z_d,z_d], [0,y_d], color=(0, 0, 1), label='Difference Observed')
    # Axes.plot(x_all, y_all, color=(1, 0, 0), label='Normal distribution')
    # Axes.set_xlabel('Z values')
    # # plt.xlim([D_bar - 4.2 * S_D, D_bar + 4.2 * S_D])
    # plt.xlim([-5, 5])
    # plt.ylim([0, 0.45])
    # plt.legend(loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.15))
    # # plt.show()
    # plt.close()
    #
    # # Plot in data space
    # d_CI = (z_CI + D_bar) * S_D
    # dx_CI = np.arange(-d_CI, d_CI, Step)
    # dy_CI = norm.pdf(dx_CI, D_bar, S_D)
    # d_y = norm.pdf(d, D_bar, S_D)
    #
    # Figure, Axes = plt.subplots(1, 1, figsize=(5.5, 4.5), dpi=100)
    # Axes.fill_between(dx_CI, dy_CI, 0, alpha=0.15, color=(0, 0, 0), label=str(int(0.95 * 100)) + '% CI')
    # Axes.plot([d, d], [0, d_y], color=(0, 0, 1), label='Difference Observed')
    # Axes.plot(D, TheoreticalDistribution, color=(1, 0, 0), label='Normal distribution')
    # Axes.set_xlabel('D values')
    # plt.ylim([0,max(TheoreticalDistribution)*1.05])
    # plt.legend(loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.15))
    # plt.show()

    return d, RejectionRange, p
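The value p returned above is the two-sided permutation p-value, i.e. the fraction of resampled differences at least as extreme in magnitude as the observed difference d:

$$
p \;=\; \frac{\#\{\, i : |D_i| \ge |d| \,\}}{N_{\mathrm{Repetition}}}.
$$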
Example #24
# Below we visualize the distributions of SBP for the whole population
# (i.e. the marginal distribution), and for the subpopulations of people
# who are 40 and 60 years old, respectively.  This visualization is based
# on a model that has been fit to the data.  It may be misleading if the
# model does not fit the data well.  This is an important topic, but we
# will set it aside for now.

# +
sbp = np.linspace(50, 200, 100)  # Grid of possible blood pressure values
mn0 = da.BPXSY1.mean()  # marginal mean blood pressure
sd0 = da.BPXSY1.std()  # marginal SD of blood pressure

from scipy.stats.distributions import norm

y0 = norm.pdf(sbp, mn0, sd0)

mn1 = np.dot(result.params,
             [1, 40])  # Conditional mean for a 40 year old person
sd1 = np.sqrt(result.scale)
y1 = norm.pdf(sbp, mn1, sd1)

mn2 = np.dot(result.params,
             [1, 60])  # Conditional mean for a 60 year old person
sd2 = np.sqrt(result.scale)
y2 = norm.pdf(sbp, mn2, sd2)

sns.set_style("whitegrid")
ax = sns.lineplot(sbp, y0, label="Overall")
sns.lineplot(sbp, y1, label="40 year old")
sns.lineplot(sbp, y2, label="60 year old")
Example #25
def v_truncate(x: float) -> float:
    """Computes the additive correction term to the moment matching
    approximation of the truncated Gaussian as detailed in original paper.
    """
    return norm.pdf(x) / norm.cdf(x)
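As a sanity check (a sketch, not part of the original module): for a standard normal truncated below at -x, the conditional mean equals norm.pdf(x)/norm.cdf(x), which is exactly what v_truncate returns.

import numpy as np
from scipy.stats import norm

x = 0.7
rng = np.random.default_rng(1)
draws = rng.standard_normal(2_000_000)
print(draws[draws > -x].mean())          # Monte-Carlo mean of N(0,1) given X > -x
print(norm.pdf(x) / norm.cdf(x))         # closed form; both print roughly 0.412 for x = 0.7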
Example #26
 def pdf(self, data):
     pdf_y_given_x = norm.pdf(data[:, 1],
                              loc=np.sin(4 * data[:, 0]) + 0.5 * data[:, 0],
                              scale=self.scale * np.abs(data[:, 0]))
     return pdf_y_given_x
Example #27
def plot_EgSAX(data, segMeans, alphaSize, compRatio):
  
  from matplotlib.ticker import NullFormatter
  from plot_utils import adjust_spines
  
  # Stored originally as wordSize x numStreams, because the plot function plots columns by default
  
  if segMeans.ndim == 1:
    numStreams = 1
    segMeans = np.atleast_2d(segMeans).T
  else:
    numStreams = segMeans.shape[1]
  wordSize = segMeans.shape[0]
  bpList = bp_lookup(alphaSize)  
  
  PAAStreams = np.zeros((wordSize*compRatio, numStreams)) 
  
  for stream in range(numStreams):
    temp = []
    for mean in segMeans[:,stream]: 
      temp.extend([mean]*int(compRatio)) 
    
    PAAStreams[:,stream] = np.array(temp)
  
  # Multiple Ploting 
  nullfmt   = NullFormatter()         # no labels
  
  # start with a rectangular Figure
  plt.figure()  
  # Plot Gaussian axes (l, b, r, t)
  axGauss = plt.axes([0.05,0.1, 0.2, 0.85]) 
  adjust_spines(axGauss, [])  
  # Plot Sax Axes
  axSAX = plt.axes([0.3,0.1, 0.65, 0.85], )
  
  # no labels
  #axSAX.xaxis.set_major_formatter(nullfmt)
  axGauss.yaxis.set_major_formatter(nullfmt)
  axGauss.yaxis.set_minor_formatter(nullfmt)
  
  # the SAX plot:
  axSAX.plot(PAAStreams, drawstyle = 'steps', color = 'r', lw = 1.5)
  for bp in bpList:
    axSAX.axhline(y=bp, xmin=0, xmax=PAAStreams.shape[0], ls = '--', color = 'k')
  axSAX.plot(data, color = 'b', lw = 1)
  
  # the Gaussian plot
  y = np.linspace(-3, 3, 1000)
  gauss_data = gauss_dist.pdf(y)
  axGauss.plot(-gauss_data, y)

  for bp in bpList:
      axGauss.axhline(y=bp, xmin=0, xmax=PAAStreams.shape[0], ls = '--', color = 'k')  

  # Tweak axes
  axSAX.set_ylim( axGauss.get_ylim() )
  #axGauss.set_xlim( 0, -0.5 )
  adjust_spines(axGauss, [])
  adjust_spines(axSAX, ['left', 'bottom'])
  plt.xlabel('Time Steps')
  
  
  plt.show()  
  
  return PAAStreams
Example #28
File: test.py Project: tel/Eyerez
def zLogLik(x, y, z):
    return log(n.pdf(x*y, loc = z, scale = 0.2))
Example #29
 def pdf(self, data):
     return norm.pdf(data-self.limit, loc=self.mu, scale=math.sqrt(self.sigma))
Example #30
    def smooth_overlap(e_k_3d, e=0., scale=0.02, axis=2):
        e_k_3d[np.isnan(e_k_3d)] = -np.inf
        t1 = norm.pdf(e_k_3d, loc=e, scale=scale)
        # todo interpolate axis 2

        return np.sum(t1, axis=(axis, 3))
Example #31
 def pdf(self, data):
     #print self.sigma
     return norm.pdf(data, loc=self.mu, scale=math.sqrt(self.sigma))
Example #32
from scipy.stats import kde

x1 = np.random.normal(-1, 0.5, 15)

# parameters: (loc=0.0, scale=1.0, size=None)

x2 = np.random.normal(6, 1, 10)
y = np.r_[x1, x2]

# r_ Translates slice objects to concatenation along the first axis.

x = np.linspace(min(y), max(y), 100)
s = 0.4   # Smoothing parameter

kernels = np.transpose([norm.pdf(x, yi, s) for yi in y])

# Calculate the kernels
density = kde.gaussian_kde(y)

# plt.plot(x, kernels, 'k:')
# plt.plot(x, kernels.sum(1), 'r')
# plt.plot(y, np.zeros(len(y)), 'bo', ms=10)

xgrid = np.linspace(x.min(), x.max(), 200)
# plt.hist(y, bins=28, normed=True)
# plt.plot(xgrid, density(xgrid), 'r-')

# Create a bi-modal distribution with a mixture of Normals.

x1 = np.random.normal(-1, 2, 15) # parameters: (loc=0.0, scale=1.0, size=None)
Example #33
def count_likelihood_standard(this_counts,tot_counts,this_port,this_cv,this_fract_recov=1):
    L = [norm.pdf(this_c,loc=tot_c*this_port*this_fract_recov,scale=(tot_c*this_port*this_fract_recov)*this_cv) \
         for this_c,tot_c in zip(this_counts,tot_counts)]
    logL = numpy.log2(numpy.array(L))
    return sum(logL)
Example #34
## Analyze distribution of Ln(CV)
for i in range(2):
    D = SystemFitted[i]['LogCV'].values
    D.sort()
    D_bar = np.mean(D)
    S_D = np.std(D, ddof=1)
    N_D = len(D)

    ## Kernel density estimation (Gaussian kernel)
    KernelEstimator = np.zeros(N_D)
    NormalIQR = np.abs(norm.interval(0.25, 0, 1)).sum()
    DataIQR = np.abs(np.quantile(D, 0.75)) - np.abs(np.quantile(D, 0.25))
    KernelHalfWidth = 0.9 * N_D**(-1 / 5) * S_D
    for Value in D:
        KernelEstimator += norm.pdf(D - Value, 0, KernelHalfWidth * 2)
    KernelEstimator = KernelEstimator / N_D

    ## Histogram and density distribution
    TheoreticalDistribution = norm.pdf(D, D_bar, S_D)
    Figure, Axes = plt.subplots(1, 1, figsize=(5.5, 4.5), dpi=100)
    Histogram = Axes.hist(D,
                          density=True,
                          bins=20,
                          edgecolor=(0, 0, 1),
                          color=(1, 1, 1),
                          label='Histogram')
    Axes.plot(D, KernelEstimator, color=(1, 0, 0), label='Kernel Density')
    Axes.plot(D,
              TheoreticalDistribution,
              linestyle='--',
Example #35
def QQPlot(DataValues, Alpha_CI=0.95, DataLabel='Data'):

    ### Based on: https://www.tjmahr.com/quantile-quantile-plots-from-scratch/
    ### Itself based on Fox book: Fox, J. (2015)
    ### Applied Regression Analysis and Generalized Linear Models.
    ### Sage Publications, Thousand Oaks, California.

    # Data analysis
    N = len(DataValues)
    X_Bar = np.mean(DataValues)
    S_X = np.std(DataValues)

    # Sort data to get the rank
    Data_Sorted = np.zeros(N)
    Data_Sorted += DataValues
    Data_Sorted.sort()

    # Compute quantiles
    EmpiricalQuantiles = np.arange(0.5, N + 0.5) / N
    TheoreticalQuantiles = norm.ppf(EmpiricalQuantiles, X_Bar, S_X)
    ZQuantiles = norm.ppf(EmpiricalQuantiles, 0, 1)

    # Compute data variance
    DataIQR = np.quantile(DataValues, 0.75) - np.quantile(DataValues, 0.25)
    NormalIQR = np.sum(np.abs(norm.ppf(np.array([0.25, 0.75]), 0, 1)))  # IQR of N(0,1), ~1.349
    Variance = DataIQR / NormalIQR
    Z_Space = np.linspace(min(ZQuantiles), max(ZQuantiles), 100)
    Variance_Line = Z_Space * Variance + np.median(DataValues)

    # Compute alpha confidence interval (CI)
    Z_SE = np.sqrt(norm.cdf(Z_Space) *
                   (1 - norm.cdf(Z_Space)) / N) / norm.pdf(Z_Space)
    Data_SE = Z_SE * Variance
    Z_CI_Quantile = norm.ppf(np.array([(1 - Alpha_CI) / 2]), 0, 1)

    # Create point in the data space
    Data_Space = np.linspace(min(TheoreticalQuantiles),
                             max(TheoreticalQuantiles), 100)

    # QQPlot
    BorderSpace = max(0.05 * abs(Data_Sorted.min()),
                      0.05 * abs(Data_Sorted.max()))
    Y_Min = Data_Sorted.min() - BorderSpace
    Y_Max = Data_Sorted.max() + BorderSpace
    Figure, Axes = plt.subplots(1, 1, figsize=(5.5, 4.5), dpi=100)
    Axes.plot(TheoreticalQuantiles,
              Data_Sorted,
              linestyle='none',
              marker='o',
              mew=0.5,
              fillstyle='none',
              color=(0, 0, 0),
              label=DataLabel)
    Axes.plot(Data_Space,
              Variance_Line,
              linestyle='--',
              color=(1, 0, 0),
              label='Variance :' + str(format(np.round(Variance, 2), '.2f')))
    Axes.plot(Data_Space,
              Variance_Line + Z_CI_Quantile * Data_SE,
              linestyle='--',
              color=(0, 0, 1),
              label=str(int(100 * Alpha_CI)) + '% CI')
    Axes.plot(Data_Space,
              Variance_Line - Z_CI_Quantile * Data_SE,
              linestyle='--',
              color=(0, 0, 1))
    plt.xlabel('Theoretical quantiles (-)')
    plt.ylabel('Empirical quantiles (-)')
    plt.ylim([Y_Min, Y_Max])
    plt.legend(loc='upper center',
               ncol=3,
               bbox_to_anchor=(0.5, 1.15),
               prop={'size': 10})
    plt.show()
    plt.close(Figure)

    return Variance
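A minimal usage sketch for the function above (the sample below is hypothetical, and the imports used inside QQPlot, namely numpy as np, matplotlib.pyplot as plt and scipy.stats.distributions.norm, are assumed to be in scope):

import numpy as np

np.random.seed(0)
Sample = np.random.normal(175, 7, size=60)    # hypothetical data set
Variance = QQPlot(Sample, Alpha_CI=0.95, DataLabel='Sample')
print(Variance)                               # IQR-based spread estimate returned by the function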
Example #36
# The simulation above shows that when the subsample size increases from 100 to 400 (a factor of 4), the standard deviation of the difference between two correlation coefficients decreases by roughly a factor of 2.  The mathematical expression sqrt(2 / m) is an approximation to this standard deviation that can be computed without access to any data.

# ### The shape of sampling distributions

# Above we focused on the magnitude of the difference between a statistic calculated on two independent samples from a population.  Here we focus instead on the shape of the distribution of statistics calculated on subsamples.  As discussed in the lectures, the central limit theorem implies that many (but not all) statistics have approximately normal sampling distributions, even if the underlying data are not close to being normally distributed.
#
# We will illustrate this phenomenon using the systolic blood pressure data from the NHANES study.  First we use a histogram to look at the distribution of individual systolic blood pressure values.  Note that it is somewhat right-skewed.

# In[8]:

sns.distplot(da.BPXSY1.dropna())

# Next we calculate 1000 sample means from 1000 subsamples of size 50 and inspect their distribution.

# In[9]:

m = 50
sbp_mean = []
for i in range(1000):
    dx = da.sample(m)
    sbp_mean.append(dx.BPXSY1.dropna().mean())
sns.distplot(sbp_mean)

# The lines below plot the density of a normal approximation to the data generated above
x = np.linspace(np.min(sbp_mean), np.max(sbp_mean), 100)
from scipy.stats.distributions import norm
y = norm.pdf(x, np.mean(sbp_mean), np.std(sbp_mean))
plt.plot(x, y, color='orange')

# The plots above show that while the distribution of individual systolic blood pressure measures is somewhat skewed to the right, the distribution of means of size 50 is approximately symmetric.  The distribution of means is also approximately normal, as shown by the orange curve, which is the best-fitting normal approximation to the data.
Example #37
steps = [-deme_size, 0, deme_size]
dis_prop = (sigma**2) / (2.0 * deme_size**2)  # Calculate dispersal probability for the deme model
p = np.array([dis_prop, 1 - 2 * dis_prop, dis_prop])
# draw_list = np.random.choice(steps, p=p, size=50000000)  # First do the deme offset
# draw_list = np.around(np.random.normal(scale=sigma, size=10000000))
# draw_list = np.around(np.random.uniform(low=-half_length, high=half_length, size=50000000))
draw_list = np.around(np.random.laplace(scale=scale, size=5000000))

print("Mean: %.2f" % np.mean(draw_list))
print("Std: %.4f" % np.std(draw_list))

# Now plot different dispersal kernels:
x_plot = np.linspace(-10, 10, 100000)
y_norm = norm.pdf(x_plot, scale=2)
y_laplace = laplace.pdf(x_plot, scale=scale)
y_uniform = uniform.pdf(x_plot, scale=2 * half_length, loc=-half_length)

plt.figure()
plt.plot(x_plot, y_laplace, label="Laplace: 3", linewidth=3)
plt.plot(x_plot, y_norm, label="Normal: 0", linewidth=3)
plt.plot(x_plot, y_uniform, label="Uniform: -1.2", linewidth=3, color='y')
plt.ylabel("Probability Density", fontsize=25)
plt.legend(prop={'size': 25})
plt.tick_params(axis='x', labelsize=15)
plt.tick_params(axis='y', labelsize=15)
plt.show()

plt.figure()
x_plot = np.linspace(-14, 14, 100000)
Example #38
 def f(x):  # pylint: disable=invalid-name
     return norm.pdf(x)
Example #39
vbmm = VBMM.VBMM(x, max_components=10)
vbmm.Fit()

mixtures = np.concatenate(
    (vbmm.pis[vbmm.components], vbmm.means[vbmm.components],
     vbmm.sigmas[vbmm.components]))
mixtures = np.reshape(mixtures, (3, -1))

plt.hist(x,
         histtype='stepfilled',
         bins=50,
         alpha=0.85,
         color="#7A68A6",
         normed=True,
         label='Real data')
plt.xlabel('data')
plt.xlim(0, 60)
plt.ylim(0, 0.25)

x_range = np.linspace(x.min() - 1, x.max() + 1, 500)
y_range = np.asarray([
    mixtures[0, i] * norm.pdf(x_range, mixtures[1, i], mixtures[2, i])
    for i in range(mixtures.shape[1])
])
y_range = np.sum(y_range, axis=0)
plt.plot(x_range, y_range, color="#A60628", linewidth=2, label='Estimated pdf')

plt.legend()
plt.title('VBMM')
plt.show()
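The plotted curve is the mixture density implied by the fitted components,

$$
\hat f(x) \;=\; \sum_{i} \pi_i\, \mathcal{N}\!\left(x;\, \mu_i,\, \sigma_i\right),
$$

with the weights, means and scales taken from the rows of `mixtures` (this assumes vbmm.sigmas holds standard deviations, since they are passed as the scale of norm.pdf).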