示例#1
0
    def sample(self, sampleShape):
        """
        Draw random values from the distribution.

        Values are generated with ``scipy.stats.genextreme.rvs`` using
        ``c = -self.shapeParam``, ``loc = self.locParam`` and
        ``scale = self.scaleParam``. Every value ``x`` that satisfies
        ``1 + shapeParam * (x - locParam) / scaleParam <= 0`` is drawn again
        until no such value is left.

        :param sampleShape: shape of the sample
        :return: data sampled from the distribution, it is a numpy array
        of shape 'sampleShape'
        """
        # Distribution parameters shared by every call to the sampler.
        rvsKwargs = dict(c=-self.shapeParam,
                         loc=self.locParam,
                         scale=self.scaleParam)

        data = genextreme.rvs(size=sampleShape, **rvsKwargs)

        # The original author observed that 'rvs' sometimes returns points
        # that have 0 probability of occurrence; such entries are regenerated
        # until none of them remains.
        while True:
            outside = 1 + self.shapeParam * (
                data - self.locParam) / self.scaleParam <= 0
            if not np.any(outside):
                break

            positions = np.where(outside)
            data[positions] = genextreme.rvs(size=data[positions].shape,
                                             **rvsKwargs)

        return data
def test_projection(sample, _NNODES, _NRANKS, _NITER, _PROJ_NNODES, p):
    """
    Bootstrap projected sums of maxima for a doubling series of node counts.

    ``sample`` is flattened to one dimension. Starting at ``_NNODES`` and
    doubling while the node count is ``<= _PROJ_NNODES + 1``, 50 bootstrap
    rounds are run per node count. Each round resamples the flattened data
    with replacement, reshapes it to ``(-1, _NITER)`` and takes the maximum
    along axis 0. These maxima are passed to ``pwm_fit`` and ``mom_fit``;
    from each fit ``nodes * _NITER`` ``genextreme`` values are drawn,
    reshaped the same way, and the sum of their axis-0 maxima is recorded.

    :param sample: array whose first two shape entries multiply to its size
    :param _NNODES: starting node count
    :param _NRANKS: multiplier applied to node counts
    :param _NITER: number of columns used when reshaping
    :param _PROJ_NNODES: upper bound (plus one) for the node count
    :param p: not used by this function
    :return: tuple ``(xs, sums)``; ``sums`` holds the ``mom_fit`` based sums
        followed by the ``pwm_fit`` based sums, and ``xs`` holds the
        matching ``nodes * _NRANKS`` values repeated for both halves
    """
    flat = np.reshape(sample, sample.shape[0] * sample.shape[1])

    # Per-round results. Only the mom/pwm sums and the x values are
    # returned; the other lists are filled but not used afterwards.
    mom_sums = []
    observed_sums = []
    pwm_sums = []
    emv_values = []
    em_pwm_values = []
    ranks = []

    def summed_maxima(fit, count):
        # Draw `count` values from the fitted distribution, reshape them and
        # return the sum of the maxima taken along axis 0.
        draws = genextreme.rvs(fit[0], loc=fit[1], scale=fit[2], size=count)
        return np.sum(np.amax(np.reshape(draws, (-1, _NITER)), axis=0))

    nodes = _NNODES
    # perform a series of intermediate projections
    while nodes <= _PROJ_NNODES + 1:
        base_ranks = _NNODES * _NRANKS
        target_ranks = nodes * _NRANKS
        draw_count = nodes * _NITER

        # use a bootstrap of size 50 per projection
        for _ in range(50):
            resampled = np.random.choice(flat, flat.shape[0], replace=True)
            maxima = np.amax(np.reshape(resampled, (-1, _NITER)), axis=0)
            observed_sums.append(np.sum(maxima))

            # pwm-based pass
            em_pwm_values.append(
                em_pwm(maxima, base_ranks, target_ranks) * _NITER)
            pwm_sums.append(summed_maxima(pwm_fit(maxima), draw_count))

            # mom-based pass
            emv_values.append(emv(maxima, base_ranks, target_ranks) * _NITER)
            mom_sums.append(summed_maxima(mom_fit(maxima), draw_count))

            ranks.append(target_ranks)
        nodes *= 2

    # mom results first, then pwm results; x values repeated to match
    return ranks + ranks, mom_sums + pwm_sums
示例#3
0
    def SimulateSample(self, n=9, m=1000):
        """Simulate and plot the sampling distribution of the sample mean.

        Draws ``m`` samples of size ``n`` from ``genextreme`` with
        ``c=self.shape``, ``loc=self.loc`` and ``scale=self.scale`` and
        collects the mean of each sample. It then prints
        ``self.RMSE(means, self.loc)`` and the 5th/95th percentiles of the
        means, and plots the CDF of the means with a vertical line at each
        of the two percentiles.

        n: sample size
        m: number of iterations
        """
        def MarkPosition(x, y=1):
            # Light-grey vertical segment from (x, 0) to (x, y).
            thinkplot.Plot([x, x], [0, y], color='0.8', linewidth=3)

        means = [
            np.mean(genextreme.rvs(c=self.shape,
                                   loc=self.loc,
                                   scale=self.scale,
                                   size=n))
            for _ in range(m)
        ]

        stderr = self.RMSE(means, self.loc)
        print('Erro Padrão', stderr)

        cdf = thinkstats2.Cdf(means)
        lower = cdf.Percentile(5)
        upper = cdf.Percentile(95)
        print('Intervalo de Confiança: ', (lower, upper))
        for bound in (lower, upper):
            MarkPosition(bound)

        # plot the CDF
        thinkplot.Cdf(cdf)
        # Saving the figure (thinkplot.Save with root='estimation1',
        # xlabel='sample mean', ylabel='CDF', title='Sampling distribution')
        # is disabled.
def get_rvs_data(data,
                 validation_window,
                 ticks=(0.7, 0.8, 0.9, 1),
                 interpolator=interpolate.InterpolatedUnivariateSpline,
                 **kwargs):
    """Draw ``validation_window`` ``genextreme`` variates.

    The shape argument, location and scale are obtained from
    ``forecast_params``, which receives ``data``, ``ticks``,
    ``interpolator`` and any extra keyword arguments. A scale that is not
    positive is replaced by 0 before sampling.

    Returns the array produced by ``genextreme.rvs``.
    """
    arg, loc, scale = forecast_params(data=data,
                                      ticks=ticks,
                                      interpolator=interpolator,
                                      **kwargs)

    # Clamp the scale at zero.
    # NOTE(review): presumably this avoids passing a negative scale to
    # scipy -- confirm.
    clamped_scale = scale if scale > 0 else 0

    return genextreme.rvs(c=arg,
                          loc=loc,
                          scale=clamped_scale,
                          size=validation_window)
示例#5
0
def datasets():
    """Return two random pandas Series of 1000 values each.

    The first Series holds ``genextreme`` draws with ``c=-0.2``; the second
    holds ``expon`` draws. Both use the default location and scale.
    """
    gev_draws = genextreme.rvs(c=-0.2, size=1000)
    expon_draws = expon.rvs(size=1000)
    return pd.Series(gev_draws), pd.Series(expon_draws)
def gev_project(params, k, samples=1000):
    """Fit ``gevfit`` to simulated maxima of ``k`` ``genextreme`` draws.

    ``params`` is unpacked as ``(shape, loc, scale)`` and passed
    positionally to ``genextreme.rvs``. ``k * samples`` maxima are
    generated, each being the largest of ``k`` draws, and the result of
    ``gevfit.fit`` on that list is returned.
    """
    shape, loc, scale = params

    # NOTE(review): the number of maxima is k * samples, not samples --
    # confirm this is intended.
    block_maxima = []
    for _ in range(k * samples):
        block_maxima.append(max(genextreme.rvs(shape, loc, scale, k)))

    return gevfit.fit(block_maxima)
# Values of emv(...) * _NITER, one per bootstrap round.
emv_block=[]


# i * _NRANKS, one entry per bootstrap round.
x=[]
# Step through node counts _NNODES, 2*_NNODES, ... not exceeding _PROJ_NNODES.
# (sone, lblock, mblock, p and the helper functions are defined outside this
# fragment.)
for i in range(_NNODES,_PROJ_NNODES+1,_NNODES):
    print(i)

    # 30 rounds per node count
    for j in range(30):
        # random permutation of sone, reshaped to _NITER columns
        stemp=np.random.permutation(sone)
        sblock=np.reshape(stemp, (-1,_NITER))
        # maximum along axis 0, then record the sum of those maxima
        mx1=np.amax(sblock, axis=0)
        lblock.append(np.sum(mx1))

        emv_block.append(emv(mx1, _NNODES, i)*_NITER)
        # fit returned by mom_fit is used as (c, loc, scale) for genextreme
        momfit=mom_fit(mx1)
        r = genextreme.rvs(momfit[0], loc=momfit[1], scale=momfit[2], size=i*_NITER)
        # reshape and take the max per iteration
        momblock=np.reshape(r, (-1,_NITER))
        mx2=np.amax(momblock, axis=0)
        # append the sum of maxima
        mblock.append(np.sum(mx2))
        x.append(i*_NRANKS)


arr_block=np.array(lblock)

# get confidence intervals using quantiles
lowCI=np.percentile(arr_block, p)
#highCI=np.percentile(arr_block, 100-p)
# mean of the recorded sums of maxima
block_workload=sum(lblock)/len(lblock)
print("one workload with B {}".format(block_workload))
示例#8
0
# Display the probability density function (``pdf``) between the 1st and
# 99th percentiles of the distribution with shape parameter ``c``
# (``c``, ``ax``, ``np`` and ``plt`` are defined outside this fragment):

x = np.linspace(genextreme.ppf(0.01, c), genextreme.ppf(0.99, c), 100)
ax.plot(x, genextreme.pdf(x, c), 'r-', lw=5, alpha=0.6, label='genextreme pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = genextreme(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = genextreme.ppf([0.001, 0.5, 0.999], c)
np.allclose([0.001, 0.5, 0.999], genextreme.cdf(vals, c))
# True

# Generate random numbers:

r = genextreme.rvs(c, size=1000)

# And compare the histogram. ``density=True`` normalises the histogram so it
# can be compared with the pdf; it replaces the ``normed`` keyword, which
# current Matplotlib releases no longer accept.

ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
 def rvs(self, size, c=None, loc=None, scale=None):
     """Draw ``size`` random variates from ``genextreme``.

     Each of ``c``, ``loc`` and ``scale`` is combined through ``ifnone``
     with the instance's own value (``self.c``, ``self.loc()`` and
     ``self.scale()`` respectively) before being passed positionally to
     ``genextreme.rvs``.
     NOTE(review): ``ifnone`` presumably returns its second argument when
     the first is None -- confirm against its definition.
     """
     shape_arg = ifnone(c, self.c)
     loc_arg = ifnone(loc, self.loc())
     scale_arg = ifnone(scale, self.scale())
     return genextreme.rvs(shape_arg, loc_arg, scale_arg, size)
示例#10
0
import numpy as np
from scipy.optimize import minimize
from scipy.stats import genextreme

# Simulation settings: n observations per site, ns sites, p covariates.
n = 3
ns = 2
p = 2

# genextreme parameters used for the simulation: (loc, scale, c).
true_theta = np.array([100.0, 30.0, 0.1])
# All-zero coefficient vector of length p.
true_beta = np.zeros(p)

# One response vector and one (n, p) covariate matrix per site.
xlist, zlist = [], []
for i in range(ns):
    z = np.random.normal(size=(n, p))
    x = genextreme.rvs(c=true_theta[2],
                       loc=true_theta[0],
                       scale=true_theta[1],
                       size=n)
    xlist.append(x)
    zlist.append(z)

def gevreg_m(xlist, zlist, lambda =0 ):
    p = zlist[1].shape[1]
    ns = len(xlist)
    tvec = np.repeat(0.0, ns*3+p)
    

    def lgev(x, loc = 0, scale = 1, shape = 0):
        if (scale <= 0) :
            return ( -1e+6)
        x = (x - loc)/scale
        if (shape == 0):
示例#11
0
from scipy.stats import genextreme

# Draw 5000 variates from genextreme. Following the original note
# "genextreme.pdf(xi, mu, sigma)", the positional arguments are the shape
# (0.5), the location (0.2) and the scale (0.3).
r = genextreme.rvs(0.5, 0.2, 0.3, size=5000)
# Print one value per line.
for it in r:
    print(it)

# Recorded result, in the order mu, sigma, xi:
#
#     .1991     .2954     .5047   CONVGD
#
# NOTE(review): this looks like the output of fitting the printed values
# with a tool that is not part of this snippet -- confirm.
示例#12
0
def gev_resample_project(samples, nsamples, k):
    """Simulate ``nsamples`` maxima of ``k`` draws from a fitted GEV.

    ``gevfit.fit(samples)`` supplies ``(shape, loc, scale)``, which are
    passed positionally to ``genextreme.rvs``. For each of the ``nsamples``
    rounds, ``k`` values are drawn and the largest one is kept.

    Returns the list of maxima.
    """
    shape, loc, scale = gevfit.fit(samples)

    maxima = []
    for _ in range(nsamples):
        maxima.append(max(genextreme.rvs(shape, loc, scale, k)))
    return maxima