def iso_heterochronicity(language_annotator):
    # stats.ks_2samp(uniform_ratio_list,your_list)
    lengths = data["length_filtered"][data["language_annotator"] == language_annotator][:-1].values
    lengths = lengths[~numpy.isnan(lengths)]
    test_iso = stats.kstest(lengths, "norm", N=1000000, args=([numpy.mean(lengths)]))[0]
    test_het = stats.kstest(lengths, "uniform", N=1000000, args=([0, 0.5]))[0]
    return test_iso, test_het
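The helper above scores the same length sample against two competing nulls: a normal distribution ("iso") and a uniform distribution on [0, 0.5] ("hetero"). A minimal self-contained sketch of that comparison on toy data, with the DataFrame handling from the original omitted:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
lengths = rng.normal(loc=0.25, scale=0.05, size=500)  # toy stand-in for the filtered lengths

# KS distance to a normal null fitted to the sample (smaller statistic = better fit)
test_iso = stats.kstest(lengths, "norm", args=(lengths.mean(), lengths.std()))[0]
# KS distance to a uniform null on [0, 0.5]
test_het = stats.kstest(lengths, "uniform", args=(0, 0.5))[0]
print(test_iso, test_het)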
Example #2
File: stats.py Project: bioragul/ddg
def _get_xy_dataset_statistics(x_values, y_values, fcorrect_x_cutoff = 1.0, fcorrect_y_cutoff = 1.0, x_fuzzy_range = 0.1, y_scalar = 1.0):
    '''
    A function which takes two lists of values of equal length with corresponding entries and returns a dict containing
    a variety of metrics.
    :param x_values: A list of values for the X-axis (experimental values).
    :param y_values: A list of values for the Y-axis (predicted values).
    :param fcorrect_x_cutoff: See get_xy_dataset_statistics.
    :param fcorrect_y_cutoff: See get_xy_dataset_statistics.
    :param x_fuzzy_range: See get_xy_dataset_statistics.
    :param y_scalar: See get_xy_dataset_statistics.
    :return: A table of statistics.
    '''
    from scipy.stats import pearsonr, spearmanr, normaltest, ks_2samp, kstest, norm
    assert(len(x_values) == len(y_values))
    return dict(
        pearsonr = pearsonr(x_values, y_values),
        spearmanr = spearmanr(x_values, y_values),
        gamma_CC = gamma_CC(x_values, y_values),
        MAE = mae(x_values, y_values),
        normaltestx = normaltest(x_values),
        normaltesty = normaltest(y_values),
        kstestx = kstest(x_values, 'norm'),
        kstesty = kstest(y_values, 'norm'),
        ks_2samp = ks_2samp(x_values, y_values),
        fraction_correct = fraction_correct(x_values, y_values, x_cutoff = fcorrect_x_cutoff, y_cutoff = fcorrect_y_cutoff),
        fraction_correct_fuzzy_linear = fraction_correct_fuzzy_linear(x_values, y_values, x_cutoff = fcorrect_x_cutoff, x_fuzzy_range = x_fuzzy_range, y_scalar = y_scalar),
    )
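A self-contained sketch of just the SciPy statistics collected above, run on toy data; the project-specific helpers (gamma_CC, mae, fraction_correct, fraction_correct_fuzzy_linear) are left out here.

import numpy as np
from scipy.stats import pearsonr, spearmanr, normaltest, kstest, ks_2samp

rng = np.random.default_rng(1)
x = rng.normal(size=200)                  # "experimental" values
y = x + rng.normal(scale=0.5, size=200)   # "predicted" values

metrics = dict(
    pearsonr=pearsonr(x, y),
    spearmanr=spearmanr(x, y),
    normaltestx=normaltest(x),
    normaltesty=normaltest(y),
    kstestx=kstest(x, 'norm'),
    kstesty=kstest(y, 'norm'),
    ks_2samp=ks_2samp(x, y),
)
for name, value in metrics.items():
    print(name, value)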
Example #3
    def __init__(self, data, **kwargs):
        r"""Constructor. This will fit both chi2 function in the different
        regimes.
            *data*      -   Data sample to use for fitting

        Keyword Argument:
            *chi1/2*    -   Keyword arguments like floc, fshape, etc. that are
                            passed to the constructor of the corresponding
                            chi2 scipy object.

        """
        data = np.asarray(data)

        c1 = kwargs.pop("chi1", dict())
        c2 = kwargs.pop("chi2", dict())

        self.par1 = chi2.fit(data[data > 0.], **c1)
        self.par2 = chi2.fit(-data[data < 0.], **c2)

        self.f1 = chi2(*self.par1)
        self.f2 = chi2(*self.par2)

        self.eta = float(np.count_nonzero(data > 0.)) / len(data)
        self.eta_err = np.sqrt(self.eta * (1. - self.eta) / len(data))

        # get fit-quality
        self.ks1 = kstest(data[data > 0.], "chi2", args=self.par1)[1]
        self.ks2 = kstest(-data[data < 0.], "chi2", args=self.par2)[1]

        return
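A minimal sketch of the fit-quality step used above, assuming nothing beyond SciPy: fit a chi2 distribution to a synthetic sample and use the KS p-value to judge the fit (in the class above, frozen-parameter keywords such as floc would arrive through the chi1/chi2 dicts).

import numpy as np
from scipy.stats import chi2, kstest

rng = np.random.default_rng(2)
data = chi2.rvs(df=3, size=1000, random_state=rng)

par = chi2.fit(data, floc=0)           # (df, loc, scale)
p_fit = kstest(data, "chi2", args=par)[1]
print(par, p_fit)                      # a large p-value indicates an acceptable fit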
Example #4
def start_routine(filename, P, Me_range, Re_range, n_range, noise_level, R, zp):
	noise = make_noise(R, noise_level)

	with open(filename, 'wb') as csvfile:
		writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
		header = ['%s = %.2f' % (P[i].name, P[i].value) for i in ['MeD', 'ReD', 'nD']]
		writer.writerow(header + ['range= [%.1f, %.1f]' % (R[0], R[-1])] + ['noise level = %.1f' % noise_level])
		writer.writerow(['MeB_initial', 'ReB_initial', 'nB_initial', 'MeD_final', 'ReD_final', 'nD_final', 'MeB_final', 'ReB_final', 'nB_final',\
		 'redchi2_all', 'redchi2_excl', 'KS', 'KS_excl'])
		for nB in n_range:
			for ReB in Re_range:
				for MeB in Me_range:
					pars = S.copy_params(P, False)
					pars.add_many(('MeB', float(MeB), True, 1.), ('ReB', float(ReB), True, 0.01), ('nB', float(nB), True, 0.1))
					pars['nD'].vary = False
					test_gal = S.sersic2(pars, R, zp, False) + noise

					new_pars = S.copy_params(pars, False)
					
					fit_data, res_excl = S.fit(new_pars, S.sersic2, R, zp, test_gal, weights=None, fit_range=None, redchi_marker=30.)

					initials = [pars[i].value for i in ['MeB', 'ReB', 'nB']]
					if fit_data is None:
						writer.writerow(['N/A'] * 13) 
					else:
						finals = [new_pars[i].value for i in ['MeB', 'ReB', 'nB', 'MeD', 'ReD', 'nD']]
						redchi_excl = np.sum(res_excl) / fit_data.nfree
						KS, KS_excl = stats.kstest(fit_data.residual, 'norm')[1], stats.kstest(res_excl, 'norm')[1]
						writer.writerow(initials + finals + [np.sum(fit_data.residual) / fit_data.nfree, redchi_excl, KS, KS_excl])
def fit_maxwell(input):
	print(os.path.basename(input))
	data = []
	with open(input) as file:
		for line in file:
			data.append(float(line.strip()))
	#total = sum(data)
	#data = [d / total for d in data]
	#print data

	maxwell = stats.maxwell
	
	params = maxwell.fit(data, floc=0)
	print(params)
	d, p = stats.kstest(data, "maxwell", args=params, mode="asymp")  # test against the fitted parameters
	print(d, p)
	norm = stats.norm

	params = norm.fit(data) #, floc=0)
	print(params)
	d, p = stats.kstest(data, "norm", args=params, mode="asymp")  # test against the fitted parameters
	print(d, p)

	plt.hist(data, bins=50, density=True, alpha=0.6, color='g')

	# Plot the PDF.
	xmin, xmax = plt.xlim()
	x = np.linspace(xmin, xmax, 100)
	p = norm.pdf(x, params[0], params[1])
	plt.plot(x, p, 'k', linewidth=2)
	title = "Fit results: mu = %.2f,  std = %.2f" % params
	plt.title(title)

	plt.show()
Example #6
  def testExponentialSampleMultiDimensional(self):
    with self.test_session():
      batch_size = 2
      lam_v = [3.0, 22.0]
      lam = constant_op.constant([lam_v] * batch_size)

      exponential = exponential_lib.Exponential(rate=lam)

      n = 100000
      samples = exponential.sample(n, seed=138)
      self.assertEqual(samples.get_shape(), (n, batch_size, 2))

      sample_values = samples.eval()

      self.assertFalse(np.any(sample_values < 0.0))
      for i in range(2):
        self.assertLess(
            stats.kstest(
                sample_values[:, 0, i],
                stats.expon(scale=1.0 / lam_v[i]).cdf)[0],
            0.01)
        self.assertLess(
            stats.kstest(
                sample_values[:, 1, i],
                stats.expon(scale=1.0 / lam_v[i]).cdf)[0],
            0.01)
Example #7
    def test_unit_vector(self):
        with self._model():
            UnitVector("x", shape=(2, 3))
            trace = self._sample()

        # Make sure that the unit vector constraint is satisfied
        assert np.allclose(np.sum(trace["x"]**2, axis=-1), 1.0)

        # Pull out the component and compute the angle
        x = trace["x"][:, :, 0]
        y = trace["x"][:, :, 1]
        z = trace["x"][:, :, 2]
        theta = np.arctan2(y, x)

        # The angle should be uniformly distributed
        cdf = lambda x: np.clip((x + np.pi) / (2 * np.pi), 0, 1)  # NOQA
        for i in range(theta.shape[1]):
            s, p = kstest(theta[:, i], cdf)
            assert s < 0.05

        # As should the vertical component
        cdf = lambda x: np.clip((x + 1) / 2, 0, 1)  # NOQA
        for i in range(z.shape[1]):
            s, p = kstest(z[:, i], cdf)
            assert s < 0.05
Example #8
def test_iterative_imputer_truncated_normal_posterior():
    #  test that the values that are imputed using `sample_posterior=True`
    #  with boundaries (`min_value` and `max_value` are not None) are drawn
    #  from a distribution that looks gaussian via the Kolmogorov Smirnov test.
    #  note that starting from the wrong random seed will make this test fail
    #  because random sampling doesn't occur at all when the imputation
    #  is outside of the (min_value, max_value) range
    pytest.importorskip("scipy", minversion="0.17.0")
    rng = np.random.RandomState(42)

    X = rng.normal(size=(5, 5))
    X[0][0] = np.nan

    imputer = IterativeImputer(min_value=0,
                               max_value=0.5,
                               sample_posterior=True,
                               random_state=rng)

    imputer.fit_transform(X)
    # generate multiple imputations for the single missing value
    imputations = np.array([imputer.transform(X)[0][0] for _ in range(100)])

    assert all(imputations >= 0)
    assert all(imputations <= 0.5)

    mu, sigma = imputations.mean(), imputations.std()
    if sigma == 0:
        sigma += 1e-12
    ks_statistic, p_value = kstest((imputations - mu) / sigma, 'norm')
    # we want to fail to reject null hypothesis
    # null hypothesis: distributions are the same
    assert ks_statistic < 0.2 or p_value > 0.1, \
        "The posterior does not appear to be normal"
Example #9
def get_chiKS(profile, fitsDF, mu_list, h_list, break_R, bounds, infoDF):
	P = lm.Parameters()
	P.add_many(('mu01',mu_list[0]), ('mu02', mu_list[1]), ('h1', h_list[0]), ('h2', h_list[1]))
	P.add('Rbr', break_R)
	P.add_many(('nB', fitsDF.nB), ('ReB', fitsDF.ReB), ('MB', fitsDF.MB), ('deltaRe', 1., True), ('BD_ratio', fitsDF.BD_ratio), ('ReD', fitsDF.ReD))
	res = (profile.I - total_model(P, profile.R.values, infoDF.zp, False)) / profile.I_err
	res_norm = F.sersic(P, infoDF.zp, profile.R.values, profile.I.values, profile.I_err, False) 
	return np.sum(res**2.), stats.kstest(res, 'norm'), np.sum(res_norm**2.), stats.kstest(res_norm, 'norm')
Example #10
def check_distribution(dist, args, alpha):
    D,pval = stats.kstest(dist,'', args=args, N=1000)
    if (pval < alpha):
        D,pval = stats.kstest(dist,'',args=args, N=1000)
        #if (pval < alpha):
        #    D,pval = stats.kstest(dist,'',args=args, N=1000)
        assert (pval > alpha), "D = " + str(D) + "; pval = " + str(pval) + \
               "; alpha = " + str(alpha) + "\nargs = " + str(args)
Example #11
def check_distribution_rvs(dist, args, alpha, rvs):
    # test from scipy.stats.tests
    # this version reuses existing random variables
    D, pval = stats.kstest(rvs, dist, args=args, N=1000)
    if (pval < alpha):
        D, pval = stats.kstest(dist, '', args=args, N=1000)
        npt.assert_(pval > alpha, "D = " + str(D) + "; pval = " + str(pval) +
                    "; alpha = " + str(alpha) + "\nargs = " + str(args))
Example #12
    def single_exp(self, options):
        """
        This method returns a line in the form 
        Original_variable | real_parameters | number_of_observations | log-likelihood_of_real_params | Estim_params_T2_SSPSC | log-likelihood_T2_SSPSC | p-value_T2_SSPSC | Estim_params_T2_StSI | log-likelihood_T2_StSI | p-value_T2_StSI | AIC_selected_model | AIC_relative_prob
        The input is an array of options containing:
        ['choice of distrib', 'param1, param2', 'number of values']
        The options determine the kind of experiment to run.
        The procedure is to simulate data with one distribution and then
        to use a KS test to determine whether the data come from a
        SSPSC or from a StSI model.
        It also computes AIC values in order to say which model explains the simulated data better.
        """
        params = options[1].split(',')
        if options[0]=='1':
            alpha=float(params[0])
            T=float(params[1])
            X = T2_SSPSC(alpha, T)
            type_of_variable = 'T2_SSPSC'
            real_params = '({}, {})'.format(alpha, T)
        elif options[0]=='2':
            n=int(params[0])
            M=float(params[1])
            X = T2_StSI(n, M)
            type_of_variable = 'T2_StSI'
            real_params = (n, M).__str__()
        number_of_observations = int(options[2])
        alpha_integer = options[3]
        
        # We are going to simulate "number_of_observations" independent values. Then we use a half of that values for parameters estimation and the other half for the KS-test
        
        # Simulating values
        obs_estim = X.simulate_values(number_of_observations // 2)
        obs_test = X.simulate_values(number_of_observations // 2)
        obs = obs_estim + obs_test
        real_likelihood = X.log_likelihood(obs_estim)

        # Estimating parameters of both models and doing a KS test
        B = T2_SSPSC() # We initialize a variable with default parameters
        if int(alpha_integer) == 0:
            [alpha, T, ll_fittedB] = B.exact_maxllk(obs_estim)
        else:
            [alpha, T, ll_fittedB] = B.exact_maxllk_integer(obs_estim)
        fittedB = T2_SSPSC(alpha, T)
        KS_fittedB = kstest(obs_test, fittedB.cdf)
        pvalue_fittedB = KS_fittedB[1]
        S = T2_StSI() # We initialize a variable with default parameters
        [n, M, ll_fittedS] = S.max_likelihood_estimation(obs_estim)
        fittedS = T2_StSI(n, M)
        KS_fittedS = kstest(obs_test, fittedS.cdf)
        pvalue_fittedS = KS_fittedS[1]

        # Computing AIC values
        (best_model, relative_prob) = self.AIC_compare(ll_fittedB, ll_fittedS)

        result_text= type_of_variable+' | '+real_params+' | '+options[2]+' | '+real_likelihood.__str__()+' | '+(fittedB.alpha, fittedB.T).__str__()+' | '+ll_fittedB.__str__()+' |  '+pvalue_fittedB.__str__()+' | '+(fittedS.n, fittedS.M).__str__()+' | '+ll_fittedS.__str__()+' | '+pvalue_fittedS.__str__()+' | '+best_model.__str__()+' | '+relative_prob.__str__()
        observations = obs.__str__()
        return [result_text, observations]
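The split-sample procedure described in the docstring (estimate parameters on one half, KS-test the other half against the fitted CDF) can be sketched with a standard SciPy distribution standing in for the project's T2_SSPSC/T2_StSI models:

import numpy as np
from scipy import stats

rng = np.random.default_rng(3)
obs = stats.expon.rvs(scale=2.0, size=1000, random_state=rng)
obs_estim, obs_test = obs[:500], obs[500:]

# Estimate parameters on one half of the data...
params = stats.expon.fit(obs_estim, floc=0)
fitted = stats.expon(*params)

# ...and test the held-out half against the fitted CDF
D, pvalue = stats.kstest(obs_test, fitted.cdf)
print(params, D, pvalue)  # a small p-value would argue against the fitted model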
Example #13
def check_distribution_rvs(distfn, args, alpha, rvs):
    ## signature changed to avoid calling a distribution by name
    # test from scipy.stats.tests
    # this version reuses existing random variables
    D,pval = stats.kstest(rvs, distfn.cdf, args=args, N=1000)
    if (pval < alpha):
        D,pval = stats.kstest(distfn.rvs, distfn.cdf, args=args, N=1000)
        npt.assert_(pval > alpha, "D = " + str(D) + "; pval = " + str(pval) +
               "; alpha = " + str(alpha) + "\nargs = " + str(args))
def printresults(sample, arg, bres, kind='bootstrap'):
    '''calculate and print Bootstrap or Monte Carlo result

    Parameters
    ----------
    sample : array
        original sample data
    arg : float   (for general case will be array)
    bres : array
        parameter estimates from Bootstrap or Monte Carlo run
    kind : {'bootstrap', 'montecarlo'}
        output is printed for Bootstrap (default) or Monte Carlo

    Returns
    -------
    None, currently only printing

    Notes
    -----
    still a bit of a mess because it is used for both Bootstrap and Monte Carlo

    made correction:
        reference point for bootstrap is estimated parameter

    not clear:
        I'm not doing any ddof adjustment in estimation of variance, do we
        need ddof>0 ?

    todo: return results and string instead of printing

    '''
    print('true parameter value')
    print(arg)
    print('MLE estimate of parameters using sample (nobs=%d)' % (nobs))
    argest = distr.fit_fr(sample, frozen=[np.nan, 0.0, 1.0])
    print(argest)
    if kind == 'bootstrap':
        #bootstrap compares to estimate from sample
        argorig = arg
        arg = argest

    print('%s distribution of parameter estimate (nrepl=%d)' % (kind, nrepl))
    print('mean = %f, bias=%f' % (bres.mean(0), bres.mean(0)-arg))
    print('median', np.median(bres, axis=0))
    print('var and std', bres.var(0), np.sqrt(bres.var(0)))
    bmse = ((bres - arg)**2).mean(0)
    print('mse, rmse', bmse, np.sqrt(bmse))
    bressorted = np.sort(bres)
    print('%s confidence interval (90%% coverage)' % kind)
    print(bressorted[int(np.floor(nrepl*0.05))], bressorted[int(np.floor(nrepl*0.95))])
    print('%s confidence interval (90%% coverage) normal approximation' % kind)
    print(stats.norm.ppf(0.05, loc=bres.mean(), scale=bres.std()),
          stats.norm.isf(0.05, loc=bres.mean(), scale=bres.std()))
    print('Kolmogorov-Smirnov test for normality of %s distribution' % kind)
    print(' - estimated parameters, p-values not really correct')
    print(stats.kstest(bres, 'norm', (bres.mean(), bres.std())))
Example #15
def test_pos4d_transforms_slit_rotated(photons1000):
    '''Test coordinate transforms on rotated entrance aperture.'''
    p = photons1000

    rotation = axangle2aff(np.array([0, 1, 0]), np.deg2rad(90))
    myslit = marxs.optics.aperture.RectangleAperture(orientation=rotation[:3, :3], zoom=0.5)
    p = myslit.process_photons(p)
    assert np.allclose(p['pos'][:, 2], 0)
    assert kstest(p['pos'][:, 0] + 0.5, "uniform")[1] > 0.01
    assert kstest(p['pos'][:, 1] + 0.5, "uniform")[1] > 0.01
Example #16
def kstest():
    n1 = 200
    n2 = 300
    a = stats.norm.rvs(size=n1, loc=0, scale=1)
    b = stats.norm.rvs(size=n2, loc=0.5, scale=1.5)
    c = stats.norm.rvs(size=n2, loc=0.01, scale=1)
    
    print(stats.ks_2samp(a, b))
    print(stats.ks_2samp(a, c))
    print(stats.kstest(a, 'norm'))
Example #17
 def test_step(self):
     o,c = covariance('c',pm.Gamma('v',3,3,size=2))
     M = pm.MCMC([o])
     M.sample(10000,1000)
     theta1=(np.arctan2(M.trace('c_eigenvalues')[:][:,0,0],M.trace('c_eigenvalues')[:][:,0,1])+np.pi)/2./np.pi
     theta2 =(np.arctan2(M.trace('c_eigenvalues')[:][:,1,0],M.trace('c_eigenvalues')[:][:,1,1])+np.pi)/2./np.pi
     
     d1,p1 = stats.kstest(theta1,'uniform')
     d2,p2 = stats.kstest(theta2,'uniform')
     
     assert(p1>.05)
     assert(p2>.05)
def check_normality():
    '''Check if the distribution is normal.'''
    
    # Set the parameters
    numData = 1000
    myMean = 0
    mySD = 3
    
    # To get reproducible values, I provide a seed value
    np.random.seed(1234)   
    
    # Generate and show random data
    data = stats.norm.rvs(myMean, mySD, size=numData)
    fewData = data[:100]
    plt.hist(data)
    plt.show()

    # --- >>> START stats <<< ---
    # Graphical test: if the data lie on a line, they are pretty much
    # normally distributed
    _ = stats.probplot(data, plot=plt)
    plt.show()

    pVals = pd.Series()
    pFewVals = pd.Series()
    # The scipy normaltest is based on D'Agostino and Pearson's test, which
    # combines skew and kurtosis to produce an omnibus test of normality.
    _, pVals['Omnibus']    = stats.normaltest(data)
    _, pFewVals['Omnibus'] = stats.normaltest(fewData)

    # Shapiro-Wilk test
    _, pVals['Shapiro-Wilk']    = stats.shapiro(data)
    _, pFewVals['Shapiro-Wilk'] = stats.shapiro(fewData)
    
    # Or you can check for normality with the Lilliefors test
    _, pVals['Lilliefors']    = lillifors(data)
    _, pFewVals['Lilliefors'] = lillifors(fewData)
    
    # Alternatively, with the original Kolmogorov-Smirnov test
    _, pVals['Kolmogorov-Smirnov']    = stats.kstest((data-np.mean(data))/np.std(data,ddof=1), 'norm')
    _, pFewVals['Kolmogorov-Smirnov'] = stats.kstest((fewData-np.mean(fewData))/np.std(fewData,ddof=1), 'norm')
    
    print('p-values for all {0} data points: ----------------'.format(len(data)))
    print(pVals)
    print('p-values for the first 100 data points: ----------------')
    print(pFewVals)
    
    if pVals['Omnibus'] > 0.05:
        print('Data are normally distributed')
    # --- >>> STOP stats <<< ---
    
    return pVals['Kolmogorov-Smirnov']
Example #19
def test_pos4d_transforms_slit(photons1000, myslit):
    '''Test coordinate transforms on initialization of optical elements.

    The initial 4D transforms should be done to any optical element.
    Here, I pick the entrance aperture for testing, because it places the
    positional vector of the plucker coordinates in a plane, independent
    of the initial values.
    '''

    p = myslit.process_photons(photons1000)
    assert np.allclose(p['pos'][:, 0], 0)
    assert kstest((p['pos'][:, 1] + 2) / 4, "uniform")[1] > 0.01
    assert kstest((p['pos'][:, 2] + 2) / 4, "uniform")[1] > 0.01
    def test_create_dataset(self):
        gen = TSPGenerator(self._num_points)
        data = gen.generate()

        nose.tools.assert_equal(data.shape, (self._num_points, 2))

        # check x axis is drawn from uniform distribution
        D, p_value = stats.kstest(data[:, 0], 'uniform', args=(0, 10))
        nose.tools.assert_greater(p_value, 0.05)

        # check y axis is drawn from uniform distribution
        D, p_value = stats.kstest(data[:, 1], 'uniform', args=(0, 10))
        nose.tools.assert_greater(p_value, 0.05)
Example #21
	def draw_normal (self, xs, bins):

		mean = np.mean(xs)
		std = np.std(xs)
		norm_d_val, norm_p_val = kstest(xs, 'norm')
		t_d_val, t_p_val = kstest(xs, 't', args=(1,))

		print('Mean: {}, Std: {}, KS_D: {}'.format(mean, std, norm_d_val))

		legendtext = '$\mu$ = {:.4f}\n$\sigma$ = {:.4f}\nD (Normal) = {:.4f}\nD (T) = {:.4f}'.format(mean,
		 std, norm_d_val, t_d_val)
		plt.text(-15, 0.12, legendtext, fontsize='x-small')

		y = mlab.normpdf(bins, mean, std)
		plt.plot(bins, y, '--', color='grey')
Example #22
def test_ks(wave, flux, p_optg, p_optl):
    xmin = np.min(wave)
    xmax = np.max(wave)
    x = np.linspace(xmin, xmax, 1000)
    # Gauss
    y_gauss_ord = np.sort(rectagauss(x, *p_optg))
    y_expg_ord = np.sort(flux)
    dng, probg = kstest(y_expg_ord, cdf, args=(y_gauss_ord,))

    # Lorentz
    y_lorentz_ord = np.sort(rectalorentz(x, *p_optl))
    y_expl_ord = np.sort(flux)
    dnl, probl = kstest(y_expl_ord, cdf, args=(y_lorentz_ord,))

    return probg, probl
Example #23
 def test_kstest(self):
     for varname, cdf in self.cdfs.items():
         samples = self.samples[varname]
         if samples.ndim == 1:
             t, p = stats.kstest(samples[::self.ks_thin], cdf=cdf)
             assert self.alpha < p
         elif samples.ndim == 2:
             pvals = []
             for samples_, cdf_ in zip(samples.T, cdf):
                 t, p = stats.kstest(samples_[::self.ks_thin], cdf=cdf_)
                 pvals.append(p)
             t, p = stats.combine_pvalues(pvals)
             assert self.alpha < p
         else:
             raise NotImplementedError()
  def testSamplesAgreeWithCdfForSamplesOverLargeRange(self):
    # Consider the cdf for distribution X, F(x).
    # If U ~ Uniform[0, 1], then Y := F^{-1}(U) is distributed like X since
    # P[Y <= y] = P[F^{-1}(U) <= y] = P[U <= F(y)] = F(y).
    # If F is a bijection, we also have Z = F(X) is Uniform.
    #
    # Make an exponential with large mean (= 100).  This ensures we will get
    # quantized values over a large range.  This large range allows us to
    # pretend that the cdf F is a bijection, and hence F(X) is uniform.
    # Note that F cannot be bijection since it is constant between the
    # integers.  Hence, F(X) (see below) will not be uniform exactly.
    with self.test_session():
      qdist = distributions.QuantizedDistribution(
          distribution=distributions.Exponential(lam=0.01))
      # X ~ QuantizedExponential
      x = qdist.sample_n(n=10000, seed=42)
      # Z = F(X), should be Uniform.
      z = qdist.cdf(x)
      # Compare the CDF of Z to that of a Uniform.
      # dist = maximum distance between P[Z <= a] and P[U <= a].
      # We ignore pvalue, since of course this distribution is not exactly, and
      # with so many sample points we would get a false fail.
      dist, _ = stats.kstest(z.eval(), "uniform")

      # Since the distribution take values (approximately) in [0, 100], the
      # cdf should have jumps (approximately) every 1/100 of the way up.
      # Assert that the jumps are not more than 2/100.
      self.assertLess(dist, 0.02)
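The reasoning in the comment above (if F is the CDF of X, then F(X) is uniform) can be checked directly with a plain SciPy distribution, without the TensorFlow machinery:

import numpy as np
from scipy import stats

rng = np.random.default_rng(4)
x = stats.expon.rvs(scale=100.0, size=10000, random_state=rng)

# Probability integral transform: F(X) should be Uniform[0, 1]
z = stats.expon.cdf(x, scale=100.0)
dist, pvalue = stats.kstest(z, "uniform")
print(dist, pvalue)  # dist should be small; quantizing X, as above, would inflate it slightly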
Example #25
def calc_ks_stats(scores, exp_scores=None):
    from scipy import stats
    if exp_scores is not None:
        (D, p_val) = stats.ks_2samp(scores, exp_scores)
    else:
        (D, p_val) = stats.kstest(scores, stats.uniform.cdf)
    return {'D':D, 'p_val':p_val}
    def test_statistics(self):
        # This is a statistical test with a non-zero chance of failure during normal operation.
        # Re-run the test to see if the error persists.
        for rate in [123.0*Hz, 0.123*kHz]:
            for t_stop in [2345*ms, 2.345*second]:
                spiketrain = stgen.homogeneous_poisson_process(rate, t_stop=t_stop)
                intervals = isi(spiketrain)

                expected_spike_count = int((rate * t_stop).simplified)
                self.assertLess(pdiff(expected_spike_count, spiketrain.size), 0.2)  # should fail about 1 time in 1000

                expected_mean_isi = (1/rate)
                self.assertLess(pdiff(expected_mean_isi, intervals.mean()), 0.2)

                expected_first_spike = 0*ms
                self.assertLess(spiketrain[0] - expected_first_spike, 7*expected_mean_isi)

                expected_last_spike = t_stop
                self.assertLess(expected_last_spike - spiketrain[-1], 7*expected_mean_isi)

                # Kolmogorov-Smirnov test
                D, p = kstest(intervals.rescale(t_stop.units),
                              "expon",
                              args=(0, expected_mean_isi.rescale(t_stop.units)),  # args are (loc, scale)
                              alternative='two-sided')
                self.assertGreater(p, 0.001)
                self.assertLess(D, 0.12)
def test_permuted_ols_check_h0_noeffect_signswap(random_state=0):
    rng = check_random_state(random_state)
    # design parameters
    n_samples = 100
    # create dummy design with no effect
    target_var = rng.randn(n_samples, 1)
    tested_var = np.ones((n_samples, 1))
    # permuted OLS
    # We check that h0 is close to the theoretical distribution, which is
    # known for this simple design (= t(n_samples - dof)).
    perm_ranges = [10, 100, 1000]  # test various number of permutations
    all_kstest_pvals = []
    # we compute the Mean Squared Error between cumulative Density Function
    # as a proof of consistency of the permutation algorithm
    all_mse = []
    for i, n_perm in enumerate(np.repeat(perm_ranges, 10)):
        pval, orig_scores, h0 = permuted_ols(
            tested_var, target_var, model_intercept=False,
            n_perm=n_perm, two_sided_test=False, random_state=i)
        assert_equal(h0.size, n_perm)
        # Kolmogorov-Smirnov test
        kstest_pval = stats.kstest(h0, stats.t(n_samples).cdf)[1]
        all_kstest_pvals.append(kstest_pval)
        mse = np.mean(
            (stats.t(n_samples).cdf(np.sort(h0))
             - np.linspace(0, 1, h0.size + 1)[1:]) ** 2)
        all_mse.append(mse)
    all_kstest_pvals = np.array(all_kstest_pvals).reshape(
        (len(perm_ranges), -1))
    all_mse = np.array(all_mse).reshape((len(perm_ranges), -1))
    # check that a difference between distributions is not rejected by KS test
    assert_array_less(0.01 / (len(perm_ranges) * 10.), all_kstest_pvals)
    # consistency of the algorithm: the more permutations, the less the MSE
    assert_array_less(np.diff(all_mse.mean(1)), 0)
    def test_statistics(self):
        # This is a statistical test with a non-zero chance of failure during normal operation.
        # Re-run the test to see if the error persists.
        a = 3.0
        for b in (67.0*Hz, 0.067*kHz):
            for t_stop in (2345*ms, 2.345*second):
                spiketrain = stgen.homogeneous_gamma_process(a, b, t_stop=t_stop)
                intervals = isi(spiketrain)

                expected_spike_count = int((b/a * t_stop).simplified)
                self.assertLess(pdiff(expected_spike_count, spiketrain.size), 0.25)  # should fail about 1 time in 1000

                expected_mean_isi = (a/b).rescale(ms)
                self.assertLess(pdiff(expected_mean_isi, intervals.mean()), 0.3)

                expected_first_spike = 0*ms
                self.assertLess(spiketrain[0] - expected_first_spike, 4*expected_mean_isi)

                expected_last_spike = t_stop
                self.assertLess(expected_last_spike - spiketrain[-1], 4*expected_mean_isi)

                # Kolmogorov-Smirnov test
                D, p = kstest(intervals.rescale(t_stop.units),
                              "gamma",
                              args=(a, 0, (1/b).rescale(t_stop.units)),  # args are (a, loc, scale)
                              alternative='two-sided')
                self.assertGreater(p, 0.001)
                self.assertLess(D, 0.25)
Example #29
File: views.py Project: nbohmlev/rnaVis
def makeKS(request, genotype_ids, modelName):
    allModelNames = getModelNames()
    
    data = {}
    criticalValue = 0.05
    
    for model in allModelNames:
        genotype = get_object_or_404(Genotype, pk=genotype_ids[0])
        exec("seqSet = genotype.%s_set.all()"%model)
        allSeqLens = []
        for innerItem in seqSet:
            allSeqLens.append(innerItem.seqLen)

        allSeqLens = np.array(allSeqLens)
        mu = np.mean(allSeqLens)
        sigma = np.std(allSeqLens)
    
        normed_allSeqLens = (allSeqLens - mu)/sigma
        result = stats.kstest(normed_allSeqLens, 'norm')
        
        if result[1] >= criticalValue:
            hyp = True
        else:
            hyp = False
        
        data[model] = {"kstest":result, "hypothesis":hyp} 
    return {'result': data, 'modelName':modelName}
Example #30
    def getbcdf_pval_swc(self, swc): 
        """
        get p-value of beta CDF with candidated sigma weight
        parameters
        ----------

        returns
        -------
        pval: 
        d: 
        Y: 
        ba: 
        bb: 
        bcdf: beta.cdf
        """
        Y = [self.kernelizeisw(x, swc) for x in self._data]

        Y.sort()
        Y = featureScaling(Y)
        y_m = np.mean(Y)
        y_v = np.var(Y, ddof = 1)
        if math.isnan(y_v) or y_v == 0: 
            return 0

        ba = y_m ** 2 * ((1 - y_m) / y_v - 1 / y_m)
        bb = ba * (1 - y_m) / y_m
        bcdf = beta.cdf(Y, ba, bb)
                    
        # Y = featureScaling(Y)
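        # Note: kstest evaluates the supplied callable at the sorted sample; Y is already
        # sorted, so returning the precomputed beta.cdf values gives the fitted CDF at each point.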
        d, pval = scistats.kstest(Y, lambda cdf: bcdf)
        
        params = p3c(pval, d, Y, [x for x in swc], ba, bb, 0, bcdf)
        return params
  commandline="\
%s --detectors %s --par-file %s --input-files %s --outfile %s --prior-file %s --Nlive %s --Nmcmcinitial %s --sampleprior %s" \
% (execu, dets, parf, datafile, outfile, priorf, Nlive, Nmcmcinitial, priorsamples)

  sp.check_call(commandline, shell=True)

  # read in prior samples
  f = h5py.File(outfile, 'r')
  a = f['lalinference']
  h0samps = a['lalinference_nest']['nested_samples']['H0'][:]

  # get normed histogram of samples
  [n, nedges] = np.histogram(h0samps, bins=20, range=(0., h0ul), density=True)
  nc = np.cumsum(n)*(nedges[1]-nedges[0])

  stat, p = ss.kstest(nc, 'uniform')

  print "K-S test p-value for upper range of %e = %f" % (h0ul, p)

  if p < 0.005:
    print "There might be a problem for this prior distribution"
    import matplotlib.pyplot as pl
    fig, ax = pl.subplots(1, 1)
    ax.hist(h0samps, bins=20, density=True, cumulative=True, histtype='stepfilled', alpha=0.2)
    ax.plot([0., h0ul], [0., 1], 'k--')
    ax.set_xlim((0., h0ul))
    ax.set_ylim((0., 1.))
    ax.set_xlabel('h_0')
    ax.set_ylabel('Cumulative probability')
    pl.show()
    break
Example #32
    x = np.linspace(mu - 4 * sigma, mu + 4 * sigma)
    plt.plot(x, stats.norm.cdf(x, mu, sigma), "b-")


alpha = 0.01
n = 10000
mu = 25
sigma = 2
plus_deviations = []
minus_deviations = []

# Generate n random numbers following an N(mu, sigma) distribution
numbers = generate_by_acceptance_rejection(n, mu, sigma)

# Run the Kolmogorov-Smirnov test and get the p-value
statistic, pvalue = stats.kstest(numbers, stats.norm(loc=mu, scale=sigma).cdf)

print("Significance level: {:.2f} ".format(alpha))
print("p-value: {:.2f} ".format(pvalue))

# Compare the p-value with the desired significance level; if it is larger,
# there is no evidence to reject the null hypothesis, so it is accepted
if alpha <= pvalue:
    print("The test accepts the null hypothesis.")
else:
    print("The test rejects the null hypothesis.")

# Alternative: sort the samples in ascending order, compute the statistic and
# compare it with the critical value from the Kolmogorov-Smirnov table;
# if the statistic is <= the critical value, we accept the hypothesis.
Example #33
 def norm_ks(rvs):
     return stats.kstest(rvs, 'norm')
def check_distribution(kin, temp, ndof, kb=8.314e-3,
                       verbosity=2, screen=False, filename=None,
                       ene_unit=None, temp_unit=None):
    r"""
    Checks if a kinetic energy trajectory is Maxwell-Boltzmann distributed.

    .. warning: This is a low-level function. Additionally to being less
       user-friendly, there is a higher probability of erroneous and / or
       badly documented behavior due to unexpected inputs. Consider using
       the high-level version based on the SimulationData object. See
       physical_validation.kinetic_energy.check_mb_ensemble for more
       information and full documentation.

    Parameters
    ----------
    kin : array-like
        Kinetic energy snapshots of the system.
    temp : float
        Target temperature of the system. Used to construct the
        Maxwell-Boltzmann distribution.
    ndof : float
        Number of degrees of freedom in the system. Used to construct the
        Maxwell-Boltzmann distribution.
    kb : float
        Boltzmann constant :math:`k_B`. Default: 8.314e-3 (kJ/mol).
    verbosity : int
        0: Silent.
        1: Print minimal information.
        2: Print result details.
        3: Print additional information.
        Default: 2.
    screen : bool
        Plot distributions on screen. Default: False.
    filename : string
        Plot distributions to `filename`.pdf. Default: None.
    ene_unit : string
        Energy unit - used for output only.
    temp_unit : string
        Temperature unit - used for output only.

    Returns
    -------
    result : float
        The p value of the test.

    See Also
    --------
    physical_validation.kinetic_energy.distribution : High-level version
    """

    # Discard burn-in period and time-correlated frames
    kin = trajectory.prepare(kin, verbosity=verbosity, name='Kinetic energy')
    kt = kb * temp

    if ndof <= 0:
        warnings.warn('Zero degrees of freedom!')
        p = float('nan')
    else:
        d, p = stats.kstest(kin, 'gamma', (ndof/2, 0, kt))

    # ====================== #
    # Plot to screen or file #
    # ====================== #
    do_plot = screen or filename is not None
    if do_plot:
        ana_dist = stats.gamma(ndof/2, scale=kt)
        ana_kin = np.linspace(ana_dist.ppf(0.0001),
                              ana_dist.ppf(0.9999), 200)
        ana_hist = ana_dist.pdf(ana_kin)

        tunit = ''
        if temp_unit is not None:
            tunit = temp_unit

        data = [{'y': kin,
                 'hist': int(len(kin)/150),
                 'args': dict(label='Trajectory', density=True, alpha=0.5)}]
        if ndof > 0:
            data.append(
                {'x': ana_kin,
                 'y': ana_hist,
                 'args': dict(label='Analytical T=' + str(temp) + tunit, lw=5)})

        unit = ''
        if ene_unit is not None:
            unit = ' [' + ene_unit + ']'

        plot.plot(data,
                  legend='lower left',
                  title='Kinetic energy distribution',
                  xlabel='Kinetic energy' + unit,
                  ylabel='Probability [%]',
                  sci_x=True,
                  percent=True,
                  filename=filename,
                  screen=screen)

    if verbosity > 0:
        if verbosity > 1:
            message = ('Kinetic energy distribution check (strict)\n'
                       'Kolmogorov-Smirnov test result: p = {:g}\n'
                       'Null hypothesis: Kinetic energy is Maxwell-Boltzmann distributed'.format(p))
        else:
            message = 'p = {:g}'.format(p)
        print(message)

    return p
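The core of the check above is a single KS test of the kinetic-energy sample against a gamma distribution with shape ndof/2 and scale k_B*T. A self-contained sketch with synthetic data (the trajectory preparation and plotting are omitted):

import numpy as np
from scipy import stats

ndof = 300           # degrees of freedom
kb = 8.314e-3        # kJ/(mol K)
temp = 300.0         # K
kt = kb * temp

rng = np.random.default_rng(5)
kin = stats.gamma.rvs(ndof / 2, scale=kt, size=5000, random_state=rng)  # synthetic kinetic energies

d, p = stats.kstest(kin, 'gamma', (ndof / 2, 0, kt))  # args are (shape, loc, scale)
print(d, p)  # a small p-value would indicate the sample is not Maxwell-Boltzmann distributed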
Example #35
         max_lik_em[j] = l[-1]
         alpha_em[j] = m.alpha
         beta_em[j] = m.beta
         mu_em[j] = m.mu
         mu_prime_em[j] = m.mu_prime
         phi_em[j] = m.phi
         phi_prime_em[j] = m.phi_prime
 ## Calculate p-values
 m = meg.meg_model(G[j], tau_zero=True, full_links=True, verbose=False, discrete=False, force_square=True, evaluate_directed=False)
 m.specification(main_effects=True, interactions=False, poisson_me=False, poisson_int=False, hawkes_me=True, hawkes_int=True, D=1, verbose=False)
 ## Parameter values
 m.alpha = alpha_em[j]; m.beta = beta_em[j]; m.mu = mu_em[j]; m.mu_prime = mu_prime_em[j]; m.phi = phi_em[j]; m.phi_prime = phi_prime_em[j] 
 ## P-value calculations
 m.pvalues()
 pp = [p for x in m.pvals_train.values() for p in list(x)]
 ks_score_em += [stats.kstest(pp, 'uniform')[0]]
 ks_pval_em += [stats.kstest(pp, 'uniform')[1]]
 ## Repeat for Adam (gradient ascent)
 max_lik_ga[j] = -1e100
 for s in range(nrep):
     ## Set seed for *same* initialisation
     np.random.seed(seeds[s])
     ## Initialise the parameter values
     m.alpha = np.random.uniform(low=0, high=1, size=m.n)
     m.beta = np.random.uniform(low=0, high=1, size=m.n)
     m.mu = np.random.uniform(low=0.1, high=1, size=m.n)
     m.mu_prime = np.random.uniform(low=0.1, high=1, size=m.n)
     m.phi = np.random.uniform(low=0.1, high=1, size=m.n)
     m.phi_prime = np.random.uniform(low=0.1, high=1, size=m.n)
     ## Optimise using EM
     l = m.optimise_meg(prior_penalisation=False, learning_rate=5e-2, method='adam', max_iter=250, verbose=False, tolerance=1e-6, iter_print=False)
Example #36
angle = np.arctan2(ndjj[:, 1], ndjj[:, 0]) / np.pi
angle = np.round(angle * NDIRS).astype(int) % NDIRS

# 4. see which direction histogram "is" uniform
RULE = 'dmax'

min_dir = 0
min_g = np.inf
for dir in range(NDIRS):
    udjj = np.sqrt(np.sum(djj[angle == dir]**2, 1))
    if RULE == 'gini':
        g = gini(udjj)
    elif RULE == 'ks':
        H = np.histogram(udjj, NBINS, (0, D))[0]
        H = H / H.sum()
        g = kstest(H, 'uniform')[0]
    elif RULE == 'dmax':
        g = dmax(udjj)

    print(dir, g)
    if g < min_g:
        min_g = g
        min_dir = dir

if NDIRS == 4:
    sides = ['-', '/', '|', '\\']
else:
    sides = [str(i) for i in range(NDIRS)]
print('uniform side: %s' % sides[min_dir])

plt.scatter(X[:, 0], X[:, 1], 1, 'black')
import pandas
from scipy import stats

df = pandas.read_csv('experiment.csv', sep=',')

print(df.describe())
print()

print(stats.kstest('norm', 'norm', N=3))
print(stats.kstest('norm', 'norm', N=500))
print(stats.kstest(df, 'norm'))
print()
Example #38
    # b.
    daily_cov = factor.cov()
    print('daily covariance matrix of factor data: ')
    print(daily_cov)
    daily_corr = factor.corr()
    print('daily correlation matrix of factor data: ')
    print(round(daily_corr, 2))

    # c.
    rolling_coef = rolling_coef(factor, 90)
    print(rolling_coef)

    # d. distribution test
    test_result = {}
    for c in factor.columns:
        test_stat = kstest(factor[c], 'norm')
        test_result[c] = test_stat.pvalue
    print('The result of KS test')
    print(round(pd.DataFrame.from_dict(test_result, orient='index'), 2))

    # e. Beta
    ticker_list = [
        'SPY', 'XLB', 'XLE', 'XLF', 'XLI', 'XLK', 'XLP', 'XLU', 'XLV', 'XLY'
    ]
    tickers = reduce(lambda x, y: x + y,
                     [ticker + ' ' for ticker in ticker_list])
    ETFs = ETF(tickers)
    price_data = ETFs.get_price_data("2010-01-01", "2019-08-01")
    ETF_return = ETFs.cal_return_data()

    beta = ETF_return.apply(lambda x: LinReg(factor, x, return_value='beta'))
Example #39
for col_name in value_cols:
    w,pvalue=stats.shapiro(df[col_name])
    label=''
    if pvalue<0.005:
        label ='***'
    elif pvalue< 0.01:
        label = '**'
    elif pvalue< 0.05:
        label = '*'

    print(f" {label}属性 {col_name} 统计量={w} p值={pvalue}")


# Kolmogorov-Smirnov test
# Perform the Kolmogorov-Smirnov test for goodness of fit.
#
# This performs a test of the distribution F(x) of an observed random variable against a given distribution G(x). Under the null hypothesis the two distributions are identical, F(x)=G(x). The alternative hypothesis can be either ‘two-sided’ (default), ‘less’ or ‘greater’. The KS test is only valid for continuous distributions.
print("==  Kolmogorov-Smirnov检验 ==")

for col_name in value_cols:
    D, pvalue =stats.kstest(df[col_name],'norm')
    label=''
    if pvalue<0.005:
        label ='***'
    elif pvalue< 0.01:
        label = '**'
    elif pvalue< 0.05:
        label = '*'

    print(f" {label}属性 {col_name} 统计量={D} p值={pvalue}")

def stddev(sample, mean):
    diffs = []
    for num in sample:
        diffs.append((num - mean)**2)

    variance = sum(diffs) / len(diffs)
    return variance**0.5


relative_std = stddev(relative_delays, relative_mean)
print("calculated delayed mean is %s" % (relative_mean))
print("calculated delayed standard deviation is %s" % (relative_std))

_, p = st.kstest(relative_delays, 'norm', (relative_mean, relative_std))
print(
    "calculated p-value from normal distribution with calculated delayed mean and standard deviation is %s (no rejection)"
    % (p))

xs = np.arange(-0.2, 0.8, 0.01)
ys = st.norm.pdf(xs, loc=relative_mean, scale=relative_std)

plt.hist(relative_delays,
         20,
         density=True,
         label="percentage of trains delayed per station")
plt.plot(
    xs,
    ys,
    label=
Example #41
vals = [normal_vals, dual_vals]
pdf = [normal_pdf, dual_pdf]
xlims = [(-4, 4), (-4, 10)]

#------------------------------------------------------------
# Compute the statistics and plot the results
fig = plt.figure(figsize=(5, 7))
fig.subplots_adjust(left=0.13, right=0.95, bottom=0.06, top=0.95, hspace=0.1)

for i in range(2):
    ax = fig.add_subplot(2, 1, 1 + i)  # 2 x 1 subplot

    # compute some statistics
    A2, sig, crit = stats.anderson(vals[i])
    D, pD = stats.kstest(vals[i], "norm")
    W, pW = stats.shapiro(vals[i])

    mu, sigma = mean_sigma(vals[i], ddof=1)
    median, sigmaG = median_sigmaG(vals[i])

    N = len(vals[i])
    Z1 = 1.3 * abs(mu - median) / sigma * np.sqrt(N)
    Z2 = 1.1 * abs(sigma / sigmaG - 1) * np.sqrt(N)

    print(70 * '_')
    print("  Kolmogorov-Smirnov test: D = %.2g  p = %.2g" % (D, pD))
    print("  Anderson-Darling test: A^2 = %.2g" % A2)
    print("    significance  | critical value ")
    print("    --------------|----------------")
    for j in range(len(sig)):
Example #42
sim.run(10)

comp_var = np.asarray(proj.getWeights(format='array'))
shape = np.copy(comp_var.shape)
connected = np.where(~np.isnan(comp_var))
comp_var = comp_var[connected]
num_active = comp_var.size
sim.end()

assert num_active == n_neurons
assert np.all(connected[0] == connected[1])

from scipy import stats
scale = dist_params['high'] - dist_params['low']
s, p = stats.kstest((comp_var - dist_params['low']) / scale, 'uniform')
assert p > 0.05

v_min = comp_var.min()
v_max = comp_var.max()
v_avg = comp_var.mean()
print(f"Stats for sampled {var} = {v_min}, {v_avg}, {v_max}")

half_range = dist_params['low'] + \
             (dist_params['high'] - dist_params['low']) / 2.
print(f"Stats for ideal {var} = {dist_params['low']}, "
      f"{half_range}, {dist_params['high']}")

epsilon = 0.1
assert np.abs(v_min - dist_params['low']) < epsilon
assert np.abs(v_max - dist_params['high']) < epsilon
Example #43
def test_beta_ellipticity():

    from skypy.galaxy.ellipticity import beta_ellipticity

    # Initialise a randomised ellipticity distribution, an equivalent beta
    # distribution and special cases where the ellipticity distribution is
    # equivalent to a uniform distribution and an arcsine distribution.
    a, b = np.random.lognormal(size=2)
    args = (a / (a + b), a + b)
    beta_dist = stats.beta(a, b)
    ellipticity_dist = beta_ellipticity(*args)
    ellipticity_uniform = beta_ellipticity(0.5, 2.0)
    ellipticity_arcsine = beta_ellipticity(0.5, 1.0)

    # Range of input values spanning the support of the distributions
    x = np.linspace(0, 1, 100)

    # Check basic properties of distribution implementation
    check_normalization(beta_ellipticity, args, 'beta_ellipticity')
    check_edge_support(beta_ellipticity, args)
    check_random_state_property(beta_ellipticity, args)
    check_pickling(beta_ellipticity, args)

    # Check distribution moments
    m, v, s, k = ellipticity_dist.stats(moments='mvsk')
    check_mean_expect(beta_ellipticity, args, m, 'beta_ellipticity')
    check_var_expect(beta_ellipticity, args, m, v, 'beta_ellipticity')
    check_skew_expect(beta_ellipticity, args, m, v, s, 'beta_ellipticity')
    check_kurt_expect(beta_ellipticity, args, m, v, k, 'beta_ellipticity')
    check_moment(beta_ellipticity, args, m, v, 'beta_ellipticity')

    # Compare ellipticity distribution functions (e.g. pdf, cdf...) against
    # functions for an equivalent beta distribution
    assert allclose(ellipticity_dist.pdf(x), beta_dist.pdf(x))
    assert allclose(ellipticity_dist.logpdf(x), beta_dist.logpdf(x))
    assert allclose(ellipticity_dist.cdf(x), beta_dist.cdf(x))
    assert allclose(ellipticity_dist.logcdf(x), beta_dist.logcdf(x))
    assert allclose(ellipticity_dist.ppf(x), beta_dist.ppf(x))
    assert allclose(ellipticity_dist.sf(x), beta_dist.sf(x))
    assert allclose(ellipticity_dist.logsf(x), beta_dist.logsf(x))
    assert allclose(ellipticity_dist.isf(x), beta_dist.isf(x))
    assert isclose(ellipticity_dist.entropy(), beta_dist.entropy())
    assert isclose(ellipticity_dist.median(), beta_dist.median())
    assert isclose(ellipticity_dist.std(), beta_dist.std())
    assert allclose(ellipticity_dist.interval(x), beta_dist.interval(x))

    # Test scalar output
    assert np.isscalar(ellipticity_dist.rvs())

    # Test array output
    assert ellipticity_dist.rvs(size=10).shape == (10,)

    # Test broadcast output
    e_ratio = 0.5 * np.ones((13, 1, 5))
    e_sum = 0.5 * np.ones((7, 5))
    rvs = beta_ellipticity.rvs(e_ratio, e_sum)
    assert rvs.shape == np.broadcast(e_ratio, e_sum).shape

    # Kolmogorov-Smirnov test comparing ellipticity and beta distributions
    D, p = stats.kstest(ellipticity_dist.rvs, beta_dist.cdf, N=1000)
    assert p > 0.01, 'D = {}, p = {}'.format(D, p)

    # Kolmogorov-Smirnov test comparing ellipticity and uniform distributions
    D, p = stats.kstest(ellipticity_uniform.rvs, 'uniform', N=1000)
    assert p > 0.01, 'D = {}, p = {}'.format(D, p)

    # Kolmogorov-Smirnov test comparing ellipticity and arcsine distributions
    D, p = stats.kstest(ellipticity_arcsine.rvs, 'arcsine', N=1000)
    assert p > 0.01, 'D = {}, p = {}'.format(D, p)
Example #44
predictions = [i[0] for i in model.predict(tf.compat.v1.data.make_one_shot_iterator(dataset_test))]

test_rmse = mean_squared_error(y_test[:len(predictions)], predictions, squared= False)

test_mae = mean_absolute_error(y_test[:len(predictions)], predictions)
print("Test data MAE: ", test_mae, "\n")

test_mse = mean_squared_error(y_test[:len(predictions)], predictions)
test_rmse = np.sqrt(test_mse)
print("Test data RMSE: ", test_rmse, "\n")

test_r2 = r2_score(y_test[:len(predictions)], predictions)
print("Test data R2: ", test_r2, "\n")

kstest_results_actual_values = stats.kstest(y_test, 'norm')
print("kstest_results_actual_values", kstest_results_actual_values)
kstest_results_predicted_values = stats.kstest(predictions, 'norm')
print("kstest_results_predicted_values", kstest_results_predicted_values)

P_VALUE = 0.05

if kstest_results_actual_values.pvalue <= P_VALUE or kstest_results_predicted_values.pvalue <= P_VALUE:
    print(stats.kruskal(y_test, predictions))
    print(stats.mannwhitneyu(y_test, predictions))
else:
    levene_results = stats.levene(y_test, predictions)
    print("levene_results", levene_results)
    print(stats.ttest_ind(y_test, predictions, equal_var= levene_results.pvalue > P_VALUE))

plt.plot(history.history['loss'])
Example #45
def distribution_compared_group(data, sample_size=10000, reports_path='.'):
    '''
    Compare the degree distribution, at the individual and group mean level, to
    a number of standard distributions: Uniform, Normal, Gamma, Exponential, Poisson, Triangular,
    LogNormal, and Weibull. The comparison is made with a Kolmogorov-Smirnov
    goodness-of-fit test, and a p-value is computed at the individual
    and group level.

    params:
    data = list of degree distributions
    sample_size = number of random values to generate per distribution test -- default = 10000
    reports_path = directory where plots and the results file are written -- default = '.'
    '''
    from scipy import stats
    from scipy.stats import uniform, norm, gamma, expon, poisson, triang, lognorm, weibull_min
    import numpy as np
    from numpy.random import weibull
    import os
    import statistics
    import matplotlib.pyplot as plt
    import seaborn as sns

    #####################################
    # Group level comparison
    group_deg_mean = []
    for jj in data['Degree_dist']:
        group_deg_mean.append(statistics.mean(jj))

    group_deg_array = np.array(group_deg_mean)
    group_centered_deg_mean = group_deg_array - np.mean(group_deg_array)

    data_uniform = uniform.rvs(size=sample_size,
                               loc=np.min(group_centered_deg_mean),
                               scale=np.max(group_centered_deg_mean))

    data_normal = norm.rvs(size=sample_size,
                           loc=np.median(group_centered_deg_mean),
                           scale=1)

    data_gamma = gamma.rvs(a=5, size=sample_size)

    data_expon = expon.rvs(scale=1,
                           loc=np.median(group_centered_deg_mean),
                           size=sample_size)

    mu = 3
    data_poisson = poisson.rvs(mu=mu, size=sample_size)

    c = 0.158
    data_triang = triang.rvs(c=c, size=sample_size)

    s = 0.954
    data_lognorm = lognorm.rvs(s=s, size=sample_size)

    a = 5.0
    data_weibull = np.random.weibull(a=a, size=sample_size)

    Distributions = [
        data_uniform, data_normal, data_gamma, data_expon, data_poisson,
        data_triang, data_lognorm, data_weibull
    ]
    dist_names = [
        'Uniform', 'Normal', 'Gamma', 'Exponential', 'Poisson', 'Triangular',
        'LogNormal', 'Weibull'
    ]

    group_uniform = [
        'Uniform:',
        stats.kstest(group_centered_deg_mean, data_uniform)
    ]
    group_normal = [
        'Normal', stats.kstest(group_centered_deg_mean, data_normal)
    ]
    group_gamma = ['Gamma', stats.kstest(group_centered_deg_mean, data_gamma)]
    group_exponential = [
        'Exponential',
        stats.kstest(group_centered_deg_mean, data_expon)
    ]
    group_poisson = [
        'Poisson',
        stats.kstest(group_centered_deg_mean, data_poisson)
    ]
    group_triang = [
        'Triang', stats.kstest(group_centered_deg_mean, data_triang)
    ]
    group_lognormal = [
        'Lognormal',
        stats.kstest(group_centered_deg_mean, data_lognorm)
    ]
    group_weibull = [
        'Weibull',
        stats.kstest(group_centered_deg_mean, data_weibull)
    ]

    for ii, jj in zip(Distributions, dist_names):
        sns.set_context('talk')
        ax = sns.histplot(ii, bins=50, kde=True, color='skyblue')
        #hist_kws={'linewidth':15, 'alpha':1})
        ax.set(title=jj + ' Sample Degree Distribution', ylabel='Frequency')
        aj = sns.histplot(group_centered_deg_mean,
                          bins=50,
                          kde=True,
                          color='forestgreen')
        plt.savefig(reports_path + '/' + jj +
                    ' Sample Degree Distribution.png')
        plt.clf()

    ks_test_results_group = [
        group_uniform, group_normal, group_gamma, group_exponential,
        group_poisson, group_triang, group_lognormal, group_weibull
    ]

    with open(reports_path + '/' + 'ks_test_results_group.txt',
              'w') as filehandle:
        for listitem in ks_test_results_group:
            filehandle.write('%s\n' % listitem)

    return ks_test_results_group
Example #46
np.random.seed(seed=2015)
beta = stats.beta(a=4, b=2)
print("method 2:")
print(beta.rvs(size=10))

norm_dist = stats.norm(loc=0.5, scale=2)
n = 200
dat = norm_dist.rvs(size=n)
print("mean of data is: " + str(np.mean(dat)))
print("median of data is: " + str(np.median(dat)))
print("standard deviation of data is: " + str(np.std(dat)))

mu = np.mean(dat)
sigma = np.std(dat)
stat_val, p_val = stats.kstest(dat, 'norm', (mu, sigma))
print('KS-statistic D = %6.3f p-value = %6.4f' % (stat_val, p_val))
stat_val, p_val = stats.ttest_1samp(dat, 0)
print('One-sample t-statistic D = %6.3f, p-value = %6.4f' % (stat_val, p_val))

norm_dist2 = stats.norm(loc=-0.2, scale=1.2)
dat2 = norm_dist2.rvs(size=50)
stat_val, p_val = stats.ttest_ind(dat, dat2, equal_var=False)
print('Two-sample t-statistic D = %6.3f, p-value = %6.4f' % (stat_val, p_val))

g_dist = stats.gamma(a=2)
print("quantiles of 2, 4 and 5:")
print(g_dist.cdf([2, 4, 5]))
print("Values of 25%, 50% and 90%:")
print(g_dist.pdf([0.25, 0.5, 0.95]))
print(stats.norm.moment(6, loc=0, scale=1))
Example #47
time_deltas = time_deltas[(0 <= time_deltas) & (time_deltas <= 150)]

fig = sns.distplot(time_deltas, kde=False, fit=stats.gamma)
plt.title('Time Elapsed Between Incident and Claim')
plt.xlabel('Days')
plt.ylabel('Density')

(a, loc, scale) = stats.gamma.fit(time_deltas)
plt.legend([
    'Gamma dist. fit (a={0:.2f}, loc={1:.2f}, scale={2:.2f})'.format(
        a, loc, scale), 'Time Elapsed'
])
plt.savefig('../plots/wait_time.png', dpi=400, bbox_inches='tight')
plt.close()

(D, p) = stats.kstest(time_deltas, 'gamma', args=(a, loc, scale))

payments = data[['Claim Number', 'Claim Amount', 'Close Amount',
                 'Status']].copy()
payments['Fraction'] = data['Close Amount'] / data['Claim Amount']
payments['Status'] = payments['Status'].str.replace('^Closed[\d\D]*', 'Closed')
payments['Status'] = payments['Status'].str.replace('^Insufficient[\d\D]*$',
                                                    'Insufficient')
payments['Status'] = payments['Status'].str.replace('[\d\D]*assigned[\d\D]*',
                                                    'Assigned')
payments['Status'] = payments['Status'].str.replace('^Pending[\d\D]*$',
                                                    'Pending')

fig = sns.countplot(y='Status', data=payments)
plt.title('Status of Claims')
plt.xlabel('Count')
Example #48
     # delete last column that is all 0
     wave_coef_matrix = np.delete(wave_coef_matrix, -1, axis=1)
     
     ## to train on training set only
     wave_coef_matrix_training = wave_coef_matrix[training_index,:]
     wave_coef_matrix_testig = wave_coef_matrix[testing_index,:]
     
     ## from wave_coef_matrix choose 10 columns least likely to be random
     # using Kolmogorov-Smirnov (KS) test
     # http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.653.6292&rep=rep1&type=pdf
     all_feature_test_result = []
     for ii in range(0, wave_coef_matrix_training.shape[1]):
         feature = wave_coef_matrix_training[:,ii].astype(float)
         # run KS test on this feature
         # https://stackoverflow.com/questions/17901112/using-scipys-stats-kstest-module-for-goodness-of-fit-testing
         test_result = stats.kstest(feature, 'norm', args=(np.mean(feature), np.std(feature)))
         # test_result 0: test statistic 1: p value
         all_feature_test_result.append(test_result[0])
     all_feature_test_result = np.asarray(all_feature_test_result)
         
     ## pick the feature_num columns with the smallest KS statistics as selected features
     # selected_feature_index = all_feature_test_result.argsort()[-10:][::1]
     feature_num = 25
     selected_feature_index = all_feature_test_result.argsort()[:feature_num]
 
     ## do PCA analysis after wavelet anaylis
     ## inital 8 PC
     ## plot top 2 PCA component in 2d graph 
     #pca_a = PCA(n_components=8)
     #pca_a.fit(wave_coef_matrix)
     #eigen_a=pca_a.explained_variance_ratio_
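A compact, self-contained sketch of the feature-ranking idea used above: compute the KS statistic of every column against a normal fitted to that column, then keep the columns with the smallest statistics (the matrix shape here is made up for illustration).

import numpy as np
from scipy import stats

rng = np.random.default_rng(6)
X = rng.normal(size=(200, 40))          # stand-in for the wavelet coefficient matrix

ks_stats = np.array([
    stats.kstest(col, 'norm', args=(col.mean(), col.std()))[0]
    for col in X.T
])

feature_num = 25
selected = ks_stats.argsort()[:feature_num]   # columns closest to their fitted normal
print(selected)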
Example #49
print(std_steps_regular)

print(std_level_zchaff_irregular)
print(std_level_zchaff_regular)

print(std_num_decisions_zchaff_irregular)
print(std_num_decisions_zchaff_regular)

print(std_avg_length_trials_walksat_irregular)
print(std_avg_length_trials_walksat_regular)

#NORMALITY TESTS KOLMOGOROV SMIRNOV
print(
    stats.kstest(
        (pycosat_irregular_metrics[6] - np.mean(pycosat_irregular_metrics[6]))
        / np.std(pycosat_irregular_metrics[6]),
        cdf='norm',
        N=15000))

print(
    stats.kstest(
        (pycosat_regular_metrics[6] - np.mean(pycosat_regular_metrics[6])) /
        np.std(pycosat_regular_metrics[6]),
        cdf='norm',
        N=15000))

print(
    stats.kstest(
        (pycosat_irregular_metrics[1] - np.mean(pycosat_irregular_metrics[1]))
        / np.std(pycosat_irregular_metrics[1]),
        cdf='norm',
Example #50
    float(
        re.sub(
            ',', '.',
            re.sub('[^\w.,]', '', final_df['div_per_100_marriges_'].loc[ix])))
    if
    len(re.findall('\w', str(final_df['div_per_100_marriges_'].loc[ix]))) > 0
    else 0 for ix in final_df.index
]

final_df['div_per_100_marriges_'] = temp

# imputing other variables: fill zeros with the mean if the column passes the
# KS normality check, otherwise with the median
for var in final_df:
    mask = final_df[var] == 0
    n = mask.sum()
    if n != 0:
        if kstest(final_df[var].values, 'norm', N=len(final_df))[1] > 0.05:
            final_df.loc[mask, var] = np.mean(final_df[var])
        else:
            final_df.loc[mask, var] = np.median(final_df[var])

min_year = final_df.groupby('year')['All ages (%)'].mean().idxmin()
max_year = final_df.groupby('year')['All ages (%)'].mean().idxmax()

max_ = final_df[final_df.year == max_year]
min_ = final_df[final_df.year == min_year]

overall_corr = final_df.drop(columns=['year', 'country']).corr()
min_corr = min_.drop(columns=['year', 'country']).corr()
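The imputation block above applies a simple rule: if a column passes the KS normality check, zeros are replaced with the mean, otherwise with the median. A self-contained sketch of the same pattern follows; impute_zeros and the toy frame are hypothetical, and here the KS test uses a normal fitted with the column's own mean and standard deviation rather than a standard normal.

import numpy as np
import pandas as pd
from scipy.stats import kstest

def impute_zeros(df):
    """Replace zeros in each numeric column with the mean if the column looks
    normal (KS p-value > 0.05 against a fitted normal), else with the median."""
    for var in df.select_dtypes(include=[np.number]).columns:
        mask = df[var] == 0
        if mask.any():
            col = df[var].to_numpy(dtype=float)
            p = kstest(col, 'norm', args=(col.mean(), col.std()))[1]
            df.loc[mask, var] = df[var].mean() if p > 0.05 else df[var].median()
    return df

# hypothetical usage
df_demo = pd.DataFrame({'a': [0.0, 1.2, 0.9, 0.0, 1.1], 'b': [3, 0, 4, 5, 0]})
df_demo = impute_zeros(df_demo)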
示例#51
0
                             axis=1).tolist())
        list_df_pp.append(
            pd.Series([pair in set_pp_cell for pair in list_pp],
                      index=list_pp))
        set_po_cell = set(
            df_po_cell.apply(lambda x: (x['region1'], x['region2']),
                             axis=1).tolist())
        list_df_po.append(
            pd.Series([pair in set_po_cell for pair in list_po],
                      index=list_po))

    df_pp = pd.concat(list_df_pp, axis=1)
    df_po = pd.concat(list_df_po, axis=1)
    df_cell = pd.concat([df_pp, df_po], axis=0)
    df_cell.columns = list_cell

    # ks test
    df_pval = pd.DataFrame(np.full(shape=(len(list_cell), len(celltypes)),
                                   fill_value=1),
                           index=list_cell,
                           columns=celltypes)
    for cell in list_cell:
        cell_pair = df_cell.index[df_cell[cell]]
        for celltype in celltypes:
            all_score = dict_cell_scores[celltype]
            cell_score = all_score[cell_pair]
            df_pval.loc[cell, celltype] = \
                kstest(np.array(cell_score), np.array(all_score), alternative='less')[1]
    file_pvals = os.path.join(path_cell_interatome, 'interactome_pvals.txt')
    df_pval.to_csv(file_pvals, sep='\t')
示例#52
0
def unit_stability(units_b, units=None, feat_names=['amps'], dist='norm', test='ks'):
    '''
    Computes the probability that the empirical spike feature distribution(s), for specified
    feature(s), for all units, comes from a specific theoretical distribution, based on a specified
    statistical test. Also computes the coefficients of variation of the spike feature(s) for all
    units.

    Parameters
    ----------
    units_b : bunch
        A units bunch containing fields with spike information (e.g. cluster IDs, times, features,
        etc.) for all units.
    units : array-like (optional)
        A subset of all units for which to create the bar plot. (If `None`, all units are used)
    feat_names : list of strings (optional)
        A list of names of spike features that can be found in `spks` to specify which features to
        use for calculating unit stability.
    dist : string (optional)
        The type of hypothetical null distribution for which the empirical spike feature
        distributions are presumed to belong to.
    test : string (optional)
        The statistical test used to compute the probability that the empirical spike feature
        distributions come from `dist`.

    Returns
    -------
    p_vals_b : bunch
        A bunch with `feat_names` as keys, containing a ndarray with p-values (the probabilities
        that the empirical spike feature distribution for each unit comes from `dist` based on
        `test`) for each unit for all `feat_names`.
    cv_b : bunch
        A bunch with `feat_names` as keys, containing a ndarray with the coefficients of variation
        of each unit's empirical spike feature distribution for all features.

    See Also
    --------
    plot.feat_vars

    Examples
    --------
    1) Compute the p-values from a one-sample KS test on the spike amplitudes of each unit, and
    the variances of each unit's empirical spike amplitude distribution. Create a histogram of the
    spike amplitude variances, color-coded by the depth of the channel of maximum amplitude. Get
    the cluster IDs of those units whose variance is greater than 50.
        >>> p_vals_b, variances_b = bb.metrics.unit_stability(units_b)
        # Plot histograms of variances color-coded by depth of channel of max amplitudes
        >>> fig = bb.plot.feat_vars(units_b, feat_name='amps')
        # Get all unit IDs which have amps variance > 50
        >>> var_vals = np.array(tuple(variances_b['amps'].values()))
        >>> bad_units = np.where(var_vals > 50)
    '''

    # Get units.
    if units is not None:  # we're using a subset of all units
        unit_list = list(units_b[feat_names[0]].keys())
        # for each `feat`, remove a unit from `units_b` if it is not in `units`
        for feat in feat_names:
            for unit in unit_list:
                if int(unit) not in units:
                    units_b[feat].pop(unit)
    unit_list = list(units_b[feat_names[0]].keys())  # get new `unit_list` after removing units

    # Initialize `p_vals` and `variances`.
    p_vals_b = bb.core.Bunch()
    cv_b = bb.core.Bunch()

    # Set the test as a lambda function (in future, more tests can be added to this dict)
    tests = \
        {
            'ks': lambda x, y: stats.kstest(x, y)
        }
    test_fun = tests[test]

    # Compute the statistical tests and variances. For each feature, iteratively get each unit's
    # p-values and variances, and add them as keys to the respective bunches `p_vals_feat` and
    # `variances_feat`. After iterating through all units, add these bunches as keys to their
    # respective parent bunches, `p_vals` and `variances`.
    for feat in feat_names:
        p_vals_feat = bb.core.Bunch((unit, 0) for unit in unit_list)
        cv_feat = bb.core.Bunch((unit, 0) for unit in unit_list)
        for unit in unit_list:
            # If we're missing units/features, create a NaN placeholder and skip them:
            if len(units_b['times'][str(unit)]) == 0:
                p_val = np.nan
                cv = np.nan
            else:
                # compute the p-value (test against `dist`) and cv (variance / mean) for the current feature
                _, p_val = test_fun(units_b[feat][unit], dist)
                cv = np.var(units_b[feat][unit]) / np.mean(units_b[feat][unit])
            # Append current unit's values to list of units' values for current feature:
            p_vals_feat[str(unit)] = p_val
            cv_feat[str(unit)] = cv
        p_vals_b[feat] = p_vals_feat
        cv_b[feat] = cv_feat

    return p_vals_b, cv_b
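The `tests` dict above is written so that further goodness-of-fit tests can be slotted in later. Below is a hedged sketch of one possible extension; the 'shapiro' key is hypothetical and ignores `dist`, since scipy's shapiro always tests against a normal.

import numpy as np
from scipy import stats

tests = {
    'ks': lambda x, dist: stats.kstest(x, dist),
    'shapiro': lambda x, dist: stats.shapiro(x),  # shapiro is normality-only
}
_, p_val = tests['ks'](np.random.randn(500), 'norm')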
示例#53
0
            elif distname in right:
                sm = rvs.mean()
                sstd = np.sqrt(rvs.var())
                par_est = tuple(distfn.fit(rvs, loc=0, scale=1))
            else:
                sm = rvs.mean()
                sstd = np.sqrt(rvs.var())
                par_est = tuple(distfn.fit(rvs, loc=sm, scale=sstd))

            print('fit', par_est)
            arg_est = par_est[:-2]
            loc_est = par_est[-2]
            scale_est = par_est[-1]
            rvs_normed = (rvs - loc_est) / scale_est
            ks_stat, ks_pval = stats.kstest(rvs_normed, distname, arg_est)
            print('kstest', ks_stat, ks_pval)
            quant = 0.1
            # pass only the estimated shape parameters; loc/scale are given separately
            crit = distfn.ppf(1 - quant * float(rind),
                              loc=loc_est,
                              scale=scale_est,
                              *arg_est)
            tail_prob = stats.t.sf(crit, dgp_arg, scale=dgp_scale)
            print('crit, prob', quant, crit, tail_prob)
            #if distname == 'norm':
            #plothist(rvs,loc_est,scale_est)
            #args = tuple()
            results.append([
                distname, ks_stat, ks_pval, arg_est, loc_est, scale_est, crit,
                tail_prob
            ])
示例#54
0

print("#,FY,avg, v, p-value")
arpv = np.zeros((200, 3))  # store the one-sample KS test results
for i in range(n_topics):
    for j in range(3):
        s = 0
        s2 = 0
        for k in range(ndata):
            s += ar[i][j][k]
            s2 += ar[i][j][k]**2
        # mean
        avg = s / ndata
        # standard deviation
        v = math.sqrt((s2 - avg**2 * ndata) / (ndata - 1))
        pv = stats.kstest(ar[i][j], stats.norm(loc = avg, scale=v).cdf)
        if pv[1] < 0.05:  # normality rejected -> use nonparametric tests
            print("{},{},{:.6f},{:.6f},{:.6f},nonparametric".format(i, j+2015, avg, v, pv[1]))
        else:  # consistent with a normal distribution -> parametric tests are acceptable
            print("{},{},{:.6f},{:.6f},{:.6f},parametric".format(i, j+2015, avg, v, pv[1]))
        arpv[i][j] = pv[1]
print("一標本KS検定結果")
print("#,15,16,17")
for i in range(n_topics):
    print(f"{i},{arpv[i][0]},{arpv[i][1]},{arpv[i][2]}")

print("二標本KS検定")
print("#,15vs16,16vs17")
for i in range(n_topics):
    # 2015 vs 2016
    # 2016 vs 2017
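The last loop above is cut off before the two-sample comparison it announces. As a hedged sketch (not the original code), consecutive years could be compared with scipy's two-sample KS test, reusing `ar` and `n_topics` from the snippet:

from scipy import stats

for i in range(n_topics):
    p_15_16 = stats.ks_2samp(ar[i][0], ar[i][1])[1]  # 2015 vs 2016
    p_16_17 = stats.ks_2samp(ar[i][1], ar[i][2])[1]  # 2016 vs 2017
    print(f"{i},{p_15_16:.6f},{p_16_17:.6f}")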
示例#55
0
def fit_summary_plot(df, dfo=None, show=True, bins=15, select=True, **kwargs):

    if dfo is None:
        dfo = load_sample(select=select, **kwargs)
    if select:
        df = df[df["selected_jig"] == 1].reset_index(drop=True)

    fig = plt.figure(figsize=(6, 6))

    ax0 = plt.subplot(3, 1, 1)
    histkwargs = dict(density=True, histtype="step")
    rng = (min(dfo['mueff_av'].min(), df['uae_obs_jig'].min()),
           max(dfo['mueff_av'].max(), df['uae_obs_jig'].max()))
    ax0.hist(dfo['mueff_av'].values,
             color="k",
             range=rng,
             bins=bins,
             label="obs",
             **histkwargs)
    ax0.hist(df['uae_obs_jig'].values,
             color="b",
             range=rng,
             bins=bins,
             label="model",
             **histkwargs)
    ks = kstest(dfo['mueff_av'].values, df['uae_obs_jig'].values)[1]
    ax0.legend(loc="best")
    ax0.set_xlabel(f"uae (KS pval={ks:.2f})")

    ax1 = plt.subplot(3, 1, 2)
    rng = (min(dfo['rec_arcsec'].min(), df['rec_obs_jig'].min()),
           max(dfo['rec_arcsec'].max(), df['rec_obs_jig'].max()))
    ax1.hist(dfo['rec_arcsec'].values,
             color="k",
             range=rng,
             bins=bins,
             **histkwargs,
             label="obs")
    ax1.hist(df['rec_obs_jig'].values,
             color="b",
             range=rng,
             bins=bins,
             **histkwargs,
             label="model")
    ks = kstest(dfo['rec_arcsec'].values, df['rec_obs_jig'].values)[1]
    ax1.legend(loc="best")
    ax1.set_xlabel(f"rec (KS pval={ks:.2f})")

    ax1 = plt.subplot(3, 1, 3)
    rng = (min(dfo['g_r'].min(), df['colour_obs'].min()),
           max(dfo['g_r'].max(), df['colour_obs'].max()))
    ax1.hist(dfo['g_r'].values,
             color="k",
             range=rng,
             bins=bins,
             **histkwargs,
             label="obs")
    ax1.hist(df['colour_obs'].values,
             color="b",
             range=rng,
             bins=bins,
             **histkwargs,
             label="model")
    ks = kstest(dfo['g_r'].values, df['colour_obs'].values)[1]
    ax1.legend(loc="best")
    ax1.set_xlabel(f"g-r (KS pval={ks:.2f})")

    plt.tight_layout()
    if show:
        plt.show(block=False)

    return fig
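The three panels above repeat the same histogram-plus-KS-p-value pattern. Below is a hedged refactoring sketch, not part of the original function, in which a hypothetical helper `_hist_panel` draws one observed-vs-model panel per call.

from scipy.stats import kstest

def _hist_panel(ax, obs, model, label, bins=15):
    """Overlay observed and model histograms on ax and report the two-sample
    KS p-value in the x-axis label."""
    rng = (min(obs.min(), model.min()), max(obs.max(), model.max()))
    kw = dict(density=True, histtype="step", range=rng, bins=bins)
    ax.hist(obs, color="k", label="obs", **kw)
    ax.hist(model, color="b", label="model", **kw)
    pval = kstest(obs, model)[1]
    ax.legend(loc="best")
    ax.set_xlabel(f"{label} (KS pval={pval:.2f})")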
示例#56
0
####################################################################
###################### Numbers from C++ Tests ######################
####################################################################

cPRNGs = np.loadtxt("prngNums.txt")

mean = "The Mean = " + str('%.4f' % np.mean(cPRNGs))
variance = "The Variance = " + str('%.5f' % np.var(cPRNGs))

# Cycle Check
first = cPRNGs[0]
cycle = "m"
count = 0
for i in cPRNGs[1:]:
    count = count + 1
    if i == first:
        cycle = count
        break

cPRNGs.sort()
# Kolmogorov Smirnov Test
result = stats.kstest(cPRNGs, 'norm')

plt.hist(cPRNGs)
plt.title("C++ STD LCG Test")
plt.xlabel("Value\n\n" + mean + "\n" + variance + "\n" + str(result) +
           "\n\nFigure 1: C++ Test")
plt.ylabel("Frequency")
plt.savefig('histc++1.png', bbox_inches='tight')
plt.show()
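A hedged aside on the snippet above: an LCG's output is nominally uniform on [0, 1), so a KS test against the uniform reference is the matching null hypothesis; the extra call below is a sketch that reuses `stats` and `cPRNGs` from the script and is not part of the original.

result_uniform = stats.kstest(cPRNGs, 'uniform')  # uniform(0, 1) reference
print(result_uniform)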
示例#57
0
        _, t, p, a, m, _, _, _ = simulate(
            rng, N, 0, 1, 1, lambda rng: rng.exponential(0.1)
        )
        T[i, :] = t
        P[i, :] = p
        A[i, :] = a
        M[i, :] = m
    Tl = T[:, -1]
    Pl = P[:, -1]
    Al = A[:, -1]
    Ml = M[:, -1]

    # %% Display simulation
    plt.figure(1, clear=True)
    plt.hist(Pl)
    print(st.kstest(Pl, "norm"))
    plt.title(
        f"$P_{{{N}}}, p = {st.kstest((Pl-np.mean(Pl))/np.std(Pl), 'norm')[1]:.4}$"
    )
    plt.figure(2, clear=True)
    plt.hist(Tl)
    plt.title(
        f"$T_{{{N}}}, p = {st.kstest((Tl-np.mean(Tl))/np.std(Tl), 'norm')[1]:.4}$"
    )
    plt.figure(3, clear=True)
    plt.hist(Al)
    plt.title(
        f"$A_{{{N}}}, p = {st.kstest((Al-np.mean(Al))/np.std(Al), 'norm')[1]:.4}$"
    )
    plt.figure(4, clear=True)
    plt.hist(Ml)
示例#58
0
 def _kstest(self, loc, scale, samples):
     # Uses the Kolmogorov-Smirnov test for goodness of fit.
     ks, _ = sp_stats.kstest(samples,
                             sp_stats.laplace(loc, scale=scale).cdf)
     # Return True when the test passes.
     return ks < 0.02
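A hedged usage sketch for the check above: samples actually drawn from a Laplace with the same loc and scale should give a small KS statistic and pass the 0.02 threshold (the sample size and seed below are hypothetical).

import numpy as np
from scipy import stats as sp_stats

samples = np.random.default_rng(0).laplace(loc=0.0, scale=1.0, size=20000)
ks, _ = sp_stats.kstest(samples, sp_stats.laplace(0.0, scale=1.0).cdf)
assert ks < 0.02  # holds for samples from the matching Laplace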
示例#59
0
def test_correct_sampling(sampler_c, model_and_weights, set_pdf_power):
    sampler = set_pdf_power(sampler_c)

    hi = sampler.hilbert
    if isinstance(hi, DiscreteHilbert):
        n_states = hi.n_states

        ma, w = model_and_weights(hi, sampler)

        n_samples = max(40 * n_states, 100)

        ps = (np.absolute(nk.nn.to_array(
            hi, ma, w, normalize=False))**sampler.machine_pow)
        ps /= ps.sum()

        n_rep = 6
        pvalues = np.zeros(n_rep)

        sampler_state = sampler.init_state(ma, w, seed=SAMPLER_SEED)

        for jrep in range(n_rep):
            sampler_state = sampler.reset(ma, w, state=sampler_state)

            # Burn-in phase
            samples, sampler_state = sampler.sample(ma,
                                                    w,
                                                    state=sampler_state,
                                                    chain_length=n_samples //
                                                    100)

            assert samples.shape == (
                n_samples // 100,
                sampler.n_chains,
                hi.size,
            )
            samples, sampler_state = sampler.sample(ma,
                                                    w,
                                                    state=sampler_state,
                                                    chain_length=n_samples)

            assert samples.shape == (n_samples, sampler.n_chains, hi.size)

            sttn = hi.states_to_numbers(
                np.asarray(samples.reshape(-1, hi.size)))
            n_s = sttn.size

            # fill in the histogram for sampler
            unique, counts = np.unique(sttn, return_counts=True)
            hist_samp = np.zeros(n_states)
            hist_samp[unique] = counts

            # expected frequencies
            f_exp = n_s * ps
            statistics, pvalues[jrep] = chisquare(hist_samp, f_exp=f_exp)

        s, pval = combine_pvalues(pvalues, method="fisher")
        assert pval > 0.01 or np.max(pvalues) > 0.01

    elif isinstance(hi, ContinuousBoson):
        ma, w = model_and_weights(hi, sampler)
        n_samples = 5000
        n_discard = 2000
        n_rep = 6
        pvalues = np.zeros(n_rep)

        sampler_state = sampler.init_state(ma, w, seed=SAMPLER_SEED)
        for jrep in range(n_rep):
            sampler_state = sampler.reset(ma, w, state=sampler_state)

            # Burn-in phase
            samples, sampler_state = sampler.sample(ma,
                                                    w,
                                                    state=sampler_state,
                                                    chain_length=n_discard)

            assert samples.shape == (
                n_discard,
                sampler.n_chains,
                hi.size,
            )
            samples, sampler_state = sampler.sample(ma,
                                                    w,
                                                    state=sampler_state,
                                                    chain_length=n_samples)

            assert samples.shape == (n_samples, sampler.n_chains, hi.size)

            samples = samples.reshape(-1, samples.shape[-1])

            dist = multivariate_normal(
                mean=np.zeros(samples.shape[-1]),
                cov=np.linalg.inv(
                    sampler.machine_pow *
                    np.dot(w["params"]["kernel"].T, w["params"]["kernel"])),
            )
            exact_samples = dist.rvs(size=samples.shape[0])

            counts, bins = np.histogramdd(samples, bins=10)
            counts_exact, _ = np.histogramdd(exact_samples, bins=bins)

            statistics, pvalues[jrep] = kstest(counts.reshape(-1),
                                               counts_exact.reshape(-1))

        s, pval = combine_pvalues(pvalues, method="fisher")
        assert pval > 0.01 or np.max(pvalues) > 0.01
 def pareto_ks(loc, rvs):
     #start_scale = rvs.min() - loc # not used yet
     est = stats.pareto.fit_fr(rvs, 1., frozen=[np.nan, loc, np.nan])
     args = (est[0], loc, est[1])
     return stats.kstest(rvs, 'pareto', args)[0]