def NZAD(self, vmin=0.005, vmax=1.995, delv=0.05):
    """
    Compute the Anderson-Darling statistic and p-value for the
    two distributions of sumpz and the true_z vector of spec-z's.

    Since the Anderson-Darling test requires a properly normalized
    distribution over the [vmin, vmax] range, we need to create
    a new qp object defined on the grid np.arange(vmin, vmax + delv, delv).

    Parameters:
    -----------
    vmin, vmax: floats
        spec-z values outside of this range are discarded
    delv: float
        grid spacing for the [vmin, vmax] interval used to build the new
        qp object

    Returns:
    --------
    Anderson-Darling statistic and p-value
    """
    # copy the form of Rongpu's use of the skgof functions;
    # we have to use the QPPDFCDF class, as those expect objects
    # that have a .cdf method taking a vector of values
    print("using %f and %f for vmin and vmax\n" % (vmin, vmax))
    szs = self.truth
    mask = (szs > vmin) & (szs < vmax)
    vgrid = np.arange(vmin, vmax + delv, delv)
    veval = self.stackpz.evaluate(vgrid, 'gridded', True, False)
    vobj = qp.PDF(gridded=(veval[0], veval[1]))
    tmpnzfunc = QPPDFCDF(vobj, self.dx)
    nzAD = skgof.ad_test(szs[mask], tmpnzfunc)
    return nzAD.statistic, nzAD.pvalue
    def AD(self, using, dx=0.0001, vmin=0.005, vmax=0.995):
        """
        Compute the Anderson-Darling statistic and p-value for the PIT
        values by comparing with a uniform distribution between 0 and 1.

        Since the statistic diverges at 0 and 1, PIT values too close to
        0 or 1 are discarded.

        Parameters:
        -----------
        using: string
            which parameterization to evaluate
        dx: float
            step size for the integral
        vmin, vmax: floats
            PIT values outside this range are discarded

        Returns:
        --------
        AD statistic and p-value
        """
        if self.pitarray is not None:
            pits = np.array(self.pitarray)
        else:
            pits = np.array(self.PIT(using=using, dx=dx))
            self.pitarray = pits
        mask = (pits > vmin) & (pits < vmax)
        print("now with proper uniform range")
        delv = vmax - vmin
        ad_result = skgof.ad_test(pits[mask], stats.uniform(loc=vmin, scale=delv))
        return ad_result.statistic, ad_result.pvalue
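
For reference, the same truncated-uniform comparison can be exercised on its own. The snippet below is a standalone sketch using synthetic PIT values (the sample size and seed are arbitrary), not part of the class above: well-calibrated PIT values are uniform on (0, 1), so the Anderson-Darling p-value should be large.

import numpy as np
from scipy import stats
import skgof

# synthetic, perfectly calibrated PIT values drawn uniformly on (0, 1)
rng = np.random.default_rng(42)
fake_pits = rng.uniform(0.0, 1.0, size=5000)

vmin, vmax = 0.005, 0.995
mask = (fake_pits > vmin) & (fake_pits < vmax)
result = skgof.ad_test(fake_pits[mask], stats.uniform(loc=vmin, scale=vmax - vmin))
print(result.statistic, result.pvalue)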
Example #3
import numpy as np

def computeAD(data, mu, sd, seed):
    # Anderson-Darling test of `data` against a normal with fixed mu and sd
    # (via skgof), plus scipy's composite AD test, which estimates the parameters
    np.random.seed(seed)
    from skgof import ad_test
    from scipy.stats import norm, anderson
    res = ad_test(data, norm(loc=mu, scale=sd))
    res2 = anderson(data, 'norm')
    return [res.statistic, res.pvalue, res2.critical_values.tolist()]
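
An illustrative call, with synthetic data drawn from the same normal distribution being tested (all numbers below are arbitrary):

import numpy as np

data = np.random.normal(loc=0.0, scale=1.0, size=1000)
stat, pval, crit = computeAD(data, mu=0.0, sd=1.0, seed=12345)
print(stat, pval, crit)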
Example #4
    def _p(test_i, null_i, M_i, d_i):
        gpd_fit = None
        gpd_fit_p_value = None

        n_i = n

        # TODO: no need to sort as many as N numbers, do a partial sort:
        #  but this requires some tests (both performance and unit)
        # null_i_partitioned = np.partition(null_i, n_i+1)
        # null_i_first_n_sorted = sorted(null_i_partitioned[:-n_i+1])
        # np.sort keeps null_i an array, so the vectorized slicing below works
        null_i = np.sort(null_i)
        t = None

        if all(np.isnan(null_i)):
            return np.nan, False, np.nan, np.nan

        # compute the ECDF-based, biased estimate of the p-value
        raw_ecdf_estimate = (ecdf_pseudocount + d_i.sum()) / (N + 1)

        if M_i < m:
            # fit a GPD, reducing $n$ until convergence
            while n_i > 0:

                # -1 because Python has 0-based indexing
                t = (null_i[-n_i-1] + null_i[-n_i-2]) / 2

                y_until_n = null_i[-n_i:]
                exceedances = y_until_n - t

                assert all(y_until_n >= t)
                assert len(exceedances) == n_i

                fit = genpareto.fit(exceedances)
                fitted = genpareto(*fit)
                gpd_fit = fitted

                gpd_fit_p_value = ad_test(exceedances, fitted).pvalue

                if gpd_fit_p_value <= 0.05:
                    break
                else:
                    n_i -= decrease_n_by

        if gpd_fit and gpd_fit_p_value < 0.05:
            return n_i / N * (1 - gpd_fit.cdf(test_i - t)), True, gpd_fit_p_value, raw_ecdf_estimate
        else:
            if gpd_fit:
                # TODO: get the index and highlight which observation could not be fitted!
                warn('A good GPD fit could not be reached, using the ECDF estimate instead')

            return raw_ecdf_estimate, False, np.nan, raw_ecdf_estimate
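
The tail-fitting step inside _p can also be exercised in isolation. The standalone sketch below mirrors the same pattern (threshold between the two order statistics just below the retained tail, generalized Pareto fit to the exceedances, Anderson-Darling check of the fit, GPD-based tail probability); the data, sample sizes, and seed are synthetic and purely illustrative.

import numpy as np
from scipy.stats import genpareto
from skgof import ad_test

rng = np.random.default_rng(0)
null = np.sort(rng.standard_normal(1000))  # stand-in for a null distribution
n = 250                                    # number of tail points to fit

# threshold just below the n largest null values, as in _p above
t = (null[-n - 1] + null[-n - 2]) / 2
exceedances = null[-n:] - t

# fit a generalized Pareto to the exceedances and check the fit
fit = genpareto(*genpareto.fit(exceedances))
print(ad_test(exceedances, fit).pvalue)

# GPD-based estimate of the tail p-value for a test statistic x0 above t
x0 = null[-5]
print(n / len(null) * (1 - fit.cdf(x0 - t)))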
    def NZAD(self, vmin=0.005, vmax=1.995):
        """
        Compute the Anderson-Darling statistic and p-value for the
        two distributions of sumpz and the true_z vector of spec-z's.

        Parameters:
        -----------
        vmin, vmax: floats
            spec-z values outside of this range are discarded

        Returns:
        --------
        Anderson-Darling statistic and p-value
        """
        # copy the form of Rongpu's use of the skgof functions;
        # we have to use the QPPDFCDF class, as those expect objects
        # that have a .cdf method taking a vector of values
        print("using %f and %f for vmin and vmax\n" % (vmin, vmax))
        szs = self.truth
        mask = (szs > vmin) & (szs < vmax)

        tmpnzfunc = QPPDFCDF(self.stackpz, self.dx)
        nzAD = skgof.ad_test(szs[mask], tmpnzfunc)
        return nzAD.statistic, nzAD.pvalue
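
QPPDFCDF, used by both NZAD methods above, is not defined in this snippet. The class below is only a sketch of the kind of wrapper skgof.ad_test needs (an object exposing a .cdf method for a vector of values); the constructor arguments mirror the calls above, but the grid bounds and the cumulative-sum integration are assumptions, not the actual qp implementation.

import numpy as np

class QPPDFCDF(object):
    """Hypothetical wrapper giving a qp PDF object the .cdf(values)
    interface expected by skgof.ad_test; not the real implementation."""

    def __init__(self, qp_pdf, dx=0.0001, zmin=0.0, zmax=2.0):
        xgrid = np.arange(zmin, zmax + dx, dx)
        # evaluate() returns a (grid, values) pair in the calls above
        _, ygrid = qp_pdf.evaluate(xgrid, 'gridded', True, False)
        cdfvals = np.cumsum(ygrid) * dx
        self._xgrid = xgrid
        self._cdf = cdfvals / cdfvals[-1]  # force the CDF to end at 1

    def cdf(self, values):
        # linearly interpolate the tabulated CDF at the requested values
        return np.interp(values, self._xgrid, self._cdf)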