def zip_cdf(x_arr, a, max_obs):
    """Evaluate the Zipf CDF renormalised to the range up to ``max_obs``.

    Each retained point ``x`` is mapped to
    ``zipf.cdf(x, a) / zipf.cdf(max_obs, a)``, so the value at
    ``x == max_obs`` is 1.

    Parameters
    ----------
    x_arr : iterable of numbers
        Points at which to evaluate the renormalised CDF.
    a : float
        Shape parameter passed to ``zipf.cdf``.
    max_obs : number
        Largest admissible observation.

    Returns
    -------
    list
        One value per element of ``x_arr`` that satisfies ``x <= max_obs``,
        in input order. Elements above ``max_obs`` are dropped, so the
        result can be shorter than ``x_arr``.
    """
    # The normaliser does not depend on x: evaluate it once instead of once
    # per element.
    normaliser = zipf.cdf(max_obs, a)
    return [zipf.cdf(x, a) / normaliser for x in x_arr if x <= max_obs]
def _generate_zipf_queries(self):
    """Generate random range queries with Zipf-distributed start points.

    Produces ``QUERY_SIZE`` queries. Each query is a flat list of
    ``2 * DIM`` values laid out as ``[start_0, end_0, start_1, end_1, ...]``.
    For every dimension, ``start`` is drawn with ``np.random.zipf`` using
    exponent 1.5, and ``end`` is ``zipf.ppf`` evaluated at
    ``zipf.cdf(start) + self.perColSelectivity``.

    Returns
    -------
    list of list
        The generated queries.

    Raises
    ------
    ValueError
        If ``self.perColSelectivity`` is so large that no start value can
        be accepted (previously this case looped forever).
    """
    a = 1.5
    selectivity = self.perColSelectivity

    # The CDF is non-decreasing and Zipf samples are >= 1, so if even
    # start == 1 is rejected every draw is rejected and the sampling loop
    # below would never terminate. The check uses the loop's own predicate.
    # It is skipped when no sample would be drawn, so that case still
    # returns without raising.
    if QUERY_SIZE > 0 and DIM > 0 and zipf.cdf(1, a=a) + selectivity >= 1:
        raise ValueError(
            "perColSelectivity is too large: no Zipf start value leaves "
            "room for the requested selectivity"
        )

    queries = []
    for _ in range(QUERY_SIZE):
        query = []
        for _ in range(DIM):
            # Rejection-sample a start whose CDF leaves room for the
            # selectivity below 1. The target probability is kept so the
            # CDF is not evaluated a second time for the accepted start.
            while True:
                start = np.random.zipf(a)
                target = zipf.cdf(start, a=a) + selectivity
                # Written as ``not >=`` to keep the original loop's
                # behaviour for NaN (a NaN target is accepted).
                if not target >= 1:
                    break
            end = zipf.ppf(target, a=a)
            query.append(start)
            query.append(end)
        queries.append(query)
    return queries
def test_zipfian_asymptotic(self): # test limiting case that zipfian(a, n) -> zipf(a) as n-> oo a = 6.5 N = 10000000 k = np.arange(1, 21) assert_allclose(zipfian.pmf(k, a, N), zipf.pmf(k, a)) assert_allclose(zipfian.cdf(k, a, N), zipf.cdf(k, a)) assert_allclose(zipfian.sf(k, a, N), zipf.sf(k, a)) assert_allclose(zipfian.stats(a, N, moments='msvk'), zipf.stats(a, moments='msvk'))
# Plot the probability mass function on the integers between the 1st and
# the 99th percentile of the distribution.
x = np.arange(
    zipf.ppf(0.01, a),
    zipf.ppf(0.99, a),
)
ax.plot(x, zipf.pmf(x, a), 'bo', markersize=8, label='zipf pmf')
ax.vlines(x, 0, zipf.pmf(x, a), colors='b', linewidth=5, alpha=0.5)

# Calling the distribution object like a function fixes its shape and
# location and returns a "frozen" RV object that holds those parameters.
# Freeze the distribution and draw the frozen ``pmf`` on the same axes:
rv = zipf(a)
ax.vlines(
    x,
    0,
    rv.pmf(x),
    colors='k',
    linestyles='-',
    linewidth=1,
    label='frozen pmf',
)
ax.legend(loc='best', frameon=False)
plt.show()

# Round-trip check of ``cdf`` and ``ppf``; the expression is expected to
# evaluate to True (its value is not stored).
prob = zipf.cdf(x, a)
np.allclose(x, zipf.ppf(prob, a))

# Draw 1000 random variates.
r = zipf.rvs(a, size=1000)