def get_poisson_distribution(date_range, country_code, global_min, global_max):
    """Compute the plausible Poisson range for one observation and flag outliers.

    Args:
        date_range (pandas.core.series.Series): The date range of country data
            for the poisson distribution to be applied to. Element ``0`` is
            read as the current value, element ``1`` as the comparison value,
            and ``date_range.name`` is the key used to look up both global
            bounds.
        country_code (string): The country code of the country being explored.
        global_min (pandas.core.series.Series): A time series list of the
            global minimum tendencies for tor users.
        global_max (pandas.core.series.Series): A time series list of the
            global maximum tendencies for tor users.

    Returns:
        pandas.Series: Always carries ``country``, ``min`` and ``max``. When
        either global bound or either observation is missing, ``min`` and
        ``max`` are ``None`` and no other keys are present. Otherwise the
        series also carries ``users`` (the current value), ``event_censor``
        (1 if the current value is below ``min``) and ``event_spike`` (1 if it
        is above ``max``).
    """
    current_date = date_range[0]
    comparison_date = date_range[1]
    day = date_range.name

    # No range can be computed without both global bounds for this day and
    # both observations. pd.isnull is used throughout because, unlike
    # np.isnan, it accepts None / object values instead of raising TypeError.
    if (pd.isnull(global_min[day]) or pd.isnull(global_max[day])
            or pd.isnull(comparison_date) or pd.isnull(current_date)):
        return pd.Series({"country": country_code, "min": None, "max": None})

    # poisson.ppf(plausible_range, shape_params): the 0.01% and 99.99%
    # quantiles of a Poisson whose mean is the comparison value, scaled by
    # the global bounds for this day.
    min_range = global_min[day] * poisson.ppf(1 - 0.9999, comparison_date)
    max_range = global_max[day] * poisson.ppf(0.9999, comparison_date)

    down_score = 1 if current_date < min_range else 0
    up_score = 1 if current_date > max_range else 0

    return pd.Series({
        "country": country_code,
        "min": min_range,
        "max": max_range,
        "users": current_date,
        "event_censor": down_score,
        "event_spike": up_score,
    })
def CorrectInitialCorrel(lambda1, lambda2, r):
    """Transform a target correlation ``r`` using simulated Poisson bounds.

    Draws 500 uniforms, maps them through the Poisson quantile functions for
    ``lambda1`` and ``lambda2``, and measures the Pearson correlation of the
    pair when both use the same uniforms (``maxcor``) and when the second
    uses ``1 - u`` (``mincor``). Those two values define the constants ``a``
    and ``b`` of the mapping ``log((r + a) / a) / b``.

    NOTE(review): presumably the result is the correlation to use for the
    underlying normal variates of a correlated-Poisson generator -- confirm
    against the caller.

    Args:
        lambda1: Mean of the first Poisson marginal.
        lambda2: Mean of the second Poisson marginal.
        r: Correlation value to transform.

    Returns:
        The transformed correlation, or ``nan`` when it falls outside
        ``[-1, 1]``. The result depends on NumPy's global random state.
    """
    samples = 500
    u = np.random.uniform(low=0, high=1, size=samples)

    first = poisson.ppf(u, lambda1)
    maxcor = pearsonr(first, poisson.ppf(u, lambda2))[0]
    mincor = pearsonr(first, poisson.ppf(1 - u, lambda2))[0]

    a = -maxcor * mincor / (maxcor + mincor)
    b = np.log((maxcor + a) / a)
    corrected = np.log((r + a) / a) / b

    if corrected > 1 or corrected < -1:
        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
        return np.nan
    return corrected
def absolute_plot(series, minc, maxc, labels, INTERVAL, xtitle):
    """Derive per-point Poisson bounds for ``series`` and pass them to raw_plot.

    For every index ``i`` that has a usable value ``INTERVAL`` steps earlier,
    the lower bound is ``minc[i]`` times the 0.01% Poisson quantile and the
    upper bound is ``maxc[i]`` times the 99.99% Poisson quantile, both with
    the earlier value as the Poisson mean. Indices without a usable bound get
    ``None`` in both lists.

    Args:
        series: Indexable sequence of observations; ``None`` marks a gap.
        minc: Per-index multipliers for the lower bound (``None`` = missing).
        maxc: Per-index multipliers for the upper bound (``None`` = missing).
        labels: Passed through to ``raw_plot`` unchanged.
        INTERVAL: Lag, in positions, between an observation and its baseline.
        xtitle: Passed through to ``raw_plot`` unchanged.

    Raises:
        AssertionError: If a computed lower bound is not strictly below the
            upper bound; the offending values are printed first.
    """
    in_minc = []
    in_maxc = []
    for i, _ in enumerate(series):
        usable = (
            i > 0
            and i - INTERVAL >= 0
            and series[i] is not None
            and series[i - INTERVAL] is not None
            and series[i - INTERVAL] != 0
            and minc[i] is not None
            and maxc[i] is not None
        )
        if not usable:
            in_minc.append(None)
            in_maxc.append(None)
            continue

        baseline = series[i - INTERVAL]
        lower = minc[i] * poisson.ppf(1 - 0.9999, baseline)
        upper = maxc[i] * poisson.ppf(0.9999, baseline)
        in_minc.append(lower)
        in_maxc.append(upper)

        if not lower < upper:
            # Single pre-formatted string so the output is the same under
            # both the Python 2 print statement and the Python 3 function.
            print("%s %s %s %s %s" % (lower, upper, baseline, minc[i], maxc[i]))
        assert lower < upper

    raw_plot(series, in_minc, in_maxc, labels, xtitle)
def write_all(tss, minc, maxc, INTERVAL=7):
    """Write the Poisson plausible range of every country to a CSV file.

    The output file is ``direct-users-ranges.csv`` in the current working
    directory, with the header ``date,country,minusers,maxusers``. A row is
    written for each country and index that has a usable value ``INTERVAL``
    positions earlier; the country codes ``all``, ``??`` and ``date`` are
    skipped.

    Args:
        tss: Object exposing ``country_codes`` (iterable of codes),
            ``get_country_series(code)`` (indexable series, ``None`` = gap)
            and ``all_dates`` (indexable, aligned with the series).
        minc: Per-index multipliers for the lower bound (``None`` = missing).
        maxc: Per-index multipliers for the upper bound (``None`` = missing).
        INTERVAL: Lag, in positions, between an observation and its baseline.

    Raises:
        AssertionError: If a computed lower bound is not strictly below the
            upper bound; the offending values are printed first.
    """
    exclude = set(["all", "??", "date"])
    # The context manager closes the file even when the assertion below fires.
    with open("direct-users-ranges.csv", "w") as ranges_file:
        ranges_file.write("date,country,minusers,maxusers\n")
        for c in tss.country_codes:
            if c in exclude:
                continue
            series = tss.get_country_series(c)
            for i, _ in enumerate(series):
                usable = (
                    i > 0
                    and i - INTERVAL >= 0
                    and series[i] is not None
                    and series[i - INTERVAL] is not None
                    and series[i - INTERVAL] != 0
                    and minc[i] is not None
                    and maxc[i] is not None
                )
                if not usable:
                    continue

                baseline = series[i - INTERVAL]
                minv = minc[i] * poisson.ppf(1 - 0.9999, baseline)
                maxv = maxc[i] * poisson.ppf(0.9999, baseline)
                if not minv < maxv:
                    # One formatted string prints identically on Python 2/3.
                    print("%s %s %s %s %s"
                          % (minv, maxv, baseline, minc[i], maxc[i]))
                assert minv < maxv
                ranges_file.write(
                    "%s,%s,%s,%s\n" % (tss.all_dates[i], c, minv, maxv))
def censor_score(series, minc, maxc, INTERVAL, scoring_interval=None):
    """Count observations that fall outside their Poisson plausible range.

    For each index ``i`` with a usable value ``INTERVAL`` positions earlier,
    the plausible range is ``minc[i]`` times the 0.01% Poisson quantile up to
    ``maxc[i]`` times the 99.99% Poisson quantile, using the earlier value as
    the Poisson mean. Only the last ``scoring_interval`` positions of
    ``series`` are scored.

    Args:
        series: Indexable sequence of observations; ``None`` marks a gap.
        minc: Per-index multipliers for the lower bound (``None`` = missing).
        maxc: Per-index multipliers for the upper bound (``None`` = missing).
        INTERVAL: Lag, in positions, between an observation and its baseline.
        scoring_interval: Number of trailing positions to score; defaults to
            the whole series.

    Returns:
        tuple: ``(downscore, upscore)`` -- how many scored observations were
        below the lower bound and above the upper bound respectively.

    Raises:
        AssertionError: If ``scoring_interval`` exceeds ``len(series)``.
    """
    if scoring_interval is None:
        scoring_interval = len(series)
    assert len(series) >= scoring_interval

    first_scored = len(series) - scoring_interval
    upscore = 0
    downscore = 0
    for i, v in enumerate(series):
        # Positions before the scoring window never contribute, so skip them
        # before paying for the Poisson quantile computations.
        if i < first_scored:
            continue
        usable = (
            i > 0
            and i - INTERVAL >= 0
            and series[i] is not None
            and series[i - INTERVAL] is not None
            and series[i - INTERVAL] != 0
            and minc[i] is not None
            and maxc[i] is not None
        )
        if not usable:
            continue

        baseline = series[i - INTERVAL]
        in_minc = minc[i] * poisson.ppf(1 - 0.9999, baseline)
        in_maxc = maxc[i] * poisson.ppf(0.9999, baseline)
        if v < in_minc:
            downscore += 1
        if v > in_maxc:
            upscore += 1
    return downscore, upscore
def GenerateMultivariatePoisson(p, samples, R, lmbda):
    """Draw Poisson variates from correlated normal draws.

    Samples ``samples`` zero-mean multivariate normal vectors of dimension
    ``p`` with covariance ``R``, transposes the draws, converts them to
    uniforms with the standard normal CDF and maps those through the Poisson
    quantile function with mean ``lmbda``.

    Args:
        p: Dimension of the multivariate normal.
        samples: Number of draws.
        R: Covariance matrix handed to ``np.random.multivariate_normal``.
        lmbda: Poisson mean(s); must broadcast against the transposed draws.

    Returns:
        The array produced by ``poisson.ppf`` on the transposed uniforms. The
        result depends on NumPy's global random state.
    """
    zero_mean = np.repeat(0, p)  # one zero per dimension of the distribution
    gaussian_draws = np.random.multivariate_normal(
        mean=zero_mean, cov=R, size=samples
    ).T
    uniforms = norm.cdf(gaussian_draws)
    # Inverse Poisson CDF turns the uniforms into counts.
    return poisson.ppf(uniforms, lmbda)
def censor_score(series, minc, maxc, INTERVAL, scoring_interval=None):
    """Score how often ``series`` leaves its Poisson-derived plausible range.

    The range at index ``i`` runs from ``minc[i]`` times the 0.01% Poisson
    quantile to ``maxc[i]`` times the 99.99% Poisson quantile, where the
    Poisson mean is the observation ``INTERVAL`` positions earlier. Indices
    with a missing (``None``) observation, baseline or multiplier, or a zero
    baseline, are ignored, and only the trailing ``scoring_interval``
    positions are counted.

    Args:
        series: Indexable sequence of observations; ``None`` marks a gap.
        minc: Per-index multipliers for the lower bound.
        maxc: Per-index multipliers for the upper bound.
        INTERVAL: Lag, in positions, between an observation and its baseline.
        scoring_interval: Number of trailing positions to score; ``None``
            scores the whole series.

    Returns:
        tuple: ``(downscore, upscore)``, the counts of observations below the
        lower bound and above the upper bound.

    Raises:
        AssertionError: If ``scoring_interval`` is larger than the series.
    """
    total = len(series)
    if scoring_interval is None:
        scoring_interval = total
    assert total >= scoring_interval

    window_start = total - scoring_interval
    downscore = 0
    upscore = 0
    for i, v in enumerate(series):
        # Check the scoring window first: values outside it cannot change the
        # result, so their Poisson quantiles are never needed.
        if i < window_start or i <= 0 or i - INTERVAL < 0:
            continue
        if (series[i] is None or series[i - INTERVAL] is None
                or series[i - INTERVAL] == 0
                or minc[i] is None or maxc[i] is None):
            continue

        lam = series[i - INTERVAL]
        if v < minc[i] * poisson.ppf(1 - 0.9999, lam):
            downscore += 1
        if v > maxc[i] * poisson.ppf(0.9999, lam):
            upscore += 1
    return downscore, upscore
def absolute_plot(series, minc, maxc, labels, INTERVAL, xtitle):
    """Compute Poisson plausible bounds for ``series`` and plot via raw_plot.

    Each index ``i`` whose observation, baseline (the value ``INTERVAL``
    positions earlier) and multipliers are all present, with a non-zero
    baseline, gets a lower bound of ``minc[i]`` times the 0.01% Poisson
    quantile and an upper bound of ``maxc[i]`` times the 99.99% Poisson
    quantile, using the baseline as the Poisson mean. All other indices get
    ``None`` for both bounds.

    Args:
        series: Indexable sequence of observations; ``None`` marks a gap.
        minc: Per-index multipliers for the lower bound.
        maxc: Per-index multipliers for the upper bound.
        labels: Forwarded to ``raw_plot`` as-is.
        INTERVAL: Lag, in positions, between an observation and its baseline.
        xtitle: Forwarded to ``raw_plot`` as-is.

    Raises:
        AssertionError: If a lower bound is not strictly below its upper
            bound; the values involved are printed before raising.
    """
    in_minc = []
    in_maxc = []
    for i, _ in enumerate(series):
        if (i > 0 and i - INTERVAL >= 0
                and series[i] is not None
                and series[i - INTERVAL] is not None
                and series[i - INTERVAL] != 0
                and minc[i] is not None
                and maxc[i] is not None):
            lam = series[i - INTERVAL]
            in_minc.append(minc[i] * poisson.ppf(1 - 0.9999, lam))
            in_maxc.append(maxc[i] * poisson.ppf(0.9999, lam))
            if not in_minc[-1] < in_maxc[-1]:
                # Formatted into one string so Python 2 and Python 3 print
                # the same space-separated line.
                print("%s %s %s %s %s"
                      % (in_minc[-1], in_maxc[-1], lam, minc[i], maxc[i]))
            assert in_minc[-1] < in_maxc[-1]
        else:
            in_minc.append(None)
            in_maxc.append(None)

    raw_plot(series, in_minc, in_maxc, labels, xtitle)
def write_all(tss, minc, maxc, RANGES_FILE, INTERVAL=7):
    """Write the Poisson plausible range of every country to ``RANGES_FILE``.

    The file starts with the header ``date,country,minusers,maxusers`` and
    gets one row per country and index that has a usable value ``INTERVAL``
    positions earlier. The country codes ``all``, ``??`` and ``date`` are
    skipped.

    Args:
        tss: Object exposing ``country_codes`` (iterable of codes),
            ``get_country_series(code)`` (indexable series, ``None`` = gap)
            and ``all_dates`` (indexable, aligned with the series).
        minc: Per-index multipliers for the lower bound (``None`` = missing).
        maxc: Per-index multipliers for the upper bound (``None`` = missing).
        RANGES_FILE: Path of the CSV file to create or overwrite.
        INTERVAL: Lag, in positions, between an observation and its baseline.

    Raises:
        AssertionError: If a computed lower bound is not strictly below the
            upper bound; the offending values are printed first.
    """
    exclude = set(["all", "??", "date"])
    # open() exists on both Python 2 and 3 (file() does not), and the with
    # block closes the handle even if the assertion below fails.
    with open(RANGES_FILE, "w") as ranges_file:
        ranges_file.write("date,country,minusers,maxusers\n")
        for c in tss.country_codes:
            if c in exclude:
                continue
            series = tss.get_country_series(c)
            for i, _ in enumerate(series):
                if not (i > 0 and i - INTERVAL >= 0
                        and series[i] is not None
                        and series[i - INTERVAL] is not None
                        and series[i - INTERVAL] != 0
                        and minc[i] is not None
                        and maxc[i] is not None):
                    continue

                lam = series[i - INTERVAL]
                minv = minc[i] * poisson.ppf(1 - 0.9999, lam)
                maxv = maxc[i] * poisson.ppf(0.9999, lam)
                if not minv < maxv:
                    # One formatted string prints identically on Python 2/3.
                    print("%s %s %s %s %s"
                          % (minv, maxv, lam, minc[i], maxc[i]))
                assert minv < maxv
                ranges_file.write(
                    "%s,%s,%s,%s\n" % (tss.all_dates[i], c, minv, maxv))
z[:, j] = a * z[:, j - 1] + np.sqrt(1 - a**2) * z[:, j] u = norm.cdf(z) # The covariates x1 = np.random.normal(size=(n, q)) x2 = np.random.normal(size=(n, q)) # The mean parameters for the marginal distributions lpr = x1 - 0.5 * x2 expval = np.exp(lpr) # The response values. These are marginally Poisson with the specified means. y = np.zeros((n, q)) for i in range(n): for j in range(q): y[i, j] = poisson.ppf(u[i, j], expval[i, j]) idv = np.outer(np.arange(n), np.ones(q)) time = np.outer(np.ones(n), np.arange(q)) df = pd.DataFrame({ "y": y.flat, "x1": x1.flat, "x2": x2.flat, "grp": idv.flat, "time": time.flat }) model = sm.GEE.from_formula("y ~ x1 + x2", groups="grp", family=sm.families.Poisson(),
def negbinom(u, mu, scale):
    """Map uniforms through Poisson quantiles with gamma-distributed means.

    One gamma variate is drawn per element of ``u`` with shape
    ``mu * (1 - p) / p`` and scale ``p / (1 - p)``, where
    ``p = (scale - 1) / scale``; each variate is then used as the Poisson
    mean when inverting the corresponding uniform.

    Args:
        u: Sequence of uniform values to invert; its length sets the number
            of gamma draws.
        mu: Mean parameter entering the gamma shape.
        scale: Dispersion parameter that defines ``p``.

    Returns:
        The result of ``poisson.ppf(u, mu=...)`` evaluated at the gamma
        draws. The result depends on NumPy's global random state.
    """
    prob = (scale - 1) / scale
    gamma_shape = mu * (1 - prob) / prob
    gamma_scale = prob / (1 - prob)
    rates = np.random.gamma(gamma_shape, gamma_scale, len(u))
    return poisson.ppf(u, mu=rates)