def trunc_logser_pmf(x, p, upper_bound):
    """Probability mass function for the upper truncated log-series

    Parameters
    ----------
    x : array_like
        Values of `x` for which the pmf should be determined
    p : float
        Parameter for the log-series distribution
    upper_bound : float
        Upper bound of the distribution

    Returns
    -------
    pmf : array
        Probability mass function for each value of `x`. Values of `x`
        that are not integers in ``[1, upper_bound]`` get probability 0.

    Notes
    -----
    For ``p < 1`` the ordinary log-series pmf is renormalised by its cdf
    at `upper_bound`. For ``p >= 1`` the terms ``p**i / i`` are normalised
    by their explicit sum over ``i = 1 .. floor(upper_bound)``.
    """
    x = np.asarray(x, dtype=float)
    # A truncated pmf carries no mass outside its support.
    in_support = (x >= 1) & (x <= upper_bound) & (x == np.floor(x))

    if p < 1:
        raw = logser.pmf(x, p) / logser.cdf(upper_bound, p)
    else:
        # floor() keeps a fractional bound from adding a term above it.
        ivals = np.arange(1, np.floor(upper_bound) + 1)
        normalization = np.sum(p ** ivals / ivals)
        # Substitute a harmless value outside the support so that x == 0
        # does not trigger a division by zero before masking.
        safe_x = np.where(in_support, x, 1.0)
        raw = (p ** safe_x / safe_x) / normalization

    # [()] turns a 0-d result back into a scalar, leaves arrays untouched.
    return np.where(in_support, raw, 0.0)[()]
def __init__(self, parameters, initial_distribution=None):
    """Copy `parameters` onto the instance and set up its distribution.

    Parameters
    ----------
    parameters : mapping
        Every key/value pair is stored as an attribute of the instance.
        The code below reads ``Ncap``, ``theta_m0``, ``epsilon`` and
        ``theta_N`` afterwards, so these are presumably supplied here
        (verify against the callers).
    initial_distribution : optional
        Stored as ``self.distribution`` unchanged when given. When None,
        the distribution is a column matrix holding the log-series pmf on
        ``1 .. Ncap`` multiplied by ``self.get_M()``; the log-series
        parameter comes from
        ``logarithmic_theta_from_mean(self.get_n())``.
    """
    # items() exists on Python 2 and 3; iteritems() is Python 2 only.
    for key, value in parameters.items():
        setattr(self, key, value)

    if initial_distribution is None:
        # initialize distribution from given parameters
        nbar = self.get_n()
        M = self.get_M()
        sizes = 1 + np.arange(self.Ncap)
        theta = logarithmic_theta_from_mean(nbar)
        # np.asmatrix replaces the np.mat alias removed in NumPy 2.0.
        self.distribution = np.asmatrix(logser.pmf(sizes, theta)).T * M
    else:
        self.distribution = initial_distribution

    if self.theta_m0 > 0:
        self.zeta = (
            (2 - self.epsilon) / (self.epsilon - 1) + self.theta_N
        ) / self.theta_m0
    else:
        # zeta is left undefined for a non-positive theta_m0
        self.zeta = None
def __init__(self, parameters, initial_distribution=None):
    """Initialise the instance from a parameter mapping.

    Parameters
    ----------
    parameters : mapping
        Each entry is attached to the instance with ``setattr``. The
        attributes ``Ncap``, ``theta_m0``, ``epsilon`` and ``theta_N``
        are read below and are presumably expected among the keys
        (confirm with the callers).
    initial_distribution : optional
        Used as ``self.distribution`` as-is when provided. Otherwise the
        distribution is built as ``logser.pmf`` evaluated on
        ``1 .. Ncap`` (parameter obtained through
        ``logarithmic_theta_from_mean(self.get_n())``), transposed into a
        column matrix and scaled by ``self.get_M()``.
    """
    # dict.iteritems() was removed in Python 3; items() works everywhere.
    for key, value in parameters.items():
        setattr(self, key, value)

    if initial_distribution is not None:
        self.distribution = initial_distribution
    else:
        # initialize distribution from given parameters
        nbar = self.get_n()
        M = self.get_M()
        support = 1 + np.arange(self.Ncap)
        pmf = logser.pmf(support, logarithmic_theta_from_mean(nbar))
        # np.mat is gone in NumPy 2.0; np.asmatrix is the same operation.
        self.distribution = np.asmatrix(pmf).T * M

    if self.theta_m0 > 0:
        ratio = (2 - self.epsilon) / (self.epsilon - 1)
        self.zeta = (ratio + self.theta_N) / self.theta_m0
    else:
        # no zeta when theta_m0 is not positive
        self.zeta = None
def get_distribution(dist_name, k):
    """Return the raw (unnormalised) weights of a named distribution.

    Parameters
    ----------
    dist_name : str
        One of 'Uniform', 'Two-step', 'Three-step', 'Subset-uniform',
        'Log-series', 'Geometric', 'Zipf-half' or 'Zipf-one'.
    k : int
        Number of entries in the returned list; converted with ``int``.

    Returns
    -------
    list
        ``k`` weights. They are not rescaled to sum to one.

    Raises
    ------
    ValueError
        If `dist_name` is not one of the supported names.
    """
    k = int(k)
    if dist_name == 'Uniform':
        raw_distribution = [1] * k
    elif dist_name == 'Two-step':
        half = k // 2
        raw_distribution = [1] * half + [4] * (k - half)
    elif dist_name == 'Three-step':
        third = k // 3
        # The last step takes whatever the first two leave over. The old
        # `k - 2 * k // 3` parsed as `k - (2 * k) // 3` and dropped an
        # entry whenever k % 3 == 2.
        raw_distribution = [1] * third + [3] * third + [9] * (k - 2 * third)
    elif dist_name == 'Subset-uniform':
        half = k // 2
        raw_distribution = [1] * half + [0] * (k - half)
    elif dist_name == 'Log-series':
        # * 1.0 forces true division, matching the 'Geometric' branch.
        p = (k - 2) * 1.0 / k
        # One vectorised pmf call instead of k scalar calls.
        raw_distribution = list(logser.pmf(range(1, k + 1), p))
    elif dist_name == 'Geometric':
        p = (k - 1) * 1.0 / k
        raw_distribution = [(1 - p) * p**i for i in range(k)]
    elif dist_name == 'Zipf-half':
        raw_distribution = [1 / (float(i)**(0.5)) for i in range(1, k + 1)]
    elif dist_name == 'Zipf-one':
        raw_distribution = [1 / (float(i)**(1)) for i in range(1, k + 1)]
    else:
        # Previously this fell through to an UnboundLocalError at `return`.
        raise ValueError('Unknown distribution name: %r' % (dist_name,))
    return raw_distribution
def abundance_histogram(x, ax=None, figsize=None, trendline=False,
                        xlabel='Species', title='Sightings histogram'):
    r"""
    Plot an assemblage's frequency histogram as a bar plot

    Parameters
    ----------
    x : 1D numpy array with shape (number of species)
        An array representing the abundances (observed
        counts) for each individual species.
    ax : plt.Axes (default = None)
        The ax to plot on or None if a new plt.Figure
        is required.
    figsize : 2-way tuple (default = None)
        The size of the new plt.Figure to be plotted
        (Ignored if an axis is passed.)
    trendline : bool (default = False)
        If True, a trendline (Fisher log-series) is fitted
        and added to the barplot as a reading aid. The fit
        matches the log-series mean to the mean of the
        non-zero abundances.
    xlabel : str (default = 'Species')
        Label of the x-axis.
    title : str (default = 'Sightings histogram')
        Title of the plot.

    Note
    ----
    The code for fitting the trendline is based on the
    [macroeco package](https://github.com/jkitzes/macroeco/blob/master/macroeco/models/_distributions.py).

    Returns
    -------
    ax : plt.Axes
        The resulting plot's (primary) axis.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=figsize)

    # Keep the abundances under their own name: the trendline fit needs
    # them after the histogram counts have been derived.
    abundances = np.array(sorted(x, reverse=True))

    textstr = (f'Categories: {np.count_nonzero(abundances)}\n'
               f'Observations: {abundances.sum()}\n'
               f'$f_1$: {np.count_nonzero(abundances == 1)}\n'
               f'$f_2$: {np.count_nonzero(abundances == 2)}')

    # frequencies[i] = number of species observed exactly pos[i] times
    counter = Counter(abundances)
    max_count = max(counter.keys())
    pos = list(range(1, max_count + 1))
    frequencies = np.array([counter[k] for k in pos])

    # `_get_lines.prop_cycler` is gone in Matplotlib >= 3.8;
    # get_next_color() advances the same colour cycle.
    ax.bar(pos, frequencies, alpha=.7, align='center',
           color=ax._get_lines.get_next_color())
    ax.set(xlabel=xlabel, title=title)
    ax.annotate(textstr, xy=(0.7, 0.7), xycoords='axes fraction',
                va='center', backgroundcolor='white')

    if trendline:
        # Fit on the abundances themselves, not on the histogram counts.
        # The log-series lives on k >= 1, so zero counts are left out.
        mu = np.mean(abundances[abundances > 0])

        def mean_residual(p, mu):
            # log-series mean for parameter p, minus the target mean
            return -p / np.log(1 - p) / (1 - p) - mu

        p = optim.brentq(mean_residual, 1e-16, 1 - 1e-16, args=(mu,),
                         disp=True)
        estims = logser.pmf(pos, p)
        # The pmf is on a [0, 1] scale, hence the secondary y-axis.
        ax2 = ax.twinx()
        ax2.plot(pos, estims, 'r--')
        ax2.grid(None)
        ax2.set(ylabel="Fisher's log series (pmf)", ylim=(0, 1))

    return ax
def generate_graph_data(self):
    """Build the pmf curve of the selected table row and plot it.

    The selected row of ``self.tableModel.data`` supplies the age group
    (column 0) and parameter name (column 1). These look up ``p1``,
    ``p2`` and ``distributionType`` in ``self.temporaryParametersDict``.
    The x values run in unit steps from the 1% quantile up to (but not
    including) the 99% quantile; y holds the pmf at those points. For
    distributions that use ``p1`` only, a non-zero ``p2`` is reset to 0
    in column 3 of the selected row. The result goes to
    ``self.update_graph``. Any exception raised while computing or
    plotting is logged, not propagated.
    """
    selected_row = self.tableModel.data[self.selected_item_index.row()]
    ageGroup = selected_row[0]
    parameter = selected_row[1]

    settings = self.temporaryParametersDict[ageGroup][parameter]
    p1 = settings["p1"]
    p2 = settings["p2"]
    distributionType = settings["distributionType"]

    def quantile_grid(dist, *shape):
        # unit-step grid from the 1% quantile to below the 99% quantile
        return np.arange(dist.ppf(0.01, *shape), dist.ppf(0.99, *shape))

    def clear_unused_p2():
        # single-parameter distributions: force the p2 cell back to 0
        if p2 != 0:
            self.tableModel.setData(
                self.selected_item_index.sibling(
                    self.selected_item_index.row(), 3),
                0, Qt.EditRole)

    xyDict = {"x": [], "y": []}
    try:
        if distributionType == 'Binomial':
            trials, prob = int(p1), p2 / 100
            xyDict["x"] = quantile_grid(binom, trials, prob)
            xyDict["y"] = binom.pmf(xyDict["x"], trials, prob)
        elif distributionType == 'Geometric':
            prob = p1 / 100
            xyDict["x"] = quantile_grid(geom, prob)
            xyDict["y"] = geom.pmf(xyDict["x"], prob)
            clear_unused_p2()
        elif distributionType == 'Laplacian':
            shape = p1 / 100
            xyDict["x"] = quantile_grid(dlaplace, shape)
            xyDict["y"] = dlaplace.pmf(xyDict["x"], shape)
            clear_unused_p2()
        elif distributionType == 'Logarithmic':
            shape = p1 / 100
            xyDict["x"] = quantile_grid(logser, shape)
            xyDict["y"] = logser.pmf(xyDict["x"], shape)
            clear_unused_p2()
        elif distributionType == 'Neg. binomial':
            prob = p2 / 100
            xyDict["x"] = quantile_grid(nbinom, p1, prob)
            xyDict["y"] = nbinom.pmf(xyDict["x"], p1, prob)
        elif distributionType == 'Planck':
            shape = p1 / 100
            xyDict["x"] = quantile_grid(planck, shape)
            xyDict["y"] = planck.pmf(xyDict["x"], shape)
            clear_unused_p2()
        elif distributionType == 'Poisson':
            xyDict["x"] = quantile_grid(poisson, p1)
            xyDict["y"] = poisson.pmf(xyDict["x"], p1)
            clear_unused_p2()
        elif distributionType == 'Uniform':
            # p1 is the centre and p2 the width; when the lower edge
            # would drop below zero, the width falls back to p1
            width = p1 if p1 - 0.5 * p2 < 0 else p2
            lower = p1 - 0.5 * width
            upper = p1 + 0.5 * width
            xyDict["x"] = quantile_grid(randint, lower, upper)
            xyDict["y"] = randint.pmf(xyDict["x"], lower, upper)
        elif distributionType == 'Zipf (Zeta)':
            xyDict["x"] = quantile_grid(zipf, p1)
            xyDict["y"] = zipf.pmf(xyDict["x"], p1)
            clear_unused_p2()
        self.update_graph(xyDict)
    except Exception as E:
        log.error(E)
import matplotlib.pyplot as plt
import numpy as np  # np.arange is used below; the import was missing
from scipy.stats import logser

fig, ax = plt.subplots(1, 1)

# Calculate the first four moments (mean, variance, skewness, kurtosis):
p = 0.6
mean, var, skew, kurt = logser.stats(p, moments='mvsk')

# Display the probability mass function (``pmf``) in unit steps from the
# 1% quantile up to, but not including, the 99% quantile:
x = np.arange(logser.ppf(0.01, p), logser.ppf(0.99, p))
pmf = logser.pmf(x, p)
ax.plot(x, pmf, 'bo', ms=8, label='logser pmf')
ax.vlines(x, 0, pmf, colors='b', lw=5, alpha=0.5)

# Alternatively, the distribution object can be called (as a function)
# to fix the shape and location. This returns a "frozen" RV object holding
# the given parameters fixed.

# Freeze the distribution and display the frozen ``pmf``:
rv = logser(p)
ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
          label='frozen pmf')
ax.legend(loc='best', frameon=False)
plt.show()
def theoretical_probability(self):
    """Plot the log-series pmf on the integers ``r_low .. r_up``.

    Evaluates ``logser.pmf`` with parameter ``self.p`` at every integer
    from ``self.r_low`` to ``self.r_up`` (both included) and draws the
    values as blue circles on the current pyplot axes.
    """
    support = np.array(list(range(self.r_low, self.r_up + 1)))
    probabilities = logser.pmf(support, self.p)
    plt.plot(support, probabilities, 'bo')