def test_check_discrete_minibatch(self):
    """Expect advi_minibatch to raise ValueError on the disaster model.

    The model contains a DiscreteUniform 'switchpoint' and a Poisson
    likelihood observed through a symbolic vector.
    """
    n_obs = len(self.disaster_data)
    observed_t = tt.vector()
    observed_t.tag.test_value = np.zeros(n_obs)

    def create_minibatches():
        # Hands back the whole dataset wrapped in a one-element tuple.
        return (self.disaster_data, )

    years = self.year
    with Model():
        change_year = DiscreteUniform(
            'switchpoint',
            lower=years.min(),
            upper=years.max(),
            testval=1900,
        )

        # Priors on the disaster rate before and after the switch
        rate_before = Exponential('early_rate', 1)
        rate_after = Exponential('late_rate', 1)

        # Years up to the switchpoint get the early rate, later ones the late rate
        yearly_rate = tt.switch(change_year >= years, rate_before, rate_after)

        disasters = Poisson('disasters', yearly_rate, observed=observed_t)

        with self.assertRaises(ValueError):
            advi_minibatch(
                n=10,
                minibatch_RVs=[disasters],
                minibatch_tensors=[observed_t],
                minibatches=create_minibatches(),
            )
def test_check_discrete_minibatch():
    """Expect advi_minibatch to raise ValueError on the disaster model.

    The model contains a DiscreteUniform 'switchpoint' and a Poisson
    likelihood observed through a symbolic vector.
    """
    observed_t = tt.vector()
    observed_t.tag.test_value = np.zeros(len(disaster_data))

    with Model() as disaster_model:
        change_year = DiscreteUniform(
            'switchpoint',
            lower=year.min(),
            upper=year.max(),
            testval=1900,
        )

        # Priors on the disaster rate before and after the switch
        rate_before = Exponential('early_rate', 1)
        rate_after = Exponential('late_rate', 1)

        # Years up to the switchpoint get the early rate, later ones the late rate
        yearly_rate = switch(change_year >= year, rate_before, rate_after)

        disasters = Poisson('disasters', yearly_rate, observed=observed_t)

    def create_minibatch():
        # Hands back the whole dataset wrapped in a one-element tuple.
        return (disaster_data, )

    call_kwargs = dict(
        model=disaster_model,
        n=10,
        minibatch_RVs=[disasters],
        minibatch_tensors=[observed_t],
        minibatches=create_minibatch(),
        verbose=False,
    )

    # This should raise ValueError
    assert_raises(ValueError, advi_minibatch, **call_kwargs)
def test_check_discrete(self):
    """Expect advi to raise ValueError on the disaster model.

    The model contains a DiscreteUniform 'switchpoint' variable.
    """
    years = self.year
    with Model():
        change_year = DiscreteUniform(
            'switchpoint',
            lower=years.min(),
            upper=years.max(),
            testval=1900,
        )

        # Priors on the disaster rate before and after the switch
        rate_before = Exponential('early_rate', 1)
        rate_after = Exponential('late_rate', 1)

        # Years up to the switchpoint get the early rate, later ones the late rate
        yearly_rate = tt.switch(change_year >= years, rate_before, rate_after)

        Poisson('disasters', yearly_rate, observed=self.disaster_data)

        # This should raise ValueError
        with self.assertRaises(ValueError):
            advi(n=10)
def test_check_discrete():
    """Expect advi to raise ValueError on the disaster model.

    The model contains a DiscreteUniform 'switchpoint' variable.
    """
    with Model() as disaster_model:
        change_year = DiscreteUniform(
            'switchpoint',
            lower=year.min(),
            upper=year.max(),
            testval=1900,
        )

        # Priors on the disaster rate before and after the switch
        rate_before = Exponential('early_rate', 1)
        rate_after = Exponential('late_rate', 1)

        # Years up to the switchpoint get the early rate, later ones the late rate
        yearly_rate = switch(change_year >= year, rate_before, rate_after)

        # Registered on the model; the returned handle is not needed here.
        Poisson('disasters', yearly_rate, observed=disaster_data)

    # This should raise ValueError
    assert_raises(ValueError, advi, model=disaster_model, n=10)
], value=-999) year = np.arange(1851, 1962) plt.plot(year, disaster_data, 'o', markersize=8) plt.ylabel("Disaster count") plt.xlabel("Year") plt.show() from pymc3 import DiscreteUniform, Poisson, switch, Model, Exponential, NUTS, Metropolis, sample, traceplot with Model() as disaster_model: switchpoint = DiscreteUniform('switchpoint', lower=year.min(), upper=year.max(), testval=1900) # Priors for pre- and post-switch rates number of disasters early_rate = Exponential('early_rate', 1) late_rate = Exponential('late_rate', 1) # Allocate appropriate Poisson rates to years before and after current rate = switch(switchpoint >= year, early_rate, late_rate) disasters = Poisson('disasters', rate, observed=disaster_data) step1 = NUTS([early_rate, late_rate]) # Use Metropolis for switchpoint, and missing values since it accommodates discrete variables step2 = Metropolis([switchpoint, disasters.missing_values[0]])
def createSignalModelWithLookup(data, wfMax):
    """Build a PyMC3 model of a waveform from a pre-computed siggen lookup table.

    Uses a lookup table to avoid having to call siggen.  Lookup locations are
    along a one-dimensional line from PC to the detector corner.  See
    generate_siggen_lookup.py.

    The model places the simulated pulse (row ``dtEstimate`` of the
    module-level ``dt_array``, scaled by ``wf_scale``) at sample index
    ``switchpoint`` of an otherwise zero baseline, and compares it with
    ``data`` under a Normal likelihood whose standard deviation is
    ``noise_sigma`` up to the switchpoint and ``siggen_sigma`` after it.

    Parameters
    ----------
    data : sequence
        Observed waveform samples; its length fixes the number of time steps.
    wfMax : float
        Maximum of the input signal.  Used as the mean of the prior for the
        scaling of the simulated pulse.

    Returns
    -------
    The ``Model`` instance holding the variables defined here.
    """
    with Model() as signal_model:

        switchpoint = DiscreteUniform('switchpoint', lower=0, upper=len(data))
        noise_sigma = HalfNormal('noise_sigma', sd=1.)
        siggen_sigma = HalfNormal('siggen_sigma', sd=10.)

        # Builtin int is what the removed ``np.int`` alias pointed to.
        timestamp = np.arange(0, len(data), dtype=int)

        # Noise-only sigma up to the switchpoint, signal sigma afterwards
        uncertainty_model = switch(switchpoint >= timestamp, noise_sigma, siggen_sigma)

        wf_scale = Normal('wf_scale', sd=10., mu=wfMax)

        detRad = np.floor(35.41)
        detZ = np.floor(41.5)

        dtEstimate = DiscreteUniform('dtEstimate', lower=0, upper=99)

        # radiusEstimate = DiscreteUniform('radiusEstimate', lower=0, upper=35 )
        # zEstimate = DiscreteUniform('zEstimate', lower=0, upper=41)

        @as_op(itypes=[T.lscalar, T.lscalar, T.dscalar], otypes=[T.dvector])
        def siggen_model_dt(switchpoint, dtEstimate, wf_scale):
            # Multiply out-of-place so the shared lookup table is never
            # scaled in place, whether indexing returns a view or a copy.
            siggen_out = dt_array[dtEstimate, :] * wf_scale

            # TODO: the previous code called T.clip(dtEstimate, 0, 99) here and
            # discarded the result, so the index was never actually clamped.
            # NEED to find a better way to handle out-of-range indices.

            out = np.zeros(len(data))
            out[switchpoint:] = siggen_out[0:(len(data) - switchpoint)]

            # print "length of out is %d" % len(out)
            return out

        # Alternative op that looks the waveform up by (r, z); not wired into
        # the model below (see the commented-out baseline_model line).
        @as_op(itypes=[T.lscalar, T.lscalar, T.lscalar], otypes=[T.dvector])
        def siggen_model(switchpoint, r, z):
            siggen_out = findSiggenWaveform(0, r, z, np.amax(np_data))
            out = np.zeros(len(data))
            out[switchpoint:] = siggen_out[0:(len(data) - switchpoint)]

            return out

        # print "length of data is %d" % len(data)

        # @as_op(itypes=[T.lscalar, T.dscalar, T.dscalar], otypes=[T.dvector])
        #
        # def crazy_modulo3(switchpoint, exp_scale, exp_rate):
        #     out = np.zeros(len(data))
        #     out[switchpoint:] = exp_scale * (np.exp( exp_rate * (timestamp[switchpoint:] - switchpoint))-1.)
        #     return out

        # baseline_model = Deterministic('baseline_model', exp_scale * (exp( (timestamp-switchpoint)*rate)-1.) )

        # baseline_model = siggen_model(switchpoint, radiusEstimate, zEstimate)
        baseline_model_dt = siggen_model_dt(switchpoint, dtEstimate, wf_scale)

        baseline_observed = Normal("baseline_observed", mu=baseline_model_dt,
                                   sd=uncertainty_model, observed=data)
    return signal_model


# def createSignalModelDynamic(data, wfMax):
#     """
#     Calls siggen in real time
#
#     """
#
#     with Model() as signal_model:
#
#         switchpoint = DiscreteUniform('switchpoint', lower=0, upper=len(data))
#         noise_sigma = HalfNormal('noise_sigma', sd=1.)
#         siggen_sigma = HalfNormal('siggen_sigma', sd=10.)
#
#         timestamp = np.arange(0, len(data), dtype=np.int)
#
#         uncertainty_model = switch(switchpoint >= timestamp, noise_sigma, siggen_sigma)
#
#         detRad = np.floor(35.41)
#         detZ = np.floor(41.5)
#
#         dtEstimate = DiscreteUniform('dtEstimate', lower=0, upper=99 )
#
#         # radiusEstimate = DiscreteUniform('radiusEstimate', lower=0, upper=35 )
#         # zEstimate = DiscreteUniform('zEstimate', lower=0, upper=41)
#
#         @as_op(itypes=[T.lscalar, T.lscalar], otypes=[T.dvector])
#         def siggen_model_dt(switchpoint, dtEstimate):
#             siggen_out = dt_array[dtEstimate, :]
#             siggen_out *= wfMax
#
#             T.clip(dtEstimate, 0, 99) #THIS IS A DISASTER. NEED to find a better way to handle this
#
#             out = np.zeros(len(data))
#             out[switchpoint:] = siggen_out[0:(len(data) - switchpoint)]
#
#             # print "length of out is %d" % len(out)
#             return out
#
#         @as_op(itypes=[T.lscalar, T.lscalar, T.lscalar], otypes=[T.dvector])
#         def siggen_model(switchpoint, r, z):
#             siggen_out = findSiggenWaveform(0,r,z,np.amax(np_data))
#             out = np.zeros(len(data))
#             out[switchpoint:] = siggen_out[0:(len(data) - switchpoint)]
#
#             return out
#
#         # print "length of data is %d" % len(data)
#
#         # @as_op(itypes=[T.lscalar, T.dscalar, T.dscalar], otypes=[T.dvector])
#         #
#         # def crazy_modulo3(switchpoint, exp_scale, exp_rate):
#         #     out = np.zeros(len(data))
#         #     out[switchpoint:] = exp_scale * (np.exp( exp_rate * (timestamp[switchpoint:] - switchpoint))-1.)
#         #     return out
#
#         # baseline_model = Deterministic('baseline_model', exp_scale * (exp( (timestamp-switchpoint)*rate)-1.) )
#
#         # baseline_model = siggen_model(switchpoint, radiusEstimate, zEstimate)
#         baseline_model_dt = siggen_model_dt(switchpoint, dtEstimate)
#
#         baseline_observed = Normal("baseline_observed", mu=baseline_model_dt, sd=uncertainty_model, observed= data )
#
#     return signal_model
def mcmc_changepoint(dates, ratings, mcmc_iter=1000, discrete=0, plot_result=1):
    """Fit a single-switchpoint model to a sequence of review ratings.

    This function models Yelp reviews as coming from two distributions with a
    switch point somewhere between them.  Reviews at or before the switch
    point are drawn from the first distribution; reviews after it are drawn
    from the second.  Normal distributions are used if the reviews have been
    normalized to the user's average rating; otherwise, if analyzing in terms
    of 1-5 stars, set ``discrete=1`` and the same estimation is done with
    Poisson distributions.  The posterior for the switchpoint and for the
    parameters of the two generating distributions is drawn with ``sample``.

    Parameters
    ----------
    dates : sequence
        Dates when the reviews were posted.
    ratings : sequence
        Rating given by each review.
    mcmc_iter : int
        How many iterations of the MCMC to run.
    discrete : {0, 1}
        0 models the ratings with Normal distributions (user-averaged
        ratings), 1 with Poisson distributions (1-5 stars).
    plot_result : int
        When 1, show the trace plot and the fitted expectation.

    Returns
    -------
    tuple
        ``(differential, modal_switch, expected_stars, sigma_est,
        switch_posterior)`` where ``differential`` is the modal "before"
        intensity minus the modal "after" intensity, ``modal_switch`` is the
        modal switchpoint, ``expected_stars`` is the posterior-mean intensity
        per review, ``sigma_est`` is the modal ``sigma`` (``None`` when
        ``discrete=1``, because the Poisson model has no ``sigma``), and
        ``switch_posterior`` holds the sampled switchpoints.

    Raises
    ------
    ValueError
        If ``discrete`` is neither 0 nor 1.
    """
    if discrete not in (0, 1):
        raise ValueError("discrete must be 0 (Normal) or 1 (Poisson)")

    rating_index = np.arange(0, len(ratings))

    if discrete == 0:
        with Model() as switch_model:
            switchpoint = DiscreteUniform('switchpoint', lower=0, upper=len(dates))
            before_intensity = Normal('before_intensity', mu=0, sd=1)
            after_intensity = Normal('after_intensity', mu=0, sd=1)
            intensity = switch(switchpoint >= rating_index,
                               before_intensity, after_intensity)
            sigma = HalfNormal('sigma', sd=1)
            Normal('rating', mu=intensity, sd=sigma, observed=ratings)
    else:
        with Model() as switch_model:
            switchpoint = DiscreteUniform('switchpoint', lower=0, upper=len(dates))
            before_intensity = Exponential('before_intensity', 1)
            after_intensity = Exponential('after_intensity', 1)
            intensity = switch(switchpoint >= rating_index,
                               before_intensity, after_intensity)
            Poisson('rating', intensity, observed=ratings)

    with switch_model:
        trace = sample(mcmc_iter)

    if plot_result == 1:
        traceplot(trace)
        plt.show()

    switch_posterior = trace['switchpoint']
    n_draws = switch_posterior.shape[0]
    before_posterior = trace['before_intensity']
    after_posterior = trace['after_intensity']

    expected_stars = np.zeros(len(ratings))
    for idx in rating_index:
        # Same rule as the model's switch(): a review uses the "before"
        # intensity when its index is at or before the sampled switchpoint.
        uses_before = idx <= switch_posterior
        expected_stars[idx] = (before_posterior[uses_before].sum()
                               + after_posterior[~uses_before].sum()) / n_draws

    if plot_result == 1:
        plt.plot(dates, ratings, 'o')
        plt.plot(dates, expected_stars, 'b-')
        plt.show()

    # Summarize each posterior by its mode (the counts are not used).
    b_mean, _ = scipy.stats.mode(before_posterior)
    a_mean, _ = scipy.stats.mode(after_posterior)
    modal_switch, _ = scipy.stats.mode(switch_posterior)

    # Only the Normal model defines 'sigma'; the Poisson trace has no such key.
    if discrete == 0:
        sigma_est, _ = scipy.stats.mode(trace['sigma'])
    else:
        sigma_est = None

    differential = b_mean - a_mean
    return differential, modal_switch, expected_stars, sigma_est, switch_posterior