def test_sample_binomial():
    def model1(key):
        return func.sample('n', dist.binomial(jnp.array(2), jnp.array(0.5)), key)

    keys = jax.random.split(jax.random.PRNGKey(123), 100)
    samples = jax.vmap(lambda k: model1(k))(keys)
    assert jnp.median(samples) == 1

    def model2(key):
        return func.sample('n', dist.binomial(jnp.array(1), jnp.array(0.)), key)

    key = jax.random.PRNGKey(123)
    sample = model2(key)
    assert sample == 0

    def model3(key):
        return func.sample('n', dist.binomial(jnp.array(10), jnp.array(1.)), key)

    key = jax.random.PRNGKey(123)
    sample = model3(key)
    assert sample == 10

    def model4(key):
        return func.sample('n', dist.binomial(jnp.array(10), jnp.array(0.5)), key)

    keys = jax.random.split(jax.random.PRNGKey(123), 100)
    samples = jax.vmap(lambda k: model4(k))(keys)
    assert jnp.median(samples) == 4
def get_peaks_iter(soln, tvec, int=0, Tint=0, loCI=5, upCI=95):
    """
    Calculates the peak prevalence for multiple runs, with or without an intervention.

    soln: 3D array of values for each iteration for each variable at each timepoint
    tvec: 1D vector of timepoints
    int: Optional, 1 or 0 for whether or not there was an intervention. Defaults to 0
    Tint: Optional, timepoint (days) at which intervention was started
    loCI, upCI: Optional, lower and upper percentiles for confidence intervals.
        Defaults to a 90% interval
    """
    delta_t = tvec[1] - tvec[0]

    if int == 0:
        time_int = 0
    else:
        time_int = Tint

    all_cases = soln[:, :, 1] + soln[:, :, 2] + soln[:, :, 3] + soln[:, :, 4]

    # Final values
    print('Final recovered: {:4.2f}% [{:4.2f}, {:4.2f}]'.format(
        100 * np.average(soln[:, -1, 6]),
        100 * np.percentile(soln[:, -1, 6], loCI),
        100 * np.percentile(soln[:, -1, 6], upCI)))
    print('Final deaths: {:4.2f}% [{:4.2f}, {:4.2f}]'.format(
        100 * np.average(soln[:, -1, 5]),
        100 * np.percentile(soln[:, -1, 5], loCI),
        100 * np.percentile(soln[:, -1, 5], upCI)))
    print('Remaining infections: {:4.2f}% [{:4.2f}, {:4.2f}]'.format(
        100 * np.average(all_cases[:, -1]),
        100 * np.percentile(all_cases[:, -1], loCI),
        100 * np.percentile(all_cases[:, -1], upCI)))

    # Peak prevalence
    peaks = np.amax(soln[:, :, 2], axis=1)
    print('Peak I1: {:4.2f}% [{:4.2f}, {:4.2f}]'.format(
        100 * np.average(peaks),
        100 * np.percentile(peaks, loCI),
        100 * np.percentile(peaks, upCI)))
    peaks = np.amax(soln[:, :, 3], axis=1)
    print('Peak I2: {:4.2f}% [{:4.2f}, {:4.2f}]'.format(
        100 * np.average(peaks),
        100 * np.percentile(peaks, loCI),
        100 * np.percentile(peaks, upCI)))
    peaks = np.amax(soln[:, :, 4], axis=1)
    print('Peak I3: {:4.2f}% [{:4.2f}, {:4.2f}]'.format(
        100 * np.average(peaks),
        100 * np.percentile(peaks, loCI),
        100 * np.percentile(peaks, upCI)))

    # Timing of peaks
    tpeak = np.argmax(soln[:, :, 2], axis=1) * delta_t - time_int
    print('Time of peak I1: avg {:4.2f} days, median {:4.2f} days [{:4.2f}, {:4.2f}]'.format(
        np.average(tpeak), np.median(tpeak),
        np.percentile(tpeak, loCI), np.percentile(tpeak, upCI)))
    tpeak = np.argmax(soln[:, :, 3], axis=1) * delta_t - time_int
    print('Time of peak I2: avg {:4.2f} days, median {:4.2f} days [{:4.2f}, {:4.2f}]'.format(
        np.average(tpeak), np.median(tpeak),
        np.percentile(tpeak, loCI), np.percentile(tpeak, upCI)))
    tpeak = np.argmax(soln[:, :, 4], axis=1) * delta_t - time_int
    print('Time of peak I3: avg {:4.2f} days, median {:4.2f} days [{:4.2f}, {:4.2f}]'.format(
        np.average(tpeak), np.median(tpeak),
        np.percentile(tpeak, loCI), np.percentile(tpeak, upCI)))

    # Time when all the infections go extinct
    time_all_extinct = np.array(get_extinction_time(all_cases, 0)) * delta_t - time_int
    print('Time of extinction of all infections post intervention: {:4.2f} days [{:4.2f}, {:4.2f}]'.format(
        np.average(time_all_extinct),
        np.percentile(time_all_extinct, loCI),
        np.percentile(time_all_extinct, upCI)))

    return
def test_quick_select():
    arr = jnp.asarray([10, 4, 5, 8, 11, 6, 26, 7])  # even number of points
    assert quick_sort_median(arr) == jnp.percentile(arr, 50, interpolation='higher')

    arr = jnp.asarray([10, 4, 5, 8, 11, 6, 26])  # odd number of points
    assert quick_sort_median(arr) == jnp.percentile(arr, 50, interpolation='higher')

    arr = jnp.asarray([10, 4, 5, 8, 11, 6, 26])  # odd number of points
    assert quick_sort_median(arr) == jnp.median(arr)

    arr = jnp.asarray([10, 4, 5, 8, 11, 6, 26, 7])  # even number of points
    try:
        assert quick_sort_median(arr) == jnp.median(arr)
    except AssertionError:
        print("Not equivalent to median when array has an even size.")
def get_diffusionEmbedding(points=[], distance=[], distmatrix=None, alpha=1.0,
                           tdiff=0, eps=None):
    n = len(points)
    if distmatrix is None:
        idx = jnp.array([[i, j] for i in range(n) for j in range(n)])
        d = make_distanceMatrix(points=points, idx=idx, distance=distance, n=n)
    else:
        d = distmatrix

    if eps is None:
        # using heuristic from the R package for diffusion maps
        eps = 2 * jnp.median(d)**2

    K = make_kernelMatrix(distmatrix=d, eps=eps)
    Kr = renormalize_kernel(K, alpha=alpha)
    P = make_transitionMatrix(Kr)
    u, s, v = jnp.linalg.svd(P)

    phi = u
    for i in range(len(u)):
        # JAX arrays are immutable: .at[...].set(...) returns a new array,
        # so the result must be reassigned to phi
        phi = phi.at[:, i].set((s[i]**tdiff) * jnp.divide(u[:, i], u[:, 0]))

    return phi, s
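# Hedged usage sketch (not from the original source): passing a precomputed
# distance matrix sidesteps assumptions about the `distance` callable; it still
# assumes the make_* helpers used above are importable from the same module.
import jax.numpy as jnp

X = jnp.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [5.0, 5.0]])
D = jnp.sqrt(((X[:, None, :] - X[None, :, :]) ** 2).sum(-1))  # pairwise Euclidean distances
phi, s = get_diffusionEmbedding(points=X, distmatrix=D)
# phi[:, 1] is the first nontrivial diffusion coordinate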
def init_to_median(site=None, num_samples=15):
    """
    Initialize to the prior median. For priors with no `.sample` method implemented,
    we defer to the :func:`init_to_uniform` strategy.

    :param int num_samples: number of prior points to calculate median.
    """
    if site is None:
        return partial(init_to_median, num_samples=num_samples)

    if (site["type"] == "sample"
            and not site["is_observed"]
            and not site["fn"].support.is_discrete):
        if site["value"] is not None:
            warnings.warn(
                f"init_to_median() skipping initialization of site '{site['name']}'"
                " which already stores a value.",
                stacklevel=find_stack_level(),
            )
            return site["value"]
        rng_key = site["kwargs"].get("rng_key")
        sample_shape = site["kwargs"].get("sample_shape")
        try:
            samples = site["fn"](sample_shape=(num_samples,) + sample_shape,
                                 rng_key=rng_key)
            return jnp.median(samples, axis=0)
        except NotImplementedError:
            return init_to_uniform(site)
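# Hedged usage sketch (not from the original source): in NumPyro an init
# strategy such as the one above is passed to the sampler via `init_strategy`;
# `model` and `data` are placeholders for a user-defined model and its inputs.
from jax import random
from numpyro.infer import MCMC, NUTS

kernel = NUTS(model, init_strategy=init_to_median(num_samples=20))
mcmc = MCMC(kernel, num_warmup=500, num_samples=1000)
mcmc.run(random.PRNGKey(0), data)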
def smooth_vsini_fft(wavelength, spectrum, outwave, sigma_out, inres=0.0,
                     **extras):
    # The kernel width for the convolution.
    sigma = np.sqrt(sigma_out**2 - inres**2)
    # if sigma <= 0:
    #     return np.interp(outwave, wavelength, spectrum)

    # make length of spectrum a power of 2 by resampling
    wave, spec = resample_wave(wavelength, spectrum)

    # get grid resolution (*not* the resolution of the input spectrum) and make
    # sure it's nearly constant. It should be, by design (see resample_wave)
    invRgrid = np.diff(np.log(wave))
    # assert invRgrid.max() / invRgrid.min() < 1.05
    dv = ckms * np.median(invRgrid)

    # Do the convolution
    spec_conv = smooth_fft_vsini(dv, spec, sigma)

    # interpolate onto output grid
    # if outwave is not None:
    spec_conv = jinterp(outwave, wave, spec_conv, right=np.nan, left=np.nan)
    return spec_conv
def estimate_sigma_median_kth(X: np.ndarray, Y: np.ndarray,
                              percent: float = 0.3) -> float:
    """Estimates the sigma using the median kth distance

    This calculates the sigma value using the kth percent of the distances.
    The median of those distances is the new sigma value.

    Parameters
    ----------
    X : jax.numpy.ndarray
        input data (n_samples, n_features)

    Y : jax.numpy.ndarray
        input data (n_samples, n_features)

    percent : float
        the fraction of distances to keep (default=0.3)

    Returns
    -------
    sigma : float
        the estimated sigma
    """
    # find the kth distance
    dists = _estimate_sigma_kth(X=X, Y=Y, percent=percent)

    # median distances
    sigma = np.median(dists[np.nonzero(dists)])

    return sigma
def compute(self, particles, particle_info, loss_fn):
    diffs = jnp.expand_dims(particles, axis=0) - jnp.expand_dims(
        particles, axis=1)  # N x N (x D)
    if self._normed() and particles.ndim == 2:
        diffs = safe_norm(diffs, ord=2, axis=-1)  # N x D -> N
    diffs = jnp.reshape(diffs, (diffs.shape[0] * diffs.shape[1], -1))  # N * N (x D)
    factor = self.bandwidth_factor(particles.shape[0])
    if diffs.ndim == 2:
        diff_norms = safe_norm(diffs, ord=2, axis=-1)
    else:
        diff_norms = diffs
    bandwidth = jnp.median(diff_norms)**2 * factor + 1e-5

    def kernel(x, y):
        diff = safe_norm(x - y, ord=2) if self._normed() and x.ndim >= 1 else x - y
        kernel_res = jnp.exp(-(diff**2) / bandwidth)
        if self._mode == "matrix":
            if self.matrix_mode == "norm_diag":
                return kernel_res * jnp.identity(x.shape[0])
            else:
                return jnp.diag(kernel_res)
        else:
            return kernel_res

    return kernel
def estimate_sigma_median(X: np.ndarray, Y: np.ndarray) -> float:
    """Estimate sigma using the median distance

    Parameters
    ----------
    X : jax.numpy.ndarray
        input data (n_samples, n_features)

    Y : jax.numpy.ndarray
        input data (n_samples, n_features)

    Returns
    -------
    sigma : float
        the estimated sigma
    """
    # compute distance matrix
    dists = pdist_squareform(X, Y)

    # remove non-zero elements
    # dists = dists[np.nonzero(dists)]

    # get the median value
    sigma = np.median(dists[np.nonzero(dists)])

    return sigma
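# Hedged usage sketch (not from the original source): assumes `pdist_squareform`
# from the same module returns the full pairwise-distance matrix between the
# rows of X and Y.
import numpy as np

X = np.random.randn(100, 5)
sigma = estimate_sigma_median(X, X)  # median of the non-zero pairwise distances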
def plot_1988(data, samples, ax=None):
    indicators = get_floating_days_indicators(data["date"])
    memorial_beta = samples["memorial/beta"][:, None]
    labour_beta = samples["labour/beta"][:, None]
    thanks_beta = samples["thanksgiving/beta"][:, None]

    memorials = indicators["memorial_days_indicator"] * memorial_beta
    labour = indicators["labour_days_indicator"] * labour_beta
    thanksgiving = indicators["thanksgiving_days_indicator"] * thanks_beta
    floating_days = memorials + labour + thanksgiving

    is_1988 = data["date"].dt.year == 1988
    days_in_1988 = data["day_of_year"][is_1988] - 1
    days_effect = samples["day/beta"][:, days_in_1988.values]
    floating_effect = floating_days[:, jnp.argwhere(is_1988.values).ravel()]

    y = data["births_relative"]
    f = (days_effect + floating_effect) * y.std() + y.mean()
    f_median = jnp.median(f, axis=0)

    special_days = {
        "Valentine's": "1988-02-14",
        "Leap day": "1988-02-29",
        "Halloween": "1988-10-31",
        "Christmas eve": "1988-12-24",
        "Christmas day": "1988-12-25",
        "New year": "1988-01-01",
        "New year's eve": "1988-12-31",
        "April 1st": "1988-04-01",
        "Independence day": "1988-07-04",
        "Labour day": "1988-09-05",
        "Memorial day": "1988-05-30",
        "Thanksgiving": "1988-11-24",
    }

    if ax is None:
        ax = plt.gca()

    ax.plot(days_in_1988, f_median, color="k", lw=2)

    for name, date in special_days.items():
        xs = pd.to_datetime(date).day_of_year - 1
        ys = f_median[xs]
        text = ax.text(xs - 3, ys, name, horizontalalignment="right")
        text.set_bbox(dict(facecolor="white", alpha=0.5, edgecolor="none"))

    is_day_13 = data["date"].dt.day == 13
    bad_luck_days = data.loc[is_1988 & is_day_13, "day_of_year"] - 1
    ax.plot(
        bad_luck_days,
        f_median[bad_luck_days.values],
        marker="o",
        mec="gray",
        c="none",
        ms=10,
        lw=0,
    )

    return ax
def get_peaks_iter_daily(soln_inc, int=0, Tint=0, loCI=5, upCI=95):
    """
    Calculates the peak daily incidence for multiple runs, with or without an intervention.

    soln_inc: 3D array of values for each iteration for each variable at each timepoint
    int: Optional, 1 or 0 for whether or not there was an intervention. Defaults to 0
    Tint: Optional, timepoint (days) at which intervention was started
    loCI, upCI: Optional, lower and upper percentiles for confidence intervals.
        Defaults to a 90% interval
    """
    if int == 0:
        time_int = 0
    else:
        time_int = Tint

    # Peak incidence
    peaks = np.amax(soln_inc[:, :, 2], axis=1)
    print('Peak daily I1: {:4.2f}% [{:4.2f}, {:4.2f}]'.format(
        100 * np.average(peaks),
        100 * np.percentile(peaks, loCI),
        100 * np.percentile(peaks, upCI)))
    peaks = np.amax(soln_inc[:, :, 3], axis=1)
    print('Peak daily I2: {:4.2f}% [{:4.2f}, {:4.2f}]'.format(
        100 * np.average(peaks),
        100 * np.percentile(peaks, loCI),
        100 * np.percentile(peaks, upCI)))
    peaks = np.amax(soln_inc[:, :, 4], axis=1)
    print('Peak daily I3: {:4.2f}% [{:4.2f}, {:4.2f}]'.format(
        100 * np.average(peaks),
        100 * np.percentile(peaks, loCI),
        100 * np.percentile(peaks, upCI)))
    peaks = np.amax(soln_inc[:, :, 5], axis=1)
    print('Peak daily deaths: {:4.2f}% [{:4.2f}, {:4.2f}]'.format(
        100 * np.average(peaks),
        100 * np.percentile(peaks, loCI),
        100 * np.percentile(peaks, upCI)))

    # Timing of peak incidence
    tpeak = np.argmax(soln_inc[:, :, 2], axis=1) + 1.0 - time_int
    print('Time of peak I1: avg {:4.2f} days, median {:4.2f} days [{:4.2f}, {:4.2f}]'.format(
        np.average(tpeak), np.median(tpeak),
        np.percentile(tpeak, loCI), np.percentile(tpeak, upCI)))
    tpeak = np.argmax(soln_inc[:, :, 3], axis=1) + 1.0 - time_int
    print('Time of peak I2: avg {:4.2f} days, median {:4.2f} days [{:4.2f}, {:4.2f}]'.format(
        np.average(tpeak), np.median(tpeak),
        np.percentile(tpeak, loCI), np.percentile(tpeak, upCI)))
    tpeak = np.argmax(soln_inc[:, :, 4], axis=1) + 1.0 - time_int
    print('Time of peak I3: avg {:4.2f} days, median {:4.2f} days [{:4.2f}, {:4.2f}]'.format(
        np.average(tpeak), np.median(tpeak),
        np.percentile(tpeak, loCI), np.percentile(tpeak, upCI)))
    tpeak = np.argmax(soln_inc[:, :, 5], axis=1) + 1.0 - time_int
    print('Time of peak deaths: avg {:4.2f} days, median {:4.2f} days [{:4.2f}, {:4.2f}]'.format(
        np.average(tpeak), np.median(tpeak),
        np.percentile(tpeak, loCI), np.percentile(tpeak, upCI)))

    return
def forecast(self, num_samples=1000, rng_key=PRNGKey(4), **args):
    if self.mcmc_samples is None:
        raise RuntimeError("run inference first")

    post_median = np.median(self.mcmc_samples['beta'], axis=0).reshape(1, -1)
    self.mcmc_samples['beta'] = np.repeat(post_median, 100, axis=0)

    post_median = np.median(self.mcmc_samples['mean_z'], axis=0).reshape(1, -1)
    self.mcmc_samples['mean_z'] = np.repeat(post_median, 100, axis=0)

    post_median = np.median(self.mcmc_samples['mean_y'], axis=0).reshape(1, -1)
    self.mcmc_samples['mean_y'] = np.repeat(post_median, 100, axis=0)

    post_median = np.median(self.mcmc_samples['beta0'], axis=0).reshape(-1)
    self.mcmc_samples['beta0'] = np.repeat(post_median, 100, axis=0)

    post_median = np.median(self.mcmc_samples['det_rate_rw'], axis=0).reshape(1, -1)
    self.mcmc_samples['det_rate_rw'] = np.repeat(post_median, 100, axis=0)

    print(self.mcmc_samples['x'].shape)

    post_median = np.median(self.mcmc_samples['x'], axis=0).reshape(1, -1, 7)
    self.mcmc_samples['x'] = np.repeat(post_median, 100, axis=0)

    post_median = np.median(self.mcmc_samples['x0'], axis=0).reshape(1, 7)
    self.mcmc_samples['x0'] = np.repeat(post_median, 100, axis=0)
def plot_samples(self, samples, plot_fields=['y'], start='2020-03-04', T=None,
                 ax=None, legend=True, forecast=False, n_samples=0,
                 intervals=[50, 80, 95]):
    '''
    Plotting method for SIR-type models.
    '''
    ax = plt.axes(ax)

    T_data = self.horizon(samples, forecast=forecast)
    T = T_data if T is None else min(T, T_data)

    fields = {f: 0.0 + self.get(samples, f, forecast=forecast)[:, :T]
              for f in plot_fields}
    names = {f: self.names[f] for f in plot_fields}

    medians = {names[f]: np.median(v, axis=0) for f, v in fields.items()}

    t = pd.date_range(start=start, periods=T, freq='D')

    ax.set_prop_cycle(None)
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    # Plot medians
    df = pd.DataFrame(index=t, data=medians)
    df.plot(ax=ax, legend=legend)
    median_max = df.max().values

    # Plot samples if requested
    if n_samples > 0:
        for i, f in enumerate(fields):
            df = pd.DataFrame(index=t, data=fields[f][:n_samples, :].T)
            df.plot(ax=ax, legend=False, alpha=0.1)

    # Plot prediction intervals
    pi_max = 10
    handles = []
    for interval in intervals:
        low = (100. - interval) / 2
        high = 100. - low
        pred_intervals = {names[f]: np.percentile(v, (low, high), axis=0)
                          for f, v in fields.items()}
        for i, pi in enumerate(pred_intervals.values()):
            h = ax.fill_between(t, pi[0, :], pi[1, :], alpha=0.1,
                                color=colors[i], label=interval)
            handles.append(h)
            pi_max = np.maximum(pi_max, np.nanmax(pi[1, :]))

    return median_max, pi_max
def median_heuristic(x):
    """
    Heuristic for choosing the squared RBF bandwidth.

    IN: np array of shape (n,) or (n, d): set of particles
    OUT: scalar: bandwidth parameter for RBF kernel, based on the heuristic from
    the SVGD paper.
    Note: assumes k(x, y) = exp(- (x - y)^2 / h^2 / 2)
    """
    pairwise_dists = utils.squared_distance_matrix(utils.remove_diagonal(x))
    medsq = np.median(pairwise_dists)
    h = np.sqrt(0.5 * medsq / np.log(x.shape[0] + 1))
    return h
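# Hedged usage sketch (not from the original source): assumes the
# `utils.squared_distance_matrix` and `utils.remove_diagonal` helpers referenced
# above are importable; the particles form an (n, d) array.
import numpy as np

particles = np.random.randn(100, 2)
h = median_heuristic(particles)  # bandwidth for k(x, y) = exp(-(x - y)^2 / h^2 / 2)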
def plot_week(data, samples, ax=None):
    if ax is None:
        ax = plt.gca()

    weekdays = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

    y = data["births_relative"]
    x = data["day_of_week"] - 1
    f = jnp.median(samples["week/beta"] * y.std() + y.mean(), 0)

    ax.plot(x, y, **DATA_STYLE)
    ax.plot(range(7), f, **MODEL_STYLE)
    ax.set_xticks(range(7))
    ax.set_xticklabels(weekdays)

    return ax
def median_heuristic(data, distance, per_dimension=True):
    if isinstance(distance, str):
        dist_fn = lambda x: pdist(x, distance)
    else:
        dist_fn = distance

    if per_dimension is False:
        return np.median(dist_fn(data))
    else:
        def single_dim_heuristic(data_dim):
            return median_heuristic(data_dim[:, None], dist_fn,
                                    per_dimension=False)

        return np.apply_along_axis(single_dim_heuristic, 0, data)
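# Hedged usage sketch (not from the original source): with a string metric the
# function defers to the module's `pdist` (assumed to behave like
# scipy.spatial.distance.pdist); per_dimension=True yields one bandwidth per column.
import numpy as np

X = np.random.randn(200, 3)
sigma_per_dim = median_heuristic(X, "euclidean")                      # shape (3,)
sigma_joint = median_heuristic(X, "euclidean", per_dimension=False)   # scalar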
def _eigh_work(H, V=None, precision=lax.Precision.HIGHEST, termination_size=128):
    """ The main work loop performing the symmetric eigendecomposition of H.
    Each step recursively computes a projector into the space of eigenvalues
    above jnp.mean(jnp.diag(H)). The result of the projections into and out of
    that space, along with the isometries accomplishing these, are then computed.
    This is performed recursively until the projections have size 1, and thus
    store an eigenvalue of the original input; the corresponding isometry is
    the related eigenvector. The results are then composed.

    This function cannot be Jitted because the internal split_spectrum cannot
    be.

    Args:
      H: The Hermitian input.
      V: Stores the isometries projecting H into its subspaces.
      precision: :class:`~jax.lax.Precision` object specifying the matmul precision.
    Returns:
      H, V: The result of the projection.
    """
    if H.shape[0] <= termination_size:
        evals, evecs = jnp.linalg.eigh(H)
        if V is not None:
            evecs = jnp.dot(V, evecs, precision=precision)
        return evals, evecs

    split_point = jnp.median(jnp.diag(H))  # TODO: Improve this?
    Hm, Vm, Hp, Vp = split_spectrum(H, split_point, V0=V, precision=precision)
    Hm, Vm = _eigh_work(Hm, V=Vm, precision=precision, termination_size=termination_size)
    Hp, Vp = _eigh_work(Hp, V=Vp, precision=precision, termination_size=termination_size)

    if Hm.ndim != 1 or Hp.ndim != 1:
        raise ValueError(
            f"One of Hm.ndim={Hm.ndim} or Hp.ndim={Hp.ndim} != 1 ",
            "indicating recursion terminated unexpectedly.")

    evals = jnp.hstack((Hm, Hp))
    evecs = jnp.hstack((Vm, Vp))
    return evals, evecs
def plot_R0(mcmc_samples, start, ax=None):
    ax = plt.axes(ax)

    # Compute R0 over time
    gamma = mcmc_samples['gamma'][:, None]
    beta = mcmc_samples['beta']
    t = pd.date_range(start=start, periods=beta.shape[1], freq='D')
    R0 = beta / gamma

    pi = np.percentile(R0, (10, 90), axis=0)
    df = pd.DataFrame(index=t, data={'R0': np.median(R0, axis=0)})
    df.plot(style='-o', ax=ax)
    ax.fill_between(t, pi[0, :], pi[1, :], alpha=0.1)

    ax.axhline(1, linestyle='--')
def plot_samples(self, samples, plot_fields=['y'], start='2020-03-04', T=None,
                 ax=None, legend=True, forecast=False):
    '''
    Plotting method for SIR-type models.
    '''
    ax = plt.axes(ax)

    T_data = self.horizon(samples, forecast=forecast)
    T = T_data if T is None else min(T, T_data)

    fields = {f: self.get(samples, f, forecast=forecast)[:, :T]
              for f in plot_fields}
    names = {f: self.names[f] for f in plot_fields}

    medians = {names[f]: np.median(v, axis=0) for f, v in fields.items()}
    pred_intervals = {names[f]: np.percentile(v, (10, 90), axis=0)
                      for f, v in fields.items()}

    t = pd.date_range(start=start, periods=T, freq='D')

    ax.set_prop_cycle(None)

    # Plot medians
    df = pd.DataFrame(index=t, data=medians)
    df.plot(ax=ax, legend=legend)
    median_max = df.max().values

    # Plot prediction intervals
    pi_max = 10
    for pi in pred_intervals.values():
        ax.fill_between(t, pi[0, :], pi[1, :], alpha=0.1, label='CI')
        pi_max = np.maximum(pi_max, np.nanmax(pi[1, :]))

    return median_max, pi_max
def init_to_median(site=None, num_samples=15):
    """
    Initialize to the prior median. For priors with no `.sample` method implemented,
    we defer to the :func:`init_to_uniform` strategy.

    :param int num_samples: number of prior points to calculate median.
    """
    if site is None:
        return partial(init_to_median, num_samples=num_samples)

    if site['type'] == 'sample' and not site['is_observed'] and not site['fn'].is_discrete:
        rng_key = site['kwargs'].get('rng_key')
        sample_shape = site['kwargs'].get('sample_shape')
        try:
            samples = site['fn'].sample(rng_key, sample_shape=(num_samples,) + sample_shape)
            return jnp.median(samples, axis=0)
        except NotImplementedError:
            return init_to_uniform(site)
def smooth_wave_fft(wavelength, spectrum, outwave, sigma_out=1.0,
                    inres=0.0, **extras):
    """Smooth a spectrum in wavelength space, using FFTs. This is fast, but
    makes some assumptions about the input spectrum, and can have some issues
    at the ends of the spectrum depending on how it is padded.

    :param wavelength:
        Wavelength vector of the input spectrum.
    :param spectrum:
        Flux vector of the input spectrum.
    :param outwave:
        Desired output wavelength vector.
    :param sigma_out:
        Desired resolution (*not* FWHM) in wavelength units.
    :param inres:
        Resolution of the input, in wavelength units (dispersion not FWHM).
    :returns flux:
        The output smoothed flux vector, same length as ``outwave``.
    """
    # restrict wavelength range (for speed)
    # should also make nearest power of 2
    wave, spec = resample_wave(wavelength, spectrum, linear=True)

    # The kernel width for the convolution.
    sigma = np.sqrt(sigma_out**2 - inres**2)
    if sigma < 0:
        return np.interp(outwave, wavelength, spectrum)

    # get grid resolution (*not* the resolution of the input spectrum) and make
    # sure it's nearly constant. Should be by design (see resample_wave)
    Rgrid = np.diff(wave)
    assert Rgrid.max() / Rgrid.min() < 1.05
    dw = np.median(Rgrid)

    # Do the convolution
    spec_conv = smooth_fft(dw, spec, sigma)

    # interpolate onto output grid
    if outwave is not None:
        spec_conv = np.interp(outwave, wave, spec_conv)
    return spec_conv
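# Hedged usage sketch (not from the original source): assumes `resample_wave`
# and `smooth_fft` from the same module are available; the spectrum below is
# synthetic and the numbers are illustrative only.
import numpy as np

wave = np.linspace(3000.0, 9000.0, 4096)      # input wavelength grid (Angstrom)
flux = 1.0 + 0.1 * np.sin(wave / 50.0)        # toy spectrum
outwave = np.linspace(3500.0, 8500.0, 2000)   # desired output grid
smoothed = smooth_wave_fft(wave, flux, outwave, sigma_out=5.0, inres=1.0)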
def plot_growth_rate(mcmc_samples, start, model=SEIRModel, ax=None):
    ax = plt.axes(ax)

    # Compute growth rate over time
    beta = mcmc_samples['beta']
    sigma = mcmc_samples['sigma'][:, None]
    gamma = mcmc_samples['gamma'][:, None]
    t = pd.date_range(start=start, periods=beta.shape[1], freq='D')

    growth_rate = SEIRModel.growth_rate((beta, sigma, gamma))

    pi = np.percentile(growth_rate, (10, 90), axis=0)
    df = pd.DataFrame(index=t, data={'growth_rate': np.median(growth_rate, axis=0)})
    df.plot(style='-o', ax=ax)
    ax.fill_between(t, pi[0, :], pi[1, :], alpha=0.1)

    ax.axhline(0, linestyle='--')
def _init_to_median(site, num_samples=15, skip_param=False):
    if site['type'] == 'sample' and not site['is_observed']:
        if isinstance(site['fn'], dist.TransformedDistribution):
            fn = site['fn'].base_dist
        else:
            fn = site['fn']
        samples = numpyro.sample('_init', fn,
                                 sample_shape=(num_samples,) + site['kwargs']['sample_shape'])
        return np.median(samples, axis=0)

    if site['type'] == 'param' and not skip_param:
        # return base value of param site
        constraint = site['kwargs'].pop('constraint', real)
        transform = biject_to(constraint)
        value = site['args'][0]
        if isinstance(transform, ComposeTransform):
            base_transform = transform.parts[0]
            value = base_transform(transform.inv(value))
        return value
def mps_eval_fun(mps, dataset):
    """Evaluate MPS model within JAX over dataset"""
    from ti_mps import get_log_probs

    contract = EXP_ARGS['contract']
    batch_size = EXP_ARGS['batch_size']

    n, eval_loss, eval_ppl = 0, 0., 0.
    for batch in minibatches(dataset, batch_size, keep_end=True):
        eval_log_probs, log2_norms = get_log_probs(mps, batch, contract, True)
        eval_loss += -jnp.mean(eval_log_probs)
        eval_ppl += ppl_calc(eval_log_probs, batch.str_lens)
        n += 1

        # Get a log norm and rescale our core tensor using that
        ref_log = jnp.floor(jnp.median(log2_norms / batch.str_lens))
        if jnp.abs(ref_log) > 1:
            mps = mps._replace(core_tensor=(mps.core_tensor / 2**ref_log))
            m_print(f"Rescaling by 2**{-ref_log}")

    eval_loss, eval_ppl = eval_loss / n, eval_ppl / n
    return mps, eval_loss, eval_ppl
def init_to_median(site=None, num_samples=15):
    """
    Initialize to the prior median. For priors with no `.sample` method implemented,
    we defer to the :func:`init_to_uniform` strategy.

    :param int num_samples: number of prior points to calculate median.
    """
    if site is None:
        return partial(init_to_median, num_samples=num_samples)

    if (site["type"] == "sample"
            and not site["is_observed"]
            and not site["fn"].is_discrete):
        rng_key = site["kwargs"].get("rng_key")
        sample_shape = site["kwargs"].get("sample_shape")
        try:
            samples = site["fn"](sample_shape=(num_samples,) + sample_shape,
                                 rng_key=rng_key)
            return jnp.median(samples, axis=0)
        except NotImplementedError:
            return init_to_uniform(site)
def plot_R0(mcmc_samples, start):
    fig = plt.figure(figsize=(5, 3))

    # Compute average R0 over time
    gamma = mcmc_samples['gamma'][:, None]
    beta = mcmc_samples['beta']
    t = pd.date_range(start=start, periods=beta.shape[1], freq='D')
    R0 = beta / gamma

    pi = np.percentile(R0, (10, 90), axis=0)
    df = pd.DataFrame(index=t, data={'R0': np.median(R0, axis=0)})
    df.plot(style='-o')
    plt.fill_between(t, pi[0, :], pi[1, :], alpha=0.1)

    plt.axhline(1, linestyle='--')
    # plt.tight_layout()

    return fig
def smooth_vel_fft(wavelength, spectrum, outwave, sigma_out, inres=0.0,
                   **extras):
    """Smooth a spectrum in velocity space, using FFTs. This is fast, but makes
    some assumptions about the form of the input spectrum and can have some
    issues at the ends of the spectrum depending on how it is padded.

    :param wavelength:
        Wavelength vector of the input spectrum. An assertion error will result
        if this is not a regular grid in wavelength.
    :param spectrum:
        Flux vector of the input spectrum.
    :param outwave:
        Desired output wavelength vector.
    :param sigma_out:
        Desired velocity resolution (km/s), *not* FWHM. Scalar or length 1 array.
    :param inres:
        The velocity resolution of the input spectrum (km/s), dispersion *not* FWHM.
    """
    # The kernel width for the convolution.
    sigma = np.sqrt(sigma_out**2 - inres**2)
    # if sigma <= 0:
    #     return np.interp(outwave, wavelength, spectrum)

    # make length of spectrum a power of 2 by resampling
    wave, spec = resample_wave(wavelength, spectrum)

    # get grid resolution (*not* the resolution of the input spectrum) and make
    # sure it's nearly constant. It should be, by design (see resample_wave)
    invRgrid = np.diff(np.log(wave))
    # assert invRgrid.max() / invRgrid.min() < 1.05
    dv = ckms * np.median(invRgrid)

    # Do the convolution
    spec_conv = smooth_fft(dv, spec, sigma)

    # interpolate onto output grid
    # if outwave is not None:
    spec_conv = jinterp(outwave, wave, spec_conv, right=np.nan, left=np.nan)
    return spec_conv
def __init__(self, start: np.array, periodicities: np.array = None,
             stepsizes: np.array = None, example: np.array = None):
    """An IndexRollout object zig-zags through indices with periodicities given
    in initialization. At most one periodicity can be infinite (np.inf), in
    which case that index is taken to be ever-increasing.

    Args:
        start (np.array): The starting index.
        periodicities (np.array): The periodicities, the first one can be np.inf.
            When a digit would become larger than its periodicity, the previous
            digit is increased.
        stepsizes (np.array, optional): Defaults to None. Step sizes for
            increasing the index.
        example (np.array, optional): Defaults to None. Examples of consecutive
            indices (in rows) for inferring step sizes.
    """
    self.current = start
    if example is not None:
        self.stepsizes = np.array(
            [np.median(x[x > 0]) for x in (example[1:] - example[:-1]).T])
        self.periodicities = example.max(0) + self.stepsizes
    else:
        assert stepsizes is not None and periodicities is not None
        self.stepsizes = stepsizes
        self.periodicities = periodicities
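# Hedged usage sketch (not from the original source): inferring step sizes and
# periodicities from example consecutive indices (one index per row); values
# are illustrative only.
import numpy as np

example = np.array([[0, 0],
                    [0, 1],
                    [0, 2],
                    [1, 0]])
rollout = IndexRollout(start=np.array([0, 0]), example=example)
print(rollout.stepsizes)       # [1. 1.]
print(rollout.periodicities)   # [2. 3.]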
def median_trick_h(theta):
    # pairwise differences between particles, shape (n, n, d)
    pairwise_dists = -((theta[:, np.newaxis, :] - theta))
    # squared Euclidean distances, shape (n, n)
    pairwise_dists_sq = (pairwise_dists**2).sum(axis=-1)
    # median pairwise distance (note: despite the name, this is not squared)
    med_sq = np.median(np.sqrt(pairwise_dists_sq))
    # SVGD median-trick bandwidth
    h = med_sq**2 / np.log(theta.shape[0] + 1)
    return h
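# Minimal usage sketch (not from the original source; NumPy only): theta is an
# (n, d) array of particles.
import numpy as np

theta = np.random.randn(50, 2)
h = median_trick_h(theta)  # SVGD-style bandwidth from the median pairwise distance
print(h)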
                    rng_key=PRNGKey(0), sample_shape=(1000, ))
data_test2 = sample("norm2", dist.Normal(0, 1),
                    rng_key=PRNGKey(0), sample_shape=(1000, ))
data_test3 = sample("norm3", dist.Normal(-10, 1),
                    rng_key=PRNGKey(0), sample_shape=(1000, ))

test = [data_test1, data_test2, data_test3]
data_mix = np.array(test).T

# Instantiate model
nmc = NMC(mix_model, data_mix)

# Run inference
nmc.run(1000)

for key in nmc.acc_trace:
    print(key)
    print(np.mean(np.array(nmc.acc_trace[key])))
    print(np.std(np.array(nmc.acc_trace[key])))
    print(np.median(np.array(nmc.acc_trace[key])))

print(nmc.nmc_status)

kernel = NUTS(mix_model)
mcmc = MCMC(kernel, 0, 1000)
mcmc.run(rng, data_mix)
mcmc.print_summary()