def setK(self, k):
    """Set the kernel parameter ``k`` and rebuild every quantity derived from it.

    Nothing is done when ``k`` equals the currently stored value.  Otherwise
    the adjusted bounds are widened (or narrowed) by the change in ``k``, the
    covariance object, covariance matrix, its inverse, ``_alpha``, the mean
    and the observed Gaussian process are recomputed, and any field that had
    already been precomputed is refreshed.

    Parameters
    ----------
    k : the new value, passed to the covariance as ``r``.
    """
    if k != self._k:
        deltaK = (k - self._k)
        self._k = k

        # Move both bounds outwards by the change in k (inwards if negative).
        self._adjustedBounds[0] -= deltaK
        self._adjustedBounds[1] += deltaK

        # Rebuild the covariance and the matrices evaluated on the fiber.
        self._cov = _GP.Covariance(self._c, amp=1, r=self._k)
        self._cov_matrix = self._cov(self._fiber, self._fiber)
        self._cov_inv_matrix = linalg.inv(self._cov_matrix)
        self._alpha = numpy.asmatrix(
            numpy.dot(numpy.ones(len(self._fiber)), self._cov_inv_matrix)).T

        # Re-observe the process: value 1 at every fiber point, with
        # observation variance ``_epsilon``.
        self._mean = _GP.Mean(_constant, val=0)
        self._gp = _GP.observe(
            self._mean,
            self._cov,
            obs_mesh=self._fiber,
            obs_vals=numpy.ones(len(self._fiber)),
            obs_V=numpy.zeros(len(self._fiber)) + self._epsilon)

        # Identity test on purpose: ``!= None`` on an array compares
        # elementwise and cannot be used as a truth value.
        if self._precomputedVariance is not None:
            self.precomputeVarianceField()
        if self._precomputedMean is not None:
            self.precomputeMeanField()
def uninformative_prior_gp(c=0., diff_degree=2., amp=1., scale=1.5):
    """
    Build mean and covariance priors for a Gaussian Process.

    Parameters
    ----------
    c : float, forwarded to the constant mean function as ``c``
    diff_degree : float, ``diff_degree`` of the Matern covariance
    amp : float, ``amp`` (amplitude) of the Matern covariance
    scale : float, ``scale`` of the Matern covariance

    Returns
    -------
    M, C : the ``gp.Mean`` built from ``const_func`` and the ``gp.Covariance``
      built from ``gp.matern.euclidean``, in that order
    """
    matern_settings = dict(diff_degree=diff_degree, amp=amp, scale=scale)
    prior_mean = gp.Mean(const_func, c=c)
    prior_cov = gp.Covariance(gp.matern.euclidean, **matern_settings)
    return prior_mean, prior_cov
def fit_GPR(infile, outfile, dv_list, scale, number_submodels, test, spacetime_iters, top_submodel): # load in the data all_data = csv2rec(infile, use_mrecords=False) for m in range(number_submodels): if all_data['spacetime_' + str(m + 1)].dtype == 'float64': all_data = np.delete( all_data, np.where(np.isnan(all_data['spacetime_' + str(m + 1)]))[0], axis=0) # find the list of years for which we need to predict year_list = np.unique(all_data.year) # find the list of country/age groups country_age = np.array([ str(all_data.iso3[i]) + '_' + str(all_data.age_group[i]) for i in range(len(all_data)) ]) country_age_list = np.repeat(np.unique(country_age), len(year_list)) # make empty arrays in which to store the results total_iters = np.sum(spacetime_iters) draws = [ np.empty(len(country_age_list), 'float') for i in range(total_iters) ] if (top_submodel > 0): top_submodel_draws = [ np.empty(len(country_age_list), 'float') for i in range(100) ] iso3 = np.empty(len(country_age_list), '|S3') age_group = np.empty(len(country_age_list), 'int') year = np.empty(len(country_age_list), 'int') # loop through country/age groups for ca in np.unique(country_age_list): print('GPRing ' + ca) # subset the data for this particular country/age ca_data = all_data[country_age == ca] # subset just the observed data if ca_data['lt_cf'].dtype != '|O8': ca_observed = ca_data[(np.isnan(ca_data['lt_cf']) == 0) & (ca_data['test_' + test] == 0)] if len(ca_observed) > 1: has_data = True else: has_data = False else: has_data = False # keep track of how many iterations have been added for this model iter_counter = 0 # loop through each submodel for m in range(number_submodels): # identify the dependent variable for this model dv = dv_list[m] # continue making predictions if we actually need draws for this model if (spacetime_iters[m] > 0) or (m + 1 == top_submodel): # skip models with no spacetime results if all_data['spacetime_' + str(m + 1)].dtype != 'float64': for i in range(spacetime_iters[m]): 
draws[iter_counter][country_age_list == ca] = np.NaN iter_counter += 1 if (m + 1 == top_submodel): for i in range(100): top_submodel_draws[i][country_age_list == ca] = np.NaN continue # make a list of the spacetime predictions ca_prior = np.array([ np.mean(ca_data['spacetime_' + str(m + 1)][ca_data.year == y]) for y in year_list ]) # find the amplitude for this country/age amplitude = np.mean(ca_data['spacetime_amplitude_' + str(m + 1)]) # make a linear interpolation of the spatio-temporal predictions to use as the mean function for GPR def mean_function(x): return np.interp(x, year_list, ca_prior) # setup the covariance function M = gp.Mean(mean_function) C = gp.Covariance(eval_fun=gp.matern.euclidean, diff_degree=2, amp=amplitude, scale=scale) # observe the data if there is any if has_data: gp.observe(M=M, C=C, obs_mesh=ca_observed.year, obs_V=ca_observed['spacetime_data_variance_' + str(m + 1)], obs_vals=ca_observed[dv]) # draw realizations from the data realizations = [ gp.Realization(M, C) for i in range(spacetime_iters[m]) ] # save the data for this country/age into the results array iso3[country_age_list == ca] = ca[0:3] age_group[country_age_list == ca] = ca[4:] year[country_age_list == ca] = year_list.T for i in range(spacetime_iters[m]): try: draws[iter_counter][country_age_list == ca] = realizations[i](year_list) except: print('Failure in ' + ca) iter_counter += 1 # if it's the top submodel, do 100 additional draws if (m + 1 == top_submodel): realizations = [gp.Realization(M, C) for i in range(100)] for i in range(100): try: top_submodel_draws[i][country_age_list == ca] = realizations[i]( year_list) except: print('Failure in ' + ca) # save the results print('Saving GPR results') names = ['iso3', 'age_group', 'year'] results = np.core.records.fromarrays([iso3, age_group, year], names=names) for i in range(total_iters): results = recfunctions.append_fields(results, 'ensemble_d' + str(i + 1), draws[i]) if (top_submodel > 0): for i in range(100): results = 
recfunctions.append_fields(results, 'top_submodel_d' + str(i + 1), top_submodel_draws[i]) rec2csv(results, outfile)
print "\ninspect with:\nresults.unstack()['mare', '50%'].unstack() # for example" print "or: results.unstack()['mare', '50%'].unstack(2).reindex(columns='Very Moderately Slightly'.split())" else: N = int(options.numberofrows) delta_true = float(options.delta) sigma_true = float(options.sigma) * pl.ones(5) replicate = int(options.replicate) smoothness = options.smoothing print 'Running random effects validation for:' print 'N', N print 'delta_true', delta_true print 'sigma_true', sigma_true print 'replicate', replicate print 'smoothness', smoothness M = gp.Mean(validate_age_integrating_re.quadratic) C = gp.Covariance(gp.matern.euclidean, amp=1., diff_degree=2, scale=50) gp.observe(M, C, [0, 25, 100], [-5, -3, -5]) log_p = gp.Realization(M, C) true_p = lambda x: pl.exp(log_p(x)) model = validate_age_integrating_re.validate_ai_re( N, delta_true, sigma_true, true_p, smoothness) model.results.to_csv( '%s/%s/%s-%s-%s-%s.csv' % (output_dir, validation_name, options.numberofrows, options.delta, options.sigma, options.replicate))
def plus_minus(arr, bins=30, conf=0.68, xrange=None, func='poly',
               fit_log=True, order=7, debug=False, zero_pad=False,
               end_tol=[None, None]):
    """Estimate the mode of ``arr`` and its confidence offsets from a histogram.

    Parameters
    ----------
    arr : samples, binned with ``histogram``.
    bins, xrange : forwarded to ``histogram`` as ``bins`` and ``range``.
    conf : enclosed probability of the interval.
    func : ``'gp'`` or ``'poly'`` fits a smooth curve to the histogram (a
        Gaussian process, or ``fit_poly.fitpoly`` of degree ``order``); any
        other value uses the raw normalized histogram.
    fit_log : fit the log of the non-empty bins instead of the raw counts.
    order : polynomial order for ``func='poly'``.
    debug : plot the histogram, fit, mode and limits with matplotlib.
    zero_pad : for ``func='gp'`` with ``fit_log`` false, pad the observations
        with zeros on both sides.
    end_tol : ``[lower, upper]``; when the first/last bin count relative to
        the maximum exceeds the given tolerance, that side is reported as
        having no limit.  The default list is only read, never modified.

    Returns
    -------
    ``(mod, minus, plus)``: the mode and the distances to the lower and upper
    limits, ``nan`` where a limit does not exist.  When ``end_tol`` triggers,
    the tuple is returned early as ``(mod, nan, nan)``, ``(mod, nan, upper)``
    or ``(mod, lower, nan)``, where ``upper``/``lower`` are positions rather
    than offsets from the mode.

    Raises
    ------
    RuntimeError : ``func='gp'`` was requested but ``gp`` is not available.
    """
    hist0, bins = histogram(arr, bins=bins, range=xrange)
    xb = (bins[1:] + bins[:-1]) / 2
    if fit_log:
        # only non-empty bins have a finite logarithm
        gids = greater(hist0, 0)
        xb = xb[gids]
        var = 1. / hist0[gids]
        hist = log(hist0[gids])
    else:
        var = hist0 * 1
        hist = hist0 * 1
    if xrange is None:
        xrange = (bins[0], bins[-1])
    xplot = linspace(xrange[0] * 0.9, xrange[1] * 1.1, 101)

    if debug:
        fig = plt.figure()
        if fit_log:
            y1 = hist
            y2 = exp(hist)
        else:
            y1 = log(hist)
            y2 = hist
        ax1 = fig.add_subplot(211)
        ax1.plot(xb, y1, 'o')
        ax2 = fig.add_subplot(212)
        ax2.plot(xb, y2, 'o')

    if func == 'gp' or func == 'poly':
        if func == 'gp':
            if not gp:
                raise RuntimeError(
                    "To use GP interpolation, you need to install pymc")
            scale = xb.max() - xb.min()
            M = gp.Mean(
                lambda x: zeros(x.shape[0], dtype=float32) + median(hist))
            C = gp.Covariance(gp.matern.euclidean,
                              diff_degree=3,
                              scale=scale * 0.5,
                              amp=std(hist))
            # Pad with zeros
            if zero_pad and not fit_log:
                obs_mesh = concatenate([
                    xb.min() + (xb - xb.max())[:-1], xb,
                    xb.max() + (xb - xb.min())[1:]
                ])
                obs = concatenate([hist[1:] * 0, hist, hist[1:] * 0])
                var = concatenate([hist[1:] * 0, var, hist[1:] * 0])
            else:
                obs_mesh = xb
                obs = hist
            gp.observe(M, C, obs_mesh=obs_mesh, obs_vals=obs, obs_V=var)
            density = lambda x: wrap_M(x, M, xb[0], xb[-1], log=fit_log)
        else:
            x0 = xb[argmax(hist)]
            pars, epars = fit_poly.fitpoly(xb, hist, w=1. / var, x0=x0,
                                           k=order)
            density = lambda x: wrap_poly(x, x0, pars, xb[0], xb[-1],
                                          log=fit_log)

        if debug:
            ax1.plot(xplot, log(density(xplot)), '-')
            ax2.plot(xplot, density(xplot), '-')

        # locate the mode; a peak in the first/last bin is one-sided
        oneside = False
        if argmax(hist) == 0:
            mod = xb[0]
            oneside = True
        elif argmax(hist) == len(xb) - 1:
            mod = xb[-1]
            oneside = True
        else:
            mod0 = xb[argmax(hist)]
            try:
                mod = brent(lambda x: -density(x),
                            brack=(xb.min(), mod0, xb.max()))
            except Exception:
                # monotonic.  Take extremum
                oneside = True
                if density(xb[0]) > density(xb[-1]):
                    mod = xb[0]
                else:
                    mod = xb[-1]

        # normalize the fitted curve to unit area over the binned range
        fac = integrate.quad(density, xb[0], xb[-1])[0]
        prob = lambda x: density(x) / fac

        # end tolerance if requested
        lower_limit = False
        upper_limit = False
        if end_tol[0] is not None and \
                float(hist0[0]) / hist0.max() > end_tol[0]:
            lower_limit = True
        if end_tol[1] is not None and \
                float(hist0[-1]) / hist0.max() > end_tol[1]:
            upper_limit = True
        if lower_limit and upper_limit:
            # too flat, return mode, but no limits
            return mod, nan, nan
        elif lower_limit and not upper_limit:
            # one-sided
            tail = (1 - conf)
            upper = brentq(
                lambda x: integrate.quad(prob, x, xplot[-1])[0] - tail,
                mod, xplot[-1])
            return mod, nan, upper
        elif upper_limit and not lower_limit:
            tail = (1 - conf)
            lower = brentq(
                lambda x: integrate.quad(prob, xplot[0], x)[0] - tail,
                xplot[0], xplot[-1])
            return mod, lower, nan

        if debug:
            ax1.axvline(mod, color='red')
            ax2.axvline(mod, color='red')

        if oneside:
            tail = (1 - conf)
        else:
            tail = (1 - conf) / 2

        if integrate.quad(prob, xplot[0], mod)[0] < tail:
            # No lower bound
            minus = nan
        else:
            lower = brentq(
                lambda x: integrate.quad(prob, xplot[0], x)[0] - tail,
                xplot[0], mod)
            minus = mod - lower
            # only drawn when a lower limit actually exists
            if debug:
                ax1.axvline(lower, color='orange')
                ax2.axvline(lower, color='orange')

        # test for upper bound
        if integrate.quad(prob, mod, xplot[-1])[0] < tail:
            # No upper bound
            plus = nan
        else:
            upper = brentq(
                lambda x: integrate.quad(prob, x, xplot[-1])[0] - tail,
                mod, xplot[-1])
            plus = upper - mod
            if debug:
                ax1.axvline(upper, color='orange')
                ax2.axvline(upper, color='orange')
    else:
        # no fit: walk inwards from both ends of the normalized histogram
        hist = hist * 1.0 / sum(hist)
        mid = argmax(hist)
        mod = xb[mid]
        if debug:
            ax1.axvline(mod, color='red')
            ax2.axvline(mod, color='red')
        i0 = 0
        i1 = len(hist) - 1
        prob = 0
        while (prob < (1 - conf) / 2):
            if i0 < mid:
                i0 += 1
            else:
                break
            prob = sum(hist[0:i0])
        if i0 == 0:
            lower = None
        else:
            lower = xb[i0]
            if debug:
                ax1.axvline(lower, color='orange')
                ax2.axvline(lower, color='orange')
        while (prob < 1 - conf):
            if i1 > mid:
                i1 -= 1
            else:
                break
            prob = sum(hist[0:i0]) + sum(hist[i1:])
        if i1 == len(xb) - 1:
            upper = None
        else:
            upper = xb[i1]
            if debug:
                ax1.axvline(upper, color='orange')
                ax2.axvline(upper, color='orange')

        if upper is not None:
            plus = upper - mod
        else:
            plus = nan
        if lower is not None:
            minus = mod - lower
        else:
            minus = nan
    return mod, minus, plus
elif options.tally.lower() == 'true': results = tally_results() print 'median over all replicates of median absolute relative error' print results.unstack()['mare', '50%'].unstack() else: N = int(options.numberofrows) delta_true = float(options.delta) replicate = int(options.replicate) print 'Running random effects validation for:' print 'N', N print 'delta_true', delta_true print 'replicate', replicate M = gp.Mean(validate_consistent_model.constant) C = gp.Covariance(gp.matern.euclidean, amp=1., diff_degree=2, scale=50) gp.observe(M, C, [0, 100], [-5, -5]) true = {} li = gp.Realization(M, C) true['i'] = lambda x: pl.exp(li(x)) lr = gp.Realization(M, C) true['r'] = lambda x: pl.exp(lr(x)) lf = gp.Realization(M, C) true['f'] = lambda x: pl.exp(lf(x)) model = validate_consistent_model.validate_consistent_model_sim( N, delta_true, true) model.results.to_csv( '%s/%s/%s-%s-%s-%s.csv' %
def plot2dsurf(self, param1, param2, ax=None, xrange=None, yrange=None,
               bins=30, smooth=False, bfac=2, sfac=1., dd=3, cmap=cm.gray_r,
               levels=[], ccolor='red', fill=False, ccmap=None, falpha=1.0,
               outfile=None, zorder=None):
    '''Plot up a 2D binned parameter plot for [param1] and [param2].

    If [ax] is supplied, use it to plot, otherwise open up a new figure and
    axes.  You can specify [xrange] and [yrange].  [bins] will be passed to
    histogram2d.  If [smooth], the binned surface is smoothed using either a
    bivariate spline (knots at every [bfac]-th bin centre) or a Gaussian
    Process (if pymc.gp is available; [dd] is its diff_degree and [sfac]
    multiplies its scale).  If [cmap] is None, no image is drawn.  If
    [levels] is specified as fractions (0.68, 0.95, etc), draw the contours
    that enclose this fraction of the data in colour [ccolor]; with [fill]
    they are also filled using [ccmap] and alpha [falpha].

    Axis labels, axis limits, the redraw and saving to [outfile] are applied
    only when the figure was created here; in that case the figure is
    returned, otherwise None is returned.

    Raises RuntimeError if either trace is not one-dimensional.'''
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        own_ax = True
    else:
        own_ax = False

    tr1 = self.get_trace0(param1)
    tr2 = self.get_trace0(param2)
    if len(tr1.shape) != 1 or len(tr2.shape) != 1:
        raise RuntimeError(
            "Error, variables must be scalars, try using ':' notation")

    ranges = [[tr1.min(), tr1.max()], [tr2.min(), tr2.max()]]
    if xrange is not None:
        ranges[0] = list(xrange)
    if yrange is not None:
        ranges[1] = list(yrange)

    # first, bin up the data (all of it); x is param1, y is param2
    grid, xs, ys = histogram2d(tr1, tr2, bins=bins, range=ranges)
    grid = grid.T * 1.0
    xplot = linspace(xs[0], xs[-1], 101)
    yplot = linspace(ys[0], ys[-1], 101)
    extent = [xs[0], xs[-1], ys[0], ys[-1]]

    # switch from bin edges to bin centres
    xs = (xs[1:] + xs[:-1]) / 2
    ys = (ys[1:] + ys[:-1]) / 2

    x, y = meshgrid(xs, ys)
    tx = xs[::bfac]
    ty = ys[::bfac]
    if smooth and not gp:
        tck = bisplrep(ravel(x), ravel(y), ravel(grid), task=-1, tx=tx, ty=ty)
        x = linspace(xs[0], xs[-1], 501)
        y = linspace(ys[0], ys[-1], 501)
        grid = bisplev(x, y, tck).T
    elif smooth and gp:
        M = gp.Mean(
            lambda x: zeros(x.shape[:-1], dtype=float) + median(grid))
        scalerat = (tr2.max() - tr2.min()) / (tr1.max() - tr1.min())
        C = gp.Covariance(gp.matern.aniso_euclidean,
                          diff_degree=dd,
                          scale=(tr1.max() - tr1.min()) * sfac,
                          amp=std(grid),
                          scalerat=scalerat)
        x, y = meshgrid(xs, ys)
        mesh = vstack((ravel(x), ravel(y))).T
        gp.observe(M, C, obs_mesh=mesh, obs_vals=ravel(grid),
                   obs_V=ravel(grid))
        dplot = dstack(meshgrid(xplot, yplot))
        grid, Vsurf = gp.point_eval(M, C, dplot)
        # the smoothed surface may dip below zero; clip it
        grid = where(grid < 0, 0, grid)

    if cmap:
        ax.imshow(grid,
                  extent=extent,
                  origin='lower',
                  aspect='auto',
                  interpolation='nearest',
                  cmap=cmap)

    if levels:
        # convert enclosed fractions into surface heights
        prob = ravel(grid) / sum(grid)
        sprob = sort(prob)
        cprob = 1.0 - cumsum(sprob)
        clevels = []
        for l in levels:
            idx = nonzero(greater(cprob - l, 0))[0][-1]
            clevels.append(sprob[idx])
        prob.shape = grid.shape
        clevels.sort()
        norm = Normalize(clevels[0] * 0.5, clevels[-1] * 1.3)
        if fill:
            ax.contourf(prob,
                        levels=clevels + [1],
                        extent=extent,
                        origin='lower',
                        alpha=falpha,
                        cmap=ccmap,
                        norm=norm,
                        zorder=zorder)
        ax.contour(prob,
                   levels=clevels,
                   colors=ccolor,
                   extent=extent,
                   origin='lower',
                   linewidths=2,
                   zorder=zorder)

    # `fig` only exists when the axes were created here
    if own_ax:
        ax.set_xlabel("$%s$" % param1)
        ax.set_ylabel("$%s$" % param2)
        if xrange is not None:
            ax.set_xlim(xrange[0], xrange[1])
        if yrange is not None:
            ax.set_ylim(yrange[0], yrange[1])
        plt.draw()
        if outfile is not None:
            fig.savefig(outfile)
        return fig
def M_g(eval_fun=linfun, c=self.goal_rate[t.team_id]):
    """Build a ``gp.Mean`` from ``eval_fun``, forwarding ``c`` as a keyword.

    The default for ``c`` is ``self.goal_rate[t.team_id]``, read once when
    this function is defined.
    """
    mean_params = {'c': c}
    return gp.Mean(eval_fun, **mean_params)
def M_d(eval_fun=linfun, c=self.def_rate[t.team_id]):
    """Build a ``gp.Mean`` from ``eval_fun``, forwarding ``c`` as a keyword.

    The default for ``c`` is ``self.def_rate[t.team_id]``, read once when
    this function is defined.
    """
    mean_params = {'c': c}
    return gp.Mean(eval_fun, **mean_params)
def smooth(x):
    """Return a Gaussian-process-smoothed version of the sequence ``x``.

    A zero-mean process with a Matern covariance (amp 1, scale 15,
    diff_degree 2) is observed at the positions ``0 .. len(x) - 1`` with the
    values of ``x``, and the resulting mean is evaluated at those same
    positions.  The fifth argument to ``gp.observe`` (0.5) is presumably the
    observation variance -- confirm against the pymc.gp documentation.
    """
    from pymc import gp

    def zero_mean(x):
        return zeros(len(x))

    posterior_mean = gp.Mean(zero_mean)
    matern_cov = gp.Covariance(gp.matern.euclidean,
                               amp=1,
                               scale=15,
                               diff_degree=2)
    gp.observe(posterior_mean, matern_cov, range(len(x)), x, .5)
    return posterior_mean(range(len(x)))
else: N = int(options.numberofrows) delta_true = float(options.delta) replicate = int(options.replicate) bias = float(options.bias) sigma_prior = float(options.sigma) print 'Running random effects validation for:' print 'N', N print 'delta_true', delta_true print 'bias', bias print 'sigma_prior', sigma_prior print 'replicate', replicate M = gp.Mean(validate_similarity.quadratic) C = gp.Covariance(gp.matern.euclidean, amp=1., diff_degree=2, scale=50) gp.observe(M, C, [0, 30, 100], [-5, -3, -5]) true = {} lp = gp.Realization(M, C) true_p = lambda x: pl.exp(lp(x)) model = validate_similarity.generate_data(N, delta_true, true_p, 'Unusable', bias, sigma_prior) for het in 'Very Moderately Slightly'.split(): model.parameters['p']['heterogeneity'] = het validate_similarity.fit(model) model.results.to_csv( '%s/%s/%s-%s-%s-%s-%s-%s.csv' %