def adjust_mean_mtslice(tdata_mt, ref=None):
    """Shift each study's observed mean so its fitted curve is zero at ``ref``.

    For every study in ``tdata_mt`` a spline of ``obs_mean`` against
    ``daily_temp`` is fitted with ``utils.fit_spline``; the fitted value at
    ``ref`` is then subtracted in place from that study's ``obs_mean``.

    Args:
        tdata_mt: data object exposing ``mean_temp``, ``study_sizes``,
            ``num_studies``, ``obs_mean``, ``obs_std`` and ``daily_temp``.
        ref: reference temperature; defaults to ``tdata_mt.mean_temp[0]``.

    Returns:
        The same ``tdata_mt`` object, with ``obs_mean`` modified in place.
    """
    if ref is None:
        ref = tdata_mt.mean_temp[0]

    study_slices = utils.sizes_to_slices(tdata_mt.study_sizes)
    for idx in range(tdata_mt.num_studies):
        sl = study_slices[idx]
        study_mean = tdata_mt.obs_mean[sl]
        study_std = tdata_mt.obs_std[sl]
        study_temp = tdata_mt.daily_temp[sl]

        # Studies with at least 5 points get a quadratic spline with a knot
        # at the reference and a linear left tail; smaller studies get a
        # plain degree-1 spline over the observed range.
        if tdata_mt.study_sizes[idx] >= 5:
            knots = np.array([study_temp.min(), ref, study_temp.max()])
            degree = 2
            extra = {'l_linear': True}
        else:
            knots = np.array([study_temp.min(), study_temp.max()])
            degree = 1
            extra = {}
        spline = xspline.xspline(knots, degree, **extra)

        coef = utils.fit_spline(study_mean, study_std, study_temp, spline)
        value_at_ref = spline.designMat(ref).dot(coef)

        # Re-centre this study's observations on the fitted value at ref.
        tdata_mt.obs_mean[sl] -= value_at_ref

    return tdata_mt
def __init__(self, beta, beta_var, gamma, random_effects, mean_temp,
             num_beta_spline_knots=6, num_gamma_spline_knots=6,
             beta_spline_degree=3, gamma_spline_degree=3):
    """Store the per-mean-temperature estimates and fit splines through them.

    Args:
        beta: array of shape ``(mean_temp.size, 2)`` (asserted).
        beta_var: stored as given.
        gamma: array of shape ``(mean_temp.size, 2)`` (asserted).
        random_effects: stored as given.
        mean_temp: array of mean temperatures; its min and max bound the
            spline knots.
        num_beta_spline_knots: number of equally spaced knots for the beta
            spline.
        num_gamma_spline_knots: number of equally spaced knots for the gamma
            spline.
        beta_spline_degree: degree of the beta spline.
        gamma_spline_degree: degree of the gamma spline.
    """
    def _normal_equation_fit(design, target):
        # Least-squares coefficients via the normal equations.
        return np.linalg.solve(design.T.dot(design), design.T.dot(target))

    # Validate shapes before anything else is stored.
    self.num_mean_temp = mean_temp.size
    assert beta.shape == (self.num_mean_temp, 2)
    assert gamma.shape == (self.num_mean_temp, 2)

    self.mean_temp = mean_temp
    self.beta = beta
    self.beta_var = beta_var
    self.gamma = gamma
    self.random_effects = random_effects

    # Both splines span the observed mean-temperature range with equally
    # spaced knots. (A fixed gamma knot placement of min, 13.0, 17.0, 22.0,
    # max existed only as commented-out code in the original.)
    self.min_mean_temp = self.mean_temp.min()
    self.max_mean_temp = self.mean_temp.max()
    knots_for_beta = np.linspace(self.min_mean_temp,
                                 self.max_mean_temp,
                                 num_beta_spline_knots)
    knots_for_gamma = np.linspace(self.min_mean_temp,
                                  self.max_mean_temp,
                                  num_gamma_spline_knots)

    self.beta_spline = xspline.xspline(knots_for_beta,
                                       beta_spline_degree,
                                       l_linear=True,
                                       r_linear=True)
    self.gamma_spline = xspline.xspline(knots_for_gamma,
                                        gamma_spline_degree,
                                        l_linear=True,
                                        r_linear=True)

    # Spline basis coefficients that reproduce beta and gamma at mean_temp.
    design_beta = self.beta_spline.designMat(self.mean_temp)
    design_gamma = self.gamma_spline.designMat(self.mean_temp)
    self.c_beta = _normal_equation_fit(design_beta, beta)
    self.c_gamma = _normal_equation_fit(design_gamma, gamma)
def create_spline_list(self, n_splines=50, n_knots=5, width_pct=0.2, degree=3):
    """Sample knot sets and store one spline per set in ``self.spline_list``.

    Only acts when ``self.model_type == 'spline'``; otherwise a message is
    printed and nothing is stored.

    Args:
        n_splines: number of knot samples requested from ``sampleKnots``.
        n_knots: total number of knots per spline.
        width_pct: minimum interval width, as a fraction of the
            10th-to-90th percentile range.
        degree: degree passed to each ``xspline``.
    """
    if self.model_type != 'spline':
        print(f'Spline list not needed for model_type {self.model_type}')
        return

    spline_mat = self.x_cov_list[0]['mat'][0]
    dose_min = 0
    dose_max = spline_mat.max()
    dose_range = dose_max - dose_min

    # Interior knots are confined to the 10th-90th percentile band,
    # expressed as fractions of the full dose range.
    start = (np.percentile(spline_mat, 10) - dose_min) / dose_range
    end = (np.percentile(spline_mat, 90) - dose_min) / dose_range
    print(
        f'Knot range: {np.percentile(spline_mat, 10)} to {np.percentile(spline_mat, 90)}'
    )

    n_interior = n_knots - 2
    n_intervals = n_knots - 1
    b = np.array([[start, end]] * n_interior)

    min_dist = (end - start) * width_pct
    min_dist_val = min_dist * (dose_max - dose_min)
    print(f'Minimum interval width: {min_dist_val}')
    d = np.array([[min_dist, 1.]] * n_intervals)

    knots_samples = sampleKnots(dose_min, dose_max, n_intervals,
                                b=b, d=d, N=n_splines)

    splines = []
    for knots in knots_samples:
        splines.append(xspline(knots, degree, r_linear=True))
    self.spline_list = splines
def offsite_data_at_mean_temp(tdata, mean_temp):
    """Re-centre each study at ``mean_temp`` and inflate its std if needed.

    The data returned by ``extract_at_mean_temp(tdata, mean_temp)`` is
    processed study by study: a quadratic spline (knot at ``mean_temp``,
    linear left tail) is fitted to ``obs_mean`` against ``daily_temp``, the
    fitted value at ``mean_temp`` is subtracted from the study's
    ``obs_mean``, and the study's ``obs_std`` is multiplied by
    ``max(1, std(standardised residual))``.

    Args:
        tdata: input data object, passed to ``extract_at_mean_temp``.
        mean_temp: mean temperature at which to extract and re-centre.

    Returns:
        The extracted data object, with ``obs_mean`` and ``obs_std``
        modified in place.
    """
    tdata_at_mean_temp = extract_at_mean_temp(tdata, mean_temp)
    study_slices = utils.sizes_to_slices(tdata_at_mean_temp.study_sizes)

    for i in range(tdata_at_mean_temp.num_studies):
        study_slice = study_slices[i]
        obs_mean = tdata_at_mean_temp.obs_mean[study_slice]
        obs_std = tdata_at_mean_temp.obs_std[study_slice]
        cov = tdata_at_mean_temp.daily_temp[study_slice]

        # fit the curve
        spline = xspline.xspline(
            np.array([cov.min(), mean_temp, cov.max()]), 2, l_linear=True)
        beta = utils.fit_spline(obs_mean, obs_std, cov, spline)
        ref_lnrr = spline.designMat(mean_temp).dot(beta)

        # Compute the standardised residual BEFORE shifting the data.
        # If study_slice is a slice object, obs_mean is a view into
        # tdata_at_mean_temp.obs_mean, so shifting first would compare the
        # shifted data against the unshifted fit and distort the residual
        # spread whenever obs_std varies within the study.
        residual = (obs_mean - spline.designMat(cov).dot(beta)) / obs_std
        inflation = np.maximum(1.0, np.std(residual))

        # shift the data
        tdata_at_mean_temp.obs_mean[study_slice] -= ref_lnrr

        # inflate the std if necessary
        tdata_at_mean_temp.obs_std[study_slice] *= inflation

    return tdata_at_mean_temp
def create_spline_list(spline_mat, degree=3, n_knots=5, l_linear=False,
                       r_linear=False, n_splines=10, width_pct=0.1,
                       l_zero=True):
    """Build a list of splines from randomly sampled knot sets.

    Args:
        spline_mat: array of dose values; its max (and optionally min)
            define the knot domain.
        degree: degree passed to each ``xspline``.
        n_knots: total number of knots per spline.
        l_linear: forwarded to ``xspline``.
        r_linear: forwarded to ``xspline``.
        n_splines: number of knot samples requested from ``sampleKnots``.
        width_pct: minimum interval width, as a fraction of the
            5th-to-95th percentile range.
        l_zero: if true the domain starts at 0, otherwise at
            ``spline_mat.min()``.

    Returns:
        A list with one ``xspline`` per sampled knot set.
    """
    dose_max = spline_mat.max()
    dose_min = 0 if l_zero else spline_mat.min()

    # Interior knots live in the 5th-95th percentile band, expressed as
    # fractions of the domain; the lower bound falls back to 0 when the
    # 5th percentile does not exceed dose_min.
    pct_05 = np.percentile(spline_mat, 5)
    if pct_05 > dose_min:
        start = (pct_05 - dose_min) / (dose_max - dose_min)
    else:
        start = 0
    end = (np.percentile(spline_mat, 95) - dose_min) / (dose_max - dose_min)
    print(
        f'Knot range: {dose_min + start * (dose_max - dose_min)} to {dose_min + end * (dose_max - dose_min)}'
    )

    n_interior = n_knots - 2
    n_intervals = n_knots - 1
    b = np.array([[start] * n_interior, [end] * n_interior]).T

    min_dist = (end - start) * width_pct
    min_dist_val = min_dist * (dose_max - dose_min)
    print(f'Minimum interval width: {min_dist_val}')
    d = np.array([[min_dist] * n_intervals, [1.] * n_intervals]).T

    knots_samples = sampleKnots(dose_min, dose_max, n_intervals,
                                b=b, d=d, N=n_splines)

    spline_list = []
    for knots in knots_samples:
        spline_list.append(
            xspline(knots, degree, l_linear=l_linear, r_linear=r_linear))
    return spline_list
def adjust_agg_std_mtslice(tdata_mt, ref=None):
    """Inflate the std of the aggregated slice data from one pooled fit.

    A single quadratic spline (knot at ``ref``, linear left tail) is fitted
    to all of ``obs_mean`` against ``daily_temp``. ``obs_std`` is then
    multiplied by ``max(3.0, std(standardised residual))``.

    Args:
        tdata_mt: data object exposing ``mean_temp``, ``daily_temp``,
            ``obs_mean`` and ``obs_std``.
        ref: reference temperature used as the middle knot; defaults to
            ``tdata_mt.mean_temp[0]``.

    Returns:
        The same ``tdata_mt`` object, with ``obs_std`` scaled.
    """
    if ref is None:
        ref = tdata_mt.mean_temp[0]

    temps = tdata_mt.daily_temp
    knots = np.array([temps.min(), ref, temps.max()])
    spline = xspline.xspline(knots, 2, l_linear=True)

    # Pooled fit across every observation in the slice.
    coef = utils.fit_spline(tdata_mt.obs_mean, tdata_mt.obs_std, temps, spline)
    fitted = spline.designMat(temps).dot(coef)

    residual = tdata_mt.obs_mean - fitted
    residual /= tdata_mt.obs_std

    # NOTE(review): the floor is 3.0, so the std is always at least tripled;
    # a commented-out debug print in the original used 1.0 - confirm intended.
    inflation = np.maximum(3.0, np.std(residual))
    tdata_mt.obs_std *= inflation

    return tdata_mt
def fit_trend_mtslice(tdata_at_mean_temp, tmrl, inlier_pct=0.9, debug=False):
    """Fit a two-column piecewise-linear mixed-effects model with LimeTr.

    Args:
        tdata_at_mean_temp: data object exposing ``daily_temp``,
            ``study_sizes``, ``num_studies``, ``obs_mean`` and ``obs_std``.
        tmrl: temperature used as the middle knot of the degree-1 spline.
        inlier_pct: forwarded to LimeTr as ``inlier_percentage``.
        debug: when truthy, prints the study count and enables the
            solver's verbose output.

    Returns:
        Tuple ``(beta, beta_var, gamma, random_effects)`` after the
        column-norm scaling has been undone.
    """
    if debug:
        print("number of locations at mean temp",
              tdata_at_mean_temp.num_studies)
    outer_verbose = bool(debug)
    inner_print_level = 5 if debug else 0

    # construct the linear mixed effect model
    cov = tdata_at_mean_temp.daily_temp
    knots = np.array([cov.min(), tmrl, cov.max()])
    spline = xspline.xspline(knots, 1)

    left_len = knots[1] - knots[0]
    right_len = knots[2] - knots[1]
    mat_transform = np.array([
        [1.0, 0.0, 0.0],
        [1.0, left_len, 0.0],
        [1.0, left_len, right_len],
    ])
    M = spline.designMat(cov).dot(mat_transform)
    M[:, 1] -= M[:, 0] * left_len
    M = M[:, 1:]  # drop the first column, keep the remaining two

    # Normalise the columns for the solver.
    scale = np.linalg.norm(M, axis=0)
    scaled_M = M / scale

    # construct the LimeTr object
    def F(beta):
        return scaled_M.dot(beta)

    def JF(beta):
        return scaled_M

    Z = scaled_M.copy()
    n = tdata_at_mean_temp.study_sizes
    k_beta = 2
    k_gamma = 2
    Y = tdata_at_mean_temp.obs_mean
    S = tdata_at_mean_temp.obs_std

    # beta is unbounded; gamma is boxed to [1e-7, 1.5].
    lower_bounds = [-np.inf] * k_beta + [1e-7] * k_gamma
    upper_bounds = [np.inf] * k_beta + [1.5] * k_gamma
    uprior = np.array([lower_bounds, upper_bounds])

    lt = limetr.LimeTr(n, k_beta, k_gamma, Y, F, JF, Z,
                       S=S,
                       uprior=uprior,
                       inlier_percentage=inlier_pct)

    # fit model, starting beta from a weighted least-squares solve
    MS = M / S.reshape(-1, 1)
    YS = Y / S
    beta0 = np.linalg.solve(MS.T.dot(MS), MS.T.dot(YS))
    gamma0 = np.array([0.1, 0.1])
    x0 = np.hstack((beta0, gamma0))
    beta, gamma, _ = lt.fitModel(x0=x0,
                                 outer_step_size=200.0,
                                 outer_verbose=outer_verbose,
                                 inner_print_level=inner_print_level)

    # estimate the random effects
    random_effects = lt.estimateRE()

    # estimate the uncertainty of beta
    V = limetr.utils.VarMat(lt.S**2, lt.Z, gamma, lt.n)
    beta_var = np.linalg.inv(M.T.dot(V.invDot(M)))

    # Scale the estimates back. The in-place operations and their order
    # are kept exactly, since the arrays may be shared with the LimeTr
    # object.
    # NOTE(review): beta_var is built from the unscaled M and then divided
    # by scale**2 (broadcast over columns) - this looks like a double
    # scaling; confirm against the intended covariance.
    scale_sq = scale**2
    beta /= scale
    beta_var /= scale_sq
    gamma /= scale_sq
    random_effects /= scale

    return beta, beta_var, gamma, random_effects