def test_compute_rmse(test_data, param_names, fun, link_fun, var_link_fun,
                      loss_fun):
    """Check ``CurveModel.compute_rmse`` against a hand-computed RMSE.

    A model is built on ``test_data`` with an ``'intercept'`` covariate for
    each of three parameters. The expected value is the root mean square of
    ``model.obs - model.fun(model.t, params)``, where ``params`` comes from
    ``effects2params`` applied to the same effects vector handed to
    ``compute_rmse`` (called with ``use_obs_se=False``).
    """
    covs = [['intercept']] * 3
    model = CurveModel(test_data, 't', 'obs', covs, 'group', param_names,
                       link_fun, var_link_fun, fun, loss_fun=loss_fun)

    # Effects vector: three ones followed by three zeros.
    effects = np.hstack((np.ones(3), np.zeros(3)))
    params = effects2params(
        effects,
        model.order_group_sizes,
        model.covs,
        model.link_fun,
        model.var_link_fun,
    )

    expected = np.sqrt(np.mean((model.obs - model.fun(model.t, params)) ** 2))
    actual = model.compute_rmse(x=effects, use_obs_se=False)

    assert np.abs(actual - expected) < 1e-10
def get_init_dict(self, df, groups):
    """Build the initial-parameter dictionary for every model variant.

    For each parameter (``'beta'``, ``'p'``) and each fit type (``'loose'``,
    ``'tight'``) a fresh ``CurveModel`` is built from a deep copy of
    ``self.<param>_model_kwargs`` and passed to ``get_initial_params``
    together with a deep copy of ``self.<fit_type>_<param>_fit_dict`` whose
    ``options`` entry is replaced by ``self.smart_init_options``.

    Args:
        df: (pd.DataFrame) data frame handed to each ``CurveModel``.
        groups: groups forwarded to ``get_initial_params``.

    Returns:
        (dict) nested dictionary ``{param: {fit_type: result}}`` holding the
        value returned by ``get_initial_params`` for each combination.
    """
    init_dict = {}
    for param in ('beta', 'p'):
        by_fit_type = {}
        init_dict[param] = by_fit_type
        for fit_type in ('loose', 'tight'):
            # Deep copies keep the stored configuration untouched.
            model_kwargs = deepcopy(getattr(self, f'{param}_model_kwargs'))
            model = CurveModel(df=df, **model_kwargs)

            fit_kwargs = deepcopy(
                getattr(self, f'{fit_type}_{param}_fit_dict'))
            fit_kwargs.update(options=self.smart_init_options)

            by_fit_type[fit_type] = get_initial_params(
                groups=groups,
                model=model,
                fit_arg_dict=fit_kwargs,
            )
    return init_dict
def test_loss_fun(test_data, param_names, fun, link_fun, var_link_fun,
                  loss_fun):
    """Check that ``CurveModel.objective`` equals the loss of the residuals.

    The reference value is ``loss_fun`` applied to
    ``(obs - fun(t, params)) / obs_se``, where ``params`` is the first column
    of the un-expanded output of ``effects2params`` for the same effects
    vector that is passed to ``model.objective``.
    """
    covs = [['intercept']] * 3
    model = CurveModel(test_data, 't', 'obs', covs, 'group', param_names,
                       link_fun, var_link_fun, fun, loss_fun=loss_fun)

    # Effects vector: three ones followed by three zeros.
    effects = np.hstack((np.ones(3), np.zeros(3)))
    params = effects2params(
        effects,
        model.order_group_sizes,
        model.covs,
        model.link_fun,
        model.var_link_fun,
        expand=False,
    )[:, 0]

    scaled_residual = (model.obs - fun(model.t, params)) / model.obs_se
    actual = model.objective(effects)
    expected = loss_fun(scaled_residual)

    assert np.abs(actual - expected) < 1e-10
def fit(self, df, group=None):
    """Fit a ``CurveModel`` to ``df`` from computed starting parameters.

    The starting fixed and random effects come from
    ``compute_starting_params`` applied to an initialization dictionary. When
    ``group`` is given, that dictionary is obtained from
    ``self.update_init_model(df=df, group=group)``; otherwise a deep copy of
    ``self.init_dict`` is used. The fitted model is stored on ``self.mod``.

    Args:
        df: data frame passed to ``CurveModel``.
        group: (str) optional group; when provided, the initialization
            dictionary is refreshed for this particular fit via
            ``update_init_model``.

    Returns:
        None
    """
    if group is None:
        init_dict = deepcopy(self.init_dict)
    else:
        init_dict = self.update_init_model(df=df, group=group)

    # Work on a copy so the stored fit configuration is left unchanged.
    fit_kwargs = deepcopy(self.fit_dict)
    fe_init, re_init = compute_starting_params(init_dict)
    fit_kwargs.update(fe_init=fe_init, re_init=re_init)

    self.mod = CurveModel(df=df, **self.basic_model_dict)
    self.mod.fit_params(**fit_kwargs)
def run_joint_model(self, df, groups):
    """Fit one model on the rows of ``df`` belonging to ``groups``.

    Args:
        df: data frame containing the ``self.col_group`` column.
        groups: collection of group values to keep (used with ``isin``).

    Returns:
        The ``CurveModel`` after ``fit_params`` has been called with
        ``self.joint_model_fit_dict``.
    """
    in_groups = df[self.col_group].isin(groups)
    # Copy the subset so the model does not share data with the caller's df.
    subset = df[in_groups].copy()

    model = CurveModel(df=subset, **self.basic_model_dict)
    model.fit_params(**self.joint_model_fit_dict)
    return model
def test_defualt_obs_se(test_data, param_names, fun, link_fun, var_link_fun,
                        loss_fun):
    """Check the default ``obs_se`` when no standard-error column is given.

    The model is built without an observation standard-error column, and the
    resulting ``model.obs_se`` is expected to be close to the mean of
    ``model.obs``.
    """
    covs = [['intercept']] * 3
    model = CurveModel(test_data, 't', 'obs', covs, 'group', param_names,
                       link_fun, var_link_fun, fun, loss_fun=loss_fun)

    obs_se = model.obs_se
    expected_se = model.obs.mean()
    assert np.allclose(obs_se, expected_se)
def run_model(self, df, group):
    """Run the individual model for a single group.

    The rows of ``df`` whose ``self.col_group`` equals ``group`` are copied
    into a new ``CurveModel``. Before fitting, the entry ``fe_gprior[1][1]``
    of a deep copy of ``self.fit_dict`` is multiplied by
    ``self.prior_modifier(model.num_obs)``; ``self.fit_dict`` itself is not
    modified.

    Args:
        df: data frame containing the ``self.col_group`` column.
        group: group value selecting the rows to fit.

    Returns:
        The fitted ``CurveModel``.
    """
    group_df = df[df[self.col_group] == group].copy()
    model = CurveModel(df=group_df, **self.basic_model_dict)

    fit_kwargs = deepcopy(self.fit_dict)
    gprior = fit_kwargs['fe_gprior']
    # Scale the second entry of the beta prior by the number of observations.
    gprior[1][1] *= self.prior_modifier(model.num_obs)

    print(group)
    print('\t update beta fe_gprior to', gprior)

    fit_kwargs['fe_gprior'] = gprior
    model.fit_params(**fit_kwargs)
    return model
def fit(self, daily_deaths, social_distance=None):
    """Fit ``ln_gaussian_cdf`` to the log of cumulative deaths.

    The daily counts are accumulated with ``np.cumsum``; the observation is
    ``log(cumulative + 1)`` against an integer time index. All rows belong to
    the single group ``'all'``. When ``social_distance`` is supplied it is
    added as an extra covariate on the second parameter (``'beta'``). The
    fitted model is stored on ``self.model``.

    Args:
        daily_deaths: sequence of daily counts (converted with ``np.array``).
        social_distance: optional covariate values, one per observation.

    Returns:
        None
    """

    def identity(x):
        return x

    def exponential(x):
        return np.exp(x)

    daily_deaths = np.array(daily_deaths)
    n_data = daily_deaths.shape[0]

    # Prepare the data frame
    df = pd.DataFrame()
    df['death_rate'] = np.cumsum(daily_deaths)
    df['time'] = np.arange(df['death_rate'].shape[0])
    # Add 1 before the log to pad in case the number of deaths is zero.
    df['ln_death_rate'] = np.log(df['death_rate'] + 1)
    df['group'] = ['all'] * n_data
    df['cov_one'] = [1.0] * n_data

    if social_distance is None:
        col_covs = [['cov_one'], ['cov_one'], ['cov_one']]
        num_fe = 3
        fe_init = [-3, 100, 1]
    else:
        df['social_distance'] = social_distance
        col_covs = [['cov_one'], ['cov_one', 'social_distance'], ['cov_one']]
        num_fe = 4
        fe_init = [-3, 100, 0, 10]
        # Disabled alternative: social_distance as a covariate on all three
        # parameters, i.e. col_covs = [['cov_one', 'social_distance']] * 3,
        # num_fe = 6, fe_init = [-3, 0, 100, 0, 10, 0].

    # Set up the CurveModel
    self.model = CurveModel(
        df=df,
        col_t='time',
        col_obs='ln_death_rate',
        col_group='group',
        col_covs=col_covs,
        param_names=['alpha', 'beta', 'p'],
        link_fun=[exponential, identity, exponential],
        var_link_fun=[identity] * num_fe,
        fun=ln_gaussian_cdf,
    )

    # Fit the model to estimate parameters
    optimizer_options = {'ftol': 1e-14, 'maxiter': 500}
    # Random-effect bounds pinned to [0, 0]: no random effects.
    re_bounds = [[0, 0]] * num_fe
    self.model.fit_params(
        fe_init=fe_init,
        options=optimizer_options,
        re_bounds=re_bounds,
    )
def run_model(self, df, group):
    """Run the individual model for ``group`` with group-specific beta bounds.

    The rows of ``df`` whose ``self.col_group`` equals ``group`` are copied
    into a new ``CurveModel``. A deep copy of ``self.fit_dict`` is adjusted
    before fitting, so ``self.fit_dict`` itself is never modified:

    * If ``self.init_parameters_estimations["fe_bounds_beta"]`` has an entry
      for ``group``, the lower beta bound becomes the larger of the common
      lower bound and 1.2 times the suggested lower bound, and
      ``fe_gprior[1][0]`` is raised to at least that lower bound. Otherwise
      the common beta bounds (``fe_bounds[1]``) are used as they are.
    * ``fe_gprior[1][1]`` is multiplied by
      ``self.prior_modifier(model.num_obs)``.

    Args:
        df: data frame containing the ``self.col_group`` column.
        group: group value selecting the rows to fit.

    Returns:
        The fitted ``CurveModel``.
    """
    model = CurveModel(df=df[df[self.col_group] == group].copy(),
                       **self.basic_model_dict)
    fit_dict = deepcopy(self.fit_dict)
    print(group)

    fe_gprior = fit_dict['fe_gprior']
    fe_bounds = fit_dict['fe_bounds']
    common_beta_bounds = fe_bounds[1]
    suggested_beta_bounds = self.init_parameters_estimations[
        "fe_bounds_beta"].get(group)

    if suggested_beta_bounds is None:
        print('\t Use common beta bounds ', common_beta_bounds)
        individual_beta_bounds = common_beta_bounds
    else:
        print("\t Suggested beta bounds ", suggested_beta_bounds)
        individual_beta_bounds = [
            max(common_beta_bounds[0], suggested_beta_bounds[0] * 1.2),
            common_beta_bounds[1],
        ]
        # Keep the first entry of the beta prior inside the tightened bounds.
        fe_gprior[1][0] = max(individual_beta_bounds[0], fe_gprior[1][0])
        print('\t Update beta bounds to ', individual_beta_bounds)

    fe_gprior[1][1] *= self.prior_modifier(model.num_obs)
    print('\t Update beta fe_gprior to ', fe_gprior)

    # fe_gprior was edited in place inside the copy; only the bounds list
    # has to be rebuilt around the (possibly tightened) beta bounds.
    fit_dict['fe_bounds'] = [fe_bounds[0], individual_beta_bounds,
                             fe_bounds[2]]
    model.fit_params(**fit_dict)
    return model
def run_model(self, group, **fit_kwargs):
    """Construct and fit the model for one group of ``self.df``.

    Args:
        group: value of ``self.col_group`` selecting the rows to fit.
        **fit_kwargs: forwarded unchanged to ``CurveModel.fit_params``.

    Returns:
        The fitted ``CurveModel``.
    """
    # Copy the subset so the model does not share data with self.df.
    group_df = self.df[self.df[self.col_group] == group].copy()

    model_kwargs = {
        'col_t': self.col_t,
        'col_obs': self.col_obs,
        'col_covs': self.col_covs,
        'col_group': self.col_group,
        'param_names': self.param_names,
        'link_fun': self.link_fun,
        'var_link_fun': self.var_link_fun,
        'fun': self.fun,
        'col_obs_se': self.col_obs_se,
    }
    model = CurveModel(group_df, **model_kwargs)
    model.fit_params(**fit_kwargs)
    return model
def test_curve_model(alpha_true, beta_true, p_true, n_data):
    """Fit noise-free generalized-logistic data and recover the parameters.

    Observations are generated by ``generalized_logistic`` at ``n_data``
    evenly spaced points from 0 to ``beta_true``. The fit starts from the
    inverse link of one third of each true parameter, and every estimated
    parameter must match the truth to a relative error below ``1e-6``.
    """
    num_params = 3
    params_true = np.array([alpha_true, beta_true, p_true])

    independent_var = np.array(range(n_data)) * beta_true / (n_data - 1)
    df = pd.DataFrame({
        'independent_var': independent_var,
        'measurement_value': generalized_logistic(independent_var,
                                                  params_true),
        'measurement_std': n_data * [0.1],
        'constant_one': n_data * [1.0],
        'data_group': n_data * ['world'],
    })

    # Initialize a model
    link_functions = [exp_fun, identity_fun, exp_fun]
    cm = CurveModel(
        df=df,
        col_t='independent_var',
        col_obs='measurement_value',
        col_covs=num_params * [['constant_one']],
        col_group='data_group',
        param_names=['alpha', 'beta', 'p'],
        link_fun=link_functions,
        var_link_fun=[exp_fun, identity_fun, exp_fun],
        fun=generalized_logistic,
        col_obs_se='measurement_std',
    )

    # Start the optimizer away from the truth: inverse link of truth / 3.
    inv_link_fun = [ln_fun, identity_fun, ln_fun]
    fe_init = np.array(
        [inv_link_fun[j](params_true[j] / 3.0) for j in range(num_params)],
        dtype=float,
    )

    # Fit the parameters
    optimizer_options = {'ftol': 1e-16, 'gtol': 1e-16, 'maxiter': 1000}
    cm.fit_params(fe_init=fe_init, options=optimizer_options)

    params_estimate = cm.params
    for idx in range(num_params):
        rel_error = params_estimate[idx] / params_true[idx] - 1.0
        assert abs(rel_error) < 1e-6
def get_init_dict(self, df, groups):
    """Run the init model and collect initial parameters per group.

    A ``CurveModel`` is built from ``self.basic_model_dict`` and handed to
    ``get_initial_params`` together with a deep copy of ``self.fit_dict``
    whose ``options`` entry is replaced by ``self.smart_init_options``.

    Args:
        df: (pd.DataFrame) data frame to fit the model that will be subset
            by group
        groups: groups forwarded to ``get_initial_params``

    Returns:
        (dict) the value returned by ``get_initial_params``
    """
    model = CurveModel(df=df, **self.basic_model_dict)

    # Copy first so self.fit_dict keeps its own optimizer options.
    init_fit_kwargs = deepcopy(self.fit_dict)
    init_fit_kwargs['options'] = self.smart_init_options

    return get_initial_params(
        groups=groups,
        model=model,
        fit_arg_dict=init_fit_kwargs,
    )
def fit(self, df, group=None):
    """Fit the loose/tight models for the ``beta`` and ``p`` parameters.

    For each parameter whose ``self.<param>_weight`` is non-zero, a loose and
    a tight ``CurveModel`` are fitted and stored as
    ``self.<fit_type>_<param>_model``. Starting values come from
    ``compute_starting_params`` applied to the matching entry of the
    initialization dictionary.

    Args:
        df: data frame passed to every ``CurveModel``.
        group: (str) optional group; when provided, the initialization
            dictionary is obtained from ``self.update_init_model`` for this
            particular fit instead of copying ``self.init_dict``.

    Returns:
        None
    """
    if group is None:
        init_dict = deepcopy(self.init_dict)
    else:
        init_dict = self.update_init_model(df=df, group=group)

    for param in ('beta', 'p'):
        # A zero weight means this parameter's models are not needed.
        if getattr(self, f'{param}_weight') == 0:
            continue

        for fit_type in ('loose', 'tight'):
            model_kwargs = deepcopy(getattr(self, f'{param}_model_kwargs'))
            fit_kwargs = deepcopy(
                getattr(self, f'{fit_type}_{param}_fit_dict'))

            model = CurveModel(df=df, **model_kwargs)
            fe_init, re_init = compute_starting_params(
                init_dict[param][fit_type])
            fit_kwargs.update(fe_init=fe_init, re_init=re_init)

            model.fit_params(**fit_kwargs)
            setattr(self, f'{fit_type}_{param}_model', model)
df['death_rate']=dataset['Confirmed'] df['death_rate']=(df['death_rate'].clip(lower=1e-5))*1000000 print(df['death_rate']) df['ln_death_rate'] = np.log(df['death_rate']) print(df['ln_death_rate']) df['group'] = dataset['State'] df['intercept'] = 1.0 print(df) # Set up the CurveModel model = CurveModel( df=df, col_t='time', col_obs='ln_death_rate', col_group='group', col_covs=[['intercept'], ['intercept'], ['intercept']], param_names=['alpha', 'beta', 'p'], link_fun=[lambda x: x, lambda x: x, lambda x: x], var_link_fun=[lambda x: x, lambda x: x, lambda x: x], fun=ln_gaussian_cdf ) # Fit the model to estimate parameters model.fit_params(fe_init=[0, 0, 1.], fe_gprior=[[0, np.inf], [0, np.inf], [1., np.inf]], options={'disp':True}) # Get predictions y_pred = model.predict( t=np.linspace(0,100,num=100), group_name="Kerala"
'independent_var': independent_var, 'measurement_value': generalized_logistic(independent_var, params_true), 'measurement_std': n_data * [0.1], 'constant_one': n_data * [1.0], 'data_group': n_data * ['world'], }) # Initialize a model cm = CurveModel( df=df, col_t='independent_var', col_obs='measurement_value', col_covs=num_params*[['constant_one']], col_group='data_group', param_names=['alpha', 'beta', 'p'], link_fun=[exp_fun, identity_fun, exp_fun], var_link_fun=[exp_fun, identity_fun, exp_fun], fun=generalized_logistic, col_obs_se='measurement_std' ) inv_link_fun = [ln_fun, identity_fun, ln_fun] fe_init = np.zeros(num_params) for j in range(num_params): fe_init[j] = inv_link_fun[j](params_true[j] / 3.0) cm.fit_params( fe_init=fe_init, options={ 'ftol': 1e-16,
def fit(self, df, group=None):
    """Fit a ``CurveModel`` to ``df`` and store it on ``self.mod``.

    Args:
        df: data frame passed to ``CurveModel``.
        group: unused by this implementation.

    Returns:
        None
    """
    model_kwargs = self.basic_model_dict
    fit_kwargs = self.fit_dict

    self.mod = CurveModel(df=df, **model_kwargs)
    self.mod.fit_params(**fit_kwargs)