def test_scipy_optimizer(seed):
    """Check ``scipy_optimize`` against the weighted normal-equations solution.

    A Gaussian model is fit on 20 random observations with one plain
    covariate and one cubic-spline covariate attached to ``mu``. The
    optimizer output must match the solution of
    ``(X^T W X) beta = X^T W y`` built from the model's first design matrix.
    """
    np.random.seed(seed)
    num_obs = 20

    # Draw order (obs, cov0, cov1) is fixed so a given seed yields the same data.
    obs = np.random.randn(num_obs)
    cov0 = np.random.randn(num_obs)
    cov1 = np.random.randn(num_obs)
    df = pd.DataFrame({"obs": obs, "cov0": cov0, "cov1": cov1})
    data = Data(col_obs="obs", col_covs=["cov0", "cov1"], df=df)

    spline_specs = SplineSpecs(knots=np.linspace(0.0, 1.0, 5),
                               degree=3,
                               knots_type="rel_domain")
    mu_variables = [
        Variable(name="cov0"),
        SplineVariable(name="cov1", spline_specs=spline_specs),
    ]
    model = GaussianModel(data,
                          param_specs={"mu": {"variables": mu_variables}})

    coefs = scipy_optimize(model)

    # Reference solution: solve the weighted normal equations directly.
    design = model.mat[0]
    weighted_design_t = design.T * model.data.weights
    lhs = weighted_design_t.dot(design)
    rhs = weighted_design_t.dot(model.data.obs)
    tr_coef = np.linalg.solve(lhs, rhs)

    assert np.allclose(coefs, tr_coef)
def get_mortality_pattern_model(df: DataFrame,
                                col_time: str = "time_start",
                                units_per_year: int = 12,
                                knots: "np.ndarray | None" = None,
                                smooth_order: int = 1) -> ExcessMortalityModel:
    """Build one mortality pattern model with seasonality and time trend.

    Parameters
    ----------
    df
        Input data; must provide a ``time`` column, whose min and max are
        passed to ``get_time_knots``.
    col_time
        Column used for the seasonality spline variable.
    units_per_year
        Not used by this function; kept so existing callers keep working.
    knots
        Candidate knots forwarded to ``get_time_knots``. When ``None``
        (the default) ``np.arange(2010, 2021)`` is used.
    smooth_order
        Forwarded to ``SeasonalityModelVariables``.

    Returns
    -------
    ExcessMortalityModel
        Model built from ``df`` with a seasonality and a time-trend group
        of variables.
    """
    # Build the default per call: an ndarray in the signature would be created
    # once at definition time and shared (and mutable) across all calls.
    if knots is None:
        knots = np.arange(2010, 2021)

    seas_spline_specs = SplineSpecs(knots=np.linspace(0.0, 1.0, 5),
                                    degree=3,
                                    knots_type="rel_domain")
    time_knots = get_time_knots(df.time.min(), df.time.max(), knots)
    time_spline_specs = SplineSpecs(knots=time_knots,
                                    degree=1,
                                    knots_type="abs")

    seas_var = SplineVariable(col_time, spline_specs=seas_spline_specs)
    time_var = SplineVariable("time", spline_specs=time_spline_specs)
    variables = [
        SeasonalityModelVariables([seas_var], col_time, smooth_order),
        TimeModelVariables([time_var])
    ]
    return ExcessMortalityModel(df, variables)
def get_mortality_pattern_model(df: DataFrame,
                                col_time: str = "time_start",
                                units_per_year: int = 12,
                                knots_per_year: float = 0.5,
                                tail_size: int = 18,
                                smooth_order: int = 1) -> ExcessMortalityModel:
    """
    Define one mortality pattern model.

    The model combines a seasonality component (cubic spline on ``col_time``
    with five evenly spaced relative-domain knots) and a time-trend component
    (linear spline on the ``time`` column). The trend knots come from
    ``get_time_knots``, called with the range of ``df.time`` together with
    ``units_per_year``, ``knots_per_year`` and ``tail_size``.
    """
    # Seasonality component.
    seasonality = SplineVariable(
        col_time,
        spline_specs=SplineSpecs(knots=np.linspace(0.0, 1.0, 5),
                                 degree=3,
                                 knots_type="rel_domain"),
    )

    # Time-trend component, with absolute knots derived from the data range.
    time_column = df.time
    trend_knots = get_time_knots(time_column.min(), time_column.max(),
                                 units_per_year, knots_per_year, tail_size)
    trend = SplineVariable(
        "time",
        spline_specs=SplineSpecs(knots=trend_knots,
                                 degree=1,
                                 knots_type="abs"),
    )

    return ExcessMortalityModel(df, [
        SeasonalityModelVariables([seasonality], col_time, smooth_order),
        TimeModelVariables([trend]),
    ])
def spline_specs():
    """Return degree-3 spline specs with five evenly spaced knots on [0, 1].

    The knots are declared with ``knots_type="rel_domain"``.
    """
    knots = np.linspace(0.0, 1.0, 5)
    return SplineSpecs(knots=knots, degree=3, knots_type="rel_domain")
# time unit time_unit = "week" # start time time_start = YearTime(2019, 1, time_unit=time_unit) # end time for fit and prediction time_end = { "fit": YearTime(2020, 9, time_unit=time_unit), "pred": YearTime(2020, 52, time_unit=time_unit) } # model setting # spline specification for seasonality and time trend seas_spline_specs = SplineSpecs(knots=np.linspace(1, 52, 3), degree=2, r_linear=True, knots_type="abs") time_spline_specs = SplineSpecs(knots=np.array([0.0, 1.0]), degree=1, knots_type="rel_domain") def main(): # process input data df = pd.read_csv(data_path) df_2020 = df[[ "location_name", "year_x", "week", "pop_2020", "death_rate_2020" ]].copy() df_2019 = df[[ "location_name", "year_y", "week", "pop_2019", "death_rate_2019" ]].copy()
def spline_variable():
    """Return a spline variable on the first entry of ``COL_COVS``.

    The variable uses degree-3 spline specs with five evenly spaced knots
    between 0 and 1.
    """
    cov_name = COL_COVS[0]
    specs = SplineSpecs(knots=np.linspace(0.0, 1.0, 5), degree=3)
    return SplineVariable(name=cov_name, spline_specs=specs)
from regmod.optimizer import scipy_optimize from emmodel.model import ExcessMortalityModel from emmodel.variable import ModelVariables from emmodel.cascade import CascadeSpecs, Cascade # data file path data_path = Path("./examples/data_debug/2020-04-22/stage2_input.csv") # result folder results_path = Path("./examples/results_debug") # define all variables intercept_variable = Variable("intercept") idr_spline_specs = SplineSpecs(knots=np.linspace(0.0, 1.0, 5), degree=2, knots_type="rel_domain", include_first_basis=False) idr_variable = SplineVariable("idr_lagged", spline_specs=idr_spline_specs, priors=[ SplineUniformPrior(order=1, lb=-np.inf, ub=0.0), SplineGaussianPrior(order=1, mean=0.0, sd=1e-4, domain_lb=0.4, domain_ub=1.0) ]) time_spline_specs = SplineSpecs(knots=np.linspace(0.0, 1.0, 10),