示例#1
0
    def run_joint_model(self, df, groups):
        """Fit a single CurveModel on the rows belonging to ``groups``.

        Args:
            df: (pd.DataFrame) data containing the ``self.col_group`` column.
            groups: group labels to keep; rows are selected with
                ``df[self.col_group].isin(groups)``.

        Returns:
            The CurveModel after ``fit_params`` has been called with
            ``self.joint_model_fit_dict``.
        """
        in_groups = df[self.col_group].isin(groups)
        # Copy so the model owns its frame instead of a slice of ``df``.
        joint_df = df[in_groups].copy()

        joint_model = CurveModel(df=joint_df, **self.basic_model_dict)
        joint_model.fit_params(**self.joint_model_fit_dict)
        return joint_model
    def run_model(self, df, group):
        """Fit an individual CurveModel on the rows of a single group.

        The fit settings are a deep copy of ``self.fit_dict`` in which
        ``fe_gprior[1][1]`` is multiplied by
        ``self.prior_modifier(model.num_obs)``.
        NOTE(review): the log message calls this the "beta" prior, and the
        scaled entry is presumably its standard deviation; confirm against
        ``CurveModel.fit_params``.

        Args:
            df: (pd.DataFrame) data containing the ``self.col_group`` column.
            group: label of the group to fit.

        Returns:
            The CurveModel after ``fit_params`` has been called.
        """
        group_df = df[df[self.col_group] == group].copy()
        model = CurveModel(df=group_df, **self.basic_model_dict)

        # Deep copy: the prior is edited in place and must not leak into
        # self.fit_dict.
        group_fit_dict = deepcopy(self.fit_dict)
        fe_gprior = group_fit_dict['fe_gprior']
        fe_gprior[1][1] *= self.prior_modifier(model.num_obs)

        print(group)
        print('\t update beta fe_gprior to', fe_gprior)

        group_fit_dict['fe_gprior'] = fe_gprior
        model.fit_params(**group_fit_dict)
        return model
class BasicModel(ModelPipeline):
    """Pipeline that fits one CurveModel to whatever data it is given."""

    def __init__(self, fit_dict, basic_model_dict, **pipeline_kwargs):
        """
        Store the model settings and set up the pipeline.

        Args:
            fit_dict: keyword arguments to CurveModel.fit_params()
            basic_model_dict: additional keyword arguments to the CurveModel
                class. The dictionary is copied (shallowly) before it is
                extended, so the caller's object is left untouched.
                Entries the original documentation lists for CurveModel:
                    col_covs: List[str] list of names of covariates to put
                        on the parameters
                    param_names (list{str}): names of the parameters in the
                        specific functional form.
                    link_fun (list{function}): list of link functions for
                        each parameter.
                    var_link_fun (list{function}): list of link functions
                        for the variables including fixed effects and
                        random effects.
                ``col_obs_se`` is always overwritten with
                ``self.col_obs_se``.
            **pipeline_kwargs: keyword arguments for the base class of
                ModelPipeline. After every name in ``self.pop_cols`` has
                been removed, the remaining entries are also forwarded to
                CurveModel.

        Raises:
            KeyError: if a name in ``self.pop_cols`` is missing from
                ``pipeline_kwargs``.
        """
        super().__init__(**pipeline_kwargs)
        self.fit_dict = fit_dict
        # Shallow copy: the updates below must not modify the dictionary
        # object owned by the caller.
        self.basic_model_dict = dict(basic_model_dict)
        self.basic_model_dict.update({'col_obs_se': self.col_obs_se})

        # NOTE(review): pop_cols and col_obs_se are presumably set by
        # ModelPipeline.__init__; confirm in the base class.
        generator_kwargs = pipeline_kwargs
        for arg in self.pop_cols:
            generator_kwargs.pop(arg)

        self.basic_model_dict.update(**generator_kwargs)
        self.mod = None

        self.setup_pipeline()

    def refresh(self):
        """Discard the currently stored model."""
        self.mod = None

    def fit(self, df, group=None):
        """
        Build a CurveModel on ``df`` and fit it with ``self.fit_dict``.

        Args:
            df: (pd.DataFrame) data to fit the model to
            group: not used by this implementation
        """
        self.mod = CurveModel(df=df, **self.basic_model_dict)
        self.mod.fit_params(**self.fit_dict)

    def predict(self, times, predict_space, predict_group):
        """
        Predict with the model stored by the last call to ``fit``.

        Args:
            times: times to predict at (passed as ``t``)
            predict_space: functional form to predict in (passed as
                ``prediction_functional_form``)
            predict_group: which group to make predictions for (passed as
                ``group_name``)

        Returns:
            Whatever ``CurveModel.predict`` returns for these arguments.
        """
        predictions = self.mod.predict(
            t=times,
            group_name=predict_group,
            prediction_functional_form=predict_space)
        return predictions
    def run_model(self, df, group):
        """Fit an individual CurveModel for ``group`` with its own beta bounds.

        The fit settings are a deep copy of ``self.fit_dict``. If
        ``self.init_parameters_estimations["fe_bounds_beta"]`` has an entry
        for ``group``, the lower beta bound is raised to 1.2 times the
        suggested lower bound (never below the common lower bound) and the
        first entry of the beta prior is raised to at least that bound.
        ``fe_gprior[1][1]`` is then multiplied by
        ``self.prior_modifier(model.num_obs)``.

        Args:
            df: (pd.DataFrame) data containing the ``self.col_group`` column.
            group: label of the group to fit.

        Returns:
            The CurveModel after ``fit_params`` has been called.
        """
        model = CurveModel(df=df[df[self.col_group] == group].copy(),
                           **self.basic_model_dict)

        # Deep copy: nested lists are edited in place below and the changes
        # must stay local to this call.
        fit_dict = deepcopy(self.fit_dict)

        print(group)
        fe_gprior = fit_dict['fe_gprior']
        fe_bounds = fit_dict['fe_bounds']
        common_beta_bounds = fe_bounds[1]
        suggested_beta_bounds = self.init_parameters_estimations[
            "fe_bounds_beta"].get(group)

        if suggested_beta_bounds is None:
            print('\t Use common beta bounds ', common_beta_bounds)
            individual_beta_bounds = common_beta_bounds
        else:
            print("\t Suggested beta bounds ", suggested_beta_bounds)
            individual_beta_bounds = [
                max(common_beta_bounds[0], suggested_beta_bounds[0] * 1.2),
                common_beta_bounds[1],
            ]
            # Keep the first prior entry inside the tightened bounds.
            fe_gprior[1][0] = max(individual_beta_bounds[0], fe_gprior[1][0])
            print('\t Update beta bounds to ', individual_beta_bounds)

        fe_gprior[1][1] *= self.prior_modifier(model.num_obs)
        print('\t Update beta fe_gprior to ', fe_gprior)
        fit_dict.update({
            'fe_gprior': fe_gprior,
            'fe_bounds': [fe_bounds[0], individual_beta_bounds, fe_bounds[2]],
        })

        model.fit_params(**fit_dict)
        return model
    def run_model(self, group, **fit_kwargs):
        """Build a CurveModel on one group's rows of ``self.df`` and fit it.

        Args:
            group: label of the group whose rows are selected from
                ``self.df`` through ``self.col_group``.
            **fit_kwargs: forwarded unchanged to ``CurveModel.fit_params``.

        Returns:
            The CurveModel after ``fit_params`` has been called.
        """
        group_df = self.df[self.df[self.col_group] == group].copy()
        model_settings = dict(
            col_t=self.col_t,
            col_obs=self.col_obs,
            col_covs=self.col_covs,
            col_group=self.col_group,
            param_names=self.param_names,
            link_fun=self.link_fun,
            var_link_fun=self.var_link_fun,
            fun=self.fun,
            col_obs_se=self.col_obs_se,
        )

        curve_model = CurveModel(group_df, **model_settings)
        curve_model.fit_params(**fit_kwargs)
        return curve_model
示例#6
0
def test_curve_model(alpha_true, beta_true, p_true, n_data):
    """Check that CurveModel recovers the parameters of noiseless data.

    Data are generated from ``generalized_logistic`` at the true parameters,
    the optimizer is started from the link-inverse of one third of each true
    value, and every estimate must match the truth to a relative error
    below 1e-6.
    """
    params_true = np.array([alpha_true, beta_true, p_true])
    num_params = 3

    # n_data evenly spaced points covering [0, beta_true].
    independent_var = np.arange(n_data) * beta_true / (n_data - 1)
    measurement_value = generalized_logistic(independent_var, params_true)
    df = pd.DataFrame({
        'independent_var': independent_var,
        'measurement_value': measurement_value,
        'measurement_std': [0.1] * n_data,
        'constant_one': [1.0] * n_data,
        'data_group': ['world'] * n_data,
    })

    # Initialize a model
    model_settings = dict(
        col_t='independent_var',
        col_obs='measurement_value',
        col_covs=num_params * [['constant_one']],
        col_group='data_group',
        param_names=['alpha', 'beta', 'p'],
        link_fun=[exp_fun, identity_fun, exp_fun],
        var_link_fun=[exp_fun, identity_fun, exp_fun],
        fun=generalized_logistic,
        col_obs_se='measurement_std',
    )
    cm = CurveModel(df=df, **model_settings)

    # Starting point: the inverse link applied to a third of each true value.
    inv_link_fun = [ln_fun, identity_fun, ln_fun]
    fe_init = np.zeros(num_params)
    for j, inverse_link in enumerate(inv_link_fun):
        fe_init[j] = inverse_link(params_true[j] / 3.0)

    # Fit the parameters
    optimizer_options = {'ftol': 1e-16, 'gtol': 1e-16, 'maxiter': 1000}
    cm.fit_params(fe_init=fe_init, options=optimizer_options)
    params_estimate = cm.params

    for i, true_value in enumerate(params_true):
        rel_error = params_estimate[i] / true_value - 1.0
        assert abs(rel_error) < 1e-6
示例#7
0
    def fit(self, df, group=None):
        """
        Fit a loose and a tight model for each of the ``beta`` and ``p``
        parameters and store them as ``self.{fit_type}_{param}_model``.

        A parameter whose ``self.{param}_weight`` equals 0 is skipped.

        Args:
            df: (pd.DataFrame) data every model is fit to
            group: (str) when given, the starting values come from
                ``self.update_init_model(df=df, group=group)``; otherwise a
                deep copy of ``self.init_dict`` is used.

        Returns:
            None
        """
        if group is None:
            init_dict = deepcopy(self.init_dict)
        else:
            init_dict = self.update_init_model(df=df, group=group)

        for param in ('beta', 'p'):
            if getattr(self, f'{param}_weight') == 0:
                continue

            for fit_type in ('loose', 'tight'):
                # Deep copies keep the stored settings untouched by the fit.
                model_kwargs = deepcopy(
                    getattr(self, f'{param}_model_kwargs'))
                fit_kwargs = deepcopy(
                    getattr(self, f'{fit_type}_{param}_fit_dict'))
                curve_model = CurveModel(df=df, **model_kwargs)

                start_fe, start_re = compute_starting_params(
                    init_dict[param][fit_type])
                fit_kwargs.update(fe_init=start_fe, re_init=start_re)

                curve_model.fit_params(**fit_kwargs)
                setattr(self, f'{fit_type}_{param}_model', curve_model)
示例#8
0
# Configure a three-parameter CurveModel (alpha, beta, p) with an intercept
# covariate and identity links on every parameter.
model = CurveModel(
    df=df,
    col_t='time',
    col_obs='ln_death_rate',
    col_group='group',
    col_covs=[['intercept'] for _ in range(3)],
    param_names=['alpha', 'beta', 'p'],
    link_fun=[lambda x: x for _ in range(3)],
    var_link_fun=[lambda x: x for _ in range(3)],
    fun=ln_gaussian_cdf,
)

# Estimate the parameters; 'disp' asks the optimizer to report progress.
model.fit_params(
    fe_init=[0, 0, 1.],
    fe_gprior=[[0, np.inf], [0, np.inf], [1., np.inf]],
    options={'disp': True},
)

# Predict on a grid of 100 points between 0 and 100.
# NOTE(review): predictions are requested for "Kerala" while the ground
# truth below is taken from "Hubei"; confirm this mismatch is intended.
time_grid = np.linspace(0, 100, num=100)
y_pred = model.predict(t=time_grid, group_name="Kerala")

ground_truth = df.loc[df['group'] == "Hubei",
                      'ln_death_rate'].reset_index(drop=True)
print(ground_truth)
print(np.exp(y_pred))

# Plot the predicted curve against the observed points.
plt.plot(time_grid, y_pred, '-')
plt.plot(ground_truth, '.')
plt.show()
示例#9
0
# Model with one constant covariate per parameter; alpha and p use an
# exponential link, beta an identity link.
cm = CurveModel(
    df=df,
    col_t='independent_var',
    col_obs='measurement_value',
    col_covs=[['constant_one']] * num_params,
    col_group='data_group',
    param_names=['alpha', 'beta', 'p'],
    link_fun=[exp_fun, identity_fun, exp_fun],
    var_link_fun=[exp_fun, identity_fun, exp_fun],
    fun=generalized_logistic,
    col_obs_se='measurement_std',
)

# Start the optimizer from the inverse link of a third of each true value.
inv_link_fun = [ln_fun, identity_fun, ln_fun]
fe_init = np.zeros(num_params)
for j in range(num_params):
    fe_init[j] = inv_link_fun[j](params_true[j] / 3.0)

cm.fit_params(fe_init=fe_init,
              options={'ftol': 1e-16, 'gtol': 1e-16, 'maxiter': 1000})
params_estimate = cm.params

print("True parameters:\n", params_true)
print("Estimated parameters:\n", params_estimate)
示例#10
0
class GaussianCDF:
    """Single Gaussian atom fitted to cumulative daily deaths."""

    def __init__(self):
        # Set by fit(); None until a model has been fitted.
        self.model = None

    def fit(self, daily_deaths, social_distance=None):
        """Fit the model to the log of the cumulative deaths.

        Args:
            daily_deaths: sequence of daily death counts; it is converted
                with ``np.array`` and accumulated with ``np.cumsum``.
            social_distance: optional covariate column. When given it is
                added as a second covariate on ``beta``.
        """
        def identity(x):
            return x

        def exponential(x):
            return np.exp(x)

        deaths = np.array(daily_deaths)
        n_data = deaths.shape[0]
        cumulative = np.cumsum(deaths)

        # Assemble the data frame expected by CurveModel.
        df = pd.DataFrame({
            'death_rate': cumulative,
            'time': np.arange(cumulative.shape[0]),
            # The +1 pads the log in case the number of deaths is zero.
            'ln_death_rate': np.log(cumulative + 1),
            'group': ['all'] * n_data,
            'cov_one': [1.0] * n_data,
        })

        if social_distance is None:
            col_covs = [['cov_one'], ['cov_one'], ['cov_one']]
            fe_init = [-3, 100, 1]
        else:
            # A disabled alternative in the original put social_distance on
            # all three parameters (six fixed effects,
            # fe_init = [-3, 0, 100, 0, 10, 0]).
            df['social_distance'] = social_distance
            col_covs = [['cov_one'],
                        ['cov_one', 'social_distance'],
                        ['cov_one']]
            fe_init = [-3, 100, 0, 10]
        num_fe = len(fe_init)

        # Set up the CurveModel
        self.model = CurveModel(
            df=df,
            col_t='time',
            col_obs='ln_death_rate',
            col_group='group',
            col_covs=col_covs,
            param_names=['alpha', 'beta', 'p'],
            link_fun=[exponential, identity, exponential],
            var_link_fun=[identity] * num_fe,
            fun=ln_gaussian_cdf)

        # Fit the model to estimate parameters
        self.model.fit_params(
            fe_init=fe_init,
            options={'ftol': 1e-14, 'maxiter': 500},
            re_bounds=[[0, 0]] * num_fe,  # No random effects
        )

    def predict(self, t):
        """Return the model's predictions at the values in ``t``."""
        return self.model.predict(t=t, group_name='all')

    def get_params(self):
        """Return the fitted parameters with length-one axes removed."""
        fitted = self.model.params
        return np.squeeze(fitted)
class BasicModelWithInit(BasicModel):
    """BasicModel whose fits start from values produced by an initial model."""

    def __init__(self, smart_init_options=None, **kwargs):
        """
        Args:
            smart_init_options: (dict) optimizer options used for the
                initial model. They take precedence over the entries of
                ``fit_dict['options']`` with the same key. Defaults to an
                empty dictionary.
            **kwargs: keyword arguments for BasicModel.
        """
        if smart_init_options is None:
            smart_init_options = {}
        self.smart_init_options = smart_init_options

        super().__init__(**kwargs)

        # 'options' may be absent from the fit dictionary; a missing key is
        # handled like an empty value instead of raising KeyError.
        fit_options = self.fit_dict.get('options')
        if fit_options:
            self.smart_init_options = {
                **fit_options,
                **self.smart_init_options
            }

        # NOTE(review): these resets run after BasicModel.__init__ has
        # already called setup_pipeline(); confirm setup_pipeline does not
        # populate init_dict, otherwise that result is discarded here.
        self.init_dict = None
        self.mod = None

    def run_init_model(self):
        """Compute and store the init dict for all groups in ``self.all_data``."""
        self.init_dict = self.get_init_dict(df=self.all_data,
                                            groups=self.groups)

    def update_init_model(self, df, group):
        """
        Update the initial model with a re-fit model
        from the specified group. Returns a new copy of the init dict

        Args:
            df: (pd.DataFrame) data used to update the init model
            group: (str) the group to update

        Returns:
            (dict) deep copy of ``self.init_dict`` updated with the entries
            re-computed for ``group``; ``self.init_dict`` is not modified.
        """
        new_init_dict = deepcopy(self.init_dict)
        new_init_dict.update(self.get_init_dict(df=df, groups=[group]))
        return new_init_dict

    def get_init_dict(self, df, groups):
        """
        Run the init model for each location.

        Args:
            df: (pd.DataFrame) data frame to fit the model that will
                be subset by group
            groups: (list of str) groups to get in the dict

        Returns:
            (dict) the result of ``get_initial_params`` for ``groups``
        """
        model = CurveModel(df=df, **self.basic_model_dict)

        # Same fit settings as the main fit, except for the optimizer options.
        init_fit_dict = deepcopy(self.fit_dict)
        init_fit_dict.update(options=self.smart_init_options)

        init_dict = get_initial_params(groups=groups,
                                       model=model,
                                       fit_arg_dict=init_fit_dict)
        return init_dict

    def fit(self, df, group=None):
        """
        Fit a CurveModel to ``df`` starting from the values in the init
        dict, and store it in ``self.mod``.

        Args:
            df: (pd.DataFrame) data to fit the model to
            group: (str) passing in the group will update the initialization
                dictionary (not replacing the old one) for this particular fit.

        Returns:
            None
        """
        if group is not None:
            init_dict = self.update_init_model(df=df, group=group)
        else:
            init_dict = deepcopy(self.init_dict)

        # Deep copy so the starting values never end up in self.fit_dict.
        fit_dict = deepcopy(self.fit_dict)
        fe_init, re_init = compute_starting_params(init_dict)
        fit_dict.update(fe_init=fe_init, re_init=re_init)

        self.mod = CurveModel(df=df, **self.basic_model_dict)
        self.mod.fit_params(**fit_dict)

    def refresh(self):
        """Discard the currently stored model."""
        self.mod = None