Пример #1
0
def model_intercept(data: pd.DataFrame,
                    dep_var: str,
                    prediction: pd.Series,
                    weight_data: pd.DataFrame = None,
                    dep_var_se: str = None,
                    dep_trans_in: Callable[[pd.Series],
                                           pd.Series] = lambda x: x,
                    dep_se_trans_in: Callable[[pd.Series],
                                              pd.Series] = lambda x: x,
                    dep_trans_out: Callable[[pd.Series],
                                            pd.Series] = lambda x: x,
                    verbose: bool = True):
    """Shift a prediction by a constant fitted to the observed residuals.

    The observations and ``prediction`` are both passed through
    ``dep_trans_in``. The residual (observation minus prediction) is fit with
    an intercept-only MRBRT model without random effects, the fitted
    coefficient is added to the transformed prediction, and the result is
    mapped back with ``dep_trans_out``.

    Args:
        data: Observations. Reshaped with ``reshape_data_long(data, dep_var)``;
            afterwards the ``'date'`` and ``dep_var`` columns are used.
        dep_var: Name of the dependent-variable column.
        prediction: Series subtracted from the observations once they are
            indexed by date (presumably date-indexed as well -- confirm
            against callers). The caller's object is not modified.
        weight_data: Optional frame holding standard errors; reshaped with
            ``reshape_data_long(weight_data, dep_var_se)``. When omitted every
            observation gets a standard error of 1.
        dep_var_se: Name of the standard-error column in ``weight_data``.
        dep_trans_in: Transformation applied to observations and prediction
            before fitting.
        dep_se_trans_in: Transformation applied to the standard errors.
        dep_trans_out: Transformation applied to the shifted prediction.
        verbose: Currently unused; kept for interface compatibility.

    Returns:
        The shifted prediction after ``dep_trans_out``.

    Raises:
        ValueError: If the dates of ``data`` and ``weight_data`` differ.
    """
    data = data.copy()
    data[dep_var] = dep_trans_in(data[dep_var])
    prediction = dep_trans_in(prediction)
    data = reshape_data_long(data, dep_var)
    if weight_data is not None:
        weight_data = reshape_data_long(weight_data, dep_var_se)
        if (data['date'] != weight_data['date']).any():
            raise ValueError(
                'Dates in `data` and `weight_data` not identical.')
        data['se'] = dep_se_trans_in(weight_data[dep_var_se])
    else:
        data['se'] = 1.

    # Index by date so the subtraction aligns observations with the
    # prediction; rows without a matching prediction become NaN and are
    # dropped.
    data = data.set_index('date').sort_index()
    data[dep_var] = data[dep_var] - prediction
    data = data.reset_index().dropna()
    data['intercept'] = 1

    mr_data = MRData()
    mr_data.load_df(
        data,
        col_obs=dep_var,
        col_obs_se='se',
        col_covs=['intercept'],
        col_study_id='date',
    )
    intercept_model = LinearCovModel(
        'intercept',
        use_re=False,
    )
    mr_model = MRBRT(mr_data, [intercept_model])
    mr_model.fit_model()

    intercept = mr_model.beta_soln

    # Build a new Series instead of using `+=`: with the default identity
    # `dep_trans_in`, `prediction` is still the caller's object and an
    # in-place add would silently modify it.
    prediction = prediction + intercept
    prediction = dep_trans_out(prediction)

    return prediction
Пример #2
0
    def __init__(self, t, y,
                 spline_options=None):
        """Set up the MRBRT model and the spline used for fitting.

        Args:
            t (np.ndarray): Independent variable.
            y (np.ndarray): Dependent variable.
            spline_options (dict | None, optional):
                Spline prior options, forwarded as keyword arguments to the
                time covariate model. ``None`` means no extra options.
        """
        self.t = t
        self.y = y
        if spline_options is None:
            spline_options = {}
        self.spline_options = spline_options

        # Observation table: the standard error is the reciprocal of exp(y)
        # and every row is assigned to the same study.
        obs_frame = pd.DataFrame({
            'y': self.y,
            'y_se': 1.0/np.exp(self.y),
            't': self.t,
            'study_id': 1,
        })
        mr_data = MRData(df=obs_frame,
                         col_obs='y',
                         col_obs_se='y_se',
                         col_covs=['t'],
                         col_study_id='study_id',
                         add_intercept=True)

        # Intercept carries a random effect whose uniform gamma prior is
        # [0, 0]; time is modelled through a spline without random effect.
        intercept_model = LinearCovModel(alt_cov='intercept',
                                         use_re=True,
                                         prior_gamma_uniform=np.array([0.0, 0.0]),
                                         name='intercept')
        time_model = LinearCovModel(alt_cov='t',
                                    use_re=False,
                                    use_spline=True,
                                    **self.spline_options,
                                    name='time')

        self.mr_model = MRBRT(mr_data,
                              cov_models=[intercept_model, time_model])
        self.spline = time_model.create_spline(mr_data)
        # Filled in once the model has been fit.
        self.spline_coef = None
Пример #3
0
    def fit_model(self, **fit_options):
        """Fit an MRBRT model on the stored data and keep its beta solution.

        Keyword arguments are forwarded to ``MRBRT.fit_model`` and override
        the defaults assembled here (``x0``, ``inner_max_iter``,
        ``inner_print_level``).
        """
        self._assert_has_data()

        # Starting point: beta from `solve_ls` (presumably a least-squares
        # solve -- confirm against its definition), gamma at zero.
        start_beta = solve_ls(self.mat, self.data.obs, self.data.obs_se)
        mrbrt = MRBRT(self.data, self.cov_models)
        start_gamma = np.zeros(mrbrt.num_z_vars)

        options = {
            'x0': np.hstack((start_beta, start_gamma)),
            'inner_max_iter': 500,
            'inner_print_level': 5,
        }
        # Caller-supplied options win over the defaults.
        options.update(fit_options)

        mrbrt.fit_model(**options)
        self.soln = mrbrt.beta_soln
Пример #4
0
def predict_time_series(
    day0: pd.Timestamp,
    dep_var: str,
    mr_model: MRBRT,
    dep_trans_out: Callable[[pd.Series], pd.Series],
    diff: bool,
) -> pd.Series:
    """Predict the fitted curve for every day covered by the model data.

    Args:
        day0: Date corresponding to ``t == 0``.
        dep_var: Name given to the returned series.
        mr_model: Fitted model; its data must contain a ``'t'`` column.
        dep_trans_out: Transformation applied to the predicted values.
        diff: If True the model was fit on differences, so predictions are
            cumulatively summed before ``dep_trans_out`` is applied.

    Returns:
        Predictions named ``dep_var`` and indexed by ``'date'``, one entry
        for each integer ``t`` from 0 through the largest ``t`` in the model
        data.
    """
    data = mr_model.data.to_df()

    pred_data = MRData()
    t = np.arange(0, data['t'].max() + 1)
    # `col_covs` takes a list of column names; a bare string is iterated
    # character by character and only works for one-letter names.
    pred_data.load_df(pd.DataFrame({'t': t}), col_covs=['t'])
    pred_data_value = mr_model.predict(pred_data)
    if diff:
        pred_data_value = pred_data_value.cumsum()
    pred_data_value = dep_trans_out(pred_data_value)
    pred_data = pd.DataFrame({
        't': t,
        dep_var: pred_data_value,
    })
    # Convert the day offsets back to calendar dates.
    pred_data['date'] = pred_data['t'].apply(
        lambda x: day0 + pd.Timedelta(days=x))
    pred_data = pred_data.set_index('date')[dep_var]

    return pred_data
Пример #5
0
def predict(pred_data: pd.DataFrame, hierarchy: pd.DataFrame, mr_model: MRBRT,
            pred_replace_dict: Dict[str, str], pred_exclude_vars: List[str],
            dep_var: str, dep_var_se: str, fe_vars: List[str],
            re_vars: List[str], group_var: str,
            **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Predict ``dep_var`` for each location-date row of ``pred_data``.

    Args:
        pred_data: Prediction covariates with ``'location_id'`` and ``'date'``
            columns. The caller's frame is not modified.
        hierarchy: Not used by this function.
        mr_model: Fitted model used for prediction.
        pred_replace_dict: Mapping of source column name to the column name
            it should replace; an existing column with the target name is
            dropped before the rename.
        pred_exclude_vars: Columns whose values are set to 0 for prediction.
        dep_var: Name of the prediction column in the outputs.
        dep_var_se: Forwarded to ``create_mr_data``.
        fe_vars: Fixed-effect covariate columns.
        re_vars: Random-effect covariates; must be empty.
        group_var: Forwarded to ``create_mr_data``.
        **kwargs: Ignored.

    Returns:
        Two frames with columns ``location_id``, ``date`` and ``dep_var``:
        the default ``mr_model.predict`` output and the output with
        ``predict_for_study=False``.

    Raises:
        ValueError: If the keys of ``pred_replace_dict`` together with
            ``fe_vars`` contain duplicates, or if ``re_vars`` is non-empty.
    """
    keep_vars = list(pred_replace_dict.keys()) + fe_vars
    if len(set(keep_vars)) != len(keep_vars):
        raise ValueError('Duplicate in replace_var + fe_vars.')

    # Drop the columns about to be overwritten by the rename. `drop` returns
    # a new frame, so the caller's `pred_data` keeps its columns (a `del`
    # here would remove them from the caller's object).
    stale_cols = [
        col for col in dict.fromkeys(pred_replace_dict.values())
        if col in pred_data.columns
    ]
    pred_data = pred_data.drop(columns=stale_cols)
    pred_data = pred_data.rename(columns=pred_replace_dict)
    pred_data = (pred_data.loc[:, ['location_id', 'date'] +
                               fe_vars].dropna().drop_duplicates())
    for pred_exclude_var in pred_exclude_vars:
        pred_data[pred_exclude_var] = 0

    if re_vars:
        raise ValueError(
            'Not propagating random effects (finish `apply_parent_random_effects` method).'
        )

    pred_mr_data = create_mr_data(pred_data,
                                  dep_var,
                                  dep_var_se,
                                  fe_vars,
                                  group_var,
                                  pred=True)
    pred = mr_model.predict(pred_mr_data)
    pred_fe = mr_model.predict(pred_mr_data, predict_for_study=False)

    # The predictions are positional, while `pred_data` still carries its
    # original index (with gaps after dropna/drop_duplicates). Reset it so
    # the column-wise concat pairs rows by position instead of producing
    # misaligned rows filled with NaN.
    id_cols = pred_data.loc[:, ['location_id', 'date']].reset_index(drop=True)

    pred = pd.concat([id_cols, pd.DataFrame({dep_var: pred})], axis=1)
    pred_fe = pd.concat([id_cols, pd.DataFrame({dep_var: pred_fe})], axis=1)

    return pred, pred_fe
Пример #6
0
def run_mr_model(model_data: pd.DataFrame,
                 dep_var: str,
                 dep_var_se: str,
                 fe_vars: List[str],
                 re_vars: List[str],
                 group_var: str,
                 inlier_pct: float = 1.,
                 inner_max_iter: int = 1000,
                 outer_max_iter: int = 1000,
                 prior_dict: Dict = None,
                 global_mr_data: MRData = None,
                 **kwargs) -> MRBRT:
    """Build and fit an MRBRT model with one linear covariate per fixed effect.

    Args:
        model_data: Input data handed to ``create_mr_data``.
        dep_var: Dependent-variable column.
        dep_var_se: Standard-error column of the dependent variable.
        fe_vars: Fixed-effect covariates; one ``LinearCovModel`` is created
            for each.
        re_vars: Covariates that also get a random effect; must be a subset
            of ``fe_vars``.
        group_var: Grouping column handed to ``create_mr_data``.
        inlier_pct: Forwarded to ``MRBRT``.
        inner_max_iter: Forwarded to ``MRBRT.fit_model``.
        outer_max_iter: Forwarded to ``MRBRT.fit_model``.
        prior_dict: Per-covariate keyword arguments for ``LinearCovModel``,
            keyed by covariate name. ``None`` means no extra arguments.
        global_mr_data: Passed to ``mr_model.attach_data`` before fitting.
        **kwargs: Ignored.

    Returns:
        The fitted model.

    Raises:
        ValueError: If ``re_vars`` contains a name missing from ``fe_vars``.
    """
    mr_data = create_mr_data(model_data, dep_var, dep_var_se, fe_vars,
                             group_var)

    if set(re_vars) - set(fe_vars):
        raise ValueError('RE vars must also be FE vars.')

    if prior_dict is None:
        prior_dict = dict((fe_var, {}) for fe_var in fe_vars)

    cov_models = []
    for fe_var in fe_vars:
        has_re = fe_var in re_vars
        cov_models.append(
            LinearCovModel(fe_var, use_re=has_re, **prior_dict[fe_var]))

    # Model construction and fitting run with stdout suppressed.
    with suppress_stdout():
        mr_model = MRBRT(mr_data, cov_models, inlier_pct=inlier_pct)
        mr_model.attach_data(global_mr_data)
        mr_model.fit_model(inner_max_iter=inner_max_iter,
                           outer_max_iter=outer_max_iter)

    return mr_model
Пример #7
0
    def create_model(self,
                     covs: List[str],
                     prior_type: str = 'Laplace',
                     laplace_std: float = None) -> MRBRT:
        """Build an MRBRT model with Laplace or Gaussian beta priors.

        Under ``'Laplace'``, covariates in ``self.selected_covs`` receive
        their Gaussian prior from ``self.beta_gprior`` and every other
        covariate receives a Laplace prior ``[0.0, laplace_std]``. Under
        ``'Gaussian'``, a covariate receives its entry of
        ``self.beta_gprior`` when one exists and no beta prior otherwise.
        The gamma prior is ``self.loose_gamma_uprior`` when
        ``self.use_re[cov]`` is truthy and ``self.zero_gamma_uprior``
        otherwise.

        Args:
            covs (List[str]): Covariates to include in the model.
            prior_type (str): Either ``'Laplace'`` or ``'Gaussian'``.
            laplace_std (float): Standard deviation of the Laplace prior;
                required when ``prior_type`` is ``'Laplace'``.

        Return:
            MRBRT: Created model object.
        """
        assert prior_type in ['Laplace', 'Gaussian'], \
            "Prior type can only 'Laplace' or 'Gaussian'."
        is_laplace = prior_type == 'Laplace'
        if is_laplace:
            assert laplace_std is not None, \
                "Use Laplace prior must provide standard deviation."

        cov_models = []
        for cov in covs:
            beta_priors = {}
            if is_laplace:
                if cov in self.selected_covs:
                    beta_priors['prior_beta_laplace'] = None
                    beta_priors['prior_beta_gaussian'] = self.beta_gprior[cov]
                else:
                    beta_priors['prior_beta_laplace'] = np.array(
                        [0.0, laplace_std])
                    beta_priors['prior_beta_gaussian'] = None
            elif cov in self.beta_gprior:
                beta_priors['prior_beta_gaussian'] = self.beta_gprior[cov]
            else:
                beta_priors['prior_beta_gaussian'] = None

            if self.use_re[cov]:
                gamma_prior = self.loose_gamma_uprior
            else:
                gamma_prior = self.zero_gamma_uprior

            cov_models.append(
                LinearCovModel(cov,
                               use_re=True,
                               prior_gamma_uniform=gamma_prior,
                               **beta_priors))

        return MRBRT(self.data,
                     cov_models=cov_models,
                     inlier_pct=self.inlier_pct)
Пример #8
0
    def __init__(self,
                 t,
                 y,
                 spline_options=None,
                 se_power=1.0,
                 space='ln daily',
                 max_iter=50):
        """Set up the MRBRT model and the spline for the chosen fitting space.

        Args:
            t (np.ndarray): Independent variable.
            y (np.ndarray): Dependent variable (per the original
                documentation, assumed to be daily cases).
            spline_options (dict | None, optional):
                Spline prior options, forwarded as keyword arguments to the
                time covariate model.
            se_power (float):
                Number between 0 and 1 used as the exponent on ``exp(y)``
                when building the standard errors; 0 gives unit errors.
            space (str):
                Fitting space: ``'daily'``, ``'ln daily'``, ``'cumul'`` or
                ``'ln cumul'``. The cumulative spaces use the running sum of
                ``y``; the log spaces keep only strictly positive values.
            max_iter (int):
                Maximum number of iterations.
        """
        self.space = space
        assert self.space in ['daily', 'ln daily', 'cumul', 'ln cumul'], (
            "spline_space must be one of 'daily', 'ln daily', 'cumul', 'ln cumul' space.")

        # Everything other than the two daily spaces works on the cumulative
        # sum of y.
        if self.space not in ('daily', 'ln daily'):
            y = np.cumsum(y)
        if self.space in ('ln daily', 'ln cumul'):
            # The log is only defined for strictly positive observations.
            positive = y > 0.0
            self.t = t[positive]
            self.y = np.log(y[positive])
        else:
            self.t = t
            self.y = y

        if spline_options is None:
            spline_options = {}
        self.spline_options = spline_options
        self.se_power = se_power

        assert 0 <= self.se_power <= 1, "spline se_power has to be between 0 and 1."
        y_se = (np.ones(self.t.size) if self.se_power == 0
                else 1.0 / np.exp(self.y)**self.se_power)

        # Observation table; every row is assigned to the same study.
        obs_frame = pd.DataFrame({
            'y': self.y,
            'y_se': y_se,
            't': self.t,
            'study_id': 1,
        })
        mr_data = MRData(
            df=obs_frame,
            col_obs='y',
            col_obs_se='y_se',
            col_covs=['t'],
            col_study_id='study_id',
            add_intercept=True,
        )

        intercept_model = LinearCovModel(
            alt_cov='intercept',
            use_re=True,
            prior_gamma_uniform=np.array([0.0, 0.0]),
            name='intercept',
        )
        time_model = LinearCovModel(
            alt_cov='t',
            use_re=False,
            use_spline=True,
            **self.spline_options,
            name='time'
        )

        self.mr_model = MRBRT(mr_data,
                              cov_models=[intercept_model, time_model])
        self.spline = time_model.create_spline(mr_data)
        # Filled in once the model has been fit.
        self.spline_coef = None
        self.max_iter = max_iter
Пример #9
0
class SplineFit:
    """Fit a spline in ``t`` to observations ``y`` with an MRBRT model.
    """
    def __init__(self, t, y,
                 spline_options=None):
        """Constructor of the SplineFit

        Args:
            t (np.ndarray): Independent variable.
            y (np.ndarray): Dependent variable.
            spline_options (dict | None, optional):
                Dictionary of spline prior options, forwarded as keyword
                arguments to the time covariate model.
        """
        self.t = t
        self.y = y
        self.spline_options = {} if spline_options is None else spline_options

        # create mrbrt object; the standard error is the reciprocal of
        # exp(y) and every row is assigned to the same study
        df = pd.DataFrame({
            'y': self.y,
            'y_se': 1.0/np.exp(self.y),
            't': self.t,
            'study_id': 1,
        })

        data = MRData(
            df=df,
            col_obs='y',
            col_obs_se='y_se',
            col_covs=['t'],
            col_study_id='study_id',
            add_intercept=True
        )

        intercept = LinearCovModel(
            alt_cov='intercept',
            use_re=True,
            prior_gamma_uniform=np.array([0.0, 0.0]),
            name='intercept'
        )

        time = LinearCovModel(
            alt_cov='t',
            use_re=False,
            use_spline=True,
            **self.spline_options,
            name='time'
        )

        self.mr_model = MRBRT(data, cov_models=[intercept, time])
        self.spline = time.create_spline(data)
        # Set by `fit_spline`.
        self.spline_coef = None

    def fit_spline(self):
        """Fit the model and store the spline coefficients.

        The first entry of the solution is added to every remaining entry
        so that the stored coefficients can be applied directly to the
        spline design matrix in `predict`.
        """
        self.mr_model.fit_model(inner_max_iter=30)
        # Copy before modifying: the in-place update below would otherwise
        # also overwrite the solution stored on `self.mr_model`.
        self.spline_coef = self.mr_model.beta_soln.copy()
        self.spline_coef[1:] += self.spline_coef[0]

    def predict(self, t):
        """Predict the dependent variable, given independent variable.

        Args:
            t (np.ndarray): Points at which the spline is evaluated.

        Returns:
            The spline design matrix at ``t`` multiplied by the fitted
            coefficients. `fit_spline` must have been called first.
        """
        mat = self.spline.design_mat(t)
        return mat.dot(self.spline_coef)
Пример #10
0
def estimate_time_series(
    data: pd.DataFrame,
    spline_options: Dict,
    n_knots: int,
    dep_var: str,
    dep_trans_in: Callable[[pd.Series], pd.Series] = lambda x: x,
    weight_data: pd.DataFrame = None,
    dep_var_se: str = None,
    dep_se_trans_in: Callable[[pd.Series], pd.Series] = lambda x: x,
    diff: bool = False,
    num_submodels: int = 25,
    single_random_knot: bool = False,
    min_interval_days: int = 7,
    dep_trans_out: Callable[[pd.Series], pd.Series] = lambda x: x,
    split_l_interval: bool = False,
    split_r_interval: bool = False,
    verbose: bool = False,
) -> Tuple[pd.DataFrame, pd.Series, MRBeRT]:
    """Smooth a time series with a spline fit in days since the first date.

    Args:
        data: Observations with a ``'date'`` column and ``dep_var``.
        spline_options: Extra keyword arguments for the spline
            ``LinearCovModel``.
        n_knots: Number of spline knots.
        dep_var: Dependent-variable column.
        dep_trans_in: Transformation applied to ``dep_var`` before fitting.
        weight_data: Optional frame holding standard errors; its dates must
            match those of ``data``. Without it every standard error is 1.
        dep_var_se: Standard-error column in ``weight_data``.
        dep_se_trans_in: Transformation applied to the standard errors.
        diff: Fit differences of the transformed series; predictions are
            cumulatively summed back.
        num_submodels: More than 1 fits an ``MRBeRT`` ensemble over sampled
            knots; otherwise a single ``MRBRT`` model is fit.
        single_random_knot: With ``num_submodels == 1``, draw the knots from
            ``get_ensemble_knots`` instead of spacing them evenly.
        min_interval_days: Minimum knot spacing in days; divided by the
            largest ``t`` before being passed to ``get_ensemble_knots``.
        dep_trans_out: Transformation applied to the predictions.
        split_l_interval: Add a knot halfway between the first two knots.
        split_r_interval: Add a knot halfway between the last two knots.
        verbose: Emit progress messages through ``logger``.

    Returns:
        The model input (``'y'`` and ``'se'`` indexed by date), the smoothed
        series from ``predict_time_series``, and the fitted model.

    Raises:
        ValueError: If the dates of ``data`` and ``weight_data`` differ, or
            if an interval split is requested with ``num_submodels > 1``.
    """
    if verbose:
        logger.info('Formatting data.')
    data = data.copy()
    data[dep_var] = dep_trans_in(data[dep_var])
    if diff:
        if verbose:
            logger.info(
                'For diff model, drop day1 (i.e., if day0 is > 0, day0->day1 diff would be hugely negative).'
            )
        data[dep_var] = data[dep_var].diff()
        # Keep only values whose own difference is defined; the rest become
        # NaN on assignment and are dropped further down.
        data[dep_var] = data[dep_var][data[dep_var].diff().notnull()]
    # Selecting `[[dep_var]]` yields more than one column only when the
    # column label is duplicated; that case is reshaped to long format.
    if data[[dep_var]].shape[1] > 1:
        reshape = True
        data = reshape_data_long(data, dep_var)
        if weight_data is not None:
            weight_data = reshape_data_long(weight_data, dep_var_se)
    else:
        reshape = False
    if weight_data is not None:
        if (data['date'] != weight_data['date']).any():
            raise ValueError(
                'Dates in `data` and `weight_data` not identical.')
        data['se'] = dep_se_trans_in(weight_data[dep_var_se])
    else:
        data['se'] = 1.
    data = data.rename(columns={dep_var: 'y'})
    day0 = data['date'].min()
    keep_vars = ['date', 'y', 'se']
    data = data.loc[:, keep_vars]
    start_len = len(data)
    data = data.dropna()
    end_len = len(data)
    if start_len != end_len and not reshape:
        if verbose:
            logger.debug('NAs in data')
    # Model time is the number of days since the first date.
    data['t'] = (data['date'] - day0).dt.days

    # NOTE: no study id column is passed to the model data.
    col_args = {
        'col_obs': 'y',
        'col_obs_se': 'se',
        'col_covs': ['t'],
    }
    if verbose:
        logger.info('Getting base knots.')
    min_interval = min_interval_days / data['t'].max()
    if num_submodels == 1 and single_random_knot:
        spline_knots = get_ensemble_knots(n_knots, min_interval, 1)[0]
    else:
        spline_knots = np.linspace(0., 1., n_knots)

    if split_l_interval or split_r_interval:
        if num_submodels > 1:
            raise ValueError(
                'Would need to set up functionality to split segments for ensemble.'
            )
        if split_l_interval:
            n_knots += 1
            # Index 1 places the midpoint between the first two knots;
            # index 0 would put it in front of the first knot and leave the
            # knot vector unsorted.
            spline_knots = np.insert(spline_knots, 1, spline_knots[:2].mean())
        if split_r_interval:
            n_knots += 1
            # Index -1 inserts before the last knot, i.e. between the last
            # two.
            spline_knots = np.insert(spline_knots, -1,
                                     spline_knots[-2:].mean())

    if verbose:
        logger.info('Creating model data.')
    mr_data = MRData()
    mr_data.load_df(data, **col_args)
    spline_model = LinearCovModel('t',
                                  use_re=False,
                                  use_spline=True,
                                  use_spline_intercept=True,
                                  spline_knots=spline_knots,
                                  **spline_options)
    if num_submodels > 1:
        if verbose:
            logger.info('Sampling knots.')
        ensemble_knots = get_ensemble_knots(n_knots, min_interval,
                                            num_submodels)

        if verbose:
            logger.info('Initializing model.')
        mr_model = MRBeRT(mr_data, spline_model, ensemble_knots)
    else:
        if verbose:
            logger.info('Initializing model.')
        mr_model = MRBRT(mr_data, [spline_model])

    if verbose:
        logger.info('Fitting model.')
    mr_model.fit_model()

    if num_submodels > 1:
        if verbose:
            logger.info('Scoring submodels.')
        mr_model.score_model()

    data = data.set_index('date')[['y', 'se']]

    if verbose:
        logger.info('Making prediction.')
    smooth_data = predict_time_series(
        day0=day0,
        dep_var=dep_var,
        mr_model=mr_model,
        dep_trans_out=dep_trans_out,
        diff=diff,
    )

    return data, smooth_data, mr_model