Пример #1
0
def plot_fits(generator, prediction_times, sharex, sharey, draw_space, plot_obs=None, plot_uncertainty=False):
    """
    Draw one stacked panel per group comparing the mean fit with the draws.

    Each panel shows the mean of the draws (red, dotted) and the mean
    prediction (black), both translated from `generator.predict_space` into
    `draw_space`. Optionally the 2.5% / 97.5% quantiles of the draws and the
    raw observations for the group are overlaid.

    Args:
        generator: (curvefit.model_generator.ModelPipeline) that has some draws
        prediction_times: (np.array) x values used for every plotted curve
        sharex: (bool) forwarded to `plt.subplots` to share the x axes
        sharey: (bool) forwarded to `plt.subplots` to share the y axes
        draw_space: (callable) which curvefit.functions space to plot the draws in
        plot_obs: (optional str) column of `generator.all_data` to scatter
            against `generator.col_t`; nothing is scattered when None
        plot_uncertainty: (optional bool) also plot the 2.5% and 97.5%
            quantiles of the draws
    """
    groups = generator.groups
    num_groups = len(groups)
    _fig, axes = plt.subplots(num_groups, 1, figsize=(8, 4 * num_groups),
                              sharex=sharex, sharey=sharey)
    # With a single panel the returned axes object is wrapped so that it can
    # be iterated like the multi-panel case.
    if num_groups == 1:
        axes = [axes]

    def to_draw_space(values):
        # Translate from the generator's prediction space to the plotting space.
        return data_translator(
            data=values,
            input_space=generator.predict_space,
            output_space=draw_space
        )

    dotted_red = dict(c='red', linestyle=':')
    for panel, group in zip(axes, groups):
        # Work on copies so the generator's stored arrays are never touched.
        group_draws = to_draw_space(generator.draws[group].copy())
        group_fit = to_draw_space(generator.mean_predictions[group].copy())

        panel.plot(prediction_times, group_draws.mean(axis=0), **dotted_red)
        panel.plot(prediction_times, group_fit, c='black')

        if plot_uncertainty:
            # Lower bound first, then upper bound, same styling as the draw mean.
            for level in (0.025, 0.975):
                bound = np.quantile(group_draws, axis=0, q=level)
                panel.plot(prediction_times, bound, **dotted_red)

        if plot_obs is not None:
            in_group = generator.all_data[generator.col_group] == group
            observed = generator.all_data.loc[in_group].copy()
            panel.scatter(observed[generator.col_t], observed[plot_obs])

        panel.set_title(f"{group} predictions")
    def summarize_result(self, print_summary=True):
        """
        Build a table which characterizes fit quality, one row per location.

        The table has four columns: Location, RMSE ERF, RMSE DERF, RMSE LNR
        Where
            - RMSE ERF: squared norm of the residual between the model's own
              prediction (`model.fun` at `model.t`) and `model.obs`
            - RMSE DERF: squared norm of the residual between the observations
              translated to 'gaussian_pdf' space and the `gaussian_pdf`
              prediction
            - RMSE LNR: the value stored for the location under
              `self.preconditioner._statistics["linear_rmse"]`, or 1e10 when
              the location has no entry there

        Note that, despite the column names, the first two values are computed
        as `np.linalg.norm(residual)**2`, i.e. sums of squared residuals; no
        mean or square root is taken. Rows follow the iteration order of
        `self.models`; the table is neither sorted nor printed here.

        Args:
            print_summary: (bool) currently not used by this method; kept in
                the signature so existing callers keep working.

        Returns:
            Dataframe with the data.
        """
        rows = []
        for location, model in self.models.items():
            # Predictions use the first column of the fitted parameters.
            fitted_params = model.params[:, 0]

            gaussian_cdf_pred = model.fun(model.t, fitted_params)
            rmse_gaussian_cdf = np.linalg.norm(gaussian_cdf_pred - model.obs)**2

            gaussian_pdf_obs = data_translator(model.obs,
                                               self.basic_model_dict['fun'],
                                               'gaussian_pdf')
            gaussian_pdf_pred = gaussian_pdf(model.t, fitted_params)
            rmse_gaussian_pdf = np.linalg.norm(gaussian_pdf_obs -
                                               gaussian_pdf_pred)**2

            # A large sentinel is reported when no linear-fit statistic was
            # recorded for this location.
            rmse_gaussian_pdf_linear = self.preconditioner._statistics[
                "linear_rmse"].get(location, 1e10)

            rows.append((location, rmse_gaussian_cdf, rmse_gaussian_pdf,
                         rmse_gaussian_pdf_linear))

        return pd.DataFrame(
            rows, columns=['Location', 'RMSE ERF', 'RMSE DERF', 'RMSE LNR'])
Пример #3
0
    def simulate(self,
                 mp,
                 num_simulations,
                 prediction_times,
                 group,
                 epsilon=1e-2,
                 theta=1):
        """
        Perturb the mean prediction of one group with simulated residuals.

        Prediction times up to the last observed time of the group (rounded
        to an integer) receive zero error; later times receive errors drawn by
        `self.create_residual_samples`. The perturbed forecast is
        `predictions - predictions**theta * error`.

        Args:
            mp: (curvefit.model_generator.ModelPipeline) model pipeline
            num_simulations: number of simulations
            prediction_times: (np.array) times to create predictions at
            group: (str) the group to make the simulations for
            epsilon: forwarded to `create_residual_samples` as `epsilon`
                (the floor for standard deviation moving out into the future)
            theta: exponent applied to the predictions when scaling the
                residuals relative to prediction magnitude

        Returns:
            The noisy forecast as returned by `data_translator`, called with
            `mp.predict_space` as both input and output space.
        """
        in_group = mp.all_data[mp.col_group] == group
        group_data = mp.all_data.loc[in_group].copy()
        last_observed_t = int(np.round(group_data[mp.col_t].max()))
        has_observation = group_data[mp.col_obs_compare].notnull()
        num_obs = group_data.loc[has_observation, mp.col_group].count()

        mean_prediction = mp.mean_predictions[group]

        beyond_data = prediction_times > last_observed_t
        within_data = prediction_times <= last_observed_t

        simulated_error = self.create_residual_samples(
            num_simulations=num_simulations,
            forecast_out_times=prediction_times[beyond_data] - last_observed_t,
            num_data=num_obs,
            epsilon=epsilon)
        zero_error = np.zeros(
            shape=(num_simulations, np.count_nonzero(within_data)))
        # NOTE(review): the zero-error columns are always placed first, which
        # assumes prediction_times is sorted ascending -- confirm with callers.
        combined_error = np.hstack([zero_error, simulated_error])

        perturbed = mean_prediction - (mean_prediction**theta) * combined_error
        return data_translator(data=perturbed,
                               input_space=mp.predict_space,
                               output_space=mp.predict_space)
Пример #4
0
def test_data_translator_exp(data, input_space, output_space):
    """Check that translating `data` between the given spaces leaves it numerically unchanged."""
    translated = utils.data_translator(data, input_space, output_space)
    unchanged = np.allclose(data, translated)
    assert unchanged
Пример #5
0
def test_data_translator_diff(data, input_space, output_space):
    """
    Check that cumulatively summing the translated data along axis 1
    reproduces the input.

    When 'log' occurs in `input_space`, both the input and the translated
    output are exponentiated before the comparison.
    """
    translated = utils.data_translator(data, input_space, output_space)
    in_log_space = 'log' in input_space
    expected = np.exp(data) if in_log_space else data
    increments = np.exp(translated) if in_log_space else translated
    assert np.allclose(expected, np.cumsum(increments, axis=1))