Пример #1
0
def main():
    """Print how many decimal places of the bootstrapped AUC are meaningful.

    The time series stored in ``result/TimeSeriesHyperSlice.gz`` is loaded and,
    for each rain threshold, ``n_bootstrap`` bootstrap forecasts are drawn. The
    sample standard deviation (``ddof=1``) of their areas under the ROC curve
    is turned into a number of decimal places with ``-round(log10(std))``. The
    result is printed as a table indexed by the threshold.

    Raises:
        ValueError: from ``math.log10`` if a standard deviation is exactly
            zero.
    """
    time_series = joblib.load("result/TimeSeriesHyperSlice.gz")
    test_set = dataset.CardiffTest()
    test_rain = test_set.rain

    forecaster = time_series.forecaster
    forecaster.load_memmap("r")

    # pair load_memmap() with del_memmap(), also when something below raises
    try:
        # fixed seed so that the bootstrap samples are reproducible
        seed = random.SeedSequence(254267254235771235840594891069714545013)
        rng = random.RandomState(random.MT19937(seed))

        rain_array = [0, 5, 10, 15, 20, 25, 30]
        decimal_place_array = []
        n_bootstrap = 32

        for rain in rain_array:
            auc_array = []
            for _ in range(n_bootstrap):
                bootstrap = forecaster.bootstrap(rng)
                roc = bootstrap.get_roc_curve(rain, test_rain)
                auc_array.append(roc.area_under_curve)
            auc_std = np.std(auc_array, ddof=1)
            # order of magnitude of the spread -> number of decimal places
            decimal_place_array.append(-round(math.log10(auc_std)))

        # rows: rain thresholds, single column: number of decimal places
        data_frame = pd.DataFrame(decimal_place_array, rain_array,
                                  ["no. dec. places"])
        print("rain (mm)")
        print(data_frame)
    finally:
        forecaster.del_memmap()
Пример #2
0
def main():
    """Plot the loaded time series' ROC curve with the ERA5 point on top.

    For every threshold in ``RAIN_ARRAY``, the ERA5 forecast and the observed
    rain are both thresholded with ``> rain``, which gives a single
    (false positive rate, true positive rate) point for ERA5. That point is
    drawn on the ROC curve of the time series loaded from
    ``result/TimeSeriesHyperSlice.gz`` and the figure is saved as
    ``figure_roc_compare/<rain>.pdf``.
    """
    directory = "figure_roc_compare"
    # creates the directory if missing, without the check-then-create race
    os.makedirs(directory, exist_ok=True)

    era5 = compound_poisson.era5.TimeSeries()
    era5.fit(dataset.Era5Cardiff())

    observed_data = dataset.CardiffTest()
    observed_rain = observed_data.rain

    time_series = joblib.load(path.join("result", "TimeSeriesHyperSlice.gz"))
    forecaster = time_series.forecaster
    forecaster.load_memmap("r")

    # del_memmap() must still run when a figure fails part way through
    try:
        for rain in RAIN_ARRAY:
            # ERA5 says "event" / the event was observed / it was not observed
            positive_array = era5.forecaster.forecast > rain
            alt_array = observed_rain > rain
            null_array = np.logical_not(alt_array)

            # NOTE: a denominator is zero when every (or no) observation
            # exceeds the threshold; this is not guarded against here
            true_positive_rate = (
                np.sum(np.logical_and(positive_array, alt_array)) /
                np.sum(alt_array))
            false_positive_rate = (
                np.sum(np.logical_and(positive_array, null_array)) /
                np.sum(null_array))

            roc = forecaster.get_roc_curve(rain, observed_rain)
            plt.figure()
            roc.plot()
            plt.scatter(false_positive_rate, true_positive_rate, label="ERA5")
            plt.legend()
            plt.savefig(path.join(directory, str(rain) + ".pdf"))
            plt.close()
    finally:
        forecaster.del_memmap()
Пример #3
0
def main():
    """Fit the ERA5 time series for Cardiff and print it against the test set.

    The ``figure`` directory is created when missing and handed to the
    printer together with the fitted forecaster, the observed test-set rain
    and the string ``"test"``.
    """
    directory = "figure"
    # creates the directory if missing, without the check-then-create race
    os.makedirs(directory, exist_ok=True)

    era5 = dataset.Era5Cardiff()
    time_series = compound_poisson.era5.TimeSeries()
    time_series.fit(era5)

    observed_data = dataset.CardiffTest()

    # NOTE(review): ``print`` must be rebound at file level to something
    # exposing ``TimeSeries`` (the builtin has no such attribute) -- confirm
    # against the file's imports
    printer = print.TimeSeries(
        time_series.forecaster, observed_data.rain, directory, "test")
    printer.print()
Пример #4
0
def main():
    """Run ``wrapper.time_series_forecast`` on the Cardiff data sets.

    A ``FitterHyperSlice`` fitter, the ``Cardiff10Training`` training set and
    the ``CardiffTest`` test set are handed over together with a default
    burn-in of 30000.
    """
    burn_in = 30000
    hyper_slice_fitter = fit.time_series.FitterHyperSlice()
    training_set = dataset.Cardiff10Training()
    test_set = dataset.CardiffTest()
    wrapper.time_series_forecast(
        hyper_slice_fitter, training_set, test_set, burn_in)
Пример #5
0
def main():
    """Plot survival, PP and QQ figures for "CP-MCMC (5)" and "IFS".

    The time series stored under ``../cardiff_5_20/result`` and a freshly
    fitted ERA5 time series are each compared with the observed test-set rain.
    Three monochrome figures are written to the ``figure`` directory:
    ``survival.pdf``, ``pp.pdf`` and ``qq.pdf``.
    """
    monochrome = (cycler.cycler('color', ['k'])
                  * cycler.cycler('linestyle', LINESTYLE))
    plt.rcParams.update({'font.size': 14})

    # where to save the figures
    directory = "figure"
    # creates the directory if missing, without the check-then-create race
    os.makedirs(directory, exist_ok=True)

    era5 = compound_poisson.era5.TimeSeries()
    era5.fit(dataset.Era5Cardiff())

    observed_data = dataset.CardiffTest()
    observed_rain = observed_data.rain

    # named script_dir so that the builtin dir() is not shadowed
    script_dir = path.join("..", "cardiff_5_20")
    time_series = joblib.load(
        path.join(script_dir, "result", "TimeSeriesHyperSlice.gz"))
    time_series_name = "CP-MCMC (5)"
    era5_name = "IFS"
    observed_name = "observed"
    # prefix the stored memmap path with the directory the result was loaded
    # from before opening the memmap
    forecaster = time_series.forecaster
    forecaster.memmap_path = path.join(script_dir, forecaster.memmap_path)
    forecaster.load_memmap("r")

    # del_memmap() must still run when a figure fails part way through
    try:
        cp_comparer = forecaster.compare_dist_with_observed(observed_rain)
        era5_comparer = era5.forecaster.compare_dist_with_observed(
            observed_rain)

        # survival plot
        plt.figure()
        plt.gca().set_prop_cycle(monochrome)
        cp_comparer.plot_survival_forecast(time_series_name)
        era5_comparer.plot_survival_forecast(era5_name)
        era5_comparer.plot_survival_observed(observed_name)
        cp_comparer.adjust_survival_plot()
        plt.legend()
        plt.savefig(path.join(directory, "survival.pdf"),
                    bbox_inches="tight")
        plt.close()

        # pp plot
        plt.figure()
        plt.gca().set_prop_cycle(monochrome)
        cp_comparer.plot_pp(time_series_name)
        era5_comparer.plot_pp(era5_name)
        cp_comparer.adjust_pp_plot()
        plt.legend()
        plt.savefig(path.join(directory, "pp.pdf"), bbox_inches="tight")
        plt.close()

        # qq plot
        # NOTE(review): unlike the two figures above, the adjust call comes
        # before the ERA5 curve is drawn; order kept as found -- confirm it is
        # intended
        plt.figure()
        plt.gca().set_prop_cycle(monochrome)
        cp_comparer.plot_qq(time_series_name)
        cp_comparer.adjust_qq_plot()
        era5_comparer.plot_qq(era5_name)
        plt.legend()
        plt.savefig(path.join(directory, "qq.pdf"), bbox_inches="tight")
        plt.close()
    finally:
        forecaster.del_memmap()
Пример #6
0
def _plot_bias_over_time(time_array, bias_plot_array, label_array, prop_cycle,
                         y_label, file_path):
    """Plot one bias curve per time series against time and save the figure.

    Args:
        time_array: x values shared by every curve
        bias_plot_array: one sequence of y values for each time series
        label_array: legend label of each time series, in the same order as
            bias_plot_array
        prop_cycle: cycler used to style successive lines
        y_label: label of the y axis
        file_path: where the figure is saved
    """
    plt.figure()
    plt.gca().set_prop_cycle(prop_cycle)
    for label, bias_plot in zip(label_array, bias_plot_array):
        plt.plot(time_array, bias_plot, label=label)
    # legend sits above the axes, stretched over its full width
    plt.legend(bbox_to_anchor=(0, 1, 1, 0),
               loc="lower left",
               mode="expand",
               ncol=3)
    plt.ylabel(y_label)
    plt.xlabel("year")
    plt.xticks(rotation=45)
    plt.savefig(file_path, bbox_inches="tight")
    plt.close()


def main():
    """Compare forecasts fitted on different training sizes, and with IFS.

    Time series are loaded from ``../cardiff_<size>_20/result`` for the
    training sizes 1, 5, 10 and 20 and evaluated against the observed test-set
    rain. Everything is written to the ``figure`` directory:

    * ``auc.pdf`` and ``auc.txt``: area under the ROC curve for each rain
      threshold, the table with a bootstrap root mean squared error
    * ``<loss>_mean.pdf`` and ``<loss>_median.pdf``: yearly bias losses, with
      the ERA5 fit added under the name "IFS"
    * ``mean_<loss>.txt`` and ``median_<loss>.txt``: bias losses over all
      years and for each season
    * ``<name>_residual_qq_hist.pdf``: residual heatmap of each time series
    """
    monochrome = (cycler.cycler('color', ['k']) *
                  cycler.cycler('linestyle', LINESTYLE))
    monochrome2 = (cycler.cycler('color', LINECOLOUR2) +
                   cycler.cycler('linestyle', LINESTYLE2) +
                   cycler.cycler('marker', LINEMARKER2))

    plt.rcParams.update({'font.size': 14})

    # where to save the figures
    directory = "figure"
    # creates the directory if missing, without the check-then-create race
    os.makedirs(directory, exist_ok=True)

    # fixed seed so that the bootstrap samples are reproducible
    seed = random.SeedSequence(301608752619507842997952162996242447135)
    rng = random.RandomState(random.MT19937(seed))

    era5 = compound_poisson.era5.TimeSeries()
    era5.fit(dataset.Era5Cardiff())

    observed_data = dataset.CardiffTest()
    observed_rain = observed_data.rain
    time_array = observed_data.time_array

    training_size_array = [1, 5, 10, 20]
    script_dir_array = [
        path.join("..", dir_i) for dir_i in (
            "cardiff_1_20",
            "cardiff_5_20",
            "cardiff_10_20",
            "cardiff_20_20",
        )
    ]

    time_series_name_array = []  # name of the time series of each training set
    time_series_array = []

    # del_memmap() is called on every time series collected so far, also when
    # something below raises
    try:
        # the location of each time series memmap_path needs updating because
        # they would be using relative paths
        for size_i, dir_i in zip(training_size_array, script_dir_array):
            time_series = joblib.load(
                path.join(dir_i, "result", "TimeSeriesHyperSlice.gz"))
            old_dir = time_series.forecaster.memmap_path
            time_series.forecaster.memmap_path = path.join(dir_i, old_dir)
            time_series.forecaster.load_memmap("r")
            time_series_array.append(time_series)
            time_series_name_array.append("CP-MCMC ({})".format(size_i))

        # plot auc for varying precipitation

        # array of array:
        # for each training set, then for each value in rain_array
        auc_array = []
        bootstrap_error_array = []
        n_bootstrap = 32
        rain_array = [0, 5, 10, 15]
        for time_series_i in time_series_array:
            forecaster_i = time_series_i.forecaster
            auc_i = []
            error_i = []
            for rain_i in rain_array:
                roc_i = forecaster_i.get_roc_curve(rain_i, observed_rain)
                auc_ij = roc_i.area_under_curve
                auc_i.append(auc_ij)

                # squared deviation of each bootstrap auc from the auc of the
                # full forecast
                squared_error_array = []
                for _ in range(n_bootstrap):
                    bootstrap = forecaster_i.bootstrap(rng)
                    roc_ij = bootstrap.get_roc_curve(rain_i, observed_rain)
                    squared_error_array.append(
                        math.pow(roc_ij.area_under_curve - auc_ij, 2))
                error_i.append(math.sqrt(np.mean(squared_error_array)))
            auc_array.append(auc_i)
            bootstrap_error_array.append(error_i)

        # figure format
        plt.figure()
        plt.gca().set_prop_cycle(monochrome)
        for name_i, auc_i in zip(time_series_name_array, auc_array):
            plt.plot(rain_array, auc_i, label=name_i)
        plt.ylim([0.5, 1])
        plt.xlabel("precipitation (mm)")
        plt.ylabel("Area under ROC curve")
        plt.legend()
        plt.savefig(path.join(directory, "auc.pdf"), bbox_inches="tight")
        plt.close()

        # table format
        rain_label_array = [str(rain) + " mm" for rain in rain_array]
        # table format with uncertainty values
        # raw string: "\p" is not a valid escape sequence in a normal literal
        auc_cell_format = r"${:0.4f}\pm {:0.4f}$"
        auc_table = []
        for auc_i, error_i in zip(auc_array, bootstrap_error_array):
            auc_table.append([
                auc_cell_format.format(auc_ij, error_ij)
                for auc_ij, error_ij in zip(auc_i, error_i)
            ])

        # transposed so that rows are thresholds, columns are time series
        data_frame = pd.DataFrame(
            np.asarray(auc_table).T, rain_label_array, time_series_name_array)
        data_frame.to_latex(path.join(directory, "auc.txt"), escape=False)

        # add era5 (for loss evaluation)
        # roc unavailable for era5
        time_series_array.append(era5)
        time_series_name_array.append("IFS")

        # yearly plot of the bias losses
        time_segmentator = time_segmentation.YearSegmentator(time_array)
        loss_segmentator_array = []
        for time_series_i in time_series_array:
            loss_segmentator_i = loss_segmentation.TimeSeries(
                time_series_i.forecaster, observed_rain)
            loss_segmentator_i.evaluate_loss(time_segmentator)
            loss_segmentator_array.append(loss_segmentator_i)

        pandas.plotting.register_matplotlib_converters()
        for i_loss, Loss in enumerate(loss_segmentation.LOSS_CLASSES):

            # array of arrays, one for each time_series in time_series_array
            # for each array, contains array of loss for each time point
            bias_loss_plot_array = []
            bias_median_loss_plot_array = []

            for loss_segmentator_i in loss_segmentator_array:
                bias_loss_plot, bias_median_loss_plot = (
                    loss_segmentator_i.get_bias_plot(i_loss))
                bias_loss_plot_array.append(bias_loss_plot)
                bias_median_loss_plot_array.append(bias_median_loss_plot)

            # every curve is drawn against the time axis of the last
            # segmentator; named explicitly rather than through a leftover
            # loop variable
            bias_time_array = loss_segmentator_array[-1].time_array

            for suffix, bias_plot_array in (
                    ("_mean.pdf", bias_loss_plot_array),
                    ("_median.pdf", bias_median_loss_plot_array)):
                _plot_bias_over_time(
                    bias_time_array,
                    bias_plot_array,
                    time_series_name_array,
                    monochrome2,
                    Loss.get_axis_bias_label(),
                    path.join(directory, Loss.get_short_bias_name() + suffix))

        # plot table of test set bias loss
        time_segmentator_array = {
            "all_years": time_segmentation.AllInclusive(time_array),
            "spring": time_segmentation.SpringSegmentator(time_array),
            "summer": time_segmentation.SummerSegmentator(time_array),
            "autumn": time_segmentation.AutumnSegmentator(time_array),
            "winter": time_segmentation.WinterSegmentator(time_array),
        }
        time_segmentator_names = list(time_segmentator_array.keys())

        # array of loss_segmentator objects, for each time series
        # dim 0: for each time series
        # dim 1: for each time segmentator
        loss_array = []
        for time_series_i in time_series_array:
            forecaster_i = time_series_i.forecaster
            loss_i_array = []
            for time_segmentator_k in time_segmentator_array.values():
                loss_ik = loss_segmentation.TimeSeries(
                    forecaster_i, observed_rain)
                loss_ik.evaluate_loss(time_segmentator_k)
                loss_i_array.append(loss_ik)
            loss_array.append(loss_i_array)

        # using training set size 5 years to get bootstrap variance, this is
        # used to guide the number of decimal places to use
        n_decimal = 3
        float_format = ("{:." + str(n_decimal) + "f}").format

        for i_loss, Loss in enumerate(loss_segmentation.LOSS_CLASSES):

            # table of losses
            # columns: for each time segmentator
            # rows: for each time series
            loss_mean_array = []
            loss_median_array = []

            for loss_i_array in loss_array:
                loss_mean_row = []
                loss_median_row = []
                for loss_ik in loss_i_array:
                    loss = loss_ik.loss_all_array[i_loss]
                    loss_mean_row.append(loss.get_bias_loss())
                    loss_median_row.append(loss.get_bias_median_loss())
                loss_mean_array.append(loss_mean_row)
                loss_median_array.append(loss_median_row)

            # one table for the mean bias, one for the median bias
            for prefix, loss_table in zip(
                    ["mean", "median"],
                    [loss_mean_array, loss_median_array]):
                data_frame = pd.DataFrame(loss_table, time_series_name_array,
                                          time_segmentator_names)
                path_to_table = path.join(
                    directory,
                    prefix + "_" + Loss.get_short_bias_name() + ".txt")
                data_frame.to_latex(path_to_table, float_format=float_format)

        for name_i, time_series_i in zip(time_series_name_array,
                                         time_series_array):
            residual_plot = residual_analysis.ResidualLnqqPlotter()

            # add residuals data
            residual_plot.add_data(time_series_i.forecaster, observed_rain)

            # plot residual data
            residual_plot.plot_heatmap(
                [[0, 3.8], [0, 3.8]], 1.8, 5.3, 'Greys')
            plt.savefig(
                path.join(directory, name_i + "_residual_qq_hist.pdf"),
                bbox_inches="tight")
            plt.close()
    finally:
        for time_series_i in time_series_array:
            time_series_i.forecaster.del_memmap()