Example #1
def main(indata="ghcnd", diagnostics=False):
    """
    Extract relevant dataset from the command line switches

    :param str indata: input dataset to process
    :param bool diagnostics: output diagnostic information
    """

    # get all possible datasets
    all_datasets = utils.get_input_datasets()

    # and their names
    names = np.array([d.name for d in all_datasets])

    # if dataset selected and in the list of available, then run
    if indata in names:
        process_dataset(all_datasets[names == indata][0],
                        diagnostics=diagnostics)

    # fail gracefully
    else:
        print("data name not available: {}\n".format(indata))
        print("available data names: {}".format(" ".join(names)))

    return  # main
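
A hypothetical command-line wrapper for these main() functions (the docstrings mention command-line switches, but the repository's actual argument parsing is not shown here, so this is only an assumed sketch):

# Hypothetical CLI wrapper (assumed, not from the source repository)
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--indata", default="ghcnd",
                        help="input dataset to process")
    parser.add_argument("--diagnostics", action="store_true",
                        help="output diagnostic information")
    args = parser.parse_args()

    main(indata=args.indata, diagnostics=args.diagnostics)
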
Example #2
def main(indata="acre", diagnostics=False):
    """
    Extract relevant dataset from the command line switches

    :param str indata: input dataset to process
    :param bool diagnostics: output diagnostic information
    """

    # get all possible datasets
    all_datasets = utils.get_input_datasets()

    # and their names
    names = np.array([d.name for d in all_datasets])

    # if dataset selected and in the list of available, then run
    if indata in names:

        dataset = all_datasets[names == indata][0]

        if dataset.name in ["acre"]:
            # need to run ACRE as base period set separately
            process_dataset(dataset, diagnostics=diagnostics)

        elif dataset.base_period == "00-00":
            # the input dataset is raw observations, run by climpact, so no need
            #    as will have matched HadEX3 base period
            pass
        elif utils.match_reference_period(dataset.base_period):
            # the input datasets's reference period matches that of HadEX3 version
            pass
        else:
            # base period of input dataset doesn't match HadEX3, so remove
            #    appropriate indices
            process_dataset(dataset, diagnostics=diagnostics)

    return  # main
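
Example #2 relies on utils.match_reference_period. A minimal sketch of what it might do, assuming base periods are stored as two-digit-year strings such as "61-90" (the "00-00" sentinel above and the str(utils.REF_START)[-2:] usage elsewhere suggest this format; the real helper may differ):

# Hypothetical sketch of utils.match_reference_period (assumed behaviour)
def match_reference_period(base_period, ref_start=1961, ref_end=1990):
    # compare a "YY-YY" base-period string against the reference period
    expected = "{}-{}".format(str(ref_start)[-2:], str(ref_end)[-2:])
    return base_period == expected
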
Example #3
def main(index="TX90p", diagnostics=False, qc_flags=""):
    """
    The main DLS function

    :param str index: which index to run
    :param bool diagnostics: extra verbose output
    :param str qc_flags: which QC flags to process W, B, A, N, C, R, F, E, V, M
    """

    if index in utils.MONTHLY_INDICES:
        nmonths = 13
        timescale = "MON"
    else:
        nmonths = 1
        timescale = "ANN"

    # move this up one level eventually?
    all_datasets = utils.get_input_datasets()

    # spin through all datasets
    stations = np.array([])
    for dataset in all_datasets:

        try:
            ds_stations = utils.read_inventory(dataset,
                                               subdir="formatted/indices",
                                               final=True,
                                               timescale=timescale,
                                               index=index,
                                               qc_flags=qc_flags)
            good_stations = utils.select_qc_passes(ds_stations,
                                                   qc_flags=qc_flags)

            stations = np.append(stations, good_stations)

            print("Adding {} ({} stations), nstations = {}".format(
                dataset.name, len(good_stations), len(stations)))

        except IOError:
            # file missing
            print("No stations with data for {}".format(dataset.name))

    nstations = len(stations)

    # array of lats and lons for calculation of separations
    all_locations = np.array([[stn.latitude, stn.longitude]
                              for stn in stations])

    # get the separations (km, radians)
    stn_separation, stn_angle = get_separations(stations, all_locations)

    # assign stations to bands
    StationBands = assign_to_latitude_bands(stations)

    # read in all the station data (nyears, month_names and the constants
    #    MAX_SEPARATION, BIN_WIDTH, MIN_PER_BIN and C are assumed to be
    #    defined at module level in the source)
    all_data = get_all_data(stations, index, timescale, nyears, nmonths)

    # set up the DLS defaults
    bins = np.arange(0, MAX_SEPARATION + BIN_WIDTH, BIN_WIDTH)

    all_dls = np.zeros([len(utils.LAT_BANDS), nmonths])
    all_dls[:] = utils.DEFAULT_DLS

    # now spin through all latitude bands and months.
    for lb, band in enumerate(utils.LAT_BANDS):

        stations_in_bands, = np.where(StationBands == lb)

        if len(stations_in_bands) <= 30:
            # insufficient stations within this latitude band, next band
            if diagnostics:
                print("Index {}, Band {} to {}".format(index, band[0],
                                                       band[1]))
                print("Number of stations {}".format(len(stations_in_bands)))
            print("Ann, Jan -- Dec, DLS = {} km".format(utils.DEFAULT_DLS))

            # spin through months to remove old plots if they exist
            for month in range(nmonths):
                old_plot = os.path.join(
                    utils.PLOTLOCS, "DLS",
                    "DLS_{}_{}_{}to{}.png".format(
                        index, month_names[month], band[0], band[1]))
                if os.path.exists(old_plot):
                    os.remove(old_plot)
            continue

        print("{}, # stations {}".format(band, len(stations_in_bands)))

        # process each month
        for month in range(nmonths):
            print(month_names[month])

            month_data = all_data[stations_in_bands, :, month]

            names = [s.id for s in stations[stations_in_bands]]

            # get the separation and correlation for each cross pair
            # correlations only from 1951 (match HadEX2)
            cor_yr = 1951 - utils.STARTYEAR.year
            seps, cors = separations_and_correlations(
                month_data[:, cor_yr:],
                stn_separation[stations_in_bands, :][:, stations_in_bands],
                names,
                diagnostics=diagnostics)

            if len(seps) == 0 and len(cors) == 0:
                # none of the available stations had sufficient overlapping data
                #  or non-zero values at this point (correlations of lots of
                #  zeros don't mean anything), so skip to the next month
                if diagnostics:
                    print("Index {}, Band {} to {}, month {}".format(
                        index, band[0], band[1], month_names[month]))
                    print("Number of stations {}".format(
                        len(stations_in_bands)))
                    print(
                        "Likely that all values for this index, month and band are zero\n hence correlations don't mean anything"
                    )
                    print("Using default DLS = {}km".format(utils.DEFAULT_DLS))
                else:
                    print("No stations, {} - {} DLS = {} km".format(
                        band, month_names[month], utils.DEFAULT_DLS))
                continue

            # get the bins
            bin_assignment = np.digitize(
                seps, bins, right=True)  # right=True: right bin edge included
            bin_centers = bins - BIN_WIDTH / 2.

            # median value for each bin if sufficient correlations to do so.
            means = np.zeros(len(bins))
            sigmas = np.zeros(len(bins))
            for b, bin_edge in enumerate(bins):
                locs, = np.where(bin_assignment == b)

                if len(locs) > MIN_PER_BIN:
                    means[b] = np.ma.median(cors[locs])
                    sigmas[b] = np.ma.std(cors[locs])

            filled_bins, = np.where(means != 0)

            # if sufficient bins are filled then fit the curve
            if len(filled_bins) / float(len(bins)) >= 0.5:

                if utils.FIX_ZERO:
                    # fix zero bin to be 1.0, and use bin edges, not centres (HadEX2)
                    means[0] = 1.
                    sigmas[0] = sigmas[1]
                    dls, plot_curve, chisq, R2 = exponential_fit(bins,
                                                                 means,
                                                                 sigmas,
                                                                 C=C)
                else:
                    dls, plot_curve, chisq, R2 = exponential_fit(
                        bin_centers[1:], means[1:], sigmas[1:], C=C)

                # only take fit if greater than minimum set overall
                all_dls[lb, month] = np.max([dls, utils.DEFAULT_DLS])

                # test at 5% level and 2 or 3 dofs, as per HadEX2
                if utils.FIX_ZERO:
                    critical_value = chi2.isf(0.05, len(bins[sigmas != 0]) - 2)
                else:
                    critical_value = chi2.isf(0.05, len(bins[sigmas != 0]) - 3)
                if chisq >= critical_value:
                    print("fit not sufficiently good (chi-squared test failed)")
                    all_dls[lb, month] = utils.DEFAULT_DLS

                # plot the fit if required
                plt.clf()
                plt.scatter(seps,
                            cors,
                            c='b',
                            marker='.',
                            alpha=0.1,
                            edgecolor=None)

                # calculate the 2D density of the data given
                counts, xbins, ybins = np.histogram2d(seps, cors, bins=50)

                # make the contour plot (5 levels)
                plt.contour(counts.transpose(),
                            5,
                            extent=[
                                xbins.min(),
                                xbins.max(),
                                ybins.min(),
                                ybins.max()
                            ],
                            linewidths=1,
                            colors='black',
                            linestyles='solid')

                if utils.FIX_ZERO:
                    plt.plot(bins[sigmas != 0], means[sigmas != 0], 'ro')
                    plt.errorbar(bins[sigmas != 0],
                                 means[sigmas != 0],
                                 yerr=sigmas[sigmas != 0],
                                 fmt="none",
                                 ecolor="r")
                    plt.plot(bins, plot_curve, c='cyan', ls='-', lw=2)
                else:
                    plt.plot(bin_centers[1:][sigmas[1:] != 0],
                             means[1:][sigmas[1:] != 0], 'ro')
                    plt.errorbar(bin_centers[1:][sigmas[1:] != 0],
                                 means[1:][sigmas[1:] != 0],
                                 yerr=sigmas[1:][sigmas[1:] != 0],
                                 fmt="none",
                                 ecolor="r")
                    plt.plot(bin_centers[1:],
                             plot_curve,
                             c='cyan',
                             ls='-',
                             lw=2)  # plot curve will have been truncated

                plt.axvline(dls, c='magenta', ls="--", lw=2)
                plt.axvline(utils.DEFAULT_DLS, c='k', ls=":", lw=1)
                plt.axvline(utils.MAX_DLS, c='k', ls=":", lw=1)
                plt.text(dls + 10, 0.95, "dls = {:4.0f}km".format(dls))
                plt.text(3010, -0.95, "r2 = {:6.4f}".format(R2))
                plt.text(3010, -0.85, "chi2 = {:6.4f}".format(chisq))
                plt.text(3010, -0.75,
                         "Nstat = {}".format(len(stations_in_bands)))
                plt.xlim([-100, 5000])
                plt.ylim([-1, None])

                plt.xlabel("Separation (km)")
                plt.ylabel("Correlation")
                plt.title("{} - {}; {} to {}".format(index, month_names[month],
                                                     band[0], band[1]))

                # add text to show what code created this and when
                if utils.WATERMARK:
                    watermarkstring = "/".join(
                        os.getcwd().split('/')[4:]) + '/' + os.path.basename(
                            __file__) + "   " + dt.datetime.strftime(
                                dt.datetime.now(), "%d-%b-%Y %H:%M")
                    plt.figtext(0.01, 0.01, watermarkstring, size=6)

                # plot filename includes the reference period (e.g. 61-90)
                ref_period = "{}-{}".format(str(utils.REF_START)[-2:],
                                            str(utils.REF_END)[-2:])
                if utils.FIX_ZERO:
                    outname = "DLS_{}_{}_{}_{}to{}_fixzero.png".format(
                        index, ref_period, month_names[month], band[0], band[1])
                else:
                    outname = "DLS_{}_{}_{}_{}to{}.png".format(
                        index, ref_period, month_names[month], band[0], band[1])
                plt.savefig(os.path.join(utils.PLOTLOCS, "DLS", outname), dpi=300)

                print("DLS = {:7.2f} km".format(dls))

            else:
                print("insufficient bins for fit ({}/{})".format(
                    len(filled_bins), len(bins)))
                ref_period = "{}-{}".format(str(utils.REF_START)[-2:],
                                            str(utils.REF_END)[-2:])
                old_plot = os.path.join(
                    utils.PLOTLOCS, "DLS",
                    "DLS_{}_{}_{}_{}to{}.png".format(
                        index, ref_period, month_names[month], band[0], band[1]))
                if os.path.exists(old_plot):
                    # remove old plots!
                    os.remove(old_plot)

    # replace those dls < default with default and > max with max
    all_dls[all_dls < utils.DEFAULT_DLS] = utils.DEFAULT_DLS
    all_dls[all_dls > utils.MAX_DLS] = utils.MAX_DLS

    # interpolate
    grid_dls = interpolate_dls_to_grid(all_dls, nmonths)

    # write output file
    write_dls_file(os.path.join(utils.DLSLOCS, "dls_{}.txt".format(index)),
                   grid_dls, nmonths, month_names)

    return  # main
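
exponential_fit is called with bin positions, binned median correlations and their spreads, and returns (dls, plot_curve, chisq, R2). A minimal sketch of such a fit, assuming the usual HadEX-style decay model r(x) = C + (1 - C) * exp(-x / dls); the repository's real implementation may differ:

# Hypothetical sketch of exponential_fit (assumed decay model)
import numpy as np
from scipy.optimize import curve_fit

def exponential_fit(x, means, sigmas, C=0.0):

    def decay(x, dls):
        return C + (1.0 - C) * np.exp(-x / dls)

    good = sigmas != 0  # only fit populated bins
    popt, _ = curve_fit(decay, x[good], means[good],
                        p0=[200.0], sigma=sigmas[good])
    dls = popt[0]
    plot_curve = decay(x, dls)

    # chi-squared of the weighted residuals and a simple R2
    residuals = (means[good] - plot_curve[good]) / sigmas[good]
    chisq = np.sum(residuals ** 2)
    ss_res = np.sum((means[good] - plot_curve[good]) ** 2)
    ss_tot = np.sum((means[good] - np.mean(means[good])) ** 2)
    R2 = 1.0 - ss_res / ss_tot

    return dls, plot_curve, chisq, R2
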
Example #4
def main(indata="ghcndex", index="R95pTOT", diagnostics=False):
    """
    Read PRCPTOT and other indices and write out


    """

    # check if need to do monthly ones
    if index in utils.MONTHLY_INDICES:
        timescales = ["ANN", "MON"]
    else:
        timescales = ["ANN"]

    # get all possible datasets
    all_datasets = utils.get_input_datasets()
    # and their names
    names = np.array([d.name for d in all_datasets])

    # if dataset selected and in the list of available, then run
    if indata in names:
        dataset = all_datasets[names == indata][0]

        dataset_stations = utils.read_inventory(dataset,
                                                subdir="formatted/indices")

        # check each station
        for stn in dataset_stations:

            if diagnostics:
                print("{} - {}".format(dataset.name, stn.id))

            # for appropriate number of timescales
            for ts in timescales:

                if os.path.exists(
                        os.path.join(
                            stn.location, stn.id, "{}_{}_{}.csv".format(
                                stn.id, PARTNERS[index].lower(),
                                ts))) and os.path.exists(
                                    os.path.join(
                                        stn.location, stn.id,
                                        "{}_{}_{}.csv".format(
                                            stn.id, "PRCPTOT".lower(), ts))):
                    rtimes, rXXp = utils.read_station_index(
                        stn, PARTNERS[index].lower(), ts)
                    ptimes, prcptot = utils.read_station_index(
                        stn, "PRCPTOT".lower(), ts)

                    match = np.in1d(rtimes, ptimes)
                    match_b = np.in1d(ptimes, rtimes)

                    if len(match) != 0 and len(match_b) != 0:

                        # align on the common timestamps before taking the ratio
                        rXXptot = (100 * rXXp[match]) / prcptot[match_b]
                        rXXptot_times = rtimes[match]

                        if ts == "MON":
                            myears = []
                            months = []
                            for y in rXXptot_times:
                                for m in range(1, 13):
                                    myears += [y]
                                    months += [m]

                            stn.monthly = rXXptot.filled().reshape(-1)
                            stn.myears = myears
                            stn.months = months
                            path = os.path.join(
                                dataset.location, "formatted", "indices",
                                stn.id,
                                "{}_{}_MON.csv".format(stn.id, index.lower()))
                            if not os.path.exists(path):
                                utils.write_station_index(path,
                                                          stn,
                                                          index,
                                                          doMonthly=True)

                        else:
                            stn.years = rXXptot_times
                            stn.annual = rXXptot.filled()
                            path = os.path.join(
                                dataset.location, "formatted", "indices",
                                stn.id,
                                "{}_{}_ANN.csv".format(stn.id, index.lower()))
                            if not os.path.exists(path):
                                utils.write_station_index(path, stn, index)

    return  # main
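
The alignment in Example #4 relies on applying np.in1d both ways; a small self-contained check of that pattern (made-up data, assuming both time axes are sorted):

# Illustration of the two-way np.in1d alignment (made-up data)
import numpy as np

rtimes = np.array([1951, 1952, 1953, 1955])
ptimes = np.array([1952, 1953, 1954, 1955])

match = np.in1d(rtimes, ptimes)    # rtimes entries also present in ptimes
match_b = np.in1d(ptimes, rtimes)  # ptimes entries also present in rtimes

# both selections pick out the common years, in the same order
assert (rtimes[match] == ptimes[match_b]).all()   # 1952, 1953, 1955
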
Example #5
def main(indata="acre", diagnostics=False):
    """
    Call the R package climpact2 with appropriate settings to calculate the indices

    :param str indata: name of dataset to process
    :param bool diagnostics: output diagnostic information
    """


    # get all possible datasets
    all_datasets = utils.get_input_datasets()

    # and their names
    names = np.array([d.name for d in all_datasets])

    # select the matching one
    if indata in names:
        dataset = all_datasets[names == indata][0]

        # check that there are stations to process for this dataset
        stations = utils.read_inventory(dataset)
        if len(stations) != 0:

            for station in stations:

                # read the station data (renamed so the indata argument
                #   isn't shadowed)
                infile = os.path.join(dataset.location, "formatted", "{}.txt".format(station.id))
                station_data = np.genfromtxt(infile)

                # use the first and last years as the reference period
                ref_start = int(station_data[0][0])
                ref_end = int(station_data[-1][0])

                # write a temporary inventory file for just this station
                utils.write_climpact_inventory_header(os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name)))
                utils.write_climpact_inventory(os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name)), station)

                try:
                    with utils.cd(utils.CLIMPACT_LOCS):
                        # call the R process - which should automatically do everything and make suitable files etc
                        #  runs in subfolder with context manager, so returning to parent once done.

                        # ACRE (and others?) have stations that do not overlap the reference period.
                        #   Means that the QC process throws them out if insufficient overlap between data and reference period

                        print(" ".join(["Rscript", "climpact2.batch.stations.r", os.path.join(dataset.location, "formatted"), os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name)), str(ref_start), str(ref_end), str(utils.NCORES)]))

                        subprocess.check_call(["Rscript", "climpact2.batch.stations.r", os.path.join(dataset.location, "formatted"), os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name)), str(ref_start), str(ref_end), str(utils.NCORES)])

                except subprocess.CalledProcessError:
                    # handle errors in the called executable
                    raise Exception("climpact2 run failed for {}".format(station.id))

                except OSError:
                    # executable not found
                    print("Cannot find Rscript")
                    raise OSError

                # remove temporary metadata file
                os.remove(os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name)))

        # fail gracefully
        else:
            print("No stations available in {}".format(indata))
            print("  Climpact2 not run")

        # remove plots, qc, thres and trend folders (save space)
        if utils.REMOVE_EXTRA:
            for subdir in ["plots", "qc", "thres", "trend"]:
                shutil.rmtree(os.path.join(dataset.location, "formatted", subdir))


    # fail gracefully
    else:
        print("data name not available: {}\n".format(indata))
        print("available data names: {}".format(" ".join(names)))
        

    return # main
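
utils.cd is used as a context manager so the Rscript call runs inside the climpact2 folder and the working directory is restored afterwards. A minimal sketch of such a helper (assumed; the repository's own version may differ):

# Hypothetical sketch of the utils.cd context manager
import contextlib
import os

@contextlib.contextmanager
def cd(path):
    previous = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(previous)  # always return to the original directory
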
Example #6
def main(index="TX90p", diagnostics=False, qc_flags="", anomalies="None"):
    """
    Read inventories and make scatter plot

    :param str index: which index to run
    :param bool diagnostics: extra verbose output
    :param str qc_flags: which QC flags to process W, B, A, N, C, R, F, E, V, M
    :param str anomalies: run code on anomalies or climatology rather than raw data

    """

    if index in utils.MONTHLY_INDICES:
        timescale = ["ANN", "MON"]
    else:
        timescale = ["ANN"]

    # move this up one level eventually?
    all_datasets = utils.get_input_datasets()

    for ts in timescale:
        # set up the figure
        fig = plt.figure(figsize=(10, 6.5))
        plt.clf()
        ax = plt.axes([0.025, 0.14, 0.95, 0.90], projection=cartopy.crs.Robinson())
        ax.gridlines() #draw_labels=True)
        ax.add_feature(cartopy.feature.LAND, zorder=0, facecolor="0.9", edgecolor="k")
        ax.coastlines()

        # dummy scatters for full extent
        plt.scatter([-180, 180, 0, 0], [0, 0, -90, 90], c="w", s=1, transform=cartopy.crs.Geodetic(), \
                        edgecolor='w', linewidth=0.01)

        # run all datasets
        total = 0
        for dataset in all_datasets:

            try:
                # choose appropriate subdirectory.
                if anomalies == "None":
                    subdir = "formatted/indices"
                elif anomalies == "anomalies":
                    subdir = "formatted/anomalies"
                elif anomalies == "climatology":
                    subdir = "formatted/climatology"

                ds_stations = utils.read_inventory(dataset, subdir=subdir, final=True, \
                                                   timescale=ts, index=index, anomalies=anomalies, qc_flags=qc_flags)
                ds_stations = utils.select_qc_passes(ds_stations, qc_flags=qc_flags)

            except IOError:
                # file missing
                print("No stations with data for {}".format(dataset.name))
                ds_stations = []

            if len(ds_stations) > 0:
                lats = np.array([stn.latitude for stn in ds_stations])
                lons = np.array([stn.longitude for stn in ds_stations])

                # and plot
                scatter = plt.scatter(lons, lats, c=COLOURS[dataset.name], s=15, \
                                          label="{} ({})".format(get_label(dataset.name), len(ds_stations)), \
                                          transform=cartopy.crs.Geodetic(), edgecolor='0.5', linewidth=0.5)

                total += len(ds_stations)

        # make a legend
        leg = plt.legend(loc='lower center', ncol=5, bbox_to_anchor=(0.50, -0.3), \
                             frameon=False, title="", prop={'size':12}, labelspacing=0.15, columnspacing=0.5, numpoints=3)
        plt.setp(leg.get_title(), fontsize=12)

        plt.figtext(0.06, 0.91, "{} Stations".format(total))
        plt.title("{} - {}".format(index, ts))

        # extra information
        if utils.WATERMARK:
            watermarkstring = "{} {}".format(os.path.join("/".join(os.getcwd().split('/')[4:]), os.path.basename(__file__)), dt.datetime.strftime(dt.datetime.now(), "%d-%b-%Y %H:%M"))
            plt.figtext(0.01, 0.01, watermarkstring, size=6)
#        plt.figtext(0.03, 0.95, "(c)", size=14)

        # and save
        outname = putils.make_filenames("station_locations", index=index, grid="ADW", anomalies=anomalies, month=ts.capitalize())

        plt.savefig("{}/{}/{}".format(utils.PLOTLOCS, index, outname))
            
        plt.close()

        # write out total station number
        if ts == "ANN":
            with open(os.path.join(utils.INFILELOCS, "{}_stations.txt".format(index)), "w") as outfile:
                outfile.write("{}\n".format(index))
                outfile.write("{}".format(total))
        
    return # main
Example #7
def main(indata="ghcnd", diagnostics=False):
    """
    Call the R package climpact2 with appropriate settings to calculate the indices

    :param str indata: name of dataset to process
    :param bool diagnostics: output diagnostic information
    """

    # get all possible datasets
    all_datasets = utils.get_input_datasets()

    # and their names
    names = np.array([d.name for d in all_datasets])

    # select the matching one
    if indata in names:
        dataset = all_datasets[names == indata][0]
        '''
        Process call structure

        climpact2.batch.stations.r ./sample_data/ ./sample_data/climpact2.sample.batch.metadata.txt 1971 2000 4
        '''

        # check that there are stations to process for this dataset
        stations = utils.read_inventory(dataset)
        if len(stations) != 0:

            try:
                with utils.cd(utils.CLIMPACT_LOCS):
                    # call the R process - which should automatically do everything and make suitable files etc
                    #  runs in subfolder with context manager, so returning to parent once done.

                    # ACRE (and others?) have stations that do not overlap the reference period.
                    #   Means that the QC process throws them out if insufficient overlap between data and reference period

                    if dataset.name == "acre":
                        ref_start = 1901
                        ref_end = 1930
                    else:
                        ref_start = utils.REF_START
                        ref_end = utils.REF_END

                    print(" ".join([
                        "Rscript", "climpact2.batch.stations.r",
                        os.path.join(dataset.location, "formatted"),
                        os.path.join(dataset.location,
                                     "{}.metadata.txt".format(dataset.name)),
                        str(ref_start),
                        str(ref_end),
                        str(utils.NCORES)
                    ]))

                    subprocess.check_call([
                        "Rscript", "climpact2.batch.stations.r",
                        os.path.join(dataset.location, "formatted"),
                        os.path.join(dataset.location,
                                     "{}.metadata.txt".format(dataset.name)),
                        str(ref_start),
                        str(ref_end),
                        str(utils.NCORES)
                    ])

            except subprocess.CalledProcessError:
                # handle errors in the called executable
                raise Exception("climpact2 run failed for {}".format(dataset.name))

            except OSError:
                # executable not found
                print("Cannot find Rscript")
                raise OSError

        # fail gracefully
        else:
            print("No stations available in {}".format(indata))
            print("  Climpact2 not run")

        # remove plots, qc, thres and trend folders (save space)
        if utils.REMOVE_EXTRA:
            for subdir in ["plots", "qc", "thres", "trend"]:
                try:
                    shutil.rmtree(
                        os.path.join(dataset.location, "formatted", subdir))
                except FileNotFoundError:
                    print("{} doesn't exist".format(
                        os.path.join(dataset.location, "formatted", subdir)))

    # fail gracefully
    else:
        print("data name not available: {}\n".format(indata))
        print("available data names: {}".format(" ".join(names)))

    return  # main
Example #8
def main(indata="ghcnd", diagnostics=False):
    """
    Read TXn and TNn and write out ETR as difference


    """
    index = "ETR"

    # check if need to do monthly ones
    if index in utils.MONTHLY_INDICES:
        timescales = ["ANN", "MON"]
    else:
        timescales = ["ANN"]
    
    # get all possible datasets
    all_datasets = utils.get_input_datasets()
    # and their names
    names = np.array([d.name for d in all_datasets])

    # if dataset selected and in the list of available, then run
    if indata in names:
        dataset = all_datasets[names == indata][0]

        dataset_stations = utils.read_inventory(dataset, subdir="formatted/indices")

        # check each station
        for stn in dataset_stations:

            if diagnostics:
                print("{} - {}".format(dataset.name, stn.id))

            # for appropriate number of timescales
            for ts in timescales:

                if os.path.exists(os.path.join(stn.location, stn.id, "{}_{}_{}.csv".format(stn.id, "txx", ts))) and os.path.exists(os.path.join(stn.location, stn.id, "{}_{}_{}.csv".format(stn.id, "tnn", ts))):
                    xtimes, txx = utils.read_station_index(stn, "TXx", ts)
                    ntimes, tnn = utils.read_station_index(stn, "TNn", ts)

                    match = np.in1d(xtimes, ntimes)
                    match_b = np.in1d(ntimes, xtimes)

                    if len(match) != 0 and len(match_b) != 0:

                        etr = txx[match]-tnn[match_b]
                        etr_times = xtimes[match]

                        if ts == "MON":
                            myears = []
                            months = []
                            for y in etr_times:
                                for m in range(1, 13):
                                    myears += [y]
                                    months += [m]                    

                            stn.monthly = etr.filled().reshape(-1)
                            stn.myears = myears
                            stn.months = months 
                            path = os.path.join(dataset.location, "formatted", "indices", stn.id, "{}_{}_MON.csv".format(stn.id, index.lower()))
                            if not os.path.exists(path):
                                utils.write_station_index(path, stn, "ETR", doMonthly=True)

                        else:            
                            stn.years = etr_times
                            stn.annual = etr.filled()
                            path = os.path.join(dataset.location, "formatted", "indices", stn.id, "{}_{}_ANN.csv".format(stn.id, index.lower()))
                            if not os.path.exists(path):
                                utils.write_station_index(path, stn, "ETR")

    return # main
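
etr above is a masked array, so .filled() substitutes the fill value for any masked entries before the index is written out; a tiny demonstration with made-up values:

# Demonstration of masked-array .filled() (made-up values)
import numpy.ma as ma

etr = ma.array([10.5, 12.0, 9.8], mask=[False, True, False],
               fill_value=-99.9)
print(etr.filled())  # [ 10.5 -99.9   9.8]
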
Example #9
def main(grid="ADW",
         index="TX90p",
         month_index=0,
         diagnostics=False,
         hadex2_adw=False,
         qc_flags="",
         anomalies="None"):
    """
    :param str grid: gridding type ADW/CAM
    :param str index: which index to run
    :param int month_index: which month to apply (0 = Annual, 1-12 for months)    
    :param str qc_flags: which QC flags to process W, B, A, N, C, R
    :param bool diagnostics: output diagnostic information
    :param str anomalies: run code on anomalies or climatology rather than raw data
    """

    # ensure correct timescale is selected
    if index in utils.MONTHLY_INDICES:
        if month_index == 0:
            timescale = "ANN"
        else:
            timescale = "MON"
    else:
        if month_index == 0:
            timescale = "ANN"
        else:
            print("Monthly requested for annual-only index.\n Exiting")
            return

    # move this up one level eventually
    all_datasets = utils.get_input_datasets()

    # set up the data arrays
    if anomalies == "climatology":
        nyears = 1
    else:
        nyears = len(utils.REFERENCEYEARS)

    if grid == "CAM":
        GridData, GridStations = cam(all_datasets,
                                     index,
                                     timescale,
                                     nyears,
                                     qc_flags=qc_flags,
                                     month_index=month_index,
                                     diagnostics=diagnostics,
                                     anomalies=anomalies)

    elif grid == "ADW":
        GridData, GridStations, GridDLSStations = adw(all_datasets,
                                                      index,
                                                      timescale,
                                                      nyears,
                                                      qc_flags=qc_flags,
                                                      month_index=month_index,
                                                      diagnostics=diagnostics,
                                                      anomalies=anomalies,
                                                      hadex2_adw=hadex2_adw)

    if utils.DOLSM:
        nmonths = 1
        # apply LSM
        lsm = utils.get_land_sea_mask(
            utils.box_centre_lats, utils.box_centre_lons, floor=False
        )  # not taking only purely non-land boxes.  Have to have sufficient amount of land!
        # resize to match
        lsm_sized = np.tile(np.tile(lsm, (1, 1, 1, 1)),
                            (nyears, nmonths, 1, 1))

        GridData.mask = np.logical_or(GridData.mask, lsm_sized)
        GridStations.mask = np.logical_or(GridStations.mask, lsm_sized)
        if grid == "ADW":
            GridDLSStations.mask = np.logical_or(GridDLSStations.mask,
                                                 lsm_sized)

    # correct fill_value
    GridData.fill_value = utils.HADEX_MDI
    GridStations.fill_value = utils.HADEX_MDI

    # append appropriate name to filename if anomalies or climatology

    filename = utils.make_filenames(index=index,
                                    grid=grid,
                                    anomalies=anomalies,
                                    month_index=month_index)

    ncdfp.netcdf_write(os.path.join(utils.OUTROOT, filename),
                       index,
                       GridData.filled(),
                       utils.REFERENCEYEARS,
                       utils.box_centre_lats,
                       utils.box_centre_lons,
                       single_month=month_index)

    filename = utils.make_filenames(index=index,
                                    grid=grid,
                                    anomalies=anomalies,
                                    extra="num",
                                    month_index=month_index)
    ncdfp.netcdf_write(os.path.join(utils.OUTROOT, filename),
                       index,
                       GridStations.filled(),
                       utils.REFERENCEYEARS,
                       utils.box_centre_lats,
                       utils.box_centre_lons,
                       single_month=month_index,
                       station_count=True)

    if grid == "ADW":

        filename = utils.make_filenames(index=index,
                                        grid=grid,
                                        anomalies=anomalies,
                                        extra="numdls",
                                        month_index=month_index)
        ncdfp.netcdf_write(os.path.join(utils.OUTROOT, filename),
                           index,
                           GridDLSStations.filled(),
                           utils.REFERENCEYEARS,
                           utils.box_centre_lats,
                           utils.box_centre_lons,
                           single_month=month_index,
                           station_count=True)

    return  # main
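
The double np.tile call in Example #9 first promotes the 2-D land-sea mask to 4-D and then repeats it over years and months; a shape check of that step (assumed 36x72 mask dimensions):

# Shape check for the LSM tiling above (assumed mask size)
import numpy as np

nyears, nmonths = 3, 1
lsm = np.zeros((36, 72), dtype=bool)             # lat x lon
lsm_sized = np.tile(np.tile(lsm, (1, 1, 1, 1)),  # -> (1, 1, 36, 72)
                    (nyears, nmonths, 1, 1))     # -> (3, 1, 36, 72)
assert lsm_sized.shape == (nyears, nmonths, 36, 72)
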
Example #10
def main(diagnostics=False):
    """
    Read inventories and make scatter plot

    :param bool diagnostics: extra verbose output

    """

    # move this up one level eventually?
    all_datasets = utils.get_input_datasets()

    # set up the figure
    fig = plt.figure(figsize=(10, 6.7))
    plt.clf()
    ax = plt.axes([0.025, 0.14, 0.95, 0.90], projection=cartopy.crs.Robinson())
    ax.gridlines()  #draw_labels=True)
    ax.add_feature(cartopy.feature.LAND,
                   zorder=0,
                   facecolor="0.9",
                   edgecolor="k")
    ax.coastlines()

    # dummy scatters for full extent
    plt.scatter([-180, 180, 0, 0], [0, 0, -90, 90], c="w", s=1, transform=cartopy.crs.Geodetic(), \
                    edgecolor='w', linewidth=0.01)

    # run all datasets
    total = 0
    for dataset in all_datasets:

        try:
            # choose appropriate subdirectory.
            subdir = "formatted/indices"

            ds_stations = utils.read_inventory(dataset, subdir=subdir, final=False, \
                                               timescale="", index="", anomalies="None", qc_flags="")

        except IOError:
            # file missing
            print("No stations with data for {}".format(dataset.name))
            ds_stations = []

        if len(ds_stations) > 0:
            lats = np.array([stn.latitude for stn in ds_stations])
            lons = np.array([stn.longitude for stn in ds_stations])

            # and plot
            scatter = plt.scatter(lons, lats, c=COLOURS[dataset.name], s=15, \
                                      label="{} ({})".format(get_label(dataset.name), len(ds_stations)), \
                                      transform=cartopy.crs.Geodetic(), edgecolor='0.5', linewidth=0.5)

            total += len(ds_stations)

    # make a legend
    leg = plt.legend(loc='lower center', ncol=5, bbox_to_anchor=(0.50, -0.34), \
                         frameon=False, title="", prop={'size':12}, labelspacing=0.15, columnspacing=0.5, numpoints=3)
    plt.setp(leg.get_title(), fontsize=12)

    plt.figtext(0.05, 0.92, "{} Stations".format(total))

    plt.title("HadEX3 stations")

    # and save
    outname = putils.make_filenames("station_locations",
                                    index="All",
                                    grid="ADW",
                                    anomalies="None",
                                    month="All")

    plt.savefig("{}/{}".format(utils.PLOTLOCS, outname), dpi=300)

    plt.close()

    return  # main
Example #11
def main(index="TX90p", diagnostics=False):
    """
    For all datasets, find stations that exist for the given index (and
    appropriate timescales), check for the presence of data, and write
    the final station listing

    :param str index: which index to process
    :param bool diagnostics: output diagnostic information
    """

    # check if need to do monthly ones
    if index in utils.MONTHLY_INDICES:
        timescales = ["ANN", "MON"]
    else:
        timescales = ["ANN"]

    # read in all datasets
    all_datasets = utils.get_input_datasets()

    # for appropriate number of timescales
    for ts in timescales:
        print("{}".format(ts))

        # spin through each dataset
        for d, dataset in enumerate(all_datasets):

            dataset_stations = utils.read_inventory(dataset,
                                                    subdir="formatted/indices")

            if diagnostics:
                print("{} - {}".format(dataset.name, index))

            final_inventory = []

            # check each station
            for stn in dataset_stations:

                if diagnostics:
                    print("{} - {}".format(dataset.name, stn.id))

                if assess_station(stn, index, ts, diagnostics=diagnostics):
                    final_inventory += [stn]
                    if diagnostics:
                        print("{}\n".format(len(final_inventory)))
                else:
                    if diagnostics:
                        print("\n")

            # then write everything out.
            metadata_file = os.path.join(
                dataset.location,
                "{}.metadata.{}.{}.txt".format(dataset.name, index, ts))
            utils.write_climpact_inventory_header(metadata_file)

            for stn in final_inventory:
                utils.write_climpact_inventory(metadata_file, stn)

            print("{} - {} stations".format(dataset.name,
                                            len(final_inventory)))

    return  # main
Example #12
def main(index="TX90p", diagnostics=False, qc_flags="", anomalies="None"):
    """
    Read inventories and make scatter plot

    :param str index: which index to run
    :param bool diagnostics: extra verbose output
    :param str qc_flags: which QC flags to process W, B, A, N, C, R, F, E, V, M
    :param str anomalies: run code on anomalies or climatology rather than raw data

    """
    with open(
            os.path.join(utils.INFILELOCS,
                         "{}_yearly_stations.txt".format(index)),
            "w") as outfile:
        outfile.write("{}\n".format(index))

    if index in utils.MONTHLY_INDICES:
        timescale = ["ANN", "MON"]  # allow for future!
    else:
        timescale = ["ANN"]

    # move this up one level eventually?
    all_datasets = utils.get_input_datasets()

    for ts in timescale:

        # run all datasets
        for d, dataset in enumerate(all_datasets):

            print(dataset)

            try:
                # choose appropriate subdirectory.
                subdir = "formatted/indices"

                ds_stations = utils.read_inventory(dataset, subdir=subdir, final=True, \
                                                       timescale=ts, index=index, anomalies=anomalies, qc_flags=qc_flags)
                ds_stations = utils.select_qc_passes(ds_stations,
                                                     qc_flags=qc_flags)

            except IOError:
                # file missing
                print("No stations with data for {}".format(dataset.name))
                ds_stations = []

            # extract relevant info for this dataset
            if len(ds_stations) > 0:

                # extract values for this dataset
                for s, stn in enumerate(ds_stations):
                    presence = time_presence(stn, index, ts)  # year/month
                    if s == 0:
                        ds_presence = np.expand_dims(presence, axis=0)[:]
                    else:
                        ds_presence = np.append(ds_presence,
                                                np.expand_dims(presence,
                                                               axis=0),
                                                axis=0)  # station/year/month

                ds_lats = np.array([stn.latitude for stn in ds_stations])
                ds_lons = np.array([stn.longitude for stn in ds_stations])

                # store in overall arrays
                try:
                    all_lats = np.append(all_lats, ds_lats[:], axis=0)
                    all_lons = np.append(all_lons, ds_lons[:], axis=0)
                    all_presence = np.append(
                        all_presence, ds_presence[:],
                        axis=0)  # dataset*station/year/month
                    all_dataset_names = np.append(
                        all_dataset_names,
                        np.array([dataset.name for i in ds_lats]))
                except NameError:
                    # if not yet defined, then set up
                    all_lats = ds_lats[:]
                    all_lons = ds_lons[:]
                    all_presence = ds_presence[:]
                    all_dataset_names = np.array(
                        [dataset.name for i in ds_lats])

        for y, year in enumerate(utils.REFERENCEYEARS):

            # set up the figure
            fig = plt.figure(figsize=(10, 6.5))
            plt.clf()
            ax = plt.axes([0.025, 0.10, 0.95, 0.90],
                          projection=cartopy.crs.Robinson())
            ax.gridlines()  #draw_labels=True)
            ax.add_feature(cartopy.feature.LAND,
                           zorder=0,
                           facecolor="0.9",
                           edgecolor="k")
            ax.coastlines()

            # dummy scatters for full extent
            plt.scatter([-180, 180, 0, 0], [0, 0, -90, 90], c="w", s=1, transform=cartopy.crs.Geodetic(), \
                            edgecolor='w', linewidth=0.01)

            total = 0
            for dataset in all_datasets:

                ds, = np.where(all_dataset_names == dataset.name)
                locs, = np.where(all_presence[ds, y, 0] == 1)

                if len(locs) > 0:
                    plt.scatter(all_lons[ds][locs], all_lats[ds][locs], c=ps.COLOURS[dataset.name], \
                                    s=15, label="{} ({})".format(ps.get_label(dataset.name), len(locs)), \
                                    transform=cartopy.crs.Geodetic(), edgecolor='0.5', linewidth=0.5)
                    total += len(locs)
                else:
                    # aiming to show all datasets in the legend, even if zero stations
                    plt.scatter([-180], [-90], c=ps.COLOURS[dataset.name], s=15, \
                                    label="{} ({})".format(ps.get_label(dataset.name), len(locs)), \
                                    transform=cartopy.crs.Geodetic(), edgecolor='0.5', linewidth=0.5)
                time.sleep(1)

            # make a legend
            leg = plt.legend(loc='lower center', ncol=6, bbox_to_anchor=(0.50, -0.25), frameon=False, \
                                 title="", prop={'size':10}, labelspacing=0.15, columnspacing=0.5, numpoints=3)
            plt.setp(leg.get_title(), fontsize=12)

            plt.figtext(0.05, 0.92, "{} Stations".format(total))

            plt.title("{} - {} - {}".format(index, ts, year))

            # and save
            outname = putils.make_filenames("station_locations_{}_{}".format(
                ts.capitalize(), year),
                                            index=index,
                                            grid="ADW",
                                            anomalies=anomalies)

            plt.savefig("{}/{}/{}".format(utils.PLOTLOCS, index, outname))

            plt.close()
            plt.clf()
            print("{} done".format(year))

            # write out total station number
            with open(
                    os.path.join(utils.INFILELOCS,
                                 "{}_yearly_stations.txt".format(index)),
                    "a") as outfile:
                outfile.write("{} {}\n".format(year, total))

            time.sleep(1)

        # reset namespace
        del all_lats
        del all_lons
        del all_presence
        del all_dataset_names
    return  # main
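
Growing ds_presence with np.append inside the station loop copies the array on every iteration; an equivalent, simpler pattern is to collect the per-station arrays in a list and stack once (a hypothetical alternative, not the repository's code):

# Hypothetical alternative to the np.append accumulation (made-up shapes)
import numpy as np

presences = [np.zeros((5, 13)) for _ in range(3)]  # one year/month array per station
ds_presence = np.stack(presences, axis=0)          # -> station/year/month
assert ds_presence.shape == (3, 5, 13)
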
Example #13
def main(indata="ecad", diagnostics=False):
    """
    Find all metadata files for given dataset across all indices and merge together

    :param str indata: input dataset name
    :param bool diagnostics: output diagnostic information
    """

    # get all possible datasets
    all_datasets = utils.get_input_datasets()

    # and their names
    names = np.array([d.name for d in all_datasets])

    # if dataset selected and in the list of available, then run
    if indata in names:
        dataset = all_datasets[names == indata][0]

        all_stations = []
        all_names = []

        # spin through indices
        for index in utils.ALL_INDICES:
            
            if diagnostics:
                print("processing {}".format(index))

            # read in info
            if dataset.name in ["ecad", "sacad", "lacad"]:
                index_stations = inventory_utils.read_ecad(dataset, index, diagnostics=diagnostics)
            elif dataset.name in ["hadex2"]:
                index_stations = inventory_utils.read_hadex2(dataset, index, diagnostics=diagnostics)
            elif dataset.name in ["south_america"]:
                index_stations = inventory_utils.read_generic_index(dataset, index, diagnostics=diagnostics)
            elif dataset.name in ["west_africa_pptn"]:
                index_stations = inventory_utils.read_generic_index(dataset, index, diagnostics=diagnostics)
            elif dataset.name in ["west_africa_indices"]:
                index_stations = inventory_utils.read_generic_index(dataset, index, diagnostics=diagnostics)
            elif dataset.name in ["arabia"]:
                if utils.REF_START == 1961 and utils.REF_END == 1990:
                    index_stations = inventory_utils.read_arabia_6190_index(dataset, index, diagnostics=diagnostics)
                elif utils.REF_START == 1981 and utils.REF_END == 2010:
                    index_stations = inventory_utils.read_arabia_8110_index(dataset, index, diagnostics=diagnostics)

            elif dataset.name in ["south_africa"]:
                index_stations = inventory_utils.read_generic_index(dataset, index, diagnostics=diagnostics)
            elif dataset.name in ["ghcndex"]:
                index_stations = inventory_utils.read_ghcndex(dataset, index, diagnostics=diagnostics)

            # if no station metadata, then move on to next one
            if index_stations == []:
                continue
            
            # extract names
            station_names_for_index = [stn.id for stn in index_stations]

            # check if new
            for n, name in enumerate(station_names_for_index):
                if name in all_names:
                    # station exists already
                    loc = all_names.index(name)
                    # check all works out
                    try:
                        assert index_stations[n].latitude == all_stations[loc].latitude
                        assert index_stations[n].longitude == all_stations[loc].longitude
                    except AssertionError:
                        if abs(index_stations[n].latitude - all_stations[loc].latitude) > 0.1:
                            print("Station {} has mismatch in latitude: {} != {}".format(name, index_stations[n].latitude, all_stations[loc].latitude))
                        if abs(index_stations[n].longitude - all_stations[loc].longitude) > 0.1:
                            print("Station {} has mismatch in longitude: {} != {}".format(name, index_stations[n].longitude, all_stations[loc].longitude))

                else:
                    all_names += [name]
                    all_stations += [index_stations[n]]

        # sort alphabetically
        sort_order = np.argsort(np.array(all_names))

        all_stations = np.array(all_stations)[sort_order]

        # some data sources supply both index and raw data.  The raw data
        #   produces the metadata file, so cross-check the index data against it
        if os.path.exists(os.path.join(dataset.location, "{}.metadata.txt".format(dataset.name))) and dataset.name in ["south_america", "south_africa"]:

            subset_stations = np.array([])

            # cross check all stations
            for this_station in all_stations:
                keep = True

                with open(os.path.join(dataset.location, "{}.metadata.txt".format(dataset.name))) as infile:
                    for line in infile:
                        if re.search(re.escape(this_station.id), line):
                            keep = False
                            if diagnostics:
                                print("{} {} already in metadata file".format(dataset.name, this_station.id))
                if keep:
                    subset_stations = np.append(subset_stations, this_station)

            all_stations = subset_stations

        else:
            # write out the header
            utils.write_climpact_inventory_header(os.path.join(dataset.location, "{}.metadata.txt".format(dataset.name)))

        # always prefer raw data over index data
        for this_station in all_stations:
            # write out the metadata for this station and index
            utils.write_climpact_inventory(os.path.join(dataset.location, "{}.metadata.txt".format(dataset.name)), this_station)

    return # main
Example #14
def main():
    """
    Read in all input datasets (makes the data structures)
    """

    datasets = utils.get_input_datasets()

    return  # main