Example #1
def l1qc(cfg):
    """
    Purpose:
     Reads input files, either an Excel workbook or a collection of CSV files,
     and returns the data as a data structure.
    Usage:
    Side effects:
     Returns a data structure containing the data specified in the L1
     control file.
    Author: PRI
    Date: February 2020
    """
    # parse the L1 control file
    l1_info = pfp_compliance.ParseL1ControlFile(cfg)
    # read the input file into a pandas data frame
    dfs = pfp_io.ReadInputFile(l1_info)
    # discard empty data frames
    for key in list(dfs.keys()):
        if len(dfs[key]) == 0:
            dfs.pop(key)
    if len(list(dfs.keys())) == 0:
        ds = pfp_io.DataStructure()
        ds.info["returncodes"]["value"] = 1
        ds.info["returncodes"][
            "message"] = "An error occured reading the input file"
        return ds
    # merge the data frames (1 per Excel worksheet)
    df = pfp_io.MergeDataFrames(dfs, l1_info)
    # convert the data frame to a PFP data structure and add metadata
    ds = pfp_io.DataFrameToDataStructure(df, l1_info)
    # write the processing level to a global attribute
    ds.root["Attributes"]["processing_level"] = "L1"
    # apply linear corrections to the data
    pfp_ck.do_linear(cfg, ds)
    # create new variables using user defined functions
    pfp_ts.DoFunctions(ds, l1_info["read_excel"])
    # calculate variances from standard deviations and vice versa
    pfp_ts.CalculateStandardDeviations(ds)
    # check missing data and QC flags are consistent
    pfp_utils.CheckQCFlags(ds)
    return ds
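A minimal usage sketch, assuming the L1 control file is parsed into a ConfigObj-style nested mapping (the loading step and the file name are assumptions, not necessarily the PyFluxPro entry point):

from configobj import ConfigObj
cfg = ConfigObj("L1_control_file.txt")  # hypothetical L1 control file
ds = l1qc(cfg)
# on failure, ds.info["returncodes"]["message"] carries the error text (see above)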
Example #2
def include_variables(std, ds_in):
    """
    Purpose:
     Only keep variables whose labels match one of the specified strings
     over the length of that string, i.e. a prefix match.
    Usage:
    Author: PRI
    Date: November 2018
    """
    msg = " Including variables ..."
    logger.info(msg)
    # get a new data structure
    ds_out = pfp_io.DataStructure()
    # copy the global attributes
    for gattr in ds_in.globalattributes:
        ds_out.globalattributes[gattr] = ds_in.globalattributes[gattr]
    # loop over variables to be included
    include_list = pfp_utils.string_to_list(std["Variables"]["include"]["include"])
    series_list = list(ds_in.series.keys())
    for item in include_list:
        for label in series_list:
            if label[0:len(item)] == item:
                ds_out.series[label] = ds_in.series[label]
    return ds_out
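A minimal usage sketch, assuming std follows the control-file layout indexed above and pfp_utils.string_to_list splits the comma-separated string; the prefixes and ds_full are hypothetical:

std = {"Variables": {"include": {"include": "Ta,Ws,Wd"}}}  # hypothetical label prefixes
ds_subset = include_variables(std, ds_full)  # ds_full is an existing PFP data structure
# ds_subset keeps only the series whose labels start with "Ta", "Ws" or "Wd"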
Example #3
def interpolate_ds(ds_in, ts, k=3):
    """
    Purpose:
     Interpolate the contents of a data structure onto a different time step.
    Assumptions:
    Usage:
    Author: PRI
    Date: June 2017
    """
    # instance the output data structure
    ds_out = pfp_io.DataStructure()
    # copy the global attributes
    for key in list(ds_in.globalattributes.keys()):
        ds_out.globalattributes[key] = ds_in.globalattributes[key]
    # add the time step
    ds_out.globalattributes["time_step"] = str(ts)
    # generate a regular time series at the required time step
    dt = ds_in.series["DateTime"]["Data"]
    dt0 = dt[0] - datetime.timedelta(minutes=30)
    start = datetime.datetime(dt0.year, dt0.month, dt0.day, dt0.hour, 0, 0)
    dt1 = dt[-1] + datetime.timedelta(minutes=30)
    end = datetime.datetime(dt1.year, dt1.month, dt1.day, dt1.hour, 0, 0)
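    # perdelta and toTimestamp are helpers defined elsewhere in this module (assumed):
    # perdelta yields datetimes from start to end at the given interval and
    # toTimestamp converts a datetime to a numeric timestamp for interpolation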
    idt = list(perdelta(start, end, datetime.timedelta(minutes=ts)))
    x1 = numpy.array([toTimestamp(dt[i]) for i in range(len(dt))])
    x2 = numpy.array([toTimestamp(idt[i]) for i in range(len(idt))])
    # loop over the series in the data structure and interpolate
    ds_out.series["DateTime"] = {}
    ds_out.series["DateTime"]["Data"] = idt
    ds_out.series["DateTime"]["Flag"] = numpy.zeros(len(idt))
    ds_out.series["DateTime"]["Attr"] = {
        "long_name": "Datetime",
        "units": "none"
    }
    ds_out.globalattributes["nc_nrecs"] = len(idt)
    series_list = list(ds_in.series.keys())
    if "DateTime" in series_list:
        series_list.remove("DateTime")
    for label in series_list:
        #print label
        data_in, flag_in, attr_in = pfp_utils.GetSeriesasMA(ds_in, label)
        # check if we are dealing with precipitation
        if "Precip" in label:
            # precipitation shouldn't be interpolated, just assign any precipitation
            # to the ISD time stamp.
            data_out = numpy.ma.zeros(len(idt), dtype=numpy.float64)
            idx = numpy.searchsorted(x2, numpy.intersect1d(x2, x1))
            idy = numpy.searchsorted(x1, numpy.intersect1d(x1, x2))
            data_out[idx] = data_in[idy]
        else:
            # interpolate everything else
            data_out = interpolate_1d(x1, data_in, x2)
        flag_out = numpy.zeros(len(idt))
        attr_out = attr_in
        pfp_utils.CreateSeries(ds_out,
                               label,
                               data_out,
                               Flag=flag_out,
                               Attr=attr_out)

    return ds_out
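A minimal usage sketch, assuming ds_hourly is an existing PFP data structure with a "DateTime" series (the name is hypothetical):

ds_30 = interpolate_ds(ds_hourly, 30)  # interpolate onto a 30 minute time step
print(ds_30.globalattributes["time_step"], ds_30.globalattributes["nc_nrecs"])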
Example #4
def read_isd_file_gz(isd_file_path):
    """
    Purpose:
     Reads an ISD CSV file (gz or uncompressed) and returns the data in a data structure.
    Assumptions:
    Usage:
    Author: PRI
    Date: June 2017
    """
    isd_file_name = os.path.split(isd_file_path)[1]
    msg = "Reading ISD file " + isd_file_name
    logger.info(msg)
    isd_site_id = isd_file_name.split("-")
    isd_site_id = isd_site_id[0] + "-" + isd_site_id[1]
    # read the file
    if os.path.splitext(isd_file_path)[1] == ".gz":
        with gzip.open(isd_file_path, 'r') as fp:
            content = fp.readlines()
    else:
        # open in binary mode so the byte slicing and decode() calls below work
        # for both the gzipped and uncompressed cases
        with open(isd_file_path, "rb") as fp:
            content = fp.readlines()
    # get a data structure
    ds = pfp_io.DataStructure()
    # get the site latitude, longitude and altitude
    ds.globalattributes["altitude"] = float(content[0][46:51].decode('utf-8'))
    ds.globalattributes["latitude"] = float(
        content[0][28:34].decode('utf-8')) / float(1000)
    ds.globalattributes["longitude"] = float(
        content[0][34:41].decode('utf-8')) / float(1000)
    ds.globalattributes["isd_site_id"] = isd_site_id
    # initialise the data structure
    ds.series["DateTime"] = {
        "Data": [],
        "Flag": [],
        "Attr": {
            "long_name": "Datetime",
            "units": "none"
        }
    }
    ds.series["Wd"] = {
        "Data": [],
        "Flag": [],
        "Attr": {
            "long_name": "Wind direction",
            "units": "degrees"
        }
    }
    ds.series["Ws"] = {
        "Data": [],
        "Flag": [],
        "Attr": {
            "long_name": "Wind speed",
            "units": "m/s"
        }
    }
    ds.series["Ta"] = {
        "Data": [],
        "Flag": [],
        "Attr": {
            "long_name": "Air temperature",
            "units": "degC"
        }
    }
    ds.series["Td"] = {
        "Data": [],
        "Flag": [],
        "Attr": {
            "long_name": "Dew point temperature",
            "units": "degC"
        }
    }
    ds.series["ps"] = {
        "Data": [],
        "Flag": [],
        "Attr": {
            "long_name": "Surface pressure",
            "units": "kPa"
        }
    }
    ds.series["Precip"] = {
        "Data": [],
        "Flag": [],
        "Attr": {
            "long_name": "Precipitation",
            "units": "mm"
        }
    }
    # define the codes for good data in the ISD file
    OK_obs_code = [
        "AUTO ", "CRN05", "CRN15", "FM-12", "FM-15", "FM-16", "SY-MT"
    ]
    # iterate over the lines in the file and decode the data
    for i in range(len(content) - 1):
        #for i in range(10):
        # filter out anything other than hourly data
        if content[i][41:46].decode('utf-8') not in OK_obs_code: continue
        YY = int(content[i][15:19].decode('utf-8'))
        MM = int(content[i][19:21].decode('utf-8'))
        DD = int(content[i][21:23].decode('utf-8'))
        HH = int(content[i][23:25].decode('utf-8'))
        mm = int(content[i][25:27].decode('utf-8'))
        dt = datetime.datetime(YY, MM, DD, HH, mm, 0)
        ds.series["DateTime"]["Data"].append(pytz.utc.localize(dt))
        # wind direction, degT
        try:
            ds.series["Wd"]["Data"].append(
                float(content[i][60:63].decode('utf-8')))
        except:
            ds.series["Wd"]["Data"].append(float(999))
        # wind speed, m/s
        try:
            ds.series["Ws"]["Data"].append(
                float(content[i][65:69].decode('utf-8')) / float(10))
        except:
            ds.series["Ws"]["Data"].append(float(999.9))
        # air temperature, C
        try:
            ds.series["Ta"]["Data"].append(
                float(content[i][87:92].decode('utf-8')) / float(10))
        except:
            ds.series["Ta"]["Data"].append(float(999.9))
        # dew point temperature, C
        try:
            ds.series["Td"]["Data"].append(
                float(content[i][93:98].decode('utf-8')) / float(10))
        except:
            ds.series["Td"]["Data"].append(float(999.9))
        # sea level pressure, hPa
        try:
            ds.series["ps"]["Data"].append(
                float(content[i][99:104].decode('utf-8')) / float(10))
        except:
            ds.series["ps"]["Data"].append(float(9999.9))
        # precipitation, mm
        if content[i][108:111].decode('utf-8') == "AA1":
            try:
                ds.series["Precip"]["Data"].append(
                    float(content[i][113:117].decode('utf-8')) / float(10))
            except:
                ds.series["Precip"]["Data"].append(float(999.9))
        else:
            ds.series["Precip"]["Data"].append(float(999.9))

    # add the time zone to the DateTime attributes
    ds.series["DateTime"]["Attr"]["time_zone"] = "UTC"
    # convert from lists to masked arrays
    f0 = numpy.zeros(len(ds.series["DateTime"]["Data"]))
    f1 = numpy.ones(len(ds.series["DateTime"]["Data"]))
    ds.series["DateTime"]["Data"] = numpy.array(ds.series["DateTime"]["Data"])
    ds.series["DateTime"]["Flag"] = f0
    ds.globalattributes["nc_nrecs"] = len(f0)
    dt_delta = pfp_utils.get_timestep(ds)
    ts = scipy.stats.mode(dt_delta)[0] / 60
    ds.globalattributes["time_step"] = ts[0]

    ds.series["Wd"]["Data"] = numpy.ma.masked_equal(ds.series["Wd"]["Data"],
                                                    999)
    ds.series["Wd"]["Flag"] = numpy.where(
        numpy.ma.getmaskarray(ds.series["Wd"]["Data"]) == True, f1, f0)
    ds.series["Ws"]["Data"] = numpy.ma.masked_equal(ds.series["Ws"]["Data"],
                                                    999.9)
    ds.series["Ws"]["Flag"] = numpy.where(
        numpy.ma.getmaskarray(ds.series["Ws"]["Data"]) == True, f1, f0)
    ds.series["Ta"]["Data"] = numpy.ma.masked_equal(ds.series["Ta"]["Data"],
                                                    999.9)
    ds.series["Ta"]["Flag"] = numpy.where(
        numpy.ma.getmaskarray(ds.series["Ta"]["Data"]) == True, f1, f0)
    ds.series["Td"]["Data"] = numpy.ma.masked_equal(ds.series["Td"]["Data"],
                                                    999.9)
    ds.series["Td"]["Flag"] = numpy.where(
        numpy.ma.getmaskarray(ds.series["Td"]["Data"]) == True, f1, f0)
    # hPa to kPa
    ds.series["ps"]["Data"] = numpy.ma.masked_equal(ds.series["ps"]["Data"],
                                                    9999.9) / float(10)
    ds.series["ps"]["Flag"] = numpy.where(
        numpy.ma.getmaskarray(ds.series["ps"]["Data"]) == True, f1, f0)
    # convert sea level pressure to station pressure
    site_altitude = float(ds.globalattributes["altitude"])
    cfac = numpy.ma.exp(
        (-1 * site_altitude) / ((ds.series["Ta"]["Data"] + 273.15) * 29.263))
    ds.series["ps"]["Data"] = ds.series["ps"]["Data"] * cfac
    # do precipitation and apply crude limits
    ds.series["Precip"]["Data"] = numpy.ma.masked_equal(
        ds.series["Precip"]["Data"], 999.9)
    condition = (ds.series["Precip"]["Data"] <
                 0) | (ds.series["Precip"]["Data"] > 100)
    ds.series["Precip"]["Data"] = numpy.ma.masked_where(
        condition, ds.series["Precip"]["Data"])
    ds.series["Precip"]["Flag"] = numpy.where(
        numpy.ma.getmaskarray(ds.series["Precip"]["Data"]) == True, f1, f0)
    # get the humidities from Td
    Ta, flag, attr = pfp_utils.GetSeriesasMA(ds, "Ta")
    Td, flag, attr = pfp_utils.GetSeriesasMA(ds, "Td")
    ps, flag, attr = pfp_utils.GetSeriesasMA(ds, "ps")

    RH = mf.relativehumidityfromdewpoint(Td, Ta)
    flag = numpy.where(numpy.ma.getmaskarray(RH) == True, f1, f0)
    attr = {"long_name": "Relative humidity", "units": "percent"}
    pfp_utils.CreateSeries(ds, "RH", RH, Flag=flag, Attr=attr)
    AH = mf.absolutehumidityfromrelativehumidity(Ta, RH)
    flag = numpy.where(numpy.ma.getmaskarray(AH) == True, f1, f0)
    attr = {"long_name": "Absolute humidity", "units": "g/m^3"}
    pfp_utils.CreateSeries(ds, "AH", AH, Flag=flag, Attr=attr)
    SH = mf.specifichumidityfromrelativehumidity(RH, Ta, ps)
    flag = numpy.where(numpy.ma.getmaskarray(SH) == True, f1, f0)
    attr = {"long_name": "Specific humidity", "units": "kg/kg"}
    pfp_utils.CreateSeries(ds, "SH", SH, Flag=flag, Attr=attr)

    # return the data
    return ds
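A minimal usage sketch; the path is hypothetical but follows the "USAF-WBAN-year" file naming that the site id is parsed from:

ds = read_isd_file_gz("/data/isd/010010-99999-2020.gz")  # hypothetical ISD file path
print(ds.globalattributes["isd_site_id"], ds.globalattributes["time_step"])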
Example #5
def read_isd_file_csv(isd_file_path):
    """
    Purpose:
     Reads a NOAA ISD CSV file downloaded from https://www.ncei.noaa.gov/data/global-hourly/access/
     These files used to be field formatted ASCII where the character position in a line
     of ASCII determined the data type.  Some time in 2020 or 2021, the old FFA format
     was replaced with CSV.
     The format of the old-style .gz files is described in
     https://www.ncei.noaa.gov/data/global-hourly/doc/isd-format-document.pdf
     This document still describes the data in the new CSV format.
    Usage:
    Side effects:
     Returns a PFP data structure with the data at the site time step.
    Author: PRI
    Date: July 2021
    """
    msg = " Reading " + isd_file_path
    logger.info(msg)
    # list of variables to read from the CSV file
    csv_labels = [
        "STATION", "DATE", "LATITUDE", "LONGITUDE", "ELEVATION", "REPORT_TYPE",
        "QUALITY_CONTROL", "WND", "TMP", "DEW", "SLP", "AA1", "AA2", "AA3",
        "AA4"
    ]
    # read the CSV file
    df = pandas.read_csv(isd_file_path, delimiter=",", header=0)
    # remove items from csv_labels that are not in the data frame
    df_labels = df.columns.to_list()
    for csv_label in list(csv_labels):
        if csv_label not in df_labels:
            csv_labels.remove(csv_label)
    # keep only what we need
    df = df[csv_labels]
    # remove duplicate dates, keep the SYNOP (FM-12) reports
    # first, we find the duplicate dates
    df["Duplicates"] = df["DATE"].duplicated()
    # next, we drop rows with duplicate dates that are not SYNOP reports
    df = df.drop(df[(df["Duplicates"]) & (df["REPORT_TYPE"] != "FM-12")].index)
    # then check for duplicates again
    df["Duplicates"] = df["DATE"].duplicated()
    if df["Duplicates"].sum() != 0:
        msg = " Unable to remove all duplicate dates in files"
        logger.error(msg)
        raise ValueError(msg)
    # convert the date in the CSV file to a pandas datetime
    df["TIMESTAMP"] = pandas.to_datetime(df["DATE"].astype("string"),
                                         errors="raise")
    # find all of the timestamps (should only be 1)
    timestamps = list(df.select_dtypes(include=['datetime64']))
    # take the first if more than 1
    timestamp = timestamps[0]
    # use the timestamp as the index
    df.set_index(timestamp, inplace=True)
    df.index = df.index.round('1S')
    # wind direction field, see isd_format_document.pdf for details
    wind = df["WND"].str.split(',', expand=True)
    df["Wd"] = wind[0].apply(pandas.to_numeric, errors='coerce')
    df["Ws"] = wind[3].apply(pandas.to_numeric, errors='coerce') / float(10)
    del df["WND"]
    # air temperature
    temperature = df["TMP"].str.split(',', expand=True)
    df["Ta"] = temperature[0].apply(pandas.to_numeric,
                                    errors='coerce') / float(10)
    del df["TMP"]
    # dew point temperature
    dew_point = df["DEW"].str.split(',', expand=True)
    df["Td"] = dew_point[0].apply(pandas.to_numeric,
                                  errors='coerce') / float(10)
    del df["DEW"]
    # surface pressure
    surface_pressure = df["SLP"].str.split(',', expand=True)
    df["ps"] = surface_pressure[0].apply(pandas.to_numeric,
                                         errors='coerce') / float(100)
    del df["SLP"]
    # Precipitation is stored in columns AA1 to AA4 but not all columns will be present
    #
    # Within each column, precipitation is stored as "HH,PPPP,C,Q" where HH is the
    # period over which the precipitation was accumulated (e.g. 1, 3, 6, 24 hours),
    # PPPP is the precipitation amount in mm*10, C is the condition code and Q is
    # the QC flag (1 = passed all QC checks)
    #
    # Column AA1 contains most of the precipitation data.  When precipitation data is
    # available for 2 accumulation periods e.g. 3 hours and 6 or 24 hours, the second
    # accumulation period is given in AA2.  And so on for up to 4 separate accumulation
    # periods e.g. 1 hour, 3 hours, 6 hours and 24 hours.
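    # For example, an AA1 value of "01,0005,9,1" is 0.5 mm accumulated over 1 hour
    # with quality flag 1 (passed all QC checks).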
    #
    # get a list of the precipitation columns in the data frame
    precip_labels = [l for l in df.columns.to_list() if "AA" in l]
    # create a data frame for the precipitation data, same index as main data frame
    df_precip = pandas.DataFrame(index=df.index)
    # loop over the precipitation fields
    for precip_label in precip_labels:
        # split the "HH,PPPP,C,Q" fields to get individual parts
        tmp = df[precip_label].str.split(',', expand=True)
        # name the columns
        tmp.columns = ["Period", "Amount", "Condition", "Quality"]
        # coerce to numeric values
        tmp = tmp.apply(pandas.to_numeric, errors='coerce')
        # loop over the accumulation periods
        for n in [1, 3, 6, 24]:
            # get the data for this accumulation period and store in a new column
            # e.g. "3_hourly_AA1"
            tmp.loc[(tmp["Period"] == n) & (tmp["Quality"] == 1),
                    str(n) + "_hourly_" + precip_label] = tmp["Amount"]
        # drop the intermediate columns, no longer needed
        tmp = tmp.drop(["Period", "Amount", "Condition", "Quality"], axis=1)
        # concatenate the new data
        df_precip = pandas.concat([df_precip, tmp], axis=1)
        # drop the individual columns e.g. AA1, AA2 etc
        df.drop(precip_label, axis=1, inplace=True)
    # now loop over the accumulation periods and combine to get a single column
    # for each accumulation period
    for n in [1, 3, 6, 24]:
        # list of column headings for this accumulation period
        label = str(n) + "_hourly"
        hour_labels = [l for l in df_precip.columns.to_list() if label in l]
        # rename the first column e.g. "3_hourly_AA1" to "3_hourly"
        df_precip.rename({hour_labels[0]: label}, axis=1, inplace=True)
        # loop over the remaining columns and merge into a single column for this
        # accumulation period
        for hour_label in hour_labels[1:]:
            # merge "3_hourly" with "3_hourly_AA2" etc
            df_precip[label] = df_precip[label].combine_first(
                df_precip[hour_label])
            # delete columns that are no longer needed
            df_precip.drop(hour_label, axis=1, inplace=True)
        # convert mm*10 to mm (once per accumulation period, regardless of how
        # many AAx columns were merged)
        df_precip[label] = df_precip[label] / float(10)
    # print the sum of the 1, 3, 6 and 24 hourly accumulation periods (we expect them to
    # be equal)
    msg = " 1 hourly precipitation total is " + str(
        round(df_precip["1_hourly"].sum(), 4))
    logger.info(msg)
    msg = " 3 hourly precipitation total is " + str(
        round(df_precip["3_hourly"].sum(), 4))
    logger.info(msg)
    msg = " 6 hourly precipitation total is " + str(
        round(df_precip["6_hourly"].sum(), 4))
    logger.info(msg)
    msg = " 24 hourly precipitation total is " + str(
        round(df_precip["24_hourly"].sum(), 4))
    logger.info(msg)
    # choose the most common accumulation period
    msg = " Using " + df_precip.count().idxmax() + " for precipitation"
    logger.info(msg)
    # and use it for the precipitation data
    df["Precip"] = df_precip[df_precip.count().idxmax()]
    # now copy the data from a pandas data frame to a PFP data structure
    nrecs = len(df)
    ones = numpy.ones(nrecs)
    zeros = numpy.zeros(nrecs)
    # create a data structure
    ds_its = pfp_io.DataStructure()
    # set the global attributes
    ds_its.globalattributes["nc_nrecs"] = nrecs
    ds_its.globalattributes["altitude"] = float(df["ELEVATION"][0])
    ds_its.globalattributes["latitude"] = float(df["LATITUDE"][0])
    ds_its.globalattributes["longitude"] = float(df["LONGITUDE"][0])
    ds_its.globalattributes["isd_site_id"] = int(df["STATION"][0])
    # get the datetime variable
    ldt = pfp_utils.CreateEmptyVariable("DateTime", nrecs)
    ldt["Data"] = numpy.array(df.index.to_pydatetime())
    ldt["Flag"] = zeros
    ldt["Attr"] = {"long_name": "Datetime in UTC", "units": ""}
    pfp_utils.CreateVariable(ds_its, ldt)
    # get the time step
    dt = pfp_utils.get_timestep(ds_its)
    time_step = int(scipy.stats.mode(dt / float(60))[0][0])
    if time_step not in [10, 30, 60, 180]:
        msg = " Time step (" + str(
            time_step) + ") must be 10, 30, 60 or 180 minutes"
        logger.error(msg)
        raise ValueError(msg)
    else:
        ds_its.globalattributes["time_step"] = time_step
    # now add the other variables
    # wind direction
    Wd = pfp_utils.CreateEmptyVariable("Wd", nrecs, datetime=ldt["Data"])
    Wd["Data"] = numpy.ma.masked_equal(df["Wd"].values, 999)
    Wd["Flag"] = numpy.where(
        numpy.ma.getmaskarray(Wd["Data"]) == True, ones, zeros)
    Wd["Attr"] = {
        "long_name": "Wind direction",
        "statistic_type": "average",
        "standard_name": "wind_from_direction",
        "units": "degrees"
    }
    pfp_utils.CreateVariable(ds_its, Wd)
    # wind speed
    Ws = pfp_utils.CreateEmptyVariable("Ws", nrecs, datetime=ldt["Data"])
    Ws["Data"] = numpy.ma.masked_equal(df["Ws"].values, 999.9)
    Ws["Flag"] = numpy.where(
        numpy.ma.getmaskarray(Ws["Data"]) == True, ones, zeros)
    Ws["Attr"] = {
        "long_name": "Wind speed",
        "statistic_type": "average",
        "standard_name": "wind_speed",
        "units": "m/s"
    }
    pfp_utils.CreateVariable(ds_its, Ws)
    # air temperature
    Ta = pfp_utils.CreateEmptyVariable("Ta", nrecs, datetime=ldt["Data"])
    Ta["Data"] = numpy.ma.masked_equal(df["Ta"].values, 999.9)
    Ta["Flag"] = numpy.where(
        numpy.ma.getmaskarray(Ta["Data"]) == True, ones, zeros)
    Ta["Attr"] = {
        "long_name": "Air temperature",
        "statistic_type": "average",
        "standard_name": "air_temperature",
        "units": "degC"
    }
    pfp_utils.CreateVariable(ds_its, Ta)
    # dew point temperature
    Td = pfp_utils.CreateEmptyVariable("Td", nrecs, datetime=ldt["Data"])
    Td["Data"] = numpy.ma.masked_equal(df["Td"].values, 999.9)
    Td["Flag"] = numpy.where(
        numpy.ma.getmaskarray(Td["Data"]) == True, ones, zeros)
    Td["Attr"] = {
        "long_name": "Dew point temperature",
        "statistic_type": "average",
        "standard_name": "dew_point_temperature",
        "units": "degC"
    }
    pfp_utils.CreateVariable(ds_its, Td)
    # surface pressure
    ps = pfp_utils.CreateEmptyVariable("ps", nrecs, datetime=ldt["Data"])
    site_altitude = float(ds_its.globalattributes["altitude"])
    cfac = numpy.ma.exp(
        (-1 * site_altitude) / ((Ta["Data"] + 273.15) * 29.263))
    ps["Data"] = numpy.ma.masked_equal(df["ps"].values, 9999.9)
    ps["Data"] = ps["Data"] * cfac
    ps["Flag"] = numpy.where(
        numpy.ma.getmaskarray(ps["Data"]) == True, ones, zeros)
    ps["Attr"] = {
        "long_name": "Surface pressure",
        "statistic_type": "average",
        "standard_name": "surface_air_pressure",
        "units": "kPa"
    }
    pfp_utils.CreateVariable(ds_its, ps)
    # precipitation
    Precip = pfp_utils.CreateEmptyVariable("Precip",
                                           nrecs,
                                           datetime=ldt["Data"])
    Precip["Data"] = numpy.ma.masked_equal(df["Precip"].values, 999.9)
    Precip["Flag"] = numpy.where(
        numpy.ma.getmaskarray(Precip["Data"]) == True, ones, zeros)
    Precip["Attr"] = {
        "long_name": "Rainfall",
        "statistic_type": "sum",
        "standard_name": "thickness_of_rainfall_amount",
        "units": "mm"
    }
    pfp_utils.CreateVariable(ds_its, Precip)
    # relative humidity
    RH = pfp_utils.CreateEmptyVariable("RH", nrecs, datetime=ldt["Data"])
    RH["Data"] = mf.relativehumidityfromdewpoint(Td["Data"], Ta["Data"])
    RH["Flag"] = numpy.where(
        numpy.ma.getmaskarray(RH["Data"]) == True, ones, zeros)
    RH["Attr"] = {
        "long_name": "Relative humidity",
        "statistics_type": "average",
        "standard_name": "relative_humidity",
        "units": "percent"
    }
    pfp_utils.CreateVariable(ds_its, RH)
    # absolute humidity
    AH = pfp_utils.CreateEmptyVariable("AH", nrecs, datetime=ldt["Data"])
    AH["Data"] = mf.absolutehumidityfromrelativehumidity(
        Ta["Data"], RH["Data"])
    AH["Flag"] = numpy.where(
        numpy.ma.getmaskarray(AH["Data"]) == True, ones, zeros)
    AH["Attr"] = {
        "long_name": "Absolute humidity",
        "statistic_type": "average",
        "standard_name": "mass_concentration_of_water_vapor_in_air",
        "units": "g/m^3"
    }
    pfp_utils.CreateVariable(ds_its, AH)
    # specific humidity
    SH = pfp_utils.CreateEmptyVariable("SH", nrecs, datetime=ldt["Data"])
    SH["Data"] = mf.specifichumidityfromrelativehumidity(
        RH["Data"], Ta["Data"], ps["Data"])
    SH["Flag"] = numpy.where(
        numpy.ma.getmaskarray(SH["Data"]) == True, ones, zeros)
    SH["Attr"] = {
        "long_name": "Specific humidity",
        "statistic_type": "average",
        "standard_name": "specific_humidity",
        "units": "kg/kg"
    }
    pfp_utils.CreateVariable(ds_its, SH)
    return ds_its
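A minimal usage sketch; the path is hypothetical:

ds_its = read_isd_file_csv("/data/isd/01001099999.csv")  # hypothetical NOAA ISD CSV file
print(ds_its.globalattributes["isd_site_id"], ds_its.globalattributes["time_step"])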
Example #6
        # add some useful global attributes
        ds_out[site_index[isd_site]].globalattributes[
            "isd_site_id"] = isd_site
        ds_out[
            site_index[isd_site]].globalattributes["time_zone"] = time_zone
        # write out a netCDF file for each ISD site and each year
        #nc_file_name = isd_site+"_"+str(year)+".nc"
        #nc_dir_path = os.path.join(out_base_path,site,"Data","ISD")
        #if not os.path.exists(nc_dir_path):
        #os.makedirs(nc_dir_path)
        #nc_file_path = os.path.join(nc_dir_path,nc_file_name)
        #nc_file = pfp_io.nc_open_write(nc_file_path)
        #pfp_io.nc_write_series(nc_file, ds_out[site_index[isd_site]], ndims=1)
    # now we merge the data structures for each ISD station into a single data structure
    # first, instance a data structure
    ds_all = pfp_io.DataStructure()
    ds_all.globalattributes["latitude"] = site_info[site]["Latitude"]
    ds_all.globalattributes["longitude"] = site_info[site]["Longitude"]
    ds_all.globalattributes["altitude"] = site_info[site]["Altitude"]
    ds_all.globalattributes["site_name"] = site_info[site]["site_name"]
    # now loop over the data structures for each ISD station and get the earliest
    # start time and the latest end time
    start_datetime = []
    end_datetime = []
    for i in list(ds_out.keys()):
        # print(i)
        start_datetime.append(ds_out[i].series["DateTime"]["Data"][0])
        end_datetime.append(ds_out[i].series["DateTime"]["Data"][-1])
    start = min(start_datetime)
    end = max(end_datetime)
    # now make a datetime series at the required time step from the earliest start
Example #7
                l = label + "_" + str(i) + str(j)
                data_nogaps[site][l]["Data"] = numpy.ma.masked_all(
                    nrecs_nogaps)
                data_nogaps[site][l]["Flag"] = numpy.ones(nrecs_nogaps)
                data_nogaps[site][l]["Data"][iA] = numpy.ma.array(
                    data[site][l]["Data"])[iB]
                data_nogaps[site][l]["Flag"][iA] = int(0)

# now we copy the data from the no gaps data sets to PFP data structures
# dictionary to hold data structures for each site
dss_ats = {}
msg = " Creating data structure for each site"
logger.info(msg)
for site in sites:
    # create a data structure for this site
    dss_ats[site] = pfp_io.DataStructure()
    # convert UTC datetime to local standard time
    dt_loc_nogaps = numpy.array(
        convert_utc_to_local_standard(dt_utc_nogaps,
                                      site_info[site]["Time zone"]))
    # add the global attributes
    dss_ats[site].root["Attributes"]["site_name"] = site.replace(" ", "")
    dss_ats[site].root["Attributes"]["time_zone"] = site_info[site][
        "Time zone"]
    dss_ats[site].root["Attributes"]["latitude"] = site_info[site]["Latitude"]
    dss_ats[site].root["Attributes"]["longitude"] = site_info[site][
        "Longitude"]
    dss_ats[site].root["Attributes"]["time_coverage_start"] = str(
        dt_loc_nogaps[0])
    dss_ats[site].root["Attributes"]["time_coverage_end"] = str(
        dt_loc_nogaps[-1])