def l1qc(cfg):
    """
    Purpose:
     Reads input files, either an Excel workbook or a collection of CSV files,
     and returns the data as a data structure.
    Usage:
    Side effects:
     Returns a data structure containing the data specified in the L1 control file.
    Author: PRI
    Date: February 2020
    """
    # parse the L1 control file
    l1_info = pfp_compliance.ParseL1ControlFile(cfg)
    # read the input file into a pandas data frame
    dfs = pfp_io.ReadInputFile(l1_info)
    # discard empty data frames
    for key in list(dfs.keys()):
        if len(dfs[key]) == 0:
            dfs.pop(key)
    if len(list(dfs.keys())) == 0:
        ds = pfp_io.DataStructure()
        ds.info["returncodes"]["value"] = 1
        ds.info["returncodes"]["message"] = "An error occurred reading the input file"
        return ds
    # merge the data frames (1 per Excel worksheet)
    df = pfp_io.MergeDataFrames(dfs, l1_info)
    # convert the data frame to a PFP data structure and add metadata
    ds = pfp_io.DataFrameToDataStructure(df, l1_info)
    # write the processing level to a global attribute
    ds.root["Attributes"]["processing_level"] = "L1"
    # apply linear corrections to the data
    pfp_ck.do_linear(cfg, ds)
    # create new variables using user defined functions
    pfp_ts.DoFunctions(ds, l1_info["read_excel"])
    # calculate variances from standard deviations and vice versa
    pfp_ts.CalculateStandardDeviations(ds)
    # check missing data and QC flags are consistent
    pfp_utils.CheckQCFlags(ds)
    return ds
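# Minimal usage sketch for l1qc (hypothetical caller, not part of the PFP API).
# It assumes `cfg` is an L1 control file object loaded elsewhere and that the
# returned data structure carries the same "returncodes" entries used in the
# error path inside l1qc.
def _demo_l1qc_call(cfg):
    ds = l1qc(cfg)
    if ds.info["returncodes"]["value"] != 0:
        # reading the input file failed, report and let the caller decide
        logger.error(ds.info["returncodes"]["message"])
    return ds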
def include_variables(std, ds_in):
    """
    Purpose:
     Include only those variables whose labels start with one of the strings
     listed in the control file.
    Usage:
    Author: PRI
    Date: November 2018
    """
    msg = " Including variables ..."
    logger.info(msg)
    # get a new data structure
    ds_out = pfp_io.DataStructure()
    # copy the global attributes
    for gattr in ds_in.globalattributes:
        ds_out.globalattributes[gattr] = ds_in.globalattributes[gattr]
    # loop over variables to be included
    include_list = pfp_utils.string_to_list(std["Variables"]["include"]["include"])
    series_list = list(ds_in.series.keys())
    for item in include_list:
        for label in series_list:
            if label[0:len(item)] == item:
                ds_out.series[label] = ds_in.series[label]
    return ds_out
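# Hypothetical sketch (illustration only, not part of the PFP API) of the prefix
# rule used by include_variables: a series label is kept when it starts with one
# of the include strings. The labels below are made up for the example.
def _demo_include_prefix_match():
    include_list = ["Fco2", "Ta"]
    series_list = ["Fco2", "Fco2_QCFlag", "Ta_HMP_2m", "Fe"]
    kept = [label for label in series_list
            for item in include_list if label[0:len(item)] == item]
    # kept == ["Fco2", "Fco2_QCFlag", "Ta_HMP_2m"]
    return kept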
def interpolate_ds(ds_in, ts, k=3):
    """
    Purpose:
     Interpolate the contents of a data structure onto a different time step.
    Assumptions:
    Usage:
    Author: PRI
    Date: June 2017
    """
    # instance the output data structure
    ds_out = pfp_io.DataStructure()
    # copy the global attributes
    for key in list(ds_in.globalattributes.keys()):
        ds_out.globalattributes[key] = ds_in.globalattributes[key]
    # add the time step
    ds_out.globalattributes["time_step"] = str(ts)
    # generate a regular time series at the required time step
    dt = ds_in.series["DateTime"]["Data"]
    dt0 = dt[0] - datetime.timedelta(minutes=30)
    start = datetime.datetime(dt0.year, dt0.month, dt0.day, dt0.hour, 0, 0)
    dt1 = dt[-1] + datetime.timedelta(minutes=30)
    end = datetime.datetime(dt1.year, dt1.month, dt1.day, dt1.hour, 0, 0)
    idt = [result for result in perdelta(start, end, datetime.timedelta(minutes=ts))]
    x1 = numpy.array([toTimestamp(dt[i]) for i in range(len(dt))])
    x2 = numpy.array([toTimestamp(idt[i]) for i in range(len(idt))])
    # loop over the series in the data structure and interpolate
    ds_out.series["DateTime"] = {}
    ds_out.series["DateTime"]["Data"] = idt
    ds_out.series["DateTime"]["Flag"] = numpy.zeros(len(idt))
    ds_out.series["DateTime"]["Attr"] = {"long_name": "Datetime", "units": "none"}
    ds_out.globalattributes["nc_nrecs"] = len(idt)
    series_list = list(ds_in.series.keys())
    if "DateTime" in series_list:
        series_list.remove("DateTime")
    for label in series_list:
        data_in, flag_in, attr_in = pfp_utils.GetSeriesasMA(ds_in, label)
        # check if we are dealing with precipitation
        if "Precip" in label:
            # precipitation shouldn't be interpolated, just assign any precipitation
            # to the ISD time stamp
            data_out = numpy.ma.zeros(len(idt), dtype=numpy.float64)
            idx = numpy.searchsorted(x2, numpy.intersect1d(x2, x1))
            idy = numpy.searchsorted(x1, numpy.intersect1d(x1, x2))
            data_out[idx] = data_in[idy]
        else:
            # interpolate everything else
            data_out = interpolate_1d(x1, data_in, x2)
        flag_out = numpy.zeros(len(idt))
        attr_out = attr_in
        pfp_utils.CreateSeries(ds_out, label, data_out, Flag=flag_out, Attr=attr_out)
    return ds_out
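# Hypothetical sketch (illustration only) of the index mapping used above for
# precipitation: matching timestamps are found with intersect1d and the values are
# copied across, so rainfall totals are never interpolated. The timestamps and
# values below are made up for the example.
def _demo_precip_mapping():
    x1 = numpy.array([0, 60, 120, 180])                 # original timestamps (seconds)
    x2 = numpy.array([0, 30, 60, 90, 120, 150, 180])    # new, finer time axis
    data_in = numpy.ma.array([0.0, 1.2, 0.0, 0.4])
    data_out = numpy.ma.zeros(len(x2), dtype=numpy.float64)
    idx = numpy.searchsorted(x2, numpy.intersect1d(x2, x1))
    idy = numpy.searchsorted(x1, numpy.intersect1d(x1, x2))
    data_out[idx] = data_in[idy]
    # data_out == [0.0, 0.0, 1.2, 0.0, 0.0, 0.0, 0.4]
    return data_out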
def read_isd_file_gz(isd_file_path):
    """
    Purpose:
     Reads an ISD file in the old fixed-width format (gzipped or uncompressed)
     and returns the data in a data structure.
    Assumptions:
    Usage:
    Author: PRI
    Date: June 2017
    """
    isd_file_name = os.path.split(isd_file_path)[1]
    msg = "Reading ISD file " + isd_file_name
    logger.info(msg)
    isd_site_id = isd_file_name.split("-")
    isd_site_id = isd_site_id[0] + "-" + isd_site_id[1]
    # read the file
    if os.path.splitext(isd_file_path)[1] == ".gz":
        with gzip.open(isd_file_path, 'r') as fp:
            content = fp.readlines()
    else:
        with open(isd_file_path) as fp:
            content = fp.readlines()
    # get a data structure
    ds = pfp_io.DataStructure()
    # get the site latitude, longitude and altitude
    ds.globalattributes["altitude"] = float(content[0][46:51].decode('utf-8'))
    ds.globalattributes["latitude"] = float(content[0][28:34].decode('utf-8')) / float(1000)
    ds.globalattributes["longitude"] = float(content[0][34:41].decode('utf-8')) / float(1000)
    ds.globalattributes["isd_site_id"] = isd_site_id
    # initialise the data structure
    ds.series["DateTime"] = {"Data": [], "Flag": [],
                             "Attr": {"long_name": "Datetime", "units": "none"}}
    ds.series["Wd"] = {"Data": [], "Flag": [],
                       "Attr": {"long_name": "Wind direction", "units": "degrees"}}
    ds.series["Ws"] = {"Data": [], "Flag": [],
                       "Attr": {"long_name": "Wind speed", "units": "m/s"}}
    ds.series["Ta"] = {"Data": [], "Flag": [],
                       "Attr": {"long_name": "Air temperature", "units": "degC"}}
    ds.series["Td"] = {"Data": [], "Flag": [],
                       "Attr": {"long_name": "Dew point temperature", "units": "degC"}}
    ds.series["ps"] = {"Data": [], "Flag": [],
                       "Attr": {"long_name": "Surface pressure", "units": "kPa"}}
    ds.series["Precip"] = {"Data": [], "Flag": [],
                           "Attr": {"long_name": "Precipitation", "units": "mm"}}
    # define the codes for good data in the ISD file
    OK_obs_code = ["AUTO ", "CRN05", "CRN15", "FM-12", "FM-15", "FM-16", "SY-MT"]
    # iterate over the lines in the file and decode the data
    for i in range(len(content) - 1):
        # filter out anything other than hourly data
        if content[i][41:46].decode('utf-8') not in OK_obs_code:
            continue
        YY = int(content[i][15:19].decode('utf-8'))
        MM = int(content[i][19:21].decode('utf-8'))
        DD = int(content[i][21:23].decode('utf-8'))
        HH = int(content[i][23:25].decode('utf-8'))
        mm = int(content[i][25:27].decode('utf-8'))
        dt = datetime.datetime(YY, MM, DD, HH, mm, 0)
        ds.series["DateTime"]["Data"].append(pytz.utc.localize(dt))
        # wind direction, degT
        try:
            ds.series["Wd"]["Data"].append(float(content[i][60:63].decode('utf-8')))
        except:
            ds.series["Wd"]["Data"].append(float(999))
        # wind speed, m/s
        try:
            ds.series["Ws"]["Data"].append(float(content[i][65:69].decode('utf-8')) / float(10))
        except:
            ds.series["Ws"]["Data"].append(float(999.9))
        # air temperature, C
        try:
            ds.series["Ta"]["Data"].append(float(content[i][87:92].decode('utf-8')) / float(10))
        except:
            ds.series["Ta"]["Data"].append(float(999.9))
        # dew point temperature, C
        try:
            ds.series["Td"]["Data"].append(float(content[i][93:98].decode('utf-8')) / float(10))
        except:
            ds.series["Td"]["Data"].append(float(999.9))
        # sea level pressure, hPa
        try:
            ds.series["ps"]["Data"].append(float(content[i][99:104].decode('utf-8')) / float(10))
        except:
            ds.series["ps"]["Data"].append(float(9999.9))
        # precipitation, mm
        if content[i][108:111].decode('utf-8') == "AA1":
            try:
                ds.series["Precip"]["Data"].append(float(content[i][113:117].decode('utf-8')) / float(10))
            except:
                ds.series["Precip"]["Data"].append(float(999.9))
        else:
            ds.series["Precip"]["Data"].append(float(999.9))
    # add the time zone to the DateTime attributes
    ds.series["DateTime"]["Attr"]["time_zone"] = "UTC"
    # convert from lists to masked arrays
    f0 = numpy.zeros(len(ds.series["DateTime"]["Data"]))
    f1 = numpy.ones(len(ds.series["DateTime"]["Data"]))
    ds.series["DateTime"]["Data"] = numpy.array(ds.series["DateTime"]["Data"])
    ds.series["DateTime"]["Flag"] = f0
    ds.globalattributes["nc_nrecs"] = len(f0)
    dt_delta = pfp_utils.get_timestep(ds)
    ts = scipy.stats.mode(dt_delta)[0] / 60
    ds.globalattributes["time_step"] = ts[0]
    ds.series["Wd"]["Data"] = numpy.ma.masked_equal(ds.series["Wd"]["Data"], 999)
    ds.series["Wd"]["Flag"] = numpy.where(numpy.ma.getmaskarray(ds.series["Wd"]["Data"]) == True, f1, f0)
    ds.series["Ws"]["Data"] = numpy.ma.masked_equal(ds.series["Ws"]["Data"], 999.9)
    ds.series["Ws"]["Flag"] = numpy.where(numpy.ma.getmaskarray(ds.series["Ws"]["Data"]) == True, f1, f0)
    ds.series["Ta"]["Data"] = numpy.ma.masked_equal(ds.series["Ta"]["Data"], 999.9)
    ds.series["Ta"]["Flag"] = numpy.where(numpy.ma.getmaskarray(ds.series["Ta"]["Data"]) == True, f1, f0)
    ds.series["Td"]["Data"] = numpy.ma.masked_equal(ds.series["Td"]["Data"], 999.9)
    ds.series["Td"]["Flag"] = numpy.where(numpy.ma.getmaskarray(ds.series["Td"]["Data"]) == True, f1, f0)
    # hPa to kPa
    ds.series["ps"]["Data"] = numpy.ma.masked_equal(ds.series["ps"]["Data"], 9999.9) / float(10)
    ds.series["ps"]["Flag"] = numpy.where(numpy.ma.getmaskarray(ds.series["ps"]["Data"]) == True, f1, f0)
    # convert sea level pressure to station pressure
    site_altitude = float(ds.globalattributes["altitude"])
    cfac = numpy.ma.exp((-1 * site_altitude) / ((ds.series["Ta"]["Data"] + 273.15) * 29.263))
    ds.series["ps"]["Data"] = ds.series["ps"]["Data"] * cfac
    # do precipitation and apply crude limits
    ds.series["Precip"]["Data"] = numpy.ma.masked_equal(ds.series["Precip"]["Data"], 999.9)
    condition = (ds.series["Precip"]["Data"] < 0) | (ds.series["Precip"]["Data"] > 100)
    ds.series["Precip"]["Data"] = numpy.ma.masked_where(condition, ds.series["Precip"]["Data"])
    ds.series["Precip"]["Flag"] = numpy.where(numpy.ma.getmaskarray(ds.series["Precip"]["Data"]) == True, f1, f0)
    # get the humidities from Td
    Ta, flag, attr = pfp_utils.GetSeriesasMA(ds, "Ta")
    Td, flag, attr = pfp_utils.GetSeriesasMA(ds, "Td")
    ps, flag, attr = pfp_utils.GetSeriesasMA(ds, "ps")
    RH = mf.relativehumidityfromdewpoint(Td, Ta)
    flag = numpy.where(numpy.ma.getmaskarray(RH) == True, f1, f0)
    attr = {"long_name": "Relative humidity", "units": "percent"}
    pfp_utils.CreateSeries(ds, "RH", RH, Flag=flag, Attr=attr)
    AH = mf.absolutehumidityfromrelativehumidity(Ta, RH)
    flag = numpy.where(numpy.ma.getmaskarray(AH) == True, f1, f0)
    attr = {"long_name": "Absolute humidity", "units": "g/m^3"}
    pfp_utils.CreateSeries(ds, "AH", AH, Flag=flag, Attr=attr)
    SH = mf.specifichumidityfromrelativehumidity(RH, Ta, ps)
    flag = numpy.where(numpy.ma.getmaskarray(SH) == True, f1, f0)
    attr = {"long_name": "Specific humidity", "units": "kg/kg"}
    pfp_utils.CreateSeries(ds, "SH", SH, Flag=flag, Attr=attr)
    # return the data
    return ds
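# Hypothetical sketch (illustration only) of the sea level to station pressure
# correction applied above: ps_station = ps_sea_level * exp(-z / (29.263 * T_K)),
# where z is the site altitude in metres and T_K the air temperature in Kelvin.
# The numbers below are made up for the example.
def _demo_station_pressure():
    ps_sea_level = 101.3     # sea level pressure, kPa
    site_altitude = 500.0    # site altitude, m
    Ta = 15.0                # air temperature, degC
    cfac = numpy.ma.exp((-1 * site_altitude) / ((Ta + 273.15) * 29.263))
    return ps_sea_level * cfac   # ~95.5 kPa at the station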
def read_isd_file_csv(isd_file_path):
    """
    Purpose:
     Reads a NOAA ISD CSV file downloaded from
     https://www.ncei.noaa.gov/data/global-hourly/access/
     These files used to be field formatted ASCII where the character position in a
     line of ASCII determined the data type.  Some time in 2020 or 2021, the old FFA
     format was replaced with CSV.
     The format of the old-style .gz files is described in
     https://www.ncei.noaa.gov/data/global-hourly/doc/isd-format-document.pdf
     This document still describes the data in the new CSV format.
    Usage:
    Side effects:
     Returns a PFP data structure with the data at the site time step.
    Author: PRI
    Date: July 2021
    """
    msg = " Reading " + isd_file_path
    logger.info(msg)
    # list of variables to read from the CSV file
    csv_labels = ["STATION", "DATE", "LATITUDE", "LONGITUDE", "ELEVATION", "REPORT_TYPE",
                  "QUALITY_CONTROL", "WND", "TMP", "DEW", "SLP", "AA1", "AA2", "AA3", "AA4"]
    # read the CSV file
    df = pandas.read_csv(isd_file_path, delimiter=",", header=0)
    # remove items from csv_labels that are not in the data frame
    df_labels = df.columns.to_list()
    for csv_label in list(csv_labels):
        if csv_label not in df_labels:
            csv_labels.remove(csv_label)
    # keep only what we need
    df = df[csv_labels]
    # remove duplicate dates, keep the SYNOP (FM-12) reports
    # first, we find the duplicate dates
    df["Duplicates"] = df["DATE"].duplicated()
    # next, we drop rows with duplicate dates that are not SYNOP reports
    df = df.drop(df[(df["Duplicates"]) & (df["REPORT_TYPE"] != "FM-12")].index)
    # then check for duplicates again
    df["Duplicates"] = df["DATE"].duplicated()
    if df["Duplicates"].sum() != 0:
        msg = " Unable to remove all duplicate dates in files"
        logger.error(msg)
        raise ValueError(msg)
    # convert the date in the CSV file to a pandas datetime
    df["TIMESTAMP"] = pandas.to_datetime(df["DATE"].astype("string"), errors="raise")
    # find all of the timestamps (should only be 1)
    timestamps = list(df.select_dtypes(include=['datetime64']))
    # take the first if more than 1
    timestamp = timestamps[0]
    # use the timestamp as the index
    df.set_index(timestamp, inplace=True)
    df.index = df.index.round('1S')
    # wind direction field, see isd_format_document.pdf for details
    wind = df["WND"].str.split(',', expand=True)
    df["Wd"] = wind[0].apply(pandas.to_numeric, errors='coerce')
    df["Ws"] = wind[3].apply(pandas.to_numeric, errors='coerce') / float(10)
    del df["WND"]
    # air temperature
    temperature = df["TMP"].str.split(',', expand=True)
    df["Ta"] = temperature[0].apply(pandas.to_numeric, errors='coerce') / float(10)
    del df["TMP"]
    # dew point temperature
    dew_point = df["DEW"].str.split(',', expand=True)
    df["Td"] = dew_point[0].apply(pandas.to_numeric, errors='coerce') / float(10)
    del df["DEW"]
    # surface pressure
    surface_pressure = df["SLP"].str.split(',', expand=True)
    df["ps"] = surface_pressure[0].apply(pandas.to_numeric, errors='coerce') / float(100)
    del df["SLP"]
    # Precipitation is stored in columns AA1 to AA4 but not all columns will be present.
    #
    # Within each column, precipitation is stored as "HH,PPPP,C,Q" where HH is the
    # period over which the precipitation was accumulated (e.g. 1, 3, 6, 24 hours),
    # PPPP is the precipitation amount in mm*10, C is the condition code and Q is
    # the QC flag (1 = passed all QC checks).
    #
    # Column AA1 contains most of the precipitation data.  When precipitation data is
    # available for 2 accumulation periods e.g. 3 hours and 6 or 24 hours, the second
    # accumulation period is given in AA2.  And so on for up to 4 separate accumulation
    # periods e.g. 1 hour, 3 hours, 6 hours and 24 hours.
    # get a list of the precipitation columns in the data frame
    precip_labels = [l for l in df.columns.to_list() if "AA" in l]
    # create a data frame for the precipitation data, same index as main data frame
    df_precip = pandas.DataFrame(index=df.index)
    # loop over the precipitation fields
    for precip_label in precip_labels:
        # split the "HH,PPPP,C,Q" fields to get individual parts
        tmp = df[precip_label].str.split(',', expand=True)
        # name the columns
        tmp.columns = ["Period", "Amount", "Condition", "Quality"]
        # coerce to numeric values
        tmp = tmp.apply(pandas.to_numeric, errors='coerce')
        # loop over the accumulation periods
        for n in [1, 3, 6, 24]:
            # get the data for this accumulation period and store in a new column
            # e.g. "3_hourly_AA1"
            tmp.loc[(tmp["Period"] == n) & (tmp["Quality"] == 1),
                    str(n) + "_hourly_" + precip_label] = tmp["Amount"]
        # drop the intermediate columns, no longer needed
        tmp = tmp.drop(["Period", "Amount", "Condition", "Quality"], axis=1)
        # concatenate the new data
        df_precip = pandas.concat([df_precip, tmp], axis=1)
        # drop the individual columns e.g. AA1, AA2 etc
        df.drop(precip_label, axis=1, inplace=True)
    # now loop over the accumulation periods and combine to get a single column
    # for each accumulation period
    for n in [1, 3, 6, 24]:
        # list of column headings for this accumulation period
        label = str(n) + "_hourly"
        hour_labels = [l for l in df_precip.columns.to_list() if label in l]
        # rename the first column e.g. "3_hourly_AA1" to "3_hourly"
        df_precip.rename({hour_labels[0]: label}, axis=1, inplace=True)
        # loop over the remaining columns and merge into a single column for this
        # accumulation period
        for hour_label in hour_labels[1:]:
            # merge "3_hourly" with "3_hourly_AA2" etc
            df_precip[label] = df_precip[label].combine_first(df_precip[hour_label])
            # delete columns that are no longer needed
            df_precip.drop(hour_label, axis=1, inplace=True)
        # convert mm*10 to mm
        df_precip[label] = df_precip[label] / float(10)
    # print the sum of the 1, 3, 6 and 24 hourly accumulation periods (we expect them to
    # be equal)
    msg = " 1 hourly precipitation total is " + str(round(df_precip["1_hourly"].sum(), 4))
    logger.info(msg)
    msg = " 3 hourly precipitation total is " + str(round(df_precip["3_hourly"].sum(), 4))
    logger.info(msg)
    msg = " 6 hourly precipitation total is " + str(round(df_precip["6_hourly"].sum(), 4))
    logger.info(msg)
    msg = " 24 hourly precipitation total is " + str(round(df_precip["24_hourly"].sum(), 4))
    logger.info(msg)
    # choose the most common accumulation period
    msg = " Using " + df_precip.count().idxmax() + " for precipitation"
    logger.info(msg)
    # and use it for the precipitation data
    df["Precip"] = df_precip[df_precip.count().idxmax()]
    # now copy the data from a pandas data frame to a PFP data structure
    nrecs = len(df)
    ones = numpy.ones(nrecs)
    zeros = numpy.zeros(nrecs)
    # create a data structure
    ds_its = pfp_io.DataStructure()
    # set the global attributes
    ds_its.globalattributes["nc_nrecs"] = nrecs
    ds_its.globalattributes["altitude"] = float(df["ELEVATION"][0])
    ds_its.globalattributes["latitude"] = float(df["LATITUDE"][0])
    ds_its.globalattributes["longitude"] = float(df["LONGITUDE"][0])
    ds_its.globalattributes["isd_site_id"] = int(df["STATION"][0])
    # get the datetime variable
    ldt = pfp_utils.CreateEmptyVariable("DateTime", nrecs)
    ldt["Data"] = numpy.array(df.index.to_pydatetime())
    ldt["Flag"] = zeros
    ldt["Attr"] = {"long_name": "Datetime in UTC", "units": ""}
    pfp_utils.CreateVariable(ds_its, ldt)
    # get the time step
    dt = pfp_utils.get_timestep(ds_its)
    time_step = int(scipy.stats.mode(dt / float(60))[0][0])
    if time_step not in [10, 30, 60, 180]:
        msg = " Time step (" + str(time_step) + ") must be 10, 30, 60 or 180 minutes"
        logger.error(msg)
        raise ValueError(msg)
    else:
        ds_its.globalattributes["time_step"] = int(scipy.stats.mode(dt / float(60))[0][0])
    # now add the other variables
    # wind direction
    Wd = pfp_utils.CreateEmptyVariable("Wd", nrecs, datetime=ldt["Data"])
    Wd["Data"] = numpy.ma.masked_equal(df["Wd"].values, 999)
    Wd["Flag"] = numpy.where(numpy.ma.getmaskarray(Wd["Data"]) == True, ones, zeros)
    Wd["Attr"] = {"long_name": "Wind direction", "statistic_type": "average",
                  "standard_name": "wind_from_direction", "units": "degrees"}
    pfp_utils.CreateVariable(ds_its, Wd)
    # wind speed
    Ws = pfp_utils.CreateEmptyVariable("Ws", nrecs, datetime=ldt["Data"])
    Ws["Data"] = numpy.ma.masked_equal(df["Ws"].values, 999.9)
    Ws["Flag"] = numpy.where(numpy.ma.getmaskarray(Ws["Data"]) == True, ones, zeros)
    Ws["Attr"] = {"long_name": "Wind speed", "statistic_type": "average",
                  "standard_name": "wind_speed", "units": "m/s"}
    pfp_utils.CreateVariable(ds_its, Ws)
    # air temperature
    Ta = pfp_utils.CreateEmptyVariable("Ta", nrecs, datetime=ldt["Data"])
    Ta["Data"] = numpy.ma.masked_equal(df["Ta"].values, 999.9)
    Ta["Flag"] = numpy.where(numpy.ma.getmaskarray(Ta["Data"]) == True, ones, zeros)
    Ta["Attr"] = {"long_name": "Air temperature", "statistic_type": "average",
                  "standard_name": "air_temperature", "units": "degC"}
    pfp_utils.CreateVariable(ds_its, Ta)
    # dew point temperature
    Td = pfp_utils.CreateEmptyVariable("Td", nrecs, datetime=ldt["Data"])
    Td["Data"] = numpy.ma.masked_equal(df["Td"].values, 999.9)
    Td["Flag"] = numpy.where(numpy.ma.getmaskarray(Td["Data"]) == True, ones, zeros)
    Td["Attr"] = {"long_name": "Dew point temperature", "statistic_type": "average",
                  "standard_name": "dew_point_temperature", "units": "degC"}
    pfp_utils.CreateVariable(ds_its, Td)
    # surface pressure
    ps = pfp_utils.CreateEmptyVariable("ps", nrecs, datetime=ldt["Data"])
    site_altitude = float(ds_its.globalattributes["altitude"])
    cfac = numpy.ma.exp((-1 * site_altitude) / ((Ta["Data"] + 273.15) * 29.263))
    ps["Data"] = numpy.ma.masked_equal(df["ps"].values, 9999.9)
    ps["Data"] = ps["Data"] * cfac
    ps["Flag"] = numpy.where(numpy.ma.getmaskarray(ps["Data"]) == True, ones, zeros)
    ps["Attr"] = {"long_name": "Surface pressure", "statistic_type": "average",
                  "standard_name": "surface_air_pressure", "units": "kPa"}
    pfp_utils.CreateVariable(ds_its, ps)
    # precipitation
    Precip = pfp_utils.CreateEmptyVariable("Precip", nrecs, datetime=ldt["Data"])
    Precip["Data"] = numpy.ma.masked_equal(df["Precip"].values, 999.9)
    Precip["Flag"] = numpy.where(numpy.ma.getmaskarray(Precip["Data"]) == True, ones, zeros)
    Precip["Attr"] = {"long_name": "Rainfall", "statistic_type": "sum",
                      "standard_name": "thickness_of_rainfall_amount", "units": "mm"}
    pfp_utils.CreateVariable(ds_its, Precip)
    # relative humidity
    RH = pfp_utils.CreateEmptyVariable("RH", nrecs, datetime=ldt["Data"])
    RH["Data"] = mf.relativehumidityfromdewpoint(Td["Data"], Ta["Data"])
    RH["Flag"] = numpy.where(numpy.ma.getmaskarray(RH["Data"]) == True, ones, zeros)
    RH["Attr"] = {"long_name": "Relative humidity", "statistic_type": "average",
                  "standard_name": "relative_humidity", "units": "percent"}
    pfp_utils.CreateVariable(ds_its, RH)
    # absolute humidity
    AH = pfp_utils.CreateEmptyVariable("AH", nrecs, datetime=ldt["Data"])
    AH["Data"] = mf.absolutehumidityfromrelativehumidity(Ta["Data"], RH["Data"])
    AH["Flag"] = numpy.where(numpy.ma.getmaskarray(AH["Data"]) == True, ones, zeros)
    AH["Attr"] = {"long_name": "Absolute humidity", "statistic_type": "average",
                  "standard_name": "mass_concentration_of_water_vapor_in_air", "units": "g/m^3"}
    pfp_utils.CreateVariable(ds_its, AH)
    # specific humidity
    SH = pfp_utils.CreateEmptyVariable("SH", nrecs, datetime=ldt["Data"])
    SH["Data"] = mf.specifichumidityfromrelativehumidity(RH["Data"], Ta["Data"], ps["Data"])
    SH["Flag"] = numpy.where(numpy.ma.getmaskarray(SH["Data"]) == True, ones, zeros)
    SH["Attr"] = {"long_name": "Specific humidity", "statistic_type": "average",
                  "standard_name": "specific_humidity", "units": "kg/kg"}
    pfp_utils.CreateVariable(ds_its, SH)
    return ds_its
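# Hypothetical sketch (illustration only) of how the AA1/AA2 precipitation columns
# are merged in read_isd_file_csv: for a given accumulation period, combine_first
# fills gaps in the first column with values from the later columns, then mm*10 is
# converted to mm. The values below are made up for the example.
def _demo_precip_combine():
    df_precip = pandas.DataFrame({
        "3_hourly_AA1": [12.0, numpy.nan, 0.0, numpy.nan],
        "3_hourly_AA2": [numpy.nan, 5.0, numpy.nan, numpy.nan]})
    # rename the first column e.g. "3_hourly_AA1" to "3_hourly"
    df_precip.rename({"3_hourly_AA1": "3_hourly"}, axis=1, inplace=True)
    # fill gaps in "3_hourly" with values from "3_hourly_AA2"
    df_precip["3_hourly"] = df_precip["3_hourly"].combine_first(df_precip["3_hourly_AA2"])
    # drop the merged column and convert mm*10 to mm
    df_precip.drop("3_hourly_AA2", axis=1, inplace=True)
    df_precip["3_hourly"] = df_precip["3_hourly"] / float(10)
    # df_precip["3_hourly"] == [1.2, 0.5, 0.0, NaN]
    return df_precip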
        # add some useful global attributes
        ds_out[site_index[isd_site]].globalattributes["isd_site_id"] = isd_site
        ds_out[site_index[isd_site]].globalattributes["time_zone"] = time_zone
        # write out a netCDF file for each ISD site and each year
        #nc_file_name = isd_site + "_" + str(year) + ".nc"
        #nc_dir_path = os.path.join(out_base_path, site, "Data", "ISD")
        #if not os.path.exists(nc_dir_path):
        #    os.makedirs(nc_dir_path)
        #nc_file_path = os.path.join(nc_dir_path, nc_file_name)
        #nc_file = pfp_io.nc_open_write(nc_file_path)
        #pfp_io.nc_write_series(nc_file, ds_out[site_index[isd_site]], ndims=1)
    # now we merge the data structures for each ISD station into a single data structure
    # first, instance a data structure
    ds_all = pfp_io.DataStructure()
    ds_all.globalattributes["latitude"] = site_info[site]["Latitude"]
    ds_all.globalattributes["longitude"] = site_info[site]["Longitude"]
    ds_all.globalattributes["altitude"] = site_info[site]["Altitude"]
    ds_all.globalattributes["site_name"] = site_info[site]["site_name"]
    # now loop over the data structures for each ISD station and get the earliest
    # start time and the latest end time
    start_datetime = []
    end_datetime = []
    for i in list(ds_out.keys()):
        start_datetime.append(ds_out[i].series["DateTime"]["Data"][0])
        end_datetime.append(ds_out[i].series["DateTime"]["Data"][-1])
    start = min(start_datetime)
    end = max(end_datetime)
    # now make a datetime series at the required time step from the earliest start
                l = label + "_" + str(i) + str(j)
                data_nogaps[site][l]["Data"] = numpy.ma.masked_all(nrecs_nogaps)
                data_nogaps[site][l]["Flag"] = numpy.ones(nrecs_nogaps)
                data_nogaps[site][l]["Data"][iA] = numpy.ma.array(data[site][l]["Data"])[iB]
                data_nogaps[site][l]["Flag"][iA] = int(0)
# now we copy the data from the no gaps data sets to PFP data structures
# dictionary to hold data structures for each site
dss_ats = {}
msg = " Creating data structure for each site"
logger.info(msg)
for site in sites:
    # create a data structure for this site
    dss_ats[site] = pfp_io.DataStructure()
    # convert UTC datetime to local standard time
    dt_loc_nogaps = numpy.array(convert_utc_to_local_standard(dt_utc_nogaps,
                                                              site_info[site]["Time zone"]))
    # add the global attributes
    dss_ats[site].root["Attributes"]["site_name"] = site.replace(" ", "")
    dss_ats[site].root["Attributes"]["time_zone"] = site_info[site]["Time zone"]
    dss_ats[site].root["Attributes"]["latitude"] = site_info[site]["Latitude"]
    dss_ats[site].root["Attributes"]["longitude"] = site_info[site]["Longitude"]
    dss_ats[site].root["Attributes"]["time_coverage_start"] = str(dt_loc_nogaps[0])
    dss_ats[site].root["Attributes"]["time_coverage_end"] = str(dt_loc_nogaps[-1])
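# Hypothetical sketch (not the PFP implementation) of what a UTC to local standard
# time conversion such as convert_utc_to_local_standard might do: localise to the
# site time zone, remove any daylight saving offset so the series stays on standard
# time, then drop the tzinfo. The helper name and behaviour are assumptions.
def _demo_utc_to_local_standard(dt_utc, time_zone):
    local_tz = pytz.timezone(time_zone)
    # shift the UTC-aware datetimes to the site time zone
    dt_loc = [dt.astimezone(local_tz) for dt in dt_utc]
    # subtract the daylight saving offset to stay on local standard time
    dt_loc = [dt - dt.dst() for dt in dt_loc]
    # return naive datetimes in local standard time
    return [dt.replace(tzinfo=None) for dt in dt_loc]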