def do_EPQCFlagCheck(cf, ds, section, series, code=9):
    """
    Purpose:
     Mask data according to the value of an EddyPro QC flag.
    Usage:
    Author: PRI
    Date: August 2017
    """
    if 'EPQCFlagCheck' not in cf[section][series].keys():
        return
    nRecs = int(ds.globalattributes["nc_nrecs"])
    flag = numpy.zeros(nRecs, dtype=numpy.int32)
    source_list = ast.literal_eval(cf[section][series]['EPQCFlagCheck']["Source"])
    reject_list = ast.literal_eval(cf[section][series]['EPQCFlagCheck']["Reject"])
    variable = qcutils.GetVariable(ds, series)
    for source in source_list:
        epflag = qcutils.GetVariable(ds, source)
        for value in reject_list:
            bool_array = numpy.isclose(epflag["Data"], float(value))
            idx = numpy.where(bool_array)[0]
            flag[idx] = numpy.int32(1)
    idx = numpy.where(flag == 1)[0]
    variable["Data"][idx] = float(c.missing_value)
    variable["Flag"][idx] = numpy.int32(code)
    qcutils.CreateVariable(ds, variable)
    return
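# A hypothetical control file entry that would drive do_EPQCFlagCheck().  The
# nested section syntax follows the usual OzFluxQC control file conventions;
# the variable and flag labels here are examples only.  Note that "Source" and
# "Reject" are quoted strings because they are parsed with ast.literal_eval().
#
#   [Variables]
#       [[Fc]]
#           [[[EPQCFlagCheck]]]
#               Source = "['qc_co2_flux']"
#               Reject = "[1, 2]"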
def interpolate_precip(ds_in, x1, ds_out, x2):
    """
    Purpose:
     Transfer precipitation data from one time stamp to another.
     Interpolating precipitation does not make sense because interpolation
     would change the precipitation total and, because precipitation is very
     intermittent, we do not know how it behaves at the missing times.
     Instead of interpolating, we assign the precipitation total at each
     input time stamp to the matching output time stamp.
    Assumptions:
     The input time stamps are a subset of the output time stamps.
    Usage:
    Side effects:
    Author: PRI
    Date: December 2017
    """
    nrecs = ds_out.globalattributes["nc_nrecs"]
    # local pointer to the output datetime
    ldt = qcutils.GetVariable(ds_out, "DateTime")
    # just assign any precipitation to the matching ISD time stamp
    precip_out = qcutils.create_empty_variable("Precip", nrecs, datetime=ldt["Data"])
    # zero the data array
    precip_out["Data"] = precip_out["Data"]*float(0)
    precip_in = qcutils.GetVariable(ds_in, "Precip")
    # get the indices of the output elements where the times match
    idx = numpy.searchsorted(x2, numpy.intersect1d(x2, x1))
    precip_out["Data"][idx] = precip_in["Data"]
    precip_out["Flag"] = numpy.zeros(nrecs, dtype=numpy.int32)
    precip_out["Attr"] = copy.deepcopy(precip_in["Attr"])
    qcutils.CreateVariable(ds_out, precip_out)
    return
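# A minimal sketch of the index matching used above: for input times x1 that
# are a subset of the output times x2, numpy.searchsorted() returns the
# positions in x2 of the common time stamps (values are illustrative only).
#
#   import numpy
#   x1 = numpy.array([10, 30, 50])             # input time stamps
#   x2 = numpy.array([0, 10, 20, 30, 40, 50])  # output time stamps
#   idx = numpy.searchsorted(x2, numpy.intersect1d(x2, x1))
#   # idx is [1, 3, 5], the output records that receive the input totals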
def interpolate_ds(ds_in, ts):
    """
    Purpose:
     Interpolate the contents of a data structure onto a different time step.
    Assumptions:
    Usage:
    Author: PRI
    Date: June 2017
    """
    logger.info("Interpolating data")
    # instance the output data structure
    ds_out = qcio.DataStructure()
    # copy the global attributes
    ds_out.globalattributes = copy.deepcopy(ds_in.globalattributes)
    # add the time step
    ds_out.globalattributes["time_step"] = str(ts)
    # generate a regular time series at the required time step
    dt = ds_in.series["DateTime"]["Data"]
    dt0 = qcutils.rounddttots(dt[0], ts=ts)
    if dt0 < dt[0]:
        dt0 = dt0 + datetime.timedelta(minutes=ts)
    dt1 = qcutils.rounddttots(dt[-1], ts=ts)
    if dt1 > dt[-1]:
        dt1 = dt1 - datetime.timedelta(minutes=ts)
    idt = [result for result in qcutils.perdelta(dt0, dt1, datetime.timedelta(minutes=ts))]
    x1 = numpy.array([toTimestamp(dt[i]) for i in range(len(dt))])
    x2 = numpy.array([toTimestamp(idt[i]) for i in range(len(idt))])
    # loop over the series in the data structure and interpolate
    flag = numpy.zeros(len(idt), dtype=numpy.int32)
    attr = {"long_name": "Datetime", "units": "none"}
    ldt_var = {"Label": "DateTime", "Data": idt, "Flag": flag, "Attr": attr}
    qcutils.CreateVariable(ds_out, ldt_var)
    qcutils.get_nctime_from_datetime(ds_out)
    nrecs = len(idt)
    ds_out.globalattributes["nc_nrecs"] = nrecs
    # first, interpolate the temperatures, surface pressure and humidities
    f0 = numpy.zeros(nrecs, dtype=numpy.int32)
    f1 = numpy.ones(nrecs, dtype=numpy.int32)
    for label in ["Ta", "Td", "ps", "RH", "Ah", "q"]:
        var_out = qcutils.create_empty_variable(label, nrecs, datetime=idt)
        var_in = qcutils.GetVariable(ds_in, label)
        var_out["Data"] = interpolate_1d(x1, var_in["Data"], x2)
        var_out["Flag"] = numpy.where(numpy.ma.getmaskarray(var_out["Data"]), f1, f0)
        var_out["Attr"] = copy.deepcopy(var_in["Attr"])
        qcutils.CreateVariable(ds_out, var_out)
    # now clamp the dew point so that Td <= Ta
    Ta = qcutils.GetVariable(ds_out, "Ta")
    Td = qcutils.GetVariable(ds_out, "Td")
    Td["Data"] = numpy.ma.where(Td["Data"] <= Ta["Data"], Td["Data"], Ta["Data"])
    qcutils.CreateVariable(ds_out, Td)
    # now do wind speed and direction by converting to U and V components
    interpolate_wswd(ds_in, x1, ds_out, x2)
    # and lastly, do precipitation
    interpolate_precip(ds_in, x1, ds_out, x2)
    return ds_out
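# A hedged usage sketch of the ISD processing sequence assumed here: read the
# (hourly) ISD file, average any duplicate time stamps, then interpolate onto
# a 30 minute time step.  The file name is hypothetical.
#
#   ds_isd = read_isd_file("012345-67890-2017.gz")
#   ds_avg = average_duplicate_times(ds_isd, time_step=60)
#   ds_30min = interpolate_ds(ds_avg, 30)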
def interpolate_wswd(ds_in, x1, ds_out, x2):
    """
    Purpose:
     Interpolate wind speed and direction by converting them to U and V
     components first, doing the interpolation and then converting back
     to wind speed and direction.
    Usage:
    Side effects:
    Author: PRI
    Date: December 2017
    """
    # get the number of records in the output data
    nrecs = ds_out.globalattributes["nc_nrecs"]
    f0 = numpy.zeros(nrecs, dtype=numpy.int32)
    f1 = numpy.ones(nrecs, dtype=numpy.int32)
    # local pointer to the output datetime
    ldt = qcutils.GetVariable(ds_out, "DateTime")
    # create empty variables for the output data
    Ws_out = qcutils.create_empty_variable("Ws", nrecs, datetime=ldt["Data"])
    Wd_out = qcutils.create_empty_variable("Wd", nrecs, datetime=ldt["Data"])
    U_out = qcutils.create_empty_variable("u", nrecs, datetime=ldt["Data"])
    V_out = qcutils.create_empty_variable("v", nrecs, datetime=ldt["Data"])
    # get the input wind speed and direction
    Ws_in = qcutils.GetVariable(ds_in, "Ws")
    Wd_in = qcutils.GetVariable(ds_in, "Wd")
    # convert to U and V components
    U_in, V_in = qcutils.convert_WSWDtoUV(Ws_in, Wd_in)
    # interpolate the components to the output time stamp
    U_out["Data"] = interpolate_1d(x1, U_in["Data"], x2)
    V_out["Data"] = interpolate_1d(x1, V_in["Data"], x2)
    # add the QC flag and update the variable attributes
    U_out["Flag"] = numpy.ma.where(numpy.ma.getmaskarray(U_out["Data"]), f1, f0)
    V_out["Flag"] = numpy.ma.where(numpy.ma.getmaskarray(V_out["Data"]), f1, f0)
    U_out["Attr"]["long_name"] = "U component of wind velocity, positive east"
    U_out["Attr"]["units"] = "m/s"
    V_out["Attr"]["long_name"] = "V component of wind velocity, positive north"
    V_out["Attr"]["units"] = "m/s"
    # write the U and V components to the output data structure
    qcutils.CreateVariable(ds_out, U_out)
    qcutils.CreateVariable(ds_out, V_out)
    # convert the interpolated components back to wind speed and direction
    Ws_out, Wd_out = qcutils.convert_UVtoWSWD(U_out, V_out)
    # add the QC flag and update the variable attributes
    Ws_out["Flag"] = numpy.ma.where(numpy.ma.getmaskarray(Ws_out["Data"]), f1, f0)
    Wd_out["Flag"] = numpy.ma.where(numpy.ma.getmaskarray(Wd_out["Data"]), f1, f0)
    Ws_out["Attr"] = copy.deepcopy(Ws_in["Attr"])
    Wd_out["Attr"] = copy.deepcopy(Wd_in["Attr"])
    # write the wind speed and direction to the output data structure
    qcutils.CreateVariable(ds_out, Ws_out)
    qcutils.CreateVariable(ds_out, Wd_out)
    return
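# For reference, a minimal sketch of the wind speed/direction to U/V
# conversion assumed above (the usual meteorological convention, with Wd the
# direction the wind blows from; qcutils.convert_WSWDtoUV() is assumed to
# implement the equivalent):
#
#   import numpy
#   def wswd_to_uv(ws, wd):
#       # U positive towards east, V positive towards north
#       u = -ws*numpy.sin(numpy.radians(wd))
#       v = -ws*numpy.cos(numpy.radians(wd))
#       return u, v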
def run_mpt_code(ds, nc_file_name):
    """
    Write a CSV input file for each year of data, run the MPT C code
    (ustar_mp) on it and return a dictionary of the output file paths,
    keyed by year.
    """
    ldt = qcutils.GetVariable(ds, "DateTime")
    out_file_paths = {}
    header = "TIMESTAMP,NEE,VPD,USTAR,TA,SW_IN,H,LE"
    fmt = "%12i,%f,%f,%f,%f,%f,%f,%f"
    first_year = ldt["Data"][0].year
    last_year = ldt["Data"][-1].year
    log_file_path = os.path.join("mpt", "log", "mpt.log")
    mptlogfile = open(log_file_path, "wb")
    in_base_path = os.path.join("mpt", "input", "")
    out_base_path = os.path.join("mpt", "output", "")
    for current_year in range(first_year, last_year + 1):
        in_name = nc_file_name.replace(".nc", "_" + str(current_year) + "_MPT.csv")
        in_full_path = os.path.join(in_base_path, in_name)
        out_full_path = in_full_path.replace("input", "output").replace(".csv", "_ut.txt")
        data = make_data_array(ds, current_year)
        numpy.savetxt(in_full_path, data, header=header, delimiter=",",
                      comments="", fmt=fmt)
        ustar_mp_exe = os.path.join(".", "mpt", "bin", "ustar_mp")
        cmd = [ustar_mp_exe,
               "-input_path=" + in_full_path,
               "-output_path=" + out_base_path]
        subprocess.call(cmd, stdout=mptlogfile)
        if os.path.isfile(out_full_path):
            out_file_paths[current_year] = out_full_path
    mptlogfile.close()
    return out_file_paths
def gfMDS_make_data_array(ds, current_year, info):
    """
    Purpose:
     Create a data array for the MDS gap filling routine.  The array
     constructed here will be written to a CSV file that is read by the
     MDS C code.
    Usage:
    Side effects:
     The constructed data arrays are full years.  That is, they run from
     YYYY-01-01 00:30 to YYYY+1-01-01 00:00.
     Missing data is represented as -9999.
    Author: PRI
    Date: May 2018
    """
    ldt = qcutils.GetVariable(ds, "DateTime")
    nrecs = ds.globalattributes["nc_nrecs"]
    ts = int(ds.globalattributes["time_step"])
    start = datetime.datetime(current_year, 1, 1, 0, 30, 0)
    end = datetime.datetime(current_year + 1, 1, 1, 0, 0, 0)
    cdt = numpy.array([dt for dt in qcutils.perdelta(start, end,
                                                     datetime.timedelta(minutes=ts))])
    mt = numpy.ones(len(cdt))*float(-9999)
    # need an entry for the time stamp and the target ...
    array_list = [cdt, mt]
    # ... and entries for the drivers
    for driver in info["drivers"]:
        array_list.append(mt)
    # now we can create the data array
    data = numpy.stack(array_list, axis=-1)
    si = qcutils.GetDateIndex(ldt["Data"], start, default=0)
    ei = qcutils.GetDateIndex(ldt["Data"], end, default=nrecs)
    dt = qcutils.GetVariable(ds, "DateTime", start=si, end=ei)
    idx1, _ = qcutils.FindMatchingIndices(cdt, dt["Data"])
    pfp_label_list = [info["target"]] + info["drivers"]
    mds_label_list = [info["target_mds"]] + info["drivers_mds"]
    header = "TIMESTAMP"
    fmt = "%12i"
    for n, label in enumerate(pfp_label_list):
        var = qcutils.GetVariable(ds, label, start=si, end=ei)
        data[idx1, n + 1] = var["Data"]
        header = header + "," + mds_label_list[n]
        fmt = fmt + "," + "%f"
    # convert the datetimes to ISO date integers (YYYYMMDDHHmm)
    data[:, 0] = numpy.array([int(xdt.strftime("%Y%m%d%H%M")) for xdt in cdt])
    return data, header, fmt
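# qcutils.FindMatchingIndices() is assumed to return the indices, in each of
# two sorted arrays, of the values common to both.  A minimal equivalent using
# the same intersect1d/searchsorted idiom used elsewhere in this module
# (assumes sorted inputs with unique values):
#
#   import numpy
#   def find_matching_indices(a, b):
#       common = numpy.intersect1d(a, b)
#       return numpy.searchsorted(a, common), numpy.searchsorted(b, common)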
def mask_long_gaps(ds_int, ds_avg, max_interp_length):
    """
    Purpose:
     Mask the interpolated data where the gaps in the original data are
     longer than max_interp_length.
    Assumptions:
    Usage:
    Side effects:
    Author: PRI
    Date: December 2017
    """
    logger.info("Masking long gaps")
    # make a copy of the interpolated data structure
    ds_mlg = copy.deepcopy(ds_int)
    nrecs = ds_int.globalattributes["nc_nrecs"]
    # get a list of the variable labels in this group
    labels = [label for label in ds_avg.series.keys() if label not in ["DateTime", "time"]]
    # loop over the variables in this group
    for label in labels:
        # check to see if this variable exists in the interpolated data structure
        if label not in ds_int.series:
            msg = "mask_long_gaps: variable " + label + " not found, skipping ..."
            logger.warning(msg)
            continue
        # get the average and interpolated variables
        var_avg = qcutils.GetVariable(ds_avg, label)
        var_int = qcutils.GetVariable(ds_int, label)
        # make a copy of the interpolated variable as a base for the output variable
        var_mlg = copy.deepcopy(var_int)
        # get a boolean array that is True where the average data is masked
        cond_bool = numpy.ma.getmaskarray(var_avg["Data"])
        # 2D array of start and end indices for gaps longer than max_interp_length
        cidx = contiguous_regions(cond_bool, max_interp_length=max_interp_length)
        # loop over gaps longer than max_interp_length
        for start, stop in cidx:
            # mask the interpolated data in gaps longer than max_interp_length
            var_mlg["Data"].mask[start:stop] = True
            var_mlg["Flag"][start:stop] = 1
        # write the masked variable back to the output data structure
        qcutils.CreateVariable(ds_mlg, var_mlg)
    # return a copy of the interpolated data structure with gaps longer
    # than max_interp_length masked
    return ds_mlg
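# contiguous_regions() is defined elsewhere in this module; a minimal sketch
# of the behaviour assumed here, namely returning [start, stop) index pairs
# for runs of True longer than max_interp_length:
#
#   import numpy
#   def contiguous_regions(condition, max_interp_length=0):
#       # mark the edges of each run of True values
#       d = numpy.diff(numpy.concatenate(([False], condition, [False])).astype(int))
#       starts = numpy.where(d == 1)[0]
#       stops = numpy.where(d == -1)[0]
#       runs = numpy.column_stack([starts, stops])
#       # keep only runs longer than max_interp_length
#       return runs[(runs[:, 1] - runs[:, 0]) > max_interp_length]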
def make_data_array(ds, current_year):
    """
    Create the data array for one year of the MPT input CSV file: a time
    stamp column followed by Fc, VPD, ustar, Ta, Fsd, Fh and Fe, with
    missing data represented as -9999.
    """
    ldt = qcutils.GetVariable(ds, "DateTime")
    nrecs = ds.globalattributes["nc_nrecs"]
    ts = int(ds.globalattributes["time_step"])
    start = datetime.datetime(current_year, 1, 1, 0, 30, 0)
    end = datetime.datetime(current_year + 1, 1, 1, 0, 0, 0)
    cdt = numpy.array([dt for dt in qcutils.perdelta(start, end,
                                                     datetime.timedelta(minutes=ts))])
    mt = numpy.ones(len(cdt))*float(-9999)
    data = numpy.stack([cdt, mt, mt, mt, mt, mt, mt, mt], axis=-1)
    si = qcutils.GetDateIndex(ldt["Data"], start, default=0)
    ei = qcutils.GetDateIndex(ldt["Data"], end, default=nrecs)
    dt = qcutils.GetVariable(ds, "DateTime", start=si, end=ei)
    idx1, _ = qcutils.FindMatchingIndices(cdt, dt["Data"])
    for n, label in enumerate(["Fc", "VPD", "ustar", "Ta", "Fsd", "Fh", "Fe"]):
        var = qcutils.GetVariable(ds, label, start=si, end=ei)
        data[idx1, n + 1] = var["Data"]
    # convert the datetimes to ISO date integers (YYYYMMDDHHmm)
    data[:, 0] = numpy.array([int(xdt.strftime("%Y%m%d%H%M")) for xdt in cdt])
    return data
def ConvertK2C(ds, T_in, T_out):
    """
    Purpose:
     Function to convert temperature from K to C.
    Usage:
     qcfunc.ConvertK2C(ds, T_in, T_out)
    Author: PRI
    Date: February 2018
    """
    var_in = qcutils.GetVariable(ds, T_in)
    var_out = qcutils.convert_units_func(ds, var_in, "C", mode="quiet")
    var_out["Label"] = T_out
    qcutils.CreateVariable(ds, var_out)
    return 1
def ConvertPa2kPa(ds, ps_in, ps_out):
    """
    Purpose:
     Function to convert pressure from Pa to kPa.
    Usage:
     qcfunc.ConvertPa2kPa(ds, ps_in, ps_out)
    Author: PRI
    Date: February 2018
    """
    var_in = qcutils.GetVariable(ds, ps_in)
    var_out = qcutils.convert_units_func(ds, var_in, "kPa", mode="quiet")
    var_out["Label"] = ps_out
    qcutils.CreateVariable(ds, var_out)
    return 1
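# A hedged usage sketch for the two converters above; "Ta_K" and "ps_Pa" are
# hypothetical input labels, and the converted series are written back to the
# data structure under the output labels:
#
#   ConvertK2C(ds, "Ta_K", "Ta")
#   ConvertPa2kPa(ds, "ps_Pa", "ps")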
            ds_all.globalattributes[gattr] = ds_out[i].globalattributes[gattr]
        # and update the site specific global attributes
        ds_all.globalattributes["time_zone_" + isd_site_id] = ds_out[i].globalattributes["time_zone"]
        ds_all.globalattributes["latitude_" + isd_site_id] = ds_out[i].globalattributes["latitude"]
        ds_all.globalattributes["longitude_" + isd_site_id] = ds_out[i].globalattributes["longitude"]
        ds_all.globalattributes["altitude_" + isd_site_id] = ds_out[i].globalattributes["altitude"]
        # now copy the variables
        # first, we get the indices of the matching datetimes
        ldt_one = ds_out[i].series["DateTime"]["Data"]
        idx = numpy.searchsorted(ldt_all, numpy.intersect1d(ldt_all, ldt_one))
        # then we get a list of the variables to copy
        labels = [label for label in ds_out[i].series.keys() if label not in ["DateTime"]]
        # and then we loop over the variables to be copied
        for label in labels:
            # read the data out of the ISD site data structure
            var_out = qcutils.GetVariable(ds_out[i], label)
            # create an empty output variable with a number, unique to each
            # ISD station, appended to the label
            var_all = qcutils.create_empty_variable(label + "_" + str(i), nrecs)
            # copy the variable attributes
            var_all["Attr"] = copy.deepcopy(var_out["Attr"])
            # add the ISD site ID
            var_all["Attr"]["isd_site_id"] = isd_site_id
            # copy the data and flag onto the matching times
            var_all["Data"][idx] = var_out["Data"]
            var_all["Flag"][idx] = var_out["Flag"]
            # put the data, flag and attributes into the all-in-one data structure
            qcutils.CreateVariable(ds_all, var_all)
    # write the netCDF file with the combined data for this year
    if len(fluxnet_id) == 0:
        nc_dir_path = os.path.join(out_base_path, site, "Data", "ISD")
def gfMDS_plot(cf, pd, ds, mds_label):
    ts = int(ds.globalattributes["time_step"])
    drivers = ds.mds[mds_label]["drivers"]
    target = ds.mds[mds_label]["target"]
    Hdh = qcutils.GetVariable(ds, "Hdh")
    obs = qcutils.GetVariable(ds, target)
    mds = qcutils.GetVariable(ds, mds_label)
    if pd["show_plots"]:
        plt.ion()
    else:
        plt.ioff()
    fig = plt.figure(pd["fig_num"], figsize=(13, 8))
    fig.clf()
    fig.canvas.set_window_title(target)
    plt.figtext(0.5, 0.95, pd["title"], ha='center', size=16)
    # XY plot of the diurnal variation
    rect1 = [0.10, pd["margin_bottom"], pd["xy_width"], pd["xy_height"]]
    ax1 = plt.axes(rect1)
    # get the diurnal stats of the observations
    mask = numpy.ma.mask_or(obs["Data"].mask, mds["Data"].mask)
    obs_mor = numpy.ma.array(obs["Data"], mask=mask)
    _, Hr1, Av1, _, _, _ = gf_getdiurnalstats(Hdh["Data"], obs_mor, ts)
    ax1.plot(Hr1, Av1, 'b-', label="Obs")
    # get the diurnal stats of the MDS predictions
    _, Hr2, Av2, _, _, _ = gf_getdiurnalstats(Hdh["Data"], mds["Data"], ts)
    ax1.plot(Hr2, Av2, 'r-', label="MDS")
    plt.xlim(0, 24)
    plt.xticks([0, 6, 12, 18, 24])
    ax1.set_ylabel(target)
    ax1.set_xlabel('Hour')
    ax1.legend(loc='upper right', frameon=False, prop={'size': 8})
    # histogram of window size
    time_window = qcutils.GetVariable(ds, "MDS_" + target + "_TIMEWINDOW")
    idx = numpy.where(mds["Flag"] == 40)[0]
    if len(idx) != 0:
        tw_hist_data = time_window["Data"][idx]
        rect2 = [0.40, pd["margin_bottom"], pd["xy_width"], pd["xy_height"]]
        ax2 = plt.axes(rect2)
        ax2.hist(tw_hist_data)
        ax2.set_ylabel("Occurrence")
        ax2.set_xlabel("MDS window length")
    # write statistics to the plot
    numpoints = numpy.ma.count(obs["Data"])
    numfilled = numpy.ma.count(mds["Data"]) - numpy.ma.count(obs["Data"])
    plt.figtext(0.65, 0.225, 'No. points')
    plt.figtext(0.75, 0.225, str(numpoints))
    plt.figtext(0.65, 0.200, 'No. filled')
    plt.figtext(0.75, 0.200, str(numfilled))
    avg_obs = numpy.ma.mean(obs["Data"])
    avg_mds = numpy.ma.mean(mds["Data"])
    plt.figtext(0.65, 0.175, 'Avg (obs)')
    plt.figtext(0.75, 0.175, '%.4g' % (avg_obs))
    plt.figtext(0.65, 0.150, 'Avg (MDS)')
    plt.figtext(0.75, 0.150, '%.4g' % (avg_mds))
    var_obs = numpy.ma.var(obs["Data"])
    var_mds = numpy.ma.var(mds["Data"])
    plt.figtext(0.65, 0.125, 'Var (obs)')
    plt.figtext(0.75, 0.125, '%.4g' % (var_obs))
    plt.figtext(0.65, 0.100, 'Var (MDS)')
    plt.figtext(0.75, 0.100, '%.4g' % (var_mds))
    # time series of drivers and target
    ts_axes = []
    rect = [pd["margin_left"], pd["ts_bottom"], pd["ts_width"], pd["ts_height"]]
    ts_axes.append(plt.axes(rect))
    ts_axes[0].plot(obs["DateTime"], obs["Data"], 'b.',
                    mds["DateTime"], mds["Data"], 'r-')
    ts_axes[0].set_xlim(obs["DateTime"][0], obs["DateTime"][-1])
    TextStr = target + '_obs (' + obs['Attr']['units'] + ')'
    ts_axes[0].text(0.05, 0.85, TextStr, color='b', horizontalalignment='left',
                    transform=ts_axes[0].transAxes)
    TextStr = target + ' (' + mds['Attr']['units'] + ')'
    ts_axes[0].text(0.85, 0.85, TextStr, color='r', horizontalalignment='right',
                    transform=ts_axes[0].transAxes)
    for i, driver in enumerate(drivers):
        this_bottom = pd["ts_bottom"] + (i + 1)*pd["ts_height"]
        rect = [pd["margin_left"], this_bottom, pd["ts_width"], pd["ts_height"]]
        ts_axes.append(plt.axes(rect, sharex=ts_axes[0]))
        drv = qcutils.GetVariable(ds, driver)
        drv_notgf = numpy.ma.masked_where(drv["Flag"] != 0, drv["Data"])
        drv_gf = numpy.ma.masked_where(drv["Flag"] == 0, drv["Data"])
        ts_axes[i + 1].plot(drv["DateTime"], drv_notgf, 'b-')
        ts_axes[i + 1].plot(drv["DateTime"], drv_gf, 'r-', linewidth=2)
        plt.setp(ts_axes[i + 1].get_xticklabels(), visible=False)
        TextStr = driver + ' (' + drv['Attr']['units'] + ')'
        ts_axes[i + 1].text(0.05, 0.85, TextStr, color='b',
                            horizontalalignment='left',
                            transform=ts_axes[i + 1].transAxes)
    # save a hard copy
    sdt = obs["DateTime"][0].strftime("%Y%m%d")
    edt = obs["DateTime"][-1].strftime("%Y%m%d")
    if "plot_path" in cf["Files"]:
        plot_path = cf["Files"]["plot_path"] + "L5/"
    else:
        plot_path = "plots/L5/"
    if not os.path.exists(plot_path):
        os.makedirs(plot_path)
    figname = plot_path + pd["site_name"].replace(" ", "") + "_MDS_" + pd["label"]
    figname = figname + "_" + sdt + "_" + edt + '.png'
    fig.savefig(figname, format='png')
    if pd["show_plots"]:
        plt.draw()
        plt.pause(1)
        plt.ioff()
    else:
        plt.close(fig)
        plt.ion()
    return
def GapFillFluxUsingMDS(cf, ds):
    """
    Purpose:
     Run the FluxNet C code to implement the MDS gap filling method.
    Usage:
    Side effects:
    Author: PRI
    Date: May 2018
    """
    if "mds" not in dir(ds):
        return
    # get the file name
    file_path = cf["Files"]["file_path"]
    file_name = cf["Files"]["in_filename"]
    nc_full_path = os.path.join(file_path, file_name)
    nc_name = os.path.split(nc_full_path)[1]
    # get some useful metadata
    ts = int(ds.globalattributes["time_step"])
    site_name = ds.globalattributes["site_name"]
    level = ds.globalattributes["nc_level"]
    # define the MDS input file location
    in_base_path = os.path.join("mds", "input")
    # get a list of CSV files in the input directory
    in_path_files = glob.glob(os.path.join(in_base_path, "*.csv"))
    # and clean them out
    for in_file in in_path_files:
        if os.path.exists(in_file):
            os.remove(in_file)
    # define the MDS output file location
    out_base_path = os.path.join("mds", "output", "")
    # get a list of CSV files in the output directory
    out_path_files = glob.glob(os.path.join(out_base_path, "*.csv"))
    # and clean them out
    for out_file in out_path_files:
        if os.path.exists(out_file):
            os.remove(out_file)
    # get some useful odds and ends
    ldt = qcutils.GetVariable(ds, "DateTime")
    first_year = ldt["Data"][0].year
    last_year = ldt["Data"][-1].year
    # open a log file for the MDS C code output
    log_file_path = os.path.join("mds", "log", "mds.log")
    mdslogfile = open(log_file_path, "wb")
    # now loop over the series to be gap filled using MDS
    for fig_num, mds_label in enumerate(ds.mds):
        logger.info(" Doing MDS gap filling for %s", ds.mds[mds_label]["target"])
        ds.mds[mds_label]["out_base_path"] = out_base_path
        ds.mds[mds_label]["time_step"] = ts
        # make the output file name
        out_name = site_name + "_" + level + "_" + mds_label + "_mds.csv"
        out_file_path = os.path.join(out_base_path, out_name)
        # first, we write the yearly CSV input files
        ds.mds[mds_label]["in_file_paths"] = []
        for current_year in range(first_year, last_year + 1):
            in_name = nc_name.replace(".nc", "_" + str(current_year) + "_MDS.csv")
            #in_name = str(current_year)+".csv"
            in_file_path = os.path.join(in_base_path, in_name)
            data, header, fmt = gfMDS_make_data_array(ds, current_year,
                                                      ds.mds[mds_label])
            numpy.savetxt(in_file_path, data, header=header, delimiter=",",
                          comments="", fmt=fmt)
            ds.mds[mds_label]["in_file_paths"].append(in_file_path)
        # then we construct the MDS C code command options list
        cmd = gfMDS_make_cmd_string(ds.mds[mds_label])
        # then we spawn a subprocess for the MDS C code
        subprocess.call(cmd, stdout=mdslogfile)
        mds_out_file = os.path.join("mds", "output", "mds.csv")
        os.rename(mds_out_file, out_file_path)
        # and put the MDS results into the data structure
        gfMDS_get_mds_output(ds, mds_label, out_file_path)
        # plot the MDS results
        target = ds.mds[mds_label]["target"]
        drivers = ds.mds[mds_label]["drivers"]
        title = site_name + ' : Comparison of tower and MDS data for ' + target
        pd = gfMDS_initplot(site_name=site_name, label=target, fig_num=fig_num,
                            title=title, nDrivers=len(drivers), show_plots=True)
        gfMDS_plot(cf, pd, ds, mds_label)
    # close the log file
    mdslogfile.close()
    return
def gfMDS_get_mds_output(ds, mds_label, out_file_path, include_qc=False):
    """
    Purpose:
     Reads the CSV file output by the MDS C code and puts the contents into
     the data structure.
    Usage:
     gfMDS_get_mds_output(ds, mds_label, out_file_path, include_qc=False)
     where ds is a data structure
           mds_label is the label of the MDS output in the data structure
           out_file_path is the full path to the MDS output file
           include_qc controls treatment of the MDS QC output
            True = include QC output
            False = do not include QC output
    Side effects:
     New series are created in the data structure to hold the MDS data.
    Author: PRI
    Date: May 2018
    """
    ldt = qcutils.GetVariable(ds, "DateTime")
    first_date = ldt["Data"][0]
    last_date = ldt["Data"][-1]
    data_mds = numpy.genfromtxt(out_file_path, delimiter=",", names=True,
                                autostrip=True, dtype=None)
    dt_mds = numpy.array([dateutil.parser.parse(str(dt)) for dt in data_mds["TIMESTAMP"]])
    si_mds = qcutils.GetDateIndex(dt_mds, first_date)
    ei_mds = qcutils.GetDateIndex(dt_mds, last_date)
    # get a list of the names in the data array
    mds_output_names = list(data_mds.dtype.names)
    # strip out the timestamp and the original data
    for item in ["TIMESTAMP", ds.mds[mds_label]["target_mds"]]:
        if item in mds_output_names:
            mds_output_names.remove(item)
    # check to see if the QC outputs have been requested
    if not include_qc:
        # if not, then remove them from the list of requested outputs
        for item in ["QC", "HAT", "SAMPLE", "STDDEV", "METHOD", "QC_HAT"]:
            if item in mds_output_names:
                mds_output_names.remove(item)
    # and now loop over the MDS output series
    for mds_output_name in mds_output_names:
        if mds_output_name == "FILLED":
            # get the gap filled target and write it to the data structure
            var_in = qcutils.GetVariable(ds, ds.mds[mds_label]["target"])
            data = data_mds[mds_output_name][si_mds:ei_mds + 1]
            idx = numpy.where((numpy.ma.getmaskarray(var_in["Data"]) == True) &
                              (abs(data - c.missing_value) > c.eps))[0]
            flag = numpy.array(var_in["Flag"])
            flag[idx] = numpy.int32(40)
            attr = copy.deepcopy(var_in["Attr"])
            attr["long_name"] = attr["long_name"] + ", gap filled using MDS"
            var_out = {"Label": mds_label, "Data": data, "Flag": flag, "Attr": attr}
            qcutils.CreateVariable(ds, var_out)
        elif mds_output_name == "TIMEWINDOW":
            # make the series name for the data structure
            mds_qc_label = "MDS" + "_" + ds.mds[mds_label]["target"] + "_" + mds_output_name
            data = data_mds[mds_output_name][si_mds:ei_mds + 1]
            flag = numpy.zeros(len(data))
            attr = {"long_name": "TIMEWINDOW from MDS gap filling for " +
                    ds.mds[mds_label]["target"]}
            var_out = {"Label": mds_qc_label, "Data": data, "Flag": flag, "Attr": attr}
            qcutils.CreateVariable(ds, var_out)
        else:
            # make the series name for the data structure
            mds_qc_label = "MDS" + "_" + ds.mds[mds_label]["target"] + "_" + mds_output_name
            data = data_mds[mds_output_name][si_mds:ei_mds + 1]
            flag = numpy.zeros(len(data))
            attr = {"long_name": "QC field from MDS gap filling for " +
                    ds.mds[mds_label]["target"]}
            var_out = {"Label": mds_qc_label, "Data": data, "Flag": flag, "Attr": attr}
            qcutils.CreateVariable(ds, var_out)
    return
def average_duplicate_times(ds_in, time_step):
    """
    Purpose:
     Remove duplicate time steps by averaging data with the same time stamp.
     The routine uses scipy.stats.binned_statistic() to bin the data based
     on the time (bins have width time_step and are centred on times that
     are an integral multiple of time_step).
    Usage:
     ds_out = average_duplicate_times(ds_in, time_step=30)
    Side effects:
     The time given for the averages and sums is the end of the time period.
    Author: PRI
    Date: October 2017
    """
    logger.info("Getting data onto a regular time step")
    # get the time as a number (see attr["units"] for units)
    time_var = qcutils.GetVariable(ds_in, "time")
    # generate an array of bin edges for use by binned_statistic()
    bin_width = time_step*60
    # round the ISD start time to an integral of the time step
    t0 = time_step*60*int(time_var["Data"].data[0]/(time_step*60))
    bin_first = t0 - bin_width
    # round the ISD end time to an integral of the time step
    t1 = time_step*60*int(time_var["Data"].data[-1]/(time_step*60))
    # make sure we go 1 beyond the end time
    if t1 < time_var["Data"][-1]:
        t1 = t1 + bin_width
    # generate an array of bin edges
    bin_last = t1 + bin_width
    bins = numpy.arange(bin_first, bin_last, bin_width)
    # get the number of records in the output series
    nrecs = len(bins) - 1
    # generate series of zeros and ones to be used as QC flags
    f0 = numpy.zeros(nrecs)
    f1 = numpy.ones(nrecs)
    # create an output data structure with a copy of the input global attributes
    ds_out = qcio.DataStructure()
    ds_out.globalattributes = copy.deepcopy(ds_in.globalattributes)
    # update the number of records
    ds_out.globalattributes["nc_nrecs"] = nrecs
    # get a list of variable labels, excluding the datetime and time variables
    # NB: wind speed and direction are recalculated from the averaged U and V
    #     components once the averaging is done
    labels = [label for label in ds_in.series.keys() if label not in ["DateTime", "time"]]
    # loop over the variables
    for label in labels:
        # get the variable
        var_in = qcutils.GetVariable(ds_in, label)
        # indices of non-masked elements
        idx = numpy.ma.where(numpy.ma.getmaskarray(var_in["Data"]) == False)[0]
        # check to see if we have at least 1 data point to deal with
        if len(idx) != 0:
            # get the non-masked data as an ndarray
            data_in = numpy.array(var_in["Data"][idx].data)
            time_in = numpy.array(time_var["Data"][idx].data)
            # use binned_statistic() to average records with the same datetime
            if var_in["Label"][0:1] == "P" and var_in["Attr"]["units"] in ["m", "mm"]:
                # do sum for precipitation
                sums, edges, indices = scipy.stats.binned_statistic(time_in, data_in,
                                                                    statistic="sum",
                                                                    bins=bins)
                # convert output to a masked array and mask empty bins
                data_out = numpy.ma.masked_where(numpy.isfinite(sums) == False,
                                                 numpy.ma.array(sums))
            else:
                # do average for everything else
                means, edges, indices = scipy.stats.binned_statistic(time_in, data_in,
                                                                     statistic="mean",
                                                                     bins=bins)
                # convert output to a masked array and mask empty bins
                data_out = numpy.ma.masked_where(numpy.isfinite(means) == False,
                                                 numpy.ma.array(means))
            # generate the QC flag
            flag_out = numpy.where(numpy.ma.getmaskarray(data_out) == True, f1, f0)
            # and create the output variable
            var_out = {"Label": label, "Data": data_out, "Flag": flag_out,
                       "Attr": var_in["Attr"]}
        else:
            # no data, so create an empty output variable
            var_out = {"Label": label, "Data": numpy.ma.masked_all(nrecs),
                       "Flag": f1, "Attr": var_in["Attr"]}
        # and write the output variable to the output data structure
        qcutils.CreateVariable(ds_out, var_out)
    # generate a series of the bin end points (the time stamp reports the
    # end of the averaging period)
    mids = edges[1:]
    # and convert these to a series of Python datetimes
    attr = copy.deepcopy(ds_in.series["DateTime"]["Attr"])
    ldt_out = {"Label": "DateTime",
               "Data": netCDF4.num2date(mids, time_var["Attr"]["units"]),
               "Flag": f0, "Attr": attr}
    # and write the datetime to the output data structure
    qcutils.CreateVariable(ds_out, ldt_out)
    qcutils.get_nctime_from_datetime(ds_out)
    # get wind speed and direction from the averaged U and V components
    U = qcutils.GetVariable(ds_out, "u")
    V = qcutils.GetVariable(ds_out, "v")
    WS, WD = qcutils.convert_UVtoWSWD(U, V)
    qcutils.CreateVariable(ds_out, WS)
    qcutils.CreateVariable(ds_out, WD)
    return ds_out
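# A minimal, self-contained sketch of how scipy.stats.binned_statistic()
# averages records that fall in the same time bin (values illustrative only):
#
#   import numpy
#   import scipy.stats
#   time_in = numpy.array([0, 1800, 1800, 3600])  # seconds; one duplicate
#   data_in = numpy.array([1.0, 2.0, 4.0, 5.0])
#   bins = numpy.arange(-900, 4500, 1800)         # edges centred on the times
#   means, edges, _ = scipy.stats.binned_statistic(time_in, data_in,
#                                                  statistic="mean", bins=bins)
#   # means is [1.0, 3.0, 5.0]; the two records at t=1800 were averaged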
def read_isd_file(isd_file_path):
    """
    Purpose:
     Reads an ISD CSV file (gz or uncompressed) and returns the data in a
     data structure.
    Assumptions:
    Usage:
    Author: PRI
    Date: June 2017
    """
    isd_file_name = os.path.split(isd_file_path)[1]
    msg = "Reading ISD file " + isd_file_name
    logger.info(msg)
    isd_site_id = isd_file_name.split("-")
    isd_site_id = isd_site_id[0] + "-" + isd_site_id[1]
    # read the file
    if os.path.splitext(isd_file_path)[1] == ".gz":
        with gzip.open(isd_file_path, 'rb') as fp:
            content = fp.readlines()
    else:
        with open(isd_file_path) as fp:
            content = fp.readlines()
    # get a data structure
    ds = qcio.DataStructure()
    # get the site latitude, longitude and altitude
    ds.globalattributes["altitude"] = float(content[0][46:51])
    ds.globalattributes["latitude"] = float(content[0][28:34])/float(1000)
    ds.globalattributes["longitude"] = float(content[0][34:41])/float(1000)
    ds.globalattributes["isd_site_id"] = isd_site_id
    # initialise the data structure
    isd = {}
    isd["DateTime"] = {"Data": [], "Flag": [],
                       "Attr": {"long_name": "Datetime", "units": "none"}}
    isd["Wd"] = {"Data": [], "Attr": {"long_name": "Wind direction",
                                      "units": "degrees", "missing_value": 999}}
    isd["Ws"] = {"Data": [], "Attr": {"long_name": "Wind speed",
                                      "units": "m/s", "missing_value": 999.9}}
    isd["Ta"] = {"Data": [], "Attr": {"long_name": "Air temperature",
                                      "units": "C", "missing_value": 999.9}}
    isd["Td"] = {"Data": [], "Attr": {"long_name": "Dew point temperature",
                                      "units": "C", "missing_value": 999.9}}
    isd["ps"] = {"Data": [], "Attr": {"long_name": "Surface pressure",
                                      "units": "kPa", "missing_value": 9999.9}}
    isd["Precip"] = {"Data": [], "Attr": {"long_name": "Precipitation",
                                          "units": "mm", "missing_value": 999.9}}
    # define the codes for good data in the ISD file
    OK_obs_code = ["AUTO ", "CRN05", "CRN15", "FM-12", "FM-15", "FM-16", "SY-MT"]
    # iterate over the lines in the file and decode the data
    for i in range(len(content) - 1):
    #for i in range(10):
        # filter out anything other than hourly data
        if content[i][41:46] not in OK_obs_code:
            continue
        YY = int(content[i][15:19])
        MM = int(content[i][19:21])
        DD = int(content[i][21:23])
        HH = int(content[i][23:25])
        mm = int(content[i][25:27])
        dt = datetime.datetime(YY, MM, DD, HH, mm, 0)
        #isd["DateTime"]["Data"].append(pytz.utc.localize(dt))
        isd["DateTime"]["Data"].append(dt)
        # wind direction, degT
        try:
            isd["Wd"]["Data"].append(float(content[i][60:63]))
        except:
            isd["Wd"]["Data"].append(float(999))
        # wind speed, m/s
        try:
            isd["Ws"]["Data"].append(float(content[i][65:69])/float(10))
        except:
            isd["Ws"]["Data"].append(float(999.9))
        # air temperature, C
        try:
            isd["Ta"]["Data"].append(float(content[i][87:92])/float(10))
        except:
            isd["Ta"]["Data"].append(float(999.9))
        # dew point temperature, C
        try:
            isd["Td"]["Data"].append(float(content[i][93:98])/float(10))
        except:
            isd["Td"]["Data"].append(float(999.9))
        # sea level pressure, hPa
        try:
            isd["ps"]["Data"].append(float(content[i][99:104])/float(10))
        except:
            isd["ps"]["Data"].append(float(9999.9))
        # precipitation, mm
        if content[i][108:111] == "AA1":
            try:
                isd["Precip"]["Data"].append(float(content[i][113:117])/float(10))
            except:
                isd["Precip"]["Data"].append(float(999.9))
        else:
            isd["Precip"]["Data"].append(float(999.9))
    # add the time zone to the DateTime attributes
    isd["DateTime"]["Attr"]["time_zone"] = "UTC"
    # get the number of records and add this to the global attributes
    nrecs = len(isd["DateTime"]["Data"])
    ds.globalattributes["nc_nrecs"] = str(nrecs)
    # define the QC flags
    f0 = numpy.zeros(len(isd["DateTime"]["Data"]))
    f1 = numpy.ones(len(isd["DateTime"]["Data"]))
    # deal with the datetime first
    variable = {"Label": "DateTime",
                "Data": numpy.array(isd["DateTime"]["Data"]),
                "Flag": f0, "Attr": isd["DateTime"]["Attr"]}
    qcutils.CreateVariable(ds, variable)
    # get the nominal time step
    dt_delta = qcutils.get_timestep(ds)
    ts = scipy.stats.mode(dt_delta)[0]/60
    ds.globalattributes["time_step"] = ts[0]
    # add the variables to the data structure
    logger.info("Writing data to the data structure")
    labels = [label for label in isd.keys() if label != "DateTime"]
    for label in labels:
        data = numpy.ma.masked_equal(isd[label]["Data"],
                                     isd[label]["Attr"]["missing_value"])
        flag = numpy.where(numpy.ma.getmaskarray(data) == True, f1, f0)
        attr = isd[label]["Attr"]
        variable = {"Label": label, "Data": data, "Flag": flag, "Attr": attr}
        qcutils.CreateVariable(ds, variable)
    # hPa to kPa
    ps = qcutils.GetVariable(ds, "ps")
    ps["Data"] = ps["Data"]/float(10)
    # convert sea level pressure to station pressure using the hypsometric
    # scaling; the constant 29.263 m/K is approximately Rd/g
    site_altitude = float(ds.globalattributes["altitude"])
    Ta = qcutils.GetVariable(ds, "Ta")
    cfac = numpy.ma.exp((-1*site_altitude)/((Ta["Data"] + 273.15)*29.263))
    ps["Data"] = ps["Data"]*cfac
    ps["Attr"]["long_name"] = ps["Attr"]["long_name"] + ", adjusted from sea level to station"
    qcutils.CreateVariable(ds, ps)
    # do precipitation and apply crude limits
    Precip = qcutils.GetVariable(ds, "Precip")
    condition = (Precip["Data"] < 0) | (Precip["Data"] > 100)
    Precip["Data"] = numpy.ma.masked_where(condition, Precip["Data"])
    Precip["Flag"] = numpy.where(numpy.ma.getmaskarray(Precip["Data"]) == True, f1, f0)
    Precip["Attr"]["RangeCheck_upper"] = 100
    Precip["Attr"]["RangeCheck_lower"] = 0
    qcutils.CreateVariable(ds, Precip)
    # get the humidities from Td
    Ta = qcutils.GetVariable(ds, "Ta")
    Td = qcutils.GetVariable(ds, "Td")
    ps = qcutils.GetVariable(ds, "ps")
    RH = mf.RHfromdewpoint(Td["Data"], Ta["Data"])
    flag = numpy.where(numpy.ma.getmaskarray(RH) == True, f1, f0)
    attr = {"long_name": "Relative humidity", "units": "%"}
    variable = {"Label": "RH", "Data": RH, "Flag": flag, "Attr": attr}
    qcutils.CreateVariable(ds, variable)
    Ah = mf.absolutehumidityfromRH(Ta["Data"], RH)
    flag = numpy.where(numpy.ma.getmaskarray(Ah) == True, f1, f0)
    attr = {"long_name": "Absolute humidity", "units": "g/m3"}
    variable = {"Label": "Ah", "Data": Ah, "Flag": flag, "Attr": attr}
    qcutils.CreateVariable(ds, variable)
    q = mf.specifichumidityfromRH(RH, Ta["Data"], ps["Data"])
    flag = numpy.where(numpy.ma.getmaskarray(q) == True, f1, f0)
    attr = {"long_name": "Specific humidity", "units": "kg/kg"}
    variable = {"Label": "q", "Data": q, "Flag": flag, "Attr": attr}
    qcutils.CreateVariable(ds, variable)
    # get U and V components from wind speed and direction
    Ws = qcutils.GetVariable(ds, "Ws")
    Wd = qcutils.GetVariable(ds, "Wd")
    U, V = qcutils.convert_WSWDtoUV(Ws, Wd)
    qcutils.CreateVariable(ds, U)
    qcutils.CreateVariable(ds, V)
    # add the time variable
    qcutils.get_nctime_from_datetime(ds)
    # return the data structure
    return ds
def compare_eddypro():
    """
    Compare fluxes from an EddyPro full output file with those in an
    OzFluxQC L3 output file.
    """
    epname = qcio.get_filename_dialog(title='Choose an EddyPro full output file')
    ofname = qcio.get_filename_dialog(title='Choose an L3 output file')
    ds_ep = qcio.read_eddypro_full(epname)
    ds_of = qcio.nc_read_series(ofname)
    dt_ep = ds_ep.series['DateTime']['Data']
    dt_of = ds_of.series['DateTime']['Data']
    start_datetime = max([dt_ep[0], dt_of[0]])
    end_datetime = min([dt_ep[-1], dt_of[-1]])
    si_of = qcutils.GetDateIndex(dt_of, str(start_datetime), ts=30,
                                 default=0, match='exact')
    ei_of = qcutils.GetDateIndex(dt_of, str(end_datetime), ts=30,
                                 default=len(dt_of), match='exact')
    si_ep = qcutils.GetDateIndex(dt_ep, str(start_datetime), ts=30,
                                 default=0, match='exact')
    ei_ep = qcutils.GetDateIndex(dt_ep, str(end_datetime), ts=30,
                                 default=len(dt_ep), match='exact')
    us_of = qcutils.GetVariable(ds_of, 'ustar', start=si_of, end=ei_of)
    us_ep = qcutils.GetVariable(ds_ep, 'ustar', start=si_ep, end=ei_ep)
    Fh_of = qcutils.GetVariable(ds_of, 'Fh', start=si_of, end=ei_of)
    Fh_ep = qcutils.GetVariable(ds_ep, 'Fh', start=si_ep, end=ei_ep)
    Fe_of = qcutils.GetVariable(ds_of, 'Fe', start=si_of, end=ei_of)
    Fe_ep = qcutils.GetVariable(ds_ep, 'Fe', start=si_ep, end=ei_ep)
    Fc_of = qcutils.GetVariable(ds_of, 'Fc', start=si_of, end=ei_of)
    Fc_ep = qcutils.GetVariable(ds_ep, 'Fc', start=si_ep, end=ei_ep)
    # copy the range check values from the OzFluxQC (OF) attributes
    # to the EddyPro (EP) attributes
    for of, ep in zip([us_of, Fh_of, Fe_of, Fc_of],
                      [us_ep, Fh_ep, Fe_ep, Fc_ep]):
        for item in ["rangecheck_upper", "rangecheck_lower"]:
            if item in of["Attr"]:
                ep["Attr"][item] = of["Attr"][item]
    # apply QC to the EddyPro data
    qcck.ApplyRangeCheckToVariable(us_ep)
    qcck.ApplyRangeCheckToVariable(Fc_ep)
    qcck.ApplyRangeCheckToVariable(Fe_ep)
    qcck.ApplyRangeCheckToVariable(Fh_ep)
    # plot the comparison
    plt.ion()
    fig = plt.figure(1, figsize=(8, 8))
    qcplot.xyplot(us_ep["Data"], us_of["Data"], sub=[2, 2, 1], regr=2,
                  xlabel='u*_EP (m/s)', ylabel='u*_OF (m/s)')
    qcplot.xyplot(Fh_ep["Data"], Fh_of["Data"], sub=[2, 2, 2], regr=2,
                  xlabel='Fh_EP (W/m2)', ylabel='Fh_OF (W/m2)')
    qcplot.xyplot(Fe_ep["Data"], Fe_of["Data"], sub=[2, 2, 3], regr=2,
                  xlabel='Fe_EP (W/m2)', ylabel='Fe_OF (W/m2)')
    qcplot.xyplot(Fc_ep["Data"], Fc_of["Data"], sub=[2, 2, 4], regr=2,
                  xlabel='Fc_EP (umol/m2/s)', ylabel='Fc_OF (umol/m2/s)')
    plt.tight_layout()
    plt.draw()
    plt.ioff()