Example #1
def GapFillUsingInterpolation(cf, ds):
    """
    Purpose:
     Gap fill variables in the data structure using interpolation.
     All variables in the [Variables], [Drivers] and [Fluxes] sections
     are processed.
    Usage:
     qcgf.GapFillUsingInterpolation(cf,ds)
     where cf is a control file object
           ds is a data structure
    Author: PRI
    Date: September 2016
    """
    label_list = qcutils.get_label_list_from_cf(cf)
    maxlen = int(
        qcutils.get_keyvaluefromcf(cf, ["Options"],
                                   "MaxGapInterpolate",
                                   default=2))
    if maxlen == 0:
        msg = " Gap fill by interpolation disabled in control file"
        logger.info(msg)
        return
    for label in label_list:
        section = qcutils.get_cfsection(cf, series=label)
        if "MaxGapInterpolate" in cf[section][label]:
            maxlen = int(
                qcutils.get_keyvaluefromcf(cf, [section, label],
                                           "MaxGapInterpolate",
                                           default=2))
            if maxlen == 0:
                msg = " Gap fill by interpolation disabled for " + label
                logger.info(msg)
                continue
        qcts.InterpolateOverMissing(ds, series=label, maxlen=maxlen)
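
A hedged usage sketch for the example above: the routine is driven entirely by the control file, so a minimal ConfigObj-style control file is enough to exercise it. The file name and variable names are placeholders, and qcio is assumed to be importable from the same code base.

from configobj import ConfigObj
cf = ConfigObj()
cf["Options"] = {"MaxGapInterpolate": "3"}  # fill gaps up to 3 time steps
cf["Drivers"] = {"Ta": {}, "Fsd": {}}       # placeholder driver names
ds = qcio.nc_read_series("Site_L3.nc")      # hypothetical input file
GapFillUsingInterpolation(cf, ds)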
Example #2
def ApplyTurbulenceFilter_checks(cf,ds):
    """
    Purpose:
    Usage:
    Author:
    Date:
    """
    opt = {"OK":True,"turbulence_filter":"ustar","filter_list":['Fc']}
    # return if there is no Options section in control file
    if "Options" not in cf:
        msg = " ApplyTurbulenceFilter: Options section not found in control file"
        log.warning(msg)
        opt["OK"] = False
        return opt
    # get the value of the TurbulenceFilter key in the Options section
    opt["turbulence_filter"] = qcutils.get_keyvaluefromcf(cf,["Options"],"TurbulenceFilter",default="None")
    # return if turbulence filter disabled
    if opt["turbulence_filter"].lower()=="none":
        msg = " Turbulence filter disabled in control file at "+ds.globalattributes["nc_level"]
        log.info(msg)
        opt["OK"] = False
        return opt
    # check to see if filter type can be handled
    if opt["turbulence_filter"].lower() not in ["ustar","ustar_evg","l"]:
        msg = " Unrecognised turbulence filter option ("
        msg = msg+opt["turbulence_filter"]+"), no filter applied"
        log.error(msg)
        opt["OK"] = False
        return opt
    # get the list of series to be filtered
    if "FilterList" in cf["Options"]:
        opt["filter_list"] = ast.literal_eval(cf["Options"]["FilterList"])
    # check to see if the series are in the data structure
    for item in opt["filter_list"]:
        if item not in ds.series.keys():
            msg = " Series "+item+" given in FilterList not found in data stucture"
            log.warning(msg)
            opt["filter_list"].remove(item)
    # return if the filter list is empty
    if len(opt["filter_list"])==0:
        msg = " FilterList in control file is empty, skipping turbulence filter"
        log.warning(msg)
        opt["OK"] = False
        return opt
    # get the value of the DayNightFilter key in the Options section
    opt["daynight_filter"] = qcutils.get_keyvaluefromcf(cf,["Options"],"DayNightFilter",default="None")
    # check to see if filter type can be handled
    if opt["daynight_filter"].lower() not in ["fsd","sa","none"]:
        msg = " Unrecognised day/night filter option ("
        msg = msg+opt["daynight_filter"]+"), no filter applied"
        log.error(msg)
        opt["OK"] = False
        return opt
    # check to see if all day time values are to be accepted
    opt["accept_day_times"] = qcutils.get_keyvaluefromcf(cf,["Options"],"AcceptDayTimes",default="Yes")
    opt["use_evening_filter"] = qcutils.get_keyvaluefromcf(cf,["Options"],"UseEveningFilter",default="Yes")
    
    return opt
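
Callers presumably treat the returned dictionary as a gate before applying the filter; a hedged sketch of that pattern (the loop body is illustrative only):

opt = ApplyTurbulenceFilter_checks(cf, ds)
if opt["OK"]:
    for label in opt["filter_list"]:
        print("applying "+opt["turbulence_filter"]+" filter to "+label)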
Example #3
def get_configs_dict(cf,ds):
#    configs_dict = {'nan_value': -9999,
#                    'minimum_temperature_spread': 5,
#                    'step_size_days': 5,
#                    'window_size_days': 15,
#                    'min_pct_annual': 30,
#                    'min_pct_noct_window': 20,
#                    'min_pct_day_window': 50,
#                    'output_plots': False,
#                    'measurement_interval': 0.5,
#                    'QC_accept_code': 0,
#                    'plot_output_path': '/home/imchugh/Documents'}
    configs_dict = {}
    configs_dict["nan_value"] = int(c.missing_value)
    opt = qcutils.get_keyvaluefromcf(cf,["ER","ER_LT","ERUsingLloydTaylor"],
                                     "minimum_temperature_spread",default=5)
    configs_dict["minimum_temperature_spread"] = int(opt)
    opt = qcutils.get_keyvaluefromcf(cf,["ER","ER_LT","ERUsingLloydTaylor"],
                                     "step_size_days",default=5)
    configs_dict["step_size_days"] = int(opt)
    opt = qcutils.get_keyvaluefromcf(cf,["ER","ER_LT","ERUsingLloydTaylor"],
                                     "window_size_days",default=15)
    configs_dict["window_size_days"] = int(opt)
    opt = qcutils.get_keyvaluefromcf(cf,["ER","ER_LT","ERUsingLloydTaylor"],
                                     "minimum_percent_annual",default=30)
    configs_dict["minimum_pct_annual"] = int(opt)
    opt = qcutils.get_keyvaluefromcf(cf,["ER","ER_LT","ERUsingLloydTaylor"],
                                     "minimum_percent_noct_window",default=20)
    configs_dict["minimum_pct_noct_window"] = int(opt)
    #opt = qcutils.get_keyvaluefromcf(cf,["ER","ER_LT","ERUsingLloydTaylor"],
                                     #"minimum_percent_day_window",
                                     #default=50)
    #configs_dict["minimum_pct_day_window"] = int(opt)
    opt = qcutils.get_keyvaluefromcf(cf,["ER","ER_LT","ERUsingLloydTaylor"],
                                     "output_plots",default="False")
    configs_dict["output_plots"] = (opt=="True")
    opt = qcutils.get_keyvaluefromcf(cf,["ER","ER_LT","ERUsingLloydTaylor"],
                                     "target",default="ER")
    configs_dict["target"] = str(opt)
    opt = qcutils.get_keyvaluefromcf(cf,["ER","ER_LT","ERUsingLloydTaylor"],
                                     "drivers",default="['Ta']")
    configs_dict["drivers"] = ast.literal_eval(opt)[0]
    opt = qcutils.get_keyvaluefromcf(cf,["ER","ER_LT","ERUsingLloydTaylor"],
                                     "output",default="ER_LT_all")
    configs_dict["output_label"] = opt
    configs_dict["output_results"] = True
    ts = int(ds.globalattributes["time_step"])
    configs_dict["measurement_interval"] = float(ts)/60.0
    configs_dict["QC_accept_code"] = 0
    opt = qcutils.get_keyvaluefromcf(cf,["Files"],"plot_path",default="plots/")
    configs_dict["output_path"] = os.path.join(opt,"respiration/")
    return configs_dict
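
For context, a minimal sketch of the lookup these examples lean on, assuming qcutils.get_keyvaluefromcf walks the nested control-file sections in order and falls back to the default when the key or any section is missing; an illustration, not the library implementation:

def get_keyvaluefromcf_sketch(cf, sections, key, default=None):
    node = cf
    for section in sections:
        if section not in node:
            return default  # missing section, fall back to the default
        node = node[section]
    return node.get(key, default)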
Example #4
def l1qc_process(cf, ds1):
    # get the netCDF attributes from the control file
    qcts.do_attributes(cf, ds1)
    # round the Python datetime to the nearest second
    qcutils.round_datetime(ds1, mode="nearest_second")
    #check for gaps in the Python datetime series and fix if present
    fixtimestepmethod = qcutils.get_keyvaluefromcf(cf, ["options"],
                                                   "FixTimeStepMethod",
                                                   default="round")
    if qcutils.CheckTimeStep(ds1):
        qcutils.FixTimeStep(ds1, fixtimestepmethod=fixtimestepmethod)
    # recalculate the Excel datetime
    qcutils.get_xldatefromdatetime(ds1)
    # get the Year, Month, Day etc from the Python datetime
    qcutils.get_ymdhmsfromdatetime(ds1)
    # write the processing level to a global attribute
    ds1.globalattributes['nc_level'] = str("L1")
    # get the start and end date from the datetime series unless they were
    # given in the control file
    if 'start_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['start_date'] = str(
            ds1.series['DateTime']['Data'][0])
    if 'end_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['end_date'] = str(
            ds1.series['DateTime']['Data'][-1])
    # calculate variances from standard deviations and vice versa
    qcts.CalculateStandardDeviations(cf, ds1)
    # create new variables using user defined functions
    qcts.DoFunctions(cf, ds1)
    # create a series of synthetic downwelling shortwave radiation
    qcts.get_synthetic_fsd(ds1)
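
A standalone sketch of the "round to the nearest second" step named above, assuming the rounding is done on the microsecond component of each Python datetime:

import datetime

def round_to_nearest_second(dt):
    # round up when the fractional part is half a second or more
    if dt.microsecond >= 500000:
        dt = dt + datetime.timedelta(seconds=1)
    return dt.replace(microsecond=0)

print(round_to_nearest_second(datetime.datetime(2016, 9, 1, 11, 29, 59, 600000)))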
Example #5
def do_IRGAcheck(cf,ds):
    """
    Purpose:
     Decide which IRGA check routine to use depending on the setting
     of the "irga_type" key in the [Options] section of the control
     file.  The default is Li7500.
    Usage:
    Author: PRI
    Date: September 2015
    """
    irga_list = ["li7500","li7500a","li7500rs","ec150","ec155","irgason"]
    # get the IRGA type from the control file
    irga_type = qcutils.get_keyvaluefromcf(cf,["Options"],"irga_type", default="li7500")
    # remove any hyphens or spaces
    for item in ["-"," "]:
        if item in irga_type: irga_type = irga_type.replace(item,"")
    # check the IRGA type against the list of supported devices
    if irga_type.lower() not in irga_list:
        msg = " Unrecognised IRGA type "+irga_type+" given in control file, IRGA checks skipped ..."
        log.error(msg)
        return
    # do the IRGA checks
    if irga_type.lower()=="li7500":
        ds.globalattributes["irga_type"] = irga_type
        do_li7500check(cf,ds)
    elif irga_type.lower() in ["li7500a","irgason"]:
        ds.globalattributes["irga_type"] = irga_type
        do_li7500acheck(cf,ds)
    elif irga_type.lower() in ["ec155","ec150","irgason"]:
        ds.globalattributes["irga_type"] = irga_type
        do_EC155check(cf,ds)
    else:
        msg = " Unsupported IRGA type "+irga_type+", contact the devloper ..."
        log.error(msg)
        return
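
The hyphen and space stripping above generalises to a small helper; a hedged sketch (the function name is ours, not the library's):

def normalise_instrument_name(name):
    # strip hyphens and spaces, then lower-case, mirroring do_IRGAcheck
    for ch in ["-", " "]:
        name = name.replace(ch, "")
    return name.lower()

print(normalise_instrument_name("Li-7500 A"))  # prints li7500a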
Example #6
def ImportSeries(cf, ds):
    # check to see if there is an Imports section
    if "Imports" not in cf.keys(): return
    # number of records
    nRecs = int(ds.globalattributes["nc_nrecs"])
    # get the start and end datetime
    ldt = ds.series["DateTime"]["Data"]
    start_date = ldt[0]
    end_date = ldt[-1]
    # loop over the series in the Imports section
    for label in cf["Imports"].keys():
        import_filename = qcutils.get_keyvaluefromcf(cf, ["Imports", label],
                                                     "file_name",
                                                     default="")
        if import_filename == "":
            msg = " ImportSeries: import filename not found in control file, skipping ..."
            logger.warning(msg)
            continue
        var_name = qcutils.get_keyvaluefromcf(cf, ["Imports", label],
                                              "var_name",
                                              default="")
        if var_name == "":
            msg = " ImportSeries: variable name not found in control file, skipping ..."
            logger.warning(msg)
            continue
        ds_import = qcio.nc_read_series(import_filename)
        ts_import = ds_import.globalattributes["time_step"]
        ldt_import = ds_import.series["DateTime"]["Data"]
        si = qcutils.GetDateIndex(ldt_import,
                                  str(start_date),
                                  ts=ts_import,
                                  default=0,
                                  match="exact")
        ei = qcutils.GetDateIndex(ldt_import,
                                  str(end_date),
                                  ts=ts_import,
                                  default=len(ldt_import) - 1,
                                  match="exact")
        data = numpy.ma.ones(nRecs) * float(c.missing_value)
        flag = numpy.ma.ones(nRecs)
        data_import, flag_import, attr_import = qcutils.GetSeriesasMA(
            ds_import, var_name, si=si, ei=ei)
        ldt_import = ldt_import[si:ei + 1]
        index = qcutils.FindIndicesOfBInA(ldt_import, ldt)
        data[index] = data_import
        flag[index] = flag_import
        qcutils.CreateSeries(ds, label, data, flag, attr_import)
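
A minimal sketch of the index matching ImportSeries relies on, assuming qcutils.FindIndicesOfBInA returns, for each element of its first argument that occurs in the second, the position in the second; a dict lookup keeps it linear:

def find_indices_of_b_in_a(b, a):
    positions = {value: index for index, value in enumerate(a)}
    return [positions[value] for value in b if value in positions]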
Example #7
def l1qc(cf):
    # get the data series from the Excel file
    in_filename = qcio.get_infilenamefromcf(cf)
    if not qcutils.file_exists(in_filename, mode="quiet"):
        msg = " Input file " + in_filename + " not found ..."
        logger.error(msg)
        ds1 = qcio.DataStructure()
        ds1.returncodes = {"value": 1, "message": msg}
        return ds1
    file_name, file_extension = os.path.splitext(in_filename)
    if "csv" in file_extension.lower():
        ds1 = qcio.csv_read_series(cf)
        if ds1.returncodes["value"] != 0:
            return ds1
        # get a series of Excel datetime from the Python datetime objects
        qcutils.get_xldatefromdatetime(ds1)
    else:
        ds1 = qcio.xl_read_series(cf)
        if ds1.returncodes["value"] != 0:
            return ds1
        # get a series of Python datetime objects from the Excel datetime
        qcutils.get_datetimefromxldate(ds1)
    # get the netCDF attributes from the control file
    qcts.do_attributes(cf, ds1)
    # round the Python datetime to the nearest second
    qcutils.round_datetime(ds1, mode="nearest_second")
    #check for gaps in the Python datetime series and fix if present
    fixtimestepmethod = qcutils.get_keyvaluefromcf(cf, ["options"],
                                                   "FixTimeStepMethod",
                                                   default="round")
    if qcutils.CheckTimeStep(ds1):
        qcutils.FixTimeStep(ds1, fixtimestepmethod=fixtimestepmethod)
    # recalculate the Excel datetime
    qcutils.get_xldatefromdatetime(ds1)
    # get the Year, Month, Day etc from the Python datetime
    qcutils.get_ymdhmsfromdatetime(ds1)
    # write the processing level to a global attribute
    ds1.globalattributes['nc_level'] = str("L1")
    # get the start and end date from the datetime series unless they were
    # given in the control file
    if 'start_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['start_date'] = str(
            ds1.series['DateTime']['Data'][0])
    if 'end_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['end_date'] = str(
            ds1.series['DateTime']['Data'][-1])
    # calculate variances from standard deviations and vice versa
    qcts.CalculateStandardDeviations(cf, ds1)
    # create new variables using user defined functions
    qcts.DoFunctions(cf, ds1)
    # create a series of synthetic downwelling shortwave radiation
    qcts.get_synthetic_fsd(ds1)
    # check missing data and QC flags are consistent
    qcutils.CheckQCFlags(ds1)

    return ds1
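
A hedged usage sketch for l1qc, assuming a control file read with qcio.get_controlfilecontents and assuming the readers set returncodes on success as well (as the checks above imply); the control file name is a placeholder:

cf = qcio.get_controlfilecontents("controlfiles/site_L1.txt")
ds1 = l1qc(cf)
if ds1.returncodes["value"] != 0:
    print(ds1.returncodes["message"])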
Example #8
def l1qc(cf):
    # get the data series from the Excel file
    in_filename = qcio.get_infilenamefromcf(cf)
    if not qcutils.file_exists(in_filename,mode="quiet"):
        msg = " Input file "+in_filename+" not found ..."
        log.error(msg)
        ds1 = qcio.DataStructure()
        ds1.returncodes = {"value":1,"message":msg}
        return ds1
    file_name,file_extension = os.path.splitext(in_filename)
    if "csv" in file_extension.lower():
        ds1 = qcio.csv_read_series(cf)
        if ds1.returncodes["value"] != 0:
            return ds1
        # get a series of Excel datetime from the Python datetime objects
        qcutils.get_xldatefromdatetime(ds1)
    else:
        ds1 = qcio.xl_read_series(cf)
        if ds1.returncodes["value"] != 0:
            return ds1
        # get a series of Python datetime objects from the Excel datetime
        qcutils.get_datetimefromxldate(ds1)
    # get the netCDF attributes from the control file
    qcts.do_attributes(cf,ds1)
    # round the Python datetime to the nearest second
    qcutils.round_datetime(ds1,mode="nearest_second")
    #check for gaps in the Python datetime series and fix if present
    fixtimestepmethod = qcutils.get_keyvaluefromcf(cf,["options"],"FixTimeStepMethod",default="round")
    if qcutils.CheckTimeStep(ds1): qcutils.FixTimeStep(ds1,fixtimestepmethod=fixtimestepmethod)
    # recalculate the Excel datetime
    qcutils.get_xldatefromdatetime(ds1)
    # get the Year, Month, Day etc from the Python datetime
    qcutils.get_ymdhmsfromdatetime(ds1)
    # write the processing level to a global attribute
    ds1.globalattributes['nc_level'] = str("L1")
    # get the start and end date from the datetime series unless they were
    # given in the control file
    if 'start_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['start_date'] = str(ds1.series['DateTime']['Data'][0])
    if 'end_date' not in ds1.globalattributes.keys():
        ds1.globalattributes['end_date'] = str(ds1.series['DateTime']['Data'][-1])
    # calculate variances from standard deviations and vice versa
    qcts.CalculateStandardDeviations(cf,ds1)
    # create new variables using user defined functions
    qcts.DoFunctions(cf,ds1)
    # create a series of synthetic downwelling shortwave radiation
    qcts.get_synthetic_fsd(ds1)

    return ds1
Example #9
 # number of records
 ds_60minutes.globalattributes["nc_nrecs"] = nRecs
 # processing level
 ds_60minutes.globalattributes["nc_level"] = "L1"
 # latitude and longitude, choose central pixel of 3x3 grid
 ds_60minutes.globalattributes["latitude"] = f.variables["lat"][1]
 ds_60minutes.globalattributes["longitude"] = f.variables["lon"][1]
 # put the ACCESS data into the 60 minute data structure ds_60minutes
 # make a QC flag with a value of 0
 flag_60minutes = numpy.zeros(nRecs)
 # loop over the variables defined in the control file
 for item in ["valid_date","valid_time","lat","lon"]:
     if item in var_list: var_list.remove(item)
 for var in var_list:
     # get the name of the ACCESS variable
     access_name = qcutils.get_keyvaluefromcf(cf,["Variables",var],"access_name",default=var)
     if access_name not in f.variables.keys():
         logging.error("Requested variable "+access_name+" not found in ACCESS data")
         continue
     attr = {}
     for this_attr in f.varattr[access_name].keys():
         attr[this_attr] = f.varattr[access_name][this_attr]
     attr["missing_value"] = c.missing_value
     # loop over all ACCESS grids and give them standard OzFlux names with the grid indices appended
     for i in range(0,3):
         for j in range(0,3):
             if len(f.variables[access_name].shape)==3:
                 var_ij = var+'_'+str(i)+str(j)
                 series = f.variables[access_name][:,i,j]
                 qcutils.CreateSeries(ds_60minutes,var_ij,series,Flag=flag_60minutes,Attr=attr)
             elif len(f.variables[access_name].shape)==4:
                 var_ij = var+'_'+str(i)+str(j)
                 series = f.variables[access_name][:,0,i,j]
                 qcutils.CreateSeries(ds_60minutes,var_ij,series,Flag=flag_60minutes,Attr=attr)
Example #10
def get_accessdata(cf,ds_60minutes,f,info):
    # latitude and longitude, choose central pixel of 3x3 grid
    ds_60minutes.globalattributes["latitude"] = f.variables["lat"][1]
    ds_60minutes.globalattributes["longitude"] = f.variables["lon"][1]
    # list of variables to process
    var_list = list(cf["Variables"].keys())
    # get a series of Python datetimes and put this into the data structure
    valid_date = f.variables["valid_date"][:]
    nRecs = len(valid_date)
    valid_time = f.variables["valid_time"][:]
    dl = [datetime.datetime.strptime(str(int(valid_date[i])*10000+int(valid_time[i])),"%Y%m%d%H%M") for i in range(0,nRecs)]
    dt_utc_all = numpy.array(dl)
    time_step = numpy.array([(dt_utc_all[i]-dt_utc_all[i-1]).total_seconds() for i in range(1,len(dt_utc_all))])
    time_step = numpy.append(time_step,3600)
    idxne0 = numpy.where(time_step!=0)[0]
    idxeq0 = numpy.where(time_step==0)[0]
    idx_clipped = numpy.where((idxeq0>0)&(idxeq0<nRecs))[0]
    idxeq0 = idxeq0[idx_clipped]
    dt_utc = dt_utc_all[idxne0]
    dt_utc = [x.replace(tzinfo=pytz.utc) for x in dt_utc]
    dt_loc = [x.astimezone(info["site_tz"]) for x in dt_utc]
    dt_loc = [x-x.dst() for x in dt_loc]
    dt_loc = [x.replace(tzinfo=None) for x in dt_loc]
    flag = numpy.zeros(len(dt_loc),dtype=numpy.int32)
    ds_60minutes.series["DateTime"] = {}
    ds_60minutes.series["DateTime"]["Data"] = dt_loc
    ds_60minutes.series["DateTime"]["Flag"] = flag
    ds_60minutes.series["DateTime_UTC"] = {}
    ds_60minutes.series["DateTime_UTC"]["Data"] = dt_utc
    ds_60minutes.series["DateTime_UTC"]["Flag"] = flag
    nRecs = len(ds_60minutes.series["DateTime"]["Data"])
    ds_60minutes.globalattributes["nc_nrecs"] = nRecs
    # we're done with valid_date and valid_time, drop them from the variable list
    for item in ["valid_date","valid_time","lat","lon"]:
        if item in var_list: var_list.remove(item)
    # create the QC flag with all zeros
    nRecs = ds_60minutes.globalattributes["nc_nrecs"]
    flag_60minutes = numpy.zeros(nRecs,dtype=numpy.int32)
    # get the UTC hour
    hr_utc = [x.hour for x in dt_utc]
    attr = qcutils.MakeAttributeDictionary(long_name='UTC hour')
    qcutils.CreateSeries(ds_60minutes,'Hr_UTC',hr_utc,Flag=flag_60minutes,Attr=attr)
    # now loop over the variables listed in the control file
    for label in var_list:
        # get the name of the variable in the ACCESS file
        access_name = qcutils.get_keyvaluefromcf(cf,["Variables",label],"access_name",default=label)
        # warn the user if the variable is not found
        if access_name not in list(f.variables.keys()):
            msg = "Requested variable "+access_name
            msg = msg+" not found in ACCESS data"
            logging.error(msg)
            continue
        # get the variable attributes
        attr = get_variableattributes(f,access_name)
        # loop over the 3x3 matrix of ACCESS grid data supplied
        for i in range(0,3):
            for j in range(0,3):
                label_ij = label+'_'+str(i)+str(j)
                if len(f.variables[access_name].shape)==3:
                    series = f.variables[access_name][:,i,j]
                elif len(f.variables[access_name].shape)==4:
                    series = f.variables[access_name][:,0,i,j]
                else:
                    msg = "Unrecognised variable ("+label
                    msg = msg+") dimension in ACCESS file"
                    logging.error(msg)
                    continue
                series = series[idxne0]
                qcutils.CreateSeries(ds_60minutes,label_ij,series,
                                     Flag=flag_60minutes,Attr=attr)
    return
Example #11
def access_read_mfiles2(file_list,var_list=[]):
    f = ACCESSData()
    # check that we have a list of files to process
    if len(file_list)==0:
        print("access_read_mfiles: empty file_list received, returning ...")
        return f
    # make sure latitude and longitude are read
    if "lat" not in var_list: var_list.append("lat")
    if "lon" not in var_list: var_list.append("lon")
    # make sure valid_date and valid_time are read
    if "valid_date" not in var_list: var_list.append("valid_date")
    if "valid_time" not in var_list: var_list.append("valid_time")
    for file_name in file_list:
        # open the netCDF file
        ncfile = netCDF4.Dataset(file_name)
        # check the number of records
        dims = ncfile.dimensions
        shape = (len(dims["time"]),len(dims["lat"]),len(dims["lon"]))
        # move to the next file if this file doesn't have 1 time record
        if shape[0]!=1:
            print("access_read_mfiles: length of time dimension in "+file_name+" is "+str(shape[0])+" (expected 1)")
            continue
        # move to the next file if this file doesn't have 3 latitude records
        if shape[1]!=3:
            print("access_read_mfiles: length of lat dimension in "+file_name+" is "+str(shape[1])+" (expected 3)")
            continue
        # move to the next file if this file doesn't have 3 longitude records
        if shape[2]!=3:
            print("access_read_mfiles: length of lon dimension in "+file_name+" is "+str(shape[2])+" (expected 3)")
            continue
        # seems OK to continue with this file ...
        # add the file name to the file_list in the global attributes
        f.globalattr["file_list"].append(file_name)
        # get the global attributes
        for gattr in ncfile.ncattrs():
            if gattr not in f.globalattr:
                f.globalattr[gattr] = getattr(ncfile,gattr)
        # if no variable list was passed to this routine, use all variables
        if len(var_list)==0:
            var_list=list(ncfile.variables.keys())
        # load the data into the data structure
        for var in var_list:
            # get the name of the variable in the ACCESS file
            # NOTE: cf is not a parameter of this function; the code appears
            # to rely on a module-level control file object
            access_name = qcutils.get_keyvaluefromcf(cf,["Variables",var],"access_name",default=var)
            # check that the requested variable exists in the ACCESS file
            if access_name in list(ncfile.variables.keys()):
                # check to see if the variable is already in the data structure
                if access_name not in list(f.variables.keys()):
                    f.variables[access_name] = ncfile.variables[access_name][:]
                else:
                    f.variables[access_name] = numpy.concatenate((f.variables[access_name],ncfile.variables[access_name][:]),axis=0)
                # now copy the variable attributes
                # create the variable attribute dictionary
                if access_name not in f.varattr: f.varattr[access_name] = {}
                # loop over the variable attributes
                for this_attr in ncfile.variables[access_name].ncattrs():
                    # check to see if the attribute has already been copied
                    if this_attr not in list(f.varattr[access_name].keys()):
                        # add the variable attribute if it's not there already
                        f.varattr[access_name][this_attr] = getattr(ncfile.variables[access_name],this_attr)
            else:
                print("access_read_mfiles: ACCESS variable "+access_name+" not found in "+file_name)
                if access_name not in list(f.variables.keys()):
                    f.variables[access_name] = makedummyseries(shape)
                else:
                    f.variables[access_name] = numpy.concatenate((f.variables[access_name],makedummyseries(shape)),axis=0)
        # close the netCDF file
        ncfile.close()
    # return with the data structure
    return f
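
makedummyseries is called above but not shown; a minimal sketch, assuming it returns a fully masked array with the given (time, lat, lon) shape so that concatenation across files stays aligned:

import numpy

def makedummyseries(shape):
    return numpy.ma.masked_all(shape)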
Example #12
def rpLT_createdict(cf, ds, series):
    """
    Purpose:
     Creates a dictionary in ds to hold information about estimating ecosystem
     respiration using the Lloyd-Taylor method.
    Usage:
    Author: PRI
    Date: October 2015
    """
    # get the section of the control file containing the series
    section = qcutils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section) == 0:
        logger.error("ERUsingLloydTaylor: Series " + series +
                     " not found in control file, skipping ...")
        return
    # check that none of the drivers have missing data
    driver_list = ast.literal_eval(
        cf[section][series]["ERUsingLloydTaylor"]["drivers"])
    target = cf[section][series]["ERUsingLloydTaylor"]["target"]
    for label in driver_list:
        data, flag, attr = qcutils.GetSeriesasMA(ds, label)
        if numpy.ma.count_masked(data) != 0:
            logger.error("ERUsingLloydTaylor: driver " + label +
                         " contains missing data, skipping target " + target)
            return
    # create the dictionary keys for this series
    rpLT_info = {}
    # site name
    rpLT_info["site_name"] = ds.globalattributes["site_name"]
    # source series for ER
    opt = qcutils.get_keyvaluefromcf(cf,
                                     [section, series, "ERUsingLloydTaylor"],
                                     "source",
                                     default="Fc")
    rpLT_info["source"] = opt
    # target series name
    rpLT_info["target"] = cf[section][series]["ERUsingLloydTaylor"]["target"]
    # list of drivers
    rpLT_info["drivers"] = ast.literal_eval(
        cf[section][series]["ERUsingLloydTaylor"]["drivers"])
    # name of SOLO output series in ds
    rpLT_info["output"] = cf[section][series]["ERUsingLloydTaylor"]["output"]
    # results of best fit for plotting later on
    rpLT_info["results"] = {
        "startdate": [],
        "enddate": [],
        "No. points": [],
        "r": [],
        "Bias": [],
        "RMSE": [],
        "Frac Bias": [],
        "NMSE": [],
        "Avg (obs)": [],
        "Avg (LT)": [],
        "Var (obs)": [],
        "Var (LT)": [],
        "Var ratio": [],
        "m_ols": [],
        "b_ols": []
    }
    # create the configuration dictionary
    rpLT_info["configs_dict"] = get_configs_dict(cf, ds)
    # create an empty series in ds if the output series doesn't exist yet
    if rpLT_info["output"] not in ds.series.keys():
        data, flag, attr = qcutils.MakeEmptySeries(ds, rpLT_info["output"])
        qcutils.CreateSeries(ds, rpLT_info["output"], data, flag, attr)
    # create the merge directory in the data structure
    if "merge" not in dir(ds): ds.merge = {}
    if "standard" not in ds.merge.keys(): ds.merge["standard"] = {}
    # create the dictionary keys for this series
    ds.merge["standard"][series] = {}
    # output series name
    ds.merge["standard"][series]["output"] = series
    # source
    ds.merge["standard"][series]["source"] = ast.literal_eval(
        cf[section][series]["MergeSeries"]["Source"])
    # create an empty series in ds if the output series doesn't exist yet
    if ds.merge["standard"][series]["output"] not in ds.series.keys():
        data, flag, attr = qcutils.MakeEmptySeries(
            ds, ds.merge["standard"][series]["output"])
        qcutils.CreateSeries(ds, ds.merge["standard"][series]["output"], data,
                             flag, attr)
    return rpLT_info
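
A hedged usage sketch for rpLT_createdict, assuming a control file section laid out with the keys read above; the series name is a placeholder:

rpLT_info = rpLT_createdict(cf, ds, "ER")
if rpLT_info is not None:
    print(rpLT_info["target"], rpLT_info["drivers"], rpLT_info["output"])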
Example #13
def get_configs_dict(cf, ds):
    #    configs_dict = {'nan_value': -9999,
    #                    'minimum_temperature_spread': 5,
    #                    'step_size_days': 5,
    #                    'window_size_days': 15,
    #                    'min_pct_annual': 30,
    #                    'min_pct_noct_window': 20,
    #                    'min_pct_day_window': 50,
    #                    'output_plots': False,
    #                    'measurement_interval': 0.5,
    #                    'QC_accept_code': 0,
    #                    'plot_output_path': '/home/imchugh/Documents'}
    configs_dict = {}
    configs_dict["nan_value"] = int(c.missing_value)
    opt = qcutils.get_keyvaluefromcf(cf, ["ER", "ER_LT", "ERUsingLloydTaylor"],
                                     "minimum_temperature_spread",
                                     default=5)
    configs_dict["minimum_temperature_spread"] = int(opt)
    opt = qcutils.get_keyvaluefromcf(cf, ["ER", "ER_LT", "ERUsingLloydTaylor"],
                                     "step_size_days",
                                     default=5)
    configs_dict["step_size_days"] = int(opt)
    opt = qcutils.get_keyvaluefromcf(cf, ["ER", "ER_LT", "ERUsingLloydTaylor"],
                                     "window_size_days",
                                     default=15)
    configs_dict["window_size_days"] = int(opt)
    opt = qcutils.get_keyvaluefromcf(cf, ["ER", "ER_LT", "ERUsingLloydTaylor"],
                                     "minimum_percent_annual",
                                     default=30)
    configs_dict["minimum_pct_annual"] = int(opt)
    opt = qcutils.get_keyvaluefromcf(cf, ["ER", "ER_LT", "ERUsingLloydTaylor"],
                                     "minimum_percent_noct_window",
                                     default=20)
    configs_dict["minimum_pct_noct_window"] = int(opt)
    #opt = qcutils.get_keyvaluefromcf(cf,["ER","ER_LT","ERUsingLloydTaylor"],
    #"minimum_percent_day_window",
    #default=50)
    #configs_dict["minimum_pct_day_window"] = int(opt)
    opt = qcutils.get_keyvaluefromcf(cf, ["ER", "ER_LT", "ERUsingLloydTaylor"],
                                     "output_plots",
                                     default="False")
    configs_dict["output_plots"] = (opt == "True")
    opt = qcutils.get_keyvaluefromcf(cf, ["ER", "ER_LT", "ERUsingLloydTaylor"],
                                     "show_plots",
                                     default="False")
    configs_dict["show_plots"] = (opt == "True")
    opt = qcutils.get_keyvaluefromcf(cf, ["ER", "ER_LT", "ERUsingLloydTaylor"],
                                     "target",
                                     default="ER")
    configs_dict["target"] = str(opt)
    opt = qcutils.get_keyvaluefromcf(cf, ["ER", "ER_LT", "ERUsingLloydTaylor"],
                                     "drivers",
                                     default="['Ta']")
    configs_dict["drivers"] = ast.literal_eval(opt)[0]
    opt = qcutils.get_keyvaluefromcf(cf, ["ER", "ER_LT", "ERUsingLloydTaylor"],
                                     "output",
                                     default="ER_LT_all")
    configs_dict["output_label"] = opt
    configs_dict["output_results"] = True
    ts = int(ds.globalattributes["time_step"])
    configs_dict["measurement_interval"] = float(ts) / 60.0
    configs_dict["QC_accept_code"] = 0
    opt = qcutils.get_keyvaluefromcf(cf, ["Files"],
                                     "plot_path",
                                     default="plots/")
    configs_dict["output_path"] = os.path.join(opt, "respiration/")
    return configs_dict
Example #14
def gfSOLO_createdict(cf, ds, series):
    """ Creates a dictionary in ds to hold information about the SOLO data used
        to gap fill the tower data."""
    # get the section of the control file containing the series
    section = qcutils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section) == 0:
        logger.error(
            "GapFillUsingSOLO: Series %s not found in control file, skipping ...",
            series)
        return
    # create the solo directory in the data structure
    if "solo" not in dir(ds): ds.solo = {}
    # name of SOLO output series in ds
    output_list = cf[section][series]["GapFillUsingSOLO"].keys()
    # loop over the outputs listed in the control file
    for output in output_list:
        # create the dictionary keys for this series
        ds.solo[output] = {}
        # get the target
        if "target" in cf[section][series]["GapFillUsingSOLO"][output]:
            ds.solo[output]["label_tower"] = cf[section][series][
                "GapFillUsingSOLO"][output]["target"]
        else:
            ds.solo[output]["label_tower"] = series
        # site name
        ds.solo[output]["site_name"] = ds.globalattributes["site_name"]
        # list of SOLO settings
        if "solo_settings" in cf[section][series]["GapFillUsingSOLO"][output]:
            ss_list = ast.literal_eval(cf[section][series]["GapFillUsingSOLO"]
                                       [output]["solo_settings"])
            ds.solo[output]["solo_settings"] = {}
            ds.solo[output]["solo_settings"]["nodes_target"] = int(ss_list[0])
            ds.solo[output]["solo_settings"]["training"] = int(ss_list[1])
            ds.solo[output]["solo_settings"]["factor"] = int(ss_list[2])
            ds.solo[output]["solo_settings"]["learningrate"] = float(
                ss_list[3])
            ds.solo[output]["solo_settings"]["iterations"] = int(ss_list[4])
        # list of drivers
        ds.solo[output]["drivers"] = ast.literal_eval(
            cf[section][series]["GapFillUsingSOLO"][output]["drivers"])
        # apply ustar filter
        opt = qcutils.get_keyvaluefromcf(
            cf, [section, series, "GapFillUsingSOLO", output],
            "turbulence_filter",
            default="")
        ds.solo[output]["turbulence_filter"] = opt
        opt = qcutils.get_keyvaluefromcf(
            cf, [section, series, "GapFillUsingSOLO", output],
            "daynight_filter",
            default="")
        ds.solo[output]["daynight_filter"] = opt
        # results of best fit for plotting later on
        ds.solo[output]["results"] = {
            "startdate": [],
            "enddate": [],
            "No. points": [],
            "r": [],
            "Bias": [],
            "RMSE": [],
            "Frac Bias": [],
            "NMSE": [],
            "Avg (obs)": [],
            "Avg (SOLO)": [],
            "Var (obs)": [],
            "Var (SOLO)": [],
            "Var ratio": [],
            "m_ols": [],
            "b_ols": []
        }
        # create an empty series in ds if the SOLO output series doesn't exist yet
        if output not in ds.series.keys():
            data, flag, attr = qcutils.MakeEmptySeries(ds, output)
            qcutils.CreateSeries(ds, output, data, flag, attr)
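
The solo_settings value is a string parsed with ast.literal_eval; an illustration with made-up numbers showing the layout the code above expects (nodes, training, factor, learning rate, iterations):

import ast
ss_list = ast.literal_eval("[5,500,5,0.001,500]")
print(int(ss_list[0]), int(ss_list[1]), int(ss_list[2]), float(ss_list[3]), int(ss_list[4]))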
Example #15
def CPD_run(cf):
    # *** original code from IMcH
    ## Prompt user for configuration file and get it
    #root = Tkinter.Tk(); root.withdraw()
    #cfName = tkFileDialog.askopenfilename(initialdir='')
    #root.destroy()
    #cf=ConfigObj(cfName)
    
    # Set input file and output path and create directories for plots and results
    file_in = os.path.join(cf['Files']['file_path'],cf['Files']['in_filename'])
    path_out = cf['Files']['file_path']
    #path_out = os.path.join(path_out,'CPD')
    file_out = os.path.join(cf['Files']['file_path'],cf['Files']['in_filename'].replace(".nc","_CPD.xls"))
    plot_path = "plots/"
    if "plot_path" in cf["Files"]: plot_path = os.path.join(cf["Files"]["plot_path"],"CPD/")
    #plot_path = os.path.join(path_out,'Plots')
    if not os.path.isdir(plot_path): os.makedirs(plot_path)
    results_path = path_out
    if not os.path.isdir(results_path): os.makedirs(results_path)
    # **** original code from IMcH
    #file_in=os.path.join(cf['files']['input_path'],cf['files']['input_file'])
    #path_out=cf['files']['output_path']
    #plot_path_out=os.path.join(path_out,'Plots')
    #if not os.path.isdir(plot_path_out): os.makedirs(os.path.join(path_out,'Plots'))
    #results_path_out=os.path.join(path_out,'Results')
    #if not os.path.isdir(results_path_out): os.makedirs(os.path.join(path_out,'Results'))    

    # Get user-set variable names from config file
    # *** original code from IMcH
    #vars_data=[cf['variables']['data'][i] for i in cf['variables']['data']]
    #vars_QC=[cf['variables']['QC'][i] for i in cf['variables']['QC']]
    #vars_all=vars_data+vars_QC
       
    vars_data = []
    for item in cf["Variables"].keys():
        if "AltVarName" in cf["Variables"][item].keys():
            vars_data.append(str(cf["Variables"][item]["AltVarName"]))
        else:
            vars_data.append(str(item))
    vars_QC = []
    for item in vars_data:
        vars_QC.append(item+"_QCFlag")
    vars_all = vars_data+vars_QC

    # Read .nc file
    # *** original code from IMcH
    #nc_obj=netCDF4.Dataset(file_in)
    #flux_frequency=int(nc_obj.time_step)
    #dates_list=[dt.datetime(*xlrd.xldate_as_tuple(elem,0)) for elem in nc_obj.variables['xlDateTime']]
    #d={}
    #for i in vars_all:
        #d[i]=nc_obj.variables[i][:]
    #nc_obj.close()
    #df=pd.DataFrame(d,index=dates_list)
    log.info(' Reading netCDF file '+file_in)   
    ncFile = netCDF4.Dataset(file_in)
    flux_period=int(ncFile.time_step)
    dates_list=[dt.datetime(*xlrd.xldate_as_tuple(elem,0)) for elem in ncFile.variables['xlDateTime']]
    d={}
    for item in vars_all:
        nDims = len(ncFile.variables[item].shape)
        if nDims not in [1,3]:
            msg = "CPD_run: unrecognised number of dimensions ("+str(nDims)
            msg = msg+") for netCDF variable "+item
            raise Exception(msg)
        if nDims==1:
            # single dimension
            d[item] = ncFile.variables[item][:]
        elif nDims==3:
            # 3 dimensions
            d[item] = ncFile.variables[item][:,0,0]
    df=pd.DataFrame(d,index=dates_list)

    # Build dictionary of additional configs
    # *** original code from IMcH
    #d={}
    #d['radiation_threshold']=int(cf['options']['radiation_threshold'])
    #d['num_bootstraps']=int(cf['options']['num_bootstraps'])
    #d['flux_frequency']=flux_frequency
    #if cf['options']['output_plots']=='True':
        #d['plot_output_path']=plot_path_out
    #if cf['options']['output_results']=='True':
        #d['results_output_path']=results_path_out
    d={}
    d['radiation_threshold']=int(cf['Options']['Fsd_threshold'])
    d['num_bootstraps']=int(cf['Options']['Num_bootstraps'])
    d['flux_period']=flux_period
    d['site_name']=getattr(ncFile,"site_name")
    d["call_mode"]=qcutils.get_keyvaluefromcf(cf,["Options"],"call_mode",default="interactive",mode="quiet")
    d["show_plots"]=qcutils.get_keyvaluefromcf(cf,["Options"],"show_plots",default=True,mode="quiet")
    if cf['Options']['Output_plots']=='True':
        d['plot_path']=plot_path
    if cf['Options']['Output_results']=='True':
        d['results_path']=results_path
        d["file_out"]=file_out

    # Replace configured error values with NaNs and remove data with unacceptable QC codes, then drop flags
    # *** original code from IMcH
    #df.replace(int(cf['options']['nan_value']),np.nan)
    #if 'QC_accept_codes' in cf['options']:
        #QC_accept_codes=ast.literal_eval(cf['options']['QC_accept_codes'])
        #eval_string='|'.join(['(df[vars_QC[i]]=='+str(i)+')' for i in QC_accept_codes])
        #for i in xrange(4):
            #df[vars_data[i]]=np.where(eval(eval_string),df[vars_data[i]],np.nan)
    df=df.replace(c.missing_value,np.nan)
    eval_string='|'.join(['(df[vars_QC[i]]=='+str(i)+')' for i in [0,10]])
    #for i in xrange(len(vars_data)):
    for i in range(len(vars_data)):
        df[vars_data[i]]=np.where(eval(eval_string),df[vars_data[i]],np.nan)
    df=df[vars_data]

    ncFile.close()
    
    return df,d
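
The eval/join idiom above can be written without eval; a hedged equivalent that keeps values whose QC flag is in the accepted set (df is the pandas DataFrame built above):

import numpy as np

def mask_by_qc(df, vars_data, vars_QC, accept_codes=(0, 10)):
    for data_name, qc_name in zip(vars_data, vars_QC):
        ok = df[qc_name].isin(accept_codes)
        df[data_name] = np.where(ok, df[data_name], np.nan)
    return df[vars_data]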
Example #16
 start = modis_dt[0]
 end = modis_dt[-1]
 modis_dt_interp = [
     result
     for result in perdelta(start, end, datetime.timedelta(minutes=ts))
 ]
 modis_time_interp = netCDF4.date2num(modis_dt_interp, modis_time_units)
 modis_time_masked = numpy.ma.masked_where(
     numpy.ma.getmaskarray(evi_masked_median) == True, modis_time)
 modis_time_comp = numpy.ma.compressed(modis_time_masked)
 evi_masked_median_comp = numpy.ma.compressed(evi_masked_median)
 x_org = modis_time_comp
 y_org = evi_masked_median_comp
 # interpolate onto the tower time step
 interp_type = qcutils.get_keyvaluefromcf(cf, ["EVI"],
                                          "interp_type",
                                          default="linear")
 if interp_type.lower() not in ["linear", "smooth_interp"]:
     msg = " Unrecognised interpolation type (" + interp_type + "), using linear ..."
     log.warning(msg)
     interp_type = "linear"
 if interp_type.lower() == "linear":
     # linear interpolation
     log.info(" Using linear interpolation")
     f = scipy.interpolate.interp1d(x_org, y_org, bounds_error=False)
     evi_interp = f(modis_time_interp)
     filter_type = qcutils.get_keyvaluefromcf(cf, ["EVI"],
                                              "filter_type",
                                              default="savgol")
     if filter_type.lower() not in ["savgol"]:
         msg = " Unrecognised filter type (" + filter_type + "), using Savitsky-Golay ..."
Example #17
         cfname = cf_batch["Levels"][level][i]
         logging.info('Starting FluxNet output with '+cfname)
         cf = qcio.get_controlfilecontents(cfname)
         qcio.fn_write_csv(cf)
         logging.info('Finished FluxNet output with '+cfname)
         logging.info('')
 elif level.lower()=="concatenate":
     # concatenate netCDF files
     for i in cf_batch["Levels"][level].keys():
         cfname = cf_batch["Levels"][level][i]
         logging.info('Starting concatenation with '+cfname)
         cf_cc = qcio.get_controlfilecontents(cfname)
         qcio.nc_concatenate(cf_cc)
         logging.info('Finished concatenation with '+cfname)
         # now plot the fingerprints for the concatenated files
         opt = qcutils.get_keyvaluefromcf(cf_cc,["Options"],"DoFingerprints", default="yes")
         if opt.lower()=="no": continue
         cf_fp = qcio.get_controlfilecontents("controlfiles/standard/fingerprint.txt")
         if "Files" not in dir(cf_fp): cf_fp["Files"] = {}
         file_name = cf_cc["Files"]["Out"]["ncFileName"]
         file_path = ntpath.split(file_name)[0]+"/"
         cf_fp["Files"]["file_path"] = file_path
         cf_fp["Files"]["in_filename"] = ntpath.split(file_name)[1]
         cf_fp["Files"]["plot_path"] = file_path[:file_path.index("Data")]+"Plots/"
         if "Options" not in cf_fp: cf_fp["Options"]={}
         cf_fp["Options"]["call_mode"] = "batch"
         cf_fp["Options"]["show_plots"] = "no"
         logging.info('Doing fingerprint plots using '+cf_fp["Files"]["in_filename"])
         qcplot.plot_fingerprint(cf_fp)
         logging.info('Finished fingerprint plots')
         logging.info('')
Example #18
def do_dependencycheck(cf, ds, section, series, code=23, mode="quiet"):
    """
    Purpose:
    Usage:
    Author: PRI
    Date: Back in the day
    """
    if len(section) == 0 and len(series) == 0: return
    if len(section) == 0:
        section = qcutils.get_cfsection(cf, series=series, mode='quiet')
    if "DependencyCheck" not in cf[section][series].keys(): return
    if "Source" not in cf[section][series]["DependencyCheck"]:
        msg = " DependencyCheck: keyword Source not found for series " + series + ", skipping ..."
        logger.error(msg)
        return
    if mode == "verbose":
        msg = " Doing DependencyCheck for " + series
        logger.info(msg)
    # get the precursor source list from the control file
    source_list = ast.literal_eval(
        cf[section][series]["DependencyCheck"]["Source"])
    # check to see if the "ignore_missing" flag is set
    opt = qcutils.get_keyvaluefromcf(cf, [section, series, "DependencyCheck"],
                                     "ignore_missing",
                                     default="no")
    ignore_missing = False
    if opt.lower() in ["yes", "y", "true", "t"]:
        ignore_missing = True
    # get the data
    dependent_data, dependent_flag, dependent_attr = qcutils.GetSeries(
        ds, series)
    # loop over the precursor source list
    for item in source_list:
        # check the precursor is in the data structure
        if item not in ds.series.keys():
            msg = " DependencyCheck: " + series + " precursor series " + item + " not found, skipping ..."
            logger.warning(msg)
            continue
        # get the precursor data
        precursor_data, precursor_flag, precursor_attr = qcutils.GetSeries(
            ds, item)
        # check if the user wants to ignore missing precursor data
        if ignore_missing:
            # they do, so make an array of missing values
            nRecs = int(ds.globalattributes["nc_nrecs"])
            missing_array = numpy.ones(nRecs) * float(c.missing_value)
            # and find the indices of elements equal to the missing value
            bool_array = numpy.isclose(precursor_data, missing_array)
            idx = numpy.where(bool_array)[0]
            # and set these flags to 0 so missing data is ignored
            precursor_flag[idx] = numpy.int32(0)
        # mask the dependent data where the precursor flag shows data not OK
        dependent_data = numpy.ma.masked_where(
            numpy.mod(precursor_flag, 10) != 0, dependent_data)
        # get an index where the precursor flag shows data not OK
        idx = numpy.ma.where(numpy.mod(precursor_flag, 10) != 0)[0]
        # set the dependent QC flag
        dependent_flag[idx] = numpy.int32(code)
    # put the data back into the data structure
    dependent_attr["DependencyCheck_source"] = str(source_list)
    qcutils.CreateSeries(ds, series, dependent_data, dependent_flag,
                         dependent_attr)
    # our work here is done
    return
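
A hedged sketch of the control-file entry this check reads, in the nested ConfigObj syntax used throughout these examples; the series and precursor names are placeholders:

[Fluxes]
    [[Fc]]
        [[[DependencyCheck]]]
            Source = "['ustar','Fsd']"
            ignore_missing = "no"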
Example #19
def access_read_mfiles2(file_list,var_list=[]):
    f = ACCESSData()
    # check that we have a list of files to process
    if len(file_list)==0:
        print "access_read_mfiles: empty file_list received, returning ..."
        return f
    # make sure latitude and longitude are read
    if "lat" not in var_list: var_list.append("lat")
    if "lon" not in var_list: var_list.append("lon")
    # make sure valid_date and valid_time are read
    if "valid_date" not in var_list: var_list.append("valid_date")
    if "valid_time" not in var_list: var_list.append("valid_time")
    for file_name in file_list:
        # open the netCDF file
        ncfile = Dataset(file_name)
        # check the number of records
        dims = ncfile.dimensions
        shape = (len(dims["time"]),len(dims["lat"]),len(dims["lon"]))
        # move to the next file if this file doesn't have 25 time records
        if shape[0]!=25:
            print "access_read_mfiles: length of time dimension in "+file_name+" is "+str(shape[0])+" (expected 25)"
            continue
        # move to the next file if this file doesn't have 3 latitude records
        if shape[1]!=3:
            print "access_read_mfiles: length of lat dimension in "+file_name+" is "+str(shape[1])+" (expected 3)"
            continue
        # move to the next file if this file doesn't have 3 longitude records
        if shape[2]!=3:
            print "access_read_mfiles: length of lon dimension in "+file_name+" is "+str(shape[2])+" (expected 3)"
            continue
        # seems OK to continue with this file ...
        # add the file name to the file_list in the global attributes
        f.globalattr["file_list"].append(file_name)
        # get the global attributes
        for gattr in ncfile.ncattrs():
            if gattr not in f.globalattr:
                f.globalattr[gattr] = getattr(ncfile,gattr)
        # if no variable list was passed to this routine, use all variables
        if len(var_list)==0: var_list=ncfile.variables.keys()
        # load the data into the data structure
        for var in var_list:
            # get the name of the variable in the ACCESS file
            # NOTE: cf is not a parameter of this function; the code appears
            # to rely on a module-level control file object
            access_name = qcutils.get_keyvaluefromcf(cf,["Variables",var],"access_name",default=var)
            # check that the requested variable exists in the ACCESS file
            if access_name in ncfile.variables.keys():
                # check to see if the variable is already in the data structure
                if access_name not in f.variables.keys():
                    f.variables[access_name] = ncfile.variables[access_name][:]
                else:
                    f.variables[access_name] = numpy.concatenate((f.variables[access_name],ncfile.variables[access_name][:]),axis=0)
                # now copy the variable attributes
                # create the variable attribute dictionary
                if access_name not in f.varattr: f.varattr[access_name] = {}
                # loop over the variable attributes
                for this_attr in ncfile.variables[access_name].ncattrs():
                    # check to see if the attribute has already been copied
                    if this_attr not in f.varattr[access_name].keys():
                        # add the variable attribute if it's not there already
                        f.varattr[access_name][this_attr] = getattr(ncfile.variables[access_name],this_attr)
            else:
                print "access_read_mfiles: ACCESS variable "+access_name+" not found in "+file_name
                if access_name not in f.variables.keys():
                    f.variables[access_name] = makedummyseries(shape)
                else:
                    f.variables[access_name] = numpy.concatenate((f.variables[access_name],makedummyseries(shape)),axis=0)
        # close the netCDF file
        ncfile.close()
    # return with the data structure
    return f
Example #20
 # now loop over the sites
 for site in site_list:
     # get the output file name
     if not os.path.exists(cf["Sites"][site]["out_filepath"]):
         os.makedirs(cf["Sites"][site]["out_filepath"])
     out_filename = os.path.join(cf["Sites"][site]["out_filepath"],
                                 cf["Sites"][site]["out_filename"])
     # get the metadata from the control file
     site_name = cf["Sites"][site]["site_name"]
     print " Processing " + site_name
     site_timezone = cf["Sites"][site]["site_timezone"]
     site_latitude = float(cf["Sites"][site]["site_latitude"])
     site_longitude = float(cf["Sites"][site]["site_longitude"])
     site_timestep = int(cf["Sites"][site]["site_timestep"])
     site_sa_limit = qcutils.get_keyvaluefromcf(cf, ["Sites", site],
                                                "site_sa_limit",
                                                default=5)
     # index of the site in latitude dimension
     site_lat_index = int(((latitude[0] - site_latitude) / lat_resolution) +
                          0.5)
     erai_latitude = latitude[site_lat_index]
     # index of the site in longitude dimension
     if site_longitude < 0: site_longitude = float(360) + site_longitude
     site_lon_index = int((
         (site_longitude - longitude[0]) / lon_resolution) + 0.5)
     erai_longitude = longitude[site_lon_index]
     print "  Site coordinates: ", site_latitude, site_longitude
     print "  ERAI grid: ", latitude[site_lat_index], longitude[
         site_lon_index]
     # get an instance of the Datastructure
     ds_erai = qcio.DataStructure()
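
The index arithmetic above picks the nearest grid cell by rounding; a hedged standalone check with made-up numbers (the subtraction order above implies a latitude grid stored north to south):

lat0, lat_resolution = 90.0, 0.75  # assumed grid origin and spacing
site_latitude = -35.66
site_lat_index = int((lat0 - site_latitude) / lat_resolution + 0.5)
print(site_lat_index)              # index of the nearest grid latitude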
Example #21
def CPD_run(cf):
    # Set input file and output path and create directories for plots and results
    path_out = cf['Files']['file_path']
    file_in = os.path.join(cf['Files']['file_path'],
                           cf['Files']['in_filename'])
    #
    if "out_filename" in cf['Files']:
        file_out = os.path.join(cf['Files']['file_path'],
                                cf['Files']['out_filename'])
    else:
        file_out = os.path.join(
            cf['Files']['file_path'],
            cf['Files']['in_filename'].replace(".nc", "_CPD.xls"))
    plot_path = "plots/"
    if "plot_path" in cf["Files"]:
        plot_path = os.path.join(cf["Files"]["plot_path"], "CPD/")
    if not os.path.isdir(plot_path): os.makedirs(plot_path)
    results_path = path_out
    if not os.path.isdir(results_path): os.makedirs(results_path)
    # get a dictionary of the variable names
    var_list = cf["Variables"].keys()
    names = {}
    for item in var_list:
        if "AltVarName" in cf["Variables"][item].keys():
            names[item] = cf["Variables"][item]["AltVarName"]
        else:
            names[item] = item
    # add the xlDateTime
    names["xlDateTime"] = "xlDateTime"
    names["Year"] = "Year"
    # read the netcdf file
    logger.info(' Reading netCDF file ' + file_in)
    ds = qcio.nc_read_series(file_in)
    dates_list = ds.series["DateTime"]["Data"]
    nrecs = int(ds.globalattributes["nc_nrecs"])
    # now get the data
    d = {}
    f = {}
    for item in names.keys():
        data, flag, attr = qcutils.GetSeries(ds, names[item])
        d[item] = np.where(data == c.missing_value, np.nan, data)
        f[item] = flag
    # set all data to NaNs if any flag not 0 or 10
    for item in f.keys():
        idx = np.where((f[item] != 0) & (f[item] != 10))[0]
        if len(idx) != 0:
            for itemd in d.keys():
                d[itemd][idx] = np.nan
    df = pd.DataFrame(d, index=dates_list)
    # replace missing values with NaN
    df = df.replace(c.missing_value, np.nan)
    # Build dictionary of additional configs
    d = {}
    d['radiation_threshold'] = int(cf['Options']['Fsd_threshold'])
    d['num_bootstraps'] = int(cf['Options']['Num_bootstraps'])
    d['flux_period'] = int(ds.globalattributes["time_step"])
    d['site_name'] = ds.globalattributes["site_name"]
    d["call_mode"] = qcutils.get_keyvaluefromcf(cf, ["Options"],
                                                "call_mode",
                                                default="interactive",
                                                mode="quiet")
    d["show_plots"] = qcutils.get_keyvaluefromcf(cf, ["Options"],
                                                 "show_plots",
                                                 default=True,
                                                 mode="quiet")
    d['plot_tclass'] = False
    if cf['Options']['Plot_TClass'] == 'True': d['plot_tclass'] = True
    if cf['Options']['Output_plots'] == 'True':
        d['plot_path'] = plot_path
    if cf['Options']['Output_results'] == 'True':
        d['results_path'] = results_path
        d["file_out"] = file_out

    return df, d
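The flag handling in CPD_run keeps only records whose QC flag is 0 or 10 in every series; a record rejected in any one series is set to NaN across all of them. A toy illustration of that masking (series names and flag values are made up):

import numpy as np
import pandas as pd

d = {"Fc": np.array([1.0, 2.0, 3.0, 4.0]),
     "ustar": np.array([0.3, 0.4, 0.5, 0.6])}
f = {"Fc": np.array([0, 10, 1, 0]),      # flag 1 at index 2 -> rejected
     "ustar": np.array([0, 0, 0, 30])}   # flag 30 at index 3 -> rejected
for item in f.keys():
    idx = np.where((f[item] != 0) & (f[item] != 10))[0]
    for itemd in d.keys():
        d[itemd][idx] = np.nan
df = pd.DataFrame(d)
print(df)  # rows 2 and 3 are NaN in both columns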
Example #22
def l4qc(cf, ds3):

    # !!! code here to use existing L4 file
    # logic
    # if the L4 doesn't exist
    #  - create ds4 by using copy.deepcopy(ds3)
    # if the L4 does exist and the "UseExistingL4File" option is False
    #  - create ds4 by using copy.deepcopy(ds3)
    # if the L4 does exist and the "UseExistingL4File" option is True
    #  - read the contents of the L4 netCDF file
    #  - check the start and end dates of the L3 and L4 data
    #     - if these are the same then tell the user there is nothing to do
    #  - copy the L3 data to the L4 data structure
    #  - replace the L3 data with the L4 data
    #ds4 = copy.deepcopy(ds3)
    ds4 = qcio.copy_datastructure(cf, ds3)
    # ds4 will be empty (logical false) if an error occurs in copy_datastructure
    # return from this routine if this is the case
    if not ds4: return ds4
    # set some attributes for this level
    qcutils.UpdateGlobalAttributes(cf, ds4, "L4")
    ds4.cf = cf
    # calculate the available energy
    if "Fa" not in ds4.series.keys():
        qcts.CalculateAvailableEnergy(ds4, Fa_out='Fa', Fn_in='Fn', Fg_in='Fg')
    # create a dictionary to hold the gap filling data
    ds_alt = {}
    # check to see if we have any imports
    qcgf.ImportSeries(cf, ds4)
    # re-apply the quality control checks (range, diurnal and rules)
    qcck.do_qcchecks(cf, ds4)
    # now do the meteorological driver gap filling
    for ThisOne in cf["Drivers"].keys():
        if ThisOne not in ds4.series.keys():
            log.error("Series " + ThisOne + " not in data structure")
            continue
        # parse the control file for information on how the user wants to do the gap filling
        qcgf.GapFillParseControlFile(cf, ds4, ThisOne, ds_alt)
    # *** start of the section that does the gap filling of the drivers ***
    # fill short gaps using interpolation
    qcgf.GapFillUsingInterpolation(cf, ds4)
    # gap fill using climatology
    qcgf.GapFillFromClimatology(ds4)
    # do the gap filling using the ACCESS output
    qcgf.GapFillFromAlternate(cf, ds4, ds_alt)
    if ds4.returncodes["alternate"] == "quit": return ds4
    # gap fill using SOLO
    qcgf.GapFillUsingSOLO(cf, ds3, ds4)
    if ds4.returncodes["solo"] == "quit": return ds4
    # merge the first group of gap filled drivers into a single series
    qcts.MergeSeriesUsingDict(ds4, merge_order="prerequisite")
    # re-calculate the ground heat flux but only if requested in control file
    opt = qcutils.get_keyvaluefromcf(cf, ["Options"],
                                     "CorrectFgForStorage",
                                     default="No",
                                     mode="quiet")
    if opt.lower() != "no":
        qcts.CorrectFgForStorage(cf,
                                 ds4,
                                 Fg_out='Fg',
                                 Fg_in='Fg_Av',
                                 Ts_in='Ts',
                                 Sws_in='Sws')
    # re-calculate the net radiation
    qcts.CalculateNetRadiation(cf,
                               ds4,
                               Fn_out='Fn',
                               Fsd_in='Fsd',
                               Fsu_in='Fsu',
                               Fld_in='Fld',
                               Flu_in='Flu')
    # re-calculate the available energy
    qcts.CalculateAvailableEnergy(ds4, Fa_out='Fa', Fn_in='Fn', Fg_in='Fg')
    # merge the second group of gap filled drivers into a single series
    qcts.MergeSeriesUsingDict(ds4, merge_order="standard")
    # re-calculate the water vapour concentrations
    qcts.CalculateHumiditiesAfterGapFill(ds4)
    # re-calculate the meteorological variables
    qcts.CalculateMeteorologicalVariables(ds4)
    # re-calculate the wind speed components from wind speed and direction
    qcts.CalculateComponentsFromWsWd(ds4)
    # check for any missing data
    qcutils.get_missingingapfilledseries(ds4)
    # write the percentage of good data as a variable attribute
    qcutils.get_coverage_individual(ds4)
    # write the percentage of good data for groups
    qcutils.get_coverage_groups(ds4)

    return ds4
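The early returns above depend on the gap-filling routines recording how they finished in ds4.returncodes. A minimal sketch of that contract, assuming a plain dict on the data structure (this stand-in class is not the real qcio.DataStructure):

# hypothetical stand-in for the data structure's return-code contract
class DataStructure(object):
    def __init__(self):
        self.series = {}
        self.globalattributes = {}
        self.returncodes = {"alternate": "normal", "solo": "normal"}

ds4 = DataStructure()
# a gap-filling routine sets "quit" when the user aborts its GUI session
ds4.returncodes["alternate"] = "quit"
if ds4.returncodes["alternate"] == "quit":
    print("user quit alternate gap filling, returning early")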
Example #23
def gfMDS_createdict(cf, ds, series):
    """
    Purpose:
     Create an information dictionary for MDS gap filling from the contents
     of the control file.
    Usage:
     gfMDS_createdict(cf, ds, series)
    Author: PRI
    Date: May 2018
    """
    # get the section of the control file containing the series
    section = qcutils.get_cfsection(cf, series=series, mode="quiet")
    # return without doing anything if the series isn't in a control file section
    if len(section) == 0:
        logger.error(
            "GapFillUsingMDS: Series %s not found in control file, skipping ...",
            series)
        return
    # create the MDS attribute (a dictionary) in ds, this will hold all MDS settings
    if "mds" not in dir(ds):
        ds.mds = {}
    # name of MDS output series in ds
    output_list = cf[section][series]["GapFillUsingMDS"].keys()
    # loop over the outputs listed in the control file
    for output in output_list:
        # create the dictionary keys for this series
        ds.mds[output] = {}
        # get the target
        if "target" in cf[section][series]["GapFillUsingMDS"][output]:
            ds.mds[output]["target"] = cf[section][series]["GapFillUsingMDS"][
                output]["target"]
        else:
            ds.mds[output]["target"] = series
        # site name
        ds.mds[output]["site_name"] = ds.globalattributes["site_name"]
        # list of SOLO settings
        if "mds_settings" in cf[section][series]["GapFillUsingMDS"][output]:
            mdss_list = ast.literal_eval(
                cf[section][series]["GapFillUsingMDS"][output]["mds_settings"])

        # list of drivers
        ds.mds[output]["drivers"] = ast.literal_eval(
            cf[section][series]["GapFillUsingMDS"][output]["drivers"])
        # list of tolerances
        ds.mds[output]["tolerances"] = ast.literal_eval(
            cf[section][series]["GapFillUsingMDS"][output]["tolerances"])
        # get the ustar filter option
        opt = qcutils.get_keyvaluefromcf(
            cf, [section, series, "GapFillUsingMDS", output],
            "turbulence_filter",
            default="")
        ds.mds[output]["turbulence_filter"] = opt
        # get the day/night filter option
        opt = qcutils.get_keyvaluefromcf(
            cf, [section, series, "GapFillUsingMDS", output],
            "daynight_filter",
            default="")
        ds.mds[output]["daynight_filter"] = opt

    # check that all requested targets and drivers have a mapping to
    # a FluxNet label, remove if they don't
    fluxnet_label_map = {
        "Fc": "NEE",
        "Fe": "LE",
        "Fh": "H",
        "Fsd": "SW_IN",
        "Ta": "TA",
        "VPD": "VPD"
    }
    for mds_label in list(ds.mds.keys()):
        ds.mds[mds_label]["mds_label"] = mds_label
        pfp_target = ds.mds[mds_label]["target"]
        if pfp_target not in fluxnet_label_map:
            msg = " Target (" + pfp_target + ") not supported for MDS gap filling"
            logger.warning(msg)
            # deleting while iterating is safe because we loop over a copy
            # of the keys; skip to the next output
            del ds.mds[mds_label]
            continue
        ds.mds[mds_label]["target_mds"] = fluxnet_label_map[pfp_target]
        # loop over a copy of the driver list so unsupported drivers can be
        # removed without upsetting the iteration
        for pfp_driver in list(ds.mds[mds_label]["drivers"]):
            if pfp_driver not in fluxnet_label_map:
                msg = " Driver (" + pfp_driver + ") not supported for MDS gap filling"
                logger.warning(msg)
                ds.mds[mds_label]["drivers"].remove(pfp_driver)
            else:
                if "drivers_mds" not in ds.mds[mds_label]:
                    ds.mds[mds_label]["drivers_mds"] = []
                ds.mds[mds_label]["drivers_mds"].append(
                    fluxnet_label_map[pfp_driver])
        if len(ds.mds[mds_label]["drivers"]) == 0:
            del ds.mds[mds_label]
    return
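For reference, a successful pass through gfMDS_createdict leaves one entry per output in ds.mds. The dictionary below illustrates the resulting shape; every value is an assumption for illustration, not taken from a real control file:

# hypothetical contents of ds.mds["Fc_MDS"] after gfMDS_createdict
mds_entry = {
    "mds_label": "Fc_MDS",
    "target": "Fc",                       # PFP label of the series to fill
    "target_mds": "NEE",                  # FluxNet label used by the MDS code
    "site_name": "ExampleSite",
    "drivers": ["Fsd", "Ta", "VPD"],
    "drivers_mds": ["SW_IN", "TA", "VPD"],
    "tolerances": [20, 2.5, 0.5],         # one tolerance per driver (assumed)
    "turbulence_filter": "ustar",
    "daynight_filter": "",
}
print(mds_entry["drivers_mds"])  # ['SW_IN', 'TA', 'VPD']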
Example #24
     index = numpy.ma.where(quality==item)[0]
     ok_mask[index] = 0
 evi_masked = numpy.ma.masked_where(ok_mask!=0,evi)
 evi_masked_median = numpy.ma.median(evi_masked.reshape(evi_masked.shape[0],-1),axis=1)
 # get data for interpolation
 start = modis_dt[0]
 end = modis_dt[-1]
 modis_dt_interp = list(perdelta(start, end, datetime.timedelta(minutes=ts)))
 modis_time_interp = netCDF4.date2num(modis_dt_interp,modis_time_units)
 modis_time_masked = numpy.ma.masked_where(numpy.ma.getmaskarray(evi_masked_median), modis_time)
 modis_time_comp = numpy.ma.compressed(modis_time_masked)
 evi_masked_median_comp = numpy.ma.compressed(evi_masked_median)
 x_org = modis_time_comp
 y_org = evi_masked_median_comp
 # interpolate onto the tower time step
 interp_type = qcutils.get_keyvaluefromcf(cf,["EVI"],"interp_type",default="linear")
 if interp_type.lower() not in ["linear","smooth_interp"]:
     msg = " Unrecognised interpolation type ("+interp_type+"), using linear ..."
     log.warning(msg)
     interp_type = "linear"
 if interp_type.lower()=="linear":
     # linear interpolation
     log.info(" Using linear interpolation")
     f = scipy.interpolate.interp1d(x_org,y_org,bounds_error=False)
     evi_interp = f(modis_time_interp)
     filter_type = qcutils.get_keyvaluefromcf(cf,["EVI"],"filter_type",default="savgol")
     if filter_type.lower() not in ["savgol"]:
         msg = " Unrecognised filter type ("+filter_type+"), using Savitsky-Golay ..."
         log.warning(msg)
         filter_type = "savgol"
     if filter_type.lower()=="savgol":
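The snippet is cut off inside the Savitzky-Golay branch. As a hedged sketch of how such a branch typically continues, scipy.signal.savgol_filter can smooth the interpolated EVI series; the toy data, window length and polynomial order below are assumptions, not the original settings:

import numpy
import scipy.interpolate
import scipy.signal

# toy EVI samples at a 16 day MODIS interval (values are made up)
x_org = numpy.arange(0.0, 160.0, 16.0)
y_org = 0.3 + 0.1 * numpy.sin(x_org / 40.0)
# interpolate onto a 30 minute tower time step, expressed in days
x_interp = numpy.arange(x_org[0], x_org[-1], 30.0 / 1440.0)
f = scipy.interpolate.interp1d(x_org, y_org, bounds_error=False)
evi_interp = f(x_interp)
# smooth with a Savitzky-Golay filter; the window length must be odd
evi_smoothed = scipy.signal.savgol_filter(evi_interp, window_length=101, polyorder=3)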
Example #25
def get_accessdata(cf,ds_60minutes,f,info):
    # latitude and longitude, choose the central pixel of the 3x3 grid
    ds_60minutes.globalattributes["latitude"] = f.variables["lat"][1]
    ds_60minutes.globalattributes["longitude"] = f.variables["lon"][1]
    # list of variables to process
    var_list = cf["Variables"].keys()
    # get a series of Python datetimes and put this into the data structure
    valid_date = f.variables["valid_date"][:]
    nRecs = len(valid_date)
    valid_time = f.variables["valid_time"][:]
    dl = [datetime.datetime.strptime(str(int(valid_date[i])*10000+int(valid_time[i])),"%Y%m%d%H%M") for i in range(0,nRecs)]
    dt_utc_all = numpy.array(dl)
    time_step = numpy.array([(dt_utc_all[i]-dt_utc_all[i-1]).total_seconds() for i in range(1,len(dt_utc_all))])
    time_step = numpy.append(time_step,3600)
    idxne0 = numpy.where(time_step!=0)[0]
    idxeq0 = numpy.where(time_step==0)[0]
    idx_clipped = numpy.where((idxeq0>0)&(idxeq0<nRecs))[0]
    idxeq0 = idxeq0[idx_clipped]
    dt_utc = dt_utc_all[idxne0]
    dt_utc = [x.replace(tzinfo=pytz.utc) for x in dt_utc]
    dt_loc = [x.astimezone(info["site_tz"]) for x in dt_utc]
    dt_loc = [x-x.dst() for x in dt_loc]
    dt_loc = [x.replace(tzinfo=None) for x in dt_loc]
    flag = numpy.zeros(len(dt_loc),dtype=numpy.int32)
    ds_60minutes.series["DateTime"] = {}
    ds_60minutes.series["DateTime"]["Data"] = dt_loc
    ds_60minutes.series["DateTime"]["Flag"] = flag
    ds_60minutes.series["DateTime_UTC"] = {}
    ds_60minutes.series["DateTime_UTC"]["Data"] = dt_utc
    ds_60minutes.series["DateTime_UTC"]["Flag"] = flag
    nRecs = len(ds_60minutes.series["DateTime"]["Data"])
    ds_60minutes.globalattributes["nc_nrecs"] = nRecs
    # we're done with valid_date and valid_time, drop them from the variable list
    for item in ["valid_date","valid_time","lat","lon"]:
        if item in var_list: var_list.remove(item)
    # create the QC flag with all zeros
    nRecs = ds_60minutes.globalattributes["nc_nrecs"]
    flag_60minutes = numpy.zeros(nRecs,dtype=numpy.int32)
    # get the UTC hour
    hr_utc = [x.hour for x in dt_utc]
    attr = qcutils.MakeAttributeDictionary(long_name='UTC hour')
    qcutils.CreateSeries(ds_60minutes,'Hr_UTC',hr_utc,Flag=flag_60minutes,Attr=attr)
    # now loop over the variables listed in the control file
    for label in var_list:
        # get the name of the variable in the ACCESS file
        access_name = qcutils.get_keyvaluefromcf(cf,["Variables",label],"access_name",default=label)
        # warn the user if the variable not found
        if access_name not in f.variables.keys():
            msg = "Requested variable "+access_name
            msg = msg+" not found in ACCESS data"
            logging.error(msg)
            continue
        # get the variable attributes
        attr = get_variableattributes(f,access_name)
        # loop over the 3x3 matrix of ACCESS grid data supplied
        for i in range(0,3):
            for j in range(0,3):
                label_ij = label+'_'+str(i)+str(j)
                if len(f.variables[access_name].shape)==3:
                    series = f.variables[access_name][:,i,j]
                elif len(f.variables[access_name].shape)==4:
                    series = f.variables[access_name][:,0,i,j]
                else:
                    msg = "Unrecognised variable ("+label
                    msg = msg+") dimension in ACCESS file"
                    logging.error(msg)
                    continue
                series = series[idxne0]
                qcutils.CreateSeries(ds_60minutes,label_ij,series,
                                     Flag=flag_60minutes,Attr=attr)
    return
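The datetime handling in get_accessdata converts the ACCESS UTC timestamps to local standard time by subtracting any daylight-saving offset before stripping the time zone. A standalone sketch of that conversion; the time zone chosen here is an assumption:

import datetime
import pytz

site_tz = pytz.timezone("Australia/Sydney")  # assumed site time zone
dt_utc = [datetime.datetime(2018, 1, 1, h, 0, tzinfo=pytz.utc) for h in range(3)]
# convert to local time, then remove the daylight-saving component
dt_loc = [x.astimezone(site_tz) for x in dt_utc]
dt_loc = [x - x.dst() for x in dt_loc]
# drop tzinfo so the result is naive local standard time
dt_loc = [x.replace(tzinfo=None) for x in dt_loc]
print(dt_loc[0])  # 2018-01-01 10:00:00, i.e. UTC+10 with no DST offset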
Example #27
 ds_60minutes.globalattributes["nc_nrecs"] = nRecs
 # processing level
 ds_60minutes.globalattributes["nc_level"] = "L1"
 # latitude and longitude, chose central pixel of 3x3 grid
 ds_60minutes.globalattributes["latitude"] = f.variables["lat"][1]
 ds_60minutes.globalattributes["longitude"] = f.variables["lon"][1]
 # put the ACCESS data into the 60 minute data structure ds_60minutes
 # make a QC flag with a value of 0
 flag_60minutes = numpy.zeros(nRecs, dtype=numpy.int32)
 # drop the date/time and coordinate variables from the variable list
 for item in ["valid_date", "valid_time", "lat", "lon"]:
     if item in var_list: var_list.remove(item)
 # loop over the variables defined in the control file
 for var in var_list:
     # get the name of the ACCESS variable
     access_name = qcutils.get_keyvaluefromcf(cf, ["Variables", var],
                                              "access_name",
                                              default=var)
     if access_name not in f.variables.keys():
         logging.error("Requested variable " + access_name +
                       " not found in ACCESS data")
         continue
     attr = {}
     for this_attr in f.varattr[access_name].keys():
         attr[this_attr] = f.varattr[access_name][this_attr]
     attr["missing_value"] = c.missing_value
      # loop over all ACCESS grids and give them standard OzFlux names with the grid indices appended
     for i in range(0, 3):
         for j in range(0, 3):
             if len(f.variables[access_name].shape) == 3:
                 var_ij = var + '_' + str(i) + str(j)
                 series = f.variables[access_name][:, i, j]