def values_vs_lat_dist(var, lats, data, qc_flags, these_flags, filename, multiplier = 100., doBC = False, doBCtotal = False, doBChgt = False, doBCscn = False):
# end
    '''
    Plots showing benefit of QC using all QC flags bar day/night
    This version adds a line for each set of data to show the frequency distribution of all values

    Two panels are written to filename: (left) all obs (gold) with the
    QC-failed obs overplotted (red); (right) QC-passed obs only (blue).
    Each panel also carries a 50-bin frequency distribution on a twinned
    y-axis, annotated with the mean and standard deviation of each population.

    :param MetVar var: input variable (units and long_name used for labelling)
    :param array lats: latitudes (stored scaled by 100 - hard wired divisor below)
    :param array data: indata
    :param array qc_flags: QC flag array
    :param array these_flags: QC flags to apply
    :param str filename: output filename
    :param float multiplier: multiplier which has been applied to the data already.
    :param bool doBC: work on the bias corrected QC flag definitions
# KATE modified - BC options
    :param bool doBCtotal: work on the full bias corrected QC flag definitions
    :param bool doBChgt: work on the height only bias corrected QC flag definitions
    :param bool doBCscn: work on the screen only bias corrected QC flag definitions
# end
    '''

    # get the final data mask (True/1 = fails the selected QC checks)
# KATE modified - BC options
    data_mask = utils.process_qc_flags(qc_flags, these_flags, doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
# end

    # apportion the mask - clean = passed QC, dirty = failed QC
    clean_data = np.ma.masked_array(data, data_mask)
    dirty_data = np.ma.masked_array(data, np.logical_not(data_mask))

    # make a 2-panel plot
    plt.clf()

    # all data
    ax1 = plt.subplot(1,2,1)
# KATE modified - lats should only be div 100, not 10 (absolute.multiplier) so hard wired to 100.
    ax1.scatter(data/multiplier, lats/100., c = 'gold', marker = '.') # KATE changed to gold
    ax1.scatter(dirty_data/multiplier, lats/100., c = 'r', marker = '.')
# end

    # Make histograms of the distribution of values and plot
    # What are the xaxis limits?  Pad the data range by 10% each side.
    xMin = np.min(data/multiplier)
    xMax = np.max(data/multiplier)
    xRange = xMax - xMin
    xMin = np.floor(xMin - (0.1*xRange))
    xMax = np.ceil(xMax + (0.1*xRange))

    # Set up equally spaced histogram bins between min and max range
    # (51 edges including the end point -> 50 bins)
    binsies = np.linspace(xMin,xMax,51)

    # Set up the second axes
    ax2 = ax1.twinx()

    # Plot for all data - line through the bin centres (edge + half bin width)
    # NOTE: [:-1] (all bin left-edges) replaces the hard-coded [0:50] so the
    # slicing stays correct if the number of bins in binsies is ever changed
    ThisHist = np.histogram(data/multiplier,binsies)
    y2Max = np.max(ThisHist[0])
    HalfX = (ThisHist[1][1] - ThisHist[1][0]) / 2.
    ax2.plot(ThisHist[1][:-1]+HalfX,ThisHist[0],c='gold',linestyle='solid',linewidth=4)
    meanHist = '{:5.1f}'.format(np.mean(data/multiplier))
    sdHist = '{:5.1f}'.format(np.std(data/multiplier))
    ax2.annotate('All Obs ('+meanHist+', '+sdHist+')',xy=(0.05,0.96),xycoords='axes fraction',size=12,color='gold')

    # Plot for failed data
    ThisHist = np.histogram(dirty_data[~dirty_data.mask]/multiplier,binsies)
    HalfX = (ThisHist[1][1] - ThisHist[1][0]) / 2.
    ax2.plot(ThisHist[1][:-1]+HalfX,ThisHist[0],c='r',linestyle='solid',linewidth=4)
    meanHist = '{:5.1f}'.format(np.mean(dirty_data[~dirty_data.mask]/multiplier))
    sdHist = '{:5.1f}'.format(np.std(dirty_data[~dirty_data.mask]/multiplier))
    # float() rather than np.float() - the np.float alias was deprecated in
    # NumPy 1.20 and removed in 1.24; plain float is identical in behaviour
    PctFail = '{:5.1f}'.format((len(dirty_data[~dirty_data.mask]) / float(len(data))) * 100.)+'%'
    ax2.annotate('Bad Obs ('+meanHist+', '+sdHist+', '+PctFail+')',xy=(0.05,0.92),xycoords='axes fraction',size=12,color='r')

    ax1.set_ylim([-90,90])
    ax1.set_xlim([xMin,xMax])
    ax2.set_ylim([0,y2Max*1.1])
    ax1.set_yticks(np.arange(-90,120,30))
    ax1.set_title("All data")
    ax1.set_xlabel(var.units)
    ax1.set_ylabel("latitude")
    # blank out the secondary (right-hand) axis tick labels on the left panel
    labels = [item.get_text() for item in ax2.get_yticklabels()]
    empty_string_labels = ['']*len(labels)
    ax2.set_yticklabels(empty_string_labels)

    # clean data
    ax1 = plt.subplot(1,2,2)
# KATE modified - lats should only be div 100, not 10 (absolute.multiplier) so hard wired to 100.
    ax1.scatter(clean_data/multiplier, lats/100., c = 'b', marker = '.')
# end

    # Make histograms of the distribution of values and plot
    # xaxis limits already established from above

    # Set up the second axes - ax1.twinx() for consistency with the first
    # panel (was plt.twinx(); ax1 is the current axes so behaviour is the same)
    ax2 = ax1.twinx()

    # Plot for clean data - y2Max from the all-data panel is deliberately
    # reused so both panels share the same secondary-axis scaling
    ThisHist = np.histogram(clean_data[~clean_data.mask]/multiplier,binsies)
    HalfX = (ThisHist[1][1] - ThisHist[1][0]) / 2.
    ax2.plot(ThisHist[1][:-1]+HalfX,ThisHist[0],c='b',linestyle='solid',linewidth=4)
    meanHist = '{:5.1f}'.format(np.mean(clean_data[~clean_data.mask]/multiplier))
    sdHist = '{:5.1f}'.format(np.std(clean_data[~clean_data.mask]/multiplier))
    ax2.annotate('Clean Obs ('+meanHist+', '+sdHist+')',xy=(0.05,0.96),xycoords='axes fraction',size=12,color='b')

    ax1.set_ylim([-90,90])
    ax1.set_xlim([xMin,xMax])
    ax2.set_ylim([0,y2Max*1.1])
    ax1.set_yticks(np.arange(-90,120,30))
    # blank out the latitude tick labels on the right panel (shared with left)
    labels = [item.get_text() for item in ax1.get_yticklabels()]
    empty_string_labels = ['']*len(labels)
    ax1.set_yticklabels(empty_string_labels)
    ax1.set_title("Clean data")
    ax1.set_xlabel(var.units)
    ax2.set_ylabel("number of observations")

    plt.figtext(0.5, 0.95, var.long_name, ha = 'center', fontsize = 14)

    plt.savefig(filename)

    return # values_vs_lat_dist
def values_vs_lat(var, lats, data, qc_flags, these_flags, filename, multiplier = 100., doBC = False, doBCtotal = False, doBChgt = False, doBCscn = False):
# end
    '''
    Two-panel scatter of observation values against latitude, showing the
    benefit of QC (all QC flags bar day/night): left panel has every ob with
    QC failures overplotted, right panel has only the obs that passed QC.

    :param MetVar var: input variable (units / long_name used for labelling)
    :param array lats: latitudes (stored scaled by 100)
    :param array data: indata
    :param array qc_flags: QC flag array
    :param array these_flags: QC flags to apply
    :param str filename: output filename
    :param float multiplier: multiplier which has been applied to the data already.
    :param bool doBC: work on the bias corrected QC flag definitions
    :param bool doBCtotal: work on the full bias corrected QC flag definitions
    :param bool doBChgt: work on the height only bias corrected QC flag definitions
    :param bool doBCscn: work on the screen only bias corrected QC flag definitions
    '''

    # build the QC mask (set = ob fails the requested checks)
    qc_mask = utils.process_qc_flags(qc_flags, these_flags, doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)

    # split the observations into QC passes and QC failures
    passed_obs = np.ma.masked_array(data, qc_mask)
    failed_obs = np.ma.masked_array(data, np.logical_not(qc_mask))

    # latitudes are stored scaled by 100 (hard wired - not the data multiplier)
    lat_degrees = lats / 100.

    # two-panel figure
    plt.clf()

    # left panel: everything (gold) with the QC failures overplotted in red
    plt.subplot(1,2,1)
    plt.scatter(data / multiplier, lat_degrees, c = 'gold', marker = '.')
    plt.scatter(failed_obs / multiplier, lat_degrees, c = 'r', marker = '.')
    plt.ylim([-90,90])
    plt.yticks(np.arange(-90,120,30), fontsize = 12)
    plt.title("All data", fontsize = 12)
    plt.xlabel(var.units)
    plt.ylabel("latitude")

    # right panel: only the obs which passed QC
    plt.subplot(1,2,2)
    plt.scatter(passed_obs / multiplier, lat_degrees, c = 'b', marker = '.')
    plt.ylim([-90,90])
    plt.yticks(np.arange(-90,120,30), fontsize = 12)
    plt.title("Clean data", fontsize = 12)
    plt.xlabel(var.units)

    plt.figtext(0.5, 0.95, var.long_name, ha = 'center', fontsize = 14)

    plt.savefig(filename)

    return # plot_values_vs_lat
def do_gridding(suffix = "relax", start_year = defaults.START_YEAR, end_year = defaults.END_YEAR, start_month = 1, end_month = 12, 
                doQC = False, doQC1it = False, doQC2it = False, doQC3it = False, doSST_SLP = False, 
		doBC = False, doBCtotal = False, doBChgt = False, doBCscn = False, doUncert = False):
#def do_gridding(suffix = "relax", start_year = defaults.START_YEAR, end_year = defaults.END_YEAR, start_month = 1, end_month = 12, doQC = False, doSST_SLP = False, doBC = False, doUncert = False):
# end
    '''
    Do the gridding, first to 3hrly 1x1, then to daily 1x1 and finally monthly 1x1 and 5x5

    :param str suffix: "relax" or "strict" criteria
    :param int start_year: start year to process
    :param int end_year: end year to process
    :param int start_month: start month to process
    :param int end_month: end month to process
    :param bool doQC: incorporate the QC flags or not
    :param bool doQC1it: incorporate the first iteration (no buddy) QC flags or not
    :param bool doQC2it: incorporate the second iteration (no buddy) QC flags or not
    :param bool doQC3it: incorporate the third iteration (buddy) QC flags or not
    :param bool doSST_SLP: process additional variables or not
    :param bool doBC: work on the bias corrected data
    :param bool doBCtotal: work on the full bias corrected data
    :param bool doBChgt: work on the height only bias corrected data
    :param bool doBCscn: work on the screen only bias corrected data
    :param bool doUncert: work on files with uncertainty information (not currently used)

    :returns: None - gridded fields are written to netCDF and diagnostic PNGs
    '''
    # NOTE(review): SwitchOutput, DELTA_HOUR, DELTA_LAT, DELTA_LON, grid_lats
    # and grid_lons are used below but not defined in this function -
    # presumably module-level constants; confirm at the top of the file.
# KATE modified    
    settings = set_paths_and_vars.set(doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn, doQC = doQC, doQC1it = doQC1it, doQC2it = doQC2it, doQC3it = doQC3it)
    #settings = set_paths_and_vars.set(doBC = doBC, doQC = doQC)
# end


    # bias-corrected input files use the extended field delimiters
# KATE modified  - added other BC options  
#    if doBC:
    if doBC | doBCtotal | doBChgt | doBCscn:
# end
        fields = mds.TheDelimitersExt # extended (BC)
    else:
        fields = mds.TheDelimitersStd # Standard

# KATE modified  - added other BC options  
#    OBS_ORDER = utils.make_MetVars(settings.mdi, doSST_SLP = doSST_SLP, multiplier = True, doBC = doBC) # ensure that convert from raw format at writing stage with multiplier
    OBS_ORDER = utils.make_MetVars(settings.mdi, doSST_SLP = doSST_SLP, multiplier = True, doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn) # ensure that convert from raw format at writing stage with multiplier
# end

    # KW switching between 4 ('_strict') for climatology build and 2 for anomaly buily ('_relax') - added subscripts to files
    # N_OBS_DAY = min 3hrly grids needed per 1x1 daily; N_OBS_FRAC_MONTH kept
    # for the (now commented-out) monthly 1x1 filter below
    if suffix == "relax":
        N_OBS_DAY = 2 # KW ok for anomalies but this was meant to be 4 for dailies_all? and 2 for dailies_night/day?
        N_OBS_FRAC_MONTH = 0.3

    elif suffix == "strict":
        N_OBS_DAY = 4
        N_OBS_FRAC_MONTH = 0.3


    # flags to check on and values to allow through
    # (first and second QC iterations have no buddy-check flags to test)
# KATE modified
    if doQC1it | doQC2it:
        these_flags = {"ATclim":0,"ATrep":0,"DPTclim":0,"DPTssat":0,"DPTrep":0,"DPTrepsat":0}
    else:
        these_flags = {"ATbud":0, "ATclim":0,"ATrep":0,"DPTbud":0,"DPTclim":0,"DPTssat":0,"DPTrep":0,"DPTrepsat":0}    
    #these_flags = {"ATbud":0, "ATclim":0,"ATrep":0,"DPTbud":0,"DPTclim":0,"DPTssat":0,"DPTrep":0,"DPTrepsat":0}
# end

    # spin through years and months to read files
    for year in np.arange(start_year, end_year + 1): 

        for month in np.arange(start_month, end_month + 1):

            times = utils.TimeVar("time", "time since 1/{}/{} in hours".format(month, year), "hours", "time")

            # 3-hourly time axis spanning this calendar month
            grid_hours = np.arange(0, 24 * calendar.monthrange(year, month)[1], DELTA_HOUR)

            times.data = grid_hours

            # process the monthly file
# KATE modified  - added other BC options  
#            if doBC:
            if doBC | doBCtotal | doBChgt | doBCscn:
# end
                filename = "new_suite_{}{:02d}_{}_extended.txt".format(year, month, settings.OUTROOT)
            else:
                filename = "new_suite_{}{:02d}_{}.txt".format(year, month, settings.OUTROOT)

# KATE modified  - added other BC options  
#            raw_platform_data, raw_obs, raw_meta, raw_qc = utils.read_qc_data(filename, settings.ICOADS_LOCATION, fields, doBC = doBC)
            raw_platform_data, raw_obs, raw_meta, raw_qc = utils.read_qc_data(filename, settings.ICOADS_LOCATION, fields, doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
# end

            # extract observation details
            lats, lons, years, months, days, hours = utils.process_platform_obs(raw_platform_data)

            # test dates *KW - SHOULDN'T NEED THIS - ONLY OBS PASSING DATE CHECK ARE INCLUDED*
            #  *RD* - hasn't run yet but will leave it in just in case of future use.
            if not utils.check_date(years, year, "years", filename):
                sys.exit(1)
            if not utils.check_date(months, month, "months", filename):
                sys.exit(1)

# KATE modified - seems to be an error with missing global name plots so have changed to settings.plots
            # Choose this one to only output once per decade
            #if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
            # Choose this one to output a plot for each month
            if settings.plots:
            #if plots and (year in [1973, 1983, 1993, 2003, 2013]):
# end
                # plot the distribution of hours

                import matplotlib.pyplot as plt

                plt.clf()
                plt.hist(hours, np.arange(-100,2500,100))
                plt.ylabel("Number of observations")
                plt.xlabel("Hours")
                plt.xticks(np.arange(-300, 2700, 300))
                plt.savefig(settings.PLOT_LOCATION + "obs_distribution_{}{:02d}_{}.png".format(year, month, suffix))


                # only for a few of the variables
                for variable in OBS_ORDER:
                    if variable.name in ["marine_air_temperature", "dew_point_temperature", "specific_humidity", "relative_humidity", "marine_air_temperature_anomalies", "dew_point_temperature_anomalies", "specific_humidity_anomalies", "relative_humidity_anomalies"]:

                        #plot_qc_diagnostics.values_vs_lat(variable, lats, raw_obs[:, variable.column], raw_qc, these_flags, settings.PLOT_LOCATION + "qc_actuals_{}_{}{:02d}_{}.png".format(variable.name, year, month, suffix), multiplier = variable.multiplier, doBC = doBC)
                        plot_qc_diagnostics.values_vs_lat_dist(variable, lats, raw_obs[:, variable.column], raw_qc, these_flags, \
			        settings.PLOT_LOCATION + "qc_actuals_{}_{}{:02d}_{}.png".format(variable.name, year, month, suffix), multiplier = variable.multiplier, \
# KATE modified  - added other BC options  
				doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
# end

            # QC sub-selection

# KATE modified - added QC iterations but also think this needs to include the bias corrected versions because the QC flags need to be applied to those too.
# Not sure what was happening previously with the doBC run - any masking to QC'd obs?
            if doQC | doQC1it | doQC2it | doQC3it | doBC | doBCtotal | doBChgt | doBCscn:
            #if doQC:
# end
                print "Using {} as flags".format(these_flags)
# KATE modified - BC options
#                mask = utils.process_qc_flags(raw_qc, these_flags, doBC = doBC)
                mask = utils.process_qc_flags(raw_qc, these_flags, doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
# end
		print "All Obs: ",len(mask)
		print "Good Obs: ",len(mask[np.where(mask == 0)])
		print "Bad Obs: ",len(mask[np.where(mask == 1)])
		#pdb.set_trace()


                # broadcast the per-ob mask across every variable (column)
                complete_mask = np.zeros(raw_obs.shape)
                for i in range(raw_obs.shape[1]):
                    complete_mask[:,i] = mask
                clean_data = np.ma.masked_array(raw_obs, mask = complete_mask)

# end
            else:
                print "No QC flags selected"
                clean_data = np.ma.masked_array(raw_obs, mask = np.zeros(raw_obs.shape))


            # discretise hours
            hours = utils.make_index(hours, DELTA_HOUR, multiplier = 100)

            # get the hours since start of month
            hours_since = ((days - 1) * 24) + (hours * DELTA_HOUR)

            # discretise lats/lons
            lat_index = utils.make_index(lats, DELTA_LAT, multiplier = 100)
            lon_index = utils.make_index(lons, DELTA_LON, multiplier = 100)

            lat_index += ((len(grid_lats)-1)/2) # and as -ve indices are unhelpful, roll by offsetting by most westward
            lon_index += ((len(grid_lons)-1)/2) #    or most southerly so that (0,0) is (-90,-180)

            # NOTE - ALWAYS GIVING TOP-RIGHT OF BOX TO GIVE < HARD LIMIT (as opposed to <=)
            # do the gridding
            # extract the full grid, number of obs, and day/night flag
# KATE MEDIAN WATCH This is hard coded to doMedian (rather than settings.doMedian) - OK WITH MEDIAN HERE!!!
# KATE modified - to add settings.doMedian instead of just doMedian which seems to be consistent with the other bits and BC options
	    raw_month_grid, raw_month_n_obs, this_month_period = utils.grid_1by1_cam(clean_data, raw_qc, hours_since, lat_index, lon_index, \
	              grid_hours, grid_lats, grid_lons, OBS_ORDER, settings.mdi, doMedian = settings.doMedian, \
		      doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
	    #raw_month_grid, raw_month_n_obs, this_month_period = utils.grid_1by1_cam(clean_data, raw_qc, hours_since, lat_index, lon_index, grid_hours, grid_lats, grid_lons, OBS_ORDER, settings.mdi, doMedian = True, doBC = doBC)
# end
            print "successfully read data into 1x1 3hrly grids"

            # create matching array size
            this_month_period = np.tile(this_month_period, (len(OBS_ORDER),1,1,1))

            for period in ["all", "day", "night"]:

                if period == "day":
                    this_month_grid = np.ma.masked_where(this_month_period == 1, raw_month_grid)
                    this_month_obs = np.ma.masked_where(this_month_period[0] == 1, raw_month_n_obs) # and take first slice to re-match the array size
                elif period == "night":
                    this_month_grid = np.ma.masked_where(this_month_period == 0, raw_month_grid)
                    this_month_obs = np.ma.masked_where(this_month_period[0] == 0, raw_month_n_obs) # and take first slice to re-match the array size
                else:
                    this_month_grid = copy.deepcopy(raw_month_grid)
                    this_month_obs = copy.deepcopy(raw_month_n_obs)

# KATE modified
                # If SwitchOutput == 1 then we're in test mode - output interim files!!!
		if (SwitchOutput == 1):
		    # have one month of gridded data.
                    out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_3hr_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)              

                    utils.netcdf_write(out_filename, this_month_grid, np.zeros(this_month_obs.shape), this_month_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "H")
		## have one month of gridded data.
                #out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_3hr_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)              

                #utils.netcdf_write(out_filename, this_month_grid, np.zeros(this_month_obs.shape), this_month_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "H")
# end
                # now average over time
                # Dailies - reshape so one axis runs over the 3hrly slots per day
                daily_hours = grid_hours.reshape(-1, 24/DELTA_HOUR)

                shape = this_month_grid.shape
                this_month_grid = this_month_grid.reshape(shape[0], -1, 24/DELTA_HOUR, shape[2], shape[3])
                this_month_obs = this_month_obs.reshape(-1, 24/DELTA_HOUR, shape[2], shape[3])

# KATE MEDIAN WATCH - settings.doMedian is generally set to True - I think we may want the MEAN HERE!!!
# KATE modified - to hard wire in MEAN here
                daily_grid = np.ma.mean(this_month_grid, axis = 2)
                #if settings.doMedian:
                #    daily_grid = np.ma.median(this_month_grid, axis = 2)
                #else:
                #    daily_grid = np.ma.mean(this_month_grid, axis = 2)
# end
                daily_grid.fill_value = settings.mdi

                # filter on number of observations/day
                n_hrs_per_day = np.ma.count(this_month_grid, axis = 2) 
                n_obs_per_day = np.ma.sum(this_month_obs, axis = 1) 

                if period == "all":
                    bad_locs = np.where(n_hrs_per_day < N_OBS_DAY) # at least 2 of possible 8 3-hourly values (6hrly data *KW OR AT LEAST 4 3HRLY OBS PRESENT*)
                else:
                    bad_locs = np.where(n_hrs_per_day < np.floor(N_OBS_DAY / 2.)) # at least 1 of possible 8 3-hourly values (6hrly data *KW OR AT LEAST 4 3HRLY OBS PRESENT*)              
                daily_grid.mask[bad_locs] = True

# KATE modified - added SwitchOutput to if loop
                if (SwitchOutput == 1) and settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
                #if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
# end
                    # plot the distribution of hours

                    plt.clf()
                    plt.hist(n_hrs_per_day.reshape(-1), bins = np.arange(-1,10), align = "left", log = True, rwidth=0.5)
                    if period == "all":
                        plt.axvline(x = N_OBS_DAY-0.5, color = "r")
                    else:
                        plt.axvline(x = np.floor(N_OBS_DAY / 2.)-0.5, color = "r")       

                    plt.title("Number of 1x1-3hrly in each 1x1-daily grid box")
                    plt.xlabel("Number of 3-hrly observations (max = 8)")
                    plt.ylabel("Frequency (log scale)")
                    plt.savefig(settings.PLOT_LOCATION + "n_grids_1x1_daily_{}{:02d}_{}_{}.png".format(year, month, period, suffix))

                    plt.clf()
                    plt.hist(n_obs_per_day.reshape(-1), bins = np.arange(-5,100,5),  log = True, rwidth=0.5)                 
                    plt.title("Total number of raw observations in each 1x1 daily grid box")
                    plt.xlabel("Number of raw observations")
                    plt.ylabel("Frequency (log scale)")
                    plt.savefig(settings.PLOT_LOCATION + "n_obs_1x1_daily_{}{:02d}_{}_{}.png".format(year, month, period, suffix))

                # clear up memory
                del this_month_grid
                del this_month_obs
                gc.collect()

# KATE modified
                # If SwitchOutput == 1 then we're in test mode - output interim files!!!
		if (SwitchOutput == 1):
                    # write dailies file
                    times.data = daily_hours[:,0]
                    out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_daily_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)

                    utils.netcdf_write(out_filename, daily_grid, n_hrs_per_day[0], n_obs_per_day, OBS_ORDER, grid_lats, grid_lons, times, frequency = "D")
                #times.data = daily_hours[:,0]
                #out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_daily_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)

                #utils.netcdf_write(out_filename, daily_grid, n_hrs_per_day[0], n_obs_per_day, OBS_ORDER, grid_lats, grid_lons, times, frequency = "D")
# end
                # Monthlies
                times.data = daily_hours[0,0]

# KATE modified - commenting out as we don't need this anymore
#                if settings.doMedian:
#                    monthly_grid = np.ma.median(daily_grid, axis = 1)
#                else:
#                    monthly_grid = np.ma.mean(daily_grid, axis = 1)
#
#                monthly_grid.fill_value = settings.mdi
#
#                # filter on number of observations/month
#                n_grids_per_month = np.ma.count(daily_grid, axis = 1) 
#                bad_locs = np.where(n_grids_per_month < calendar.monthrange(year, month)[1] * N_OBS_FRAC_MONTH) # 30% of possible daily values
#                monthly_grid.mask[bad_locs] = True
#
#                # number of raw observations
#                n_obs_per_month = np.ma.sum(n_obs_per_day, axis = 0)
#
#                if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
#                    # plot the distribution of days
#
#                    plt.clf()
#                    plt.hist(n_obs_per_month.reshape(-1), bins = np.arange(-10,500,10),  log = True, rwidth=0.5)
#                    plt.title("Total number of raw observations in each 1x1 monthly grid box")
#                    plt.xlabel("Number of raw observations")
#                    plt.ylabel("Frequency (log scale)")
#                    plt.savefig(settings.PLOT_LOCATION + "n_obs_1x1_monthly_{}{:02d}_{}_{}.png".format(year, month, period, suffix))
#
#                    plt.clf()
#                    plt.hist(n_grids_per_month[0].reshape(-1), bins = np.arange(-2,40,2), align = "left",  log = True, rwidth=0.5)
#                    plt.axvline(x = calendar.monthrange(year, month)[1] * N_OBS_FRAC_MONTH, color="r")
#                    plt.title("Total number of 1x1 daily grids in each 1x1 monthly grid")
#                    plt.xlabel("Number of 1x1 daily grids")
#                    plt.ylabel("Frequency (log scale)")
#                    plt.savefig(settings.PLOT_LOCATION + "n_grids_1x1_monthly_{}{:02d}_{}_{}.png".format(year, month, period, suffix))
#
#                # write monthly 1x1 file
#                out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_monthly_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
#                utils.netcdf_write(out_filename, monthly_grid, n_grids_per_month[0], n_obs_per_month, OBS_ORDER, grid_lats, grid_lons, times, frequency = "M")
#            
#                # now to re-grid to coarser resolution
#                # KW # Here we may want to use the mean because its a large area but could be sparsely
#                #             populated with quite different climatologies so we want 
#                # the influence of the outliers (we've done our best to ensure these are good values) 
#
#                # go from monthly 1x1 to monthly 5x5 - retained as limited overhead
#                monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, grid5_lats, grid5_lons = utils.grid_5by5(monthly_grid, n_obs_per_month, grid_lats, grid_lons, doMedian = settings.doMedian, daily = False)
#                out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
#
#                utils.netcdf_write(out_filename, monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, OBS_ORDER, grid5_lats, grid5_lons, times, frequency = "M")
#
#                if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
#                    # plot the distribution of days
#
#                    plt.clf()
#                    plt.hist(monthly_5by5_n_obs.reshape(-1), bins = np.arange(0,100,5), log = True, rwidth=0.5)
#                    plt.title("Total number of raw observations in each 5x5 monthly grid box")
#                    plt.xlabel("Number of raw observations")
#                    plt.ylabel("Frequency (log scale)")
#                    plt.savefig(settings.PLOT_LOCATION + "n_obs_5x5_monthly_{}{:02d}_{}_{}.png".format(year, month, period, suffix))
#
#                    plt.clf()
#                    plt.hist(monthly_5by5_n_grids.reshape(-1), bins = np.arange(-2,30,2), align = "left", log = True, rwidth=0.5)
#                    plt.axvline(x = 1, color="r")
#                    plt.title("Total number of 1x1 monthly grids in each 5x5 monthly grid")
#                    plt.xlabel("Number of 1x1 monthly grids")
#                    plt.ylabel("Frequency (log scale)")
#                    plt.savefig(settings.PLOT_LOCATION + "n_grids_5x5_monthly_{}{:02d}_{}_{}.png".format(year, month, period, suffix))
#
#                # clear up memory
#                del monthly_grid
#                del monthly_5by5
#                del monthly_5by5_n_grids
#                del monthly_5by5_n_obs
#                del n_grids_per_month
#                del n_obs_per_month
#                del n_hrs_per_day
#                gc.collect()
# end
                # go direct from daily 1x1 to monthly 5x5
# KATE MEDIAN WATCH - settings.doMedian is generally set to True - I think we may want the MEAN HERE!!!
# KATE modified - to hard wire in MEAN here
                monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, grid5_lats, grid5_lons = utils.grid_5by5(daily_grid, n_obs_per_day, grid_lats, grid_lons, doMedian = False, daily = True)
                #monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, grid5_lats, grid5_lons = utils.grid_5by5(daily_grid, n_obs_per_day, grid_lats, grid_lons, doMedian = settings.doMedian, daily = True)
# end
                out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_from_daily_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
 
                utils.netcdf_write(out_filename, monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, OBS_ORDER, grid5_lats, grid5_lons, times, frequency = "M")

                

                if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
                    # plot the distribution of days

                    plt.clf()
                    plt.hist(monthly_5by5_n_obs.reshape(-1), bins = np.arange(-10,1000,10),  log = True, rwidth=0.5)
                    plt.title("Total number of raw observations in each 5x5 monthly grid box")
                    plt.xlabel("Number of raw observations")
                    plt.ylabel("Frequency (log scale)")
                    plt.savefig(settings.PLOT_LOCATION + "n_obs_5x5_monthly_from_daily_{}{:02d}_{}_{}.png".format(year, month, period, suffix))


                    plt.clf()
                    plt.hist(monthly_5by5_n_grids.reshape(-1), bins = np.arange(-5,100,5), align = "left", log = True, rwidth=0.5)
                    plt.axvline(x = (0.3 * daily_grid.shape[0]), color="r")
                    plt.title("Total number of 1x1 daily grids in each 5x5 monthly grid")
                    plt.xlabel("Number of 1x1 daily grids")
                    plt.ylabel("Frequency (log scale)")

                    plt.savefig(settings.PLOT_LOCATION + "n_grids_5x5_monthly_from_daily_{}{:02d}_{}_{}.png".format(year, month, period, suffix))

                # clear up memory before the next period/month
                del daily_grid
                del monthly_5by5
                del n_obs_per_day
                del monthly_5by5_n_grids
                del monthly_5by5_n_obs
                gc.collect()

    return # do_gridding