if stat[0] == input_id: # set up station station = utils.Station(stat[0], float(stat[1]), float(stat[2]), float(stat[3])) break else: sys.exit(0) # read attributes and qc_flags ncdfp.read(os.path.join(NETCDF_DATA_LOCS, station.id + "_external.nc"), station, process_vars, []) match_to_compress = utils.create_fulltimes(station, process_vars, DATASTART, DATAEND, []) # nyears x 12 months month_start_locs = np.array(utils.month_starts(DATASTART, DATAEND)).reshape(-1,12) # which years years = DATASTART.year + np.arange(month_start_locs.shape[0]) for year in range(DATASTART.year, DATAEND.year): year_loc, = np.where(years == year) if year != DATAEND.year - 1: plot_range = (month_start_locs[year_loc,0], month_start_locs[year_loc+1,0]) else: plot_range = (month_start_locs[year_loc,0], -1) # misses last hour plot_times = utils.times_hours_to_datetime(station.time.data[plot_range[0]:plot_range[1]], DATASTART)
def rsc_annual_string_expectance(all_filtered, value_starts, value_lengths, flags, start, end, st_var, times, diagnostics = False, plots = False):
    '''
    Find years which have more strings (streaks of repeated values) than
    expected, but where no single string is long enough to set off the main test.

    For every year with enough unmasked observations, the proportion of the
    data that sits in strings is calculated.  If enough years could be
    assessed, any year whose proportion exceeds a multiple of the median
    proportion has all of its strings flagged.

    :param array all_filtered: masked array of data filtered by all flags set so far
    :param array value_starts: locations of start of strings/streaks of data
    :param array value_lengths: lengths of each streak
    :param array flags: array of flags to be set (modified in place)
    :param datetime start: start of data
    :param datetime end: end of data
    :param obj st_var: station variable - only the name attribute is read here,
                       and only for the diagnostic/plot output
    :param array times: time axis, sliced alongside the data for diagnostics/plots
    :param bool diagnostics: do diagnostic output
    :param bool plots: do plots

    :returns: flags - the input flag array, with flagged string locations set to 1
    '''
    # thresholds of the test
    min_obs_per_year = 200    # unmasked observations needed to assess a year
    min_good_years = 10       # assessed years needed before any flagging is done
    min_median = 0.005        # floor on the median proportion
    threshold_factor = 5.     # a year is bad if proportion > factor x median

    value_starts = np.asarray(value_starts)
    value_lengths = np.asarray(value_lengths)

    # nyears x 12 months - column 0 is the index of the first hour of each year
    month_starts = np.array(utils.month_starts(start, end)).reshape(-1, 12)
    year_starts = month_starts[:, 0]
    n_years = year_starts.shape[0]

    # exclusive end index of each year - the final year runs to the end of the data.
    # Having this for every year means the last year needs no special casing
    # (the plotting branch used to index one year past the end of month_starts)
    year_ends = np.append(year_starts[1:], len(all_filtered))

    year_proportions = np.zeros(n_years)
    # track assessed years explicitly rather than via a missing-data
    # sentinel, so the outcome does not depend on the value of the fill value
    enough_data = np.zeros(n_years, dtype = bool)

    # churn through each year in turn
    for y in range(n_years):

        n_obs = np.ma.count(all_filtered[year_starts[y] : year_ends[y]])

        if n_obs < min_obs_per_year:
            continue

        enough_data[y] = True

        # strings (streaks of same value) which start in this year
        in_year = (value_starts >= year_starts[y]) & (value_starts < year_ends[y])

        # proportion of the year's data which is in strings (0 if there are none)
        year_proportions[y] = np.sum(value_lengths[in_year]) / float(n_obs)

    # if enough years could be assessed
    if np.count_nonzero(enough_data) >= min_good_years:

        median = np.median(year_proportions[enough_data])

        if median < min_median:
            median = min_median

        # find the years which have proportions > 5 x median
        bad_years, = np.where(enough_data & (year_proportions > threshold_factor * median))

        if bad_years.size >= 1:
            # locations of all unmasked observations, found once.
            # getmaskarray always returns a full boolean array, even if
            # the data carry no explicit mask
            unmasked_locs, = np.where(~np.ma.getmaskarray(all_filtered))

        for bad in bad_years:

            year_start, year_end = year_starts[bad], year_ends[bad]

            # strings starting in this year.  The upper bound is exclusive, matching
            # the proportion calculation above - a string starting on the first hour
            # of the following year belongs to that year, not this one
            locs, = np.where((value_starts >= year_start) & (value_starts < year_end))

            for loc in locs:
                # need to account for missing values here 26/9/2014
                # flag the first N unmasked observations from the start of the
                # string, where N is its length.  Masked values inside the string
                # do not count towards its length and are not flagged
                first = np.searchsorted(unmasked_locs, value_starts[loc])
                flags[unmasked_locs[first : first + int(value_lengths[loc])]] = 1

            if plots or diagnostics:
                plot_year = all_filtered[year_start : year_end]
                plot_time = times[year_start : year_end]
                plot_flags = np.where(flags[year_start : year_end] == 1)[0]

                rsc_diagnostics_and_plot(plot_time, plot_year, plot_flags, st_var.name, start, plots = plots)

    return flags # rsc_annual_string_expectance
float(stat[3])) break else: sys.exit(0) # read attributes and qc_flags ncdfp.read(os.path.join(NETCDF_DATA_LOCS, station.id + "_external.nc"), station, process_vars, [], read_qc_flags=True) match_to_compress = utils.create_fulltimes(station, process_vars, DATASTART, DATAEND, []) # nyears x 12 months month_start_locs = np.array(utils.month_starts(DATASTART, DATAEND)).reshape(-1, 12) # which years years = DATASTART.year + np.arange(month_start_locs.shape[0]) # find which year and test to plot year_loc, = np.where(years == year) test_loc, = np.where(qc_test == test)[0] # and get the plot range if year != DATAEND.year - 1: plot_range = (month_start_locs[year_loc, 0], month_start_locs[year_loc + 1, 0]) else: plot_range = (month_start_locs[year_loc, 0], -1) # misses last hour