def whichbox(boxes, cell):
    """Return the box in *boxes* that contains (the centre of the) *cell*.

    Each box is unpacked as a 4-tuple (south, north, west, east).  A box
    contains the centre when ``south <= lat < north`` and
    ``west <= lon < east``, i.e. the southern and western edges are
    inclusive and the northern and eastern edges are exclusive.  The
    first matching box in iteration order is returned; None is returned
    when no box matches.
    """
    centre_lat, centre_lon = eqarea.centre(cell)
    for candidate in boxes:
        south, north, west, east = candidate
        # Reject on latitude first, then test longitude.
        if not south <= centre_lat < north:
            continue
        if west <= centre_lon < east:
            return candidate
    return None
def whichbox(boxes, cell):
    """Return the box in *boxes* that contains (the centre of the) *cell*.

    Each box is unpacked as a 4-tuple (south, north, west, east).  A box
    contains the centre when ``south <= lat < north`` and
    ``west <= lon < east``, i.e. the southern and western edges are
    inclusive and the northern and eastern edges are exclusive.  The
    first matching box in iteration order is returned; None is returned
    when no box matches.
    """
    centre_lat, centre_lon = eqarea.centre(cell)
    for candidate in boxes:
        south, north, west, east = candidate
        # Reject on latitude first, then test longitude.
        if not south <= centre_lat < north:
            continue
        if west <= centre_lon < east:
            return candidate
    return None
def __init__(self, series, **k):
    """Initialise the record from *series* and keyword attributes.

    Every keyword argument in *k* is stored directly as an instance
    attribute.  If that leaves the instance with a ``box`` attribute but
    no ``uid`` attribute, a uid is synthesized from the box's centre.
    Finally *series* is installed via ``set_series``.
    """
    super(SubboxRecord, self).__init__()
    self.__dict__.update(k)

    # Synthesize a uid attribute if necessary, based on the box's
    # centre.  Nothing is synthesized when there is no box to derive
    # it from.
    needs_uid = not hasattr(self, 'uid') and hasattr(self, 'box')
    if needs_uid:
        import eqarea
        centre_lat, centre_lon = eqarea.centre(self.box)
        self.uid = "%+05.1f%+06.1fC" % (centre_lat, centre_lon)

    self.set_series(series)
def __init__(self, series, **k):
    """Initialise the record from *series* and keyword attributes.

    Every keyword argument in *k* is stored directly as an instance
    attribute.  If that leaves the instance with a ``box`` attribute but
    no ``uid`` attribute, a uid is synthesized from the box's centre.
    Finally *series* is installed via ``set_series``.
    """
    super(SubboxRecord, self).__init__()
    self.__dict__.update(k)

    # Synthesize a uid attribute if necessary, based on the box's
    # centre.  Nothing is synthesized when there is no box to derive
    # it from.
    needs_uid = not hasattr(self, 'uid') and hasattr(self, 'box')
    if needs_uid:
        import eqarea
        centre_lat, centre_lon = eqarea.centre(self.box)
        self.uid = "%+05.1f%+06.1fC" % (centre_lat, centre_lon)

    self.set_series(series)
def boxuid(box, celltype='QXQ'):
    """Synthesize a uid string based on the centre of *box*.

    *box* is a 4-tuple of the box's bounds: (south, north, west, east).

    One of two 12 character layouts is returned:

      +NN.N+EEE.EQ
      QXQ@+NN-EEET

    The first layout is used when the box is less than 10 degrees wide
    (east minus west); the second otherwise.  This is the distinction
    of cells versus boxes.

    *celltype* supplies the variable part of the uid: its first
    character becomes the last character of the first layout, and its
    first 3 characters become the prefix of the second layout.
    """
    import eqarea

    centre_lat, centre_lon = eqarea.centre(box)
    _south, _north, west, east = box

    is_cell = east - west < 10
    if is_cell:
        return "%+05.1f%+06.1f%s" % (centre_lat, centre_lon, celltype[0])
    return "%s@%+03.0f%+04.0fT" % (celltype[:3], centre_lat, centre_lon)
def boxuid(box, celltype='QXQ'):
    """Synthesize a uid string based on the centre of *box*.

    *box* is a 4-tuple of the box's bounds: (south, north, west, east).

    One of two 12 character layouts is returned:

      +NN.N+EEE.EQ
      QXQ@+NN-EEET

    The first layout is used when the box is less than 10 degrees wide
    (east minus west); the second otherwise.  This is the distinction
    of cells versus boxes.

    *celltype* supplies the variable part of the uid: its first
    character becomes the last character of the first layout, and its
    first 3 characters become the prefix of the second layout.
    """
    import eqarea

    centre_lat, centre_lon = eqarea.centre(box)
    _south, _north, west, east = box

    is_cell = east - west < 10
    if is_cell:
        return "%+05.1f%+06.1f%s" % (centre_lat, centre_lon, celltype[0])
    return "%s@%+03.0f%+04.0fT" % (celltype[:3], centre_lat, centre_lon)
def iter_subbox_grid(station_records, max_months, first_year, radius):
    """Convert the input *station_records*, into a gridded anomaly
    dataset which is returned as an iterator.

    *max_months* is the maximum number of months in any station
    record.  *first_year* is the first year in the dataset.  *radius*
    is the combining radius in kilometres.

    This is a generator: one giss_data.SubboxRecord is yielded for
    every subbox of every region produced by eqarea.gridsub().  A
    subbox with no station in range yields a record whose series is
    entirely MISSING, with stations=0, station_months=0 and d=MISSING.
    Progress messages are written to sys.stdout as a side effect.
    """
    # Convert to a list because it is scanned once per region.
    station_records = list(station_records)

    # Progress messages go to standard output.
    log = sys.stdout

    # Critical radius as an angle of arc
    arc = radius / earth.radius
    arcdeg = arc * 180 / math.pi

    regions = list(eqarea.gridsub())
    for region in regions:
        box, subboxes = region[0], list(region[1])

        # Extend box, by half a box east and west and by arc north
        # and south.
        half_width = 0.5 * (box[3] - box[2])
        extent = [box[0] - arcdeg,
                  box[1] + arcdeg,
                  box[2] - half_width,
                  box[3] + half_width]
        if box[0] <= -90 or box[1] >= 90:
            # polar: take in every longitude.
            extent[2] = -180.0
            extent[3] = +180.0

        region_records = list(inbox(station_records, *extent))
        # Descending sort by number of good records
        # TODO: Switch to using Python's sort method here, although it
        # will change the results.
        sort(region_records, lambda x, y: y.good_count - x.good_count)

        # Count how many cells are empty
        n_empty_cells = 0
        for subbox in subboxes:
            # Select and weight stations
            centre = eqarea.centre(subbox)
            log.write("\rsubbox at %+05.1f%+06.1f (%d empty)" % (
                centre + (n_empty_cells,)))
            log.flush()
            # Of possible station records for this region, filter for
            # those from stations within radius of subbox centre.
            incircle_records = list(incircle(region_records, arc, *centre))

            # Combine data.
            subbox_series = [MISSING] * max_months

            if not incircle_records:
                # No station in range: emit an all-MISSING record.
                box_obj = giss_data.SubboxRecord(
                    subbox_series,
                    box=list(subbox), stations=0, station_months=0,
                    d=MISSING)
                n_empty_cells += 1
                yield box_obj
                continue

            # Initialise data with first station
            record = incircle_records[0]
            total_good_months = record.good_count
            total_stations = 1

            max_weight = record.weight
            offset = record.rel_first_month - 1
            first_series = record.series
            subbox_series[offset:offset + len(first_series)] = first_series
            # Only months with valid data from the first station carry
            # its weight; every other month starts with zero weight.
            weight = [0.0] * max_months
            for i, value in enumerate(first_series):
                if valid(value):
                    weight[i + offset] = record.weight

            # Add in the remaining stations
            for record in incircle_records[1:]:
                # TODO: A StationMethod method to produce a padded data
                # series would be good here. Hence we could just do:
                #   new = record.padded_series(max_months)
                new = [MISSING] * max_months
                aa, bb = record.rel_first_month, record.rel_last_month
                new[aa - 1:bb] = record.series
                station_months = series.combine(
                    subbox_series, weight, new, record.weight,
                    record.rel_first_year, record.rel_last_year + 1,
                    parameters.gridding_min_overlap)
                total_good_months += station_months
                if station_months == 0:
                    # Nothing was combined, so this station does not
                    # count towards the station total or max weight.
                    continue
                total_stations += 1

                if max_weight < record.weight:
                    max_weight = record.weight

            series.anomalize(subbox_series,
                             parameters.gridding_reference_period,
                             first_year)
            box_obj = giss_data.SubboxRecord(
                subbox_series, n=max_months,
                box=list(subbox), stations=total_stations,
                station_months=total_good_months,
                d=radius * (1 - max_weight))
            yield box_obj

        plural_suffix = '' if n_empty_cells == 1 else 's'
        log.write(
            '\rRegion (%+03.0f/%+03.0f S/N %+04.0f/%+04.0f W/E): %d empty cell%s.\n' %
            (tuple(box) + (n_empty_cells, plural_suffix)))
    log.write("\n")
def iter_subbox_grid(station_records, max_months, first_year, radius):
    """Convert the input *station_records*, into a gridded anomaly
    dataset which is returned as an iterator.

    *max_months* is the maximum number of months in any station
    record.  *first_year* is the first year in the dataset.  *radius*
    is the combining radius in kilometres.

    This is a generator: one giss_data.Series is yielded for every
    subbox of every region produced by eqarea.gridsub().  A subbox with
    no contributing station yields a record whose series is entirely
    MISSING, with stations=0, station_months=0 and d=MISSING.  Progress
    messages are written to sys.stdout, and for every non-empty subbox
    a line listing the contributing stations is written to ``log``.
    """
    # Clear Climate Code
    import earth  # required for radius.

    # Convert to list because we re-use it for each box (region).
    station_records = list(station_records)
    # Descending sort by number of good records.
    # TODO: Switch to using Python's sort method here, although it
    # will change the results.
    sort(station_records, lambda x, y: y.good_count - x.good_count)

    # A dribble of progress messages.
    dribble = sys.stdout

    # Critical radius as an angle of arc
    arc = radius / earth.radius

    regions = list(eqarea.gridsub())
    for region in regions:
        box, subboxes = region[0], list(region[1])

        # Count how many cells are empty
        n_empty_cells = 0
        for subbox in subboxes:
            # Select and weight stations
            centre = eqarea.centre(subbox)
            dribble.write("\rsubbox at %+05.1f%+06.1f (%d empty)" % (
                centre + (n_empty_cells,)))
            dribble.flush()
            # Determine the contributing stations to this grid cell.
            contributors = list(incircle(station_records, arc, *centre))

            # Combine data.
            subbox_series = [MISSING] * max_months

            if not contributors:
                # No station in range: emit an all-MISSING record.
                box_obj = giss_data.Series(
                    series=subbox_series,
                    box=list(subbox), stations=0, station_months=0,
                    d=MISSING)
                n_empty_cells += 1
                yield box_obj
                continue

            # Initialise series and weight arrays with first station.
            record, wt = contributors[0]
            total_good_months = record.good_count
            total_stations = 1

            offset = record.rel_first_month - 1
            first_series = record.series
            subbox_series[offset:offset + len(first_series)] = first_series
            max_weight = wt
            # valid() is used as a 0/1 multiplier, so only months with
            # valid data carry the first station's weight.
            weight = [wt * valid(v) for v in subbox_series]

            # For logging, keep a list of stations that contributed.
            # Each item in this list is a triple (in list form, so that
            # it can be converted to JSON easily) of [id12, weight,
            # months].  *id12* is the 12 character station identifier;
            # *weight* (a float) is the weight (computed based on
            # distance) of the station's series; *months* is a 12 digit
            # string that records whether each of the 12 months is used.
            # '0' in position *i* indicates that the month was not used,
            # a '1' indicates that it was used.  January is position 0.
            months_used = [any(valid(v) for v in subbox_series[i::12])
                           for i in range(12)]
            month_flags = ''.join('01'[used] for used in months_used)
            contributed = [[record.uid, wt, month_flags]]

            # Add in the remaining stations
            for record, wt in contributors[1:]:
                # TODO: A method to produce a padded data series
                # would be good here. Hence we could just do:
                #   new = record.padded_series(max_months)
                new = [MISSING] * max_months
                aa, bb = record.rel_first_month, record.rel_last_month
                new[aa - 1:bb] = record.series
                station_months = series.combine(
                    subbox_series, weight, new, wt,
                    parameters.gridding_min_overlap)
                n_good_months = sum(station_months)
                total_good_months += n_good_months
                if n_good_months == 0:
                    # Nothing was combined: log the station with zero
                    # weight and no months, and do not count it.
                    contributed.append([record.uid, 0.0, '0' * 12])
                    continue
                total_stations += 1
                month_flags = ''.join('01'[bool(x)] for x in station_months)
                contributed.append([record.uid, wt, month_flags])

                max_weight = max(max_weight, wt)

            series.anomalize(subbox_series,
                             parameters.gridding_reference_period,
                             first_year)
            box_obj = giss_data.Series(
                series=subbox_series, n=max_months,
                box=list(subbox), stations=total_stations,
                station_months=total_good_months,
                d=radius * (1 - max_weight))
            # NOTE(review): `log` is not bound inside this function;
            # presumably a module-level log file -- confirm.
            log.write("%s stations %s\n" % (box_obj.uid, asjson(contributed)))
            yield box_obj

        plural_suffix = '' if n_empty_cells == 1 else 's'
        dribble.write(
            '\rRegion (%+03.0f/%+03.0f S/N %+04.0f/%+04.0f W/E): %d empty cell%s.\n' %
            (tuple(box) + (n_empty_cells, plural_suffix)))
    dribble.write("\n")
def iter_subbox_grid(station_records, max_months, first_year, radius):
    """Convert the input *station_records*, into a gridded anomaly
    dataset which is returned as an iterator.

    *max_months* is the maximum number of months in any station
    record.  *first_year* is the first year in the dataset.  *radius*
    is the combining radius in kilometres.

    This is a generator: one giss_data.SubboxRecord is yielded for
    every subbox of every region produced by eqarea.gridsub().  A
    subbox with no station in range yields a record whose series is
    entirely MISSING, with stations=0, station_months=0 and d=MISSING.
    Progress messages are written to sys.stdout as a side effect.
    """
    # Convert to a list because it is scanned once per region.
    station_records = list(station_records)

    # Progress messages go to standard output.
    log = sys.stdout

    # Critical radius as an angle of arc
    arc = radius / earth.radius
    arcdeg = arc * 180 / math.pi

    regions = list(eqarea.gridsub())
    for region in regions:
        box, subboxes = region[0], list(region[1])

        # Extend box, by half a box east and west and by arc north
        # and south.
        half_width = 0.5 * (box[3] - box[2])
        extent = [box[0] - arcdeg,
                  box[1] + arcdeg,
                  box[2] - half_width,
                  box[3] + half_width]
        if box[0] <= -90 or box[1] >= 90:
            # polar: take in every longitude.
            extent[2] = -180.0
            extent[3] = +180.0

        region_records = list(inbox(station_records, *extent))
        # Descending sort by number of good records
        # TODO: Switch to using Python's sort method here, although it
        # will change the results.
        sort(region_records, lambda x, y: y.good_count - x.good_count)

        # Count how many cells are empty
        n_empty_cells = 0
        for subbox in subboxes:
            # Select and weight stations
            centre = eqarea.centre(subbox)
            log.write("\rsubbox at %+05.1f%+06.1f (%d empty)" % (
                centre + (n_empty_cells,)))
            log.flush()
            # Of possible station records for this region, filter for
            # those from stations within radius of subbox centre.
            incircle_records = list(incircle(region_records, arc, *centre))

            # Combine data.
            subbox_series = [MISSING] * max_months

            if not incircle_records:
                # No station in range: emit an all-MISSING record.
                box_obj = giss_data.SubboxRecord(
                    subbox_series,
                    box=list(subbox), stations=0, station_months=0,
                    d=MISSING)
                n_empty_cells += 1
                yield box_obj
                continue

            # Initialise data with first station
            record = incircle_records[0]
            total_good_months = record.good_count
            total_stations = 1

            max_weight = record.weight
            offset = record.rel_first_month - 1
            first_series = record.series
            subbox_series[offset:offset + len(first_series)] = first_series
            # Only months with valid data from the first station carry
            # its weight; every other month starts with zero weight.
            weight = [0.0] * max_months
            for i, value in enumerate(first_series):
                if valid(value):
                    weight[i + offset] = record.weight

            # Add in the remaining stations
            for record in incircle_records[1:]:
                # TODO: A StationMethod method to produce a padded data
                # series would be good here. Hence we could just do:
                #   new = record.padded_series(max_months)
                new = [MISSING] * max_months
                aa, bb = record.rel_first_month, record.rel_last_month
                new[aa - 1:bb] = record.series
                station_months = series.combine(
                    subbox_series, weight, new, record.weight,
                    record.rel_first_year, record.rel_last_year + 1,
                    parameters.gridding_min_overlap)
                total_good_months += station_months
                if station_months == 0:
                    # Nothing was combined, so this station does not
                    # count towards the station total or max weight.
                    continue
                total_stations += 1

                if max_weight < record.weight:
                    max_weight = record.weight

            series.anomalize(subbox_series,
                             parameters.gridding_reference_period,
                             first_year)
            box_obj = giss_data.SubboxRecord(
                subbox_series, n=max_months,
                box=list(subbox), stations=total_stations,
                station_months=total_good_months,
                d=radius * (1 - max_weight))
            yield box_obj

        plural_suffix = '' if n_empty_cells == 1 else 's'
        log.write(
            '\rRegion (%+03.0f/%+03.0f S/N %+04.0f/%+04.0f W/E): %d empty cell%s.\n' %
            (tuple(box) + (n_empty_cells, plural_suffix)))
    log.write("\n")
def iter_subbox_grid(station_records, max_months, first_year, radius):
    """Convert the input *station_records*, into a gridded anomaly
    dataset which is returned as an iterator.

    *max_months* is the maximum number of months in any station
    record.  *first_year* is the first year in the dataset.  *radius*
    is the combining radius in kilometres.

    This is a generator: one giss_data.Series is yielded for every
    subbox of every region produced by eqarea.gridsub().  A subbox with
    no contributing station yields a record whose series is entirely
    MISSING, with stations=0, station_months=0 and d=MISSING.  Progress
    messages are written to sys.stdout, and for every non-empty subbox
    a line listing the contributing stations is written to ``log``.
    """
    # Clear Climate Code
    import earth  # required for radius.

    # Convert to list because we re-use it for each box (region).
    station_records = list(station_records)
    # Descending sort by number of good records.
    # TODO: Switch to using Python's sort method here, although it
    # will change the results.
    sort(station_records, lambda x, y: y.good_count - x.good_count)

    # A dribble of progress messages.
    dribble = sys.stdout

    # Critical radius as an angle of arc
    arc = radius / earth.radius

    regions = list(eqarea.gridsub())
    for region in regions:
        box, subboxes = region[0], list(region[1])

        # Count how many cells are empty
        n_empty_cells = 0
        for subbox in subboxes:
            # Select and weight stations
            centre = eqarea.centre(subbox)
            dribble.write("\rsubbox at %+05.1f%+06.1f (%d empty)" % (
                centre + (n_empty_cells,)))
            dribble.flush()
            # Determine the contributing stations to this grid cell.
            contributors = list(incircle(station_records, arc, *centre))

            # Combine data.
            subbox_series = [MISSING] * max_months

            if not contributors:
                # No station in range: emit an all-MISSING record.
                box_obj = giss_data.Series(
                    series=subbox_series,
                    box=list(subbox), stations=0, station_months=0,
                    d=MISSING)
                n_empty_cells += 1
                yield box_obj
                continue

            # Initialise series and weight arrays with first station.
            record, wt = contributors[0]
            total_good_months = record.good_count
            total_stations = 1

            offset = record.rel_first_month - 1
            first_series = record.series
            subbox_series[offset:offset + len(first_series)] = first_series
            max_weight = wt
            # valid() is used as a 0/1 multiplier, so only months with
            # valid data carry the first station's weight.
            weight = [wt * valid(v) for v in subbox_series]

            # For logging, keep a list of stations that contributed.
            # Each item in this list is a triple (in list form, so that
            # it can be converted to JSON easily) of [id12, weight,
            # months].  *id12* is the 12 character station identifier;
            # *weight* (a float) is the weight (computed based on
            # distance) of the station's series; *months* is a 12 digit
            # string that records whether each of the 12 months is used.
            # '0' in position *i* indicates that the month was not used,
            # a '1' indicates that it was used.  January is position 0.
            months_used = [any(valid(v) for v in subbox_series[i::12])
                           for i in range(12)]
            month_flags = ''.join('01'[used] for used in months_used)
            contributed = [[record.uid, wt, month_flags]]

            # Add in the remaining stations
            for record, wt in contributors[1:]:
                # TODO: A method to produce a padded data series
                # would be good here. Hence we could just do:
                #   new = record.padded_series(max_months)
                new = [MISSING] * max_months
                aa, bb = record.rel_first_month, record.rel_last_month
                new[aa - 1:bb] = record.series
                station_months = series.combine(
                    subbox_series, weight, new, wt,
                    parameters.gridding_min_overlap)
                n_good_months = sum(station_months)
                total_good_months += n_good_months
                if n_good_months == 0:
                    # Nothing was combined: log the station with zero
                    # weight and no months, and do not count it.
                    contributed.append([record.uid, 0.0, '0' * 12])
                    continue
                total_stations += 1
                month_flags = ''.join('01'[bool(x)] for x in station_months)
                contributed.append([record.uid, wt, month_flags])

                max_weight = max(max_weight, wt)

            series.anomalize(subbox_series,
                             parameters.gridding_reference_period,
                             first_year)
            box_obj = giss_data.Series(
                series=subbox_series, n=max_months,
                box=list(subbox), stations=total_stations,
                station_months=total_good_months,
                d=radius * (1 - max_weight))
            # NOTE(review): `log` is not bound inside this function;
            # presumably a module-level log file -- confirm.
            log.write("%s stations %s\n" % (box_obj.uid, asjson(contributed)))
            yield box_obj

        plural_suffix = '' if n_empty_cells == 1 else 's'
        dribble.write(
            '\rRegion (%+03.0f/%+03.0f S/N %+04.0f/%+04.0f W/E): %d empty cell%s.\n' %
            (tuple(box) + (n_empty_cells, plural_suffix)))
    dribble.write("\n")