def _bin_catalog_spatial_counts(lons, lats, n_poly, mask, idx_map, binx, biny):
    """
    Counts the events falling inside each polygon of a gridded region.

    Events are first located on the cartesian grid described by ``binx`` and ``biny``;
    ``idx_map`` then translates a grid cell into the index of the polygon occupying it.
    Events that fall outside the grid, or inside a cell flagged with 1 in ``mask``, are
    not counted.

    Args:
        lons: longitudes of the events
        lats: latitudes of the events
        n_poly: number of polygons, i.e. the length of the returned array
        mask: 2d array indexed as [iy, ix]; a value of 1 marks a cell that is excluded
        idx_map: 2d array indexed as [iy, ix] holding the polygon index of each cell
        binx: bin edges along the longitude direction
        biny: bin edges along the latitude direction

    Returns:
        numpy.ndarray: shape (n_poly,), event count for each polygon
    """
    # grid position of every event; bin1d_vec reports -1 for points outside the grid
    ix = bin1d_vec(lons, binx)
    iy = bin1d_vec(lats, biny)

    # keep only events that are inside the grid and inside an unmasked cell
    keep = (ix != -1) & (iy != -1) & (mask[iy, ix] != 1)

    # translate grid cells into polygon indices
    poly_idx = idx_map[iy[keep], ix[keep]].astype(int)

    # unbuffered accumulation so that repeated polygon indices are all counted
    event_counts = numpy.zeros(n_poly)
    numpy.add.at(event_counts, poly_idx, 1)
    return event_counts
def test_scalar_outside(self):
    """Scalar inputs: values inside the first bin give 0, values below the bins give -1."""
    from csep.utils.calc import bin1d_vec

    # This gives bins from 5.95 to 8.95
    mbins = numpy.arange(5.95, 9, 0.1)

    # (magnitude, expected bin index); 6 lands in bin 0, which is fine
    cases = (
        (5.95, 0),
        (6, 0),
        (5, -1),
        (4, -1),
    )
    for magnitude, expected in cases:
        idx = bin1d_vec(magnitude, mbins, tol=0.00001, right_continuous=True)
        self.assertEqual(idx, expected)
def magnitude_counts(self, mag_bins=None, tol=0.00001, retbins=False):
    """ Computes the count of events within mag_bins

    Events whose magnitude falls below the first bin (reported as -1 by bin1d_vec) are
    ignored; they are not added to any bin.

    Args:
        mag_bins: uses the magnitudes of self.region if available, otherwise
                  csep.utils.constants.CSEP_MW_BINS as default magnitude bins
        tol (float): tolerance forwarded to bin1d_vec
        retbins (bool): if this is true, return the bins used

    Returns:
        numpy.ndarray: showing the counts of the events in each magnitude bin. If retbins
        is true, a tuple (mag_bins, counts) is returned instead.
    """
    # todo: keep track of events that are ignored
    if mag_bins is None:
        try:
            # a forecast is a type of region, but region does not need a magnitude
            mag_bins = self.region.magnitudes
        except AttributeError:
            # use default magnitude bins from csep
            mag_bins = CSEP_MW_BINS
            # only store the defaults on the region when there is a region object to
            # store them on; otherwise the assignment itself would raise AttributeError
            region = getattr(self, 'region', None)
            if region is not None:
                region.magnitudes = mag_bins
                region.num_mag_bins = len(mag_bins)

    out = numpy.zeros(len(mag_bins))
    if self.event_count != 0:
        idx = numpy.asarray(
            bin1d_vec(self.get_magnitudes(), mag_bins, tol=tol, right_continuous=True)
        )
        # -1 marks a magnitude below the first bin; passing it on to add.at would
        # silently increment the last bin through negative indexing
        numpy.add.at(out, idx[idx >= 0], 1)

    if retbins:
        return (mag_bins, out)
    return out
def test_bin1d_single_bin1(self):
    """With a single bin edge, values at or above the edge give 0 and values below give -1."""
    values = [-1, 0, 2, 3, 1, 1.5, 1.0, 0.999999999999999]
    single_edge = [1]
    expected = [-1, -1, 0, 0, 0, 0, 0, -1]
    # purposely leaving right_continuous flag=False bc it should be forced in the bin1d_vec function
    result = bin1d_vec(values, single_edge)
    self.assertListEqual(result.tolist(), expected)
def _bin_catalog_spatio_magnitude_counts(lons, lats, mags, n_poly, mask, idx_map, binx, biny, mag_bins, tol=0.00001):
    """
    Counts events per polygon and magnitude bin and collects the events that were left out.

    Events are located on the cartesian grid given by ``binx`` and ``biny``; ``idx_map``
    translates a grid cell into the index of the polygon occupying it. Note that the grid
    index is different from the polygon index. An event is skipped when its cell has a
    truthy value in ``mask``, when it lies outside the grid, or when its magnitude lies
    outside ``mag_bins``.

    Args:
        lons: longitudes of the events
        lats: latitudes of the events
        mags: magnitudes of the events
        n_poly: number of polygons
        mask: 2d array indexed as [iy, ix]; truthy values mark excluded cells
        idx_map: 2d array indexed as [iy, ix] holding the polygon index of each cell
        binx: bin edges along the longitude direction
        biny: bin edges along the latitude direction
        mag_bins: magnitude bin edges
        tol: tolerance forwarded to bin1d_vec for the magnitude binning

    Returns:
        tuple: (event_counts, skipped) where event_counts is a numpy.ndarray with shape
        (n_poly, len(mag_bins)) and skipped is a list of (lon, lat, mag) tuples
    """
    # position of each event in the 2d grid and in the magnitude bins (-1 means outside)
    ix = bin1d_vec(lons, binx)
    iy = bin1d_vec(lats, biny)
    im = bin1d_vec(mags, mag_bins, tol=tol, right_continuous=True)

    # start with zero event counts in each bin
    event_counts = numpy.zeros((n_poly, len(mag_bins)))
    skipped = []

    for i in range(ix.shape[0]):
        col, row = ix[i], iy[i]
        if mask[row, col] or row == -1 or col == -1 or im[i] == -1:
            skipped.append((lons[i], lats[i], mags[i]))
            continue
        # polygon index for this grid cell
        poly = int(idx_map[row, col])
        event_counts[poly, im[i]] += 1

    return event_counts, skipped
def _build_bitmask_vec(self):
    """
    Builds the mask / index-map array of the region using vectorized calls to bin1d_vec.

    The bounding box is derived from the polygon origins and divided into cells of size
    self.dh. Each polygon is assigned to a cell through its centroid.

    Returns:
        tuple: (a, xs, ys) where a has shape (len(ys), len(xs), 2). a[:, :, 0] is the
        mask (1 is masked, 0 is valid) and a[:, :, 1] is the polygon index of the cell
        (nan where no polygon was assigned). xs and ys are the cell coordinates along
        each direction.
    """
    # bounding box of the set of polygons, based on their origins
    origins = numpy.array([poly.origin for poly in self.polygons])
    x_min, y_min = numpy.min(origins[:, 0]), numpy.min(origins[:, 1])
    x_max, y_max = numpy.max(origins[:, 0]), numpy.max(origins[:, 1])

    # centroids are used to place each polygon on the grid
    centroids = numpy.array([poly.centroid() for poly in self.polygons])

    # number of cells along each direction and the resulting grid coordinates
    nx = numpy.rint((x_max - x_min) / self.dh)
    ny = numpy.rint((y_max - y_min) / self.dh)
    xs = self.dh * numpy.arange(nx + 1) + x_min
    ys = self.dh * numpy.arange(ny + 1) + y_min

    # everything starts masked (1) with an undefined (nan) polygon index
    a = numpy.ones([len(ys), len(xs), 2])
    a[:, :, 1] = numpy.nan

    # grid position of each polygon
    idx = bin1d_vec(centroids[:, 0], xs)
    idy = bin1d_vec(centroids[:, 1], ys)

    for i, (row, col) in enumerate(zip(idy, idx)):
        a[row, col, 1] = int(i)
        if col < 0 or row < 0:
            continue
        # note: csep1 gridded forecast file format convention states that a "1" indicates
        # a valid cell, which is the opposite of the masking criterion used here. without
        # a poly_mask every polygon is treated as valid.
        if self.poly_mask is None or self.poly_mask[i] == 1:
            a[row, col, 0] = 0

    return a, xs, ys
def get_index_of(self, lons, lats):
    """ Returns the index of lons, lats in self.polygons

    Args:
        lons: ndarray-like
        lats: ndarray-like

    Returns:
        idx: ndarray-like

    Raises:
        ValueError: if any lon/lat pair lies outside the grid or inside a masked cell
    """
    idx = bin1d_vec(numpy.array(lons), self.xs)
    idy = bin1d_vec(numpy.array(lats), self.ys)

    # bin1d_vec reports -1 for points outside the grid
    if numpy.any(idx == -1) or numpy.any(idy == -1):
        raise ValueError(
            "at least one lon and lat pair contain values that are outside of the valid region."
        )

    # cells flagged with 1 in the bbox mask are not part of the region
    if numpy.any(self.bbox_mask[idy, idx] == 1):
        raise ValueError(
            "at least one lon and lat pair contain values that are outside of the valid region."
        )

    # numpy.int was only an alias of the builtin int and has been removed from numpy
    return self.idx_map[idy, idx].astype(int)
def get_masked(self, lons, lats):
    """Returns a bool array that is True where lons and lats are not included in the spatial region.

    .. note:: The ordering of lons and lats should correspond to the ordering of the lons
              and lats in the data.

    Args:
        lons: array-like
        lats: array-like

    Returns:
        idx: array-like
    """
    ix = bin1d_vec(lons, self.xs)
    iy = bin1d_vec(lats, self.ys)

    # mask value of the cell each point falls into
    masked = self.bbox_mask[iy, ix].astype(bool)

    # points outside the grid (index of -1) are always reported as masked
    outside = numpy.where(numpy.logical_or(ix == -1, iy == -1))
    masked[outside] = True
    return masked
def _merge(self, bins, data):
    """
    Adds the counts of ``data`` to the stored histogram, extending the bins if needed.

    Three cases are handled:
      1) no bins are stored yet: ``bins`` is adopted and ``data`` is counted into it
      2) ``bins`` lies within the stored bins: ``data`` is counted into the stored bins
      3) ``bins`` extends beyond the stored bins: new bins spanning both ranges are
         generated with spacing self.dh, the stored counts are moved over and ``data``
         is counted into the new bins

    Args:
        bins: bin edges associated with the new data
        data: values to be counted
    """
    starting_empty = self.bins.size == 0
    if starting_empty:
        # 1) current bins dont exist
        self.bins = bins
        self.data = np.zeros(len(self.bins))

    # 1) and 2): the stored bins already cover the new bins
    if starting_empty or (bins[0] >= self.bins[0] and bins[-1] <= self.bins[-1]):
        np.add.at(self.data, bin1d_vec(data, self.bins), 1)
        return

    # 3) new bins are outside current bins: span the union of both ranges
    lower = min(self.bins[0], bins[0])
    upper = max(self.bins[-1], bins[-1])
    merged_bins = np.arange(lower, upper + self.dh / 2, self.dh)

    # move the old counts to their location relative to the new bins
    merged_counts = np.zeros(len(merged_bins))
    merged_counts[bin1d_vec(self.bins, merged_bins)] = self.data
    self.data = merged_counts

    # count the new data
    np.add.at(self.data, bin1d_vec(data, merged_bins), 1)
    self.bins = merged_bins
    return
def _bin_catalog_probability(lons, lats, n_poly, mask, idx_map, binx, biny):
    """
    Flags the polygons of a gridded region that contain at least one event.

    Events are located on the cartesian grid given by ``binx`` and ``biny``; ``idx_map``
    translates a grid cell into the index of the polygon occupying it. Events that fall
    outside the grid, or inside a cell flagged with 1 in ``mask``, are ignored.

    Args:
        lons: longitudes of the events
        lats: latitudes of the events
        n_poly: number of polygons, i.e. the length of the returned array
        mask: 2d array indexed as [iy, ix]; a value of 1 marks a cell that is excluded
        idx_map: 2d array indexed as [iy, ix] holding the polygon index of each cell
        binx: bin edges along the longitude direction
        biny: bin edges along the latitude direction

    Returns:
        numpy.ndarray: shape (n_poly,), 1 for polygons containing an event and 0 otherwise
    """
    # grid position of every event; -1 if outside of the bbox
    ix = bin1d_vec(lons, binx)
    iy = bin1d_vec(lats, biny)

    # keep only events that are inside the grid and inside an unmasked cell
    keep = (ix != -1) & (iy != -1) & (mask[iy, ix] != 1)

    # translate grid cells into polygon indices
    poly_idx = idx_map[iy[keep], ix[keep]].astype(int)

    # dont accumulate, just set to one for probability
    event_counts = numpy.zeros(n_poly)
    event_counts[poly_idx] = 1
    return event_counts
def get_magnitude_index(self, mags, tol=0.00001):
    """ Returns the indices into the magnitude bins of selected magnitudes

    Note: the right-most bin is treated as extending to infinity.

    Args:
        mags (array-like): list of magnitudes
        tol (float): tolerance forwarded to bin1d_vec

    Returns:
        idm (array-like): indices corresponding to mags

    Raises:
        ValueError: if any magnitude falls outside of self.magnitudes
    """
    idm = bin1d_vec(mags, self.magnitudes, tol=tol, right_continuous=True)
    # bin1d_vec marks magnitudes it could not place with -1
    out_of_range = idm == -1
    if numpy.any(out_of_range):
        raise ValueError("mags outside the range of forecast magnitudes.")
    return idm
def test_upper_limit_not_continuous(self):
    """Values equal to the last bin edge are placed in the last bin."""
    edges = [0, 10, 20, 30]
    values = [30, 30, 30]
    result = bin1d_vec(values, edges)
    self.assertListEqual(result.tolist(), [3, 3, 3])
def test_bin1d_vec2(self):
    """A value just below the last edge stays in the bin before it."""
    edges = [0.8, 0.9, 1.0]
    values = [0.9999999]
    result = bin1d_vec(values, edges)
    self.assertListEqual(result.tolist(), [1])
def get_mag_idx(self):
    """ Return magnitude index from region magnitudes

    Raises:
        CSEPCatalogException: if an AttributeError occurs while computing the index,
            e.g. because self.region.magnitudes is not available
    """
    try:
        mag_idx = bin1d_vec(
            self.get_magnitudes(),
            self.region.magnitudes,
            tol=0.00001,
            right_continuous=True,
        )
    except AttributeError:
        raise CSEPCatalogException("Cannot return magnitude index without self.region.magnitudes")
    return mag_idx
def test_bin1d_vec6(self):
    """A large value just below the second edge is placed in the first bin."""
    edges = [1189999.9, 1190000.0, 1200000.0]
    values = [1189999.99999]
    result = bin1d_vec(values, edges)
    self.assertListEqual(result.tolist(), [0])
def test_bin1d_vec3(self):
    """A negative value just above the first edge is placed in the first bin."""
    edges = [-119.0, -118.9, -118.8]
    values = [-118.9999999]
    result = bin1d_vec(values, edges)
    self.assertListEqual(result.tolist(), [0])
def test_bin1d_vec(self):
    """Values equal to an interior bin edge are placed in the bin starting at that edge."""
    edges = [0.33, 0.34, 0.35, 0.36]
    values = [0.34, 0.35]
    result = bin1d_vec(values, edges).tolist()
    self.assertListEqual(result, [1, 2])
def test_bin1d_vec9(self):
    """A negative value just above the second edge is placed in the second bin."""
    edges = [-119.0, -118.98, -118.96]
    values = [-118.97999999]
    result = bin1d_vec(values, edges)
    self.assertListEqual(result.tolist(), [1])
def test_bin1d_vec_int(self):
    """Integer data and integer bin edges are binned correctly."""
    edges = [0, 10, 20, 30]
    values = [1, 3, 5, 10, 20]
    result = bin1d_vec(values, edges)
    self.assertListEqual(result.tolist(), [0, 0, 0, 1, 2])
def test_less_and_greater_than(self):
    """-1 (below the first edge) and 40 give -1, while 35 is placed in the last bin."""
    edges = [0, 10, 20, 30]
    values = [-1, 35, 40]
    result = bin1d_vec(values, edges)
    self.assertListEqual(result.tolist(), [-1, 3, -1])
def test_lower_limit(self):
    """A value equal to the first bin edge is placed in the first bin."""
    edges = [0, 10, 20, 30]
    values = [0]
    result = bin1d_vec(values, edges)
    self.assertListEqual(result.tolist(), [0])