def _map(tile_in_spark):
    """Spark map function: accumulate the per-pixel sum and count of valid
    observations over a time range for one spatial tile.

    tile_in_spark is a 4-tuple:
      [0] tile bounds (min_lat, max_lat, min_lon, max_lon,
          min_y, max_y, min_x, max_x)
      [1] start time (epoch seconds)
      [2] end time (epoch seconds)
      [3] dataset short name

    Returns ((min_lat, max_lat, min_lon, max_lon), (sum_tile, cnt_tile)).
    """
    bounds = tile_in_spark[0]
    (min_lat, max_lat, min_lon, max_lon,
     min_y, max_y, min_x, max_x) = bounds
    t0 = tile_in_spark[1]
    t1 = tile_in_spark[2]
    dataset = tile_in_spark[3]

    service = NexusTileService()
    shape = (max_y - min_y + 1, max_x - min_x + 1)
    sum_tile = np.zeros(shape, dtype=np.float64)
    cnt_tile = np.zeros(shape, dtype=np.uint32)

    # Pull at most 30 days of tiles per backend query to bound memory use.
    step = 86400 * 30
    window_start = t0
    while window_start <= t1:
        window_end = min(window_start + step, t1)
        for tile in service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                     min_lon, max_lon,
                                                     ds=dataset,
                                                     start_time=window_start,
                                                     end_time=window_end):
            # Zero NaNs in place so masked cells contribute 0 to the sum.
            tile.data.data[:, :] = np.nan_to_num(tile.data.data)
            sum_tile += tile.data.data[0, min_y:max_y + 1, min_x:max_x + 1]
            # Unmasked cells count as 1, masked cells as 0.
            cnt_tile += (~tile.data.mask[0, min_y:max_y + 1,
                                         min_x:max_x + 1]).astype(np.uint8)
        window_start = window_end + 1
    return (min_lat, max_lat, min_lon, max_lon), (sum_tile, cnt_tile)
class TestService(unittest.TestCase):
    """Integration tests for NexusTileService backed by local Cassandra and
    Solr instances (Python 2; requires both services to be running).

    NOTE(review): results are printed, not asserted — these are smoke tests
    against live data, not self-checking unit tests.
    """

    def setUp(self):
        # In-memory INI config pointing at a local Cassandra (9042) and
        # Solr (8983) with the 'nexustiles' keyspace/core.
        config = StringIO("""[cassandra]
host=127.0.0.1
keyspace=nexustiles
local_datacenter=datacenter1
protocol_version=3
port=9042
[solr]
host=http://localhost:8983
core=nexustiles
[datastore]
store=cassandra""")
        cp = ConfigParser.RawConfigParser()
        cp.readfp(config)
        self.tile_service = NexusTileService(config=cp)

    def test_get_distinct_bounding_boxes_in_polygon(self):
        # Whole-globe polygon; full time range from epoch to now.
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(
            box(-180, -90, 180, 90),
            "MXLDEPTH_ECCO_version4_release1",
            1, time.time())
        for b in boxes:
            print b.bounds

    def test_get_distinct_bounding_boxes_in_polygon_mur(self):
        # Same query against the (much denser) MUR SST dataset.
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(
            box(-180, -90, 180, 90),
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())
        for b in boxes:
            print b.bounds

    def test_find_tiles_by_exact_bounds(self):
        # Bounds tuple is (min_lon, min_lat, max_lon, max_lat).
        tiles = self.tile_service.find_tiles_by_exact_bounds(
            (175.01, -42.68, 180.0, -40.2),
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())
        for tile in tiles:
            print tile.get_summary()

    def test_sorted_box(self):
        # Positional args are (min_lat, max_lat, min_lon, max_lon).
        tiles = self.tile_service.get_tiles_bounded_by_box(
            -42.68, -40.2, 175.01, 180.0,
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())
        for tile in tiles:
            print tile.min_time

    def test_time_series_tile(self):
        # Lookup by tile UUID.
        tiles = self.tile_service.find_tile_by_id(
            "055c0b51-d0fb-3f39-b48a-4f762bf0c994")
        for tile in tiles:
            print tile.get_summary()

    def test_get_tiles_by_metadata(self):
        # Metadata term is a raw Solr field:value clause.
        tiles = self.tile_service.get_tiles_by_metadata(
            ['id:60758e00-5721-3a6e-bf57-78448bb0aeeb'],
            "MUR-JPL-L4-GLOB-v4.1",
            1514764800, 1514764800)
        for tile in tiles:
            print tile.get_summary()
def _calc_variance(tile_in_spark):
    """Spark map function: accumulate the per-pixel sum of squared anomalies
    (x - x_bar)^2 and the count of valid observations for one spatial tile,
    for use in a downstream variance calculation.

    tile_in_spark is a 5-tuple:
      [0] tile bounds (min_lat, max_lat, min_lon, max_lon,
          min_y, max_y, min_x, max_x)
      [1] start time (epoch seconds)
      [2] end time (epoch seconds)
      [3] dataset short name
      [4] x_bar: per-pixel mean for the same bounds/time range

    Returns ((min_lat, max_lat, min_lon, max_lon),
             (data_anomaly_squared_tile, cnt_tile)).
    """
    bounds = tile_in_spark[0]
    (min_lat, max_lat, min_lon, max_lon,
     min_y, max_y, min_x, max_x) = bounds
    t0 = tile_in_spark[1]
    t1 = tile_in_spark[2]
    dataset = tile_in_spark[3]
    mean_tile = np.asarray(tile_in_spark[4])
    # Zero NaNs in the mean (in place) so they cannot poison the anomalies.
    mean_tile[:, :] = np.nan_to_num(mean_tile)

    service = NexusTileService()
    shape = (max_y - min_y + 1, max_x - min_x + 1)
    sq_anomaly_sum = np.zeros(shape, dtype=np.float64)
    valid_count = np.zeros(shape, dtype=np.uint32)

    # Query at most 30 days of tiles at a time to bound memory use.
    step = 86400 * 30
    window_start = t0
    while window_start <= t1:
        window_end = min(window_start + step, t1)
        for tile in service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                     min_lon, max_lon,
                                                     ds=dataset,
                                                     start_time=window_start,
                                                     end_time=window_end):
            # Masked NaNs are zeroed before differencing.
            tile.data.data[:, :] = np.nan_to_num(tile.data.data)
            anomaly = (tile.data.data[0, min_y:max_y + 1, min_x:max_x + 1]
                       - mean_tile)
            sq_anomaly_sum += anomaly * anomaly
            # Only unmasked cells contribute to the count.
            valid_count += (~tile.data.mask[0, min_y:max_y + 1,
                                            min_x:max_x + 1]).astype(np.uint8)
        window_start = window_end + 1
    return ((min_lat, max_lat, min_lon, max_lon),
            (sq_anomaly_sum, valid_count))
class TestService(unittest.TestCase):
    """Integration tests for NexusTileService against a local Cassandra
    (non-default port 32769, e.g. a Docker mapping) and Solr on port 8986
    (Python 2; requires both services to be running).

    NOTE(review): results are printed, not asserted — smoke tests only.
    """

    def setUp(self):
        # In-memory INI config; no [datastore] section, so the service
        # presumably falls back to its default store — confirm in
        # NexusTileService.
        config = StringIO("""[cassandra]
host=127.0.0.1
keyspace=nexustiles
local_datacenter=datacenter1
protocol_version=3
port=32769
[solr]
host=localhost:8986
core=nexustiles""")
        cp = ConfigParser.RawConfigParser()
        cp.readfp(config)
        self.tile_service = NexusTileService(config=cp)

    def test_get_distinct_bounding_boxes_in_polygon(self):
        # Whole-globe polygon; full time range from epoch to now.
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(
            box(-180, -90, 180, 90),
            "MXLDEPTH_ECCO_version4_release1",
            1, time.time())
        for b in boxes:
            print b.bounds

    def test_get_distinct_bounding_boxes_in_polygon_mur(self):
        # Same query against the MUR SST dataset.
        boxes = self.tile_service.get_distinct_bounding_boxes_in_polygon(
            box(-180, -90, 180, 90),
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())
        for b in boxes:
            print b.bounds

    def test_find_tiles_by_exact_bounds(self):
        # Bounds tuple is (min_lon, min_lat, max_lon, max_lat).
        tiles = self.tile_service.find_tiles_by_exact_bounds(
            (175.01, -42.68, 180.0, -40.2),
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())
        for tile in tiles:
            print tile.get_summary()

    def test_sorted_box(self):
        # Positional args are (min_lat, max_lat, min_lon, max_lon).
        tiles = self.tile_service.get_tiles_bounded_by_box(
            -42.68, -40.2, 175.01, 180.0,
            "JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1",
            1, time.time())
        for tile in tiles:
            print tile.min_time
def _map(tile_in_spark):
    """Spark map function: accumulate the per-pixel sum and count of valid
    observations over a time range for one spatial tile, printing the
    accumulated arrays before returning.

    tile_in_spark is a 4-tuple:
      [0] tile bounds (min_lat, max_lat, min_lon, max_lon,
          min_y, max_y, min_x, max_x)
      [1] start time (epoch seconds)
      [2] end time (epoch seconds)
      [3] dataset short name

    Returns (tile_bounds, (sum_tile, cnt_tile)) — note the FULL 8-tuple
    bounds are returned here, unlike sibling implementations.
    """
    bounds = tile_in_spark[0]
    (min_lat, max_lat, min_lon, max_lon,
     min_y, max_y, min_x, max_x) = bounds
    t0 = tile_in_spark[1]
    t1 = tile_in_spark[2]
    dataset = tile_in_spark[3]

    service = NexusTileService()
    shape = (max_y - min_y + 1, max_x - min_x + 1)
    sum_tile = np.zeros(shape, dtype=np.float64)
    cnt_tile = np.zeros(shape, dtype=np.uint32)

    # Fetch at most 30 days of tiles per query to bound memory use.
    step = 86400 * 30
    window_start = t0
    while window_start <= t1:
        window_end = min(window_start + step, t1)
        for tile in service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                     min_lon, max_lon,
                                                     ds=dataset,
                                                     start_time=window_start,
                                                     end_time=window_end):
            # Zero NaNs in place so masked cells contribute 0 to the sum.
            tile.data.data[:, :] = np.nan_to_num(tile.data.data)
            sum_tile += tile.data.data[0, min_y:max_y + 1, min_x:max_x + 1]
            # Unmasked cells count as 1, masked cells as 0.
            cnt_tile += (~tile.data.mask[0, min_y:max_y + 1,
                                         min_x:max_x + 1]).astype(np.uint8)
        window_start = window_end + 1
    print("sum tile", sum_tile)
    print("count tile", cnt_tile)
    return bounds, (sum_tile, cnt_tile)
def _map(tile_in):
    """Spark map function: accumulate the intermediate sums needed for a
    Pearson correlation coefficient between two datasets over one spatial
    tile, using a one-pass online algorithm so that not all of the data
    must be held in memory at once.

    tile_in is a 4-tuple:
      tile_bounds: (min_lat, max_lat, min_lon, max_lon,
                    min_y, max_y, min_x, max_x)
      start_time, end_time: epoch seconds
      ds: pair of dataset short names (ds[0] = X series, ds[1] = Y series)

    Returns ((min_lat, max_lat, min_lon, max_lon),
             (sumx, sumy, sumxx, sumyy, sumxy, n)) — all per-pixel arrays.
    """
    # Unpack input
    tile_bounds, start_time, end_time, ds = tile_in
    (min_lat, max_lat, min_lon, max_lon,
     min_y, max_y, min_x, max_x) = tile_bounds

    # Create arrays to hold intermediate results during the correlation
    # coefficient calculation.
    tile_inbounds_shape = (max_y - min_y + 1, max_x - min_x + 1)
    sumx_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    sumy_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    sumxx_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    sumyy_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    sumxy_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    n_tile = np.zeros(tile_inbounds_shape, dtype=np.uint32)

    # Can only retrieve some number of days worth of data from Solr at a
    # time. Set desired value here.
    days_at_a_time = 90
    t_incr = 86400 * days_at_a_time

    tile_service = NexusTileService()

    t_start = start_time
    while t_start <= end_time:
        t_end = min(t_start + t_incr, end_time)
        ds1tiles = tile_service.get_tiles_bounded_by_box(
            min_lat, max_lat, min_lon, max_lon, ds[0], t_start, t_end)
        ds2tiles = tile_service.get_tiles_bounded_by_box(
            min_lat, max_lat, min_lon, max_lon, ds[1], t_start, t_end)
        len1 = len(ds1tiles)
        len2 = len(ds2tiles)
        # Two-pointer merge over the two (time-ordered) tile lists: only
        # timestamps present in BOTH datasets are accumulated.
        i1 = 0
        i2 = 0
        time1 = 0
        time2 = 0
        while i1 < len1 and i2 < len2:
            tile1 = ds1tiles[i1]
            tile2 = ds2tiles[i2]
            # Sanity-check the expected ascending-time ordering; the merge
            # below is only correct for sorted inputs.
            assert tile1.times[0] >= time1, 'DS1 time out of order!'
            assert tile2.times[0] >= time2, 'DS2 time out of order!'
            time1 = tile1.times[0]
            time2 = tile2.times[0]
            # Advance whichever side is behind until the times match.
            if time1 < time2:
                i1 += 1
                continue
            elif time2 < time1:
                i2 += 1
                continue
            assert (time1 == time2), \
                "Mismatched tile times %d and %d" % (time1, time2)
            t1_data = tile1.data.data
            t1_mask = tile1.data.mask
            t2_data = tile2.data.data
            t2_mask = tile2.data.mask
            # Zero out NaNs; the joint mask below keeps them from counting.
            t1_data = np.nan_to_num(t1_data)
            t2_data = np.nan_to_num(t2_data)
            # 1 where BOTH datasets have valid data, 0 elsewhere.
            joint_mask = ((~t1_mask).astype(np.uint8) *
                          (~t2_mask).astype(np.uint8))
            sumx_tile += (t1_data[0, min_y:max_y + 1, min_x:max_x + 1] *
                          joint_mask[0, min_y:max_y + 1, min_x:max_x + 1])
            sumy_tile += (t2_data[0, min_y:max_y + 1, min_x:max_x + 1] *
                          joint_mask[0, min_y:max_y + 1, min_x:max_x + 1])
            sumxx_tile += (t1_data[0, min_y:max_y + 1, min_x:max_x + 1] *
                           t1_data[0, min_y:max_y + 1, min_x:max_x + 1] *
                           joint_mask[0, min_y:max_y + 1,
                                      min_x:max_x + 1])
            sumyy_tile += (t2_data[0, min_y:max_y + 1, min_x:max_x + 1] *
                           t2_data[0, min_y:max_y + 1, min_x:max_x + 1] *
                           joint_mask[0, min_y:max_y + 1,
                                      min_x:max_x + 1])
            sumxy_tile += (t1_data[0, min_y:max_y + 1, min_x:max_x + 1] *
                           t2_data[0, min_y:max_y + 1, min_x:max_x + 1] *
                           joint_mask[0, min_y:max_y + 1,
                                      min_x:max_x + 1])
            n_tile += joint_mask[0, min_y:max_y + 1, min_x:max_x + 1]
            i1 += 1
            i2 += 1
        t_start = t_end + 1

    return ((min_lat, max_lat, min_lon, max_lon),
            (sumx_tile, sumy_tile, sumxx_tile, sumyy_tile, sumxy_tile,
             n_tile))
def _map(tile_in_spark):
    """Spark map function: accumulate the per-pixel sum and count of valid
    observations over a time range for one spatial tile.

    tile_in_spark is a 4-tuple:
      [0] tile bounds (min_lat, max_lat, min_lon, max_lon,
          min_y, max_y, min_x, max_x)
      [1] start time (epoch seconds)
      [2] end time (epoch seconds)
      [3] dataset short name

    Returns ((min_lat, max_lat, min_lon, max_lon), (sum_tile, cnt_tile));
    the caller combines these partial sums into a time-averaged map.
    """
    (min_lat, max_lat, min_lon, max_lon,
     min_y, max_y, min_x, max_x) = tile_in_spark[0]
    start_time = tile_in_spark[1]
    end_time = tile_in_spark[2]
    dataset = tile_in_spark[3]

    service = NexusTileService()
    shape = (max_y - min_y + 1, max_x - min_x + 1)
    sum_tile = np.zeros(shape, dtype=np.float64)
    cnt_tile = np.zeros(shape, dtype=np.uint32)

    # Limit each backend query to 30 days of tiles to bound memory use.
    query_step = 86400 * 30
    window_start = start_time
    while window_start <= end_time:
        window_end = min(window_start + query_step, end_time)
        nexus_tiles = service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                       min_lon, max_lon,
                                                       ds=dataset,
                                                       start_time=window_start,
                                                       end_time=window_end)
        for tile in nexus_tiles:
            # Zero NaNs in place so masked cells contribute 0 to the sum.
            tile.data.data[:, :] = np.nan_to_num(tile.data.data)
            sum_tile += tile.data.data[0, min_y:max_y + 1, min_x:max_x + 1]
            # Unmasked cells count as 1, masked cells as 0.
            cnt_tile += (~tile.data.mask[0, min_y:max_y + 1,
                                         min_x:max_x + 1]).astype(np.uint8)
        window_start = window_end + 1

    return ((min_lat, max_lat, min_lon, max_lon), (sum_tile, cnt_tile))
def _map(tile_in):
    """Spark map function (Python 2 variant): compute a per-pixel Pearson
    correlation coefficient between two datasets over one spatial tile,
    using an online accumulation so all the data need not be in memory at
    once.

    tile_in is a 4-tuple:
      tile_bounds: (min_lat, max_lat, min_lon, max_lon,
                    min_y, max_y, min_x, max_x)
      start_time, end_time: epoch seconds
      ds: pair of dataset short names (ds[0] = X series, ds[1] = Y series)

    Returns (stats_tile, min_lat, max_lat, min_lon, max_lon) where
    stats_tile is a nested list of {'r': ..., 'cnt': ...} dicts, one per
    in-bounds grid cell.
    """
    # Unpack input
    tile_bounds, start_time, end_time, ds = tile_in
    (min_lat, max_lat, min_lon, max_lon,
     min_y, max_y, min_x, max_x) = tile_bounds

    # Create masked arrays to hold intermediate results during the
    # correlation coefficient calculation.
    tile_inbounds_shape = (max_y-min_y+1, max_x-min_x+1)
    sumx_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    sumy_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    sumxx_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    sumyy_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    sumxy_tile = np.zeros(tile_inbounds_shape, dtype=np.float64)
    n_tile = np.ma.array(np.zeros(tile_inbounds_shape, dtype=np.uint32))

    # Can only retrieve some number of days worth of data from Solr at a
    # time. Set desired value here.
    days_at_a_time = 90
    print 'days_at_a_time = ', days_at_a_time
    t_incr = 86400 * days_at_a_time

    tile_service = NexusTileService()

    t_start = start_time
    while t_start <= end_time:
        t_end = min(t_start+t_incr, end_time)
        t1 = time()
        print 'nexus call start at time %f' % t1
        sys.stdout.flush()
        ds1tiles = tile_service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                         min_lon, max_lon,
                                                         ds[0], t_start,
                                                         t_end)
        ds2tiles = tile_service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                         min_lon, max_lon,
                                                         ds[1], t_start,
                                                         t_end)
        t2 = time()
        print 'nexus call end at time %f' % t2
        print 'secs in nexus call: ', t2-t1
        sys.stdout.flush()
        # Drop tiles the handler considers unusable before merging.
        CorrMapSparkHandlerImpl._prune_tiles(ds1tiles)
        CorrMapSparkHandlerImpl._prune_tiles(ds2tiles)
        len1 = len(ds1tiles)
        len2 = len(ds2tiles)
        print 't %d to %d - Got %d and %d tiles' % (t_start, t_end,
                                                    len1, len2)
        sys.stdout.flush()
        # Two-pointer merge over the two time-ordered tile lists: only
        # timestamps present in BOTH datasets are accumulated.
        i1 = 0
        i2 = 0
        while i1 < len1 and i2 < len2:
            tile1 = ds1tiles[i1]
            tile2 = ds2tiles[i2]
            time1 = tile1.times[0]
            time2 = tile2.times[0]
            # Advance whichever side is behind until the times match.
            if time1 < time2:
                i1 += 1
                continue
            elif time2 < time1:
                i2 += 1
                continue
            assert (time1 == time2),\
                "Mismatched tile times %d and %d" % (time1, time2)
            t1_data = tile1.data.data
            t1_mask = tile1.data.mask
            t2_data = tile2.data.data
            t2_mask = tile2.data.mask
            # Zero out NaNs; the joint mask keeps them from counting.
            t1_data = np.nan_to_num(t1_data)
            t2_data = np.nan_to_num(t2_data)
            # 1 where BOTH datasets have valid data, 0 elsewhere.
            joint_mask = ((~t1_mask).astype(np.uint8) *
                          (~t2_mask).astype(np.uint8))
            sumx_tile += (t1_data[0, min_y:max_y+1, min_x:max_x+1] *
                          joint_mask[0, min_y:max_y+1, min_x:max_x+1])
            sumy_tile += (t2_data[0, min_y:max_y+1, min_x:max_x+1] *
                          joint_mask[0, min_y:max_y+1, min_x:max_x+1])
            sumxx_tile += (t1_data[0, min_y:max_y+1, min_x:max_x+1] *
                           t1_data[0, min_y:max_y+1, min_x:max_x+1] *
                           joint_mask[0, min_y:max_y+1, min_x:max_x+1])
            sumyy_tile += (t2_data[0, min_y:max_y+1, min_x:max_x+1] *
                           t2_data[0, min_y:max_y+1, min_x:max_x+1] *
                           joint_mask[0, min_y:max_y+1, min_x:max_x+1])
            sumxy_tile += (t1_data[0, min_y:max_y+1, min_x:max_x+1] *
                           t2_data[0, min_y:max_y+1, min_x:max_x+1] *
                           joint_mask[0, min_y:max_y+1, min_x:max_x+1])
            n_tile.data[:, :] += joint_mask[0, min_y:max_y+1, min_x:max_x+1]
            i1 += 1
            i2 += 1
        t_start = t_end + 1

    # Standard computational form of the Pearson correlation coefficient;
    # cells where n is 0 divide by zero here and are masked out below.
    r_tile = np.ma.array((sumxy_tile-sumx_tile*sumy_tile/n_tile) /
                         np.sqrt((sumxx_tile-sumx_tile*sumx_tile/n_tile) *
                                 (sumyy_tile - sumy_tile*sumy_tile/n_tile)))
    # Mask cells that never had a joint observation.
    n_tile.mask = ~(n_tile.data.astype(bool))
    r_tile.mask = n_tile.mask
    stats_tile = [[{'r': r_tile.data[y, x], 'cnt': n_tile.data[y, x]}
                   for x in range(tile_inbounds_shape[1])]
                  for y in range(tile_inbounds_shape[0])]
    print 'Finished tile', tile_bounds
    sys.stdout.flush()
    return (stats_tile, min_lat, max_lat, min_lon, max_lon)
def calc_average_on_day(tile_in_spark):
    """Compute daily min/max/mean/count/std statistics for one spatial
    region over a list of timestamps.

    tile_in_spark is a 7-tuple:
      (min_lat, max_lat, min_lon, max_lon, dataset, timestamps, cwd)
    where timestamps is a sorted list of epoch seconds and cwd is the
    working directory to switch to (Spark workers may start elsewhere).

    Returns a list of dicts with keys 'min', 'max', 'mean', 'cnt', 'std',
    'time' — one entry per requested timestamp; days with no valid data
    get all-zero statistics.
    """
    (min_lat, max_lat, min_lon, max_lon,
     dataset, timestamps, cwd) = tile_in_spark
    # Ensure relative paths resolve the same way as in the driver.
    os.chdir(cwd)
    start_time = timestamps[0]
    end_time = timestamps[-1]

    tile_service = NexusTileService()
    # One query covering the whole requested time span.
    region_tiles = tile_service.get_tiles_bounded_by_box(min_lat, max_lat,
                                                         min_lon, max_lon,
                                                         dataset,
                                                         timestamps[0],
                                                         timestamps[-1])

    stats_arr = []
    for day in timestamps:
        # Stack the flattened data of every tile stamped with this day.
        day_data = np.ma.array([t.data.flatten()
                                for t in region_tiles
                                if (t.times[0] == day)])
        if (len(day_data) == 0) or day_data.mask.all():
            # No valid observations: emit zeroed statistics for this day.
            stat = {
                'min': 0.,
                'max': 0.,
                'mean': 0.,
                'cnt': 0,
                'std': 0.,
                'time': int(day)
            }
        else:
            stat = {
                'min': np.ma.min(day_data),
                'max': np.ma.max(day_data),
                'mean': np.ma.mean(day_data).item(),
                'cnt': np.ma.count(day_data),
                'std': np.ma.std(day_data),
                'time': int(day)
            }
        stats_arr.append(stat)
    return stats_arr
def calc_average_on_day(tile_in_spark):
    """Compute daily min/max/area-weighted-mean/count/std statistics for
    one spatial region over a list of timestamps.

    tile_in_spark is a 7-tuple:
      (min_lat, max_lat, min_lon, max_lon, dataset, timestamps, fill)
    where timestamps is a sorted list of epoch seconds and fill is
    currently unused (kept for interface compatibility).

    The mean is weighted by cos(latitude), which approximates equal-area
    averaging on a regular lat/lon grid. Days with no valid (unmasked)
    data are omitted from the result.

    Returns a list of dicts with keys 'min', 'max', 'mean', 'cnt', 'std',
    'time'.
    """
    (min_lat, max_lat, min_lon, max_lon,
     dataset, timestamps, fill) = tile_in_spark
    if len(timestamps) == 0:
        return []

    tile_service = NexusTileService()
    # One query covering the whole requested time span.
    ds1_nexus_tiles = \
        tile_service.get_tiles_bounded_by_box(min_lat, max_lat,
                                              min_lon, max_lon,
                                              dataset,
                                              timestamps[0],
                                              timestamps[-1],
                                              rows=5000)

    # Index tile positions by timestamp. BUG FIX: the closed-range query
    # above can return tiles whose timestamp is not in the requested
    # `timestamps` list; appending via plain indexing raised KeyError for
    # those. setdefault tolerates them, and the per-day loop below simply
    # never visits the extra keys.
    tile_dict = {}
    for timeinseconds in timestamps:
        tile_dict[timeinseconds] = []
    for i, tile in enumerate(ds1_nexus_tiles):
        tile_dict.setdefault(tile.times[0], []).append(i)

    stats_arr = []
    for timeinseconds in timestamps:
        cur_tile_list = tile_dict[timeinseconds]
        if len(cur_tile_list) == 0:
            continue
        # Aggregate data and masks across every tile for this day.
        # (cur_tile_list already contains only tiles stamped with this
        # timestamp, so no per-tile time re-check is needed.)
        tile_data_agg = np.ma.array(
            data=np.hstack([ds1_nexus_tiles[i].data.data.flatten()
                            for i in cur_tile_list]),
            mask=np.hstack([ds1_nexus_tiles[i].data.mask.flatten()
                            for i in cur_tile_list]))
        # Latitude of every grid cell, repeated across longitudes, for
        # the cosine weighting below.
        lats_agg = np.hstack([
            np.repeat(ds1_nexus_tiles[i].latitudes,
                      len(ds1_nexus_tiles[i].longitudes))
            for i in cur_tile_list])
        if (len(tile_data_agg) == 0) or tile_data_agg.mask.all():
            continue
        data_min = np.ma.min(tile_data_agg)
        data_max = np.ma.max(tile_data_agg)
        daily_mean = \
            np.ma.average(tile_data_agg,
                          weights=np.cos(np.radians(lats_agg))).item()
        data_count = np.ma.count(tile_data_agg)
        data_std = np.ma.std(tile_data_agg)

        # Return stats by day.
        stat = {
            'min': data_min,
            'max': data_max,
            'mean': daily_mean,
            'cnt': data_count,
            'std': data_std,
            'time': int(timeinseconds)
        }
        stats_arr.append(stat)
    return stats_arr