def read_frac(fname, hdfs_client=None):
    """
    Load the array stored in the fraction file `fname`.

    Args:
        fname: URI of the fraction file ('hdfs://...' or 'fs://...')
        hdfs_client: optional client forwarded to the rasterio fs helpers

    Returns:
        whatever np.load yields for the file, or None if the file does
        not exist (i.e. the fraction is empty)
    """
    if not rasterio.fs_exists(fname, hdfs_client):
        return None

    if not fname.startswith('hdfs://'):
        # Not on HDFS: skip fs_read and let numpy open the path directly
        local_path = rasterio.strip_uri_proto(fname, 'fs://')
        return np.load(local_path)

    # HDFS: fetch the raw content and hand numpy an in-memory file object
    raw = rasterio.fs_read(fname, hdfs_client)
    buf = StringIO.StringIO(raw)
    return np.load(buf)
def list_available_fractions(self, hdfs_client=None):
    """
    List the ids of the fractions that currently exist for this grid.

    Args:
        hdfs_client: optional client forwarded to the rasterio fs helpers

    Returns:
        a list of tuple (frac_num, time_chunk), one per fraction file
        found under '<grid_root>/jdata'; an empty list if that directory
        does not exist
    """
    jdata_dir = os.path.join(self.grid_root, 'jdata')
    if not rasterio.fs_exists(jdata_dir, hdfs_client):
        return []

    frac_ids = []
    for entry in rasterio.fs_list(jdata_dir, hdfs_client):
        # Keep only fraction filenames (e.g. 14123.jdata)
        if entry.endswith('jdata'):
            frac_ids.append(frac_id_from_fname(entry))
    return frac_ids
def exists(grid_root, hdfs_client=None):
    """
    Tell whether the header file 'header.jghdr3' is present under
    `grid_root`.

    Args:
        grid_root: root directory/URI of the grid
        hdfs_client: optional client forwarded to rasterio.fs_exists

    Returns:
        the result of rasterio.fs_exists for the header path
    """
    return rasterio.fs_exists(
        os.path.join(grid_root, 'header.jghdr3'), hdfs_client)
# Parse the command line and identify the requested fraction part
args = parser.parse_args()
frac_num = args.fraction
frac_d = args.fraction_part
frac_id = (frac_num, frac_d)

modis_dir = utils.get_modis_hdf_dir()

# The NDVI and QA grids live side by side under the worldgrid root
worldgrid = args.worldgrid
ndvi_root = os.path.join(worldgrid, 'ndvi')
qa_root = os.path.join(worldgrid, 'qa')

assert jgrid.Header.exists(ndvi_root)

ndvi_header = jgrid.load(ndvi_root)
qa_header = jgrid.load(qa_root)

fname = ndvi_header.frac_fname(frac_id)
if not io.fs_exists(fname):
    # Parenthesised single-argument form prints identically on Python 2
    print('The selected fraction does not exist in HDFS')
    exit(0)

# Both grids must describe exactly the same dates
assert np.all(ndvi_header.timestamps_ms == qa_header.timestamps_ms)

# Select dates for the requested fraction_part.
# The slice end must be an absolute index (start + chunk length, clipped to
# the number of timestamps). Using the chunk *length* as the end index
# yields an empty selection for every fraction_part >= 1.
start_date_i = ndvi_header.frac_ndates * frac_d
end_date_i = min(start_date_i + ndvi_header.frac_ndates,
                 len(ndvi_header.timestamps_ms))
selected_dates = ndvi_header.timestamps_ms[start_date_i:end_date_i]

modgrid = grids.MODISGrid()

# Build a dict of frac_num:tilename
tiles = config.MODIS_TERRA_TILES
frac_tilename = {}
def complete_frac(frac_num, ndvi_root, qa_root, frac_tilename,
                  tilename_fileindex):
    """
    Given a frac_num, will make sure it contains data for all dates in
    ndvi_header.timestamps_ms

    The most recent fraction part already stored for `frac_num` is read
    back, then one slice per missing timestamp is appended. A part is
    written every time it reaches `frac_ndates` dates, and the last
    (possibly incomplete) part is always written at the end.

    Args:
        frac_num: number of the fraction to complete
        ndvi_root: root of the NDVI grid (passed to jgrid.load)
        qa_root: root of the QA grid (passed to jgrid.load)
        frac_tilename: mapping frac_num -> tilename
        tilename_fileindex: mapping tilename -> HDF filenames indexed by
            timestamp

    Raises:
        IOError: if no fraction part exists yet for `frac_num`
    """
    _start = time.time()
    modgrid = grids.MODISGrid()
    ndvi_header = jgrid.load(ndvi_root)
    qa_header = jgrid.load(qa_root)

    tilename = frac_tilename[frac_num]
    tile_h, tile_v = modis.parse_tilename(tilename)
    hdf_files = tilename_fileindex[tilename]

    # Find the most recent existing fraction part, scanning newest first.
    # The for/else makes the "nothing found" case explicit: without it the
    # loop variables are always bound, so the search would silently fall
    # through with part 0 even when that part does not exist.
    d_to = ndvi_header.shape[2] // ndvi_header.frac_ndates + 1
    for frac_d in reversed(range(d_to)):
        frac_id = (frac_num, frac_d)
        if io.fs_exists(ndvi_header.frac_fname(frac_id)):
            break
    else:
        raise IOError(
            'No existing fraction part found for fraction %s' % (frac_num,))

    # Read the data of the most recent fraction in HDFS
    ndvi = jgrid.read_frac(ndvi_header.frac_fname(frac_id))
    qa = jgrid.read_frac(qa_header.frac_fname(frac_id))
    assert ndvi.shape == qa.shape

    # Compute the index of the last date in HDFS
    most_recent_t = frac_d * ndvi_header.frac_ndates + ndvi.shape[2]

    i_range, j_range = modgrid.get_cell_indices_in_tile(
        frac_num, tile_h, tile_v)

    # At this point, we just have to complete with the missing dates
    for t in range(most_recent_t, len(ndvi_header.timestamps_ms)):
        ts = ndvi_header.timestamps_ms[t]
        new_ndvi, new_qa = read_ndvi_qa(hdf_files[ts], i_range, j_range)

        if ndvi.shape[2] == ndvi_header.frac_ndates:
            # Write a complete fraction
            frac_id = (frac_num, frac_d)
            ndvi_header.write_frac(frac_id, ndvi)
            qa_header.write_frac(frac_id, qa)
            # Prepare variables for a new fraction
            frac_d += 1
            ndvi = new_ndvi[:, :, None]
            qa = new_qa[:, :, None]
        else:
            # TODO: If we end up completing multiple dates, we could
            # preallocate. But for now, this is unlikely (we'll complete
            # with the most recent data)
            ndvi = np.concatenate([ndvi, new_ndvi[:, :, None]], axis=2)
            qa = np.concatenate([qa, new_qa[:, :, None]], axis=2)

        # Checked on every iteration so that a mismatch is caught before
        # the next write
        assert ndvi.shape == qa.shape

    # Write last incomplete fraction
    frac_id = (frac_num, frac_d)
    ndvi_header.write_frac(frac_id, ndvi)
    qa_header.write_frac(frac_id, qa)

    n_appended = len(ndvi_header.timestamps_ms) - most_recent_t
    # Parenthesised single-argument form prints identically on Python 2
    print('Processed %d, appended %d dates, took %.02f [s]' % (
        frac_num, n_appended, time.time() - _start))
    sys.stdout.flush()
def complete_frac(frac_num, ndvi_root, qa_root, frac_tilename,
                  tilename_fileindex):
    """
    Given a frac_num, will make sure it contains data for all dates in
    ndvi_header.timestamps_ms

    The most recent fraction part already stored for `frac_num` is read
    back, then one slice per missing timestamp is appended. A part is
    written every time it reaches `frac_ndates` dates, and the last
    (possibly incomplete) part is always written at the end.

    Args:
        frac_num: number of the fraction to complete
        ndvi_root: root of the NDVI grid (passed to jgrid.load)
        qa_root: root of the QA grid (passed to jgrid.load)
        frac_tilename: mapping frac_num -> tilename
        tilename_fileindex: mapping tilename -> HDF filenames indexed by
            timestamp

    Raises:
        IOError: if no fraction part exists yet for `frac_num`
    """
    _start = time.time()
    modgrid = grids.MODISGrid()
    ndvi_header = jgrid.load(ndvi_root)
    qa_header = jgrid.load(qa_root)

    tilename = frac_tilename[frac_num]
    tile_h, tile_v = modis.parse_tilename(tilename)
    hdf_files = tilename_fileindex[tilename]

    # Find the most recent existing fraction part, scanning newest first.
    # The for/else makes the "nothing found" case explicit: without it the
    # loop variables are always bound, so the search would silently fall
    # through with part 0 even when that part does not exist.
    d_to = ndvi_header.shape[2] // ndvi_header.frac_ndates + 1
    for frac_d in reversed(range(d_to)):
        frac_id = (frac_num, frac_d)
        if io.fs_exists(ndvi_header.frac_fname(frac_id)):
            break
    else:
        raise IOError(
            'No existing fraction part found for fraction %s' % (frac_num,))

    # Read the data of the most recent fraction in HDFS
    ndvi = jgrid.read_frac(ndvi_header.frac_fname(frac_id))
    qa = jgrid.read_frac(qa_header.frac_fname(frac_id))
    assert ndvi.shape == qa.shape

    # Compute the index of the last date in HDFS
    most_recent_t = frac_d * ndvi_header.frac_ndates + ndvi.shape[2]

    i_range, j_range = modgrid.get_cell_indices_in_tile(
        frac_num, tile_h, tile_v)

    # At this point, we just have to complete with the missing dates
    for t in range(most_recent_t, len(ndvi_header.timestamps_ms)):
        ts = ndvi_header.timestamps_ms[t]
        new_ndvi, new_qa = read_ndvi_qa(hdf_files[ts], i_range, j_range)

        if ndvi.shape[2] == ndvi_header.frac_ndates:
            # Write a complete fraction
            frac_id = (frac_num, frac_d)
            ndvi_header.write_frac(frac_id, ndvi)
            qa_header.write_frac(frac_id, qa)
            # Prepare variables for a new fraction
            frac_d += 1
            ndvi = new_ndvi[:, :, None]
            qa = new_qa[:, :, None]
        else:
            # TODO: If we end up completing multiple dates, we could
            # preallocate. But for now, this is unlikely (we'll complete
            # with the most recent data)
            ndvi = np.concatenate([ndvi, new_ndvi[:, :, None]], axis=2)
            qa = np.concatenate([qa, new_qa[:, :, None]], axis=2)

        # Checked on every iteration so that a mismatch is caught before
        # the next write
        assert ndvi.shape == qa.shape

    # Write last incomplete fraction
    frac_id = (frac_num, frac_d)
    ndvi_header.write_frac(frac_id, ndvi)
    qa_header.write_frac(frac_id, qa)

    n_appended = len(ndvi_header.timestamps_ms) - most_recent_t
    # Parenthesised single-argument form prints identically on Python 2
    print('Processed %d, appended %d dates, took %.02f [s]' % (
        frac_num, n_appended, time.time() - _start))
    sys.stdout.flush()