def image_to_sepal(
        credentials,
        description: str,
        download_dir: str,
        image: ee.Image,
        band_names: list = None,
        dimensions=None,
        region: ee.Geometry = None,
        scale: int = None,
        crs: str = None,
        crs_transform: str = None,
        max_pixels: Union[int, float] = None,
        shard_size: int = None,
        file_dimensions=None,
        skip_empty_tiles=None,
        file_format: str = None,
        format_options: str = None,
        retries: int = 0
):
    """Compose the stream that retrieves an image into ``download_dir/description``.

    The returned stream is the concatenation of these steps, in order:
    create a uniquely named Drive folder, export ``image`` to it, download
    the folder, delete the folder, build ``<description>.vrt`` over the
    downloaded ``*.tif`` files and set band names on the ``*.tif`` and
    ``*.vrt`` files. The Drive folder deletion is additionally registered
    through ``merge_finalize``.

    Args:
        credentials: Passed to every Drive / Earth Engine helper and to
            ``ee.InitializeThread``.
        description: Export description; also part of the Drive folder
            name, the destination directory name and the VRT file name.
        download_dir: Directory under which ``description`` is created.
        image: Image handed to ``export_image_to_drive``.
        band_names: Names to set on the downloaded files. When falsy the
            names are obtained with ``get_band_names(credentials, image)``.
        dimensions, region, scale, crs, crs_transform, max_pixels,
        shard_size, file_dimensions, skip_empty_tiles, file_format,
        format_options, retries: Forwarded unchanged to
            ``export_image_to_drive``.

    Returns:
        The concatenated stream piped through
        ``merge_finalize(_delete_drive_folder)``.
    """
    drive_folder_path = '_'.join(('Sepal', description, str(uuid.uuid4())))
    destination_path = '/'.join((download_dir, description))

    def _discard_emissions(stream):
        # Replace every emitted value with an empty stream, so only
        # completion (or an error) of `stream` is propagated.
        return stream.pipe(
            flat_map(lambda _: empty())
        )

    def _create_drive_folder():
        announcement = progress(
            default_message='Creating Google Drive download folder...',
            message_key='tasks.retrieve.image_to_sepal.creating_drive_folder'
        )
        creation = _discard_emissions(
            create_folder_with_path(credentials, drive_folder_path)
        )
        return concat(announcement, creation)

    def _export_to_drive():
        export_options = dict(
            description=description,
            folder=drive_folder_path,
            dimensions=dimensions,
            region=region,
            scale=scale,
            crs=crs,
            crs_transform=crs_transform,
            max_pixels=max_pixels,
            shard_size=shard_size,
            file_dimensions=file_dimensions,
            skip_empty_tiles=skip_empty_tiles,
            file_format=file_format,
            format_options=format_options,
            retries=retries,
        )
        return export_image_to_drive(credentials, image, **export_options)

    def _download_from_drive():
        return download_path(
            credentials,
            path=drive_folder_path,
            destination=destination_path,
            delete_after_download=True
        )

    def _delete_drive_folder():
        announcement = progress(
            default_message='Deleting Google Drive download folder...',
            message_key='tasks.retrieve.image_to_sepal.deleting_drive_folder'
        )
        deletion = _discard_emissions(
            delete_file_with_path(credentials, path=drive_folder_path)
        )
        return concat(announcement, deletion)

    def _build_vrt():
        announcement = progress(
            default_message='Building VRT...',
            message_key='tasks.retrieve.image_to_sepal.building_vrt'
        )
        vrt_build = _discard_emissions(
            build_vrt(
                destination='{}/{}.vrt'.format(destination_path, description),
                files=destination_path + '/*.tif'
            )
        )
        return concat(announcement, vrt_build)

    def _set_band_names():
        # Explicit names win; otherwise they are looked up from the image.
        if band_names:
            names_stream = of(band_names)
        else:
            names_stream = get_band_names(credentials, image)
        announcement = progress(
            default_message='Setting band names...',
            message_key='tasks.retrieve.image_to_sepal.setting_band_names'
        )
        naming = names_stream.pipe(
            flat_map(
                lambda names: set_band_names(
                    band_names=names,
                    files=[destination_path + '/*.tif', destination_path + '/*.vrt']
                )
            ),
            flat_map(lambda _: empty())
        )
        return concat(announcement, naming)

    ee.InitializeThread(credentials)
    step_factories = (
        _create_drive_folder,
        _export_to_drive,
        _download_from_drive,
        _delete_drive_folder,
        _build_vrt,
        _set_band_names,
    )
    steps = [create_step() for create_step in step_factories]
    return concat(*steps).pipe(
        merge_finalize(_delete_drive_folder)
    )
def time_series_to_sepal(
        credentials,
        description: str,
        download_dir: str,
        image_collection_factory,
        start_date: Union[int, str, date],
        end_date: Union[int, str, date],
        region: Union[ee.Geometry, ee.Feature, ee.FeatureCollection],
        dimensions=None,
        scale: int = None,
        crs: str = None,
        crs_transform: str = None,
        max_pixels: Union[int, float] = None,
        file_dimensions=None,
        skip_empty_tiles=None,
        file_format: str = None,
        format_options: str = None,
        precision: str = None,
        nodata_value: int = None,
        retries: int = 0
):
    """Compose the stream that retrieves a time-series into ``download_dir/description``.

    ``region`` is tiled into geometries. For every geometry and every year
    range between ``start_date`` and ``end_date`` a stack of daily mosaics
    (one band per date) plus a CSV with the dates is exported to Google
    Drive, downloaded, and reorganised on disk. Once all years of a
    geometry are done, its date CSVs are merged into ``dates.csv`` and VRTs
    are built. Progress is reported as aggregated percentages.

    Args:
        credentials: Passed to the Drive / Earth Engine helpers and to
            ``ee.InitializeThread``.
        description: Used in the Drive folder name, in export descriptions
            and as the directory name below ``download_dir``.
        download_dir: Base directory for the downloaded data.
        image_collection_factory: Called as
            ``image_collection_factory(geometry, start, end)``; the result
            is used as an image collection (``.map`` / ``.distinct``).
        start_date: Start of the period, converted with ``to_date``.
        end_date: End of the period, converted with ``to_date``.
        region: Area of interest, tiled with
            ``tile(region, TILE_SIZE_IN_DEGREES)``.
        dimensions, scale, crs, crs_transform, max_pixels, file_dimensions,
        skip_empty_tiles, file_format, format_options, retries: Forwarded
            unchanged to ``export_image_to_drive`` for the stack export.
        precision: When truthy, applied to the stack with ``set_precision``.
        nodata_value: ``VRTNodata`` used for the per-band VRTs and the
            geometry-level ``stack.vrt``.

    Returns:
        The concatenation of folder creation, geometry exports and folder
        deletion, piped through ``merge_finalize(_delete_drive_folder)``.
    """
    start_date = to_date(start_date)
    end_date = to_date(end_date)
    year_ranges = split_range_by_year(start_date, end_date)
    drive_folder_path = '_'.join(['Sepal', description, str(uuid.uuid4())])

    def _create_drive_folder():
        return concat(
            progress(
                default_message='Creating Google Drive download folder...',
                message_key='tasks.retrieve.time_series_to_sepal.creating_drive_folder'
            ),
            create_folder_with_path(credentials, drive_folder_path).pipe(
                flat_map(lambda _: empty())
            )
        )

    def _export_geometries():
        def aggregate_progress(progresses, count):
            # progresses maps geometry description -> latest progress dict
            # of that geometry; count is the number of geometry/year exports.
            p = _sum_dicts(progresses.values(), excluded_keys=['geometry'])
            exported = round(100 * p['exported'] / count)
            downloaded = round(100 * p['downloaded'] / count)
            downloaded_bytes = format_bytes(p['downloaded_bytes'])
            processed = round(100 * p['processed'] / count)
            return progress(
                default_message='Exported {}%, Downloaded {}% ({}), Processed {}%'.format(
                    exported, downloaded, downloaded_bytes, processed
                ),
                message_key='tasks.retrieve.time_series_to_sepal.progress',
                exported=exported,
                downloaded=downloaded,
                downloaded_bytes=downloaded_bytes,
                processed=processed
            )

        features_collection = _to_features_collection(region)

        def export_geometry(geometry, i, geometry_count):
            # 1-based index, zero-padded to the width of the geometry count.
            geometry_description = str(i + 1).zfill(len(str(geometry_count)))
            # Deferred, so the export of a geometry is only set up once the
            # enclosing concat reaches it.
            return defer(
                lambda _: _export_geometry(
                    geometry,
                    geometry_description=geometry_description
                )
            )

        return concat(
            progress(
                default_message='Tiling AOI...',
                message_key='tasks.retrieve.time_series_to_sepal.tiling'
            ),
            _extract_feature_indexes(features_collection).pipe(
                flat_map(
                    lambda feature_indexes: _to_geometries(features_collection, feature_indexes).pipe(
                        flat_map(
                            lambda geometries: concat(
                                *[
                                    export_geometry(geometry, i, len(feature_indexes))
                                    for i, geometry in enumerate(geometries)
                                ]
                            )
                        ),
                        # Keep the latest progress per geometry.
                        scan(lambda acc, p: {**acc, p['geometry']: p}, {}),
                        flat_map(lambda progresses: aggregate_progress(
                            progresses,
                            count=len(feature_indexes) * len(year_ranges)
                        ))
                    )
                )
            )
        )

    def _export_geometry(geometry, geometry_description):
        export_years = combine_latest(
            *[
                _export_year(
                    geometry=geometry,
                    year_start=year_range[0],
                    year_end=year_range[1],
                    export_description='{}_{}_{}'.format(geometry_description, year_range[0].year, description),
                    year_dir='/'.join([download_dir, description, geometry_description, str(year_range[0].year)])
                )
                for year_range in year_ranges
            ]
        ).pipe(
            map(lambda progresses: _sum_dicts(progresses)),
            map(lambda p: {**p, 'geometry': geometry_description})
        )
        process_geometry = _process_geometry('/'.join([download_dir, description, geometry_description]))
        return concat(
            export_years,
            process_geometry
        )

    def _to_features_collection(r) -> ee.FeatureCollection:
        return tile(r, TILE_SIZE_IN_DEGREES)

    def _extract_feature_indexes(_feature_collection):
        def action():
            return _feature_collection.aggregate_array('system:index').getInfo()

        return execute(credentials, action, description='Feature indexes from FeatureCollection')

    def _to_geometries(_feature_collection, feature_indexes) -> Observable:
        def action():
            return [
                ee.Feature(
                    _feature_collection
                        .filterMetadata('system:index', 'equals', feature_index)
                        .first()
                ).geometry()
                for feature_index in feature_indexes
            ]

        return execute(credentials, action, description='FeatureCollection to geometries')

    def _export_year(geometry, year_start, year_end, export_description, year_dir):
        stack = _create_stack(geometry, year_start, year_end)
        ee.InitializeThread(credentials)
        # NOTE(review): getInfo() is evaluated while the stream is being
        # built, not when it is subscribed to.
        if not stack.bandNames().size().getInfo():
            logging.info('No data between {} and {}'.format(year_start, year_end))
            # Nothing to export: report the year as fully done.
            return of({
                'exported': 1,
                'downloaded': 1,
                'downloaded_bytes': 0,
                'processed': 1
            })
        initial_progress = of({
            'exported': 0,
            'stack_bytes': 0,
            'dates_bytes': 0,
            'downloaded': 0,
            'processed': 0
        })

        def aggregate_downloaded_bytes(p):
            return {
                'exported': p['exported'],
                'downloaded': p['downloaded'],
                'downloaded_bytes': p['stack_bytes'] + p['dates_bytes'],
                'processed': p['processed']
            }

        return concat(
            initial_progress,
            merge(
                _export_and_download_stack(stack, export_description, year_dir),
                _export_and_download_dates(stack, export_description, year_dir)
            ),
            _process_year(year_dir),
            of({'processed': 1})
        ).pipe(
            # Fold partial updates into one running progress dict.
            scan(lambda acc, p: {**acc, **p}, {}),
            map(aggregate_downloaded_bytes)
        )

    def _create_stack(geometry, start, end):
        def to_daily_mosaic(image):
            return ee.ImageCollection(ee.List(image.get('images'))) \
                .median() \
                .rename(ee.Image(image).getString('date'))

        image_collection = image_collection_factory(geometry, start, end) \
            .map(lambda image: image.set('date', image.date().format('yyyy-MM-dd')))
        distinct_date_images = image_collection.distinct('date')
        # Join every distinct date with all images of that date, then
        # reduce each group to a single median image named after the date.
        daily_mosaics = ee.ImageCollection(
            ee.Join.saveAll('images').apply(
                primary=distinct_date_images,
                secondary=image_collection,
                condition=ee.Filter.equals(leftField='date', rightField='date')
            ).map(to_daily_mosaic)
        )
        # The rename keeps only the last 10 characters of each band name,
        # i.e. the yyyy-MM-dd date set above.
        stack = daily_mosaics \
            .toBands() \
            .regexpRename('.*(.{10})', '$1') \
            .clip(geometry)
        return set_precision(stack, precision) if precision else stack

    def _export_and_download_stack(stack, export_description, year_dir):
        stack_drive_description = 'stack_' + export_description
        stack_drive_folder = '{}-{}'.format(stack_drive_description, str(uuid.uuid4()))
        stack_drive_path = '{}/{}'.format(drive_folder_path, stack_drive_folder)
        create_stack_drive_folder = create_folder_with_path(credentials, stack_drive_path).pipe(
            flat_map(lambda _: empty())
        )
        export_stack = _export_stack(stack, stack_drive_description, stack_drive_folder).pipe(
            flat_map(lambda _: empty())
        )
        download_stack_from_drive = _download_from_drive(
            path=stack_drive_path,
            destination=year_dir
        ).pipe(
            map(lambda p: {'stack_bytes': p.downloaded_bytes})
        )
        return concat(
            create_stack_drive_folder,
            export_stack,
            of({'exported': 1}),
            download_stack_from_drive,
            of({'downloaded': 1}),
        )

    def _export_stack(stack, drive_description, folder):
        return export_image_to_drive(
            credentials,
            stack,
            description=drive_description,
            folder=folder,
            dimensions=dimensions,
            scale=scale,
            crs=crs,
            crs_transform=crs_transform,
            max_pixels=max_pixels,
            file_dimensions=file_dimensions,
            skip_empty_tiles=skip_empty_tiles,
            file_format=file_format,
            format_options=format_options,
            retries=retries,
        )

    def _export_and_download_dates(stack, export_description, year_dir):
        table_drive_description = 'dates_' + export_description
        dates_drive_folder = '{}-{}'.format(table_drive_description, str(uuid.uuid4()))
        dates_drive_path = '{}/{}'.format(drive_folder_path, dates_drive_folder)
        create_dates_drive_folder = create_folder_with_path(credentials, dates_drive_path).pipe(
            flat_map(lambda _: empty())
        )
        export_dates = _export_dates(stack.bandNames(), table_drive_description, dates_drive_folder)
        download_dates_from_drive = _download_from_drive(
            path=dates_drive_path,
            destination=year_dir
        ).pipe(
            map(lambda p: {'dates_bytes': p.downloaded_bytes})
        )
        return concat(
            create_dates_drive_folder,
            export_dates,
            download_dates_from_drive
        )

    def _export_dates(dates, drive_description, folder):
        date_table = ee.FeatureCollection(
            dates.map(lambda d: ee.Feature(None, {'date': d}))
        )
        return export_table_to_drive(
            credentials,
            date_table,
            description=drive_description,
            folder=folder,
            file_format='CSV'
        ).pipe(
            flat_map(lambda _: empty())
        )

    def _download_from_drive(path, destination):
        return download_path(
            credentials,
            path=path,
            destination=destination,
            delete_after_download=True,
            retries=2
        )

    def _process_year(year_dir):
        def action():
            parent_dir = join(year_dir, pardir)
            tif_names = [f for f in listdir(year_dir) if f.endswith('.tif')]
            # Tile suffix of the form -<10 digits>-<10 digits>.tif; files
            # without it go to the default '0000000000-0000000000' tile.
            tile_pattern = re.compile(r'.*-(\d{10}-\d{10})\.tif')
            for tif_name in tif_names:
                tile_match = tile_pattern.match(tif_name)
                tile_name = tile_match.group(1) if tile_match else '0000000000-0000000000'
                tile_dir = abspath(join(parent_dir, tile_name))
                subprocess.check_call(['mkdir', '-p', tile_dir])
                # Argument lists (no shell) keep paths containing spaces or
                # shell metacharacters intact.
                subprocess.check_call([
                    'mv',
                    abspath(join(year_dir, tif_name)),
                    abspath(join(tile_dir, tif_name))
                ])
            # Expand the wildcard here instead of in a shell. With no CSV
            # present `mv` still fails, as the shell version did.
            csv_paths = sorted(glob(join(year_dir, '*.csv')))
            subprocess.check_call(['mv'] + csv_paths + [parent_dir])
            subprocess.check_call(['rm', '-rf', year_dir])

        return from_callable(action).pipe(
            flat_map(lambda _: empty())
        )

    def _process_geometry(geometry_dir):
        def action():
            dates = create_dates_csv()
            if dates:
                tiles_to_vrts(dates)

        def create_dates_csv():
            # Merge the per-year CSVs (in sorted path order) into dates.csv,
            # one date per line, and remove the per-year files.
            dates_path = join(geometry_dir, 'dates.csv')
            csv_paths = sorted(glob(join(geometry_dir, '*.csv')))
            if dates_path in csv_paths:
                csv_paths.remove(dates_path)
            if not csv_paths:
                return None
            with open(dates_path, 'w') as dates_file:
                for csv_path in csv_paths:
                    with open(csv_path, 'r') as csv_file:
                        for row in csv.DictReader(csv_file):
                            if row['date']:
                                dates_file.write(row['date'] + '\n')
                    subprocess.check_call(['rm', '-rf', csv_path])
                dates_file.flush()
                os.fsync(dates_file.fileno())
            # Read back inside a context manager so the handle is closed.
            with open(dates_path) as dates_file:
                return [d.rstrip('\n') for d in dates_file]

        def tiles_to_vrts(dates):
            tile_dirs = sorted([d for d in glob(join(geometry_dir, '*')) if isdir(d)])
            for tile_dir in tile_dirs:
                tile_to_vrt(tile_dir)
            gdal.SetConfigOption('VRT_SHARED_SOURCE', '0')
            vrt = gdal.BuildVRT(
                geometry_dir + '/stack.vrt',
                sorted(glob(join(geometry_dir, '*.vrt'))),
                VRTNodata=nodata_value
            )
            if vrt:
                vrt.FlushCache()
            set_band_metadata('DATE', dates, [geometry_dir + '/*.vrt'])

        def tile_to_vrt(tile_dir):
            # One single-band VRT per band of every tif in the tile ...
            tif_paths = sorted(glob(join(tile_dir, '*.tif')))
            for tif_path in tif_paths:
                tif_file = gdal.Open(tif_path)
                tif_path_no_extension = os.path.splitext(tif_path)[0]
                if tif_file:
                    for band_index in range(1, tif_file.RasterCount + 1):
                        tif_vrt_path = '{0}_{1}.vrt'.format(tif_path_no_extension, str(band_index).zfill(10))
                        gdal.SetConfigOption('VRT_SHARED_SOURCE', '0')
                        vrt = gdal.BuildVRT(
                            tif_vrt_path, tif_path,
                            bandList=[band_index],
                            VRTNodata=nodata_value)
                        if vrt:
                            vrt.FlushCache()
            # ... then stacked, in sorted path order, into <tile_dir>_stack.vrt.
            stack_vrt_path = tile_dir + '_stack.vrt'
            vrt_paths = sorted(glob(join(tile_dir, '*.vrt')))
            gdal.SetConfigOption('VRT_SHARED_SOURCE', '0')
            # NOTE(review): VRTNodata is hard-coded to 0 here while the other
            # VRTs use nodata_value - confirm this is intentional.
            vrt = gdal.BuildVRT(
                stack_vrt_path, vrt_paths,
                separate=True,
                VRTNodata=0)
            if vrt:
                vrt.FlushCache()

        return from_callable(action).pipe(
            flat_map(lambda _: empty())
        )

    def _delete_drive_folder():
        return concat(
            progress(
                default_message='Deleting Google Drive download folder...',
                message_key='tasks.retrieve.time_series_to_sepal.deleting_drive_folder'
            ),
            delete_file_with_path(
                credentials,
                path=drive_folder_path
            ).pipe(
                flat_map(lambda _: empty())
            )
        )

    def _sum_dicts(dicts: list, excluded_keys: list = ()):
        # Key-wise sum over all dicts; a key missing from a dict counts as 0.
        keys = reduce(lambda acc, d: acc.union(set(d)), dicts, set())
        keys = [key for key in keys if key not in excluded_keys]
        return {key: sum([d.get(key, 0) for d in dicts]) for key in keys}

    return concat(
        _create_drive_folder(),
        _export_geometries(),
        _delete_drive_folder()
    ).pipe(
        merge_finalize(_delete_drive_folder)
    )
def merge_finalize(handler: Callable[[], Observable]) -> Observable:
    """Pipe a single-value stream through ``operators.merge_finalize(handler)``.

    Args:
        handler: Zero-argument callable returning an Observable; handed
            unchanged to ``operators.merge_finalize``.

    Returns:
        ``of(True)`` piped through ``operators.merge_finalize(handler)``.

    NOTE(review): this returns an Observable, while call sites in this file
    pass ``merge_finalize(...)`` to ``.pipe(...)`` as if it were an
    operator. Confirm which ``merge_finalize`` those call sites resolve to.
    """
    seed = of(True)
    finalize_operator = operators.merge_finalize(handler)
    return seed.pipe(finalize_operator)