def warped_datacube(self) -> Collection:
    """Retrieve cached data cube definition."""
    if not self._warped:
        if self.properties.get('reuse_from'):
            reused_datacube: Collection = Collection.query().filter(
                Collection.name == self.properties['reuse_from']).first()

            if reused_datacube is None:
                raise RuntimeError(f'Data cube {self.properties["reuse_from"]} not found.')

            if reused_datacube.composite_function.alias != 'IDT':
                raise RuntimeError(f'Data cube {self.properties["reuse_from"]} must be IDT.')

            if reused_datacube.grid_ref_sys_id != self.datacube.grid_ref_sys_id:
                raise RuntimeError(
                    f'The grid of data cube {self.datacube.name} and {reused_datacube.name} mismatch.')

            self.reused_datacube = reused_datacube

            # Set warped_collection to the reused data cube
            if self.params['force']:
                raise RuntimeError(
                    f'Cannot use flag --force to dispatch data cube derived from {reused_datacube.name}')

            self._warped = reused_datacube
        else:
            datacube_warped = get_cube_id(self.datacube.name)

            self._warped = Collection.query().filter(Collection.name == datacube_warped).first()

    return self._warped
def get_cube_or_404(cube_id: Union[int, str] = None, cube_full_name: str = '-'):
    """Retrieve a data cube by id or full name, raising 404 when not found."""
    if cube_id:
        return Collection.query().filter(Collection.id == cube_id).first_or_404()
    else:
        # Note: this variant assumes a single '-' separating cube name and version.
        cube_name, cube_version = cube_full_name.split('-')

        return Collection.query().filter(
            Collection.name == cube_name,
            Collection.version == cube_version).first_or_404()
def get_cube_or_404(cube_id=None, cube_full_name: str = '-'):
    """Try to retrieve a data cube on database and raise 404 when not found."""
    if cube_id:
        return Collection.query().filter(Collection.id == cube_id).first_or_404()
    else:
        cube_fragments = cube_full_name.split('-')
        cube_name = '-'.join(cube_fragments[:-1])
        cube_version = cube_fragments[-1]

        return Collection.query().filter(
            Collection.name == cube_name,
            Collection.version == cube_version).first_or_404()
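# Hypothetical usage sketch: both lookups below assume an active Flask request context,
# since first_or_404() aborts with HTTP 404 when no row matches. The cube name is
# illustrative only; the version is taken from the fragment after the last '-', so
# hyphenated cube names are handled by this variant.
cube_by_id = get_cube_or_404(cube_id=1)
cube_by_name = get_cube_or_404(cube_full_name='S2-16D-2')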
def list_cubes(self):
    """Retrieve the list of data cubes from Brazil Data Cube database."""
    cubes = Collection.query().filter(Collection.collection_type == 'cube').all()

    serializer = CollectionForm()

    list_cubes = []

    for cube in cubes:
        cube_dict = serializer.dump(cube)
        cube_name = cube.name

        if cube.composite_function.alias == 'IDT':
            cube_name += '_'

        activities = self.services.get_control_activities(cube_name)
        count = int(sum([a['tobe_done'] for a in activities if 'tobe_done' in a]))
        done = int(sum([a['mycount'] for a in activities]))
        errors = int(sum([a['erros'] for a in activities]))
        not_done = count - done - errors

        cube_dict['status'] = 'Error' if errors > 0 else 'Pending' if not_done > 0 else 'Finished'

        cube_dict['timeline'] = [t['time_inst'] for t in cube_dict['timeline']]
        list_cubes.append(cube_dict)

    return list_cubes, 200
def warped_datacube(self) -> Collection:
    """Retrieve cached data cube definition."""
    if not self._warped:
        datacube_warped = get_cube_id(self.datacube.name)
        self._warped = Collection.query().filter(Collection.name == datacube_warped).first()

    return self._warped
def validate_provider(cls, collection_id):
    """Check if the given collection has any provider set."""
    collection = Collection.query().filter(Collection.id == collection_id).first_or_404()

    collector_extension: CollectorExtension = current_app.extensions['bdc:collector']

    download_order = collector_extension.get_provider_order(collection, lazy=True)

    if len(download_order) == 0:
        abort(400, f'Collection {collection.name} does not have any data provider set.')
def correction(self, scene):
    """Apply atmospheric correction on collection.

    Args:
        scene - Serialized Activity
    """
    logging.debug('Starting Correction Sentinel...')

    # Get the resolver for the Sentinel-2 scene at level 2
    sentinel_scene = factory.get_from_sceneid(scene['sceneid'], level=2)

    collection = Collection.query().filter(Collection.name == sentinel_scene.id).first()

    # Set Collection to the Sentinel Surface Reflectance
    scene['collection_id'] = collection.id
    scene['activity_type'] = 'correctionS2'

    # Create/update activity
    self.create_execution(scene)

    synchronizer = DataSynchronizer(scene['args']['compressed_file'])

    try:
        output_dir = sentinel_scene.path()

        synchronizer.check_data()

        # TODO: Add the sen2cor again as optional processor
        correction_result = correction_laSRC(scene['args']['compressed_file'], str(output_dir))

        if DataSynchronizer.is_remote_sync_configured():
            synchronizer.sync_data(correction_result, auto_remove=True)
    except BaseException as e:
        logging.error('An error occurred during task execution - {}'.format(scene.get('sceneid')))
        raise e
    finally:
        if DataSynchronizer.is_remote_sync_configured():
            synchronizer.remove_data(raise_error=False)
            logging.info(f'File {scene["args"].get("compressed_file")} removed.')

    scene['args']['level'] = 2
    scene['args']['file'] = correction_result
    scene['activity_type'] = 'publishS2'

    return scene
def list_cubes(self):
    """Retrieve the list of data cubes from Brazil Data Cube database."""
    cubes = Collection.query().filter(Collection.collection_type == 'cube').all()

    serializer = CollectionForm()

    list_cubes = []

    for cube in cubes:
        cube_dict = serializer.dump(cube)
        not_done = 0
        sum_acts = 0
        error = 0

        if cube.composite_function.alias != 'IDT':
            activities = self.services.get_activities_by_datacube(cube.name)
            not_done = len(list(filter(lambda i: i['mystatus'] == 'NOTDONE', activities)))
            error = len(list(filter(lambda i: i['mystatus'] == 'ERROR', activities)))
            sum_acts += len(activities)

        parts = get_cube_parts(cube.name)
        data_cube_identity = '_'.join(parts[:2])

        activities = self.services.get_activities_by_datacube(data_cube_identity)
        not_done_identity = len(list(filter(lambda i: i['mystatus'] == 'NOTDONE', activities)))
        error_identity = len(list(filter(lambda i: i['mystatus'] == 'ERROR', activities)))
        sum_acts += len(activities)

        cube_dict['status'] = 'Pending'

        if sum_acts > 0:
            sum_not_done = not_done + not_done_identity
            sum_errors = error + error_identity
            cube_dict['status'] = 'Error' if sum_errors > 0 else 'Finished' \
                if (sum_not_done + sum_errors) == 0 else 'Pending'

        list_cubes.append(cube_dict)

    return list_cubes, 200
def list_cubes(cls):
    """Retrieve the list of data cubes from Brazil Data Cube database."""
    cubes = Collection.query().filter(Collection.collection_type == 'cube').all()

    serializer = CollectionForm()

    list_cubes = []

    for cube in cubes:
        cube_dict = serializer.dump(cube)

        # list_tasks = list_pending_tasks() + list_running_tasks()
        # count_tasks = len(list(filter(lambda t: t['collection_id'] == cube.name, list_tasks)))
        count_tasks = 0

        cube_dict['status'] = 'Finished' if count_tasks == 0 else 'Pending'

        list_cubes.append(cube_dict)

    return list_cubes, 200
def download(scene_ids):
    """Download the Landsat-8 products using scene id.

    TODO: Support Sentinel 2 and Landsat 5/7.
    """
    from bdc_catalog.models import Collection

    from .collections.business import RadcorBusiness
    from .collections.landsat.utils import LandsatSurfaceReflectance08, factory
    from .collections.utils import get_earth_explorer_api, EARTH_EXPLORER_DOWNLOAD_URI, EARTH_EXPLORER_PRODUCT_ID
    from .utils import initialize_factories

    initialize_factories()

    scenes = scene_ids.split(',')

    api = get_earth_explorer_api()

    dataset = 'LANDSAT_8_C1'

    collection = Collection.query().filter(
        Collection.name == LandsatSurfaceReflectance08.id).first_or_404()

    for scene in scenes:
        landsat_scene_level_1 = factory.get_from_sceneid(scene_id=scene, level=1)

        formal = api.lookup(dataset, [scene], inverse=True)

        link = EARTH_EXPLORER_DOWNLOAD_URI.format(
            folder=EARTH_EXPLORER_PRODUCT_ID[dataset], sid=formal[0])

        activity = dict(
            collection_id=collection.id,
            activity_type='downloadLC8',
            tags=[],
            sceneid=scene,
            scene_type='SCENE',
            args=dict(link=link)
        )

        _ = RadcorBusiness.create_activity(activity)

        RadcorBusiness.start(activity)
def priority(collection_id, scene_id, output):
    """Download a scene seeking in CollectionProviders.

    Notes:
        You must configure the BDC-Catalog.

    Args:
        collection_id - Collection Identifier
        scene_id - A scene identifier (Landsat Scene Id/Sentinel Scene Id, etc)
        output - Directory to save.
    """
    ext = current_app.extensions['bdc:collector']

    collection = Collection.query().get(collection_id)

    order = ext.get_provider_order(collection)

    for driver in order:
        try:
            file_destination = driver.download(scene_id, output=output)
        except Exception as e:
            logging.warning(f'Download error for provider {driver.provider_name} - {str(e)}')
def post_publish(self, scene):
    """Run post-processing routines over the published scene assets."""
    logging.info(f'Applying post-processing for {scene["sceneid"]}')
    collection = Collection.query().filter(Collection.id == scene['collection_id']).first()
    assets = scene['args']['assets']

    synchronizer = DataSynchronizer(scene['args']['file'])

    if DataSynchronizer.is_remote_sync_configured():
        synchronizer.check_data()

    for entry in assets.values():
        if entry['file'].endswith('Fmask4.tif'):
            post_processing(entry['file'], collection, assets, 10)

    if DataSynchronizer.is_remote_sync_configured():
        synchronizer.sync_data(bucket=Config.AWS_BUCKET_NAME, auto_remove=True)
        synchronizer.remove_data(raise_error=False)

    return scene
def get_collection_items(
    collection_id=None,
    roles=None,
    item_id=None,
    bbox=None,
    datetime=None,
    ids=None,
    collections=None,
    intersects=None,
    page=1,
    limit=10,
    query=None,
    **kwargs,
) -> Pagination:
    """Retrieve a list of collection items based on filters.

    :param collection_id: Single Collection ID to include in the search for items.
        Only Items in the provided Collection will be searched, defaults to None
    :type collection_id: str, optional
    :param item_id: item identifier, defaults to None
    :type item_id: str, optional
    :param bbox: bounding box for intersection [west, south, east, north], defaults to None
    :type bbox: list, optional
    :param datetime: Single date+time, or a range ('/' separator), formatted to RFC 3339, section 5.6.
        Use double dots '..' for open date ranges, defaults to None.
        If the start or end date of an image generated by a temporal composition intersects
        the given datetime or range, it will be included in the result.
    :type datetime: str, optional
    :param ids: Array of Item ids to return. All other filter parameters that further
        restrict the number of search results are ignored, defaults to None
    :type ids: list, optional
    :param collections: Array of Collection IDs to include in the search for items.
        Only Items in one of the provided Collections will be searched, defaults to None
    :type collections: list, optional
    :param intersects: Searches items by performing intersection between their geometry and
        the provided GeoJSON geometry. All GeoJSON geometry types must be supported, defaults to None
    :type intersects: dict, optional
    :param page: The page offset of results, defaults to 1
    :type page: int, optional
    :param limit: The maximum number of results to return (page size), defaults to 10
    :type limit: int, optional
    :return: list of collection items
    :rtype: list
    """
    columns = [
        func.concat(Collection.name, "-", Collection.version).label("collection"),
        Collection.collection_type,
        Collection._metadata.label("meta"),
        Item._metadata.label("item_meta"),
        Item.name.label("item"),
        Item.id,
        Item.collection_id,
        Item.start_date.label("start"),
        Item.end_date.label("end"),
        Item.assets,
        Item.created,
        Item.updated,
        cast(Item.cloud_cover, Float).label("cloud_cover"),
        func.ST_AsGeoJSON(Item.geom).label("geom"),
        func.ST_XMin(Item.geom).label("xmin"),
        func.ST_XMax(Item.geom).label("xmax"),
        func.ST_YMin(Item.geom).label("ymin"),
        func.ST_YMax(Item.geom).label("ymax"),
        Tile.name.label("tile"),
    ]

    if roles is None:
        roles = []

    where = [
        Collection.id == Item.collection_id,
        or_(Collection.is_public.is_(True), Collection.id.in_([int(r.split(":")[0]) for r in roles])),
    ]

    collections_where = _where_collections(collection_id, collections)
    collections_where.append(
        or_(Collection.is_public.is_(True), Collection.id.in_([int(r.split(":")[0]) for r in roles])))

    outer_join = [(Tile, [Item.tile_id == Tile.id])]
    _geom_tables = []
    _collections = Collection.query().filter(*collections_where).all()

    if bbox or intersects:
        grids = GridRefSys.query().filter(
            GridRefSys.id.in_([c.grid_ref_sys_id for c in _collections])).all()

        for grid in grids:
            geom_table = grid.geom_table
            if geom_table is None:
                continue
            _geom_tables.append(geom_table)

    if ids is not None:
        if isinstance(ids, str):
            ids = ids.split(",")
        where += [Item.name.in_(ids)]
    else:
        where += _where_collections(collection_id, collections)

        if item_id is not None:
            where += [Item.name.like(item_id)]

        if query:
            filters = create_query_filter(query)
            if filters:
                where += filters

        if intersects is not None:
            # Intersect with native grid if there is
            geom_expr = func.ST_GeomFromGeoJSON(str(intersects))
            grids_where, joins = intersect_grids(geom_expr, geom_tables=_geom_tables)

            where += grids_where
            outer_join += joins
        elif bbox is not None:
            try:
                if isinstance(bbox, str):
                    bbox = bbox.split(",")

                bbox = [float(x) for x in bbox]

                if bbox[0] == bbox[2] or bbox[1] == bbox[3]:
                    raise InvalidBoundingBoxError("")

                geom_expr = func.ST_MakeEnvelope(bbox[0], bbox[1], bbox[2], bbox[3], func.ST_SRID(Item.geom))
                grid_where, joins = intersect_grids(geom_expr, geom_tables=_geom_tables)

                where += grid_where
                outer_join += joins
            except (ValueError, InvalidBoundingBoxError) as e:
                abort(400, f"'{bbox}' is not a valid bbox.")

        if datetime is not None:
            if "/" in datetime:
                matches_open = ("..", "")
                time_start, time_end = datetime.split("/")
                if time_start in matches_open:  # open start
                    date_filter = [or_(Item.start_date <= time_end, Item.end_date <= time_end)]
                elif time_end in matches_open:  # open end
                    date_filter = [or_(Item.start_date >= time_start, Item.end_date >= time_start)]
                else:  # closed range
                    date_filter = [
                        or_(
                            and_(Item.start_date >= time_start, Item.start_date <= time_end),
                            and_(Item.end_date >= time_start, Item.end_date <= time_end),
                            and_(Item.start_date < time_start, Item.end_date > time_end),
                        )
                    ]
            else:
                date_filter = [and_(Item.start_date <= datetime, Item.end_date >= datetime)]

            where += date_filter

    query = session.query(*columns)
    for entity, join_conditions in outer_join:
        query = query.outerjoin(entity, *join_conditions)

    try:
        query = query.filter(*where).order_by(Item.start_date.desc(), Item.id)
        result = query.paginate(page=int(page), per_page=int(limit),
                                error_out=False, max_per_page=BDC_STAC_MAX_LIMIT)

        return result
    except Exception as err:
        msg = str(err)
        if hasattr(err, "orig"):
            msg = str(err.orig)
        abort(400, msg.rstrip())
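# Hypothetical usage sketch (inside a Flask request context, since the function uses the
# database session and abort()): page through items of one collection filtered by a bbox
# and a closed datetime range. The collection name and coordinates are illustrative only;
# the call returns a Flask-SQLAlchemy Pagination whose rows expose the labeled columns.
items_page = get_collection_items(
    collection_id='S2-16D-2',
    bbox='-54.0,-12.0,-53.0,-11.0',                       # west,south,east,north
    datetime='2020-01-01T00:00:00Z/2020-01-31T23:59:59Z',
    page=1,
    limit=10,
)
for row in items_page.items:
    print(row.item, row.start, row.cloud_cover)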
def publish(blends, band_map, **kwargs):
    """Execute publish task and catalog datacube result.

    Args:
        blends - Serialized results of the blend tasks
        band_map - Map of band name and its definition
    """
    period = blends[0]['period']
    logging.info(f'Executing publish {period}')

    version = blends[0]['version']

    cube: Collection = Collection.query().filter(
        Collection.name == blends[0]['datacube'],
        Collection.version == version).first()
    warped_datacube = blends[0]['warped_datacube']
    tile_id = blends[0]['tile_id']

    # Retrieve which bands to generate quick look
    bands = cube.bands
    band_id_map = {band.id: band.name for band in bands}

    quicklook = cube.quicklook[0]

    quick_look_bands = [
        band_id_map[quicklook.red],
        band_id_map[quicklook.green],
        band_id_map[quicklook.blue]
    ]

    merges = dict()
    blend_files = dict()

    composite_function = DataCubeFragments(cube.name).composite_function

    for blend_result in blends:
        if composite_function != 'IDENTITY':
            blend_files[blend_result['band']] = blend_result['blends']

        if blend_result.get('clear_observation_file'):
            blend_files[CLEAR_OBSERVATION_NAME] = {composite_function: blend_result['clear_observation_file']}

        if blend_result.get('total_observation'):
            blend_files[TOTAL_OBSERVATION_NAME] = {composite_function: blend_result['total_observation']}

        if blend_result.get('provenance'):
            blend_files[PROVENANCE_NAME] = {composite_function: blend_result['provenance']}

        for merge_date, definition in blend_result['scenes'].items():
            merges.setdefault(merge_date, dict(dataset=definition['dataset'],
                                               cloudratio=definition['cloudratio'],
                                               ARDfiles=dict()))
            merges[merge_date]['ARDfiles'].update(definition['ARDfiles'])

    if composite_function != 'IDT':
        cloudratio = blends[0]['cloudratio']

        # Generate quick looks for cube scenes
        publish_datacube(cube, quick_look_bands, tile_id, period, blend_files, cloudratio, band_map, **kwargs)

    # Generate quick looks of irregular cube
    wcube = Collection.query().filter(Collection.name == warped_datacube,
                                      Collection.version == version).first()

    for merge_date, definition in merges.items():
        date = merge_date.replace(definition['dataset'], '')

        publish_merge(quick_look_bands, wcube, tile_id, date, definition, band_map)

    try:
        db.session.commit()
    except:
        db.session.rollback()
def orchestrate(self):
    """Orchestrate data cube definition and prepare temporal resolutions."""
    self.datacube = Collection.query().filter(Collection.name == self.params['datacube']).one()

    temporal_schema = self.datacube.temporal_composition_schema

    cube_parameters: CubeParameters = CubeParameters.query().filter(
        CubeParameters.collection_id == self.datacube.id).first()

    if cube_parameters is None:
        raise RuntimeError(f'No parameters configured for data cube "{self.datacube.id}"')

    # This step acts like first execution. When no stac_url defined in cube parameters but it was given, save it.
    if self.properties.get('stac_url') and not cube_parameters.metadata_.get('stac_url'):
        logging.debug(f'No "stac_url"/"token" configured yet for cube parameters. '
                      f'Using {self.properties["stac_url"]}')
        meta = cube_parameters.metadata_.copy()
        meta['stac_url'] = self.properties['stac_url']
        meta['token'] = self.properties.get('token')
        cube_parameters.metadata_ = meta
        cube_parameters.save(commit=True)

    # Validate parameters
    cube_parameters.validate()

    # Pass the cube parameters to the data cube functions arguments
    props = deepcopy(cube_parameters.metadata_)
    props.update(self.properties)
    self.properties = props

    dstart = self.params['start_date']
    dend = self.params['end_date']

    if self.datacube.composite_function.alias == 'IDT':
        timeline = [[dstart, dend]]
    else:
        if self.datacube.composite_function.alias == 'STK':
            warnings.warn('The composite function STK is deprecated. Use LCF (Least Cloud Cover First) instead.',
                          DeprecationWarning, stacklevel=2)

        timeline = Timeline(**temporal_schema, start_date=dstart, end_date=dend).mount()

    where = [Tile.grid_ref_sys_id == self.datacube.grid_ref_sys_id]

    if self.params.get('tiles'):
        where.append(Tile.name.in_(self.params['tiles']))

    self.tiles = db.session.query(Tile).filter(*where).all()

    self.bands = Band.query().filter(Band.collection_id == self.warped_datacube.id).all()

    bands = self.datacube_bands
    self.band_map = {
        b.name: dict(name=b.name, data_type=b.data_type, nodata=b.nodata,
                     min_value=b.min_value, max_value=b.max_value)
        for b in bands
    }

    if self.properties.get('reuse_from'):
        warnings.warn('The parameter `reuse_from` is deprecated and will be removed in next version. '
                      'Use `reuse_data_cube` instead.')
        common_bands = _common_bands()
        collection_bands = [b.name for b in self.datacube.bands if b.name not in common_bands]

        reused_collection_bands = [b.name for b in self.bands]

        # The input cube (STK/MED) must have all bands of reused. Otherwise raise Error.
        if not set(collection_bands).issubset(set(reused_collection_bands)):
            raise RuntimeError(
                f'Reused data cube {self.warped_datacube.name} must have all bands of {self.datacube.name}')

        # Extra filter to only use bands of Input data cube.
        self.bands = [b for b in self.bands if b.name in collection_bands]

    if cube_parameters.reuse_cube:
        self.reused_datacube = cube_parameters.reuse_cube

    for tile in self.tiles:
        tile_name = tile.name

        grs: GridRefSys = tile.grs

        grid_geom: sqlalchemy.Table = grs.geom_table
        srid_column = get_srid_column(grid_geom.c)

        # TODO: Raise exception when using a native grid argument
        #  Use bands resolution and match with SRID context (degree x degree) etc.
        tile_stats = db.session.query(
            (func.ST_XMin(grid_geom.c.geom)).label('min_x'),
            (func.ST_YMax(grid_geom.c.geom)).label('max_y'),
            (func.ST_XMax(grid_geom.c.geom) - func.ST_XMin(grid_geom.c.geom)).label('dist_x'),
            (func.ST_YMax(grid_geom.c.geom) - func.ST_YMin(grid_geom.c.geom)).label('dist_y'),
            (func.ST_Transform(func.ST_SetSRID(grid_geom.c.geom, srid_column), 4326)).label('feature')
        ).filter(grid_geom.c.tile == tile_name).first()

        self.mosaics[tile_name] = dict(periods=dict())

        for interval in timeline:
            startdate = interval[0]
            enddate = interval[1]

            if dstart is not None and startdate < dstart:
                continue
            if dend is not None and enddate > dend:
                continue

            period = f'{startdate}_{enddate}'

            self.mosaics[tile_name]['periods'][period] = {}
            self.mosaics[tile_name]['periods'][period]['start'] = startdate.strftime('%Y-%m-%d')
            self.mosaics[tile_name]['periods'][period]['end'] = enddate.strftime('%Y-%m-%d')
            self.mosaics[tile_name]['periods'][period]['dist_x'] = tile_stats.dist_x
            self.mosaics[tile_name]['periods'][period]['dist_y'] = tile_stats.dist_y
            self.mosaics[tile_name]['periods'][period]['min_x'] = tile_stats.min_x
            self.mosaics[tile_name]['periods'][period]['max_y'] = tile_stats.max_y
            self.mosaics[tile_name]['periods'][period]['feature'] = tile_stats.feature

            if self.properties.get('shape', None):
                self.mosaics[tile_name]['periods'][period]['shape'] = self.properties['shape']
def radcor(cls, args: dict):
    """Search for Landsat/Sentinel Images and dispatch download task."""
    args.setdefault('limit', 299)
    args.setdefault('cloud', CLOUD_DEFAULT)
    args['tileid'] = 'notile'
    args['satsen'] = args['satsen']
    args['start'] = args.get('start')
    args['end'] = args.get('end')

    # Get bbox
    w = float(args['w'])
    e = float(args['e'])
    s = float(args['s'])
    n = float(args['n'])

    # Get the requested period to be processed
    rstart = args['start']
    rend = args['end']

    sat = args['satsen']
    cloud = float(args['cloud'])
    limit = args['limit']
    action = args.get('action', 'preview')

    do_harmonization = (args['harmonize'].lower() == 'true') if 'harmonize' in args else False

    extra_args = args.get('args', dict())

    activities = []

    collections = Collection.query().filter(Collection.collection_type == 'collection').all()

    # TODO: Review this code. The collection name is not unique anymore.
    collections_map = {c.name: c.id for c in collections}

    scenes = {}

    def __get_collection(name: str) -> str:
        """Ensure collection name exists on database."""
        collection = collections_map.get(name)

        if collection is None:
            abort(404, f'Collection {name} not found.')

        return collection

    try:
        if 'landsat' in sat.lower():
            result = get_landsat_scenes(w, n, e, s, rstart, rend, cloud, sat)
            scenes.update(result)

            for id in result:
                scene = result[id]
                sceneid = scene['sceneid']

                landsat_scene_level_2 = landsat_factory.get_from_sceneid(sceneid, level=2)

                collection_id = __get_collection(landsat_scene_level_2.id)

                # Set collection_id as L1 by default. Change to L2 when skip L1 tasks (AWS)
                activity = dict(
                    collection_id=collection_id,
                    activity_type='downloadLC8',
                    tags=args.get('tags', []),
                    sceneid=sceneid,
                    scene_type='SCENE',
                    args=dict(link=scene['link'], cloud=scene.get('cloud'), harmonize=do_harmonization)
                )

                created = cls.create_activity(activity)

                if action == 'start' and not created:
                    logging.warning('radcor - activity already done {}'.format(activity['sceneid']))
                    continue

                activities.append(activity)

        if 'S2' in sat:
            result = get_sentinel_scenes(w, n, e, s, rstart, rend, cloud, limit)
            scenes.update(result)

            for id in result:
                scene = result[id]
                sceneid = scene['sceneid']

                sentinel_scene_level_2 = sentinel_factory.get_from_sceneid(sceneid, level=2)

                collection_id = __get_collection(sentinel_scene_level_2.id)

                activity = dict(
                    collection_id=collection_id,
                    activity_type='downloadS2',
                    tags=args.get('tags', []),
                    sceneid=sceneid,
                    scene_type='SCENE',
                    args=dict(link=scene['link'], cloud=scene.get('cloud'), harmonize=do_harmonization)
                )

                created = cls.create_activity(activity)

                if action == 'start' and not created:
                    logging.warning('radcor - activity already done {}'.format(sceneid))
                    continue

                scenes[id] = scene

                activities.append(activity)

        if action == 'start':
            db.session.commit()

            for activity in activities:
                cls.start(activity, **extra_args)
        else:
            db.session.rollback()
    except BaseException:
        db.session.rollback()
        raise

    return scenes
def publish(collection_item: Item, scene: RadcorActivity, skip_l1=False, **kwargs):
    """Publish Landsat collection.

    It works with both Digital Number (DN) and Surface Reflectance (SR).

    Args:
        collection_item - Collection Item
        scene - Current Activity
    """
    identifier = scene.sceneid

    # Get collection level to publish. Default is l1
    collection_level = scene.args.get('level') or 1

    landsat_scene = factory.get_from_sceneid(identifier, level=collection_level)

    productdir = scene.args.get('file')

    logging.warning('Publish {} - {} (id={})'.format(scene.collection_id, productdir, scene.id))

    if productdir and productdir.endswith('.gz'):
        target_dir = landsat_scene.path()
        makedirs(target_dir, exist_ok=True)

        productdir = uncompress(productdir, str(target_dir))

    collection = Collection.query().filter(Collection.id == collection_item.collection_id).one()

    quicklook = Quicklook.query().filter(Quicklook.collection_id == collection.id).all()

    if quicklook:
        quicklook_bands = Band.query().filter(
            Band.id.in_((quicklook[0].red, quicklook[0].green, quicklook[0].blue))).all()
        quicklook = [quicklook_bands[0].name, quicklook_bands[1].name, quicklook_bands[2].name]
    else:
        quicklook = DEFAULT_QUICK_LOOK_BANDS

    files = {}
    qlfiles = {}

    bands = landsat_scene.get_band_map()

    for gband, band in bands.items():
        fs = landsat_scene.get_files()

        if not fs:
            continue

        for f in fs:
            if f.stem.endswith(band) and f.suffix.lower().endswith('.tif'):
                files[gband] = f
                if gband in quicklook:
                    qlfiles[gband] = str(f)

    # Generate Vegetation Index files
    generate_vi(productdir, files)

    # Apply valid range and Cog files
    for band, file_path in files.items():
        tif_file = str(file_path)

        if landsat_scene.level == 2:
            _ = apply_valid_range(tif_file, tif_file)

        # Set destination of COG file
        files[band] = generate_cogs(tif_file, tif_file)

        if not is_valid_tif(tif_file):
            raise RuntimeError('Not Valid {}'.format(tif_file))

    # Extract basic scene information and build the quicklook
    pngname = productdir + '/{}.png'.format(identifier)

    dataset = GDALOpen(qlfiles['nir'], GA_ReadOnly)
    numlin = 768
    numcol = int(float(dataset.RasterXSize) / float(dataset.RasterYSize) * numlin)
    del dataset

    create_quick_look(pngname, [qlfiles[band] for band in quicklook if band in qlfiles],
                      rows=numlin, cols=numcol)

    productdir = productdir.replace(Config.DATA_DIR, '')

    assets_to_upload = {
        'quicklook': dict(file=pngname, asset=productdir.replace('/Repository/Archive', ''))
    }

    for instance in ['local', 'aws']:
        engine_instance = {
            'local': db,
            'aws': db_aws
        }
        engine = engine_instance[instance]

        # Skip catalog on aws for digital number
        if landsat_scene.level == 1 and instance == 'aws':
            continue

        if instance == 'aws':
            if Config.DISABLE_PUBLISH_SECOND_DB:
                logging.info('Skipping publish in second db.')
                continue

            asset_url = productdir.replace('/Repository/Archive', Config.AWS_BUCKET_NAME)
        else:
            asset_url = productdir

        pngname_relative = resource_path.join(asset_url, Path(pngname).name)

        assets_to_upload['quicklook']['asset'] = pngname_relative

        with engine.session.begin_nested():
            with engine.session.no_autoflush:
                # Add collection item to the session if not present
                if collection_item not in engine.session:
                    item = engine.session.query(Item).filter(
                        Item.name == collection_item.name,
                        Item.collection_id == collection_item.collection_id).first()

                    if not item:
                        cloned_properties = CollectionItemForm().dump(collection_item)
                        collection_item = Item(**cloned_properties)
                        engine.session.add(collection_item)

                collection_bands = engine.session.query(Band)\
                    .filter(Band.collection_id == collection_item.collection_id)\
                    .all()

                assets = dict(
                    thumbnail=create_asset_definition(str(pngname_relative), 'image/png',
                                                      ['thumbnail'], str(pngname))
                )

                geom = min_convex_hull = None

                # Inserting data into Product table
                for band in files:
                    template = resource_path.join(asset_url, Path(files[band]).name)

                    band_model = next(filter(lambda b: band == b.common_name, collection_bands), None)

                    if not band_model:
                        logging.warning('Band {} of collection {} not found in database. Skipping...'
                                        .format(band, collection_item.collection_id))
                        continue

                    if geom is None:
                        geom = raster_extent(files[band])
                        min_convex_hull = raster_convexhull(files[band])

                    assets[band_model.name] = create_asset_definition(
                        template, COG_MIME_TYPE, ['data'], files[band], is_raster=True)

                    assets_to_upload[band] = dict(file=files[band], asset=template)

                collection_item.assets = assets
                collection_item.geom = from_shape(geom, srid=4326)
                collection_item.min_convex_hull = from_shape(min_convex_hull, srid=4326)

        # Add into scope of local and remote database
        add_instance(engine, collection_item)

        # Persist database
        commit(engine)

    return assets_to_upload
def radcor(cls, args: dict):
    """Search for Landsat/Sentinel Images and dispatch download task."""
    args.setdefault('cloud', 100)

    cloud = float(args['cloud'])
    action = args.get('action', 'preview')

    collections = Collection.query().filter(Collection.collection_type.in_(['collection', 'cube'])).all()

    # TODO: Review this code. The collection name is not unique anymore.
    collections_map = {f'{c.name}-{c.version}': c.id for c in collections}

    tasks = args.get('tasks', [])

    force = args.get('force', False)
    options = dict()

    if 'platform' in args:
        options['platform'] = args['platform']

    if 'scenes' not in args and 'tiles' not in args:
        w, e = float(args['w']), float(args['e'])
        s, n = float(args['s']), float(args['n'])
        bbox = [w, s, e, n]
        options['bbox'] = bbox

    try:
        catalog_provider, provider = get_provider(catalog=args['catalog'])

        if 'scenes' in args:
            result = []

            unique_scenes = set(args['scenes'])

            for scene in unique_scenes:
                query_result = provider.search(
                    query=args['dataset'],
                    filename=f'{scene}*',
                    **options
                )

                result.extend(query_result)
        elif 'tiles' in args:
            result = []

            for tile in args['tiles']:
                query_result = provider.search(
                    query=args['dataset'],
                    tile=tile,
                    start_date=args['start'],
                    end_date=args['end'],
                    cloud_cover=cloud,
                    **options
                )

                result.extend(query_result)
        else:
            result = provider.search(
                query=args['dataset'],
                start_date=args['start'],
                end_date=args['end'],
                cloud_cover=cloud,
                **options
            )

        def _recursive(scene, task, parent=None, parallel=True, pass_args=True):
            """Create task dispatcher recursive."""
            collection_id = collections_map[task['collection']]
            # Create activity definition example
            activity = cls._activity_definition(collection_id, task['type'], scene, **task['args'])
            activity['args'].update(dict(catalog=args['catalog'], dataset=args['dataset']))

            _task = cls._task_definition(task['type'])
            # Try to create activity in database and the parent if there is.
            instance, created = cls.create_activity(activity, parent)

            # When activity already exists and force is not set, skips to avoid collect multiple times
            if not created and not force:
                return None

            dump = RadcorActivityForm().dump(instance)
            dump['args'].update(activity['args'])

            keywords = dict(collection_id=collection_id, activity_type=task['type'])
            # If no children
            if not task.get('tasks'):
                if parent is None:
                    return _task.s(dump, force=force)
                return _task.s(**keywords)

            res = []

            for child in task['tasks']:
                # When triggering children, use parallel=False to use chain workflow
                child_task = _recursive(scene, child, parent=instance, parallel=False, pass_args=False)

                if child_task:
                    res.append(child_task)

            handler = group(*res) if parallel else chain(*res)

            arguments = []

            if pass_args:
                arguments.append(dump)

            return _task.s(*arguments, **keywords) | handler

        if action == 'start':
            to_dispatch = []

            with db.session.begin_nested():
                for task in tasks:
                    if task['type'] == 'download':
                        cls.validate_provider(collections_map[task['collection']])

                    for scene_result in result:
                        children_task = _recursive(scene_result, task, parent=None)

                        if children_task:
                            to_dispatch.append(children_task)

            db.session.commit()

            if len(to_dispatch) > 0:
                group(to_dispatch).apply_async()
    except Exception:
        db.session.rollback()
        raise

    return result
def check_scenes(cls, collections: list, start_date: datetime, end_date: datetime,
                 catalog: str = None, dataset: str = None,
                 grid: str = None, tiles: list = None, bbox: list = None,
                 catalog_kwargs=None, only_tiles=False):
    """Check for scenes in the remote provider and compare them with the Collection Builder catalog."""
    bbox_list = []
    if grid and tiles:
        grid = GridRefSys.query().filter(GridRefSys.name == grid).first_or_404(f'Grid "{grid}" not found.')
        geom_table = grid.geom_table
        rows = db.session.query(
            geom_table.c.tile,
            func.ST_Xmin(func.ST_Transform(geom_table.c.geom, 4326)).label('xmin'),
            func.ST_Ymin(func.ST_Transform(geom_table.c.geom, 4326)).label('ymin'),
            func.ST_Xmax(func.ST_Transform(geom_table.c.geom, 4326)).label('xmax'),
            func.ST_Ymax(func.ST_Transform(geom_table.c.geom, 4326)).label('ymax'),
        ).filter(geom_table.c.tile.in_(tiles)).all()
        for row in rows:
            bbox_list.append((row.tile, (row.xmin, row.ymin, row.xmax, row.ymax)))
    else:
        bbox_list.append(('', bbox))

    instance, provider = get_provider(catalog)

    collection_map = dict()
    collection_ids = list()

    for _collection in collections:
        collection, version = _collection.split('-')

        collection = Collection.query().filter(
            Collection.name == collection,
            Collection.version == version
        ).first_or_404(f'Collection "{collection}-{version}" not found.')

        collection_ids.append(collection.id)
        collection_map[_collection] = collection

    options = dict(start_date=start_date, end_date=end_date)
    if catalog_kwargs:
        options.update(catalog_kwargs)

    redis = current_app.redis

    output = dict(
        collections={cname: dict(total_scenes=0, total_missing=0, missing_external=[]) for cname in collections}
    )

    items = {cid: set() for cid in collection_ids}
    external_scenes = set()

    for tile, _bbox in bbox_list:
        with redis.pipeline() as pipe:
            if only_tiles:
                entry = tile
                options['tile'] = tile
            else:
                options['bbox'] = _bbox
                entry = _bbox

            periods = _generate_periods(start_date.replace(tzinfo=None), end_date.replace(tzinfo=None))

            for period_start, period_end in periods:
                _items = db.session.query(Item.name, Item.collection_id).filter(
                    Item.collection_id.in_(collection_ids),
                    func.ST_Intersects(
                        func.ST_MakeEnvelope(*_bbox, func.ST_SRID(Item.geom)),
                        Item.geom
                    ),
                    or_(
                        and_(Item.start_date >= period_start, Item.start_date <= period_end),
                        and_(Item.end_date >= period_start, Item.end_date <= period_end),
                        and_(Item.start_date < period_start, Item.end_date > period_end),
                    )
                ).order_by(Item.name).all()

                for item in _items:
                    items[item.collection_id].add(item.name)

                options['start_date'] = period_start.strftime('%Y-%m-%d')
                options['end_date'] = period_end.strftime('%Y-%m-%d')

                key = f'scenes:{catalog}:{dataset}:{period_start.strftime("%Y%m%d")}_{period_end.strftime("%Y%m%d")}_{entry}'

                pipe.get(key)
                provider_scenes = []

                if not redis.exists(key):
                    provider_scenes = provider.search(dataset, **options)
                    provider_scenes = [s.scene_id for s in provider_scenes]

                    pipe.set(key, json.dumps(provider_scenes))

                external_scenes = external_scenes.union(set(provider_scenes))

            cached_scenes = pipe.execute()

            for cache in cached_scenes:
                # pipe.execute() mixes results of GET (a cached payload or None) and SET (True).
                # Only decode entries that carry a cached payload.
                if cache is not None and cache is not True:
                    external_scenes = external_scenes.union(set(json.loads(cache)))

    output['total_external'] = len(external_scenes)

    for _collection_name, _collection in collection_map.items():
        _items = set(items[_collection.id])
        diff = list(external_scenes.difference(_items))
        output['collections'][_collection_name]['total_scenes'] = len(_items)
        output['collections'][_collection_name]['total_missing'] = len(diff)
        output['collections'][_collection_name]['missing_external'] = diff

        for cname, _internal_collection in collection_map.items():
            if cname != _collection_name:
                diff = list(_items.difference(set(items[_internal_collection.id])))
                output['collections'][_collection_name][f'total_missing_{cname}'] = len(diff)
                output['collections'][_collection_name][f'missing_{cname}'] = diff

    return output
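# Hypothetical usage sketch: check_scenes is a classmethod, so it would be invoked through its
# owning controller class (not shown in this excerpt, name assumed below). It requires a Flask
# application context with Redis configured and a provider registered for the given catalog.
# All literal values (collection, catalog, dataset, grid, tile) are illustrative only.
from datetime import datetime

report = RadcorController.check_scenes(   # hypothetical class name
    collections=['S2_L2A-1'],
    start_date=datetime(2021, 1, 1),
    end_date=datetime(2021, 1, 31),
    catalog='STAC',
    dataset='S2_L2A',
    grid='BDC_SM',
    tiles=['017019'],
)
print(report['total_external'], report['collections'])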
def start_process(self, params):
    """Start the data cube generation process for the given parameters."""
    response = {}
    datacube_identify = f'{params["datacube_name"]}-{params["datacube_version"]}'
    response = self.services.get_process_by_id(datacube_identify)

    if 'Items' not in response or len(response['Items']) == 0:
        raise NotFound('Datacube not found in process table!')

    # Get process information stored in DynamoDB
    process_info = response['Items'][0]
    process_params = json.loads(process_info['infos'])

    indexes = process_params['indexes']
    quality_band = process_params['quality_band']
    functions = [process_params['composite_function'], 'IDT']
    satellite = process_info['metadata']['platform']['code']
    mask = process_info.get('mask', None)

    tiles = params['tiles']
    start_date = datetime.strptime(params['start_date'], '%Y-%m-%d').strftime('%Y-%m-%d')
    end_date = datetime.strptime(params['end_date'], '%Y-%m-%d').strftime('%Y-%m-%d') \
        if params.get('end_date') else datetime.now().strftime('%Y-%m-%d')

    # Verify cube info
    cube_infos = Collection.query().filter(Collection.id == process_info['datacube_id']).first()
    cube_infos_irregular = Collection.query().filter(
        Collection.id == process_info['irregular_datacube_id']).first()
    if not cube_infos or not cube_infos_irregular:
        return 'Cube not found!', 404

    # Get bands list
    bands = Band.query().filter(Band.collection_id == cube_infos_irregular.id).all()

    bands_list = []
    indexes_list = []
    for band in bands:
        if band.name.upper() not in [i['common_name'].upper() for i in indexes]:
            bands_list.append(band.name)
        else:
            indexes_available = {
                'NDVI': ['NIR', 'RED'],
                'EVI': ['NIR', 'RED', 'BLUE']
            }
            if not indexes_available.get(band.name.upper()):
                return 'Index not available', 400

            index = dict(
                name=band.name,
                bands=[
                    dict(name=b.name, common_name=b.common_name)
                    for b in bands
                    if b.common_name.upper() in indexes_available[band.name.upper()]
                ]
            )
            if len(index['bands']) != len(indexes_available[band.name.upper()]):
                return 'bands: {}, are needed to create the {} index'.format(
                    ','.join(indexes_available[band.name.upper()]), band.name), 400

            indexes_list.append(index)

    # Get quicklook bands
    bands_ql = Quicklook.query().filter(Quicklook.collection_id == cube_infos_irregular.id).first()
    bands_ql_list = [
        list(filter(lambda b: b.id == bands_ql.red, bands))[0].name,
        list(filter(lambda b: b.id == bands_ql.green, bands))[0].name,
        list(filter(lambda b: b.id == bands_ql.blue, bands))[0].name
    ]

    cub_ref = cube_infos or cube_infos_irregular

    # items => old mosaic
    # Orchestrate
    shape = params.get('shape', None)
    self.score['items'] = orchestrate(cub_ref, tiles, start_date, end_date, functions, shape)

    # Prepare merge
    crs = cube_infos.grs.crs
    formatted_version = format_version(cube_infos.version)
    prepare_merge(self, cube_infos.name, params['collections'], satellite,
                  bands_list, indexes_list, bands_ql_list, float(bands[0].resolution_x),
                  float(bands[0].resolution_y), int(bands[0].nodata), crs, quality_band,
                  functions, formatted_version, params.get('force', False), mask)

    return dict(message='Processing started successfully'), 201
def start_process(self, params):
    """Start the data cube generation process for the given parameters."""
    response = {}
    datacube_identify = f'{params["datacube"]}-{params["datacube_version"]}'
    response = self.services.get_process_by_id(datacube_identify)

    if 'Items' not in response or len(response['Items']) == 0:
        raise NotFound('Datacube not found in process table!')

    # Get process information stored in DynamoDB
    process_info = response['Items'][0]
    process_params = json.dumps(process_info['infos'], cls=DecimalEncoder)
    process_params = json.loads(process_params)

    indexes = process_params['indexes']
    quality_band = process_params['quality_band']
    functions = [process_params['composite_function'], 'IDT']
    satellite = process_params['metadata']['platform']['code']
    mask = process_params['parameters'].get('mask')
    if not mask:
        raise NotFound('Mask values not found in item allocated in processing table - DynamoDB')

    stac_list = params.get('stac_list', None)
    if not stac_list and process_params['parameters'].get('stac_list'):
        stac_list = process_params['parameters']['stac_list']
    elif not stac_list:
        raise NotFound('STAC URL and collection are required')

    landsat_harmonization = process_params['parameters'].get('landsat_harmonization', {})
    if not landsat_harmonization.get('apply', False):
        landsat_harmonization = None

    self.services = CubeServices(bucket=self.services.bucket_name, stac_list=stac_list)

    collections = [stac['collection'] for stac in stac_list]

    tiles = params['tiles']
    start_date = params['start_date'].strftime('%Y-%m-%d')
    end_date = params['end_date'].strftime('%Y-%m-%d') \
        if params.get('end_date') else datetime.now().strftime('%Y-%m-%d')

    # Verify cube info
    cube_infos = Collection.query().filter(Collection.id == process_info['datacube_id']).first()
    cube_infos_irregular = Collection.query().filter(
        Collection.id == process_info['irregular_datacube_id']).first()
    if not cube_infos or not cube_infos_irregular:
        return 'Cube not found!', 404

    # Get bands list
    bands = Band.query().filter(Band.collection_id == cube_infos_irregular.id).all()

    bands_expressions = dict()

    bands_list = []
    bands_ids_list = {}
    for band in bands:
        if band.name.upper() not in [i['common_name'].upper() for i in indexes]:
            bands_list.append(band.name)
            bands_ids_list[band.id] = band.name
        elif band._metadata and band._metadata.get('expression') and band._metadata['expression'].get('value'):
            meta = deepcopy(band._metadata)
            meta['data_type'] = band.data_type
            bands_expressions[band.name] = meta

    # Get quicklook bands
    bands_ql = Quicklook.query().filter(Quicklook.collection_id == cube_infos_irregular.id).first()
    bands_ql_list = [
        list(filter(lambda b: b.id == bands_ql.red, bands))[0].name,
        list(filter(lambda b: b.id == bands_ql.green, bands))[0].name,
        list(filter(lambda b: b.id == bands_ql.blue, bands))[0].name
    ]

    # items => {'tile_id': bbox, xmin, ..., periods: {'start_end': collection, ...}}
    # Orchestrate
    shape = params.get('shape', None)
    temporal_schema = cube_infos.temporal_composition_schema
    self.score['items'] = orchestrate(cube_infos_irregular, temporal_schema, tiles,
                                      start_date, end_date, shape, item_prefix=ITEM_PREFIX)

    # Prepare merge
    crs = cube_infos.grs.crs
    formatted_version = format_version(cube_infos.version)
    not_started = prepare_merge(
        self, cube_infos.name, cube_infos_irregular.name, collections, satellite,
        bands_list, bands_ids_list, bands_ql_list, float(bands[0].resolution_x),
        float(bands[0].resolution_y), int(bands[0].nodata), crs, quality_band,
        functions, formatted_version, params.get('force', False), mask,
        bands_expressions=bands_expressions,
        indexes_only_regular_cube=params.get('indexes_only_regular_cube'),
        landsat_harmonization=landsat_harmonization)

    if len(not_started):
        return dict(
            message='Some scenes have not been started! If necessary, use the force parameter.',
            scenes_not_started=not_started), 200

    return dict(message='Processing started successfully'), 200
def warp_merge(activity, band_map, force=False, **kwargs):
    """Execute datacube merge task.

    This task consists in the following steps:

    **1.** Prepare a raster using dimensions of datacube GRS schema.
    **2.** Open collection dataset with RasterIO and reproject to datacube GRS Schema.
    **3.** Fill the respective pathrow into raster

    Args:
        activity - Datacube Activity Model
        force - Flag to build data cube without cache.

    Returns:
        Validated activity
    """
    logging.warning('Executing merge {} - {}'.format(activity.get('warped_collection_id'), activity['band']))

    record = create_execution(activity)

    record.warped_collection_id = activity['warped_collection_id']
    merge_date = activity['date']

    tile_id = activity['tile_id']
    version = activity['args']['version']

    merge_file_path = None

    if activity['args'].get('reuse_datacube'):
        collection = Collection.query().filter(Collection.id == activity['args']['reuse_datacube']).first()

        if not force:
            # TODO: Should we search in Activity instead?
            merge_file_path = build_cube_path(collection.name, merge_date, tile_id,
                                              version=collection.version, band=record.band)

            if not merge_file_path.exists():
                # TODO: Should we raise exception??
                logging.warning(
                    f'Cube {record.warped_collection_id} requires {collection.name}, '
                    f'but the file {str(merge_file_path)} not found. Skipping'
                )
                raise RuntimeError(
                    f"""Cube {record.warped_collection_id} is derived from {collection.name},
                    but the file {str(merge_file_path)} was not found.""")
        else:
            raise RuntimeError(
                f'Cannot use option "force" for derived data cube - {record.warped_collection_id} of {collection.name}'
            )

    if merge_file_path is None:
        merge_file_path = build_cube_path(record.warped_collection_id, merge_date,
                                          tile_id, version=version, band=record.band)

    if activity['band'] == band_map['quality'] and len(activity['args']['datasets']):
        kwargs['build_provenance'] = True

    reused = False

    # Reuse merges already done. Rebuild only with flag ``--force``
    if not force and merge_file_path.exists() and merge_file_path.is_file():
        efficacy = cloudratio = 0

        if activity['band'] == band_map['quality']:
            # When file exists, compute the file statistics
            efficacy, cloudratio = compute_data_set_stats(str(merge_file_path))

        reused = True

        activity['args']['file'] = str(merge_file_path)
        activity['args']['efficacy'] = efficacy
        activity['args']['cloudratio'] = cloudratio
        record.traceback = ''

        args = deepcopy(record.args)
        args.update(activity['args'])

        activity['args'] = args

        record.args = args
        record.save()
    else:
        record.status = 'STARTED'
        record.save()

        record.args = activity['args']

        try:
            args = deepcopy(activity.get('args'))
            args.pop('period', None)
            args['tile_id'] = tile_id
            args['date'] = record.date.strftime('%Y-%m-%d')
            args['cube'] = record.warped_collection_id

            empty = args.get('empty', False)

            # Create base directory
            merge_file_path.parent.mkdir(parents=True, exist_ok=True)

            if empty:
                # create empty raster
                file_path = create_empty_raster(str(merge_file_path),
                                                proj4=args['srs'],
                                                cog=True,
                                                nodata=args['nodata'],
                                                dtype='int16',  # TODO: Pass through args
                                                dist=[args['dist_x'], args['dist_y']],
                                                resolution=[args['resx'], args['resy']],
                                                xmin=args['xmin'],
                                                ymax=args['ymax'])
                res = dict(
                    file=str(file_path),
                    efficacy=100,
                    cloudratio=0,
                    resolution=args['resx'],
                    nodata=args['nodata']
                )
            else:
                res = merge_processing(str(merge_file_path), band_map=band_map,
                                       band=record.band, **args, **kwargs)

            merge_args = deepcopy(activity['args'])
            merge_args.update(res)

            record.traceback = ''
            record.status = 'SUCCESS'
            record.args = merge_args

            activity['args'].update(merge_args)
        except BaseException as e:
            record.status = 'FAILURE'
            record.traceback = capture_traceback(e)
            logging.error('Error in merge. Activity {}'.format(record.id), exc_info=True)

            raise e
        finally:
            record.save()

    logging.warning('Merge {} executed successfully. Efficacy={}, cloud_ratio={}'.format(
        str(merge_file_path),
        activity['args']['efficacy'],
        activity['args']['cloudratio']))

    activity['args']['reused'] = reused

    return activity
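# A minimal activity payload sketch inferred from the keys warp_merge reads above. Every
# value is illustrative, and the real pipeline may require additional keys consumed by
# create_execution / merge_processing; the "empty" path keys (srs, nodata, dist_*, res*,
# xmin, ymax) are included only to show where they would go.
example_activity = dict(
    warped_collection_id='S2_10_16D_STK_1',    # hypothetical irregular cube identifier
    band='B04',
    date='2020-01-01',
    tile_id='089098',
    args=dict(
        version=1,
        datasets=['S2_L2A'],                    # non-empty list triggers build_provenance for the quality band
        srs='+proj=longlat +ellps=GRS80 +no_defs',
        nodata=-9999,
        dist_x=1.5, dist_y=1.5,
        resx=0.0001, resy=0.0001,
        xmin=-54.0, ymax=-11.0,
    ),
)
# warp_merge(example_activity, band_map={'quality': 'SCL'}, force=False)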
def _check_reuse_cube(self):
    """Check whether this cube reuses another data cube (``reuse_data_cube`` in metadata)."""
    self._reuse_cube = None

    if self.metadata_.get('reuse_data_cube'):
        self._reuse_cube = Collection.query().get(self.metadata_['reuse_data_cube'])
def orchestrate(self):
    """Orchestrate data cube definition and prepare temporal resolutions."""
    self.datacube = Collection.query().filter(Collection.name == self.params['datacube']).one()

    temporal_schema = self.datacube.temporal_composition_schema

    dstart = self.params['start_date']
    dend = self.params['end_date']

    timeline = Timeline(**temporal_schema, start_date=dstart, end_date=dend).mount()

    where = [Tile.grid_ref_sys_id == self.datacube.grid_ref_sys_id]

    if self.params.get('tiles'):
        where.append(Tile.name.in_(self.params['tiles']))

    self.tiles = db.session.query(Tile).filter(*where).all()

    self.bands = Band.query().filter(Band.collection_id == self.warped_datacube.id).all()

    if self.properties.get('reuse_from'):
        common_bands = _common_bands()
        collection_bands = [b.name for b in self.datacube.bands if b.name not in common_bands]

        reused_collection_bands = [b.name for b in self.bands]

        # The input cube (STK/MED) must have all bands of reused. Otherwise raise Error.
        if not set(collection_bands).issubset(set(reused_collection_bands)):
            raise RuntimeError(
                f'Reused data cube {self.warped_datacube.name} must have all bands of {self.datacube.name}')

        # Extra filter to only use bands of Input data cube.
        self.bands = [b for b in self.bands if b.name in collection_bands]

    for tile in self.tiles:
        tile_name = tile.name

        grs: GridRefSys = tile.grs

        grid_geom = grs.geom_table

        tile_stats = db.session.query(
            (func.ST_XMin(grid_geom.c.geom)).label('min_x'),
            (func.ST_YMax(grid_geom.c.geom)).label('max_y'),
            (func.ST_XMax(grid_geom.c.geom) - func.ST_XMin(grid_geom.c.geom)).label('dist_x'),
            (func.ST_YMax(grid_geom.c.geom) - func.ST_YMin(grid_geom.c.geom)).label('dist_y')
        ).filter(grid_geom.c.tile == tile_name).first()

        self.mosaics[tile_name] = dict(periods=dict())

        for interval in timeline:
            startdate = interval[0]
            enddate = interval[1]

            if dstart is not None and startdate < dstart:
                continue
            if dend is not None and enddate > dend:
                continue

            period = f'{startdate}_{enddate}'
            cube_relative_path = f'{self.datacube.name}/v{self.datacube.version:03d}/{tile_name}/{period}'

            self.mosaics[tile_name]['periods'][period] = {}
            self.mosaics[tile_name]['periods'][period]['start'] = startdate.strftime('%Y-%m-%d')
            self.mosaics[tile_name]['periods'][period]['end'] = enddate.strftime('%Y-%m-%d')
            self.mosaics[tile_name]['periods'][period]['dist_x'] = tile_stats.dist_x
            self.mosaics[tile_name]['periods'][period]['dist_y'] = tile_stats.dist_y
            self.mosaics[tile_name]['periods'][period]['min_x'] = tile_stats.min_x
            self.mosaics[tile_name]['periods'][period]['max_y'] = tile_stats.max_y
            self.mosaics[tile_name]['periods'][period]['dirname'] = cube_relative_path

            if self.properties.get('shape', None):
                self.mosaics[tile_name]['periods'][period]['shape'] = self.properties['shape']
def orchestrate(self):
    """Orchestrate data cube definition and prepare temporal resolutions."""
    self.datacube = Collection.query().filter(Collection.name == self.params['datacube']).one()

    temporal_schema = self.datacube.temporal_composition_schema

    dstart = self.params['start_date']
    dend = self.params['end_date']

    timeline = Timeline(**temporal_schema, start_date=dstart, end_date=dend).mount()

    where = [Tile.grid_ref_sys_id == self.datacube.grid_ref_sys_id]

    if self.params.get('tiles'):
        where.append(Tile.name.in_(self.params['tiles']))

    self.tiles = db.session.query(Tile).filter(*where).all()

    self.bands = Band.query().filter(Band.collection_id == self.warped_datacube.id).all()

    for tile in self.tiles:
        tile_name = tile.name

        grs: GridRefSys = tile.grs

        grid_geom = grs.geom_table

        tile_stats = db.session.query(
            (func.ST_XMin(grid_geom.c.geom)).label('min_x'),
            (func.ST_YMax(grid_geom.c.geom)).label('max_y'),
            (func.ST_XMax(grid_geom.c.geom) - func.ST_XMin(grid_geom.c.geom)).label('dist_x'),
            (func.ST_YMax(grid_geom.c.geom) - func.ST_YMin(grid_geom.c.geom)).label('dist_y')
        ).filter(grid_geom.c.tile == tile_name).first()

        self.mosaics[tile_name] = dict(periods=dict())

        for interval in timeline:
            startdate = interval[0]
            enddate = interval[1]

            if dstart is not None and startdate < dstart:
                continue
            if dend is not None and enddate > dend:
                continue

            period = f'{startdate}_{enddate}'
            cube_relative_path = f'{self.datacube.name}/v{self.datacube.version:03d}/{tile_name}/{period}'

            self.mosaics[tile_name]['periods'][period] = {}
            self.mosaics[tile_name]['periods'][period]['start'] = startdate.strftime('%Y-%m-%d')
            self.mosaics[tile_name]['periods'][period]['end'] = enddate.strftime('%Y-%m-%d')
            self.mosaics[tile_name]['periods'][period]['dist_x'] = tile_stats.dist_x
            self.mosaics[tile_name]['periods'][period]['dist_y'] = tile_stats.dist_y
            self.mosaics[tile_name]['periods'][period]['min_x'] = tile_stats.min_x
            self.mosaics[tile_name]['periods'][period]['max_y'] = tile_stats.max_y
            self.mosaics[tile_name]['periods'][period]['dirname'] = cube_relative_path
def _create_cube_definition(cls, cube_id: str, params: dict) -> dict:
    """Create a data cube definition.

    Basically, the definition consists in `Collection` and `Band` attributes.

    Note:
        It does not try to create when data cube already exists.

    Args:
        cube_id - Data cube
        params - Dict of required values to create data cube. See @validators.py

    Returns:
        A serialized data cube information.
    """
    cube_parts = get_cube_parts(cube_id)

    function = cube_parts.composite_function

    cube_id = cube_parts.datacube

    cube = Collection.query().filter(
        Collection.name == cube_id,
        Collection.version == params['version']).first()

    grs = GridRefSys.query().filter(GridRefSys.name == params['grs']).first()

    if grs is None:
        abort(404, f'Grid {params["grs"]} not found.')

    cube_function = CompositeFunction.query().filter(CompositeFunction.alias == function).first()

    if cube_function is None:
        abort(404, f'Function {function} not found.')

    data = dict(name='Meter', symbol='m')
    resolution_meter, _ = get_or_create_model(ResolutionUnit, defaults=data, symbol='m')

    mime_type, _ = get_or_create_model(MimeType, defaults=dict(name=COG_MIME_TYPE), name=COG_MIME_TYPE)

    if cube is None:
        cube = Collection(
            name=cube_id,
            title=params['title'],
            temporal_composition_schema=params['temporal_composition'] if function != 'IDT' else None,
            composite_function_id=cube_function.id,
            grs=grs,
            _metadata=params['metadata'],
            description=params['description'],
            collection_type='cube',
            is_public=params.get('public', True),
            version=params['version']
        )

        cube.save(commit=False)

        bands = []

        default_bands = (CLEAR_OBSERVATION_NAME.lower(), TOTAL_OBSERVATION_NAME.lower(), PROVENANCE_NAME.lower())

        band_map = dict()

        for band in params['bands']:
            name = band['name'].strip()

            if name in default_bands:
                continue

            is_not_cloud = params['quality_band'] != band['name']

            if band['name'] == params['quality_band']:
                data_type = 'uint8'
            else:
                data_type = band['data_type']

            band_model = Band(
                name=name,
                common_name=band['common_name'],
                collection=cube,
                min_value=0,
                max_value=10000 if is_not_cloud else 4,
                nodata=-9999 if is_not_cloud else 255,
                scale=0.0001 if is_not_cloud else 1,
                data_type=data_type,
                resolution_x=params['resolution'],
                resolution_y=params['resolution'],
                resolution_unit_id=resolution_meter.id,
                description='',
                mime_type_id=mime_type.id
            )

            if band.get('metadata'):
                band_model._metadata = cls._validate_band_metadata(deepcopy(band['metadata']), band_map)

            band_model.save(commit=False)
            bands.append(band_model)

            band_map[name] = band_model

            if band_model._metadata:
                for _band_origin_id in band_model._metadata['expression']['bands']:
                    band_provenance = BandSRC(band_src_id=_band_origin_id, band_id=band_model.id)
                    band_provenance.save(commit=False)

        quicklook = Quicklook(
            red=band_map[params['bands_quicklook'][0]].id,
            green=band_map[params['bands_quicklook'][1]].id,
            blue=band_map[params['bands_quicklook'][2]].id,
            collection=cube
        )

        quicklook.save(commit=False)

        # Create default Cube Bands
        if function != 'IDT':
            _ = cls.get_or_create_band(cube.id, **CLEAR_OBSERVATION_ATTRIBUTES,
                                       resolution_unit_id=resolution_meter.id,
                                       resolution_x=params['resolution'],
                                       resolution_y=params['resolution'])
            _ = cls.get_or_create_band(cube.id, **TOTAL_OBSERVATION_ATTRIBUTES,
                                       resolution_unit_id=resolution_meter.id,
                                       resolution_x=params['resolution'],
                                       resolution_y=params['resolution'])

            if function == 'STK':
                _ = cls.get_or_create_band(cube.id, **PROVENANCE_ATTRIBUTES,
                                           resolution_unit_id=resolution_meter.id,
                                           resolution_x=params['resolution'],
                                           resolution_y=params['resolution'])

        if params.get('is_combined') and function != 'MED':
            _ = cls.get_or_create_band(cube.id, **DATASOURCE_ATTRIBUTES,
                                       resolution_unit_id=resolution_meter.id,
                                       resolution_x=params['resolution'],
                                       resolution_y=params['resolution'])

    return CollectionForm().dump(cube)
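# A sketch of the `params` payload inferred from the attribute accesses above; the
# authoritative schema lives in the project's validators module, and every literal value
# here (names, resolutions, temporal composition structure) is illustrative only.
example_params = dict(
    version=1,
    title='Example 16-day composite cube',
    description='Example cube definition payload',
    temporal_composition=dict(schema='cyclic', step=16, unit='day'),  # assumed structure
    metadata={},
    public=True,
    grs='BDC_SM',            # must match an existing GridRefSys name
    resolution=10,
    quality_band='SCL',
    bands=[
        dict(name='B04', common_name='red', data_type='int16'),
        dict(name='B08', common_name='nir', data_type='int16'),
        dict(name='SCL', common_name='quality', data_type='uint8'),
    ],
    bands_quicklook=['B04', 'B08', 'B04'],   # red/green/blue band names
    is_combined=False,
)
# serialized = CubeController._create_cube_definition('S2_10_16D_STK', example_params)  # class name assumed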
def publish(blends, band_map, quality_band: str, reuse_data_cube=None, **kwargs):
    """Execute publish task and catalog datacube result.

    Args:
        blends - Serialized results of the blend tasks
        band_map - Map of band name and its definition
        quality_band - Name of the quality band
    """
    if isinstance(blends, Iterable):
        blend_reference = blends[0]
    else:
        blend_reference = blends

    period = blend_reference['period']
    logging.info(f'Executing publish {period}')

    version = blend_reference['version']

    cube: Collection = Collection.query().filter(
        Collection.name == blend_reference['datacube'],
        Collection.version == version).first()
    warped_datacube = blend_reference['warped_datacube']
    tile_id = blend_reference['tile_id']
    reused_cube = blend_reference.get('reuse_datacube')

    # Retrieve which bands to generate quick look
    bands = cube.bands
    band_id_map = {band.id: band.name for band in bands}

    quicklook = cube.quicklook[0]

    quick_look_bands = [
        band_id_map[quicklook.red],
        band_id_map[quicklook.green],
        band_id_map[quicklook.blue]
    ]

    merges = dict()
    blend_files = dict()

    composite_function = DataCubeFragments(cube.name).composite_function

    quality_blend = dict(efficacy=100, cloudratio=0)

    for blend_result in blends:
        if composite_function != 'IDT':
            blend_files[blend_result['band']] = blend_result['blends']

        if blend_result.get('clear_observation_file'):
            blend_files[CLEAR_OBSERVATION_NAME] = {composite_function: blend_result['clear_observation_file']}

        if blend_result.get('total_observation'):
            blend_files[TOTAL_OBSERVATION_NAME] = {composite_function: blend_result['total_observation']}

        if blend_result.get('provenance'):
            blend_files[PROVENANCE_NAME] = {composite_function: blend_result['provenance']}

        if blend_result.get('datasource'):
            blend_files[DATASOURCE_NAME] = {composite_function: blend_result['datasource']}

        for merge_date, definition in blend_result['scenes'].items():
            merges.setdefault(merge_date, dict(datasets=definition.get('datasets', definition.get('dataset')),
                                               cloudratio=definition['cloudratio'],
                                               ARDfiles=dict()))
            merges[merge_date]['ARDfiles'].update(definition['ARDfiles'])
            merges[merge_date]['empty'] = definition.get('empty', False)

        if blend_result['band'] == quality_band:
            quality_blend = blend_result

    _blend_result = []

    cube_geom_table = cube.grs.geom_table
    srid_column = get_srid_column(cube_geom_table.c)
    srid = None

    result = db.session.query(srid_column.label('srid')).first()
    if result is not None:
        srid = result.srid

    if composite_function != 'IDT':
        cloudratio = quality_blend['cloudratio']

        # Generate quick looks for cube scenes
        _blend_result = publish_datacube(cube, quick_look_bands, tile_id, period, blend_files,
                                         cloudratio, band_map, reuse_data_cube=reuse_data_cube,
                                         srid=srid, **kwargs)

    # Generate quick looks of irregular cube
    wcube = Collection.query().filter(Collection.name == warped_datacube,
                                      Collection.version == version).first()

    _merge_result = dict()

    if not reused_cube:
        for merge_date, definition in merges.items():
            if definition.get('empty') and definition['empty']:
                # Empty data cubes, keep only composite item
                clear_merge(merge_date, definition)
                continue

            _merge_result[merge_date] = publish_merge(quick_look_bands, wcube, tile_id, merge_date,
                                                      definition, band_map,
                                                      reuse_data_cube=reuse_data_cube, srid=srid)

    try:
        db.session.commit()
    except:
        db.session.rollback()

    return _blend_result, _merge_result