def publish(blends): """Execute publish task and catalog datacube result. Args: activity - Datacube Activity Model """ logging.warning('Executing publish') cube = Collection.query().filter( Collection.id == blends[0]['datacube']).first() warped_datacube = blends[0]['warped_datacube'] tile_id = blends[0]['tile_id'] period = blends[0]['period'] cloudratio = blends[0]['cloudratio'] # Retrieve which bands to generate quick look quick_look_bands = cube.bands_quicklook.split(',') merges = dict() blend_files = dict() for blend_result in blends: blend_files[blend_result['band']] = blend_result['blends'] if blend_result.get('cloud_count_file'): blend_files['cnc'] = dict(MED=blend_result['cloud_count_file'], STK=blend_result['cloud_count_file']) for merge_date, definition in blend_result['scenes'].items(): merges.setdefault( merge_date, dict(dataset=definition['dataset'], cloudratio=definition['cloudratio'], ARDfiles=dict())) merges[merge_date]['ARDfiles'].update(definition['ARDfiles']) # Generate quick looks for cube scenes publish_datacube(cube, quick_look_bands, cube.id, tile_id, period, blend_files, cloudratio) # Generate quick looks of irregular cube wcube = Collection.query().filter(Collection.id == warped_datacube).first() for merge_date, definition in merges.items(): date = merge_date.replace(definition['dataset'], '') publish_merge(quick_look_bands, wcube, definition['dataset'], tile_id, period, date, definition) try: refresh_materialized_view(db.session, AssetMV.__table__) db.session.commit() logging.info('View refreshed.') except: db.session.rollback()
def start_process(self, params):
    cube_id = get_cube_id(params['datacube'], 'MED')
    tiles = params['tiles'].split(',')
    start_date = datetime.strptime(params['start_date'], '%Y-%m-%d').strftime('%Y-%m-%d')
    end_date = datetime.strptime(params['end_date'], '%Y-%m-%d').strftime('%Y-%m-%d') \
        if params.get('end_date') else datetime.now().strftime('%Y-%m-%d')

    # Verify cube info
    cube_infos = Collection.query().filter(Collection.id == cube_id).first()
    if not cube_infos:
        return 'Cube not found!', 404

    # Get the list of bands
    bands = Band.query().filter(Band.collection_id == get_cube_id(params['datacube'])).all()
    bands_list = [band.name for band in bands]

    # items => old mosaic
    # Orchestrate
    self.score['items'] = orchestrate(params['datacube'], cube_infos, tiles, start_date, end_date)

    # Prepare merge
    prepare_merge(self, params['datacube'], params['collections'].split(','), bands_list,
                  cube_infos.bands_quicklook, bands[0].resolution_x, bands[0].resolution_y,
                  bands[0].fill, cube_infos.raster_size_schemas.raster_size_x,
                  cube_infos.raster_size_schemas.raster_size_y,
                  cube_infos.raster_size_schemas.chunk_size_x, cube_infos.grs_schema.crs)

    return 'Successfully', 201
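# Note: `get_cube_id` is called above with one and two arguments but is not defined in this
# listing. The sketch below is an assumption about the naming convention, inferred from
# identifiers elsewhere in the code ('LC8_30' for identity/warped cubes, 'LC8_30_MED' for
# composites). It is named `get_cube_id_sketch` to make clear it is illustrative only.
def get_cube_id_sketch(datacube: str, function: str = None) -> str:
    """Return the data cube identifier, optionally suffixed by the composite function."""
    # Keep only the first two fragments (collection name and resolution) as the base id
    base = '_'.join(datacube.split('_')[0:2])

    if not function or function.upper() == 'IDENTITY':
        return base

    return '{}_{}'.format(base, function.upper())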
def check_for_invalid_merges(cls, datacube: str, tile: str, start_date: str, last_date: str) -> Tuple[dict, int]:
    """List merge files used in a data cube and check for invalid scenes.

    Args:
        datacube: Data cube name
        tile: Brazil Data Cube Tile identifier
        start_date: Activity start date (period)
        last_date: Activity end date (period)

    Returns:
        The list of images used in the period and the HTTP status code.
    """
    cube = Collection.query().filter(Collection.id == datacube).first()

    if cube is None or not cube.is_cube:
        raise NotFound('Cube {} not found'.format(datacube))

    # TODO: validate the schema to avoid start/end dates too far apart
    res = Activity.list_merge_files(datacube, tile, start_date, last_date)

    result = validate_merges(res)

    return result, 200
def get_cube(cls, cube_name: str):
    collection = Collection.query().filter(Collection.id == cube_name).first()

    if collection is None or not collection.is_cube:
        return 'Cube "{}" not found.'.format(cube_name), 404

    return Serializer.serialize(collection), 200
def warped_datacube(self) -> Collection:
    """Retrieve the cached warped data cube definition."""
    if not self._warped:
        datacube_warped = get_cube_id(self.datacube.id)

        self._warped = Collection.query().filter(Collection.id == datacube_warped).first()

    return self._warped
def publish(blends):
    """Publish the blended data cube scenes and their irregular (warped) merges."""
    logging.warning('Executing publish')

    cube = Collection.query().filter(Collection.id == blends[0]['datacube']).first()
    warped_datacube = blends[0]['warped_datacube']
    tile_id = blends[0]['tile_id']
    period = blends[0]['period']
    cloudratio = blends[0]['cloudratio']

    # Retrieve which bands to generate quick look
    quick_look_bands = cube.bands_quicklook.split(',')

    merges = dict()
    blend_files = dict()

    for blend_result in blends:
        blend_files[blend_result['band']] = blend_result['blends']

        for merge_date, definition in blend_result['scenes'].items():
            merges.setdefault(merge_date, dict(dataset=definition['dataset'],
                                               cloudratio=definition['cloudratio'],
                                               ARDfiles=dict()))
            merges[merge_date]['ARDfiles'].update(definition['ARDfiles'])

    # Generate quick looks for cube scenes
    publish_datacube(cube, quick_look_bands, cube.id, tile_id, period, blend_files, cloudratio)

    # Generate quick looks of the irregular (warped) cube
    for merge_date, definition in merges.items():
        date = merge_date.replace(definition['dataset'], '')

        wcube = Collection.query().filter(Collection.id == warped_datacube).first()

        publish_merge(quick_look_bands, wcube, definition['dataset'], tile_id, period, date, definition)
def load_collections(fixture_path: str):
    """Load default collections into the database.

    Args:
        fixture_path - Path relative to the fixtures directory, e.g. 'data/tiles.json'
    """
    collections = json_parser(resource_string(__name__, fixture_path))

    with db.session.begin_nested():
        for collection in collections:
            bands = collection.pop('bands')

            c = Collection(**collection)
            c.save(commit=False)

            for band in bands:
                b = Band(**band)
                b.collection = c
                b.save(commit=False)
def dispatch(activity: dict):
    """Dispatch the activity to the respective celery task handler.

    Args:
        activity (RadcorActivity) - An activity that has not been executed yet
    """
    from .sentinel import tasks as sentinel_tasks
    from .landsat import tasks as landsat_tasks

    # TODO: Implement it as a factory (TaskDispatcher) and delegate the
    # responsibility to the task type handler
    app = activity.get('activity_type')

    if app == 'downloadS2':
        # The download yields TOA data; the Surface Reflectance collection must already be registered
        collection_sr = Collection.query().filter(Collection.id == 'S2SR_SEN28').first()

        if collection_sr is None:
            raise RuntimeError('The collection "S2SR_SEN28" was not found')

        # Raw chain represents the TOA publish chain
        publish_raw_data_chain = sentinel_tasks.publish_sentinel.s()

        # Atmospheric correction chain
        atm_corr_publish_chain = sentinel_tasks.atm_correction.s() | sentinel_tasks.publish_sentinel.s()

        # Upload the published atmospheric correction result
        upload_chain = sentinel_tasks.upload_sentinel.s()

        inner_group = upload_chain

        if activity['args'].get('harmonize'):
            # Harmonization chain
            harmonize_chain = sentinel_tasks.harmonization_sentinel.s() | \
                sentinel_tasks.publish_sentinel.s() | \
                sentinel_tasks.upload_sentinel.s()

            inner_group = group(upload_chain, harmonize_chain)

        inner_group = atm_corr_publish_chain | inner_group
        outer_group = group(publish_raw_data_chain, inner_group)
        task_chain = sentinel_tasks.download_sentinel.s(activity) | outer_group

        return chain(task_chain).apply_async()
    elif app == 'correctionS2':
        task_chain = sentinel_tasks.atm_correction.s(activity) | \
            sentinel_tasks.publish_sentinel.s() | \
            sentinel_tasks.upload_sentinel.s()

        return chain(task_chain).apply_async()
    elif app == 'publishS2':
        tasks = [sentinel_tasks.publish_sentinel.s(activity)]

        if 'S2SR' in activity['collection_id']:
            tasks.append(sentinel_tasks.upload_sentinel.s())

        return chain(*tasks).apply_async()
    elif app == 'harmonizeS2':
        task_chain = sentinel_tasks.harmonization_sentinel.s(activity) | \
            sentinel_tasks.publish_sentinel.s() | \
            sentinel_tasks.upload_sentinel.s()

        return chain(task_chain).apply_async()
    elif app == 'uploadS2':
        return sentinel_tasks.upload_sentinel.s(activity).apply_async()
    elif app == 'downloadLC8':
        # The download yields DN data; the Surface Reflectance collection must already be registered
        collection_lc8 = Collection.query().filter(Collection.id == 'LC8SR').first()

        if collection_lc8 is None:
            raise RuntimeError('The collection "LC8SR" was not found')

        # Raw chain represents the DN publish chain
        raw_data_chain = landsat_tasks.publish_landsat.s()

        # Atmospheric correction chain
        atm_corr_chain = landsat_tasks.atm_correction_landsat.s()

        # Publish and upload the atmospheric correction result
        publish_atm_chain = landsat_tasks.publish_landsat.s() | landsat_tasks.upload_landsat.s()

        inner_group = publish_atm_chain

        # Check whether the harmonization chain must be added to the group
        if activity['args'].get('harmonize'):
            # Harmonization chain
            harmonize_chain = landsat_tasks.harmonization_landsat.s() | \
                landsat_tasks.publish_landsat.s() | \
                landsat_tasks.upload_landsat.s()

            inner_group = group(publish_atm_chain, harmonize_chain)

        atm_chain = atm_corr_chain | inner_group
        outer_group = group(raw_data_chain, atm_chain)
        task_chain = landsat_tasks.download_landsat.s(activity) | outer_group

        return chain(task_chain).apply_async()
    elif app == 'correctionLC8':
        # Atmospheric correction chain
        atm_corr_chain = landsat_tasks.atm_correction_landsat.s(activity)

        # Publish and upload the atmospheric correction result
        publish_atm_chain = landsat_tasks.publish_landsat.s() | landsat_tasks.upload_landsat.s()

        inner_group = publish_atm_chain

        # Check whether the harmonization chain must be added to the group
        if activity['args'].get('harmonize'):
            # Harmonization chain
            harmonize_chain = landsat_tasks.harmonization_landsat.s() | \
                landsat_tasks.publish_landsat.s() | \
                landsat_tasks.upload_landsat.s()

            inner_group = group(publish_atm_chain, harmonize_chain)

        task_chain = atm_corr_chain | inner_group

        return chain(task_chain).apply_async()
    elif app == 'publishLC8':
        task_chain = landsat_tasks.publish_landsat.s(activity) | landsat_tasks.upload_landsat.s()

        return chain(task_chain).apply_async()
    elif app == 'harmonizeLC8':
        task_chain = landsat_tasks.harmonization_landsat.s(activity) | \
            landsat_tasks.publish_landsat.s() | \
            landsat_tasks.upload_landsat.s()

        return chain(task_chain).apply_async()
    elif app == 'uploadLC8':
        return landsat_tasks.upload_landsat.s(activity).apply_async()
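# A minimal, illustrative call to `dispatch`. The keys ('activity_type', 'args',
# 'collection_id') come from the function above; the concrete values are hypothetical.
example_activity = {
    'activity_type': 'correctionS2',
    'collection_id': 'S2SR_SEN28',
    'args': {'harmonize': False},
}

# Builds the celery chain atm_correction -> publish -> upload and submits it asynchronously
dispatch(example_activity)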
def publish(self, activity):
    print('==> start PUBLISH')
    services = self.services

    activity['mystart'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    warped_cube = '_'.join(activity['datacube'].split('_')[0:2])

    # Generate quicklooks for CUBES (MEDIAN, STACK ...)
    qlbands = activity['quicklook'].split(',')

    for function in ['MED', 'STK']:
        cube_id = get_cube_id(activity['datacube'], function)
        general_scene_id = '{}_{}_{}_{}'.format(
            cube_id, activity['tileid'], activity['start'], activity['end'])

        qlfiles = []
        for band in qlbands:
            qlfiles.append(services.prefix + activity['blended'][band][function + 'file'])

        pngname = generateQLook(general_scene_id, qlfiles)

        dirname_ql = activity['dirname'].replace('{}/'.format(warped_cube), '{}/'.format(cube_id))

        if pngname is None:
            print('publish - Error generateQLook for {}'.format(general_scene_id))
            return False

        s3pngname = os.path.join(dirname_ql, '{}_{}'.format(activity['start'], activity['end']),
                                 os.path.basename(pngname))
        services.upload_file_S3(pngname, s3pngname, {'ACL': 'public-read'})
        os.remove(pngname)

    # Generate quicklooks for all ARD scenes (WARPED)
    for datedataset in activity['scenes']:
        scene = activity['scenes'][datedataset]

        cube_id = get_cube_id(activity['datacube'])
        general_scene_id = '{}_{}_{}'.format(cube_id, activity['tileid'], str(scene['date'])[0:10])

        qlfiles = []
        for band in qlbands:
            filename = os.path.join(services.prefix + activity['dirname'],
                                    str(scene['date'])[0:10], scene['ARDfiles'][band])
            qlfiles.append(filename)

        pngname = generateQLook(general_scene_id, qlfiles)

        if pngname is None:
            print('publish - Error generateQLook for {}'.format(general_scene_id))
            return False

        s3pngname = os.path.join(activity['dirname'], str(scene['date'])[0:10],
                                 os.path.basename(pngname))
        services.upload_file_S3(pngname, s3pngname, {'ACL': 'public-read'})
        os.remove(pngname)

    # Register collection_items and assets in DB (MEDIAN, STACK ...)
    for function in ['MED', 'STK']:
        cube_id = '{}_{}'.format(activity['datacube'], function)

        cube = Collection.query().filter(Collection.id == cube_id).first()
        if not cube:
            print('cube {} not found!'.format(cube_id))
            continue

        general_scene_id = '{}_{}_{}_{}'.format(
            cube_id, activity['tileid'], activity['start'], activity['end'])

        # Delete collection_items and assets if they exist
        assets = Asset.query().filter(Asset.collection_item_id == general_scene_id).all()
        for asset in assets:
            db.session().delete(asset)
        db.session().commit()

        coll_item = CollectionItem.query().filter(CollectionItem.id == general_scene_id).first()
        if coll_item:
            db.session().delete(coll_item)
        db.session().commit()

        # Insert 'collection_item'
        range_date = '{}_{}'.format(activity['start'], activity['end'])
        png_name = '{}.png'.format(general_scene_id)
        dirname_ql = activity['dirname'].replace('{}/'.format(warped_cube), '{}/'.format(cube_id))
        s3_pngname = os.path.join(dirname_ql, range_date, png_name)

        CollectionItem(
            id=general_scene_id,
            collection_id=cube_id,
            grs_schema_id=cube.grs_schema_id,
            tile_id=activity['tileid'],
            item_date=activity['start'],
            composite_start=activity['start'],
            composite_end=activity['end'],
            quicklook='{}/{}'.format(BUCKET_NAME, s3_pngname),
            cloud_cover=activity['cloudratio'],
            scene_type=function,
            compressed_file=None
        ).save()

        # Insert 'assets'
        bands_by_cube = Band.query().filter(Band.collection_id == cube_id).all()

        for band in activity['bands']:
            if band == 'quality':
                continue

            band_id = list(filter(lambda b: str(b.common_name) == band, bands_by_cube))
            if not band_id:
                print('band {} not found!'.format(band))
                continue

            Asset(
                collection_id=cube_id,
                band_id=band_id[0].id,
                grs_schema_id=cube.grs_schema_id,
                tile_id=activity['tileid'],
                collection_item_id=general_scene_id,
                url='{}/{}'.format(BUCKET_NAME, activity['blended'][band][function + 'file']),
                source=None,
                raster_size_x=activity['raster_size_x'],
                raster_size_y=activity['raster_size_y'],
                raster_size_t=1,
                chunk_size_x=activity['chunk_size_x'],
                chunk_size_y=activity['chunk_size_y'],
                chunk_size_t=1
            ).save()

    # Register all ARD scenes - WARPED Collection
    for datedataset in activity['scenes']:
        scene = activity['scenes'][datedataset]

        cube_id = get_cube_id(activity['datacube'])
        cube = Collection.query().filter(Collection.id == cube_id).first()
        if not cube:
            print('cube {} not found!'.format(cube_id))
            continue

        general_scene_id = '{}_{}_{}'.format(cube_id, activity['tileid'], str(scene['date'])[0:10])

        # Delete 'assets' and 'collection_items' if they exist
        assets = Asset.query().filter(Asset.collection_item_id == general_scene_id).all()
        for asset in assets:
            db.session().delete(asset)
        db.session().commit()

        coll_item = CollectionItem.query().filter(CollectionItem.id == general_scene_id).first()
        if coll_item:
            db.session().delete(coll_item)
        db.session().commit()

        # Insert 'collection_item'
        pngname = '{}.png'.format(general_scene_id)
        s3pngname = os.path.join(activity['dirname'], str(scene['date'])[0:10], pngname)

        CollectionItem(
            id=general_scene_id,
            collection_id=cube_id,
            grs_schema_id=cube.grs_schema_id,
            tile_id=activity['tileid'],
            item_date=scene['date'],
            composite_start=scene['date'],
            composite_end=scene['date'],
            quicklook='{}/{}'.format(BUCKET_NAME, s3pngname),
            cloud_cover=int(scene['cloudratio']),
            scene_type='WARPED',
            compressed_file=None
        ).save()

        # Insert 'assets'
        bands_by_cube = Band.query().filter(Band.collection_id == cube_id).all()

        for band in activity['bands']:
            if band not in scene['ARDfiles']:
                print('publish - problem - band {} not in scene[files]'.format(band))
                continue

            band_id = list(filter(lambda b: str(b.common_name) == band, bands_by_cube))
            if not band_id:
                print('band {} not found!'.format(band))
                continue

            raster_size_x = scene['raster_size_x'] if scene.get('raster_size_x') else activity.get('raster_size_x')
            raster_size_y = scene['raster_size_y'] if scene.get('raster_size_y') else activity.get('raster_size_y')
            block_size = scene['block_size'] if scene.get('block_size') else activity.get('block_size')

            Asset(
                collection_id=cube_id,
                band_id=band_id[0].id,
                grs_schema_id=cube.grs_schema_id,
                tile_id=activity['tileid'],
                collection_item_id=general_scene_id,
                url='{}/{}'.format(BUCKET_NAME, os.path.join(activity['dirname'],
                                                             str(scene['date'])[0:10],
                                                             scene['ARDfiles'][band])),
                source=None,
                raster_size_x=raster_size_x,
                raster_size_y=raster_size_y,
                raster_size_t=1,
                chunk_size_x=block_size,
                chunk_size_y=block_size,
                chunk_size_t=1
            ).save()

    # Update status and end time in DynamoDB
    activity['myend'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    activity['mystatus'] = 'DONE'
    services.put_item_kinesis(activity)

    refresh_materialized_view(db.session, AssetMV.__table__)

    return True
def publish(collection_item: CollectionItem, scene: RadcorActivity):
    """Publish Landsat collection.

    It works with both Digital Number (DN) and Surface Reflectance (SR).

    Args:
        collection_item - Collection Item
        scene - Current Activity
    """
    identifier = scene.sceneid
    cc = identifier.split('_')
    pathrow = cc[2]
    date = cc[3]
    yyyymm = '{}-{}'.format(date[0:4], date[4:6])

    productdir = scene.args.get('file')

    logging.warning('Publish {} - {} (id={})'.format(scene.collection_id, productdir, scene.id))

    if productdir and productdir.endswith('.gz'):
        target_dir = Path(Config.DATA_DIR) / 'Repository/Archive/{}/{}/{}'.format(
            collection_item.collection_id, yyyymm, pathrow)
        makedirs(target_dir, exist_ok=True)

        productdir = uncompress(productdir, str(target_dir))

    collection = Collection.query().filter(Collection.id == collection_item.collection_id).one()

    quicklook = collection.bands_quicklook.split(',') if collection.bands_quicklook else DEFAULT_QUICK_LOOK_BANDS

    files = {}
    qlfiles = {}

    if collection.id == 'LC8DN':
        bands = BAND_MAP_DN
    elif collection.id == 'LC8NBAR':
        bands = BAND_MAP_NBAR
    else:
        bands = BAND_MAP_SR

    for gband, band in bands.items():
        template = productdir + '/LC08_*_{}_{}_*_{}.*'.format(pathrow, date, band)

        fs = glob.glob(template)

        if not fs:
            continue

        for f in fs:
            if f.lower().endswith('.tif'):
                files[gband] = f
                if gband in quicklook:
                    qlfiles[gband] = f

    # Skip EVI/NDVI generation for Surface Reflectance,
    # since espa-science has already generated them
    if collection.id == 'LC8DN' or collection.id == 'LC8NBAR':
        generate_vi(productdir, files)

    # Apply the valid range and generate COG files
    for band, file_path in files.items():
        if collection.id == 'LC8SR':
            _ = apply_valid_range(file_path, file_path)

        # Set destination of COG file
        files[band] = generate_cogs(file_path, file_path)

        if not is_valid_tif(file_path):
            raise RuntimeError('Not Valid {}'.format(file_path))

    # Extract basic scene information and build the quicklook
    pngname = productdir + '/{}.png'.format(identifier)

    dataset = GDALOpen(qlfiles['nir'], GA_ReadOnly)
    numlin = 768
    numcol = int(float(dataset.RasterXSize) / float(dataset.RasterYSize) * numlin)
    image = numpy.zeros((numlin, numcol, len(qlfiles)), dtype=numpy.uint8)

    del dataset

    nb = 0
    for band in quicklook:
        template = qlfiles[band]

        dataset = GDALOpen(template, GA_ReadOnly)
        raster = dataset.GetRasterBand(1).ReadAsArray(0, 0, dataset.RasterXSize, dataset.RasterYSize)

        del dataset

        raster = resize(raster, (numlin, numcol), order=1, preserve_range=True)
        nodata = raster == -9999

        # Evaluate minimum and maximum values
        a = numpy.array(raster.flatten())
        p1, p99 = numpy.percentile(a[a > 0], (1, 99))

        # Convert minimum and maximum values to 1,255 - 0 is nodata
        raster = exposure.rescale_intensity(raster, in_range=(p1, p99),
                                            out_range=(1, 255)).astype(numpy.uint8)
        image[:, :, nb] = raster.astype(numpy.uint8) * numpy.invert(nodata)
        nb += 1

    write_png(pngname, image, transparent=(0, 0, 0))

    productdir = productdir.replace(Config.DATA_DIR, '')

    assets_to_upload = {
        'quicklook': dict(file=pngname, asset=productdir.replace('/Repository/Archive', ''))
    }

    for instance in ['local', 'aws']:
        engine_instance = {'local': db, 'aws': db_aws}
        engine = engine_instance[instance]

        # Skip catalog on aws for digital number
        if collection_item.collection_id == 'LC8DN' and instance == 'aws':
            continue

        if instance == 'aws':
            asset_url = productdir.replace('/Repository/Archive', Config.AWS_BUCKET_NAME)
        else:
            asset_url = productdir

        pngname = resource_path.join(asset_url, Path(pngname).name)
        assets_to_upload['quicklook']['asset'] = pngname

        with engine.session.begin_nested():
            with engine.session.no_autoflush:
                # Add collection item to the session if not present
                if collection_item not in engine.session:
                    item = engine.session.query(CollectionItem).filter(
                        CollectionItem.id == collection_item.id).first()

                    if not item:
                        cloned_properties = CollectionItemForm().dump(collection_item)
                        collection_item = CollectionItem(**cloned_properties)
                        engine.session.add(collection_item)

                collection_item.quicklook = pngname

                collection_bands = engine.session.query(Band).filter(
                    Band.collection_id == collection_item.collection_id).all()

                # Insert data into the Asset table
                for band in files:
                    template = resource_path.join(asset_url, Path(files[band]).name)

                    dataset = GDALOpen(files[band], GA_ReadOnly)
                    asset_band = dataset.GetRasterBand(1)

                    chunk_x, chunk_y = asset_band.GetBlockSize()

                    band_model = next(filter(lambda b: band == b.common_name, collection_bands), None)

                    if not band_model:
                        logging.warning('Band {} of collection {} not found in database. Skipping...'
                                        .format(band, collection_item.collection_id))
                        continue

                    defaults = dict(url=template,
                                    source=cc[0],
                                    raster_size_x=dataset.RasterXSize,
                                    raster_size_y=dataset.RasterYSize,
                                    raster_size_t=1,
                                    chunk_size_t=1,
                                    chunk_size_x=chunk_x,
                                    chunk_size_y=chunk_y)

                    asset, _ = get_or_create_model(
                        Asset,
                        engine=engine,
                        defaults=defaults,
                        collection_id=scene.collection_id,
                        band_id=band_model.id,
                        grs_schema_id=scene.collection.grs_schema_id,
                        tile_id=collection_item.tile_id,
                        collection_item_id=collection_item.id,
                    )
                    asset.url = defaults['url']

                    assets_to_upload[band] = dict(file=files[band], asset=asset.url)

                    # Add into scope of local and remote database
                    add_instance(engine, asset)

        # Persist database
        commit(engine)

    return assets_to_upload
def publish(collection_item: CollectionItem, scene: RadcorActivity):
    """Publish Landsat collection.

    It works with both Digital Number (DN) and Surface Reflectance (SR).

    Args:
        collection_item - Collection Item
        scene - Current Activity
    """
    identifier = scene.sceneid

    # Get collection level to publish. Default is level 1
    collection_level = scene.args.get('level') or 1

    landsat_scene = factory.get_from_sceneid(identifier, level=collection_level)

    productdir = scene.args.get('file')

    logging.warning('Publish {} - {} (id={})'.format(scene.collection_id, productdir, scene.id))

    if productdir and productdir.endswith('.gz'):
        target_dir = landsat_scene.path()
        makedirs(target_dir, exist_ok=True)

        productdir = uncompress(productdir, str(target_dir))

    collection = Collection.query().filter(Collection.id == collection_item.collection_id).one()

    quicklook = collection.bands_quicklook.split(',') if collection.bands_quicklook else DEFAULT_QUICK_LOOK_BANDS

    files = {}
    qlfiles = {}

    bands = landsat_scene.get_band_map()

    for gband, band in bands.items():
        fs = landsat_scene.get_files()

        if not fs:
            continue

        for f in fs:
            if f.stem.endswith(band) and f.suffix.lower().endswith('.tif'):
                files[gband] = f
                if gband in quicklook:
                    qlfiles[gband] = str(f)

    # Generate Vegetation Index files
    generate_vi(productdir, files)

    # Apply the valid range and generate COG files
    for band, file_path in files.items():
        tif_file = str(file_path)

        if landsat_scene.level == 2:
            _ = apply_valid_range(tif_file, tif_file)

        # Set destination of COG file
        files[band] = generate_cogs(tif_file, tif_file)

        if not is_valid_tif(tif_file):
            raise RuntimeError('Not Valid {}'.format(tif_file))

    # Extract basic scene information and build the quicklook
    pngname = productdir + '/{}.png'.format(identifier)

    dataset = GDALOpen(qlfiles['nir'], GA_ReadOnly)
    numlin = 768
    numcol = int(float(dataset.RasterXSize) / float(dataset.RasterYSize) * numlin)

    del dataset

    create_quick_look(pngname, [qlfiles[band] for band in quicklook if band in qlfiles],
                      rows=numlin, cols=numcol)

    productdir = productdir.replace(Config.DATA_DIR, '')

    assets_to_upload = {
        'quicklook': dict(file=pngname, asset=productdir.replace('/Repository/Archive', ''))
    }

    for instance in ['local', 'aws']:
        engine_instance = {'local': db, 'aws': db_aws}
        engine = engine_instance[instance]

        # Skip catalog on aws for digital number
        if landsat_scene.level == 1 and instance == 'aws':
            continue

        if instance == 'aws':
            asset_url = productdir.replace('/Repository/Archive', Config.AWS_BUCKET_NAME)
        else:
            asset_url = productdir

        pngname = resource_path.join(asset_url, Path(pngname).name)
        assets_to_upload['quicklook']['asset'] = pngname

        with engine.session.begin_nested():
            with engine.session.no_autoflush:
                # Add collection item to the session if not present
                if collection_item not in engine.session:
                    item = engine.session.query(CollectionItem).filter(
                        CollectionItem.id == collection_item.id).first()

                    if not item:
                        cloned_properties = CollectionItemForm().dump(collection_item)
                        collection_item = CollectionItem(**cloned_properties)
                        engine.session.add(collection_item)

                collection_item.quicklook = pngname

                collection_bands = engine.session.query(Band)\
                    .filter(Band.collection_id == collection_item.collection_id)\
                    .all()

                # Insert data into the Asset table
                for band in files:
                    template = resource_path.join(asset_url, Path(files[band]).name)

                    dataset = GDALOpen(files[band], GA_ReadOnly)
                    asset_band = dataset.GetRasterBand(1)

                    chunk_x, chunk_y = asset_band.GetBlockSize()

                    band_model = next(filter(lambda b: band == b.common_name, collection_bands), None)

                    if not band_model:
                        logging.warning('Band {} of collection {} not found in database. Skipping...'
                                        .format(band, collection_item.collection_id))
                        continue

                    defaults = dict(url=template,
                                    source=landsat_scene.source(),
                                    raster_size_x=dataset.RasterXSize,
                                    raster_size_y=dataset.RasterYSize,
                                    raster_size_t=1,
                                    chunk_size_t=1,
                                    chunk_size_x=chunk_x,
                                    chunk_size_y=chunk_y)

                    asset, _ = get_or_create_model(
                        Asset,
                        engine=engine,
                        defaults=defaults,
                        collection_id=scene.collection_id,
                        band_id=band_model.id,
                        grs_schema_id=scene.collection.grs_schema_id,
                        tile_id=collection_item.tile_id,
                        collection_item_id=collection_item.id,
                    )
                    asset.url = defaults['url']

                    assets_to_upload[band] = dict(file=files[band], asset=asset.url)

                    # Add into scope of local and remote database
                    add_instance(engine, asset)

        # Persist database
        commit(engine)

    return assets_to_upload
def list_cubes(cls):
    """Retrieve the list of data cubes from the Brazil Data Cube database."""
    cubes = Collection.query().filter(Collection.is_cube.is_(True)).all()

    return [Serializer.serialize(cube) for cube in cubes], 200
def create_cube(self, params):
    params['composite_function_list'] = ['IDENTITY', 'STK', 'MED']

    # Generate cubes metadata
    cubes_db = Collection.query().filter().all()

    cubes = []
    cubes_serialized = []
    for composite_function in params['composite_function_list']:
        c_function_id = composite_function.upper()

        raster_size_id = '{}-{}'.format(params['grs'], int(params['resolution']))

        cube_id = get_cube_id(params['datacube'], c_function_id)

        # Add cube if it does not exist yet
        if not list(filter(lambda x: x.id == cube_id, cubes)) and \
                not list(filter(lambda x: x.id == cube_id, cubes_db)):
            cube = Collection(
                id=cube_id,
                temporal_composition_schema_id=params['temporal_schema'] if c_function_id.upper() != 'IDENTITY' else 'Anull',
                raster_size_schema_id=raster_size_id,
                composite_function_schema_id=c_function_id,
                grs_schema_id=params['grs'],
                description=params['description'],
                radiometric_processing=None,
                geometry_processing=None,
                sensor=None,
                is_cube=True,
                oauth_scope=params.get('oauth_scope', None),
                license=params['license'],
                bands_quicklook=','.join(params['bands_quicklook']),
                metadata=params['metadata'])

            cubes.append(cube)
            cubes_serialized.append(Serializer.serialize(cube))

    BaseModel.save_all(cubes)

    bands = []
    for cube in cubes:
        # Save bands
        for band in params['bands']:
            band = band.strip()

            # Skip band CNC for IDENTITY cubes and band quality for composite cubes
            if (band == 'cnc' and cube.composite_function_schema_id == 'IDENTITY') or \
                    (band == 'quality' and cube.composite_function_schema_id != 'IDENTITY'):
                continue

            is_not_cloud = band != 'quality' and band != 'cnc'

            bands.append(Band(
                name=band,
                collection_id=cube.id,
                min=0 if is_not_cloud else 0,
                max=10000 if is_not_cloud else 255,
                fill=-9999 if is_not_cloud else 0,
                scale=0.0001 if is_not_cloud else 1,
                data_type='int16' if is_not_cloud else 'Uint16',
                common_name=band,
                resolution_x=params['resolution'],
                resolution_y=params['resolution'],
                resolution_unit='m',
                description='',
                mime_type='image/tiff'))

    BaseModel.save_all(bands)

    return cubes_serialized, 201
def create(cls, params: dict):
    """Create and persist a data cube in the database."""
    params['composite_function_list'] = ['IDENTITY', 'STK', 'MED']

    # Generate cubes metadata
    cubes_db = Collection.query().filter().all()

    cubes = []
    cubes_serialized = []
    for composite_function in params['composite_function_list']:
        c_function_id = composite_function.upper()

        cube_id = get_cube_id(params['datacube'], c_function_id)

        raster_size_id = '{}-{}'.format(params['grs'], int(params['resolution']))

        temporal_composition = params['temporal_schema'] if c_function_id.upper() != 'IDENTITY' else 'Anull'

        # Add cube if it does not exist yet
        if not list(filter(lambda x: x.id == cube_id, cubes)) and \
                not list(filter(lambda x: x.id == cube_id, cubes_db)):
            cube = Collection(
                id=cube_id,
                temporal_composition_schema_id=temporal_composition,
                raster_size_schema_id=raster_size_id,
                composite_function_schema_id=c_function_id,
                grs_schema_id=params['grs'],
                description=params['description'],
                radiometric_processing=None,
                geometry_processing=None,
                sensor=None,
                is_cube=True,
                oauth_scope=params.get('oauth_scope', None),
                bands_quicklook=','.join(params['bands_quicklook']),
                license=params.get('license'))

            cubes.append(cube)
            cubes_serialized.append(CollectionForm().dump(cube))

    BaseModel.save_all(cubes)

    bands = []

    for cube in cubes:
        fragments = get_cube_parts(cube.id)

        # An IDENTITY data cube is composed of CollectionName and Resolution (LC8_30, S2_10)
        is_identity = len(fragments) == 2

        # Save bands
        for band in params['bands']:
            # Skip creation of band CNC for IDENTITY data cubes
            # and of band quality for composite data cubes
            if (band == 'cnc' and is_identity) or (band == 'quality' and not is_identity):
                continue

            is_not_cloud = band != 'quality' and band != 'cnc'
            band = band.strip()

            bands.append(Band(
                name=band,
                collection_id=cube.id,
                min=0 if is_not_cloud else 0,
                max=10000 if is_not_cloud else 255,
                fill=-9999 if is_not_cloud else 0,
                scale=0.0001 if is_not_cloud else 1,
                data_type='int16' if is_not_cloud else 'Uint16',
                common_name=band,
                resolution_x=params['resolution'],
                resolution_y=params['resolution'],
                resolution_unit='m',
                description='',
                mime_type='image/tiff'))

    BaseModel.save_all(bands)

    return cubes_serialized, 201
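# Illustration of the identity check used in `create` above. `get_cube_parts` is assumed to
# split a cube identifier on '_', so an IDENTITY cube yields two fragments while a composite
# cube yields three. The example identifiers below are hypothetical.
#
#   get_cube_parts('LC8_30')      -> ['LC8', '30']          # identity cube -> receives 'quality' band
#   get_cube_parts('LC8_30_MED')  -> ['LC8', '30', 'MED']   # composite cube -> receives 'cnc' band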
def orchestrate(self): """Orchestrate datacube defintion and prepare temporal resolutions.""" self.datacube = Collection.query().filter( Collection.id == self.params['datacube']).one() temporal_schema = self.datacube.temporal_composition_schema.temporal_schema temporal_step = self.datacube.temporal_composition_schema.temporal_composite_t # Create tiles self.create_tiles(self.params['tiles'], self.datacube) cube_start_date = self.params['start_date'] dstart = self.params['start_date'] dend = self.params['end_date'] if cube_start_date is None: cube_start_date = dstart.strftime('%Y-%m-%d') cube_end_date = dend.strftime('%Y-%m-%d') periodlist = decode_periods(temporal_schema, cube_start_date, cube_end_date, int(temporal_step)) where = [Tile.grs_schema_id == self.datacube.grs_schema_id] if self.params.get('tiles'): where.append(Tile.id.in_(self.params['tiles'])) self.tiles = Tile.query().filter(*where).all() self.bands = Band.query().filter( Band.collection_id == self.warped_datacube.id).all() number_cols = int(self.datacube.raster_size_schemas.raster_size_x) number_rows = int(self.datacube.raster_size_schemas.raster_size_y) for tile in self.tiles: self.mosaics[tile.id] = dict(periods=dict()) for datekey in sorted(periodlist): requested_period = periodlist[datekey] for periodkey in requested_period: _, startdate, enddate = periodkey.split('_') if dstart is not None and startdate < dstart.strftime( '%Y-%m-%d'): continue if dend is not None and enddate > dend.strftime( '%Y-%m-%d'): continue self.mosaics[tile.id]['periods'][periodkey] = {} self.mosaics[ tile.id]['periods'][periodkey]['start'] = startdate self.mosaics[ tile.id]['periods'][periodkey]['end'] = enddate self.mosaics[ tile.id]['periods'][periodkey]['cols'] = number_cols self.mosaics[ tile.id]['periods'][periodkey]['rows'] = number_rows self.mosaics[tile.id]['periods'][periodkey][ 'dirname'] = '{}/{}/{}-{}/'.format( self.datacube.id, tile.id, startdate, enddate)
def get_collection(self, activity) -> Collection:
    """Retrieve the collection associated with the Builder Activity."""
    return Collection.query().filter(Collection.id == activity.collection_id).one()
def warped_datacube(self):
    """Retrieve the warped data cube collection associated with this data cube."""
    datacube_warped = '{}WARPED'.format(self.datacube.id[:-3])

    return Collection.query().filter(Collection.id == datacube_warped).first()
def orchestrate(self):
    """Orchestrate data cube definition and prepare temporal resolutions."""
    self.datacube = Collection.query().filter(Collection.id == self.params['datacube']).one()

    temporal_schema = self.datacube.temporal_composition_schema.temporal_schema
    temporal_step = self.datacube.temporal_composition_schema.temporal_composite_t

    # Create tiles
    self.create_tiles(self.params['tiles'], self.datacube)

    # TODO: Check in STAC for cube item
    # datacube_stac = stac_cli.collection(self.datacube.id)

    collections_items = CollectionItem.query().filter(
        CollectionItem.collection_id == self.datacube.id,
        CollectionItem.grs_schema_id == self.datacube.grs_schema_id
    ).order_by(CollectionItem.composite_start).all()

    cube_start_date = self.params['start_date']

    if list(filter(lambda c_i: c_i.tile_id == self.params['tiles'][0], collections_items)):
        cube_start_date = collections_items[0].composite_start

    dstart = self.params['start_date']
    dend = self.params['end_date']

    if cube_start_date is None:
        cube_start_date = dstart.strftime('%Y-%m-%d')

    cube_end_date = dend.strftime('%Y-%m-%d')

    periodlist = decode_periods(temporal_schema, cube_start_date, cube_end_date, int(temporal_step))

    where = [Tile.grs_schema_id == self.datacube.grs_schema_id]

    if self.params.get('tiles'):
        where.append(Tile.id.in_(self.params['tiles']))

    self.tiles = Tile.query().filter(*where).all()

    self.bands = Band.query().filter(Band.collection_id == self.datacube.id).all()

    number_cols = self.datacube.raster_size_schemas.raster_size_x
    number_rows = self.datacube.raster_size_schemas.raster_size_y

    for tile in self.tiles:
        self.mosaics[tile.id] = dict(periods=dict())

        for datekey in sorted(periodlist):
            requested_period = periodlist[datekey]

            for periodkey in requested_period:
                _, startdate, enddate = periodkey.split('_')

                if dstart is not None and startdate < dstart.strftime('%Y-%m-%d'):
                    continue

                if dend is not None and enddate > dend.strftime('%Y-%m-%d'):
                    continue

                self.mosaics[tile.id]['periods'][periodkey] = dict(
                    start=startdate,
                    end=enddate,
                    cols=number_cols,
                    rows=number_rows,
                    dirname='{}/{}/{}-{}/'.format(self.datacube.id, tile.id, startdate, enddate)
                )
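# Shape of `self.mosaics` produced by the `orchestrate` methods above, with hypothetical
# tile and period values shown for illustration. Each period key splits into three parts
# (prefix, start date, end date), matching `periodkey.split('_')` in the code.
#
#   {
#       '<tile_id>': {
#           'periods': {
#               '<prefix>_2019-01-01_2019-01-16': {
#                   'start': '2019-01-01',
#                   'end': '2019-01-16',
#                   'cols': number_cols,
#                   'rows': number_rows,
#                   'dirname': '<datacube_id>/<tile_id>/2019-01-01-2019-01-16/'
#               }
#           }
#       }
#   }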