@classmethod
def get_cube_status(cls, cube_name: str) -> Tuple[dict, int]:
    """Retrieve the processing status of a given data cube."""
    cube = cls.get_cube_or_404(cube_full_name=cube_name)

    dates = db.session.query(
        sqlalchemy.func.min(Activity.created),
        sqlalchemy.func.max(Activity.created)
    ).first()

    count_items = Item.query().filter(Item.collection_id == cube.id).count()

    # list_tasks = list_pending_tasks() + list_running_tasks()
    # count_tasks = len(list(filter(lambda t: t['collection_id'] == cube_name, list_tasks)))
    count_tasks = 0

    count_acts_errors = Activity.query().filter(
        Activity.collection_id == cube.name,
        Activity.status == 'FAILURE').count()

    count_acts_success = Activity.query().filter(
        Activity.collection_id == cube.name,
        Activity.status == 'SUCCESS').count()

    if count_tasks > 0:
        return dict(finished=False,
                    done=count_acts_success,
                    not_done=count_tasks,
                    error=count_acts_errors), 200

    return dict(finished=True,
                start_date=str(dates[0]),
                last_date=str(dates[1]),
                done=count_acts_success,
                error=count_acts_errors,
                collection_item=count_items), 200
def post(activity: dict, collection_id=None, **kwargs):
    """Celery task to deal with data post processing."""
    execution = execution_from_collection(activity, collection_id=collection_id,
                                          activity_type=post.__name__)

    collection = execution.activity.collection
    scene_id = activity['sceneid']

    logging.info(
        f'Starting Post Processing Task for {collection.name}(id={collection.id}, scene_id={scene_id})'
    )

    item = Item.query().filter(
        Item.name == activity['sceneid'],
        Item.collection_id == collection.id).first_or_404()

    scenes = {}
    quality_path = None

    for asset_name, asset in item.assets.items():
        if asset_name in ('thumbnail',) or 'ndvi' in asset_name or 'evi' in asset_name:
            continue

        if asset_name == 'Fmask4':
            quality_path = get_item_path(asset['href'])
            continue

        scenes[asset_name] = get_item_path(asset['href'])

    # TODO: Look in bands and get resolution
    resample = None

    if activity['sceneid'].startswith('S2'):
        resample = 10

    post_processing(quality_path, collection, scenes, resample_to=resample)
    # TODO: Create new band

    return activity
def publish(collection_item: Item, scene: RadcorActivity, skip_l1=False, **kwargs):
    """Publish Landsat collection.

    It works with both Digital Number (DN) and Surface Reflectance (SR).

    Args:
        collection_item - Collection Item
        scene - Current Activity
    """
    identifier = scene.sceneid

    # Get collection level to publish. Default is l1
    collection_level = scene.args.get('level') or 1

    landsat_scene = factory.get_from_sceneid(identifier, level=collection_level)

    productdir = scene.args.get('file')

    logging.warning('Publish {} - {} (id={})'.format(scene.collection_id, productdir, scene.id))

    if productdir and productdir.endswith('.gz'):
        target_dir = landsat_scene.path()
        makedirs(target_dir, exist_ok=True)

        productdir = uncompress(productdir, str(target_dir))

    collection = Collection.query().filter(
        Collection.id == collection_item.collection_id).one()

    quicklook = Quicklook.query().filter(
        Quicklook.collection_id == collection.id).first()

    if quicklook:
        # The quicklook definition stores the band ids used for red/green/blue.
        quicklook_bands = Band.query().filter(
            Band.id.in_([quicklook.red, quicklook.green, quicklook.blue])).all()
        quicklook = [quicklook_bands[0].name, quicklook_bands[1].name, quicklook_bands[2].name]
    else:
        quicklook = DEFAULT_QUICK_LOOK_BANDS

    files = {}
    qlfiles = {}

    bands = landsat_scene.get_band_map()

    for gband, band in bands.items():
        fs = landsat_scene.get_files()

        if not fs:
            continue

        for f in fs:
            if f.stem.endswith(band) and f.suffix.lower().endswith('.tif'):
                files[gband] = f
                if gband in quicklook:
                    qlfiles[gband] = str(f)

    # Generate Vegetation Index files
    generate_vi(productdir, files)

    # Apply valid range and Cog files
    for band, file_path in files.items():
        tif_file = str(file_path)

        if landsat_scene.level == 2:
            _ = apply_valid_range(tif_file, tif_file)

        # Set destination of COG file
        files[band] = generate_cogs(tif_file, tif_file)

        if not is_valid_tif(tif_file):
            raise RuntimeError('Not Valid {}'.format(tif_file))

    # Extract basic scene information and build the quicklook
    pngname = productdir + '/{}.png'.format(identifier)

    dataset = GDALOpen(qlfiles['nir'], GA_ReadOnly)
    numlin = 768
    numcol = int(float(dataset.RasterXSize) / float(dataset.RasterYSize) * numlin)
    del dataset

    create_quick_look(pngname,
                      [qlfiles[band] for band in quicklook if band in qlfiles],
                      rows=numlin, cols=numcol)

    productdir = productdir.replace(Config.DATA_DIR, '')

    assets_to_upload = {
        'quicklook': dict(file=pngname, asset=productdir.replace('/Repository/Archive', ''))
    }

    for instance in ['local', 'aws']:
        engine_instance = {
            'local': db,
            'aws': db_aws
        }
        engine = engine_instance[instance]

        # Skip catalog on aws for digital number
        if landsat_scene.level == 1 and instance == 'aws':
            continue

        if instance == 'aws':
            if Config.DISABLE_PUBLISH_SECOND_DB:
                logging.info('Skipping publish in second db.')
                continue

            asset_url = productdir.replace('/Repository/Archive', Config.AWS_BUCKET_NAME)
        else:
            asset_url = productdir

        pngname_relative = resource_path.join(asset_url, Path(pngname).name)

        assets_to_upload['quicklook']['asset'] = pngname_relative

        with engine.session.begin_nested():
            with engine.session.no_autoflush:
                # Add collection item to the session if not present
                if collection_item not in engine.session:
                    item = engine.session.query(Item).filter(
                        Item.name == collection_item.name,
                        Item.collection_id == collection_item.collection_id).first()

                    if not item:
                        cloned_properties = CollectionItemForm().dump(collection_item)
                        collection_item = Item(**cloned_properties)
                        engine.session.add(collection_item)

                collection_bands = engine.session.query(Band)\
                    .filter(Band.collection_id == collection_item.collection_id)\
                    .all()

                assets = dict(
                    thumbnail=create_asset_definition(str(pngname_relative), 'image/png',
                                                      ['thumbnail'], str(pngname))
                )

                geom = min_convex_hull = None

                # Inserting data into Product table
                for band in files:
                    template = resource_path.join(asset_url, Path(files[band]).name)

                    band_model = next(
                        filter(lambda b: band == b.common_name, collection_bands), None)

                    if not band_model:
                        logging.warning(
                            'Band {} of collection {} not found in database. Skipping...'
                            .format(band, collection_item.collection_id))
                        continue

                    if geom is None:
                        geom = raster_extent(files[band])
                        min_convex_hull = raster_convexhull(files[band])

                    assets[band_model.name] = create_asset_definition(
                        template, COG_MIME_TYPE, ['data'], files[band], is_raster=True)

                    assets_to_upload[band] = dict(file=files[band], asset=template)

                collection_item.assets = assets
                collection_item.geom = from_shape(geom, srid=4326)
                collection_item.min_convex_hull = from_shape(min_convex_hull, srid=4326)
                # Add into scope of local and remote database
                add_instance(engine, collection_item)

        # Persist database
        commit(engine)

    return assets_to_upload
def get_cube_status(self, cube_name):
    """Retrieve the processing status of a given data cube."""
    cube = self.get_cube_or_404(cube_full_name=cube_name)
    irregular_cube = cube

    # split and format datacube NAME
    datacube = cube.name
    parts_cube_name = get_cube_parts(datacube)
    irregular_datacube = '_'.join(parts_cube_name[:2])
    is_regular = cube.composite_function.alias != 'IDT'

    if not is_regular:
        irregular_datacube += '_'

    activities = self.services.get_control_activities(irregular_datacube)
    count = int(sum([a['tobe_done'] for a in activities if 'tobe_done' in a]))
    done = int(sum([a['mycount'] for a in activities]))
    errors = int(sum([a['errors'] for a in activities]))
    not_done = count - done

    if not_done > 0:
        return dict(finished=False, done=done, error=errors, not_done=not_done), 200

    # TIME
    acts = sorted(activities, key=lambda i: i['start_date'])
    start_date = get_date(acts[0]['start_date'])
    acts_order_by_end = sorted(activities, key=lambda i: i['end_date'])
    end_date = get_date(acts_order_by_end[-1]['end_date'])

    if len(acts):
        time = 0
        list_dates = []
        for a in acts:
            start = get_date(a['start_date'])
            end = get_date(a['end_date'])

            if len(list_dates) == 0:
                time += (end - start).seconds
                list_dates.append({'s': start, 'e': end})
                continue

            time_by_act = 0
            i = 0
            for dates in list_dates:
                i += 1
                if dates['s'] < start < dates['e']:
                    value = (end - dates['e']).seconds
                    if value > 0 and value < time_by_act:
                        time_by_act = value

                elif dates['s'] < end < dates['e']:
                    value = (dates['s'] - start).seconds
                    if value > 0 and value < time_by_act:
                        time_by_act = value

                elif start >= dates['e'] or end <= dates['s']:
                    value = (end - start).seconds
                    if value < time_by_act or i == 1:
                        time_by_act = value

                elif start < dates['s'] or end > dates['e']:
                    time_by_act = 0

            time += time_by_act
            list_dates.append({'s': start, 'e': end})

        # Format the elapsed time as hours, minutes and seconds.
        time_str = '{} h {} m {} s'.format(
            int(time // 3600), int((time % 3600) // 60), int(time % 60))

        quantity_coll_items = Item.query().filter(
            Item.collection_id == cube.id).count()

        return dict(finished=True,
                    start_date=str(start_date),
                    last_date=str(end_date),
                    done=done,
                    duration=time_str,
                    collection_item=quantity_coll_items), 200

    return dict(finished=False, done=0, not_done=0, error=0), 200
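# NOTE: The wall-clock accumulation above tries to discount overlapping activity
# windows, but it initializes ``time_by_act`` to zero, so most overlap branches
# can never update it. Below is a minimal sketch of the intended behaviour,
# assuming each activity exposes ``start_date``/``end_date`` values parseable by
# ``get_date``: merge the sorted intervals and sum the merged spans. The helper
# name ``total_duration_seconds`` is hypothetical and not part of the original
# module; it only illustrates the interval-union idea.
def total_duration_seconds(activities, start_key='start_date', end_key='end_date'):
    """Sum the union of [start, end] intervals, counting overlaps only once."""
    intervals = sorted(
        (get_date(a[start_key]), get_date(a[end_key])) for a in activities)

    total = 0
    current_start = current_end = None
    for start, end in intervals:
        if current_end is None or start > current_end:
            # Disjoint interval: close the previous span and open a new one.
            if current_end is not None:
                total += (current_end - current_start).total_seconds()
            current_start, current_end = start, end
        else:
            # Overlapping interval: extend the current span.
            current_end = max(current_end, end)

    if current_end is not None:
        total += (current_end - current_start).total_seconds()

    return int(total)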
def publish(collection_item: Item, scene: RadcorActivity, skip_l1=False, **kwargs):
    """Publish Sentinel collection.

    It works with both L1C and L2A.

    Args:
        collection_item - Collection Item
        scene - Current Activity
    """
    qlband = 'TCI'

    # Get collection level to publish. Default is l1
    # TODO: Check in database the scenes level 2 already published. We must set to level 2
    collection_level = scene.args.get('level') or 1

    if collection_level == 1 and skip_l1:
        logging.info(f'Skipping publish skip_l1={skip_l1} L1 - {collection_item.collection_id}')
        return dict()

    sentinel_scene = factory.get_from_sceneid(scene.sceneid, level=collection_level)
    harmonized_scene = factory.get_from_sceneid(scene.sceneid, level=3)

    product_uri = sentinel_scene.path()
    product_uri.mkdir(parents=True, exist_ok=True)

    band_map = sentinel_scene.get_band_map()

    if scene.collection_id == harmonized_scene.id:
        # Retrieves all tif files from scene
        tiffiles = get_tif_files(scene)

        # Find the desired files to be published and put them in files
        bands = []

        files = {}
        for tiffile in sorted(tiffiles):
            filename = os.path.basename(tiffile)
            parts = filename.split('_')
            band = parts[2][:-4]  # Band name, removing the .tif extension
            if band not in bands and band in band_map.keys():
                bands.append(band)
                files[band_map[band]] = tiffile

        logging.warning('Publish {} - {} (id={}, tiffiles={})'.format(
            scene.collection_id, scene.args.get('file'), scene.id, len(tiffiles)))

        # Define filenames for products
        parts = os.path.basename(tiffiles[0]).split('_')
        file_basename = '_'.join(parts[:-1])
        pngname = os.path.join(scene.args.get('file'), file_basename + '.png')
        copy(pngname, str(product_uri))
    else:
        # Retrieves all jp2 files from scene
        if sentinel_scene.level == 1:
            files_list = get_jp2_files(scene)
        else:
            files_list = sentinel_scene.get_files()

        # Find the desired files to be published and put them in files
        bands = []

        files = {}
        for file in sorted(files_list):
            filename = Path(file).stem
            parts = filename.split('_')

            if len(parts) in (3, 8):
                band = parts[-1]
            else:
                band = '_'.join(parts[-2:])

            if band not in bands and band in band_map.keys():
                bands.append(band)
                files[band_map[band]] = str(file)
            elif band == qlband:
                files['qlfile'] = str(file)

        logging.warning('Publish {} - {} (id={}, files={})'.format(
            scene.collection_id, scene.args.get('file'), scene.id, len(files)))

        if len(files.keys()) == 0:
            raise RuntimeError('No files found for {} - {}'.format(scene.sceneid, str(product_uri)))

        # Retrieve a file name and use as reference for the Vegetation Index files
        file_name = Path(files.get('quality', list(files.values())[0])).name
        file_basename = '_'.join(file_name.split('_')[:-1])

    # Create vegetation index
    generate_vi(file_basename, str(product_uri), files)

    bands.append('NDVI')
    bands.append('EVI')

    band_map['NDVI'] = 'ndvi'
    band_map['EVI'] = 'evi'

    for sband in bands:
        band = band_map[sband]
        file = files[band]

        # Set destination of COG file
        cog_file_name = '{}_{}.tif'.format(file_basename, sband)
        cog_file_path = product_uri / cog_file_name

        files[band] = generate_cogs(str(file), str(cog_file_path))

        if not is_valid_tif(cog_file_path):
            raise RuntimeError('Not Valid {}'.format(cog_file_path))

    assets_to_upload = {}

    for instance in ['local', 'aws']:
        engine_instance = {
            'local': db,
            'aws': db_aws
        }
        engine = engine_instance[instance]

        # Skip catalog on aws for digital number
        if sentinel_scene.level == 1 and instance == 'aws':
            continue

        base_file_prefix = 'Repository/Archive'

        if instance == 'aws':
            if Config.DISABLE_PUBLISH_SECOND_DB:
                logging.info('Skipping publish in second db.')
                continue

            asset_url = Config.AWS_BUCKET_NAME / (product_uri.relative_to(
                Path(Config.DATA_DIR) / base_file_prefix))
        else:
            asset_url = Path(Config.ITEM_ASSET_PREFIX) / product_uri.relative_to(
                Path(Config.DATA_DIR) / base_file_prefix)

        collection_bands = engine.session.query(Band).filter(
            Band.collection_id == scene.collection_id).all()

        with engine.session.begin_nested():
            with engine.session.no_autoflush:
                # Add collection item to the session if not present
                if collection_item not in engine.session:
                    item = engine.session.query(Item).filter(
                        Item.name == collection_item.name,
                        Item.collection_id == collection_item.collection_id).first()

                    if not item:
                        cloned_properties = CollectionItemForm().dump(collection_item)
                        cloned_item = Item(**cloned_properties)
                        engine.session.add(cloned_item)

                assets = dict()

                # Create quicklook file
                pngname = product_uri / '{}.png'.format(file_basename)

                if not pngname.exists():
                    # When TCI band found, use it to generate quicklook
                    if files.get('qlfile'):
                        create_quick_look_from_tci(str(pngname), files['qlfile'])
                    else:
                        create_quick_look(str(pngname),
                                          [files['red'], files['green'], files['blue']])

                normalized_quicklook_path = os.path.normpath(
                    '{}/{}'.format(str(asset_url), os.path.basename(pngname.name)))

                assets_to_upload['quicklook'] = dict(
                    asset=str(normalized_quicklook_path), file=str(pngname))

                assets['thumbnail'] = create_asset_definition(
                    str(normalized_quicklook_path), 'image/png', ['thumbnail'], str(pngname))

                geom = min_convex_hull = None

                # Convert original format to COG
                for sband in bands:
                    # Set destination of COG file
                    cog_file_name = '{}_{}.tif'.format(file_basename, sband)
                    cog_file_path = product_uri / cog_file_name

                    band_model = next(
                        filter(lambda b: b.name == sband, collection_bands), None)

                    if band_model is None:
                        logging.warning('Band {} not registered on database. Skipping'.format(sband))
                        continue

                    if geom is None:
                        geom = raster_extent(cog_file_path)
                        min_convex_hull = raster_convexhull(cog_file_path)

                    assets[band_model.name] = create_asset_definition(
                        f'{str(asset_url)}/{cog_file_name}', COG_MIME_TYPE,
                        ['data'], cog_file_path, is_raster=True)

                    assets_to_upload[sband] = dict(
                        file=str(cog_file_path),
                        asset=assets[band_model.name]['href'])

                collection_item.geom = from_shape(geom, srid=4326)
                collection_item.min_convex_hull = from_shape(min_convex_hull, srid=4326)
                collection_item.assets = assets

        commit(engine)

    return assets_to_upload
def get_cube_status(self, cube_name):
    """Retrieve the processing status of a given data cube."""
    cube = self.get_cube_or_404(cube_full_name=cube_name)
    datacube = cube.name

    # split and format datacube NAME
    parts_cube_name = get_cube_parts(datacube)
    irregular_datacube = '_'.join(parts_cube_name[:2])
    is_irregular = len(parts_cube_name) > 2
    datacube = '_'.join(get_cube_parts(datacube)[:3]) if is_irregular else irregular_datacube

    # STATUS
    acts_datacube = []
    not_done_datacube = 0
    error_datacube = 0
    if is_irregular:
        acts_datacube = self.services.get_activities_by_datacube(datacube)
        not_done_datacube = len(
            list(filter(lambda i: i['mystatus'] == 'NOTDONE', acts_datacube)))
        error_datacube = len(
            list(filter(lambda i: i['mystatus'] == 'ERROR', acts_datacube)))

    acts_irregular = self.services.get_activities_by_datacube(irregular_datacube)
    not_done_irregular = len(
        list(filter(lambda i: i['mystatus'] == 'NOTDONE', acts_irregular)))
    error_irregular = len(
        list(filter(lambda i: i['mystatus'] == 'ERROR', acts_irregular)))

    activities = acts_irregular + acts_datacube
    errors = error_irregular + error_datacube
    not_done = not_done_irregular + not_done_datacube

    if (not_done + errors):
        return dict(finished=False,
                    done=len(activities) - (not_done + errors),
                    not_done=not_done,
                    error=errors), 200

    # TIME
    acts = sorted(activities, key=lambda i: i['mylaunch'], reverse=True)
    start_date = get_date(acts[-1]['mylaunch'])
    end_date = get_date(acts[0]['myend'])

    time = 0
    list_dates = []
    for a in acts:
        start = get_date(a['mylaunch'])
        end = get_date(a['myend'])

        if len(list_dates) == 0:
            time += (end - start).seconds
            list_dates.append({'s': start, 'e': end})
            continue

        time_by_act = 0
        i = 0
        for dates in list_dates:
            i += 1
            if dates['s'] < start < dates['e']:
                value = (end - dates['e']).seconds
                if value > 0 and value < time_by_act:
                    time_by_act = value

            elif dates['s'] < end < dates['e']:
                value = (dates['s'] - start).seconds
                if value > 0 and value < time_by_act:
                    time_by_act = value

            elif start > dates['e'] or end < dates['s']:
                value = (end - start).seconds
                if value < time_by_act or i == 1:
                    time_by_act = value

            elif start < dates['s'] or end > dates['e']:
                time_by_act = 0

        time += time_by_act
        list_dates.append({'s': start, 'e': end})

    # Format the elapsed time as hours, minutes and seconds.
    time_str = '{} h {} m {} s'.format(
        int(time // 3600), int((time % 3600) // 60), int(time % 60))

    quantity_coll_items = Item.query().filter(
        Item.collection_id == cube.id).count()

    return dict(finished=True,
                start_date=str(start_date),
                last_date=str(end_date),
                done=len(activities),
                duration=time_str,
                collection_item=quantity_coll_items), 200
def download(activity: dict, **kwargs):
    """Celery task to download a data product from the given providers."""
    execution = create_execution(activity)

    collector_extension = flask_app.extensions['bdc:collector']

    collection = execution.activity.collection
    scene_id = execution.activity.sceneid

    logging.info(
        f'Starting Download Task for {collection.name}(id={collection.id}, scene_id={scene_id})'
    )

    # Use parallel flag for providers which have a maximum number of connections per client (Sentinel-Hub only)
    download_order = collector_extension.get_provider_order(
        collection, lazy=True, parallel=True, progress=False)

    if len(download_order) == 0:
        raise RuntimeError(f'No provider set for collection {collection.id}({collection.name})')

    data_collection = get_provider_collection_from_activity(activity)

    download_file = data_collection.compressed_file(collection)

    has_compressed_file = download_file is not None

    # For collections that do not have a compressed file (single file/folder), use the native path
    if download_file is None:
        download_file = data_collection.path(collection)

    is_valid_file = False

    item = Item.query().filter(
        Item.collection_id == collection.id,
        Item.name == scene_id).first()

    if item:
        # TODO: Get asset name of download file
        item_path = item.assets['asset']['href']
        item_path = item_path if not item_path.startswith('/') else item_path[1:]
        item_path = Path(Config.DATA_DIR) / item_path

        if item_path.exists():
            logging.info(f'Item {scene_id} exists. {str(item_path)} -> {str(download_file)}')
            download_file = item_path

    if download_file.exists() and has_compressed_file:
        logging.info('File {} downloaded. Checking file integrity...'.format(str(download_file)))
        # TODO: Should we validate using Factory Provider.is_valid() ?
        is_valid_file = is_valid_compressed_file(
            str(download_file)) if download_file.is_file() else False

    if not download_file.exists() or not is_valid_file:
        # Ensure file is removed since it may be corrupted
        if download_file.exists() and download_file.is_file():
            download_file.unlink()

        if not has_compressed_file:
            download_file.mkdir(exist_ok=True, parents=True)
        else:
            download_file.parent.mkdir(exist_ok=True, parents=True)

        with TemporaryDirectory(prefix='download_', suffix=f'_{scene_id}') as tmp:
            temp_file: Path = None

            should_retry = False

            # Try each provider in order until one succeeds.
            for collector in download_order:
                try:
                    logging.info(
                        f'Trying to download from {collector.provider_name}(id={collector.instance.id})')

                    temp_file = Path(
                        collector.download(scene_id, output=tmp,
                                           dataset=activity['args']['dataset']))

                    activity['args']['provider_id'] = collector.instance.id

                    break
                except DataOfflineError:
                    should_retry = True
                except Exception as e:
                    logging.error(
                        f'Download error in provider {collector.provider_name} - {str(e)}')

            if temp_file is None or not temp_file.exists():
                if should_retry:
                    raise DataOfflineError(scene_id)
                raise RuntimeError(f'Download fails {activity["sceneid"]}.')

            shutil.move(str(temp_file), str(download_file))

    refresh_execution_args(execution, activity, compressed_file=str(download_file))

    return activity
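# NOTE: ``is_valid_compressed_file`` is referenced above but not shown in this
# section. The sketch below is only an assumption of what such an integrity
# check could look like using the standard library, assuming downloads arrive
# as .zip or .tar.gz archives; it is an illustration, not the implementation
# used by the task. The name carries a ``_sketch`` suffix to mark it as
# hypothetical.
import tarfile
import zipfile


def is_valid_compressed_file_sketch(file_path: str) -> bool:
    """Return True when the archive can be opened and its index read."""
    try:
        if zipfile.is_zipfile(file_path):
            with zipfile.ZipFile(file_path) as archive:
                # testzip() returns the name of the first corrupted member, or None.
                return archive.testzip() is None
        if tarfile.is_tarfile(file_path):
            with tarfile.open(file_path) as archive:
                archive.getmembers()
                return True
    except (OSError, zipfile.BadZipFile, tarfile.TarError):
        return False
    return False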