Example No. 1
    def get_cube_status(cls, cube_name: str) -> Tuple[dict, int]:
        cube = cls.get_cube_or_404(cube_full_name=cube_name)

        dates = db.session.query(
            sqlalchemy.func.min(Activity.created),
            sqlalchemy.func.max(Activity.created)).first()

        count_items = Item.query().filter(
            Item.collection_id == cube.id).count()

        # list_tasks = list_pending_tasks() + list_running_tasks()
        # count_tasks = len(list(filter(lambda t: t['collection_id'] == cube_name, list_tasks)))
        count_tasks = 0

        count_acts_errors = Activity.query().filter(
            Activity.collection_id == cube.name,
            Activity.status == 'FAILURE').count()

        count_acts_success = Activity.query().filter(
            Activity.collection_id == cube.name,
            Activity.status == 'SUCCESS').count()

        if count_tasks > 0:
            return dict(finished=False,
                        done=count_acts_success,
                        not_done=count_tasks,
                        error=count_acts_errors), 200

        return dict(finished=True,
                    start_date=str(dates[0]),
                    last_date=str(dates[1]),
                    done=count_acts_success,
                    error=count_acts_errors,
                    collection_item=count_items), 200
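The two separate count() queries for FAILURE and SUCCESS above can be collapsed into a single round trip with conditional aggregation. A minimal sketch, assuming the same db, Activity, and cube names as in the example and SQLAlchemy 1.4+ (positional-tuple case()):

from sqlalchemy import case, func

counts = db.session.query(
    func.sum(case((Activity.status == 'SUCCESS', 1), else_=0)),
    func.sum(case((Activity.status == 'FAILURE', 1), else_=0)),
).filter(Activity.collection_id == cube.name).first()

count_acts_success = int(counts[0] or 0)  # SUM returns NULL when no rows match
count_acts_errors = int(counts[1] or 0)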
Example No. 2
def post(activity: dict, collection_id=None, **kwargs):
    """Celery task to deal with data post processing."""
    execution = execution_from_collection(activity,
                                          collection_id=collection_id,
                                          activity_type=post.__name__)

    collection = execution.activity.collection

    scene_id = activity['sceneid']

    logging.info(
        f'Starting Post Processing Task for {collection.name}(id={collection.id}, scene_id={scene_id})'
    )

    item = Item.query().filter(
        Item.name == activity['sceneid'],
        Item.collection_id == collection.id).first_or_404()

    scenes = {}

    quality_path = None

    for asset_name, asset in item.assets.items():
        if asset_name in (
                'thumbnail', ) or 'ndvi' in asset_name or 'evi' in asset_name:
            continue

        if asset_name == 'Fmask4':
            quality_path = get_item_path(asset['href'])
            continue

        scenes[asset_name] = get_item_path(asset['href'])

    # TODO: Look in bands and get resolution
    resample = None

    if activity['sceneid'].startswith('S2'):
        resample = 10

    post_processing(quality_path, collection, scenes, resample_to=resample)
    # TODO: Create new band

    return activity
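In the source project, post is presumably registered as a Celery task (the decorator is not shown above). A hypothetical dispatch, chaining it after the download task from Example No. 7, whose return value is exactly the activity dict that post expects as its first argument:

from celery import chain

# Hypothetical: assumes both functions are decorated with @celery_app.task
# and that collection_id=42 is a valid collection; neither is shown above.
chain(
    download.s(activity),       # returns the activity dict...
    post.s(collection_id=42),   # ...which Celery passes in here as `activity`
).apply_async()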
Example No. 3
def publish(collection_item: Item,
            scene: RadcorActivity,
            skip_l1=False,
            **kwargs):
    """Publish Landsat collection.

    It works with both Digital Number (DN) and Surface Reflectance (SR).

    Args:
        collection_item - Collection Item
        scene - Current Activity
    """
    identifier = scene.sceneid

    # Get collection level to publish. Default is l1
    collection_level = scene.args.get('level') or 1

    landsat_scene = factory.get_from_sceneid(identifier,
                                             level=collection_level)

    productdir = scene.args.get('file')

    logging.warning('Publish {} - {} (id={})'.format(scene.collection_id,
                                                     productdir, scene.id))

    if productdir and productdir.endswith('.gz'):
        target_dir = landsat_scene.path()
        makedirs(target_dir, exist_ok=True)

        productdir = uncompress(productdir, str(target_dir))

    collection = Collection.query().filter(
        Collection.id == collection_item.collection_id).one()

    quicklook = Quicklook.query().filter(
        Quicklook.collection_id == collection.id).first()

    if quicklook:
        quicklook_bands = Band.query().filter(
            Band.id.in_([quicklook.red, quicklook.green,
                         quicklook.blue])).all()
        quicklook = [
            quicklook_bands[0].name, quicklook_bands[1].name,
            quicklook_bands[2].name
        ]
    else:
        quicklook = DEFAULT_QUICK_LOOK_BANDS

    files = {}
    qlfiles = {}

    bands = landsat_scene.get_band_map()

    for gband, band in bands.items():
        fs = landsat_scene.get_files()

        if not fs:
            continue

        for f in fs:
            if f.stem.endswith(band) and f.suffix.lower().endswith('.tif'):
                files[gband] = f
                if gband in quicklook:
                    qlfiles[gband] = str(f)

    # Generate Vegetation Index files
    generate_vi(productdir, files)

    # Apply valid range and Cog files
    for band, file_path in files.items():
        tif_file = str(file_path)

        if landsat_scene.level == 2:
            _ = apply_valid_range(tif_file, tif_file)

        # Set destination of COG file
        files[band] = generate_cogs(tif_file, tif_file)
        if not is_valid_tif(tif_file):
            raise RuntimeError('Not Valid {}'.format(tif_file))

    # Extract basic scene information and build the quicklook
    pngname = productdir + '/{}.png'.format(identifier)

    dataset = GDALOpen(qlfiles['nir'], GA_ReadOnly)
    numlin = 768
    numcol = int(
        float(dataset.RasterXSize) / float(dataset.RasterYSize) * numlin)
    del dataset

    create_quick_look(pngname,
                      [qlfiles[band] for band in quicklook if band in qlfiles],
                      rows=numlin,
                      cols=numcol)

    productdir = productdir.replace(Config.DATA_DIR, '')

    assets_to_upload = {
        'quicklook':
        dict(file=pngname, asset=productdir.replace('/Repository/Archive', ''))
    }

    for instance in ['local', 'aws']:
        engine_instance = {'local': db, 'aws': db_aws}
        engine = engine_instance[instance]

        # Skip catalog on aws for digital number
        if landsat_scene.level == 1 and instance == 'aws':
            continue

        if instance == 'aws':
            if Config.DISABLE_PUBLISH_SECOND_DB:
                logging.info('Skipping publish in second db.')
                continue

            asset_url = productdir.replace('/Repository/Archive',
                                           Config.AWS_BUCKET_NAME)
        else:
            asset_url = productdir

        pngname_relative = resource_path.join(asset_url, Path(pngname).name)

        assets_to_upload['quicklook']['asset'] = pngname_relative

        with engine.session.begin_nested():
            with engine.session.no_autoflush:
                # Add collection item to the session if not present
                if collection_item not in engine.session:
                    item = engine.session.query(Item).filter(
                        Item.name == collection_item.name,
                        Item.collection_id == collection_item.collection_id
                    ).first()

                    if not item:
                        cloned_properties = CollectionItemForm().dump(
                            collection_item)
                        collection_item = Item(**cloned_properties)
                        engine.session.add(collection_item)

                collection_bands = engine.session.query(Band)\
                    .filter(Band.collection_id == collection_item.collection_id)\
                    .all()

                assets = dict(thumbnail=create_asset_definition(
                    str(pngname_relative), 'image/png', ['thumbnail'],
                    str(pngname)))

                geom = min_convex_hull = None

                # Inserting data into Product table
                for band in files:
                    template = resource_path.join(asset_url,
                                                  Path(files[band]).name)

                    band_model = next(
                        filter(lambda b: band == b.common_name,
                               collection_bands), None)

                    if not band_model:
                        logging.warning(
                            'Band {} of collection {} not found in database. Skipping...'
                            .format(band, collection_item.collection_id))
                        continue

                    if geom is None:
                        geom = raster_extent(files[band])
                        min_convex_hull = raster_convexhull(files[band])

                    assets[band_model.name] = create_asset_definition(
                        template,
                        COG_MIME_TYPE, ['data'],
                        files[band],
                        is_raster=True)

                    assets_to_upload[band] = dict(file=files[band],
                                                  asset=template)

                collection_item.assets = assets
                collection_item.geom = from_shape(geom, srid=4326)
                collection_item.min_convex_hull = from_shape(min_convex_hull,
                                                             srid=4326)
                # Add into scope of local and remote database
                add_instance(engine, collection_item)

        # Persist database
        commit(engine)

    return assets_to_upload
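For reference, the mapping returned above carries one upload entry per published asset. An illustrative shape (paths are placeholders, not values from the source):

assets_to_upload = {
    'quicklook': dict(file='/.../scene.png', asset='/.../scene.png'),
    'red': dict(file='/.../scene_B4.tif', asset='/.../scene_B4.tif'),
    # ... one dict(file=..., asset=...) entry per band found in `files`
}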
Example No. 4
    def get_cube_status(self, cube_name):
        cube = self.get_cube_or_404(cube_full_name=cube_name)
        irregular_cube = cube

        # split and format datacube NAME
        datacube = cube.name
        parts_cube_name = get_cube_parts(datacube)
        irregular_datacube = '_'.join(parts_cube_name[:2])
        is_regular = cube.composite_function.alias != 'IDT'

        if not is_regular:
            irregular_datacube += '_'

        activities = self.services.get_control_activities(irregular_datacube)
        count = int(
            sum([a['tobe_done'] for a in activities if 'tobe_done' in a]))
        done = int(sum([a['mycount'] for a in activities]))
        errors = int(sum([a['errors'] for a in activities]))
        not_done = count - done

        if not_done > 0:
            return dict(finished=False,
                        done=done,
                        error=errors,
                        not_done=not_done), 200

        if not activities:
            return dict(finished=False, done=0, not_done=0, error=0), 200

        # TIME
        acts = sorted(activities, key=lambda i: i['start_date'])
        start_date = get_date(acts[0]['start_date'])

        acts_order_by_end = sorted(activities, key=lambda i: i['end_date'])
        end_date = get_date(acts_order_by_end[-1]['end_date'])

        time = 0
        list_dates = []
        for a in acts:
            start = get_date(a['start_date'])
            end = get_date(a['end_date'])
            if len(list_dates) == 0:
                time += (end - start).seconds
                list_dates.append({'s': start, 'e': end})
                continue

            time_by_act = 0
            i = 0
            for dates in list_dates:
                i += 1
                if dates['s'] < start < dates['e']:
                    value = (end - dates['e']).seconds
                    if value > 0 and value < time_by_act:
                        time_by_act = value

                elif dates['s'] < end < dates['e']:
                    value = (dates['s'] - start).seconds
                    if value > 0 and value < time_by_act:
                        time_by_act = value

                elif start >= dates['e'] or end <= dates['s']:
                    value = (end - start).seconds
                    if value < time_by_act or i == 1:
                        time_by_act = value

                elif start < dates['s'] or end > dates['e']:
                    time_by_act = 0

            time += time_by_act
            list_dates.append({'s': start, 'e': end})

        time_str = '{} h {} m {} s'.format(time // 3600, (time // 60) % 60,
                                           time % 60)

        quantity_coll_items = Item.query().filter(
            Item.collection_id == cube.id).count()

        return dict(finished=True,
                    start_date=str(start_date),
                    last_date=str(end_date),
                    done=done,
                    duration=time_str,
                    collection_item=quantity_coll_items), 200
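The nested overlap bookkeeping above (repeated in Example No. 6) totals the wall-clock time covered by possibly overlapping activity intervals. A compact alternative sketch, not part of the source, that merges intervals sorted by start and mirrors the source's use of .seconds:

def busy_seconds(intervals):
    """Total seconds covered by (start, end) datetime pairs, counting
    overlapping spans only once."""
    total = 0
    cur_s = cur_e = None
    for s, e in sorted(intervals):
        if cur_e is None or s > cur_e:
            # Disjoint from the current run: flush it and start a new one.
            if cur_e is not None:
                total += (cur_e - cur_s).seconds
            cur_s, cur_e = s, e
        else:
            # Overlapping or adjacent: extend the current run.
            cur_e = max(cur_e, e)
    if cur_e is not None:
        total += (cur_e - cur_s).seconds
    return total

# e.g.: busy_seconds((get_date(a['start_date']), get_date(a['end_date']))
#                    for a in activities)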
Example No. 5
def publish(collection_item: Item,
            scene: RadcorActivity,
            skip_l1=False,
            **kwargs):
    """Publish Sentinel collection.

    It works with both L1C and L2A.

    Args:
        collection_item - Collection Item
        scene - Current Activity
    """
    qlband = 'TCI'

    # Get collection level to publish. Default is l1
    # TODO: Check in database the scenes level 2 already published. We must set to level 2
    collection_level = scene.args.get('level') or 1

    if collection_level == 1 and skip_l1:
        logging.info(
            f'Skipping publish skip_l1={skip_l1} L1 - {collection_item.collection_id}'
        )
        return dict()

    sentinel_scene = factory.get_from_sceneid(scene.sceneid,
                                              level=collection_level)
    harmonized_scene = factory.get_from_sceneid(scene.sceneid, level=3)

    product_uri = sentinel_scene.path()
    product_uri.mkdir(parents=True, exist_ok=True)

    band_map = sentinel_scene.get_band_map()

    if scene.collection_id == harmonized_scene.id:
        # Retrieves all tif files from scene
        tiffiles = get_tif_files(scene)

        # Find the desired files to be published and put them in files
        bands = []

        files = {}
        for tiffile in sorted(tiffiles):
            filename = os.path.basename(tiffile)
            parts = filename.split('_')
            band = parts[2][:-4]  # band name, with the .tif extension stripped
            if band not in bands and band in band_map.keys():
                bands.append(band)
                files[band_map[band]] = tiffile
        logging.warning('Publish {} - {} (id={}, tiffiles={})'.format(
            scene.collection_id, scene.args.get('file'), scene.id,
            len(tiffiles)))
        # Define filenames for products
        parts = os.path.basename(tiffiles[0]).split('_')
        file_basename = '_'.join(parts[:-1])
        pngname = os.path.join(scene.args.get('file'), file_basename + '.png')
        copy(pngname, str(product_uri))
    else:
        # Retrieves all jp2 files from scene

        if sentinel_scene.level == 1:
            files_list = get_jp2_files(scene)
        else:
            files_list = sentinel_scene.get_files()

        # Find the desired files to be published and put them in files
        bands = []

        files = {}
        for file in sorted(files_list):
            filename = Path(file).stem
            parts = filename.split('_')

            if len(parts) in (3, 8):
                band = parts[-1]
            else:
                band = '_'.join(parts[-2:])

            if band not in bands and band in band_map.keys():
                bands.append(band)
                files[band_map[band]] = str(file)
            elif band == qlband:
                files['qlfile'] = str(file)

        logging.warning('Publish {} - {} (id={}, files={})'.format(
            scene.collection_id, scene.args.get('file'), scene.id, len(files)))

        if len(files.keys()) == 0:
            raise RuntimeError('No files found for {} - {}'.format(
                scene.sceneid, str(product_uri)))

        # Retrieve a file name and use as reference for the Vegetation Index files
        file_name = Path(files.get('quality', list(files.values())[0])).name

        file_basename = '_'.join(file_name.split('_')[:-1])

    # Create vegetation index
    generate_vi(file_basename, str(product_uri), files)

    bands.append('NDVI')
    bands.append('EVI')

    band_map['NDVI'] = 'ndvi'
    band_map['EVI'] = 'evi'

    for sband in bands:
        band = band_map[sband]
        file = files[band]

        # Set destination of COG file
        cog_file_name = '{}_{}.tif'.format(file_basename, sband)
        cog_file_path = product_uri / cog_file_name

        files[band] = generate_cogs(str(file), str(cog_file_path))
        if not is_valid_tif(cog_file_path):
            raise RuntimeError('Not Valid {}'.format(cog_file_path))

    assets_to_upload = {}

    for instance in ['local', 'aws']:
        engine_instance = {'local': db, 'aws': db_aws}
        engine = engine_instance[instance]

        # Skip catalog on aws for digital number
        if sentinel_scene.level == 1 and instance == 'aws':
            continue

        base_file_prefix = 'Repository/Archive'

        if instance == 'aws':
            if Config.DISABLE_PUBLISH_SECOND_DB:
                logging.info('Skipping publish in second db.')
                continue

            asset_url = Config.AWS_BUCKET_NAME / (product_uri.relative_to(
                Path(Config.DATA_DIR) / base_file_prefix))
        else:
            asset_url = Path(
                Config.ITEM_ASSET_PREFIX) / product_uri.relative_to(
                    Path(Config.DATA_DIR) / base_file_prefix)

        collection_bands = engine.session.query(Band).filter(
            Band.collection_id == scene.collection_id).all()

        with engine.session.begin_nested():
            with engine.session.no_autoflush:
                # Add collection item to the session if not present
                if collection_item not in engine.session:
                    item = engine.session.query(Item).filter(
                        Item.name == collection_item.name,
                        Item.collection_id == collection_item.collection_id
                    ).first()

                    if not item:
                        cloned_properties = CollectionItemForm().dump(
                            collection_item)
                        cloned_item = Item(**cloned_properties)
                        engine.session.add(cloned_item)

                assets = dict()

                # Create Qlook file
                pngname = product_uri / '{}.png'.format(file_basename)
                if not pngname.exists():
                    # When TCI band found, use it to generate quicklook
                    if files.get('qlfile'):
                        create_quick_look_from_tci(str(pngname),
                                                   files['qlfile'])
                    else:
                        create_quick_look(
                            str(pngname),
                            [files['red'], files['green'], files['blue']])

                normalized_quicklook_path = os.path.normpath('{}/{}'.format(
                    str(asset_url), os.path.basename(pngname.name)))
                assets_to_upload['quicklook'] = dict(
                    asset=str(normalized_quicklook_path), file=str(pngname))

                assets['thumbnail'] = create_asset_definition(
                    str(normalized_quicklook_path), 'image/png', ['thumbnail'],
                    str(pngname))

                geom = min_convex_hull = None

                # Convert original format to COG
                for sband in bands:
                    # Set destination of COG file
                    cog_file_name = '{}_{}.tif'.format(file_basename, sband)
                    cog_file_path = product_uri / cog_file_name

                    band_model = next(
                        filter(lambda b: b.name == sband, collection_bands),
                        None)

                    if band_model is None:
                        logging.warning(
                            'Band {} not registered on database. Skipping'.
                            format(sband))
                        continue

                    if geom is None:
                        geom = raster_extent(cog_file_path)
                        min_convex_hull = raster_convexhull(cog_file_path)

                    assets[band_model.name] = create_asset_definition(
                        f'{str(asset_url)}/{cog_file_name}',
                        COG_MIME_TYPE, ['data'],
                        cog_file_path,
                        is_raster=True)

                    assets_to_upload[sband] = (dict(
                        file=str(cog_file_path),
                        asset=assets[band_model.name]['href']))

                collection_item.geom = from_shape(geom, srid=4326)
                collection_item.min_convex_hull = from_shape(min_convex_hull,
                                                             srid=4326)
                collection_item.assets = assets

        commit(engine)

    return assets_to_upload
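The filename parsing in the else branch above derives the band identifier from the number of underscore-separated name parts. A standalone illustration, with hypothetical Sentinel-2 asset names:

from pathlib import Path

def band_from_filename(file: str) -> str:
    # Mirrors the rule above: short (L1C-style) names keep the last token,
    # longer (L2A-style) names keep the band plus its resolution suffix.
    parts = Path(file).stem.split('_')
    return parts[-1] if len(parts) in (3, 8) else '_'.join(parts[-2:])

band_from_filename('T23LLF_20200913T131241_B04.jp2')      # -> 'B04'
band_from_filename('T23LLF_20200913T131241_B04_10m.jp2')  # -> 'B04_10m'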
Example No. 6
    def get_cube_status(self, cube_name):
        cube = self.get_cube_or_404(cube_full_name=cube_name)
        datacube = cube.name

        # split and format datacube NAME
        parts_cube_name = get_cube_parts(datacube)
        irregular_datacube = '_'.join(parts_cube_name[:2])
        is_irregular = len(parts_cube_name) > 2
        datacube = ('_'.join(parts_cube_name[:3])
                    if is_irregular else irregular_datacube)

        # STATUS
        acts_datacube = []
        not_done_datacube = 0
        error_datacube = 0
        if is_irregular:
            acts_datacube = self.services.get_activities_by_datacube(datacube)
            not_done_datacube = len(
                list(
                    filter(lambda i: i['mystatus'] == 'NOTDONE',
                           acts_datacube)))
            error_datacube = len(
                list(filter(lambda i: i['mystatus'] == 'ERROR',
                            acts_datacube)))
        acts_irregular = self.services.get_activities_by_datacube(
            irregular_datacube)
        not_done_irregular = len(
            list(filter(lambda i: i['mystatus'] == 'NOTDONE', acts_irregular)))
        error_irregular = len(
            list(filter(lambda i: i['mystatus'] == 'ERROR', acts_irregular)))

        activities = acts_irregular + acts_datacube
        errors = error_irregular + error_datacube
        not_done = not_done_irregular + not_done_datacube
        if (not_done + errors):
            return dict(finished=False,
                        done=len(activities) - (not_done + errors),
                        not_done=not_done,
                        error=errors), 200

        if not activities:
            return dict(finished=False, done=0, not_done=0, error=0), 200

        # TIME
        acts = sorted(activities, key=lambda i: i['mylaunch'], reverse=True)
        start_date = get_date(acts[-1]['mylaunch'])
        end_date = get_date(acts[0]['myend'])

        time = 0
        list_dates = []
        for a in acts:
            start = get_date(a['mylaunch'])
            end = get_date(a['myend'])
            if len(list_dates) == 0:
                time += (end - start).seconds
                list_dates.append({'s': start, 'e': end})
                continue

            time_by_act = 0
            i = 0
            for dates in list_dates:
                i += 1
                if dates['s'] < start < dates['e']:
                    value = (end - dates['e']).seconds
                    if value > 0 and value < time_by_act:
                        time_by_act = value

                elif dates['s'] < end < dates['e']:
                    value = (dates['s'] - start).seconds
                    if value > 0 and value < time_by_act:
                        time_by_act = value

                elif start > dates['e'] or end < dates['s']:
                    value = (end - start).seconds
                    if value < time_by_act or i == 1:
                        time_by_act = value

                elif start < dates['s'] or end > dates['e']:
                    time_by_act = 0

            time += time_by_act
            list_dates.append({'s': start, 'e': end})

        time_str = '{} h {} m {} s'.format(time // 3600, (time // 60) % 60,
                                           time % 60)

        quantity_coll_items = Item.query().filter(
            Item.collection_id == cube.id).count()

        return dict(finished=True,
                    start_date=str(start_date),
                    last_date=str(end_date),
                    done=len(activities),
                    duration=time_str,
                    collection_item=quantity_coll_items), 200
Example No. 7
def download(activity: dict, **kwargs):
    """Celery tasks to deal with download data product from given providers."""
    execution = create_execution(activity)

    collector_extension = flask_app.extensions['bdc:collector']

    collection = execution.activity.collection
    scene_id = execution.activity.sceneid

    logging.info(
        f'Starting Download Task for {collection.name}(id={collection.id}, scene_id={scene_id})'
    )

    # Use the parallel flag for providers that cap the maximum number of connections per client (Sentinel-Hub only)
    download_order = collector_extension.get_provider_order(collection,
                                                            lazy=True,
                                                            parallel=True,
                                                            progress=False)

    if len(download_order) == 0:
        raise RuntimeError(
            f'No provider set for collection {collection.id}({collection.name})'
        )

    data_collection = get_provider_collection_from_activity(activity)

    download_file = data_collection.compressed_file(collection)

    has_compressed_file = download_file is not None

    # For collections that do not ship a compressed file (single file/folder), use the native path
    if download_file is None:
        download_file = data_collection.path(collection)

    is_valid_file = False

    item = Item.query().filter(Item.collection_id == collection.id,
                               Item.name == scene_id).first()

    if item:
        # TODO: Get asset name of download file
        item_path = item.assets['asset']['href']
        item_path = item_path if not item_path.startswith(
            '/') else item_path[1:]
        item_path = Path(Config.DATA_DIR) / item_path

        if item_path.exists():
            logging.info(
                f'Item {scene_id} exists. {str(item_path)} -> {str(download_file)}'
            )
            download_file = item_path

    if download_file.exists() and has_compressed_file:
        logging.info('File {} downloaded. Checking file integrity...'.format(
            str(download_file)))
        # TODO: Should we validate using Factory Provider.is_valid() ?
        is_valid_file = is_valid_compressed_file(
            str(download_file)) if download_file.is_file() else False

    if not download_file.exists() or not is_valid_file:
        # Ensure file is removed since it may be corrupted
        if download_file.exists() and download_file.is_file():
            download_file.unlink()

        if not has_compressed_file:
            download_file.mkdir(exist_ok=True, parents=True)
        else:
            download_file.parent.mkdir(exist_ok=True, parents=True)

        with TemporaryDirectory(prefix='download_',
                                suffix=f'_{scene_id}') as tmp:
            temp_file: Path = None

            should_retry = False

            for collector in download_order:
                try:
                    logging.info(
                        f'Trying to download from {collector.provider_name}(id={collector.instance.id})'
                    )
                    temp_file = Path(
                        collector.download(
                            scene_id,
                            output=tmp,
                            dataset=activity['args']['dataset']))

                    activity['args']['provider_id'] = collector.instance.id

                    break
                except DataOfflineError:
                    should_retry = True
                except Exception as e:
                    logging.error(
                        f'Download error in provider {collector.provider_name} - {str(e)}'
                    )

            if temp_file is None or not temp_file.exists():
                if should_retry:
                    raise DataOfflineError(scene_id)
                raise RuntimeError(f'Download failed for {activity["sceneid"]}.')

            shutil.move(str(temp_file), str(download_file))

    refresh_execution_args(execution,
                           activity,
                           compressed_file=str(download_file))

    return activity
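A DataOfflineError above only flags should_retry; the retry itself presumably happens at the Celery level. A hypothetical task registration, assuming a Celery application object named celery_app (not shown in the source):

# Hypothetical; the actual decorator/queue configuration is not shown above.
@celery_app.task(bind=True,
                 autoretry_for=(DataOfflineError,),
                 retry_backoff=True,   # exponential back-off between attempts
                 max_retries=5)
def download_task(self, activity: dict, **kwargs):
    return download(activity, **kwargs)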