Пример #1
0
def get_tiffs_to_eopatches_workflow(config: TiffsToEopatchConfig,
                                    delete_tiffs: bool = False) -> EOWorkflow:
    """ Assemble the linear workflow that ingests tiff files into EOPatches

    The tasks are chained in this order: import of every band, of the `config.clp_name` feature and of
    the `config.mask_name` mask from the tiffs folder, 'Swap time and band axis', 'Load timestamps',
    merge of the bands into `config.data_name`, removal of the single-band features, renaming of the
    mask to `config.is_data_mask`, 'Get CLM mask from CLP' and finally saving of the EOPatch.

    :param config: Configuration holding the bucket name, folder names and feature names
    :param delete_tiffs: If True, a last task deleting the ingested batch files is appended
    :return: A `LinearWorkflow` built from the (task, name) pairs
    """
    # Credentials are carried by the SentinelHub config object
    sh_config = set_sh_config(config)

    # All tiff inputs (and the timestamps) are read from the same bucket location
    tiffs_location = f's3://{config.bucket_name}/{config.tiffs_folder}'

    def tiff_import(feature_type, feature_name):
        """ Create the task importing a single feature from the tiffs location """
        return ImportFromTiff((feature_type, feature_name),
                              folder=tiffs_location,
                              config=sh_config)

    steps = [(tiff_import(FeatureType.DATA, band), f'Import band {band}')
             for band in config.band_names]
    steps.append((tiff_import(FeatureType.DATA, config.clp_name),
                  f'Import {config.clp_name}'))
    steps.append((tiff_import(FeatureType.MASK, config.mask_name),
                  f'Import {config.mask_name}'))

    steps.append((RearrangeBands(), 'Swap time and band axis'))
    steps.append((AddTimestampsUpdateTime(tiffs_location), 'Load timestamps'))

    band_features = {FeatureType.DATA: config.band_names}
    merged_feature = (FeatureType.DATA, config.data_name)
    steps.append((MergeFeatureTask(input_features=band_features,
                                   output_feature=merged_feature),
                  'Merge band features'))

    steps.append((RemoveFeature(features={FeatureType.DATA: config.band_names}),
                  'Remove bands'))

    mask_renaming = (FeatureType.MASK, config.mask_name, config.is_data_mask)
    steps.append((RenameFeature(mask_renaming), 'Rename is data mask'))

    steps.append((CloudMasking(), 'Get CLM mask from CLP'))

    steps.append((SaveTask(path=f's3://{config.bucket_name}/{config.eopatches_folder}',
                           config=sh_config,
                           overwrite_permission=OverwritePermission.OVERWRITE_FEATURES),
                  'Save EOPatch'))

    # The deletion task is always constructed, but only scheduled on request
    obsolete_files = [
        *(f'{band}.tif' for band in config.band_names),
        f'{config.mask_name}.tif',
        f'{config.clp_name}.tif',
        'userdata.json',
    ]
    cleanup_step = (DeleteFiles(path=config.tiffs_folder,
                                filenames=obsolete_files),
                    'Delete batch files')
    if delete_tiffs:
        steps.append(cleanup_step)

    return LinearWorkflow(*steps)
Пример #2
0
    def test_time_dependent_feature(self):
        """ Round-trip a time-dependent feature through one tiff file per timestamp

        The feature is exported with a wildcard filename and imported back from an explicit list of
        file names derived from the EOPatch timestamps. Afterwards the last timestamp is replaced, so
        the derived list points to a file that was never exported and the import must raise
        `ResourceNotFound`.
        """
        ndvi = FeatureType.DATA, 'NDVI'
        stamp_format = "%Y%m%dT%H%M%S"

        def per_timestamp_paths():
            """ File names matching the current timestamps of the test EOPatch """
            return [f'relative-path/{stamp.strftime(stamp_format)}.tiff'
                    for stamp in self.eopatch.timestamp]

        export_pattern = 'relative-path/*.tiff'
        import_paths = per_timestamp_paths()

        exporter = ExportToTiff(ndvi, folder=self.path)
        importer = ImportFromTiff(ndvi, folder=self.path, timestamp_size=68)

        exporter.execute(self.eopatch, filename=export_pattern)
        imported = importer.execute(filename=import_paths)

        arrays_match = np.array_equal(imported[ndvi], self.eopatch[ndvi])
        self.assertTrue(arrays_match)

        # Change the last timestamp so that its file name no longer exists on disk
        self.eopatch.timestamp[-1] = datetime.datetime(2020, 10, 10)
        import_paths = per_timestamp_paths()

        with self.assertRaises(ResourceNotFound):
            importer.execute(filename=import_paths)
Пример #3
0
    def test_import_tiff_intersecting(self):
        """ Import a tiff as a timeless mask and compare one corner of it with the raw file

        Checks that the `[:6, -3:, :]` corner of the imported feature equals the `[-6:, :3, :]` corner
        of the raw tiff, and that the feature has dtype `numpy.float64` as requested.
        """
        tests_dir = os.path.dirname(os.path.realpath(__file__))
        path = os.path.join(tests_dir,
                            '../../../example_data/import-tiff-test2.tiff')

        mask_type, mask_name = mask_feature = FeatureType.MASK_TIMELESS, 'TEST_TIF'
        no_data_value = 1.0

        # Index tuples equivalent to [:6, -3:, :] and [-6:, :3, :]
        patch_corner = (slice(None, 6), slice(-3, None), slice(None))
        tiff_corner = (slice(-6, None), slice(None, 3), slice(None))

        import_task = ImportFromTiff(mask_feature,
                                     path,
                                     image_dtype=np.float64,
                                     no_data_value=no_data_value)
        import_task.execute(self.eopatch)

        tiff_img = read_data(path)

        corners_match = np.array_equal(
            tiff_img[tiff_corner],
            self.eopatch[mask_type][mask_name][patch_corner])
        self.assertTrue(
            corners_match,
            msg='Imported tiff data should be the same as original')

        feature_dtype = self.eopatch[mask_type][mask_name].dtype
        dtype_msg = 'Feature should have dtype numpy.float64 but {} found'.format(
            feature_dtype)
        self.assertEqual(feature_dtype, np.float64, msg=dtype_msg)

        # NOTE(review): the corner is overwritten with the no-data value and then the very same
        # corner is inspected, so this assertion cannot fail - confirm which region was intended
        self.eopatch[mask_type][mask_name][patch_corner] = no_data_value
        corner_values = np.unique(self.eopatch[mask_type][mask_name][patch_corner])
        unique_values = list(corner_values)
        no_data_msg = 'No data values should all be equal to {}'.format(
            no_data_value)
        self.assertEqual(unique_values, [no_data_value], msg=no_data_msg)
Пример #4
0
    def test_export_import(self):
        """ Export every test case to a temporary tiff and import it back

        For each test case the data is stored in the test EOPatch, exported with the band and date
        indices of the case, and imported twice: once into a new EOPatch and once into the existing
        one. Both imported rasters must equal the expected raster; the dtype is checked on the new
        EOPatch only.
        """
        for test_case in self.test_cases:
            with self.subTest(msg='Test case {}'.format(test_case.name)):
                feature_type, feature_name = test_case.feature_type, test_case.name
                self.eopatch[feature_type][feature_name] = test_case.data

                with tempfile.TemporaryDirectory() as tmp_dir_name:
                    tmp_file_name = 'temp_file.tiff'
                    feature = feature_type, feature_name

                    exporter = ExportToTiff(feature,
                                            folder=tmp_dir_name,
                                            band_indices=test_case.bands,
                                            date_indices=test_case.times)
                    exporter.execute(self.eopatch, filename=tmp_file_name)

                    expected_size = test_case.get_expected_timestamp_size()
                    importer = ImportFromTiff(feature,
                                              folder=tmp_dir_name,
                                              timestamp_size=expected_size)

                    expected_raster = test_case.get_expected()

                    # Import once without an EOPatch and once into the existing one
                    new_eop = importer.execute(filename=tmp_file_name)
                    old_eop = importer.execute(self.eopatch, filename=tmp_file_name)

                    imported_pairs = (
                        (new_eop, 'Tiff imported into new EOPatch is not the same as expected'),
                        (old_eop, 'Tiff imported into old EOPatch is not the same as expected'),
                    )
                    for eop, failure_msg in imported_pairs:
                        imported_raster = eop[feature_type][feature_name]
                        self.assertTrue(np.array_equal(expected_raster, imported_raster),
                                        msg=failure_msg)

                    new_raster = new_eop[feature_type][feature_name]
                    self.assertEqual(expected_raster.dtype, new_raster.dtype,
                                     msg='Tiff imported into new EOPatch has different dtype as expected')
Пример #5
0
    def test_time_dependent_feature_with_timestamps(self):
        """ Round-trip a time-dependent feature using a filename containing date format codes

        The same filename template is given to both the export and the import task, and the import
        is executed on the existing EOPatch.
        """
        ndvi = FeatureType.DATA, 'NDVI'
        filename_template = 'relative-path/%Y%m%dT%H%M%S.tiff'

        exporter = ExportToTiff(ndvi, folder=self.path)
        importer = ImportFromTiff(ndvi, folder=self.path)

        exporter.execute(self.eopatch, filename=filename_template)
        imported = importer.execute(self.eopatch, filename=filename_template)

        arrays_match = np.array_equal(imported[ndvi], self.eopatch[ndvi])
        self.assertTrue(arrays_match)
Пример #6
0
    def test_timeless_feature(self):
        """ Round-trip a timeless feature through a single tiff file

        The feature is exported to one file and imported back into the existing EOPatch from the
        same relative filename.
        """
        dem = FeatureType.DATA_TIMELESS, 'DEM'
        tiff_name = 'relative-path/my-filename.tiff'

        exporter = ExportToTiff(dem, folder=self.path)
        importer = ImportFromTiff(dem, folder=self.path)

        exporter.execute(self.eopatch, filename=tiff_name)
        imported = importer.execute(self.eopatch, filename=tiff_name)

        arrays_match = np.array_equal(imported[dem], self.eopatch[dem])
        self.assertTrue(arrays_match)
Пример #7
0
def load_tiffs(datapath: str,
               feature: Tuple[FeatureType, str],
               filename: str = None,
               image_dtype: np.generic = np.float32,
               no_data_value: float = np.nan,
               data_source: str = 's5p',
               offset: int = 2100):
    """ Helper function to load the data sources provided as tiffs

    Files in `datapath` with a `.gz` extension are uncompressed (and the compressed file deleted),
    files with a `.zip` extension are unpacked into `datapath`, and afterwards only `.tif` files are
    imported into the requested feature.

    :param datapath: Folder holding the files; a `str` or a path-like object
    :param feature: Feature type and feature name the tiffs are imported into
    :param filename: Name of a single file in `datapath`; if None, every file in the folder is used
    :param image_dtype: Forwarded to `ImportFromTiff` as `image_dtype`
    :param no_data_value: Forwarded to `ImportFromTiff` as `no_data_value`
    :param data_source: One of 's5p', 'modis', 'era5', 'cams', 's3'; selects the timestamp parser
    :param offset: Forwarded to the timestamp parser together with the sorted file names
    :return: The result of the import task; for time-dependent features its `timestamp` attribute
        is set from the sorted file names
    :raises AssertionError: If `data_source` is unknown, if a timeless feature does not resolve to
        exactly one tiff, or if no tiff is found for a time-dependent feature
    """
    supported_sources = ['s5p', 'modis', 'era5', 'cams', 's3']
    assert data_source in supported_sources, \
        f'data_source must be one of {supported_sources}, got {data_source!r}'

    tiles = sorted(os.listdir(datapath)) if filename is None else [filename]

    # only keep files; os.path.join accepts both str and path-like folders
    tiles = [tile for tile in tiles
             if not os.path.isdir(os.path.join(datapath, tile))]

    # unzip tiffs if they have .gz extension and delete them
    gz_ext = '.gz'
    unpacked_tiles = []
    for tile in tiles:
        if tile.endswith(gz_ext):
            # strip only the trailing extension, the stem itself may contain '.gz'
            uncompressed = tile[:-len(gz_ext)]
            ungz_file(os.path.join(datapath, tile),
                      os.path.join(datapath, uncompressed),
                      delete=True)
            tile = uncompressed
        unpacked_tiles.append(tile)
    tiles = unpacked_tiles

    zip_ext = '.zip'
    zipped_tiles = [tile for tile in tiles if tile.endswith(zip_ext)]
    for ztile in zipped_tiles:
        unzip_file(os.path.join(datapath, ztile), str(datapath))

    # assumes every archive holds a tiff named like the archive - TODO confirm
    tiles = tiles + [ztile[:-len(zip_ext)] + '.tif' for ztile in zipped_tiles]

    # remove files which don't have .tif extension; sort once for import and timestamp parsing
    tiles = sorted(tile for tile in tiles
                   if os.path.splitext(tile)[1] == '.tif')

    time_dependent = feature[0].is_time_dependent()
    # a timestamp size only makes sense for time-dependent features
    timestamp_size = len(tiles) if time_dependent else None

    import_task = ImportFromTiff(feature=feature,
                                 folder=datapath,
                                 image_dtype=image_dtype,
                                 no_data_value=no_data_value,
                                 timestamp_size=timestamp_size)

    if not time_dependent:
        assert len(tiles) == 1, \
            f'Expected exactly one tiff for a timeless feature, found {len(tiles)}'
        return import_task.execute(filename=tiles[0])

    assert len(tiles) >= 1, 'No tiff files found to import'
    eop = import_task.execute(filename=tiles)
    eop.timestamp = TIMESTAMP_PARSER[data_source](tiles, offset)

    return eop
Пример #8
0
    def test_import_tiff_subset(self):
        """ Import a tiff as a timeless mask and compare it with a window of the raw file

        The first channel of the imported feature must equal rows 20-52 and columns 21-53 of the
        raw tiff image.
        """
        tests_dir = os.path.dirname(os.path.realpath(__file__))
        path = os.path.join(tests_dir, '../../../example_data/import-tiff-test1.tiff')

        mask_type, mask_name = mask_feature = FeatureType.MASK_TIMELESS, 'TEST_TIF'

        import_task = ImportFromTiff(mask_feature, path)
        import_task.execute(self.eopatch)

        tiff_img = read_data(path)

        expected_window = tiff_img[20:53, 21:54]
        imported_channel = self.eopatch[mask_type][mask_name][..., 0]
        self.assertTrue(np.array_equal(expected_window, imported_channel),
                        msg='Imported tiff data should be the same as original')