Example #1
class AcquisitionRecord(object):
    """AcquisitionRecord database interface class."""

    ACQUISITION_METADATA_FIELDS = [
        'satellite_tag', 'sensor_name', 'x_ref', 'y_ref', 'start_datetime',
        'end_datetime', 'll_lon', 'll_lat', 'lr_lon', 'lr_lat', 'ul_lon',
        'ul_lat', 'ur_lon', 'ur_lat', 'gcp_count', 'mtl_text', 'cloud_cover'
    ]

    def __init__(self, collection, dataset):

        self.collection = collection
        self.datacube = collection.datacube
        self.db = IngestDBWrapper(self.datacube.db_connection)
        self.acquisition_dict = {}
        self.acquisition_id = None  # set below

        # Fill a dictionary with data for the acquisition.
        # Start with fields from the dataset metadata.
        for field in self.ACQUISITION_METADATA_FIELDS:
            self.acquisition_dict[field] = dataset.metadata_dict[field]

        # Next look up the satellite_id and sensor_id in the
        # database and fill these in.
        self.acquisition_dict['satellite_id'] = \
            self.db.get_satellite_id(self.acquisition_dict['satellite_tag'])
        self.acquisition_dict['sensor_id'] = \
            self.db.get_sensor_id(self.acquisition_dict['satellite_id'],
                                  self.acquisition_dict['sensor_name'])

        # Finally look up the acquisition_id, or create a new record if it
        # does not exist, and fill it into the dictionary.
        self.acquisition_id = \
            self.db.get_acquisition_id_fuzzy(self.acquisition_dict)
        if self.acquisition_id is None:
            self.acquisition_id = \
                self.db.insert_acquisition_record(self.acquisition_dict)
        else:
            # Do we update the acquisition record here?
            pass
        self.acquisition_dict['acquisition_id'] = self.acquisition_id

    def create_dataset_record(self, dataset):
        """Factory method to create an instance of the DatasetRecord class.

        This method creates a new record in the database if one does not
        already exist. It will overwrite an earlier dataset record (and its
        tiles) if one exists. It will raise a DatasetError if a later (or
        equal time) record for this dataset already exists in the database.
        """

        return DatasetRecord(self.collection, self, dataset)
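
# Usage sketch (an addition, not part of the original module): how these
# record classes appear to fit together during ingest. The `datacube` and
# `dataset` objects are assumed to come from the surrounding ingester
# framework and are not constructed here.
def ingest_dataset_sketch(datacube, dataset):
    """Hypothetical driver showing the intended call sequence."""
    collection = Collection(datacube)
    collection.check_metadata(dataset)
    with collection.transaction():
        acquisition = collection.create_acquisition_record(dataset)
        return acquisition.create_dataset_record(dataset)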
Example #2
    def update_tile_footprint(self):
        """Update the tile footprint entry in the database"""

        if not self.db.tile_footprint_exists(self.tile_dict):
            # We may need to create a new footprint record.
            footprint_dict = {
                'x_index': self.tile_footprint[0],
                'y_index': self.tile_footprint[1],
                'tile_type_id': self.tile_type_id,
                'x_min': self.tile_contents.tile_extents[0],
                'y_min': self.tile_contents.tile_extents[1],
                'x_max': self.tile_contents.tile_extents[2],
                'y_max': self.tile_contents.tile_extents[3],
                'bbox': 'Populate this within sql query?'
            }

            # Create an independent database connection for this transaction.
            my_db = IngestDBWrapper(self.datacube.create_connection())
            try:
                with self.collection.transaction(my_db):
                    if not my_db.tile_footprint_exists(self.tile_dict):
                        my_db.insert_tile_footprint(footprint_dict)

            except psycopg2.IntegrityError:
                # If we get an IntegrityError we assume the tile_footprint
                # is already in the database, and we do not need to add it.
                pass

            finally:
                my_db.close()
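
# The pattern above, distilled (an illustrative addition, not original code):
# check, insert inside an independent transaction, and treat a unique-key
# IntegrityError from a concurrent writer as success. `exists` and `insert`
# stand in for the wrapped database calls; `connection` is a psycopg2
# connection used as a transaction context manager.
import psycopg2

def ensure_row(connection, exists, insert):
    """Insert a row idempotently in the face of concurrent writers."""
    if not exists():
        try:
            with connection:  # commits on success, rolls back on error
                if not exists():
                    insert()
        except psycopg2.IntegrityError:
            # Another process inserted the row first; nothing to do.
            pass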
Example #3
    def __init__(self, collection, dataset_record, tile_contents):
        self.collection = collection
        self.datacube = collection.datacube
        self.dataset_record = dataset_record
        self.tile_contents = tile_contents
        self.tile_footprint = tile_contents.tile_footprint
        self.tile_type_id = tile_contents.tile_type_id
        # Set tile_class_id to pending.
        self.tile_class_id = TC_PENDING
        # Set tile_id; it is determined below from a database query.
        self.tile_id = None
        self.db = IngestDBWrapper(self.datacube.db_connection)
        # Fill a dictionary with data for the tile
        tile_dict = {}
        self.tile_dict = tile_dict
        tile_dict['x_index'] = self.tile_footprint[0]
        tile_dict['y_index'] = self.tile_footprint[1]
        tile_dict['tile_type_id'] = self.tile_type_id
        tile_dict['dataset_id'] = self.dataset_record.dataset_id
        # Store final destination in the 'tile_pathname' field
        tile_dict['tile_pathname'] = self.tile_contents.tile_output_path
        tile_dict['tile_class_id'] = 1
        # The physical file is currently in the temporary location
        tile_dict['tile_size'] = \
            get_file_size_mb(self.tile_contents.temp_tile_output_path)

        self.update_tile_footprint()

        # Make the tile record entry on the database:
        self.tile_id = self.db.get_tile_id(tile_dict)
        if self.tile_id is None:
            self.tile_id = self.db.insert_tile_record(tile_dict)
        else:
            # If there was any existing tile corresponding to tile_dict then
            # it should already have been removed.
            raise AssertionError("Attempt to recreate an existing tile.")
        tile_dict['tile_id'] = self.tile_id
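
    # Note (added): TileRecord objects are not normally constructed directly.
    # The intended path, per DatasetRecord.create_tile_record shown later in
    # Example #4, is:
    #
    #     tile_record = dataset_record.create_tile_record(tile_contents)
    #
    # which also marks the tile contents for creation on transaction commit.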
Example #4
class DatasetRecord(object):
    """DatasetRecord database interface class."""

    DATASET_METADATA_FIELDS = [
        'dataset_path', 'datetime_processed', 'dataset_size', 'll_x', 'll_y',
        'lr_x', 'lr_y', 'ul_x', 'ul_y', 'ur_x', 'ur_y', 'x_pixels', 'y_pixels',
        'xml_text'
    ]

    def __init__(self, collection, acquisition, dataset):

        self.collection = collection
        self.datacube = collection.datacube
        self.db = IngestDBWrapper(self.datacube.db_connection)

        dataset_key = collection.get_dataset_key(dataset)
        self.dataset_bands = collection.new_bands[dataset_key]

        self.dataset = dataset
        self.mdd = dataset.metadata_dict

        self.dataset_dict = {}
        for field in self.DATASET_METADATA_FIELDS:
            self.dataset_dict[field] = self.mdd[field]

        self.dataset_dict['acquisition_id'] = acquisition.acquisition_id
        self.dataset_dict['crs'] = self.mdd['projection']
        self.dataset_dict['level_name'] = self.mdd['processing_level']
        self.dataset_dict['level_id'] = \
            self.db.get_level_id(self.dataset_dict['level_name'])

        self.dataset_dict['dataset_id'] = \
            self.db.get_dataset_id(self.dataset_dict)
        if self.dataset_dict['dataset_id'] is None:
            # create a new dataset record in the database
            self.dataset_dict['dataset_id'] = \
                self.db.insert_dataset_record(self.dataset_dict)
            self.needs_update = False
        else:
            # check that the old dataset record can be updated
            self.__check_update_ok()
            self.needs_update = True

        self.dataset_id = self.dataset_dict['dataset_id']

    def remove_mosaics(self, dataset_filter):
        """Remove mosaics associated with the dataset.

        This will mark mosaic files for removal, delete mosaic database
        records if they exist, and update the tile class of overlapping
        tiles (from other datasets) to reflect the lack of a mosaic. The
        'dataset_filter' is a list of dataset_ids to filter on. It should
        be the list of dataset_ids that have been locked (including this
        dataset). It is used to avoid operating on the tiles of an
        unlocked dataset.
        """

        # remove new mosaics (those with database records)
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_SINGLE_SCENE, TC_SUPERSEDED,
                                     TC_MOSAIC),
            output_tile_class_filter=(TC_MOSAIC, ),
            dataset_filter=dataset_filter)

        for tile_record_list in overlap_dict.values():
            for tr in tile_record_list:
                self.db.remove_tile_record(tr['tile_id'])
                self.collection.mark_tile_for_removal(tr['tile_pathname'])

        # build a dictionary of overlaps (ignoring mosaics)
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_SINGLE_SCENE, TC_SUPERSEDED),
            output_tile_class_filter=(TC_SINGLE_SCENE, TC_SUPERSEDED),
            dataset_filter=dataset_filter)

        # update tile classes for overlap tiles from other datasets
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 2:
                raise DatasetError("Attempt to update a mosaic of three or " +
                                   "more datasets. Handling for this case " +
                                   "is not yet implemented.")
            for tr in tile_record_list:
                if tr['dataset_id'] != self.dataset_id:
                    self.db.update_tile_class(tr['tile_id'], TC_SINGLE_SCENE)

        # remove old mosaics (those without database records)
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 1:
                # tile_record_list is sorted by acquisition start time, so
                # the first record should be the one the mosaic filename is
                # based on.
                tr = tile_record_list[0]
                mosaic_pathname = \
                    self.__make_mosaic_pathname(tr['tile_pathname'])
                if os.path.isfile(mosaic_pathname):
                    self.collection.mark_tile_for_removal(mosaic_pathname)

    def remove_tiles(self):
        """Remove the tiles associated with the dataset.

        This will remove ALL the tiles belonging to this dataset, deleting
        database records and marking tile files for removal on commit. Mosaics
        should be removed BEFORE calling this (as it will delete the tiles
        needed to figure out the overlaps, but may not delete all the mosaics).
        """

        tile_list = self.db.get_dataset_tile_ids(self.dataset_id)

        for tile_id in tile_list:
            tile_pathname = self.db.get_tile_pathname(tile_id)
            self.db.remove_tile_record(tile_id)
            self.collection.mark_tile_for_removal(tile_pathname)

    def update(self):
        """Update the dataset record in the database.

        This first checks that the new dataset is more recent than
        the record in the database. If not it raises a dataset error.
        """

        self.__check_update_ok()
        self.db.update_dataset_record(self.dataset_dict)

    def make_tiles(self, tile_type_id, band_stack):
        """Tile the dataset, returning a list of tile_content objects."""

        tile_list = []
        tile_footprint_list = self.get_coverage(tile_type_id)
        for tile_footprint in tile_footprint_list:
            tile_contents = self.collection.create_tile_contents(
                tile_type_id, tile_footprint, band_stack)
            tile_contents.reproject()

            if tile_contents.has_data():
                tile_list.append(tile_contents)
            else:
                tile_contents.remove()

        return tile_list

    def store_tiles(self, tile_list):
        """Store tiles in the database and file store.

        'tile_list' is a list of tile_contents objects. This
        method will create the corresponding database records and
        mark tiles for creation when the transaction commits.
        """

        tile_record_list = []
        for tile_contents in tile_list:
            tile_record = self.create_tile_record(tile_contents)
            tile_record_list.append(tile_record)

        return tile_record_list
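
    # Usage sketch (added): the tiling loop these two methods support. How
    # `band_stack` is built is assumed to live elsewhere in the ingester;
    # `stack_bands` below is a hypothetical helper, not part of this class.
    #
    #     for tile_type_id in dataset_record.list_tile_types():
    #         bands = dataset_record.get_tile_bands(tile_type_id)
    #         band_stack = dataset.stack_bands(bands)  # hypothetical
    #         tiles = dataset_record.make_tiles(tile_type_id, band_stack)
    #         dataset_record.store_tiles(tiles)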

    def create_mosaics(self, dataset_filter):
        """Create mosaics associated with the dataset.

        'dataset_filter' is a list of dataset_ids to filter on. It should
        be the list of dataset_ids that have been locked (including this
        dataset). It is used to avoid operating on the tiles of an
        unlocked dataset.
        """

        # Build a dictionary of overlaps (ignoring mosaics, including pending).
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_PENDING, TC_SINGLE_SCENE,
                                     TC_SUPERSEDED),
            output_tile_class_filter=(TC_PENDING, TC_SINGLE_SCENE,
                                      TC_SUPERSEDED),
            dataset_filter=dataset_filter)

        # Make mosaics and update tile classes as needed.
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 2:
                raise DatasetError("Attempt to create a mosaic of three or " +
                                   "more datasets. Handling for this case " +
                                   "is not yet implemented.")
            elif len(tile_record_list) == 2:
                self.__make_one_mosaic(tile_record_list)
                for tr in tile_record_list:
                    self.db.update_tile_class(tr['tile_id'], TC_SUPERSEDED)
            else:
                for tr in tile_record_list:
                    self.db.update_tile_class(tr['tile_id'], TC_SINGLE_SCENE)

    def get_removal_overlaps(self):
        """Returns a list of overlapping dataset ids for mosaic removal."""

        tile_class_filter = (TC_SINGLE_SCENE, TC_SUPERSEDED, TC_MOSAIC)
        return self.get_overlaps(tile_class_filter)

    def get_creation_overlaps(self):
        """Returns a list of overlapping dataset_ids for mosaic creation."""

        tile_class_filter = (TC_PENDING, TC_SINGLE_SCENE, TC_SUPERSEDED)
        return self.get_overlaps(tile_class_filter)

    def get_overlaps(self, tile_class_filter):
        """Returns a list of overlapping dataset ids, including this dataset.

        A dataset is overlapping if it contains tiles that overlap with
        tiles belonging to this dataset. Only tiles in the tile_class_filter
        are considered.
        """

        dataset_list = self.db.get_overlapping_dataset_ids(
            self.dataset_id, tile_class_filter=tile_class_filter)

        if not dataset_list:
            dataset_list = [self.dataset_id]

        return dataset_list
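
    # Usage sketch (added): the overlap queries are designed to pair with
    # dataset locking, per the 'dataset_filter' docstrings above.
    #
    #     lock_ids = dataset_record.get_removal_overlaps()
    #     with collection.lock_datasets(lock_ids):
    #         with collection.transaction():
    #             dataset_record.remove_mosaics(lock_ids)
    #             dataset_record.remove_tiles()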

    def create_tile_record(self, tile_contents):
        """Factory method to create an instance of the TileRecord class.

        The created object will be responsible for inserting tile table records
        into the database for reprojected or mosaiced tiles."""
        self.collection.mark_tile_for_creation(tile_contents)
        return TileRecord(self.collection, self, tile_contents)

    def mark_as_tiled(self):
        """Flag the dataset record as tiled in the database.

        This flag does not exist in the current database schema,
        so this method does nothing at the moment."""

        pass

    def list_tile_types(self):
        """Returns a list of the tile type ids for this dataset."""

        return self.dataset_bands.keys()

    def get_tile_bands(self, tile_type_id):
        """Returns a dictionary containing the band info for one tile type.

        The tile_type_id must be valid for this dataset, available from
        list_tile_types above.
        """

        return self.dataset_bands[tile_type_id]

    def get_coverage(self, tile_type_id):
        """Given the coordinate reference system of the dataset and that of the
        tile_type_id, return a list of tiles within the dataset footprint"""
        tile_type_info = self.collection.datacube.tile_type_dict[tile_type_id]
        # Get geospatial information from the dataset.
        dataset_crs = self.mdd['projection']
        dataset_geotransform = self.mdd['geo_transform']
        pixels = self.mdd['x_pixels']
        lines = self.mdd['y_pixels']
        # Look up the datacube's projection information for this tile_type.
        tile_crs = tile_type_info['crs']
        # Get the transformation between the two projections.
        transformation = self.define_transformation(dataset_crs, tile_crs)
        # Determine the bounding quadrilateral of the dataset extent
        # in tile coordinates.
        dataset_bbox = self.get_bbox(transformation, dataset_geotransform,
                                     pixels, lines)
        # Determine the maximum inner rectangle, which is guaranteed to need
        # tiling, and the minimum outer rectangle outside which no tiles
        # will exist.
        cube_origin = (tile_type_info['x_origin'], tile_type_info['y_origin'])
        cube_tile_size = (tile_type_info['x_size'], tile_type_info['y_size'])
        coverage = self.get_touched_tiles(dataset_bbox, cube_origin,
                                          cube_tile_size)
        return coverage

    #
    # worker methods
    #

    def __check_update_ok(self):
        """Checks if an update is possible, raises a DatasetError otherwise."""

        tile_class_filter = (TC_SINGLE_SCENE, TC_SUPERSEDED)
        if self.db.dataset_older_than_database(
                self.dataset_dict['dataset_id'],
                self.dataset_dict['datetime_processed'], tile_class_filter):
            raise DatasetError("Dataset to be ingested is older than " +
                               "the version in the database.")

    def __make_one_mosaic(self, tile_record_list):
        """Create a single mosaic.

        This creates the mosaic contents, creates the database record,
        and marks the mosaic contents for creation on transaction commit.
        """
        mosaic = MosaicContents(tile_record_list, self.datacube.tile_type_dict,
                                self.dataset_dict['level_name'],
                                self.collection.get_temp_tile_directory())
        mosaic.create_record(self.db)
        self.collection.mark_tile_for_creation(mosaic)

    def __make_mosaic_pathname(self, tile_pathname):
        """Return the pathname of the mosaic corrisponding to a tile."""

        (tile_dir, tile_basename) = os.path.split(tile_pathname)

        mosaic_dir = os.path.join(tile_dir, 'mosaic_cache')
        if self.dataset_dict['level_name'] == 'PQA':
            mosaic_basename = tile_basename
        else:
            mosaic_basename = re.sub(r'\.\w+$', '.vrt', tile_basename)

        return os.path.join(mosaic_dir, mosaic_basename)

#
# Worker methods for coverage.
#
# These are public so that they can be called by test_dataset_record.
#

    def define_transformation(self, dataset_crs, tile_crs):
        """Return the transformation between dataset_crs
        and tile_crs projections"""
        osr.UseExceptions()
        try:
            dataset_spatial_reference = self.create_spatial_ref(dataset_crs)
            tile_spatial_reference = self.create_spatial_ref(tile_crs)
            if dataset_spatial_reference is None:
                raise DatasetError('Unknown projection %s' % str(dataset_crs))
            if tile_spatial_reference is None:
                raise DatasetError('Unknown projection %s' % str(tile_crs))
            return osr.CoordinateTransformation(dataset_spatial_reference,
                                                tile_spatial_reference)
        except Exception:
            raise DatasetError('Coordinate transformation error ' +
                               'for transforming %s to %s' %
                               (str(dataset_crs), str(tile_crs)))

    @staticmethod
    def create_spatial_ref(crs):
        """Create a spatial reference system for projecton crs.
        Called by define_transformation()"""
        # pylint: disable=broad-except

        osr.UseExceptions()
        try:
            spatial_ref = osr.SpatialReference()
        except Exception:
            raise DatasetError('Could not create spatial reference for %s' %
                               str(crs))
        try:
            spatial_ref.ImportFromWkt(crs)
            return spatial_ref
        except Exception:
            pass
        try:
            matchobj = re.match(r'EPSG:(\d+)', crs)
            epsg_code = int(matchobj.group(1))
            spatial_ref.ImportFromEPSG(epsg_code)
            return spatial_ref
        except Exception:
            return None

    @staticmethod
    def get_bbox(transform, geotrans, pixels, lines):
        """Return the coordinates of the dataset footprint in clockwise order
        from upper-left"""
        xul, yul, dummy_z =  \
            transform.TransformPoint(geotrans[0], geotrans[3], 0)
        xur, yur, dummy_z = \
            transform.TransformPoint(geotrans[0] + geotrans[1] * pixels,
                                     geotrans[3] + geotrans[4] * pixels, 0)
        xll, yll, dummy_z = \
            transform.TransformPoint(geotrans[0] + geotrans[2] * lines,
                                     geotrans[3] + geotrans[5] * lines, 0)
        xlr, ylr, dummy_z = \
            transform.TransformPoint(
                geotrans[0] + geotrans[1] * pixels + geotrans[2] * lines,
                geotrans[3] + geotrans[4] * pixels + geotrans[5] * lines, 0)
        return [(xul, yul), (xur, yur), (xlr, ylr), (xll, yll)]
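
    # Worked example (added): get_bbox follows the standard GDAL affine
    # geotransform, x = gt[0] + p*gt[1] + l*gt[2], y = gt[3] + p*gt[4] +
    # l*gt[5]. With an identity transform and a north-up geotransform for a
    # 100 x 100 pixel scene at (140.0, -35.0) with 0.00025-degree pixels:
    #
    #     gt = (140.0, 0.00025, 0.0, -35.0, 0.0, -0.00025)
    #     DatasetRecord.get_bbox(identity_transform, gt, 100, 100)
    #     # -> [(140.0, -35.0), (140.025, -35.0),
    #     #     (140.025, -35.025), (140.0, -35.025)]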

    def get_touched_tiles(self, dataset_bbox, cube_origin, cube_tile_size):
        """Return a list of tuples (itile, jtile) comprising all tiles
        footprints that intersect the dataset bounding box"""
        definite_tiles, possible_tiles = \
            self.get_definite_and_possible_tiles(dataset_bbox,
                                                 cube_origin, cube_tile_size)
        coverage_set = definite_tiles
        # Check possible tiles:
        # Check if the tile perimeter intersects the dataset bbox perimeter:
        intersected_tiles = \
            self.get_intersected_tiles(possible_tiles, dataset_bbox,
                                       cube_origin, cube_tile_size)
        coverage_set = coverage_set.union(intersected_tiles)
        possible_tiles = possible_tiles.difference(intersected_tiles)
        # Otherwise the tile might be wholly contained in the dataset bbox:
        contained_tiles = \
            self.get_contained_tiles(possible_tiles, dataset_bbox,
                                     cube_origin, cube_tile_size)
        coverage_set = coverage_set.union(contained_tiles)
        return coverage_set

    @staticmethod
    def get_definite_and_possible_tiles(bbox, cube_origin, cube_tile_size):
        """Return two lists of tile footprints: from the largest rectangle
        wholly contained within the dataset bbox and the smallest rectangle
        containing the bbox."""
        #pylint: disable=too-many-locals
        #unpack the bbox vertices in clockwise order from upper-left
        xyul, xyur, xylr, xyll = bbox
        xul, yul = xyul
        xur, yur = xyur
        xlr, ylr = xylr
        xll, yll = xyll
        # Unpack the origin of the tiled datacube (e.g. lat=0, lon=0) and
        # the datacube tile size.
        xorigin, yorigin = cube_origin
        xsize, ysize = cube_tile_size
        # Define the largest rectangle wholly contained within the footprint.
        xmin = max(xll, xul)
        xmax = min(xlr, xur)
        ymin = max(yll, ylr)
        ymax = min(yul, yur)
        xmin_index = int(floor((xmin - xorigin) / xsize))
        xmax_index = int(floor((xmax - xorigin) / xsize))
        ymin_index = int(floor((ymin - yorigin) / ysize))
        ymax_index = int(floor((ymax - yorigin) / ysize))
        definite_tiles = set([(itile, jtile)
                              for itile in range(xmin_index, xmax_index + 1)
                              for jtile in range(ymin_index, ymax_index + 1)])
        # Define the smallest rectangle which is guaranteed to include all
        # tiles in the footprint.
        xmin = min(xll, xul)
        xmax = max(xlr, xur)
        ymin = min(yll, ylr)
        ymax = max(yul, yur)
        xmin_index = int(floor((xmin - xorigin) / xsize))
        xmax_index = int(floor((xmax - xorigin) / xsize))
        ymin_index = int(floor((ymin - yorigin) / ysize))
        ymax_index = int(floor((ymax - yorigin) / ysize))
        possible_tiles = set([(itile, jtile)
                              for itile in range(xmin_index, xmax_index + 1)
                              for jtile in range(ymin_index, ymax_index + 1)
                              ]).difference(definite_tiles)
        return (definite_tiles, possible_tiles)
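
    # Worked example (added): for a 1-degree tile grid with origin (0, 0),
    # an axis-aligned bbox well inside one tile yields a single definite
    # tile and no possible tiles:
    #
    #     bbox = [(140.1, -35.1), (140.9, -35.1),
    #             (140.9, -35.9), (140.1, -35.9)]
    #     get_definite_and_possible_tiles(bbox, (0.0, 0.0), (1.0, 1.0))
    #     # -> ({(140, -36)}, set())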

    def get_intersected_tiles(self, candidate_tiles, dset_bbox, cube_origin,
                              cube_tile_size):
        """Return the subset of candidate_tiles that have an intersection with
        the dataset bounding box"""
        #pylint: disable=too-many-locals
        xorigin, yorigin = cube_origin
        xsize, ysize = cube_tile_size
        keep_list = []
        for itile, jtile in candidate_tiles:
            intersection_exists = False
            (x0, y0) = (xorigin + itile * xsize, yorigin + (jtile + 1) * ysize)
            tile_bbox = [(x0, y0), (x0 + xsize, y0), (x0 + xsize, y0 - ysize),
                         (x0, y0 - ysize)]
            tile_vtx_number = len(tile_bbox)
            dset_vtx_number = len(dset_bbox)
            for tile_vtx in range(tile_vtx_number):
                x1, y1 = tile_bbox[tile_vtx]
                x2, y2 = tile_bbox[(tile_vtx + 1) % tile_vtx_number]
                for dset_vtx in range(dset_vtx_number):
                    x3, y3 = dset_bbox[dset_vtx]
                    x4, y4 = dset_bbox[(dset_vtx + 1) % dset_vtx_number]
                    xcoords = [x1, x2, x3, x4]
                    ycoords = [y1, y2, y3, y4]
                    intersection_exists = \
                        self.check_intersection(xcoords, ycoords)
                    if intersection_exists:
                        keep_list.append((itile, jtile))
                        break
                if intersection_exists:
                    break
        return set(keep_list)

    @staticmethod
    def get_contained_tiles(candidate_tiles, dset_bbox, cube_origin,
                            cube_tile_size):
        """Return the subset of candidate tiles that lie wholly within the
        dataset bounding box"""
        #pylint: disable=too-many-locals
        xorigin, yorigin = cube_origin
        xsize, ysize = cube_tile_size
        keep_list = []
        for itile, jtile in candidate_tiles:
            tile_vtx_inside = []
            (x0, y0) = (xorigin + itile * xsize, yorigin + (jtile + 1) * ysize)
            tile_bbox = [(x0, y0), (x0 + xsize, y0), (x0 + xsize, y0 - ysize),
                         (x0, y0 - ysize)]
            dset_vtx_number = len(dset_bbox)
            for x, y in tile_bbox:
                # Check if this vertex lies within the dataset bounding box:
                winding_number = 0
                for dset_vtx in range(dset_vtx_number):
                    x1, y1 = dset_bbox[dset_vtx]
                    x2, y2 = dset_bbox[(dset_vtx + 1) % dset_vtx_number]
                    if y >= y1 and y < y2:
                        if (x - x1) * (y2 - y1) > (x2 - x1) * (y - y1):
                            winding_number += 1
                    elif y <= y1 and y > y2:
                        if (x - x1) * (y2 - y1) < (x2 - x1) * (y - y1):
                            winding_number += 1
                tile_vtx_inside.append(winding_number % 2 == 1)
            if tile_vtx_inside.count(True) == len(tile_bbox):
                keep_list.append((itile, jtile))
            assert tile_vtx_inside.count(True) == 4 or \
                tile_vtx_inside.count(True) == 0, \
                "Tile partially inside dataset bounding box but has " \
                "no intersection"
        return set(keep_list)

    @staticmethod
    def check_intersection(xpts, ypts):
        """Determines if the line segments
        (xpts[0], ypts[0]) to (xpts[1], ypts[1]) and
        (xpts[2], ypts[2]) to (xpts[3], ypts[3]) intersect"""
        pvec = (xpts[0], ypts[0])
        qvec = (xpts[2], ypts[2])
        rvec = (xpts[1] - xpts[0], ypts[1] - ypts[0])
        svec = (xpts[3] - xpts[2], ypts[3] - ypts[2])
        rvec_cross_svec = rvec[0] * svec[1] - rvec[1] * svec[0]
        if rvec_cross_svec == 0:
            return False
        qminusp_cross_svec = \
            (qvec[0] - pvec[0]) * svec[1] - (qvec[1] - pvec[1]) * svec[0]
        qminusp_cross_rvec = \
            (qvec[0] - pvec[0]) * rvec[1] - (qvec[1] - pvec[1]) * rvec[0]
        tparameter = qminusp_cross_svec / rvec_cross_svec
        uparameter = qminusp_cross_rvec / rvec_cross_svec
        return (0 < tparameter < 1) and (0 < uparameter < 1)
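
    # Sanity check (added): the diagonals of the unit square intersect,
    # while two parallel edges do not (float inputs, as in real use):
    #
    #     check_intersection([0.0, 1.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0])  # True
    #     check_intersection([0.0, 1.0, 0.0, 1.0], [0.0, 0.0, 1.0, 1.0])  # False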
Example #5
class Collection(object):
    """Collection database interface class."""

    #
    # Interface methods
    #

    def __init__(self, datacube):
        """Initialise the collection object."""

        self.datacube = datacube
        self.db = IngestDBWrapper(datacube.db_connection)
        self.new_bands = self.__reindex_bands(datacube.bands)
        self.transaction_stack = []

        self.temp_tile_directory = os.path.join(self.datacube.tile_root,
                                                'ingest_temp',
                                                self.datacube.process_id)
        create_directory(self.temp_tile_directory)

    def cleanup(self):
        """Do end-of-process cleanup.

        Deletes the process-specific temporary directory. Does not
        close the database connection (at present), because the datacube
        object has a destructor which does that.
        """

        shutil.rmtree(self.temp_tile_directory, ignore_errors=True)

    @staticmethod
    def get_dataset_key(dataset):
        """Return the dataset key for use with the new_bands dictionary.

        This is a tuple (satellite_tag, sensor_name, processing_level) except
        that for derived datasets (currently PQA and FC) the satellite_tag is
        replaced with 'DERIVED' and the processing_level is used as the
        sensor_name. So the tuple looks like:
        ('DERIVED', processing_level, processing_level).
        """

        derived_levels = {'PQA', 'FC'}

        satellite = dataset.get_satellite_tag()
        sensor = dataset.get_sensor_name()
        level = dataset.get_processing_level()

        if level in derived_levels:
            satellite = 'DERIVED'
            sensor = level

        return (satellite, sensor, level)
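
    # Illustrative keys (added; the satellite and sensor names here are
    # examples only, not taken from the original code):
    #     ordinary NBAR scene  -> ('LS5', 'TM', 'NBAR')
    #     derived PQA product  -> ('DERIVED', 'PQA', 'PQA')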

    def get_temp_tile_directory(self):
        """Return a path to a directory for temporary tile related files."""

        return self.temp_tile_directory

    def check_metadata(self, dataset):
        """Check that the satellite, sensor, and bands are in the database.

        Checks that the dataset is of a kind that the database knows about
        (by checking basic metadata), and the bands that the database expects
        are present. Raises a DatasetError if the checks fail.
        """

        self.__check_satellite_and_sensor(dataset)
        self.__check_processing_level(dataset)
        self.__check_bands(dataset)

    def transaction(self, db=None):
        """Returns a Transaction context manager object.

        This is for use in a 'with' statement. It uses the Collection's
        database connection if one is not provided.
        """

        return Transaction(self.db if db is None else db,
                           self.transaction_stack)

    def lock_datasets(self, dataset_list):
        """Returns a Lock context manager object.

        dataset_list is a list of dataset ids for the datasets to be
        locked.

        This is for use in a 'with' statement. It uses the Collection's
        datacube object to manage the individual locks.
        """

        lock_list = ['Dataset-' + str(dataset_id)
                     for dataset_id in dataset_list]
        return Lock(self.datacube, lock_list)

    def create_acquisition_record(self, dataset):
        """Factory method to create an instance of the AcquisitonRecord class.

        This method creates a corresponding record in the database if one
        does not already exist.
        """

        return AcquisitionRecord(self, dataset)

    def create_tile_contents(self, tile_type_id, tile_footprint,
                             band_stack):
        """Factory method to create an instance of the TileContents class.

        The tile_type_id is used to look up, in the datacube's
        tile_type_dict, the information required for resampling extents
        and resolution.
        """

        tile_type_info = self.datacube.tile_type_dict[tile_type_id]
        tile_contents = TileContents(self.datacube.tile_root, tile_type_info,
                                     tile_footprint, band_stack)
        return tile_contents

    def current_transaction(self):
        """Returns the current transaction."""

        return self.transaction_stack[-1]

    def mark_tile_for_removal(self, tile_pathname):
        """Mark a tile file for removal on transaction commit."""

        self.current_transaction().mark_tile_for_removal(tile_pathname)

    def mark_tile_for_creation(self, tile_contents):
        """Mark a tile file for creation on transaction commit."""

        self.current_transaction().mark_tile_for_creation(tile_contents)

    #
    # worker methods
    #

    @staticmethod
    def __reindex_bands(bands):
        """Reindex the datacube.bands nested dict structure.

        This method returns the new nested dict which is indexed by:
            new_bands[dataset_key][tile_type][file_number]
        where dataset_key is a tuple:
            (satellite_tag, sensor_name, processing_level).

        The original indexing is
            bands[tile_type][satellite_sensor][file_number]
        where satellite_sensor is a tuple:
            (satellite_tag, sensor_name)

        Note that satellite_tag and sensor_name are replaced by 'DERIVED' and
        the processing_level for PQA and FC datasets. This needs to be taken
        into account when constructing a dataset_key.
        """

        new_bands = {}

        for (tile_type, band_dict) in bands.items():
            for ((satellite, sensor), sensor_dict) in band_dict.items():
                for (file_number, band_info) in sensor_dict.items():

                    dataset_key = (satellite, sensor, band_info['level_name'])

                    new_bands.setdefault(dataset_key, {})
                    new_bands[dataset_key].setdefault(tile_type, {})
                    new_bands[dataset_key][tile_type][file_number] = band_info

        return new_bands
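
    # Toy before/after for the reindexing (added), with tile_type 1 and
    # file_number 10, where band_info['level_name'] == 'NBAR':
    #
    #     bands     == {1: {('LS5', 'TM'): {10: band_info}}}
    #     new_bands == {('LS5', 'TM', 'NBAR'): {1: {10: band_info}}}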

    def __check_satellite_and_sensor(self, dataset):
        """Check that the dataset's satellite and sensor are in the database.

        Raises a DatasetError if they are not.
        """

        satellite_id = self.db.get_satellite_id(dataset.get_satellite_tag())
        if satellite_id is None:
            raise DatasetError("Unknown satellite tag: '%s'" %
                               dataset.get_satellite_tag())

        sensor_id = self.db.get_sensor_id(satellite_id,
                                          dataset.get_sensor_name())
        if sensor_id is None:
            msg = ("Unknown satellite and sensor pair: '%s', '%s'" %
                   (dataset.get_satellite_tag(), dataset.get_sensor_name()))
            raise DatasetError(msg)

    def __check_processing_level(self, dataset):
        """Check that the dataset's processing_level is in the database.

        Raises a DatasetError if it is not.
        """

        level_id = self.db.get_level_id(dataset.get_processing_level())
        if level_id is None:
            raise DatasetError("Unknown processing level: '%s'" %
                               dataset.get_processing_level())

    def __check_bands(self, dataset):
        """Check that the dataset has the expected bands.

        Raises a DatasetError if any band expected for this dataset (according
        to the database) is missing.
        """

        try:
            dataset_bands = self.new_bands[self.get_dataset_key(dataset)]
        except KeyError:
            raise DatasetError('No tile types for this dataset.')

        for tile_type_bands in dataset_bands.values():
            for band_info in tile_type_bands.values():
                dataset.find_band_file(band_info['file_pattern'])
Пример #14
0
class DatasetRecord(object):
    """DatasetRecord database interface class."""

    DATASET_METADATA_FIELDS = ['dataset_path',
                               'datetime_processed',
                               'dataset_size',
                               'll_x',
                               'll_y',
                               'lr_x',
                               'lr_y',
                               'ul_x',
                               'ul_y',
                               'ur_x',
                               'ur_y',
                               'x_pixels',
                               'y_pixels',
                               'xml_text'
                               ]

    def __init__(self, collection, acquisition, dataset):

        self.collection = collection
        self.datacube = collection.datacube
        self.db = IngestDBWrapper(self.datacube.db_connection)

        dataset_key = collection.get_dataset_key(dataset)
        self.dataset_bands = collection.new_bands[dataset_key]

        self.dataset = dataset
        self.mdd = dataset.metadata_dict

        self.dataset_dict = {}
        for field in self.DATASET_METADATA_FIELDS:
            self.dataset_dict[field] = self.mdd[field]

        self.dataset_dict['acquisition_id'] = acquisition.acquisition_id
        self.dataset_dict['crs'] = self.mdd['projection']
        self.dataset_dict['level_name'] = self.mdd['processing_level']
        self.dataset_dict['level_id'] = \
            self.db.get_level_id(self.dataset_dict['level_name'])

        self.dataset_dict['dataset_id'] = \
            self.db.get_dataset_id(self.dataset_dict)
        if self.dataset_dict['dataset_id'] is None:
            # create a new dataset record in the database
            self.dataset_dict['dataset_id'] = \
                self.db.insert_dataset_record(self.dataset_dict)
            self.needs_update = False
        else:
            # check that the old dataset record can be updated
            self.__check_update_ok()
            self.needs_update = True

        self.dataset_id = self.dataset_dict['dataset_id']

    def remove_mosaics(self, dataset_filter):
        """Remove mosaics associated with the dataset.

        This will mark mosaic files for removal, delete mosaic database
        records if they exist, and update the tile class of overlapping
        tiles (from other datasets) to reflect the lack of a mosaic. The
        'dataset_filter' is a list of dataset_ids to filter on. It should
        be the list of dataset_ids that have been locked (including this
        dataset). It is used to avoid operating on the tiles of an
        unlocked dataset.
        """

        # remove new mosaics (those with database records)
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_SINGLE_SCENE,
                                     TC_SUPERSEDED,
                                     TC_MOSAIC),
            output_tile_class_filter=(TC_MOSAIC,),
            dataset_filter=dataset_filter
            )

        for tile_record_list in overlap_dict.values():
            for tr in tile_record_list:
                self.db.remove_tile_record(tr['tile_id'])
                self.collection.mark_tile_for_removal(tr['tile_pathname'])

        # build a dictionary of overlaps (ignoring mosaics)
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_SINGLE_SCENE,
                                     TC_SUPERSEDED),
            output_tile_class_filter=(TC_SINGLE_SCENE,
                                      TC_SUPERSEDED),
            dataset_filter=dataset_filter
            )

        # update tile classes for overlap tiles from other datasets
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 2:
                raise DatasetError("Attempt to update a mosaic of three or " +
                                   "more datasets. Handling for this case " +
                                   "is not yet implemented.")
            for tr in tile_record_list:
                if tr['dataset_id'] != self.dataset_id:
                    self.db.update_tile_class(tr['tile_id'], TC_SINGLE_SCENE)

        # remove old mosaics (those without database records)
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 1:
                # tile_record_list is sorted by acquisition start time, so
                # the first record should be the one the mosaic filename is
                # based on.
                tr = tile_record_list[0]
                mosaic_pathname = \
                    self.__make_mosaic_pathname(tr['tile_pathname'])
                if os.path.isfile(mosaic_pathname):
                    self.collection.mark_tile_for_removal(mosaic_pathname)

    def remove_tiles(self):
        """Remove the tiles associated with the dataset.

        This will remove ALL the tiles belonging to this dataset, deleting
        database records and marking tile files for removal on commit. Mosaics
        should be removed BEFORE calling this (as it will delete the tiles
        needed to figure out the overlaps, but may not delete all the mosaics).
        """

        tile_list = self.db.get_dataset_tile_ids(self.dataset_id)

        for tile_id in tile_list:
            tile_pathname = self.db.get_tile_pathname(tile_id)
            self.db.remove_tile_record(tile_id)
            self.collection.mark_tile_for_removal(tile_pathname)

    def update(self):
        """Update the dataset record in the database.

        This first checks that the new dataset is more recent than
        the record in the database. If not it raises a dataset error.
        """

        self.__check_update_ok()
        self.db.update_dataset_record(self.dataset_dict)

    def make_tiles(self, tile_type_id, band_stack):
        """Tile the dataset, returning a list of tile_content objects."""

        tile_list = []
        tile_footprint_list = self.get_coverage(tile_type_id)
        for tile_footprint in tile_footprint_list:
            tile_contents = self.collection.create_tile_contents(
                tile_type_id,
                tile_footprint,
                band_stack
                )
            tile_contents.reproject()

            if tile_contents.has_data():
                tile_list.append(tile_contents)
            else:
                tile_contents.remove()

        return tile_list

    def store_tiles(self, tile_list):
        """Store tiles in the database and file store.

        'tile_list' is a list of tile_contents objects. This
        method will create the corrisponding database records and
        mark tiles for creation when the transaction commits.
        """

        tile_record_list = []
        for tile_contents in tile_list:
            tile_record = self.create_tile_record(tile_contents)
            tile_record_list.append(tile_record)

        return tile_record_list

    def create_mosaics(self, dataset_filter):
        """Create mosaics associated with the dataset.

        'dataset_filter' is a list of dataset_ids to filter on. It should
        be the list of dataset_ids that have been locked (including this
        dataset). It is used to avoid operating on the tiles of an
        unlocked dataset.
        """

        # Build a dictionary of overlaps (ignoring mosaics, including pending).
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_PENDING,
                                     TC_SINGLE_SCENE,
                                     TC_SUPERSEDED),
            output_tile_class_filter=(TC_PENDING,
                                      TC_SINGLE_SCENE,
                                      TC_SUPERSEDED),
            dataset_filter=dataset_filter
            )

        # Make mosaics and update tile classes as needed.
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 2:
                raise DatasetError("Attempt to create a mosaic of three or " +
                                   "more datasets. Handling for this case " +
                                   "is not yet implemented.")
            elif len(tile_record_list) == 2:
                self.__make_one_mosaic(tile_record_list)
                for tr in tile_record_list:
                    self.db.update_tile_class(tr['tile_id'], TC_SUPERSEDED)
            else:
                for tr in tile_record_list:
                    self.db.update_tile_class(tr['tile_id'], TC_SINGLE_SCENE)

    def get_removal_overlaps(self):
        """Returns a list of overlapping dataset ids for mosaic removal."""

        tile_class_filter = (TC_SINGLE_SCENE,
                             TC_SUPERSEDED,
                             TC_MOSAIC)
        return self.get_overlaps(tile_class_filter)

    def get_creation_overlaps(self):
        """Returns a list of overlapping dataset_ids for mosaic creation."""

        tile_class_filter = (TC_PENDING,
                             TC_SINGLE_SCENE,
                             TC_SUPERSEDED)
        return self.get_overlaps(tile_class_filter)

    def get_overlaps(self, tile_class_filter):
        """Returns a list of overlapping dataset ids, including this dataset.

        A dataset is overlapping if it contains tiles that overlap with
        tiles belonging to this dataset. Only tiles in the tile_class_filter
        are considered.
        """

        dataset_list = self.db.get_overlapping_dataset_ids(
            self.dataset_id,
            tile_class_filter=tile_class_filter
            )

        if not dataset_list:
            dataset_list = [self.dataset_id]

        return dataset_list

    def create_tile_record(self, tile_contents):
        """Factory method to create an instance of the TileRecord class.

        The created object will be responsible for inserting tile table records
        into the database for reprojected or mosaicked tiles."""
        self.collection.mark_tile_for_creation(tile_contents)
        return TileRecord(self.collection, self, tile_contents)

    def mark_as_tiled(self):
        """Flag the dataset record as tiled in the database.

        This flag does not exist in the current database schema,
        so this method does nothing at the moment."""

        pass

    def list_tile_types(self):
        """Returns a list of the tile type ids for this dataset."""

        return self.dataset_bands.keys()

    def get_tile_bands(self, tile_type_id):
        """Returns a dictionary containing the band info for one tile type.

        The tile_type_id must be valid for this dataset; the valid ids are
        available from list_tile_types above.
        """

        return self.dataset_bands[tile_type_id]

    def get_coverage(self, tile_type_id):
        """Given the coordinate reference system of the dataset and that of the
        tile_type_id, return a list of tiles within the dataset footprint"""
        tile_type_info = self.collection.datacube.tile_type_dict[tile_type_id]
        #Get geospatial information from the dataset.
        dataset_crs = self.mdd['projection']
        dataset_geotransform = self.mdd['geo_transform']
        pixels = self.mdd['x_pixels']
        lines = self.mdd['y_pixels']
        #Look up the datacube's projection information for this tile_type
        tile_crs = tile_type_info['crs']
        #Get the transformation between the two projections
        transformation = self.define_transformation(dataset_crs, tile_crs)
        #Determine the bounding quadrilateral of the dataset extent
        #in tile coordinates
        dataset_bbox = self.get_bbox(transformation, dataset_geotransform,
                                     pixels, lines)
        #Determine the maximum inner rectangle, which is guaranteed to be
        #tiled, and the minimum outer rectangle outside which no tiles
        #will exist.
        cube_origin = (tile_type_info['x_origin'], tile_type_info['y_origin'])
        cube_tile_size = (tile_type_info['x_size'], tile_type_info['y_size'])
        coverage = self.get_touched_tiles(dataset_bbox,
                                          cube_origin, cube_tile_size)
        return coverage

    #
    # worker methods
    #

    def __check_update_ok(self):
        """Checks if an update is possible, raises a DatasetError otherwise."""

        tile_class_filter = (TC_SINGLE_SCENE,
                             TC_SUPERSEDED)
        if self.db.dataset_older_than_database(
                self.dataset_dict['dataset_id'],
                self.dataset_dict['datetime_processed'],
                tile_class_filter):
            raise DatasetError("Dataset to be ingested is older than " +
                               "the version in the database.")

    def __make_one_mosaic(self, tile_record_list):
        """Create a single mosaic.

        This creates the mosaic contents, creates the database record,
        and marks the mosaic contents for creation on transaction commit.
        """
        mosaic = MosaicContents(
            tile_record_list,
            self.datacube.tile_type_dict,
            self.dataset_dict['level_name'],
            self.collection.get_temp_tile_directory()
            )
        mosaic.create_record(self.db)
        self.collection.mark_tile_for_creation(mosaic)

    def __make_mosaic_pathname(self, tile_pathname):
        """Return the pathname of the mosaic corrisponding to a tile."""

        (tile_dir, tile_basename) = os.path.split(tile_pathname)

        mosaic_dir = os.path.join(tile_dir, 'mosaic_cache')
        if self.dataset_dict['level_name'] == 'PQA':
            mosaic_basename = tile_basename
        else:
            mosaic_basename = re.sub(r'\.\w+$', '.vrt', tile_basename)

        return os.path.join(mosaic_dir, mosaic_basename)
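
For illustration, the substitution above moves a tile into the mosaic cache
and swaps its extension for '.vrt' (PQA mosaics keep the tile basename
unchanged); the path below is made up:

import os
import re

tile_pathname = '/tiles/LS5_TM_NBAR_150_-034_2005.tif'  # hypothetical path
tile_dir, tile_basename = os.path.split(tile_pathname)
mosaic_basename = re.sub(r'\.\w+$', '.vrt', tile_basename)
print(os.path.join(tile_dir, 'mosaic_cache', mosaic_basename))
# /tiles/mosaic_cache/LS5_TM_NBAR_150_-034_2005.vrt
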

#
# Worker methods for coverage.
#
# These are public so that they can be called by test_dataset_record.
#

    def define_transformation(self, dataset_crs, tile_crs):
        """Return the transformation between dataset_crs
        and tile_crs projections"""
        osr.UseExceptions()
        try:
            dataset_spatial_reference = self.create_spatial_ref(dataset_crs)
            tile_spatial_reference = self.create_spatial_ref(tile_crs)
            if dataset_spatial_reference is None:
                raise DatasetError('Unknown projection %s'
                                   % str(dataset_crs))
            if tile_spatial_reference is None:
                raise DatasetError('Unknown projection %s'
                                   % str(tile_crs))
            return osr.CoordinateTransformation(dataset_spatial_reference,
                                                tile_spatial_reference)
        except Exception:
            raise DatasetError('Coordinate transformation error ' +
                               'for transforming %s to %s' %
                               (str(dataset_crs), str(tile_crs)))

    @staticmethod
    def create_spatial_ref(crs):
        """Create a spatial reference system for projecton crs.
        Called by define_transformation()"""
        # pylint: disable=broad-except

        osr.UseExceptions()
        try:
            spatial_ref = osr.SpatialReference()
        except Exception:
            raise DatasetError('Unable to create a spatial reference '
                               'for %s' % str(crs))
        try:
            spatial_ref.ImportFromWkt(crs)
            return spatial_ref
        except Exception:
            pass
        try:
            matchobj = re.match(r'EPSG:(\d+)', crs)
            epsg_code = int(matchobj.group(1))
            spatial_ref.ImportFromEPSG(epsg_code)
            return spatial_ref
        except Exception:
            return None

    @staticmethod
    def get_bbox(transform, geotrans, pixels, lines):
        """Return the coordinates of the dataset footprint in clockwise order
        from upper-left"""
        xul, yul, dummy_z =  \
            transform.TransformPoint(geotrans[0], geotrans[3], 0)
        xur, yur, dummy_z = \
            transform.TransformPoint(geotrans[0] + geotrans[1] * pixels,
                                     geotrans[3] + geotrans[4] * pixels, 0)
        xll, yll, dummy_z = \
            transform.TransformPoint(geotrans[0] + geotrans[2] * lines,
                                     geotrans[3] + geotrans[5] * lines, 0)
        xlr, ylr, dummy_z = \
            transform.TransformPoint(
                geotrans[0] + geotrans[1] * pixels + geotrans[2] * lines,
                geotrans[3] + geotrans[4] * pixels + geotrans[5] * lines, 0)
        return [(xul, yul), (xur, yur), (xlr, ylr), (xll, yll)]
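
The corner arithmetic follows the GDAL geotransform convention,
x = gt[0] + col * gt[1] + row * gt[2] and y = gt[3] + col * gt[4] + row * gt[5].
A quick self-contained check, using an identity stand-in for the osr
transformation so the example runs without GDAL (the class name DatasetRecord
is assumed here, as suggested by create_dataset_record below):

class IdentityTransform(object):
    """Stand-in for osr.CoordinateTransformation, for this example only."""
    @staticmethod
    def TransformPoint(x, y, z=0):
        return (x, y, z)

# A north-up geotransform: origin (140.0, -33.0), 0.00025-degree pixels.
geotrans = (140.0, 0.00025, 0.0, -33.0, 0.0, -0.00025)
bbox = DatasetRecord.get_bbox(IdentityTransform(), geotrans, 4000, 4000)
print(bbox)
# [(140.0, -33.0), (141.0, -33.0), (141.0, -34.0), (140.0, -34.0)]
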

    def get_touched_tiles(self, dataset_bbox, cube_origin, cube_tile_size):
        """Return a list of tuples (itile, jtile) comprising all tiles
        footprints that intersect the dataset bounding box"""
        definite_tiles, possible_tiles = \
            self.get_definite_and_possible_tiles(dataset_bbox,
                                                 cube_origin, cube_tile_size)
        coverage_set = definite_tiles
        #Check possible tiles:
        #Check if the tile perimeter intersects the dataset bbox perimeter:
        intersected_tiles = \
            self.get_intersected_tiles(possible_tiles, dataset_bbox,
                                       cube_origin, cube_tile_size)
        coverage_set = coverage_set.union(intersected_tiles)
        possible_tiles = possible_tiles.difference(intersected_tiles)
        #Otherwise the tile might be wholly contained in the dataset bbox
        contained_tiles = \
            self.get_contained_tiles(possible_tiles, dataset_bbox,
                                     cube_origin, cube_tile_size)
        coverage_set = coverage_set.union(contained_tiles)
        return coverage_set

    @staticmethod
    def get_definite_and_possible_tiles(bbox, cube_origin, cube_tile_size):
        """Return two lists of tile footprints: from the largest rectangle
        wholly contained within the dataset bbox and the smallest rectangle
        containing the bbox."""
        #pylint: disable=too-many-locals
        #unpack the bbox vertices in clockwise order from upper-left
        xyul, xyur, xylr, xyll = bbox
        xul, yul = xyul
        xur, yur = xyur
        xlr, ylr = xylr
        xll, yll = xyll
        #unpack the origin of the tiled datacube (e.g. lat=0, lon=0) and the
        #datacube tile size
        xorigin, yorigin = cube_origin
        xsize, ysize = cube_tile_size
        #Define the largest rectangle wholly contained within footprint
        xmin = max(xll, xul)
        xmax = min(xlr, xur)
        ymin = max(yll, ylr)
        ymax = min(yul, yur)
        xmin_index = int(floor((xmin - xorigin) / xsize))
        xmax_index = int(floor((xmax - xorigin) / xsize))
        ymin_index = int(floor((ymin - yorigin) / ysize))
        ymax_index = int(floor((ymax - yorigin) / ysize))
        definite_tiles = set([(itile, jtile)
                              for itile in range(xmin_index, xmax_index + 1)
                              for jtile in range(ymin_index, ymax_index + 1)])
        #Define the smallest rectangle which is guaranteed to include all tiles
        #in the footprint.
        xmin = min(xll, xul)
        xmax = max(xlr, xur)
        ymin = min(yll, ylr)
        ymax = max(yul, yur)
        xmin_index = int(floor((xmin - xorigin) / xsize))
        xmax_index = int(floor((xmax - xorigin) / xsize))
        ymin_index = int(floor((ymin - yorigin) / ysize))
        ymax_index = int(floor((ymax - yorigin) / ysize))
        possible_tiles = set([(itile, jtile)
                              for itile in range(xmin_index, xmax_index + 1)
                              for jtile in range(ymin_index, ymax_index + 1)
                              ]).difference(definite_tiles)
        return (definite_tiles, possible_tiles)
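
A worked example of the index arithmetic, assuming a hypothetical 1-degree
tile grid with its origin at (0, 0): a bbox edge spanning longitudes 140.2
to 141.7 touches tiles 140 and 141.

from math import floor

xorigin, xsize = 0.0, 1.0
xmin, xmax = 140.2, 141.7
xmin_index = int(floor((xmin - xorigin) / xsize))   # 140
xmax_index = int(floor((xmax - xorigin) / xsize))   # 141
print(list(range(xmin_index, xmax_index + 1)))      # [140, 141]
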

    def get_intersected_tiles(self, candidate_tiles, dset_bbox,
                              cube_origin, cube_tile_size):
        """Return the subset of candidate_tiles that have an intersection with
        the dataset bounding box"""
        #pylint: disable=too-many-locals
        xorigin, yorigin = cube_origin
        xsize, ysize = cube_tile_size
        keep_list = []
        for itile, jtile in candidate_tiles:
            intersection_exists = False
            (x0, y0) = (xorigin + itile * xsize,
                        yorigin + (jtile + 1) * ysize)
            tile_bbox = [(x0, y0), (x0 + xsize, y0),
                         (x0 + xsize, y0 - ysize), (x0, y0 - ysize)]
            tile_vtx_number = len(tile_bbox)
            dset_vtx_number = len(dset_bbox)
            for tile_vtx in range(tile_vtx_number):
                x1, y1 = tile_bbox[tile_vtx]
                x2, y2 = tile_bbox[(tile_vtx + 1) % tile_vtx_number]
                for dset_vtx in range(dset_vtx_number):
                    x3, y3 = dset_bbox[dset_vtx]
                    x4, y4 = dset_bbox[(dset_vtx + 1) % dset_vtx_number]
                    xcoords = [x1, x2, x3, x4]
                    ycoords = [y1, y2, y3, y4]
                    intersection_exists = \
                        self.check_intersection(xcoords, ycoords)
                    if intersection_exists:
                        keep_list.append((itile, jtile))
                        break
                if intersection_exists:
                    break
        return set(keep_list)

    @staticmethod
    def get_contained_tiles(candidate_tiles, dset_bbox,
                            cube_origin, cube_tile_size):
        """Return the subset of candidate tiles that lie wholly within the
        dataset bounding box"""
        #pylint: disable=too-many-locals
        xorigin, yorigin = cube_origin
        xsize, ysize = cube_tile_size
        keep_list = []
        for itile, jtile in candidate_tiles:
            tile_vtx_inside = []
            (x0, y0) = (xorigin + itile * xsize,
                        yorigin + (jtile + 1) * ysize)
            tile_bbox = [(x0, y0), (x0 + xsize, y0),
                         (x0 + xsize, y0 - ysize), (x0, y0 - ysize)]
            dset_vtx_number = len(dset_bbox)
            for x, y in tile_bbox:
                #Check if this vertex lies within the dataset bounding box:
                winding_number = 0
                for dset_vtx in range(dset_vtx_number):
                    x1, y1 = dset_bbox[dset_vtx]
                    x2, y2 = dset_bbox[(dset_vtx + 1) % dset_vtx_number]
                    if y >= y1 and y < y2:
                        if (x - x1) * (y2 - y1) > (x2 - x1) * (y - y1):
                            winding_number += 1
                    elif y <= y1 and y > y2:
                        if (x - x1) * (y2 - y1) < (x2 - x1) * (y - y1):
                            winding_number += 1
                tile_vtx_inside.append(winding_number % 2 == 1)
            if tile_vtx_inside.count(True) == len(tile_bbox):
                keep_list.append((itile, jtile))
            assert tile_vtx_inside.count(True) == 4 or \
                tile_vtx_inside.count(True) == 0, \
                "Tile partially inside dataset bounding box but has" \
                "no intersection"
        return set(keep_list)
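
The inner loop above is the classic crossing-number (even-odd) test: a
horizontal ray from the vertex crosses the polygon boundary an odd number of
times exactly when the vertex lies inside. A standalone version of the same
parity test (a sketch for illustration, not code from this module):

def point_in_polygon(x, y, poly):
    """Even-odd test: toggle on each edge the horizontal ray crosses."""
    inside = False
    nvtx = len(poly)
    for i in range(nvtx):
        x1, y1 = poly[i]
        x2, y2 = poly[(i + 1) % nvtx]
        if (y1 <= y < y2) or (y2 <= y < y1):
            # x coordinate where the edge crosses the ray's height
            x_cross = x1 + (y - y1) * (x2 - x1) / (y2 - y1)
            if x < x_cross:
                inside = not inside
    return inside

square = [(0.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0)]  # clockwise
print(point_in_polygon(0.5, 0.5, square))   # True
print(point_in_polygon(1.5, 0.5, square))   # False
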

    @staticmethod
    def check_intersection(xpts, ypts):
        """Determines if the line segments
        (xpts[0], ypts[0]) to (xpts[1], ypts[1]) and
        (xpts[2], ypts[2]) to (xpts[3], ypts[3]) intersect"""
        pvec = (xpts[0], ypts[0])
        qvec = (xpts[2], ypts[2])
        rvec = (xpts[1] - xpts[0], ypts[1] - ypts[0])
        svec = (xpts[3] - xpts[2], ypts[3] - ypts[2])
        rvec_cross_svec = rvec[0] * svec[1] - rvec[1] * svec[0]
        if rvec_cross_svec == 0:
            return False
        qminusp_cross_svec = \
            (qvec[0] - pvec[0]) * svec[1] - (qvec[1] - pvec[1]) * svec[0]
        qminusp_cross_rvec = \
            (qvec[0] - pvec[0]) * rvec[1] - (qvec[1] - pvec[1]) * rvec[0]
        tparameter = qminusp_cross_svec / rvec_cross_svec
        uparameter = qminusp_cross_rvec / rvec_cross_svec
        if tparameter > 0 and tparameter < 1 and \
                uparameter > 0 and uparameter < 1:
            return True
        return False
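
check_intersection is the standard parametric segment test: writing the
segments as p + t*r and q + u*s, they cross where
t = (q - p) x s / (r x s) and u = (q - p) x r / (r x s), with a proper
intersection requiring 0 < t < 1 and 0 < u < 1. A zero cross product r x s
means the segments are parallel; collinear overlaps are deliberately reported
as no intersection. For example (DatasetRecord is the assumed class name, as
above):

# The unit square's diagonal and anti-diagonal cross at (0.5, 0.5).
print(DatasetRecord.check_intersection([0.0, 1.0, 0.0, 1.0],
                                       [0.0, 1.0, 1.0, 0.0]))   # True
# Parallel segments never intersect under this test.
print(DatasetRecord.check_intersection([0.0, 1.0, 0.0, 1.0],
                                       [0.0, 0.0, 1.0, 1.0]))   # False
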
Example #15
class TileRecord(object):
    # pylint: disable=too-many-instance-attributes
    """TileRecord database interface class."""

    TILE_METADATA_FIELDS = [
        'tile_id', 'x_index', 'y_index', 'tile_type_id', 'dataset_id',
        'tile_pathname', 'tile_class_id', 'tile_size', 'ctime'
    ]

    def __init__(self, collection, dataset_record, tile_contents):
        self.collection = collection
        self.datacube = collection.datacube
        self.dataset_record = dataset_record
        self.tile_contents = tile_contents
        self.tile_footprint = tile_contents.tile_footprint
        self.tile_type_id = tile_contents.tile_type_id
        #Set tile_class_id to pending.
        self.tile_class_id = TC_PENDING
        #Set tile_id, determined below from database query
        self.tile_id = None
        self.db = IngestDBWrapper(self.datacube.db_connection)
        # Fill a dictionary with data for the tile
        tile_dict = {}
        self.tile_dict = tile_dict
        tile_dict['x_index'] = self.tile_footprint[0]
        tile_dict['y_index'] = self.tile_footprint[1]
        tile_dict['tile_type_id'] = self.tile_type_id
        tile_dict['dataset_id'] = self.dataset_record.dataset_id
        # Store final destination in the 'tile_pathname' field
        tile_dict['tile_pathname'] = self.tile_contents.tile_output_path
        tile_dict['tile_class_id'] = self.tile_class_id  # TC_PENDING
        # The physical file is currently in the temporary location
        tile_dict['tile_size'] = \
            get_file_size_mb(self.tile_contents.temp_tile_output_path)

        self.update_tile_footprint()

        # Make the tile record entry on the database:
        self.tile_id = self.db.get_tile_id(tile_dict)
        if self.tile_id is None:
            self.tile_id = self.db.insert_tile_record(tile_dict)
        else:
            # If there was any existing tile corresponding to tile_dict then
            # it should already have been removed.
            raise AssertionError("Attempt to recreate an existing tile.")
        tile_dict['tile_id'] = self.tile_id

    def update_tile_footprint(self):
        """Update the tile footprint entry in the database"""

        if not self.db.tile_footprint_exists(self.tile_dict):
            # We may need to create a new footprint record.
            footprint_dict = {
                'x_index': self.tile_footprint[0],
                'y_index': self.tile_footprint[1],
                'tile_type_id': self.tile_type_id,
                'x_min': self.tile_contents.tile_extents[0],
                'y_min': self.tile_contents.tile_extents[1],
                'x_max': self.tile_contents.tile_extents[2],
                'y_max': self.tile_contents.tile_extents[3],
                'bbox': 'Populate this within sql query?'
            }

            # Create an independent database connection for this transaction.
            my_db = IngestDBWrapper(self.datacube.create_connection())
            try:
                with self.collection.transaction(my_db):
                    if not my_db.tile_footprint_exists(self.tile_dict):
                        my_db.insert_tile_footprint(footprint_dict)

            except psycopg2.IntegrityError:
                # If we get an IntegrityError we assume the tile_footprint
                # is already in the database, and we do not need to add it.
                pass

            finally:
                my_db.close()
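
update_tile_footprint uses a check / insert / tolerate-duplicate idiom on an
independent connection, so a footprint committed by a concurrent ingest
process is harmless rather than fatal. The same pattern in isolation (a
sketch only; the table and column names below are assumptions, not the real
schema):

import psycopg2

def ensure_footprint(conn, x_index, y_index, tile_type_id):
    """Insert a footprint row if absent, tolerating a concurrent insert."""
    try:
        with conn:  # commit on success, roll back on exception
            cur = conn.cursor()
            cur.execute(
                "INSERT INTO tile_footprint "
                "(x_index, y_index, tile_type_id) VALUES (%s, %s, %s)",
                (x_index, y_index, tile_type_id))
    except psycopg2.IntegrityError:
        pass  # another process inserted the same footprint first; fine
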
Example #17
class Collection(object):
    """Collection database interface class."""

    #
    # Interface methods
    #

    def __init__(self, datacube):
        """Initialise the collection object."""

        self.datacube = datacube
        self.db = IngestDBWrapper(datacube.db_connection)
        self.new_bands = self.__reindex_bands(datacube.bands)
        self.transaction_stack = []

        self.temp_tile_directory = os.path.join(self.datacube.tile_root,
                                                'ingest_temp',
                                                self.datacube.process_id)
        create_directory(self.temp_tile_directory)

    def cleanup(self):
        """Do end-of-process cleanup.

        Deletes the process-specific temporary directory. Does not
        close the database connection (at present), because the datacube
        object has a destructor which does that.
        """

        shutil.rmtree(self.temp_tile_directory, ignore_errors=True)

    @staticmethod
    def get_dataset_key(dataset):
        """Return the dataset key for use with the new_bands dictionary.

        This is a tuple (satellite_tag, sensor_name, processing_level) except
        that for derived datasets (currently PQA and FC) the satellite_tag is
        replaced with 'DERIVED' and the processing_level is used as the
        sensor_name. So the tuple looks like:
        ('DERIVED', processing_level, processing_level).
        """

        derived_levels = {'PQA', 'FC'}

        satellite = dataset.get_satellite_tag()
        sensor = dataset.get_sensor_name()
        level = dataset.get_processing_level()

        if level in derived_levels:
            satellite = 'DERIVED'
            sensor = level

        return (satellite, sensor, level)
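
For example (the satellite and sensor tags below are illustrative), an
ordinary NBAR dataset keys as ('LS5', 'TM', 'NBAR'), while a derived PQA
dataset keys as ('DERIVED', 'PQA', 'PQA'):

class FakeDataset(object):
    """Minimal stand-in for a dataset object, for illustration only."""
    def __init__(self, satellite, sensor, level):
        self.values = (satellite, sensor, level)
    def get_satellite_tag(self):
        return self.values[0]
    def get_sensor_name(self):
        return self.values[1]
    def get_processing_level(self):
        return self.values[2]

print(Collection.get_dataset_key(FakeDataset('LS5', 'TM', 'NBAR')))
# ('LS5', 'TM', 'NBAR')
print(Collection.get_dataset_key(FakeDataset('LS5', 'TM', 'PQA')))
# ('DERIVED', 'PQA', 'PQA')
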

    def get_temp_tile_directory(self):
        """Return a path to a directory for temporary tile related files."""

        return self.temp_tile_directory

    def check_metadata(self, dataset):
        """Check that the satellite, sensor, and bands are in the database.

        Checks that the dataset is of a kind that the database knows about
        (by checking basic metadata), and the bands that the database expects
        are present. Raises a DatasetError if the checks fail.
        """

        self.__check_satellite_and_sensor(dataset)
        self.__check_processing_level(dataset)
        self.__check_bands(dataset)

    def transaction(self, db=None):
        """Returns a Transaction context manager object.

        This is for use in a 'with' statement. It uses the Collection's
        own database connection if one is not provided.
        """

        return Transaction(self.db if db is None else db,
                           self.transaction_stack)

    def lock_datasets(self, dataset_list):
        """Returns a Lock context manager object.

        dataset_list is a list of dataset ids for the datasets to be
        locked.

        This is for use in a 'with' statement. It uses the Collection's
        datacube object to manage the individual locks.
        """

        lock_list = [
            'Dataset-' + str(dataset_id) for dataset_id in dataset_list
        ]
        return Lock(self.datacube, lock_list)

    def create_acquisition_record(self, dataset):
        """Factory method to create an instance of the AcquisitonRecord class.

        This method creates a corresponding record in the database if one
        does not already exist.
        """

        return AcquisitionRecord(self, dataset)

    def create_tile_contents(self, tile_type_id, tile_footprint, band_stack):
        """Factory method to create an instance of the TileContents class.

        The tile_type_id selects the tile type information required for
        resampling extents and resolution.
        """

        tile_type_info = self.datacube.tile_type_dict[tile_type_id]
        tile_contents = TileContents(self.datacube.tile_root, tile_type_info,
                                     tile_footprint, band_stack)
        return tile_contents

    def current_transaction(self):
        """Returns the current transaction."""

        return self.transaction_stack[-1]

    def mark_tile_for_removal(self, tile_pathname):
        """Mark a tile file for removal on transaction commit."""

        self.current_transaction().mark_tile_for_removal(tile_pathname)

    def mark_tile_for_creation(self, tile_contents):
        """Mark a tile file for creation on transaction commit."""

        self.current_transaction().mark_tile_for_creation(tile_contents)

    #
    # worker methods
    #

    @staticmethod
    def __reindex_bands(bands):
        """Reindex the datacube.bands nested dict structure.

        This method returns the new nested dict which is indexed by:
            new_bands[dataset_key][tile_type][file_number]
        where dataset_key is a tuple:
            (satellite_tag, sensor_name, processing_level).

        The original indexing is
            bands[tile_type][satellite_sensor][file_number]
        where satellite_sensor is a tuple:
            (satellite_tag, sensor_name)

        Note that satellite_tag and sensor_name are replaced by 'DERIVED' and
        the processing_level for PQA and FC datasets. This needs to be taken
        into account when constructing a dataset_key.
        """

        new_bands = {}

        for (tile_type, band_dict) in bands.items():
            for ((satellite, sensor), sensor_dict) in band_dict.items():
                for (file_number, band_info) in sensor_dict.items():

                    dataset_key = (satellite, sensor, band_info['level_name'])

                    new_bands.setdefault(dataset_key, {})
                    new_bands[dataset_key].setdefault(tile_type, {})
                    new_bands[dataset_key][tile_type][file_number] = band_info

        return new_bands
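
A small concrete example of the reindexing, with band_info dicts abbreviated
(the sample values are assumed, following the shapes in the docstring):

bands = {
    1: {                                  # tile_type
        ('LS5', 'TM'): {                  # (satellite_tag, sensor_name)
            10: {'level_name': 'NBAR'},   # file_number -> band_info
        },
    },
}
# After reindexing, the same band_info is reachable as:
#     new_bands[('LS5', 'TM', 'NBAR')][1][10]
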

    def __check_satellite_and_sensor(self, dataset):
        """Check that the dataset's satellite and sensor are in the database.

        Raises a DatasetError if they are not.
        """

        satellite_id = self.db.get_satellite_id(dataset.get_satellite_tag())
        if satellite_id is None:
            raise DatasetError("Unknown satellite tag: '%s'" %
                               dataset.get_satellite_tag())

        sensor_id = self.db.get_sensor_id(satellite_id,
                                          dataset.get_sensor_name())
        if sensor_id is None:
            msg = ("Unknown satellite and sensor pair: '%s', '%s'" %
                   (dataset.get_satellite_tag(), dataset.get_sensor_name()))
            raise DatasetError(msg)

    def __check_processing_level(self, dataset):
        """Check that the dataset's processing_level is in the database.

        Raises a DatasetError if it is not.
        """

        level_id = self.db.get_level_id(dataset.get_processing_level())
        if level_id is None:
            raise DatasetError("Unknown processing level: '%s'" %
                               dataset.get_processing_level())

    def __check_bands(self, dataset):
        """Check that the dataset has the expected bands.

        Raises a DatasetError if any band expected for this dataset (according
        to the database) is missing.
        """

        try:
            dataset_bands = self.new_bands[self.get_dataset_key(dataset)]
        except KeyError:
            raise DatasetError('No tile types for this dataset.')

        for tile_type_bands in dataset_bands.values():
            for band_info in tile_type_bands.values():
                dataset.find_band_file(band_info['file_pattern'])
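
A caller might wrap these checks per dataset so that one bad dataset is
skipped rather than aborting the whole ingest run (hypothetical usage):

try:
    collection.check_metadata(dataset)
except DatasetError as err:
    print('Skipping dataset: %s' % err)
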
Example #18
class AcquisitionRecord(object):
    """AcquisitionRecord database interface class."""

    ACQUISITION_METADATA_FIELDS = ['satellite_tag',
                                   'sensor_name',
                                   'x_ref',
                                   'y_ref',
                                   'start_datetime',
                                   'end_datetime',
                                   'll_lon',
                                   'll_lat',
                                   'lr_lon',
                                   'lr_lat',
                                   'ul_lon',
                                   'ul_lat',
                                   'ur_lon',
                                   'ur_lat',
                                   'gcp_count',
                                   'mtl_text',
                                   'cloud_cover'
                                   ]

    def __init__(self, collection, dataset):

        self.collection = collection
        self.datacube = collection.datacube
        self.db = IngestDBWrapper(self.datacube.db_connection)
        self.acquisition_dict = {}
        self.acquisition_id = None  # set below

        # Fill a dictionary with data for the acquisition.
        # Start with fields from the dataset metadata.
        for field in self.ACQUISITION_METADATA_FIELDS:
            self.acquisition_dict[field] = dataset.metadata_dict[field]

        # Next look up the satellite_id and sensor_id in the
        # database and fill these in.
        self.acquisition_dict['satellite_id'] = \
            self.db.get_satellite_id(self.acquisition_dict['satellite_tag'])
        self.acquisition_dict['sensor_id'] = \
            self.db.get_sensor_id(self.acquisition_dict['satellite_id'],
                                  self.acquisition_dict['sensor_name'])

        # Finally look up the acquisition_id, or create a new record if it
        # does not exist, and fill it into the dictionary.
        self.acquisition_id = \
            self.db.get_acquisition_id_fuzzy(self.acquisition_dict)
        if self.acquisition_id is None:
            self.acquisition_id = \
                self.db.insert_acquisition_record(self.acquisition_dict)
        else:
            # Do we update the acquisition record here?
            pass
        self.acquisition_dict['acquisition_id'] = self.acquisition_id

    def create_dataset_record(self, dataset):
        """Factory method to create an instance of the DatasetRecord class.

        This method creates a new record in the database if one does not
        already exist. It will overwrite an earlier dataset record (and its
        tiles) if one exists. It will raise a DatasetError if a later (or
        equal time) record for this dataset already exists in the database.
        """

        return DatasetRecord(self.collection, self, dataset)
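
Taken together, the factory methods in these examples imply roughly the
following single-dataset ingest flow. This is a hedged sketch assembled from
the classes above, not a verbatim excerpt of the ingester; building tile_list
from get_coverage() and create_tile_contents() is elided:

collection = Collection(datacube)          # datacube built elsewhere
collection.check_metadata(dataset)         # may raise DatasetError

acquisition = collection.create_acquisition_record(dataset)
dataset_record = acquisition.create_dataset_record(dataset)

overlap_ids = dataset_record.get_creation_overlaps()
with collection.lock_datasets(overlap_ids):
    with collection.transaction():
        dataset_record.store_tiles(tile_list)      # tile_list built earlier
        dataset_record.create_mosaics(overlap_ids)

collection.cleanup()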