Example #1
class AcquisitionRecord(object):
    """AcquisitionRecord database interface class."""

    ACQUISITION_METADATA_FIELDS = [
        'satellite_tag', 'sensor_name', 'x_ref', 'y_ref', 'start_datetime',
        'end_datetime', 'll_lon', 'll_lat', 'lr_lon', 'lr_lat', 'ul_lon',
        'ul_lat', 'ur_lon', 'ur_lat', 'gcp_count', 'mtl_text', 'cloud_cover'
    ]

    def __init__(self, collection, dataset):

        self.collection = collection
        self.datacube = collection.datacube
        self.db = IngestDBWrapper(self.datacube.db_connection)
        self.acquisition_dict = {}
        self.acquisition_id = None  # set below

        # Fill a dictionary with data for the acquisition.
        # Start with fields from the dataset metadata.
        for field in self.ACQUISITION_METADATA_FIELDS:
            self.acquisition_dict[field] = dataset.metadata_dict[field]

        # Next look up the satellite_id and sensor_id in the
        # database and fill these in.
        self.acquisition_dict['satellite_id'] = \
            self.db.get_satellite_id(self.acquisition_dict['satellite_tag'])
        self.acquisition_dict['sensor_id'] = \
            self.db.get_sensor_id(self.acquisition_dict['satellite_id'],
                                  self.acquisition_dict['sensor_name'])

        # Finally, look up the acquisition_id, or create a new record if it
        # does not exist, and fill it into the dictionary.
        self.acquisition_id = \
            self.db.get_acquisition_id_fuzzy(self.acquisition_dict)
        if self.acquisition_id is None:
            self.acquisition_id = \
                self.db.insert_acquisition_record(self.acquisition_dict)
        else:
            # TODO: decide whether the existing acquisition record should
            # be updated here.
            pass
        self.acquisition_dict['acquisition_id'] = self.acquisition_id

    def create_dataset_record(self, dataset):
        """Factory method to create an instance of the DatasetRecord class.

        This method creates a new record in the database if one does not
        already exist. It will overwrite an earlier dataset record (and its
        tiles) if one exists. It will raise a DatasetError if a later (or
        equal time) record for this dataset already exists in the database.
        """

        return DatasetRecord(self.collection, self, dataset)
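
A minimal usage sketch for the class above (illustrative only: 'collection'
and 'dataset' are assumed to be pre-built objects from the surrounding
ingest framework):

# Construct the record; this looks up or inserts the acquisition row.
acquisition = AcquisitionRecord(collection, dataset)
# Create the corresponding dataset record via the factory method.
dataset_record = acquisition.create_dataset_record(dataset)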
Example #2
class Collection(object):
    """Collection database interface class."""

    #
    # Interface methods
    #

    def __init__(self, datacube):
        """Initialise the collection object."""

        self.datacube = datacube
        self.db = IngestDBWrapper(datacube.db_connection)
        self.new_bands = self.__reindex_bands(datacube.bands)
        self.transaction_stack = []

        self.temp_tile_directory = os.path.join(self.datacube.tile_root,
                                                'ingest_temp',
                                                self.datacube.process_id)
        create_directory(self.temp_tile_directory)

    def cleanup(self):
        """Do end-of-process cleanup.

        Deletes the process-specific temporary directory. Does not
        close the database connection (at present), because the datacube
        object has a destructor which does that.
        """

        shutil.rmtree(self.temp_tile_directory, ignore_errors=True)

    @staticmethod
    def get_dataset_key(dataset):
        """Return the dataset key for use with the new_bands dictionary.

        This is a tuple (satellite_tag, sensor_name, processing_level) except
        that for derived datasets (currently PQA and FC) the satellite_tag is
        replaced with 'DERIVED' and the processing_level is used as the
        sensor_name. So the tuple looks like:
        ('DERIVED', processing_level, processing_level).
        """

        derived_levels = {'PQA', 'FC'}

        satellite = dataset.get_satellite_tag()
        sensor = dataset.get_sensor_name()
        level = dataset.get_processing_level()

        if level in derived_levels:
            satellite = 'DERIVED'
            sensor = level

        return (satellite, sensor, level)
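
    # Illustrative key values (a sketch; the tags are hypothetical):
    #     ('LS5', 'TM', 'NBAR')      -- ordinary dataset
    #     ('DERIVED', 'PQA', 'PQA')  -- derived dataset, level used twice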

    def get_temp_tile_directory(self):
        """Return a path to a directory for temporary tile related files."""

        return self.temp_tile_directory

    def check_metadata(self, dataset):
        """Check that the satellite, sensor, and bands are in the database.

        Checks that the dataset is of a kind that the database knows about
        (by checking basic metadata), and the bands that the database expects
        are present. Raises a DatasetError if the checks fail.
        """

        self.__check_satellite_and_sensor(dataset)
        self.__check_processing_level(dataset)
        self.__check_bands(dataset)

    def transaction(self, db=None):
        """Returns a Transaction context manager object.

        This is for use in a 'with' statement. It uses the Collection's
        database connection if one is not provided.
        """

        return Transaction(self.db if db is None else db,
                           self.transaction_stack)
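
    # Usage sketch (assumes the Transaction context manager commits on a
    # clean exit from the 'with' block and rolls back on an exception):
    #
    #     with collection.transaction():
    #         ...  # database work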

    def lock_datasets(self, dataset_list):
        """Returns a Lock context manager object.

        dataset_list is a list of dataset ids for the datasets to be
        locked.

        This is for use in a 'with' statement. It uses the Collection's
        datacube object to manage the individual locks.
        """

        lock_list = ['Dataset-' + str(dataset_id)
                     for dataset_id in dataset_list]
        return Lock(self.datacube, lock_list)
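
    # Usage sketch (illustrative; the dataset ids are hypothetical):
    #
    #     with collection.lock_datasets([42, 43]):
    #         ...  # work on the locked datasets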

    def create_acquisition_record(self, dataset):
        """Factory method to create an instance of the AcquisitonRecord class.

        This method creates a corresponding record in the database if one
        does not already exist.
        """

        return AcquisitionRecord(self, dataset)

    def create_tile_contents(self, tile_type_id, tile_footprint,
                             band_stack):
        """Factory method to create an instance of the TileContents class.

        The tile type entry (looked up from the datacube's tile_type_dict
        by tile_type_id) supplies the extents and resolution used for
        resampling.
        """

        tile_type_info = self.datacube.tile_type_dict[tile_type_id]
        tile_contents = TileContents(self.datacube.tile_root, tile_type_info,
                                     tile_footprint, band_stack)
        return tile_contents

    def current_transaction(self):
        """Returns the current transaction."""

        return self.transaction_stack[-1]

    def mark_tile_for_removal(self, tile_pathname):
        """Mark a tile file for removal on transaction commit."""

        self.current_transaction().mark_tile_for_removal(tile_pathname)

    def mark_tile_for_creation(self, tile_contents):
        """Mark a tile file for creation on transaction commit."""

        self.current_transaction().mark_tile_for_creation(tile_contents)

    #
    # worker methods
    #

    @staticmethod
    def __reindex_bands(bands):
        """Reindex the datacube.bands nested dict structure.

        This method returns a new nested dict, indexed by:
            new_bands[dataset_key][tile_type][file_number]
        where dataset_key is a tuple:
            (satellite_tag, sensor_name, processing_level).

        The original indexing is
            bands[tile_type][satellite_sensor][file_number]
        where satellite_sensor is a tuple:
            (satellite_tag, sensor_name).

        Note that satellite_tag and sensor_name are replaced by 'DERIVED' and
        the processing_level for PQA and FC datasets. This needs to be taken
        into account when constructing a dataset_key.
        """

        new_bands = {}

        for (tile_type, band_dict) in bands.items():
            for ((satellite, sensor), sensor_dict) in band_dict.items():
                for (file_number, band_info) in sensor_dict.items():

                    dataset_key = (satellite, sensor, band_info['level_name'])

                    new_bands.setdefault(dataset_key, {})
                    new_bands[dataset_key].setdefault(tile_type, {})
                    new_bands[dataset_key][tile_type][file_number] = band_info

        return new_bands
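
    # Illustrative reindexing (all values hypothetical): an input entry
    #     bands[1][('LS5', 'TM')][10] = {'level_name': 'NBAR', ...}
    # comes out as
    #     new_bands[('LS5', 'TM', 'NBAR')][1][10] = {'level_name': 'NBAR', ...}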

    def __check_satellite_and_sensor(self, dataset):
        """Check that the dataset's satellite and sensor are in the database.

        Raises a DatasetError if they are not.
        """

        satellite_id = self.db.get_satellite_id(dataset.get_satellite_tag())
        if satellite_id is None:
            raise DatasetError("Unknown satellite tag: '%s'" %
                               dataset.get_satellite_tag())

        sensor_id = self.db.get_sensor_id(satellite_id,
                                          dataset.get_sensor_name())
        if sensor_id is None:
            msg = ("Unknown satellite and sensor pair: '%s', '%s'" %
                   (dataset.get_satellite_tag(), dataset.get_sensor_name()))
            raise DatasetError(msg)

    def __check_processing_level(self, dataset):
        """Check that the dataset's processing_level is in the database.

        Raises a DatasetError if it is not.
        """

        level_id = self.db.get_level_id(dataset.get_processing_level())
        if level_id is None:
            raise DatasetError("Unknown processing level: '%s'" %
                               dataset.get_processing_level())

    def __check_bands(self, dataset):
        """Check that the dataset has the expected bands.

        Raises a DatasetError if any band expected for this dataset (according
        to the database) is missing.
        """

        try:
            dataset_bands = self.new_bands[self.get_dataset_key(dataset)]
        except KeyError:
            raise DatasetError('No tile types for this dataset.')

        for tile_type_bands in dataset_bands.values():
            for band_info in tile_type_bands.values():
                dataset.find_band_file(band_info['file_pattern'])
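
A sketch of how the two classes above fit together during ingest
(illustrative only: 'datacube' and 'dataset' come from the surrounding
framework, and error handling is elided):

# Not the framework's actual driver code; a minimal sketch.
collection = Collection(datacube)
try:
    collection.check_metadata(dataset)  # raises DatasetError on failure
    acquisition = collection.create_acquisition_record(dataset)
    dataset_record = acquisition.create_dataset_record(dataset)
finally:
    collection.cleanup()  # removes the process-specific temp directory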