Example #1
    def __init__(self,
                 filename,
                 gdal_dataset,
                 gdal_metadata,
                 date=None,
                 ds=None,
                 bands=None,
                 cachedir=None,
                 *args,
                 **kwargs):

        self.test_mapper(filename)
        timestamp = date if date else self.get_date(filename)
        ds = Dataset(filename)
        try:
            self.srcDSProjection = NSR(ds.variables['projection_3'].proj4 +
                                       ' +to_meter=0.0174532925199 +wktext')
        except KeyError:
            raise WrongMapperError

        self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds,
                        bands, cachedir)

        self.dataset.SetMetadataItem(
            'instrument', json.dumps(pti.get_gcmd_instrument('Computer')))
        self.dataset.SetMetadataItem(
            'platform', json.dumps(pti.get_gcmd_platform('MODELS')))
        self.dataset.SetMetadataItem(
            'Data Center', json.dumps(pti.get_gcmd_provider('NO/MET')))
        self.dataset.SetMetadataItem('Entry Title', str(ds.getncattr('title')))
        self.dataset.SetMetadataItem(
            'ISO_topic_category',
            json.dumps(pti.get_iso19115_topic_category('Oceans')))
        self.dataset.SetMetadataItem(
            'gcmd_location', json.dumps(pti.get_gcmd_location('sea surface')))
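The pti lookups above return vocabulary entries as OrderedDicts, which is why they are JSON-encoded before being stored: GDAL metadata items are plain strings. A minimal standalone sketch, assuming pythesint is installed with up-to-date vocabularies:

    import json
    import pythesint as pti

    # Each lookup returns an OrderedDict describing the vocabulary entry
    instrument = pti.get_gcmd_instrument('Computer')
    platform = pti.get_gcmd_platform('MODELS')

    # GDAL metadata items must be strings, so the entries are JSON-encoded...
    instrument_json = json.dumps(instrument)
    # ...and can be recovered later with json.loads
    assert json.loads(instrument_json) == dict(instrument)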
Example #2
    def __init__(self, filename, gdal_dataset, gdal_metadata, date=None,
                 ds=None, bands=None, cachedir=None, *args, **kwargs):

        self.test_mapper(filename)
        timestamp = date if date else self.get_date(filename)
        ds = Dataset(filename)
        self.create_vrt(filename, gdal_dataset, gdal_metadata, timestamp, ds, bands, cachedir)
        self.dataset.SetMetadataItem('entry_title', str(ds.getncattr('title')))
        self.dataset.SetMetadataItem('data_center', json.dumps(pti.get_gcmd_provider('UK/MOD/MET')))
        self.dataset.SetMetadataItem('ISO_topic_category',
                pti.get_iso19115_topic_category('oceans')['iso_topic_category'])
        self.dataset.SetMetadataItem('gcmd_location', json.dumps(pti.get_gcmd_location('sea surface')))

        #mm = pti.get_gcmd_instrument('amsr-e')
        #ee = pti.get_gcmd_platform('aqua')
        #self.dataset.SetMetadataItem('instrument', json.dumps(mm))
        #self.dataset.SetMetadataItem('platform', json.dumps(ee))
        self.dataset.SetMetadataItem('platform/instrument',
                json.dumps(self.get_platform_and_instrument_list(ds)))
Example #3

    def test_get_location_by_type(self):
        type = 'africa'
        a = pti.get_gcmd_location(type)
        self.assertEqual(a['Location_Type'], 'AFRICA')
Example #4
    def get_or_create(self,
                      uri,
                      n_points=10,
                      uri_filter_args=None,
                      *args,
                      **kwargs):
        ''' Create dataset and corresponding metadata

        Parameters
        ----------
        uri : str
            URI to file or stream openable by Nansat
        n_points : int
            Number of border points (default is 10)
        uri_filter_args : dict
            Extra DatasetURI filter arguments if several datasets can refer to the same URI

        Returns
        -------
        dataset and flag
        '''
        if not uri_filter_args:
            uri_filter_args = {}

        # Validate uri - this should raise an exception if the uri doesn't point to a valid
        # file or stream
        validate_uri(uri)

        # Several datasets can refer to the same uri (e.g., scatterometers and svp drifters), so we
        # need to pass uri_filter_args
        uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
        if len(uris) > 0:
            return uris[0].dataset, False

        # Open file with Nansat
        n = Nansat(nansat_filename(uri), **kwargs)

        # get metadata from Nansat and get objects from vocabularies
        n_metadata = n.get_metadata()

        # set compulsory metadata (source)
        platform, _ = Platform.objects.get_or_create(
            json.loads(n_metadata['platform']))
        instrument, _ = Instrument.objects.get_or_create(
            json.loads(n_metadata['instrument']))
        specs = n_metadata.get('specs', '')
        source, _ = Source.objects.get_or_create(platform=platform,
                                                 instrument=instrument,
                                                 specs=specs)

        default_char_fields = {
            'entry_id': lambda: 'NERSC_' + str(uuid.uuid4()),
            'entry_title': lambda: 'NONE',
            'summary': lambda: 'NONE',
        }

        # set optional CharField metadata from Nansat or from default_char_fields
        options = {}
        for name in default_char_fields:
            if name not in n_metadata:
                warnings.warn('%s is not provided in Nansat metadata!' % name)
                options[name] = default_char_fields[name]()
            else:
                options[name] = n_metadata[name]

        default_foreign_keys = {
            'gcmd_location': {
                'model': Location,
                'value': pti.get_gcmd_location('SEA SURFACE')
            },
            'data_center': {
                'model': DataCenter,
                'value': pti.get_gcmd_provider('NERSC')
            },
            'ISO_topic_category': {
                'model': ISOTopicCategory,
                'value': pti.get_iso19115_topic_category('Oceans')
            },
        }

        # set optional ForeignKey metadata from Nansat or from default_foreign_keys
        for name in default_foreign_keys:
            value = default_foreign_keys[name]['value']
            model = default_foreign_keys[name]['model']
            if name not in n_metadata:
                warnings.warn('%s is not provided in Nansat metadata!' % name)
            else:
                try:
                    value = json.loads(n_metadata[name])
                except ValueError:
                    warnings.warn(
                        '%s value of %s metadata provided in Nansat is wrong!'
                        % (n_metadata[name], name))
            options[name], _ = model.objects.get_or_create(value)

        # Find coverage to set number of points in the geolocation
        if len(n.vrt.dataset.GetGCPs()) > 0:
            n.reproject_gcps()
        geolocation = GeographicLocation.objects.get_or_create(
            geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

        # create dataset
        ds, created = Dataset.objects.get_or_create(
            time_coverage_start=n.get_metadata('time_coverage_start'),
            time_coverage_end=n.get_metadata('time_coverage_end'),
            source=source,
            geographic_location=geolocation,
            **options)
        # create dataset URI
        ds_uri, _ = DatasetURI.objects.get_or_create(uri=uri, dataset=ds)

        return ds, created
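A hedged usage sketch for the method above, assuming it is installed as a custom manager method on the Dataset model (so it is reachable as Dataset.objects.get_or_create; the URI is hypothetical):

    # Hypothetical local file URI; validate_uri must accept it
    uri = 'file://localhost/data/example_subset.nc'
    ds, created = Dataset.objects.get_or_create(uri, n_points=10)
    if not created:
        print('URI already registered for dataset %s' % ds.entry_id)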
Example #5
    def test_get_location_by_type(self):
        type = 'africa'
        a = pti.get_gcmd_location(type)
        self.assertEqual(a['Location_Type'], 'AFRICA')
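If lookups like the one above fail because the locally cached vocabulary copies are missing or stale, refreshing them first usually helps. A hedged sketch, assuming pythesint's update_all_vocabularies helper and network access:

    import pythesint as pti

    # Download/refresh the locally cached GCMD and ISO19115 vocabularies
    pti.update_all_vocabularies()
    print(pti.get_gcmd_location('africa')['Location_Type'])  # expected: 'AFRICA'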
Example #6
    def get_or_create(self,
                      uri,
                      n_points=10,
                      uri_filter_args=None,
                      uri_service_name=FILE_SERVICE_NAME,
                      uri_service_type=LOCAL_FILE_SERVICE,
                      *args,
                      **kwargs):
        """ Create dataset and corresponding metadata

        Parameters
        ----------
        uri : str
            URI to file or stream openable by Nansat
        n_points : int
            Number of border points (default is 10)
        uri_filter_args : dict
            Extra DatasetURI filter arguments if several datasets can refer to the same URI
        uri_service_name : str
            Name of the service which is used ('dapService', 'fileService', 'http' or 'wms')
        uri_service_type : str
            Type of the service which is used ('OPENDAP', 'local', 'HTTPServer' or 'WMS')

        Returns
        -------
        dataset and flag
        """
        if not uri_filter_args:
            uri_filter_args = {}

        # Validate uri - this should raise an exception if the uri doesn't point to a valid
        # file or stream
        validate_uri(uri)

        # Several datasets can refer to the same uri (e.g., scatterometers and svp drifters), so we
        # need to pass uri_filter_args
        uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
        if len(uris) > 0:
            return uris[0].dataset, False

        # Open file with Nansat
        n = Nansat(nansat_filename(uri), **kwargs)

        # get metadata from Nansat and get objects from vocabularies
        n_metadata = n.get_metadata()

        entry_id = n_metadata.get('entry_id', None)
        # set compulsory metadata (source)
        platform, _ = Platform.objects.get_or_create(
            json.loads(n_metadata['platform']))
        instrument, _ = Instrument.objects.get_or_create(
            json.loads(n_metadata['instrument']))
        specs = n_metadata.get('specs', '')
        source, _ = Source.objects.get_or_create(platform=platform,
                                                 instrument=instrument,
                                                 specs=specs)

        default_char_fields = {
            # Adding NERSC_ in front of the id violates the string representation of the uuid
            #'entry_id': lambda: 'NERSC_' + str(uuid.uuid4()),
            'entry_id': lambda: str(uuid.uuid4()),
            'entry_title': lambda: 'NONE',
            'summary': lambda: 'NONE',
        }

        # set optional CharField metadata from Nansat or from default_char_fields
        options = {}
        try:
            existing_ds = Dataset.objects.get(entry_id=entry_id)
        except Dataset.DoesNotExist:
            existing_ds = None
        for name in default_char_fields:
            if name not in n_metadata:
                warnings.warn('%s is not provided in Nansat metadata!' % name)
                # prevent overwriting of existing values by defaults
                if existing_ds:
                    options[name] = getattr(existing_ds, name)
                else:
                    options[name] = default_char_fields[name]()
            else:
                options[name] = n_metadata[name]

        default_foreign_keys = {
            'gcmd_location': {
                'model': Location,
                'value': pti.get_gcmd_location('SEA SURFACE')
            },
            'data_center': {
                'model': DataCenter,
                'value': pti.get_gcmd_provider('NERSC')
            },
            'ISO_topic_category': {
                'model': ISOTopicCategory,
                'value': pti.get_iso19115_topic_category('Oceans')
            },
        }

        # set optional ForeignKey metadata from Nansat or from default_foreign_keys
        for name in default_foreign_keys:
            value = default_foreign_keys[name]['value']
            model = default_foreign_keys[name]['model']
            if name not in n_metadata:
                warnings.warn('%s is not provided in Nansat metadata!' % name)
            else:
                try:
                    value = json.loads(n_metadata[name])
                except ValueError:
                    warnings.warn(
                        '%s value of %s metadata provided in Nansat is wrong!'
                        % (n_metadata[name], name))
            if existing_ds:
                options[name] = getattr(existing_ds, name)
            else:
                options[name], _ = model.objects.get_or_create(value)

        # Find coverage to set number of points in the geolocation
        if len(n.vrt.dataset.GetGCPs()) > 0:
            n.reproject_gcps()
        geolocation = GeographicLocation.objects.get_or_create(
            geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]

        # create dataset
        # - the get_or_create method should use get_or_create here as well,
        #   or its name should be changed - see issue #127
        ds, created = Dataset.objects.update_or_create(
            entry_id=options['entry_id'],
            defaults={
                'time_coverage_start': n.get_metadata('time_coverage_start'),
                'time_coverage_end': n.get_metadata('time_coverage_end'),
                'source': source,
                'geographic_location': geolocation,
                'gcmd_location': options["gcmd_location"],
                'ISO_topic_category': options["ISO_topic_category"],
                "data_center": options["data_center"],
                'entry_title': options["entry_title"],
                'summary': options["summary"]
            })

        # create parameter
        all_band_meta = n.bands()
        for band_id in range(1, len(all_band_meta) + 1):
            band_meta = all_band_meta[band_id]
            standard_name = band_meta.get('standard_name', None)
            short_name = band_meta.get('short_name', None)
            units = band_meta.get('units', None)
            if standard_name in ['latitude', 'longitude', None]:
                continue
            params = Parameter.objects.filter(standard_name=standard_name)
            if params.count() > 1 and short_name is not None:
                params = params.filter(short_name=short_name)
            if params.count() > 1 and units is not None:
                params = params.filter(units=units)
            if params.count() >= 1:
                ds.parameters.add(params[0])

        # create dataset URI
        DatasetURI.objects.get_or_create(name=uri_service_name,
                                         service=uri_service_type,
                                         uri=uri,
                                         dataset=ds)

        return ds, created
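A hedged usage sketch of the extended signature above, with the OPeNDAP service values taken from the docstring (the URI is hypothetical and Dataset.objects is assumed to use this manager):

    uri = 'https://opendap.example.org/thredds/dodsC/dataset.nc'
    ds, created = Dataset.objects.get_or_create(
        uri,
        uri_service_name='dapService',
        uri_service_type='OPENDAP')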
Example #7

    def test_get_gcmd_location(self):
        item = 'NORWEGIAN SEA'
        self.assertIsInstance(pti.get_gcmd_location(item),
                              collections.OrderedDict)
Example #8
    def test_get_gcmd_location(self):
        item = 'NORWEGIAN SEA'
        self.assertIsInstance(pti.get_gcmd_location(item),
                              collections.OrderedDict)
Example #9
    def __init__(self, filename, gdal_dataset, gdal_metadata, GCP_COUNT=10, timestamp=None, **kwargs):
        filename_name = os.path.split(filename)[-1].split('.')[0]
        # Check if this is the correct mapper
        correct_mapper = False
        for location in self.SUPPORTED_LOCATIONS:
            # If the name matches one of the supported locations, set the flag and break
            if filename_name.startswith(location):
                correct_mapper = True
                break
        if not correct_mapper:
            raise WrongMapperError

        # Import NetCDF4 dataset
        nc_dataset = Dataset(filename)
        # Define projection (depending on the HFR)
        if nc_dataset.getncattr('site') == 'TORU':
            proj4 = '+proj=utm +zone=32 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'
            GRID_PX_SIZE = 1500 # Final raster px size in meters
        elif nc_dataset.getncattr('site') == 'FRUH':
            proj4 = '+proj=utm +zone=34 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'
            GRID_PX_SIZE = 5000 # Final raster px size in meters
        elif nc_dataset.getncattr('site') == 'BERL':
            proj4 = '+proj=utm +zone=35 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'
            GRID_PX_SIZE = 5000 # Final raster px size in meters
        else:
            raise WrongMapperError

        srs = osr.SpatialReference()
        srs.ImportFromProj4(proj4)
        projection = srs.ExportToWkt()
        # Get x grid and y grid
        x_grd, y_grd = self.create_linear_grid(nc_dataset['x'][:], nc_dataset['y'][:], GRID_PX_SIZE)
        raster_x_size, raster_y_size = x_grd.shape
        # Define geotransform
        geotransform = (x_grd.min(), GRID_PX_SIZE, 0.0, y_grd.max(), 0.0, GRID_PX_SIZE * -1)
        # Define x and y size
        self._init_from_dataset_params(raster_x_size, raster_y_size, geotransform, projection)
        # If no timestamp was specified, extract the date from the filename and use the first time
        if timestamp is None:
            timestamp = self.date_from_filename(filename)
        # Convert time info from the dataset to datetime objects
        timestamps = num2date(nc_dataset['time'][:].data, nc_dataset['time'].units)
        # find band id for the required timestamp
        # Note: add 1 because GDAL band numbering starts at 1, not 0
        src_timestamp_id = np.where(timestamps == timestamp)[0][0] + 1
        # Iterate through all subdatasets and add bands to the dataset
        for subdataset in gdal_dataset.GetSubDatasets():
            # Get name of subdataset
            subdataset_name = subdataset[0].split(':')[2]
            # Check if the subdataset in the accepted 3D vars list
            if subdataset_name not in self.BAND_NAMES:
                continue
            gdal_subdataset = gdal.Open(subdataset[0])
            # Needs to be float for the NaN replacement
            band_data = gdal_subdataset.GetRasterBand(int(src_timestamp_id)).ReadAsArray().astype('float')
            # remove fill value (replace with nan)
            fill_value = int(gdal_subdataset.GetMetadata_Dict()['#'.join([subdataset_name, '_FillValue'])])
            band_data[band_data == fill_value] = np.nan
            # Interpolate data on the regular grid
            band_grid_data = self.band2grid((nc_dataset['x'][:], nc_dataset['y'][:]),
                                            band_data, (x_grd, y_grd))
            # Create a VRT for the regridded data
            band_vrt = VRT.from_array(band_grid_data)
            # Add VRT to the list of all dataset vrts
            self.band_vrts[subdataset_name + 'VRT'] = band_vrt
            # Add band to the dataset
            src = {'SourceFilename': self.band_vrts[subdataset_name + 'VRT'].filename, 
                   'SourceBand': 1}
            # Add band specific metadata
            dst = {'name': subdataset_name}
            for key in gdal_subdataset.GetMetadata_Dict().keys():
                if key.startswith(subdataset_name):
                    clean_metadata_name = key.split('#')[1]
                    dst[clean_metadata_name] = gdal_subdataset.GetMetadata_Dict()[key]
            # Create band
            self.create_band(src, dst)
            self.dataset.FlushCache()

        # Set GCMD metadata
        self.dataset.SetMetadataItem('instrument', json.dumps(pti.get_gcmd_instrument('SCR-HF')))
        self.dataset.SetMetadataItem('platform', json.dumps(pti.get_gcmd_platform('CODAR SeaSonde')))
        self.dataset.SetMetadataItem('Data Center', json.dumps(pti.get_gcmd_provider('NO/MET')))
        self.dataset.SetMetadataItem('Entry Title', 'Near-Real Time Surface Ocean Radial Velocity')
        self.dataset.SetMetadataItem('gcmd_location', json.dumps(pti.get_gcmd_location('NORTH SEA')))
        # Set time coverage metadata
        self.dataset.SetMetadataItem('time_coverage_start', timestamp.isoformat())
        self.dataset.SetMetadataItem('time_coverage_end',
                                     (timestamp + timedelta(minutes=59, seconds=59)).isoformat())
        # Set NetCDF dataset metadata
        for key, value in gdal_dataset.GetMetadata_Dict().items():
            self.dataset.SetMetadataItem(key.split('#')[1], value)
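The timestamp-to-band lookup in the mapper above is easy to get wrong by one; a minimal standalone sketch with assumed time values:

    import numpy as np
    from netCDF4 import num2date

    # Assumed time axis: three hourly steps
    times = np.array([0.0, 1.0, 2.0])
    timestamps = num2date(times, 'hours since 2020-01-01 00:00:00')
    # Index of the requested timestamp, plus 1 because GDAL bands are 1-based
    band_id = np.where(timestamps == timestamps[1])[0][0] + 1
    assert band_id == 2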
Example #10

    def _get_normalized_attributes(self, dataset_info, *args, **kwargs):
        """Gets dataset attributes using nansat"""
        normalized_attributes = {}
        n_points = int(kwargs.get('n_points', 10))
        nansat_options = kwargs.get('nansat_options', {})
        url_scheme = urlparse(dataset_info).scheme
        if 'http' not in url_scheme and 'ftp' not in url_scheme:
            normalized_attributes['geospaas_service_name'] = FILE_SERVICE_NAME
            normalized_attributes['geospaas_service'] = LOCAL_FILE_SERVICE
        elif 'http' in url_scheme and 'ftp' not in url_scheme:
            normalized_attributes['geospaas_service_name'] = DAP_SERVICE_NAME
            normalized_attributes['geospaas_service'] = OPENDAP_SERVICE
        elif 'ftp' in url_scheme:
            raise ValueError(
                f"Can't ingest '{dataset_info}': nansat can't open remote ftp files"
            )

        # Open file with Nansat
        nansat_object = Nansat(nansat_filename(dataset_info),
                               log_level=self.LOGGER.getEffectiveLevel(),
                               **nansat_options)

        # get metadata from Nansat and get objects from vocabularies
        n_metadata = nansat_object.get_metadata()

        # set compulsory metadata (source)
        normalized_attributes['entry_title'] = n_metadata.get(
            'entry_title', 'NONE')
        normalized_attributes['summary'] = n_metadata.get('summary', 'NONE')
        normalized_attributes['time_coverage_start'] = dateutil.parser.parse(
            n_metadata['time_coverage_start']).replace(tzinfo=tzutc())
        normalized_attributes['time_coverage_end'] = dateutil.parser.parse(
            n_metadata['time_coverage_end']).replace(tzinfo=tzutc())
        normalized_attributes['platform'] = json.loads(n_metadata['platform'])
        normalized_attributes['instrument'] = json.loads(
            n_metadata['instrument'])
        normalized_attributes['specs'] = n_metadata.get('specs', '')
        normalized_attributes['entry_id'] = n_metadata.get(
            'entry_id', 'NERSC_' + str(uuid.uuid4()))

        # set optional ForeignKey metadata from Nansat or from defaults
        normalized_attributes['gcmd_location'] = n_metadata.get(
            'gcmd_location', pti.get_gcmd_location('SEA SURFACE'))
        normalized_attributes['provider'] = pti.get_gcmd_provider(
            n_metadata.get('provider', 'NERSC'))
        normalized_attributes['iso_topic_category'] = n_metadata.get(
            'ISO_topic_category', pti.get_iso19115_topic_category('Oceans'))

        # Find coverage to set number of points in the geolocation
        if nansat_object.vrt.dataset.GetGCPs():
            nansat_object.reproject_gcps()
        normalized_attributes['location_geometry'] = GEOSGeometry(
            nansat_object.get_border_wkt(n_points=n_points), srid=4326)

        json_dumped_dataset_parameters = n_metadata.get(
            'dataset_parameters', None)
        if json_dumped_dataset_parameters:
            json_loads_result = json.loads(json_dumped_dataset_parameters)
            if isinstance(json_loads_result, list):
                normalized_attributes['dataset_parameters'] = [
                    get_cf_or_wkv_standard_name(dataset_param)
                    for dataset_param in json_loads_result
                ]
            else:
                raise TypeError(
                    f"Can't ingest '{dataset_info}': the 'dataset_parameters' section of the "
                    "metadata returned by nansat is not a JSON list")
        else:
            normalized_attributes['dataset_parameters'] = []

        return normalized_attributes
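Note that the scheme checks above are substring tests, so 'https' also selects the OPeNDAP branch. A minimal sketch of the dispatch with hypothetical inputs:

    from urllib.parse import urlparse

    # 'https' contains 'http', so secure OPeNDAP endpoints take the DAP branch
    assert 'http' in urlparse('https://server.example/dap/file.nc').scheme
    # A plain path has an empty scheme and falls through to the local-file branch
    assert 'http' not in urlparse('/data/file.nc').scheme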
Example #11
    def get_gcmd_location(self, raw_metadata):
        """Get the GCMD location from the raw metadata; this default
        implementation ignores the metadata and returns the 'SEA SURFACE' entry"""
        return pti.get_gcmd_location('SEA SURFACE')
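A hedged sketch of how an override might actually consult the raw metadata before falling back; the 'gcmd_location' key and its JSON encoding mirror the Nansat metadata handling in the earlier examples, but are assumptions here:

    import json
    import pythesint as pti

    def get_gcmd_location(self, raw_metadata):
        """Look up the GCMD location from the raw metadata, defaulting to 'SEA SURFACE'"""
        if 'gcmd_location' in raw_metadata:
            # Assumed: the value is a JSON-dumped vocabulary entry, as in the mappers above
            return json.loads(raw_metadata['gcmd_location'])
        return pti.get_gcmd_location('SEA SURFACE')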