def test_new_range(self):
    self.assertEqual(
        {
            'type': 'array',
            'items': [{
                'type': 'string',
                'format': 'date',
            }, {
                'type': 'string',
                'format': 'date',
            }],
        },
        JsonDateSchema.new_range().to_dict())
    self.assertEqual(
        {
            'type': ['array', 'null'],
            'items': [{
                'type': ['string', 'null'],
                'format': 'date',
                'minDate': '2020-02-01',
                'maxDate': '2020-07-05',
            }, {
                'type': ['string', 'null'],
                'format': 'date',
                'minDate': '2020-02-01',
                'maxDate': '2020-07-05',
            }],
        },
        JsonDateSchema.new_range(min_date='2020-02-01',
                                 max_date='2020-07-05',
                                 nullable=True).to_dict())
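
# Illustrative sketch only, not part of the original test module: judging
# from the dictionaries asserted above, JsonDateSchema.new_range() bundles
# two identically constrained date schemas into one array schema for a
# (start, end) pair. The helper name date_range_schema is hypothetical and
# the real factory may differ in details.
def date_range_schema(min_date=None, max_date=None, nullable=False):
    def date_item():
        # One end of the range; shares the min/max/nullable constraints.
        return JsonDateSchema(min_date=min_date,
                              max_date=max_date,
                              nullable=nullable)

    # Two date items ([start, end]); the enclosing array is nullable too,
    # which matches the ['array', 'null'] type asserted above.
    return JsonArraySchema(items=[date_item(), date_item()],
                           nullable=nullable)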
def _get_default_open_params_schema(self) -> JsonObjectSchema:
    params = dict(
        dataset_name=JsonStringSchema(
            min_length=1,
            enum=list(self._handler_registry.keys())),
        variable_names=JsonArraySchema(
            items=(JsonStringSchema(min_length=0)),
            unique_items=True),
        crs=JsonStringSchema(),
        # W, S, E, N
        bbox=JsonArraySchema(
            items=(JsonNumberSchema(minimum=-180, maximum=180),
                   JsonNumberSchema(minimum=-90, maximum=90),
                   JsonNumberSchema(minimum=-180, maximum=180),
                   JsonNumberSchema(minimum=-90, maximum=90))),
        spatial_res=JsonNumberSchema(),
        time_range=JsonDateSchema.new_range(),
        time_period=JsonStringSchema(),
    )
    required = [
        'variable_names',
        'bbox',
        'spatial_res',
        'time_range',
    ]
    return JsonObjectSchema(properties=params, required=required)
def _get_open_data_params_schema(
        dsd: DatasetDescriptor = None) -> JsonObjectSchema:
    min_date = dsd.time_range[0] if dsd and dsd.time_range else None
    max_date = dsd.time_range[1] if dsd and dsd.time_range else None
    # noinspection PyUnresolvedReferences
    cube_params = dict(
        variable_names=JsonArraySchema(items=JsonStringSchema(
            enum=dsd.data_vars.keys() if dsd and dsd.data_vars else None)),
        time_range=JsonDateSchema.new_range(min_date, max_date))
    if dsd and (('lat' in dsd.dims and 'lon' in dsd.dims)
                or ('latitude' in dsd.dims and 'longitude' in dsd.dims)):
        min_lon = dsd.bbox[0] if dsd and dsd.bbox else -180
        min_lat = dsd.bbox[1] if dsd and dsd.bbox else -90
        max_lon = dsd.bbox[2] if dsd and dsd.bbox else 180
        max_lat = dsd.bbox[3] if dsd and dsd.bbox else 90
        bbox = JsonArraySchema(
            items=(JsonNumberSchema(minimum=min_lon, maximum=max_lon),
                   JsonNumberSchema(minimum=min_lat, maximum=max_lat),
                   JsonNumberSchema(minimum=min_lon, maximum=max_lon),
                   JsonNumberSchema(minimum=min_lat, maximum=max_lat)))
        cube_params['bbox'] = bbox
    cci_schema = JsonObjectSchema(properties=dict(**cube_params),
                                  required=[],
                                  additional_properties=False)
    return cci_schema
def _get_open_data_params_schema(self, dsd: DatasetDescriptor = None) \
        -> JsonObjectSchema:
    # Guard against dsd being None (its default) before reading time_range.
    min_date, max_date = dsd.time_range \
        if dsd and dsd.time_range is not None else (None, None)
    cube_params = dict(
        dataset_name=JsonStringSchema(min_length=1),
        variable_names=JsonArraySchema(items=JsonStringSchema(
            enum=[v.name for v in dsd.data_vars]
            if dsd and dsd.data_vars else None)),
        variable_units=JsonArraySchema(),
        variable_sample_types=JsonArraySchema(),
        tile_size=JsonArraySchema(
            items=(JsonNumberSchema(minimum=1, maximum=2500,
                                    default=DEFAULT_TILE_SIZE),
                   JsonNumberSchema(minimum=1, maximum=2500,
                                    default=DEFAULT_TILE_SIZE)),
            default=(DEFAULT_TILE_SIZE, DEFAULT_TILE_SIZE)),
        crs=JsonStringSchema(default=DEFAULT_CRS, enum=AVAILABLE_CRS_IDS),
        bbox=JsonArraySchema(items=(JsonNumberSchema(),
                                    JsonNumberSchema(),
                                    JsonNumberSchema(),
                                    JsonNumberSchema())),
        spatial_res=JsonNumberSchema(exclusive_minimum=0.0),
        time_range=JsonDateSchema.new_range(min_date=min_date,
                                            max_date=max_date),
        # TODO: add pattern
        time_period=JsonStringSchema(
            default='1D',
            nullable=True,
            enum=[None, *map(lambda n: f'{n}D', range(1, 14)), '1W', '2W']),
        time_tolerance=JsonStringSchema(default=DEFAULT_TIME_TOLERANCE,
                                        format='^([1-9]*[0-9]*)[NULSTH]$'),
        collection_id=JsonStringSchema(),
        four_d=JsonBooleanSchema(default=False),
    )
    cache_params = dict(
        max_cache_size=JsonIntegerSchema(minimum=0),
    )
    # Required cube parameters
    required = [
        'bbox',
        'spatial_res',
        'time_range',
    ]
    sh_params = {}
    if self._sentinel_hub is None:
        # If we are NOT connected to the API (yet),
        # we also include the store parameters.
        sh_schema = SentinelHubDataStore.get_data_store_params_schema()
        sh_params = sh_schema.properties
        required.extend(sh_schema.required or [])
    return JsonObjectSchema(properties=dict(**sh_params,
                                            **cube_params,
                                            **cache_params),
                            required=required)
def test_to_dict(self):
    self.assertEqual(
        {
            'type': 'string',
            'format': 'date',
            'minDate': '2020-02-01',
            'maxDate': '2020-07-05'
        },
        JsonDateSchema(min_date='2020-02-01',
                       max_date='2020-07-05').to_dict())
    self.assertEqual(
        {
            'type': ['string', 'null'],
            'format': 'date',
            'minDate': '2020-02-01',
            'maxDate': '2020-07-05'
        },
        JsonDateSchema(min_date='2020-02-01',
                       max_date='2020-07-05',
                       nullable=True).to_dict())
def get_open_data_params_schema(self, data_id: Optional[str] = None) -> \
        JsonObjectSchema:
    # If the data_id has a product type suffix, remove it.
    dataset_id = data_id.split(':')[0] if ':' in data_id else data_id
    ds_info = self._dataset_dicts[dataset_id]
    variable_info_table = ds_info['variables']
    bbox = ds_info['bbox']

    params = dict(
        variable_names=JsonArraySchema(
            items=(JsonStringSchema(
                min_length=0,
                enum=[cds_api_name
                      for cds_api_name, _, _, _ in variable_info_table])),
            unique_items=True,
            nullable=True,
            description='identifiers of the requested variables'),
        # crs omitted, since it's constant.
        # W, S, E, N (will be converted to N, W, S, E)
        bbox=JsonArraySchema(
            items=(JsonNumberSchema(minimum=bbox[0], maximum=bbox[2]),
                   JsonNumberSchema(minimum=bbox[1], maximum=bbox[3]),
                   JsonNumberSchema(minimum=bbox[0], maximum=bbox[2]),
                   JsonNumberSchema(minimum=bbox[1], maximum=bbox[3])),
            description='bounding box (min_x, min_y, max_x, max_y)'),
        # spatial_res in the ds_info dictionary gives the minimum
        # resolution, but the ERA5 backend can resample, so we also set
        # a maximum. The choice of 10° as maximum is fairly arbitrary
        # but seems reasonable.
        spatial_res=JsonNumberSchema(minimum=ds_info['spatial_res'],
                                     maximum=10,
                                     default=ds_info['spatial_res'],
                                     description='spatial resolution'),
        time_range=JsonDateSchema.new_range(),
        # time_period (time aggregation period) omitted, since it is
        # constant.
    )
    required = [
        'variable_names',
        'bbox',
        'spatial_res',
        'time_range',
    ]
    return JsonObjectSchema(properties=params,
                            required=required,
                            additional_properties=False)
def get_open_data_params_schema(self, data_id: Optional[str] = None) -> \
        JsonObjectSchema:
    # If the data_id has a product type suffix, remove it.
    dataset_id = data_id.split(':')[0] if ':' in data_id else data_id
    ds_info = self._dataset_dicts[dataset_id]
    variable_info_table = ds_info['variables']
    bbox = ds_info['bbox']

    params = dict(
        dataset_name=JsonStringSchema(
            min_length=1,
            enum=list(self._valid_data_ids),
            description='identifier of the requested dataset'),
        variable_names=JsonArraySchema(
            items=(JsonStringSchema(
                min_length=0,
                enum=[cds_api_name
                      for cds_api_name, _, _, _ in variable_info_table])),
            unique_items=True,
            nullable=True,
            description='identifiers of the requested variables'),
        crs=JsonStringSchema(nullable=True,
                             default=ds_info['crs'],
                             enum=[None, ds_info['crs']],
                             description='co-ordinate reference system'),
        # W, S, E, N (will be converted to N, W, S, E)
        bbox=JsonArraySchema(
            items=(JsonNumberSchema(minimum=bbox[0], maximum=bbox[2]),
                   JsonNumberSchema(minimum=bbox[1], maximum=bbox[3]),
                   JsonNumberSchema(minimum=bbox[0], maximum=bbox[2]),
                   JsonNumberSchema(minimum=bbox[1], maximum=bbox[3])),
            description='bounding box (min_x, min_y, max_x, max_y)'),
        spatial_res=JsonNumberSchema(minimum=ds_info['spatial_res'],
                                     maximum=10,
                                     default=ds_info['spatial_res'],
                                     description='spatial resolution'),
        time_range=JsonDateSchema.new_range(),
        time_period=JsonStringSchema(
            const=ds_info['time_period'],
            description='time aggregation period'),
    )
    required = [
        'variable_names',
        'bbox',
        'spatial_res',
        'time_range',
    ]
    return JsonObjectSchema(properties=params, required=required)
def get_schema(cls) -> JsonObjectSchema:
    return JsonObjectSchema(
        properties=dict(
            data_id=JsonStringSchema(min_length=1),
            data_type=DataType.get_schema(),
            crs=JsonStringSchema(min_length=1),
            bbox=JsonArraySchema(items=[JsonNumberSchema(),
                                        JsonNumberSchema(),
                                        JsonNumberSchema(),
                                        JsonNumberSchema()]),
            time_range=JsonDateSchema.new_range(nullable=True),
            time_period=JsonStringSchema(min_length=1),
            open_params_schema=JsonObjectSchema(additional_properties=True),
        ),
        required=['data_id', 'data_type'],
        additional_properties=True,
        factory=cls)
def get_schema(cls):
    return JsonObjectSchema(
        properties=dict(
            variable_names=JsonArraySchema(
                items=JsonStringSchema(min_length=1),
                min_items=0),
            crs=JsonStringSchema(nullable=True, min_length=1),
            bbox=JsonArraySchema(nullable=True,
                                 items=[JsonNumberSchema(),
                                        JsonNumberSchema(),
                                        JsonNumberSchema(),
                                        JsonNumberSchema()]),
            spatial_res=JsonNumberSchema(nullable=True,
                                         exclusive_minimum=0.0),
            time_range=JsonDateSchema.new_range(nullable=True),
            time_period=JsonStringSchema(
                nullable=True,
                pattern=r'^([1-9][0-9]*)?[DWMY]$'),
        ),
        required=['variable_names'],
        additional_properties=False,
        factory=cls)
def get_schema(cls):
    return JsonObjectSchema(
        properties=dict(
            variable_names=JsonArraySchema(
                nullable=True,
                items=JsonStringSchema(min_length=1),
                min_items=0),
            crs=JsonStringSchema(nullable=True, min_length=1),
            bbox=JsonArraySchema(nullable=True,
                                 items=[JsonNumberSchema(),
                                        JsonNumberSchema(),
                                        JsonNumberSchema(),
                                        JsonNumberSchema()]),
            spatial_res=JsonNumberSchema(nullable=True,
                                         exclusive_minimum=0.0),
            tile_size=JsonArraySchema(
                nullable=True,
                items=[JsonIntegerSchema(minimum=1, maximum=2500),
                       JsonIntegerSchema(minimum=1, maximum=2500)]),
            time_range=JsonDateSchema.new_range(nullable=True),
            time_period=JsonStringSchema(
                nullable=True,
                pattern=r'^([1-9][0-9]*)?[DWMY]$'),
            chunks=JsonObjectSchema(
                nullable=True,
                additional_properties=JsonIntegerSchema(nullable=True,
                                                        minimum=1)),
            metadata=JsonObjectSchema(nullable=True,
                                      additional_properties=True),
            variable_metadata=JsonObjectSchema(
                nullable=True,
                additional_properties=JsonObjectSchema(
                    additional_properties=True)),
        ),
        additional_properties=False,
        factory=cls)
def get_open_data_params_schema(self, data_id: str) -> JsonObjectSchema:
    _, variable_spec, _ = data_id.split(':')
    variable_properties = self._var_map[variable_spec]
    params = dict(
        time_range=JsonDateSchema.new_range(),
        # crs, bbox, and spatial_res omitted, since they're constant.
        # time_period omitted, since (although the store as a whole offers
        # three aggregation periods) it's constant for a given data-id.

        # There are complex interdependencies between allowed values for
        # these parameters and for the date specifiers, which can't be
        # represented in JSON Schema. The best we can do is to make them
        # all available, set sensible defaults, and trust that the user
        # knows what they're requesting.

        # type_of_sensor will be added below *only* if >1 type available.

        # There's only one variable available per data ID, but we can't
        # omit variable_names, because we need to support the
        # variable_names=[] case (to produce an empty cube).
        variable_names=JsonArraySchema(
            items=(JsonStringSchema(
                min_length=0,
                enum=variable_properties.variables,
                default=variable_properties.variables[0])),
            unique_items=True,
            default=[variable_properties.variables[0]]),
        type_of_record=JsonStringSchema(
            enum=['cdr', 'icdr'],
            title='Type of record',
            description=(
                'When dealing with satellite data it is common to '
                'encounter references to Climate Data Records (CDR) and '
                'interim-CDR (ICDR). For this dataset, both the ICDR and '
                'CDR parts of each product were generated using the same '
                'software and algorithms. The CDR is intended to have '
                'sufficient length, consistency, and continuity to detect '
                'climate variability and change. The ICDR provides a '
                'short-delay access to current data where consistency with '
                'the CDR baseline is expected but was not extensively '
                'checked.'),
            default='cdr'),
        version=JsonStringSchema(
            enum=['v201706.0.0', 'v201812.0.0', 'v201812.0.1',
                  'v201912.0.0'],
            title='Data version',
            description=(
                'Format: vMajor.Minor.Run, e.g. "v201706.0.0". The Major '
                'number usually represents the year (YYYY) and month (MM) '
                'of date. The initial value for Minor is zero, and will '
                'increment when updating the file. If there is a need – '
                'e.g. because of technical issues – to replace a file '
                'which has already been made public, the Run number of '
                'the replacement file shifts to the next increment. The '
                'initial Run number is zero.'),
            default='v201912.0.0'))
    if len(variable_properties.sensor_types) > 1:
        params['type_of_sensor'] = JsonStringSchema(
            enum=variable_properties.sensor_types,
            default=variable_properties.sensor_types[0],
            title='Type of sensor',
            description=(
                'Passive sensors measure reflected sunlight. '
                'Active sensors have their own source of illumination.'))
    return JsonObjectSchema(properties=params,
                            required=['time_range'],
                            additional_properties=False)
def get_open_data_params_schema(self, data_id: str) -> JsonObjectSchema:
    _, variable_spec, aggregation = data_id.split(':')
    variables = self._var_map[variable_spec][0]
    sensors = self._var_map[variable_spec][1]
    params = dict(
        dataset_name=JsonStringSchema(
            min_length=1,
            enum=self.get_supported_data_ids()),
        # The only allowed variable is already determined by the
        # data_id, so this schema forces an array containing only that
        # variable.
        variable_names=JsonArraySchema(
            items=(JsonStringSchema(min_length=0,
                                    enum=variables,
                                    default=variables[0])),
            unique_items=True),
        # Source for CRS information: §6.5 of
        # https://www.esa-soilmoisture-cci.org/sites/default/files/documents/CCI2_Soil_Moisture_D3.3.1_Product_Users_Guide%201.2.pdf
        crs=JsonStringSchema(nullable=True,
                             default='WGS84',
                             enum=[None, 'WGS84']),
        # W, S, E, N (will be converted to N, W, S, E).
        # For the soil moisture dataset, all data is global and no
        # geographic subsetting is possible, so the values are fixed
        # (i.e. minimum == maximum for every limit).
        bbox=JsonArraySchema(
            items=(JsonNumberSchema(minimum=-180, maximum=-180),
                   JsonNumberSchema(minimum=-90, maximum=-90),
                   JsonNumberSchema(minimum=180, maximum=180),
                   JsonNumberSchema(minimum=90, maximum=90))),
        # Like the bounding box, the spatial resolution is fixed.
        spatial_res=JsonNumberSchema(minimum=0.25,
                                     maximum=0.25,
                                     default=0.25),
        time_range=JsonDateSchema.new_range(),
        time_period=JsonStringSchema(
            enum=[self._aggregation_map[aggregation]]),
        # Non-standard parameters start here. There are complex
        # interdependencies between allowed values for these and for
        # the date specifiers, which can't be represented in JSON Schema.
        # The best we can do is to make them all available, set sensible
        # defaults, and trust that the user knows what they're requesting.
        type_of_sensor=JsonStringSchema(
            enum=sensors,
            default=sensors[0],
            title='Type of sensor',
            description=(
                'Passive sensors measure reflected sunlight. '
                'Active sensors have their own source of illumination.')),
        type_of_record=JsonStringSchema(
            enum=['cdr', 'icdr'],
            title='Type of record',
            description=(
                'When dealing with satellite data it is common to '
                'encounter references to Climate Data Records (CDR) and '
                'interim-CDR (ICDR). For this dataset, both the ICDR and '
                'CDR parts of each product were generated using the same '
                'software and algorithms. The CDR is intended to have '
                'sufficient length, consistency, and continuity to detect '
                'climate variability and change. The ICDR provides a '
                'short-delay access to current data where consistency with '
                'the CDR baseline is expected but was not extensively '
                'checked.'),
            default='cdr'),
        version=JsonStringSchema(
            enum=['v201706.0.0', 'v201812.0.0', 'v201812.0.1',
                  'v201912.0.0'],
            title='Data version',
            description=(
                'Format: vMajor.Minor.Run, e.g. "v201706.0.0". The Major '
                'number usually represents the year (YYYY) and month (MM) '
                'of date. The initial value for Minor is zero, and will '
                'increment when updating the file. If there is a need – '
                'e.g. because of technical issues – to replace a file '
                'which has already been made public, the Run number of '
                'the replacement file shifts to the next increment. The '
                'initial Run number is zero.'),
            default='v201912.0.0'))
    required = [
        'variable_names',
        'time_range',
    ]
    return JsonObjectSchema(properties=dict(**params),
                            required=required)
def test_min_max_validity_checks(self):
    with self.assertRaises(ValueError):
        JsonDateSchema(min_date='2002-02-02T10:20:14')
    with self.assertRaises(ValueError):
        JsonDateSchema(max_date='pippo')
def test_store_date_limits(self):
    minimum = '1981-05-06'
    maximum = '1982-09-15'
    schema = JsonDateSchema(min_date=minimum, max_date=maximum)
    self.assertEqual(minimum, schema.min_date)
    self.assertEqual(maximum, schema.max_date)
def test_to_instance(self):
    self.assertEqual(
        '2020-06-03',
        JsonDateSchema(min_date='2020-02-01',
                       max_date='2020-07-05').to_instance('2020-06-03'))
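
# Illustrative usage sketch, not part of the original test suite: the
# open-params schemas built in the store methods above are typically
# serialized with to_dict() so clients can discover valid parameters.
# 'store' and the data ID below are placeholders, not real objects from
# this code base.
#
#     schema = store.get_open_data_params_schema('some-dataset-id')
#     params_json = schema.to_dict()
#     # 'time_range' holds the two-item date-range schema produced by
#     # JsonDateSchema.new_range(), e.g. {'type': 'array', 'items': [...]}
#     time_range_schema = params_json['properties']['time_range']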