def update(self, data_set_meta_info: DataSetMetaInfo):
     data_type = data_set_meta_info.data_type
     if data_type is None:
         raise ValueError('Data must have Data Type')
     if not self.provides_data_type(data_type):
         raise ValueError('Data Type {} is not provided.'.format(data_type))
     if self._contains(data_set_meta_info):
         return
     data_set_info = {}
     if data_set_meta_info.coverage is not None and loads(data_set_meta_info.coverage) is not None:
         data_set_info['coverage'] = data_set_meta_info.coverage
     data_set_start_time = None
     if data_set_meta_info.start_time is not None:
         data_set_start_time = get_time_from_string(data_set_meta_info.start_time, False)
     data_set_end_time = None
     if data_set_meta_info.end_time is not None:
         data_set_end_time = get_time_from_string(data_set_meta_info.end_time, True)
     if data_set_start_time is not None and data_set_end_time is not None and data_set_start_time > data_set_end_time:
         raise ValueError('start time must not be later than end time')
     if data_set_start_time is not None:
         data_set_info['start_time'] = data_set_meta_info.start_time
     if data_set_end_time is not None:
         data_set_info['end_time'] = data_set_meta_info.end_time
     data_set_info['data_type'] = data_type
     data_set_info['name'] = data_set_meta_info.identifier
     self.data_set_infos['data_sets'].append(data_set_info)
     self._update_json_file()
 def query(self, query_string: str) -> List[DataSetMetaInfo]:
     roi = self.get_roi_from_query_string(query_string)
     query_start_time = self.get_start_time_from_query_string(query_string)
     query_end_time = self.get_end_time_from_query_string(query_string)
     data_types = self.get_data_types_from_query_string(query_string)
     data_set_meta_infos = []
     for data_set_info in self.data_set_infos['data_sets']:
         if data_set_info.get('coverage') is not None and roi is not None:
             data_set_coverage = loads(data_set_info.get('coverage'))
             if not roi.intersects(data_set_coverage):
                 continue
         if query_end_time is not None and data_set_info.get('start_time') is not None:
             data_set_start_time = get_time_from_string(data_set_info.get('start_time'), False)
             if query_end_time < data_set_start_time:
                 continue
         if query_start_time is not None and data_set_info.get('end_time') is not None:
             data_set_end_time = get_time_from_string(data_set_info.get('end_time'), True)
             if data_set_end_time < query_start_time:
                 continue
         if data_set_info.get('data_type') in data_types:
             data_set_meta_info = DataSetMetaInfo(coverage=data_set_info.get('coverage'),
                                                  start_time=data_set_info.get('start_time'),
                                                  end_time=data_set_info.get('end_time'),
                                                  data_type=data_set_info.get('data_type'),
                                                  identifier=data_set_info.get('name'))
             data_set_meta_infos.append(data_set_meta_info)
     return data_set_meta_infos
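All of the examples in this listing call get_time_from_string from the MULTIPLY core utilities. Its implementation is not part of this page; a minimal sketch of the behaviour the call sites above and below appear to rely on (parsing a handful of timestamp formats, returning None for empty input, and optionally snapping a plain date to the last second of that day) could look like this:

from datetime import datetime, timedelta
from typing import Optional

# Hypothetical sketch only; the real MULTIPLY utility may accept more formats.
_TIME_PATTERNS = ['%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%S', '%Y%m%dT%H%M%S', '%Y-%m-%d']

def get_time_from_string(time_string: str, adjust_to_end: bool = False) -> Optional[datetime]:
    if not time_string:
        return None
    for pattern in _TIME_PATTERNS:
        try:
            time = datetime.strptime(time_string, pattern)
        except ValueError:
            continue
        if adjust_to_end and pattern == '%Y-%m-%d':
            # a date without a time of day is read as the last second of that day
            time += timedelta(days=1) - timedelta(seconds=1)
        return time
    raise ValueError('Cannot convert "{}" to a time'.format(time_string))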
Example #3
    def get(self, data_set_meta_info: DataSetMetaInfo) -> Sequence[FileRef]:
        file_refs = []
        if os.path.exists(data_set_meta_info.identifier):
            mime_type = get_mime_type(data_set_meta_info.identifier)
            file_refs.append(
                FileRef(data_set_meta_info.identifier,
                        data_set_meta_info.start_time,
                        data_set_meta_info.end_time, mime_type))
            return file_refs
        relative_path = (self.path + self.pattern).replace('//', '/')
        relative_path = relative_path.replace(
            '/{}/'.format(_DATA_TYPE_PATTERN),
            '/{}/'.format(data_set_meta_info.data_type))
        if _DAY_PATTERN not in self.pattern and _MONTH_PATTERN not in self.pattern and \
                _YEAR_PATTERN not in self.pattern:
            if os.path.exists(relative_path):
                file_names = glob.glob(relative_path + '/**', recursive=True)
                for file_name in file_names:
                    file_name = file_name.replace('\\', '/')
                    if data_set_meta_info.identifier in file_name and \
                            data_validation.is_valid(file_name, data_set_meta_info.data_type):
                        mime_type = get_mime_type(file_name)
                        file_refs.append(
                            FileRef(file_name, data_set_meta_info.start_time,
                                    data_set_meta_info.end_time, mime_type))
            return file_refs
        if data_set_meta_info.start_time is None and data_set_meta_info.end_time is None:
            mime_type = get_mime_type(relative_path)
            file_refs.append(
                FileRef(relative_path, data_set_meta_info.start_time,
                        data_set_meta_info.end_time, mime_type))
            return file_refs

        # todo consider (weird) case when a start time but no end time is given
        start_time = get_time_from_string(data_set_meta_info.start_time)
        end_time = get_time_from_string(data_set_meta_info.end_time)
        time = start_time
        while time <= end_time:
            path = relative_path
            path = path.replace('/{}/'.format(_YEAR_PATTERN),
                                '/{:04d}/'.format(time.year))
            path = path.replace('/{}/'.format(_MONTH_PATTERN),
                                '/{:02d}/'.format(time.month))
            path = path.replace('/{}/'.format(_DAY_PATTERN),
                                '/{:02d}/'.format(time.day))
            time = self._get_next_time_step(time)
            if not os.path.exists(path):
                continue
            file_names = glob.glob(path + '/**', recursive=True)
            for file_name in file_names:
                file_name = file_name.replace('\\', '/')
                if data_set_meta_info.identifier in file_name and \
                        data_validation.is_valid(file_name, data_set_meta_info.data_type):
                    mime_type = get_mime_type(file_name)
                    file_refs.append(
                        FileRef(file_name, data_set_meta_info.start_time,
                                data_set_meta_info.end_time, mime_type))
        return file_refs
Example #4
def create_kaska_s2_inference_output_files(start_time: Union[str, datetime],
                                           end_time: Union[str, datetime],
                                           time_step: Union[int, timedelta],
                                           forward_models: List[str],
                                           output_directory: str,
                                           parameters: Optional[List[str]] = None,
                                           state_mask: Optional[str] = None,
                                           roi: Optional[Union[str, Polygon]] = None,
                                           spatial_resolution: Optional[int] = None,
                                           roi_grid: Optional[str] = None,
                                           destination_grid: Optional[str] = None,
                                           ):
    if type(start_time) is str:
        start_time = get_time_from_string(start_time)
    if type(end_time) is str:
        end_time = get_time_from_string(end_time)
    if type(time_step) is int:
        time_step = timedelta(days=time_step)
    time_grid = []
    current_time = start_time
    while current_time < end_time:
        time_grid.append(current_time)
        current_time += time_step
    time_grid.append(end_time)
    mask_data_set, untiled_reprojection = _get_mask_data_set_and_reprojection(state_mask, spatial_resolution, roi,
                                                                              roi_grid, destination_grid)
    model_parameter_names = []
    other_logger.info('Assembling model parameter names')
    for forward_model_name in forward_models:
        forward_model = get_forward_model(forward_model_name)
        if forward_model is None:
            other_logger.warning(f'Could not find forward model {forward_model_name}')
            continue
        for variable in forward_model.variables:
            other_logger.info(f'Checking variable {variable}')
            if variable not in model_parameter_names:
                model_parameter_names.append(variable)
    outfile_names = []
    requested_indexes = []
    for i, parameter_name in enumerate(model_parameter_names):
        other_logger.info(f'Checking for {parameter_name}')
        if parameters is None or parameter_name in parameters:
            other_logger.info(f'Creating output files for {parameter_name}')
            requested_indexes.append(i)
            for time_step in time_grid:
                time = time_step.strftime('%Y%j')
                outfile_names.append(f"{output_directory}/{parameter_name}_A{time}.tif")
                other_logger.info(f'Created output file {parameter_name}')
    writer = GeoTiffWriter(outfile_names, mask_data_set.GetGeoTransform(), mask_data_set.GetProjection(),
                           mask_data_set.RasterXSize, mask_data_set.RasterYSize, num_bands=None, data_types=None)
    writer.close()
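For reference, the time grid assembled at the top of this function contains the start time, every full step before the end time, and finally the end time itself. A small standalone sketch (dates and step size are made up):

from datetime import datetime, timedelta

start_time = datetime(2017, 6, 1)
end_time = datetime(2017, 6, 10)
time_step = timedelta(days=4)
time_grid = []
current_time = start_time
while current_time < end_time:
    time_grid.append(current_time)
    current_time += time_step
time_grid.append(end_time)
# time_grid -> [2017-06-01, 2017-06-05, 2017-06-09, 2017-06-10]; the end time is
# always appended, even when it does not fall on a regular step.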
Example #5
    def put(self, from_url: str, data_set_meta_info: DataSetMetaInfo):
        # we assume here that it suffices to consider the start time for putting a data set correctly
        data_type_path = get_data_type_path(data_set_meta_info.data_type,
                                            from_url)
        relative_path = self.path + self.pattern + data_type_path
        relative_path = relative_path.replace(
            '/{}/'.format(_DATA_TYPE_PATTERN),
            '/{}/'.format(data_set_meta_info.data_type))
        if _YEAR_PATTERN in relative_path or _MONTH_PATTERN in relative_path or _DAY_PATTERN in relative_path:
            if data_set_meta_info.start_time is None:
                raise ValueError(
                    'Data Set Meta Info is missing required time information')
            time = get_time_from_string(data_set_meta_info.start_time)
            relative_path = relative_path.replace('/{}/'.format(_YEAR_PATTERN),
                                                  '/{:04d}/'.format(time.year))
            relative_path = relative_path.replace(
                '/{}/'.format(_MONTH_PATTERN), '/{:02d}/'.format(time.month))
            relative_path = relative_path.replace('/{}/'.format(_DAY_PATTERN),
                                                  '/{:02d}/'.format(time.day))
        if not from_url == relative_path:
            if os.path.isdir(from_url):
                if os.path.exists(relative_path):
                    shutil.rmtree(relative_path)
                shutil.copytree(from_url, relative_path)
            else:
                if not os.path.exists(relative_path):
                    os.makedirs(relative_path)
                shutil.copy(from_url, relative_path)

        return DataSetMetaInfo(data_set_meta_info.coverage,
                               data_set_meta_info.start_time,
                               data_set_meta_info.end_time,
                               data_set_meta_info.data_type, relative_path)
Example #6
 def add_observations(self, product_observations: ProductObservations,
                      date: str):
     bands_per_observation = product_observations.bands_per_observation
     date = get_time_from_string(date)
     self.dates.append(date)
     self._observations[date] = product_observations
     self.bands_per_observation[date] = bands_per_observation
Example #7
 def _notify_copied_to_local(self, data_set_meta_info: DataSetMetaInfo):
     tile_name = self._get_tile_name(data_set_meta_info.identifier)
     start_time_as_datetime = get_time_from_string(data_set_meta_info.start_time)
     year = start_time_as_datetime.year
     month = start_time_as_datetime.month
     day = start_time_as_datetime.day
     aws_index = self._get_aws_index(data_set_meta_info.identifier)
     time = start_time_as_datetime.strftime('%Y-%m-%d')
     file_dir = '{0}/{1},{2},{3}/'.format(self._temp_dir, tile_name, time,
                                          aws_index)
     other_file_dir = '{0}/{1}/{2}/{3}/{4}/{5}/{6}/{7}/'.format(
         self._temp_dir, tile_name[0:2], tile_name[2:3], tile_name[3:5],
         year, month, day, aws_index)
     if os.path.exists(file_dir):
         shutil.rmtree(file_dir)
     if os.path.exists(other_file_dir):
         shutil.rmtree(other_file_dir)
Example #8
 def extract_meta_info(self, path: str) -> DataSetMetaInfo:
     id = path.split('/')[-1]
     dataset = xarray.open_dataset(path)
     coverage = None
     if 'lat' in dataset.coords and 'lon' in dataset.coords:
         lat_min = dataset.lat.min().values.item(0)
         lat_max = dataset.lat.max().values.item(0)
         lon_min = dataset.lon.min().values.item(0)
         lon_max = dataset.lon.max().values.item(0)
         coverage = f'POLYGON(({lon_min} {lat_max}, {lon_max} {lat_max}, {lon_max} {lat_min}, ' \
                    f'{lon_min} {lat_min}, {lon_min} {lat_max}))'
     dataset.close()
     start_time = get_time_from_string(
         id[17:32]).strftime('%Y-%m-%d %H:%M:%S')
     end_time = get_time_from_string(
         id[33:48]).strftime('%Y-%m-%d %H:%M:%S')
     return DataSetMetaInfo(identifier=id,
                            coverage=coverage,
                            start_time=start_time,
                            end_time=end_time,
                            data_type=DataTypeConstants.S1_SPECKLED)
Example #9
File: utils.py Project: RaT0M/vm-support
def create_sar_config_file(temp_dir: str, roi: str, start_time: str, end_time: str, s1_slc_directory: str,
                           s1_grd_directory: str, temporal_filter: str) -> str:
    config = {'SAR': {}}
    config['SAR']['input_folder'] = s1_slc_directory
    config['SAR']['output_folder'] = s1_grd_directory
    config['SAR']['gpt'] = '/software/snap/bin/gpt'
    config['SAR']['speckle_filter'] = {'multi_temporal': {'apply': 'yes', 'files': temporal_filter}}
    minx, miny, maxx, maxy = loads(roi).bounds
    config['SAR']['region'] = {'ul': {'lat': maxy, 'lon': minx}, 'lr': {'lat': miny, 'lon': maxx}}
    start_time = get_time_from_string(start_time)
    if start_time is not None:
        config['SAR']['year'] = start_time.year
    else:
        end_time = get_time_from_string(end_time)
        if end_time is not None:
            config['SAR']['year'] = end_time.year
    config_file_name = '{}/sar_config.yaml'.format(temp_dir)
    with open(config_file_name, 'w') as config_file:
        yaml.dump(config, config_file, default_flow_style=False)
    return config_file_name
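A hypothetical call of the function above could look as follows (all paths and the ROI are made up); it writes sar_config.yaml into the given temporary directory and returns that file's path:

config_file = create_sar_config_file(temp_dir='/tmp/sar',
                                     roi='POLYGON((10 50, 11 50, 11 51, 10 51, 10 50))',
                                     start_time='2017-06-01',
                                     end_time='2017-06-30',
                                     s1_slc_directory='/data/s1/slc',
                                     s1_grd_directory='/data/s1/grd',
                                     temporal_filter='5')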
Example #10
 def _query_wrapped_meta_info_provider(self, query_string: str, local_data_set_meta_infos: List[DataSetMetaInfo]) \
         -> List[DataSetMetaInfo]:
     only_dataset = DataSetMetaInfo(
         coverage="POLYGON((15 15, 25 15, 25 25, 15 25, 15 15))",
         start_time="2017-03-11 14:33:00",
         end_time="2017-03-11 14:45:00",
         data_type="TYPE_C",
         identifier="dterftge")
     if not self.get_roi_from_query_string(query_string).intersects(
             loads(only_dataset.coverage)):
         return []
     if self.get_start_time_from_query_string(
             query_string) > get_time_from_string(only_dataset.end_time):
         return []
     if self.get_end_time_from_query_string(
             query_string) < get_time_from_string(only_dataset.start_time):
         return []
     if 'TYPE_C' not in self.get_data_types_from_query_string(query_string):
         return []
     return [only_dataset]
Example #11
 def _get_bucket_names(data_set_meta_info: DataSetMetaInfo) -> List[str]:
     start_time = get_time_from_string(data_set_meta_info.start_time)
     base_bucket_names = _DATA_TYPE_PARAMETER_DICTS[
         data_set_meta_info.data_type]['baseBuckets']
     bucket_names = []
     quarter = (start_time.month - 1) // 3 + 1
     for base_bucket_name in base_bucket_names:
         bucket_name = base_bucket_name.replace('{YYYY}', str(start_time.year))
         bucket_name = bucket_name.replace('{q}', str(quarter))
         bucket_names.append(bucket_name)
     return bucket_names
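To illustrate the substitution above with made-up values (the base bucket name below is not a real bucket): a start time in August falls into quarter 3, so the placeholders expand like this:

from datetime import datetime

start_time = datetime(2018, 8, 15)
quarter = (start_time.month - 1) // 3 + 1  # August -> quarter 3
bucket_name = 'some-bucket-{YYYY}-q{q}'  # made-up base bucket name
bucket_name = bucket_name.replace('{YYYY}', str(start_time.year)).replace('{q}', str(quarter))
assert bucket_name == 'some-bucket-2018-q3'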
Example #12
def test_get_data_set_meta_infos_for_tile_description():
    parameters = {'path_to_json_file': path_to_json_file}
    aws_s2_meta_info_provider = AwsS2MetaInfoProviderAccessor.create_from_parameters(parameters)
    tile_description = TileDescription('30SWJ', BARRAX_TILE)
    start_time = get_time_from_string('2016-04-01')
    end_time = get_time_from_string('2016-04-30')
    data_set_meta_infos = aws_s2_meta_info_provider._get_data_set_meta_infos_for_tile_description(tile_description,
                                                                                                  start_time, end_time)
    assert 6 == len(data_set_meta_infos)
    assert '2016-04-01T10:57:59' == data_set_meta_infos[0].start_time
    assert '30/S/WJ/2016/4/1/0' == data_set_meta_infos[0].identifier
    assert '2016-04-04T11:03:11' == data_set_meta_infos[1].start_time
    assert '30/S/WJ/2016/4/4/0' == data_set_meta_infos[1].identifier
    assert '2016-04-11T10:57:56' == data_set_meta_infos[2].start_time
    assert '30/S/WJ/2016/4/11/0' == data_set_meta_infos[2].identifier
    assert '2016-04-14T11:09:07' == data_set_meta_infos[3].start_time
    assert '30/S/WJ/2016/4/14/0' == data_set_meta_infos[3].identifier
    assert '2016-04-21T10:59:16' == data_set_meta_infos[4].start_time
    assert '30/S/WJ/2016/4/21/0' == data_set_meta_infos[4].identifier
    assert '2016-04-24T11:09:39' == data_set_meta_infos[5].start_time
    assert '30/S/WJ/2016/4/24/0' == data_set_meta_infos[5].identifier
Example #13
 def _get_prefix(data_set_meta_info: DataSetMetaInfo):
     data_type_dict = _DATA_TYPE_PARAMETER_DICTS[
         data_set_meta_info.data_type]
     storage_structure = data_type_dict['storageStructure']
     data_time = get_time_from_string(data_set_meta_info.start_time)
     prefix = storage_structure.replace('YYYY', '{:04d}'.format(data_time.year))
     prefix = prefix.replace('MM', '{:02d}'.format(data_time.month))
     prefix = prefix.replace('DD', '{:02d}'.format(data_time.day))
     for placeholder in data_type_dict['placeholders'].keys():
         start = data_type_dict['placeholders'][placeholder]['start']
         end = data_type_dict['placeholders'][placeholder]['end']
         prefix = prefix.replace(placeholder,
                                 data_set_meta_info.identifier[start:end])
     return prefix
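For illustration, a hypothetical storage structure and placeholder configuration (not taken from the real _DATA_TYPE_PARAMETER_DICTS) would be expanded by the logic above as follows:

storage_structure = 'YYYY/MM/DD/{TILE}'  # made-up template
placeholders = {'{TILE}': {'start': 0, 'end': 5}}  # made-up placeholder bounds
identifier = '30SWJ_example_product'
prefix = storage_structure.replace('YYYY', '2017').replace('MM', '06').replace('DD', '04')
for placeholder, bounds in placeholders.items():
    prefix = prefix.replace(placeholder, identifier[bounds['start']:bounds['end']])
assert prefix == '2017/06/04/30SWJ'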
 def _query_wrapped_meta_info_provider(self, query_string: str, local_data_set_meta_infos: List[DataSetMetaInfo]) \
         -> List[DataSetMetaInfo]:
     data_types = self.get_data_types_from_query_string(query_string)
     if DataTypeConstants.AWS_S2_L1C not in data_types:
         return []
     roi = self.get_roi_from_query_string(query_string)
     tile_descriptions = self.get_affected_tile_descriptions(roi)
     start_time = self.get_start_time_from_query_string(query_string)
     if start_time is None:
         start_time = get_time_from_string(FIRST_DAY)
     end_time = self.get_end_time_from_query_string(query_string)
     if end_time is None:
         end_time = datetime.now()
     data_set_meta_infos = []
     for tile_description in tile_descriptions:
         data_set_meta_infos_for_tile = self._get_data_set_meta_infos_for_tile_description(
             tile_description, start_time, end_time)
         for data_set_meta_info_for_tile in data_set_meta_infos_for_tile:
             if not self._is_provided_locally(data_set_meta_info_for_tile,
                                              local_data_set_meta_infos):
                 data_set_meta_infos.append(data_set_meta_info_for_tile)
     return data_set_meta_infos
Example #15
 def _get_file_ref(self,
                   data_set_meta_info: DataSetMetaInfo,
                   bands=None,
                   metafiles=None) -> Optional[FileRef]:
     """auxiliary method to delimit the number of downloaded files for testing"""
     if not self._is_valid_identifier(data_set_meta_info.identifier):
         # consider throwing an exception
         return None
     from sentinelhub import AwsTileRequest
     tile_name = self._get_tile_name(data_set_meta_info.identifier)
     start_time_as_datetime = get_time_from_string(
         data_set_meta_info.start_time)
     time = start_time_as_datetime.strftime('%Y-%m-%d')
     aws_index = self._get_aws_index(data_set_meta_info.identifier)
     request = AwsTileRequest(tile=tile_name,
                              time=time,
                              aws_index=aws_index,
                              bands=bands,
                              metafiles=metafiles,
                              data_folder=self._temp_dir)
     year = start_time_as_datetime.year
     month = start_time_as_datetime.month
     day = start_time_as_datetime.day
     logging.info('Downloading S2 Data from {}-{}-{}'.format(
         month, day, year))
     request.save_data()
     saved_dir = '{}/{},{}-{:02d}-{:02d},{}/'.format(
         self._temp_dir, tile_name, year, month, day, aws_index)
     new_dir = '{0}/{1}/{2}/{3}/{4}/{5}/{6}/{7}/'.format(
         self._temp_dir, tile_name[0:2], tile_name[2:3], tile_name[3:5],
         year, month, day, aws_index)
     copy_tree(saved_dir, new_dir)
     logging.info('Downloaded S2 Data from {}-{}-{}'.format(
         month, day, year))
     return FileRef(new_dir, data_set_meta_info.start_time,
                    data_set_meta_info.end_time, get_mime_type(new_dir))
Example #16
 def _get_from_wrapped(
         self, data_set_meta_info: DataSetMetaInfo) -> Sequence[FileRef]:
     file_refs = []
     time = get_time_from_string(data_set_meta_info.start_time)
     file_url = '{}/{}/{}/{}.{:02d}.{:02d}/{}'.format(
         _BASE_URL, _PLATFORM, data_set_meta_info.data_type, time.year,
         time.month, time.day, data_set_meta_info.identifier)
     request = urllib2.Request(file_url)
     authorization = base64.encodebytes(str.encode('{}:{}'.format(self._username, self._password))). \
         replace(b'\n', b'').decode()
     request.add_header('Authorization', 'Basic {}'.format(authorization))
     remote_file = self._opener.open(request)
     temp_url = '{}/{}'.format(self._temp_dir,
                               data_set_meta_info.identifier)
     logging.info('Downloading {}'.format(data_set_meta_info.identifier))
     with open(temp_url, 'wb') as temp_file:
         total_size_in_bytes = int(remote_file.info()['Content-Length'])
         one_percent = total_size_in_bytes / 100
         downloaded_bytes = 0
         next_threshold = one_percent
         length = 1024
         buf = remote_file.read(length)
         while buf:
             temp_file.write(buf)
             buf = remote_file.read(length)
             downloaded_bytes += 1024
             if downloaded_bytes > next_threshold:
                 stdout.write('\r{} %'.format(
                     int(next_threshold / one_percent)))
                 stdout.flush()
                 next_threshold += one_percent
     logging.info('Downloaded {}'.format(data_set_meta_info.identifier))
     file_refs.append(
         FileRef(temp_url, data_set_meta_info.start_time,
                 data_set_meta_info.end_time, get_mime_type(temp_url)))
     return file_refs
Example #17
 def remove(self, data_set_meta_info: DataSetMetaInfo):
     # todo test whether this works with aws s2 data too
     time = get_time_from_string(data_set_meta_info.start_time)
     relative_path = self.path + self.pattern
     relative_path = relative_path.replace(
         '/{}/'.format(_DATA_TYPE_PATTERN),
         '/{}/'.format(data_set_meta_info.data_type))
     relative_path = relative_path.replace('/{}/'.format(_YEAR_PATTERN),
                                           '/{:04d}/'.format(time.year))
     relative_path = relative_path.replace('/{}/'.format(_MONTH_PATTERN),
                                           '/{:02d}/'.format(time.month))
     relative_path = relative_path.replace('/{}/'.format(_DAY_PATTERN),
                                           '/{:02d}/'.format(time.day))
     if os.path.exists(relative_path):
         file_names = os.listdir(relative_path)
         for file_name in file_names:
             if data_set_meta_info.identifier in file_name:
                 os.remove(relative_path + file_name)
     while not self.path == relative_path and len(os.listdir(relative_path)) == 0:
         os.rmdir(relative_path)
         relative_path = relative_path[:relative_path[:relative_path.rfind('/')].rfind('/')]
Example #18
def infer_kaska_s2(start_time: Union[str, datetime],
                   end_time: Union[str, datetime],
                   time_step: Union[int, timedelta],
                   datasets_dir: str,
                   forward_models: List[str],
                   output_directory: str,
                   parameters: Optional[List[str]] = None,
                   state_mask: Optional[str] = None,
                   roi: Optional[Union[str, Polygon]] = None,
                   spatial_resolution: Optional[int] = None,
                   roi_grid: Optional[str] = None,
                   destination_grid: Optional[str] = None,
                   tile_index_x: Optional[int] = 0,
                   tile_index_y: Optional[int] = 0,
                   tile_width: Optional[int] = None,
                   tile_height: Optional[int] = None
                   ):
    if type(start_time) is str:
        start_time = get_time_from_string(start_time)
    if type(end_time) is str:
        end_time = get_time_from_string(end_time)
    if type(time_step) is int:
        time_step = timedelta(days=time_step)
    time_grid = []
    current_time = start_time
    while current_time < end_time:
        time_grid.append(current_time)
        current_time += time_step
    time_grid.append(end_time)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    temp_dir = f'{output_directory}/temp_{tile_index_x}_{tile_index_y}/'
    if not os.path.exists(temp_dir):
        os.makedirs(temp_dir)
    mask_data_set, untiled_reprojection = _get_mask_data_set_and_reprojection(state_mask, spatial_resolution, roi,
                                                                              roi_grid, destination_grid)
    reprojection = untiled_reprojection
    tile_mask_data_set = mask_data_set
    raster_width = mask_data_set.RasterXSize
    raster_height = mask_data_set.RasterYSize
    offset_x = 0
    offset_y = 0
    if tile_width is not None and tile_height is not None:
        geo_transform = mask_data_set.GetGeoTransform()
        ulx, xres, xskew, uly, yskew, yres = geo_transform
        minlrx = ulx + (mask_data_set.RasterXSize * xres)
        minlry = uly + (mask_data_set.RasterYSize * yres)
        ulx = ulx + (tile_index_x * tile_width * xres)
        uly = uly + (tile_index_y * tile_height * yres)
        lrx = ulx + (tile_width * xres)
        lry = uly + (tile_height * yres)
        raster_width = tile_width
        raster_height = tile_height
        if (lrx > ulx and lrx > minlrx) or (lrx < ulx and lrx < minlrx):
            lrx = minlrx
            raster_width = np.abs((ulx - lrx) / xres)
        if (lry > uly and lry > minlry) or (lry < uly and lry < minlry):
            lry = minlry
            raster_height = np.abs((uly - lry) / yres)
        offset_x = tile_index_x * tile_width
        offset_y = tile_index_y * tile_height
        roi_bounds = (min(ulx, lrx), min(uly, lry), max(ulx, lrx), max(uly, lry))
        destination_spatial_reference_system = osr.SpatialReference()
        projection = mask_data_set.GetProjection()
        destination_spatial_reference_system.ImportFromWkt(projection)
        reprojection = Reprojection(roi_bounds, xres, yres, destination_spatial_reference_system)
        tile_mask_data_set = reprojection.reproject(mask_data_set)
    elif tile_width is not None or tile_height is not None:
        logging.warning('To use tiling, parameters tileWidth and tileHeight must be set. Continue without tiling')
    file_refs = _get_valid_files(datasets_dir)
    observations_factory = ObservationsFactory()
    observations_factory.sort_file_ref_list(file_refs)
    # an observations wrapper to be passed to kafka
    observations = observations_factory.create_observations(file_refs, reprojection, forward_models)
    model_parameter_names = []
    other_logger.info('Assembling model parameter names')
    for forward_model_name in forward_models:
        forward_model = get_forward_model(forward_model_name)
        if forward_model is None:
            other_logger.warning(f'Could not find forward model {forward_model_name}')
            continue
        for variable in forward_model.variables:
            other_logger.info(f'Checking variable {variable}')
            if variable not in model_parameter_names:
                model_parameter_names.append(variable)
    outfile_names = []
    requested_indexes = []
    for i, parameter_name in enumerate(model_parameter_names):
        other_logger.info(f'Checking for {parameter_name}')
        if parameters is None or parameter_name in parameters:
            other_logger.info(f'Creating output files for {parameter_name}')
            requested_indexes.append(i)
            for time_step in time_grid:
                time = time_step.strftime('%Y-%m-%d')
                outfile_names.append(f"{output_directory}/s2_{parameter_name}_A{time}.tif")
                other_logger.info(f'Created output file {parameter_name}')
    writer = GeoTiffWriter(outfile_names, mask_data_set.GetGeoTransform(), mask_data_set.GetProjection(),
                           mask_data_set.RasterXSize, mask_data_set.RasterYSize, num_bands=None, data_types=None)
    data = []
    at_least_one_valid_observation = False
    for date in observations.dates:
        granule = observations.read_granule(date)
        if granule[0] is not None:
            at_least_one_valid_observation = True
            break
    if not at_least_one_valid_observation:
        logging.info('No valid observations found. Will skip inference.')
        for j in requested_indexes:
            for i in range(len(time_grid)):
                data.append(np.zeros((int(raster_height), int(raster_width))))
    else:
        # todo make this more elaborate when more than one inverter is available
        approx_inverter = get_inverter("prosail_5paras", "Sentinel2")

        kaska = KaSKA(observations=observations,
                      time_grid=time_grid,
                      state_mask=tile_mask_data_set,
                      approx_inverter=approx_inverter,
                      output_folder=temp_dir,
                      save_sgl_inversion=False)
        results = kaska.run_retrieval()
        for j, sub_data in enumerate(results[1:]):
            if j in requested_indexes:
                for i in range(len(time_grid)):
                    data.append(sub_data[i, :, :])
    other_logger.info(f'Writing to {offset_x}, {offset_y} with width {raster_width} and height {raster_height}')
    writer.write(data, raster_width, raster_height, offset_x, offset_y)
Example #19
def _pm_request_of(request, workdir: str, id: str) -> Dict:
    template_text = pkg_resources.resource_string(
        __name__, "resources/pm_request_template.json")
    pm_request = json.loads(template_text)
    pm_request['requestName'] = f"{workdir}/{request['name']}"
    pm_request['requestId'] = id
    pm_request['productionType'] = _determine_workflow(request)
    pm_request['data_root'] = workdir
    pm_request['simulation'] = pm_request['simulation'] == 'True'
    pm_request['log_dir'] = f'{workdir}/log'
    pm_request['General']['roi'] = request['roi']
    pm_request['General']['start_time'] = \
        datetime.datetime.strftime(get_time_from_string(request['timeRange'][0]), '%Y-%m-%d')
    pm_request['General']['end_time'] = \
        datetime.datetime.strftime(get_time_from_string(request['timeRange'][1]), '%Y-%m-%d')
    pm_request['General']['time_interval'] = request['timeStep']
    pm_request['General']['spatial_resolution'] = request['spatialResolution']
    pm_request['General']['tile_width'] = 512
    pm_request['General']['tile_height'] = 512
    num_tiles_x, num_tiles_y = _get_num_tiles_of_request(request, 512, 512)
    pm_request['General']['num_tiles_x'] = num_tiles_x
    pm_request['General']['num_tiles_y'] = num_tiles_y
    pm_request['Inference']['time_interval'] = request['timeStep']
    forward_models = []
    for model_dict in request['forwardModels']:
        model = {
            "name": model_dict["name"],
            "type": model_dict["type"],
            "data_type": model_dict["modelDataType"],
            "required_priors": [prior for prior in model_dict["requiredPriors"]],
            "output_parameters": [parameter for parameter in model_dict["outputParameters"]]
        }
        forward_models.append(model)
    pm_request['Inference']['forward_models'] = forward_models
    pm_request['Prior']['output_directory'] = workdir + '/priors'
    for user_prior_dict in request['userPriors']:
        if 'mu' in user_prior_dict:
            pm_request['Prior'][user_prior_dict['name']] = {
                'user': {
                    'mu': user_prior_dict['mu']
                }
            }
        if 'unc' in user_prior_dict:
            if 'user' not in pm_request['Prior'][user_prior_dict['name']]:
                pm_request['Prior'][user_prior_dict['name']]['user'] = {}
            pm_request['Prior'][user_prior_dict['name']]['user'][
                'unc'] = user_prior_dict['unc']
    if 's1TemporalFilter' in request:
        pm_request['SAR']['speckle_filter']['multi_temporal'][
            'files'] = request['s1TemporalFilter']
        (min_lon, min_lat, max_lon, max_lat) = loads(request['roi']).bounds
        pm_request['SAR']['region']['ul']['lat'] = max_lat
        pm_request['SAR']['region']['ul']['lon'] = min_lon
        pm_request['SAR']['region']['lr']['lat'] = min_lat
        pm_request['SAR']['region']['lr']['lon'] = max_lon
        pm_request['SAR']['year'] = datetime.datetime.strftime(
            get_time_from_string(request['timeRange'][0]), '%Y')
    if 's2ComputeRoi' in request:
        pm_request['S2-PreProcessing']['compute_only_roi'] = request[
            's2ComputeRoi']
    if 'postProcessors' in request:
        post_processor_list = []
        for post_processor_dict in request['postProcessors']:
            pp_dict = {}
            pp_dict['name'] = post_processor_dict['name']
            pp_dict['type'] = post_processor_dict['type']
            pp_dict['input_types'] = [input_type for input_type in post_processor_dict["inputTypes"]]
            pp_dict['indicator_names'] = [indicator_name for indicator_name in post_processor_dict["indicatorNames"]]
            pp_dict['variable_names'] = [variable_name for variable_name in post_processor_dict["variableNames"]]
            post_processor_list.append(pp_dict)
        pm_request['post_processing']['post_processors'] = post_processor_list
    return pm_request
Example #20
def _infer(start_time: Union[str, datetime],
           end_time: Union[str, datetime],
           parameter_list: List[str],
           prior_directory: str,
           datasets_dir: str,
           previous_state_dir: str,
           next_state_dir: str,
           emulators_directory: Optional[str],
           forward_models: Optional[List[str]],
           output_directory: str,
           state_mask: Optional[str],
           roi: Optional[Union[str, Polygon]],
           spatial_resolution: Optional[int],
           roi_grid: Optional[str],
           destination_grid: Optional[str]):
    # we assume that time is derived for one time step; or, to be more precise, for one time period (with no
    # intermediate time steps). This time step/time period is described by start time and end time.
    if type(start_time) is str:
        start_time = get_time_from_string(start_time)
    if type(end_time) is str:
        end_time = get_time_from_string(end_time)

    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    if forward_models is None and emulators_directory is not None:
        logging.info('Determining forward model name from emulators directory')
        aux_data_provider = get_aux_data_provider()
        model_metadata_file = f'{emulators_directory}/metadata.json'
        if aux_data_provider.assure_element_provided(model_metadata_file):
            with open(model_metadata_file, 'r') as model_file:
                model_metadata = json.load(model_file)
                forward_models = [model_metadata['id']]
                logging.info(f"Determined forward model '{forward_models[0]}' from emulators directory")
        else:
            raise FileNotFoundError(f'Could not find {model_metadata_file}')
    mask_data_set, reprojection = _get_mask_data_set_and_reprojection(state_mask, spatial_resolution, roi, roi_grid,
                                                                      destination_grid)
    mask = mask_data_set.ReadAsArray().astype(bool)
    geo_transform = mask_data_set.GetGeoTransform()
    projection = mask_data_set.GetProjection()
    complete_parameter_list = []
    for forward_model_name in forward_models:
        logging.info(f'Checking for forward model {forward_model_name}')
        forward_model = get_forward_model(forward_model_name)
        if forward_model is not None:
            logging.info(f'Forward model {forward_model_name} found')
            model_variables = forward_model.variables
            for model_variable in model_variables:
                if model_variable not in complete_parameter_list:
                    complete_parameter_list.append(model_variable)
        else:
            raise ValueError(f'Could not find {forward_model_name}')
    output = InferenceWriter(parameter_list, complete_parameter_list, output_directory, start_time, geo_transform,
                             projection, mask.shape[1], mask.shape[0], state_folder=next_state_dir)
    prior_files = glob.glob(prior_directory + '/*.vrt')
    inference_prior = InferencePrior('', global_prior_files=prior_files, reference_dataset=mask_data_set)

    file_refs = _get_valid_files(datasets_dir)
    observations_factory = ObservationsFactory()
    observations_factory.sort_file_ref_list(file_refs)
    # an observations wrapper to be passed to kafka
    observations = observations_factory.create_observations(file_refs, reprojection, forward_models)

    p_forecast_inv = None
    x_forecast = None
    if previous_state_dir is not None and os.path.exists(previous_state_dir):
        p_inv_fname = "P_analysis_inv_%s.npz" % start_time.strftime("A%Y%j")
        p_inv_fname = os.path.join(previous_state_dir, p_inv_fname)
        if os.path.exists(p_inv_fname):
            p_forecast_inv = sp.load_npz(p_inv_fname)
        x_fname = "X_analysis_%s.npz" % start_time.strftime("A%Y%j")
        x_fname = os.path.join(previous_state_dir, x_fname)
        if os.path.exists(x_fname):
            x_forecast = np.load(x_fname)['arr_0']
        mask_fname = "state_mask_%s.npz" % start_time.strftime("A%Y%j")
        mask_fname = os.path.join(previous_state_dir, mask_fname)
        if os.path.exists(mask_fname):
            mask = np.load(mask_fname)['arr_0']
    if p_forecast_inv is None or x_forecast is None:
        processed_prior = inference_prior.process_prior(complete_parameter_list, start_time, mask)
        if x_forecast is None:
            x_forecast = processed_prior[0]
        if p_forecast_inv is None:
            p_forecast_inv = processed_prior[1]
        mask = processed_prior[2]

    linear_kalman = LinearKalman(observations, output, mask, create_prosail_observation_operator,
                                 complete_parameter_list, state_propagation=propagator, prior=None, linear=False)

    # Inflation amount for propagation
    q = np.zeros_like(x_forecast)
    # todo figure out correct setting
    if 'lai' in complete_parameter_list:
        lai_index = complete_parameter_list.index('lai')
        q[lai_index::len(complete_parameter_list)] = 0.05
    linear_kalman.set_trajectory_model()
    linear_kalman.set_trajectory_uncertainty(q)

    time_grid = [start_time, end_time]
    linear_kalman.run(time_grid, x_forecast, None, p_forecast_inv, iter_obs_op=True)
def test_create_observations():
    class DummyObservations(ProductObservations):
        def get_band_data_by_name(
                self,
                band_name: str,
                retrieve_uncertainty: bool = True) -> ObservationData:
            return ObservationData(observations=np.array([0.5]),
                                   uncertainty=sp.lil_matrix((1, 1)),
                                   mask=np.array([0]),
                                   metadata={},
                                   emulator=None)

        def get_band_data(
                self,
                band_index: int,
                retrieve_uncertainty: bool = True) -> ObservationData:
            return ObservationData(observations=np.array([0.5]),
                                   uncertainty=sp.lil_matrix((1, 1)),
                                   mask=np.array([0]),
                                   metadata={},
                                   emulator=None)

        @property
        def bands_per_observation(self):
            return 15

        @property
        def data_type(self):
            return 'dummy_type'

        def set_no_data_value(self, band: Union[str, int],
                              no_data_value: float):
            pass

    class DummyObservationsCreator(ProductObservationsCreator):
        DUMMY_PATTERN = 'dfghztm_[0-9]{4}_dvfgbh'
        DUMMY_PATTERN_MATCHER = re.compile('dfghztm_[0-9]{4}_dvfgbh')

        @classmethod
        def can_read(cls, file_ref: FileRef) -> bool:
            if os.path.exists(file_ref.url):
                with open(file_ref.url, 'r') as file:
                    return cls.DUMMY_PATTERN_MATCHER.search(file.name) is not None
            return False

        @classmethod
        def create_observations(
                cls, file_ref: FileRef, reprojection: Optional[Reprojection],
                emulator_folder: Optional[str]) -> ProductObservations:
            if cls.can_read(file_ref):
                return DummyObservations()

    observations_factory = ObservationsFactory()
    observations_factory.add_observations_creator_to_registry(
        DummyObservationsCreator())

    start_time = '2017-06-04'
    file_refs = [
        FileRef(url=DUMMY_FILE,
                start_time=start_time,
                end_time='2017-06-07',
                mime_type='unknown mime type'),
        FileRef(url='tzzg',
                start_time='2017-06-07',
                end_time='2017-06-10',
                mime_type='unknown mime type')
    ]
    observations_wrapper = observations_factory.create_observations(
        file_refs, None, '')

    assert 1, observations_wrapper.get_num_observations()
    assert 15, observations_wrapper.bands_per_observation(0)
    start_time = get_time_from_string(start_time)
    data = observations_wrapper.get_band_data(start_time, 0)
    assert 1, len(data.observations)
    assert 0.5, data.observations[0]
    other_data = observations_wrapper.get_band_data_by_name(start_time, 'name')
    assert 1, len(other_data.observations)
    assert 0.5, other_data.observations[0]
    assert 'dummy_type' == observations_wrapper.get_data_type(start_time)
Example #22
 def _query_wrapped_meta_info_provider(self, query_string: str, local_data_set_meta_infos: List[DataSetMetaInfo]) \
         -> List[DataSetMetaInfo]:
     requested_data_types = []
     query_data_types = self.get_data_types_from_query_string(query_string)
     for supported_data_type in self._supported_data_types:
         if supported_data_type in query_data_types:
             requested_data_types.append(supported_data_type)
     if len(requested_data_types) == 0:
         return []
     roi = self.get_roi_from_query_string(query_string)
     tile_coverages = []
     for v in range(18):
         for h in range(36):
             tile_coverage = get_tile_coverage(h, v)
             if tile_coverage is not None and tile_coverage.intersects(roi):
                 tile_coverages.append((h, v, tile_coverage.wkt))
     start_time = self.get_start_time_from_query_string(query_string)
     if start_time is None:
         start_time = get_time_from_string(FIRST_DAY)
     end_time = self.get_end_time_from_query_string(query_string)
     if end_time is None:
         end_time = datetime.datetime.now()
     data_set_meta_infos = []
     try:
         for requested_data_type in requested_data_types:
             start_doy = start_time.timetuple().tm_yday
             current_time = start_time - datetime.timedelta(
                 days=(start_doy - _DATA_OFFSETS[requested_data_type]) %
                 _DATA_INTERVALS[requested_data_type])
             while current_time < end_time:
                 current_time_str = current_time.strftime(
                     '%Y-%m-%d %H:%M:%S')
                 current_tile_coverages = []
                 for h, v, tile_coverage in tile_coverages:
                     add_to_current = True
                     for local_data_set_meta_info in local_data_set_meta_infos:
                         if local_data_set_meta_info.coverage == tile_coverage and \
                                 local_data_set_meta_info.start_time == current_time_str:
                             add_to_current = False
                             break
                     if add_to_current:
                         current_tile_coverages.append(
                             (h, v, tile_coverage))
                 next_time = current_time + datetime.timedelta(
                     days=_DATA_INTERVALS[requested_data_type])
                 next_time -= datetime.timedelta(seconds=1)
                 if len(current_tile_coverages) > 0:
                     date_dir_url = '{}/{}/{}/{}.{:02d}.{:02d}/'.format(
                         _BASE_URL, _PLATFORM, requested_data_type,
                         current_time.year, current_time.month,
                         current_time.day)
                     date_page = urllib2.urlopen(
                         date_dir_url).read().decode('utf-8')
                     for h, v, tile_coverage in current_tile_coverages:
                         file_regex = '.hdf">{}.A{}{:03d}.h{:02d}v{:02d}.006.*.hdf'. \
                             format(requested_data_type.split('.')[0], current_time.year,
                                    current_time.timetuple().tm_yday, h, v)
                         available_files = re.findall(file_regex, date_page)
                         for file in available_files:
                             current_time_str = current_time.strftime(
                                 '%Y-%m-%d %H:%M:%S')
                             logging.info('Found {} data set for {}'.format(
                                 requested_data_type, current_time_str))
                             data_set_meta_infos.append(
                                 DataSetMetaInfo(
                                     tile_coverage, current_time_str,
                                     next_time.strftime(
                                         '%Y-%m-%d %H:%M:%S'),
                                     requested_data_type, file[6:]))
                 current_time = next_time + datetime.timedelta(seconds=1)
     except URLError as e:
         logging.warning(
             'Could not access NASA Land Processes Distributed Active Archive Center: {}'
             .format(e.reason))
     return data_set_meta_infos
Example #23
 def get_end_time_from_query_string(query_string: str) -> Optional[datetime]:
     end_time_as_string = query_string.split(';')[2]
     return get_time_from_string(end_time_as_string, True)
Example #24
 def get_start_time_from_query_string(query_string: str) -> Optional[datetime]:
     start_time_as_string = query_string.split(';')[1]
     return get_time_from_string(start_time_as_string, False)
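Together with get_roi_from_query_string and get_data_types_from_query_string used in the examples above, these two accessors imply a semicolon-separated query string. The sketch below shows how such a string could be taken apart; only the start-time and end-time positions are confirmed by the code above, while the ROI and data-type positions are assumptions:

from shapely.wkt import loads

query_string = 'POLYGON((15 15, 25 15, 25 25, 15 25, 15 15));2017-03-01;2017-03-31;TYPE_C,TYPE_X'
roi = loads(query_string.split(';')[0])             # assumed position of the region of interest
start_time_as_string = query_string.split(';')[1]   # position used by get_start_time_from_query_string
end_time_as_string = query_string.split(';')[2]     # position used by get_end_time_from_query_string
data_types = query_string.split(';')[3].split(',')  # assumed position of the data types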