Example #1
    def write_sites(self, output_filename: PathLike):
        """
        Writes HFR radar facility locations to specified file and layer.

        :param output_filename: path to output file
        """

        if not isinstance(output_filename, Path):
            output_filename = Path(output_filename)

        output_filename, layer_name = PyOFS.split_layer_filename(
            output_filename)

        layer_records = []

        for site_index in range(self.dataset['nSites']):
            site_code = (
                self.dataset['site_code'][site_index].tobytes().decode().strip('\x00').strip()
            )
            site_network_code = (
                self.dataset['site_netCode'][site_index].tobytes().decode().strip('\x00').strip()
            )
            lon = float(self.dataset['site_lon'][site_index])
            lat = float(self.dataset['site_lat'][site_index])

            record = {
                'id': site_index + 1,
                'geometry': {
                    'type': 'Point',
                    'coordinates': (lon, lat)
                },
                'properties': {
                    'code': site_code,
                    'net_code': site_network_code,
                    'lon': float(lon),
                    'lat': float(lat),
                },
            }

            layer_records.append(record)

        schema = {
            'geometry': 'Point',
            'properties': {
                'code': 'str',
                'net_code': 'str',
                'lon': 'float',
                'lat': 'float'
            },
        }

        with fiona.open(
                output_filename,
                'w',
                'GPKG',
                layer=layer_name,
                schema=schema,
                crs=OUTPUT_CRS.to_dict(),
        ) as layer:
            layer.writerecords(layer_records)
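
    # Usage sketch (assumption: this method belongs to the HFR class whose
    # constructor appears in Example #15, and ':' separates path from layer
    # name as in PyOFS.split_layer_filename):
    #
    #     hfr = HFRadarRange(datetime(2020, 1, 1), datetime(2020, 1, 2))
    #     hfr.write_sites('hfr_sites.gpkg:radar_sites')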
Example #2
    def _sss(self, data_time: datetime) -> numpy.ndarray:
        """
        Retrieve SMOS SSS data.

        :param data_time: datetime to retrieve (only uses month)
        :return: array of data
        """

        # SMOS data has monthly resolution, so snap to mid-month (the 16th)
        data_time = datetime(data_time.year, data_time.month, 16)

        if numpy.datetime64(data_time) in self.dataset['times'].values:
            return self.dataset['smap_sss'].sel(times=data_time).values
        else:
            raise PyOFS.NoDataError(
                f'No data exists for {data_time:%Y%m%dT%H%M%S}.')
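
    # Usage sketch ('observation' stands for an instance of the surrounding
    # class); any day within a month resolves to that month's field, since the
    # method snaps the day to the 16th:
    #
    #     sss = observation._sss(datetime(2019, 6, 3))  # same field as June 16th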
Example #3
    def __init__(self, station_name: str):
        """
        NDBC data buoy

        :param station_name: station name
        :raises NoDataError: if observation does not exist
        """

        self.station_name = station_name
        self.url = f'{SOURCE_URL}/{self.station_name}/{self.station_name}o9999.nc'

        try:
            self.dataset = xarray.open_dataset(self.url)
            self.longitude = self.dataset['longitude'].values.item()
            self.latitude = self.dataset['latitude'].values.item()
        except Exception as error:
            raise PyOFS.NoDataError(
                f'No NDBC observation found at {self.url}') from error
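
    # Usage sketch (the class name DataBuoyDataset is taken from Example #4;
    # '46025' is a hypothetical NDBC station identifier):
    #
    #     buoy = DataBuoyDataset('46025')
    #     print(buoy.longitude, buoy.latitude)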
Example #4
    def __init__(self, stations: list = None):
        """
        Collection of NDBC data buoys

        :param stations: list of station names
        :raises NoDataError: if data does not exist
        """

        if stations is None:
            with requests.get(CATALOG_URL) as station_catalog:
                self.station_names = re.findall(
                    "href='(.*?)/catalog.html'", station_catalog.text
                )
        elif isinstance(stations, str):
            self.station_names = list(
                numpy.genfromtxt(WCOFS_NDBC_STATIONS_FILENAME, dtype='str')
            )
        else:
            self.station_names = stations

        self.stations = {}

        LOGGER.debug(f'Collecting NDBC data from {len(self.station_names)} stations...')

        # concurrently populate dictionary with datasets for each station
        with futures.ThreadPoolExecutor() as concurrency_pool:
            running_futures = {
                concurrency_pool.submit(DataBuoyDataset, station_name): station_name
                for station_name in self.station_names
            }

            for completed_future in futures.as_completed(running_futures):
                station_name = running_futures[completed_future]

                if not isinstance(completed_future.exception(), PyOFS.NoDataError):
                    result = completed_future.result()
                    self.stations[station_name] = result

            del running_futures

        if len(self.stations) == 0:
            raise PyOFS.NoDataError(f'No NDBC datasets found among {len(self.station_names)} stations')
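
    # Usage sketch ('buoys' stands for an instance of this collection class;
    # station datasets are keyed by station name):
    #
    #     for station_name, buoy in buoys.stations.items():
    #         LOGGER.info(f'{station_name}: ({buoy.longitude}, {buoy.latitude})')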
Example #5
def store_viirs_pass_times(
    satellite: str,
    study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
    start_time: datetime = VIIRS_START_TIME,
    output_filename: PathLike = PASS_TIMES_FILENAME,
    num_periods: int = 1,
    algorithm: str = 'STAR',
    version: str = '2.40',
):
    """
    Compute VIIRS pass times from the given start date over the specified number of periods.

    :param satellite: satellite for which to store pass times, either NPP or N20
    :param study_area_polygon_filename: path to vector file containing polygon of study area
    :param start_time: beginning of given VIIRS period (in UTC)
    :param output_filename: path to output file
    :param num_periods: number of periods to store
    :param algorithm: either 'STAR' or 'OSPO'
    :param version: ACSPO Version number (2.40 - 2.41)
    """

    if not isinstance(study_area_polygon_filename, Path):
        study_area_polygon_filename = Path(study_area_polygon_filename)

    start_time = PyOFS.round_to_ten_minutes(start_time)
    end_time = PyOFS.round_to_ten_minutes(start_time +
                                          (VIIRS_PERIOD * num_periods))

    LOGGER.info(
        f'Getting pass times between {start_time:%Y-%m-%d %H:%M:%S} and {end_time:%Y-%m-%d %H:%M:%S}'
    )

    datetime_range = PyOFS.ten_minute_range(start_time, end_time)

    # construct polygon from the first record in layer
    study_area_polygon = shapely.geometry.Polygon(
        utilities.get_first_record(study_area_polygon_filename)['geometry']['coordinates'][0]
    )

    lines = []

    for current_time in datetime_range:

        # find number of cycles from the first orbit to the present day
        num_cycles = int((datetime.now() - start_time).days / 16)

        # iterate over each cycle
        for cycle_index in range(0, num_cycles):
            # get current datetime of interest
            cycle_offset = VIIRS_PERIOD * cycle_index
            cycle_time = current_time + cycle_offset

            try:
                # get observation of new datetime
                dataset = VIIRSDataset(cycle_time, satellite,
                                       study_area_polygon_filename, algorithm,
                                       version)

                # check if observation falls within polygon extent
                if dataset.data_extent.is_valid:
                    if study_area_polygon.intersects(dataset.data_extent):
                        # get duration from current cycle start
                        cycle_duration = cycle_time - (start_time +
                                                       cycle_offset)

                        LOGGER.info(
                            f'{cycle_time:%Y%m%dT%H%M%S} {cycle_duration / timedelta(seconds=1)}: valid scene (checked {cycle_index + 1} cycle(s))'
                        )
                        lines.append(
                            f'{cycle_time:%Y%m%dT%H%M%S},{cycle_duration / timedelta(seconds=1)}'
                        )

                # observation was retrieved; break out of the cycle loop and
                # move on to the next datetime
                break
            except PyOFS.NoDataError as error:
                LOGGER.warning(f'{error.__class__.__name__}: {error}')
        else:
            LOGGER.warning(
                f'{current_time:%Y%m%dT%H%M%S}: missing observation across all cycles'
            )

    # write lines to file
    with open(output_filename, 'w') as output_file:
        output_file.write('\n'.join(lines))

    LOGGER.info('Wrote data to file')
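
# Usage sketch (satellite name and algorithm/version values are taken from the
# docstring above):
#
#     store_viirs_pass_times('NPP', num_periods=2, algorithm='STAR', version='2.40')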
Example #6
    def __init__(
        self,
        start_time: datetime,
        end_time: datetime,
        satellites: list = ('NPP', 'N20'),
        study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
        pass_times_filename: PathLike = PASS_TIMES_FILENAME,
        algorithm: str = 'OSPO',
        version: str = None,
    ):
        """
        Collect VIIRS datasets within time interval.

        :param start_time: beginning of time interval (in UTC)
        :param end_time: end of time interval (in UTC)
        :param satellites: VIIRS platforms
        :param study_area_polygon_filename: filename of vector file of study area boundary
        :param pass_times_filename: path to text file with pass times
        :param algorithm: either 'STAR' or 'OSPO'
        :param version: ACSPO algorithm version
        :raises NoDataError: if data does not exist
        """

        if not isinstance(study_area_polygon_filename, Path):
            study_area_polygon_filename = Path(study_area_polygon_filename)

        if not isinstance(pass_times_filename, Path):
            pass_times_filename = Path(pass_times_filename)

        self.start_time = start_time
        if end_time > datetime.utcnow():
            # VIIRS near real time delay is 2 hours behind UTC
            self.end_time = datetime.utcnow() - NRT_DELAY
        else:
            self.end_time = end_time

        self.satellites = satellites

        self.study_area_polygon_filename = study_area_polygon_filename
        self.viirs_pass_times_filename = pass_times_filename
        self.algorithm = algorithm
        self.version = version

        if 'N20' in self.satellites:
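            # assumption: the pass-times file stores Suomi NPP pass times, and
            # NOAA-20 passes are offset from those by roughly 50 minutes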
            self.pass_times = get_pass_times(
                self.start_time, self.end_time,
                self.viirs_pass_times_filename) - timedelta(minutes=50)
        else:
            self.pass_times = get_pass_times(self.start_time, self.end_time,
                                             self.viirs_pass_times_filename)

        if len(self.pass_times) > 0:
            LOGGER.info(
                f'Collecting VIIRS data from {len(self.pass_times)} passes between {numpy.min(self.pass_times)} UTC and {numpy.max(self.pass_times)} UTC...'
            )

            # create dictionary to store scenes
            self.datasets = {pass_time: {} for pass_time in self.pass_times}

            with futures.ThreadPoolExecutor() as concurrency_pool:
                for satellite in self.satellites:
                    running_futures = {}
                    for pass_time in self.pass_times:
                        running_future = concurrency_pool.submit(
                            VIIRSDataset,
                            data_time=pass_time,
                            study_area_polygon_filename=self.study_area_polygon_filename,
                            algorithm=self.algorithm,
                            version=self.version,
                            satellite=satellite,
                        )
                        running_futures[running_future] = pass_time

                    for completed_future in futures.as_completed(running_futures):
                        if completed_future.exception() is None:
                            pass_time = running_futures[completed_future]
                            viirs_dataset = completed_future.result()
                            self.datasets[pass_time][satellite] = viirs_dataset
                        else:
                            LOGGER.warning(
                                f'Dataset creation error: {completed_future.exception()}'
                            )

                    del running_futures

            if len(self.datasets) > 0:
                VIIRSRange.study_area_transform = VIIRSDataset.study_area_transform
                VIIRSRange.study_area_extent = VIIRSDataset.study_area_extent
                VIIRSRange.study_area_bounds = VIIRSDataset.study_area_bounds

                LOGGER.debug(
                    f'VIIRS data was found in {len(self.datasets)} passes.')
            else:
                raise PyOFS.NoDataError(
                    f'No VIIRS datasets found between {self.start_time} UTC and {self.end_time} UTC.'
                )

        else:
            raise PyOFS.NoDataError(
                f'There are no VIIRS passes between {self.start_time} UTC and {self.end_time} UTC.'
            )
Example #7
    def write_vector(
        self,
        output_filename: PathLike,
        start_time: datetime,
        end_time: datetime,
        variables: Collection[str] = None,
    ):
        """
        Write average of buoy data for all hours in the given time interval to a single layer of the provided output file.

        :param output_filename: path to output file
        :param start_time: beginning of time interval
        :param end_time: end of time interval
        :param variables: list of variable names
        """

        if not isinstance(output_filename, Path):
            output_filename = Path(output_filename)

        output_filename, layer_name = PyOFS.split_layer_filename(output_filename)

        if variables is None:
            variables = MEASUREMENT_VARIABLES

        station_data = self.data_average(variables, start_time, end_time)

        schema = {
            'geometry': 'Point',
            'properties': {
                'name': 'str',
                'longitude': 'float',
                'latitude': 'float',
                'water_temperature': 'float',
                'conductivity': 'float',
                'salinity': 'float',
                'o2_saturation': 'float',
                'dissolved_oxygen': 'float',
                'chlorophyll_concentration': 'float',
                'turbidity': 'float',
                'water_ph': 'float',
                'water_eh': 'float',
            },
        }

        LOGGER.debug('Creating features...')

        layer_records = []

        for station_name, station_record in station_data.items():
            station = self.stations[station_name]

            record = {
                'geometry': {
                    'type': 'Point',
                    'coordinates': (station.longitude, station.latitude),
                },
                'properties': {
                    'name': station_name,
                    'longitude': station.longitude,
                    'latitude': station.latitude,
                    'water_temperature': station_record['water_temperature'],
                    'conductivity': station_record['conductivity'],
                    'salinity': station_record['salinity'],
                    'o2_saturation': station_record['o2_saturation'],
                    'dissolved_oxygen': station_record['dissolved_oxygen'],
                    'chlorophyll_concentration': station_record['chlorophyll_concentration'],
                    'turbidity': station_record['turbidity'],
                    'water_ph': station_record['water_ph'],
                    'water_eh': station_record['water_eh'],
                },
            }

            layer_records.append(record)

        LOGGER.info(
            f'Writing to {output_filename}{":" + layer_name if layer_name is not None else ""}'
        )
        with fiona.open(
            output_filename, 'w', 'GPKG', schema, OUTPUT_CRS, layer=layer_name
        ) as output_layer:
            output_layer.writerecords(layer_records)
Example #8
    def data(self,
             variable: str,
             time: datetime,
             crop: bool = True) -> xarray.DataArray:
        """
        Get data of specified variable at specified hour.

        :param variable: name of variable to retrieve
        :param time: time from which to retrieve data
        :param crop: whether to crop to study area extent
        :return: array of data
        """

        if time >= self.model_time:
            direction = 'forecast'
        else:
            direction = 'nowcast'

        if self.time_interval == 'daily':
            time = time.replace(hour=0, minute=0, second=0, microsecond=0)

        if direction in DATASET_STRUCTURE[self.source]:
            if len(self.datasets[direction]) > 0:
                if variable in DATA_VARIABLES:
                    datasets = DATA_VARIABLES[variable][self.source]
                    dataset_name, variable_name = next(iter(datasets.items()))

                    with self.dataset_locks[direction][dataset_name]:
                        data_variable = self.datasets[direction][dataset_name][variable_name]

                        # TODO study areas that cross over longitude +74.16 may have problems here
                        if crop:
                            selection = data_variable.sel(
                                lon=slice(self.study_area_west + 360,
                                          self.study_area_east + 360),
                                lat=slice(self.study_area_south,
                                          self.study_area_north),
                            )
                        else:
                            western_selection = data_variable.sel(
                                lon=slice(180, numpy.max(self.raw_lon)),
                                lat=slice(numpy.min(self.lat),
                                          numpy.max(self.lat)),
                            )
                            eastern_selection = data_variable.sel(
                                lon=slice(numpy.min(self.raw_lon), 180),
                                lat=slice(numpy.min(self.lat),
                                          numpy.max(self.lat)),
                            )
                            # concatenate along longitude as an xarray object
                            # so that the daily resampling below still works
                            selection = xarray.concat(
                                (western_selection, eastern_selection), dim='lon')

                        # resample the 3-hourly forecast / hourly nowcast data to daily means
                        selections = selection.resample(time='D').mean()
                        selections = selections.sel(time=time, method='nearest')

                        # apply a constant +0.25 offset correction to sea surface height
                        if variable == 'ssh':
                            selections = selections + 0.25

                        selections = numpy.flip(selections.squeeze(), axis=0)

                        if selections.size > 0:
                            return selections
                        else:
                            raise PyOFS.NoDataError(
                                f'no RTOFS data for {time} within the cropped area ({self.study_area_west:.2f}, {self.study_area_south:.2f}), ({self.study_area_east:.2f}, {self.study_area_north:.2f})'
                            )
                else:
                    raise ValueError(
                        f'Variable must be one of {list(DATA_VARIABLES)}.')
            else:
                LOGGER.warning(
                    f'{direction} does not exist in RTOFS for {self.model_time:%Y%m%d}.'
                )
        else:
            raise ValueError(
                f'Direction must be one of {list(DATASET_STRUCTURE[self.source].keys())}.'
            )
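
    # Usage sketch ('model' stands for an instance of the surrounding RTOFS
    # class; 'ssh' appears above as a valid variable key):
    #
    #     ssh = model.data('ssh', datetime.utcnow(), crop=True)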
Example #9
    def __init__(
        self,
        data_time: datetime = None,
        satellite: str = 'G17',
        study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
        algorithm: str = 'STAR',
        version: str = None,
    ):
        """
        Retrieve ABI NetCDF observation from NOAA with given datetime.

        :param data_time: observation datetime
        :param satellite: ABI platform
        :param study_area_polygon_filename: filename of vector file containing study area boundary
        :param algorithm: either 'STAR' or 'OSPO'
        :param version: ACSPO algorithm version
        :raises NoDataError: if observation does not exist
        """

        if not isinstance(study_area_polygon_filename, Path):
            study_area_polygon_filename = Path(study_area_polygon_filename)

        if data_time is None:
            data_time = datetime.now()

        # round minute to nearest 10 minutes (ABI data interval)
        self.data_time = PyOFS.round_to_ten_minutes(data_time)

        self.satellite = satellite

        self.study_area_polygon_filename = study_area_polygon_filename

        # use NRT flag if granule is less than 13 days old
        self.near_real_time = datetime.now() - data_time <= timedelta(days=13)
        self.algorithm = algorithm

        if version is None:
            if data_time >= datetime(2019, 4, 23, 12, 50):
                self.version = '2.71'
            elif data_time >= datetime(2018, 11, 7, 15, 10):
                self.version = '2.60'
            elif data_time >= datetime(2017, 9, 14, 12, 50):
                self.version = '2.41'
            else:
                self.version = '2.40'
        else:
            self.version = version

        self.url = None

        day_dir = f'{self.data_time.year}/{self.data_time.timetuple().tm_yday:03}'
        filename = f'{self.data_time:%Y%m%d%H%M%S}-{self.algorithm}-L3C_GHRSST-SSTsubskin-ABI_{self.satellite.upper()}-ACSPO_V{self.version}-v02.0-fv01.0.nc'

        # TODO N20 does not yet have a reanalysis archive on NESDIS (as of March 8th, 2019)
        if self.satellite.upper() == 'N20' and not self.near_real_time:
            raise PyOFS.NoDataError(
                f'{self.satellite.upper()} does not yet have a reanalysis archive'
            )

        for source, source_url in SOURCE_URLS['OpenDAP'].items():
            url = source_url

            if self.near_real_time:
                if source == 'NESDIS':
                    url = f'{source_url}/grid{self.satellite.upper()}ABINRTL3CWW00/{day_dir}/{filename}'
                elif source == 'JPL':
                    url = f'{source_url}/AMERICAS/GOES17/{algorithm}/v{self.version}/{day_dir}/{filename}'
                elif source == 'NODC':
                    url = f'{source_url}/ABI_{self.satellite.upper()}/{algorithm}/{day_dir}/{filename}'
            else:
                if source == 'NESDIS':
                    # within this branch near_real_time is always False
                    url = f'{source_url}/gridS{self.satellite.upper()}ABISCIENCEL3CWW00/{day_dir}/{filename}'
                else:
                    LOGGER.warning(
                        f'{source} does not contain a reanalysis archive')

            try:
                self.dataset = xarray.open_dataset(url)
                self.url = url
                break
            except Exception as error:
                LOGGER.warning(f'{error.__class__.__name__}: {error}')

        if self.url is None:
            LOGGER.warning(
                'Error collecting from OpenDAP; falling back to FTP...')

            for source, source_url in SOURCE_URLS['FTP'].items():
                host_url, ftp_input_dir = source_url.split('/', 1)
                ftp_path = ftp_input_dir
                url = host_url

                if source == 'NESDIS':
                    if self.near_real_time:
                        ftp_path = f'/{ftp_input_dir}/nrt/abi/{self.satellite.lower()}/l3c/{day_dir}/{filename}'
                    else:
                        ftp_path = f'/{ftp_input_dir}/ran/abi/{"S" if self.satellite.upper() == "G17" else ""}{self.satellite.lower()}/l3c/{day_dir}/{filename}'

                    url = f'{host_url}/{ftp_path.lstrip("/")}'

                try:
                    with ftplib.FTP(host_url) as ftp_connection:
                        ftp_connection.login()

                        output_dir = DATA_DIRECTORY / 'input' / 'abi'

                        if not output_dir.exists():
                            os.makedirs(output_dir, exist_ok=True)

                        output_filename = output_dir / f'abi_{self.data_time:%Y%m%dT%H%M}.nc'

                        if output_filename.exists():
                            os.remove(output_filename)

                        try:
                            with open(output_filename, 'wb') as output_file:
                                ftp_connection.retrbinary(
                                    f'RETR {ftp_path}', output_file.write)
                            # load into memory before the temporary file is
                            # removed below
                            self.dataset = xarray.open_dataset(
                                output_filename).load()
                        finally:
                            os.remove(output_filename)

                    self.url = url
                    break
                except Exception as error:
                    LOGGER.warning(f'{error.__class__.__name__}: {error}')

        if self.url is None:
            raise PyOFS.NoDataError(
                f'No ABI observation found at {self.data_time} UTC.')

        # construct rectangular polygon of granule extent
        if 'geospatial_bounds' in self.dataset.attrs:
            self.data_extent = shapely.wkt.loads(
                self.dataset.geospatial_bounds)
        elif 'geospatial_lon_min' in self.dataset.attrs:
            lon_min = float(self.dataset.geospatial_lon_min)
            lon_max = float(self.dataset.geospatial_lon_max)
            lat_min = float(self.dataset.geospatial_lat_min)
            lat_max = float(self.dataset.geospatial_lat_max)

            if lon_min < lon_max:
                self.data_extent = shapely.geometry.Polygon([
                    (lon_min, lat_max),
                    (lon_max, lat_max),
                    (lon_max, lat_min),
                    (lon_min, lat_min),
                ])
            else:
                # geospatial bounds cross the antimeridian, so we create a multipolygon
                self.data_extent = shapely.geometry.MultiPolygon([
                    shapely.geometry.Polygon([
                        (lon_min, lat_max),
                        (180, lat_max),
                        (180, lat_min),
                        (lon_min, lat_min),
                    ]),
                    shapely.geometry.Polygon([
                        (-180, lat_max),
                        (lon_max, lat_max),
                        (lon_max, lat_min),
                        (-180, lat_min),
                    ]),
                ])

        lon_pixel_size = self.dataset.geospatial_lon_resolution
        lat_pixel_size = self.dataset.geospatial_lat_resolution

        if ABIDataset.study_area_extent is None:
            LOGGER.debug(
                f'Calculating indices and transform from granule at {self.data_time} UTC...'
            )

            # get first record in layer
            ABIDataset.study_area_extent = shapely.geometry.MultiPolygon([
                shapely.geometry.Polygon(polygon[0])
                for polygon in utilities.get_first_record(
                    self.study_area_polygon_filename)['geometry']['coordinates']
            ])

            ABIDataset.study_area_bounds = ABIDataset.study_area_extent.bounds
            ABIDataset.study_area_transform = rasterio.transform.from_origin(
                ABIDataset.study_area_bounds[0],
                ABIDataset.study_area_bounds[3],
                lon_pixel_size,
                lat_pixel_size,
            )

        if ABIDataset.study_area_bounds is not None:
            self.dataset = self.dataset.isel(time=0).sel(
                lon=slice(ABIDataset.study_area_bounds[0],
                          ABIDataset.study_area_bounds[2]),
                lat=slice(ABIDataset.study_area_bounds[3],
                          ABIDataset.study_area_bounds[1]),
            )

        if ABIDataset.study_area_coordinates is None:
            ABIDataset.study_area_coordinates = {
                'lon': self.dataset['lon'],
                'lat': self.dataset['lat'],
            }
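
    # Usage sketch: fetch the most recent available GOES-17 ACSPO granule using
    # the constructor defaults shown above
    #
    #     granule = ABIDataset(satellite='G17', algorithm='STAR')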
Example #10
    def write_vector(
        self,
        output_filename: PathLike,
        variables: Collection[str] = None,
        start_time: datetime = None,
        end_time: datetime = None,
        dop_threshold: float = 0.5,
    ):
        """
        Write average of HFR data for all hours in the given time interval to a single layer of the provided output file.

        :param output_filename: path to output file
        :param variables: variable names to use
        :param start_time: beginning of time interval
        :param end_time: end of time interval
        :param dop_threshold: threshold for Dilution of Precision (DOP) above which data should be discarded
        """

        if not isinstance(output_filename, Path):
            output_filename = Path(output_filename)

        output_filename, layer_name = PyOFS.split_layer_filename(
            output_filename)
        if layer_name is None:
            layer_name = 'ssuv'

        if variables is None:
            variables = DATA_VARIABLES

        variable_means = {
            variable: self.data_average(variable, start_time, end_time,
                                        dop_threshold)
            for variable in variables
        }

        # define layer schema
        schema = {
            'geometry': 'Point',
            'properties': {
                'lon': 'float',
                'lat': 'float',
            },
        }

        schema['properties'].update({variable: 'float' for variable in variables})

        # create features
        layer_records = []

        feature_index = 1

        for col in range(len(self.dataset['lon'])):
            for row in range(len(self.dataset['lat'])):
                data = [
                    float(variable_means[variable][row, col])
                    for variable in variables
                ]

                # skip record if all values are masked
                if not numpy.isnan(data).all():
                    lon = self.dataset['lon'][col]
                    lat = self.dataset['lat'][row]

                    record = {
                        'id': feature_index,
                        'geometry': {
                            'type': 'Point',
                            'coordinates': (lon, lat)
                        },
                        'properties': {
                            'lon': float(lon),
                            'lat': float(lat)
                        },
                    }

                    record['properties'].update(dict(zip(variables, data)))

                    layer_records.append(record)
                    feature_index += 1

        # write queued features to layer
        LOGGER.info(f'Writing {output_filename}')
        with fiona.open(
                output_filename,
                'w',
                'GPKG',
                layer=layer_name,
                schema=schema,
                crs=OUTPUT_CRS.to_dict(),
        ) as layer:
            layer.writerecords(layer_records)
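
    # Usage sketch ('hfr' stands for an instance of the surrounding class; the
    # default layer name 'ssuv' is used when no ':layer' suffix is given):
    #
    #     hfr.write_vector('hfr_uv.gpkg', dop_threshold=0.5)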
Example #11
    def write_vectors(
        self,
        output_filename: PathLike,
        variables: dict = None,
        start_time: datetime = None,
        end_time: datetime = None,
        dop_threshold: float = 0.5,
    ):
        """
        Write HFR data to a layer of the provided output file for every hour in the given time interval.

        :param output_filename: path to output file
        :param variables: mapping of variable name to dataset variable name
        :param start_time: beginning of time interval
        :param end_time: end of time interval
        :param dop_threshold: threshold for Dilution of Precision (DOP) above which data should be discarded
        """

        if not isinstance(output_filename, Path):
            output_filename = Path(output_filename)

        output_filename = PyOFS.split_layer_filename(output_filename)[0]

        if variables is None:
            variables = DATA_VARIABLES

        if start_time is None:
            start_time = self.start_time

        if end_time is None:
            end_time = self.end_time

        time_interval_selection = self.dataset.sel(
            time=slice(start_time, end_time))

        if dop_threshold is not None:
            # mask out cells where either DOP component exceeds the threshold
            dop_mask = (
                (self.dataset['dopx'].sel(time=slice(start_time, end_time)) <= dop_threshold)
                & (self.dataset['dopy'].sel(time=slice(start_time, end_time)) <= dop_threshold)
            )
            time_interval_selection = time_interval_selection.where(dop_mask)

        # create dict to store features
        layers = {}

        # write features for each hour of HFR data to a separate layer
        for hfr_time in time_interval_selection['time']:
            hfr_time = datetime.utcfromtimestamp(
                (hfr_time.values - numpy.datetime64('1970-01-01T00:00:00Z')) /
                numpy.timedelta64(1, 's'))
            layer_name = f'{hfr_time:%Y%m%dT%H%M%S}'

            hfr_data = time_interval_selection.sel(time=hfr_time)

            # create features
            layer_records = []

            feature_index = 1

            for col in range(len(self.dataset['lon'])):
                for row in range(len(self.dataset['lat'])):
                    data = [
                        float(hfr_data[variable_name][row, col].values)
                        for variable, variable_name in variables.items()
                    ]

                    # skip record if all values are masked
                    if not numpy.isnan(data).all():
                        lon = self.dataset['lon'][col]
                        lat = self.dataset['lat'][row]

                        record = {
                            'id': feature_index,
                            'geometry': {
                                'type': 'Point',
                                'coordinates': (lon, lat)
                            },
                            'properties': {
                                'lon': float(lon),
                                'lat': float(lat)
                            },
                        }

                        record['properties'].update(dict(zip(variables.keys(), data)))

                        layer_records.append(record)
                        feature_index += 1

            layers[layer_name] = layer_records

        # write queued features to their respective layers
        schema = {
            'geometry': 'Point',
            'properties': {
                'u': 'float',
                'v': 'float',
                'lat': 'float',
                'lon': 'float',
                'dop_lat': 'float',
                'dop_lon': 'float',
            },
        }

        for layer_name, layer_records in layers.items():
            with fiona.open(
                    output_filename,
                    'w',
                    'GPKG',
                    layer=layer_name,
                    schema=schema,
                    crs=OUTPUT_CRS.to_dict(),
            ) as layer:
                layer.writerecords(layer_records)
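
    # Usage sketch ('hfr' stands for an instance of the surrounding class);
    # each hour is written to its own layer, named like '20200101T120000':
    #
    #     hfr.write_vectors('hfr_hourly.gpkg', dop_threshold=0.5)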
Example #12
    def write_raster(
        self,
        output_filename: PathLike,
        variable: str,
        time: datetime,
        fill_value=LEAFLET_NODATA_VALUE,
        driver: str = 'GTiff',
        crop: bool = True,
    ):
        """
        Writes interpolated raster of given variable to output path.

        :param output_filename: path of raster file to create
        :param variable: name of variable
        :param time: time from which to retrieve data
        :param fill_value: desired fill value of output
        :param driver: strings of valid GDAL driver (currently one of 'GTiff', 'GPKG', or 'AAIGrid')
        :param crop: whether to crop to study area extent
        """

        if not isinstance(output_filename, Path):
            output_filename = Path(output_filename)

        output_data = self.data(variable, time, crop).values

        if output_data is not None:
            if crop:
                transform = self.study_area_transform
            else:
                transform = self.global_grid_transform

            gdal_args = {
                'transform': transform,
                'height': output_data.shape[0],
                'width': output_data.shape[1],
                'count': 1,
                'dtype': rasterio.float32,
                'crs': CRS.from_dict(OUTPUT_CRS),
                'nodata': numpy.array([fill_value]).astype(output_data.dtype).item(),
            }

            if driver == 'AAIGrid':
                file_extension = 'asc'
                gdal_args.update({'FORCE_CELLSIZE': 'YES'})
            elif driver == 'GPKG':
                file_extension = 'gpkg'
            else:
                file_extension = 'tiff'
                gdal_args.update(TIFF_CREATION_OPTIONS)

            output_filename = output_filename.with_suffix(f'.{file_extension}')

            LOGGER.info(f'Writing {output_filename}')
            with rasterio.open(output_filename, 'w', driver,
                               **gdal_args) as output_raster:
                output_raster.write(output_data, 1)
                if driver == 'GTiff':
                    output_raster.build_overviews(
                        PyOFS.overview_levels(output_data.shape),
                        Resampling['average'])
                    output_raster.update_tags(ns='rio_overview',
                                              resampling='average')
Example #13
    def write_rasters(
        self,
        output_dir: PathLike,
        variables: list,
        time: datetime,
        filename_prefix: str = None,
        filename_suffix: str = None,
        fill_value=LEAFLET_NODATA_VALUE,
        driver: str = 'GTiff',
        crop: bool = True,
    ):
        """
        Write averaged raster data of given variables to given output directory.

        :param output_dir: path to directory
        :param variables: variable names to use
        :param time: time from which to retrieve data
        :param filename_prefix: prefix for filenames
        :param filename_suffix: suffix for filenames
        :param fill_value: desired fill value of output
        :param driver: strings of valid GDAL driver (currently one of 'GTiff', 'GPKG', or 'AAIGrid')
        :param crop: whether to crop to study area extent
        """

        if not isinstance(output_dir, Path):
            output_dir = Path(output_dir)

        if variables is None:
            variables = DATA_VARIABLES[self.source]

        if filename_prefix is None:
            filename_prefix = 'rtofs'
        filename_suffix = f'_{filename_suffix}' if filename_suffix is not None else ''

        if self.time_interval == 'daily':
            time = time.replace(hour=0, minute=0, second=0, microsecond=0)

        time_delta = int((time - self.model_time) / timedelta(days=1))
        direction = 'forecast' if time_delta >= 0 else 'nowcast'
        time_delta_string = f'{direction[0]}{abs(time_delta) + 1 if direction == "forecast" else abs(time_delta):03}'

        variable_means = {}
        for variable in variables:
            if variable not in ['dir', 'mag']:
                try:
                    variable_means[variable] = self.data(variable, time, crop)
                except KeyError:
                    LOGGER.warning(
                        f'variable "{variable}" not found in RTOFS dataset')
                except Exception as error:
                    LOGGER.warning(error)

        variable_means = {
            variable: variable_mean.values
            for variable, variable_mean in variable_means.items()
            if variable_mean is not None
        }

        if 'dir' in variables or 'mag' in variables:
            u_name = 'ssu'
            v_name = 'ssv'

            if u_name not in variable_means:
                u_data = self.data(u_name, time, crop)
                u_data = u_data.values if u_data is not None else None
            else:
                u_data = variable_means[u_name]

            if v_name not in variable_means:
                v_data = self.data(v_name, time, crop)
                v_data = v_data.values if v_data is not None else None
            else:
                v_data = variable_means[v_name]

            if 'anim' in filename_suffix:
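                # assumption: an 'anim' suffix marks animation output, for
                # which the raw u/v components are written under 'dir'/'mag'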
                variable_means['dir'] = u_data
                variable_means['mag'] = v_data

            else:
                # calculate direction and magnitude of vector in degrees (0-360) and in metres per second
                variable_means['dir'] = (numpy.arctan2(u_data, v_data) +
                                         numpy.pi) * (180 / numpy.pi)
                variable_means['mag'] = numpy.sqrt(u_data**2 + v_data**2)
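                # e.g. a purely eastward current (u=1, v=0): arctan2(1, 0) = pi/2,
                # so dir = (pi/2 + pi) * 180/pi = 270 degrees and mag = 1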

        # write interpolated grids to raster files
        for variable, variable_mean in variable_means.items():
            if variable_mean is not None and variable_mean.size > 0:
                if crop:
                    transform = self.study_area_transform
                else:
                    transform = self.global_grid_transform

                if fill_value is not None:
                    variable_mean[numpy.isnan(variable_mean)] = fill_value

                gdal_args = {
                    'transform': transform,
                    'height': variable_mean.shape[0],
                    'width': variable_mean.shape[1],
                    'count': 1,
                    'dtype': rasterio.float32,
                    'crs': CRS.from_dict(OUTPUT_CRS),
                    'nodata': numpy.array([fill_value]).astype(variable_mean.dtype).item(),
                }

                if driver == 'AAIGrid':
                    file_extension = 'asc'
                    gdal_args.update({'FORCE_CELLSIZE': 'YES'})
                elif driver == 'GPKG':
                    file_extension = 'gpkg'
                else:
                    file_extension = 'tiff'
                    gdal_args.update(TIFF_CREATION_OPTIONS)

                output_filename = f'{filename_prefix}_{variable}_{self.model_time:%Y%m%d}_{time_delta_string}{filename_suffix}.{file_extension}'
                output_filename = output_dir / output_filename

                LOGGER.info(f'Writing {output_filename}')
                with rasterio.open(output_filename, 'w', driver,
                                   **gdal_args) as output_raster:
                    output_raster.write(variable_mean, 1)
                    if driver == 'GTiff':
                        output_raster.build_overviews(
                            PyOFS.overview_levels(variable_mean.shape),
                            Resampling['average'])
                        output_raster.update_tags(ns='rio_overview',
                                                  resampling='average')
Example #14
    def write_rasters(
        self,
        output_dir: PathLike,
        variables: Collection[str] = ('sst', 'sses'),
        filename_prefix: str = 'abi',
        fill_value: float = LEAFLET_NODATA_VALUE,
        driver: str = 'GTiff',
        correct_sses: bool = False,
    ):
        """
        Write ABI rasters to file using data from given variables.

        :param output_dir: path to output directory
        :param variables: variable names to write
        :param filename_prefix: prefix for output filenames
        :param fill_value: desired fill value of output
        :param driver: strings of valid GDAL driver (currently one of 'GTiff', 'GPKG', or 'AAIGrid')
        :param correct_sses: whether to subtract SSES bias from SST
        """

        if not isinstance(output_dir, Path):
            output_dir = Path(output_dir)

        for variable in variables:
            input_data = self.data(variable, correct_sses)

            # use a per-variable fill value so that zeroing it for 'sses' does
            # not leak into subsequent variables
            variable_fill_value = 0 if variable == 'sses' else fill_value

            if input_data is not None and not numpy.isnan(input_data).all():
                if variable_fill_value is not None:
                    input_data[numpy.isnan(input_data)] = variable_fill_value

                gdal_args = {
                    'height': input_data.shape[0],
                    'width': input_data.shape[1],
                    'count': 1,
                    'dtype': rasterio.float32,
                    'crs': CRS.from_dict(OUTPUT_CRS),
                    'transform': ABIDataset.study_area_transform,
                    'nodata': variable_fill_value,
                }

                if driver == 'AAIGrid':
                    file_extension = 'asc'
                    gdal_args.update({'FORCE_CELLSIZE': 'YES'})
                elif driver == 'GPKG':
                    file_extension = 'gpkg'
                else:
                    file_extension = 'tiff'
                    gdal_args.update(TIFF_CREATION_OPTIONS)

                output_filename = output_dir / f'{filename_prefix}_{variable}.{file_extension}'

                # use rasterio to write to raster with GDAL args
                LOGGER.info(f'Writing to {output_filename}')
                with rasterio.open(output_filename, 'w', driver,
                                   **gdal_args) as output_raster:
                    output_raster.write(input_data, 1)
                    if driver == 'GTiff':
                        output_raster.build_overviews(
                            PyOFS.overview_levels(input_data.shape),
                            Resampling['average'])
                        output_raster.update_tags(ns='rio_overview',
                                                  resampling='average')
Example #15
    def __init__(self,
                 start_time: datetime = None,
                 end_time: datetime = None,
                 resolution: int = 6):
        """
        Creates new observation object from source.

        :param start_time: beginning of time interval
        :param end_time: end of time interval
        :param resolution: desired observation resolution in kilometers
        :raises NoDataError: if observation does not exist.
        """

        if start_time is None:
            start_time = datetime.now()

        self.start_time = start_time

        if end_time is None:
            end_time = self.start_time + timedelta(days=1)

        if end_time > datetime.utcnow():
            # HFR near real time delay is 1 hour behind UTC
            self.end_time = datetime.utcnow() - NRT_DELAY
        else:
            self.end_time = end_time

        self.resolution = resolution

        # NDBC only keeps observations within the past 4 days
        for source, source_url in SOURCE_URLS.items():
            # get URL
            if source == 'NDBC':
                url = f'{source_url}/hfradar_uswc_{self.resolution}km'
            elif source == 'UCSD':
                url = f'{source_url}/{self.resolution}km/hourly/RTV/HFRADAR_US_West_Coast_{self.resolution}km_Resolution_Hourly_RTV_best.ncd'
            else:
                url = source_url

            try:
                self.dataset = xarray.open_dataset(url)
                self.url = url
                break
            except OSError as error:
                LOGGER.warning(f'{error.__class__.__name__}: {error}')
        else:
            raise PyOFS.NoDataError(
                f'No HFR observations found between {self.start_time} and {self.end_time}'
            )

        raw_times = self.dataset['time']

        self.dataset['time'] = xarray.DataArray(
            numpy.array(raw_times.values, dtype='datetime64[h]'),
            coords=raw_times.coords,
            dims=raw_times.dims,
            attrs=raw_times.attrs,
        )

        self.dataset = self.dataset.sel(
            time=slice(self.start_time, self.end_time))

        LOGGER.info(
            f'Collecting HFR velocity between {str(self.dataset["time"].min().values)[:19]} and {str(self.dataset["time"].max().values)[:19]}...'
        )

        if HFRadarRange.grid_transform is None:
            lon = self.dataset['lon'].values
            lat = self.dataset['lat'].values

            # define image properties
            west = numpy.min(lon)
            north = numpy.max(lat)

            self.mean_x_size = numpy.mean(numpy.diff(lon))
            self.mean_y_size = numpy.mean(numpy.diff(lat))

            # get rasterio geotransform of HFR observation (flipped latitude);
            # cache on the class so the check above skips recomputation
            HFRadarRange.grid_transform = rasterio.transform.from_origin(
                west, north, self.mean_x_size, self.mean_y_size)
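
    # Usage sketch: collect one day of 6 km HFR surface currents (the class
    # name HFRadarRange is taken from the grid_transform check above)
    #
    #     hfr = HFRadarRange(datetime(2020, 1, 1), datetime(2020, 1, 2), resolution=6)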
Example #16
    def __init__(
        self,
        start_time: datetime,
        end_time: datetime,
        satellites: list = ('G17', ),
        study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
        algorithm: str = 'STAR',
        version: str = None,
    ):
        """
        Collect ABI datasets within time interval.

        :param start_time: beginning of time interval (in UTC)
        :param end_time: end of time interval (in UTC)
        :param satellites: ABI platforms
        :param study_area_polygon_filename: filename of vector file of study area boundary
        :param algorithm: either 'STAR' or 'OSPO'
        :param version: ACSPO algorithm version
        :raises NoDataError: if data does not exist
        """

        if not isinstance(study_area_polygon_filename, Path):
            study_area_polygon_filename = Path(study_area_polygon_filename)

        self.start_time = start_time
        if end_time > datetime.utcnow():
            # ABI near real time delay is 2 hours behind UTC
            self.end_time = datetime.utcnow() - NRT_DELAY
        else:
            self.end_time = end_time

        self.satellites = satellites

        self.study_area_polygon_filename = study_area_polygon_filename
        self.algorithm = algorithm
        self.version = version

        day_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
        self.pass_times = [
            day_start + timedelta(hours=hour)
            for hour in [-4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8]
            if day_start + timedelta(hours=hour) < self.end_time
        ]

        if len(self.pass_times) > 0:
            LOGGER.info(
                f'Collecting ABI data from {len(self.pass_times)} passes between {numpy.min(self.pass_times)} UTC and {numpy.max(self.pass_times)} UTC...'
            )

            # create dictionary to store scenes
            self.datasets = {pass_time: {} for pass_time in self.pass_times}

            with futures.ThreadPoolExecutor() as concurrency_pool:
                for satellite in self.satellites:
                    running_futures = {}
                    for pass_time in self.pass_times:
                        running_future = concurrency_pool.submit(
                            ABIDataset,
                            data_time=pass_time,
                            study_area_polygon_filename=self.study_area_polygon_filename,
                            algorithm=self.algorithm,
                            version=self.version,
                            satellite=satellite,
                        )
                        running_futures[running_future] = pass_time

                    for completed_future in futures.as_completed(running_futures):
                        if completed_future.exception() is None:
                            pass_time = running_futures[completed_future]
                            abi_dataset = completed_future.result()
                            self.datasets[pass_time][satellite] = abi_dataset
                        else:
                            LOGGER.warning(
                                f'Dataset creation error: {completed_future.exception()}'
                            )

                    del running_futures

            if len(self.datasets) > 0:
                ABIRange.study_area_transform = ABIDataset.study_area_transform
                ABIRange.study_area_extent = ABIDataset.study_area_extent
                ABIRange.study_area_bounds = ABIDataset.study_area_bounds

                LOGGER.debug(
                    f'ABI data was found in {len(self.datasets)} passes.')
            else:
                raise PyOFS.NoDataError(
                    f'No ABI datasets found between {self.start_time} UTC and {self.end_time} UTC.'
                )

        else:
            raise PyOFS.NoDataError(
                f'There are no ABI passes between {self.start_time} UTC and {self.end_time} UTC.'
            )
Example #17
    def write_rasters(
        self,
        output_dir: PathLike,
        filename_prefix: str = 'hfr',
        filename_suffix: str = '',
        variables: Collection[str] = None,
        start_time: datetime = None,
        end_time: datetime = None,
        fill_value: float = LEAFLET_NODATA_VALUE,
        driver: str = 'GTiff',
        dop_threshold: float = None,
    ):
        """
        Write average of HFR data for all hours in the given time interval to rasters.

        :param output_dir: path to output directory
        :param filename_prefix: prefix for output filenames
        :param filename_suffix: suffix for output filenames
        :param variables: variable names to use
        :param start_time: beginning of time interval
        :param end_time: end of time interval
        :param fill_value: desired fill value of output
        :param driver: string of valid GDAL driver (currently one of 'GTiff', 'GPKG', or 'AAIGrid')
        :param dop_threshold: threshold for dilution of precision above which data is not usable
        """

        if not isinstance(output_dir, Path):
            output_dir = Path(output_dir)

        if variables is None:
            variables = DATA_VARIABLES

        if filename_suffix != '':
            filename_suffix = f'_{filename_suffix}'

        variable_means = {
            variable: self.data_average(variable, start_time, end_time,
                                        dop_threshold)
            for variable in variables if variable not in ['dir', 'mag']
        }

        if 'dir' in variables or 'mag' in variables:
            if 'ssu' in variables:
                u_data = variable_means['ssu']
            else:
                u_data = self.data_average('ssu', start_time, end_time,
                                           dop_threshold)

            if 'ssv' in variables:
                v_data = variable_means['ssv']
            else:
                v_data = self.data_average('ssv', start_time, end_time,
                                           dop_threshold)

            if 'anim' in filename_suffix:
                variable_means['dir'] = u_data
                variable_means['mag'] = v_data

            else:
                # calculate direction and magnitude of vector in degrees (0-360) and in metres per second
                variable_means['dir'] = (numpy.arctan2(u_data, v_data) +
                                         numpy.pi) * (180 / numpy.pi)
                variable_means['mag'] = numpy.sqrt(u_data**2 + v_data**2)

        for variable, variable_data in variable_means.items():
            raster_data = variable_data.astype(rasterio.float32)

            gdal_args = {
                'height': raster_data.shape[0],
                'width': raster_data.shape[1],
                'count': 1,
                'dtype': raster_data.dtype,
                'crs': OUTPUT_CRS,
                'transform': self.grid_transform,
                'nodata': numpy.array([fill_value]).astype(raster_data.dtype).item(),
            }

            if driver == 'AAIGrid':
                file_extension = 'asc'

                # interpolate to a regular square grid, as required by ASCII grid
                min_cell_length = numpy.min(self.cell_size())
                west, north, east, south = self.bounds()

                input_lon, input_lat = numpy.meshgrid(self.dataset['lon'],
                                                      self.dataset['lat'])
                output_lon = numpy.arange(west, east, min_cell_length)[None, :]
                output_lat = numpy.arange(south, north, min_cell_length)[:, None]

                # nearest-neighbour resampling onto the square grid
                raster_data = scipy.interpolate.griddata(
                    (input_lon.flatten(), input_lat.flatten()),
                    raster_data.flatten(),
                    (output_lon, output_lat),
                    method='nearest',
                    fill_value=fill_value,
                ).astype(raster_data.dtype)

                gdal_args.update({
                    'height': raster_data.shape[0],
                    'width': raster_data.shape[1],
                    'FORCE_CELLSIZE': 'YES',
                    'transform': rasterio.transform.from_origin(
                        numpy.min(output_lon),
                        numpy.max(output_lat),
                        min_cell_length,
                        min_cell_length,
                    ),
                })
            elif driver == 'GPKG':
                file_extension = 'gpkg'
            else:
                file_extension = 'tiff'
                gdal_args.update(TIFF_CREATION_OPTIONS)

            if fill_value is not None:
                raster_data[numpy.isnan(raster_data)] = fill_value

            output_filename = (
                output_dir /
                f'{filename_prefix}_{variable}{filename_suffix}.{file_extension}'
            )

            LOGGER.info(f'Writing {output_filename}')
            with rasterio.open(output_filename, 'w', driver,
                               **gdal_args) as output_raster:
                output_raster.write(numpy.flipud(raster_data), 1)
                if driver == 'GTiff':
                    output_raster.build_overviews(
                        PyOFS.overview_levels(raster_data.shape),
                        Resampling['average'])
                    output_raster.update_tags(ns='rio_overview',
                                              resampling='average')
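
A minimal usage sketch for `write_rasters` above. The `HFRRange` class name and its `(start_time, end_time)` constructor are assumptions here, since only the method body is shown:

from datetime import datetime, timedelta

# hypothetical: average the last 24 hours of HFR data and write GeoTIFFs
hfr_range = HFRRange(datetime.now() - timedelta(days=1), datetime.now())
hfr_range.write_rasters(
    'output/hfr',
    variables=('ssu', 'ssv', 'dir', 'mag'),
    driver='GTiff',
    dop_threshold=0.5,
)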
Example #18
    def write_raster(
            self,
            output_dir: PathLike,
            filename_prefix: str = None,
            filename_suffix: str = None,
            start_time: datetime = None,
            end_time: datetime = None,
            average: bool = False,
            fill_value: float = LEAFLET_NODATA_VALUE,
            driver: str = 'GTiff',
            correct_sses: bool = False,
            variables: Collection[str] = ('sst',),
            satellite: str = None,
    ):
        """
        Write ABI raster of SST data (either overlapped or averaged) from the given time interval.

        :param output_dir: path to output directory
        :param filename_prefix: prefix for output filenames
        :param filename_suffix: suffix for output filenames
        :param start_time: beginning of time interval (in UTC)
        :param end_time: end of time interval (in UTC)
        :param average: whether to average rasters, otherwise overlap them
        :param fill_value: desired fill value of output
        :param driver: string of valid GDAL driver (currently one of 'GTiff', 'GPKG', or 'AAIGrid')
        :param correct_sses: whether to subtract SSES bias from L3 sea surface temperature data
        :param variables: variables to write (either 'sst' or 'sses')
        :param satellite: ABI platform to retrieve; if not specified, will average from both satellites
        """

        if not isinstance(output_dir, Path):
            output_dir = Path(output_dir)

        if start_time is None:
            start_time = self.start_time

        if end_time is None:
            end_time = self.end_time

        variable_data = self.data(start_time, end_time, average, correct_sses,
                                  variables, satellite)

        for variable, output_data in variable_data.items():
            if output_data is not None and numpy.any(~numpy.isnan(output_data)):
                raster_data = output_data.astype(rasterio.float32)

                if fill_value is not None:
                    raster_data[numpy.isnan(raster_data)] = fill_value

                # define arguments to the GDAL driver
                gdal_args = {
                    'height': raster_data.shape[0],
                    'width': raster_data.shape[1],
                    'count': 1,
                    'crs': OUTPUT_CRS,
                    'dtype': raster_data.dtype,
                    'nodata': numpy.array([fill_value]).astype(raster_data.dtype).item(),
                    'transform': ABIRange.study_area_transform,
                }

                if driver == 'AAIGrid':
                    file_extension = 'asc'
                    gdal_args.update({'FORCE_CELLSIZE': 'YES'})
                elif driver == 'GPKG':
                    file_extension = 'gpkg'
                else:
                    file_extension = 'tiff'
                    gdal_args.update(TIFF_CREATION_OPTIONS)

                if filename_prefix is None:
                    if satellite is None:
                        current_filename_prefix = f'abi_{variable}'
                    else:
                        current_filename_prefix = f'{satellite}_abi_{variable}'
                else:
                    current_filename_prefix = filename_prefix

                if filename_suffix is None:
                    start_time_string = f'{start_time:%Y%m%d%H%M}'
                    end_time_string = f'{end_time:%Y%m%d%H%M}'

                    # drop the trailing hour / minute digits when both times fall on
                    # midnight; str.replace() would also remove '0000' inside the date
                    if (start_time_string.endswith('0000')
                            and end_time_string.endswith('0000')):
                        start_time_string = start_time_string[:-4]
                        end_time_string = end_time_string[:-4]

                    current_filename_suffix = f'{start_time_string}_{end_time_string}'
                else:
                    current_filename_suffix = filename_suffix

                output_filename = (
                    output_dir /
                    f'{current_filename_prefix}_{current_filename_suffix}.{file_extension}'
                )

                LOGGER.info(f'Writing {output_filename}')
                with rasterio.open(output_filename, 'w', driver,
                                   **gdal_args) as output_raster:
                    output_raster.write(raster_data, 1)
                    if driver == 'GTiff':
                        output_raster.build_overviews(
                            PyOFS.overview_levels(raster_data.shape),
                            Resampling['average'])
                        output_raster.update_tags(ns='rio_overview',
                                                  resampling='average')
            else:
                LOGGER.warning(
                    f'No {"ABI" if satellite is None else "ABI " + satellite} {variable} found between {start_time} and {end_time}.'
                )
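
A usage sketch for `write_raster`; the `ABIRange` constructor arguments and the satellite identifier are assumed here, as only the class name appears above:

from datetime import datetime

# hypothetical constructor; ABIRange is referenced in the method above
abi_range = ABIRange(datetime(2020, 6, 1), datetime(2020, 6, 2))
abi_range.write_raster(
    'output/abi',
    average=True,
    variables=('sst',),
    satellite='G17',  # assumed platform identifier
)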
Example #19
    def __init__(
        self,
        model_date: datetime = None,
        source: str = '2ds',
        time_interval: str = 'daily',
        study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
        source_url: str = None,
        use_defaults: bool = True,
    ):
        """
        Creates new observation object from datetime and given model parameters.

        :param model_date: model run date
        :param source: either '2ds' or '3dz'
        :param time_interval: time interval of model output
        :param study_area_polygon_filename: filename of vector file containing study area boundary
        :param source_url: directory containing NetCDF files
        :param use_defaults: whether to fall back to default source URLs if the provided one does not exist
        """

        if not isinstance(study_area_polygon_filename, Path):
            study_area_polygon_filename = Path(study_area_polygon_filename)

        if model_date is None:
            model_date = datetime.now()

        if type(model_date) is date:
            self.model_time = datetime.combine(model_date, datetime.min.time())
        else:
            self.model_time = model_date.replace(hour=0,
                                                 minute=0,
                                                 second=0,
                                                 microsecond=0)

        self.source = source
        self.time_interval = time_interval

        self.study_area_polygon_filename = study_area_polygon_filename
        self.study_area_geojson = utilities.get_first_record(
            self.study_area_polygon_filename)['geometry']

        self.datasets = {}
        self.dataset_locks = {}

        date_string = f'{self.model_time:%Y%m%d}'
        date_dir = f'rtofs_global{date_string}'

        source_urls = SOURCE_URLS.copy()

        if source_url is not None:
            source_url = {'priority': source_url}
            if use_defaults:
                source_urls = {**source_url, **source_urls}
            else:
                source_urls = source_url

        self.source_names = []
        if self.time_interval in ('hrly', '3hrly', 'daily'):
            # nowcast and forecast are served at different hourly intervals, so try both
            for time_interval in ('hrly', '3hrly'):
                self.time_interval = time_interval
                for source_name, source_url in source_urls.items():
                    for forecast_direction, datasets in DATASET_STRUCTURE[
                            self.source].items():
                        # skip a forecast direction that is already populated
                        if len(self.datasets.get(forecast_direction, {})) > 0:
                            continue

                        self.datasets[forecast_direction] = {}
                        self.dataset_locks[forecast_direction] = {}

                        for dataset_name in datasets:
                            filename = f'rtofs_glo_{self.source}_{forecast_direction}_{self.time_interval}_{dataset_name}'
                            if filename not in [
                                    'rtofs_glo_2ds_nowcast_3hrly_prog',
                                    'rtofs_glo_2ds_nowcast_3hrly_diag',
                                    'rtofs_glo_2ds_forecast_hrly_prog',
                                    'rtofs_glo_2ds_forecast_hrly_diag'
                            ]:
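                                # the excluded names above are apparently not served upstream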

                                url = f'{source_url}/{date_dir}/{filename}'
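                                # local copies apparently carry an explicit `.nc` extension, unlike the remote endpoints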
                                if source_name == 'local':
                                    url = f'{url}.nc'

                                try:
                                    dataset = xarray.open_dataset(url)
                                    self.datasets[forecast_direction][
                                        dataset_name] = dataset
                                    self.dataset_locks[forecast_direction][
                                        dataset_name] = threading.Lock()
                                    self.source_names.append(source_name)
                                except OSError as error:
                                    LOGGER.warning(
                                        f'{error.__class__.__name__}: {error}')

        if (len(self.datasets.get('nowcast', {})) +
                len(self.datasets.get('forecast', {}))) > 0:
            if len(self.datasets.get('nowcast', {})) > 0:
                sample_dataset = next(iter(self.datasets['nowcast'].values()))
            else:
                sample_dataset = next(iter(self.datasets['forecast'].values()))

            self.lat = sample_dataset['lat'].values
            if not any(source_name == 'NCEP'
                       for source_name in self.source_names):
                self.lon = sample_dataset['lon'].values
                self.raw_lon = self.lon
            else:
                # longitudes from the NCEP RTOFS server are not in [-180, 180);
                # shift them into range relative to the westernmost value
                self.raw_lon = sample_dataset['lon'].values
                self.lon = self.raw_lon - 180 - numpy.min(self.raw_lon)

            lat_pixel_size = numpy.mean(numpy.diff(sample_dataset['lat']))
            lon_pixel_size = numpy.mean(numpy.diff(sample_dataset['lon']))

            self.global_north = numpy.max(self.lat)
            self.global_west = numpy.min(self.lon)

            self.global_grid_transform = rasterio.transform.from_origin(
                self.global_west, self.global_north, lon_pixel_size,
                lat_pixel_size)

            (
                self.study_area_west,
                self.study_area_south,
                self.study_area_east,
                self.study_area_north,
            ) = geometry.shape(self.study_area_geojson).bounds

            self.study_area_transform = rasterio.transform.from_origin(
                self.study_area_west, self.study_area_north, lon_pixel_size,
                lat_pixel_size)
        else:
            raise PyOFS.NoDataError(
                f'No RTOFS datasets found for {self.model_time}.')
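
A usage sketch for this constructor; the `RTOFSDataset` class name is an assumption, since only the `__init__` body is shown:

from datetime import datetime

# hypothetical class name wrapping the __init__ above
rtofs = RTOFSDataset(
    model_date=datetime(2020, 6, 1),
    source='2ds',
    time_interval='daily',
)
print(rtofs.study_area_transform)  # affine transform over the study area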