Example #1
def check_station(dataset: xarray.Dataset, study_area_polygon_filename: PathLike) -> bool:
    """
    Check whether station exists within the given study area.

    :param dataset: NetCDF Dataset
    :param study_area_polygon_filename: vector file containing study area boundary
    :return: whether station is within study area
    """

    if not isinstance(study_area_polygon_filename, Path):
        study_area_polygon_filename = Path(study_area_polygon_filename)

    # construct polygon from the first record in the layer
    study_area_polygon = shapely.geometry.Polygon(
        utilities.get_first_record(study_area_polygon_filename)['geometry']['coordinates'][0]
    )

    lon = dataset['longitude'][:]
    lat = dataset['latitude'][:]

    point = shapely.geometry.Point(lon, lat)

    return point.intersects(study_area_polygon)
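
A minimal usage sketch for this function, assuming it is importable from this module; the file paths below are illustrative, not part of the original source:

import xarray

# open a hypothetical station file and test it against a hypothetical study-area boundary
dataset = xarray.open_dataset('station.nc')
if check_station(dataset, 'study_area.shp'):
    print('station is inside the study area')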
Example #2
    def __init__(
        self,
        data_time: datetime = None,
        satellite: str = 'G17',
        study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
        algorithm: str = 'STAR',
        version: str = None,
    ):
        """
        Retrieve ABI NetCDF observation from NOAA with given datetime.

        :param data_time: observation datetime
        :param satellite: ABI platform
        :param study_area_polygon_filename: filename of vector file containing study area boundary
        :param algorithm: either 'STAR' or 'OSPO'
        :param version: ACSPO algorithm version
        :raises NoDataError: if observation does not exist
        """

        if not isinstance(study_area_polygon_filename, Path):
            study_area_polygon_filename = Path(study_area_polygon_filename)

        if data_time is None:
            data_time = datetime.now()

        # round minute to nearest 10 minutes (ABI data interval)
        self.data_time = PyOFS.round_to_ten_minutes(data_time)

        self.satellite = satellite

        self.study_area_polygon_filename = study_area_polygon_filename

        # use NRT flag if granule is less than 13 days old
        self.near_real_time = datetime.now() - data_time <= timedelta(days=13)
        self.algorithm = algorithm

        if version is None:
            if data_time >= datetime(2019, 4, 23, 12, 50):
                self.version = '2.71'
            elif data_time >= datetime(2018, 11, 7, 15, 10):
                self.version = '2.60'
            elif data_time >= datetime(2017, 9, 14, 12, 50):
                self.version = '2.41'
            else:
                self.version = '2.40'
        else:
            self.version = version

        self.url = None

        day_dir = f'{self.data_time.year}/{self.data_time.timetuple().tm_yday:03}'
        filename = f'{self.data_time:%Y%m%d%H%M%S}-{self.algorithm}-L3C_GHRSST-SSTsubskin-ABI_{self.satellite.upper()}-ACSPO_V{self.version}-v02.0-fv01.0.nc'

        # TODO N20 does not yet have a reanalysis archive on NESDIS (as of March 8th, 2019)
        if self.satellite.upper() == 'N20' and not self.near_real_time:
            raise PyOFS.NoDataError(
                f'{self.satellite.upper()} does not yet have a reanalysis archive'
            )

        for source, source_url in SOURCE_URLS['OpenDAP'].items():
            url = source_url

            if self.near_real_time:
                if source == 'NESDIS':
                    url = f'{source_url}/grid{self.satellite.upper()}ABINRTL3CWW00/{day_dir}/{filename}'
                elif source == 'JPL':
                    url = f'{source_url}/AMERICAS/GOES17/{algorithm}/v{self.version}/{day_dir}/{filename}'
                elif source == 'NODC':
                    url = f'{source_url}/ABI_{self.satellite.upper()}/{algorithm}/{day_dir}/{filename}'
            else:
                if source == 'NESDIS':
                    url = f'{source_url}/gridS{self.satellite.upper()}ABISCIENCEL3CWW00/{day_dir}/{filename}'
                else:
                    LOGGER.warning(
                        f'{source} does not contain a reanalysis archive')

            try:
                self.dataset = xarray.open_dataset(url)
                self.url = url
                break
            except Exception as error:
                LOGGER.warning(f'{error.__class__.__name__}: {error}')

        if self.url is None:
            LOGGER.warning(
                'Error collecting from OpenDAP; falling back to FTP...')

            for source, source_url in SOURCE_URLS['FTP'].items():
                host_url, ftp_input_dir = source_url.split('/', 1)
                ftp_path = ftp_input_dir
                url = host_url

                if source == 'NESDIS':
                    if self.near_real_time:
                        ftp_path = f'/{ftp_input_dir}/nrt/abi/{self.satellite.lower()}/l3c/{day_dir}/{filename}'
                    else:
                        ftp_path = f'/{ftp_input_dir}/ran/abi/{"S" if self.satellite.upper() == "G17" else ""}{self.satellite.lower()}/l3c/{day_dir}/{filename}'

                    url = f'{host_url}/{ftp_path.lstrip("/")}'

                try:
                    with ftplib.FTP(host_url) as ftp_connection:
                        ftp_connection.login()

                        output_dir = DATA_DIRECTORY / 'input' / 'abi'

                        output_dir.mkdir(parents=True, exist_ok=True)

                        output_filename = output_dir / f'abi_{self.data_time:%Y%m%dT%H%M}.nc'

                        if output_filename.exists():
                            os.remove(output_filename)

                        try:
                            with open(output_filename, 'wb') as output_file:
                                ftp_connection.retrbinary(
                                    f'RETR {ftp_path}', output_file.write)
                            # open once the local file is closed and fully flushed
                            self.dataset = xarray.open_dataset(output_filename)
                        finally:
                            os.remove(output_filename)

                    self.url = url
                    break
                except Exception as error:
                    LOGGER.warning(f'{error.__class__.__name__}: {error}')

        if self.url is None:
            raise PyOFS.NoDataError(
                f'No ABI observation found at {self.data_time} UTC.')

        # construct rectangular polygon of granule extent
        if 'geospatial_bounds' in self.dataset.attrs:
            self.data_extent = shapely.wkt.loads(
                self.dataset.geospatial_bounds)
        elif 'geospatial_lon_min' in self.dataset.attrs:
            lon_min = float(self.dataset.geospatial_lon_min)
            lon_max = float(self.dataset.geospatial_lon_max)
            lat_min = float(self.dataset.geospatial_lat_min)
            lat_max = float(self.dataset.geospatial_lat_max)

            if lon_min < lon_max:
                self.data_extent = shapely.geometry.Polygon([
                    (lon_min, lat_max),
                    (lon_max, lat_max),
                    (lon_max, lat_min),
                    (lon_min, lat_min),
                ])
            else:
                # geospatial bounds cross the antimeridian, so we create a multipolygon
                self.data_extent = shapely.geometry.MultiPolygon([
                    shapely.geometry.Polygon([
                        (lon_min, lat_max),
                        (180, lat_max),
                        (180, lat_min),
                        (lon_min, lat_min),
                    ]),
                    shapely.geometry.Polygon([
                        (-180, lat_max),
                        (lon_max, lat_max),
                        (lon_max, lat_min),
                        (-180, lat_min),
                    ]),
                ])

        lon_pixel_size = self.dataset.geospatial_lon_resolution
        lat_pixel_size = self.dataset.geospatial_lat_resolution

        if ABIDataset.study_area_extent is None:
            LOGGER.debug(
                f'Calculating indices and transform from granule at {self.data_time} UTC...'
            )

            # get first record in layer
            ABIDataset.study_area_extent = shapely.geometry.MultiPolygon([
                shapely.geometry.Polygon(polygon[0])
                for polygon in utilities.get_first_record(
                    self.study_area_polygon_filename)['geometry']
                ['coordinates']
            ])

            ABIDataset.study_area_bounds = ABIDataset.study_area_extent.bounds
            ABIDataset.study_area_transform = rasterio.transform.from_origin(
                ABIDataset.study_area_bounds[0],
                ABIDataset.study_area_bounds[3],
                lon_pixel_size,
                lat_pixel_size,
            )

        if ABIDataset.study_area_bounds is not None:
            self.dataset = self.dataset.isel(time=0).sel(
                lon=slice(ABIDataset.study_area_bounds[0],
                          ABIDataset.study_area_bounds[2]),
                lat=slice(ABIDataset.study_area_bounds[3],
                          ABIDataset.study_area_bounds[1]),
            )

        if ABIDataset.study_area_coordinates is None:
            ABIDataset.study_area_coordinates = {
                'lon': self.dataset['lon'],
                'lat': self.dataset['lat'],
            }
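
A hedged usage sketch for the constructor above, assuming the enclosing class is the ABIDataset referenced in its own body; the datetime and keyword values are illustrative:

from datetime import datetime

# request the GOES-17 granule nearest 2019-06-01 13:00 UTC (illustrative values)
try:
    observation = ABIDataset(datetime(2019, 6, 1, 13), satellite='G17', algorithm='STAR')
    print(observation.url)
except PyOFS.NoDataError as error:
    print(f'no granule available: {error}')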
Example #3
def store_viirs_pass_times(
    satellite: str,
    study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
    start_time: datetime = VIIRS_START_TIME,
    output_filename: str = PASS_TIMES_FILENAME,
    num_periods: int = 1,
    algorithm: str = 'STAR',
    version: str = '2.40',
):
    """
    Compute VIIRS pass times from the given start date along the number of periods specified.

    :param satellite: satellite for which to store pass times, either NPP or N20
    :param study_area_polygon_filename: path to vector file containing polygon of study area
    :param start_time: beginning of given VIIRS period (in UTC)
    :param output_filename: path to output file
    :param num_periods: number of periods to store
    :param algorithm: either 'STAR' or 'OSPO'
    :param version: ACSPO Version number (2.40 - 2.41)
    """

    if not isinstance(study_area_polygon_filename, Path):
        study_area_polygon_filename = Path(study_area_polygon_filename)

    start_time = PyOFS.round_to_ten_minutes(start_time)
    end_time = PyOFS.round_to_ten_minutes(start_time +
                                          (VIIRS_PERIOD * num_periods))

    LOGGER.info(
        f'Getting pass times between {start_time:%Y-%m-%d %H:%M:%S} and {end_time:%Y-%m-%d %H:%M:%S}'
    )

    datetime_range = PyOFS.ten_minute_range(start_time, end_time)

    # construct polygon from the first record in layer
    study_area_polygon = shapely.geometry.Polygon(
        utilities.get_first_record(study_area_polygon_filename)['geometry']
        ['coordinates'][0])

    lines = []

    for current_time in datetime_range:
        # find the number of 16-day repeat cycles from the first orbit to the present day
        num_cycles = int((datetime.now() - start_time).days / 16)

        # iterate over each cycle
        for cycle_index in range(0, num_cycles):
            # get current datetime of interest
            cycle_offset = VIIRS_PERIOD * cycle_index
            cycle_time = current_time + cycle_offset

            try:
                # get observation of new datetime
                dataset = VIIRSDataset(cycle_time, satellite,
                                       study_area_polygon_filename, algorithm,
                                       version)

                # check if observation falls within polygon extent
                if dataset.data_extent.is_valid:
                    if study_area_polygon.intersects(dataset.data_extent):
                        # get duration from current cycle start
                        cycle_duration = cycle_time - (start_time +
                                                       cycle_offset)

                        LOGGER.info(
                            f'{cycle_time:%Y%m%dT%H%M%S} {cycle_duration / timedelta(seconds=1)}: valid scene (checked {cycle_index + 1} cycle(s))'
                        )
                        lines.append(
                            f'{cycle_time:%Y%m%dT%H%M%S},{cycle_duration / timedelta(seconds=1)}'
                        )

                # a dataset was retrieved for this cycle; break and continue to the next datetime
                break
            except PyOFS.NoDataError as error:
                LOGGER.warning(f'{error.__class__.__name__}: {error}')
        else:
            LOGGER.warning(
                f'{current_time:%Y%m%dT%H%M%S}: missing observation across all cycles'
            )

        # write lines to file, overwriting each iteration so that progress is saved
        with open(output_filename, 'w') as output_file:
            output_file.write('\n'.join(lines))

        LOGGER.info('Wrote data to file')
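
A minimal usage sketch, relying on the module-level defaults shown in the signature; the output filename is illustrative:

# store one VIIRS period of NPP pass times over the default study area
store_viirs_pass_times(
    satellite='NPP',
    output_filename='viirs_pass_times.txt',
    num_periods=1,
)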
Example #4
    def __init__(
            self,
            study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME
    ):
        """
        Retrieve SMAP NetCDF observation from NOAA.

        :param study_area_polygon_filename: filename of vector file containing study area boundary
        :raises NoDataError: if observation does not exist
        """

        if not isinstance(study_area_polygon_filename, Path):
            study_area_polygon_filename = Path(study_area_polygon_filename)

        self.study_area_polygon_filename = study_area_polygon_filename

        for source, source_url in SOURCE_URLS['OpenDAP'].items():
            try:
                self.dataset = xarray.open_dataset(source_url)
                break
            except Exception as error:
                LOGGER.warning(f'{error.__class__.__name__}: {error}')
        else:
            raise NoDataError(
                'dataset creation error: no data found in any source')

        # construct rectangular polygon of granule extent
        lon_min = float(self.dataset.geospatial_lon_min)
        lon_max = float(self.dataset.geospatial_lon_max)
        lat_min = float(self.dataset.geospatial_lat_min)
        lat_max = float(self.dataset.geospatial_lat_max)

        if lon_min < lon_max:
            self.data_extent = shapely.geometry.Polygon([
                (lon_min, lat_max),
                (lon_max, lat_max),
                (lon_max, lat_min),
                (lon_min, lat_min),
            ])
        else:
            # geospatial bounds cross the antimeridian, so we create a multipolygon
            self.data_extent = shapely.geometry.MultiPolygon([
                shapely.geometry.Polygon([
                    (lon_min, lat_max),
                    (180, lat_max),
                    (180, lat_min),
                    (lon_min, lat_min),
                ]),
                shapely.geometry.Polygon([
                    (-180, lat_max),
                    (lon_max, lat_max),
                    (lon_max, lat_min),
                    (-180, lat_min),
                ]),
            ])

        lon_pixel_size = numpy.mean(
            numpy.diff(self.dataset['longitude'].values))
        lat_pixel_size = numpy.mean(numpy.diff(
            self.dataset['latitude'].values))

        if SMAPDataset.study_area_extent is None:
            # get first record in layer
            SMAPDataset.study_area_extent = shapely.geometry.MultiPolygon([
                shapely.geometry.Polygon(polygon[0])
                for polygon in utilities.get_first_record(
                    self.study_area_polygon_filename)['geometry']
                ['coordinates']
            ])

            SMAPDataset.study_area_bounds = SMAPDataset.study_area_extent.bounds
            SMAPDataset.study_area_transform = rasterio.transform.from_origin(
                SMAPDataset.study_area_bounds[0],
                SMAPDataset.study_area_bounds[3],
                lon_pixel_size,
                lat_pixel_size,
            )

        if SMAPDataset.study_area_bounds is not None:
            self.dataset = self.dataset.sel(
                longitude=slice(SMAPDataset.study_area_bounds[0],
                                SMAPDataset.study_area_bounds[2]),
                latitude=slice(SMAPDataset.study_area_bounds[3],
                               SMAPDataset.study_area_bounds[1]),
            )

        if SMAPDataset.study_area_coordinates is None:
            SMAPDataset.study_area_coordinates = {
                'lon': self.dataset['longitude'],
                'lat': self.dataset['latitude'],
            }
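
A hedged usage sketch, assuming the enclosing class is the SMAPDataset referenced in its own body:

# open the latest available SMAP granule, clipped to the study area
try:
    observation = SMAPDataset()
    print(observation.data_extent.bounds)
except NoDataError as error:
    print(f'no SMAP data available: {error}')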
Example #5
    def __init__(
        self,
        model_date: datetime = None,
        source: str = '2ds',
        time_interval: str = 'daily',
        study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
        source_url: str = None,
        use_defaults: bool = True,
    ):
        """
        Creates new observation object from datetime and given model parameters.
        :param model_date: model run date
        :param source: either '2ds' or '3dz'
        :param time_interval: time interval of model output
        :param study_area_polygon_filename: filename of vector file containing study area boundary
        :param source_url: directory containing NetCDF files
        :param use_defaults: whether to fall back to default source URLs if the provided one does not exist
        """

        if not isinstance(study_area_polygon_filename, Path):
            study_area_polygon_filename = Path(study_area_polygon_filename)

        if model_date is None:
            model_date = datetime.now()

        # datetime is a subclass of date, so compare the exact type
        if type(model_date) is date:
            self.model_time = datetime.combine(model_date, datetime.min.time())
        else:
            self.model_time = model_date.replace(hour=0,
                                                 minute=0,
                                                 second=0,
                                                 microsecond=0)

        self.source = source
        self.time_interval = time_interval

        self.study_area_polygon_filename = study_area_polygon_filename
        self.study_area_geojson = utilities.get_first_record(
            self.study_area_polygon_filename)['geometry']

        self.datasets = {}
        self.dataset_locks = {}

        date_string = f'{self.model_time:%Y%m%d}'
        date_dir = f'rtofs_global{date_string}'

        source_urls = SOURCE_URLS.copy()

        if source_url is not None:
            source_url = {'priority': source_url}
            if use_defaults:
                # place the given source ahead of the defaults
                source_urls = {**source_url, **source_urls}

        self.source_names = []
        if self.time_interval in ('hrly', '3hrly', 'daily'):
            # nowcast and forecast come from different hourly sources, so try both hourly intervals
            for self.time_interval in ('hrly', '3hrly'):
                for source_name, source_url in source_urls.items():
                    for forecast_direction, datasets in DATASET_STRUCTURE[
                            self.source].items():
                        if (forecast_direction == 'nowcast'
                                and 'nowcast' in self.datasets
                                and len(self.datasets['nowcast']) > 0) or (
                                    forecast_direction == 'forecast'
                                    and 'forecast' in self.datasets
                                    and len(self.datasets['forecast']) > 0):
                            continue

                        self.datasets[forecast_direction] = {}
                        self.dataset_locks[forecast_direction] = {}

                        for dataset_name in datasets:
                            filename = f'rtofs_glo_{self.source}_{forecast_direction}_{self.time_interval}_{dataset_name}'
                            if filename not in [
                                    'rtofs_glo_2ds_nowcast_3hrly_prog',
                                    'rtofs_glo_2ds_nowcast_3hrly_diag',
                                    'rtofs_glo_2ds_forecast_hrly_prog',
                                    'rtofs_glo_2ds_forecast_hrly_diag'
                            ]:

                                url = f'{source_url}/{date_dir}/{filename}'
                                if source_name == 'local':
                                    url = f'{url}.nc'

                                try:
                                    dataset = xarray.open_dataset(url)
                                    self.datasets[forecast_direction][
                                        dataset_name] = dataset
                                    self.dataset_locks[forecast_direction][
                                        dataset_name] = threading.Lock()
                                    self.source_names.append(source_name)
                                except OSError as error:
                                    LOGGER.warning(
                                        f'{error.__class__.__name__}: {error}')

        if (len(self.datasets.get('nowcast', {})) +
                len(self.datasets.get('forecast', {}))) > 0:
            if len(self.datasets.get('nowcast', {})) > 0:
                sample_dataset = next(iter(self.datasets['nowcast'].values()))
            else:
                sample_dataset = next(iter(self.datasets['forecast'].values()))

            self.lat = sample_dataset['lat'].values
            if not any(source_name == 'NCEP'
                       for source_name in self.source_names):
                self.lon = sample_dataset['lon']
                self.raw_lon = self.lon
            else:
                # RTOFS from NCEP serves longitude on a shifted grid; recenter values to [-180, 180)
                self.raw_lon = sample_dataset['lon'].values
                self.lon = self.raw_lon - 180 - numpy.min(self.raw_lon)

            lat_pixel_size = numpy.mean(numpy.diff(sample_dataset['lat']))
            lon_pixel_size = numpy.mean(numpy.diff(sample_dataset['lon']))

            self.global_north = numpy.max(self.lat)
            self.global_west = numpy.min(self.lon)

            self.global_grid_transform = rasterio.transform.from_origin(
                self.global_west, self.global_north, lon_pixel_size,
                lat_pixel_size)

            (
                self.study_area_west,
                self.study_area_south,
                self.study_area_east,
                self.study_area_north,
            ) = geometry.shape(self.study_area_geojson).bounds

            self.study_area_transform = rasterio.transform.from_origin(
                self.study_area_west, self.study_area_north, lon_pixel_size,
                lat_pixel_size)
        else:
            raise PyOFS.NoDataError(
                f'No RTOFS datasets found for {self.model_time}.')
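
A hedged usage sketch; the enclosing class name is not shown above, so RTOFSDataset is assumed here purely for illustration:

from datetime import datetime

# open today's RTOFS 2D daily fields (RTOFSDataset is an assumed name)
try:
    model = RTOFSDataset(datetime.now(), source='2ds', time_interval='daily')
    print(model.source_names)
except PyOFS.NoDataError as error:
    print(f'no model run available: {error}')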