def check_station(dataset: xarray.Dataset, study_area_polygon_filename: PathLike) -> bool:
    """
    Report whether the station described by the dataset touches the study area.

    :param dataset: NetCDF dataset providing `longitude` and `latitude` variables
    :param study_area_polygon_filename: vector file containing study area boundary
    :return: whether the station location intersects the study area polygon
    """

    boundary_path = (
        study_area_polygon_filename
        if isinstance(study_area_polygon_filename, Path)
        else Path(study_area_polygon_filename)
    )

    # the polygon is built from the first coordinate sequence of the first record in the layer
    first_record = utilities.get_first_record(boundary_path)
    boundary_coordinates = first_record['geometry']['coordinates'][0]
    study_area = shapely.geometry.Polygon(boundary_coordinates)

    station_lon = dataset['longitude'][:]
    station_lat = dataset['latitude'][:]
    station_location = shapely.geometry.point.Point(station_lon, station_lat)

    return station_location.intersects(study_area)
def __init__(
    self,
    data_time: datetime = None,
    satellite: str = 'G17',
    study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
    algorithm: str = 'STAR',
    version: str = None,
):
    """
    Retrieve ABI NetCDF observation from NOAA with given datetime.

    OpenDAP sources are tried first, in the order given by `SOURCE_URLS['OpenDAP']`;
    if none can be opened, the granule is downloaded over FTP from `SOURCE_URLS['FTP']`.
    The opened dataset is then subset to the bounds of the study area.

    :param data_time: observation datetime (defaults to the current time)
    :param satellite: ABI platform
    :param study_area_polygon_filename: filename of vector file containing study area boundary
    :param algorithm: either 'STAR' or 'OSPO'
    :param version: ACSPO algorithm version (chosen from the observation datetime if not given)
    :raises NoDataError: if observation does not exist
    """

    if not isinstance(study_area_polygon_filename, Path):
        study_area_polygon_filename = Path(study_area_polygon_filename)

    if data_time is None:
        data_time = datetime.now()

    # round minute to nearest 10 minutes (ABI data interval)
    self.data_time = PyOFS.round_to_ten_minutes(data_time)

    self.satellite = satellite
    self.study_area_polygon_filename = study_area_polygon_filename

    # use NRT flag if granule is less than 13 days old
    self.near_real_time = datetime.now() - data_time <= timedelta(days=13)
    self.algorithm = algorithm

    if version is None:
        # pick the ACSPO version from the observation datetime
        if data_time >= datetime(2019, 4, 23, 12, 50):
            self.version = '2.71'
        elif data_time >= datetime(2018, 11, 7, 15, 10):
            self.version = '2.60'
        elif data_time >= datetime(2017, 9, 14, 12, 50):
            self.version = '2.41'
        else:
            self.version = '2.40'
    else:
        self.version = version

    self.url = None

    satellite_upper = self.satellite.upper()
    day_dir = f'{self.data_time.year}/{self.data_time.timetuple().tm_yday:03}'
    filename = f'{self.data_time:%Y%m%d%H%M%S}-{self.algorithm}-L3C_GHRSST-SSTsubskin-ABI_{satellite_upper}-ACSPO_V{self.version}-v02.0-fv01.0.nc'

    # TODO N20 does not yet have a reanalysis archive on NESDIS (as of March 8th, 2019)
    if satellite_upper == 'N20' and not self.near_real_time:
        raise PyOFS.NoDataError(f'{satellite_upper} does not yet have a reanalysis archive')

    for source, source_url in SOURCE_URLS['OpenDAP'].items():
        url = source_url

        if self.near_real_time:
            if source == 'NESDIS':
                url = f'{source_url}/grid{satellite_upper}ABINRTL3CWW00/{day_dir}/{filename}'
            elif source == 'JPL':
                # NOTE(review): the platform directory is hard-coded to GOES17 regardless of `satellite` - confirm
                url = f'{source_url}/AMERICAS/GOES17/{algorithm}/v{self.version}/{day_dir}/{filename}'
            elif source == 'NODC':
                url = f'{source_url}/ABI_{satellite_upper}/{algorithm}/{day_dir}/{filename}'
        else:
            if source == 'NESDIS':
                url = f'{source_url}/gridS{satellite_upper}ABISCIENCEL3CWW00/{day_dir}/{filename}'
            else:
                LOGGER.warning(f'{source} does not contain a reanalysis archive')
                # there is no granule URL to try for this source
                continue

        try:
            self.dataset = xarray.open_dataset(url)
            self.url = url
            break
        except Exception as error:
            LOGGER.warning(f'{error.__class__.__name__}: {error}')

    if self.url is None:
        LOGGER.warning('Error collecting from OpenDAP; falling back to FTP...')

        for source, source_url in SOURCE_URLS['FTP'].items():
            host_url, ftp_input_dir = source_url.split('/', 1)
            ftp_path = ftp_input_dir
            url = host_url

            if source == 'NESDIS':
                if self.near_real_time:
                    ftp_path = f'/{ftp_input_dir}/nrt/abi/{self.satellite.lower()}/l3c/{day_dir}/{filename}'
                else:
                    ftp_path = f'/{ftp_input_dir}/ran/abi/{"S" if self.satellite.upper() == "G17" else ""}{self.satellite.lower()}/l3c/{day_dir}/{filename}'

                url = f'{host_url}/{ftp_path.lstrip("/")}'

            try:
                with ftplib.FTP(host_url) as ftp_connection:
                    ftp_connection.login()

                    output_dir = DATA_DIRECTORY / 'input' / 'abi'
                    if not output_dir.exists():
                        os.makedirs(output_dir, exist_ok=True)

                    output_filename = output_dir / f'abi_{self.data_time:%Y%m%dT%H%M}.nc'
                    if output_filename.exists():
                        os.remove(output_filename)

                    try:
                        with open(output_filename, 'wb') as output_file:
                            ftp_connection.retrbinary(f'RETR {ftp_path}', output_file.write)

                        # open only after the write handle is closed so that all downloaded bytes are on disk
                        self.dataset = xarray.open_dataset(output_filename)
                    finally:
                        # the download is temporary; guard the removal so that a failure to create
                        # the file is not masked by a FileNotFoundError raised from here
                        if output_filename.exists():
                            os.remove(output_filename)

                self.url = url
                break
            except Exception as error:
                LOGGER.warning(f'{error.__class__.__name__}: {error}')

    if self.url is None:
        raise PyOFS.NoDataError(f'No ABI observation found at {self.data_time} UTC.')

    # construct rectangular polygon of granule extent
    if 'geospatial_bounds' in self.dataset.attrs:
        self.data_extent = shapely.wkt.loads(self.dataset.geospatial_bounds)
    elif 'geospatial_lon_min' in self.dataset.attrs:
        lon_min = float(self.dataset.geospatial_lon_min)
        lon_max = float(self.dataset.geospatial_lon_max)
        lat_min = float(self.dataset.geospatial_lat_min)
        lat_max = float(self.dataset.geospatial_lat_max)

        if lon_min < lon_max:
            self.data_extent = shapely.geometry.Polygon([
                (lon_min, lat_max),
                (lon_max, lat_max),
                (lon_max, lat_min),
                (lon_min, lat_min),
            ])
        else:
            # geospatial bounds cross the antimeridian, so we create a multipolygon
            self.data_extent = shapely.geometry.MultiPolygon([
                shapely.geometry.Polygon([
                    (lon_min, lat_max),
                    (180, lat_max),
                    (180, lat_min),
                    (lon_min, lat_min),
                ]),
                shapely.geometry.Polygon([
                    (-180, lat_max),
                    (lon_max, lat_max),
                    (lon_max, lat_min),
                    (-180, lat_min),
                ]),
            ])

    lon_pixel_size = self.dataset.geospatial_lon_resolution
    lat_pixel_size = self.dataset.geospatial_lat_resolution

    # the study area extent, bounds and transform are stored on the class and only computed once
    if ABIDataset.study_area_extent is None:
        LOGGER.debug(
            f'Calculating indices and transform from granule at {self.data_time} UTC...'
        )

        # get first record in layer
        ABIDataset.study_area_extent = shapely.geometry.MultiPolygon([
            shapely.geometry.Polygon(polygon[0])
            for polygon in utilities.get_first_record(
                self.study_area_polygon_filename)['geometry']['coordinates']
        ])

        ABIDataset.study_area_bounds = ABIDataset.study_area_extent.bounds
        ABIDataset.study_area_transform = rasterio.transform.from_origin(
            ABIDataset.study_area_bounds[0],
            ABIDataset.study_area_bounds[3],
            lon_pixel_size,
            lat_pixel_size,
        )

    if ABIDataset.study_area_bounds is not None:
        # take the first time step and slice to the study area (latitude is sliced from north to south)
        self.dataset = self.dataset.isel(time=0).sel(
            lon=slice(ABIDataset.study_area_bounds[0], ABIDataset.study_area_bounds[2]),
            lat=slice(ABIDataset.study_area_bounds[3], ABIDataset.study_area_bounds[1]),
        )

    if ABIDataset.study_area_coordinates is None:
        ABIDataset.study_area_coordinates = {
            'lon': self.dataset['lon'],
            'lat': self.dataset['lat'],
        }
def store_viirs_pass_times(
    satellite: str,
    study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
    start_time: datetime = VIIRS_START_TIME,
    output_filename: str = PASS_TIMES_FILENAME,
    num_periods: int = 1,
    algorithm: str = 'STAR',
    version: str = '2.40',
):
    """
    Compute VIIRS pass times from the given start date along the number of periods specified.

    For every ten-minute step in the requested range, successive cycles are tried until an
    observation can be retrieved; if its extent intersects the study area, the time and its
    offset in seconds are recorded. The records are written to the output file as
    comma-separated lines.

    :param satellite: satellite for which to store pass times, either NPP or N20
    :param study_area_polygon_filename: path to vector file containing polygon of study area
    :param start_time: beginning of given VIIRS period (in UTC)
    :param output_filename: path to output file
    :param num_periods: number of periods to store
    :param algorithm: either 'STAR' or 'OSPO'
    :param version: ACSPO Version number (2.40 - 2.41)
    """

    if not isinstance(study_area_polygon_filename, Path):
        study_area_polygon_filename = Path(study_area_polygon_filename)

    start_time = PyOFS.round_to_ten_minutes(start_time)
    end_time = PyOFS.round_to_ten_minutes(start_time + (VIIRS_PERIOD * num_periods))

    LOGGER.info(
        f'Getting pass times between {start_time:%Y-%m-%d %H:%M:%S} and {end_time:%Y-%m-%d %H:%M:%S}'
    )

    datetime_range = PyOFS.ten_minute_range(start_time, end_time)

    # construct polygon from the first record in layer
    study_area_polygon = shapely.geometry.Polygon(
        utilities.get_first_record(study_area_polygon_filename)['geometry']['coordinates'][0]
    )

    # find number of cycles from the first orbit to the present day;
    # this does not depend on the datetime being checked, so it is computed once
    # NOTE(review): the divisor 16 presumably mirrors VIIRS_PERIOD in days - confirm
    num_cycles = int((datetime.now() - start_time).days / 16)

    lines = []

    for current_time in datetime_range:
        # iterate over each cycle
        for cycle_index in range(num_cycles):
            # get current datetime of interest
            cycle_offset = VIIRS_PERIOD * cycle_index
            cycle_time = current_time + cycle_offset

            try:
                # get observation of new datetime
                dataset = VIIRSDataset(
                    cycle_time, satellite, study_area_polygon_filename, algorithm, version
                )

                # check if observation falls within polygon extent
                if dataset.data_extent.is_valid and study_area_polygon.intersects(dataset.data_extent):
                    # get duration from current cycle start
                    cycle_duration = cycle_time - (start_time + cycle_offset)
                    cycle_seconds = cycle_duration / timedelta(seconds=1)

                    LOGGER.info(
                        f'{cycle_time:%Y%m%dT%H%M%S} {cycle_seconds}: valid scene (checked {cycle_index + 1} cycle(s))'
                    )
                    lines.append(f'{cycle_time:%Y%m%dT%H%M%S},{cycle_seconds}')

                # if we get to here, break and continue to the next datetime
                break
            except PyOFS.NoDataError as error:
                LOGGER.warning(f'{error.__class__.__name__}: {error}')
        else:
            # only reached when no cycle produced an observation (the loop never hit `break`)
            LOGGER.warning(
                f'{current_time:%Y%m%dT%H%M%S}: missing observation across all cycles'
            )

    # write lines to file
    with open(output_filename, 'w') as output_file:
        output_file.write('\n'.join(lines))

    LOGGER.info('Wrote data to file')
def __init__(self, study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME):
    """
    Open the SMAP NetCDF dataset from the first OpenDAP source that responds, and subset it to the study area.

    :param study_area_polygon_filename: filename of vector file containing study area boundary
    :raises NoDataError: if the dataset cannot be opened from any source
    """

    self.study_area_polygon_filename = (
        study_area_polygon_filename
        if isinstance(study_area_polygon_filename, Path)
        else Path(study_area_polygon_filename)
    )

    # try each source in turn, stopping at the first that opens
    for _, opendap_url in SOURCE_URLS['OpenDAP'].items():
        try:
            self.dataset = xarray.open_dataset(opendap_url)
        except Exception as error:
            LOGGER.warning(f'{error.__class__.__name__}: {error}')
        else:
            break
    else:
        raise NoDataError(f'dataset creation error: no data found in sources')

    # extent of the granule, read from the global attributes of the dataset
    west = float(self.dataset.geospatial_lon_min)
    east = float(self.dataset.geospatial_lon_max)
    south = float(self.dataset.geospatial_lat_min)
    north = float(self.dataset.geospatial_lat_max)

    def rectangle(left, right):
        # rectangle spanning the granule latitudes between the two given longitudes
        return shapely.geometry.Polygon([
            (left, north),
            (right, north),
            (right, south),
            (left, south),
        ])

    if west < east:
        self.data_extent = rectangle(west, east)
    else:
        # the extent crosses the antimeridian, so it is split into one rectangle on either side
        self.data_extent = shapely.geometry.MultiPolygon([
            rectangle(west, 180),
            rectangle(-180, east),
        ])

    # pixel size is taken as the mean spacing of the coordinate values
    lon_pixel_size = numpy.mean(numpy.diff(self.dataset['longitude'].values))
    lat_pixel_size = numpy.mean(numpy.diff(self.dataset['latitude'].values))

    # study area extent, bounds and transform are stored on the class the first time through
    if SMAPDataset.study_area_extent is None:
        study_area_record = utilities.get_first_record(self.study_area_polygon_filename)
        SMAPDataset.study_area_extent = shapely.geometry.MultiPolygon([
            shapely.geometry.Polygon(part[0])
            for part in study_area_record['geometry']['coordinates']
        ])

        SMAPDataset.study_area_bounds = SMAPDataset.study_area_extent.bounds
        SMAPDataset.study_area_transform = rasterio.transform.from_origin(
            SMAPDataset.study_area_bounds[0],
            SMAPDataset.study_area_bounds[3],
            lon_pixel_size,
            lat_pixel_size,
        )

    bounds = SMAPDataset.study_area_bounds
    if bounds is not None:
        # latitude is sliced from the northern bound to the southern bound
        longitude_window = slice(bounds[0], bounds[2])
        latitude_window = slice(bounds[3], bounds[1])
        self.dataset = self.dataset.sel(longitude=longitude_window, latitude=latitude_window)

    if SMAPDataset.study_area_coordinates is None:
        SMAPDataset.study_area_coordinates = {
            'lon': self.dataset['longitude'],
            'lat': self.dataset['latitude'],
        }
def __init__(
    self,
    model_date: datetime = None,
    source: str = '2ds',
    time_interval: str = 'daily',
    study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
    source_url: str = None,
    use_defaults: bool = True,
):
    """
    Creates new observation object from datetime and given model parameters.

    :param model_date: model run date (defaults to today)
    :param source: either '2ds' or '3dz'
    :param time_interval: time interval of model output
    :param study_area_polygon_filename: filename of vector file containing study area boundary
    :param source_url: directory containing NetCDF files
    :param use_defaults: whether to fall back to default source URLs if the provided one does not exist
    :raises NoDataError: if no datasets could be opened for the model date
    """

    if not isinstance(study_area_polygon_filename, Path):
        study_area_polygon_filename = Path(study_area_polygon_filename)

    if model_date is None:
        model_date = datetime.now()

    # truncate to midnight; `datetime` is a subclass of `date`, so it has to be tested for first
    if isinstance(model_date, datetime):
        self.model_time = model_date.replace(hour=0, minute=0, second=0, microsecond=0)
    else:
        self.model_time = datetime.combine(model_date, datetime.min.time())

    self.source = source
    self.time_interval = time_interval

    self.study_area_polygon_filename = study_area_polygon_filename
    self.study_area_geojson = utilities.get_first_record(
        self.study_area_polygon_filename)['geometry']

    self.datasets = {}
    self.dataset_locks = {}

    date_dir = f'rtofs_global{self.model_time:%Y%m%d}'

    source_urls = SOURCE_URLS.copy()

    if source_url is not None:
        priority_urls = {'priority': source_url}
        if use_defaults:
            # the provided URL is inserted first so that it is tried before the defaults
            source_urls = {**priority_urls, **source_urls}
        else:
            source_urls = priority_urls

    self.source_names = []

    # filenames that are never requested
    skipped_filenames = (
        'rtofs_glo_2ds_nowcast_3hrly_prog',
        'rtofs_glo_2ds_nowcast_3hrly_diag',
        'rtofs_glo_2ds_forecast_hrly_prog',
        'rtofs_glo_2ds_forecast_hrly_diag',
    )

    if self.time_interval in ('3hrly', 'hrly', 'daily'):
        # added due to the different hourly source for nowcast and forecast;
        # a tuple keeps the order (and the final value of `self.time_interval`) deterministic
        for interval in ('hrly', '3hrly'):
            self.time_interval = interval

            for source_name, base_url in source_urls.items():
                for forecast_direction, datasets in DATASET_STRUCTURE[self.source].items():
                    # skip a direction once datasets have been loaded for it
                    if (forecast_direction in ('nowcast', 'forecast')
                            and len(self.datasets.get(forecast_direction, {})) > 0):
                        continue

                    self.datasets[forecast_direction] = {}
                    self.dataset_locks[forecast_direction] = {}

                    for dataset_name in datasets:
                        filename = f'rtofs_glo_{self.source}_{forecast_direction}_{self.time_interval}_{dataset_name}'
                        if filename in skipped_filenames:
                            continue

                        url = f'{base_url}/{date_dir}/{filename}'
                        if source_name == 'local':
                            url = f'{url}.nc'

                        try:
                            dataset = xarray.open_dataset(url)
                        except OSError as error:
                            LOGGER.warning(f'{error.__class__.__name__}: {error}')
                        else:
                            self.datasets[forecast_direction][dataset_name] = dataset
                            self.dataset_locks[forecast_direction][dataset_name] = threading.Lock()
                            self.source_names.append(source_name)

    # `.get` so that an unsupported time interval ends in NoDataError rather than KeyError
    nowcast_datasets = self.datasets.get('nowcast', {})
    forecast_datasets = self.datasets.get('forecast', {})

    if len(nowcast_datasets) + len(forecast_datasets) == 0:
        raise PyOFS.NoDataError(f'No RTOFS datasets found for {self.model_time}.')

    # any loaded dataset will do as a source of grid coordinates; nowcast is preferred
    if len(nowcast_datasets) > 0:
        sample_dataset = next(iter(nowcast_datasets.values()))
    else:
        sample_dataset = next(iter(forecast_datasets.values()))

    self.lat = sample_dataset['lat'].values

    if 'NCEP' not in self.source_names:
        # NOTE(review): this branch stores the coordinate variable itself, the other one a NumPy array
        self.lon = sample_dataset['lon']
        self.raw_lon = self.lon
    else:
        # for some reason RTOFS from NCEP has longitude values shifted by 360
        self.raw_lon = sample_dataset['lon'].values
        self.lon = self.raw_lon - 180 - numpy.min(self.raw_lon)

    lat_pixel_size = numpy.mean(numpy.diff(sample_dataset['lat']))
    lon_pixel_size = numpy.mean(numpy.diff(sample_dataset['lon']))

    self.global_north = numpy.max(self.lat)
    self.global_west = numpy.min(self.lon)

    self.global_grid_transform = rasterio.transform.from_origin(
        self.global_west, self.global_north, lon_pixel_size, lat_pixel_size)

    (
        self.study_area_west,
        self.study_area_south,
        self.study_area_east,
        self.study_area_north,
    ) = geometry.shape(self.study_area_geojson).bounds

    self.study_area_transform = rasterio.transform.from_origin(
        self.study_area_west, self.study_area_north, lon_pixel_size, lat_pixel_size)