def write_sites(self, output_filename: PathLike):
    """
    Writes HFR radar facility locations to specified file and layer.

    :param output_filename: path to output file
    """

    if not isinstance(output_filename, Path):
        output_filename = Path(output_filename)

    output_filename, layer_name = PyOFS.split_layer_filename(output_filename)

    layer_records = []

    for site_index in range(self.dataset['nSites']):
        site_code = (
            self.dataset['site_code'][site_index].tobytes().decode().strip('\x00').strip()
        )
        site_network_code = (
            self.dataset['site_netCode'][site_index].tobytes().decode().strip('\x00').strip()
        )
        lon = float(self.dataset['site_lon'][site_index])
        lat = float(self.dataset['site_lat'][site_index])

        record = {
            'id': site_index + 1,
            'geometry': {'type': 'Point', 'coordinates': (lon, lat)},
            'properties': {
                'code': site_code,
                'net_code': site_network_code,
                'lon': float(lon),
                'lat': float(lat),
            },
        }

        layer_records.append(record)

    schema = {
        'geometry': 'Point',
        'properties': {'code': 'str', 'net_code': 'str', 'lon': 'float', 'lat': 'float'},
    }

    with fiona.open(
        output_filename,
        'w',
        'GPKG',
        layer=layer_name,
        schema=schema,
        crs=OUTPUT_CRS.to_dict(),
    ) as layer:
        layer.writerecords(layer_records)
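# Usage sketch (hedged; assumes write_sites is a method of the HFR range class named
# HFRadarRange elsewhere in this module, and that PyOFS.split_layer_filename accepts a
# "path:layer" string -- both are assumptions, and the dates below are placeholders):
#
#     hfr_range = HFRadarRange(datetime(2021, 6, 1), datetime(2021, 6, 2), resolution=6)
#     hfr_range.write_sites('hfr_sites.gpkg:sites')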
def _sss(self, data_time: datetime) -> numpy.ndarray:
    """
    Retrieve SMOS SSS data.

    :param data_time: datetime to retrieve (only uses month)
    :return: array of data
    """

    # SMOS has data on month-long resolution
    data_time = datetime(data_time.year, data_time.month, 16)

    if numpy.datetime64(data_time) in self.dataset['times'].values:
        return self.dataset['smap_sss'].sel(times=data_time).values
    else:
        raise PyOFS.NoDataError(f'No data exists for {data_time:%Y%m%dT%H%M%S}.')
def __init__(self, station_name: str):
    """
    NDBC data buoy

    :param station_name: station name
    :raises NoDataError: if observation does not exist
    """

    self.station_name = station_name
    self.url = f'{SOURCE_URL}/{self.station_name}/{self.station_name}o9999.nc'

    try:
        self.dataset = xarray.open_dataset(self.url)

        self.longitude = self.dataset['longitude'].values.item()
        self.latitude = self.dataset['latitude'].values.item()
    except Exception:
        raise PyOFS.NoDataError(f'No NDBC observation found at {self.url}')
def __init__(self, stations: [str] = None):
    """
    Collection of NDBC data buoys

    :param stations: list of station names
    :raises NoDataError: if data does not exist
    """

    if stations is None:
        with requests.get(CATALOG_URL) as station_catalog:
            self.station_names = re.findall(
                "href='(.*?)/catalog.html'", station_catalog.text
            )
    elif isinstance(stations, str):
        self.station_names = list(
            numpy.genfromtxt(WCOFS_NDBC_STATIONS_FILENAME, dtype='str')
        )
    else:
        self.station_names = stations

    self.stations = {}

    LOGGER.debug(f'Collecting NDBC data from {len(self.station_names)} stations...')

    # concurrently populate dictionary with datasets for each station
    with futures.ThreadPoolExecutor() as concurrency_pool:
        running_futures = {
            concurrency_pool.submit(DataBuoyDataset, station_name): station_name
            for station_name in self.station_names
        }

        for completed_future in futures.as_completed(running_futures):
            station_name = running_futures[completed_future]

            if type(completed_future.exception()) is not PyOFS.NoDataError:
                result = completed_future.result()
                self.stations[station_name] = result

        del running_futures

    if len(self.stations) == 0:
        raise PyOFS.NoDataError(f'No NDBC datasets found in {self.stations}')
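# Usage sketch (hedged; the collection class name `DataBuoyRange` and the station IDs
# below are assumptions for illustration -- only DataBuoyDataset appears in this module):
#
#     buoy_range = DataBuoyRange(['46022', '46026'])
#     for station_name, station in buoy_range.stations.items():
#         print(station_name, station.longitude, station.latitude)
#
# Passing no arguments scrapes station names from CATALOG_URL instead of using a list.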
def store_viirs_pass_times(
    satellite: str,
    study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
    start_time: datetime = VIIRS_START_TIME,
    output_filename: str = PASS_TIMES_FILENAME,
    num_periods: int = 1,
    algorithm: str = 'STAR',
    version: str = '2.40',
):
    """
    Compute VIIRS pass times from the given start date along the number of periods specified.

    :param satellite: satellite for which to store pass times, either NPP or N20
    :param study_area_polygon_filename: path to vector file containing polygon of study area
    :param start_time: beginning of given VIIRS period (in UTC)
    :param output_filename: path to output file
    :param num_periods: number of periods to store
    :param algorithm: either 'STAR' or 'OSPO'
    :param version: ACSPO Version number (2.40 - 2.41)
    """

    if not isinstance(study_area_polygon_filename, Path):
        study_area_polygon_filename = Path(study_area_polygon_filename)

    start_time = PyOFS.round_to_ten_minutes(start_time)
    end_time = PyOFS.round_to_ten_minutes(start_time + (VIIRS_PERIOD * num_periods))

    LOGGER.info(
        f'Getting pass times between {start_time:%Y-%m-%d %H:%M:%S} and {end_time:%Y-%m-%d %H:%M:%S}'
    )

    datetime_range = PyOFS.ten_minute_range(start_time, end_time)

    # construct polygon from the first record in layer
    study_area_polygon = shapely.geometry.Polygon(
        utilities.get_first_record(study_area_polygon_filename)['geometry']['coordinates'][0]
    )

    lines = []

    for datetime_index in range(len(datetime_range)):
        current_time = datetime_range[datetime_index]

        # find number of cycles from the first orbit to the present day
        num_cycles = int((datetime.now() - start_time).days / 16)

        # iterate over each cycle
        for cycle_index in range(0, num_cycles):
            # get current datetime of interest
            cycle_offset = VIIRS_PERIOD * cycle_index
            cycle_time = current_time + cycle_offset

            try:
                # get observation of new datetime
                dataset = VIIRSDataset(
                    cycle_time, satellite, study_area_polygon_filename, algorithm, version
                )

                # check if observation falls within polygon extent
                if dataset.data_extent.is_valid:
                    if study_area_polygon.intersects(dataset.data_extent):
                        # get duration from current cycle start
                        cycle_duration = cycle_time - (start_time + cycle_offset)

                        LOGGER.info(
                            f'{cycle_time:%Y%m%dT%H%M%S} {cycle_duration / timedelta(seconds=1)}: valid scene (checked {cycle_index + 1} cycle(s))'
                        )
                        lines.append(
                            f'{cycle_time:%Y%m%dT%H%M%S},{cycle_duration / timedelta(seconds=1)}'
                        )

                # if we get to here, break and continue to the next datetime
                break
            except PyOFS.NoDataError as error:
                LOGGER.warning(f'{error.__class__.__name__}: {error}')
        else:
            LOGGER.warning(
                f'{current_time:%Y%m%dT%H%M%S}: missing observation across all cycles'
            )

    # write lines to file
    with open(output_filename, 'w') as output_file:
        output_file.write('\n'.join(lines))

    LOGGER.info('Wrote data to file')
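# Usage sketch (hedged; relies on the module-level defaults above and on VIIRSDataset
# being able to reach its remote sources, so treat this as illustrative only):
#
#     store_viirs_pass_times(
#         'NPP',
#         start_time=VIIRS_START_TIME,
#         output_filename=PASS_TIMES_FILENAME,
#         num_periods=1,
#     )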
def __init__(
    self,
    start_time: datetime,
    end_time: datetime,
    satellites: list = ('NPP', 'N20'),
    study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
    pass_times_filename: PathLike = PASS_TIMES_FILENAME,
    algorithm: str = 'OSPO',
    version: str = None,
):
    """
    Collect VIIRS datasets within time interval.

    :param start_time: beginning of time interval (in UTC)
    :param end_time: end of time interval (in UTC)
    :param satellites: VIIRS platforms
    :param study_area_polygon_filename: filename of vector file of study area boundary
    :param pass_times_filename: path to text file with pass times
    :param algorithm: either 'STAR' or 'OSPO'
    :param version: ACSPO algorithm version
    :raises NoDataError: if data does not exist
    """

    if not isinstance(study_area_polygon_filename, Path):
        study_area_polygon_filename = Path(study_area_polygon_filename)

    if not isinstance(pass_times_filename, Path):
        pass_times_filename = Path(pass_times_filename)

    self.start_time = start_time
    if end_time > datetime.utcnow():
        # VIIRS near real time delay is 2 hours behind UTC
        self.end_time = datetime.utcnow() - NRT_DELAY
    else:
        self.end_time = end_time

    self.satellites = satellites
    self.study_area_polygon_filename = study_area_polygon_filename
    self.viirs_pass_times_filename = pass_times_filename
    self.algorithm = algorithm
    self.version = version

    if 'N20' in self.satellites:
        self.pass_times = (
            get_pass_times(self.start_time, self.end_time, self.viirs_pass_times_filename)
            - timedelta(minutes=50)
        )
    else:
        self.pass_times = get_pass_times(
            self.start_time, self.end_time, self.viirs_pass_times_filename
        )

    if len(self.pass_times) > 0:
        LOGGER.info(
            f'Collecting VIIRS data from {len(self.pass_times)} passes between {numpy.min(self.pass_times)} UTC and {numpy.max(self.pass_times)} UTC...'
        )

        # create dictionary to store scenes
        self.datasets = {pass_time: {} for pass_time in self.pass_times}

        with futures.ThreadPoolExecutor() as concurrency_pool:
            for satellite in self.satellites:
                running_futures = {}

                for pass_time in self.pass_times:
                    running_future = concurrency_pool.submit(
                        VIIRSDataset,
                        data_time=pass_time,
                        study_area_polygon_filename=self.study_area_polygon_filename,
                        algorithm=self.algorithm,
                        version=self.version,
                        satellite=satellite,
                    )
                    running_futures[running_future] = pass_time

                for completed_future in futures.as_completed(running_futures):
                    if completed_future.exception() is None:
                        pass_time = running_futures[completed_future]
                        viirs_dataset = completed_future.result()
                        self.datasets[pass_time][satellite] = viirs_dataset
                    else:
                        LOGGER.warning(
                            f'Dataset creation error: {completed_future.exception()}'
                        )

                del running_futures

        if len(self.datasets) > 0:
            VIIRSRange.study_area_transform = VIIRSDataset.study_area_transform
            VIIRSRange.study_area_extent = VIIRSDataset.study_area_extent
            VIIRSRange.study_area_bounds = VIIRSDataset.study_area_bounds

            LOGGER.debug(f'VIIRS data was found in {len(self.datasets)} passes.')
        else:
            raise PyOFS.NoDataError(
                f'No VIIRS datasets found between {self.start_time} UTC and {self.end_time} UTC.'
            )
    else:
        raise PyOFS.NoDataError(
            f'There are no VIIRS passes between {self.start_time} UTC and {self.end_time} UTC.'
        )
def write_vector(
    self,
    output_filename: PathLike,
    start_time: datetime,
    end_time: datetime,
    variables: [str] = None,
):
    """
    Write average of buoy data for all hours in the given time interval to a single layer of the provided output file.

    :param output_filename: path to output file
    :param start_time: beginning of time interval
    :param end_time: end of time interval
    :param variables: list of variable names
    """

    if not isinstance(output_filename, Path):
        output_filename = Path(output_filename)

    output_filename, layer_name = PyOFS.split_layer_filename(output_filename)

    if variables is None:
        variables = MEASUREMENT_VARIABLES

    station_data = self.data_average(variables, start_time, end_time)

    # # concurrently populate dictionary with data for each station within given time interval
    # with futures.ThreadPoolExecutor() as concurrency_pool:
    #     running_futures = {
    #         station_name: {
    #             variable: concurrency_pool.submit(station.data, variable, start_time, end_time)
    #             for variable in variables
    #         }
    #         for station_name, station in self.stations.items()
    #     }
    #
    #     for station_name, station_running_futures in running_futures:
    #         station_data[station_name] = {}
    #
    #         for completed_future in futures.as_completed(station_running_futures):
    #             result = completed_future.result()
    #
    #             if result is not None:
    #                 station_data[station_name][station_running_futures[completed_future]] = result

    schema = {
        'geometry': 'Point',
        'properties': {
            'name': 'str',
            'longitude': 'float',
            'latitude': 'float',
            'water_temperature': 'float',
            'conductivity': 'float',
            'salinity': 'float',
            'o2_saturation': 'float',
            'dissolved_oxygen': 'float',
            'chlorophyll_concentration': 'float',
            'turbidity': 'float',
            'water_ph': 'float',
            'water_eh': 'float',
        },
    }

    LOGGER.debug('Creating features...')

    layer_records = []

    for station_name, station_mean_data in station_data.items():
        station = self.stations[station_name]

        record = {
            'geometry': {
                'type': 'Point',
                'coordinates': (station.longitude, station.latitude),
            },
            'properties': {
                'name': station_name,
                'longitude': station.longitude,
                'latitude': station.latitude,
                'water_temperature': station_mean_data['water_temperature'],
                'conductivity': station_mean_data['conductivity'],
                'salinity': station_mean_data['salinity'],
                'o2_saturation': station_mean_data['o2_saturation'],
                'dissolved_oxygen': station_mean_data['dissolved_oxygen'],
                'chlorophyll_concentration': station_mean_data['chlorophyll_concentration'],
                'turbidity': station_mean_data['turbidity'],
                'water_ph': station_mean_data['water_ph'],
                'water_eh': station_mean_data['water_eh'],
            },
        }

        layer_records.append(record)

    LOGGER.info(
        f'Writing to {output_filename}{":" + layer_name if layer_name is not None else ""}'
    )
    with fiona.open(
        output_filename, 'w', 'GPKG', schema, OUTPUT_CRS, layer=layer_name
    ) as output_layer:
        output_layer.writerecords(layer_records)
def data(self, variable: str, time: datetime, crop: bool = True) -> xarray.DataArray:
    """
    Get data of specified variable at specified hour.

    :param variable: name of variable to retrieve
    :param time: time from which to retrieve data
    :param crop: whether to crop to study area extent
    :return: array of data
    """

    if time >= self.model_time:
        direction = 'forecast'
    else:
        direction = 'nowcast'

    if self.time_interval == 'daily':
        time = time.replace(hour=0, minute=0, second=0, microsecond=0)

    if direction in DATASET_STRUCTURE[self.source]:
        if len(self.datasets[direction]) > 0:
            if variable in DATA_VARIABLES:
                datasets = DATA_VARIABLES[variable][self.source]
                dataset_name, variable_name = next(iter(datasets.items()))

                with self.dataset_locks[direction][dataset_name]:
                    data_variable = self.datasets[direction][dataset_name][
                        DATA_VARIABLES[variable][self.source][dataset_name]
                    ]

                    # TODO study areas that cross over longitude +74.16 may have problems here
                    if crop:
                        selection = data_variable.sel(
                            lon=slice(self.study_area_west + 360, self.study_area_east + 360),
                            lat=slice(self.study_area_south, self.study_area_north),
                        )
                    else:
                        western_selection = data_variable.sel(
                            lon=slice(180, numpy.max(self.raw_lon)),
                            lat=slice(numpy.min(self.lat), numpy.max(self.lat)),
                        )
                        eastern_selection = data_variable.sel(
                            lon=slice(numpy.min(self.raw_lon), 180),
                            lat=slice(numpy.min(self.lat), numpy.max(self.lat)),
                        )
                        selection = numpy.concatenate(
                            (western_selection, eastern_selection), axis=1
                        )

                    # resample the 3-hourly forecast / hourly nowcast files to daily values
                    selections = selection.resample(time='D').mean()
                    selections = selections.sel(time=time, method='nearest')

                    # correction for ssh
                    if variable == 'ssh':
                        selections = selections + 0.25

                    selections = numpy.flip(selections.squeeze(), axis=0)
                    if selections.size > 0:
                        return selections
                    else:
                        raise PyOFS.NoDataError(
                            f'no RTOFS data for {time} within the cropped area ({self.study_area_west:.2f}, {self.study_area_south:.2f}), ({self.study_area_east:.2f}, {self.study_area_north:.2f})'
                        )
            else:
                raise ValueError(f'Variable must be one of {list(DATA_VARIABLES)}.')
        else:
            LOGGER.warning(
                f'{direction} does not exist in RTOFS for {self.model_time:%Y%m%d}.'
            )
    else:
        raise ValueError(
            f'Direction must be one of {list(DATASET_STRUCTURE[self.source].keys())}.'
        )
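# Note on the longitude handling above: the raw RTOFS longitudes are shifted relative to
# the -180..180 convention (see the TODO about +74.16), so the cropped selection adds 360
# to the study-area bounds, while the uncropped path splits the grid at 180 degrees and
# concatenates the two halves back together. A rough illustration (hypothetical value):
#
#     study_area_west = -128.5                  # degrees east, -180..180 convention
#     rtofs_query_west = study_area_west + 360  # 231.5, matching the shifted RTOFS grid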
def __init__(
    self,
    data_time: datetime = None,
    satellite: str = 'G17',
    study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
    algorithm: str = 'STAR',
    version: str = None,
):
    """
    Retrieve ABI NetCDF observation from NOAA with given datetime.

    :param data_time: observation datetime
    :param satellite: ABI platform
    :param study_area_polygon_filename: filename of vector file containing study area boundary
    :param algorithm: either 'STAR' or 'OSPO'
    :param version: ACSPO algorithm version
    :raises NoDataError: if observation does not exist
    """

    if not isinstance(study_area_polygon_filename, Path):
        study_area_polygon_filename = Path(study_area_polygon_filename)

    if data_time is None:
        data_time = datetime.now()

    # round minute to nearest 10 minutes (ABI data interval)
    self.data_time = PyOFS.round_to_ten_minutes(data_time)

    self.satellite = satellite
    self.study_area_polygon_filename = study_area_polygon_filename

    # use NRT flag if granule is less than 13 days old
    self.near_real_time = datetime.now() - data_time <= timedelta(days=13)
    self.algorithm = algorithm

    if version is None:
        if data_time >= datetime(2019, 4, 23, 12, 50):
            self.version = '2.71'
        elif data_time >= datetime(2018, 11, 7, 15, 10):
            self.version = '2.60'
        elif data_time >= datetime(2017, 9, 14, 12, 50):
            self.version = '2.41'
        else:
            self.version = '2.40'
    else:
        self.version = version

    self.url = None

    day_dir = f'{self.data_time.year}/{self.data_time.timetuple().tm_yday:03}'
    filename = f'{self.data_time:%Y%m%d%H%M%S}-{self.algorithm}-L3C_GHRSST-SSTsubskin-ABI_{self.satellite.upper()}-ACSPO_V{self.version}-v02.0-fv01.0.nc'

    # TODO N20 does not yet have a reanalysis archive on NESDIS (as of March 8th, 2019)
    if self.satellite.upper() == 'N20' and not self.near_real_time:
        raise PyOFS.NoDataError(
            f'{self.satellite.upper()} does not yet have a reanalysis archive'
        )

    for source, source_url in SOURCE_URLS['OpenDAP'].items():
        url = source_url
        if self.near_real_time:
            if source == 'NESDIS':
                url = f'{source_url}/grid{self.satellite.upper()}ABINRTL3CWW00/{day_dir}/{filename}'
            elif source == 'JPL':
                url = f'{source_url}/AMERICAS/GOES17/{algorithm}/v{self.version}/{day_dir}/{filename}'
            elif source == 'NODC':
                url = f'{source_url}/ABI_{self.satellite.upper()}/{algorithm}/{day_dir}/{filename}'
        else:
            if source == 'NESDIS':
                url = f'{source_url}/grid{"" if self.near_real_time else "S"}{self.satellite.upper()}ABISCIENCEL3CWW00/{day_dir}/{filename}'
            else:
                LOGGER.warning(f'{source} does not contain a reanalysis archive')

        try:
            self.dataset = xarray.open_dataset(url)
            self.url = url
            break
        except Exception as error:
            LOGGER.warning(f'{error.__class__.__name__}: {error}')

    if self.url is None:
        LOGGER.warning('Error collecting from OpenDAP; falling back to FTP...')

        for source, source_url in SOURCE_URLS['FTP'].items():
            host_url, ftp_input_dir = source_url.split('/', 1)
            ftp_path = ftp_input_dir
            url = host_url

            if source == 'NESDIS':
                if self.near_real_time:
                    ftp_path = f'/{ftp_input_dir}/nrt/abi/{self.satellite.lower()}/l3c/{day_dir}/{filename}'
                else:
                    ftp_path = f'/{ftp_input_dir}/ran/abi/{"S" if self.satellite.upper() == "G17" else ""}{self.satellite.lower()}/l3c/{day_dir}/{filename}'
                url = f'{host_url}/{ftp_path.lstrip("/")}'

            try:
                with ftplib.FTP(host_url) as ftp_connection:
                    ftp_connection.login()

                    output_dir = DATA_DIRECTORY / 'input' / 'abi'
                    if not output_dir.exists():
                        os.makedirs(output_dir, exist_ok=True)

                    output_filename = output_dir / f'abi_{self.data_time:%Y%m%dT%H%M}.nc'
                    if output_filename.exists():
                        os.remove(output_filename)

                    try:
                        with open(output_filename, 'wb') as output_file:
                            ftp_connection.retrbinary(f'RETR {ftp_path}', output_file.write)
                            self.dataset = xarray.open_dataset(output_filename)
                    finally:
                        os.remove(output_filename)

                self.url = url
                break
            except Exception as error:
                LOGGER.warning(f'{error.__class__.__name__}: {error}')

            if self.url is not None:
                break

    if self.url is None:
        raise PyOFS.NoDataError(f'No ABI observation found at {self.data_time} UTC.')

    # construct rectangular polygon of granule extent
    if 'geospatial_bounds' in self.dataset.attrs:
        self.data_extent = shapely.wkt.loads(self.dataset.geospatial_bounds)
    elif 'geospatial_lon_min' in self.dataset.attrs:
        lon_min = float(self.dataset.geospatial_lon_min)
        lon_max = float(self.dataset.geospatial_lon_max)
        lat_min = float(self.dataset.geospatial_lat_min)
        lat_max = float(self.dataset.geospatial_lat_max)

        if lon_min < lon_max:
            self.data_extent = shapely.geometry.Polygon(
                [
                    (lon_min, lat_max),
                    (lon_max, lat_max),
                    (lon_max, lat_min),
                    (lon_min, lat_min),
                ]
            )
        else:
            # geospatial bounds cross the antimeridian, so we create a multipolygon
            self.data_extent = shapely.geometry.MultiPolygon(
                [
                    shapely.geometry.Polygon(
                        [
                            (lon_min, lat_max),
                            (180, lat_max),
                            (180, lat_min),
                            (lon_min, lat_min),
                        ]
                    ),
                    shapely.geometry.Polygon(
                        [
                            (-180, lat_max),
                            (lon_max, lat_max),
                            (lon_max, lat_min),
                            (-180, lat_min),
                        ]
                    ),
                ]
            )

    lon_pixel_size = self.dataset.geospatial_lon_resolution
    lat_pixel_size = self.dataset.geospatial_lat_resolution

    if ABIDataset.study_area_extent is None:
        LOGGER.debug(
            f'Calculating indices and transform from granule at {self.data_time} UTC...'
        )

        # get first record in layer
        ABIDataset.study_area_extent = shapely.geometry.MultiPolygon(
            [
                shapely.geometry.Polygon(polygon[0])
                for polygon in utilities.get_first_record(self.study_area_polygon_filename)[
                    'geometry'
                ]['coordinates']
            ]
        )

        ABIDataset.study_area_bounds = ABIDataset.study_area_extent.bounds
        ABIDataset.study_area_transform = rasterio.transform.from_origin(
            ABIDataset.study_area_bounds[0],
            ABIDataset.study_area_bounds[3],
            lon_pixel_size,
            lat_pixel_size,
        )

    if ABIDataset.study_area_bounds is not None:
        self.dataset = self.dataset.isel(time=0).sel(
            lon=slice(ABIDataset.study_area_bounds[0], ABIDataset.study_area_bounds[2]),
            lat=slice(ABIDataset.study_area_bounds[3], ABIDataset.study_area_bounds[1]),
        )

    if ABIDataset.study_area_coordinates is None:
        ABIDataset.study_area_coordinates = {
            'lon': self.dataset['lon'],
            'lat': self.dataset['lat'],
        }
def write_vector(
    self,
    output_filename: PathLike,
    variables: Collection[str] = None,
    start_time: datetime = None,
    end_time: datetime = None,
    dop_threshold: float = 0.5,
):
    """
    Write average of HFR data for all hours in the given time interval to a single layer of the provided output file.

    :param output_filename: path to output file
    :param variables: variable names to use
    :param start_time: beginning of time interval
    :param end_time: end of time interval
    :param dop_threshold: threshold for Dilution of Precision (DOP) above which data should be discarded
    """

    if not isinstance(output_filename, Path):
        output_filename = Path(output_filename)

    output_filename, layer_name = PyOFS.split_layer_filename(output_filename)
    if layer_name is None:
        layer_name = 'ssuv'

    if variables is None:
        variables = DATA_VARIABLES

    variable_means = {
        variable: self.data_average(variable, start_time, end_time, dop_threshold)
        for variable in variables
    }

    # define layer schema
    schema = {'geometry': 'Point', 'properties': {'lon': 'float', 'lat': 'float'}}
    schema['properties'].update({variable: 'float' for variable in variables})

    # create features
    layer_records = []

    feature_index = 1
    for col in range(len(self.dataset['lon'])):
        for row in range(len(self.dataset['lat'])):
            data = [float(variable_means[variable][row, col]) for variable in variables]

            # skip the record if all values are masked
            if not (numpy.isnan(data)).all():
                lon = self.dataset['lon'][col]
                lat = self.dataset['lat'][row]

                record = {
                    'id': feature_index,
                    'geometry': {'type': 'Point', 'coordinates': (lon, lat)},
                    'properties': {'lon': float(lon), 'lat': float(lat)},
                }
                record['properties'].update(dict(zip(variables, data)))

                layer_records.append(record)
                feature_index += 1

    # write queued features to layer
    LOGGER.info(f'Writing {output_filename}')
    with fiona.open(
        output_filename,
        'w',
        'GPKG',
        layer=layer_name,
        schema=schema,
        crs=OUTPUT_CRS.to_dict(),
    ) as layer:
        layer.writerecords(layer_records)
def write_vectors(
    self,
    output_filename: PathLike,
    variables: Collection[str] = None,
    start_time: datetime = None,
    end_time: datetime = None,
    dop_threshold: float = 0.5,
):
    """
    Write HFR data to a layer of the provided output file for every hour in the given time interval.

    :param output_filename: path to output file
    :param variables: variable names to use
    :param start_time: beginning of time interval
    :param end_time: end of time interval
    :param dop_threshold: threshold for Dilution of Precision (DOP) above which data should be discarded
    """

    if not isinstance(output_filename, Path):
        output_filename = Path(output_filename)

    output_filename = PyOFS.split_layer_filename(output_filename)[0]

    if variables is None:
        variables = DATA_VARIABLES

    if start_time is None:
        start_time = self.start_time

    if end_time is None:
        end_time = self.end_time

    time_interval_selection = self.dataset.sel(time=slice(start_time, end_time))

    if dop_threshold is not None:
        dop_mask = (
            (self.dataset['dopx'].sel(time=slice(start_time, end_time)) <= dop_threshold)
            & (self.dataset['dopy'].sel(time=slice(start_time, end_time)) <= dop_threshold)
        ).values
        time_interval_selection[~dop_mask] = numpy.nan

    # create dict to store features for each hourly layer
    layers = {}

    for hfr_time in time_interval_selection['time']:
        hfr_time = datetime.utcfromtimestamp(
            (hfr_time.values - numpy.datetime64('1970-01-01T00:00:00Z'))
            / numpy.timedelta64(1, 's')
        )

        layer_name = f'{hfr_time:%Y%m%dT%H%M%S}'

        hfr_data = time_interval_selection.sel(time=hfr_time)

        # create features
        layer_records = []

        feature_index = 1
        for col in range(len(self.dataset['lon'])):
            for row in range(len(self.dataset['lat'])):
                data = [
                    float(hfr_data[variable_name][row, col].values)
                    for variable, variable_name in variables.items()
                ]

                # skip the record if all values are masked
                if not (numpy.isnan(data)).all():
                    lon = self.dataset['lon'][col]
                    lat = self.dataset['lat'][row]

                    record = {
                        'id': feature_index,
                        'geometry': {'type': 'Point', 'coordinates': (lon, lat)},
                        'properties': {'lon': float(lon), 'lat': float(lat)},
                    }
                    record['properties'].update(dict(zip(list(variables.keys()), data)))

                    layer_records.append(record)
                    feature_index += 1

        layers[layer_name] = layer_records

    # write queued features to their respective layers
    schema = {
        'geometry': 'Point',
        'properties': {
            'u': 'float',
            'v': 'float',
            'lat': 'float',
            'lon': 'float',
            'dop_lat': 'float',
            'dop_lon': 'float',
        },
    }

    for layer_name, layer_records in layers.items():
        with fiona.open(
            output_filename,
            'w',
            'GPKG',
            layer=layer_name,
            schema=schema,
            crs=OUTPUT_CRS.to_dict(),
        ) as layer:
            layer.writerecords(layer_records)
def write_raster(
    self,
    output_filename: PathLike,
    variable: str,
    time: datetime,
    fill_value=LEAFLET_NODATA_VALUE,
    driver: str = 'GTiff',
    crop: bool = True,
):
    """
    Writes interpolated raster of given variable to output path.

    :param output_filename: path of raster file to create
    :param variable: name of variable
    :param time: time from which to retrieve data
    :param fill_value: desired fill value of output
    :param driver: string of valid GDAL driver (currently one of 'GTiff', 'GPKG', or 'AAIGrid')
    :param crop: whether to crop to study area extent
    """

    if not isinstance(output_filename, Path):
        output_filename = Path(output_filename)

    output_data = self.data(variable, time, crop).values

    if output_data is not None:
        if crop:
            transform = self.study_area_transform
        else:
            transform = self.global_grid_transform

        gdal_args = {
            'transform': transform,
            'height': output_data.shape[0],
            'width': output_data.shape[1],
            'count': 1,
            'dtype': rasterio.float32,
            'crs': CRS.from_dict(OUTPUT_CRS),
            'nodata': numpy.array([fill_value]).astype(output_data.dtype).item(),
        }

        if driver == 'AAIGrid':
            file_extension = 'asc'
            gdal_args.update({'FORCE_CELLSIZE': 'YES'})
        elif driver == 'GPKG':
            file_extension = 'gpkg'
        else:
            file_extension = 'tiff'
            gdal_args.update(TIFF_CREATION_OPTIONS)

        output_filename = output_filename.parent / f'{output_filename.stem}.{file_extension}'

        LOGGER.info(f'Writing {output_filename}')
        with rasterio.open(output_filename, 'w', driver, **gdal_args) as output_raster:
            output_raster.write(output_data, 1)
            if driver == 'GTiff':
                output_raster.build_overviews(
                    PyOFS.overview_levels(output_data.shape), Resampling['average']
                )
                output_raster.update_tags(ns='rio_overview', resampling='average')
def write_rasters(
    self,
    output_dir: PathLike,
    variables: list,
    time: datetime,
    filename_prefix: str = None,
    filename_suffix: str = None,
    fill_value=LEAFLET_NODATA_VALUE,
    driver: str = 'GTiff',
    crop: bool = True,
):
    """
    Write averaged raster data of given variables to given output directory.

    :param output_dir: path to directory
    :param variables: variable names to use
    :param time: time from which to retrieve data
    :param filename_prefix: prefix for filenames
    :param filename_suffix: suffix for filenames
    :param fill_value: desired fill value of output
    :param driver: string of valid GDAL driver (currently one of 'GTiff', 'GPKG', or 'AAIGrid')
    :param crop: whether to crop to study area extent
    """

    if not isinstance(output_dir, Path):
        output_dir = Path(output_dir)

    if variables is None:
        variables = DATA_VARIABLES[self.source]

    if filename_prefix is None:
        filename_prefix = 'rtofs'
    filename_suffix = f'_{filename_suffix}' if filename_suffix is not None else ''

    if self.time_interval == 'daily':
        time = time.replace(hour=0, minute=0, second=0, microsecond=0)

    time_delta = int((time - self.model_time) / timedelta(days=1))
    direction = 'forecast' if time_delta >= 0 else 'nowcast'
    time_delta_string = f'{direction[0]}{abs(time_delta) + 1 if direction == "forecast" else abs(time_delta):03}'

    variable_means = {}
    for variable in variables:
        if variable not in ['dir', 'mag']:
            try:
                variable_means[variable] = self.data(variable, time, crop)
            except KeyError:
                LOGGER.warning(f'variable "{variable}" not found in RTOFS dataset')
            except Exception as error:
                LOGGER.warning(error)

    variable_means = {
        variable: variable_mean.values
        for variable, variable_mean in variable_means.items()
        if variable_mean is not None
    }

    if 'dir' in variables or 'mag' in variables:
        u_name = 'ssu'
        v_name = 'ssv'

        if u_name not in variable_means:
            u_data = self.data(u_name, time, crop)
            u_data = u_data.values if u_data is not None else None
        else:
            u_data = variable_means[u_name]

        if v_name not in variable_means:
            v_data = self.data(v_name, time, crop)
            v_data = v_data.values if v_data is not None else None
        else:
            v_data = variable_means[v_name]

        if 'anim' in filename_suffix:
            variable_means['dir'] = u_data
            variable_means['mag'] = v_data
        else:
            # calculate direction and magnitude of vector in degrees (0-360) and in metres per second
            variable_means['dir'] = (numpy.arctan2(u_data, v_data) + numpy.pi) * (180 / numpy.pi)
            variable_means['mag'] = numpy.sqrt(u_data ** 2 + v_data ** 2)

    # write interpolated grids to raster files
    for variable, variable_mean in variable_means.items():
        if variable_mean is not None and variable_mean.size > 0:
            if crop:
                transform = self.study_area_transform
            else:
                transform = self.global_grid_transform

            if fill_value is not None:
                variable_mean[numpy.isnan(variable_mean)] = fill_value

            gdal_args = {
                'transform': transform,
                'height': variable_mean.shape[0],
                'width': variable_mean.shape[1],
                'count': 1,
                'dtype': rasterio.float32,
                'crs': CRS.from_dict(OUTPUT_CRS),
                'nodata': numpy.array([fill_value]).astype(variable_mean.dtype).item(),
            }

            if driver == 'AAIGrid':
                file_extension = 'asc'
                gdal_args.update({'FORCE_CELLSIZE': 'YES'})
            elif driver == 'GPKG':
                file_extension = 'gpkg'
            else:
                file_extension = 'tiff'
                gdal_args.update(TIFF_CREATION_OPTIONS)

            output_filename = f'{filename_prefix}_{variable}_{self.model_time:%Y%m%d}_{time_delta_string}{filename_suffix}.{file_extension}'
            output_filename = output_dir / output_filename

            LOGGER.info(f'Writing {output_filename}')
            with rasterio.open(output_filename, 'w', driver, **gdal_args) as output_raster:
                output_raster.write(variable_mean, 1)
                if driver == 'GTiff':
                    output_raster.build_overviews(
                        PyOFS.overview_levels(variable_mean.shape), Resampling['average']
                    )
                    output_raster.update_tags(ns='rio_overview', resampling='average')
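# Worked example of the direction/magnitude formula above (illustrative values only):
# for u = 0.1 m/s (eastward) and v = -0.1 m/s (southward),
#     dir = (arctan2(0.1, -0.1) + pi) * 180/pi = (2.356 + 3.142) * 57.296 ≈ 315 degrees
#     mag = sqrt(0.1 ** 2 + (-0.1) ** 2) ≈ 0.141 m/s
# i.e. the formula maps arctan2(u, v) into a 0-360 degree range rather than returning the
# mathematical counter-clockwise-from-east angle.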
def write_rasters(
    self,
    output_dir: PathLike,
    variables: Collection[str] = ('sst', 'sses'),
    filename_prefix: str = 'abi',
    fill_value: float = LEAFLET_NODATA_VALUE,
    driver: str = 'GTiff',
    correct_sses: bool = False,
):
    """
    Write ABI rasters to file using data from given variables.

    :param output_dir: path to output directory
    :param variables: variable names to write
    :param filename_prefix: prefix for output filenames
    :param fill_value: desired fill value of output
    :param driver: string of valid GDAL driver (currently one of 'GTiff', 'GPKG', or 'AAIGrid')
    :param correct_sses: whether to subtract SSES bias from SST
    """

    if not isinstance(output_dir, Path):
        output_dir = Path(output_dir)

    for variable in variables:
        input_data = self.data(variable, correct_sses)

        if variable == 'sses':
            fill_value = 0

        if input_data is not None and not numpy.isnan(input_data).all():
            if fill_value is not None:
                input_data[numpy.isnan(input_data)] = fill_value

            gdal_args = {
                'height': input_data.shape[0],
                'width': input_data.shape[1],
                'count': 1,
                'dtype': rasterio.float32,
                'crs': CRS.from_dict(OUTPUT_CRS),
                'transform': ABIDataset.study_area_transform,
                'nodata': fill_value,
            }

            if driver == 'AAIGrid':
                file_extension = 'asc'
                gdal_args.update({'FORCE_CELLSIZE': 'YES'})
            elif driver == 'GPKG':
                file_extension = 'gpkg'
            else:
                file_extension = 'tiff'
                gdal_args.update(TIFF_CREATION_OPTIONS)

            output_filename = output_dir / f'{filename_prefix}_{variable}.{file_extension}'

            # use rasterio to write to raster with GDAL args
            LOGGER.info(f'Writing to {output_filename}')
            with rasterio.open(output_filename, 'w', driver, **gdal_args) as output_raster:
                output_raster.write(input_data, 1)
                if driver == 'GTiff':
                    output_raster.build_overviews(
                        PyOFS.overview_levels(input_data.shape), Resampling['average']
                    )
                    output_raster.update_tags(ns='rio_overview', resampling='average')
def __init__(self, start_time: datetime = None, end_time: datetime = None, resolution: int = 6):
    """
    Creates new observation object from source.

    :param start_time: beginning of time interval
    :param end_time: end of time interval
    :param resolution: desired observation resolution in kilometers
    :raises NoDataError: if observation does not exist.
    """

    if start_time is None:
        start_time = datetime.now()

    self.start_time = start_time

    if end_time is None:
        end_time = self.start_time + timedelta(days=1)

    if end_time > datetime.utcnow():
        # HFR near real time delay is 1 hour behind UTC
        self.end_time = datetime.utcnow() - NRT_DELAY
    else:
        self.end_time = end_time

    self.resolution = resolution

    # NDBC only keeps observations within the past 4 days
    for source, source_url in SOURCE_URLS.items():
        # get URL
        if source == 'NDBC':
            url = f'{source_url}/hfradar_uswc_{self.resolution}km'
        elif source == 'UCSD':
            url = f'{source_url}/{self.resolution}km/hourly/RTV/HFRADAR_US_West_Coast_{self.resolution}km_Resolution_Hourly_RTV_best.ncd'
        else:
            url = source_url

        try:
            self.dataset = xarray.open_dataset(url)
            self.url = url
            break
        except OSError as error:
            LOGGER.warning(f'{error.__class__.__name__}: {error}')
    else:
        raise PyOFS.NoDataError(
            f'No HFR observations found between {self.start_time} and {self.end_time}'
        )

    raw_times = self.dataset['time']

    self.dataset['time'] = xarray.DataArray(
        numpy.array(raw_times.values, dtype='datetime64[h]'),
        coords=raw_times.coords,
        dims=raw_times.dims,
        attrs=raw_times.attrs,
    )

    self.dataset = self.dataset.sel(time=slice(self.start_time, self.end_time))

    LOGGER.info(
        f'Collecting HFR velocity between {str(self.dataset["time"].min().values)[:19]} and {str(self.dataset["time"].max().values)[:19]}...'
    )

    if HFRadarRange.grid_transform is None:
        lon = self.dataset['lon'].values
        lat = self.dataset['lat'].values

        # define image properties
        west = numpy.min(lon)
        north = numpy.max(lat)

        self.mean_x_size = numpy.mean(numpy.diff(lon))
        self.mean_y_size = numpy.mean(numpy.diff(lat))

        # get rasterio geotransform of HFR observation (flipped latitude)
        self.grid_transform = rasterio.transform.from_origin(
            west, north, self.mean_x_size, self.mean_y_size
        )
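# Usage sketch (hedged; dates and output paths are placeholders, and it assumes the
# remote HFR sources in SOURCE_URLS are reachable): a day of 6 km surface currents could
# be collected and exported with the methods defined in this class:
#
#     hfr_range = HFRadarRange(datetime(2021, 6, 1), datetime(2021, 6, 2), resolution=6)
#     hfr_range.write_rasters('output_dir', filename_prefix='hfr', dop_threshold=0.5)
#     hfr_range.write_vector('output_dir/hfr.gpkg', dop_threshold=0.5)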
def __init__(
    self,
    start_time: datetime,
    end_time: datetime,
    satellites: list = ('G17',),
    study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
    algorithm: str = 'STAR',
    version: str = None,
):
    """
    Collect ABI datasets within time interval.

    :param start_time: beginning of time interval (in UTC)
    :param end_time: end of time interval (in UTC)
    :param satellites: ABI platforms
    :param study_area_polygon_filename: filename of vector file of study area boundary
    :param algorithm: either 'STAR' or 'OSPO'
    :param version: ACSPO algorithm version
    :raises NoDataError: if data does not exist
    """

    if not isinstance(study_area_polygon_filename, Path):
        study_area_polygon_filename = Path(study_area_polygon_filename)

    self.start_time = start_time
    if end_time > datetime.utcnow():
        # ABI near real time delay is 2 hours behind UTC
        self.end_time = datetime.utcnow() - NRT_DELAY
    else:
        self.end_time = end_time

    self.satellites = satellites
    self.study_area_polygon_filename = study_area_polygon_filename
    self.algorithm = algorithm
    self.version = version

    day_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
    self.pass_times = [
        day_start + timedelta(hours=hour)
        for hour in [-4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8]
        if day_start + timedelta(hours=hour) < self.end_time
    ]

    if len(self.pass_times) > 0:
        LOGGER.info(
            f'Collecting ABI data from {len(self.pass_times)} passes between {numpy.min(self.pass_times)} UTC and {numpy.max(self.pass_times)} UTC...'
        )

        # create dictionary to store scenes
        self.datasets = {pass_time: {} for pass_time in self.pass_times}

        with futures.ThreadPoolExecutor() as concurrency_pool:
            for satellite in self.satellites:
                running_futures = {}

                for pass_time in self.pass_times:
                    running_future = concurrency_pool.submit(
                        ABIDataset,
                        data_time=pass_time,
                        study_area_polygon_filename=self.study_area_polygon_filename,
                        algorithm=self.algorithm,
                        version=self.version,
                        satellite=satellite,
                    )
                    running_futures[running_future] = pass_time

                for completed_future in futures.as_completed(running_futures):
                    if completed_future.exception() is None:
                        pass_time = running_futures[completed_future]
                        abi_dataset = completed_future.result()
                        self.datasets[pass_time][satellite] = abi_dataset
                    else:
                        LOGGER.warning(
                            f'Dataset creation error: {completed_future.exception()}'
                        )

                del running_futures

        if len(self.datasets) > 0:
            ABIRange.study_area_transform = ABIDataset.study_area_transform
            ABIRange.study_area_extent = ABIDataset.study_area_extent
            ABIRange.study_area_bounds = ABIDataset.study_area_bounds

            LOGGER.debug(f'ABI data was found in {len(self.datasets)} passes.')
        else:
            raise PyOFS.NoDataError(
                f'No ABI datasets found between {self.start_time} UTC and {self.end_time} UTC.'
            )
    else:
        raise PyOFS.NoDataError(
            f'There are no ABI passes between {self.start_time} UTC and {self.end_time} UTC.'
        )
def write_rasters(
    self,
    output_dir: PathLike,
    filename_prefix: str = 'hfr',
    filename_suffix: str = '',
    variables: Collection[str] = None,
    start_time: datetime = None,
    end_time: datetime = None,
    fill_value: float = LEAFLET_NODATA_VALUE,
    driver: str = 'GTiff',
    dop_threshold: float = None,
):
    """
    Write average of HFR data for all hours in the given time interval to rasters.

    :param output_dir: path to output directory
    :param filename_prefix: prefix for output filenames
    :param filename_suffix: suffix for output filenames
    :param variables: variable names to use
    :param start_time: beginning of time interval
    :param end_time: end of time interval
    :param fill_value: desired fill value of output
    :param driver: string of valid GDAL driver (currently one of 'GTiff', 'GPKG', or 'AAIGrid')
    :param dop_threshold: threshold for dilution of precision above which data is not useable
    """

    if not isinstance(output_dir, Path):
        output_dir = Path(output_dir)

    if variables is None:
        variables = DATA_VARIABLES

    if filename_suffix != '':
        filename_suffix = f'_{filename_suffix}'

    variable_means = {
        variable: self.data_average(variable, start_time, end_time, dop_threshold)
        for variable in variables
        if variable not in ['dir', 'mag']
    }

    if 'dir' in variables or 'mag' in variables:
        if 'ssu' in variables:
            u_data = variable_means['ssu']
        else:
            u_data = self.data_average('ssu', start_time, end_time, dop_threshold)

        if 'ssv' in variables:
            v_data = variable_means['ssv']
        else:
            v_data = self.data_average('ssv', start_time, end_time, dop_threshold)

        if 'anim' in filename_suffix:
            variable_means['dir'] = u_data
            variable_means['mag'] = v_data
        else:
            # calculate direction and magnitude of vector in degrees (0-360) and in metres per second
            variable_means['dir'] = (numpy.arctan2(u_data, v_data) + numpy.pi) * (180 / numpy.pi)
            variable_means['mag'] = numpy.sqrt(u_data ** 2 + v_data ** 2)

    for variable, variable_data in variable_means.items():
        raster_data = variable_data.astype(rasterio.float32)

        gdal_args = {
            'height': raster_data.shape[0],
            'width': raster_data.shape[1],
            'count': 1,
            'dtype': raster_data.dtype,
            'crs': OUTPUT_CRS,
            'transform': self.grid_transform,
            'nodata': numpy.array([fill_value]).astype(raster_data.dtype).item(),
        }

        if driver == 'AAIGrid':
            file_extension = 'asc'

            # interpolate to regular grid in case of ASCII grid
            mean_cell_length = numpy.min(self.cell_size())
            west, north, east, south = self.bounds()

            input_lon, input_lat = numpy.meshgrid(self.dataset['lon'], self.dataset['lat'])
            output_lon = numpy.arange(west, east, mean_cell_length)[None, :]
            output_lat = numpy.arange(south, north, mean_cell_length)[:, None]

            raster_data = scipy.interpolate.griddata(
                (input_lon.flatten(), input_lat.flatten()),
                raster_data.flatten(),
                (output_lon, output_lat),
                method='nearest',
                fill_value=fill_value,
            ).astype(raster_data.dtype)

            gdal_args.update(
                {
                    'height': raster_data.shape[0],
                    'width': raster_data.shape[1],
                    'FORCE_CELLSIZE': 'YES',
                    'transform': rasterio.transform.from_origin(
                        numpy.min(output_lon),
                        numpy.max(output_lat),
                        numpy.max(numpy.diff(output_lon)),
                        numpy.max(numpy.diff(output_lon)),
                    ),
                }
            )
        elif driver == 'GPKG':
            file_extension = 'gpkg'
        else:
            file_extension = 'tiff'
            gdal_args.update(TIFF_CREATION_OPTIONS)

        if fill_value is not None:
            raster_data[numpy.isnan(raster_data)] = fill_value

        output_filename = (
            output_dir / f'{filename_prefix}_{variable}{filename_suffix}.{file_extension}'
        )

        LOGGER.info(f'Writing {output_filename}')
        with rasterio.open(output_filename, 'w', driver, **gdal_args) as output_raster:
            output_raster.write(numpy.flipud(raster_data), 1)
            if driver == 'GTiff':
                output_raster.build_overviews(
                    PyOFS.overview_levels(raster_data.shape), Resampling['average']
                )
                output_raster.update_tags(ns='rio_overview', resampling='average')
def write_raster(
    self,
    output_dir: PathLike,
    filename_prefix: str = None,
    filename_suffix: str = None,
    start_time: datetime = None,
    end_time: datetime = None,
    average: bool = False,
    fill_value: float = LEAFLET_NODATA_VALUE,
    driver: str = 'GTiff',
    correct_sses: bool = False,
    variables: Collection[str] = ('sst',),
    satellite: str = None,
):
    """
    Write ABI raster of SST data (either overlapped or averaged) from the given time interval.

    :param output_dir: path to output directory
    :param filename_prefix: prefix for output filenames
    :param filename_suffix: suffix for output filenames
    :param start_time: beginning of time interval (in UTC)
    :param end_time: end of time interval (in UTC)
    :param average: whether to average rasters, otherwise overlap them
    :param fill_value: desired fill value of output
    :param driver: string of valid GDAL driver (currently one of 'GTiff', 'GPKG', or 'AAIGrid')
    :param correct_sses: whether to subtract SSES bias from L3 sea surface temperature data
    :param variables: variables to write (either 'sst' or 'sses')
    :param satellite: ABI platform to retrieve; if not specified, will average from both satellites
    """

    if not isinstance(output_dir, Path):
        output_dir = Path(output_dir)

    if start_time is None:
        start_time = self.start_time

    if end_time is None:
        end_time = self.end_time

    variable_data = self.data(start_time, end_time, average, correct_sses, variables, satellite)

    for variable, output_data in variable_data.items():
        if output_data is not None and numpy.any(~numpy.isnan(output_data)):
            output_data[numpy.isnan(output_data)] = fill_value

            raster_data = output_data.astype(rasterio.float32)

            if fill_value is not None:
                raster_data[numpy.isnan(raster_data)] = fill_value

            # define arguments to GDAL driver
            gdal_args = {
                'height': raster_data.shape[0],
                'width': raster_data.shape[1],
                'count': 1,
                'crs': OUTPUT_CRS,
                'dtype': raster_data.dtype,
                'nodata': numpy.array([fill_value]).astype(raster_data.dtype).item(),
                'transform': ABIRange.study_area_transform,
            }

            if driver == 'AAIGrid':
                file_extension = 'asc'
                gdal_args.update({'FORCE_CELLSIZE': 'YES'})
            elif driver == 'GPKG':
                file_extension = 'gpkg'
            else:
                file_extension = 'tiff'
                gdal_args.update(TIFF_CREATION_OPTIONS)

            if filename_prefix is None:
                current_filename_prefix = f'{satellite}_abi_{variable}'
            else:
                current_filename_prefix = filename_prefix

            if filename_suffix is None:
                start_time_string = f'{start_time:%Y%m%d%H%M}'
                end_time_string = f'{end_time:%Y%m%d%H%M}'

                if '0000' in start_time_string and '0000' in end_time_string:
                    start_time_string = start_time_string.replace('0000', '')
                    end_time_string = end_time_string.replace('0000', '')

                current_filename_suffix = f'{start_time_string}_{end_time_string}'
            else:
                current_filename_suffix = filename_suffix

            output_filename = (
                output_dir
                / f'{current_filename_prefix}_{current_filename_suffix}.{file_extension}'
            )

            LOGGER.info(f'Writing {output_filename}')
            with rasterio.open(output_filename, 'w', driver, **gdal_args) as output_raster:
                output_raster.write(raster_data, 1)
                if driver == 'GTiff':
                    output_raster.build_overviews(
                        PyOFS.overview_levels(raster_data.shape), Resampling['average']
                    )
                    output_raster.update_tags(ns='rio_overview', resampling='average')
        else:
            LOGGER.warning(
                f'No {"ABI" if satellite is None else "ABI " + satellite} {variable} found between {start_time} and {end_time}.'
            )
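# Usage sketch (hedged; `day_start` and the output directory are placeholders, and the
# remote ABI sources must be reachable): a recent day of GOES-17 ABI SST could be
# collected and written to GeoTIFF roughly as follows:
#
#     abi_range = ABIRange(day_start, day_start + timedelta(days=1), satellites=('G17',))
#     abi_range.write_raster('output_dir', average=True, satellite='G17')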
def __init__(
    self,
    model_date: datetime = None,
    source: str = '2ds',
    time_interval: str = 'daily',
    study_area_polygon_filename: PathLike = STUDY_AREA_POLYGON_FILENAME,
    source_url: str = None,
    use_defaults: bool = True,
):
    """
    Creates new observation object from datetime and given model parameters.

    :param model_date: model run date
    :param source: either '2ds' or '3dz'
    :param time_interval: time interval of model output
    :param study_area_polygon_filename: filename of vector file containing study area boundary
    :param source_url: directory containing NetCDF files
    :param use_defaults: whether to fall back to default source URLs if the provided one does not exist
    """

    if not isinstance(study_area_polygon_filename, Path):
        study_area_polygon_filename = Path(study_area_polygon_filename)

    if model_date is None:
        model_date = datetime.now()

    if type(model_date) is date:
        self.model_time = datetime.combine(model_date, datetime.min.time())
    else:
        self.model_time = model_date.replace(hour=0, minute=0, second=0, microsecond=0)

    self.source = source
    self.time_interval = time_interval

    self.study_area_polygon_filename = study_area_polygon_filename
    self.study_area_geojson = utilities.get_first_record(self.study_area_polygon_filename)['geometry']

    self.datasets = {}
    self.dataset_locks = {}

    date_string = f'{self.model_time:%Y%m%d}'
    date_dir = f'rtofs_global{date_string}'

    source_urls = SOURCE_URLS.copy()

    if source_url is not None:
        source_url = {'priority': source_url}
        if use_defaults:
            source_urls = {**source_url, **source_urls}

    self.source_names = []
    if self.time_interval in ('hrly', '3hrly', 'daily'):
        # added due to the different hourly source for nowcast and forecast
        for self.time_interval in {'hrly', '3hrly'}:
            for source_name, source_url in source_urls.items():
                for forecast_direction, datasets in DATASET_STRUCTURE[self.source].items():
                    if (
                        forecast_direction == 'nowcast'
                        and 'nowcast' in self.datasets
                        and len(self.datasets['nowcast']) > 0
                    ) or (
                        forecast_direction == 'forecast'
                        and 'forecast' in self.datasets
                        and len(self.datasets['forecast']) > 0
                    ):
                        continue

                    self.datasets[forecast_direction] = {}
                    self.dataset_locks[forecast_direction] = {}

                    for dataset_name in datasets:
                        filename = f'rtofs_glo_{self.source}_{forecast_direction}_{self.time_interval}_{dataset_name}'
                        if filename not in [
                            'rtofs_glo_2ds_nowcast_3hrly_prog',
                            'rtofs_glo_2ds_nowcast_3hrly_diag',
                            'rtofs_glo_2ds_forecast_hrly_prog',
                            'rtofs_glo_2ds_forecast_hrly_diag',
                        ]:
                            url = f'{source_url}/{date_dir}/{filename}'
                            if source_name == 'local':
                                url = f'{url}.nc'

                            try:
                                dataset = xarray.open_dataset(url)

                                self.datasets[forecast_direction][dataset_name] = dataset
                                self.dataset_locks[forecast_direction][dataset_name] = threading.Lock()
                                self.source_names.append(source_name)
                            except OSError as error:
                                LOGGER.warning(f'{error.__class__.__name__}: {error}')

    if (len(self.datasets['nowcast']) + len(self.datasets['forecast'])) > 0:
        if len(self.datasets['nowcast']) > 0:
            sample_dataset = next(iter(self.datasets['nowcast'].values()))
        else:
            sample_dataset = next(iter(self.datasets['forecast'].values()))

        self.lat = sample_dataset['lat'].values
        if not any(source_name == 'NCEP' for source_name in self.source_names):
            self.lon = sample_dataset['lon']
            self.raw_lon = self.lon
        else:
            # for some reason RTOFS from NCEP has longitude values shifted by 360
            self.raw_lon = sample_dataset['lon'].values
            self.lon = self.raw_lon - 180 - numpy.min(self.raw_lon)

        lat_pixel_size = numpy.mean(numpy.diff(sample_dataset['lat']))
        lon_pixel_size = numpy.mean(numpy.diff(sample_dataset['lon']))

        self.global_north = numpy.max(self.lat)
        self.global_west = numpy.min(self.lon)

        self.global_grid_transform = rasterio.transform.from_origin(
            self.global_west, self.global_north, lon_pixel_size, lat_pixel_size
        )

        (
            self.study_area_west,
            self.study_area_south,
            self.study_area_east,
            self.study_area_north,
        ) = geometry.shape(self.study_area_geojson).bounds

        self.study_area_transform = rasterio.transform.from_origin(
            self.study_area_west, self.study_area_north, lon_pixel_size, lat_pixel_size
        )
    else:
        raise PyOFS.NoDataError(f'No RTOFS datasets found for {self.model_time}.')
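# Usage sketch (hedged; the class name `RTOFSDataset`, the variable name 'sst', the date,
# and the output directory are assumptions for illustration -- valid variable names come
# from DATA_VARIABLES): a daily-averaged RTOFS raster could be produced roughly like this:
#
#     rtofs = RTOFSDataset(datetime(2021, 6, 1), source='2ds', time_interval='daily')
#     rtofs.write_rasters('output_dir', variables=['sst'], time=datetime(2021, 6, 1))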