def get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points, lat_points, lon_points):
    logger.debug('obtaining GFS 0.50 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]' % (
        str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo), str(lon_hi)))
    base_url = 'https://www.ncei.noaa.gov/thredds/model-gfs-g4-anl-files-old/'
    CheckConnection.set_url('ncei.noaa.gov')

    x_arr_list = []
    # start one day early so values at midnight of date_lo can be interpolated
    start_date = datetime(date_lo.year, date_lo.month, date_lo.day) - timedelta(days=1)
    for day in range((date_hi - start_date).days + 1):
        dt = datetime(start_date.year, start_date.month, start_date.day) + timedelta(days=day)
        catalog = TDSCatalog('%s%s%.2d/%s%.2d%.2d/catalog.xml' % (
            base_url, dt.year, dt.month, dt.year, dt.month, dt.day))
        for hour in [3, 6]:
            for cycle in [0, 6, 12, 18]:
                attempts = 0
                # retry indefinitely on transient connection errors
                while True:
                    try:
                        attempts += 1
                        name = 'gfsanl_4_%s%.2d%.2d_%.2d00_00%s.grb2' % (dt.year, dt.month, dt.day, cycle, hour)
                        if name in catalog.datasets:
                            ds_subset = catalog.datasets[name].subset()
                            query = ds_subset.query().lonlat_box(
                                north=lat_hi, south=lat_lo, east=lon_hi, west=lon_lo).variables(*GFS_50_VAR_LIST)
                            CheckConnection.is_online()
                            data = ds_subset.get_data(query)
                            x_arr = xr.open_dataset(NetCDF4DataStore(data))
                            if 'time1' in list(x_arr.coords):
                                x_arr = x_arr.rename({'time1': 'time'})
                            x_arr_list.append(x_arr)
                        else:
                            logger.warning('dataset %s is not found' % name)
                        break
                    except Exception as e:
                        logger.error(traceback.format_exc())
                        CheckConnection.is_online()
                        logger.error(e)
                        logger.error('Filename %s - Failed connecting to GFS Server - number of attempts: %d' % (
                            name, attempts))
                        time.sleep(2)

    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    # wrap query longitudes into the dataset's [0, 360) convention
    lon_points = lon_points % 360
    res = dataset.interp(lon=lon_points, lat=lat_points, time=time_points).to_dataframe()[GFS_50_VAR_LIST]
    # the 0.50 fallback does not deliver usable values for these two variables; fill with NaN
    res[['Wind_speed_gust_surface', 'Dewpoint_temperature_height_above_ground']] = [[np.nan, np.nan]] * len(res)
    return res
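
# The [-180, 180] -> [0, 360) longitude wrap above is easy to get wrong, so the
# self-contained sketch below (hypothetical values, no network access, not part
# of the pipeline) illustrates the numpy modulo idiom both GFS helpers rely on.
def _example_lon_wrap():
    """Sketch: map longitudes from [-180, 180] into the [0, 360) grid convention."""
    pts = np.array([-180.0, -90.0, -0.25, 0.0, 45.0, 179.75])
    wrapped = pts % 360  # numpy modulo is non-negative for a positive divisor
    assert np.all((wrapped >= 0) & (wrapped < 360))
    return wrapped  # [180., 270., 359.75, 0., 45., 179.75]
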
def download_file(zipped_file: str, download_dir: Path, year: int) -> str:
    # url link to data (note the trailing slash)
    url = "https://coast.noaa.gov/htdata/CMSP/AISDataHandler/{0}/".format(year)
    CheckConnection.is_online()
    logger.info('downloading AIS file: %s' % zipped_file)

    # stream the zip file to disk with requests, 8 KiB at a time
    with requests.get(url + zipped_file, stream=True) as req:
        req.raise_for_status()
        # keep only the basename for the local temporary file
        zipped_file = zipped_file.split('/')[-1]
        with open(zipped_file, "wb") as handle:
            for chunk in req.iter_content(chunk_size=8192):
                handle.write(chunk)

    # extract the archive into the output directory, then delete it
    with zipfile.ZipFile(zipped_file, 'r') as zip_ref:
        for f in zip_ref.infolist():
            if f.filename.endswith('.csv'):
                # flatten any directory prefix inside the archive
                f.filename = os.path.basename(f.filename)
                file_name = f.filename
                zip_ref.extract(f, download_dir)
            if str(Path(f.filename).parent).endswith('.gdb'):
                # geodatabase archives are extracted whole, then converted to CSV
                zip_ref.extractall(download_dir)
                name = str(Path(f.filename).parent)
                gdb_file = Path(download_dir, name)
                file_name = name.split('.')[0] + '.csv'
                file_path = Path(download_dir, file_name)
                try:
                    chunkify_gdb(gdb_file, file_path, chunkSize=100000)
                except Exception:
                    # discard the partial file in case of an error to resume later properly
                    file_path.unlink(missing_ok=True)
                    raise
                shutil.rmtree(gdb_file)
                break
    os.remove(zipped_file)
    return file_name
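
# A hedged usage sketch for download_file: the archive name below is
# hypothetical (real names are listed in the per-year index under
# https://coast.noaa.gov/htdata/CMSP/AISDataHandler/), and the call needs
# network access plus write permission in the working directory.
def _example_download_file():
    """Sketch: fetch and unpack one (hypothetical) AIS archive for 2020."""
    download_dir = Path('ais_data')
    download_dir.mkdir(parents=True, exist_ok=True)
    csv_name = download_file('AIS_2020_01_01.zip', download_dir, 2020)
    return Path(download_dir, csv_name)
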
def try_get_data(url):
    # initialize so the except-branch cannot hit a NameError when the
    # request fails before anything was read
    response = None
    read_bytes = b''
    try:
        CheckConnection.is_online()
        url_auth = authenticate_CAS_for_URL(url, config['UN_CMEMS'], config['PW_CMEMS'])
        response = open_url(url_auth)
        CheckConnection.is_online()
        read_bytes = response.read()
        CheckConnection.is_online()
        return xr.open_dataset(read_bytes)
    except Exception:
        logger.error(traceback.format_exc())
        # surface the human-readable message embedded in the HTML error page, if any
        raise ValueError('Error:', BeautifulSoup(read_bytes, 'html.parser').find('p', {"class": "error"}),
                         'Request: ', url, response)
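
# try_get_data digs a human-readable message out of the HTML error page the
# server returns on failure. The offline sketch below shows just that
# extraction step; the inline HTML stands in for a real CMEMS response.
def _example_extract_error_message():
    """Sketch: pull the <p class="error"> text out of an HTML error page."""
    html = b'<html><body><p class="error">Invalid credentials</p></body></html>'
    tag = BeautifulSoup(html, 'html.parser').find('p', {'class': 'error'})
    return tag.get_text() if tag else None  # -> 'Invalid credentials'
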
def get_GFS(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points, lat_points, lon_points):
    logger.debug('obtaining GFS 0.25 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]' % (
        str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo), str(lon_hi)))

    # calculate a day prior for midnight interpolation
    start_date = datetime(date_lo.year, date_lo.month, date_lo.day) - timedelta(days=1)

    # consider the supported time range: fall back to the 0.50 archive before it
    if start_date < datetime(2015, 1, 15):
        logger.debug('GFS 0.25 DATASET is out of supported range')
        return get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points, lat_points, lon_points)

    x_arr_list = []
    base_url = 'https://rda.ucar.edu/thredds/catalog/files/g/ds084.1'
    CheckConnection.set_url('rda.ucar.edu')
    http_util.session_manager.set_session_options(auth=(config['UN_RDA'], config['PW_RDA']))

    # fetch the prior day's 18z f006 file to cover midnight of date_lo
    start_cat = TDSCatalog("%s/%s/%s%.2d%.2d/catalog.xml" % (
        base_url, start_date.year, start_date.year, start_date.month, start_date.day))
    ds_subset = start_cat.datasets[
        'gfs.0p25.%s%.2d%.2d18.f006.grib2' % (start_date.year, start_date.month, start_date.day)].subset()
    query = ds_subset.query().lonlat_box(
        north=lat_hi, south=lat_lo, east=lon_hi, west=lon_lo).variables(*GFS_25_VAR_LIST)
    CheckConnection.is_online()
    data = ds_subset.get_data(query)
    x_arr = xr.open_dataset(NetCDF4DataStore(data))
    if 'time1' in list(x_arr.coords):
        x_arr = x_arr.rename({'time1': 'time'})
    x_arr_list.append(x_arr)

    for day in range((date_hi - date_lo).days + 1):
        end_date = datetime(date_lo.year, date_lo.month, date_lo.day) + timedelta(days=day)
        end_cat = TDSCatalog("%s/%s/%s%.2d%.2d/catalog.xml" % (
            base_url, end_date.year, end_date.year, end_date.month, end_date.day))
        for cycle in [0, 6, 12, 18]:
            for hours in [3, 6]:
                name = 'gfs.0p25.%s%.2d%.2d%.2d.f0%.2d.grib2' % (
                    end_date.year, end_date.month, end_date.day, cycle, hours)
                if name in end_cat.datasets:
                    ds_subset = end_cat.datasets[name].subset()
                    query = ds_subset.query().lonlat_box(
                        north=lat_hi, south=lat_lo, east=lon_hi, west=lon_lo).variables(*GFS_25_VAR_LIST)
                    CheckConnection.is_online()
                    data = ds_subset.get_data(query)
                    x_arr = xr.open_dataset(NetCDF4DataStore(data))
                    if 'time1' in list(x_arr.coords):
                        x_arr = x_arr.rename({'time1': 'time'})
                    x_arr_list.append(x_arr)
                else:
                    logger.warning('dataset %s is not found' % name)

    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    # wrap query longitudes into the dataset's [0, 360) convention
    lon_points = lon_points % 360
    b = xr.DataArray([1] * len(lon_points))
    res = dataset.interp(longitude=lon_points, latitude=lat_points, time=time_points,
                         bounds_dim=b).to_dataframe()[GFS_25_VAR_LIST]
    return res
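
# get_GFS interpolates the combined GRIB mosaic at scattered track points. The
# offline sketch below shows the underlying xarray pattern on synthetic data:
# passing DataArrays that share one dimension yields pointwise results (one row
# per track point) instead of a full grid. All names and values here are
# illustrative, not taken from the pipeline.
def _example_pointwise_interp():
    """Sketch: pointwise interpolation of a gridded field at track coordinates."""
    rng = np.random.default_rng(0)
    ds = xr.Dataset(
        {'temperature': (('time', 'latitude', 'longitude'), rng.random((2, 5, 5)))},
        coords={'time': np.array(['2020-06-01T00', '2020-06-01T06'], dtype='datetime64[ns]'),
                'latitude': np.linspace(50.0, 54.0, 5),
                'longitude': np.linspace(0.0, 4.0, 5)})
    # track points share the 'points' dimension, so interp is pointwise
    times = xr.DataArray(np.array(['2020-06-01T01', '2020-06-01T02'], dtype='datetime64[ns]'), dims='points')
    lats = xr.DataArray([51.2, 52.7], dims='points')
    lons = xr.DataArray([0.5, 2.3], dims='points')
    return ds.interp(time=times, latitude=lats, longitude=lons).to_dataframe()
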