Example #1
def get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points,
               lat_points, lon_points):
    logger.debug(
        'obtaining GFS 0.50 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]'
        % (str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
           str(lon_hi)))
    base_url = 'https://www.ncei.noaa.gov/thredds/model-gfs-g4-anl-files-old/'
    CheckConnection.set_url('ncei.noaa.gov')

    x_arr_list = []
    # start a day earlier so times around midnight can be interpolated
    start_date = datetime(date_lo.year, date_lo.month,
                          date_lo.day) - timedelta(days=1)
    for day in range((date_hi - start_date).days + 1):
        dt = datetime(start_date.year, start_date.month,
                      start_date.day) + timedelta(days=day)
        catalog = TDSCatalog(
            '%s%s%.2d/%s%.2d%.2d/catalog.xml' %
            (base_url, dt.year, dt.month, dt.year, dt.month, dt.day))
        for hour in [3, 6]:
            for cycle in [0, 6, 12, 18]:
                attempts = 0
                while True:
                    try:
                        attempts += 1
                        name = 'gfsanl_4_%s%.2d%.2d_%.2d00_00%s.grb2' % (
                            dt.year, dt.month, dt.day, cycle, hour)
                        if name in list(catalog.datasets):
                            ds_subset = catalog.datasets[name].subset()
                            query = ds_subset.query().lonlat_box(
                                north=lat_hi,
                                south=lat_lo,
                                east=lon_hi,
                                west=lon_lo).variables(*GFS_50_VAR_LIST)
                            CheckConnection.is_online()
                            data = ds_subset.get_data(query)
                            x_arr = xr.open_dataset(NetCDF4DataStore(data))
                            if 'time1' in list(x_arr.coords):
                                x_arr = x_arr.rename({'time1': 'time'})
                            x_arr_list.append(x_arr)
                        else:
                            logger.warning('dataset %s is not found' % name)
                        break
                    except Exception as e:
                        logger.error(traceback.format_exc())
                        CheckConnection.is_online()
                        logger.error(e)
                        logger.error(
                            'Filename %s - Failed connecting to GFS Server - number of attempts: %d'
                            % (name, attempts))
                        time.sleep(2)

    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    # map query longitudes onto the dataset's 0-360 degree convention
    lon_points = lon_points % 360
    res = dataset.interp(lon=lon_points, lat=lat_points,
                         time=time_points).to_dataframe()[GFS_50_VAR_LIST]
    # add NaN placeholders for variables that are not available in this dataset
    res[[
        'Wind_speed_gust_surface', 'Dewpoint_temperature_height_above_ground'
    ]] = [[np.nan, np.nan]] * len(res)
    return res
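A hypothetical call to get_GFS_50, assuming the imports sketched above; the bounding box, date range and query points are illustrative only:

# Hypothetical usage of get_GFS_50 (illustrative values, not from the source):
# fetch two days of 0.50-degree analyses over a small box and interpolate
# them onto three query points.
if __name__ == '__main__':
    query_times = np.array(['2014-06-01T06:30', '2014-06-01T12:00',
                            '2014-06-02T18:45'], dtype='datetime64[ns]')
    weather = get_GFS_50(date_lo=datetime(2014, 6, 1), date_hi=datetime(2014, 6, 2),
                         lat_lo=53.0, lat_hi=56.0, lon_lo=3.0, lon_hi=8.0,
                         time_points=query_times,
                         lat_points=np.array([54.1, 54.5, 55.0]),
                         lon_points=np.array([4.2, 5.0, 6.7]))
    print(weather.head())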
Example #2
def download_file(zipped_file: str, download_dir: Path, year: int) -> str:
    # base URL of the AIS data directory for the requested year
    url = "https://coast.noaa.gov/htdata/CMSP/AISDataHandler/{0}/".format(year)
    CheckConnection.is_online()
    logger.info('downloading AIS file: %s' % zipped_file)

    # stream the zip file from the server and write it to disk
    with requests.get(os.path.join(url, zipped_file), stream=True) as req:
        req.raise_for_status()
        # keep only the file name if a sub-path was given
        zipped_file = os.path.basename(zipped_file)
        with open(zipped_file, "wb") as handle:
            for chunk in req.iter_content(chunk_size=8192):
                handle.write(chunk)
    # extract the contents into the output directory, then delete the archive
    with zipfile.ZipFile(zipped_file, 'r') as zip_ref:
        for f in zip_ref.infolist():
            if f.filename.endswith('.csv'):
                f.filename = os.path.basename(f.filename)
                file_name = f.filename
                zip_ref.extract(f, download_dir)
            if str(Path(f.filename).parent).endswith('.gdb'):
                zip_ref.extractall(download_dir)
                name = str(Path(f.filename).parent)
                gdb_file = Path(download_dir, name)
                file_name = name.split('.')[0] + '.csv'
                file_path = Path(download_dir, file_name)
                try:
                    chunkify_gdb(gdb_file, file_path, chunkSize=100000)
                except Exception as e:
                    # remove the partial output file on error so a later run can resume cleanly
                    if file_path:
                        file_path.unlink(missing_ok=True)
                    raise e
                shutil.rmtree(gdb_file)
                break
    os.remove(zipped_file)
    return file_name
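A hypothetical call to download_file; the archive name and year are illustrative and may not exist on the NOAA server:

# Hypothetical usage of download_file (illustrative file name and year).
if __name__ == '__main__':
    out_dir = Path('ais_downloads')
    out_dir.mkdir(parents=True, exist_ok=True)
    csv_name = download_file('AIS_2017_01_Zone10.zip', out_dir, 2017)
    print('extracted:', csv_name)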
Example #3
def try_get_data(url):
    # keep these names bound so the error handler below can reference them safely
    response = None
    read_bytes = None
    try:
        CheckConnection.is_online()
        url_auth = authenticate_CAS_for_URL(url, config['UN_CMEMS'],
                                            config['PW_CMEMS'])
        response = open_url(url_auth)
        CheckConnection.is_online()
        read_bytes = response.read()
        CheckConnection.is_online()
        return xr.open_dataset(read_bytes)
    except Exception:
        logger.error(traceback.format_exc())
        # if an HTML error page was returned, try to extract its error message
        error_msg = (BeautifulSoup(read_bytes, 'html.parser').find(
            'p', {"class": "error"}) if read_bytes else None)
        raise ValueError('Error:', error_msg, 'Request: ', url, response)
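A hypothetical call to try_get_data; the product URL is a placeholder, and config is assumed to hold valid CMEMS credentials under 'UN_CMEMS' and 'PW_CMEMS':

# Hypothetical usage of try_get_data (placeholder product path; real CMEMS
# credentials are assumed to be present in config).
if __name__ == '__main__':
    wave_ds = try_get_data('https://nrt.cmems-du.eu/thredds/dodsC/'
                           'some-wave-product')  # placeholder product path
    print(wave_ds)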
Example #4
def get_GFS(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi, time_points,
            lat_points, lon_points):
    logger.debug(
        'obtaining GFS 0.25 dataset for DATE [%s, %s] LAT [%s, %s] LON [%s, %s]'
        % (str(date_lo), str(date_hi), str(lat_lo), str(lat_hi), str(lon_lo),
           str(lon_hi)))
    # start a day earlier so times around midnight can be interpolated
    start_date = datetime(date_lo.year, date_lo.month,
                          date_lo.day) - timedelta(days=1)
    # the 0.25-degree archive does not cover dates before 2015-01-15; fall back to 0.50 degrees
    if start_date < datetime(2015, 1, 15):
        logger.debug('GFS 0.25 DATASET is out of supported range')
        return get_GFS_50(date_lo, date_hi, lat_lo, lat_hi, lon_lo, lon_hi,
                          time_points, lat_points, lon_points)
    x_arr_list = []
    base_url = 'https://rda.ucar.edu/thredds/catalog/files/g/ds084.1'
    CheckConnection.set_url('rda.ucar.edu')
    # authenticate the THREDDS session with the RDA credentials
    http_util.session_manager.set_session_options(auth=(config['UN_RDA'],
                                                        config['PW_RDA']))
    start_cat = TDSCatalog("%s/%s/%s%.2d%.2d/catalog.xml" %
                           (base_url, start_date.year, start_date.year,
                            start_date.month, start_date.day))
    ds_subset = start_cat.datasets['gfs.0p25.%s%.2d%.2d18.f006.grib2' %
                                   (start_date.year, start_date.month,
                                    start_date.day)].subset()
    query = ds_subset.query().lonlat_box(
        north=lat_hi, south=lat_lo, east=lon_hi,
        west=lon_lo).variables(*GFS_25_VAR_LIST)
    CheckConnection.is_online()
    data = ds_subset.get_data(query)
    x_arr = xr.open_dataset(NetCDF4DataStore(data))
    if 'time1' in list(x_arr.coords):
        x_arr = x_arr.rename({'time1': 'time'})
    x_arr_list.append(x_arr)

    for day in range((date_hi - date_lo).days + 1):
        end_date = datetime(date_lo.year, date_lo.month,
                            date_lo.day) + timedelta(days=day)
        end_cat = TDSCatalog("%s/%s/%s%.2d%.2d/catalog.xml" %
                             (base_url, end_date.year, end_date.year,
                              end_date.month, end_date.day))
        for cycle in [0, 6, 12, 18]:
            for hours in [3, 6]:
                name = 'gfs.0p25.%s%.2d%.2d%.2d.f0%.2d.grib2' % (
                    end_date.year, end_date.month, end_date.day, cycle, hours)
                if name in list(end_cat.datasets):
                    ds_subset = end_cat.datasets[name].subset()
                    query = ds_subset.query().lonlat_box(
                        north=lat_hi, south=lat_lo, east=lon_hi,
                        west=lon_lo).variables(*GFS_25_VAR_LIST)
                    CheckConnection.is_online()
                    data = ds_subset.get_data(query)
                    x_arr = xr.open_dataset(NetCDF4DataStore(data))
                    if 'time1' in list(x_arr.coords):
                        x_arr = x_arr.rename({'time1': 'time'})
                    x_arr_list.append(x_arr)
                else:
                    logger.warning('dataset %s is not found' % name)
    dataset = xr.combine_by_coords(x_arr_list).squeeze()
    # map query longitudes onto the dataset's 0-360 degree convention
    lon_points = lon_points % 360
    b = xr.DataArray([1] * len(lon_points))
    res = dataset.interp(longitude=lon_points,
                         latitude=lat_points,
                         time=time_points,
                         bounds_dim=b).to_dataframe()[GFS_25_VAR_LIST]
    return res
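A hypothetical call to get_GFS with illustrative values; because the day before date_lo falls on or after 2015-01-15, the 0.25-degree path is used rather than the get_GFS_50 fallback:

# Hypothetical usage of get_GFS (illustrative values): same call shape as
# get_GFS_50 above, but a 2019 date range stays on the 0.25-degree RDA data.
if __name__ == '__main__':
    query_times = np.array(['2019-03-10T02:15', '2019-03-11T21:05'],
                           dtype='datetime64[ns]')
    weather = get_GFS(date_lo=datetime(2019, 3, 10), date_hi=datetime(2019, 3, 11),
                      lat_lo=53.0, lat_hi=56.0, lon_lo=3.0, lon_hi=8.0,
                      time_points=query_times,
                      lat_points=np.array([54.1, 55.0]),
                      lon_points=np.array([4.2, 6.7]))
    print(weather.head())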