# module-level imports assumed by the functions below; the actual package may
# organize these differently
import os
from multiprocessing.pool import ThreadPool

import numpy as np
import pandas as pd
import xarray as xray
import dask as da
from dask.diagnostics import ProgressBar

import ogh


def netcdf_to_ascii_PNNL2018(homedir, subdir, netcdfs, mappingfile, catalog_label, meta_file):
    """
    Convert a collection of PNNL 2018 WRF netcdf files into per-gridcell ASCII time-series files.
    """
    # initialize dictionary of delayed dataframe outputs
    outfiledict = {}

    # generate destination folder
    filedir = os.path.join(homedir, subdir)
    ogh.ensure_dir(filedir)

    # connect with collection of netcdfs
    ds_mf = xray.open_mfdataset(netcdfs, engine='netcdf4')

    # convert the netcdf collection to a multi-indexed pandas dataframe
    ds_pan = ds_mf.to_dataframe().reset_index('TIME')

    # generate list of variables
    ds_vars = [ds_var for ds_var in ds_pan.columns
               if ds_var not in ['YEAR', 'MONTH', 'DAY', 'TIME', 'LAT', 'LON']]

    # read in gridded cells of interest
    maptable, nstation = ogh.mappingfileToDF(mappingfile, colvar=None)
    # maptable = maptable[0:3]

    # at each latlong of interest
    for ind, eachrow in maptable.iterrows():

        # generate ASCII time-series
        ds_df = ds_pan.loc[eachrow['SN'], eachrow['WE'], :].reset_index(drop=True).loc[:, ds_vars]

        # create file name
        # outfilename = os.path.join(filedir, catalog_label + '_' + time_nm + '_{0}_{1}'.format(eachrow['LAT'], eachrow['LONG_']))
        outfilename = os.path.join(filedir, catalog_label + '_{0}_{1}'.format(eachrow['LAT'], eachrow['LONG_']))

        # save ds_df
        outfiledict[outfilename] = da.delayed(ds_df.to_csv)(path_or_buf=outfilename, sep='\t',
                                                            header=False, index=False)

    # compute ASCII time-series files
    ProgressBar().register()
    outfiledict = da.compute(outfiledict)[0]

    # update metadata file
    meta_file[catalog_label]['variable_info'].update(dict(ds_mf.attrs))
    meta_file[catalog_label]['variable_info'].update(dict(ds_mf.variables))
    meta_file[catalog_label]['variable_list'] = np.array(ds_vars)

    # catalog the output files
    ogh.addCatalogToMap(outfilepath=mappingfile, maptable=maptable,
                        folderpath=filedir, catalog_label=catalog_label)
    os.chdir(homedir)
    return list(outfiledict.keys())

def get_x_hourlywrf_PNNL2018(homedir,
                             spatialbounds,
                             subdir='PNNL2018/Hourly_WRF_1981_2015/SaukSpatialBounds',
                             nworkers=4,
                             start_date='2005-01-01',
                             end_date='2007-12-31',
                             time_resolution='H',
                             time_steps=24,
                             file_prefix='sp_',
                             rename_timelatlong_names={'south_north': 'SN',
                                                       'west_east': 'WE',
                                                       'time': 'TIME'},
                             replace_file=True):
    """
    get hourly WRF data from a 2018 PNNL WRF run using xarray on netcdf files
    """
    # check and generate data directory
    filedir = os.path.join(homedir, subdir)
    ogh.ensure_dir(filedir)

    # generate the daily date stamps between start_date and end_date
    dates = [x.strftime('%Y%m%d')
             for x in pd.date_range(start=start_date, end=end_date, freq='D')]

    # initialize parallel workers
    da.set_options(pool=ThreadPool(nworkers))
    ProgressBar().register()

    # generate the list of files to download
    filelist = compile_x_wrfpnnl2018_raw_locations(dates)

    # download files of interest
    NetCDFs = []
    for url, date in zip(filelist, dates):
        NetCDFs.append(da.delayed(wget_x_download_spSubset_PNNL)(fileurl=url,
                                                                 filedate=date,
                                                                 time_resolution=time_resolution,
                                                                 time_steps=time_steps,
                                                                 spatialbounds=spatialbounds,
                                                                 file_prefix=file_prefix,
                                                                 rename_timelatlong_names=rename_timelatlong_names,
                                                                 replace_file=replace_file))

    # run operations
    outputfiles = da.compute(NetCDFs)[0]

    # reset working directory
    os.chdir(homedir)
    return outputfiles

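# Hypothetical usage sketch (not part of the original module): it shows how the
# downloader above might be invoked for a short window. The home directory and
# the spatialbounds dict are illustrative assumptions; the exact keys expected
# by wget_x_download_spSubset_PNNL are not shown here, so adjust them to match
# that helper.
def _example_get_hourly_wrf(homedir, spatialbounds):
    # e.g., homedir = os.getcwd(); spatialbounds = a bounding-box dict for the spatial subset
    outputfiles = get_x_hourlywrf_PNNL2018(homedir=homedir,
                                           spatialbounds=spatialbounds,
                                           nworkers=4,
                                           start_date='2005-01-01',
                                           end_date='2005-01-02')
    return outputfiles
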
def get_x_dailymet_Livneh2013_raw(homedir,
                                  spatialbounds,
                                  subdir='livneh2013/Daily_MET_1915_2011/raw_netcdf',
                                  nworkers=4,
                                  start_date='1915-01-01',
                                  end_date='2011-12-31',
                                  rename_timelatlong_names={'lat': 'LAT',
                                                            'lon': 'LON',
                                                            'time': 'TIME'},
                                  file_prefix='sp_',
                                  replace_file=True):
    """
    get Daily MET data from Livneh et al. (2013) using xarray on netcdf files
    """
    # check and generate DailyMET Livneh 2013 data directory
    filedir = os.path.join(homedir, subdir)
    ogh.ensure_dir(filedir)

    # convert each month between start_date and end_date to a year-month stamp
    dates = [x.strftime('%Y%m')
             for x in pd.date_range(start=start_date, end=end_date, freq='M')]

    # initialize parallel workers
    da.set_options(pool=ThreadPool(nworkers))
    ProgressBar().register()

    # generate the list of files to download
    filelist = compile_x_dailymet_Livneh2013_raw_locations(dates)

    # download files of interest
    NetCDFs = []
    for url in filelist:
        NetCDFs.append(da.delayed(ftp_x_download_spSubset)(fileurl=url,
                                                           spatialbounds=spatialbounds,
                                                           file_prefix=file_prefix,
                                                           rename_timelatlong_names=rename_timelatlong_names,
                                                           replace_file=replace_file))

    # run operations
    outputfiles = da.compute(NetCDFs)[0]

    # reset working directory
    os.chdir(homedir)
    return outputfiles

def netcdf_to_ascii(homedir, subdir, source_directory, mappingfile, catalog_label, meta_file,
                    temporal_resolution='D', netcdfs=None, variable_list=None):
    """
    Convert a collection of netcdf files into per-gridcell ASCII time-series files
    and annotate the metadata dictionary with the dataset attributes.
    """
    # initialize dictionary of delayed dataframe outputs
    outfiledict = {}

    # generate destination folder
    filedir = os.path.join(homedir, subdir)
    ogh.ensure_dir(filedir)

    # connect with collection of netcdfs
    if isinstance(netcdfs, type(None)):
        netcdfs = [os.path.join(source_directory, file)
                   for file in os.listdir(source_directory) if file.endswith('.nc')]
    ds_mf = xray.open_mfdataset(netcdfs, engine='netcdf4').sortby('TIME')

    # generate list of variables
    if not isinstance(variable_list, type(None)):
        ds_vars = variable_list.copy()
    else:
        ds_vars = [ds_var for ds_var in dict(ds_mf.variables).keys()
                   if ds_var not in ['YEAR', 'MONTH', 'DAY', 'TIME', 'LAT', 'LON']]

    # convert the netcdf collection to a multi-indexed pandas dataframe of the selected variables
    ds_pan = ds_mf.to_dataframe()[ds_vars]

    # read in gridded cells of interest
    maptable, nstation = ogh.mappingfileToDF(mappingfile, colvar=None, summary=False)

    # at each latlong of interest
    for ind, eachrow in maptable.iterrows():

        # generate ASCII time-series
        ds_df = ds_pan.loc[eachrow['LAT'], eachrow['LONG_'], :].reset_index(drop=True, level=[0, 1])

        # create file name
        outfilename = os.path.join(filedir, 'data_{0}_{1}'.format(eachrow['LAT'], eachrow['LONG_']))

        # save ds_df
        outfiledict[outfilename] = da.delayed(ds_df.to_csv)(path_or_buf=outfilename, sep='\t',
                                                            header=False, index=False)

    # compute ASCII time-series files
    ProgressBar().register()
    outfiledict = da.compute(outfiledict)[0]

    # annotate metadata file
    meta_file[catalog_label] = dict(ds_mf.attrs)
    meta_file[catalog_label]['variable_list'] = list(np.array(ds_vars))
    meta_file[catalog_label]['delimiter'] = '\t'
    meta_file[catalog_label]['start_date'] = pd.Series(ds_mf.TIME).sort_values().iloc[0].strftime('%Y-%m-%d %H:%M:%S')
    meta_file[catalog_label]['end_date'] = pd.Series(ds_mf.TIME).sort_values().iloc[-1].strftime('%Y-%m-%d %H:%M:%S')
    meta_file[catalog_label]['temporal_resolution'] = temporal_resolution
    meta_file[catalog_label]['variable_info'] = dict(ds_mf.variables)

    # catalog the output files
    ogh.addCatalogToMap(outfilepath=mappingfile, maptable=maptable,
                        folderpath=filedir, catalog_label=catalog_label)
    os.chdir(homedir)
    return list(outfiledict.keys())

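# Hypothetical usage sketch (not part of the original module): convert a folder of
# previously downloaded netcdf subsets into per-gridcell ASCII files and collect
# the dataset attributes in a metadata dictionary. The subdirectory, source
# directory, mapping file path, and catalog label below are illustrative placeholders.
def _example_netcdf_to_ascii(homedir):
    meta_file = dict()  # annotated in place by netcdf_to_ascii
    outfiles = netcdf_to_ascii(homedir=homedir,
                               subdir='PNNL2018/Hourly_ASCII',  # assumed output folder
                               source_directory=os.path.join(homedir, 'PNNL2018/Hourly_WRF_1981_2015'),  # assumed netcdf folder
                               mappingfile=os.path.join(homedir, 'mappingfile.csv'),  # hypothetical mapping file
                               catalog_label='hourlywrf_pnnl2018',
                               meta_file=meta_file,
                               temporal_resolution='H')
    return outfiles, meta_file
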
def test_ensuredir(self):
    path0 = os.getcwd()
    path1 = os.path.join(data_path, 'test_files')
    ogh.ensure_dir(path1)
    ogh.ensure_dir(path0)
    assert os.path.exists(path1)