def main(netcdf_ws=os.getcwd(), ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(), start_date=None, end_date=None,
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract GRIDMET vapor pressure (ea) from specific humidity

    Every "sph_YYYY.nc" file in netcdf_ws whose year overlaps the requested
    date range is converted to a 366 band raster named
    "ea_YYYY_daily_gridmet.img" in the "ea" subfolder of output_ws.
    Each processed day is written to the band matching its day of year.

    Args:
        netcdf_ws (str): folder of GRIDMET netcdf files
        ancillary_ws (str): folder of ancillary rasters
            (must contain "gridmet_elev.img")
        output_ws (str): folder of output rasters
        start_date (str): ISO format date (YYYY-MM-DD).
            Defaults to 2017-01-01 if not set or not parsable.
        end_date (str): ISO format date (YYYY-MM-DD).
            Defaults to 2017-12-31 if not set or not parsable.
        extent_path (str): file path of a raster or shapefile (.shp)
            defining the output extent
        output_extent (list): decimal degrees values defining output extent.
            Takes precedence over extent_path.
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nExtracting GRIDMET vapor pressure')

    # If a date is not set (None) or can't be parsed, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug(' Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2017, 1, 1)
        logging.info(' Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug(' End date: {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2017, 12, 31)
        logging.info(' End date: {}'.format(end_dt))

    # The ancillary elevation raster defines the full GRIDMET grid
    elev_raster = os.path.join(ancillary_ws, 'gridmet_elev.img')

    output_fmt = '{}_{}_daily_gridmet.img'
    # Raw string so the regex escapes are not treated as string escapes
    gridmet_re = re.compile(r'(?P<VAR>\w+)_(?P<YEAR>\d{4})\.nc$')

    # GRIDMET band name dictionary
    gridmet_band_dict = {
        'pr': 'precipitation_amount',
        'srad': 'surface_downwelling_shortwave_flux_in_air',
        'sph': 'specific_humidity',
        'tmmn': 'air_temperature',
        'tmmx': 'air_temperature',
        'vs': 'wind_speed',
    }

    # Get extent/geo from elevation raster
    gridmet_ds = gdal.Open(elev_raster)
    gridmet_osr = gdc.raster_ds_osr(gridmet_ds)
    gridmet_proj = gdc.osr_proj(gridmet_osr)
    gridmet_cs = gdc.raster_ds_cellsize(gridmet_ds, x_only=True)
    gridmet_extent = gdc.raster_ds_extent(gridmet_ds)
    gridmet_full_geo = gridmet_extent.geo(gridmet_cs)
    gridmet_x, gridmet_y = gridmet_extent.origin()
    gridmet_ds = None
    logging.debug(' Projection: {}'.format(gridmet_proj))
    logging.debug(' Cellsize: {}'.format(gridmet_cs))
    logging.debug(' Geo: {}'.format(gridmet_full_geo))
    logging.debug(' Extent: {}'.format(gridmet_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        gridmet_extent = gdc.Extent(output_extent)
        gridmet_extent.adjust_to_snap(
            'EXPAND', gridmet_x, gridmet_y, gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug(' Geo: {}'.format(gridmet_geo))
        logging.debug(' Extent: {}'.format(gridmet_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        # Shapefiles have no cellsize, same handling as the ETr/ETo script
        if extent_path.lower().endswith('.shp'):
            gridmet_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            gridmet_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        gridmet_extent = gdc.project_extent(
            gridmet_extent, extent_osr, gridmet_osr, extent_cs)
        gridmet_extent.adjust_to_snap(
            'EXPAND', gridmet_x, gridmet_y, gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug(' Geo: {}'.format(gridmet_geo))
        logging.debug(' Extent: {}'.format(gridmet_extent))
    else:
        gridmet_geo = gridmet_full_geo

    # Get indices for slicing/clipping input arrays
    g_i, g_j = gdc.array_geo_offsets(
        gridmet_full_geo, gridmet_geo, cs=gridmet_cs)
    g_rows, g_cols = gridmet_extent.shape(cs=gridmet_cs)

    # Air pressure is derived once from elevation and reused for every day
    elev_array = gdc.raster_to_array(
        elev_raster, mask_extent=gridmet_extent, return_nodata=False)
    pair_array = et_common.air_pressure_func(elev_array)
    del elev_array

    # Process each variable
    input_var = 'sph'
    output_var = 'ea'
    logging.info("\nVariable: {}".format(input_var))

    # Build output folder
    var_ws = os.path.join(output_ws, output_var)
    if not os.path.isdir(var_ws):
        os.makedirs(var_ws)

    # Process each file in the input workspace
    for input_name in sorted(os.listdir(netcdf_ws)):
        input_match = gridmet_re.match(input_name)
        if not input_match:
            logging.debug("{}".format(input_name))
            logging.debug(" Regular expression didn't match, skipping")
            continue
        elif input_match.group('VAR') != input_var:
            logging.debug("{}".format(input_name))
            logging.debug(" Variable didn't match, skipping")
            continue
        else:
            logging.info("{}".format(input_name))

        year_str = input_match.group('YEAR')
        logging.info(" {}".format(year_str))
        year_int = int(year_str)
        if year_int < start_dt.year:
            logging.debug(' Before start date, skipping')
            continue
        elif year_int > end_dt.year:
            logging.debug(' After end date, skipping')
            continue

        # Build input file path
        input_raster = os.path.join(netcdf_ws, input_name)

        # Create a single raster for each year with 366 bands
        # Each day will be stored in a separate band
        output_path = os.path.join(
            var_ws, output_fmt.format(output_var, year_str))
        logging.debug(' {}'.format(output_path))
        if os.path.isfile(output_path):
            if not overwrite_flag:
                logging.debug(' File already exists, skipping')
                continue
            else:
                logging.debug(' File already exists, removing existing')
                os.remove(output_path)
        gdc.build_empty_raster(
            output_path, band_cnt=366, output_dtype=np.float32,
            output_proj=gridmet_proj, output_cs=gridmet_cs,
            output_extent=gridmet_extent, output_fill_flag=True)

        # Read in the GRIDMET NetCDF file
        # Immediately clip input array to save memory
        # The slice is copied into memory, so the file can be closed
        #   right away (and is closed even if the read fails)
        input_nc_f = netCDF4.Dataset(input_raster, 'r')
        try:
            input_nc = input_nc_f.variables[gridmet_band_dict[input_var]][
                :, g_i:g_i + g_cols, g_j:g_j + g_rows].copy()
        finally:
            input_nc_f.close()
        del input_nc_f
        # Swap the last two axes so each day is row x col
        input_nc = np.transpose(input_nc, (0, 2, 1))

        # A numpy array is returned when slicing a masked array
        #   if there are no masked pixels
        # This is a hack to force the numpy array back to a masked array
        if not isinstance(input_nc, np.ma.MaskedArray):
            input_nc = np.ma.MaskedArray(
                input_nc, np.zeros(input_nc.shape, dtype=bool))

        # Check all valid dates in the year
        year_dates = date_range(
            dt.datetime(year_int, 1, 1), dt.datetime(year_int + 1, 1, 1))
        for date_dt in year_dates:
            if date_dt < start_dt or date_dt > end_dt:
                continue
            logging.info(' {}'.format(date_dt.strftime('%Y_%m_%d')))

            # Bands are 1-based DOY, the NetCDF time axis is 0-based
            doy = int(date_dt.strftime('%j'))
            doy_i = doy - 1

            # Arrays are being read as masked array with a fill value
            # Convert to basic numpy array arrays with nan values
            try:
                input_ma = input_nc[doy_i, :, :]
            except IndexError:
                logging.info(' date not in netcdf, skipping')
                continue
            input_array = input_ma.data.astype(np.float32)
            input_nodata = float(input_ma.fill_value)
            input_array[input_array == input_nodata] = np.nan
            del input_ma

            # Since inputs are netcdf, need to create GDAL raster
            #   datasets in order to use gdal_common functions
            # The array was already clipped to the output window above,
            #   so it must be georeferenced with the subset geo
            #   (using the full grid geo misplaces subset extents)
            input_ds = gdc.array_to_mem_ds(
                input_array, output_geo=gridmet_geo,
                output_proj=gridmet_proj)
            # Then extract the subset from the in memory dataset
            sph_array = gdc.raster_ds_to_array(
                input_ds, 1, mask_extent=gridmet_extent,
                return_nodata=False)
            del input_ds, input_array

            # Compute ea [kPa] from specific humidity [kg/kg]
            ea_array = (sph_array * pair_array) / (0.622 + 0.378 * sph_array)

            # Save the projected array as 32-bit floats
            gdc.array_to_comp_raster(
                ea_array.astype(np.float32), output_path,
                band=doy, stats_flag=False)
            del sph_array, ea_array
        del input_nc

        # Statistics are computed once per year after all bands are written
        if stats_flag:
            gdc.raster_statistics(output_path)

    logging.debug('\nScript Complete')
def _read_gridmet_subset(nc_path, band_name, g_i, g_j, g_rows, g_cols):
    """Read one GRIDMET NetCDF variable clipped to the output window

    Args:
        nc_path (str): file path of the NetCDF file
        band_name (str): name of the NetCDF variable to read
        g_i (int): start index along the second NetCDF axis
        g_j (int): start index along the third NetCDF axis
        g_rows (int): number of cells to read along the third axis
        g_cols (int): number of cells to read along the second axis

    Returns:
        np.ma.MaskedArray: in-memory copy with the last two axes swapped
    """
    # The slice is copied into memory, so the file is closed immediately
    #   (and is closed even if the read fails)
    nc_f = netCDF4.Dataset(nc_path, 'r')
    try:
        nc_array = nc_f.variables[band_name][
            :, g_i:g_i + g_cols, g_j:g_j + g_rows].copy()
    finally:
        nc_f.close()

    # Transpose arrays back to row x col
    nc_array = np.transpose(nc_array, (0, 2, 1))

    # A numpy array is returned when slicing a masked array
    #   if there are no masked pixels
    # This is a hack to force the numpy array back to a masked array
    if not isinstance(nc_array, np.ma.MaskedArray):
        nc_array = np.ma.MaskedArray(
            nc_array, np.zeros(nc_array.shape, dtype=bool))
    return nc_array


def main(netcdf_ws=os.getcwd(), ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(), etr_flag=False, eto_flag=False,
         start_date=None, end_date=None, extent_path=None,
         output_extent=None, stats_flag=True, overwrite_flag=False):
    """Compute daily ETr/ETo from GRIDMET data

    For each year in the date range, the tmmn, tmmx, sph, srad and vs
    NetCDF files are read and one 366 band raster is written per output
    ("etr_YYYY_daily_gridmet.img" and/or "eto_YYYY_daily_gridmet.img" in
    the "etr"/"eto" subfolders of output_ws).  Each processed day is
    written to the band matching its day of year.  Years with any missing
    input file are skipped.

    Existing output rasters are only rebuilt when overwrite_flag is True;
    otherwise the processed days are written into the existing raster.

    Args:
        netcdf_ws (str): folder of GRIDMET netcdf files
        ancillary_ws (str): folder of ancillary rasters (must contain
            "gridmet_elev.img" and "gridmet_lat.img")
        output_ws (str): folder of output rasters
        etr_flag (bool): if True, compute alfalfa reference ET (ETr).
            ETr is computed if neither etr_flag nor eto_flag is set.
        eto_flag (bool): if True, compute grass reference ET (ETo)
        start_date (str): ISO format date (YYYY-MM-DD).
            Defaults to 2017-01-01 if not set or not parsable.
        end_date (str): ISO format date (YYYY-MM-DD).
            Defaults to 2017-12-31 if not set or not parsable.
        extent_path (str): file path (raster or .shp) defining the
            output extent
        output_extent (list): decimal degrees values defining output extent.
            Takes precedence over extent_path.
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nComputing GRIDMET ETo/ETr')
    np.seterr(invalid='ignore')

    # Compute ETr and/or ETo
    if not etr_flag and not eto_flag:
        logging.info(' ETo/ETr flag(s) not set, defaulting to ETr')
        etr_flag = True

    # If a date is not set (None) or can't be parsed, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug(' Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2017, 1, 1)
        logging.info(' Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug(' End date: {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2017, 12, 31)
        logging.info(' End date: {}'.format(end_dt))

    # Ancillary elevation and latitude rasters on the GRIDMET grid
    elev_raster = os.path.join(ancillary_ws, 'gridmet_elev.img')
    lat_raster = os.path.join(ancillary_ws, 'gridmet_lat.img')

    # Wind speed measurement height passed to the reference ET function
    # GRIDMET wind speed is reported at 10 m (not 2 m)
    zw = 10

    etr_fmt = 'etr_{}_daily_gridmet.img'
    eto_fmt = 'eto_{}_daily_gridmet.img'

    # GRIDMET band name dictionary
    gridmet_band_dict = {
        'pr': 'precipitation_amount',
        'srad': 'surface_downwelling_shortwave_flux_in_air',
        'sph': 'specific_humidity',
        'tmmn': 'air_temperature',
        'tmmx': 'air_temperature',
        'vs': 'wind_speed',
    }
    # Input variables (file name prefixes) needed for reference ET
    input_vars = ['tmmn', 'tmmx', 'sph', 'srad', 'vs']

    # Get extent/geo from elevation raster
    gridmet_ds = gdal.Open(elev_raster)
    gridmet_osr = gdc.raster_ds_osr(gridmet_ds)
    gridmet_proj = gdc.osr_proj(gridmet_osr)
    gridmet_cs = gdc.raster_ds_cellsize(gridmet_ds, x_only=True)
    gridmet_extent = gdc.raster_ds_extent(gridmet_ds)
    gridmet_full_geo = gridmet_extent.geo(gridmet_cs)
    gridmet_x, gridmet_y = gridmet_extent.origin()
    gridmet_ds = None
    logging.debug(' Projection: {}'.format(gridmet_proj))
    logging.debug(' Cellsize: {}'.format(gridmet_cs))
    logging.debug(' Geo: {}'.format(gridmet_full_geo))
    logging.debug(' Extent: {}'.format(gridmet_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        gridmet_extent = gdc.Extent(output_extent)
        gridmet_extent.adjust_to_snap(
            'EXPAND', gridmet_x, gridmet_y, gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug(' Geo: {}'.format(gridmet_geo))
        # Log the snapped extent (the raw input was already logged above)
        logging.debug(' Extent: {}'.format(gridmet_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            gridmet_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            gridmet_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        gridmet_extent = gdc.project_extent(
            gridmet_extent, extent_osr, gridmet_osr, extent_cs)
        gridmet_extent.adjust_to_snap(
            'EXPAND', gridmet_x, gridmet_y, gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug(' Geo: {}'.format(gridmet_geo))
        logging.debug(' Extent: {}'.format(gridmet_extent))
    else:
        gridmet_geo = gridmet_full_geo

    # Get indices for slicing/clipping input arrays
    g_i, g_j = gdc.array_geo_offsets(
        gridmet_full_geo, gridmet_geo, cs=gridmet_cs)
    g_rows, g_cols = gridmet_extent.shape(cs=gridmet_cs)

    # Read the elevation and latitude arrays
    elev_array = gdc.raster_to_array(
        elev_raster, mask_extent=gridmet_extent, return_nodata=False)
    lat_array = gdc.raster_to_array(
        lat_raster, mask_extent=gridmet_extent, return_nodata=False)
    # Convert latitude from degrees to radians
    lat_array *= math.pi / 180

    # Check elevation and latitude arrays
    if np.all(np.isnan(elev_array)):
        logging.error('\nERROR: The elevation array is all nodata, exiting\n')
        sys.exit()
    elif np.all(np.isnan(lat_array)):
        logging.error('\nERROR: The latitude array is all nodata, exiting\n')
        sys.exit()

    # Build output folder
    etr_ws = os.path.join(output_ws, 'etr')
    eto_ws = os.path.join(output_ws, 'eto')
    if etr_flag and not os.path.isdir(etr_ws):
        os.makedirs(etr_ws)
    if eto_flag and not os.path.isdir(eto_ws):
        os.makedirs(eto_ws)

    # Process every year touched by the date range
    year_list = [
        str(year) for year in range(start_dt.year, end_dt.year + 1)]

    # Process each year separately
    for year_str in year_list:
        logging.info("\nYear: {}".format(year_str))
        year_int = int(year_str)

        # Build input file paths
        input_paths = [
            os.path.join(netcdf_ws, '{}_{}.nc'.format(var, year_str))
            for var in input_vars]

        # Check that all input files are present
        missing_paths = [
            input_path for input_path in input_paths
            if not os.path.isfile(input_path)]
        for input_path in missing_paths:
            logging.debug(
                " Input NetCDF doesn't exist\n {}".format(input_path))
        if missing_paths:
            logging.debug(' skipping')
            continue
        for input_path in input_paths:
            logging.debug(" {}".format(input_path))

        # Create a single raster for each year with 366 bands
        # Each day will be stored in a separate band
        etr_raster = os.path.join(etr_ws, etr_fmt.format(year_str))
        eto_raster = os.path.join(eto_ws, eto_fmt.format(year_str))
        # (surface name passed to refet_daily_func, output raster path)
        refet_outputs = []
        if etr_flag:
            refet_outputs.append(('ETR', etr_raster))
        if eto_flag:
            refet_outputs.append(('ETO', eto_raster))
        # TODO: log a debug message when an existing raster is reused
        for _, refet_raster in refet_outputs:
            if overwrite_flag or not os.path.isfile(refet_raster):
                logging.debug(' {}'.format(refet_raster))
                gdc.build_empty_raster(
                    refet_raster, band_cnt=366, output_dtype=np.float32,
                    output_proj=gridmet_proj, output_cs=gridmet_cs,
                    output_extent=gridmet_extent, output_fill_flag=True)

        # Read in the GRIDMET NetCDF files
        # Immediately clip input arrays to save memory
        logging.info(' Reading NetCDFs into memory')
        nc_arrays = {}
        for var, input_path in zip(input_vars, input_paths):
            nc_arrays[var] = _read_gridmet_subset(
                input_path, gridmet_band_dict[var],
                g_i, g_j, g_rows, g_cols)

        # Check all valid dates in the year
        year_dates = date_range(
            dt.datetime(year_int, 1, 1), dt.datetime(year_int + 1, 1, 1))
        for date_dt in year_dates:
            if date_dt < start_dt:
                logging.debug(' {} - before start date, skipping'.format(
                    date_dt.date()))
                continue
            elif date_dt > end_dt:
                logging.debug(' {} - after end date, skipping'.format(
                    date_dt.date()))
                continue
            else:
                logging.info(' {}'.format(date_dt.date()))

            # Bands are 1-based DOY, the NetCDF time axis is 0-based
            doy = int(date_dt.strftime('%j'))
            doy_i = doy - 1

            # Skip the day if any input has no data for it
            if any(doy_i >= nc_arrays[var].shape[0] for var in input_vars):
                logging.info(' date not in netcdf, skipping')
                continue

            day_arrays = {}
            for var in input_vars:
                # Arrays are being read as masked array with a fill value
                # Convert to basic numpy array arrays with nan values
                day_ma = nc_arrays[var][doy_i, :, :]
                day_array = day_ma.data.astype(np.float32)
                day_nodata = float(day_ma.fill_value)
                day_array[day_array == day_nodata] = np.nan
                del day_ma

                # Since inputs are netcdf, need to create GDAL raster
                #   datasets in order to use gdal_common functions
                # The arrays are already clipped, so use the subset geo
                day_ds = gdc.array_to_mem_ds(
                    day_array, output_geo=gridmet_geo,
                    output_proj=gridmet_proj)
                # Then extract the subset from the in memory dataset
                day_arrays[var] = gdc.raster_ds_to_array(
                    day_ds, 1, mask_extent=gridmet_extent,
                    return_nodata=False)
                del day_ds, day_array

            tmin_array = day_arrays['tmmn']
            tmax_array = day_arrays['tmmx']
            sph_array = day_arrays['sph']
            rs_array = day_arrays['srad']
            wind_array = day_arrays['vs']

            # Adjust units
            tmin_array -= 273.15
            tmax_array -= 273.15
            rs_array *= 0.0864

            # ETr/ETo
            for refet_type, refet_raster in refet_outputs:
                refet_array = et_common.refet_daily_func(
                    tmin_array, tmax_array, sph_array, rs_array, wind_array,
                    zw, elev_array, lat_array, doy, refet_type)
                # Save the projected array as 32-bit floats
                gdc.array_to_comp_raster(
                    refet_array.astype(np.float32), refet_raster,
                    band=doy, stats_flag=False)
                del refet_array
            del day_arrays
            del tmin_array, tmax_array, sph_array, rs_array, wind_array

        del nc_arrays

        # Statistics are computed once per year after all bands are written
        if stats_flag:
            for _, refet_raster in refet_outputs:
                gdc.raster_statistics(refet_raster)

    logging.debug('\nScript Complete')
def main(netcdf_ws=os.getcwd(), ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(), start_date=None, end_date=None,
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract DAYMET precipitation

    Every "daymet_v3_prcp_YYYY_na.nc4" file in netcdf_ws whose year
    overlaps the requested date range is split into one raster per day,
    written as "ppt_YYYYMMDD_daymet.img" in the "ppt/YYYY" subfolder
    of output_ws.

    Args:
        netcdf_ws (str): folder of DAYMET netcdf files
        ancillary_ws (str): folder of ancillary rasters
            (must contain "daymet_mask.img")
        output_ws (str): folder of output rasters
        start_date (str): ISO format date (YYYY-MM-DD).
            Defaults to 2015-01-01 if not set or not parsable.
        end_date (str): ISO format date (YYYY-MM-DD).
            Defaults to 2015-12-31 if not set or not parsable.
        extent_path (str): file path (raster or .shp) defining the
            output extent
        output_extent (list): decimal degrees values defining output extent.
            Takes precedence over extent_path.
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nExtracting DAYMET precipitation')

    # If a date is not set (None) or can't be parsed, process 2015
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug(' Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2015, 1, 1)
        logging.info(' Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug(' End date: {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2015, 12, 31)
        logging.info(' End date: {}'.format(end_dt))

    # The ancillary mask raster defines the full DAYMET grid
    mask_raster = os.path.join(ancillary_ws, 'daymet_mask.img')

    # Raw string so the regex escapes are not treated as string escapes
    daymet_re = re.compile(
        r'daymet_v3_(?P<VAR>\w+)_(?P<YEAR>\d{4})_na\.nc4$')

    # Get extent/geo from mask raster
    daymet_ds = gdal.Open(mask_raster)
    daymet_osr = gdc.raster_ds_osr(daymet_ds)
    daymet_proj = gdc.osr_proj(daymet_osr)
    daymet_cs = gdc.raster_ds_cellsize(daymet_ds, x_only=True)
    daymet_extent = gdc.raster_ds_extent(daymet_ds)
    daymet_geo = daymet_extent.geo(daymet_cs)
    daymet_x, daymet_y = daymet_extent.origin()
    daymet_ds = None
    logging.debug(' Projection: {}'.format(daymet_proj))
    logging.debug(' Cellsize: {}'.format(daymet_cs))
    logging.debug(' Geo: {}'.format(daymet_geo))
    logging.debug(' Extent: {}'.format(daymet_extent))
    logging.debug(' Origin: {} {}'.format(daymet_x, daymet_y))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        # Assume input extent is in decimal degrees
        output_extent = gdc.project_extent(
            gdc.Extent(output_extent), gdc.epsg_osr(4326), daymet_osr, 0.001)
        output_extent = gdc.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug(' Geo: {}'.format(output_geo))
        logging.debug(' Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            output_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            output_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        output_extent = gdc.project_extent(
            output_extent, extent_osr, daymet_osr, extent_cs)
        output_extent = gdc.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug(' Geo: {}'.format(output_geo))
        logging.debug(' Extent: {}'.format(output_extent))
    else:
        output_extent = daymet_extent.copy()
        output_geo = daymet_geo[:]

    # Offsets and shape of the output window within the full DAYMET grid
    xi, yi = gdc.array_geo_offsets(daymet_geo, output_geo, daymet_cs)
    output_rows, output_cols = output_extent.shape(daymet_cs)
    logging.debug(' Shape: {} {}'.format(output_rows, output_cols))
    logging.debug(' Offsets: {} {} (x y)'.format(xi, yi))

    # Process each variable
    input_var = 'prcp'
    output_var = 'ppt'
    logging.info("\nVariable: {}".format(input_var))

    # Build output folder
    var_ws = os.path.join(output_ws, output_var)
    if not os.path.isdir(var_ws):
        os.makedirs(var_ws)

    # Process each file in the input workspace
    for input_name in sorted(os.listdir(netcdf_ws)):
        logging.debug("{}".format(input_name))
        input_match = daymet_re.match(input_name)
        if not input_match:
            logging.debug(" Regular expression didn't match, skipping")
            continue
        elif input_match.group('VAR') != input_var:
            logging.debug(" Variable didn't match, skipping")
            continue

        year_str = input_match.group('YEAR')
        logging.info(" Year: {}".format(year_str))
        year_int = int(year_str)
        if year_int < start_dt.year:
            logging.debug(' Before start date, skipping')
            continue
        elif year_int > end_dt.year:
            logging.debug(' After end date, skipping')
            continue

        # Build input file path
        input_raster = os.path.join(netcdf_ws, input_name)

        # Build output folder
        output_year_ws = os.path.join(var_ws, year_str)
        if not os.path.isdir(output_year_ws):
            os.makedirs(output_year_ws)

        # Read in the DAYMET NetCDF file
        # Days are read one at a time, so the file stays open for the
        #   whole year and is closed even if a day fails
        input_nc_f = netCDF4.Dataset(input_raster, 'r')
        try:
            # Check all valid dates in the year
            year_dates = date_range(
                dt.datetime(year_int, 1, 1), dt.datetime(year_int + 1, 1, 1))
            for date_dt in year_dates:
                if date_dt < start_dt:
                    logging.debug(
                        ' {} - before start date, skipping'.format(
                            date_dt.date()))
                    continue
                elif date_dt > end_dt:
                    logging.debug(
                        ' {} - after end date, skipping'.format(
                            date_dt.date()))
                    continue
                else:
                    logging.info(' {}'.format(date_dt.date()))

                output_path = os.path.join(
                    output_year_ws, '{}_{}_daymet.img'.format(
                        output_var, date_dt.strftime('%Y%m%d')))
                if os.path.isfile(output_path):
                    logging.debug(' {}'.format(output_path))
                    if not overwrite_flag:
                        logging.debug(' File already exists, skipping')
                        continue
                    else:
                        logging.debug(
                            ' File already exists, removing existing')
                        os.remove(output_path)

                # The NetCDF time axis is 0-based, DOY is 1-based
                doy = int(date_dt.strftime('%j'))
                doy_i = doy - 1

                # Arrays are being read as masked array with a fill value
                # Convert to basic numpy array arrays with nan values
                try:
                    input_ma = input_nc_f.variables[input_var][
                        doy_i, yi: yi + output_rows, xi: xi + output_cols]
                except IndexError:
                    logging.info(' date not in netcdf, skipping')
                    continue
                # A plain numpy array can be returned when there are no
                #   masked pixels; force it back to a masked array so
                #   .data and .fill_value are available (same workaround
                #   as the GRIDMET scripts)
                if not isinstance(input_ma, np.ma.MaskedArray):
                    input_ma = np.ma.MaskedArray(
                        input_ma, np.zeros(input_ma.shape, dtype=bool))
                input_nodata = float(input_ma.fill_value)
                output_array = input_ma.data.astype(np.float32)
                output_array[output_array == input_nodata] = np.nan

                # Save the array as 32-bit floats
                gdc.array_to_raster(
                    output_array, output_path,
                    output_geo=output_geo, output_proj=daymet_proj,
                    stats_flag=stats_flag)
                del input_ma, output_array
        finally:
            input_nc_f.close()
        del input_nc_f

    logging.debug('\nScript Complete')
def main(netcdf_ws=os.getcwd(), ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(), variables=['prcp'],
         daily_flag=False, monthly_flag=True, annual_flag=False,
         start_year=1981, end_year=2010,
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Generate DAYMET climatology rasters from yearly NetCDF files

    For every requested variable, each matching yearly NetCDF file in
    netcdf_ws (restricted to start_year..end_year) is read one day at a
    time, clipped to the output extent, and accumulated into daily (DOY),
    monthly, and/or annual sums and counts.  The sums are divided by the
    counts and written to IMG rasters below output_ws.

    Args:
        netcdf_ws (str): folder of DAYMET netcdf files
        ancillary_ws (str): folder of ancillary rasters
            (must contain 'daymet_mask.img')
        output_ws (str): folder of output rasters
        variables (list): DAYMET variables to process
            ('prcp', 'tmmn', 'tmmx')
            Set as ['all'] to process all variables
        daily_flag (bool): if True, compute daily (DOY) climatologies
        monthly_flag (bool): if True, compute monthly climatologies
        annual_flag (bool): if True, compute annual climatologies
        start_year (int): YYYY
        end_year (int): YYYY
        extent_path (str): filepath a raster defining the output extent
        output_extent (list): decimal degrees values defining output extent
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files
            (currently not read by this function)

    Returns:
        None

    Notes:
        The script exits (sys.exit) if the variables parameter is empty,
        is not a list, or holds an unsupported name, or if the mask
        raster can not be opened.

    """
    logging.info('\nGenerating DAYMET climatologies')

    # Output names hard-code "30yr"; the start/end years are passed to
    #   format() below but are only used by the commented alternatives
    daily_fmt = 'daymet_{var}_30yr_normal_{doy:03d}.img'
    monthly_fmt = 'daymet_{var}_30yr_normal_{month:02d}.img'
    annual_fmt = 'daymet_{var}_30yr_normal.img'
    # daily_fmt = 'daymet_{var}_normal_{start}_{end}_{doy:03d}.img'
    # monthly_fmt = 'daymet_{var}_normal_{start}_{end}_{month:02d}.img'
    # annual_fmt = 'daymet_{var}_normal_{start}_{end}.img'

    # If a year is not set (or is not a valid year), fall back to the
    #   1981-2010 climatology period
    try:
        start_dt = dt.datetime(start_year, 1, 1)
        logging.debug(' Start date: {}'.format(start_dt))
    except (TypeError, ValueError, OverflowError):
        start_dt = dt.datetime(1981, 1, 1)
        logging.info(' Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime(end_year, 12, 31)
        logging.debug(' End date: {}'.format(end_dt))
    except (TypeError, ValueError, OverflowError):
        end_dt = dt.datetime(2010, 12, 31)
        logging.info(' End date: {}'.format(end_dt))

    # Get DAYMET spatial reference from an ancillary raster
    mask_raster = os.path.join(ancillary_ws, 'daymet_mask.img')

    # Raw string so that \w and \d are not treated as string escapes
    daymet_re = re.compile(
        r'daymet_v3_(?P<VAR>\w+)_(?P<YEAR>\d{4})_na.nc4$')

    # DAYMET rasters to extract
    var_full_list = ['prcp', 'tmmn', 'tmmx']
    # data_full_list = ['prcp', 'srad', 'vp', 'tmmn', 'tmmx']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif not isinstance(variables, list):
        # DEADBEEF - I could try converting comma separated strings to lists?
        logging.error('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif 'all' in variables:
        logging.info('\nProcessing all variables\n {}'.format(
            ','.join(var_full_list)))
        var_list = var_full_list[:]
    elif not set(variables).issubset(set(var_full_list)):
        logging.error(
            '\nERROR: variables parameter is invalid\n {}'.format(variables))
        sys.exit()
    else:
        var_list = variables[:]

    # Get extent/geo from mask raster
    daymet_ds = gdal.Open(mask_raster)
    if daymet_ds is None:
        logging.error(
            '\nERROR: mask raster could not be opened\n {}'.format(
                mask_raster))
        sys.exit()
    daymet_osr = gdc.raster_ds_osr(daymet_ds)
    daymet_proj = gdc.osr_proj(daymet_osr)
    daymet_cs = gdc.raster_ds_cellsize(daymet_ds, x_only=True)
    daymet_extent = gdc.raster_ds_extent(daymet_ds)
    daymet_geo = daymet_extent.geo(daymet_cs)
    daymet_x, daymet_y = daymet_extent.origin()
    daymet_ds = None
    logging.debug(' Projection: {}'.format(daymet_proj))
    logging.debug(' Cellsize: {}'.format(daymet_cs))
    logging.debug(' Geo: {}'.format(daymet_geo))
    logging.debug(' Extent: {}'.format(daymet_extent))
    logging.debug(' Origin: {} {}'.format(daymet_x, daymet_y))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        # Assume input extent is in decimal degrees
        output_extent = gdc.project_extent(
            gdc.Extent(output_extent), gdc.epsg_osr(4326), daymet_osr, 0.001)
        output_extent = gdc.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug(' Geo: {}'.format(output_geo))
        logging.debug(' Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        output_extent = gdc.project_extent(
            gdc.raster_path_extent(extent_path),
            gdc.raster_path_osr(extent_path), daymet_osr,
            gdc.raster_path_cellsize(extent_path, x_only=True))
        output_extent = gdc.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug(' Geo: {}'.format(output_geo))
        logging.debug(' Extent: {}'.format(output_extent))
    else:
        output_extent = daymet_extent.copy()
        output_geo = daymet_geo[:]

    # Offsets and shape used to clip each NetCDF slice to the output extent
    xi, yi = gdc.array_geo_offsets(daymet_geo, output_geo, daymet_cs)
    output_rows, output_cols = output_extent.shape(daymet_cs)
    logging.debug(' Shape: {} {}'.format(output_rows, output_cols))
    logging.debug(' Offsets: {} {} (x y)'.format(xi, yi))

    # Process each variable
    for input_var in var_list:
        logging.info("\nVariable: {}".format(input_var))

        # Rename variables to match cimis
        output_var = 'ppt' if input_var == 'prcp' else input_var
        logging.debug("Output name: {}".format(output_var))

        # Build output folders up front so they exist when saving, even if
        #   the input file loop below never matches a file
        var_ws = os.path.join(output_ws, output_var)
        if not os.path.isdir(var_ws):
            os.makedirs(var_ws)
        if daily_flag:
            daily_ws = os.path.join(var_ws, 'daily')
            if not os.path.isdir(daily_ws):
                os.makedirs(daily_ws)

        # Build output arrays
        logging.debug(' Building arrays')
        if daily_flag:
            # Counts are per-year increments, so uint8 is sufficient
            daily_sum = np.zeros(
                (365, output_rows, output_cols), np.float64)
            daily_count = np.zeros(
                (365, output_rows, output_cols), np.uint8)
        if monthly_flag:
            monthly_sum = np.zeros(
                (12, output_rows, output_cols), np.float64)
            monthly_count = np.zeros(
                (12, output_rows, output_cols), np.uint8)
        if annual_flag:
            # The annual count is incremented once per valid day of every
            #   year, which overflows uint8 within the first year
            annual_sum = np.zeros((output_rows, output_cols), np.float64)
            annual_count = np.zeros((output_rows, output_cols), np.uint32)

        # Process each file/year separately
        year_count = 0
        for input_name in sorted(os.listdir(netcdf_ws)):
            logging.debug(" {}".format(input_name))
            input_match = daymet_re.match(input_name)
            if not input_match:
                logging.debug(' Regular expression didn\'t match, skipping')
                continue
            elif input_match.group('VAR') != input_var:
                logging.debug(' Variable didn\'t match, skipping')
                continue
            year_str = input_match.group('YEAR')
            logging.info(" Year: {}".format(year_str))
            year_int = int(year_str)
            if year_int < start_dt.year:
                logging.debug(' Before start date, skipping')
                continue
            elif year_int > end_dt.year:
                logging.debug(' After end date, skipping')
                continue

            # Build input file path
            input_raster = os.path.join(netcdf_ws, input_name)
            if not os.path.isfile(input_raster):
                logging.debug(
                    ' Input raster doesn\'t exist, skipping {}'.format(
                        input_raster))
                continue

            # Per-year monthly accumulators (reduced to one value per
            #   month after the whole year has been read)
            if monthly_flag:
                monthly_temp_sum = np.zeros(
                    (12, output_rows, output_cols), np.float64)
                monthly_temp_count = np.zeros(
                    (12, output_rows, output_cols), np.uint8)

            # Read in the DAYMET NetCDF file
            # try/finally guarantees the file handle is released even if
            #   reading a slice raises unexpectedly
            input_nc_f = netCDF4.Dataset(input_raster, 'r')
            try:
                # logging.debug(input_nc_f.variables)
                input_nc_var = input_nc_f.variables[input_var]

                # Check all valid dates in the year
                year_dates = date_range(
                    dt.datetime(year_int, 1, 1),
                    dt.datetime(year_int + 1, 1, 1))
                for date_dt in year_dates:
                    logging.debug(' {}'.format(date_dt.date()))

                    # Zero based day of year and month indices
                    doy_i = int(date_dt.strftime('%j')) - 1
                    month_i = date_dt.month - 1

                    # Arrays are being read as masked array with a -9999
                    #   fill value
                    # Convert to basic numpy array arrays with nan values
                    try:
                        input_ma = input_nc_var[
                            doy_i, yi:yi + output_rows, xi:xi + output_cols]
                    except IndexError:
                        logging.info(' date not in netcdf, skipping')
                        continue
                    input_nodata = float(input_ma.fill_value)
                    output_array = input_ma.data.astype(np.float32)
                    output_array[output_array == input_nodata] = np.nan
                    output_mask = np.isfinite(output_array)

                    # Convert Kelvin to Celsius
                    # NOTE(review): 'tmax'/'tmin' are not in var_full_list
                    #   ('tmmn'/'tmmx'), so this branch is never taken
                    #   for the variables this script accepts - confirm
                    #   the intended names and the units of the inputs
                    if input_var in ['tmax', 'tmin']:
                        output_array -= 273.15

                    # Save values
                    # Daily arrays only hold 365 slots, so a 366th day
                    #   (if the file provides one) is left out of the
                    #   daily normals instead of raising IndexError
                    if daily_flag and doy_i < daily_sum.shape[0]:
                        daily_sum[doy_i, :, :] += output_array
                        daily_count[doy_i, :, :] += output_mask
                    if monthly_flag:
                        monthly_temp_sum[month_i, :, :] += output_array
                        monthly_temp_count[month_i, :, :] += output_mask
                    if annual_flag:
                        annual_sum[:, :] += output_array
                        annual_count[:, :] += output_mask

                    # Cleanup
                    del input_ma, output_array, output_mask
            finally:
                input_nc_f.close()
                del input_nc_f

            # Reduce the year to one value per month
            if monthly_flag:
                if input_var == 'prcp':
                    # Precipitation is accumulated as a monthly total
                    monthly_sum += monthly_temp_sum
                else:
                    # Other variables use the mean of the daily values;
                    #   cells without data divide by zero and become NaN
                    with np.errstate(divide='ignore', invalid='ignore'):
                        monthly_sum += monthly_temp_sum / monthly_temp_count
                # Count the year separately for each month/cell that had
                #   at least one valid day in that month
                monthly_count += monthly_temp_count > 0
                del monthly_temp_sum, monthly_temp_count

            year_count += 1

        # Without any input there is nothing meaningful to write
        if not year_count:
            logging.warning(
                ' No input files were processed for variable {}, '
                'skipping output'.format(input_var))
            continue

        # Save the projected climatology arrays
        # Cells that never had data have a zero count; the resulting
        #   NaN is intended, so the numpy warnings are silenced locally
        if daily_flag:
            for doy_i in range(daily_sum.shape[0]):
                daily_name = daily_fmt.format(
                    var=output_var, start=start_year, end=end_year,
                    doy=doy_i + 1)
                daily_path = os.path.join(daily_ws, daily_name)
                with np.errstate(divide='ignore', invalid='ignore'):
                    daily_mean = (
                        daily_sum[doy_i, :, :] / daily_count[doy_i, :, :])
                gdc.array_to_raster(
                    daily_mean, daily_path, output_geo=output_geo,
                    output_proj=daymet_proj, stats_flag=stats_flag)
                del daily_mean
            del daily_sum, daily_count

        if monthly_flag:
            for month_i in range(monthly_sum.shape[0]):
                monthly_name = monthly_fmt.format(
                    var=output_var, start=start_year, end=end_year,
                    month=month_i + 1)
                monthly_path = os.path.join(var_ws, monthly_name)
                with np.errstate(divide='ignore', invalid='ignore'):
                    monthly_mean = (
                        monthly_sum[month_i, :, :] /
                        monthly_count[month_i, :, :])
                gdc.array_to_raster(
                    monthly_mean, monthly_path, output_geo=output_geo,
                    output_proj=daymet_proj, stats_flag=stats_flag)
                del monthly_mean
            del monthly_sum, monthly_count

        if annual_flag:
            # NOTE(review): this is the mean over all valid days, so for
            #   'prcp' it is a mean daily value rather than a mean annual
            #   total - confirm which one is intended
            annual_name = annual_fmt.format(
                var=output_var, start=start_year, end=end_year)
            annual_path = os.path.join(var_ws, annual_name)
            with np.errstate(divide='ignore', invalid='ignore'):
                annual_mean = annual_sum / annual_count
            gdc.array_to_raster(
                annual_mean, annual_path, output_geo=output_geo,
                output_proj=daymet_proj, stats_flag=stats_flag)
            del annual_mean, annual_sum, annual_count

    logging.debug('\nScript Complete')