Example #1
def main(cimis_ws=os.getcwd(),
         gridmet_ws=None,
         ancillary_ws=os.getcwd(),
         etr_flag=False,
         eto_flag=False,
         start_date=None,
         end_date=None,
         stats_flag=True,
         overwrite_flag=False):
    """Fill missing CIMIS days with projected data from GRIDMET

    Currently missing (CGM 2014-08-15)
    2010-11-16 -> 2010-11-23

    Args:
        cimis_ws (str): root folder path of CIMIS data
        gridmet_ws (str): root folder path of GRIDMET data
        ancillary_ws (str): folder of ancillary rasters
        etr_flag (bool): if True, compute alfalfa reference ET (ETr)
        eto_flag (bool): if True, compute grass reference ET (ETo)
        start_date (str): ISO format date (YYYY-MM-DD)
        end_date (str): ISO format date (YYYY-MM-DD)
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nFilling CIMIS with GRIDMET')
    cimis_re = re.compile(
        r'(?P<VAR>et[or])_(?P<YYYY>\d{4})_daily_(?P<GRID>\w+)\.img$')
    # gridmet_re = re.compile(
    #     r'(?P<VAR>ppt)_(?P<YYYY>\d{4})_daily_(?P<GRID>\w+)\.img$')
    gridmet_fmt = '{}_{}_daily_gridmet.img'

    # Compute ETr and/or ETo
    if not etr_flag and not eto_flag:
        logging.info('  ETo/ETr flag(s) not set, defaulting to ETr')
        etr_flag = True

    logging.debug('  CIMIS: {}'.format(cimis_ws))
    logging.debug('  GRIDMET: {}'.format(gridmet_ws))

    # If a date is not set, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date:   {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    # Get GRIDMET spatial reference and cellsize from elevation raster
    # gridmet_elev_raster = os.path.join(ancillary_ws, 'gridmet_elev.img')

    # Get CIMIS spatial reference and cellsize from mask raster
    cimis_mask_raster = os.path.join(ancillary_ws, 'cimis_mask.img')

    # Resample type
    # 0 = GRA_NearestNeighbour, Nearest neighbour (select on one input pixel)
    # 1 = GRA_Bilinear,Bilinear (2x2 kernel)
    # 2 = GRA_Cubic, Cubic Convolution Approximation (4x4 kernel)
    # 3 = GRA_CubicSpline, Cubic B-Spline Approximation (4x4 kernel)
    # 4 = GRA_Lanczos, Lanczos windowed sinc interpolation (6x6 kernel)
    # 5 = GRA_Average, Average (computes the average of all non-NODATA contributing pixels)
    # 6 = GRA_Mode, Mode (selects the value which appears most often of all the sampled points)
    resample_type = gdal.GRA_Bilinear

    # ETo/ETr workspaces
    cimis_eto_ws = os.path.join(cimis_ws, 'eto')
    cimis_etr_ws = os.path.join(cimis_ws, 'etr')
    gridmet_eto_ws = os.path.join(gridmet_ws, 'eto')
    gridmet_etr_ws = os.path.join(gridmet_ws, 'etr')

    # This allows GDAL to throw Python Exceptions
    # gdal.UseExceptions()
    # mem_driver = gdal.GetDriverByName('MEM')

    # Get CIMIS grid properties from mask
    logging.info('\nCIMIS Properties')
    cimis_mask_ds = gdal.Open(cimis_mask_raster)
    cimis_osr = gdc.raster_ds_osr(cimis_mask_ds)
    cimis_proj = gdc.osr_proj(cimis_osr)
    cimis_cs = gdc.raster_ds_cellsize(cimis_mask_ds, x_only=True)
    cimis_extent = gdc.raster_ds_extent(cimis_mask_ds)
    cimis_geo = cimis_extent.geo(cimis_cs)
    cimis_mask_ds = None
    logging.debug('  Projection: {}'.format(cimis_proj))
    logging.debug('  Cellsize: {}'.format(cimis_cs))
    logging.debug('  Geo: {}'.format(cimis_geo))
    logging.debug('  Extent: {}'.format(cimis_extent))

    # Read the CIMIS mask array if present
    cimis_mask, cimis_mask_nodata = gdc.raster_to_array(cimis_mask_raster)
    cimis_mask = cimis_mask != cimis_mask_nodata

    # # Get extent/geo from elevation raster
    # logging.info('\nGRIDMET Properties')
    # gridmet_ds = gdal.Open(gridmet_elev_raster)
    # gridmet_osr = gdc.raster_ds_osr(gridmet_ds)
    # gridmet_proj = gdc.osr_proj(gridmet_osr)
    # gridmet_cs = gdc.raster_ds_cellsize(gridmet_ds, x_only=True)
    # gridmet_full_extent = gdc.raster_ds_extent(gridmet_ds)
    # gridmet_full_geo = gridmet_full_extent.geo(gridmet_cs)
    # gridmet_x, gridmet_y = gridmet_full_extent.origin()
    # gridmet_ds = None
    # logging.debug('  Projection: {}'.format(gridmet_proj))
    # logging.debug('  Cellsize: {}'.format(gridmet_cs))
    # logging.debug('  Geo: {}'.format(gridmet_full_geo))
    # logging.debug('  Extent: {}'.format(gridmet_full_extent))

    # # Project CIMIS extent to the GRIDMET spatial reference
    # logging.info('\nGet CIMIS extent in GRIDMET spat. ref.')
    # gridmet_sub_extent = gdc.project_extent(
    #     cimis_extent, cimis_osr, gridmet_osr, cimis_cs)
    # gridmet_sub_extent.buffer_extent(4 * gridmet_cs)
    # gridmet_sub_extent.adjust_to_snap(
    #     'EXPAND', gridmet_x, gridmet_y, gridmet_cs)
    # gridmet_sub_geo = gridmet_sub_extent.geo(gridmet_cs)
    # logging.debug('  Geo: {}'.format(gridmet_sub_geo))
    # logging.debug('  Extent: {}'.format(gridmet_sub_extent))

    # Process Missing ETo
    if eto_flag:
        logging.info('\nETo')
        for cimis_name in sorted(os.listdir(cimis_eto_ws)):
            logging.debug("\n{}".format(cimis_name))
            cimis_match = cimis_re.match(cimis_name)
            if not cimis_match:
                logging.debug('  Regular expression didn\'t match, skipping')
                continue
            year = int(cimis_match.group('YYYY'))
            logging.info("  {}".format(str(year)))
            if start_dt is not None and year < start_dt.year:
                logging.debug('  Before start date, skipping')
                continue
            elif end_dt is not None and year > end_dt.year:
                logging.debug('  After end date, skipping')
                continue

            cimis_path = os.path.join(cimis_eto_ws, cimis_name)
            gridmet_path = os.path.join(gridmet_eto_ws,
                                        gridmet_fmt.format('eto', year))
            if not os.path.isfile(gridmet_path):
                logging.debug('  GRIDMET raster does not exist, skipping')
                continue
            if not os.path.isfile(cimis_path):
                logging.error('  CIMIS raster does not exist, skipping')
                continue

            # Check all valid dates in the year
            year_dates = date_range(dt.datetime(year, 1, 1),
                                    dt.datetime(year + 1, 1, 1))
            for date_dt in year_dates:
                if start_dt is not None and date_dt < start_dt:
                    continue
                elif end_dt is not None and date_dt > end_dt:
                    continue
                doy = int(date_dt.strftime('%j'))

                # Look for arrays that don't have data
                eto_array = gdc.raster_to_array(cimis_path,
                                                band=doy,
                                                return_nodata=False)
                if np.any(np.isfinite(eto_array)):
                    logging.debug('  {} - data present, skipping'.format(
                        date_dt.strftime('%Y-%m-%d')))
                    continue
                else:
                    logging.info('  {}'.format(date_dt.strftime('%Y-%m-%d')))

                # # This is much faster but doesn't apply the CIMIS mask
                # # Create an in memory dataset of the full ETo array
                # eto_full_rows, eto_full_cols = eto_full_array[:,:,doy_i].shape
                # eto_full_type, eto_full_nodata = numpy_to_gdal_type(np.float32)
                # eto_full_ds = mem_driver.Create(
                #     '', eto_full_cols, eto_full_rows, 1, eto_full_type)
                # eto_full_ds.SetProjection(gridmet_proj)
                # eto_full_ds.SetGeoTransform(gridmet_full_geo)
                # eto_full_band = eto_full_ds.GetRasterBand(1)
                # # eto_full_band.Fill(eto_full_nodata)
                # eto_full_band.SetNoDataValue(eto_full_nodata)
                # eto_full_band.WriteArray(eto_full_array[:,:,doy_i], 0, 0)
                #
                # # Extract the subset
                # eto_sub_array, eto_sub_nodata = gdc.raster_ds_to_array(
                #     eto_full_ds, 1, gridmet_sub_extent)
                # eto_sub_rows, eto_sub_cols = eto_sub_array.shape
                # eto_full_ds = None
                #
                # # Create projected raster
                # eto_sub_ds = mem_driver.Create(
                #     '', eto_sub_cols, eto_sub_rows, 1, eto_full_type)
                # eto_sub_ds.SetProjection(gridmet_proj)
                # eto_sub_ds.SetGeoTransform(gridmet_sub_geo)
                # eto_sub_band = eto_sub_ds.GetRasterBand(1)
                # eto_sub_band.Fill(eto_sub_nodata)
                # eto_sub_band.SetNoDataValue(eto_sub_nodata)
                # eto_sub_band.WriteArray(eto_sub_array, 0, 0)
                # eto_sub_ds.FlushCache()
                #
                # # Project input DEM to CIMIS spat. ref.
                # gdc.project_raster_ds(
                #     eto_sub_ds, gridmet_path, resample_type,
                #     env.snap_proj, env.cellsize, cimis_extent)
                # eto_sub_ds = None

                # Extract the subset
                gridmet_ds = gdal.Open(gridmet_path)
                gridmet_extent = gdc.raster_ds_extent(gridmet_ds)
                gridmet_cs = gdc.raster_ds_cellsize(gridmet_ds, x_only=True)
                gridmet_osr = gdc.raster_ds_osr(gridmet_ds)
                eto_full_array = gdc.raster_ds_to_array(gridmet_ds,
                                                        band=doy,
                                                        return_nodata=False)
                gridmet_ds = None

                # Get the projected subset of the full ETo array
                # This is slower than projecting the subset above
                eto_sub_array = gdc.project_array(eto_full_array,
                                                  resample_type, gridmet_osr,
                                                  gridmet_cs, gridmet_extent,
                                                  cimis_osr, cimis_cs,
                                                  cimis_extent)

                # Save the projected array
                gdc.array_to_comp_raster(eto_sub_array,
                                         cimis_path,
                                         band=doy,
                                         stats_flag=False)
                # gdc.array_to_raster(
                #     eto_sub_array, output_path, output_geo=cimis_geo,
                #     output_proj=cimis_proj, stats_flag=False)
                # gdc.array_to_raster(
                #     eto_sub_array, output_path,
                #     output_geo=cimis_geo, output_proj=cimis_proj,
                #     mask_array=cimis_mask, stats_flag=False)

                del eto_sub_array, eto_full_array

            if stats_flag:
                gdc.raster_statistics(cimis_path)

    # Process Missing ETr
    if etr_flag:
        logging.info('\nETr')
        for cimis_name in sorted(os.listdir(cimis_etr_ws)):
            cimis_match = cimis_re.match(cimis_name)
            if not cimis_match:
                continue
            year = int(cimis_match.group('YYYY'))
            if start_dt is not None and year < start_dt.year:
                continue
            elif end_dt is not None and year > end_dt.year:
                continue
            logging.info("{}".format(str(year)))

            cimis_path = os.path.join(cimis_etr_ws, cimis_name)
            gridmet_path = os.path.join(gridmet_etr_ws,
                                        gridmet_fmt.format('etr', year))
            if not os.path.isfile(gridmet_path):
                continue
            if not os.path.isfile(cimis_path):
                logging.error('  CIMIS raster does not exist')
                continue

            # Check all valid dates in the year
            year_dates = date_range(dt.datetime(year, 1, 1),
                                    dt.datetime(year + 1, 1, 1))
            for date_dt in year_dates:
                if start_dt is not None and date_dt < start_dt:
                    continue
                elif end_dt is not None and date_dt > end_dt:
                    continue
                doy = int(date_dt.strftime('%j'))

                # Look for arrays that don't have data
                etr_array = gdc.raster_to_array(cimis_path,
                                                band=doy,
                                                return_nodata=False)
                if np.any(np.isfinite(etr_array)):
                    logging.debug('  {} - skipping'.format(
                        date_dt.strftime('%Y-%m-%d')))
                    continue
                else:
                    logging.info('  {}'.format(date_dt.strftime('%Y-%m-%d')))

                # Extract the subset
                gridmet_ds = gdal.Open(gridmet_path)
                gridmet_extent = gdc.raster_ds_extent(gridmet_ds)
                gridmet_cs = gdc.raster_ds_cellsize(gridmet_ds, x_only=True)
                gridmet_osr = gdc.raster_ds_osr(gridmet_ds)
                etr_full_array = gdc.raster_ds_to_array(gridmet_ds,
                                                        band=doy,
                                                        return_nodata=False)
                gridmet_ds = None

                # Get the projected subset of the full ETr array
                # This is slower than projecting the subset
                etr_sub_array = gdc.project_array(etr_full_array,
                                                  resample_type, gridmet_osr,
                                                  gridmet_cs, gridmet_extent,
                                                  cimis_osr, cimis_cs,
                                                  cimis_extent)

                # Save the projected array
                gdc.array_to_comp_raster(etr_sub_array,
                                         cimis_path,
                                         band=doy,
                                         stats_flag=False)
                # gdc.array_to_raster(
                #     etr_sub_array, output_path,
                #     output_geo=cimis_geo, output_proj=cimis_proj,
                #     mask_array=cimis_mask, stats_flag=False)

                del etr_sub_array, etr_full_array

            if stats_flag:
                gdc.raster_statistics(cimis_path)

    logging.debug('\nScript Complete')
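
A note on the band convention above: each day's grid is written to the band whose number equals the day of year ('%j'), so an annual raster holds up to 366 bands. A minimal standard-library sketch of that indexing (the helper name doy_band is mine, not from the script):

import datetime as dt

def doy_band(date_str):
    """Return the 1-based raster band for an ISO date (band number == DOY)."""
    return int(dt.datetime.strptime(date_str, '%Y-%m-%d').strftime('%j'))

print(doy_band('2017-01-01'))  # 1
print(doy_band('2017-12-31'))  # 365
print(doy_band('2016-12-31'))  # 366 (leap year)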
Example #2
def main(grb_ws=os.getcwd(), ancillary_ws=os.getcwd(), output_ws=os.getcwd(),
         variables=['pr'], landsat_ws=None,
         start_date=None, end_date=None, times_str='',
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract NLDAS target variable(s)

    Args:
        grb_ws (str): folder of NLDAS GRB files
        ancillary_ws (str): folder of ancillary rasters
        output_ws (str): folder of output rasters
        variables (list): NLDAS variables to download
            ('pr', 'srad', 'sph', 'tair', 'tmmn', 'tmmx', 'vs')
        landsat_ws (str): folder of Landsat scenes or tar.gz files
        start_date (str): ISO format date (YYYY-MM-DD)
        end_date (str): ISO format date (YYYY-MM-DD)
        times_str (str): comma separated values and/or ranges of UTC hours
            (i.e. "1, 2, 5-8")
            Parsed with python_common.parse_int_set()
        extent_path (str): file path defining the output extent
        output_extent (list): decimal degrees values defining output extent
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nExtract NLDAS target variable(s)')

    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    input_re = re.compile(
        r'NLDAS_FORA0125_H\.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2})\.(?P<TIME>\d{4})\.002\.grb$')

    output_fmt = '{}_{:04d}{:02d}{:02d}_hourly_nldas.img'
    # output_fmt = '{}_{:04d}{:02d}{:02d}_{:04d}_nldas.img'

    # If a date is not set, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date:   {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    # Only process specific hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(parse_int_set(times_str))
    time_list = ['{:02d}00'.format(t) for t in time_list]

    # Assume NLDAS is NAD83
    # input_epsg = 'EPSG:4269'

    # NLDAS rasters to extract
    data_full_list = ['pr', 'srad', 'sph', 'tair', 'tmmn', 'tmmx', 'vs']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif not isinstance(variables, list):
        # DEADBEEF - I could try converting comma separated strings to lists?
        logging.error('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif not set(variables).issubset(set(data_full_list)):
        logging.error('\nERROR: variables parameter is invalid\n  {}'.format(
            variables))
        sys.exit()

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')

    # Build a date list from landsat_ws scene folders or tar.gz files
    date_list = []
    if landsat_ws is not None and os.path.isdir(landsat_ws):
        logging.info('\nReading dates from Landsat IDs')
        logging.info('  {}'.format(landsat_ws))
        landsat_re = re.compile(
            r'^(?:LT04|LT05|LE07|LC08)_(?:\d{3})(?:\d{3})_'
            r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})')
        for root, dirs, files in os.walk(landsat_ws, topdown=True):
            # If root matches, don't explore subfolders
            try:
                landsat_match = landsat_re.match(os.path.basename(root))
                date_list.append(dt.datetime.strptime(
                    '_'.join(landsat_match.groups()), '%Y_%m_%d').date().isoformat())
                dirs[:] = []
            except (AttributeError, ValueError):
                pass

            for file in files:
                try:
                    landsat_match = landsat_re.match(file)
                    date_list.append(dt.datetime.strptime(
                        '_'.join(landsat_match.groups()), '%Y_%m_%d').date().isoformat())
                except (AttributeError, ValueError):
                    pass
        date_list = sorted(list(set(date_list)))
    # elif landsat_ws is not None and os.path.isfile(landsat_ws):
    #     with open(landsat_ws) as landsat_f:

    # This allows GDAL to throw Python Exceptions
    # gdal.UseExceptions()
    # mem_driver = gdal.GetDriverByName('MEM')

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    nldas_osr = gdc.raster_ds_osr(nldas_ds)
    nldas_proj = gdc.osr_proj(nldas_osr)
    nldas_cs = gdc.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = gdc.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug('  Projection: {}'.format(nldas_proj))
    logging.debug('  Cellsize: {}'.format(nldas_cs))
    logging.debug('  Geo: {}'.format(nldas_geo))
    logging.debug('  Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        nldas_extent = gdc.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            nldas_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = gdc.project_extent(
            nldas_extent, extent_osr, nldas_osr, extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = gdc.raster_to_array(
            mask_path, mask_extent=nldas_extent, fill_value=0,
            return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None

    # NLDAS band name dictionary
    nldas_band_dict = dict()
    nldas_band_dict['pr'] = 'Total precipitation [kg/m^2]'
    nldas_band_dict['srad'] = 'Downward shortwave radiation flux [W/m^2]'
    nldas_band_dict['sph'] = 'Specific humidity [kg/kg]'
    nldas_band_dict['tair'] = 'Temperature [C]'
    nldas_band_dict['tmmn'] = 'Temperature [C]'
    nldas_band_dict['tmmx'] = 'Temperature [C]'
    nldas_band_dict['vs'] = [
        'u-component of wind [m/s]', 'v-component of wind [m/s]']

    # NLDAS band name dictionary
    # nldas_band_dict = dict()
    # nldas_band_dict['pr'] = 'precipitation_amount'
    # nldas_band_dict['srad'] = 'surface_downwelling_shortwave_flux_in_air'
    # nldas_band_dict['sph'] = 'specific_humidity'
    # nldas_band_dict['tmmn'] = 'air_temperature'
    # nldas_band_dict['tmmx'] = 'air_temperature'
    # nldas_band_dict['vs'] = 'wind_speed'

    # NLDAS band name dictionary (EarthEngine keys, GRID_ELEMENT values)
    # nldas_band_dict = dict()
    # nldas_band_dict['total_precipitation'] = 'Total precipitation [kg/m^2]'
    # nldas_band_dict['shortwave_radiation'] = 'Downward shortwave radiation flux [W/m^2]'
    # nldas_band_dict['specific_humidity'] = 'Specific humidity [kg/kg]'
    # nldas_band_dict['pressure'] = 'Pressure [Pa]'
    # nldas_band_dict['temperature'] = 'Temperature [C]'
    # nldas_band_dict['wind_u'] = 'u-component of wind [m/s]'
    # nldas_band_dict['wind_v'] = 'v-component of wind [m/s]'

    # Process each variable
    logging.info('\nReading NLDAS GRIBs')
    for input_var in variables:
        logging.info("Variable: {}".format(input_var))

        # Build output folder
        var_ws = os.path.join(output_ws, input_var)
        if not os.path.isdir(var_ws):
            os.makedirs(var_ws)

        # Each subfolder in the main folder has all imagery for one day
        # The path for each subfolder is /YYYY/DOY

        # This approach will process files for target dates
        # for input_dt in date_range(start_dt, end_dt + dt.timedelta(1)):
        #     logging.info(input_dt.date())

        # Iterate all available files and check dates if necessary
        for root, folders, files in os.walk(grb_ws):
            root_split = os.path.normpath(root).split(os.sep)

            # If the year/doy is outside the range, skip
            if (re.match(r'\d{4}', root_split[-2]) and
                    re.match(r'\d{3}', root_split[-1])):
                root_dt = dt.datetime.strptime('{}_{}'.format(
                    root_split[-2], root_split[-1]), '%Y_%j')
                logging.info('{}-{:02d}-{:02d}'.format(
                    root_dt.year, root_dt.month, root_dt.day))
                if ((start_dt is not None and root_dt < start_dt) or
                        (end_dt is not None and root_dt > end_dt)):
                    continue
                elif date_list and root_dt.date().isoformat() not in date_list:
                    continue
            # If the year is outside the range, don't search subfolders
            elif re.match(r'\d{4}', root_split[-1]):
                root_year = int(root_split[-1])
                logging.info('Year: {}'.format(root_year))
                if ((start_dt is not None and root_year < start_dt.year) or
                        (end_dt is not None and root_year > end_dt.year)):
                    folders[:] = []
                else:
                    folders[:] = sorted(folders)
                continue
            else:
                continue

            # Create a single raster for each day with 24 bands
            # Each time step will be stored in a separate band
            output_name = output_fmt.format(
                input_var, root_dt.year, root_dt.month, root_dt.day)
            output_path = os.path.join(
                var_ws, str(root_dt.year), output_name)
            logging.debug('  {}'.format(output_path))
            if os.path.isfile(output_path):
                if not overwrite_flag:
                    logging.debug('    File already exists, skipping')
                    continue
                else:
                    logging.debug('    File already exists, removing existing')
                    os.remove(output_path)
            logging.debug('  {}'.format(root))
            if not os.path.isdir(os.path.dirname(output_path)):
                os.makedirs(os.path.dirname(output_path))
            gdc.build_empty_raster(
                output_path, band_cnt=24, output_dtype=np.float32,
                output_proj=nldas_proj, output_cs=nldas_cs,
                output_extent=nldas_extent, output_fill_flag=True)

            # Iterate through hourly files
            for input_name in sorted(files):
                logging.info('  {}'.format(input_name))
                input_path = os.path.join(root, input_name)
                input_match = input_re.match(input_name)
                if input_match is None:
                    logging.debug(
                        '  Regular expression didn\'t match, skipping')
                    continue
                input_dt = dt.datetime(
                    int(input_match.group('YEAR')),
                    int(input_match.group('MONTH')),
                    int(input_match.group('DAY')))
                time_str = input_match.group('TIME')
                band_num = int(time_str[:2]) + 1
                # if start_dt is not None and input_dt < start_dt:
                #     continue
                # elif end_dt is not None and input_dt > end_dt:
                #     continue
                # elif date_list and input_dt.date().isoformat() not in date_list:
                #     continue
                if time_str not in time_list:
                    logging.debug('    Time not in list, skipping')
                    continue
                logging.debug('    Time: {} {}'.format(
                    input_dt.date(), time_str))
                logging.debug('    Band: {}'.format(band_num))

                # Determine band numbering/naming
                input_band_dict = grib_band_names(input_path)

                # Extract array and save
                input_ds = gdal.Open(input_path)

                # Convert Kelvin to Celsius (older NLDAS files appear to be in K)
                if input_var in ['tair', 'tmmx', 'tmmn']:
                    # Temperature should be in C for et_common.refet_hourly_func()
                    if 'Temperature [K]' in input_band_dict.keys():
                        temp_band_units = 'K'
                        output_array = gdc.raster_ds_to_array(
                            input_ds, band=input_band_dict['Temperature [K]'],
                            mask_extent=nldas_extent, return_nodata=False)
                    elif 'Temperature [C]' in input_band_dict.keys():
                        temp_band_units = 'C'
                        output_array = gdc.raster_ds_to_array(
                            input_ds, band=input_band_dict['Temperature [C]'],
                            mask_extent=nldas_extent, return_nodata=False)
                    else:
                        logging.error('Unknown Temperature units, skipping')
                        logging.error('  {}'.format(input_band_dict.keys()))
                        continue

                    # DEADBEEF - Having issue with T appearing to be C but labeled as K
                    # Try to determine temperature units from values
                    temp_mean = float(np.nanmean(output_array))
                    temp_units_dict = {20: 'C', 293: 'K'}
                    temp_array_units = temp_units_dict[
                        min(temp_units_dict, key=lambda x: abs(x - temp_mean))]
                    if temp_array_units == 'K' and temp_band_units == 'K':
                        logging.debug('  Converting temperature from K to C')
                        output_array -= 273.15
                    elif temp_array_units == 'C' and temp_band_units == 'C':
                        pass
                    elif temp_array_units == 'C' and temp_band_units == 'K':
                        logging.debug(
                            ('  Temperature units are K in the GRB band name, ' +
                             'but values appear to be C\n    Mean temperature: {:.2f}\n' +
                             '  Values will NOT be adjusted').format(temp_mean))
                    elif temp_array_units == 'K' and temp_band_units == 'C':
                        logging.debug(
                            ('  Temperature units are C in the GRB band name, ' +
                             'but values appear to be K\n    Mean temperature: {:.2f}\n' +
                             '  Values will be adjusted from K to C').format(temp_mean))
                        output_array -= 273.15

                # Compute wind speed from vectors
                elif input_var == 'vs':
                    wind_u_array = gdc.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict['u-component of wind [m/s]'],
                        mask_extent=nldas_extent, return_nodata=False)
                    wind_v_array = gdc.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict['v-component of wind [m/s]'],
                        mask_extent=nldas_extent, return_nodata=False)
                    output_array = np.sqrt(
                        wind_u_array ** 2 + wind_v_array ** 2)
                # Read all other variables directly
                else:
                    output_array = gdc.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict[nldas_band_dict[input_var]],
                        mask_extent=nldas_extent, return_nodata=False)

                # Save the projected array as 32-bit floats
                gdc.array_to_comp_raster(
                    output_array.astype(np.float32), output_path,
                    band=band_num)
                # gdc.block_to_raster(
                #     ea_array.astype(np.float32), output_path, band=band)
                # gdc.array_to_raster(
                #     output_array.astype(np.float32), output_path,
                #     output_geo=nldas_geo, output_proj=nldas_proj,
                #     stats_flag=stats_flag)

                del output_array
                input_ds = None

            if stats_flag:
                gdc.raster_statistics(output_path)

    logging.debug('\nScript Complete')
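
The DEADBEEF temperature block above guesses whether a grid is in Kelvin or Celsius by comparing the array mean against typical magnitudes (about 293 for K, about 20 for C) and taking the nearest key. A self-contained sketch of that heuristic (guess_temp_units is a hypothetical name):

import numpy as np

def guess_temp_units(temp_array):
    """Guess 'K' or 'C' from the mean value of a temperature array."""
    temp_units_dict = {20: 'C', 293: 'K'}
    temp_mean = float(np.nanmean(temp_array))
    # Pick the unit whose reference value is closest to the observed mean
    return temp_units_dict[
        min(temp_units_dict, key=lambda k: abs(k - temp_mean))]

print(guess_temp_units(np.array([288.0, 295.0, 301.0])))  # 'K'
print(guess_temp_units(np.array([12.0, 21.0, 28.0])))     # 'C'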
Example #3
def main(netcdf_ws=os.getcwd(), ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(), start_date=None, end_date=None,
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract DAYMET precipitation

    Args:
        netcdf_ws (str): folder of DAYMET netcdf files
        ancillary_ws (str): folder of ancillary rasters
        output_ws (str): folder of output rasters
        start_date (str): ISO format date (YYYY-MM-DD)
        end_date (str): ISO format date (YYYY-MM-DD)
        extent_path (str): file path defining the output extent
        output_extent (list): decimal degrees values defining output extent
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nExtracting DAYMET precipitation')

    # If a date is not set, process 2015
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2015, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date:   {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2015, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    # Save DAYMET lat, lon, and elevation arrays
    mask_raster = os.path.join(ancillary_ws, 'daymet_mask.img')

    daymet_re = re.compile(r'daymet_v3_(?P<VAR>\w+)_(?P<YEAR>\d{4})_na\.nc4$')

    # DAYMET band name dictionary
    # daymet_band_dict = dict()
    # daymet_band_dict['prcp'] = 'precipitation_amount'
    # daymet_band_dict['srad'] = 'surface_downwelling_shortwave_flux_in_air'
    # daymet_band_dict['sph'] = 'specific_humidity'
    # daymet_band_dict['tmin'] = 'air_temperature'
    # daymet_band_dict['tmax'] = 'air_temperature'

    # Get extent/geo from mask raster
    daymet_ds = gdal.Open(mask_raster)
    daymet_osr = gdc.raster_ds_osr(daymet_ds)
    daymet_proj = gdc.osr_proj(daymet_osr)
    daymet_cs = gdc.raster_ds_cellsize(daymet_ds, x_only=True)
    daymet_extent = gdc.raster_ds_extent(daymet_ds)
    daymet_geo = daymet_extent.geo(daymet_cs)
    daymet_x, daymet_y = daymet_extent.origin()
    daymet_ds = None
    logging.debug('  Projection: {}'.format(daymet_proj))
    logging.debug('  Cellsize: {}'.format(daymet_cs))
    logging.debug('  Geo: {}'.format(daymet_geo))
    logging.debug('  Extent: {}'.format(daymet_extent))
    logging.debug('  Origin: {} {}'.format(daymet_x, daymet_y))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        # Assume input extent is in decimal degrees
        output_extent = gdc.project_extent(
            gdc.Extent(output_extent), gdc.epsg_osr(4326), daymet_osr, 0.001)
        output_extent = gdc.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug('  Geo: {}'.format(output_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            output_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            output_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        output_extent = gdc.project_extent(
            output_extent, extent_osr, daymet_osr, extent_cs)
        output_extent = gdc.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug('  Geo: {}'.format(output_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    else:
        output_extent = daymet_extent.copy()
        output_geo = daymet_geo[:]
    # output_shape = output_extent.shape(cs=daymet_cs)
    xi, yi = gdc.array_geo_offsets(daymet_geo, output_geo, daymet_cs)
    output_rows, output_cols = output_extent.shape(daymet_cs)
    logging.debug('  Shape: {} {}'.format(output_rows, output_cols))
    logging.debug('  Offsets: {} {} (x y)'.format(xi, yi))

    # Process each variable
    input_var = 'prcp'
    output_var = 'ppt'
    logging.info("\nVariable: {}".format(input_var))

    # Build output folder
    var_ws = os.path.join(output_ws, output_var)
    if not os.path.isdir(var_ws):
        os.makedirs(var_ws)

    # Process each file in the input workspace
    for input_name in sorted(os.listdir(netcdf_ws)):
        logging.debug("{}".format(input_name))
        input_match = daymet_re.match(input_name)
        if not input_match:
            logging.debug('  Regular expression didn\'t match, skipping')
            continue
        elif input_match.group('VAR') != input_var:
            logging.debug('  Variable didn\'t match, skipping')
            continue
        year_str = input_match.group('YEAR')
        logging.info("  Year: {}".format(year_str))
        year_int = int(year_str)
        year_days = int(dt.datetime(year_int, 12, 31).strftime('%j'))
        if start_dt is not None and year_int < start_dt.year:
            logging.debug('    Before start date, skipping')
            continue
        elif end_dt is not None and year_int > end_dt.year:
            logging.debug('    After end date, skipping')
            continue

        # Build input file path
        input_raster = os.path.join(netcdf_ws, input_name)
        # if not os.path.isfile(input_raster):
        #     logging.debug(
        #         '    Input raster doesn\'t exist, skipping    {}'.format(
        #             input_raster))
        #     continue

        # Build output folder
        output_year_ws = os.path.join(var_ws, year_str)
        if not os.path.isdir(output_year_ws):
            os.makedirs(output_year_ws)

        # Read in the DAYMET NetCDF file
        input_nc_f = netCDF4.Dataset(input_raster, 'r')
        # logging.debug(input_nc_f.variables)

        # Check all valid dates in the year
        year_dates = date_range(
            dt.datetime(year_int, 1, 1), dt.datetime(year_int + 1, 1, 1))
        for date_dt in year_dates:
            if start_dt is not None and date_dt < start_dt:
                logging.debug('  {} - before start date, skipping'.format(
                    date_dt.date()))
                continue
            elif end_dt is not None and date_dt > end_dt:
                logging.debug('  {} - after end date, skipping'.format(
                    date_dt.date()))
                continue
            else:
                logging.info('  {}'.format(date_dt.date()))

            output_path = os.path.join(
                output_year_ws, '{}_{}_daymet.img'.format(
                    output_var, date_dt.strftime('%Y%m%d')))
            if os.path.isfile(output_path):
                logging.debug('    {}'.format(output_path))
                if not overwrite_flag:
                    logging.debug('    File already exists, skipping')
                    continue
                else:
                    logging.debug('    File already exists, removing existing')
                    os.remove(output_path)

            doy = int(date_dt.strftime('%j'))
            doy_i = range(1, year_days + 1).index(doy)

            # Arrays are read as masked arrays with a fill value of -9999
            # Convert to plain numpy arrays with nan values
            try:
                input_ma = input_nc_f.variables[input_var][
                    doy_i, yi: yi + output_rows, xi: xi + output_cols]
            except IndexError:
                logging.info('    date not in netcdf, skipping')
                continue
            input_nodata = float(input_ma.fill_value)
            output_array = input_ma.data.astype(np.float32)
            output_array[output_array == input_nodata] = np.nan

            # Save the array as 32-bit floats
            gdc.array_to_raster(
                output_array.astype(np.float32), output_path,
                output_geo=output_geo, output_proj=daymet_proj,
                stats_flag=stats_flag)

            del input_ma, output_array
        input_nc_f.close()
        del input_nc_f

    logging.debug('\nScript Complete')
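
Example #3 converts the masked arrays returned by netCDF4 into plain float32 arrays with NaN as nodata. A standalone sketch of that conversion, assuming a -9999 fill value as in the comments above:

import numpy as np

# Hypothetical 2x2 masked array standing in for one day of NetCDF data
input_ma = np.ma.MaskedArray(
    np.array([[1.5, -9999.0], [0.0, 2.5]], dtype=np.float32),
    mask=[[False, True], [False, False]], fill_value=-9999.0)

input_nodata = float(input_ma.fill_value)
output_array = input_ma.data.astype(np.float32)
output_array[output_array == input_nodata] = np.nan
print(output_array)  # [[1.5 nan] [0.  2.5]]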
Example #4
def main(netcdf_ws=os.getcwd(),
         ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(),
         start_date=None,
         end_date=None,
         extent_path=None,
         output_extent=None,
         stats_flag=True,
         overwrite_flag=False):
    """Extract GRIDMET temperature

    Args:
        netcdf_ws (str): folder of GRIDMET netcdf files
        ancillary_ws (str): folder of ancillary rasters
        output_ws (str): folder of output rasters
        start_date (str): ISO format date (YYYY-MM-DD)
        end_date (str): ISO format date (YYYY-MM-DD)
        extent_path (str): file path of a raster defining the output extent
        output_extent (list): decimal degrees values defining output extent
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nExtracting GRIDMET vapor pressure')

    # If a date is not set, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date:   {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    # Save GRIDMET lat, lon, and elevation arrays
    elev_raster = os.path.join(ancillary_ws, 'gridmet_elev.img')

    output_fmt = '{}_{}_daily_gridmet.img'
    gridmet_re = re.compile(r'(?P<VAR>\w+)_(?P<YEAR>\d{4})\.nc$')

    # GRIDMET band name dictionary
    gridmet_band_dict = dict()
    gridmet_band_dict['pr'] = 'precipitation_amount'
    gridmet_band_dict['srad'] = 'surface_downwelling_shortwave_flux_in_air'
    gridmet_band_dict['sph'] = 'specific_humidity'
    gridmet_band_dict['tmmn'] = 'air_temperature'
    gridmet_band_dict['tmmx'] = 'air_temperature'
    gridmet_band_dict['vs'] = 'wind_speed'

    # Get extent/geo from elevation raster
    gridmet_ds = gdal.Open(elev_raster)
    gridmet_osr = gdc.raster_ds_osr(gridmet_ds)
    gridmet_proj = gdc.osr_proj(gridmet_osr)
    gridmet_cs = gdc.raster_ds_cellsize(gridmet_ds, x_only=True)
    gridmet_extent = gdc.raster_ds_extent(gridmet_ds)
    gridmet_full_geo = gridmet_extent.geo(gridmet_cs)
    gridmet_x, gridmet_y = gridmet_extent.origin()
    gridmet_ds = None
    logging.debug('  Projection: {}'.format(gridmet_proj))
    logging.debug('  Cellsize: {}'.format(gridmet_cs))
    logging.debug('  Geo: {}'.format(gridmet_full_geo))
    logging.debug('  Extent: {}'.format(gridmet_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        gridmet_extent = gdc.Extent(output_extent)
        gridmet_extent.adjust_to_snap('EXPAND', gridmet_x, gridmet_y,
                                      gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug('  Geo: {}'.format(gridmet_geo))
        logging.debug('  Extent: {}'.format(gridmet_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        gridmet_extent = gdc.raster_path_extent(extent_path)
        extent_osr = gdc.raster_path_osr(extent_path)
        extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        gridmet_extent = gdc.project_extent(gridmet_extent, extent_osr,
                                            gridmet_osr, extent_cs)
        gridmet_extent.adjust_to_snap('EXPAND', gridmet_x, gridmet_y,
                                      gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug('  Geo: {}'.format(gridmet_geo))
        logging.debug('  Extent: {}'.format(gridmet_extent))
    else:
        gridmet_geo = gridmet_full_geo

    # Get indices for slicing/clipping input arrays
    g_i, g_j = gdc.array_geo_offsets(gridmet_full_geo,
                                     gridmet_geo,
                                     cs=gridmet_cs)
    g_rows, g_cols = gridmet_extent.shape(cs=gridmet_cs)

    # Read the elevation array
    elev_array = gdc.raster_to_array(elev_raster,
                                     mask_extent=gridmet_extent,
                                     return_nodata=False)
    pair_array = et_common.air_pressure_func(elev_array)
    del elev_array

    # Process each variable
    input_var = 'sph'
    output_var = 'ea'
    logging.info("\nVariable: {}".format(input_var))

    # Build output folder
    var_ws = os.path.join(output_ws, output_var)
    if not os.path.isdir(var_ws):
        os.makedirs(var_ws)

    # Process each file in the input workspace
    for input_name in sorted(os.listdir(netcdf_ws)):
        input_match = gridmet_re.match(input_name)
        if not input_match:
            logging.debug("{}".format(input_name))
            logging.debug('  Regular expression didn\'t match, skipping')
            continue
        elif input_match.group('VAR') != input_var:
            logging.debug("{}".format(input_name))
            logging.debug('  Variable didn\'t match, skipping')
            continue
        else:
            logging.info("{}".format(input_name))

        year_str = input_match.group('YEAR')
        logging.info("  {}".format(year_str))
        year_int = int(year_str)
        year_days = int(dt.datetime(year_int, 12, 31).strftime('%j'))
        if start_dt is not None and year_int < start_dt.year:
            logging.debug('    Before start date, skipping')
            continue
        elif end_dt is not None and year_int > end_dt.year:
            logging.debug('    After end date, skipping')
            continue

        # Build input file path
        input_raster = os.path.join(netcdf_ws, input_name)
        # if not os.path.isfile(input_raster):
        #     logging.debug(
        #         '  Input NetCDF doesn\'t exist, skipping    {}'.format(
        #             input_raster))
        #     continue

        # Create a single raster for each year with 365 bands
        # Each day will be stored in a separate band
        output_path = os.path.join(var_ws,
                                   output_fmt.format(output_var, year_str))
        logging.debug('  {}'.format(output_path))
        if os.path.isfile(output_path):
            if not overwrite_flag:
                logging.debug('    File already exists, skipping')
                continue
            else:
                logging.debug('    File already exists, removing existing')
                os.remove(output_path)
        gdc.build_empty_raster(output_path,
                               band_cnt=366,
                               output_dtype=np.float32,
                               output_proj=gridmet_proj,
                               output_cs=gridmet_cs,
                               output_extent=gridmet_extent,
                               output_fill_flag=True)

        # Read in the GRIDMET NetCDF file
        # Immediately clip the input array to save memory
        input_nc_f = netCDF4.Dataset(input_raster, 'r')
        input_nc = input_nc_f.variables[
            gridmet_band_dict[input_var]][:, g_i:g_i + g_cols,
                                          g_j:g_j + g_rows].copy()
        input_nc = np.transpose(input_nc, (0, 2, 1))

        # A numpy array is returned when slicing a masked array
        #   if there are no masked pixels
        # This is a hack to force the numpy array back to a masked array
        if not isinstance(input_nc, np.ma.MaskedArray):
            input_nc = np.ma.MaskedArray(
                input_nc, np.zeros(input_nc.shape, dtype=bool))

        # Check all valid dates in the year
        year_dates = date_range(dt.datetime(year_int, 1, 1),
                                dt.datetime(year_int + 1, 1, 1))
        for date_dt in year_dates:
            if start_dt is not None and date_dt < start_dt:
                # logging.debug('  before start date, skipping')
                continue
            elif end_dt is not None and date_dt > end_dt:
                # logging.debug('  after end date, skipping')
                continue
            logging.info('  {}'.format(date_dt.strftime('%Y_%m_%d')))

            doy = int(date_dt.strftime('%j'))
            doy_i = range(1, year_days + 1).index(doy)

            # Arrays are read as masked arrays with a fill value of -9999
            # Convert to plain numpy arrays with nan values
            try:
                input_full_ma = input_nc[doy_i, :, :]
            except IndexError:
                logging.info('    date not in netcdf, skipping')
                continue
            input_full_array = input_full_ma.data.astype(np.float32)
            input_full_nodata = float(input_full_ma.fill_value)
            input_full_array[input_full_array == input_full_nodata] = np.nan

            # Since inputs are netcdf, need to create GDAL raster
            #   datasets in order to use gdal_common functions
            # Create an in memory dataset of the full ETo array
            input_full_ds = gdc.array_to_mem_ds(input_full_array,
                                                output_geo=gridmet_full_geo,
                                                output_proj=gridmet_proj)

            # Then extract the subset from the in memory dataset
            sph_array = gdc.raster_ds_to_array(input_full_ds,
                                               1,
                                               mask_extent=gridmet_extent,
                                               return_nodata=False)

            # Compute ea [kPa] from specific humidity [kg/kg]
            ea_array = (sph_array * pair_array) / (0.622 + 0.378 * sph_array)

            # Save the projected array as 32-bit floats
            gdc.array_to_comp_raster(ea_array.astype(np.float32),
                                     output_path,
                                     band=doy,
                                     stats_flag=False)
            # gdc.array_to_raster(
            #     ea_array.astype(np.float32), output_path,
            #     output_geo=gridmet_geo, output_proj=gridmet_proj,
            #     stats_flag=False)
            del sph_array, ea_array
        input_nc_f.close()
        del input_nc_f

        if stats_flag:
            gdc.raster_statistics(output_path)

    logging.debug('\nScript Complete')
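
The vapor pressure line in Example #4 inverts the standard specific-humidity relation q = 0.622*ea / (P - 0.378*ea), giving ea = q*P / (0.622 + 0.378*q). A sketch of the same computation; air_pressure_kpa() is an assumed stand-in for et_common.air_pressure_func(), here using the FAO-56 pressure-from-elevation formula:

import numpy as np

def air_pressure_kpa(elev_m):
    """Mean air pressure [kPa] from elevation [m] (FAO-56 eq. 7; whether
    et_common uses exactly this form is an assumption)."""
    return 101.3 * ((293.0 - 0.0065 * elev_m) / 293.0) ** 5.26

sph_array = np.array([0.004, 0.010])   # specific humidity [kg/kg]
pair_array = air_pressure_kpa(np.array([100.0, 1500.0]))
ea_array = (sph_array * pair_array) / (0.622 + 0.378 * sph_array)
print(ea_array)  # actual vapor pressure [kPa]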
Example #5
def main(grb_ws=os.getcwd(),
         ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(),
         etr_flag=False,
         eto_flag=False,
         landsat_ws=None,
         start_date=None,
         end_date=None,
         times_str='',
         extent_path=None,
         output_extent=None,
         daily_flag=True,
         stats_flag=True,
         overwrite_flag=False):
    """Compute hourly ETr/ETo from NLDAS data

    Args:
        grb_ws (str): folder of NLDAS GRB files
        ancillary_ws (str): folder of ancillary rasters
        output_ws (str): folder of output rasters
        etr_flag (bool): if True, compute alfalfa reference ET (ETr)
        eto_flag (bool): if True, compute grass reference ET (ETo)
        landsat_ws (str): folder of Landsat scenes or tar.gz files
        start_date (str): ISO format date (YYYY-MM-DD)
        end_date (str): ISO format date (YYYY-MM-DD)
        times_str (str): comma separated values and/or ranges of UTC hours
            (i.e. "1, 2, 5-8")
            Parsed with python_common.parse_int_set()
        extent_path (str): file path defining the output extent
        output_extent (list): decimal degrees values defining output extent
        daily_flag (bool): if True, save daily ETr/ETo sum raster.
            Default is True
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nComputing NLDAS hourly ETr/ETo')
    np.seterr(invalid='ignore')

    # Compute ETr and/or ETo
    if not etr_flag and not eto_flag:
        logging.info('  ETo/ETr flag(s) not set, defaulting to ETr')
        etr_flag = True

    # If a date is not set, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date:   {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    # Only process specific hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(parse_int_set(times_str))
    time_list = ['{:02d}00'.format(t) for t in time_list]

    etr_folder = 'etr'
    eto_folder = 'eto'
    hour_fmt = '{}_{:04d}{:02d}{:02d}_hourly_nldas.img'
    # hour_fmt = '{}_{:04d}{:02d}{:02d}_{:04d}_nldas.img'
    day_fmt = '{}_{:04d}{:02d}{:02d}_nldas.img'
    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    input_re = re.compile(
        r'NLDAS_FORA0125_H\.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2})\.(?P<TIME>\d{4})\.002\.grb$')

    # Assume NLDAS is NAD83
    # input_epsg = 'EPSG:4269'

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')
    elev_path = os.path.join(ancillary_ws, 'nldas_elev.img')
    lat_path = os.path.join(ancillary_ws, 'nldas_lat.img')
    lon_path = os.path.join(ancillary_ws, 'nldas_lon.img')

    # Build a date list from landsat_ws scene folders or tar.gz files
    date_list = []
    if landsat_ws is not None and os.path.isdir(landsat_ws):
        logging.info('\nReading dates from Landsat IDs')
        logging.info('  {}'.format(landsat_ws))
        landsat_re = re.compile(
            r'^(?:LT04|LT05|LE07|LC08)_(?:\d{3})(?:\d{3})_'
            r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})')
        for root, dirs, files in os.walk(landsat_ws, topdown=True):
            # If root matches, don't explore subfolders
            try:
                landsat_match = landsat_re.match(os.path.basename(root))
                date_list.append(
                    dt.datetime.strptime('_'.join(landsat_match.groups()),
                                         '%Y_%m_%d').date().isoformat())
                dirs[:] = []
            except (AttributeError, ValueError):
                pass

            for file in files:
                try:
                    landsat_match = landsat_re.match(file)
                    date_list.append(
                        dt.datetime.strptime('_'.join(landsat_match.groups()),
                                             '%Y_%m_%d').date().isoformat())
                except (AttributeError, ValueError):
                    pass
        date_list = sorted(list(set(date_list)))
    # elif landsat_ws is not None and os.path.isfile(landsat_ws):
    #     with open(landsat_ws) as landsat_f:

    # This allows GDAL to throw Python Exceptions
    # gdal.UseExceptions()
    # mem_driver = gdal.GetDriverByName('MEM')

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    nldas_osr = gdc.raster_ds_osr(nldas_ds)
    nldas_proj = gdc.osr_proj(nldas_osr)
    nldas_cs = gdc.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = gdc.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug('  Projection: {}'.format(nldas_proj))
    logging.debug('  Cellsize: {}'.format(nldas_cs))
    logging.debug('  Geo: {}'.format(nldas_geo))
    logging.debug('  Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        nldas_extent = gdc.Extent(output_extent)
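        # 'EXPAND' snaps the extent outward to the nearest NLDAS cell
        # boundaries so the subset stays aligned with the full grid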
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            nldas_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = gdc.project_extent(nldas_extent, extent_osr, nldas_osr,
                                          extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = gdc.raster_to_array(mask_path,
                                                      mask_extent=nldas_extent,
                                                      fill_value=0,
                                                      return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None

    # Read ancillary arrays (or subsets?)
    elev_array = gdc.raster_to_array(elev_path,
                                     mask_extent=nldas_extent,
                                     return_nodata=False)
    # pair_array = et_common.air_pressure_func(elev_array)
    lat_array = gdc.raster_to_array(lat_path,
                                    mask_extent=nldas_extent,
                                    return_nodata=False)
    lon_array = gdc.raster_to_array(lon_path,
                                    mask_extent=nldas_extent,
                                    return_nodata=False)

    # The hourly RefET function expects lat/lon in radians
    lat_array *= (math.pi / 180)
    lon_array *= (math.pi / 180)

    # Build output folder
    etr_ws = os.path.join(output_ws, etr_folder)
    eto_ws = os.path.join(output_ws, eto_folder)
    if etr_flag and not os.path.isdir(etr_ws):
        os.makedirs(etr_ws)
    if eto_flag and not os.path.isdir(eto_ws):
        os.makedirs(eto_ws)

    # DEADBEEF - Instead of processing all available files, the following
    #   code will process files for target dates
    # for input_dt in date_range(start_dt, end_dt + dt.timedelta(1)):
    #     logging.info(input_dt.date())

    # Iterate all available files and check dates if necessary
    # Each subfolder in the main folder has all imagery for one day
    #   (in UTC time)
    # The path of each subfolder is /YYYY/DOY
    errors = defaultdict(list)
    for root, folders, files in os.walk(grb_ws):
        root_split = os.path.normpath(root).split(os.sep)

        # If the year/doy is outside the range, skip
        if (re.match(r'\d{4}', root_split[-2])
                and re.match(r'\d{3}', root_split[-1])):
            root_dt = dt.datetime.strptime(
                '{}_{}'.format(root_split[-2], root_split[-1]), '%Y_%j')
            logging.info('{}'.format(root_dt.date()))
            if ((start_dt is not None and root_dt < start_dt)
                    or (end_dt is not None and root_dt > end_dt)):
                continue
            elif date_list and root_dt.date().isoformat() not in date_list:
                continue
        # If the year is outside the range, don't search subfolders
        elif re.match(r'\d{4}', root_split[-1]):
            root_year = int(root_split[-1])
            logging.info('Year: {}'.format(root_year))
            if ((start_dt is not None and root_year < start_dt.year)
                    or (end_dt is not None and root_year > end_dt.year)):
                folders[:] = []
            else:
                folders[:] = sorted(folders)
            continue
        else:
            continue
        logging.debug('  {}'.format(root))

        # Start off assuming every file needs to be processed
        day_skip_flag = False

        # Build output folders if necessary
        etr_year_ws = os.path.join(etr_ws, str(root_dt.year))
        eto_year_ws = os.path.join(eto_ws, str(root_dt.year))
        if etr_flag and not os.path.isdir(etr_year_ws):
            os.makedirs(etr_year_ws)
        if eto_flag and not os.path.isdir(eto_year_ws):
            os.makedirs(eto_year_ws)

        # Build daily total paths
        etr_day_path = os.path.join(
            etr_year_ws,
            day_fmt.format('etr', root_dt.year, root_dt.month, root_dt.day))
        eto_day_path = os.path.join(
            eto_year_ws,
            day_fmt.format('eto', root_dt.year, root_dt.month, root_dt.day))
        etr_hour_path = os.path.join(
            etr_year_ws,
            hour_fmt.format('etr', root_dt.year, root_dt.month, root_dt.day))
        eto_hour_path = os.path.join(
            eto_year_ws,
            hour_fmt.format('eto', root_dt.year, root_dt.month, root_dt.day))
        # logging.debug('  {}'.format(etr_hour_path))

        # If daily ETr/ETo files are present, day can be skipped
        if not overwrite_flag and daily_flag:
            if etr_flag and not os.path.isfile(etr_day_path):
                pass
            elif eto_flag and not os.path.isfile(eto_day_path):
                pass
            else:
                day_skip_flag = True

        # If the hour and daily files don't need to be made, skip the day
        if not overwrite_flag:
            if etr_flag and not os.path.isfile(etr_hour_path):
                pass
            elif eto_flag and not os.path.isfile(eto_hour_path):
                pass
            elif day_skip_flag:
                logging.debug('  File(s) already exist, skipping')
                continue

        # Create a single raster for each day with 24 bands
        # Each time step will be stored in a separate band
        if etr_flag:
            logging.debug('  {}'.format(etr_hour_path))
            gdc.build_empty_raster(etr_hour_path,
                                   band_cnt=24,
                                   output_dtype=np.float32,
                                   output_proj=nldas_proj,
                                   output_cs=nldas_cs,
                                   output_extent=nldas_extent,
                                   output_fill_flag=True)
        if eto_flag:
            logging.debug('  {}'.format(eto_hour_path))
            gdc.build_empty_raster(eto_hour_path,
                                   band_cnt=24,
                                   output_dtype=np.float32,
                                   output_proj=nldas_proj,
                                   output_cs=nldas_cs,
                                   output_extent=nldas_extent,
                                   output_fill_flag=True)

        # Sum all ETr/ETo images in each folder to generate a UTC day total
        etr_day_array = 0
        eto_day_array = 0

        # Process each hour file
        for input_name in sorted(files):
            logging.info('  {}'.format(input_name))
            input_match = input_re.match(input_name)
            if input_match is None:
                logging.debug('    Regular expression didn\'t match, skipping')
                continue
            input_dt = dt.datetime(int(input_match.group('YEAR')),
                                   int(input_match.group('MONTH')),
                                   int(input_match.group('DAY')))
            input_doy = int(input_dt.strftime('%j'))
            time_str = input_match.group('TIME')
            band_num = int(time_str[:2]) + 1
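            # GDAL bands are 1-based: UTC hour 00 -> band 1, ..., 23 -> band 24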
            # if start_dt is not None and input_dt < start_dt:
            #     continue
            # elif end_dt is not None and input_dt > end_dt:
            #     continue
            # elif date_list and input_dt.date().isoformat() not in date_list:
            #     continue
            if not daily_flag and time_str not in time_list:
                logging.debug('    Time not in list and not daily, skipping')
                continue

            input_path = os.path.join(root, input_name)
            logging.debug('    Time: {} {}'.format(input_dt.date(), time_str))
            logging.debug('    Band: {}'.format(band_num))

            # Determine band numbering/naming
            try:
                input_band_dict = grib_band_names(input_path)
            except RuntimeError as e:
                errors[input_path].append(e)
                logging.error(' RuntimeError: {} Skipping: {}'.format(
                    e, input_path))
                continue

            # Read input bands
            input_ds = gdal.Open(input_path)

            # Temperature should be in C for et_common.refet_hourly_func()
            if 'Temperature [K]' in input_band_dict.keys():
                temp_band_units = 'K'
                temp_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['Temperature [K]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
            elif 'Temperature [C]' in input_band_dict.keys():
                temp_band_units = 'C'
                temp_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['Temperature [C]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
            else:
                logging.error('Unknown Temperature units, skipping')
                logging.error('  {}'.format(input_band_dict.keys()))
                continue

            # DEADBEEF - Having issue with T appearing to be C but labeled as K
            # Try to determine temperature units from values
            temp_mean = float(np.nanmean(temp_array))
            temp_units_dict = {20: 'C', 293: 'K'}
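            # Assume the units whose reference value (20 C or 293 K) is
            # closest to the array mean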
            temp_array_units = temp_units_dict[min(
                temp_units_dict, key=lambda x: abs(x - temp_mean))]
            if temp_array_units == 'K' and temp_band_units == 'K':
                logging.debug('  Converting temperature from K to C')
                temp_array -= 273.15
            elif temp_array_units == 'C' and temp_band_units == 'C':
                pass
            elif temp_array_units == 'C' and temp_band_units == 'K':
                logging.debug(
                    ('  Temperature units are K in the GRB band name, '
                     'but values appear to be C\n'
                     '    Mean temperature: {:.2f}\n'
                     '  Values will NOT be adjusted').format(temp_mean))
            elif temp_array_units == 'K' and temp_band_units == 'C':
                logging.debug(
                    ('  Temperature units are C in the GRB band name, '
                     'but values appear to be K\n'
                     '    Mean temperature: {:.2f}\n'
                     '  Values will be adjusted from K to C').format(temp_mean))
                temp_array -= 273.15
            try:
                sph_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['Specific humidity [kg/kg]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
                rs_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict[
                        'Downward shortwave radiation flux [W/m^2]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
                wind_u_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['u-component of wind [m/s]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
                wind_v_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['v-component of wind [m/s]'],
                    mask_extent=nldas_extent,
                    return_nodata=False)
                input_ds = None
            except KeyError as e:
                errors[input_path].append(e)
                logging.error(' KeyError: {} Skipping: {}'.format(
                    e, input_path))
                input_ds = None
                continue

            rs_array *= 0.0036  # W m-2 to MJ m-2 hr-1
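            # (1 W m-2 = 1 J s-1 m-2, so 3600 s hr-1 * 1e-6 MJ J-1 = 0.0036)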
            wind_array = np.sqrt(wind_u_array**2 + wind_v_array**2)
            del wind_u_array, wind_v_array

            # ETr
            if etr_flag:
                etr_array = et_common.refet_hourly_func(
                    temp_array, sph_array, rs_array, wind_array,
                    zw=10, elev=elev_array, lat=lat_array, lon=lon_array,
                    doy=input_doy, time=int(time_str) / 100, ref_type='ETR')
                if daily_flag:
                    etr_day_array += etr_array
                if time_str in time_list:
                    gdc.array_to_comp_raster(etr_array.astype(np.float32),
                                             etr_hour_path,
                                             band=band_num,
                                             stats_flag=False)
                    del etr_array

            # ETo
            if eto_flag:
                eto_array = et_common.refet_hourly_func(
                    temp_array, sph_array, rs_array, wind_array,
                    zw=10, elev=elev_array, lat=lat_array, lon=lon_array,
                    doy=input_doy, time=int(time_str) / 100, ref_type='ETO')
                if daily_flag:
                    eto_day_array += eto_array
                if time_str in time_list:
                    gdc.array_to_comp_raster(eto_array.astype(np.float32),
                                             eto_hour_path,
                                             band=band_num,
                                             stats_flag=False)
                    del eto_array

            del temp_array, sph_array, rs_array, wind_array

        if stats_flag and etr_flag:
            gdc.raster_statistics(etr_hour_path)
        if stats_flag and eto_flag:
            gdc.raster_statistics(eto_hour_path)

        # Save the projected ETr/ETo as 32-bit floats
        if not day_skip_flag and daily_flag:
            if etr_flag:
                try:
                    gdc.array_to_raster(etr_day_array.astype(np.float32),
                                        etr_day_path,
                                        output_geo=nldas_geo,
                                        output_proj=nldas_proj,
                                        stats_flag=stats_flag)
                except AttributeError:
                    pass
            if eto_flag:
                try:
                    gdc.array_to_raster(eto_day_array.astype(np.float32),
                                        eto_day_path,
                                        output_geo=nldas_geo,
                                        output_proj=nldas_proj,
                                        stats_flag=stats_flag)
                except AttributeError:
                    pass

        del etr_day_array, eto_day_array

    if len(errors) > 0:
        logging.info('\nThe following errors were encountered:')
        for key, value in errors.items():
            logging.error(' Filepath: {}, error: {}'.format(key, value))

    logging.debug('\nScript Complete')
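
A minimal invocation sketch for the entry point above. The workspace paths, date range, and hour list are placeholders rather than values from the source; the keyword names mirror the parameters referenced in the function body.

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, format='%(message)s')
    # Hypothetical folder layout; point these at the local NLDAS data
    main(grb_ws='nldas/grb',
         ancillary_ws='nldas/ancillary',
         output_ws='nldas/refet',
         start_date='2017-01-01',
         end_date='2017-01-31',
         times_str='8-18',
         etr_flag=True,
         eto_flag=False)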
Example No. 6
def main(ancillary_ws=os.getcwd(),
         zero_elev_nodata_flag=False,
         overwrite_flag=False):
    """Process GRIDMET ancillary data

    Args:
        ancillary_ws (str): folder of ancillary rasters
        zero_elev_nodata_flag (bool): if True, set elevation nodata values to 0
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nProcess GRIDMET ancillary rasters')

    # Site URL
    elev_url = 'https://climate.northwestknowledge.net/METDATA/data/metdata_elevationdata.nc'

    # Manually define the spatial reference and extent of the GRIDMET data
    # This could be read in from a raster
    gridmet_osr = osr.SpatialReference()
    # Assume GRIDMET data is in WGS84 not NAD83 (need to check with John)
    gridmet_osr.ImportFromEPSG(4326)
    gridmet_proj = gdc.osr_proj(gridmet_osr)
    gridmet_cs = 1. / 24  # 0.041666666666666666
    gridmet_x = -125 + gridmet_cs * 5
    gridmet_y = 49 + gridmet_cs * 10
    # gridmet_y = lon_array[0,0] - 0.5 * gridmet_cs
    # gridmet_y = lat_array[0,0] + 0.5 * gridmet_cs
    # gridmet_rows, gridmet_cols = elev_array.shape
    gridmet_geo = (gridmet_x, gridmet_cs, 0., gridmet_y, 0., -gridmet_cs)
    # gridmet_extent = gdc.geo_extent(
    #     gridmet_geo, gridmet_rows, gridmet_cols)
    # Keep track of the original/full geo-transform and extent
    # gridmet_full_geo = (
    #     gridmet_x, gridmet_cs, 0., gridmet_y, 0., -gridmet_cs)
    # gridmet_full_extent = gdc.geo_extent(
    #     gridmet_geo, gridmet_rows, gridmet_cols)
    logging.debug('  X/Y: {} {}'.format(gridmet_x, gridmet_y))
    logging.debug('  Geo: {}'.format(gridmet_geo))
    logging.debug('  Cellsize: {}'.format(gridmet_cs))

    # Build output workspace if it doesn't exist
    if not os.path.isdir(ancillary_ws):
        os.makedirs(ancillary_ws)

    # Output paths
    elev_nc = os.path.join(ancillary_ws, os.path.basename(elev_url))
    elev_raster = os.path.join(ancillary_ws, 'gridmet_elev.img')
    lat_raster = os.path.join(ancillary_ws, 'gridmet_lat.img')
    lon_raster = os.path.join(ancillary_ws, 'gridmet_lon.img')

    # Compute DEM raster
    if overwrite_flag or not os.path.isfile(elev_raster):
        logging.info('\nGRIDMET DEM')
        logging.info('  Downloading')
        logging.debug('    {}'.format(elev_url))
        logging.debug('    {}'.format(elev_nc))
        url_download(elev_url, elev_nc)
        # try:
        #     urllib.urlretrieve(elev_url, elev_nc)
        # except:
        #     logging.error("  ERROR: {}\n  FILE: {}".format(
        #         sys.exc_info()[0], elev_nc))
        #     # Try to remove the file since it may not have completely downloaded
        #     os.remove(elev_nc)

        logging.info('  Extracting')
        logging.debug('    {}'.format(elev_raster))
        elev_nc_f = netCDF4.Dataset(elev_nc, 'r')
        elev_ma = elev_nc_f.variables['elevation'][0, :, :]
        elev_array = elev_ma.data.astype(np.float32)
        # elev_nodata = float(elev_ma.fill_value)
        elev_array[(elev_array == elev_ma.fill_value) |
                   (elev_array <= -300)] = np.nan
        if zero_elev_nodata_flag:
            elev_array[np.isnan(elev_array)] = 0
        if np.all(np.isnan(elev_array)):
            logging.error(
                '\nERROR: The elevation array is all nodata, exiting\n')
            sys.exit()
        gdc.array_to_raster(elev_array,
                            elev_raster,
                            output_geo=gridmet_geo,
                            output_proj=gridmet_proj)
        elev_nc_f.close()
        # del elev_nc_f, elev_ma, elev_array, elev_nodata
        del elev_nc_f, elev_ma, elev_array
        os.remove(elev_nc)

    # Compute latitude/longitude rasters
    if ((overwrite_flag or not os.path.isfile(lat_raster)
         or not os.path.isfile(lon_raster)) and os.path.isfile(elev_raster)):
        logging.info('\nGRIDMET Latitude/Longitude')
        logging.debug('    {}'.format(lat_raster))
        lat_array, lon_array = gdc.raster_lat_lon_func(elev_raster)
        # Handle the conversion to radians in the other GRIDMET scripts
        # lat_array *= (math.pi / 180)
        gdc.array_to_raster(lat_array,
                            lat_raster,
                            output_geo=gridmet_geo,
                            output_proj=gridmet_proj)
        logging.debug('    {}'.format(lon_raster))
        gdc.array_to_raster(lon_array,
                            lon_raster,
                            output_geo=gridmet_geo,
                            output_proj=gridmet_proj)
        del lat_array, lon_array

    logging.debug('\nScript Complete')
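
This example and the NLDAS scripts both lean on the GDAL north-up geotransform convention used above: (x_min, cellsize, 0, y_max, 0, -cellsize), with the origin at the upper-left corner. Below is a small sketch of how cell-center longitude/latitude arrays follow from that tuple; the row/column counts are illustrative assumptions, not values read from the data.

import numpy as np

def cell_centers(geo, rows, cols):
    """Cell-center lon/lat arrays for a north-up GDAL geotransform."""
    x_min, cs_x, _, y_max, _, cs_y = geo  # cs_y is negative (rows run south)
    lon = x_min + cs_x * (np.arange(cols) + 0.5)
    lat = y_max + cs_y * (np.arange(rows) + 0.5)
    return np.meshgrid(lon, lat)

# Illustrative use with the GRIDMET grid defined above
cs = 1. / 24
geo = (-125 + cs * 5, cs, 0., 49 + cs * 10, 0., -cs)
lon_array, lat_array = cell_centers(geo, rows=585, cols=1386)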
Example No. 7
def main(grb_ws=os.getcwd(),
         ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(),
         landsat_ws=None,
         start_date=None,
         end_date=None,
         times_str='',
         extent_path=None,
         output_extent=None,
         stats_flag=True,
         overwrite_flag=False):
    """Extract hourly NLDAS vapour pressure rasters

    Args:
        grb_ws (str): folder of NLDAS GRB files
        ancillary_ws (str): folder of ancillary rasters
        output_ws (str): folder of output rasters
        landsat_ws (str): folder of Landsat scenes or tar.gz files
        start_date (str): ISO format date (YYYY-MM-DD)
        end_date (str): ISO format date (YYYY-MM-DD)
        times_str (str): comma separated values and/or ranges of UTC hours
            (e.g. "1, 2, 5-8")
            Parsed with python_common.parse_int_set()
        extent_path (str): file path defining the output extent
        output_extent (list): decimal degrees values defining output extent
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nExtracting NLDAS vapour pressure rasters')

    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    input_re = re.compile(r'NLDAS_FORA0125_H\.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})' +
                          r'(?P<DAY>\d{2})\.(?P<TIME>\d{4})\.002\.grb$')

    output_folder = 'ea'
    output_fmt = 'ea_{:04d}{:02d}{:02d}_hourly_nldas.img'
    # output_fmt = 'ea_{:04d}{:02d}{:02d}_{:04d}_nldas.img'

    # If a date is not set, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except:
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date:   {}'.format(end_dt))
    except:
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    # Only process specific hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(parse_int_set(times_str))
    time_list = ['{:02d}00'.format(t) for t in time_list]

    # Assume NLDAS is NAD83
    # input_epsg = 'EPSG:4269'

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')
    elev_path = os.path.join(ancillary_ws, 'nldas_elev.img')

    # Build a date list from landsat_ws scene folders or tar.gz files
    date_list = []
    if landsat_ws is not None and os.path.isdir(landsat_ws):
        logging.info('\nReading dates from Landsat IDs')
        logging.info('  {}'.format(landsat_ws))
        landsat_re = re.compile(
            r'^(?:LT04|LT05|LE07|LC08)_(?:\d{3})(?:\d{3})_' +
            r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})')
        for root, dirs, files in os.walk(landsat_ws, topdown=True):
            # If root matches, don't explore subfolders
            try:
                landsat_match = landsat_re.match(os.path.basename(root))
                date_list.append(
                    dt.datetime.strptime('_'.join(landsat_match.groups()),
                                         '%Y_%m_%d').date().isoformat())
                dirs[:] = []
            except:
                pass

            for file in files:
                try:
                    landsat_match = landsat_re.match(file)
                    date_list.append(
                        dt.datetime.strptime('_'.join(landsat_match.groups()),
                                             '%Y_%m_%d').date().isoformat())
                except:
                    pass
        date_list = sorted(list(set(date_list)))
    # elif landsat_ws is not None and os.path.isfile(landsat_ws):
    #     with open(landsat_ws) as landsat_f:

    # This allows GDAL to throw Python Exceptions
    # gdal.UseExceptions()
    # mem_driver = gdal.GetDriverByName('MEM')

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    nldas_osr = gdc.raster_ds_osr(nldas_ds)
    nldas_proj = gdc.osr_proj(nldas_osr)
    nldas_cs = gdc.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = gdc.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug('  Projection: {}'.format(nldas_proj))
    logging.debug('  Cellsize: {}'.format(nldas_cs))
    logging.debug('  Geo: {}'.format(nldas_geo))
    logging.debug('  Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        nldas_extent = gdc.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            nldas_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = gdc.project_extent(nldas_extent, extent_osr, nldas_osr,
                                          extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = gdc.raster_to_array(mask_path,
                                                      mask_extent=nldas_extent,
                                                      fill_value=0,
                                                      return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None

    # Read elevation arrays (or subsets?)
    elev_array = gdc.raster_to_array(elev_path,
                                     mask_extent=nldas_extent,
                                     return_nodata=False)
    pair_array = et_common.air_pressure_func(elev_array)

    # Build output folder
    var_ws = os.path.join(output_ws, output_folder)
    if not os.path.isdir(var_ws):
        os.makedirs(var_ws)

    # Each subfolder in the main folder has all imagery for one day
    # The path of each subfolder is /YYYY/DOY

    # This approach will process files for target dates
    # for input_dt in date_range(start_dt, end_dt + dt.timedelta(1)):
    #     logging.info(input_dt.date())

    # Iterate all available files and check dates if necessary
    for root, folders, files in os.walk(grb_ws):
        root_split = os.path.normpath(root).split(os.sep)

        # If the year/doy is outside the range, skip
        if (re.match(r'\d{4}', root_split[-2])
                and re.match(r'\d{3}', root_split[-1])):
            root_dt = dt.datetime.strptime(
                '{}_{}'.format(root_split[-2], root_split[-1]), '%Y_%j')
            logging.info('{}'.format(root_dt.date()))
            if ((start_dt is not None and root_dt < start_dt)
                    or (end_dt is not None and root_dt > end_dt)):
                continue
            elif date_list and root_dt.date().isoformat() not in date_list:
                continue
        # If the year is outside the range, don't search subfolders
        elif re.match(r'\d{4}', root_split[-1]):
            root_year = int(root_split[-1])
            logging.info('Year: {}'.format(root_year))
            if ((start_dt is not None and root_year < start_dt.year)
                    or (end_dt is not None and root_year > end_dt.year)):
                folders[:] = []
            else:
                folders[:] = sorted(folders)
            continue
        else:
            continue

        # Create a single raster for each day with 24 bands
        # Each time step will be stored in a separate band
        output_name = output_fmt.format(root_dt.year, root_dt.month,
                                        root_dt.day)
        output_path = os.path.join(var_ws, str(root_dt.year), output_name)
        logging.debug('  {}'.format(output_path))
        if os.path.isfile(output_path):
            if not overwrite_flag:
                logging.debug('    File already exists, skipping')
                continue
            else:
                logging.debug('    File already exists, removing existing')
                os.remove(output_path)
        logging.debug('  {}'.format(root))
        if not os.path.isdir(os.path.dirname(output_path)):
            os.makedirs(os.path.dirname(output_path))
        gdc.build_empty_raster(output_path,
                               band_cnt=24,
                               output_dtype=np.float32,
                               output_proj=nldas_proj,
                               output_cs=nldas_cs,
                               output_extent=nldas_extent,
                               output_fill_flag=True)

        # Iterate through hourly files
        for input_name in sorted(files):
            logging.info('  {}'.format(input_name))
            input_path = os.path.join(root, input_name)
            input_match = input_re.match(input_name)
            if input_match is None:
                logging.debug('  Regular expression didn\'t match, skipping')
                continue
            input_dt = dt.datetime(int(input_match.group('YEAR')),
                                   int(input_match.group('MONTH')),
                                   int(input_match.group('DAY')))
            time_str = input_match.group('TIME')
            band_num = int(time_str[:2]) + 1
            # if start_dt is not None and input_dt < start_dt:
            #     continue
            # elif end_dt is not None and input_dt > end_dt:
            #     continue
            # elif date_list and input_dt.date().isoformat() not in date_list:
            #     continue
            if time_str not in time_list:
                logging.debug('    Time not in list, skipping')
                continue
            logging.debug('    Time: {} {}'.format(input_dt.date(), time_str))
            logging.debug('    Band: {}'.format(band_num))

            # Determine band numbering/naming
            input_band_dict = grib_band_names(input_path)

            # Compute vapour pressure from specific humidity
            input_ds = gdal.Open(input_path)
            sph_array = gdc.raster_ds_to_array(
                input_ds,
                band=input_band_dict['Specific humidity [kg/kg]'],
                mask_extent=nldas_extent,
                return_nodata=False)
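            # Actual vapour pressure follows from inverting
            #   q = 0.622 * ea / (P - 0.378 * ea)
            # giving ea = q * P / (0.622 + 0.378 * q), with ea in the same
            # units as pair_array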
            ea_array = (sph_array * pair_array) / (0.622 + 0.378 * sph_array)

            # Save the projected array as 32-bit floats
            gdc.array_to_comp_raster(ea_array.astype(np.float32),
                                     output_path,
                                     band=band_num)
            # gdc.block_to_raster(
            #     ea_array.astype(np.float32), output_path, band=band)
            # gdc.array_to_raster(
            #     ea_array.astype(np.float32), output_path,
            #     output_geo=nldas_geo, output_proj=nldas_proj,
            #     stats_flag=stats_flag)

            del sph_array
            input_ds = None

        if stats_flag:
            gdc.raster_statistics(output_path)

    logging.debug('\nScript Complete')
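
The conversion above depends on pair_array from et_common.air_pressure_func(), which is not shown in this listing. As an assumption, it likely follows the ASCE standardized pressure-from-elevation equation; the sketch below is named air_pressure_sketch to make clear it is not the actual helper.

import numpy as np

def air_pressure_sketch(elev_m):
    """Mean atmospheric pressure [kPa] from elevation [m] (ASCE form)."""
    return 101.3 * ((293.0 - 0.0065 * elev_m) / 293.0) ** 5.26

print(air_pressure_sketch(np.array([0.0, 1500.0])))  # ~[101.3, 84.8]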
Example No. 8
def zonal_stats(ini_path=None, overwrite_flag=False):
    """Offline Zonal Stats

    Args:
        ini_path (str): file path of the input parameters (INI) file
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nCompute Offline Zonal Stats')

    landsat_flag = True
    gridmet_flag = True
    pdsi_flag = False

    landsat_images_folder = 'landsat'
    landsat_tables_folder = 'landsat_tables'
    gridmet_images_folder = 'gridmet_monthly'

    # Regular expression to pull out Landsat scene_id
    landsat_image_re = re.compile(r'^\d{8}_\d{3}_\w+\.\w+\.tif$')
    gridmet_image_re = re.compile(r'^\d{6}_gridmet\.(eto|ppt)\.tif$')

    # For now, hardcode snap, cellsize and spatial reference
    logging.info('\nHardcoding zone/output cellsize and snap')
    zone_cs = 30
    zone_x, zone_y = 15, 15
    logging.debug('  Snap: {} {}'.format(zone_x, zone_y))
    logging.debug('  Cellsize: {}'.format(zone_cs))

    logging.info('Hardcoding Landsat snap, cellsize and spatial reference')
    landsat_x, landsat_y = 15, 15
    landsat_cs = 30
    landsat_osr = gdc.epsg_osr(32611)
    logging.debug('  Snap: {} {}'.format(landsat_x, landsat_y))
    logging.debug('  Cellsize: {}'.format(landsat_cs))
    logging.debug('  OSR: {}'.format(landsat_osr))

    logging.info('Hardcoding GRIDMET snap, cellsize and spatial reference')
    gridmet_x, gridmet_y = -124.79299639209513, 49.41685579737572
    gridmet_cs = 0.041666001963701
    # gridmet_cs = [0.041666001963701, 0.041666001489718]
    # gridmet_x, gridmet_y = -124.79166666666666666667, 25.04166666666666666667
    # gridmet_cs = 1. / 24
    gridmet_osr = gdc.epsg_osr(4326)
    # gridmet_osr = gdc.epsg_osr(4269)
    logging.debug('  Snap: {} {}'.format(gridmet_x, gridmet_y))
    logging.debug('  Cellsize: {}'.format(gridmet_cs))
    logging.debug('  OSR: {}'.format(gridmet_osr))

    landsat_daily_fields = [
        'DATE', 'SCENE_ID', 'LANDSAT', 'PATH', 'ROW',
        'YEAR', 'MONTH', 'DAY', 'DOY',
        'PIXEL_COUNT', 'FMASK_COUNT', 'DATA_COUNT', 'CLOUD_SCORE',
        'TS', 'ALBEDO_SUR', 'NDVI_TOA', 'NDVI_SUR', 'EVI_SUR',
        'NDWI_GREEN_NIR_SUR', 'NDWI_GREEN_SWIR1_SUR', 'NDWI_NIR_SWIR1_SUR',
        # 'NDWI_GREEN_NIR_TOA', 'NDWI_GREEN_SWIR1_TOA', 'NDWI_NIR_SWIR1_TOA',
        # 'NDWI_SWIR1_GREEN_TOA', 'NDWI_SWIR1_GREEN_SUR',
        # 'NDWI_TOA', 'NDWI_SUR',
        'TC_BRIGHT', 'TC_GREEN', 'TC_WET']
    # gridmet_daily_fields = [
    #     'DATE', 'YEAR', 'MONTH', 'DAY', 'DOY', 'WATER_YEAR', 'ETO', 'PPT']
    gridmet_monthly_fields = [
        'DATE', 'YEAR', 'MONTH', 'WATER_YEAR', 'ETO', 'PPT']
    pdsi_dekad_fields = [
        'DATE', 'YEAR', 'MONTH', 'DAY', 'DOY', 'PDSI']

    landsat_int_fields = [
        'YEAR', 'MONTH', 'DAY', 'DOY',
        'PIXEL_COUNT', 'FMASK_COUNT', 'CLOUD_SCORE']
    gridmet_int_fields = ['YEAR', 'MONTH', 'WATER_YEAR']

    # To figure out the Landsat type and path, compare each date to the
    # reference dates below and look for whole multiples of 16 days
    ref_dates = {
        datetime.datetime(1985, 3, 31): ['LT5', '039'],
        datetime.datetime(1985, 4, 7): ['LT5', '040'],
        datetime.datetime(1999, 7, 4): ['LE7', '039'],
        datetime.datetime(1999, 7, 27): ['LE7', '040'],
        datetime.datetime(2013, 4, 13): ['LC8', '039'],
        datetime.datetime(2013, 4, 20): ['LC8', '040']
        # datetime.datetime(1984, , ): ['LT4', '039'],
        # datetime.datetime(1984, , ): ['LT4', '040'],
    }
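    # This works because each Landsat platform revisits a given path on a
    # fixed 16 day cycle (see the ref_match test further down)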

    # Open config file
    config = ConfigParser.ConfigParser()
    try:
        config.readfp(open(ini_path))
    except:
        logging.error(('\nERROR: Input file could not be read, ' +
                       'is not an input file, or does not exist\n' +
                       'ERROR: ini_path = {}\n').format(ini_path))
        sys.exit()
    logging.debug('\nReading Input File')

    # Read in config file
    zone_input_ws = config.get('INPUTS', 'zone_input_ws')
    zone_filename = config.get('INPUTS', 'zone_filename')
    zone_field = config.get('INPUTS', 'zone_field')
    zone_path = os.path.join(zone_input_ws, zone_filename)

    landsat_daily_fields.insert(0, zone_field)
    # gridmet_daily_fields.insert(0, zone_field)
    gridmet_monthly_fields.insert(0, zone_field)
    pdsi_dekad_fields.insert(0, zone_field)

    images_ws = config.get('INPUTS', 'images_ws')

    # Build and check file paths
    if not os.path.isdir(zone_input_ws):
        logging.error(
            '\nERROR: The zone workspace does not exist, exiting\n  {}'.format(
                zone_input_ws))
        sys.exit()
    elif not os.path.isfile(zone_path):
        logging.error(
            '\nERROR: The zone shapefile does not exist, exiting\n  {}'.format(
                zone_path))
        sys.exit()
    elif not os.path.isdir(images_ws):
        logging.error(
            '\nERROR: The image workspace does not exist, exiting\n  {}'.format(
                images_ws))
        sys.exit()

    # Final output folder
    try:
        output_ws = config.get('INPUTS', 'output_ws')
        if not os.path.isdir(output_ws):
            os.makedirs(output_ws)
    except:
        output_ws = os.getcwd()
        logging.debug('  Defaulting output workspace to {}'.format(output_ws))

    # Start/end year
    try:
        start_year = int(config.get('INPUTS', 'start_year'))
    except:
        start_year = 1984
        logging.debug('  Defaulting start_year={}'.format(start_year))
    try:
        end_year = int(config.get('INPUTS', 'end_year'))
    except:
        end_year = datetime.datetime.today().year
        logging.debug('  Defaulting end year to {}'.format(end_year))
    if start_year and end_year and end_year < start_year:
        logging.error(
            '\nERROR: End year must be >= start year, exiting')
        sys.exit()
    default_end_year = datetime.datetime.today().year + 1
    if ((start_year and start_year not in range(1984, default_end_year))
            or (end_year and end_year not in range(1984, default_end_year))):
        logging.error(
            ('\nERROR: Year must be an integer from 1984-{}, ' +
             'exiting').format(default_end_year - 1))
        sys.exit()

    # Start/end month
    try:
        start_month = int(config.get('INPUTS', 'start_month'))
    except:
        start_month = None
        logging.debug('  Defaulting start_month=None')
    try:
        end_month = int(config.get('INPUTS', 'end_month'))
    except:
        end_month = None
        logging.debug('  Defaulting end_month=None')
    if start_month and start_month not in range(1, 13):
        logging.error(
            '\nERROR: Start month must be an integer from 1-12, exiting')
        sys.exit()
    elif end_month and end_month not in range(1, 13):
        logging.error(
            '\nERROR: End month must be an integer from 1-12, exiting')
        sys.exit()
    month_list = common.wrapped_range(start_month, end_month, 1, 12)

    # Start/end DOY
    try:
        start_doy = int(config.get('INPUTS', 'start_doy'))
    except:
        start_doy = None
        logging.debug('  Defaulting start_doy=None')
    try:
        end_doy = int(config.get('INPUTS', 'end_doy'))
    except:
        end_doy = None
        logging.debug('  Defaulting end_doy=None')
    if end_doy and end_doy > 273:
        logging.error(
            '\nERROR: End DOY must be in the same water year as start DOY, ' +
            'exiting')
        sys.exit()
    if start_doy and start_doy not in range(1, 367):
        logging.error(
            '\nERROR: Start DOY must be an integer from 1-366, exiting')
        sys.exit()
    elif end_doy and end_doy not in range(1, 367):
        logging.error(
            '\nERROR: End DOY must be an integer from 1-366, exiting')
        sys.exit()
    # if end_doy < start_doy:
    #     logging.error(
    #         '\nERROR: End DOY must be >= start DOY')
    #     sys.exit()
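    # common.wrapped_range() presumably allows the range to wrap around the
    # calendar boundary (e.g. start_doy=274 to end_doy=273 spans a water
    # year), mirroring the month handling above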
    doy_list = common.wrapped_range(start_doy, end_doy, 1, 366)

    # Control which Landsat images are used
    try:
        landsat5_flag = config.getboolean('INPUTS', 'landsat5_flag')
    except:
        landsat5_flag = False
        logging.debug('  Defaulting landsat5_flag=False')
    try:
        landsat4_flag = config.getboolean('INPUTS', 'landsat4_flag')
    except:
        landsat4_flag = False
        logging.debug('  Defaulting landsat4_flag=False')
    try:
        landsat7_flag = config.getboolean('INPUTS', 'landsat7_flag')
    except:
        landsat7_flag = False
        logging.debug('  Defaulting landsat7_flag=False')
    try:
        landsat8_flag = config.getboolean('INPUTS', 'landsat8_flag')
    except:
        landsat8_flag = False
        logging.debug('  Defaulting landsat8_flag=False')

    # Cloudmasking
    try:
        apply_mask_flag = config.getboolean('INPUTS', 'apply_mask_flag')
    except:
        apply_mask_flag = False
        logging.debug('  Defaulting apply_mask_flag=False')

    try:
        acca_flag = config.getboolean('INPUTS', 'acca_flag')
    except:
        acca_flag = False
    try:
        fmask_flag = config.getboolean('INPUTS', 'fmask_flag')
    except:
        fmask_flag = False

    # Intentionally don't apply scene_id skip/keep lists
    # Compute zonal stats for all available images
    # Filter by scene_id when making summary tables
    scene_id_keep_list = []
    scene_id_skip_list = []

    # # Only process specific Landsat scenes
    # try:
    #     scene_id_keep_path = config.get('INPUTS', 'scene_id_keep_path')
    #     with open(scene_id_keep_path) as input_f:
    #         scene_id_keep_list = input_f.readlines()
    #     scene_id_keep_list = [x.strip()[:16] for x in scene_id_keep_list]
    # except IOError:
    #     logging.error('\nFileIO Error: {}'.format(scene_id_keep_path))
    #     sys.exit()
    # except:
    #     scene_id_keep_list = []

    # # Skip specific landsat scenes
    # try:
    #     scene_id_skip_path = config.get('INPUTS', 'scene_id_skip_path')
    #     with open(scene_id_skip_path) as input_f:
    #         scene_id_skip_list = input_f.readlines()
    #     scene_id_skip_list = [x.strip()[:16] for x in scene_id_skip_list]
    # except IOError:
    #     logging.error('\nFileIO Error: {}'.format(scene_id_skip_path))
    #     sys.exit()
    # except:
    #     scene_id_skip_list = []

    # Only process certain Landsat path/rows
    try:
        path_keep_list = list(
            common.parse_int_set(config.get('INPUTS', 'path_keep_list')))
    except:
        path_keep_list = []
    # try:
    #     row_keep_list = list(
    #         common.parse_int_set(config.get('INPUTS', 'row_keep_list')))
    # except:
    #     row_keep_list = []

    # Skip or keep certain FID
    try:
        fid_skip_list = list(
            common.parse_int_set(config.get('INPUTS', 'fid_skip_list')))
    except:
        fid_skip_list = []
    try:
        fid_keep_list = list(
            common.parse_int_set(config.get('INPUTS', 'fid_keep_list')))
    except:
        fid_keep_list = []

    # For now, output projection must be manually set above to match zones
    zone_osr = gdc.feature_path_osr(zone_path)
    zone_proj = gdc.osr_proj(zone_osr)
    logging.info('\nThe zone shapefile must be in a projected coordinate system!')
    logging.info('  Proj4: {}'.format(zone_osr.ExportToProj4()))
    logging.info('{}'.format(zone_osr))

    # Read in zone shapefile
    logging.info('\nRasterizing Zone Shapefile')
    zone_name_dict = dict()
    zone_extent_dict = dict()
    zone_mask_dict = dict()

    # First get FIDs and extents
    zone_ds = ogr.Open(zone_path, 0)
    zone_lyr = zone_ds.GetLayer()
    zone_lyr.ResetReading()
    for zone_ftr in zone_lyr:
        zone_fid = zone_ftr.GetFID()
        if zone_field.upper() == 'FID':
            zone_name_dict[zone_fid] = str(zone_fid)
        else:
            zone_name_dict[zone_fid] = zone_ftr.GetField(zone_field)
        zone_extent = gdc.Extent(
            zone_ftr.GetGeometryRef().GetEnvelope()).ogrenv_swap()
        zone_extent.adjust_to_snap('EXPAND', zone_x, zone_y, zone_cs)
        zone_extent_dict[zone_fid] = list(zone_extent)

    # Rasterize each FID separately
    # The RasterizeLayer function wants a "layer"
    # There might be an easier way to select each feature as a layer
    for zone_fid, zone_extent in sorted(zone_extent_dict.items()):
        logging.debug('FID: {}'.format(zone_fid))
        logging.debug('  Name: {}'.format(zone_name_dict[zone_fid]))
        zone_ds = ogr.Open(zone_path, 0)
        zone_lyr = zone_ds.GetLayer()
        zone_lyr.ResetReading()
        zone_lyr.SetAttributeFilter("{0} = {1}".format('FID', zone_fid))

        zone_extent = gdc.Extent(zone_extent)
        zone_rows, zone_cols = zone_extent.shape(zone_cs)
        logging.debug('  Extent: {}'.format(str(zone_extent)))
        logging.debug('  Rows/Cols: {} {}'.format(zone_rows, zone_cols))

        # zones_lyr.SetAttributeFilter("{0} = {1}".format('FID', zone_fid))

        # Initialize the zone in memory raster
        mem_driver = gdal.GetDriverByName('MEM')
        zone_raster_ds = mem_driver.Create(
            '', zone_cols, zone_rows, 1, gdal.GDT_Byte)
        zone_raster_ds.SetProjection(zone_proj)
        zone_raster_ds.SetGeoTransform(
            gdc.extent_geo(zone_extent, cs=zone_cs))
        zone_band = zone_raster_ds.GetRasterBand(1)
        zone_band.SetNoDataValue(0)

        # Clear the raster before rasterizing
        zone_band.Fill(0)
        gdal.RasterizeLayer(zone_raster_ds, [1], zone_lyr)
        # zones_ftr_ds = None
        zone_array = gdc.raster_ds_to_array(
            zone_raster_ds, return_nodata=False)
        zone_mask = zone_array != 0
        logging.debug('  Pixel Count: {}'.format(np.sum(zone_mask)))
        # logging.debug('  Mask:\n{}'.format(zone_mask))
        # logging.debug('  Array:\n{}'.format(zone_array))
        zone_mask_dict[zone_fid] = zone_mask

        zone_raster_ds = None
        del zone_raster_ds, zone_array, zone_mask
    zone_ds = None
    del zone_ds, zone_lyr

    # Calculate zonal stats for each feature separately
    logging.info('')
    for fid, zone_str in sorted(zone_name_dict.items()):
        if fid_keep_list and fid not in fid_keep_list:
            continue
        elif fid_skip_list and fid in fid_skip_list:
            continue
        logging.info('ZONE: {} (FID: {})'.format(zone_str, fid))

        if not zone_field or zone_field.upper() == 'FID':
            zone_str = 'fid_' + zone_str
        else:
            zone_str = zone_str.lower().replace(' ', '_')

        zone_output_ws = os.path.join(output_ws, zone_str)
        if not os.path.isdir(zone_output_ws):
            os.makedirs(zone_output_ws)

        zone_extent = gdc.Extent(zone_extent_dict[fid])
        zone_mask = zone_mask_dict[fid]
        # logging.debug('  Extent: {}'.format(zone_extent))

        if landsat_flag:
            logging.info('  Landsat')

            landsat_output_ws = os.path.join(
                zone_output_ws, landsat_tables_folder)
            if not os.path.isdir(landsat_output_ws):
                os.makedirs(landsat_output_ws)
            logging.debug('  {}'.format(landsat_output_ws))

            # Project the zone extent to the image OSR
            clip_extent = gdc.project_extent(
                zone_extent, zone_osr, landsat_osr, zone_cs)
            # logging.debug('  Extent: {}'.format(clip_extent))
            clip_extent.adjust_to_snap('EXPAND', landsat_x, landsat_y, landsat_cs)
            logging.debug('  Extent: {}'.format(clip_extent))

            # Process date range by year
            for year in xrange(start_year, end_year + 1):
                images_year_ws = os.path.join(
                    images_ws, landsat_images_folder, str(year))
                if not os.path.isdir(images_year_ws):
                    logging.debug(
                        '  Landsat year folder doesn\'t exist, skipping\n    {}'.format(
                            images_year_ws))
                    continue
                else:
                    logging.info('  Year: {}'.format(year))

                # Create an empty dataframe
                output_path = os.path.join(
                    landsat_output_ws, '{}_landsat_{}.csv'.format(zone_str, year))
                if os.path.isfile(output_path):
                    if overwrite_flag:
                        logging.debug(
                            '  Output CSV already exists, removing\n    {}'.format(
                                output_path))
                        os.remove(output_path)
                    else:
                        logging.debug(
                            '  Output CSV already exists, skipping\n    {}'.format(
                                output_path))
                        continue
                output_df = pd.DataFrame(columns=landsat_daily_fields)
                output_df[landsat_int_fields] = output_df[
                    landsat_int_fields].astype(int)

                # Get list of all images
                year_image_list = [
                    image for image in os.listdir(images_year_ws)
                    if landsat_image_re.match(image)]
                # Get list of all unique dates (multiple images per date)
                year_dt_list = sorted(set([
                    datetime.datetime.strptime(image[:8], '%Y%m%d')
                    for image in year_image_list]))
                # Filter date lists if necessary
                if month_list:
                    year_dt_list = [
                        image_dt for image_dt in year_dt_list
                        if image_dt.month in month_list]
                if doy_list:
                    year_dt_list = [
                        image_dt for image_dt in year_dt_list
                        if int(image_dt.strftime('%j')) in doy_list]

                output_list = []
                for image_dt in year_dt_list:
                    image_str = image_dt.date().isoformat()
                    logging.debug('{}'.format(image_dt.date()))

                    # Get the list of available images
                    image_list = [
                        image for image in year_image_list
                        if image_dt.strftime('%Y%m%d') in image]
                    # This conditional is probably impossible
                    if not image_list:
                        logging.debug('    No images, skipping date')
                        continue

                    # Use date offsets to determine the Landsat and Path
                    ref_match = [
                        lp for ref_dt, lp in ref_dates.items()
                        if (((ref_dt - image_dt).days % 16 == 0) and
                            ((lp[0].upper() == 'LT5' and image_dt.year < 2012) or
                             (lp[0].upper() == 'LC8' and image_dt.year > 2012) or
                             (lp[0].upper() == 'LE7')))]
                    if ref_match:
                        landsat, path = ref_match[0]
                    else:
                        landsat, path = 'XXX', '000'
                    # Get Landsat type from first image in list
                    # image_dict['LANDSAT'] = image_list[0].split('.')[0].split('_')[2]
                    image_name_fmt = '{}_{}.{}.tif'.format(
                        image_dt.strftime('%Y%m%d_%j'), landsat.lower(), '{}')

                    if not landsat4_flag and landsat.upper() == 'LT4':
                        logging.debug('    Landsat 4, skipping image')
                        continue
                    elif not landsat5_flag and landsat.upper() == 'LT5':
                        logging.debug('    Landsat 5, skipping image')
                        continue
                    elif not landsat7_flag and landsat.upper() == 'LE7':
                        logging.debug('    Landsat 7, skipping image')
                        continue
                    elif not landsat8_flag and landsat.upper() == 'LC8':
                        logging.debug('    Landsat 8, skipping image')
                        continue

                    # Load the "mask" image first if it is available
                    # The zone_mask could be applied to the mask_array here
                    #   or below where it is used to select from the image_array
                    mask_name = image_name_fmt.format('mask')
                    mask_path = os.path.join(images_year_ws, mask_name)
                    if apply_mask_flag and mask_name in image_list:
                        logging.info('    Applying mask raster: {}'.format(
                            mask_path))
                        mask_input_array, mask_nodata = gdc.raster_to_array(
                            mask_path, band=1, mask_extent=clip_extent,
                            fill_value=None, return_nodata=True)
                        mask_array = gdc.project_array(
                            mask_input_array, gdal.GRA_NearestNeighbour,
                            landsat_osr, landsat_cs, clip_extent,
                            zone_osr, zone_cs, zone_extent,
                            output_nodata=None)
                        # Assume 0 and nodata indicate unmasked pixels
                        # All other pixels are "masked"
                        mask_array = (mask_array == 0) | (mask_array == mask_nodata)
                        # Assume 0 and nodata indicate masked pixels
                        # mask_array = (mask_array != 0) & (mask_array != mask_nodata)
                        if not np.any(mask_array):
                            logging.info('    No unmasked values')
                    else:
                        mask_array = np.ones(zone_mask.shape, dtype=bool)

                    # Save date specific properties
                    image_dict = dict()

                    # Get Fmask and Cloud score separately from other bands
                    # FMask
                    image_name = image_name_fmt.format('fmask')
                    image_path = os.path.join(images_year_ws, image_name)
                    if not os.path.isfile(image_path):
                        logging.error(
                            '  Image {} does not exist, skipping date'.format(
                                image_name))
                        continue
                    image_input_array, image_nodata = gdc.raster_to_array(
                        image_path, band=1, mask_extent=clip_extent,
                        fill_value=None, return_nodata=True)
                    fmask_array = gdc.project_array(
                        image_input_array, gdal.GRA_NearestNeighbour,
                        landsat_osr, landsat_cs, clip_extent,
                        zone_osr, zone_cs, zone_extent,
                        output_nodata=None)
                    fmask_mask = np.copy(zone_mask) & mask_array
                    if fmask_array.dtype in [np.float32, np.float64]:
                        fmask_mask &= np.isfinite(fmask_array)
                    else:
                        fmask_mask &= fmask_array != image_nodata
                    if not np.any(fmask_mask):
                        logging.debug('    Empty Fmask array, skipping')
                        continue
                    # Convert Fmask array into a boolean mask
                    #   (True/1 for cloud, shadow, or snow; False/0 for clear)
                    fmask_array = (fmask_array > 1.5) & (fmask_array < 4.5)
                    image_dict['FMASK_COUNT'] = int(np.sum(fmask_array[fmask_mask]))
                    image_dict['PIXEL_COUNT'] = int(np.sum(fmask_mask))
                    image_dict['MASK_COUNT'] = int(np.sum(mask_array))
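                    # CFMask class codes for reference: 0 clear, 1 water,
                    #   2 cloud shadow, 3 snow, 4 cloud; the threshold above
                    #   flags classes 2-4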

                    # Cloud Score
                    image_name = image_name_fmt.format('cloud_score')
                    image_path = os.path.join(images_year_ws, image_name)
                    image_input_array, image_nodata = gdc.raster_to_array(
                        image_path, band=1, mask_extent=clip_extent,
                        fill_value=None, return_nodata=True)
                    cloud_array = gdc.project_array(
                        image_input_array, gdal.GRA_NearestNeighbour,
                        landsat_osr, landsat_cs, clip_extent,
                        zone_osr, zone_cs, zone_extent,
                        output_nodata=None)
                    cloud_mask = np.copy(zone_mask) & mask_array
                    if cloud_array.dtype in [np.float32, np.float64]:
                        cloud_mask &= np.isfinite(cloud_array)
                    else:
                        cloud_mask &= cloud_array != image_nodata
                    if not np.any(cloud_mask):
                        logging.debug('    Empty Cloud Score array, skipping')
                        continue
                    image_dict['CLOUD_SCORE'] = float(np.mean(cloud_array[cloud_mask]))


                    # Workflow
                    zs_list = [
                        ['ts', 1, 'TS'],
                        ['albedo_sur', 1, 'ALBEDO_SUR'],
                        ['ndvi_toa', 1, 'NDVI_TOA'],
                        ['ndvi_sur', 1, 'NDVI_SUR'],
                        ['evi_sur', 1, 'EVI_SUR'],
                        ['ndwi_green_nir_sur', 1, 'NDWI_GREEN_NIR_SUR'],
                        ['ndwi_green_swir1_sur', 1, 'NDWI_GREEN_SWIR1_SUR'],
                        ['ndwi_nir_swir1_sur', 1, 'NDWI_NIR_SWIR1_SUR'],
                        ['tasseled_cap', 1, 'TC_BRIGHT'],
                        ['tasseled_cap', 2, 'TC_GREEN'],
                        ['tasseled_cap', 3, 'TC_WET']
                    ]
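                    # Each entry is [raster band name, band number, output field];
                    #   tasseled cap bands 1-3 are brightness/greenness/wetness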
                    for band_name, band_num, field in zs_list:
                        image_name = image_name_fmt.format(band_name)
                        logging.debug('  {} {}'.format(image_name, field))
                        if image_name not in image_list:
                            logging.debug('    Image doesn\'t exist, skipping')
                            continue
                        image_path = os.path.join(images_year_ws, image_name)
                        # logging.debug('  {}'.format(image_path))

                        image_input_array, image_nodata = gdc.raster_to_array(
                            image_path, band=band_num, mask_extent=clip_extent,
                            fill_value=None, return_nodata=True)

                        # GRA_NearestNeighbour, GRA_Bilinear, GRA_Cubic,
                        #   GRA_CubicSpline
                        image_array = gdc.project_array(
                            image_input_array, gdal.GRA_NearestNeighbour,
                            landsat_osr, landsat_cs, clip_extent,
                            zone_osr, zone_cs, zone_extent,
                            output_nodata=None)
                        image_mask = np.copy(zone_mask) & mask_array
                        if image_array.dtype in [np.float32, np.float64]:
                            image_mask &= np.isfinite(image_array)
                        else:
                            image_mask &= image_array != image_nodata
                        del image_input_array

                        if fmask_flag:
                            # Fmask array was converted into a mask
                            # 1 for cloud, 0 for clear
                            image_mask &= (fmask_array == 0)
                        if acca_flag:
                            image_mask &= (cloud_array < 50)

                        # Skip fully masked zones
                        # This would not work for FMASK and CLOUD_SCORE if we
                        #   weren't using nearest neighbor for resampling
                        if not np.any(image_mask):
                            logging.debug('    Empty array, skipping')
                            continue

                        image_dict[field] = float(np.mean(
                            image_array[image_mask]))

                        # Should check "first" image instead of Ts specifically
                        if band_name == 'ts':
                            image_dict['DATA_COUNT'] = int(np.sum(image_mask))

                        del image_array, image_mask

                    if not image_dict:
                        logging.debug(
                            '    {} - no image data in zone, skipping'.format(
                                image_str))
                        continue

                    # Save date specific properties
                    # Change fid zone strings back to integer values
                    if zone_str.startswith('fid_'):
                        image_dict[zone_field] = int(zone_str[4:])
                    else:
                        image_dict[zone_field] = zone_str
                    image_dict['DATE'] = image_str
                    image_dict['LANDSAT'] = landsat.upper()
                    image_dict['PATH'] = path
                    image_dict['ROW'] = '000'
                    image_dict['SCENE_ID'] = '{}{}{}{}'.format(
                        image_dict['LANDSAT'], image_dict['PATH'],
                        image_dict['ROW'], image_dt.strftime('%Y%j'))
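                    # e.g. 'LC8' + '043' + '000' + '2015123' ->
                    #   'LC80430002015123' (hypothetical path/date values)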
                    image_dict['YEAR'] = image_dt.year
                    image_dict['MONTH'] = image_dt.month
                    image_dict['DAY'] = image_dt.day
                    image_dict['DOY'] = int(image_dt.strftime('%j'))
                    # image_dict['PIXEL_COUNT'] = int(np.sum(zone_mask & mask_array))

                    # Save each row to a list
                    output_list.append(image_dict)

                # Append all rows for the year to a dataframe
                if not output_list:
                    logging.debug('    Empty output list, skipping')
                    continue
                output_df = pd.concat(
                    [output_df, pd.DataFrame(output_list)], ignore_index=True)
                output_df.sort_values(by=['DATE'], inplace=True)
                logging.debug('  {}'.format(output_path))
                output_df.to_csv(output_path, index=False, columns=landsat_daily_fields)


            # Combine/merge annual files into a single CSV
            logging.debug('\n  Merging annual Landsat CSV files')
            output_df = None
            for year in range(start_year, end_year + 1):
                # logging.debug('    {}'.format(year))
                input_path = os.path.join(
                    landsat_output_ws, '{}_landsat_{}.csv'.format(zone_str, year))
                try:
                    input_df = pd.read_csv(input_path)
                except (IOError, pd.errors.EmptyDataError):
                    continue
                if output_df is None:
                    output_df = input_df.copy()
                else:
                    output_df = pd.concat(
                        [output_df, input_df], ignore_index=True)

            if output_df is not None and not output_df.empty:
                output_path = os.path.join(
                    zone_output_ws,
                    '{}_landsat_daily.csv'.format(zone_str))
                logging.debug('  {}'.format(output_path))
                output_df.sort_values(by=['DATE', 'ROW'], inplace=True)
                output_df.to_csv(
                    output_path, index=False, columns=landsat_daily_fields)


        if gridmet_flag:
            logging.info('  GRIDMET ETo/PPT')

            # Project the zone extent to the image OSR
            clip_extent = gdc.project_extent(
                zone_extent, zone_osr, gridmet_osr, zone_cs)
            logging.debug('  Extent: {}'.format(clip_extent))
            # clip_extent.buffer_extent(gridmet_cs)
            # logging.debug('  Extent: {}'.format(clip_extent))
            clip_extent.adjust_to_snap('EXPAND', gridmet_x, gridmet_y, gridmet_cs)
            logging.debug('  Extent: {}'.format(clip_extent))

            gridmet_images_ws = os.path.join(images_ws, gridmet_images_folder)
            if not os.path.isdir(gridmet_images_ws):
                logging.debug(
                    '  GRIDMET folder doesn\'t exist, skipping\n    {}'.format(
                        gridmet_images_ws))
                continue
            else:
                logging.info('  {}'.format(gridmet_images_ws))

            # Create an empty dataframe
            output_path = os.path.join(
                zone_output_ws,
                '{}_gridmet_monthly.csv'.format(zone_str))
            if os.path.isfile(output_path):
                if overwrite_flag:
                    logging.debug(
                        '  Output CSV already exists, removing\n    {}'.format(
                            output_path))
                    os.remove(output_path)
                else:
                    logging.debug(
                        '  Output CSV already exists, skipping\n    {}'.format(
                            output_path))
                    continue
            output_df = pd.DataFrame(columns=gridmet_monthly_fields)
            output_df[gridmet_int_fields] = output_df[gridmet_int_fields].astype(int)

            # Get list of all images
            image_list = [
                image for image in os.listdir(gridmet_images_ws)
                if gridmet_image_re.match(image)]
            dt_list = sorted(set([
                datetime.datetime(int(image[:4]), int(image[4:6]), 1)
                for image in image_list]))

            output_list = []
            for image_dt in dt_list:
                image_str = image_dt.date().isoformat()
                logging.debug('{}'.format(image_dt.date()))

                image_name_fmt = '{}_gridmet.{}.tif'.format(
                    image_dt.strftime('%Y%m'), '{}')

                # Save date specific properties
                image_dict = dict()

                # Workflow
                zs_list = [
                    ['eto', 'ETO'],
                    ['ppt', 'PPT'],
                ]
                for band_name, field in zs_list:
                    image_name = image_name_fmt.format(band_name)
                    logging.debug('  {} {}'.format(image_name, field))
                    if image_name not in image_list:
                        logging.debug('    Image doesn\'t exist, skipping')
                        continue
                    image_path = os.path.join(gridmet_images_ws, image_name)
                    # logging.debug('  {}'.format(image_path))

                    image_input_array, image_nodata = gdc.raster_to_array(
                        image_path, band=1, mask_extent=clip_extent,
                        fill_value=None, return_nodata=True)

                    # GRA_NearestNeighbour, GRA_Bilinear, GRA_Cubic,
                    #   GRA_CubicSpline
                    image_array = gdc.project_array(
                        image_input_array, gdal.GRA_NearestNeighbour,
                        gridmet_osr, gridmet_cs, clip_extent,
                        zone_osr, zone_cs, zone_extent,
                        output_nodata=None)
                    del image_input_array

                    # Skip fully masked zones
                    if (np.all(np.isnan(image_array)) or
                            np.all(image_array == image_nodata)):
                        logging.debug('    Empty array, skipping')
                        continue

                    image_dict[field] = np.mean(image_array[zone_mask])
                    del image_array

                if not image_dict:
                    logging.debug(
                        '    {} - no image data in zone, skipping'.format(
                            image_str))
                    continue

                # Save date specific properties
                # Change fid zone strings back to integer values
                if zone_str.startswith('fid_'):
                    image_dict[zone_field] = int(zone_str[4:])
                else:
                    image_dict[zone_field] = zone_str
                image_dict['DATE'] = image_str
                image_dict['YEAR'] = image_dt.year
                image_dict['MONTH'] = image_dt.month
                image_dict['WATER_YEAR'] = (image_dt + relativedelta(months=3)).year
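                # e.g. 2015-10 shifts to 2016-01, so Oct-Dec months fall
                #   in the next water year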

                # Save each row to a list
                output_list.append(image_dict)

            # Append all monthly rows to the dataframe
            if not output_list:
                logging.debug('    Empty output list, skipping')
                continue
            output_df = pd.concat(
                [output_df, pd.DataFrame(output_list)], ignore_index=True)
            output_df.sort_values(by=['DATE'], inplace=True)
            logging.debug('  {}'.format(output_path))
            output_df.to_csv(
                output_path, index=False, columns=gridmet_monthly_fields)


        if pdsi_flag:
            logging.info('  GRIDMET PDSI')
            logging.info('  Not currently implemented')
Exemplo n.º 9
0
def main(netcdf_ws=os.getcwd(),
         ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(),
         etr_flag=False,
         eto_flag=False,
         start_date=None,
         end_date=None,
         extent_path=None,
         output_extent=None,
         stats_flag=True,
         overwrite_flag=False):
    """Compute daily ETr/ETo from GRIDMET data

    Args:
        netcdf_ws (str): folder of GRIDMET netcdf files
        ancillary_ws (str): folder of ancillary rasters
        output_ws (str): folder of output rasters
        etr_flag (bool): if True, compute alfalfa reference ET (ETr)
        eto_flag (bool): if True, compute grass reference ET (ETo)
        start_date (str): ISO format date (YYYY-MM-DD)
        end_date (str): ISO format date (YYYY-MM-DD)
        extent_path (str): file path defining the output extent
        output_extent (list): decimal degrees values defining output extent
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nComputing GRIDMET ETo/ETr')
    np.seterr(invalid='ignore')

    # Compute ETr and/or ETo
    if not etr_flag and not eto_flag:
        logging.info('  ETo/ETr flag(s) not set, defaulting to ETr')
        etr_flag = True

    # If a date is not set, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except:
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date:   {}'.format(end_dt))
    except:
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    # GRIDMET latitude and elevation ancillary rasters
    elev_raster = os.path.join(ancillary_ws, 'gridmet_elev.img')
    lat_raster = os.path.join(ancillary_ws, 'gridmet_lat.img')

    # GRIDMET wind speed is measured at 10m
    zw = 10

    etr_fmt = 'etr_{}_daily_gridmet.img'
    eto_fmt = 'eto_{}_daily_gridmet.img'
    # gridmet_re = re.compile('(?P<VAR>\w+)_(?P<YEAR>\d{4}).nc')

    # GRIDMET band name dictionary
    gridmet_band_dict = dict()
    gridmet_band_dict['pr'] = 'precipitation_amount'
    gridmet_band_dict['srad'] = 'surface_downwelling_shortwave_flux_in_air'
    gridmet_band_dict['sph'] = 'specific_humidity'
    gridmet_band_dict['tmmn'] = 'air_temperature'
    gridmet_band_dict['tmmx'] = 'air_temperature'
    gridmet_band_dict['vs'] = 'wind_speed'
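    # Note that tmmn and tmmx come from separate files but share the
    #   same netCDF variable name ('air_temperature')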

    # Get extent/geo from elevation raster
    gridmet_ds = gdal.Open(elev_raster)
    gridmet_osr = gdc.raster_ds_osr(gridmet_ds)
    gridmet_proj = gdc.osr_proj(gridmet_osr)
    gridmet_cs = gdc.raster_ds_cellsize(gridmet_ds, x_only=True)
    gridmet_extent = gdc.raster_ds_extent(gridmet_ds)
    gridmet_full_geo = gridmet_extent.geo(gridmet_cs)
    gridmet_x, gridmet_y = gridmet_extent.origin()
    gridmet_ds = None
    logging.debug('  Projection: {}'.format(gridmet_proj))
    logging.debug('  Cellsize: {}'.format(gridmet_cs))
    logging.debug('  Geo: {}'.format(gridmet_full_geo))
    logging.debug('  Extent: {}'.format(gridmet_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        gridmet_extent = gdc.Extent(output_extent)
        gridmet_extent.adjust_to_snap('EXPAND', gridmet_x, gridmet_y,
                                      gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug('  Geo: {}'.format(gridmet_geo))
        logging.debug('  Extent: {}'.format(gridmet_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            gridmet_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            gridmet_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        gridmet_extent = gdc.project_extent(gridmet_extent, extent_osr,
                                            gridmet_osr, extent_cs)
        gridmet_extent.adjust_to_snap('EXPAND', gridmet_x, gridmet_y,
                                      gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug('  Geo: {}'.format(gridmet_geo))
        logging.debug('  Extent: {}'.format(gridmet_extent))
    else:
        gridmet_geo = gridmet_full_geo

    # Get indices for slicing/clipping input arrays
    g_i, g_j = gdc.array_geo_offsets(gridmet_full_geo,
                                     gridmet_geo,
                                     cs=gridmet_cs)
    g_rows, g_cols = gridmet_extent.shape(cs=gridmet_cs)

    # Read the elevation and latitude arrays
    elev_array = gdc.raster_to_array(elev_raster,
                                     mask_extent=gridmet_extent,
                                     return_nodata=False)
    lat_array = gdc.raster_to_array(lat_raster,
                                    mask_extent=gridmet_extent,
                                    return_nodata=False)
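    # Convert latitude from degrees to radians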
    lat_array *= math.pi / 180

    # Check elevation and latitude arrays
    if np.all(np.isnan(elev_array)):
        logging.error('\nERROR: The elevation array is all nodata, exiting\n')
        sys.exit()
    elif np.all(np.isnan(lat_array)):
        logging.error('\nERROR: The latitude array is all nodata, exiting\n')
        sys.exit()

    # Build output folder
    etr_ws = os.path.join(output_ws, 'etr')
    eto_ws = os.path.join(output_ws, 'eto')
    if etr_flag and not os.path.isdir(etr_ws):
        os.makedirs(etr_ws)
    if eto_flag and not os.path.isdir(eto_ws):
        os.makedirs(eto_ws)

    # Process each year in the start/end date range
    #   (a single-year range yields a one item list)
    year_list = sorted(map(str, range(start_dt.year, end_dt.year + 1)))

    # Process each year separately
    for year_str in year_list:
        logging.info("\nYear: {}".format(year_str))
        year_int = int(year_str)
        year_days = int(dt.datetime(year_int, 12, 31).strftime('%j'))
        if start_dt is not None and year_int < start_dt.year:
            logging.debug('  Before start date, skipping')
            continue
        elif end_dt is not None and year_int > end_dt.year:
            logging.debug('  After end date, skipping')
            continue

        # Build input file path
        tmin_path = os.path.join(netcdf_ws, 'tmmn_{}.nc'.format(year_str))
        tmax_path = os.path.join(netcdf_ws, 'tmmx_{}.nc'.format(year_str))
        sph_path = os.path.join(netcdf_ws, 'sph_{}.nc'.format(year_str))
        rs_path = os.path.join(netcdf_ws, 'srad_{}.nc'.format(year_str))
        wind_path = os.path.join(netcdf_ws, 'vs_{}.nc'.format(year_str))
        # Check that all input files are present
        missing_flag = False
        for input_path in [tmin_path, tmax_path, sph_path, rs_path, wind_path]:
            if not os.path.isfile(input_path):
                logging.debug(
                    '  Input NetCDF doesn\'t exist\n    {}'.format(input_path))
                missing_flag = True
        if missing_flag:
            logging.debug('  skipping')
            continue
        logging.debug("  {}".format(tmin_path))
        logging.debug("  {}".format(tmax_path))
        logging.debug("  {}".format(sph_path))
        logging.debug("  {}".format(rs_path))
        logging.debug("  {}".format(wind_path))

        # Create a single raster for each year with 366 bands
        # Each day will be stored in a separate band
        etr_raster = os.path.join(etr_ws, etr_fmt.format(year_str))
        eto_raster = os.path.join(eto_ws, eto_fmt.format(year_str))
        if etr_flag and (overwrite_flag or not os.path.isfile(etr_raster)):
            logging.debug('  {}'.format(etr_raster))
            gdc.build_empty_raster(etr_raster,
                                   band_cnt=366,
                                   output_dtype=np.float32,
                                   output_proj=gridmet_proj,
                                   output_cs=gridmet_cs,
                                   output_extent=gridmet_extent,
                                   output_fill_flag=True)
        if eto_flag and (overwrite_flag or not os.path.isfile(eto_raster)):
            logging.debug('  {}'.format(eto_raster))
            gdc.build_empty_raster(eto_raster,
                                   band_cnt=366,
                                   output_dtype=np.float32,
                                   output_proj=gridmet_proj,
                                   output_cs=gridmet_cs,
                                   output_extent=gridmet_extent,
                                   output_fill_flag=True)
        # DEADBEEF - Need to find a way to test if both of these conditionals
        #   did not pass and pass logging debug message to user

        # Read in the GRIDMET NetCDF file
        tmin_nc_f = netCDF4.Dataset(tmin_path, 'r')
        tmax_nc_f = netCDF4.Dataset(tmax_path, 'r')
        sph_nc_f = netCDF4.Dataset(sph_path, 'r')
        rs_nc_f = netCDF4.Dataset(rs_path, 'r')
        wind_nc_f = netCDF4.Dataset(wind_path, 'r')

        logging.info('  Reading NetCDFs into memory')
        # Immediately clip input arrays to save memory
        tmin_nc = tmin_nc_f.variables[gridmet_band_dict['tmmn']][
            :, g_i:g_i + g_cols, g_j:g_j + g_rows].copy()
        tmax_nc = tmax_nc_f.variables[gridmet_band_dict['tmmx']][
            :, g_i:g_i + g_cols, g_j:g_j + g_rows].copy()
        sph_nc = sph_nc_f.variables[gridmet_band_dict['sph']][
            :, g_i:g_i + g_cols, g_j:g_j + g_rows].copy()
        rs_nc = rs_nc_f.variables[gridmet_band_dict['srad']][
            :, g_i:g_i + g_cols, g_j:g_j + g_rows].copy()
        wind_nc = wind_nc_f.variables[gridmet_band_dict['vs']][
            :, g_i:g_i + g_cols, g_j:g_j + g_rows].copy()
        # tmin_nc = tmin_nc_f.variables[gridmet_band_dict['tmmn']][:]
        # tmax_nc = tmax_nc_f.variables[gridmet_band_dict['tmmx']][:]
        # sph_nc = sph_nc_f.variables[gridmet_band_dict['sph']][:]
        # rs_nc = rs_nc_f.variables[gridmet_band_dict['srad']][:]
        # wind_nc = wind_nc_f.variables[gridmet_band_dict['vs']][:]

        # Transpose arrays back to row x col
        tmin_nc = np.transpose(tmin_nc, (0, 2, 1))
        tmax_nc = np.transpose(tmax_nc, (0, 2, 1))
        sph_nc = np.transpose(sph_nc, (0, 2, 1))
        rs_nc = np.transpose(rs_nc, (0, 2, 1))
        wind_nc = np.transpose(wind_nc, (0, 2, 1))

        # A numpy array is returned when slicing a masked array
        #   if there are no masked pixels
        # This is a hack to force the numpy array back to a masked array
        # For now assume all arrays need to be converted
        if not isinstance(tmin_nc, np.ma.MaskedArray):
            tmin_nc = np.ma.MaskedArray(
                tmin_nc, np.zeros(tmin_nc.shape, dtype=bool))
        if not isinstance(tmax_nc, np.ma.MaskedArray):
            tmax_nc = np.ma.MaskedArray(
                tmax_nc, np.zeros(tmax_nc.shape, dtype=bool))
        if not isinstance(sph_nc, np.ma.MaskedArray):
            sph_nc = np.ma.MaskedArray(
                sph_nc, np.zeros(sph_nc.shape, dtype=bool))
        if not isinstance(rs_nc, np.ma.MaskedArray):
            rs_nc = np.ma.MaskedArray(
                rs_nc, np.zeros(rs_nc.shape, dtype=bool))
        if not isinstance(wind_nc, np.ma.MaskedArray):
            wind_nc = np.ma.MaskedArray(
                wind_nc, np.zeros(wind_nc.shape, dtype=bool))

        # Check all valid dates in the year
        year_dates = date_range(dt.datetime(year_int, 1, 1),
                                dt.datetime(year_int + 1, 1, 1))
        for date_dt in year_dates:
            if start_dt is not None and date_dt < start_dt:
                logging.debug('  {} - before start date, skipping'.format(
                    date_dt.date()))
                continue
            elif end_dt is not None and date_dt > end_dt:
                logging.debug('  {} - after end date, skipping'.format(
                    date_dt.date()))
                continue
            else:
                logging.info('  {}'.format(date_dt.date()))

            doy = int(date_dt.strftime('%j'))
            doy_i = range(1, year_days + 1).index(doy)
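            # doy_i is the zero-based band index for this DOY
            #   (equivalent to doy - 1)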

            # Arrays are read as masked arrays with a fill value of -9999
            # Convert to plain numpy arrays with NaN for nodata
            try:
                tmin_ma = tmin_nc[doy_i, :, :]
            except IndexError:
                logging.info('    date not in netcdf, skipping')
                continue
            tmin_array = tmin_ma.data.astype(np.float32)
            tmin_nodata = float(tmin_ma.fill_value)
            tmin_array[tmin_array == tmin_nodata] = np.nan

            try:
                tmax_ma = tmax_nc[doy_i, :, :]
            except IndexError:
                logging.info('    date not in netcdf, skipping')
                continue
            tmax_array = tmax_ma.data.astype(np.float32)
            tmax_nodata = float(tmax_ma.fill_value)
            tmax_array[tmax_array == tmax_nodata] = np.nan

            try:
                sph_ma = sph_nc[doy_i, :, :]
            except IndexError:
                logging.info('    date not in netcdf, skipping')
                continue
            sph_array = sph_ma.data.astype(np.float32)
            sph_nodata = float(sph_ma.fill_value)
            sph_array[sph_array == sph_nodata] = np.nan

            try:
                rs_ma = rs_nc[doy_i, :, :]
            except IndexError:
                logging.info('    date not in netcdf, skipping')
                continue
            rs_array = rs_ma.data.astype(np.float32)
            rs_nodata = float(rs_ma.fill_value)
            rs_array[rs_array == rs_nodata] = np.nan

            try:
                wind_ma = wind_nc[doy_i, :, :]
            except IndexError:
                logging.info('    date not in netcdf, skipping')
                continue
            wind_array = wind_ma.data.astype(np.float32)
            wind_nodata = float(wind_ma.fill_value)
            wind_array[wind_array == wind_nodata] = np.nan
            del tmin_ma, tmax_ma, sph_ma, rs_ma, wind_ma

            # Since inputs are netcdf, need to create GDAL raster
            #   datasets in order to use gdal_common functions
            # Create in-memory datasets of each clipped met variable array
            tmin_ds = gdc.array_to_mem_ds(
                tmin_array,
                output_geo=gridmet_geo,
                # tmin_array, output_geo=gridmet_full_geo,
                output_proj=gridmet_proj)
            tmax_ds = gdc.array_to_mem_ds(
                tmax_array,
                output_geo=gridmet_geo,
                # tmax_array, output_geo=gridmet_full_geo,
                output_proj=gridmet_proj)
            sph_ds = gdc.array_to_mem_ds(
                sph_array,
                output_geo=gridmet_geo,
                # sph_array, output_geo=gridmet_full_geo,
                output_proj=gridmet_proj)
            rs_ds = gdc.array_to_mem_ds(
                rs_array,
                output_geo=gridmet_geo,
                # rs_array, output_geo=gridmet_full_geo,
                output_proj=gridmet_proj)
            wind_ds = gdc.array_to_mem_ds(
                wind_array,
                output_geo=gridmet_geo,
                # wind_array, output_geo=gridmet_full_geo,
                output_proj=gridmet_proj)

            # Then extract the subset from the in memory dataset
            tmin_array = gdc.raster_ds_to_array(tmin_ds,
                                                1,
                                                mask_extent=gridmet_extent,
                                                return_nodata=False)
            tmax_array = gdc.raster_ds_to_array(tmax_ds,
                                                1,
                                                mask_extent=gridmet_extent,
                                                return_nodata=False)
            sph_array = gdc.raster_ds_to_array(sph_ds,
                                               1,
                                               mask_extent=gridmet_extent,
                                               return_nodata=False)
            rs_array = gdc.raster_ds_to_array(rs_ds,
                                              1,
                                              mask_extent=gridmet_extent,
                                              return_nodata=False)
            wind_array = gdc.raster_ds_to_array(wind_ds,
                                                1,
                                                mask_extent=gridmet_extent,
                                                return_nodata=False)
            del tmin_ds, tmax_ds, sph_ds, rs_ds, wind_ds

            # Adjust units
            # Air temperature: K -> C
            tmin_array -= 273.15
            tmax_array -= 273.15
            # Solar radiation: W m-2 -> MJ m-2 d-1 (86400 s/day * 1e-6 MJ/J)
            rs_array *= 0.0864

            # ETr/ETo
            if etr_flag:
                etr_array = et_common.refet_daily_func(tmin_array, tmax_array,
                                                       sph_array, rs_array,
                                                       wind_array, zw,
                                                       elev_array, lat_array,
                                                       doy, 'ETR')
            if eto_flag:
                eto_array = et_common.refet_daily_func(tmin_array, tmax_array,
                                                       sph_array, rs_array,
                                                       wind_array, zw,
                                                       elev_array, lat_array,
                                                       doy, 'ETO')
            # del tmin_array, tmax_array, sph_array, rs_array, wind_array

            # Save the projected array as 32-bit floats
            if etr_flag:
                gdc.array_to_comp_raster(etr_array.astype(np.float32),
                                         etr_raster,
                                         band=doy,
                                         stats_flag=False)
                # gdc.array_to_raster(
                #     etr_array.astype(np.float32), etr_raster,
                #     output_geo=gridmet_geo, output_proj=gridmet_proj,
                #     stats_flag=stats_flag)
                del etr_array
            if eto_flag:
                gdc.array_to_comp_raster(eto_array.astype(np.float32),
                                         eto_raster,
                                         band=doy,
                                         stats_flag=False)
                # gdc.array_to_raster(
                #     eto_array.astype(np.float32), eto_raster,
                #     output_geo=gridmet_geo, output_proj=gridmet_proj,
                #     stats_flag=stats_flag)
                del eto_array

        del tmin_nc
        del tmax_nc
        del sph_nc
        del rs_nc
        del wind_nc

        tmin_nc_f.close()
        tmax_nc_f.close()
        sph_nc_f.close()
        rs_nc_f.close()
        wind_nc_f.close()
        del tmin_nc_f, tmax_nc_f, sph_nc_f, rs_nc_f, wind_nc_f

        if stats_flag and etr_flag:
            gdc.raster_statistics(etr_raster)
        if stats_flag and eto_flag:
            gdc.raster_statistics(eto_raster)

    logging.debug('\nScript Complete')
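
# Minimal usage sketch (not part of the original example); the workspace
#   paths below are assumptions and must point at real GRIDMET data
if __name__ == '__main__':
    main(netcdf_ws='gridmet/netcdf',
         ancillary_ws='gridmet/ancillary',
         output_ws='gridmet',
         etr_flag=True,
         eto_flag=True,
         start_date='2017-01-01',
         end_date='2017-12-31')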
Exemplo n.º 10
0
def main(netcdf_ws=os.getcwd(),
         ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(),
         variables=['prcp'],
         daily_flag=False,
         monthly_flag=True,
         annual_flag=False,
         start_year=1981,
         end_year=2010,
         extent_path=None,
         output_extent=None,
         stats_flag=True,
         overwrite_flag=False):
    """Extract DAYMET temperature

    Args:
        netcdf_ws (str): folder of DAYMET netcdf files
        ancillary_ws (str): folder of ancillary rasters
        output_ws (str): folder of output rasters
        variables (list): DAYMET variables to process
          ('prcp', 'srad', 'vp', 'tmmn', 'tmmx')
          Set as ['all'] to process all variables
        daily_flag (bool): if True, compute daily (DOY) climatologies
        monthly_flag (bool): if True, compute monthly climatologies
        annual_flag (bool): if True, compute annual climatologies
        start_year (int): YYYY
        end_year (int): YYYY
        extent_path (str): file path of a raster defining the output extent
        output_extent (list): decimal degrees values defining output extent
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nGenerating DAYMET climatologies')

    daily_fmt = 'daymet_{var}_30yr_normal_{doy:03d}.img'
    monthly_fmt = 'daymet_{var}_30yr_normal_{month:02d}.img'
    annual_fmt = 'daymet_{var}_30yr_normal.img'
    # daily_fmt = 'daymet_{var}_normal_{start}_{end}_{doy:03d}.img'
    # monthly_fmt = 'daymet_{var}_normal_{start}_{end}_{month:02d}.img'
    # annual_fmt = 'daymet_{var}_normal_{start}_{end}.img'

    # If a date is not set, process 1981-2010 climatology
    try:
        start_dt = dt.datetime(start_year, 1, 1)
        logging.debug('  Start date: {}'.format(start_dt))
    except:
        start_dt = dt.datetime(1981, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime(end_year, 12, 31)
        logging.debug('  End date:   {}'.format(end_dt))
    except:
        end_dt = dt.datetime(2010, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    # Get DAYMET spatial reference from an ancillary raster
    mask_raster = os.path.join(ancillary_ws, 'daymet_mask.img')

    daymet_re = re.compile('daymet_v3_(?P<VAR>\w+)_(?P<YEAR>\d{4})_na.nc4$')

    # DAYMET rasters to extract
    var_full_list = ['prcp', 'tmmn', 'tmmx']
    # data_full_list = ['prcp', 'srad', 'vp', 'tmmn', 'tmmx']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif type(variables) is not list:
        # DEADBEEF - I could try converting comma separated strings to lists?
        logging.error('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif 'all' in variables:
        logging.info('\nProcessing all variables\n  {}'.format(
            ','.join(var_full_list)))
        var_list = var_full_list[:]
    elif not set(variables).issubset(set(var_full_list)):
        logging.error(
            '\nERROR: variables parameter is invalid\n  {}'.format(variables))
        sys.exit()
    else:
        var_list = variables[:]

    # Get extent/geo from mask raster
    daymet_ds = gdal.Open(mask_raster)
    daymet_osr = gdc.raster_ds_osr(daymet_ds)
    daymet_proj = gdc.osr_proj(daymet_osr)
    daymet_cs = gdc.raster_ds_cellsize(daymet_ds, x_only=True)
    daymet_extent = gdc.raster_ds_extent(daymet_ds)
    daymet_geo = daymet_extent.geo(daymet_cs)
    daymet_x, daymet_y = daymet_extent.origin()
    daymet_ds = None
    logging.debug('  Projection: {}'.format(daymet_proj))
    logging.debug('  Cellsize: {}'.format(daymet_cs))
    logging.debug('  Geo: {}'.format(daymet_geo))
    logging.debug('  Extent: {}'.format(daymet_extent))
    logging.debug('  Origin: {} {}'.format(daymet_x, daymet_y))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        # Assume input extent is in decimal degrees
        output_extent = gdc.project_extent(gdc.Extent(output_extent),
                                           gdc.epsg_osr(4326), daymet_osr,
                                           0.001)
        output_extent = gdc.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug('  Geo: {}'.format(output_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        output_extent = gdc.project_extent(
            gdc.raster_path_extent(extent_path),
            gdc.raster_path_osr(extent_path), daymet_osr,
            gdc.raster_path_cellsize(extent_path, x_only=True))
        output_extent = gdc.intersect_extents([daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug('  Geo: {}'.format(output_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    else:
        output_extent = daymet_extent.copy()
        output_geo = daymet_geo[:]
    output_shape = output_extent.shape(cs=daymet_cs)
    xi, yi = gdc.array_geo_offsets(daymet_geo, output_geo, daymet_cs)
    output_rows, output_cols = output_extent.shape(daymet_cs)
    logging.debug('  Shape: {} {}'.format(output_rows, output_cols))
    logging.debug('  Offsets: {} {} (x y)'.format(xi, yi))

    # Process each variable
    for input_var in var_list:
        logging.info("\nVariable: {}".format(input_var))

        # Rename variables to match cimis
        if input_var == 'prcp':
            output_var = 'ppt'
        else:
            output_var = input_var
        logging.debug("Output name: {}".format(output_var))

        # Build output folder
        var_ws = os.path.join(output_ws, output_var)
        if not os.path.isdir(var_ws):
            os.makedirs(var_ws)

        # Build output arrays
        logging.debug('  Building arrays')
        if daily_flag:
            daily_sum = np.full((365, output_shape[0], output_shape[1]), 0,
                                np.float64)
            daily_count = np.full((365, output_shape[0], output_shape[1]), 0,
                                  np.uint8)
        if monthly_flag:
            monthly_sum = np.full((12, output_shape[0], output_shape[1]), 0,
                                  np.float64)
            monthly_count = np.full((12, output_shape[0], output_shape[1]), 0,
                                    np.uint8)
        if annual_flag:
            annual_sum = np.full((output_shape[0], output_shape[1]), 0,
                                 np.float64)
            annual_count = np.full((output_shape[0], output_shape[1]), 0,
                                   np.uint8)
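        # Climatology technique: accumulate running sums and valid-pixel
        #   counts, then divide (sum / count) when writing outputs below;
        #   note DAYMET uses a 365-day calendar (Dec 31 dropped in leap years)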

        # Process each file/year separately
        for input_name in sorted(os.listdir(netcdf_ws)):
            logging.debug("  {}".format(input_name))
            input_match = daymet_re.match(input_name)
            if not input_match:
                logging.debug('  Regular expression didn\'t match, skipping')
                continue
            elif input_match.group('VAR') != input_var:
                logging.debug('  Variable didn\'t match, skipping')
                continue
            year_str = input_match.group('YEAR')
            logging.info("  Year: {}".format(year_str))
            year_int = int(year_str)
            year_days = int(dt.datetime(year_int, 12, 31).strftime('%j'))
            if start_dt is not None and year_int < start_dt.year:
                logging.debug('    Before start date, skipping')
                continue
            elif end_dt is not None and year_int > end_dt.year:
                logging.debug('    After end date, skipping')
                continue

            # Build input file path
            input_raster = os.path.join(netcdf_ws, input_name)
            if not os.path.isfile(input_raster):
                logging.debug(
                    '  Input NetCDF doesn\'t exist, skipping\n    {}'.format(
                        input_raster))
                continue

            # Build output folder
            if daily_flag:
                daily_ws = os.path.join(var_ws, 'daily')
                if not os.path.isdir(daily_ws):
                    os.makedirs(daily_ws)

            if monthly_flag:
                monthly_temp_sum = np.full(
                    (12, output_shape[0], output_shape[1]), 0, np.float64)
                monthly_temp_count = np.full(
                    (12, output_shape[0], output_shape[1]), 0, np.uint8)

            # Read in the DAYMET NetCDF file
            input_nc_f = netCDF4.Dataset(input_raster, 'r')
            # logging.debug(input_nc_f.variables)

            # Check all valid dates in the year
            year_dates = date_range(dt.datetime(year_int, 1, 1),
                                    dt.datetime(year_int + 1, 1, 1))
            for date_dt in year_dates:
                logging.debug('  {}'.format(date_dt.date()))
                # if start_dt is not None and date_dt < start_dt:
                #     logging.debug(
                #         '  {} - before start date, skipping'.format(
                #             date_dt.date()))
                #     continue
                # elif end_dt is not None and date_dt > end_dt:
                #     logging.debug('  {} - after end date, skipping'.format(
                #         date_dt.date()))
                #     continue
                # else:
                #     logging.info('  {}'.format(date_dt.date()))

                doy = int(date_dt.strftime('%j'))
                doy_i = range(1, year_days + 1).index(doy)
                month_i = date_dt.month - 1

                # Arrays are read as masked arrays with a -9999 fill value
                # Convert to plain numpy arrays with NaN for nodata
                try:
                    input_ma = input_nc_f.variables[input_var][
                        doy_i, yi:yi + output_rows, xi:xi + output_cols]
                except IndexError:
                    logging.info('    date not in netcdf, skipping')
                    continue
                input_nodata = float(input_ma.fill_value)
                output_array = input_ma.data.astype(np.float32)
                output_array[output_array == input_nodata] = np.nan
                output_mask = np.isfinite(output_array)

                # DAYMET temperatures are natively in Celsius; the names
                #   below never match the 'tmmn'/'tmmx' variables used here,
                #   so no conversion is applied
                if input_var in ['tmax', 'tmin']:
                    output_array -= 273.15

                # Save values
                if daily_flag:
                    daily_sum[doy_i, :, :] += output_array
                    daily_count[doy_i, :, :] += output_mask
                if monthly_flag:
                    monthly_temp_sum[month_i, :, :] += output_array
                    monthly_temp_count[month_i, :, :] += output_mask
                if annual_flag:
                    annual_sum[:, :] += output_array
                    annual_count[:, :] += output_mask

                # Cleanup
                # del input_ds, input_array
                del input_ma, output_array, output_mask

            # Compute mean monthly for the year
            if monthly_flag:
                # Sum precipitation
                if input_var == 'prcp':
                    monthly_sum += monthly_temp_sum
                else:
                    monthly_sum += monthly_temp_sum / monthly_temp_count
                # Is this the right count?
                monthly_count += np.any(monthly_temp_count, axis=0)
                del monthly_temp_sum, monthly_temp_count

            input_nc_f.close()
            del input_nc_f

        # Save the projected climatology arrays
        if daily_flag:
            for doy_i in range(daily_sum.shape[0]):
                daily_name = daily_fmt.format(var=output_var,
                                              start=start_year,
                                              end=end_year,
                                              doy=doy_i + 1)
                daily_path = os.path.join(daily_ws, daily_name)
                gdc.array_to_raster(daily_sum[doy_i, :, :] /
                                    daily_count[doy_i, :, :],
                                    daily_path,
                                    output_geo=output_geo,
                                    output_proj=daymet_proj,
                                    stats_flag=stats_flag)
            del daily_sum, daily_count
        if monthly_flag:
            for month_i in range(monthly_sum.shape[0]):
                monthly_name = monthly_fmt.format(var=output_var,
                                                  start=start_year,
                                                  end=end_year,
                                                  month=month_i + 1)
                monthly_path = os.path.join(var_ws, monthly_name)
                gdc.array_to_raster(monthly_sum[month_i, :, :] /
                                    monthly_count[month_i, :, :],
                                    monthly_path,
                                    output_geo=output_geo,
                                    output_proj=daymet_proj,
                                    stats_flag=stats_flag)
            del monthly_sum, monthly_count
        if annual_flag:
            annual_name = annual_fmt.format(var=output_var,
                                            start=start_year,
                                            end=end_year)
            annual_path = os.path.join(var_ws, annual_name)
            gdc.array_to_raster(annual_sum / annual_count,
                                annual_path,
                                output_geo=output_geo,
                                output_proj=daymet_proj,
                                stats_flag=stats_flag)
            del annual_sum, annual_count

    logging.debug('\nScript Complete')
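
# Minimal usage sketch (not part of the original example); paths and
#   variable choices are assumptions
if __name__ == '__main__':
    main(netcdf_ws='daymet/netcdf',
         ancillary_ws='daymet/ancillary',
         output_ws='daymet',
         variables=['prcp', 'tmmx'],
         monthly_flag=True,
         start_year=1981,
         end_year=2010)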
Exemplo n.º 11
0
def main(img_ws=os.getcwd(),
         ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(),
         etr_flag=False,
         eto_flag=False,
         start_date=None,
         end_date=None,
         extent_path=None,
         output_extent=None,
         stats_flag=True,
         overwrite_flag=False,
         use_cimis_eto_flag=False):
    """Compute daily ETr/ETo from CIMIS data

    Args:
        img_ws (str): root folder path of CIMIS data
        ancillary_ws (str): folder of ancillary rasters
        output_ws (str): folder of output rasters
        etr_flag (bool): if True, compute alfalfa reference ET (ETr)
        eto_flag (bool): if True, compute grass reference ET (ETo)
        start_date (str): ISO format date (YYYY-MM-DD)
        end_date (str): ISO format date (YYYY-MM-DD)
        extent_path (str): file path defining the output extent
        output_extent (list): decimal degrees values defining output extent
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): If True, overwrite existing files
        use_cimis_eto_flag (bool): if True, use CIMIS ETo raster if one of
            the component rasters is missing and ETo/ETr cannot be computed

    Returns:
        None
    """
    logging.info('\nComputing CIMIS ETo/ETr')
    np.seterr(invalid='ignore')

    # Use CIMIS ETo raster directly instead of computing from components
    # Currently this will only be applied if one of the inputs is missing
    # DEADBEEF - this hardcoded True overrides the function parameter
    use_cimis_eto_flag = True

    # Compute ETr and/or ETo
    if not etr_flag and not eto_flag:
        logging.info('  ETo/ETr flag(s) not set, defaulting to ETr')
        etr_flag = True

    # If a date is not set, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except:
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date:   {}'.format(end_dt))
    except:
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date:   {}'.format(end_dt))

    etr_folder = 'etr'
    eto_folder = 'eto'
    etr_fmt = 'etr_{}_daily_cimis.img'
    eto_fmt = 'eto_{}_daily_cimis.img'

    # DEM for air pressure calculation
    mask_raster = os.path.join(ancillary_ws, 'cimis_mask.img')
    dem_raster = os.path.join(ancillary_ws, 'cimis_elev.img')
    lat_raster = os.path.join(ancillary_ws, 'cimis_lat.img')
    # lon_raster = os.path.join(ancillary_ws, 'cimis_lon.img')

    # Interpolate zero windspeed pixels
    # interpolate_zero_u2_flag = False

    # Interpolate edge and coastal cells
    # interpolate_edge_flag = False

    # Resample type
    # 0 = GRA_NearestNeighbour, Nearest neighbour (select on one input pixel)
    # 1 = GRA_Bilinear,Bilinear (2x2 kernel)
    # 2 = GRA_Cubic, Cubic Convolution Approximation (4x4 kernel)
    # 3 = GRA_CubicSpline, Cubic B-Spline Approximation (4x4 kernel)
    # 4 = GRA_Lanczos, Lanczos windowed sinc interpolation (6x6 kernel)
    # 5 = GRA_Average, Average (computes the average of all non-NODATA contributing pixels)
    # 6 = GRA_Mode, Mode (selects the value which appears most often of all the sampled points)
    resample_type = gdal.GRA_CubicSpline

    # Wind speed is measured at 2m
    zw = 2

    # Output workspaces
    etr_ws = os.path.join(output_ws, etr_folder)
    eto_ws = os.path.join(output_ws, eto_folder)
    if etr_flag and not os.path.isdir(etr_ws):
        os.makedirs(etr_ws)
    if eto_flag and not os.path.isdir(eto_ws):
        os.makedirs(eto_ws)

    # Check ETr/ETo functions
    test_flag = False

    # Check that the daily_refet_func produces the correct values
    if test_flag:
        doy_test = 245
        elev_test = 1050.0
        lat_test = 39.9396 * math.pi / 180
        tmin_test = 11.07
        tmax_test = 34.69
        rs_test = 22.38
        u2_test = 1.94
        zw_test = 2.5
        tdew_test = -3.22
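        # Derive actual vapor pressure from Tdew, air pressure from
        #   elevation, then specific humidity from ea & pair (test inputs)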
        ea_test = et_common.saturation_vapor_pressure_func(tdew_test)
        pair_test = 101.3 * np.power((285 - 0.0065 * elev_test) / 285, 5.26)
        q_test = 0.622 * ea_test / (pair_test - (0.378 * ea_test))
        etr = float(
            et_common.daily_refet_func(tmin_test, tmax_test, q_test, rs_test,
                                       u2_test, zw_test, elev_test, doy_test,
                                       lat_test, 'ETR'))
        eto = float(
            et_common.daily_refet_func(tmin_test, tmax_test, q_test, rs_test,
                                       u2_test, zw_test, elev_test, doy_test,
                                       lat_test, 'ETO'))
        print('ETr: 8.89', etr)
        print('ETo: 6.16', eto)
        sys.exit()

    # Get CIMIS grid properties from mask
    cimis_mask_ds = gdal.Open(mask_raster)
    cimis_osr = gdc.raster_ds_osr(cimis_mask_ds)
    cimis_proj = gdc.osr_proj(cimis_osr)
    cimis_cs = gdc.raster_ds_cellsize(cimis_mask_ds, x_only=True)
    cimis_extent = gdc.raster_ds_extent(cimis_mask_ds)
    cimis_full_geo = cimis_extent.geo(cimis_cs)
    cimis_x, cimis_y = cimis_extent.origin()
    cimis_mask_ds = None
    logging.debug('  Projection: {}'.format(cimis_proj))
    logging.debug('  Cellsize: {}'.format(cimis_cs))
    logging.debug('  Geo: {}'.format(cimis_full_geo))
    logging.debug('  Extent: {}'.format(cimis_extent))

    # Manually set CIMIS grid properties
    # cimis_extent = gdc.Extent((-400000, -650000, 600000, 454000))
    # cimis_cs = 2000
    # cimis_geo = gdc.extent_geo(cimis_extent, cellsize)
    # cimis_epsg = 3310  # NAD_1983_California_Teale_Albers
    # cimis_x, cimis_y = (0,0)

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        cimis_extent = gdc.Extent(output_extent)
        cimis_extent.adjust_to_snap('EXPAND', cimis_x, cimis_y, cimis_cs)
        cimis_geo = cimis_extent.geo(cimis_cs)
        logging.debug('  Geo: {}'.format(cimis_geo))
        logging.debug('  Extent: {}'.format(cimis_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            cimis_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            cimis_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        cimis_extent = gdc.project_extent(cimis_extent, extent_osr, cimis_osr,
                                          extent_cs)
        cimis_extent.adjust_to_snap('EXPAND', cimis_x, cimis_y, cimis_cs)
        cimis_geo = cimis_extent.geo(cimis_cs)
        logging.debug('  Geo: {}'.format(cimis_geo))
        logging.debug('  Extent: {}'.format(cimis_extent))
    else:
        cimis_geo = cimis_full_geo

    # Latitude
    lat_array = gdc.raster_to_array(lat_raster,
                                    mask_extent=cimis_extent,
                                    return_nodata=False)
    lat_array = lat_array.astype(np.float32)
    lat_array *= math.pi / 180

    # Elevation data
    elev_array = gdc.raster_to_array(dem_raster,
                                     mask_extent=cimis_extent,
                                     return_nodata=False)
    elev_array = elev_array.astype(np.float32)

    # Process each year in the input workspace
    logging.info('')
    for year_str in sorted(os.listdir(img_ws)):
        logging.debug('{}'.format(year_str))
        if not re.match(r'^\d{4}$', year_str):
            logging.debug('  Not a 4 digit year folder, skipping')
            continue
        year_ws = os.path.join(img_ws, year_str)
        year_int = int(year_str)
        # year_days = int(dt.datetime(year_int, 12, 31).strftime('%j'))
        if start_dt is not None and year_int < start_dt.year:
            logging.debug('  Before start date, skipping')
            continue
        elif end_dt is not None and year_int > end_dt.year:
            logging.debug('  After end date, skipping')
            continue
        logging.info('{}'.format(year_str))

        # Output paths
        etr_raster = os.path.join(etr_ws, etr_fmt.format(year_str))
        eto_raster = os.path.join(eto_ws, eto_fmt.format(year_str))
        if etr_flag and (overwrite_flag or not os.path.isfile(etr_raster)):
            logging.debug('  {}'.format(etr_raster))
            gdc.build_empty_raster(etr_raster,
                                   band_cnt=366,
                                   output_dtype=np.float32,
                                   output_proj=cimis_proj,
                                   output_cs=cimis_cs,
                                   output_extent=cimis_extent,
                                   output_fill_flag=True)
        if eto_flag and (overwrite_flag or not os.path.isfile(eto_raster)):
            logging.debug('  {}'.format(eto_raster))
            gdc.build_empty_raster(eto_raster,
                                   band_cnt=366,
                                   output_dtype=np.float32,
                                   output_proj=cimis_proj,
                                   output_cs=cimis_cs,
                                   output_extent=cimis_extent,
                                   output_fill_flag=True)
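        # Each output raster holds one band per day of year (366 so leap
        # years fit); daily arrays are written into band `doy` below with
        # gdc.array_to_comp_raster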

        # Process each date in the year
        for date_str in sorted(os.listdir(year_ws)):
            logging.debug('{}'.format(date_str))
            try:
                date_dt = dt.datetime.strptime(date_str, '%Y_%m_%d')
            except ValueError:
                logging.debug(
                    '  Invalid folder date format (YYYY_MM_DD), skipping')
                continue
            if start_dt is not None and date_dt < start_dt:
                logging.debug('  Before start date, skipping')
                continue
            elif end_dt is not None and date_dt > end_dt:
                logging.debug('  After end date, skipping')
                continue
            logging.info(date_str)
            date_ws = os.path.join(year_ws, date_str)
            doy = int(date_dt.strftime('%j'))

            # Set file paths
            tmax_path = os.path.join(date_ws, 'Tx.img')
            tmin_path = os.path.join(date_ws, 'Tn.img')
            tdew_path = os.path.join(date_ws, 'Tdew.img')
            rso_path = os.path.join(date_ws, 'Rso.img')
            rs_path = os.path.join(date_ws, 'Rs.img')
            u2_path = os.path.join(date_ws, 'U2.img')
            eto_path = os.path.join(date_ws, 'ETo.img')
            # k_path = os.path.join(date_ws, 'K.img')
            # rnl_path = os.path.join(date_ws, 'Rnl.img')
            input_list = [
                tmin_path, tmax_path, tdew_path, u2_path, rs_path, rso_path
            ]

            # If any input raster is missing, skip the day
            #   Unless ETo is present (and use_cimis_eto_flag is True)
            day_skip_flag = False
            for t_path in input_list:
                if not os.path.isfile(t_path):
                    logging.info('    {} is missing'.format(t_path))
                    day_skip_flag = True

            if (day_skip_flag and use_cimis_eto_flag
                    and os.path.isfile(eto_path)):
                logging.info('    Using CIMIS ETo directly')
                eto_array = gdc.raster_to_array(eto_path,
                                                1,
                                                cimis_extent,
                                                return_nodata=False)
                eto_array = eto_array.astype(np.float32)
                if not np.any(eto_array):
                    logging.info('    {} is empty or missing'.format(eto_path))
                    logging.info('    Skipping date')
                    continue
                # ETr
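                #   (scaling ETo by a fixed 1.2 is a commonly used
                #   grass-to-alfalfa reference ratio and is an approximation)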
                if etr_flag:
                    gdc.array_to_comp_raster(1.2 * eto_array,
                                             etr_raster,
                                             band=doy,
                                             stats_flag=False)
                    # gdc.array_to_raster(
                    #     1.2 * eto_array, etr_raster,
                    #     output_geo=cimis_geo, output_proj=cimis_proj,
                    #     stats_flag=stats_flag)
                # ETo
                if eto_flag:
                    gdc.array_to_comp_raster(eto_array,
                                             eto_raster,
                                             band=doy,
                                             stats_flag=False)
                    # gdc.array_to_raster(
                    #     eto_array, eto_raster,
                    #     output_geo=cimis_geo, output_proj=cimis_proj,
                    #     stats_flag=stats_flag)
                del eto_array
                continue
            elif not day_skip_flag:
                # Read in rasters
                # DEADBEEF - Read with extent since some arrays are too big
                # i.e. 2012-03-21, 2013-03-20, 2014-02-27
                tmin_array = gdc.raster_to_array(tmin_path,
                                                 1,
                                                 cimis_extent,
                                                 return_nodata=False)
                tmax_array = gdc.raster_to_array(tmax_path,
                                                 1,
                                                 cimis_extent,
                                                 return_nodata=False)
                tdew_array = gdc.raster_to_array(tdew_path,
                                                 1,
                                                 cimis_extent,
                                                 return_nodata=False)
                rso_array = gdc.raster_to_array(rso_path,
                                                1,
                                                cimis_extent,
                                                return_nodata=False)
                rs_array = gdc.raster_to_array(rs_path,
                                               1,
                                               cimis_extent,
                                               return_nodata=False)
                u2_array = gdc.raster_to_array(u2_path,
                                               1,
                                               cimis_extent,
                                               return_nodata=False)
                # k_array = gdc.raster_to_array(
                #     k_path, 1, cimis_extent, return_nodata=False)
                # rnl_array = gdc.raster_to_array(
                #     rnl_path, 1, cimis_extent, return_nodata=False)

                # Check that all input arrays have data
                for t_name, t_array in [[tmin_path, tmin_array],
                                        [tmax_path, tmax_array],
                                        [tdew_path, tdew_array],
                                        [u2_path, u2_array],
                                        [rs_path, rs_array],
                                        [rso_path, rso_array]]:
                    if not np.any(t_array):
                        logging.warning(
                            '    {} is empty or missing'.format(t_name))
                        day_skip_flag = True
                if day_skip_flag:
                    logging.warning('    Skipping date')
                    continue

                # DEADBEEF - Some arrays have a 500m cellsize
                # i.e. 2011-07-25, 2010-01-01 -> 2010-07-27
                tmin_array = rescale_array_func(tmin_array, elev_array, 'tmin')
                tmax_array = rescale_array_func(tmax_array, elev_array, 'tmax')
                tdew_array = rescale_array_func(tdew_array, elev_array, 'tdew')
                rso_array = rescale_array_func(rso_array, elev_array, 'rso')
                rs_array = rescale_array_func(rs_array, elev_array, 'rs')
                u2_array = rescale_array_func(u2_array, elev_array, 'u2')
                # k_array = rescale_array_func(k_array, elev_array, 'k')
                # rnl_array = rescale_array_func(rnl_array, elev_array, 'rnl')

                # Back calculate q from tdew by first calculating ea from tdew
                ea_array = et_common.saturation_vapor_pressure_func(tdew_array)
                pair_array = et_common.air_pressure_func(elev_array)
                q_array = 0.622 * ea_array / (pair_array - (0.378 * ea_array))
                del ea_array, pair_array, tdew_array
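                # e.g., assuming FAO-56 constants inside et_common:
                #   tdew = 15 C -> ea ~= 0.6108 * exp(17.27*15/(15+237.3))
                #                     ~= 1.71 kPa
                #   at sea level (pair ~= 101.3 kPa):
                #   q ~= 0.622*1.71 / (101.3 - 0.378*1.71) ~= 0.0105 kg/kg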

                # Back calculate rhmin/rhmax from tdew
                # (sketch only; RH = 100 * ea / es(T), so RH is highest at
                # tmin and lowest at tmax)
                # es_tmax = et_common.saturation_vapor_pressure_func(tmax_array)
                # es_tmin = et_common.saturation_vapor_pressure_func(tmin_array)
                # rhmax = 100 * ea_array / es_tmin
                # rhmin = 100 * ea_array / es_tmax
                # del es_tmax, es_tmin
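                # rso_type='ARRAY' passes the CIMIS clear-sky radiation grid
                # in directly; presumably other rso_type values would have
                # refet_daily_func compute Rso itself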

                # ETr
                if etr_flag:
                    etr_array = et_common.refet_daily_func(tmin_array,
                                                           tmax_array,
                                                           q_array,
                                                           rs_array,
                                                           u2_array,
                                                           zw,
                                                           elev_array,
                                                           lat_array,
                                                           doy,
                                                           ref_type='ETR',
                                                           rso_type='ARRAY',
                                                           rso=rso_array)
                    gdc.array_to_comp_raster(etr_array.astype(np.float32),
                                             etr_raster,
                                             band=doy,
                                             stats_flag=False)
                    # gdc.array_to_raster(
                    #     etr_array.astype(np.float32), etr_raster,
                    #     output_geo=cimis_geo, output_proj=cimis_proj,
                    #     stats_flag=stats_flag)
                    del etr_array
                # ETo
                if eto_flag:
                    eto_array = et_common.refet_daily_func(tmin_array,
                                                           tmax_array,
                                                           q_array,
                                                           rs_array,
                                                           u2_array,
                                                           zw,
                                                           elev_array,
                                                           lat_array,
                                                           doy,
                                                           ref_type='ETO',
                                                           rso_type='ARRAY',
                                                           rso=rso_array)
                    gdc.array_to_comp_raster(eto_array.astype(np.float32),
                                             eto_raster,
                                             band=doy,
                                             stats_flag=False)
                    # gdc.array_to_raster(
                    #     eto_array.astype(np.float32), eto_raster,
                    #     output_geo=cimis_geo, output_proj=cimis_proj,
                    #     stats_flag=stats_flag)
                    del eto_array
                # Cleanup
                del tmin_array, tmax_array, u2_array, rs_array, rso_array
                del q_array
            else:
                logging.info('    Skipping date')
                continue

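        # Per-band statistics were skipped during the daily writes
        # (stats_flag=False), so compute them once per year here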
        if stats_flag and etr_flag:
            gdc.raster_statistics(etr_raster)
        if stats_flag and eto_flag:
            gdc.raster_statistics(eto_raster)

    logging.debug('\nScript Complete')