Example #1
    def _aggregate_state(self):
        """Aggregate data out of the state file and load it into `met_data`"""
        # Precipitation record
        trailing = self.state['prec']
        begin_record = self.params['start'] - pd.Timedelta("90 days")
        end_record = self.params['start'] - pd.Timedelta("1 days")
        record_dates = date_range(begin_record, end_record,
                                  calendar=self.params['calendar'])
        trailing['time'] = record_dates
        total_precip = xr.concat([trailing, self.met_data['prec']], dim='time')
        total_precip = total_precip.rolling(time=90).mean().drop_sel(
            time=record_dates)
        self.met_data['seasonal_prec'] = total_precip

        # Smoothed daily temperature range
        trailing = self.state['t_max'] - self.state['t_min']
        begin_record = self.params['start'] - pd.Timedelta("90 days")
        end_record = self.params['start'] - pd.Timedelta("1 days")
        record_dates = date_range(begin_record, end_record,
                                  calendar=self.params['calendar'])
        trailing['time'] = record_dates
        dtr = self.met_data['t_max'] - self.met_data['t_min']
        sm_dtr = xr.concat([trailing, dtr], dim='time')
        sm_dtr = sm_dtr.rolling(time=30).mean().drop_sel(time=record_dates)
        self.met_data['smoothed_dtr'] = sm_dtr

        # Put in SWE data
        self.met_data['swe'] = xr.Variable(('lat', 'lon'),
                                           self.state['swe'].values)
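
The trailing-window pattern used here (prepend the saved state, take a rolling mean, keep only the run period) can be checked in isolation. A minimal sketch with plain pandas/xarray stand-ins for the `state`/`met_data` datasets and MetSim's calendar-aware `date_range`:

import numpy as np
import pandas as pd
import xarray as xr

# 90 days of carried-over "state", immediately followed by 10 run days
state_time = pd.date_range("2000-01-02", periods=90, freq="D")
run_time = pd.date_range("2000-04-01", periods=10, freq="D")
trailing = xr.DataArray(np.ones(90), coords={"time": state_time}, dims="time")
new = xr.DataArray(np.ones(10), coords={"time": run_time}, dims="time")

# Concatenate, take the 90-day rolling mean, then keep only the run
# period so every remaining day has a full trailing window behind it
total = xr.concat([trailing, new], dim="time")
seasonal = total.rolling(time=90).mean().sel(time=run_time)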
Example #2
    def _aggregate_state(self):
        """Aggregate data out of the state file and load it into `met_data`"""
        # Precipitation record

        assert self.state.dims['time'] == 90, self.state['time']
        record_dates = date_range(self.params['state_start'],
                                  self.params['state_stop'],
                                  calendar=self.params['calendar'])
        trailing = self.state['prec']
        trailing['time'] = record_dates
        total_precip = xr.concat([trailing, self.met_data['prec']],
                                 dim='time').load()
        total_precip = (
            cnst.DAYS_PER_YEAR * total_precip.rolling(time=90).mean().sel(
                time=slice(self.params['start'], self.params['stop'])))

        self.met_data['seasonal_prec'] = total_precip

        # Smoothed daily temperature range
        trailing = self.state['t_max'] - self.state['t_min']

        trailing['time'] = record_dates
        dtr = self.met_data['t_max'] - self.met_data['t_min']
        if (dtr < 0).any():
            raise ValueError("Daily maximum temperature lower"
                             " than daily minimum temperature!")
        sm_dtr = xr.concat([trailing, dtr], dim='time').load()
        sm_dtr = sm_dtr.rolling(time=30).mean().drop_sel(time=record_dates)
        self.met_data['dtr'] = dtr
        self.met_data['smoothed_dtr'] = sm_dtr
Example #3
    def setup_output(self, prototype: xr.Dataset = None):
        if prototype is None:
            prototype = self.met_data
        self.output = self.domain.copy(deep=True)
        self.output.attrs = attrs['_global']
        # Number of timesteps
        if self.disagg:
            delta = pd.Timedelta('1 days') - pd.Timedelta(
                '{} minutes'.format(self.params['time_step']))
        else:
            delta = pd.Timedelta('0 days')

        start = pd.Timestamp(prototype.time.values[0]).to_pydatetime()
        stop = pd.Timestamp(prototype.time.values[-1]).to_pydatetime()
        times = date_range(start, stop + delta,
                           freq="{}T".format(MetSim.params['time_step']),
                           calendar=self.params['calendar'])
        n_ts = len(times)

        shape = (n_ts, ) + self.domain_shape
        dims = ('time', ) + self.domain_dims
        coords = {'time': times, **self.domain.mask.coords}
        for varname in MetSim.params['out_vars']:
            self.output[varname] = xr.DataArray(
                data=np.full(shape, np.nan),
                coords=coords, dims=dims,
                name=varname, attrs=attrs.get(varname, {}),
                encoding={'dtype': 'f8', '_FillValue': cnst.FILL_VALUES['f8']})
Example #4
 def read_ascii(self, fpath: str) -> pd.DataFrame:
     """Read in an ASCII forcing file"""
     dates = date_range(MetSim.params['start'], MetSim.params['stop'],
                        calendar=self.params['calendar'])
     names = list(MetSim.params['in_vars'].keys())
     df = pd.read_table(fpath, header=None, delim_whitespace=True,
                        names=names).head(len(dates))
     df.index = dates
     return df
Example #5
def read_ascii(data_handle, domain=None, is_worker=False,
               start=None, stop=None, calendar='standard',
               var_dict=None) -> pd.DataFrame:
    """Read in an ASCII forcing file"""
    dates = date_range(start, stop, calendar=calendar)
    names = list(var_dict.keys())
    df = pd.read_csv(data_handle, header=None, delim_whitespace=True,
                     names=names).head(len(dates))
    df.index = dates
    return df
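
A hypothetical usage sketch: `pd.read_csv` accepts any file-like handle, so the reader can be exercised with an in-memory buffer. This assumes MetSim's `date_range` helper is in scope and behaves like `pd.date_range` for the standard calendar; the variable names are made up.

from io import StringIO

text = "0.0 10.0 -1.0\n1.5 12.0 0.5\n"
forcings = read_ascii(StringIO(text),
                      start="2000-01-01", stop="2000-01-02",
                      calendar="standard",
                      var_dict={"prec": "", "t_max": "", "t_min": ""})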
Example #6
File: io.py Project: shulele/MetSim
def read_binary(data_handle,
                domain=None,
                iter_dims=['lat', 'lon'],
                start=None,
                stop=None,
                calendar='standard',
                var_dict=None) -> xr.Dataset:
    """Reads a binary forcing file (VIC 4 format)"""
    dates = date_range(start, stop, calendar=calendar)
    n_days = len(dates)
    type_lookup = {'signed': 'h', 'unsigned': 'H'}
    # Pack these for nicer syntax in the loop
    var_names = list(var_dict.keys())
    data_list = [[] for _ in var_names]
    n_vars = len(var_names)
    var_params = list(var_dict.values())
    scales = [float(s.split()[0]) for s in var_params]
    datatypes = [type_lookup[s.split()[-1]] for s in var_params]
    with open(data_handle, 'rb') as f:
        i = 0
        points_read = 0
        # One 2-byte value per variable per day
        points_needed = n_vars * n_days
        while points_read != points_needed:
            buf = f.read(2)
            if buf:
                # Get correct variable and data type with i,
                # then unpack & scale
                data_list[i].append(
                    struct.unpack(datatypes[i], buf)[0] / scales[i])
                i = (i + 1) % n_vars
                points_read += 1
            else:
                break

    # Binary forcing files have naming format $NAME_$LAT_$LON
    param_list = os.path.basename(data_handle).split("_")[-3:]
    params = {
        "name": param_list[0],
        "lat": float(param_list[1]),
        "lon": float(param_list[2]),
        "n_days": int(n_days)
    }

    # Assemble the dataset
    data_dict = {c[0]: (['time'], c[1]) for c in zip(var_names, data_list)}
    data_dict['day_of_year'] = (['time'], dates.dayofyear)
    df = xr.Dataset(data_dict,
                    coords={
                        'lon': [params['lon']],
                        'lat': [params['lat']],
                        'time': dates
                    },
                    attrs={'n_days': params['n_days']})
    return df
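
To make the byte layout concrete, here is a hedged round-trip sketch: one day of values, interleaved per variable as scaled 2-byte signed integers, which is the record format the loop above walks through. The file name, variables, and scales are invented for the demo.

import struct

scales = {"prec": 40.0, "t_max": 100.0, "t_min": 100.0, "wind": 100.0}
day = {"prec": 1.2, "t_max": 15.3, "t_min": 4.1, "wind": 2.5}

# Write one day: all four variables in order, 'h' = signed 16-bit,
# matching type_lookup['signed'] above
with open("data_45.0_-120.0", "wb") as f:
    for name, scale in scales.items():
        f.write(struct.pack("h", round(day[name] * scale)))

# Read it back the way read_binary does: 2 bytes, unpack, unscale
with open("data_45.0_-120.0", "rb") as f:
    for name, scale in scales.items():
        print(name, struct.unpack("h", f.read(2))[0] / scale)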
Example #7
    def setup_output(self, prototype: xr.Dataset = None):
        if prototype is None or not prototype.variables:
            prototype = self.met_data
        self.disagg = int(self.params['time_step']) < cnst.MIN_PER_DAY
        # Number of timesteps
        if self.disagg:
            delta = pd.Timedelta('1 days') - pd.Timedelta('{} minutes'.format(
                self.params['time_step']))
        else:
            delta = pd.Timedelta('0 days')

        start = pd.Timestamp(prototype['time'].values[0]).to_pydatetime()
        stop = pd.Timestamp(prototype['time'].values[-1]).to_pydatetime()
        times = date_range(start,
                           stop + delta,
                           freq="{}T".format(self.params['time_step']),
                           calendar=self.params['calendar'])
        n_ts = len(times)

        shape = (n_ts, ) + self.domain['mask'].shape
        dims = ('time', ) + self.domain['mask'].dims
        coords = {'time': times, **self.domain['mask'].coords}
        self.output = xr.Dataset(coords=coords)
        self.output['time'].encoding['calendar'] = self.params['calendar']
        for p in ['elev', 'lat', 'lon']:
            if p in self.params:
                self.params.pop(p)
        for k, v in self.params.items():
            # Need to convert some parameters to strings
            if k in ['start', 'stop', 'time_grouper', 'utc_offset']:
                v = str(v)
            elif k in ['state_start', 'state_stop']:
                # skip
                continue
            # Don't include complex types
            if isinstance(v, dict):
                v = json.dumps(v)
            elif not isinstance(v, str) and isinstance(v, Iterable):
                v = ', '.join(v)
            attrs['_global'][k] = v
        self.output.attrs = attrs['_global']

        dtype = self.params['out_precision']

        for varname in self.params['out_vars']:
            self.output[varname] = xr.DataArray(data=np.full(shape,
                                                             np.nan,
                                                             dtype=dtype),
                                                coords=coords,
                                                dims=dims,
                                                name=varname,
                                                attrs=attrs.get(varname, {}))
        self.output['time'].attrs.update(attrs['time'])
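
The pre-allocation step reduces to a standalone pattern: build an empty Dataset on the time × space grid and fill each output variable with NaNs up front. A sketch with an illustrative grid and variable names (not MetSim's):

import numpy as np
import pandas as pd
import xarray as xr

times = pd.date_range("2000-01-01", periods=48, freq="30min")
coords = {"time": times, "lat": [45.0, 45.5], "lon": [-120.0, -119.5]}
shape = (len(times), 2, 2)
dims = ("time", "lat", "lon")

out = xr.Dataset(coords=coords)
for varname in ["temp", "prec"]:
    # NaN-filled placeholder that workers fill in later
    out[varname] = xr.DataArray(np.full(shape, np.nan, dtype="f8"),
                                coords=coords, dims=dims, name=varname)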
Example #8
 def __init__(self, params: dict):
     """
     Constructor
     """
     # Record parameters
     MetSim.params.update(params)
     MetSim.params['dates'] = date_range(params['start'], params['stop'],
                                         calendar=self.params['calendar'])
     logger.setLevel(MetSim.params['verbose'])
     ch.setLevel(MetSim.params['verbose'])
     logger.addHandler(ch)
     self.output = None
     self.met_data = None
     self.ready = False
Example #9
File: io.py Project: shulele/MetSim
def process_vic(params: dict, domain: xr.Dataset) -> xr.Dataset:
    """Process VIC-like data"""
    read_funcs = {
        "binary": read_binary,
        "ascii": read_ascii,
    }

    if 'lon' not in params['iter_dims'] or 'lat' not in params['iter_dims']:
        raise ValueError('Using VIC type input requires lat and lon to be'
                         ' specified via `iter_dims` in configuration.')

    # Creates the master dataset which will be used to parallelize
    dates = date_range(params['start'],
                       params['stop'],
                       calendar=params['calendar'])
    coords = {'time': dates, 'lon': domain['lon'], 'lat': domain['lat']}
    shape = (len(dates), len(domain['lat']), len(domain['lon']))
    dims = (
        'time',
        'lat',
        'lon',
    )

    met_data = xr.Dataset(coords=coords, attrs={'n_days': len(dates)})
    for var in params['forcing_vars']:
        met_data[var] = xr.DataArray(data=np.full(shape, np.nan),
                                     coords=coords,
                                     dims=dims,
                                     name=var)

    # Fill in the data
    for job in params['forcing']:
        try:
            _, lat, lon = os.path.basename(job).split("_")[-3:]
            lat, lon = float(lat), float(lon)
            if not domain['mask'].sel(lat=lat, lon=lon).values > 0:
                continue
            ds = read_funcs[params['forcing_fmt']](
                job,
                start=params['start'],
                stop=params['stop'],
                calendar=params['calendar'],
                var_dict=params['forcing_vars'])
            for var in params['forcing_vars'].keys():
                met_data[var].loc[{'lat': lat, 'lon': lon}] = ds[var]
        except (ValueError, KeyError):
            continue
    return met_data
Example #10
    def read_binary(self, fpath: str) -> xr.Dataset:
        """ Reads a binary forcing file (VIC 4 format) """
        dates = date_range(MetSim.params['start'], MetSim.params['stop'],
                           calendar=self.params['calendar'])
        n_days = len(dates)
        type_lookup = {'signed': 'h', 'unsigned': 'H'}
        # Pack these for nicer syntax in the loop
        var_names = list(MetSim.params['in_vars'].keys())
        data_list = [[] for _ in var_names]
        n_vars = len(var_names)
        var_params = list(MetSim.params['in_vars'].values())
        scales = [float(s.split()[0]) for s in var_params]
        datatypes = [type_lookup[s.split()[-1]] for s in var_params]
        with open(fpath, 'rb') as f:
            i = 0
            points_read = 0
            # One 2-byte value per variable per day
            points_needed = n_vars * n_days
            while points_read != points_needed:
                buf = f.read(2)
                if buf:
                    # Get correct variable and data type with i,
                    # then unpack & scale
                    data_list[i].append(
                        struct.unpack(datatypes[i], buf)[0] / scales[i])
                    i = (i + 1) % n_vars
                    points_read += 1
                else:
                    break

        # Binary forcing files have naming format $NAME_$LAT_$LON
        param_list = os.path.basename(fpath).split("_")
        params = {"name": param_list[0],
                  "lat": float(param_list[1]),
                  "lon": float(param_list[2]),
                  "n_days": int(n_days)}
        MetSim.params.update(params)
        params['elev'] = [[self.find_elevation(params['lat'], params['lon'])]]

        # Assemble the dataset
        data_dict = {c[0]: (['time'], c[1]) for c in zip(var_names, data_list)}
        data_dict['elev'] = (['lon', 'lat'], params['elev'])
        data_dict['day_of_year'] = (['time'], dates.dayofyear)
        df = xr.Dataset(data_dict,
                        coords={'lon': [params['lon']],
                                'lat': [params['lat']],
                                'time': dates},
                        attrs={'n_days': params['n_days']})
        return df
Example #11
 def _unpack_state(self, result: pd.DataFrame, locs: dict):
     """Put restart values in the state dataset"""
     # We concatenate with the old state values in case we don't
     # have 90 new days to use
     tmin = np.concatenate((self.state['t_min'].isel(**locs).values[:],
                            result['t_min'].values))
     tmax = np.concatenate((self.state['t_max'].isel(**locs).values[:],
                            result['t_max'].values))
     prec = np.concatenate(
         (self.state['prec'].isel(**locs).values[:], result['prec'].values))
     self.state['t_min'].isel(**locs).values[:] = tmin[-90:]
     self.state['t_max'].isel(**locs).values[:] = tmax[-90:]
     self.state['prec'].isel(**locs).values[:] = prec[-90:]
     state_start = result.index[-1] - pd.Timedelta('89 days')
     self.state['time'].values = date_range(
         state_start, result.index[-1], calendar=self.params['calendar'])
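
The state update itself is a simple tail-keeping pattern: append the newly simulated days to the stored record and keep the most recent 90. A numpy-only sketch with hypothetical values:

import numpy as np

state_prec = np.zeros(90)       # 90 days carried over from the last run
new_prec = np.random.rand(30)   # 30 freshly simulated days

# Concatenate in case fewer than 90 new days exist, then keep the tail
state_prec = np.concatenate((state_prec, new_prec))[-90:]
assert state_prec.shape == (90,)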
Example #12
    def _get_output_times(self, freq=None, period_ending=False):
        """
        Generate chunked time vectors

        Parameters
        ----------
        freq:
            Output frequency. Given as a Pandas timegrouper string.
            If not given, the entire timeseries will be used.
        period_ending:
            Flag to specify if output timesteps should be period-
            ending. Default is period-beginning

        Returns
        -------
        times:
            A list of time vectors, one per output file
            that will be created.
        prototype = self.met_data
        self.disagg = int(self.params['time_step']) < cnst.MIN_PER_DAY

        if self.disagg:
            delta = pd.Timedelta('1 days') - pd.Timedelta('{} minutes'.format(
                self.params['time_step']))
        else:
            delta = pd.Timedelta('0 days')
        if period_ending:
            offset = pd.Timedelta('{} minutes'.format(
                self.params['time_step']))
        else:
            offset = pd.Timedelta('0 minutes')

        start = pd.Timestamp(prototype['time'].values[0]).to_pydatetime()
        stop = pd.Timestamp(prototype['time'].values[-1]).to_pydatetime()
        times = date_range(start + offset,
                           stop + offset + delta,
                           freq="{}T".format(self.params['time_step']),
                           calendar=self.params['calendar'])

        if freq is None or freq == '':
            times = [times]
        else:
            dummy = pd.Series(np.arange(len(times)), index=times)
            grouper = pd.Grouper(freq=freq)
            times = [t.index for k, t in dummy.groupby(grouper)]
        return times
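
The chunking trick at the end is worth isolating: a dummy series indexed by the full time vector is grouped with `pd.Grouper`, and only the per-group indices are kept. A minimal sketch with a month-start frequency:

import numpy as np
import pandas as pd

times = pd.date_range("2000-01-01", "2000-03-31", freq="D")
dummy = pd.Series(np.arange(len(times)), index=times)
chunks = [t.index for _, t in dummy.groupby(pd.Grouper(freq="MS"))]
# chunks[0] covers January, chunks[1] February, chunks[2] March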
Example #13
def disaggregate(df_daily: pd.DataFrame,
                 params: dict,
                 solar_geom: dict,
                 t_begin: list = None,
                 t_end: list = None) -> pd.DataFrame:
    """
    Take a daily timeseries and scale it down to a finer
    time scale.

    Parameters
    ----------
    df_daily:
        Dataframe containing daily timeseries.
        Should be the result of one of the methods
        provided in the `methods` directory.
    params:
        A dictionary containing the class parameters
        of the MetSim object.
    solar_geom:
        A dictionary of solar geometry variables
    t_begin:
        List of t_min and t_max for day previous to the
        start of `df_daily`. None indicates no extension
        of the record.
    t_end:
        List of t_min and t_max for day after the end
        of `df_daily`. None indicates no extension of
        the record.

    Returns
    -------
    df_disagg:
        A dataframe with sub-daily timeseries.
    """
    stop = (df_daily.index[-1] + pd.Timedelta('1 days') -
            pd.Timedelta("{} minutes".format(params['time_step'])))
    dates_disagg = date_range(df_daily.index[0],
                              stop,
                              freq='{}T'.format(params['time_step']),
                              calendar=params['calendar'])
    df_disagg = pd.DataFrame(index=dates_disagg)
    n_days = len(df_daily)
    n_disagg = len(df_disagg)
    ts = int(params['time_step'])
    df_disagg['shortwave'] = shortwave(df_daily['shortwave'].values,
                                       df_daily['daylength'].values,
                                       df_daily.index.dayofyear,
                                       solar_geom['tiny_rad_fract'], params)

    t_Tmin, t_Tmax = set_min_max_hour(solar_geom['tiny_rad_fract'],
                                      df_daily.index.dayofyear - 1,
                                      solar_geom['daylength'], n_days, ts,
                                      params)

    df_disagg['temp'] = temp(df_daily['t_min'].values,
                             df_daily['t_max'].values, n_disagg, t_Tmin,
                             t_Tmax, ts, t_begin, t_end)

    df_disagg['vapor_pressure'] = vapor_pressure(
        df_daily['vapor_pressure'].values, df_disagg['temp'].values, t_Tmin,
        n_disagg, ts)
    df_disagg['vapor_pressure'] = (
        df_disagg['vapor_pressure'].ffill().bfill())

    df_disagg['rel_humid'] = relative_humidity(
        df_disagg['vapor_pressure'].values, df_disagg['temp'].values)

    df_disagg['air_pressure'] = pressure(df_disagg['temp'].values,
                                         params['elev'], params['lapse_rate'])

    df_disagg['spec_humid'] = specific_humidity(
        df_disagg['vapor_pressure'].values, df_disagg['air_pressure'].values)

    df_disagg['tskc'] = tskc(df_daily['tskc'].values, ts, params)

    if 'longwave' in df_daily:
        daily_lw = df_daily['longwave']
    else:
        daily_lw = None
    df_disagg['longwave'] = longwave(df_disagg['temp'].values,
                                     df_disagg['vapor_pressure'].values,
                                     df_disagg['tskc'].values, params,
                                     daily_lw)
    df_disagg['prec'] = prec(df_daily['prec'], df_daily['t_min'], ts, params,
                             df_daily.get('t_pk'), df_daily.get('dur'))

    if 'wind' in df_daily:
        df_disagg['wind'] = wind(df_daily['wind'].values, ts, params)

    if params['period_ending']:
        df_disagg.index += pd.Timedelta('{} minutes'.format(
            params['time_step']))
    return df_disagg.ffill().bfill()
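
The index arithmetic at the top of `disaggregate` can be verified in isolation: the sub-daily index starts at the first daily record and stops one time step short of the day after the last one. A pandas-only sketch (MetSim's calendar-aware `date_range` swapped for `pd.date_range`):

import pandas as pd

daily = pd.date_range("2000-01-01", "2000-01-03", freq="D")
time_step = 180  # minutes, hypothetical
stop = (daily[-1] + pd.Timedelta("1 days")
        - pd.Timedelta("{} minutes".format(time_step)))
subdaily = pd.date_range(daily[0], stop, freq="{}min".format(time_step))
assert len(subdaily) == 3 * 24 * 60 // time_step  # 24 steps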
Example #14
def wrap_run_cell(func: callable, params: dict, ds: xr.Dataset,
                  state: xr.Dataset, disagg: bool,
                  out_times: pd.DatetimeIndex):
    """
    Iterate over a chunk of the domain. This is wrapped
    so we can return a tuple of locs and df.

    Parameters
    ----------
    func: callable
        The function to call to do the work
    params: dict
        Parameters from a MetSim object
    ds: xr.Dataset
        Input forcings and domain
    state: xr.Dataset
        State variables at the point of interest
    disagg: bool
        Whether or not we should run a disagg routine
    out_times: pd.DatetimeIndex
        Times to return (should be trimmed 1 day at
        each end from the given index)

    Returns
    -------
    df_complete: pd.DataFrame
        A dataframe with the disaggregated data in it
    df_base: pd.DataFrame
        A dataframe with the state data in it
    """
    lat = ds['lat'].values.flatten()[0]
    lon = ds['lon'].values.flatten()[0]
    elev = ds['elev'].values.flatten()[0]
    params['elev'] = elev
    params['lat'] = lat
    params['lon'] = lon
    df = ds.to_dataframe()

    # solar_geom returns a tuple due to restrictions of numba
    # for clarity we convert it to a dictionary here
    sg = solar_geom(elev, lat, params['lapse_rate'])
    sg = {
        'tiny_rad_fract': sg[0],
        'daylength': sg[1],
        'potrad': sg[2],
        'tt_max0': sg[3]
    }
    yday = df.index.dayofyear - 1
    df['daylength'] = sg['daylength'][yday]
    df['potrad'] = sg['potrad'][yday]
    df['tt_max'] = sg['tt_max0'][yday]

    # Generate the daily values - these are saved
    # so that we can use a subset of them to write
    # out the state file later
    df_base = func(df, params)

    if disagg:
        # Get some values for padding the time list,
        # so that when interpolating in the disaggregation
        # functions we can match endpoints with adjoining
        # chunks - if no data is available, just repeat some
        # default values (this case is used at the very
        # beginning and end of the record)
        t_begin = [state['t_min'].values[-1], state['t_max'].values[-1]]
        try:
            nextday = out_times[-1] + pd.Timedelta('1 days')
            t_end = [
                ds['t_min'].sel(time=nextday), ds['t_max'].sel(time=nextday)
            ]
        except (KeyError, ValueError):
            # None so that we don't extend the record
            t_end = None

        # Disaggregate to subdaily values
        df_complete = disaggregate(df, params, sg, t_begin, t_end)

        # Calculate the times that we want to get out by chopping
        # off the endpoints that were added on previously
        start = out_times.values[0]
        stop = (out_times.values[-1] + pd.Timedelta('1 days') -
                pd.Timedelta("{} minutes".format(params['time_step'])))

        if params['period_ending']:
            start += pd.Timedelta('{} minutes'.format(params['time_step']))
            stop += pd.Timedelta('{} minutes'.format(params['time_step']))

        new_times = date_range(start,
                               stop,
                               freq='{}T'.format(params['time_step']),
                               calendar=params['calendar'])
    else:
        # convert srad to daily average flux from daytime flux
        df_base['shortwave'] *= df_base['daylength'] / cnst.SEC_PER_DAY
        # If we're outputting daily values, we don't need to
        # change the output dates - see inside of the `if` branch
        # above for more explanation
        new_times = out_times
        df_complete = df_base

    # Cut the returned data down to the correct time index
    df_complete = df_complete.loc[new_times[0]:new_times[-1]]
    return df_complete, df_base
Example #15
def wrap_run(func: callable, loc: dict, params: dict,
             ds: xr.Dataset, state: xr.Dataset, disagg: bool,
             out_times: pd.DatetimeIndex, year: str):
    """
    Iterate over a chunk of the domain. This is wrapped
    so we can return a tuple of locs and df.

    Parameters
    ----------
    func: callable
        The function to call to do the work
    loc: dict
        Some subset of the domain to do work on
    params: dict
        Parameters from a MetSim object
    ds: xr.Dataset
        Input forcings and domain
    state: xr.Dataset
        State variables at the point of interest
    disagg: bool
        Whether or not we should run a disagg routine
    out_times: pd.DatetimeIndex
        Times to return (should be trimmed 1 day at
        each end from the given index)
    year: str
        The year being run. This is used to add on
        extra times to make output smooth at endpoints
        if the run is chunked in time.

    Returns
    -------
    results
        A list of tuples arranged as
        (location, hourly_output, daily_output)
    """
    logger.info("Processing {}".format(loc))
    lat = ds['lat'].values
    elev = ds['elev'].values
    swe = ds['swe'].values
    df = ds.drop_vars(['lat', 'lon', 'elev', 'swe']).to_dataframe()
    # solar_geom returns a tuple due to restrictions of numba
    # for clarity we convert it to a dictionary here
    sg = solar_geom(elev, lat)
    sg = {'tiny_rad_fract': sg[0], 'daylength': sg[1],
          'potrad': sg[2], 'tt_max0': sg[3]}

    # Generate the daily values - these are saved
    # so that we can use a subset of them to write
    # out the state file later
    df_base = func(df, params, sg, elev=elev, swe=swe)

    if disagg:
        # Get some values for padding the time list,
        # so that when interpolating in the disaggregation
        # functions we can match endpoints with adjoining
        # chunks - if no data is available, just repeat some
        # default values (this case is used at the very
        # beginning and end of the record)
        try:
            prevday = out_times[0] - pd.Timedelta('1 days')
            t_begin = [ds['t_min'].sel(time=prevday),
                       ds['t_max'].sel(time=prevday)]
        except (KeyError, ValueError):
            t_begin = [state['t_min'].values[-1],
                       state['t_max'].values[-1]]
        try:
            nextday = pd.Timestamp(int(year) + 1, 1, 1)
            t_end = [ds['t_min'].sel(time=nextday),
                     ds['t_max'].sel(time=nextday)]
        except (KeyError, ValueError):
            # None so that we don't extend the record
            t_end = None

        # Disaggregate to subdaily values
        df_complete = disaggregate(df, params, sg, t_begin, t_end)
        # Calculate the times that we want to get out by chopping
        # off the endpoints that were added on previously
        start = out_times[0]
        stop = out_times[-1] + pd.Timedelta('23 hours')
        new_times = date_range(
            start, stop, freq='{}T'.format(params['time_step']),
            calendar=params['calendar'])
    else:
        # convert srad to daily average flux from daytime flux
        df_base['swrad'] *= df_base['dayl'] / cnst.SEC_PER_DAY
        # If we're outputting daily values, we don't need to
        # change the output dates - see inside of the `if` branch
        # above for more explanation
        new_times = out_times
        df_complete = df_base

    # Cut the returned data down to the correct time index
    df_complete = df_complete.loc[new_times[0]:new_times[-1]]
    df_base = df_base.loc[new_times[0]:new_times[-1]]
    return (loc, df_complete, df_base)
Example #16
    def run(self):
        """
        Kicks off the disaggregation and queues up data for IO
        """
        time_dim = pd.to_datetime(self.met_data.time.values)
        if self.params['annual']:
            groups = time_dim.groupby(time_dim.year)
        else:
            groups = {'total': time_dim}
        for label, times in groups.items():
            logger.info("Beginning {}".format(label))
            # Add in some end point data for continuity
            times_ext = times.union([times[0] - pd.Timedelta("1 days"),
                                     times[-1] + pd.Timedelta("1 days")]
                                    ).intersection(time_dim)
            data = self.met_data.sel(time=times_ext)
            self.setup_output(self.met_data.sel(time=times))
            for i, j in self.locations:
                locs = dict(lat=i, lon=j)
                logger.info("Processing {}".format(locs))
                ds = data.isel(**locs)
                lat = ds['lat'].values
                elev = ds['elev'].values
                swe = ds['swe'].values
                df = ds.drop_vars(['lat', 'lon', 'elev',
                                   'swe']).to_dataframe()
                # solar_geom returns a tuple due to restrictions of numba
                # for clarity we convert it to a dictionary here
                sg = solar_geom(elev, lat)
                sg = {'tiny_rad_fract': sg[0], 'daylength': sg[1],
                      'potrad': sg[2], 'tt_max0': sg[3]}

                # Generate the daily values - these are saved
                # so that we can use a subset of them to write
                # out the state file later
                df = self.method.run(df, self.params, sg,
                                     elev=elev, swe=swe)

                # Get some values for padding the time list,
                # so that when interpolating in the disaggregation
                # functions we can match endpoints with adjoining
                # chunks - if no data is available, just repeat some
                # default values (this case is used at the very
                # beginning and end of the record)
                if self.disagg:
                    try:
                        prevday = data.time[0] - pd.Timedelta('1 days')
                        t_begin = [self.met_data['t_min'].sel(
                                       time=prevday).isel(lat=i, lon=j),
                                   self.met_data['t_max'].sel(
                                       time=prevday).isel(lat=i, lon=j)]
                    except (KeyError, ValueError):
                        t_begin = [self.state['t_min'].values[-1, i, j],
                                   self.state['t_max'].values[-1, i, j]]
                    try:
                        nextday = pd.Timestamp(int(label) + 1, 1, 1)
                        t_end = [self.met_data['t_min'].sel(
                                     time=nextday).isel(lat=i, lon=j),
                                 self.met_data['t_max'].sel(
                                     time=nextday).isel(lat=i, lon=j)]
                    except (KeyError, ValueError):
                        # None so that we don't extend the record
                        t_end = None

                    self._unpack_state(df, locs)
                    df = disaggregate(df, self.params, sg, t_begin, t_end)
                    start = times[0]
                    stop = (times[-1] + pd.Timedelta('1 days')
                            - pd.Timedelta('{} minutes'.format(
                                self.params['time_step'])))
                    new_times = date_range(
                        start, stop,
                        freq='{}T'.format(self.params['time_step']),
                        calendar=self.params['calendar'])
                else:
                    self._unpack_state(df, locs)
                    # convert srad to daily average flux from daytime flux
                    df['swrad'] *= df['dayl'] / cnst.SEC_PER_DAY
                    # If we're outputting daily values, we don't need to
                    # change the output dates - see inside of the `if` branch
                    # above for more explanation
                    new_times = times

                # Cut the returned data down to the correct time index
                self._unpack_results((locs, df.loc[new_times[0]:new_times[-1]]))

            self.write(label)
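
The endpoint padding near the top of `run` also stands alone: each chunk is extended by one day on either side for continuity, and the intersection keeps only dates present in the full record, so nothing is added at the very start or end of the series. A small sketch:

import pandas as pd

time_dim = pd.date_range("2000-01-01", "2000-12-31", freq="D")
times = time_dim[time_dim.month == 6]                    # one chunk: June
times_ext = times.union([times[0] - pd.Timedelta("1 days"),
                         times[-1] + pd.Timedelta("1 days")]
                        ).intersection(time_dim)
# times_ext now runs 2000-05-31 through 2000-07-01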