def create_age_sex_xarray():
    LOGGER.debug("Creating xarray of age-sex patterns for migration")
    # load patterns
    qatar = pd.read_csv(QATAR_PATTERN)
    eurostat = pd.read_csv(EUROSTAT_PATTERN)
    # convert to xarrays
    qatar = df_to_xr(qatar, dims=PATTERN_ID_VARS)
    eurostat = df_to_xr(eurostat, dims=PATTERN_ID_VARS)
    # create superarray to hold all locs
    all_locs_xr_list = []
    # Put dataframes for each location into a list
    for loc in WPP_LOCATION_IDS:
        if loc in QATAR_LOCS:
            data = qatar
        else:
            data = eurostat
        data = expand_dimensions(data, location_id=[loc])
        all_locs_xr_list.append(data)
    # Concat all locations together
    result = xr.concat(all_locs_xr_list, dim='location_id')
    # Save all locs pattern
    LOGGER.debug("Saving age-sex pattern xarray")
    # NOTE: gbd_round_id and PATTERN_VERSION are assumed to be defined at
    # module scope, since this function takes no arguments.
    pattern_dir = FBDPath(f'/{gbd_round_id}/future/migration/'
                          f'{PATTERN_VERSION}')
    pattern_path = pattern_dir / "combined_age_sex_pattern.nc"
    save_xr(result, pattern_path, metric="percent", space="identity")
    LOGGER.debug("Saved age-sex pattern xarray")
    return result
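# --- Illustrative sketch (not part of the original example) ---
# A minimal, self-contained demonstration of the expand-then-concat pattern
# used above, with plain xarray in place of the project's expand_dimensions
# helper; the location IDs and pattern values below are made up.
import xarray as xr

# Toy age-sex pattern shared by several locations.
toy_pattern = xr.DataArray([0.6, 0.4], dims=["sex_id"],
                           coords={"sex_id": [1, 2]})

# Broadcast the same pattern to each location, then stitch the pieces
# together along a new location_id dimension.
per_location = [toy_pattern.expand_dims(location_id=[loc])
                for loc in [101, 102, 103]]
combined = xr.concat(per_location, dim="location_id")
print(combined.dims)  # ('location_id', 'sex_id')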
Example 2
def main(migration_version, past_pop_version, forecast_pop_version,
         gbd_round_id, draws, years):
    """
    Load populations and migration rates, then multiply them to get counts.
    """
    # Load migration data
    mig_dir = FBDPath(f"/{gbd_round_id}/future/migration/{migration_version}/")
    mig_path = mig_dir / "mig_star.nc"
    mig_da = open_xr(mig_path).data

    # Load pops
    past_pop_da = load_past_pop(gbd_round_id, past_pop_version)
    forecast_pop_da = load_forecast_pop(gbd_round_id, forecast_pop_version,
                                        years, draws)

    # Give past populations dummy draws/scenarios to be concatenated with
    # forecast pops
    past_pop_da = expand_dimensions(past_pop_da,
                                    draw=forecast_pop_da["draw"].values)
    past_pop_da = expand_dimensions(
        past_pop_da, scenario=forecast_pop_da["scenario"].values)

    # Subset to coordinates relevant to mig_da
    forecast_pop_da = forecast_pop_da.sel(
        sex_id=3,
        age_group_id=22,
        location_id=mig_da.location_id.values,
        scenario=0)
    past_pop_da = past_pop_da.sel(sex_id=3,
                                  age_group_id=22,
                                  location_id=mig_da.location_id.values,
                                  scenario=0)

    # Combine past and forecast pop
    pop_da = past_pop_da.combine_first(forecast_pop_da)

    # Multiply rates by pop to get counts
    mig_counts = mig_da * pop_da
    mig_counts = mig_counts / SCALE_FACTOR

    # Save out
    mig_counts_path = mig_dir / "mig_counts.nc"
    save_xr(mig_counts, mig_counts_path, metric="number", space="identity")
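# --- Illustrative sketch (not part of the original example) ---
# A toy version of the rate-to-count conversion at the end of main(). The
# scale factor below is an assumption for demonstration only; the real
# SCALE_FACTOR constant is defined elsewhere in the module.
import xarray as xr

TOY_SCALE_FACTOR = 1000  # assumed: migration rates stored per 1,000 people

mig_rate = xr.DataArray([2.0, -1.5], dims=["location_id"],
                        coords={"location_id": [6, 102]})
population = xr.DataArray([1_400_000, 330_000], dims=["location_id"],
                          coords={"location_id": [6, 102]})

# Rates times population, divided by the scale factor, gives migrant counts.
toy_mig_counts = mig_rate * population / TOY_SCALE_FACTOR
print(toy_mig_counts.values)  # approximately [ 2800. -495.]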
Example 3
def _linear_then_constant_arc(past_data, years, target_year, target_value):
    r"""Makes rate forecasts by linearly extrapolating the point ARC from the
    last past year till the target year to reach the target value.

    The steps for extrapolating the point ARCs are:

    (1) Calculate the rate of change between the last year of the past
        data (e.g. 2017) and ``target_year`` (e.g. 2030).

        .. math::

            R = \frac{target\_value - past\_last\_year\_value}
                     {target\_year - past\_last\_year}

        where :math:`R` is the slope of the desired linear trend.

    (2) Calculate the rates of change between the last year of the past and
        each future year by multiplying :math:`R` by the future-year weights
        up to ``target_year``.

        .. math::

            \vec{W} = [1, ..., m]

            \vec{F_r} = \vec{W} * R

        where :math:`m` is the number of years between the ``target_year`` and
        the last year of the past, and :math:`\vec{W}` forms the vector of
        year weights.
        :math:`\vec{F_r}` contains the linearly extrapolated ARCs for each
        future year till the ``target_year``.

    (3) Add the future rates :math:`\vec{F_r}` to the last year of the past
        (eg. 2017) to get the forecasted results.

    (4) Extend the forecasted results to the ``years.forecast_end`` year by
        filling in the ``target_value`` for all remaining future years.

    Args:
        past_data (xarray.DataArray):
            The past data with all past years. The data is assumed to be in
            normal space.
        years (YearRange):
            past and future year-ids
        target_year (int):
            The year at which the target value will be reached.
        target_value (int):
            The value that needs to be achieved by the `target_year`.
    Returns:
        (xarray.DataArray):
            The forecasted results.
    """
    LOGGER.info("Entered `linear_then_constant_arc` function.")
    pre_target_years = np.arange(years.forecast_start, target_year + 1)
    post_target_years = np.arange(target_year + 1, years.forecast_end + 1)

    past_last_year = past_data.sel(year_id=years.past_end)
    target_yr_arc = (target_value - past_last_year) / (target_year -
                                                       years.past_end)

    forecast_year_multipliers = xr.DataArray(
        np.arange(len(pre_target_years)) + 1,
        dims=["year_id"],
        coords={"year_id": pre_target_years})

    LOGGER.info("Calculating future rates of change.")
    future_change = target_yr_arc * forecast_year_multipliers
    forecast_bfr_target_year = past_last_year + future_change

    forecast = expand_dimensions(forecast_bfr_target_year,
                                 fill_value=target_value,
                                 year_id=post_target_years)

    LOGGER.info("Leaving `linear_then_constant_arc`.")
    return forecast
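# --- Illustrative sketch (not part of the original example) ---
# A small numeric walk-through of the linear-then-constant logic, using plain
# numpy instead of the project's YearRange/expand_dimensions helpers; every
# value below is made up for illustration.
import numpy as np

past_end, target_year, forecast_end = 2017, 2030, 2040
last_observed, target_value = 10.0, 23.0

# Step 1: slope R between the last past year and the target year.
R = (target_value - last_observed) / (target_year - past_end)  # 1.0

# Step 2: multiply R by the year weights 1..m for years up to target_year.
n_pre_target = target_year - past_end                 # m = 13 future years
future_change = R * (np.arange(n_pre_target) + 1)

# Step 3: add the changes to the last observed value.
forecast = last_observed + future_change              # 11, 12, ..., 23

# Step 4: hold the target value constant through forecast_end.
n_post_target = forecast_end - target_year            # 10 remaining years
forecast = np.concatenate([forecast, np.full(n_post_target, target_value)])
print(forecast[0], forecast[-1])  # 11.0 23.0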
Example 4
def weighted_quantile_with_extra_dim(data,
                                     quantiles,
                                     stat_dims,
                                     weights,
                                     extra_dim=None):
    """Calculates the weighted-mean. If `extra_dim` is a dimension of `data`
    then loop through the `extra_dim` coordinates and calculate coord-specific
    ARCs using that coord's specific weights. Otherwise one ARC for all coords.

    Args:
        data (xarray.DataArray):
            Data to compute weighted quantiles for.
        quantiles (float or list of float):
            quantile(s) to evaluate.  Must be <= 1.
        stat_dims (str, list[str]):
            dimension(s) of the dataarray to reduce over
        weights (xarray.DataArray):
            a 1-D dataarray the same length as the weighted dim, with dimension
            name equal to that of the weighted dim. Must be nonnegative.
        extra_dim (str):
            Extra dimension that exists in `weights` and `data`. It should not
            be in `stat_dims`.
    Returns:
        (xarray.DataArray):
            The weighted quantiles over the given dimensions. The result
            contains all dimensions of the input that are not in ``stat_dims``.
    Raises:
        (ValueError):
            * If `weights` has more than 1 dimension while `extra_dim` is None.
            * If `extra_dim` is in `stat_dims`.
            * If `extra_dim` is not in a dimension of `weights`.
            * If `extra_dim` is not in a dimension of `data`.
            * If `extra_dim` does not have the same coordinates for `weights`
              and `data`.
    """
    LOGGER.debug("Entering the `weighted_quantile_with_extra_dim` function")

    LOGGER.debug("extra_dim:{}".format(extra_dim))

    if len(weights.dims) > 1 and not extra_dim:
        dim_err_msg = ("`weights` cannot have more than 1 dim if `extra_dim` "
                       "is None")
        LOGGER.error(dim_err_msg)
        raise ValueError(dim_err_msg)
    elif extra_dim and extra_dim in stat_dims:
        dim_err_msg = "{} must cannot be in `stat_dims`".format(extra_dim)
        LOGGER.error(dim_err_msg)
        raise ValueError(dim_err_msg)
    elif extra_dim and extra_dim not in weights.dims:
        dim_err_msg = "{} must a dimension of `weights`".format(extra_dim)
        LOGGER.error(dim_err_msg)
        raise ValueError(dim_err_msg)
    elif extra_dim and extra_dim in weights.dims:
        if extra_dim not in data.dims:
            # Replicate `data` along `extra_dim` so it lines up with `weights`.
            data = expand_dimensions(
                data, **{extra_dim: weights[extra_dim].values})
        elif not data[extra_dim].equals(weights[extra_dim]):
            dim_err_msg = ("The {} dimension must have the same coordinates "
                           "for `weights` and `data`".format(extra_dim))
            LOGGER.error(dim_err_msg)
            raise ValueError(dim_err_msg)
        else:
            pass  # `data` already has `extra_dim` matching `weights`
        quantile_values = []
        for coord in weights[extra_dim].values:
            LOGGER.debug("coord: {}".format(coord))
            coord_specific_data = data.loc[{extra_dim: coord}]
            coord_specific_weights = weights.loc[{extra_dim: coord}]
            coord_specific_quantile_values = (weighted_quantile(
                da=coord_specific_data,
                q=quantiles,
                dim=stat_dims,
                ws=coord_specific_weights))
            quantile_values.append(coord_specific_quantile_values)
        quantile_values = xr.concat(quantile_values, dim=extra_dim)
    else:
        quantile_values = weighted_quantile(da=data,
                                            q=quantiles,
                                            dim=stat_dims,
                                            ws=weights)
    LOGGER.debug("Leaving the `weighted_quantile_with_extra_dim` function")
    return quantile_values
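# --- Illustrative sketch (not part of the original example) ---
# A standalone approximation of what the weighted_quantile helper is assumed
# to do for a single 1-D slice: interpolate on the cumulative-weight curve.
# This is not the project's implementation, only a sketch of the idea.
import numpy as np

def toy_weighted_quantile(values, weights, q):
    """Weighted quantile via interpolation on the cumulative weight curve."""
    order = np.argsort(values)
    values, weights = values[order], weights[order]
    # Midpoint cumulative weights, normalized to [0, 1].
    cum = (np.cumsum(weights) - 0.5 * weights) / np.sum(weights)
    return np.interp(q, cum, values)

values = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
equal_median = toy_weighted_quantile(values, np.ones(5), 0.5)         # 3.0
recent_heavy = toy_weighted_quantile(values, np.arange(1., 6.), 0.5)  # ~3.86
print(equal_median, recent_heavy)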
Example 5
def arc(past_data_da,
        years,
        weight_exp,
        stat_dims,
        statistic,
        quantiles=None,
        diff_over_mean=False,
        truncate=False,
        truncate_dims=None,
        truncate_quantiles=None,
        replace_with_mean=False,
        extra_dim=None):
    r"""Makes rate forecasts by forecasting the Annualized Rates-of-Change
    (ARC) using either weighted means or weighted quantiles.

    The steps for forecasting logged or logitted rates with ARCs are:

    (1) Annualized rate differentials (or annualized rates-of-change if data is
        in log or logit space) are calculated.

        .. math::

            \vec{D_{p}} =
            [x_{1991} - x_{1990}, x_{1992} - x_{1991}, ... x_{2016} - x_{2015}]

        where :math:`x` are values from ``past_data_da`` for each year and
        :math:`\vec{D_p}` is the vector of differentials in the past.

    (2) Year weights are used to weight recent years more heavily. The year
        weights are constructed as

        .. math::

            \vec{W} = [1, ..., n]^w

        where :math:`n` is the number of past years, :math:`w` is the
        value given by ``weight_exp``, and :math:`\vec{W}` is the vector of
        year weights.

    (3) Weighted quantiles or the weighted mean of the annualized
        rates-of-change are taken over the dimensions.

        .. math::

            s = \text{weighted-statistic}(\vec{W}, \vec{D})

        where :math:`s` is the weighted quantile or weighted mean.

    (4) Future rates-of-change are simulated by scaling the vector of
        future-year multipliers by the weighted statistic

        .. math::

            \vec{D_{f}} = [1, ..., m] * s

        where :math:`\vec{D_f}` is the vector of differentials in the future
        and :math:`m` is the number of future years to forecast.

    (5) Lastly, these future differentials are added to the rate of the last
        observed year.

        .. math::

            \vec{X_{f}} = \vec{D_{f}} + x_{2016} = [x_{2017}, ..., x_{2040}]

        where :math:`\vec{X_{f}}` is the vector of forecasted rates.

    Args:
        past_data_da (xarray.DataArray):
            Past data with a year-id dimension. Must be in log or logit space
            in order for this function to actually calculate ARCs, otherwise
            it's just calculating weighted statistic of the first differences.
        years (YearRange):
            past and future year-ids
        weight_exp (float | int | xarray.DataArray):
            Power to which the increasing year weights are raised -- must be
            nonnegative. It can be a dataarray, but then it must have a single
            dimension, "draw", with the same coordinates on that dimension as
            ``past_data_da``.
        stat_dims (list[str]):
            list of dimensions to take quantiles over
        statistic (str):
            The statistic to use for calculating the ARC of past years. Can
            either be "mean" or "quantile".
        quantiles (object, optional):
            The quantile or quantiles to take on ``past_data``. Defaults to
            None, but must be a float or an iterable of floats if
            statistic="quantile".
        diff_over_mean (bool, optional):
            If True, then take annual differences for means-of-draws, instead
            of draws. Defaults to False.
        truncate (bool, optional):
            If True, then truncates the dataarray over the given dimensions.
            Defaults to False.
        truncate_dims (list[str], optional):
            A list of strings representing the dimensions to truncate over.
        truncate_quantiles (object, optional):
            The iterable of two floats representing the quantiles to take.
        replace_with_mean (bool, optional):
            If True and `truncate` is True, then replace values outside of the
            upper and lower quantiles taken across "location_id" and "year_id"
            with the mean across "year_id"; if False, then replace them with
            the upper and lower bounds themselves.
        extra_dim (str):
            Extra dimension that exists in `weights` and `data`. It should not
            be in `stat_dims`.
    Returns:
        (xarray.DataArray):
            Forecasts made using the ARC method.
    Raises:
        ValueError:
            If ``statistic`` is not equal to one of the strings "mean" or
            "quantile"
        ValueError:
            If ``weight_exp`` is a negative number
        ValueError:
            If `truncate` is True, then `truncate_quantiles` must be a list of
            floats.
    """
    LOGGER.debug("Entering the `arc` function")

    LOGGER.debug("years:{}".format(years))
    LOGGER.debug("weight_exp:{}".format(weight_exp))
    LOGGER.debug("statistic:{}".format(statistic))
    LOGGER.debug("stat_dims:{}".format(stat_dims))
    LOGGER.debug("quantiles:{}".format(quantiles))
    LOGGER.debug("diff_over_mean:{}".format(diff_over_mean))
    LOGGER.debug("truncate:{}".format(truncate))
    LOGGER.debug("replace_with_mean:{}".format(replace_with_mean))
    LOGGER.debug("truncate_quantiles:{}".format(truncate_quantiles))
    LOGGER.debug("extra_dim:{}".format(extra_dim))

    quantile_is_valid = (all([
        isinstance(quantile, float) for quantile in quantiles
    ]) if hasattr(quantiles, "__iter__") else isinstance(quantiles, float))

    if truncate and not truncate_dims:
        truncate_dims = ["location_id", "year_id"]

    if statistic not in ("mean", "quantile"):
        stat_arg_err_msg = (
            "`statistic` must be one of ('mean', 'quantile'), {} is not valid"
        ).format(statistic)
        LOGGER.error(stat_arg_err_msg)
        raise ValueError(stat_arg_err_msg)
    elif statistic == "quantile" and not quantile_is_valid:
        qnt_arg_err_msg = (
            "If `statistic='quantile'`, then `quantiles` must be of type float"
            " or a list of floats.")
        LOGGER.error(qnt_arg_err_msg)
        raise ValueError(qnt_arg_err_msg)
    else:
        pass  # valid input given for `statistic` arg

    stat_dims = list(stat_dims)

    trunc_quantile_is_valid = (all([
        isinstance(trunc_quantile, float)
        for trunc_quantile in truncate_quantiles
    ]) if hasattr(truncate_quantiles, "__iter__") else False)

    if truncate and not trunc_quantile_is_valid:
        truncate_err_msg = ("If `truncate` is True, then "
                            "`truncate_quantiles` must be a list of floats.")
        LOGGER.error(truncate_err_msg)
        raise ValueError(truncate_err_msg)
    elif truncate and trunc_quantile_is_valid:
        truncate_quantiles = Quantiles(*sorted(truncate_quantiles))
    else:
        pass  # `truncate_quantiles` can be None

    # Calculate the annual differentials.
    if diff_over_mean and "draw" in past_data_da.dims:
        annual_diff = past_data_da.mean("draw").sel(
            year_id=years.past_years).diff("year_id", n=1)
    else:
        annual_diff = past_data_da.sel(year_id=years.past_years).diff(
            "year_id", n=1)

    if isinstance(weight_exp, xr.DataArray) and "draw" in weight_exp.dims:
        weight_exp = expand_dimensions(weight_exp,
                                       year_id=annual_diff["year_id"].values)
    elif isinstance(weight_exp, float) or isinstance(weight_exp, int):
        pass  # weight_exp can be a float or an integer
    else:
        weight_exp_err_msg = (
            "`weight_exp` must be a float, an int, or an xarray.DataArray "
            "with a 'draw' dimension")
        LOGGER.error(weight_exp_err_msg)
        raise ValueError(weight_exp_err_msg)

    year_weights = xr.DataArray((np.arange(len(years.past_years) - 1) + 1),
                                dims="year_id",
                                coords={"year_id":
                                        years.past_years[1:]})**weight_exp

    # If annual-differences were taken over means (`annual_diff` doesn't have
    # a "draw" dimension), but `year_weights` does have a "draw" dimension,
    # then the draw dimension needs to be expanded for `annual_diff` such that
    # the mean is replicated for each draw.
    if "draw" in year_weights.dims and "draw" not in annual_diff.dims:
        annual_diff = expand_dimensions(annual_diff,
                                        draw=year_weights["draw"].values)
    else:
        pass  # `annual_diff` already has a draw dim, or `year_weights` doesn't

    if truncate:
        annual_diff = truncate_dataarray(annual_diff,
                                         truncate_dims,
                                         replace_with_mean=replace_with_mean,
                                         mean_dims=["year_id"],
                                         weights=year_weights,
                                         quantiles=truncate_quantiles,
                                         extra_dim=extra_dim)
    else:
        pass  # Annual differences are not truncated

    if (xr.DataArray(weight_exp) > 0).any():
        if statistic == "mean":
            arc_da = weighted_mean_with_extra_dim(annual_diff, stat_dims,
                                                  year_weights, extra_dim)
        else:
            arc_da = weighted_quantile_with_extra_dim(annual_diff, quantiles,
                                                      stat_dims, year_weights,
                                                      extra_dim)
    elif (xr.DataArray(weight_exp) == 0).all():
        # If ``weight_exp`` is zero, then just take the unweighted mean or
        # quantile.
        if statistic == "mean":
            arc_da = annual_diff.mean(stat_dims)
        else:
            arc_da = annual_diff.quantile(q=quantiles, dim=stat_dims)
    else:
        err_msg = "weight_exp must be nonnegative."
        LOGGER.error(err_msg)
        raise ValueError(err_msg)

    # Find future change by multiplying an array that counts the future
    # years, by the quantiles, which is weighted if `weight_exp` > 0. We want
    # the multipliers to start at 1, for the first year of forecasts, and count
    # to one more than the number of years to forecast.
    forecast_year_multipliers = xr.DataArray(
        np.arange(len(years.forecast_years)) + 1,
        dims=["year_id"],
        coords={"year_id": years.forecast_years})
    future_change = arc_da * forecast_year_multipliers

    forecast_data_da = past_data_da.sel(year_id=years.past_end) + future_change
    LOGGER.debug("Leaving the `arc` function")
    return forecast_data_da
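# --- Illustrative sketch (not part of the original example) ---
# A hedged, end-to-end toy of the ARC recipe from the docstring (no draws,
# weight_exp=1, weighted mean), written with plain xarray; the input rates
# and years are made up and the project's helpers are not used.
import numpy as np
import xarray as xr

# Toy past rates for 2013-2017, assumed already in log/logit space.
toy_past = xr.DataArray([0.10, 0.12, 0.13, 0.15, 0.18],
                        dims=["year_id"],
                        coords={"year_id": np.arange(2013, 2018)})

# (1) Annual differentials.
toy_diff = toy_past.diff("year_id")

# (2) Year weights [1, ..., n]^w with weight_exp = 1.
toy_weights = xr.DataArray(np.arange(1, 5), dims=["year_id"],
                           coords={"year_id": toy_past["year_id"].values[1:]})

# (3) Weighted mean of the differentials.
toy_arc = (toy_diff * toy_weights).sum("year_id") / toy_weights.sum("year_id")

# (4) Future differentials are the statistic times 1..m, and (5) they are
# added to the last past year to produce the forecast.
toy_multipliers = xr.DataArray(np.arange(1, 6), dims=["year_id"],
                               coords={"year_id": np.arange(2018, 2023)})
toy_forecast = (toy_past.sel(year_id=2017, drop=True)
                + toy_arc * toy_multipliers)
print(toy_forecast.values)  # approximately [0.202 0.224 0.246 0.268 0.290]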