示例#1
0
文件: session.py 项目: vierja/econuy
    def rolling(self, periods: Optional[int] = None,
                operation: str = "sum"):
        """
        Calculate rolling averages or sums.

        See Also
        --------
        :func:`~econuy.transform.rolling`

        """
        if isinstance(self.dataset, dict):
            output = {}
            for key, value in self.dataset.items():
                table = transform.rolling(value, periods=periods,
                                          operation=operation)
                output.update({key: table})
        else:
            output = transform.rolling(self.dataset, periods=periods,
                                       operation=operation)
        self.logger.info(f"Applied 'rolling' transformation with "
                         f"{periods} periods and '{operation}' operation.")
        if self.inplace is True:
            self.dataset = output
            return self
        else:
            return Session(location=self.location,
                           revise_rows=self.revise_rows,
                           only_get=self.only_get,
                           dataset=output,
                           logger=self.logger,
                           inplace=self.inplace)
示例#2
0
def test_rolling():
    data_m = dummy_df(freq="M", ts_type="Flujo")
    session = Session(location=TEST_CON, dataset=data_m, inplace=True)
    trf_none = session.rolling(operation="sum").dataset
    trf_none.columns = data_m.columns
    assert trf_none.equals(data_m.rolling(window=12, min_periods=12).sum())
    data_q1 = dummy_df(freq="M", ts_type="Flujo")
    data_q2 = dummy_df(freq="M", ts_type="Flujo")
    data_dict = {"data_q1": data_q1, "data_q2": data_q2}
    session = Session(location=TEST_CON, dataset=data_dict)
    trf_inter = session.rolling(operation="sum").dataset
    trf_inter["data_q1"].columns = trf_inter[
        "data_q2"].columns = data_q1.columns
    assert trf_inter["data_q1"].equals(data_q1.rolling(window=12,
                                                       min_periods=12).sum())
    assert trf_inter["data_q2"].equals(data_q2.rolling(window=12,
                                                       min_periods=12).sum())
    with pytest.warns(UserWarning):
        data_wrong = dummy_df(freq="M", ts_type="Stock")
        transform.rolling(data_wrong, periods=4, operation="average")
示例#3
0
def _lin_gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
             save_loc: Union[str, PathLike, Engine, Connection, None] = None,
             name: str = "lin_gdp",
             index_label: str = "index",
             only_get: bool = True,
             only_get_na: bool = True):
    """Get nominal GDP data in UYU and USD with forecasts.

    Update nominal GDP data for use in the `convert.convert_gdp()` function.
    Get IMF forecasts for year of last available data point and the next
    year (for example, if the last period available at the BCU website is
    september 2019, fetch forecasts for 2019 and 2020).

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'lin_gdp'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.
    only_get_na : bool, default True
        If True, don't download national accounts data,
        retrieve what is available from ``update_loc``.

    Returns
    -------
    output : Pandas dataframe
        Quarterly GDP in UYU and USD with 1 year forecasts.

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    data_uyu = get(update_loc=update_loc, only_get=only_get_na)["gdp_cur_nsa"]
    data_uyu = transform.rolling(data_uyu, periods=4, operation="sum")
    data_usd = transform.convert_usd(data_uyu,
                                     update_loc=update_loc,
                                     only_get=only_get)

    data = [data_uyu, data_usd]
    last_year = data_uyu.index.max().year
    if data_uyu.index.max().month == 12:
        last_year += 1

    results = []
    for table, gdp in zip(["NGDP", "NGDPD"], data):
        table_url = (f"https://www.imf.org/external/pubs/ft/weo/2019/02/weodat"
                     f"a/weorept.aspx?sy={last_year - 1}&ey={last_year + 1}"
                     f"&scsm=1&ssd=1&sort=country&ds=.&br=1&pr1.x=27&pr1.y=9&c"
                     f"=298&s={table}&grp=0&a=")
        imf_data = pd.to_numeric(pd.read_html(table_url)[4].iloc[2, [5, 6, 7]])
        imf_data = imf_data.reset_index(drop=True)
        fcast = (gdp.loc[[dt.datetime(last_year - 1, 12, 31)]].multiply(
            imf_data.iloc[1]).divide(imf_data.iloc[0]))
        fcast = fcast.rename(index={
            dt.datetime(last_year - 1, 12, 31):
            dt.datetime(last_year, 12, 31)
        })
        next_fcast = (gdp.loc[[dt.datetime(last_year - 1, 12, 31)]].multiply(
            imf_data.iloc[2]).divide(imf_data.iloc[0]))
        next_fcast = next_fcast.rename(index={
            dt.datetime(last_year - 1, 12, 31):
            dt.datetime(last_year + 1, 12, 31)
        })
        fcast = fcast.append(next_fcast)
        gdp = gdp.append(fcast)
        results.append(gdp)

    output = pd.concat(results, axis=1)
    output = output.resample("Q-DEC").interpolate("linear").dropna(how="all")
    arrays = []
    for level in range(0, 9):
        arrays.append(list(output.columns.get_level_values(level)))
    arrays[0] = ["PBI UYU", "PBI USD"]
    tuples = list(zip(*arrays))
    output.columns = pd.MultiIndex.from_tuples(tuples,
                                               names=[
                                                   "Indicador", "Área",
                                                   "Frecuencia", "Moneda",
                                                   "Inf. adj.", "Unidad",
                                                   "Seas. Adj.", "Tipo",
                                                   "Acum. períodos"
                                               ])

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=output,
                name=name,
                index_label=index_label)

    return output
示例#4
0
def test_fiscal():
    remove_clutter()
    session = Session(location=TEST_CON)
    assert isinstance(session, Session)
    assert isinstance(session.dataset, pd.DataFrame)
    fiscal_tfm = session.get_frequent(dataset="fiscal",
                                      aggregation="nfps",
                                      fss=True,
                                      unit="gdp").dataset
    remove_clutter()
    fiscal_ = session.get(dataset="fiscal").dataset
    nfps = fiscal_["nfps"]
    gc = fiscal_["gc-bps"]
    proc = pd.DataFrame(index=nfps.index)
    proc["Ingresos: SPNF-SPC"] = nfps["Ingresos: SPNF"]
    proc["Egresos: Primarios SPNF-SPC"] = nfps["Egresos: Primarios SPNF"]
    proc["Egresos: Inversiones SPNF-SPC"] = nfps["Egresos: Inversiones"]
    proc["Intereses: SPNF"] = nfps["Intereses: Totales"]
    proc["Egresos: Totales SPNF"] = (proc["Egresos: Primarios SPNF-SPC"] +
                                     proc["Intereses: SPNF"])
    proc["Resultado: Primario intendencias"] = nfps[
        "Resultado: Primario intendencias"]
    proc["Resultado: Primario BSE"] = nfps["Resultado: Primario BSE"]
    proc["Resultado: Primario SPNF"] = nfps["Resultado: Primario SPNF"]
    proc["Resultado: Global SPNF"] = nfps["Resultado: Global SPNF"]

    proc["Ingresos: FSS"] = gc["Ingresos: FSS"]
    proc["Intereses: FSS"] = gc["Intereses: BPS-FSS"]
    proc["Ingresos: SPNF-SPC aj. FSS"] = (proc["Ingresos: SPNF-SPC"] -
                                          proc["Ingresos: FSS"])
    proc["Intereses: SPNF aj. FSS"] = (proc["Intereses: SPNF"] -
                                       proc["Intereses: FSS"])
    proc["Egresos: Totales SPNF aj. FSS"] = (proc["Egresos: Totales SPNF"] -
                                             proc["Intereses: FSS"])
    proc["Resultado: Primario SPNF aj. FSS"] = (
        proc["Resultado: Primario SPNF"] - proc["Ingresos: FSS"])
    proc["Resultado: Global SPNF aj. FSS"] = (proc["Resultado: Global SPNF"] -
                                              proc["Ingresos: FSS"] +
                                              proc["Intereses: FSS"])

    cols = fiscal_metadata["nfps"][True]
    compare = proc.loc[:, cols]
    metadata._set(compare,
                  area="Cuentas fiscales y deuda",
                  currency="UYU",
                  inf_adj="No",
                  unit="No",
                  seas_adj="NSA",
                  ts_type="Flujo",
                  cumperiods=1)
    compare_gdp = transform.rolling(compare, periods=12, operation="sum")
    compare_gdp = transform.convert_gdp(compare_gdp)
    compare_gdp.columns = fiscal_tfm.columns
    assert compare_gdp.equals(fiscal_tfm)
    remove_clutter()
    fiscal_tfm = session.get_frequent(dataset="fiscal",
                                      aggregation="nfps",
                                      fss=True,
                                      unit="usd").dataset
    compare_usd = transform.convert_usd(compare)
    compare_usd.columns = fiscal_tfm.columns
    assert compare_usd.equals(fiscal_tfm)
    remove_clutter()
    fiscal_tfm = session.get_frequent(dataset="fiscal",
                                      aggregation="nfps",
                                      fss=True,
                                      unit="real").dataset
    compare_real = transform.convert_real(compare)
    compare_real.columns = fiscal_tfm.columns
    assert compare_real.equals(fiscal_tfm)
    remove_clutter()
    start_date = "2010-01-31"
    end_date = "2010-12-31"
    fiscal_tfm = session.get_frequent(dataset="fiscal",
                                      aggregation="nfps",
                                      fss=True,
                                      unit="real_usd",
                                      start_date=start_date,
                                      end_date=end_date).dataset
    compare_real_usd = transform.convert_real(compare,
                                              start_date=start_date,
                                              end_date=end_date)
    xr = nxr.get_monthly(update_loc=None, save_loc=None)
    compare_real_usd = compare_real_usd.divide(
        xr[start_date:end_date].mean()[1])
    compare_real_usd.columns = fiscal_tfm.columns
    assert compare_real_usd.equals(fiscal_tfm)
    remove_clutter()
    with pytest.raises(ValueError):
        session.get_frequent(dataset="fiscal",
                             aggregation="nfps",
                             unit="wrong")
    with pytest.raises(ValueError):
        session.get_frequent(dataset="fiscal", aggregation="wrong")
    remove_clutter()
    fiscal_ = session.get(dataset="fiscal").dataset
    session.only_get = True
    compare = session.get(dataset="fiscal").dataset
    for v, v2 in zip(fiscal_.values(), compare.values()):
        assert v.round(4).equals(v2.round(4))
    remove_clutter()
    session.only_get = False
示例#5
0
    def store_transformed_data(
        real_start,
        real_end,
        resample_freq,
        resample_operation,
        rolling_periods,
        rolling_operation,
        chg_diff_operation,
        chg_diff_period,
        rebase_start,
        rebase_end,
        rebase_base,
        decompose_method,
        decompose_component,
        order,
        query_data,
        query_metadata,
    ):
        if not order:
            return query_data, query_metadata
        if not query_data:
            return {}, {}
        if (
            ("resample" in order and (not resample_freq or not resample_operation))
            or ("rolling" in order and (not rolling_periods or not rolling_operation))
            or ("chg-diff" in order and (not chg_diff_operation or not chg_diff_period))
            or ("rebase" in order and (not rebase_start or not rebase_base))
            or (
                "decompose" in order
                and (not decompose_method or not decompose_component)
            )
        ):
            raise PreventUpdate
        data = pd.DataFrame.from_records(query_data, coerce_float=True, index="index")
        data.index = pd.to_datetime(data.index)
        metadata = pd.DataFrame.from_records(query_metadata)
        data.columns = pd.MultiIndex.from_frame(metadata)
        p = Pipeline(location=db.engine, download=False)

        transformations = {
            "usd": lambda x: convert_usd(x, pipeline=p, errors="ignore"),
            "real": lambda x: convert_real(
                x, start_date=real_start, end_date=real_end, pipeline=p, errors="ignore"
            ),
            "gdp": lambda x: convert_gdp(x, pipeline=p, errors="ignore"),
            "resample": lambda x: resample(
                x, rule=resample_freq, operation=resample_operation
            ),
            "rolling": lambda x: rolling(
                x, window=rolling_periods, operation=rolling_operation
            ),
            "chg-diff": lambda x: chg_diff(
                x, operation=chg_diff_operation, period=chg_diff_period
            ),
            "rebase": lambda x: rebase(
                x, start_date=rebase_start, end_date=rebase_end, base=rebase_base
            ),
            "decompose": lambda x: decompose(
                x,
                component=decompose_component,
                method=decompose_method,
                force_x13=True,
                errors="ignore",
            ),
        }
        transformed_data = data.copy()
        for t in order:
            transformed_data = transformations[t](transformed_data)

        transformed_metadata = transformed_data.columns.to_frame()
        transformed_data.columns = transformed_data.columns.get_level_values(0)
        transformed_data.reset_index(inplace=True)

        return transformed_data.to_dict("records"), transformed_metadata.to_dict(
            "records"
        )
示例#6
0
文件: frequent.py 项目: vierja/econuy
def fiscal(aggregation: str = "gps",
           fss: bool = True,
           unit: Optional[str] = None,
           start_date: Union[str, date, None] = None,
           end_date: Union[str, date, None] = None,
           update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = True,
           name: str = "tfm_fiscal",
           index_label: str = "index") -> pd.DataFrame:
    """
    Get fiscal accounts data.

    Allow choosing government aggregation, whether to exclude the FSS
    (Fideicomiso  de la Seguridad Social, Social Security Trust Fund), the unit
    (UYU, real UYU, USD, real USD or percent of GDP), periods to accumuldate
    for rolling sums and seasonal adjustment.

    Parameters
    ----------
    aggregation : {'gps', 'nfps', 'gc'}
        Government aggregation. Can be ``gps`` (consolidated public sector),
        ``nfps`` (non-financial public sector) or ``gc`` (central government).
    fss : bool, default True
        If ``True``, exclude the `FSS's <https://www.impo.com.uy/bases/decretos
        /71-2018/25>`_ income from gov't revenues and the FSS's
        interest revenues from gov't interest payments.
    unit : {None, 'gdp', 'usd', 'real', 'real_usd'}
        Unit in which data should be expressed. Possible values are ``real``,
        ``usd``, ``real_usd`` and ``gdp``. If ``None`` or another string is
        set, no unit calculations will be performed, rendering the data as is
        (current UYU).
    start_date : str, datetime.date or None, default None
        If ``unit`` is set to ``real`` or ``real_usd``, this parameter and
        ``end_date`` control how deflation is calculated.
    end_date :
        If ``unit`` is set to ``real`` or ``real_usd``, this parameter and
        ``start_date`` control how deflation is calculated.
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'tfm_fiscal'
        Either CSV filename for updating and/or saving, or table name if
        using SQL. Options will be appended to the base name.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc`` for the commodity index.

    Returns
    -------
    Fiscal aggregation : pd.DataFrame

    Raises
    ------
    ValueError
        If ``seas_adj``, ``unit`` or ``aggregation`` are given an invalid
        keywords.

    """
    if unit not in ["gdp", "usd", "real", "real_usd", None]:
        raise ValueError("'unit' can be 'gdp', 'usd', 'real', 'real_usd' or"
                         " None.")
    if aggregation not in ["gps", "nfps", "gc"]:
        raise ValueError("'aggregation' can be 'gps', 'nfps' or 'gc'.")

    if unit is None:
        unit = "uyu"
    name = f"{name}_{aggregation}_{unit}"
    if fss:
        name = name + "_fssadj"

    data = fiscal_accounts.get(update_loc=update_loc,
                               save_loc=save_loc,
                               only_get=only_get)
    gps = data["gps"]
    nfps = data["nfps"]
    gc = data["gc-bps"]

    proc = pd.DataFrame(index=gps.index)
    proc["Ingresos: SPNF-SPC"] = nfps["Ingresos: SPNF"]
    proc["Ingresos: GC-BPS"] = gc["Ingresos: GC-BPS"]
    proc["Egresos: Primarios SPNF-SPC"] = nfps["Egresos: Primarios SPNF"]
    proc["Egresos: Totales GC-BPS"] = gc["Egresos: GC-BPS"]
    proc["Egresos: Inversiones SPNF-SPC"] = nfps["Egresos: Inversiones"]
    proc["Egresos: Inversiones GC-BPS"] = gc["Egresos: Inversión"]
    proc["Intereses: SPNF"] = nfps["Intereses: Totales"]
    proc["Intereses: BCU"] = gps["Intereses: BCU"]
    proc["Intereses: SPC"] = proc["Intereses: SPNF"] + proc["Intereses: BCU"]
    proc["Intereses: GC-BPS"] = gc["Intereses: Total"]
    proc["Egresos: Totales SPNF"] = (proc["Egresos: Primarios SPNF-SPC"] +
                                     proc["Intereses: SPNF"])
    proc["Egresos: Totales SPC"] = (proc["Egresos: Totales SPNF"] +
                                    proc["Intereses: BCU"])
    proc["Egresos: Primarios GC-BPS"] = (proc["Egresos: Totales GC-BPS"] -
                                         proc["Intereses: GC-BPS"])
    proc["Resultado: Primario intendencias"] = nfps[
        "Resultado: Primario intendencias"]
    proc["Resultado: Primario BSE"] = nfps["Resultado: Primario BSE"]
    proc["Resultado: Primario BCU"] = gps["Resultado: Primario BCU"]
    proc["Resultado: Primario SPNF"] = nfps["Resultado: Primario SPNF"]
    proc["Resultado: Global SPNF"] = nfps["Resultado: Global SPNF"]
    proc["Resultado: Primario SPC"] = gps["Resultado: Primario SPC"]
    proc["Resultado: Global SPC"] = gps["Resultado: Global SPC"]
    proc["Resultado: Primario GC-BPS"] = (proc["Ingresos: GC-BPS"] -
                                          proc["Egresos: Primarios GC-BPS"])
    proc["Resultado: Global GC-BPS"] = gc["Resultado: Global GC-BPS"]

    proc["Ingresos: FSS"] = gc["Ingresos: FSS"]
    proc["Intereses: FSS"] = gc["Intereses: BPS-FSS"]
    proc["Ingresos: SPNF-SPC aj. FSS"] = (proc["Ingresos: SPNF-SPC"] -
                                          proc["Ingresos: FSS"])
    proc["Ingresos: GC-BPS aj. FSS"] = (proc["Ingresos: GC-BPS"] -
                                        proc["Ingresos: FSS"])
    proc["Intereses: SPNF aj. FSS"] = (proc["Intereses: SPNF"] -
                                       proc["Intereses: FSS"])
    proc["Intereses: SPC aj. FSS"] = (proc["Intereses: SPC"] -
                                      proc["Intereses: FSS"])
    proc["Intereses: GC-BPS aj. FSS"] = (proc["Intereses: GC-BPS"] -
                                         proc["Intereses: FSS"])
    proc["Egresos: Totales SPNF aj. FSS"] = (proc["Egresos: Totales SPNF"] -
                                             proc["Intereses: FSS"])
    proc["Egresos: Totales SPC aj. FSS"] = (proc["Egresos: Totales SPC"] -
                                            proc["Intereses: FSS"])
    proc["Egresos: Totales GC-BPS aj. FSS"] = (
        proc["Egresos: Totales GC-BPS"] - proc["Intereses: FSS"])
    proc["Resultado: Primario SPNF aj. FSS"] = (
        proc["Resultado: Primario SPNF"] - proc["Ingresos: FSS"])
    proc["Resultado: Global SPNF aj. FSS"] = (proc["Resultado: Global SPNF"] -
                                              proc["Ingresos: FSS"] +
                                              proc["Intereses: FSS"])
    proc["Resultado: Primario SPC aj. FSS"] = (
        proc["Resultado: Primario SPC"] - proc["Ingresos: FSS"])
    proc["Resultado: Global SPC aj. FSS"] = (proc["Resultado: Global SPC"] -
                                             proc["Ingresos: FSS"] +
                                             proc["Intereses: FSS"])
    proc["Resultado: Primario GC-BPS aj. FSS"] = (
        proc["Resultado: Primario GC-BPS"] - proc["Ingresos: FSS"])
    proc["Resultado: Global GC-BPS aj. FSS"] = (
        proc["Resultado: Global GC-BPS"] - proc["Ingresos: FSS"] +
        proc["Intereses: FSS"])

    output = proc.loc[:, fiscal_metadata[aggregation][fss]]
    metadata._set(output,
                  area="Cuentas fiscales y deuda",
                  currency="UYU",
                  inf_adj="No",
                  unit="Millones",
                  seas_adj="NSA",
                  ts_type="Flujo",
                  cumperiods=1)

    if unit == "gdp":
        output = transform.rolling(output, periods=12, operation="sum")
        output = transform.convert_gdp(output,
                                       update_loc=update_loc,
                                       save_loc=save_loc,
                                       only_get=only_get)
    elif unit == "usd":
        output = transform.convert_usd(output,
                                       update_loc=update_loc,
                                       save_loc=save_loc,
                                       only_get=only_get)
    elif unit == "real_usd":
        output = transform.convert_real(output,
                                        start_date=start_date,
                                        end_date=end_date,
                                        update_loc=update_loc,
                                        save_loc=save_loc,
                                        only_get=only_get)
        xr = nxr.get_monthly(update_loc=update_loc,
                             save_loc=save_loc,
                             only_get=only_get)
        output = output.divide(xr[start_date:end_date].mean()[1])
        metadata._set(output, currency="USD")
    elif unit == "real":
        output = transform.convert_real(output,
                                        start_date=start_date,
                                        end_date=end_date,
                                        update_loc=update_loc,
                                        save_loc=save_loc,
                                        only_get=only_get)

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=output,
                name=name,
                index_label=index_label)

    return output