Example #1
def nxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get currencies data.

    Selected currencies are the US dollar index, USDEUR, USDJPY and USDCNY.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily currencies : pd.DataFrame

    """
    name = "global_nxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    output = []
    for series in ["dollar", "eur", "jpy", "cny"]:
        aux = pd.read_csv(urls[name]["dl"][series],
                          index_col=0,
                          usecols=[0, 4],
                          parse_dates=True)
        aux.columns = [series]
        if series == "dollar":
            aux.dropna(inplace=True)
        output.append(aux)
    output = output[0].join(output[1:]).interpolate(method="linear",
                                                    limit_area="inside")
    output.columns = ["Índice Dólar", "Euro", "Yen", "Renminbi"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(
        output,
        levels=[3, 5],
        new_arrays=[["USD", "EUR", "JPY", "CNY"],
                    ["Canasta/USD", "EUR/USD", "JPY/USD", "CNY/USD"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
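
A minimal usage sketch of the shared caching parameters (a hypothetical call pattern; `data` as a cache directory is an assumption, the function is the one above):

from pathlib import Path

data_dir = Path("data")  # assumed local cache directory
data_dir.mkdir(exist_ok=True)

# First run: download and save global_nxr under data_dir.
currencies = nxr(save_loc=data_dir)

# Later runs: merge fresh rows into the cached CSV. With revise_rows="nodup",
# periods already on file are replaced by the new download, not duplicated.
currencies = nxr(update_loc=data_dir, revise_rows="nodup", save_loc=data_dir)

# Offline: skip the download and return whatever the cache holds.
cached = nxr(update_loc=data_dir, only_get=True)
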
Example #2
def gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get seasonally adjusted real quarterly GDP for select countries.

    Countries/aggregates are US, EU-27, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Quarterly real GDP in seasonally adjusted terms : pd.DataFrame

    """
    name = "global_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    chn_y = dt.datetime.now().year + 1
    chn_r = requests.get(f"{urls[name]['dl']['chn_oecd']}{chn_y}-Q4")
    chn_json = chn_r.json()
    chn_datasets = []
    for dataset, start in zip(["0", "1"], ["2011-03-31", "1993-03-31"]):
        raw = chn_json["dataSets"][0]["series"][f"0:0:{dataset}:0"][
            "observations"]
        values = [x[0] for x in raw.values()]
        df = pd.DataFrame(data=values,
                          index=pd.date_range(start=start,
                                              freq="Q-DEC",
                                              periods=len(values)),
                          columns=["China"])
        chn_datasets.append(df)
    chn_qoq = chn_datasets[0]
    chn_yoy = chn_datasets[1]
    chn_obs = pd.read_excel(urls["global_gdp"]["dl"]["chn_obs"],
                            index_col=0).dropna(how="all",
                                                axis=1).dropna(how="all",
                                                               axis=0)
    chn_obs = chn_obs.loc[(chn_obs.index > "2011-01-01")
                          & (chn_obs.index < "2016-01-01")]
    chn_yoy["volume"] = chn_obs
    # Back-fill pre-2011 levels: deflate the observed level four quarters
    # ahead by that quarter's year-on-year growth rate.
    for row in reversed(range(len(chn_yoy.loc[chn_yoy.index < "2011-01-01"]))):
        if pd.isna(chn_yoy.iloc[row, 1]):
            chn_yoy.iloc[row, 1] = (chn_yoy.iloc[row + 4, 1] /
                                    (1 + chn_yoy.iloc[row + 4, 0] / 100))
    chn_yoy = chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"]
    metadata._set(chn_yoy)
    chn_sa = decompose(chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"],
                       component="seas",
                       method="x13")
    chn_sa = pd.concat([chn_sa, chn_qoq], axis=1)
    # Chain the seasonally adjusted level forward with QoQ growth rates.
    for row in range(len(chn_sa)):
        if not pd.isna(chn_sa.iloc[row, 1]):
            chn_sa.iloc[row, 0] = (chn_sa.iloc[row - 1, 0] *
                                   (1 + chn_sa.iloc[row, 1] / 100))
    chn = chn_sa.iloc[:, [0]].div(10)

    gdps = []
    load_dotenv(Path(get_project_root(), ".env"))
    fred_api_key = os.environ.get("FRED_API_KEY")
    for series in ["GDPC1", "CLVMNACSCAB1GQEU272020", "JPNRGDPEXP"]:
        r = requests.get(f"{urls[name]['dl']['fred']}{series}&api_key="
                         f"{fred_api_key}&file_type=json")
        aux = pd.DataFrame.from_records(r.json()["observations"])
        aux = aux[["date", "value"]].set_index("date")
        aux.index = pd.to_datetime(aux.index)
        aux.index = aux.index.shift(3, freq="M") + MonthEnd(0)
        aux.columns = [series]
        aux = aux.apply(pd.to_numeric, errors="coerce")
        if series == "GDPC1":
            aux = aux.div(4)
        elif series == "CLVMNACSCAB1GQEU272020":
            aux = aux.div(1000)
        gdps.append(aux)
    gdps = pd.concat(gdps, axis=1)

    output = pd.concat([gdps, chn], axis=1)
    output.columns = ["Estados Unidos", "Unión Europea", "Japón", "China"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="Const.",
                  unit="Miles de millones",
                  seas_adj="SA",
                  ts_type="Flujo",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
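
The China block reconstructs levels from growth rates in two passes: pre-2011 levels are backed out of year-on-year rates, and the seasonally adjusted series is then chained forward with quarter-on-quarter rates. A self-contained sketch of the backward pass, with toy numbers:

import pandas as pd

idx = pd.date_range("2010-03-31", freq="Q-DEC", periods=8)
yoy = pd.Series([9.0, 9.2, 9.1, 9.3, 9.5, 9.4, 9.2, 9.0], index=idx)  # % y/y
level = pd.Series([float("nan")] * 4 + [100.0, 102.5, 104.0, 106.5], index=idx)

# A level four quarters back equals the later observed level deflated by
# the year-on-year growth recorded in that later quarter.
for row in reversed(range(4)):
    level.iloc[row] = level.iloc[row + 4] / (1 + yoy.iloc[row + 4] / 100)
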
Example #3
def long_rates(update_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
               revise_rows: Union[str, int] = "nodup",
               save_loc: Union[str, PathLike, Engine, Connection, None] = None,
               only_get: bool = False) -> pd.DataFrame:
    """Get 10-year government bonds interest rates.

    Countries/aggregates selected are US, Germany, France, Italy, Spain,
    United Kingdom, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily 10-year government bonds interest rates : pd.DataFrame

    """
    name = "global_long_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    bonds = []
    load_dotenv(Path(get_project_root(), ".env"))
    fred_api_key = os.environ.get("FRED_API_KEY")
    r = requests.get(f"{urls[name]['dl']['fred']}DGS10&api_key="
                     f"{fred_api_key}&file_type=json")
    us = pd.DataFrame.from_records(r.json()["observations"])
    us = us[["date", "value"]].set_index("date")
    us.index = pd.to_datetime(us.index)
    us.columns = ["United States"]
    bonds.append(us.apply(pd.to_numeric, errors="coerce").dropna())

    for country, sid in zip([
            "Germany", "France", "Italy", "Spain", "United Kingdom", "Japan",
            "China"
    ], ["23693", "23778", "23738", "23806", "23673", "23901", "29227"]):
        end_date_dt = dt.datetime(2000, 1, 1)
        start_date_dt = dt.datetime(2000, 1, 1)
        aux = []
        while end_date_dt < dt.datetime.now():
            end_date_dt = start_date_dt + dt.timedelta(days=5000)
            params = {
                "curr_id": sid,
                "smlID": str(randint(1000000, 99999999)),
                "header": f"{country} 10-Year Bond Yield Historical Data",
                "st_date": start_date_dt.strftime("%m/%d/%Y"),
                "end_date": end_date_dt.strftime("%m/%d/%Y"),
                "interval_sec": "Daily",
                "sort_col": "date",
                "sort_ord": "DESC",
                "action": "historical_data"
            }
            r = requests.post(urls["global_long_rates"]["dl"]["main"],
                              headers=investing_headers,
                              data=params)
            aux.append(
                pd.read_html(r.content,
                             match="Price",
                             index_col=0,
                             parse_dates=True)[0])
            start_date_dt = end_date_dt + dt.timedelta(days=1)
        aux = pd.concat(aux, axis=0)[["Price"]].sort_index()
        aux.columns = [country]
        bonds.append(aux)

    output = bonds[0].join(bonds[1:], how="left")
    output = output.interpolate(method="linear", limit_area="inside")
    output.columns = [
        "Estados Unidos", "Alemania", "Francia", "Italia", "España",
        "Reino Unido", "Japón", "China"
    ]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  unit="Tasa",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(
        output,
        levels=[3],
        new_arrays=[["USD", "EUR", "EUR", "EUR", "EUR", "GBP", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
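
The scraper covers two decades of daily data in roughly 5000-day windows because the endpoint caps the span of a single request. The windowing logic in isolation:

import datetime as dt

start = dt.datetime(2000, 1, 1)
while start < dt.datetime.now():
    end = start + dt.timedelta(days=5000)
    # one POST per window; the responses are concatenated and sorted later
    print(start.strftime("%m/%d/%Y"), "->", end.strftime("%m/%d/%Y"))
    start = end + dt.timedelta(days=1)
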
Example #4
def stocks(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           revise_rows: Union[str, int] = "nodup",
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = False) -> pd.DataFrame:
    """Get stock market index data.

    Indexes selected are S&P 500, Euronext 100, Nikkei 225 and Shanghai
    Composite.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily stock market index in USD : pd.DataFrame

    """
    name = "global_stocks"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    yahoo = []
    for series in ["spy", "n100", "nikkei", "sse"]:
        aux = pd.read_csv(urls[name]["dl"][series],
                          index_col=0,
                          usecols=[0, 4],
                          parse_dates=True)
        aux.columns = [series]
        yahoo.append(aux)
    output = pd.concat(yahoo, axis=1).interpolate(method="linear",
                                                  limit_area="inside")
    output.columns = [
        "S&P 500", "Euronext 100", "Nikkei 225",
        "Shanghai Stock Exchange Composite"
    ]
    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])
    output = rebase(output, start_date="2019-01-02")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
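
`rebase` is defined elsewhere; from the call it plausibly indexes each column to 100 on the base date. A minimal sketch under that assumption:

import pandas as pd

def rebase_sketch(df: pd.DataFrame, start_date: str) -> pd.DataFrame:
    # Presumed semantics: divide each column by its value on the base date
    # so that df.loc[start_date] == 100 in every column.
    return df.div(df.loc[start_date]).mul(100)
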
Example #5
def policy_rates(update_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """Get central bank policy interest rates data.

    Countries/aggregates selected are US, Euro Area, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily policy interest rates : pd.DataFrame

    """
    name = "global_policy_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    r = requests.get(urls[name]["dl"]["main"])
    temp_dir = tempfile.TemporaryDirectory()
    with zipfile.ZipFile(BytesIO(r.content), "r") as f:
        f.extractall(path=temp_dir.name)
        path_temp = path.join(temp_dir.name,
                              "WEBSTATS_CBPOL_D_DATAFLOW_csv_row.csv")
        raw = pd.read_csv(path_temp,
                          usecols=[0, 7, 19, 36, 37],
                          index_col=0,
                          header=2,
                          parse_dates=True).dropna(how="all")
    output = (raw.apply(pd.to_numeric,
                        errors="coerce").interpolate(method="linear",
                                                     limit_area="inside"))
    output.columns = ["China", "Japón", "Estados Unidos", "Eurozona"]
    output = output[["Estados Unidos", "Eurozona", "Japón", "China"]]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  unit="Tasa",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
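
Extracting to a temporary directory works, but the archive member can also be read in memory, since pandas accepts file-like objects. A sketch with a placeholder URL:

import zipfile
from io import BytesIO

import pandas as pd
import requests

r = requests.get("https://example.org/cbpol.zip")  # placeholder URL
with zipfile.ZipFile(BytesIO(r.content)) as zf:
    with zf.open("WEBSTATS_CBPOL_D_DATAFLOW_csv_row.csv") as fh:
        raw = pd.read_csv(fh, usecols=[0, 7, 19, 36, 37], index_col=0,
                          header=2, parse_dates=True).dropna(how="all")
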
Example #6
def nxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get USDARS and USDBRL.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily exchange rates : pd.DataFrame

    """
    name = "regional_nxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = []
    for dollar in ["ar", "ar_unofficial"]:
        r = requests.get(urls[name]["dl"][dollar])
        aux = pd.DataFrame(r.json())[[0, 2]]
        aux.set_index(0, drop=True, inplace=True)
        aux.drop("Fecha", inplace=True)
        aux = aux.replace(",", ".", regex=True).apply(pd.to_numeric)
        aux.index = pd.to_datetime(aux.index, format="%d-%m-%Y")
        aux.sort_index(inplace=True)
        aux.columns = [dollar]
        arg.append(aux)
    arg = arg[0].join(arg[1], how="left")
    arg.columns = ["Argentina - oficial", "Argentina - informal"]

    r = requests.get(urls[name]["dl"]["bra"])
    bra = pd.DataFrame(r.json())
    bra = [(x["VALDATA"], x["VALVALOR"]) for x in bra["value"]]
    bra = pd.DataFrame.from_records(bra).dropna(how="any")
    bra.set_index(0, inplace=True)
    bra.index = pd.to_datetime(bra.index.str[:-4],
                               format="%Y-%m-%dT%H:%M:%S").tz_localize(None)
    bra.columns = ["Brasil"]

    output = arg.join(bra, how="left").interpolate(method="linear",
                                                   limit_area="inside")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Regional",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  unit="Tasa",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3, 5],
                                new_arrays=[["ARS", "ARS", "BRL"],
                                            ["ARS/USD", "ARS/USD", "BRL/USD"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
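
The Argentine feed publishes decimal commas, so values are converted to floats by a string replace followed by numeric coercion. The conversion in isolation:

import pandas as pd

raw = pd.DataFrame({"ar": ["95,50", "96,25"]})
# Swap the decimal comma for a point, then coerce strings to floats.
clean = raw.replace(",", ".", regex=True).apply(pd.to_numeric)
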
Example #7
def rxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get real exchange rates vis-á-vis the US dollar for Argentina and Brasil .

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly real exchange rate : pd.DataFrame

    """
    name = "regional_rxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    proc = _ifs(update_loc=update_loc, save_loc=save_loc, only_get=only_get)

    output = pd.DataFrame()
    output["Argentina"] = (proc["Argentina - oficial"] * proc["US.PCPI_IX"] /
                           proc["ARG CPI"])
    output["Brasil"] = proc["Brasil"] * proc["US.PCPI_IX"] / proc["BRA CPI"]
    metadata._set(output,
                  area="Regional",
                  currency="-",
                  inf_adj="-",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["ARS/USD", "BRL/USD"]])
    output = rebase(output, start_date="2019-01-01",
                    end_date="2019-01-31").dropna(how="all")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
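
The real exchange rate computed above is the nominal rate deflated by relative prices, RXR = NXR * US CPI / local CPI. A worked example with toy numbers:

nxr_ars = 350.0    # ARS per USD, nominal (toy number)
us_cpi = 310.0     # US consumer price index
arg_cpi = 21000.0  # Argentine consumer price index

rxr = nxr_ars * us_cpi / arg_cpi  # ~5.17; the level only becomes meaningful
                                  # after rebasing, as done above
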
Example #8
def gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False,
        driver: WebDriver = None) -> pd.DataFrame:
    """Get seasonally adjusted real GDP for Argentina and Brazil.

    This function requires a Selenium webdriver. It can be provided in the
    driver parameter, or it will attempt to configure a Chrome webdriver.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.
    driver : selenium.webdriver.chrome.webdriver.WebDriver, default None
        Selenium webdriver for scraping. If None, build a Chrome webdriver.

    Returns
    -------
    Quarterly real GDP : pd.DataFrame

    """
    name = "regional_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    if driver is None:
        driver = _build()
    driver.get(urls[name]["dl"]["arg_new"])
    time.sleep(5)
    soup = BeautifulSoup(driver.page_source, "lxml")
    driver.quit()
    url = soup.find_all(href=re.compile("desest"))[0]["href"]
    full_url = f"https://www.indec.gob.ar{url}"
    arg = pd.read_excel(full_url, skiprows=3, usecols="D").dropna(how="all")
    arg.index = pd.date_range(start="2004-03-31",
                              freq="Q-DEC",
                              periods=len(arg))
    arg_old = pd.read_excel(urls[name]["dl"]["arg_old"],
                            skiprows=7,
                            usecols="D").dropna(how="all")
    arg_old.index = pd.date_range(start="1993-03-31",
                                  freq="Q-DEC",
                                  periods=len(arg_old))
    arg = pd.concat([arg, arg_old], axis=1)
    # Retropolate: extend the new-base series backwards with the old
    # vintage's quarter-to-quarter growth ratios.
    for row in reversed(range(len(arg))):
        if pd.isna(arg.iloc[row, 0]):
            arg.iloc[row, 0] = (arg.iloc[row, 1] / arg.iloc[row + 1, 1] *
                                arg.iloc[row + 1, 0])
    arg = arg.iloc[:, [0]]

    r = requests.get(urls[name]["dl"]["bra"])
    temp_dir = tempfile.TemporaryDirectory()
    with zipfile.ZipFile(BytesIO(r.content), "r") as f:
        f.extractall(path=temp_dir.name)
    path_temp = path.join(temp_dir.name, listdir(temp_dir.name)[0])
    bra = pd.read_excel(path_temp,
                        usecols="Q",
                        skiprows=3,
                        sheet_name="Val encad preços 95 com ajuste")
    bra.index = pd.date_range(start="1996-03-31",
                              freq="Q-DEC",
                              periods=len(bra))

    output = pd.concat([arg, bra], axis=1).div(1000)
    output.columns = ["Argentina", "Brasil"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Regional",
                  currency="-",
                  inf_adj="Const.",
                  seas_adj="SA",
                  unit="Miles de millones",
                  ts_type="Flujo",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["ARS", "BRL"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
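
The Argentine series is retropolated: where the new-base series has no data, it is extended backwards with the old vintage's quarter-to-quarter ratios. The same operation on a toy pair of series:

import pandas as pd

new = pd.Series([None, None, 100.0, 103.0], dtype="float64")  # new base, short
old = pd.Series([90.0, 92.0, 95.0, 97.0])                     # old base, long

for row in reversed(range(len(new))):
    if pd.isna(new.iloc[row]):
        # Scale the next new-base value by the old vintage's growth ratio.
        new.iloc[row] = old.iloc[row] / old.iloc[row + 1] * new.iloc[row + 1]
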
Example #9
def cpi(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get consumer price index for Argentina and Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI : pd.DataFrame

    """
    name = "regional_cpi"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = requests.get(urls[name]["dl"]["ar"],
                       params=urls[name]["dl"]["ar_payload"])
    arg = pd.read_html(arg.content)[0]
    arg.set_index("Fecha", drop=True, inplace=True)
    arg.index = pd.to_datetime(arg.index, format="%d/%m/%Y")
    arg.columns = ["nivel"]
    arg = arg.divide(10)

    arg_unoff = pd.read_excel(urls[name]["dl"]["ar_unofficial"])
    arg_unoff.set_index("date", drop=True, inplace=True)
    arg_unoff.index = arg_unoff.index + MonthEnd(0)
    arg_unoff = arg_unoff.loc[(arg_unoff.index >= "2006-12-01") &
                              (arg_unoff.index <= "2016-12-01"), "index"]
    arg_unoff = arg_unoff.to_frame().pct_change(
        periods=1).multiply(100).dropna()
    arg_unoff.columns = ["nivel"]
    arg = (pd.concat([arg, arg_unoff]).reset_index().drop_duplicates(
        subset="index", keep="last").set_index("index",
                                               drop=True).sort_index())
    arg = arg.divide(100).add(1).cumprod()

    bra_r = requests.get(urls[name]["dl"]["bra"])
    bra = pd.DataFrame(bra_r.json())[["v"]]
    bra.index = pd.date_range(start="1979-12-31", freq="M", periods=len(bra))
    bra = bra.apply(pd.to_numeric, errors="coerce")
    bra = bra.divide(100).add(1).cumprod()

    output = pd.concat([arg, bra], axis=1)
    output.columns = ["Argentina", "Brasil"]
    metadata._set(output,
                  area="Regional",
                  currency="-",
                  inf_adj="No",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["ARS", "BRL"]])
    output = rebase(output, start_date="2010-10-01", end_date="2010-10-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
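
Both CPI legs arrive as monthly percent changes and are chained into index levels with divide(100).add(1).cumprod(). The chaining in isolation:

import pandas as pd

pct = pd.Series([1.2, 0.9, 1.5])       # % month-over-month
level = pct.div(100).add(1).cumprod()  # 1.0120, 1.0211, 1.0364
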
Example #10
def monthly_gdp(update_loc: Union[str, PathLike, Engine, Connection,
                                  None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine, Connection,
                                None] = None,
                only_get: bool = False) -> pd.DataFrame:
    """Get monthly GDP data.

    Countries/aggregates selected are Argentina and Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly GDP : pd.DataFrame

    """
    name = "regional_monthly_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = pd.read_excel(urls[name]["dl"]["arg"], usecols="D",
                        skiprows=4).dropna(how="all")
    arg.index = pd.date_range(start="2004-01-31", freq="M", periods=len(arg))

    bra = pd.read_csv(urls[name]["dl"]["bra"],
                      sep=";",
                      index_col=0,
                      decimal=",")
    bra.index = pd.date_range(start="2003-01-31", freq="M", periods=len(bra))

    output = pd.concat([arg, bra], axis=1)
    output.columns = ["Argentina", "Brasil"]
    metadata._set(output,
                  area="Regional",
                  currency="-",
                  inf_adj="Const.",
                  seas_adj="SA",
                  ts_type="Flujo",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["ARS", "BRL"]])
    output = rebase(output, start_date="2010-01-01", end_date="2010-12-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
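
Here `rebase` receives a date window rather than a single day, which reads as averaging over 2010 so that the yearly mean equals 100. A sketch under that assumption:

import pandas as pd

def rebase_window_sketch(df: pd.DataFrame, start: str, end: str) -> pd.DataFrame:
    # Presumed semantics: the window average becomes the base (= 100).
    return df.div(df.loc[start:end].mean()).mul(100)
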
Example #11
def bonds(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
          revise_rows: Union[str, int] = "nodup",
          save_loc: Union[str, PathLike, Engine, Connection, None] = None,
          only_get: bool = False,
          driver: WebDriver = None) -> pd.DataFrame:
    """Get interest rate yield for Uruguayan US-denominated bonds,
    inflation-linked bonds and peso bonds.

    This function requires a Selenium webdriver. It can be provided in the
    driver parameter, or it will attempt to configure a Chrome webdriver.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.
    driver : selenium.webdriver.chrome.webdriver.WebDriver, default None
        Selenium webdriver for scraping. If None, build a Chrome webdriver.

    Returns
    -------
    Daily bond yields in basis points : pd.DataFrame

    """
    name = "bonds"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name)
        if not output.equals(pd.DataFrame()):
            return output

    if driver is None:
        driver = _build()
    dfs = []
    for url in urls[name]["dl"].values():
        driver.get(url)
        start = driver.find_element(
            by="name", value="ctl00$ContentPlaceHolder1$dateDesde$dateInput")
        start.clear()
        start.send_keys("01/01/2000")
        end = driver.find_element(
            by="name", value="ctl00$ContentPlaceHolder1$dateHasta$dateInput")
        end.clear()
        end.send_keys(dt.datetime.now().strftime("%d/%m/%Y"))
        submit = driver.find_element(by="id",
                                     value="ContentPlaceHolder1_LinkFiltrar")
        submit.click()
        time.sleep(10)
        tables = pd.read_html(driver.page_source, decimal=",", thousands=".")

        raw = tables[8]  # the data table of interest is the ninth on the page
        df = raw.set_index("FECHA")
        df.index = pd.to_datetime(df.index, format="%d/%m/%Y")
        df.sort_index(inplace=True)
        df = df.loc[:, df.columns.isin(["BPS", "RENDIMIENTO"])]
        df.columns = [url]
        dfs.append(df)
    driver.quit()
    output = dfs[0].join(dfs[1], how="outer").join(dfs[2], how="outer")
    output.columns = ["Bonos soberanos en dólares", "Bonos soberanos en UI",
                      "Bonos soberanos en pesos"]

    if update_loc is not None:
        previous_data = ops._io(operation="update", data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Sector financiero", currency="-",
                  inf_adj="No", unit="PBS", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3, 4],
                                new_arrays=[["USD", "UYU", "UYU"],
                                            ["No", "Const.", "No"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
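
`_build` is defined elsewhere; alternatively, a caller can pass its own driver, for instance a headless Chrome. A hypothetical setup, assuming selenium and a matching chromedriver are installed:

from selenium import webdriver

opts = webdriver.ChromeOptions()
opts.add_argument("--headless=new")
driver = webdriver.Chrome(options=opts)

# bonds() navigates the three pages itself and quits the driver when done.
yields = bonds(driver=driver)
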
Example #12
def interest_rates(update_loc: Union[str, PathLike,
                                     Engine, Connection, None] = None,
                   revise_rows: Union[str, int] = "nodup",
                   save_loc: Union[str, PathLike,
                                   Engine, Connection, None] = None,
                   only_get: bool = False) -> pd.DataFrame:
    """Get interest rates data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly interest rates : pd.DataFrame

    """
    name = "interest_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name)
        if not output.equals(pd.DataFrame()):
            return output

    xls = pd.ExcelFile(urls["interest_rates"]["dl"]["main"])
    sheets = ["Activas $", "Activas UI", "Activas U$S",
              "Pasivas $", "Pasivas UI", "Pasivas U$S"]
    columns = ["B:C,G,K", "B:C,G,K", "B:C,H,L",
               "B:C,N,T", "B:C,P,W", "B:C,N,T"]
    sheet_data = []
    for sheet, cols in zip(sheets, columns):
        if "Activas" in sheet:
            skip = 11
        else:
            skip = 10
        data = pd.read_excel(xls, sheet_name=sheet, skiprows=skip,
                             usecols=cols, index_col=0)
        data.index = pd.to_datetime(data.index, errors="coerce")
        data = data.loc[~pd.isna(data.index)]
        data.index = data.index + MonthEnd(0)
        sheet_data.append(data)
    output = pd.concat(sheet_data, axis=1)
    output.columns = ["Tasas activas: $, promedio",
                      "Tasas activas: $, promedio empresas",
                      "Tasas activas: $, promedio familias",
                      "Tasas activas: UI, promedio",
                      "Tasas activas: UI, promedio empresas",
                      "Tasas activas: UI, promedio familias",
                      "Tasas activas: US$, promedio",
                      "Tasas activas: US$, promedio empresas",
                      "Tasas activas: US$, promedio familias",
                      "Tasas pasivas: $, promedio",
                      "Tasas pasivas: $, promedio empresas",
                      "Tasas pasivas: $, promedio familias",
                      "Tasas pasivas: UI, promedio",
                      "Tasas pasivas: UI, promedio empresas",
                      "Tasas pasivas: UI, promedio familias",
                      "Tasas pasivas: US$, promedio",
                      "Tasas pasivas: US$, promedio empresas",
                      "Tasas pasivas: US$, promedio familias"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Sector financiero", currency="-",
                  inf_adj="-", unit="Tasa", seas_adj="NSA",
                  ts_type="Flujo", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3, 4],
                                new_arrays=[["UYU", "UYU", "UYU",
                                             "UYU", "UYU", "UYU",
                                             "USD", "USD", "USD",
                                             "UYU", "UYU", "UYU",
                                             "UYU", "UYU", "UYU",
                                             "USD", "USD", "USD"],
                                            ["No", "No", "No",
                                             "Const.", "Const.", "Const.",
                                             "No", "No", "No",
                                             "No", "No", "No",
                                             "Const.", "Const.", "Const.",
                                             "No", "No", "No"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
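
The MonthEnd(0) offset used when cleaning the sheet dates (also in Examples #2 and #9) snaps a timestamp to the end of its own month, leaving dates already on a month end unchanged:

import pandas as pd
from pandas.tseries.offsets import MonthEnd

dates = pd.to_datetime(["2023-01-01", "2023-01-31", "2023-02-15"])
print(dates + MonthEnd(0))  # 2023-01-31, 2023-01-31, 2023-02-28
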