def nxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get currencies data.

    Selected currencies are the US dollar index, USDEUR, USDJPY and USDCNY.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily currencies : pd.DataFrame

    """
    name = "global_nxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    output = []
    for series in ["dollar", "eur", "jpy", "cny"]:
        aux = pd.read_csv(urls[name]["dl"][series], index_col=0,
                          usecols=[0, 4], parse_dates=True)
        aux.columns = [series]
        if series == "dollar":
            aux.dropna(inplace=True)
        output.append(aux)
    output = output[0].join(output[1:]).interpolate(method="linear",
                                                    limit_area="inside")
    output.columns = ["Índice Dólar", "Euro", "Yen", "Renminbi"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Global", currency="USD",
                  inf_adj="No", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3, 5],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"],
                                            ["Canasta/USD", "EUR/USD",
                                             "JPY/USD", "CNY/USD"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
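
# Usage sketch for nxr(), assuming the module-level `urls` config is in
# place. "data" is a hypothetical directory used for illustration only.
#
#     currencies = nxr(save_loc="data")       # first run: download and save
#     currencies = nxr(update_loc="data",     # later runs: revise the CSV
#                      revise_rows="nodup", save_loc="data")
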
def gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get seasonally adjusted real quarterly GDP for select countries.

    Countries/aggregates are US, EU-27, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Quarterly real GDP in seasonally adjusted terms : pd.DataFrame

    """
    name = "global_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    chn_y = dt.datetime.now().year + 1
    chn_r = requests.get(f"{urls[name]['dl']['chn_oecd']}{chn_y}-Q4")
    chn_json = chn_r.json()
    chn_datasets = []
    for dataset, start in zip(["0", "1"], ["2011-03-31", "1993-03-31"]):
        raw = chn_json["dataSets"][0]["series"][
            f"0:0:{dataset}:0"]["observations"]
        values = [x[0] for x in raw.values()]
        df = pd.DataFrame(data=values,
                          index=pd.date_range(start=start, freq="Q-DEC",
                                              periods=len(values)),
                          columns=["China"])
        chn_datasets.append(df)
    chn_qoq = chn_datasets[0]
    chn_yoy = chn_datasets[1]
    chn_obs = pd.read_excel(urls["global_gdp"]["dl"]["chn_obs"],
                            index_col=0).dropna(how="all", axis=1).dropna(
        how="all", axis=0)
    chn_obs = chn_obs.loc[(chn_obs.index > "2011-01-01") &
                          (chn_obs.index < "2016-01-01")]
    chn_yoy["volume"] = chn_obs
    # Backcast the volume series before 2011 by discounting the level one
    # year ahead with that year's YoY growth rate.
    for row in reversed(range(len(chn_yoy.loc[chn_yoy.index <
                                              "2011-01-01"]))):
        if pd.isna(chn_yoy.iloc[row, 1]):
            chn_yoy.iloc[row, 1] = (chn_yoy.iloc[row + 4, 1]
                                    / (1 + chn_yoy.iloc[row + 4, 0] / 100))
    chn_yoy = chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"]
    metadata._set(chn_yoy)
    chn_sa = decompose(chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"],
                       component="seas", method="x13")
    chn_sa = pd.concat([chn_sa, chn_qoq], axis=1)
    # Extend the seasonally adjusted level forward using QoQ growth.
    for row in range(len(chn_sa)):
        if not pd.isna(chn_sa.iloc[row, 1]):
            chn_sa.iloc[row, 0] = (chn_sa.iloc[row - 1, 0]
                                   * (1 + chn_sa.iloc[row, 1] / 100))
    chn = chn_sa.iloc[:, [0]].div(10)

    gdps = []
    load_dotenv(Path(get_project_root(), ".env"))
    fred_api_key = os.environ.get("FRED_API_KEY")
    for series in ["GDPC1", "CLVMNACSCAB1GQEU272020", "JPNRGDPEXP"]:
        r = requests.get(f"{urls[name]['dl']['fred']}{series}&api_key="
                         f"{fred_api_key}&file_type=json")
        aux = pd.DataFrame.from_records(r.json()["observations"])
        aux = aux[["date", "value"]].set_index("date")
        aux.index = pd.to_datetime(aux.index)
        aux.index = aux.index.shift(3, freq="M") + MonthEnd(0)
        aux.columns = [series]
        aux = aux.apply(pd.to_numeric, errors="coerce")
        if series == "GDPC1":
            aux = aux.div(4)
        elif series == "CLVMNACSCAB1GQEU272020":
            aux = aux.div(1000)
        gdps.append(aux)
    gdps = pd.concat(gdps, axis=1)
    output = pd.concat([gdps, chn], axis=1)
    output.columns = ["Estados Unidos", "Unión Europea", "Japón", "China"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Global", currency="USD",
                  inf_adj="Const.", unit="Miles de millones", seas_adj="SA",
                  ts_type="Flujo", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def long_rates(update_loc: Union[str, PathLike,
                                 Engine, Connection, None] = None,
               revise_rows: Union[str, int] = "nodup",
               save_loc: Union[str, PathLike,
                               Engine, Connection, None] = None,
               only_get: bool = False) -> pd.DataFrame:
    """Get 10-year government bonds interest rates.

    Countries/aggregates selected are US, Germany, France, Italy, Spain,
    United Kingdom, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily 10-year government bonds interest rates : pd.DataFrame

    """
    name = "global_long_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    bonds = []
    load_dotenv(Path(get_project_root(), ".env"))
    fred_api_key = os.environ.get("FRED_API_KEY")
    r = requests.get(f"{urls[name]['dl']['fred']}DGS10&api_key="
                     f"{fred_api_key}&file_type=json")
    us = pd.DataFrame.from_records(r.json()["observations"])
    us = us[["date", "value"]].set_index("date")
    us.index = pd.to_datetime(us.index)
    us.columns = ["United States"]
    bonds.append(us.apply(pd.to_numeric, errors="coerce").dropna())

    for country, sid in zip(["Germany", "France", "Italy", "Spain",
                             "United Kingdom", "Japan", "China"],
                            ["23693", "23778", "23738", "23806",
                             "23673", "23901", "29227"]):
        end_date_dt = dt.datetime(2000, 1, 1)
        start_date_dt = dt.datetime(2000, 1, 1)
        aux = []
        # Scrape the historical data in 5000-day windows until today.
        while end_date_dt < dt.datetime.now():
            end_date_dt = start_date_dt + dt.timedelta(days=5000)
            params = {
                "curr_id": sid,
                "smlID": str(randint(1000000, 99999999)),
                "header": f"{country} 10-Year Bond Yield Historical Data",
                "st_date": start_date_dt.strftime("%m/%d/%Y"),
                "end_date": end_date_dt.strftime("%m/%d/%Y"),
                "interval_sec": "Daily",
                "sort_col": "date",
                "sort_ord": "DESC",
                "action": "historical_data"
            }
            r = requests.post(urls["global_long_rates"]["dl"]["main"],
                              headers=investing_headers, data=params)
            aux.append(pd.read_html(r.content, match="Price",
                                    index_col=0, parse_dates=True)[0])
            start_date_dt = end_date_dt + dt.timedelta(days=1)
        aux = pd.concat(aux, axis=0)[["Price"]].sort_index()
        aux.columns = [country]
        bonds.append(aux)

    output = bonds[0].join(bonds[1:], how="left")
    output = output.interpolate(method="linear", limit_area="inside")
    output.columns = ["Estados Unidos", "Alemania", "Francia", "Italia",
                      "España", "Reino Unido", "Japón", "China"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Global", currency="USD",
                  inf_adj="No", seas_adj="NSA", unit="Tasa",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(
        output, levels=[3],
        new_arrays=[["USD", "EUR", "EUR", "EUR",
                     "EUR", "GBP", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def stocks(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           revise_rows: Union[str, int] = "nodup",
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = False) -> pd.DataFrame:
    """Get stock market index data.

    Indexes selected are S&P 500, Euronext 100, Nikkei 225 and Shanghai
    Composite.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily stock market index in USD : pd.DataFrame

    """
    name = "global_stocks"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    yahoo = []
    for series in ["spy", "n100", "nikkei", "sse"]:
        aux = pd.read_csv(urls[name]["dl"][series], index_col=0,
                          usecols=[0, 4], parse_dates=True)
        aux.columns = [series]
        yahoo.append(aux)
    output = pd.concat(yahoo, axis=1).interpolate(method="linear",
                                                  limit_area="inside")
    output.columns = ["S&P 500", "Euronext 100", "Nikkei 225",
                      "Shanghai Stock Exchange Composite"]

    metadata._set(output, area="Global", currency="USD",
                  inf_adj="No", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])
    output = rebase(output, start_date="2019-01-02")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
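
# Usage sketch for stocks(). Note the indexes are rebased to
# 2019-01-02 = 100 before revision, so saved and freshly downloaded data
# share a common base.
#
#     indexes = stocks(save_loc="data")
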
def policy_rates(update_loc: Union[str, PathLike,
                                   Engine, Connection, None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike,
                                 Engine, Connection, None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """Get central bank policy interest rates data.

    Countries/aggregates selected are US, Euro Area, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily policy interest rates : pd.DataFrame

    """
    name = "global_policy_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    r = requests.get(urls[name]["dl"]["main"])
    temp_dir = tempfile.TemporaryDirectory()
    with zipfile.ZipFile(BytesIO(r.content), "r") as f:
        f.extractall(path=temp_dir.name)
    path_temp = path.join(temp_dir.name,
                          "WEBSTATS_CBPOL_D_DATAFLOW_csv_row.csv")
    raw = pd.read_csv(path_temp, usecols=[0, 7, 19, 36, 37], index_col=0,
                      header=2, parse_dates=True).dropna(how="all")
    output = (raw.apply(pd.to_numeric, errors="coerce")
              .interpolate(method="linear", limit_area="inside"))
    output.columns = ["China", "Japón", "Estados Unidos", "Eurozona"]
    output = output[["Estados Unidos", "Eurozona", "Japón", "China"]]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Global", currency="USD",
                  inf_adj="No", seas_adj="NSA", unit="Tasa",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
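
# Usage sketch for policy_rates(), which downloads and unpacks the source
# ZIP of daily central bank policy rates on every non-cached call.
#
#     cb_rates = policy_rates(update_loc="data", save_loc="data")
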
def nxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get USDARS and USDBRL.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily exchange rates : pd.DataFrame

    """
    name = "regional_nxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = []
    for dollar in ["ar", "ar_unofficial"]:
        r = requests.get(urls[name]["dl"][dollar])
        aux = pd.DataFrame(r.json())[[0, 2]]
        aux.set_index(0, drop=True, inplace=True)
        aux.drop("Fecha", inplace=True)
        aux = aux.replace(",", ".", regex=True).apply(pd.to_numeric)
        aux.index = pd.to_datetime(aux.index, format="%d-%m-%Y")
        aux.sort_index(inplace=True)
        aux.columns = [dollar]
        arg.append(aux)
    arg = arg[0].join(arg[1], how="left")
    arg.columns = ["Argentina - oficial", "Argentina - informal"]

    r = requests.get(urls[name]["dl"]["bra"])
    bra = pd.DataFrame(r.json())
    bra = [(x["VALDATA"], x["VALVALOR"]) for x in bra["value"]]
    bra = pd.DataFrame.from_records(bra).dropna(how="any")
    bra.set_index(0, inplace=True)
    bra.index = pd.to_datetime(bra.index.str[:-4],
                               format="%Y-%m-%dT%H:%M:%S").tz_localize(None)
    bra.columns = ["Brasil"]

    output = arg.join(bra, how="left").interpolate(method="linear",
                                                   limit_area="inside")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Regional", currency="USD",
                  inf_adj="No", seas_adj="NSA", unit="Tasa",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3, 5],
                                new_arrays=[["ARS", "ARS", "BRL"],
                                            ["ARS/USD", "ARS/USD",
                                             "BRL/USD"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
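
# Usage sketch for the regional nxr(). Both Argentine series (official and
# unofficial) come from JSON endpoints keyed in `urls`; Brazil comes from a
# separate endpoint and is joined on the official series' dates.
#
#     ars_brl = nxr(save_loc="data")
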
def rxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get real exchange rates vis-à-vis the US dollar for Argentina and
    Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly real exchange rate : pd.DataFrame

    """
    name = "regional_rxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    proc = _ifs(update_loc=update_loc, save_loc=save_loc, only_get=only_get)

    output = pd.DataFrame()
    output["Argentina"] = (proc["Argentina - oficial"] * proc["US.PCPI_IX"]
                           / proc["ARG CPI"])
    output["Brasil"] = proc["Brasil"] * proc["US.PCPI_IX"] / proc["BRA CPI"]

    metadata._set(output, area="Regional", currency="-",
                  inf_adj="-", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS/USD", "BRL/USD"]])
    output = rebase(output, start_date="2019-01-01",
                    end_date="2019-01-31").dropna(how="all")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
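
# Usage sketch for rxr(). It builds on the _ifs() helper, so `update_loc`
# and `save_loc` are also forwarded to that helper's own caching; the
# result is rebased to January 2019 = 100.
#
#     real_xr = rxr(update_loc="data", save_loc="data")
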
def gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False,
        driver: WebDriver = None) -> pd.DataFrame:
    """Get seasonally adjusted real GDP for Argentina and Brazil.

    This function requires a Selenium webdriver. It can be provided in the
    driver parameter, or it will attempt to configure a Chrome webdriver.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.
    driver : selenium.webdriver.chrome.webdriver.WebDriver, default None
        Selenium webdriver for scraping. If None, build a Chrome webdriver.

    Returns
    -------
    Quarterly real GDP : pd.DataFrame

    """
    name = "regional_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    if driver is None:
        driver = _build()
    driver.get(urls[name]["dl"]["arg_new"])
    time.sleep(5)
    soup = BeautifulSoup(driver.page_source, "lxml")
    driver.quit()
    url = soup.find_all(href=re.compile("desest"))[0]["href"]
    full_url = f"https://www.indec.gob.ar{url}"
    arg = pd.read_excel(full_url, skiprows=3, usecols="D").dropna(how="all")
    arg.index = pd.date_range(start="2004-03-31", freq="Q-DEC",
                              periods=len(arg))
    arg_old = pd.read_excel(urls[name]["dl"]["arg_old"], skiprows=7,
                            usecols="D").dropna(how="all")
    arg_old.index = pd.date_range(start="1993-03-31", freq="Q-DEC",
                                  periods=len(arg_old))
    arg = pd.concat([arg, arg_old], axis=1)
    # Backcast the new series using growth rates of the old 1993-base series.
    for row in reversed(range(len(arg))):
        if pd.isna(arg.iloc[row, 0]):
            arg.iloc[row, 0] = (arg.iloc[row, 1] / arg.iloc[row + 1, 1]
                                * arg.iloc[row + 1, 0])
    arg = arg.iloc[:, [0]]

    r = requests.get(urls[name]["dl"]["bra"])
    temp_dir = tempfile.TemporaryDirectory()
    with zipfile.ZipFile(BytesIO(r.content), "r") as f:
        f.extractall(path=temp_dir.name)
    path_temp = path.join(temp_dir.name, listdir(temp_dir.name)[0])
    bra = pd.read_excel(path_temp, usecols="Q", skiprows=3,
                        sheet_name="Val encad preços 95 com ajuste")
    bra.index = pd.date_range(start="1996-03-31", freq="Q-DEC",
                              periods=len(bra))

    output = pd.concat([arg, bra], axis=1).div(1000)
    output.columns = ["Argentina", "Brasil"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Regional", currency="-",
                  inf_adj="Const.", seas_adj="SA",
                  unit="Miles de millones", ts_type="Flujo", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS", "BRL"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def cpi(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get consumer price index for Argentina and Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI : pd.DataFrame

    """
    name = "regional_cpi"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = requests.get(urls[name]["dl"]["ar"],
                       params=urls[name]["dl"]["ar_payload"])
    arg = pd.read_html(arg.content)[0]
    arg.set_index("Fecha", drop=True, inplace=True)
    arg.index = pd.to_datetime(arg.index, format="%d/%m/%Y")
    arg.columns = ["nivel"]
    arg = arg.divide(10)

    arg_unoff = pd.read_excel(urls[name]["dl"]["ar_unofficial"])
    arg_unoff.set_index("date", drop=True, inplace=True)
    arg_unoff.index = arg_unoff.index + MonthEnd(0)
    arg_unoff = arg_unoff.loc[(arg_unoff.index >= "2006-12-01") &
                              (arg_unoff.index <= "2016-12-01"), "index"]
    arg_unoff = arg_unoff.to_frame().pct_change(
        periods=1).multiply(100).dropna()
    arg_unoff.columns = ["nivel"]
    # DataFrame.append was removed in pandas 2.0; pd.concat is equivalent.
    arg = (pd.concat([arg, arg_unoff]).reset_index()
           .drop_duplicates(subset="index", keep="last")
           .set_index("index", drop=True).sort_index())
    arg = arg.divide(100).add(1).cumprod()

    bra_r = requests.get(urls[name]["dl"]["bra"])
    bra = pd.DataFrame(bra_r.json())[["v"]]
    bra.index = pd.date_range(start="1979-12-31", freq="M", periods=len(bra))
    bra = bra.apply(pd.to_numeric, errors="coerce")
    bra = bra.divide(100).add(1).cumprod()

    output = pd.concat([arg, bra], axis=1)
    output.columns = ["Argentina", "Brasil"]

    metadata._set(output, area="Regional", currency="-",
                  inf_adj="No", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS", "BRL"]])
    output = rebase(output, start_date="2010-10-01", end_date="2010-10-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
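
# Usage sketch for cpi(). The output is a rebased price level
# (October 2010 = 100) built by cumulating monthly inflation, not the
# monthly inflation rates themselves.
#
#     prices = cpi(save_loc="data")
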
def monthly_gdp(update_loc: Union[str, PathLike,
                                  Engine, Connection, None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike,
                                Engine, Connection, None] = None,
                only_get: bool = False) -> pd.DataFrame:
    """Get monthly GDP data.

    Countries/aggregates selected are Argentina and Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly real GDP in seasonally adjusted terms : pd.DataFrame

    """
    name = "regional_monthly_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = pd.read_excel(urls[name]["dl"]["arg"], usecols="D",
                        skiprows=4).dropna(how="all")
    arg.index = pd.date_range(start="2004-01-31", freq="M", periods=len(arg))

    bra = pd.read_csv(urls[name]["dl"]["bra"], sep=";", index_col=0,
                      decimal=",")
    bra.index = pd.date_range(start="2003-01-31", freq="M", periods=len(bra))

    output = pd.concat([arg, bra], axis=1)
    output.columns = ["Argentina", "Brasil"]

    metadata._set(output, area="Regional", currency="-",
                  inf_adj="Const.", seas_adj="SA",
                  ts_type="Flujo", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS", "BRL"]])
    output = rebase(output, start_date="2010-01-01", end_date="2010-12-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
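
# Usage sketch for monthly_gdp(), rebased so that the 2010 average = 100.
#
#     activity = monthly_gdp(update_loc="data", save_loc="data")
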
def bonds(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
          revise_rows: Union[str, int] = "nodup",
          save_loc: Union[str, PathLike, Engine, Connection, None] = None,
          only_get: bool = False,
          driver: WebDriver = None) -> pd.DataFrame:
    """Get interest rate yields for Uruguayan US-denominated bonds,
    inflation-linked bonds and peso bonds.

    This function requires a Selenium webdriver. It can be provided in the
    driver parameter, or it will attempt to configure a Chrome webdriver.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.
    driver : selenium.webdriver.chrome.webdriver.WebDriver, default None
        Selenium webdriver for scraping. If None, build a Chrome webdriver.

    Returns
    -------
    Daily bond yields in basis points : pd.DataFrame

    """
    name = "bonds"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    if driver is None:
        driver = _build()
    dfs = []
    for url in urls[name]["dl"].values():
        driver.get(url)
        start = driver.find_element(
            by="name", value="ctl00$ContentPlaceHolder1$dateDesde$dateInput")
        start.clear()
        start.send_keys("01/01/2000")
        end = driver.find_element(
            by="name", value="ctl00$ContentPlaceHolder1$dateHasta$dateInput")
        end.clear()
        end.send_keys(dt.datetime.now().strftime("%d/%m/%Y"))
        submit = driver.find_element(by="id",
                                     value="ContentPlaceHolder1_LinkFiltrar")
        submit.click()
        time.sleep(10)
        tables = pd.read_html(driver.page_source, decimal=",", thousands=".")
        # The yields table is the 9th table on the rendered page.
        raw = tables[8]
        df = raw.set_index("FECHA")
        df.index = pd.to_datetime(df.index, format="%d/%m/%Y")
        df.sort_index(inplace=True)
        df = df.loc[:, df.columns.isin(["BPS", "RENDIMIENTO"])]
        df.columns = [url]
        dfs.append(df)
    driver.quit()

    output = dfs[0].join(dfs[1], how="outer").join(dfs[2], how="outer")
    output.columns = ["Bonos soberanos en dólares", "Bonos soberanos en UI",
                      "Bonos soberanos en pesos"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Sector financiero", currency="-",
                  inf_adj="No", unit="PBS", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3, 4],
                                new_arrays=[["USD", "UYU", "UYU"],
                                            ["No", "Const.", "No"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
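
# Usage sketch for bonds(). As with the regional gdp(), the driver argument
# is optional; with driver=None a Chrome webdriver is built via _build()
# and quit after scraping.
#
#     uy_bonds = bonds()
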
def interest_rates(update_loc: Union[str, PathLike,
                                     Engine, Connection, None] = None,
                   revise_rows: Union[str, int] = "nodup",
                   save_loc: Union[str, PathLike,
                                   Engine, Connection, None] = None,
                   only_get: bool = False) -> pd.DataFrame:
    """Get interest rates data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly interest rates : pd.DataFrame

    """
    name = "interest_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    xls = pd.ExcelFile(urls["interest_rates"]["dl"]["main"])
    sheets = ["Activas $", "Activas UI", "Activas U$S",
              "Pasivas $", "Pasivas UI", "Pasivas U$S"]
    columns = ["B:C,G,K", "B:C,G,K", "B:C,H,L",
               "B:C,N,T", "B:C,P,W", "B:C,N,T"]
    sheet_data = []
    for sheet, cols in zip(sheets, columns):
        # Active-rate sheets have one extra header row.
        if "Activas" in sheet:
            skip = 11
        else:
            skip = 10
        data = pd.read_excel(xls, sheet_name=sheet, skiprows=skip,
                             usecols=cols, index_col=0)
        data.index = pd.to_datetime(data.index, errors="coerce")
        data = data.loc[~pd.isna(data.index)]
        data.index = data.index + MonthEnd(0)
        sheet_data.append(data)
    output = pd.concat(sheet_data, axis=1)
    output.columns = ["Tasas activas: $, promedio",
                      "Tasas activas: $, promedio empresas",
                      "Tasas activas: $, promedio familias",
                      "Tasas activas: UI, promedio",
                      "Tasas activas: UI, promedio empresas",
                      "Tasas activas: UI, promedio familias",
                      "Tasas activas: US$, promedio",
                      "Tasas activas: US$, promedio empresas",
                      "Tasas activas: US$, promedio familias",
                      "Tasas pasivas: $, promedio",
                      "Tasas pasivas: $, promedio empresas",
                      "Tasas pasivas: $, promedio familias",
                      "Tasas pasivas: UI, promedio",
                      "Tasas pasivas: UI, promedio empresas",
                      "Tasas pasivas: UI, promedio familias",
                      "Tasas pasivas: US$, promedio",
                      "Tasas pasivas: US$, promedio empresas",
                      "Tasas pasivas: US$, promedio familias"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Sector financiero", currency="-",
                  inf_adj="-", unit="Tasa", seas_adj="NSA",
                  ts_type="Flujo", cumperiods=1)
    metadata._modify_multiindex(
        output, levels=[3, 4],
        new_arrays=[["UYU", "UYU", "UYU", "UYU", "UYU", "UYU",
                     "USD", "USD", "USD", "UYU", "UYU", "UYU",
                     "UYU", "UYU", "UYU", "USD", "USD", "USD"],
                    ["No", "No", "No", "Const.", "Const.", "Const.",
                     "No", "No", "No", "No", "No", "No",
                     "Const.", "Const.", "Const.", "No", "No", "No"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
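
# Usage sketch for interest_rates(). The source workbook mixes active and
# passive rate sheets with different header offsets, handled above by
# the per-sheet `skip` value.
#
#     rates = interest_rates(save_loc="data")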