def _prepare_datasets(measures: List[Measure]) -> List[Dataset]:
    """Given a list of measures, return the consolidated datasets required

    This is necessary because some measures may require the same dataset,
    although different variables. So the professor is going to consolidate
    the required datasets and make a bulk request to the data manager.

    Tables are returned in the order in which they are first required, and
    the variables of each table in the order in which they are first
    requested, so the result is reproducible from one run to the next.

    Parameters
    ----------
    measures : List[Measure]
        List of measures to estimate

    Returns
    -------
    List[Dataset]
        List of consolidated datasets required, one per distinct table
    """
    # TableID -> {VarName: None}. The inner dicts act as insertion-ordered
    # sets: they de-duplicate like a set but, unlike a set of strings, do not
    # iterate in an order that depends on hash randomisation.
    table_varnames = {}
    table_datevars = {}
    # Find for each table the distinct variables required by all measures.
    # A single pass is enough, so `measures` may also be a one-shot iterable.
    for measure in measures:
        for dta in measure.datasets_required:
            table_varnames.setdefault(dta.table_id, {}).update(
                dict.fromkeys(dta.vars)
            )
            table_datevars.setdefault(dta.table_id, {}).update(
                dict.fromkeys(dta.date_vars)
            )
    # Consolidate datasets: exactly one Dataset per distinct table id
    datasets: List[Dataset] = []
    for table_id, varnames in table_varnames.items():
        src, lib, table = table_id
        datevars = table_datevars[table_id]
        datasets.append(
            Dataset(src, lib, table, list(varnames), list(datevars))
        )
    return datasets
from numpy.lib import recfunctions as rfn
import pandas as pd
from frds.data import Dataset
from frds.measures import Measure
from frds.data.utils import filter_funda

# Key under which the measure's output variable is labelled
NAME = "FirmSize"

# Data needed by the measure: a single request against wrds.comp.funda.
# NOTE(review): indfmt/datafmt/popsrc/consol are presumably the columns
# consumed by `filter_funda` - confirm against frds.data.utils.
DATASETS_REQUIRED: List[Dataset] = [
    Dataset(
        source="wrds",
        library="comp",
        table="funda",
        vars=[
            "datadate",
            "gvkey",
            "at",
            "indfmt",
            "datafmt",
            "popsrc",
            "consol",
        ],
        date_vars=["datadate"],
    )
]

# Human-readable description of the output variable
VARIABLE_LABELS = {NAME: "Natural logarithm of total assets"}


class FirmSize(Measure):
    """Firm size: the natural logarithm of total assets"""

    def __init__(self):
        # Register the display name together with the datasets to load
        super().__init__("Firm Size", DATASETS_REQUIRED)
import numpy as np import pandas as pd from frds.data import Dataset from frds.measures import CorporateFinanceMeasure from frds.data.utils import filter_funda NAME = "ExecutiveOwnership" DATASETS_REQUIRED: List[Dataset] = [ Dataset( source="wrds", library="comp", table="funda", vars=[ "datadate", "gvkey", "fyear", "indfmt", "datafmt", "popsrc", "consol", "csho", ], date_vars=["datadate"], ), Dataset( source="wrds", library="execcomp", table="anncomp", vars=[ "gvkey", "year", "execid",
import numpy as np
import pandas as pd
from frds.data import Dataset
from frds.measures import CorporateFinanceMeasure

# Two tables from the wrds "ciq" library: the rating records themselves and
# a gvkey <-> companyid link table with its validity window.
DATASETS = [
    Dataset(
        source="wrds",
        library="ciq",
        table="wrds_erating",
        vars=["company_id", "rdate", "rtime", "rating", "rtype"],
        date_vars=["rdate"],
    ),
    Dataset(
        source="wrds",
        library="ciq",
        table="wrds_gvkey",
        vars=["gvkey", "companyid", "startdate", "enddate"],
        date_vars=["startdate", "enddate"],
    ),
]

# Human-readable descriptions of the output variables
VARIABLE_LABELS = {
    "rdate": "Rating date",
    "rating_rank": "1 represents a AAA rating and 22 reflects a D rating.",
}


class CreditRating(CorporateFinanceMeasure):
    """Credit rating measure; ``url_docs`` points to its documentation page."""

    url_docs = "https://frds.io/measures/credit_rating/"
from typing import List, Tuple, Dict import numpy as np import pandas as pd from frds.data import Dataset from frds.measures import CorporateFinanceMeasure from frds.data.utils import filter_funda DATASETS_REQUIRED: List[Dataset] = [ Dataset( source="wrds", library="boardex", table="na_wrds_company_profile", vars=["cikcode", "boardid"], date_vars=[], ), Dataset( source="wrds", library="boardex", table="na_wrds_org_composition", vars=[ "companyid", "datestartrole", "dateendrole", "rolename", "directorid", "seniority", ], date_vars=["datestartrole", "dateendrole"], ), Dataset( source="wrds",
from typing import List import numpy as np import pandas as pd from frds.data import Dataset from frds.measures import CorporateFinanceMeasure NAME = "StockDelisting" DATASETS_REQUIRED: List[Dataset] = [ Dataset( source="wrds", library="crsp", table="dse", vars=["date", "permno", "permco", "dlstcd", "event"], date_vars=["date"], ) ] VARIABLE_LABELS = {} class StockDelisting(CorporateFinanceMeasure): url_docs = "https://frds.io/measures/stock_delisting/" def __init__(self): super().__init__(NAME, DATASETS_REQUIRED) def estimate(self, nparrays: List[np.recarray]): dse = pd.DataFrame.from_records(nparrays[0]) cond = np.in1d(dse.event, ["DELIST"]) & (((500 <= dse.dlstcd) &
Dataset( source="frb_chicago", library="bhc", table="bhcf", vars=[ "RSSD9001", # RSSD ID "RSSD9999", # Reporting date "BHCK2170", # Total assets "BHCK4059", # Fee and interest income from loans in foreign offices "BHCK4107", # Total interest income "BHCK4340", # Net income "BHCK4460", # Cash dividends on common stock "BHCK3792", # Total qualifying capital allowable under the risk-based capital guidelines "BHCKA223", # Risk-weighted assets "BHCK8274", # Tier 1 capital allowable under the risk-based capital guidelines "BHCK8725", # Total gross notional amount of interest rate derivatives held for purposes other than trading (marked to market) "BHCK8729", # Total gross notional amount of interest rate derivatives held for purposes other than trading (not marked to market) "BHCK8726", # Total gross notional amount of foreign exchange rate derivatives held for purposes other than trading (marked to market) "BHCK8730", # Total gross notional amount of foreign exchange rate derivatives held for purposes other than trading (not marked to market) "BHCK3197", # Earning assets that are repriceable or mature within one year "BHCK3296", # Interest-bearing deposits that mature or reprice within one year "BHCK3298", # Long term debt that reprices within one year "BHCK3409", # Long-term debt reported in schedule hc "BHCK3408", # Variable rate preferred stock "BHCK2332", # Other borrowed money with a remaining maturity of one year or less "BHCK2309", # Commercial paper "BHDMB993", # Federal funds purchased in domestic offices "BHCKB995", # Securities sold under agreements to repurchase (repo liabilities) "BHCK2122", # Total loans and leases, net of unearned income ], date_vars=["RSSD9999"], )
from typing import List, Tuple, Dict

import numpy as np
import pandas as pd

from frds.data import Dataset
from frds.measures import CorporateFinanceMeasure
from frds.data.utils import filter_funda

NAME = "ExecutiveTenure"

# Data needed by the measure: a single request against wrds.execcomp.anncomp.
# No column is declared as a date variable.
DATASETS_REQUIRED: List[Dataset] = [
    Dataset(
        source="wrds",
        library="execcomp",
        table="anncomp",
        vars=["gvkey", "year", "execid", "co_per_rol", "ceoann"],
        date_vars=[],
    ),
]

# Human-readable descriptions of the output variables
VARIABLE_LABELS: Dict[str, str] = {
    "execid": "Executive ID from Execucomp",
    "tenure": "Executive tenure",
}


class ExecutiveTenure(CorporateFinanceMeasure):
    """Executive tenure measure based on the Execucomp `anncomp` table."""

    # Documentation page; the path segment is "measures", matching the
    # url_docs of the other measures (it was misspelled "measuers").
    url_docs = "https://frds.io/measures/executive_tenure/"

    def __init__(self):
        # Register the display name together with the datasets to load
        super().__init__("Executive Tenure", DATASETS_REQUIRED)
from typing import List, Tuple, Dict import numpy as np import pandas as pd from frds.data import Dataset from frds.measures import CorporateFinanceMeasure from frds.data.utils import filter_funda DATASETS_REQUIRED: List[Dataset] = [ Dataset( source="wrds", library="audit", table="auditnonreli", vars=[ "company_fkey", # EDGAR CIK "file_date", # Filing date "res_notif_key", # Restatement notification key "res_accounting", # Restatement accounting "res_adverse", # Restatement adverse "res_fraud", # Restatement fraud "res_cler_err", # Restatement clerical errors "res_sec_invest", # Restatement SEC investigation ], date_vars=["file_date"], ), Dataset( source="wrds", library="comp", table="funda", vars=[ "gvkey", "datadate", "cik",