def data_loader_builder(
        verbose: bool = True,
        *,
        prices_path: str = '../../data/clean/msci_world_prices.feather',
        mv_path: str = '../../data/clean/msci_world_mv.feather',
        rf_start=None,
):
    """Build a ``DataLoader`` from the MSCI World feather files and the FRED GS1M rate.

    The prices and market values are read with ``pd.read_feather``; the
    risk-free rate is downloaded from FRED (series ``GS1M``), resampled to
    month-start frequency with a mean, and divided by 100 and then by 12 to
    turn the annual rate into a monthly one.

    Args:
        verbose: When true, print a summary (``DataFrame.info()``) of each
            loaded file and the shapes of the datasets held by the loader.
        prices_path: Location of the prices feather file. The default is
            relative to the current working directory.
        mv_path: Location of the market-values feather file. The default is
            relative to the current working directory.
        rf_start: First date requested from FRED for the risk-free series.
            ``None`` (the default) means 1990-01-01.

    Returns:
        The tuple ``(rf, prices, mv, dl)``: the three datasets as returned by
        the loader's ``get_rf()``, ``get_prices()`` and ``get_mv()``, followed
        by the ``DataLoader`` instance itself.
    """
    # Resolved at call time so the signature does not depend on import order.
    if rf_start is None:
        rf_start = datetime.datetime(1990, 1, 1)

    msci_prices_from_feather = pd.read_feather(prices_path)
    if verbose:
        print("MSCI World Prices:")
        msci_prices_from_feather.info()

    msci_mv_from_feather = pd.read_feather(mv_path)
    if verbose:
        print("\nMSCI World Market Values:")
        msci_mv_from_feather.info()

    # 1-Month Treasury Constant Maturity Rate (GS1M)
    rf_ = DataReader('GS1M', 'fred', start=rf_start).resample('MS').mean()
    # We bring the annual rate to a monthly one
    rf_m = rf_.div(100).div(12)

    dl = DataLoader(prices=msci_prices_from_feather,
                    mv=msci_mv_from_feather,
                    rf=rf_m)
    # Read the datasets back from the loader rather than reusing the raw
    # inputs: the loader is the source of truth for what callers receive.
    prices, mv, rf = dl.get_prices(), dl.get_mv(), dl.get_rf()

    if verbose:
        print(f'\n\'prices\' shape: {prices.shape}')
        print(f'\'mv\' shape: {mv.shape}')
        print(f'\'rf\' shape: {rf.shape}')

    return rf, prices, mv, dl
def __update(self):
    """Re-derive ``self.__date`` from the datasets currently loaded.

    Behaviour by number of non-``None`` datasets among prices, market
    values and risk-free rate:

    * none: ``self.__date`` is set to ``None``;
    * one: ``self.__date`` is that dataset's index as a NumPy array;
    * two or more with indices of equal length: ``self.__date`` is a copy
      of the first index; if the index values differ, a notice is printed
      and ``__make_indices_values_match()`` is called to align them;
    * two or more with indices of different length: if the risk-free
      series starts after the earliest date of the other series, it is
      back-filled with the FRED ``GS3M`` series; otherwise the datasets are
      truncated with ``__truncate_rows()`` and their indices aligned. The
      method then calls itself so the modified attributes are re-checked.

    Raises:
        AssertionError: If the indices still disagree after alignment or
            truncation, or if the back-filled risk-free data does not have
            exactly one column.
    """
    p, mv, rf = self.__prices, self.__mv, self.__rf
    # Select attributes different from 'None'
    li = [x for x in (p, mv, rf) if x is not None]

    # All attributes are 'None': there is no date axis.
    if not li:
        self.__date = None
        return

    # Only one dataset: 'self.__date' is simply its index.
    if len(li) == 1:
        self.__date: np.ndarray = li[0].index.to_numpy()
        return

    # At least two datasets: rows must match in length and in values.
    # Lengths are checked first (to prevent ValueError when comparing values).
    if self.__check_index_length_match(li):
        if self.__check_index_values_match(li):
            self.__date = li[0].index.to_numpy().copy()
        else:
            # Same number of rows but different labels: force a common index.
            print("Lengths of rows match, but not they have different values.")
            self.__date = li[0].index.to_numpy().copy()
            self.__make_indices_values_match()
            assert self.__check_index_values_match(li)
        return

    # Lengths differ. Get the oldest date among the datasets.
    min_date = min(df.index.min() for df in li)

    # If there is a risk-free rate and it begins after the other series, try
    # to complete it with the 3-month proxy. `and` (not `&`) is required so
    # that a missing risk-free series short-circuits instead of evaluating
    # `None.index`.
    backfilled = False
    if rf is not None and rf.index[0] > min_date:
        # The proxy is requested up to the first date of the risk-free series.
        end = rf.index[0]
        # 3-Month Treasury Constant Maturity Rate (GS3M)
        rf3m = DataReader('GS3M', 'fred', start=min_date,
                          end=end).resample('MS').mean()
        # Drop the last row to prevent overlapping with the existing series.
        # A timedelta of "one month" is not usable here because months have
        # different numbers of days.
        rf3m.drop(rf3m.tail(1).index, inplace=True)
        # Only accept the proxy when it actually adds earlier rows; otherwise
        # the state would be unchanged and the recursive call below would
        # never terminate.
        if not rf3m.empty:
            rf3m.columns = rf.columns
            rf3m = rf3m.div(100).div(12)
            # Concatenate both risk-free rates
            rf_concat = pd.concat([rf3m, rf], sort=True)
            errmsg: str = f"Got {rf_concat.shape} shape, but ({len(li[0].index)}, 1) expected."
            assert rf_concat.shape[1] == 1, errmsg
            self.__rf = rf_concat
            backfilled = True

    if not backfilled:
        # Truncate rows of different length according to their dates
        self.__truncate_rows()
        # Verify if the rows were correctly truncated
        not_none_attributes_list = self.__among_not_none_attributes()
        err_message = "Rows were not correctly truncated"
        assert self.__check_index_length_match(
            not_none_attributes_list), err_message
        # Update the 'self.__date' attribute with the first item
        self.__date = not_none_attributes_list[0].index.to_numpy().copy()
        # Propagate same indexes to the other datasets to force a perfect match
        self.__make_indices_values_match()
        # Verify that indices have same indexes
        err_message = "Values do not match among not 'None' attributes."
        assert self.__check_index_values_match(
            self.__among_not_none_attributes()), err_message

    # Re-validate against the attributes as they now stand. After a
    # back-fill this is the pass that truncates and sets 'self.__date'.
    self.__update()