Пример #1
0
def data_loader_builder(verbose: bool = True):
    """Assemble a ``DataLoader`` from the MSCI World feather files and FRED.

    Prices and market values are read from feather files located relative to
    the current working directory. The 1-month Treasury constant-maturity
    series (``GS1M``) is downloaded from FRED starting 1990-01-01, averaged on
    a month-start (``'MS'``) grid and turned from an annual percentage into a
    monthly rate.

    Args:
        verbose: When true, print a summary of each input frame and the
            shapes of the datasets exposed by the loader.

    Returns:
        A ``(rf, prices, mv, dl)`` tuple: the risk-free rate, prices and
        market values as returned by the loader's getters, followed by the
        ``DataLoader`` instance itself.
    """

    def _read_and_describe(path, title):
        # Read one feather file and, if requested, print its summary
        frame = pd.read_feather(path)
        if verbose:
            print(title)
            frame.info()
        return frame

    raw_prices = _read_and_describe(
        '../../data/clean/msci_world_prices.feather',
        "MSCI World Prices:")
    raw_mv = _read_and_describe(
        '../../data/clean/msci_world_mv.feather',
        "\nMSCI World Market Values:")

    # 1-Month Treasury Constant Maturity Rate (GS1M), month-start averages
    start_date = datetime.datetime(1990, 1, 1)
    annual_pct = DataReader('GS1M', 'fred', start=start_date)
    monthly_mean_pct = annual_pct.resample('MS').mean()
    # Percent -> fraction, then annual -> monthly
    monthly_rate = monthly_mean_pct.div(100).div(12)

    dl = DataLoader(prices=raw_prices, mv=raw_mv, rf=monthly_rate)
    prices = dl.get_prices()
    mv = dl.get_mv()
    rf = dl.get_rf()

    if verbose:
        print(f'\n\'prices\' shape: {prices.shape}')
        print(f'\'mv\' shape: {mv.shape}')
        print(f'\'rf\' shape: {rf.shape}')

    return rf, prices, mv, dl
Пример #2
0
    def __update(self):
        """Re-synchronise ``self.__date`` with the indexes of the stored datasets.

        Only the attributes among prices, market values and risk-free rate
        that are not ``None`` are considered:

        * none set -> ``self.__date`` becomes ``None``;
        * exactly one set -> ``self.__date`` is that dataset's index as a
          NumPy array;
        * several set with equal index lengths -> ``self.__date`` is a copy of
          the first dataset's index; if the index values differ, a message is
          printed and ``__make_indices_values_match`` is called before the
          match is asserted;
        * several set with differing index lengths -> either the risk-free
          series is extended backwards with the FRED ``GS3M`` series (when a
          risk-free series exists and starts after the earliest date among
          the datasets), or ``__truncate_rows`` is called; the method then
          calls itself again to re-evaluate the new state.

        Raises:
            AssertionError: If one of the consistency checks fails after the
                helpers have been applied.
        """
        p, mv, rf = self.__prices, self.__mv, self.__rf
        # Keep only the datasets that are actually set
        li = [x for x in (p, mv, rf) if x is not None]

        # Every attribute is 'None': there is no date index to expose
        if not li:
            self.__date = None
            return

        # A single dataset is set: its index is the date index
        if len(li) == 1:
            self.__date: np.ndarray = li[0].index.to_numpy()
            return

        # At least two datasets: lengths are checked first (to prevent a
        # ValueError when comparing values), then the values themselves
        if self.__check_index_length_match(li):
            if self.__check_index_values_match(li):
                self.__date = li[0].index.to_numpy().copy()
            else:
                # Same lengths but different values: force a common index
                print(
                    "Lengths of rows match, but not they have different values."
                )
                self.__date = li[0].index.to_numpy().copy()
                self.__make_indices_values_match()
                # NOTE(review): this checks the objects captured in 'li'
                # before the helper ran; presumably the helper mutates them
                # in place -- confirm.
                assert self.__check_index_values_match(li)
            return

        # Lengths differ. Get the oldest date among the list of DataFrames
        min_date = min(df.index.min() for df in li)

        # In the case there is a risk-free rate and that it begins after the
        # other series: try to complete it with the 3 month proxy.
        # 'and' (not '&') is required here: it short-circuits, so the index
        # of 'self.__rf' is never read when no risk-free series is set.
        if (self.__rf is not None) and (self.__rf.index[0] > min_date):
            # Get initial date of the risk-free rate series
            end = rf.index[0]
            # 3-Month Treasury Constant Maturity Rate (GS3M)
            rf3m = DataReader('GS3M', 'fred', start=min_date,
                              end=end).resample('MS').mean()
            # We have to drop the last row to prevent overlapping with the
            # first row of the existing series. A timedelta could not be used
            # to go back 1 month as some months have 31 days while others 30
            rf3m.drop(rf3m.tail(1).index, inplace=True)
            rf3m.columns = rf.columns
            # Same scaling as an annual percentage turned into a monthly rate
            rf3m = rf3m.div(100).div(12)
            # Concatenate both risk-free rate series, proxy first
            rf_concat = pd.concat([rf3m, self.__rf], sort=True)
            errmsg: str = f"Got {rf_concat.shape} shape, but ({len(li[0].index)}, 1) expected."
            assert rf_concat.shape[1] == 1, errmsg
            self.__rf = rf_concat
        else:
            # Truncate rows of different length according to their dates
            self.__truncate_rows()
            # Verify if the rows were correctly truncated
            not_none_attributes_list = self.__among_not_none_attributes()
            err_message = "Rows were not correctly truncated"
            assert self.__check_index_length_match(
                not_none_attributes_list), err_message
            # Update the 'self.__date' attribute with the first item
            self.__date = not_none_attributes_list[0].index.to_numpy().copy()
            # Propagate same indexes to the other datasets to force a
            # perfect match
            self.__make_indices_values_match()

            # Verify that indices have same indexes
            err_message = "Values do not match among not 'None' attributes."
            assert self.__check_index_values_match(
                self.__among_not_none_attributes()), err_message

        # The datasets changed: run the whole check again on the new state
        self.__update()