Python DataCollection.get_freq示例，utils.Data.DataCollection.get_freq Python示例

示例#1

0

显示文件

文件： Portfolio.py 项目： zliu2019/ML-APRP-Forecasting

 def calculate_initial_weight(self, input_dc: DataCollection):
     """Compute the initial weights from ``input_dc`` and store them on ``self``.

     On success this sets ``self.input_freq`` (the value returned by
     ``input_dc.get_freq()``), ``self.tickers`` (``input_dc.ticker_list()``)
     and ``self.initial_weight`` (the result of ``self.optimizer`` applied
     to ``input_dc.to_df().dropna()``).

     All validation happens before any attribute is written, so a rejected
     call leaves the portfolio's existing state untouched.

     Raises:
         Exception: if ``input_dc.get_freq()`` does not return a ``str``
             (the error message attributes this to inconsistent series
             frequencies), or if ``self.initial_weight`` is already set.
     """
     # Query the frequency once and reuse it for both the check and the store.
     freq = input_dc.get_freq()
     if not isinstance(freq, str):
         raise Exception("Optimization failed due to inconsistent series frequencies within input_dc.")
     if self.initial_weight is not None:
         raise Exception("initial weight was already calculated")

     # Frequency and tickers are stored before the optimizer runs, as in the
     # original ordering, in case the optimizer reads them from ``self``.
     self.input_freq = freq
     self.tickers = input_dc.ticker_list()
     self.initial_weight = self.optimizer(input_dc.to_df().dropna())

示例#2

0

显示文件

文件： Portfolio.py 项目： zliu2019/ML-APRP-Forecasting

 def calculate_initial_weight(self, input_dc: DataCollection, weight_bounds=(0, 1), risk_aversion=1,
                              market_neutral=False, risk_free_rate=0.0, target_volatility=0.01,
                              target_return=0.11, returns_data=True, compounding=False):
     """Compute the initial weights from ``input_dc`` and store them on ``self``.

     On success this sets ``self.input_freq``, ``self.tickers`` and
     ``self.initial_weight``. The weights are whatever ``self.optimizer``
     returns when called with ``input_dc.to_df().dropna()``, the input
     frequency, ``self.solution`` and the keyword parameters below, passed
     positionally in the order listed.

     All validation happens before any attribute is written, so a rejected
     call leaves the portfolio's existing state untouched.

     Args:
         input_dc: collection supplying the frequency, tickers and data.
         weight_bounds, risk_aversion, market_neutral, risk_free_rate,
         target_volatility, target_return, returns_data, compounding:
             forwarded unchanged to ``self.optimizer``; their meaning is
             defined by that optimizer, which is not visible here.

     Raises:
         Exception: if ``input_dc.get_freq()`` does not return a ``str``
             (the error message attributes this to inconsistent series
             frequencies), or if ``self.initial_weight`` is already set.
     """
     # Query the frequency once and reuse it for both the check and the store.
     freq = input_dc.get_freq()
     if not isinstance(freq, str):
         raise Exception("Optimization failed due to inconsistent series frequencies within input_dc.")
     if self.initial_weight is not None:
         raise Exception("initial weight was already calculated")

     self.input_freq = freq
     self.tickers = input_dc.ticker_list()
     self.initial_weight = self.optimizer(input_dc.to_df().dropna(), self.input_freq, self.solution,
                                          weight_bounds, risk_aversion, market_neutral, risk_free_rate,
                                          target_volatility, target_return, returns_data, compounding)

示例#3

0

显示文件

文件： Portfolio_Performance.py 项目： zliu2019/ML-APRP-Forecasting

    def __init__(self, portfolio: Portfolio, evaluate_dc: DataCollection):
        """Bind ``portfolio`` to the data collection it is evaluated against.

        Stores the portfolio, its solution label, the evaluation collection,
        that collection's frequency, its NaN-free data frame, and an empty
        ``metrics`` dict.

        Raises:
            ValueError: if the portfolio's tickers differ from the
                collection's ticker list, or if their frequencies differ.
        """
        tickers_differ = portfolio.get_tickers() != evaluate_dc.ticker_list()
        if tickers_differ:
            raise ValueError("Tickers in portfolio and evaluate data do not match")

        self.portfolio = portfolio
        # TODO(review): the original author flagged this assignment with
        # "Check this"; the label is taken from the portfolio's solution.
        self.label = portfolio.get_solution()

        freqs_differ = portfolio.get_freq() != evaluate_dc.get_freq()
        if freqs_differ:
            raise ValueError("The frequency of the data and portfolio do not match")

        self.metrics = {}
        self.evaluate_dc = evaluate_dc
        self.price_df = evaluate_dc.to_df().dropna()
        self.freq = evaluate_dc.get_freq()

示例#4

0

显示文件

    def __init__(self, *args, **kwargs):
        """Build the daily (hand-made) and monthly (file-based) fixtures."""
        super().__init__(*args, **kwargs)

        # Daily fixture: fake data by ZZ.
        abc_prices = pd.DataFrame(
            [10.0, 15.0, 20.0, 30.0],
            columns=['ABC'],
            index=pd.to_datetime(
                ['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04']))
        kkk_prices = pd.DataFrame(
            [1.0, 3.5, 4.5],
            columns=['KKK'],
            index=pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-03']))
        self.a_series = DataSeries('ETF', 'daily', abc_prices)
        self.b_series = DataSeries('Bond', 'daily', kkk_prices)
        self.collect = DataCollection('trial', [self.a_series, self.b_series])

        # One row labelled 'Initial weights' with a column per ticker.
        transposed = pd.DataFrame(data={'Initial weights': [0.6, 0.4]}).T
        self.weights = transposed.rename(columns={0: 'ABC', 1: 'KKK'})

        self.p = port.EqualPort("test equal port")
        self.p.calculate_initial_weight(self.collect)

        # Monthly fixture: the first four entries read from test/Data/Monthly.
        monthly_dir = os.path.join('test', 'Data', 'Monthly')
        monthly_data = DataPreprocessing.read_file(monthly_dir)

        asset_count = 4
        keys = list(monthly_data.keys())
        monthly_series = []
        for position in range(asset_count):
            entry = monthly_data[keys[position]]
            # entry[1] is passed as DataSeries' first argument and entry[0]
            # as its third; their contents are defined by read_file.
            monthly_series.append(DataSeries(entry[1], 'monthly', entry[0]))

        self.input_dc = DataCollection(label='Test Collection',
                                       time_series_group=monthly_series)
        self.input_freq = self.input_dc.get_freq()
        self.input_df = self.input_dc.to_df()
        self.n_asset = len(self.input_df.columns)

        # Equal weights over every column of the monthly frame.
        equal_share = 1 / self.n_asset
        self.input_weights_df = pd.DataFrame(
            [[equal_share] * self.n_asset],
            columns=self.input_df.columns,
            index=['Initial weights'])

示例#5

0

显示文件

文件： test_Optimization.py 项目： zliu2019/ML-APRP-Forecasting

    def __init__(self, *args, **kwargs):
        """Build the monthly (file-based) and hand-made fixtures."""
        super().__init__(*args, **kwargs)

        # Monthly fixture: the first four entries read from test/Data/Monthly.
        monthly_dir = os.path.join('test', 'Data', 'Monthly')
        monthly_data = DataPreprocessing.read_file(monthly_dir)

        asset_count = 4
        keys = list(monthly_data.keys())
        monthly_series = [
            DataSeries('ETF', 'monthly', monthly_data[keys[position]][0])
            for position in range(asset_count)
        ]

        self.input_dc = DataCollection(label='Test Collection',
                                       time_series_group=monthly_series)
        self.input_freq = self.input_dc.get_freq()
        self.input_df = self.input_dc.to_df().dropna()

        # Hand-made series; both reuse the frequency of the monthly fixture.
        spy_dates = pd.to_datetime([
            '2020-01-01', '2020-02-01', '2020-03-01',
            '2020-04-01', '2020-05-01', '2020-06-01'
        ])
        self.a = pd.DataFrame([10, 12, 32, 9, 11, 9],
                              columns=['fakeSPY'],
                              index=spy_dates)
        self.a_series = DataSeries('ETF', self.input_freq, self.a)

        # The treasury dates start earlier and skip 2020-01-01, so the joined
        # frame has gaps that the interpolation below fills.
        treasury_dates = pd.to_datetime(
            ['2019-12-01', '2020-02-01', '2020-03-01', '2020-04-01'])
        self.b = pd.DataFrame([1, 1.2, 3.2, 0.9],
                              columns=['fakeTreasury'],
                              index=treasury_dates)
        self.b_series = DataSeries('Bond', self.input_freq, self.b)

        self.c_collection = DataCollection('trial',
                                           [self.a_series, self.b_series])
        joined = self.c_collection.to_df()
        self.c_df = joined.interpolate(method='linear', axis=0)

示例#6

0

显示文件

class Test_Portfolio_Performance(unittest.TestCase):
    """Tests for the ``PP`` metric functions and ``PP.PortfolioPerformance``.

    Two fixtures are built for every test instance: a small hand-made daily
    collection (``self.collect``) with 60/40 weights (``self.weights``) and
    an equal-weight portfolio (``self.p``), plus a monthly collection read
    from ``test/Data/Monthly`` with equal weights (``self.input_weights_df``).
    """

    # NOTE(review): fixtures are built in __init__ rather than setUp; kept
    # as-is so that attributes stay available right after construction.
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # fake data by ZZ Daily
        self.a_series = DataSeries(
            'ETF', 'daily',
            pd.DataFrame([10.0, 15.0, 20.0, 30.0],
                         columns=['ABC'],
                         index=pd.to_datetime([
                             '2020-01-01', '2020-01-02', '2020-01-03',
                             '2020-01-04'
                         ])))
        self.b_series = DataSeries(
            'Bond', 'daily',
            pd.DataFrame([1.0, 3.5, 4.5],
                         columns=['KKK'],
                         index=pd.to_datetime([
                             '2020-01-01',
                             '2020-01-02',
                             '2020-01-03',
                         ])))
        self.collect = DataCollection('trial', [self.a_series, self.b_series])

        # One row labelled 'Initial weights' with a column per ticker.
        d = {'Initial weights': [0.6, 0.4]}
        self.weights = pd.DataFrame(data=d).T
        self.weights = self.weights.rename(columns={0: 'ABC', 1: 'KKK'})

        self.p = port.EqualPort("test equal port")
        self.p.calculate_initial_weight(self.collect)

        # Monthly
        path_monthly = os.path.join('test', 'Data', 'Monthly')
        dic_monthly = DataPreprocessing.read_file(path_monthly)

        n_assets = 4
        keys = list(dic_monthly.keys())
        time_series_group = []
        for i in range(n_assets):
            df = dic_monthly[keys[i]]
            ds = DataSeries(df[1], 'monthly', df[0])
            time_series_group.append(ds)
        input_dc_test = DataCollection(label='Test Collection',
                                       time_series_group=time_series_group)
        self.input_dc = input_dc_test
        self.input_freq = input_dc_test.get_freq()
        self.input_df = self.input_dc.to_df()
        self.n_asset = len(self.input_df.columns)
        input_weights = [[1 / self.n_asset] * self.n_asset]
        input_weights_df = pd.DataFrame(input_weights,
                                        columns=self.input_df.columns,
                                        index=['Initial weights'])
        self.input_weights_df = input_weights_df

    def test_annualized_return(self):
        """Compare annualized_return with a manual mean-return computation."""
        ans = PP.annualized_return(self.weights,
                                   self.collect.to_df().dropna(), 'daily')

        output_return_df = self.collect.to_df().dropna().pct_change().dropna()
        annual_return = output_return_df.mean() * 252
        ans_new = annual_return @ self.weights.T

        # ans_new is a one-element Series labelled 'Initial weights'; use
        # .iloc for positional access instead of an integer key.
        self.assertAlmostEqual(ans_new.iloc[0], 203.4)
        self.assertAlmostEqual(ans, ans_new.iloc[0])

        res2 = PP.annualized_return(self.input_weights_df, self.input_df,
                                    self.input_freq)
        expected2 = np.dot(self.input_weights_df,
                           self.input_df.pct_change().mean() * 12).item()
        # Both sides are computed floats, so compare with a tolerance.
        self.assertAlmostEqual(res2, expected2)

    def test_annualized_volatility(self):
        """Compare annualized_volatility with sqrt(w @ cov @ w.T)."""
        ans = PP.annualized_volatility(self.weights,
                                       self.collect.to_df().dropna(), 'daily')

        cov = self.collect.to_df().dropna().pct_change().dropna().cov() * 252
        ans_new = (self.weights @ cov @ self.weights.T).iloc[0, 0]**0.5

        self.assertAlmostEqual(ans, ans_new)

        res2 = PP.annualized_volatility(self.input_weights_df, self.input_df,
                                        self.input_freq)
        expected2 = np.sqrt(
            np.dot(
                self.input_weights_df,
                np.dot(self.input_df.pct_change().cov() * 12,
                       self.input_weights_df.T)).item())
        self.assertAlmostEqual(res2, expected2)

    def test_sharpe_ratio(self):
        """sharpe_ratio(0.6, 0.2, 0.03) must equal (0.6 - 0.03) / 0.2."""
        ans = PP.sharpe_ratio(0.6, 0.2, 0.03)
        ans_new = (0.6 - 0.03) / 0.2
        self.assertAlmostEqual(ans, ans_new)

    def test_PnL(self):
        """The first two PnL rows must equal the weighted period returns."""
        ans = PP.PnL(self.weights, self.collect.to_df().dropna())

        output_return_df = self.collect.to_df().dropna().pct_change().dropna()
        weight_abc = self.weights.iloc[0, 0]
        weight_kkk = self.weights.iloc[0, 1]
        ans_1 = (output_return_df.iloc[0, 0] * weight_abc +
                 output_return_df.iloc[0, 1] * weight_kkk)
        ans_2 = (output_return_df.iloc[1, 0] * weight_abc +
                 output_return_df.iloc[1, 1] * weight_kkk)

        self.assertAlmostEqual(ans.iloc[0, 0], ans_1)
        self.assertAlmostEqual(ans.iloc[1, 0], ans_2)

    def test_max_drawdown(self):
        """max_drawdown on a known price path must equal (25 - 75) / 75."""
        price = {
            'PnL': [75, 33, 35, 25, 80, 100, 95, 78, 72, 62, 65, 60, 42, 50]
        }
        pnl = pd.DataFrame(data=price).pct_change().dropna()

        ans = PP.max_drawdown(pnl)
        ans_new = (25 - 75) / 75

        self.assertAlmostEqual(ans, ans_new)

    def test_partial_moment(self):
        """Compare partial_moment(threshold=0.6) with a manual computation."""
        pnl = PP.PnL(self.weights, self.collect.to_df().dropna())
        pm = PP.partial_moment(pnl, threshold=0.6)

        length = pnl.shape[0]
        threshold = 0.6
        diff_df = threshold - pnl
        # Keep only the rows where the PnL does not exceed the threshold.
        drop_minus = diff_df[diff_df >= 0].dropna()
        pm_new = ((drop_minus**2).sum() / length).item()

        self.assertAlmostEqual(pm, 0.0408163265)
        self.assertAlmostEqual(pm, pm_new)

    def test_PP_class(self):
        """Exercise PortfolioPerformance end to end on the daily fixture."""
        self.assertEqual(self.p.get_freq(), self.collect.get_freq())

        pp = PP.PortfolioPerformance(self.p, self.collect)
        pp.annualized_return()
        pp.annualized_volatility()
        pp.annualized_sharpe_ratio()
        pp.PnL()
        # 0 since test data always increasing, but function tested
        pp.max_drawdown()

        self.assertEqual(pp.get_metrics("annualized_return"), 228)
        self.assertEqual(pp.get_metrics("PnL").iloc[0, 0], 1.5)
        pp.print_metrics()
        pp.get_metrics('PnL')

        self.assertEqual(pp.metrics['annualized_return'], 228)
        self.assertAlmostEqual(pp.metrics['annualized_volatility'], 13.3630621)
        self.assertAlmostEqual(pp.metrics['sharpe_ratio'], 228 / 13.3630621)
        self.assertAlmostEqual(pp.metrics['PnL'].iloc[0, 0], 1.5)
        self.assertAlmostEqual(pp.metrics['PnL'].iloc[1, 0],
                               0.30952380952380953)
        self.assertEqual(pp.metrics['max_drawdown'], 0)

        # sortino
        pp.sortino_ratio(threshold=0.6)

        # The expected values below are rebuilt from 50/50 weights.
        d = {'Initial weights': [0.5, 0.5]}
        self.weights2 = pd.DataFrame(data=d).T
        self.weights2 = self.weights2.rename(columns={0: 'ABC', 1: 'KKK'})
        pnl = PP.PnL(self.weights2, self.collect.to_df().dropna())

        threshold = 0.6
        expected = pp.metrics['annualized_return']
        lpm_sortino = PP.partial_moment(pnl, threshold, order=2,
                                        lower=True)**0.5

        ans_sortino = (expected - threshold) / lpm_sortino

        self.assertAlmostEqual(pp.metrics['sortino_ratio'], ans_sortino)

        # omega
        pp.omega_ratio(threshold=0.6)
        lpm_omega = PP.partial_moment(pnl, threshold, order=1, lower=True)
        ans_omega = ((expected - threshold) / lpm_omega) + 1

        self.assertAlmostEqual(pp.metrics['omega_ratio'], ans_omega)

示例#7

0

显示文件

文件： test_Data.py 项目： zliu2019/ML-APRP-Forecasting

class Test_Data(unittest.TestCase):
    """Tests for ``DataSeries`` and ``DataCollection``.

    Checks use ``unittest`` assertion methods rather than bare ``assert``
    statements, so they still run under ``python -O`` and report the
    mismatching values on failure.
    """

    # NOTE(review): fixtures are built in __init__ rather than setUp; kept
    # as-is so that attributes stay available right after construction.
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.a = pd.DataFrame([10.2, 12, 32.1, 9.32],
                              columns=['fakeSPY'],
                              index=pd.to_datetime([
                                  '2020-01-01', '2020-02-01', '2020-03-01',
                                  '2020-04-01'
                              ]))
        self.a_series = DataSeries('ETF', 'monthly', self.a)
        self.b = pd.DataFrame([2.3, 3.6, 4.5],
                              columns=['fakeTreasury'],
                              index=pd.to_datetime(
                                  ['2019-12-12', '2020-02-05', '2020-09-13']))
        self.b_series = DataSeries('Bond', 'monthly', self.b)
        self.c_collection = DataCollection('trial',
                                           [self.a_series, self.b_series])

        # "Entire" data sets: the base fixtures plus extra trailing points.
        self.a_entire = pd.DataFrame([10.2, 12, 32.1, 9.32, 11.5, 9.7],
                                     columns=['fakeSPY'],
                                     index=pd.to_datetime([
                                         '2020-01-01', '2020-02-01',
                                         '2020-03-01', '2020-04-01',
                                         '2020-05-01', '2020-06-01'
                                     ]))
        self.a_series_entire = DataSeries('ETF', 'monthly', self.a_entire)
        self.b_entire = pd.DataFrame([2.3, 3.6, 4.5, 5.5],
                                     columns=['fakeTreasury'],
                                     index=pd.to_datetime([
                                         '2019-12-12', '2020-02-05',
                                         '2020-09-13', '2020-10-13'
                                     ]))
        self.b_series_entire = DataSeries('Bond', 'monthly', self.b_entire)
        self.c_collection_entire = DataCollection(
            'trial', [self.a_series_entire, self.b_series_entire])

        # Expected remainder: the points in "entire" that the base lacks.
        self.a_exp = pd.DataFrame([11.5, 9.7],
                                  columns=['fakeSPY'],
                                  index=pd.to_datetime(
                                      ['2020-05-01', '2020-06-01']))
        self.a_series_exp = DataSeries('ETF', 'monthly', self.a_exp)
        self.b_exp = pd.DataFrame([5.5],
                                  columns=['fakeTreasury'],
                                  index=pd.to_datetime(['2020-10-13']))
        self.b_series_exp = DataSeries('Bond', 'monthly', self.b_exp)
        self.c_collection_exp = DataCollection(
            'trial', [self.a_series_exp, self.b_series_exp])

    def test_DataSeries_basic(self):
        """Check length, string form, accessors, copy and to_Series."""
        a = self.a
        a_series = self.a_series
        self.assertEqual(len(a_series), 4)
        self.assertEqual(str(a_series), 'monthly fakeSPY')
        self.assertEqual(a_series.get_ticker(), 'fakeSPY')
        self.assertEqual(a_series.get_category(), 'ETF')
        self.assertEqual(a_series.get_freq(), 'monthly')
        self.assertTrue(a.equals(a_series.get_ts()))

        # test deep copy: a different object holding an equal time series
        a_copy = a_series.copy()
        self.assertNotEqual(a_copy, a_series)
        self.assertTrue(a_copy.get_ts().equals(a_series.get_ts()))

        self.assertIsInstance(a_series.to_Series(), pd.Series)

    def test_DataSeries_add_sub(self):
        """Subtracting then re-adding a series must round-trip."""
        diff = self.a_series_entire - self.a_series
        self.assertTrue(self.compareSeries(diff, self.a_series_exp))
        a_plus = diff + self.a_series
        self.assertTrue(self.compareSeries(a_plus, self.a_series_entire))

    def test_DataSeries_to_list(self):
        """to_list must return the raw values in order."""
        lst = self.a_series.to_list()
        self.assertEqual(lst, [10.2, 12, 32.1, 9.32])

    def test_last_index(self):
        """get_last_date must return the final index entry."""
        self.assertEqual(self.a_series.get_last_date(),
                         pd.to_datetime('2020-04-01'))

    def test_DataSeries_split_and_trim(self):
        """Check a 75/25 split and an inclusive date-range trim."""
        # test split
        a_train, a_test = self.a_series.split(pct=0.75)
        self.assertIsInstance(a_train, DataSeries)
        self.assertIsInstance(a_test, DataSeries)
        self.assertEqual(len(a_train), 3)
        self.assertEqual(len(a_test), 1)
        self.assertTrue(self.a.iloc[:3].equals(a_train.get_ts()))
        self.assertTrue(self.a.iloc[3:].equals(a_test.get_ts()))

        # test trim
        trimed = self.a_series.trim('2020-02-01', '2020-03-01')
        self.assertEqual(len(trimed), 2)
        self.assertTrue(
            self.a.loc['2020-02-01':'2020-03-01'].equals(trimed.get_ts()))

    @staticmethod
    def compareSeries(a, b):
        """Return True if ``a`` and ``b`` are distinct but equivalent DataSeries.

        Returns False immediately if either argument is not a ``DataSeries``.
        Otherwise every check runs and prints a message for each mismatch,
        so one call reports all differences: the two must not compare
        equal with ``==``, and must agree on length, string form, category,
        time series and frequency.
        """
        flag = True
        if not isinstance(a, DataSeries):
            print("\n The first item is not a DataSeries object")
            return False
        if not isinstance(b, DataSeries):
            print("\n The Second item is not a DataSeries object")
            return False
        # NOTE(review): the message speaks of object identity but the check
        # uses ==; the two presumably coincide for DataSeries -- confirm.
        if a == b:
            print("\n The two items are the same object")
            flag = False
        if len(a) != len(b):
            print("\n The two items does not have the same length")
            flag = False

        if str(a) != str(b):
            print("\n The two items does not have the same ticker")
            flag = False

        if a.get_category() != b.get_category():
            print("\n The two items does not have the same category")
            flag = False

        if not a.get_ts().equals(b.get_ts()):
            print("\n The two items does not have the same time series")
            flag = False

        if not a.get_freq() == b.get_freq():
            print("\n The two items does not have the same frequency")
            flag = False

        return flag

    def test_DataCollection_basic(self):
        """Check collection length, frequency and iteration order."""
        self.assertEqual(len(self.c_collection), 2)
        self.assertEqual(self.c_collection.get_freq(), 'monthly')
        for item, compare in zip(self.c_collection,
                                 [self.a_series, self.b_series]):
            self.assertTrue(self.compareSeries(item, compare))

    def test_DataCollection_add_sub(self):
        """Subtracting then re-adding a collection must round-trip."""
        res = self.c_collection_entire - self.c_collection
        expected = self.c_collection_exp
        for r, e in zip(res, expected):
            self.assertTrue(self.compareSeries(r, e))
        res_plus = res + self.c_collection
        for r, e in zip(res_plus, self.c_collection_entire):
            self.assertTrue(self.compareSeries(r, e))

    def test_DataCollection_get_series(self):
        """Series must be retrievable by position and by ticker."""
        item1 = self.c_collection[1]
        self.assertTrue(self.compareSeries(item1, self.b_series))

        item2 = self.c_collection.get_series('fakeSPY')
        self.assertTrue(self.compareSeries(item2, self.a_series))

    def test_DataCollection_copy(self):
        """A copy must be a different object with equivalent content."""
        c = self.c_collection.copy()
        self.assertNotEqual(c, self.c_collection)
        self.assertEqual(c.label, self.c_collection.label)
        self.assertEqual(c.get_freq(), self.c_collection.get_freq())
        for one, two in zip(c, self.c_collection):
            self.assertTrue(self.compareSeries(one, two))

    def test_DataCollection_summary(self):
        """Placeholder: no checks implemented yet."""
        pass

    def test_DataCollection_split(self):
        """Splitting a collection must match splitting each series."""
        train, test = self.c_collection.split(pct=0.75)
        self.assertEqual(str(train), 'trial')
        self.assertEqual(train.freq, 'monthly')
        self.assertEqual(str(test), 'trial')
        self.assertEqual(test.freq, 'monthly')

        compare = [self.a_series.split(0.75), self.b_series.split(0.75)]
        compare_train, compare_test = zip(*compare)
        train_col, test_col = list(compare_train), list(compare_test)
        for i, item in enumerate(train):
            self.assertTrue(self.compareSeries(item, train_col[i]))

        for i, item in enumerate(test):
            self.assertTrue(self.compareSeries(item, test_col[i]))

    def test_DataCollection_list(self):
        """Check the ticker, category, last-date and value list views."""
        self.assertEqual(self.c_collection.ticker_list(),
                         ['fakeSPY', 'fakeTreasury'])
        self.assertEqual(self.c_collection.category_list(), ['ETF', 'Bond'])
        self.assertEqual(
            self.c_collection.last_date_list(),
            pd.to_datetime(['2020-04-01', '2020-09-13']).to_list())
        self.assertEqual(self.c_collection.to_list(),
                         [[10.2, 12, 32.1, 9.32], [2.3, 3.6, 4.5]])

    def test_DataCollection_add(self):
        """A series added to a copy must appear after the existing ones."""
        d = pd.DataFrame([11, 22],
                         columns=['fakeZZZ'],
                         index=pd.to_datetime(['2019-1-12', '2019-02-05']))
        d_series = DataSeries('Bond', 'monthly', d)
        c_plus = self.c_collection.copy()
        c_plus.add(d_series)

        compare = [self.a_series, self.b_series, d_series]
        for i, item in enumerate(c_plus):
            self.assertTrue(self.compareSeries(item, compare[i]))

    def test_DataCollection_df(self):
        """to_df must equal the column-wise concat of the source frames."""
        df = self.c_collection.to_df()
        compare = pd.concat([self.a, self.b], axis=1)
        self.assertTrue(df.equals(compare))

    def test_price_to_return(self):
        """Placeholder: no checks implemented yet."""
        pass