Example #1
    def test_allowed_missing_doesnt_double_count(self):
        # Test that allowed_missing only counts a row as missing one
        # observation if it's missing in both the dependent and independent
        # variable.
        rand = np.random.RandomState(42)
        true_betas = np.array([-0.5, 0.0, 0.5, 1.0, 1.5])
        independent = as_column(np.linspace(-5.0, 5.0, 30))
        noise = as_column(rand.uniform(-0.1, 0.1, 30))
        dependents = 1.0 + true_betas * independent + noise

        # Each column has three nans in the grid.
        dependent_nan_grid = np.array(
            [
                [0, 1, 1, 1, 0],
                [0, 0, 1, 1, 1],
                [1, 0, 0, 1, 1],
                [1, 1, 0, 0, 1],
                [1, 1, 1, 0, 0],
            ],
            dtype=bool,
        )
        # There are also two nans in the independent data.
        independent_nan_grid = np.array([[0], [0], [1], [1], [0]], dtype=bool)

        dependents[10:15][dependent_nan_grid] = np.nan
        independent[10:15][independent_nan_grid] = np.nan

        # With only two allowed missing values, everything should come up nan,
        # because every column has at least 3 nans in the dependent data.
        result2 = vectorized_beta(dependents, independent, allowed_missing=2)
        assert_equal(np.isnan(result2),
                     np.array([True, True, True, True, True]))

        # With three allowed missing values, the first and last columns should
        # produce a value, because they have nans at the same rows where the
        # independent data has nans.
        result3 = vectorized_beta(dependents, independent, allowed_missing=3)
        assert_equal(np.isnan(result3),
                     np.array([False, True, True, True, False]))

        # With four allowed missing values, everything but the middle column
        # should produce a value. The middle column will have 5 nans because
        # the dependent nans have no overlap with the independent nans.
        result4 = vectorized_beta(dependents, independent, allowed_missing=4)
        assert_equal(np.isnan(result4),
                     np.array([False, False, True, False, False]))

        # With five allowed missing values, everything should produce a value.
        result5 = vectorized_beta(dependents, independent, allowed_missing=5)
        assert_equal(np.isnan(result5),
                     np.array([False, False, False, False, False]))
Example #3
    def test_produce_nans_when_too_much_missing_data(self, nan_offset):
        rand = np.random.RandomState(42)

        true_betas = np.array([-0.5, 0.0, 0.5, 1.0, 1.5])
        independent = as_column(np.linspace(-5., 5., 30))
        noise = as_column(rand.uniform(-.1, .1, 30))
        dependents = 1.0 + true_betas * independent + noise

        # Write nans in a triangular pattern into the middle of the dependent
        # array.
        nan_grid = np.array([[1, 0, 0, 0, 0],
                             [1, 1, 0, 0, 0],
                             [1, 1, 1, 0, 0],
                             [1, 1, 1, 1, 0],
                             [1, 1, 1, 1, 1]], dtype=bool)
        num_nans = nan_grid.sum(axis=0)
        # Move the grid around in the parameterized tests. The positions
        # shouldn't matter.
        dependents[10 + nan_offset:15 + nan_offset][nan_grid] = np.nan

        for allowed_missing in range(7):
            results = vectorized_beta(dependents, independent, allowed_missing)
            for i, true_beta in enumerate(true_betas):
                result = results[i]
                expect_nan = num_nans[i] > allowed_missing
                if expect_nan:
                    self.assertTrue(np.isnan(result))
                else:
                    self.assertTrue(np.abs(result - true_beta) < 0.01)
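For the columns that do produce a value, the 0.01 tolerance is measured against the slope an ordinary least-squares fit would recover on the rows where both series are present (the dependents above are generated as 1.0 + true_beta * independent + noise). A per-column reference sketch, assuming beta here means that OLS slope (reference_beta is an illustrative name, not part of the module under test):

import numpy as np

def reference_beta(dependent_col, independent_col):
    # Fit y = beta * x + alpha on the rows where both series are present;
    # report NaN when fewer than two usable rows remain, mirroring the
    # "too much missing data" case exercised above.
    ok = ~(np.isnan(dependent_col) | np.isnan(independent_col))
    if ok.sum() < 2:
        return np.nan
    beta, _alpha = np.polyfit(independent_col[ok], dependent_col[ok], 1)
    return beta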
Example #5
    def compute(self, today, assets, out, closes, sectors):
        res = np.zeros(closes.shape[1])
        change_ratio = np.diff(closes, axis=0) / closes[:-1]
        latest_sectors = sectors[-1]

        stock_in_sector = latest_sectors == self.sector_code
        change_ratio_in_sector = change_ratio[:, stock_in_sector]

        # epsilon = 0.000001
        # nan_locs = np.where(np.isnan(change_ratio_in_sector))[1]  # column indices
        # print(assets[np.unique(nan_locs)])

        # change_ratio_in_sector = np.where(np.isnan(change_ratio_in_sector), epsilon, change_ratio_in_sector)
        # Sector return: equal-weighted average return across stocks in the sector.
        sector_returns = nanmean(change_ratio_in_sector, axis=1).reshape(-1, 1)

        allowed_missing = int(self.window_length * 0.25)
        # Regress each stock's returns within the sector on the sector's
        # average return to obtain each stock's beta, i.e. its exposure.
        beta = vectorized_beta(
            dependents=change_ratio_in_sector,
            independent=sector_returns,
            allowed_missing=allowed_missing,
        )
        # Write the betas for stocks in the sector; all other entries remain 0.
        res[stock_in_sector] = beta
        out[:] = res
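As a standalone illustration of the shape handling before vectorized_beta is called, here is the same masking and sector-mean step on toy data (the prices, sector codes, and sector_code value below are made up; in compute() the latest sector codes come from sectors[-1] of the windowed input):

import numpy as np

# Toy shapes only: 6 daily closes for 4 assets, two of which are in the
# target sector.  This mirrors the shape handling in compute() above.
closes = np.array([
    [10.0, 20.0, 30.0, 40.0],
    [10.5, 19.8, 30.3, 40.4],
    [10.4, 20.1, 30.9, 39.9],
    [10.9, 20.0, 31.2, 40.5],
    [11.0, 20.4, 31.0, 41.0],
    [11.2, 20.3, 31.5, 41.2],
])
sectors = np.array([101, 205, 101, 205])   # latest sector code per asset
sector_code = 101

change_ratio = np.diff(closes, axis=0) / closes[:-1]      # (5, 4) daily returns
stock_in_sector = sectors == sector_code                   # boolean column mask
change_ratio_in_sector = change_ratio[:, stock_in_sector]  # (5, 2)

# Equal-weighted sector return per day, shaped as the single independent
# column that vectorized_beta expects.
sector_returns = np.nanmean(change_ratio_in_sector, axis=1).reshape(-1, 1)  # (5, 1)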
    def compare_with_empyrical(self, dependents, independent):
        INFINITY = 1000000  # close enough
        result = vectorized_beta(
            dependents, independent, allowed_missing=INFINITY,
        )
        expected = np.array([
            empyrical_beta(dependents[:, i].ravel(), independent.ravel())
            for i in range(dependents.shape[1])
        ])
        assert_equal(result, expected, array_decimal=7)
        return result
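With allowed_missing set effectively to infinity and no NaNs in the inputs, the quantity being compared here reduces to the usual covariance-over-variance slope that empyrical_beta computes column by column. A naive restatement of that, ignoring missing data entirely (naive_vectorized_beta is a reference sketch, not zipline's actual implementation):

import numpy as np

def naive_vectorized_beta(dependents, independent):
    # Each column's beta is cov(dependent, independent) / var(independent),
    # computed over the full sample with no NaN handling.
    ind_demeaned = independent - independent.mean(axis=0)
    dep_demeaned = dependents - dependents.mean(axis=0)
    covariances = (ind_demeaned * dep_demeaned).mean(axis=0)        # (N,)
    independent_variance = (ind_demeaned ** 2).mean(axis=0)          # (1,)
    return covariances / independent_variance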