Python get_ind_mat_average_uniqueness示例，mlfinlab.sampling.bootstrapping.get_ind_mat_average_uniqueness Python示例

示例#1

0

显示文件

    def test_seq_bootstrap(self):
        """
        Test sequential bootstrapping length, indicator matrix length and NaN checks.

        Steps:
        1. Build the indicator matrix with ``get_ind_matrix`` and compare it element-wise with
           ``book_ind_mat_implementation`` evaluated on the same bar index.
        2. Check the matrix shape and a few fixed row ranges of selected columns.
        3. Check the number of samples returned by ``seq_bootstrap``.
        4. On the small book example, run a Monte-Carlo comparison between ``seq_bootstrap``
           draws and ``np.random.choice`` draws using ``get_ind_mat_average_uniqueness``.
        """

        non_nan_meta_labels = self.meta_labeled_events.dropna()
        ind_mat = get_ind_matrix(non_nan_meta_labels, self.data)

        label_endtime = non_nan_meta_labels.t1
        # Keep only price bars between the earliest label start and the latest label end
        in_label_span = ((self.data.index >= non_nan_meta_labels.index.min()) &
                         (self.data.index <= non_nan_meta_labels.t1.max()))
        trimmed_price_bars_index = self.data[in_label_span].index
        # Index for the indicator matrix: label starts, label ends (t1) and trimmed price bars,
        # with duplicates dropped and sorted
        bar_index = sorted({*non_nan_meta_labels.index, *non_nan_meta_labels.t1, *trimmed_price_bars_index})
        ind_mat_book_implementation = book_ind_mat_implementation(bar_index, label_endtime)

        self.assertTrue((ind_mat_book_implementation.values == ind_mat).all())
        # Indicator matrix shape should be (unique(meta_label_index+t1+price_bars_index), t1)
        self.assertEqual(ind_mat.shape, (782, 7))

        # Check indicator matrix values for specific labels
        self.assertTrue((ind_mat[:100, 0] == np.ones(100)).all())
        self.assertTrue((ind_mat[191:340, 2] == np.ones(149)).all())
        self.assertTrue((ind_mat[341:420, 2] == np.zeros(79)).all())
        self.assertTrue((ind_mat[406:412, 4] == np.ones(6)).all())
        self.assertTrue((ind_mat[662:, 6] == np.ones(120)).all())

        bootstrapped_samples = seq_bootstrap(ind_mat, compare=False, verbose=True, warmup_samples=None)
        bootstrapped_samples_1000 = seq_bootstrap(ind_mat, compare=True, sample_length=100)
        # Without sample_length the number of draws is expected to match the number of labels
        self.assertEqual(len(bootstrapped_samples), non_nan_meta_labels.shape[0])
        self.assertEqual(len(bootstrapped_samples_1000), 100)

        # Test sequential bootstrapping on example from a book
        ind_mat = pd.DataFrame(index=range(0, 6), columns=range(0, 3))
        ind_mat.loc[:, 0] = [1, 1, 1, 0, 0, 0]
        ind_mat.loc[:, 1] = [0, 0, 1, 1, 0, 0]
        ind_mat.loc[:, 2] = [0, 0, 0, 0, 1, 1]
        ind_mat = ind_mat.values

        seq_bootstrap(ind_mat, sample_length=3, verbose=True, warmup_samples=[1])  # Show printed probabilities

        # Perform Monte-Carlo test
        # NOTE(review): no random seed is set inside this test, so the comparison below is statistical.
        n_trials = 1000
        standard_unq_array = np.full(n_trials, np.nan)
        seq_unq_array = np.full(n_trials, np.nan)
        for trial in range(n_trials):
            bootstrapped_samples = seq_bootstrap(ind_mat, sample_length=3)
            random_samples = np.random.choice(ind_mat.shape[1], size=3)

            random_unq = get_ind_mat_average_uniqueness(ind_mat[:, random_samples])
            sequential_unq = get_ind_mat_average_uniqueness(ind_mat[:, bootstrapped_samples])

            standard_unq_array[trial] = random_unq
            seq_unq_array[trial] = sequential_unq

        # Sequential bootstrap should not be less unique than plain random sampling
        self.assertGreaterEqual(np.mean(seq_unq_array), np.mean(standard_unq_array))
        self.assertGreaterEqual(np.median(seq_unq_array), np.median(standard_unq_array))

示例#2

0

显示文件

    def test_get_ind_mat_av_uniqueness(self):
        """
        Tests get_ind_mat_average_uniqueness function using indicator matrix from the book example.

        The first three items of the returned object are each reduced to the mean of their strictly
        positive entries and compared with the expected value, followed by the same reduction over
        the whole result. All comparisons use an absolute tolerance of 1e-4.
        """

        ind_mat = pd.DataFrame(index=range(0, 6), columns=range(0, 3))
        ind_mat.loc[:, 0] = [1, 1, 1, 0, 0, 0]
        ind_mat.loc[:, 1] = [0, 0, 1, 1, 0, 0]
        ind_mat.loc[:, 2] = [0, 0, 0, 0, 1, 1]
        ind_mat = ind_mat.values

        labels_av_uniqueness = get_ind_mat_average_uniqueness(ind_mat)
        first_sample_unq, second_sample_unq, third_sample_unq = (
            labels_av_uniqueness[position] for position in range(3))

        tolerance = 1e-4
        # First sample uniqueness
        self.assertAlmostEqual(first_sample_unq[first_sample_unq > 0].mean(), 0.8333, delta=tolerance)
        self.assertAlmostEqual(second_sample_unq[second_sample_unq > 0].mean(), 0.75, delta=tolerance)
        self.assertAlmostEqual(third_sample_unq[third_sample_unq > 0].mean(), 1.0, delta=tolerance)
        # Test matrix av.uniqueness
        self.assertAlmostEqual(labels_av_uniqueness[labels_av_uniqueness > 0].mean(), 0.8571, delta=tolerance)

示例#3

0

显示文件

    def test_get_ind_mat_av_uniqueness(self):
        """
        Tests get_ind_mat_average_uniqueness function using indicator matrix from the book example.

        The value returned for the 6x3 example matrix is compared with 0.8571 using an absolute
        tolerance of 1e-4.
        """

        ind_mat = pd.DataFrame(index=range(0, 6), columns=range(0, 3))
        ind_mat.loc[:, 0] = [1, 1, 1, 0, 0, 0]
        ind_mat.loc[:, 1] = [0, 0, 1, 1, 0, 0]
        ind_mat.loc[:, 2] = [0, 0, 0, 0, 1, 1]
        ind_mat = ind_mat.values

        labels_av_uniqueness = get_ind_mat_average_uniqueness(ind_mat)
        # Test matrix av.uniqueness; assertAlmostEqual reports both values when the check fails
        self.assertAlmostEqual(labels_av_uniqueness, 0.8571, delta=1e-4)

示例#4

0

显示文件

文件： sampling.py 项目： LaoKpa/openquant

# With sequential bootstrapping our goal is to select samples such that with each iteration we can
# maximize average uniqueness of subsamples

# Small example indicator matrix: 6 rows, 3 columns, filled column by column with 0/1 flags
ind_mat = pd.DataFrame(index=range(0, 6), columns=range(0, 3))
ind_mat.loc[:, 0] = [1, 1, 1, 0, 0, 0]
ind_mat.loc[:, 1] = [0, 0, 1, 1, 0, 0]
ind_mat.loc[:, 2] = [0, 0, 0, 0, 1, 1]
# Bare expression: presumably left over from a notebook cell, it has no effect when run as a script
ind_mat
print(ind_mat)

# Get triple barrier method indicator matrix
# (barrier_events and close_prices are not defined in this snippet; they must come from earlier code)
triple_barrier_ind_mat = get_ind_matrix(barrier_events,
                                        price_bars=close_prices['close'])
print(triple_barrier_ind_mat)

ind_mat_uniqueness = get_ind_mat_average_uniqueness(
    triple_barrier_ind_mat)  # TODO: check back after fixing duplicate T values
print(ind_mat_uniqueness)

# Mean of the strictly positive entries; the result is not stored or printed here
first_sample = ind_mat_uniqueness
first_sample[first_sample > 0].mean()

# Jupyter notebook output
# av_unique.loc[0]

# Get the values (replaces the DataFrame with its underlying array)
ind_mat = ind_mat.values

# On the first step all labels will have equal probabilities, as the average uniqueness of a matrix
# with 1 column is 1
phi = [1]
# One placeholder slot per column of the example matrix
uniqueness_array = np.array([None, None, None])
for i in range(0, 3):

示例#5

0

显示文件

    def test_seq_bootstrap(self):
        """
        Test sequential bootstrapping length, indicator matrix length and NaN checks.

        Steps:
        1. Build the indicator matrix with ``get_ind_matrix`` and check its shape and the full
           contents of selected columns.
        2. Check the number of samples returned by ``seq_bootstrap``.
        3. On the small book example, run a Monte-Carlo comparison between ``seq_bootstrap``
           draws and ``np.random.choice`` draws, reducing each uniqueness result to the mean of
           its strictly positive entries.
        """

        non_nan_meta_labels = self.meta_labeled_events.dropna()
        ind_mat = get_ind_matrix(non_nan_meta_labels)
        # Indicator matrix shape should be (meta_label_index+t1, t1)
        self.assertEqual(ind_mat.shape, (13, 7))

        # Check indicator matrix values for specific labels: (column, expected column contents)
        expected_columns = (
            (0, [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
            (2, [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]),
            (4, [0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0]),
            (6, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]),
        )
        for column, expected in expected_columns:
            self.assertTrue((ind_mat[:, column] == expected).all())

        bootstrapped_samples = seq_bootstrap(ind_mat,
                                             compare=False,
                                             verbose=True,
                                             warmup_samples=None)
        bootstrapped_samples_1000 = seq_bootstrap(ind_mat,
                                                  compare=True,
                                                  sample_length=100)
        # Without sample_length the number of draws is expected to match the number of labels
        self.assertEqual(len(bootstrapped_samples), non_nan_meta_labels.shape[0])
        self.assertEqual(len(bootstrapped_samples_1000), 100)

        # Test sequential bootstrapping on example from a book
        ind_mat = pd.DataFrame(index=range(0, 6), columns=range(0, 3))
        ind_mat.loc[:, 0] = [1, 1, 1, 0, 0, 0]
        ind_mat.loc[:, 1] = [0, 0, 1, 1, 0, 0]
        ind_mat.loc[:, 2] = [0, 0, 0, 0, 1, 1]
        ind_mat = ind_mat.values

        seq_bootstrap(ind_mat,
                      sample_length=3,
                      verbose=True,
                      warmup_samples=[1])  # Show printed probabilities

        # Perform Monte-Carlo test
        # NOTE(review): no random seed is set inside this test, so the comparison below is statistical.
        n_trials = 1000
        standard_unq_array = np.full(n_trials, np.nan)
        seq_unq_array = np.full(n_trials, np.nan)
        for trial in range(n_trials):
            bootstrapped_samples = seq_bootstrap(ind_mat, sample_length=3)
            random_samples = np.random.choice(ind_mat.shape[1], size=3)

            random_unq = get_ind_mat_average_uniqueness(
                ind_mat[:, random_samples])
            random_unq_mean = random_unq[random_unq > 0].mean()

            sequential_unq = get_ind_mat_average_uniqueness(
                ind_mat[:, bootstrapped_samples])
            sequential_unq_mean = sequential_unq[sequential_unq > 0].mean()

            standard_unq_array[trial] = random_unq_mean
            seq_unq_array[trial] = sequential_unq_mean

        # Sequential bootstrap should not be less unique than plain random sampling
        self.assertGreaterEqual(np.mean(seq_unq_array), np.mean(standard_unq_array))
        self.assertGreaterEqual(np.median(seq_unq_array), np.median(standard_unq_array))