def test_seq_bootstrap(self):
    """
    Test sequential bootstrapping length, indicator matrix length and NaN checks.

    Steps:
      1. Build the indicator matrix from the non-NaN meta-labeled events and compare it
         element-wise with the result of book_ind_mat_implementation.
      2. Check the matrix shape and a handful of hard-coded column slices.
      3. Check the number of samples returned by seq_bootstrap with the default and an
         explicit sample_length.
      4. Monte-Carlo check on the 6x3 example matrix: the average uniqueness of sequentially
         bootstrapped samples must not be lower (mean and median) than that of uniformly
         drawn samples.
    """
    monte_carlo_runs = 1000

    non_nan_meta_labels = self.meta_labeled_events.dropna()
    ind_mat = get_ind_matrix(non_nan_meta_labels, self.data)

    label_endtime = non_nan_meta_labels.t1
    in_label_span = ((self.data.index >= non_nan_meta_labels.index.min()) &
                     (self.data.index <= non_nan_meta_labels.t1.max()))
    trimmed_price_bars_index = self.data[in_label_span].index

    # Generate index for indicator matrix from t1 and index
    bar_index = list(non_nan_meta_labels.index)
    bar_index.extend(non_nan_meta_labels.t1)
    bar_index.extend(trimmed_price_bars_index)
    bar_index = sorted(set(bar_index))  # Drop duplicates and sort
    ind_mat_book_implementation = book_ind_mat_implementation(bar_index, label_endtime)

    # assert_array_equal reports the mismatching elements instead of a bare "False is not true"
    np.testing.assert_array_equal(ind_mat_book_implementation.values, ind_mat)

    # Indicator matrix shape should be (unique(meta_label_index+t1+price_bars_index), t1)
    self.assertEqual(ind_mat.shape, (782, 7))

    # Check indicator matrix values for specific labels
    np.testing.assert_array_equal(ind_mat[:100, 0], np.ones(100))
    np.testing.assert_array_equal(ind_mat[191:340, 2], np.ones(149))
    np.testing.assert_array_equal(ind_mat[341:420, 2], np.zeros(79))
    np.testing.assert_array_equal(ind_mat[406:412, 4], np.ones(6))
    np.testing.assert_array_equal(ind_mat[662:, 6], np.ones(120))

    bootstrapped_samples = seq_bootstrap(ind_mat, compare=False, verbose=True, warmup_samples=None)
    bootstrapped_samples_1000 = seq_bootstrap(ind_mat, compare=True, sample_length=100)
    self.assertEqual(len(bootstrapped_samples), non_nan_meta_labels.shape[0])
    self.assertEqual(len(bootstrapped_samples_1000), 100)

    # Test sequential bootstrapping on example from a book
    ind_mat = pd.DataFrame(index=range(0, 6), columns=range(0, 3))
    ind_mat.loc[:, 0] = [1, 1, 1, 0, 0, 0]
    ind_mat.loc[:, 1] = [0, 0, 1, 1, 0, 0]
    ind_mat.loc[:, 2] = [0, 0, 0, 0, 1, 1]
    ind_mat = ind_mat.values

    seq_bootstrap(ind_mat, sample_length=3, verbose=True, warmup_samples=[1])  # Show printed probabilities

    # Perform Monte-Carlo test.
    # Seed NumPy's global RNG so the uniform baseline drawn below is reproducible between runs.
    # NOTE(review): this also fixes seq_bootstrap's draws only if it samples through np.random - confirm.
    np.random.seed(0)
    standard_unq_array = np.full(monte_carlo_runs, np.nan)
    seq_unq_array = np.full(monte_carlo_runs, np.nan)
    for i in range(monte_carlo_runs):
        bootstrapped_samples = seq_bootstrap(ind_mat, sample_length=3)
        random_samples = np.random.choice(ind_mat.shape[1], size=3)

        standard_unq_array[i] = get_ind_mat_average_uniqueness(ind_mat[:, random_samples])
        seq_unq_array[i] = get_ind_mat_average_uniqueness(ind_mat[:, bootstrapped_samples])

    self.assertGreaterEqual(np.mean(seq_unq_array), np.mean(standard_unq_array))
    self.assertGreaterEqual(np.median(seq_unq_array), np.median(standard_unq_array))
def test_get_ind_mat_av_uniqueness(self):
    """
    Tests get_ind_mat_average_uniqueness function using indicator matrix from the book example.

    The value returned for the 6x3 example matrix is indexed per sample (entries 0, 1, 2); the
    mean of the positive entries of each sample, and of the whole result, is compared with the
    expected value to within 1e-4.
    """
    tolerance = 1e-4

    ind_mat = pd.DataFrame(index=range(0, 6), columns=range(0, 3))
    ind_mat.loc[:, 0] = [1, 1, 1, 0, 0, 0]
    ind_mat.loc[:, 1] = [0, 0, 1, 1, 0, 0]
    ind_mat.loc[:, 2] = [0, 0, 0, 0, 1, 1]
    ind_mat = ind_mat.values

    labels_av_uniqueness = get_ind_mat_average_uniqueness(ind_mat)

    # Expected average uniqueness of the first, second and third sample
    expected_sample_unq = (0.8333, 0.75, 1.0)
    for sample_idx, expected_unq in enumerate(expected_sample_unq):
        sample_unq = labels_av_uniqueness[sample_idx]
        # assertAlmostEqual(delta=...) reports both values on failure, unlike assertTrue(abs(...) <= tol)
        self.assertAlmostEqual(sample_unq[sample_unq > 0].mean(), expected_unq, delta=tolerance,
                               msg='average uniqueness mismatch for sample {}'.format(sample_idx))

    # Test matrix av.uniqueness
    self.assertAlmostEqual(labels_av_uniqueness[labels_av_uniqueness > 0].mean(), 0.8571,
                           delta=tolerance)
def test_get_ind_mat_av_uniqueness(self):
    """
    Tests get_ind_mat_average_uniqueness function using indicator matrix from the book example.

    The value returned for the 6x3 example matrix is compared with 0.8571 to within 1e-4.
    """
    ind_mat = pd.DataFrame(index=range(0, 6), columns=range(0, 3))
    ind_mat.loc[:, 0] = [1, 1, 1, 0, 0, 0]
    ind_mat.loc[:, 1] = [0, 0, 1, 1, 0, 0]
    ind_mat.loc[:, 2] = [0, 0, 0, 0, 1, 1]
    ind_mat = ind_mat.values

    labels_av_uniqueness = get_ind_mat_average_uniqueness(ind_mat)

    # Test matrix av.uniqueness; assertAlmostEqual(delta=...) prints both values when it fails
    self.assertAlmostEqual(labels_av_uniqueness, 0.8571, delta=1e-4)
# With sequential bootsrapping our goal is to select samples such that with each iteration we can # maximize average unqiueness of subsamples ind_mat = pd.DataFrame(index=range(0, 6), columns=range(0, 3)) ind_mat.loc[:, 0] = [1, 1, 1, 0, 0, 0] ind_mat.loc[:, 1] = [0, 0, 1, 1, 0, 0] ind_mat.loc[:, 2] = [0, 0, 0, 0, 1, 1] ind_mat print(ind_mat) # Get triple barier method indicator matrix triple_barrier_ind_mat = get_ind_matrix(barrier_events, price_bars=close_prices['close']) print(triple_barrier_ind_mat) ind_mat_uniqueness = get_ind_mat_average_uniqueness( triple_barrier_ind_mat) ### CHECK BACK AFTER FIXING DUPLICATE T Values print(ind_mat_uniqueness) first_sample = ind_mat_uniqueness first_sample[first_sample > 0].mean() # Jupyter notebook output # av_unique.loc[0] # Get the values ind_mat = ind_mat.values # On the first step all labels will have equal probabilities as average uniquess of matrix with 1 column is 1 phi = [1] uniqueness_array = np.array([None, None, None]) for i in range(0, 3):
def test_seq_bootstrap(self):
    """
    Test sequential bootstrapping length, indicator matrix length and NaN checks.

    Steps:
      1. Build the indicator matrix from the non-NaN meta-labeled events and check its shape
         and four hard-coded columns.
      2. Check the number of samples returned by seq_bootstrap with the default and an
         explicit sample_length.
      3. Monte-Carlo check on the 6x3 example matrix: the mean positive uniqueness of
         sequentially bootstrapped samples must not be lower (mean and median) than that of
         uniformly drawn samples.
    """
    monte_carlo_runs = 1000

    non_nan_meta_labels = self.meta_labeled_events.dropna()
    ind_mat = get_ind_matrix(non_nan_meta_labels)

    # Indicator matrix shape should be (meta_label_index+t1, t1)
    self.assertEqual(ind_mat.shape, (13, 7))

    # Check indicator matrix values for specific labels (column index -> expected column)
    expected_columns = {
        0: [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        2: [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
        4: [0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0],
        6: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
    }
    for column, expected in expected_columns.items():
        # assert_array_equal reports the mismatching elements instead of a bare "False is not true"
        np.testing.assert_array_equal(ind_mat[:, column], expected)

    bootstrapped_samples = seq_bootstrap(ind_mat, compare=False, verbose=True, warmup_samples=None)
    bootstrapped_samples_1000 = seq_bootstrap(ind_mat, compare=True, sample_length=100)
    self.assertEqual(len(bootstrapped_samples), non_nan_meta_labels.shape[0])
    self.assertEqual(len(bootstrapped_samples_1000), 100)

    # Test sequential bootstrapping on example from a book
    ind_mat = pd.DataFrame(index=range(0, 6), columns=range(0, 3))
    ind_mat.loc[:, 0] = [1, 1, 1, 0, 0, 0]
    ind_mat.loc[:, 1] = [0, 0, 1, 1, 0, 0]
    ind_mat.loc[:, 2] = [0, 0, 0, 0, 1, 1]
    ind_mat = ind_mat.values

    seq_bootstrap(ind_mat, sample_length=3, verbose=True, warmup_samples=[1])  # Show printed probabilities

    # Perform Monte-Carlo test.
    # Seed NumPy's global RNG so the uniform baseline drawn below is reproducible between runs.
    # NOTE(review): this also fixes seq_bootstrap's draws only if it samples through np.random - confirm.
    np.random.seed(0)
    standard_unq_array = np.full(monte_carlo_runs, np.nan)
    seq_unq_array = np.full(monte_carlo_runs, np.nan)
    for i in range(monte_carlo_runs):
        bootstrapped_samples = seq_bootstrap(ind_mat, sample_length=3)
        random_samples = np.random.choice(ind_mat.shape[1], size=3)

        random_unq = get_ind_mat_average_uniqueness(ind_mat[:, random_samples])
        sequential_unq = get_ind_mat_average_uniqueness(ind_mat[:, bootstrapped_samples])

        # Only positive entries contribute to the average uniqueness
        standard_unq_array[i] = random_unq[random_unq > 0].mean()
        seq_unq_array[i] = sequential_unq[sequential_unq > 0].mean()

    self.assertGreaterEqual(np.mean(seq_unq_array), np.mean(standard_unq_array))
    self.assertGreaterEqual(np.median(seq_unq_array), np.median(standard_unq_array))