def test_first_generation(self):
    """
    Test first generation intra-bar features.

    Computes the Roll measure, Roll impact, Corwin-Schultz estimator and
    Bekker-Parkinson volatility from ``self.data`` with ``window=20`` and
    checks, for each result, that its length matches ``self.data`` and that
    its max, mean and the entry indexed by 25 match reference values.
    """
    roll_measure = get_roll_measure(self.data.close, window=20)
    roll_impact = get_roll_impact(self.data.close, self.data.cum_dollar, window=20)
    corwin_schultz = get_corwin_schultz_estimator(self.data.high, self.data.low, window=20)
    bekker_parkinson = get_bekker_parkinson_vol(self.data.high, self.data.low, window=20)

    # Shape assertions: every feature must have one value per input row.
    n_rows = self.data.shape[0]
    for feature in (roll_measure, roll_impact, corwin_schultz, bekker_parkinson):
        self.assertEqual(n_rows, feature.shape[0])

    # Roll measure assertions
    self.assertAlmostEqual(roll_measure.max(), 7.1584, delta=1e-4)
    self.assertAlmostEqual(roll_measure.mean(), 2.341, delta=1e-3)
    self.assertAlmostEqual(roll_measure[25], 1.176, delta=1e-3)  # Test some random value

    # Roll impact assertions.
    # The expected values are of order 1e-8..1e-7, so the tolerance has to be
    # well below that magnitude; a delta as large as the value itself would
    # let the assertion pass even for a result of 0.
    self.assertAlmostEqual(roll_impact.max(), 1.022e-7, delta=1e-9)
    self.assertAlmostEqual(roll_impact.mean(), 3.3445e-8, delta=1e-9)
    self.assertAlmostEqual(roll_impact[25], 1.6807e-8, delta=1e-9)

    # Test Corwin-Schultz
    self.assertAlmostEqual(corwin_schultz.max(), 0.01652, delta=1e-4)
    self.assertAlmostEqual(corwin_schultz.mean(), 0.00151602, delta=1e-4)
    self.assertAlmostEqual(corwin_schultz[25], 0.00139617, delta=1e-4)

    # Test Bekker-Parkinson volatility
    self.assertAlmostEqual(bekker_parkinson.max(), 0.018773, delta=1e-4)
    self.assertAlmostEqual(bekker_parkinson.mean(), 0.001456, delta=1e-4)
    self.assertAlmostEqual(bekker_parkinson[25], 0.000517, delta=1e-4)
window=60, min_periods=60, center=False).kurt() data['kurtosis_30'] = np.log(data['close']).diff().rolling( window=30, min_periods=30, center=False).kurt() data['kurtosis_15'] = np.log(data['close']).diff().rolling( window=15, min_periods=15, center=False).kurt() data['kurtosis_10'] = np.log(data['close']).diff().rolling( window=10, min_periods=10, center=False).kurt() data['kurtosis_5'] = np.log(data['close']).diff().rolling(window=5, min_periods=5, center=False).kurt() # microstructural features data['roll_measure'] = micro.get_roll_measure(data['close']) data['corwin_schultz_est'] = micro.get_corwin_schultz_estimator( data['high'], data['low'], 100) data['bekker_parkinson_vol'] = micro.get_bekker_parkinson_vol( data['high'], data['low'], 100) data['kyle_lambda'] = micro.get_bekker_parkinson_vol(data['close'], data['volume']) data['amihud_lambda'] = micro.get_bar_based_amihud_lambda( data['close'], data['volume']) data['hasbrouck_lambda'] = micro.get_bar_based_hasbrouck_lambda( data['close'], data['volume']) tick_diff = data['close'].diff() data['tick_rule'] = np.where(tick_diff != 0, np.sign(tick_diff), np.sign(tick_diff).shift(periods=-1)) ### REMOVE NAN FOR INDICATORS data.isna().sum().sort_values(ascending=False).head(20) columns_na_below = data.isna().sum() < 12010 data = data.loc[:, columns_na_below] cols_remove_na = range((np.where(data.columns == 'volume')[0].item() + 1),