def test_bad_dates(self):
    # factor_matrix is expected to raise ValueError whenever the requested
    # start date is not strictly before the end date.
    engine = SimpleFFCEngine(self.loader, self.dates, self.asset_finder)
    expected_error = "start_date must be before end_date .*"
    start = self.dates[2]

    # First an end date earlier than the start, then one equal to it.
    for end in (self.dates[1], self.dates[2]):
        with self.assertRaisesRegexp(ValueError, expected_error):
            engine.factor_matrix({}, start, end)
def test_multiple_rolling_factors(self):
    # Compute three RollingSumDifference factors in a single factor_matrix
    # call and check every cell of each resulting column.
    engine = SimpleFFCEngine(self.loader, self.dates, self.asset_finder)

    num_dates = 5
    shape = (num_dates, len(self.assets))
    window = self.dates[10:10 + num_dates]

    short_factor = RollingSumDifference(window_length=3)
    long_factor = RollingSumDifference(window_length=5)
    high_factor = RollingSumDifference(
        window_length=3,
        inputs=[USEquityPricing.open, USEquityPricing.high],
    )
    terms = {
        'short': short_factor,
        'long': long_factor,
        'high': high_factor,
    }

    results = engine.factor_matrix(terms, window[0], window[-1])
    self.assertEqual(set(results.columns), {'short', 'high', 'long'})

    # 'short' and 'long' are row-wise sums over an array whose values are
    # all (1 - 2); 'high' sums over an array whose values are all (1 - 3).
    expected_fill = (
        ('short', -short_factor.window_length),
        ('long', -long_factor.window_length),
        ('high', -2 * high_factor.window_length),
    )
    for column, fill_value in expected_fill:
        assert_array_equal(
            results[column].unstack().values,
            full(shape, fill_value),
        )
def test_engine_with_multicolumn_loader(self):
    # Drive the engine through a MultiColumnLoader that dispatches each
    # pricing column to its own ConstantLoader (open -> 1, close -> 2).
    open_col = USEquityPricing.open
    close_col = USEquityPricing.close

    def constant_loader(column, value):
        # One loader serving a single column with a fixed value.
        return ConstantLoader(
            dates=self.dates,
            assets=self.assets,
            constants={column: value},
        )

    loader = MultiColumnLoader({
        open_col: constant_loader(open_col, 1),
        close_col: constant_loader(close_col, 2),
    })
    engine = SimpleFFCEngine(loader, self.dates, self.asset_finder)

    result = engine.factor_matrix(
        {'f': RollingSumDifference()},
        self.dates[2],
        self.dates[-1],
    )

    self.assertIsNotNone(result)
    self.assertEqual({'f'}, set(result.columns))

    # Expected value per cell: (open - close) * window = (1 - 2) * 3 = -3
    # (window of 3 as stated by the original comment).  The first two
    # dates are skipped so that the window is full.
    expected_length = len(self.assets) * (len(self.dates) - 2)
    check_arrays(result['f'], Series([-3] * expected_length))
def test_engine_with_multicolumn_loader(self):
    # Drive the engine through a MultiColumnLoader that dispatches each
    # pricing column to its own ConstantLoader (open -> 1, close -> 2).
    open_col = USEquityPricing.open
    close_col = USEquityPricing.close

    # Test a thirty-entry slice of the calendar that stops short of its
    # final two entries.  Per the original note, testing the last day of
    # the calendar would leave no assets in the output because of the
    # assets' end dates.
    # NOTE(review): the original comment was cut off mid-sentence; confirm
    # the exact reason against the asset fixtures.
    dates_to_test = self.dates[-32:-2]

    def constant_loader(column, value):
        # One loader serving a single column with a fixed value.
        return ConstantLoader(
            dates=self.dates,
            assets=self.assets,
            constants={column: value},
        )

    loader = MultiColumnLoader({
        open_col: constant_loader(open_col, 1),
        close_col: constant_loader(close_col, 2),
    })
    engine = SimpleFFCEngine(loader, self.dates, self.asset_finder)

    result = engine.factor_matrix(
        {'f': RollingSumDifference()},
        dates_to_test[0],
        dates_to_test[-1],
    )

    self.assertIsNotNone(result)
    self.assertEqual({'f'}, set(result.columns))

    # One -3 per (date, asset) pair; the index repeats the asset list once
    # for every tested date.
    result_index = self.assets * len(dates_to_test)
    expected_values = full((len(result_index),), -3)
    check_arrays(
        result['f'],
        Series(index=result_index, data=expected_values),
    )
def test_numeric_factor(self):
    # Check that arithmetic on factors (sum and division by a scalar)
    # yields a factor whose output matches the same arithmetic applied to
    # the outputs of its operands.
    constants = self.constants
    engine = SimpleFFCEngine(self.loader, self.dates, self.asset_finder)
    num_dates = 5
    dates = self.dates[10:10 + num_dates]

    high, low = USEquityPricing.high, USEquityPricing.low
    # Trailing underscore so the local does not shadow the builtin open().
    open_, close = USEquityPricing.open, USEquityPricing.close

    high_minus_low = RollingSumDifference(inputs=[high, low])
    open_minus_close = RollingSumDifference(inputs=[open_, close])
    avg = (high_minus_low + open_minus_close) / 2

    results = engine.factor_matrix(
        {
            'high_low': high_minus_low,
            'open_close': open_minus_close,
            'avg': avg,
        },
        dates[0],
        dates[-1],
    )

    # The 3.0 multiplier presumably reflects RollingSumDifference's default
    # window length -- confirm against the factor definition.
    expected_high_low = 3.0 * (constants[high] - constants[low])
    expected_open_close = 3.0 * (constants[open_] - constants[close])
    expected_avg = (expected_high_low + expected_open_close) / 2.0

    # Same order as the factors were declared: operands first, then avg.
    expectations = (
        ('high_low', expected_high_low),
        ('open_close', expected_open_close),
        ('avg', expected_avg),
    )
    for column, expected in expectations:
        assert_frame_equal(
            results[column].unstack(),
            DataFrame(expected, index=dates, columns=self.assets),
        )
def test_numeric_factor(self):
    # Check that arithmetic on factors (sum and division by a scalar)
    # yields a factor whose output matches the same arithmetic applied to
    # the outputs of its operands.
    constants = self.constants
    loader = self.loader
    engine = SimpleFFCEngine(loader, self.dates, self.asset_finder)

    num_dates = 5
    dates = self.dates[10:10 + num_dates]

    high, low = USEquityPricing.high, USEquityPricing.low
    # Trailing underscore so the local does not shadow the builtin open().
    open_, close = USEquityPricing.open, USEquityPricing.close

    high_minus_low = RollingSumDifference(inputs=[high, low])
    open_minus_close = RollingSumDifference(inputs=[open_, close])
    avg = (high_minus_low + open_minus_close) / 2

    results = engine.factor_matrix(
        {
            'high_low': high_minus_low,
            'open_close': open_minus_close,
            'avg': avg,
        },
        dates[0],
        dates[-1],
    )

    # The 3.0 multiplier presumably reflects RollingSumDifference's default
    # window length -- confirm against the factor definition.
    high_low_result = results['high_low'].unstack()
    expected_high_low = 3.0 * (constants[high] - constants[low])
    assert_frame_equal(
        high_low_result,
        DataFrame(expected_high_low, index=dates, columns=self.assets),
    )

    open_close_result = results['open_close'].unstack()
    expected_open_close = 3.0 * (constants[open_] - constants[close])
    assert_frame_equal(
        open_close_result,
        DataFrame(expected_open_close, index=dates, columns=self.assets),
    )

    # The combined factor should equal the mean of its two operands.
    avg_result = results['avg'].unstack()
    expected_avg = (expected_high_low + expected_open_close) / 2.0
    assert_frame_equal(
        avg_result,
        DataFrame(expected_avg, index=dates, columns=self.assets),
    )
def test_single_factor(self):
    # Compute one default RollingSumDifference over five dates and check
    # that every (date, asset) cell equals minus the window length.
    engine = SimpleFFCEngine(self.loader, self.dates, self.asset_finder)

    num_dates = 5
    expected_shape = (num_dates, len(self.assets))
    window = self.dates[10:10 + num_dates]

    factor = RollingSumDifference()
    result = engine.factor_matrix({'f': factor}, window[0], window[-1])

    self.assertEqual(set(result.columns), {'f'})

    actual = result['f'].unstack().values
    assert_array_equal(actual, full(expected_shape, -factor.window_length))
def test_SMA(self):
    # Compare the engine's SimpleMovingAverage of close prices against a
    # rolling mean computed directly from the writer's expected values.
    window_length = 5
    assets = self.all_assets

    engine = SimpleFFCEngine(
        self.ffc_loader,
        self.env.trading_days,
        self.finder,
    )

    dates = date_range(
        self.first_asset_start + self.trading_day,
        self.last_asset_end,
        freq=self.trading_day,
    )
    # The first `window_length` dates only serve to fill the window.
    dates_to_test = dates[window_length:]

    sma_factor = SimpleMovingAverage(
        inputs=(USEquityPricing.close,),
        window_length=window_length,
    )
    results = engine.factor_matrix(
        {'sma': sma_factor},
        dates_to_test[0],
        dates_to_test[-1],
    )

    # The raw inputs are shifted back by one trading day because computed
    # results are expected to be anchored on the **previous** day's data.
    shifted_closes = self.writer.expected_values_2d(
        dates - self.trading_day,
        assets,
        'close',
    )
    expected_raw = rolling_mean(shifted_closes, window_length, min_periods=1)

    expected = DataFrame(
        # Drop the leading rows that were only needed to compute the SMAs.
        expected_raw[window_length:],
        index=dates_to_test,
        columns=self.finder.retrieve_all(assets),
    )
    self.write_nans(expected)

    actual = results['sma'].unstack()
    assert_frame_equal(actual, expected)
def test_multiple_rolling_factors(self):
    # Compute three RollingSumDifference factors in a single factor_matrix
    # call, then check the result's index and every cell of each column.
    assets = self.assets
    finder = self.asset_finder
    engine = SimpleFFCEngine(self.loader, self.dates, self.asset_finder)

    num_dates = 5
    shape = (num_dates, len(assets))
    window = self.dates[10:10 + num_dates]

    short_factor = RollingSumDifference(window_length=3)
    long_factor = RollingSumDifference(window_length=5)
    high_factor = RollingSumDifference(
        window_length=3,
        inputs=[USEquityPricing.open, USEquityPricing.high],
    )
    terms = {
        'short': short_factor,
        'long': long_factor,
        'high': high_factor,
    }

    results = engine.factor_matrix(terms, window[0], window[-1])

    self.assertEqual(set(results.columns), {'short', 'high', 'long'})
    # Index check against the tested dates and the retrieved assets.
    assert_product(self, results.index, window, finder.retrieve_all(assets))

    # 'short' and 'long' are row-wise sums over an array whose values are
    # all (1 - 2); 'high' sums over an array whose values are all (1 - 3).
    expected_fill = (
        ('short', -short_factor.window_length),
        ('long', -long_factor.window_length),
        ('high', -2 * high_factor.window_length),
    )
    for column, fill_value in expected_fill:
        assert_array_equal(
            results[column].unstack().values,
            full(shape, fill_value),
        )
def test_drawdown(self):
    # The monotonically-increasing data produced by SyntheticDailyBarWriter
    # exercises two pathological cases for MaxDrawdown.  The computed
    # results are not interesting in themselves (everything is either NaN
    # or zero), but verifying that those corner cases are handled
    # correctly is valuable.
    window_length = 5
    assets = self.all_assets

    engine = SimpleFFCEngine(
        self.ffc_loader,
        self.env.trading_days,
        self.finder,
    )

    dates = date_range(
        self.first_asset_start + self.trading_day,
        self.last_asset_end,
        freq=self.trading_day,
    )
    dates_to_test = dates[window_length:]

    max_drawdown = MaxDrawdown(
        inputs=(USEquityPricing.close,),
        window_length=window_length,
    )
    results = engine.factor_matrix(
        {'drawdown': max_drawdown},
        dates_to_test[0],
        dates_to_test[-1],
    )

    # Expect NaN where the asset was undefined and 0 everywhere else,
    # since the input is always increasing.  Start from an all-zero frame
    # and let write_nans adjust it.
    all_zeros = zeros((len(dates_to_test), len(assets)), dtype=float)
    expected = DataFrame(
        data=all_zeros,
        index=dates_to_test,
        columns=self.finder.retrieve_all(assets),
    )
    self.write_nans(expected)

    actual = results['drawdown'].unstack()
    assert_frame_equal(expected, actual)
def test_drawdown(self):
    # The monotonically-increasing data produced by SyntheticDailyBarWriter
    # exercises two pathological cases for MaxDrawdown.  The computed
    # results are not interesting in themselves (everything is either NaN
    # or zero), but verifying that those corner cases are handled
    # correctly is valuable.
    window_length = 5
    dates = self.all_dates
    assets = self.all_assets

    engine = SimpleFFCEngine(
        self.ffc_loader,
        self.env.trading_days,
        self.finder,
    )
    max_drawdown = MaxDrawdown(
        inputs=(USEquityPricing.close,),
        window_length=window_length,
    )

    # Skip the first `window_length` dates when requesting results.
    tested_dates = dates[window_length:]
    results = engine.factor_matrix(
        {'drawdown': max_drawdown},
        dates[window_length],
        dates[-1],
    )
    actual = results['drawdown']

    # Expect NaN where the asset was undefined and 0 everywhere else,
    # since the input is always increasing: take the expected closes and
    # zero out every entry that is not NaN.
    closes = self.writer.expected_values_2d(dates, assets, 'close')
    defined = ~isnan(closes)
    closes[defined] = 0
    expected_values = closes[window_length:]

    expected_frame = DataFrame(
        expected_values,
        index=tested_dates,
        columns=assets,
    )
    assert_frame_equal(actual.unstack(), expected_frame)
def test_drawdown(self):
    # SyntheticDailyBarWriter produces monotonically-increasing data, which
    # exercises two pathological cases for MaxDrawdown.  Every computed
    # value is either NaN or zero, so the numbers themselves are not
    # useful, but checking these corner cases is.
    dates, assets = self.all_dates, self.all_assets
    window_length = 5

    factor = MaxDrawdown(
        inputs=(USEquityPricing.close, ),
        window_length=window_length,
    )
    engine = SimpleFFCEngine(
        self.ffc_loader,
        self.env.trading_days,
        self.finder,
    )
    results = engine.factor_matrix(
        {'drawdown': factor},
        dates[window_length],
        dates[-1],
    )

    # NaN is expected where the asset was undefined, and 0 everywhere
    # else because the input is always increasing.
    expected = self.writer.expected_values_2d(dates, assets, 'close')
    not_nan = ~isnan(expected)
    expected[not_nan] = 0
    # Drop the leading rows that precede the first requested date.
    expected = expected[window_length:]

    assert_frame_equal(
        results['drawdown'].unstack(),
        DataFrame(expected, index=dates[window_length:], columns=assets),
    )
def test_SMA(self):
    # Compare the engine's SimpleMovingAverage of close prices against a
    # rolling mean computed directly from the writer's expected closes.
    window_length = 5
    dates = self.all_dates
    assets = self.all_assets

    engine = SimpleFFCEngine(
        self.ffc_loader,
        self.env.trading_days,
        self.finder,
    )
    sma_factor = SimpleMovingAverage(
        inputs=(USEquityPricing.close, ),
        window_length=window_length,
    )

    tested_dates = dates[window_length:]
    results = engine.factor_matrix(
        {'sma': sma_factor},
        dates[window_length],
        dates[-1],
    )

    raw_closes = self.writer.expected_values_2d(dates, assets, 'close')
    expected_values = rolling_mean(raw_closes, window_length, min_periods=1)
    # Wherever the raw close is NaN the expected average is NaN as well.
    missing = isnan(raw_closes)
    expected_values[missing] = nan
    # Drop the leading rows that precede the first requested date.
    expected_values = expected_values[window_length:]

    actual = results['sma'].unstack()
    expected_frame = DataFrame(
        expected_values,
        index=tested_dates,
        columns=assets,
    )
    assert_frame_equal(actual, expected_frame)
def test_SMA(self):
    # The engine's SimpleMovingAverage over close prices must match a
    # rolling mean taken over the writer's expected close values.
    dates, assets = self.all_dates, self.all_assets
    window_length = 5

    moving_average = SimpleMovingAverage(
        inputs=(USEquityPricing.close,),
        window_length=window_length,
    )
    engine = SimpleFFCEngine(
        self.ffc_loader,
        self.env.trading_days,
        self.finder,
    )
    results = engine.factor_matrix(
        {'sma': moving_average},
        dates[window_length],
        dates[-1],
    )

    raw_closes = self.writer.expected_values_2d(dates, assets, 'close')
    expected = rolling_mean(raw_closes, window_length, min_periods=1)
    # Entries whose raw close is NaN are expected to stay NaN.
    expected[isnan(raw_closes)] = nan
    # Only rows from index `window_length` onward were requested.
    expected = expected[window_length:]

    assert_frame_equal(
        results['sma'].unstack(),
        DataFrame(expected, index=dates[window_length:], columns=assets),
    )
def test_compute_with_adjustments(self):
    # Moving averages over an adjusted column ('high') and an unadjusted
    # column ('low') are compared against their baseline frames for every
    # window length in 1..3 and every (start, stop) pair of date indices
    # yielded by product_upper_triangle.
    dates, assets = self.dates, self.assets
    low, high = USEquityPricing.low, USEquityPricing.high

    # Date indices at which each adjustment applies, and its multiplier.
    apply_idxs = [3, 10, 16]
    ratios = [2.0, 3.0, 5.0]

    def apply_date(idx, offset=0):
        return dates[apply_idxs[idx] + offset]

    # One MULTIPLY adjustment per apply index, all on the second asset,
    # ending on the date just before it is applied.
    adjustments = DataFrame.from_records([
        dict(
            kind=MULTIPLY,
            sid=assets[1],
            value=ratio,
            start_date=None,
            end_date=apply_date(position, offset=-1),
            apply_date=apply_date(position),
        )
        for position, ratio in enumerate(ratios)
    ])

    low_base = DataFrame(self.make_frame(30.0))
    low_loader = DataFrameFFCLoader(low, low_base.copy(), adjustments=None)

    # Pre-apply the inverse of each adjustment to the baseline rows that
    # precede its apply index.
    high_base = DataFrame(self.make_frame(30.0))
    for apply_idx, ratio in zip(apply_idxs, ratios):
        high_base.iloc[:apply_idx, 1] /= ratio
    high_loader = DataFrameFFCLoader(high, high_base, adjustments)

    loader = MultiColumnLoader({low: low_loader, high: high_loader})
    engine = SimpleFFCEngine(loader, self.dates, self.asset_finder)

    for window_length in range(1, 4):
        terms = {
            'low': SimpleMovingAverage(
                inputs=[USEquityPricing.low],
                window_length=window_length,
            ),
            'high': SimpleMovingAverage(
                inputs=[USEquityPricing.high],
                window_length=window_length,
            ),
        }

        bounds = product_upper_triangle(range(window_length, len(dates)))
        for start, stop in bounds:
            results = engine.factor_matrix(terms, dates[start], dates[stop])
            self.assertEqual(set(results.columns), {'low', 'high'})

            # +1 so the slice includes the end date.
            expected_rows = slice(start, stop + 1)
            by_column = results.unstack()

            assert_frame_equal(by_column['low'], low_base.iloc[expected_rows])
            assert_frame_equal(
                by_column['high'],
                high_base.iloc[expected_rows],
            )
def test_compute_with_adjustments(self):
    # Check that moving averages computed through the engine match the
    # baseline frames, both for a column loaded without adjustments
    # ('low') and for one loaded with three MULTIPLY adjustments ('high').
    dates, assets = self.dates, self.assets
    low, high = USEquityPricing.low, USEquityPricing.high
    apply_idxs = [3, 10, 16]

    def apply_date(idx, offset=0):
        return dates[apply_idxs[idx] + offset]

    def multiply_record(idx, value):
        # Adjustment on the second asset that ends the day before it is
        # applied.
        return dict(
            kind=MULTIPLY,
            sid=assets[1],
            value=value,
            start_date=None,
            end_date=apply_date(idx, offset=-1),
            apply_date=apply_date(idx),
        )

    adjustments = DataFrame.from_records(
        [
            multiply_record(0, 2.0),
            multiply_record(1, 3.0),
            multiply_record(2, 5.0),
        ]
    )

    low_base = DataFrame(self.make_frame(30.0))
    low_loader = DataFrameFFCLoader(low, low_base.copy(), adjustments=None)

    # The baseline gets the inverse of every adjustment pre-applied to the
    # rows before that adjustment's apply index.
    high_base = DataFrame(self.make_frame(30.0))
    for apply_idx, divisor in zip(apply_idxs, (2.0, 3.0, 5.0)):
        high_base.iloc[:apply_idx, 1] /= divisor
    high_loader = DataFrameFFCLoader(high, high_base, adjustments)

    engine = SimpleFFCEngine(
        MultiColumnLoader({low: low_loader, high: high_loader}),
        self.dates,
        self.asset_finder,
    )

    for window_length in range(1, 4):
        low_mavg = SimpleMovingAverage(
            inputs=[USEquityPricing.low],
            window_length=window_length,
        )
        high_mavg = SimpleMovingAverage(
            inputs=[USEquityPricing.high],
            window_length=window_length,
        )

        index_pairs = product_upper_triangle(
            range(window_length, len(dates)),
        )
        for start, stop in index_pairs:
            results = engine.factor_matrix(
                {'low': low_mavg, 'high': high_mavg},
                dates[start],
                dates[stop],
            )
            self.assertEqual(set(results.columns), {'low', 'high'})

            # The end date is inclusive, hence stop + 1.
            rows = slice(start, stop + 1)

            low_results = results.unstack()['low']
            assert_frame_equal(low_results, low_base.iloc[rows])

            high_results = results.unstack()['high']
            assert_frame_equal(high_results, high_base.iloc[rows])