def test_load_one_day_parquet(self):
    """Load two hours of one day's consolidated feed for LTC-USD and print it."""
    product = "LTC-USD"
    day, month = 17, 5
    window_start = datetime.datetime(2018, month, day, 0, 0, 0)
    window_end = datetime.datetime(2018, month, day, 1, 59, 59)
    feed_root = "/Users/jamesprince/project-data/data/consolidated-feed/" + product + "/"
    feed_df = DataLoader.load_feed(feed_root, window_start, window_end, product)
    print(feed_df)
def test_get_orders_per_minute(self):
    """Report the daily limit-order total and per-minute average for LTC-USD."""
    product = "LTC-USD"
    root = "/Users/jamesprince/project-data/data/consolidated-feed/"
    day_start = datetime.datetime(2018, 5, 17, 0, 0, 0)
    day_end = datetime.datetime(2018, 5, 17, 23, 59, 59)

    feed_df = DataLoader.load_feed(root + product + "/", day_start, day_end, product)
    limit_orders = DataSplitter.get_limit_orders(DataSplitter.get_orders(feed_df))

    total = len(limit_orders)
    print(str(total) + " total limit orders per day for " + product)
    # Average over the 1440 minutes in a day.
    print(str(total / (24 * 60)) + " limit orders per minute (on average) for " + product)
def test_lypaunov_windowed(self):
    """Estimate Lyapunov exponents of order book mid-prices over sliding
    windows across a day and plot the resulting exponent series.

    NOTE(review): "lypaunov" looks like a typo for "Lyapunov"; the name is
    kept unchanged so test discovery/selection by name still works.
    """
    series_start = datetime.datetime(2018, 5, 17, 1, 0, 0)
    series_end = datetime.datetime(2018, 5, 17, 23, 0, 0)

    parser = configparser.ConfigParser()
    parser.read("../config/backtest.ini")
    config = BacktestConfig(parser)

    ob_seq, ob_state = reconstruct_orderbook(config, series_start, logging.getLogger("test"))
    evolutor = OrderBookEvolutor(ob_state, series_start, ob_seq)
    feed_df = DataLoader.load_feed(self.root, series_start, series_end, "LTC-USD")
    evo = evolutor.evolve_orderbook_discrete(feed_df, 1)

    window_minutes = 30
    step_minutes = 5
    num_samples = int((series_end - series_start).total_seconds() / (step_minutes * 60))

    times = []
    lyap_exps = []
    for sample_idx in range(num_samples):
        win_start = series_start + datetime.timedelta(seconds=sample_idx * step_minutes * 60)
        win_end = win_start + datetime.timedelta(seconds=window_minutes * 60)

        # Restrict the evolved book to this window (strict bounds, as before).
        in_window = evo[evo['time'] > win_start]
        in_window = in_window[in_window['time'] < win_end]

        prices = np.asarray(in_window['midprice'].dropna(), dtype=np.float32)
        print(prices)
        exponents = nolds.lyap_e(prices)
        print(exponents)

        times.append(win_start)
        # Keep only the largest exponent for plotting.
        lyap_exps.append(exponents[0])

    Statistics.plot_lyapunov_exponent(times, lyap_exps, "LTC-USD", series_start,
                                      step_minutes, window_minutes)
def reconstruct_orderbook(config, sim_st, logger):
    """Reconstruct the order book state as of ``sim_st``.

    Locates the saved order book snapshot closest to ``sim_st + 1 hour``,
    loads the real feed from roughly one hour (plus a 10-second safety
    margin) before that snapshot up to ``sim_st``, and replays it onto the
    snapshot to produce the final book.

    :param config: backtest configuration; uses ``orderbook_input_root``,
        ``real_root`` and ``product``.
    :param sim_st: datetime at which the book should be reconstructed.
    :param logger: logger used for progress and error reporting.
    :return: tuple ``(final_sequence_number, final_orderbook)``.
    :raises Exception: any failure is logged and re-raised. (Previously the
        error was swallowed and ``None`` was returned implicitly, which made
        callers that unpack the 2-tuple fail later with an opaque
        ``TypeError`` instead of the real cause.)
    """
    try:
        closest_state_time_utc_1, closest_state_str = \
            OrderBookCreator.locate_closest_ob_state(
                config.orderbook_input_root,
                sim_st + datetime.timedelta(hours=1))

        # Go back an extra 10 seconds so that we definitely get all of the
        # messages needed to roll the snapshot forward.
        closest_state_time_utc_0 = closest_state_time_utc_1 - datetime.timedelta(
            hours=1, seconds=10)

        feed_df = DataLoader.load_feed(config.real_root, closest_state_time_utc_0,
                                       sim_st, config.product)

        closest_state_file_path = config.orderbook_input_root + closest_state_str
        logger.info("Closest order book path: " + closest_state_file_path)

        ob_state_seq, ob_state_df = OrderBookCreator().load_orderbook_state(
            closest_state_file_path)
        logger.info("Orderbook state sequence: " + str(ob_state_seq))
        logger.info("Feed first sequence: " + str(feed_df['sequence'].values.min()))

        ob_final, ob_final_seq = OrderBookCreator().get_orderbook(
            feed_df, ob_state_df, ob_state_seq)
        return ob_final_seq, ob_final
    except Exception as e:
        # Fix: log and re-raise instead of silently returning None. Every
        # caller unpacks a 2-tuple, so swallowing only deferred the crash
        # and hid the original traceback.
        logger.error("Order Book Reconstruction failed: " + str(e))
        raise
def prepare_simulation(self):
    """Reconstruct and persist the order book, ensure simulation parameters
    exist (reusing the cache or generating and saving them), then write the
    simulator config file.

    Returns True on success, False if any step failed (the error is logged).
    """
    try:
        _, book_state = reconstruct_orderbook(self.config, self.sim_st, self.logger)

        # Persist the reconstructed book so the simulator can load it.
        orderbook_path = (self.config.orderbook_output_root
                          + self.orderbook_window_end_time.isoformat() + ".csv")
        OrderBookCreator.orderbook_to_file(book_state, orderbook_path)

        params_path = self.params_root + self.sim_st.time().isoformat() + ".json"
        use_cache = self.config.use_cached_params and os.path.isfile(params_path)
        if use_cache:
            self.logger.info("Params file exists, therefore we're using it! " + params_path)
        else:
            self.logger.info("Not using params cache" + "\nGenerating params...")
            orders_df, trades_df, cancels_df = self.all_sampling_data
            # TODO: massive duplication in the arguments
            sample_ob_seq, sample_ob_state = reconstruct_orderbook(
                self.config, self.sampling_window_start_time, self.logger)
            feed_df = DataLoader.load_feed(
                self.config.real_root, self.sampling_window_start_time,
                self.sim_st, self.config.product)
            params = Sample.generate_sim_params(
                orders_df, trades_df, cancels_df, feed_df,
                sample_ob_state, sample_ob_seq, self.sim_st)

            # Save params so later runs can reuse them.
            Writer.json_to_file(params, params_path)
            self.logger.info("Permanent parameters saved to: " + params_path)

        # Generate the .conf file for the simulator.
        conf_string = SimConfig.generate_config_string(
            self.generate_config_dict(orderbook_path, params_path))
        self.save_config(conf_string)
        return True
    except Exception as e:
        self.logger.error(
            "Simulation preparation failed, skipping, at: "
            + self.sim_st.isoformat() + "\nError was\n" + str(e))
        return False
def test_lypaunov(self):
    """Estimate Lyapunov exponents from 30 minutes of order book mid-prices.

    NOTE(review): "lypaunov" looks like a typo for "Lyapunov"; name kept
    unchanged for compatibility with test selection by name.
    """
    window_start = datetime.datetime(2018, 5, 17, 1, 0, 0)
    window_end = datetime.datetime(2018, 5, 17, 1, 30, 0)

    parser = configparser.ConfigParser()
    parser.read("../config/backtest.ini")
    config = BacktestConfig(parser)

    ob_seq, ob_state = reconstruct_orderbook(config, window_start, logging.getLogger("test"))
    evolutor = OrderBookEvolutor(ob_state, window_start, ob_seq)

    feed_df = DataLoader.load_feed(self.root, window_start, window_end, "LTC-USD")
    evo = evolutor.evolve_orderbook_discrete(feed_df, 1)

    prices = np.asarray(evo['midprice'].dropna(), dtype=np.float32)
    print(prices)
    # print(len(prices))
    exponents = nolds.lyap_e(prices)
    print(exponents)
def test_orders_per_minute_windowed(self):
    """Count limit orders, market orders, trades and cancels in sliding
    windows over one day of LTC-USD feed data, then plot the series."""
    product = "LTC-USD"
    root = "/Users/jamesprince/project-data/data/consolidated-feed/"
    day_start = datetime.datetime(2018, 5, 17, 0, 0, 0)
    day_end = datetime.datetime(2018, 5, 17, 23, 59, 59)

    feed_df = DataLoader.load_feed(root + product + "/", day_start, day_end, product)
    orders = DataSplitter.get_orders(feed_df)
    limit_orders = DataSplitter.get_limit_orders(orders)
    market_orders = DataSplitter.get_market_orders(orders)
    trades = DataSplitter.get_trades(feed_df)
    cancels = DataSplitter.get_cancellations(feed_df)

    print("Total limit orders: " + str(len(limit_orders)))
    print("Total market orders: " + str(len(market_orders)))
    print("Total trades: " + str(len(trades)))
    print("Total cancels: " + str(len(cancels)))
    # total_vol = trades['remaining_size'].sum()
    # print("Total traded volume: " + str(total_vol))

    window_minutes = 60
    step_minutes = 5

    times = []
    num_limit_orders = []
    num_market_orders = []
    num_trades = []
    num_cancels = []
    traded_vols = []  # NOTE: never populated — the volume code below is commented out

    for step_idx in range(int((24 * 60) / step_minutes - 1)):
        win_start = day_start + datetime.timedelta(seconds=step_idx * step_minutes * 60)
        win_end = win_start + datetime.timedelta(seconds=window_minutes * 60)

        times.append(win_start)
        num_limit_orders.append(len(DataSplitter.get_between(limit_orders, win_start, win_end)))
        num_market_orders.append(len(DataSplitter.get_between(market_orders, win_start, win_end)))
        num_trades.append(len(DataSplitter.get_between(trades, win_start, win_end)))
        num_cancels.append(len(DataSplitter.get_between(cancels, win_start, win_end)))
        # vol_this_window = trades_this_window['remaining_size'].sum()
        # traded_vols.append(vol_this_window)

    Statistics.plot_metric_daily_comparison(times, num_limit_orders, num_cancels,
                                            "LTC-USD", day_start, step_minutes,
                                            window_minutes, "Limit Orders", "Cancels")
    Statistics.plot_metric_daily(times, num_limit_orders, "LTC-USD", day_start,
                                 step_minutes, window_minutes, "Limit Orders")
    Statistics.plot_metric_daily(times, num_market_orders, "LTC-USD", day_start,
                                 step_minutes, window_minutes, "Market Orders")
    Statistics.plot_metric_daily(times, num_trades, "LTC-USD", day_start,
                                 step_minutes, window_minutes, "Trades")
    Statistics.plot_metric_daily(times, num_cancels, "LTC-USD", day_start,
                                 step_minutes, window_minutes, "Cancels")
    Statistics.plot_metric_daily(times, traded_vols, "LTC-USD", day_start,
                                 step_minutes, window_minutes, "Traded Volume")
def test_real_spread_plot(self):
    """Plot real limit orders against the evolving best bid/ask over a
    five-minute window, printing cancel-lifetime statistics along the way."""
    plt.figure(figsize=(12, 8))

    product = "LTC-USD"
    root = "/Users/jamesprince/project-data/data/consolidated-feed/"
    window_start = datetime.datetime(2018, 5, 17, 1, 0, 0)
    window_end = datetime.datetime(2018, 5, 17, 1, 5, 0)
    feed_df = DataLoader.load_feed(root + product + "/", window_start, window_end, product)

    parser = configparser.ConfigParser()
    parser.read("../config/backtest.ini")
    config = BacktestConfig(parser)

    ob_seq, ob_state = reconstruct_orderbook(config, window_start, logging.getLogger("test"))
    evolutor = OrderBookEvolutor(ob_state, window_start, ob_seq)
    res_df = evolutor.evolve_orderbook(feed_df)
    # Elapsed seconds from the first book update, for the x-axis.
    res_df['seconds'] = (res_df['time'] - res_df['time'].iloc[0]).apply(
        lambda td: td.total_seconds())
    print(res_df)

    limit_orders = DataSplitter.get_limit_orders_from_feed(feed_df)
    limit_orders['seconds'] = (limit_orders['time'] - limit_orders['time'].iloc[0]).apply(
        lambda td: td.total_seconds())
    buy_limit_orders = DataSplitter.get_side("buy", limit_orders)
    sell_limit_orders = DataSplitter.get_side("sell", limit_orders)

    cancels = DataSplitter.get_cancellations(feed_df)
    # print(cancels)
    # Join each cancel back to the order it cancels so we can measure how
    # long the order lived before cancellation.
    cancels_merged = cancels.merge(limit_orders, on='order_id', how='left')
    # print(cancels_merged)
    cancels_merged['price'] = cancels_merged['price_x']
    cancels_merged['side'] = cancels_merged['side_x']
    cancels_merged['seconds'] = (cancels_merged['time_x']
                                 - cancels_merged['time_x'].iloc[0]).apply(
        lambda td: td.total_seconds())
    cancels_merged['lifetime'] = abs(cancels_merged['time_x']
                                     - cancels_merged['time_y']).dropna()
    print(cancels_merged)

    # Median lifetime, taken as the midpoint of the sorted series.
    median_idx = int(len(cancels_merged['lifetime']) / 2)
    print(cancels_merged['lifetime'].sort_values().iloc[median_idx])

    buy_cancels = DataSplitter.get_side("buy", cancels_merged)
    sell_cancels = DataSplitter.get_side("sell", cancels_merged)

    plt.plot(buy_limit_orders['seconds'], buy_limit_orders['price'], 'r+',
             label="Buy limit orders")
    plt.plot(sell_limit_orders['seconds'], sell_limit_orders['price'], 'b+',
             label="Sell limit orders")
    # plt.plot(buy_cancels['seconds'], buy_cancels['price'], 'r+', label="Buy side cancels")
    # plt.plot(sell_cancels['seconds'], sell_cancels['price'], 'b+', label="Sell side cancels")
    plt.plot(res_df['seconds'], res_df['best_bid'], label='Best bid price')
    plt.plot(res_df['seconds'], res_df['best_ask'], label='Best ask price')

    # Clamp the y-axis to +/- $5 around the opening midprice.
    start_price = res_df['midprice'].iloc[0]
    plt.ylim(start_price - 5, start_price + 5)

    plt.legend()
    plt.show()