def update_config(self, config: Dict, split: Dict, balance_and_pos: Optional[Dict] = None):
    if balance_and_pos:
        config.update(balance_and_pos)
    config.update({
        "start_date": ts_to_date((split["start_ms"] + self.ts_start) / 1000),
        "end_date": ts_to_date((split["end_ms"] + self.ts_start) / 1000),
        "n_days": split["diff_days"],
    })

async def init_my_trades(self, age_limit_days: float = 7.0) -> [dict]:
    age_limit = self.cc.milliseconds() - 1000 * 60 * 60 * 24 * age_limit_days
    mtl = self.load_cached_my_trades()
    print(f'loaded {len(mtl)} cached my trades')
    if not mtl:
        mtl = await self.fetch_my_trades(start_time_ms=age_limit)
    else:
        mtl += await self.fetch_my_trades(start_time_ms=mtl[-1]['timestamp'])
    mtd = {t['order_id']: t for t in mtl}
    mt = sorted(mtd.values(), key=lambda x: x['timestamp'])
    if len(mt) == 0:
        return
    # keep fetching forward from the most recent known order_id until nothing new is returned
    while True:
        print('fetching my trades', ts_to_date(mt[-1]['timestamp'] / 1000))
        new_mt = await self.fetch_my_trades(order_id=mt[-1]['order_id'] + 1)
        if len(new_mt) == 0:
            break
        mt += new_mt
    mtd = {t['order_id']: t for t in mt}
    my_trades = sorted(mtd.values(), key=lambda x: x['order_id'])
    print('dumping trades to cache...')
    with open(self.my_trades_cache_filepath, 'w') as f:
        for t in my_trades:
            f.write(json.dumps(t) + '\n')
    self.my_trades = my_trades

async def fetch_ticks(self, from_id: int = None, start_time: int = None, end_time: int = None,
                      do_print: bool = True):
    params = {'symbol': self.symbol, 'limit': 1000}
    if from_id is not None:
        params['fromId'] = max(0, from_id)
    if start_time is not None:
        params['startTime'] = start_time
    if end_time is not None:
        params['endTime'] = end_time
    try:
        fetched = await self.private_get(self.endpoints['ticks'], params)
    except Exception as e:
        print('error fetching ticks a', e)
        return []
    try:
        ticks = [{'trade_id': int(t['a']), 'price': float(t['p']), 'qty': float(t['q']),
                  'timestamp': int(t['T']), 'is_buyer_maker': t['m']} for t in fetched]
        if do_print:
            print_(['fetched ticks', self.symbol, ticks[0]['trade_id'],
                    ts_to_date(float(ticks[0]['timestamp']) / 1000)])
    except Exception as e:
        print('error fetching ticks b', e, fetched)
        ticks = []
        if do_print:
            print_(['fetched no new ticks', self.symbol])
    return ticks

def get_downloaded_trades(filepath: str, age_limit_millis: float) -> (pd.DataFrame, dict):
    if os.path.isdir(filepath):
        filenames = sorted([f for f in os.listdir(filepath) if f.endswith('.csv')],
                           key=lambda x: int(x[:x.find('_')].replace('.cs', '').replace('v', '')))
        chunks = []
        chunk_lengths = {}
        # load chunks newest-first until trades older than the age limit are reached
        for f in filenames[::-1]:
            chunk = pd.read_csv(filepath + f).set_index('trade_id')
            chunk_lengths[f] = len(chunk)
            print('\rloaded chunk of trades', f,
                  ts_to_date(chunk.timestamp.iloc[0] / 1000), end=' ')
            chunks.append(chunk)
            if chunk.timestamp.iloc[0] < age_limit_millis:
                break
        if chunks:
            df = pd.concat(chunks, axis=0).sort_index()
            return df[~df.index.duplicated()], chunk_lengths
        else:
            return None, {}
    else:
        return None, {}

async def main():
    exchange = sys.argv[1]
    user = sys.argv[2]
    settings_filepath = os.path.join('backtesting_settings', exchange, '')
    backtesting_settings = json.load(
        open(os.path.join(settings_filepath, 'backtesting_settings.json')))
    symbol = backtesting_settings['symbol']
    n_days = backtesting_settings['n_days']
    ranges = json.load(open(os.path.join(settings_filepath, 'ranges.json')))
    print(settings_filepath)
    results_filepath = make_get_filepath(
        os.path.join('backtesting_results', exchange,
                     ts_to_date(time())[:19].replace(':', '_') + f'_{int(round(n_days))}', ''))
    trade_cache_filepath = make_get_filepath(os.path.join(settings_filepath, 'trade_cache', ''))
    trades_filename = f'{symbol}_raw_trades_{exchange}_{n_days}_days_{ts_to_date(time())[:10]}.npy'
    trades_filepath = f"{trade_cache_filepath}{trades_filename}"
    if os.path.exists(trades_filepath):
        print('loading cached trade list', trades_filepath)
        trades_list = np.load(trades_filepath, allow_pickle=True)
    else:
        agg_trades = await load_trades(exchange, user, symbol, n_days)
        print('preparing trades...')
        trades_list = prep_trades_list(agg_trades)
        np.save(trades_filepath, trades_list)
    jackrabbit(trades_list, backtesting_settings, ranges, results_filepath)

async def fetch_trades(cc, symbol: str, from_id: int = None) -> [dict]:
    params = {'symbol': symbol, 'limit': 1000}
    if from_id:
        params['fromId'] = from_id
    fetched_trades = await cc.fapiPublic_get_aggtrades(params=params)
    trades = [{'trade_id': int(t['a']),
               'price': float(t['p']),
               'qty': float(t['q']),
               'timestamp': t['T'],
               'is_buyer_maker': t['m']} for t in fetched_trades]
    print_(['fetched trades', symbol, trades[0]['trade_id'],
            ts_to_date(trades[0]['timestamp'] / 1000)])
    return trades

async def fetch_ticks(cc, symbol: str, from_id: int = None, do_print=True) -> [dict]:
    params = {'symbol': symbol, 'limit': 1000}
    if from_id:
        params['from'] = max(0, from_id)
    try:
        fetched_trades = await cc.v2_public_get_trading_records(params=params)
    except Exception as e:
        print(e)
        return []
    trades = [format_tick(t) for t in fetched_trades['result']]
    if do_print:
        print_(['fetched trades', symbol, trades[0]['trade_id'],
                ts_to_date(trades[0]['timestamp'] / 1000)])
    return trades

def iter_chunks(exchange: str, symbol: str) -> Iterator[pd.DataFrame]:
    chunk_size = 100000
    filepath = f'historical_data/{exchange}/agg_trades_futures/{symbol}/'
    if os.path.isdir(filepath):
        filenames = sorted([f for f in os.listdir(filepath) if f.endswith('.csv')])
        # yield chunks newest-first, then None to signal exhaustion
        for f in filenames[::-1]:
            chunk = pd.read_csv(filepath + f).set_index('trade_id')
            if chunk is not None:
                print('loaded chunk of trades', f, ts_to_date(chunk.timestamp.iloc[0] / 1000))
                yield chunk
            else:
                yield None
        yield None
    else:
        yield None

async def fetch_trades(cc, symbol: str, from_id: int = None) -> [dict]:
    params = {'symbol': symbol, 'limit': 1000}
    if from_id:
        params['from'] = from_id
    fetched_trades = await cc.public_get_trading_records(params=params)
    trades = [{'trade_id': int(t['id']),
               'side': t['side'],
               'price': t['price'],
               'qty': t['qty'],
               'timestamp': date_to_ts(t['time'][:-1])} for t in fetched_trades['result']]
    print_(['fetched trades', symbol, trades[0]['trade_id'],
            ts_to_date(trades[0]['timestamp'] / 1000)])
    return trades

async def fetch_ticks(self, from_id: int = None, do_print: bool = True):
    params = {'symbol': self.symbol, 'limit': 1000}
    if from_id is not None:
        params['from'] = max(0, from_id)
    try:
        ticks = await self.public_get(self.endpoints['ticks'], params)
    except Exception as e:
        print('error fetching ticks', e)
        return []
    try:
        trades = list(map(format_tick, ticks['result']))
        if do_print:
            print_(['fetched trades', self.symbol, trades[0]['trade_id'],
                    ts_to_date(float(trades[0]['timestamp']) / 1000)])
    except Exception:
        trades = []
        if do_print:
            print_(['fetched no new trades', self.symbol])
    return trades

async def init_my_trades(self, age_limit_days: float = 7.0) -> [dict]:
    age_limit = self.cc.milliseconds() - 1000 * 60 * 60 * 24 * age_limit_days
    mtl = await self.fetch_my_trades()
    print('loading my trades cache...')
    mtl += self.load_cached_my_trades()
    mtd = {t['order_id']: t for t in mtl}
    mt = sorted(mtd.values(), key=lambda x: x['timestamp'])
    page = 2
    # page backwards until trades older than the age limit are reached
    while mt[0]['timestamp'] > age_limit:
        print('fetching my trades', ts_to_date(mt[0]['timestamp'] / 1000))
        new_mt = await self.fetch_my_trades(page)
        if len(new_mt) == 0 or new_mt[0]['order_id'] in mtd:
            break
        page += 1
        mtd = {t['order_id']: t for t in mt + new_mt}
        mt = sorted(mtd.values(), key=lambda x: x['timestamp'])
    my_trades = [t for t in mt if t['timestamp'] > age_limit]
    print('dumping trades to cache...')
    with open(self.my_trades_cache_filepath, 'w') as f:
        for t in my_trades:
            f.write(json.dumps(t) + '\n')
    self.my_trades = my_trades

def backtest_tune(ticks: np.ndarray, backtest_config: dict, current_best: Union[dict, list] = None):
    config = create_config(backtest_config)
    n_days = round_((ticks[-1][2] - ticks[0][2]) / (1000 * 60 * 60 * 24), 0.1)
    backtest_config['optimize_dirpath'] = os.path.join(backtest_config['optimize_dirpath'],
                                                       ts_to_date(time())[:19].replace(':', ''), '')
    if 'iters' in backtest_config:
        iters = backtest_config['iters']
    else:
        print('Parameter iters should be defined in the configuration. Defaulting to 10.')
        iters = 10
    if 'num_cpus' in backtest_config:
        num_cpus = backtest_config['num_cpus']
    else:
        print('Parameter num_cpus should be defined in the configuration. Defaulting to 2.')
        num_cpus = 2
    n_particles = backtest_config['n_particles'] if 'n_particles' in backtest_config else 10
    # default particle swarm hyperparameters, overridden by backtest_config['options'] if present
    phi1 = 1.4962
    phi2 = 1.4962
    omega = 0.7298
    if 'options' in backtest_config:
        phi1 = backtest_config['options']['c1']
        phi2 = backtest_config['options']['c2']
        omega = backtest_config['options']['w']
    current_best_params = []
    if current_best:
        if type(current_best) == list:
            for c in current_best:
                c = clean_start_config(c, config, backtest_config['ranges'])
                if c not in current_best_params:
                    current_best_params.append(c)
        else:
            current_best = clean_start_config(current_best, config, backtest_config['ranges'])
            current_best_params.append(current_best)

    ray.init(num_cpus=num_cpus, logging_level=logging.FATAL, log_to_driver=False)
    pso = ng.optimizers.ConfiguredPSO(transform='identity', popsize=n_particles,
                                      omega=omega, phip=phi1, phig=phi2)
    algo = NevergradSearch(optimizer=pso, points_to_evaluate=current_best_params)
    algo = ConcurrencyLimiter(algo, max_concurrent=num_cpus)
    scheduler = AsyncHyperBandScheduler()

    if 'wfo' in config and config['wfo']:
        print('\n\nwalk forward optimization\n\n')
        wfo = WFO(ticks, backtest_config, P_train=0.5).set_train_N(4)
        backtest_wrap = lambda config: tune_report(wfo.backtest(config))
    else:
        print('\n\nsimple sliding window optimization\n\n')
        backtest_wrap = tune.with_parameters(simple_sliding_window_wrap, ticks=ticks)
    analysis = tune.run(
        backtest_wrap, metric='objective', mode='max', name='search',
        search_alg=algo, scheduler=scheduler, num_samples=iters, config=config,
        verbose=1, reuse_actors=True, local_dir=backtest_config['optimize_dirpath'],
        progress_reporter=LogReporter(
            metric_columns=['daily_gain',
                            'closest_liquidation',
                            'max_hrs_no_fills',
                            'max_hrs_no_fills_same_side',
                            'objective'],
            parameter_columns=[k for k in backtest_config['ranges']
                               if type(config[k]) == ray.tune.sample.Float
                               or type(config[k]) == ray.tune.sample.Integer]),
        raise_on_failed_trial=False)
    ray.shutdown()
    return analysis

async def load_trades(exchange: str, user: str, symbol: str, n_days: float) -> pd.DataFrame:

    def skip_ids(id_, ids_):
        # walk backwards past trade ids that are already downloaded
        if id_ in ids_:
            print('skipping from', id_)
            while id_ in ids_:
                id_ -= 1
            print(' to', id_)
        return id_

    def load_cache():
        cache_filenames = [f for f in os.listdir(cache_filepath) if '.csv' in f]
        if cache_filenames:
            print('loading cached trades')
            cache_df = pd.concat([pd.read_csv(cache_filepath + f) for f in cache_filenames], axis=0)
            cache_df = cache_df.set_index('trade_id')
            return cache_df
        return None

    if exchange == 'binance':
        fetch_trades_func = binance_fetch_trades
    elif exchange == 'bybit':
        fetch_trades_func = bybit_fetch_trades
    else:
        print(exchange, 'not found')
        return
    cc = init_ccxt(exchange, user)
    filepath = make_get_filepath(
        os.path.join('historical_data', exchange, 'agg_trades_futures', symbol, ''))
    cache_filepath = make_get_filepath(filepath.replace(symbol, symbol + '_cache'))
    age_limit = time() - 60 * 60 * 24 * n_days
    age_limit_millis = age_limit * 1000
    print('age_limit', ts_to_date(age_limit))
    cache_df = load_cache()
    trades_df, chunk_lengths = get_downloaded_trades(filepath, age_limit_millis)
    ids = set()
    if trades_df is not None:
        ids.update(trades_df.index)
    if cache_df is not None:
        ids.update(cache_df.index)
    gaps = []
    if trades_df is not None and len(trades_df) > 0:
        sids = sorted(ids)
        for i in range(1, len(sids)):
            if sids[i - 1] + 1 != sids[i]:
                gaps.append((sids[i - 1], sids[i]))
        if gaps:
            print('gaps', gaps)
    prev_fetch_ts = time()
    new_trades = await fetch_trades_func(cc, symbol)
    k = 0
    # fetch backwards in time until the age limit is reached, dumping to cache every 20 batches
    while True:
        k += 1
        if (break_ := new_trades[0]['timestamp'] <= age_limit_millis) or k % 20 == 0:
            print('caching trades...')
            new_tdf = pd.DataFrame(new_trades).set_index('trade_id')
            cache_filename = f'{cache_filepath}{new_tdf.index[0]}_{new_tdf.index[-1]}.csv'
            new_tdf.to_csv(cache_filename)
            new_trades = [new_trades[0]]
            if break_:
                break
        from_id = skip_ids(new_trades[0]['trade_id'] - 1, ids) - 999
        # wait at least 0.75 sec between each fetch
        sleep_for = max(0.0, 0.75 - (time() - prev_fetch_ts))
        await asyncio.sleep(sleep_for)
        prev_fetch_ts = time()
        new_trades = await fetch_trades_func(cc, symbol, from_id=from_id) + new_trades
        ids.update([e['trade_id'] for e in new_trades])

async def load_trades(exchange: str, user: str, symbol: str, n_days: float) -> pd.DataFrame:

    def skip_ids(id_, ids_):
        if id_ in ids_:
            print('skipping from', id_)
            while id_ in ids_:
                id_ -= 1
            print(' to', id_)
        return id_

    cc = init_ccxt(exchange, user)
    try:
        if exchange == 'binance':
            fetch_trades_func = binance_fetch_trades
        elif exchange == 'bybit':
            fetch_trades_func = bybit_fetch_trades
        else:
            print(exchange, 'not found')
            return
        filepath = make_get_filepath(f'historical_data/{exchange}/agg_trades_futures/{symbol}/')
        cache_filepath = make_get_filepath(
            f'historical_data/{exchange}/agg_trades_futures/{symbol}_cache/')
        cache_filenames = [f for f in os.listdir(cache_filepath) if f.endswith('.csv')]
        ids = set()
        if cache_filenames:
            print('loading cached trades...')
            cached_trades = pd.concat([pd.read_csv(cache_filepath + f) for f in cache_filenames],
                                      axis=0)
            cached_trades = cached_trades.set_index('trade_id').sort_index()
            cached_trades = cached_trades[~cached_trades.index.duplicated()]
            ids.update(cached_trades.index)
        else:
            cached_trades = None
        age_limit = time() - 60 * 60 * 24 * n_days
        age_limit_millis = age_limit * 1000
        print('age_limit', ts_to_date(age_limit))
        chunk_iterator = iter_chunks(exchange, symbol)
        chunk = next(chunk_iterator)
        chunks = {} if chunk is None else {int(chunk.index[0]): chunk}
        if chunk is not None:
            ids.update(chunk.index)
        min_id = min(ids) if ids else 0
        new_trades = await fetch_trades_func(cc, symbol)
        cached_ids = set()
        k = 0
        while True:
            if new_trades[0]['timestamp'] <= age_limit_millis:
                break
            from_id = new_trades[0]['trade_id'] - 1
            # skip over ids that are already present, pulling in older chunks as needed
            while True:
                if chunk is None:
                    min_id = 0
                    break
                from_id = skip_ids(from_id, ids)
                if from_id < min_id:
                    chunk = next(chunk_iterator)
                    if chunk is None:
                        min_id = 0
                        break
                    else:
                        chunks[int(chunk.index[0])] = chunk
                        ids.update(chunk.index)
                        min_id = min(ids)
                        if chunk.timestamp.max() < age_limit_millis:
                            break
                else:
                    break
            from_id = skip_ids(from_id, ids)
            from_id -= 999
            new_trades = await fetch_trades_func(cc, symbol, from_id=from_id) + new_trades
            k += 1
            if k % 20 == 0:
                print('dumping cache')
                cache_df = pd.DataFrame(
                    [t for t in new_trades if t['trade_id'] not in cached_ids]).set_index('trade_id')
                cache_df.to_csv(cache_filepath + str(int(time() * 1000)) + '.csv')
                cached_ids.update(cache_df.index)
        new_trades_df = pd.DataFrame(new_trades).set_index('trade_id')
        trades_updated = pd.concat(list(chunks.values()) + [new_trades_df, cached_trades], axis=0)
        no_dup = trades_updated[~trades_updated.index.duplicated()]
        no_dup_sorted = no_dup.sort_index()
        chunk_size = 100000
        chunk_ids = no_dup_sorted.index // chunk_size * chunk_size
        for g in no_dup_sorted.groupby(chunk_ids):
            if g[0] not in chunks or len(chunks[g[0]]) != chunk_size:
                print('dumping chunk', g[0])
                g[1].to_csv(f'{filepath}{str(g[0])}.csv')
        for f in [f_ for f_ in os.listdir(cache_filepath) if f_.endswith('.csv')]:
            os.remove(cache_filepath + f)
        await cc.close()
        return no_dup_sorted[no_dup_sorted.timestamp >= age_limit_millis]
    except KeyboardInterrupt:
        await cc.close()