def main():
    parser = argparse.ArgumentParser(description='generate states from replays')
    parser.add_argument('--binary', type=str, default='../../build/minirts-backend')
    parser.add_argument('--replays-root', type=str)
    parser.add_argument('--output-root', type=str)
    parser.add_argument('--replay-file-extension', type=str, default='.rep')
    # configs for generating jsons
    parser.add_argument('--human', action='store_true', default=False)
    parser.add_argument('--player1', type=str, default='dummy,fs=50')
    parser.add_argument('--player2', type=str, default='dummy,fs=50')
    parser.add_argument('--max-tick', type=int, default=40000)
    args = parser.parse_args()

    args.binary = os.path.abspath(args.binary)
    if not os.path.exists(args.binary):
        print('cannot find binary at:', args.binary)
        assert False

    logger_path = os.path.join(args.output_root, 'config')
    sys.stdout = Logger(logger_path)

    replays = get_all_files(args.replays_root, args.replay_file_extension)
    bad_replays = generate_states(replays, args, args.replays_root, args.output_root)
    print('number of corrupted replays: %d' % len(bad_replays))
    for replay in bad_replays:
        print(replay)
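# get_all_files is not shown in this fragment. A minimal sketch, assuming it
# just walks the replay root and collects files with the given extension (the
# project's real helper may differ):
import os

def get_all_files(root, extension):
    """Recursively collect paths under `root` ending with `extension`."""
    matches = []
    for dirpath, _, filenames in os.walk(root):
        for name in filenames:
            if name.endswith(extension):
                matches.append(os.path.join(dirpath, name))
    return sorted(matches)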
def self_play(args):
    wandb.init(project="adapt-minirts", sync_tensorboard=True, dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    wandb.run.name = (
        f"multitask-fixed_selfplay-{wandb.run.id}-{args.coach1}-{args.executor1}"
        f"-{args.train_mode}-rule{args.rule}-{args.tag}")
    # wandb.run.save()
    wandb.config.update(args)

    print("args:")
    pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    print("Train Mode: {}".format(args.train_mode))

    if args.coach_reload:
        print("Reloading coach model.... ")
        args.coach1 = args.coach_load_file
        _coach1 = os.path.basename(args.coach1).replace(".pt", "")
    else:
        _coach1 = args.coach1
        args.coach1 = best_coaches[args.coach1]

    if args.exec_reload:
        print("Reloading executor model.... ")
        args.executor1 = args.exec_load_file
        _executor1 = os.path.basename(args.executor1).replace(".pt", "")
    else:
        _executor1 = args.executor1
        args.executor1 = best_executors[args.executor1]

    log_name = "multitask-fixed_c1_type={}_c2_type={}__e1_type={}_e2_type={}__lr={}__num_sp={}__num_rb={}_{}_{}".format(
        _coach1,
        args.coach2,
        _executor1,
        args.executor2,
        args.lr,
        args.num_sp,
        args.num_rb,
        args.tag,
        random.randint(1111, 9999),
    )
    writer = SummaryWriter(comment=log_name)

    args.coach2 = best_coaches[args.coach2]
    args.executor2 = best_executors[args.executor2]

    logger_path = os.path.join(args.save_dir, "train.log")
    sys.stdout = Logger(logger_path)

    device = torch.device("cuda:%d" % args.gpu)

    sp_agent = Agent(
        coach=args.coach1,
        executor=args.executor1,
        device=device,
        args=args,
        writer=writer,
        trainable=True,
        exec_sample=True,
        pg=args.pg,
    )
    sp_agent.init_save_folder(wandb.run.name)

    bc_agent = Agent(
        coach=args.coach2,
        executor=args.executor2,
        device=device,
        args=args,
        writer=writer,
        trainable=False,
        exec_sample=False,
    )

    print("Progress: ")

    ## Create Save folder:
    working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
    create_working_dir(args, working_rule_dir)

    cur_iter_idx = 1
    rules = [args.rule]
    for rule_idx in rules:
        print("Current rule: {}".format(rule_idx))
        game = MultiTaskGame(sp_agent, bc_agent, cur_iter_idx, args, working_rule_dir)

        for epoch in range(args.train_epochs):
            if epoch % args.eval_factor == 0:
                game.evaluate_rules(cur_iter_idx, rules, "train")

            rule = game.train_permute[rule_idx]
            print("Current rule: {}".format(rule))
            game.init_rule_games(rule)
            agent1, agent2 = game.start()
            agent1.train()
            agent2.train()

            pbar = tqdm(total=(args.num_sp * 2 + args.num_rb))
            while not game.finished():
                data = game.get_input()
                if len(data) == 0:
                    continue
                for key in data:
                    # print(key)
                    batch = to_device(data[key], device)
                    if key == "act1":
                        batch["actor"] = "act1"
                        reply = agent1.simulate(cur_iter_idx, batch)
                        t_count = agent1.update_logs(cur_iter_idx, batch, reply)
                    elif key == "act2":
                        batch["actor"] = "act2"
                        reply = agent2.simulate(cur_iter_idx, batch)
                        t_count = agent2.update_logs(cur_iter_idx, batch, reply)
                    else:
                        assert False
                    game.set_reply(key, reply)
                    pbar.update(t_count)

            if args.train_mode == "coach":
                agent1.train_coach(cur_iter_idx)
            elif args.train_mode == "executor":
                agent1.train_executor(cur_iter_idx)
            elif args.train_mode == "both":
                agent1.train_both(cur_iter_idx)
            else:
                raise Exception("Invalid train mode.")

            game.print_logs(cur_iter_idx)
            cur_iter_idx += 1
            wandb.run.summary["max_iterations"] = cur_iter_idx
            pbar.close()

        game.terminate()
        del game

    writer.close()
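# to_device comes from common_utils (see the import blocks later in this
# listing). A sketch of the behavior the loop above assumes, namely moving
# every tensor in a possibly nested batch onto the target device; this is an
# assumed implementation, not the project's actual one:
import torch

def to_device(batch, device):
    """Recursively move tensors in a nested dict/list batch onto `device`."""
    if isinstance(batch, torch.Tensor):
        return batch.to(device)
    if isinstance(batch, dict):
        return {k: to_device(v, device) for k, v in batch.items()}
    if isinstance(batch, (list, tuple)):
        return type(batch)(to_device(v, device) for v in batch)
    return batch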
def self_play(args):
    wandb.init(project="adapt-minirts-pop-eval", sync_tensorboard=True, dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    wandb.run.name = (
        f"multitask-pop-eval-{wandb.run.id}-{args.coach1}-{args.executor1}"
        f"-{args.train_mode}-rule{args.rule}-{args.tag}")
    # wandb.run.save()
    wandb.config.update(args)

    print("args:")
    pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # args.coach1 = get_coach_path(model_dicts["ft_pop[80,40,20]"][0], coach_variant=80)
    # args.executor1 = get_executor_path(model_dicts["ft_pop[80,40,20]"][0])
    args.coach1 = get_coach_path(model_dicts["ft_both[80]"][0])
    args.executor1 = get_executor_path(model_dicts["ft_both[80]"][0])
    args.coach2 = get_coach_path(model_dicts["bc"][0])
    args.executor2 = get_executor_path(model_dicts["bc"][0])

    _coach1 = os.path.basename(args.coach1).replace(".pt", "")
    _executor1 = os.path.basename(args.executor1).replace(".pt", "")

    log_name = "multitask-pop-eval-analyze_c1_type={}_c2_type={}__e1_type={}_e2_type={}__lr={}__num_sp={}__num_rb={}_{}_{}".format(
        _coach1,
        args.coach2,
        _executor1,
        args.executor2,
        args.lr,
        args.num_sp,
        args.num_rb,
        args.tag,
        random.randint(1111, 9999),
    )
    writer = SummaryWriter(comment=log_name)

    logger_path = os.path.join(args.save_dir, "train.log")
    sys.stdout = Logger(logger_path)

    device = torch.device("cuda:%d" % args.gpu)

    sp_agent = Agent(
        coach=args.coach1,
        executor=args.executor1,
        device=device,
        args=args,
        writer=writer,
        trainable=True,
        exec_sample=True,
        pg=args.pg,
    )
    sp_agent.init_save_folder(wandb.run.name)

    bc_agent = Agent(
        coach=args.coach2,
        executor=args.executor2,
        device=device,
        args=args,
        writer=writer,
        trainable=True,
        exec_sample=True,
    )

    print("Progress: ")

    ## Create Save folder:
    working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
    create_working_dir(args, working_rule_dir)

    cur_iter_idx = 1
    rules = [args.rule]
    for rule_idx in rules:
        print("Current rule: {}".format(rule_idx))
        game = MultiTaskGame(sp_agent, bc_agent, cur_iter_idx, args, working_rule_dir)
        game.analyze_rule_games(
            cur_iter_idx,
            rules,
            "train",
            viz=args.viz,
            num_games=0,
            num_sp=100,
        )
        game.terminate()
        del game

    writer.close()
def self_play(args):
    wandb.init(project="adapt-minirts-pop-eval", sync_tensorboard=True, dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    date = datetime.date(datetime.now())
    wandb.run.name = f"multitask-pop-drift-eval-{wandb.run.id}-{date}-{args.tag}"
    # wandb.run.save()
    wandb.config.update(args)
    # print("args:")
    # pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    partial_json_save_dir = os.path.join(args.eval_folder, f"drift-eval-{date}-{args.tag}")
    if os.path.exists(partial_json_save_dir):
        print("Attempting to create an existing folder.. hence skipping...")
    else:
        os.makedirs(partial_json_save_dir)

    coach1 = get_coach_path(model_dicts["ft_coach[7]"][0])
    executors = {
        "bc": get_executor_path(model_dicts["bc"][0]),
        "ft_both[80]": get_executor_path(model_dicts["ft_both[80]"][0]),
        "ft_pop[80,40,20]": get_executor_path(model_dicts["ft_pop[80,40,20]"][0]),
    }
    rules = [7]
    args.coach_random_init = True
    NUM_GAMES = 250

    args.coach1 = coach1
    args.coach2 = coach1
    args.executor2 = executors["bc"]

    _coach1 = os.path.basename(args.coach1).replace(".pt", "")
    _executor1 = os.path.basename(args.executor1).replace(".pt", "")

    log_name = "multitask-pop-analyze-drift_c1_type={}_c2_type={}__e1_type={}_e2_type={}__lr={}__num_sp={}__num_rb={}_{}_{}".format(
        _coach1,
        args.coach2,
        _executor1,
        args.executor2,
        args.lr,
        args.num_sp,
        args.num_rb,
        args.tag,
        random.randint(1111, 9999),
    )
    writer = SummaryWriter(comment=log_name)

    logger_path = os.path.join(args.save_dir, "train.log")
    sys.stdout = Logger(logger_path)

    device = torch.device("cuda:%d" % args.gpu)

    sp_agent = MultiExecutorAgent(
        coach=args.coach1,
        executors=executors,
        device=device,
        args=args,
        writer=writer,
        trainable=False,
        exec_sample=True,
        pg=args.pg,
    )
    sp_agent.init_save_folder(wandb.run.name)

    bc_agent = Agent(
        coach=args.coach2,
        executor=args.executor2,
        device=device,
        args=args,
        writer=writer,
        trainable=False,
        exec_sample=False,
    )

    print("Progress: ")

    ## Create Save folder:
    working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
    create_working_dir(args, working_rule_dir)

    print("Current rule: {}".format(rules[0]))

    sub_win_rates = []
    full_reply_dicts = []

    if not os.path.exists(os.path.join(args.save_img_dir, "matrices.npy")):
        for i in tqdm(range(NUM_GAMES)):
            game = MultiTaskGame(sp_agent, bc_agent, 0, args, working_rule_dir)
            # rule = randint(0, 80)
            result, reply_dicts = game.drift_analysis_games(
                0,
                rules,
                "train",
                viz=args.viz,
                num_games=1,
            )
            full_reply_dicts += reply_dicts
            game.terminate()
            del game

        inst_list = [
            sp_agent.model.coach.inst_dict._idx2inst[
                reply["bc_coach"]["inst"].squeeze().tolist()]
            for reply in full_reply_dicts
        ]

        build_unit_list = []
        for inst in inst_list:
            if "create" in inst or "build" in inst or "train" in inst or "make" in inst:
                if "peasant" in inst:
                    unit = UNIT_TYPE_TO_IDX["PEASANT"]
                elif "dragon" in inst:
                    unit = UNIT_TYPE_TO_IDX["DRAGON"]
                elif "archer" in inst:
                    unit = UNIT_TYPE_TO_IDX["ARCHER"]
                elif "cavalry" in inst or "cavs" in inst:
                    unit = UNIT_TYPE_TO_IDX["CAVALRY"]
                elif "spearman" in inst:
                    unit = UNIT_TYPE_TO_IDX["SPEARMAN"]
                elif ("swordman" in inst or "swordsman" in inst
                      or "swords" in inst or "sword" in inst):
                    unit = UNIT_TYPE_TO_IDX["SWORDMAN"]
                elif "catapult" in inst:
                    unit = UNIT_TYPE_TO_IDX["CATAPULT"]
                else:
                    unit = None
                build_unit_list.append(unit)
            else:
                build_unit_list.append(None)

        build_building_list = []
        for inst in inst_list:
            if "create" in inst or "build" in inst or "train" in inst or "make" in inst:
                if "shop" in inst:
                    unit = UNIT_TYPE_TO_IDX["WORKSHOP"]
                elif "stable" in inst:
                    unit = UNIT_TYPE_TO_IDX["STABLE"]
                elif "barrack" in inst:
                    unit = UNIT_TYPE_TO_IDX["BARRACK"]
                elif "tower" in inst:
                    unit = UNIT_TYPE_TO_IDX["GUARD_TOWER"]
                elif "blacksmith" in inst:
                    unit = UNIT_TYPE_TO_IDX["BLACKSMITH"]
                else:
                    unit = None
                build_building_list.append(unit)
            else:
                build_building_list.append(None)

        bc_executor_unit_types = (
            np.asarray([
                (
                    reply["bc_executor"]["one_hot_reply"]
                    ["unit_type_prob"].squeeze()
                    # * (
                    #     reply["bc_executor"]["one_hot_reply"]["cmd_type_prob"]
                    #     .squeeze()[:, [4, 6]]
                    #     .sum(1)
                    #     >= 1.0
                    # ).unsqueeze(1)
                ).sum(0).tolist()[1:]
                for reply in full_reply_dicts
            ]).argmax(1) + 1)

        ft_both_unit_type = (
            np.asarray([
                (
                    reply["ft_both[80]"]["one_hot_reply"]
                    ["unit_type_prob"].squeeze()
                    # * (
                    #     reply["ft_both[80]"]["one_hot_reply"]["cmd_type_prob"]
                    #     .squeeze()[:, [4, 6]]
                    #     .sum(1)
                    #     >= 1.0
                    # ).unsqueeze(1)
                ).sum(0).tolist()[1:]
                for reply in full_reply_dicts
            ]).argmax(1) + 1)

        ft_pop_unit_type = (
            np.asarray([
                (
                    reply["ft_pop[80,40,20]"]["one_hot_reply"]
                    ["unit_type_prob"].squeeze()
                    # * (
                    #     reply["ft_pop[80,40,20]"]["one_hot_reply"]["cmd_type_prob"]
                    #     .squeeze()[:, [4, 6]]
                    #     .sum(1)
                    #     >= 1.0
                    # ).unsqueeze(1)
                ).sum(0).tolist()[1:]
                for reply in full_reply_dicts
            ]).argmax(1) + 1)

        bc_executor_building_types = (
            np.asarray([
                (
                    reply["bc_executor"]["one_hot_reply"]
                    ["building_type_prob"].squeeze()
                    # * (
                    #     reply["bc_executor"]["one_hot_reply"]["cmd_type_prob"]
                    #     .squeeze()[:, [3, 6]]
                    #     .sum(1)
                    #     > 1.0
                    # ).unsqueeze(1)
                ).sum(0).tolist()[1:]
                for reply in full_reply_dicts
            ]).argmax(1) + 1)

        ft_both_building_type = (
            np.asarray([
                (
                    reply["ft_both[80]"]["one_hot_reply"]
                    ["building_type_prob"].squeeze()
                    # * (
                    #     reply["ft_both[80]"]["one_hot_reply"]["cmd_type_prob"]
                    #     .squeeze()[:, [3, 6]]
                    #     .sum(1)
                    #     > 1.0
                    # ).unsqueeze(1)
                ).sum(0).tolist()[1:]
                for reply in full_reply_dicts
            ]).argmax(1) + 1)

        ft_pop_building_type = (
            np.asarray([
                (
                    reply["ft_pop[80,40,20]"]["one_hot_reply"]
                    ["building_type_prob"].squeeze()
                    # * (
                    #     reply["ft_pop[80,40,20]"]["one_hot_reply"]["cmd_type_prob"]
                    #     .squeeze()[:, [3, 6]]
                    #     .sum(1)
                    #     > 1.0
                    # ).unsqueeze(1)
                ).sum(0).tolist()[1:]
                for reply in full_reply_dicts
            ]).argmax(1) + 1)

        mat_bc_units = create_matrix(build_unit_list, bc_executor_unit_types, title="bc-bc")
        mat_both_units = create_matrix(build_unit_list, ft_both_unit_type, title="bc-both")
        mat_pop_units = create_matrix(build_unit_list, ft_pop_unit_type, title="bc-pop")

        mat_bc_buildings = create_matrix(build_building_list, bc_executor_building_types, title="bc-bc")
        mat_both_buildings = create_matrix(build_building_list, ft_both_building_type, title="bc-both")
        mat_pop_buildings = create_matrix(build_building_list, ft_pop_building_type, title="bc-pop")

        mat_bc = mat_bc_units  # + mat_bc_buildings
        mat_both = mat_both_units  # + mat_both_buildings
        mat_pop = mat_pop_units  # + mat_pop_buildings

        print("Saving Numpy matrices...")
        with open(os.path.join(args.save_img_dir, "matrices.npy"), "wb") as f:
            np.save(f, mat_bc)
            np.save(f, mat_both)
            np.save(f, mat_pop)
    else:
        print("Loading Numpy matrices...")
        with open(os.path.join(args.save_img_dir, "matrices.npy"), "rb") as f:
            mat_bc = np.load(f)
            mat_both = np.load(f)
            mat_pop = np.load(f)

    plot_matrices(mat_bc, mat_both, mat_pop, title="Original", save_dir=args.save_img_dir)
    print_summary(mat_bc, mat_both, mat_pop, "original")

    diff_bc_bc = np.absolute(mat_bc - mat_bc)
    diff_both_bc = np.absolute(mat_both - mat_bc)
    diff_pop_bc = np.absolute(mat_pop - mat_bc)
    plot_matrices(
        diff_bc_bc,
        diff_both_bc,
        diff_pop_bc,
        title="Original-mat_bc",
        save_dir=args.save_img_dir,
    )
    print_summary(diff_bc_bc, diff_both_bc, diff_pop_bc, "bc")

    diff_bc_both = np.absolute(mat_bc - mat_both)
    diff_both_both = np.absolute(mat_both - mat_both)
    diff_pop_both = np.absolute(mat_pop - mat_both)
    plot_matrices(
        diff_bc_both,
        diff_both_both,
        diff_pop_both,
        title="Original-mat_both",
        save_dir=args.save_img_dir,
    )
    print_summary(diff_bc_both, diff_both_both, diff_pop_both, "both")

    diff_bc_pop = np.absolute(mat_bc - mat_pop)
    diff_both_pop = np.absolute(mat_both - mat_pop)
    diff_pop_pop = np.absolute(mat_pop - mat_pop)
    plot_matrices(
        diff_bc_pop,
        diff_both_pop,
        diff_pop_pop,
        title="Original-mat_pop",
        save_dir=args.save_img_dir,
    )
    print_summary(diff_bc_pop, diff_both_pop, diff_pop_pop, "pop")

    sub_win_rates.append(result[args.rule]["win"])
    writer.close()
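# create_matrix is not shown in this fragment. A hypothetical sketch, assuming
# it accumulates a co-occurrence matrix between the unit type named in the
# coach instruction and the unit type the executor predicted; NUM_TYPES and
# the handling of None labels are assumptions:
import numpy as np

NUM_TYPES = 16  # assumed size of the unit-type vocabulary

def create_matrix(instructed, executed, title=""):
    """Count co-occurrences of instructed vs. executed unit types."""
    # title is accepted for signature compatibility and unused in this sketch
    mat = np.zeros((NUM_TYPES, NUM_TYPES))
    for inst_unit, exec_unit in zip(instructed, executed):
        if inst_unit is not None:  # skip instructions that named no unit
            mat[inst_unit, exec_unit] += 1
    return mat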
import json
import logging
import time
from threading import Thread, Lock

from common_utils import Logger
from common_utils.publisher import Publisher
from common_utils.consumer import Consumer
from config import *

tops = get_tops(["AAPL"])

MD_SUBSCRIPTIONS = []
IEX_TOKEN = "Tpk_36614967265944c6b4b3e47be6b2b3ca"  # IEX sandbox ("Tpk_") token
MUTEX = Lock()
MD_BOOK = dict()

logger = Logger.Logger("market_data").get()
logger.setLevel(logging.DEBUG)


def on_callback(body):
    print("Received {}".format(body))
    body = json.loads(body)
    if 'symbol' not in body:
        logger.error('Unexpected trade with no symbol')
    else:
        MUTEX.acquire()
        MD_SUBSCRIPTIONS.append(body['symbol'])
        MUTEX.release()


def start_main():
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import argparse
import os
import sys
import pprint

from set_path import append_sys_path
append_sys_path()

import torch
import tube
from pytube import DataChannelManager
import minirts
import numpy as np
import random
import pickle
from collections import defaultdict

from rnn_coach import ConvRnnCoach
from onehot_coach import ConvOneHotCoach
from rnn_generator import RnnGenerator
from itertools import groupby
from executor_wrapper import ExecutorWrapper
from executor import Executor
from common_utils import to_device, ResultStat, Logger
from best_models import best_executors, best_coaches
from tqdm import tqdm

p1dict = defaultdict(list)
import json
import logging
import time
from threading import Thread

from common_utils import Logger
from common_utils.consumer import Consumer
# noinspection PyUnresolvedReferences
from config import *
# noinspection PyUnresolvedReferences
from models import *
# noinspection PyUnresolvedReferences
from calculators import *

logger = Logger.Logger("risk_publisher").get()
logger.setLevel(logging.DEBUG)


class RiskPublisher:
    """Manages the risk metrics for all incoming orders on a per-trader level"""

    def __init__(self):
        self.threads = []
        self.calculators = []
        self.cache_md = None
        self.inventory_store = InventoryStore()

    def start(self):
        self.create_calculators()
        logger.info('Listening to entered trades...')
        t = Thread(target=self.start_listening_trades)
    executor_wrapper.train(False)
    return executor_wrapper


if __name__ == '__main__':
    args = parse_args()
    print('args:')
    pprint.pprint(vars(args))

    os.environ['LUA_PATH'] = os.path.join(args.lua_files, '?.lua')
    print('lua path:', os.environ['LUA_PATH'])

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    logger_path = os.path.join(args.save_dir, 'train.log')
    sys.stdout = Logger(logger_path)

    device = torch.device('cuda:%d' % args.gpu)

    model1 = load_model(args.coach1, args.executor1, args)
    model2 = load_model(args.coach2, args.executor2, args)

    game_option = get_game_option(args)
    ai1_option, ai2_option = get_ai_options(
        args, [model1.coach.num_instructions, model2.coach.num_instructions])

    context, act1_dc, act2_dc = create_game(
        args.num_thread, ai1_option, ai2_option, game_option)
    context.start()
    dc = DataChannelManager([act1_dc, act2_dc])
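    # The fragment above stops right after the DataChannelManager is
    # constructed. A sketch of the consume loop that would typically follow,
    # mirroring the self-play loops elsewhere in this listing; the exact
    # DataChannelManager API (get_input/set_reply/terminate) is an assumption
    # here, not confirmed by the fragment:
    result1 = ResultStat('reward', None)
    result2 = ResultStat('reward', None)
    while not context.terminated():
        data = dc.get_input(max_timeout_s=1)
        if len(data) == 0:
            continue
        for key in data:
            batch = to_device(data[key], device)
            if key == 'act1':
                batch['actor'] = 'act1'
                result1.feed(batch)
                with torch.no_grad():
                    reply = model1.forward(batch)
            else:
                assert key == 'act2'
                batch['actor'] = 'act2'
                result2.feed(batch)
                with torch.no_grad():
                    reply = model2.forward(batch)
            dc.set_reply(key, reply)
    dc.terminate()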
# Copyright (c) Facebook, Inc. and its affiliates.
def self_play(args):
    wandb.init(project="adapt-minirts-pop", sync_tensorboard=True, dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    log_series = ",".join(args.rule_series)
    wandb.run.name = (
        f"multitask-pop_selfplay-{wandb.run.id}-{args.coach1}-{args.executor1}"
        f"-{args.train_mode}-rule_series={log_series}-random_coach={args.coach_random_init}-{args.tag}"
    )
    # wandb.run.save()
    wandb.config.update(args)

    print("args:")
    pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    print("Train Mode: {}".format(args.train_mode))

    if args.coach_reload:
        print("Reloading coach model.... ")
        args.coach1 = args.coach_load_file
        _coach1 = os.path.basename(args.coach1).replace(".pt", "")
    else:
        _coach1 = args.coach1
        args.coach1 = best_coaches[args.coach1]

    if args.exec_reload:
        print("Reloading executor model.... ")
        args.executor1 = args.exec_load_file
        _executor1 = os.path.basename(args.executor1).replace(".pt", "")
    else:
        _executor1 = args.executor1
        args.executor1 = best_executors[args.executor1]

    log_name = "multitask-pop_c1_type={}_c2_type={}__e1_type={}_e2_type={}__lr={}__num_sp={}__num_rb={}_coach_random_init={}_{}_{}".format(
        _coach1,
        args.coach2,
        _executor1,
        args.executor2,
        args.lr,
        args.num_sp,
        args.num_rb,
        args.coach_random_init,
        args.tag,
        random.randint(1111, 9999),
    )
    writer = SummaryWriter(comment=log_name)

    args.coach2 = best_coaches[args.coach2]
    args.executor2 = best_executors[args.executor2]

    logger_path = os.path.join(args.save_dir, "train.log")
    sys.stdout = Logger(logger_path)

    device = torch.device("cuda:%d" % args.gpu)

    agent_dict = {}
    for rule in args.rule_series:
        sp_agent = Agent(
            coach=args.coach1,
            executor=args.executor1,
            device=device,
            args=args,
            writer=writer,
            trainable=True,
            exec_sample=True,
            pg=args.pg,
            tag=f"_{rule}",
        )
        ## Sharing executors
        args.executor1 = sp_agent.model.executor
        sp_agent.init_save_folder(wandb.run.name)

        bc_agent = Agent(
            coach=args.coach2,
            executor=args.executor2,
            device=device,
            args=args,
            writer=writer,
            trainable=False,
            exec_sample=False,
            tag=f"_{rule}",
        )
        agent_dict[int(rule)] = {"sp_agent": sp_agent, "bc_agent": bc_agent}

    if args.same_opt:
        params = []
        for k, v in agent_dict.items():
            agent = v["sp_agent"]
            coach_params = list(agent.model.coach.parameters())
            params += coach_params
            params += list(agent.model.executor.parameters())
        optimizer = optim.Adam(params, lr=args.lr)
        for k, v in agent_dict.items():
            agent = v["sp_agent"]
            agent.set_optimizer(optimizer)

    print("Progress: ")

    ## Create Save folder:
    working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
    create_working_dir(args, working_rule_dir)

    cur_iter_idx = 1
    rules = [int(str_rule) for str_rule in args.rule_series]

    agg_agents = []
    agg_win_batches = defaultdict(dict)
    agg_loss_batches = defaultdict(dict)

    for epoch in range(args.train_epochs):
        for rule_idx in rules:
            if cur_iter_idx % args.eval_factor == 0:
                for eval_rule_idx in rules:
                    sp_agent = agent_dict[eval_rule_idx]["sp_agent"]
                    bc_agent = agent_dict[eval_rule_idx]["bc_agent"]
                    game = MultiTaskGame(sp_agent, bc_agent, cur_iter_idx, args, working_rule_dir)
                    game.evaluate_lifelong_rules(cur_iter_idx, [eval_rule_idx], "train")
                    game.terminate()
                    del game

            sp_agent = agent_dict[rule_idx]["sp_agent"]
            bc_agent = agent_dict[rule_idx]["bc_agent"]

            print("Current rule: {}".format(rule_idx))
            game = MultiTaskGame(sp_agent, bc_agent, cur_iter_idx, args, working_rule_dir)
            rule = game.train_permute[rule_idx]
            print("Current rule: {}".format(rule))
            game.init_rule_games(rule)
            agent1, agent2 = game.start()
            agent1.train()
            agent2.train()

            pbar = tqdm(total=(args.num_sp * 2 + args.num_rb))
            while not game.finished():
                data = game.get_input()
                if len(data) == 0:
                    continue
                for key in data:
                    # print(key)
                    batch = to_device(data[key], device)
                    if key == "act1":
                        batch["actor"] = "act1"
                        reply = agent1.simulate(cur_iter_idx, batch)
                        t_count = agent1.update_logs(cur_iter_idx, batch, reply)
                    elif key == "act2":
                        batch["actor"] = "act2"
                        reply = agent2.simulate(cur_iter_idx, batch)
                        t_count = agent2.update_logs(cur_iter_idx, batch, reply)
                    else:
                        assert False
                    game.set_reply(key, reply)
                    pbar.update(t_count)

            if not args.split_train:
                if args.train_mode == "coach":
                    agent1.train_coach(cur_iter_idx)
                elif args.train_mode == "executor":
                    agent1.train_executor(cur_iter_idx)
                elif args.train_mode == "both":
                    agent1.train_both(cur_iter_idx)
                else:
                    raise Exception("Invalid train mode.")
                game.print_logs(cur_iter_idx)
                game.terminate()
            else:
                if cur_iter_idx % len(rules):
                    win_batches, loss_batches = agent1.train_coach(cur_iter_idx)
                    agg_win_batches.update(win_batches)
                    agg_loss_batches.update(loss_batches)
                    agg_agents.append((agent1, agent2))
                    game.print_logs(cur_iter_idx)
                    game.terminate(keep_agents=True)
                else:
                    win_batches, loss_batches = agent1.train_coach(cur_iter_idx)
                    agg_win_batches.update(win_batches)
                    agg_loss_batches.update(loss_batches)
                    # Change shuffling
                    agent1.train_executor(
                        cur_iter_idx,
                        agg_win_batches=agg_win_batches,
                        agg_loss_batches=agg_loss_batches,
                    )
                    for agent1, agent2 in agg_agents:
                        agent1.reset()
                        agent2.reset()
                    game.print_logs(cur_iter_idx)
                    game.terminate()
                    del agg_loss_batches
                    del agg_win_batches
                    agg_win_batches = defaultdict(dict)
                    agg_loss_batches = defaultdict(dict)

            cur_iter_idx += 1
            wandb.run.summary["max_iterations"] = cur_iter_idx
            pbar.close()
            del game

    writer.close()
def self_play(args):
    wandb.init(project="adapt-minirts-zero", sync_tensorboard=True, dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    wandb.run.name = (
        f"multitask-zero_selfplay-{wandb.run.id}-{args.coach1}-{args.executor1}"
        f"-{args.train_mode}-{args.tag}")
    # wandb.run.save()
    wandb.config.update(args)

    print("args:")
    pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    print("Train Mode: {}".format(args.train_mode))

    if args.coach_reload:
        print("Reloading coach model.... ")
        args.coach1 = args.coach_load_file
        _coach1 = os.path.basename(args.coach1).replace(".pt", "")
    else:
        _coach1 = args.coach1
        args.coach1 = best_coaches[args.coach1]

    if args.exec_reload:
        print("Reloading executor model.... ")
        args.executor1 = args.exec_load_file
        _executor1 = os.path.basename(args.executor1).replace(".pt", "")
    else:
        _executor1 = args.executor1
        args.executor1 = best_executors[args.executor1]

    log_name = (
        f"multitask-zero_c1_type={_coach1}_c2_type={args.coach2}__e1_type={_executor1}_e2_type={args.executor2}__lr={args.lr}_coach_emb"
        f"={args.coach_rule_emb_size}_exec_emb={args.executor_rule_emb_size}__num_sp={args.num_sp}__num_rb={args.num_rb}_{args.tag}_{random.randint(1111, 99999)}"
    )
    writer = SummaryWriter(comment=log_name)

    args.coach2 = best_coaches[args.coach2]
    args.executor2 = best_executors[args.executor2]

    logger_path = os.path.join(args.save_dir, "train.log")
    sys.stdout = Logger(logger_path)

    device = torch.device("cuda:%d" % args.gpu)

    sp_agent = Agent(
        coach=args.coach1,
        executor=args.executor1,
        device=device,
        args=args,
        writer=writer,
        trainable=True,
        exec_sample=True,
        pg=args.pg,
    )
    sp_agent.init_save_folder(wandb.run.name)

    bc_agent = Agent(
        coach=args.coach2,
        executor=args.executor2,
        device=device,
        args=args,
        writer=writer,
        trainable=False,
        exec_sample=False,
    )

    print("Progress: ")

    ## Create Save folder:
    working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
    create_working_dir(args, working_rule_dir)

    cur_iter_idx = 1
    for epoch in range(args.train_epochs):
        print("Current epoch: {}".format(epoch))
        game = MultiTaskGame(sp_agent, bc_agent, epoch, args, working_rule_dir)
        # game.evaluate(epoch, 'valid', 3)

        for rule_idx in range(game.num_train_rules):
            # if rule_idx % args.eval_factor == 0:
            #     game.evaluate(epoch * game.num_train_rules + rule_idx, 'valid', 10)
            rule = game.train_permute[rule_idx]
            print(f"Current rule ({rule_idx}): {rule}")
            game.init_rule_games(rule)
            agent1, agent2 = game.start()
            agent1.train()
            agent2.train()

            pbar = tqdm(total=(args.num_sp * 2 + args.num_rb))
            while not game.finished():
                data = game.get_input()
                if len(data) == 0:
                    continue
                for key in data:
                    batch = to_device(data[key], device)
                    rule_tensor = (torch.tensor([
                        UNIT_DICT[unit] for unit in rule
                    ]).to(device).repeat(batch["game_id"].size(0), 1))
                    batch["rule_tensor"] = rule_tensor
                    if key == "act1":
                        batch["actor"] = "act1"
                        reply = agent1.simulate(cur_iter_idx, batch)
                        t_count = agent1.update_logs(cur_iter_idx, batch, reply)
                    elif key == "act2":
                        batch["actor"] = "act2"
                        reply = agent2.simulate(cur_iter_idx, batch)
                        t_count = agent2.update_logs(cur_iter_idx, batch, reply)
                    else:
                        assert False
                    game.set_reply(key, reply)
                    pbar.update(t_count)

            cur_iter_idx += 1
            pbar.close()

            if cur_iter_idx % args.update_iter:
                if args.train_mode == "coach":
                    agent1.train_coach(cur_iter_idx)
                elif args.train_mode == "executor":
                    agent1.train_executor(cur_iter_idx)
                elif args.train_mode == "both":
                    agent1.train_both(cur_iter_idx)
                else:
                    raise Exception("Invalid train mode.")
                game.print_logs(cur_iter_idx)
                game.terminate()
            else:
                game.terminate(keep_agents=True)

        del game

    writer.close()
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import argparse
import os
import sys
import pprint

from set_path import append_sys_path
append_sys_path()

import torch
import tube
from pytube import DataChannelManager
import minirts
import numpy as np
import random
import time
from torch.utils.tensorboard import SummaryWriter
from simanneal.anneal import time_string
import pickle
from collections import defaultdict

from rnn_coach import ConvRnnCoach
from onehot_coach import ConvOneHotCoach
from rnn_generator import RnnGenerator
from itertools import groupby
from executor_wrapper import ExecutorWrapper
from executor import Executor
from common_utils import to_device, ResultStat, Logger
import logging

from models import RiskCalculator
from common_utils import Logger, calcUtils

logger = Logger.Logger("risk_calculators").get()
logger.setLevel(logging.DEBUG)


class ProfitLoss(RiskCalculator):
    """Simple pnl calculator"""

    def __init__(self):
        super().__init__()

    def calculate(self, calculator_input, market_data):
        """
        Use the original prices of all trades from the inventory
        and find pnl using the current market data.

        Input: SingleTrade
        :return: None
        """
        logger.info('Running ProfitLoss calculator')
        trade_price = calculator_input.get_price()
        symbol = calculator_input.get_symbol()
        qty = calculator_input.get_qty()
        side = calculator_input.get_side()

        # Market data is the MD_BOOK
        print('market data ')
        print(market_data)
        current_price = calcUtils.get_mid_price(market_data[symbol])
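# calcUtils.get_mid_price is not shown in this fragment. A minimal sketch,
# assuming the per-symbol market-data entry carries best bid/ask fields (the
# field names 'bid' and 'ask' are assumptions):
def get_mid_price(quote):
    """Mid price as the average of the best bid and best ask."""
    return (float(quote['bid']) + float(quote['ask'])) / 2.0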
def self_play(args):
    wandb.init(project="adapt-minirts", sync_tensorboard=True, dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    wandb.run.name = (
        f"multitask-fixed_analyse-int-{wandb.run.id}-{args.coach1}-{args.executor1}"
        f"-{args.train_mode}-rule{args.rule}-{args.tag}"
    )
    # wandb.run.save()
    wandb.config.update(args)

    print("args:")
    pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    output_list = []

    print("Overriding args.model.... with random model")
    # models = ['both_finetuned_rule80',
    #           'both_finetuned_rule3',
    #           'both_finetuned_rule7',
    #           "both_finetuned_rule14",
    #           "both_finetuned_rule12"]
    # models = ['both_finetuned_rule80', 'both_finetuned_rule3', 'behaviour_cloned', 'hier_exec_finetuned_rule21',
    #           "hier_exec_finetuned_rule80", "both_finetuned_rule12", "both_finetuned_rule7",
    #           "hier_exec_finetuned_rule14", "both_finetuned_rule3", "hier_coach_finetuned_rule80", "hier_coach_finetuned_rule21"]
    models = [
        "hier_exec_finetuned_rule21",
        "hier_exec_finetuned_rule80",
        "hier_exec_finetuned_rule14",
    ]
    # models = ["hier_coach_finetuned_rule80", "hier_coach_finetuned_rule21"]
    args.model = random.choice(models)
    print(f"Using model {args.model}")

    print("Overriding args.rule.... with random rule")
    rules = [80, 7, 14, 12, 3]
    args.rule = random.choice(rules)
    print(f"Using rule {args.rule}")

    if args.model == "behaviour_cloned":
        args.coach1 = best_coaches["rnn500"]
        args.executor1 = best_executors["rnn"]
    else:
        print("Reloading coach model.... ")
        print("Reloading executor model.... ")
        model_dict = model_dicts[args.model]
        args.coach1 = wandb.restore(
            model_dict["best_coach"], run_path=model_dict["run_path"]
        ).name
        wandb.restore(
            model_dict["best_coach"] + ".params", run_path=model_dict["run_path"]
        )
        args.executor1 = wandb.restore(
            model_dict["best_exec"], run_path=model_dict["run_path"]
        ).name
        wandb.restore(
            model_dict["best_exec"] + ".params", run_path=model_dict["run_path"]
        )

    args.coach2 = args.coach1
    args.executor2 = args.executor1

    _coach1 = os.path.basename(args.coach1).replace(".pt", "")
    _executor1 = os.path.basename(args.executor1).replace(".pt", "")

    log_name = "multitask-fixed-analyze-int_c1_type={}_c2_type={}__e1_type={}_e2_type={}__lr={}__num_sp={}__num_rb={}_{}_{}".format(
        _coach1,
        args.coach2,
        _executor1,
        args.executor2,
        args.lr,
        args.num_sp,
        args.num_rb,
        args.tag,
        random.randint(1111, 9999),
    )
    writer = SummaryWriter(comment=log_name)

    logger_path = os.path.join(args.save_dir, "train.log")
    sys.stdout = Logger(logger_path)

    device = torch.device("cuda:%d" % args.gpu)

    sp_agent = Agent(
        coach=args.coach1,
        executor=args.executor1,
        device=device,
        args=args,
        writer=writer,
        trainable=True,
        exec_sample=True,
        pg=args.pg,
    )
    sp_agent.init_save_folder(wandb.run.name)

    bc_agent = Agent(
        coach=args.coach2,
        executor=args.executor2,
        device=device,
        args=args,
        writer=writer,
        trainable=False,
        exec_sample=False,
    )

    print("Progress: ")

    def count_parameters(model):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"sp_agent coach params: {count_parameters(sp_agent.model.coach)}")
    print(f"sp_agent exec params: {count_parameters(sp_agent.model.executor)}")

    ## Create Save folder:
    working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
    create_working_dir(args, working_rule_dir)

    cur_iter_idx = 1
    rule_idx = args.rule
    game = MultiTaskGame(sp_agent, bc_agent, cur_iter_idx, args, working_rule_dir)
    for r in [80, 40, 20, 21, 14, 7, 3, 12, 13]:
        game.print_rule_desc(r, split="train")
    writer.close()

    print("#" * 40)
    print("#" * 40)
    print("#" * 40)
    print()
    print("\n\n".join(output_list))
    print()
    print("#" * 40)
    print("#" * 40)
    print("#" * 40)
def self_play(args):
    wandb.init(project="adapt-minirts-pop-eval", sync_tensorboard=True, dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    date = datetime.date(datetime.now())
    wandb.run.name = f"multitask-pop-eval-{wandb.run.id}-{date}-{args.tag}"
    # wandb.run.save()
    wandb.config.update(args)
    # print("args:")
    # pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    result_dict = {}
    exp_code = args.experiment_code
    eval_exp_list = [experiment_list[exp_code]] if exp_code != -1 else experiment_list

    partial_json_save_dir = os.path.join(args.eval_folder, f"eval-{date}-{args.tag}")
    if os.path.exists(partial_json_save_dir):
        print("Attempting to create an existing folder.. hence skipping...")
    else:
        os.makedirs(partial_json_save_dir)

    if args.model_json is not None:
        if os.path.exists(args.model_json):
            print("Using model json dictionary...")
            with open(os.path.join(args.model_json, "model_paths.json")) as f:
                model_json = json.load(f)
        else:
            raise FileNotFoundError("Model json dict cannot be found...")
    else:
        model_json = None

    for exp_name in eval_exp_list:
        print("#" * 40)
        print("#" * 40)
        print(f"Experiment: {exp_name}")
        print("-" * 40)
        print("-" * 40)

        sub_exp_result_dict = {}
        for (sub_exp_name, sub_exp_dict) in experiment_dict[exp_name].items():
            print("*" * 40)
            print(f"Sub experiment name: {sub_exp_name}")
            print("*" * 40)

            coaches = sub_exp_dict["coach"]
            if "random" in coaches:
                coach_variant = coaches["variant"]
                random_coach = coaches["random"]
                coaches = coaches["coach"]
            else:
                coach_variant = None
                random_coach = False

            execs = sub_exp_dict["executor"]
            if "variant" in execs:
                exec_variant = execs["variant"]
                execs = execs["executor"]
            else:
                exec_variant = None

            rule = sub_exp_dict["env"]
            num_sub_exps = 0
            win_rates = []

            num_total_sub_exps = min(len(coaches), len(execs))
            if sub_exp_name == "random-bc" or sub_exp_name == "bc-bc":
                num_total_sub_exps = 1
            if sub_exp_name != "ft_coach[21]-bc" and sub_exp_name.startswith("ft_coach[21]"):
                coaches = [coaches[0]] * len(execs) + [coaches[1]] * len(execs)
                execs = execs * 2
                num_total_sub_exps = len(execs)

            for (coach, executor) in zip(coaches, execs):  ## Do we want to check if coaches == execs?
                if coaches == execs and coach != executor:
                    continue

                print(f"Experiment number: {num_sub_exps}")
                if model_json is not None and args.model_json is not None:
                    if num_total_sub_exps != len(model_json[exp_name][sub_exp_name]["model_paths"]):
                        downloaded_count = len(model_json[exp_name][sub_exp_name]["model_paths"])
                        print(
                            f"Downloaded number: {downloaded_count} != num_total_sub_exps: {num_total_sub_exps}"
                        )
                        raise OSError("Number of sub-exp mismatch.")
                    args.coach1 = model_json[exp_name][sub_exp_name]["model_paths"][num_sub_exps]["coach"]
                else:
                    args.coach1 = get_coach_path(coach, coach_variant=coach_variant)

                if random_coach:
                    args.coach_random_init = True
                else:
                    args.coach_random_init = False

                if model_json is not None and args.model_json is not None:
                    args.executor1 = model_json[exp_name][sub_exp_name]["model_paths"][num_sub_exps]["executor"]
                else:
                    args.executor1 = get_executor_path(executor, exec_variant=exec_variant)

                args.rule = rule
                args.coach2 = args.coach1
                args.executor2 = args.executor1

                _coach1 = os.path.basename(args.coach1).replace(".pt", "")
                _executor1 = os.path.basename(args.executor1).replace(".pt", "")

                log_name = "multitask-pop-analyze_c1_type={}_c2_type={}__e1_type={}_e2_type={}__lr={}__num_sp={}__num_rb={}_{}_{}".format(
                    _coach1,
                    args.coach2,
                    _executor1,
                    args.executor2,
                    args.lr,
                    args.num_sp,
                    args.num_rb,
                    args.tag,
                    random.randint(1111, 9999),
                )
                writer = SummaryWriter(comment=log_name)

                logger_path = os.path.join(args.save_dir, "train.log")
                sys.stdout = Logger(logger_path)

                device = torch.device("cuda:%d" % args.gpu)

                sp_agent = Agent(
                    coach=args.coach1,
                    executor=args.executor1,
                    device=device,
                    args=args,
                    writer=writer,
                    trainable=True,
                    exec_sample=True,
                    pg=args.pg,
                )
                sp_agent.init_save_folder(wandb.run.name)

                bc_agent = Agent(
                    coach=args.coach2,
                    executor=args.executor2,
                    device=device,
                    args=args,
                    writer=writer,
                    trainable=False,
                    exec_sample=False,
                )

                print("Progress: ")

                ## Create Save folder:
                working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
                create_working_dir(args, working_rule_dir)

                cur_iter_idx = 1
                rules = [args.rule]
                print("Current rule: {}".format(rules[0]))

                NUM_SPLIT_GAMES = 4
                sub_win_rates = []
                for i in range(NUM_SPLIT_GAMES):
                    game = MultiTaskGame(sp_agent, bc_agent, cur_iter_idx, args, working_rule_dir)
                    print(f"Num games: {args.num_games}")
                    num_sub_games = args.num_games // NUM_SPLIT_GAMES
                    print(f"Num sub games: {num_sub_games}")
                    result = game.analyze_rule_games(
                        cur_iter_idx,
                        rules,
                        "train",
                        viz=args.viz,
                        num_games=num_sub_games,
                    )
                    sub_win_rates.append(result[args.rule]["win"])
                    game.terminate()
                    del game

                writer.close()

                print(f"Total sub win rates: {sub_win_rates}")
                win_rate = sum(sub_win_rates) / NUM_SPLIT_GAMES
                print(f"Win Rates: {win_rate}")
                win_rates.append(win_rate * 100)
                num_sub_exps += 1

                if sub_exp_name == "bc-bc" or sub_exp_name == "random-bc":
                    break

            sub_exp_result_dict[sub_exp_name] = {
                "win_rate": win_rates,
                "Win_rate mean": np.mean(win_rates),
                "Win_rate variance ": np.var(win_rates),
                "Num total trials": num_sub_exps,
            }
            print("++" * 50)
            pprint.pprint(sub_exp_result_dict[sub_exp_name])
            print("++" * 50)

        result_dict[exp_name] = sub_exp_result_dict
        print("--" * 50)
        print("Results so far: ")
        print("--" * 50)
        pprint.pprint(result_dict)
        print("--" * 50)

    print("Final Results: ")
    print("##" * 50)
    pprint.pprint(result_dict)
    print("##" * 50)

    print("Saving result jsons...")
    code = exp_code if exp_code != -1 else "all"
    random_number = randint(1, 100000)
    with open(
            os.path.join(partial_json_save_dir, f"partial-{random_number}-{code}.json"),
            "w") as fp:
        json.dump(result_dict, fp)
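# Hypothetical companion snippet: the run above writes partial result JSONs
# named partial-<rand>-<code>.json; merging them back is assumed to look like
# this (the merge policy is a guess, not part of the source):
import glob
import json
import os

merged = {}
for path in sorted(glob.glob(os.path.join(partial_json_save_dir, "partial-*.json"))):
    with open(path) as fp:
        merged.update(json.load(fp))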