Example #1
def main():
    parser = argparse.ArgumentParser(
        description='generate states from replays')
    parser.add_argument('--binary',
                        type=str,
                        default='../../build/minirts-backend')
    parser.add_argument('--replays-root', type=str)
    parser.add_argument('--output-root', type=str)
    parser.add_argument('--replay-file-extension', type=str, default='.rep')
    # configs for generating jsons
    parser.add_argument('--human', action='store_true', default=False)
    parser.add_argument('--player1', type=str, default='dummy,fs=50')
    parser.add_argument('--player2', type=str, default='dummy,fs=50')
    parser.add_argument('--max-tick', type=int, default=40000)
    args = parser.parse_args()

    args.binary = os.path.abspath(args.binary)
    if not os.path.exists(args.binary):
        print('cannot find binary at:', args.binary)
        assert False

    logger_path = os.path.join(args.output_root, 'config')
    sys.stdout = Logger(logger_path)

    replays = get_all_files(args.replays_root, args.replay_file_extension)

    bad_replays = generate_states(replays, args, args.replays_root,
                                  args.output_root)
    print('number of corrupted replays: %d' % len(bad_replays))
    for replay in bad_replays:
        print(replay)
Example #2
def self_play(args):

    wandb.init(project="adapt-minirts",
               sync_tensorboard=True,
               dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    wandb.run.name = (
        f"multitask-fixed_selfplay-{wandb.run.id}-{args.coach1}-{args.executor1}"
        f"-{args.train_mode}-rule{args.rule}-{args.tag}")
    # wandb.run.save()
    wandb.config.update(args)

    print("args:")
    pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    print("Train Mode: {}".format(args.train_mode))

    if args.coach_reload:
        print("Reloading coach model.... ")
        args.coach1 = args.coach_load_file
        _coach1 = os.path.basename(args.coach1).replace(".pt", "")

    else:
        _coach1 = args.coach1
        args.coach1 = best_coaches[args.coach1]

    if args.exec_reload:
        print("Reloading executor model.... ")
        args.executor1 = args.exec_load_file
        _executor1 = os.path.basename(args.executor1).replace(".pt", "")
    else:
        _executor1 = args.executor1
        args.executor1 = best_executors[args.executor1]

    log_name = "multitask-fixed_c1_type={}_c2_type={}__e1_type={}_e2_type={}__lr={}__num_sp={}__num_rb={}_{}_{}".format(
        _coach1,
        args.coach2,
        _executor1,
        args.executor2,
        args.lr,
        args.num_sp,
        args.num_rb,
        args.tag,
        random.randint(1111, 9999),
    )
    writer = SummaryWriter(comment=log_name)

    args.coach2 = best_coaches[args.coach2]
    args.executor2 = best_executors[args.executor2]

    logger_path = os.path.join(args.save_dir, "train.log")

    sys.stdout = Logger(logger_path)

    device = torch.device("cuda:%d" % args.gpu)

    sp_agent = Agent(
        coach=args.coach1,
        executor=args.executor1,
        device=device,
        args=args,
        writer=writer,
        trainable=True,
        exec_sample=True,
        pg=args.pg,
    )

    sp_agent.init_save_folder(wandb.run.name)

    bc_agent = Agent(
        coach=args.coach2,
        executor=args.executor2,
        device=device,
        args=args,
        writer=writer,
        trainable=False,
        exec_sample=False,
    )

    print("Progress: ")
    ## Create Save folder:
    working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
    create_working_dir(args, working_rule_dir)

    cur_iter_idx = 1
    rules = [args.rule]
    for rule_idx in rules:

        print("Current rule: {}".format(rule_idx))
        game = MultiTaskGame(sp_agent, bc_agent, cur_iter_idx, args,
                             working_rule_dir)

        for epoch in range(args.train_epochs):
            if epoch % args.eval_factor == 0:
                game.evaluate_rules(cur_iter_idx, rules, "train")

            rule = game.train_permute[rule_idx]
            print("Current rule: {}".format(rule))
            game.init_rule_games(rule)
            agent1, agent2 = game.start()

            agent1.train()
            agent2.train()

            pbar = tqdm(total=(args.num_sp * 2 + args.num_rb))

            while not game.finished():
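                # Poll observation batches from the running games and route each
                # batch to the agent that owns it ("act1" or "act2"); the chosen
                # replies are fed back through game.set_reply().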

                data = game.get_input()

                if len(data) == 0:
                    continue
                for key in data:
                    # print(key)
                    batch = to_device(data[key], device)

                    if key == "act1":
                        batch["actor"] = "act1"
                        reply = agent1.simulate(cur_iter_idx, batch)
                        t_count = agent1.update_logs(cur_iter_idx, batch,
                                                     reply)

                    elif key == "act2":
                        batch["actor"] = "act2"
                        reply = agent2.simulate(cur_iter_idx, batch)
                        t_count = agent2.update_logs(cur_iter_idx, batch,
                                                     reply)

                    else:
                        assert False

                    game.set_reply(key, reply)
                    pbar.update(t_count)

            if args.train_mode == "coach":
                agent1.train_coach(cur_iter_idx)
            elif args.train_mode == "executor":
                agent1.train_executor(cur_iter_idx)
            elif args.train_mode == "both":
                agent1.train_both(cur_iter_idx)
            else:
                raise Exception("Invalid train mode.")

            game.print_logs(cur_iter_idx)
            cur_iter_idx += 1
            wandb.run.summary[f"max_iterations"] = cur_iter_idx
            pbar.close()
            game.terminate()

        del game

    writer.close()
Example #3
def self_play(args):

    wandb.init(project="adapt-minirts-pop-eval",
               sync_tensorboard=True,
               dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    wandb.run.name = (
        f"multitask-pop-eval-{wandb.run.id}-{args.coach1}-{args.executor1}"
        f"-{args.train_mode}-rule{args.rule}-{args.tag}")
    # wandb.run.save()
    wandb.config.update(args)

    print("args:")
    pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # args.coach1 = get_coach_path(model_dicts["ft_pop[80,40,20]"][0], coach_variant=80)
    # args.executor1 = get_executor_path(model_dicts["ft_pop[80,40,20]"][0])

    args.coach1 = get_coach_path(model_dicts["ft_both[80]"][0])
    args.executor1 = get_executor_path(model_dicts["ft_both[80]"][0])

    args.coach2 = get_coach_path(model_dicts["bc"][0])
    args.executor2 = get_executor_path(model_dicts["bc"][0])

    _coach1 = os.path.basename(args.coach1).replace(".pt", "")
    _executor1 = os.path.basename(args.executor1).replace(".pt", "")

    log_name = "multitask-pop-eval-analyze_c1_type={}_c2_type={}__e1_type={}_e2_type={}__lr={}__num_sp={}__num_rb={}_{}_{}".format(
        _coach1,
        args.coach2,
        _executor1,
        args.executor2,
        args.lr,
        args.num_sp,
        args.num_rb,
        args.tag,
        random.randint(1111, 9999),
    )
    writer = SummaryWriter(comment=log_name)

    logger_path = os.path.join(args.save_dir, "train.log")

    sys.stdout = Logger(logger_path)

    device = torch.device("cuda:%d" % args.gpu)

    sp_agent = Agent(
        coach=args.coach1,
        executor=args.executor1,
        device=device,
        args=args,
        writer=writer,
        trainable=True,
        exec_sample=True,
        pg=args.pg,
    )

    sp_agent.init_save_folder(wandb.run.name)

    bc_agent = Agent(
        coach=args.coach2,
        executor=args.executor2,
        device=device,
        args=args,
        writer=writer,
        trainable=True,
        exec_sample=True,
    )

    print("Progress: ")
    ## Create Save folder:
    working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
    create_working_dir(args, working_rule_dir)

    cur_iter_idx = 1
    rules = [args.rule]
    for rule_idx in rules:
        print("Current rule: {}".format(rule_idx))
        game = MultiTaskGame(sp_agent, bc_agent, cur_iter_idx, args,
                             working_rule_dir)
        game.analyze_rule_games(
            cur_iter_idx,
            rules,
            "train",
            viz=args.viz,
            num_games=0,
            num_sp=100,
        )
        game.terminate()
        del game

    writer.close()
Example #4
def self_play(args):

    wandb.init(project="adapt-minirts-pop-eval",
               sync_tensorboard=True,
               dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    date = datetime.date(datetime.now())
    wandb.run.name = f"multitask-pop-drift-eval-{wandb.run.id}-{date}-{args.tag}"
    # wandb.run.save()
    wandb.config.update(args)

    # print("args:")
    # pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    partial_json_save_dir = os.path.join(args.eval_folder,
                                         f"drift-eval-{date}-{args.tag}")

    if os.path.exists(partial_json_save_dir):
        print("Attempting to create an existing folder.. hence skipping...")
    else:
        os.makedirs(partial_json_save_dir)

    coach1 = get_coach_path(model_dicts["ft_coach[7]"][0])
    executors = {
        "bc": get_executor_path(model_dicts["bc"][0]),
        "ft_both[80]": get_executor_path(model_dicts["ft_both[80]"][0]),
        "ft_pop[80,40,20]":
        get_executor_path(model_dicts["ft_pop[80,40,20]"][0]),
    }
    rules = [7]
    args.coach_random_init = True
    NUM_GAMES = 250

    args.coach1 = coach1
    args.coach2 = coach1
    args.executor2 = executors["bc"]
    _coach1 = os.path.basename(args.coach1).replace(".pt", "")
    _executor1 = os.path.basename(args.executor1).replace(".pt", "")

    log_name = "multitask-pop-analyze-drift_c1_type={}_c2_type={}__e1_type={}_e2_type={}__lr={}__num_sp={}__num_rb={}_{}_{}".format(
        _coach1,
        args.coach2,
        _executor1,
        args.executor2,
        args.lr,
        args.num_sp,
        args.num_rb,
        args.tag,
        random.randint(1111, 9999),
    )
    writer = SummaryWriter(comment=log_name)

    logger_path = os.path.join(args.save_dir, "train.log")

    sys.stdout = Logger(logger_path)

    device = torch.device("cuda:%d" % args.gpu)
    sp_agent = MultiExecutorAgent(
        coach=args.coach1,
        executors=executors,
        device=device,
        args=args,
        writer=writer,
        trainable=False,
        exec_sample=True,
        pg=args.pg,
    )

    sp_agent.init_save_folder(wandb.run.name)

    bc_agent = Agent(
        coach=args.coach2,
        executor=args.executor2,
        device=device,
        args=args,
        writer=writer,
        trainable=False,
        exec_sample=False,
    )

    print("Progress: ")
    ## Create Save folder:
    working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
    create_working_dir(args, working_rule_dir)

    print("Current rule: {}".format(rules[0]))
    sub_win_rates = []
    full_reply_dicts = []
    if not os.path.exists(os.path.join(args.save_img_dir, "matrices.npy")):
        for i in tqdm(range(NUM_GAMES)):
            game = MultiTaskGame(sp_agent, bc_agent, 0, args, working_rule_dir)
            # rule = randint(0, 80)
            result, reply_dicts = game.drift_analysis_games(
                0,
                rules,
                "train",
                viz=args.viz,
                num_games=1,
            )
            full_reply_dicts += reply_dicts
            game.terminate()
            del game

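        # Map the coach's sampled instruction indices back to their
        # natural-language command strings via the instruction dictionary.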
        inst_list = [
            sp_agent.model.coach.inst_dict._idx2inst[
                reply["bc_coach"]["inst"].squeeze().tolist()]
            for reply in full_reply_dicts
        ]

        build_unit_list = []
        for inst in inst_list:
            if "create" in inst or "build" in inst or "train" in inst or "make" in inst:
                if "peasant" in inst:
                    unit = UNIT_TYPE_TO_IDX["PEASANT"]
                elif "dragon" in inst:
                    unit = UNIT_TYPE_TO_IDX["DRAGON"]
                elif "archer" in inst:
                    unit = UNIT_TYPE_TO_IDX["ARCHER"]
                elif "cavalry" in inst or "cavs" in inst:
                    unit = UNIT_TYPE_TO_IDX["CAVALRY"]
                elif "spearman" in inst:
                    unit = UNIT_TYPE_TO_IDX["SPEARMAN"]
                elif ("swordman" in inst or "swordsman" in inst
                      or "swords" in inst or "sword" in inst):
                    unit = UNIT_TYPE_TO_IDX["SWORDMAN"]
                elif "catapult" in inst:
                    unit = UNIT_TYPE_TO_IDX["CATAPULT"]
                else:
                    unit = None

                build_unit_list.append(unit)
            else:
                build_unit_list.append(None)

        build_building_list = []
        for inst in inst_list:
            if "create" in inst or "build" in inst or "train" in inst or "make" in inst:
                if "shop" in inst:
                    unit = UNIT_TYPE_TO_IDX["WORKSHOP"]
                elif "stable" in inst:
                    unit = UNIT_TYPE_TO_IDX["STABLE"]
                elif "barrack" in inst:
                    unit = UNIT_TYPE_TO_IDX["BARRACK"]
                elif "tower" in inst:
                    unit = UNIT_TYPE_TO_IDX["GUARD_TOWER"]
                elif "blacksmith" in inst:
                    unit = UNIT_TYPE_TO_IDX["BLACKSMITH"]
                else:
                    unit = None

                build_building_list.append(unit)
            else:
                build_building_list.append(None)

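        # For each reply, sum the executor's predicted unit-type probabilities
        # over the first axis, drop class 0, and take the argmax (+1) as that
        # executor's dominant predicted unit type.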
        bc_executor_unit_types = (
            np.asarray([
                (
                    reply["bc_executor"]["one_hot_reply"]
                    ["unit_type_prob"].squeeze()
                    # * (
                    #     reply["bc_executor"]["one_hot_reply"]["cmd_type_prob"]
                    #     .squeeze()[:, [4, 6]]
                    #     .sum(1)
                    #     >= 1.0
                    # ).unsqueeze(1)
                ).sum(0).tolist()[1:] for reply in full_reply_dicts
            ]).argmax(1) + 1)
        ft_both_unit_type = (
            np.asarray([
                (
                    reply["ft_both[80]"]["one_hot_reply"]
                    ["unit_type_prob"].squeeze()
                    # * (
                    #     reply["ft_both[80]"]["one_hot_reply"]["cmd_type_prob"]
                    #     .squeeze()[:, [4, 6]]
                    #     .sum(1)
                    #     >= 1.0
                    # ).unsqueeze(1)
                ).sum(0).tolist()[1:] for reply in full_reply_dicts
            ]).argmax(1) + 1)
        ft_pop_unit_type = (
            np.asarray([
                (
                    reply["ft_pop[80,40,20]"]["one_hot_reply"]
                    ["unit_type_prob"].squeeze()
                    # * (
                    #     reply["ft_pop[80,40,20]"]["one_hot_reply"]["cmd_type_prob"]
                    #     .squeeze()[:, [4, 6]]
                    #     .sum(1)
                    #     >= 1.0
                    # ).unsqueeze(1)
                ).sum(0).tolist()[1:] for reply in full_reply_dicts
            ]).argmax(1) + 1)

        bc_executor_building_types = (
            np.asarray([
                (
                    reply["bc_executor"]["one_hot_reply"]
                    ["building_type_prob"].squeeze()
                    # * (
                    #     reply["bc_executor"]["one_hot_reply"]["cmd_type_prob"]
                    #     .squeeze()[:, [3, 6]]
                    #     .sum(1)
                    #     > 1.0
                    # ).unsqueeze(1)
                ).sum(0).tolist()[1:] for reply in full_reply_dicts
            ]).argmax(1) + 1)
        ft_both_building_type = (
            np.asarray([
                (
                    reply["ft_both[80]"]["one_hot_reply"]
                    ["building_type_prob"].squeeze()
                    # * (
                    #     reply["ft_both[80]"]["one_hot_reply"]["cmd_type_prob"]
                    #     .squeeze()[:, [3, 6]]
                    #     .sum(1)
                    #     > 1.0
                    # ).unsqueeze(1)
                ).sum(0).tolist()[1:] for reply in full_reply_dicts
            ]).argmax(1) + 1)
        ft_pop_building_type = (
            np.asarray([
                (
                    reply["ft_pop[80,40,20]"]["one_hot_reply"]
                    ["building_type_prob"].squeeze()
                    # * (
                    #     reply["ft_pop[80,40,20]"]["one_hot_reply"]["cmd_type_prob"]
                    #     .squeeze()[:, [3, 6]]
                    #     .sum(1)
                    #     > 1.0
                    # ).unsqueeze(1)
                ).sum(0).tolist()[1:] for reply in full_reply_dicts
            ]).argmax(1) + 1)

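        # Build matrices comparing the unit/building types named in the coach
        # instructions against each executor's dominant predictions.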
        mat_bc_units = create_matrix(build_unit_list,
                                     bc_executor_unit_types,
                                     title="bc-bc")
        mat_both_units = create_matrix(build_unit_list,
                                       ft_both_unit_type,
                                       title="bc-both")
        mat_pop_units = create_matrix(build_unit_list,
                                      ft_pop_unit_type,
                                      title="bc-pop")

        mat_bc_buildings = create_matrix(build_building_list,
                                         bc_executor_building_types,
                                         title="bc-bc")
        mat_both_buildings = create_matrix(build_building_list,
                                           ft_both_building_type,
                                           title="bc-both")
        mat_pop_buildings = create_matrix(build_building_list,
                                          ft_pop_building_type,
                                          title="bc-pop")

        mat_bc = mat_bc_units  # + mat_bc_buildings
        mat_both = mat_both_units  # + mat_both_buildings
        mat_pop = mat_pop_units  # + mat_pop_buildings

        print("Saving Numpy matrices...")
        with open(os.path.join(args.save_img_dir, "matrices.npy"), "wb") as f:
            np.save(f, mat_bc)
            np.save(f, mat_both)
            np.save(f, mat_pop)
    else:
        print("Loading Numpy matrices...")
        with open(os.path.join(args.save_img_dir, "matrices.npy"), "rb") as f:
            mat_bc = np.load(f)
            mat_both = np.load(f)
            mat_pop = np.load(f)

    plot_matrices(mat_bc,
                  mat_both,
                  mat_pop,
                  title="Original",
                  save_dir=args.save_img_dir)

    print_summary(mat_bc, mat_both, mat_pop, "original")

    diff_bc_bc = np.absolute(mat_bc - mat_bc)
    diff_both_bc = np.absolute(mat_both - mat_bc)
    diff_pop_bc = np.absolute(mat_pop - mat_bc)
    plot_matrices(
        diff_bc_bc,
        diff_both_bc,
        diff_pop_bc,
        title="Original-mat_bc",
        save_dir=args.save_img_dir,
    )

    print_summary(diff_bc_bc, diff_both_bc, diff_pop_bc, "bc")

    diff_bc_both = np.absolute(mat_bc - mat_both)
    diff_both_both = np.absolute(mat_both - mat_both)
    diff_pop_both = np.absolute(mat_pop - mat_both)

    plot_matrices(
        diff_bc_both,
        diff_both_both,
        diff_pop_both,
        title="Original-mat_both",
        save_dir=args.save_img_dir,
    )

    print_summary(diff_bc_both, diff_both_both, diff_pop_both, "both")

    diff_bc_pop = np.absolute(mat_bc - mat_pop)
    diff_both_pop = np.absolute(mat_both - mat_pop)
    diff_pop_pop = np.absolute(mat_pop - mat_pop)

    plot_matrices(
        diff_bc_pop,
        diff_both_pop,
        diff_pop_pop,
        title="Original-mat_pop",
        save_dir=args.save_img_dir,
    )

    print_summary(diff_bc_pop, diff_both_pop, diff_pop_pop, "pop")

    sub_win_rates.append(result[args.rule]["win"])
    writer.close()
Example #5
import json
import logging
from threading import Thread, Lock
import time

from common_utils import Logger
from common_utils.publisher import Publisher
from common_utils.consumer import Consumer
from config import *

tops = get_tops(["AAPL"])

MD_SUBSCRIPTIONS = []
IEX_TOKEN = "Tpk_36614967265944c6b4b3e47be6b2b3ca"
MUTEX = Lock()
MD_BOOK = dict()

logger = Logger.Logger("market_data").get()
logger.setLevel(logging.DEBUG)


def on_callback(body):
    print("Received {}".format(body))
    body = json.loads(body)
    if 'symbol' not in body:
        logger.error('Unexpected trade with no symbol')
    else:
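        # Append under the lock: MD_SUBSCRIPTIONS is a module-level list
        # shared with other threads.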
        MUTEX.acquire()
        MD_SUBSCRIPTIONS.append(body['symbol'])
        MUTEX.release()


def start_main():
Example #6
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import argparse
import os
import sys
import pprint
from set_path import append_sys_path
append_sys_path()
import torch
import tube
from pytube import DataChannelManager
import minirts
import numpy as np
import random
import pickle
from collections import defaultdict
from rnn_coach import ConvRnnCoach
from onehot_coach import ConvOneHotCoach
from rnn_generator import RnnGenerator
from itertools import groupby
from executor_wrapper import ExecutorWrapper
from executor import Executor
from common_utils import to_device, ResultStat, Logger
from best_models import best_executors, best_coaches
from tqdm import tqdm
p1dict = defaultdict(list)
Example #7
import json
import logging
from threading import Thread
import time

from common_utils import Logger
from common_utils.consumer import Consumer
# noinspection PyUnresolvedReferences
from config import *
# noinspection PyUnresolvedReferences
from models import *
# noinspection PyUnresolvedReferences
from calculators import *

logger = Logger.Logger("risk_publisher").get()
logger.setLevel(logging.DEBUG)


class RiskPublisher:
    """Manages the risk metrics for all incoming orders on a per-trader level"""
    def __init__(self):
        self.threads = []
        self.calculators = []
        self.cache_md = None
        self.inventory_store = InventoryStore()

    def start(self):
        self.create_calculators()

        logger.info('Listening to entered trades...')
        t = Thread(target=self.start_listening_trades)
Example #8
    executor_wrapper.train(False)
    return executor_wrapper


if __name__ == '__main__':
    args = parse_args()
    print('args:')
    pprint.pprint(vars(args))

    os.environ['LUA_PATH'] = os.path.join(args.lua_files, '?.lua')
    print('lua path:', os.environ['LUA_PATH'])

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    logger_path = os.path.join(args.save_dir, 'train.log')
    sys.stdout = Logger(logger_path)

    device = torch.device('cuda:%d' % args.gpu)

    model1 = load_model(args.coach1, args.executor1, args)
    model2 = load_model(args.coach2, args.executor2, args)

    game_option = get_game_option(args)
    ai1_option, ai2_option = get_ai_options(
        args, [model1.coach.num_instructions, model2.coach.num_instructions])

    context, act1_dc, act2_dc = create_game(args.num_thread, ai1_option,
                                            ai2_option, game_option)
    context.start()
    dc = DataChannelManager([act1_dc, act2_dc])
Example #9
# Copyright (c) Facebook, Inc. and its affiliates.
Example #10
def self_play(args):

    wandb.init(project="adapt-minirts-pop",
               sync_tensorboard=True,
               dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    log_series = ",".join(args.rule_series)
    wandb.run.name = (
        f"multitask-pop_selfplay-{wandb.run.id}-{args.coach1}-{args.executor1}"
        f"-{args.train_mode}-rule_series={log_series}-random_coach={args.coach_random_init}-{args.tag}"
    )
    # wandb.run.save()
    wandb.config.update(args)

    print("args:")
    pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    print("Train Mode: {}".format(args.train_mode))

    if args.coach_reload:
        print("Reloading coach model.... ")
        args.coach1 = args.coach_load_file
        _coach1 = os.path.basename(args.coach1).replace(".pt", "")

    else:
        _coach1 = args.coach1
        args.coach1 = best_coaches[args.coach1]

    if args.exec_reload:
        print("Reloading executor model.... ")
        args.executor1 = args.exec_load_file
        _executor1 = os.path.basename(args.executor1).replace(".pt", "")
    else:
        _executor1 = args.executor1
        args.executor1 = best_executors[args.executor1]

    log_name = "multitask-pop_c1_type={}_c2_type={}__e1_type={}_e2_type={}__lr={}__num_sp={}__num_rb={}_coach_random_init={}_{}_{}".format(
        _coach1,
        args.coach2,
        _executor1,
        args.executor2,
        args.lr,
        args.num_sp,
        args.num_rb,
        args.coach_random_init,
        args.tag,
        random.randint(1111, 9999),
    )
    writer = SummaryWriter(comment=log_name)

    args.coach2 = best_coaches[args.coach2]
    args.executor2 = best_executors[args.executor2]

    logger_path = os.path.join(args.save_dir, "train.log")

    sys.stdout = Logger(logger_path)

    device = torch.device("cuda:%d" % args.gpu)

    agent_dict = {}
    for rule in args.rule_series:
        sp_agent = Agent(
            coach=args.coach1,
            executor=args.executor1,
            device=device,
            args=args,
            writer=writer,
            trainable=True,
            exec_sample=True,
            pg=args.pg,
            tag=f"_{rule}",
        )

        ## Sharing executors
        args.executor1 = sp_agent.model.executor
        sp_agent.init_save_folder(wandb.run.name)

        bc_agent = Agent(
            coach=args.coach2,
            executor=args.executor2,
            device=device,
            args=args,
            writer=writer,
            trainable=False,
            exec_sample=False,
            tag=f"_{rule}",
        )

        agent_dict[int(rule)] = {"sp_agent": sp_agent, "bc_agent": bc_agent}

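    # Optionally share one Adam optimizer across every rule's coach parameters
    # plus the shared executor parameters.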
    if args.same_opt:
        params = []
        for k, v in agent_dict.items():
            agent = v["sp_agent"]
            coach_params = list(agent.model.coach.parameters())
            params += coach_params

        params += list(agent.model.executor.parameters())
        optimizer = optim.Adam(params, lr=args.lr)

        for k, v in agent_dict.items():
            agent = v["sp_agent"]
            agent.set_optimizer(optimizer)

    print("Progress: ")
    ## Create Save folder:
    working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
    create_working_dir(args, working_rule_dir)

    cur_iter_idx = 1
    rules = [int(str_rule) for str_rule in args.rule_series]
    agg_agents = []
    agg_win_batches = defaultdict(dict)
    agg_loss_batches = defaultdict(dict)

    for epoch in range(args.train_epochs):
        for rule_idx in rules:
            if cur_iter_idx % args.eval_factor == 0:
                for eval_rule_idx in rules:
                    sp_agent = agent_dict[eval_rule_idx]["sp_agent"]
                    bc_agent = agent_dict[eval_rule_idx]["bc_agent"]
                    game = MultiTaskGame(sp_agent, bc_agent, cur_iter_idx,
                                         args, working_rule_dir)
                    game.evaluate_lifelong_rules(cur_iter_idx, [eval_rule_idx],
                                                 "train")
                    game.terminate()
                    del game

            sp_agent = agent_dict[rule_idx]["sp_agent"]
            bc_agent = agent_dict[rule_idx]["bc_agent"]

            print("Current rule: {}".format(rule_idx))
            game = MultiTaskGame(sp_agent, bc_agent, cur_iter_idx, args,
                                 working_rule_dir)

            rule = game.train_permute[rule_idx]
            print("Current rule: {}".format(rule))
            game.init_rule_games(rule)
            agent1, agent2 = game.start()

            agent1.train()
            agent2.train()

            pbar = tqdm(total=(args.num_sp * 2 + args.num_rb))

            while not game.finished():

                data = game.get_input()

                if len(data) == 0:
                    continue
                for key in data:
                    # print(key)
                    batch = to_device(data[key], device)

                    if key == "act1":
                        batch["actor"] = "act1"
                        reply = agent1.simulate(cur_iter_idx, batch)
                        t_count = agent1.update_logs(cur_iter_idx, batch,
                                                     reply)

                    elif key == "act2":
                        batch["actor"] = "act2"
                        reply = agent2.simulate(cur_iter_idx, batch)
                        t_count = agent2.update_logs(cur_iter_idx, batch,
                                                     reply)

                    else:
                        assert False

                    game.set_reply(key, reply)
                    pbar.update(t_count)

            if not args.split_train:
                if args.train_mode == "coach":
                    agent1.train_coach(cur_iter_idx)
                elif args.train_mode == "executor":
                    agent1.train_executor(cur_iter_idx)
                elif args.train_mode == "both":
                    agent1.train_both(cur_iter_idx)
                else:
                    raise Exception("Invalid train mode.")
                game.print_logs(cur_iter_idx)
                game.terminate()
            else:

                if cur_iter_idx % len(rules):
                    win_batches, loss_batches = agent1.train_coach(
                        cur_iter_idx)
                    agg_win_batches.update(win_batches)
                    agg_loss_batches.update(loss_batches)
                    agg_agents.append((agent1, agent2))
                    game.print_logs(cur_iter_idx)
                    game.terminate(keep_agents=True)

                else:
                    win_batches, loss_batches = agent1.train_coach(
                        cur_iter_idx)

                    agg_win_batches.update(win_batches)
                    agg_loss_batches.update(loss_batches)

                    # Change shuffling
                    agent1.train_executor(
                        cur_iter_idx,
                        agg_win_batches=agg_win_batches,
                        agg_loss_batches=agg_loss_batches,
                    )

                    for agent1, agent2 in agg_agents:
                        agent1.reset()
                        agent2.reset()

                    game.print_logs(cur_iter_idx)
                    game.terminate()

                    del agg_loss_batches
                    del agg_win_batches

                    agg_win_batches = defaultdict(dict)
                    agg_loss_batches = defaultdict(dict)

                cur_iter_idx += 1
                wandb.run.summary[f"max_iterations"] = cur_iter_idx
                pbar.close()

        del game

    writer.close()
Example #11
def self_play(args):

    wandb.init(project="adapt-minirts-zero",
               sync_tensorboard=True,
               dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    wandb.run.name = (
        f"multitask-zero_selfplay-{wandb.run.id}-{args.coach1}-{args.executor1}"
        f"-{args.train_mode}-{args.tag}")
    # wandb.run.save()
    wandb.config.update(args)

    print("args:")
    pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    print("Train Mode: {}".format(args.train_mode))

    if args.coach_reload:
        print("Reloading coach model.... ")
        args.coach1 = args.coach_load_file
        _coach1 = os.path.basename(args.coach1).replace(".pt", "")

    else:
        _coach1 = args.coach1
        args.coach1 = best_coaches[args.coach1]

    if args.exec_reload:
        print("Reloading executor model.... ")
        args.executor1 = args.exec_load_file
        _executor1 = os.path.basename(args.executor1).replace(".pt", "")
    else:
        _executor1 = args.executor1
        args.executor1 = best_executors[args.executor1]

    log_name = (
        f"multitask-zero_c1_type={_coach1}_c2_type={args.coach2}__e1_type={_executor1}_e2_type={args.executor2}__lr={args.lr}_coach_emb"
        f"={args.coach_rule_emb_size}_exec_emb={args.executor_rule_emb_size}__num_sp={args.num_sp}__num_rb={args.num_rb}_{args.tag}_{random.randint(1111, 99999)}"
    )
    writer = SummaryWriter(comment=log_name)

    args.coach2 = best_coaches[args.coach2]
    args.executor2 = best_executors[args.executor2]

    logger_path = os.path.join(args.save_dir, "train.log")

    sys.stdout = Logger(logger_path)

    device = torch.device("cuda:%d" % args.gpu)

    sp_agent = Agent(
        coach=args.coach1,
        executor=args.executor1,
        device=device,
        args=args,
        writer=writer,
        trainable=True,
        exec_sample=True,
        pg=args.pg,
    )

    sp_agent.init_save_folder(wandb.run.name)

    bc_agent = Agent(
        coach=args.coach2,
        executor=args.executor2,
        device=device,
        args=args,
        writer=writer,
        trainable=False,
        exec_sample=False,
    )

    print("Progress: ")
    ## Create Save folder:
    working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
    create_working_dir(args, working_rule_dir)

    cur_iter_idx = 1
    for epoch in range(args.train_epochs):
        print("Current epoch: {}".format(epoch))

        game = MultiTaskGame(sp_agent, bc_agent, epoch, args, working_rule_dir)
        # game.evaluate(epoch, 'valid', 3)

        for rule_idx in range(game.num_train_rules):

            # if rule_idx%args.eval_factor == 0:
            #     game.evaluate(epoch*game.num_train_rules + rule_idx, 'valid', 10)

            rule = game.train_permute[rule_idx]
            print(f"Current rule ({rule_idx}): {rule}")
            game.init_rule_games(rule)
            agent1, agent2 = game.start()

            agent1.train()
            agent2.train()

            pbar = tqdm(total=(args.num_sp * 2 + args.num_rb))

            while not game.finished():

                data = game.get_input()

                if len(data) == 0:
                    continue
                for key in data:

                    batch = to_device(data[key], device)
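                    # Encode the current rule as a tensor of unit indices and
                    # tile it across the batch so every sample carries the rule.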
                    rule_tensor = (torch.tensor([
                        UNIT_DICT[unit] for unit in rule
                    ]).to(device).repeat(batch["game_id"].size(0), 1))
                    batch["rule_tensor"] = rule_tensor

                    if key == "act1":
                        batch["actor"] = "act1"
                        reply = agent1.simulate(cur_iter_idx, batch)
                        t_count = agent1.update_logs(cur_iter_idx, batch,
                                                     reply)

                    elif key == "act2":
                        batch["actor"] = "act2"
                        reply = agent2.simulate(cur_iter_idx, batch)
                        t_count = agent2.update_logs(cur_iter_idx, batch,
                                                     reply)

                    else:
                        assert False

                    game.set_reply(key, reply)
                    pbar.update(t_count)

            cur_iter_idx += 1
            pbar.close()

            if cur_iter_idx % args.update_iter:
                if args.train_mode == "coach":
                    agent1.train_coach(cur_iter_idx)
                elif args.train_mode == "executor":
                    agent1.train_executor(cur_iter_idx)
                elif args.train_mode == "both":
                    agent1.train_both(cur_iter_idx)
                else:
                    raise Exception("Invalid train mode.")
                game.print_logs(cur_iter_idx)
                game.terminate()
            else:
                game.terminate(keep_agents=True)

        del game

    writer.close()
Example #12
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import argparse
import os
import sys
import pprint
from set_path import append_sys_path
append_sys_path()
import torch
import tube
from pytube import DataChannelManager
import minirts
import numpy as np
import random
import time
from torch.utils.tensorboard import SummaryWriter
from simanneal.anneal import time_string
import pickle
from collections import defaultdict
from rnn_coach import ConvRnnCoach
from onehot_coach import ConvOneHotCoach
from rnn_generator import RnnGenerator
from itertools import groupby
from executor_wrapper import ExecutorWrapper
from executor import Executor
from common_utils import to_device, ResultStat, Logger
Example #13
import logging

from models import RiskCalculator
from common_utils import Logger, calcUtils

logger = Logger.Logger("risk_calculators").get()
logger.setLevel(logging.DEBUG)


class ProfitLoss(RiskCalculator):
    """Simple pnl calculator"""
    def __init__(self):
        super().__init__()

    def calculate(self, calculator_input, market_data):
        """
        Use the original prices of all trades from the inventory and
        find pnl using the current market data.
        Input: SingleTrade
        :return: None
        """
        logger.info('Running ProfitLoss calculator')
        trade_price = calculator_input.get_price()
        symbol = calculator_input.get_symbol()
        qty = calculator_input.get_qty()
        side = calculator_input.get_side()

        # Market data is the MD_BOOK
        print('market data ')
        print(market_data)
        current_price = calcUtils.get_mid_price(market_data[symbol])
Example #14
# Copyright (c) Facebook, Inc. and its affiliates.
Example #15
def self_play(args):

    wandb.init(project="adapt-minirts", sync_tensorboard=True, dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    wandb.run.name = (
        f"multitask-fixed_analyse-int-{wandb.run.id}-{args.coach1}-{args.executor1}"
        f"-{args.train_mode}-rule{args.rule}-{args.tag}"
    )
    # wandb.run.save()
    wandb.config.update(args)

    print("args:")
    pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    output_list = []

    print("Overriding args.model.... with random model")
    # models = ['both_finetuned_rule80',
    #  'both_finetuned_rule3',
    #  'both_finetuned_rule7',
    #  "both_finetuned_rule14",
    #  "both_finetuned_rule12"]
    # models = ['both_finetuned_rule80', 'both_finetuned_rule3', 'behaviour_cloned', 'hier_exec_finetuned_rule21',
    #          "hier_exec_finetuned_rule80", "both_finetuned_rule12", "both_finetuned_rule7",
    #          "hier_exec_finetuned_rule14", "both_finetuned_rule3", "hier_coach_finetuned_rule80", "hier_coach_finetuned_rule21"]

    models = [
        "hier_exec_finetuned_rule21",
        "hier_exec_finetuned_rule80",
        "hier_exec_finetuned_rule14",
    ]

    # models = ["hier_coach_finetuned_rule80", "hier_coach_finetuned_rule21"]

    args.model = random.choice(models)
    print(f"Using model {args.model}")

    print("Overriding args.rule.... with random rule")
    rules = [80, 7, 14, 12, 3]
    args.rule = random.choice(rules)
    print(f"Using rule {args.rule}")

    if args.model == "behaviour_cloned":
        args.coach1 = best_coaches["rnn500"]
        args.executor1 = best_executors["rnn"]
    else:
        print("Reloading coach model.... ")
        print("Reloading executor model.... ")
        model_dict = model_dicts[args.model]
        args.coach1 = wandb.restore(
            model_dict["best_coach"], run_path=model_dict["run_path"]
        ).name
        wandb.restore(
            model_dict["best_coach"] + ".params", run_path=model_dict["run_path"]
        )
        args.executor1 = wandb.restore(
            model_dict["best_exec"], run_path=model_dict["run_path"]
        ).name
        wandb.restore(
            model_dict["best_exec"] + ".params", run_path=model_dict["run_path"]
        )

    args.coach2 = args.coach1
    args.executor2 = args.executor1
    _coach1 = os.path.basename(args.coach1).replace(".pt", "")
    _executor1 = os.path.basename(args.executor1).replace(".pt", "")

    log_name = "multitask-fixed-analyze-int_c1_type={}_c2_type={}__e1_type={}_e2_type={}__lr={}__num_sp={}__num_rb={}_{}_{}".format(
        _coach1,
        args.coach2,
        _executor1,
        args.executor2,
        args.lr,
        args.num_sp,
        args.num_rb,
        args.tag,
        random.randint(1111, 9999),
    )
    writer = SummaryWriter(comment=log_name)

    logger_path = os.path.join(args.save_dir, "train.log")

    sys.stdout = Logger(logger_path)

    device = torch.device("cuda:%d" % args.gpu)

    sp_agent = Agent(
        coach=args.coach1,
        executor=args.executor1,
        device=device,
        args=args,
        writer=writer,
        trainable=True,
        exec_sample=True,
        pg=args.pg,
    )

    sp_agent.init_save_folder(wandb.run.name)

    bc_agent = Agent(
        coach=args.coach2,
        executor=args.executor2,
        device=device,
        args=args,
        writer=writer,
        trainable=False,
        exec_sample=False,
    )

    print("Progress: ")

    def count_parameters(model):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"sp_agent coach params: {count_parameters(sp_agent.model.coach)}")
    print(f"sp_agent exec params: {count_parameters(sp_agent.model.executor)}")
    ## Create Save folder:
    working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
    create_working_dir(args, working_rule_dir)

    cur_iter_idx = 1
    rule_idx = args.rule
    game = MultiTaskGame(sp_agent, bc_agent, cur_iter_idx, args, working_rule_dir)
    for r in [80, 40, 20, 21, 14, 7, 3, 12, 13]:
        game.print_rule_desc(r, split="train")

    writer.close()

    print("#" * 40)
    print("#" * 40)
    print("#" * 40)
    print()
    print("\n\n".join(output_list))
    print()
    print("#" * 40)
    print("#" * 40)
    print("#" * 40)
Example #16
def self_play(args):

    wandb.init(project="adapt-minirts-pop-eval",
               sync_tensorboard=True,
               dir=args.wandb_dir)
    # run_id = f"multitask-fixed_selfplay-{args.coach1}-{args.executor1}-{args.train_mode}-rule{args.rule}-{args.tag}"
    date = datetime.date(datetime.now())
    wandb.run.name = f"multitask-pop-eval-{wandb.run.id}-{date}-{args.tag}"
    # wandb.run.save()
    wandb.config.update(args)

    # print("args:")
    # pprint.pprint(vars(args))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    result_dict = {}

    exp_code = args.experiment_code
    eval_exp_list = ([experiment_list[exp_code]]
                     if exp_code != -1 else experiment_list)
    partial_json_save_dir = os.path.join(args.eval_folder,
                                         f"eval-{date}-{args.tag}")

    if os.path.exists(partial_json_save_dir):
        print("Attempting to create an existing folder.. hence skipping...")
    else:
        os.makedirs(partial_json_save_dir)

    if args.model_json is not None:
        if os.path.exists(args.model_json):
            print("Using model json dictionary...")
            with open(os.path.join(args.model_json, "model_paths.json")) as f:
                model_json = json.load(f)
        else:
            raise FileNotFoundError("Model json dict cannot be found...")
    else:
        model_json = None

    for exp_name in eval_exp_list:
        print("#" * 40)
        print("#" * 40)
        print(f"Experiment: {exp_name}")
        print("-" * 40)
        print("-" * 40)
        sub_exp_result_dict = {}

        for (sub_exp_name, sub_exp_dict) in experiment_dict[exp_name].items():
            print("*" * 40)
            print(f"Sub experiment name: {sub_exp_name}")
            print("*" * 40)
            coaches = sub_exp_dict["coach"]

            if "random" in coaches:
                coach_variant = coaches["variant"]
                random_coach = coaches["random"]
                coaches = coaches["coach"]
            else:
                coach_variant = None
                random_coach = False

            execs = sub_exp_dict["executor"]
            if "variant" in execs:
                exec_variant = execs["variant"]
                execs = execs["executor"]
            else:
                exec_variant = None

            rule = sub_exp_dict["env"]
            num_sub_exps = 0
            win_rates = []
            num_total_sub_exps = min(len(coaches), len(execs))
            if sub_exp_name == "random-bc" or sub_exp_name == "bc-bc":
                num_total_sub_exps = 1

            if sub_exp_name != "ft_coach[21]-bc" and sub_exp_name.startswith(
                    "ft_coach[21]"):
                coaches = [coaches[0]] * len(execs) + [coaches[1]] * len(execs)
                execs = execs * 2
                num_total_sub_exps = len(execs)

            for (coach, executor) in zip(
                    coaches,
                    execs):  ## Do we want to check if coaches == execs?
                if coaches == execs and coach != executor:
                    continue

                print(f"Experiment number: {num_sub_exps}")
                if model_json is not None and args.model_json is not None:

                    if num_total_sub_exps != len(
                            model_json[exp_name][sub_exp_name]["model_paths"]):
                        downloaded_count = len(
                            model_json[exp_name][sub_exp_name]["model_paths"])
                        print(
                            f"Downloaded number: {downloaded_count} !=  num_total_sub_exps: {num_total_sub_exps}"
                        )
                        raise OSError("Number of sub-exp mismatch.")

                    args.coach1 = model_json[exp_name][sub_exp_name][
                        "model_paths"][num_sub_exps]["coach"]
                else:
                    args.coach1 = get_coach_path(coach,
                                                 coach_variant=coach_variant)

                if random_coach:
                    args.coach_random_init = True
                else:
                    args.coach_random_init = False

                if model_json is not None and args.model_json is not None:
                    args.executor1 = model_json[exp_name][sub_exp_name][
                        "model_paths"][num_sub_exps]["executor"]
                else:
                    args.executor1 = get_executor_path(
                        executor, exec_variant=exec_variant)

                args.rule = rule

                args.coach2 = args.coach1
                args.executor2 = args.executor1
                _coach1 = os.path.basename(args.coach1).replace(".pt", "")
                _executor1 = os.path.basename(args.executor1).replace(
                    ".pt", "")

                log_name = "multitask-pop-analyze_c1_type={}_c2_type={}__e1_type={}_e2_type={}__lr={}__num_sp={}__num_rb={}_{}_{}".format(
                    _coach1,
                    args.coach2,
                    _executor1,
                    args.executor2,
                    args.lr,
                    args.num_sp,
                    args.num_rb,
                    args.tag,
                    random.randint(1111, 9999),
                )
                writer = SummaryWriter(comment=log_name)

                logger_path = os.path.join(args.save_dir, "train.log")

                sys.stdout = Logger(logger_path)

                device = torch.device("cuda:%d" % args.gpu)

                sp_agent = Agent(
                    coach=args.coach1,
                    executor=args.executor1,
                    device=device,
                    args=args,
                    writer=writer,
                    trainable=True,
                    exec_sample=True,
                    pg=args.pg,
                )

                sp_agent.init_save_folder(wandb.run.name)

                bc_agent = Agent(
                    coach=args.coach2,
                    executor=args.executor2,
                    device=device,
                    args=args,
                    writer=writer,
                    trainable=False,
                    exec_sample=False,
                )

                print("Progress: ")
                ## Create Save folder:
                working_rule_dir = os.path.join(sp_agent.save_folder, "rules")
                create_working_dir(args, working_rule_dir)

                cur_iter_idx = 1
                rules = [args.rule]

                print("Current rule: {}".format(rules[0]))
                NUM_SPLIT_GAMES = 4
                sub_win_rates = []
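                # Run the evaluation in NUM_SPLIT_GAMES chunks; the per-chunk
                # win rates are averaged below.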
                for i in range(NUM_SPLIT_GAMES):
                    game = MultiTaskGame(sp_agent, bc_agent, cur_iter_idx,
                                         args, working_rule_dir)
                    print(f"Num games: {args.num_games}")

                    num_sub_games = args.num_games // NUM_SPLIT_GAMES
                    print(f"Num sub games: {num_sub_games}")

                    result = game.analyze_rule_games(
                        cur_iter_idx,
                        rules,
                        "train",
                        viz=args.viz,
                        num_games=num_sub_games,
                    )
                    sub_win_rates.append(result[args.rule]["win"])
                    game.terminate()
                    del game

                writer.close()

                print(f"Total sub win rates: {sub_win_rates}")
                win_rate = sum(sub_win_rates) / NUM_SPLIT_GAMES
                print(f"Win Rates: {win_rate}")

                win_rates.append(win_rate * 100)
                num_sub_exps += 1

                if sub_exp_name == "bc-bc" or sub_exp_name == "random-bc":
                    break

            sub_exp_result_dict[sub_exp_name] = {
                "win_rate": win_rates,
                "Win_rate mean": np.mean(win_rates),
                "Win_rate variance ": np.var(win_rates),
                "Num total trials": num_sub_exps,
            }

            print("++" * 50)
            pprint.pprint(sub_exp_result_dict[sub_exp_name])
            print("++" * 50)

        result_dict[exp_name] = sub_exp_result_dict
        print("--" * 50)
        print("Results so far: ")
        print("--" * 50)
        pprint.pprint(result_dict)
        print("--" * 50)

    print("Final Results: ")
    print("##" * 50)
    pprint.pprint(result_dict)
    print("##" * 50)

    print("Saving result jsons...")
    code = exp_code if exp_code != -1 else "all"
    random_number = randint(1, 100000)
    with open(
            os.path.join(partial_json_save_dir,
                         f"partial-{random_number}-{code}.json"), "w") as fp:
        json.dump(result_dict, fp)
Example #17
# Copyright (c) Facebook, Inc. and its affiliates.
Example #18
# Copyright (c) Facebook, Inc. and its affiliates.
Example #19
# Copyright (c) Facebook, Inc. and its affiliates.