Example #1
    def start(self):
        # The game context must already have been created.
        assert self.context is not None

        # Launch the game threads and group the two actor channels
        # under a single DataChannelManager.
        self.context.start()
        self.dc = DataChannelManager([self.act1_dc, self.act2_dc])

        return self.agent1, self.agent2
Example #2
# Copyright (c) Facebook, Inc. and its affiliates.
Example #3
    logger_path = os.path.join(args.save_dir, 'train.log')
    sys.stdout = Logger(logger_path)

    device = torch.device('cuda:%d' % args.gpu)

    # Load the two competing coach/executor models.
    model1 = load_model(args.coach1, args.executor1, args)
    model2 = load_model(args.coach2, args.executor2, args)

    game_option = get_game_option(args)
    ai1_option, ai2_option = get_ai_options(
        args, [model1.coach.num_instructions, model2.coach.num_instructions])

    # Create the multi-threaded game, start it, and manage the two
    # actor data channels together.
    context, act1_dc, act2_dc = create_game(args.num_thread, ai1_option,
                                            ai2_option, game_option)
    context.start()
    dc = DataChannelManager([act1_dc, act2_dc])

    result1 = ResultStat('reward', None)
    result2 = ResultStat('reward', None)
    i = 0
    while not context.terminated():
        i += 1
        if i % 1000 == 0:
            print('%d, progress agent1: win %d, loss %d' %
                  (i, result1.win, result1.loss))

        # Poll all channels; an empty dict means no game produced a
        # batch within the timeout.
        data = dc.get_input(max_timeout_s=1)
        if len(data) == 0:
            continue
        for key in data:
            # print(key)
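            # The snippet is truncated here. A plausible continuation,
            # modeled on the per-actor dispatch in Example #6 (this is an
            # assumption, not the original code):
            batch = to_device(data[key], device)
            if key == 'act1':
                result1.feed(batch)
                with torch.no_grad():
                    reply, _ = model1.forward(batch)
            elif key == 'act2':
                result2.feed(batch)
                with torch.no_grad():
                    reply, _ = model2.forward(batch)
            else:
                assert False
            dc.set_reply(key, reply)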
Example #4
    device = torch.device('cuda:%d' % args.gpu)

    # Load the pretrained coach and executor, wrap them together, and
    # put the wrapper in evaluation mode.
    coach = ConvRnnCoach.load(args.coach_path).to(device)
    coach.max_raw_chars = args.max_raw_chars
    executor = Executor.load(args.model_path).to(device)
    executor_wrapper = ExecutorWrapper(coach, executor, coach.num_instructions,
                                       args.max_raw_chars, args.cheat,
                                       args.inst_mode)
    executor_wrapper.train(False)

    game_option = get_game_option(args)
    ai1_option, ai2_option = get_ai_options(args, coach.num_instructions)

    context, act_dc = create_game(args.num_thread, ai1_option, ai2_option,
                                  game_option)
    context.start()
    dc = DataChannelManager([act_dc])

    result_stat = ResultStat('reward', None)
    while not context.terminated():
        data = dc.get_input(max_timeout_s=1)
        if len(data) == 0:
            continue
        # Move the batch from the single 'act' channel to the GPU,
        # record the reward, and send the model's actions back to the game.
        data = to_device(data['act'], device)
        result_stat.feed(data)
        reply = executor_wrapper.forward(data)

        dc.set_reply('act', reply)

    print(result_stat.log(0))
    dc.terminate()
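ResultStat also exposes win and loss counters (Example #3 reads result1.win and result1.loss in its progress print). A minimal follow-up sketch for the loop above that turns those counters into a win rate; the percentage arithmetic is an illustration, not part of the original script:

    # Hypothetical follow-up: derive a win rate from the counters.
    total_games = result_stat.win + result_stat.loss
    if total_games > 0:
        print('win rate: %.1f%%' % (100.0 * result_stat.win / total_games))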
Example #5
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import argparse
import os
import sys
import pprint
# append_sys_path() extends sys.path so the compiled tube/minirts
# modules imported below can be resolved.
from set_path import append_sys_path
append_sys_path()
import torch
import tube
from pytube import DataChannelManager
import minirts
import numpy as np
import random
import pickle
from collections import defaultdict
from rnn_coach import ConvRnnCoach
from onehot_coach import ConvOneHotCoach
from rnn_generator import RnnGenerator
from itertools import groupby
from executor_wrapper import ExecutorWrapper
from executor import Executor
from common_utils import to_device, ResultStat, Logger
from best_models import best_executors, best_coaches
from tqdm import tqdm
p1dict = defaultdict(list)
Example #6
def run_eval(args, model1, model2, device, num_games=100):

    num_eval_games = num_games

    result1 = ResultStat("reward", None)
    result2 = ResultStat("reward", None)

    game_option = get_game_option(args)
    ai1_option, ai2_option = get_ai_options(
        args, [model1.coach.num_instructions, model2.coach.num_instructions])

    if args.opponent == "sp":
        context, act1_dc, act2_dc = init_mt_games(num_eval_games, 0, args,
                                                  ai1_option, ai2_option,
                                                  game_option)
        pbar = tqdm(total=num_eval_games * 2)
    else:
        context, act1_dc, act2_dc = init_mt_games(0, num_eval_games, args,
                                                  ai1_option, ai2_option,
                                                  game_option)
        pbar = tqdm(total=num_eval_games)
    # context, act1_dc, act2_dc = init_games(
    #     num_eval_games, ai1_option, ai2_option, game_option)
    context.start()
    dc = DataChannelManager([act1_dc, act2_dc])

    i = 0
    model1.eval()
    model2.eval()

    while not context.terminated():
        i += 1
        # if i % 1000 == 0:
        #     print('%d, progress agent1: win %d, loss %d' % (i, result1.win, result1.loss))

        data = dc.get_input(max_timeout_s=1)
        if len(data) == 0:
            continue
        for key in data:
            # print(key)
            batch = to_device(data[key], device)
            if key == "act1":
                batch["actor"] = "act1"
                ## Add batches to state table using sampling before adding
                ## Add based on the game_id

                result1.feed(batch)
                with torch.no_grad():
                    reply, _ = model1.forward(batch)  # , exec_sample=True)

            elif key == "act2":
                batch["actor"] = "act2"
                result2.feed(batch)

                with torch.no_grad():
                    reply, _ = model2.forward(batch)

            else:
                assert False

            dc.set_reply(key, reply)

            game_ids = batch["game_id"].cpu().numpy()
            terminals = batch["terminal"].cpu().numpy().flatten()

            # Use a separate index so the outer step counter `i` is not
            # clobbered; tick the progress bar once per finished game.
            for idx, g_id in enumerate(game_ids):
                if terminals[idx] == 1:
                    pbar.update(1)

    model1.eval()
    model2.eval()
    pbar.close()

    return result1, result2
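A minimal sketch of how run_eval might be driven, reusing the loading helpers from Example #3 (the argument namespace and model names are assumptions, not from the source):

    device = torch.device('cuda:%d' % args.gpu)
    model1 = load_model(args.coach1, args.executor1, args)
    model2 = load_model(args.coach2, args.executor2, args)
    result1, result2 = run_eval(args, model1, model2, device, num_games=100)
    # ResultStat.log is used the same way in Example #4.
    print(result1.log(0))
    print(result2.log(0))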