Пример #1
0
def main():
    """Serve a single ``"actor"`` callback until the process is interrupted.

    Prints version information, loads the environment with one ``Evaluator``
    and one ``ModelInterface`` per actor name, loads the model from the first
    model loader, registers each evaluator's ``actor`` method as a game
    context callback and then runs the game context in an endless loop.

    ``GC.stop()`` is executed when the loop is left through an exception
    (for example ``KeyboardInterrupt``); the loop has no other exit.
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Set game to online model.
    actors = ["actor"]
    additional_to_load = {
        ("eval_" + actor_name):
        (Evaluator.get_option_spec(name="eval_" + actor_name),
         # actor_name is bound as a default so every factory keeps its own
         # value instead of the last one seen by the comprehension.
         lambda object_map, actor_name=actor_name: Evaluator(
             object_map,
             name="eval_" + actor_name,
             actor_name=actor_name,
             stats=None))
        for actor_name in actors
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(os.environ,
                   num_models=1,
                   additional_to_load=additional_to_load)

    GC = env["game"].initialize()
    args = env["game"].options
    model = env["model_loaders"][0].load_model(GC.params)

    for actor_name in actors:
        e = env["eval_" + actor_name]
        mi = env["mi_" + actor_name]

        mi.add_model("actor", model, cuda=(args.gpu >= 0), gpu_id=args.gpu)

        print("register " + actor_name + " for e = " + str(e))
        e.setup(sampler=env["sampler"], mi=mi)

        # Bind the evaluator as a default argument so the callback does not
        # depend on the loop variable after the loop has moved on.
        GC.reg_callback(actor_name, lambda batch, e=e: e.actor(batch))

    GC.start()
    try:
        for actor_name in actors:
            env["eval_" + actor_name].episode_start(0)

        while True:
            GC.run()
    finally:
        # The loop above never terminates normally, so this is the only way
        # the game context is ever stopped.
        GC.stop()
Пример #2
0
    def main_loop(self):
        """Load the environment and model, register callbacks and run the game.

        Builds a single-game environment via ``load_env``, loads the model
        from the first model loader into a fresh ``ModelInterface`` under the
        keys ``"model"`` and ``"actor"``, registers the console callbacks and
        runs the game context until ``self.exit`` is truthy.
        """
        evaluator = Evaluator(stats=False)

        # Set game to online model.
        overrides = {
            "num_games": 1,
            "batchsize": 1,
            "num_games_per_thread": 1,
            "greedy": True,
            "T": 1,
            "additional_labels": "aug_code,move_idx",
        }
        env, args = load_env(os.environ,
                             evaluator=evaluator,
                             overrides=overrides)

        GC = env["game"].initialize()
        net = env["model_loaders"][0].load_model(GC.params)

        mi = ModelInterface()
        mi.add_model("model", net)
        mi.add_model("actor",
                     net,
                     copy=True,
                     cuda=args.gpu is not None,
                     gpu_id=args.gpu)
        for key in ("model", "actor"):
            mi[key].eval()

        self.evaluator = evaluator
        self.last_move_idx = None

        def on_human_actor(batch):
            print("In human_actor")
            return self.prompt("DF> ", batch)

        def on_actor(batch):
            return self.actor(batch)

        def on_train(batch):
            # The prompt's result is discarded; this callback returns None.
            self.prompt("DF Train> ", batch)

        evaluator.setup(sampler=env["sampler"], mi=mi)

        callbacks = (
            ("actor", on_actor),
            ("human_actor", on_human_actor),
            ("train", on_train),
        )
        for key, callback in callbacks:
            GC.reg_callback_if_exists(key, callback)

        GC.Start()
        evaluator.episode_start(0)

        # Run at least once, then keep going until the exit flag is set.
        keep_running = True
        while keep_running:
            GC.Run()
            keep_running = not self.exit

        GC.Stop()
Пример #3
0
    def main_loop(self):
        """Set up a one-game environment and drive it until ``self.exit``.

        The model returned by the first model loader is stored twice in a new
        ``ModelInterface`` (``"model"`` and a copied ``"actor"``), both are
        switched to eval mode, the ``actor`` / ``human_actor`` / ``train``
        callbacks are registered if the game context knows those keys, and
        the game context is then run in a loop.
        """
        evaluator = Evaluator(stats=False)

        # Set game to online model.
        game_overrides = {
            "num_games": 1,
            "batchsize": 1,
            "num_games_per_thread": 1,
            "greedy": True,
            "T": 1,
            "additional_labels": "aug_code,move_idx",
        }
        env, args = load_env(
            os.environ, evaluator=evaluator, overrides=game_overrides)

        GC = env["game"].initialize()
        loaded = env["model_loaders"][0].load_model(GC.params)

        mi = ModelInterface()
        mi.add_model("model", loaded)
        mi.add_model(
            "actor", loaded,
            copy=True, cuda=args.gpu is not None, gpu_id=args.gpu)
        mi["model"].eval()
        mi["actor"].eval()

        self.evaluator = evaluator
        self.last_move_idx = None

        def handle_human_actor(batch):
            print("In human_actor")
            return self.prompt("DF> ", batch)

        def handle_actor(batch):
            return self.actor(batch)

        def handle_train(batch):
            # Nothing is returned to the game context from the train prompt.
            self.prompt("DF Train> ", batch)

        evaluator.setup(sampler=env["sampler"], mi=mi)

        GC.reg_callback_if_exists("actor", handle_actor)
        GC.reg_callback_if_exists("human_actor", handle_human_actor)
        GC.reg_callback_if_exists("train", handle_train)

        GC.Start()
        evaluator.episode_start(0)

        # The exit flag is checked after every run step.
        finished = False
        while not finished:
            GC.Run()
            if self.exit:
                finished = True

        GC.Stop()
Пример #4
0
def main():
    """Run games between a black actor and a white actor.

    One ``Evaluator`` and one ``ModelInterface`` are loaded per actor name.
    Each actor callback forwards the batch to its evaluator and feeds the
    batch to a per-actor ``Stats`` object.  ``game_start`` reloads the models
    for the versions announced in the batch, ``game_end`` prints the win-rate
    statistics and sets the module-level ``loop_end`` flag once
    ``args.suicide_after_n_games`` games have been played.  The game context
    is run until that flag is set.
    """
    # Set game to online model.
    actors = ["actor_black", "actor_white"]
    additional_to_load = {
        ("eval_" + actor_name):
        (Evaluator.get_option_spec(name="eval_" + actor_name),
         # actor_name is bound as a default to avoid late binding.
         lambda object_map, actor_name=actor_name: Evaluator(
             object_map,
             name="eval_" + actor_name,
             actor_name=actor_name,
             stats=None))
        for actor_name in actors
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(os.environ,
                   num_models=2,
                   overrides=dict(actor_only=True),
                   additional_to_load=additional_to_load)

    GC = env["game"].initialize()

    stats = [Stats(), Stats()]

    def actor(batch, e, stat):
        """Evaluate ``batch`` with ``e`` and record the batch in ``stat``."""
        reply = e.actor(batch)
        stat.feed(batch)
        return reply

    for actor_name, stat in zip(actors, stats):
        e = env["eval_" + actor_name]

        print("register " + actor_name + " for e = " + str(e))
        e.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        # Defaults pin this iteration's evaluator and stats to the callback.
        GC.reg_callback(actor_name,
                        lambda batch, e=e, stat=stat: actor(batch, e, stat))

    root = os.environ.get("root", "./")
    print("Root: \"%s\"" % root)
    args = env["game"].options
    global loop_end
    loop_end = False

    def game_start(batch):
        """Reload each actor's model for the version given in ``batch``."""
        import traceback

        print("In game start")

        vers = [int(batch["black_ver"][0]), int(batch["white_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(env["model_loaders"], vers,
                                                 actors):
            if ver < 0:
                continue
            while True:
                try:
                    reload(env["mi_" + actor_name], model_loader,
                           GC.params, args, root, ver, actor_name)
                    break
                except Exception:
                    # Retry after a pause on ordinary failures only;
                    # KeyboardInterrupt / SystemExit must still be able to
                    # end the process instead of being retried forever.
                    traceback.print_exc()
                    time.sleep(10)

    def game_end(batch):
        """Print win-rate statistics and request shutdown when due."""
        global loop_end
        wr = batch.GC.getGameStats().getWinRateStats()
        win_rate = 100.0 * wr.black_wins / wr.total_games \
            if wr.total_games > 0 else 0.0
        print("%s B/W: %d/%d. Black winrate: %.2f (%d)" %
              (str(datetime.now()), wr.black_wins, wr.white_wins, win_rate,
               wr.total_games))
        if args.suicide_after_n_games > 0 and \
           wr.total_games >= args.suicide_after_n_games:
            print("#suicide_after_n_games: %d, total_games: %d" %
                  (args.suicide_after_n_games, wr.total_games))
            loop_end = True

    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()
    if args.eval_model_pair:
        if "," in args.eval_model_pair:
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])

            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params, env["mi_" + actor_name],
                             actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.setRequest(int(black), int(white),
                         env['game'].options.resign_thres, 1)

    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
Пример #5
0
def main():
    """Start a GTP console session backed by a single loaded model.

    Prints version information, loads the environment with an ``Evaluator``,
    stores the loaded model under ``"model"`` and ``"actor"`` in the
    environment's model interface, wires the ``GoConsoleGTP`` callbacks into
    the game context and runs it until ``console.exit`` is truthy.
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    def build_evaluator(object_map):
        return Evaluator(object_map, stats=None)

    additional_to_load = {
        'evaluator': (Evaluator.get_option_spec(), build_evaluator),
    }

    # Set game to online model.
    overrides = dict(
        num_games=1,
        greedy=True,
        T=1,
        model='online',
        additional_labels=['aug_code', 'move_idx'],
    )
    env = load_env(os.environ,
                   overrides=overrides,
                   additional_to_load=additional_to_load)

    evaluator = env['evaluator']
    GC = env["game"].initialize()

    model = env["model_loaders"][0].load_model(GC.params)

    mi = env['mi']
    for key in ("model", "actor"):
        mi.add_model(key, model)
    for key in ("model", "actor"):
        mi[key].eval()

    console = GoConsoleGTP(GC, evaluator)

    def on_human_actor(batch):
        return console.prompt("", batch)

    def on_actor(batch):
        return console.actor(batch)

    def on_train(batch):
        # The prompt's result is discarded; this callback returns None.
        console.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    GC.reg_callback_if_exists("actor_black", on_actor)
    GC.reg_callback_if_exists("human_actor", on_human_actor)
    GC.reg_callback_if_exists("train", on_train)

    GC.start()
    client = GC.GC.getClient()
    client.setRequest(
        mi["actor"].step, -1, env['game'].options.resign_thres, -1)

    evaluator.episode_start(0)

    # Run at least once; the console's exit flag is checked after each step.
    keep_running = True
    while keep_running:
        GC.run()
        keep_running = not console.exit

    GC.stop()
Пример #6
0
def main():
    """Start a GTP console session with the console built by ``load_env``.

    Both the ``Evaluator`` and the ``GoConsoleGTP`` instance come from the
    environment.  The loaded model is stored under ``"model"`` and
    ``"actor"`` in the environment's model interface, the console callbacks
    are registered and the game context runs until ``console.exit`` is
    truthy.
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    def build_evaluator(object_map):
        return Evaluator(object_map, stats=None)

    def build_console(object_map):
        return GoConsoleGTP(object_map)

    additional_to_load = {
        'evaluator': (Evaluator.get_option_spec(), build_evaluator),
        'console': (GoConsoleGTP.get_option_spec(), build_console),
    }

    # Set game to online model.
    env = load_env(
        os.environ,
        overrides={'additional_labels': ['aug_code', 'move_idx']},
        additional_to_load=additional_to_load)
    evaluator = env['evaluator']

    GC = env["game"].initialize()
    console = env["console"]

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)

    # Only the disabled copy-to-GPU variant of add_model would use these:
    #     mi.add_model("actor", model, copy=True, cuda=use_gpu, gpu_id=gpu)
    gpu = model_loader.options.gpu
    use_gpu = gpu is not None and gpu >= 0

    mi = env['mi']
    mi.add_model("model", model)
    mi.add_model("actor", model)
    mi["model"].eval()
    mi["actor"].eval()

    console.setup(GC, evaluator)

    def on_human_actor(batch):
        return console.prompt("", batch)

    def on_actor(batch):
        return console.actor(batch)

    def on_train(batch):
        # The prompt's result is discarded; this callback returns None.
        console.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    for key, callback in (("actor_black", on_actor),
                          ("human_actor", on_human_actor),
                          ("train", on_train)):
        GC.reg_callback_if_exists(key, callback)
    GC.start()

    # TODO: For now fixed resign threshold to be 0.05. Will add a switch
    resign_threshold = 0.05
    GC.game_obj.setRequest(mi["actor"].step, -1, resign_threshold, -1)

    evaluator.episode_start(0)

    # Run at least once; the console's exit flag is checked after each step.
    keep_running = True
    while keep_running:
        GC.run()
        keep_running = not console.exit

    GC.stop()
Пример #7
0
import gevent
import grpc

import os
import signal
import sys
import traceback

import game_pb2
import game_pb2_grpc
import threading
from time import sleep

# Extra objects for the environment loader: each entry pairs an option spec
# with a factory that receives the object map.
additional_to_load = dict(
    evaluator=(
        Evaluator.get_option_spec(),
        lambda object_map: Evaluator(object_map, stats=None),
    ),
)

# Select the game, model and model file through environment variables.
os.environ.update(
    game='elfgames.go.game',
    model='df_pred',
    model_file='elfgames.go.df_model3',
)

overrides = {
    'num_games': 1,
    'greedy': True,
    'T': 1,
    'model': 'online',
Пример #8
0
import sys
import os

from rlpytorch import ModelLoader, load_module, Sampler, Evaluator, ModelInterface, ArgsProvider, EvalIters

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    model_file = load_module(os.environ["model_file"])
    model_class, method_class = model_file.Models[os.environ["model"]]
    model_loader = ModelLoader(model_class)

    game = load_module(os.environ["game"]).Loader()
    game.args.set_override(actor_only=True, game_multi=2)
    sampler = Sampler()
    evaluator = Evaluator(stats=False)

    eval_iters = EvalIters()

    args = ArgsProvider.Load(parser, [ game, sampler, evaluator, model_loader, eval_iters ])

    GC = game.initialize()
    GC.setup_gpu(args.gpu)

    model = model_loader.load_model(GC.params)
    mi = ModelInterface()
    mi.add_model("model", model, optim_params={ "lr" : 0.001})
    mi.add_model("actor", model, copy=True, cuda=True, gpu_id=args.gpu)

    def actor(batch):
        reply = evaluator.actor(batch)
Пример #9
0
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from datetime import datetime

import sys
import os

from rlpytorch import load_env, Evaluator, ArgsProvider, EvalIters

if __name__ == '__main__':
    evaluator = Evaluator(stats=False)
    eval_iters = EvalIters()
    env, args = load_env(os.environ, overrides=dict(actor_only=True), eval_iters=eval_iters, evaluator=evaluator)

    GC = env["game"].initialize_reduced_service()

    model = env["model_loaders"][0].load_model(GC.params)
    mi = env["mi"]
    mi.add_model("actor", model, cuda=args.gpu is not None, gpu_id=args.gpu)

    def reduced_project(batch):
        output = mi["actor"].forward(batch.hist(0))
        eval_iters.stats.feed_batch(batch)
        return dict(reduced_s=output["h"].data)

    def reduced_forward(batch):
Пример #10
0
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Console for DarkForest
import sys
import os
from rlpytorch import load_env, Evaluator, ModelInterface, ArgsProvider, EvalIters

if __name__ == '__main__':
    evaluator = Evaluator(stats=False)
    # Set game to online model.
    env, args = load_env(os.environ, evaluator=evaluator, overrides=dict(mode="selfplay", T=1))

    GC = env["game"].initialize()
    model = env["model_loaders"][0].load_model(GC.params)
    mi = ModelInterface()
    mi.add_model("model", model)
    mi.add_model("actor", model, copy=True, cuda=args.gpu is not None, gpu_id=args.gpu)
    mi["model"].eval()
    mi["actor"].eval()

    evaluator.setup(mi=mi)

    total_batchsize = 0
    total_sel_batchsize = 0

    def actor(batch):
        global total_batchsize, total_sel_batchsize
Пример #11
0
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from datetime import datetime

import sys
import os

from rlpytorch import load_env, Evaluator, ArgsProvider, EvalIters

if __name__ == '__main__':
    evaluator = Evaluator(stats=False)
    eval_iters = EvalIters()
    env, args = load_env(os.environ, overrides=dict(actor_only=True), eval_iters=eval_iters, evaluator=evaluator)

    GC = env["game"].initialize_reduced_service()

    model = env["model_loaders"][0].load_model(GC.params)
    mi = env["mi"]
    mi.add_model("actor", model, cuda=args.gpu is not None, gpu_id=args.gpu)

    def reduced_project(batch):
        output = mi["actor"].forward(batch.hist(0))
        eval_iters.stats.feed_batch(batch)
        return dict(reduced_s=output["h"].data)

    def reduced_forward(batch):
Пример #12
0
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Console for DarkForest

import os
from rlpytorch import Evaluator, load_env
from console_lib import GoConsoleGTP


if __name__ == '__main__':
    # Ask the loader to also build an Evaluator: the entry pairs its option
    # spec with a factory that receives the object map.
    additional_to_load = dict(
        evaluator=(
            Evaluator.get_option_spec(),
            lambda object_map: Evaluator(object_map, stats=None),
        ),
    )

    # Set game to online model.
    env = load_env(
        os.environ,
        overrides={
            'num_games': 1,
            'greedy': True,
            'T': 1,
            'model': "online",
            'additional_labels': ['aug_code', 'move_idx'],
        },
        additional_to_load=additional_to_load)
Пример #13
0
def main():
    """Run the AI side of a game that is coordinated through a gRPC server.

    Opens an insecure gRPC channel on port 50051 (to ``addrs['game_server']``
    or to localhost when that address is empty), loads the environment and
    model, creates a ``GoConsoleGTP`` and registers three callbacks on the
    game context.  The ``human_actor`` callback does not read from a
    terminal: it exchanges moves with the remote client through ``stub``.
    The game context is run until ``console.exit`` is truthy.
    """
    address = addrs['game_server']
    if address != "":
        channel = grpc.insecure_channel(address + ':50051')
    else:
        channel = grpc.insecure_channel("localhost:50051")
    stub = play_pb2_grpc.TurnStub(channel)
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Ask load_env to also build an Evaluator (option spec + factory).
    additional_to_load = {
        'evaluator': (Evaluator.get_option_spec(),
                      lambda object_map: Evaluator(object_map, stats=None)),
    }

    # Set game to online model.
    env = load_env(os.environ,
                   overrides={
                       'num_games': 1,
                       'greedy': True,
                       'T': 1,
                       'model': 'online',
                       'additional_labels': ['aug_code', 'move_idx'],
                   },
                   additional_to_load=additional_to_load)

    evaluator = env['evaluator']

    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)

    # The same loaded model is stored under both keys and put in eval mode.
    mi = env['mi']
    mi.add_model("model", model)
    mi.add_model("actor", model)
    mi["model"].eval()
    mi["actor"].eval()

    console = GoConsoleGTP(GC, evaluator)

    # TODO: create an instance of game when the client sends a request

    def reset():
        """Open a new room on the server and reset the per-game console state.

        Stores the room ID, the colour assignment, the previous-player marker
        and the list of moves to replay (``res_arr`` / ``res_len``) on
        ``console``.
        """
        ID = stub.NewRoom(play_pb2.State(status=True)).ID
        console.ID = ID
        console.color = {'has_chosen': False, "client": 1, "AI": 2}
        console.prev_player = 0
        print("Current AI's ID is ", console.ID)
        # "has_chosen" was set to False just above, so this branch is always
        # taken.
        if not console.color["has_chosen"]:
            # Busy-wait (no sleep) until the server reports a chosen colour.
            while not stub.HasChosen(play_pb2.State(status=True,
                                                    ID=ID)).status:
                pass
            console.color["AI"] = stub.GetAIPlayer(
                play_pb2.State(status=True, ID=ID)).color
            # Maps 1 -> 2 and 2 -> 1: the client gets the other colour.
            console.color["client"] = console.color["AI"] % 2 + 1
            console.color["has_chosen"] = True
        console.res_arr = stub.GetResumed(play_pb2.State(status=True,
                                                         ID=ID)).move
        console.res_len = len(console.res_arr)
        # NOTE(review): presumably a leading "B" marks a black move, so the
        # turn is advanced when the last resumed move was black's - confirm.
        if console.res_len > 0 and console.res_arr[-1][0].upper() == "B":
            _ = stub.UpdateNext(play_pb2.State(status=True, ID=ID))

    reset()

    def check_reset(reply):
        """Restart the game if the server reports that the client exited.

        Returns ``(True, reply)`` with ``reply["a"]`` set to the console's
        ``"clear"`` action when a reset happened, otherwise
        ``(False, reply)`` with ``reply`` untouched.
        """
        console.reset = stub.CheckExit(
            play_pb2.State(status=True, ID=console.ID)).status
        if console.reset:
            print("\n\n\nRestarting game...\n\n\n")
            reset()
            console.reset = False
            reply["a"] = console.actions["clear"]
            return True, reply
        return False, reply

    def human_actor(batch):
        """Produce the next action for the ``human_actor`` key.

        Replays resumed moves first.  After that it reports the AI's last
        move to the server, returns the ``"skip"`` action when it is the
        AI's turn, or waits for the remote client's move and returns it.
        """
        reply = dict(pi=None, a=None, V=0)
        ID = console.ID
        AI_color = console.color["AI"]
        human_color = console.color["client"]
        if console.res_len > 0:
            # Index -res_len walks res_arr from its first to its last entry
            # as res_len counts down, one replayed move per call.
            reply["a"] = console.str2action(console.res_arr[-console.res_len])
            console.res_len -= 1
            return reply
        # Every path through the loop body returns, so it runs at most once.
        while True:
            if console.prev_player == 1:
                # prev_player is set to 1 below when the turn is handed to
                # the AI, so the last move in the batch is sent to the
                # server as the AI's move.
                move = console.get_last_move(batch)
                x, y = move2xy(move)
                _ = stub.SetMove(
                    play_pb2.Step(x=x,
                                  y=y,
                                  player=play_pb2.Player(color=AI_color,
                                                         ID=ID)))
                _ = stub.UpdateNext(play_pb2.State(status=True, ID=ID))
            if stub.IsNextPlayer(play_pb2.Player(color=AI_color,
                                                 ID=ID)).status:
                reply["a"] = console.actions["skip"]
                console.prev_player = 1
                return reply
            # Busy-wait while the server says it is still the client's turn,
            # checking on each pass whether the client has left the game.
            while stub.IsNextPlayer(play_pb2.Player(color=human_color,
                                                    ID=ID)).status:
                do_reset, reply = check_reset(reply)
                if do_reset:
                    return reply
                pass
            human_xy = stub.GetMove(play_pb2.Player(color=human_color, ID=ID))
            reply["a"] = console.move2action(xy2move(human_xy.x, human_xy.y))
            console.prev_player = 2
            return reply

    def actor(batch):
        """Forward the batch to the console's actor."""
        return console.actor(batch)

    def train(batch):
        """Show the training prompt; the result is discarded."""
        console.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    GC.reg_callback_if_exists("actor_black", actor)
    GC.reg_callback_if_exists("human_actor", human_actor)
    GC.reg_callback_if_exists("train", train)

    GC.start()
    GC.GC.getClient().setRequest(mi["actor"].step, -1,
                                 env['game'].options.resign_thres, -1)

    evaluator.episode_start(0)

    # The console's exit flag is checked after every run step.
    while True:
        GC.run()
        if console.exit:
            break
    GC.stop()
Пример #14
0
def main():
    """Run games between a white actor and a black actor.

    ``load_env`` is asked to build one ``Evaluator`` (``eval_<actor>``) and
    one ``ModelInterface`` (``mi_<actor>``) per actor name.  Each actor
    callback forwards the batch to its evaluator and feeds the batch to a
    per-actor ``Stats`` object.  ``game_start`` reloads the models for the
    versions announced in the batch; ``game_end`` logs win-rate statistics
    and ends the main loop once ``args.suicide_after_n_games`` games have
    been played.
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Register player names
    actors = ["actor_white", "actor_black"]

    # One Evaluator per actor, stored as 'eval_actor_white' and
    # 'eval_actor_black'.
    additional_to_load = {
        ("eval_" + actor_name):
        (Evaluator.get_option_spec(name="eval_" + actor_name),
         # actor_name is bound as a default to avoid late binding.
         lambda object_map, actor_name=actor_name: Evaluator(
             object_map,
             name="eval_" + actor_name,
             actor_name=actor_name,
             stats=None))
        for actor_name in actors
    }

    # One ModelInterface per actor, stored as 'mi_actor_white' and
    # 'mi_actor_black'.
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    # The entries of env used below are "game", "model_loaders", "sampler"
    # and the eval_* / mi_* objects requested above.
    env = load_env(os.environ,
                   num_models=2,
                   overrides={'actor_only': True},
                   additional_to_load=additional_to_load)

    GC = env["game"].initialize()

    # Some statistic about batch usage, also we can add more info about
    # games stats.
    stats = [Stats(), Stats()]

    def actor(batch, evaluator, stat):
        """Evaluate ``batch`` and record it in ``stat``."""
        reply = evaluator.actor(batch)
        stat.feed(batch)
        return reply

    # Register one callback per actor key on the game context.
    for actor_name, stat in zip(actors, stats):
        evaluator = env["eval_" + actor_name]
        evaluator.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        # Defaults pin this iteration's evaluator and stats to the callback.
        GC.reg_callback(actor_name,
                        lambda batch, evaluator=evaluator, stat=stat: actor(
                            batch, evaluator, stat))

    # Get the directory containing the models.
    root = os.environ.get("root", "./")
    args = env["game"].options
    # Stops client after N games, defined in --suicide_after_n_games param.
    loop_end = False

    def game_start(batch):
        """Reload each actor's model for the version given in ``batch``.

        Registered under the 'game_start' key.
        """
        import traceback

        info = "game_start() load/reload models\n"
        logger.info(info)

        vers = [int(batch["white_ver"][0]), int(batch["black_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(env["model_loaders"], vers,
                                                 actors):
            if ver < 0:
                continue
            while True:
                try:
                    reload(env["mi_" + actor_name], model_loader,
                           GC.params, args, root, ver, actor_name)
                    break
                except Exception:
                    # Retry after a pause on ordinary failures only;
                    # KeyboardInterrupt / SystemExit must still be able to
                    # end the process instead of being retried forever.
                    traceback.print_exc()
                    time.sleep(10)

    def game_end(batch):
        """Log game statistics and end the main loop after N games.

        Registered under the 'game_end' key.
        """
        nonlocal loop_end
        wr = batch.GC.getClient().getGameStats().getWinRateStats()
        decided = wr.black_wins + wr.white_wins
        win_rate = 100.0 * wr.black_wins / decided if decided > 0 else 0.0

        info = f'game_end()\tB/W: {wr.black_wins}/{wr.white_wins}, '
        info += f'Draw: {wr.both_lost}, '
        info += f'Black winrate: {win_rate:.2f}, '
        info += f'Total Games: {wr.total_games}'

        logger.info(info)
        if args.suicide_after_n_games > 0 and \
                wr.total_games >= args.suicide_after_n_games:
            info = f'game_end()\tTotal Games: {wr.total_games}, '
            info += f'#suicide_after_n_games: {args.suicide_after_n_games}'
            logger.info(info)
            loop_end = True

    # Registering the methods described above in Python's GameContext.
    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()

    # With --eval_model_pair the two model versions are passed to the game
    # client for evaluation.
    if args.eval_model_pair:
        if "," in args.eval_model_pair:
            black, white = args.eval_model_pair.split(",")
        else:
            # NOTE(review): game_start pairs model_loaders[0] with
            # "actor_white", yet loader 0 is read as the black version
            # here - confirm the intended order.
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])
            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params, env["mi_" + actor_name],
                             actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.getClient().setRequest(int(black), int(white), 1)

    # Called once per evaluator before the main loop starts.
    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
Пример #15
0
def main():
  """Run an interactive console session backed by a single "actor" model.

  Prints version information, loads the environment and the model, wires
  the console callbacks into the game context and then keeps running the
  game context until the console reports that it should exit.
  """
  banner = (
    ('Python version:', sys.version),
    ('PyTorch version:', torch.__version__),
    ('CUDA version', torch.version.cuda),
    ('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", "")),
  )
  for label, value in banner:
    print(label, value)

  # Evaluator is a pure python class which runs the neural network in eval
  # mode, returns the results and updates some stat info.
  def build_evaluator(object_map):
    return Evaluator(object_map, stats=None)

  extra_components = {
    'evaluator': (Evaluator.get_option_spec(), build_evaluator),
  }

  # load_env provides (per the original notes):
  #   game          - the game module (elfgames.checkers.game).
  #   method        - the "method" passed via params; df_model_checkers.py
  #                   returns an array with [model, method]
  #                   (model_file=elfgames.checkers.df_model_checkers,
  #                   model=df_pred).
  #   model_loaders - ModelLoader instances prepared to load the "model"
  #                   passed via the same params.
  #   sampler       - used to sample an action from the policy.
  #   mi            - ModelInterface, a python class saving network models;
  #                   its member models is a key-value store used to call a
  #                   CNN model by name.
  #   evaluator     - runs the neural network in eval mode, returns the
  #                   results and updates some stat info.
  option_overrides = {
    'num_games': 1,
    'greedy': True,
    'T': 1,
    'additional_labels': ['aug_code', 'move_idx'],
  }
  env = load_env(
    os.environ,
    overrides=option_overrides,
    additional_to_load=extra_components)

  evaluator = env['evaluator']

  # Initializes the keys for communication between Python and C++ code
  # (defined in Game.py and GameFeature.h), initializes the C++ GameContext
  # wrapped by GC on the python side and sets the mode parsed from the
  # options (play/selfplay/train/offline_train).
  GC = env["game"].initialize()

  # Load the model (Model_PolicyValue from df_model_checkers.py); it
  # contains init_conv, value_func, resnet, etc.
  loader = env["model_loaders"][0]
  model = loader.load_model(GC.params)

  # Hand the model to the ModelInterface, which stores it and calls the
  # network whenever an evaluation is needed.
  model_interface = env['mi']
  model_interface.add_model("actor", model)
  # Check that the model was installed successfully.
  model_interface["actor"].eval()

  # TODO: describe the console in more detail.
  console = UgolkiConsole(GC, evaluator)

  def human_actor(batch):
    return console.prompt("", batch)

  def actor(batch):
    return console.actor(batch)

  evaluator.setup(sampler=env["sampler"], mi=model_interface)

  # Register the python-side methods in the GameContext. Their names were
  # registered earlier, when the game was initialized (on both the python
  # and the C++ side); here we attach the methods that are called when a
  # batch is passed from C++ to Python for evaluation.
  # Example: "human_actor" is registered as a key together with the method
  # of the same name. When AIClientT calls act (which takes a state and a
  # key), act connects to python and passes the state under that key
  # ("human_actor", "actor_black") to these methods.
  callbacks = (
    ("human_actor", human_actor),
    ("actor_black", actor),
  )
  for key, callback in callbacks:
    GC.reg_callback_if_exists(key, callback)

  GC.start()
  # Tells the C++ side the model version.
  client = GC.GC.getClient()
  client.setRequest(model_interface["actor"].step, -1, -1)

  # Called before each episode, resets actor_count (number of total nn calls).
  evaluator.episode_start(0)

  # Run at least once, then keep going until the console asks to exit.
  keep_running = True
  while keep_running:
    GC.run()
    keep_running = not console.exit

  # TODO: fix this for a normal exit (a sys.exit() call was disabled here).

  GC.stop()
Пример #16
0
def main():
    """Run a game client with separate black and white actors.

    Prints version information, creates one ``Evaluator`` and one
    ``ModelInterface`` per actor through ``load_env``, registers the actor,
    ``game_start`` and ``game_end`` callbacks on the game context and then
    runs the game context until ``game_end`` requests a stop (when
    ``args.suicide_after_n_games`` is positive and that many games have
    been played).
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Set game to online model.
    actors = ["actor_black", "actor_white"]
    # ``actor_name=actor_name`` binds the current name as a default so each
    # factory keeps its own actor (avoids the late-binding closure pitfall).
    additional_to_load = {
        ("eval_" + actor_name):
        (Evaluator.get_option_spec(name="eval_" + actor_name),
         lambda object_map, actor_name=actor_name: Evaluator(
             object_map,
             name="eval_" + actor_name,
             actor_name=actor_name,
             stats=None))
        for actor_name in actors
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(os.environ,
                   num_models=2,
                   overrides={'actor_only': True},
                   additional_to_load=additional_to_load)

    GC = env["game"].initialize()

    # One Stats object per actor.
    stats = [Stats() for _ in actors]

    def actor(batch, e, stat):
        """Evaluate ``batch`` with evaluator ``e`` and feed it to ``stat``."""
        reply = e.actor(batch)
        stat.feed(batch)
        return reply

    for actor_name, stat in zip(actors, stats):
        e = env["eval_" + actor_name]

        print(f'register {actor_name} for e = {e!s}')
        e.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        # Defaults bind this iteration's evaluator and stats to the callback.
        GC.reg_callback(actor_name,
                        lambda batch, e=e, stat=stat: actor(batch, e, stat))

    root = os.environ.get("root", "./")
    print(f'Root: "{root}"')
    args = env["game"].options
    loop_end = False

    def game_start(batch):
        """Reload each actor's model to the version requested in ``batch``.

        A negative version means "do not reload". A failing reload is
        retried every 10 seconds until it succeeds.
        """
        import traceback

        print("In game start")

        vers = [int(batch["black_ver"][0]), int(batch["white_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(env["model_loaders"], vers,
                                                 actors):
            if ver < 0:
                continue
            while True:
                try:
                    reload(env["mi_" + actor_name], model_loader,
                           GC.params, args, root, ver, actor_name)
                    break
                except Exception:
                    # Only ordinary errors are retried. KeyboardInterrupt
                    # and SystemExit must propagate, otherwise this loop
                    # could never be interrupted while the reload fails.
                    traceback.print_exc()
                    time.sleep(10)

    def game_end(batch):
        """Print win-rate statistics and stop the loop at the game limit."""
        nonlocal loop_end
        wr = batch.GC.getClient().getGameStats().getWinRateStats()
        win_rate = (100.0 * wr.black_wins /
                    wr.total_games if wr.total_games > 0 else 0.0)
        print(f'{datetime.now()!s} B/W: {wr.black_wins}/{wr.white_wins}.'
              f'Black winrate: {win_rate:.2f} ({wr.total_games})')

        if args.suicide_after_n_games > 0 and \
                wr.total_games >= args.suicide_after_n_games:
            print(f'#suicide_after_n_games: {args.suicide_after_n_games}, '
                  f'total_games: {wr.total_games}')
            loop_end = True

    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()
    if args.eval_model_pair:
        if "," in args.eval_model_pair:
            # Explicit "black,white" version pair.
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])

            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params, env["mi_" + actor_name],
                             actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.getClient().setRequest(int(black), int(white),
                                     env['game'].options.resign_thres, 1)

    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
Пример #17
0
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from datetime import datetime

import sys
import os

from rlpytorch import load_env, Evaluator, ArgsProvider, EvalIters

if __name__ == '__main__':
    # Build the evaluator and its iteration helper, then let load_env wire
    # them into the environment with the actor_only option overridden.
    evaluator = Evaluator(stats=False)
    eval_iters = EvalIters()
    env, args = load_env(
        os.environ,
        overrides={'actor_only': True},
        evaluator=evaluator,
        eval_iters=eval_iters,
    )

    GC = env["game"].initialize()

    # Load the first model and register it as "actor"; CUDA is requested
    # only when a GPU id was given.
    model = env["model_loaders"][0].load_model(GC.params)
    env["mi"].add_model(
        "actor", model, cuda=args.gpu is not None, gpu_id=args.gpu)
    # Switch the registered actor model to eval mode.
    env["mi"]["actor"].eval()
Пример #18
0
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from datetime import datetime

import sys
import os

from rlpytorch import load_env, Evaluator, ArgsProvider, EvalIters

if __name__ == '__main__':
    evaluator = Evaluator(stats=False)
    eval_iters = EvalIters()
    env, args = load_env(os.environ, overrides=dict(actor_only=True), evaluator=evaluator, eval_iters=eval_iters)

    GC = env["game"].initialize()

    model = env["model_loaders"][0].load_model(GC.params)
    env["mi"].add_model("actor", model, cuda=not args.gpu is None, gpu_id=args.gpu)
    env["mi"]["actor"].eval()

    def actor(batch):
        reply = evaluator.actor(batch)
        '''
        s = batch["s"][0][0]
        seq = batch["seq"][0][0]
        for i in range(s.size(0)):