示例#1
0
def main():
    """Single-actor driver: load one model, register one Evaluator per
    actor name, then pump the game loop forever.

    Relies on module-level names imported outside this function
    (sys, os, torch, load_env, Evaluator, ModelInterface).
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Set game to online model.
    actors = ["actor"]
    # One Evaluator per actor.  actor_name is bound as a lambda default to
    # dodge the late-binding-closure pitfall inside the comprehension.
    additional_to_load = {
        ("eval_" + actor_name):
        (Evaluator.get_option_spec(name="eval_" + actor_name),
         lambda object_map, actor_name=actor_name: Evaluator(
             object_map,
             name="eval_" + actor_name,
             actor_name=actor_name,
             stats=None))
        for actor_name in actors
    }
    # One ModelInterface per actor.
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(os.environ,
                   num_models=1,
                   additional_to_load=additional_to_load)

    GC = env["game"].initialize()
    args = env["game"].options
    model = env["model_loaders"][0].load_model(GC.params)

    for actor_name in actors:
        e = env["eval_" + actor_name]
        mi = env["mi_" + actor_name]

        # args.gpu < 0 means CPU-only.
        mi.add_model("actor", model, cuda=(args.gpu >= 0), gpu_id=args.gpu)

        print("register " + actor_name + " for e = " + str(e))
        e.setup(sampler=env["sampler"], mi=mi)

        def actor(batch, e):
            reply = e.actor(batch)
            return reply

        # Bind e as a default so each callback keeps its own evaluator.
        GC.reg_callback(actor_name, lambda batch, e=e: actor(batch, e))

    GC.start()
    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    # NOTE(review): this loop never breaks, so GC.stop() below is
    # unreachable; kept for symmetry with the other drivers.
    while True:
        GC.run()

    GC.stop()
示例#2
0
    def main_loop(self):
        """Interactive console loop: load the model, wire evaluator
        callbacks into the game context, and run until ``self.exit``.
        """
        evaluator = Evaluator(stats=False)
        # Set game to online model.
        # Single game / batch / thread with greedy deterministic play.
        env, args = load_env(os.environ,
                             evaluator=evaluator,
                             overrides=dict(
                                 num_games=1,
                                 batchsize=1,
                                 num_games_per_thread=1,
                                 greedy=True,
                                 T=1,
                                 additional_labels="aug_code,move_idx"))

        GC = env["game"].initialize()
        model = env["model_loaders"][0].load_model(GC.params)
        mi = ModelInterface()
        mi.add_model("model", model)
        # "actor" is a separate copy; args.gpu is None means run on CPU.
        mi.add_model("actor",
                     model,
                     copy=True,
                     cuda=args.gpu is not None,
                     gpu_id=args.gpu)
        # Inference only: put both models in eval mode.
        mi["model"].eval()
        mi["actor"].eval()

        self.evaluator = evaluator
        self.last_move_idx = None

        def human_actor(batch):
            # Human move entered through the console prompt.
            print("In human_actor")
            return self.prompt("DF> ", batch)

        def actor(batch):
            # Delegate to the enclosing object's actor.
            return self.actor(batch)

        def train(batch):
            self.prompt("DF Train> ", batch)

        evaluator.setup(sampler=env["sampler"], mi=mi)

        GC.reg_callback_if_exists("actor", actor)
        GC.reg_callback_if_exists("human_actor", human_actor)
        GC.reg_callback_if_exists("train", train)

        GC.Start()

        evaluator.episode_start(0)

        while True:
            GC.Run()
            if self.exit: break
        GC.Stop()
示例#3
0
文件: server.py 项目: qucheng/ELF-1
def main():
    """Training entry point: construct a Trainer and a SingleProcessRun
    from the environment, wire callbacks into the game context, and hand
    control to the runner.
    """
    for info in (sys.version, torch.__version__, torch.version.cuda):
        print(info)
    print("Conda env: \"%s\"" % os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Extra components, each built from its own option spec.
    additional_to_load = {}
    additional_to_load['trainer'] = (
        Trainer.get_option_spec(),
        lambda option_map: Trainer(option_map))
    additional_to_load['runner'] = (
        SingleProcessRun.get_option_spec(),
        lambda option_map: SingleProcessRun(option_map))

    env = load_env(os.environ, additional_to_load=additional_to_load)

    trainer, runner = env['trainer'], env['runner']

    GC = env["game"].initialize()

    loader = env["model_loaders"][0]
    model = loader.load_model(GC.params)
    mi = env["mi"]
    # The "model" entry is the one being optimized.
    mi.add_model("model", model, opt=True)

    GC.reg_callback("train", trainer.train)

    # Attach a separate actor copy only when the game exposes that hook.
    if GC.reg_has_callback("actor"):
        options = env["game"].options
        mi.add_model("actor",
                     model,
                     copy=True,
                     cuda=(options.gpu >= 0),
                     gpu_id=options.gpu)
        GC.reg_callback("actor", trainer.actor)

    trainer.setup(sampler=env["sampler"],
                  mi=mi,
                  rl_method=env["method"])

    runner.setup(GC,
                 episode_summary=trainer.episode_summary,
                 episode_start=trainer.episode_start)

    runner.run()
示例#4
0
文件: df_console.py 项目: GenjiWu/ELF
    def main_loop(self):
        """Console main loop: set up an online-model evaluator, register
        callbacks, and run the game context until ``self.exit`` is set.
        """
        evaluator = Evaluator(stats=False)
        # Set game to online model.
        # One game, batch size 1, greedy deterministic play.
        env, args = load_env(os.environ, evaluator=evaluator, overrides=dict(num_games=1, batchsize=1, num_games_per_thread=1, greedy=True, T=1, additional_labels="aug_code,move_idx"))

        GC = env["game"].initialize()
        model = env["model_loaders"][0].load_model(GC.params)
        mi = ModelInterface()
        mi.add_model("model", model)
        # Separate "actor" copy; args.gpu is None means run on CPU.
        mi.add_model("actor", model, copy=True, cuda=args.gpu is not None, gpu_id=args.gpu)
        # Inference only.
        mi["model"].eval()
        mi["actor"].eval()

        self.evaluator = evaluator
        self.last_move_idx = None

        def human_actor(batch):
            # Human move read from the console prompt.
            print("In human_actor")
            return self.prompt("DF> ", batch)

        def actor(batch):
            return self.actor(batch)

        def train(batch):
            self.prompt("DF Train> ", batch)

        evaluator.setup(sampler=env["sampler"], mi=mi)

        GC.reg_callback_if_exists("actor", actor)
        GC.reg_callback_if_exists("human_actor", human_actor)
        GC.reg_callback_if_exists("train", train)

        GC.Start()

        evaluator.episode_start(0)

        while True:
            GC.Run()
            if self.exit: break
        GC.Stop()
示例#5
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
from datetime import datetime

import sys
import os

from rlpytorch import LSTMTrainer, Sampler, SingleProcessRun, load_env, ModelLoader, ArgsProvider, ModelInterface

if __name__ == '__main__':
    # LSTM training driver: build trainer + single-process runner from env.
    trainer = LSTMTrainer()
    runner = SingleProcessRun()
    env, all_args = load_env(os.environ, trainer=trainer, runner=runner)

    GC = env["game"].initialize()

    model = env["model_loaders"][0].load_model(GC.params)
    mi = ModelInterface()
    # Trained model carries its optimizer settings.
    mi.add_model("model", model, optim_params={"lr": 0.001})
    # Separate actor copy; all_args.gpu is None means run on CPU.
    mi.add_model("actor",
                 model,
                 copy=True,
                 cuda=all_args.gpu is not None,
                 gpu_id=all_args.gpu)

    trainer.setup(sampler=env["sampler"], mi=mi, rl_method=env["method"])

    GC.reg_callback("train", trainer.train)
示例#6
0
def main():
    """GTP console driver: load one online model and serve GoConsoleGTP
    via game-context callbacks until the console asks to exit.
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    additional_to_load = {
        'evaluator': (Evaluator.get_option_spec(),
                      lambda object_map: Evaluator(object_map, stats=None)),
        'console': (GoConsoleGTP.get_option_spec(),
                    lambda object_map: GoConsoleGTP(object_map))
    }

    # Set game to online model.
    env = load_env(os.environ,
                   overrides=dict(additional_labels=['aug_code',
                                                     'move_idx'], ),
                   additional_to_load=additional_to_load)
    evaluator = env['evaluator']

    GC = env["game"].initialize()
    console = env["console"]

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)
    # NOTE(review): use_gpu is currently unused -- the GPU-copy call
    # below is commented out.
    gpu = model_loader.options.gpu
    use_gpu = gpu is not None and gpu >= 0

    mi = env['mi']
    mi.add_model("model", model)
    # mi.add_model(
    #     "actor", model,
    #     copy=True, cuda=use_gpu, gpu_id=gpu)
    # "actor" shares the same model object (no copy); both in eval mode.
    mi.add_model("actor", model)
    mi["model"].eval()
    mi["actor"].eval()

    console.setup(GC, evaluator)

    def human_actor(batch):
        #py = psutil.Process(pid)
        #memoryUse = py.memory_info()[0]/2.**30  # memory use in GB...I think
        #print('memory use:', memoryUse)
        # Forward the batch to the console prompt for a human move.
        return console.prompt("", batch)

    def actor(batch):
        return console.actor(batch)

    def train(batch):
        console.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    GC.reg_callback_if_exists("actor_black", actor)
    GC.reg_callback_if_exists("human_actor", human_actor)
    GC.reg_callback_if_exists("train", train)
    GC.start()
    # TODO: For now fixed resign threshold to be 0.05. Will add a switch
    GC.game_obj.setRequest(mi["actor"].step, -1, 0.05, -1)

    evaluator.episode_start(0)

    # Pump the game loop until the console requests exit.
    while True:
        GC.run()
        if console.exit:
            break

    GC.stop()
示例#7
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
from datetime import datetime

import sys
import os

from rlpytorch import LSTMTrainer, Sampler, EvalIters, load_env, ModelLoader, ArgsProvider, ModelInterface

if __name__ == '__main__':
    # LSTM evaluation driver (actor_only mode).
    trainer = LSTMTrainer()
    eval_iters = EvalIters()
    env, all_args = load_env(os.environ,
                             overrides=dict(actor_only=True),
                             trainer=trainer,
                             eval_iters=eval_iters)

    GC = env["game"].initialize()

    model = env["model_loaders"][0].load_model(GC.params)
    mi = ModelInterface()
    mi.add_model("model", model)
    # Separate actor copy; all_args.gpu is None means run on CPU.
    mi.add_model("actor",
                 model,
                 copy=True,
                 cuda=all_args.gpu is not None,
                 gpu_id=all_args.gpu)

    # NOTE(review): the locally built `mi` is discarded here and
    # env["mi"] is passed instead -- looks suspicious; confirm intent.
    trainer.setup(sampler=env["sampler"], mi=env["mi"])
示例#8
0
def main():
    """Two-actor selfplay/eval driver (black vs white): loads two models,
    registers a per-actor Evaluator, reloads checkpoints on game_start,
    reports win rates on game_end, and loops until loop_end is set.
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Set game to online model.
    actors = ["actor_black", "actor_white"]
    # actor_name is bound as a lambda default to dodge the
    # late-binding-closure pitfall inside the comprehension.
    additional_to_load = {
        ("eval_" + actor_name):
        (Evaluator.get_option_spec(name="eval_" + actor_name),
         lambda object_map, actor_name=actor_name: Evaluator(
             object_map,
             name="eval_" + actor_name,
             actor_name=actor_name,
             stats=None))
        for i, actor_name in enumerate(actors)
    }
    # One ModelInterface per actor.
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(os.environ,
                   num_models=2,
                   overrides={'actor_only': True},
                   additional_to_load=additional_to_load)

    GC = env["game"].initialize()

    stats = [Stats(), Stats()]

    for i in range(len(actors)):
        actor_name = actors[i]
        stat = stats[i]
        e = env["eval_" + actor_name]

        print(f'register {actor_name} for e = {e!s}')
        e.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        def actor(batch, e, stat):
            reply = e.actor(batch)
            stat.feed(batch)
            return reply

        # Bind e/stat as defaults so each callback keeps its own pair.
        GC.reg_callback(actor_name,
                        lambda batch, e=e, stat=stat: actor(batch, e, stat))

    root = os.environ.get("root", "./")
    print(f'Root: "{root}"')
    args = env["game"].options
    # Flipped by game_end (via nonlocal) to stop the main loop.
    loop_end = False

    def game_start(batch):
        print("In game start")

        vers = [int(batch["black_ver"][0]), int(batch["white_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(env["model_loaders"], vers,
                                                 actors):
            if ver >= 0:
                # Retry until the checkpoint can actually be loaded.
                while True:
                    try:
                        reload(env["mi_" + actor_name], model_loader,
                               GC.params, args, root, ver, actor_name)
                        break
                    except BaseException:
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    def game_end(batch):
        nonlocal loop_end
        wr = batch.GC.getClient().getGameStats().getWinRateStats()
        win_rate = (100.0 * wr.black_wins /
                    wr.total_games if wr.total_games > 0 else 0.0)
        print(f'{datetime.now()!s} B/W: {wr.black_wins}/{wr.white_wins}.'
              f'Black winrate: {win_rate:.2f} ({wr.total_games})')

        # Stop after the configured number of games, if set.
        if args.suicide_after_n_games > 0 and \
                wr.total_games >= args.suicide_after_n_games:
            print(f'#suicide_after_n_games: {args.suicide_after_n_games}, '
                  f'total_games: {wr.total_games}')
            loop_end = True

    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()
    if args.eval_model_pair:
        # Either an explicit "black,white" pair or versions extracted
        # from the model loaders.
        if args.eval_model_pair.find(",") >= 0:
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])

            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params, env["mi_" + actor_name],
                             actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.getClient().setRequest(int(black), int(white),
                                     env['game'].options.resign_thres, 1)

    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
示例#9
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from datetime import datetime

import sys
import os

from rlpytorch import load_env, Evaluator, ArgsProvider, EvalIters

if __name__ == '__main__':
    # Evaluation driver (actor_only mode) with an iteration counter.
    evaluator = Evaluator(stats=False)
    eval_iters = EvalIters()
    env, args = load_env(os.environ,
                         overrides=dict(actor_only=True),
                         evaluator=evaluator,
                         eval_iters=eval_iters)

    GC = env["game"].initialize()

    model = env["model_loaders"][0].load_model(GC.params)
    # args.gpu is None means run on CPU.
    # (Fixed: `not args.gpu is None` -> `args.gpu is not None`, PEP 8.)
    env["mi"].add_model("actor",
                        model,
                        cuda=args.gpu is not None,
                        gpu_id=args.gpu)
    # Inference only.
    env["mi"]["actor"].eval()

    def actor(batch):
        reply = evaluator.actor(batch)
        '''
        s = batch["s"][0][0]
示例#10
0
import os
from rlpytorch import Evaluator, load_env
from console_lib import GoConsoleGTP

if __name__ == '__main__':
    additional_to_load = {
        'evaluator': (Evaluator.get_option_spec(),
                      lambda object_map: Evaluator(object_map, stats=None)),
    }

    # Set game to online model.
    # Single greedy game with extra labels for augmentation / move index.
    env = load_env(os.environ,
                   overrides=dict(
                       num_games=1,
                       greedy=True,
                       T=1,
                       model="online",
                       additional_labels=['aug_code', 'move_idx'],
                   ),
                   additional_to_load=additional_to_load)

    evaluator = env['evaluator']

    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)

    # GPU id may be None (CPU) or negative (disabled).
    gpu = model_loader.options.gpu
    use_gpu = gpu is not None and gpu >= 0
示例#11
0
                # Check whether the actions remains the same.
                if t < T - 1:
                    key = (id, sel["seq"][t][i], sel["game_counter"][t][i])
                    recorded_a = self.idgseq2action[key]
                    actual_a = sel["a"][t][i]
                    if recorded_a != actual_a:
                        self._debug(
                            "%s Action was different. recorded %d, actual %d" %
                            (prompt, recorded_a, actual_a))

            # Overlapped by 1.
            self.id2seqs_train[id] = last_seq - 1


if __name__ == '__main__':
    # Stats-collection driver: route actor/train batches to a collector.
    collector = StatsCollector()
    runner = SingleProcessRun()
    env, all_args = load_env(os.environ, collector=collector, runner=runner)

    GC = env["game"].initialize()
    # GC.setup_gpu(0)
    collector.set_params(GC.params)

    GC.reg_callback("actor", collector.actor)
    GC.reg_callback("train", collector.train)
    # reg_sig_int: presumably registers a SIGINT handler for clean
    # shutdown -- confirm against the GC API.
    GC.reg_sig_int()

    runner.setup(GC)
    runner.run()
示例#12
0
def main():
    """Two-actor selfplay driver (black vs white).

    Loads two models, registers a per-actor Evaluator, reloads model
    versions on game_start, prints win rates on game_end, and loops
    until args.suicide_after_n_games is reached (when set).

    Relies on module-level names defined outside this function
    (os, time, datetime, load_env, Evaluator, ModelInterface, Stats,
    reload, reload_model, extract_ver).
    """
    # Set game to online model.
    actors = ["actor_black", "actor_white"]
    # actor_name is bound as a lambda default to dodge the
    # late-binding-closure pitfall inside the comprehension.
    additional_to_load = {
        ("eval_" + actor_name):
        (Evaluator.get_option_spec(name="eval_" + actor_name),
         lambda object_map, actor_name=actor_name: Evaluator(
             object_map,
             name="eval_" + actor_name,
             actor_name=actor_name,
             stats=None))
        for actor_name in actors
    }
    # One ModelInterface per actor.
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(os.environ,
                   num_models=2,
                   overrides=dict(actor_only=True),
                   additional_to_load=additional_to_load)

    GC = env["game"].initialize()

    stats = [Stats(), Stats()]

    for actor_name, stat in zip(actors, stats):
        e = env["eval_" + actor_name]

        print("register " + actor_name + " for e = " + str(e))
        e.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        def actor(batch, e, stat):
            reply = e.actor(batch)
            stat.feed(batch)
            return reply

        # Bind e/stat as defaults so each callback keeps its own pair.
        GC.reg_callback(actor_name,
                        lambda batch, e=e, stat=stat: actor(batch, e, stat))

    root = os.environ.get("root", "./")
    print("Root: \"%s\"" % root)
    args = env["game"].options
    # Flipped by game_end via `nonlocal` (was a module-level `global`
    # before; nonlocal keeps the flag local to this driver, matching the
    # sibling driver in this project).
    loop_end = False

    def game_start(batch):
        print("In game start")

        vers = [int(batch["black_ver"][0]), int(batch["white_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(env["model_loaders"], vers,
                                                 actors):
            if ver >= 0:
                # Retry until the checkpoint can actually be loaded.
                while True:
                    try:
                        reload(env["mi_" + actor_name], model_loader,
                               GC.params, args, root, ver, actor_name)
                        break
                    except BaseException:
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    def game_end(batch):
        nonlocal loop_end
        wr = batch.GC.getGameStats().getWinRateStats()
        win_rate = 100.0 * wr.black_wins / wr.total_games \
            if wr.total_games > 0 else 0.0
        print("%s B/W: %d/%d. Black winrate: %.2f (%d)" %
              (str(datetime.now()), wr.black_wins, wr.white_wins, win_rate,
               wr.total_games))
        # Stop the main loop once enough games have been played.
        if args.suicide_after_n_games > 0 and \
           wr.total_games >= args.suicide_after_n_games:
            print("#suicide_after_n_games: %d, total_games: %d" %
                  (args.suicide_after_n_games, wr.total_games))
            loop_end = True

    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()
    if args.eval_model_pair:
        # Either an explicit "black,white" pair, or versions extracted
        # from the model loaders.
        if args.eval_model_pair.find(",") >= 0:
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])

            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params, env["mi_" + actor_name],
                             actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.setRequest(int(black), int(white),
                         env['game'].options.resign_thres, 1)

    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
示例#13
0
    'gpu': 0,
    'num_block': 20,
    'dim': 224,
    'mcts_puct': 1.50,
    'batchsize': 16,
    'mcts_rollout_per_batch': 16,
    'mcts_threads': 2,
    'mcts_rollout_per_thread':
    64,  # bigger value will spend more time to genmove
    'resign_thres': 0.05,
    'mcts_virtual_loss': 1,
}

# Set game to online model.
# `overrides` and `additional_to_load` are defined earlier in this file.
env = load_env(os.environ,
               overrides=overrides,
               additional_to_load=additional_to_load)

GC_PARAMS = {
    'ACTION_CLEAR': -97,
    'ACTION_PASS': -99,
    'ACTION_RESIGN': -98,
    'ACTION_SKIP': -100,
    'board_size': 19,
    'num_action': 362,
    'num_future_actions': 1,
    'num_planes': 18,
    'opponent_stone_plane': 1,
    'our_stone_plane': 0,
    'num_group': 2,
    'T': 1
示例#14
0
# LICENSE file in the root directory of this source tree.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from datetime import datetime

import sys
import os

from rlpytorch import load_env, Evaluator, ArgsProvider, EvalIters

if __name__ == '__main__':
    # Reduced-service evaluation: project states through the actor net.
    evaluator = Evaluator(stats=False)
    eval_iters = EvalIters()
    env, args = load_env(os.environ, overrides=dict(actor_only=True), eval_iters=eval_iters, evaluator=evaluator)

    # Reduced service exposes projection/forward hooks instead of play.
    GC = env["game"].initialize_reduced_service()

    model = env["model_loaders"][0].load_model(GC.params)
    mi = env["mi"]
    # args.gpu is None means run on CPU.
    mi.add_model("actor", model, cuda=args.gpu is not None, gpu_id=args.gpu)

    def reduced_project(batch):
        # Project the latest observation into the reduced state "h".
        output = mi["actor"].forward(batch.hist(0))
        eval_iters.stats.feed_batch(batch)
        return dict(reduced_s=output["h"].data)

    def reduced_forward(batch):
        b = batch.hist(0)
        output = mi["actor"].transition(b["reduced_s"], b["a"])
示例#15
0
import re
import time

from rlpytorch import load_env, SingleProcessRun, Trainer

matcher = re.compile(r"save-(\d+).bin")

if __name__ == '__main__':
    # Training driver with checkpoint tracking.
    additional_to_load = {
        'trainer':
        (Trainer.get_option_spec(), lambda option_map: Trainer(option_map)),
        'runner': (SingleProcessRun.get_option_spec(),
                   lambda option_map: SingleProcessRun(option_map)),
    }

    # NOTE(review): `os` is used below but only `re` and `time` are
    # imported in this snippet -- confirm `import os` exists upstream.
    env = load_env(os.environ, additional_to_load=additional_to_load)

    trainer = env['trainer']
    runner = env['runner']

    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)
    # Register the model to be optimized.
    env["mi"].add_model("model", model, opt=True)

    keep_prev_selfplay = env["game"].options.keep_prev_selfplay
    model_ver = 0
    # Path of the checkpoint the loader was asked to load (may be empty).
    model_filename = model_loader.options.load
    if isinstance(model_filename, str) and model_filename != "":
        realpath = os.path.realpath(model_filename)
示例#16
0
文件: eval_lstm.py 项目: GenjiWu/ELF
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
from datetime import datetime

import sys
import os

from rlpytorch import LSTMTrainer, Sampler, EvalIters, load_env, ModelLoader, ArgsProvider, ModelInterface

if __name__ == '__main__':
    # LSTM evaluation driver (actor_only mode).
    trainer = LSTMTrainer()
    eval_iters = EvalIters()
    env, all_args = load_env(os.environ, overrides=dict(actor_only=True), trainer=trainer, eval_iters=eval_iters)

    GC = env["game"].initialize()

    model = env["model_loaders"][0].load_model(GC.params)
    mi = ModelInterface()
    mi.add_model("model", model)
    # Separate actor copy; all_args.gpu is None means run on CPU.
    mi.add_model("actor", model, copy=True, cuda=all_args.gpu is not None, gpu_id=all_args.gpu)

    # NOTE(review): the locally built `mi` is discarded and env["mi"]
    # is passed instead -- looks suspicious; confirm intent.
    trainer.setup(sampler=env["sampler"], mi=env["mi"])

    def actor(batch):
        # Run the trainer's actor and record batch statistics.
        reply = trainer.actor(batch)
        eval_iters.stats.feed_batch(batch)
        return reply
示例#17
0
def main():
    """gRPC-backed game server driver: connects to a TurnStub service,
    relays human moves between the remote client and the local game
    context, and runs the AI actor until the console exits.
    """
    # Connect to the configured game server, defaulting to localhost.
    address = addrs['game_server']
    if address != "":
        channel = grpc.insecure_channel(address + ':50051')
    else:
        channel = grpc.insecure_channel("localhost:50051")
    stub = play_pb2_grpc.TurnStub(channel)
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    additional_to_load = {
        'evaluator': (Evaluator.get_option_spec(),
                      lambda object_map: Evaluator(object_map, stats=None)),
    }

    # Set game to online model.
    env = load_env(os.environ,
                   overrides={
                       'num_games': 1,
                       'greedy': True,
                       'T': 1,
                       'model': 'online',
                       'additional_labels': ['aug_code', 'move_idx'],
                   },
                   additional_to_load=additional_to_load)

    evaluator = env['evaluator']

    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)

    # "model" and "actor" share the same object; both in eval mode.
    mi = env['mi']
    mi.add_model("model", model)
    mi.add_model("actor", model)
    mi["model"].eval()
    mi["actor"].eval()

    console = GoConsoleGTP(GC, evaluator)

    # TODO: create an instance of game when the client sends a request

    # print("\n\n\nCheck connect\n\n\n")
    # ID = stub.NewRoom(play_pb2.State(status = True)).ID
    # print("Current AI's ID is ", ID)

    # res_arr = stub.GetResumed(play_pb2.State(status = True, ID = ID)).move
    # console.res_len = len(res_arr)
    # # console.res_ind = 3
    # # arr = ["BKD", "WFB", "BGA"]
    # if console.res_len > 0 and res_arr[-1][0].upper() == "B":
    #     _ = stub.UpdateNext(play_pb2.State(status = True, ID = ID))

    # def check_end_game(m):
    #     if m.quit:
    #         GC.stop()
    #     return m

    def reset():
        # Open a new room, wait for color selection, and resume any
        # previously recorded moves for this room.
        ID = stub.NewRoom(play_pb2.State(status=True)).ID
        console.ID = ID
        console.color = {'has_chosen': False, "client": 1, "AI": 2}
        console.prev_player = 0
        print("Current AI's ID is ", console.ID)
        if not console.color["has_chosen"]:
            # Busy-wait until the remote client has picked a color.
            while not stub.HasChosen(play_pb2.State(status=True,
                                                    ID=ID)).status:
                pass
            # AI_color = stub.GetAIPlayer(play_pb2.State(status = True)).color
            # human_color = AI_color % 2 + 1
            console.color["AI"] = stub.GetAIPlayer(
                play_pb2.State(status=True, ID=ID)).color
            console.color["client"] = console.color["AI"] % 2 + 1
            console.color["has_chosen"] = True
        console.res_arr = stub.GetResumed(play_pb2.State(status=True,
                                                         ID=ID)).move
        console.res_len = len(console.res_arr)
        if console.res_len > 0 and console.res_arr[-1][0].upper() == "B":
            _ = stub.UpdateNext(play_pb2.State(status=True, ID=ID))

    reset()

    def check_reset(reply):
        # Returns (True, reply-with-clear-action) when the remote side
        # requested a restart; otherwise (False, reply) unchanged.
        console.reset = stub.CheckExit(
            play_pb2.State(status=True, ID=console.ID)).status
        if console.reset:
            print("\n\n\nRestarting game...\n\n\n")
            reset()
            console.reset = False
            reply["a"] = console.actions["clear"]
            return True, reply
        return False, reply

    def human_actor(batch):
        # Produce the "human" move: either replay a resumed move, or
        # relay the AI's last move to the server and wait for the
        # remote client's move.
        # print("\n\n\nCheck human_actor\n\n\n")
        reply = dict(pi=None, a=None, V=0)
        ID = console.ID
        # console.reset = stub.CheckExit(play_pb2.State(status = True, ID = ID)).status
        # if console.reset:
        #     print("\n\n\nRestarting game...\n\n\n")
        #     reset()
        #     console.reset = False
        #     reply["a"] = console.actions["clear"]
        #     return reply
        AI_color = console.color["AI"]
        human_color = console.color["client"]
        # is_resumed = stub.IsResumed(play_pb2.State(status = True)).status
        if console.res_len > 0:
            # Replay the next move of a resumed game.
            # print("\n\n\nCheck is_resumed = true\n\n\n")
            # print("\n\n\n", arr[-console.res_ind], "\n\n\n")
            reply["a"] = console.str2action(console.res_arr[-console.res_len])
            console.res_len -= 1
            return reply
        # print("\n\n\nCheck is_resumed = false\n\n\n")
        while True:
            if console.prev_player == 1:
                # AI moved last: push its move to the server.
                move = console.get_last_move(batch)
                x, y = move2xy(move)
                _ = stub.SetMove(
                    play_pb2.Step(x=x,
                                  y=y,
                                  player=play_pb2.Player(color=AI_color,
                                                         ID=ID)))
                _ = stub.UpdateNext(play_pb2.State(status=True, ID=ID))
            if stub.IsNextPlayer(play_pb2.Player(color=AI_color,
                                                 ID=ID)).status:
                # It is the AI's turn: skip the human slot.
                reply["a"] = console.actions["skip"]
                console.prev_player = 1
                return reply
            # else:
            # Busy-wait for the human move, restarting on remote reset.
            while stub.IsNextPlayer(play_pb2.Player(color=human_color,
                                                    ID=ID)).status:
                do_reset, reply = check_reset(reply)
                if do_reset:
                    return reply
                pass
            human_xy = stub.GetMove(play_pb2.Player(color=human_color, ID=ID))
            reply["a"] = console.move2action(xy2move(human_xy.x, human_xy.y))
            console.prev_player = 2
            return reply

    def actor(batch):
        return console.actor(batch)

    def train(batch):
        console.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    GC.reg_callback_if_exists("actor_black", actor)
    GC.reg_callback_if_exists("human_actor", human_actor)
    GC.reg_callback_if_exists("train", train)

    GC.start()
    GC.GC.getClient().setRequest(mi["actor"].step, -1,
                                 env['game'].options.resign_thres, -1)

    evaluator.episode_start(0)

    # Pump the game loop until the console requests exit.
    while True:
        GC.run()
        if console.exit:
            break
    GC.stop()
示例#18
0
文件: selfplay.py 项目: alatyshe/ELF
def main():
    """Self-play entry point: load the white/black actor models, register
    per-actor batch callbacks with the C++ GameContext, and run games until
    the client reaches --suicide_after_n_games."""
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Register player names
    actors = ["actor_white", "actor_black"]

    # Evaluator is a pure python class which runs a neural network in eval
    # mode, returns the results and updates some stat info.
    # Creates the 'eval_actor_white' and 'eval_actor_black' entries.
    additional_to_load = {
        ("eval_" + actor_name):
        (Evaluator.get_option_spec(name="eval_" + actor_name),
         lambda object_map, actor_name=actor_name: Evaluator(
             object_map,
             name="eval_" + actor_name,
             actor_name=actor_name,
             stats=None))
        # enumerate() was used here before, but the index was never read.
        for actor_name in actors
    }
    # ModelInterface is a python class saving network models; its member
    # `models` is a key-value store to call a CNN model by name.
    # Creates the 'mi_actor_white' and 'mi_actor_black' entries.
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })
    # load_env builds:
    #   game          - elfgames.american_checkers.game
    #   method        - the "method" passed via params
    #                   (model_file=elfgames.american_checkers.model_american_checkers,
    #                    model=df_pred)
    #   model_loaders - ModelLoader instances for the "model" param above
    #   sampler       - used to sample an action from policy
    #   mi_*          - ModelInterface instances (see above)
    #   eval_*        - Evaluator instances (see above)
    env = load_env(os.environ,
                   num_models=2,
                   overrides={'actor_only': True},
                   additional_to_load=additional_to_load)
    # Initializes keys ('game_end', 'game_start', 'actor_white',
    # 'actor_black') for Python<->C++ communication (defined in Game.py and
    # GameFeature.h), wraps the C++ GameContext from the python side, and
    # sets the mode parsed from options (play/selfplay/train/offline_train).
    GC = env["game"].initialize()

    # Some statistic about batch usage; more per-game stats can be added.
    stats = [Stats(), Stats()]

    # Register one callback per actor. When the C++ AIClientT calls
    # act(state, key), the state is forwarded by its key ('actor_white' /
    # 'actor_black') to the matching actor() closure below.
    # NOTE: replaced the old `for i in range(len(actors))` index loop.
    for actor_name, stat in zip(actors, stats):
        evaluator = env["eval_" + actor_name]
        evaluator.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        def actor(batch, evaluator, stat):
            reply = evaluator.actor(batch)
            stat.feed(batch)
            return reply

        # Bind the current evaluator/stat via lambda defaults (avoids the
        # late-binding closure pitfall).
        GC.reg_callback(actor_name,
                        lambda batch, evaluator=evaluator, stat=stat: actor(
                            batch, evaluator, stat))

    # Get the directory containing the models.
    root = os.environ.get("root", "./")
    args = env["game"].options
    # Stops client after N games, defined in --suicide_after_n_games param.
    loop_end = False

    def game_start(batch):
        """Load/reload the models to the versions sent by the server.

        Called through the 'game_start' key from the C++ side.
        """
        info = "game_start() load/reload models\n"
        logger.info(info)

        vers = [int(batch["white_ver"][0]), int(batch["black_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(env["model_loaders"], vers,
                                                 actors):
            if ver >= 0:
                # Retry until the checkpoint becomes readable.
                while True:
                    try:
                        reload(env["mi_" + actor_name], model_loader,
                               GC.params, args, root, ver, actor_name)
                        break
                    except BaseException:
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    def game_end(batch):
        """Log game statistics and stop the client after N games (loop_end).

        Called through the 'game_end' key from the C++ side.
        """
        nonlocal loop_end
        wr = batch.GC.getClient().getGameStats().getWinRateStats()
        win_rate = (100.0 * wr.black_wins / (wr.black_wins + wr.white_wins) if
                    (wr.black_wins + wr.white_wins) > 0 else 0.0)

        info = f'game_end()\tB/W: {wr.black_wins}/{wr.white_wins}, '
        info += f'Draw: {wr.both_lost}, '
        info += f'Black winrate: {win_rate:.2f}, '
        info += f'Total Games: {wr.total_games}'

        logger.info(info)
        if args.suicide_after_n_games > 0 and \
            wr.total_games >= args.suicide_after_n_games:
            info = f'game_end()\tTotal Games: {wr.total_games}, '
            info += f'#suicide_after_n_games: {args.suicide_after_n_games}'
            logger.info(info)
            loop_end = True

    # Registering the methods described above in Python's GameContext.
    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()
    # With --eval_model_pair, load two model versions and pass them to the
    # C++ side for evaluation.
    if args.eval_model_pair:
        if args.eval_model_pair.find(",") >= 0:
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])
            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params, env["mi_" + actor_name],
                             actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.getClient().setRequest(int(black), int(white), 1)

    # Called before each episode, resets actor_count(num of total nn call)
    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
示例#19
0
文件: check.py 项目: GenjiWu/ELF
                if sel["seq"][t][i] != last_seq + 1:
                    self._debug("%s. Invalid next seq. seq should be %d" % (prompt, last_seq + 1))
                last_seq += 1

                # Check whether the actions remains the same.
                if t < T - 1:
                    key = (id, sel["seq"][t][i], sel["game_counter"][t][i])
                    recorded_a = self.idgseq2action[key]
                    actual_a = sel["a"][t][i]
                    if recorded_a != actual_a:
                        self._debug("%s Action was different. recorded %d, actual %d" % (prompt, recorded_a, actual_a))

            # Overlapped by 1.
            self.id2seqs_train[id] = last_seq - 1

if __name__ == '__main__':
    # Wire a stats collector into a single-process run and start it.
    stats_collector = StatsCollector()
    process_runner = SingleProcessRun()
    env, all_args = load_env(
        os.environ, collector=stats_collector, runner=process_runner)

    game_context = env["game"].initialize()
    stats_collector.set_params(game_context.params)

    # Route C++-side batches to the collector's handlers.
    for key, handler in (("actor", stats_collector.actor),
                         ("train", stats_collector.train)):
        game_context.reg_callback(key, handler)
    game_context.reg_sig_int()

    process_runner.setup(game_context)
    process_runner.run()
示例#20
0
文件: train.py 项目: bearrundr/ELF
from rlpytorch import load_env, SingleProcessRun, Trainer

matcher = re.compile(r"save-(\d+).bin")

if __name__ == '__main__':
    additional_to_load = {
        'trainer': (
            Trainer.get_option_spec(),
            lambda option_map: Trainer(option_map)),
        'runner': (
            SingleProcessRun.get_option_spec(),
            lambda option_map: SingleProcessRun(option_map)),
    }

    env = load_env(os.environ, additional_to_load=additional_to_load)

    trainer = env['trainer']
    runner = env['runner']

    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)
    env["mi"].add_model("model", model, opt=True)

    keep_prev_selfplay = env["game"].options.keep_prev_selfplay
    model_ver = 0
    model_filename = model_loader.options.load
    if isinstance(model_filename, str) and model_filename != "":
        realpath = os.path.realpath(model_filename)
示例#21
0
def main():
    """Training-server entry point: set up trainer/runner, track the current
    selfplay model version, and coordinate model reloads with the C++ server."""
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    additional_to_load = {
        'trainer':
        (Trainer.get_option_spec(), lambda option_map: Trainer(option_map)),
        'runner': (SingleProcessRun.get_option_spec(),
                   lambda option_map: SingleProcessRun(option_map)),
    }

    env = load_env(os.environ, additional_to_load=additional_to_load)

    trainer = env['trainer']
    runner = env['runner']

    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)
    env["mi"].add_model("model", model, opt=True)

    keep_prev_selfplay = env["game"].options.keep_prev_selfplay
    model_ver = 0
    # Recover the starting model version from the checkpoint filename
    # ("save-<ver>.bin"), if a checkpoint was given.
    model_filename = model_loader.options.load
    if isinstance(model_filename, str) and model_filename != "":
        realpath = os.path.realpath(model_filename)
        m = matcher.match(os.path.basename(realpath))
        if m:
            model_ver = int(m.group(1))

    eval_old_model = env["game"].options.eval_old_model

    # Either evaluate the loaded model against an older version, or start
    # normal selfplay at model_ver.
    if eval_old_model >= 0:
        GC.GC.getServer().setEvalMode(model_ver, eval_old_model)
    else:
        GC.GC.getServer().setInitialVersion(model_ver)

    selfplay_ver = model_ver
    root = os.environ["save"]
    print(f'Root: "{root}"')
    print(f'Keep prev_selfplay: {keep_prev_selfplay!s}')

    def train(batch, *args, **kwargs):
        """Train on a batch, skipping batches from a stale selfplay version."""
        # Check whether the version match.
        if keep_prev_selfplay or \
                (batch["selfplay_ver"] != selfplay_ver).sum() == 0:
            trainer.train(batch, *args, **kwargs)
        else:
            print(f'Get batch whose selfplay ver is different from '
                  f'{selfplay_ver}, skipping')
            # Skipped batches should not count toward the episode.
            runner.inc_episode_counter(-1)

    def train_ctrl(batch, *args, **kwargs):
        """Switch to the batch's selfplay version and reload its checkpoint."""
        nonlocal selfplay_ver
        old_selfplay_ver = selfplay_ver
        selfplay_ver = int(batch["selfplay_ver"][0])
        print(
            f'Train ctrl: selfplay_ver: {old_selfplay_ver} -> {selfplay_ver}')
        GC.GC.getServer().waitForSufficientSelfplay(selfplay_ver)

        # Reload old models.
        real_path = os.path.join(root, "save-" + str(selfplay_ver) + ".bin")
        model_loader.options.load = real_path

        # Retry until the checkpoint file becomes readable.
        while True:
            try:
                model = model_loader.load_model(GC.params)
                break
            except BaseException:
                time.sleep(10)

        env["mi"].remove_model("model")
        env["mi"].add_model("model", model, opt=True)
        trainer.episode_reset()
        runner.set_episode_counter(-1)

    GC.reg_callback("train", train)
    GC.reg_callback("train_ctrl", train_ctrl)

    # Optional on-server actor: a separate inference copy of the model.
    if GC.reg_has_callback("actor"):
        args = env["game"].options
        env["mi"].add_model("actor",
                            model,
                            copy=True,
                            cuda=(args.gpu >= 0),
                            gpu_id=args.gpu)
        GC.reg_callback("actor", trainer.actor)

    trainer.setup(sampler=env["sampler"],
                  mi=env["mi"],
                  rl_method=env["method"])

    def episode_summary(i):
        """Summarize the episode and announce the newly trained version."""
        nonlocal selfplay_ver
        ver = trainer.episode_summary(i)
        # This might block (when evaluation does not catch up with training).
        GC.GC.getServer().notifyNewVersion(selfplay_ver, ver)

    offline_training = (env["game"].options.mode == "offline_train")

    def after_start():
        """Block until enough selfplay data exists (online training only)."""
        nonlocal selfplay_ver
        if not offline_training:
            print("About to wait for sufficient selfplay")
            GC.GC.getServer().waitForSufficientSelfplay(selfplay_ver)

    runner.setup(GC,
                 after_start=after_start,
                 episode_summary=episode_summary,
                 episode_start=trainer.episode_start)

    runner.run()
示例#22
0
def main():
    """Interactive GTP console: load one online model, register the
    console-driven callbacks and loop until the console asks to exit."""
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    extra_spec = {
        'evaluator': (
            Evaluator.get_option_spec(),
            lambda object_map: Evaluator(object_map, stats=None)),
    }

    # Set game to online model.
    env = load_env(
        os.environ,
        overrides={
            'num_games': 1,
            'greedy': True,
            'T': 1,
            'model': 'online',
            'additional_labels': ['aug_code', 'move_idx'],
        },
        additional_to_load=extra_spec)

    evaluator = env['evaluator']
    GC = env["game"].initialize()

    loader = env["model_loaders"][0]
    net = loader.load_model(GC.params)

    # Register the same network under both names, then freeze to eval mode.
    mi = env['mi']
    for key in ("model", "actor"):
        mi.add_model(key, net)
    for key in ("model", "actor"):
        mi[key].eval()

    console = GoConsoleGTP(GC, evaluator)

    def human_actor(batch):
        return console.prompt("", batch)

    def actor(batch):
        return console.actor(batch)

    def train(batch):
        console.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    # Register whichever of these keys the current game exposes.
    for name, handler in (("actor_black", actor),
                          ("human_actor", human_actor),
                          ("train", train)):
        GC.reg_callback_if_exists(name, handler)

    GC.start()
    GC.GC.getClient().setRequest(
        mi["actor"].step, -1, env['game'].options.resign_thres, -1)

    evaluator.episode_start(0)

    while True:
        GC.run()
        if console.exit:
            break
    GC.stop()
示例#23
0
from console_lib import GoConsoleGTP


if __name__ == '__main__':
    additional_to_load = {
        'evaluator': (
            Evaluator.get_option_spec(),
            lambda object_map: Evaluator(object_map, stats=None)),
    }

    # Set game to online model.
    env = load_env(
        os.environ,
        overrides=dict(
            num_games=1,
            greedy=True,
            T=1,
            model="online",
            additional_labels=['aug_code', 'move_idx'],
        ),
        additional_to_load=additional_to_load)

    evaluator = env['evaluator']

    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)

    gpu = model_loader.options.gpu
    use_gpu = gpu is not None and gpu >= 0
示例#24
0
文件: train_lstm.py 项目: GenjiWu/ELF
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
from datetime import datetime

import sys
import os

from rlpytorch import LSTMTrainer, Sampler, SingleProcessRun, load_env, ModelLoader, ArgsProvider, ModelInterface

if __name__ == '__main__':
    # Single-process LSTM training entry point.
    trainer = LSTMTrainer()
    runner = SingleProcessRun()
    env, all_args = load_env(os.environ, trainer=trainer, runner=runner)

    GC = env["game"].initialize()

    loaded_model = env["model_loaders"][0].load_model(GC.params)
    mi = ModelInterface()
    # Training copy with its own optimizer settings.
    mi.add_model("model", loaded_model, optim_params={"lr": 0.001})
    # Separate inference copy, optionally placed on the requested GPU.
    use_cuda = all_args.gpu is not None
    mi.add_model("actor", loaded_model, copy=True, cuda=use_cuda,
                 gpu_id=all_args.gpu)

    trainer.setup(sampler=env["sampler"], mi=mi, rl_method=env["method"])

    for name, handler in (("train", trainer.train), ("actor", trainer.actor)):
        GC.reg_callback(name, handler)
    runner.setup(GC, episode_summary=trainer.episode_summary,
                 episode_start=trainer.episode_start)
示例#25
0
            Trainer.get_option_spec(),
            lambda option_map: Trainer(option_map)),
        'trainer1': (
            Trainer.get_option_spec(),
            lambda option_map: Trainer(option_map)),
        'mi0': (
            ModelInterface.get_option_spec(), ModelInterface),
        'mi1': (
            ModelInterface.get_option_spec(), ModelInterface),
        'runner': (
            SingleProcessRun.get_option_spec(),
            lambda option_map: SingleProcessRun(option_map)),
    }

    env = load_env(os.environ, num_models=2,
                   additional_to_load=additional_to_load,
                   overrides=dict(backprop0=False,
                                  backprop1=False, mode="offline_train"))

    trainer0 = env['trainer0']
    trainer1 = env['trainer1']
    runner = env['runner']

    GC = env["game"].initialize()

    for i in range(2):
        model_loader = env["model_loaders"][i]
        model = model_loader.load_model(GC.params)
        env["mi%d" % i].add_model("model", model)
        env["mi%d" % i]["model"].eval()

    model_ver = 0
示例#26
0
def main():
    """Play Ugolki interactively: one evaluator, one model, console-driven."""
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Evaluator is a pure python class which runs the neural network in
    # eval mode, returns the results and updates some stat info.
    additional_to_load = {
        'evaluator': (
            Evaluator.get_option_spec(),
            lambda object_map: Evaluator(object_map, stats=None)),
    }

    # load_env wires together: the game module (elfgames.checkers.game),
    # the RL method and model loaders (df_model_checkers.py / df_pred),
    # the action sampler, the model interface, and the evaluator above.
    env = load_env(
        os.environ,
        overrides={
            'num_games': 1,
            'greedy': True,
            'T': 1,
            'additional_labels': ['aug_code', 'move_idx'],
        },
        additional_to_load=additional_to_load)

    evaluator = env['evaluator']

    # Creates the C++ GameContext, registers the Python<->C++ exchange keys
    # (Game.py / GameFeature.h) and sets the mode parsed from the options.
    GC = env["game"].initialize()

    # Build the network (Model_PolicyValue: init_conv, value_func, resnet, ...).
    loader = env["model_loaders"][0]
    net = loader.load_model(GC.params)

    # ModelInterface stores named models and serves them for evaluation;
    # switch the actor copy to eval mode.
    mi = env['mi']
    mi.add_model("actor", net)
    mi["actor"].eval()

    console = UgolkiConsole(GC, evaluator)

    def human_actor(batch):
        return console.prompt("", batch)

    def actor(batch):
        return console.actor(batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    # Register the per-key batch handlers on the python side; the C++
    # AIClientT calls them through act(state, key) using the keys
    # registered at game initialization.
    GC.reg_callback_if_exists("human_actor", human_actor)
    GC.reg_callback_if_exists("actor_black", actor)
    GC.start()
    # Tell the C++ client which model version is serving requests.
    GC.GC.getClient().setRequest(
        mi["actor"].step, -1, -1)

    # Reset actor_count (number of total nn calls) before the episode.
    evaluator.episode_start(0)

    while True:
        GC.run()
        if console.exit:
            break

    # NOTE(review): shutdown path could be cleaner (e.g. sys.exit) — left as-is.
    GC.stop()
示例#27
0
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Console for DarkForest
import sys
import os
from rlpytorch import load_env, Evaluator, ModelInterface, ArgsProvider, EvalIters

if __name__ == '__main__':
    evaluator = Evaluator(stats=False)
    # Set game to online model.
    env, args = load_env(os.environ, evaluator=evaluator, overrides=dict(mode="selfplay", T=1))

    GC = env["game"].initialize()
    model = env["model_loaders"][0].load_model(GC.params)
    mi = ModelInterface()
    mi.add_model("model", model)
    mi.add_model("actor", model, copy=True, cuda=args.gpu is not None, gpu_id=args.gpu)
    mi["model"].eval()
    mi["actor"].eval()

    evaluator.setup(mi=mi)

    total_batchsize = 0
    total_sel_batchsize = 0

    def actor(batch):
        global total_batchsize, total_sel_batchsize
示例#28
0
文件: train.py 项目: alatyshe/ELF
def main():
    """Checkers training server: set up trainer/runner, track the checkers
    selfplay version, and coordinate checkpoint reloads with the C++ server."""
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    logger = logging.getIndexedLogger(
        '\u001b[31;1m|py|\u001b[0melfgames.checkers.train-', '')

    # Trainer is a pure python class wrapped around an evaluator; it trains
    # the models. Runner drives the whole training loop.
    additional_to_load = {
        'trainer':
        (Trainer.get_option_spec(), lambda option_map: Trainer(option_map)),
        'runner': (SingleProcessRun.get_option_spec(),
                   lambda option_map: SingleProcessRun(option_map)),
    }

    env = load_env(os.environ, additional_to_load=additional_to_load)

    trainer = env['trainer']
    runner = env['runner']
    """
    Initializes keys('train', 'train_ctrl')
    for communication Python and C++ code, defined in Game.py and GameFeature.h.
    Also, initializes GameContext from C++ library wrapped by GC from python side
    + sets mode that parsed from options like play/selfplay/train/offline_train.
  """
    GC = env["game"].initialize()

    model_loader = env["model_loaders"][0]
    model = model_loader.load_model(GC.params)
    env["mi"].add_model("model", model, opt=True)

    keep_prev_selfplay = env["game"].options.keep_prev_selfplay
    model_ver = 0

    # Recover the starting model version from the checkpoint filename
    # ("save-<ver>.bin"), if a checkpoint path was given.
    model_filename = model_loader.options.load
    if isinstance(model_filename, str) and model_filename != "":
        realpath = os.path.realpath(model_filename)
        m = matcher.match(os.path.basename(realpath))
        if m:
            model_ver = int(m.group(1))

    eval_old_model = env["game"].options.eval_old_model

    # Either evaluate against an older model or start selfplay at model_ver.
    if eval_old_model >= 0:
        GC.GC.getServer().setEvalMode(model_ver, eval_old_model)
    else:
        GC.GC.getServer().setInitialVersion(model_ver)

    checkers_selfplay_ver = model_ver
    root = os.environ["save"]

    print(f'Save models in\t\t: "{root}"')
    print(f'Keep prev_selfplay\t: {keep_prev_selfplay!s}')

    def train(batch, *args, **kwargs):
        """Train on a batch, skipping batches from a stale selfplay version."""
        # Check whether the version match.
        if keep_prev_selfplay or \
            (batch["checkers_selfplay_ver"] != checkers_selfplay_ver).sum() == 0:
            trainer.train(batch, *args, **kwargs)
        else:
            print(f'Get batch whose selfplay ver is different from '
                  f'{checkers_selfplay_ver}, skipping')
            runner.inc_episode_counter(-1)

    def train_ctrl(batch, *args, **kwargs):
        """Switch to the batch's selfplay version and reload its checkpoint."""
        nonlocal checkers_selfplay_ver

        old_selfplay_ver = checkers_selfplay_ver
        checkers_selfplay_ver = int(batch["checkers_selfplay_ver"][0])
        logger.info(
            f'Train ctrl: checkers_selfplay_ver: {old_selfplay_ver} -> {checkers_selfplay_ver}'
        )

        # Wait until the clients have produced enough well-formed selfplay
        # batches at this version.
        GC.GC.getServer().ServerWaitForSufficientSelfplay(
            checkers_selfplay_ver)

        # Reload old models.
        real_path = os.path.join(root,
                                 "save-" + str(checkers_selfplay_ver) + ".bin")
        model_loader.options.load = real_path

        # Retry until the checkpoint file becomes readable.
        while True:
            try:
                model = model_loader.load_model(GC.params)
                break
            except BaseException:
                time.sleep(10)

        env["mi"].remove_model("model")
        env["mi"].add_model("model", model, opt=True)
        trainer.episode_reset()
        runner.set_episode_counter(-1)

    GC.reg_callback("train", train)
    GC.reg_callback("train_ctrl", train_ctrl)

    # Optional on-server actor: a separate inference copy of the model.
    if GC.reg_has_callback("actor"):
        args = env["game"].options
        env["mi"].add_model("actor",
                            model,
                            copy=True,
                            cuda=(args.gpu >= 0),
                            gpu_id=args.gpu)
        GC.reg_callback("actor", trainer.actor)

    # Wire the sampler, model interface and RL method into the trainer.
    trainer.setup(sampler=env["sampler"],
                  mi=env["mi"],
                  rl_method=env["method"])

    def episode_summary(i):
        """Summarize the episode and announce the newly trained version."""
        nonlocal checkers_selfplay_ver

        logger.info("Episode_summary")
        ver = trainer.episode_summary(i)
        # This might block (when evaluation does not catch up with training).
        GC.GC.getServer().notifyNewVersion(checkers_selfplay_ver, ver)

    offline_training = (env["game"].options.mode == "offline_train")

    def after_start():
        """Block until enough selfplay data exists (online training only)."""
        logger.info("after_start")

        nonlocal checkers_selfplay_ver
        if not offline_training:
            GC.GC.getServer().ServerWaitForSufficientSelfplay(
                checkers_selfplay_ver)

    # sys.exit(0)

    runner.setup(GC,
                 after_start=after_start,
                 episode_summary=episode_summary,
                 episode_start=trainer.episode_start)

    runner.run_singe_process()
示例#29
0
文件: play_gui.py 项目: alatyshe/ELF
from additional import boardToJson
from rlpytorch import Evaluator, load_env
from py.UgolkiMoves import get_all_moves_ugolki

from flask import Flask, session, redirect, url_for, request, render_template
import json

# Evaluator runs the network in eval mode and collects some stat info.
additional_to_load = {
    'evaluator': (Evaluator.get_option_spec(),
                  lambda object_map: Evaluator(object_map, stats=None)),
}

# One greedy game at a time with extra per-move labels, for the web GUI.
env = load_env(os.environ,
               overrides={
                   'num_games': 1,
                   'greedy': True,
                   'T': 1,
                   'additional_labels': ['aug_code', 'move_idx'],
               },
               additional_to_load=additional_to_load)

# Per-player GUI sessions, keyed by player id.
all_session = {}

# Precomputed Ugolki move table used to translate human GUI input.
moves_for_human = get_all_moves_ugolki()


def init_observation(player_id):
    global env
    evaluator = env['evaluator']
    GC = env["game"].initialize()
    model = env["model_loaders"][0].load_model(GC.params)