Example #1
class PolicyNetworkBestMovePlayer(GtpInterface):
    def __init__(self, read_file):
        self.policy_network = PolicyNetwork(DEFAULT_FEATURES.planes,
                                            use_cpu=True)
        self.read_file = read_file
        super().__init__()

    def clear(self):
        super().clear()
        self.refresh_network()

    def refresh_network(self):
        # Ensure that the player is using the latest version of the network
        # so that the network can be continually trained even as it's playing.
        self.policy_network.initialize_variables(self.read_file)

    def suggest_move(self, position):
        if position.recent and position.n > 100 and position.recent[-1] is None:
            # Pass if the opponent passes
            return None
        move_probabilities = self.policy_network.run(position)
        for move in sorted_moves(move_probabilities):
            if go.is_reasonable(position, move):
                return move
        return None
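Note: the `sorted_moves` helper used above is not part of this snippet. A minimal sketch of its assumed behavior (yield board coordinates ordered by descending policy probability; the (row, col) indexing is an assumption) could be:

def sorted_moves(move_probabilities):
    # move_probabilities is assumed to be an N x N array of policy outputs;
    # return (row, col) coordinates from most to least probable.
    height, width = move_probabilities.shape
    coords = [(i, j) for i in range(height) for j in range(width)]
    return sorted(coords, key=lambda c: move_probabilities[c], reverse=True)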
Example #2
 def __init__(self, read_file, seconds_per_move=5):
     self.seconds_per_move = seconds_per_move
     self.max_rollout_depth = go.N * go.N * 3
     self.policy_network = PolicyNetwork(DEFAULT_FEATURES.planes,
                                         use_cpu=True)
     self.read_file = read_file
     super().__init__()
Example #3
def make_gtp_instance(strategy_name, read_file):
    n = PolicyNetwork(use_cpu=True)
    n.initialize_variables(read_file)
    if strategy_name == 'random':
        instance = RandomPlayer()
    elif strategy_name == 'policy':
        instance = GreedyPolicyPlayer(n)
    elif strategy_name == 'randompolicy':
        instance = RandomPolicyPlayer(n)
    elif strategy_name == 'mcts':
        instance = MCTSPlayer(n)
    else:
        return None
    gtp_engine = gtp.Engine(instance)
    return gtp_engine
Example #5
def gtp(strategy, read_file=None):
    n = PolicyNetwork(use_cpu=True)
    if strategy == 'random':
        instance = RandomPlayer()
    elif strategy == 'policy':
        instance = PolicyNetworkBestMovePlayer(n, read_file)
    elif strategy == 'randompolicy':
        instance = PolicyNetworkRandomMovePlayer(n, read_file)
    elif strategy == 'mcts':
        instance = MCTS(n, read_file)
    else:
        sys.stderr.write("Unknown strategy")
        sys.exit()
    gtp_engine = gtp_lib.Engine(instance)
    sys.stderr.write("GTP engine ready\n")
    sys.stderr.flush()
    while not gtp_engine.disconnect:
        inpt = input()
        # handle either single lines at a time
        # or multiple commands separated by '\n'
        try:
            cmd_list = inpt.split("\n")
        except:
            cmd_list = [inpt]
        for cmd in cmd_list:
            engine_reply = gtp_engine.send(cmd)
            sys.stdout.write(engine_reply)
            sys.stdout.flush()
Example #6
def gtp(strategy, read_file=None):
    n = PolicyNetwork(use_cpu=True)
    if strategy == 'random':
        instance = RandomPlayer()
    elif strategy == 'policy':
        instance = PolicyNetworkBestMovePlayer(n, read_file)
    elif strategy == 'randompolicy':
        instance = PolicyNetworkRandomMovePlayer(n, read_file)
    elif strategy == 'mcts':
        instance = MCTS(n, read_file)
    else:
        sys.stderr.write("错误")
        sys.exit()

    gtp_engine = gtp_lib.Engine(instance)
    sys.stderr.write("GTP\n")
    sys.stderr.flush()
    while not gtp_engine.disconnect:
        inpt = input()

        try:
            cmd_list = inpt.split("\n")
        except:
            cmd_list = [inpt]
        for cmd in cmd_list:
            engine_reply = gtp_engine.send(cmd)

            sys.stdout.write(engine_reply)
            sys.stdout.flush()
Example #7
File: AI.py Project: play3577/MuGo-2
 def initialize(self):
     try:
         n = PolicyNetwork(use_cpu=True)
         instance = PolicyNetworkBestMovePlayer(n, self.moudle_file)
         self.gtp_engine = gtp_lib.Engine(instance)
     except BaseException as e:
         raise Exception('Initialization of policy network failed')
def AI(msg):
    global read_file

    # Extract information
    x = msg['msg'][2].upper()
    y = string.index(msg['msg'][3])
    color = ''
    if msg['msg'][0] == 'B':
        color = 'W'
    else:
        color = 'B'

    # Initialize the policy network
    n = PolicyNetwork(use_cpu=True)
    instance = PolicyNetworkBestMovePlayer(n, read_file)
    gtp_engine = gtp_lib.Engine(instance)
    # sys.stderr.write("GTP Enginene ready\n")
    AI_cmd = parse_AI_instruction(color)

    # Check whether a game is already in progress and replay the recorded moves
    if os.path.exists(data_file):
        rfile = open(data_file, 'r')
        cmd_list = rfile.readlines()
        for cmd in cmd_list:
            cmd = cmd.strip('\n ')
            if cmd == '':
                continue
            gtp_engine.send(cmd)
            # sys.stdout.write(cmd)
            # sys.stdout.flush()
        rfile.close()

    # Parse the opponent's move and append it to the record file
    wfile = open(data_file, 'a')
    player_cmd = parse_player_input(msg['msg'][0], x, y)
    wfile.write(player_cmd + '\n')
    gtp_engine.send(player_cmd)
    # sys.stdout.write(player_cmd + '\n')
    # sys.stdout.flush()

    gtp_reply = gtp_engine.send(AI_cmd)
    gtp_cmd = parse_AI_input(color, gtp_reply)
    wfile.write(gtp_cmd)
    wfile.close()
    # sys.stdout.write(gtp_reply)
    # sys.stdout.flush()

    response = color + '[' + gtp_reply[2].lower() + string[int(
        gtp_reply[3:])] + ']'
    # sys.stdout.write(response)
    # sys.stdout.flush()

    return {'game_id': msg['game_id'], 'msg': response}
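Note: `parse_AI_instruction` and `parse_player_input` are not shown in this example. Based on the GTP commands that appear elsewhere on this page (`genmove b`, `play ...`), minimal hypothetical stand-ins could look like this; the exact vertex format is an assumption:

def parse_AI_instruction(color):
    # Ask the engine to generate a move for the given color ('B' or 'W').
    return 'genmove ' + color.lower()


def parse_player_input(color, x, y):
    # Replay the opponent's move to the engine, e.g. 'play b C3'.
    return 'play %s %s%s' % (color.lower(), x, y)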
Example #9
File: AI_2.py Project: play3577/MuGo-2
def AI(msg):
    global read_file  # Extract information

    data_file = data_file_path + '[2]' + msg['game_id']

    x, y, color = parse_input_msg(msg)
    print(x, y, color)

    # Initialize the policy network
    n = PolicyNetwork(use_cpu=True)
    instance = PolicyNetworkBestMovePlayer(n, read_file)
    gtp_engine = gtp_lib.Engine(instance)
    # sys.stderr.write("GTP Enginene ready\n")
    AI_cmd = parse_AI_instruction(color)

    # Check whether a game is already in progress and replay the logged moves
    if os.path.exists(data_file):
        rfile = open(data_file, 'r')
        cmd_list = rfile.readlines()
        for cmd in cmd_list:
            cmd = cmd.strip('\n ')
            if cmd == '':
                continue
            gtp_engine.send(cmd)
        # sys.stdout.write(cmd + '\n')
        # sys.stdout.flush()
        rfile.close()

    # Parse the opponent's move and append it to the record file
    wfile = open(data_file, 'a')
    if msg['msg'][2].lower() == 't' and msg['msg'][3].lower() == 't':
        pass
    else:
        player_cmd = parse_player_input(msg['msg'][0], x, y)
        wfile.write(player_cmd + '\n')
        gtp_engine.send(player_cmd)
    # sys.stdout.write(player_cmd + '\n')
    # sys.stdout.flush()

    gtp_reply = gtp_engine.send(AI_cmd)
    gtp_cmd = parse_AI_input(color, gtp_reply)
    wfile.write(gtp_cmd)
    wfile.close()
    # sys.stdout.write(gtp_reply + '\n')
    # sys.stdout.flush()

    AI_x, AI_y = parse_AI_reply(gtp_reply)

    response = color + '[' + AI_x + AI_y + ']'
    # sys.stdout.write(response)
    # sys.stdout.flush()

    return {'game_id': msg['game_id'], 'msg': response}
Example #10
def gtp(strategy, read_file=None):
    network = PolicyNetwork()
    instance = MCTS(network, read_file)
    gtp_engine = gtp_lib.Engine(instance)
    print('gtp engine ready')
    while not gtp_engine.disconnect:
        inpt = input()
        try:
            cmd_list = inpt.split('\n')
        except:
            cmd_list = [inpt]
        for cmd in cmd_list:
            print('sending cmd %s' % cmd)
            engine_reply = gtp_engine.send(cmd)
            print(engine_reply)
Example #11
File: AI.py Project: play3577/MuGo-2
    def __init__(self, game_id, mode=0, moudle_file=DEFAULT_AI_MOUDLE_FILE, debug=False):
        '''
        :param mode:
            mode==0 -> human vs AI
            mode==1 -> AI    vs AI
        :param game_id:
            string
        :param moudle_file:
            string
            the AI module file
        '''

        # Activate Logging Debug Information
        self.debug = debug

        # initialize
        self.game_id = game_id
        self.command_list = []

        if not (mode == 1 or mode == 0):
            raise Exception('Invalid Game Mode')
        else:
            self.mode = mode

        self.moudle_file = moudle_file

        try:
            n = PolicyNetwork(use_cpu=True)
            instance = PolicyNetworkBestMovePlayer(n, self.moudle_file)
            self.gtp_engine = gtp_lib.Engine(instance)
        except BaseException as e:
            raise Exception('Initialization of policy network failed')

        # TODO: Remove the code below if using remote database
        # Using path 'game_database/data/' to store game data.
        # Make sure the path exists !
        self.local_data_filepath = 'game_database/data/'

        self.data_file = self.local_data_filepath + self.game_id + '.data'
Example #12
File: main.py Project: iCrown/MiniGo
def train(processed_dir,
          save_dir,
          logdir,
          read_file=None,
          epochs=50,
          checkpoint_freq=2):
    test_dataset = DataSet.read(os.path.join(processed_dir, "test.chunk.gz"))
    train_dataset = DataSet.read(os.path.join(processed_dir, "train.chunk.gz"))
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    print("=====Network initilization=====")
    net = PolicyNetwork(logdir=logdir, read_file=read_file)
    print("=====Start training...=====")
    for i in range(epochs):
        net.train(train_dataset, test_dataset)
        if i % checkpoint_freq == 0:
            net.save_variables(
                os.path.join(save_dir, "epoch_" + str(i) + ".ckpt"))
Example #13
def train(processed_dir,
          read_file=None,
          save_file=None,
          epochs=10,
          logdir=None,
          checkpoint_freq=10000):
    test_dataset = DataSet.read(os.path.join(processed_dir, 'test.chunk.gz'))
    #print(test_dataset)
    train_chunk_files = [
        os.path.join(processed_dir, fname)
        for fname in os.listdir(processed_dir)
        if TRAINING_CHUNK_RE.match(fname)
    ]
    print(train_chunk_files)
    if read_file is not None:
        read_file = os.path.join(os.getcwd(), save_file)
    n = PolicyNetwork()
    n.initialize_variables()
    if logdir is not None:
        n.initialize_logging(logdir)

    last_save_checkpoint = 0
    for i in range(epochs):
        random.shuffle(train_chunk_files)
        for file in tqdm.tqdm(train_chunk_files, desc='epochs ' + str(i)):
            #print('Using %s' % file)
            with timer('load dataset'):
                train_dataset = DataSet.read(file)
            with timer('training'):
                n.train(train_dataset)
            if n.get_global_step() > last_save_checkpoint + checkpoint_freq:
                with timer('save model'):
                    n.save_variables(save_file)
                with timer('test set evaluation'):
                    n.check_accuracy(test_dataset)
                last_save_checkpoint = n.get_global_step()
        with timer('test set evaluation'):
            n.check_accuracy(test_dataset)
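Note: the `timer` context manager used in this training loop is not included in the snippet. A minimal sketch, assuming it only reports elapsed wall-clock time for the labelled block, could be:

import time
from contextlib import contextmanager

@contextmanager
def timer(message):
    # Time the wrapped block and report it; the output format is an assumption.
    tick = time.time()
    yield
    tock = time.time()
    print("%s: %.3f seconds" % (message, tock - tick))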
Example #14
class MCTS(GtpInterface):
    def __init__(self, read_file, seconds_per_move=5):
        self.seconds_per_move = seconds_per_move
        self.max_rollout_depth = go.N * go.N * 3
        self.policy_network = PolicyNetwork(DEFAULT_FEATURES.planes,
                                            use_cpu=True)
        self.read_file = read_file
        super().__init__()

    def clear(self):
        super().clear()
        self.refresh_network()

    def refresh_network(self):
        # Ensure that the player is using the latest version of the network
        # so that the network can be continually trained even as it's playing.
        self.policy_network.initialize_variables(self.read_file)

    def suggest_move(self, position):
        if position.caps[0] + 50 < position.caps[1]:
            return gtp.RESIGN
        start = time.time()
        move_probs = self.policy_network.run(position)
        root = MCTSNode.root_node(position, move_probs)
        while time.time() - start < self.seconds_per_move:
            self.tree_search(root)
        # there's a theoretical bug here: if you refuse to pass, this AI will
        # eventually start filling in its own eyes.
        return max(root.children.keys(),
                   key=lambda move, root=root: root.children[move].N)

    def tree_search(self, root):
        print("tree search", file=sys.stderr)
        # selection
        chosen_leaf = root.select_leaf()
        # expansion
        position = chosen_leaf.compute_position()
        if position is None:
            print("illegal move!", file=sys.stderr)
            # See go.Position.play_move for notes on detecting legality
            del chosen_leaf.parent.children[chosen_leaf.move]
            return
        print("Investigating following position:\n%s" %
              (chosen_leaf.position, ),
              file=sys.stderr)
        move_probs = self.policy_network.run(position)
        chosen_leaf.expand(move_probs)
        # evaluation
        value = self.estimate_value(chosen_leaf)
        # backup
        print("value: %s" % value, file=sys.stderr)
        chosen_leaf.backup_value(value)

    def estimate_value(self, chosen_leaf):
        # Estimate value of position using rollout only (for now).
        # (TODO: Value network; average the value estimations from rollout + value network)
        leaf_position = chosen_leaf.position
        current = leaf_position
        while current.n < self.max_rollout_depth:
            move_probs = self.policy_network.run(current)
            current = self.play_valid_move(current, move_probs)
            if (len(current.recent) > 2 and current.recent[-1] is None
                    and current.recent[-2] is None):
                break
        else:
            print("max rollout depth exceeded!", file=sys.stderr)

        perspective = 1 if leaf_position.player1turn else -1
        return current.score() * perspective

    def play_valid_move(self, position, move_probs):
        for move in sorted_moves(move_probs):
            if go.is_eyeish(position.board, move):
                continue
            candidate_pos = position.play_move(move, mutate=True)
            if candidate_pos is not None:
                return candidate_pos
        return position.pass_move(mutate=True)
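Note: the `MCTSNode` class that this player drives (root_node, select_leaf, compute_position, expand, backup_value, and the visit count N) is not part of the snippet. The sketch below shows one plausible shape for that interface using a PUCT-style selection rule; the exploration constant and the value bookkeeping are assumptions, not the project's actual implementation.

import math
import numpy as np

c_PUCT = 5  # exploration constant (assumed value)

class MCTSNode:
    def __init__(self, parent, move, prior):
        self.parent = parent        # None for the root
        self.move = move            # move that leads from parent to this node
        self.prior = prior          # policy network probability for that move
        self.position = None        # go.Position, computed lazily
        self.children = {}          # move -> MCTSNode
        self.N = 0                  # visit count
        self.Q = 0.0                # running mean of backed-up values

    @staticmethod
    def root_node(position, move_probabilities):
        root = MCTSNode(None, None, 1.0)
        root.position = position
        root.expand(move_probabilities)
        return root

    def action_score(self):
        # PUCT: exploit Q, explore in proportion to the prior and parent visits.
        return self.Q + c_PUCT * self.prior * math.sqrt(self.parent.N) / (1 + self.N)

    def select_leaf(self):
        node = self
        while node.children:
            node = max(node.children.values(), key=MCTSNode.action_score)
        return node

    def compute_position(self):
        # Play this node's move on the parent position; None means illegal.
        self.position = self.parent.position.play_move(self.move)
        return self.position

    def expand(self, move_probabilities):
        self.children = {move: MCTSNode(self, move, prob)
                         for move, prob in np.ndenumerate(move_probabilities)}

    def backup_value(self, value):
        # Propagate the rollout value up to the root, updating running means.
        node = self
        while node is not None:
            node.N += 1
            node.Q += (value - node.Q) / node.N
            node = node.parent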
Example #15
def self_play(strategy, read_file=None):
    n = PolicyNetwork(use_cpu=True)
    if strategy == 'random':
        instance = RandomPlayer()
    elif strategy == 'policy':
        instance = PolicyNetworkBestMovePlayer(n, read_file)
    elif strategy == 'randompolicy':
        instance = PolicyNetworkRandomMovePlayer(n, read_file)
    elif strategy == 'mcts':
        instance = MCTS(n, read_file)
    else:
        sys.stderr.write("Unknown strategy")
        sys.exit()
        # instance is the neural-network player
    gtp_engine = gtp_lib.Engine(instance)
    sys.stderr.write("GTP engine ready\n")
    sys.stderr.flush()

    p1 = -1
    save = ''
    inpt = 'genmove b'
    n = 500
    while n > 0:
        inpt = 'genmove b'
        if n % 2 == 1:
            inpt = 'genmove b'
        else:
            inpt = 'genmove w'
        try:
            cmd_list = inpt.split("\n")
        except:
            cmd_list = [inpt]
        for cmd in cmd_list:
            engine_reply = gtp_engine.send(cmd)
            sys.stdout.write(engine_reply)
            if engine_reply == '= pass\n\n':
                #engine_reply == '= pass\n\n'
                n = 0
            else:
                o1 = ''
                if len(engine_reply) == 7:
                    o1 = engine_reply[3] + engine_reply[4]
                else:
                    o1 = engine_reply[3]

                if n % 2 == 1:
                    o2 = ch.change(engine_reply[2]) + ch.change(o1)
                    save = save + ';B[' + ch.change(
                        engine_reply[2]) + ch.change(o1) + ']'
                else:
                    o2 = ch.change(engine_reply[2]) + ch.change(o1)
                    save = save + ';W[' + ch.change(
                        engine_reply[2]) + ch.change(o1) + ']'

            sys.stdout.flush()

        n = n - 1
    p7 = instance.position.result()
    save2 = '(;GM[1]\n SZ[19]\nPB[go1]\nPW[go2]\nKM[6.50]\nRE[' + p7[0] + ']\n'

    save2 = save2 + save + ')'

    wenjian = ''

    wenjian = str(time.time())
    p3 = '4'
    save_t.make_folder(wenjian + '_selfplay')
    save_t.save_txt(wenjian + '_selfplay', p3, save2)
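Note: the `ch.change` helper that converts engine replies into SGF coordinates is not shown. Assuming it maps one GTP coordinate component (column letter or row number on a 19x19 board) to the corresponding SGF letter, a hypothetical stand-in could be:

GTP_COLUMNS = "ABCDEFGHJKLMNOPQRST"   # GTP column letters skip 'I'
SGF_LETTERS = "abcdefghijklmnopqrs"

def change(component, board_size=19):
    # Rows: GTP counts up from the bottom edge, SGF 'a' is the top row.
    if component.isdigit():
        return SGF_LETTERS[board_size - int(component)]
    # Columns: map the GTP letter (with 'I' skipped) onto the SGF alphabet.
    return SGF_LETTERS[GTP_COLUMNS.index(component.upper())]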
Example #16
 def __init__(self, read_file):
     self.policy_network = PolicyNetwork(DEFAULT_FEATURES.planes,
                                         use_cpu=True)
     self.read_file = read_file
     super().__init__()
Example #17
def train(processed_dir="processed_data"):
    checkpoint_freq = 10000
    read_file = None
    save_file = 'tmp2'
    epochs = 10
    logdir = 'logs2'

    #
    test_dataset = DataSet.read(os.path.join(processed_dir, "test.chunk.gz"))
    train_chunk_files = [
        os.path.join(processed_dir, fname)
        for fname in os.listdir(processed_dir)
        if TRAINING_CHUNK_RE.match(fname)
    ]
    if read_file is not None:
        read_file = os.path.join(os.getcwd(), save_file)
    n = PolicyNetwork()
    n.initialize_variables(read_file)
    if logdir is not None:
        n.initialize_logging(logdir)
    last_save_checkpoint = 0
    for i in range(epochs):
        random.shuffle(train_chunk_files)
        for file in train_chunk_files:
            print("提取 %s" % file)
            with timer("load dataset"):
                train_dataset = DataSet.read(file)
            with timer("training"):
                n.train(train_dataset)
            with timer("save model"):
                n.save_variables(save_file)
            if n.get_global_step() > last_save_checkpoint + checkpoint_freq:
                with timer("test set evaluation"):
                    n.check_accuracy(test_dataset)
                last_save_checkpoint = n.get_global_step()
Example #18
File: main.py Project: brilee/MuGo
def train(processed_dir, read_file=None, save_file=None, epochs=10, logdir=None, checkpoint_freq=10000):
    test_dataset = DataSet.read(os.path.join(processed_dir, "test.chunk.gz"))
    train_chunk_files = [os.path.join(processed_dir, fname) 
        for fname in os.listdir(processed_dir)
        if TRAINING_CHUNK_RE.match(fname)]
    if read_file is not None:
        read_file = os.path.join(os.getcwd(), save_file)
    n = PolicyNetwork()
    n.initialize_variables(read_file)
    if logdir is not None:
        n.initialize_logging(logdir)
    last_save_checkpoint = 0
    for i in range(epochs):
        random.shuffle(train_chunk_files)
        for file in train_chunk_files:
            print("Using %s" % file)
            with timer("load dataset"):
                train_dataset = DataSet.read(file)
            with timer("training"):
                n.train(train_dataset)
            with timer("save model"):
                n.save_variables(save_file)
            if n.get_global_step() > last_save_checkpoint + checkpoint_freq:
                with timer("test set evaluation"):
                    n.check_accuracy(test_dataset)
                last_save_checkpoint = n.get_global_step()
Example #19
File: main.py Project: zysilence/MuGo
def train(processed_dir,
          read_file=None,
          save_file=None,
          epochs=10,
          logdir=None,
          checkpoint_freq=10000):
    test_dataset = DataSet.read(os.path.join(processed_dir, "test.chunk.gz"))
    train_chunk_files = [
        os.path.join(processed_dir, fname)
        for fname in os.listdir(processed_dir)
        if TRAINING_CHUNK_RE.match(fname)
    ]
    n = PolicyNetwork(DEFAULT_FEATURES.planes)
    n.initialize_variables(read_file)
    if logdir is not None:
        n.initialize_logging(logdir)
    last_save_checkpoint = 0
    for i in range(epochs):
        random.shuffle(train_chunk_files)
        for file in train_chunk_files:
            print("Using %s" % file)
            train_dataset = DataSet.read(file)
            n.train(train_dataset)
            if save_file is not None and n.get_global_step(
            ) > last_save_checkpoint + checkpoint_freq:
                n.check_accuracy(test_dataset)
                print("Saving checkpoint to %s" % save_file, file=sys.stderr)
                last_save_checkpoint = n.get_global_step()
                n.save_variables(save_file)

    if save_file is not None:
        n.save_variables(save_file)
        print("Finished training. New model saved to %s" % save_file,
              file=sys.stderr)
def AI(msgs, model=DEFAULT_MODEL_PATH, strategy=None):
    print("AI(msg) called,strategy:", strategy)

    # data_file = data_file_path + msg
    lastMsg = msgs[len(msgs) - 1]
    x, y, color = parse_input_msg(lastMsg)
    print('AI(lastMsg) parsed:', x, y, color)

    # Initialize the policy network
    n = PolicyNetwork(use_cpu=True)
    print("PolicyNetwork init.")
    # global read_file
    # read_file = read_file_prefix+str(RANK)+"/savedmodel"
    print("n,read_file:", n, model)

    if strategy == 'random':
        global instance
        instance = RandomPlayer()
    elif strategy == 'best_move':
        global instance
        instance = PolicyNetworkBestMovePlayer(n, model)
    elif strategy == 'random_move':
        global instance
        instance = PolicyNetworkRandomMovePlayer(n, model)
    elif strategy == 'mcts':
        global instance
        instance = MCTS(n, model)
    #instance = PolicyNetworkRandomMovePlayer(n, read_file)
    print("PolicyNetwork instanced.", instance)
    try:
        global gtp_engine
        gtp_engine = gtp_lib.Engine(instance)
    except Exception:
        print(traceback.format_exc())
    print("GTP Engine get ready.")
    #sys.stderr.write("GTP Enginene ready\n")
    AI_cmd = parse_AI_instruction(color)
    print("AI_cmd parsed.")
    # To see if it has started playing chess and logging
    # try:
    #     data_file_exist = os.path.exists(data_file)
    # except Exception:
    #     print(traceback.format_exc())
    # print("os.path.exists?",data_file_exist)
    #sys.setdefaultencoding('utf-8')
    # if os.path.exists(data_file):
    #     print("os.path.exists(data_file)!")
    #     rfile = open(data_file, 'r')
    #     cmd_list = rfile.readlines()
    #     for cmd in cmd_list:
    #         cmd = cmd.strip('\n ')
    #         if cmd == '':
    #             continue
    #         print("gtp_engine.send(cmd):", cmd)
    #         gtp_engine.send(cmd)
    #     # sys.stdout.write(cmd + '\n')
    #     # sys.stdout.flush()
    #     rfile.close()
    # # Parse the other side of the chess instructions, write into the record file
    # wfile = open(data_file, 'a')
    # print("wfiled!!!")
    # if msg['msg'][2].lower() == 't' and msg['msg'][3].lower() == 't':
    #     pass
    # else:
    #     player_cmd = parse_player_input(msg['msg'][0], x, y)
    #     wfile.write(player_cmd + '\n')
    #     gtp_engine.send(player_cmd)
    # sys.stdout.write(player_cmd + '\n')
    # sys.stdout.flush()
    for msg in msgs:
        x, y, color = parse_input_msg(msg)
        player_cmd = parse_player_input(color, x, y)
        print("gtp_engine.send(cmd):", player_cmd)
        gtp_engine.send(player_cmd)

    gtp_reply = gtp_engine.send(AI_cmd)
    gtp_cmd = parse_AI_input(color, gtp_reply)
    # wfile.write(gtp_cmd)
    # wfile.close()
    # sys.stdout.write(gtp_reply + '\n')
    # sys.stdout.flush()

    AI_x, AI_y = parse_AI_reply(gtp_reply)

    response = color + '[' + AI_x + AI_y + ']'
    # sys.stdout.write(response)
    # sys.stdout.flush()

    return {'game_id': msg['game_id'], 'msg': response}
Example #21
        new_vars = []
        for name, shape in policy_vars:
            v = tf.contrib.framework.load_variable('model/sl/', name)
            new_vars.append(
                tf.Variable(v,
                            name=name.replace('PolicNetwork',
                                              'PlayerNetwork')))
        saver = tf.train.Saver(new_vars)
        sess.run(tf.global_variables_initializer())
        saver.save(sess,
                   os.path.join(save_dir, str(t), 'player' + str(t) + '.ckpt'))


g1 = tf.Graph()
with g1.as_default():
    train_net = PolicyNetwork(scope="PolicNetwork")
    train_net.initialize_variables('model/sl/epoch_48.ckpt')

pos = go.Position()
train_net.run(pos)

g2 = tf.Graph()
with g2.as_default():
    player_net = PolicyNetwork(scope="PlayerNetwork")
    player_net.initialize_variables('model/rl/2/player2.ckpt')
pos = go.Position()
player_net.run(pos)

save_trained_policy(1, 'model/rl')

print("===========load new model=================")
Example #22
else:
    action_dim = env.action_space.n

if observation_space_is(env, gym.spaces.Box):
    state_dim = env.observation_space.shape[0]
else:
    state_dim = env.observation_space.n

hidden_dim = 256

value_net = ValueNetwork(state_dim, hidden_dim).to(device)
target_value_net = ValueNetwork(state_dim, hidden_dim).to(device)

soft_q_net1 = SoftQNetwork(state_dim, action_dim, hidden_dim).to(device)
soft_q_net2 = SoftQNetwork(state_dim, action_dim, hidden_dim).to(device)
policy_net = PolicyNetwork(state_dim, action_dim, hidden_dim).to(device)

for target_param, param in zip(target_value_net.parameters(), value_net.parameters()):
    target_param.data.copy_(param.data)


value_criterion = nn.MSELoss()
soft_q_criterion1 = nn.MSELoss()
soft_q_criterion2 = nn.MSELoss()

value_lr = 3e-4
soft_q_lr = 3e-4
policy_lr = 3e-4

value_optimizer = optim.Adam(value_net.parameters(), lr=value_lr)
soft_q_optimizer1 = optim.Adam(soft_q_net1.parameters(), lr=soft_q_lr)
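The setup above appears to be cut off after the first soft-Q optimizer; by the parallel structure of the surrounding lines, the remaining optimizers would presumably be created like this (an assumption):

soft_q_optimizer2 = optim.Adam(soft_q_net2.parameters(), lr=soft_q_lr)
policy_optimizer = optim.Adam(policy_net.parameters(), lr=policy_lr)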
def AI(msg):
    print("AI(msg) called.")
    global read_file  # Extract information

    data_file = data_file_path + msg['game_id']
    x, y, color = parse_input_msg(msg)
    print(x, y, color)

    # Initialize the policy network
    n = PolicyNetwork(use_cpu=True)
    print("PolicyNetwork init.")
    print("n,read_file:", n, read_file)
    try:
        instance = PolicyNetworkBestMovePlayer(n, read_file)
    except Exception:
        print(traceback.format_exc())
    #instance = PolicyNetworkRandomMovePlayer(n, read_file)
    print("PolicyNetwork instanced.", instance)
    try:
        global gtp_engine
        gtp_engine = gtp_lib.Engine(instance)
        print("GTP Engine get ready.", gtp_engine)
    except Exception:
        print(traceback.format_exc())
    #sys.stderr.write("GTP Enginene ready\n")
    AI_cmd = parse_AI_instruction(color)
    print("AI_cmd parsed.")
    # Check whether a game is already in progress and replay the logged moves
    try:
        data_file_exist = os.path.exists(data_file)
    except Exception:
        print(traceback.format_exc())
    print("os.path.exists?", data_file_exist)
    #sys.setdefaultencoding('utf-8')
    if os.path.exists(data_file):
        print("os.path.exists(data_file)!")
        rfile = open(data_file, 'r')
        cmd_list = rfile.readlines()
        for cmd in cmd_list:
            cmd = cmd.strip('\n ')
            if cmd == '':
                continue
            print("gtp_engine.send(cmd):", cmd)
            gtp_engine.send(cmd)
        # sys.stdout.write(cmd + '\n')
        # sys.stdout.flush()
        rfile.close()
    # Parse the opponent's move and append it to the record file
    wfile = open(data_file, 'a')
    print("wfiled!!!")
    if msg['msg'][2].lower() == 't' and msg['msg'][3].lower() == 't':
        pass
    else:
        player_cmd = parse_player_input(msg['msg'][0], x, y)
        wfile.write(player_cmd + '\n')
        gtp_engine.send(player_cmd)
    # sys.stdout.write(player_cmd + '\n')
    # sys.stdout.flush()

    gtp_reply = gtp_engine.send(AI_cmd)
    gtp_cmd = parse_AI_input(color, gtp_reply)
    wfile.write(gtp_cmd)
    wfile.close()
    # sys.stdout.write(gtp_reply + '\n')
    # sys.stdout.flush()

    AI_x, AI_y = parse_AI_reply(gtp_reply)

    response = color + '[' + AI_x + AI_y + ']'
    # sys.stdout.write(response)
    # sys.stdout.flush()

    return {'game_id': msg['game_id'], 'msg': response}
Example #24
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from IPython import display
import pylab as pl
import numpy as np
import os
import random
import re
import sys
import go
from policy import PolicyNetwork
from strategies import MCTSPlayerMixin
read_file = "saved_models/20170718"
WHITE, EMPTY, BLACK, FILL, KO, UNKNOWN = range(-1, 5)
n = PolicyNetwork(use_cpu=True)
n.initialize_variables(read_file)
instance = MCTSPlayerMixin(n)


class User():
    def __init__(self, name, state_size, action_size):
        self.name = name
        self.state_size = state_size
        self.action_size = action_size

    def act(self, state, tau):
        action = int(input('Enter your chosen action: '))
        pi = np.zeros(self.action_size)
        pi[action] = 1
        value = None
Example #25
def train(processed_dir,
          save_file=None,
          epochs=10,
          logdir=None,
          checkpoint_freq=10000):
    test_dataset = DataSet.read(os.path.join(processed_dir, "test.chunk.gz"))
    train_chunk_files = [
        os.path.join(processed_dir, fname)
        for fname in os.listdir(processed_dir)
        if TRAINING_CHUNK_RE.match(fname)
    ]
    save_file = os.path.join(os.getcwd(), save_file)
    n = PolicyNetwork()
    try:
        n.initialize_variables(save_file)
    except:
        n.initialize_variables(None)
    if logdir is not None:
        n.initialize_logging(logdir)
    last_save_checkpoint = 0
    for i in range(epochs):
        random.shuffle(train_chunk_files)
        for file in train_chunk_files:
            print("Using %s" % file)
            train_dataset = DataSet.read(file)
            train_dataset.shuffle()
            with timer("training"):
                n.train(train_dataset)
            n.save_variables(save_file)
            if n.get_global_step() > last_save_checkpoint + checkpoint_freq:
                with timer("test set evaluation"):
                    n.check_accuracy(test_dataset)
                last_save_checkpoint = n.get_global_step()
Example #26
def run_iterations(args):
    # Init model
    state_size = 16
    action_size = 4
    if args.env == "MountainCar-v0":
        state_size = 2
        action_size = 3
    if args.env == "Freeway-ram-v0":
        state_size = 128
        action_size = 3
    if args.env == "CartPole-v0":
        state_size = 4
        action_size = 2
    model = PolicyNetwork(state_size, action_size)
    optimizer = optim.Adam(model.parameters(), args.learning_rate)
    start_n = 4
    reward_per_iteration = []
    for i in range(args.max_iterations):
        # boolean for demo 
        if not args.demo:
            state = to_tensor(ENV.reset(), state_size)
        else:
            # take the start_n-th state from the demo and use it as the start state
            # how to choose this together with max_iterations: repeat each start state a few times, or only once?
            start_state = get_start_state(ENV, args.env, start_n)
            # problem: one environment exposes ENV.env.s and another ENV.env.state; maybe there is a more elegant solution?
            if args.env == "FrozenLake-v0": 
                ENV.env.s = start_state
                state = to_tensor(ENV.env.s, state_size)
            else:
                ENV.env.state = start_state
                state = to_tensor(ENV.env.state, state_size)
        reward_per_episode = []
        episode_loss = 0
        for step in range(args.max_steps):
            if args.render: ENV.render()
            action = select_action(model, state, get_epsilon(i), action_size)
            next_state, reward, done, _ = ENV.step(action)  # step the environment with the chosen action
            # compute the q value
            q_val = compute_q_val(model, state, action)


            with torch.no_grad():  # Don't compute gradient info for the target (semi-gradient)
                next_state = to_tensor(next_state, state_size)
                target = compute_target(model, reward, next_state, done, args.discount_factor)

            # loss is measured from error between current and newly expected Q values
            loss = F.smooth_l1_loss(q_val, target)

            # backpropagation of loss to Neural Network (PyTorch magic)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            episode_loss += loss.item()  # accumulate a float, not the graph-attached tensor
            state = next_state
            reward_per_episode.append(reward)
            if done: break

        if i % args.print_every == 0:
            print("Reward", reward, sum(reward_per_episode))
            print("Step {:6d} with loss: {:4f}".format(i, episode_loss))
        reward_per_iteration.append(reward_per_episode)
    return reward_per_iteration
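Note: the helpers this loop relies on (`to_tensor`, `get_epsilon`, `select_action`, `compute_q_val`, `compute_target`) are not part of the snippet. Minimal sketches of the two value computations, assuming the `PolicyNetwork` here maps a state tensor to one Q-value per action, could look like:

import torch

def compute_q_val(model, state, action):
    # Q-value of the action that was actually taken in this state.
    return model(state)[action]

def compute_target(model, reward, next_state, done, discount_factor):
    # One-step TD target; the bootstrap term is dropped on terminal states.
    if done:
        return torch.tensor(float(reward))
    return reward + discount_factor * model(next_state).max()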