Example #1
class PolicyNetworkBestMovePlayer(GtpInterface):
    def __init__(self, read_file):
        self.policy_network = PolicyNetwork(DEFAULT_FEATURES.planes,
                                            use_cpu=True)
        self.read_file = read_file
        super().__init__()

    def clear(self):
        super().clear()
        self.refresh_network()

    def refresh_network(self):
        # Ensure that the player is using the latest version of the network
        # so that the network can be continually trained even as it's playing.
        self.policy_network.initialize_variables(self.read_file)

    def suggest_move(self, position):
        if position.recent and position.n > 100 and position.recent[-1] is None:
            # Pass if the opponent passes
            return None
        move_probabilities = self.policy_network.run(position)
        for move in sorted_moves(move_probabilities):
            if go.is_reasonable(position, move):
                return move
        return None
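Note: the `sorted_moves` helper used above is not part of this snippet. A minimal sketch of its assumed behavior (yield board coordinates ordered by descending policy probability; the (row, col) indexing is an assumption) could be:

def sorted_moves(move_probabilities):
    # move_probabilities is assumed to be an N x N array of policy outputs;
    # return (row, col) coordinates from most to least probable.
    height, width = move_probabilities.shape
    coords = [(i, j) for i in range(height) for j in range(width)]
    return sorted(coords, key=lambda c: move_probabilities[c], reverse=True)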
Example #2
 def __init__(self, read_file, seconds_per_move=5):
     self.seconds_per_move = seconds_per_move
     self.max_rollout_depth = go.N * go.N * 3
     self.policy_network = PolicyNetwork(DEFAULT_FEATURES.planes,
                                         use_cpu=True)
     self.read_file = read_file
     super().__init__()
Example #3
def make_gtp_instance(strategy_name, read_file):
    n = PolicyNetwork(use_cpu=True)
    n.initialize_variables(read_file)
    if strategy_name == 'random':
        instance = RandomPlayer()
    elif strategy_name == 'policy':
        instance = GreedyPolicyPlayer(n)
    elif strategy_name == 'randompolicy':
        instance = RandomPolicyPlayer(n)
    elif strategy_name == 'mcts':
        instance = MCTSPlayer(n)
    else:
        return None
    gtp_engine = gtp.Engine(instance)
    return gtp_engine
Example #5
def gtp(strategy, read_file=None):
    n = PolicyNetwork(use_cpu=True)
    if strategy == 'random':
        instance = RandomPlayer()
    elif strategy == 'policy':
        instance = PolicyNetworkBestMovePlayer(n, read_file)
    elif strategy == 'randompolicy':
        instance = PolicyNetworkRandomMovePlayer(n, read_file)
    elif strategy == 'mcts':
        instance = MCTS(n, read_file)
    else:
        sys.stderr.write("Unknown strategy")
        sys.exit()
    gtp_engine = gtp_lib.Engine(instance)
    sys.stderr.write("GTP engine ready\n")
    sys.stderr.flush()
    while not gtp_engine.disconnect:
        inpt = input()
        # handle either single lines at a time
        # or multiple commands separated by '\n'
        try:
            cmd_list = inpt.split("\n")
        except:
            cmd_list = [inpt]
        for cmd in cmd_list:
            engine_reply = gtp_engine.send(cmd)
            sys.stdout.write(engine_reply)
            sys.stdout.flush()
Example #6
def gtp(strategy, read_file=None):
    n = PolicyNetwork(use_cpu=True)
    if strategy == 'random':
        instance = RandomPlayer()
    elif strategy == 'policy':
        instance = PolicyNetworkBestMovePlayer(n, read_file)
    elif strategy == 'randompolicy':
        instance = PolicyNetworkRandomMovePlayer(n, read_file)
    elif strategy == 'mcts':
        instance = MCTS(n, read_file)
    else:
        sys.stderr.write("错误")
        sys.exit()

    gtp_engine = gtp_lib.Engine(instance)
    sys.stderr.write("GTP\n")
    sys.stderr.flush()
    while not gtp_engine.disconnect:
        inpt = input()

        try:
            cmd_list = inpt.split("\n")
        except:
            cmd_list = [inpt]
        for cmd in cmd_list:
            engine_reply = gtp_engine.send(cmd)

            sys.stdout.write(engine_reply)
            sys.stdout.flush()
Example #7
File: AI.py Project: play3577/MuGo-2
 def initialize(self):
     try:
         n = PolicyNetwork(use_cpu=True)
         instance = PolicyNetworkBestMovePlayer(n, self.moudle_file)
         self.gtp_engine = gtp_lib.Engine(instance)
     except BaseException as e:
         raise Exception('Initialization of policy network failed')
def AI(msg):
    global read_file

    # Extract information
    x = msg['msg'][2].upper()
    y = string.index(msg['msg'][3])
    color = ''
    if msg['msg'][0] == 'B':
        color = 'W'
    else:
        color = 'B'

    # Initialize the policy network
    n = PolicyNetwork(use_cpu=True)
    instance = PolicyNetworkBestMovePlayer(n, read_file)
    gtp_engine = gtp_lib.Engine(instance)
    # sys.stderr.write("GTP Enginene ready\n")
    AI_cmd = parse_AI_instruction(color)

    # Check whether a game is already in progress and replay the recorded moves
    if os.path.exists(data_file):
        rfile = open(data_file, 'r')
        cmd_list = rfile.readlines()
        for cmd in cmd_list:
            cmd = cmd.strip('\n ')
            if cmd == '':
                continue
            gtp_engine.send(cmd)
            # sys.stdout.write(cmd)
            # sys.stdout.flush()
        rfile.close()

    # Parse the opponent's move and append it to the record file
    wfile = open(data_file, 'a')
    player_cmd = parse_player_input(msg['msg'][0], x, y)
    wfile.write(player_cmd + '\n')
    gtp_engine.send(player_cmd)
    # sys.stdout.write(player_cmd + '\n')
    # sys.stdout.flush()

    gtp_reply = gtp_engine.send(AI_cmd)
    gtp_cmd = parse_AI_input(color, gtp_reply)
    wfile.write(gtp_cmd)
    wfile.close()
    # sys.stdout.write(gtp_reply)
    # sys.stdout.flush()

    response = color + '[' + gtp_reply[2].lower() + string[int(
        gtp_reply[3:])] + ']'
    # sys.stdout.write(response)
    # sys.stdout.flush()

    return {'game_id': msg['game_id'], 'msg': response}
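Note: `parse_AI_instruction` and `parse_player_input` are not shown in this example. Based on the GTP commands that appear elsewhere on this page (`genmove b`, `play ...`), minimal hypothetical stand-ins could look like this; the exact vertex format is an assumption:

def parse_AI_instruction(color):
    # Ask the engine to generate a move for the given color ('B' or 'W').
    return 'genmove ' + color.lower()


def parse_player_input(color, x, y):
    # Replay the opponent's move to the engine, e.g. 'play b C3'.
    return 'play %s %s%s' % (color.lower(), x, y)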
Example #9
File: AI_2.py Project: play3577/MuGo-2
def AI(msg):
    global read_file  # Extract information

    data_file = data_file_path + '[2]' + msg['game_id']

    x, y, color = parse_input_msg(msg)
    print(x, y, color)

    # Initialize the policy network
    n = PolicyNetwork(use_cpu=True)
    instance = PolicyNetworkBestMovePlayer(n, read_file)
    gtp_engine = gtp_lib.Engine(instance)
    # sys.stderr.write("GTP Enginene ready\n")
    AI_cmd = parse_AI_instruction(color)

    # Check whether a game is already in progress and replay the logged moves
    if os.path.exists(data_file):
        rfile = open(data_file, 'r')
        cmd_list = rfile.readlines()
        for cmd in cmd_list:
            cmd = cmd.strip('\n ')
            if cmd == '':
                continue
            gtp_engine.send(cmd)
        # sys.stdout.write(cmd + '\n')
        # sys.stdout.flush()
        rfile.close()

    # Parse the opponent's move and append it to the record file
    wfile = open(data_file, 'a')
    if msg['msg'][2].lower() == 't' and msg['msg'][3].lower() == 't':
        pass
    else:
        player_cmd = parse_player_input(msg['msg'][0], x, y)
        wfile.write(player_cmd + '\n')
        gtp_engine.send(player_cmd)
    # sys.stdout.write(player_cmd + '\n')
    # sys.stdout.flush()

    gtp_reply = gtp_engine.send(AI_cmd)
    gtp_cmd = parse_AI_input(color, gtp_reply)
    wfile.write(gtp_cmd)
    wfile.close()
    # sys.stdout.write(gtp_reply + '\n')
    # sys.stdout.flush()

    AI_x, AI_y = parse_AI_reply(gtp_reply)

    response = color + '[' + AI_x + AI_y + ']'
    # sys.stdout.write(response)
    # sys.stdout.flush()

    return {'game_id': msg['game_id'], 'msg': response}
Example #10
def gtp(strategy, read_file=None):
    network = PolicyNetwork()
    instance = MCTS(network, read_file)
    gtp_engine = gtp_lib.Engine(instance)
    print('gtp engine ready')
    while not gtp_engine.disconnect:
        inpt = input()
        try:
            cmd_list = inpt.split('\n')
        except:
            cmd_list = [inpt]
        for cmd in cmd_list:
            print('sending cmd %s' % cmd)
            engine_reply = gtp_engine.send(cmd)
            print(engine_reply)
Example #11
File: AI.py Project: play3577/MuGo-2
    def __init__(self, game_id, mode=0, moudle_file=DEFAULT_AI_MOUDLE_FILE, debug=False):
        '''
        :param mode:
            mode==0 -> human vs AI
            mode==1 -> AI    vs AI
        :param game_id:
            string
        :param moudle_file:
            string
            the AI module file
        '''

        # Activate Logging Debug Information
        self.debug = debug

        # initialize
        self.game_id = game_id
        self.command_list = []

        if not (mode == 1 or mode == 0):
            raise Exception('Invalid Game Mode')
        else:
            self.mode = mode

        self.moudle_file = moudle_file

        try:
            n = PolicyNetwork(use_cpu=True)
            instance = PolicyNetworkBestMovePlayer(n, self.moudle_file)
            self.gtp_engine = gtp_lib.Engine(instance)
        except BaseException as e:
            raise Exception('Initialization of policy network failed')

        # TODO: Remove the code below if using remote database
        # Using path 'game_database/data/' to store game data.
        # Make sure the path exists !
        self.local_data_filepath = 'game_database/data/'

        self.data_file = self.local_data_filepath + self.game_id + '.data'
Example #12
File: main.py Project: iCrown/MiniGo
def train(processed_dir,
          save_dir,
          logdir,
          read_file=None,
          epochs=50,
          checkpoint_freq=2):
    test_dataset = DataSet.read(os.path.join(processed_dir, "test.chunk.gz"))
    train_dataset = DataSet.read(os.path.join(processed_dir, "train.chunk.gz"))
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    print("=====Network initilization=====")
    net = PolicyNetwork(logdir=logdir, read_file=read_file)
    print("=====Start training...=====")
    for i in range(epochs):
        net.train(train_dataset, test_dataset)
        if i % checkpoint_freq == 0:
            net.save_variables(
                os.path.join(save_dir, "epoch_" + str(i) + ".ckpt"))
Example #13
def train(processed_dir,
          read_file=None,
          save_file=None,
          epochs=10,
          logdir=None,
          checkpoint_freq=10000):
    test_dataset = DataSet.read(os.path.join(processed_dir, 'test.chunk.gz'))
    #print(test_dataset)
    train_chunk_files = [
        os.path.join(processed_dir, fname)
        for fname in os.listdir(processed_dir)
        if TRAINING_CHUNK_RE.match(fname)
    ]
    print(train_chunk_files)
    if read_file is not None:
        read_file = os.path.join(os.getcwd(), save_file)
    n = PolicyNetwork()
    n.initialize_variables()
    if logdir is not None:
        n.initialize_logging(logdir)

    last_save_checkpoint = 0
    for i in range(epochs):
        random.shuffle(train_chunk_files)
        for file in tqdm.tqdm(train_chunk_files, desc='epochs ' + str(i)):
            #print('Using %s' % file)
            with timer('load dataset'):
                train_dataset = DataSet.read(file)
            with timer('training'):
                n.train(train_dataset)
            if n.get_global_step() > last_save_checkpoint + checkpoint_freq:
                with timer('save model'):
                    n.save_variables(save_file)
                with timer('test set evaluation'):
                    n.check_accuracy(test_dataset)
                last_save_checkpoint = n.get_global_step()
        with timer('test set evaluation'):
            n.check_accuracy(test_dataset)
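Note: the `timer` context manager used in this training loop is not included in the snippet. A minimal sketch, assuming it only reports elapsed wall-clock time for the labelled block, could be:

import time
from contextlib import contextmanager

@contextmanager
def timer(message):
    # Time the wrapped block and report it; the output format is an assumption.
    tick = time.time()
    yield
    tock = time.time()
    print("%s: %.3f seconds" % (message, tock - tick))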
Example #14
class MCTS(GtpInterface):
    def __init__(self, read_file, seconds_per_move=5):
        self.seconds_per_move = seconds_per_move
        self.max_rollout_depth = go.N * go.N * 3
        self.policy_network = PolicyNetwork(DEFAULT_FEATURES.planes,
                                            use_cpu=True)
        self.read_file = read_file
        super().__init__()

    def clear(self):
        super().clear()
        self.refresh_network()

    def refresh_network(self):
        # Ensure that the player is using the latest version of the network
        # so that the network can be continually trained even as it's playing.
        self.policy_network.initialize_variables(self.read_file)

    def suggest_move(self, position):
        if position.caps[0] + 50 < position.caps[1]:
            return gtp.RESIGN
        start = time.time()
        move_probs = self.policy_network.run(position)
        root = MCTSNode.root_node(position, move_probs)
        while time.time() - start < self.seconds_per_move:
            self.tree_search(root)
        # there's a theoretical bug here: if you refuse to pass, this AI will
        # eventually start filling in its own eyes.
        return max(root.children.keys(),
                   key=lambda move, root=root: root.children[move].N)

    def tree_search(self, root):
        print("tree search", file=sys.stderr)
        # selection
        chosen_leaf = root.select_leaf()
        # expansion
        position = chosen_leaf.compute_position()
        if position is None:
            print("illegal move!", file=sys.stderr)
            # See go.Position.play_move for notes on detecting legality
            del chosen_leaf.parent.children[chosen_leaf.move]
            return
        print("Investigating following position:\n%s" %
              (chosen_leaf.position, ),
              file=sys.stderr)
        move_probs = self.policy_network.run(position)
        chosen_leaf.expand(move_probs)
        # evaluation
        value = self.estimate_value(chosen_leaf)
        # backup
        print("value: %s" % value, file=sys.stderr)
        chosen_leaf.backup_value(value)

    def estimate_value(self, chosen_leaf):
        # Estimate value of position using rollout only (for now).
        # (TODO: Value network; average the value estimations from rollout + value network)
        leaf_position = chosen_leaf.position
        current = leaf_position
        while current.n < self.max_rollout_depth:
            move_probs = self.policy_network.run(current)
            current = self.play_valid_move(current, move_probs)
            if (len(current.recent) > 2 and current.recent[-1] is None
                    and current.recent[-2] is None):
                break
        else:
            print("max rollout depth exceeded!", file=sys.stderr)

        perspective = 1 if leaf_position.player1turn else -1
        return current.score() * perspective

    def play_valid_move(self, position, move_probs):
        for move in sorted_moves(move_probs):
            if go.is_eyeish(position.board, move):
                continue
            candidate_pos = position.play_move(move, mutate=True)
            if candidate_pos is not None:
                return candidate_pos
        return position.pass_move(mutate=True)
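Note: the `MCTSNode` class that this player drives (root_node, select_leaf, compute_position, expand, backup_value, and the visit count N) is not part of the snippet. The sketch below shows one plausible shape for that interface using a PUCT-style selection rule; the exploration constant and the value bookkeeping are assumptions, not the project's actual implementation.

import math
import numpy as np

c_PUCT = 5  # exploration constant (assumed value)

class MCTSNode:
    def __init__(self, parent, move, prior):
        self.parent = parent        # None for the root
        self.move = move            # move that leads from parent to this node
        self.prior = prior          # policy network probability for that move
        self.position = None        # go.Position, computed lazily
        self.children = {}          # move -> MCTSNode
        self.N = 0                  # visit count
        self.Q = 0.0                # running mean of backed-up values

    @staticmethod
    def root_node(position, move_probabilities):
        root = MCTSNode(None, None, 1.0)
        root.position = position
        root.expand(move_probabilities)
        return root

    def action_score(self):
        # PUCT: exploit Q, explore in proportion to the prior and parent visits.
        return self.Q + c_PUCT * self.prior * math.sqrt(self.parent.N) / (1 + self.N)

    def select_leaf(self):
        node = self
        while node.children:
            node = max(node.children.values(), key=MCTSNode.action_score)
        return node

    def compute_position(self):
        # Play this node's move on the parent position; None means illegal.
        self.position = self.parent.position.play_move(self.move)
        return self.position

    def expand(self, move_probabilities):
        self.children = {move: MCTSNode(self, move, prob)
                         for move, prob in np.ndenumerate(move_probabilities)}

    def backup_value(self, value):
        # Propagate the rollout value up to the root, updating running means.
        node = self
        while node is not None:
            node.N += 1
            node.Q += (value - node.Q) / node.N
            node = node.parent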
Example #15
def self_play(strategy, read_file=None):
    n = PolicyNetwork(use_cpu=True)
    if strategy == 'random':
        instance = RandomPlayer()
    elif strategy == 'policy':
        instance = PolicyNetworkBestMovePlayer(n, read_file)
    elif strategy == 'randompolicy':
        instance = PolicyNetworkRandomMovePlayer(n, read_file)
    elif strategy == 'mcts':
        instance = MCTS(n, read_file)
    else:
        sys.stderr.write("Unknown strategy")
        sys.exit()
        # instance is the neural-network player
    gtp_engine = gtp_lib.Engine(instance)
    sys.stderr.write("GTP engine ready\n")
    sys.stderr.flush()

    p1 = -1
    save = ''
    inpt = 'genmove b'
    n = 500
    while n > 0:
        inpt = 'genmove b'
        if n % 2 == 1:
            inpt = 'genmove b'
        else:
            inpt = 'genmove w'
        try:
            cmd_list = inpt.split("\n")
        except:
            cmd_list = [inpt]
        for cmd in cmd_list:
            engine_reply = gtp_engine.send(cmd)
            sys.stdout.write(engine_reply)
            if engine_reply == '= pass\n\n':
                #engine_reply == '= pass\n\n'
                n = 0
            else:
                o1 = ''
                if len(engine_reply) == 7:
                    o1 = engine_reply[3] + engine_reply[4]
                else:
                    o1 = engine_reply[3]

                if n % 2 == 1:
                    o2 = ch.change(engine_reply[2]) + ch.change(o1)
                    save = save + ';B[' + ch.change(
                        engine_reply[2]) + ch.change(o1) + ']'
                else:
                    o2 = ch.change(engine_reply[2]) + ch.change(o1)
                    save = save + ';W[' + ch.change(
                        engine_reply[2]) + ch.change(o1) + ']'

            sys.stdout.flush()

        n = n - 1
    p7 = instance.position.result()
    save2 = '(;GM[1]\n SZ[19]\nPB[go1]\nPW[go2]\nKM[6.50]\nRE[' + p7[0] + ']\n'

    save2 = save2 + save + ')'

    wenjian = ''

    wenjian = str(time.time())
    p3 = '4'
    save_t.make_folder(wenjian + '_selfplay')
    save_t.save_txt(wenjian + '_selfplay', p3, save2)
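Note: the `ch.change` helper that converts engine replies into SGF coordinates is not shown. Assuming it maps one GTP coordinate component (column letter or row number on a 19x19 board) to the corresponding SGF letter, a hypothetical stand-in could be:

GTP_COLUMNS = "ABCDEFGHJKLMNOPQRST"   # GTP column letters skip 'I'
SGF_LETTERS = "abcdefghijklmnopqrs"

def change(component, board_size=19):
    # Rows: GTP counts up from the bottom edge, SGF 'a' is the top row.
    if component.isdigit():
        return SGF_LETTERS[board_size - int(component)]
    # Columns: map the GTP letter (with 'I' skipped) onto the SGF alphabet.
    return SGF_LETTERS[GTP_COLUMNS.index(component.upper())]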
Example #16
 def __init__(self, read_file):
     self.policy_network = PolicyNetwork(DEFAULT_FEATURES.planes,
                                         use_cpu=True)
     self.read_file = read_file
     super().__init__()
Example #17
def train(processed_dir="processed_data"):
    checkpoint_freq = 10000
    read_file = None
    save_file = 'tmp2'
    epochs = 10
    logdir = 'logs2'

    #
    test_dataset = DataSet.read(os.path.join(processed_dir, "test.chunk.gz"))
    train_chunk_files = [
        os.path.join(processed_dir, fname)
        for fname in os.listdir(processed_dir)
        if TRAINING_CHUNK_RE.match(fname)
    ]
    if read_file is not None:
        read_file = os.path.join(os.getcwd(), save_file)
    n = PolicyNetwork()
    n.initialize_variables(read_file)
    if logdir is not None:
        n.initialize_logging(logdir)
    last_save_checkpoint = 0
    for i in range(epochs):
        random.shuffle(train_chunk_files)
        for file in train_chunk_files:
            print("提取 %s" % file)
            with timer("load dataset"):
                train_dataset = DataSet.read(file)
            with timer("training"):
                n.train(train_dataset)
            with timer("save model"):
                n.save_variables(save_file)
            if n.get_global_step() > last_save_checkpoint + checkpoint_freq:
                with timer("test set evaluation"):
                    n.check_accuracy(test_dataset)
                last_save_checkpoint = n.get_global_step()
Example #18
File: main.py Project: brilee/MuGo
def train(processed_dir, read_file=None, save_file=None, epochs=10, logdir=None, checkpoint_freq=10000):
    test_dataset = DataSet.read(os.path.join(processed_dir, "test.chunk.gz"))
    train_chunk_files = [os.path.join(processed_dir, fname) 
        for fname in os.listdir(processed_dir)
        if TRAINING_CHUNK_RE.match(fname)]
    if read_file is not None:
        read_file = os.path.join(os.getcwd(), save_file)
    n = PolicyNetwork()
    n.initialize_variables(read_file)
    if logdir is not None:
        n.initialize_logging(logdir)
    last_save_checkpoint = 0
    for i in range(epochs):
        random.shuffle(train_chunk_files)
        for file in train_chunk_files:
            print("Using %s" % file)
            with timer("load dataset"):
                train_dataset = DataSet.read(file)
            with timer("training"):
                n.train(train_dataset)
            with timer("save model"):
                n.save_variables(save_file)
            if n.get_global_step() > last_save_checkpoint + checkpoint_freq:
                with timer("test set evaluation"):
                    n.check_accuracy(test_dataset)
                last_save_checkpoint = n.get_global_step()
Example #19
File: main.py Project: zysilence/MuGo
def train(processed_dir,
          read_file=None,
          save_file=None,
          epochs=10,
          logdir=None,
          checkpoint_freq=10000):
    test_dataset = DataSet.read(os.path.join(processed_dir, "test.chunk.gz"))
    train_chunk_files = [
        os.path.join(processed_dir, fname)
        for fname in os.listdir(processed_dir)
        if TRAINING_CHUNK_RE.match(fname)
    ]
    n = PolicyNetwork(DEFAULT_FEATURES.planes)
    n.initialize_variables(read_file)
    if logdir is not None:
        n.initialize_logging(logdir)
    last_save_checkpoint = 0
    for i in range(epochs):
        random.shuffle(train_chunk_files)
        for file in train_chunk_files:
            print("Using %s" % file)
            train_dataset = DataSet.read(file)
            n.train(train_dataset)
            if save_file is not None and n.get_global_step(
            ) > last_save_checkpoint + checkpoint_freq:
                n.check_accuracy(test_dataset)
                print("Saving checkpoint to %s" % save_file, file=sys.stderr)
                last_save_checkpoint = n.get_global_step()
                n.save_variables(save_file)

    if save_file is not None:
        n.save_variables(save_file)
        print("Finished training. New model saved to %s" % save_file,
              file=sys.stderr)
def AI(msgs, model=DEFAULT_MODEL_PATH, strategy=None):
    print("AI(msg) called,strategy:", strategy)

    # data_file = data_file_path + msg
    lastMsg = msgs[len(msgs) - 1]
    x, y, color = parse_input_msg(lastMsg)
    print('AI(lastMsg) parsed:', x, y, color)

    # Initialize the policy network
    n = PolicyNetwork(use_cpu=True)
    print("PolicyNetwork init.")
    # global read_file
    # read_file = read_file_prefix+str(RANK)+"/savedmodel"
    print("n,read_file:", n, model)

    if strategy == 'random':
        global instance
        instance = RandomPlayer()
    elif strategy == 'best_move':
        global instance
        instance = PolicyNetworkBestMovePlayer(n, model)
    elif strategy == 'random_move':
        global instance
        instance = PolicyNetworkRandomMovePlayer(n, model)
    elif strategy == 'mcts':
        global instance
        instance = MCTS(n, model)
    #instance = PolicyNetworkRandomMovePlayer(n, read_file)
    print("PolicyNetwork instanced.", instance)
    try:
        global gtp_engine
        gtp_engine = gtp_lib.Engine(instance)
    except Exception:
        print(traceback.format_exc())
    print("GTP Engine get ready.")
    #sys.stderr.write("GTP Enginene ready\n")
    AI_cmd = parse_AI_instruction(color)
    print("AI_cmd parsed.")
    # To see if it has started playing chess and logging
    # try:
    #     data_file_exist = os.path.exists(data_file)
    # except Exception:
    #     print(traceback.format_exc())
    # print("os.path.exists?",data_file_exist)
    #sys.setdefaultencoding('utf-8')
    # if os.path.exists(data_file):
    #     print("os.path.exists(data_file)!")
    #     rfile = open(data_file, 'r')
    #     cmd_list = rfile.readlines()
    #     for cmd in cmd_list:
    #         cmd = cmd.strip('\n ')
    #         if cmd == '':
    #             continue
    #         print("gtp_engine.send(cmd):", cmd)
    #         gtp_engine.send(cmd)
    #     # sys.stdout.write(cmd + '\n')
    #     # sys.stdout.flush()
    #     rfile.close()
    # # Parse the other side of the chess instructions, write into the record file
    # wfile = open(data_file, 'a')
    # print("wfiled!!!")
    # if msg['msg'][2].lower() == 't' and msg['msg'][3].lower() == 't':
    #     pass
    # else:
    #     player_cmd = parse_player_input(msg['msg'][0], x, y)
    #     wfile.write(player_cmd + '\n')
    #     gtp_engine.send(player_cmd)
    # sys.stdout.write(player_cmd + '\n')
    # sys.stdout.flush()
    for msg in msgs:
        x, y, color = parse_input_msg(msg)
        player_cmd = parse_player_input(color, x, y)
        print("gtp_engine.send(cmd):", player_cmd)
        gtp_engine.send(player_cmd)

    gtp_reply = gtp_engine.send(AI_cmd)
    gtp_cmd = parse_AI_input(color, gtp_reply)
    # wfile.write(gtp_cmd)
    # wfile.close()
    # sys.stdout.write(gtp_reply + '\n')
    # sys.stdout.flush()

    AI_x, AI_y = parse_AI_reply(gtp_reply)

    response = color + '[' + AI_x + AI_y + ']'
    # sys.stdout.write(response)
    # sys.stdout.flush()

    return {'game_id': msg['game_id'], 'msg': response}
Example #21
        new_vars = []
        for name, shape in policy_vars:
            v = tf.contrib.framework.load_variable('model/sl/', name)
            new_vars.append(
                tf.Variable(v,
                            name=name.replace('PolicNetwork',
                                              'PlayerNetwork')))
        saver = tf.train.Saver(new_vars)
        sess.run(tf.global_variables_initializer())
        saver.save(sess,
                   os.path.join(save_dir, str(t), 'player' + str(t) + '.ckpt'))


g1 = tf.Graph()
with g1.as_default():
    train_net = PolicyNetwork(scope="PolicNetwork")
    train_net.initialize_variables('model/sl/epoch_48.ckpt')

pos = go.Position()
train_net.run(pos)

g2 = tf.Graph()
with g2.as_default():
    player_net = PolicyNetwork(scope="PlayerNetwork")
    player_net.initialize_variables('model/rl/2/player2.ckpt')
pos = go.Position()
player_net.run(pos)

save_trained_policy(1, 'model/rl')

print("===========load new model=================")
Example #22
else:
    action_dim = env.action_space.n

if observation_space_is(env, gym.spaces.Box):
    state_dim = env.observation_space.shape[0]
else:
    state_dim = env.observation_space.n

hidden_dim = 256

value_net = ValueNetwork(state_dim, hidden_dim).to(device)
target_value_net = ValueNetwork(state_dim, hidden_dim).to(device)

soft_q_net1 = SoftQNetwork(state_dim, action_dim, hidden_dim).to(device)
soft_q_net2 = SoftQNetwork(state_dim, action_dim, hidden_dim).to(device)
policy_net = PolicyNetwork(state_dim, action_dim, hidden_dim).to(device)

for target_param, param in zip(target_value_net.parameters(), value_net.parameters()):
    target_param.data.copy_(param.data)


value_criterion = nn.MSELoss()
soft_q_criterion1 = nn.MSELoss()
soft_q_criterion2 = nn.MSELoss()

value_lr = 3e-4
soft_q_lr = 3e-4
policy_lr = 3e-4

value_optimizer = optim.Adam(value_net.parameters(), lr=value_lr)
soft_q_optimizer1 = optim.Adam(soft_q_net1.parameters(), lr=soft_q_lr)
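The setup above appears to be cut off after the first soft-Q optimizer; by the parallel structure of the surrounding lines, the remaining optimizers would presumably be created like this (an assumption):

soft_q_optimizer2 = optim.Adam(soft_q_net2.parameters(), lr=soft_q_lr)
policy_optimizer = optim.Adam(policy_net.parameters(), lr=policy_lr)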
def AI(msg):
    print("AI(msg) called.")
    global read_file  # Extract information

    data_file = data_file_path + msg['game_id']
    x, y, color = parse_input_msg(msg)
    print(x, y, color)

    # Initialize the policy network
    n = PolicyNetwork(use_cpu=True)
    print("PolicyNetwork init.")
    print("n,read_file:", n, read_file)
    try:
        instance = PolicyNetworkBestMovePlayer(n, read_file)
    except Exception:
        print(traceback.format_exc())
    #instance = PolicyNetworkRandomMovePlayer(n, read_file)
    print("PolicyNetwork instanced.", instance)
    try:
        global gtp_engine
        gtp_engine = gtp_lib.Engine(instance)
        print("GTP Engine get ready.", gtp_engine)
    except Exception:
        print(traceback.format_exc())
    #sys.stderr.write("GTP Enginene ready\n")
    AI_cmd = parse_AI_instruction(color)
    print("AI_cmd parsed.")
    # Check whether a game is already in progress and replay the logged moves
    try:
        data_file_exist = os.path.exists(data_file)
    except Exception:
        print(traceback.format_exc())
    print("os.path.exists?", data_file_exist)
    #sys.setdefaultencoding('utf-8')
    if os.path.exists(data_file):
        print("os.path.exists(data_file)!")
        rfile = open(data_file, 'r')
        cmd_list = rfile.readlines()
        for cmd in cmd_list:
            cmd = cmd.strip('\n ')
            if cmd == '':
                continue
            print("gtp_engine.send(cmd):", cmd)
            gtp_engine.send(cmd)
        # sys.stdout.write(cmd + '\n')
        # sys.stdout.flush()
        rfile.close()
    # Parse the opponent's move and append it to the record file
    wfile = open(data_file, 'a')
    print("wfiled!!!")
    if msg['msg'][2].lower() == 't' and msg['msg'][3].lower() == 't':
        pass
    else:
        player_cmd = parse_player_input(msg['msg'][0], x, y)
        wfile.write(player_cmd + '\n')
        gtp_engine.send(player_cmd)
    # sys.stdout.write(player_cmd + '\n')
    # sys.stdout.flush()

    gtp_reply = gtp_engine.send(AI_cmd)
    gtp_cmd = parse_AI_input(color, gtp_reply)
    wfile.write(gtp_cmd)
    wfile.close()
    # sys.stdout.write(gtp_reply + '\n')
    # sys.stdout.flush()

    AI_x, AI_y = parse_AI_reply(gtp_reply)

    response = color + '[' + AI_x + AI_y + ']'
    # sys.stdout.write(response)
    # sys.stdout.flush()

    return {'game_id': msg['game_id'], 'msg': response}
Example #24
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from IPython import display
import pylab as pl
import numpy as np
import os
import random
import re
import sys
import go
from policy import PolicyNetwork
from strategies import MCTSPlayerMixin
read_file = "saved_models/20170718"
WHITE, EMPTY, BLACK, FILL, KO, UNKNOWN = range(-1, 5)
n = PolicyNetwork(use_cpu=True)
n.initialize_variables(read_file)
instance = MCTSPlayerMixin(n)


class User():
    def __init__(self, name, state_size, action_size):
        self.name = name
        self.state_size = state_size
        self.action_size = action_size

    def act(self, state, tau):
        action = int(input('Enter your chosen action: '))
        pi = np.zeros(self.action_size)
        pi[action] = 1
        value = None
Example #25
def train(processed_dir,
          save_file=None,
          epochs=10,
          logdir=None,
          checkpoint_freq=10000):
    test_dataset = DataSet.read(os.path.join(processed_dir, "test.chunk.gz"))
    train_chunk_files = [
        os.path.join(processed_dir, fname)
        for fname in os.listdir(processed_dir)
        if TRAINING_CHUNK_RE.match(fname)
    ]
    save_file = os.path.join(os.getcwd(), save_file)
    n = PolicyNetwork()
    try:
        n.initialize_variables(save_file)
    except:
        n.initialize_variables(None)
    if logdir is not None:
        n.initialize_logging(logdir)
    last_save_checkpoint = 0
    for i in range(epochs):
        random.shuffle(train_chunk_files)
        for file in train_chunk_files:
            print("Using %s" % file)
            train_dataset = DataSet.read(file)
            train_dataset.shuffle()
            with timer("training"):
                n.train(train_dataset)
            n.save_variables(save_file)
            if n.get_global_step() > last_save_checkpoint + checkpoint_freq:
                with timer("test set evaluation"):
                    n.check_accuracy(test_dataset)
                last_save_checkpoint = n.get_global_step()
Example #26
def run_iterations(args):
    # Init model
    state_size = 16
    action_size = 4
    if args.env == "MountainCar-v0":
        state_size = 2
        action_size = 3
    if args.env == "Freeway-ram-v0":
        state_size = 128
        action_size = 3
    if args.env == "CartPole-v0":
        state_size = 4
        action_size = 2
    model = PolicyNetwork(state_size, action_size)
    optimizer = optim.Adam(model.parameters(), args.learning_rate)
    start_n = 4
    reward_per_iteration = []
    for i in range(args.max_iterations):
        # boolean for demo 
        if not args.demo:
            state = to_tensor(ENV.reset(), state_size)
        else:
            # take the start_n-th state from the demo and use it as the start state
            # how to choose this together with max_iterations: repeat each start state a few times, or only once?
            start_state = get_start_state(ENV, args.env, start_n)
            # problem: one environment exposes ENV.env.s and another ENV.env.state; maybe there is a more elegant solution?
            if args.env == "FrozenLake-v0": 
                ENV.env.s = start_state
                state = to_tensor(ENV.env.s, state_size)
            else:
                ENV.env.state = start_state
                state = to_tensor(ENV.env.state, state_size)
        reward_per_episode = []
        episode_loss = 0
        for step in range(args.max_steps):
            if args.render: ENV.render()
            action = select_action(model, state, get_epsilon(i), action_size)
            next_state, reward, done, _ = ENV.step(action)  # step the environment with the chosen action
            # compute the q value
            q_val = compute_q_val(model, state, action)


            with torch.no_grad():  # Don't compute gradient info for the target (semi-gradient)
                next_state = to_tensor(next_state, state_size)
                target = compute_target(model, reward, next_state, done, args.discount_factor)

            # loss is measured from error between current and newly expected Q values
            loss = F.smooth_l1_loss(q_val, target)

            # backpropagation of loss to Neural Network (PyTorch magic)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            episode_loss += loss.item()  # accumulate a float, not the graph-attached tensor
            state = next_state
            reward_per_episode.append(reward)
            if done: break

        if i % args.print_every == 0:
            print("Reward", reward, sum(reward_per_episode))
            print("Step {:6d} with loss: {:4f}".format(i, episode_loss))
        reward_per_iteration.append(reward_per_episode)
    return reward_per_iteration
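Note: the helpers this loop relies on (`to_tensor`, `get_epsilon`, `select_action`, `compute_q_val`, `compute_target`) are not part of the snippet. Minimal sketches of the two value computations, assuming the `PolicyNetwork` here maps a state tensor to one Q-value per action, could look like:

import torch

def compute_q_val(model, state, action):
    # Q-value of the action that was actually taken in this state.
    return model(state)[action]

def compute_target(model, reward, next_state, done, discount_factor):
    # One-step TD target; the bootstrap term is dropped on terminal states.
    if done:
        return torch.tensor(float(reward))
    return reward + discount_factor * model(next_state).max()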