from tqdm import tqdm

# RandomAgent, Sim, is_black_win and w_fnc are assumed to be defined
# elsewhere in this project.


def main():
    NUM_OF_GAMES = 100
    ai = RandomAgent()
    s = Sim()
    all_records = []
    for i in tqdm(range(NUM_OF_GAMES)):
        records = []
        # preparation of simulator
        s.reset_s()  # with no arguments the board starts empty
        while True:
            # save the outcome before ban is overwritten?
            reshape_self, reshape_opp, reshape_ban, kou = s.get_s()
            if reshape_ban != 2:
                act_num = ai.act(reshape_self, reshape_opp, reshape_ban, kou)
                s.act(act_num)
            else:
                # +1 if black wins, -1 if black loses
                outcome = 2 * is_black_win(s) - 1
                break
            records.append(
                [reshape_self[1:], reshape_opp[1:], reshape_ban, act_num])
        # append the outcome, sign-flipped for every other move
        for j in range(len(records)):
            records[j].append((1. - 2. * (j % 2)) * outcome)
        all_records.append(records)
    # write out the stacked records
    w_fnc(all_records, 'all_records.csv')
    rec = [records[0][-1] for records in all_records]
    kuro_win = NUM_OF_GAMES / 2 + sum(rec) / 2
    siro_win = NUM_OF_GAMES - kuro_win
    print('Black wins: {0} games, White wins: {1} games'.format(kuro_win, siro_win))
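# Illustrative check (not part of the original source) of the
# alternating-sign labelling above: records at even indices belong to
# black's moves and odd indices to white's, so each record is labelled
# with the game outcome from the mover's point of view.
def _label_demo():
    outcome = 1  # black won this game
    labels = [(1. - 2. * (j % 2)) * outcome for j in range(4)]
    assert labels == [1.0, -1.0, 1.0, -1.0]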
import sys

import pygame
from pygame.locals import MOUSEBUTTONDOWN, QUIT

# RandomAgent, Sim, draw and convert_to_num are assumed to be defined
# elsewhere in this project.


def main():
    # preparation of pygame
    pygame.init()
    AIis = 2  # 1 plays black, 2 plays white
    ai = RandomAgent()
    bp = [115, 15]
    g = 40
    mar = 20
    x, y = 0, 0
    pixels = [bp, g, mar]
    screen = pygame.display.set_mode((600, 400))  # window size
    pygame.display.set_caption("GoSimulator")  # title shown in the window bar
    sysfont = pygame.font.SysFont(None, 40)
    # preparation of simulator
    s = Sim()
    s.reset_s()  # with no arguments the board starts empty
    while True:
        screen.fill((0, 100, 0))  # background color
        # state, ban = s.get_s()
        # reshape the state for the GUI
        reshape_self, reshape_opp, reshape_ban, kou = s.get_s()
        ban = 2 - int(reshape_ban)
        # skip when the game is over, i.e. when ban == 0
        if ban != 0:
            state = ban * reshape_self + (3. - ban) * reshape_opp
        bl, wh = s.get_eval()  # TODO: for debugging
        draw(pygame, screen, sysfont, pixels, state, ban, x, y, bl, wh)
        pygame.display.update()  # refresh the screen
        if ban == AIis:
            s.act(ai.act(reshape_self, reshape_opp, reshape_ban, kou))
        else:
            for event in pygame.event.get():
                # quit handling
                if event.type == QUIT:
                    pygame.quit()
                    sys.exit()
                if event.type == MOUSEBUTTONDOWN and event.button == 1:
                    x, y = event.pos
                    num = convert_to_num(pixels, x, y)
                    if num == -1:
                        s.reset_s()
                    elif num in s.regal_acts() and ban != 0:
                        s.act(num)
                        bl, wh = s.get_eval()
                        if len(bl) != len(set(bl)):
                            print('[error] get_eval has overlap')
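# A hedged guess at what convert_to_num might look like (the original is
# project-specific and not shown here): map a mouse click to a board
# index on a grid anchored at bp with cell size g, returning -1 for
# clicks outside the board (which the loop above treats as a reset).
# The 9x9 board size is an assumption.
def convert_to_num_sketch(pixels, x, y, size=9):
    (bx, by), g, mar = pixels
    col = round((x - bx) / g)
    row = round((y - by) / g)
    if 0 <= col < size and 0 <= row < size:
        return row * size + col
    return -1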
import gym

# ActorCriticAgent, RandomAgent and AgentType are assumed to be defined
# elsewhere in this project.


def train_chaser(env_name, chaser_need_restore, episode_count, close=False):
    """Use the Actor-Critic TD(0) method to train the chaser.

    A batch method cannot be used to optimize the model.
    """
    env = gym.make(env_name)
    features_n = 4
    chaser = ActorCriticAgent(300, 300, 30,
                              color=(1.0, 0.0, 0.0),
                              agent_type=AgentType.Chaser,
                              features_n=features_n,
                              discounted_value=0.9,
                              learning_rate=1e-6,
                              need_restore=chaser_need_restore)
    runner = RandomAgent(500, 500, 30,
                         color=(0.0, 1.0, 0.0),
                         agent_type=AgentType.Runner,
                         env=env)
    env.add_agent(chaser)
    env.add_agent(runner)
    total_steps = 0
    losses = []
    for epi in range(episode_count):
        chaser_state = env.reset()
        step = 0
        while True:
            env.render(close=close)
            action = chaser.act(chaser_state)
            chaser_state_, reward, done, _ = env.step(chaser.type, action)
            chaser.memory.append(
                (chaser_state, action, chaser_state_, reward, done))
            chaser_state = chaser_state_
            step += 1
            total_steps += 1
            if step >= 500:
                # Abort the episode once it exceeds 500 steps.
                chaser.memory.clear()
                break
            if done:
                loss = chaser.optimize_model()
                print('Episode: %d\tsteps: %d\tloss: %f' %
                      (epi + 1, step + 1, loss))
                losses.append(loss)
                break
            else:
                runner_action = runner.act(chaser_state)
                _, _, done, _ = env.step(runner.type, runner_action)
                env.render(close=close)
                step += 1
                total_steps += 1
                if done:
                    chaser.memory.clear()
                    print('Episode: %d\tsteps: %d' % (epi + 1, step + 1))
                    break
        # save the model every 100 episodes
        if epi % 100 == 0:
            chaser.save()
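# A minimal sketch (assumed, not the project's actual optimize_model) of
# an Actor-Critic TD(0) update in PyTorch that processes the stored
# transitions one at a time, matching the "no batch method" note above.
# It assumes actor(s) returns a vector of action probabilities and
# critic(s) returns a scalar state value.
import torch


def td0_actor_critic_update(actor, critic, actor_opt, critic_opt,
                            memory, gamma=0.9):
    n = max(len(memory), 1)
    total_critic_loss = 0.0
    for state, action, next_state, reward, done in memory:
        s = torch.as_tensor(state, dtype=torch.float32)
        s_ = torch.as_tensor(next_state, dtype=torch.float32)
        v = critic(s)
        with torch.no_grad():
            # TD(0) target: r + gamma * V(s'), with V(terminal) = 0
            target = reward + gamma * (torch.zeros_like(v) if done
                                       else critic(s_))
        delta = target - v  # TD error, also the advantage estimate
        critic_loss = delta.pow(2).sum()
        # Policy gradient step weighted by the (detached) TD error
        log_prob = torch.log(actor(s)[action])
        actor_loss = -log_prob * delta.detach().sum()
        actor_opt.zero_grad()
        critic_opt.zero_grad()
        (actor_loss + critic_loss).backward()
        actor_opt.step()
        critic_opt.step()
        total_critic_loss += float(critic_loss)
    memory.clear()
    return total_critic_loss / n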
import numpy as np

# env, MAX_NUM_EPISODES, MAX_STEPS_PER_EPISODE, RandomAgent and
# Shallow_Q_Learner are assumed to be defined earlier in this module.

if __name__ == "__main__":
    sample_batch_size = 32
    observation_shape = env.observation_space.shape
    action_shape = env.action_space.n
    first_episode = True
    episode_rewards = list()
    steps = 0
    PRE_TRAINLENGTH = 3000

    # Fill the replay memory with experience from a random agent before
    # training starts.
    ragent = RandomAgent(action_shape)
    obs = env.reset()
    for step in range(PRE_TRAINLENGTH):
        # env.render()
        action = ragent.act(obs)
        next_obs, reward, done, info = env.step(action)
        next_obs = np.reshape(next_obs, [1, observation_shape[0]])
        ragent.observe((obs, action, reward, next_obs, done))
        obs = next_obs
        if done:  # start a new episode when the current one ends
            obs = env.reset()

    agent = Shallow_Q_Learner(observation_shape, action_shape,
                              memory=ragent.get_memory())
    for episode in range(MAX_NUM_EPISODES):
        obs = env.reset()
        obs = np.reshape(obs, [1, observation_shape[0]])
        cum_reward = 0.0  # cumulative reward
        for step in range(MAX_STEPS_PER_EPISODE):
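# A minimal sketch (assumed, not the original class) of the RandomAgent
# interface the pre-training loop relies on: act() samples uniformly,
# observe() buffers transitions, and get_memory() hands the buffer over
# to the learner as a warm-start replay memory.
import random
from collections import deque


class RandomReplayAgent:
    def __init__(self, action_n, memory_size=50000):
        self.action_n = action_n
        self.memory = deque(maxlen=memory_size)

    def act(self, obs):
        # Ignore the observation; pick a uniformly random action.
        return random.randrange(self.action_n)

    def observe(self, transition):
        # transition is an (obs, action, reward, next_obs, done) tuple.
        self.memory.append(transition)

    def get_memory(self):
        return self.memory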
    env.seed(38)
    return env


# test env:
if __name__ == '__main__':
    env = env_fn()
    obs = env.reset()
    env.render()
    agent = RandomAgent(env.action_space)
    reward = 0
    done = False
    n_steps = 20
    for step in range(n_steps):
        print("Step {}".format(step + 1))
        action = agent.act(obs, reward, done)
        obs, reward, done, info = env.step(action)
        print('action=', action, 'obs=', obs,
              'reward=', reward, 'done=', done)
        env.render()
        if done:
            print("Goal reached!", "reward=", reward)
            break
    env.close()
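# A minimal sketch of the RandomAgent used above (assumed; it follows
# the classic gym example agent whose act() also receives the last
# reward and done flag but ignores them):
class RandomAgent:
    def __init__(self, action_space):
        self.action_space = action_space

    def act(self, observation, reward, done):
        return self.action_space.sample()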
from copy import deepcopy

# game, p1, p2, is_terminal, states_actions_p1, win_p1, draw, game_num
# and BATCH_SIZE are assumed to come from the enclosing training loop.

game_reward_p2 = 0
# p1 starts.
turn = 1
state = game.get_state()
while not is_terminal:
    if turn == 1:
        action = p1.act(state)
        states_actions_p1.append((deepcopy(state), action))
        state, game_reward_p1, is_terminal = \
            game.step(player=1, action=action, display=False)
        # Zero-sum game: p2's reward is the negation of p1's.
        game_reward_p2 = game_reward_p1 * -1
        turn = 2
    elif turn == 2:
        action = p2.act(state)
        # states_actions_p2.append((deepcopy(state), action))
        state, game_reward_p2, is_terminal = \
            game.step(player=2, action=action, display=False)
        game_reward_p1 = game_reward_p2 * -1
        turn = 1
p1.learn(states_actions_p1, game_reward_p1)
# p2.learn(states_actions_p2, game_reward_p2)
if game_reward_p1 == 1:
    win_p1 += 1
elif game_reward_p1 == 0:
    draw += 1
if game_num % BATCH_SIZE == 0:
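# A minimal sketch (assumed interface, not the original agent) of a
# Monte Carlo-style learn() matching the call above: every (state,
# action) pair the agent visited is nudged toward the final game reward.
class TabularSelfPlayAgent:
    def __init__(self, lr=0.1):
        self.values = {}  # (state, action) -> estimated return
        self.lr = lr

    def learn(self, states_actions, final_reward):
        for state, action in states_actions:
            key = (str(state), action)
            old = self.values.get(key, 0.0)
            self.values[key] = old + self.lr * (final_reward - old)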