import gym

# RandomAgent is assumed to be provided elsewhere in the project.


def main(env_name, render=False):
    env = gym.make(env_name)
    # Initialize your agent here
    agent = RandomAgent(env)
    for episode_i in range(100000):
        state = env.reset()
        done = False
        while not done:
            if render and episode_i % 10 == 0:
                env.render()
            # Your agent's action here
            action = agent.act(state)
            state, reward, done, info = env.step(action)
import time

import gym

from agent import RandomAgent

env = gym.make("CartPole-v1")
agent = RandomAgent(env.action_space)

episode_count = 10
reward = 0
done = False

for i in range(episode_count):
    ob = env.reset()
    while True:
        action = agent.act(ob, reward, done)
        ob, reward, done, info = env.step(action)
        if done:
            print("Game Finished!")
            break
        env.render()
        time.sleep(1 / 30)

env.close()
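The `agent` module imported above is not shown. A minimal sketch of what `agent.py` might contain, assuming `RandomAgent` only needs the action space and ignores the observation, reward, and done flag it receives; this is an illustrative assumption, not the original implementation:

# agent.py -- minimal sketch under the assumptions stated above
class RandomAgent(object):
    """Picks a uniformly random action, ignoring its inputs."""

    def __init__(self, action_space):
        self.action_space = action_space

    def act(self, observation, reward, done):
        # Arguments are accepted only for interface compatibility.
        return self.action_space.sample()

With this definition, the loop above runs as a pure random baseline on CartPole-v1.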
import random

import numpy as np

# MazeEnv, RandomAgent, and the parsed `args` are assumed to come from the
# surrounding project.

# set seed
random.seed(args.seed)
np.random.seed(args.seed)

# environment
env = MazeEnv(args, args.game_name, args.graph_param,
              args.game_len, args.gamma)

# agent
if args.agent == 'random':
    agent = RandomAgent(args, env)

# Roll out the random agent on each graph and report the running average return.
NUM_GRAPH = 100
NUM_ITER = 32
ep_rews = []
for graph_id in range(NUM_GRAPH):
    for _ in range(NUM_ITER):
        ep_rew = 0
        state, info = env.reset(graph_index=graph_id)
        done = False
        while not done:
            action = agent.act(state)
            state, rew, done, info = env.step(action)
            ep_rew += rew
        ep_rews.append(ep_rew)
    string = 'Graph={:02d}/{:02d}, Return={:.4f}'
    print(string.format(graph_id, NUM_GRAPH, sum(ep_rews) / len(ep_rews)))

print('Avg. Ep Return={:.4f}'.format(sum(ep_rews) / len(ep_rews)))
print('This should be around 0.0455')
import collections
import os

import torch
from tqdm import tqdm

# NOTE: DQNAgent, EnsembleDQNAgent, RandomAgent, NoOpAgent, LinearSchedule,
# ReplayBuffer, Experience, try_gpu, evaluate, env, and the ALL_CAPS
# hyperparameters are assumed to be defined elsewhere in the project.
# This snippet is written in Python 2 (print statements, xrange).


def challenger_round():
    challengers = []
    leaders = []
    leader_checkpoints = os.listdir(LEADER_DIR)
    # Need to share the same schedule with all challengers, so they all anneal
    # at the same rate.
    epsilon_schedule = LinearSchedule(EPS_START, EPS_END, TRAIN_FRAMES)
    for i in xrange(NUM_LEADERS):
        challenger = try_gpu(
            DQNAgent(6, epsilon_schedule, OBSERVATION_MODE,
                     lr=LR, max_grad_norm=GRAD_CLIP_NORM))
        if i < len(leader_checkpoints):
            # Warm-start both the challenger and the leader from a saved leader.
            leader = try_gpu(
                DQNAgent(6, LinearSchedule(0.1, 0.1, 500000), OBSERVATION_MODE))
            leader_path = os.path.join(LEADER_DIR, leader_checkpoints[i])
            print "LOADING CHECKPOINT: {}".format(leader_path)
            challenger.load_state_dict(
                torch.load(leader_path,
                           map_location=lambda storage, loc: storage))
            leader.load_state_dict(
                torch.load(leader_path,
                           map_location=lambda storage, loc: storage))
        else:
            leader = RandomAgent(6)
            print "INITIALIZING NEW CHALLENGER AND LEADER"
        challengers.append(challenger)
        leaders.append(leader)

    if CHALLENGER_DIR is not None:
        challengers = []
        # Load in all of the leaders
        for checkpoint in os.listdir(CHALLENGER_DIR):
            path = os.path.join(CHALLENGER_DIR, checkpoint)
            print "LOADING FROM CHALLENGER_DIR: {}".format(path)
            challenger = try_gpu(
                DQNAgent(6, LinearSchedule(0.05, 0.05, 1),
                         CHALLENGER_OBSERVATION_MODE, lr=LR,
                         max_grad_norm=GRAD_CLIP_NORM, name=checkpoint))
            challenger.load_state_dict(
                torch.load(path, map_location=lambda storage, loc: storage))
            challengers.append(challenger)

    challenger = EnsembleDQNAgent(challengers)
    leader = EnsembleDQNAgent(leaders)
    if OPPONENT is not None or HUMAN:
        leader = NoOpAgent()
    replay_buffer = ReplayBuffer(1000000)
    rewards = collections.deque(maxlen=1000)
    frames = 0  # number of training frames seen
    episodes = 0  # number of training episodes that have been played
    with tqdm(total=TRAIN_FRAMES) as progress:
        # Each loop completes a single episode
        while frames < TRAIN_FRAMES:
            states = env.reset()
            challenger.reset()
            leader.reset()
            episode_reward = 0.
            episode_frames = 0
            # Each loop completes a single step, duplicates _evaluate() to
            # update at the appropriate frame #s
            for _ in xrange(MAX_EPISODE_LENGTH):
                frames += 1
                episode_frames += 1
                action1 = challenger.act(states[0])
                action2 = leader.act(states[1])
                next_states, reward, done = env.step(action1, action2)
                episode_reward += reward

                # NOTE: state and next_state are LazyFrames and must be
                # converted to np.arrays
                replay_buffer.add(
                    Experience(states[0], action1._action_index, reward,
                               next_states[0], done))
                states = next_states

                if len(replay_buffer) > 50000 and frames % 4 == 0:
                    experiences = replay_buffer.sample(32)
                    challenger.update_from_experiences(experiences)

                if frames % 10000 == 0:
                    challenger.sync_target()

                if frames % SAVE_FREQ == 0:
                    # TODO: Don't access internals
                    for agent in challenger._agents:
                        path = os.path.join(
                            LEADER_DIR, agent.name + "-{}".format(frames))
                        print "SAVING CHECKPOINT TO: {}".format(path)
                        torch.save(agent.state_dict(), path)
                    # path = os.path.join(
                    #     LEADER_DIR, challenger.name + "-{}".format(frames))
                    # torch.save(challenger.state_dict(), path)

                if frames >= TRAIN_FRAMES:
                    break

                if done:
                    break

            if episodes % 300 == 0:
                print "Evaluation: {}".format(
                    evaluate(challenger, leader, EPISODES_EVALUATE_TRAIN))
                print "Episode reward: {}".format(episode_reward)
            episodes += 1
            rewards.append(episode_reward)
            stats = challenger.stats
            stats["Avg Episode Reward"] = float(sum(rewards)) / len(rewards)
            stats["Num Episodes"] = episodes
            stats["Replay Buffer Size"] = len(replay_buffer)
            progress.set_postfix(stats, refresh=False)
            progress.update(episode_frames)
            episode_frames = 0
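The `RandomAgent(6)` leader used above is not defined in this excerpt. Below is a minimal sketch of an agent that satisfies the interface the training loop relies on (`act` and `reset`), assuming the environment accepts a plain integer action index for the leader's side; the class body is an assumption, not the project's actual implementation.

# Minimal sketch, assuming the leader only needs act()/reset() and that
# env.step() accepts a plain integer action for the leader.
import random


class RandomAgent(object):
    """Uniformly samples one of `num_actions` discrete actions."""

    def __init__(self, num_actions):
        self.num_actions = num_actions

    def reset(self):
        # A random policy keeps no per-episode state.
        pass

    def act(self, state):
        return random.randrange(self.num_actions)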
possible_actions = [0, 1]  # Cooperate or Defect
cooperator = RandomAgent(possible_actions, p=0.9)
defector = RandomAgent(possible_actions, p=0.1)

# Stateless interactions (agents do not have memory)
s = None
n_iter = 1000
for i in range(n_iter):
    # A full episode:
    done = False
    while not done:
        # Agents decide
        a0 = cooperator.act()
        a1 = defector.act()
        # World changes
        new_s, (r0, r1), done, _ = env.step(([a0], [a1]))
        # Agents learn
        cooperator.update(s, (a0, a1), (r0, r1), new_s)
        defector.update(s, (a1, a0), (r1, r0), new_s)
        s = new_s
    print(r0, r1)
    env.reset()
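Neither `RandomAgent` nor `env` is defined in this snippet. A minimal sketch of a biased random agent matching the interface used above follows, assuming `p` is the probability of choosing the first action (cooperate) and that `update()` is a no-op for a memoryless agent; both the parameter semantics and the class body are assumptions rather than the original implementation.

import random


class RandomAgent:
    """Memoryless agent: picks actions[0] with probability p, else actions[1]."""

    def __init__(self, actions, p=0.5):
        self.actions = actions
        self.p = p

    def act(self):
        return self.actions[0] if random.random() < self.p else self.actions[1]

    def update(self, state, actions, rewards, next_state):
        # A fixed random policy does not learn; kept for interface compatibility.
        pass

With p=0.9 the "cooperator" mostly plays action 0 and the "defector" (p=0.1) mostly plays action 1, which is what the variable names above suggest.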