class AI:
    def __init__(self, fname):
        lr = 0.0005
        self.agent = Agent(gamma=0.99, epsilon=0.0, alpha=lr, input_dims=6,
                           n_actions=2, mem_size=60000, batch_size=64,
                           epsilon_end=0.0, fname=fname)
        self.observation = []
        self.action = 0
        self.n_step = 0
        self.fname = fname.split("/")[-1]

    def episode_start(self, observation):
        self.observation = observation

    def choose_action(self):
        self.action = self.agent.choose_action(self.observation)
        return self.action

    def step(self, observation_, reward, done):
        self.agent.remember(self.observation, self.action, reward,
                            observation_, int(done))
        self.observation = observation_
        # train only every third step to cut down on learning cost
        if self.n_step % 3 == 0:
            self.agent.learn()
        self.n_step += 1

    def episode_end(self):
        self.agent.save_model()
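# A minimal driver sketch for the AI wrapper above. The `env` here stands in
# for a hypothetical Gym-style environment whose step() returns
# (observation, reward, done); the path and env are assumptions for
# illustration, not part of the project.
#
# ai = AI('saved_models/dq_model.h5')   # illustrative path
# observation = env.reset()
# ai.episode_start(observation)
# done = False
# while not done:
#     action = ai.choose_action()
#     observation_, reward, done = env.step(action)  # assumed env API
#     ai.step(observation_, reward, done)
# ai.episode_end()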
def main():
    gym_env = gym.make('custom_gym:Xplane-v0')
    lr = 0.001
    gam = 0.01
    n_games = 1
    # nn_input = obs()
    agent = Agent(learning_rate=lr, gamma=gam, epsilon=1.0, input_dims=(6,),
                  n_actions=15, batch_size=32,
                  file_name='AI_takeoff/saved_models/dq_model_2.h5')
    scores = []
    total_steps = []
    eps_hist = []
    agent.load_model()

    for i in range(n_games):
        try:
            done = False
            score = 0
            observation = gym_env.reset()
            time.sleep(2)
            observation_checkpoints = np.array([observation[0:2]])
            step_counter = 0
            print("GAME ITERATION ", i)
            while not done:
                action = agent.choose_action(observation)
                new_observation, reward, done = gym_env.step(action)
                step_counter += 1
                score += reward
                agent.store_transition(observation, action, reward,
                                       new_observation, done)
                observation = new_observation
                # agent.learn()

                # Check whether the airplane is stuck: if its position has
                # not changed over the last 30 steps, end the episode.
                observation_checkpoints = np.append(
                    observation_checkpoints, [new_observation[0:2]], axis=0)
                print(observation_checkpoints)
                print("stepcounter is", step_counter)
                if step_counter % 30 == 0:
                    if np.array_equal(
                            observation_checkpoints[step_counter - 30],
                            observation_checkpoints[step_counter - 1]):
                        done = True

            eps_hist.append(agent.epsilon)
            scores.append(score)
            total_steps.append(step_counter)
        except Exception as e:
            print(str(e))
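# The stall check above compares position checkpoints taken 30 steps apart.
# A minimal standalone sketch of that heuristic (function and argument names
# are hypothetical, not from the project):
import numpy as np

def is_stuck(checkpoints, window=30):
    """Return True when the recorded (x, y) position has not changed over
    the last `window` steps."""
    if len(checkpoints) < window:
        return False
    return np.array_equal(checkpoints[-window], checkpoints[-1])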
def main():
    # make env and agent
    env = gym.make('LunarLander-v2')
    agent = Agent(gamma=0.99, epsilon=1.0, batch_size=64, n_actions=4,
                  eps_end=0.01, input_dims=[8], lr=0.0001)
    scores, eps_history = [], []
    n_games = 500

    for i in range(n_games):
        score = 0
        done = False
        observation = env.reset()
        while not done:  # in-game loop
            # get action from the current view of the game (observation)
            action = agent.choose_action(observation)
            # advance to the next frame
            observation_, reward, done, info = env.step(action)
            score += reward
            # store the transition in replay memory
            agent.store_transition(observation, action, reward,
                                   observation_, done)
            agent.learn()
            # the next state becomes the current state
            observation = observation_

        # record score and epsilon
        scores.append(score)
        eps_history.append(agent.epsilon)

        # print progress
        avg_score = np.mean(scores[-100:])
        print(f'Episode: {i} Score: {score} '
              f'Average Score: {avg_score} Epsilon: {agent.epsilon}')
    brain.store_transition(observation, action, reward, observation_,
                           int(done))
    observation = observation_
print('done initializing memory')

# uncomment the line below to record every episode.
# env = wrappers.Monitor(env, "tmp/space-invaders-1",
#                        video_callable=lambda episode_id: True, force=True)

for i in range(numGames):
    print('starting game ', i + 1, 'epsilon: %.4f' % brain.epsilon)
    epsHistory.append(brain.epsilon)
    done = False
    observation = env.reset()
    observation = preprocess(observation, crop_start, crop_end)
    score = 0
    while not done:
        action = brain.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        score += reward
        observation_ = preprocess(observation_, crop_start, crop_end)
        if done and info['ale.lives'] == 0:
            reward = -100
        brain.store_transition(observation, action, reward, observation_,
                               int(done))
        observation = observation_
        brain.learn()
        env.render()
    scores.append(score)
    print('score:', score)

x = [i + 1 for i in range(numGames)]
fileName = str(numGames) + 'Games' + 'Gamma' + str(brain.gamma) + \
    'Alpha' + str(brain.lr) + 'Memory' + str(brain.mem_size) + '.png'
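# The loop above relies on a `preprocess` helper that is not shown in this
# excerpt. A plausible minimal reconstruction (an assumption, not the
# project's code) converts the RGB frame to grayscale and crops the rows
# between crop_start and crop_end:
import numpy as np

def preprocess(frame, crop_start, crop_end):
    """Hypothetical sketch: grayscale by channel average, then row crop."""
    gray = np.mean(frame, axis=2)        # (H, W, 3) -> (H, W)
    return gray[crop_start:crop_end, :]  # keep only the playing field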
n_games = 300
show = False
agent = Agent(gamma=0.99, epsilon=1.0, alpha=0.0005, input_dims=8,
              n_actions=4, batch_size=64)
scores = []
eps_history = []

for i in range(1, n_games + 1):
    done = False
    score = 0
    observation = env.reset()
    while not done:
        if show:
            env.render()
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        score += reward
        agent.remember(observation, action, reward, observation_, done)
        observation = observation_
        agent.learn()

    eps_history.append(agent.epsilon)
    scores.append(score)
    avg_score = np.mean(scores[max(0, i - 100):i + 1])
    print('episode', i, 'score', score, 'avg score', avg_score)

    # checkpoint the model every 10 games
    if i % 10 == 0:
        agent.save_model()

plt.plot(scores)
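# The dangling plt.plot(scores) above could be rounded out with the same
# 100-episode running average the loop prints. A sketch (helper name and
# output path are illustrative only):
import numpy as np
import matplotlib.pyplot as plt

def plot_scores(scores, window=100, path='scores.png'):
    """Plot raw episode scores against their running average."""
    running_avg = [np.mean(scores[max(0, i - window):i + 1])
                   for i in range(len(scores))]
    plt.plot(scores, alpha=0.4, label='score')
    plt.plot(running_avg, label=f'{window}-episode average')
    plt.xlabel('episode')
    plt.ylabel('score')
    plt.legend()
    plt.savefig(path)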
def main():
    scores = []
    eps_history = []
    info_history = []

    # Random starting-points:
    env = sky.make(random=True, xi=(301, 650 - 25), yi=(100, 300 - 25),
                   width=15, height=15, v_initial=14)
    # Fixed starting-point:
    # env = sky.make(xi=550)

    agent = Agent(gamma=gamma, epsilon=epsilon, lr=lr,
                  input_dims=[imput_dimensions], n_actions=n_actions,
                  mem_size=mem_size, batch_size=batch_size,
                  epsilon_dec=epsilon_dec)
    if load_checkpoint:
        agent.load_models()

    for i in range(n_games):
        score = 0
        done = False
        observation = env.reset()
        while not done:
            # one game: ends when done is True
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            score += reward
            agent.store_transition(observation, action, reward,
                                   observation_, int(done))
            observation = observation_
            agent.learn()

        if i % 10 == 0 and i > 0:
            avg_score = np.mean(scores[max(0, i - 10):(i + 1)])
            print(i, 'episode', info, '|| score:', score,
                  '| average score: %.3f' % avg_score,
                  '| epsilon: %.3f' % agent.epsilon,
                  '| training done:', round(i / n_games, 2))
        else:
            print(i, 'episode', info, '|| score:', score)

        scores.append(score)
        eps_history.append(agent.epsilon)
        info_history.append(info)

    print('training ended with:',
          [[el, info_history.count(el)] for el in ('crashed', 'goal')])

    if save_checkpoint:
        agent.save_models()
        print('[+] model saved')

    # -------------------
    # Plotting and output
    # -------------------
    x = [i + 1 for i in range(n_games)]

    # First axis: scores
    fig, ax1 = plt.subplots()
    color = 'tab:red'
    ax1.set_xlabel('Episode')
    ax1.set_ylabel('Score per episode', color=color)
    ax1.scatter(x, scores, color=color, s=2)
    ax1.tick_params(axis='y', labelcolor=color)

    # Second axis: epsilon
    ax2 = ax1.twinx()  # second axes sharing the same x-axis
    color = 'tab:blue'
    ax2.set_ylabel('epsilon', color=color)  # x-label already handled by ax1
    ax2.plot(x, eps_history, color=color)
    ax2.tick_params(axis='y', labelcolor=color)

    # Output
    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    plt.savefig(filename)

    return env
def selfplay():
    """Legacy function: an attempt at self-play reinforcement learning
    in the style of AlphaZero for Go."""
    agent2 = Agent(0.99, 0.1, 0.003, 42, train_games, 7, eps_dec)
    agent2.load_checkpoint()
    global win_cntr
    global done

    g = Game()
    turn = random.choice([PLAYER, AI])
    done = False
    transitions_agent = []
    transitions_agent2 = []

    while not done:
        g.printBoard()
        if turn == PLAYER:
            # row = input('{}\'s turn: '.format('Red'))
            # g.insert(int(row), turn)
            observation = np.asarray(
                [i for sublist in g.board for i in sublist])
            action = agent2.choose_action(observation)
            if g.check_if_action_valid(action):
                print('{}\'s turn: %d'.format('Red') % action)
                g.insert(action, PLAYER_PIECE)
            else:
                # penalize invalid moves, then fall back to a random column
                while not g.check_if_action_valid(action):
                    agent.store_transition(observation, action, -100,
                                           observation, done)
                    action = np.random.randint(7)
                print('{}\'s turn: %d'.format('Red') % action)
                g.insert(action, PLAYER_PIECE)
            observation_ = np.asarray(
                [i for sublist in g.board for i in sublist])
            transitions_agent2 += [(observation, action, observation_, done)]
        else:
            observation = np.asarray(
                [i for sublist in g.board for i in sublist])
            action = agent.choose_action(observation)
            if g.check_if_action_valid(action):
                print('{}\'s turn: %d'.format('Yellow') % action)
                g.insert(action, AI_PIECE)
            else:
                while not g.check_if_action_valid(action):
                    agent.store_transition(observation, action, -100,
                                           observation, done)
                    action = np.random.randint(7)
                print('{}\'s turn: %d'.format('Yellow') % action)
                g.insert(action, AI_PIECE)
            observation_ = np.asarray(
                [i for sublist in g.board for i in sublist])
            transitions_agent += [(observation, action, observation_, done)]
        turn = AI if turn == PLAYER else PLAYER

    if g.getWinner() == Tie:
        reward_agent = 0
    else:
        winner = AI if turn == PLAYER else PLAYER
        if winner == AI:
            win_cntr += 1
            reward_agent = 5 if vertical_win else 20
        else:
            reward_agent = -20

    # backfill the terminal reward into every transition of this game
    for obs, action, obs_, done_flag in transitions_agent:
        agent.store_transition(obs, action, reward_agent, obs_, done_flag)
    agent.learn()
    return
                break
            elif e.type == pygame.KEYDOWN:
                if e.key == pygame.K_RETURN:
                    done = False
                    p1_state, p2_state = env.reset()
                    break

        env.render()

        # Get actions based on player type
        if p1_type == 'Human':
            p1_action = pygame.mouse.get_pos()[1], pygame.mouse.get_rel()[1]
        elif p1_type == 'Agent':
            p1_action = agent_1.choose_action(p1_state)
        if p2_type == 'Human':
            p2_action = pygame.mouse.get_pos()[1], pygame.mouse.get_rel()[1]
        elif p2_type == 'Agent':
            p2_action = agent_2.choose_action(p2_state)

        # Environment takes a step; returns observations, rewards, and status
        p1_state_, p2_state_, p1_reward, p2_reward, done = env.step(
            p1_action, p2_action)

        # Update memory objects with states for each player
        if train_networks:
            memory_1.store_transition(p1_state, p1_action, p1_reward,
            scores['avg'].append(avg_score)
            scores['max'].append(max_score)
            scores['min'].append(min_score)
        else:
            print("episode " + str(episode) + " score " + str(score))

        state = env.reset()
        state = preprocess_state(state)
        stacked_states = stack_states(None, state, stack_size)
        score = 0
        done = False
        while not done:
            action = agent.choose_action(stacked_states)
            # the agent's action indices are offset by one from the env's
            new_state, reward, done, _ = env.step(action + 1)
            new_state = preprocess_state(new_state)
            new_stacked_states = stack_states(stacked_states, new_state,
                                              stack_size)
            agent.store_transition(stacked_states, action, reward,
                                   new_stacked_states, int(done))
            score += reward
            agent.learn()
            stacked_states = new_stacked_states

        history['eps'].append(agent.epsilon)
        history['score'].append(score)
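# `stack_states` is referenced but not defined in this excerpt. A common
# frame-stacking implementation, given here only as an assumption about what
# the project intends, keeps the most recent `stack_size` frames in one array:
import numpy as np

def stack_states(stacked, state, stack_size):
    """On the first call (stacked is None) repeat the frame stack_size
    times; afterwards drop the oldest frame and append the new one."""
    state = np.asarray(state)
    if stacked is None:
        return np.stack([state] * stack_size, axis=0)
    return np.concatenate([stacked[1:], state[None]], axis=0)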