# Dimensions of the observation and action spaces taken from the gym env.
state_size = env.observation_space.shape[0]
action_size = env.action_space.shape[0]

agent = DQNAgent(state_size, action_size)
# To resume from a checkpoint, load saved weights here, e.g.:
#   agent.load("../models/human-ddqn.h5f")
done = False
batch_size = 32

for e in range(args.EPISODES):
    # Start a fresh episode; the network expects a (1, state_size) row vector.
    state = np.reshape(env.reset(), [1, state_size])
    for time in range(500):
        env.render()
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        # Penalize episode termination so the agent learns to survive longer.
        if done:
            reward = -10
        next_state = np.reshape(next_state, [1, state_size])
        # Store the transition for experience replay, then advance.
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            # Sync the target network at the end of each episode (DDQN).
            agent.update_target_model()
            print("episode: {}/{}, score: {}, e: {:.2}".format(
                e, args.EPISODES, time, agent.epsilon))
            break
        # Learn from a minibatch once enough transitions are buffered.
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)
    # Optional periodic checkpointing, e.g. every 10 episodes:
    #   if e % 10 == 0: agent.save("../models/human-ddqn.h5f")
def run(display_option, speed, params):
    """Run the DQN snake training/evaluation loop.

    Plays ``params['episodes']`` games. In training mode (``params['train']``)
    the agent learns online (short memory) and from experience replay after
    each game, and the final weights are saved; otherwise epsilon is pinned to
    0 so the agent acts greedily.

    Args:
        display_option: truthy to render the game window each step.
        speed: per-step delay in milliseconds when rendering.
        params: config dict — expects keys 'weights_path', 'load_weights',
            'episodes', 'batch_size', 'train', 'epsilon_decay_linear'.
    """
    import random  # local import: needed for the uniform epsilon draw below

    pygame.init()
    agent = DQNAgent(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("weights loaded")

    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0

    while counter_games < params['episodes']:
        # Keep the window responsive and allow a clean exit.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()

        # Initialize classes for a fresh game.
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform the first move to seed the state.
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            if not params['train']:
                agent.epsilon = 0
            else:
                # Linear epsilon decay: more exploitation as training proceeds.
                agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])

            # Get the old state before acting.
            state_old = agent.get_state(game, player1, food1)

            # Epsilon-greedy action selection.
            # BUG FIX: the original used `randint(0, 1) < agent.epsilon`, which
            # draws only the integers 0 or 1 — exploration probability was a
            # constant 50% for ANY epsilon in (0, 1], not epsilon itself.
            # A uniform float in [0, 1) gives the intended probability.
            if random.random() < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # Predict the best action from the old state (11-feature input).
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)

            # Perform the chosen move and observe the new state.
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)

            # Reward for the transition (depends on crash/food outcome).
            reward = agent.set_reward(player1, game.crash)

            if params['train']:
                # Train on the single fresh transition, then bank it for replay.
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                agent.remember(state_old, final_move, reward, state_new, game.crash)

            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        if params['train']:
            # Experience replay over the accumulated long-term memory.
            agent.replay_new(agent.memory, params['batch_size'])

        counter_games += 1
        print(f'Game {counter_games} Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)

    if params['train']:
        agent.model.save_weights(params['weights_path'])
    plot_seaborn(counter_plot, score_plot)