Example #1
import gym
from QL_agents import QLAgent


# Constants
MAX_EPISODES = 10000000  # maximum number of training episodes
MAX_EPISODE_TIME = 100000  # maximum steps per episode
RANDOM_STATES = 50000  # warm-up states taken with random actions
MAX_STATES = 10000000  # total environment states to train on
SAVE_AFTER = 500000  # save weights every SAVE_AFTER states (was 1000000)
SAVE_TARGET_MODEL_AFTER = 10000  # target-network sync interval, in states
MAX_NOOP = 30  # max random no-op actions at the start of an episode
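# A sketch of the save/sync cadence these constants imply, assuming the
# Keras-style weight API that the load_weights calls below suggest:
#
#   if state_counter % SAVE_TARGET_MODEL_AFTER == 0:
#       agent.target_model.set_weights(agent.model.get_weights())
#   if state_counter % SAVE_AFTER == 0:
#       agent.model.save_weights('DQN_breakout_weights%d.hdf5' % state_counter)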

# initialize gym environment and the agent
env = gym.make('BreakoutDeterministic-v4')
agent = QLAgent('breakout', recurrent=False)

# setup to continue training
# agent.epsilon = 0.1
# agent.model.load_weights('DQN_breakout_weights14000000.hdf5')
# agent.target_model.load_weights('DQN_breakout_weights14000000.hdf5')

episode_return = 0
state_counter = 1
random_counter = 0
act_list = []
training_returns = []
# Iterate over episodes
for e in range(MAX_EPISODES):
    # Observe reward and initialize first state
    obs = agent.preprocessor.get_center_objects(env.reset())
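
The excerpt stops right after the first observation is preprocessed. Below is a minimal, self-contained sketch of the two steps that typically follow in this kind of setup: a random number of no-op actions at episode start (the role of MAX_NOOP above) and stacking four copies of the first preprocessed frame into the initial state, as Examples #3 and #4 below do. The initial_state helper is illustrative, not part of QL_agents:

import random
import numpy as np

def initial_state(env, preprocess, max_noop=30):
    # Reset, take a random number of no-ops (action 0 is NOOP in Atari),
    # then stack four copies of the preprocessed frame as the first state.
    obs = env.reset()
    for _ in range(random.randint(1, max_noop)):
        obs, _, done, _ = env.step(0)
        if done:
            obs = env.reset()
    frame = preprocess(obs)
    return np.concatenate((frame, frame, frame, frame))

It would be called as, e.g., initial_state(env, agent.preprocessor.get_center_objects, MAX_NOOP).
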
Example #2

import gym
from QL_agents import QLAgent


# Constants
MAX_EPISODES = 10000000  # maximum number of training episodes
MAX_EPISODE_TIME = 100000  # maximum steps per episode
RANDOM_STATES = 5000  # warm-up states taken with random actions (was 50000)
MAX_STATES = 10000000  # total environment states to train on
SAVE_AFTER = 1000000  # save weights every SAVE_AFTER states
SAVE_TARGET_MODEL_AFTER = 10000  # target-network sync interval, in states
MAX_NOOP = 30  # max random no-op actions at the start of an episode

# initialize gym environment and the agent
env = gym.make('Pong-v0')
agent = QLAgent('pong', recurrent=False)
agent.epsilon = 0.1
agent.model.load_weights('supervised_randframskip_breakout_weights_huber_target10000_hidden_150_100_4000000.hdf5')


# counters
episode_return = 0
state_counter = 1
random_counter = 0
act_list = []
training_returns = []
# Iterate over episodes
for e in range(MAX_EPISODES):
    # Observe reward and initialize first state
    obs = agent.preprocessor.get_center_objects(env.reset())
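
Example #2 pins agent.epsilon at 0.1, i.e. the agent keeps exploring 10% of the time while fine-tuning from supervised weights. A minimal sketch of the epsilon-greedy choice such a loop makes each step follows; the Keras-style model.predict call is an assumption based on the .hdf5 weight files:

import random
import numpy as np

def epsilon_greedy_action(model, current_state, n_actions, epsilon=0.1):
    # With probability epsilon take a random action; otherwise act greedily
    # on the model's Q-value estimates for the stacked-frame state.
    if random.random() < epsilon:
        return random.randrange(n_actions)
    q_values = model.predict(current_state[np.newaxis, ...])  # add batch axis
    return int(np.argmax(q_values[0]))
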
Example #3

import gym
import numpy as np
from QL_agents import QLAgent

# agent.model.load_weights('QN_pong_weights_6000000.hdf5')

# To test supervised fixed frame skipping, load the supervised weights
# agent.model.load_weights('supervised_fixed_breakout_weights_4000000.hdf5')

# To test supervised random frame skipping, change the env and load the supervised weights
# env = gym.make('Breakout-v0')
# agent = QLAgent('breakout', recurrent=False)
# agent.model.load_weights('supervised_random_breakout_weights_4000000.hdf5')

# To test supervised fixed transfer, load the supervised fixed Pong weights
# agent.model.load_weights('supervised_fixed_pong_weights_4000000.hdf5')

# To test supervised random frame-skipping transfer, change the env and load the supervised weights
env = gym.make('Breakout-v0')
agent = QLAgent('breakout', recurrent=False)
agent.model.load_weights('supervised_random_pong_weights_4000000.hdf5')

returns = []
episode_return = 0

for episode in range(100):
    # Observe reward and initialize first state
    obs = agent.preprocessor.get_center_objects(env.reset())

    # Initialize the first state with the same 4 images
    current_state = np.concatenate((obs, obs, obs, obs))

    for time_step in range(20000):
        # print("episode:", episode, "time_step:", time_step)
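        # --- The source excerpt stops here. A sketch of how the evaluation
        # --- step typically continues (assumes a Keras model and a 1-D
        # --- feature vector per preprocessed frame; not from the source):
        q_values = agent.model.predict(current_state[np.newaxis, ...])
        action = int(np.argmax(q_values[0]))  # greedy action, no exploration
        obs, reward, done, info = env.step(action)
        obs = agent.preprocessor.get_center_objects(obs)
        # Slide the 4-frame window: drop the oldest frame, append the newest
        current_state = np.concatenate((current_state[len(obs):], obs))
        episode_return += reward
        if done:
            returns.append(episode_return)
            episode_return = 0
            break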

Example #4

import gym
import numpy as np
from QL_agents import QLAgent

# agent.model.load_weights('QN_breakout_weights_6000000.hdf5')

# To test supervised fixed frame skipping, load the supervised weights
# agent.model.load_weights('supervised_fixed_pong_weights_4000000.hdf5')

# To test supervised random frame skipping, change the env and load the supervised weights
# env = gym.make('Pong-v0')
# agent = QLAgent('pong', recurrent=False)
# agent.model.load_weights('supervised_random_pong_weights_4000000.hdf5')

# To test supervised fixed transfer, load the supervised fixed Breakout weights
# agent.model.load_weights('supervised_fixed_breakout_weights_4000000.hdf5')

# To test supervised random frame-skipping transfer, change the env and load the supervised weights
env = gym.make('Pong-v0')
agent = QLAgent('pong', recurrent=False)
agent.model.load_weights('supervised_random_breakout_weights_4000000.hdf5')


returns = []
episode_return = 0

for episode in range(100):
    # Observe reward and initialize first state
    obs = agent.preprocessor.get_center_objects(env.reset())

    # Initialize the first state with the same 4 images
    current_state = np.concatenate((obs, obs, obs, obs))

    for time_step in range(20000):
        # print("episode:", episode, "time_step:", time_step)
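
Example #4 mirrors Example #3 with the games swapped, so its inner loop would continue exactly as sketched there. After the 100 evaluation episodes finish, the collected returns are typically summarized; a one-line sketch (not in the source excerpt):

print("mean return over 100 episodes: %.2f +/- %.2f"
      % (np.mean(returns), np.std(returns)))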