Example #1
import gym
from QL_agents import QLAgent


# Constants
MAX_EPISODES = 10000000  # maximum number of training episodes
MAX_EPISODE_TIME = 100000  # maximum steps per episode
RANDOM_STATES = 50000  # warm-up states taken with random actions
MAX_STATES = 10000000  # total environment states to train on
SAVE_AFTER = 500000  # save weights every SAVE_AFTER states (was 1000000)
SAVE_TARGET_MODEL_AFTER = 10000  # target-network sync interval, in states
MAX_NOOP = 30  # max random no-op actions at the start of an episode
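# A sketch of the save/sync cadence these constants imply, assuming the
# Keras-style weight API that the load_weights calls below suggest:
#
#   if state_counter % SAVE_TARGET_MODEL_AFTER == 0:
#       agent.target_model.set_weights(agent.model.get_weights())
#   if state_counter % SAVE_AFTER == 0:
#       agent.model.save_weights('DQN_breakout_weights%d.hdf5' % state_counter)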

# initialize gym environment and the agent
env = gym.make('BreakoutDeterministic-v4')
agent = QLAgent('breakout', recurrent=False)

# setup to continue training
# agent.epsilon = 0.1
# agent.model.load_weights('DQN_breakout_weights14000000.hdf5')
# agent.target_model.load_weights('DQN_breakout_weights14000000.hdf5')

episode_return = 0
state_counter = 1
random_counter = 0
act_list = []
training_returns = []
# Iterate over episodes
for e in range(MAX_EPISODES):
    # Observe reward and initialize first state
    obs = agent.preprocessor.get_center_objects(env.reset())
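
The excerpt stops right after the first observation is preprocessed. Below is a minimal, self-contained sketch of the two steps that typically follow in this kind of setup: a random number of no-op actions at episode start (the role of MAX_NOOP above) and stacking four copies of the first preprocessed frame into the initial state, as Examples #3 and #4 below do. The initial_state helper is illustrative, not part of QL_agents:

import random
import numpy as np

def initial_state(env, preprocess, max_noop=30):
    # Reset, take a random number of no-ops (action 0 is NOOP in Atari),
    # then stack four copies of the preprocessed frame as the first state.
    obs = env.reset()
    for _ in range(random.randint(1, max_noop)):
        obs, _, done, _ = env.step(0)
        if done:
            obs = env.reset()
    frame = preprocess(obs)
    return np.concatenate((frame, frame, frame, frame))

It would be called as, e.g., initial_state(env, agent.preprocessor.get_center_objects, MAX_NOOP).
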
Example #2

import gym
from QL_agents import QLAgent


# Constants
MAX_EPISODES = 10000000  # maximum number of training episodes
MAX_EPISODE_TIME = 100000  # maximum steps per episode
RANDOM_STATES = 5000  # warm-up states taken with random actions (was 50000)
MAX_STATES = 10000000  # total environment states to train on
SAVE_AFTER = 1000000  # save weights every SAVE_AFTER states
SAVE_TARGET_MODEL_AFTER = 10000  # target-network sync interval, in states
MAX_NOOP = 30  # max random no-op actions at the start of an episode

# initialize gym environment and the agent
env = gym.make('Pong-v0')
agent = QLAgent('pong', recurrent=False)
agent.epsilon = 0.1
agent.model.load_weights('supervised_randframskip_breakout_weights_huber_target10000_hidden_150_100_4000000.hdf5')


# counters
episode_return = 0
state_counter = 1
random_counter = 0
act_list = []
training_returns = []
# Iterate over episodes
for e in range(MAX_EPISODES):
    # Observe reward and initialize first state
    obs = agent.preprocessor.get_center_objects(env.reset())
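
Example #2 pins agent.epsilon at 0.1, i.e. the agent keeps exploring 10% of the time while fine-tuning from supervised weights. A minimal sketch of the epsilon-greedy choice such a loop makes each step follows; the Keras-style model.predict call is an assumption based on the .hdf5 weight files:

import random
import numpy as np

def epsilon_greedy_action(model, current_state, n_actions, epsilon=0.1):
    # With probability epsilon take a random action; otherwise act greedily
    # on the model's Q-value estimates for the stacked-frame state.
    if random.random() < epsilon:
        return random.randrange(n_actions)
    q_values = model.predict(current_state[np.newaxis, ...])  # add batch axis
    return int(np.argmax(q_values[0]))
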
Example #3

import gym
import numpy as np
from QL_agents import QLAgent

# agent.model.load_weights('QN_pong_weights_6000000.hdf5')

# To test supervised fixed frame skipping, load the supervised weights
# agent.model.load_weights('supervised_fixed_breakout_weights_4000000.hdf5')

# To test supervised random frame skipping, change the env and load the supervised weights
# env = gym.make('Breakout-v0')
# agent = QLAgent('breakout', recurrent=False)
# agent.model.load_weights('supervised_random_breakout_weights_4000000.hdf5')

# To test supervised fixed transfer, load the supervised fixed Pong weights
# agent.model.load_weights('supervised_fixed_pong_weights_4000000.hdf5')

# To test supervised random frame-skipping transfer, change the env and load the supervised weights
env = gym.make('Breakout-v0')
agent = QLAgent('breakout', recurrent=False)
agent.model.load_weights('supervised_random_pong_weights_4000000.hdf5')

returns = []
episode_return = 0

for episode in range(100):
    # Observe reward and initialize first state
    obs = agent.preprocessor.get_center_objects(env.reset())

    # Initialize the first state with the same 4 images
    current_state = np.concatenate((obs, obs, obs, obs))

    for time_step in range(20000):
        # print("episode:", episode, "time_step:", time_step)
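        # --- The source excerpt stops here. A sketch of how the evaluation
        # --- step typically continues (assumes a Keras model and a 1-D
        # --- feature vector per preprocessed frame; not from the source):
        q_values = agent.model.predict(current_state[np.newaxis, ...])
        action = int(np.argmax(q_values[0]))  # greedy action, no exploration
        obs, reward, done, info = env.step(action)
        obs = agent.preprocessor.get_center_objects(obs)
        # Slide the 4-frame window: drop the oldest frame, append the newest
        current_state = np.concatenate((current_state[len(obs):], obs))
        episode_return += reward
        if done:
            returns.append(episode_return)
            episode_return = 0
            break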

Example #4

import gym
import numpy as np
from QL_agents import QLAgent

# agent.model.load_weights('QN_breakout_weights_6000000.hdf5')

# To test supervised fixed frame skipping, load the supervised weights
# agent.model.load_weights('supervised_fixed_pong_weights_4000000.hdf5')

# To test supervised random frame skipping, change the env and load the supervised weights
# env = gym.make('Pong-v0')
# agent = QLAgent('pong', recurrent=False)
# agent.model.load_weights('supervised_random_pong_weights_4000000.hdf5')

# To test supervised fixed transfer, load the supervised fixed Breakout weights
# agent.model.load_weights('supervised_fixed_breakout_weights_4000000.hdf5')

# To test supervised random frame-skipping transfer, change the env and load the supervised weights
env = gym.make('Pong-v0')
agent = QLAgent('pong', recurrent=False)
agent.model.load_weights('supervised_random_breakout_weights_4000000.hdf5')


returns = []
episode_return = 0

for episode in range(100):
    # Observe reward and initialize first state
    obs = agent.preprocessor.get_center_objects(env.reset())

    # Initialize the first state with the same 4 images
    current_state = np.concatenate((obs, obs, obs, obs))

    for time_step in range(20000):
        # print("episode:", episode, "time_step:", time_step)
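
Example #4 mirrors Example #3 with the games swapped, so its inner loop would continue exactly as sketched there. After the 100 evaluation episodes finish, the collected returns are typically summarized; a one-line sketch (not in the source excerpt):

print("mean return over 100 episodes: %.2f +/- %.2f"
      % (np.mean(returns), np.std(returns)))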