示例#1
0
    env, "networks/settlers_network/parameters.json", 0.2, 0.1)  #0.2, 0.1
#agent = libs_agent.agent.Agent(env)

# --- training phase ---
# Number of agent steps performed while training.
training_iterations = 500000

for iteration in range(training_iterations):
    agent.main()

    # Report only on every 100th iteration.
    if iteration % 100 != 0:
        continue

    # Show the environment, then the training progress in percent
    # followed by the current score.
    env._print()
    print(100.0 * iteration / training_iterations, env.get_score())

# Save the trained agent and load it back from the same directory.
agent.save("networks/settlers_network/trained/")
agent.load("networks/settlers_network/trained/")

# Reset the score before the testing phase.
env.reset_score()

# From now on choose only the best action.
agent.run_best_enable()

# --- testing phase ---
# Every testing iteration prints the move, the score and the moves to win.
testing_iterations = 10000
for iteration in range(testing_iterations):
    agent.main()
    print(
        "move=", env.get_move(),
        " score=", env.get_score(),
        " moves to win=", env.get_moves_to_win())

while True:
示例#2
0

# Print the environment info.
env.print_info()

# Hyperparameters passed to the DQN agent constructor below.
gamma = 0.99
replay_buffer_size = 16384
epsilon_training = 1.0
epsilon_testing = 0.1
epsilon_decay = 0.99999

# Create the DQN agent from the network configuration file.
agent = libs.libs_agent.agent_dqn.DQNAgent(
    env,
    network_path + "network_config.json",
    gamma,
    replay_buffer_size,
    epsilon_training,
    epsilon_testing,
    epsilon_decay)

# Disabled snippet, kept as a bare string literal so it never executes:
# it loads a trained agent and runs it in an endless loop.
'''
agent.load(network_path + "trained/")

agent.run_best_enable()

while True:
    agent.main()
    env._print()
'''

# Log objects for the training and testing progress files.
training_progress_log = rysy.Log(network_path + "progress_training.log")
testing_progress_log = rysy.Log(network_path + "progress_testing.log")

# Process training: number of games the training loop is compared against.
total_games_to_play = 20000
while env.get_games_count() < total_games_to_play:
示例#3
0
# Create the DQN agent from the arkanoid network parameters file.
agent = libs.libs_agent.agent_dqn.DQNAgent(
    env,
    "networks/arkanoid_network_b/parameters.json",
    0.2,
    0.02,
    0.99999)

# --- training phase ---
# Number of agent steps performed while training.
training_iterations = 250000

for iteration in range(training_iterations):
    agent.main()
    # Call env._print() on every 100th iteration only.
    if not iteration % 100:
        env._print()

# Save the trained agent and load it back from the same directory.
agent.save("networks/arkanoid_network_b/trained/")
agent.load("networks/arkanoid_network_b/trained/")

# Reset the score before the testing phase.
env.reset_score()

# From now on choose only the best action.
agent.run_best_enable()

# --- testing phase ---
# env._print() is called after every single testing iteration.
testing_iterations = 10000
for iteration in range(testing_iterations):
    agent.main()
    env._print()

while True:
    agent.main()