示例#1
0
    def __init__(self, config_file_name, seed=0, verbose=False):
        """Create the black-box environment, the DQN agent and the run lengths.

        Args:
            config_file_name: forwarded to ``agent_dqn.DQNAgent`` as its
                second positional argument.
            seed: forwarded to ``blackbox.EnvBlackBox``.
            verbose: stored on ``self.verbose``; when truthy the environment
                info is printed during construction.
        """
        self.verbose = verbose

        #the environment comes first, the agent below is built around it
        self.env = blackbox.EnvBlackBox(seed)
        if verbose:
            self.env.print_info()

        #DQN agent; the three trailing numbers are forwarded positionally
        #NOTE(review): presumably exploration settings - confirm in DQNAgent
        #(the original kept agent.Agent(self.env) around as an alternative)
        self.agent = agent_dqn.DQNAgent(
            self.env, config_file_name, 0.3, 0.05, 0.99999)

        #number of iterations for the training and the testing phase
        self.training_iterations, self.testing_iterations = 100000, 10000
示例#2
0
import sys
sys.path.append("..")  # Adds higher directory to python modules path.

import libs.libs_env.env_birds
import agent
import agent_dqn

#environment the agent is trained in
env = libs.libs_env.env_birds.EnvBirds()
env.print_info()

#DQN agent built from the network description file
#(the original kept agent.Agent(env) around as an alternative)
#NOTE: this assignment rebinds the name `agent`, hiding the imported module
agent = agent_dqn.DQNAgent(env, "flappy_bird_net.json")

#training phase
training_iterations = 500000

for step in range(training_iterations):
    agent.main()

    #every 100th step report the progress in % and the current score
    if step % 100 == 0:
        print("training done = ", 100.0 * step / training_iterations,
              " score = ", env.get_score())

#testing phase: reset the score and call run_best_enable() before running
env.reset_score()
agent.run_best_enable()

testing_iterations = 100000

for step in range(testing_iterations):
    agent.main()
#example for convolutional and deep neural network use

import sys
sys.path.append("..")  # Adds higher directory to python modules path.

import libs.libs_env.env_pong
import agent_dqn

#environment: pong
env = libs.libs_env.env_pong.EnvPong()
env.print_info()

#DQN agent built from the network description file; the three trailing
#numbers are forwarded positionally to DQNAgent
agent = agent_dqn.DQNAgent(env, "pong_network.json", 0.2, 0.01, 0.99999)

#training
training_iterations = 200000

for step in range(training_iterations):
    agent.main()

    #every 100th iteration print the training progress in % and the score
    if not step % 100:
        print(step * 100.0 / training_iterations, env.get_score())

#training is over: reset the score ...
env.reset_score()

#... and call run_best_enable() so only the best action is chosen
agent.run_best_enable()
#Setup for the Atari arkanoid example: environment, DQN agent, game budget.
#NOTE(review): the imports for this snippet are not visible here.
env = libs.libs_env.env_atari_arkanoid.EnvAtariArkanoid()

#print environment info
env.print_info()
#The string literal below holds a disabled random-play test. It is a bare
#expression statement, so none of the code inside it is executed.
'''
#random play environment test
random_agent = agent.Agent(env)
while True:
    random_agent.main()

    if env.get_iterations()%256 == 0:
        print("  miss ",  env.get_miss(),  " iterations = ",  env.get_iterations())
        env.render()
'''
#init DQN agent; the three trailing numbers are forwarded positionally
#NOTE(review): presumably exploration settings - confirm in DQNAgent
dqn_agent = agent_dqn.DQNAgent(env, "atari_arkanoid_network.json", 0.4, 0.1,
                               0.99999)

#process training: the loop below runs until this many games were played
total_games_to_play = 500

while env.get_games_count() < total_games_to_play:
    dqn_agent.main()

    #print and render the environment on every 256th iteration
    if env.get_iterations() % 256 == 0:
        env._print()
        env.render()

    if env.get_iterations() % 256 == 0:
        print("done = ",
              env.get_games_count() * 100.0 / total_games_to_play, "%",
#Black-box example: create the environment, train a DQN agent on it and
#reset the score afterwards.
#NOTE(review): the imports for this snippet are not visible here.

#init environment (the argument 4 is passed to the constructor unchanged)
env = env_black_box.EnvBlackBox(4)

#print environment info
env.print_info()

#random play environment test
#The test loop never terminates, so while it is enabled nothing below it is
#ever reached. It is switched off by default so that the DQN training runs;
#set the flag to True to run the random agent instead.
run_random_play_test = False

if run_random_play_test:
    random_agent = agent.Agent(env)
    while True:
        random_agent.main()

        if env.get_iterations() % 256 == 0:
            print(" iterations = ", env.get_iterations(), " score = ",
                  env.get_score())

#init DQN agent; the three trailing numbers are forwarded positionally
#NOTE(review): presumably exploration settings - confirm in DQNAgent
dqn_agent = agent_dqn.DQNAgent(env, "black_box_network.json", 0.1, 0.05,
                               0.99999)

#process training
training_iterations = 100000

for i in range(0, training_iterations):
    dqn_agent.main()

    #report whenever the environment iteration counter is a multiple of 256
    if env.get_iterations() % 256 == 0:
        print(" iterations = ", env.get_iterations(), " score = ",
              env.get_score(), " epsilon = ", dqn_agent.get_epsilon_training())

#reset score
env.reset_score()

#choose only the best action
示例#6
0
import sys

sys.path.append("..")  # Adds higher directory to python modules path.

import libs.libs_env.env_pong
import libs.libs_env.env_arkanoid
import agent
import agent_dqn

#environment: arkanoid
#(the original kept libs.libs_env.env_pong.EnvPong() around as an alternative)
env = libs.libs_env.env_arkanoid.EnvArkanoid()
env.print_info()

#DQN agent built from the network description file
#(the original kept agent.Agent(env) around as an alternative)
#NOTE: this assignment rebinds the name `agent`, hiding the imported module
agent = agent_dqn.DQNAgent(env, "arkanoid_network.json")

#training phase
training_iterations = 250000

for step in range(training_iterations):
    agent.main()

    #every 100th step report the progress in % and the current score
    if step % 100 == 0:
        print("training done = ", 100.0 * step / training_iterations,
              " score = ", env.get_score())

#save() is called with the target path once training has finished
agent.save("arkanoid_network/")

#reset the score and call run_best_enable() on the agent
env.reset_score()
agent.run_best_enable()