Example #1
def __init__(self,
             architecture,
             min_episode=50000,
             train_lvls=None,
             test_lvl=None,
             description=None,
             additional_description=None):
    if train_lvls is None:
        description = 'Untrained'
    elif description is None:
        description = self.make_description(train_lvls)
    # prepend a space so the extra text reads naturally after the description
    additional_description = '' if additional_description is None else ' ' + additional_description
    self.description = description + additional_description
    # use an untrained agent if either the train levels or the test level is missing,
    # otherwise load the previously saved agent matching this description
    if train_lvls is None or test_lvl is None:
        self.agent = agents.DQNAgent(min_episode, architecture=architecture)
    else:
        self.agent = agents.DQNAgent.load_agent(self.description)
    # prefer the test level, fall back to the train levels, default to level 1
    if test_lvl is not None:
        self.env = golfenv.Env(test_lvl)
    elif train_lvls is not None:
        self.env = golfenv.Env(train_lvls)
    else:
        self.env = golfenv.Env(levels=[1])
    self.current_episode = 1
    self.min_episode = min_episode
    self.episode_rewards = []
    self.loss_table = []
    self.transition_list = []
    self.is_done = False
Example #2
    # INIT ENV
    env, observation_space, action_space = build_env_wrapper(
        params["env_name"], env_type=params["env_type"])

    # LOGGING
    writer = SummaryWriter(comment="-" + params["run_name"] + "-noisy")

    # NETWORK
    net = dqn_noisy_net.Network(env.observation_space.shape,
                                env.action_space.n).to(device)
    tgt_net = agents.TargetNetwork(net)

    # AGENT
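    # the NoisyNet layers inside the network drive exploration, so a plain greedy (argmax) selector is enough here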
    selector = actions.ArgmaxActionSelector()
    agent = agents.DQNAgent(net, selector, device=device)

    # RUNNER
    # steps_count could be raised here to use multi-step (n-step) returns
    exp_source = runner.RunnerSourceFirstLast(
        env, agent, gamma=params["gamma"])
    buffer = ExperienceReplayBuffer(exp_source,
                                    buffer_size=params["replay_size"])
    optimizer = optim.Adam(net.parameters(), lr=params["learning_rate"])

    frame_idx = 0

    # TRAIN
    with logger.RewardTracker(writer, params["stop_reward"]) as reward_tracker:
        while True:
            frame_idx += 1
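Example #2 is cut off just after the loop begins. For orientation, below is a minimal sketch of the TD update such a loop performs each iteration once the replay buffer has been populated and a batch sampled; the helper name dqn_loss and its tensor arguments are illustrative, not part of the original code, which would also fill the buffer and periodically sync tgt_net through its own runner/agents modules.

import torch
import torch.nn as nn


def dqn_loss(net, tgt_net, states, actions, rewards, dones, next_states, gamma):
    """One-step DQN loss: MSE between Q(s, a) and r + gamma * max_a' Q_tgt(s', a').

    actions is a LongTensor of action indices; dones is a boolean mask of terminals.
    """
    q_sa = net(states).gather(1, actions.unsqueeze(-1)).squeeze(-1)
    with torch.no_grad():
        next_q = tgt_net(next_states).max(dim=1)[0]
        next_q[dones] = 0.0  # terminal transitions do not bootstrap
        target = rewards + gamma * next_q
    return nn.functional.mse_loss(q_sa, target)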
Example #3
def connect4(path='data/connect4', seed=161831415):
    env = envs.Connect4()

    rand_epochs = 1000
    ai_epochs = 0

    test_games = 500
    mem_size = 200
    log_freq = 100

    # 3 states per position
    depth = 3
    # The state is preprocessed and has this shape now
    dim_state = [depth, *env.n_state]
    log = Logger(log_freq)
    # Simple dqn
    net = dqn.Conn(depth, env.n_action)
    ai = agents.DQNAgent(env.n_state,
                         env.n_action,
                         net,
                         logger=log,
                         lr=1e-3,
                         discount_factor=.98,
                         exploration_decay=.98,
                         exploration_min=.1,
                         state_preprocessor=f_one_hot_state(depth,
                                                            -1,
                                                            new_size=[1] +
                                                            dim_state))
    mem = LinearMemory(dim_state, mem_size, ai.learn)
    # Train first against random agent
    rand_act = envs.Connect4.random_act()

    # Loading
    # TODO : ai.load(path)

    # Training
    print('Training vs random')
    train(ai, rand_act, mem, env, rand_epochs, log, False)
    # print('Training vs ai')
    # TODO : train(ai, ai.act, mem, env, ai_epochs, log, True)

    # Saving
    # TODO : ai.save(path)

    # Testing
    ai.exploration_rate = 0
    win, draw = test(ai.act,
                     rand_act,
                     env,
                     games=test_games,
                     state_preprocessor=ai.state_preprocessor)

    print(f'Test on {test_games} games : Victories : {win} Draws : {draw}')
    print(f'Win or draw rate : {(win + draw) / test_games * 100:.1f} %')

    # Playing
    while True:
        print('New Game')
        p1, p2 = play(ai.act,
                      user_act(env.n_action),
                      env,
                      state_preprocessor=ai.state_preprocessor)
        if p1 > 0:
            print('AI won')
        elif p2 > 0:
            print('You won')
        else:
            print('Error / Draw')
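The preprocessing helper f_one_hot_state used above is not shown in the listing. As a rough sketch, such a helper could turn a Connect 4 board whose cells take the values -1, 0 and 1 into depth one-hot planes; the name, signature and reshape below are assumptions made for illustration, not the original implementation.

import numpy as np


def f_one_hot_state(depth, offset, new_size):
    """Build a preprocessor mapping integer cell values in [offset, offset + depth)
    to stacked one-hot planes reshaped to new_size (e.g. [1, depth, rows, cols])."""
    def preprocess(state):
        shifted = np.asarray(state) - offset  # e.g. {-1, 0, 1} -> {0, 1, 2}
        planes = np.stack([(shifted == i).astype(np.float32) for i in range(depth)])
        return planes.reshape(new_size)
    return preprocess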
Example #4
    net = models.SimpleFFDQN(env.get_obs_len(), env.get_action_space_size())

    # load the network
    if RL_options['load_net']:
        with open(
                os.path.join(RL_options['net_saved_path'],
                             RL_options['net_file']), "rb") as f:
            checkpoint = torch.load(f)
        net = models.SimpleFFDQN(env.get_obs_len(),
                                 env.get_action_space_size())
        net.load_state_dict(checkpoint['state_dict'])

    # move the network to the GPU, then build the agent, experience source and replay buffer
    net.to(torch.device("cuda"))
    selector = actions.EpsilonGreedyActionSelector(RL_options['epsilon_start'])
    agent = agents.DQNAgent(net, selector)
    # agent = agents.Supervised_DQNAgent(net, selector, sample_sheet, assistance_ratio=0.2)
    exp_source = experience.ExperienceSourceFirstLast(
        env,
        agent,
        RL_options['gamma'],
        steps_count=RL_options['reward_steps'])
    buffer = experience.ExperienceReplayBuffer(exp_source,
                                               RL_options['replay_size'])

    # create optimizer
    optimizer = optim.Adam(net.parameters(), lr=RL_options['lr'])

    # create net pre-processor
    net_processor = common.netPreprocessor(net, agent.target_model)
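The snippet stops before the training loop, so the epsilon schedule is not visible. Setups like this usually anneal the selector's epsilon each frame; ptan's EpsilonGreedyActionSelector exposes a writable epsilon attribute, but whether this local actions module mirrors that is an assumption, as is the helper sketched below.

def epsilon_by_frame(frame_idx, eps_start, eps_final=0.02, decay_frames=100_000):
    """Linearly anneal epsilon from eps_start down to eps_final over decay_frames."""
    return max(eps_final, eps_start - frame_idx * (eps_start - eps_final) / decay_frames)


# inside the training loop (illustrative):
# selector.epsilon = epsilon_by_frame(frame_idx, RL_options['epsilon_start'])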
Example #5
    env, observation_space, action_space = build_env_wrapper(
        params["env_name"], env_type=params["env_type"])

    # LOGGING
    writer = SummaryWriter(comment="-" + params["run_name"] + "-distrib")

    # NETWORK
    net = dqn_distributional_net.Network(env.observation_space.shape,
                                         env.action_space.n).to(device)
    tgt_net = agents.TargetNetwork(net)

    # AGENT
    selector = actions.EpsilonGreedyActionSelector(
        epsilon=params["epsilon_start"])
    epsilon_tracker = logger.EpsilonTracker(selector, params)
    agent = agents.DQNAgent(lambda x: net.qvals(x), selector, device=device)

    # RUNNER
    exp_source = runner.RunnerSourceFirstLast(env,
                                              agent,
                                              gamma=params["gamma"],
                                              steps_count=1)
    buffer = ExperienceReplayBuffer(exp_source,
                                    buffer_size=params["replay_size"])
    optimizer = optim.Adam(net.parameters(), lr=params["learning_rate"])

    frame_idx = 0

    # TRAIN
    with logger.RewardTracker(writer, params["stop_reward"]) as reward_tracker:
        while True:
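The agent in Example #5 wraps the network as lambda x: net.qvals(x) because a distributional head outputs a probability distribution over returns per action rather than a scalar. Below is a minimal sketch of how expected Q-values are usually recovered from a C51-style categorical output; the support range and atom count are common defaults assumed here, not taken from dqn_distributional_net.

import torch

V_MIN, V_MAX, N_ATOMS = -10.0, 10.0, 51          # typical C51 settings (assumed)
SUPPORT = torch.linspace(V_MIN, V_MAX, N_ATOMS)  # atom locations z_i


def qvals_from_logits(logits):
    """Collapse a categorical return distribution to expected Q-values.

    logits: [batch, n_actions, N_ATOMS] raw network output.
    returns: [batch, n_actions] with Q(s, a) = sum_i p_i(s, a) * z_i.
    """
    probs = torch.softmax(logits, dim=-1)        # normalise over atoms
    return (probs * SUPPORT).sum(dim=-1)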
Example #6
import agents
import pong_env
import pygame
import numpy as np
import matplotlib.pyplot as plt

player_random = agents.RandomAgent(3)
player_dqn = agents.DQNAgent(5, 3)

'''
#player dqn uses saved model
model = "q_model_4_5"
player_dqn.q_from_load_model(model)
print("Model loaded.")
'''

num_play = 100
clock = pygame.time.Clock()
scores = []
epsilons = []

break_learning = False
for i in range(num_play):
    done = False
    exits = False
    score = 0
    game = pong_env.Pong()
    while not done and not exits:
        # terminate if the user quits
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                exits = True
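The listing ends inside the game loop, but the scores and epsilons lists together with the matplotlib import suggest the script plots learning curves once the games finish. A small sketch of such a plot follows, assuming scores holds one total reward per game; the helper name and the smoothing window are illustrative, not from the original script.

import numpy as np
import matplotlib.pyplot as plt


def plot_learning_curve(scores, epsilons, window=10):
    """Plot per-game score, its moving average, and the exploration rate."""
    smoothed = np.convolve(scores, np.ones(window) / window, mode="valid")
    fig, ax1 = plt.subplots()
    ax1.plot(scores, alpha=0.3, label="score")
    ax1.plot(range(window - 1, len(scores)), smoothed, label=f"{window}-game average")
    ax1.set_xlabel("game")
    ax1.set_ylabel("score")
    ax2 = ax1.twinx()                     # second axis for epsilon
    ax2.plot(epsilons, color="tab:red", label="epsilon")
    ax2.set_ylabel("epsilon")
    ax1.legend(loc="upper left")
    ax2.legend(loc="upper right")
    plt.show()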