Example #1
def start_experiment(experiment):
    init_random_seeds(experiment.random_seed, cuda_determenistic=False)
    init_logger("logdir", experiment.logname)
    log().update_params(experiment.to_dict())

    try:
        train(experiment)
    finally:
        log().save_logs()
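
The key move in Example #1 is the try/finally: log().save_logs() runs whether train() returns normally or raises, so partial results survive a crash. A minimal self-contained sketch of the same pattern, using the standard-library logging module as a stand-in for the repository's log() helper:

import logging

def run_guarded(train_fn):
    # Mirrors start_experiment above: the finally block executes whether
    # train_fn returns normally or raises, so logs collected so far are
    # flushed either way; any exception still propagates afterwards.
    try:
        train_fn()
    finally:
        logging.shutdown()  # stand-in for log().save_logs()

def failing_train():
    raise RuntimeError("simulated crash mid-training")

# run_guarded(failing_train)  # logs are flushed before the error surfaces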
Example #2
from tqdm import tqdm
import torch


def train(experiment):
    env = experiment.env(random_state=experiment.random_seed)
    memory = ReplayMemory(experiment.hyperparams.memory_config.memory_size)
    controller = ControllerDQN(env=env,
                               memory=memory,
                               params=experiment.hyperparams,
                               prune_percent=experiment.prune_percent,
                               pruner=experiment.pruner,
                               stop_criterion=experiment.stop_criterion,
                               device=experiment.device)
    agent = Agent(env, controller)

    EXPLORE_ITERS = 1
    EXPLOIT_ITERS = 1

    episodes, prune_iters, opt_steps = experiment.episodes, experiment.prune_iters, experiment.opt_steps

    for iter in range(prune_iters):
        pbar = tqdm(range(episodes))
        cur_percent = (1 - experiment.prune_percent / 100)**iter  # fraction of weights remaining before this iteration's prune
        explore_plot = "Explore_iter" + str(iter) + "_prune" + str(cur_percent)
        exploit_plot = "Exploit_iter" + str(iter) + "_prune" + str(cur_percent)
        log().add_plot(explore_plot,
                       columns=("train_episode", "train_steps", "reward"))
        log().add_plot(exploit_plot,
                       columns=("train_episode", "train_steps", "reward"))

        for episode in pbar:
            # every EXPLORE_ITERS training rollouts, run EXPLOIT_ITERS greedy exploit rollouts
            if episode % EXPLORE_ITERS == EXPLORE_ITERS - 1:
                for _ in range(EXPLOIT_ITERS):
                    pbar.set_description(
                        "Iter[{}/{}] Episode [{}/{}] Step[{}/{}] Exploit".
                        format(iter + 1, prune_iters, episode + 1, episodes,
                               controller.steps_done, opt_steps))
                    exploit(agent, episode, exploit_plot)

            pbar.set_description(
                "Iter[{}/{}] Episode [{}/{}] Step[{}/{}] Explore".format(
                    iter + 1, prune_iters, episode + 1, episodes,
                    controller.steps_done, opt_steps))
            explore(agent, episode, explore_plot)

            if controller.steps_done >= opt_steps:
                break
            # never stop early on the last pruning iteration
            if controller.optimization_completed() and iter + 1 != prune_iters:
                break

            torch.cuda.empty_cache()

        log().save_logs()
        log().save_model(controller.get_state(),
                         "model:iter{}:{}".format(iter, cur_percent))

        controller.prune()
        controller.reinit()
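
Two details of the loop above are easy to miss. First, with EXPLORE_ITERS = EXPLOIT_ITERS = 1 the modulo test fires on every episode, so each training rollout is preceded by one greedy evaluation rollout; larger values thin the evaluations out. Second, cur_percent = (1 - prune_percent / 100)**iter together with the controller.prune() / controller.reinit() calls at the end of each outer iteration has the shape of lottery-ticket-style iterative magnitude pruning, though the snippet alone does not confirm which pruner is used. A tiny sketch of just the exploit/explore cadence, using illustrative values rather than the constants above:

EXPLORE_ITERS, EXPLOIT_ITERS = 3, 2  # illustrative, not the values used above

for episode in range(9):
    # fires on episodes 2, 5, 8: i.e. once per EXPLORE_ITERS episodes
    if episode % EXPLORE_ITERS == EXPLORE_ITERS - 1:
        for _ in range(EXPLOIT_ITERS):
            print(f"episode {episode}: exploit rollout")
    print(f"episode {episode}: explore rollout")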
Example #3
from tqdm import tqdm


def launch_after_training(params, net_state_dict, device, episodes, opt_steps):
    env = Assault(23)
    net = DQN(env.state_sz,
              env.action_sz,
              "vae",
              params["image_input"] == "True",
              device=device).to(device)
    net.load_state_dict(net_state_dict)
    controller = FixedController(
        # frozen greedy policy: always pick the argmax-Q action
        # (the explore flag is accepted but ignored)
        lambda state, explore: net(state.to(device)).max(1)[1].item())
    agent = Agent(env, controller)

    plot_name = "AfterTraining"
    log().add_plot(plot_name,
                   columns=("train_episode", "train_steps", "reward"))
    pbar = tqdm(range(episodes))
    total_steps = 0
    for episode in pbar:
        pbar.set_description("Episode [{}/{}] Step[{}/{}] Exploit".format(
            episode + 1, episodes, total_steps, opt_steps))

        reward, steps = agent.rollout(train=False)
        total_steps += steps
        log().add_plot_point(plot_name, (episode, total_steps, reward))

        if total_steps >= opt_steps:
            break

    log().save_logs()
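
The lambda passed to FixedController encodes a frozen greedy policy: net(state).max(1)[1] takes the argmax over the action dimension of a batch-first Q-value tensor. A minimal sketch of that indexing, independent of the repository's DQN:

import torch

q_values = torch.tensor([[0.1, 2.5, -0.3]])  # one state, three actions
greedy_action = q_values.max(1)[1].item()    # max(1) -> (values, indices)
assert greedy_action == torch.argmax(q_values, dim=1).item() == 1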
Example #4
def exploit(agent, train_episode, plot_name):
    # greedy evaluation rollout: no training updates; the reward feeds
    # both the plot and the running "stability" metric
    reward, steps = agent.rollout(train=False)
    log().add_plot_point(plot_name,
                         (train_episode, agent.controller.steps_done, reward))
    agent.controller.metrics["stability"].add(reward)
Example #5
def explore(agent, train_episode, plot_name):
    # training rollout: the agent acts with exploration and performs updates
    reward, steps = agent.rollout(train=True)
    log().add_plot_point(plot_name,
                         (train_episode, agent.controller.steps_done, reward))
Example #6
 def add(self, value):
     # value is a sequence of tensors; needs "import json" at module level
     log().add_plot_point(self.name, json.dumps([x.item() for x in value]))
Example #7
 def __init__(self, name):
     self.name = name
     log().add_plot(name, columns=("metric_value", ))
Example #8
 def get_plot(self):
     return log().get_plot(self.name)
Example #9
 def add(self, value):
     # scalar variant: logs the value as-is, without JSON serialisation
     log().add_plot_point(self.name, value)
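
Examples #6 through #9 read like fragments of one small metric-wrapper class around the global log() helper. A hedged reassembly, assuming the class name Metric and the module-level import (neither appears in the fragments themselves):

import json

class Metric:
    # Assumed container for the methods shown in Examples #6-#9;
    # log() is the repository's global logger accessor.
    def __init__(self, name):
        self.name = name
        log().add_plot(name, columns=("metric_value", ))

    def add(self, value):
        # Example #6's variant: value is a sequence of tensors,
        # serialised to a JSON list of Python scalars
        log().add_plot_point(self.name, json.dumps([x.item() for x in value]))

    def get_plot(self):
        return log().get_plot(self.name)

Example #9 looks like an alternative add for plain scalar values, so the two variants presumably come from different revisions or classes in the same codebase.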