Example #1
    # Every save_ckpt_interval epochs, check whether a checkpoint file exists.
    # If one is found, load it and resume training.
    # The i_epoch of the intended checkpoint must be specified.
    # if i_epoch % save_ckpt_interval == 0 and os.path.isfile(os.path.join(ckpt_dir, "ckpt_eps%d.pt" % i_epoch)):
    #     policy_net, value_net_in, value_net_ex, valuenet_in_optimizer, valuenet_ex_optimizer,\
    #     simhash, training_info = \
    #         load_checkpoint(ckpt_dir, i_epoch, layer_sizes, input_size, device=device)
    #     print("\n\tCheckpoint successfully loaded!\n")

    # To record episode stats
    episode_durations = []
    episode_rewards = []

    # Put the value nets in evaluation mode while collecting trajectories
    value_net_in.eval()
    value_net_ex.eval()
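    # (.eval() disables dropout and makes batch norm use its running
    # statistics, so value estimates stay deterministic during rollouts)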

    ###################################################################
    # Collect trajectories

    print("\n\n\tCollecting %d episodes: " % (batch_size))

    for i_episode in tqdm(range(batch_size)):  # Use tqdm to show progress bar

        # Keep track of the running reward
        running_reward = 0

        # Initialize the environment and state
        current_state = env.reset()
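
For context, a loader consistent with the calls in these examples might look like the following minimal sketch. Only the file-name pattern ("ckpt_eps%d.pt") and the general call shape come from the examples; the checkpoint key names, the restore-in-place design, and the helper's name are illustrative assumptions, not the original repository's API.

import os

import torch


def load_checkpoint_sketch(ckpt_dir, i_epoch, policy_net, value_net,
                           valuenet_optimizer, device="cpu"):
    """Hypothetical loader sketch: restore state dicts in place and return
    the saved training_info. All key names are assumptions."""
    path = os.path.join(ckpt_dir, "ckpt_eps%d.pt" % i_epoch)
    ckpt = torch.load(path, map_location=device)
    policy_net.load_state_dict(ckpt["policy_state"])
    value_net.load_state_dict(ckpt["value_state"])
    valuenet_optimizer.load_state_dict(ckpt["optimizer_state"])
    return ckpt["training_info"]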
Example #2
    finished_rendering_this_epoch = False

    # Every save_ckpt_interval epochs, check whether a checkpoint file exists.
    # If one is found, load it and resume training.
    # The i_epoch of the intended checkpoint must be specified.
    if i_epoch % save_ckpt_interval == 0 and os.path.isfile(
            os.path.join(ckpt_dir, "ckpt_eps%d.pt" % i_epoch)):
        policy_net, value_net, valuenet_optimizer, training_info = \
            load_checkpoint(ckpt_dir, i_epoch, layer_sizes, input_size, device=device)
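        # Confirm that the checkpoint was restored
        print("\n\tCheckpoint successfully loaded!\n")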

    # To record episode stats
    episode_durations = []
    episode_rewards = []

    # Put the value net in evaluation mode while collecting trajectories
    value_net.eval()

    ###################################################################
    # Collect trajectories

    print("\n\n\tCollecting %d episodes: " % (batch_size))

    for i_episode in tqdm(range(batch_size)):  # Use tqdm to show progress bar

        # Keep track of the running reward
        running_reward = 0

        # Initialize the environment and state
        current_state = env.reset()
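        # (Assumes the classic Gym API, where reset() returns only the
        # observation; gym >= 0.26 returns an (observation, info) tuple)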

        # Estimate the value of the initial state