Пример #1
0
        "Visualy test Agent, after reset top frame should be a reduced image of size {} by {}"
        .format(width, height))
    # Manual, interactive check: reset the agent and display the last entry
    # of agent.state so the operator can inspect it by eye.
    # NOTE(review): `io` is presumably skimage.io (the stdlib io module has
    # no imshow) and `plt` matplotlib.pyplot -- confirm against the imports.
    agent._reset()
    io.imshow(agent.state[-1])
    plt.show()
    # input() reads a whole line, so the operator has to press Enter even
    # though the prompt says "any key".
    input("Press any key to continue")
    plt.close()

    print(
        "resetting experience buffer, then observe state and invoke agent play method 100 times ..."
    )
    # Empty the container held in experience_buffer.buffer and reset the
    # agent again before calling play_step 100 times.
    experience_buffer.buffer.clear()
    agent._reset()
    for i in range(100):
        # Always True here; presumably asks play_step to pick a random
        # action -- TODO confirm against play_step's signature.
        play_random = True
        agent.play_step(play_random)
    # `capacity` is defined outside this fragment.
    print("number of experiences in the buffer: {}. Buffer capacity {}".format(
        len(experience_buffer), capacity))

    input(
        "Press any key to see a random initial state image from the experience buffer"
    )
    # Draw a single experience; sample() is unpacked into five parallel
    # collections, so index 0 below is that one sampled experience.
    states, actions, rewards, dones, next_states = experience_buffer.sample(1)
    # Show element 3 of the sampled state.
    # NOTE(review): presumably the newest of four stacked frames -- confirm
    # the state layout.
    io.imshow(states[0][3])
    plt.show()
    input("Press any key to see the next state image for the same experience")
    plt.close()
    # Same element of the successor state from the same experience.
    io.imshow(next_states[0][3])
    plt.show()
    input("Press any key to continue")
Пример #2
0
    # to track the mean across epochs
    total_mean = 0
    total_count = 0

    # Create a per-run output directory named after the current (naive,
    # local) timestamp, e.g. "models/20240131-09-15-42". exist_ok=True makes
    # the call a no-op if the directory is already there.
    dt = datetime.datetime.now()
    date_time_string = dt.strftime("%Y%m%d-%H-%M-%S")
    model_path = "models/" + date_time_string
    os.makedirs(model_path, mode=0o755, exist_ok=True)
    # Counters initialised to zero; they are not updated within this
    # fragment.
    steps = 0
    last_reward_step = 0

    # args.initialise_buffer selects how the replay buffer is prepared:
    #   "init" -> play args.replay_buffer_size steps, pickle the buffer, exit
    #   "load" -> replace the buffer with one unpickled from disk
    #   other  -> play initialisation_steps steps in-process
    # NOTE(review): `trange` is presumably tqdm.trange (progress-bar range);
    # confirm against the imports.
    if args.initialise_buffer == "init":
        print("Initialising and saving buffer with %d transitions" %
              args.replay_buffer_size)
        for step in trange(args.replay_buffer_size, leave=False):
            # NOTE(review): write_to_buffer=False is passed although this
            # branch goes on to save the buffer -- confirm that play_step
            # still stores these transitions in experience_buffer, otherwise
            # the pickled buffer will not contain them.
            agent.play_step(True, device, write_to_buffer=False)

        # The file name is relative, so it is written to the current
        # working directory.
        with open("experience_buffer.pickle", "wb") as f:
            pickle.dump(experience_buffer, f)
        # Initialisation-only run: stop the program once the buffer is
        # saved (sys.exit raises SystemExit).
        sys.exit()

    elif args.initialise_buffer == "load":
        print("loading buffer ...")
        # SECURITY: unpickling can execute arbitrary code; only load a
        # pickle file that this program produced itself.
        # NOTE(review): this rebinds the local name only -- if `agent` holds
        # a reference to the previous buffer object it will not see the
        # loaded one; confirm how the agent reaches the buffer.
        with open("experience_buffer.pickle", "rb") as f:
            experience_buffer = pickle.load(f)
            print("buffer size: {}".format(len(experience_buffer)))

    else:
        # `initialisation_steps` is defined outside this fragment.
        print("Initialising buffer with %d transitions" % initialisation_steps)
        for step in trange(initialisation_steps, leave=False):
            # Same call as in the "init" branch but without
            # write_to_buffer, so play_step's default for it applies.
            agent.play_step(True, device)