max_memory_size=MAX_MEMORY_SIZE, num_episodes=NUM_EPISODES, replace_after=REPLACE_AFTER, use_per=USE_PER, per_a=1) # Filling memory print("Filling replay memory") state = env.reset() if USE_TB: writer.add_graph(agent.Q_loc, torch.FloatTensor([state]).to(device)) for t in range(200): action = env.action_space.sample() next_state, reward, done, info = env.step(action) agent.store_transition(state, action, reward, next_state, done) state = next_state if done: state = env.reset() print("Finished filling memory") # Learning print("Started learning") rewards = [] time_learned = [] n_episode = 100 time_per_n_episode = datetime.now() total_cleared = 0
alpha=ALPHA, max_memory_size=MAX_MEMORY_SIZE, num_episodes=NUM_EPISODES, replace_after=REPLACE_AFTER, use_per=USE_PER) # Filling memory print("Filling replay memory") state = env.reset() if USE_TB: writer.add_graph(agent.Q_loc, torch.FloatTensor([downsample(state)]).to(device)) for t in range(2000): action = env.action_space.sample() next_state, reward, done, info = env.step(action) agent.store_transition(downsample(state), action, reward, downsample(next_state), done) state = next_state if done: state = env.reset() print("Finished filling memory") # Learning print("Started learning") rewards = [] time_learned = [] n_episode = 100 time_per_n_episode = datetime.now() for i_episode in range(NUM_EPISODES): stepcount = 0 state = env.reset()