state_ = rgb2dataset(state_)
model.memory(state, action, reward, done)
accum_reward += reward
model.step += 1
state = state_

# Transition: keep a rolling window of the last 4 frames
transition.append(state)
if len(transition) > 4:
    transition.pop(0)

if model.step > model.train_start_step and model.step % model.train_step_interval == 0:
    model.train()
if model.step % model.target_update_interval == 0:
    model.update_target()

if is_render:
    env.render()

if done:
    writer.add_scalar('reward/accum', accum_reward, model.step)
    writer.add_scalar('data/epsilon', model.epsilon, model.step)
    writer.add_scalar('data/x_pos', info['x_pos'], model.step)
    print("Episode : %5d\t\tSteps : %10d\t\tReward : %7d\t\tX_step : %4d\t\tEpsilon : %.3f"
          % (model.episode, model.step, accum_reward, info['x_pos'], model.epsilon))
    if save_model and model.episode % 100 == 0:
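# The loop above calls a rgb2dataset helper that is not shown in this
# section. A minimal sketch of what such a frame preprocessor could do,
# assuming OpenCV and an 84x84 grayscale target (both assumptions, not
# confirmed by the source):
import cv2
import numpy as np

def rgb2dataset(frame, size=(84, 84)):
    """Hypothetical preprocessor sketch; the actual rgb2dataset may differ.
    Converts an RGB frame to a normalized grayscale array as network input."""
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)                   # drop color channels
    resized = cv2.resize(gray, size, interpolation=cv2.INTER_AREA)   # downscale
    return resized.astype(np.float32) / 255.0                        # scale pixels to [0, 1]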
gamma = 0.99
lr = 3e-4
buffer_size = 50000
learning_starts = 300
grad_clip = 10
plot_freq = 1000

losses = []
all_rewards = []
episode_reward = 0
saved_mean_reward = None

# Create the DQN
dqn = DQN(observation_size, num_actions, device=device, lr=lr, dueling=True, gamma=gamma)
# Update the DQN target network to match the online network's weights
dqn.update_target()

replay_buffer = ReplayBuffer(buffer_size)
target_network_update_freq = 200
train_freq = 1
checkpoint_freq = 3000
num_episodes = 0
model_file = os.path.join(os.getcwd(), "turtlebot_model_test")

state = env.reset()
ep_no = 0  # episode number counter
teleop = False  # teleop=True
if teleop == False:  # RL learning happens, no teleop mode
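# ReplayBuffer is defined elsewhere in the project. A minimal sketch of a
# uniform replay buffer matching the ReplayBuffer(buffer_size) constructor
# used above (an assumption; the real class may store data differently):
import random
from collections import deque

class ReplayBuffer:
    """Hypothetical uniform replay buffer sketch; the actual class may differ."""
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)  # oldest transitions are evicted automatically

    def push(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)  # uniform sampling without replacement
        states, actions, rewards, next_states, dones = zip(*batch)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.buffer)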