def main(self, args):
    """
    Train and save the DQN model, for the cartpole problem.

    :param args: (ArgumentParser) the input arguments; only
        ``args.max_timesteps`` is read.
    """
    # BUG FIX: the environment creation had been commented out while the
    # DQN(...) call below still referenced `env`, so this function raised
    # NameError at runtime. Recreate the environment explicitly.
    env = gym.make('CartPole-v1')
    model = DQN(
        env=env,
        policy=CustomPolicy,
        learning_rate=1e-3,
        buffer_size=50000,
        # Very short exploration schedule: anneal epsilon over the first
        # 1% of training down to a 2% random-action floor.
        exploration_fraction=0.01,
        exploration_final_eps=0.02,
        verbose=1,
    )
    # self.callback is invoked every step and may stop training early.
    model.learn(total_timesteps=args.max_timesteps, callback=self.callback)
    print("Saving model to cartpole_model.pkl")
    model.save("cartpole_model.pkl")
def main(args):
    """
    Train and save a DQN model for the slab-installing task in the
    V-REP simulator environment (``vrep-v0``).

    (Docstring fixed: it previously claimed this trained cartpole, but the
    function trains on vrep-v0 and saves slab_installing_model.pkl.)

    :param args: (ArgumentParser) the input arguments; only
        ``args.max_timesteps`` is read.
    """
    env = gym.make('vrep-v0')
    model = DQN(
        env=env,
        gamma=0.95,
        policy=MlpPolicy,
        verbose=1,
        learning_rate=1e-4,
        buffer_size=50000,
        train_freq=1,
        learning_starts=100,          # warm-up steps before updates begin
        batch_size=64,
        checkpoint_freq=3000,         # periodic snapshots under ./model/
        checkpoint_path='./model/',
        target_network_update_freq=300,
        prioritized_replay=True,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        tensorboard_log='./log',
    )
    # To resume from a checkpoint instead, load with DQN.load(path, env, ...)
    model.learn(total_timesteps=args.max_timesteps, callback=callback,
                log_interval=30)
    print("Saving model to slab_installing_model.pkl")
    model.save("slab_installing_model.pkl")
def train(env, fname):
    """
    Train a DQN agent on *env* for ``STEPS`` timesteps and save it.

    :param env: the environment to train on (rendering is disabled first)
    :param fname: path the trained model is written to
    """
    env.setRender(False)
    env.reset()

    started_at = time.time()
    agent = DQN(
        env=env,
        policy=CustomPolicy,
        learning_rate=1e-3,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
    )
    agent.learn(total_timesteps=STEPS, callback=callback)

    # Persist the trained weights, then report wall-clock time in minutes.
    agent.save(fname)
    elapsed_minutes = (time.time() - started_at) / 60
    print("Duration: %.1f" % elapsed_minutes)
def main(args):
    """
    Train and save the DQN model, for the cartpole problem.

    :param args: (ArgumentParser) the input arguments; only
        ``args.max_timesteps`` is read.
    """
    cartpole_env = gym.make("CartPole-v0")

    agent = DQN(
        policy=MlpPolicy,
        env=cartpole_env,
        learning_rate=1e-3,
        buffer_size=50000,
        # Anneal epsilon over the first 10% of training, down to 2%.
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
    )
    # `callback` is called each step and may terminate training early.
    agent.learn(total_timesteps=args.max_timesteps, callback=callback)

    print("Saving model to cartpole_model.pkl")
    agent.save("cartpole_model.pkl")
def main(args):
    """
    Train and save the DQN model, for the mountain car problem.

    :param args: (ArgumentParser) the input arguments; only
        ``args.max_timesteps`` is read.
    """
    mountaincar_env = gym.make("MountainCar-v0")

    # NOTE: a layer-norm policy is important when parameter-space noise
    # (`param_noise=True`) is enabled — hence CustomPolicy here.
    agent = DQN(
        policy=CustomPolicy,
        env=mountaincar_env,
        learning_rate=1e-3,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.1,
        param_noise=True,
    )
    agent.learn(total_timesteps=args.max_timesteps)

    print("Saving model to mountaincar_model.pkl")
    agent.save("mountaincar_model.pkl")