Пример #1
0
# Step counter initialised to zero; where it is incremented is not visible in
# this chunk.
n_steps = 0
os.makedirs(log_dir, exist_ok=True)

################ MODEL AND GYM ENVIRONMENT

# Build the gym environment selected by ENVIRONMENT. Any other value leaves
# `env` undefined, exactly as before.
if ENVIRONMENT == 'rgbd':
  env = tm700_rgbd_gym(renders=RENDERS, isDiscrete=DISCRETE)
elif ENVIRONMENT == 'rgb':
  env = tm700_rgb_gym(renders=RENDERS, isDiscrete=DISCRETE)
elif ENVIRONMENT == 'possensor':
  env = tm700_possensor_gym(renders=RENDERS, isDiscrete=DISCRETE)

# Every environment built above is wrapped in the same Monitor, writing to
# monitor.csv inside log_dir.
if ENVIRONMENT in ('rgbd', 'rgb', 'possensor'):
  env = Monitor(env, os.path.join(log_dir, 'monitor.csv'), allow_early_resets=True)

if MODEL == 'DQN':
  from stable_baselines.deepq.policies import LnCnnPolicy, MlpPolicy

  # NOTE(review): 'rgbdsparse' is accepted here, but no environment is created
  # for it in the visible code, so `env` would be undefined -- confirm whether
  # that environment is built elsewhere.
  if ENVIRONMENT in ['rgbd', 'rgb', 'rgbdsparse']:
    model = DQN(
        LnCnnPolicy, env, verbose=1,
        # Format only the run-specific directory name and then join it onto
        # log_dir: a '%' inside log_dir can no longer break the formatting,
        # and a log_dir without a trailing separator still yields a
        # sub-directory (consistent with the monitor.csv path above).
        tensorboard_log=os.path.join(
            log_dir, "tensorboard_%s_%s_%s/" % (MODEL, ENVIRONMENT, DATE)),
        gamma=0.99, learning_rate=0.0001, buffer_size=50000,
        exploration_fraction=0.1, exploration_final_eps=0.02,
        train_freq=1, batch_size=32, double_q=True, learning_starts=1000,
        target_network_update_freq=500,
        prioritized_replay=True, prioritized_replay_alpha=0.6,
        prioritized_replay_beta0=0.4, prioritized_replay_beta_iters=None,
        prioritized_replay_eps=1e-06,
        param_noise=False, _init_setup_model=True,
        policy_kwargs=None, full_tensorboard_log=False)
Пример #2
0
    import logging

    # Restrict TensorFlow's logger to records at ERROR level or above.
    tf.get_logger().setLevel(logging.ERROR)

    args = parser.arg_parse()

    # Create save dir. exist_ok=True replaces the separate os.path.exists()
    # check, which left a window for the directory to appear between the
    # check and the creation.
    model_dir = args.model_dir
    os.makedirs(model_dir, exist_ok=True)


    ############## load environment

    base_env = tm700_possensor_gym(renders=False, isDiscrete=True)
    # env = gym.make('CartPole-v1')
    # vectorized environments allow to easily multiprocess training
    # The algorithms require a vectorized environment to run. The factory
    # closes over `base_env` (not `env`, which is rebound to the wrapper on
    # this line), so it returns the raw environment whenever it is invoked.
    env = DummyVecEnv([lambda: base_env])


    ############ MODELS

    # NOTE(review): this PPO2 model is built from the 'Pendulum-v0' id string
    # and trained via learn(8000); it does not use `env` from above -- confirm
    # that this is intended.
    model = PPO2('MlpPolicy', 'Pendulum-v0', verbose=0).learn(8000)
    # NOTE(review): a previous comment said the model is saved under
    # PPO2_tutorial.zip, but no save call is visible in this chunk.
    # ddpg_model = DDPG(MlpPolicy, env, verbose=1, param_noise=None, random_exploration=0.1)
    # DQN + Prioritized Experience Replay + Double Q-Learning + Dueling
    kwargs = {
        'double_q': True,
        'prioritized_replay': True,
        'policy_kwargs': dict(dueling=True),
    }
    dqn_model = DQN('MlpPolicy', env, verbose=1, **kwargs)