Exemplo n.º 1
0
def main(args):
    """Train an adInfoHRLTD3 agent for ``args['trial_num']`` trials and save results.

    Each trial opens a new TensorFlow session, creates a training environment
    and a separately created test environment from ``args['env']``, seeds
    NumPy, TensorFlow and both environments with the same value, builds the
    agent, calls ``train`` and writes whatever ``train`` returns to a text
    file (one file shared by all trials of this call).

    Args:
        args: Mapping of settings. Keys read here: ``trial_num``,
            ``change_seed``, ``random_seed``, ``env``, ``minibatch_size``,
            ``tau``, ``actor_lr``, ``critic_lr``, ``option_lr``, ``gamma``,
            ``hidden_dim``, ``lambda``, ``c_reg``, ``option_num``,
            ``vat_noise``, ``c_ent``, ``use_gym_monitor``, ``render_env``,
            ``monitor_dir``, ``result_file``, ``temporal_num``,
            ``trial_idx`` and ``overwrite_result``. The whole mapping is
            also forwarded to ``train``.

    Returns:
        None.
    """
    import os  # local import so the block does not rely on a file-level one

    for ite in range(int(args['trial_num'])):
        print('Trial Number:', ite)

        with tf.Session() as sess:
            # Every trial gets a distinct, reproducible seed only when asked to.
            rand_seed = 10 * ite if args['change_seed'] else 0
            seed = int(args['random_seed']) + int(rand_seed)

            env = gym.make(args['env'])

            np.random.seed(seed)
            tf.set_random_seed(seed)
            env.seed(seed)

            env_test = gym.make(args['env'])
            env_test.seed(seed)

            state_dim = env.observation_space.shape[0]
            action_dim = env.action_space.shape[0]
            print('action_space.shape', env.action_space.shape)
            print('observation_space.shape', env.observation_space.shape)
            action_bound = env.action_space.high

            # Only the upper bound is handed to the agent, so the action
            # range has to be symmetric around zero.
            assert (env.action_space.high[0] == -env.action_space.low[0])

            agent = adInfoHRLTD3(sess,
                                 env,
                                 state_dim,
                                 action_dim,
                                 action_bound,
                                 int(args['minibatch_size']),
                                 tau=float(args['tau']),
                                 actor_lr=float(args['actor_lr']),
                                 critic_lr=float(args['critic_lr']),
                                 option_lr=float(args['option_lr']),
                                 gamma=float(args['gamma']),
                                 hidden_dim=np.asarray(args['hidden_dim']),
                                 entropy_coeff=float(args['lambda']),
                                 c_reg=float(args['c_reg']),
                                 option_num=int(args['option_num']),
                                 vat_noise=float(args['vat_noise']),
                                 c_ent=float(args['c_ent']))

            if args['use_gym_monitor']:
                if not args['render_env']:
                    # Record statistics only, never video.
                    env = wrappers.Monitor(env,
                                           args['monitor_dir'],
                                           video_callable=False,
                                           force=True)
                else:
                    # Record a video every 50th episode.
                    env = wrappers.Monitor(
                        env,
                        args['monitor_dir'],
                        video_callable=lambda episode_id: episode_id % 50 == 0,
                        force=True)

            step_R_i = train(sess, env, env_test, args, agent)

            result_path = "./results/trials/separate/"
            try:
                import pathlib
                pathlib.Path(result_path).mkdir(parents=True, exist_ok=True)
            except (ImportError, OSError):
                # Best effort: keep going, but do not swallow
                # KeyboardInterrupt/SystemExit like a bare ``except`` would.
                print(
                    "A result directory does not exist and cannot be created. The trial results are not saved"
                )

            result_filename = args['result_file'] + '_' + args['env'] \
                              + '_lambda_' + str(float(args['lambda'])) \
                              + '_c_reg_' + str(float(args['c_reg'])) \
                              + '_vat_noise_' + str(float(args['vat_noise'])) \
                              + '_c_ent_' + str(float(args['c_ent'])) \
                              + '_option_' + str(float(args['option_num'])) \
                              + '_temporal_' + str(float(args['temporal_num'])) \
                              + '_trial_idx_' + str(int(args['trial_idx'])) \
                              + '.txt'

            trial_result = np.asarray(step_R_i)
            start_fresh = args['overwrite_result'] and ite == 0
            if start_fresh or not os.path.exists(result_filename):
                # Also taken when appending was requested but there is no
                # earlier file yet; np.loadtxt would raise in that case.
                np.savetxt(result_filename, trial_result)
            else:
                # Stack this trial's result under the rows already on disk.
                data = np.loadtxt(result_filename, dtype=float)
                data_new = np.vstack((data, trial_result))
                np.savetxt(result_filename, data_new)

            if args['use_gym_monitor']:
                # The Monitor wrapper is finalised through close(); the old
                # ``env.monitor`` attribute is not available on wrapped envs.
                env.close()
Exemplo n.º 2
0
def main(args):
    """Load a saved adInfoHRLTD3 model and run ``test`` for each trial.

    Each trial opens a TensorFlow session capped at a third of the GPU
    memory, seeds NumPy, TensorFlow and the environment, optionally wraps
    the environment so the first episode is recorded to ``./Video/<env>/``,
    builds the agent, loads its weights from ``./Model/adInfoHRL/<env>/``
    and calls ``test``.

    Args:
        args: Mapping of settings. Keys read here: ``env``, ``lambda``,
            ``c_reg``, ``vat_noise``, ``c_ent``, ``option_num``,
            ``temporal_num``, ``trial_idx``, ``trial_num``, ``change_seed``,
            ``random_seed``, ``save_video``, ``method_name``,
            ``load_model_iter`` and ``use_gym_monitor``. The whole mapping
            is also forwarded to ``test``.

    Returns:
        None.

    Raises:
        ValueError: If ``args['method_name']`` is not ``'adInfoHRLTD3'``,
            the only method this function knows how to build.
    """
    # Experiment name; passed to load_model and test as the identifier.
    result_name = 'adInfoHRLTD3_' + args['env'] \
                + '_lambda_' + str(float(args['lambda'])) \
                + '_c_reg_' + str(float(args['c_reg'])) \
                + '_vat_noise_' + str(float(args['vat_noise'])) \
                + '_c_ent_' + str(float(args['c_ent'])) \
                + '_option_' + str(float(args['option_num'])) \
                + '_temporal_' + str(float(args['temporal_num'])) \
                + '_trial_idx_' + str(int(args['trial_idx']))

    for ite in range(int(args['trial_num'])):
        print('Trial Number:', ite)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        config = tf.ConfigProto(gpu_options=gpu_options,
                                log_device_placement=False)

        with tf.Session(config=config) as sess:
            # Every trial gets a distinct, reproducible seed only when asked to.
            rand_seed = 10 * ite if args['change_seed'] else 0
            seed = int(args['random_seed']) + int(rand_seed)

            np.random.seed(seed)
            tf.set_random_seed(seed)
            env = gym.make(args['env'])
            env.seed(seed)

            if args['save_video']:
                try:
                    import pathlib
                    pathlib.Path("./Video/" + args['env']).mkdir(parents=True,
                                                                 exist_ok=True)
                    video_relative_path = "./Video/" + args['env'] + "/"

                    # Record only the first episode.
                    env = gym.wrappers.Monitor(
                        env,
                        video_relative_path,
                        video_callable=lambda episode_id: episode_id == 0,
                        force=True)
                except Exception as err:
                    # Recording is optional, so any setup failure is reported
                    # and evaluation continues on the unwrapped env. Unlike a
                    # bare ``except`` this lets Ctrl-C / SystemExit through.
                    print(
                        "Cannot create video directories. Video will not be saved."
                    )
                    print(repr(err))

            state_dim = env.observation_space.shape[0]
            action_dim = env.action_space.shape[0]
            action_bound = env.action_space.high
            # Ensure action bound is symmetric
            assert (env.action_space.high[0] == -env.action_space.low[0])

            if args['method_name'] == 'adInfoHRLTD3':
                from adInfoHRL_agent import adInfoHRLTD3
                # Only option_num is taken from args; every other
                # hyper-parameter is left at the agent's own default.
                agent = adInfoHRLTD3(
                    sess,
                    env,
                    state_dim,
                    action_dim,
                    action_bound,
                    option_num=int(args['option_num']),
                )
            else:
                # Without this, ``agent`` would be unbound and the failure
                # would surface below as an unrelated-looking NameError.
                raise ValueError(
                    "Unsupported method_name: " + repr(args['method_name']))

            model_path = "./Model/adInfoHRL/" + args['env'] + '/'
            agent.load_model(iteration=int(args['load_model_iter']),
                             expname=result_name,
                             model_path=model_path)

            test(sess, env, args, agent, result_name)

            # Also close when a video Monitor may have been attached above,
            # so the recorder gets finalised.
            if args['use_gym_monitor'] or args['save_video']:
                env.close()