-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_with_human.py
68 lines (57 loc) · 2.1 KB
/
run_with_human.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import sys
import multiprocessing
import os.path as osp
import os
import tensorflow as tf
import pprint as pp
from arguments_with_human import common_arg_parser
from util import get_session,save_state
from learner import learn
from wrap_env import make_vec_env
def train(args):
    """Create the environment and run ``learn`` with settings taken from *args*.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``num_timesteps``, ``seed``, ``nsteps``, ``ent_coef``,
            ``vf_coef``, ``p_coef``, ``human_ent_coef``, ``human_vf_coef``,
            ``human_p_coef``, ``lr``, ``max_grad_norm``, ``gamma``, ``lam``,
            ``nminibatches``, ``noptepochs``, ``cliprange`` and
            ``save_interval`` (plus whatever ``build_env`` reads).

    Returns:
        A ``(model, env)`` tuple: the object returned by ``learn`` and the
        environment it was given.
    """
    # Collect every hyperparameter up front so that a malformed value fails
    # before the environment is created. Count-like options are coerced to
    # int; the remaining values are forwarded untouched.
    hyperparams = {
        'total_timesteps': int(args.num_timesteps),
        'seed': args.seed,
        'nsteps': int(args.nsteps),
        'ent_coef': args.ent_coef,
        'vf_coef': args.vf_coef,
        'p_coef': args.p_coef,
        'human_ent_coef': args.human_ent_coef,
        'human_vf_coef': args.human_vf_coef,
        'human_p_coef': args.human_p_coef,
        'lr': args.lr,
        'max_grad_norm': args.max_grad_norm,
        'gamma': args.gamma,
        'lam': args.lam,
        'nminibatches': int(args.nminibatches),
        'noptepochs': int(args.noptepochs),
        'cliprange': args.cliprange,
        'save_interval': int(args.save_interval),
    }

    env = build_env(args)
    # `copeoperation` is the keyword spelling used by `learn`; keep it as-is.
    model = learn(env=env, copeoperation=True, **hyperparams)
    return model, env
def build_env(args):
    """Set up the TensorFlow session and return the vectorised environment.

    Args:
        args: Parsed command-line namespace; ``num_env`` and ``seed`` are
            read here.

    Returns:
        The object produced by ``make_vec_env``.
    """
    # Fall back to the CPU count (halved on macOS) when `num_env` is falsy.
    cpu_total = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_total = cpu_total // 2
    env_count = args.num_env or cpu_total

    # Both thread-pool sizes are passed as 0.
    # NOTE(review): the original author questioned whether these should be
    # "1 or more" -- confirm the intended setting against the TF docs.
    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        intra_op_parallelism_threads=0,
        inter_op_parallelism_threads=0,
    )
    session_config.gpu_options.allow_growth = True
    # The return value is intentionally unused; the call is made for its
    # effect of setting up the session with this config.
    get_session(config=session_config)

    return make_vec_env(env_count, args.seed, copeoperation=True)
def main():
    """Parse command-line arguments, train, and save the final state.

    The default TensorFlow graph is reset, arguments are parsed with the
    parser from ``common_arg_parser`` and pretty-printed, and ``train`` is
    run. The resulting state is written with ``save_state`` to
    ``my_model_cop/final/ppomodel`` (the directory is created if missing).

    Returns:
        The model returned by ``train``.
    """
    tf.reset_default_graph()
    arg_parser = common_arg_parser()
    args = arg_parser.parse_args()
    pp.pprint(vars(args))

    model, env = train(args)
    try:
        save_dir = osp.join("my_model_cop/", 'final')
        os.makedirs(save_dir, exist_ok=True)
        save_state(osp.join(save_dir, 'ppomodel'))
    finally:
        # Close the environment even if creating the directory or saving
        # fails, so it is not left open on error.
        env.close()
    return model
# Script entry point: options are read from the command line inside main().
if __name__ == "__main__":
    main()