-
Notifications
You must be signed in to change notification settings - Fork 0
/
behavioral_cloning.py
76 lines (62 loc) · 2.27 KB
/
behavioral_cloning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pickle
import argparse
import numpy as np
from collections import defaultdict
from model import init_model
import parse_expert
from keras.models import Sequential
# Command-line interface: two required positional string arguments.
parser = argparse.ArgumentParser()
# Value handed to parse_expert.standardizers() in main().
parser.add_argument('expert_policy_file', type=str)
# Gym environment id; also used to build the expert-data and results file names.
parser.add_argument('envname', type=str)
# NOTE: parsing happens at module level, so importing this file reads the
# process command line; `args` is then read as a global by the functions below.
args = parser.parse_args()
def normalize(inputs, mean, sdev, eps=1e-6):
    """Standardize *inputs* as ``(inputs - mean) / (sdev + eps)``.

    Args:
        inputs: Value(s) to standardize; anything supporting ``-`` and ``/``
            with ``mean`` and ``sdev`` (scalars or arrays that broadcast).
        mean: Value subtracted from ``inputs``.
        sdev: Scale the centered inputs are divided by.
        eps: Small constant added to ``sdev`` so that a zero deviation does
            not cause a division by zero. Defaults to ``1e-6``.

    Returns:
        The standardized value(s), with whatever type the arithmetic yields.
    """
    return (inputs - mean) / (sdev + eps)
def render_and_eval(model, in_shape, out_shape, obs_mean, obs_sdev,
                    num_rollouts=10, max_steps=1000):
    """Roll out *model* in the ``args.envname`` gym environment.

    Each rollout resets the environment and repeatedly feeds the
    standardized observation to ``model.predict`` until the environment
    reports ``done`` or ``max_steps`` steps have been taken. Despite the
    function name, nothing is rendered here.

    Args:
        model: Object exposing ``predict(x, batch_size=1)``; its output is
            passed unchanged to ``env.step``.
        in_shape: Width used to reshape every observation to
            ``(-1, in_shape)`` before standardizing.
        out_shape: Unused; kept so existing callers keep working.
        obs_mean: Mean passed to ``normalize`` for each observation.
        obs_sdev: Deviation passed to ``normalize`` for each observation.
        num_rollouts: Number of episodes to run. Defaults to 10.
        max_steps: Per-episode step cap. Defaults to 1000.

    Returns:
        A list with the summed reward of each rollout, in rollout order.
    """
    import gym

    env = gym.make(args.envname)
    returns = []
    try:
        for i in range(num_rollouts):
            print('iter', i)
            obs = env.reset()
            done = False
            steps = 0
            totalr = 0
            while not done:
                obs = obs.reshape(-1, in_shape)
                std_obs = normalize(obs, obs_mean, obs_sdev)
                action = model.predict(std_obs, batch_size=1)
                obs, r, done, _ = env.step(action)
                totalr += r
                steps += 1
                if steps % 100 == 0:
                    print("%i/%i" % (steps, max_steps))
                if steps >= max_steps:
                    break
            returns.append(totalr)
    finally:
        # This function is called once per evaluation round, so release the
        # environment each time instead of leaking one per call.
        env.close()
    return returns
def main():
    """Train a cloned policy and record its returns as training progresses.

    Loads expert data for ``args.envname`` through ``parse_expert``,
    standardizes the observations, then alternates between evaluating the
    model with ``render_and_eval`` and fitting it for a fixed number of
    epochs. The per-round returns, the expert returns and the epochs-per-round
    value are pickled to ``results/cloning/<envname>.p``.
    """
    import os

    eval_rounds = 11
    # Single source of truth for both the fit call and the recorded metadata.
    epochs_per_result = 5

    print("loading expert actions")
    expert_behavior = 'data/cloning/experts/{}.p'.format(args.envname)
    expert_policy = args.expert_policy_file
    observations, actions = parse_expert.actions(expert_behavior)
    obs_shape, action_shape = parse_expert.model_shapes(expert_behavior)
    actions = actions.reshape(-1, action_shape)

    print("standardize training data")
    obs_mean, obs_sdev = parse_expert.standardizers(expert_policy)
    obs_std = normalize(observations, obs_mean, obs_sdev)

    model = init_model(obs_shape, action_shape)
    results = defaultdict(list)
    for _ in range(eval_rounds):
        # Evaluate first, so the first entry is the model before any fitting.
        result = render_and_eval(model, obs_shape, action_shape, obs_mean, obs_sdev)
        results['clone_epoch'].append(result)
        # NOTE(review): the fit that follows the last evaluation is never
        # evaluated or saved within this function - confirm it is intended.
        model.fit(obs_std, actions, epochs=epochs_per_result, batch_size=256)

    results['expert'] = parse_expert.returns(expert_behavior)
    results['epochs_per_result'] = epochs_per_result

    out_path = "results/cloning/{}.p".format(args.envname)
    # Create the output directory up front so a missing folder cannot throw
    # away the results of a finished run.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # Context manager guarantees the file is flushed and closed.
    with open(out_path, "wb") as out_file:
        pickle.dump(results, out_file)
# Script entry point: run the pipeline only when executed directly.
if __name__ == "__main__":
    main()