def _enjoyWindow(env, dagger=False):
    model = WindowModel(action_dim=2, max_action=1.)
    try:
        if dagger:
            state_dict = torch.load('./models/dagger_windowimitate.pt')
        else:
            state_dict = torch.load('./models/windowimitate.pt')
        model.load_state_dict(state_dict)
    except Exception as e:
        print('failed to load model:', e)
        exit()
    model.eval().to(device)

    obs = env.reset()
    # Sliding window of the four most recent frames (4 frames x 3 channels).
    obsWindow = np.zeros((12, 160, 120))
    successes = 0
    count = 0
    written = False
    while count < NUM_TESTS:
        # Shift the window back by one frame and append the newest observation.
        obsWindow[:9, :, :] = obsWindow[3:, :, :]
        obsWindow[9:12, :, :] = obs

        obs = torch.from_numpy(obsWindow).float().to(device).unsqueeze(0)
        action = model(obs)
        action = action.squeeze().data.cpu().numpy()
        obs, reward, done, info = env.step(action)
        env.render()

        if done:
            count += 1
            if info['Simulator']['done_code'] == 'lap-completed':
                print('*** SUCCESS ***')
                successes += 1
            else:
                print('*** FAILED ***')
            obs = env.reset()
            env.render()

        # Append the running success rate to the log once every 50 episodes.
        if count != 0 and count % 50 == 0 and not written:
            if dagger:
                path = "../window_test_{}_dagger.txt".format(env.map_name)
            else:
                path = "../window_test_{}.txt".format(env.map_name)
            with open(path, "a") as f:
                f.write("{} {}".format(env.map_name, count))
                f.write("\n{}/{}\n\n".format(successes, NUM_TESTS))
            written = True
        elif count % 50 != 0:
            written = False
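
# `WindowModel` is defined elsewhere in the repo. As a reference point, the
# sketch below shows one plausible CNN with the same interface: a stack of
# four 3-channel frames (12 x 160 x 120) in, two continuous actions out,
# scaled by max_action. The layer sizes are illustrative assumptions, not
# the repo's actual architecture.
import torch
import torch.nn as nn

class WindowModelSketch(nn.Module):
    def __init__(self, action_dim=2, max_action=1.):
        super().__init__()
        self.max_action = max_action
        self.features = nn.Sequential(
            nn.Conv2d(12, 32, kernel_size=5, stride=2), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=5, stride=2), nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=2), nn.ReLU(),
            nn.AdaptiveAvgPool2d((4, 4)),
        )
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 256), nn.ReLU(),
            nn.Linear(256, action_dim), nn.Tanh(),  # outputs in [-1, 1]
        )

    def forward(self, x):
        # Scale the tanh output to the environment's action range.
        return self.max_action * self.head(self.features(x))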
def _enjoyWindow():
    model = WindowModel(action_dim=2, max_action=1.)
    try:
        state_dict = torch.load('./models/windowimitate.pt')
        model.load_state_dict(state_dict)
    except Exception as e:
        print('failed to load model:', e)
        exit()
    model.eval().to(device)

    env = launch_env1()
    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)

    obs = env.reset()
    # Sliding window of the four most recent frames (4 frames x 3 channels).
    obsWindow = np.zeros((12, 160, 120))
    while True:
        obsWindow[:9, :, :] = obsWindow[3:, :, :]
        obsWindow[9:12, :, :] = obs

        obs = torch.from_numpy(obsWindow).float().to(device).unsqueeze(0)
        action = model(obs)
        action = action.squeeze().data.cpu().numpy()
        obs, reward, done, info = env.step(action)
        env.render()

        if done:
            if reward < 0:
                print('*** FAILED ***')
                time.sleep(0.7)
            obs = env.reset()
            env.render()
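
# Neither evaluator builds its own CLI; a minimal driver is sketched below.
# The --dagger flag name is an assumption, but the wrapper chain and the
# _enjoyWindow(env, dagger=...) signature match the functions above.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--dagger', action='store_true',
                        help='evaluate the DAgger-refined window model')
    args = parser.parse_args()

    env = launch_env1()
    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)
    _enjoyWindow(env, dagger=args.dagger)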
def _dagger():
    model = Model(action_dim=2, max_action=1.)
    try:
        state_dict = torch.load('./models/imitate.pt')
        model.load_state_dict(state_dict)
    except Exception as e:
        print('failed to load model:', e)
        exit()
    model.eval().to(device)

    env = launch_env1()
    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)

    obs = env.reset()
    env.render()

    # Register a keyboard handler so a human expert can label states.
    key_handler = key.KeyStateHandler()
    env.unwrapped.window.push_handlers(key_handler)
    print(env.map_name)

    obsHistory = []
    actionHistory = []
    while True:
        # The learned policy drives; the expert only provides labels.
        obs = torch.from_numpy(obs).float().to(device).unsqueeze(0)
        action = model(obs)
        action = action.squeeze().data.cpu().numpy()
        obs, reward, done, info = env.step(action)

        # Read the expert's corrective action for the state just visited.
        daggerAction = np.array([0.0, 0.0])
        if key_handler[key.UP]:
            daggerAction = np.array([1.00, 0.0])
        if key_handler[key.DOWN]:
            daggerAction = np.array([-1.00, 0])
        if key_handler[key.LEFT]:
            daggerAction = np.array([0.35, +1])
        if key_handler[key.RIGHT]:
            daggerAction = np.array([0.35, -1])
        if key_handler[key.SPACE]:
            # Dump the aggregated DAgger dataset collected so far.
            obsHistoryArray = np.array(obsHistory)
            actionHistoryArray = np.array(actionHistory)
            np.save('./dagger/obs_{}.npy'.format(env.map_name), obsHistoryArray)
            np.save('./dagger/actions_{}.npy'.format(env.map_name), actionHistoryArray)

        print(daggerAction)
        obsHistory.append(obs)
        actionHistory.append(daggerAction)

        env.render()
        if done:
            if reward < 0:
                print('*** FAILED ***')
                time.sleep(0.7)
            obs = env.reset()
            env.render()
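
# The .npy pairs written by _dagger are what the DAgger variant of _train
# below consumes. A quick sanity check that states and expert labels line
# up, assuming the save paths above and a map named 'loop_empty':
import numpy as np

obs = np.load('./dagger/obs_loop_empty.npy')
acts = np.load('./dagger/actions_loop_empty.npy')
print(obs.shape, acts.shape)
# Both histories grow by one entry per step, so the arrays should match in
# length; trim to the shorter one in case a save happened mid-step.
n = min(len(obs), len(acts))
obs, acts = obs[:n], acts[:n]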
def _train(args):
    env = launch_env1()
    env1 = ResizeWrapper(env)
    env2 = NormalizeWrapper(env)
    env3 = ImgWrapper(env)
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)
    print("Initialized Wrappers")

    def transformObs(obs):
        # Apply the observation wrappers to a raw recorded frame.
        obs = env1.observation(obs)
        obs = env2.observation(obs)
        obs = env3.observation(obs)
        return obs

    # Load and concatenate the recorded demonstrations for every map.
    actions = None
    rawObs = None
    for map_name in MAP_NAMES:
        episodes = 3 if map_name == "loop_obstacles" else 2
        print(map_name)
        for episode in range(episodes):
            actionFile = "actions_{}.npy".format(episode)
            action = np.load(TRAINING_DATA_PATH.format(map_name, actionFile))
            print(action.shape)
            observationFile = "obs_{}.npy".format(episode)
            observation = np.load(TRAINING_DATA_PATH.format(map_name, observationFile))
            if actions is None:
                actions = action
                rawObs = observation
            else:
                actions = np.concatenate((actions, action), axis=0)
                rawObs = np.concatenate((rawObs, observation), axis=0)
            print(actions.shape)
        print("---")

    # Preprocess the raw frames into the (3, 160, 120) network input format.
    observations = np.zeros((rawObs.shape[0], 3, 160, 120))
    for i, obs in enumerate(rawObs):
        observations[i] = transformObs(obs)

    '''
    # Alternative: collect demonstrations from an imperfect PurePursuitExpert
    # instead of loading recorded data.
    expert = PurePursuitExpert(env=env)
    observations = []
    actions = []
    for episode in range(0, 2):  # or range(0, args.episodes)
        print("Starting episode", episode)
        for steps in range(0, 4):  # or range(0, args.steps)
            # Use the expert to predict the next action.
            action = expert.predict(None)
            observation, reward, done, info = env.step(action)
            observations.append(observation)
            actions.append(action)
        env.reset()
    actions = np.array(actions)
    observations = np.array(observations)
    print(observations.shape)
    '''

    env.close()

    model = Model(action_dim=2, max_action=1.)
    model.train().to(device)

    # weight_decay is L2 regularization, helps avoid overfitting
    optimizer = optim.SGD(model.parameters(), lr=0.0004, weight_decay=1e-3)

    loss_list = []
    avg_loss = 0
    for epoch in range(args.epochs):
        optimizer.zero_grad()

        batch_indices = np.random.randint(0, observations.shape[0], (args.batch_size))
        obs_batch = torch.from_numpy(observations[batch_indices]).float().to(device)
        # Actions are continuous wheel commands, so keep them as floats.
        act_batch = torch.from_numpy(actions[batch_indices]).float().to(device)

        model_actions = model(obs_batch)
        loss = (model_actions - act_batch).norm(2).mean()
        loss.backward()
        optimizer.step()

        loss = loss.item()
        avg_loss = avg_loss * 0.995 + loss * 0.005
        print('epoch %d, loss=%.3f' % (epoch, loss))
        loss_list.append(loss)

        # Periodically save the trained model
        if epoch % 50 == 0:
            print("Saving...")
            torch.save(model.state_dict(), 'imitation/pytorch/models/imitate.pt')
            save_loss(loss_list, 'imitation/pytorch/loss.npy')

    print("Saving...")
    torch.save(model.state_dict(), 'imitation/pytorch/models/imitate.pt')
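
# save_loss is defined elsewhere in the repo. Given how it is called above,
# it presumably just persists the loss curve; an assumed one-line version:
import numpy as np

def save_loss(loss_list, path):
    # Assumption: store the per-epoch losses for later plotting.
    np.save(path, np.array(loss_list))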
def _train(args):
    # Load and concatenate the DAgger data recorded for every map.
    actions = None
    observations = None
    for map_name in MAP_NAMES:
        print(map_name)
        actionFile = "actions_{}.npy".format(map_name)
        action = np.load(TRAINING_DATA_PATH.format(actionFile))
        print(action.shape)
        observationFile = "obs_{}.npy".format(map_name)
        observation = np.load(TRAINING_DATA_PATH.format(observationFile))
        if actions is None:
            actions = action
            observations = observation
        else:
            actions = np.concatenate((actions, action), axis=0)
            observations = np.concatenate((observations, observation), axis=0)
        print("---")

    # Warm-start from the behavior-cloned model, then fine-tune on the
    # aggregated DAgger data.
    model = Model(action_dim=2, max_action=1.)
    try:
        state_dict = torch.load('./models/imitate.pt')
        model.load_state_dict(state_dict)
    except Exception as e:
        print('failed to load model:', e)
        exit()
    model.train().to(device)

    # weight_decay is L2 regularization, helps avoid overfitting
    optimizer = optim.SGD(model.parameters(), lr=0.0004, weight_decay=1e-3)

    # A save triggered mid-step can leave one extra entry; truncate so
    # states and labels stay aligned.
    length = min(len(actions), len(observations))
    actions = actions[:length, :]
    observations = observations[:length, :, :, :]

    loss_list = []
    avg_loss = 0
    for epoch in range(args.epochs):
        optimizer.zero_grad()

        batch_indices = np.random.randint(0, observations.shape[0], (args.batch_size))
        obs_batch = torch.from_numpy(observations[batch_indices]).float().to(device)
        # Actions are continuous wheel commands, so keep them as floats.
        act_batch = torch.from_numpy(actions[batch_indices]).float().to(device)

        model_actions = model(obs_batch)
        loss = (model_actions - act_batch).norm(2).mean()
        loss.backward()
        optimizer.step()

        loss = loss.item()
        avg_loss = avg_loss * 0.995 + loss * 0.005
        print('epoch %d, loss=%.3f' % (epoch, loss))
        loss_list.append(loss)

        # Periodically save the trained model
        if epoch % 50 == 0:
            print("Saving...")
            torch.save(model.state_dict(), 'imitation/pytorch/models/dagger_imitate.pt')
            save_loss(loss_list, 'imitation/pytorch/dagger_loss.npy')

    print("Saving...")
    torch.save(model.state_dict(), 'imitation/pytorch/models/dagger_imitate.pt')
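
# Both _train variants (and the evaluators) lean on module-level constants
# and a device handle defined near the file's imports. Representative values
# are sketched below; the map list and paths are assumptions inferred from
# how the constants are used above, not the repo's actual settings.
import torch

MAP_NAMES = ['loop_empty', 'loop_obstacles']  # assumed map list
TRAINING_DATA_PATH = './data/{}/{}'           # map name, file name
# (the DAgger _train formats its path with a single file name, so its
# constant would have one slot, e.g. './dagger/{}')
NUM_TESTS = 100                               # assumed episodes per evaluation
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')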
def dagger(dt, actionHistory, obsHistory, count, env, model):
    """
    This function is called at every frame to handle movement/stepping
    and redrawing.
    """
    step_count = env.unwrapped.step_count

    # Read the expert's corrective action from the keyboard.
    daggerAction = np.array([0.0, 0.0])
    if key_handler[key.UP]:
        daggerAction = np.array([1.00, 0.0])
    if key_handler[key.DOWN]:
        daggerAction = np.array([-1.00, 0])
    if key_handler[key.LEFT]:
        daggerAction = np.array([0.35, +1])
    if key_handler[key.RIGHT]:
        daggerAction = np.array([0.35, -1])
    if key_handler[key.SPACE]:
        # Dump the aggregated DAgger dataset collected so far.
        obsHistoryArray = np.array(obsHistory)
        actionHistoryArray = np.array(actionHistory)
        np.save('./dagger/obs_{}.npy'.format(env.map_name), obsHistoryArray)
        np.save('./dagger/actions_{}.npy'.format(env.map_name), actionHistoryArray)

    print(daggerAction)
    actionHistory.append(daggerAction)

    # The policy acts on the most recent stored observation (a blank frame
    # on the very first step).
    if step_count == 0:
        obs = np.zeros((3, 160, 120))
    else:
        obs = obsHistory[step_count - 1]
    obs = torch.from_numpy(obs).float().to(device).unsqueeze(0)
    action = model(obs)
    action = action.squeeze().data.cpu().numpy()

    obs, reward, done, info = env.step(action)
    print('step_count = %s, reward=%.3f' % (env.unwrapped.step_count, reward))
    obsHistory.append(obs)

    if key_handler[key.RETURN]:
        from PIL import Image
        # obs is a CHW float array after the wrappers; convert to HWC uint8
        # (assuming frames normalized to [0, 1]) before saving.
        frame = (obs.transpose(1, 2, 0) * 255).astype(np.uint8)
        Image.fromarray(frame).save('screen.png')

    if done:
        print('done!')
        '''
        if info['Simulator']['done_code'] == 'lap-completed' and args.record:
            obsHistoryArray = np.array(obsHistory)
            print(obsHistoryArray.shape)
            np.save('./data/{}/obs_{}.npy'.format(args.map_name, len(count)),
                    obsHistoryArray)
            actionHistoryArray = np.array(actionHistory)
            print(actionHistoryArray.shape)
            np.save('./data/{}/actions_{}.npy'.format(args.map_name, len(count)),
                    actionHistoryArray)
            count.append(0)
        '''
        env.reset()
        # The histories are deliberately not cleared: DAgger aggregates data
        # across episodes.
        # obsHistory.clear()
        # actionHistory.clear()
        env.render()

    env.render()
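
# dagger() is written as a pyglet clock callback (note the dt parameter),
# following gym-duckietown's manual_control.py pattern. Below is a minimal
# sketch of the harness that would drive it; the setup mirrors _dagger
# above, and the extra callback arguments are forwarded by schedule_interval.
import pyglet
from pyglet.window import key

env = launch_env1()
env = ResizeWrapper(env)
env = NormalizeWrapper(env)
env = ImgWrapper(env)
env = ActionWrapper(env)
env = DtRewardWrapper(env)
env.reset()
env.render()

key_handler = key.KeyStateHandler()
env.unwrapped.window.push_handlers(key_handler)

model = Model(action_dim=2, max_action=1.)
model.load_state_dict(torch.load('./models/imitate.pt'))
model.eval().to(device)

actionHistory, obsHistory, count = [], [], []
# Call dagger() once per simulator frame; pyglet passes dt first, then the
# extra positional arguments registered here.
pyglet.clock.schedule_interval(dagger, 1.0 / env.unwrapped.frame_rate,
                               actionHistory, obsHistory, count, env, model)
pyglet.app.run()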