def main(args):
    """Roll out random-policy episodes and save (obs, action) batches to ./data.

    Episodes are generated in batches; after each batch the accumulated
    observation and action sequences are written with ``np.save`` and the
    in-memory buffers are reset.

    Args (via ``args``): env_name, total_episodes, start_batch, time_steps.
    """
    env_name = args.env_name
    total_episodes = args.total_episodes
    start_batch = args.start_batch
    time_steps = args.time_steps

    obs_data = []
    action_data = []

    env = make_env(env_name)

    s = 0
    batch = start_batch

    while s < total_episodes:
        # Cap the batch so we never generate more than total_episodes overall
        # (previously a full batch of 200 always ran, overshooting the target).
        for i_episode in range(min(200, total_episodes - s)):
            print('-----')
            observation = env.reset()
            env.render()
            done = False
            action = env.action_space.sample()
            t = 0
            obs_sequence = []
            action_sequence = []

            # Fixed-length rollout; `done` from env.step is intentionally ignored.
            while t < time_steps:
                t = t + 1
                action = config.generate_data_action(t, action)
                observation = config.adjust_obs(observation)

                obs_sequence.append(observation)
                action_sequence.append(action)

                observation, reward, done, info = env.step(action)

            obs_data.append(obs_sequence)
            action_data.append(action_sequence)

            # t already equals the number of steps taken (it is incremented at
            # the top of the loop), so report t — the old t+1 overstated by one.
            print("Batch {} Episode {} finished after {} timesteps".format(batch, i_episode, t))
            print("Current dataset contains {} observations".format(sum(map(len, obs_data))))

            s = s + 1

        print("Saving dataset for batch {}".format(batch))
        np.save('./data/obs_data_' + str(batch), obs_data)
        np.save('./data/action_data_' + str(batch), action_data)

        batch = batch + 1
        obs_data = []
        action_data = []
def main(args):
    """Generate random-policy rollouts and save each episode as one compressed
    .npz file (obs / action / reward / done arrays) under DIR_NAME.

    The action is refreshed only every ``action_refresh_rate`` steps. ``reward``
    starts at -0.1 and ``done`` at False, so the values stored at step t are the
    ones observed *before* ``env.step(action)`` is applied at that step.
    """
    env_name = args.env_name
    total_episodes = args.total_episodes
    time_steps = args.time_steps
    render = args.render
    run_all_envs = args.run_all_envs
    action_refresh_rate = args.action_refresh_rate

    if run_all_envs:
        envs_to_generate = config.train_envs
    else:
        envs_to_generate = [env_name]

    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))
        env = make_env(current_env_name)  # <1>
        s = 0

        while s < total_episodes:
            # Random episode id so concurrent workers don't clobber each other's files.
            episode_id = random.randint(0, 2**31 - 1)
            filename = DIR_NAME + str(episode_id) + ".npz"

            observation = env.reset()
            env.render()

            t = 0
            obs_sequence = []
            action_sequence = []
            reward_sequence = []
            done_sequence = []

            reward = -0.1
            done = False

            # Fixed-length rollout; the `and not done` termination is deliberately disabled.
            while t < time_steps:  # and not done:
                if t % action_refresh_rate == 0:
                    action = config.generate_data_action(t, env)  # <2>
                observation = config.adjust_obs(observation)  # <3>

                obs_sequence.append(observation)
                action_sequence.append(action)
                reward_sequence.append(reward)
                done_sequence.append(done)

                observation, reward, done, info = env.step(action)  # <4>

                t = t + 1

                if render:
                    env.render()

            print("Episode {} finished after {} timesteps".format(s, t))
            np.savez_compressed(filename, obs=obs_sequence, action=action_sequence,
                                reward=reward_sequence, done=done_sequence)  # <4>

            s = s + 1

        env.close()
def simulate(model, num_episode=5, seed=-1, max_len=-1, generate_data_mode=False, render_mode=False):
    """Run the full world model (VAE -> RNN -> controller) in the real env.

    Each step: encode obs with the VAE, advance the memory RNN on
    [z, action, reward], then pick the next action from the controller (or a
    scripted data-generation policy).

    Returns:
        (reward_list, t_list): total reward and final timestep index per episode.
    """
    reward_list = []
    t_list = []

    # Hard cap of 1000 steps per episode; max_len can only shorten it.
    max_episode_length = 1000
    if max_len > 0:
        if max_len < max_episode_length:
            max_episode_length = max_len

    if (seed >= 0):
        random.seed(seed)
        np.random.seed(seed)
        model.env.seed(seed)

    for episode in range(num_episode):
        model.reset()
        obs = model.env.reset()
        reward = 0
        # Zero 3-dim action fed to the RNN before the first controller decision.
        action = np.array([0, 0, 0])
        if obs is None:
            obs = np.zeros(model.input_size)
        total_reward = 0.0
        model.env.render("rgb_array")

        for t in range(max_episode_length):
            # Raw frames matching the VAE input shape get adjusted
            # (presumably this distinguishes real-env frames from already
            # processed input — TODO confirm against config.adjust_obs).
            if obs.shape == model.vae.input_dim:  ### running in real environment
                obs = config.adjust_obs(obs)
                reward = config.adjust_reward(reward)

            if render_mode:
                model.env.render("human")
                if RENDER_DELAY:
                    time.sleep(0.1)
            # else:
            #     model.env.render('rgb_array')

            vae_encoded_obs = model.update(obs, t)

            # RNN step input: [z, a, r] plus previous hidden/cell state.
            input_to_rnn = [
                np.array(
                    [[np.concatenate([vae_encoded_obs, action, [reward]])]]),
                np.array([model.hidden]),
                np.array([model.cell_values])
            ]
            out = model.rnn.forward.predict(input_to_rnn)
            y_pred = out[0][0][0]  # predicted next latent; computed but unused here
            model.hidden = out[1][0]
            model.cell_values = out[2][0]

            # Controller sees the concatenated [z, h].
            controller_obs = np.concatenate([vae_encoded_obs, model.hidden])

            if generate_data_mode:
                action = config.generate_data_action(t=t, env=model.env)
            else:
                action = model.get_action(controller_obs, t=t, add_noise=ADD_NOISE)
            # print(action)
            # action = [-0.1,1,0]

            obs, reward, done, info = model.env.step(action)
            total_reward += reward
            if done:
                break

        if render_mode:
            print("reward", total_reward, "timesteps", t)
        reward_list.append(total_reward)
        t_list.append(t)

    model.env.close()
    return reward_list, t_list
def simulate(model, train_mode=False, render_mode=True, num_episode=5, seed=-1, max_len=-1, generate_data_mode=False):
    """Run VAE + RNN + controller rollouts.

    Unlike the reward-conditioned variant, the memory RNN here consumes only
    [z, action] and its ``predict`` returns (hidden, cell) directly.

    Returns:
        (reward_list, t_list): total reward and final timestep index per episode.
    """
    reward_list = []
    t_list = []

    # Hard cap of 3000 steps per episode; max_len can only shorten it.
    max_episode_length = 3000
    if max_len > 0:
        if max_len < max_episode_length:
            max_episode_length = max_len

    if (seed >= 0):
        random.seed(seed)
        np.random.seed(seed)
        model.env.seed(seed)

    for episode in range(num_episode):
        model.reset()
        obs = model.env.reset()
        obs = config.adjust_obs(obs)
        # Seed with a random action; it is overwritten on every step below.
        action = model.env.action_space.sample()
        model.env.render("human")
        if obs is None:
            obs = np.zeros(model.input_size)
        total_reward = 0.0

        for t in range(max_episode_length):
            if render_mode:
                model.env.render("human")
                if RENDER_DELAY:
                    time.sleep(0.01)

            vae_encoded_obs = model.update(obs, t)
            # Controller input is the concatenated [z, h].
            controller_obs = np.concatenate([vae_encoded_obs, model.hidden])

            if generate_data_mode:
                action = config.generate_data_action(t=t, current_action=action)
            elif MEAN_MODE:
                action = model.get_action(controller_obs, t=t, mean_mode=(not train_mode))
            else:
                action = model.get_action(controller_obs, t=t, mean_mode=False)

            obs, reward, done, info = model.env.step(action)
            obs = config.adjust_obs(obs)

            # Advance the memory RNN with [z, a]; predict returns (h, c).
            input_to_rnn = [
                np.array([[np.concatenate([vae_encoded_obs, action])]]),
                np.array([model.hidden]),
                np.array([model.cell_values])
            ]
            h, c = model.rnn.forward.predict(input_to_rnn)
            model.hidden = h[0]
            model.cell_values = c[0]

            total_reward += reward
            if done:
                break

        if render_mode:
            print("reward", total_reward, "timesteps", t)
        reward_list.append(total_reward)
        t_list.append(t)

    model.env.close()
    return reward_list, t_list
def simulate(model, num_episode=5, seed=-1, max_len=-1, generate_data_mode=False, render_mode=False):
    """Run VAE -> RNN -> controller rollouts while profiling runtime.

    Same rollout logic as the reward-conditioned simulate variant, plus
    ``time.process_time`` instrumentation: avg_time[0]/count_times[0] track the
    RNN-predict + action-selection cost, avg_time[1]/count_times[1] the
    env.step cost (running averages, printed only via the commented-out line).

    Returns:
        (reward_list, t_list): total reward and final timestep index per episode.
    """
    reward_list = []
    t_list = []

    # Very high cap (100000 steps); max_len can only shorten it.
    max_episode_length = 100000
    if max_len > 0:
        if max_len < max_episode_length:
            max_episode_length = max_len

    if (seed >= 0):
        random.seed(seed)
        np.random.seed(seed)
        model.env.seed(seed)

    # [rnn+controller, env.step] running-average timings and sample counts.
    avg_time = [0, 0]
    count_times = [0, 0]

    for episode in range(num_episode):
        # print(f'Episode {episode}')
        model.reset()
        # obs = model.env.reset()
        # action = [0,0]
        obs = model.env.reset()
        reward = 0
        # Zero 2-dim action fed to the RNN before the first controller decision.
        action = np.array([0, 0])
        if obs is None:
            obs = np.zeros(model.input_size)
        total_reward = 0.0
        # Uncomment block
        # model.env.render("rgb_array")

        for t in range(max_episode_length):
            # print(f'Timestep {t}')
            # Raw frames matching the VAE input shape get adjusted
            # (presumably real-env frames vs. preprocessed input — TODO confirm).
            if obs.shape == model.vae.input_dim:  ### running in real environment
                obs = config.adjust_obs(obs)
                reward = config.adjust_reward(reward)

            # Uncomment block
            # if render_mode:
            #     model.env.render("human")
            #     if RENDER_DELAY:
            #         time.sleep(0.1)
            # else:
            #     model.env.render('rgb_array')
            # model.env.render()

            vae_encoded_obs = model.update(obs, t)

            # RNN step input: [z, a, r] plus previous hidden/cell state.
            input_to_rnn = [
                np.array(
                    [[np.concatenate([vae_encoded_obs, action, [reward]])]]),
                np.array([model.hidden]),
                np.array([model.cell_values])
            ]

            start = time.process_time()
            out = model.rnn.forward.predict(input_to_rnn)
            y_pred = out[0][0][0]  # predicted next latent; computed but unused here
            model.hidden = out[1][0]
            model.cell_values = out[2][0]

            controller_obs = np.concatenate([vae_encoded_obs, model.hidden])

            if generate_data_mode:
                action = config.generate_data_action(t=t, env=model.env)
            else:
                action = model.get_action(controller_obs, t=t, add_noise=ADD_NOISE)
            # print(action)
            # action = [-0.1,1,0]

            # Fold this step's model-side latency into the running average.
            new_time = time.process_time() - start
            avg_time[0] = ((avg_time[0] * count_times[0]) + new_time) / (count_times[0] + 1)
            count_times[0] += 1

            start = time.process_time()
            obs, reward, done, _ = model.env.step(action)
            # print(f'action:{action} reward:{reward}')
            # Fold this step's env latency into the running average.
            new_time = time.process_time() - start
            avg_time[1] = ((avg_time[1] * count_times[1]) + new_time) / (count_times[1] + 1)
            count_times[1] += 1
            # print(avg_time)

            total_reward += reward
            if done:
                break

        if render_mode:
            print("reward", total_reward, "timesteps", t)
        reward_list.append(total_reward)
        t_list.append(t)
        # model.env.close()

    return reward_list, t_list
def simulate(model, train_mode=False, render_mode=True, num_episode=5, seed=-1, max_len=-1, generate_data_mode=False):
    """Run ``num_episode`` rollouts of the VAE + RNN + controller pipeline.

    Returns:
        (rewards, lengths): total reward and final timestep index per episode.
    """
    episode_rewards = []
    episode_lengths = []

    # Episodes are capped at 3000 steps; max_len may only tighten the cap.
    cap = 3000
    if 0 < max_len < cap:
        cap = max_len

    if seed >= 0:
        random.seed(seed)
        np.random.seed(seed)
        model.env.seed(seed)

    for _ in range(num_episode):
        model.reset()
        obs = config.adjust_obs(model.env.reset())
        # Seed with a random action; replaced on every step of the loop.
        action = model.env.action_space.sample()
        model.env.render("human")
        if obs is None:
            obs = np.zeros(model.input_size)

        episode_return = 0.0
        for t in range(cap):
            if render_mode:
                model.env.render("human")
                if RENDER_DELAY:
                    time.sleep(0.01)

            vae_encoded_obs = model.update(obs, t)
            # Controller input is the concatenated [z, h].
            controller_obs = np.concatenate([vae_encoded_obs, model.hidden])

            # Choose the next action: scripted (data generation) or controller.
            if generate_data_mode:
                action = config.generate_data_action(t=t, current_action=action)
            elif MEAN_MODE:
                action = model.get_action(controller_obs, t=t, mean_mode=(not train_mode))
            else:
                action = model.get_action(controller_obs, t=t, mean_mode=False)

            obs, reward, done, info = model.env.step(action)
            obs = config.adjust_obs(obs)

            # Advance the memory RNN with [z, a]; predict returns (h, c).
            rnn_input = [
                np.array([[np.concatenate([vae_encoded_obs, action])]]),
                np.array([model.hidden]),
                np.array([model.cell_values]),
            ]
            h, c = model.rnn.forward.predict(rnn_input)
            model.hidden = h[0]
            model.cell_values = c[0]

            episode_return += reward
            if done:
                break

        if render_mode:
            print("reward", episode_return, "timesteps", t)
        episode_rewards.append(episode_return)
        episode_lengths.append(t)

    model.env.close()
    return episode_rewards, episode_lengths
def main(args):
    """Generate batched random-policy rollouts and save them as .npy files,
    randomly repositioning the car on the track at the start of each episode.

    Output: ./data/obs_data_<env>_<batch>.npy and
            ./data/action_data_<env>_<batch>.npy per batch.
    """
    env_name = args.env_name
    total_episodes = args.total_episodes
    start_batch = args.start_batch
    time_steps = args.time_steps
    render = args.render
    batch_size = args.batch_size
    run_all_envs = args.run_all_envs

    if run_all_envs:
        envs_to_generate = config.train_envs
    else:
        envs_to_generate = [env_name]

    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))
        env = make_env(current_env_name)
        s = 0
        batch = start_batch
        batch_size = min(batch_size, total_episodes)

        while s < total_episodes:
            obs_data = []
            action_data = []
            for i_episode in range(batch_size):
                print('-----')
                observation = env.reset()
                observation = config.adjust_obs(observation)

                # Position car randomly on track
                position = np.random.randint(len(env.track))
                env.car = Car(env.world, *env.track[position][1:4])
                # plt.imshow(observation)
                # plt.show()
                env.render()
                done = False
                action = env.action_space.sample()
                t = 0
                obs_sequence = []
                action_sequence = []

                # Fixed-length rollout; early termination is deliberately disabled.
                while t < time_steps:  #and not done:
                    t = t + 1
                    action = config.generate_data_action(t, action)
                    obs_sequence.append(observation)
                    action_sequence.append(action)
                    observation, reward, done, info = env.step(action)
                    observation = config.adjust_obs(observation)
                    if render:
                        env.render()

                obs_data.append(obs_sequence)
                action_data.append(action_sequence)
                print("Batch {} Episode {} finished after {} timesteps".format(
                    batch, i_episode, t + 1))
                print("Current dataset contains {} observations".format(
                    sum(map(len, obs_data))))
                s = s + 1

            print("Saving dataset for batch {}".format(batch))
            np.save('./data/obs_data_' + current_env_name + '_' + str(batch), obs_data)
            np.save(
                './data/action_data_' + current_env_name + '_' + str(batch), action_data)
            batch = batch + 1

        env.close()
def main(args):
    """Generate rollouts and save each episode as a .npz containing *two*
    crops of every frame: obsS (crop factor alpha*beta) and obsB (factor
    beta), both resized to WH x WH.

    beta is drawn uniformly from [alpha, 1) per episode, so alpha*beta <= beta
    — presumably obsS is the tighter crop; confirm against ``crop()``.
    Files are written under DIR_NAME/<model_name>/<random episode id>.npz.
    """
    env_name = args.env_name
    total_episodes = int(args.total_episodes)
    time_steps = int(args.time_steps)
    render = args.render
    run_all_envs = args.run_all_envs
    action_refresh_rate = args.action_refresh_rate
    alpha = float(args.alpha)
    model_name = str(args.model_name)

    if run_all_envs:
        envs_to_generate = config.train_envs
    else:
        envs_to_generate = [env_name]

    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))

        # Ensure the per-model output directory exists.
        if not os.path.isdir(DIR_NAME + model_name):
            os.mkdir(DIR_NAME + model_name)

        env = make_env(current_env_name)  # <1>
        s = 0

        while s < total_episodes:
            # Random id keeps concurrent workers from clobbering each other's files.
            episode_id = random.randint(0, 2**31 - 1)
            filename = DIR_NAME + model_name + '/' + str(episode_id) + ".npz"

            observation = env.reset()
            env.render()

            t = 0
            obs_sequenceS = []
            obs_sequenceB = []
            action_sequence = []
            reward_sequence = []
            done_sequence = []

            reward = -0.1
            done = False

            # Per-episode crop factor in [alpha, 1).
            beta = alpha + np.random.rand() * (1 - alpha)

            # Fixed-length rollout; early termination deliberately disabled.
            while t < time_steps:  # and not done:
                if t % action_refresh_rate == 0:
                    action = config.generate_data_action(t, env)  # <2>
                observation = config.adjust_obs(observation)  # <3>

                obs_sequenceS.append(
                    cv2.resize(crop(observation, alpha * beta),
                               dsize=(WH, WH),
                               interpolation=cv2.INTER_CUBIC))
                obs_sequenceB.append(
                    cv2.resize(crop(observation, beta),
                               dsize=(WH, WH),
                               interpolation=cv2.INTER_CUBIC))
                action_sequence.append(action)
                reward_sequence.append(reward)
                done_sequence.append(done)

                observation, reward, done, info = env.step(action)  # <4>

                t = t + 1

                if render:
                    env.render()

            print("Episode {} finished after {} timesteps".format(s, t))
            np.savez_compressed(filename,
                                obsS=np.asarray(obs_sequenceS),
                                obsB=np.asarray(obs_sequenceB),
                                action=np.asarray(action_sequence),
                                reward=np.asarray(reward_sequence),
                                done=np.asarray(done_sequence))  # <4>

            s = s + 1

        env.close()
def main(args):
    """Generate batched random-policy rollouts and save them as .npy files.

    For each environment, episodes of exactly ``time_steps`` steps are rolled
    out with actions from ``config.generate_data_action``; each batch is saved
    to ./data/{obs,action}_data_<env>_<batch>.npy.
    """
    env_name = args.env_name
    total_episodes = args.total_episodes
    start_batch = args.start_batch
    time_steps = args.time_steps
    render = args.render
    batch_size = args.batch_size
    run_all_envs = args.run_all_envs

    if run_all_envs:
        envs_to_generate = config.train_envs
    else:
        envs_to_generate = [env_name]

    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))
        env = make_env(current_env_name)
        s = 0
        batch = start_batch
        batch_size = min(batch_size, total_episodes)

        while s < total_episodes:
            obs_data = []
            action_data = []
            # Cap the final batch so the total never overshoots total_episodes
            # (previously the last batch always ran a full batch_size episodes).
            for i_episode in range(min(batch_size, total_episodes - s)):
                print('-----')
                observation = env.reset()
                observation = config.adjust_obs(observation)
                # plt.imshow(observation)
                # plt.show()
                env.render()
                done = False
                action = env.action_space.sample()
                t = 0
                obs_sequence = []
                action_sequence = []

                # Fixed-length rollout; `done` is intentionally ignored.
                while t < time_steps:  # and not done:
                    t = t + 1
                    action = config.generate_data_action(t, action)
                    obs_sequence.append(observation)
                    action_sequence.append(action)
                    observation, reward, done, info = env.step(action)
                    observation = config.adjust_obs(observation)
                    if render:
                        env.render()

                obs_data.append(obs_sequence)
                action_data.append(action_sequence)
                # t already equals the number of steps taken (incremented at the
                # top of the loop), so report t — the old t+1 overstated by one.
                print("Batch {} Episode {} finished after {} timesteps".format(batch, i_episode, t))
                print("Current dataset contains {} observations".format(sum(map(len, obs_data))))
                s = s + 1

            print("Saving dataset for batch {}".format(batch))
            np.save('./data/obs_data_' + current_env_name + '_' + str(batch), obs_data)
            np.save('./data/action_data_' + current_env_name + '_' + str(batch), action_data)
            batch = batch + 1

        env.close()
def main(args):
    """Generate random-policy rollouts and save observation/action datasets
    as compressed .npz files under ./data.

    Frames at or before ``start_frame`` are dropped from the saved sequences.
    In validation mode a single file of ``file_size`` episodes is written with
    an ``_valid_`` name instead of numbered data files.
    """
    env_name = args.env_name
    total_episodes = args.total_episodes
    start_file = args.start_file
    time_steps = args.time_steps
    render = args.render
    file_size = args.file_size
    run_all_envs = args.run_all_envs
    validation = args.validation
    start_frame = args.start_frame

    if validation:
        total_episodes = file_size

    if run_all_envs:
        envs_to_generate = config.train_envs
    else:
        envs_to_generate = [env_name]

    for current_env_name in envs_to_generate:
        print("Generating data for env {}".format(current_env_name))
        env = make_env(current_env_name)
        s = 0
        # Renamed from `file` — clearer, and avoids shadowing.
        file_index = start_file
        file_size = min(file_size, total_episodes)

        while s < total_episodes:
            obs_data = []
            action_data = []
            for i_episode in range(file_size):
                print("-----")
                observation = env.reset()
                observation = config.adjust_obs(observation)  # essential for saving as well
                env.render()
                done = False
                action = env.action_space.sample()
                # Renamed from `time` — the old name shadowed the stdlib
                # `time` module used elsewhere in this file.
                t = 0
                obs_sequence = []
                action_sequence = []

                while t < time_steps and not done:
                    t = t + 1
                    action = config.generate_data_action(t, action)
                    observation, reward, done, info = env.step(action)
                    observation = config.adjust_obs(observation)
                    # Skip warm-up frames at the start of the episode.
                    if t > start_frame:
                        obs_sequence.append(observation)  # [:56]?
                        action_sequence.append(action)
                    if render:
                        env.render()

                obs_data.append(obs_sequence)
                action_data.append(action_sequence)
                # t already equals the number of steps taken (incremented at the
                # top of the loop), so report t — the old value overstated by one.
                print("File {} Episode {} finished after {} timesteps".format(
                    file_index, i_episode, t))
                print("Current dataset contains {} observations".format(
                    sum(map(len, obs_data))))
                s = s + 1

            # The counter is a file index, not a batch — message fixed to match.
            print("Saving dataset for file {}".format(file_index))
            if validation:
                np.savez_compressed("./data/obs_valid_" + current_env_name, obs_data)
                np.savez_compressed("./data/action_valid_" + current_env_name, action_data)
            else:
                # np.random.shuffle(obs_data)
                # obs_data, action_data = shuffle(obs_data, action_data)
                np.savez_compressed(
                    "./data/obs_data_" + current_env_name + "_" + str(file_index), obs_data)
                np.savez_compressed(
                    "./data/action_data_" + current_env_name + "_" + str(file_index), action_data)
            file_index = file_index + 1

        env.close()