def main():
    env = UnityEnv(
        "/homes/gkumar/Documents/UnityProjects/maze/Build/mazeBasic_Discrete_imageOnly",
        0,
        use_visual=True,
        uint8_visual=True)
    HOSTNAME = os.uname()[1]
    logger.configure('./logs/' + HOSTNAME)  # Change to log in a different directory
    act = deepq.learn(
        env,
        "cnn",  # conv_only is also a good choice for GridWorld
        lr=2.5e-4,
        total_timesteps=1000000,
        buffer_size=50000,
        exploration_fraction=0.05,
        exploration_final_eps=0.1,
        print_freq=20,
        train_freq=5,
        learning_starts=20000,
        target_network_update_freq=50,
        gamma=0.99,
        prioritized_replay=False,
        checkpoint_freq=1000,
        checkpoint_path='./logs',  # Change to save the model in a different directory
        dueling=True)
    print("Saving model to unity_model.pkl")
    act.save("unity_model.pkl")
def get_gym_env(self, unity_file):
    """
    @param unity_file the full path to the Unity environment build
    Returns an OpenAI Gym environment that wraps the given Unity environment,
    configured from the selected trainer parameters.
    """
    return UnityEnv(unity_file,
                    self.params.get('worker_id', 0),
                    use_visual=self.params.get('use_visual', False),
                    uint8_visual=self.params.get('use_uint8_visual', False),
                    multiagent=self.params.get('multiagent', False),
                    flatten_branched=self.params.get('flatten_branched', False),
                    allow_multiple_visual_obs=self.params.get('allow_multiple_visual_obs', False),
                    no_graphics=self.params.get('no_graphics', True))
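# A minimal sketch of the trainer parameter dict this method reads. The key
# names are taken from the .get() calls above; the values are illustrative
# assumptions, not defaults from the original project.
params = {
    'worker_id': 0,
    'use_visual': True,
    'use_uint8_visual': True,
    'multiagent': False,
    'flatten_branched': True,
    'allow_multiple_visual_obs': False,
    'no_graphics': False,
}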
def wrap_unity_env(env_path, frame_skip=0, frame_stack=False, chw_style=False, **unity_config):
    worker_id = unity_config.get('port', 9527)
    use_visual = unity_config.get('use_visual', True)
    uint8_visual = unity_config.get('uint8_visual', True)
    flatten_branched = unity_config.get('flatten_branched', True)
    multiagent = unity_config.get('multiagent', False)
    env = UnityEnv(env_path,
                   worker_id=worker_id,
                   use_visual=use_visual,
                   uint8_visual=uint8_visual,
                   flatten_branched=flatten_branched,
                   multiagent=multiagent)
    # Be careful with `Decision Interval` in the agent script of the Unity ML-Agents project
    if frame_skip > 0:
        env = MaxAndSkipEnv(env, frame_skip)
    if chw_style:
        env = CHWStyle(env)
    if frame_stack:
        env = FrameStack(env, frame_stack, chw_style)
    return env
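# A minimal usage sketch of wrap_unity_env. The build path is a placeholder,
# and MaxAndSkipEnv/CHWStyle/FrameStack are assumed to be the project's own
# wrapper classes imported alongside this function.
env = wrap_unity_env("/path/to/UnityBuild",
                     frame_skip=4,      # max-and-skip over 4 frames
                     frame_stack=4,     # stack the last 4 observations
                     chw_style=True,    # channel-first observation layout
                     port=9527)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()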
def main():
    mask_unused_gpus()
    env = UnityEnv("../unity_envs/kais_banana",
                   0,
                   use_visual=True,
                   uint8_visual=True,
                   flatten_branched=True)
    logger.configure('./logs')  # Change to log in a different directory
    act = deepq.learn(
        env,
        "cnn",  # conv_only is also a good choice for GridWorld
        lr=2.5e-4,
        total_timesteps=100000,  # 0
        buffer_size=50000,
        exploration_fraction=0.05,
        exploration_final_eps=0.1,
        print_freq=20,
        train_freq=5,
        learning_starts=20000,
        target_network_update_freq=50,
        gamma=0.99,
        prioritized_replay=False,
        checkpoint_freq=1000,
        checkpoint_path='./logs',  # Change to save the model in a different directory
        dueling=True
    )
    print("Saving model to unity_model.pkl")
    act.save("unity_model.pkl")
start = time.time()
mask_unused_gpus()

# Avoid TensorFlow eating up all GPU memory
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
K.set_session(sess)

# Setting up the env
# TODO: worker_id can be changed to run in parallel
# flatten_branched gives us a one-hot encoding of all 54 action combinations.
print("Opening unity env")
env = UnityEnv(
    "../unity_envs/kais_banana2",
    worker_id=39,
    use_visual=True,
    flatten_branched=True
)
# KOE: Note: if I accept images with uint8_visual=True, I have to convert to float later.

print("Resetting env")
initial_observation = env.reset()

# KOETODO: this would have to be manually configured for each environment.
# KOE: What is this misc?
# misc = game_state.game_variables  # [Health]
# prev_misc = misc
# KOE: I think this should be the same as my battery measure.
misc = 100  # [Health]
prev_misc = misc

# game.get_available_buttons_size()  # [Turn Left, Turn Right, Move Forward]
start = time.time()
mask_unused_gpus()

# Avoid TensorFlow eating up all GPU memory
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
K.set_session(sess)

# Setting up the env
# TODO: worker_id can be changed to run in parallel
# flatten_branched gives us a one-hot encoding of all 54 action combinations.
print("Opening unity env")
env = UnityEnv("../unity_envs/kais_banana3",
               worker_id=24,
               use_visual=True,
               uint8_visual=True,
               flatten_branched=True)

print("Resetting env")
initial_observation = env.reset()

# KOETODO: this would have to be manually configured for each environment.
# KOE: What is this misc?
# misc = game_state.game_variables  # [Health]
# prev_misc = misc
# KOE: I think this should be the same as my battery measure.
misc = 100  # [Health]
prev_misc = misc

# game.get_available_buttons_size()  # [Turn Left, Turn Right, Move Forward]
import json
import pickle
import zlib

import gym
import numpy as np
from gym_unity.envs.unity_env import UnityEnv


class UnityEnvWrapper:
    def __init__(self, env_config=None, use_eval=False, rpc_mode=False):
        self.env = None
        if not rpc_mode:
            assert env_config is not None
            self.launch(env_config, use_eval)

    def launch(self, env_config, use_eval=False):
        environment_path = (env_config["environment_path_eval"]
                            if use_eval else env_config["environment_path"])
        port = env_config.get("port", 0)
        if use_eval and port:
            port += 2
        use_visual = env_config.get("use_visual", False)
        use_vector = env_config.get("use_vector", True)
        multiagent = env_config.get("multiagent", False)
        uint8_visual = env_config.get("uint8_visual", True)
        flatten_branched = env_config.get("flatten_branched", True)
        self.env = UnityEnv(
            environment_path,
            port,
            use_visual=use_visual,
            use_vector=use_vector,
            uint8_visual=uint8_visual,
            multiagent=multiagent,
            flatten_branched=flatten_branched,
        )
        self.action_space = self.env._action_space
        self.observation_space = self.env._observation_space
        # agent name must be unique among **all** agents
        self.agent_name = [f'{port}_{i}' for i in range(self.env.number_agents)]

    def _transform_list_to_dict(self, objs):
        return {name: obj for name, obj in zip(self.agent_name, objs)}

    def _transform_dict_to_list(self, objs):
        return [objs[name] for name in self.agent_name]

    def step(self, act, action_settings=None):
        action = np.stack(self._transform_dict_to_list(act)).tolist()
        observation, reward, done, info = self.env.step(action)
        transform = self._transform_list_to_dict
        info = list(map(json.loads, info['text_observation']))
        for i, x in enumerate(info):
            x['done'] = done[i]
        done = [False] * len(self.agent_name)
        done_dict = transform(done)
        done_dict['__all__'] = False  # no early termination (for logging)
        return transform(observation), transform(reward), done_dict, transform(info)

    def reset(self, reset_settings=None):
        obs = self.env.reset()
        return self._transform_list_to_dict(obs)

    def get_env_spaces(self):
        spaces = self.action_space, self.observation_space, self.agent_name
        p = pickle.dumps(spaces)
        z = zlib.compress(p)
        return z

    def get_action_count(self):
        if isinstance(self.env.action_space, gym.spaces.Discrete):
            return self.env.action_space.n
        elif isinstance(self.env.action_space, gym.spaces.MultiDiscrete):
            return self.env.action_space.nvec.tolist()
        raise NotImplementedError

    def sample(self):
        return self.env.action_space.sample()

    def number_agents(self):
        return self.env.number_agents

    def env_close(self):
        if self.env:
            self.env.close()
            self.env = None

    def close(self):
        self.env_close()

    def hello(self):
        print('Hello World')
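# A minimal usage sketch of UnityEnvWrapper. The env_config values are
# illustrative placeholders; only the keys read in launch() above are assumed,
# and the build paths are not real.
config = {
    "environment_path": "/path/to/UnityBuild",
    "environment_path_eval": "/path/to/UnityBuild_eval",
    "port": 5005,
    "use_visual": False,
    "use_vector": True,
    "multiagent": True,
    "uint8_visual": True,
    "flatten_branched": True,
}
wrapper = UnityEnvWrapper(env_config=config)
obs = wrapper.reset()  # dict keyed by agent name, e.g. '5005_0'
actions = {name: wrapper.sample() for name in wrapper.agent_name}
obs, rewards, dones, infos = wrapper.step(actions)
wrapper.env_close()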
import cv2
import numpy as np
from gym_unity.envs.unity_env import UnityEnv

np.set_printoptions(threshold=np.inf)

# create grid and write to file
maze_trials, max_episode, max_steps = 1, 100000, 900
action_repeat = 30
buffer = ReplayBuffer(10000)
learner = Learner(buffer)

# assume that Unity reads that file and generates the maze dynamically
env = UnityEnv(
    "/home/gaurav/MySharedRepository/mazeContinuousTarget_fixed_camera/Build/mazeContinuousTarget_fixed_camera",
    0,
    use_visual=True,
    uint8_visual=True)


def drawTrajectory():
    action_repeat = 150
    # size of experience:
    #   11 velocities on x axis
    #   11 velocities on y axis
    #   11 actions on x axis
    #   11 actions on y axis
    #   150 action repeats
    #   2 position values
def sampleTrajectory():
    action_repeat = 300
    action_range_around_zero = 20  # should be even
    # action_range_around_zero = [-9, -8, -7, -6, -5, 0, 5, 6, 7, 8, 9]
    period = 1
    if action_range_around_zero % 2 != 0:
        return False
    env = UnityEnv(
        "/homes/gkumar/Documents/UnityProjects/mazeContinuousTarget_fixed_camera_data_collection/Build/mazeContinuousTarget_fixed_camera_data_collection",
        0,
        use_visual=True,
        uint8_visual=True)
    list_of_data = []
    for i in range(int(-1 * action_range_around_zero / 2),
                   int(action_range_around_zero / 2 + 1),
                   period):  # velocity X [-5, -3, -1, 1, 3, 5]
        for j in range(int(-1 * action_range_around_zero / 2),
                       int(action_range_around_zero / 2 + 1),
                       period):  # velocity Y [-5, -3, -1, 1, 3, 5]
            print(i, j)
            for k in range(int(-1 * action_range_around_zero / 2),
                           int(action_range_around_zero / 2 + 1),
                           period):  # action X [-5, -3, -1, 1, 3, 5]
                for l in range(int(-1 * action_range_around_zero / 2),
                               int(action_range_around_zero / 2 + 1),
                               period):  # action Y [-5, -3, -1, 1, 3, 5]
                    single_tuple = np.zeros(4 + 2 * action_repeat)
                    obs_fovea = env.reset()
                    obs_fovea_next, reward, done, info = env.step([[i], [j], [k], [l]])
                    # action
                    single_tuple[0] = i
                    single_tuple[1] = j
                    # velocity
                    single_tuple[2] = k
                    single_tuple[3] = l
                    for m in range(0, action_repeat):
                        single_tuple[3 + m * 2 + 1] = info["brain_info"].vector_observations[0][2]
                        single_tuple[3 + m * 2 + 2] = info["brain_info"].vector_observations[0][3]
                        x_vel_new = info["brain_info"].vector_observations[0][6]
                        y_vel_new = info["brain_info"].vector_observations[0][7]
                        if math.sqrt(
                                math.pow((single_tuple[3 + m * 2 + 1] - single_tuple[4]), 2)
                                + math.pow((single_tuple[3 + m * 2 + 2] - single_tuple[5]), 2)) < 6:
                            obs_fovea_next, reward, done, info = env.step(
                                [[i], [j], [x_vel_new], [y_vel_new]])
                        else:
                            for n in range(m, action_repeat):
                                single_tuple[3 + n * 2 + 1] = single_tuple[3 + (m - 1) * 2 + 1]
                                single_tuple[3 + n * 2 + 2] = single_tuple[3 + (m - 1) * 2 + 2]
                            break
                    list_of_data.append(single_tuple)
    h5f = h5py.File('data.h5', 'w')
    h5f.create_dataset('dataset_1', data=list_of_data)
    h5f.close()
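# A minimal read-back sketch for the dataset written above (an assumption about
# how it would be consumed, not code from the original project). Each row holds
# the 4 header values followed by 2 * action_repeat position samples.
import h5py
import numpy as np

with h5py.File('data.h5', 'r') as h5f:
    data = np.array(h5f['dataset_1'])
print(data.shape)    # (num_rows, 4 + 2 * action_repeat)
print(data[0, :4])   # first row's action/velocity header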
import gym
from baselines import deepq
from baselines import logger
import time
from gym_unity.envs.unity_env import UnityEnv
import subprocess as sp
import os

env = UnityEnv("../unity_envs/kais_banana",
               0,
               use_visual=True,
               uint8_visual=True,
               flatten_branched=True)

act = deepq.learn(env,
                  network='cnn',
                  total_timesteps=0,
                  load_path="logs_backup/model")  # "unity_model.pkl"

# Visualizing
# TODO: Maybe slow down the simulation by inserting some delays here.
while True:
    obs, done = env.reset(), False
    episode_rew = 0
    while not done:
        env.render()
        obs, rew, done, _ = env.step(act(obs[None])[0])
        episode_rew += rew