def make_multigoal_ssp_env(map_array, csp_scaling, csp_offset, object_locations,
                           x_axis_vec, y_axis_vec, dim=512, continuous=True,
                           movement_type='holonomic'):
    """Create a GridWorldEnv configured with SSP-based agent and goal observations.

    The observation specification is produced by ``generate_obs_dict`` from a
    fixed set of settings; only the axis vectors and ``dim`` are taken from the
    arguments. Everything else passed to ``GridWorldEnv`` is either forwarded
    from the arguments or hard-coded below.

    Args:
        map_array: forwarded to ``GridWorldEnv`` as ``map_array``.
        csp_scaling: forwarded to ``GridWorldEnv`` as ``csp_scaling``.
        csp_offset: forwarded to ``GridWorldEnv`` as ``csp_offset``.
        object_locations: forwarded to ``GridWorldEnv`` as ``object_locations``.
        x_axis_vec: raw vector data, wrapped in a ``spa.SemanticPointer``.
        y_axis_vec: raw vector data, wrapped in a ``spa.SemanticPointer``.
        dim: value used for the ``'csp_dim'`` observation setting.
        continuous: forwarded to ``GridWorldEnv`` as ``continuous``.
        movement_type: forwarded to ``GridWorldEnv`` as ``movement_type``.

    Returns:
        The constructed ``GridWorldEnv`` instance.
    """
    # Settings that involve the semantic pointer observations.
    pointer_settings = dict(
        x_axis_vec=spa.SemanticPointer(data=x_axis_vec),
        y_axis_vec=spa.SemanticPointer(data=y_axis_vec),
        goal_csp=True,
        agent_csp=True,
        csp_dim=dim,
        goal_csp_egocentric=False,
    )

    # other arguments for bio sensors
    sensor_settings = dict(
        full_map_obs=False,
        pob=0,
        max_sensor_dist=10,
        n_sensors=10,
        fov=180,
        normalize_dist_sensors=True,
        n_grid_cells=0,
        heading="none",
        location="none",
        goal_loc="none",
        bc_n_ring=12,
        bc_n_rad=3,
        bc_dist_rad=0.75,
        bc_receptive_field_min=1,
        bc_receptive_field_max=1.5,
        hd_n_cells=8,
        hd_receptive_field_min=0.78539816339,
        hd_receptive_field_max=0.78539816339,
        goal_vec="normalized",
    )

    # Merge while keeping the original key order (pointer settings first).
    observation_spec = generate_obs_dict({**pointer_settings, **sensor_settings})

    return GridWorldEnv(
        map_array=map_array,
        object_locations=object_locations,
        observations=observation_spec,
        movement_type=movement_type,
        max_lin_vel=5,
        max_ang_vel=5,
        continuous=continuous,
        max_steps=1000,
        fixed_episode_length=False,
        dt=0.1,
        screen_width=300,
        screen_height=300,
        csp_scaling=csp_scaling,
        csp_offset=csp_offset,
    )
'heading': 'none', 'location': 'none', 'goal_loc': 'none', 'goal_vec': 'none', 'bc_n_ring': 0, 'hd_n_cells': 0, 'goal_csp': False, 'goal_distance': 0, # 0 means completely random, goal isn't used anyway 'agent_csp': True, 'csp_dim': args.dim, 'x_axis_vec': x_axis_sp, 'y_axis_vec': y_axis_sp, } obs_dict = generate_obs_dict(params) positions = np.zeros( (args.n_mazes, args.n_trajectories, args.trajectory_steps, 2)) dist_sensors = np.zeros((args.n_mazes, args.n_trajectories, args.trajectory_steps, args.n_sensors)) # spatial semantic pointers for position ssps = np.zeros((args.n_mazes, args.n_trajectories, args.trajectory_steps, args.dim)) # velocity in x and y cartesian_vels = np.zeros( (args.n_mazes, args.n_trajectories, args.trajectory_steps, 2)) def get_ssp_activation(pos):
def __init__(
        self,
        maze_sps,
        x_axis_sp,
        y_axis_sp,
        dim=256,
        maze_id_dim=256,
        n_sensors=36,
        n_goals=7,
        use_dataset_goals=False,
        sim_dt=0.01,
        nengo_dt=0.001,
        maze_index=0,
        normalize_action=True,
        noise=0.1,
        env_seed=13,
        n_trials=100,  # number of trials to record before exiting
        fname_data='output/debugging'):
    """Load a maze from the dataset, build its GridWorldEnv and the output vector.

    The output vector ``self.env_output`` is laid out as
    ``[maze id (maze_id_dim) | goal estimate (dim) | sensors (n_sensors * 4) |
    agent x, agent y, goal x, goal y]``.

    Args:
        maze_sps: array indexed as ``maze_sps[maze_index, :]``; that row is
            copied into the first ``maze_id_dim`` entries of the output.
        x_axis_sp: passed to ``encode_point`` when encoding goal locations.
        y_axis_sp: passed to ``encode_point`` when encoding goal locations.
        dim: length of the goal vocabulary vectors and of the item memory.
        maze_id_dim: number of output entries reserved for the maze id.
        n_sensors: sensor count given to the observation settings; the
            observation slot in the output is ``n_sensors * 4`` wide.
        n_goals: number of entries of ``possible_objects`` used as goals.
        use_dataset_goals: if True, goal locations come from the dataset;
            otherwise they are set to None so the environment chooses them.
        sim_dt: environment update period; with ``nengo_dt`` defines
            ``self.dt_ratio``.
        nengo_dt: simulator step; see ``sim_dt``.
        maze_index: which maze of the dataset to use.
        normalize_action: stored on the instance.
        noise: stored on the instance.
        env_seed: stored in the observation settings as ``'seed'``; the
            environment itself is seeded with ``env_seed + maze_index``.
        n_trials: number of trials to record; sizes the result arrays.
        fname_data: stored on the instance as ``self.fname_data``.
    """
    self.dim = dim
    self.maze_id_dim = maze_id_dim
    self.n_sensors = n_sensors
    self.sim_dt = sim_dt
    self.nengo_dt = nengo_dt
    # Round before converting: a ratio that floating point represents as
    # e.g. 2.9999999999999996 would otherwise be truncated to 2.
    self.dt_ratio = int(round(self.sim_dt / self.nengo_dt))
    self.normalize_action = normalize_action
    self.noise = noise
    self.n_trials = n_trials

    # index of the current trial
    self.trial_index = 0
    # 1 for reaching goal within step limit, 0 for not
    self.successes = np.zeros((self.n_trials, ), dtype=bool)
    # return for each trial
    self.returns = np.zeros((self.n_trials, ))

    if not os.path.exists('output'):
        os.makedirs('output')
    # self.fname_data = 'output/maze{}_seed{}_spiking_data.npz'.format(maze_index, env_seed)
    self.fname_data = fname_data

    # Output vector periodically updated based on sim_dt
    # last 4 dimensions are for debug (agent x,y and goal x,y)
    self.env_output = np.zeros(
        (self.maze_id_dim + self.dim + self.n_sensors * 4 + 4))
    self.steps = 0

    home = os.path.expanduser("~")
    dataset_file = os.path.join(
        home,
        'ssp-navigation/ssp_navigation/datasets/mixed_style_100mazes_100goals_64res_13size_13seed/maze_dataset.npz'
    )
    # np.load on an .npz returns an archive object holding an open file;
    # pull out the arrays that are needed and close it right away.
    with np.load(dataset_file) as data:
        xs = data['xs']
        ys = data['ys']
        # n_mazes by size by size
        coarse_mazes = data['coarse_mazes']
        goals = data['goals']

    # fixed random set of locations for the goals
    limit_range = xs[-1] - xs[0]
    coarse_size = coarse_mazes.shape[1]
    # Map dataset coordinates onto the coarse maze grid.
    goals_scaled = ((goals - xs[0]) / limit_range) * coarse_size

    self.map_array = coarse_mazes[maze_index, :, :]

    object_locations = OrderedDict()
    self.vocab = {}
    for i in range(n_goals):
        sp_name = possible_objects[i]
        if use_dataset_goals:
            # using goal locations from the dataset
            object_locations[sp_name] = goals_scaled[maze_index, i]
        else:
            # If set to None, the environment will choose a random free space on init
            object_locations[sp_name] = None
        # vocab[sp_name] = spa.SemanticPointer(ssp_dim)
        self.vocab[sp_name] = nengo_spa.SemanticPointer(
            data=np.random.uniform(-1, 1, size=dim)).normalized()

    colour_centers = np.array([
        [3, 3],
        [10, 4],
        [7, 7],
    ])

    def colour_func(x, y, sigma=7):
        """Return three Gaussian-shaped values, one per colour centre."""
        ret = np.zeros((3, ))
        for c in range(3):
            ret[c] = np.exp(-((colour_centers[c, 0] - x)**2 +
                              (colour_centers[c, 1] - y)**2) / (sigma**2))
        return ret

    params = {
        'continuous': True,
        'fov': 360,
        'n_sensors': n_sensors,
        'colour_func': colour_func,
        'max_sensor_dist': 10,
        'normalize_dist_sensors': False,
        'movement_type': 'holonomic',
        'seed': env_seed,
        # 'map_style': args.map_style,
        'map_size': 10,
        # 'fixed_episode_length': True,
        'fixed_episode_length': False,
        'episode_length': 1000,  # 500, 1000, #200,
        'max_lin_vel': 5,
        'max_ang_vel': 5,
        'dt': 0.1,
        'full_map_obs': False,
        'pob': 0,
        'n_grid_cells': 0,
        'heading': 'none',
        'location': 'none',
        'goal_loc': 'none',
        'goal_vec': 'none',
        'bc_n_ring': 0,
        'hd_n_cells': 0,
        'csp_dim': 0,
        'goal_csp': False,
        'agent_csp': False,
        # set up rewards so minimum score is -1 and maximum score is +1 (based on 1000 steps max)
        'wall_penalty': -.001,
        'movement_cost': -.001,
        'goal_reward': 1.,
        'goal_distance': 0,  # args.goal_distance # 0 means completely random
    }

    obs_dict = generate_obs_dict(params)

    self.env = GridWorldEnv(
        map_array=self.map_array,
        # object locations explicitly chosen so a fixed SSP memory can be given
        object_locations=object_locations,
        observations=obs_dict,
        movement_type=params['movement_type'],
        max_lin_vel=params['max_lin_vel'],
        max_ang_vel=params['max_ang_vel'],
        continuous=params['continuous'],
        max_steps=params['episode_length'],
        fixed_episode_length=params['fixed_episode_length'],
        wall_penalty=params['wall_penalty'],
        movement_cost=params['movement_cost'],
        goal_reward=params['goal_reward'],
        dt=params['dt'],
        screen_width=300,
        screen_height=300,
        debug_ghost=True,
        seed=env_seed + maze_index,
    )

    # Fill the item memory with the correct SSP for remembering the goal locations
    self.item_memory = nengo_spa.SemanticPointer(data=np.zeros((dim, )))
    for i in range(n_goals):
        sp_name = possible_objects[i]
        # Read the locations back from the environment, since it may have
        # chosen them itself when None was supplied above.
        x_env, y_env = self.env.object_locations[sp_name][[0, 1]]

        # Need to scale to SSP coordinates
        # Env is 0 to 13, SSP is -5 to 5
        x = ((x_env - 0) / coarse_size) * limit_range + xs[0]
        y = ((y_env - 0) / coarse_size) * limit_range + ys[0]

        self.item_memory += self.vocab[sp_name] * encode_point(
            x, y, x_axis_sp, y_axis_sp)
    # item_memory.normalize()
    self.item_memory = self.item_memory.normalized()

    # the unsqueeze is to add the batch dimension
    # map_id = torch.Tensor(maze_sps[maze_index, :]).unsqueeze(0)

    obs = self.env.reset()

    # get the cue
    goal_object_index = self.env.goal_object_index
    cue_sp = self.vocab[possible_objects[goal_object_index]]

    sensor_start = self.maze_id_dim + self.dim
    self.env_output[sensor_start:sensor_start + self.n_sensors * 4] = obs

    # Load in the static outputs for this environment
    self.env_output[:self.maze_id_dim] = maze_sps[maze_index, :]
    # self.env_output[:self.dim] = maze_sps[maze_index+1, :]
    # TODO: temporarily just returning the deconvolved noisy goal
    # self.env_output[self.dim:2*self.dim] = item_memory.v
    self.env_output[self.maze_id_dim:sensor_start] = (
        self.item_memory * ~cue_sp).v

    # Debug entries: agent position followed by goal position.
    self.env_output[-4] = self.env.state[0]
    self.env_output[-3] = self.env.state[1]
    self.env_output[-2] = self.env.goal_state[0]
    self.env_output[-1] = self.env.goal_state[1]

    self.th = 0

    # TODO: set scales appropriately
    self.scale_x = 1
    self.scale_y = 1

    self.build_html_string()
    self.update_html()

    # NOTE(review): self.base_html is presumably set by build_html_string() — confirm.
    self._nengo_html_ = self.base_html.format(self.env.state[0],
                                              self.env.state[1],
                                              self.env.goal_state[0],
                                              self.env.goal_state[1])
def get_env(seed=13, dim=512):
    """Build a fixed 10x10 GridWorldEnv with SSP agent and goal observations.

    Args:
        seed: seed for the ``np.random.RandomState`` used to generate the two
            axis vectors.
        dim: dimensionality passed to ``make_good_unitary`` and used as the
            ``'csp_dim'`` observation setting.

    Returns:
        The constructed ``GridWorldEnv`` instance.
    """
    rstate = np.random.RandomState(seed=seed)
    # Both axis vectors draw from the same generator, x first and then y.
    x_axis_sp = make_good_unitary(dim=dim, rng=rstate)
    y_axis_sp = make_good_unitary(dim=dim, rng=rstate)

    map_array = np.array([
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 0, 1, 0, 0, 1, 0, 0, 0, 1],
        [1, 0, 1, 1, 0, 0, 0, 0, 0, 1],
        [1, 0, 0, 1, 0, 0, 1, 1, 0, 1],
        [1, 0, 0, 0, 0, 0, 0, 1, 0, 1],
        [1, 0, 0, 0, 1, 1, 0, 0, 0, 1],
        [1, 0, 1, 0, 1, 0, 0, 0, 0, 1],
        [1, 1, 1, 0, 0, 0, 1, 1, 0, 1],
        [1, 1, 0, 0, 0, 0, 1, 0, 0, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    ])

    # Parameters to define the environment to use
    params = {
        'x_axis_vec': x_axis_sp,
        'y_axis_vec': y_axis_sp,
        'full_map_obs': False,
        'pob': 0,
        'max_sensor_dist': 10,
        'n_sensors': 36,
        'fov': 360,
        'normalize_dist_sensors': True,
        'n_grid_cells': 0,
        'bc_n_ring': 0,
        'bc_n_rad': 0,
        'bc_dist_rad': 0,
        'bc_receptive_field_min': 0,
        'bc_receptive_field_max': 0,
        'hd_n_cells': 0,
        'hd_receptive_field_min': 0,
        'hd_receptive_field_max': 0,
        'heading': 'circular',
        'location': 'none',
        'goal_loc': 'none',
        'goal_vec': 'none',
        'goal_csp': True,
        'agent_csp': True,
        'goal_csp_egocentric': True,
        'csp_dim': dim,
    }

    obs_dict = generate_obs_dict(params)

    # For this 10-row map the offset is 5.0 and the scaling is 1.0.
    half_extent = map_array.shape[0] / 2
    csp_offset = half_extent
    csp_scaling = 5 / half_extent

    env = GridWorldEnv(
        map_array=map_array,
        # object_locations=object_locations,
        observations=obs_dict,
        continuous=True,
        movement_type='holonomic',
        dt=0.1,
        max_steps=1000,
        fixed_episode_length=False,
        max_lin_vel=5,
        max_ang_vel=5,
        screen_width=300,
        screen_height=300,
        # Each value goes to its own keyword; these were previously crossed
        # (scaling received the offset and vice versa).
        csp_scaling=csp_scaling,
        csp_offset=csp_offset,
    )

    return env
'hd_receptive_field_max': np.pi / 4, 'goal_vec': 'normalized', # TODO: add grid cell observations when implemented } else: specific_params = { 'location': 'normalized', 'goal_vec': 'normalized', 'goal_loc': 'normalized', 'heading': 'circular', } # Merge dictionaries, replacing base params with specific params params = {**base_params, **specific_params} obs_dict = generate_obs_dict(params) config = { 'map_array': map_array, 'observations': obs_dict, 'continuous': continuous, 'movement_type': movement_type, 'dt': 0.1, 'max_steps': 1000, } env_configs[name] = config simple_map_array = np.array([ [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 0, 0, 0, 0, 0, 0, 0, 0, 1],