def encodes(self, o: TSTensor):
    """Render every sample of a time-series batch as an RGB matrix image.

    Each sample (a ``(nvars, seq_len)`` matrix) is drawn with ``imshow``
    on a single reused matplotlib figure, colour-mapped over the fixed
    range [-1, 1], and the rasterised canvas is read back.

    Args:
        o: Time-series tensor whose last two dimensions are
            ``(nvars, seq_len)``. A 2-D input is treated as a batch of one.

    Returns:
        ``TSImage`` built from a ``(batch, 3, height, width)`` tensor with
        values scaled to [0, 1], moved back to the device ``o`` came from.
    """
    device = o.device
    # Rendering goes through NumPy/matplotlib, so work on a CPU copy.
    if o.data.device.type == 'cuda':
        o = o.cpu()
    if o.ndim == 2:
        o = o[None]
    nvars, seq_len = o.shape[-2:]
    aspect = seq_len / nvars
    # Fall back to the sequence length when no explicit size is configured.
    # The original computed this value but then passed ``self.size`` to
    # ``get_plot_fig``, so the fallback was silently ignored.
    size = ifnone(self.size, seq_len)
    fig = get_plot_fig(size, dpi=self.dpi)
    ax = fig.get_axes()[0]
    ax.set_xlim(0, seq_len - 1)
    canvas = FigureCanvasAgg(fig)
    output = []
    im = None
    for oi in o:
        if im is None:
            # First sample: create the image artist once ...
            im = ax.imshow(oi, aspect=aspect, vmin=-1, vmax=1,
                           cmap=self.cmap, **self.kwargs)
        else:
            # ... then only swap its data for the remaining samples.
            im.set_data(oi)
        canvas.draw()
        # Drop the alpha channel of the RGBA buffer.
        buf = np.asarray(canvas.buffer_rgba())[..., :3]
        canvas.flush_events()
        # (H, W, 3) -> (1, 3, H, W), scaled to [0, 1].
        output.append(tensor(buf / 255).permute(2, 0, 1)[None])
    return TSImage(torch.cat(output)).to(device=device)
class GridWorldEnv(DiscreteEnv):
    """
    Square grid world with random obstacles and a goal in the last cell.

    States:
        Cell ``(x, y)`` has id ``x + y * size``. One extra state with id
        ``size * size`` is an absorbing terminal state. The last cell
        (id ``size * size - 1``, painted gold when rendering) moves to the
        absorbing state with reward 1 whatever the action.

    Actions:
        0: x - 1, 1: x + 1, 2: y - 1, 3: y + 1.
        A move that would leave the grid or enter an obstacle keeps the
        agent in place.
    """

    def __init__(self, size=20, discount=0.99, seed=0):
        """Build a random ``size`` x ``size`` grid.

        Args:
            size: Side length of the grid.
            discount: Discount factor, stored on the instance.
            seed: Seed passed to the global ``np.random`` generator before
                the obstacles are sampled.
        """
        self.__name__ = self.__class__.__name__ + str(seed)
        self._state = 0
        self._states = None
        self._fig = None
        self.discount = discount
        self.max_path_length = 2 * size
        np.random.seed(seed)
        # 1 marks an obstacle (probability 0.2 per cell).
        self._grid = np.random.binomial(1, 0.2, size=(size, size))
        # Keep the 2x2 corners around the first and last cell free.
        self._grid[:2, :2] = 0
        self._grid[-2:, -2:] = 0
        # Free cells are white, obstacles black, the goal cell gold.
        self._rgb_grid = np.zeros((size, size, 3), dtype=np.uint8)
        self._rgb_grid[:, :, :] = np.expand_dims(
            ((1 - self._grid) * 255).astype(np.uint8), axis=-1)
        self._rgb_grid[size - 1, size - 1, :] = 255, 215, 0
        self._size = size
        self.dt = .02
        self.obs_dims = 2
        self._scale = 4
        self.vectorized = True
        # Presumably allocates self._transitions / self._rewards and calls
        # the _build_* hooks, which need the attributes set above -- confirm
        # against DiscreteEnv.
        DiscreteEnv.__init__(self, size * size + 1, 4)

    def step(self, action):
        """Advance the single-environment state by one action.

        Returns:
            ``(next_state, reward, done, env_info)``. ``done`` is evaluated
            on the state *before* the transition, i.e. it is True when the
            step is taken from the absorbing state.
        """
        probs = self._transitions[self._state, action]
        next_state = np.argmax(np.random.multinomial(1, probs))
        reward = self._rewards[self._state, action, next_state]
        done = self._state == self._size ** 2
        env_info = dict()
        self._state = next_state
        return next_state, reward, done, env_info

    def reset(self):
        """Sample a random obstacle-free start cell and return its id."""
        self._states = None
        num_cells = self._size * self._size
        state = np.random.randint(0, num_cells)
        # Re-draw until the sampled cell is not an obstacle.
        while self._grid[state % self._size, state // self._size]:
            state = np.random.randint(0, num_cells)
        self._state = state
        return self._state

    def vec_reset(self, num_states):
        """Sample ``num_states`` obstacle-free start cells at once."""
        num_cells = self._size * self._size
        states = np.random.randint(0, num_cells, size=(num_states,))
        collisions = self._grid[states % self._size, states // self._size]
        num_collisions = np.sum(collisions)
        # Re-draw only the entries that landed on an obstacle.
        while num_collisions:
            states[collisions.astype(bool)] = np.random.randint(
                0, num_cells, size=(num_collisions,))
            collisions = self._grid[states % self._size,
                                    states // self._size]
            num_collisions = np.sum(collisions)
        self._states = states
        return self._states

    def vec_step(self, actions):
        """Advance all vectorized states; requires a prior ``vec_reset``.

        The next state is the arg-max of each transition row (no sampling).
        """
        assert self._states is not None
        assert len(self._states) == len(actions)
        probs = self._transitions[self._states, actions]
        next_states = np.argmax(probs, axis=-1)
        rewards = self._rewards[self._states, actions, next_states]
        dones = self._states == self._size ** 2
        env_info = dict()
        self._states = next_states
        return next_states, rewards, dones, env_info

    def _build_transitions(self):
        """Fill ``self._transitions`` with the deterministic grid dynamics."""
        size = self._size
        # (dx, dy) displacement for actions 0..3.
        moves = ((-1, 0), (1, 0), (0, -1), (0, 1))
        for x in range(size):
            for y in range(size):
                id_s = x + y * size
                for act, (dx, dy) in enumerate(moves):
                    next_x, next_y = x + dx, y + dy
                    # Moves off the grid leave that coordinate unchanged.
                    if (next_x < 0) or (next_x >= size):
                        next_x = x
                    if (next_y < 0) or (next_y >= size):
                        next_y = y
                    # Obstacles (target or current cell) block the move.
                    if self._grid[next_x, next_y] or self._grid[x, y]:
                        next_x, next_y = x, y
                    id_next_s = next_x + next_y * size
                    self._transitions[id_s, act, id_next_s] = 1.
        # The goal cell always moves to the absorbing state, which loops.
        self._transitions[-2, :, :] = 0.
        self._transitions[-2, :, -1] = 1.
        self._transitions[-1, :, -1] = 1.

    def _build_rewards(self):
        """Reward 1 for the goal -> absorbing-state transition only."""
        self._rewards[-2, :, -1] = 1.

    def render(self, mode='human', iteration=None):
        """Draw the grid with the agent position(s) marked in red.

        Args:
            mode: With ``'rgb_array'`` the rendered frame is returned as a
                ``(height, width, 3)`` uint8 array; otherwise nothing is
                returned.
            iteration: Optional iteration number shown in the title.
        """
        if self._fig is None:
            self._fig = plt.figure()
            self._ax = self._fig.add_subplot(111)
            # Uses the class's own upsample method; the original called a
            # bare `upsample`, which is not defined in the visible source.
            data = self.upsample(self._rgb_grid, self._scale)
            self._render = self._ax.imshow(data, animated=True)
            self._ax.tick_params(
                axis='both',
                bottom=False,
                top=False,
                left=False,
                right=False,
                labelbottom=False,
                labelleft=False)  # labels along the bottom edge are off
            self._ax.set_aspect('equal')
            self._canvas = FigureCanvas(self._fig)
        data = self._rgb_grid.copy()
        terminal = self._size ** 2
        if self._states is None:
            x, y = self._state % self._size, self._state // self._size
            # The absorbing state has no cell on the grid.
            if self._state != terminal:
                data[x, y, :] = [255, 0, 0]
        else:
            x, y = self._states % self._size, self._states // self._size
            on_grid = self._states != terminal
            data[x[on_grid], y[on_grid], :] = [255, 0, 0]
        data = self.upsample(data, self._scale)
        self._render.set_data(data)
        if iteration is not None:
            self._ax.set_title('Iteration %d' % iteration)
        self._canvas.draw()
        self._canvas.flush_events()
        time.sleep(self.dt)
        if mode == 'rgb_array':
            width, height = self._fig.get_size_inches() * self._fig.get_dpi()
            # np.frombuffer replaces the deprecated binary np.fromstring;
            # the copy keeps the returned frame writable as before.
            image = np.frombuffer(
                self._canvas.tostring_rgb(),
                dtype='uint8').reshape(int(height), int(width), 3).copy()
            return image

    def upsample(self, image, scale):
        """Repeat every pixel ``scale`` times along both image axes."""
        up_image = np.repeat(image, scale, axis=0)
        up_image = np.repeat(up_image, scale, axis=1)
        return up_image

    def close(self):
        """Close the current pyplot figure and forget the cached one."""
        plt.close()
        self._fig = None
class DoubleIntegratorEnv(Env):
    """
    Double integrator with quadratic cost.

    state: [pos, vel]
    action: one-dimensional acceleration, declared in [-3, 3]

    An episode ends when the next state leaves the observation box
    [-4, 4]^2; the reward of that step is divided by ``1 - discount``.
    """

    def __init__(self, discount=0.99):
        """Create the environment.

        Args:
            discount: Discount factor, used to scale the terminal reward.
        """
        self._state = np.zeros((2, ))
        # Vectorized bookkeeping; initialised here so vec_reset()/vec_step()
        # fail on their own checks rather than with AttributeError.
        self._states = None
        self._num_envs = None
        self.dt = 0.05
        self.max_path_length = 200
        self._fig = None
        self.discount = discount
        self.vectorized = True
        self.action_space = spaces.Box(low=np.array((-3, )),
                                       high=np.array((3, )),
                                       dtype=np.float64)
        self.observation_space = spaces.Box(low=np.array((-4, -4)),
                                            high=np.array((4, 4)),
                                            dtype=np.float64)

    def step(self, action):
        """Euler-integrate one step of the single environment.

        Returns:
            ``(next_state, reward, done, env_info)``.
        """
        next_state = self._state + np.array(
            [self._state[1], action[0]]) * self.dt
        # NOTE(review): `action ** 2` squares the whole action array, so
        # `reward` has the shape of `action`, whereas vec_step uses the
        # scalar `actions[:, 0] ** 2` -- confirm which one callers expect.
        reward = -0.5 * (self._state[0]**2 + self._state[1]**2 + action**2)
        done = (next_state < self.observation_space.low).any() or (
            next_state > self.observation_space.high).any()
        env_info = dict()
        self._state = next_state
        if done:
            reward /= (1 - self.discount)
        return next_state.copy(), reward, done, env_info

    def reset(self):
        """Reset the single environment to the fixed state [1, 1]."""
        self._states = None
        # self._state = np.random.uniform(low=-2, high=2, size=2)
        self._state = np.ones((2, ))
        return self._state.copy()

    def set_state(self, state):
        """Overwrite the single-environment state."""
        self._state = state

    def vec_step(self, actions):
        """Euler-integrate one step for every vectorized environment.

        Args:
            actions: Array indexed as ``actions[:, 0]``, one row per
                environment.

        Returns:
            ``(next_states, rewards, dones, env_infos)``.
        """
        next_states = self._states + np.stack(
            [self._states[:, 1], actions[:, 0]], axis=-1) * self.dt
        rewards = -0.5 * (self._states[:, 0]**2 + self._states[:, 1]**2 +
                          actions[:, 0]**2)
        # An environment is done when any state component leaves the
        # observation box. (The original cast with `np.bool`, an alias
        # removed in NumPy 1.24.)
        low = self.observation_space.low
        high = self.observation_space.high
        dones = ((next_states < low) | (next_states > high)).any(axis=-1)
        env_infos = dict()
        self._states = next_states
        rewards[dones] /= (1 - self.discount)
        return next_states, rewards, dones, env_infos

    def vec_set_state(self, states):
        """Overwrite the vectorized states."""
        self._states = states

    def vec_reset(self, num_envs=None):
        """Sample uniform start states in [-2, 2]^2 for all environments.

        Args:
            num_envs: Number of environments; when omitted the value from
                the previous call is reused.
        """
        if num_envs is None:
            assert self._num_envs is not None
            num_envs = self._num_envs
        else:
            self._num_envs = num_envs
        self._states = np.random.uniform(low=-2, high=2, size=(num_envs, 2))
        return self._states

    def render(self, mode='human', iteration=None):
        """Draw the position of the single environment on a 1-D axis.

        Args:
            mode: With ``'rgb_array'`` the frame is returned as a
                ``(height, width, 3)`` uint8 array; otherwise nothing is
                returned.
            iteration: Optional iteration number shown in the title.
        """
        if self._fig is None:
            self._fig = plt.figure()
            self._ax = self._fig.add_subplot(111)
            self._agent_render, = self._ax.plot(self._state[0], 0, 'ro')
            self._goal_render, = self._ax.plot(0, 'y*')
            self._ax.set_xlim(-4.5, 4.5)
            self._ax.set_ylim(-.5, .5)
            self._ax.set_aspect('equal')
            self._canvas = FigureCanvas(self._fig)
        # Line2D.set_data expects sequences; recent matplotlib versions
        # reject bare scalars.
        self._agent_render.set_data([self._state[0]], [0])
        if iteration is not None:
            self._ax.set_title('Iteration %d' % iteration)
        self._canvas.draw()
        # time.sleep(self.dt)
        self._canvas.flush_events()
        if mode == 'rgb_array':
            width, height = self._fig.get_size_inches() * self._fig.get_dpi()
            # np.frombuffer replaces the deprecated binary np.fromstring;
            # the copy keeps the returned frame writable as before.
            image = np.frombuffer(
                self._canvas.tostring_rgb(),
                dtype='uint8').reshape(int(height), int(width), 3).copy()
            return image

    def close(self):
        """Close the current pyplot figure and forget the cached one."""
        plt.close()
        self._fig = None
class ASRSEnv(object):
    """
    Description:
        There is a storage warehouse with M = W * H bins. M (or W * H) types
        of products will be stored in this warehouse. Each period, there
        will be an array of orders coming in. A robot can exchange the
        positions of two bins. The goal is to find the optimal storage plan
        to best fulfil the orders.

    Observation:
        Current Storage Map     np.array() (M,)
            1-d array: the index is the bin number, the value is the number
            of the good stored in that bin. Good numbers start from 1.
        Current Period Order    np.array() (M,)

    State:
        Current Storage Map     np.array() (M,)
        Time to receive next order np.array() (M,)
        Current

    Action:
        Num of Action: M choose 2 + 1
        e.g. (a, b) switches bin a with bin b; None does nothing.

    Step result:
        ``step`` / ``vec_step`` return the storage map(s), the orders
        obtained from the order process (or None) and the exchange cost(s),
        i.e. the L1 distance between the two swapped bins. No reward is
        computed in this class.

    Starting State:
        A random permutation.

    Parameter type:
        storage_shape: tuple with 1 to 3 ints
        order_process: object providing ``num_products``, ``dynamic_order``,
            ``dist_param``, ``age``, ``set_seed``, ``reset`` and
            ``get_orders``
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self, storage_shape, order_process, origin_coord=None,
                 seed=42):
        """Create the warehouse and draw the initial storage map.

        Args:
            storage_shape: Shape of the warehouse (1 to 3 dimensions).
            order_process: Order generator; its ``num_products`` must equal
                the number of bins.
            origin_coord: Optional exit coordinate(s). A 2-d value is used
                as a list of exits, anything else as a single exit. By
                default there is one exit per index of the first axis, at
                ``(i, 0, ...)``.
            seed: Base seed for the storage-map and order generators.
        """
        self.order_process = order_process
        self.map_random = np.random.RandomState()
        self.set_seed(seed)

        assert len(storage_shape) <= 3, "storage_shape length should be <= 3"
        self.storage_shape = storage_shape
        self.obs_dim = 1
        self.num_products = np.array(storage_shape).prod()
        self.num_maps = math.factorial(self.num_products)
        self.num_actions = int(self.num_products *
                               (self.num_products - 1) / 2 + 1)

        assert (origin_coord is None) or (len(storage_shape) == len(
            origin_coord)), "origin_coord does not have correct dimensions"
        # `is not None` (rather than truthiness) so that NumPy arrays are
        # accepted without an ambiguous-truth-value error.
        if origin_coord is not None:
            origin = np.array(origin_coord)
            if origin.ndim == 2:
                self.origin_coords = origin
            else:
                self.origin_coords = origin[np.newaxis]
        else:
            # Default is (:, 0, 0): one exit per index of the first axis.
            self.origin_coords = np.zeros(
                (len(storage_shape), storage_shape[0])).astype(int)
            self.origin_coords[0] = np.arange(storage_shape[0])
            self.origin_coords = self.origin_coords.T
        self.dist_origin_to_exit = 1  # Distance from origin to exit

        assert order_process.num_products == self.num_products, "Number of products in order process need to match number of storage bins"
        self.dynamic_order = order_process.dynamic_order

        self.reset()

        self._fig = None
        # self.cmap = matplotlib.cm.get_cmap('Spectral')
        # pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed
        # in matplotlib 3.9.
        self.cmap = plt.get_cmap('coolwarm')
        self.dt = .2
        self._scale = 16
        self.vectorized = True
        self.__name__ = 'ASRSEnv'

    def set_seed(self, seed=None):
        """Re-seed the order process and the storage-map generator.

        Args:
            seed: New base seed. With ``None`` the previously stored seed is
                re-applied. ``0`` is a valid seed.
        """
        if seed is not None:
            self.seed_num = seed
        self.order_process.set_seed(self.seed_num + 1000)
        self.map_random.seed(self.seed_num + 100)

    def reset(self):
        """Re-seed, reset the order process and draw a new storage map.

        Returns:
            Copy of the new storage map, a permutation of ``1..M``.
        """
        self.set_seed()
        self._storage_maps = None
        self._num_envs = None
        self.order_process.reset()
        self.storage_map = self.map_random.permutation(self.num_products) + 1
        return np.array(self.storage_map).copy()

    def vec_reset(self, num_envs=None):
        """Re-seed and draw one storage map per vectorized environment.

        Args:
            num_envs: Number of environments; when omitted the value from
                the previous call is reused.

        Returns:
            Copy of the ``(num_envs, M)`` storage maps.
        """
        self.set_seed()
        self.order_process.reset()
        if num_envs is None:
            assert self._num_envs is not None
            num_envs = self._num_envs
        else:
            self._num_envs = num_envs
        permutations = [
            self.map_random.permutation(self.num_products)
            for _ in range(num_envs)
        ]
        self._storage_maps = np.vstack(permutations) + 1
        return np.array(self._storage_maps).copy()

    def get_bin_coordinate(self, bin_id):
        '''
        Given a bin number, this gives the location of the bin.
        This can be useful to calculate the distance between a bin and an
        exit. Returns a tuple of coordinates for 2-d/3-d storage and the
        bin id itself for 1-d storage.
        '''
        ndim = len(self.storage_shape)
        if ndim == 3:
            _, b, c = self.storage_shape
            return bin_id // (b * c), bin_id % (b * c) // c, bin_id % c
        if ndim == 2:
            _, b = self.storage_shape
            return bin_id // b, bin_id % b
        if ndim == 1:
            return bin_id

    def get_distance_to_exit(self, bin_id=None):
        """Return the distance from bins to their nearest exit.

        Args:
            bin_id: Array of bin ids; all bins when omitted.
        """
        # NOTE(review): for multi-dimensional storage a *scalar* bin_id does
        # not broadcast as intended against the column-shaped origin below;
        # pass an array of ids.
        if bin_id is None:
            bin_id = np.arange(self.num_products)
        coords = self.get_bin_coordinate(bin_id)
        dist_to_each_exit = [
            self.get_distance_between_coord(coords, np.vstack(origin)) +
            self.dist_origin_to_exit for origin in self.origin_coords
        ]
        return np.array(dist_to_each_exit).min(axis=0)

    def get_distance_between_coord(self, coord1, coord2):
        """Return the L1 distance, summed over the first (coordinate) axis."""
        return np.abs(np.array(coord1) - np.array(coord2)).sum(axis=0)

    def get_order_sequence(self, num_period=1):
        """Draw ``num_period`` consecutive orders for a single environment.

        Returns:
            ``(order_sequence, p_sequence)``, both of shape
            ``(num_period, M)``; the second holds the order process's
            ``dist_param`` read after each draw.
        """
        # Can only generate order sequences for 1 environment
        order_sequence = np.zeros((num_period, self.num_products))
        p_sequence = np.zeros((num_period, self.num_products))
        for t in range(num_period):
            order_sequence[t] = self.order_process.get_orders(num_envs=1)
            p_sequence[t] = self.order_process.dist_param
        return order_sequence, p_sequence

    def step(self, action=None, rollout=True):
        '''
        Action should be a tuple (x, y) with x < y, which indicates that the
        goods in bin number x and bin number y should switch; None does
        nothing. With rollout=True a new order is drawn from the order
        process. Returns (storage map copy, order or None, exchange cost).
        '''
        assert action is None or (action[0] < action[1]
                                  and action[1] < self.num_products
                                  and action[0] > -1), f"Invalid action {action}!"
        # Alias, not a copy: the swap below mutates self.storage_map.
        storage_map = self.storage_map
        exchange_cost = 0

        order = self.order_process.get_orders() if rollout else None

        if action is not None:
            first, second = action[0], action[1]
            storage_map[first], storage_map[second] = (storage_map[second],
                                                       storage_map[first])
            exchange_cost += self.get_distance_between_coord(
                self.get_bin_coordinate(first),
                self.get_bin_coordinate(second))
        self.storage_map = storage_map
        return self.storage_map.copy(), order, exchange_cost

    def vec_step(self, actions, rollout=True):
        """Apply one action per vectorized environment.

        Args:
            actions: Sequence with one entry per environment, each either a
                2-tuple ``(x, y)`` with ``x < y`` or None (no swap).
            rollout: Whether to draw new orders from the order process.

        Returns:
            ``(storage maps copy, orders or None, exchange costs)``.
        """
        # actions is a list of length n either 2-tuple or None
        assert all(action is None or
                   (action[0] < action[1] and action[1] < self.num_products
                    and action[0] > -1) for action in actions)
        assert self._storage_maps is not None
        # None becomes the no-op swap (0, 0), which also costs 0.
        actions = np.array(
            [action if action is not None else (0, 0) for action in actions])
        self._storage_maps = self.vec_next_storage(self._storage_maps,
                                                   actions)
        if rollout:
            orders = self.order_process.get_orders(num_envs=self._num_envs)
        else:
            orders = None
        exchange_costs = self.get_distance_between_coord(
            self.get_bin_coordinate(actions[:, 0]),
            self.get_bin_coordinate(actions[:, 1]))
        return self._storage_maps.copy(), orders, exchange_costs

    def vec_next_storage(self, storage_maps, actions):
        """Return a copy of ``storage_maps`` with one swap applied per row.

        Args:
            storage_maps: ``(n, M)`` array of storage maps (not modified).
            actions: ``(n, 2)`` integer array of bin pairs to swap.
        """
        next_storage_maps = storage_maps.copy()
        rows = np.arange(next_storage_maps.shape[0])
        first, second = actions[:, 0], actions[:, 1]
        # Fancy indexing on the right-hand side yields copies, so the
        # simultaneous assignment is a true swap.
        next_storage_maps[rows, first], next_storage_maps[rows, second] = (
            next_storage_maps[rows, second], next_storage_maps[rows, first])
        return next_storage_maps

    def set_state(self, storage_map):
        """Overwrite the single-environment storage map with a copy."""
        self.storage_map = storage_map.copy()

    def vec_set_state(self, storage_maps):
        """Overwrite the vectorized storage maps with a copy."""
        self._num_envs = len(storage_maps)
        self._storage_maps = storage_maps.copy()

    def render(self, mode='human', iteration=None):
        """Draw the (2-d) storage map, coloured by ``dist_param``.

        The first vectorized map is shown when one exists, otherwise the
        single-environment map. Every bin is labelled with its good number
        and exit bins get a gold border.

        Args:
            mode: ``'rgb_array'`` returns the frame as a
                ``(height, width, 3)`` uint8 array; ``'human'`` re-plots the
                rendered canvas buffer with ``plt.imshow``.
            iteration: Optional iteration number shown in the title.
        """
        assert len(self.storage_shape
                   ) == 2, "Storage map need to be 2-d in order to render"
        if self._fig is None:
            self._fig = plt.figure()
            self._ax = self._fig.add_subplot(111)
            self._ax.tick_params(
                axis='both',
                bottom=False,
                top=False,
                left=False,
                right=False,
                labelbottom=False,
                labelleft=False)  # labels along the bottom edge are off
            self._ax.set_aspect('equal')
            self._canvas = FigureCanvas(self._fig)
        if self._storage_maps is not None:
            current_map = self._storage_maps[0].reshape(self.storage_shape)
        else:
            current_map = self.storage_map.reshape(self.storage_shape)

        # if self.dynamic_order:
        #     data = self.cmap((self.long_term_2p[self.season]/2)[current_map-1])
        # else:
        #     data = self.cmap(self.order_process.dist_param[current_map-1])
        data = self.cmap(self.order_process.dist_param[current_map - 1])
        data = self.upsample(data, self._scale)
        scale = self._scale
        for ix, iy in np.ndindex(self.storage_shape):
            # `box` is a view, so the helper draws straight into `data`.
            box = data[ix * scale:(ix + 1) * scale,
                       iy * scale:(iy + 1) * scale, :3]
            self.add_numbers_on_plot(current_map[ix, iy], box)
        for origin_num in range(self.origin_coords.shape[0]):
            self.mark_exit_on_plot(data, origin_num)

        # NOTE(review): a new image artist is added on every call; kept
        # because 'human' mode draws its snapshot on top of earlier artists.
        self._render = self._ax.imshow(data, animated=True)
        # self._render.set_data(data)

        if iteration is not None:
            self._ax.set_title('Iteration %d, time %d' %
                               (iteration, self.order_process.age))
        self._canvas.draw()
        self._canvas.flush_events()
        time.sleep(self.dt)

        if mode == 'rgb_array':
            width, height = self._fig.get_size_inches() * self._fig.get_dpi()
            # np.frombuffer replaces the deprecated binary np.fromstring;
            # the copy keeps the returned frame writable as before.
            image = np.frombuffer(
                self._canvas.tostring_rgb(),
                dtype='uint8').reshape(int(height), int(width), 3).copy()
            return image
        if mode == 'human':
            s, (width, height) = self._canvas.print_to_buffer()
            plt.imshow(
                np.frombuffer(s, dtype='uint8').reshape(
                    int(height), int(width), 4))

    def upsample(self, image, scale):
        """Repeat every pixel ``scale`` times along both image axes."""
        up_image = np.repeat(image, scale, axis=0)
        up_image = np.repeat(up_image, scale, axis=1)
        return up_image

    def add_numbers_on_plot(self, number, box):
        """Stamp ``number`` in black onto ``box`` (modified in place).

        Args:
            number: Value to print.
            box: ``(scale, scale, 3)`` view into the rendered image.
        """
        number = str(number)
        imageRGB = Image.new('RGB', (self._scale, self._scale))
        draw = ImageDraw.Draw(imageRGB)
        try:
            font = ImageFont.truetype('/Library/Fonts/Arial.ttf', size=12)
        except OSError:
            # The macOS font path does not exist on other systems.
            font = ImageFont.load_default()
        if hasattr(draw, 'textsize'):
            w, h = draw.textsize(number, font=font)
        else:
            # ImageDraw.textsize was removed in Pillow 10.
            left, top, right, bottom = draw.textbbox((0, 0), number,
                                                     font=font)
            w, h = right - left, bottom - top
        draw.text(((self._scale - w) / 2, (self._scale - h) / 2), number)
        # The text is drawn white on black, so p == 0 exactly on text pixels.
        p = 1 - np.array(imageRGB) / 255
        text_pixels = np.where(p == 0)
        box[text_pixels] = p[text_pixels]

    def mark_exit_on_plot(self, plot, origin_num):
        """Draw a one-pixel gold border around exit bin ``origin_num``.

        Args:
            plot: Upsampled image, modified in place.
            origin_num: Row index into ``self.origin_coords``.
        """
        scale = self._scale
        row, col = (self.origin_coords[origin_num][0],
                    self.origin_coords[origin_num][1])
        box = plot[row * scale:(row + 1) * scale,
                   col * scale:(col + 1) * scale, :3]
        border = np.ones((scale, scale, 3), dtype=bool)
        border[1:-1, 1:-1, :] = False
        color = np.array([255, 215, 0]) / 255
        rgb_patch = np.ones((scale, scale, 3), dtype=np.uint8)
        rgb_patch = rgb_patch * color
        box[border] = rgb_patch[border]

    def close(self):
        """Close the current pyplot figure and forget the cached one."""
        plt.close()
        self._fig = None