示例#1
0
def _get_states(shape: tuple):
	"""Generate sequences of increasingly scrambled cube states.

	`shape` is either (n_sequences, n_states) or just (n_states,); in the
	latter case a single sequence is produced. Row 0 of each sequence is
	the solved state repeated n_states times; every following row applies
	one random rotation to each state of the previous row.

	Returns an ndarray of shape (n_sequences, n_states, *cube.shape()).
	"""
	# Do not shadow the parameter: build the full array shape separately.
	full_shape = (*shape, *cube.shape()) if len(shape) > 1 else (1, *shape,
	                                                            *cube.shape())
	n_states = full_shape[1]
	states = np.empty(full_shape, dtype=cube.dtype)
	states[0] = cube.repeat_state(cube.get_solved(), n_states)
	for i in range(1, len(states)):
		# Random face (0-5) and direction (0-1) for each state in the row
		faces, dirs = np.random.randint(0, 6, n_states), np.random.randint(
			0, 2, n_states)
		states[i] = cube.multi_rotate(states[i - 1], faces, dirs)
	return states
示例#2
0
	def expand(self, state: np.ndarray) -> (list, np.ndarray, torch.tensor, tuple):
		"""Scramble self.workers copies of `state` for self.depth steps.

		Actions are chosen epsilon-greedily: with probability self.epsilon a
		random action, otherwise the policy network's argmax action.

		Returns (paths, new_states, new_states_oh, (worker, depth)):
		- paths: (workers, depth) int array; row n is the action path of worker n
		- new_states / new_states_oh: all visited states and their one-hot encodings
		- (worker, depth): index of the first worker/depth that reached a solved
		  state, or (-1, -1) if none did. On a solve, new_states and
		  new_states_oh are returned as None.
		"""
		# Initialize needed data structures
		states = cube.repeat_state(state, self.workers)
		states_oh = cube.as_oh(states)
		paths = np.empty((self.workers, self.depth), dtype=int)  # Index n contains path for worker n
		new_states = np.empty((self.workers * self.depth, *cube.shape()), dtype=cube.dtype)
		new_states_oh = torch.empty(self.workers * self.depth, cube.get_oh_shape(), dtype=torch.float, device=gpu)
		# Expand for self.depth iterations
		for d in range(self.depth):
			# Use epsilon-greedy to decide where to use policy and random actions
			use_random = np.random.choice(2, self.workers, p=[1-self.epsilon, self.epsilon]).astype(bool)
			use_policy = ~use_random
			actions = np.empty(self.workers, dtype=int)
			# Random actions
			actions[use_random] = np.random.randint(0, cube.action_dim, use_random.sum())
			# Policy actions
			p = self.net(states_oh[use_policy], value=False).cpu().numpy()
			actions[use_policy] = p.argmax(axis=1)
			# Update paths
			paths[:, d] = actions

			# Expand using selected actions
			faces, dirs = cube.indices_to_actions(actions)
			states = cube.multi_rotate(states, faces, dirs)
			states_oh = cube.as_oh(states)
			solved_states = cube.multi_is_solved(states)
			if np.any(solved_states):
				# A worker solved the cube: report which one and at what depth
				self._explored_states += (d+1) * self.workers
				w = np.where(solved_states)[0][0]
				return paths, None, None, (w, d+1)
			new_states[self._get_indices(d)] = states
			new_states_oh[self._get_indices(d)] = states_oh
		self._explored_states += len(new_states)

		return paths, new_states, new_states_oh, (-1, -1)
示例#3
0
	def increase_stack_size(self):
		"""Double the capacity of all node stacks.

		New state rows are left uninitialized (they are overwritten before
		use); parent indices and parent actions are zero-filled; G values
		are uninitialized.
		"""
		extra = len(self.states)
		self.states = np.concatenate([self.states, np.empty((extra, *cube.shape()), dtype=cube.dtype)])
		self.parents = np.concatenate([self.parents, np.zeros(extra, dtype=int)])
		self.parent_actions = np.concatenate([self.parent_actions, np.zeros(extra, dtype=int)])
		self.G = np.concatenate([self.G, np.empty(extra)])
示例#4
0
	def increase_stack_size(self):
		"""Double the capacity of every per-node array.

		States, P and V get uninitialized rows (always written before read);
		neighbors, N, W and L are zero-filled; new nodes start as leaves.
		"""
		extra = len(self.states)
		self.states = np.concatenate([self.states, np.empty((extra, *cube.shape()), dtype=cube.dtype)])
		self.neighbors = np.concatenate([self.neighbors, np.zeros((extra, cube.action_dim), dtype=int)])
		self.leaves = np.concatenate([self.leaves, np.ones(extra, dtype=bool)])
		self.P = np.concatenate([self.P, np.empty((extra, cube.action_dim))])
		self.V = np.concatenate([self.V, np.empty(extra)])
		self.N = np.concatenate([self.N, np.zeros((extra, cube.action_dim), dtype=int)])
		self.W = np.concatenate([self.W, np.zeros((extra, cube.action_dim))])
		self.L = np.concatenate([self.L, np.zeros((extra, cube.action_dim))])
示例#5
0
	def reset(self, time_limit: float, max_states: int) -> (float, int):
		"""Reset the search: clear the open queue and index map, and
		reallocate the node stacks to their initial capacity.

		Delegates limit normalization to the superclass and returns the
		(time_limit, max_states) pair it produced.
		"""
		time_limit, max_states = super().reset(time_limit, max_states)
		self.open_queue = []
		self.indices = {}
		# Preallocate node storage; entries are written before they are read
		self.states = np.empty((self._stack_expand, *cube.shape()), dtype=cube.dtype)
		self.parents = np.empty(self._stack_expand, dtype=int)
		self.parent_actions = np.zeros(self._stack_expand, dtype=int)
		self.G = np.empty(self._stack_expand)
		return time_limit, max_states
示例#6
0
	def reset(self, time_limit: float, max_states: int):
		"""Reset the tree search: clear the index map and reallocate all
		per-node arrays at the initial capacity.

		Delegates limit normalization to the superclass and returns the
		(time_limit, max_states) pair it produced.
		"""
		time_limit, max_states = super().reset(time_limit, max_states)
		self.indices = {}
		# Preallocate node storage; uninitialized arrays are written before read
		self.states = np.empty((self.expand_nodes, *cube.shape()), dtype=cube.dtype)
		self.neighbors = np.zeros((self.expand_nodes, cube.action_dim), dtype=int)
		self.leaves = np.ones(self.expand_nodes, dtype=bool)
		self.P = np.empty((self.expand_nodes, cube.action_dim))
		self.V = np.empty(self.expand_nodes)
		self.N = np.zeros((self.expand_nodes, cube.action_dim), dtype=int)
		self.W = np.zeros((self.expand_nodes, cube.action_dim))
		self.L = np.zeros((self.expand_nodes, cube.action_dim))
		return time_limit, max_states