def __init__(self, history=1, random_depth=None, _internal_state=None):
    """
    Set up the internal state tuple.

    history: total number of slots in the tuple. Slot 0 holds the current
        cube; the remaining ``history - 1`` slots start out as None.
    random_depth: when not None, passed to ``randomize`` on the new cube.
    _internal_state: when not None, stored as-is; ``history`` and
        ``random_depth`` are then ignored.
    """
    if _internal_state is None:
        # Placeholders for the earlier positions that do not exist yet.
        empty_slots = tuple(None for _unused in range(history - 1))
        start_cube = BatchCube(1)
        if random_depth is not None:
            start_cube.randomize(random_depth)
        _internal_state = (start_cube, ) + empty_slots
    self._internal_state = _internal_state
def __init__(self, history=1, random_depth=None, _internal_state=None):
    """
    Set up the internal state tuple.

    history: total number of slots in the tuple. Slot 0 holds the current
        cube; the remaining ``history - 1`` slots start out as None.
    random_depth: when not None, passed to ``randomize`` on the new cube.
    _internal_state: when not None, stored as-is; ``history`` and
        ``random_depth`` are then ignored.
    """
    if _internal_state is None:
        # Placeholders for the earlier positions that do not exist yet.
        empty_slots = tuple(None for _unused in range(history - 1))
        start_cube = BatchCube(1)
        if random_depth is not None:
            start_cube.randomize(random_depth)
        _internal_state = (start_cube, ) + empty_slots
    self._internal_state = _internal_state
class State:
    """
    Application-specific search state wrapping a ``BatchCube``.
    """

    def __init__(self, internal_state=None):
        """Wrap `internal_state`, or a new ``BatchCube(1)`` when it is None."""
        self.internal_state = (
            BatchCube(1) if internal_state is None else internal_state
        )

    def reset_and_randomize(self, depth):
        """Swap in a new ``BatchCube(1)`` and call ``randomize(depth)`` on it."""
        fresh_cube = BatchCube(1)
        self.internal_state = fresh_cube
        fresh_cube.randomize(depth)

    def next(self, action):
        """Return a new State whose cube is a copy of this one stepped by `action`."""
        successor = self.internal_state.copy()
        successor.step(action)
        return State(successor)

    def input_array(self):
        """Return the cube's bit array reshaped to ``(1, 6 * 54)``."""
        bits = self.internal_state.bit_array()
        return bits.reshape((1, 6 * 54))

    def calculate_priors_and_value(self, model):
        """
        Return ``(prior, value)`` for this state.

        The prior is ``model.predict`` applied to ``input_array()``,
        reshaped to 12 entries. The value is not taken from the model;
        it is a fixed placeholder of .01.
        """
        prediction = model.predict(self.input_array())
        return prediction.reshape((12, )), .01

    def key(self):
        """Return the raw bytes of the cube's bit array."""
        bits = self.internal_state.bit_array()
        return bits.tobytes()

    def done(self):
        """Return the first entry of the wrapped cube's ``done()`` result."""
        finished = self.internal_state.done()
        return finished[0]

    def __str__(self):
        """Delegate to the wrapped cube's string form."""
        return str(self.internal_state)
class State:
    """
    Application-specific search state wrapping a ``BatchCube``.
    """

    def __init__(self, internal_state=None):
        """Wrap `internal_state`, or a new ``BatchCube(1)`` when it is None."""
        self.internal_state = (
            BatchCube(1) if internal_state is None else internal_state
        )

    def reset_and_randomize(self, depth):
        """Swap in a new ``BatchCube(1)`` and call ``randomize(depth)`` on it."""
        fresh_cube = BatchCube(1)
        self.internal_state = fresh_cube
        fresh_cube.randomize(depth)

    def next(self, action):
        """Return a new State whose cube is a copy of this one stepped by `action`."""
        successor = self.internal_state.copy()
        successor.step(action)
        return State(successor)

    def input_array(self):
        """Return the cube's bit array reshaped to ``(1, 6 * 54)``."""
        bits = self.internal_state.bit_array()
        return bits.reshape((1, 6 * 54))

    def calculate_priors_and_value(self, model):
        """
        Return ``(prior, value)`` for this state.

        The prior is ``model.predict`` applied to ``input_array()``,
        reshaped to 12 entries. The value is not taken from the model;
        it is a fixed placeholder of .01.
        """
        prediction = model.predict(self.input_array())
        return prediction.reshape((12, )), .01

    def key(self):
        """Return the raw bytes of the cube's bit array."""
        bits = self.internal_state.bit_array()
        return bits.tobytes()

    def done(self):
        """Return the first entry of the wrapped cube's ``done()`` result."""
        finished = self.internal_state.done()
        return finished[0]

    def __str__(self):
        """Delegate to the wrapped cube's string form."""
        return str(self.internal_state)
class BatchState:
    """
    Application-specific search state wrapping a ``BatchCube``.
    """

    def __init__(self, internal_state=None):
        """Wrap `internal_state`, or a new ``BatchCube(1)`` when it is None."""
        if internal_state is None:
            internal_state = BatchCube(1)
        self.internal_state = internal_state

    def copy(self):
        """
        Return a new BatchState wrapping a copy of the cube.

        The result is a BatchState (not a State) so that it still offers
        ``copy`` and ``import_bit_array``.
        """
        return BatchState(self.internal_state.copy())

    def import_bit_array(self, bit_array):
        """
        Replace the wrapped cube with one rebuilt from `bit_array`.

        `bit_array` is reshaped to ``(1, 54, 6)``; the last axis is read as
        one flag per color index. Each position gets the largest color index
        whose flag is set (0 when none is set), and the resulting array is
        handed to ``BatchCube(cube_array=...)``.
        """
        # color_idx[0, p, c] == c, so flag * index keeps the index where set.
        color_idx = np.indices((1, 54, 6))[2]
        array = (color_idx * bit_array.reshape((1, 54, 6))).max(axis=2)
        self.internal_state = BatchCube(cube_array=array)

    def reset_and_randomize(self, depth):
        """Swap in a new ``BatchCube(1)`` and call ``randomize(depth)`` on it."""
        self.internal_state = BatchCube(1)
        self.internal_state.randomize(depth)

    def next(self, action):
        """Return a new BatchState whose cube is a copy of this one stepped by `action`."""
        next_internal_state = self.internal_state.copy()
        next_internal_state.step(action)
        return BatchState(next_internal_state)

    def input_array(self):
        """Return the cube's bit array reshaped to ``(1, 6 * 54)``."""
        return self.internal_state.bit_array().reshape((1, 6 * 54))

    def key(self):
        """Return the raw bytes of the cube's bit array."""
        return self.internal_state.bit_array().tobytes()

    def done(self):
        """Return the first entry of the wrapped cube's ``done()`` result."""
        return self.internal_state.done()[0]

    def __str__(self):
        """Delegate to the wrapped cube's string form."""
        return str(self.internal_state)
# Rebuild dictionary
# Key: raw bytes of a state's bit array.
# Value: (bit array, best-action mask, distance as a plain int).
state_dict = {
    b.tobytes(): (b, a, int(d))
    for b, a, d in zip(bits, best_actions, distances)
}

print("Testing data...")

# Test data types (spot check: only the first entry is inspected)
for v in state_dict.values():
    assert v[0].dtype == bool
    assert v[1].dtype == bool
    break

# Test data: from a randomized cube, repeatedly taking any action flagged as
# best must reach a done cube in exactly the recorded number of steps.
import numpy as np
for i in range(1000):
    test_cube = BatchCube(1)
    test_cube.randomize(1 + (i % MAX_DISTANCE))

    # Row-local name on purpose: rebinding `best_actions` here would overwrite
    # the full array that `state_dict` was built from.
    _, action_mask, distance = state_dict[test_cube.bit_array().tobytes()]

    for _ in range(distance):
        assert not test_cube.done()[0]
        # Pick uniformly among the actions flagged in the mask.
        action = np.random.choice(12, p=action_mask / np.sum(action_mask))
        test_cube.step([action])
        _, action_mask, _ = state_dict[test_cube.bit_array().tobytes()]

    assert test_cube.done()[0]

print("Passed all tests")
np.set_printoptions( threshold=np.inf ) # allows one to see the whole array even if it is big print("neighbors = \\") pprint(neighbors) print() np.set_printoptions( threshold=1000) # allows one to see the whole array even if it is big bc = BatchCube(1) grid = np.full((5, 5, 5), -1, dtype=int) grid[x, y, z] = bc._cube_array[0] pprint(grid) bc = BatchCube(1) bc.randomize(100) grid = np.full((5, 5, 5), -1, dtype=int) grid[x, y, z] = bc._cube_array[0] print() print(bc) c = np.array(list("rygwob ")) pprint(c[grid]) bc = BatchCube(10) grid = np.full((len(bc), 5, 5, 5), -1, dtype=int) idx = np.indices(bc._cube_array.shape)[0] grid[idx, x[np.newaxis], y[np.newaxis], z[np.newaxis]] = bc._cube_array bc = BatchCube(10) grid = np.full((len(bc), 5, 5, 5), -1, dtype=int) idx = np.indices(bc._cube_array.shape)[0]
# Rebuild dictionary
# Key: raw bytes of a state's bit array.
# Value: (bit array, best-action mask, distance as a plain int).
state_dict = {
    b.tobytes(): (b, a, int(d))
    for b, a, d in zip(bits, best_actions, distances)
}

print("Testing data...")

# Test data types (spot check: only the first entry is inspected)
for v in state_dict.values():
    assert v[0].dtype == bool
    assert v[1].dtype == bool
    break

# Test data: from a randomized cube, repeatedly taking any action flagged as
# best must reach a done cube in exactly the recorded number of steps.
import numpy as np
for i in range(1000):
    test_cube = BatchCube(1)
    test_cube.randomize(1 + (i % MAX_DISTANCE))

    # Row-local name on purpose: rebinding `best_actions` here would overwrite
    # the full array that `state_dict` was built from.
    _, action_mask, distance = state_dict[test_cube.bit_array().tobytes()]

    for _ in range(distance):
        assert not test_cube.done()[0]
        # Pick uniformly among the actions flagged in the mask.
        action = np.random.choice(12, p=action_mask / np.sum(action_mask))
        test_cube.step([action])
        _, action_mask, _ = state_dict[test_cube.bit_array().tobytes()]

    assert test_cube.done()[0]

print("Passed all tests")
threshold=np.inf ) # allows one to see the whole array even if it is big pprint(np.array(position_permutations)) print() print("opp_action_permutations = \\") pprint(np.array(opp_action_permutations)) print() print("action_permutations = \\") pprint(np.array(action_permutations)) # test opposites for i in range(48): bc0 = BatchCube() bc0.randomize(100) bc = bc0.copy() bc._cube_array[0] = bc._cube_array[0][position_permutations[i]] bc._cube_array[0] = bc._cube_array[0][opp_position_permutations[i]] assert bc == bc0 bc._cube_array[0] = color_permutations[i][bc._cube_array[0]] bc._cube_array[0] = opp_color_permutations[i][bc._cube_array[0]] assert bc == bc0 policy0 = np.random.uniform(size=12) policy1 = policy0.copy() policy1 = policy1[action_permutations[i]] policy1 = policy1[opp_action_permutations[i]] assert np.array_equal(policy0, policy1)