def validate_data(inputs, policies, values, gamma=.95):
    """
    Validate the input, policy, value data to make sure it is of good quality.
    It must be in order and not shuffled.

    Each state is loaded into a fresh BatchCube and advanced by the arg-max
    action of its policy.  Unless the current value marks the end of a
    sequence (``value == 0 or value == gamma``), the following record must
    then hold exactly the resulting state, and a value that is one factor of
    ``gamma`` larger.

    Parameters
    ----------
    inputs : iterable of states accepted by ``BatchCube.load_bit_array``.
        A state is compared against ``bit_array().reshape((54, 6))``, so it
        presumably has shape (54, 6) -- confirm against the data producer.
    policies : iterable of array-likes; ``np.argmax`` picks the action.
    values : iterable of numbers, presumably powers of ``gamma`` (or 0) --
        confirm against the data producer.
    gamma : discount factor relating consecutive values.

    Raises
    ------
    AssertionError
        If a state or value does not match what the previous record predicts.
    """
    from batch_cube import BatchCube
    import math

    next_state = None
    next_value = None

    for state, policy, value in zip(inputs, policies, values):
        cube = BatchCube()
        cube.load_bit_array(state)

        if next_state is not None:
            assert next_state.shape == state.shape
            assert np.array_equal(next_state, state), \
                "\nstate:\n" + str(state) + "\nnext_state:\n" + str(next_state)
        if next_value is not None:
            # Compare the values as rounded exponents of gamma so that
            # floating point noise does not cause spurious failures.  The
            # base must be the caller's gamma (it used to be a literal .95),
            # otherwise the exponents are meaningless for other discounts.
            assert round(math.log(next_value, gamma)) == round(math.log(value, gamma)), \
                "next_value:" + str(next_value) + " value:" + str(value)

        # Follow the most likely action to predict the next record.
        action = np.argmax(policy)
        cube.step([action])

        if value == 0 or value == gamma:
            # End of a sequence: the next record is unrelated to this one.
            next_value = None
            next_state = None
        else:
            next_value = value / gamma
            next_state = cube.bit_array().reshape((54, 6))
# Rebuild the lookup table: raw bytes of a state's bit array map to the
# tuple (bit array, best-action flags, distance as a plain int).
state_dict = dict(
    (b.tobytes(), (b, a, int(d)))
    for b, a, d in zip(bits, best_actions, distances)
)

print("Testing data...")

# Spot-check the dtypes on the first entry only.
for v in state_dict.values():
    assert v[0].dtype == bool
    assert v[1].dtype == bool
    break

# Walk randomly scrambled cubes back along the stored best actions: after
# exactly `distance` steps the cube must be done, and never earlier.
import numpy as np
for i in range(1000):
    scramble_length = 1 + (i % MAX_DISTANCE)
    test_cube = BatchCube(1)
    test_cube.randomize(scramble_length)
    _, best_actions, distance = state_dict[test_cube.bit_array().tobytes()]

    for _ in range(distance):
        assert not test_cube.done()[0]
        # Sample one action with probability proportional to best_actions.
        weights = best_actions / np.sum(best_actions)
        action = np.random.choice(12, p=weights)
        test_cube.step([action])
        _, best_actions, _ = state_dict[test_cube.bit_array().tobytes()]

    assert test_cube.done()[0]

print("Passed all tests")
print("Testing data...")

# Check the dtypes of the first stored entry (one entry is enough here).
for v in state_dict.values():
    assert v[0].dtype == bool
    assert v[1].dtype == bool
    break

# For 1000 random scrambles, follow the stored best actions and make sure
# the cube is done after exactly the stored distance, and not before.
import numpy as np
for i in range(1000):
    test_cube = BatchCube(1)
    test_cube.randomize(1 + (i % MAX_DISTANCE))
    lookup_key = test_cube.bit_array().tobytes()
    _, best_actions, distance = state_dict[lookup_key]

    for _ in range(distance):
        assert not test_cube.done()[0]
        # Draw an action with probability proportional to best_actions.
        action_probs = best_actions / np.sum(best_actions)
        action = np.random.choice(12, p=action_probs)
        test_cube.step([action])
        lookup_key = test_cube.bit_array().tobytes()
        _, best_actions, _ = state_dict[lookup_key]

    assert test_cube.done()[0]

print("Passed all tests")
def next_state(self, node_idx, actions):
    """
    Return the cube array reached from the state stored at `node_idx`.

    A BatchCube is built from `self.states[node_idx]`, `actions` is passed
    to its `step` method, and its `_cube_array` is returned.
    """
    cube = BatchCube(cube_array=self.states[node_idx])
    cube.step(actions)
    return cube._cube_array
""" Every square position on the cube can be descriped by its starting color and the set of actions which preserve it (or equivalently those which move it). This is also the same as decribing a position by its starting color and the adjacent starting colors. """ color_encoding = [] bc = BatchCube(1) starting_colors = list(bc._cube_array[0]) # replace colors with positions bc._cube_array[0] = np.arange(54) neighbor_colors = [set() for _ in range(54)] for c in range(6): a = action_from_color[c] bc.step(a) c_adjacent = [ i for i in range(54) if bc._cube_array[0][i] != i and starting_colors[i] != c ] for i in range(54): if i in c_adjacent: neighbor_colors[i].add(c) bc.step(opp_actions[a]) #undo action color_encoding = [(c, frozenset(s)) for c, s in zip(starting_colors, neighbor_colors)] color_decoding = { (c, frozenset(s)): i for i, c, s in zip(range(54), starting_colors, neighbor_colors)