def policy_test(nn, states, inputs):
    """Summarise the network's greedy action choice per region.

    The network is queried once on ``inputs`` and its output is viewed as
    rows of 9 action scores. For each of the first ``512 * 3 * 3`` rows the
    arg-max action is taken as that state's choice. Every ordered pair of
    grid positions is then passed to ``region_from_state`` together with the
    state; each non-``None`` region returned receives one vote for the
    chosen action.

    Args:
        nn: Object exposing ``predict(inputs)``; the result must be
            reshapeable to ``(-1, 9)``.
        states: Indexable collection of states, aligned row-by-row with the
            predictions (at least ``512 * 3 * 3`` entries are read).
        inputs: Network input forwarded unchanged to ``nn.predict``.

    Returns:
        dict mapping each region to a list of 9 floats: the fraction of
        votes each action received for that region (each list sums to 1).
    """
    predictions = nn.predict(inputs).reshape((-1, 9))

    # The 81 ordered (m1, m2) position pairs do not depend on the state, so
    # build them once instead of once per state.
    cells = [
        helicopter3x3.Position(*np.unravel_index(a, (3, 3))) for a in range(9)
    ]
    position_pairs = [(m1, m2) for m1 in cells for m2 in cells]

    votes = {}
    for i in range(512 * 3 * 3):
        choice = np.argmax(predictions[i])
        state = states[i]
        for m1, m2 in position_pairs:
            region = region_from_state(state, m1, m2)
            if region is None:
                continue
            if region not in votes:
                votes[region] = [0] * 9
            votes[region][choice] += 1

    # Turn raw vote counts into per-region action frequencies. Every stored
    # region has at least one vote, so the total is never zero.
    policy = {}
    for region, counts in votes.items():
        total = sum(counts)
        policy[region] = [count / total for count in counts]
    return policy
def do_epsilon_greedy_step_regions(paths, epsilon, predict_model):
    """Extend every still-flying path by one epsilon-greedy move.

    For each path whose latest state is still flying, a deep copy of that
    state is advanced by one move and ``(move, new_state)`` is appended to
    the path. With probability ``epsilon`` the move comes from
    ``random_move_over_regions``; otherwise actions are tried from the
    highest to the lowest predicted score until ``receive_Move`` returns
    ``None``.

    Args:
        paths: List of paths; each path is a list of ``(move, state)``
            tuples and is extended in place.
        epsilon: Exploration probability. When it equals 1.0 the network is
            not queried at all.
        predict_model: Model forwarded to ``get_predictions``.

    Returns:
        True if no path was extended (none was still flying), else False.
    """
    path_count = len(paths)

    # A purely random step never looks at the scores, so skip the network.
    if epsilon == 1.0:
        scores = np.zeros((path_count, 9))
    else:
        scores = get_predictions(paths, predict_model)
    scores = scores.reshape((-1, 9))

    # One exploration coin flip per path.
    explore = np.random.uniform(size=path_count) < epsilon

    # argsort is ascending; reverse each row so the best action comes first.
    best_first = scores.argsort()[:, ::-1]

    finished = True
    for idx, path in enumerate(paths):
        _, current = path[-1]
        if current.status != helicopter3x3.Status.flying:
            continue

        # Never mutate the state already stored in the path.
        successor = copy.deepcopy(current)
        if explore[idx]:
            move = random_move_over_regions(successor)
            successor.receive_Move(move)
        else:
            # Take the highest-ranked move that the state accepts.
            for flat_action in best_first[idx]:
                cell = np.unravel_index(flat_action, (3, 3))
                move = helicopter3x3.Position(*cell)
                if successor.receive_Move(move) is None:
                    break

        path.append((move, successor))
        finished = False
    return finished
def generate_random_initial(size):
    """Build ``size`` initial states, one per randomly generated map.

    Each state is configured through ``receive_SetState`` with position
    (0, 0), the map produced by ``generate_random_maps`` and ``1`` as the
    third argument.

    Args:
        size: Number of random maps (and therefore states) to create.

    Returns:
        List of ``helicopter3x3.State`` objects.
    """

    def _initial_state(island_map):
        fresh = helicopter3x3.State()
        origin = helicopter3x3.Position(0, 0)
        fresh.receive_SetState(origin, island_map, 1)
        return fresh

    return [_initial_state(island_map) for island_map in generate_random_maps(size)]
def generate_all_from(x, y, f):
    """Build one state per map from ``generate_all_maps``.

    Every state is configured through ``receive_SetState`` with position
    ``(x, y)``, the map, and ``f`` as the third argument.

    Args:
        x: First coordinate passed to ``helicopter3x3.Position``.
        y: Second coordinate passed to ``helicopter3x3.Position``.
        f: Value forwarded as the third argument of ``receive_SetState``.

    Returns:
        List of ``helicopter3x3.State`` objects, in map order.
    """

    def _state_on(island_map):
        fresh = helicopter3x3.State()
        start = helicopter3x3.Position(x, y)
        fresh.receive_SetState(start, island_map, f)
        return fresh

    return [_state_on(island_map) for island_map in generate_all_maps()]
def evaluate(nnets):
    """Roll out the greedy policy on all 512 island maps and count outcomes.

    One state is created for every 9-bit island pattern, each starting at
    position (0, 0) with ``1`` as the third ``receive_SetState`` argument.
    All states are then advanced in lock-step: the network scores every
    state, and each still-flying state takes its highest-scored move for
    which ``receive_Move`` returns ``None``. The loop ends once no state is
    flying any more.

    Args:
        nnets: Object whose ``predict_model.predict`` scores a batch of
            encoded states; the result must be reshapeable to ``(-1, 9)``.

    Returns:
        Tuple ``(reached, crashed)`` with the number of states that ended
        in ``Status.reached`` and ``Status.crashed`` respectively.
    """
    # Bit i of the pattern index decides whether flat cell i is an island.
    codes = np.arange(2**9)
    bit_planes = [
        np.bitwise_and(np.left_shift(np.ones_like(codes), bit), codes).astype(bool)
        for bit in range(9)
    ]
    grids = np.array(bit_planes).T
    grids = grids.reshape((512, 3, 3))

    # coords[a, b] holds the pair (a, b), so boolean-masking it with a grid
    # yields the coordinates of that grid's island cells.
    coords = np.array(np.meshgrid(np.arange(3), np.arange(3))).T

    def _state_for(grid):
        islands = defaultdict(lambda: False)
        for x, y in coords[np.array(grid).astype(bool)]:
            islands[helicopter3x3.Position(x, y)] = True
        fresh = helicopter3x3.State()
        fresh.receive_SetState(helicopter3x3.Position(0, 0), islands, 1)
        return fresh

    allstates = [_state_for(grid) for grid in grids]

    # Flat action index -> Position lookup table.
    move_cells = np.array(np.unravel_index(np.arange(9), (3, 3))).T
    moves = [helicopter3x3.Position(x, y) for x, y in move_cells]

    inputs = np.zeros((512, 3, 3, 2))
    while True:
        # encode_state writes into the per-state slice of the shared buffer.
        for n, state in enumerate(allstates):
            encode_state(inputs[n], state)
        scores = nnets.predict_model.predict(inputs).reshape((-1, 9))
        ranking = scores.argsort()

        any_flying = False
        for n, state in enumerate(allstates):
            if state.status != helicopter3x3.Status.flying:
                continue
            any_flying = True
            # argsort is ascending, so walk it backwards: best action first.
            for action in ranking[n][::-1]:
                if state.receive_Move(moves[action]) is None:
                    break
        if not any_flying:
            break

    outcome = pd.Series([s.status for s in allstates]).value_counts().to_dict()
    crashed = outcome.get(helicopter3x3.Status.crashed, 0)
    reached = outcome.get(helicopter3x3.Status.reached, 0)
    return reached, crashed
def generate_random_maps(size):
    """Draw ``size`` random 3x3 island maps.

    A random integer in ``[0, 2**9)`` is drawn per map; bit ``i`` of that
    integer decides whether flat cell ``i`` (row-major on the 3x3 grid) is
    an island. This is the same bit-to-cell layout that ``evaluate`` uses
    when it enumerates all maps.

    Args:
        size: Number of maps to generate.

    Returns:
        List of ``size`` ``defaultdict`` objects mapping
        ``helicopter3x3.Position`` to ``True`` for island cells. Cells that
        are not islands are absent and default to ``False``.
    """
    codes = np.random.uniform(0, 2**9, size=size).astype(np.uint32)

    # Test the 9 low bits explicitly. Reinterpreting the uint32 as bytes and
    # calling np.unpackbits is byte-order dependent and unpacks each byte
    # MSB-first, so slicing the first 9 unpacked bits on a little-endian host
    # yields bits 7..0 followed by bit 15 (always zero here): the ninth cell
    # could then never become an island.
    bit_positions = np.arange(9, dtype=np.uint32)
    is_island = ((codes[:, None] >> bit_positions) & 1).astype(bool)

    # Missing cells default to False, matching the maps built in evaluate.
    data = [defaultdict(lambda: False) for _ in range(size)]
    for n, ix in np.argwhere(is_island):
        p = helicopter3x3.Position(*np.unravel_index(ix, (3, 3)))
        data[n][p] = True
    return data
def random_move():
    """Return a uniformly random cell of the 3x3 grid.

    Returns:
        ``helicopter3x3.Position`` built from the (row, column) pair of a
        flat index drawn uniformly from 0..8.
    """
    # np.random.uniform(9) would bind 9 to ``low`` (with ``high`` left at
    # 1.0), producing samples in (1, 9]: flat index 0 would never be drawn
    # and 9 would be out of range. randint(9) draws from 0..8 inclusive.
    ix = np.random.randint(9)
    move = np.unravel_index(ix, (3, 3))
    return helicopter3x3.Position(*move)