def cum_softmax_direction_prop(state):
    """Return the cumulative softmax distribution for the actions at ``state``.

    The entries of the module-level ``policy`` array at
    ``[state['y'], state['x'], :]`` are passed through a softmax and then
    cumulatively summed, so the result is a non-decreasing 1-D array whose
    last entry is (up to rounding) 1.0.

    Args:
        state: mapping with ``'x'`` and ``'y'`` keys used to index ``policy``.

    Returns:
        numpy array of cumulative softmax probabilities, one per entry along
        the last axis of ``policy``.
    """
    current_policy = policy[state['y'], state['x'], :]
    # Subtracting the maximum does not change the softmax result, but it keeps
    # exp() from overflowing to inf (and the ratio from becoming nan) when the
    # policy values grow large.
    shifted = current_policy - numpy.max(current_policy)
    softmax_prop = numpy.exp(shifted)
    softmax_prop = softmax_prop / numpy.sum(softmax_prop)  # e^p / sum(e^p)
    return numpy.cumsum(softmax_prop)


def pick_action(state):
    """Sample an action index from the softmax distribution at ``state``.

    Draws one uniform random number ``r`` in [0, 1) and returns the first
    index whose cumulative probability is strictly greater than ``r``.

    Args:
        state: mapping with ``'x'`` and ``'y'`` keys, forwarded to
            ``cum_softmax_direction_prop``.

    Returns:
        int index into the cumulative probability array.
    """
    cum_softmax_prop = cum_softmax_direction_prop(state)
    r = numpy.random.rand()
    # side='right' yields the first index i with cum_softmax_prop[i] > r,
    # which is exactly what a linear scan would find.
    index = int(numpy.searchsorted(cum_softmax_prop, r, side='right'))
    # Rounding can leave the final cumulative value marginally below 1.0; if r
    # falls into that gap no entry exceeds it, so clamp to the last action
    # instead of falling through and returning None.
    return min(index, len(cum_softmax_prop) - 1)


# Driver loop: repeatedly sample an action for the current state and apply it
# to the environment.
# NOTE(review): `state` and `env` must be defined before this loop; their
# definitions are not part of this section.
while True:
    possible_actions = env.get_possible_actions()
    direction = pick_action(state)
    last_state = state.copy()  # snapshot of the state before the move
    outcome = 0
    state, outcome, in_end_pos = env.move(possible_actions[direction])
    time.sleep(0.02)
    # NOTE(review): this overwrites the state returned by env.move() with a
    # fresh copy from the environment - confirm both are meant to agree.
    state = env.getState().copy()
plot(fig, ax, nest.GetStatus(sd_wta, keys='events')[0]) max_rate = -1 chosen_action = -1 for i in range(len(sd_actions)): rate = len([e for e in nest.GetStatus([sd_actions[i]], keys='events')[0]['times'] if e > last_action_time]) # calc the "firerate" of each actor population if rate > max_rate: max_rate = rate # the population with the hightes rate wins chosen_action = i nest.SetStatus(stimulus, {'rate': 5000.}) possible_actions = env.get_possible_actions() new_position, outcome, in_end_position = env.move(possible_actions[chosen_action]) nest.SetStatus(wta_noise, {'rate': 0.}) for t in range(4): nest.Simulate(5) time.sleep(0.01) last_action_time += 60 actions_executed += 1 else: position = env.get_agent_pos().copy() _, in_end_position = env.init_new_trial() nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 0.}) rplt.from_device(sd_wta, title="WTA circuit")