import time

import nest

# env (grid-world environment), states, stimulus, wta_noise, sd_actions and
# NUM_ITERATIONS are assumed to be set up at module level by the network- and
# environment-construction code.


def run():
    # Main loop
    # values_hist = [np.ravel(values.copy())]
    actions_executed = 0
    last_action_time = 0
    position = env.getState().copy()
    in_end_position = False

    while actions_executed < NUM_ITERATIONS:
        if not in_end_position:
            # stimulate the new state: move the stimulus weight from the old
            # state population to the one coding the current position
            nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]),
                           {'weight': 0.})
            position = env.getState().copy()
            nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]),
                           {'weight': 1.})

            nest.SetStatus(wta_noise, {'rate': 3000.})
            for t in range(8):
                nest.Simulate(5)
                time.sleep(0.01)

            # calculate the "firing rate" of each actor population since the
            # last action; the population with the highest rate wins
            max_rate = -1
            chosen_action = -1
            for i in range(len(sd_actions)):
                rate = len([e for e in nest.GetStatus([sd_actions[i]], keys='events')[0]['times']
                            if e > last_action_time])
                if rate > max_rate:
                    max_rate = rate
                    chosen_action = i

            nest.SetStatus(stimulus, {'rate': 5000.})

            possible_actions = env.get_possible_actions()
            new_position, outcome, in_end_position = env.move(possible_actions[chosen_action])

            # switch the WTA noise off and let the network settle
            nest.SetStatus(wta_noise, {'rate': 0.})
            for t in range(4):
                nest.Simulate(5)
                time.sleep(0.01)

            last_action_time += 60
            actions_executed += 1
        else:
            # goal reached: reset the trial and detach the stimulus from the
            # final state population
            position = env.get_agent_pos().copy()
            _, in_end_position = env.init_new_trial()
            nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]),
                           {'weight': 0.})
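The main loop only talks to the environment through a handful of calls (getState, get_possible_actions, move, get_agent_pos, init_new_trial). A minimal sketch of such an environment is given below; the class name SimpleGridWorld, the grid size, the action names and the reward of 1 at a single goal cell are illustrative assumptions, not the environment used by the original code.

class SimpleGridWorld:
    """Illustrative stand-in for the environment that run() relies on."""

    def __init__(self, width=5, height=5, goal=(4, 4)):
        self.width, self.height = width, height
        self.goal = goal                      # hypothetical rewarded cell
        self.pos = {'x': 0, 'y': 0}

    def getState(self):
        # current agent position; callers copy() the returned dict
        return self.pos

    def get_agent_pos(self):
        return self.pos

    def get_possible_actions(self):
        # fixed action set; run() indexes this list with the chosen action
        return ['north', 'south', 'east', 'west']

    def move(self, action):
        dx, dy = {'north': (0, -1), 'south': (0, 1),
                  'east': (1, 0), 'west': (-1, 0)}[action]
        self.pos = {'x': min(max(self.pos['x'] + dx, 0), self.width - 1),
                    'y': min(max(self.pos['y'] + dy, 0), self.height - 1)}
        in_end_position = (self.pos['x'], self.pos['y']) == self.goal
        outcome = 1. if in_end_position else 0.   # reward signal
        return self.pos, outcome, in_end_position

    def init_new_trial(self):
        # reset the agent to the start cell; no trial is finished yet
        self.pos = {'x': 0, 'y': 0}
        return self.pos, False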
import time

import numpy

# env is again the grid-world environment assumed to be constructed elsewhere.


def cum_softmax_direction_prop(state):
    # current_policy is assumed to be the action-preference vector for `state`
    # (defined outside this fragment)
    softmax_prop = numpy.exp(current_policy)
    softmax_prop = softmax_prop / numpy.sum(softmax_prop)  # softmax: (e^prop) / (sum(e^prop))
    cum_softmax_prop = numpy.cumsum(softmax_prop)          # cumulative probabilities
    return cum_softmax_prop


def pick_action(state):
    # inverse-transform sampling: draw a uniform random number and return the
    # first action whose cumulative softmax probability exceeds it
    cum_softmax_prop = cum_softmax_direction_prop(state)
    r = numpy.random.rand()
    for i in range(len(cum_softmax_prop)):
        if cum_softmax_prop[i] > r:
            return i


while True:
    possible_actions = env.get_possible_actions()
    state = env.getState().copy()
    direction = pick_action(state)
    last_state = state.copy()
    outcome = 0
    state, outcome, in_end_pos = env.move(possible_actions[direction])
    time.sleep(0.1)
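The softmax sampling in pick_action can be checked in isolation: repeatedly drawing actions for a fixed preference vector should reproduce the softmax probabilities. The sketch below is a self-contained illustration; the helper softmax_sample and the preference values are hypothetical and only mirror the cumulative-sum scheme above.

import numpy

def softmax_sample(preferences, rng=numpy.random):
    # softmax over the preferences (max subtracted for numerical stability),
    # then inverse-transform sampling on the cumulative distribution,
    # equivalent to the linear scan in pick_action above
    p = numpy.exp(preferences - numpy.max(preferences))
    p = p / numpy.sum(p)
    cum_p = numpy.cumsum(p)
    r = rng.rand()
    return int(numpy.searchsorted(cum_p, r)), p

# hypothetical preferences for the four movement directions
prefs = numpy.array([0.5, 1.0, 0.1, 0.2])
counts = numpy.zeros(len(prefs))
for _ in range(10000):
    i, p = softmax_sample(prefs)
    counts[i] += 1
print(counts / counts.sum())  # empirical frequencies ~ softmax probabilities p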