Example #1
import time

import nest


def run():
    # Main loop: stimulate the current state, let the action WTA pick a move,
    # execute it in the environment, and start a new trial on a terminal state.
    # Assumes env, stimulus, states, wta_noise, sd_actions and NUM_ITERATIONS
    # are defined at module level.
    #values_hist = [np.ravel(values.copy())]
    actions_executed = 0
    last_action_time = 0
    position = env.getState().copy()
    in_end_position = False
    
    while actions_executed < NUM_ITERATIONS:
        if not in_end_position:
            # switch the stimulus from the previous state's population to the current one
            nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 0.})
            position = env.getState().copy()
            nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 1.})
            
            # drive the action WTA with noise while the decision is being made
            nest.SetStatus(wta_noise, {'rate': 3000.})
            for t in range(8):
                nest.Simulate(5)
                time.sleep(0.01)
    
            # choose the action whose population fired most since the last action
            max_rate = -1
            chosen_action = -1
            for i in range(len(sd_actions)):
                # spike count ("firing rate") of actor population i since the last action
                rate = len([e for e in nest.GetStatus([sd_actions[i]], keys='events')[0]['times'] if e > last_action_time])
                if rate > max_rate:
                    max_rate = rate  # the population with the highest rate wins
                    chosen_action = i
    
            nest.SetStatus(stimulus, {'rate': 5000.})
    
            possible_actions = env.get_possible_actions() 
    
            new_position, outcome, in_end_position = env.move(possible_actions[chosen_action])
    
            # switch the WTA noise off and let the network settle
            nest.SetStatus(wta_noise, {'rate': 0.})
            for t in range(4):
                nest.Simulate(5)
                time.sleep(0.01)

            last_action_time += 60  # 8 * 5 ms + 4 * 5 ms of simulated time per action
            actions_executed += 1
        else:
            # terminal state reached: reset the trial and remove the stimulus
            # from the terminal state's population
            position = env.get_agent_pos().copy()
            _, in_end_position = env.init_new_trial()
            nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 0.})
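
The loop above relies on objects created elsewhere in the script: the environment env, the Poisson stimulus, one state population per grid cell (states), the noise source for the action WTA (wta_noise), and one spike detector per actor population (sd_actions). A minimal setup sketch against the NEST 2.x API used above might look like the following; the grid size, neuron model, population sizes and rates are assumptions for illustration, not values from the original code.

import nest

GRID_W, GRID_H = 5, 5   # assumed grid dimensions
NUM_ACTIONS = 4         # assumed number of actions
NUM_ITERATIONS = 1000   # assumed number of actions to execute

nest.ResetKernel()

# Poisson stimulus; the loop routes it to the active state's population by
# switching the connection weights between 0 and 1
stimulus = nest.Create('poisson_generator', 1, {'rate': 5000.})

# one small excitatory population per grid cell
states = [[nest.Create('iaf_psc_alpha', 10) for _ in range(GRID_H)]
          for _ in range(GRID_W)]
for column in states:
    for pop in column:
        nest.Connect(stimulus, pop, syn_spec={'weight': 0.})

# noise source that drives the action WTA circuit during action selection
wta_noise = nest.Create('poisson_generator', 1, {'rate': 0.})

# one actor population per action, each recorded by its own spike detector
# (plastic state-to-action connections and the WTA inhibition are omitted here)
actions = [nest.Create('iaf_psc_alpha', 10) for _ in range(NUM_ACTIONS)]
sd_actions = []
for pop in actions:
    nest.Connect(wta_noise, pop)
    sd = nest.Create('spike_detector')[0]
    nest.Connect(pop, [sd])
    sd_actions.append(sd)
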
Example #2
import time

import numpy


def cum_softmax_direction_prop(state):
    # current_policy: the vector of action preferences for `state`; its computation
    # is assumed to happen earlier in this function and is omitted here
    softmax_prop = numpy.exp(current_policy)
    softmax_prop = softmax_prop / numpy.sum(softmax_prop)  # softmax: e^prop / sum(e^prop)
    cum_softmax_prop = numpy.cumsum(softmax_prop)  # cumulative distribution for sampling
    return cum_softmax_prop


def pick_action(state):
    # sample an action index according to the softmax probabilities for this state
    cum_softmax_prop = cum_softmax_direction_prop(state)
    r = numpy.random.rand()
    for i in range(len(cum_softmax_prop)):
        if cum_softmax_prop[i] > r:
            return i


while True:
    # one step: observe the state, sample an action, and execute it
    possible_actions = env.get_possible_actions()
    state = env.getState().copy()

    direction = pick_action(state)

    last_state = state.copy()

    outcome = 0
    state, outcome, in_end_pos = env.move(possible_actions[direction])

    time.sleep(0.1)
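
As a quick, self-contained sanity check of this sampling scheme (not part of the original script), the empirical pick frequencies can be compared with the softmax probabilities of a small, hypothetical preference vector:

import numpy

toy_policy = numpy.array([1.0, 2.0, 0.5])                 # hypothetical preferences
probs = numpy.exp(toy_policy) / numpy.sum(numpy.exp(toy_policy))
cum = numpy.cumsum(probs)

counts = numpy.zeros(len(toy_policy), dtype=int)
for _ in range(100000):
    r = numpy.random.rand()
    for i in range(len(cum)):
        if cum[i] > r:                                     # same rule as pick_action
            counts[i] += 1
            break

print(probs)                # softmax probabilities
print(counts / 100000.0)    # empirical frequencies, should be close to probs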