Exemplo n.º 1
0
def critic(state, last_state, reward):
    """Return the temporal-difference error for one observed transition.

    Reads the module-level ``values`` table and discount factor ``gamma``.
    Each state ``s`` addresses the table as ``values[s[1], s[0]]``.

    Args:
        state: State reached after the action.
        last_state: State the action was taken from.
        reward: Reward received for the transition.

    Returns:
        ``reward - values[last_state] + gamma * values[state]``.
    """
    previous_value = values[last_state[1], last_state[0]]
    reached_value = values[state[1], state[0]]
    return reward - previous_value + gamma * reached_value


# Training loop: each iteration takes one environment step and, when the step
# had any effect, applies one update to the value table and the policy table.
i = 0
while i < iterations:
	i += 1
	# Progress as a fraction of all iterations; "\r" rewrites the same line.
	sys.stdout.write(str(float(i) / iterations) + "\r")

	direction = pick_action(state)

	# Copy the pre-move state so the update below can still address it after
	# `state` has been rebound to the result of the move.
	last_state = state[:]

	state, outcome = env.move(direction)

	# The error is computed on every step, even if it ends up unused.
	error = critic(state, last_state, outcome)

	# Skip the update when the step yielded nothing and left the state as is.
	if outcome != 0 or state != last_state:
		visited = (last_state[1], last_state[0])
		values[visited] += alpha * error
		policy[visited + (direction,)] += beta * error

#	if outcome != 0:
#		for row in values:
#			print numpy.array(row, dtype=int) 
	
Exemplo n.º 2
0
# Training loop for states addressed by their 'x' and 'y' keys: one
# environment step and, when the step had any effect, one update of the value
# and policy tables per counted iteration.
i = 0
in_end_pos = False
while i < iterations:
    state = env.getState().copy()
    # NOTE(review): `i` only advances inside this branch and nothing visible
    # here resets `in_end_pos` -- confirm the end position is handled
    # elsewhere, otherwise the loop stops progressing once it is reached.
    if not in_end_pos:
        possible_actions = env.get_possible_actions()
        i += 1
        # Progress as a fraction of all iterations; "\r" rewrites the line.
        sys.stdout.write(str(float(i) / iterations) + "\r")

        direction = pick_action(state)

        # Copy the pre-move state; `state` is rebound by the move below.
        last_state = state.copy()

        # `direction` is an index: the environment gets the matching action
        # object, while the policy table is addressed by the index itself.
        state, outcome, in_end_pos = env.move(possible_actions[direction])

        # The outcome is scaled by 100 before it is passed on as the reward.
        error = critic(state, last_state, outcome * 100)

        # Skip the update when the step yielded nothing and changed nothing.
        if outcome != 0 or state != last_state:
            visited = (last_state['y'], last_state['x'])
            values[visited] += alpha * error
            policy[visited + (direction,)] += beta * error
        
   #     if outcome != 0:
   #     	for row in values:
   #     		print numpy.array(row, dtype=int) 
        
        
Exemplo n.º 3
0
        # Enable (weight 1.0) the connections from the stimulus to the
        # population that represents the current position.
        nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 1.})
        
        # Set the noise rate to 3000 and run the simulation for 100 (NEST
        # measures simulation time in ms). Presumably this is the phase in
        # which the action populations compete -- confirm.
        nest.SetStatus(wta_noise, {'rate': 3000.})
        nest.Simulate(100)
        # Choose the action index with the highest event count; because the
        # comparison is strict, the lowest index wins a tie.
        max_rate = -1
        chosen_action = -1
        for i in range(len(sd_actions)):
            rate = len([e for e in nest.GetStatus([sd_actions[i]], keys='events')[0]['times'] if e > last_action_time]) # number of events recorded for this action after last_action_time, used as its "firing rate"
            if rate > max_rate:
                max_rate = rate # the population with the highest rate wins
                chosen_action = i
        # Set the stimulus rate to 5000.
        nest.SetStatus(stimulus, {'rate': 5000.})

        # Map the chosen index onto one of the environment's currently
        # possible actions.
        possible_actions = env.get_possible_actions() 

        # Execute it; `outcome` is reported as the reward in the print below.
        new_position, outcome, in_end_position = env.move(possible_actions[chosen_action])

        # update_values returns the prediction error. Judging by the
        # "updated values" print below it also changes `values` -- confirm.
        prediction_error = update_values(position, chosen_action, new_position, outcome)

        # Python 2 print statements: the trailing comma on the first one
        # suppresses the newline, so both parts share one output line.
        print "iteration:", actions_executed, "action:", chosen_action, 
        print "new pos:", new_position, "reward:", outcome, "updated values:", values[position['x']][position['y']], "prediction error:", prediction_error

        # Copy the values of the old position, scaled by WEIGHT_SCALING, into
        # the weights of its connections to each action population.
        for i in range(num_actions):
            nest.SetStatus(nest.GetConnections(states[position['x']][position['y']], actions[i]), {'weight': values[position['x']][position['y']][i] * WEIGHT_SCALING})
            
        # Move the stimulus: silence the old position (weight 0.0) and
        # stimulate the new one (weight 1.0).
        nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 0.})
        nest.SetStatus(nest.GetConnections(stimulus, states[new_position['x']][new_position['y']]), {'weight': 1.})

        # Switch the noise off and run the simulation for another 50 ms.
        nest.SetStatus(wta_noise, {'rate': 0.})
        nest.Simulate(50.)