# Example no. 1
# 0
def generate_values_old(maze, model,**kwargs):
	"""Evaluate ``model`` at every usable maze location and map the results.

	For each ``loc`` in ``maze.useable`` the model is queried through
	``sample_select_action`` and elements 1 and 2 of its return (named
	``policy`` and ``value`` here) are read. ``value`` is written to
	``value_map[loc[1]][loc[0]]``.

	Keyword Args:
		EC: optional object exposing ``recall_mem(key)``. When given, two
			extra maps are built with ``maze.make_map(maze.grid, pol=True)``
			and filled per location with the recalled EC policy and the
			model policy.
		pcs: object exposing ``activity(loc)``; required when
			``model.input_type == 'vector'``.

	Returns:
		``value_map`` when ``EC`` is None, otherwise the tuple
		``(EC_pol_map, MF_pol_map)``. In the latter case ``value_map`` is
		still filled in but is not returned.

	Raises:
		ValueError: if ``model.input_type`` is neither ``'vector'`` nor
			``'frame'``, if ``pcs`` is missing for a ``'vector'`` model, or
			if ``EC`` is given for a ``'frame'`` model whose last hidden
			layer is not ``nn.Linear`` (no linear activity to recall with).
	"""
	# NOTE(review): value_map aliases maze.empty_map. If that attribute is a
	# stored list rather than something producing a fresh map on each access,
	# every call writes into the same object -- confirm against the maze class.
	value_map = maze.empty_map
	EC = kwargs.get('EC', None)
	pcs = kwargs.get('pcs', None)

	# Identity check everywhere: `EC == None` would dispatch to EC.__eq__ and
	# could disagree with the `is not None` checks used to build the maps.
	use_ec = EC is not None
	if use_ec:
		EC_pol_map = maze.make_map(maze.grid, pol=True)
		MF_pol_map = maze.make_map(maze.grid, pol=True)

	for loc in maze.useable:
		input_type = model.input_type
		# Reset per location so a missing linear activity is detected
		# explicitly instead of surfacing as an unbound local.
		lin_act = None

		if input_type == 'vector':
			if pcs is None:
				raise ValueError("generate_values_old: 'pcs' is required when model.input_type is 'vector'")
			state = Variable(torch.FloatTensor(pcs.activity(loc)))
			policy, value = sample_select_action(model, state)[1:3]
		elif input_type == 'frame':
			state = Variable(torch.FloatTensor(sg.get_frame(maze, agtlocation=loc)))
			if isinstance(model.hidden[-1], nn.Linear):
				# Also fetch element 3, used below as the EC recall key.
				policy, value, lin_act = sample_select_action(model, state, getlin=True)[1:4]
			else:
				policy, value = sample_select_action(model, state)[1:3]
		else:
			raise ValueError("generate_values_old: unsupported model.input_type {!r}".format(input_type))

		value_map[loc[1]][loc[0]] = value

		if not use_ec:
			continue

		if input_type == 'vector':
			EC_pol = EC.recall_mem(tuple(state.data[0]))
		else:
			if lin_act is None:
				raise ValueError("generate_values_old: EC recall for 'frame' input needs a model whose last hidden layer is nn.Linear")
			EC_pol = EC.recall_mem(tuple(lin_act.view(-1)))
		EC_pol_map[loc[1]][loc[0]] = tuple(EC_pol.data[0])
		MF_pol_map[loc[1]][loc[0]] = tuple(policy)

	if use_ec:
		return EC_pol_map, MF_pol_map
	return value_map
# Example no. 2
# 0
def generate_values(maze, model):
	"""Fill the maze's map with the model output at each usable location.

	For every ``loc`` in ``maze.useable`` a frame is obtained from
	``sg.get_frame(maze, agtlocation=loc)``, wrapped as a float tensor
	``Variable`` and passed to ``sample_select_action``. Element 2 of that
	call's return is stored at ``[loc[1]][loc[0]]`` of the map.

	Returns:
		The object read from ``maze.empty_map``, after being filled in.
	"""
	grid_values = maze.empty_map
	for loc in maze.useable:
		frame = sg.get_frame(maze, agtlocation=loc)
		net_input = Variable(torch.FloatTensor(frame))
		# Elements 1 and 2 of the return are unpacked, so the slice must
		# yield exactly two items; only the second one is stored.
		_policy, estimate = sample_select_action(model, net_input)[1:3]
		col, row = loc[0], loc[1]
		grid_values[row][col] = estimate
	return grid_values