def generate_values_old(maze, model, **kwargs):
    """Evaluate ``model`` at every usable maze location and collect the results.

    For each ``loc`` in ``maze.useable`` a network input is built (from
    ``pcs.activity(loc)`` when ``model.input_type == 'vector'``, or from
    ``sg.get_frame(maze, agtlocation=loc)`` when it is ``'frame'``) and passed
    through ``sample_select_action``; the returned policy and value are read
    from positions 1 and 2 of its result.

    Args:
        maze: Object providing ``empty_map``, ``useable``, ``grid`` and
            ``make_map``.
        model: Network exposing ``input_type`` (``'vector'`` or ``'frame'``)
            and, in ``'frame'`` mode, a ``hidden`` sequence of layers.
        **kwargs:
            EC: Optional memory object exposing ``recall_mem``. When given,
                policy maps are returned instead of the value map.
            pcs: Object exposing ``activity(loc)``; required when
                ``model.input_type == 'vector'``.

    Returns:
        ``value_map`` when ``EC`` is not supplied; otherwise the tuple
        ``(EC_pol_map, MF_pol_map)``. Note that ``value_map`` is still filled
        in the latter case, it is just not returned.

    Raises:
        ValueError: If ``model.input_type`` is not recognised, if ``pcs`` is
            missing in ``'vector'`` mode, or if ``EC`` is supplied in
            ``'frame'`` mode but the model's last hidden layer is not
            ``nn.Linear`` (no linear activations are available to query
            ``EC`` with).
    """
    # NOTE(review): values are written directly into whatever object
    # ``maze.empty_map`` yields; if that is a shared attribute rather than a
    # fresh grid, successive calls overwrite each other -- confirm.
    value_map = maze.empty_map
    EC = kwargs.get('EC', None)
    pcs = kwargs.get('pcs', None)
    use_ec = EC is not None

    if use_ec:
        EC_pol_map = maze.make_map(maze.grid, pol=True)
        MF_pol_map = maze.make_map(maze.grid, pol=True)

    for loc in maze.useable:
        # Reset per location so a missing linear activation is detectable
        # below instead of surfacing as an UnboundLocalError.
        lin_act = None

        if model.input_type == 'vector':
            if pcs is None:
                raise ValueError(
                    "generate_values_old: 'pcs' keyword argument is required "
                    "when model.input_type == 'vector'"
                )
            state = Variable(torch.FloatTensor(pcs.activity(loc)))
            policy, value = sample_select_action(model, state)[1:3]
        elif model.input_type == 'frame':
            state = Variable(torch.FloatTensor(sg.get_frame(maze, agtlocation=loc)))
            if isinstance(model.hidden[-1], nn.Linear):
                # getlin=True makes the helper also return the linear-layer
                # activation (position 3 of its result).
                policy, value, lin_act = sample_select_action(model, state, getlin=True)[1:4]
            else:
                policy, value = sample_select_action(model, state)[1:3]
        else:
            raise ValueError(
                "generate_values_old: unsupported model.input_type "
                "{!r}".format(model.input_type)
            )

        # Maps are indexed [loc[1]][loc[0]]; presumably loc is (x, y) and the
        # map is row-major -- verify against the maze implementation.
        value_map[loc[1]][loc[0]] = value

        if use_ec:
            if model.input_type == 'vector':
                EC_pol = EC.recall_mem(tuple(state.data[0]))
            else:
                if lin_act is None:
                    raise ValueError(
                        "generate_values_old: EC recall in 'frame' mode needs "
                        "linear activations, but model.hidden[-1] is not nn.Linear"
                    )
                EC_pol = EC.recall_mem(tuple(lin_act.view(-1)))
            EC_pol_map[loc[1]][loc[0]] = tuple(EC_pol.data[0])
            MF_pol_map[loc[1]][loc[0]] = tuple(policy)

    if use_ec:
        return EC_pol_map, MF_pol_map
    return value_map
def generate_values(maze, model):
    """Fill the maze's map with the model's value estimate at each usable cell.

    For every location in ``maze.useable`` a frame is obtained from
    ``sg.get_frame``, wrapped as a float tensor ``Variable`` and passed to
    ``sample_select_action``; the value is taken from position 2 of its
    result and stored at ``[loc[1]][loc[0]]`` of the map.

    Args:
        maze: Object providing ``empty_map`` and ``useable``.
        model: Network handed to ``sample_select_action``.

    Returns:
        The object obtained from ``maze.empty_map`` after being filled in.
    """
    # NOTE(review): the map from ``maze.empty_map`` is mutated in place;
    # confirm it is not shared between calls.
    values = maze.empty_map
    for position in maze.useable:
        frame = sg.get_frame(maze, agtlocation=position)
        net_input = Variable(torch.FloatTensor(frame))
        outputs = sample_select_action(model, net_input)
        # Positions 1 and 2 hold (policy, value); only the value is used here.
        _policy, state_value = outputs[1:3]
        values[position[1]][position[0]] = state_value
    return values