Example #1
import numpy as np
import torch
from os.path import dirname, join

# Net and load_args are assumed to be provided by the surrounding project.


class dqn:
    """Greedy wrapper around a trained DQN policy network."""

    def __init__(self, env, policy):
        self.N = env.map.get_node_num()
        self.env = env

        # Training-time hyperparameters are stored next to the checkpoint.
        config_path = join(dirname(policy), "default.json")
        args, _ = load_args(config_path)

        # Rebuild the network with the same shapes it was trained with.
        state_shape = env.observation_space.shape or env.observation_space.n
        action_shape = env.action_space.shape or env.action_space.n
        self.net = Net(args.layer_num, state_shape,
                       action_shape, args.device,  # dueling=(1, 1)
                       ).to(args.device)
        state_dict = torch.load(policy, map_location=args.device)
        self.net.load_state_dict(state_dict)

    def next_node(self, obs):
        # Run a batch of one observation through the Q-network and
        # return the index of the action with the highest Q-value.
        out, _ = self.net(np.array([obs]))
        out = out.detach().cpu()[0]
        return np.argmax(out)
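
A minimal usage sketch, not from the original source: env and the checkpoint
path are placeholders, and env is assumed to expose map.get_node_num() and the
usual Gym reset() interface.

agent = dqn(env, "/path/to/policy.pth")  # hypothetical checkpoint path
obs = env.reset()
node = agent.next_node(obs)  # index of the greedy next node
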
Example #2

import gym
import numpy as np
import torch

# Net, wrapper_dict, model_path, and the *_config dicts are assumed to be
# defined earlier in the surrounding script.

# The training checkpoint prefixes every key with 'model.'; strip that
# prefix so the weights load into a bare Net instance.
state_dict = {}
state_dict_raw = torch.load(model_path)
for key in state_dict_raw:
    if key.split('.')[0] == 'model':
        state_dict[key[len('model.'):]] = state_dict_raw[key]

# Build the wrapped navigation environment and a matching Q-network.
env = wrapper_dict[wrapper_config['wrapper']](gym.make('jackal_navigation-v0',
                                                       **env_config),
                                              wrapper_config['wrapper_args'])
state_shape = env.observation_space.shape or env.observation_space.n
action_shape = env.action_space.shape or env.action_space.n
model = Net(training_config['layer_num'], state_shape, action_shape,
            config['device']).to(config['device'])
model.load_state_dict(state_dict)

# Valid [min, max] ranges for the local planner's tunable parameters.
range_dict = {
    'max_vel_x': [0.1, 2],           # m/s
    'max_vel_theta': [0.314, 3.14],  # rad/s
    'vx_samples': [1, 12],
    'vtheta_samples': [1, 40],
    'path_distance_bias': [0.1, 1.5],
    'goal_distance_bias': [0.1, 2]
}

# Bookkeeping for the evaluation loop that follows in the full script.
rs = []
cs = []
pms = np.array(env_config['param_init'])
pms = np.expand_dims(pms, -1)  # column vector of shape (n_params, 1)
succeed = 0
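
A short illustrative sketch, not from the original source, of how range_dict
can bound a perturbed parameter vector; it assumes the keys of range_dict are
ordered the same way as the entries of pms, and clip_params is a hypothetical
helper.

def clip_params(params, ranges):
    # Clip each parameter to its [min, max] range, preserving key order.
    lows = np.array([lo for lo, hi in ranges.values()])
    highs = np.array([hi for lo, hi in ranges.values()])
    return np.clip(params, lows[:, None], highs[:, None])

pms = clip_params(pms, range_dict)  # keeps the (n_params, 1) column shape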