# Visualization environment built from the MuJoCo XML selected by args.vis_model.
# NOTE(review): the meaning of the second argument (10) is not visible here — confirm in HumanoidVisEnv.
env_vis = HumanoidVisEnv('assets/mujoco_models/%s.xml' % args.vis_model, 10)
# `env` is created before this excerpt; seed it from the config.
env.seed(cfg.seed)
# Size of the last axis of the CNN features of the first take.
cnn_feat_dim = cnn_feat_dict[takes[0]].shape[-1]
actuators = env.model.actuator_names
# First entry of the observation-space and action-space shapes.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
# NOTE(review): presumably a body-name -> qpos address lookup; confirm in get_body_qposaddr.
body_qposaddr = get_body_qposaddr(env.model)

"""load policy net"""
# Build the actor and critic networks with the layout described by cfg, then
# restore their weights from the checkpoint of iteration args.iter.
policy_vs_net = VideoForecastNet(cnn_feat_dim, state_dim, cfg.policy_v_hdim, cfg.fr_margin, cfg.policy_v_net,
                                 cfg.policy_v_net_param, cfg.policy_s_hdim, cfg.policy_s_net)
value_vs_net = VideoForecastNet(cnn_feat_dim, state_dim, cfg.value_v_hdim, cfg.fr_margin, cfg.value_v_net,
                                cfg.value_v_net_param, cfg.value_s_hdim, cfg.value_s_net)
# The MLP heads take the output width of their respective VideoForecastNet as input size.
policy_net = PolicyGaussian(MLP(policy_vs_net.out_dim, cfg.policy_hsize, cfg.policy_htype), action_dim,
                            log_std=cfg.log_std, fix_std=cfg.fix_std)
value_net = Value(MLP(value_vs_net.out_dim, cfg.value_hsize, cfg.value_htype))
cp_path = '%s/iter_%04d.p' % (cfg.model_dir, args.iter)
logger.info('loading policy net from checkpoint: %s' % cp_path)
# NOTE: pickle can run arbitrary code while deserializing; only load checkpoints you trust.
# The context manager closes the checkpoint file right after it has been read
# (the previous bare open() left the handle to the garbage collector).
with open(cp_path, "rb") as cp_file:
    model_cp = pickle.load(cp_file)
policy_net.load_state_dict(model_cp['policy_dict'])
policy_vs_net.load_state_dict(model_cp['policy_vs_dict'])
value_net.load_state_dict(model_cp['value_dict'])
value_vs_net.load_state_dict(model_cp['value_vs_dict'])
# The running state object is stored in the checkpoint next to the weights.
running_state = model_cp['running_state']
value_stat = RunningStat(1)
# NOTE(review): to_test presumably switches the networks to evaluation mode — confirm.
to_test(policy_vs_net, policy_net, value_vs_net, value_net)

"""load ego mimic results"""
# Locate the ego-mimic result file for the configured iteration and test feature,
# then read the (results, meta) pair stored in it.
em_cfg = EgoMimicConfig(cfg.ego_mimic_cfg)
em_res_path = '%s/iter_%04d_%s.p' % (em_cfg.result_dir, cfg.ego_mimic_iter, args.test_feat)
# NOTE: pickle can run arbitrary code while deserializing; only load files you trust.
# Use a context manager so the result file is closed deterministically.
with open(em_res_path, 'rb') as em_res_file:
    em_res, em_meta = pickle.load(em_res_file)
# Example #2
# 0
# First entry of the observation-space and action-space shapes
# (`env` is created before this excerpt).
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
# NOTE(review): presumably a body-name -> qpos address lookup; confirm in get_body_qposaddr.
body_qposaddr = get_body_qposaddr(env.model)
"""load policy net"""
# Build the actor and critic networks with the layout described by cfg, then
# restore their weights from the checkpoint of iteration args.iter.
policy_vs_net = VideoStateNet(cnn_feat_dim, cfg.policy_v_hdim, cfg.fr_margin,
                              cfg.policy_v_net, cfg.policy_v_net_param,
                              cfg.causal)
value_vs_net = VideoStateNet(cnn_feat_dim, cfg.value_v_hdim, cfg.fr_margin,
                             cfg.value_v_net, cfg.value_v_net_param,
                             cfg.causal)
# The MLP input width is the state size plus the video hidden size.
policy_net = PolicyGaussian(MLP(state_dim + cfg.policy_v_hdim,
                                cfg.policy_hsize, cfg.policy_htype),
                            action_dim,
                            log_std=cfg.log_std,
                            fix_std=cfg.fix_std)
value_net = Value(
    MLP(state_dim + cfg.value_v_hdim, cfg.value_hsize, cfg.value_htype))
cp_path = '%s/iter_%04d.p' % (cfg.model_dir, args.iter)
logger.info('loading policy net from checkpoint: %s' % cp_path)
# NOTE: pickle can run arbitrary code while deserializing; only load checkpoints you trust.
# The context manager closes the checkpoint file right after it has been read
# (the previous bare open() left the handle to the garbage collector).
with open(cp_path, "rb") as cp_file:
    model_cp = pickle.load(cp_file)
policy_net.load_state_dict(model_cp['policy_dict'])
policy_vs_net.load_state_dict(model_cp['policy_vs_dict'])
value_net.load_state_dict(model_cp['value_dict'])
value_vs_net.load_state_dict(model_cp['value_vs_dict'])
# The running state object is stored in the checkpoint next to the weights.
running_state = model_cp['running_state']
value_stat = RunningStat(1)
"""load state net"""
# Read the state-net checkpoint, which stores a (weights, meta) pair.
# Note that cp_path and model_cp are rebound here and no longer refer to
# whatever they held before this section.
cp_path = cfg.state_net_model
logger.info('loading state net from checkpoint: %s' % cp_path)
# NOTE: pickle can run arbitrary code while deserializing; only load checkpoints you trust.
# Use a context manager so the checkpoint file is closed deterministically.
with open(cp_path, "rb") as cp_file:
    model_cp, meta = pickle.load(cp_file)
# Mean, std and config saved alongside the state-net weights.
state_net_mean = meta['mean']
state_net_std = meta['std']
state_net_cfg = meta['cfg']
# Example #3
# 0
# `env` is created before this excerpt; seed it from the config.
env.seed(cfg.seed)
# Hand the training takes plus the expert-feature and CNN-feature files to the environment.
env.load_experts(cfg.takes['train'], cfg.expert_feat_file, cfg.cnn_feat_file)
# Size of the last axis of the first CNN feature entry held by the environment.
cnn_feat_dim = env.cnn_feat[0].shape[-1]
actuators = env.model.actuator_names
# First entry of the observation-space and action-space shapes.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
# NOTE(review): presumably a running observation normalizer clipped at 5 — confirm in ZFilter.
running_state = ZFilter((state_dim,), clip=5)

"""define actor and critic"""
# Video/state encoders for the actor and the critic. Objects are created in
# the same order as before so any construction-time side effects are unchanged.
policy_vs_net = VideoForecastNet(
    cnn_feat_dim, state_dim, cfg.policy_v_hdim, cfg.fr_margin,
    cfg.policy_v_net, cfg.policy_v_net_param,
    cfg.policy_s_hdim, cfg.policy_s_net, cfg.policy_dyn_v,
)
value_vs_net = VideoForecastNet(
    cnn_feat_dim, state_dim, cfg.value_v_hdim, cfg.fr_margin,
    cfg.value_v_net, cfg.value_v_net_param,
    cfg.value_s_hdim, cfg.value_s_net, cfg.value_dyn_v,
)

# Actor: an MLP fed with the policy encoder's output width, wrapped in a Gaussian policy.
actor_backbone = MLP(policy_vs_net.out_dim, cfg.policy_hsize, cfg.policy_htype)
policy_net = PolicyGaussian(
    actor_backbone,
    action_dim,
    log_std=cfg.log_std,
    fix_std=cfg.fix_std,
)

# Critic: an MLP fed with the value encoder's output width, wrapped in a Value head.
critic_backbone = MLP(value_vs_net.out_dim, cfg.value_hsize, cfg.value_htype)
value_net = Value(critic_backbone)
# Restore weights: iteration 0 starts from the ego-mimic checkpoint, any later
# iteration resumes from this model's own checkpoint.
if args.iter == 0:
    em_cfg = EgoMimicConfig(cfg.ego_mimic_cfg)
    cp_path = '%s/iter_%04d.p' % (em_cfg.model_dir, cfg.ego_mimic_iter)
    logger.info('loading model from ego mimic checkpoint: %s' % cp_path)
    # NOTE: pickle can run arbitrary code while deserializing; only load checkpoints you trust.
    # The context manager closes the file right after it has been read.
    with open(cp_path, "rb") as cp_file:
        model_cp = pickle.load(cp_file)
    # NOTE(review): presumably drops the first affine layer's entries when the
    # input layout differs from the ego-mimic model — confirm in filter_state_dict.
    if cfg.obs_phase or cfg.policy_s_net != 'id' or cfg.policy_v_hdim != em_cfg.policy_v_hdim:
        filter_state_dict(model_cp['policy_dict'], {'net.affine_layers.0'})
        filter_state_dict(model_cp['value_dict'], {'net.affine_layers.0'})
    # strict=False is passed so the filtered dictionaries can still be loaded.
    policy_net.load_state_dict(model_cp['policy_dict'], strict=False)
    value_net.load_state_dict(model_cp['value_dict'], strict=False)
elif args.iter > 0:
    cp_path = '%s/iter_%04d.p' % (cfg.model_dir, args.iter)
    logger.info('loading model from checkpoint: %s' % cp_path)
    # Same trusted-input caveat as above; close the file deterministically.
    with open(cp_path, "rb") as cp_file:
        model_cp = pickle.load(cp_file)
    policy_net.load_state_dict(model_cp['policy_dict'])