# Test-time setup: build the visualisation env, construct the policy/value
# networks, restore them from a training checkpoint, and load the ego-mimic
# results that were saved for the selected test feature.
# The MuJoCo model XML is selected by name through args.vis_model.
env_vis = HumanoidVisEnv('assets/mujoco_models/%s.xml' % args.vis_model, 10)
env.seed(cfg.seed)
# The CNN feature width is read from the first take's feature array (last axis).
cnn_feat_dim = cnn_feat_dict[takes[0]].shape[-1]
actuators = env.model.actuator_names
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
body_qposaddr = get_body_qposaddr(env.model)

"""load policy net"""
policy_vs_net = VideoForecastNet(
    cnn_feat_dim, state_dim, cfg.policy_v_hdim, cfg.fr_margin,
    cfg.policy_v_net, cfg.policy_v_net_param,
    cfg.policy_s_hdim, cfg.policy_s_net)
value_vs_net = VideoForecastNet(
    cnn_feat_dim, state_dim, cfg.value_v_hdim, cfg.fr_margin,
    cfg.value_v_net, cfg.value_v_net_param,
    cfg.value_s_hdim, cfg.value_s_net)
# The policy/value heads consume whatever width the forecast nets emit (out_dim).
policy_net = PolicyGaussian(
    MLP(policy_vs_net.out_dim, cfg.policy_hsize, cfg.policy_htype),
    action_dim, log_std=cfg.log_std, fix_std=cfg.fix_std)
value_net = Value(MLP(value_vs_net.out_dim, cfg.value_hsize, cfg.value_htype))

cp_path = '%s/iter_%04d.p' % (cfg.model_dir, args.iter)
logger.info('loading policy net from checkpoint: %s' % cp_path)
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# checkpoints produced by a trusted training run.
# The context manager closes the file handle as soon as the checkpoint is read.
with open(cp_path, "rb") as cp_file:
    model_cp = pickle.load(cp_file)
policy_net.load_state_dict(model_cp['policy_dict'])
policy_vs_net.load_state_dict(model_cp['policy_vs_dict'])
value_net.load_state_dict(model_cp['value_dict'])
value_vs_net.load_state_dict(model_cp['value_vs_dict'])
# The observation filter is restored as saved in the checkpoint.
running_state = model_cp['running_state']
value_stat = RunningStat(1)
# NOTE(review): to_test presumably switches the networks to evaluation mode --
# confirm against its definition.
to_test(policy_vs_net, policy_net, value_vs_net, value_net)

"""load ego mimic results"""
em_cfg = EgoMimicConfig(cfg.ego_mimic_cfg)
em_res_path = '%s/iter_%04d_%s.p' % (em_cfg.result_dir, cfg.ego_mimic_iter, args.test_feat)
# The results file unpickles to a (results, meta) pair; same trust caveat as above.
with open(em_res_path, 'rb') as em_res_file:
    em_res, em_meta = pickle.load(em_res_file)
# Test-time setup: construct the video-conditioned policy/value networks,
# restore them from a training checkpoint, then load the state-net checkpoint
# together with its normalisation metadata.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
body_qposaddr = get_body_qposaddr(env.model)

"""load policy net"""
policy_vs_net = VideoStateNet(
    cnn_feat_dim, cfg.policy_v_hdim, cfg.fr_margin,
    cfg.policy_v_net, cfg.policy_v_net_param, cfg.causal)
value_vs_net = VideoStateNet(
    cnn_feat_dim, cfg.value_v_hdim, cfg.fr_margin,
    cfg.value_v_net, cfg.value_v_net_param, cfg.causal)
# The MLP input width is the state size plus the video hidden size.
policy_net = PolicyGaussian(
    MLP(state_dim + cfg.policy_v_hdim, cfg.policy_hsize, cfg.policy_htype),
    action_dim, log_std=cfg.log_std, fix_std=cfg.fix_std)
value_net = Value(
    MLP(state_dim + cfg.value_v_hdim, cfg.value_hsize, cfg.value_htype))

cp_path = '%s/iter_%04d.p' % (cfg.model_dir, args.iter)
logger.info('loading policy net from checkpoint: %s' % cp_path)
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# checkpoints produced by a trusted training run.
# The context manager closes the file handle as soon as the checkpoint is read.
with open(cp_path, "rb") as cp_file:
    model_cp = pickle.load(cp_file)
policy_net.load_state_dict(model_cp['policy_dict'])
policy_vs_net.load_state_dict(model_cp['policy_vs_dict'])
value_net.load_state_dict(model_cp['value_dict'])
value_vs_net.load_state_dict(model_cp['value_vs_dict'])
# The observation filter is restored as saved in the checkpoint.
running_state = model_cp['running_state']
value_stat = RunningStat(1)

"""load state net"""
# cp_path and model_cp are rebound here: from this point on they refer to the
# state-net checkpoint, not the policy checkpoint loaded above.
cp_path = cfg.state_net_model
logger.info('loading state net from checkpoint: %s' % cp_path)
# This file unpickles to a (weights, meta) pair; same trust caveat as above.
with open(cp_path, "rb") as cp_file:
    model_cp, meta = pickle.load(cp_file)
state_net_mean = meta['mean']
state_net_std = meta['std']
state_net_cfg = meta['cfg']
# Training setup: seed the env, load expert data, build the actor/critic
# networks, then either warm-start from an ego-mimic checkpoint (iter == 0)
# or resume from this model's own checkpoint (iter > 0).
env.seed(cfg.seed)
env.load_experts(cfg.takes['train'], cfg.expert_feat_file, cfg.cnn_feat_file)
# The CNN feature width is read from the first loaded feature array (last axis).
cnn_feat_dim = env.cnn_feat[0].shape[-1]
actuators = env.model.actuator_names
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
running_state = ZFilter((state_dim,), clip=5)

"""define actor and critic"""
policy_vs_net = VideoForecastNet(
    cnn_feat_dim, state_dim, cfg.policy_v_hdim, cfg.fr_margin,
    cfg.policy_v_net, cfg.policy_v_net_param,
    cfg.policy_s_hdim, cfg.policy_s_net, cfg.policy_dyn_v)
value_vs_net = VideoForecastNet(
    cnn_feat_dim, state_dim, cfg.value_v_hdim, cfg.fr_margin,
    cfg.value_v_net, cfg.value_v_net_param,
    cfg.value_s_hdim, cfg.value_s_net, cfg.value_dyn_v)
# The policy/value heads consume whatever width the forecast nets emit (out_dim).
policy_net = PolicyGaussian(
    MLP(policy_vs_net.out_dim, cfg.policy_hsize, cfg.policy_htype),
    action_dim, log_std=cfg.log_std, fix_std=cfg.fix_std)
value_net = Value(MLP(value_vs_net.out_dim, cfg.value_hsize, cfg.value_htype))

# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# checkpoints produced by a trusted training run.
if args.iter == 0:
    # Warm start: take the policy/value head weights from the ego-mimic model.
    em_cfg = EgoMimicConfig(cfg.ego_mimic_cfg)
    cp_path = '%s/iter_%04d.p' % (em_cfg.model_dir, cfg.ego_mimic_iter)
    logger.info('loading model from ego mimic checkpoint: %s' % cp_path)
    # The context manager closes the file handle as soon as the checkpoint is read.
    with open(cp_path, "rb") as cp_file:
        model_cp = pickle.load(cp_file)
    if cfg.obs_phase or cfg.policy_s_net != 'id' or cfg.policy_v_hdim != em_cfg.policy_v_hdim:
        # NOTE(review): filter_state_dict presumably removes the entries of the
        # first affine layer, whose input width would no longer match -- confirm
        # against its definition.
        filter_state_dict(model_cp['policy_dict'], {'net.affine_layers.0'})
        filter_state_dict(model_cp['value_dict'], {'net.affine_layers.0'})
    # NOTE(review): these loads are placed after the filter `if`, not inside it,
    # so the checkpoint is applied whether or not filtering ran -- confirm the
    # intended nesting. strict=False is passed so that a key mismatch between
    # the checkpoint and the network is tolerated.
    policy_net.load_state_dict(model_cp['policy_dict'], strict=False)
    value_net.load_state_dict(model_cp['value_dict'], strict=False)
elif args.iter > 0:
    # Resume: restore from this model's own checkpoint at the requested iteration.
    cp_path = '%s/iter_%04d.p' % (cfg.model_dir, args.iter)
    logger.info('loading model from checkpoint: %s' % cp_path)
    with open(cp_path, "rb") as cp_file:
        model_cp = pickle.load(cp_file)
    policy_net.load_state_dict(model_cp['policy_dict'])