def load_policy_model(model_dir="model/test_nlg_no_warm_up_with_nlu.pkl"):
    """Load a saved policy-network state dict and return the network in eval mode.

    Args:
        model_dir: Path to a checkpoint produced by ``torch.save`` containing a
            state dict for ``Net``. Defaults to the bundled test checkpoint.

    Returns:
        A ``Net`` instance on the module-level ``device`` with the loaded
        weights, switched to evaluation mode.
    """
    # map_location lets a checkpoint saved on one device (e.g. GPU) load on a
    # machine that only has another (e.g. CPU-only); without it torch.load can
    # raise before we ever get to .to(device).
    model = torch.load(model_dir, map_location=device)
    net = Net(state_dim=dialog_config.STATE_DIM,
              num_actions=dialog_config.SYS_ACTION_CARDINALITY,
              config=config).to(device)
    net.load_state_dict(model)
    net.eval()  # inference only: disable dropout / batch-norm updates
    return net
def load_policy_model(model_dir):
    """Restore a policy ``Net`` from the checkpoint at *model_dir*.

    The checkpoint is expected to be a ``torch.save``-d state dict; the
    returned network lives on the module-level ``device`` and is already
    switched to evaluation mode.
    """
    state_dict = torch.load(model_dir)
    policy = Net(
        state_dim=dialog_config.STATE_DIM,
        num_actions=dialog_config.SYS_ACTION_CARDINALITY,
        config=config,
    ).to(device)
    policy.load_state_dict(state_dict)
    policy.eval()
    return policy
def load_policy_model(model_dir="model/test_nlg_no_warm_up_with_nlu.pkl"):
    """Load a saved state dict from *model_dir* into a fresh Net and return it in eval mode."""
    model = torch.load(model_dir)
    net = Net(state_dim=dialog_config.STATE_DIM,
              num_actions=dialog_config.SYS_ACTION_CARDINALITY,
              config=config).to(device)
    net.load_state_dict(model)
    net.eval()  # inference mode for the restored policy
    return net


# Either resume from a previously trained RL checkpoint or start from a
# freshly initialised policy network on `device`.
if config.resume:
    policy_net = load_policy_model(config.resume_rl_model_dir)
else:
    policy_net = Net(state_dim=state_dim, num_actions=num_actions,
                     config=config).to(device)

# # optimizer = optim.RMSprop(policy_net.parameters())
# Adam with L2 regularisation (weight_decay) on the policy parameters.
optimizer = optim.Adam(lr=config.lr, params=policy_net.parameters(),
                       weight_decay=5e-5)
# net.optimizer = optim.Adam(params=net.parameters(), lr=5e-4, weight_decay=1e-3)
# net.lr_scheduler = optim.lr_scheduler.StepLR(net.optimizer, step_size=500, gamma=0.95)
# net.loss_func = nn.CrossEntropyLoss()

# NOTE(review): this constructor call continues past the visible chunk — the
# argument list is intentionally left open here.
pg_reinforce = PolicyGradientREINFORCE(
    optimizer=optimizer,
    policy_network=policy_net,
    state_dim=state_dim,
    num_actions=num_actions,