示例#1
0
  rewards = torch.Tensor(rewards)
  rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
  for action, r in zip(model.saved_actions, rewards):
    action.reinforce(r)
  optimizer.zero_grad()
  autograd.backward(model.saved_actions, [None for _ in model.saved_actions])
  optimizer.step()
  del model.rewards[:]
  del model.saved_actions[:]


# Training:

# Build the environment from the parsed command-line arguments and report
# the sizes the networks below are constructed from.
env = SenseEnv(vars(args))
print("action space: ",env.action_space())
print("class count: ",env.classification_n())

# The policy is built from the observation space and the number of actions;
# the classifier from the number of classes.
model = Policy(env.observation_space(), env.action_space_n())
cnn_lstm = CNNLSTM(env.classification_n())

# Move both networks to the GPU only when it was requested AND is available,
# so the script still runs on CPU-only hosts.
if args.gpu and torch.cuda.is_available():
    model.cuda()
    cnn_lstm.cuda()

# Restore pretrained weights when a checkpoint directory is given.
# NOTE: only model.pkl is probed; a missing cnn_lstm.pkl still raises from
# torch.load rather than being skipped silently.
if model_path and os.path.exists(model_path+"/model.pkl"):
    print("loading pretrained models")
    # Deserialize onto the CPU: without map_location, a checkpoint saved from
    # CUDA cannot be loaded on a machine without a GPU. load_state_dict then
    # copies the values into the existing parameters, so the networks stay on
    # whichever device they were moved to above. A callable is used because
    # it is accepted by old and new PyTorch releases alike.
    model.load_state_dict(
        torch.load(model_path+"/model.pkl",
                   map_location=lambda storage, loc: storage))
    cnn_lstm.load_state_dict(
        torch.load(model_path+"/cnn_lstm.pkl",
                   map_location=lambda storage, loc: storage))

# Loss and optimizer for the policy network; the learning rate comes from
# the command line.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

# Loss for the classifier (presumably applied to cnn_lstm outputs - confirm
# against the training loop).
classifier_criterion = nn.CrossEntropyLoss()
    reward = r - value.data[0,0]
    action.reinforce(reward)
    value_loss += F.smooth_l1_loss(value, Variable(torch.Tensor([r])))
  optimizer.zero_grad()
  final_nodes = [value_loss] + list(map(lambda p: p.action, saved_actions))
  gradients = [torch.ones(1)] + [None] * len(saved_actions)
  autograd.backward(final_nodes, gradients)
  optimizer.step()
  del model.rewards[:]
  del model.saved_actions[:]

#train
# Build the environment from the parsed command-line arguments and report
# its action space.
env = SenseEnv(vars(args))
print("action space: ",env.action_space())

# The policy is built from the observation space and the number of actions;
# the classifier from the number of classes.
model = Policy(env.observation_space(), env.action_space_n())
cnn = CNN(env.classification_n())

# Move both networks to the GPU only when it was requested AND is available,
# so the script still runs on CPU-only hosts.
if args.gpu and torch.cuda.is_available():
    model.cuda()
    cnn.cuda()

# Restore pretrained weights when a checkpoint directory is given.
# NOTE: only model.pkl is probed; a missing cnn.pkl still raises from
# torch.load rather than being skipped silently.
if args.model_path and os.path.exists(args.model_path+"/model.pkl"):
    print("loading pretrained models")
    # Deserialize onto the CPU: without map_location, a checkpoint saved from
    # CUDA cannot be loaded on a machine without a GPU. load_state_dict then
    # copies the values into the existing parameters, so the networks stay on
    # whichever device they were moved to above. A callable is used because
    # it is accepted by old and new PyTorch releases alike.
    model.load_state_dict(
        torch.load(args.model_path+"/model.pkl",
                   map_location=lambda storage, loc: storage))
    cnn.load_state_dict(
        torch.load(args.model_path+"/cnn.pkl",
                   map_location=lambda storage, loc: storage))

# Loss and optimizer for the policy network.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Loss and optimizer for the CNN classifier.
classifier_criterion = nn.CrossEntropyLoss()
classifier_optimizer = torch.optim.Adam(cnn.parameters(), lr=0.001)
示例#3
0
                                            np.finfo(np.float32).eps)
    for action, r in zip(model.saved_actions, rewards):
        action.reinforce(r)
    optimizer.zero_grad()
    autograd.backward(model.saved_actions, [None for _ in model.saved_actions])
    optimizer.step()
    del model.rewards[:]
    del model.saved_actions[:]


# Training:

# Build the environment from the parsed command-line arguments and report
# its action space.
env = SenseEnv(vars(args))
print("action space: ", env.action_space())

# The policy is built from the observation space and the number of actions;
# the classifier from the number of classes.
model = Policy(env.observation_space(), env.action_space_n())
cnn_lstm = CNNLSTM(env.classification_n())

# Move both networks to the GPU only when it was requested AND is available,
# so the script still runs on CPU-only hosts.
if args.gpu and torch.cuda.is_available():
    model.cuda()
    cnn_lstm.cuda()

# Restore pretrained weights when a checkpoint directory is given.
# NOTE: only model.pkl is probed; a missing cnn_lstm.pkl still raises from
# torch.load rather than being skipped silently.
if args.model_path and os.path.exists(args.model_path + "/model.pkl"):
    print("loading pretrained models")
    # Deserialize onto the CPU: without map_location, a checkpoint saved from
    # CUDA cannot be loaded on a machine without a GPU. load_state_dict then
    # copies the values into the existing parameters, so the networks stay on
    # whichever device they were moved to above. A callable is used because
    # it is accepted by old and new PyTorch releases alike.
    model.load_state_dict(
        torch.load(args.model_path + "/model.pkl",
                   map_location=lambda storage, loc: storage))
    cnn_lstm.load_state_dict(
        torch.load(args.model_path + "/cnn_lstm.pkl",
                   map_location=lambda storage, loc: storage))

# Loss and optimizer for the policy network.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Loss and optimizer for the CNN-LSTM classifier.
classifier_criterion = nn.CrossEntropyLoss()
classifier_optimizer = torch.optim.Adam(cnn_lstm.parameters(), lr=0.001)