def learn_value(self, obs, vals, lr):
    """Perform a single supervised update of ``self.value_fn``.

    Because the value function is trained in a supervised fashion, the
    forward pass is simply recomputed on every call.

    :param obs: observations; converted with ``h.varify`` and fed to ``self.value_fn``
    :param vals: target values; converted with ``h.varify``
    :param lr: learning rate forwarded to ``self.value_fn.set_lr``
    :return: None
    """
    targets = h.varify(vals)
    inputs = h.varify(obs)

    critic = self.value_fn
    critic.set_lr(lr)
    critic.zero_grad()

    predictions = critic(inputs)
    fit_loss = critic.criterion(predictions, targets)
    fit_loss.backward()
    critic.optimizer.step()
def test_mask_operations_correctness():
    """Check ``h.sample_probs`` against an explicit one-hot masking implementation.

    Both the selected probabilities and the gradients flowing back into the
    input probabilities must match exactly.
    """
    chosen = h.const([1, 2], dtype='int')

    # Reference path: mask the probabilities with a constant one-hot matrix.
    ref_probs = h.varify([[0.1, 0.2, 0.7], [0.4, 0.5, 0.1]])  # gradient is taken w.r.t. this input
    n_actions = ref_probs.size()[-1]
    mask = h.one_hot(chosen, feat_n=n_actions).detach()
    mask.requires_grad = False
    masked = ref_probs.mul(mask)
    ref_selected = masked.sum(dim=-1).squeeze(dim=-1)
    ref_selected.sum().backward()

    # Path under test: the helper does the selection itself.
    helper_probs = h.varify([[0.1, 0.2, 0.7], [0.4, 0.5, 0.1]])
    helper_selected = h.sample_probs(helper_probs, chosen)
    helper_selected.sum().backward()

    same_values = ref_selected.data.numpy() == helper_selected.data.numpy()
    assert same_values.all(), 'two should give the same result'
    same_grads = ref_probs.grad.data.numpy() == helper_probs.grad.data.numpy()
    assert same_grads.all(), 'two should give the same grad for the original input'
def test_mask():
    """Check that gradients reach the probability entries picked by ``h.sample_probs``.

    Assumes ``h.sample_probs(probs, acts)`` selects ``probs[i, acts[i]]`` for
    each row -- confirm against ``torch_helpers``. Under that assumption the
    entries ``[0, 1]`` and ``[1, 2]`` must receive a non-zero gradient after
    back-propagating the sum of the selected probabilities.
    """
    probs = h.varify([[0.1, 0.2, 0.7], [0.4, 0.5, 0.1]])
    acts = h.const([1, 2], dtype='int')
    sampled_probs = h.sample_probs(probs, acts)
    sampled_probs.sum().backward()
    dp = probs.grad.data.numpy()
    # Elements of a numpy array are never ``None``, so an ``is not None`` check
    # can never fail; compare against zero, which is what the message promises.
    assert dp[0, 1] != 0 and dp[1, 2] != 0, 'key entries of probs grad should be non-zero'
def test_value():
    """Train a ``ValueNetwork`` on one fixed observation and check the loss shrinks.

    The target (50.0) is deliberately larger than 1. The loss is printed every
    100 steps and must end up below 0.1 after 1000 optimiser steps.
    """
    print("""test the ValueNetwork""")
    net = ValueNetwork(ob_size=4)
    net.optimizer.param_groups[0]['lr'] = 5e-2

    # test against values larger than 1.
    goal = h.varify([50.0])
    for step in range(1000):
        state = h.varify([[0.0, 0.0, 1.0, 1.0]])
        net.zero_grad()
        estimate = net(state)
        loss = net.criterion(estimate, goal)
        if not step % 100:
            print(loss.data.numpy()[0])
        loss.backward()
        net.optimizer.step()

    final_loss = loss.data.numpy()[0]
    assert final_loss < 1e-1, 'loss should be very small (l < 0.1)'
def act(self, obs):
    """Sample actions for ``obs`` in inference mode.

    :param obs: observations; wrapped with ``h.varify(..., volatile=True)``
    :return: the result of ``self.discrete_sampling`` ('linear') or
        ``self.gaussian_sampling`` ('gaussian')
    :raises Exception: when ``self.action_type`` is neither 'linear' nor 'gaussian'
    """
    inference_obs = h.varify(obs, volatile=True)  # volatile: use as inference mode.
    mus, stddev = self.action(inference_obs)

    kind = self.action_type
    if kind == 'linear':
        return self.discrete_sampling(mus)
    if kind == 'gaussian':
        return self.gaussian_sampling(mus, stddev)
    raise Exception('action_type {} is not supported'.format(kind))
def test_varify():
    """Smoke-test ``h.varify`` and ``h.const`` on several inputs and flag combinations."""
    # Integer range with an explicit 'int' dtype
    # (original note: setting a `Float` tensor results in RunTimeError).
    h.varify(range(0, 3), 'int')

    # Float numpy array with the default dtype.
    h.varify(np.arange(0.0, 3.0))

    # Torch tensor, with and without the volatile flag.
    tensor = torch.randn(4, 1)
    h.varify(tensor)
    h.varify(tensor, volatile=True)

    frozen = h.const(tensor, volatile=True)
    assert frozen.requires_grad is False and frozen.volatile is True

    # The requires_grad flag of a constant can be overridden. This is handy when
    # something is a constant by default but should become a variable as soon as
    # a requires_grad flag is passed in.
    trainable = h.const(tensor, requires_grad=True)
    assert trainable.requires_grad is True
def reinforce(self, obs, acts, vs):
    """Accumulate the REINFORCE surrogate loss for a batch of rollouts.

    Subtracts ``sum(vs * log_prob(acts))`` from ``self.surrogate_loss``; nothing
    is back-propagated here.

    :param obs: Size(batch_n, steps, ob_size)
    :param acts: Size(batch_n, steps, ac_size)
    :param vs: Size(batch_n, steps)
    :return: None
    :raises Exception: when ``self.action_type`` is neither 'linear' nor 'gaussian'
    """
    # TODO: support higher dimensional value functions?
    obs = h.varify(obs)
    vs = h.varify(vs)
    mu, stddev = self.action(obs)
    if self.action_type == 'linear':
        acts = h.varify(acts, dtype='int')
        sampled_log_probs = self.discrete_sampling(mu, sampled_acts=acts)
    elif self.action_type == 'gaussian':
        acts = h.varify(acts, dtype='float')
        sampled_log_probs = self.gaussian_sampling(mu, stddev, sampled_acts=acts)
    else:
        # Fail explicitly, as `act` does, instead of hitting an unbound
        # `sampled_log_probs` a few lines further down.
        raise Exception('action_type {} is not supported'.format(self.action_type))
    # eligibility is the derivative of log_probability
    self.surrogate_loss -= torch.sum(vs * sampled_log_probs)
import torch
from moleskin import Moleskin
from torch_helpers import varify, volatile
from debug import graph

M = Moleskin()


def test_pytorch_grad():
    """Verify that pyTorch refuses to mark the gradient of ``x`` as volatile.

    NOTE: volatile can only be set on leaf variables. pyTorch enforces this.
    """
    try:
        x.grad.volatile = True
    except RuntimeError as err:
        assert str(err) == "volatile can only be set on leaf variables"
    else:
        raise Exception('pyTorch did not enforce gradient non-volatility.')


# Build a tiny graph and back-propagate through it once.
x = varify(torch.randn(4, 2))
loss = x.sum()
assert x.grad is None
loss.backward(varify(torch.ones(1)), retain_graph=True)
assert x.grad.volatile is False, "gradient is never volatile"

test_pytorch_grad()

# However, there is a way to get around it: replace the gradient wholesale
# with a variable that is already volatile.
x.grad = volatile(torch.ones(1).expand_as(x))
assert x.grad.volatile is True
for u in node.next_functions: if u[0] is None: pass # todo: add string 'None' else: add_nodes(u[0], node_id, depth=depth) try: if hasattr(node, 'saved_tensors'): for t in node.saved_tensors: add_nodes(t, node_id, depth=depth) except RuntimeError: pass for root in roots: add_nodes(root, name=name) return dot if __name__ == "__main__": import numpy as np import torch_helpers as h import torch.nn # x = h.varify(np.ones(10)) ** h.const(np.random.randn(10)) + 10 x = h.const(torch.randn(1)) y = h.varify(np.ones(1)) fc = torch.nn.Linear(1, 40) o = fc(x) + y g = make_dot(o, max_depth=3) g.render('graphviz_test/example')