def learn_value(self, obs, vals, lr):
    # Because the value function is trained in a supervised fashion,
    # we can recompute the forward pass on every call.
    vals = h.varify(vals)
    obs = h.varify(obs)
    self.value_fn.set_lr(lr)
    self.value_fn.zero_grad()
    val_preds = self.value_fn(obs)
    loss = self.value_fn.criterion(val_preds, vals)
    loss.backward()
    self.value_fn.optimizer.step()
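
A hypothetical call site, for context (shapes and the `agent` name are illustrative assumptions; the 4-dimensional observation matches the ValueNetwork test in Example #4 below):

import numpy as np
# obs: (batch, ob_size) observations; vals: (batch,) empirical returns
agent.learn_value(obs=np.zeros((32, 4)), vals=np.zeros(32), lr=5e-2)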
Example #2
def test_mask_operations_correctness():
    sampled_acts = h.const([1, 2], dtype='int')
    # first version
    probs_1 = h.varify([[0.1, 0.2, 0.7], [0.4, 0.5, 0.1]])  # pre-sampled probabilities; we compute their gradient below
    act_oh = h.one_hot(sampled_acts, feat_n=probs_1.size()[-1]).detach()
    act_oh.requires_grad = False
    sampled_probs_1 = probs_1.mul(act_oh).sum(dim=-1).squeeze(dim=-1)
    sampled_probs_1.sum().backward()
    # second version
    probs_2 = h.varify([[0.1, 0.2, 0.7], [0.4, 0.5, 0.1]])
    sampled_probs_2 = h.sample_probs(probs_2, sampled_acts)
    sampled_probs_2.sum().backward()

    assert (sampled_probs_1.data.numpy() == sampled_probs_2.data.numpy()).all(), 'the two versions should give the same result'
    assert (probs_1.grad.data.numpy() ==
            probs_2.grad.data.numpy()).all(), 'the two versions should give the same gradient for the original input'
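
The two versions agree because `h.sample_probs` presumably picks out the probability of each sampled action, just like the one-hot mask-and-sum above. A hypothetical reference implementation for 2-D inputs (an assumption, not the torch_helpers source):

def sample_probs_reference(probs, sampled_acts):
    # Select, row by row, the probability of the sampled action. Gradients
    # then flow only into the selected entries, matching the one-hot
    # mask-and-sum in the first version above.
    return probs.gather(1, sampled_acts.unsqueeze(1)).squeeze(1)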
Example #3
def test_mask():
    probs = h.varify([[0.1, 0.2, 0.7], [0.4, 0.5, 0.1]])
    acts = h.const([1, 2], dtype='int')
    sampled_probs = h.sample_probs(probs, acts)
    sampled_probs.sum().backward()
    dp = probs.grad.data.numpy()
    assert dp[0, 1] != 0 and dp[1, 2] != 0, 'key entries of probs grad should be non-zero'
Example #4
def test_value():
    print("""test the ValueNetwork""")
    value_fn = ValueNetwork(ob_size=4)
    value_fn.optimizer.param_groups[0]['lr'] = 5e-2
    # test against values larger than 1.
    target_val = h.varify([50.0])
    for i in range(1000):
        obs = h.varify([[0.0, 0.0, 1.0, 1.0]])
        value_fn.zero_grad()
        vals = value_fn(obs)
        loss = value_fn.criterion(vals, target_val)
        if i % 100 == 0:
            print(loss.data.numpy()[0])
        loss.backward()
        value_fn.optimizer.step()
    assert loss.data.numpy()[0] < 1e-1, 'loss should be very small (l < 0.1)'
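
The tests assume a ValueNetwork that exposes `criterion`, `optimizer`, and `set_lr` as attributes. A minimal hypothetical definition consistent with that interface (a sketch, not the project's actual code):

import torch.nn as nn
import torch.optim as optim

class ValueNetwork(nn.Module):
    # Hypothetical reconstruction: a small MLP value head with an MSE
    # criterion and its own optimizer attached, as the tests above expect.
    def __init__(self, ob_size, hidden_size=64):
        super(ValueNetwork, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(ob_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 1))
        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.parameters(), lr=1e-3)

    def forward(self, obs):
        # (batch, ob_size) -> (batch,) predicted state values
        return self.layers(obs).squeeze(-1)

    def set_lr(self, lr):
        for group in self.optimizer.param_groups:
            group['lr'] = lr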
Example #5

def act(self, obs):
    obs = h.varify(obs, volatile=True)  # volatile=True: run in inference mode.
    mus, stddev = self.action(obs)
    if self.action_type == 'linear':
        acts = self.discrete_sampling(mus)
    elif self.action_type == 'gaussian':
        acts = self.gaussian_sampling(mus, stddev)
    else:
        raise Exception('action_type {} is not supported'.format(self.action_type))
    return acts
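
Note that `discrete_sampling` is used both here (to draw actions) and in `reinforce` below (to score given actions). A plausible reconstruction of that dual behavior (an assumption, not the project's code):

import torch
import torch.nn.functional as F
import torch_helpers as h

def discrete_sampling(self, mus, sampled_acts=None):
    probs = F.softmax(mus, dim=-1)
    if sampled_acts is None:
        # Acting: sampling needs no gradient, so draw from the raw data.
        return torch.multinomial(probs.data, 1).squeeze(-1)
    # Learning: log-probability of the actions that were actually taken.
    return torch.log(h.sample_probs(probs, sampled_acts))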
Example #6
def test_varify():
    x = range(0, 3)
    t = h.varify(x, 'int')  # making this a `Float` tensor results in a RuntimeError

    x = np.arange(0.0, 3.0)
    t = h.varify(x)

    x = torch.randn(4, 1)
    t = h.varify(x)

    t = h.varify(x, volatile=True)

    t = h.const(x, volatile=True)
    assert t.requires_grad is False and t.volatile is True

    # You can override the requires_grad flag in constants.
    # This is useful when you want a constant by default, but
    # would like to switch to a variable when a requires_grad flag is passed in.
    t = h.const(x, requires_grad=True)
    assert t.requires_grad is True
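
Judging from these asserts, `varify` coerces lists, numpy arrays, and tensors into `Variable`s of the requested dtype, while `const` is non-differentiable by default. A rough sketch of the assumed semantics against the old (≤0.3) PyTorch Variable API (not the actual torch_helpers source):

import numpy as np
import torch
from torch.autograd import Variable

def varify_sketch(x, dtype='float', volatile=False, requires_grad=True):
    # Coerce list / numpy / tensor input into a tensor of the requested
    # dtype, then wrap it. In old PyTorch, volatile variables never
    # require grad, and integer tensors cannot require grad either.
    if not torch.is_tensor(x):
        x = torch.from_numpy(np.asarray(x, dtype=np.int64 if dtype == 'int' else np.float32))
    rg = requires_grad and not volatile and dtype != 'int'
    return Variable(x, volatile=volatile, requires_grad=rg)

def const_sketch(x, volatile=False, requires_grad=False):
    # Like varify_sketch, but a constant unless requires_grad is overridden.
    return varify_sketch(x, volatile=volatile, requires_grad=requires_grad)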
Example #7

def reinforce(self, obs, acts, vs):
    """
    :param obs: Size(batch_n, steps, ob_size)
    :param acts: Size(batch_n, steps, ac_size)
    :param vs: Size(batch_n, steps)
    :return: None
    """
    obs = h.varify(obs)  # .view(-1, self.input_size)
    # todo: support higher-dimensional value functions?
    vs = h.varify(vs)  # .view(-1)  # self.value_fn(obs)
    mu, stddev = self.action(obs)
    if self.action_type == 'linear':
        acts = h.varify(acts, dtype='int')
        sampled_log_probs = self.discrete_sampling(mu, sampled_acts=acts)
    elif self.action_type == 'gaussian':
        acts = h.varify(acts, dtype='float')
        sampled_log_probs = self.gaussian_sampling(mu, stddev, sampled_acts=acts)
    else:
        raise Exception('action_type {} is not supported'.format(self.action_type))
    # The eligibility is the derivative of the log-probability.
    self.surrogate_loss -= torch.sum(vs * sampled_log_probs)
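
Presumably the caller zeroes the accumulated surrogate loss, runs `reinforce` over a batch, then backpropagates. A hypothetical training step (the `agent`/`optimizer` names are illustrative assumptions):

# Hypothetical REINFORCE step using the accumulated surrogate loss:
agent.surrogate_loss = 0
agent.reinforce(obs, acts, advantages)
agent.optimizer.zero_grad()
agent.surrogate_loss.backward()  # gradient of -sum(v * log pi(a|s))
agent.optimizer.step()
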
import torch
from moleskin import Moleskin
from torch_helpers import varify, volatile

from debug import graph

M = Moleskin()

x = varify(torch.randn(4, 2))
loss = x.sum()
assert x.grad is None

loss.backward(varify(torch.ones(1)), retain_graph=True)
assert x.grad.volatile is False, "gradient is never volatile"


def test_pytorch_grad():
    """NOTE: volatile can only be set on leaf variables. pyTorch enforces this."""
    try:
        x.grad.volatile = True
    except RuntimeError as e:
        assert str(e) == "volatile can only be set on leaf variables"
        return
    raise Exception('pyTorch did not enforce gradient non-volatility.')


test_pytorch_grad()

# However, there is a way to get around it.
x.grad = volatile(torch.ones(1).expand_as(x))
assert x.grad.volatile is True
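
(The indented fragment below comes from a make_dot-style autograd graph visualizer, used in the __main__ block at the end: these lines sit inside its recursive add_nodes helper, which walks each node's next_functions and saved_tensors to build the Graphviz dot graph.)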
            for u in node.next_functions:
                if u[0] is None:
                    pass  # todo: add string 'None'
                else:
                    add_nodes(u[0], node_id, depth=depth)

        try:
            if hasattr(node, 'saved_tensors'):
                for t in node.saved_tensors:
                    add_nodes(t, node_id, depth=depth)
        except RuntimeError:
            pass

    for root in roots:
        add_nodes(root, name=name)
    return dot


if __name__ == "__main__":
    import numpy as np
    import torch_helpers as h
    import torch.nn

    # x = h.varify(np.ones(10)) ** h.const(np.random.randn(10)) + 10
    x = h.const(torch.randn(1))
    y = h.varify(np.ones(1))
    fc = torch.nn.Linear(1, 40)
    o = fc(x) + y
    g = make_dot(o, max_depth=3)
    g.render('graphviz_test/example')