Example #1
import torch_helpers as h  # assumed import, used throughout these examples


def test_mask():
    probs = h.varify([[0.1, 0.2, 0.7], [0.4, 0.5, 0.1]])
    acts = h.const([1, 2], dtype='int')
    sampled_probs = h.sample_probs(probs, acts)
    sampled_probs.sum().backward()
    dp = probs.grad.data.numpy()
    assert dp[0, 1] != 0 and dp[1, 2] != 0, 'key entries of probs grad should be non-zero'
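`h.sample_probs` selects the probability of each taken action (Example #5 below verifies it against an explicit one-hot mask). For reference, a minimal sketch of the same selection in current plain PyTorch, using `gather`:

import torch

probs = torch.tensor([[0.1, 0.2, 0.7], [0.4, 0.5, 0.1]], requires_grad=True)
acts = torch.tensor([1, 2])
# select probs[i, acts[i]] along the last dimension -> [0.2, 0.1]
sampled = probs.gather(-1, acts.unsqueeze(-1)).squeeze(-1)
sampled.sum().backward()  # grad is 1 exactly at the selected entries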
Example #2
    def td_error(rewards, terminal, state, action):
        pass  # todo: implement (see the TD(0) sketch after this example)

    # Let's collect data
    trajectories = []
    actions = []
    rewards = []
    obs = env.reset()
    with h.Eval(q), torch.no_grad():  # note: 'and' would discard the first context manager
        for i in range(G.n_rollouts):
            a_prob = q(h.const([obs]))
            a = torch.distributions.Categorical(a_prob).sample().detach().item()
            obs, reward, done, info = env.step(a)  # gym returns (obs, reward, done, info)
            trajectories.append(obs)
            actions.append(a)
            rewards.append(reward)
    import numpy as np
    values = np.zeros(len(rewards))
    gammas = G.gamma ** np.arange(len(rewards))[::-1]
    # todo: can linearize
    for ind, r in enumerate(rewards):
        # reward at step `ind` contributes gamma**(ind - i) to values[i] for i <= ind
        values[:ind + 1] += r * gammas[-(ind + 1):]
    print(values)
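    # the todo above: the loop can be linearized with one reverse cumulative
    # sum, since values[i] = gamma**-i * sum_{j >= i} gamma**j * r_j.
    # A sketch (the gamma**-i division can under/overflow on long horizons):
    disc = G.gamma ** np.arange(len(rewards))
    values_vec = np.cumsum((np.asarray(rewards) * disc)[::-1])[::-1] / disc
    assert np.allclose(values, values_vec)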

    # compute TD error: first select Q(s_t, a_t) for each step via a one-hot mask
    act_oh = h.one_hot(h.const(actions, dtype='int'), feat_n=a_prob.size()[-1])
    td = (q(h.const(trajectories)) * act_oh).sum(dim=-1)  # todo: subtract the TD target
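The `td_error` stub above never gets a body in this excerpt. For orientation, the one-step TD(0) error for Q-learning is delta_t = r_t + gamma * max_a Q(s_{t+1}, a) - Q(s_t, a_t). Below is a hypothetical, self-contained sketch of that formula; the names `q_sa`, `q_next`, and `done` are assumptions, not part of this codebase:

import torch

def td0_error(q_sa, q_next, rewards, done, gamma):
    # q_sa:    Q(s_t, a_t), shape [T]
    # q_next:  Q(s_{t+1}, .), shape [T, n_actions]
    # done:    1.0 where step t ended the episode, else 0.0
    target = rewards + gamma * (1.0 - done) * q_next.max(dim=-1)[0]
    # the target is conventionally treated as fixed (semi-gradient update)
    return target.detach() - q_sa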
Example #3
def test_one_hot():
    acts = h.const([1, 2], dtype='int')
    n = 3
    oh = h.one_hot(acts, n)
    h.assert_equal(oh.data,
                   h.tensorify([[0., 1., 0.], [0., 0., 1.]]),
                   message="one_hot gives incorrect output {}".format(oh))
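For comparison, the same encoding in plain PyTorch via `scatter_`, as a minimal sketch (shapes follow the test above):

import torch

acts = torch.tensor([1, 2])
oh = torch.zeros(len(acts), 3).scatter_(1, acts.unsqueeze(1), 1.0)
# oh -> [[0., 1., 0.], [0., 0., 1.]]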
Example #4
def test_varify():
    x = range(0, 3)
    t = h.varify(x, 'int')  # setting a `Float` tensor results in a RuntimeError

    x = np.arange(0.0, 3.0)
    t = h.varify(x)

    x = torch.randn(4, 1)
    t = h.varify(x)

    t = h.varify(x, volatile=True)

    t = h.const(x, volatile=True)
    assert t.requires_grad is False and t.volatile is True

    # You can override the requires_grad flag in constants.
    # This is useful when you want to have a constant by default, but
    # would like to switch to var when a requires_grad flag is passed in.
    t = h.const(x, requires_grad=True)
    assert t.requires_grad is True
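A usage sketch of the override described in the comment above; `placeholder` is a hypothetical helper name, not part of torch_helpers:

def placeholder(x, requires_grad=False):
    # constant by default; a gradient-tracking leaf when the caller
    # passes requires_grad=True
    return h.const(x, requires_grad=requires_grad)

fixed = placeholder(x)                      # requires_grad stays False
tuned = placeholder(x, requires_grad=True)  # participates in backward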
Example #5
def test_mask_operations_correctness():
    sampled_acts = h.const([1, 2], dtype='int')
    # first version
    probs_1 = h.varify([[0.1, 0.2, 0.7], [0.4, 0.5, 0.1]])  # the pre-sampled probabilities whose gradient we compute
    act_oh = h.one_hot(sampled_acts, feat_n=probs_1.size()[-1]).detach()
    act_oh.requires_grad = False
    sampled_probs_1 = probs_1.mul(act_oh).sum(dim=-1).squeeze(dim=-1)
    sampled_probs_1.sum().backward()
    # second version
    probs_2 = h.varify([[0.1, 0.2, 0.7], [0.4, 0.5, 0.1]])
    sampled_probs_2 = h.sample_probs(probs_2, sampled_acts)
    sampled_probs_2.sum().backward()

    assert (sampled_probs_1.data.numpy() == sampled_probs_2.data.numpy()).all(), 'two should give the same result'
    assert (probs_1.grad.data.numpy() ==
            probs_2.grad.data.numpy()).all(), 'two should give the same grad for the original input'
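Since sampled_probs_1 is the sum of probs_1 masked by the one-hot actions, the gradient of its sum with respect to probs_1 is exactly that one-hot mask. A third check one could append to the test body, reusing the names above (a sketch):

    expected_grad = h.one_hot(sampled_acts, feat_n=3).data.numpy()
    assert (probs_1.grad.data.numpy() == expected_grad).all(), \
        'gradient of the masked sum equals the one-hot mask'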
Example #6
def sgd_baseline(lr=0.001):
    from playground.maml.maml_torch.tasks import Sine
    task = Sine()
    model = StandardMLP(1, 1) if G.debug else FunctionalMLP(1, 1)

    adam = t.optim.Adam(model.parameters(), lr=lr)
    mse = t.nn.MSELoss()
    for ep_ind in range(1000):
        xs, labels = h.const(task.proper())
        ys = model(xs.unsqueeze(-1))
        loss = mse(ys, labels.unsqueeze(-1))
        logger.log(ep_ind, loss=loss.item(), silent=ep_ind % 50)  # print every 50th epoch
        adam.zero_grad()
        loss.backward()
        adam.step()
    logger.flush()

# NOTE: this excerpt is truncated here; the lines below are the tail of the
# `add_nodes` recursion inside a `make_dot` graph-visualization helper.
            for u in node.next_functions:
                if u[0] is None:
                    pass  # todo: add string 'None'
                else:
                    add_nodes(u[0], node_id, depth=depth)

        try:
            if hasattr(node, 'saved_tensors'):
                for t in node.saved_tensors:
                    add_nodes(t, node_id, depth=depth)
        except RuntimeError:
            pass

    for root in roots:
        add_nodes(root, name=name)
    return dot


if __name__ == "__main__":
    import numpy as np
    import torch_helpers as h
    import torch
    import torch.nn

    # x = h.varify(np.ones(10)) ** h.const(np.random.randn(10)) + 10
    x = h.const(torch.randn(1))
    y = h.varify(np.ones(1))
    fc = torch.nn.Linear(1, 40)
    o = fc(x) + y
    g = make_dot(o, max_depth=3)
    g.render('graphviz_test/example')