Example #1
    # Assumes `frame1` is a [1, 2, 84, 84] image tensor, `logvar` is the
    # learned per-pixel log-variance being optimized, and `policy` exposes
    # action_dist / action_logdist.
    frame = Variable(frame1).cuda()

    losses = []
    for m in range(mc_samples):

        # Reparameterization trick: sample unit Gaussian noise and scale it
        # by the learned std, std = sqrt(exp(logvar)).
        eps = Variable(torch.Tensor(1, 2, 84, 84).normal_(0, 1)).cuda()
        noisy_frame = frame + eps * torch.sqrt(torch.exp(logvar))

        # Keep the perturbed frame inside the valid pixel range.
        noisy_frame = torch.clamp(noisy_frame, min=0., max=max_val)

        # Alternative: rescale to [0, max_val] instead of clamping.
        # noisy_frame = noisy_frame - Variable(torch.min(noisy_frame).data)
        # noisy_frame = noisy_frame / Variable(torch.max(noisy_frame).data)
        # noisy_frame = noisy_frame * max_val

        # KL(true || noisy) between the policy's action distributions on the
        # clean and the perturbed frame.
        log_dist_noise = policy.action_logdist(noisy_frame)
        log_dist_true = policy.action_logdist(frame)

        action_dist_kl = torch.sum(
            (log_dist_true - log_dist_noise) * torch.exp(log_dist_true),
            dim=1)  # [B]

        action_dist_kl = torch.mean(action_dist_kl)  # * 1000.

        # Weak quadratic prior pulling logvar toward .6.
        # (A prior std of 2 is a variance of 4, i.e. log(4) for logvar.)
        # loss = action_dist_kl + (logvar - torch.log(4))**2
        logvar_dif = torch.mean((logvar - .6)**2) * .0001

        loss = action_dist_kl + logvar_dif
        losses.append(loss)
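A minimal sketch (assumed, not from the source) of how the losses collected above could be used to update `logvar`; the Adam optimizer, the learning rate, and the zero initialization are illustrative choices, not the author's confirmed training loop:

import torch
from torch.optim import Adam
from torch.autograd import Variable

# Hypothetical setup: the log-variance is the only parameter learned here.
logvar = Variable(torch.zeros(1, 2, 84, 84).cuda(), requires_grad=True)
optimizer = Adam([logvar], lr=1e-3)

# After the Monte Carlo loop above has filled `losses`:
optimizer.zero_grad()
total_loss = sum(losses) / len(losses)  # average the MC estimates
total_loss.backward()
optimizer.step()

Averaging over the mc_samples draws reduces the variance of the KL estimate before taking a single gradient step.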
Example #2
        # Plot the state: the two stacked frames shown side by side.
        ax = plt.subplot2grid((rows, cols), (i, 0), frameon=False)
        state1 = np.concatenate([
            dataset[traj_ind][start_ind + i][1][0],
            dataset[traj_ind][start_ind + i][1][1]
        ], axis=1)
        ax.imshow(state1, cmap='gray')
        ax.set_xticks([])
        ax.set_yticks([])

        for a in range(4):

            # Plot the gradient of each action's probability w.r.t. the input.
            ax = plt.subplot2grid((rows, cols), (i, 1 + a), frameon=False)

            x = Variable(torch.from_numpy(
                np.array([dataset[traj_ind][start_ind + i][1]])).float(),
                         requires_grad=True).cuda()
            dist = policy.action_dist(x)
            grad = torch.autograd.grad(torch.sum(dist[:, a]), x)[0]
            grad = grad.data.cpu().numpy()[0]  # first item in the batch -> [2, 84, 84]

            # Use the gradient magnitudes as a saliency map.
            grad = np.abs(grad)

            state1 = np.concatenate([grad[0], grad[1]], axis=1)
            # ax.imshow(state1, cmap='gray', norm=NoNorm())
            ax.imshow(state1, cmap='gray')
            ax.set_xticks([])
            ax.set_yticks([])
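A minimal sketch (assumed, not from the source) of the figure scaffolding this loop appears to run inside; `rows`, `cols`, the trajectory indices, and the output filename are all illustrative:

import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt

# Hypothetical grid: one row per timestep, one column for the state plus
# one column per action's gradient map.
rows = 6           # timesteps shown (illustrative)
cols = 1 + 4       # state panel + 4 action-gradient panels
traj_ind = 0       # which trajectory to visualize (illustrative)
start_ind = 0      # first timestep within that trajectory (illustrative)

plt.figure(figsize=(2 * cols, 2 * rows))
for i in range(rows):
    # ... body as in the example above ...
    pass
plt.tight_layout()
plt.savefig('action_gradients.png')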