Example #1
def get_positive_expectation(p_samples, measure, average=True):
    log_2 = math.log(2.)

    if measure == 'GAN':
        Ep = - F.softplus(-p_samples)
    elif measure == 'JSD':
        Ep = log_2 - F.softplus(- p_samples)
    elif measure == 'X2':
        Ep = p_samples ** 2
    elif measure == 'KL':
        Ep = p_samples + 1.
    elif measure == 'RKL':
        Ep = -torch.exp(-p_samples)
    elif measure == 'DV':
        Ep = p_samples
    elif measure == 'H2':
        Ep = 1. - torch.exp(-p_samples)
    elif measure == 'W1':
        Ep = p_samples
    else:
        raise_measure_error(measure)

    if average:
        return Ep.mean()
    else:
        return Ep
Example #2
def get_negative_expectation(q_samples, measure, average=True):
    log_2 = math.log(2.)

    if measure == 'GAN':
        Eq = F.softplus(-q_samples) + q_samples
    elif measure == 'JSD':
        Eq = F.softplus(-q_samples) + q_samples - log_2
    elif measure == 'X2':
        Eq = -0.5 * ((torch.sqrt(q_samples ** 2) + 1.) ** 2)
    elif measure == 'KL':
        Eq = torch.exp(q_samples)
    elif measure == 'RKL':
        Eq = q_samples - 1.
    elif measure == 'DV':
        Eq = log_sum_exp(q_samples, 0) - math.log(q_samples.size(0))
    elif measure == 'H2':
        Eq = torch.exp(q_samples) - 1.
    elif measure == 'W1':
        Eq = q_samples
    else:
        raise_measure_error(measure)

    if average:
        return Eq.mean()
    else:
        return Eq
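The two helpers above are meant to be used as a pair: the divergence estimate is the positive expectation over joint ("real") critic scores minus the negative expectation over marginal ("fake") scores. A minimal sketch with random stand-in scores, assuming the two functions above are in scope (no real critic here):

import torch

p_samples = torch.randn(128)  # critic scores on positive pairs
q_samples = torch.randn(128)  # critic scores on negative samples

measure = 'JSD'
Ep = get_positive_expectation(p_samples, measure)
Eq = get_negative_expectation(q_samples, measure)
loss = Eq - Ep  # minimizing this tightens the divergence lower bound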
Example #3
def discretized_mix_logistic_loss_1d(x, l):
    """ log-likelihood for mixture of discretized logistics, assumes the data has been rescaled to [-1,1] interval """
    # Pytorch ordering
    x = x.permute(0, 2, 3, 1)
    l = l.permute(0, 2, 3, 1)
    xs = [int(y) for y in x.size()]
    ls = [int(y) for y in l.size()]

    # here and below: unpacking the params of the mixture of logistics
    nr_mix = int(ls[-1] / 3)
    logit_probs = l[:, :, :, :nr_mix]
    l = l[:, :, :, nr_mix:].contiguous().view(xs + [nr_mix * 2]) # 2 for mean, scale
    means = l[:, :, :, :, :nr_mix]
    log_scales = torch.clamp(l[:, :, :, :, nr_mix:2 * nr_mix], min=-7.)
    # here and below: getting the means and adjusting them based on preceding
    # sub-pixels
    x = x.contiguous()
    x = x.unsqueeze(-1) + Variable(torch.zeros(xs + [nr_mix]).cuda(), requires_grad=False)

    # means = torch.cat((means[:, :, :, 0, :].unsqueeze(3), m2, m3), dim=3)
    centered_x = x - means
    inv_stdv = torch.exp(-log_scales)
    plus_in = inv_stdv * (centered_x + 1. / 255.)
    cdf_plus = torch.sigmoid(plus_in)
    min_in = inv_stdv * (centered_x - 1. / 255.)
    cdf_min = torch.sigmoid(min_in)
    # log probability for edge case of 0 (before scaling)
    log_cdf_plus = plus_in - F.softplus(plus_in)
    # log probability for edge case of 255 (before scaling)
    log_one_minus_cdf_min = -F.softplus(min_in)
    cdf_delta = cdf_plus - cdf_min  # probability for all other cases
    mid_in = inv_stdv * centered_x
    # log probability in the center of the bin, to be used in extreme cases
    # (not actually used in our code)
    log_pdf_mid = mid_in - log_scales - 2. * F.softplus(mid_in)
    
    inner_inner_cond = (cdf_delta > 1e-5).float()
    inner_inner_out  = inner_inner_cond * torch.log(torch.clamp(cdf_delta, min=1e-12)) + (1. - inner_inner_cond) * (log_pdf_mid - np.log(127.5))
    inner_cond       = (x > 0.999).float()
    inner_out        = inner_cond * log_one_minus_cdf_min + (1. - inner_cond) * inner_inner_out
    cond             = (x < -0.999).float()
    log_probs        = cond * log_cdf_plus + (1. - cond) * inner_out
    log_probs        = torch.sum(log_probs, dim=3) + log_prob_from_logits(logit_probs)
    
    #Don't sum over batch dimension
    lse = log_sum_exp(log_probs)
    return -torch.sum(lse.view(lse.size(0), -1), dim=1)
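log_prob_from_logits and log_sum_exp are helpers defined elsewhere in the pixelCNN++ ports this loss comes from. A minimal sketch of compatible definitions (an assumption about their behavior, not the verbatim originals): both reduce over the trailing mixture dimension with a numerically stable log-softmax / log-sum-exp.

import torch

def log_prob_from_logits(x):
    # stable log-softmax over the last (mixture) dimension
    return x - torch.logsumexp(x, dim=-1, keepdim=True)

def log_sum_exp(x):
    # stable log-sum-exp over the last (mixture) dimension
    return torch.logsumexp(x, dim=-1)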
Example #4
 def forward(self, x):
     return x * (F.softplus(self.alpha.exp() * x)).tanh()
Example #5
def rational_quadratic_spline(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=False,
    left=0.0,
    right=1.0,
    bottom=0.0,
    top=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    min_derivative=DEFAULT_MIN_DERIVATIVE,
    full_jacobian=False,
):
    assert not full_jacobian

    try:
        if torch.min(inputs) < left or torch.max(inputs) > right:
            raise transforms.InputOutsideDomain()
    except RuntimeError:
        logger.error("Error in rational_quadratic_spline!")
        logger.error("  Left: %s", left)
        logger.error("  Right: %s", left)
        logger.error("  Input shape: %s", inputs.size())
        logger.error("  Input: %s", inputs)
        raise

    num_bins = unnormalized_widths.shape[-1]

    if min_bin_width * num_bins > 1.0:
        raise ValueError("Minimal bin width too large for the number of bins")
    if min_bin_height * num_bins > 1.0:
        raise ValueError("Minimal bin height too large for the number of bins")

    widths = F.softmax(unnormalized_widths, dim=-1)
    widths = min_bin_width + (1 - min_bin_width * num_bins) * widths
    cumwidths = torch.cumsum(widths, dim=-1)
    cumwidths = F.pad(cumwidths, pad=(1, 0), mode="constant", value=0.0)
    cumwidths = (right - left) * cumwidths + left
    cumwidths[..., 0] = left
    cumwidths[..., -1] = right
    widths = cumwidths[..., 1:] - cumwidths[..., :-1]

    derivatives = min_derivative + F.softplus(unnormalized_derivatives)

    heights = F.softmax(unnormalized_heights, dim=-1)
    heights = min_bin_height + (1 - min_bin_height * num_bins) * heights
    cumheights = torch.cumsum(heights, dim=-1)
    cumheights = F.pad(cumheights, pad=(1, 0), mode="constant", value=0.0)
    cumheights = (top - bottom) * cumheights + bottom
    cumheights[..., 0] = bottom
    cumheights[..., -1] = top
    heights = cumheights[..., 1:] - cumheights[..., :-1]

    if inverse:
        bin_idx = various.searchsorted(cumheights, inputs)[..., None]
    else:
        bin_idx = various.searchsorted(cumwidths, inputs)[..., None]

    input_cumwidths = cumwidths.gather(-1, bin_idx)[..., 0]
    input_bin_widths = widths.gather(-1, bin_idx)[..., 0]

    input_cumheights = cumheights.gather(-1, bin_idx)[..., 0]
    delta = heights / widths
    input_delta = delta.gather(-1, bin_idx)[..., 0]

    input_derivatives = derivatives.gather(-1, bin_idx)[..., 0]
    input_derivatives_plus_one = derivatives[..., 1:].gather(-1, bin_idx)[...,
                                                                          0]

    input_heights = heights.gather(-1, bin_idx)[..., 0]

    if inverse:
        a = (inputs - input_cumheights) * (
            input_derivatives + input_derivatives_plus_one - 2 *
            input_delta) + input_heights * (input_delta - input_derivatives)
        b = input_heights * input_derivatives - (inputs - input_cumheights) * (
            input_derivatives + input_derivatives_plus_one - 2 * input_delta)
        c = -input_delta * (inputs - input_cumheights)

        discriminant = b.pow(2) - 4 * a * c
        # assert (discriminant >= 0).all()
        discriminant = torch.clamp(discriminant, min=0.0)

        root = (2 * c) / (-b - torch.sqrt(discriminant))
        outputs = root * input_bin_widths + input_cumwidths

        theta_one_minus_theta = root * (1 - root)
        denominator = input_delta + (
            (input_derivatives + input_derivatives_plus_one - 2 * input_delta)
            * theta_one_minus_theta)
        derivative_numerator = input_delta.pow(2) * (
            input_derivatives_plus_one * root.pow(2) +
            2 * input_delta * theta_one_minus_theta + input_derivatives *
            (1 - root).pow(2))
        logabsdet = torch.log(
            derivative_numerator) - 2 * torch.log(denominator)

        return outputs, -logabsdet
    else:
        theta = (inputs - input_cumwidths) / input_bin_widths
        theta_one_minus_theta = theta * (1 - theta)

        numerator = input_heights * (input_delta * theta.pow(2) +
                                     input_derivatives * theta_one_minus_theta)
        denominator = input_delta + (
            (input_derivatives + input_derivatives_plus_one - 2 * input_delta)
            * theta_one_minus_theta)
        outputs = input_cumheights + numerator / denominator

        derivative_numerator = input_delta.pow(2) * (
            input_derivatives_plus_one * theta.pow(2) +
            2 * input_delta * theta_one_minus_theta + input_derivatives *
            (1 - theta).pow(2))
        logabsdet = torch.log(
            derivative_numerator) - 2 * torch.log(denominator)

        return outputs, logabsdet
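The bin bookkeeping at the top of this function is easy to sanity-check in isolation: softmax normalizes the raw widths, each bin is floored at min_bin_width, and the padded cumulative sum spans the whole interval. A self-contained sketch (the constant is assumed, since DEFAULT_MIN_BIN_WIDTH is defined elsewhere):

import torch
import torch.nn.functional as F

min_bin_width = 1e-3                     # stand-in for DEFAULT_MIN_BIN_WIDTH
unnormalized_widths = torch.randn(4, 8)  # batch of 4, 8 bins
num_bins = unnormalized_widths.shape[-1]

widths = F.softmax(unnormalized_widths, dim=-1)
widths = min_bin_width + (1 - min_bin_width * num_bins) * widths
cumwidths = F.pad(torch.cumsum(widths, dim=-1), pad=(1, 0), value=0.0)

# every bin respects the floor, and each row's knots run from 0 to 1
assert (widths >= min_bin_width).all()
assert torch.allclose(cumwidths[..., -1], torch.ones(4))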
Example #6
 def _activate(self, x, predict):
     return F.softplus(x)
Example #7
 def forward(self, input):
     # log-RBF kernel up to constants: c - t^2 / (2l)
     out = self.constant - input[:, 0] * input[:, 1] * (
         input[:, 0] - input[:, 1]).pow(2).squeeze(-1) / (
             2 * (softplus(self.lengthscale.view(-1)) + 1e-7))
     return out
Example #8
 def forward(self, x):
     return x * torch.tanh(F.softplus(x))
Example #9
 def forward(self, x):
     h = F.relu(self.fc1(x))
     h = F.relu(self.fc2(h))
     return {"loc": self.fc31(h), "scale": F.softplus(self.fc32(h))}
Example #10
 def g_nonsaturating_loss(self, fake_pred):
     loss = F.softplus(-fake_pred).mean()
     return loss
Example #11
 def forward(self, x):
     # out-of-place multiply; an in-place x.mul_ here would invalidate the x
     # saved by softplus for backward and error out during training
     return x * F.softplus(x).tanh()
Example #12
 def log_abs_det_jacobian(self, x, y):
     return 2. * (np.log(2) - x - F.softplus(-2. * x))
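This is the numerically stable form of log|d tanh(x)/dx| = log(1 - tanh^2(x)); a quick check that the two expressions agree where the naive one is well behaved:

import numpy as np
import torch
import torch.nn.functional as F

x = torch.linspace(-5, 5, 11)
stable = 2. * (np.log(2) - x - F.softplus(-2. * x))
naive = torch.log(1 - torch.tanh(x) ** 2)
assert torch.allclose(stable, naive, atol=1e-5)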
Example #13
 def forward(self, feat):
     feat = feat * torch.tanh(F.softplus(feat))
     return feat
Example #14
 def discriminator_loss(real_pred, fake_pred, loss_dict):
     real_loss = F.softplus(-real_pred).mean()
     fake_loss = F.softplus(fake_pred).mean()
     loss_dict['d_real_loss'] = float(real_loss)
     loss_dict['d_fake_loss'] = float(fake_loss)
     return real_loss + fake_loss
Example #15
def train(num_block, generator, discriminator, 
          batch_size, epochs, path_image) :

    d_losses = []
    g_losses = []

    # Create the optimizers once so Adam's moment estimates persist across updates
    d_optim = torch.optim.Adam(discriminator.parameters(), lr=0.001)
    g_optim = torch.optim.Adam(generator.parameters(), lr=0.001)

    # Run progressive training, starting from 8x8
    for step in range(2, num_block + 1) :

        # Run each epoch
        #for epoch in tqdm(range(1, epochs[step] + 1)):
        for epoch in range(1, epochs[step] + 1):

            # Create the data loader (at most 1000 samples per epoch)
            loader = data_loader(step, batch_size, path=path_image, num_workers=1)

            
            print(f'step = {step}, epoch = {epoch}')


            # Iterate over the freshly created loader
            for real_image in loader :

                # Draw z from a Gaussian distribution
                z = [torch.randn(100), torch.randn(100)]
                #z.append()
                #z.append(torch.rand(100))
                
                if torch.cuda.is_available() :
                    real_image = real_image.cuda()
                    z[0] = z[0].cuda()
                    z[1] = z[1].cuda()
 
            
                # Train the discriminator
                discriminator.zero_grad()
                set_requires_grad(generator, False)
                set_requires_grad(discriminator, True)

                # Compute gradients on real images
                real_image.requires_grad = True
                real_scores = discriminator(real_image, step)
                real_predict = F.softplus(-real_scores).mean()
                real_predict.backward(retain_graph=True)

                # Compute the R1 penalty from the raw scores, not the softplus loss
                grad_real = torch.autograd.grad(outputs=real_scores.sum(), inputs=real_image, create_graph=True)[0]
                grad_penalty = (grad_real.view(grad_real.size(0), -1).norm(2, dim=1)**2).mean()
                grad_penalty = 10 / 2 * grad_penalty
                grad_penalty.backward()


                # Compute the loss on fake images
                fake_image = generator(z[0], step)
                fake_predict = discriminator(fake_image, step)
                
                fake_predict = F.softplus(fake_predict).mean()
                fake_predict.backward()
                
                d_losses.append((real_predict + fake_predict).item())

                # Update the weights
                d_optim.step()

                # Free memory
                del fake_image, real_image, grad_penalty, grad_real

                # Train the generator
                generator.zero_grad()
                set_requires_grad(discriminator, False)
                set_requires_grad(generator, True)

                fake_image = generator(z[0], step)
                fake_predict = discriminator(fake_image, step)
                fake_predict = F.softplus(-fake_predict).mean()
                fake_predict.backward()
                
                # Update the weights
                g_optim.step()

                g_losses.append(fake_predict.item())

    return d_losses, g_losses
Example #16
def train(args, dataset, generator, discriminator):
    step = int(math.log2(args.init_size)) - 2
    resolution = 4 * 2**step
    loader = sample_data(dataset, args.batch.get(resolution,
                                                 args.batch_default),
                         resolution)
    data_loader = iter(loader)

    adjust_lr(g_optimizer, args.lr.get(resolution, 0.001))
    adjust_lr(d_optimizer, args.lr.get(resolution, 0.001))

    pbar = tqdm(range(3_000_000))

    requires_grad(generator, False)
    requires_grad(discriminator, True)

    disc_loss_val = 0
    gen_loss_val = 0
    grad_loss_val = 0

    alpha = 0
    used_sample = 0

    max_step = int(math.log2(args.max_size)) - 2
    final_progress = False

    for i in pbar:
        discriminator.zero_grad()

        alpha = min(1, 1 / args.phase * (used_sample + 1))

        if (resolution == args.init_size
                and args.ckpt is None) or final_progress:
            alpha = 1

        if used_sample > args.phase * 2:
            used_sample = 0
            step += 1

            if step > max_step:
                step = max_step
                final_progress = True
                ckpt_step = step + 1

            else:
                alpha = 0
                ckpt_step = step

            resolution = 4 * 2**step

            loader = sample_data(
                dataset, args.batch.get(resolution, args.batch_default),
                resolution)
            data_loader = iter(loader)

            torch.save(
                {
                    'generator': generator.module.state_dict(),
                    'discriminator': discriminator.module.state_dict(),
                    'g_optimizer': g_optimizer.state_dict(),
                    'd_optimizer': d_optimizer.state_dict(),
                    'g_running': g_running.state_dict(),
                },
                f'checkpoint/train_step-{ckpt_step}.model',
            )

            adjust_lr(g_optimizer, args.lr.get(resolution, 0.001))
            adjust_lr(d_optimizer, args.lr.get(resolution, 0.001))

        try:
            real_image = next(data_loader)

        except (OSError, StopIteration):
            data_loader = iter(loader)
            real_image = next(data_loader)

        used_sample += real_image.shape[0]

        b_size = real_image.size(0)
        real_image = real_image.cuda()

        if args.loss == 'wgan-gp':
            real_predict = discriminator(real_image, step=step, alpha=alpha)
            real_predict = real_predict.mean() - 0.001 * (real_predict**
                                                          2).mean()
            (-real_predict).backward()

        elif args.loss == 'r1':
            real_image.requires_grad = True
            real_scores = discriminator(real_image, step=step, alpha=alpha)
            real_predict = F.softplus(-real_scores).mean()
            real_predict.backward(retain_graph=True)

            grad_real = grad(outputs=real_scores.sum(),
                             inputs=real_image,
                             create_graph=True)[0]
            grad_penalty = (grad_real.view(grad_real.size(0),
                                           -1).norm(2, dim=1)**2).mean()
            grad_penalty = 10 / 2 * grad_penalty
            grad_penalty.backward()
            if i % 10 == 0:
                grad_loss_val = grad_penalty.item()

        if args.mixing and random.random() < 0.9:
            gen_in11, gen_in12, gen_in21, gen_in22 = torch.randn(
                4, b_size, code_size, device='cuda').chunk(4, 0)
            gen_in1 = [gen_in11.squeeze(0), gen_in12.squeeze(0)]
            gen_in2 = [gen_in21.squeeze(0), gen_in22.squeeze(0)]

        else:
            gen_in1, gen_in2 = torch.randn(2, b_size, code_size,
                                           device='cuda').chunk(2, 0)
            gen_in1 = gen_in1.squeeze(0)
            gen_in2 = gen_in2.squeeze(0)

        fake_image = generator(gen_in1, step=step, alpha=alpha)
        fake_predict = discriminator(fake_image, step=step, alpha=alpha)

        if args.loss == 'wgan-gp':
            fake_predict = fake_predict.mean()
            fake_predict.backward()

            eps = torch.rand(b_size, 1, 1, 1).cuda()
            x_hat = eps * real_image.data + (1 - eps) * fake_image.data
            x_hat.requires_grad = True
            hat_predict = discriminator(x_hat, step=step, alpha=alpha)
            grad_x_hat = grad(outputs=hat_predict.sum(),
                              inputs=x_hat,
                              create_graph=True)[0]
            grad_penalty = (
                (grad_x_hat.view(grad_x_hat.size(0), -1).norm(2, dim=1) -
                 1)**2).mean()
            grad_penalty = 10 * grad_penalty
            grad_penalty.backward()
            if i % 10 == 0:
                grad_loss_val = grad_penalty.item()
                disc_loss_val = (-real_predict + fake_predict).item()

        elif args.loss == 'r1':
            fake_predict = F.softplus(fake_predict).mean()
            fake_predict.backward()
            if i % 10 == 0:
                disc_loss_val = (real_predict + fake_predict).item()

        d_optimizer.step()

        if (i + 1) % n_critic == 0:
            generator.zero_grad()

            requires_grad(generator, True)
            requires_grad(discriminator, False)

            fake_image = generator(gen_in2, step=step, alpha=alpha)

            predict = discriminator(fake_image, step=step, alpha=alpha)

            if args.loss == 'wgan-gp':
                loss = -predict.mean()

            elif args.loss == 'r1':
                loss = F.softplus(-predict).mean()

            if i % 10 == 0:
                gen_loss_val = loss.item()

            loss.backward()
            g_optimizer.step()
            accumulate(g_running, generator.module)

            requires_grad(generator, False)
            requires_grad(discriminator, True)

        if (i + 1) % 100 == 0:
            images = []

            gen_i, gen_j = args.gen_sample.get(resolution, (10, 5))

            with torch.no_grad():
                for _ in range(gen_i):
                    images.append(
                        g_running(torch.randn(gen_j, code_size).cuda(),
                                  step=step,
                                  alpha=alpha).data.cpu())

            utils.save_image(
                torch.cat(images, 0),
                f'sample/{str(i + 1).zfill(6)}.png',
                nrow=gen_i,
                normalize=True,
                range=(-1, 1),
            )

        if (i + 1) % 10000 == 0:
            torch.save(
                {
                    'generator': generator.module.state_dict(),
                    'discriminator': discriminator.module.state_dict(),
                    'g_optimizer': g_optimizer.state_dict(),
                    'd_optimizer': d_optimizer.state_dict(),
                    'g_running': g_running.state_dict(),
                },
                f'checkpoint/{str(i + 1).zfill(6)}.model',
            )

        state_msg = (
            f'Size: {4 * 2 ** step}; G: {gen_loss_val:.3f}; D: {disc_loss_val:.3f};'
            f' Grad: {grad_loss_val:.3f}; Alpha: {alpha:.5f}')

        pbar.set_description(state_msg)
Example #17
def to_sigma(x):
    return F.softplus(x + 0.5) + 1e-8
Example #18
def loss_dcgan_dis(dis_fake, dis_real):
  L1 = torch.mean(F.softplus(-dis_real))
  L2 = torch.mean(F.softplus(dis_fake))
  return L1, L2
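These are the two halves of the non-saturating GAN discriminator objective: L1 pushes real scores up, L2 pushes fake scores down, and their sum is what the discriminator step backpropagates (the matching generator loss appears in Example #35 below). A minimal sketch with dummy scores standing in for a real discriminator, assuming loss_dcgan_dis above is in scope:

import torch
import torch.nn.functional as F

dis_real = torch.randn(8, requires_grad=True)  # stand-in for D(x)
dis_fake = torch.randn(8, requires_grad=True)  # stand-in for D(G(z))

L1, L2 = loss_dcgan_dis(dis_fake, dis_real)
(L1 + L2).backward()  # one discriminator update uses both terms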
Example #19
    def forward(
            self,
            prev_state: torch.Tensor,
            actions: torch.Tensor,
            prev_belief: torch.Tensor,
            observations: Optional[torch.Tensor] = None,
            nonterminals: Optional[torch.Tensor] = None) -> List[torch.Tensor]:
        '''
        Input: init_belief, init_state:  torch.Size([50, 200]) torch.Size([50, 30])
        Output: beliefs, prior_states, prior_means, prior_std_devs, posterior_states, posterior_means, posterior_std_devs
                torch.Size([49, 50, 200]) torch.Size([49, 50, 30]) torch.Size([49, 50, 30]) torch.Size([49, 50, 30]) torch.Size([49, 50, 30]) torch.Size([49, 50, 30]) torch.Size([49, 50, 30])
        '''
        if args.MultiGPU and torch.cuda.device_count() > 1:
            actions = torch.transpose(actions, 0, 1)
            observations = None if observations is None else torch.transpose(
                observations, 0, 1)
            nonterminals = None if nonterminals is None else torch.transpose(
                nonterminals, 0, 1)
        # Create lists for hidden states (cannot use single tensor as buffer because autograd won't work with inplace writes)
        T = actions.size(0) + 1
        beliefs, prior_states, prior_means, prior_std_devs, posterior_states, posterior_means, posterior_std_devs = [
            torch.empty(0)
        ] * T, [torch.empty(0)] * T, [torch.empty(0)] * T, [
            torch.empty(0)
        ] * T, [torch.empty(0)] * T, [torch.empty(0)] * T, [torch.empty(0)] * T
        beliefs[0], prior_states[0], posterior_states[
            0] = prev_belief, prev_state, prev_state
        # Loop over time sequence
        for t in range(T - 1):
            _state = prior_states[
                t] if observations is None else posterior_states[
                    t]  # Select appropriate previous state
            _state = _state if nonterminals is None else _state * nonterminals[
                t]  # Mask if previous transition was terminal
            # Compute belief (deterministic hidden state)
            hidden = self.act_fn(
                self.fc_embed_state_action(
                    torch.cat([_state, actions[t]], dim=1)))
            beliefs[t + 1] = self.rnn(hidden, beliefs[t])
            # Compute state prior by applying transition dynamics
            hidden = self.act_fn(self.fc_embed_belief_prior(beliefs[t + 1]))
            prior_means[t + 1], _prior_std_dev = torch.chunk(
                self.fc_state_prior(hidden), 2, dim=1)
            prior_std_devs[t +
                           1] = F.softplus(_prior_std_dev) + self.min_std_dev
            prior_states[t + 1] = prior_means[t + 1] + prior_std_devs[
                t + 1] * torch.randn_like(prior_means[t + 1])
            if observations is not None:
                # Compute state posterior by applying transition dynamics and using current observation
                t_ = t - 1  # Use t_ to deal with different time indexing for observations
                hidden = self.act_fn(
                    self.fc_embed_belief_posterior(
                        torch.cat([beliefs[t + 1], observations[t_ + 1]],
                                  dim=1)))
                posterior_means[t + 1], _posterior_std_dev = torch.chunk(
                    self.fc_state_posterior(hidden), 2, dim=1)
                posterior_std_devs[
                    t + 1] = F.softplus(_posterior_std_dev) + self.min_std_dev
                posterior_states[t + 1] = posterior_means[
                    t + 1] + posterior_std_devs[t + 1] * torch.randn_like(
                        posterior_means[t + 1])
        # Return new hidden states
        hidden = [
            torch.stack(beliefs[1:], dim=0),
            torch.stack(prior_states[1:], dim=0),
            torch.stack(prior_means[1:], dim=0),
            torch.stack(prior_std_devs[1:], dim=0)
        ]
        if observations is not None:
            hidden += [
                torch.stack(posterior_states[1:], dim=0),
                torch.stack(posterior_means[1:], dim=0),
                torch.stack(posterior_std_devs[1:], dim=0)
            ]
        return hidden
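The recurring pattern in this transition model is to chunk a linear layer's output into a mean and a raw scale, then apply softplus plus a floor so the standard deviation stays strictly positive. The same step in isolation (the floor value is an assumption standing in for self.min_std_dev):

import torch
import torch.nn.functional as F

min_std_dev = 0.1                # assumed floor
out = torch.randn(50, 60)        # e.g. the fc_state_prior output
mean, raw_std = torch.chunk(out, 2, dim=1)
std = F.softplus(raw_std) + min_std_dev
state = mean + std * torch.randn_like(mean)  # reparameterized sample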
Example #20
            y_fill = Variable(y_fill.cuda())
            y_fill_list.append(y_fill)

        y_onehot_v_concat = y_onehot_v_list[0]
        if opt.label_mode == 2:
            y_onehot_v_concat = torch.cat([y_onehot_v_list[0], y_onehot_v_list[1]], 1)
        y_fill_concat = y_fill_list[0]
        if opt.label_mode == 2:
            y_fill_concat = torch.cat([y_fill_list[0], y_fill_list[1]], 1)
        input.resize_(real_cpu.size()).copy_(real_cpu)
        # label.resize_(batch_size).fill_(real_label)
        inputv = Variable(input)
        # labelv = Variable(label)
        output = SND(inputv, y_fill_concat)
        #print(output)
        errD_real = torch.mean(F.softplus(-output).mean())
        #errD_real = criterion(output, labelv)
        #errD_real.backward()
        D_x = output.data.mean()

        # train with fake
        noise.resize_(batch_size, opt.nz, 1, 1).normal_(0, 1)
        noisev = Variable(noise)
        #y_nz = torch.cat([noisev, y_onehot], 1)
        fake = G(noisev, y_onehot_v_concat)
        # labelv = Variable(label.fill_(fake_label))
        output = SND(fake.detach(), y_fill_concat)
        errD_fake = torch.mean(F.softplus(output))
        #errD_fake = criterion(output, labelv)
        #errD_fake.backward()
        D_G_z1 = output.data.mean()
Example #21
    def forward(self, x):
        d = x.shape[1] // 3
        num_off_diagonals = d * (d - 1) // 2
        n = x.shape[0]
        q, q_dot, q_ddot = torch.split(x,[d,d,d], dim = 1)

        # q.requires_grad = True
        h1 = self.act_fn(self.fc1(q))
        h2 = self.act_fn(self.fc1a(h1))
        
        # Gravity torque
        g = self.fc2(h2)

        # ld is vector of diagonal L terms, lo is vector of off-diagonal L terms
        h3 = self.fc3(h2)
        ld = F.softplus(h3)
        lo = self.fc4(h2)

        dRelu_fc1 = torch.where(h1 > 0, torch.ones(h1.shape,device=self.device), self.neg_slope * torch.ones(h1.shape,device=self.device))
        dh1_dq = torch.diag_embed(dRelu_fc1) @ self.fc1.weight

        dRelu_fc1a = torch.where(h2 > 0, torch.ones(h2.shape,device=self.device), self.neg_slope * torch.ones(h2.shape,device=self.device))
        dh2_dh1 = torch.diag_embed(dRelu_fc1a) @ self.fc1a.weight

        dRelu_fc3 = torch.sigmoid(h3)#torch.where(ld > 0, torch.ones(ld.shape), 0.0 * torch.ones(ld.shape))

        dld_dh2 = torch.diag_embed(dRelu_fc3) @ self.fc3.weight
        dlo_dh2 = self.fc4.weight
        
        dld_dq = dld_dh2 @ dh2_dh1 @ dh1_dq
        dlo_dq = dlo_dh2 @ dh2_dh1 @ dh1_dq
        dld_dqi = dld_dq.permute(0,2,1).view(n,d,d,1)
        dlo_dqi = dlo_dq.permute(0,2,1).view(n,d,-1,1)

        dld_dt = dld_dq @ q_dot.view(n,d,1)
        dlo_dt = dlo_dq @ q_dot.view(n,d,1)

        # Get L, dL matrices without inplace operations
        L = []
        dL_dt = []
        dL_dqi = []
        zeros = torch.zeros_like(ld)
        zeros_2 = torch.zeros_like(dld_dqi)
        lo_start = 0
        lo_end = d - 1
        for i in range(d):
            l = torch.cat((zeros[:, :i].view(n, -1), ld[:, i].view(-1, 1), lo[:, lo_start:lo_end]), dim=1)
            dl_dt = torch.cat((zeros[:, :i].view(n, -1), dld_dt[:, i].view(-1, 1),
                               dlo_dt[:, lo_start:lo_end].view(n, -1)), dim=1)

            dl_dqi = torch.cat((zeros_2[:, :, :i].view(n, d, -1), dld_dqi[:, :, i].view(n, -1, 1),
                                dlo_dqi[:, :, lo_start:lo_end].view(n, d, -1)), dim=2)

            lo_start = lo_start + lo_end
            lo_end = lo_end + d - 2 - i
            L.append(l)
            dL_dt.append(dl_dt)
            dL_dqi.append(dl_dqi)

        L = torch.stack(L, dim=2)
        dL_dt = torch.stack(dL_dt, dim=2)

        # dL_dqi n x d x d x d -- last dim is index for qi
        dL_dqi = torch.stack(dL_dqi, dim=3).permute(0, 2, 3, 1)

        epsilon = 1e-9   #small number to ensure positive definiteness of H

        H = L @ L.transpose(1, 2) + epsilon * torch.eye(d, device=self.device)

        # Time derivative of Mass Matrix
        dH_dt = L @ dL_dt.permute(0,2,1) + dL_dt @ L.permute(0,2,1)

        quadratic_term = []
        for i in range(d):
            qterm = q_dot.view(n, 1, d) @ (dL_dqi[:, :, :, i] @ L.transpose(1, 2) +
                                           L @ dL_dqi[:, :, :, i].transpose(1, 2)) @ q_dot.view(n, d, 1)
            quadratic_term.append(qterm)

        quadratic_term = torch.stack(quadratic_term, dim=1)

        c = dH_dt @ q_dot.view(n,d,1) - 0.5 * quadratic_term.view(n,d,1)

        tau = H @ q_ddot.view(n,d,1) + c + g.view(n,d,1)

        #set uncontrolled torque to zero
        tau = torch.diag_embed(torch.cat((torch.ones((n,1),device=self.device), torch.zeros((n,1),device=self.device)),dim=1)) @ tau
        # The loss layer will be applied outside Network class
        return (tau.squeeze(), (H @ q_ddot.view(n,d,1)).squeeze(), c.squeeze(), g.squeeze())
Example #22
    def d_logistic_loss(self, real_pred, fake_pred):
        real_loss = F.softplus(-real_pred)
        fake_loss = F.softplus(fake_pred)

        return real_loss.mean() + fake_loss.mean()
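Since softplus(-x) = -log(sigmoid(x)), this logistic loss is binary cross-entropy on the raw logits written in a stable form; a quick equivalence check:

import torch
import torch.nn.functional as F

real_pred = torch.randn(16)
fake_pred = torch.randn(16)

loss_sp = F.softplus(-real_pred).mean() + F.softplus(fake_pred).mean()
loss_bce = (F.binary_cross_entropy_with_logits(real_pred, torch.ones_like(real_pred))
            + F.binary_cross_entropy_with_logits(fake_pred, torch.zeros_like(fake_pred)))
assert torch.allclose(loss_sp, loss_bce, atol=1e-6)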
Example #23
def mish(x):
    """Mish: A Self Regularized Non-Monotonic Neural Activation Function (https://arxiv.org/abs/1908.08681)"""
    return x * torch.tanh(F.softplus(x))
Example #24
 def log_abs_det_jacobian(self, x, y):
     # We use a formula that is more numerically stable, see details in the following link
     # https://github.com/tensorflow/probability/commit/ef6bb176e0ebd1cf6e25c6b5cecdd2428c22963f#diff-e120f70e92e6741bca649f04fcd907b7
     return 2. * (np.log(2.) - x - F.softplus(-2. * x))
Example #25
 def forward(ctx, x):
     ctx.save_for_backward(x)
     return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))
Example #26
 def forward(self, x):
     x = x * (torch.tanh(F.softplus(x)))
     return x
Example #27
 def backward(ctx, grad_output):
     x = ctx.saved_tensors[0]
     sx = torch.sigmoid(x)
     fx = F.softplus(x).tanh()
     return grad_output * (fx + x * sx * (1 - fx * fx))
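The hand-derived gradient above (fx + x * sx * (1 - fx * fx)) can be verified against autograd on the mish forward formula; a double-precision check:

import torch
import torch.nn.functional as F

x = torch.randn(64, dtype=torch.double, requires_grad=True)
y = x * torch.tanh(F.softplus(x))  # mish forward
y.sum().backward()                 # autograd gradient lands in x.grad

xd = x.detach()
sx = torch.sigmoid(xd)
fx = F.softplus(xd).tanh()
manual = fx + xd * sx * (1 - fx * fx)  # the formula from backward() above
assert torch.allclose(x.grad, manual)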
Example #28
 def forward(self, x):
     return x * F.softplus(x).tanh()
Example #29
 def forward(self, x):
     a = self.mlp(x)
     return a[:, 0:self.z_size], softplus(a[:, self.z_size:])
"""
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt

# fake data
x = torch.linspace(-5, 5, 200)  # x data (tensor), shape=(200,)
x = Variable(x)
x_np = x.data.numpy()   # numpy array for plotting

# following are popular activation functions
y_relu = torch.relu(x).data.numpy()
y_sigmoid = torch.sigmoid(x).data.numpy()
y_tanh = torch.tanh(x).data.numpy()
y_softplus = F.softplus(x).data.numpy()  # softplus lives in torch.nn.functional, not in top-level torch
# y_softmax = torch.softmax(x, dim=0).data.numpy()  # softmax is special: it maps scores to a probability distribution

# plt to visualize these activation function
plt.figure(1, figsize=(8, 6))
plt.subplot(221)
plt.plot(x_np, y_relu, c='red', label='relu')
plt.ylim((-1, 5))
plt.legend(loc='best')

plt.subplot(222)
plt.plot(x_np, y_sigmoid, c='red', label='sigmoid')
plt.ylim((-0.2, 1.2))
plt.legend(loc='best')

plt.subplot(223)
plt.plot(x_np, y_tanh, c='red', label='tanh')
plt.ylim((-1.2, 1.2))
plt.legend(loc='best')

plt.subplot(224)
plt.plot(x_np, y_softplus, c='red', label='softplus')
plt.ylim((-0.2, 6))
plt.legend(loc='best')

plt.show()
Example #31
 def forward(self, z):
     out = self.linear2(F.softplus(self.linear1(z)))  # soft-ReLU; softplus suits count data
     return out
Example #32
def discrete_gan(nets, inputs, measure=None, penalty=None, n_samples=10, reinforce=False, gamma=0.95,
                 penalty_type='gradient_norm', use_beta=False, test_mode=False, use_sm=False):
    global log_Z
    log_M = math.log(n_samples)
    discriminator = nets['discriminator']
    generator = nets['generator']

    M = n_samples
    X = (inputs['images'] >= 0).float()
    Z = inputs['z']
    R = inputs['r']
    U = inputs['u']
    B = inputs['z'].size()[0]
    log_B = math.log(B)

    if R.size()[1] != DIM_C * n_samples * DIM_X * DIM_Y:
        R = inputs['r_t']
    assert R.size() == (B, DIM_C * n_samples * DIM_X * DIM_Y), (R.size(), (B, DIM_C * n_samples * DIM_X * DIM_Y))

    R = R.view(M, -1, DIM_C * DIM_X * DIM_Y)
    U.requires_grad = False

    logit = generator(Z)
    assert logit.size()[1:] == X.size()[1:], (logit.size(), X.size())

    g_output = torch.sigmoid(logit)
    g_output_ = g_output.view(-1, DIM_C * DIM_X * DIM_Y)

    S = (R <= g_output_).float()
    S = S.view(M, -1, DIM_C, DIM_X, DIM_Y)
    S_ = Variable(S.data.cuda(), volatile=True)
    S = Variable(S.data.cuda(), requires_grad=False)

    gen_out = (U <= g_output_).float()
    gen_out = gen_out.view(-1, DIM_C, DIM_X, DIM_Y)

    real_out = discriminator(X)

    fake_out = discriminator(S.view(-1, DIM_C, DIM_X, DIM_Y))
    fake_out_ = discriminator(S_.view(-1, DIM_C, DIM_X, DIM_Y))
    log_g = -((1. - S) * logit + F.softplus(-logit)).sum(2).sum(2).sum(2)

    if (measure == 'w' and not test_mode) or use_sm:
        fake_out_sm = discriminator(g_output)
        d_loss, g_loss, r, f, w, b = f_divergence(measure, real_out, fake_out_sm)
    else:
        d_loss, g_loss, r, f, w, b = f_divergence(measure, real_out, fake_out.view(M, B, -1))

    if measure in ('gan', 'jsd', 'rkl', 'kl', 'sh', 'proxy_gan', 'dv') and not use_sm:
        log_w = Variable(fake_out_.data.cuda(), requires_grad=False).view(M, B)
        log_beta = log_sum_exp(log_w.view(M * B, -1) - log_M - log_B, axis=0)
        log_alpha = log_sum_exp(log_w - log_M, axis=0)

        if use_beta:
            log_Z_est = log_beta
            log_w_tilde = log_w - log_Z_est - log_M - log_B
        else:
            log_Z_est = log_alpha
            log_w_tilde = log_w - log_Z_est - log_M
        w_tilde = torch.exp(log_w_tilde)

        alpha = torch.exp(log_alpha)
        beta = torch.exp(log_beta)

    elif measure == 'xs':
        w = (fake_out / 2. + 1.).view(M, B)
        w_tilde = w / w.sum(0)
        log_Z_est = torch.log(torch.mean(w))

    elif measure == 'w' or use_sm:
        log_w = Variable(torch.Tensor([0.]).float()).cuda()
        log_Z_est = Variable(torch.Tensor([0.]).float()).cuda()
        w_tilde = Variable(torch.Tensor([0.]).float()).cuda()

    else:
        raise NotImplementedError(measure)

    if measure != 'w' and not use_sm:
        if reinforce:
            r = (log_w - log_Z)
            assert not r.requires_grad
            g_loss = -(r * log_g).sum(0).mean()
        else:
            w_tilde = Variable(w_tilde.data.cuda(), requires_grad=False)
            assert not w_tilde.requires_grad
            if use_beta:
                g_loss = -((w_tilde * log_g).view(M * B)).sum(0).mean()
            else:
                g_loss = -(w_tilde * log_g).sum(0).mean()

    results = dict(g_loss=g_loss.data[0], distance=-d_loss.data[0], boundary=torch.mean(b).data[0],
                   real=torch.mean(r).data[0], fake=torch.mean(f).data[0],
                   gen_out=g_output.mean().data[0], w_tilde=w_tilde.mean().data[0],
                   real_out=real_out.mean().data[0], fake_out=fake_out.mean().data[0])

    if measure != 'w' and not use_sm:
        results.update(alpha=alpha.mean().data[0], log_alpha=log_alpha.mean().data[0],
                       beta=beta.mean().data[0], log_beta=log_beta.mean().data[0])
        results.update(ess=(1. / (w_tilde ** 2).sum(0)).mean().data[0])

    if test_mode or measure == 'w' or use_sm:
        fake_out_sm = discriminator(Variable(g_output.data.cuda(), volatile=True))
        S_th = Variable((g_output >= 0.5).float().data.cuda(), volatile=True)
        fake_out_sam = Variable(fake_out.data.cuda(), volatile=True)
        fake_out_th = discriminator(S_th)
        dist_th = -f_divergence(measure, real_out, fake_out_th)[0]
        dist_sam = -f_divergence(measure, real_out, fake_out_sam)[0]
        dist_sm = -f_divergence(measure, real_out, fake_out_sm)[0]
        results.update(distance_th=dist_th.data[0], distance_sam=dist_sam.data[0],
                       distance_sm=dist_sm.data[0])

    samples = dict(images=dict(generated=gen_out.data,
                               prob=g_output.data,
                               real=X.data))

    if penalty:
        p_term = apply_penalty(inputs, discriminator, X, g_output,
                               measure, penalty_type=penalty_type)
        d_loss += penalty * p_term
        results['gradient penalty'] = p_term.data[0]

    log_Z *= gamma
    log_Z += (1. - gamma) * log_Z_est.mean()
    results.update(log_Z=log_Z.data[0], log_Z_est=log_Z_est.mean().data[0],
                   log_w=log_w.mean().data[0], log_g=log_g.mean().data[0])
    return dict(generator=g_loss, discriminator=d_loss), results, samples, 'boundary'
Example #33
def create_gaussian_conditional(l):
    n_channels = int(l.size(1)/2)
    mu = l[:, :n_channels]
    sigma = F.softplus(l[:, n_channels:])
    dist = torch.distributions.normal.Normal(mu, sigma)
    return dist
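Usage sketch, assuming create_gaussian_conditional from above is in scope: the first half of the channel dimension parameterizes the mean, the second half the softplus-positive scale, and the returned Normal supports sampling and log_prob:

import torch

l = torch.randn(4, 6, 8, 8)                # 2 * n_channels = 6 parameter maps
dist = create_gaussian_conditional(l)
x = dist.sample()                          # shape (4, 3, 8, 8)
ll = dist.log_prob(x).sum(dim=(1, 2, 3))   # per-example log-likelihood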
Example #34
import pytest
import torch
import torch.nn.functional as F
from torch.testing import assert_allclose

mish_forward_pt = lambda x: x.mul(torch.tanh(F.softplus(x)))


class Mish(torch.nn.Module):
    def forward(self, x):
        return mish_forward_pt(x)


def get_input_params():
    devs = ['cpu']
    if torch.cuda.is_available() and torch.cuda.device_count() > 0:
        devs += ['cuda:0']  # TODO: Allow other devices
    dev_types = [
        (dtype, device)
        for dtype in [torch.float16, torch.float32, torch.float64]
        for device in devs
        # Basic ops not supported on CPU/Half, could test by converting but skip for now
        if not (dtype == torch.float16 and torch.device(device).type == 'cpu')
    ]
    inputs = [(ndim, dtype, device) for (dtype, device) in dev_types
              for ndim in [1, 2, 3, 4, 8]]
    return inputs


@pytest.fixture(params=get_input_params())
def test_input(request):
Example #35
def loss_dcgan_gen(dis_fake):
  loss = torch.mean(F.softplus(-dis_fake))
  return loss
Example #36
 def forward(self, h):
     out = self.mlp(h)
     z_pres_p = sigmoid(out[:, 0:self.z_pres_size])
     z_where_mu = out[:, self.z_pres_size:self.z_pres_size + self.z_where_size]
     z_where_sigma = softplus(out[:, (self.z_pres_size + self.z_where_size):])
     return z_pres_p, z_where_mu, z_where_sigma
Example #37
 def pz_params(self):
     return self._pz_params[0], F.softplus(self._pz_params[1])
Example #38
def discretized_mix_logistic_loss(x, l):
    """ log-likelihood for mixture of discretized logistics, assumes the data has been rescaled to [-1,1] interval """
    # Pytorch ordering
    x = x.permute(0, 2, 3, 1)
    l = l.permute(0, 2, 3, 1)
    xs = [int(y) for y in x.size()]
    ls = [int(y) for y in l.size()]
   
    # here and below: unpacking the params of the mixture of logistics
    nr_mix = int(ls[-1] / 10) 
    logit_probs = l[:, :, :, :nr_mix]
    l = l[:, :, :, nr_mix:].contiguous().view(xs + [nr_mix * 3]) # 3 for mean, scale, coef
    means = l[:, :, :, :, :nr_mix]
    # log_scales = torch.max(l[:, :, :, :, nr_mix:2 * nr_mix], -7.)
    log_scales = torch.clamp(l[:, :, :, :, nr_mix:2 * nr_mix], min=-7.)
   
    coeffs = torch.tanh(l[:, :, :, :, 2 * nr_mix:3 * nr_mix])
    # here and below: getting the means and adjusting them based on preceding
    # sub-pixels
    x = x.contiguous()
    x = x.unsqueeze(-1) + Variable(torch.zeros(xs + [nr_mix]).cuda(), requires_grad=False)
    m2 = (means[:, :, :, 1, :] + coeffs[:, :, :, 0, :]
                * x[:, :, :, 0, :]).view(xs[0], xs[1], xs[2], 1, nr_mix)

    m3 = (means[:, :, :, 2, :] + coeffs[:, :, :, 1, :] * x[:, :, :, 0, :] +
                coeffs[:, :, :, 2, :] * x[:, :, :, 1, :]).view(xs[0], xs[1], xs[2], 1, nr_mix)

    means = torch.cat((means[:, :, :, 0, :].unsqueeze(3), m2, m3), dim=3)
    centered_x = x - means
    inv_stdv = torch.exp(-log_scales)
    plus_in = inv_stdv * (centered_x + 1. / 255.)
    cdf_plus = torch.sigmoid(plus_in)
    min_in = inv_stdv * (centered_x - 1. / 255.)
    cdf_min = torch.sigmoid(min_in)
    # log probability for edge case of 0 (before scaling)
    log_cdf_plus = plus_in - F.softplus(plus_in)
    # log probability for edge case of 255 (before scaling)
    log_one_minus_cdf_min = -F.softplus(min_in)
    cdf_delta = cdf_plus - cdf_min  # probability for all other cases
    mid_in = inv_stdv * centered_x
    # log probability in the center of the bin, to be used in extreme cases
    # (not actually used in our code)
    log_pdf_mid = mid_in - log_scales - 2. * F.softplus(mid_in)

    # now select the right output: left edge case, right edge case, normal
    # case, extremely low prob case (doesn't actually happen for us)

    # this is what we are really doing, but using the robust version below for extreme cases in other applications and to avoid NaN issue with tf.select()
    # log_probs = tf.select(x < -0.999, log_cdf_plus, tf.select(x > 0.999, log_one_minus_cdf_min, tf.log(cdf_delta)))

    # robust version, that still works if probabilities are below 1e-5 (which never happens in our code)
    # tensorflow backpropagates through tf.select() by multiplying with zero instead of selecting: this requires us to use some ugly tricks to avoid potential NaNs
    # the 1e-12 in tf.maximum(cdf_delta, 1e-12) is never actually used as output, it's purely there to get around the tf.select() gradient issue
    # if the probability on a sub-pixel is below 1e-5, we use an approximation
    # based on the assumption that the log-density is constant in the bin of
    # the observed sub-pixel value
    
    inner_inner_cond = (cdf_delta > 1e-5).float()
    inner_inner_out  = inner_inner_cond * torch.log(torch.clamp(cdf_delta, min=1e-12)) + (1. - inner_inner_cond) * (log_pdf_mid - np.log(127.5))
    inner_cond       = (x > 0.999).float()
    inner_out        = inner_cond * log_one_minus_cdf_min + (1. - inner_cond) * inner_inner_out
    cond             = (x < -0.999).float()
    log_probs        = cond * log_cdf_plus + (1. - cond) * inner_out
    log_probs        = torch.sum(log_probs, dim=3) + log_prob_from_logits(logit_probs)
    
    #Don't sum over batch dimension
    lse = log_sum_exp(log_probs)
    return -torch.sum(lse.view(lse.size(0), -1), dim=1)
Example #39
 def forward(self, x):
     e = self.enc(self.embedding(x.long()).unsqueeze(1))
     mu, logvar = self.c1(e).squeeze(), self.c2(e).squeeze()
     return mu, F.softplus(logvar) + Constants.eta
Example #40
def prune_model_keep_size2(model, prune_idx, CBL_idx, CBLidx2mask):

    pruned_model = deepcopy(model)
    activations = []
    for i, model_def in enumerate(model.module_defs):

        if model_def['type'] == 'convolutional':
            activation = torch.zeros(int(model_def['filters'])).cuda()
            if i in prune_idx:
                mask = torch.from_numpy(CBLidx2mask[i]).cuda()
                # mask = torch.from_numpy(CBLidx2mask[i])
                bn_module = pruned_model.module_list[i][1]
                bn_module.weight.data.mul_(mask)
                if model_def['activation'] == 'leaky':
                    activation = F.leaky_relu((1 - mask) * bn_module.bias.data,
                                              0.1)
                elif model_def['activation'] == 'mish':
                    activation = (1 - mask) * bn_module.bias.data.mul(
                        F.softplus(bn_module.bias.data).tanh())
                elif model_def['activation'] == 'SiLU':  # yolov5-v4
                    activation = (1 - mask) * bn_module.bias.data * torch.sigmoid(
                        bn_module.bias.data)
                elif model_def['activation'] == 'Hardswish':
                    activation = (1 - mask) * bn_module.bias.data * F.hardtanh(
                        bn_module.bias.data + 3, 0., 6.) / 6.
                update_activation(i, pruned_model, activation, CBL_idx)
                bn_module.bias.data.mul_(mask)
            activations.append(activation)

        elif model_def['type'] == 'shortcut':
            actv1 = activations[i - 1]
            from_layer = int(model_def['from'])
            actv2 = activations[i + from_layer]
            activation = actv1 + actv2
            update_activation(i, pruned_model, activation, CBL_idx)
            activations.append(activation)

        elif model_def['type'] == 'route':
            # SPP is excluded from pruning; its route layers need no update and only hold a place
            from_layers = [int(s) for s in model_def['layers'].split(',')]
            activation = None
            if len(from_layers) == 1:
                activation = activations[
                    i +
                    from_layers[0] if from_layers[0] < 0 else from_layers[0]]
                if 'groups' in model_def:
                    activation = activation[(activation.shape[0] // 2):]
                update_activation(i, pruned_model, activation, CBL_idx)
            elif len(from_layers) == 2:
                actv1 = activations[i + from_layers[0]]
                actv2 = activations[
                    i +
                    from_layers[1] if from_layers[1] < 0 else from_layers[1]]
                activation = torch.cat((actv1, actv2))
                # update_activation(i, pruned_model, activation, CBL_idx)
                #update_activation_nconv
                next_idx = i + 1
                if pruned_model.module_defs[next_idx][
                        'type'] == 'convolutional_noconv':
                    next_conv1 = pruned_model.module_list[i +
                                                          from_layers[0]][0]
                    next_conv2 = pruned_model.module_list[
                        i + from_layers[1]
                        if from_layers[1] < 0 else from_layers[1]][0]
                    conv_sum1 = next_conv1.weight.data.sum(dim=(2, 3))
                    conv_sum2 = next_conv2.weight.data.sum(dim=(2, 3))
                    offset1 = conv_sum1.matmul(actv1.reshape(-1,
                                                             1)).reshape(-1)
                    offset2 = conv_sum2.matmul(actv2.reshape(-1,
                                                             1)).reshape(-1)
                    offset = torch.cat((offset1, offset2))
                    if next_idx in CBL_idx:
                        next_bn = pruned_model.module_list[next_idx][0]
                        next_bn.running_mean.data.sub_(offset)
                else:
                    update_activation(i, pruned_model, activation, CBL_idx)
            activations.append(activation)

        elif model_def['type'] == 'upsample':
            # activation = torch.zeros(int(model.module_defs[i - 1]['filters'])).cuda()
            activations.append(activations[i - 1])

        elif model_def['type'] == 'yolo':
            activations.append(None)

        elif model_def['type'] == 'focus':
            activations.append(None)

        elif model_def['type'] == 'convolutional_nobias':
            activations.append(activations[i - 1])
            # activation = torch.zeros(int(model_def['filters'])).cuda()
            # activations.append(activation)

        elif model_def['type'] == 'convolutional_noconv':
            activation = torch.zeros(int(model_def['filters'])).cuda()
            if i in prune_idx:
                mask = torch.from_numpy(CBLidx2mask[i]).cuda()
                # mask = torch.from_numpy(CBLidx2mask[i])
                bn_module = pruned_model.module_list[i][0]
                bn_module.weight.data.mul_(mask)

                activation = F.leaky_relu((1 - mask) * bn_module.bias.data,
                                          0.1)
                # if model_def['activation'] == 'leaky':
                #     activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1)
                # elif model_def['activation'] == 'mish':
                #     activation = (1 - mask) * bn_module.bias.data.mul(F.softplus(bn_module.bias.data).tanh())
                update_activation(i, pruned_model, activation, CBL_idx)
                bn_module.bias.data.mul_(mask)
            activations.append(activation)

        elif model_def['type'] == 'maxpool':  # distinguishes SPP from the tiny variant
            if model.module_defs[i + 1]['type'] == 'route':
                activations.append(None)
            else:
                activation = activations[i - 1]
                update_activation(i, pruned_model, activation, CBL_idx)
                activations.append(activation)

    return pruned_model
Example #41
 def scale(self):
     return softplus(self._scale)
Example #42
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt

# fake data
x = torch.linspace(-5, 5, 200)  # x data (tensor), shape=(200,)
x = Variable(x)
x_np = x.data.numpy()   # numpy array for plotting

# following are popular activation functions
y_relu = F.relu(x).data.numpy()
y_sigmoid = F.sigmoid(x).data.numpy()
y_tanh = F.tanh(x).data.numpy()
y_softplus = F.softplus(x).data.numpy()
# y_softmax = F.softmax(x)  # softmax is special: it maps scores to a probability distribution


# plt to visualize these activation function
plt.figure(1, figsize=(8, 6))
plt.subplot(221)
plt.plot(x_np, y_relu, c='red', label='relu')
plt.ylim((-1, 5))
plt.legend(loc='best')

plt.subplot(222)
plt.plot(x_np, y_sigmoid, c='red', label='sigmoid')
plt.ylim((-0.2, 1.2))
plt.legend(loc='best')
Example #43
 def log_prob(self, x):
     return -(F.softplus(x) + F.softplus(-x))
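Because softplus(x) = x + softplus(-x), this expression equals -x - 2 * softplus(-x), which is exactly the log-density of a standard logistic distribution; a minimal check:

import torch
import torch.nn.functional as F

x = torch.linspace(-6, 6, 25)
lp = -(F.softplus(x) + F.softplus(-x))
logistic = -x - 2. * F.softplus(-x)  # log pdf of the standard logistic
assert torch.allclose(lp, logistic, atol=1e-6)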