示例#1
0
def compute_loss(input, target, mask, smoothing):
    """Masked, label-smoothed cross-entropy summed over the sequence axis.

    :param input: logits; class dimension is axis 2
    :param target: integer class indices, one-hot encoded internally
    :param mask: per-position mask; zeroed positions contribute no loss
    :param smoothing: label-smoothing factor passed to ``label_smoothing``
    :return: per-example loss, summed over dim 1
    """
    num_classes = input.size(2)
    smoothed = label_smoothing(one_hot(target, num_classes), smoothing)
    per_position = softmax_cross_entropy(input=input, target=smoothed, axis=2)
    return (per_position * mask.float()).sum(1)
示例#2
0
    def compute(input, target, dim=-1):
        """Equal-weight blend of cross-entropy and recall loss.

        Applies optional label smoothing (controlled by
        ``config.label_smoothing``) before computing both terms.

        :param input: logits
        :param target: target distribution / one-hot labels
        :param dim: class axis for both loss terms
        :return: 0.5-weighted sum of mean CE and mean recall loss
        """
        smoothed = target
        if config.label_smoothing is not None:
            smoothed = utils.label_smoothing(smoothed, eps=config.label_smoothing)

        ce_term = softmax_cross_entropy(input=input, target=smoothed, axis=dim)
        recall_term = softmax_recall_loss(input=input, target=smoothed, dim=dim)

        return weighted_sum(ce_term.mean(), recall_term.mean(), 0.5)
示例#3
0
    def _calc_loss(self, labels, p_global, p_start, p_end):
        """Sum the focal losses of the three prediction heads.

        :param labels: int tensor [N, 3]; columns hold the true p_global,
            p_start and p_end labels respectively
        :param p_global: predicted p_global scores
        :param p_start: predicted start-position scores
        :param p_end: predicted end-position scores
        :return: p_global loss + p_start loss + p_end loss
        """
        # global loss: binary label -> smoothed one-hot over 2 classes
        p_global_true = labels[:, 0]  # [N]
        p_global_true = label_smoothing(tf.one_hot(p_global_true,
                                                   depth=2))  # [N, 2]
        # stack (1-p, p) so the prediction matches the 2-class true tensor
        p_global = tf.squeeze(tf.stack([1 - p_global, p_global], axis=2),
                              axis=1)
        p_global_loss = self._focal_loss(p_global, p_global_true)

        # start loss: position index -> per-position 0/1 indicator over
        # maxlen2, then a smoothed 2-class one-hot per position
        p_start_true = labels[:, 1]
        p_start_true = tf.one_hot(p_start_true,
                                  depth=self.hp.maxlen2,
                                  dtype=tf.int32)
        p_start_true = label_smoothing(tf.one_hot(p_start_true, depth=2))
        # NOTE(review): this calls self.focal_loss while the global head
        # uses self._focal_loss — confirm which name the class defines.
        p_start_loss = self.focal_loss(p_start, p_start_true)

        # end loss (same construction as the start head)
        p_end_true = labels[:, 2]
        p_end_true = tf.one_hot(p_end_true,
                                depth=self.hp.maxlen2,
                                dtype=tf.int32)
        p_end_true = label_smoothing(tf.one_hot(p_end_true, depth=2))
        p_end_loss = self.focal_loss(p_end, p_end_true)

        # Fix: include the global head. The docstring promises the sum of
        # all three losses, but p_global_loss was previously computed and
        # then silently dropped from the total.
        loss = p_global_loss + p_start_loss + p_end_loss

        return loss
示例#4
0
    def __getitem__(self, idx):
        """Load and preprocess one (image, label) sample.

        Reads the image at ``self.X[idx]``, converts BGR->RGB, applies the
        augmentation pipeline, and moves channels first. The label is
        one-hot encoded and label-smoothed with ``self.ls_eps``.

        :return: dict with 'x' (C,H,W float32 tensor) and 'y' (float32
            smoothed one-hot tensor)
        """
        img = cv2.imread(self.X[idx])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transform(image=img)["image"]
        # channels-last -> channels-first for torch
        img = np.rollaxis(img, -1, 0)

        label = to_onehot(self.y[idx], num_classes)
        label = label_smoothing(label, self.ls_eps)

        return {
            'x': torch.from_numpy(img.astype('float32')),
            'y': torch.from_numpy(label.astype('float32')),
        }
示例#5
0
    def forward(self,
                inputs,
                targets,
                attack=True,
                targeted_label=-1,
                batch_idx=0):
        """Craft adversarial examples by iterated ascent on an OT loss.

        :param inputs: clean input batch
        :param targets: ground-truth class indices
        :param attack: when False, just run basic_net and return its logits
        :param targeted_label: unused; kept for interface compatibility
        :param batch_idx: unused; kept for interface compatibility
        :return: (logits of basic_net on the final adversarial batch,
                  label-smoothed cross-entropy loss) — or (outputs, None)
                  when ``attack`` is False
        """
        if not attack:
            outputs, _ = self.basic_net(inputs)
            return outputs, None

        # Deep-copy the network used to craft the perturbation.
        # Fix: in the black-box case the gradients must come from the
        # auxiliary attack net — the original copied basic_net here,
        # making the branch identical to white-box despite the assert.
        if self.box_type == 'white':
            aux_net = pickle.loads(pickle.dumps(self.basic_net))
        elif self.box_type == 'black':
            assert self.attack_net is not None, "should provide an additional net in black-box case"
            aux_net = pickle.loads(pickle.dumps(self.attack_net))

        aux_net.eval()
        batch_size = inputs.size(0)
        m = batch_size
        n = batch_size

        # Random start inside the epsilon-ball around the clean inputs.
        x = inputs.detach()
        x = x + torch.zeros_like(x).uniform_(-self.epsilon, self.epsilon)

        if self.train_flag:
            self.basic_net.train()
        else:
            self.basic_net.eval()

        logits_pred_nat, fea_nat = aux_net(inputs)

        num_classes = logits_pred_nat.size(1)
        y_gt = one_hot_tensor(targets, num_classes, device)

        loss_ce = softCrossEntropy()

        iter_num = self.num_steps

        for i in range(iter_num):
            x.requires_grad_()
            zero_gradients(x)
            if x.grad is not None:
                x.grad.data.fill_(0)

            logits_pred, fea = aux_net(x)

            # Sinkhorn/OT distance between natural and perturbed logits.
            ot_loss = ot.sinkhorn_loss_joint_IPOT(1, 0.00, logits_pred_nat,
                                                  logits_pred, None, None,
                                                  0.01, m, n)

            aux_net.zero_grad()
            adv_loss = ot_loss
            adv_loss.backward(retain_graph=True)

            # Signed-gradient ascent step, projected back into the
            # epsilon-ball and clamped to the valid pixel range [-1, 1].
            x_adv = x.data + self.step_size * torch.sign(x.grad.data)
            x_adv = torch.min(torch.max(x_adv, inputs - self.epsilon),
                              inputs + self.epsilon)
            x_adv = torch.clamp(x_adv, -1.0, 1.0)
            x = Variable(x_adv)

            # Evaluate the defended network on the current adversarial batch.
            logits_pred, fea = self.basic_net(x)
            self.basic_net.zero_grad()

            y_sm = utils.label_smoothing(y_gt, y_gt.size(1), self.ls_factor)
            adv_loss = loss_ce(logits_pred, y_sm.detach())

        return logits_pred, adv_loss
    def build_network(self):
        """Build the convolutional seq2seq translation graph.

        Constructs a two-stage conv encoder and a two-stage conv decoder
        with attention pooling, followed by a softmax projection layer.
        Populates self.logits / self.preds / self.acc, and, when training,
        self.loss / self.mean_loss; exposes key tensors via self.tensors.
        """
        config = self.config
        de2idx, idx2de = load_de_vocab()
        en2idx, idx2en = load_en_vocab()

        # Encoder
        with tf.variable_scope("encoder"):
            ## Token embedding for the source sequence
            self.enc = embedding(self.x,
                                 len(de2idx),
                                 num_units=config.hidden_dim,
                                 scale=True,
                                 scope='enc_embed')

            ## plus position embedding (learned, indexed by position 0..len-1)
            self.enc += embedding(tf.tile(tf.expand_dims(tf.range(tf.shape(self.x)[1]), 0), \
                                            [tf.shape(self.x)[0], 1]),
                                config.maxlen,
                                config.hidden_dim,
                                zero_pad=False,
                                scale=False,
                                scope="enc_pe")

            self.enc = dropout(self.enc,
                               config.keep_rate,
                               is_train=self.is_train)

            # Keep the embedded (pre-conv) encoder input for attention pooling.
            self.enc_ = self.enc
            for block_idx in range(config.num_enc_block_1):
                scope = "encoder_block_{}".format(block_idx)
                enc_out = conv2d(self.enc,
                                 kernel_shape=(config.enc_kernel_width, 1),
                                 scope=scope)
                enc_out = batch_norm(enc_out,
                                     is_training=self.is_train,
                                     scope="lm" + scope)
                self.enc = enc_out

        # Decoder
        with tf.variable_scope("decoder"):
            ## Token embedding for the (shifted) target sequence
            self.dec = embedding(self.decode_input,
                                 len(en2idx),
                                 config.hidden_dim,
                                 scale=True,
                                 scope='dec_embed')
            ## plus position embedding
            self.dec += embedding(tf.tile(tf.expand_dims(tf.range(tf.shape(self.decode_input)[1]), 0), \
                                            [tf.shape(self.decode_input)[0], 1]),
                                config.maxlen,
                                config.hidden_dim,
                                zero_pad=False,
                                scale=False,
                                scope='dec_pe')

            # Keep the embedded (pre-conv) decoder input for attention pooling.
            self.dec_ = self.dec
            for block_idx in range(config.num_dec_block_1):
                scope = "decoder_block_conv_{}".format(block_idx)
                attention_scope = "decoder_block_att_{}".format(block_idx)
                # Causal conv so each position only sees earlier targets.
                dec_out = conv2d(self.dec,
                                 kernel_shape=(config.dec_kernel_width, 1),
                                 causal=True,
                                 scope=scope)
                # NOTE(review): enc_out here is the loop-carried value from
                # the last stage-1 encoder block; if num_enc_block_1 == 0
                # this name is undefined — confirm the config guarantees >= 1.
                dec_out = attention_pool(self.enc_,
                                         self.dec,
                                         enc_out,
                                         dec_out,
                                         scope=attention_scope)
                # Residual connection around the conv+attention block.
                dec_out = dec_out + self.dec
                dec_out = batch_norm(dec_out,
                                     is_training=self.is_train,
                                     scope="lm" + scope)
                self.dec = dec_out

        # Stage-2 encoder blocks reuse the "encoder" variable scope and
        # continue the block numbering so scope names stay unique.
        with tf.variable_scope('encoder'):
            for block_idx in range(config.num_enc_block_2):
                scope = "encoder_block_{}".format(config.num_enc_block_1 +
                                                  block_idx)
                enc_out = conv2d(self.enc,
                                 kernel_shape=(config.enc_kernel_width, 1),
                                 num_outputs=config.hidden_dim_2,
                                 scope=scope)
                enc_out = batch_norm(enc_out,
                                     is_training=self.is_train,
                                     scope="lm" + scope)
                self.enc = enc_out

        # Stage-2 decoder blocks, widened to hidden_dim_2 like the encoder.
        with tf.variable_scope('decoder'):
            for block_idx in range(config.num_dec_block_2):
                scope = "decoder_block_conv_{}".format(config.num_dec_block_1 +
                                                       block_idx)
                attention_scope = "decoder_block_att_{}".format(
                    config.num_dec_block_1 + block_idx)
                dec_out = conv2d(self.dec,
                                 kernel_shape=(config.dec_kernel_width, 1),
                                 num_outputs=config.hidden_dim_2,
                                 causal=True,
                                 scope=scope)
                dec_out = attention_pool(self.enc_,
                                         self.dec,
                                         enc_out,
                                         dec_out,
                                         scope=attention_scope)
                dec_out = dec_out + self.dec
                dec_out = batch_norm(dec_out,
                                     is_training=self.is_train,
                                     scope="lm" + scope)
                self.dec = dec_out

        with tf.variable_scope("softmax_layer"):
            # NOTE(review): w is sized [hidden_dim, vocab] but dec_out after
            # stage 2 has hidden_dim_2 channels — this only type-checks when
            # hidden_dim_2 == hidden_dim; confirm against the config.
            w = tf.get_variable('w', [config.hidden_dim, len(en2idx)])
            b = tf.get_variable('b', [len(en2idx)])
            # Tile the projection so tf.matmul can batch over examples.
            w = tf.tile(tf.expand_dims(w, 0), [config.batch_size, 1, 1])
            self.logits = tf.matmul(dec_out, w) + b
            # NOTE(review): tf.arg_max is deprecated in favor of tf.argmax.
            self.preds = tf.to_int32(tf.arg_max(self.logits, dimension=-1))
            # Accuracy over non-padding positions only (pad id assumed 0).
            self.istarget = tf.to_float(tf.not_equal(self.y, 0))
            self.acc = tf.reduce_sum(
                tf.to_float(tf.equal(self.preds, self.y)) *
                self.istarget) / tf.reduce_sum(self.istarget)
            tf.summary.scalar('acc', self.acc)

            if self.is_train:
                # Label-smoothed targets; loss is unmasked here (includes
                # padding positions), unlike the accuracy above.
                self.y_smoothed = label_smoothing(
                    tf.one_hot(self.y, depth=len(en2idx)))
                self.loss = tf.nn.softmax_cross_entropy_with_logits(
                    logits=self.logits, labels=self.y_smoothed)
                self.mean_loss = tf.reduce_mean(self.loss)
                tf.summary.scalar('mean_loss', self.mean_loss)

        # Expose key tensors for inspection / downstream use.
        self.tensors = {
            'source_sentence': self.enc_,
            'target_sentence': self.dec_,
            'enc_out': enc_out,
            'dec_out': dec_out,
            'predictions': self.preds,
            'logits': self.logits
        }
        if self.is_train:
            self.tensors['loss'] = self.loss

        for key, value in self.tensors.items():
            tf.summary.histogram(key, value)