def compute_loss(input, target, mask, smoothing):
    """Masked, label-smoothed cross-entropy summed over the time axis.

    :param input: logits, indexed as [batch, time, vocab] (class axis = 2)
    :param target: integer class ids to be one-hot encoded
    :param mask: per-position validity mask; invalid positions contribute 0
    :param smoothing: label-smoothing factor forwarded to `label_smoothing`
    :return: per-example loss, summed over the time dimension
    """
    num_classes = input.size(2)
    # One-hot encode, then soften the targets before the CE computation.
    target = label_smoothing(one_hot(target, num_classes), smoothing)
    per_position = softmax_cross_entropy(input=input, target=target, axis=2)
    # Zero out padded positions, then reduce over time.
    return (per_position * mask.float()).sum(1)
def compute(input, target, dim=-1):
    """Equal-weight blend of smoothed cross-entropy and recall losses.

    :param input: logits with the class dimension at `dim`
    :param target: target distribution (smoothed in-place if configured)
    :param dim: class axis for both component losses
    :return: 0.5-weighted sum of the two mean losses
    """
    eps = config.label_smoothing
    if eps is not None:
        # Soften targets only when smoothing is configured.
        target = utils.label_smoothing(target, eps=eps)
    ce_mean = softmax_cross_entropy(input=input, target=target, axis=dim).mean()
    recall_mean = softmax_recall_loss(input=input, target=target, dim=dim).mean()
    return weighted_sum(ce_mean, recall_mean, 0.5)
def _calc_loss(self, labels, p_global, p_start, p_end):
    """
    calculate loss
    :param labels: labels, contain p_global, p_start, p_end (columns 0..2)
    :param p_global: predicted p_global
    :param p_start: predicted p_start
    :param p_end: predicted p_end
    :return: p_global loss + p_start loss + p_end loss
    """
    # global loss
    p_global_true = labels[:, 0]  # [N]
    p_global_true = label_smoothing(tf.one_hot(p_global_true, depth=2))  # [N, 2]
    # Turn the scalar probability into a two-class distribution [1-p, p].
    p_global = tf.squeeze(tf.stack([1 - p_global, p_global], axis=2), axis=1)
    # NOTE(review): this calls self._focal_loss while the start/end branches
    # call self.focal_loss — confirm both names exist or unify them.
    p_global_loss = self._focal_loss(p_global, p_global_true)
    # start loss
    p_start_true = labels[:, 1]
    p_start_true = tf.one_hot(p_start_true, depth=self.hp.maxlen2, dtype=tf.int32)
    # NOTE(review): one-hot of an already one-hot tensor (depth=2) — this
    # reproduces the original pipeline; verify it matches the intent.
    p_start_true = label_smoothing(tf.one_hot(p_start_true, depth=2))
    p_start_loss = self.focal_loss(p_start, p_start_true)
    # end loss
    p_end_true = labels[:, 2]
    p_end_true = tf.one_hot(p_end_true, depth=self.hp.maxlen2, dtype=tf.int32)
    p_end_true = label_smoothing(tf.one_hot(p_end_true, depth=2))
    p_end_loss = self.focal_loss(p_end, p_end_true)
    # BUG FIX: p_global_loss was computed but omitted from the total, while the
    # docstring promises "p_global loss + p_start loss + p_end loss".
    loss = p_global_loss + p_start_loss + p_end_loss
    return loss
def __getitem__(self, idx):
    """Return one training sample as {'x': CHW float tensor, 'y': smoothed one-hot}.

    Reads the image at self.X[idx] from disk, converts BGR->RGB, applies the
    augmentation pipeline, and label-smooths the one-hot target self.y[idx].
    """
    path, label = self.X[idx], self.y[idx]
    img = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
    img = self.transform(image=img)["image"]
    # H,W,C -> C,H,W for the framework's channel-first convention.
    img = np.rollaxis(img, -1, 0)
    target = label_smoothing(to_onehot(label, num_classes), self.ls_eps)
    return {
        'x': torch.from_numpy(img.astype('float32')),
        'y': torch.from_numpy(target.astype('float32')),
    }
def forward(self, inputs, targets, attack=True, targeted_label=-1, batch_idx=0):
    """Run the network, optionally crafting an adversarial example first.

    When attack=False, simply returns (logits, None). Otherwise performs an
    iterative sign-gradient attack driven by an optimal-transport (Sinkhorn)
    loss between natural and perturbed logits, then returns the logits on the
    adversarial input together with a label-smoothed CE loss against them.
    `targeted_label` and `batch_idx` are accepted but unused in this body.
    """
    if not attack:
        outputs, _ = self.basic_net(inputs)
        return outputs, None
    # Deep-copy the network so attack gradients don't touch the live model.
    if self.box_type == 'white':
        aux_net = pickle.loads(pickle.dumps(self.basic_net))
    elif self.box_type == 'black':
        assert self.attack_net is not None, "should provide an additional net in black-box case"
        # NOTE(review): black-box branch still copies basic_net, not
        # attack_net, despite the assertion above — confirm intent.
        aux_net = pickle.loads(pickle.dumps(self.basic_net))
    aux_net.eval()
    batch_size = inputs.size(0)
    m = batch_size
    n = batch_size
    # logits = aux_net(inputs)[0]
    # num_classes = logits.size(1)
    # outputs = aux_net(inputs)[0]
    # targets_prob = F.softmax(outputs.float(), dim=1)
    # y_tensor_adv = targets
    # step_sign = 1.0
    x = inputs.detach()
    # x_org = x.detach()
    # Random start inside the epsilon-ball around the clean input.
    x = x + torch.zeros_like(x).uniform_(-self.epsilon, self.epsilon)
    if self.train_flag:
        self.basic_net.train()
    else:
        self.basic_net.eval()
    # Natural-image logits are the fixed reference for the OT loss below.
    logits_pred_nat, fea_nat = aux_net(inputs)
    num_classes = logits_pred_nat.size(1)
    y_gt = one_hot_tensor(targets, num_classes, device)
    loss_ce = softCrossEntropy()
    iter_num = self.num_steps
    for i in range(iter_num):
        x.requires_grad_()
        # Clear any stale gradient on x before the new backward pass.
        zero_gradients(x)
        if x.grad is not None:
            x.grad.data.fill_(0)
        logits_pred, fea = aux_net(x)
        # OT distance between natural and current adversarial logit batches.
        ot_loss = ot.sinkhorn_loss_joint_IPOT(1, 0.00, logits_pred_nat,
                                              logits_pred, None, None,
                                              0.01, m, n)
        aux_net.zero_grad()
        adv_loss = ot_loss
        adv_loss.backward(retain_graph=True)
        # Ascend the loss: one signed-gradient step, then project back into
        # the epsilon-ball and the valid pixel range.
        x_adv = x.data + self.step_size * torch.sign(x.grad.data)
        x_adv = torch.min(torch.max(x_adv, inputs - self.epsilon),
                          inputs + self.epsilon)
        # Inputs are assumed normalized to [-1, 1] — TODO confirm upstream.
        x_adv = torch.clamp(x_adv, -1.0, 1.0)
        x = Variable(x_adv)
    # Final loss is computed on the live network, not the frozen copy.
    logits_pred, fea = self.basic_net(x)
    self.basic_net.zero_grad()
    y_sm = utils.label_smoothing(y_gt, y_gt.size(1), self.ls_factor)
    adv_loss = loss_ce(logits_pred, y_sm.detach())
    return logits_pred, adv_loss
def build_network(self):
    """Build the TF1 conv seq2seq translation graph (encoder, decoder with
    attention pooling, tied softmax layer, accuracy/loss summaries).

    Reads self.x / self.decode_input / self.y placeholders and self.config;
    populates self.logits, self.preds, self.acc, self.tensors and, in
    training mode, self.loss / self.mean_loss.
    """
    #import ipdb; ipdb.set_trace()
    config = self.config
    de2idx, idx2de = load_de_vocab()
    en2idx, idx2en = load_en_vocab()
    # Encoder
    with tf.variable_scope("encoder"):
        ## Embedding
        self.enc = embedding(self.x,
                             len(de2idx),
                             num_units=config.hidden_dim,
                             scale=True,
                             scope='enc_embed')
        ## plus position embedding
        self.enc += embedding(tf.tile(tf.expand_dims(tf.range(tf.shape(self.x)[1]), 0), \
                                      [tf.shape(self.x)[0], 1]),
                              config.maxlen,
                              config.hidden_dim,
                              zero_pad=False,
                              scale=False,
                              scope="enc_pe")
        self.enc = dropout(self.enc, config.keep_rate, is_train=self.is_train)
        # Keep the pre-convolution encoding for attention pooling later.
        self.enc_ = self.enc
        # First stack of encoder conv blocks (hidden_dim channels).
        for block_idx in range(config.num_enc_block_1):
            scope = "encoder_block_{}".format(block_idx)
            enc_out = conv2d(self.enc,
                             kernel_shape=(config.enc_kernel_width, 1),
                             scope=scope)
            enc_out = batch_norm(enc_out,
                                 is_training=self.is_train,
                                 scope="lm" + scope)
            self.enc = enc_out
    # Decoder
    with tf.variable_scope("decoder"):
        ## Embedding
        self.dec = embedding(self.decode_input,
                             len(en2idx),
                             config.hidden_dim,
                             scale=True,
                             scope='dec_embed')
        ## plus position embedding
        self.dec += embedding(tf.tile(tf.expand_dims(tf.range(tf.shape(self.decode_input)[1]), 0), \
                                      [tf.shape(self.decode_input)[0], 1]),
                              config.maxlen,
                              config.hidden_dim,
                              zero_pad=False,
                              scale=False,
                              scope='dec_pe')
        # Keep the pre-convolution decoding for the tensors dict below.
        self.dec_ = self.dec
        # First stack of decoder blocks: causal conv + attention + residual.
        for block_idx in range(config.num_dec_block_1):
            scope = "decoder_block_conv_{}".format(block_idx)
            attention_scope = "decoder_block_att_{}".format(block_idx)
            dec_out = conv2d(self.dec,
                             kernel_shape=(config.dec_kernel_width, 1),
                             causal=True,
                             scope=scope)
            dec_out = attention_pool(self.enc_,
                                     self.dec,
                                     enc_out,
                                     dec_out,
                                     scope=attention_scope)
            # Residual connection around conv+attention.
            dec_out = dec_out + self.dec
            dec_out = batch_norm(dec_out,
                                 is_training=self.is_train,
                                 scope="lm" + scope)
            self.dec = dec_out
    # Second encoder stack, widened to hidden_dim_2 channels.
    with tf.variable_scope('encoder'):
        for block_idx in range(config.num_enc_block_2):
            scope = "encoder_block_{}".format(config.num_enc_block_1 + block_idx)
            enc_out = conv2d(self.enc,
                             kernel_shape=(config.enc_kernel_width, 1),
                             num_outputs=config.hidden_dim_2,
                             scope=scope)
            enc_out = batch_norm(enc_out,
                                 is_training=self.is_train,
                                 scope="lm" + scope)
            self.enc = enc_out
    # Second decoder stack, also at hidden_dim_2 channels.
    with tf.variable_scope('decoder'):
        for block_idx in range(config.num_dec_block_2):
            scope = "decoder_block_conv_{}".format(config.num_dec_block_1 + block_idx)
            attention_scope = "decoder_block_att_{}".format(
                config.num_dec_block_1 + block_idx)
            dec_out = conv2d(self.dec,
                             kernel_shape=(config.dec_kernel_width, 1),
                             num_outputs=config.hidden_dim_2,
                             causal=True,
                             scope=scope)
            dec_out = attention_pool(self.enc_,
                                     self.dec,
                                     enc_out,
                                     dec_out,
                                     scope=attention_scope)
            dec_out = dec_out + self.dec
            dec_out = batch_norm(dec_out,
                                 is_training=self.is_train,
                                 scope="lm" + scope)
            self.dec = dec_out
    with tf.variable_scope("softmax_layer"):
        # NOTE(review): w is sized [hidden_dim, vocab] but dec_out carries
        # hidden_dim_2 channels after the second stack — confirm
        # hidden_dim == hidden_dim_2 or this matmul will fail to build.
        w = tf.get_variable('w', [config.hidden_dim, len(en2idx)])
        b = tf.get_variable('b', [len(en2idx)])
        # Broadcast w across the batch for the batched matmul.
        w = tf.tile(tf.expand_dims(w, 0), [config.batch_size, 1, 1])
        self.logits = tf.matmul(dec_out, w) + b
    self.preds = tf.to_int32(tf.arg_max(self.logits, dimension=-1))
    # Positions where y != 0 (id 0 presumably = padding — verify vocab).
    self.istarget = tf.to_float(tf.not_equal(self.y, 0))
    # Accuracy over non-padding positions only.
    self.acc = tf.reduce_sum(
        tf.to_float(tf.equal(self.preds, self.y)) *
        self.istarget) / tf.reduce_sum(self.istarget)
    tf.summary.scalar('acc', self.acc)
    if self.is_train:
        self.y_smoothed = label_smoothing(
            tf.one_hot(self.y, depth=len(en2idx)))
        # NOTE(review): unlike acc, the loss is NOT masked by istarget, so
        # padding positions contribute to mean_loss — confirm intent.
        self.loss = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.logits, labels=self.y_smoothed)
        self.mean_loss = tf.reduce_mean(self.loss)
        tf.summary.scalar('mean_loss', self.mean_loss)
    # Expose key intermediate tensors for inspection/summaries.
    self.tensors = {
        'source_sentence': self.enc_,
        'target_sentence': self.dec_,
        'enc_out': enc_out,
        'dec_out': dec_out,
        'predictions': self.preds,
        'logits': self.logits
    }
    if self.is_train:
        self.tensors['loss'] = self.loss
    for key, value in self.tensors.items():
        tf.summary.histogram(key, value)