def forward(self, x):
    """Run one VAE pass and return the per-sample training loss.

    The encoder output is split in half along axis 1 into a latent mean and
    a latent log-variance, a latent sample is drawn with the
    reparametrization trick, and the decoder output is compared with the
    input using a pixel-wise binary cross-entropy.

    Args:
        x: input batch; per the original author's note its shape is
            (batch_size, n_channels, width, height) and it is fed to the
            encoder without flattening.

    Returns:
        One loss value per sample: KL divergence plus the cross-entropy
        scaled by ``self.pbp_weight``.
    """
    n_samples = x.shape[0]

    # A single split yields both halves of the encoder output.
    mu, log_var = nd.split(self.encoder(x), axis=1, num_outputs=2)

    # Reparametrization trick: z = mu + sigma * eps with eps ~ N(0, 1).
    noise = nd.random_normal(0, 1, shape=(n_samples, self.n_latent), ctx=CTX)
    z = mu + nd.exp(0.5 * log_var) * noise

    # KL divergence between the latent distribution and a standard normal.
    kl_term = -0.5 * nd.sum(1 + log_var - mu * mu - nd.exp(log_var), axis=1)

    # The decoder receives the latent sample as a (batch, n_latent, 1, 1) map.
    reconstruction = self.decoder(z.reshape((n_samples, self.n_latent, 1, 1)))

    # Flatten both images so the cross-entropy can be summed per sample.
    target = x.reshape((n_samples, -1))
    predicted = reconstruction.reshape((reconstruction.shape[0], -1))

    # The 1e-10 offsets keep log() away from zero.
    log_likelihood = (target * nd.log(predicted + 1e-10)
                      + (1 - target) * nd.log(1 - predicted + 1e-10))
    pixel_term = -nd.sum(log_likelihood, axis=1)

    return kl_term + pixel_term * self.pbp_weight
def coordinate_distance(target, label):
    """Return the distance between two coordinate tensors.

    Both inputs are split into two equal halves along the last axis. The
    first halves (named ``xy`` in the original) are subtracted; the second
    halves (named ``wh``) are compared through the log of their ratio.

    Args:
        target: NDArray whose last axis holds the xy half followed by the
            wh half.
        label: NDArray laid out like ``target``.

    Returns:
        NDArray with the xy difference and the log wh ratio concatenated
        along the last axis.
    """
    target_halves = nd.split(target, num_outputs=2, axis=-1)
    label_halves = nd.split(label, num_outputs=2, axis=-1)

    xy_delta = target_halves[0] - label_halves[0]
    log_wh_ratio = nd.log(target_halves[1] / label_halves[1])

    return nd.concat(xy_delta, log_wh_ratio, dim=-1)
def generate(self, x):
    """Encode ``x``, sample a latent vector and return the decoded image.

    This mirrors the sampling part of the training pass but returns the
    generated output instead of a loss.

    Args:
        x: input image batch, passed to the encoder without flattening
            (the original author expects a 4-dimensional array).

    Returns:
        The decoder output reshaped to
        (-1, self.n_out_channels, self.out_width, self.out_height).
    """
    # The encoder output holds the latent mean and log-variance side by side.
    mu, log_var = nd.split(self.encoder(x), axis=1, num_outputs=2)

    # Reparametrization trick: z = mu + sigma * eps with eps ~ N(0, 1).
    noise = nd.random_normal(loc=0, scale=1,
                             shape=(x.shape[0], self.n_latent), ctx=CTX)
    z = mu + nd.exp(0.5 * log_var) * noise

    decoded = self.decoder(z)
    return decoded.reshape(-1, self.n_out_channels,
                           self.out_width, self.out_height)
def generate(self, x):
    """Encode ``x``, sample a latent vector and return the generated image.

    Follows the same steps as the training pass but stops at the
    reconstruction. The KL divergence that the previous version computed
    here was never used or returned, so it is no longer evaluated.

    Args:
        x: 4-dimensional image batch; its shape is unpacked as
            (batch_size, n_channels, width, height).

    Returns:
        The decoder output reshaped back to the shape of ``x``.
    """
    batch_size, n_channels_in, input_width, input_height = x.shape

    # The encoder consumes flattened images.
    x_flattened = x.reshape(batch_size, -1)
    latent_layer = self.encoder(x_flattened)

    # The encoder output holds the latent mean and log-variance side by side.
    latent_mean, latent_logvar = nd.split(latent_layer, axis=1, num_outputs=2)

    # Reparametrization trick: z = mu + sigma * eps with eps ~ N(0, 1).
    eps = nd.random_normal(loc=0, scale=1,
                           shape=(batch_size, self.n_latent), ctx=CTX)
    latent_z = latent_mean + nd.exp(0.5 * latent_logvar) * eps

    # The decoder emits a flattened image; restore the input layout.
    x_hat_flattened = self.decoder(latent_z)
    return x_hat_flattened.reshape(batch_size, n_channels_in,
                                   input_width, input_height)
def forward(self, x):
    """Run one VAE pass and score the reconstruction with ``self.loss_net``.

    Side effects: stores the batch size on ``self.loss_net.batch_size`` and
    the decoder output on ``self.x_hat``.

    Args:
        x: input batch; per the original note its shape is
            (n_batch, n_channels, width, height). It is flattened to
            (n_batch, -1) before use.

    Returns:
        Whatever ``self.loss_net`` returns for the flattened input and the
        decoder output.
    """
    n_samples = x.shape[0]
    flat_input = x.reshape(n_samples, -1)
    self.loss_net.batch_size = n_samples

    # The encoder output holds the latent mean and log-variance side by side.
    mu, log_var = nd.split(self.encoder(flat_input), axis=1, num_outputs=2)

    # Reparametrization trick: z = mu + sigma * eps with eps ~ N(0, 1).
    noise = nd.random_normal(loc=0, scale=1,
                             shape=(n_samples, self.n_latent), ctx=CTX)
    z = mu + nd.exp(0.5 * log_var) * noise

    # Keep the reconstruction on the instance.
    reconstruction = self.decoder(z)
    self.x_hat = reconstruction

    return self.loss_net(flat_input, reconstruction)
def matmul(self, x, y, transpose_a=False, transpose_b=False):
    """Batched matrix product computed independently per embedding slice.

    ``x`` and ``y`` are split into ``self.embedding_size`` pieces along
    axis 2, each pair is multiplied with ``nd.batch_dot``, and the results
    are stacked along a new leading axis.

    The previous version copied every product to the host
    (``asnumpy().tolist()``) and rebuilt an NDArray from nested lists. That
    forced a device synchronisation per slice and detached the result from
    the autograd graph. The products now stay NDArrays, so the result
    keeps the input dtype (the old code always produced float32).

    Args:
        x: 3-dimensional NDArray, sliced along axis 2.
        y: 3-dimensional NDArray, sliced along axis 2.
        transpose_a: forwarded to ``nd.batch_dot``.
        transpose_b: forwarded to ``nd.batch_dot``.

    Returns:
        NDArray on ``self.ctx`` whose leading axis indexes the embedding
        slices.
    """
    x_slices = nd.split(x, self.embedding_size, 2)
    y_slices = nd.split(y, self.embedding_size, 2)

    # With a single output, nd.split returns a bare NDArray, not a list.
    if not isinstance(x_slices, (list, tuple)):
        x_slices = [x_slices]
    if not isinstance(y_slices, (list, tuple)):
        y_slices = [y_slices]

    products = [
        nd.batch_dot(x_part, y_part, transpose_a, transpose_b=transpose_b)
        for x_part, y_part in zip(x_slices, y_slices)
    ]

    # as_in_context is a no-op when the data already lives on self.ctx.
    return nd.stack(*products, axis=0).as_in_context(self.ctx)
def hybrid_forward(self, F, score_gt, kernel_gt, score_pred, training_masks, *args, **kwargs):
    """Dice loss over the text map and six kernel maps, with hard-example mining.

    Fixes relative to the previous version:
      * the union term of the text dice used ``selected_mask`` (the mask of
        the last sample left over from the loop) instead of the
        concatenated ``selected_masks``;
      * a ``kernel_dice`` value that was immediately overwritten is gone;
      * the per-kernel losses are combined with ``F.concat`` rather than
        ``F.array`` on a list of NDArrays, so they stay on the same device
        and remain part of the autograd graph;
      * the kernel mask, identical for all six kernels, is built once.

    Side effects: sets ``self.pixel_acc``, ``self.kernel_loss`` and
    ``self.C_loss``.

    Args:
        F: array namespace supplied by Gluon.
        score_gt: ground-truth text map.
        kernel_gt: ground-truth kernels, split into 6 maps along axis 3.
        score_pred: predictions, split into 7 maps along axis 1 (six
            kernels followed by the text map).
        training_masks: mask applied to the kernel terms and passed to the
            hard-example miner.

    Returns:
        ``self.lam * text_loss + (1 - self.lam) * kernel_loss``.
    """
    # Hard-example mining runs one sample at a time; each slice keeps the
    # batch axis so the masks can be concatenated back.
    # NOTE(review): reading score_gt.shape requires imperative NDArrays.
    selected_masks = []
    for i in range(score_gt.shape[0]):
        selected_masks.append(
            self._ohem_single(score_gt[i:i + 1], score_pred[i:i + 1],
                              training_masks[i:i + 1]))
    selected_masks = F.concat(*selected_masks, dim=0)

    s1, s2, s3, s4, s5, s6 = F.split(kernel_gt, num_outputs=6, axis=3,
                                     squeeze_axis=True)
    s1_pred, s2_pred, s3_pred, s4_pred, s5_pred, s6_pred, C_pred = F.split(
        score_pred, num_outputs=7, axis=1, squeeze_axis=True)

    self.pixel_acc = batch_pix_accuracy(C_pred, score_gt)

    # Dice loss for the text map, restricted to the mined pixels.
    eps = 1e-5
    intersection = F.sum(score_gt * C_pred * selected_masks, axis=1)
    union = (F.sum(score_gt * selected_masks, axis=1)
             + F.sum(C_pred * selected_masks, axis=1) + eps)
    C_dice_loss = 1. - F.mean((2 * intersection / union))

    # Kernel terms count only where text is predicted (> 0.5) or labelled.
    # The mask depends only on C_pred and score_gt, so build it once.
    kernel_mask = F.where(C_pred > 0.5, F.ones_like(C_pred),
                          F.zeros_like(C_pred))
    kernel_mask = F.cast(kernel_mask, dtype='float32')
    kernel_mask = F.cast(F.logical_or(kernel_mask, score_gt), dtype='float32')

    kernel_dices = []
    for s, s_pred in zip(
            [s1, s2, s3, s4, s5, s6],
            [s1_pred, s2_pred, s3_pred, s4_pred, s5_pred, s6_pred]):
        s = F.cast(s, dtype='float32')
        kernel_intersection = F.sum(
            s * s_pred * training_masks * kernel_mask, axis=1)
        kernel_union = (F.sum(training_masks * s * kernel_mask, axis=1)
                        + F.sum(training_masks * s_pred * kernel_mask, axis=1)
                        + eps)
        kernel_dices.append(
            1. - F.mean((2. * kernel_intersection / kernel_union)))

    # Average the six per-kernel losses without leaving the graph.
    kernel_dice_loss = F.mean(F.concat(*kernel_dices, dim=0))

    self.kernel_loss = kernel_dice_loss
    self.C_loss = C_dice_loss
    loss = self.lam * C_dice_loss + (1. - self.lam) * kernel_dice_loss
    return loss
def forward(self, x, first_cycle=False):
    """Run one VAE pass and return KL divergence plus a content loss.

    The content loss is normally the negated sigmoid of the discriminator's
    output for the generated image. Per the original author's notes, real
    images are labelled 1, so a higher output means "more real". When
    ``first_cycle`` is true the discriminator is assumed untrained and a
    pixel-wise binary cross-entropy is used instead. The discriminator is
    still evaluated in that case, exactly as before.

    Args:
        x: 4-dimensional image batch, unpacked as
            (batch_size, n_channels, width, height).
        first_cycle: use the cross-entropy content loss when true.

    Returns:
        One loss value per sample.
    """
    n_samples, n_channels, width, height = x.shape

    # The encoder consumes flattened images; its output is split into the
    # latent mean and log-variance.
    flat_input = x.reshape(n_samples, -1)
    mu, log_var = nd.split(self.encoder(flat_input), axis=1, num_outputs=2)

    # Reparametrization trick: z = mu + sigma * eps with eps ~ N(0, 1).
    noise = nd.random_normal(loc=0, scale=1,
                             shape=(n_samples, self.n_latent), ctx=CTX)
    z = mu + nd.exp(0.5 * log_var) * noise

    # KL divergence between the latent distribution and a standard normal.
    kl_term = -0.5 * nd.sum(1 + log_var - mu * mu - nd.exp(log_var), axis=1)

    # Decode, then restore the image layout expected by the discriminator.
    flat_output = self.decoder(z)
    generated = flat_output.reshape(n_samples, n_channels, width, height)

    realness = nd.sigmoid(self.discriminator(generated)).reshape(-1)

    if first_cycle:
        # The 1e-10 offsets keep log() away from zero.
        content_term = -nd.sum(
            flat_input * nd.log(flat_output + 1e-10)
            + (1 - flat_input) * nd.log(1 - flat_output + 1e-10),
            axis=1)
    else:
        content_term = -realness

    return kl_term + content_term
def _split_batch(arg, batch_axis, arg_size):
    """Split ``arg`` into ``arg_size`` pieces along ``batch_axis``.

    Args:
        arg: an NDArray, or a nested structure that ``_flatten`` can turn
            into a flat sequence plus a format descriptor.
        batch_axis: axis along which every array is split.
        arg_size: number of pieces; with a value of 1 or less nothing is
            split and each array is wrapped in a one-element tuple.

    Returns:
        For an NDArray input: the pieces from ``nd.split`` (or ``(arg, )``).
        Otherwise: a list with one regrouped structure per piece, built by
        ``_regroup``.
    """
    should_split = arg_size > 1

    if isinstance(arg, nd.NDArray):
        if should_split:
            return nd.split(arg, arg_size, axis=batch_axis)
        return (arg, )

    flat_items, fmt = _flatten(arg)
    if should_split:
        pieces_per_item = [nd.split(item, arg_size, axis=batch_axis)
                           for item in flat_items]
    else:
        pieces_per_item = [(item, ) for item in flat_items]

    # Transpose "pieces per item" into "items per piece", then rebuild the
    # original nesting for every piece.
    return [_regroup(piece_items, fmt)[0]
            for piece_items in zip(*pieces_per_item)]
def refine_bbox_nd(bbox, bbox_delta, im_info=None, means=None, stds=None):
    """Apply predicted regression deltas to boxes and optionally clip them.

    Args:
        bbox: boxes with (xmin, ymin, xmax, ymax) along axis 1
            (presumably shape (num_boxes, 4) -- TODO confirm).
        bbox_delta: regression deltas; reshaped below so the last axis
            holds (dx, dy, dw, dh).
        im_info: optional image info; the original note assumes
            [[height, width, scale]] with shape (1, 3). When given, the
            refined boxes are clipped to [0, width - 1] / [0, height - 1].
        means: optional per-coordinate means used to de-normalise deltas.
        stds: optional per-coordinate stds used to de-normalise deltas.
            De-normalisation happens only when both are given.

    Returns:
        Refined boxes with the four coordinates along axis 1 and the
        reshaped delta groups along axis 2.
    """
    # Each coordinate keeps a trailing axis of size 1 (no squeeze).
    xmin, ymin, xmax, ymax = nd.split(data=bbox, num_outputs=4, axis=1)
    # The "+ 1." treats coordinates as inclusive pixel indices.
    bbox_width = xmax - xmin + 1.
    bbox_height = ymax - ymin + 1.
    center_x = 0.5 * (xmin + xmax)
    center_y = 0.5 * (ymin + ymax)

    # Group the deltas in fours: shape (dim0, -1, 4). The split then drops
    # the last axis, so every delta has shape (dim0, groups).
    bbox_delta_reshape = nd.Reshape(data=bbox_delta, shape=(0, -1, 4))
    dx, dy, dw, dh = nd.split(data=bbox_delta_reshape, num_outputs=4, axis=2, squeeze_axis=1)
    if (means is not None) and (stds is not None):
        # Undo the normalisation applied to the regression targets.
        dx = dx * stds[0] + means[0]
        dy = dy * stds[1] + means[1]
        dw = dw * stds[2] + means[2]
        dh = dh * stds[3] + means[3]

    # Centres move by a fraction of the box size; sizes scale by exp(delta).
    # Broadcasting pairs each box with every delta group.
    refine_center_x = nd.broadcast_add(lhs=center_x, rhs=nd.broadcast_mul(lhs=bbox_width, rhs=dx))
    refine_center_y = nd.broadcast_add(lhs=center_y, rhs=nd.broadcast_mul(lhs=bbox_height, rhs=dy))
    refined_width = nd.broadcast_mul(lhs=bbox_width, rhs=nd.exp(dw))
    refined_height = nd.broadcast_mul(lhs=bbox_height, rhs=nd.exp(dh))

    # Back to corner form; the "- 1." mirrors the "+ 1." above.
    w_offset = 0.5 * (refined_width - 1.)
    h_offset = 0.5 * (refined_height - 1.)
    # A new axis 1 is inserted so the four coordinates can be concatenated
    # along it.
    refined_xmin = nd.expand_dims(refine_center_x - w_offset, axis=1)
    refined_ymin = nd.expand_dims(refine_center_y - h_offset, axis=1)
    refined_xmax = nd.expand_dims(refine_center_x + w_offset, axis=1)
    refined_ymax = nd.expand_dims(refine_center_y + h_offset, axis=1)
    refined_bbox = nd.concat(refined_xmin, refined_ymin, refined_xmax, refined_ymax, dim=1)

    if im_info is not None:
        # Assumes im_info is [[height, width, scale]] with shape (1, 3).
        # Build the upper bounds [w - 1, h - 1, w - 1, h - 1] with shape
        # (1, 4, 1) so they broadcast against the refined boxes.
        im_hw = nd.slice_axis(im_info, axis=1, begin=0, end=2)
        im_wh = nd.reverse(im_hw, axis=1)
        im_wh = im_wh - 1.
        im_wh = nd.tile(data=im_wh, reps=(1, 2))
        im_wh = nd.Reshape(im_wh, shape=(1, 4, 1))
        refined_bbox = nd.broadcast_minimum(lhs=refined_bbox, rhs=im_wh)
        # Clamp negative coordinates to zero.
        refined_bbox = nd.broadcast_maximum(lhs=refined_bbox, rhs=nd.zeros_like(refined_bbox))
    return refined_bbox
def matmul(self, x, y, transpose_a=False, transpose_b=False):
    """Batched matrix product computed independently per embedding slice.

    ``x`` and ``y`` are split into ``self.embedding_size`` pieces along
    axis 2, each pair is multiplied with ``nd.batch_dot``, and the products
    are joined along a new leading axis.

    The previous version seeded the result with a dummy zeros slice and
    called ``nd.concat`` once per iteration, re-copying the accumulated
    data every time, then stripped the dummy slice. All products are now
    concatenated in one call, which also removes the dependency on the
    zeros array's dtype and context.

    Args:
        x: 3-dimensional NDArray; ``x.shape[1]`` fixes the row count of
            every product.
        y: 3-dimensional NDArray; ``y.shape[1]`` fixes the column count of
            every product.
        transpose_a: forwarded to ``nd.batch_dot``.
        transpose_b: forwarded to ``nd.batch_dot``.

    Returns:
        NDArray of shape (embedding_size, batch, x.shape[1], y.shape[1]).
    """
    m = x.shape[1]    # original note: "field"
    h_k = y.shape[1]

    x_slices = nd.split(x, self.embedding_size, 2)
    y_slices = nd.split(y, self.embedding_size, 2)

    # With a single output, nd.split returns a bare NDArray, not a list.
    if not isinstance(x_slices, (list, tuple)):
        x_slices = [x_slices]
    if not isinstance(y_slices, (list, tuple)):
        y_slices = [y_slices]

    # Each product gets a leading axis of size 1 so they can be joined.
    products = [
        nd.batch_dot(x_part, y_part, transpose_a,
                     transpose_b=transpose_b).reshape((1, -1, m, h_k))
        for x_part, y_part in zip(x_slices, y_slices)
    ]
    return nd.concat(*products, dim=0)
def backward(self, grad_output):
    """Compute gradients with respect to the saved input ``X`` and weight ``W``.

    The forward features are rebuilt as ``[X, A_0 X, A_1 X, ...]``
    concatenated along axis 1, with zeros standing in for every ``A`` that
    is ``None``.

    Args:
        grad_output: gradient of the loss with respect to the output.

    Returns:
        Tuple ``(grad_X, grad_W)``.
    """
    X, W = self.saved_tensors

    # Rebuild the concatenated features used in the forward pass.
    propagated = [
        nd.sparse.dot(A, X) if A is not None else nd.zeros_like(X)
        for A in self.A_list
    ]
    X_out = nd.concat(X, *propagated, dim=1)

    grad_W = nd.dot(X_out.T, grad_output)

    # Cut the gradient into one chunk per concatenated feature block. This
    # relies on nd.split's default axis matching the dim=1 concat above.
    grad_chunks = nd.split(nd.dot(grad_output, W.T),
                           num_outputs=len(self.A_list) + 1)

    contributions = [grad_chunks[0]]
    for A, chunk in zip(self.A_list, grad_chunks[1:]):
        # NOTE(review): A is applied without a transpose -- correct only
        # if each A is symmetric; confirm against the forward definition.
        if A is not None:
            contributions.append(nd.sparse.dot(A, chunk))
        else:
            contributions.append(nd.zeros_like(chunk))

    grad_X = sum(contributions)
    return grad_X, grad_W
def extract_multi_position_matrix_nd(bbox):
    """Build pairwise log-space position features between boxes.

    Args:
        bbox: boxes with (xmin, ymin, xmax, ymax) on the last axis. Axes 0
            and 1 are swapped first; per the original notes the result is
            laid out as [num_fg_classes, num_boxes, 4].

    Returns:
        NDArray with four features on a new axis 3: log |dx / width|,
        log |dy / height| (both floored at log(1e-3)), log width ratio and
        log height ratio, for every pair of boxes.
    """
    def _transposed(values):
        # Swap the last two axes so broadcasting forms all pairs.
        return nd.transpose(values, axes=(0, 2, 1))

    def _log_center_offset(center, extent):
        offset = nd.broadcast_minus(lhs=center, rhs=_transposed(center))
        offset = nd.broadcast_div(offset, extent)
        # The 1e-3 floor keeps log() finite for coincident centres.
        return nd.log(nd.maximum(nd.abs(offset), 1e-3))

    def _log_extent_ratio(extent):
        ratio = nd.broadcast_div(lhs=extent, rhs=_transposed(extent))
        return nd.log(ratio)

    bbox = nd.transpose(bbox, axes=(1, 0, 2))
    xmin, ymin, xmax, ymax = nd.split(data=bbox, num_outputs=4, axis=2)

    # Original note: each is [num_fg_classes, num_boxes, 1].
    bbox_width = xmax - xmin + 1.
    bbox_height = ymax - ymin + 1.
    center_x = 0.5 * (xmin + xmax)
    center_y = 0.5 * (ymin + ymax)

    # Original note: each is [num_fg_classes, num_boxes, num_boxes].
    pairwise_features = [
        _log_center_offset(center_x, bbox_width),
        _log_center_offset(center_y, bbox_height),
        _log_extent_ratio(bbox_width),
        _log_extent_ratio(bbox_height),
    ]

    expanded = [nd.expand_dims(feature, axis=3)
                for feature in pairwise_features]
    return nd.concat(*expanded, dim=3)
def forward(self, inputs, batch_size):
    """Strip trailing padding, compute emission scores and decode the best path.

    Args:
        inputs: token ids; per the original note the shape is
            (sequence_length, batch_size). The sequence is cut at the first
            row whose first column is <= 0.
        batch_size: forwarded to the feature extractor and used as the
            number of pieces when splitting the features.

    Returns:
        ``(tag_seq, score)`` from ``self._viterbi_decode``, or ``None``
        (after printing a diagnostic) when no non-padding row is left.
    """
    # Find the effective length: the index of the first padding row, or
    # the full length when no padding is found.
    total_steps = len(inputs)
    sequence_length = total_steps
    for step in range(total_steps):
        if inputs[step, 0].asscalar() <= 0:
            sequence_length = step
            break

    if sequence_length == 0:
        print("sequence_length=0")
        print(inputs)
        return

    inputs = inputs[0:sequence_length, :]

    # Emission scores from the BiLSTM.
    lstm_feats = self._get_lstm_features(inputs, batch_size)

    # Original author's stated intent: turn
    # [sequence_length, batch_size, tagset_size] into
    # [batch_size, sequence_length, tagset_size], then into
    # (sequence_length, tagset_size).
    # NOTE(review): splitting and concatenating on the same axis looks
    # like it keeps the element order unchanged -- confirm the layout.
    per_sample = nd.split(lstm_feats, num_outputs=batch_size, axis=0)
    lstm_feats = nd.concat(*per_sample, dim=0).reshape(
        sequence_length, self.tagset_size)

    # Best tag path for the given features.
    tag_seq, score = self._viterbi_decode(lstm_feats)
    return tag_seq, score
def evaluate(data_iter_valid, model, state, loss, word_vocab, label_vocab, max_seq_len, only_ne_cate_dic):
    """Evaluate ``model`` on an iterator and report precision/recall/F1.

    Args:
        data_iter_valid: iterable of (batch_x, batch_nature, batch_y).
        model: callable returning four values; the second is the
            prediction and the third the features passed to ``loss``.
        state: passed unchanged to ``model`` as its third argument.
        loss: callable taking the features and the labels split into
            ``max_seq_len`` pieces along axis 1.
        word_vocab: forwarded to ``cal_prf1``.
        label_vocab: forwarded to ``cal_prf1``.
        max_seq_len: number of label pieces per sequence.
        only_ne_cate_dic: category dictionary forwarded to the metric
            helpers.

    Returns:
        ``(prf_dic, valid_loss)``: a DataFrame with columns P / R / F1
        and the mean of the per-batch mean losses.
    """
    def _to_int32(array):
        return array.asnumpy().astype(np.int32, copy=False)

    loss_sum = 0.
    gold_batches, predicted_batches, input_batches = [], [], []

    for n_batch, (batch_x, batch_nature, batch_y) in enumerate(data_iter_valid):
        _, batch_pred, feats, _ = model(batch_x, batch_nature, state)
        batch_loss = loss(feats, nd.split(batch_y, max_seq_len, axis=1))

        predicted_batches.append(_to_int32(batch_pred))
        gold_batches.append(_to_int32(batch_y))
        input_batches.append(_to_int32(batch_x))
        loss_sum += batch_loss.mean().asscalar()

    y_pred = np.vstack(predicted_batches)
    y_true = np.vstack(gold_batches)
    sentences_input = np.vstack(input_batches)
    valid_loss = loss_sum / (n_batch + 1)

    # Compute P / R / F1 on this data set.
    raw_prf_dic = cal_prf1(y_pred.tolist(), y_true.tolist(),
                           sentences_input.tolist(), label_vocab,
                           word_vocab, max_seq_len, only_ne_cate_dic)
    named_prf = convert_signal_to_ne_name(only_ne_cate_dic, raw_prf_dic)
    prf_dic = pd.DataFrame(list(named_prf.values()),
                           index=list(named_prf.keys()),
                           columns=['P', 'R', 'F1'])
    return prf_dic, valid_loss
def predict_LP(self, LP_batch_out):
    """Pick the highest-scoring LP prediction for every sample in the batch.

    Args:
        LP_batch_out: raw network output, regrouped by
            ``self.merge_and_slice`` with ``self.LP_slice_point`` into
            (score, pose_xy, pose_z, pose_r).

    Returns:
        numpy array with one row of 7 values per sample: the sigmoid
        score followed by the six pose values after
        ``self.LP_pose_activation``.
    """
    parts = self.merge_and_slice(LP_batch_out, self.LP_slice_point)

    LP_score = nd.sigmoid(parts[0])
    # Score first, then xy, z and r, along the last axis.
    merged = nd.concat(LP_score, parts[1], parts[2], parts[3], dim=-1)

    per_sample = nd.split(merged, axis=0, num_outputs=len(merged))

    best_predictions = []
    for i, sample in enumerate(per_sample):
        best_index = LP_score[i].reshape(-1).argmax(axis=0)
        candidates = sample.reshape((-1, 7))
        pred = candidates[best_index][0]
        # Column 0 is the score; only the pose columns are activated.
        pred[1:7] = self.LP_pose_activation(pred[1:7])
        best_predictions.append(nd.expand_dims(pred, axis=0))

    return nd.concat(*best_predictions, dim=0).asnumpy()
def _split_box(x, num_outputs, axis, squeeze_axis=False):
    """Split ``x`` with ``nd.split`` and always return a sequence.

    Args:
        x: array to split.
        num_outputs: number of pieces.
        axis: axis to split along.
        squeeze_axis: forwarded to ``nd.split``.

    Returns:
        The list/tuple from ``nd.split``, or a one-element list when
        ``nd.split`` returned a single object.
    """
    pieces = nd.split(x, axis=axis, num_outputs=num_outputs,
                      squeeze_axis=squeeze_axis)
    return pieces if isinstance(pieces, (list, tuple)) else [pieces]
def forward(self, x):
    """Run one VAE pass and return KL divergence plus pixel-wise cross-entropy.

    Side effects: stores the two loss terms on ``self.KL_div_loss`` and
    ``self.logloss``.

    Args:
        x: input batch; per the original note its shape is
            (n_batch, n_channels, width, height). It is flattened to
            (n_batch, -1) before use.

    Returns:
        One loss value per sample.
    """
    n_samples = x.shape[0]
    flat_input = x.reshape(n_samples, -1)

    # The encoder output holds the latent mean and log-variance side by side.
    mu, log_var = nd.split(self.encoder(flat_input), axis=1, num_outputs=2)

    # Reparametrization trick: z = mu + sigma * eps with eps ~ N(0, 1).
    noise = nd.random_normal(loc=0, scale=1,
                             shape=(n_samples, self.n_latent), ctx=CTX)
    z = mu + nd.exp(0.5 * log_var) * noise

    reconstruction = self.decoder(z)

    # KL divergence between the latent distribution and a standard normal.
    self.KL_div_loss = -0.5 * nd.sum(
        1 + log_var - mu * mu - nd.exp(log_var), axis=1)

    # Binary cross-entropy between input and reconstruction; the 1e-10
    # offsets keep log() away from zero.
    log_likelihood = (flat_input * nd.log(reconstruction + 1e-10)
                      + (1 - flat_input) * nd.log(1 - reconstruction + 1e-10))
    self.logloss = -nd.sum(log_likelihood, axis=1)

    return self.KL_div_loss + self.logloss
def hybrid_forward(self, F_geo_true, F_geo_pred):
    """Geometry loss: log-IoU of axis-aligned boxes plus an angle term.

    Both inputs are split into five maps along axis 3:
    top, right, bottom, left distances and the angle theta.

    NOTE(review): there is no array-namespace parameter ``F`` in this
    signature -- confirm how the block is invoked.
    NOTE(review): the per-pixel loss is weighted by ``F_geo_true`` itself;
    confirm this is the intended weighting.

    Args:
        F_geo_true: ground-truth geometry map.
        F_geo_pred: predicted geometry map.

    Returns:
        The sum of the weighted per-pixel loss.
    """
    true_maps = nd.split(F_geo_true, axis=3, num_outputs=5)
    pred_maps = nd.split(F_geo_pred, axis=3, num_outputs=5)
    top_true, right_true, bottom_true, left_true, theta_true = true_maps
    top_pred, right_pred, bottom_pred, left_pred, theta_pred = pred_maps

    area_true = (top_true + bottom_true) * (right_true + left_true)
    area_pred = (top_pred + bottom_pred) * (right_pred + left_pred)

    # Overlap extents: the smaller distance on each side.
    overlap_width = (mx.nd.minimum(right_true, right_pred)
                     + mx.nd.minimum(left_true, left_pred))
    overlap_height = (mx.nd.minimum(top_true, top_pred)
                      + mx.nd.minimum(bottom_true, bottom_pred))
    area_intersect = overlap_width * overlap_height
    area_union = area_true + area_pred - area_intersect

    # The +1.0 terms keep the ratio finite when the areas are zero.
    box_loss = -nd.log((area_intersect + 1.0) / (area_union + 1.0))
    angle_loss = 1 - nd.cos(theta_pred - theta_true)
    geometry_loss = box_loss + self.lambda_value * angle_loss

    return mx.nd.sum(geometry_loss * F_geo_true)
def generate(self, x):
    """Encode ``x``, sample a latent vector and return the decoded image.

    Args:
        x: input batch; per the original note its shape is
            (batch_size, n_channels, width, height).

    Returns:
        The decoder output for the sampled latent vector.
    """
    n_samples = x.shape[0]

    # The encoder output holds the latent mean and log-variance side by side.
    mu, log_var = nd.split(self.encoder(x), axis=1, num_outputs=2)

    # Reparametrization trick: z = mu + sigma * eps with eps ~ N(0, 1).
    noise = nd.random_normal(0, 1, shape=(n_samples, self.n_latent), ctx=CTX)
    z = mu + nd.exp(0.5 * log_var) * noise

    # The decoder receives the latent sample as a (batch, n_latent, 1, 1) map.
    return self.decoder(z.reshape((n_samples, self.n_latent, 1, 1)))
def BBoxCornerToCenter(x, axis=-1, split=False):
    """Convert boxes from (xmin, ymin, xmax, ymax) to (x, y, width, height).

    Args:
        x: NDArray with the four corner coordinates along ``axis``.
        axis: axis that holds the coordinates.
        split: when true, return the four components separately.

    Returns:
        A single NDArray concatenated along ``axis``, or the tuple
        ``(x, y, width, height)`` when ``split`` is true.
    """
    xmin, ymin, xmax, ymax = nd.split(x, axis=axis, num_outputs=4)

    width = xmax - xmin
    height = ymax - ymin
    center_x = xmin + width / 2
    center_y = ymin + height / 2

    if split:
        return center_x, center_y, width, height
    return nd.concat(center_x, center_y, width, height, dim=axis)
def hybrid_forward(self, F, x):
    """Run the three branches and collect local and global outputs.

    Branch 2 is cut into 2 parts and branch 3 into 3 parts along axis 2;
    each part goes through its own ``self.feat`` head. All three branches
    also go through a global ``self.g_feat`` head. When
    ``self.pretrained`` is set, every head output is additionally passed
    through the matching classifier.

    The parts are now produced with ``F.split`` rather than the global
    ``nd.split``, so the block also works when Gluon passes the symbolic
    namespace (hybridized execution).

    Args:
        F: array namespace supplied by Gluon.
        x: input batch.

    Returns:
        ``(outputs, features)``: ``outputs`` holds the five local results
        followed by the three global results; ``features`` holds the three
        global features before classification.
    """
    x = self.base(x)
    x1 = self.branch1(x)
    x2 = self.branch2(x)
    x3 = self.branch3(x)

    # Local parts: heads 0-1 serve branch 2, heads 2-4 serve branch 3.
    outputs = []
    head = 0
    for branch_out, n_parts in ((x2, 2), (x3, 3)):
        parts = F.split(branch_out, axis=2, num_outputs=n_parts)
        for i in range(n_parts):
            part = self.feat[head](parts[i])
            if self.pretrained:
                part = self.classify[head](part)
            outputs.append(part)
            head += 1

    # Global features, one per branch.
    features = [self.g_feat[i](branch_out)
                for i, branch_out in enumerate((x1, x2, x3))]

    if self.pretrained:
        outputs.extend([self.g_classify[i](feature)
                        for i, feature in enumerate(features)])
    else:
        outputs.extend(features)

    return outputs, features
def BBoxCenterToCorner(x, axis=-1, split=False):
    """Convert boxes from (x, y, width, height) to (xmin, ymin, xmax, ymax).

    Args:
        x: NDArray with centre coordinates and sizes along ``axis``.
        axis: axis that holds the four values.
        split: when true, return the four corners separately.

    Returns:
        A single NDArray concatenated along ``axis``, or the tuple
        ``(xmin, ymin, xmax, ymax)`` when ``split`` is true.
    """
    center_x, center_y, width, height = nd.split(x, axis=axis, num_outputs=4)

    half_width = width / 2
    half_height = height / 2
    xmin = center_x - half_width
    ymin = center_y - half_height
    xmax = center_x + half_width
    ymax = center_y + half_height

    if split:
        return xmin, ymin, xmax, ymax
    return nd.concat(xmin, ymin, xmax, ymax, dim=axis)
def train(data_iter_train, data_iter_valid, model, loss, trainer, CTX, num_epochs, word_vocab, label_vocab, max_seq_len, ne_cate_dic):
    """Train ``model`` and print metrics for both data sets after every epoch.

    Args:
        data_iter_train: iterable of (batch_x, batch_nature, batch_y) used
            for training and for the end-of-epoch training-set evaluation.
        data_iter_valid: iterable of the same form used for validation.
        model: callable returning four values; the second is the
            prediction and the third the features passed to ``loss``.
        loss: callable taking the features and the labels split into
            ``max_seq_len`` pieces along axis 1.
        trainer: optimiser wrapper; ``step`` is called with the batch size.
        CTX: device description, only printed here.
        num_epochs: number of passes over the training data.
        word_vocab: forwarded to ``evaluate``.
        label_vocab: maps label ids to tokens for the sample printout.
        max_seq_len: number of label pieces per sequence.
        ne_cate_dic: category dictionary; a copy without the
            "not an entity" key is used for evaluation.
    """
    print('Train on ', CTX)

    # Evaluate on real entity categories only.
    only_ne_cate_dic = ne_cate_dic.copy()
    only_ne_cate_dic.pop('不是实体')
    print(only_ne_cate_dic)
    print(ne_cate_dic)

    for epoch in range(1, num_epochs + 1):
        start = time()  # recorded but not reported
        states = None

        for n_batch, (batch_x, batch_nature, batch_y) in enumerate(data_iter_train):
            with autograd.record():
                _, batch_pred, feats, _ = model(batch_x, batch_nature, states)
                l = loss(feats, nd.split(batch_y, max_seq_len, axis=1))
            l.backward()
            trainer.step(batch_x.shape[0])

            # Only every 100 batches: print the loss and a few samples.
            if (n_batch + 1) % 100 != 0:
                continue

            print("Epoch {0}, n_batch {1}, loss {2}".format(
                epoch, n_batch + 1, l.mean().asscalar()))
            gold_ids = batch_y.asnumpy().astype(np.int32, copy=False)
            predicted_ids = batch_pred.asnumpy().astype(np.int32, copy=False)
            for example in range(3):
                true_label = label_vocab.to_tokens(gold_ids[example].tolist())
                pred_label = label_vocab.to_tokens(
                    predicted_ids[example].tolist())
                print(" Sample {0}: ".format(example))
                print(" True Label {0}: ".format(true_label))
                print(" Pred Label {0}: ".format(pred_label))

        # End of epoch: evaluate on the training set, then the validation set.
        print('Evaluating...')
        prf_dic_train, train_loss = evaluate(
            data_iter_train, model, states, loss, word_vocab, label_vocab,
            max_seq_len, only_ne_cate_dic)
        prf_dic_valid, valid_loss = evaluate(
            data_iter_valid, model, states, loss, word_vocab, label_vocab,
            max_seq_len, only_ne_cate_dic)

        print("===========================================")
        print("Epoch {0}, epoch_loss_train {1}, epoch_loss_valid {2}".format(
            epoch, train_loss, valid_loss))
        print(prf_dic_train)
        print(prf_dic_valid)
        print("===========================================")
        print()
def get_indices(self, boxes):
    """Map box centres to 1-based patch indices on a row-major patch grid.

    The image is tiled with patches of size
    (self.patch_width, self.patch_height). A box centre in patch column
    ``c`` and row ``r`` gets index ``1 + c + floor(W / w) * r``. A leading
    0 is prepended to the result.

    The leading zero used to be allocated with a hard-coded ``mx.gpu()``,
    which fails on CPU-only machines and when the boxes live on another
    device. It is now created on the same context, and with the same
    dtype, as the computed indices.

    Args:
        boxes: NDArray with (x_min, y_min, x_max, y_max) along axis 1.

    Returns:
        NDArray of shape (1, num_boxes + 1): a 0 followed by the patch
        index of every box.
    """
    W = self.image_width
    w, h = self.patch_width, self.patch_height

    x_min, y_min, x_max, y_max = F.split(data=boxes, num_outputs=4, axis=1)
    cx = 0.5 * (x_min + x_max)
    cy = 0.5 * (y_min + y_max)

    patches_per_row = math.floor(W / w)
    indices = 1 + mx.nd.floor(cx / w) + patches_per_row * mx.nd.floor(cy / h)

    # Allocate the leading zero next to the data it is concatenated with.
    leading_zero = mx.nd.zeros(shape=(1, 1), ctx=indices.context,
                               dtype=indices.dtype)
    indices = mx.nd.concat(leading_zero, indices, dim=0)
    return indices.reshape(1, -1)
def forward(self, feature, data, begin_state):
    """Unroll ``self.forward_single`` over axis 2 of ``data``.

    Args:
        feature: passed unchanged to every step.
        data: 4-dimensional array; axis 2 is the step axis. The original
            unpacks its shape as (num_nodes, batch_size, length, _).
        begin_state: state given to the first step.

    Returns:
        ``(outputs, state)``: the per-step outputs stacked along axis 2
        and the state returned by the last step.
    """
    # Unpacking also enforces that data has exactly four axes.
    _, _, length, _ = data.shape

    # One slice per step, with the step axis removed.
    step_inputs = nd.split(data, axis=2, num_outputs=length, squeeze_axis=1)

    state = begin_state
    step_outputs = []
    for step_input in step_inputs:
        step_output, state = self.forward_single(feature, step_input, state)
        step_outputs.append(step_output)

    return nd.stack(*step_outputs, axis=2), state
def hybrid_forward(self, F, score_gt, kernel_gt, score_pred, training_masks, *args, **kwargs):
    """Dice loss over the text map and six kernel maps.

    Fix relative to the previous version: each per-kernel loss was turned
    into a Python float with ``asscalar()`` and wrapped again with
    ``F.array``. That detached the kernel loss from the autograd graph, so
    the kernel predictions received no gradient from it. It also forced a
    device synchronisation per kernel and produced an array on the default
    context. The losses now stay NDArrays and are joined with
    ``F.concat``. The kernel mask, identical for all six kernels, is built
    once.

    Side effects: sets ``self.pixel_acc``, ``self.C_loss`` and
    ``self.kernel_loss``.

    Args:
        F: array namespace supplied by Gluon.
        score_gt: ground-truth text map.
        kernel_gt: ground-truth kernels, split into 6 maps along axis 3.
        score_pred: predictions, split into 7 maps along axis 1 (six
            kernels followed by the text map).
        training_masks: mask applied to the text terms and to the kernel
            mask threshold.

    Returns:
        ``self.lam * text_loss + (1 - self.lam) * kernel_loss``.
    """
    s1, s2, s3, s4, s5, s6 = F.split(kernel_gt, num_outputs=6, axis=3,
                                     squeeze_axis=True)
    s1_pred, s2_pred, s3_pred, s4_pred, s5_pred, s6_pred, C_pred = F.split(
        score_pred, num_outputs=7, axis=1, squeeze_axis=True)

    self.pixel_acc = batch_pix_accuracy(C_pred, score_gt)

    # Dice loss for the text map.
    eps = 1e-5
    intersection = F.sum(score_gt * C_pred * training_masks, axis=1)
    union = (F.sum(training_masks * score_gt, axis=1)
             + F.sum(training_masks * C_pred, axis=1) + eps)
    C_dice_loss = 1. - F.mean((2 * intersection / union))

    # Kernel terms count only where masked text is predicted (> 0.5) or
    # labelled. The mask does not depend on the kernel, so build it once.
    kernel_mask = F.where((C_pred * training_masks > 0.5),
                          F.ones_like(C_pred), F.zeros_like(C_pred))
    kernel_mask = F.cast(F.logical_or(kernel_mask, score_gt), dtype='float32')

    kernel_dices = []
    for s, s_pred in zip(
            [s1, s2, s3, s4, s5, s6],
            [s1_pred, s2_pred, s3_pred, s4_pred, s5_pred, s6_pred]):
        s = F.cast(s, dtype='float32')
        kernel_intersection = F.sum(s * s_pred * kernel_mask, axis=1)
        kernel_union = (F.sum(s * kernel_mask, axis=1)
                        + F.sum(s_pred * kernel_mask, axis=1) + eps)
        kernel_dices.append(
            1. - F.mean((2. * kernel_intersection / kernel_union)))

    # Average the six per-kernel losses without leaving the graph.
    kernel_dice_loss = F.mean(F.concat(*kernel_dices, dim=0))

    self.C_loss = C_dice_loss
    self.kernel_loss = kernel_dice_loss
    loss = self.lam * C_dice_loss + (1. - self.lam) * kernel_dice_loss
    return loss
def forward(self, x):
    """Run the capsule pipeline on ``x`` within the input's device context.

    The output of ``self.conv1`` is split into 32 groups along axis 1.
    Group ``i`` goes through the sub-block ``self.caps<i>`` and is
    multiplied by the weight ``self.w[i]``. The results are concatenated,
    passed through ``self.digitcap`` and summed over axes 1 and 2.

    Args:
        x: input batch; its ``context`` is used as the default device
            while the block runs.

    Returns:
        The output of ``self.digitcap`` summed over axes 1 and 2.
    """
    with x.context:
        channel_groups = nd.split(self.conv1(x), num_outputs=32, axis=1)

        capsule_outputs = [
            nd.dot(getattr(self, "caps{}".format(i))(channel_groups[i]),
                   self.w[i].data())
            for i in range(32)
        ]

        # nd.concat is called without `dim`, so its default axis applies.
        merged = self.digitcap(nd.concat(*capsule_outputs))
        return nd.sum(merged, axis=[1, 2])
def bbox_iou(lhs, rhs, x1y1x2y2=True):
    """Element-wise intersection-over-union of two sets of boxes.

    Args:
        lhs: NDArray with four box values along the last axis.
        rhs: NDArray laid out like ``lhs``.
        x1y1x2y2: when true the values are (xmin, ymin, xmax, ymax);
            otherwise they are (x, y, w, h), where x/y is the centre, and
            are converted to corners first.

    Returns:
        NDArray of IoU values with a trailing axis of size 1.
    """
    lhs_parts = nd.split(lhs, axis=-1, num_outputs=4)
    rhs_parts = nd.split(rhs, axis=-1, num_outputs=4)

    if x1y1x2y2:
        b1_xmin, b1_ymin, b1_xmax, b1_ymax = lhs_parts
        b2_xmin, b2_ymin, b2_xmax, b2_ymax = rhs_parts
    else:
        b1_x, b1_y, b1_w, b1_h = lhs_parts
        b2_x, b2_y, b2_w, b2_h = rhs_parts
        b1_xmin, b1_xmax = b1_x - b1_w / 2., b1_x + b1_w / 2.
        b1_ymin, b1_ymax = b1_y - b1_h / 2., b1_y + b1_h / 2.
        b2_xmin, b2_xmax = b2_x - b2_w / 2., b2_x + b2_w / 2.
        b2_ymin, b2_ymax = b2_y - b2_h / 2., b2_y + b2_h / 2.

    # Intersection: negative overlaps clip to 0; 1e5 is just a large
    # upper bound for nd.clip.
    upper_bound = 1e5
    overlap_w = nd.clip(
        nd.minimum(b1_xmax, b2_xmax) - nd.maximum(b1_xmin, b2_xmin),
        0, upper_bound)
    overlap_h = nd.clip(
        nd.minimum(b1_ymax, b2_ymax) - nd.maximum(b1_ymin, b2_ymin),
        0, upper_bound)
    inter = overlap_w * overlap_h

    # Union: both areas minus the overlap; 1e-16 avoids division by zero.
    area_1 = (b1_xmax - b1_xmin) * (b1_ymax - b1_ymin)
    area_2 = (b2_xmax - b2_xmin) * (b2_ymax - b2_ymin)
    union = (area_1 + 1e-16) + area_2 - inter

    return inter / union
def split(x):
    """Split an array into two halves along the channel axis (axis 1).

    An even channel count is split evenly with ``nd.split``. An odd count
    is cut by slicing at ``channels // 2``, so the second half receives
    the extra channel.

    Args:
        x: array with channels on axis 1.

    Returns:
        Tuple ``(first_half, second_half)``.
    """
    n_channels = x.shape[1]

    if n_channels % 2 == 0:
        first_half, second_half = nd.split(x, axis=1, num_outputs=2)
        return first_half, second_half

    # nd.split needs an even division, so slice manually.
    cut = n_channels // 2
    return x[:, :cut, ...], x[:, cut:, ...]