def test_batchnorm2d_no_stats():
    """Compare BatchNorm2d (no running statistics) with a NumPy reference.

    Four random batches are pushed through the layer; starting with the third
    batch the layer's ``training`` flag is cleared. Each output is checked
    against a normalisation computed from the statistics of that batch alone.
    """
    channels = 8
    shape = (3, channels, 16, 16)
    batch, _, height, width = shape
    layer = BatchNorm2d(8, track_running_stats=False)
    inp = tensor()
    for step in range(4):
        if step == 2:
            layer.training = False
        sample = np.random.normal(loc=2.3, size=shape).astype(np.float32)
        # Move channels last and flatten everything else so that axis 0
        # enumerates all values belonging to one channel.
        per_channel = np.transpose(sample, [0, 2, 3, 1]).reshape(
            (batch * height * width, channels)
        )
        batch_mean = np.mean(per_channel, axis=0).reshape(1, channels, 1, 1)
        batch_var = np.var(per_channel, axis=0).reshape((1, channels, 1, 1))
        batch_std = np.sqrt(batch_var + layer.eps)
        expected = (sample - batch_mean) / batch_std
        inp.set_value(sample)
        actual = layer(inp)
        assertTensorClose(expected, actual.numpy(), max_err=5e-6)
def test_state_dict():
    """Round-trip an MLP state dict and check strict / non-strict loading.

    A state dict with an unexpected ``"extra"`` entry must load when
    ``strict=False`` and reproduce the original predictions, but raise
    ``KeyError`` under the default loading mode. A state dict that lacks
    ``"dense0.bias"`` must raise ``KeyError`` as well.
    """
    shape = (2, 28)
    inp = tensor()
    inp.set_value(np.random.random(shape))
    reference_net = MLP()
    reference_out = reference_net(inp)

    with BytesIO() as buf:
        mge.save(reference_net.state_dict(), buf)
        buf.seek(0)
        state_dict = mge.load(buf)

    # An unknown key is tolerated only in non-strict mode.
    state_dict["extra"] = None
    restored_net = MLP()
    restored_net.load_state_dict(state_dict, strict=False)
    restored_out = restored_net(inp)
    assertTensorClose(reference_out.numpy(), restored_out.numpy(), max_err=5e-6)
    with pytest.raises(KeyError):
        restored_net.load_state_dict(state_dict)

    # A missing key is rejected too.
    state_dict.pop("extra")
    state_dict.pop("dense0.bias")
    with pytest.raises(KeyError):
        restored_net.load_state_dict(state_dict)
def test_pickle_module():
    """Check that an MLP survives ``mge.save`` / ``mge.load`` unchanged.

    The module is serialised once before its first forward pass and once
    after it; both restored copies must produce the same predictions as the
    original module on the same input.
    """

    def reload(module):
        # Serialise into an in-memory buffer and read the copy straight back.
        with BytesIO() as buf:
            mge.save(module, buf)
            buf.seek(0)
            return mge.load(buf)

    shape = (2, 28)
    inp = tensor()
    inp.set_value(np.random.random(shape))
    net = MLP()

    # pickle before forward
    out_before = reload(net)(inp)

    out_original = net(inp)

    # pickle after forward
    out_after = reload(net)(inp)

    assertTensorClose(out_before.numpy(), out_original.numpy(), max_err=5e-6)
    assertTensorClose(out_before.numpy(), out_after.numpy(), max_err=5e-6)
def test_syncbn_no_stats():
    """Compare SyncBatchNorm (no running statistics) with a NumPy reference.

    Four random 3-D batches are pushed through the layer; starting with the
    third batch the layer's ``training`` flag is cleared. Each output is
    checked against a normalisation built from that batch's own statistics.
    """
    channels = 8
    shape = (3, channels, 4)
    batch, _, length = shape
    layer = SyncBatchNorm(8, track_running_stats=False)
    inp = tensor()
    for step in range(4):
        if step == 2:
            layer.training = False
        sample = np.random.normal(loc=2.3, size=shape).astype(np.float32)
        # Per-channel mean: reduce the batch axis first, then the last axis.
        over_batch = np.mean(sample, axis=0, keepdims=True)
        batch_mean = np.mean(over_batch, axis=2, keepdims=True)
        # Per-channel variance over every non-channel position.
        per_channel = np.transpose(sample, [0, 2, 1]).reshape(
            (batch * length, channels)
        )
        batch_var = np.var(per_channel, axis=0).reshape((1, channels, 1))
        batch_std = np.sqrt(batch_var + layer.eps)
        expected = (sample - batch_mean) / batch_std
        inp.set_value(sample)
        actual = layer(inp)
        assertTensorClose(expected, actual.numpy(), max_err=5e-6)
def test_shared_param():
    """Check how parameter sharing behaves across save / load.

    ``Simple`` exposes two convolutions that share one weight object. The
    sharing must survive serialising the whole network, whereas serialising
    the two convolutions separately yields distinct weight objects that still
    compute the same result.
    """

    def reload(obj):
        # Serialise into an in-memory buffer and read the copy straight back.
        with BytesIO() as buf:
            mge.save(obj, buf)
            buf.seek(0)
            return mge.load(buf)

    net = Simple()
    assert net.conv0.weight is net.conv1.weight
    inp = tensor(np.random.random((1, 1, 8, 8)).astype(np.float32))
    assertTensorClose(net.conv0(inp).numpy(), net.conv1(inp).numpy())

    # Whole-network round trip keeps the weight shared.
    restored_net = reload(net)
    assert restored_net.conv0.weight is restored_net.conv1.weight
    assertTensorClose(
        restored_net.conv0(inp).numpy(), restored_net.conv1(inp).numpy()
    )

    # Independent round trips give independent, but equal-valued, weights.
    first_conv = reload(net.conv0)
    second_conv = reload(net.conv1)
    assert first_conv.weight is not second_conv.weight
    assertTensorClose(first_conv(inp).numpy(), second_conv(inp).numpy())
def run_perf(
    batch_size=64,
    warm_up=True,
    dump_prof=None,
    opt_level=2,
    conv_fastrun=False,
    run_step=True,
    track_bn_stats=True,
    warm_up_iter=20,
    run_iter=100,
    num_gpu=None,
    device=0,
    server=None,
    port=None,
    scale_batch_size=False,
    eager=False,
):
    """Time ResNet-50 training iterations on random data.

    Args:
        batch_size: number of samples per iteration (per GPU after scaling).
        warm_up: run ``warm_up_iter`` untimed iterations before measuring.
        dump_prof: optional path; when given, tracing is run with profiling
            enabled and the profile is written there as JSON.
        opt_level: optimisation level forwarded to ``trace``.
        conv_fastrun: when true, set the convolution execution strategy to
            ``"PROFILE"``.
        run_step: call ``opt.step()`` after every iteration.
        track_bn_stats: forwarded to ``Resnet50``.
        warm_up_iter: number of warm-up iterations.
        run_iter: number of timed iterations.
        num_gpu: when truthy, initialise the distributed process group and
            divide the loss by this value.
        device: passed twice to ``dist.init_process_group`` (the fourth and
            fifth positional arguments).
        server: server address passed to ``dist.init_process_group``.
        port: server port passed to ``dist.init_process_group``.
        scale_batch_size: when true (and ``num_gpu`` is set), divide
            ``batch_size`` by ``num_gpu``.
        eager: when true, trace non-symbolically.

    Returns:
        Average wall-clock seconds per timed iteration.
    """
    if conv_fastrun:
        set_conv_execution_strategy("PROFILE")

    if num_gpu:
        # Use this function's own ``server``/``port`` arguments. The previous
        # code read them from a name ``args`` that is not defined in this
        # function, so the parameters were silently ignored and the call
        # failed whenever no such global existed.
        dist.init_process_group(server, port, num_gpu, device, device)
        if scale_batch_size:
            batch_size = batch_size // num_gpu
        print("Run with data parallel, batch size = {} per GPU".format(
            batch_size))

    data = tensor(np.random.randn(batch_size, 3, 224, 224).astype("float32"))
    label = tensor(np.random.randint(1000, size=[batch_size, ], dtype=np.int32))

    net = Resnet50(track_bn_stats=track_bn_stats)
    opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

    def train_func(data, label):
        logits = net(data)
        loss = F.cross_entropy_with_softmax(logits, label)

        if num_gpu:
            loss = loss / num_gpu

        opt.zero_grad()
        opt.backward(loss)
        return loss

    train_func = trace(
        train_func,
        symbolic=(not eager),
        opt_level=opt_level,
        profiling=dump_prof is not None,
    )

    def run_iterations(count):
        # One iteration: clear gradients, run the traced forward/backward
        # pass, then optionally apply the optimiser update.
        for _ in range(count):
            opt.zero_grad()
            train_func(data, label)
            if run_step:
                opt.step()

    if warm_up:
        print("Warm up ...")
        run_iterations(warm_up_iter)
    print_gpu_usage()
    print("Running train ...")
    start = time.time()
    run_iterations(run_iter)
    time_used = time.time() - start

    if dump_prof:
        with open(dump_prof, "w") as fout:
            json.dump(train_func.get_profile(), fout, indent=2)

    return time_used / run_iter
def get_focal_loss(
    score: Tensor,
    label: Tensor,
    ignore_label: int = -1,
    background: int = 0,
    alpha: float = 0.5,
    gamma: float = 0,
    norm_type: str = "fg",
) -> Tensor:
    r"""Focal Loss for Dense Object Detection:
    <https://arxiv.org/pdf/1708.02002.pdf>

    .. math::

        FL(p_t) = -\alpha_t(1-p_t)^\gamma \log(p_t)

    Args:
        score (Tensor):
            the predicted score with the shape of :math:`(B, A, C)`
        label (Tensor):
            the assigned label of boxes with shape of :math:`(B, A)`
        ignore_label (int):
            the value of ignore class. Default: -1
        background (int):
            the value of background class. Default: 0
        alpha (float):
            parameter to mitigate class imbalance. Default: 0.5
        gamma (float):
            parameter to mitigate easy/hard loss imbalance. Default: 0
        norm_type (str): currently supports 'fg' and 'none':
            'fg': the loss is divided by the number of foreground samples
            (labels greater than ``background``), clamped to at least 1
            'none': the summed loss is returned without normalisation

    Returns:
        the calculated focal loss.

    Raises:
        NotImplementedError: if ``norm_type`` is neither 'fg' nor 'none'.
    """
    # 1 for anchors that take part in the loss, 0 for ignored anchors.
    keep = 1 - (label == ignore_label)
    masked_label = label * keep

    # Build the one-hot target with one extra leading class slot, which is
    # dropped again by the ``[:, :, 1:]`` slice below.
    shape = score.shape
    zeros_shape = F.concat([shape[0], shape[1], shape[2] + 1], axis=0)
    canvas = mge.zeros(zeros_shape, dtype=np.float32)
    ones_shape = F.concat([shape[0], shape[1], tensor(1)], axis=0)
    fill = mge.ones(ones_shape, dtype=np.float32)
    scattered = basic.indexing_set_one_hot(
        canvas, 2, masked_label.astype(np.int32), fill
    )
    target = scattered[:, :, 1:]

    # Positive and negative halves of the focal term.
    positive_term = F.power(1 - score, gamma) * target * F.log(score)
    negative_term = F.power(score, gamma) * (1 - target) * F.log(1 - score)
    weighted = alpha * positive_term + (1 - alpha) * negative_term
    loss = -weighted.sum(axis=2) * keep

    if norm_type == "fg":
        foreground = label > background
        return loss.sum() / F.maximum(foreground.sum(), 1)
    if norm_type == "none":
        return loss.sum()
    raise NotImplementedError
def __init__(self):
    """Initialise the parent class and attach a dict keyed by booleans.

    NOTE(review): judging by the attribute name this dict is meant to be an
    invalid container for a tensor — confirm against the enclosing class.
    """
    super().__init__()
    # ``True`` maps to a fresh tensor, ``False`` to a plain integer.
    bool_keyed = {True: tensor(), False: 0}
    self.error_tensor_key = bool_keyed
def get_mge_forward():
    """Run ``APlusB`` wrapped in ``PyTorchModule`` on float32 copies of a and b.

    ``a`` and ``b`` are taken from the enclosing scope; each is converted via
    its ``numpy()`` method into a float32 tensor before the wrapped module is
    called.

    Returns:
        The result of calling the wrapped module on the two tensors.
    """
    wrapped = PyTorchModule(APlusB())
    lhs, rhs = (tensor(src.numpy(), dtype=np.float32) for src in (a, b))
    return wrapped(lhs, rhs)
def find_top_rpn_proposals(is_train, rpn_bbox_offsets_list, rpn_cls_prob_list,
                           all_anchors_list, im_info):
    """Decode RPN outputs into proposals and keep the best ones per image.

    For every image the per-level offsets are decoded against that level's
    anchors, scored with the softmax probability of class index 1, merged
    across levels, truncated to the pre-NMS top-N by score, passed through
    NMS, and finally prefixed with the image's batch index.

    Args:
        is_train: selects the train or test top-N settings from ``config``;
            when false only one image (batch index 0) is processed.
        rpn_bbox_offsets_list: one entry per level; ``entry[bid]`` is a 3-D
            tensor whose leading axis is moved last and flattened to rows of
            four offsets.
        rpn_cls_prob_list: one entry per level; ``entry[bid]`` is a 3-D tensor
            whose leading axis is moved last and flattened to rows of two
            class scores.
        all_anchors_list: one anchor tensor per level, in the same order.
        im_info: per-image information indexed as ``im_info[bid, :]``;
            column 2 scales the minimum box size (presumably the resize
            scale — TODO confirm).

    Returns:
        Tuple ``(rois, probs)``. Each row of ``rois`` is the batch index
        followed by the proposal coordinates; ``probs`` holds the matching
        scores. With a single image these are that image's tensors, otherwise
        the per-image results concatenated along axis 0.
    """
    prev_nms_top_n = (config.train_prev_nms_top_n
                      if is_train else config.test_prev_nms_top_n)
    post_nms_top_n = (config.train_post_nms_top_n
                      if is_train else config.test_post_nms_top_n)
    batch_per_gpu = config.batch_per_gpu if is_train else 1
    nms_threshold = config.rpn_nms_threshold
    box_min_size = config.rpn_min_box_size
    bbox_normalize_targets = config.rpn_bbox_normalize_targets
    bbox_normalize_means = config.bbox_normalize_means
    bbox_normalize_stds = config.bbox_normalize_stds

    list_size = len(rpn_bbox_offsets_list)

    return_rois = []
    return_probs = []
    for bid in range(batch_per_gpu):
        batch_proposals_list = []
        batch_probs_list = []
        for level in range(list_size):
            # get proposals and probs
            offsets = rpn_bbox_offsets_list[level][bid] \
                .dimshuffle(1, 2, 0).reshape(-1, 4)
            if bbox_normalize_targets:
                # Undo the target normalisation on the offsets that are
                # decoded below. The previous code wrote to an unbound
                # ``pred_offsets`` name here, which raised as soon as this
                # branch ran and never affected ``offsets``.
                std_opr = tensor(bbox_normalize_stds[None, :])
                mean_opr = tensor(bbox_normalize_means[None, :])
                offsets = offsets * std_opr
                offsets = offsets + mean_opr
            all_anchors = all_anchors_list[level]
            proposals = bbox_transform_inv_opr(all_anchors, offsets)
            if config.anchor_within_border:
                proposals = clip_boxes_opr(proposals, im_info[bid, :])
            probs = rpn_cls_prob_list[level][bid] \
                .dimshuffle(1, 2, 0).reshape(-1, 2)
            probs = F.softmax(probs)[:, 1]
            # gather the proposals and probs
            batch_proposals_list.append(proposals)
            batch_probs_list.append(probs)
        batch_proposals = F.concat(batch_proposals_list, axis=0)
        batch_probs = F.concat(batch_probs_list, axis=0)
        # filter the zero boxes.
        batch_keep_mask = filter_boxes_opr(
            batch_proposals, box_min_size * im_info[bid, 2])
        # Multiplying by the mask pushes rejected boxes to a zero score
        # instead of removing them, so shapes stay unchanged.
        batch_probs = batch_probs * batch_keep_mask
        # prev_nms_top_n
        num_proposals = F.minimum(prev_nms_top_n, batch_probs.shapeof()[0])
        batch_probs, idx = F.argsort(batch_probs, descending=True)
        batch_probs = batch_probs[:num_proposals].reshape(-1, 1)
        topk_idx = idx[:num_proposals].reshape(-1)
        batch_proposals = batch_proposals.ai[topk_idx]
        batch_rois = F.concat([batch_proposals, batch_probs], axis=1)
        # For each image, run a total-level NMS, and choose topk results.
        keep_inds = gpu_nms(batch_rois, nms_threshold, post_nms_top_n)
        batch_rois = batch_rois.ai[keep_inds]
        batch_probs = batch_rois[:, -1]
        # cons the rois: replace the trailing score column by a leading
        # batch-index column.
        batch_inds = mge.ones((batch_rois.shapeof()[0], 1)) * bid
        batch_rois = F.concat([batch_inds, batch_rois[:, :-1]], axis=1)
        return_rois.append(batch_rois)
        return_probs.append(batch_probs)

    if batch_per_gpu == 1:
        return batch_rois, batch_probs
    else:
        concated_rois = F.concat(return_rois, axis=0)
        concated_probs = F.concat(return_probs, axis=0)
        return concated_rois, concated_probs