Example #1
def worker(rank, world_size, args):
    cfg = import_config_from_file(args.config)

    if world_size > 1:
        dist.init_process_group(
            master_ip="localhost",
            master_port=23456,
            world_size=world_size,
            rank=rank,
            dev=rank,
        )
        logger.info("Init process group done")

    logger.info("Prepare dataset")
    train_loader, epoch_size = build_dataloader(cfg.BATCH_SIZE, args.dataset_dir, cfg)
    batch_iter = epoch_size // (cfg.BATCH_SIZE * world_size)

    net = DeepLabV3Plus(class_num=cfg.NUM_CLASSES, pretrained=args.weight_file)
    base_lr = cfg.LEARNING_RATE * world_size
    optimizer = optim.SGD(
        net.parameters(requires_grad=True),
        lr=base_lr,
        momentum=0.9,
        weight_decay=0.00004,
    )

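    # Training step compiled with the legacy jit.trace API: forward pass, loss computation, and backward via the optimizer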
    @jit.trace(symbolic=True, opt_level=2)
    def train_func(data, label, net=None, optimizer=None):
        net.train()
        pred = net(data)
        loss = softmax_cross_entropy(pred, label, ignore_index=cfg.IGNORE_INDEX)
        optimizer.backward(loss)
        return pred, loss

    begin_epoch = 0
    end_epoch = cfg.EPOCHS
    if args.resume is not None:
        pretrained = mge.load(args.resume)
        begin_epoch = pretrained["epoch"] + 1
        net.load_state_dict(pretrained["state_dict"])
        logger.info("load success: epoch %d", begin_epoch)

    itr = begin_epoch * batch_iter
    max_itr = end_epoch * batch_iter

    image = mge.tensor(
        np.zeros([cfg.BATCH_SIZE, 3, cfg.IMG_HEIGHT, cfg.IMG_WIDTH]).astype(np.float32),
        dtype="float32",
    )
    label = mge.tensor(
        np.zeros([cfg.BATCH_SIZE, cfg.IMG_HEIGHT, cfg.IMG_WIDTH]).astype(np.int32),
        dtype="int32",
    )
    exp_name = os.path.abspath(os.path.dirname(__file__)).split("/")[-1]

    for epoch in range(begin_epoch, end_epoch):
        for i_batch, sample_batched in enumerate(train_loader):

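            # Polynomial ("poly") learning-rate decay, recomputed every iteration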
            def adjust_lr(optimizer, itr, max_itr):
                now_lr = base_lr * (1 - itr / (max_itr + 1)) ** 0.9
                for param_group in optimizer.param_groups:
                    param_group["lr"] = now_lr
                return now_lr

            now_lr = adjust_lr(optimizer, itr, max_itr)
            inputs_batched, labels_batched = sample_batched
            labels_batched = np.squeeze(labels_batched, axis=1).astype(np.int32)
            image.set_value(inputs_batched)
            label.set_value(labels_batched)

            optimizer.zero_grad()
            _, loss = train_func(image, label, net=net, optimizer=optimizer)
            optimizer.step()
            running_loss = loss.numpy()[0]

            if rank == 0:
                logger.info(
                    "%s epoch:%d/%d\tbatch:%d/%d\titr:%d\tlr:%g\tloss:%g",
                    exp_name,
                    epoch,
                    end_epoch,
                    i_batch,
                    batch_iter,
                    itr + 1,
                    now_lr,
                    running_loss,
                )
            itr += 1

        if rank == 0:
            save_path = os.path.join(cfg.MODEL_SAVE_DIR, "epoch%d.pkl" % (epoch))
            mge.save({"epoch": epoch, "state_dict": net.state_dict()}, save_path)
            logger.info("save epoch%d", epoch)
Example #2
def test_fill_():
    x = tensor(np.zeros((2, 3, 4)), dtype=np.float32)
    fill_(x, 5.0)

    np.testing.assert_array_equal(
        x.numpy(), np.full(shape=(2, 3, 4), fill_value=5.0, dtype=np.float32))
Example #3
import numpy as np
import megengine as mge  # MegEngine is conventionally abbreviated as mge
import megengine.functional as F  # functional is conventionally abbreviated as F

from megengine import tensor
from megengine import Tensor

# 1. Create a Python list, then convert it to a MegEngine Tensor
py_list = range(5)
print(mge.tensor(py_list))

# 2. Create a NumPy ndarray, then convert it to a MegEngine Tensor
np_ndarray = np.arange(5).astype("float32")
print(mge.tensor(np_ndarray))

# 3. Create a MegEngine Tensor directly with the functional module
mge_tensor = F.arange(5)
print(mge_tensor)

print(mge_tensor.dtype)
print(type(mge_tensor))

new_tensor = mge_tensor.astype("float16")
print(new_tensor)

print(type(mge_tensor.numpy()))

print(tensor([1, 2, 3]))  # In practice we usually prefer float32 Tensors,
print(Tensor([1., 2., 3.]))  # so we habitually add a decimal point to mark the values as floats

matrix_tensor = mge.tensor([[1., 2., 3.], [4., 5., 6.]])
Example #4
def worker(world_size, args):
    # pylint: disable=too-many-statements

    rank = dist.get_rank()
    if world_size > 1:
        # Initialize distributed process group
        logger.info("init distributed process group {} / {}".format(rank, world_size))

    save_dir = os.path.join(args.save, args.arch + "." + args.mode)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir, exist_ok=True)
    mge.set_log_file(os.path.join(save_dir, "log.txt"))

    model = models.__dict__[args.arch]()
    cfg = config.get_finetune_config(args.arch)

    cfg.LEARNING_RATE *= world_size  # scale learning rate in distributed training
    total_batch_size = cfg.BATCH_SIZE * world_size
    steps_per_epoch = 1280000 // total_batch_size
    total_steps = steps_per_epoch * cfg.EPOCHS

    if args.mode != "normal":
        quantize_qat(model, qconfig=Q.ema_fakequant_qconfig)

    if args.checkpoint:
        logger.info("Load pretrained weights from %s", args.checkpoint)
        ckpt = mge.load(args.checkpoint)
        ckpt = ckpt["state_dict"] if "state_dict" in ckpt else ckpt
        model.load_state_dict(ckpt, strict=False)

    if args.mode == "quantized":
        raise ValueError("mode = quantized only used during inference")

    if world_size > 1:
        # Sync parameters
        dist.bcast_list_(model.parameters(), dist.WORLD)

    # Autodiff gradient manager
    gm = autodiff.GradManager().attach(
        model.parameters(),
        callbacks=dist.make_allreduce_cb("MEAN") if world_size > 1 else None,
    )

    optimizer = optim.SGD(
        get_parameters(model, cfg), lr=cfg.LEARNING_RATE, momentum=cfg.MOMENTUM,
    )

    # Define train and valid graph
    def train_func(image, label):
        with gm:
            model.train()
            logits = model(image)
            loss = F.loss.cross_entropy(logits, label, label_smooth=0.1)
            acc1, acc5 = F.topk_accuracy(logits, label, (1, 5))
            gm.backward(loss)
            optimizer.step().clear_grad()
        return loss, acc1, acc5

    def valid_func(image, label):
        model.eval()
        logits = model(image)
        loss = F.loss.cross_entropy(logits, label, label_smooth=0.1)
        acc1, acc5 = F.topk_accuracy(logits, label, (1, 5))
        return loss, acc1, acc5

    # Build train and valid datasets
    logger.info("preparing dataset..")
    train_dataset = data.dataset.ImageNet(args.data, train=True)
    train_sampler = data.Infinite(
        data.RandomSampler(train_dataset, batch_size=cfg.BATCH_SIZE, drop_last=True)
    )
    train_queue = data.DataLoader(
        train_dataset,
        sampler=train_sampler,
        transform=T.Compose(
            [
                T.RandomResizedCrop(224),
                T.RandomHorizontalFlip(),
                cfg.COLOR_JITTOR,
                T.Normalize(mean=128),
                T.ToMode("CHW"),
            ]
        ),
        num_workers=args.workers,
    )
    train_queue = iter(train_queue)
    valid_dataset = data.dataset.ImageNet(args.data, train=False)
    valid_sampler = data.SequentialSampler(
        valid_dataset, batch_size=100, drop_last=False
    )
    valid_queue = data.DataLoader(
        valid_dataset,
        sampler=valid_sampler,
        transform=T.Compose(
            [T.Resize(256), T.CenterCrop(224), T.Normalize(mean=128), T.ToMode("CHW")]
        ),
        num_workers=args.workers,
    )

    def adjust_learning_rate(step, epoch):
        learning_rate = cfg.LEARNING_RATE
        if cfg.SCHEDULER == "Linear":
            learning_rate *= 1 - float(step) / total_steps
        elif cfg.SCHEDULER == "Multistep":
            learning_rate *= cfg.SCHEDULER_GAMMA ** bisect.bisect_right(
                cfg.SCHEDULER_STEPS, epoch
            )
        else:
            raise ValueError(cfg.SCHEDULER)
        for param_group in optimizer.param_groups:
            param_group["lr"] = learning_rate
        return learning_rate

    # Start training
    objs = AverageMeter("Loss")
    top1 = AverageMeter("Acc@1")
    top5 = AverageMeter("Acc@5")
    total_time = AverageMeter("Time")

    t = time.time()
    for step in range(0, total_steps):
        # Linear learning rate decay
        epoch = step // steps_per_epoch
        learning_rate = adjust_learning_rate(step, epoch)

        image, label = next(train_queue)
        n = image.shape[0]
        image = mge.tensor(image, dtype="float32")
        label = mge.tensor(label, dtype="int32")

        loss, acc1, acc5 = train_func(image, label)

        top1.update(100 * acc1.numpy()[0], n)
        top5.update(100 * acc5.numpy()[0], n)
        objs.update(loss.numpy()[0], n)
        total_time.update(time.time() - t)
        t = time.time()
        if step % args.report_freq == 0 and rank == 0:
            logger.info(
                "TRAIN e%d %06d %f %s %s %s %s",
                epoch,
                step,
                learning_rate,
                objs,
                top1,
                top5,
                total_time,
            )
            objs.reset()
            top1.reset()
            top5.reset()
            total_time.reset()
        if step != 0 and step % 10000 == 0 and rank == 0:
            logger.info("SAVING %06d", step)
            save_path = os.path.join(save_dir, str(step))
            if not os.path.exists(save_path):
                os.makedirs(save_path, exist_ok=True)
            mge.save(
                {"step": step, "state_dict": model.state_dict()},
                os.path.join(save_path, "checkpoint.pkl"),
            )
        if step % 10000 == 0 and step != 0:
            _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args)
            logger.info("TEST %06d %f, %f", step, valid_acc, valid_acc5)

    mge.save(
        {"step": step, "state_dict": model.state_dict()},
        os.path.join(save_dir, "checkpoint-final.pkl"),
    )
    _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args)
    logger.info("TEST %06d %f, %f", step, valid_acc, valid_acc5)
Example #5
 def __init__(self):
     super().__init__()
     self.data = megengine.tensor(np.random.random((1, 1, 4, 4)),
                                  dtype=np.float32)
Example #6
def worker(args):
    # pylint: disable=too-many-statements
    rank = dist.get_rank()
    world_size = dist.get_world_size()
    if rank == 0:
        os.makedirs(os.path.join(args.save, args.arch), exist_ok=True)
        megengine.logger.set_log_file(os.path.join(args.save, args.arch, "log.txt"))
    # init process group

    # build dataset
    train_dataloader, valid_dataloader = build_dataset(args)
    train_queue = iter(train_dataloader)  # infinite
    steps_per_epoch = args.steps_per_epoch

    # build model
    model = UNetD(3)
    # Sync parameters
    if world_size > 1:
        dist.bcast_list_(model.parameters(), dist.WORLD)

    # Autodiff gradient manager
    gm = autodiff.GradManager().attach(
        model.parameters(),
        callbacks=dist.make_allreduce_cb("SUM") if world_size > 1 else None,
    )

    # Optimizer
    opt = optim.Adam(
        model.parameters(),
        lr=args.lr,
        weight_decay=args.weight_decay * world_size,  # scale weight decay in "SUM" mode
    )

    # mixup
    def preprocess(image, label):
        if args.dnd:
            image, label = MixUp_AUG(image, label)
        return image, label

    # train and valid func
    def train_step(image, label):
        with gm:
            logits = model(image)
            logits = image - logits
            loss = F.nn.l1_loss(logits, label)
            gm.backward(loss)
            opt.step().clear_grad()
        return loss

    def valid_step(image, label):
        pred = model(image)
        pred = image - pred
        mae_iter = F.nn.l1_loss(pred, label)
        psnr_it = batch_PSNR(pred, label)
        #print(psnr_it.item())
        if world_size > 1:
            mae_iter = F.distributed.all_reduce_sum(mae_iter) / world_size
            psnr_it = F.distributed.all_reduce_sum(psnr_it) / world_size

        return mae_iter, psnr_it

    # multi-step learning rate scheduler with warmup
    def adjust_learning_rate(step):
        #lr = 1e-6 + 0.5 * (args.lr - 1e-6)*(1 + np.cos(step/(args.epochs*steps_per_epoch) * np.pi))
        lr = args.lr * (np.cos(step / (steps_per_epoch * args.epochs) * np.pi) + 1) / 2
        for param_group in opt.param_groups:
            param_group["lr"] = lr
        return lr

    # start training
    for step in range(0, int(args.epochs * steps_per_epoch)):
        #print(step)
        lr = adjust_learning_rate(step)

        t_step = time.time()

        image, label = next(train_queue)
        if step > steps_per_epoch:
            image, label = preprocess(image, label)
        image = megengine.tensor(image)
        label = megengine.tensor(label)
        t_data = time.time() - t_step
        loss = train_step(image, label)
        t_train = time.time() - t_step
        speed = 1. / t_train
        if step % args.print_freq == 0 and dist.get_rank() == 0:
            logging.info(
                "Epoch {} Step {}, Speed={:.2g} mb/s, dp_cost={:.2g}, Loss={:5.2e}, lr={:.2e}".format(
                    step // int(steps_per_epoch),
                    step,
                    speed,
                    t_data / t_train,
                    loss.item(),
                    lr,
                )
            )
        #print(steps_per_epoch)
        if (step + 1) % steps_per_epoch == 0:
            model.eval()
            loss, psnr_v = valid(valid_step, valid_dataloader)
            model.train()
            logging.info(
                "Epoch {} Test mae {:.3f}, psnr {:.3f}".format(
                    (step + 1) // steps_per_epoch,
                    loss.item(),
                    psnr_v.item(),
                )
            )
            if rank == 0:
                megengine.save(
                    {
                        "epoch": (step + 1) // steps_per_epoch,
                        "state_dict": model.state_dict(),
                    },
                    os.path.join(args.save, args.arch, "checkpoint.pkl"),
                )
Example #7
def test_quantize_batchmatmul_activation():
    batch = 4
    in_features = 8
    out_features = 4

    class TestNet(Module):
        def __init__(self, bias):
            super().__init__()
            self.quant = QuantStub()
            self.dequant = DequantStub()
            self.batch_mm = BatchMatMulActivation(batch,
                                                  in_features,
                                                  out_features,
                                                  bias=bias)

        def forward(self, inp):
            out = self.quant(inp)
            out = self.batch_mm(out)
            out = expand_dims(out, -1)
            out = self.dequant(out)
            return out

    inputs = tensor(
        np.random.randn(batch, in_features, out_features).astype(np.float32))
    for bias in (True, False):
        net = TestNet(bias)
        net.train()
        qat_net = quantize_qat(net, inplace=False)
        disable_fake_quant(qat_net)
        normal_outputs = net(inputs)
        qat_outputs = qat_net(inputs)
        np.testing.assert_allclose(normal_outputs.numpy(), qat_outputs.numpy())

        net.eval()
        normal_outputs = net(inputs)
        qat_net.eval()
        qat_outputs = qat_net(inputs)
        np.testing.assert_allclose(normal_outputs.numpy(), qat_outputs.numpy())

        enable_fake_quant(qat_net)
        qat_outputs = qat_net(inputs)
        qnet = quantize(qat_net, inplace=False)
        qnet.eval()
        quantize_outputs = qnet(inputs)
        np.testing.assert_allclose(qat_outputs.numpy(),
                                   quantize_outputs.numpy(),
                                   atol=1e-6)

        @jit.trace(capture_as_const=True)
        def f(x):
            qnet.eval()
            return qnet(x)

        f(inputs)
        file = io.BytesIO()
        f.dump(file, enable_nchw4=True)
        file.seek(0)
        infer_cg = cgtools.GraphInference(file)
        dumped_outputs = list(infer_cg.run(inputs.numpy()).values())[0]
        np.testing.assert_allclose(quantize_outputs.numpy(),
                                   dumped_outputs,
                                   atol=1e-6)
Example #8
    def test_func(
        N,
        IC,
        IH,
        IW,
        OC,
        KH,
        KW,
        SH,
        SW,
        PH,
        PW,
        DH,
        DW,
        groups=1,
        has_bias=True,
        conv_mode: str = "cross_correlation",
        compute_mode: str = "default",
    ):
        inp_scale = np.float32(rng.uniform(low=0.04, high=0.06))
        weight_scale = np.float32(rng.uniform(low=0.04, high=0.06))
        bias_scale = inp_scale * weight_scale
        out_scale = np.float32(rng.uniform(low=0.04, high=0.06))

        inp_dtype = dtype.qint8(inp_scale)
        weight_dtype = dtype.qint8(weight_scale)
        bias_dtype = dtype.qint32(bias_scale)
        out_dtype = dtype.qint8(out_scale)

        inp_fp32 = rng.uniform(low=-1, high=1,
                               size=(N, IC, IH, IW)).astype(np.float32)
        weight_fp32 = rng.uniform(low=-1, high=1,
                                  size=(IC, OC, KH, KW)).astype(np.float32)
        bias_fp32 = rng.uniform(low=-1, high=1,
                                size=(1, OC, 1, 1)).astype(np.float32)

        inp_int8 = dtype.convert_to_qint8(inp_fp32, inp_dtype)
        weight_int8 = dtype.convert_to_qint8(weight_fp32, weight_dtype)
        bias_int32 = dtype.convert_to_qint32(bias_fp32, bias_dtype)

        inp_int8 = mge.tensor(inp_int8, dtype=inp_dtype)
        weight_int8 = mge.Parameter(weight_int8, dtype=weight_dtype)
        bias_int32 = mge.Parameter(bias_int32, dtype=bias_dtype)

        inp_fp32 = inp_int8.astype("float32")
        weight_fp32 = weight_int8.astype("float32")
        bias_fp32 = bias_int32.astype("float32")

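        # Reference result: run the float32 transposed convolution, then round-trip the output through qint8 quantization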
        expected = F.conv_transpose2d(
            inp_fp32,
            weight_fp32,
            bias_fp32 if has_bias else None,
            stride=(SH, SW),
            padding=(PH, PW),
            dilation=(DH, DW),
            groups=groups,
            conv_mode=conv_mode,
            compute_mode=compute_mode,
        )
        expected = dtype.convert_to_qint8(expected.numpy(), out_dtype)
        expected = dtype.convert_from_qint8(expected)

        conv_transpose2d = ConvTranspose2d(
            in_channels=IC,
            out_channels=OC,
            kernel_size=(KH, KW),
            stride=(SH, SW),
            padding=(PH, PW),
            dilation=(DH, DW),
            groups=groups,
            bias=has_bias,
            conv_mode=conv_mode,
            compute_mode=compute_mode,
            dtype=out_dtype,
        )

        conv_transpose2d.weight = mge.Parameter(weight_int8)
        if has_bias:
            conv_transpose2d.bias = mge.Parameter(bias_int32)
        result = conv_transpose2d.forward(inp_int8).numpy()
        result = dtype.convert_from_qint8(result)
        np.testing.assert_allclose(result, expected, atol=out_scale)
Example #9
def roi_pool(rpn_fms,
             rois,
             stride,
             pool_shape,
             roi_type='roi_align',
             labels=None,
             bbox_targets=None):

    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = math.log2(stride[0])
    max_level = math.log2(stride[-1])

    num_fms = len(rpn_fms)
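    # Assign each RoI to an FPN level from its box size (canonical level 4 at 224 px), then clamp to the available levels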
    box_sizes = F.sqrt((rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]))
    level_assignments = F.floor(canonical_level +
                                F.log(box_sizes / canonical_box_size) /
                                np.log(2))
    level_assignments = F.minimum(level_assignments, max_level)
    level_assignments = F.maximum(level_assignments, min_level)
    level_assignments = level_assignments - min_level
    available_masks = F.concat(
        [F.ones(level_assignments.shape[0]),
         F.zeros(num_fms)], axis=0)
    level_assignments = F.concat(
        [level_assignments,
         mge.tensor(np.arange(num_fms, dtype=np.int32))],
        axis=0)
    rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))], axis=0)

    if labels is not None and bbox_targets is not None:
        labels = F.concat([labels, F.ones((num_fms, labels.shape[-1]))],
                          axis=0)
        bbox_targets = F.concat(
            [bbox_targets,
             F.zeros((num_fms, bbox_targets.shape[-1]))], axis=0)

    pool_list, inds_list = [], []
    for i in range(len(rpn_fms)):
        # mask = level_assignments == i
        # inds = mask_to_inds(mask)
        mask = F.equal(level_assignments, i)
        _, inds = F.cond_take(mask > 0, mask)
        rois_fm = rois[inds.astype(np.int32)]
        if roi_type == 'roi_pool':
            pool_fm = F.nn.roi_pooling(rpn_fms[i],
                                       rois_fm,
                                       pool_shape,
                                       mode='max',
                                       scale=1.0 / stride[i])
        elif roi_type == 'roi_align':
            pool_fm = F.nn.roi_align(rpn_fms[i],
                                     rois_fm,
                                     pool_shape,
                                     mode='average',
                                     spatial_scale=1.0 / stride[i],
                                     sample_points=2,
                                     aligned=True)
        pool_list.append(pool_fm)
        inds_list.append(inds)

    fm_order = F.concat(inds_list, axis=0)
    pool_feature = F.concat(pool_list, axis=0)

    ordered_available_masks = available_masks[fm_order]
    # available_inds = mask_to_inds(ordered_available_masks)
    _, available_inds = F.cond_take(ordered_available_masks > 0,
                                    ordered_available_masks)
    available_inds = available_inds.astype(np.int32)
    pool_feature = pool_feature[available_inds.astype(np.int32)]
    rois = rois[fm_order, :][available_inds.astype(np.int32)]
    if labels is not None:
        labels = labels[fm_order][available_inds]
        bbox_targets = bbox_targets[fm_order][available_inds]
        return pool_feature, rois, labels.detach(), bbox_targets.detach()
    else:
        return pool_feature, rois, None, None
Example #10
def test_tensor_routine():
    mge.tensor(np.zeros((1, 2), dtype=np.int32))

    mge.tensor([1])

    mge.tensor(1.5)
Example #11
        return mean, var, decode


def get_net():
    c = 8
    net = AE([8 * c, 16 * c, 24 * c])
    return net


if __name__ == '__main__':
    net = get_net()
    net.eval()
    #x = mge.tensor(np.random.randn(2, 1, 28, 28).astype(np.float32))
    x = np.random.randn(2, 1, 28, 28).astype(np.float32)
    gaussian = np.random.normal(size=(2, net.layers[-1], 3, 3))
    target = np.array([1, 3])
    onehot = np.broadcast_to(
        np.eye(10)[target][:, :, np.newaxis, np.newaxis], (2, 10, 3, 3))
    print(onehot, onehot.shape)
    t_x = mge.tensor()
    t_x.set_value(x)
    t_gaussian = mge.tensor()
    t_gaussian.set_value(gaussian)
    t_onehot = mge.tensor()
    t_onehot.set_value(onehot)
    _, _, out = net(t_x, t_gaussian, t_onehot)
    print(out.shape)
    print(out)

    #net = ResNet(Bottleneck, [3,3,3,3]
Example #12
def test_wrong_dtype():
    with pytest.raises(TypeError):
        mge.tensor(np.zeros((5, 5), dtype=np.float64))

    with pytest.raises(TypeError):
        mge.Parameter(np.zeros((5, 5), dtype=np.int64))
Example #13
    def __init__(self, cfg, batch_size):
        super().__init__()
        self.cfg = cfg
        self.batch_size = batch_size

        self.anchor_gen = layers.DefaultAnchorGenerator(
            base_size=4,
            anchor_scales=self.cfg.anchor_scales,
            anchor_ratios=self.cfg.anchor_ratios,
        )
        self.box_coder = layers.BoxCoder(cfg.reg_mean, cfg.reg_std)

        self.stride_list = np.array(cfg.stride).astype(np.float32)
        self.in_features = ["p3", "p4", "p5", "p6", "p7"]

        # ----------------------- build the backbone ------------------------ #
        bottom_up = getattr(resnet, cfg.backbone)(
            norm=layers.get_norm(cfg.resnet_norm),
            pretrained=cfg.backbone_pretrained)

        # ------------ freeze the weights of resnet stage1 and stage 2 ------ #
        if self.cfg.backbone_freeze_at >= 1:
            for p in bottom_up.conv1.parameters():
                p.requires_grad = False
        if self.cfg.backbone_freeze_at >= 2:
            for p in bottom_up.layer1.parameters():
                p.requires_grad = False

        # ----------------------- build the FPN ----------------------------- #
        in_channels_p6p7 = 2048
        out_channels = 256
        self.backbone = layers.FPN(
            bottom_up=bottom_up,
            in_features=["res3", "res4", "res5"],
            out_channels=out_channels,
            norm=cfg.fpn_norm,
            top_block=layers.LastLevelP6P7(in_channels_p6p7, out_channels),
        )

        backbone_shape = self.backbone.output_shape()
        feature_shapes = [backbone_shape[f] for f in self.in_features]

        # ----------------------- build the RetinaNet Head ------------------ #
        self.head = layers.RetinaNetHead(cfg, feature_shapes)

        self.inputs = {
            "image":
            mge.tensor(
                np.random.random([2, 3, 224, 224]).astype(np.float32),
                dtype="float32",
            ),
            "im_info":
            mge.tensor(
                np.random.random([2, 5]).astype(np.float32),
                dtype="float32",
            ),
            "gt_boxes":
            mge.tensor(
                np.random.random([2, 100, 5]).astype(np.float32),
                dtype="float32",
            ),
        }

        self.loss_normalizer = mge.tensor(100.0)
Example #14
mnist_train_dataloader = DataLoader(
    dataset=mnist_train_dataset,
    sampler=random_sampler,
    transform=Compose([
            RandomResizedCrop(output_size=28),
            # mean and std are the mean and standard deviation of the MNIST data; pixel values range from 0 to 255
            #Normalize(mean=0.1307*255, std=0.3081*255),
            #Pad(2),
            # 'CHW' means converting the image from (height, width, channel) format to (channel, height, width) format
            #ToMode('CHW'),
        ])
)
mnist_test_dataloader = DataLoader(
    dataset=mnist_test_dataset,
    sampler=sequential_sampler,
)


data = mge.tensor()
code = mge.tensor()
for step, batch_sample in enumerate(mnist_test_dataloader):
    imgs, _ = batch_sample
    imgs = imgs.transpose((0,3,1,2))
    data.set_value(imgs)
    reconstruct = (model(data).numpy() * 255).astype('uint8')
    all_img = np.concatenate([imgs, reconstruct], axis=1).reshape(-1, 1, 28, 28)
    show = torchvision.utils.make_grid(torch.tensor(all_img), 32)
    cv2.imwrite(str(step) + '.png', show.numpy().transpose(1,2,0))
    print(type(show))
    print(show.shape)
Example #15
def test_error_shape_linear_interpolate():
    inp = tensor(np.arange(1, 5, dtype=np.float32).reshape(1, 1, 2, 2))

    with pytest.raises(ValueError):
        F.interpolate(inp, scale_factor=2.0, mode="LINEAR")
Example #16
def fpn_roi_target(rpn_rois,
                   im_info,
                   gt_boxes,
                   fg_threshold=config.fg_threshold,
                   top_k=1):

    return_rois, return_labels = [], []
    return_bbox_targets = []
    # get per image proposals and gt_boxes
    batch_per_gpu = im_info.shape[0]
    sampling = True
    # is_sample = True if top_k < 2 else False
    for bid in range(batch_per_gpu):

        gt_boxes_perimg = gt_boxes[bid, :im_info[bid, 5].astype(np.int32), :]
        dummy_gt = F.ones([1, gt_boxes_perimg.shape[1]])

        batch_inds = F.ones((gt_boxes_perimg.shape[0], 1)) * bid
        #if config.proposal_append_gt:
        gt_rois = F.concat([batch_inds, gt_boxes_perimg[:, :4]], axis=1)
        batch_rois_mask = F.equal(rpn_rois[:, 0], bid) > 0
        _, batch_rois_index = F.cond_take(batch_rois_mask, batch_rois_mask)

        # batch_roi_mask = rpn_rois[:, 0] == bid
        # batch_roi_inds = mask_to_inds(batch_roi_mask)
        all_rois = F.concat([rpn_rois[batch_rois_index], gt_rois], axis=0) if sampling \
            else rpn_rois[batch_rois_index]
        # all_rois = F.concat([rpn_rois.ai[batch_roi_inds], gt_rois], axis=0)

        gt_boxes_perimg = F.concat([gt_boxes_perimg, dummy_gt], axis=0)
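        # IoU of every RoI against the ground-truth boxes, computed separately for normal and ignored GT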
        overlaps_normal, overlaps_ignore = box_overlap_ignore_opr(
            all_rois[:, 1:5], gt_boxes_perimg)

        # overlaps_normal, overlaps_normal_indices = F.argsort(overlaps_normal, descending=True)
        # overlaps_ignore, overlaps_ignore_indices = F.argsort(overlaps_ignore, descending=True)
        overlaps_normal_indices = F.argsort(overlaps_normal, descending=True)
        overlaps_normal = F.gather(overlaps_normal, 1, overlaps_normal_indices)
        # overlaps_normal = F.nn.indexing_one_hot(overlaps_normal, overlaps_normal_indices, 1)
        overlaps_ignore_indices = F.argsort(overlaps_ignore, descending=True)
        overlaps_ignore = F.gather(overlaps_ignore, 1, overlaps_ignore_indices)
        # overlaps_ignore = F.nn.indexing_one_hot(overlaps_ignore, overlaps_ignore_indices, 1)

        # gt max and indices, ignore max and indices
        max_overlaps_normal = overlaps_normal[:, :top_k].flatten()
        gt_assignment_normal = overlaps_normal_indices[:, :top_k].flatten()
        max_overlaps_ignore = overlaps_ignore[:, :top_k].flatten()
        gt_assignment_ignore = overlaps_ignore_indices[:, :top_k].flatten()
        # construct sampling masks

        ignore_assign_mask = (max_overlaps_normal < fg_threshold).astype(
            np.float32) * (max_overlaps_ignore > max_overlaps_normal).astype(
                np.float32)
        max_overlaps = max_overlaps_normal * (1 - ignore_assign_mask).astype(np.float32) + \
                max_overlaps_ignore * ignore_assign_mask


        gt_assignment = gt_assignment_normal * (1 - ignore_assign_mask) + \
                gt_assignment_ignore * ignore_assign_mask

        gt_assignment = gt_assignment.astype(np.int32)

        labels = gt_boxes_perimg[gt_assignment, 4]
        fg_mask = (max_overlaps >= fg_threshold).astype(
            np.float32) * (1 - F.equal(labels, config.ignore_label))
        bg_mask = (max_overlaps < config.bg_threshold_high).astype(
            np.float32) * (max_overlaps >= config.bg_threshold_low).astype(
                np.float32)

        fg_mask = fg_mask.reshape(-1, top_k)
        bg_mask = bg_mask.reshape(-1, top_k)
        pos_max = config.num_rois * config.fg_ratio
        fg_inds_mask = _bernoulli_sample_masks(
            fg_mask[:, 0], pos_max, 1) if sampling else F.equal(fg_mask[:, 0], 0)
        neg_max = config.num_rois - fg_inds_mask.sum()
        bg_inds_mask = _bernoulli_sample_masks(
            bg_mask[:, 0], neg_max, 1) if sampling else F.equal(bg_mask[:, 0], 0)
        labels = labels * fg_mask.reshape(-1)

        keep_mask = fg_inds_mask + bg_inds_mask
        keep_mask = keep_mask + F.equal(keep_mask.sum(), 0)
        # keep_inds = mask_to_inds(keep_mask)
        _, keep_inds = F.cond_take(keep_mask > 0, keep_mask)
        #keep_inds = keep_inds[:F.minimum(config.num_rois, keep_inds.shapeof()[0])]
        # labels
        labels = labels.reshape(-1, top_k)[keep_inds]
        gt_assignment = gt_assignment.reshape(
            -1, top_k)[keep_inds].reshape(-1).astype(np.int32)
        target_boxes = gt_boxes_perimg[gt_assignment, :4]
        # rois = all_rois.ai[keep_inds]
        rois = all_rois[keep_inds]
        # target_shape = (rois.shapeof()[0], top_k, rois.shapeof()[-1])
        n, c = rois.shape[0], rois.shape[1]
        target_rois = F.broadcast_to(F.expand_dims(rois, 1),
                                     (n, top_k, c)).reshape(-1, c)
        # target_rois = F.add_axis(rois, 1).broadcast(target_shape).reshape(-1, rois.shapeof()[-1])
        bbox_targets = bbox_transform_opr(target_rois[:, 1:5],
                                          target_boxes[:, :4])
        if config.rcnn_bbox_normalize_targets:
            std_opr = mge.tensor(config.bbox_normalize_stds[None, :]).to(
                rois.device)
            mean_opr = mge.tensor(config.bbox_normalize_means[None, :]).to(
                rois.device)
            minus_opr = mean_opr / std_opr
            bbox_targets = bbox_targets / std_opr - minus_opr
        bbox_targets = bbox_targets.reshape(-1, top_k * 4)
        return_rois.append(rois)
        return_labels.append(labels)
        return_bbox_targets.append(bbox_targets)
    if config.batch_per_gpu == 1:
        rois, labels, bbox_targets = rois.detach(), labels.detach(), bbox_targets.detach()
        return rois, labels, bbox_targets
        # return F.zero_grad(rois), F.zero_grad(labels), F.zero_grad(bbox_targets)
    else:
        return_rois = F.concat(return_rois, axis=0)
        return_labels = F.concat(return_labels, axis=0)
        return_bbox_targets = F.concat(return_bbox_targets, axis=0)

        return_rois = return_rois.detach()
        return_labels = return_labels.detach()
        return_bbox_targets = return_bbox_targets.detach()
        return return_rois, return_labels, return_bbox_targets
Example #17
def test_inappropriate_scale_linear_interpolate():
    inp = tensor(np.arange(1, 3, dtype=np.float32).reshape(1, 1, 2))

    with pytest.raises(ValueError):
        F.interpolate(inp, scale_factor=[2.0, 3.0], mode="LINEAR")
Example #18
    def run(
        N,
        IC,
        OC,
        IH,
        IW,
        KH,
        KW,
        PH,
        PW,
        SH,
        SW,
        has_bias=True,
        nonlinear_mode="IDENTITY",
    ):
        inp_v = np.random.normal(size=(N, IC, IH, IW))
        w_v = np.random.normal(size=(OC, IC, KH, KW))
        b_v = np.random.normal(size=(1, OC, 1, 1))
        inp_scale = mgb.dtype.get_scale(inp_dtype)
        w_scale = mgb.dtype.get_scale(w_dtype)
        b_scale = mgb.dtype.get_scale(b_dtype)

        inpv = mgb.dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
        wv = mgb.dtype.convert_to_qint8(w_v * w_scale, w_dtype)
        bv = mgb.dtype.convert_to_qint32(b_v * b_scale, b_dtype)

        inp_int8 = tensor(inpv, dtype=inp_dtype)
        w_int8 = Parameter(wv, dtype=w_dtype)
        b_int32 = Parameter(bv, dtype=b_dtype)

        inp_fp32 = inp_int8.astype("float32")
        w_fp32 = w_int8.astype("float32")
        b_fp32 = b_int32.astype("float32")

        jit.trace.enabled = True
        b_symbolic = True

        def convert_to_nchw4(var):
            return var.reshape(var.shapeof(0),
                               var.shapeof(1) // 4, 4, var.shapeof(2),
                               var.shapeof(3)).dimshuffle(0, 1, 3, 4, 2)

        @jit.trace(symbolic=b_symbolic)
        def run_conv2d(inp, w, b):
            O = F.conv2d(
                inp,
                w,
                b if has_bias else None,
                stride=(SH, SW),
                padding=(PH, PW),
            )
            if nonlinear_mode == "RELU":
                return F.relu(O)
            else:
                return O

        @jit.trace(symbolic=b_symbolic)
        def run_conv_bias(inp, w, b, format="NCHW"):
            b = b if has_bias else np.zeros_like(b)
            if format == "NCHW4":
                inp = convert_to_nchw4(inp)
                w = convert_to_nchw4(w)
                b = F.flatten(b)
            return F.conv_bias_activation(
                inp,
                w,
                b,
                stride=(SH, SW),
                padding=(PH, PW),
                dtype=out_dtype,
                nonlinear_mode=nonlinear_mode,
            )

        format = "NCHW4" if is_cuda_available() else "NCHW"

        expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
        expected = expected.astype(out_dtype).astype("float32")
        result = run_conv_bias(inp_int8, w_int8, b_int32,
                               format=format).astype("float32")
        if format == "NCHW4":
            result = result.dimshuffle(0, 1, 4, 2, 3)
        expected = F.flatten(expected)
        result = F.flatten(result)
        assertTensorClose(result.numpy(), expected.numpy())
Example #19
 def f():
     return a._broadcast(tensor([1, 10], dtype=np.int32))
Example #20
def _mean(inp):
    inp = mge.tensor(inp)
    return F.mean(inp).numpy()
Example #21
File: fcos.py  Project: wjfwzzc/Models
    def get_ground_truth(self, anchors_list, batched_gt_boxes,
                         batched_num_gts):
        labels_list = []
        offsets_list = []
        ctrness_list = []

        all_level_anchors = F.concat(anchors_list, axis=0)
        for bid in range(batched_gt_boxes.shape[0]):
            gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

            offsets = self.point_coder.encode(
                all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1))

            object_sizes_of_interest = F.concat([
                F.broadcast_to(
                    F.expand_dims(mge.tensor(size, dtype="float32"), axis=0),
                    (anchors_i.shape[0], 2))
                for anchors_i, size in zip(
                    anchors_list, self.cfg.object_sizes_of_interest)
            ], axis=0)
            max_offsets = F.max(offsets, axis=2)
            is_cared_in_the_level = (
                (max_offsets >= F.expand_dims(object_sizes_of_interest[:, 0],
                                              axis=0))
                & (max_offsets <= F.expand_dims(object_sizes_of_interest[:, 1],
                                                axis=0)))

            if self.cfg.center_sampling_radius > 0:
                gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
                is_in_boxes = []
                for stride, anchors_i in zip(self.cfg.stride, anchors_list):
                    radius = stride * self.cfg.center_sampling_radius
                    center_boxes = F.concat([
                        F.maximum(gt_centers - radius, gt_boxes[:, :2]),
                        F.minimum(gt_centers + radius, gt_boxes[:, 2:4]),
                    ], axis=1)
                    center_offsets = self.point_coder.encode(
                        anchors_i, F.expand_dims(center_boxes, axis=1))
                    is_in_boxes.append(F.min(center_offsets, axis=2) > 0)
                is_in_boxes = F.concat(is_in_boxes, axis=1)
            else:
                is_in_boxes = F.min(offsets, axis=2) > 0

            gt_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] -
                                                           gt_boxes[:, 1])
            # FIXME: use repeat instead of broadcast_to
            areas = F.broadcast_to(F.expand_dims(gt_area, axis=1),
                                   offsets.shape[:2])
            areas[~is_cared_in_the_level] = float("inf")
            areas[~is_in_boxes] = float("inf")

            match_indices = F.argmin(areas, axis=0)
            gt_boxes_matched = gt_boxes[match_indices]
            anchor_min_area = F.indexing_one_hot(areas, match_indices, axis=0)

            labels = gt_boxes_matched[:, 4].astype("int32")
            labels[anchor_min_area == float("inf")] = 0
            offsets = self.point_coder.encode(all_level_anchors,
                                              gt_boxes_matched[:, :4])

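            # FCOS centerness target: sqrt of (min/max of left-right offsets) * (min/max of top-bottom offsets)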
            left_right = offsets[:, [0, 2]]
            top_bottom = offsets[:, [1, 3]]
            ctrness = F.sqrt(
                F.maximum(
                    F.min(left_right, axis=1) / F.max(left_right, axis=1), 0) *
                F.maximum(
                    F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1), 0))

            labels_list.append(labels)
            offsets_list.append(offsets)
            ctrness_list.append(ctrness)

        return (
            F.stack(labels_list, axis=0).detach(),
            F.stack(offsets_list, axis=0).detach(),
            F.stack(ctrness_list, axis=0).detach(),
        )
Example #22
def _std(inp):
    inp = mge.tensor(inp)
    return F.std(inp).numpy()
Example #23
    def forward(self, a):
        # add
        if self.mode == "add":
            x = a + mge.tensor(np.float32(10))
            y = a + mge.tensor(self.data1)
            z = x + y
        # sub
        elif self.mode == "sub":
            x = a - mge.tensor(np.float32(10))
            y = a - mge.tensor(self.data1)
            z = x - y
        # mul
        elif self.mode == "mul":
            x = a * mge.tensor(np.float32(10))
            y = mge.tensor(self.data1) * a
            z = x * y
        # max / min
        elif self.mode == "max":
            x = a + mge.tensor(self.data)
            y = a + mge.tensor(self.data2)
            z = F.maximum(x, y)
        elif self.mode == "min":
            x = a + mge.tensor(self.data)
            y = a + mge.tensor(self.data2)
            z = F.minimum(x, y)

        elif self.mode == "pow":
            z = a**2

        elif self.mode == "ceil":
            z = F.ceil(a)

        elif self.mode == "floor":
            z = F.floor(a)

        elif self.mode == "div":
            y = mge.tensor(self.data1) / a
            x = a / mge.tensor(np.float32(2))
            z = y / x
        # cycle_div
        elif self.mode == "cycle_div":
            z = a / mge.tensor(self.data1)
        # abs
        elif self.mode == "abs":
            z = F.abs(a)
        # exp
        elif self.mode == "exp":
            z = F.exp(a)
        # log
        elif self.mode == "log":
            z = F.log(a)
        elif self.mode == "fuse_add_relu":
            y = a + mge.tensor(self.data2)
            z = F.relu(y)
        elif self.mode == "fuse_mul_add3":
            y = a * mge.tensor(self.data1)
            z = y + mge.tensor(self.data2)
        elif self.mode == "fuse_add_sigmoid":
            y = a + mge.tensor(self.data2)
            z = F.sigmoid(y)
        else:
            raise NotImplementedError('no such elemwise mode "%s"' % self.mode)
        return z
Example #24
    def run(
        N,
        IC,
        OC,
        IH,
        IW,
        KH,
        KW,
        PH,
        PW,
        SH,
        SW,
        has_bias=True,
        nonlinear_mode="IDENTITY",
    ):
        inp_v = np.random.normal(size=(N, IC, IH, IW))
        w_v = np.random.normal(size=(OC, IC, KH, KW))
        b_v = np.random.normal(size=(1, OC, 1, 1))
        inp_scale = dtype.get_scale(inp_dtype)
        w_scale = dtype.get_scale(w_dtype)
        b_scale = dtype.get_scale(b_dtype)

        inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
        wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
        bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

        inp_int8 = tensor(inpv, dtype=inp_dtype)
        w_int8 = Parameter(wv, dtype=w_dtype)
        b_int32 = Parameter(bv, dtype=b_dtype)

        inp_fp32 = inp_int8.astype("float32")
        w_fp32 = w_int8.astype("float32")
        b_fp32 = b_int32.astype("float32")

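        # Repack an NCHW tensor into NCHW4 layout: split channels into groups of 4 and move the group axis to the end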
        def convert_to_nchw4(var):
            var = F.reshape(var, (var.shape[0], var.shape[1] // 4, 4,
                                  var.shape[2], var.shape[3]))
            var = F.transpose(var, (0, 1, 3, 4, 2))
            return var

        def run_conv2d(inp, w, b):
            O = F.conv2d(
                inp,
                w,
                b if has_bias else None,
                stride=(SH, SW),
                padding=(PH, PW),
            )
            if nonlinear_mode == "RELU":
                return F.relu(O)
            else:
                return O

        def run_conv_bias(inp, w, b, format="NCHW"):
            b = b if has_bias else Parameter(np.zeros_like(b.numpy()))
            if format == "NCHW4":
                inp = convert_to_nchw4(inp)
                w = convert_to_nchw4(w)
                b = convert_to_nchw4(b)
            return F.quantized.conv_bias_activation(
                inp,
                w,
                b,
                stride=(SH, SW),
                padding=(PH, PW),
                dtype=out_dtype,
                nonlinear_mode=nonlinear_mode,
            )

        format = "NCHW4" if is_cuda_available() else "NCHW"

        expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
        expected = expected.astype(out_dtype).astype("float32")
        result = run_conv_bias(inp_int8, w_int8, b_int32,
                               format=format).astype("float32")
        if format == "NCHW4":
            result = F.transpose(result, (0, 1, 4, 2, 3))
        expected = F.flatten(expected)
        result = F.flatten(result)
        np.testing.assert_allclose(result.numpy(),
                                   expected.numpy(),
                                   atol=outp_scale)
Example #25
def worker(rank, world_size, ngpus_per_node, args):
    # pylint: disable=too-many-statements
    if rank == 0:
        os.makedirs(os.path.join(args.save, args.arch), exist_ok=True)
        megengine.logger.set_log_file(
            os.path.join(args.save, args.arch, "log.txt"))
    # init process group
    if world_size > 1:
        dist.init_process_group(
            master_ip=args.dist_addr,
            port=args.dist_port,
            world_size=world_size,
            rank=rank,
            device=rank % ngpus_per_node,
            backend="nccl",
        )
        logging.info("init process group rank %d / %d", dist.get_rank(),
                     dist.get_world_size())

    # build dataset
    train_dataloader, valid_dataloader = build_dataset(args)
    train_queue = iter(train_dataloader)  # infinite
    steps_per_epoch = 1280000 // (world_size * args.batch_size)

    # build model
    model = snet_model.__dict__[args.arch]()

    # Sync parameters
    if world_size > 1:
        dist.bcast_list_(model.parameters(), dist.WORLD)

    # Autodiff gradient manager
    gm = autodiff.GradManager().attach(
        model.parameters(),
        callbacks=dist.make_allreduce_cb("SUM") if world_size > 1 else None,
    )

    # Optimizer
    params_wd = []
    params_nwd = []
    for n, p in model.named_parameters():
        if n.find("weight") >= 0 and len(p.shape) > 1:
            print("include ", n, p.shape)
            params_wd.append(p)
        else:
            print("NOT include ", n, p.shape)
            params_nwd.append(p)
    opt = optim.SGD(
        [
            {"params": params_wd},
            {"params": params_nwd, "weight_decay": 0},
        ],
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay * world_size,  # scale weight decay in "SUM" mode
    )

    # train and valid func
    def train_step(image, label):
        with gm:
            logits = model(image)
            loss = F.nn.cross_entropy(logits, label, label_smooth=0.1)
            acc1, acc5 = F.topk_accuracy(logits, label, topk=(1, 5))
            gm.backward(loss)
            opt.step().clear_grad()
        return loss, acc1, acc5

    def valid_step(image, label):
        logits = model(image)
        loss = F.nn.cross_entropy(logits, label, label_smooth=0.1)
        acc1, acc5 = F.topk_accuracy(logits, label, topk=(1, 5))
        # calculate mean values
        if world_size > 1:
            loss = F.distributed.all_reduce_sum(loss) / world_size
            acc1 = F.distributed.all_reduce_sum(acc1) / world_size
            acc5 = F.distributed.all_reduce_sum(acc5) / world_size
        return loss, acc1, acc5

    # linear learning rate scheduler
    def adjust_learning_rate(step):
        lr = args.lr * (1 - step / (args.epochs * steps_per_epoch))
        for param_group in opt.param_groups:
            param_group["lr"] = lr
        return lr

    # start training
    objs = AverageMeter("Loss")
    top1 = AverageMeter("Acc@1")
    top5 = AverageMeter("Acc@5")
    clck = AverageMeter("Time")

    for step in range(0, args.epochs * steps_per_epoch):
        lr = adjust_learning_rate(step)

        t = time.time()

        image, label = next(train_queue)
        image = megengine.tensor(image, dtype="float32")
        label = megengine.tensor(label, dtype="int32")

        loss, acc1, acc5 = train_step(image, label)

        objs.update(loss.item())
        top1.update(100 * acc1.item())
        top5.update(100 * acc5.item())
        clck.update(time.time() - t)

        if step % args.print_freq == 0 and dist.get_rank() == 0:
            logging.info(
                "Epoch %d Step %d, LR %.4f, %s %s %s %s",
                step // steps_per_epoch,
                step,
                lr,
                objs,
                top1,
                top5,
                clck,
            )
            objs.reset()
            top1.reset()
            top5.reset()
            clck.reset()

        if (step + 1) % steps_per_epoch == 0:
            model.eval()
            _, valid_acc1, valid_acc5 = valid(valid_step, valid_dataloader,
                                              args)
            model.train()
            logging.info(
                "Epoch %d Test Acc@1 %.3f, Acc@5 %.3f",
                (step + 1) // steps_per_epoch,
                valid_acc1,
                valid_acc5,
            )
            megengine.save(
                {
                    "epoch": (step + 1) // steps_per_epoch,
                    "state_dict": model.state_dict(),
                },
                os.path.join(args.save, args.arch, "checkpoint.pkl"),
            )
Example #26
def test_dropout():
    data = tensor(np.ones(10, dtype=np.float32))
    out = F.dropout(data, 1.0 / 3.0, training=False)

    assert out.numpy().sum() >= 0.0
Example #27
 def worker():
     rank = dist.get_rank()
     m = SyncMinMaxObserver()
     y = mge.tensor(x[rank * 3:(rank + 1) * 3])
     m(y)
     assert m.min_val == np_min and m.max_val == np_max
Example #28
def test_xornet_trace_dump():
    net = XORNet()
    opt = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    gm = GradManager().attach(net.parameters())
    batch_size = 64
    train_dataset = minibatch_generator(batch_size)
    val_dataset = minibatch_generator(batch_size)

    @trace
    def train_fun(data, label):
        with gm:
            net.train()
            pred = net(data)
            loss = F.nn.cross_entropy(pred, label)
            gm.backward(loss)
        return pred, loss

    @trace
    def val_fun(data, label):
        net.eval()
        pred = net(data)
        loss = F.nn.cross_entropy(pred, label)
        return pred, loss

    @trace(symbolic=True, capture_as_const=True)
    def pred_fun(data):
        net.eval()
        pred = net(data)
        pred_normalized = F.softmax(pred)
        return pred_normalized

    train_loss = []
    val_loss = []
    for step, minibatch in enumerate(train_dataset):
        if step > 100:
            break
        data = tensor(minibatch["data"])
        label = tensor(minibatch["label"])
        opt.clear_grad()
        _, loss = train_fun(data, label)
        train_loss.append((step, loss.numpy()))
        if step % 50 == 0:
            minibatch = next(val_dataset)
            data = tensor(minibatch["data"])
            label = tensor(minibatch["label"])
            _, loss = val_fun(data, label)
            loss = loss.numpy()[0]
            val_loss.append((step, loss))
            print("Step: {} loss={}".format(step, loss))
        opt.step()

    test_data = np.array([
        (0.5, 0.5),
        (0.3, 0.7),
        (0.1, 0.9),
        (-0.5, -0.5),
        (-0.3, -0.7),
        (-0.9, -0.1),
        (0.5, -0.5),
        (0.3, -0.7),
        (0.9, -0.1),
        (-0.5, 0.5),
        (-0.3, 0.7),
        (-0.1, 0.9),
    ])

    data = tensor(test_data.astype(np.float32))
    out = pred_fun(data)
    pred_output = out.numpy()
    pred_label = np.argmax(pred_output, 1)

    with np.printoptions(precision=4, suppress=True):
        print("Predicated probability:")
        print(pred_output)

    with mkstemp() as out:
        pred_fun.dump(out, arg_names=["data"], output_names=["label"])
Example #29
 def make_dev_tensor(value, dtype=None, device=None):
     return tensor(value, dtype=dtype, device=device)._dev_tensor()
Example #30
import megengine
import numpy as np

a = megengine.tensor(np.zeros((3, 3, 3, 3), dtype=np.float32))

print(a[:, 0, ...].shape)