Example #1
def main():
    read_cfg(args.cfg)
    # get aogs
    aogs = []
    for i in range(len(cfg.AOG.dims)):
        aog = get_aog(dim=cfg.AOG.dims[i],
                      min_size=cfg.AOG.min_sizes[i],
                      tnode_max_size=cfg.AOG.tnode_max_size[i],
                      turn_off_unit_or_node=cfg.AOG.TURN_OFF_UNIT_OR_NODE)
        aogs.append(aog)

    factor, filter_list_adjusted, ps = search_factor(aogs, args.param_size)
    print("factor: {}, adjusted filter list: {}, param_size: {} M".format(
        factor, filter_list_adjusted, ps))
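
search_factor is not shown in the snippet. A simplified sketch of the idea, assuming it binary-searches a width multiplier until a parameter-count callback meets the budget (the signature and the count_params callback below are hypothetical, not the repository's actual interface):

def search_factor(count_params, base_filters, target_m, lo=0.1, hi=4.0):
    """Binary-search a width factor so the scaled filter list costs ~target_m M params."""
    # count_params: hypothetical callback mapping a filter list to a size in M
    while hi - lo > 1e-3:
        mid = (lo + hi) / 2.0
        filters = [max(1, int(round(f * mid))) for f in base_filters]
        if count_params(filters) < target_m:
            lo = mid
        else:
            hi = mid
    filters = [max(1, int(round(f * hi))) for f in base_filters]
    return hi, filters, count_params(filters)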
Example #2
def main():
    devs = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    symbol, arg_params, aux_params = mx.model.load_checkpoint(args.prefix, args.epoch)
    if args.cfg:
        read_cfg(args.cfg)
        # get aogs
        aogs = []
        for i in range(len(cfg.AOG.dims)):
            aog = get_aog(dim=cfg.AOG.dims[i], min_size=cfg.AOG.min_sizes[i], tnode_max_size=cfg.AOG.tnode_max_size[i],
                          turn_off_unit_or_node=cfg.AOG.TURN_OFF_UNIT_OR_NODE)
            aogs.append(aog)

        # get symbol
        symbol = aognet.get_symbol(aogs=aogs, cfg=cfg)
    print("symbol loaded")

    if args.dataset == 'cifar10':
        path_imgrec = "../data/cifar10/cifar10_val.rec"
    elif args.dataset == 'cifar100':
        path_imgrec = "../data/cifar100/cifar100_test.rec"

    label_name = 'softmax_label'

    validation_data_iter = mx.io.ImageRecordIter(
                path_imgrec         = path_imgrec,
                label_width         = 1,
                data_name           = 'data',
                label_name          = label_name,
                batch_size          = 128,
                data_shape          = (3,32,32),
                rand_crop           = False,
                rand_mirror         = False,
                num_parts           = 1,
                part_index          = 0)
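    # rand_crop/rand_mirror are off on purpose: evaluation must be deterministic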

    cifar_model = mx.mod.Module(symbol=symbol, context=devs, label_names=[label_name,])
    cifar_model.bind(for_training=False, data_shapes=validation_data_iter.provide_data, label_shapes=validation_data_iter.provide_label)
    cifar_model.set_params(arg_params, aux_params)

    metrics = [mx.metric.create('acc'), mx.metric.create('ce')]
    print("testing!!")
    for batch in validation_data_iter:
        cifar_model.forward(batch, is_train=False)
        for m in metrics:
            cifar_model.update_metric(m, batch.label)

    print("Accuracy: {}, Cross-Entropy: {}".format(metrics[0].get()[1], metrics[1].get()[1]))
Example #3
def main():
    # start program
    read_cfg(args.cfg)
    if args.gpus:
        cfg.gpus = args.gpus
    if args.model_path:
        cfg.model_path = args.model_path
    pprint.pprint(cfg)

    lr = cfg.train.lr
    beta1 = cfg.train.beta1
    wd = cfg.train.wd
    ctx = mx.gpu(0)
    check_point = False
    n_rand = cfg.dataset.n_rand

    symG, symD = DCGAN.get_symbol(cfg)

    if cfg.dataset.data_type == 'mnist':
        X_train, X_test = get_mnist()
        train_iter = mx.io.NDArrayIter(X_train, batch_size=cfg.batch_size)
    else:
        train_iter = ImagenetIter(
            cfg.dataset.path, cfg.batch_size,
            (cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))
    rand_iter = RandIter(cfg.batch_size, n_rand)
    label = mx.nd.zeros((cfg.batch_size, ), ctx=ctx)

    modG = mx.mod.Module(symbol=symG,
                         data_names=('rand', ),
                         label_names=None,
                         context=ctx)
    modG.bind(data_shapes=rand_iter.provide_data)
    modG.init_params(initializer=mx.init.Normal(0.02))
    modG.init_optimizer(optimizer='adam',
                        optimizer_params={
                            'learning_rate': lr,
                            'wd': wd,
                            'beta1': beta1,
                        })
    mods = [modG]

    modD = mx.mod.Module(symbol=symD,
                         data_names=('data', ),
                         label_names=('label', ),
                         context=ctx)
    modD.bind(data_shapes=train_iter.provide_data,
              label_shapes=[('label', (cfg.batch_size, ))],
              inputs_need_grad=True)
    modD.init_params(initializer=mx.init.Normal(0.02))
    modD.init_optimizer(optimizer='adam',
                        optimizer_params={
                            'learning_rate': lr,
                            'wd': wd,
                            'beta1': beta1,
                        })
    mods.append(modD)

    randz = mx.random.normal(0,
                             1.0,
                             shape=(cfg.batch_size, cfg.dataset.n_rand, 1, 1))
    fix_noise = mx.io.DataBatch(data=[mx.ndarray.array(randz)], label=[])

    if not os.path.exists(cfg.out_path):
        os.makedirs(cfg.out_path)

    for epoch in range(cfg.num_epoch):
        train_iter.reset()
        for t, batch in enumerate(train_iter):
            rbatch = rand_iter.next()
            # generate fake data
            modG.forward(rbatch, is_train=True)
            outG = modG.get_outputs()

            # update discriminator on fake
            label[:] = 0
            modD.forward(mx.io.DataBatch(outG, [label]), is_train=True)
            modD.backward()
            gradD = [[grad.copyto(grad.context) for grad in grads]
                     for grads in modD._exec_group.grad_arrays]
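            # stash a copy of the fake-batch gradients; below they are added to
            # the real-batch gradients so D takes one step on fake + real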

            # update discriminator on real
            label[:] = 1
            batch.label = [label]
            modD.forward(batch, is_train=True)
            modD.backward()
            for gradsr, gradsf in zip(modD._exec_group.grad_arrays, gradD):
                for gradr, gradf in zip(gradsr, gradsf):
                    gradr += gradf
            modD.update()

            # update generator
            label[:] = 1
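            # fake images presented with "real" labels: the input gradient from
            # D then tells G how to make its samples look more real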
            modD.forward(mx.io.DataBatch(outG, [label]), is_train=True)
            modD.backward()
            diffD = modD.get_input_grads()
            modG.backward(diffD)
            modG.update()

            if t % cfg.frequent == 0:
                modG.forward(fix_noise, is_train=True)
                outG = modG.get_outputs()
                visual(cfg.out_path + 'GAN_%d_%d.jpg' % (epoch + 1, t + 1),
                       outG[0].asnumpy())
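
RandIter is used but not defined in the snippet. A minimal sketch that matches the (batch_size, n_rand, 1, 1) 'rand' input fed to modG, modeled on the standard MXNet DCGAN example (treat it as an assumption here):

class RandIter(mx.io.DataIter):
    def __init__(self, batch_size, ndim):
        self.batch_size = batch_size
        self.ndim = ndim
        self.provide_data = [('rand', (batch_size, ndim, 1, 1))]
        self.provide_label = []

    def iter_next(self):
        return True  # an endless stream of noise batches

    def getdata(self):
        return [mx.random.normal(0, 1.0,
                                 shape=(self.batch_size, self.ndim, 1, 1))]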
Example #4
def main():

    # read config
    read_cfg(args.cfg)
    cfg.memonger = args.memonger
    pprint.pprint(cfg)

    # get symbol
    aogs = []
    for i in range(len(cfg.AOG.dims)):
        aog = get_aog(dim=cfg.AOG.dims[i],
                      min_size=1,
                      tnode_max_size=cfg.AOG.dims[i],
                      turn_off_unit_or_node=cfg.AOG.TURN_OFF_UNIT_OR_NODE)
        aogs.append(aog)

    symbol = AOGNet.get_symbol(aogs=aogs, cfg=cfg)

    # check shapes
    internals = symbol.get_internals()
    if cfg.dataset.data_type == 'imagenet':
        dshape = (cfg.batch_size, 3, 224, 224)
    elif cfg.dataset.data_type in ['cifar10', 'cifar100']:
        dshape = (cfg.batch_size, 3, 32, 32)
    _, out_shapes, _ = internals.infer_shape(data=dshape)
    shape_dict = dict(zip(internals.list_outputs(), out_shapes))

    # count params size
    stages_kw = {
        'stage_0': 0.0,
        'stage_1': 0.0,
        'stage_2': 0.0,
        'stage_3': 0.0
    }
    total = 0.0
    for k in shape_dict.keys():
        if k.split('_')[-1] in ['weight', 'bias', 'gamma', 'beta']:
            size = 1
            for val in shape_dict[k]:
                size *= val
            for key in stages_kw:
                if key in k:
                    stages_kw[key] += size
            total += size
    print('total number of params: {} M'.format(total / 1e6))
    for k, v in stages_kw.items():
        if v > 0:
            print('{} has param size: {} M'.format(k, v / 1e6))

    # setup memonger
    if args.memonger:
        dshape_ = (1, ) + dshape[1:]
        old_cost = memonger.get_cost(symbol, data=dshape_)
        symbol = memonger.search_plan(symbol, data=dshape_)
        new_cost = memonger.get_cost(symbol, data=dshape_)
        print('batch size=1, old cost= {} MB, new cost= {} MB'.format(
            old_cost, new_cost))
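        # the re-planned graph saves memory by recomputing some activations
        # during the backward pass (compute traded for memory)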

    # training setup
    kv = mx.kvstore.create(args.kv_store)
    devs = mx.cpu() if args.gpus is None else [
        mx.gpu(int(i)) for i in args.gpus.split(',')
    ]
    epoch_size = max(
        int(cfg.dataset.num_examples / cfg.batch_size / kv.num_workers), 1)
    if not os.path.exists(args.modeldir):
        os.makedirs(args.modeldir)
    model_prefix = os.path.join(args.modeldir, 'checkpoint')
    checkpoint = mx.callback.do_checkpoint(model_prefix)
    arg_params = None
    aux_params = None
    if args.resume:
        _, arg_params, aux_params = mx.model.load_checkpoint(
            model_prefix, args.resume)
        begin_epoch = args.resume
    else:
        begin_epoch = 0

    # MOD: save the network description and a visualization of the graph.
    # model.save_checkpoint would write the symbol json but no params file,
    # then raise an AssertionError; symbol.save writes just the network
    # definition to a json file.
    SAVE = True
    if SAVE:
        path = 'large_model-symbol.json'
        symbol.save(path)

    VIZ = True
    if VIZ:
        grp = mx.viz.plot_network(symbol,
                                  node_attrs={
                                      "shape": "oval",
                                      "fixedsize": "false"
                                  })
        grp.save('graph_file.dot',
                 '/home/gabras/deployed/relative-baseline/AOGNet/')

    # TODO: save symbol before this

    TRAIN = False
    if TRAIN:
        # iterator
        train, val = eval(cfg.dataset.data_type + "_iterator")(cfg, kv)
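        # eval() resolves e.g. cifar10_iterator(cfg, kv) from the dataset name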

        initializer = mx.init.Xavier(rnd_type='gaussian',
                                     factor_type="in",
                                     magnitude=2)
        lr_scheduler = multi_factor_scheduler(begin_epoch,
                                              epoch_size,
                                              step=cfg.train.lr_steps,
                                              factor=0.1)

        optimizer_params = {
            'learning_rate': cfg.train.lr,
            'momentum': cfg.train.mom,
            'wd': cfg.train.wd,
            'lr_scheduler': lr_scheduler
        }

        model = mx.mod.Module(context=devs, symbol=symbol)

        if cfg.dataset.data_type in ["cifar10", "cifar100"]:
            eval_metric = ['acc', 'ce']
        elif cfg.dataset.data_type == 'imagenet':
            eval_metric = ['acc', mx.metric.create('top_k_accuracy', top_k=5)]

        model.fit(
            train,
            begin_epoch=begin_epoch,
            num_epoch=cfg.num_epoch,
            eval_data=val,
            eval_metric=eval_metric,
            kvstore=kv,
            optimizer='sgd',  # ['sgd', 'nag']
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            initializer=initializer,
            allow_missing=True,
            batch_end_callback=mx.callback.Speedometer(cfg.batch_size,
                                                       args.frequent),
            epoch_end_callback=checkpoint)
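
multi_factor_scheduler is a small helper these training scripts share. A sketch of the usual definition (an assumption, not necessarily this repository's exact code):

def multi_factor_scheduler(begin_epoch, epoch_size, step=[60, 75, 90], factor=0.1):
    # drop the learning rate by `factor` at each epoch in `step`,
    # shifted so that resumed runs skip steps that already passed
    step_ = [epoch_size * (x - begin_epoch) for x in step if x - begin_epoch > 0]
    return mx.lr_scheduler.MultiFactorScheduler(step=step_, factor=factor) if len(step_) else None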
Example #5
def main():
    read_cfg(args.cfg)
    if args.gpus:
        cfg.gpus = args.gpus
    if args.model_path:
        cfg.model_path = args.model_path
    pprint.pprint(cfg)

    # get symbol
    symbol = seresnet.get_symbol(cfg)

    kv = mx.kvstore.create(cfg.kv_store)
    devs = mx.cpu() if cfg.gpus is None else [
        mx.gpu(int(i)) for i in cfg.gpus.split(',')
    ]
    epoch_size = max(
        int(cfg.dataset.num_examples / cfg.batch_size / kv.num_workers), 1)
    begin_epoch = cfg.model_load_epoch if cfg.model_load_epoch else 0
    if not os.path.exists(cfg.model_path):
        os.mkdir(cfg.model_path)
    model_prefix = cfg.model_path + "seresnet-{}-{}-{}-{}".format(
        cfg.dataset.data_type, cfg.network.depth, kv.rank, 2)
    checkpoint = mx.callback.do_checkpoint(model_prefix)
    arg_params = None
    aux_params = None

    if cfg.retrain:
        print("loading pretrained parameters...")
        _, arg_params, aux_params = mx.model.load_checkpoint(
            'model/resnet-tiny-imagenet-50-0', 100)
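        # NOTE: the pretrained checkpoint prefix and epoch (100) are hardcoded;
        # point them at the checkpoint you actually want to warm-start from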

    if cfg.memonger:
        import memonger
        symbol = memonger.search_plan(
            symbol,
            data=(cfg.batch_size, 3, 32,
                  32) if cfg.dataset.data_type == "cifar10" else
            (cfg.batch_size, 3, 224, 224))

    ## data rec path
    if cfg.dataset.data_type == "cifar10":
        train_rec = os.path.join(cfg.dataset.data_dir, "cifar10_train.rec")
        val_rec = os.path.join(cfg.dataset.data_dir, "cifar10_val.rec")
    elif cfg.dataset.data_type == "cifar100":
        train_rec = os.path.join(cfg.dataset.data_dir, "cifar100_train.rec")
        val_rec = os.path.join(cfg.dataset.data_dir, "cifar100_test.rec")
    elif cfg.dataset.data_type == "tiny-imagenet":
        train_rec = os.path.join(cfg.dataset.data_dir,
                                 "tiny-imagenet-10_train.rec")
        val_rec = os.path.join(cfg.dataset.data_dir,
                               "tiny-imagenet-10_val.rec")
    else:
        val_rec = os.path.join(cfg.dataset.data_dir, "val_256_q95.rec")
        if cfg.dataset.aug_level == 1:
            train_rec = os.path.join(cfg.dataset.data_dir, "train_256_q95.rec")
        else:
            train_rec = os.path.join(cfg.dataset.data_dir, "train_480_q95.rec")

    train = mx.io.ImageRecordIter(
        path_imgrec=train_rec,
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        data_shape=(3, 32, 32)
        if cfg.dataset.data_type in ["cifar10", "cifar100"] else (3, 224, 224),
        batch_size=cfg.batch_size,
        pad=4 if cfg.dataset.data_type in ["cifar10", "cifar100"] else 0,
        fill_value=127,  # only used when pad is non-zero
        rand_crop=True,
        max_random_scale=1.0,  # 480 with imagenet, 32 with cifar10
        min_random_scale=1.0 if cfg.dataset.data_type in [
            "cifar10", "cifar100"
        ] else 1.0 if cfg.dataset.aug_level == 1 else 0.533,  # 256.0/480.0
        max_aspect_ratio=0 if cfg.dataset.data_type in ["cifar10", "cifar100"]
        else 0 if cfg.dataset.aug_level == 1 else 0.25,
        random_h=0 if cfg.dataset.data_type in ["cifar10", "cifar100"] else
        0 if cfg.dataset.aug_level == 1 else 36,  # 0.4*90
        random_s=0 if cfg.dataset.data_type in ["cifar10", "cifar100"] else
        0 if cfg.dataset.aug_level == 1 else 50,  # 0.4*127
        random_l=0 if cfg.dataset.data_type in ["cifar10", "cifar100"] else
        0 if cfg.dataset.aug_level == 1 else 50,  # 0.4*127
        max_rotate_angle=0 if cfg.dataset.aug_level <= 2 else 10,
        max_shear_ratio=0 if cfg.dataset.aug_level <= 2 else 0.1,
        rand_mirror=True,
        shuffle=True,
        num_parts=kv.num_workers,
        part_index=kv.rank)

    val = mx.io.ImageRecordIter(path_imgrec=val_rec,
                                label_width=1,
                                data_name='data',
                                label_name='softmax_label',
                                batch_size=cfg.batch_size,
                                data_shape=(3, 32, 32) if cfg.dataset.data_type
                                in ["cifar10", "cifar100"] else (3, 224, 224),
                                rand_crop=False,
                                rand_mirror=False,
                                num_parts=kv.num_workers,
                                part_index=kv.rank)

    model = mx.model.FeedForward(
        ctx=devs,
        symbol=symbol,
        arg_params=arg_params,
        aux_params=aux_params,
        num_epoch=200,  # same epoch budget for CIFAR and ImageNet here
        begin_epoch=begin_epoch,
        learning_rate=cfg.train.lr,
        momentum=cfg.train.mom,
        wd=cfg.train.wd,
        # optimizer           = 'nag',
        optimizer='sgd',
        initializer=mx.init.Xavier(rnd_type='gaussian',
                                   factor_type="in",
                                   magnitude=2),
        lr_scheduler=multi_factor_scheduler(begin_epoch,
                                            epoch_size,
                                            step=cfg.train.lr_steps,
                                            factor=0.1),
    )

    model.fit(X=train,
              eval_data=val,
              eval_metric=['acc', 'ce']
              if cfg.dataset.data_type in ["cifar10", "cifar100"] else
              ['acc', mx.metric.create('top_k_accuracy', top_k=2)],
              kvstore=kv,
              batch_end_callback=mx.callback.Speedometer(
                  cfg.batch_size, cfg.frequent),
              epoch_end_callback=checkpoint)

    logging.info("top-1 and top-5 acc is {}".format(
        model.score(
            X=val,
            eval_metric=['acc',
                         mx.metric.create('top_k_accuracy', top_k=5)])))
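
Two notes on this example: it uses the legacy mx.model.FeedForward API, whereas the other examples use mx.mod.Module, and cfg.dataset.aug_level acts as a three-step augmentation switch: level 1 enables only padded random crops and mirroring, level 2 adds scale, aspect-ratio and color jitter, and level 3 additionally enables rotation and shear.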
Example #6
def main():
    # start program
    read_cfg(args.cfg)
    if args.gpus:
        cfg.gpus = args.gpus
    if args.model_path:
        cfg.model_path = args.model_path
    pprint.pprint(cfg)

    lr = cfg.train.lr
    beta1 = cfg.train.beta1
    wd = cfg.train.wd
    ctx = mx.gpu(0)
    devs = mx.cpu() if cfg.gpus is None else [
        mx.gpu(int(i)) for i in cfg.gpus.split(',')
    ]
    check_point = False
    load_model = False
    model_path = './SavedModel'

    global modG_A, modG_B, modD_A, modD_B, cycleLoss_excu, label
    label = mx.nd.zeros((cfg.batch_size, 1, cfg.dataset.dh, cfg.dataset.dw),
                        ctx=ctx)
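    # the label is a (batch, 1, dh, dw) map rather than one scalar per image:
    # the discriminators score real/fake per patch (PatchGAN-style)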

    symG_A, symG_B, symD_A, symD_B = cycleGAN.get_symbol(cfg)
    # Generator A
    modG_A = mx.mod.Module(symbol=symG_A,
                           data_names=('dataA', ),
                           label_names=None,
                           context=devs)
    modG_A.bind(data_shapes=[('dataA', (cfg.batch_size, cfg.dataset.c,
                                        cfg.dataset.h, cfg.dataset.w))],
                inputs_need_grad=True)
    modG_A.init_params(initializer=mx.init.Normal(0.02))
    modG_A.init_optimizer(optimizer='adam',
                          optimizer_params={
                              'learning_rate': lr,
                              'wd': wd,
                              'beta1': beta1,
                          })

    # Generator B
    modG_B = mx.mod.Module(symbol=symG_B,
                           data_names=('dataB', ),
                           label_names=None,
                           context=devs)
    modG_B.bind(data_shapes=[('dataB', (cfg.batch_size, cfg.dataset.c,
                                        cfg.dataset.h, cfg.dataset.w))],
                inputs_need_grad=True)
    modG_B.init_params(initializer=mx.init.Normal(0.02))
    modG_B.init_optimizer(optimizer='adam',
                          optimizer_params={
                              'learning_rate': lr,
                              'wd': wd,
                              'beta1': beta1,
                          })

    # Discriminator A
    modD_A = mx.mod.Module(symbol=symD_A,
                           data_names=('dataC', ),
                           label_names=('labelC', ),
                           context=devs)
    modD_A.bind(data_shapes=[('dataC', (cfg.batch_size, cfg.dataset.c,
                                        cfg.dataset.h, cfg.dataset.w))],
                label_shapes=[('labelC', (cfg.batch_size, 1, cfg.dataset.dh,
                                          cfg.dataset.dw))],
                inputs_need_grad=True)
    modD_A.init_params(initializer=mx.init.Normal(0.02))
    modD_A.init_optimizer(optimizer='adam',
                          optimizer_params={
                              'learning_rate': lr,
                              'wd': wd,
                              'beta1': beta1,
                          })

    # Discriminator B
    modD_B = mx.mod.Module(symbol=symD_B,
                           data_names=('dataD', ),
                           label_names=('labelD', ),
                           context=devs)
    modD_B.bind(data_shapes=[('dataD', (cfg.batch_size, cfg.dataset.c,
                                        cfg.dataset.h, cfg.dataset.w))],
                label_shapes=[('labelD', (cfg.batch_size, 1, cfg.dataset.dh,
                                          cfg.dataset.dw))],
                inputs_need_grad=True)
    modD_B.init_params(initializer=mx.init.Normal(0.02))
    modD_B.init_optimizer(optimizer='adam',
                          optimizer_params={
                              'learning_rate': lr,
                              'wd': wd,
                              'beta1': beta1,
                          })

    cycleLoss = cycleGAN.getAbsLoss()
    cycleLoss_excu = cycleLoss.simple_bind(
        ctx=ctx,
        grad_req='write',
        cycle=(cfg.batch_size, cfg.dataset.c, cfg.dataset.h, cfg.dataset.w),
        data=(cfg.batch_size, cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))

    # load params
    if load_model:
        modG_A.load_params(os.path.join(model_path, 'generatorA'))
        modG_B.load_params(os.path.join(model_path, 'generatorB'))
        modD_A.load_params(os.path.join(model_path, 'discriminatorA'))
        modD_B.load_params(os.path.join(model_path, 'discriminatorB'))

    # load train data to iterator
    dataA = glob.glob(os.path.join(cfg.dataset.path, 'trainA/*.jpg'))
    dataB = glob.glob(os.path.join(cfg.dataset.path, 'trainB/*.jpg'))
    dataA_iter = ImagenetIter(dataA, cfg.batch_size,
                              (cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))
    dataB_iter = ImagenetIter(dataB, cfg.batch_size,
                              (cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))

    # load test data to iterator
    testA = glob.glob(os.path.join(cfg.dataset.path, 'testA/*.jpg'))
    testB = glob.glob(os.path.join(cfg.dataset.path, 'testB/*.jpg'))
    testA_iter = ImagenetIter(testA, cfg.batch_size,
                              (cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))
    testB_iter = ImagenetIter(testB, cfg.batch_size,
                              (cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))

    if not os.path.exists(cfg.out_path):
        os.makedirs(cfg.out_path)

    test = 0
    for epoch in range(cfg.num_epoch):
        dataA_iter.reset()
        dataB_iter.reset()
        for npic in range(cfg.dataset.num_pics):
            inputA = dataA_iter.getdata()
            inputB = dataB_iter.getdata()
            l1lossA, l1lossB, gradG_A, gradG_B, DlossA, DlossB = train_generator(
                inputA, inputB, 10)
            modG_A.forward(mx.io.DataBatch(data=inputA, label=None),
                           is_train=True)
            fakeB = modG_A.get_outputs()
            modG_B.forward(mx.io.DataBatch(data=inputB, label=None),
                           is_train=True)
            fakeA = modG_B.get_outputs()
            lossD_A = train_discriminator(modD_A, inputA, fakeA)
            lossD_B = train_discriminator(modD_B, inputB, fakeB)

            # update modG and modD
            update_module(modG_A, gradG_A)
            update_module(modG_B, gradG_B)

            if npic % cfg.frequent == 0:
                print('epoch:', epoch + 1, 'iteration:', npic, 'lossD_A:', lossD_A,
                      'lossD_B:', lossD_B, 'l1loss_a:', l1lossA, 'l1loss_b:',
                      l1lossB, 'DlossA:', DlossA, 'DlossB:', DlossB)

        # apply model to test data and save result pics
        if test >= cfg.dataset.num_pics // 3:
            testA_iter.reset()
            testB_iter.reset()
            test = 0
        A_B_As = []
        B_A_Bs = []
        for _ in range(3):
            testA = testA_iter.getdata()
            testB = testB_iter.getdata()
            test += 1
            # visualize A-B-A
            modG_A.forward(mx.io.DataBatch(data=testA, label=None),
                           is_train=True)
            fakeB = modG_A.get_outputs()
            modG_B.forward(mx.io.DataBatch(data=fakeB, label=None),
                           is_train=True)
            cycleA = modG_B.get_outputs()
            A_B_As.append(
                np.concatenate((testA[0].asnumpy(), fakeB[0].asnumpy(),
                                cycleA[0].asnumpy())))

            # visualize B-A-B
            modG_B.forward(mx.io.DataBatch(data=testB, label=None),
                           is_train=True)
            fakeA = modG_B.get_outputs()
            modG_A.forward(mx.io.DataBatch(data=fakeA, label=None),
                           is_train=True)
            cycleB = modG_A.get_outputs()
            B_A_Bs.append(
                np.concatenate((testB[0].asnumpy(), fakeA[0].asnumpy(),
                                cycleB[0].asnumpy())))

        A_B_A = np.concatenate((A_B_As[0], A_B_As[1], A_B_As[2]))
        B_A_B = np.concatenate((B_A_Bs[0], B_A_Bs[1], B_A_Bs[2]))
        visual(os.path.join(cfg.out_path, 'A_B_A' + str(epoch) + '.jpg'),
               A_B_A)
        visual(os.path.join(cfg.out_path, 'B_A_B' + str(epoch) + '.jpg'),
               B_A_B)

        ## save model
        modG_A.save_params(os.path.join(model_path, 'generatorA'))
        modG_B.save_params(os.path.join(model_path, 'generatorB'))
        modD_A.save_params(os.path.join(model_path, 'discriminatorA'))
        modD_B.save_params(os.path.join(model_path, 'discriminatorB'))
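
update_module is used above but not defined in the snippet. A plausible sketch, assuming it overwrites the module's gradient buffers with externally computed gradients before stepping (a hypothetical reconstruction, not the original helper):

def update_module(mod, grads):
    # copy the supplied gradients into the executor's gradient arrays...
    for dst_list, src_list in zip(mod._exec_group.grad_arrays, grads):
        for dst, src in zip(dst_list, src_list):
            dst[:] = src
    # ...then apply one optimizer step with them
    mod.update()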
Example #7
def main():
    read_cfg(args.cfg)
    if args.gpus:
        cfg.gpus = args.gpus
    if args.model_path:
        cfg.model_path = args.model_path
    pprint.pprint(cfg)

    # get symbol
    symbol = hievqa.get_symbol(cfg)
    kv = mx.kvstore.create(cfg.kv_store)
    devs = mx.cpu() if cfg.gpus is None else [
        mx.gpu(int(i)) for i in cfg.gpus.split(',')
    ]
    begin_epoch = cfg.model_load_epoch if cfg.model_load_epoch else 0
    if not os.path.exists(cfg.model_path):
        os.mkdir(cfg.model_path)
    model_prefix = cfg.model_path + "hierarchical_VQA"
    checkpoint = mx.callback.do_checkpoint(model_prefix)

    # data iter
    train_iter = hieloader.VQAIter(cfg)

    if cfg.train.lr_factor_epoch > 0:
        step = cfg.train.lr_factor_epoch * (train_iter.n_total //
                                            cfg.batch_size)
    else:
        step = 1
    opt_args = {}
    opt_args['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
        step=step, factor=cfg.train.lr_factor)
    optimizer = mx.optimizer.Adam(learning_rate=cfg.train.lr,
                                  beta1=cfg.train.beta1,
                                  beta2=cfg.train.beta2,
                                  wd=cfg.train.wd,
                                  **opt_args)

    model = mx.mod.Module(context=devs,
                          symbol=symbol,
                          data_names=train_iter.data_names,
                          label_names=train_iter.label_names)

    if cfg.retrain:
        _, arg_params, __ = mx.model.load_checkpoint(model_prefix,
                                                     cfg.model_load_epoch)
    else:
        # containing only the skip thought weights
        arg_params = pickle.load(open(cfg.train.skip_thought_dict, 'rb'))

    embed_param = {}
    embed_param['embed_weight'] = arg_params['embed_weight']
    initializer = mx.initializer.Load(embed_param,
                                      default_init=mx.initializer.Uniform(
                                          cfg.train.uni_mag),
                                      verbose=True)
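    # mx.initializer.Load takes 'embed_weight' from embed_param and falls back
    # to Uniform(cfg.train.uni_mag) for every other parameter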

    def top1_accuracy(labels, preds):
        pred_labels = np.argmax(preds, axis=1)
        n_correct = np.where(labels == pred_labels)[0].size
        return n_correct / np.float32(labels.size)

    metrics = [
        mx.metric.CrossEntropy(),
        mx.metric.CustomMetric(top1_accuracy, allow_extra_outputs=True)
    ]
    epoch_end_callback = [mx.callback.do_checkpoint(model_prefix,
                                                    1)]  #, test_callback]
    batch_end_callback = [
        mx.callback.Speedometer(cfg.batch_size, cfg.frequent)
    ]

    print('=' * 80)
    print('Start training...')
    model.fit(
        train_data=train_iter,
        eval_metric=mx.metric.CompositeEvalMetric(metrics=metrics),
        epoch_end_callback=epoch_end_callback,
        batch_end_callback=batch_end_callback,
        optimizer=optimizer,
        # initializer=initializer,
        begin_epoch=cfg.model_load_epoch,
        num_epoch=cfg.num_epoch)
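
As a quick check of the scheduler arithmetic: with hypothetical values of lr_factor_epoch = 2, 82,000 training samples and batch_size = 128, the learning rate decays every 2 * (82000 // 128) = 1280 updates:

# toy check with assumed numbers, not values from the repository
lr_factor_epoch, n_total, batch_size = 2, 82000, 128
step = lr_factor_epoch * (n_total // batch_size)
assert step == 1280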
Example #8
def main():
    # start program
    read_cfg(args.cfg)
    if args.gpus:
        cfg.gpus = args.gpus
    if args.model_path:
        cfg.model_path = args.model_path
    pprint.pprint(cfg)
   
    lr = cfg.train.lr
    beta1 = cfg.train.beta1
    wd = cfg.train.wd
    ctx = mx.gpu(0)
    check_point = False
    n_rand = cfg.dataset.n_rand
    n_class = cfg.dataset.n_class

    symG, symD, l1loss, group = infoGAN.get_symbol(cfg)

    if cfg.dataset.data_type == 'mnist':
        X_train, X_test = get_mnist()
        train_iter = mx.io.NDArrayIter(X_train, batch_size=cfg.batch_size)
    else:
        train_iter = ImagenetIter(cfg.dataset.path, cfg.batch_size, (cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))
    rand_iter = RandIter(cfg.batch_size, n_rand+n_class)
    label = mx.nd.zeros((cfg.batch_size,), ctx=ctx)

    modG = mx.mod.Module(symbol=symG, data_names=(
        'rand',), label_names=None, context=ctx)
    modG.bind(data_shapes=rand_iter.provide_data)
    modG.init_params(initializer=mx.init.Normal(0.02))
    modG.init_optimizer(
        optimizer='adam',
        optimizer_params={
            'learning_rate': lr,
            'wd': wd,
            'beta1': beta1,
        })
    mods = [modG]

    modD = mx.mod.Module(symbol=symD, data_names=(
        'data',), label_names=('label',), context=ctx)
    modD.bind(data_shapes=train_iter.provide_data,
              label_shapes=[('label', (cfg.batch_size,))],
              inputs_need_grad=True)
    modD.init_params(initializer=mx.init.Normal(0.02))
    modD.init_optimizer(
        optimizer='adam',
        optimizer_params={
            'learning_rate': lr,
            'wd': wd,
            'beta1': beta1,
        })
    mods.append(modD)

    modGroup = mx.mod.Module(symbol=group, data_names=(
        'data',), label_names=('label', 'c'), context=ctx)
    modGroup.bind(data_shapes=[('data', (cfg.batch_size, cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))],
              label_shapes=[('label', (cfg.batch_size,)), ('c', (cfg.batch_size, cfg.dataset.n_class,))],
              inputs_need_grad=True
              )
    modGroup.init_params(initializer=mx.init.Normal(0.02))
    modGroup.init_optimizer(
        optimizer='adam',
        optimizer_params={
            'learning_rate': lr,
            'wd': wd,
            'beta1': beta1,
        })
    mods.append(modGroup)

    randz = mx.random.normal(0, 1.0, shape=(cfg.batch_size, cfg.dataset.n_rand, 1, 1))
    # fixed 8x8 grid of noise plus one-hot latent codes for visualization
    # (assumes batch_size == 64 and n_class >= 8)
    ids = np.array([np.eye(n_class)[:8, :] for _ in range(8)]).reshape(
        cfg.batch_size, cfg.dataset.n_class, 1, 1)
    fix_noise = mx.io.DataBatch(
        data=[mx.ndarray.concat(randz, mx.ndarray.array(ids))], label=[])

    if not os.path.exists(cfg.out_path):
        os.makedirs(cfg.out_path)

    for epoch in range(cfg.num_epoch):
        train_iter.reset()
        for t, batch in enumerate(train_iter):
            # generate fake data
            rbatch = rand_iter.next()
            modG.forward(rbatch, is_train=True)
            outG = modG.get_outputs()

            # update discriminator on fake
            label[:] = 0
            c = mx.ndarray.array(rbatch.data[0].asnumpy()[:, n_rand:n_rand+n_class, :, :].reshape(cfg.batch_size, n_class))

            cusData = cusDataBatch(data=outG, c=c, label=label)
            modGroup.forward(cusData, is_train=True)  # is_train is required before backward
            modGroup.backward()

            gradD = [[grad.copyto(grad.context) for grad in grads]
                        for grads in modGroup._exec_group.grad_arrays]

            # update discriminator on real
            label[:] = 1
            c = mx.ndarray.array(np.zeros((cfg.batch_size, n_class)))  # real images carry no latent code
            cusData = cusDataBatch(data=batch.data, c=c, label=label)
            modGroup.forward(cusData, is_train=True)
            modGroup.backward()

            # update discriminator
            for gradsr, gradsf in zip(modGroup._exec_group.grad_arrays, gradD):
                for gradr, gradf in zip(gradsr, gradsf):
                    gradr += gradf
            modGroup.update()

            # update generator
            label[:] = 1
            c = mx.ndarray.array(rbatch.data[0].asnumpy()[:, n_rand:n_rand+n_class, :, :].reshape(cfg.batch_size, n_class))
            cusData = cusDataBatch(data=outG, c=c, label=label)
            modGroup.forward(cusData, is_train=True)
            modGroup.backward()
            l1_loss = modGroup.get_outputs()[1].asnumpy()[0]

            diffD = modGroup.get_input_grads()
            modG.backward(diffD)
            modG.update()

            if t % cfg.frequent == 0:
                print('epoch:', epoch + 1, 'iteration:', t, 'l1 loss:', l1_loss)
                modG.forward(fix_noise, is_train=True)
                outG = modG.get_outputs()
                visual(cfg.out_path + 'info_%d_%d.jpg' % (epoch + 1, t + 1),
                       outG[0].asnumpy())
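
cusDataBatch is a custom batch container that is not shown. A minimal sketch of what the calls above imply, packing the image data with a two-part label of real/fake flag and latent code (a hypothetical reconstruction):

class cusDataBatch(object):
    def __init__(self, data, c, label):
        self.data = data          # list of NDArrays bound to 'data'
        self.label = [label, c]   # matches label_names=('label', 'c')
        self.pad = 0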
Example #9
def main():

    # read config
    read_cfg(args.cfg)
    cfg.memonger = args.memonger
    pprint.pprint(cfg)

    # get symbol
    aogs = []
    for i in range(len(cfg.AOG.dims)):
        aog = get_aog(dim=cfg.AOG.dims[i], min_size=cfg.AOG.min_sizes[i], tnode_max_size=cfg.AOG.tnode_max_size[i],
                      turn_off_unit_or_node=cfg.AOG.TURN_OFF_UNIT_OR_NODE)
        aogs.append(aog)

    symbol = AOGNet.get_symbol(aogs=aogs, cfg=cfg)

    # check shapes
    internals = symbol.get_internals()
    if cfg.dataset.data_type == 'imagenet':
        dshape = (cfg.batch_size, 3, 224, 224)
    elif cfg.dataset.data_type in ['cifar10', 'cifar100']:
        dshape = (cfg.batch_size, 3, 32, 32)
    _, out_shapes, _ = internals.infer_shape(data=dshape)
    shape_dict = dict(zip(internals.list_outputs(), out_shapes))

    # count params size
    total = 0.0
    for k in shape_dict.keys():
        if k.split('_')[-1] in ['weight', 'bias', 'gamma', 'beta']:
            size = 1
            for val in shape_dict[k]:
                size *= val
            total += size
    print('total number of params: {} M'.format(total / 1e6))

    # setup memonger
    if args.memonger:
        dshape_ = (1,) + dshape[1:]
        if args.no_run:
            old_cost = memonger.get_cost(symbol, data=dshape_)
        symbol = memonger.search_plan(symbol, data=dshape_)
        if args.no_run:
            new_cost = memonger.get_cost(symbol, data=dshape_)
            print('batch size=1, old cost= {} MB, new cost= {} MB'.format(old_cost, new_cost))

    # training setup
    kv = mx.kvstore.create(args.kv_store)
    devs = mx.cpu() if args.gpus is None else [mx.gpu(int(i)) for i in args.gpus.split(',')]
    epoch_size = max(int(cfg.dataset.num_examples / cfg.batch_size / kv.num_workers), 1)
    if not os.path.exists(args.modeldir):
        os.makedirs(args.modeldir)
    model_prefix = os.path.join(args.modeldir, 'aognet')
    checkpoint = mx.callback.do_checkpoint(model_prefix)
    arg_params = None
    aux_params = None
    if args.resume:
        _, arg_params, aux_params = mx.model.load_checkpoint(model_prefix, args.resume)
        begin_epoch = args.resume
    else:
        begin_epoch = 0

    # iterator
    train, val = eval(cfg.dataset.data_type + "_iterator")(cfg, kv)
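    # eval() resolves e.g. cifar10_iterator(cfg, kv) from the configured
    # dataset name; a dict of iterator factories would be the safer idiom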

    initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2)
    lr_scheduler = multi_factor_scheduler(begin_epoch, epoch_size, step=cfg.train.lr_steps, factor=0.1)

    optimizer_params = {
        'learning_rate': cfg.train.lr,
        'momentum': cfg.train.mom,
        'wd': cfg.train.wd,
        'lr_scheduler': lr_scheduler
    }

    model = mx.mod.Module(
        context             = devs,
        symbol              = symbol)

    if cfg.dataset.data_type in ["cifar10", "cifar100"]:
        eval_metric = ['acc', 'ce']
    elif cfg.dataset.data_type == 'imagenet':
        eval_metric = ['acc', mx.metric.create('top_k_accuracy', top_k = 5)]

    model.fit(
        train,
        begin_epoch        = begin_epoch,
        num_epoch          = cfg.num_epoch,
        eval_data          = val,
        eval_metric        = eval_metric,
        kvstore            = kv,
        optimizer          = 'sgd',  # ['sgd', 'nag']
        optimizer_params   = optimizer_params,
        arg_params         = arg_params,
        aux_params         = aux_params,
        initializer        = initializer,
        allow_missing      = True,
        batch_end_callback = mx.callback.Speedometer(cfg.batch_size, args.frequent),
        epoch_end_callback = checkpoint)