def main():
    read_cfg(args.cfg)
    # get aogs
    aogs = []
    for i in range(len(cfg.AOG.dims)):
        aog = get_aog(dim=cfg.AOG.dims[i],
                      min_size=cfg.AOG.min_sizes[i],
                      tnode_max_size=cfg.AOG.tnode_max_size[i],
                      turn_off_unit_or_node=cfg.AOG.TURN_OFF_UNIT_OR_NODE)
        aogs.append(aog)
    factor, filter_list_adjusted, ps = search_factor(aogs, args.param_size)
    print("factor: {}, adjusted filter list: {}, param_size: {} M".format(
        factor, filter_list_adjusted, ps))
def main():
    devs = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    symbol, arg_params, aux_params = mx.model.load_checkpoint(args.prefix, args.epoch)
    if args.cfg:
        read_cfg(args.cfg)
        # get aogs
        aogs = []
        for i in range(len(cfg.AOG.dims)):
            aog = get_aog(dim=cfg.AOG.dims[i],
                          min_size=cfg.AOG.min_sizes[i],
                          tnode_max_size=cfg.AOG.tnode_max_size[i],
                          turn_off_unit_or_node=cfg.AOG.TURN_OFF_UNIT_OR_NODE)
            aogs.append(aog)
        # get symbol
        symbol = aognet.get_symbol(aogs=aogs, cfg=cfg)
    print("symbol loaded")

    if args.dataset == 'cifar10':
        path_imgrec = "../data/cifar10/cifar10_val.rec"
    elif args.dataset == 'cifar100':
        path_imgrec = "../data/cifar100/cifar100_test.rec"
    label_name = 'softmax_label'
    validation_data_iter = mx.io.ImageRecordIter(
        path_imgrec=path_imgrec,
        label_width=1,
        data_name='data',
        label_name=label_name,
        batch_size=128,
        data_shape=(3, 32, 32),
        rand_crop=False,
        rand_mirror=False,
        num_parts=1,
        part_index=0)

    cifar_model = mx.mod.Module(symbol=symbol, context=devs, label_names=[label_name, ])
    cifar_model.bind(for_training=False,
                     data_shapes=validation_data_iter.provide_data,
                     label_shapes=validation_data_iter.provide_label)
    cifar_model.set_params(arg_params, aux_params)

    metrics = [mx.metric.create('acc'), mx.metric.create('ce')]
    print("testing!!")
    for batch in validation_data_iter:
        cifar_model.forward(batch, is_train=False)
        for m in metrics:
            cifar_model.update_metric(m, batch.label)
    print("Accuracy: {}, Cross-Entropy: {}".format(metrics[0].get()[1],
                                                   metrics[1].get()[1]))
def main():
    # start program
    read_cfg(args.cfg)
    if args.gpus:
        cfg.gpus = args.gpus
    if args.model_path:
        cfg.model_path = args.model_path
    pprint.pprint(cfg)

    lr = cfg.train.lr
    beta1 = cfg.train.beta1
    wd = cfg.train.wd
    ctx = mx.gpu(0)
    check_point = False
    n_rand = cfg.dataset.n_rand

    symG, symD = DCGAN.get_symbol(cfg)

    if cfg.dataset.data_type == 'mnist':
        X_train, X_test = get_mnist()
        train_iter = mx.io.NDArrayIter(X_train, batch_size=cfg.batch_size)
    else:
        train_iter = ImagenetIter(cfg.dataset.path, cfg.batch_size,
                                  (cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))
    rand_iter = RandIter(cfg.batch_size, n_rand)
    label = mx.nd.zeros((cfg.batch_size, ), ctx=ctx)

    # generator module
    modG = mx.mod.Module(symbol=symG, data_names=('rand', ), label_names=None, context=ctx)
    modG.bind(data_shapes=rand_iter.provide_data)
    modG.init_params(initializer=mx.init.Normal(0.02))
    modG.init_optimizer(optimizer='adam',
                        optimizer_params={
                            'learning_rate': lr,
                            'wd': wd,
                            'beta1': beta1,
                        })
    mods = [modG]

    # discriminator module
    modD = mx.mod.Module(symbol=symD, data_names=('data', ), label_names=('label', ), context=ctx)
    modD.bind(data_shapes=train_iter.provide_data,
              label_shapes=[('label', (cfg.batch_size, ))],
              inputs_need_grad=True)
    modD.init_params(initializer=mx.init.Normal(0.02))
    modD.init_optimizer(optimizer='adam',
                        optimizer_params={
                            'learning_rate': lr,
                            'wd': wd,
                            'beta1': beta1,
                        })
    mods.append(modD)

    # fixed noise batch for periodic visualization
    randz = mx.random.normal(0, 1.0, shape=(cfg.batch_size, cfg.dataset.n_rand, 1, 1))
    fix_noise = mx.io.DataBatch(data=[mx.ndarray.array(randz)], label=[])

    if not os.path.exists(cfg.out_path):
        os.makedirs(cfg.out_path)

    for epoch in range(cfg.num_epoch):
        train_iter.reset()
        for t, batch in enumerate(train_iter):
            rbatch = rand_iter.next()

            # generate fake data
            modG.forward(rbatch, is_train=True)
            outG = modG.get_outputs()

            # update discriminator on fake
            label[:] = 0
            modD.forward(mx.io.DataBatch(outG, [label]), is_train=True)
            modD.backward()
            gradD = [[grad.copyto(grad.context) for grad in grads]
                     for grads in modD._exec_group.grad_arrays]

            # update discriminator on real
            label[:] = 1
            batch.label = [label]
            modD.forward(batch, is_train=True)
            modD.backward()
            for gradsr, gradsf in zip(modD._exec_group.grad_arrays, gradD):
                for gradr, gradf in zip(gradsr, gradsf):
                    gradr += gradf
            modD.update()

            # update generator
            label[:] = 1
            modD.forward(mx.io.DataBatch(outG, [label]), is_train=True)
            modD.backward()
            diffD = modD.get_input_grads()
            modG.backward(diffD)
            modG.update()

            if t % cfg.frequent == 0:
                modG.forward(fix_noise, is_train=True)
                outG = modG.get_outputs()
                visual(cfg.out_path + 'GAN_%d_%d.jpg' % (epoch + 1, t + 1),
                       outG[0].asnumpy())
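# The noise iterator `RandIter` used above is defined elsewhere in the project.
# A minimal sketch follows, assuming it mirrors the helper from the standard
# MXNet DCGAN example: a DataIter that yields an endless stream of
# (batch_size, ndim, 1, 1) standard-normal noise batches under the data name
# 'rand'. The body below is an illustrative assumption, not the repository's
# definitive implementation.
import mxnet as mx

class RandIter(mx.io.DataIter):
    def __init__(self, batch_size, ndim):
        self.batch_size = batch_size
        self.ndim = ndim
        # one noise vector per sample, shaped for the generator's input
        self.provide_data = [('rand', (batch_size, ndim, 1, 1))]
        self.provide_label = []

    def iter_next(self):
        # the noise stream never runs out
        return True

    def getdata(self):
        # draw fresh standard-normal noise on every call
        return [mx.random.normal(0, 1.0, shape=(self.batch_size, self.ndim, 1, 1))]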
def main():
    # read config
    read_cfg(args.cfg)
    cfg.memonger = args.memonger
    pprint.pprint(cfg)

    # get symbol
    aogs = []
    for i in range(len(cfg.AOG.dims)):
        aog = get_aog(dim=cfg.AOG.dims[i],
                      min_size=1,
                      tnode_max_size=cfg.AOG.dims[i],
                      turn_off_unit_or_node=cfg.AOG.TURN_OFF_UNIT_OR_NODE)
        aogs.append(aog)
    symbol = AOGNet.get_symbol(aogs=aogs, cfg=cfg)

    # check shapes
    internals = symbol.get_internals()
    if cfg.dataset.data_type == 'imagenet':
        dshape = (cfg.batch_size, 3, 224, 224)
    elif cfg.dataset.data_type in ['cifar10', 'cifar100']:
        dshape = (cfg.batch_size, 3, 32, 32)
    _, out_shapes, _ = internals.infer_shape(data=dshape)
    shape_dict = dict(zip(internals.list_outputs(), out_shapes))

    # count params size
    stages_kw = {'stage_0': 0.0, 'stage_1': 0.0, 'stage_2': 0.0, 'stage_3': 0.0}
    sum = 0.0
    for k in shape_dict.keys():
        if k.split('_')[-1] in ['weight', 'bias', 'gamma', 'beta']:
            size = 1
            for val in shape_dict[k]:
                size *= val
            for key in stages_kw:
                if key in k:
                    stages_kw[key] += size
            sum += size
    print('total number of params: {} M'.format(sum / 1e6))
    for k, v in stages_kw.items():
        if v > 0:
            print('{} has param size: {} M'.format(k, v / 1e6))

    # setup memonger
    if args.memonger:
        dshape_ = (1, ) + dshape[1:]
        old_cost = memonger.get_cost(symbol, data=dshape_)
        symbol = memonger.search_plan(symbol, data=dshape_)
        new_cost = memonger.get_cost(symbol, data=dshape_)
        print('batch size=1, old cost= {} MB, new cost= {} MB'.format(
            old_cost, new_cost))

    # training setup
    kv = mx.kvstore.create(args.kv_store)
    devs = mx.cpu() if args.gpus is None else [
        mx.gpu(int(i)) for i in args.gpus.split(',')
    ]
    epoch_size = max(
        int(cfg.dataset.num_examples / cfg.batch_size / kv.num_workers), 1)
    if not os.path.exists(args.modeldir):
        os.makedirs(args.modeldir)
    model_prefix = os.path.join(args.modeldir, 'checkpoint')
    checkpoint = mx.callback.do_checkpoint(model_prefix)
    arg_params = None
    aux_params = None
    if args.resume:
        _, arg_params, aux_params = mx.model.load_checkpoint(
            model_prefix, args.resume)
        begin_epoch = args.resume
    else:
        begin_epoch = 0

    # MOD: save network description + visualization of graph
    # model.save_checkpoint saves a json file, but no checkpoint file and throws "AssertionError"
    # symbol.save saves the network definition to a json-file
    SAVE = True
    if SAVE:
        path = 'large_model-symbol.json'
        symbol.save(path)

    VIZ = True
    if VIZ:
        grp = mx.viz.plot_network(symbol,
                                  node_attrs={
                                      "shape": "oval",
                                      "fixedsize": "false"
                                  })
        grp.save('graph_file.dot',
                 '/home/gabras/deployed/relative-baseline/AOGNet/')

    # TODO: save symbol before this
    TRAIN = False
    if TRAIN:
        # iterator
        train, val = eval(cfg.dataset.data_type + "_iterator")(cfg, kv)
        initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2)
        lr_scheduler = multi_factor_scheduler(begin_epoch, epoch_size,
                                              step=cfg.train.lr_steps, factor=0.1)
        optimizer_params = {
            'learning_rate': cfg.train.lr,
            'momentum': cfg.train.mom,
            'wd': cfg.train.wd,
            'lr_scheduler': lr_scheduler
        }
        model = mx.mod.Module(context=devs, symbol=symbol)
        if cfg.dataset.data_type in ["cifar10", "cifar100"]:
            eval_metric = ['acc', 'ce']
        elif cfg.dataset.data_type == 'imagenet':
            eval_metric = ['acc', mx.metric.create('top_k_accuracy', top_k=5)]
        model.fit(
            train,
            begin_epoch=begin_epoch,
            num_epoch=cfg.num_epoch,
            eval_data=val,
            eval_metric=eval_metric,
            kvstore=kv,
            optimizer='sgd',  # ['sgd', 'nag']
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            initializer=initializer,
            allow_missing=True,
            batch_end_callback=mx.callback.Speedometer(cfg.batch_size, args.frequent),
            epoch_end_callback=checkpoint)
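# `multi_factor_scheduler`, used by the training scripts above and below, is
# defined elsewhere. A minimal sketch, assuming it follows the common MXNet
# ResNet-style recipe: convert epoch milestones into iteration counts, drop
# milestones already passed when resuming, and return a MultiFactorScheduler
# (or None if no milestone remains). Signature and behaviour are assumptions
# for illustration only.
import mxnet as mx

def multi_factor_scheduler(begin_epoch, epoch_size, step=(60, 75, 90), factor=0.1):
    # keep only milestones that lie ahead of the resume point
    step_ = [epoch_size * (s - begin_epoch) for s in step if s - begin_epoch > 0]
    return mx.lr_scheduler.MultiFactorScheduler(step=step_, factor=factor) if step_ else None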
def main():
    read_cfg(args.cfg)
    if args.gpus:
        cfg.gpus = args.gpus
    if args.model_path:
        cfg.model_path = args.model_path
    pprint.pprint(cfg)

    # get symbol
    symbol = seresnet.get_symbol(cfg)

    kv = mx.kvstore.create(cfg.kv_store)
    devs = mx.cpu() if cfg.gpus is None else [
        mx.gpu(int(i)) for i in cfg.gpus.split(',')
    ]
    epoch_size = max(
        int(cfg.dataset.num_examples / cfg.batch_size / kv.num_workers), 1)
    begin_epoch = cfg.model_load_epoch if cfg.model_load_epoch else 0
    if not os.path.exists(cfg.model_path):
        os.mkdir(cfg.model_path)
    model_prefix = cfg.model_path + "seresnet-{}-{}-{}-{}".format(
        cfg.dataset.data_type, cfg.network.depth, kv.rank, 2)
    checkpoint = mx.callback.do_checkpoint(model_prefix)

    arg_params = None
    aux_params = None
    if cfg.retrain:
        print("loading pretrained parameters...")
        _, arg_params, aux_params = mx.model.load_checkpoint(
            'model/resnet-tiny-imagenet-50-0', 100)
    if cfg.memonger:
        import memonger
        symbol = memonger.search_plan(
            symbol,
            data=(cfg.batch_size, 3, 32, 32)
            if cfg.dataset.data_type == "cifar10" else (cfg.batch_size, 3, 224, 224))

    ## data rec path
    if cfg.dataset.data_type == "cifar10":
        train_rec = os.path.join(cfg.dataset.data_dir, "cifar10_train.rec")
        val_rec = os.path.join(cfg.dataset.data_dir, "cifar10_val.rec")
    elif cfg.dataset.data_type == "cifar100":
        train_rec = os.path.join(cfg.dataset.data_dir, "cifar100_train.rec")
        val_rec = os.path.join(cfg.dataset.data_dir, "cifar100_test.rec")
    elif cfg.dataset.data_type == "tiny-imagenet":
        train_rec = os.path.join(cfg.dataset.data_dir, "tiny-imagenet-10_train.rec")
        val_rec = os.path.join(cfg.dataset.data_dir, "tiny-imagenet-10_val.rec")
    else:
        val_rec = os.path.join(cfg.dataset.data_dir, "val_256_q95.rec")
        if cfg.dataset.aug_level == 1:
            train_rec = os.path.join(cfg.dataset.data_dir, "train_256_q95.rec")
        else:
            train_rec = os.path.join(cfg.dataset.data_dir, "train_480_q95.rec")

    train = mx.io.ImageRecordIter(
        path_imgrec=train_rec,
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        data_shape=(3, 32, 32) if cfg.dataset.data_type in ["cifar10", "cifar100"] else (3, 224, 224),
        batch_size=cfg.batch_size,
        pad=4 if cfg.dataset.data_type in ["cifar10", "cifar100"] else 0,
        fill_value=127,  # only used when pad is valid
        rand_crop=True,
        max_random_scale=1.0,  # 480 with imagenet, 32 with cifar10
        min_random_scale=1.0 if cfg.dataset.data_type in ["cifar10", "cifar100"]
        else 1.0 if cfg.dataset.aug_level == 1 else 0.533,  # 256.0/480.0
        max_aspect_ratio=0 if cfg.dataset.data_type in ["cifar10", "cifar100"]
        else 0 if cfg.dataset.aug_level == 1 else 0.25,
        random_h=0 if cfg.dataset.data_type in ["cifar10", "cifar100"]
        else 0 if cfg.dataset.aug_level == 1 else 36,  # 0.4*90
        random_s=0 if cfg.dataset.data_type in ["cifar10", "cifar100"]
        else 0 if cfg.dataset.aug_level == 1 else 50,  # 0.4*127
        random_l=0 if cfg.dataset.data_type in ["cifar10", "cifar100"]
        else 0 if cfg.dataset.aug_level == 1 else 50,  # 0.4*127
        max_rotate_angle=0 if cfg.dataset.aug_level <= 2 else 10,
        max_shear_ratio=0 if cfg.dataset.aug_level <= 2 else 0.1,
        rand_mirror=True,
        shuffle=True,
        num_parts=kv.num_workers,
        part_index=kv.rank)
    val = mx.io.ImageRecordIter(
        path_imgrec=val_rec,
        label_width=1,
        data_name='data',
        label_name='softmax_label',
        batch_size=cfg.batch_size,
        data_shape=(3, 32, 32) if cfg.dataset.data_type in ["cifar10", "cifar100"] else (3, 224, 224),
        rand_crop=False,
        rand_mirror=False,
        num_parts=kv.num_workers,
        part_index=kv.rank)

    model = mx.model.FeedForward(
        ctx=devs,
        symbol=symbol,
        arg_params=arg_params,
        aux_params=aux_params,
        num_epoch=200 if cfg.dataset.data_type in ["cifar10", "cifar100"] else 200,
        begin_epoch=begin_epoch,
        learning_rate=cfg.train.lr,
        momentum=cfg.train.mom,
        wd=cfg.train.wd,
        # optimizer = 'nag',
        optimizer='sgd',
        initializer=mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2),
        lr_scheduler=multi_factor_scheduler(begin_epoch, epoch_size,
                                            step=cfg.train.lr_steps, factor=0.1),
    )
    model.fit(X=train,
              eval_data=val,
              eval_metric=['acc', 'ce'] if cfg.dataset.data_type in ["cifar10", "cifar100"]
              else ['acc', mx.metric.create('top_k_accuracy', top_k=2)],
              kvstore=kv,
              batch_end_callback=mx.callback.Speedometer(cfg.batch_size, cfg.frequent),
              epoch_end_callback=checkpoint)
    logging.info("top-1 and top-5 acc is {}".format(
        model.score(X=val,
                    eval_metric=['acc', mx.metric.create('top_k_accuracy', top_k=5)])))
def main():
    # start program
    read_cfg(args.cfg)
    if args.gpus:
        cfg.gpus = args.gpus
    if args.model_path:
        cfg.model_path = args.model_path
    pprint.pprint(cfg)

    lr = cfg.train.lr
    beta1 = cfg.train.beta1
    wd = cfg.train.wd
    ctx = mx.gpu(0)
    devs = mx.cpu() if cfg.gpus is None else [
        mx.gpu(int(i)) for i in cfg.gpus.split(',')
    ]
    check_point = False
    load_model = False
    mode_path = './SavedModel'

    global modG_A, modG_B, modD_A, modD_B, cycleLoss_excu, label
    label = mx.nd.zeros((cfg.batch_size, 1, cfg.dataset.dh, cfg.dataset.dw), ctx=ctx)
    symG_A, symG_B, symD_A, symD_B = cycleGAN.get_symbol(cfg)

    # Generator A
    modG_A = mx.mod.Module(symbol=symG_A, data_names=('dataA', ), label_names=None, context=devs)
    modG_A.bind(data_shapes=[('dataA', (cfg.batch_size, cfg.dataset.c,
                                        cfg.dataset.h, cfg.dataset.w))],
                inputs_need_grad=True)
    modG_A.init_params(initializer=mx.init.Normal(0.02))
    modG_A.init_optimizer(optimizer='adam',
                          optimizer_params={
                              'learning_rate': lr,
                              'wd': wd,
                              'beta1': beta1,
                          })

    # Generator B
    modG_B = mx.mod.Module(symbol=symG_B, data_names=('dataB', ), label_names=None, context=devs)
    modG_B.bind(data_shapes=[('dataB', (cfg.batch_size, cfg.dataset.c,
                                        cfg.dataset.h, cfg.dataset.w))],
                inputs_need_grad=True)
    modG_B.init_params(initializer=mx.init.Normal(0.02))
    modG_B.init_optimizer(optimizer='adam',
                          optimizer_params={
                              'learning_rate': lr,
                              'wd': wd,
                              'beta1': beta1,
                          })

    # Discriminator A
    modD_A = mx.mod.Module(symbol=symD_A, data_names=('dataC', ), label_names=('labelC', ), context=devs)
    modD_A.bind(data_shapes=[('dataC', (cfg.batch_size, cfg.dataset.c,
                                        cfg.dataset.h, cfg.dataset.w))],
                label_shapes=[('labelC', (cfg.batch_size, 1, cfg.dataset.dh, cfg.dataset.dw))],
                inputs_need_grad=True)
    modD_A.init_params(initializer=mx.init.Normal(0.02))
    modD_A.init_optimizer(optimizer='adam',
                          optimizer_params={
                              'learning_rate': lr,
                              'wd': wd,
                              'beta1': beta1,
                          })

    # Discriminator B
    modD_B = mx.mod.Module(symbol=symD_B, data_names=('dataD', ), label_names=('labelD', ), context=devs)
    modD_B.bind(data_shapes=[('dataD', (cfg.batch_size, cfg.dataset.c,
                                        cfg.dataset.h, cfg.dataset.w))],
                label_shapes=[('labelD', (cfg.batch_size, 1, cfg.dataset.dh, cfg.dataset.dw))],
                inputs_need_grad=True)
    modD_B.init_params(initializer=mx.init.Normal(0.02))
    modD_B.init_optimizer(optimizer='adam',
                          optimizer_params={
                              'learning_rate': lr,
                              'wd': wd,
                              'beta1': beta1,
                          })

    cycleLoss = cycleGAN.getAbsLoss()
    cycleLoss_excu = cycleLoss.simple_bind(
        ctx=ctx,
        grad_req='write',
        cycle=(cfg.batch_size, cfg.dataset.c, cfg.dataset.h, cfg.dataset.w),
        data=(cfg.batch_size, cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))

    # load params
    if load_model:
        modG_A.load_params(os.path.join(mode_path, 'generatorA'))
        modG_B.load_params(os.path.join(mode_path, 'generatorB'))
        modD_A.load_params(os.path.join(mode_path, 'discriminatorA'))
        modD_B.load_params(os.path.join(mode_path, 'discriminatorB'))

    # load train data to iterator
    dataA = glob.glob(os.path.join(cfg.dataset.path, 'trainA/*.jpg'))
    dataB = glob.glob(os.path.join(cfg.dataset.path, 'trainB/*.jpg'))
    dataA_iter = ImagenetIter(dataA, cfg.batch_size,
                              (cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))
    dataB_iter = ImagenetIter(dataB, cfg.batch_size,
                              (cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))

    # load test data to iterator
    testA = glob.glob(os.path.join(cfg.dataset.path, 'testA/*.jpg'))
    testB = glob.glob(os.path.join(cfg.dataset.path, 'testB/*.jpg'))
    testA_iter = ImagenetIter(testA, cfg.batch_size,
                              (cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))
    testB_iter = ImagenetIter(testB, cfg.batch_size,
                              (cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))

    if not os.path.exists(cfg.out_path):
        os.makedirs(cfg.out_path)

    test = 0
    for epoch in range(cfg.num_epoch):
        dataA_iter.reset()
        dataB_iter.reset()
        for npic in range(cfg.dataset.num_pics):
            inputA = dataA_iter.getdata()
            inputB = dataB_iter.getdata()
            l1lossA, l1lossB, gradG_A, gradG_B, DlossA, DlossB = train_generator(
                inputA, inputB, 10)

            modG_A.forward(mx.io.DataBatch(data=inputA, label=None), is_train=True)
            fakeB = modG_A.get_outputs()
            modG_B.forward(mx.io.DataBatch(data=inputB, label=None), is_train=True)
            fakeA = modG_B.get_outputs()

            lossD_A = train_discriminator(modD_A, inputA, fakeA)
            lossD_B = train_discriminator(modD_B, inputB, fakeB)

            # update modG and modD
            update_module(modG_A, gradG_A)
            update_module(modG_B, gradG_B)

            if npic % cfg.frequent == 0:
                print('epoch:', epoch + 1, 'iter:', npic,
                      'lossD_A:', lossD_A, 'lossD_B:', lossD_B,
                      'l1loss_a:', l1lossA, 'l1loss_b:', l1lossB,
                      'DlossA:', DlossA, 'DlossB:', DlossB)

        # apply model to test data and save result pics
        if test == cfg.dataset.num_pics / 3:
            testA_iter.reset()
            testB_iter.reset()
            test = 0
        A_B_As = []
        B_A_Bs = []
        for _ in range(3):
            testA = testA_iter.getdata()
            testB = testB_iter.getdata()
            test += 1
            # visualize A-B-A
            modG_A.forward(mx.io.DataBatch(data=testA, label=None), is_train=True)
            fakeB = modG_A.get_outputs()
            modG_B.forward(mx.io.DataBatch(data=fakeB, label=None), is_train=True)
            cycleA = modG_B.get_outputs()
            A_B_As.append(
                np.concatenate((testA[0].asnumpy(), fakeB[0].asnumpy(),
                                cycleA[0].asnumpy())))
            # visualize B-A-B
            modG_B.forward(mx.io.DataBatch(data=testB, label=None), is_train=True)
            fakeA = modG_B.get_outputs()
            modG_A.forward(mx.io.DataBatch(data=fakeA, label=None), is_train=True)
            cycleB = modG_A.get_outputs()
            B_A_Bs.append(
                np.concatenate((testB[0].asnumpy(), fakeA[0].asnumpy(),
                                cycleB[0].asnumpy())))
        A_B_A = np.concatenate((A_B_As[0], A_B_As[1], A_B_As[2]))
        B_A_B = np.concatenate((B_A_Bs[0], B_A_Bs[1], B_A_Bs[2]))
        visual(os.path.join(cfg.out_path, 'A_B_A' + str(epoch) + '.jpg'), A_B_A)
        visual(os.path.join(cfg.out_path, 'B_A_B' + str(epoch) + '.jpg'), B_A_B)

        ## save model
        modG_A.save_params(os.path.join(mode_path, 'generatorA'))
        modG_B.save_params(os.path.join(mode_path, 'generatorB'))
        modD_A.save_params(os.path.join(mode_path, 'discriminatorA'))
        modD_B.save_params(os.path.join(mode_path, 'discriminatorB'))
def main():
    read_cfg(args.cfg)
    if args.gpus:
        cfg.gpus = args.gpus
    if args.model_path:
        cfg.model_path = args.model_path
    pprint.pprint(cfg)

    # get symbol
    symbol = hievqa.get_symbol(cfg)

    kv = mx.kvstore.create(cfg.kv_store)
    devs = mx.cpu() if cfg.gpus is None else [
        mx.gpu(int(i)) for i in cfg.gpus.split(',')
    ]
    begin_epoch = cfg.model_load_epoch if cfg.model_load_epoch else 0
    if not os.path.exists(cfg.model_path):
        os.mkdir(cfg.model_path)
    model_prefix = cfg.model_path + "hierarchical_VQA"
    checkpoint = mx.callback.do_checkpoint(model_prefix)

    # data iter
    train_iter = hieloader.VQAIter(cfg)

    if cfg.train.lr_factor_epoch > 0:
        step = cfg.train.lr_factor_epoch * (train_iter.n_total // cfg.batch_size)
    else:
        step = 1
    opt_args = {}
    opt_args['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
        step=step, factor=cfg.train.lr_factor)
    optimizer = mx.optimizer.Adam(learning_rate=cfg.train.lr,
                                  beta1=cfg.train.beta1,
                                  beta2=cfg.train.beta2,
                                  wd=cfg.train.wd,
                                  **opt_args)

    model = mx.mod.Module(context=devs,
                          symbol=symbol,
                          data_names=train_iter.data_names,
                          label_names=train_iter.label_names)

    if cfg.retrain:
        _, arg_params, __ = mx.model.load_checkpoint(model_prefix, cfg.model_load_epoch)
    else:
        # containing only the skip thought weights
        arg_params = pickle.load(open(cfg.train.skip_thought_dict, 'rb'))
    embed_param = {}
    embed_param['embed_weight'] = arg_params['embed_weight']
    initializer = mx.initializer.Load(embed_param,
                                      default_init=mx.initializer.Uniform(cfg.train.uni_mag),
                                      verbose=True)

    def top1_accuracy(labels, preds):
        pred_labels = np.argmax(preds, axis=1)
        n_correct = np.where(labels == pred_labels)[0].size
        return n_correct / np.float32(labels.size)

    metrics = [
        mx.metric.CrossEntropy(),
        mx.metric.CustomMetric(top1_accuracy, allow_extra_outputs=True)
    ]
    epoch_end_callback = [mx.callback.do_checkpoint(model_prefix, 1)]  # , test_callback]
    batch_end_callback = [
        mx.callback.Speedometer(cfg.batch_size, cfg.frequent)
    ]

    print('=================================================================================')
    print('Start training...')
    model.fit(
        train_data=train_iter,
        eval_metric=mx.metric.CompositeEvalMetric(metrics=metrics),
        epoch_end_callback=epoch_end_callback,
        batch_end_callback=batch_end_callback,
        optimizer=optimizer,
        # initializer=initializer,
        begin_epoch=begin_epoch,
        num_epoch=cfg.num_epoch)
def main():
    # start program
    read_cfg(args.cfg)
    if args.gpus:
        cfg.gpus = args.gpus
    if args.model_path:
        cfg.model_path = args.model_path
    pprint.pprint(cfg)

    lr = cfg.train.lr
    beta1 = cfg.train.beta1
    wd = cfg.train.wd
    ctx = mx.gpu(0)
    check_point = False
    n_rand = cfg.dataset.n_rand
    n_class = cfg.dataset.n_class

    symG, symD, l1loss, group = infoGAN.get_symbol(cfg)

    if cfg.dataset.data_type == 'mnist':
        X_train, X_test = get_mnist()
        train_iter = mx.io.NDArrayIter(X_train, batch_size=cfg.batch_size)
    else:
        train_iter = ImagenetIter(cfg.dataset.path, cfg.batch_size,
                                  (cfg.dataset.c, cfg.dataset.h, cfg.dataset.w))
    rand_iter = RandIter(cfg.batch_size, n_rand + n_class)
    label = mx.nd.zeros((cfg.batch_size,), ctx=ctx)

    # generator module
    modG = mx.mod.Module(symbol=symG, data_names=('rand',), label_names=None, context=ctx)
    modG.bind(data_shapes=rand_iter.provide_data)
    modG.init_params(initializer=mx.init.Normal(0.02))
    modG.init_optimizer(optimizer='adam',
                        optimizer_params={
                            'learning_rate': lr,
                            'wd': wd,
                            'beta1': beta1,
                        })
    mods = [modG]

    # discriminator module
    modD = mx.mod.Module(symbol=symD, data_names=('data',), label_names=('label',), context=ctx)
    modD.bind(data_shapes=train_iter.provide_data,
              label_shapes=[('label', (cfg.batch_size,))],
              inputs_need_grad=True)
    modD.init_params(initializer=mx.init.Normal(0.02))
    modD.init_optimizer(optimizer='adam',
                        optimizer_params={
                            'learning_rate': lr,
                            'wd': wd,
                            'beta1': beta1,
                        })
    mods.append(modD)

    # grouped discriminator + latent-code head
    modGroup = mx.mod.Module(symbol=group, data_names=('data',), label_names=('label', 'c'), context=ctx)
    modGroup.bind(data_shapes=[('data', (cfg.batch_size, cfg.dataset.c,
                                         cfg.dataset.h, cfg.dataset.w))],
                  label_shapes=[('label', (cfg.batch_size,)),
                                ('c', (cfg.batch_size, cfg.dataset.n_class,))],
                  inputs_need_grad=True)
    modGroup.init_params(initializer=mx.init.Normal(0.02))
    modGroup.init_optimizer(optimizer='adam',
                            optimizer_params={
                                'learning_rate': lr,
                                'wd': wd,
                                'beta1': beta1,
                            })
    mods.append(modGroup)

    # fixed noise + one-hot codes for periodic visualization
    randz = mx.random.normal(0, 1.0, shape=(cfg.batch_size, cfg.dataset.n_rand, 1, 1))
    ids = np.array([np.eye(n_class)[:8, :] for _ in range(8)]).reshape(
        cfg.batch_size, cfg.dataset.n_class, 1, 1)
    fix_noise = mx.io.DataBatch(
        data=[mx.ndarray.concat(
            randz,
            mx.ndarray.array(ids.reshape(cfg.batch_size, cfg.dataset.n_class, 1, 1)))],
        label=[])

    if not os.path.exists(cfg.out_path):
        os.makedirs(cfg.out_path)

    for epoch in range(cfg.num_epoch):
        train_iter.reset()
        for t, batch in enumerate(train_iter):
            # generate fake data
            rbatch = rand_iter.next()
            modG.forward(rbatch, is_train=True)
            outG = modG.get_outputs()

            # update discriminator on fake
            label[:] = 0
            c = mx.ndarray.array(rbatch.data[0].asnumpy()[:, n_rand:n_rand + n_class, :, :]
                                 .reshape(cfg.batch_size, n_class))
            cusData = cusDataBatch(data=outG, c=c, label=label)
            modGroup.forward(cusData, is_train=True)
            modGroup.backward()
            gradD = [[grad.copyto(grad.context) for grad in grads]
                     for grads in modGroup._exec_group.grad_arrays]

            # update discriminator on real
            label[:] = 1
            c = mx.ndarray.array(np.zeros((cfg.batch_size, n_class)))
            cusData = cusDataBatch(data=batch.data, c=c, label=label)
            modGroup.forward(cusData, is_train=True)
            modGroup.backward()
            # update discriminator
            for gradsr, gradsf in zip(modGroup._exec_group.grad_arrays, gradD):
                for gradr, gradf in zip(gradsr, gradsf):
                    gradr += gradf
            modGroup.update()

            # update generator
            label[:] = 1
            c = mx.ndarray.array(rbatch.data[0].asnumpy()[:, n_rand:n_rand + n_class, :, :]
                                 .reshape(cfg.batch_size, n_class))
            cusData = cusDataBatch(data=outG, c=c, label=label)
            modGroup.forward(cusData, is_train=True)
            modGroup.backward()
            l1_loss = modGroup.get_outputs()[1].asnumpy()[0]
            diffD = modGroup.get_input_grads()
            modG.backward(diffD)
            modG.update()

            if t % cfg.frequent == 0:
                print('epoch:', epoch + 1, 'iteration: ', t, 'l1 loss: ', l1_loss)
                modG.forward(fix_noise, is_train=True)
                outG = modG.get_outputs()
                visual(cfg.out_path + 'info_%d_%d.jpg' % (epoch + 1, t + 1),
                       outG[0].asnumpy())
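# `cusDataBatch` above is a project-specific helper that is not shown in this
# file. A plausible minimal sketch, assuming it simply packs the images
# together with the adversarial label and the latent code `c` into a DataBatch
# whose label order matches modGroup's ('label', 'c') label names. This is an
# assumption about the helper, not its actual definition.
import mxnet as mx

def cusDataBatch(data, c, label):
    # data: list of NDArrays (generator outputs or real images)
    # label: real/fake target for the adversarial head; c: latent code target
    return mx.io.DataBatch(data=data, label=[label, c])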
def main():
    # read config
    read_cfg(args.cfg)
    cfg.memonger = args.memonger
    pprint.pprint(cfg)

    # get symbol
    aogs = []
    for i in range(len(cfg.AOG.dims)):
        aog = get_aog(dim=cfg.AOG.dims[i],
                      min_size=cfg.AOG.min_sizes[i],
                      tnode_max_size=cfg.AOG.tnode_max_size[i],
                      turn_off_unit_or_node=cfg.AOG.TURN_OFF_UNIT_OR_NODE)
        aogs.append(aog)
    symbol = AOGNet.get_symbol(aogs=aogs, cfg=cfg)

    # check shapes
    internals = symbol.get_internals()
    if cfg.dataset.data_type == 'imagenet':
        dshape = (cfg.batch_size, 3, 224, 224)
    elif cfg.dataset.data_type in ['cifar10', 'cifar100']:
        dshape = (cfg.batch_size, 3, 32, 32)
    _, out_shapes, _ = internals.infer_shape(data=dshape)
    shape_dict = dict(zip(internals.list_outputs(), out_shapes))

    # count params size
    sum = 0.0
    for k in shape_dict.keys():
        if k.split('_')[-1] in ['weight', 'bias', 'gamma', 'beta']:
            size = 1
            for val in shape_dict[k]:
                size *= val
            sum += size
    print('total number of params: {} M'.format(sum / 1e6))

    # setup memonger
    if args.memonger:
        dshape_ = (1,) + dshape[1:]
        if args.no_run:
            old_cost = memonger.get_cost(symbol, data=dshape_)
        symbol = memonger.search_plan(symbol, data=dshape_)
        if args.no_run:
            new_cost = memonger.get_cost(symbol, data=dshape_)
            print('batch size=1, old cost= {} MB, new cost= {} MB'.format(old_cost, new_cost))

    # training setup
    kv = mx.kvstore.create(args.kv_store)
    devs = mx.cpu() if args.gpus is None else [mx.gpu(int(i)) for i in args.gpus.split(',')]
    epoch_size = max(int(cfg.dataset.num_examples / cfg.batch_size / kv.num_workers), 1)
    if not os.path.exists(args.modeldir):
        os.makedirs(args.modeldir)
    model_prefix = os.path.join(args.modeldir, 'aognet')
    checkpoint = mx.callback.do_checkpoint(model_prefix)
    arg_params = None
    aux_params = None
    if args.resume:
        _, arg_params, aux_params = mx.model.load_checkpoint(model_prefix, args.resume)
        begin_epoch = args.resume
    else:
        begin_epoch = 0

    # iterator
    train, val = eval(cfg.dataset.data_type + "_iterator")(cfg, kv)

    initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2)
    lr_scheduler = multi_factor_scheduler(begin_epoch, epoch_size,
                                          step=cfg.train.lr_steps, factor=0.1)
    optimizer_params = {
        'learning_rate': cfg.train.lr,
        'momentum': cfg.train.mom,
        'wd': cfg.train.wd,
        'lr_scheduler': lr_scheduler
    }

    model = mx.mod.Module(context=devs, symbol=symbol)

    if cfg.dataset.data_type in ["cifar10", "cifar100"]:
        eval_metric = ['acc', 'ce']
    elif cfg.dataset.data_type == 'imagenet':
        eval_metric = ['acc', mx.metric.create('top_k_accuracy', top_k=5)]

    model.fit(
        train,
        begin_epoch=begin_epoch,
        num_epoch=cfg.num_epoch,
        eval_data=val,
        eval_metric=eval_metric,
        kvstore=kv,
        optimizer='sgd',  # ['sgd', 'nag']
        optimizer_params=optimizer_params,
        arg_params=arg_params,
        aux_params=aux_params,
        initializer=initializer,
        allow_missing=True,
        batch_end_callback=mx.callback.Speedometer(cfg.batch_size, args.frequent),
        epoch_end_callback=checkpoint)