def init_trainer_0(neural_network, number_of_batches):
    steps_iterations = [s * number_of_batches for s in SCHEDULER_STEPS]
    schedule = lr_scheduler.MultiFactorScheduler(step=steps_iterations, factor=SCHEDULER_FACTOR)
    schedule.base_lr = LEARNING_RATE
    sgd_optimizer = optimizer.SGD(learning_rate=LEARNING_RATE, momentum=MOMENTUM, lr_scheduler=schedule)
    trainer = gluon.Trainer(params=neural_network.collect_params(), optimizer=sgd_optimizer)
    return trainer
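# Hypothetical usage sketch for init_trainer_0(); the module-level constants below are
# assumptions, not from the original source:
#   SCHEDULER_STEPS = [30, 60, 90]   # epochs at which the LR is multiplied by SCHEDULER_FACTOR
#   SCHEDULER_FACTOR = 0.1
#   LEARNING_RATE, MOMENTUM = 0.1, 0.9
#   trainer = init_trainer_0(net, number_of_batches=num_train_samples // batch_size)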
def train(params, loader, model=None):
    epoch = params.get('epoch', 10)
    verbose = params.get("verbose", True)
    batch_size = params.get("batch_size", 32)
    if model is None:
        class_name = params["class_name"]
        layer_num = params.get("layer_num", 5)
        class_num = params.get("class_num", 3)
        s = params.get("s", 4)
        b = params.get("b", 2)
        yolo = Yolo(layer_num, class_num, s=s, b=b, class_name=class_name)
        yolo.initialize(init=Xavier(magnitude=0.02))
    else:
        print("model load finish")
        layer_num = model.layer_num
        class_num = model.class_num
        s = model.s
        b = model.b
        yolo = model
    if verbose:
        print("train params: \n\tepoch:%d \n\tlayer_num:%d \n\tclass_num:%d \n\ts:%d \n\tb:%d" %
              (epoch, layer_num, class_num, s, b))
    ngd = optimizer.SGD(momentum=0.7, learning_rate=0.005)
    trainer = gluon.Trainer(yolo.collect_params(), ngd)
    for ep in range(epoch):
        loader.reset()
        mean_loss = 0
        t1 = time()
        for i, batch in enumerate(loader):
            x = batch.data[0]
            y = batch.label[0].reshape((-1, 5))
            y = translate_y(y, yolo.s, yolo.b, yolo.class_num)
            y = nd.array(y)
            with autograd.record():
                loss_func = TotalLoss(s=s, c=class_num, b=b)
                ypre = yolo(x)  # (32, output_dim)
                loss = nd.mean(loss_func(ypre, y))
            mean_loss += loss.asscalar()
            loss.backward()
            trainer.step(batch_size)
        t2 = time()
        if verbose:
            print("epoch:%d/%d loss:%.5f time:%4f" % (ep + 1, epoch, mean_loss / 32, t2 - t1), flush=True)
            print()
    return yolo
def train2(params, loader: BaseDataLoader, model=None):
    epoch = params.get('epoch', 10)
    verbose = params.get("verbose", True)
    batch_size = params.get("batch_size", 32)
    if model is None:
        layer_num = params.get("layer_num", 5)
        class_num = params.get("class_num", 3)
        s = params.get("s", 4)
        b = params.get("b", 2)
        yolo = Yolo(layer_num, class_num, s=s, b=b)
        yolo.initialize(init=Xavier(magnitude=0.02))
    else:
        print("model load finish")
        layer_num = model.layer_num
        class_num = model.class_num
        s = model.s
        b = model.b
        yolo = model
    if verbose:
        print("train params: \n\tepoch:%d \n\tlayer_num:%d \n\tclass_num:%d \n\ts:%d \n\tb:%d" %
              (epoch, layer_num, class_num, s, b))
    ngd = optimizer.SGD(momentum=0.7, learning_rate=0.0025)
    trainer = gluon.Trainer(yolo.collect_params(), ngd)
    for ep in range(epoch):
        loss = 0
        all_batch = int(loader.data_number() / batch_size)
        t1 = time()
        for _ in range(all_batch):
            x, y = loader.next_batch(batch_size)
            with autograd.record():
                loss_func = TotalLoss(s=s, c=class_num, b=b)
                ypre = yolo(x)  # (32, output_dim)
                loss = nd.mean(loss_func(ypre, y))
            loss.backward()
            trainer.step(batch_size)
        t2 = time()
        if verbose:
            print("epoch:%d/%d loss:%.5f time:%4f" % (ep + 1, epoch, loss.asscalar(), t2 - t1), flush=True)
    return yolo
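# Hypothetical call for train()/train2() above (values are illustrative only; the keys are
# the ones the functions read from 'params'):
#   params = {'epoch': 30, 'batch_size': 32, 'class_name': ['cat', 'dog', 'bird'],
#             'layer_num': 5, 'class_num': 3, 's': 4, 'b': 2}
#   yolo = train(params, rec_loader)        # rec_loader: an MXNet iterator with .reset()/.data/.label
#   yolo = train2(params, custom_loader)    # custom_loader: a BaseDataLoader with next_batch()/data_number()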
def build_optimizer(type, lr, kerasDefaults):
    if type == 'sgd':
        if kerasDefaults['nesterov_sgd']:
            return optimizer.NAG(learning_rate=lr,
                                 momentum=kerasDefaults['momentum_sgd'],
                                 #rescale_grad=kerasDefaults['clipnorm'],
                                 #clip_gradient=kerasDefaults['clipvalue'],
                                 lr_scheduler=None)
        else:
            return optimizer.SGD(learning_rate=lr,
                                 momentum=kerasDefaults['momentum_sgd'],
                                 #rescale_grad=kerasDefaults['clipnorm'],
                                 #clip_gradient=kerasDefaults['clipvalue'],
                                 lr_scheduler=None)

    elif type == 'rmsprop':
        return optimizer.RMSProp(learning_rate=lr,
                                 gamma1=kerasDefaults['rho'],
                                 epsilon=kerasDefaults['epsilon'],
                                 centered=False,
                                 #rescale_grad=kerasDefaults['clipnorm'],
                                 #clip_gradient=kerasDefaults['clipvalue'],
                                 lr_scheduler=None)

    elif type == 'adagrad':
        # MXNet's AdaGrad names this argument 'eps'; only the dict key keeps the Keras spelling.
        return optimizer.AdaGrad(learning_rate=lr,
                                 eps=kerasDefaults['epsilon'])
                                 #rescale_grad=kerasDefaults['clipnorm'],
                                 #clip_gradient=kerasDefaults['clipvalue'])

    elif type == 'adadelta':
        return optimizer.AdaDelta(epsilon=kerasDefaults['epsilon'],
                                  rho=kerasDefaults['rho'])
                                  #rescale_grad=kerasDefaults['clipnorm'],
                                  #clip_gradient=kerasDefaults['clipvalue'])

    elif type == 'adam':
        # MXNet's Adam uses 'beta1'/'beta2'; the Keras-style names survive only as dict keys.
        return optimizer.Adam(learning_rate=lr,
                              beta1=kerasDefaults['beta_1'],
                              beta2=kerasDefaults['beta_2'],
                              epsilon=kerasDefaults['epsilon'])
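# Hypothetical usage sketch for build_optimizer(); 'kerasDefaults' is assumed to be a dict of
# Keras-style defaults (the keys read above), e.g.:
#   kerasDefaults = {'nesterov_sgd': False, 'momentum_sgd': 0.9, 'rho': 0.9, 'epsilon': 1e-8,
#                    'beta_1': 0.9, 'beta_2': 0.999, 'clipnorm': 1.0, 'clipvalue': 0.5}
#   opt = build_optimizer('adam', lr=1e-3, kerasDefaults=kerasDefaults)
#   trainer = gluon.Trainer(net.collect_params(), opt)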
def train_net(args):
    ctx = []
    cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
    if len(cvd) > 0:
        for i in range(len(cvd.split(','))):
            ctx.append(mx.gpu(i))
    if len(ctx) == 0:
        ctx = [mx.cpu()]
        print('use cpu')
    else:
        print('gpu num:', len(ctx))
    prefix = args.prefix
    prefix_dir = os.path.dirname(prefix)
    if not os.path.exists(prefix_dir):
        os.makedirs(prefix_dir)
    end_epoch = args.end_epoch
    args.ctx_num = len(ctx)
    network, num_layers = args.network.split(',')
    print('num_layers', num_layers)
    if args.per_batch_size == 0:
        args.per_batch_size = 128
    args.batch_size = args.per_batch_size * args.ctx_num
    args.rescale_threshold = 0
    args.image_channel = 3
    os.environ['BETA'] = str(args.beta)

    data_dir_list = args.data_dir.split(',')
    path_imgrecs = []
    path_imglist = None
    args.num_classes = []
    for data_dir in data_dir_list:
        prop = face_image.load_property(data_dir)
        args.num_classes.append(prop.num_classes)
        image_size = prop.image_size
        args.image_h = image_size[0]
        args.image_w = image_size[1]
        print('image_size', image_size)
        assert (args.num_classes[-1] > 0)
        print('num_classes', args.num_classes)
        path_imgrecs.append(os.path.join(data_dir, "train.rec"))

    if args.loss_type == 1 and args.num_classes > 20000:
        args.beta_freeze = 5000
        args.gamma = 0.06

    print('Called with argument:', args)
    data_shape = (args.image_channel, image_size[0], image_size[1])
    mean = None

    begin_epoch = 0
    base_lr = args.lr
    base_wd = args.wd
    base_mom = args.mom
    if len(args.pretrained) == 0:
        arg_params = None
        aux_params = None
        sym, arg_params, aux_params = get_symbol(network, int(num_layers), args, arg_params, aux_params)
    else:
        vec = args.pretrained.split(',')
        _, arg_params, aux_params = mx.model.load_checkpoint(vec[0], int(vec[1]))
        sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)

    #label_name = 'softmax_label'
    #label_shape = (args.batch_size,)
    ctx_group = dict(zip(['dev%d' % (i + 1) for i in range(len(ctx))], ctx))
    ctx_group['dev0'] = ctx
    model = mx.mod.Module(
        context=ctx,
        symbol=sym,
        data_names=['bn1_orig', 'bn1_mask', 'fc1_orig', 'fc1_mask'],
        label_names=['softmax_label'],
        group2ctxs=ctx_group,
    )
    val_dataiter = None

    from config import cutout
    data_shapes = [('data', (args.batch_size, args.image_channel, image_size[0], image_size[1]))]
    pretrain_model = get_module(args, data_shapes)
    train_dataiter = FaceImageIter(
        batch_size=args.batch_size,
        data_shape=data_shape,
        path_imgrecs=path_imgrecs,
        shuffle=True,
        rand_mirror=args.rand_mirror,
        mean=mean,
        cutout=cutout,
        loss_type=args.loss_type,
        #margin_m=args.margin_m,
        #margin_policy=args.margin_policy,
        #max_steps=args.max_steps,
        data_names=['bn1', 'fc1'],
        downsample_back=args.downsample_back,
        motion_blur=args.motion_blur,
        mx_model=pretrain_model,
    )

    #if args.loss_type < 10:
    #  _metric = AccMetric()
    #else:
    #  _metric = LossValueMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for loss_name, loss_idx in zip(['distill_loss', 'softmax_orig', 'softmax_mask'], [2, 3, 4]):
        eval_metrics.add(LossValueMetric(loss_name, loss_idx))
    #eval_metrics = None

    if args.network[1] == 'r' or args.network[1] == 'y':
        initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2)  #resnet style
    _rescale = 1.0 / args.ctx_num
    #opt = AdaBound()
    #opt = AdaBound(lr=base_lr, wd=base_wd, gamma=2. / args.max_steps)
    lr_steps = [int(x) for x in args.lr_steps.split(',')]
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_steps, factor=0.1, base_lr=base_lr)
    optimizer_params = {'learning_rate': base_lr, 'momentum': base_mom, 'wd': base_wd,
                        'rescale_grad': _rescale, 'lr_scheduler': lr_scheduler}
    opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale)
    som = 200
    _cb = mx.callback.Speedometer(args.batch_size, som)

    ver_list = []
    ver_name_list = []

    def ver_test(nbatch):
        return [0]

    highest_acc = [0.0, 0.0]  #lfw and target
    #for i in range(len(ver_list)):
    #  highest_acc.append(0.0)
    global_step = [0]
    save_step = [0]
    if len(args.lr_steps) == 0:
        lr_steps = [40000, 60000, 80000]
        if args.loss_type >= 1 and args.loss_type <= 7:
            lr_steps = [100000, 140000, 160000]
        p = 512.0 / args.batch_size
        for l in range(len(lr_steps)):
            lr_steps[l] = int(lr_steps[l] * p)
    else:
        lr_steps = [int(x) for x in args.lr_steps.split(',')]
    print('lr_steps', lr_steps)

    def _batch_callback(param):
        #global global_step
        global_step[0] += 1
        mbatch = global_step[0]
        #for _lr in lr_steps:
        #  if mbatch==args.beta_freeze+_lr:
        #    opt.lr *= 0.1
        #    print('lr change to', opt.lr)
        #    break
        _cb(param)
        if mbatch % 10000 == 0:
            print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)
        if mbatch >= 0 and mbatch % args.verbose == 0:
            acc_list = ver_test(mbatch)
            save_step[0] += 1
            msave = save_step[0]
            do_save = False
            if len(acc_list) > 0:
                lfw_score = acc_list[0]
                if lfw_score > highest_acc[0]:
                    highest_acc[0] = lfw_score
                    if lfw_score >= 0.998:
                        do_save = True
                if acc_list[-1] >= highest_acc[-1]:
                    highest_acc[-1] = acc_list[-1]
                    if lfw_score >= 0.99:
                        do_save = True
            if args.ckpt == 0:
                do_save = False
            elif args.ckpt > 1:
                do_save = True
            if do_save:
                print('saving', msave)
                arg, aux = model.get_params()
                mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux)
            print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1]))
        if mbatch <= args.beta_freeze:
            _beta = args.beta
        else:
            move = max(0, mbatch - args.beta_freeze)
            _beta = max(args.beta_min, args.beta * math.pow(1 + args.gamma * move, -1.0 * args.power))
        #print('beta', _beta)
        os.environ['BETA'] = str(_beta)
        if args.max_steps > 0 and mbatch > args.max_steps:
            sys.exit(0)

    epoch_cb = None
    train_dataiter = mx.io.PrefetchingIter(train_dataiter)

    model.fit(train_dataiter,
              begin_epoch=begin_epoch,
              num_epoch=end_epoch,
              eval_data=val_dataiter,
              eval_metric=eval_metrics,
              kvstore='device',
              #optimizer=opt,
              optimizer_params=optimizer_params,
              initializer=initializer,
              arg_params=arg_params,
              aux_params=aux_params,
              allow_missing=True,
              #allow_extra=True,
              batch_end_callback=_batch_callback,
              epoch_end_callback=epoch_cb)
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd) > 0: for i in xrange(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx) == 0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = "%s-%s-p%s" % (args.prefix, args.network, args.patch) end_epoch = args.end_epoch pretrained = args.pretrained load_epoch = args.load_epoch args.ctx_num = len(ctx) args.num_layers = int(args.network[1:]) print('num_layers', args.num_layers) if args.per_batch_size == 0: args.per_batch_size = 128 if args.network[0] == 'r': args.per_batch_size = 128 else: if args.num_layers >= 64: args.per_batch_size = 120 if args.ctx_num == 2: args.per_batch_size *= 2 elif args.ctx_num == 3: args.per_batch_size = 170 if args.network[0] == 'm': args.per_batch_size = 128 args.batch_size = args.per_batch_size * args.ctx_num args.rescale_threshold = 0 args.image_channel = 3 ppatch = [int(x) for x in args.patch.split('_')] image_size = [int(x) for x in args.image_size.split(',')] args.image_h = image_size[0] args.image_w = image_size[1] assert len(ppatch) == 5 #if args.patch%2==1: # args.image_channel = 1 #os.environ['GLOBAL_STEP'] = "0" os.environ['BETA'] = str(args.beta) args.use_val = False path_imgrec = None path_imglist = None val_rec = None #path_imglist = "/raid5data/dplearn/faceinsight_align_webface.lst.new" #path_imglist = "/raid5data/dplearn/faceinsight_align_webface_clean.lst.new" for line in open(os.path.join(args.data_dir, 'property')): args.num_classes = int(line.strip()) assert (args.num_classes > 0) print('num_classes', args.num_classes) #path_imglist = "/raid5data/dplearn/MS-Celeb-Aligned/lst2" path_imgrec = os.path.join(args.data_dir, "train.rec") val_rec = os.path.join(args.data_dir, "val.rec") if os.path.exists(val_rec): args.use_val = True else: val_rec = None #args.num_classes = 10572 #webface #args.num_classes = 81017 #args.num_classes = 82395 if args.loss_type == 1 and args.num_classes > 40000: args.beta_freeze = 5000 args.gamma = 0.06 print('Called with argument:', args) data_shape = (args.image_channel, image_size[0], image_size[1]) #mean = [127.5,127.5,127.5] mean = None if args.use_val: val_dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=val_rec, #path_imglist = val_path, shuffle=False, rand_mirror=False, mean=mean, ) else: val_dataiter = None begin_epoch = 0 base_lr = args.lr base_wd = args.wd base_mom = 0.9 if not args.retrain: #load and initialize params #print(pretrained) #_, arg_params, aux_params = mx.model.load_checkpoint(pretrained, load_epoch) arg_params = None aux_params = None sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) #arg_params, aux_params = load_param(pretrained, epoch, convert=True) data_shape_dict = { 'data': (args.batch_size, ) + data_shape, 'softmax_label': (args.batch_size, ) } if args.network[0] == 's': arg_params, aux_params = spherenet.init_weights( sym, data_shape_dict, args.num_layers) elif args.network[0] == 'm': arg_params, aux_params = marginalnet.init_weights( sym, data_shape_dict, args.num_layers) #resnet_dcn.init_weights(sym, data_shape_dict, arg_params, aux_params) else: #sym, arg_params, aux_params = mx.model.load_checkpoint(pretrained, load_epoch) _, arg_params, aux_params = mx.model.load_checkpoint( pretrained, load_epoch) sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) #begin_epoch = load_epoch #end_epoch = begin_epoch+10 #base_wd = 0.00005 if args.loss_type != 10: model = mx.mod.Module( context=ctx, symbol=sym, ) 
else: data_names = ('data', 'extra') model = mx.mod.Module( context=ctx, symbol=sym, data_names=data_names, ) if args.loss_type <= 9: train_dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=path_imgrec, shuffle=True, rand_mirror=True, mean=mean, ) elif args.loss_type == 10: train_dataiter = FaceImageIter4( batch_size=args.batch_size, ctx_num=args.ctx_num, images_per_identity=args.images_per_identity, data_shape=data_shape, path_imglist=path_imglist, shuffle=True, rand_mirror=True, mean=mean, patch=ppatch, use_extra=True, model=model, ) elif args.loss_type == 11: train_dataiter = FaceImageIter5( batch_size=args.batch_size, ctx_num=args.ctx_num, images_per_identity=args.images_per_identity, data_shape=data_shape, path_imglist=path_imglist, shuffle=True, rand_mirror=True, mean=mean, patch=ppatch, ) #args.epoch_size = int(math.ceil(train_dataiter.num_samples()/args.batch_size)) #_dice = DiceMetric() _acc = AccMetric() eval_metrics = [mx.metric.create(_acc)] # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric #for child_metric in [fcn_loss_metric]: # eval_metrics.add(child_metric) # callback #batch_end_callback = callback.Speedometer(input_batch_size, frequent=args.frequent) #epoch_end_callback = mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True) # decide learning rate #lr_step = '10,20,30' #train_size = 4848 #nrof_batch_in_epoch = int(train_size/input_batch_size) #print('nrof_batch_in_epoch:', nrof_batch_in_epoch) #lr_factor = 0.1 #lr_epoch = [float(epoch) for epoch in lr_step.split(',')] #lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] #lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) #lr_iters = [int(epoch * train_size / batch_size) for epoch in lr_epoch_diff] #print 'lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters #lr_scheduler = MultiFactorScheduler(lr_iters, lr_factor) # optimizer #optimizer_params = {'momentum': 0.9, # 'wd': 0.0005, # 'learning_rate': base_lr, # 'rescale_grad': 1.0, # 'clip_gradient': None} if args.network[0] == 'r': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style elif args.network[0] == 'i' or args.network[0] == 'x': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0 / args.ctx_num #_rescale = 1.0 opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) #opt = optimizer.RMSProp(learning_rate=base_lr, wd=base_wd, rescale_grad=_rescale) #opt = optimizer.AdaGrad(learning_rate=base_lr, wd=base_wd, rescale_grad=_rescale) #opt = optimizer.AdaGrad(learning_rate=base_lr, wd=base_wd, rescale_grad=1.0) _cb = mx.callback.Speedometer(args.batch_size, 10) lfw_dir = os.path.join(args.data_dir, 'lfw') lfw_set = lfw.load_dataset(lfw_dir, image_size) def lfw_test(nbatch): acc1, std1, acc2, std2, xnorm, embeddings_list = lfw.test( lfw_set, model, args.batch_size) print('[%d]XNorm: %f' % (nbatch, xnorm)) print('[%d]Accuracy: %1.5f+-%1.5f' % (nbatch, acc1, std1)) print('[%d]Accuracy-Flip: %1.5f+-%1.5f' % (nbatch, acc2, std2)) return acc2, embeddings_list def val_test(): acc = AccMetric() val_metric = mx.metric.create(acc) val_metric.reset() val_dataiter.reset() for i, eval_batch in enumerate(val_dataiter): model.forward(eval_batch, is_train=False) model.update_metric(val_metric, 
eval_batch.label) acc_value = val_metric.get_name_value()[0][1] print('VACC: %f' % (acc_value)) #global_step = 0 highest_acc = [0.0] last_save_acc = [0.0] global_step = [0] save_step = [0] if len(args.lr_steps) == 0: #lr_steps = [40000, 70000, 90000] lr_steps = [40000, 60000, 80000] if args.loss_type == 1: lr_steps = [100000, 140000, 160000] else: lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) def _batch_callback(param): #global global_step global_step[0] += 1 mbatch = global_step[0] for _lr in lr_steps: if mbatch == args.beta_freeze + _lr: opt.lr *= 0.1 print('lr change to', opt.lr) break _cb(param) if mbatch % 1000 == 0: print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch) #os.environ['GLOBAL_STEP'] = str(mbatch) if mbatch >= 0 and mbatch % args.verbose == 0: acc, embeddings_list = lfw_test(mbatch) save_step[0] += 1 msave = save_step[0] do_save = False if acc >= highest_acc[0]: highest_acc[0] = acc if acc >= 0.996: do_save = True if mbatch > lr_steps[-1] and mbatch % 10000 == 0: do_save = True if do_save: print('saving', msave, acc) if val_dataiter is not None: val_test() arg, aux = model.get_params() mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux) if acc >= highest_acc[0]: lfw_npy = "%s-lfw-%04d" % (prefix, msave) X = np.concatenate(embeddings_list, axis=0) print('saving lfw npy', X.shape) np.save(lfw_npy, X) print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[0])) if mbatch <= args.beta_freeze: _beta = args.beta else: move = max(0, mbatch - args.beta_freeze) _beta = max( args.beta_min, args.beta * math.pow(1 + args.gamma * move, -1.0 * args.power)) #_beta = max(args.beta_min, args.beta*math.pow(0.7, move//500)) #print('beta', _beta) os.environ['BETA'] = str(_beta) #epoch_cb = mx.callback.do_checkpoint(prefix, 1) epoch_cb = None #def _epoch_callback(epoch, sym, arg_params, aux_params): # print('epoch-end', epoch) model.fit( train_dataiter, begin_epoch=begin_epoch, num_epoch=end_epoch, eval_data=val_dataiter, eval_metric=eval_metrics, kvstore='device', optimizer=opt, #optimizer_params = optimizer_params, initializer=initializer, arg_params=arg_params, aux_params=aux_params, allow_missing=True, batch_end_callback=_batch_callback, epoch_end_callback=epoch_cb)
def train_net(args):
    ctx = []
    cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
    if len(cvd) > 0:
        for i in xrange(len(cvd.split(','))):
            ctx.append(mx.gpu(i))
    if len(ctx) == 0:
        ctx = [mx.cpu()]
        print('use cpu')
    else:
        print('gpu num:', len(ctx))
    prefix = os.path.join(args.models_root, '%s-%s-%s' % (args.network, args.loss, args.dataset), 'model')
    prefix_dir = os.path.dirname(prefix)
    print('prefix', prefix)
    if not os.path.exists(prefix_dir):
        os.makedirs(prefix_dir)
    args.ctx_num = len(ctx)
    args.batch_size = args.per_batch_size * args.ctx_num
    args.rescale_threshold = 0
    args.image_channel = config.image_shape[2]

    data_dir = config.dataset_path
    path_imgrec = None
    path_imglist = None
    image_size = config.image_shape[0:2]
    assert len(image_size) == 2
    assert image_size[0] == image_size[1]
    print('image_size', image_size)
    print('num_classes', config.num_classes)
    path_imgrec = os.path.join(data_dir, "train.rec")

    print('Called with argument:', args, config)
    data_shape = (args.image_channel, image_size[0], image_size[1])
    mean = None

    begin_epoch = 0
    if len(args.pretrained) == 0:
        arg_params = None
        aux_params = None
        sym = get_symbol(args)
        if config.net_name == 'spherenet':
            data_shape_dict = {'data': (args.per_batch_size, ) + data_shape}
            spherenet.init_weights(sym, data_shape_dict, args.num_layers)
    else:
        print('loading', args.pretrained, args.pretrained_epoch)
        _, arg_params, aux_params = mx.model.load_checkpoint(args.pretrained, args.pretrained_epoch)
        sym = get_symbol(args)

    if config.count_flops:
        all_layers = sym.get_internals()
        _sym = all_layers['fc1_output']
        FLOPs = flops_counter.count_flops(_sym, data=(1, 3, image_size[0], image_size[1]))
        print('Network FLOPs: %d' % FLOPs)

    #label_name = 'softmax_label'
    #label_shape = (args.batch_size,)
    model = mx.mod.Module(
        context=ctx,
        symbol=sym,
    )
    val_dataiter = None

    if config.loss_name.find('triplet') >= 0:
        from triplet_image_iter import FaceImageIter
        triplet_params = [config.triplet_bag_size, config.triplet_alpha, config.triplet_max_ap]
        train_dataiter = FaceImageIter(
            batch_size=args.batch_size,
            data_shape=data_shape,
            path_imgrec=path_imgrec,
            shuffle=True,
            rand_mirror=config.data_rand_mirror,
            mean=mean,
            cutoff=config.data_cutoff,
            ctx_num=args.ctx_num,
            images_per_identity=config.images_per_identity,
            triplet_params=triplet_params,
            mx_model=model,
        )
        _metric = LossValueMetric()
        eval_metrics = [mx.metric.create(_metric)]
    else:
        from image_iter import FaceImageIter
        train_dataiter = FaceImageIter(
            batch_size=args.batch_size,
            data_shape=data_shape,
            path_imgrec=path_imgrec,
            shuffle=True,
            rand_mirror=config.data_rand_mirror,
            mean=mean,
            cutoff=config.data_cutoff,
            color_jittering=config.data_color,
            images_filter=config.data_images_filter,
        )
        metric1 = AccMetric()
        eval_metrics = [mx.metric.create(metric1)]
        if config.ce_loss:
            metric2 = LossValueMetric()
            eval_metrics.append(mx.metric.create(metric2))

    if config.net_name == 'fresnet' or config.net_name == 'fmobilefacenet':
        initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2)  #resnet style
    else:
        initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2)
    #initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style
    _rescale = 1.0 / args.ctx_num
    opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd, rescale_grad=_rescale)
    _cb = mx.callback.Speedometer(args.batch_size, args.frequent)

    ver_list = []
    ver_name_list = []
    for name in config.val_targets:
        path = os.path.join(data_dir, name + ".bin")
        if os.path.exists(path):
            data_set = verification.load_bin(path, image_size)
            ver_list.append(data_set)
            ver_name_list.append(name)
            print('ver', name)

    def ver_test(nbatch):
        results = []
        for i in xrange(len(ver_list)):
            acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(
                ver_list[i], model, args.batch_size, 10, None, None)
            print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
            #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1))
            print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2))
            results.append(acc2)
        return results

    highest_acc = [0.0, 0.0]  #lfw and target
    #for i in xrange(len(ver_list)):
    #  highest_acc.append(0.0)
    global_step = [0]
    save_step = [0]
    lr_steps = [int(x) for x in args.lr_steps.split(',')]
    print('lr_steps', lr_steps)

    def _batch_callback(param):
        #global global_step
        global_step[0] += 1
        mbatch = global_step[0]
        for step in lr_steps:
            if mbatch == step:
                opt.lr *= 0.1
                print('lr change to', opt.lr)
                break
        _cb(param)
        if mbatch % 1000 == 0:
            print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)
        if mbatch >= 0 and mbatch % args.verbose == 0:
            acc_list = ver_test(mbatch)
            save_step[0] += 1
            msave = save_step[0]
            do_save = False
            is_highest = False
            if len(acc_list) > 0:
                #lfw_score = acc_list[0]
                #if lfw_score>highest_acc[0]:
                #  highest_acc[0] = lfw_score
                #  if lfw_score>=0.998:
                #    do_save = True
                score = sum(acc_list)
                if acc_list[-1] >= highest_acc[-1]:
                    if acc_list[-1] > highest_acc[-1]:
                        is_highest = True
                    else:
                        if score >= highest_acc[0]:
                            is_highest = True
                            highest_acc[0] = score
                    highest_acc[-1] = acc_list[-1]
                #if lfw_score>=0.99:
                #  do_save = True
            if is_highest:
                do_save = True
            if args.ckpt == 0:
                do_save = False
            elif args.ckpt == 2:
                do_save = True
            elif args.ckpt == 3:
                msave = 1
            if do_save:
                print('saving', msave)
                arg, aux = model.get_params()
                if config.ckpt_embedding:
                    all_layers = model.symbol.get_internals()
                    _sym = all_layers['fc1_output']
                    _arg = {}
                    for k in arg:
                        if not k.startswith('fc7'):
                            _arg[k] = arg[k]
                    mx.model.save_checkpoint(prefix, msave, _sym, _arg, aux)
                else:
                    mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux)
            print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1]))
        if config.max_steps > 0 and mbatch > config.max_steps:
            sys.exit(0)

    epoch_cb = None
    train_dataiter = mx.io.PrefetchingIter(train_dataiter)

    model.fit(
        train_dataiter,
        begin_epoch=begin_epoch,
        num_epoch=999999,
        eval_data=val_dataiter,
        eval_metric=eval_metrics,
        kvstore=args.kvstore,
        optimizer=opt,
        #optimizer_params = optimizer_params,
        initializer=initializer,
        arg_params=arg_params,
        aux_params=aux_params,
        allow_missing=True,
        batch_end_callback=_batch_callback,
        epoch_end_callback=epoch_cb)
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd)>0: for i in xrange(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx)==0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = args.prefix prefix_dir = os.path.dirname(prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) end_epoch = args.end_epoch args.ctx_num = len(ctx) args.num_layers = int(args.network[1:]) print('num_layers', args.num_layers) if args.per_batch_size==0: args.per_batch_size = 128 if args.loss_type==10: args.per_batch_size = 256 args.batch_size = args.per_batch_size*args.ctx_num args.rescale_threshold = 0 args.image_channel = 3 ppatch = [int(x) for x in args.patch.split('_')] assert len(ppatch)==5 os.environ['BETA'] = str(args.beta) args.use_val = False path_imgrec = None path_imglist = None val_rec = None prop = face_image.load_property(args.data_dir) args.num_classes = prop.num_classes image_size = prop.image_size args.image_h = image_size[0] args.image_w = image_size[1] print('image_size', image_size) assert(args.num_classes>0) print('num_classes', args.num_classes) #path_imglist = "/raid5data/dplearn/MS-Celeb-Aligned/lst2" path_imgrec = os.path.join(args.data_dir, "train.rec") val_rec = os.path.join(args.data_dir, "val.rec") if os.path.exists(val_rec) and args.loss_type<10: args.use_val = True else: val_rec = None #args.num_classes = 10572 #webface #args.num_classes = 81017 #args.num_classes = 82395 if args.loss_type==1 and args.num_classes>40000: args.beta_freeze = 5000 args.gamma = 0.06 if args.loss_type==11: args.images_per_identity = 2 elif args.loss_type==10: args.images_per_identity = 16 if args.loss_type<10: assert args.images_per_identity==0 else: assert args.images_per_identity>=2 args.per_identities = int(args.per_batch_size/args.images_per_identity) print('Called with argument:', args) data_shape = (args.image_channel,image_size[0],image_size[1]) mean = None if args.use_val: val_dataiter = FaceImageIter( batch_size = args.batch_size, data_shape = data_shape, path_imgrec = val_rec, #path_imglist = val_path, shuffle = False, rand_mirror = False, mean = mean, ) else: val_dataiter = None begin_epoch = 0 base_lr = args.lr base_wd = args.wd base_mom = args.mom if len(args.pretrained)==0: arg_params = None aux_params = None sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) else: vec = args.pretrained.split(',') _, arg_params, aux_params = mx.model.load_checkpoint(vec[0], int(vec[1])) sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) data_extra = None hard_mining = False if args.loss_type==10: hard_mining = True _shape = (args.batch_size, args.per_batch_size) data_extra = np.full(_shape, -1.0, dtype=np.float32) c = 0 while c<args.batch_size: a = 0 while a<args.per_batch_size: b = a+args.images_per_identity data_extra[(c+a):(c+b),a:b] = 1.0 #print(c+a, c+b, a, b) a = b c += args.per_batch_size elif args.loss_type==11: data_extra = np.zeros( (args.batch_size, args.per_identities), dtype=np.float32) c = 0 while c<args.batch_size: for i in xrange(args.per_identities): data_extra[c+i][i] = 1.0 c+=args.per_batch_size label_name = 'softmax_label' if data_extra is None: model = mx.mod.Module( context = ctx, symbol = sym, ) else: data_names = ('data', 'extra') #label_name = '' model = mx.mod.Module( context = ctx, symbol = sym, data_names = data_names, label_names = (label_name,), ) train_dataiter = FaceImageIter( batch_size = args.batch_size, data_shape = data_shape, path_imgrec = path_imgrec, 
shuffle = True, rand_mirror = True, mean = mean, ctx_num = args.ctx_num, images_per_identity = args.images_per_identity, data_extra = data_extra, hard_mining = hard_mining, mx_model = model, label_name = label_name, ) if args.loss_type<10: _metric = AccMetric() else: _metric = LossValueMetric() eval_metrics = [mx.metric.create(_metric)] if args.network[0]=='r': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style elif args.network[0]=='i' or args.network[0]=='x': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0/args.ctx_num opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) _cb = mx.callback.Speedometer(args.batch_size, 20) ver_list = [] ver_name_list = [] for name in args.target.split(','): path = os.path.join(args.data_dir,name+".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) def ver_test(nbatch): results = [] for i in xrange(len(ver_list)): acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], model, args.batch_size, data_extra) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results def val_test(): acc = AccMetric() val_metric = mx.metric.create(acc) val_metric.reset() val_dataiter.reset() for i, eval_batch in enumerate(val_dataiter): model.forward(eval_batch, is_train=False) model.update_metric(val_metric, eval_batch.label) acc_value = val_metric.get_name_value()[0][1] print('VACC: %f'%(acc_value)) highest_acc = [] for i in xrange(len(ver_list)): highest_acc.append(0.0) global_step = [0] save_step = [0] if len(args.lr_steps)==0: lr_steps = [40000, 60000, 70000] if args.loss_type==1: lr_steps = [50000, 70000, 80000] p = 512.0/args.batch_size for l in xrange(len(lr_steps)): lr_steps[l] = int(lr_steps[l]*p) else: lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) def _batch_callback(param): #global global_step global_step[0]+=1 mbatch = global_step[0] for _lr in lr_steps: if mbatch==args.beta_freeze+_lr: opt.lr *= 0.1 print('lr change to', opt.lr) break _cb(param) if mbatch%1000==0: print('lr-batch-epoch:',opt.lr,param.nbatch,param.epoch) if mbatch>=0 and mbatch%args.verbose==0: acc_list = ver_test(mbatch) save_step[0]+=1 msave = save_step[0] do_save = False lfw_score = acc_list[0] for i in xrange(len(acc_list)): acc = acc_list[i] if acc>=highest_acc[i]: highest_acc[i] = acc if lfw_score>=0.99: do_save = True if args.loss_type==1 and mbatch>lr_steps[-1] and mbatch%10000==0: do_save = True if do_save: print('saving', msave, acc) if val_dataiter is not None: val_test() arg, aux = model.get_params() mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux) #if acc>=highest_acc[0]: # lfw_npy = "%s-lfw-%04d" % (prefix, msave) # X = np.concatenate(embeddings_list, axis=0) # print('saving lfw npy', X.shape) # np.save(lfw_npy, X) #print('[%d]Accuracy-Highest: %1.5f'%(mbatch, highest_acc[0])) if mbatch<=args.beta_freeze: _beta = args.beta else: move = max(0, mbatch-args.beta_freeze) _beta = max(args.beta_min, args.beta*math.pow(1+args.gamma*move, -1.0*args.power)) #print('beta', _beta) 
os.environ['BETA'] = str(_beta) #epoch_cb = mx.callback.do_checkpoint(prefix, 1) epoch_cb = None #def _epoch_callback(epoch, sym, arg_params, aux_params): # print('epoch-end', epoch) model.fit(train_dataiter, begin_epoch = begin_epoch, num_epoch = end_epoch, eval_data = val_dataiter, eval_metric = eval_metrics, kvstore = 'device', optimizer = opt, #optimizer_params = optimizer_params, initializer = initializer, arg_params = arg_params, aux_params = aux_params, allow_missing = True, batch_end_callback = _batch_callback, epoch_end_callback = epoch_cb )
def train_net(args):
    ctx = []
    cvd = '0'  # os.environ['CUDA_VISIBLE_DEVICES'].strip()
    if len(cvd) > 0:
        for i in xrange(len(cvd.split(','))):
            ctx.append(mx.gpu(i))
    if len(ctx) == 0:
        ctx = [mx.cpu()]
        print('use cpu')
    else:
        print('gpu num:', len(ctx))
    prefix = args.prefix
    prefix_dir = os.path.dirname(prefix)
    if not os.path.exists(prefix_dir):
        os.makedirs(prefix_dir)
    end_epoch = args.end_epoch
    args.ctx_num = len(ctx)
    args.num_layers = int(args.network[1:])
    print('num_layers', args.num_layers)
    if args.per_batch_size == 0:
        args.per_batch_size = 128
    args.batch_size = args.per_batch_size * args.ctx_num
    args.rescale_threshold = 0
    print('num_classes', args.num_classes)
    print('Called with argument:', args)

    train_dataiter, val_dataiter, emb_size, data_size = load_data(args)
    args.emb_size = emb_size
    print('emb_size', emb_size)
    print('data_size', data_size)

    begin_epoch = 0
    base_lr = args.lr
    base_wd = args.wd
    base_mom = args.mom
    arg_params = None
    aux_params = None
    sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)

    #label_name = 'softmax_label'
    #label_shape = (args.batch_size,)
    model = mx.mod.Module(
        context=ctx,
        symbol=sym,
    )

    metric1 = AccMetric()
    eval_metrics = [mx.metric.create(metric1)]
    if args.ce_loss:
        metric2 = LossValueMetric()
        eval_metrics.append(mx.metric.create(metric2))

    if args.network[0] == 'r' or args.network[0] == 'y':
        initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2)  #resnet style
    elif args.network[0] == 'i' or args.network[0] == 'x':
        initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2)  #inception
    else:
        initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2)

    _rescale = 1.0 / args.ctx_num
    opt = optimizer.SGD(learning_rate=base_lr, momentum=args.mom, wd=args.wd, rescale_grad=_rescale)
    #opt = optimizer.Nadam(learning_rate=base_lr, wd=args.wd, rescale_grad=_rescale, clip_gradient=5.0)
    #opt = optimizer.Adam(learning_rate=base_lr, wd=args.wd, rescale_grad=_rescale)
    som = 20
    _cb = mx.callback.Speedometer(args.batch_size, som)

    lr_step = [int(x) for x in args.lr_step.split(',')]
    begin_epoch = args.begin_epoch
    epoches = lr_step
    end_epoch = epoches[-1]
    lr_factor = 0.1
    #lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    #lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    #lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    #lr_iters = [int(epoch * len(roidb) / input_batch_size) for epoch in lr_epoch_diff]
    #lr_iters = [36000,42000] #TODO
    #lr_iters = [40000,50000,60000] #TODO
    #lr_iters = [40,50,60] #TODO
    #logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters))
    #lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    highest_acc = [0.0, 0.0]  #lfw and target
    #for i in xrange(len(ver_list)):
    #  highest_acc.append(0.0)
    global_step = [0]
    save_step = [0]
    #lr_steps = [6000,10000,12000]
    lr_steps = []
    for ep in epoches:
        lr_steps.append(data_size * ep // args.batch_size)
    print('lr_steps', lr_steps)

    def _batch_callback(param):
        #global global_step
        global_step[0] += 1
        mbatch = global_step[0]
        for _lr in lr_steps:
            if mbatch == _lr:
                opt.lr *= 0.1
                print('lr change to', opt.lr)
                break
        _cb(param)
        if mbatch % 1000 == 0:
            print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)

    epoch_cb = mx.callback.do_checkpoint(args.prefix, period=end_epoch)
    train_dataiter = mx.io.PrefetchingIter(train_dataiter)

    model.fit(
        train_dataiter,
        begin_epoch=begin_epoch,
        num_epoch=end_epoch,
        eval_data=val_dataiter,
        eval_metric=eval_metrics,
        kvstore='device',
        optimizer=opt,
        #optimizer_params = optimizer_params,
        initializer=initializer,
        arg_params=arg_params,
        aux_params=aux_params,
        allow_missing=True,
        batch_end_callback=_batch_callback,
        epoch_end_callback=epoch_cb)
def train_net(args): #_seed = 727 #random.seed(_seed) #np.random.seed(_seed) #mx.random.seed(_seed) ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd)>0: for i in range(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx)==0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) if len(args.extra_model_name)==0: prefix = os.path.join(args.models_root, '%s-%s-%s'%(args.network, args.loss, args.dataset), 'model') else: prefix = os.path.join(args.models_root, '%s-%s-%s-%s'%(args.network, args.loss, args.dataset, args.extra_model_name), 'model') prefix_dir = os.path.dirname(prefix) print('prefix', prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) args.ctx_num = len(ctx) if args.per_batch_size==0: args.per_batch_size = 128 args.batch_size = args.per_batch_size*args.ctx_num args.rescale_threshold = 0 args.image_channel = config.image_shape[2] config.batch_size = args.batch_size config.per_batch_size = args.per_batch_size data_dir = config.dataset_path path_imgrec = None path_imglist = None image_size = config.image_shape[0:2] assert len(image_size)==2 assert image_size[0]==image_size[1] print('image_size', image_size) print('num_classes', config.num_classes) path_imgrec = os.path.join(data_dir, "train.rec") data_shape = (args.image_channel,image_size[0],image_size[1]) num_workers = config.num_workers global_num_ctx = num_workers * args.ctx_num if config.num_classes%global_num_ctx==0: args.ctx_num_classes = config.num_classes//global_num_ctx else: args.ctx_num_classes = config.num_classes//global_num_ctx+1 args.local_num_classes = args.ctx_num_classes * args.ctx_num args.local_class_start = args.local_num_classes * args.worker_id #if len(args.partial)==0: # local_classes_range = (0, args.num_classes) #else: # _vec = args.partial.split(',') # local_classes_range = (int(_vec[0]), int(_vec[1])) #args.partial_num_classes = local_classes_range[1] - local_classes_range[0] #args.partial_start = local_classes_range[0] print('Called with argument:', args, config) mean = None begin_epoch = 0 base_lr = args.lr base_wd = args.wd base_mom = args.mom arg_params = None aux_params = None if len(args.pretrained)==0: esym = get_symbol_embedding() asym = get_symbol_arcface else: assert False if config.count_flops: all_layers = esym.get_internals() _sym = all_layers['fc1_output'] FLOPs = flops_counter.count_flops(_sym, data=(1,3,image_size[0],image_size[1])) _str = flops_counter.flops_str(FLOPs) print('Network FLOPs: %s'%_str) if config.num_workers==1: from parall_module_local_v1 import ParallModule else: from parall_module_dist import ParallModule model = ParallModule( context = ctx, symbol = esym, data_names = ['data'], label_names = ['softmax_label'], asymbol = asym, args = args, ) val_dataiter = None train_dataiter = FaceImageIter( batch_size = args.batch_size, data_shape = data_shape, path_imgrec = path_imgrec, shuffle = True, rand_mirror = config.data_rand_mirror, mean = mean, cutoff = config.data_cutoff, color_jittering = config.data_color, images_filter = config.data_images_filter, ) if config.net_name=='fresnet' or config.net_name=='fmobilefacenet': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0/args.batch_size opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) _cb = mx.callback.Speedometer(args.batch_size, args.frequent) ver_list = [] ver_name_list = [] for name 
in config.val_targets: path = os.path.join(data_dir,name+".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) def ver_test(nbatch): results = [] for i in range(len(ver_list)): acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], model, args.batch_size, 10, None, None) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results highest_acc = [0.0, 0.0] #lfw and target #for i in range(len(ver_list)): # highest_acc.append(0.0) global_step = [0] save_step = [0] lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) def _batch_callback(param): #global global_step global_step[0]+=1 mbatch = global_step[0] for step in lr_steps: if mbatch==step: opt.lr *= 0.1 print('lr change to', opt.lr) break _cb(param) if mbatch%1000==0: print('lr-batch-epoch:',opt.lr,param.nbatch,param.epoch) if mbatch>=0 and mbatch%args.verbose==0: acc_list = ver_test(mbatch) save_step[0]+=1 msave = save_step[0] do_save = False is_highest = False if len(acc_list)>0: #lfw_score = acc_list[0] #if lfw_score>highest_acc[0]: # highest_acc[0] = lfw_score # if lfw_score>=0.998: # do_save = True score = sum(acc_list) if acc_list[-1]>=highest_acc[-1]: if acc_list[-1]>highest_acc[-1]: is_highest = True else: if score>=highest_acc[0]: is_highest = True highest_acc[0] = score highest_acc[-1] = acc_list[-1] #if lfw_score>=0.99: # do_save = True if is_highest: do_save = True if args.ckpt==0: do_save = False elif args.ckpt==2: do_save = True elif args.ckpt==3: msave = 1 if do_save: print('saving', msave) arg, aux = model.get_export_params() all_layers = model.symbol.get_internals() _sym = all_layers['fc1_output'] mx.model.save_checkpoint(prefix, msave, _sym, arg, aux) print('[%d]Accuracy-Highest: %1.5f'%(mbatch, highest_acc[-1])) if config.max_steps>0 and mbatch>config.max_steps: sys.exit(0) epoch_cb = None train_dataiter = mx.io.PrefetchingIter(train_dataiter) model.fit(train_dataiter, begin_epoch = begin_epoch, num_epoch = 999999, eval_data = val_dataiter, #eval_metric = eval_metrics, kvstore = args.kvstore, optimizer = opt, #optimizer_params = optimizer_params, initializer = initializer, arg_params = arg_params, aux_params = aux_params, allow_missing = True, batch_end_callback = _batch_callback, epoch_end_callback = epoch_cb )
def main(args):
    _seed = 727
    random.seed(_seed)
    np.random.seed(_seed)
    mx.random.seed(_seed)
    ctx = []
    # cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
    # if len(cvd)>0:
    #   for i in range(len(cvd.split(','))):
    #     ctx.append(mx.gpu(i))
    # if len(ctx)==0:
    #   ctx = [mx.cpu()]
    #   print('use cpu')
    # else:
    #   print('gpu num:', len(ctx))
    ctx = [mx.cpu()]
    args.ctx_num = len(ctx)
    args.batch_size = args.per_batch_size * args.ctx_num
    config.per_batch_size = args.per_batch_size

    print('Call with', args, config)
    train_iter = FaceSegIter(
        path_imgrec=os.path.join(config.dataset_path, 'train.rec'),
        batch_size=args.batch_size,
        per_batch_size=args.per_batch_size,
        aug_level=1,
        exf=args.exf,
        args=args,
    )

    data_shape = train_iter.get_data_shape()
    #label_shape = train_iter.get_label_shape()
    sym = sym_heatmap.get_symbol(num_classes=config.num_classes)
    if len(args.pretrained) == 0:
        #data_shape_dict = {'data' : (args.per_batch_size,)+data_shape, 'softmax_label' : (args.per_batch_size,)+label_shape}
        data_shape_dict = train_iter.get_shape_dict()
        arg_params, aux_params = sym_heatmap.init_weights(sym, data_shape_dict)
    else:
        vec = args.pretrained.split(',')
        print('loading', vec)
        _, arg_params, aux_params = mx.model.load_checkpoint(vec[0], int(vec[1]))
        #sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)

    model = mx.mod.Module(
        context=ctx,
        symbol=sym,
        label_names=train_iter.get_label_names(),
    )

    #lr = 1.0e-3
    #lr = 2.5e-4
    _rescale_grad = 1.0 / args.ctx_num
    #_rescale_grad = 1.0/args.batch_size
    #lr = args.lr
    #opt = optimizer.Nadam(learning_rate=args.lr, wd=args.wd, rescale_grad=_rescale_grad, clip_gradient=5.0)
    if args.optimizer == 'onadam':
        opt = ONadam(learning_rate=args.lr, wd=args.wd, rescale_grad=_rescale_grad, clip_gradient=5.0)
    elif args.optimizer == 'nadam':
        opt = optimizer.Nadam(learning_rate=args.lr, rescale_grad=_rescale_grad)
    elif args.optimizer == 'rmsprop':
        opt = optimizer.RMSProp(learning_rate=args.lr, rescale_grad=_rescale_grad)
    elif args.optimizer == 'adam':
        opt = optimizer.Adam(learning_rate=args.lr, rescale_grad=_rescale_grad)
    else:
        opt = optimizer.SGD(learning_rate=args.lr, momentum=0.9, wd=args.wd, rescale_grad=_rescale_grad)
    initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2)

    _cb = mx.callback.Speedometer(args.batch_size, args.frequent)
    _metric = LossValueMetric()
    #_metric = NMEMetric()
    #_metric2 = AccMetric()
    #eval_metrics = [_metric, _metric2]
    eval_metrics = [_metric]
    lr_steps = [int(x) for x in args.lr_step.split(',')]
    print('lr-steps', lr_steps)
    global_step = [0]

    def val_test():
        all_layers = sym.get_internals()
        vsym = all_layers['heatmap_output']
        vmodel = mx.mod.Module(symbol=vsym, context=ctx, label_names=None)
        #model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
        vmodel.bind(data_shapes=[('data', (args.batch_size, ) + data_shape)])
        arg_params, aux_params = model.get_params()
        vmodel.set_params(arg_params, aux_params)
        for target in config.val_targets:
            _file = os.path.join(config.dataset_path, '%s.rec' % target)
            if not os.path.exists(_file):
                continue
            val_iter = FaceSegIter(
                path_imgrec=_file,
                batch_size=args.batch_size,
                #batch_size = 4,
                aug_level=0,
                args=args,
            )
            _metric = NMEMetric()
            val_metric = mx.metric.create(_metric)
            val_metric.reset()
            val_iter.reset()
            for i, eval_batch in enumerate(val_iter):
                #print(eval_batch.data[0].shape, eval_batch.label[0].shape)
                batch_data = mx.io.DataBatch(eval_batch.data)
                model.forward(batch_data, is_train=False)
                model.update_metric(val_metric, eval_batch.label)
            nme_value = val_metric.get_name_value()[0][1]
            print('[%d][%s]NME: %f' % (global_step[0], target, nme_value))

    def _batch_callback(param):
        _cb(param)
        global_step[0] += 1
        mbatch = global_step[0]
        for _lr in lr_steps:
            if mbatch == _lr:
                opt.lr *= 0.2
                print('lr change to', opt.lr)
                break
        if mbatch % 1000 == 0:
            print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)
        if mbatch > 0 and mbatch % args.verbose == 0:
            val_test()
            if args.ckpt == 1:
                msave = mbatch // args.verbose
                print('saving', msave)
                arg, aux = model.get_params()
                mx.model.save_checkpoint(args.prefix, msave, model.symbol, arg, aux)
        if mbatch == lr_steps[-1]:
            if args.ckpt == 2:
                #msave = mbatch//args.verbose
                msave = 1
                print('saving', msave)
                arg, aux = model.get_params()
                mx.model.save_checkpoint(args.prefix, msave, model.symbol, arg, aux)
            sys.exit(0)

    train_iter = mx.io.PrefetchingIter(train_iter)

    model.fit(
        train_iter,
        begin_epoch=0,
        num_epoch=9999,
        #eval_data = val_iter,
        eval_data=None,
        eval_metric=eval_metrics,
        kvstore='device',
        optimizer=opt,
        initializer=initializer,
        arg_params=arg_params,
        aux_params=aux_params,
        allow_missing=True,
        batch_end_callback=_batch_callback,
        epoch_end_callback=None,
    )
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd) > 0: for i in xrange(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx) == 0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = args.prefix prefix_dir = os.path.dirname(prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) end_epoch = args.end_epoch args.ctx_num = len(ctx) args.num_layers = int(args.network[1:]) print('num_layers', args.num_layers) args.batch_size = args.batch_size1 args.rescale_threshold = 0 args.image_channel = 3 os.environ['BETA'] = str(args.beta) data_dir_list = args.data_dir.split(',') assert len(data_dir_list) == 1 data_dir = data_dir_list[0] prop = face_image.load_property(data_dir) args.num_classes = prop.num_classes image_size = prop.image_size args.image_h = image_size[0] args.image_w = image_size[1] print('image_size', image_size) assert (args.num_classes > 0) print('num_classes', args.num_classes) path_imgrec1 = os.path.join(data_dir, "train.rec") data_dir_interclass_list = args.data_dir_interclass.split(',') assert len(data_dir_interclass_list) == 1 data_dir_interclass = data_dir_interclass_list[0] path_imgrec2 = os.path.join(data_dir_interclass, "train.rec") if args.loss_type == 1 and args.num_classes > 20000: args.beta_freeze = 5000 args.gamma = 0.06 print('Called with argument:', args) data_shape = (args.image_channel, image_size[0], image_size[1]) mean = None begin_epoch = 0 base_lr = args.lr base_wd = args.wd base_mom = args.mom if len(args.pretrained) == 0: arg_params = None aux_params = None sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) print('sym: ', sym) else: vec = args.pretrained.split(',') #print('loading', vec) _, arg_params, aux_params = mx.model.load_checkpoint( vec[0], int(vec[1])) #if 'fc7_weight' in arg_params.keys(): # del arg_params['fc7_weight'] sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) sym_test, _, _ = get_symbol(args, arg_params, aux_params) #label_name = 'softmax_label' #label_shape = (args.batch_size,) model = mx.mod.Module( context=ctx, symbol=sym, ) val_dataiter = None train_dataiter = FaceImageIter( mx_model=model, ctx=ctx, ctx_num=args.ctx_num, data_shape=data_shape, batch_size1=args.batch_size1, path_imgrec1=path_imgrec1, batchsize_id=args.batchsize_id, batch_size2=args.batch_size2, path_imgrec2=path_imgrec2, images_per_identity=args.images_per_identity, interclass_bag_size=args.bag_size, shuffle=True, aug_list=None, rand_mirror=True, ) eval_metrics = [ mx.metric.create(AccMetric()), mx.metric.create(LossValue()), mx.metric.create(LossValue2()) ] if args.network[0] == 'r' or args.network[0] == 'y': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style elif args.network[0] == 'i' or args.network[0] == 'x': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0 / args.ctx_num opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) som = 2 _cb = mx.callback.Speedometer(args.batch_size, som) ver_list = [] ver_name_list = [] for name in args.target.split(','): path = os.path.join(data_dir, name + ".bin") #path = os.path.join('/ssd/MegaFace/MF2_aligned_pic9/', name + ".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) model_t = 
None def ver_test(nbatch, model_t): results = [] if model_t is None: all_layers = model.symbol.get_internals() symbol_t = all_layers['blockgrad0_output'] model_t = mx.mod.Module(symbol=symbol_t, context=ctx, label_names=None) print([('data', (10, ) + data_shape)]) model_t.bind(data_shapes=[('data', (10, ) + data_shape)]) arg_t, aux_t = model.get_params() model_t.set_params(arg_t, aux_t) else: arg_t, aux_t = model.get_params() model_t.set_params(arg_t, aux_t) for i in xrange(len(ver_list)): acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test( ver_list[i], model_t, 10, 10, None, None) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results highest_acc = [0.0, 0.0, 0.0] #lfw and target #for i in xrange(len(ver_list)): # highest_acc.append(0.0) global_step = [0] save_step = [0] if len(args.lr_steps) == 0: lr_steps = [100000, 140000, 160000] p = 512.0 / args.batch_size for l in xrange(len(lr_steps)): lr_steps[l] = int(lr_steps[l] * p) else: lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) def _batch_callback(param): #global global_step global_step[0] += 1 mbatch = global_step[0] for _lr in lr_steps: if mbatch == args.beta_freeze + _lr: opt.lr *= 0.1 print('lr change to', opt.lr) break _cb(param) #if mbatch%1==0: #print('mbatch:',mbatch) #arg, aux = model.get_params() if mbatch == 1: ver_test(mbatch, model_t) arg, aux = model.get_params() mx.model.save_checkpoint(prefix, 1000, model.symbol, arg, aux) print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch) if mbatch >= 0 and mbatch % args.verbose == 0: arg, aux = model.get_params() mx.model.save_checkpoint(prefix, 0, model.symbol, arg, aux) acc_list = ver_test(mbatch, model_t) save_step[0] += 1 msave = save_step[0] do_save = False if do_save: print('saving', msave) arg, aux = model.get_params() mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux) print('[%d]Accuracy-Highest: %1.5f %1.5f %1.5f' % (mbatch, highest_acc[0], highest_acc[1], highest_acc[2])) if mbatch <= args.beta_freeze: _beta = args.beta else: move = max(0, mbatch - args.beta_freeze) _beta = max( args.beta_min, args.beta * math.pow(1 + args.gamma * move, -1.0 * args.power)) #print('beta', _beta) os.environ['BETA'] = str(_beta) if args.max_steps > 0 and mbatch > args.max_steps: sys.exit(0) epoch_cb = None #print('arg_params',arg_params,aux_params) model.fit( train_dataiter, begin_epoch=begin_epoch, num_epoch=end_epoch, eval_data=val_dataiter, eval_metric=eval_metrics, kvstore='device', optimizer=opt, #optimizer_params = optimizer_params, initializer=initializer, arg_params=arg_params, aux_params=aux_params, allow_missing=True, batch_end_callback=_batch_callback, epoch_end_callback=epoch_cb)
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd) > 0: for i in range(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx) == 0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = args.prefix prefix_dir = os.path.dirname(prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) end_epoch = args.end_epoch args.ctx_num = len(ctx) network, num_layers = args.network.split(',') print('num_layers', num_layers) if args.per_batch_size == 0: args.per_batch_size = 128 args.batch_size = args.per_batch_size * args.ctx_num args.rescale_threshold = 0 args.image_channel = 3 os.environ['BETA'] = str(args.beta) data_dir_list = args.data_dir.split(',') path_imgrecs = [] path_imglist = None args.num_classes = [] for data_idx, data_dir in enumerate(data_dir_list): prop = face_image.load_property(data_dir) args.num_classes.append(prop.num_classes) image_size = prop.image_size if data_idx == 0: args.image_h = image_size[0] args.image_w = image_size[1] else: args.image_h = min(args.image_h, image_size[0]) args.image_w = min(args.image_w, image_size[1]) print('image_size', image_size) assert (args.num_classes[-1] > 0) print('num_classes', args.num_classes) path_imgrecs.append(os.path.join(data_dir, "train.rec")) if args.loss_type == 1 and args.num_classes > 20000: args.beta_freeze = 5000 args.gamma = 0.06 print('Called with argument:', args) data_shape = (args.image_channel, image_size[0], image_size[1]) mean = None begin_epoch = 0 base_lr = args.lr base_wd = args.wd base_mom = args.mom if len(args.pretrained) == 0: arg_params = None aux_params = None sym, arg_params, aux_params = get_symbol(network, int(num_layers), args, arg_params, aux_params) else: vec = args.pretrained.split(',') print('loading', vec) _, arg_params, aux_params = mx.model.load_checkpoint( vec[0], int(vec[1])) sym, arg_params, aux_params = get_symbol(network, int(num_layers), args, arg_params, aux_params) #label_name = 'softmax_label' #label_shape = (args.batch_size,) ctx_group = dict(zip(['dev%d' % (i + 1) for i in range(len(ctx))], ctx)) ctx_group['dev0'] = ctx model = mx.mod.Module( context=ctx, symbol=sym, data_names=['data'] if args.loss_type != 6 else ['data', 'margin'], group2ctxs=ctx_group) val_dataiter = None from config import crop from config import cutout train_dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrecs=path_imgrecs, shuffle=True, rand_mirror=args.rand_mirror, mean=mean, cutout=cutout, crop=crop, loss_type=args.loss_type, #margin_m = args.margin_m, #margin_policy = args.margin_policy, #max_steps = args.max_steps, #data_names = ['data', 'margin'], downsample_back=args.downsample_back, motion_blur=args.motion_blur, ) _metric = AccMetric() #_metric = LossValueMetric() eval_metrics = [mx.metric.create(_metric)] if args.network[0] == 'r' or args.network[0] == 'y': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style elif args.network[0] == 'i' or args.network[0] == 'x': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0 / args.ctx_num if len(args.lr_steps) == 0: print('Error: lr_steps is not seted') sys.exit(0) else: lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_steps, factor=0.1, base_lr=base_lr) optimizer_params = { 'learning_rate': 
base_lr, 'momentum': base_mom, 'wd': base_wd, 'rescale_grad': _rescale, 'lr_scheduler': lr_scheduler } #opt = AdaBound() #opt = AdaBound(lr=base_lr, wd=base_wd, gamma = 2. / args.max_steps) opt = optimizer.SGD(**optimizer_params) som = 2000 _cb = mx.callback.Speedometer(args.batch_size, som) ver_list = [] ver_name_list = [] for name in args.target.split(','): path = os.path.join(data_dir, name + ".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) def ver_test(nbatch): results = [] for i in range(len(ver_list)): _, issame_list = ver_list[i] if all(issame_list): fp_rates, fp_dict, thred_dict, recall_dict = verification.test( ver_list[i], model, args.batch_size, label_shape=(args.batch_size, len(path_imgrecs))) for k in fp_rates: print("[%s] TPR at FPR %.2e[%.2e: %.4f]:\t%.5f" % (ver_name_list[i], k, fp_dict[k], thred_dict[k], recall_dict[k])) else: acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test( ver_list[i], model, args.batch_size, 10, None, label_shape=(args.batch_size, len(path_imgrecs))) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results highest_acc = [0.0, 0.0] #lfw and target #for i in range(len(ver_list)): # highest_acc.append(0.0) global_step = [0] save_step = [0] def _batch_callback(param): #global global_step global_step[0] += 1 mbatch = global_step[0] _cb(param) if mbatch % 10000 == 0: print('lr-batch-epoch:', opt.learning_rate, param.nbatch, param.epoch) if mbatch >= 0 and mbatch % args.verbose == 0: acc_list = ver_test(mbatch) save_step[0] += 1 msave = save_step[0] do_save = False if len(acc_list) > 0: lfw_score = acc_list[0] if lfw_score > highest_acc[0]: highest_acc[0] = lfw_score if lfw_score >= 0.998: do_save = True if acc_list[-1] >= highest_acc[-1]: highest_acc[-1] = acc_list[-1] if lfw_score >= 0.99: do_save = True if args.ckpt == 0: do_save = False elif args.ckpt > 1: do_save = True if do_save: print('saving', msave) arg, aux = model.get_params() mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux) print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1])) if mbatch <= args.beta_freeze: _beta = args.beta else: move = max(0, mbatch - args.beta_freeze) _beta = max( args.beta_min, args.beta * math.pow(1 + args.gamma * move, -1.0 * args.power)) #print('beta', _beta) os.environ['BETA'] = str(_beta) if args.max_steps > 0 and mbatch > args.max_steps: sys.exit(0) epoch_cb = None train_dataiter = mx.io.PrefetchingIter(train_dataiter) model.fit(train_dataiter, begin_epoch=begin_epoch, num_epoch=end_epoch, eval_data=val_dataiter, eval_metric=eval_metrics, kvstore='device', optimizer=opt, optimizer_params=optimizer_params, initializer=initializer, arg_params=arg_params, aux_params=aux_params, allow_missing=True, batch_end_callback=_batch_callback, epoch_end_callback=epoch_cb)
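This train_net builds the schedule once and hands it to SGD through an optimizer_params dict. A self-contained sketch of that construction; the step counts and device count below are illustrative only.

from mxnet import optimizer, lr_scheduler

base_lr, base_mom, base_wd = 0.1, 0.9, 5e-4
lr_steps = [100000, 160000, 220000]                 # illustrative step counts
scheduler = lr_scheduler.MultiFactorScheduler(step=lr_steps, factor=0.1, base_lr=base_lr)

optimizer_params = {
    'learning_rate': base_lr,
    'momentum': base_mom,
    'wd': base_wd,
    'rescale_grad': 1.0 / 4,       # e.g. 1/ctx_num for 4 GPUs, as in the script
    'lr_scheduler': scheduler,
}
opt = optimizer.SGD(**optimizer_params)

# The scheduler is keyed on the number of weight updates:
for n in (1, 100001, 160001, 220001):
    print(n, scheduler(n))          # 0.1, 0.01, 0.001, 0.0001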
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd)>0: for i in range(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx)==0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = args.prefix prefix_dir = os.path.dirname(prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) end_epoch = args.end_epoch args.ctx_num = len(ctx) network, num_layers = args.network.split(',') print('num_layers', num_layers) if args.per_batch_size==0: args.per_batch_size = 128 args.batch_size = args.per_batch_size*args.ctx_num args.image_channel = 3 data_dir_list = args.data_dir.split(',') path_imgrecs = [] path_imglist = None for data_idx, data_dir in enumerate(data_dir_list): image_size = (112, 112) if data_idx == 0: args.image_h = image_size[0] args.image_w = image_size[1] else: args.image_h = min(args.image_h, image_size[0]) args.image_w = min(args.image_w, image_size[1]) print('image_size', image_size) path_imgrecs.append(data_dir) args.use_val = False val_rec = None print('Called with argument:', args) data_shape = (args.image_channel,image_size[0],image_size[1]) mean = None begin_epoch = 0 base_lr = args.lr base_wd = args.wd base_mom = args.mom if len(args.pretrained)==0: arg_params = None aux_params = None sym, arg_params, aux_params = get_symbol(network, int(num_layers), args, arg_params, aux_params) else: vec = args.pretrained.split(',') print('loading', vec) _, arg_params, aux_params = mx.model.load_checkpoint(vec[0], int(vec[1])) sym, arg_params, aux_params = get_symbol(network, int(num_layers), args, arg_params, aux_params) if args.network[0]=='s': data_shape_dict = {'data' : (args.per_batch_size,)+data_shape} spherenet.init_weights(sym, data_shape_dict, args.num_layers) data_extra = None hard_mining = False model = mx.mod.Module( context = ctx, symbol = sym, #data_names = ('data',), #label_names = None, #label_names = ('softmax_label',), ) label_shape = (args.batch_size,) val_dataiter = None from config import crop from config import cutout train_dataiter = FaceImageIter( batch_size = args.batch_size, data_shape = data_shape, path_imgrecs = path_imgrecs, shuffle = True, rand_mirror = args.rand_mirror, mean = mean, cutout = None, #cutout, crop = crop, downsample_back = args.downsample_back, motion_blur = args.motion_blur, mx_model = model, ctx_num = args.ctx_num, ) _metric = LossValueMetric() eval_metrics = [mx.metric.create(_metric)] if args.network[0]=='r': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style elif args.network[0]=='i' or args.network[0]=='x': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0/args.ctx_num opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) som = 200 _cb = mx.callback.Speedometer(args.batch_size, som) ver_list = [] ver_name_list = [] """ for name in args.target.split(','): path = os.path.join(os.path.dirname(data_dir),name+".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) """ def ver_test(nbatch): results = [] for i in range(len(ver_list)): _, issame_list = ver_list[i] if all(issame_list): fp_rates, fp_dict, thred_dict, recall_dict = verification.test(ver_list[i], model, args.batch_size, label_shape = (args.batch_size, len(path_imgrecs))) for k in fp_rates: 
print("[%s] TPR at FPR %.2e[%.2e: %.4f]:\t%.5f" %(ver_name_list[i], k, fp_dict[k], thred_dict[k], recall_dict[k])) else: acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], model, args.batch_size, 10, None, label_shape = (args.batch_size, len(path_imgrecs))) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results highest_acc = [0.0, 0.0] #lfw and target #for i in range(len(ver_list)): # highest_acc.append(0.0) global_step = [0] save_step = [0] if len(args.lr_steps)==0: lr_steps = [40000, 60000, 80000] if args.loss_type>=1 and args.loss_type<=7: lr_steps = [100000, 140000, 160000] p = 512.0/args.batch_size for l in range(len(lr_steps)): lr_steps[l] = int(lr_steps[l]*p) else: lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) def _batch_callback(param): #global global_step global_step[0]+=1 mbatch = global_step[0] for _lr in lr_steps: if mbatch==_lr: opt.lr *= 0.1 print('lr change to', opt.lr) break _cb(param) if mbatch%1000==0: print('lr-batch-epoch:',opt.lr,param.nbatch,param.epoch) if mbatch>=0 and mbatch%args.verbose==0: acc_list = ver_test(mbatch) save_step[0]+=1 msave = save_step[0] do_save = False if len(acc_list)>0: lfw_score = acc_list[0] if lfw_score>highest_acc[0]: highest_acc[0] = lfw_score if lfw_score>=0.998: do_save = True if acc_list[-1]>=highest_acc[-1]: highest_acc[-1] = acc_list[-1] if lfw_score>=0.99: do_save = True if args.ckpt==0: do_save = False elif args.ckpt>1: do_save = True #for i in range(len(acc_list)): # acc = acc_list[i] # if acc>=highest_acc[i]: # highest_acc[i] = acc # if lfw_score>=0.99: # do_save = True #if args.loss_type==1 and mbatch>lr_steps[-1] and mbatch%10000==0: # do_save = True if do_save: print('saving', msave) if val_dataiter is not None: val_test() arg, aux = model.get_params() mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux) print('[%d]Accuracy-Highest: %1.5f'%(mbatch, highest_acc[-1])) if args.max_steps>0 and mbatch>args.max_steps: sys.exit(0) #epoch_cb = mx.callback.do_checkpoint(prefix, 1) epoch_cb = None model.fit(train_dataiter, begin_epoch = begin_epoch, num_epoch = end_epoch, eval_data = val_dataiter, eval_metric = eval_metrics, kvstore = 'device', optimizer = opt, #optimizer_params = optimizer_params, initializer = initializer, arg_params = arg_params, aux_params = aux_params, allow_missing = True, batch_end_callback = _batch_callback, epoch_end_callback = epoch_cb )
def test_lr_scheduler():
    from mxnet import lr_scheduler, optimizer
    # Halve the learning rate every 250 updates, starting from the scheduler's base_lr.
    scheduler = lr_scheduler.FactorScheduler(base_lr=1, step=250, factor=0.5)
    optim = optimizer.SGD(learning_rate=0.1, lr_scheduler=scheduler)
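test_lr_scheduler only constructs the objects; querying the scheduler shows the halving behaviour. Note that in MXNet 1.x the optimizer generally overwrites the scheduler's base_lr with its own learning_rate, so the schedule below starts from 0.1 rather than 1 (a behavioural note, not something the original test asserts).

from mxnet import lr_scheduler, optimizer

scheduler = lr_scheduler.FactorScheduler(base_lr=1, step=250, factor=0.5)
optim = optimizer.SGD(learning_rate=0.1, lr_scheduler=scheduler)

for num_update in (1, 251, 501, 1001):
    print(num_update, scheduler(num_update))   # lr halves every 250 updates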
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd) > 0: for i in xrange(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx) == 0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = args.prefix prefix_dir = os.path.dirname(prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) end_epoch = args.end_epoch args.ctx_num = len(ctx) args.num_layers = int(args.network[1:]) print('num_layers', args.num_layers) if args.per_batch_size == 0: args.per_batch_size = 128 args.batch_size = args.per_batch_size * args.ctx_num args.image_channel = 3 data_dir_list = args.data_dir.split(',') assert len(data_dir_list) == 1 data_dir = data_dir_list[0] path_imgrec = None path_imglist = None prop = face_image.load_property(data_dir) args.num_classes = prop.num_classes image_size = prop.image_size args.image_h = image_size[0] args.image_w = image_size[1] print('image_size', image_size) assert (args.num_classes > 0) print('num_classes', args.num_classes) #path_imglist = "/raid5data/dplearn/MS-Celeb-Aligned/lst2" path_imgrec = os.path.join(data_dir, "train.rec") assert args.images_per_identity >= 2 assert args.triplet_bag_size % args.batch_size == 0 print('Called with argument:', args) data_shape = (args.image_channel, image_size[0], image_size[1]) mean = None begin_epoch = 0 base_lr = args.lr base_wd = args.wd base_mom = args.mom if len(args.pretrained) == 0: arg_params = None aux_params = None sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) if args.network[0] == 's': data_shape_dict = {'data': (args.per_batch_size, ) + data_shape} spherenet.init_weights(sym, data_shape_dict, args.num_layers) else: vec = args.pretrained.split(',') print('loading', vec) sym, arg_params, aux_params = mx.model.load_checkpoint( vec[0], int(vec[1])) all_layers = sym.get_internals() sym = all_layers['fc1_output'] sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params, sym_embedding=sym) data_extra = None hard_mining = False triplet_params = [ args.triplet_bag_size, args.triplet_alpha, args.triplet_max_ap ] model = mx.mod.Module( context=ctx, symbol=sym, #data_names = ('data',), #label_names = None, #label_names = ('softmax_label',), ) label_shape = (args.batch_size, ) val_dataiter = None train_dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=path_imgrec, shuffle=True, rand_mirror=args.rand_mirror, mean=mean, cutoff=args.cutoff, ctx_num=args.ctx_num, images_per_identity=args.images_per_identity, triplet_params=triplet_params, mx_model=model, ) _metric = LossValueMetric() eval_metrics = [mx.metric.create(_metric)] if args.network[0] == 'r': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style elif args.network[0] == 'i' or args.network[0] == 'x': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0 / args.ctx_num if args.noise_sgd > 0.0: print('use noise sgd') opt = NoiseSGD(scale=args.noise_sgd, learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) else: opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) som = 2 _cb = mx.callback.Speedometer(args.batch_size, som) ver_list = [] ver_name_list = [] for name in args.target.split(','): path = os.path.join(data_dir, name + ".bin") if os.path.exists(path): data_set = 
verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) def ver_test(nbatch): results = [] for i in xrange(len(ver_list)): acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test( ver_list[i], model, args.batch_size, 10, None, label_shape) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results highest_acc = [0.0, 0.0] #lfw and target #for i in xrange(len(ver_list)): # highest_acc.append(0.0) global_step = [0] save_step = [0] if len(args.lr_steps) == 0: lr_steps = [1000000000] else: lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) def _batch_callback(param): #global global_step global_step[0] += 1 mbatch = global_step[0] for _lr in lr_steps: if mbatch == _lr: opt.lr *= 0.1 print('lr change to', opt.lr) break _cb(param) if mbatch % 1000 == 0: print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch) if mbatch >= 0 and mbatch % args.verbose == 0: acc_list = ver_test(mbatch) save_step[0] += 1 msave = save_step[0] do_save = False if len(acc_list) > 0: lfw_score = acc_list[0] if lfw_score > highest_acc[0]: highest_acc[0] = lfw_score if lfw_score >= 0.998: do_save = True if acc_list[-1] >= highest_acc[-1]: highest_acc[-1] = acc_list[-1] if lfw_score >= 0.99: do_save = True if args.ckpt == 0: do_save = False elif args.ckpt > 1: do_save = True #for i in xrange(len(acc_list)): # acc = acc_list[i] # if acc>=highest_acc[i]: # highest_acc[i] = acc # if lfw_score>=0.99: # do_save = True #if args.loss_type==1 and mbatch>lr_steps[-1] and mbatch%10000==0: # do_save = True if do_save: print('saving', msave) if val_dataiter is not None: val_test() arg, aux = model.get_params() mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux) print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1])) if args.max_steps > 0 and mbatch > args.max_steps: sys.exit(0) #epoch_cb = mx.callback.do_checkpoint(prefix, 1) epoch_cb = None model.fit( train_dataiter, begin_epoch=begin_epoch, num_epoch=end_epoch, eval_data=val_dataiter, eval_metric=eval_metrics, kvstore='device', optimizer=opt, #optimizer_params = optimizer_params, initializer=initializer, arg_params=arg_params, aux_params=aux_params, allow_missing=True, batch_end_callback=_batch_callback, epoch_end_callback=epoch_cb)
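NoiseSGD in the script above is a project-specific optimizer class. Purely as an illustration of how a gradient-noise SGD variant could be written against the MXNet 1.x Optimizer API; this sketch is an assumption about its behaviour, not the original implementation.

import mxnet as mx
from mxnet import optimizer

class NoiseSGD(optimizer.SGD):
    """SGD that adds Gaussian noise to every gradient before the update (hypothetical)."""
    def __init__(self, scale=0.01, **kwargs):
        super(NoiseSGD, self).__init__(**kwargs)
        self.scale = scale

    def update(self, index, weight, grad, state):
        noise = mx.nd.random.normal(loc=0.0, scale=self.scale,
                                    shape=grad.shape, ctx=grad.context)
        super(NoiseSGD, self).update(index, weight, grad + noise, state)

opt = NoiseSGD(scale=0.01, learning_rate=0.05, momentum=0.9,
               wd=5e-4, rescale_grad=1.0 / 4)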
def main(args): _seed = 727 random.seed(_seed) np.random.seed(_seed) mx.random.seed(_seed) ctx = [] os.environ['CUDA_VISIBLE_DEVICES'] = '0' cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd) > 0: for i in xrange(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx) == 0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) #ctx = [mx.gpu(0)] args.ctx_num = len(ctx) args.batch_size = args.per_batch_size * args.ctx_num config.per_batch_size = args.per_batch_size print('Call with', args, config) train_iter = FaceSegIter( path_imgrec=os.path.join(config.dataset_path, 'train.rec'), batch_size=args.batch_size, per_batch_size=args.per_batch_size, aug_level=1, exf=args.exf, args=args, ) data_shape, data_size = train_iter.get_data_shape() #label_shape = train_iter.get_label_shape() sym = eval(config.network).get_symbol(num_classes=config.num_classes) if len(args.pretrained) == 0: #data_shape_dict = {'data' : (args.per_batch_size,)+data_shape, 'softmax_label' : (args.per_batch_size,)+label_shape} data_shape_dict = train_iter.get_shape_dict() arg_params, aux_params = init_weights(sym, data_shape_dict) else: vec = args.pretrained.split(',') print('loading', vec) _, arg_params, aux_params = mx.model.load_checkpoint( vec[0], int(vec[1])) #sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) model = mx.mod.Module( context=ctx, symbol=sym, label_names=train_iter.get_label_names(), ) #lr = 1.0e-3 #lr = 2.5e-4 _rescale_grad = 1.0 / args.ctx_num #_rescale_grad = 1.0/args.batch_size #lr = args.lr #opt = optimizer.Nadam(learning_rate=args.lr, wd=args.wd, rescale_grad=_rescale_grad, clip_gradient=5.0) if args.optimizer == 'onadam': opt = ONadam(learning_rate=args.lr, wd=args.wd, rescale_grad=_rescale_grad, clip_gradient=5.0) elif args.optimizer == 'nadam': opt = optimizer.Nadam(learning_rate=args.lr, rescale_grad=_rescale_grad) elif args.optimizer == 'rmsprop': opt = optimizer.RMSProp(learning_rate=args.lr, rescale_grad=_rescale_grad) elif args.optimizer == 'adam': opt = optimizer.Adam(learning_rate=args.lr, rescale_grad=_rescale_grad) else: opt = optimizer.SGD(learning_rate=args.lr, momentum=0.9, wd=args.wd, rescale_grad=_rescale_grad) initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) _cb = mx.callback.Speedometer(args.batch_size, args.frequent) _metric = LossValueMetric() #_metric = NMEMetric() #_metric2 = AccMetric() #eval_metrics = [_metric, _metric2] eval_metrics = [_metric] lr_epoch_steps = [int(x) for x in args.lr_epoch_step.split(',')] print('lr-epoch-steps', lr_epoch_steps) global_step = [0] highest_acc = [1.0, 1.0] def _batch_callback(param): _cb(param) global_step[0] += 1 mbatch = global_step[0] mepoch = mbatch * args.batch_size // data_size pre = mbatch * args.batch_size % data_size is_highest = False for _lr in lr_epoch_steps[0:-1]: if mepoch == _lr and pre < args.batch_size: opt.lr *= 0.2 print('lr change to', opt.lr) break if mbatch % 1000 == 0: print('lr:', opt.lr, 'batch:', param.nbatch, 'epoch:', param.epoch) if mbatch > 0 and mbatch % args.verbose == 0: acc_list = val_test(sym, model, ctx, data_shape, global_step) score = np.mean(acc_list) if acc_list[0] < highest_acc[0]: # ibug is_highest = True highest_acc[0] = acc_list[0] if score < highest_acc[1]: # mean is_highest = True highest_acc[1] = score if args.ckpt == 1 and is_highest == True: msave = mbatch // args.verbose print('saving', msave) arg, aux = model.get_params() mx.model.save_checkpoint(args.prefix, msave, model.symbol, arg, aux) if mepoch == 
lr_epoch_steps[-1]: if args.ckpt == 1: acc_list = val_test(sym, model, ctx, data_shape, global_step) msave = mbatch // args.verbose print('saving', msave) arg, aux = model.get_params() mx.model.save_checkpoint(args.prefix, msave, model.symbol, arg, aux) sys.exit(0) train_iter = mx.io.PrefetchingIter(train_iter) model.fit( train_iter, begin_epoch=0, num_epoch=9999, #eval_data = val_iter, eval_data=None, eval_metric=eval_metrics, kvstore='device', optimizer=opt, initializer=initializer, arg_params=arg_params, aux_params=aux_params, allow_missing=True, batch_end_callback=_batch_callback, epoch_end_callback=None, )
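main() above picks the optimizer from a command-line flag. The same selection as a small factory; the project's ONadam class is omitted because it is not part of MXNet, and the default values are illustrative.

from mxnet import optimizer

def make_optimizer(name, lr, wd, rescale_grad):
    if name == 'nadam':
        return optimizer.Nadam(learning_rate=lr, rescale_grad=rescale_grad)
    if name == 'rmsprop':
        return optimizer.RMSProp(learning_rate=lr, rescale_grad=rescale_grad)
    if name == 'adam':
        return optimizer.Adam(learning_rate=lr, rescale_grad=rescale_grad)
    # default branch mirrors the script: plain SGD with momentum and weight decay
    return optimizer.SGD(learning_rate=lr, momentum=0.9, wd=wd,
                         rescale_grad=rescale_grad)

opt = make_optimizer('sgd', lr=1e-3, wd=5e-5, rescale_grad=1.0 / 2)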
def fit(args, network, train_data_loader, val_data_loader, logger, **kwargs): """ train a model args : argparse returns network : the symbol definition of the nerual network data_loader : function that returns the train and val data iterators """ # kvstore kv = mx.kvstore.create(args.kv_store) if args.gc_type != 'none': kv.set_gradient_compression({ 'type': args.gc_type, 'threshold': args.gc_threshold }) # logging #head = '%(asctime)-15s Node[' + str(kv.rank) + '] %(message)s' #logging.basicConfig(level=logging.DEBUG, format=head) #logging.info('start with arguments %s', args) # data iterators #(train, val) = data_loader(args, kv) (train, val) = (train_data_loader, val_data_loader) if args.test_io: tic = time.time() for i, batch in enumerate(train): for j in batch.data: j.wait_to_read() if (i + 1) % args.disp_batches == 0: logging.info( 'Batch [%d]\tSpeed: %.2f samples/sec', i, args.disp_batches * args.batch_size / (time.time() - tic)) tic = time.time() return # load model if 'arg_params' in kwargs and 'aux_params' in kwargs: arg_params = kwargs['arg_params'] aux_params = kwargs['aux_params'] else: sym, arg_params, aux_params = _load_model(args, kv.rank) print("load model", args.load_epoch) #if sym is not None: # assert sym.tojson() == network.tojson() # save model checkpoint = _save_model(args, kv.rank) # devices for training devs = mx.cpu() if args.gpus is None or args.gpus == "" else [ mx.gpu(int(i)) for i in args.gpus.split(',') ] # learning rate lr, lr_scheduler = _get_lr_scheduler(args, kv) # create model model = mx.mod.Module(context=devs, symbol=network, logger=logger) lr_scheduler = lr_scheduler optimizer_params = { 'learning_rate': lr, 'wd': args.wd, 'lr_scheduler': lr_scheduler, 'multi_precision': True, # 'centered': False, # 'gamma1': 0.95, } # Only a limited number of optimizers have 'momentum' property has_momentum = {'sgd', 'dcasgd', 'nag'} if args.optimizer in has_momentum: optimizer_params['momentum'] = args.mom monitor = mx.mon.Monitor(args.monitor, pattern=".*") if args.monitor > 0 else None # A limited number of optimizers have a warmup period if args.initializer == 'default': if args.network == 'alexnet': # AlexNet will not converge using Xavier initializer = mx.init.Normal() # VGG will not trend to converge using Xavier-Gaussian elif 'vgg' in args.network: initializer = mx.init.Xavier() else: initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) # initializer = mx.init.Xavier(factor_type="in", magnitude=2.34), elif args.initializer == 'xavier': initializer = mx.init.Xavier() elif args.initializer == 'msra': initializer = mx.init.MSRAPrelu() elif args.initializer == 'orthogonal': initializer = mx.init.Orthogonal() elif args.initializer == 'normal': initializer = mx.init.Normal() elif args.initializer == 'uniform': initializer = mx.init.Uniform() elif args.initializer == 'one': initializer = mx.init.One() elif args.initializer == 'zero': initializer = mx.init.Zero() # evaluation metrices eval_metrics = ['accuracy'] if args.top_k > 0: eval_metrics.append( mx.metric.create('top_k_accuracy', top_k=args.top_k)) supported_loss = ['ce', 'nll_loss'] if len(args.loss) > 0: # ce or nll loss is only applicable to softmax output loss_type_list = args.loss.split(',') if 'softmax_output' in network.list_outputs(): for loss_type in loss_type_list: loss_type = loss_type.strip() if loss_type == 'nll': loss_type = 'nll_loss' if loss_type not in supported_loss: logging.warning(loss_type + ' is not an valid loss type, only cross-entropy or ' \ 'negative 
likelihood loss is supported!') else: eval_metrics.append(mx.metric.create(loss_type)) else: logging.warning( "The output is not softmax_output, loss argument will be skipped!" ) #optimizer base_wd = args.wd base_mom = 0.9 base_lr = args.lr gpu_list = [int(i) for i in args.gpus.split(',')] _rescale = 1.0 / len(gpu_list) opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) # callbacks that run after each batch #global global_step global_step = [0] save_step = [args.load_epoch] epoch_step = [0] som = args.display _cb = mx.callback.Speedometer(args.batch_size, som) model_prefix = args.model_prefix lr_steps = [int(l) for l in args.lr_step_epochs.split(',')] def _batch_callback(param): global_step[0] += 1 mbatch = global_step[0] _cb(param) if mbatch % 1000 == 0: print("%s : " % (datetime.now()), 'lr-batch-epoch:', opt.lr, param.nbatch, param.epoch) if mbatch >= 0 and mbatch % args.savestep == 0: save_step[0] += 1 msave = save_step[0] print('saving', msave) arg, aux = model.get_params() mx.model.save_checkpoint(model_prefix, msave, model.symbol, arg, aux) def _epoch_callback(param1, param2, param3, param4): epoch_step[0] += 1 mepoch = epoch_step[0] for _lr in lr_steps: if mepoch == args.beta_freeze + _lr: opt.lr *= args.lr_factor print('lr change to', opt.lr, param1) break # run model.fit(train, begin_epoch=args.load_epoch if args.load_epoch else 0, num_epoch=args.num_epochs, eval_data=val, eval_metric=eval_metrics, kvstore='device', optimizer=opt, optimizer_params=optimizer_params, initializer=initializer, arg_params=arg_params, aux_params=aux_params, batch_end_callback=_batch_callback, epoch_end_callback=_epoch_callback, allow_missing=True)
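fit() above only attaches momentum to optimizers that actually support it and always requests multi_precision. The same pattern, handing the resulting dict to mx.optimizer.create; the values are illustrative.

from mxnet import optimizer

name = 'sgd'
has_momentum = {'sgd', 'dcasgd', 'nag'}

optimizer_params = {
    'learning_rate': 0.01,
    'wd': 1e-4,
    'multi_precision': True,        # keep float32 master weights for float16 training
}
if name in has_momentum:
    optimizer_params['momentum'] = 0.9

opt = optimizer.create(name, **optimizer_params)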
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr=0.001, lr_step='5'): # setup config #init_config() #print(config) # setup multi-gpu input_batch_size = config.TRAIN.BATCH_IMAGES * len(ctx) # print config logger.info(pprint.pformat(config)) # load dataset and prepare imdb for training image_sets = [iset for iset in args.image_set.split('+')] roidbs = [ load_gt_roidb(args.dataset, image_set, args.root_path, args.dataset_path, flip=not args.no_flip) for image_set in image_sets ] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb) # load symbol #sym = eval('get_' + args.network + '_train')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS) #feat_sym = sym.get_internals()['rpn_cls_score_output'] #train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle, # ctx=ctx, work_load_list=args.work_load_list, # feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES, # anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING) sym = eval('get_' + args.network + '_train')() #print(sym.get_internals()) feat_sym = [] for stride in config.RPN_FEAT_STRIDE: feat_sym.append(sym.get_internals()['rpn_cls_score_stride%s_output' % stride]) #train_data = AnchorLoaderFPN(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle, # ctx=ctx, work_load_list=args.work_load_list) train_data = CropLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle, ctx=ctx, work_load_list=args.work_load_list) # infer max shape max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] #max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (1, roidb[0]['max_num_boxes'], 5))) logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape)) # infer shape data_shape_dict = dict(train_data.provide_data + train_data.provide_label) arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict) arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) out_shape_dict = dict(zip(sym.list_outputs(), out_shape)) aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) logger.info('output shape %s' % pprint.pformat(out_shape_dict)) # load and initialize params if args.resume: arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = load_param(pretrained, epoch, convert=True) #for k in ['rpn_conv_3x3', 'rpn_cls_score', 'rpn_bbox_pred', 'cls_score', 'bbox_pred']: # _k = k+"_weight" # if _k in arg_shape_dict: # v = 0.001 if _k.startswith('bbox_') else 0.01 # arg_params[_k] = mx.random.normal(0, v, shape=arg_shape_dict[_k]) # print('init %s with normal %.5f'%(_k,v)) # _k = k+"_bias" # if _k in arg_shape_dict: # arg_params[_k] = mx.nd.zeros(shape=arg_shape_dict[_k]) # print('init %s with zero'%(_k)) for k, v in arg_shape_dict.iteritems(): if k.find('upsampling') >= 0: print('initializing upsampling_weight', k) arg_params[k] = mx.nd.zeros(shape=v) init = mx.init.Initializer() init._init_bilinear(k, arg_params[k]) #print(args[k]) # check parameter shapes #for k in sym.list_arguments(): # if k in data_shape_dict: # continue # assert k in arg_params, k + ' not initialized' # assert arg_params[k].shape == arg_shape_dict[k], \ # 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' 
provided ' + str(arg_params[k].shape) #for k in sym.list_auxiliary_states(): # assert k in aux_params, k + ' not initialized' # assert aux_params[k].shape == aux_shape_dict[k], \ # 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) # create solver fixed_param_prefix = config.FIXED_PARAMS data_names = [k[0] for k in train_data.provide_data] label_names = [k[0] for k in train_data.provide_label] #mod = MutableModule(sym, data_names=data_names, label_names=label_names, # logger=logger, context=ctx, work_load_list=args.work_load_list, # max_data_shapes=max_data_shape, max_label_shapes=max_label_shape, # fixed_param_prefix=fixed_param_prefix) fixed_param_names = get_fixed_params(sym, fixed_param_prefix) print('fixed', fixed_param_names, file=sys.stderr) mod = Module(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, work_load_list=args.work_load_list, fixed_param_names=fixed_param_names) # decide training params # metric eval_metrics = mx.metric.CompositeEvalMetric() #if len(sym.list_outputs())>4: # metric_names = ['RPNAccMetric', 'RPNLogLossMetric', 'RPNL1LossMetric', 'RCNNAccMetric', 'RCNNLogLossMetric', 'RCNNL1LossMetric'] #else:#train rpn only #print('sym', sym.list_outputs()) #metric_names = ['RPNAccMetric', 'RPNLogLossMetric', 'RPNL1LossMetric'] mids = [0, 4, 8] for mid in mids: _metric = metric.RPNAccMetric(pred_idx=mid, label_idx=mid + 1) eval_metrics.add(_metric) #_metric = metric.RPNLogLossMetric(pred_idx=mid, label_idx=mid+1) #eval_metrics.add(_metric) _metric = metric.RPNL1LossMetric(loss_idx=mid + 2, weight_idx=mid + 3) eval_metrics.add(_metric) #rpn_eval_metric = metric.RPNAccMetric() #rpn_cls_metric = metric.RPNLogLossMetric() #rpn_bbox_metric = metric.RPNL1LossMetric() #eval_metric = metric.RCNNAccMetric() #cls_metric = metric.RCNNLogLossMetric() #bbox_metric = metric.RCNNL1LossMetric() #for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]: # eval_metrics.add(child_metric) # callback means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES) stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES) #epoch_end_callback = callback.do_checkpoint(prefix, means, stds) epoch_end_callback = None # decide learning rate base_lr = lr lr_factor = 0.1 lr_epoch = [int(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [ epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch ] lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [ int(epoch * len(roidb) / input_batch_size) for epoch in lr_epoch_diff ] #lr_iters = [36000,42000] #TODO #lr_iters = [40000,50000,60000] #TODO #lr_iters = [40,50,60] #TODO end_epoch = 10000 #lr_iters = [4,8] #TODO logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters)) #lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor) # optimizer opt = optimizer.SGD(learning_rate=lr, momentum=0.9, wd=0.0005, rescale_grad=1.0 / len(ctx), clip_gradient=None) initializer = mx.init.Xavier() #initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style if len(ctx) > 1: train_data = mx.io.PrefetchingIter(train_data) _cb = mx.callback.Speedometer(train_data.batch_size, frequent=args.frequent, auto_reset=False) global_step = [0] def save_model(epoch): arg, aux = mod.get_params() all_layers = mod.symbol.get_internals() outs = [] for stride in config.RPN_FEAT_STRIDE: num_anchors = 
config.RPN_ANCHOR_CFG[str(stride)]['NUM_ANCHORS'] _name = 'rpn_cls_score_stride%d_output' % stride rpn_cls_score = all_layers[_name] # prepare rpn data rpn_cls_score_reshape = mx.symbol.Reshape( data=rpn_cls_score, shape=(0, 2, -1, 0), name="rpn_cls_score_reshape_stride%d" % stride) rpn_cls_prob = mx.symbol.SoftmaxActivation( data=rpn_cls_score_reshape, mode="channel", name="rpn_cls_prob_stride%d" % stride) rpn_cls_prob_reshape = mx.symbol.Reshape( data=rpn_cls_prob, shape=(0, 2 * num_anchors, -1, 0), name='rpn_cls_prob_reshape_stride%d' % stride) _name = 'rpn_bbox_pred_stride%d_output' % stride rpn_bbox_pred = all_layers[_name] outs.append(rpn_cls_prob_reshape) outs.append(rpn_bbox_pred) _sym = mx.sym.Group(outs) mx.model.save_checkpoint(prefix, epoch, _sym, arg, aux) def _batch_callback(param): #global global_step _cb(param) global_step[0] += 1 mbatch = global_step[0] for _iter in lr_iters: if mbatch == _iter: opt.lr *= 0.1 print('lr change to', opt.lr, ' in batch', mbatch, file=sys.stderr) break if mbatch % 1000 == 0: print('saving final checkpoint', mbatch, file=sys.stderr) save_model(mbatch) if mbatch == lr_iters[-1]: print('saving final checkpoint', mbatch, file=sys.stderr) save_model(0) #arg, aux = mod.get_params() #mx.model.save_checkpoint(prefix, 99, mod.symbol, arg, aux) sys.exit(0) # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=_batch_callback, kvstore=args.kvstore, optimizer=opt, initializer=initializer, allow_missing=True, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
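The deconvolution ('upsampling') weights above are zero-initialised and then filled with a bilinear kernel through the private _init_bilinear helper on the base Initializer, exactly as the script does. Isolated below with an illustrative 4-D weight shape.

import mxnet as mx

weight = mx.nd.zeros((2, 2, 4, 4))                 # illustrative deconvolution kernel shape
init = mx.init.Initializer()
init._init_bilinear('upsampling0_weight', weight)  # fills the array with a bilinear kernel
print(weight[0, 0].asnumpy())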
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd) > 0: for i in xrange(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx) == 0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = args.prefix prefix_dir = os.path.dirname(prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) end_epoch = args.end_epoch args.ctx_num = len(ctx) args.num_layers = int(args.network[1:]) print('num_layers', args.num_layers) if args.per_batch_size == 0: args.per_batch_size = 128 args.batch_size = args.per_batch_size * args.ctx_num args.rescale_threshold = 0 args.image_channel = 3 data_dir_list = args.data_dir.split(',') assert len(data_dir_list) == 1 data_dir = data_dir_list[0] path_imgrec = None path_imglist = None image_size = [int(x) for x in args.image_size.split(',')] assert len(image_size) == 2 assert image_size[0] == image_size[1] args.image_h = image_size[0] args.image_w = image_size[1] print('image_size', image_size) path_imgrec = os.path.join(data_dir, "train.rec") path_imgrec_val = os.path.join(data_dir, "val.rec") print('Called with argument:', args) data_shape = (args.image_channel, image_size[0], image_size[1]) mean = None begin_epoch = 0 base_lr = args.lr base_wd = args.wd base_mom = args.mom if len(args.pretrained) == 0: arg_params = None aux_params = None sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) else: vec = args.pretrained.split(',') print('loading', vec) _, arg_params, aux_params = mx.model.load_checkpoint( vec[0], int(vec[1])) sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) #label_name = 'softmax_label' #label_shape = (args.batch_size,) model = mx.mod.Module( context=ctx, symbol=sym, ) val_dataiter = None train_dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=path_imgrec, shuffle=True, rand_mirror=args.rand_mirror, mean=mean, cutoff=args.cutoff, color_jittering=args.color, ) val_dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=path_imgrec_val, shuffle=False, rand_mirror=False, mean=mean, ) metric = mx.metric.CompositeEvalMetric( [AccMetric(), MAEMetric(), CUMMetric()]) if args.network[0] == 'r' or args.network[0] == 'y': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style elif args.network[0] == 'i' or args.network[0] == 'x': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0 / args.ctx_num opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) #opt = optimizer.Nadam(learning_rate=base_lr, wd=base_wd, rescale_grad=_rescale) som = 20 _cb = mx.callback.Speedometer(args.batch_size, som) lr_steps = [int(x) for x in args.lr_steps.split(',')] global_step = [0] save_step = [0] def _batch_callback(param): _cb(param) global_step[0] += 1 mbatch = global_step[0] for _lr in lr_steps: if mbatch == _lr: opt.lr *= 0.1 print('lr change to', opt.lr) break if mbatch % 1000 == 0: print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch) if args.max_steps > 0 and mbatch > args.max_steps: sys.exit(0) def _epoch_callback(epoch, symbol, arg_params, aux_params): save_step[0] += 1 msave = save_step[0] do_save = False if args.ckpt == 0: do_save = False elif args.ckpt == 2: do_save = True if do_save: print('saving %s' % msave) arg, aux = model.get_params() all_layers = 
model.symbol.get_internals() _sym = all_layers['fc1_output'] mx.model.save_checkpoint(args.prefix, msave, _sym, arg, aux) train_dataiter = mx.io.PrefetchingIter(train_dataiter) print('start fitting') model.fit( train_dataiter, begin_epoch=begin_epoch, num_epoch=end_epoch, eval_data=val_dataiter, eval_metric=metric, kvstore='device', optimizer=opt, #optimizer_params = optimizer_params, initializer=initializer, arg_params=arg_params, aux_params=aux_params, allow_missing=True, batch_end_callback=_batch_callback, epoch_end_callback=_epoch_callback)
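The epoch callback above checkpoints only the embedding part of the graph. The same idea as a stand-alone helper: 'fc1_output' is the embedding layer name used throughout these scripts, and model and prefix are assumed to be a fitted mx.mod.Module and a checkpoint prefix.

import mxnet as mx

def save_embedding_checkpoint(model, prefix, epoch):
    arg, aux = model.get_params()
    all_layers = model.symbol.get_internals()
    emb_sym = all_layers['fc1_output']      # drop the loss head, keep everything up to fc1
    mx.model.save_checkpoint(prefix, epoch, emb_sym, arg, aux)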
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd)>0: for i in xrange(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx)==0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = args.prefix end_epoch = args.end_epoch pretrained = '../model/resnet-152' load_epoch = args.load_epoch args.image_size = 160 per_batch_size = 60 args.ctx_num = len(ctx) args.batch_size = per_batch_size*args.ctx_num #args.all_batch_size = args.batch_size*args.ctx_num args.bag_size = 3600 args.margin = 0.2 args.num_classes = 10575 #webface data_shape = (3,args.image_size,args.image_size) begin_epoch = 0 base_lr = 0.05 base_wd = 0.0002 base_mom = 0.0 lr_decay = 0.98 if not args.retrain: #load and initialize params print(pretrained) _, arg_params, aux_params = mx.model.load_checkpoint(pretrained, load_epoch) sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) #arg_params, aux_params = load_param(pretrained, epoch, convert=True) data_shape_dict = {'data': (args.batch_size, 3, args.image_size, args.image_size), 'softmax_label': (args.batch_size,)} resnet_dcn.init_weights(sym, data_shape_dict, arg_params, aux_params) else: pretrained = args.prefix sym, arg_params, aux_params = mx.model.load_checkpoint(pretrained, load_epoch) begin_epoch = load_epoch end_epoch = begin_epoch+10 base_wd = 0.00005 lr_decay = 0.5 base_lr = 0.015 # infer max shape model = mx.mod.Module( context = ctx, symbol = sym, #label_names = [], #fixed_param_prefix = fixed_param_prefix, ) train_dataiter = FaceIter( path_imglist = "/raid5data/dplearn/faceinsight_align_webface.lst", data_shape = data_shape, mod = model, ctx_num = args.ctx_num, batch_size = args.batch_size, bag_size = args.bag_size, images_per_person = 5, ) #_dice = DiceMetric() _acc = AccMetric() eval_metrics = [mx.metric.create(_acc)] # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric #for child_metric in [fcn_loss_metric]: # eval_metrics.add(child_metric) # callback #batch_end_callback = callback.Speedometer(input_batch_size, frequent=args.frequent) #epoch_end_callback = mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True) # decide learning rate #lr_step = '10,20,30' #train_size = 4848 #nrof_batch_in_epoch = int(train_size/input_batch_size) #print('nrof_batch_in_epoch:', nrof_batch_in_epoch) #lr_factor = 0.1 #lr_epoch = [float(epoch) for epoch in lr_step.split(',')] #lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] #lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) #lr_iters = [int(epoch * train_size / batch_size) for epoch in lr_epoch_diff] #print 'lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters #lr_scheduler = MultiFactorScheduler(lr_iters, lr_factor) # optimizer #optimizer_params = {'momentum': 0.9, # 'wd': 0.0005, # 'learning_rate': base_lr, # 'rescale_grad': 1.0, # 'clip_gradient': None} initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #opt = optimizer.SGD(learning_rate=base_lr, momentum=0.9, wd=base_wd, rescale_grad=(1.0/args.batch_size)) opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=1.0) #opt = optimizer.AdaGrad(learning_rate=base_lr, wd=base_wd, rescale_grad=1.0) _cb = mx.callback.Speedometer(args.batch_size, 10) lfw_dir = '/raid5data/dplearn/lfw_mtcnn' lfw_pairs = lfw.read_pairs(os.path.join(lfw_dir, 'pairs.txt')) lfw_paths, issame_list = lfw.get_paths(lfw_dir, lfw_pairs, 'png') imgs = [] 
lfw_data_list = [] for flip in [0,1]: lfw_data = nd.empty((len(lfw_paths), 3, args.image_size, args.image_size)) i = 0 for path in lfw_paths: with open(path, 'rb') as fin: _bin = fin.read() img = mx.image.imdecode(_bin) img = nd.transpose(img, axes=(2, 0, 1)) if flip==1: img = img.asnumpy() for c in xrange(img.shape[0]): img[c,:,:] = np.fliplr(img[c,:,:]) img = nd.array( img ) #print(img.shape) lfw_data[i][:] = img i+=1 if i%1000==0: print('loading lfw', i) print(lfw_data.shape) lfw_data_list.append(lfw_data) def lfw_test(nbatch): print('testing lfw..') embeddings_list = [] for i in xrange( len(lfw_data_list) ): lfw_data = lfw_data_list[i] embeddings = None ba = 0 while ba<lfw_data.shape[0]: bb = min(ba+args.batch_size, lfw_data.shape[0]) _data = nd.slice_axis(lfw_data, axis=0, begin=ba, end=bb) _label = nd.ones( (bb-ba,) ) db = mx.io.DataBatch(data=(_data,), label=(_label,)) model.forward(db, is_train=False) net_out = model.get_outputs() _embeddings = net_out[0].asnumpy() if embeddings is None: embeddings = np.zeros( (lfw_data.shape[0], _embeddings.shape[1]) ) embeddings[ba:bb,:] = _embeddings ba = bb embeddings_list.append(embeddings) acc_list = [] embeddings = embeddings_list[0] _, _, accuracy, val, val_std, far = lfw.evaluate(embeddings, issame_list, nrof_folds=10) acc_list.append(np.mean(accuracy)) print('[%d]Accuracy: %1.3f+-%1.3f' % (nbatch, np.mean(accuracy), np.std(accuracy))) print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far)) embeddings = np.concatenate(embeddings_list, axis=1) embeddings = sklearn.preprocessing.normalize(embeddings) print(embeddings.shape) _, _, accuracy, val, val_std, far = lfw.evaluate(embeddings, issame_list, nrof_folds=10) acc_list.append(np.mean(accuracy)) print('[%d]Accuracy-Flip: %1.3f+-%1.3f' % (nbatch, np.mean(accuracy), np.std(accuracy))) print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far)) pca = PCA(n_components=128) embeddings = pca.fit_transform(embeddings) embeddings = sklearn.preprocessing.normalize(embeddings) print(embeddings.shape) _, _, accuracy, val, val_std, far = lfw.evaluate(embeddings, issame_list, nrof_folds=10) acc_list.append(np.mean(accuracy)) print('[%d]Accuracy-PCA: %1.3f+-%1.3f' % (nbatch, np.mean(accuracy), np.std(accuracy))) print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far)) return max(*acc_list) #global_step = 0 highest_acc = [0.0] last_save_acc = [0.0] def _batch_callback(param): #global global_step mbatch = param.nbatch+1 if mbatch % 4000 == 0: opt.lr *= lr_decay #print(param.nbatch, opt.lr) _cb(param) if param.nbatch%100==0: print('lr-batch-epoch:',opt.lr,param.nbatch,param.epoch) if param.nbatch>=0 and param.nbatch%400==0: acc = lfw_test(param.nbatch) if acc>highest_acc[0]: highest_acc[0] = acc if acc>0.9 and acc-last_save_acc[0]>=0.01: print('saving', mbatch, acc, last_save_acc[0]) _arg, _aux = model.get_params() mx.model.save_checkpoint(args.prefix, mbatch, model.symbol, _arg, _aux) last_save_acc[0] = acc print('[%d]highest Accu: %1.3f'%(param.nbatch, highest_acc[0])) sys.stdout.flush() sys.stderr.flush() epoch_cb = mx.callback.do_checkpoint(prefix, 1) #epoch_cb = None def _epoch_callback(epoch, sym, arg_params, aux_params): print('epoch-end', epoch) model.fit(train_dataiter, begin_epoch = begin_epoch, num_epoch = end_epoch, #eval_data = val_dataiter, eval_metric = eval_metrics, kvstore = 'device', optimizer = opt, #optimizer_params = optimizer_params, initializer = initializer, arg_params = arg_params, aux_params = aux_params, allow_missing = True, 
batch_end_callback = _batch_callback, epoch_end_callback = epoch_cb )
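lfw_test above scores embeddings three ways: as-is, concatenated with the flipped-image embeddings, and after PCA, L2-normalising before each evaluation. The post-processing in isolation, with random arrays standing in for the network outputs.

import numpy as np
import sklearn.preprocessing
from sklearn.decomposition import PCA

emb_orig = np.random.randn(12, 256).astype(np.float32)      # embeddings of the originals
emb_flip = np.random.randn(12, 256).astype(np.float32)      # embeddings of the flipped copies

embeddings = np.concatenate([emb_orig, emb_flip], axis=1)   # (12, 512)
embeddings = sklearn.preprocessing.normalize(embeddings)    # unit L2 norm per row

pca = PCA(n_components=8)                                   # the script uses 128 components
reduced = sklearn.preprocessing.normalize(pca.fit_transform(embeddings))
print(reduced.shape)                                        # (12, 8)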
def main(): ratio_list = [0.25, 0.125, 0.0625, 0.03125] # 1/4, 1/8, 1/16, 1/32 if args.depth == 18: units = [2, 2, 2, 2] elif args.depth == 34: units = [3, 4, 6, 3] elif args.depth == 50: units = [3, 4, 6, 3] elif args.depth == 101: units = [3, 4, 23, 3] elif args.depth == 152: units = [3, 8, 36, 3] elif args.depth == 200: units = [3, 24, 36, 3] elif args.depth == 269: units = [3, 30, 48, 8] else: raise ValueError( "no experiments done on detph {}, you can do it youself".format( args.depth)) symbol = resnext(units=units, num_stage=4, filter_list=[64, 256, 512, 1024, 2048] if args.depth >= 50 else [64, 64, 128, 256, 512], ratio_list=ratio_list, num_class=args.num_classes, num_group=args.num_group, data_type="imagenet", drop_out=args.drop_out, bottle_neck=True if args.depth >= 50 else False, bn_mom=args.bn_mom, workspace=args.workspace, memonger=args.memonger) kv = mx.kvstore.create(args.kv_store) devs = mx.cpu() if args.gpus is None else [ mx.gpu(int(i)) for i in args.gpus.split(',') ] epoch_size = max(int(args.num_examples / args.batch_size / kv.num_workers), 1) begin_epoch = args.model_load_epoch if args.model_load_epoch else 0 if not os.path.exists("./" + args.model_name): os.mkdir("./" + args.model_name) model_prefix = args.model_name + "/se-resnext-{}-{}-{}".format( args.data_type, args.depth, kv.rank) checkpoint = mx.callback.do_checkpoint(model_prefix) arg_params = None aux_params = None load_model_prefix = 'model/se-resnext-imagenet-50-0' if args.retrain: if args.finetune: (symbol, arg_params, aux_params) = get_fine_tune_model(load_model_prefix, args.model_load_epoch) else: symbol, arg_params, aux_params = mx.model.load_checkpoint( model_prefix, args.model_load_epoch) if args.memonger: import memonger symbol = memonger.search_plan( symbol, data=(args.batch_size, 3, 32, 32) if args.data_type == "cifar10" else (args.batch_size, 3, 224, 224)) train = mx.io.ImageRecordIter( path_imgrec=args.data_train + '.rec', path_imgidx=args.data_train + '.idx', label_width=1, data_name='data', label_name='softmax_label', data_shape=(3, 224, 224), batch_size=args.batch_size, pad=0, fill_value=0, # only used when pad is valid rand_crop=False, shuffle=True) train.reset() if (args.data_val == 'None'): val = None else: val = mx.io.ImageRecordIter(path_imgrec=args.data_val, label_width=1, data_name='data', label_name='softmax_label', batch_size=args.batch_size, data_shape=(3, 224, 224), rand_crop=False, rand_mirror=False, num_parts=kv.num_workers, part_index=kv.rank) fix_param = None if args.freeze: fix_param = [k for k in arg_params if 'fc' not in k] model = mx.mod.Module(symbol=symbol, context=devs, fixed_param_names=fix_param) model.bind(data_shapes=train.provide_data, label_shapes=train.provide_label) # sgd = mx.optimizer.Optimizer.create_optimizer('sgd') # finetune_lr = dict({k: 0 for k in arg_params}) # sgd.set_lr_mult(finetune_lr) opt = optimizer.SGD(learning_rate=args.lr, momentum=0.9, wd=0.0005, rescale_grad=1.0 / args.batch_size / (len(args.gpus.split(',')))) # training model.fit(train, val, num_epoch=args.num_epoch, arg_params=arg_params, aux_params=aux_params, allow_missing=True, kvstore='device', optimizer=opt, initializer=mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2), batch_end_callback=mx.callback.Speedometer( args.batch_size, args.frequent), epoch_end_callback=checkpoint, eval_metric=['acc', 'ce'])
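The freeze branch in main() above fixes every pretrained parameter except the fully connected classifier before binding the module. As a small helper; symbol and arg_params are assumed to come from mx.model.load_checkpoint.

import mxnet as mx

def build_finetune_module(symbol, arg_params, ctx, freeze=True):
    # Freeze everything whose name does not contain 'fc', so only the classifier trains.
    fixed = [k for k in arg_params if 'fc' not in k] if freeze else None
    return mx.mod.Module(symbol=symbol, context=ctx, fixed_param_names=fixed)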
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd) > 0: for i in range(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx) == 0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = os.path.join(args.models_root, '%s-%s-%s' % (args.network, args.loss, args.dataset), 'model') prefix_dir = os.path.dirname(prefix) print('prefix', prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) args.ctx_num = len(ctx) args.num_layers = int(args.network[1:]) print('num_layers', args.num_layers) if args.per_batch_size == 0: args.per_batch_size = 128 args.batch_size = args.per_batch_size * args.ctx_num args.rescale_threshold = 0 args.image_channel = config.image_shape[2] data_dir = config.dataset_path path_imgrecs = None path_imglist = None image_size = config.image_shape[0:2] assert len(image_size) == 2 assert image_size[0] == image_size[1] print('image_size', image_size) print('num_classes', config.num_classes) path_imgrecs = [os.path.join(data_dir, "train.rec")] data_shape = (args.image_channel, image_size[0], image_size[1]) num_workers = config.num_workers global_num_ctx = num_workers * args.ctx_num if config.num_classes % global_num_ctx == 0: args.ctx_num_classes = config.num_classes // global_num_ctx else: args.ctx_num_classes = config.num_classes // global_num_ctx + 1 print(config.num_classes, global_num_ctx, args.ctx_num_classes) args.local_num_classes = args.ctx_num_classes * args.ctx_num args.local_class_start = args.local_num_classes * args.worker_id #if len(args.partial)==0: # local_classes_range = (0, args.num_classes) #else: # _vec = args.partial.split(',') # local_classes_range = (int(_vec[0]), int(_vec[1])) #args.partial_num_classes = local_classes_range[1] - local_classes_range[0] #args.partial_start = local_classes_range[0] print('Called with argument:', args, config) mean = None begin_epoch = 0 base_lr = args.lr base_wd = args.wd base_mom = args.mom arg_params = None aux_params = None esym = get_symbol_embedding() asym = get_symbol_arcface if config.num_workers == 1: sys.path.append(os.path.join(os.path.dirname(__file__), 'utils')) from parall_module_local_v1 import ParallModule else: from parall_module_dist import ParallModule model = ParallModule( context=ctx, symbol=esym, data_names=['data'], label_names=['softmax_label'], asymbol=asym, args=args, ) val_dataiter = None train_dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrecs=path_imgrecs, shuffle=True, rand_mirror=config.data_rand_mirror, mean=mean, cutout=default.cutout if config.data_cutout else None, crop=default.crop if config.data_crop else None, mask=default.mask if config.data_mask else None, gridmask=default.gridmask if config.data_grid else None, #color_jittering = config.data_color, #images_filter = config.data_images_filter, loss_type=args.loss, #margin_m = config.loss_m2, data_names=['data'], downsample_back=config.downsample_back, motion_blur=config.motion_blur, use_bgr=config.use_bgr) if config.net_name == 'fresnet' or config.net_name == 'fmobilefacenet': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0 / 8 #/ args.batch_size print(base_lr, base_mom, base_wd, args.batch_size) lr_steps = [int(x) for x in args.lr_steps.split(',')] lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_steps, factor=0.1, base_lr=base_lr) optimizer_params = { 'learning_rate': 
base_lr, 'momentum': base_mom, 'wd': base_wd, 'rescale_grad': _rescale, 'lr_scheduler': lr_scheduler } opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) _cb = mx.callback.Speedometer(args.batch_size, args.frequent) ver_list = [] ver_name_list = [] for name in config.val_targets: path = os.path.join(data_dir, name + ".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) def ver_test(nbatch): results = [] for i in range(len(ver_list)): _, issame_list = ver_list[i] if all(issame_list): fp_rates, fp_dict, thred_dict, recall_dict = verification.test( ver_list[i], model, args.batch_size, use_bgr=config.use_bgr, label_shape=(args.batch_size, len(path_imgrecs))) for k in fp_rates: print("[%s] TPR at FPR %.2e[%.2e: %.4f]:\t%.5f" % (ver_name_list[i], k, fp_dict[k], thred_dict[k], recall_dict[k])) else: acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test( ver_list[i], model, args.batch_size, 10, None, label_shape=(args.batch_size, len(path_imgrecs)), use_bgr=config.use_bgr) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results highest_acc = [0.0, 0.0] #lfw and target #for i in range(len(ver_list)): # highest_acc.append(0.0) global_step = [0] save_step = [0] lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) def _batch_callback(param): #global global_step global_step[0] += 1 mbatch = global_step[0] #for step in lr_steps: # if mbatch==step: # opt.lr *= 0.1 # print('lr change to', opt.lr) # break _cb(param) if mbatch % 1000 == 0: #print('lr-batch-epoch:',opt.lr,param.nbatch,param.epoch) print('batch-epoch:', param.nbatch, param.epoch) if mbatch >= 0 and mbatch % args.verbose == 0: acc_list = ver_test(mbatch) save_step[0] += 1 msave = save_step[0] do_save = False is_highest = False if len(acc_list) > 0: #lfw_score = acc_list[0] #if lfw_score>highest_acc[0]: # highest_acc[0] = lfw_score # if lfw_score>=0.998: # do_save = True score = sum(acc_list) if acc_list[-1] >= highest_acc[-1]: if acc_list[-1] > highest_acc[-1]: is_highest = True else: if score >= highest_acc[0]: is_highest = True highest_acc[0] = score highest_acc[-1] = acc_list[-1] #if lfw_score>=0.99: # do_save = True if is_highest: do_save = True if args.ckpt == 0: do_save = False elif args.ckpt == 2: do_save = True elif args.ckpt == 3: msave = 1 if do_save: print('saving', msave) arg, aux = model.get_params() #get_export_params() all_layers = model.symbol.get_internals() _sym = model.symbol #all_layers['fc1_output'] mx.model.save_checkpoint(prefix, msave, _sym, arg, aux) print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1])) if args.max_steps > 0 and mbatch > args.max_steps: sys.exit(0) epoch_cb = None train_dataiter = mx.io.PrefetchingIter(train_dataiter) if len(args.pretrained) != 0: model_prefix, epoch = args.pretrained.split(',') begin_epoch = int(epoch) _, arg_params, aux_params = mx.model.load_checkpoint( model_prefix, begin_epoch) #model.set_params(arg_params, aux_params) model.fit( train_dataiter, begin_epoch=0, #begin_epoch, num_epoch=default.end_epoch, eval_data=val_dataiter, #eval_metric = eval_metrics, kvstore=args.kvstore, #optimizer = opt, optimizer_params=optimizer_params, initializer=initializer, arg_params=arg_params, 
aux_params=aux_params, allow_missing=True, batch_end_callback=_batch_callback, epoch_end_callback=epoch_cb)
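The parallel-softmax setup above splits the identity classes evenly across every GPU of every worker; ctx_num_classes is rounded up so the last slice may be slightly padded. The arithmetic with illustrative numbers only.

num_classes = 85742          # illustrative identity count
num_workers = 2              # distributed workers
ctx_num     = 4              # GPUs per worker
worker_id   = 1

global_num_ctx = num_workers * ctx_num
if num_classes % global_num_ctx == 0:
    ctx_num_classes = num_classes // global_num_ctx
else:
    ctx_num_classes = num_classes // global_num_ctx + 1

local_num_classes = ctx_num_classes * ctx_num      # classes hosted by this worker
local_class_start = local_num_classes * worker_id  # first class index on this worker
print(ctx_num_classes, local_num_classes, local_class_start)   # 10718 42872 42872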
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd) > 0: for i in xrange(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx) == 0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = args.prefix prefix_dir = os.path.dirname(prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) end_epoch = args.end_epoch args.ctx_num = len(ctx) args.num_layers = int(args.network[1:]) print('num_layers', args.num_layers) if args.per_batch_size == 0: args.per_batch_size = 128 args.batch_size = args.per_batch_size * args.ctx_num args.image_channel = 3 data_dir = args.data_dir if args.task == 'gender': data_dir = args.gender_data_dir elif args.task == 'age': data_dir = args.age_data_dir print('data dir', data_dir) path_imgrec = None path_imglist = None prop = face_image.load_property(data_dir) args.num_classes = prop.num_classes image_size = prop.image_size args.image_h = image_size[0] args.image_w = image_size[1] print('image_size', image_size) assert (args.num_classes > 0) print('num_classes', args.num_classes) path_imgrec = os.path.join(data_dir, "train.rec") print('Called with argument:', args) data_shape = (args.image_channel, image_size[0], image_size[1]) mean = None begin_epoch = 0 net = get_model() #if args.task=='': # test_net = get_model_test(net) #print(net.__class__) #net = net0[0] if args.network[0] == 'r' or args.network[0] == 'y': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style elif args.network[0] == 'i' or args.network[0] == 'x': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) net.hybridize() if args.mode == 'gluon': if len(args.pretrained) == 0: pass else: net.load_params(args.pretrained, allow_missing=True, ignore_extra=True) net.initialize(initializer) net.collect_params().reset_ctx(ctx) val_iter = None if args.task == '': train_iter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=path_imgrec, shuffle=True, rand_mirror=args.rand_mirror, mean=mean, cutoff=args.cutoff, ) else: train_iter = FaceImageIterAge( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=path_imgrec, task=args.task, shuffle=True, rand_mirror=args.rand_mirror, mean=mean, cutoff=args.cutoff, ) if args.task == 'age': metric = CompositeEvalMetric([MAEMetric(), CUMMetric()]) elif args.task == 'gender': metric = CompositeEvalMetric([AccMetric()]) else: metric = CompositeEvalMetric([AccMetric()]) ver_list = [] ver_name_list = [] if args.task == '': for name in args.eval.split(','): path = os.path.join(data_dir, name + ".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) def ver_test(nbatch): results = [] for i in xrange(len(ver_list)): acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test( ver_list[i], net, ctx, batch_size=args.batch_size) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results def val_test(nbatch=0): acc = 0.0 #if args.task=='age': if len(args.age_data_dir) > 0: val_iter = FaceImageIterAge( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=os.path.join(args.age_data_dir, 'val.rec'), 
task=args.task, shuffle=False, rand_mirror=False, mean=mean, ) _metric = MAEMetric() val_metric = mx.metric.create(_metric) val_metric.reset() _metric2 = CUMMetric() val_metric2 = mx.metric.create(_metric2) val_metric2.reset() val_iter.reset() for batch in val_iter: data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0) label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0) outputs = [] for x in data: outputs.append(net(x)[2]) val_metric.update(label, outputs) val_metric2.update(label, outputs) _value = val_metric.get_name_value()[0][1] print('[%d][VMAE]: %f' % (nbatch, _value)) _value = val_metric2.get_name_value()[0][1] if args.task == 'age': acc = _value print('[%d][VCUM]: %f' % (nbatch, _value)) if len(args.gender_data_dir) > 0: val_iter = FaceImageIterAge( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=os.path.join(args.gender_data_dir, 'val.rec'), task=args.task, shuffle=False, rand_mirror=False, mean=mean, ) _metric = AccMetric() val_metric = mx.metric.create(_metric) val_metric.reset() val_iter.reset() for batch in val_iter: data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0) label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0) outputs = [] for x in data: outputs.append(net(x)[1]) val_metric.update(label, outputs) _value = val_metric.get_name_value()[0][1] if args.task == 'gender': acc = _value print('[%d][VACC]: %f' % (nbatch, _value)) return acc total_time = 0 num_epochs = 0 best_acc = [0] highest_acc = [0.0, 0.0] #lfw and target global_step = [0] save_step = [0] if len(args.lr_steps) == 0: lr_steps = [100000, 140000, 160000] p = 512.0 / args.batch_size for l in xrange(len(lr_steps)): lr_steps[l] = int(lr_steps[l] * p) else: lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) kv = mx.kv.create('device') #kv = mx.kv.create('local') #_rescale = 1.0/args.ctx_num #opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd, rescale_grad=_rescale) #opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd) if args.mode == 'gluon': trainer = gluon.Trainer(net.collect_params(), 'sgd', { 'learning_rate': args.lr, 'wd': args.wd, 'momentum': args.mom, 'multi_precision': True }, kvstore=kv) else: _rescale = 1.0 / args.ctx_num opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd, rescale_grad=_rescale) _cb = mx.callback.Speedometer(args.batch_size, 20) arg_params = None aux_params = None data = mx.sym.var('data') label = mx.sym.var('softmax_label') if args.margin_a > 0.0: fc7 = net(data, label) else: fc7 = net(data) #sym = mx.symbol.SoftmaxOutput(data=fc7, label = label, name='softmax', normalization='valid') ceop = gluon.loss.SoftmaxCrossEntropyLoss() loss = ceop(fc7, label) #loss = loss/args.per_batch_size loss = mx.sym.mean(loss) sym = mx.sym.Group([ mx.symbol.BlockGrad(fc7), mx.symbol.MakeLoss(loss, name='softmax') ]) def _batch_callback(): mbatch = global_step[0] global_step[0] += 1 for _lr in lr_steps: if mbatch == _lr: args.lr *= 0.1 if args.mode == 'gluon': trainer.set_learning_rate(args.lr) else: opt.lr = args.lr print('lr change to', args.lr) break #_cb(param) if mbatch % 1000 == 0: print('lr-batch-epoch:', args.lr, mbatch) if mbatch > 0 and mbatch % args.verbose == 0: save_step[0] += 1 msave = save_step[0] do_save = False is_highest = False if args.task == 'age' or args.task == 'gender': acc = val_test(mbatch) if acc >= highest_acc[-1]: highest_acc[-1] = acc is_highest = True do_save = True 
else: acc_list = ver_test(mbatch) if len(acc_list) > 0: lfw_score = acc_list[0] if lfw_score > highest_acc[0]: highest_acc[0] = lfw_score if lfw_score >= 0.998: do_save = True if acc_list[-1] >= highest_acc[-1]: highest_acc[-1] = acc_list[-1] if lfw_score >= 0.99: do_save = True is_highest = True if args.ckpt == 0: do_save = False elif args.ckpt > 1: do_save = True if do_save: print('saving', msave) #print('saving gluon params') fname = os.path.join(args.prefix, 'model-gluon.params') net.save_params(fname) fname = os.path.join(args.prefix, 'model') net.export(fname, msave) #arg, aux = model.get_params() #mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux) print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1])) if args.max_steps > 0 and mbatch > args.max_steps: sys.exit(0) def _batch_callback_sym(param): _cb(param) _batch_callback() if args.mode != 'gluon': model = mx.mod.Module( context=ctx, symbol=sym, ) model.fit(train_iter, begin_epoch=0, num_epoch=args.end_epoch, eval_data=None, eval_metric=metric, kvstore='device', optimizer=opt, initializer=initializer, arg_params=arg_params, aux_params=aux_params, allow_missing=True, batch_end_callback=_batch_callback_sym, epoch_end_callback=None) else: loss_weight = 1.0 if args.task == 'age': loss_weight = 1.0 / AGE #loss = gluon.loss.SoftmaxCrossEntropyLoss(weight = loss_weight) loss = nd.SoftmaxOutput #loss = gluon.loss.SoftmaxCrossEntropyLoss() while True: #trainer = update_learning_rate(opt.lr, trainer, epoch, opt.lr_factor, lr_steps) tic = time.time() train_iter.reset() metric.reset() btic = time.time() for i, batch in enumerate(train_iter): _batch_callback() #data = gluon.utils.split_and_load(batch.data[0].astype(opt.dtype), ctx_list=ctx, batch_axis=0) #label = gluon.utils.split_and_load(batch.label[0].astype(opt.dtype), ctx_list=ctx, batch_axis=0) data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0) label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0) outputs = [] Ls = [] with ag.record(): for x, y in zip(data, label): #print(y.asnumpy()) if args.task == '': if args.margin_a > 0.0: z = net(x, y) else: z = net(x) #print(z[0].shape, z[1].shape) else: z = net(x) if args.task == 'gender': L = loss(z[1], y) #L = L/args.per_batch_size Ls.append(L) outputs.append(z[1]) elif args.task == 'age': for k in xrange(AGE): _z = nd.slice_axis(z[2], axis=1, begin=k * 2, end=k * 2 + 2) _y = nd.slice_axis(y, axis=1, begin=k, end=k + 1) _y = nd.flatten(_y) L = loss(_z, _y) #L = L/args.per_batch_size #L /= AGE Ls.append(L) outputs.append(z[2]) else: L = loss(z, y) #L = L/args.per_batch_size Ls.append(L) outputs.append(z) # store the loss and do backward after we have done forward # on all GPUs for better speed on multiple GPUs. 
ag.backward(Ls) #trainer.step(batch.data[0].shape[0], ignore_stale_grad=True) #trainer.step(args.ctx_num) n = batch.data[0].shape[0] #print(n,n) trainer.step(n) metric.update(label, outputs) if i > 0 and i % 20 == 0: name, acc = metric.get() if len(name) == 2: logger.info( 'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f, %s=%f' % (num_epochs, i, args.batch_size / (time.time() - btic), name[0], acc[0], name[1], acc[1])) else: logger.info( 'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f' % (num_epochs, i, args.batch_size / (time.time() - btic), name[0], acc[0])) #metric.reset() btic = time.time() epoch_time = time.time() - tic # The first epoch is usually much slower than the subsequent epochs, # so don't factor it into the average if num_epochs > 0: total_time = total_time + epoch_time #name, acc = metric.get() #logger.info('[Epoch %d] training: %s=%f, %s=%f'%(num_epochs, name[0], acc[0], name[1], acc[1])) logger.info('[Epoch %d] time cost: %f' % (num_epochs, epoch_time)) num_epochs = num_epochs + 1 #name, val_acc = test(ctx, val_data) #logger.info('[Epoch %d] validation: %s=%f, %s=%f'%(epoch, name[0], val_acc[0], name[1], val_acc[1])) # save the model if it meets the requirements #save_checkpoint(epoch, val_acc[0], best_acc) if num_epochs > 1: print('Average epoch time: {}'.format( float(total_time) / (num_epochs - 1)))
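# --- Hedged sketch (not part of the original script) -------------------------
# The gluon branch above adjusts the learning rate from _batch_callback: the
# default boundaries [100000, 140000, 160000] are rescaled by p = 512/batch_size
# and the rate is multiplied by 0.1 each time the global batch counter crosses
# a boundary. A minimal, self-contained restatement of that rule; batch_size
# and base_lr below are illustrative assumptions, not values from the script.
def scaled_lr_steps(batch_size, steps=(100000, 140000, 160000)):
    p = 512.0 / batch_size
    return [int(s * p) for s in steps]

def lr_after(mbatch, base_lr, lr_steps, factor=0.1):
    lr = base_lr
    for boundary in lr_steps:
        if mbatch >= boundary:
            lr *= factor
    return lr

# Example (illustrative): scaled_lr_steps(256) -> [200000, 280000, 320000];
# lr_after(250000, 0.1, [200000, 280000, 320000]) -> 0.01 after the first drop.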
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr=0.001, lr_step='5'): # setup config #init_config() #print(config) # setup multi-gpu input_batch_size = config.TRAIN.BATCH_IMAGES * len(ctx) # print config logger.info(pprint.pformat(config)) # load dataset and prepare imdb for training image_sets = [iset for iset in args.image_set.split('+')] roidbs = [ load_gt_roidb(args.dataset, image_set, args.root_path, args.dataset_path, flip=not args.no_flip) for image_set in image_sets ] #roidb = merge_roidb(roidbs) #roidb = filter_roidb(roidb) roidb = roidbs[0] # load symbol #sym = eval('get_' + args.network + '_train')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS) #feat_sym = sym.get_internals()['rpn_cls_score_output'] #train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle, # ctx=ctx, work_load_list=args.work_load_list, # feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES, # anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING) # load and initialize params sym = None if len(pretrained) == 0: arg_params = {} aux_params = {} else: logger.info('loading %s,%d' % (pretrained, epoch)) sym, arg_params, aux_params = mx.model.load_checkpoint( pretrained, epoch) #arg_params, aux_params = load_param(pretrained, epoch, convert=True) #for k in ['rpn_conv_3x3', 'rpn_cls_score', 'rpn_bbox_pred', 'cls_score', 'bbox_pred']: # _k = k+"_weight" # if _k in arg_shape_dict: # v = 0.001 if _k.startswith('bbox_') else 0.01 # arg_params[_k] = mx.random.normal(0, v, shape=arg_shape_dict[_k]) # print('init %s with normal %.5f'%(_k,v)) # _k = k+"_bias" # if _k in arg_shape_dict: # arg_params[_k] = mx.nd.zeros(shape=arg_shape_dict[_k]) # print('init %s with zero'%(_k)) sym = eval('get_' + args.network + '_train')(sym) feat_sym = [] for stride in config.RPN_FEAT_STRIDE: feat_sym.append( sym.get_internals()['face_rpn_cls_score_stride%s_output' % stride]) train_data = CropLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle, ctx=ctx, work_load_list=args.work_load_list) # infer max shape max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] #max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (1, roidb[0]['max_num_boxes'], 5))) logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape)) # infer shape data_shape_dict = dict(train_data.provide_data + train_data.provide_label) arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict) arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) out_shape_dict = dict(zip(sym.list_outputs(), out_shape)) aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape)) logger.info('output shape %s' % pprint.pformat(out_shape_dict)) for k, v in arg_shape_dict.items(): if k.find('upsampling') >= 0: print('initializing upsampling_weight', k) arg_params[k] = mx.nd.zeros(shape=v) init = mx.init.Initializer() init._init_bilinear(k, arg_params[k]) #print(args[k]) # check parameter shapes #for k in sym.list_arguments(): # if k in data_shape_dict: # continue # assert k in arg_params, k + ' not initialized' # assert arg_params[k].shape == arg_shape_dict[k], \ # 'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape) #for k in 
sym.list_auxiliary_states(): # assert k in aux_params, k + ' not initialized' # assert aux_params[k].shape == aux_shape_dict[k], \ # 'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape) fixed_param_prefix = config.FIXED_PARAMS # create solver data_names = [k[0] for k in train_data.provide_data] label_names = [k[0] for k in train_data.provide_label] fixed_param_names = get_fixed_params(sym, fixed_param_prefix) print('fixed', fixed_param_names, file=sys.stderr) mod = Module(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, work_load_list=args.work_load_list, fixed_param_names=fixed_param_names) # metric eval_metrics = mx.metric.CompositeEvalMetric() mid = 0 for m in range(len(config.RPN_FEAT_STRIDE)): stride = config.RPN_FEAT_STRIDE[m] #mid = m*MSTEP _metric = metric.RPNAccMetric(pred_idx=mid, label_idx=mid + 1, name='RPNAcc_s%s' % stride) eval_metrics.add(_metric) mid += 2 #_metric = metric.RPNLogLossMetric(pred_idx=mid, label_idx=mid+1) #eval_metrics.add(_metric) _metric = metric.RPNL1LossMetric(loss_idx=mid, weight_idx=mid + 1, name='RPNL1Loss_s%s' % stride) eval_metrics.add(_metric) mid += 2 if config.FACE_LANDMARK: _metric = metric.RPNL1LossMetric(loss_idx=mid, weight_idx=mid + 1, name='RPNLandMarkL1Loss_s%s' % stride) eval_metrics.add(_metric) mid += 2 if config.HEAD_BOX: _metric = metric.RPNAccMetric(pred_idx=mid, label_idx=mid + 1, name='RPNAcc_head_s%s' % stride) eval_metrics.add(_metric) mid += 2 #_metric = metric.RPNLogLossMetric(pred_idx=mid, label_idx=mid+1) #eval_metrics.add(_metric) _metric = metric.RPNL1LossMetric(loss_idx=mid, weight_idx=mid + 1, name='RPNL1Loss_head_s%s' % stride) eval_metrics.add(_metric) mid += 2 # callback #means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES) #stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES) #epoch_end_callback = callback.do_checkpoint(prefix) epoch_end_callback = None # decide learning rate #base_lr = lr #lr_factor = 0.1 #lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) lr_epoch = [int(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [ epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch ] lr_iters = [ int(epoch * len(roidb) / input_batch_size) for epoch in lr_epoch_diff ] lr_steps = [] if len(lr_iters) == 5: factors = [0.5, 0.5, 0.4, 0.1, 0.1] for i in range(5): lr_steps.append((lr_iters[i], factors[i])) elif len(lr_iters) == 8: #warmup for li in lr_iters[0:5]: lr_steps.append((li, 1.5849)) for li in lr_iters[5:]: lr_steps.append((li, 0.1)) else: for li in lr_iters: lr_steps.append((li, 0.1)) #lr_steps = [ (20,0.1), (40, 0.1) ] #XXX end_epoch = 10000 logger.info('lr %f lr_epoch_diff %s lr_steps %s' % (lr, lr_epoch_diff, lr_steps)) # optimizer opt = optimizer.SGD(learning_rate=lr, momentum=0.9, wd=0.0005, rescale_grad=1.0 / len(ctx), clip_gradient=None) initializer = mx.init.Xavier() #initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style train_data = mx.io.PrefetchingIter(train_data) _cb = mx.callback.Speedometer(train_data.batch_size, frequent=args.frequent, auto_reset=False) global_step = [0] def save_model(epoch): arg, aux = mod.get_params() all_layers = mod.symbol.get_internals() outs = [] for stride in config.RPN_FEAT_STRIDE: num_anchors = config.RPN_ANCHOR_CFG[str(stride)]['NUM_ANCHORS'] _name = 'face_rpn_cls_score_stride%d_output' % stride rpn_cls_score = all_layers[_name] # prepare rpn data rpn_cls_score_reshape = 
mx.symbol.Reshape( data=rpn_cls_score, shape=(0, 2, -1, 0), name="face_rpn_cls_score_reshape_stride%d" % stride) rpn_cls_prob = mx.symbol.SoftmaxActivation( data=rpn_cls_score_reshape, mode="channel", name="face_rpn_cls_prob_stride%d" % stride) rpn_cls_prob_reshape = mx.symbol.Reshape( data=rpn_cls_prob, shape=(0, 2 * num_anchors, -1, 0), name='face_rpn_cls_prob_reshape_stride%d' % stride) _name = 'face_rpn_bbox_pred_stride%d_output' % stride rpn_bbox_pred = all_layers[_name] outs.append(rpn_cls_prob_reshape) outs.append(rpn_bbox_pred) if config.FACE_LANDMARK: _name = 'face_rpn_landmark_pred_stride%d_output' % stride rpn_landmark_pred = all_layers[_name] outs.append(rpn_landmark_pred) _sym = mx.sym.Group(outs) mx.model.save_checkpoint(prefix, epoch, _sym, arg, aux) def _batch_callback(param): #global global_step _cb(param) global_step[0] += 1 mbatch = global_step[0] for step in lr_steps: if mbatch == step[0]: opt.lr *= step[1] print('lr change to', opt.lr, ' in batch', mbatch, file=sys.stderr) break if mbatch == lr_steps[-1][0]: print('saving final checkpoint', mbatch, file=sys.stderr) save_model(0) #arg, aux = mod.get_params() #mx.model.save_checkpoint(prefix, 99, mod.symbol, arg, aux) sys.exit(0) if args.checkpoint is not None: _, arg_params, aux_params = mx.model.load_checkpoint( args.checkpoint, 0) # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=checkpoint_callback('model/testR50'), batch_end_callback=_batch_callback, kvstore=args.kvstore, optimizer=opt, initializer=initializer, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
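# --- Hedged sketch (not part of the original script) -------------------------
# When len(lr_iters) == 8 above, the first five (iteration, factor) pairs use a
# factor of 1.5849: since 1.5849**5 is roughly 10, those five boundaries act as
# a warmup that raises the learning rate by about 10x before the remaining
# boundaries decay it by 0.1 each. A small simulation of how _batch_callback
# applies that list; base_lr and the boundaries below are illustrative
# assumptions, not values taken from the config.
def simulate_lr(base_lr, lr_steps, last_batch):
    lr = base_lr
    for boundary, factor in lr_steps:
        if last_batch >= boundary:
            lr *= factor
    return lr

# Example (illustrative):
# warmup = [(100, 1.5849), (200, 1.5849), (300, 1.5849), (400, 1.5849), (500, 1.5849)]
# decay = [(10000, 0.1), (20000, 0.1), (30000, 0.1)]
# simulate_lr(0.001, warmup + decay, last_batch=600) -> roughly 0.01 (after warmup)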
def train_net(args): ctx = [] # cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() #0, use the first GPU cvd = [] if len(cvd) > 0: for i in range(len(cvd.split(','))): ctx.append(mx.gpu(i)) #append each GPU context to ctx, e.g. ctx = [gpu(0)] if len(ctx) == 0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) #GPUs in use prefix = args.prefix #../model-r100 prefix_dir = os.path.dirname(prefix) #.. if not os.path.exists(prefix_dir): #not executed here os.makedirs(prefix_dir) end_epoch = args.end_epoch #100 000 args.ctx_num = len(ctx) args.num_layers = int(args.network[1:]) print('num_layers', args.num_layers) #100 if args.per_batch_size == 0: args.per_batch_size = 128 args.batch_size = args.per_batch_size * args.ctx_num #10 args.rescale_threshold = 0 args.image_channel = 3 os.environ['BETA'] = str(args.beta) #1000.0, the annealing lambda of Eq. (6) in the ArcFace paper data_dir_list = args.data_dir.split(',') print('data_dir_list: ', data_dir_list) data_dir = data_dir_list[0] # load dataset properties prop = face_image.load_property(data_dir) args.num_classes = prop.num_classes image_size = prop.image_size args.image_h = image_size[0] args.image_w = image_size[1] print('image_size', image_size) print('num_classes: ', args.num_classes) # path_imgrec = os.path.join(data_dir, "train.rec") path_imgrec = os.path.join(data_dir, "all.rec") if args.loss_type == 1 and args.num_classes > 20000: #sphereface args.beta_freeze = 5000 args.gamma = 0.06 print('***Called with argument:', args) data_shape = (args.image_channel, image_size[0], image_size[1] ) #(3L,112L,112L) mean = None begin_epoch = 0 base_lr = args.lr #0.1 base_wd = args.wd #weight decay = 0.0005 base_mom = args.mom #momentum: 0.9 if len(args.pretrained) == 0: arg_params = None aux_params = None sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) else: vec = args.pretrained.split( ',') #['../models/model-r50-am-lfw/model', '0000'] print('***loading', vec) _, arg_params, aux_params = mx.model.load_checkpoint( vec[0], int(vec[1])) sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) # print(sym[1]) # mx.viz.plot_network(sym[1]).view() #visualization # sys.exit() if args.network[0] == 's': # spherenet data_shape_dict = {'data': (args.per_batch_size, ) + data_shape} spherenet.init_weights(sym, data_shape_dict, args.num_layers) #label_name = 'softmax_label' #label_shape = (args.batch_size,) model = mx.mod.Module( context=ctx, symbol=sym, ) # print(args.batch_size) # print(data_shape) # print(path_imgrec) # print(args.rand_mirror) # print(mean) # print(args.cutoff) # sys.exit() train_dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, #(3L,112L,112L) path_imgrec=path_imgrec, # train.rec shuffle=True, rand_mirror=args.rand_mirror, # 1 mean=mean, cutoff=args.cutoff, # 0 ) if args.loss_type < 10: _metric = AccMetric() else: _metric = LossValueMetric() # create an evaluation metric eval_metrics = [mx.metric.create(_metric)] if args.network[0] == 'r' or args.network[0] == 'y': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style mobilefacenet elif args.network[0] == 'i' or args.network[0] == 'x': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0 / args.ctx_num opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) #for multi-GPU training, rescale_grad splits the aggregated gradient across cards som = 64 # callback that periodically reports training speed and accuracy _cb = mx.callback.Speedometer(args.batch_size, som) ver_list = [] ver_name_list = [] for name in 
args.target.split(','): path = os.path.join(data_dir, name + ".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) def ver_test(nbatch): results = [] for i in range(len(ver_list)): acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test( ver_list[i], model, args.batch_size, 10, None, None) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results highest_acc = [0.0, 0.0] #lfw and target #for i in xrange(len(ver_list)): # highest_acc.append(0.0) global_step = [0] save_step = [0] if len(args.lr_steps) == 0: lr_steps = [30000, 40000, 50000] if args.loss_type >= 1 and args.loss_type <= 7: lr_steps = [100000, 140000, 160000] # single GPU, drop the p rescaling # p = 512.0/args.batch_size for l in range(len(lr_steps)): # lr_steps[l] = int(lr_steps[l]*p) lr_steps[l] = int(lr_steps[l]) else: lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) def _batch_callback(param): #global global_step mbatch = global_step[0] global_step[0] += 1 for _lr in lr_steps: if mbatch == args.beta_freeze + _lr: opt.lr *= 0.1 print('lr change to', opt.lr) break _cb(param) if mbatch % 1000 == 0: print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch) print('mbatch=', mbatch) if mbatch >= 0 and mbatch % args.verbose == 0: acc_list = ver_test(mbatch) print(acc_list) save_step[0] += 1 msave = save_step[0] do_save = False if len(acc_list) > 0: lfw_score = acc_list[0] if lfw_score > highest_acc[0]: highest_acc[0] = lfw_score # lowered the verification threshold to look for the best one # if lfw_score>=0.998: if lfw_score >= 0.99: do_save = True if acc_list[-1] >= highest_acc[-1]: highest_acc[-1] = acc_list[-1] # if lfw_score>=0.99: #save the model when the LFW accuracy exceeds 0.99 if lfw_score >= 0.99: #save the model when the LFW accuracy exceeds 0.99 do_save = True if args.ckpt == 0: do_save = False elif args.ckpt > 1: do_save = True if do_save: print('saving', msave) arg, aux = model.get_params() mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux) print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1])) if mbatch <= args.beta_freeze: _beta = args.beta else: move = max(0, mbatch - args.beta_freeze) _beta = max( args.beta_min, args.beta * math.pow(1 + args.gamma * move, -1.0 * args.power)) #print('beta', _beta) os.environ['BETA'] = str(_beta) if args.max_steps > 0 and mbatch > args.max_steps: sys.exit(0) epoch_cb = None train_dataiter = mx.io.PrefetchingIter(train_dataiter) model.fit( train_data=train_dataiter, begin_epoch=begin_epoch, num_epoch=end_epoch, eval_data=None, eval_metric=eval_metrics, kvstore='device', optimizer=opt, #optimizer_params = optimizer_params, initializer=initializer, arg_params=arg_params, aux_params=aux_params, allow_missing=True, batch_end_callback=_batch_callback, epoch_end_callback=epoch_cb)
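# --- Hedged sketch (not part of the original script) -------------------------
# The os.environ['BETA'] update above implements the annealing lambda of the
# SphereFace-style training: beta stays at its initial value for beta_freeze
# batches and then decays polynomially toward beta_min. A standalone
# restatement of that schedule; the default argument values are illustrative
# assumptions, not the script's configuration.
import math

def annealed_beta(mbatch, beta=1000.0, beta_min=5.0, beta_freeze=5000,
                  gamma=0.06, power=1.0):
    if mbatch <= beta_freeze:
        return beta
    move = max(0, mbatch - beta_freeze)
    return max(beta_min, beta * math.pow(1 + gamma * move, -1.0 * power))

# Example (illustrative): annealed_beta(5001) is about 943, while
# annealed_beta(10000) has already fallen back to beta_min = 5.0.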
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd) > 0: for i in xrange(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx) == 0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = args.prefix prefix_dir = os.path.dirname(prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) end_epoch = args.end_epoch args.ctx_num = len(ctx) args.num_layers = int(args.network[1:]) print('num_layers', args.num_layers) if args.per_batch_size == 0: args.per_batch_size = 128 args.batch_size = args.per_batch_size * args.ctx_num args.rescale_threshold = 0 args.image_channel = 3 os.environ['BETA'] = str(args.beta) data_dir_list = args.data_dir.split(',') assert len(data_dir_list) == 1 data_dir = data_dir_list[0] path_imgrec = None path_imglist = None prop = face_image.load_property(data_dir) args.num_classes = prop.num_classes image_size = prop.image_size args.image_h = image_size[0] args.image_w = image_size[1] print('image_size', image_size) assert (args.num_classes > 0) print('num_classes', args.num_classes) path_imgrec = os.path.join(data_dir, "train.rec") if args.loss_type == 1 and args.num_classes > 20000: args.beta_freeze = 5000 args.gamma = 0.06 print('Called with argument:', args) data_shape = (args.image_channel, image_size[0], image_size[1]) mean = None begin_epoch = 0 base_lr = args.lr base_wd = args.wd base_mom = args.mom if len(args.pretrained) == 0: arg_params = None aux_params = None sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) else: vec = args.pretrained.split(',') print('loading', vec) _, arg_params, aux_params = mx.model.load_checkpoint( vec[0], int(vec[1])) sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) # if args.finetune: # def get_fine_tune_model(symbol, arg_params, num_classes, layer_name='flatten0'): # """ # symbol: the pretrained network symbol # arg_params: the argument parameters of the pretrained model # num_classes: the number of classes for the fine-tune datasets # layer_name: the layer name before the last fully-connected layer # """ # all_layers = symbol.get_internals() # # print(all_layers);exit(0) # for k in arg_params: # if k.startswith('fc'): # print(k) # exit(0) # net = all_layers[layer_name + '_output'] # net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='fc1') # net = mx.symbol.SoftmaxOutput(data=net, name='softmax') # new_args = dict({k: arg_params[k] for k in arg_params if 'fc1' not in k}) # return (net, new_args) # sym, arg_params = get_fine_tune_model(sym, arg_params, args.num_classes) if args.network[0] == 's': data_shape_dict = {'data': (args.per_batch_size, ) + data_shape} spherenet.init_weights(sym, data_shape_dict, args.num_layers) #label_name = 'softmax_label' #label_shape = (args.batch_size,) model = mx.mod.Module( context=ctx, symbol=sym, ) val_dataiter = None train_dataiter = FaceImageIter( batch_size=args.batch_size, data_shape=data_shape, path_imgrec=path_imgrec, shuffle=True, rand_mirror=args.rand_mirror, mean=mean, cutoff=args.cutoff, ) if args.loss_type < 10: _metric = AccMetric() else: _metric = LossValueMetric() eval_metrics = [mx.metric.create(_metric)] if args.network[0] == 'r' or args.network[0] == 'y': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style elif args.network[0] == 'i' or args.network[0] == 'x': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception else: initializer = mx.init.Xavier(rnd_type='uniform', 
factor_type="in", magnitude=2) _rescale = 1.0 / args.ctx_num opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) som = 20 _cb = mx.callback.Speedometer(args.batch_size, som) ver_list = [] ver_name_list = [] for name in args.target.split(','): path = os.path.join(data_dir, name + ".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) def ver_test(nbatch): results = [] for i in xrange(len(ver_list)): acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test( ver_list[i], model, args.batch_size, 10, None, None) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results highest_acc = [0.0, 0.0] #lfw and target #for i in xrange(len(ver_list)): # highest_acc.append(0.0) global_step = [0] save_step = [0] if len(args.lr_steps) == 0: lr_steps = [40000, 60000, 80000] if args.loss_type >= 1 and args.loss_type <= 7: lr_steps = [100000, 140000, 160000] p = 512.0 / args.batch_size for l in xrange(len(lr_steps)): lr_steps[l] = int(lr_steps[l] * p) else: lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) def _batch_callback(param): #global global_step global_step[0] += 1 mbatch = global_step[0] for _lr in lr_steps: if mbatch == args.beta_freeze + _lr: opt.lr *= 0.1 print('lr change to', opt.lr) break _cb(param) if mbatch % 1000 == 0: print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch) if mbatch >= 0 and mbatch % args.verbose == 0: acc_list = ver_test(mbatch) save_step[0] += 1 msave = save_step[0] do_save = False if len(acc_list) > 0: lfw_score = acc_list[0] if lfw_score > highest_acc[0]: highest_acc[0] = lfw_score if lfw_score >= 0.998: do_save = True if acc_list[-1] >= highest_acc[-1]: highest_acc[-1] = acc_list[-1] if lfw_score >= 0.99: do_save = True if args.ckpt == 0: do_save = False elif args.ckpt > 1: do_save = True if do_save: print('saving', msave) arg, aux = model.get_params() mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux) print('[%d]Accuracy-Highest: %1.5f' % (mbatch, highest_acc[-1])) if mbatch <= args.beta_freeze: _beta = args.beta else: move = max(0, mbatch - args.beta_freeze) _beta = max( args.beta_min, args.beta * math.pow(1 + args.gamma * move, -1.0 * args.power)) #print('beta', _beta) os.environ['BETA'] = str(_beta) if args.max_steps > 0 and mbatch > args.max_steps: sys.exit(0) epoch_cb = None train_dataiter = mx.io.PrefetchingIter(train_dataiter) model.fit( train_dataiter, begin_epoch=begin_epoch, num_epoch=end_epoch, eval_data=val_dataiter, eval_metric=eval_metrics, kvstore='device', optimizer=opt, #optimizer_params = optimizer_params, initializer=initializer, arg_params=arg_params, aux_params=aux_params, allow_missing=True, batch_end_callback=_batch_callback, epoch_end_callback=epoch_cb)
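# --- Hedged sketch (not part of the original script) -------------------------
# The do_save logic inside _batch_callback above boils down to: checkpoint when
# the first verification set (typically lfw) reaches a new best of at least
# 0.998, or when the last (target) set reaches a new best while lfw is already
# at 0.99 or better; args.ckpt == 0 disables saving and args.ckpt > 1 forces
# it. A standalone restatement of that rule; the function name is illustrative.
def should_save(acc_list, highest_acc, ckpt):
    do_save = False
    if acc_list:
        lfw_score = acc_list[0]
        if lfw_score > highest_acc[0]:
            highest_acc[0] = lfw_score
            if lfw_score >= 0.998:
                do_save = True
        if acc_list[-1] >= highest_acc[-1]:
            highest_acc[-1] = acc_list[-1]
            if lfw_score >= 0.99:
                do_save = True
    if ckpt == 0:
        do_save = False
    elif ckpt > 1:
        do_save = True
    return do_save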
def train_net(args): ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd)>0: for i in xrange(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx)==0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix = args.prefix prefix_dir = os.path.dirname(prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) end_epoch = args.end_epoch args.ctx_num = len(ctx) args.num_layers = int(args.network[1:]) print('num_layers', args.num_layers) if args.per_batch_size==0: args.per_batch_size = 128 args.batch_size = args.per_batch_size*args.ctx_num args.rescale_threshold = 0 args.image_channel = 3 data_dir_list = args.data_dir.split(',') assert len(data_dir_list)==1 data_dir = data_dir_list[0] path_imgrec = None path_imglist = None prop = face_image.load_property(data_dir) args.num_classes = prop.num_classes image_size = prop.image_size args.image_h = image_size[0] args.image_w = image_size[1] print('image_size', image_size) assert(args.num_classes>0) print('num_classes', args.num_classes) path_imgrec = os.path.join(data_dir, "train.rec") print('Called with argument:', args) data_shape = (args.image_channel,image_size[0],image_size[1]) mean = None begin_epoch = 0 base_lr = args.lr base_wd = args.wd base_mom = args.mom if len(args.pretrained)==0: arg_params = None aux_params = None sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) else: vec = args.pretrained.split(',') print('loading', vec) _, arg_params, aux_params = mx.model.load_checkpoint(vec[0], int(vec[1])) sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) if args.network[0]=='s': data_shape_dict = {'data' : (args.per_batch_size,)+data_shape} spherenet.init_weights(sym, data_shape_dict, args.num_layers) #label_name = 'softmax_label' #label_shape = (args.batch_size,) model = mx.mod.Module( context = ctx, symbol = sym, ) train_dataiter = FaceImageIter( batch_size = args.batch_size, data_shape = data_shape, path_imgrec = path_imgrec, shuffle = True, rand_mirror = args.rand_mirror, mean = mean, cutoff = args.cutoff, ) val_rec = os.path.join(data_dir, "val.rec") val_iter = None if os.path.exists(val_rec): val_iter = FaceImageIter( batch_size = args.batch_size, data_shape = data_shape, path_imgrec = val_rec, shuffle = False, rand_mirror = False, mean = mean, ) if args.loss_type<10: _metric = AccMetric() else: _metric = LossValueMetric() eval_metrics = [] if USE_FR: _metric = AccMetric(pred_idx=1) eval_metrics.append(_metric) if USE_GENDER: _metric = AccMetric(pred_idx=2, name='gender') eval_metrics.append(_metric) elif USE_GENDER: _metric = AccMetric(pred_idx=1, name='gender') eval_metrics.append(_metric) if USE_AGE: _metric = MAEMetric() eval_metrics.append(_metric) _metric = CUMMetric() eval_metrics.append(_metric) if args.network[0]=='r': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style elif args.network[0]=='i' or args.network[0]=='x': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0/args.ctx_num opt = optimizer.SGD(learning_rate=base_lr, momentum=base_mom, wd=base_wd, rescale_grad=_rescale) som = 20 _cb = mx.callback.Speedometer(args.batch_size, som) ver_list = [] ver_name_list = [] for name in args.target.split(','): path = os.path.join(data_dir,name+".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) 
ver_name_list.append(name) print('ver', name) def ver_test(nbatch): results = [] for i in xrange(len(ver_list)): acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], model, args.batch_size, 10, None, None) print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results def val_test(): _metric = MAEMetric() val_metric = mx.metric.create(_metric) val_metric.reset() _metric2 = CUMMetric() val_metric2 = mx.metric.create(_metric2) val_metric2.reset() val_iter.reset() for i, eval_batch in enumerate(val_iter): model.forward(eval_batch, is_train=False) model.update_metric(val_metric, eval_batch.label) model.update_metric(val_metric2, eval_batch.label) _value = val_metric.get_name_value()[0][1] print('MAE: %f'%(_value)) _value = val_metric2.get_name_value()[0][1] print('CUM: %f'%(_value)) highest_acc = [0.0, 0.0] #lfw and target #for i in xrange(len(ver_list)): # highest_acc.append(0.0) global_step = [0] save_step = [0] if len(args.lr_steps)==0: lr_steps = [40000, 60000, 80000] if args.loss_type>=1 and args.loss_type<=7: lr_steps = [100000, 140000, 160000] p = 512.0/args.batch_size for l in xrange(len(lr_steps)): lr_steps[l] = int(lr_steps[l]*p) else: lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) def _batch_callback(param): #global global_step global_step[0]+=1 mbatch = global_step[0] for _lr in lr_steps: if mbatch==_lr: opt.lr *= 0.1 print('lr change to', opt.lr) break _cb(param) if mbatch%1000==0: print('lr-batch-epoch:',opt.lr,param.nbatch,param.epoch) if mbatch>=0 and mbatch%args.verbose==0: if val_iter is not None: val_test() acc_list = ver_test(mbatch) save_step[0]+=1 msave = save_step[0] do_save = False if len(acc_list)>0: lfw_score = acc_list[0] if lfw_score>highest_acc[0]: highest_acc[0] = lfw_score if lfw_score>=0.998: do_save = True if acc_list[-1]>=highest_acc[-1]: highest_acc[-1] = acc_list[-1] if lfw_score>=0.99: do_save = True if args.ckpt==0: do_save = False elif args.ckpt>1: do_save = True if do_save: print('saving', msave) arg, aux = model.get_params() mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux) print('[%d]Accuracy-Highest: %1.5f'%(mbatch, highest_acc[-1])) if args.max_steps>0 and mbatch>args.max_steps: sys.exit(0) epoch_cb = None model.fit(train_dataiter, begin_epoch = begin_epoch, num_epoch = end_epoch, eval_data = None, eval_metric = eval_metrics, kvstore = 'device', optimizer = opt, #optimizer_params = optimizer_params, initializer = initializer, arg_params = arg_params, aux_params = aux_params, allow_missing = True, batch_end_callback = _batch_callback, epoch_end_callback = epoch_cb )
def train_net(args): ctx = [] # select GPU or CPU for training gpu_ids = args.gpu_ids.split(',') for gpu_id in gpu_ids: ctx.append(mx.gpu(int(gpu_id))) if len(ctx) == 0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) prefix_dir = os.path.dirname(args.prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) args.ctx_num = len(ctx) args.num_layers = int(args.network[1:]) print('num_layers', args.num_layers) args.batch_size = args.batch_size * args.ctx_num args.rescale_threshold = 0 args.image_channel = 3 data_dir_list = args.data_dir.split(',') assert len(data_dir_list) == 1 data_dir = data_dir_list[0] data_shape = [int(x) for x in args.data_shape.split(',')] assert len(data_shape) == 3 assert data_shape[1] == data_shape[2] args.image_h = data_shape[1] args.image_w = data_shape[2] print('data_shape', data_shape) path_imgrec = os.path.join(data_dir, "train.rec") path_imgrec_val = os.path.join(data_dir, "val.rec") print('Called with argument:', args) data_shape = tuple(data_shape) mean = None begin_epoch = 0 if len(args.pretrained) == 0: arg_params = None aux_params = None sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) else: # load the pretrained model vec = args.pretrained.split(',') print('loading', vec) begin_epoch = int(vec[1]) _, arg_params, aux_params = mx.model.load_checkpoint( vec[0], int(vec[1])) sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params) model = mx.mod.Module(context=ctx, symbol=sym) train_dataiter = FaceImageIter(batch_size=args.batch_size, data_shape=data_shape, path_imgrec=path_imgrec, shuffle=True, rand_mirror=args.rand_mirror, mean=mean, cutoff=args.cutoff, color_jittering=args.color) val_dataiter = FaceImageIter(batch_size=args.batch_size, data_shape=data_shape, path_imgrec=path_imgrec_val, shuffle=False, rand_mirror=False, mean=mean) metric = mx.metric.CompositeEvalMetric( [AccMetric(), MAEMetric(), CUMMetric()]) if args.network[0] == 'r': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) _rescale = 1.0 / args.ctx_num opt = optimizer.SGD(learning_rate=args.lr, momentum=0.9, wd=0.0005, rescale_grad=_rescale) som = 20 _cb = mx.callback.Speedometer(args.batch_size, som) lr_steps = [int(x) for x in args.lr_steps.split(',')] def _batch_callback(param): _cb(param) # callback run at the end of each epoch def _epoch_callback(epoch, symbol, arg_params, aux_params): epoch = epoch + 1 for _lr in lr_steps: if epoch == _lr: opt.lr *= 0.1 print('lr change to', opt.lr) break # save the model if epoch % 10 == 0 or epoch == args.end_epoch: print('lr-epoch:', opt.lr, epoch) arg, aux = model.get_params() all_layers = model.symbol.get_internals() _sym = all_layers['fc1_output'] mx.model.save_checkpoint(args.prefix, epoch, _sym, arg, aux) train_dataiter = mx.io.PrefetchingIter(train_dataiter) print('Start training...') model.fit(train_dataiter, begin_epoch=begin_epoch, num_epoch=args.end_epoch, eval_data=val_dataiter, eval_metric=metric, kvstore='device', optimizer=opt, initializer=initializer, arg_params=arg_params, aux_params=aux_params, allow_missing=True, batch_end_callback=_batch_callback, epoch_end_callback=_epoch_callback)
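# --- Hedged sketch (not part of the original script) -------------------------
# _epoch_callback above exports only the 'fc1_output' sub-graph, i.e. the
# embedding network without the training head. A minimal sketch of loading
# such a checkpoint back for feature extraction; the prefix, epoch number and
# 112x112 input shape below are illustrative assumptions.
import mxnet as mx

sym, arg_params, aux_params = mx.model.load_checkpoint('model/example-prefix', 10)
mod = mx.mod.Module(symbol=sym, context=mx.cpu(), label_names=None)
mod.bind(data_shapes=[('data', (1, 3, 112, 112))], for_training=False)
mod.set_params(arg_params, aux_params)
batch = mx.io.DataBatch(data=[mx.nd.zeros((1, 3, 112, 112))])
mod.forward(batch, is_train=False)
embedding = mod.get_outputs()[0].asnumpy()  # shape: (1, embedding_dim)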
def train_net(args): ## =================== parse context ========================== ctx = [] cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip() if len(cvd)>0: for i in range(len(cvd.split(','))): ctx.append(mx.gpu(i)) if len(ctx)==0: ctx = [mx.cpu()] print('use cpu') else: print('gpu num:', len(ctx)) ## ==================== get model save prefix and log ============ if len(args.extra_model_name)==0: prefix = os.path.join(args.models_root, '%s-%s-%s'%(args.network, args.loss, args.dataset), 'model') else: prefix = os.path.join(args.models_root, '%s-%s-%s-%s'%(args.network, args.loss, args.dataset, args.extra_model_name), 'model') prefix_dir = os.path.dirname(prefix) print('prefix', prefix) if not os.path.exists(prefix_dir): os.makedirs(prefix_dir) logger = logging.getLogger() logger.setLevel(logging.INFO) filehandler = logging.FileHandler("{}.log".format(prefix)) streamhandler = logging.StreamHandler() logger.addHandler(filehandler) logger.addHandler(streamhandler) ## ================ parse batch size and class info ====================== args.ctx_num = len(ctx) if args.per_batch_size==0: args.per_batch_size = 128 args.batch_size = args.per_batch_size*args.ctx_num global_num_ctx = config.num_workers * args.ctx_num if config.num_classes % global_num_ctx == 0: args.ctx_num_classes = config.num_classes//global_num_ctx else: args.ctx_num_classes = config.num_classes//global_num_ctx+1 args.local_num_classes = args.ctx_num_classes * args.ctx_num args.local_class_start = args.local_num_classes * args.worker_id logger.info("Train model with argument: {}\nconfig : {}".format(args, config)) train_dataiter, val_dataiter = get_data_iter(config, args.batch_size) ## =============== get train info ============================ image_size = config.image_shape[0:2] if len(args.pretrained) == 0: # train from scratch esym = get_symbol_embedding(config) asym = functools.partial(get_symbol_arcface, config=config) else: # load train model to continue assert False if config.count_flops: all_layers = esym.get_internals() _sym = all_layers['fc1_output'] FLOPs = flops_counter.count_flops(_sym, data=(1,3,image_size[0],image_size[1])) _str = flops_counter.flops_str(FLOPs) print('Network FLOPs: %s'%_str) logging.info("Network FLOPs : %s" % _str) if config.num_workers==1: #from parall_loss_module import ParallLossModule from parall_module_local_v1 import ParallModule else: # distribute parall loop assert False model = ParallModule( context = ctx, symbol = esym, data_names = ['data'], label_names = ['softmax_label'], asymbol = asym, args = args, logger=logger, ) ## ============ get optimizer ===================================== if config.net_name=='fresnet' or config.net_name=='fmobilefacenet': initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style else: initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2) opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd, rescale_grad=1.0/args.batch_size) _cb = mx.callback.Speedometer(args.batch_size, args.frequent) ver_list = [] ver_name_list = [] for name in config.val_targets: path = os.path.join(config.dataset_path, name+".bin") if os.path.exists(path): data_set = verification.load_bin(path, image_size) ver_list.append(data_set) ver_name_list.append(name) print('ver', name) def ver_test(nbatch): results = [] for i in range(len(ver_list)): acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], model, args.batch_size, 10, None, None) print('[%s][%d]XNorm: %f' % 
(ver_name_list[i], nbatch, xnorm)) #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2)) results.append(acc2) return results highest_acc = [0.0, 0.0] #lfw and target global_step = [0] save_step = [0] lr_steps = [int(x) for x in args.lr_steps.split(',')] print('lr_steps', lr_steps) ## =============== batch end callback definition =================================== def _batch_callback(param): #global global_step global_step[0]+=1 mbatch = global_step[0] for step in lr_steps: if mbatch==step: opt.lr *= 0.1 print('lr change to', opt.lr) logger.info('lr change to %f', opt.lr) break _cb(param) if mbatch%1000==0: print('lr-batch-epoch:',opt.lr,param.nbatch,param.epoch) logger.info('lr-batch-epoch: {} {} {}'.format(opt.lr, param.nbatch, param.epoch)) if mbatch>=0 and mbatch%args.verbose==0: acc_list = ver_test(mbatch) save_step[0]+=1 msave = save_step[0] do_save = False is_highest = False if len(acc_list)>0: #lfw_score = acc_list[0] #if lfw_score>highest_acc[0]: # highest_acc[0] = lfw_score # if lfw_score>=0.998: # do_save = True score = sum(acc_list) if acc_list[-1]>=highest_acc[-1]: if acc_list[-1]>highest_acc[-1]: is_highest = True else: if score>=highest_acc[0]: is_highest = True highest_acc[0] = score highest_acc[-1] = acc_list[-1] if is_highest: do_save = True if args.ckpt==0: do_save = False elif args.ckpt==2: do_save = True elif args.ckpt==3: msave = 1 if do_save: print('saving', msave) logger.info('saving {}'.format(msave)) arg, aux = model.get_export_params() all_layers = model.symbol.get_internals() _sym = all_layers['fc1_output'] mx.model.save_checkpoint(prefix, msave, _sym, arg, aux) print('[%d]Accuracy-Highest: %1.5f'%(mbatch, highest_acc[-1])) logger.info('[%d]Accuracy-Highest: %1.5f'%(mbatch, highest_acc[-1])) if config.max_steps>0 and mbatch>config.max_steps: sys.exit(0) model.fit(train_dataiter, begin_epoch = 0, num_epoch = 999999, eval_data = val_dataiter, kvstore = args.kvstore, optimizer = opt, initializer = initializer, arg_params = None, aux_params = None, allow_missing = True, batch_end_callback = _batch_callback)
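# --- Hedged sketch (not part of the original script) -------------------------
# The is_highest test in _batch_callback above keeps a checkpoint when the last
# verification set improves, or when it ties its best and the sum over all
# verification sets improves; args.ckpt == 3 additionally makes every save
# overwrite checkpoint slot 1. A standalone restatement of the selection rule;
# the function name is illustrative.
def is_new_best(acc_list, highest_acc):
    if not acc_list:
        return False
    score = sum(acc_list)
    is_highest = False
    if acc_list[-1] >= highest_acc[-1]:
        if acc_list[-1] > highest_acc[-1]:
            is_highest = True
        elif score >= highest_acc[0]:
            is_highest = True
        highest_acc[0] = score
        highest_acc[-1] = acc_list[-1]
    return is_highest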