def test_step_schedule(backend):
    """
    Test constant rate, fixed step and various modes of programmable steps.
    """
    lr_init = 0.1

    # default scheduler has a constant learning rate
    sch = Schedule()
    for epoch in range(10):
        lr = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        assert lr == lr_init

    # test a uniform step schedule
    step_config = 2
    change = 0.5
    sch = Schedule(step_config=step_config, change=change)
    for epoch in range(10):
        lr = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        # test a repeated call for the same epoch
        lr2 = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        # print(epoch, lr, lr2)
        assert np.allclose(lr, lr_init * change**(np.floor((epoch + 1) / step_config)))
        assert np.allclose(lr2, lr_init * change**(np.floor((epoch + 1) / step_config)))

    # test a list step schedule
    sch = Schedule(step_config=[2, 3], change=.1)
    assert np.allclose(.1, sch.get_learning_rate(learning_rate=.1, epoch=0))
    assert np.allclose(.1, sch.get_learning_rate(learning_rate=.1, epoch=1))
    assert np.allclose(.01, sch.get_learning_rate(learning_rate=.1, epoch=2))
    # test a repeated call for the same epoch
    assert np.allclose(.01, sch.get_learning_rate(learning_rate=.1, epoch=2))
    assert np.allclose(.001, sch.get_learning_rate(learning_rate=.1, epoch=3))
    assert np.allclose(.001, sch.get_learning_rate(learning_rate=.1, epoch=4))
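# The assertions above encode the two Schedule modes directly: with a scalar step_config the
# learning rate is multiplied by change every step_config epochs, and with a list step_config it
# is multiplied by change once at each listed epoch. A minimal standalone sketch of that
# arithmetic (plain numpy; expected_uniform_lr and expected_list_lr are hypothetical helper
# names, not part of neon):

import numpy as np

def expected_uniform_lr(lr_init, change, step_config, epoch):
    # scalar step_config: lr_init * change ** floor((epoch + 1) / step_config)
    return lr_init * change ** np.floor((epoch + 1) / step_config)

def expected_list_lr(lr_init, change, step_config, epoch):
    # list step_config: multiply by change once for every listed step already reached
    return lr_init * change ** sum(1 for step in step_config if step <= epoch)

# matches the assertions in the test above
assert np.allclose(expected_uniform_lr(0.1, 0.5, 2, epoch=3), 0.025)
assert np.allclose(expected_list_lr(0.1, 0.1, [2, 3], epoch=4), 0.001)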
def get_args_and_hyperparameters():
    parser = NeonArgparser(__doc__)
    args = parser.parse_args(gen_be=False)

    # Override save path if None
    if args.save_path is None:
        args.save_path = 'frcn_alexnet.pickle'

    if args.callback_args['save_path'] is None:
        args.callback_args['save_path'] = args.save_path

    if args.callback_args['serialize'] is None:
        args.callback_args['serialize'] = min(args.epochs, 10)

    # hyperparameters
    args.batch_size = 64

    # a bare lambda object used as a simple attribute container for the hyperparameters
    hyper_params = lambda: None
    hyper_params.use_pre_trained_weights = True  # If true, load pre-trained weights to the model
    hyper_params.max_train_imgs = 5000  # Make this smaller in small trial runs to save time
    hyper_params.max_test_imgs = 5000   # Make this smaller in small trial runs to save time
    hyper_params.num_epochs = args.epochs
    hyper_params.samples_per_batch = args.batch_size  # The mini-batch size
    # The number of multi-scale samples to make for each input image. These
    # samples are then fed into the network in multiple minibatches.
    hyper_params.samples_per_img = hyper_params.samples_per_batch * 7
    hyper_params.frcn_fine_tune = False
    hyper_params.shuffle = True

    if hyper_params.use_pre_trained_weights:
        # This will typically train in 10-15 epochs. Use a small learning rate
        # and quickly reduce every 5-10 epochs. Use a high momentum since we
        # are close to the minima.
        s = 1e-4
        hyper_params.learning_rate_scale = s
        hyper_params.learning_rate_sched = Schedule(step_config=[15, 20],
                                                    change=[0.1 * s, 0.01 * s])
        hyper_params.momentum = 0.9
    else:
        # need to be less aggressive with reducing learning rate if the model is not pre-trained
        s = 1e-2
        hyper_params.learning_rate_scale = 1e-2
        hyper_params.learning_rate_sched = Schedule(step_config=[8, 14, 18, 20],
                                                    change=[0.5 * s, 0.1 * s,
                                                            0.05 * s, 0.01 * s])
        hyper_params.momentum = 0.1

    hyper_params.class_score_threshold = 0.000001
    hyper_params.score_exponent = 5
    hyper_params.shuffle = True
    return args, hyper_params
print(assignments)
num_epochs = int(assignments.get("epochs"))
init_uni = Gaussian(scale=assignments.get("gaussian_scale"))
step_config = [int(assignments.get("momentum_step_schedule_start") +
                   i * assignments.get("momentum_step_schedule_step_width"))
               for i in range(int(assignments.get("momentum_step_schedule_steps")))]
opt_gdm = GradientDescentMomentum(
    learning_rate=float(10.0**assignments.get("log(learning_rate)")),
    momentum_coef=float(assignments.get("momentum_coef")),
    wdecay=float(10.0**assignments.get("log(weight_decay)")),
    schedule=Schedule(step_config=step_config,
                      change=float(assignments.get("momentum_step_change"))),
)

relu = Rectlin()
conv = dict(init=init_uni, batch_norm=False, activation=relu)
convp1 = dict(init=init_uni, batch_norm=False, activation=relu, padding=1)
convp1s2 = dict(init=init_uni, batch_norm=False, activation=relu, padding=1, strides=2)

layers = [
    Dropout(keep=.8),
    Conv((3, 3, 96), **convp1),
## rate update frequency less than one means update twice per EM epoch
##   (full set of training macrobatches)
#if args.rate_freq < 1: args.rate_freq = train.nmacrobatches
#if args.rate_decay > 0:
#    if args.rate_freq > 1:
#        weight_sched = DiscreteTauExpSchedule(args.rate_decay * train.nmacrobatches,
#                                              num_epochs, args.rate_freq)
#    else:
#        weight_sched = TauExpSchedule(args.rate_decay * train.nmacrobatches, num_epochs)
#else:
#    weight_sched = Schedule()

# simpler method directly from neon Schedule(), specify step and change on command line
if len(args.epoch_dstep) > 0:
    epoch_step = list(cumsum(args.epoch_dstep))
    print('Adjusting learning rate by %.4f at %s'
          % (args.rate_change, ','.join([str(x) for x in epoch_step])))
    weight_sched = Schedule(step_config=epoch_step, change=args.rate_change)
else:
    weight_sched = PowerSchedule(step_config=int(args.rate_step * train.nmacrobatches),
                                 change=args.rate_change)

opt_gdm = GradientDescentMomentum(args.rate_init[0], args.momentum[0],
                                  wdecay=args.weight_decay, schedule=weight_sched,
                                  stochastic_round=args.rounding)
opt_biases = GradientDescentMomentum(args.rate_init[1], args.momentum[1],
                                     schedule=weight_sched,
                                     stochastic_round=args.rounding)
opt_fixed = GradientDescentMomentum(0.0, 1.0, wdecay=0.0)
parser = NeonArgparser(__doc__)
args = parser.parse_args()

NervanaObject.be.enable_winograd = 4

# setup data provider
X_train = np.random.uniform(-1, 1, (128, 3 * 224 * 224))
y_train = np.random.uniform(-1, 1, (128, 1000))
train = ArrayIterator(X_train, y_train, nclass=1000, lshape=(3, 224, 224))

layers = [Conv((11, 11, 64), init=Gaussian(scale=0.01),
               activation=Rectlin(), padding=3, strides=4),
          Pooling(3, strides=2),
          Conv((5, 5, 192), init=Gaussian(scale=0.01), activation=Rectlin(), padding=2),
          Pooling(3, strides=2),
          Conv((3, 3, 384), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Pooling(3, strides=2),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=1000, init=Gaussian(scale=0.01), activation=Softmax())]
model = Model(layers=layers)

weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.))
opt_gdm = GradientDescentMomentum(0.01, 0.0, wdecay=0.0005, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm})
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.benchmark(train, cost=cost, optimizer=opt, niterations=10, nskip=5)
                     bias=Constant(1.0),
                     activation=relu,
                     name='fc7'))
layers.append(Dropout(keep=0.5, name='drop7'))
layers.append(Affine(nout=1000, init=init_g1, bias=Constant(0.0),
                     activation=Softmax(), name='fc8'))
model = Model(layers=layers)

# scale LR by 0.1 every 20 epochs (this assumes batch_size = 256)
weight_sched = Schedule(20, 0.1)
opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=weight_sched)
opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model, eval_set=test, metric=valmetric, **args.callback_args)

if args.model_file is not None:
                      inner_size=224, set_name='validation', do_transforms=False)
except (OSError, IOError, ValueError) as err:
    print(err)
    sys.exit(0)

train.init_batch_provider()
test.init_batch_provider()

init1 = Gaussian(scale=0.01)
init2 = Gaussian(scale=0.03)
relu = Rectlin()

# drop LR by 1/250**(1/3) at beginning of epochs 23, 45, 66
weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.))
opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=weight_sched)

# drop the bias learning rate by 1/10 at the beginning of epoch 45
opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=Schedule([44], 0.1))

# Set up the model layers
layers = []
layers.append(Conv((11, 11, 64),
                   strides=4,
                   padding=3,
                   init=init1,
           activation=Rectlin()),
    Dropout(keep=0.5),
    Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()),
    Dropout(keep=0.5),
    Affine(nout=1000, init=Gaussian(scale=0.01), bias=Constant(-7), activation=Softmax())
]
model = Model(layers=layers)

# drop weights LR by 1/250**(1/3) at epochs (23, 45, 66), drop bias LR by 1/10 at epoch 45
weight_sched = Schedule([22, 44, 65], 0.15874)
opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=weight_sched,
                                  stochastic_round=args.rounding)
opt_biases = GradientDescentMomentum(0.02, 0.9,
                                     schedule=Schedule([44], 0.1),
                                     stochastic_round=args.rounding)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model, eval_set=test,
           activation=Rectlin()),
    Dropout(keep=0.5),
    Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()),
    Dropout(keep=0.5),
    Affine(nout=1000, init=Gaussian(scale=0.01), bias=Constant(-7), activation=Softmax())
]
model = Model(layers=layers)

# drop weights LR by 1/250**(1/3) at epochs (23, 45, 66), drop bias LR by 1/10 at epoch 45
weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.))
opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=weight_sched,
                                  stochastic_round=args.rounding)
opt_biases = GradientDescentMomentum(0.02, 0.9,
                                     schedule=Schedule([44], 0.1),
                                     stochastic_round=args.rounding)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model, eval_set=test,
# Now construct the network
layers = [Conv(**conv_params(3, 16))]
for nfm, stride in zip(nfms, strides):
    layers.append(module_factory(nfm, stride))
layers.append(Pooling(8, op='avg'))
layers.append(Affine(nout=10, init=Kaiming(local=False),
                     batch_norm=True, activation=Softmax()))
model = Model(layers=layers)
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001,
                              schedule=Schedule([90, 135], 0.1))

# configure callbacks
callbacks = Callbacks(model, eval_set=test, metric=Misclassification(),
                      **args.callback_args)

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
config_files = [train_config] if os.path.exists(train_config) else []
parser = NeonArgparser(__doc__, default_config_files=config_files)
parser.add_argument('--subset_pct', type=float, default=100,
                    help='subset of training dataset to use (percentage)')
args = parser.parse_args()

model, cost = create_network()
rseed = 0 if args.rng_seed is None else args.rng_seed

# setup data provider
assert 'train' in args.manifest, "Missing train manifest"
assert 'val' in args.manifest, "Missing validation manifest"
train = make_alexnet_train_loader(args.manifest['train'], args.manifest_root,
                                  model.be, args.subset_pct, rseed,
                                  dtype=args.datatype)
valid = make_validation_loader(args.manifest['val'], args.manifest_root,
                               model.be, args.subset_pct, dtype=args.datatype)

# drop weights LR by 1/250**(1/3) at epochs (23, 45, 66), drop bias LR by 1/10 at epoch 45
sched_weight = Schedule([22, 44, 65], 0.15874)
sched_biases = Schedule([44], 0.1)

opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=sched_weight)
opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=sched_biases)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model, eval_set=valid, metric=valmetric, **args.callback_args)

model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
    normalize=False,
    contrast_normalize=True,
    whiten=False,
)

# really 10 classes, pad to nearest power of 2 to match conv output
train_set = DataIterator(X_train, y_train, nclass=16, lshape=(3, 32, 32))
valid_set = DataIterator(X_test, y_test, nclass=16, lshape=(3, 32, 32))

init_uni = Gaussian(scale=args.gaussian_scale)
step_config = [int(args.momentum_step_schedule_start +
                   i * args.momentum_step_schedule_step_width)
               for i in range(int(args.momentum_step_schedule_steps))]
opt_gdm = GradientDescentMomentum(
    learning_rate=float(args.learning_rate),
    momentum_coef=float(args.momentum_coef),
    wdecay=float(args.weight_decay),
    schedule=Schedule(step_config=step_config, change=float(args.momentum_step_change)),
)

relu = Rectlin()
conv = dict(init=init_uni, batch_norm=False, activation=relu)
convp1 = dict(init=init_uni, batch_norm=False, activation=relu, padding=1)
convp1s2 = dict(init=init_uni, batch_norm=False, activation=relu, padding=1, strides=2)

layers = [Dropout(keep=.8),
          Conv((3, 3, 96), **convp1),
          Conv((3, 3, 96), **convp1),
          Conv((3, 3, 96), **convp1s2),
          Dropout(keep=.5),
          Conv((3, 3, 192), **convp1),
          Conv((3, 3, 192), **convp1),
          Conv((3, 3, 192), **convp1s2),
def main():
    # larger batch sizes may not fit on GPU
    parser = NeonArgparser(__doc__, default_overrides={'batch_size': 4})
    parser.add_argument("--bench", action="store_true",
                        help="run benchmark instead of training")
    parser.add_argument("--num_classes", type=int, default=12,
                        help="number of classes in the annotation")
    parser.add_argument("--height", type=int, default=256, help="image height")
    parser.add_argument("--width", type=int, default=512, help="image width")
    args = parser.parse_args(gen_be=False)

    # check that image dimensions are powers of 2
    if (args.height & (args.height - 1)) != 0:
        raise TypeError("Height must be a power of 2.")
    if (args.width & (args.width - 1)) != 0:
        raise TypeError("Width must be a power of 2.")

    (c, h, w) = (args.num_classes, args.height, args.width)

    # need to use the backend with the new upsampling layer implementation
    be = NervanaGPU_Upsample(rng_seed=args.rng_seed, device_id=args.device_id)
    # set batch size
    be.bsz = args.batch_size

    # couple backend to global neon object
    NervanaObject.be = be

    shape = dict(channel_count=3, height=h, width=w, subtract_mean=False)
    train_params = ImageParams(center=True, flip=False, scale_min=min(h, w),
                               scale_max=min(h, w), aspect_ratio=0, **shape)
    test_params = ImageParams(center=True, flip=False, scale_min=min(h, w),
                              scale_max=min(h, w), aspect_ratio=0, **shape)
    common = dict(target_size=h * w, target_conversion='read_contents',
                  onehot=False, target_dtype=np.uint8, nclasses=args.num_classes)

    train_set = PixelWiseImageLoader(set_name='train', repo_dir=args.data_dir,
                                     media_params=train_params, shuffle=False,
                                     subset_percent=100,
                                     index_file=os.path.join(args.data_dir, 'train_images.csv'),
                                     **common)
    val_set = PixelWiseImageLoader(set_name='val', repo_dir=args.data_dir,
                                   media_params=test_params,
                                   index_file=os.path.join(args.data_dir, 'val_images.csv'),
                                   **common)

    # initialize model object
    layers = gen_model(c, h, w)
    segnet_model = Model(layers=layers)

    # configure callbacks
    callbacks = Callbacks(segnet_model, eval_set=val_set, **args.callback_args)

    opt_gdm = GradientDescentMomentum(1.0e-6, 0.9, wdecay=0.0005, schedule=Schedule())
    opt_biases = GradientDescentMomentum(2.0e-6, 0.9, schedule=Schedule())
    opt_bn = GradientDescentMomentum(1.0e-6, 0.9, schedule=Schedule())
    opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases, 'BatchNorm': opt_bn})

    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    if args.bench:
        segnet_model.initialize(train_set, cost=cost)
        segnet_model.benchmark(train_set, cost=cost, optimizer=opt)
        sys.exit(0)
    else:
        segnet_model.fit(train_set, optimizer=opt, num_epochs=args.epochs,
                         cost=cost, callbacks=callbacks)

    # get the trained segnet model outputs for the validation set
    outs_val = segnet_model.get_outputs(val_set)

    with open('outputs.pkl', 'wb') as fid:
        pickle.dump(outs_val, fid, -1)
model, cost = create_network()

# setup data provider
assert 'train' in args.manifest, "Missing train manifest"
assert 'test' in args.manifest, "Missing validation manifest"
train = make_train_loader(args.manifest['train'], args.manifest_root, model.be,
                          args.subset_pct, random_seed)
valid = make_test_loader(args.manifest['test'], args.manifest_root, model.be,
                         args.subset_pct)

# setup callbacks
callbacks = Callbacks(model, eval_set=valid, **args.callback_args)

# gradient descent with momentum, weight decay, and learning rate decay schedule
learning_rate_sched = Schedule(list(range(6, args.epochs, 6)), 0.1)
opt_gdm = GradientDescentMomentum(0.003, 0.9, wdecay=0.005,
                                  schedule=learning_rate_sched)
opt_biases = GradientDescentMomentum(0.006, 0.9, schedule=learning_rate_sched)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# train model
model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

# output accuracies
if args.rlayer_type == 'lstm':
    rlayer1 = LSTM(hidden_size, init, activation=Tanh(), gate_activation=Logistic())
    rlayer2 = LSTM(hidden_size, init, activation=Tanh(), gate_activation=Logistic())
else:
    rlayer1 = GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic())
    rlayer2 = GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic())

layers = [rlayer1,
          rlayer2,
          Affine(len(train_set.vocab), init, bias=init, activation=Softmax())]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
model = Model(layers=layers)

learning_rate_sched = Schedule(list(range(10, args.epochs)), .97)
optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding,
                    schedule=learning_rate_sched)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost,
          callbacks=callbacks)

# get predictions
ypred = model.get_outputs(valid_set)
    Affine(nout=1000, init=init1, bias=Constant(0), activation=Softmax()))

cost = GeneralizedCost(costfunc=CrossEntropyMulti(scale=cost_scale))
mlp = Model(layers=layers)

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(mlp, train, eval_set=test, metric=valmetric, **args.callback_args)

# create learning rate schedules and optimizers
weight_sched = Schedule(range(14, 75, 15), 0.1)
opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=weight_sched)
opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

mlp.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

# clean up the data providers
test.exit_batch_provider()
    res_module = module_s1(nfm) if stride == 1 else module_s2(nfm)
    layers.append(res_module)
layers.append(BatchNorm())
layers.append(Activation(Rectlin()))
layers.append(Pooling('all', op='avg'))
layers.append(Affine(10, init=Kaiming(local=False), batch_norm=True, activation=Softmax()))
model = Model(layers=layers)
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001,
                              schedule=Schedule([82, 124], 0.1))

# configure callbacks
valmetric = Misclassification()
callbacks = Callbacks(model, eval_set=test, metric=valmetric, **args.callback_args)
callbacks.add_callback(BatchNormTuneCallback(tune_set), insert_pos=0)

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
# outputs on the validation set.
#layers.append(Conv(fshape=(1,1,100), init=Kaiming(local=True), batch_norm=True))
#layers.append(Pooling(fshape='all', op='avg'))
#layers.append(Activation(Softmax()))
layers.append(Affine(nout=100, init=Kaiming(local=False), batch_norm=True,
                     activation=Softmax()))

model = Model(layers=layers)
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0005,
                              schedule=Schedule([40, 70], 0.1))

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model, train, eval_set=test, metric=valmetric, **args.callback_args)
callbacks.add_deconv_callback(train, test)

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train, optimizer=opt, num_epochs=args.epochs,
                         init=Kaiming(local=False),
                         batch_norm=True,
                         activation=Rectlin()))
    layers.append(Affine(1, init=Kaiming(local=False), activation=Logistic()))

    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyBinary())


lunaModel, cost = create_network(args.depth)

modelFileName = 'LUNA16_resnet.prm'
# If the model file exists, then load it and start from there.
# if (os.path.isfile(modelFileName)):
#     lunaModel = Model(modelFileName)

weight_sched = Schedule([30, 60], 0.1)
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001, schedule=weight_sched)

# configure callbacks
if args.callback_args['eval_freq'] is None:
    args.callback_args['eval_freq'] = 1

# configure callbacks
callbacks = Callbacks(lunaModel, eval_set=valid_set, **args.callback_args)
# add a callback that saves the best model state
callbacks.add_save_best_state_callback(modelFileName)

lunaModel.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost,
# hyperparameters
num_epochs = args.epochs

dataset = CIFAR10(path=args.data_dir,
                  normalize=False,
                  contrast_normalize=True,
                  whiten=True,
                  pad_classes=True)
train_set = dataset.train_iter
valid_set = dataset.valid_iter

init_uni = Gaussian(scale=0.05)
opt_gdm = GradientDescentMomentum(learning_rate=float(args.learning_rate),
                                  momentum_coef=0.9,
                                  wdecay=float(args.weight_decay),
                                  schedule=Schedule(step_config=[200, 250, 300],
                                                    change=0.1))
relu = Rectlin()
conv = dict(init=init_uni, batch_norm=False, activation=relu)
convp1 = dict(init=init_uni, batch_norm=False, activation=relu, padding=1)
convp1s2 = dict(init=init_uni, batch_norm=False, activation=relu, padding=1, strides=2)

layers = [
    Dropout(keep=.8),
    Conv((3, 3, 96), **convp1),
    Conv((3, 3, 96), **convp1),
    Conv((3, 3, 96), **convp1s2),
else:
    rlayer1, rlayer2 = GRU(**rlayer_params), GRU(**rlayer_params)

layers = [
    LookupTable(vocab_size=len(train_set.vocab), embedding_dim=hidden_size, init=init),
    rlayer1,
    rlayer2,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
model = Model(layers=layers)

# vanilla gradient descent with decay schedule on learning rate and gradient scaling
learning_rate_sched = Schedule(list(range(5, args.epochs)), .5)
optimizer = GradientDescentMomentum(1, 0,
                                    gradient_clip_norm=gradient_clip_norm,
                                    schedule=learning_rate_sched)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost,
          callbacks=callbacks)
model = Model(layers=SSD(ssd_config=train_config['ssd_config'], dataset=train_set))

cost = MBoxLoss(num_classes=train_set.num_classes)

if args.model_file is None:
    load_vgg_weights(model, cache_dir)
else:
    model.load_params(args.model_file)

if args.lr_step is None:
    args.lr_step = [40, 80, 120]

base_lr = 0.0001 * be.bsz * args.lr_scale
schedule = Schedule(args.lr_step, 0.1)
opt_w = GradientDescentMomentum(base_lr, momentum_coef=0.9, wdecay=0.0005, schedule=schedule)
opt_b = GradientDescentMomentum(base_lr, momentum_coef=0.9, schedule=schedule)
opt = MultiOptimizer({'default': opt_w, 'Bias': opt_b})

# hijack the eval callback arg here
eval_freq = args.callback_args.pop('eval_freq')
callbacks = Callbacks(model, **args.callback_args)
callbacks.add_callback(MAP_Callback(eval_set=val_set, epoch_freq=eval_freq))

if args.image_sample_dir is not None:
    callbacks.add_callback(ssd_image_callback(eval_set=val_set,
                    default=100,
                    help='subset of training dataset to use (percentage)')
args = parser.parse_args()

model, cost = create_network()
rseed = 0 if args.rng_seed is None else args.rng_seed

# setup data provider
assert 'train' in args.manifest, "Missing train manifest"
assert 'val' in args.manifest, "Missing validation manifest"
train = make_alexnet_train_loader(args.manifest['train'], args.manifest_root,
                                  model.be, args.subset_pct, rseed)
valid = make_validation_loader(args.manifest['val'], args.manifest_root,
                               model.be, args.subset_pct)

sched_weight = Schedule([10], change=0.1)
opt = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=sched_weight)

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model, eval_set=valid, metric=valmetric, **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train, valid)

model.fit(train, optimizer=opt, num_epochs=args.epochs,
def main():
    # parse the command line arguments
    parser = NeonArgparser(__doc__)

    args = parser.parse_args()

    logger = logging.getLogger()
    logger.setLevel(args.log_thresh)

    # Set up batch iterators for the training, validation and test images
    print("Setting up data batch loaders...")
    train = ImgMaster(repo_dir='dataTmp', set_name='train',
                      inner_size=120, subset_pct=100)
    val = ImgMaster(repo_dir='dataTmp', set_name='train', inner_size=120,
                    subset_pct=100, do_transforms=False)
    test = ImgMaster(repo_dir='dataTestTmp', set_name='train', inner_size=120,
                     subset_pct=100, do_transforms=False)

    train.init_batch_provider()
    val.init_batch_provider()
    test.init_batch_provider()

    print("Constructing network...")
    # Create AlexNet architecture
    model = constuct_network()

    # model.load_weights(args.model_file)

    # drop weights LR by 1/250**(1/3) at epochs (23, 45, 66, 91, 98),
    # drop bias LR by 1/10 at epoch 131
    weight_sched = Schedule([22, 44, 65, 90, 97], (1 / 250.)**(1 / 3.))
    opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.005, schedule=weight_sched)
    opt_biases = GradientDescentMomentum(0.04, 1.0, schedule=Schedule([130], .1))
    opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

    # configure callbacks
    valmetric = TopKMisclassification(k=5)
    callbacks = Callbacks(model, train, eval_set=val, metric=valmetric,
                          **args.callback_args)

    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    # flag = input("Press Enter if you want to begin training process.")
    print("Training network...")
    model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

    mets = model.eval(test, metric=valmetric)
    print('Validation set metrics:')
    print('LogLoss: %.2f, Accuracy: %.1f%% (Top-1), %.1f%% (Top-5)'
          % (mets[0], (1.0 - mets[1]) * 100, (1.0 - mets[2]) * 100))

    test.exit_batch_provider()
    val.exit_batch_provider()
    train.exit_batch_provider()
img_set_options = dict(repo_dir=args.data_dir,
                       inner_size=224,
                       dtype=args.datatype,
                       subset_pct=100)
train = img_provider(set_name='train', **img_set_options)
test = img_provider(set_name='validation', do_transforms=False, **img_set_options)
train.init_batch_provider()
test.init_batch_provider()

relu = Rectlin()
init_uni = GlorotUniform()

# The parameters below are straight out of [Springenberg2014]
opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                  schedule=Schedule(step_config=[10], change=0.1),
                                  momentum_coef=0.9,
                                  wdecay=.0005)

# set up model layers
layers = []
layers.append(DataTransform(transform=Normalizer(divisor=128.)))

layers.append(Conv((11, 11, 96), init=init_uni, activation=relu, strides=4, padding=1))
layers.append(Conv((1, 1, 96), init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 96), init=init_uni, activation=relu, strides=2, padding=1))  # 54->27
layers.append(Conv((5, 5, 256), init=init_uni, activation=relu, strides=1))  # 27->23
layers.append(Conv((1, 1, 256), init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 256), init=init_uni, activation=relu, strides=2, padding=1))  # 23->12
def module_factory(nfm, stride=1):
    mainpath = [Conv(**conv_params(3, nfm, stride=stride)),
                Conv(**conv_params(3, nfm, relu=False))]
    sidepath = [SkipNode() if stride == 1 else Conv(**id_params(nfm))]
    module = [MergeSum([mainpath, sidepath]),
              Activation(Rectlin())]
    return module


# Structure of the deep residual part of the network:
# args.depth modules of 2 convolutional layers each at feature map depths of 16, 32, 64
nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * args.depth)]
strides = [1] + [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

# Now construct the network
layers = [Conv(**conv_params(3, 16))]
for nfm, stride in zip(nfms, strides):
    layers.append(module_factory(nfm, stride))
layers.append(Pooling(8, op='avg'))
layers.append(Affine(nout=10, init=Kaiming(local=False), batch_norm=True, activation=Softmax()))
model = Model(layers=layers)
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001,
                              schedule=Schedule([90, 123], 0.1))

# configure callbacks
callbacks = Callbacks(model, eval_set=test, metric=Misclassification(),
                      **args.callback_args)

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)