Example No. 1
def test_step_schedule(backend):
    """
    Test constant rate, fixed step and various modes of programmable steps.
    """
    lr_init = 0.1

    # default scheduler has a constant learning rate
    sch = Schedule()
    for epoch in range(10):
        lr = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        assert lr == lr_init

    # test a uniform step schedule
    step_config = 2
    change = 0.5
    sch = Schedule(step_config=step_config, change=change)
    for epoch in range(10):
        lr = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        # test a repeated call for the same epoch
        lr2 = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        # print epoch, lr, lr2
        assert np.allclose(lr, lr_init * change**(np.floor((epoch+1)/step_config)))
        assert np.allclose(lr2, lr_init * change**(np.floor((epoch+1)/step_config)))

    # test a list step schedule
    sch = Schedule(step_config=[2, 3], change=.1)
    assert np.allclose(.1, sch.get_learning_rate(learning_rate=.1, epoch=0))
    assert np.allclose(.1, sch.get_learning_rate(learning_rate=.1, epoch=1))
    assert np.allclose(.01, sch.get_learning_rate(learning_rate=.1, epoch=2))
    # test a repeated call for the same epoch
    assert np.allclose(.01, sch.get_learning_rate(learning_rate=.1, epoch=2))
    assert np.allclose(.001, sch.get_learning_rate(learning_rate=.1, epoch=3))
    assert np.allclose(.001, sch.get_learning_rate(learning_rate=.1, epoch=4))
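
The assertions above pin down the uniform step rule: the learning rate is multiplied by change once every step_config epochs. A minimal stand-alone sketch of that closed form (an illustration mirroring the test's expectations, not neon's internal implementation):

import numpy as np

def stepped_lr(lr_init, epoch, step_config=2, change=0.5):
    # rate after `epoch`, scaled by `change` once every `step_config` epochs,
    # exactly as the assertions above expect
    return lr_init * change ** np.floor((epoch + 1) / step_config)

# lr_init=0.1 over epochs 0..5 -> 0.1, 0.05, 0.05, 0.025, 0.025, 0.0125
print([stepped_lr(0.1, e) for e in range(6)])
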
Example No. 2
def get_args_and_hyperparameters():
    parser = NeonArgparser(__doc__)
    args = parser.parse_args(gen_be=False)

    # Override save path if None
    if args.save_path is None:
        args.save_path = 'frcn_alexnet.pickle'

    if args.callback_args['save_path'] is None:
        args.callback_args['save_path'] = args.save_path

    if args.callback_args['serialize'] is None:
        args.callback_args['serialize'] = min(args.epochs, 10)

    # hyperparameters
    args.batch_size = 64
    hyper_params = lambda: None
    hyper_params.use_pre_trained_weights = True  # If true, load pre-trained weights to the model
    hyper_params.max_train_imgs = 5000  # Make this smaller in small trial runs to save time
    hyper_params.max_test_imgs = 5000  # Make this smaller in small trial runs to save time
    hyper_params.num_epochs = args.epochs
    hyper_params.samples_per_batch = args.batch_size  # The mini-batch size
    # The number of multi-scale samples to make for each input image. These
    # samples are then fed into the network in multiple minibatches.
    hyper_params.samples_per_img = hyper_params.samples_per_batch * 7
    hyper_params.frcn_fine_tune = False
    hyper_params.shuffle = True
    if hyper_params.use_pre_trained_weights:
        # This will typically train in 10-15 epochs. Use a small learning rate
        # and quickly reduce every 5-10 epochs. Use a high momentum since we
        # are close to the minima.
        s = 1e-4
        hyper_params.learning_rate_scale = s
        hyper_params.learning_rate_sched = Schedule(step_config=[15, 20],
                                                    change=[0.1 * s, 0.01 * s])
        hyper_params.momentum = 0.9
    else:  # need to be less aggressive with reducing learning rate if the model is not pre-trained
        s = 1e-2
        hyper_params.learning_rate_scale = 1e-2
        hyper_params.learning_rate_sched = Schedule(
            step_config=[8, 14, 18, 20],
            change=[0.5 * s, 0.1 * s, 0.05 * s, 0.01 * s])
        hyper_params.momentum = 0.1
    hyper_params.class_score_threshold = 0.000001
    hyper_params.score_exponent = 5
    hyper_params.shuffle = True
    return args, hyper_params
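
A side note on the hyper_params = lambda: None idiom above: it exists only to create an object that accepts arbitrary attributes. A types.SimpleNamespace does the same job more explicitly; the short sketch below (reusing a few field names from this example) is equivalent:

from types import SimpleNamespace

hyper_params = SimpleNamespace(
    use_pre_trained_weights=True,   # load pre-trained weights into the model
    samples_per_batch=64,           # the mini-batch size
    momentum=0.9,
)
print(hyper_params.samples_per_batch)  # 64
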
Example No. 3
    print(assignments)

    num_epochs = int(assignments.get("epochs"))
    init_uni = Gaussian(scale=assignments.get("gaussian_scale"))
    step_config = [
        int(
            assignments.get("momentum_step_schedule_start") +
            i * assignments.get("momentum_step_schedule_step_width"))
        for i in range(int(assignments.get("momentum_step_schedule_steps")))
    ]
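    # For instance, with a hypothetical start of 20, step width of 15 and 3 steps,
    # the comprehension above yields step_config = [20, 35, 50]; those are the
    # epochs at which the schedule applies momentum_step_change.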
    opt_gdm = GradientDescentMomentum(
        learning_rate=float(10.0**assignments.get("log(learning_rate)")),
        momentum_coef=float(assignments.get("momentum_coef")),
        wdecay=float(10.0**assignments.get("log(weight_decay)")),
        schedule=Schedule(step_config=step_config,
                          change=float(
                              assignments.get("momentum_step_change"))),
    )

    relu = Rectlin()
    conv = dict(init=init_uni, batch_norm=False, activation=relu)
    convp1 = dict(init=init_uni, batch_norm=False, activation=relu, padding=1)
    convp1s2 = dict(init=init_uni,
                    batch_norm=False,
                    activation=relu,
                    padding=1,
                    strides=2)

    layers = [
        Dropout(keep=.8),
        Conv((3, 3, 96), **convp1),
Example No. 4
        ## rate update frequency less than one means update twice per EM epoch (full set of training macrobatches)
        #if args.rate_freq < 1: args.rate_freq = train.nmacrobatches
        #if args.rate_decay > 0:
        #    if args.rate_freq > 1:
        #        weight_sched = DiscreteTauExpSchedule(args.rate_decay * train.nmacrobatches,num_epochs, args.rate_freq)
        #    else:
        #        weight_sched = TauExpSchedule(args.rate_decay * train.nmacrobatches, num_epochs)
        #else:
        #    weight_sched = Schedule()

        # simpler method directly from neon Schedule(), specify step and change on command line
        if len(args.epoch_dstep) > 0:
            epoch_step = list(cumsum(args.epoch_dstep))
            print('Adjusting learning rate by %.4f at %s' %
                  (args.rate_change, ','.join([str(x) for x in epoch_step])))
            weight_sched = Schedule(step_config=epoch_step,
                                    change=args.rate_change)
        else:
            weight_sched = PowerSchedule(step_config=int(args.rate_step *
                                                         train.nmacrobatches),
                                         change=args.rate_change)

        opt_gdm = GradientDescentMomentum(args.rate_init[0],
                                          args.momentum[0],
                                          wdecay=args.weight_decay,
                                          schedule=weight_sched,
                                          stochastic_round=args.rounding)
        opt_biases = GradientDescentMomentum(args.rate_init[1],
                                             args.momentum[1],
                                             schedule=weight_sched,
                                             stochastic_round=args.rounding)
        opt_fixed = GradientDescentMomentum(0.0, 1.0, wdecay=0.0)
Example No. 5
parser = NeonArgparser(__doc__)
args = parser.parse_args()

NervanaObject.be.enable_winograd = 4

# setup data provider
X_train = np.random.uniform(-1, 1, (128, 3*224*224))
y_train = np.random.uniform(-1, 1, (128, 1000))
train = ArrayIterator(X_train, y_train, nclass=1000, lshape=(3, 224, 224))

layers = [Conv((11, 11, 64), init=Gaussian(scale=0.01),
               activation=Rectlin(), padding=3, strides=4),
          Pooling(3, strides=2),
          Conv((5, 5, 192), init=Gaussian(scale=0.01), activation=Rectlin(), padding=2),
          Pooling(3, strides=2),
          Conv((3, 3, 384), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Pooling(3, strides=2),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=1000, init=Gaussian(scale=0.01), activation=Softmax())]
model = Model(layers=layers)

weight_sched = Schedule([22, 44, 65], (1/250.)**(1/3.))
opt_gdm = GradientDescentMomentum(0.01, 0.0, wdecay=0.0005, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm})
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.benchmark(train, cost=cost, optimizer=opt, niterations=10, nskip=5)
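
The step factor (1/250.)**(1/3.) used in the schedule above (written out numerically as 0.15874 in some later examples) is chosen so that the three drops at epochs 22, 44 and 65 reduce the learning rate by an overall factor of 250. A quick check:

change = (1 / 250.) ** (1 / 3.)
print(round(change, 5))       # 0.15874
print(round(change ** 3, 6))  # 0.004, i.e. 1/250
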
Example No. 6
           bias=Constant(1.0),
           activation=relu,
           name='fc7'))

layers.append(Dropout(keep=0.5, name='drop7'))
layers.append(
    Affine(nout=1000,
           init=init_g1,
           bias=Constant(0.0),
           activation=Softmax(),
           name='fc8'))

model = Model(layers=layers)

# scale LR by 0.1 every 20 epochs (this assumes batch_size = 256)
weight_sched = Schedule(20, 0.1)
opt_gdm = GradientDescentMomentum(0.01,
                                  0.9,
                                  wdecay=0.0005,
                                  schedule=weight_sched)
opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model,
                      eval_set=test,
                      metric=valmetric,
                      **args.callback_args)

if args.model_file is not None:
Example No. 7
                     inner_size=224,
                     set_name='validation',
                     do_transforms=False)
except (OSError, IOError, ValueError) as err:
    print(err)
    sys.exit(0)

train.init_batch_provider()
test.init_batch_provider()

init1 = Gaussian(scale=0.01)
init2 = Gaussian(scale=0.03)
relu = Rectlin()

# drop LR by 1/250**(1/3) at beginning of epochs 23, 45, 66
weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.))
opt_gdm = GradientDescentMomentum(0.01,
                                  0.9,
                                  wdecay=0.0005,
                                  schedule=weight_sched)

# drop the bias learning rate by 1/10 at the beginning of epoch 45.
opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=Schedule([44], 0.1))

# Set up the model layers
layers = []
layers.append(
    Conv((11, 11, 64),
         strides=4,
         padding=3,
         init=init1,
Example No. 8
           activation=Rectlin()),
    Dropout(keep=0.5),
    Affine(nout=4096,
           init=Gaussian(scale=0.01),
           bias=Constant(1),
           activation=Rectlin()),
    Dropout(keep=0.5),
    Affine(nout=1000,
           init=Gaussian(scale=0.01),
           bias=Constant(-7),
           activation=Softmax())
]
model = Model(layers=layers)

# drop weights LR by 1/250**(1/3) at epochs (23, 45, 66), drop bias LR by 1/10 at epoch 45
weight_sched = Schedule([22, 44, 65], 0.15874)
opt_gdm = GradientDescentMomentum(0.01,
                                  0.9,
                                  wdecay=0.0005,
                                  schedule=weight_sched,
                                  stochastic_round=args.rounding)
opt_biases = GradientDescentMomentum(0.02,
                                     0.9,
                                     schedule=Schedule([44], 0.1),
                                     stochastic_round=args.rounding)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model,
                      eval_set=test,
Example No. 9
           activation=Rectlin()),
    Dropout(keep=0.5),
    Affine(nout=4096,
           init=Gaussian(scale=0.01),
           bias=Constant(1),
           activation=Rectlin()),
    Dropout(keep=0.5),
    Affine(nout=1000,
           init=Gaussian(scale=0.01),
           bias=Constant(-7),
           activation=Softmax())
]
model = Model(layers=layers)

# drop weights LR by 1/250**(1/3) at epochs (23, 45, 66), drop bias LR by 1/10 at epoch 45
weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.))
opt_gdm = GradientDescentMomentum(0.01,
                                  0.9,
                                  wdecay=0.0005,
                                  schedule=weight_sched,
                                  stochastic_round=args.rounding)
opt_biases = GradientDescentMomentum(0.02,
                                     0.9,
                                     schedule=Schedule([44], 0.1),
                                     stochastic_round=args.rounding)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model,
                      eval_set=test,
Example No. 10
# Now construct the network
layers = [Conv(**conv_params(3, 16))]
for nfm, stride in zip(nfms, strides):
    layers.append(module_factory(nfm, stride))
layers.append(Pooling(8, op='avg'))
layers.append(
    Affine(nout=10,
           init=Kaiming(local=False),
           batch_norm=True,
           activation=Softmax()))

model = Model(layers=layers)
opt = GradientDescentMomentum(0.1,
                              0.9,
                              wdecay=0.0001,
                              schedule=Schedule([90, 135], 0.1))

# configure callbacks
callbacks = Callbacks(model,
                      eval_set=test,
                      metric=Misclassification(),
                      **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train,
          optimizer=opt,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)
Example No. 11
config_files = [train_config] if os.path.exists(train_config) else []

parser = NeonArgparser(__doc__, default_config_files=config_files)
parser.add_argument('--subset_pct', type=float, default=100,
                    help='subset of training dataset to use (percentage)')
args = parser.parse_args()

model, cost = create_network()
rseed = 0 if args.rng_seed is None else args.rng_seed

# setup data provider
assert 'train' in args.manifest, "Missing train manifest"
assert 'val' in args.manifest, "Missing validation manifest"
train = make_alexnet_train_loader(args.manifest['train'], args.manifest_root,
                                  model.be, args.subset_pct, rseed, dtype=args.datatype)
valid = make_validation_loader(args.manifest['val'], args.manifest_root,
                               model.be, args.subset_pct, dtype=args.datatype)

# drop weights LR by 1/250**(1/3) at epochs (23, 45, 66), drop bias LR by 1/10 at epoch 45
sched_weight = Schedule([22, 44, 65], 0.15874)
sched_biases = Schedule([44], 0.1)

opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=sched_weight)
opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=sched_biases)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model, eval_set=valid, metric=valmetric, **args.callback_args)
model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
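
Going by the list-schedule behaviour asserted in Example No. 1 (the rate is multiplied by change once for every configured step the current epoch has reached), the two schedules above work out roughly as sketched below. This is an illustrative calculation under that assumption, not neon's own code:

def list_schedule_lr(lr_init, epoch, steps, change):
    # multiply by `change` once per step boundary already reached
    return lr_init * change ** sum(epoch >= s for s in steps)

# weights: 0.01 until epoch 21, ~0.0016 from epoch 22, ~0.00025 from 44, ~4e-5 from 65
print(list_schedule_lr(0.01, 22, [22, 44, 65], 0.15874))
# biases: 0.02 until epoch 43, 0.002 from epoch 44 onwards
print(list_schedule_lr(0.02, 44, [44], 0.1))
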
Example No. 12
    normalize=False,
    contrast_normalize=True,
    whiten=False,
    )

# really 10 classes, pad to nearest power of 2 to match conv output
train_set = DataIterator(X_train, y_train, nclass=16, lshape=(3, 32, 32))
valid_set = DataIterator(X_test, y_test, nclass=16, lshape=(3, 32, 32))

init_uni = Gaussian(scale=args.gaussian_scale)
step_config = [int(args.momentum_step_schedule_start +
                   i * args.momentum_step_schedule_step_width)
               for i in range(int(args.momentum_step_schedule_steps))]
opt_gdm = GradientDescentMomentum(
    learning_rate=float(args.learning_rate),
    momentum_coef=float(args.momentum_coef),
    wdecay=float(args.weight_decay),
    schedule=Schedule(step_config=step_config, change=float(args.momentum_step_change)),
    )

relu = Rectlin()
conv = dict(init=init_uni, batch_norm=False, activation=relu)
convp1 = dict(init=init_uni, batch_norm=False, activation=relu, padding=1)
convp1s2 = dict(init=init_uni, batch_norm=False, activation=relu, padding=1, strides=2)

layers = [Dropout(keep=.8),
          Conv((3, 3, 96), **convp1),
          Conv((3, 3, 96), **convp1),
          Conv((3, 3, 96), **convp1s2),
          Dropout(keep=.5),
          Conv((3, 3, 192), **convp1),
          Conv((3, 3, 192), **convp1),
          Conv((3, 3, 192), **convp1s2),
Example No. 13
def main():
    # larger batch sizes may not fit on GPU
    parser = NeonArgparser(__doc__, default_overrides={'batch_size': 4})
    parser.add_argument("--bench", action="store_true", help="run benchmark instead of training")
    parser.add_argument("--num_classes", type=int, default=12, help="number of classes in the annotation")
    parser.add_argument("--height", type=int, default=256, help="image height")
    parser.add_argument("--width", type=int, default=512, help="image width")

    args = parser.parse_args(gen_be=False)

    # check that image dimensions are powers of 2
    if((args.height & (args.height - 1)) != 0):
        raise TypeError("Height must be a power of 2.")
    if((args.width & (args.width - 1)) != 0):
        raise TypeError("Width must be a power of 2.")

    (c, h, w) = (args.num_classes, args.height, args.width)

    # need to use the backend with the new upsampling layer implementation
    be = NervanaGPU_Upsample(rng_seed=args.rng_seed,
                             device_id=args.device_id)
    # set batch size
    be.bsz = args.batch_size

    # couple backend to global neon object
    NervanaObject.be = be

    shape = dict(channel_count=3, height=h, width=w, subtract_mean=False)
    train_params = ImageParams(center=True, flip=False,
                               scale_min=min(h, w), scale_max=min(h, w),
                               aspect_ratio=0, **shape)
    test_params = ImageParams(center=True, flip=False,
                              scale_min=min(h, w), scale_max=min(h, w),
                              aspect_ratio=0, **shape)
    common = dict(target_size=h*w, target_conversion='read_contents',
                  onehot=False, target_dtype=np.uint8, nclasses=args.num_classes)

    train_set = PixelWiseImageLoader(set_name='train', repo_dir=args.data_dir,
                                      media_params=train_params,
                                      shuffle=False, subset_percent=100,
                                      index_file=os.path.join(args.data_dir, 'train_images.csv'),
                                      **common)
    val_set = PixelWiseImageLoader(set_name='val', repo_dir=args.data_dir,
                                   media_params=test_params,
                                   index_file=os.path.join(args.data_dir, 'val_images.csv'),
                                   **common)

    # initialize model object
    layers = gen_model(c, h, w)
    segnet_model = Model(layers=layers)

    # configure callbacks
    callbacks = Callbacks(segnet_model, eval_set=val_set, **args.callback_args)

    opt_gdm = GradientDescentMomentum(1.0e-6, 0.9, wdecay=0.0005, schedule=Schedule())
    opt_biases = GradientDescentMomentum(2.0e-6, 0.9, schedule=Schedule())
    opt_bn = GradientDescentMomentum(1.0e-6, 0.9, schedule=Schedule())
    opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases, 'BatchNorm': opt_bn})

    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    if args.bench:
        segnet_model.initialize(train_set, cost=cost)
        segnet_model.benchmark(train_set, cost=cost, optimizer=opt)
        sys.exit(0)
    else:
        segnet_model.fit(train_set, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

    # get the trained segnet model outputs for the validation set
    outs_val = segnet_model.get_outputs(val_set)

    with open('outputs.pkl', 'wb') as fid:
        pickle.dump(outs_val, fid, -1)
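
For completeness, the pickled outputs written above can be read back later with the matching binary mode (a small hypothetical snippet, not part of the original script):

import pickle

with open('outputs.pkl', 'rb') as fid:
    outs_val = pickle.load(fid)
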
Example No. 14
model, cost = create_network()

# setup data provider
assert 'train' in args.manifest, "Missing train manifest"
assert 'test' in args.manifest, "Missing validation manifest"

train = make_train_loader(args.manifest['train'], args.manifest_root, model.be,
                          args.subset_pct, random_seed)
valid = make_test_loader(args.manifest['test'], args.manifest_root, model.be,
                         args.subset_pct)

# setup callbacks
callbacks = Callbacks(model, eval_set=valid, **args.callback_args)

# gradient descent with momentum, weight decay, and learning rate decay schedule
learning_rate_sched = Schedule(list(range(6, args.epochs, 6)), 0.1)
opt_gdm = GradientDescentMomentum(0.003,
                                  0.9,
                                  wdecay=0.005,
                                  schedule=learning_rate_sched)
opt_biases = GradientDescentMomentum(0.006, 0.9, schedule=learning_rate_sched)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# train model
model.fit(train,
          optimizer=opt,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)

# output accuracies
Example No. 15
if args.rlayer_type == 'lstm':
    rlayer1 = LSTM(hidden_size, init, activation=Tanh(), gate_activation=Logistic())
    rlayer2 = LSTM(hidden_size, init, activation=Tanh(), gate_activation=Logistic())
else:
    rlayer1 = GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic())
    rlayer2 = GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic())

layers = [rlayer1,
          rlayer2,
          Affine(len(train_set.vocab), init, bias=init, activation=Softmax())]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

learning_rate_sched = Schedule(list(range(10, args.epochs)), .97)
optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding,
                    schedule=learning_rate_sched)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)

# get predictions
ypred = model.get_outputs(valid_set)
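
With Schedule(list(range(10, args.epochs)), .97) every epoch from 10 onward is a step, so, again assuming the list-schedule rule asserted in Example No. 1, the RMSProp rate decays geometrically by about 3% per epoch once epoch 10 is reached. A rough illustration of the decay factor alone:

def decay_factor(epoch, start=10, change=0.97):
    # one multiplicative step for every epoch boundary reached since `start`
    return change ** max(0, epoch - start + 1)

print(decay_factor(9))             # 1.0 (no decay yet)
print(decay_factor(10))            # 0.97
print(round(decay_factor(30), 3))  # roughly 0.527
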
Example No. 16
    Affine(nout=1000, init=init1, bias=Constant(0), activation=Softmax()))

cost = GeneralizedCost(costfunc=CrossEntropyMulti(scale=cost_scale))

mlp = Model(layers=layers)

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(mlp,
                      train,
                      eval_set=test,
                      metric=valmetric,
                      **args.callback_args)

# create learning rate schedules and optimizers
weight_sched = Schedule(list(range(14, 75, 15)), 0.1)
opt_gdm = GradientDescentMomentum(0.01,
                                  0.9,
                                  wdecay=0.0005,
                                  schedule=weight_sched)
opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

mlp.fit(train,
        optimizer=opt,
        num_epochs=args.epochs,
        cost=cost,
        callbacks=callbacks)

# clean up the data providers
test.exit_batch_provider()
Example No. 17
def test_step_schedule(backend):
    """
    Test constant rate, fixed step and various modes of programmable steps.
    """
    lr_init = 0.1

    # default scheduler has a constant learning rate
    sch = Schedule()
    for epoch in range(10):
        lr = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        assert lr == lr_init

    # test a uniform step schedule
    step_config = 2
    change = 0.5
    sch = Schedule(step_config=step_config, change=change)
    for epoch in range(10):
        lr = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        # test a repeated call for the same epoch
        lr2 = sch.get_learning_rate(learning_rate=lr_init, epoch=epoch)
        # print epoch, lr, lr2
        assert np.allclose(
            lr, lr_init * change**(np.floor((epoch + 1) / step_config)))
        assert np.allclose(
            lr2, lr_init * change**(np.floor((epoch + 1) / step_config)))

    # test a list step schedule
    sch = Schedule(step_config=[2, 3], change=.1)
    assert np.allclose(.1, sch.get_learning_rate(learning_rate=.1, epoch=0))
    assert np.allclose(.1, sch.get_learning_rate(learning_rate=.1, epoch=1))
    assert np.allclose(.01, sch.get_learning_rate(learning_rate=.1, epoch=2))
    # test a repeated call for the same epoch
    assert np.allclose(.01, sch.get_learning_rate(learning_rate=.1, epoch=2))
    assert np.allclose(.001, sch.get_learning_rate(learning_rate=.1, epoch=3))
    assert np.allclose(.001, sch.get_learning_rate(learning_rate=.1, epoch=4))
Example No. 18
    res_module = module_s1(nfm) if stride == 1 else module_s2(nfm)
    layers.append(res_module)
layers.append(BatchNorm())
layers.append(Activation(Rectlin()))
layers.append(Pooling('all', op='avg'))
layers.append(
    Affine(10,
           init=Kaiming(local=False),
           batch_norm=True,
           activation=Softmax()))

model = Model(layers=layers)
opt = GradientDescentMomentum(0.1,
                              0.9,
                              wdecay=0.0001,
                              schedule=Schedule([82, 124], 0.1))

# configure callbacks
valmetric = Misclassification()
callbacks = Callbacks(model,
                      eval_set=test,
                      metric=valmetric,
                      **args.callback_args)
callbacks.add_callback(BatchNormTuneCallback(tune_set), insert_pos=0)

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train,
          optimizer=opt,
          num_epochs=args.epochs,
          cost=cost,
Example No. 19
# outputs on the validation set.
#layers.append(Conv(fshape=(1,1,100), init=Kaiming(local=True), batch_norm=True))
#layers.append(Pooling(fshape='all', op='avg'))
#layers.append(Activation(Softmax()))

layers.append(
    Affine(nout=100,
           init=Kaiming(local=False),
           batch_norm=True,
           activation=Softmax()))

model = Model(layers=layers)
opt = GradientDescentMomentum(0.1,
                              0.9,
                              wdecay=0.0005,
                              schedule=Schedule([40, 70], 0.1))

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model,
                      train,
                      eval_set=test,
                      metric=valmetric,
                      **args.callback_args)
callbacks.add_deconv_callback(train, test)

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train,
          optimizer=opt,
          num_epochs=args.epochs,
Example No. 20
               init=Kaiming(local=False),
               batch_norm=True,
               activation=Rectlin()))
    layers.append(Affine(1, init=Kaiming(local=False), activation=Logistic()))
    #return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyBinary())
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyBinary())


lunaModel, cost = create_network(args.depth)

modelFileName = 'LUNA16_resnet.prm'
# If a model file exists, load it and start from there.
# if (os.path.isfile(modelFileName)):
#   lunaModel = Model(modelFileName)

weight_sched = Schedule([30, 60], 0.1)
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001, schedule=weight_sched)

# configure callbacks
if args.callback_args['eval_freq'] is None:
    args.callback_args['eval_freq'] = 1

# configure callbacks
callbacks = Callbacks(lunaModel, eval_set=valid_set, **args.callback_args)
# add a callback that saves the best model state
callbacks.add_save_best_state_callback(modelFileName)

lunaModel.fit(train_set,
              optimizer=opt,
              num_epochs=num_epochs,
              cost=cost,
Example No. 21
# hyperparameters
num_epochs = args.epochs

dataset = CIFAR10(path=args.data_dir,
                  normalize=False,
                  contrast_normalize=True,
                  whiten=True,
                  pad_classes=True)
train_set = dataset.train_iter
valid_set = dataset.valid_iter

init_uni = Gaussian(scale=0.05)
opt_gdm = GradientDescentMomentum(learning_rate=float(args.learning_rate),
                                  momentum_coef=0.9,
                                  wdecay=float(args.weight_decay),
                                  schedule=Schedule(
                                      step_config=[200, 250, 300], change=0.1))

relu = Rectlin()
conv = dict(init=init_uni, batch_norm=False, activation=relu)
convp1 = dict(init=init_uni, batch_norm=False, activation=relu, padding=1)
convp1s2 = dict(init=init_uni,
                batch_norm=False,
                activation=relu,
                padding=1,
                strides=2)

layers = [
    Dropout(keep=.8),
    Conv((3, 3, 96), **convp1),
    Conv((3, 3, 96), **convp1),
    Conv((3, 3, 96), **convp1s2),
Example No. 22
else:
    rlayer1, rlayer2 = GRU(**rlayer_params), GRU(**rlayer_params)

layers = [
    LookupTable(vocab_size=len(train_set.vocab),
                embedding_dim=hidden_size,
                init=init), rlayer1, rlayer2,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

# vanilla gradient descent with a decay schedule on the learning rate and gradient norm clipping
learning_rate_sched = Schedule(list(range(5, args.epochs)), .5)
optimizer = GradientDescentMomentum(1,
                                    0,
                                    gradient_clip_norm=gradient_clip_norm,
                                    schedule=learning_rate_sched)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)
Example No. 23
model = Model(
    layers=SSD(ssd_config=train_config['ssd_config'], dataset=train_set))

cost = MBoxLoss(num_classes=train_set.num_classes)

if args.model_file is None:
    load_vgg_weights(model, cache_dir)
else:
    model.load_params(args.model_file)

if args.lr_step is None:
    args.lr_step = [40, 80, 120]

base_lr = 0.0001 * be.bsz * args.lr_scale
schedule = Schedule(args.lr_step, 0.1)
opt_w = GradientDescentMomentum(base_lr,
                                momentum_coef=0.9,
                                wdecay=0.0005,
                                schedule=schedule)
opt_b = GradientDescentMomentum(base_lr, momentum_coef=0.9, schedule=schedule)
opt = MultiOptimizer({'default': opt_w, 'Bias': opt_b})

# hijack the eval callback arg here
eval_freq = args.callback_args.pop('eval_freq')
callbacks = Callbacks(model, **args.callback_args)
callbacks.add_callback(MAP_Callback(eval_set=val_set, epoch_freq=eval_freq))

if args.image_sample_dir is not None:
    callbacks.add_callback(
        ssd_image_callback(eval_set=val_set,
Example No. 24
                    default=100,
                    help='subset of training dataset to use (percentage)')
args = parser.parse_args()

model, cost = create_network()
rseed = 0 if args.rng_seed is None else args.rng_seed

# setup data provider
assert 'train' in args.manifest, "Missing train manifest"
assert 'val' in args.manifest, "Missing validation manifest"
train = make_alexnet_train_loader(args.manifest['train'], args.manifest_root,
                                  model.be, args.subset_pct, rseed)
valid = make_validation_loader(args.manifest['val'], args.manifest_root,
                               model.be, args.subset_pct)

sched_weight = Schedule([10], change=0.1)
opt = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=sched_weight)

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model,
                      eval_set=valid,
                      metric=valmetric,
                      **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train, valid)

model.fit(train,
          optimizer=opt,
          num_epochs=args.epochs,
Example No. 25
def main():
    # parse the command line arguments
    parser = NeonArgparser(__doc__)

    args = parser.parse_args()

    logger = logging.getLogger()
    logger.setLevel(args.log_thresh)

    # Set up batch iterators for the training, validation and test images
    print("Setting up data batch loaders...")
    train = ImgMaster(repo_dir='dataTmp',
                      set_name='train',
                      inner_size=120,
                      subset_pct=100)
    val = ImgMaster(repo_dir='dataTmp',
                    set_name='train',
                    inner_size=120,
                    subset_pct=100,
                    do_transforms=False)
    test = ImgMaster(repo_dir='dataTestTmp',
                     set_name='train',
                     inner_size=120,
                     subset_pct=100,
                     do_transforms=False)

    train.init_batch_provider()
    val.init_batch_provider()
    test.init_batch_provider()

    print "Constructing network..."
    #Create AlexNet architecture
    model = constuct_network()

    #model.load_weights(args.model_file)

    # drop weights LR by 1/250**(1/3) at epochs 23, 45, 66, 91, 98; drop bias LR by 1/10 at epoch 131
    weight_sched = Schedule([22, 44, 65, 90, 97], (1 / 250.)**(1 / 3.))
    opt_gdm = GradientDescentMomentum(0.01,
                                      0.9,
                                      wdecay=0.005,
                                      schedule=weight_sched)
    opt_biases = GradientDescentMomentum(0.04,
                                         1.0,
                                         schedule=Schedule([130], .1))
    opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

    # configure callbacks
    valmetric = TopKMisclassification(k=5)
    callbacks = Callbacks(model,
                          train,
                          eval_set=val,
                          metric=valmetric,
                          **args.callback_args)

    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    #flag = input("Press Enter if you want to begin training process.")
    print "Training network..."
    model.fit(train,
              optimizer=opt,
              num_epochs=args.epochs,
              cost=cost,
              callbacks=callbacks)
    mets = model.eval(test, metric=valmetric)

    print('Validation set metrics:')
    print('LogLoss: %.2f, Accuracy: %.1f %% (Top-1), %.1f %% (Top-5)' % (
        mets[0], (1.0 - mets[1]) * 100, (1.0 - mets[2]) * 100))
    test.exit_batch_provider()
    val.exit_batch_provider()
    train.exit_batch_provider()
Example No. 26
img_set_options = dict(repo_dir=args.data_dir,
                       inner_size=224,
                       dtype=args.datatype,
                       subset_pct=100)
train = img_provider(set_name='train', **img_set_options)
test = img_provider(set_name='validation', do_transforms=False, **img_set_options)
train.init_batch_provider()
test.init_batch_provider()

relu = Rectlin()

init_uni = GlorotUniform()

# The parameters below are straight out of [Springenberg2014]
opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                  schedule=Schedule(step_config=[10],
                                                    change=0.1),
                                  momentum_coef=0.9, wdecay=.0005)


# set up model layers
layers = []
layers.append(DataTransform(transform=Normalizer(divisor=128.)))

layers.append(Conv((11, 11, 96), init=init_uni, activation=relu, strides=4, padding=1))
layers.append(Conv((1, 1, 96),   init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 96),   init=init_uni, activation=relu, strides=2,  padding=1))  # 54->27

layers.append(Conv((5, 5, 256),  init=init_uni, activation=relu, strides=1))              # 27->23
layers.append(Conv((1, 1, 256),  init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 256),  init=init_uni, activation=relu, strides=2,  padding=1))  # 23->12
Example No. 27
def module_factory(nfm, stride=1):
    mainpath = [Conv(**conv_params(3, nfm, stride=stride)),
                Conv(**conv_params(3, nfm, relu=False))]
    sidepath = [SkipNode() if stride == 1 else Conv(**id_params(nfm))]
    module = [MergeSum([mainpath, sidepath]),
              Activation(Rectlin())]
    return module

# Structure of the deep residual part of the network:
# args.depth modules of 2 convolutional layers each at feature map depths of 16, 32, 64
nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * args.depth)]
strides = [1] + [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

# Now construct the network
layers = [Conv(**conv_params(3, 16))]
for nfm, stride in zip(nfms, strides):
    layers.append(module_factory(nfm, stride))
layers.append(Pooling(8, op='avg'))
layers.append(Affine(nout=10, init=Kaiming(local=False), batch_norm=True, activation=Softmax()))

model = Model(layers=layers)
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001,
                              schedule=Schedule([90, 123], 0.1))

# configure callbacks
callbacks = Callbacks(model, eval_set=test, metric=Misclassification(), **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
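
To make the feature-map and stride bookkeeping above concrete, here is what the two list comprehensions produce for a hypothetical args.depth of 3 (illustration only):

depth = 3  # hypothetical stand-in for args.depth
nfms = [2 ** (stage + 4) for stage in sorted(list(range(3)) * depth)]
strides = [1] + [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]
print(nfms)     # [16, 16, 16, 32, 32, 32, 64, 64, 64]
print(strides)  # [1, 1, 1, 2, 1, 1, 2, 1, 1]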