Example #1
def main():
    # setup the model and run for num_epochs, saving the last state only
    # this is at the top so that the backend (be) is generated first
    mlp = gen_model(args.backend)

    # setup data iterators
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
    if args.backend == 'nervanacpu' or args.backend == 'cpu':
        # limit data since cpu backend runs slower
        train = DataIterator(X_train[:1000], y_train[:1000], nclass=nclass, lshape=(1, 28, 28))
        valid = DataIterator(X_test[:1000], y_test[:1000], nclass=nclass, lshape=(1, 28, 28))
    else:
        train = DataIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
        valid = DataIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

    # serialization related
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    checkpoint_model_path = os.path.join('./', 'test_oneshot.pkl')
    checkpoint_schedule = 1  # save at every step

    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path, history=2)

    # run the fit all the way through, saving a checkpoint at every epoch
    mlp.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks)

    # setup model with same random seed run epoch by epoch
    # serializing and deserializing at each step
    mlp = gen_model(args.backend)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    # reset data iterators
    train.reset()
    valid.reset()

    checkpoint_model_path = os.path.join('./', 'test_manyshot.pkl')
    checkpoint_schedule = 1  # save at every step
    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule,
                                     checkpoint_model_path,
                                     history=num_epochs)
    for epoch in range(num_epochs):
        # _0 points to state at end of epoch 0
        mlp.fit(train, optimizer=opt_gdm, num_epochs=epoch+1, cost=cost, callbacks=callbacks)

        # load saved file
        prts = os.path.splitext(checkpoint_model_path)
        fn = prts[0] + '_%d' % epoch + prts[1]
        mlp.load_weights(fn)  # load the saved weights

    # compare the last checkpoints: test_oneshot_<num_epochs-1>.pkl vs test_manyshot_<num_epochs-1>.pkl
    try:
        compare_model_pickles('test_oneshot_%d.pkl' % (num_epochs - 1),
                              'test_manyshot_%d.pkl' % (num_epochs - 1))
    except Exception:
        print('test failed....')
        sys.exit(1)
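
The compare_model_pickles helper is not shown in this snippet; it is assumed to load both serialized model files and assert that the stored parameters agree. A minimal sketch of that kind of check, with an illustrative name and traversal (not neon's actual implementation):

import pickle

import numpy as np


def compare_pickled_params(fn_a, fn_b, rtol=1e-5):
    # hypothetical stand-in for compare_model_pickles: recursively compare
    # any numpy arrays found in the two pickled model states
    def walk(a, b):
        if isinstance(a, dict):
            for k in a:
                walk(a[k], b[k])
        elif isinstance(a, (list, tuple)):
            for x, y in zip(a, b):
                walk(x, y)
        elif isinstance(a, np.ndarray):
            assert np.allclose(a, b, rtol=rtol)

    with open(fn_a, 'rb') as fa, open(fn_b, 'rb') as fb:
        walk(pickle.load(fa), pickle.load(fb))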
Example #2
def main(args):
    # load up the mnist data set
    dataset = MNIST(path=args.data_dir)

    # initialize model object
    mlp = Model(layers=[
        Affine(nout=100,
               init=Gaussian(loc=0.0, scale=0.01),
               activation=Rectlin()),
        Affine(nout=10,
               init=Gaussian(loc=0.0, scale=0.01),
               activation=Logistic(shortcut=True))
    ])

    # setup optimizer
    optimizer = GradientDescentMomentum(0.1,
                                        momentum_coef=0.9,
                                        stochastic_round=args.rounding)

    # configure callbacks
    callbacks = Callbacks(mlp,
                          eval_set=dataset.valid_iter,
                          **args.callback_args)

    # run fit
    # setup cost function as CrossEntropy
    mlp.fit(dataset.train_iter,
            optimizer=optimizer,
            num_epochs=args.epochs,
            cost=GeneralizedCost(costfunc=CrossEntropyBinary()),
            callbacks=callbacks)
    error_rate = mlp.eval(dataset.valid_iter, metric=Misclassification())
    neon_logger.display('Classification accuracy = %.4f' % (1 - error_rate))
Example #3
    def build_model(self):
        # setup weight initialization function
        init_norm = Gaussian(loc=0.0, scale=0.01)

        # setup model layers
        layers = [
            Affine(nout=100,
                   init=init_norm,
                   bias=Uniform(),
                   activation=Rectlin()),
            Affine(nout=10,
                   init=init_norm,
                   bias=Uniform(),
                   activation=Logistic(shortcut=True))
        ]

        # setup cost function as CrossEntropy
        self.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

        # setup optimizer
        self.optimizer = GradientDescentMomentum(
            0.1, momentum_coef=0.9, stochastic_round=self.args.rounding)

        # initialize model object
        self.model = ModelDist(layers=layers)
Example #4
def test_gdm_nesterov(backend_default):
    lrate, mom, wdecay = 0.1, 0.9, 0.005
    gdm = GradientDescentMomentum(learning_rate=lrate,
                                  momentum_coef=mom,
                                  wdecay=wdecay,
                                  nesterov=True)
    data_shape = (200, 128)

    # params to be updated using GDM
    np_param = np.random.rand(*data_shape)
    param = wrap(np_param)

    # Optimizer states
    velocity = 0.01 * np.random.rand(*data_shape)
    states = [wrap(velocity)]

    # Check a few iterations in a row
    for ii in range(20):
        # Choose a gradient
        np_grad = 0.01 * np.random.rand(*data_shape)
        grad = wrap(np_grad)

        # Update manually
        np_grad = np_grad / data_shape[1]
        velocity[:] = mom * velocity - lrate * (np_grad + wdecay * np_param)
        np_param[:] = np_param + mom * velocity - lrate * (np_grad +
                                                           wdecay * np_param)
        param_list = [((param, grad), states)]
        compare_tensors(gdm, param_list, np_param, tol=1e-6)
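
For reference, the manual NumPy update inside the loop above is the Nesterov-momentum step being verified. A standalone sketch of that step, with an illustrative name and signature (not part of neon's API); it expects the gradient already scaled by the minibatch dimension, as in the loop above:

def nesterov_gdm_step(param, grad, velocity, lrate=0.1, mom=0.9, wdecay=0.005):
    # velocity update, with L2 weight decay folded into the effective gradient
    velocity[:] = mom * velocity - lrate * (grad + wdecay * param)
    # look-ahead (Nesterov) parameter update using the refreshed velocity
    param[:] = param + mom * velocity - lrate * (grad + wdecay * param)
    return param, velocity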
Example #5
    def train(self, dataset, model=None):
        """Trains the passed model on the given dataset. If no model is passed, `generate_default_model` is used."""
        print "[%s] Starting training..." % self.model_name                                                              
        start = time.time()

        # The training will be run on the CPU. If a GPU is available it should be used instead.
        backend = gen_backend(backend='cpu',
                              batch_size=self.batch_size,
                              rng_seed=self.random_seed,
                              stochastic_round=False)

        cost = GeneralizedCost(
            name='cost',
            costfunc=CrossEntropyMulti())

        optimizer = GradientDescentMomentum(
            learning_rate=self.lrate,
            momentum_coef=0.9)

        # set up the model and experiment
        if not model:
            model = self.generate_default_model(dataset.num_labels)

        args = NeonCallbackParameters()
        args.output_file = os.path.join(self.root_path, self.Callback_Store_Filename)
        args.evaluation_freq = 1
        args.progress_bar = False
        args.epochs = self.max_epochs
        args.save_path = os.path.join(self.root_path, self.Intermediate_Model_Filename)
        args.serialize = 1
        args.history = 100
        args.model_file = None

        callbacks = Callbacks(model, dataset.train(), args, eval_set=dataset.test())

        # add a callback that saves the best model state
        callbacks.add_save_best_state_callback(self.model_path)

        # Uncomment line below to run on GPU using cudanet backend
        # backend = gen_backend(rng_seed=0, gpu='cudanet')
        model.fit(
            dataset.train(),
            optimizer=optimizer,
            num_epochs=self.max_epochs,
            cost=cost,
            callbacks=callbacks)

        print("[%s] Misclassification error = %.1f%%"
              % (self.model_name, model.eval(dataset.test(), metric=Misclassification()) * 100))
        print "[%s] Finished training!" % self.model_name
        end = time.time()
        print "[%s] Duration in seconds", end - start

        return model
Example #6
 def __init__(self, rounding, callback_args, epochs):
     # setup weight initialization function
     self.init = Gaussian(loc=0.0, scale=0.01)
     # setup optimizer
     self.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9,
                                              stochastic_round=rounding)
     # setup cost function as SumSquared
     self.cost = GeneralizedCost(costfunc=SumSquared())
     self.epochs = epochs
     self.model = None
     self.callback_args = callback_args
Example #7
def test_gdm(backend):
    lrate, mom, wdecay = 0.1, 0.9, 0.005
    gdm = GradientDescentMomentum(
        learning_rate=lrate, momentum_coef=mom, wdecay=wdecay)
    param = np.random.rand(200, 128)
    param2 = copy.deepcopy(param)
    grad = 0.01 * np.random.rand(200, 128)
    states = [0.01 * np.random.rand(200, 128)]
    velocity = states[0]
    param2[:] = param2 + velocity * mom - grad * lrate - wdecay * lrate * param
    param_list = [((wrap(param), wrap(grad)), [wrap(states[0])])]
    compare_tensors(gdm, param_list, param2, tol=1e-7)
Example #8
def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(learning_rate=0.001,
                                      momentum_coef=0.9,
                                      wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64),
              strides=4,
              padding=3,
              init=init_one,
              bias=Constant(0),
              activation=Rectlin())
    l2 = Affine(nout=4096,
                init=init_one,
                bias=Constant(1),
                activation=Rectlin())
    l3 = LSTM(output_size=1000,
              init=init_one,
              activation=Logistic(),
              gate_activation=Tanh())
    l4 = GRU(output_size=100,
             init=init_one,
             activation=Logistic(),
             gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({
        'default': opt_gdm,
        'Bias': opt_ada,
        'Convolution': opt_adam,
        'Linear': opt_rms,
        'LSTM': opt_rms_1,
        'GRU': opt_rms_1
    })

    map_list = opt._map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
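
MultiOptimizer keys optimizers by layer class name, falling back to 'default' for anything unmapped; the mapping built above is the same kind that later examples pass straight to Model.fit. A minimal usage sketch, assuming a model, a training iterator, and callbacks like those set up in the other examples:

opt = MultiOptimizer({'default': GradientDescentMomentum(0.01, momentum_coef=0.9),
                      'Bias': GradientDescentMomentum(0.02, momentum_coef=0.9)})
model.fit(train_set,
          optimizer=opt,
          num_epochs=10,
          cost=GeneralizedCost(costfunc=CrossEntropyMulti()),
          callbacks=Callbacks(model, train_set))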
Example #9
def test_gdm_wclip(backend_default):
    lrate, mom, wdecay, wclip = 0.1, 0.9, 0.005, 0.5
    gdm = GradientDescentMomentum(
        learning_rate=lrate, momentum_coef=mom, wdecay=wdecay, param_clip_value=wclip)
    param = np.random.rand(200, 128)
    param2 = copy.deepcopy(param)
    grad = 0.01 * np.random.rand(200, 128)
    grad2 = grad / 128.
    states = [0.01 * np.random.rand(200, 128)]
    velocity = states[0]
    param2[:] = param2 + velocity * mom - grad2 * lrate - wdecay * lrate * param
    np.clip(param2, -wclip, wclip, param2)
    param_list = [((wrap(param), wrap(grad)), [wrap(states[0])])]
    compare_tensors(gdm, param_list, param2, tol=1e-7)
Example #10
def run(be, fake_dilation, fsz, stride, pad, dilation):
    K = 8
    strides = stride
    padding = pad
    be.rng = be.gen_rng(be.rng_seed)

    in_shape = 16
    while out_shape(in_shape, fsz, stride, dilation, pad) < 3:
        in_shape *= 2
    train_shape = (1, in_shape, in_shape)

    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    init = Gaussian()

    layers = [
        Conv((5, 5, K), init=init),
        Conv((fsz, fsz, K),
             strides=strides,
             padding=padding,
             init=init,
             dilation=dict(dil_d=1, dil_h=dilation, dil_w=dilation)),
        Conv((3, 3, K), init=init),
        Affine(nout=1, init=init)
    ]
    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    model.initialize(train_shape, cost)

    if fake_dilation:
        # Perform regular convolution with an expanded filter.
        weights = save(model)
        new_layers = layers
        # Replace the middle layers.
        new_fsz = dilated_fsz(fsz, dilation)
        new_layers[1] = Conv((new_fsz, new_fsz, K),
                             strides=strides,
                             padding=padding,
                             init=init)
        model = Model(layers=new_layers)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(train_shape, cost)
        load(weights, model, K, fsz, dilation)

    print(model)
    model.optimizer = GradientDescentMomentum(learning_rate=0.01,
                                              momentum_coef=0.9)
    outputs = fprop(model, inp)
    weights = bprop(model, outputs)
    model.optimizer.optimize(model.layers_to_optimize, epoch=0)
    return outputs.get(), weights.get()
Example #11
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()
    # separate layer_list into two, the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]
    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]
    opt.optimize(layers_to_optimize1, 0)
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'
    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Example #12
File: util.py  Project: yw774/neon
def train_regressor(orig_wordvecs, w2v_W, w2v_vocab):
    """
    Return regressor to map word2vec to RNN word space

    Function modified from:
    https://github.com/ryankiros/skip-thoughts/blob/master/training/tools.py
    """
    # Gather all words from word2vec that appear in wordvecs
    d = defaultdict(lambda: 0)
    for w in w2v_vocab.keys():
        d[w] = 1
    shared = OrderedDict()
    count = 0

    for w in list(orig_wordvecs.keys())[:-2]:
        if d[w] > 0:
            shared[w] = count
            count += 1

    # Get the vectors for all words in 'shared'
    w2v = np.zeros((len(shared), 300), dtype='float32')
    sg = np.zeros((len(shared), 620), dtype='float32')
    for w in shared.keys():
        w2v[shared[w]] = w2v_W[w2v_vocab[w]]
        sg[shared[w]] = orig_wordvecs[w]

    train_set = ArrayIterator(X=w2v, y=sg, make_onehot=False)

    layers = [
        Linear(nout=620, init=Gaussian(loc=0.0, scale=0.1)),
        Bias(init=Constant(0.0))
    ]
    clf = Model(layers=layers)

    # regression model is trained using default global batch size
    cost = GeneralizedCost(costfunc=SumSquared())
    opt = GradientDescentMomentum(0.1, 0.9, gradient_clip_value=5.0)
    callbacks = Callbacks(clf)

    clf.fit(train_set,
            num_epochs=20,
            optimizer=opt,
            cost=cost,
            callbacks=callbacks)
    return clf
Example #13
def run(args, train, test):
    init_uni = Uniform(low=-0.1, high=0.1)
    opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)
    layers = [Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=True),
              Pooling((2, 2)),
              Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=True),
              Pooling((2, 2)),
              Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=True),
              Affine(nout=10, init=init_uni, activation=Softmax())]
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    mlp = Model(layers=layers)
    callbacks = Callbacks(mlp, eval_set=test, **args.callback_args)
    mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
    err = mlp.eval(test, metric=Misclassification())*100
    print('Misclassification error = %.2f%%' % err)
    return err
Example #14
    def __init__(self,
                 num_epochs,
                 callback_args,
                 optimizer=GradientDescentMomentum(0.07, momentum_coef=0.9)):
        """

        Args:
            num_epochs(int): number of epochs to train the model
            **callback_args (dict): callback args keyword arguments to init Callback for the model
            cost: the model's cost function. Default is 'neon.transforms.CrossEntropyBinary' cost
            optimizer (:obj:`neon.optimizers`): the model's optimizer. Default is
            `neon.optimizers.GradientDescentMomentum(0.07, momentum_coef=0.9)`
        """
        self.model = None
        self.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        self.optimizer = optimizer
        self.epochs = num_epochs
        self.callback_args = callback_args
Example #15
def test_gdm_nesterov(backend_default):
    lrate, mom, wdecay = 0.1, 0.9, 0.005
    gdm = GradientDescentMomentum(learning_rate=lrate,
                                  momentum_coef=mom,
                                  wdecay=wdecay,
                                  nesterov=True)
    # params to be updated using GDM
    param = np.random.rand(200, 128)
    grad = 0.01 * np.random.rand(200, 128)

    # params to be update manually
    param2 = copy.deepcopy(param)
    grad2 = grad / 128.
    states = [0.01 * np.random.rand(200, 128), 0.01 * np.zeros_like(grad)]
    velocity = states[0]
    velocity_backup = states[1]
    velocity_backup[:] = velocity

    param2[:] = param2 + (1 + mom) * (velocity * mom - grad2 * lrate - wdecay *
                                      lrate * param) - mom * velocity_backup
    param_list = [((wrap(param), wrap(grad)),
                   [wrap(states[0]), wrap(states[1])])]
    compare_tensors(gdm, param_list, param2, tol=1e-7)
Example #16
class Trainer(BaseTrainer):
    def __init__(self, model, ngpu, options,
                 data_options=None, time_options=None):
        self.model = model

        #self.model.set_batch_size(data_options['batch_size'])
        
        self.ngpu = ngpu
        self.gpu_mode = True if ngpu >= 1 else False
        self.time_options = time_options
        self.data_options = data_options
        if self.gpu_mode:
            try:
                self.be = gen_backend(backend='nervanagpu',
                                      batch_size=data_options['batch_size'])
                print("Backend: nervanagpu")
            except Exception:
                self.be = gen_backend(backend='gpu',
                                      batch_size=data_options['batch_size'])
                print("Backend: gpu")
        else:
            self.be = gen_backend(backend='mkl',
                                  batch_size=data_options['batch_size'])

        self.loss = L.GeneralizedCost(costfunc=TF.CrossEntropyMulti())
        B = self.data_options['batch_size']
        self.model.bsz(B)
        
        C, W, H = self.data_options['image_shape']
            
        self.model.initialize(((C, H, W), B), self.loss)
            
    def set_optimizer(self, opt_type, opt_conf):
        if opt_type == 'SGD':
            self.optimizer = GradientDescentMomentum(opt_conf['lr'],
                                                     momentum_coef=opt_conf['momentum'])
        else:
            raise NotImplementedError
        
    def run(self, iterator, mode='train'):
        report = dict()

        time_series = []
        start_event = torch.cuda.Event(enable_timing=True)
        end_event = torch.cuda.Event(enable_timing=True)

        total_s = time.perf_counter()
        
        for idx, (x, t) in enumerate(iterator):
            if self.time_options == 'total':            
                start_event.record()
            x = self.be.array(x)
            t = self.be.array(t)
            self.be.begin(Block.minibatch, idx)

            if self.time_options == 'forward':
                with self._record(start_event, end_event):
                    x = self.model(x)
            else:
                x = self.model(x)

            self.total_cost[:] = self.total_cost + self.loss.get_cost(x, t)

            # deltas back propagate through layers
            # for every layer in reverse except the 0th one
            loss = self.loss.get_errors(x, t)
            
            if self.time_options == 'backward':
                with self._record(start_event, end_event):
                    self.model.backward(loss)                    
            else:
                self.model.backward(loss)

            self.optimizer.optimize(self.model.layers_to_optimize, epoch=0)
                
            if self.time_options == 'total':            
                end_event.record()
                torch.cuda.synchronize()
                self._elapsed_time = start_event.elapsed_time(end_event)/1000
                
            if isinstance(iterator, tqdm):
                iterator.set_description('{:>10s} :{:10.7f}s/it'.format(self.time_options,
                                                                        self._elapsed_time))            
            time_series.append(self._elapsed_time)
        torch.cuda.synchronize()
        total_e = time.perf_counter()
        report = dict(
            time_series=time_series,
            total=total_e - total_s,
            )

        return report
Example #17
# hyperparameters
num_epochs = args.epochs

(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28))

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
          Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# fit and validate
optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

model.fit(train_set, cost=cost, optimizer=optimizer, num_epochs=num_epochs, callbacks=callbacks)
Example #18
 def set_optimizer(self, opt_type, opt_conf):
     if opt_type == 'SGD':
         self.optimizer = GradientDescentMomentum(opt_conf['lr'],
                                                  momentum_coef=opt_conf['momentum'])
     else:
         raise NotImplementedError
Example #19
parser = NeonArgparser(__doc__)
args = parser.parse_args()

NervanaObject.be.enable_winograd = 4

# setup data provider
X_train = np.random.uniform(-1, 1, (128, 3*224*224))
y_train = np.random.uniform(-1, 1, (128, 1000))
train = ArrayIterator(X_train, y_train, nclass=1000, lshape=(3, 224, 224))

layers = [Conv((11, 11, 64), init=Gaussian(scale=0.01),
               activation=Rectlin(), padding=3, strides=4),
          Pooling(3, strides=2),
          Conv((5, 5, 192), init=Gaussian(scale=0.01), activation=Rectlin(), padding=2),
          Pooling(3, strides=2),
          Conv((3, 3, 384), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Pooling(3, strides=2),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=1000, init=Gaussian(scale=0.01), activation=Softmax())]
model = Model(layers=layers)

weight_sched = Schedule([22, 44, 65], (1/250.)**(1/3.))
opt_gdm = GradientDescentMomentum(0.01, 0.0, wdecay=0.0005, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm})
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.benchmark(train, cost=cost, optimizer=opt, niterations=10, nskip=5)
Example #20
from neon.optimizers import GradientDescentMomentum
from neon.transforms import Misclassification, CrossEntropyBinary, Logistic, Rectlin
from neon.callbacks.callbacks import Callbacks
from neon.util.argparser import NeonArgparser

# parse the command line arguments
parser = NeonArgparser(__doc__)
args = parser.parse_args()

(X_train, y_train), (X_test, y_test), nclass = load_cifar10(path=args.data_dir)

train = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(3, 32, 32))
test = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(3, 32, 32))

init_uni = Uniform(low=-0.1, high=0.1)
opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)

# set up the model layers
layers = [
    Affine(nout=200, init=init_uni, activation=Rectlin()),
    Affine(nout=10, init=init_uni, activation=Logistic(shortcut=True))
]

cost = GeneralizedCost(costfunc=CrossEntropyBinary())

mlp = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(mlp, eval_set=test, **args.callback_args)

mlp.fit(train,
        optimizer=opt_gdm,
        num_epochs=args.epochs,
        cost=cost,
        callbacks=callbacks)
Example #21
def test_model_serialize(backend):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator([X_train, X_train],
                             y_train,
                             nclass=nclass,
                             lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = [
        Conv((5, 5, 16),
             init=init_norm,
             bias=Constant(0),
             activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    path2 = [
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    layers = [
        MergeConcat([path1, path2]),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        BatchNorm(),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            else:
                assert np.allclose(p, p_e)

    os.remove(tmp_save)
Example #22
               activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
               activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
               activation=Rectlin(), padding=1),
          Pooling(3, strides=2),
          Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()),
          Dropout(keep=0.5),
          Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1), activation=Rectlin()),
          Dropout(keep=0.5),
          Affine(nout=1000, init=Gaussian(scale=0.01), bias=Constant(-7), activation=Softmax())]
model = Model(layers=layers)

# drop weights LR by 1/250**(1/3) at epochs (23, 45, 66), drop bias LR by 1/10 at epoch 45
weight_sched = Schedule([22, 44, 65], (1/250.)**(1/3.))
opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=weight_sched,
                                  stochastic_round=args.rounding)
opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=Schedule([44], 0.1),
                                     stochastic_round=args.rounding)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model, eval_set=test, metric=valmetric, **args.callback_args)

if args.model_file is not None:
    model.load_params(args.model_file)
if not args.test_only:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

mets = model.eval(test, metric=valmetric)
Example #23
# layers = [Conv(fshape=(5,5,16), init=init_uni, activation=Rectlin()),
#           Pooling(fshape=2, strides=2),
#           Conv(fshape=(5,5,32), init=init_uni, activation=Rectlin()),
#           Pooling(fshape=2, strides=2),
#           Affine(nout=500, init=init_uni, activation=Rectlin()),
#           Affine(nout=10, init=init_uni, activation=Softmax())]
# learning_rate = 0.005
# momentum = 0.9

cnn = Model(layers=layers)

# - cost function
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

# - learning rule
optimizer = GradientDescentMomentum(learning_rate, momentum_coef=momentum)

# configure default callbacks (progress bar per epoch) and evaluate on the test set
callbacks = Callbacks(cnn, eval_set=test_set, **args.callback_args)

# put everything together!
cnn.fit(train_set,
        optimizer=optimizer,
        num_epochs=epochs,
        cost=cost,
        callbacks=callbacks)

# # Calculate test set results
# results = cnn.get_outputs(test_set)

# dump(cnn, "cnn_0_005.jbl")
Example #24
           name='fc7'))

layers.append(Dropout(keep=0.5, name='drop7'))
layers.append(
    Affine(nout=1000,
           init=init_g1,
           bias=Constant(0.0),
           activation=Softmax(),
           name='fc8'))

model = Model(layers=layers)

# scale LR by 0.1 every 20 epochs (this assumes batch_size = 256)
weight_sched = Schedule(20, 0.1)
opt_gdm = GradientDescentMomentum(0.01,
                                  0.9,
                                  wdecay=0.0005,
                                  schedule=weight_sched)
opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model,
                      eval_set=test,
                      metric=valmetric,
                      **args.callback_args)

if args.model_file is not None:
    model.load_params(args.model_file)
if not args.test_only:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
Example #25
def test_gdm(args, transformer_factory):
    """
    Test the ngraph GradientDescentMomentum against the neon version across 20 update steps.
    """
    # set up parameters
    C = ng.make_axis(20, name="C")
    N = ng.make_axis(32, name="N", batch=True)

    be = gen_backend(backend='cpu', batch_size=N.length)

    # restrict to numpy transformer for now
    factory = ngt.make_transformer_factory('numpy')
    ngt.set_transformer_factory(factory)
    ngt.make_transformer()

    # generate dummy data (to initialize values)
    w_init = np.random.rand(C.length).astype('float32')

    # set up nervana graph
    X = ng.placeholder([C, N]).named('X')
    Y = ng.placeholder([N]).named('Y')
    W = ng.variable([C - 1], initial_value=w_init).named('W')

    ex = ExecutorFactory()
    transformer = ex.transformer

    lrate, mom, wdecay = args
    gdm = GradientDescentMomentum(learning_rate=lrate, momentum_coef=mom, wdecay=wdecay)
    cost = ng.sum(Y - ng.dot(W, X), out_axis=())

    # to call ngraph gdm, use (ngraph_W, _) = ngraph_optimize(x, y)
    # where (x, y) are nparrays that fill the placeholders X and Y
    updates = gdm(cost)
    ngraph_optimize = transformer.computation([W, updates], X, Y)
    transformer.initialize()

    # set up the neon gdm
    neon_gdm = NeonGradientDescentMomentum(learning_rate=lrate, momentum_coef=mom, wdecay=wdecay)
    # dev_v0 = be.zeros((C.length, 1))  # velocities are zero at the beginning
    dev_dw = be.zeros((C.length, 1))  # we fill the gradient info in the below
    dev_w_init = be.array(w_init)  # copy w_init to device
    param_list = [((dev_w_init, dev_dw), [])]

    # store the weights with each minibatch for debugging
    ng_Ws = []
    be_Ws = []

    # run for 20 minibatches
    for i, (x, y) in enumerate([generate_data(C.length, N.length) for _ in range(20)]):
        # obtain ngraph results
        (ng_W, _) = ngraph_optimize(x, y)
        ng_Ws.append(copy.deepcopy(ng_W))

        # obtain neon results
        dw = -1 * x.sum(axis=1)   # the gradients we compute analytically
        param_list[0][0][1].set(dw)  # fill the gradient

        neon_gdm.optimize([DummyLayer(param_list)], epoch=0)
        (param, grad), states = param_list[0]
        be_W = param.get()[:, 0]
        be_Ws.append(be_W)

        np.testing.assert_allclose(be_W, ng_W, rtol=1e-3)
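
Note on the analytic gradient above: with cost = sum_n (y_n - w . x_n) as defined earlier in the test, the derivative with respect to w is -sum_n x_n, which is exactly what dw = -1 * x.sum(axis=1) computes for each minibatch.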
Example #26
File: word_lstm.py  Project: ydm2011/neon
else:
    rlayer1, rlayer2 = GRU(**rlayer_params), GRU(**rlayer_params)

layers = [
    LookupTable(vocab_size=len(train_set.vocab),
                embedding_dim=hidden_size,
                init=init), rlayer1, rlayer2,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

# vanilla gradient descent with decay schedule on learning rate and gradient scaling
learning_rate_sched = Schedule(list(range(5, args.epochs)), .5)
optimizer = GradientDescentMomentum(1,
                                    0,
                                    gradient_clip_norm=gradient_clip_norm,
                                    schedule=learning_rate_sched)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)
Example #27
args = parser.parse_args()

# hyperparameters
if args.datatype in [np.float16]:
    cost_scale = 10.
num_epochs = args.epochs

(X_train, y_train), (X_test, y_test), nclass = load_cifar10(path=args.data_dir)

train = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(3, 32, 32))
test = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(3, 32, 32))

init_uni = Uniform(low=-0.1, high=0.1)
if args.datatype in [np.float32, np.float64]:
    opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)
elif args.datatype in [np.float16]:
    opt_gdm = GradientDescentMomentum(learning_rate=0.01 / cost_scale,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)

bn = True
layers = [
    Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=bn),
    Pooling((2, 2)),
    Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=bn),
    Pooling((2, 2)),
    Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=bn),
    Affine(nout=10, init=init_uni, activation=Softmax())
]
Example #28
               batch_norm=True,
               activation=Rectlin()))
    layers.append(Affine(1, init=Kaiming(local=False), activation=Logistic()))
    #return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyBinary())
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyBinary())


lunaModel, cost = create_network(args.depth)

modelFileName = 'LUNA16_resnet.prm'
# If a model file exists, load it and start from there.
# if (os.path.isfile(modelFileName)):
#   lunaModel = Model(modelFileName)

weight_sched = Schedule([30, 60], 0.1)
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001, schedule=weight_sched)

# default to evaluating the model once per epoch
if args.callback_args['eval_freq'] is None:
    args.callback_args['eval_freq'] = 1

# configure callbacks
callbacks = Callbacks(lunaModel, eval_set=valid_set, **args.callback_args)
# add a callback that saves the best model state
callbacks.add_save_best_state_callback(modelFileName)

lunaModel.fit(train_set,
              optimizer=opt,
              num_epochs=num_epochs,
              cost=cost,
              callbacks=callbacks)
Example #29
      Affine(nout=16, linear_name="b1_l1", **normrelu),
      Affine(nout=10, linear_name="b1_l2", **normsigm)]

p3 = [b2,
      Affine(nout=16, linear_name="b2_l1", **normrelu),
      Affine(nout=10, linear_name="b2_l2", **normsigm)]


# setup cost function as CrossEntropy
cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()),
                        GeneralizedCost(costfunc=CrossEntropyBinary()),
                        GeneralizedCost(costfunc=CrossEntropyBinary())],
                 weights=[1, 0., 0.])

# setup optimizer
optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9, stochastic_round=args.rounding)

# initialize model object
alphas = [1, 0.25, 0.25]
mlp = Model(layers=Tree([p1, p2, p3], alphas=alphas))

# setup standard fit callbacks
callbacks = Callbacks(mlp, train_set, eval_set=valid_set, **args.callback_args)

# run fit
mlp.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

logging.getLogger('neon').info("Misclassification error = %.1f%%",
                               (mlp.eval(valid_set, metric=Misclassification())*100))
print('Misclassification error = %.1f%%' % (mlp.eval(valid_set, metric=Misclassification())*100))
Example #30
                    help='subset of training dataset to use (percentage)')
args = parser.parse_args()

model, cost = create_network()
rseed = 0 if args.rng_seed is None else args.rng_seed

# setup data provider
assert 'train' in args.manifest, "Missing train manifest"
assert 'val' in args.manifest, "Missing validation manifest"
train = make_alexnet_train_loader(args.manifest['train'], args.manifest_root,
                                  model.be, args.subset_pct, rseed)
valid = make_validation_loader(args.manifest['val'], args.manifest_root,
                               model.be, args.subset_pct)

sched_weight = Schedule([10], change=0.1)
opt = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=sched_weight)

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model,
                      eval_set=valid,
                      metric=valmetric,
                      **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train, valid)

model.fit(train,
          optimizer=opt,
          num_epochs=args.epochs,
          cost=cost,
          callbacks=callbacks)
Example #31
img_set_options = dict(repo_dir=args.data_dir,
                       inner_size=224,
                       dtype=args.datatype,
                       subset_pct=100)
train = img_provider(set_name='train', **img_set_options)
test = img_provider(set_name='validation', do_transforms=False, **img_set_options)
train.init_batch_provider()
test.init_batch_provider()

relu = Rectlin()

init_uni = GlorotUniform()

# The parameters below are straight out of [Springenberg2014]
opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                  schedule=Schedule(step_config=[10],
                                                    change=0.1),
                                  momentum_coef=0.9, wdecay=.0005)


# set up model layers
layers = []
layers.append(DataTransform(transform=Normalizer(divisor=128.)))

layers.append(Conv((11, 11, 96), init=init_uni, activation=relu, strides=4, padding=1))
layers.append(Conv((1, 1, 96),   init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 96),   init=init_uni, activation=relu, strides=2,  padding=1))  # 54->27

layers.append(Conv((5, 5, 256),  init=init_uni, activation=relu, strides=1))              # 27->23
layers.append(Conv((1, 1, 256),  init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 256),  init=init_uni, activation=relu, strides=2,  padding=1))  # 23->12
Example #32
File: train.py  Project: zmoon111/neon
# setup training dataset
train_set = PASCALVOCTrain('trainval', '2007', path=args.data_dir, n_mb=n_mb,
                           img_per_batch=img_per_batch, rois_per_img=rois_per_img,
                           rois_random_sample=True,
                           add_flipped=False, subset_pct=args.subset_pct)
test_set = PASCALVOCTrain('test', '2007', path=args.data_dir, n_mb=n_mb,
                          img_per_batch=img_per_batch, rois_per_img=rois_per_img,
                          rois_random_sample=True,
                          add_flipped=False)

# setup model
model = create_frcn_model(frcn_fine_tune)

# setup optimizer
opt_w = GradientDescentMomentum(
    0.001 * learning_rate_scale, 0.9, wdecay=0.0005)
opt_b = GradientDescentMomentum(0.002 * learning_rate_scale, 0.9)

optimizer = MultiOptimizer({'default': opt_w, 'Bias': opt_b})

# if training a new model, seed the image model conv layers with pre-trained weights
# otherwise, just load the model file
if args.model_file is None:
    load_vgg_weights(model, args.data_dir)

cost = Multicost(costs=[GeneralizedCostMask(costfunc=CrossEntropyMulti()),
                        GeneralizedCostMask(costfunc=SmoothL1Loss())],
                 weights=[1, 1])

callbacks = Callbacks(model, eval_set=test_set, **args.callback_args)