def create_frcn_model(frcn_fine_tune=False):
    b1 = BranchNode(name="b1")
    imagenet_layers = add_vgg_layers()
    HW = (7, 7)

    frcn_layers = [
        RoiPooling(layers=imagenet_layers, HW=HW, bprop_enabled=frcn_fine_tune),
        Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        b1,
        Affine(nout=21, init=Gaussian(scale=0.01), bias=Constant(0),
               activation=Softmax())
    ]
    bb_layers = [
        b1,
        Affine(nout=84, init=Gaussian(scale=0.001), bias=Constant(0),
               activation=Identity())
    ]

    return Model(layers=Tree([frcn_layers, bb_layers]))

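# Usage sketch for the two-branch Fast R-CNN model above. The branch node
# `b1` lets the bounding-box branch reuse the trunk up to the second dropout,
# so training pairs the Tree with one cost per output branch. The cost
# choices, hyperparameters, and `train_set` (assumed to yield ROI-labelled
# batches) below are illustrative assumptions, not the original recipe.
from neon.callbacks.callbacks import Callbacks
from neon.layers import GeneralizedCost, Multicost
from neon.optimizers import GradientDescentMomentum
from neon.transforms import CrossEntropyMulti, SumSquared

model = create_frcn_model(frcn_fine_tune=True)
cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()),  # 21-way class scores
                        GeneralizedCost(costfunc=SumSquared())])        # 84 bbox targets
optimizer = GradientDescentMomentum(0.001, momentum_coef=0.9)
model.fit(train_set, optimizer=optimizer, num_epochs=10, cost=cost,
          callbacks=Callbacks(model))
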
def main(args):
    # load up the mnist data set
    dataset = MNIST(path=args.data_dir)

    # initialize model object
    mlp = Model(layers=[
        Affine(nout=100, init=Gaussian(loc=0.0, scale=0.01),
               activation=Rectlin()),
        Affine(nout=10, init=Gaussian(loc=0.0, scale=0.01),
               activation=Logistic(shortcut=True))
    ])

    # setup optimizer
    optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9,
                                        stochastic_round=args.rounding)

    # configure callbacks
    callbacks = Callbacks(mlp, eval_set=dataset.valid_iter,
                          **args.callback_args)

    # run fit, with the cost function set up as cross entropy
    mlp.fit(dataset.train_iter, optimizer=optimizer, num_epochs=args.epochs,
            cost=GeneralizedCost(costfunc=CrossEntropyBinary()),
            callbacks=callbacks)

    error_rate = mlp.eval(dataset.valid_iter, metric=Misclassification())
    neon_logger.display('Classification accuracy = %.4f' % (1 - error_rate))

def create_network():
    # weight initialization
    g1 = Gaussian(scale=0.01)
    g5 = Gaussian(scale=0.005)
    c0 = Constant(0)
    c1 = Constant(1)

    # model initialization
    padding = {'pad_d': 1, 'pad_h': 1, 'pad_w': 1}
    strides = {'str_d': 2, 'str_h': 2, 'str_w': 2}

    layers = [
        Conv((3, 3, 3, 64), padding=padding, init=g1, bias=c0,
             activation=Rectlin()),
        Pooling((1, 2, 2), strides={'str_d': 1, 'str_h': 2, 'str_w': 2}),
        Conv((3, 3, 3, 128), padding=padding, init=g1, bias=c1,
             activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1,
             activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1,
             activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Conv((3, 3, 3, 256), padding=padding, init=g1, bias=c1,
             activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=101, init=g1, bias=c0, activation=Softmax())
    ]
    return Model(layers=layers)

def construct_network():
    """
    Constructs the layers of our RCNN architecture. It is similar to AlexNet,
    but simplified to only a few convolutional layers plus 3 LSTM layers.
    """
    layers = [
        Conv((11, 11, 64), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=3, strides=4),
        Pooling(3, strides=2),
        Conv((7, 7, 128), init=Gaussian(scale=0.01), bias=Constant(1),
             activation=Rectlin(), padding=2),
        Pooling(3, strides=2),
        Conv((5, 5, 256), init=Gaussian(scale=0.03), bias=Constant(0),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        LSTM(512, init=Gaussian(scale=0.03), activation=Rectlin(),
             gate_activation=Tanh()),
        LSTM(512, init=Gaussian(scale=0.03), activation=Rectlin(),
             gate_activation=Tanh()),
        LSTM(512, init=Gaussian(scale=0.03), activation=Rectlin(),
             gate_activation=Tanh()),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=101, init=Gaussian(scale=0.01), bias=Constant(-7),
               activation=Softmax())
    ]
    return Model(layers=layers)

def build_model(self):
    # setup weight initialization function
    init_norm = Gaussian(loc=0.0, scale=0.01)

    # setup model layers
    layers = [
        Affine(nout=100, init=init_norm, bias=Uniform(),
               activation=Rectlin()),
        Affine(nout=10, init=init_norm, bias=Uniform(),
               activation=Logistic(shortcut=True))
    ]

    # setup cost function as CrossEntropy
    self.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    # setup optimizer
    self.optimizer = GradientDescentMomentum(
        0.1, momentum_coef=0.9, stochastic_round=self.args.rounding)

    # initialize model object
    self.model = ModelDist(layers=layers)

def _createLayers(self, num_actions):
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    if self.screen_dim == (84, 84):
        # The first hidden layer convolves 32 filters of 8x8 with stride 4
        # with the input image and applies a rectifier nonlinearity.
        logger.info("Using 8x8 filter for first Conv")
        layers.append(Conv((8, 8, 32), strides=4, init=init_norm,
                           activation=Rectlin()))
    elif self.screen_dim == (52, 40):
        # The first hidden layer convolves 32 filters of 5x4 with stride 2
        # with the input image and applies a rectifier nonlinearity.
        logger.info("Using 5x4 filter for first Conv")
        layers.append(Conv((5, 4, 32), strides=2, init=init_norm,
                           activation=Rectlin()))
    else:
        raise NotImplementedError("Unsupported screen dim.")
    # The second hidden layer convolves 64 filters of 4x4 with stride 2,
    # again followed by a rectifier nonlinearity.
    layers.append(Conv((4, 4, 64), strides=2, init=init_norm,
                       activation=Rectlin()))
    # This is followed by a third convolutional layer that convolves 64
    # filters of 3x3 with stride 1 followed by a rectifier.
    layers.append(Conv((3, 3, 64), strides=1, init=init_norm,
                       activation=Rectlin()))
    # The final hidden layer is fully-connected and consists of 512
    # rectifier units.
    layers.append(Affine(nout=512, init=init_norm, activation=Rectlin()))
    # The output layer is a fully-connected linear layer with a single
    # output for each valid action.
    layers.append(Affine(nout=num_actions, init=init_norm))
    return layers

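# Minimal usage sketch for the DQN layer builder above (the `agent` instance
# and action count are illustrative assumptions). _createLayers returns a
# plain layer list, not a Model, so a caller wraps it; squared error on the
# predicted Q-values matches the SumSquared cost used elsewhere in this file.
from neon.layers import GeneralizedCost
from neon.models import Model
from neon.transforms import SumSquared

model = Model(layers=agent._createLayers(num_actions=4))
cost = GeneralizedCost(costfunc=SumSquared())
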
def run(train, test):
    init = Gaussian(scale=0.01)
    layers = [
        Conv((3, 3, 128), init=init, activation=Rectlin(),
             strides=dict(str_h=1, str_w=2)),
        Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()),
        Pooling(2, strides=2),
        Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()),
        DeepBiRNN(256, init=init, activation=Rectlin(), reset_cells=True,
                  depth=3),
        RecurrentLast(),
        Affine(32, init=init, batch_norm=True, activation=Rectlin()),
        Affine(nout=common['nclasses'], init=init, activation=Softmax())
    ]
    model = Model(layers=layers)
    opt = Adadelta()
    metric = Misclassification()
    callbacks = Callbacks(model, eval_set=test, metric=metric,
                          **args.callback_args)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
              callbacks=callbacks)
    return model

def _createLayers(self, num_actions):
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    # The first hidden layer convolves 32 filters of 8x8 with stride 4
    # with the input image and applies a rectifier nonlinearity.
    layers.append(Conv((8, 8, 32), strides=4, init=init_norm,
                       activation=Rectlin(), batch_norm=self.batch_norm))
    # The second hidden layer convolves 64 filters of 4x4 with stride 2,
    # again followed by a rectifier nonlinearity.
    layers.append(Conv((4, 4, 64), strides=2, init=init_norm,
                       activation=Rectlin(), batch_norm=self.batch_norm))
    # This is followed by a third convolutional layer that convolves 64
    # filters of 3x3 with stride 1 followed by a rectifier.
    layers.append(Conv((3, 3, 64), strides=1, init=init_norm,
                       activation=Rectlin(), batch_norm=self.batch_norm))
    # The final hidden layer is fully-connected and consists of 512
    # rectifier units.
    layers.append(Affine(nout=512, init=init_norm, activation=Rectlin(),
                         batch_norm=self.batch_norm))
    # The output layer is a fully-connected linear layer with a single
    # output for each valid action.
    layers.append(Affine(nout=num_actions, init=init_norm))
    return layers

def _createLayers(self):
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    for i in range(self.num_layers):
        layers.append(Affine(nout=self.hidden_nodes, init=init_norm,
                             activation=Rectlin()))
    layers.append(Affine(nout=self.num_actions, init=init_norm))
    return layers

def _createLayers(self, num_actions):
    # create network
    init_norm = Gaussian(loc=0.0, scale=0.01)
    layers = []
    # The final hidden layer is fully-connected and consists of 64
    # rectifier units.
    layers.append(Affine(nout=64, init=init_norm, bias=init_norm,
                         activation=Rectlin()))
    # The output layer is a fully-connected linear layer with a single
    # output for each valid action.
    layers.append(Affine(nout=num_actions, init=init_norm, bias=init_norm))
    return layers

def get_initializer(self, input_size):
    dnnInit = self.args.dnn_initializer
    if dnnInit == 'xavier':
        initializer = Xavier()
    elif dnnInit == 'fan_in':
        std_dev = 1.0 / math.sqrt(input_size)
        initializer = Uniform(low=-std_dev, high=std_dev)
    else:
        initializer = Gaussian(0, 0.01)
    return initializer

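# Usage sketch for get_initializer (called on the owning instance; the flag
# and sizes here are illustrative). With args.dnn_initializer == 'fan_in'
# and input_size=256, it returns Uniform(low=-0.0625, high=0.0625), since
# 1.0 / sqrt(256) = 0.0625; any unrecognized name falls back to the
# Gaussian(0, 0.01) default.
#
#     init = self.get_initializer(input_size=256)
#     layer = Affine(nout=128, init=init, activation=Rectlin())
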
def layers(self):
    return [
        Conv((7, 7, 96), init=Gaussian(scale=0.0001), bias=Constant(0),
             activation=Rectlin(), padding=3, strides=1),
        LRN(31, ascale=0.001, bpower=0.75),
        Pooling(3, strides=2, padding=1),
        Conv((5, 5, 256), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=2, strides=1),
        LRN(31, ascale=0.001, bpower=0.75),
        Pooling(3, strides=2, padding=1),
        Conv((3, 3, 384), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=1, strides=1),
        Conv((3, 3, 384), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=1, strides=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=1, strides=1),
        Pooling(3, strides=2, padding=1),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(0),
               activation=Identity()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(0),
               activation=Identity()),
        Dropout(keep=0.5),
        Affine(nout=self.noutputs, init=Gaussian(scale=0.01), bias=Constant(0),
               activation=Softmax() if self.use_softmax
               else Logistic(shortcut=True))
    ]

def __init__(self, rounding, callback_args, epochs):
    # setup weight initialization function
    self.init = Gaussian(loc=0.0, scale=0.01)

    # setup optimizer
    self.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                             momentum_coef=0.9,
                                             stochastic_round=rounding)

    # setup cost function as sum of squared errors
    self.cost = GeneralizedCost(costfunc=SumSquared())

    self.epochs = epochs
    self.model = None
    self.callback_args = callback_args

def prepare_model(ninputs=9600, nclass=5):
    """
    Set up and compile the model architecture (logistic regression).
    """
    layers = [Affine(nout=nclass, init=Gaussian(loc=0.0, scale=0.01),
                     activation=Softmax())]
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    opt = Adam()
    lrmodel = Model(layers=layers)

    return lrmodel, opt, cost

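# Usage sketch for prepare_model (`X`, `y`, and the epoch count are
# illustrative assumptions): the returned (model, optimizer, cost) triple
# plugs directly into Model.fit, with features batched through an
# ArrayIterator as in the other snippets in this file.
from neon.callbacks.callbacks import Callbacks
from neon.data import ArrayIterator

train_set = ArrayIterator(X, y, nclass=5)
lrmodel, opt, cost = prepare_model(nclass=5)
lrmodel.fit(train_set, optimizer=opt, num_epochs=10, cost=cost,
            callbacks=Callbacks(lrmodel))
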
def construct_network():
    """
    Constructs the layers of the AlexNet architecture.
    """
    layers = [
        Conv((11, 11, 64), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=3, strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192), init=Gaussian(scale=0.01), bias=Constant(1),
             activation=Rectlin(), padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384), init=Gaussian(scale=0.03), bias=Constant(0),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=101, init=Gaussian(scale=0.01), bias=Constant(-7),
               activation=Softmax())
    ]
    return Model(layers=layers)

def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9,
                                      wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3, init=init_one,
              bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one, bias=Constant(1),
                activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(),
              gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(),
             gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    map_list = opt._map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'

def create_network():
    layers = [
        Conv((11, 11, 64), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=3, strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192), init=Gaussian(scale=0.01), bias=Constant(1),
             activation=Rectlin(), padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384), init=Gaussian(scale=0.03), bias=Constant(0),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=1000, init=Gaussian(scale=0.01), bias=Constant(-7),
               activation=Softmax()),
    ]
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())

def __init__(self):
    self.in_shape = (1, 32, 32)
    init_norm = Gaussian(loc=0.0, scale=0.01)
    normrelu = dict(init=init_norm, activation=Rectlin())
    normsigm = dict(init=init_norm, activation=Logistic(shortcut=True))
    normsoft = dict(init=init_norm, activation=Softmax())

    # setup model layers
    b1 = BranchNode(name="b1")
    b2 = BranchNode(name="b2")
    p1 = [
        Affine(nout=100, name="main1", **normrelu),
        b1,
        Affine(nout=32, name="main2", **normrelu),
        Affine(nout=160, name="main3", **normrelu),
        b2,
        Affine(nout=32, name="main2", **normrelu),
        # make next layer big to check sizing
        Affine(nout=320, name="main2", **normrelu),
        Affine(nout=10, name="main4", **normsoft)
    ]
    p2 = [
        b1,
        Affine(nout=16, name="branch1_1", **normrelu),
        Affine(nout=10, name="branch1_2", **normsigm)
    ]
    p3 = [
        b2,
        Affine(nout=16, name="branch2_1", **normrelu),
        Affine(nout=10, name="branch2_2", **normsigm)
    ]

    self.cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()),
                                 GeneralizedCost(costfunc=CrossEntropyBinary()),
                                 GeneralizedCost(costfunc=CrossEntropyBinary())],
                          weights=[1, 0., 0.])
    self.layers = SingleOutputTree([p1, p2, p3], alphas=[1, .2, .2])
    self.model = Model(layers=self.layers)
    self.model.initialize(self.in_shape, cost=self.cost)

def train_regressor(orig_wordvecs, w2v_W, w2v_vocab):
    """
    Return regressor to map word2vec to RNN word space.

    Function modified from:
    https://github.com/ryankiros/skip-thoughts/blob/master/training/tools.py
    """
    # Gather all words from word2vec that appear in wordvecs
    d = defaultdict(lambda: 0)
    for w in w2v_vocab.keys():
        d[w] = 1
    shared = OrderedDict()
    count = 0
    for w in list(orig_wordvecs.keys())[:-2]:
        if d[w] > 0:
            shared[w] = count
            count += 1

    # Get the vectors for all words in 'shared'
    w2v = np.zeros((len(shared), 300), dtype='float32')
    sg = np.zeros((len(shared), 620), dtype='float32')
    for w in shared.keys():
        w2v[shared[w]] = w2v_W[w2v_vocab[w]]
        sg[shared[w]] = orig_wordvecs[w]

    train_set = ArrayIterator(X=w2v, y=sg, make_onehot=False)

    layers = [
        Linear(nout=620, init=Gaussian(loc=0.0, scale=0.1)),
        Bias(init=Constant(0.0))
    ]
    clf = Model(layers=layers)

    # regression model is trained using default global batch size
    cost = GeneralizedCost(costfunc=SumSquared())
    opt = GradientDescentMomentum(0.1, 0.9, gradient_clip_value=5.0)
    callbacks = Callbacks(clf)

    clf.fit(train_set, num_epochs=20, optimizer=opt, cost=cost,
            callbacks=callbacks)
    return clf

def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9,
                                      wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3, init=init_one,
              bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one, bias=Constant(1),
                activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(),
              gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(),
             gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()
    # separate layer_list into two: the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]

    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})

    layers_to_optimize1 = [l for l in layer_list1
                           if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2
                           if isinstance(l, ParameterLayer)]

    opt.optimize(layers_to_optimize1, 0)
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'

    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'

def test_model_get_outputs(backend_default):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator(X_train[:backend_default.bsz * 3])

    init_norm = Gaussian(loc=0.0, scale=0.1)
    layers = [Affine(nout=20, init=init_norm, bias=init_norm,
                     activation=Rectlin()),
              Affine(nout=10, init=init_norm,
                     activation=Logistic(shortcut=True))]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output)

def create_network():
    init = Gaussian(scale=0.01)
    layers = [
        Conv((3, 3, 128), init=init, activation=Rectlin(),
             strides=dict(str_h=1, str_w=2)),
        Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()),
        Pooling(2, strides=2),
        Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()),
        DeepBiRNN(256, init=init, activation=Rectlin(), reset_cells=True,
                  depth=3),
        RecurrentLast(),
        Affine(32, init=init, batch_norm=True, activation=Rectlin()),
        Affine(nout=2, init=init, activation=Softmax())
    ]
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyBinary())

def build(self):
    """
    Build the model's layers
    """
    first_layer_dens = 64
    second_layer_dens = 64
    output_layer_dens = 2

    # setup weight initialization function
    init_norm = Gaussian(scale=0.01)

    # setup model layers
    layers = [
        Affine(nout=first_layer_dens, init=init_norm, activation=Rectlin()),
        Affine(nout=second_layer_dens, init=init_norm, activation=Rectlin()),
        Affine(nout=output_layer_dens, init=init_norm,
               activation=Logistic(shortcut=True))
    ]

    # initialize model object
    self.model = Model(layers=layers)

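# Usage sketch (the `net` instance, `train_set` iterator, and hyperparameters
# are illustrative assumptions): build() only constructs self.model, so
# training supplies an explicit optimizer and cost, mirroring the MLP
# examples earlier in this file.
from neon.callbacks.callbacks import Callbacks
from neon.layers import GeneralizedCost
from neon.optimizers import GradientDescentMomentum
from neon.transforms import CrossEntropyBinary

net.build()
net.model.fit(train_set,
              optimizer=GradientDescentMomentum(0.1, momentum_coef=0.9),
              num_epochs=10,
              cost=GeneralizedCost(costfunc=CrossEntropyBinary()),
              callbacks=Callbacks(net.model))
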
def test_model_get_outputs(backend_default, data):
    dataset = MNIST(path=data)
    train_set = dataset.train_iter

    init_norm = Gaussian(loc=0.0, scale=0.1)
    layers = [
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output[:output.shape[0], :])

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)

# parse the command line arguments
parser = NeonArgparser(__doc__)
args = parser.parse_args()

# load up the mnist data set
# split into train and test sets
(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)

# setup a training set iterator
train_set = DataIterator(X_train, y_train, nclass=nclass)
# setup a validation data set iterator
valid_set = DataIterator(X_test, y_test, nclass=nclass)

# setup weight initialization function
init_norm = Gaussian(loc=0.0, scale=0.01)
normrelu = dict(init=init_norm, activation=Rectlin())
normsigm = dict(init=init_norm, activation=Logistic(shortcut=True))
normsoft = dict(init=init_norm, activation=Softmax())

# setup model layers
b1 = BranchNode(name="b1")
b2 = BranchNode(name="b2")
p1 = [Affine(nout=100, linear_name="m_l1", **normrelu),
      b1,
      Affine(nout=32, linear_name="m_l2", **normrelu),
      Affine(nout=16, linear_name="m_l3", **normrelu),
      b2,

rate = 0.00001
nepochs = {1: 8, 2: 3, 3: 6}[subj]
logger.warn('Overriding --epochs option')
if args.electrode == '-1':
    from loader import MultiLoader as Loader
    elecs = range(16)
else:
    from loader import SingleLoader as Loader
    rate /= 10
    elecs = args.electrode

train = Loader(data_dir, subj, elecs, args.validate_mode, training=True)
test = Loader(data_dir, subj, elecs, args.validate_mode, training=False)

gauss = Gaussian(scale=0.01)
glorot = GlorotUniform()
tiny = dict(str_h=1, str_w=1)
small = dict(str_h=1, str_w=2)
big = dict(str_h=1, str_w=4)
common = dict(batch_norm=True, activation=Rectlin())
layers = {
    1: [Conv((3, 5, 64), init=gauss, strides=big, **common),
        Pooling(2, strides=2),
        Conv((3, 3, 128), init=gauss, strides=small, **common),
        Pooling(2, strides=2),
        Conv((3, 3, 256), init=gauss, strides=small, **common),
        Conv((2, 2, 512), init=gauss, strides=tiny, **common),
        Conv((2, 2, 128), init=gauss, strides=tiny, **common),
        DeepBiRNN(64, init=glorot, reset_cells=True, depth=3, **common),

from neon.optimizers import GradientDescentMomentum, MultiOptimizer, Schedule
from neon.transforms import Rectlin, Softmax, CrossEntropyMulti
from neon.models import Model
from neon.data import ArrayIterator
import numpy as np

parser = NeonArgparser(__doc__)
args = parser.parse_args()

NervanaObject.be.enable_winograd = 4

# setup data provider
X_train = np.random.uniform(-1, 1, (128, 3 * 224 * 224))
y_train = np.random.uniform(-1, 1, (128, 1000))
train = ArrayIterator(X_train, y_train, nclass=1000, lshape=(3, 224, 224))

layers = [Conv((11, 11, 64), init=Gaussian(scale=0.01),
               activation=Rectlin(), padding=3, strides=4),
          Pooling(3, strides=2),
          Conv((5, 5, 192), init=Gaussian(scale=0.01),
               activation=Rectlin(), padding=2),
          Pooling(3, strides=2),
          Conv((3, 3, 384), init=Gaussian(scale=0.03),
               activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03),
               activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03),
               activation=Rectlin(), padding=1),
          Pooling(3, strides=2),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=1000, init=Gaussian(scale=0.01), activation=Softmax())]
model = Model(layers=layers)

weight_sched = Schedule([22, 44, 65], (1 / 250.)**(1 / 3.))
opt_gdm = GradientDescentMomentum(0.01, 0.0, wdecay=0.0005,
                                  schedule=weight_sched)

def test_model_serialize(backend):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator([X_train, X_train], y_train, nclass=nclass,
                             lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = [Conv((5, 5, 16), init=init_norm, bias=Constant(0),
                  activation=Rectlin()),
             Pooling(2),
             Affine(nout=20, init=init_norm, bias=init_norm,
                    activation=Rectlin())]
    path2 = [Dropout(keep=0.5),
             Affine(nout=20, init=init_norm, bias=init_norm,
                    activation=Rectlin())]
    layers = [MergeConcat([path1, path2]),
              Affine(nout=20, init=init_norm, bias=init_norm,
                     activation=Rectlin()),
              BatchNorm(),
              Affine(nout=10, init=init_norm,
                     activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            else:
                assert np.allclose(p, p_e)

    os.remove(tmp_save)

from neon.util.argparser import NeonArgparser
from neon.util.persist import ensure_dirs_exist
from neon.layers.layer import Dropout

# parse the command line arguments
parser = NeonArgparser(__doc__)
parser.add_argument('--kbatch', type=int, default=1,
                    help='number of data batches per noise batch in training')
args = parser.parse_args()

# load up the data set

# setup weight initialization function
init = Gaussian()

# discriminator using convolution layers
lrelu = Rectlin(slope=0.1)  # leaky relu for discriminator
# sigmoid = Logistic()  # sigmoid activation function
conv1 = dict(init=init, batch_norm=False, activation=lrelu)
# open question: should a BatchNorm layer be used here instead of the
# batch_norm parameter?
conv2 = dict(init=init, batch_norm=True, activation=lrelu, padding=2)
conv3 = dict(init=init, batch_norm=True, activation=lrelu, padding=1)
D_layers = [
    Conv((5, 5, 5, 32), **conv1),
    Dropout(keep=0.8),
    Conv((5, 5, 5, 8), **conv2),
    Dropout(keep=0.8),
    Conv((5, 5, 5, 8), **conv2),

logger.setLevel(logging.DEBUG)

parser = NeonArgparser(__doc__)
args = parser.parse_args()

be = gen_backend(backend='gpu', batch_size=128, datatype=np.float32)

# setup a dataset iterator
mnist = MNIST(path='../dataset/mnist')
(X_train, y_train), (X_test, y_test), nclass = mnist.load_data()
train_set = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

# define model
nfilters = [20, 50, 500]
# nfilters = [24, 56, 500]
init_w = Gaussian(scale=0.01)
relu = Rectlin()
common_params = dict(init=init_w, activation=relu)
layers = [
    Conv((5, 5, nfilters[0]), bias=Constant(0.1), padding=0, **common_params),
    Pooling(2, strides=2, padding=0),
    Conv((5, 5, nfilters[1]), bias=Constant(0.1), padding=0, **common_params),
    Pooling(2, strides=2, padding=0),
    Affine(nout=nfilters[2], bias=Constant(0.1), **common_params),
    Affine(nout=10, bias=Constant(0.1), activation=Softmax(),
           init=Gaussian(scale=0.01))
]
model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())