def create_frcn_model(frcn_fine_tune=False):
    """Build the Fast R-CNN model: VGG trunk, RoI pooling, and two heads.

    The tree has a classification head (21-way softmax) and a bounding-box
    regression head (84 linear outputs) that share the trunk via a branch node.

    Arguments:
        frcn_fine_tune (bool): when True, backprop is enabled through the
            RoI pooling layer so the VGG trunk is fine-tuned.
    """
    branch = BranchNode(name="b1")
    trunk = add_vgg_layers()

    # Classification branch: RoI pooling over VGG features, two FC+dropout
    # stages, then the 21-class softmax output.
    class_branch = [
        RoiPooling(layers=trunk, HW=(7, 7), bprop_enabled=frcn_fine_tune),
        Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=Gaussian(scale=0.005), bias=Constant(.1),
               activation=Rectlin()),
        Dropout(keep=0.5),
        branch,
        Affine(nout=21, init=Gaussian(scale=0.01), bias=Constant(0),
               activation=Softmax()),
    ]

    # Bounding-box regression branch: 84 linear outputs.
    bbox_branch = [
        branch,
        Affine(nout=84, init=Gaussian(scale=0.001), bias=Constant(0),
               activation=Identity()),
    ]

    return Model(layers=Tree([class_branch, bbox_branch]))
def create_network():
    """Assemble a 3D-convolutional (C3D-style) network for 101-way classification."""
    # weight initializers, shared across layers
    g1 = Gaussian(scale=0.01)
    g5 = Gaussian(scale=0.005)
    c0 = Constant(0)
    c1 = Constant(1)

    pad111 = {'pad_d': 1, 'pad_h': 1, 'pad_w': 1}
    str222 = {'str_d': 2, 'str_h': 2, 'str_w': 2}

    layers = [
        # First stage pools only spatially (depth stride kept at 1).
        Conv((3, 3, 3, 64), padding=pad111, init=g1, bias=c0, activation=Rectlin()),
        Pooling((1, 2, 2), strides={'str_d': 1, 'str_h': 2, 'str_w': 2}),
        Conv((3, 3, 3, 128), padding=pad111, init=g1, bias=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=str222),
    ]
    # Three identical conv/pool stages with 256 feature maps each.
    for _ in range(3):
        layers.append(Conv((3, 3, 3, 256), padding=pad111, init=g1, bias=c1,
                           activation=Rectlin()))
        layers.append(Pooling((2, 2, 2), strides=str222))
    layers.extend([
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=2048, init=g5, bias=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=101, init=g1, bias=c0, activation=Softmax()),
    ])
    return Model(layers=layers)
def test_multi_optimizer(backend_default):
    """MultiOptimizer should map each layer class name to its configured optimizer."""
    opt_gdm = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9,
                                      wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    layers = [
        Conv((11, 11, 64), strides=4, padding=3, init=init_one,
             bias=Constant(0), activation=Rectlin()),
        Affine(nout=4096, init=init_one, bias=Constant(1), activation=Rectlin()),
        LSTM(output_size=1000, init=init_one, activation=Logistic(),
             gate_activation=Tanh()),
        GRU(output_size=100, init=init_one, activation=Logistic(),
            gate_activation=Tanh()),
    ]
    # Flatten any nested layer containers into one flat list.
    layer_list = []
    for entry in layers:
        layer_list += entry if isinstance(entry, list) else [entry]

    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    map_list = opt._map_optimizers(layer_list)

    # Each optimizer bucket should hold layers of the expected class.
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
def fit_model(train_set, val_set, num_epochs=50):
    """Train a small CNN binary classifier and return the fitted model.

    Three 3x3/128 conv blocks with pooling, two dropout-regularized affine
    layers, and a 2-way softmax output, optimized with Adam against
    multiclass cross-entropy. Validation runs after every epoch.
    """
    relu = Rectlin()
    conv_params = {
        'strides': 1,
        'padding': 1,
        'init': Xavier(local=True),  # Xavier sqrt(3)/num_inputs [CHECK THIS]
        'bias': Constant(0),
        'activation': relu,
    }

    layers = [
        Conv((3, 3, 128), **conv_params),   # 3x3 kernel * 128 nodes
        Pooling(2),
        Conv((3, 3, 128), **conv_params),
        Pooling(2),                         # highest value from 2x2 window
        Conv((3, 3, 128), **conv_params),
        # With probability 0.5 sets activation values to 0.
        Dropout(keep=0.5),
        # 1 value per conv kernel - linear combination of layers.
        Affine(nout=128, init=GlorotUniform(), bias=Constant(0), activation=relu),
        Dropout(keep=0.5),
        Affine(nout=2, init=GlorotUniform(), bias=Constant(0),
               activation=Softmax(), name="class_layer"),
    ]

    cnn = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    optimizer = Adam()

    # Evaluate on the validation set after every epoch.
    callbacks = Callbacks(cnn, eval_set=val_set, eval_freq=1)

    cnn.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, cost=cost,
            callbacks=callbacks)
    return cnn
def constructCNN(self):
    """Build the layer stack selected by ``self.network_type``.

    Supports the "idsia" topology and three CIFAR-style ResNet depths
    (56/32/20). An unrecognized type yields an empty list, as before.
    """
    def _conv(fshape, name):
        # All idsia convs share stride 1, Kaiming init, zero bias, ReLU.
        return Conv(fshape, strides=1, init=Kaiming(), bias=Constant(0.0),
                    activation=Rectlin(), name=name)

    if self.network_type == "idsia":
        layers = [
            _conv((3, 3, 100), "Conv1"),
            Pooling(2, op="max", strides=2, name="neon_pool1"),
            _conv((4, 4, 150), "Conv2"),
            Pooling(2, op="max", strides=2, name="neon_pool2"),
            _conv((3, 3, 250), "Conv3"),
            Pooling(2, op="max", strides=2, name="neon_pool3"),
            Affine(nout=200, init=Kaiming(local=False), bias=Constant(0.0),
                   activation=Rectlin(), name="neon_fc1"),
            Affine(nout=self.class_num, init=Kaiming(local=False),
                   bias=Constant(0.0), activation=Softmax(), name="neon_fc2"),
        ]
    elif self.network_type == "resnet-56":
        layers = resnet(9, self.class_num, int(self.resize_size[0] / 4))  # 6*9 + 2 = 56
    elif self.network_type == "resnet-32":
        layers = resnet(5, self.class_num, int(self.resize_size[0] / 4))  # 6*5 + 2 = 32
    elif self.network_type == "resnet-20":
        layers = resnet(3, self.class_num, int(self.resize_size[0] / 4))  # 6*3 + 2 = 20
    else:
        layers = []
    return layers
def generate_leafs(self, layer):
    """
    Given a key to the ssd_config, generate the leafs
    (prior box plus localization and confidence conv heads).
    """
    config = self.ssd_config[layer]
    leaf_params = {'strides': 1, 'padding': 1,
                   'init': Xavier(local=True), 'bias': Constant(0)}

    # to match caffe layer's naming
    if config['normalize']:
        layer += '_norm'

    mbox_prior = PriorBox(**self.get_priorbox_args(config))
    num_priors = mbox_prior.num_priors_per_pixel

    # Localization head: 4 box coordinates per prior.
    mbox_loc = Conv((3, 3, 4 * num_priors),
                    name=layer + '_mbox_loc', **leaf_params)
    # Confidence head: one score per class per prior.
    mbox_conf = Conv((3, 3, self.num_classes * num_priors),
                     name=layer + '_mbox_conf', **leaf_params)

    return {'mbox_prior': mbox_prior,
            'mbox_loc': mbox_loc,
            'mbox_conf': mbox_conf}
def layers(self):
    """Return a small convnet topology with a DOG preprocessing front end."""
    init_uni = Uniform(low=-0.1, high=0.1)
    bn = False
    if self.use_softmax:
        out_act = Softmax()
    else:
        out_act = Logistic(shortcut=True)
    return [
        DOG((5.0, 4.0, 3.0, 1.6), 1.8),
        Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=bn),
        Pooling((2, 2)),
        Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=bn),
        Pooling((2, 2)),
        Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=bn),
        Affine(nout=self.noutputs, init=init_uni, bias=Constant(0),
               activation=out_act),
    ]
def main_branch(branch_nodes):
    """GoogLeNet trunk with the two auxiliary branch nodes spliced in."""
    stem = [
        Conv((7, 7, 64), padding=3, strides=2, name='conv1/7x7_s2', **common),
        Pooling(name="pool1/3x3_s2", **pool3s2p1),
        Conv((1, 1, 64), name='conv2/3x3_reduce', **common),
        Conv((3, 3, 192), name="conv2/3x3", **commonp1),
        Pooling(name="pool2/3x3_s2", **pool3s2p1),
    ]
    body = [
        inception([(64, ), (96, 128), (16, 32), (32, )], name='inception_3a/'),
        inception([(128, ), (128, 192), (32, 96), (64, )], name='inception_3b/'),
        Pooling(name='pool3/3x3_s2', **pool3s2p1),
        inception([(192, ), (96, 208), (16, 48), (64, )], name='inception_4a/'),
        branch_nodes[0],  # first auxiliary classifier taps in here
        inception([(160, ), (112, 224), (24, 64), (64, )], name='inception_4b/'),
        inception([(128, ), (128, 256), (24, 64), (64, )], name='inception_4c/'),
        inception([(112, ), (144, 288), (32, 64), (64, )], name='inception_4d/'),
        branch_nodes[1],  # second auxiliary classifier taps in here
        inception([(256, ), (160, 320), (32, 128), (128, )], name='inception_4e/'),
        Pooling(name='pool4/3x3_s2', **pool3s2p1),
        inception([(256, ), (160, 320), (32, 128), (128, )], name='inception_5a/'),
        inception([(384, ), (192, 384), (48, 128), (128, )], name="inception_5b/"),
    ]
    head = [
        Pooling(fshape=7, strides=1, op="avg", name='pool5/7x7_s1'),
        Affine(nout=1000, init=init1, activation=Softmax(), bias=Constant(0),
               name='loss3/classifier'),
    ]
    return stem + body + head
def test_model_get_outputs_rnn(backend_default, data):
    """Check shape and structure of RNN outputs on the PTB test set.

    With constant weight init and no training, values along the feature
    dimension are uniform, and activations increase along the time dimension.
    """
    data_path = load_ptb_test(path=data)
    data_set = Text(time_steps=50, path=data_path)

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, activation=Logistic()),
        Affine(len(data_set.vocab), init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    output = model.get_outputs(data_set)
    assert output.shape == (data_set.ndata, data_set.seq_length, data_set.nclass)

    # since the init are all constant and model is un-trained:
    # along the feature dim, the values should be all the same
    assert np.allclose(output[0, 0], output[0, 0, 0], rtol=0, atol=1e-5)
    assert np.allclose(output[0, 1], output[0, 1, 0], rtol=0, atol=1e-5)

    # along the time dim, the values should be increasing.
    # Fix: np.alltrue was deprecated and removed in NumPy 2.0; np.all is the
    # supported equivalent.
    assert np.all(output[0, 2] > output[0, 1])
    assert np.all(output[0, 1] > output[0, 0])
def __init__(self):
    """Build and initialize a two-stream (image + sentence) LSTM model."""
    self.in_shape = [1024, (2538, 38)]
    init = Constant(0)

    # Two input pathways, each projected down to a 10-dim space.
    image_path = Sequential([Affine(20, init, bias=init),
                             Affine(10, init, bias=init)])
    sent_path = Sequential([Affine(30, init, bias=init),
                            Affine(10, init)])

    layers = [
        MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
        Dropout(keep=0.5),
        LSTM(4, init, activation=Logistic(), gate_activation=Tanh(),
             reset_cells=True),
        Affine(20, init, bias=init, activation=Softmax()),
    ]
    self.layers = layers
    self.cost = GeneralizedCostMask(CrossEntropyMulti())
    self.model = Model(layers=layers)
    self.model.initialize(self.in_shape, cost=self.cost)
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer: layers are configured/allocated, then optimized
    in two groups, and the optimizer-to-layer mapping is verified.
    """
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one,
              activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one,
             activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()
    # separate layer_list into two, the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]
    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]
    opt.optimize(layers_to_optimize1, 0)
    # Bug fix: compare strings with '==', not 'is'. Identity comparison with a
    # str literal relies on interning (an implementation detail) and raises
    # SyntaxWarning on modern CPython.
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'
    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
def aux_branch(bnode):
    """Auxiliary classifier branch hung off ``bnode``."""
    pooled = Pooling(fshape=5, strides=3, op="avg")
    reduce_conv = Conv((1, 1, 128), **common)
    fc = Affine(nout=1024, init=init1, activation=relu, bias=bias)
    classifier = Affine(nout=1000, init=init1, activation=Softmax(),
                        bias=Constant(0))
    return [bnode, pooled, reduce_conv, fc, Dropout(keep=0.3), classifier]
def aux_branch(bnode, ind):
    """Auxiliary classifier branch (loss head ``ind``) hung off ``bnode``."""
    # TODO put dropout back in
    nm = 'loss%d/' % ind
    return [
        bnode,
        Pooling(fshape=5, strides=3, op="avg", name=nm + 'ave_pool'),
        Conv((1, 1, 128), name=nm + 'conv', **common),
        Affine(nout=1024, init=init1, activation=relu, bias=bias,
               name=nm + 'fc'),
        Dropout(keep=1.0, name=nm + 'drop_fc'),  # keep=1.0: dropout disabled
        Affine(nout=1000, init=init1, activation=Softmax(), bias=Constant(0),
               name=nm + 'classifier'),
    ]
def layers(self):
    """Return an AlexNet-like topology with LRN normalization layers."""
    g01 = Gaussian(scale=0.01)
    c0 = Constant(0)
    final_act = Softmax() if self.use_softmax else Logistic(shortcut=True)
    return [
        Conv((7, 7, 96), init=Gaussian(scale=0.0001), bias=c0,
             activation=Rectlin(), padding=3, strides=1),
        LRN(31, ascale=0.001, bpower=0.75),
        Pooling(3, strides=2, padding=1),
        Conv((5, 5, 256), init=g01, bias=c0, activation=Rectlin(),
             padding=2, strides=1),
        LRN(31, ascale=0.001, bpower=0.75),
        Pooling(3, strides=2, padding=1),
        Conv((3, 3, 384), init=g01, bias=c0, activation=Rectlin(),
             padding=1, strides=1),
        Conv((3, 3, 384), init=g01, bias=c0, activation=Rectlin(),
             padding=1, strides=1),
        Conv((3, 3, 256), init=g01, bias=c0, activation=Rectlin(),
             padding=1, strides=1),
        Pooling(3, strides=2, padding=1),
        Affine(nout=4096, init=g01, bias=c0, activation=Identity()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=g01, bias=c0, activation=Identity()),
        Dropout(keep=0.5),
        Affine(nout=self.noutputs, init=g01, bias=c0, activation=final_act),
    ]
def constuct_network():
    """
    Constructs the layers of our RCNN architecture. It is similar to AlexNet
    but simplified to only a few convolutional layers and 3 LSTM layers.
    """
    # NOTE(review): function name has a typo ("constuct"); kept for callers.
    def _lstm():
        # Three identical LSTMs follow the convolutional feature extractor.
        return LSTM(512, init=Gaussian(scale=0.03), activation=Rectlin(),
                    gate_activation=Tanh())

    layers = [
        Conv((11, 11, 64), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=3, strides=4),
        Pooling(3, strides=2),
        Conv((7, 7, 128), init=Gaussian(scale=0.01), bias=Constant(1),
             activation=Rectlin(), padding=2),
        Pooling(3, strides=2),
        Conv((5, 5, 256), init=Gaussian(scale=0.03), bias=Constant(0),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        _lstm(),
        _lstm(),
        _lstm(),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=101, init=Gaussian(scale=0.01), bias=Constant(-7),
               activation=Softmax()),
    ]
    return Model(layers=layers)
def layers(self):
    """Return an Explin-activated, batch-normalized convnet topology."""
    bn = True
    # First layer is either batch-normalized or biased, per configuration.
    if self.bn_first_layer:
        first = Conv((7, 7, 96), init=Kaiming(), activation=Explin(),
                     batch_norm=bn, padding=3, strides=1)
    else:
        first = Conv((7, 7, 96), init=Kaiming(), bias=Constant(0),
                     activation=Explin(), padding=3, strides=1)
    final_act = Softmax() if self.use_softmax else Logistic(shortcut=True)
    return [
        first,
        Pooling(3, strides=2, padding=1),
        Conv((7, 7, 128), init=Kaiming(), activation=Explin(), batch_norm=bn,
             padding=3, strides=1),
        Pooling(3, strides=2, padding=1),
        Conv((5, 5, 256), init=Kaiming(), activation=Explin(), batch_norm=bn,
             padding=2, strides=1),
        Pooling(3, strides=2, padding=1),
        Conv((3, 3, 384), init=Kaiming(), activation=Explin(), batch_norm=bn,
             padding=1, strides=1),
        Conv((3, 3, 384), init=Kaiming(), activation=Explin(), batch_norm=bn,
             padding=1, strides=1),
        Conv((3, 3, 384), init=Kaiming(), activation=Explin(), batch_norm=bn,
             padding=1, strides=1),
        Pooling(3, strides=2, padding=1, op='avg'),
        # NOTE(review): both hidden Affines use nout=self.noutputs — confirm
        # this is intended rather than a larger hidden width.
        Affine(nout=self.noutputs, init=Kaiming(), activation=Explin(),
               batch_norm=bn),
        Affine(nout=self.noutputs, init=Kaiming(), activation=Explin(),
               batch_norm=bn),
        Affine(nout=self.noutputs, init=Kaiming(), bias=Constant(0),
               activation=final_act),
    ]
def constuct_network():
    """
    Constructs the layers of the AlexNet architecture.
    """
    # NOTE(review): function name has a typo ("constuct"); kept for callers.
    g01 = Gaussian(scale=0.01)
    g03 = Gaussian(scale=0.03)
    layers = [
        Conv((11, 11, 64), init=g01, bias=Constant(0), activation=Rectlin(),
             padding=3, strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192), init=g01, bias=Constant(1), activation=Rectlin(),
             padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384), init=g03, bias=Constant(0), activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256), init=g03, bias=Constant(1), activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256), init=g03, bias=Constant(1), activation=Rectlin(),
             padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096, init=g01, bias=Constant(1), activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=4096, init=g01, bias=Constant(1), activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=101, init=g01, bias=Constant(-7), activation=Softmax()),
    ]
    return Model(layers=layers)
def create_network():
    """Build AlexNet for 1000-way classification plus its cross-entropy cost."""
    g01 = Gaussian(scale=0.01)
    g03 = Gaussian(scale=0.03)
    layers = [
        Conv((11, 11, 64), init=g01, bias=Constant(0), activation=Rectlin(),
             padding=3, strides=4),
        Pooling(3, strides=2),
        Conv((5, 5, 192), init=g01, bias=Constant(1), activation=Rectlin(),
             padding=2),
        Pooling(3, strides=2),
        Conv((3, 3, 384), init=g03, bias=Constant(0), activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256), init=g03, bias=Constant(1), activation=Rectlin(),
             padding=1),
        Conv((3, 3, 256), init=g03, bias=Constant(1), activation=Rectlin(),
             padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096, init=g01, bias=Constant(1), activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=4096, init=g01, bias=Constant(1), activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=1000, init=g01, bias=Constant(-7), activation=Softmax()),
    ]
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
def test_model_N_S_setter(backend_default):
    """Model batch-size and sequence-length setters should run without error."""
    # weight initialization
    init = Constant(0.08)

    # model initialization
    model = Model(layers=[
        Recurrent(150, init, activation=Logistic()),
        Affine(100, init, bias=init, activation=Rectlin()),
    ])

    model.set_batch_size(20)
    model.set_seq_len(10)
def create_normalize_branch(self, leafs, branch_node, layer):
    """
    Append leafs to trunk_layers at the branch_node.
    If normalize, add a Normalize layer.
    """
    split = BranchNode(name=layer + '_norm_branch')
    # Localization path gets the Normalize layer; confidence path shares the
    # normalized features via the split node.
    loc_path = [Normalize(init=Constant(20.0), name=layer + '_norm'),
                split,
                leafs['mbox_loc']]
    conf_path = [split, leafs['mbox_conf']]
    return [branch_node, Tree([loc_path, conf_path])]
def train_regressor(orig_wordvecs, w2v_W, w2v_vocab):
    """
    Return regressor to map word2vec to RNN word space

    Function modified from:
    https://github.com/ryankiros/skip-thoughts/blob/master/training/tools.py
    """
    # Build the shared vocabulary: words (excluding the last two entries of
    # orig_wordvecs) that also appear in word2vec, mapped to row indices.
    # Idiom fix: a set membership test replaces the defaultdict flag dict,
    # and len(shared) replaces the manual counter — same resulting mapping.
    w2v_words = set(w2v_vocab)
    shared = OrderedDict()
    for w in list(orig_wordvecs.keys())[:-2]:
        if w in w2v_words:
            shared[w] = len(shared)

    # Get the vectors for all words in 'shared'
    w2v = np.zeros((len(shared), 300), dtype='float32')
    sg = np.zeros((len(shared), 620), dtype='float32')
    for w, row in shared.items():
        w2v[row] = w2v_W[w2v_vocab[w]]
        sg[row] = orig_wordvecs[w]

    train_set = ArrayIterator(X=w2v, y=sg, make_onehot=False)

    # Linear regression: 300-d word2vec input -> 620-d RNN embedding space.
    layers = [
        Linear(nout=620, init=Gaussian(loc=0.0, scale=0.1)),
        Bias(init=Constant(0.0))
    ]
    clf = Model(layers=layers)

    # regression model is trained using default global batch size
    cost = GeneralizedCost(costfunc=SumSquared())
    opt = GradientDescentMomentum(0.1, 0.9, gradient_clip_value=5.0)
    callbacks = Callbacks(clf)

    clf.fit(train_set, num_epochs=20, optimizer=opt, cost=cost,
            callbacks=callbacks)
    return clf
def add_vgg_layers():
    """Return the VGG-16 convolutional trunk (final pool and FC layers omitted)."""
    conv_params = {
        'strides': 1,
        'padding': 1,
        'init': Xavier(local=True),
        'bias': Constant(0),
        'activation': Rectlin(),
    }

    vgg_layers = []
    # (conv count, feature maps, pool-after-stack) per VGG-16 stage; the last
    # stage is left unpooled since the fully-connected head is not used here.
    stages = [(2, 64, True), (2, 128, True), (3, 256, True),
              (3, 512, True), (3, 512, False)]
    for n_convs, nfm, pool in stages:
        for _ in range(n_convs):
            vgg_layers.append(Conv((3, 3, nfm), **conv_params))
        if pool:
            vgg_layers.append(Pooling(2, strides=2))
    return vgg_layers
def test_model_get_outputs_rnn(backend_default, data):
    """get_outputs on a recurrent model should return (ndata, seq_len, nclass)."""
    data_path = load_text('ptb-valid', path=data)
    data_set = Text(time_steps=50, path=data_path)

    # weight initialization
    init = Constant(0.08)

    # model initialization
    model = Model(layers=[
        Recurrent(150, init, Logistic()),
        Affine(len(data_set.vocab), init, bias=init, activation=Rectlin()),
    ])

    output = model.get_outputs(data_set)
    assert output.shape == (data_set.ndata, data_set.seq_length, data_set.nclass)
def gen_model(backend_type):
    """Create a small MNIST-style convnet on the requested backend."""
    # setup backend
    gen_backend(backend=backend_type,
                batch_size=batch_size,
                rng_seed=2,
                device_id=args.device_id,
                datatype=args.datatype)

    init_uni = Uniform(low=-0.1, high=0.1)

    # Set up the model layers
    layers = [
        Conv((5, 5, 16), init=init_uni, bias=Constant(0), activation=Rectlin()),
        Pooling(2),
        Conv((5, 5, 32), init=init_uni, activation=Rectlin()),
        Pooling(2),
        Affine(nout=500, init=init_uni, activation=Rectlin()),
        Affine(nout=10, init=init_uni, activation=Logistic(shortcut=True)),
    ]
    return Model(layers=layers)
def main_branch(branch_nodes):
    """GoogLeNet trunk (unnamed layers) with the aux branch nodes spliced in."""
    stem = [
        Conv((7, 7, 64), padding=3, strides=2, **common),
        Pooling(**pool3s2p1),
        Conv((1, 1, 64), **common),
        Conv((3, 3, 192), **commonp1),
        Pooling(**pool3s2p1),
    ]
    body = [
        inception([(64, ), (96, 128), (16, 32), (32, )]),
        inception([(128, ), (128, 192), (32, 96), (64, )]),
        Pooling(**pool3s2p1),
        inception([(192, ), (96, 208), (16, 48), (64, )]),
        branch_nodes[0],  # first auxiliary classifier taps in here
        inception([(160, ), (112, 224), (24, 64), (64, )]),
        inception([(128, ), (128, 256), (24, 64), (64, )]),
        inception([(112, ), (144, 288), (32, 64), (64, )]),
        branch_nodes[1],  # second auxiliary classifier taps in here
        inception([(256, ), (160, 320), (32, 128), (128, )]),
        Pooling(**pool3s2p1),
        inception([(256, ), (160, 320), (32, 128), (128, )]),
        inception([(384, ), (192, 384), (48, 128), (128, )]),
    ]
    head = [
        Pooling(fshape=7, strides=1, op="avg"),
        Affine(nout=1000, init=init1, activation=Softmax(), bias=Constant(0)),
    ]
    return stem + body + head
def __init__(self):
    """Build and initialize an all-convolutional CIFAR-shaped network."""
    self.in_shape = (3, 32, 32)
    relu = Rectlin()
    init_use = Constant(0)

    conv = dict(init=init_use, batch_norm=False, activation=relu)
    convp1 = dict(padding=1, bias=init_use, **conv)
    convp1s2 = dict(init=init_use, batch_norm=False, bias=init_use,
                    padding=1, strides=2)

    # Two conv stages (96 then 192 maps), each closed by a strided conv
    # (replacing pooling) and dropout.
    layers = [Dropout(keep=.8)]
    for nfm in (96, 192):
        layers += [Conv((3, 3, nfm), **convp1),
                   Conv((3, 3, nfm), **convp1),
                   Conv((3, 3, nfm), **convp1s2),
                   Dropout(keep=.5)]
    layers += [Conv((3, 3, 192), **convp1),
               Conv((1, 1, 192), **conv),
               Conv((1, 1, 16), **conv),
               Pooling(8, op="avg"),
               Activation(Softmax())]

    self.layers = layers
    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    model.initialize(self.in_shape, cost=cost)
    self.model = model
from neon.util.argparser import NeonArgparser # parse the command line arguments parser = NeonArgparser(__doc__) args = parser.parse_args() dataset = MNIST(path=args.data_dir) train_set = dataset.train_iter valid_set = dataset.valid_iter # weight initialization init_norm = Gaussian(loc=0.0, scale=0.01) # initialize model layers = [] layers.append(Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin())) layers.append(Affine(nout=10, init=init_norm, bias=Constant(0), activation=Logistic(shortcut=True), name='special_linear')) cost = GeneralizedCost(costfunc=CrossEntropyBinary()) mlp = Model(layers=layers) # fit and validate optimizer_one = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9) optimizer_two = RMSProp() # all bias layers and the last linear layer will use # optimizer_two. all other layers will use optimizer_one. opt = MultiOptimizer({'default': optimizer_one,
# hyperparameters hidden_size = 512 num_epochs = args.epochs # setup backend be = gen_backend(**extract_valid_args(args, gen_backend)) # download dataset data_path = load_flickr8k(path=args.data_dir) # Other setnames are flickr30k and coco # load data train_set = ImageCaption(path=data_path, max_images=-1) # weight initialization init = Uniform(low=-0.08, high=0.08) init2 = Constant(val=train_set.be.array(train_set.bias_init)) # model initialization image_path = Sequential([Affine(hidden_size, init, bias=Constant(val=0.0))]) sent_path = Sequential([Affine(hidden_size, init, linear_name='sent')]) layers = [ MergeMultistream(layers=[image_path, sent_path], merge="recurrent"), Dropout(keep=0.5), LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True), Affine(train_set.vocab_size, init, bias=init2, activation=Softmax()) ] cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True)) # configure callbacks
def test_model_serialize(backend):
    """Round-trip a model through serialize()/load_weights() and verify that
    inference outputs, layer states, and parameters are all preserved."""
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    # Two identical input streams feed the merged paths below.
    train_set = DataIterator([X_train, X_train], y_train, nclass=nclass,
                             lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model: a conv path and a dropout path merged by concat,
    # followed by shared affine/batch-norm layers.
    path1 = [
        Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    path2 = [
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    layers = [
        MergeConcat([path1, path2]),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        BatchNorm(),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model into a fresh Model built over the same layer objects
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)
    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            else:
                assert np.allclose(p, p_e)

    os.remove(tmp_save)
# setup backend be = gen_backend(**extract_valid_args(args, gen_backend)) # download dataset data_path = load_flickr8k( path=args.data_dir) # Other setnames are flickr30k and coco # load data train_set = ImageCaption(path=data_path, max_images=-1) # weight initialization init = Uniform(low=-0.08, high=0.08) init2 = Array(val=train_set.be.array(train_set.bias_init)) # model initialization image_path = Sequential([Affine(hidden_size, init, bias=Constant(val=0.0))]) sent_path = Sequential([Affine(hidden_size, init, name='sent')]) layers = [ MergeMultistream(layers=[image_path, sent_path], merge="recurrent"), Dropout(keep=0.5), LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True), Affine(train_set.vocab_size, init, bias=init2, activation=Softmax()) ] cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))