def construct_network():
    """
    Constructs the layers of our RCNN architecture. It is similar to AlexNet,
    but simplified to only a few convolutional layers and 3 LSTM layers.
    """
    layers = [
        Conv((11, 11, 64), init=Gaussian(scale=0.01), bias=Constant(0),
             activation=Rectlin(), padding=3, strides=4),
        Pooling(3, strides=2),
        Conv((7, 7, 128), init=Gaussian(scale=0.01), bias=Constant(1),
             activation=Rectlin(), padding=2),
        Pooling(3, strides=2),
        Conv((5, 5, 256), init=Gaussian(scale=0.03), bias=Constant(0),
             activation=Rectlin(), padding=1),
        Conv((3, 3, 256), init=Gaussian(scale=0.03), bias=Constant(1),
             activation=Rectlin(), padding=1),
        Pooling(3, strides=2),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        LSTM(512, init=Gaussian(scale=0.03), activation=Rectlin(),
             gate_activation=Tanh()),
        LSTM(512, init=Gaussian(scale=0.03), activation=Rectlin(),
             gate_activation=Tanh()),
        LSTM(512, init=Gaussian(scale=0.03), activation=Rectlin(),
             gate_activation=Tanh()),
        Affine(nout=4096, init=Gaussian(scale=0.01), bias=Constant(1),
               activation=Rectlin()),
        DropoutBinary(keep=0.5),
        Affine(nout=101, init=Gaussian(scale=0.01), bias=Constant(-7),
               activation=Softmax())
    ]
    return Model(layers=layers)
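# A minimal usage sketch for construct_network(), assuming `train_set` is an
# ArrayIterator-style iterator over the 101 target classes; the optimizer and
# epoch count here are illustrative, not taken from the original script.
model = construct_network()
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
opt = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)
callbacks = Callbacks(model)
model.fit(train_set, optimizer=opt, num_epochs=10, cost=cost,
          callbacks=callbacks)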
def __init__(self):
    self.in_shape = [1024, (2538, 38)]
    init = Constant(0)
    image_path = Sequential([Affine(20, init, bias=init),
                             Affine(10, init, bias=init)])
    sent_path = Sequential([Affine(30, init, bias=init),
                            Affine(10, init)])
    layers = [
        MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
        Dropout(keep=0.5),
        LSTM(4, init, activation=Logistic(), gate_activation=Tanh(),
             reset_cells=True),
        Affine(20, init, bias=init, activation=Softmax())
    ]
    self.layers = layers
    self.cost = GeneralizedCostMask(CrossEntropyMulti())
    self.model = Model(layers=layers)
    self.model.initialize(self.in_shape, cost=self.cost)
def input_layers(self, analytics_input, init, activation, gate):
    """
    Return the input layers. Convolutional and LSTM input layers are
    currently supported, with an optional analytics stream merged in.

    :return: the input layers
    """
    if self.recurrent:
        if analytics_input:
            # support analytics + content
            input_layers = MergeMultistream([
                [LSTM(300, init, init_inner=Kaiming(), activation=activation,
                      gate_activation=gate, reset_cells=True),
                 RecurrentSum()],
                [Affine(30, init, activation=activation)]
            ], 'stack')
        else:
            # content only
            input_layers = [
                LSTM(300, init, init_inner=Kaiming(), activation=activation,
                     gate_activation=gate, reset_cells=True),
                RecurrentSum()
            ]
    else:
        if analytics_input:
            # support analytics + content
            input_layers = MergeMultistream([
                self.conv_net(activation),
                [Affine(30, init, activation=Logistic())]
            ], 'stack')
        else:
            # content only
            input_layers = self.conv_net(activation)
    return input_layers
def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one,
              activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one,
             activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({
        'default': opt_gdm,
        'Bias': opt_ada,
        'Convolution': opt_adam,
        'Linear': opt_rms,
        'LSTM': opt_rms_1,
        'GRU': opt_rms_1
    })
    map_list = opt._map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one,
              activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one,
             activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()

    # separate layer_list into two: the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]
    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]

    opt.optimize(layers_to_optimize1, 0)
    # compare strings with ==; `is` checks object identity and is unreliable here
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'
    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
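# A minimal sketch of how a MultiOptimizer like the ones tested above is used
# in training, assuming `model`, `cost`, `train_set`, and `callbacks` are
# already set up as in the other snippets: each layer class name maps to its
# own optimizer, and 'default' covers every layer class not listed.
opt = MultiOptimizer({'default': GradientDescentMomentum(learning_rate=0.01,
                                                         momentum_coef=0.9),
                      'LSTM': RMSProp(gradient_clip_value=5)})
model.fit(train_set, optimizer=opt, num_epochs=2, cost=cost,
          callbacks=callbacks)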
print "Vocab size - ", vocab_size print "Sentence Length - ", sentence_length print "# of train sentences", X_train.shape[0] print "# of test sentence", X_test.shape[0] train_set = ArrayIterator(X_train, y_train, nclass=2) valid_set = ArrayIterator(X_test, y_test, nclass=2) # weight initialization uni = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim) g_uni = GlorotUniform() if args.rlayer_type == 'lstm': rlayer = LSTM(hidden_size, g_uni, activation=Tanh(), gate_activation=Logistic(), reset_cells=True) elif args.rlayer_type == 'bilstm': rlayer = DeepBiLSTM(hidden_size, g_uni, activation=Tanh(), depth=1, gate_activation=Logistic(), reset_cells=True) elif args.rlayer_type == 'rnn': rlayer = Recurrent(hidden_size, g_uni, activation=Tanh(), reset_cells=True) elif args.rlayer_type == 'birnn': rlayer = DeepBiRNN(hidden_size, g_uni, activation=Tanh(),
# load data
train_set = ImageCaption(path=data_path, max_images=-1)

# weight initialization
init = Uniform(low=-0.08, high=0.08)
init2 = Constant(val=train_set.be.array(train_set.bias_init))

# model initialization
image_path = Sequential([Affine(hidden_size, init, bias=Constant(val=0.0))])
sent_path = Sequential([Affine(hidden_size, init, linear_name='sent')])

layers = [
    MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
    Dropout(keep=0.5),
    LSTM(hidden_size, init, activation=Logistic(),
         gate_activation=Tanh(), reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

# configure callbacks
checkpoint_model_path = "~/image_caption2.pickle"
if args.callback_args['save_path'] is None:
    args.callback_args['save_path'] = checkpoint_model_path

if args.callback_args['serialize'] is None:
    args.callback_args['serialize'] = 1

model = Model(layers=layers)
def test_reshape_layer_model(backend_default, fargs):
    """
    test cases:
    - conv before RNNs
    - conv after RNNs
    - conv after LUT
    """
    np.random.seed(seed=0)

    nin, nout, bsz = fargs
    be = backend_default
    be.bsz = bsz
    input_size = (nin, be.bsz)

    init = Uniform(-0.1, 0.1)
    g_uni = GlorotUniform()

    inp_np = np.random.rand(nin, be.bsz)
    delta_np = np.random.rand(nout, be.bsz)

    inp = be.array(inp_np)
    delta = be.array(delta_np)

    conv_lut_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 100, -1)),
        Conv((3, 3, 16), init=init),
        LSTM(64, g_uni, activation=Tanh(),
             gate_activation=Logistic(), reset_cells=True),
        RecurrentSum(),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_lut_2 = [
        LookupTable(vocab_size=1000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 50, -1)),
        Conv((3, 3, 16), init=init),
        Pooling(2, strides=2),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    conv_rnn_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        LSTM(64, g_uni, activation=Tanh(),
             gate_activation=Logistic(), reset_cells=True),
        Reshape(reshape=(4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_rnn_2 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Recurrent(64, g_uni, activation=Tanh(), reset_cells=True),
        Reshape(reshape=(4, -1, 32)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    lut_sum_1 = [
        LookupTable(vocab_size=1000, embedding_dim=128, init=init),
        RecurrentSum(),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    lut_birnn_1 = [
        LookupTable(vocab_size=1000, embedding_dim=200, init=init),
        DeepBiRNN(32, init=GlorotUniform(), batch_norm=True,
                  activation=Tanh(), reset_cells=True, depth=1),
        Reshape((4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout=nout, init=init, bias=init, activation=Softmax())
    ]

    layers_test = [conv_lut_1, conv_lut_2, conv_rnn_1,
                   conv_rnn_2, lut_sum_1, lut_birnn_1]

    for lg in layers_test:
        model = Model(layers=lg)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(input_size, cost)
        model.fprop(inp)
        model.bprop(delta)
                             vocab=train_set.vocab,
                             tokenizer=tokenizer,
                             onehot_input=False)

# weight initialization
init = Uniform(low=-0.1, high=0.1)

# model initialization
rlayer_params = {"output_size": hidden_size, "init": init,
                 "activation": Tanh(), "gate_activation": Logistic()}
if args.rlayer_type == 'lstm':
    rlayer1, rlayer2 = LSTM(**rlayer_params), LSTM(**rlayer_params)
else:
    rlayer1, rlayer2 = GRU(**rlayer_params), GRU(**rlayer_params)

layers = [
    LookupTable(vocab_size=len(train_set.vocab),
                embedding_dim=hidden_size, init=init),
    rlayer1,
    rlayer2,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]
cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
model = Model(layers=layers)

# vanilla gradient descent with decay schedule on learning rate and gradient scaling
                 default_dtype=args.datatype)

# download penn treebank
train_path = load_text('ptb-train', path=args.data_dir)
valid_path = load_text('ptb-valid', path=args.data_dir)

# load data and parse on character-level
train_set = Text(time_steps, train_path)
valid_set = Text(time_steps, valid_path, vocab=train_set.vocab)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
if rlayer_type == 'lstm':
    # pass the activations by keyword so they cannot be mistaken for init_inner
    rlayer = LSTM(hidden_size, init, activation=Logistic(),
                  gate_activation=Tanh())
elif rlayer_type == 'gru':
    rlayer = GRU(hidden_size, init, activation=Tanh(),
                 gate_activation=Logistic())
else:
    raise NotImplementedError('%s layer not implemented' % rlayer_type)

layers = [
    rlayer,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

optimizer = RMSProp(clip_gradients=clip_gradients,
                    stochastic_round=args.rounding)
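# A plausible continuation, following the callbacks/fit pattern used by the
# other character-level examples in this collection; the exact Callbacks
# signature in this older snippet may differ.
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)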
def main():
    parser = NeonArgparser(__doc__)
    args = parser.parse_args(gen_be=False)
    # mat_data = sio.loadmat('../data/timeseries/02_timeseries.mat')
    # ts = V1TimeSeries(mat_data['timeseries'], mat_data['stim'], binning=10)

    seq_len = 30
    hidden = 20

    be = gen_backend(**extract_valid_args(args, gen_backend))

    kohn = KohnV1Dataset(path='../tmp/')
    kohn.gen_iterators(seq_len)

    train_set = V1IteratorSequence(ts.train, seq_len, return_sequences=False)
    valid_set = V1IteratorSequence(ts.test, seq_len, return_sequences=False)
    init = GlorotUniform()

    # model initialization: one recurrent path for spikes, one for the stimulus
    spike_rnn_path = Sequential(layers=[
        LSTM(hidden, init, activation=Logistic(),
             gate_activation=Logistic(), reset_cells=False),
        Dropout(keep=0.5),
        LSTM(hidden, init, activation=Logistic(),
             gate_activation=Logistic(), reset_cells=False),
        RecurrentLast(),
        Affine(train_set.nfeatures, init, bias=init,
               activation=Identity(), name='spike_in')])

    stim_rnn_path = Sequential(layers=[
        LSTM(hidden, init, activation=Logistic(),
             gate_activation=Logistic(), reset_cells=False),
        Dropout(keep=0.5),
        RecurrentLast(),
        Affine(1, init, bias=init, activation=Identity(), name='stim')])

    layers = [
        MergeMultistream(layers=[spike_rnn_path, stim_rnn_path],
                         merge="stack"),
        Affine(train_set.nfeatures, init, bias=init,
               activation=Identity(), name='spike_out'),
        Round()
    ]

    model = Model(layers=layers)
    sched = ExpSchedule(decay=0.7)
    cost = GeneralizedCost(MeanSquared())

    optimizer_two = RMSProp(stochastic_round=args.rounding)
    optimizer_one = GradientDescentMomentum(
        learning_rate=0.1, momentum_coef=0.9, schedule=sched)
    opt = MultiOptimizer({'default': optimizer_one,
                          'Bias': optimizer_two,
                          'special_linear': optimizer_two})

    callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
    callbacks.add_hist_callback(filter_key=['W'])

    model.fit(train_set, optimizer=opt, num_epochs=args.epochs,
              cost=cost, callbacks=callbacks)

    train_output = model.get_outputs(train_set).reshape(-1, train_set.nfeatures)
    valid_output = model.get_outputs(valid_set).reshape(-1, valid_set.nfeatures)
    train_target = train_set.y_series
    valid_target = valid_set.y_series

    tfev = fev(train_output, train_target, train_set.mean)
    vfev = fev(valid_output, valid_target, valid_set.mean)

    neon_logger.display('Train FEV: %g, Valid FEV: %g' % (tfev, vfev))

    plt.figure()
    plt.plot(train_output[:, 0], train_output[:, 1], 'bo', label='prediction')
    plt.plot(train_target[:, 0], train_target[:, 1], 'r.', label='target')
    plt.legend()
    plt.title('Neon on training set')
    plt.savefig('neon_series_training_output.png')

    plt.figure()
    plt.plot(valid_output[:, 0], valid_output[:, 1], 'bo', label='prediction')
    plt.plot(valid_target[:, 0], valid_target[:, 1], 'r.', label='target')
    plt.legend()
    plt.title('Neon on validation set')
    plt.savefig('neon_series_validation_output.png')
                 default_dtype=args.datatype)

# download shakespeare text
data_path = load_text('shakespeare', path=args.data_dir)
train_path, valid_path = Text.create_valid_file(data_path)

# load data and parse on character-level
train_set = Text(time_steps, train_path)
valid_set = Text(time_steps, valid_path, vocab=train_set.vocab)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization (activations passed by keyword for clarity)
layers = [
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

optimizer = RMSProp(clip_gradients=clip_gradients,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, train_set, output_file=args.output_file,
                      valid_set=valid_set, valid_freq=1,
train_set = DataIteratorSequence(time_series.train, seq_len,
                                 return_sequences=return_sequences)
valid_set = DataIteratorSequence(time_series.test, seq_len,
                                 return_sequences=return_sequences)

# define weights initialization
init = GlorotUniform()  # Uniform(low=-0.08, high=0.08)

# define model: model is different for the 2 strategies (sequence target or not)
if return_sequences is True:
    layers = [
        LSTM(hidden, init, activation=Logistic(),
             gate_activation=Tanh(), reset_cells=False),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity())
    ]
else:
    layers = [
        LSTM(hidden, init, activation=Logistic(),
             gate_activation=Tanh(), reset_cells=True),
        RecurrentLast(),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity())
    ]
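# A plausible continuation under the same assumptions as the other time-series
# snippets here (`args` from NeonArgparser); since this is regression on the
# series, a mean-squared cost is used rather than cross-entropy.
model = Model(layers=layers)
cost = GeneralizedCost(MeanSquared())
optimizer = RMSProp(stochastic_round=args.rounding)
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)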
# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# download penn treebank
dataset = PTB(time_steps, path=args.data_dir)
train_set = dataset.train_iter
valid_set = dataset.valid_iter

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
if args.rlayer_type == 'lstm':
    rlayer1 = LSTM(hidden_size, init, activation=Tanh(),
                   gate_activation=Logistic())
    rlayer2 = LSTM(hidden_size, init, activation=Tanh(),
                   gate_activation=Logistic())
else:
    rlayer1 = GRU(hidden_size, init, activation=Tanh(),
                  gate_activation=Logistic())
    rlayer2 = GRU(hidden_size, init, activation=Tanh(),
                  gate_activation=Logistic())
common_params = dict(sampling_freq=22050, clip_duration=16000, frame_duration=16)
train_params = AudioParams(**common_params)
valid_params = AudioParams(**common_params)
common = dict(target_size=1, nclasses=10, repo_dir=args.data_dir)
train = DataLoader(set_name='music-train', media_params=train_params,
                   index_file=train_idx, shuffle=True, **common)
valid = DataLoader(set_name='music-valid', media_params=valid_params,
                   index_file=valid_idx, shuffle=False, **common)

init = Gaussian(scale=0.01)
layers = [Conv((2, 2, 4), init=init, activation=Rectlin(),
               strides=dict(str_h=2, str_w=4)),
          Pooling(2, strides=2),
          Conv((3, 3, 4), init=init, batch_norm=True, activation=Rectlin(),
               strides=dict(str_h=1, str_w=2)),
          LSTM(128, init=GlorotUniform(), gate_activation=Tanh(),
               activation=Logistic(), reset_cells=True),
          RecurrentMean(),
          Affine(nout=common['nclasses'], init=init, activation=Softmax())]

model = Model(layers=layers)
opt = Adagrad(learning_rate=0.01, gradient_clip_value=15)
metric = Misclassification()
callbacks = Callbacks(model, eval_set=valid, metric=metric, **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
          callbacks=callbacks)
print('Misclassification error = %.1f%%' % (model.eval(valid, metric=metric) * 100))
display(model, ['Convolution_0'], 'inputs')
display(model, ['Convolution_0', 'Convolution_1', 'Pooling_0'], 'outputs')
train_set = imdb.train_iter
test_set = imdb.test_iter
valid_set = imdb.test_iter

# Model specification

# Initialization
init_glorot = GlorotUniform()
init_uniform = Uniform(-0.1 / 128, 0.1 / 128)

# The list of layers that make up our network
layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=128, init=init_uniform),
    LSTM(output_size=128, init=init_glorot, activation=Tanh(),
         gate_activation=Logistic(), reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(nout=2, init=init_glorot, bias=init_glorot, activation=Softmax())
]

# cost, optimizer and callbacks
cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01)
num_epochs = 2
fname = 'imdb_lstm_model'
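# A plausible continuation, assuming `args` comes from NeonArgparser as in the
# other snippets; training and serialization mirror the patterns used
# elsewhere in these examples.
model = Model(layers=layers)
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
model.fit(train_set, optimizer=optimizer, num_epochs=num_epochs,
          cost=cost, callbacks=callbacks)
model.save_params(fname + '.p')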
def test_conv_rnn(backend_default):
    train_shape = (1, 17, 142)

    be = backend_default
    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    delta = be.array(be.rng.randn(10, be.bsz))

    init_norm = Gaussian(loc=0.0, scale=0.01)
    bilstm = DeepBiLSTM(128, init_norm, activation=Rectlin(),
                        gate_activation=Rectlin(), depth=1, reset_cells=True)
    birnn_1 = DeepBiRNN(128, init_norm, activation=Rectlin(),
                        depth=1, reset_cells=True, batch_norm=False)
    birnn_2 = DeepBiRNN(128, init_norm, activation=Rectlin(),
                        depth=2, reset_cells=True, batch_norm=False)
    bibnrnn = DeepBiRNN(128, init_norm, activation=Rectlin(),
                        depth=1, reset_cells=True, batch_norm=True)
    birnnsum = DeepBiRNN(128, init_norm, activation=Rectlin(),
                         depth=1, reset_cells=True, batch_norm=False,
                         bi_sum=True)
    rnn = Recurrent(128, init=init_norm, activation=Rectlin(),
                    reset_cells=True)
    lstm = LSTM(128, init_norm, activation=Rectlin(),
                gate_activation=Rectlin(), reset_cells=True)
    gru = GRU(128, init_norm, activation=Rectlin(),
              gate_activation=Rectlin(), reset_cells=True)

    rlayers = [bilstm, birnn_1, birnn_2, bibnrnn, birnnsum, rnn, lstm, gru]

    for rl in rlayers:
        layers = [
            Conv((2, 2, 4), init=init_norm, activation=Rectlin(),
                 strides=dict(str_h=2, str_w=4)),
            Pooling(2, strides=2),
            Conv((3, 3, 4), init=init_norm, batch_norm=True,
                 activation=Rectlin(), strides=dict(str_h=1, str_w=2)),
            rl,
            RecurrentMean(),
            Affine(nout=10, init=init_norm, activation=Rectlin()),
        ]
        model = Model(layers=layers)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(train_shape, cost)
        model.fprop(inp)
        model.bprop(delta)
                                  seq_len,
                                  return_sequences=args.predict_seq)

# define weights initialization
init = GlorotUniform()  # Uniform(low=-0.08, high=0.08)

# Number of recurrent units in the network
recurrent_units = 32

# define model: model is different for the 2 strategies (sequence target or not)
if args.predict_seq:
    layers = [
        LSTM(recurrent_units, init, activation=Logistic(),
             gate_activation=Tanh(), reset_cells=False),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity())
    ]
else:
    layers = [
        LSTM(recurrent_units, init, activation=Logistic(),
             gate_activation=Tanh(), reset_cells=True),
        RecurrentLast(),
        Affine(train_set.nfeatures, init, bias=init, activation=Identity())
    ]
gradient_clip_value = 5

# download shakespeare text
data_path = load_shakespeare(path=args.data_dir)
train_path, valid_path = Text.create_valid_file(data_path)

# load data and parse on character-level
train_set = Text(time_steps, train_path)
valid_set = Text(time_steps, valid_path, vocab=train_set.vocab)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
layers = [
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# fit and validate
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)
gauss = Gaussian(scale=0.01)
glorot = GlorotUniform()
tiny = dict(str_h=1, str_w=1)
small = dict(str_h=1, str_w=2)
big = dict(str_h=1, str_w=4)
common = dict(batch_norm=True, activation=Rectlin())

layers = [Conv((3, 5, 64), init=gauss, activation=Rectlin(), strides=big),
          Pooling(2, strides=2),
          Conv((3, 3, 128), init=gauss, strides=small, **common),
          Pooling(2, strides=2),
          Conv((3, 3, 256), init=gauss, strides=small, **common),
          Conv((2, 2, 512), init=gauss, strides=tiny, **common),
          LSTM(128, glorot, activation=Tanh(),
               gate_activation=Logistic(), reset_cells=True),
          RecurrentMean(),
          Dropout(keep=0.5),
          Affine(nout=2, init=gauss, activation=Softmax())]

model = Model(layers=layers)
opt = Adagrad(learning_rate=0.001)
callbacks = Callbacks(model, eval_set=test, **args.callback_args)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

model.fit(train, optimizer=opt, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)
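# A short evaluation sketch, mirroring the metric usage in the audio example
# above; `test` is the evaluation set already passed to Callbacks.
metric = Misclassification()
print('Misclassification error = %.1f%%' % (model.eval(test, metric=metric) * 100))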