def test_birecurrence():
    dim = 10
    hidden_dim = 30

    a = C.sequence.input_variable(dim)
    b = BiRecurrence(LSTM(hidden_dim), weight_tie=False)(a)

    assert b.shape == (hidden_dim * 2, )

    c = BiRecurrence(LSTM(hidden_dim), weight_tie=True)(a)

    assert c.shape == b.shape
    assert len(b.parameters) == 3 + 3
    assert len(c.parameters) == 3 + 4
    assert c.f_token0.shape == c.b_token0.shape == (hidden_dim, )
    assert c.f_token1.shape == c.b_token1.shape == (hidden_dim, )

    d = BiRecurrence(IndyLSTM(hidden_dim), weight_tie=True)(a)

    assert d.shape == b.shape
    assert len(b.parameters) == 3 + 3
    assert len(d.parameters) == 3 + 4
    assert d.f_token0.shape == d.b_token0.shape == (hidden_dim, )
    assert d.f_token1.shape == d.b_token1.shape == (hidden_dim, )

    n = [
        np.random.random((5, 10)).astype(np.float32),
        np.random.random((7, 10)).astype(np.float32),
    ]

    c.eval({a: n})
    b.eval({a: n})
def model(self, x):
    param1 = 500
    param2 = 250
    x = Dense(param1, activation=cntk.tanh)(x)
    x = Dense(param1, activation=cntk.tanh)(x)
    x = Dense(param1, activation=cntk.tanh)(x)
    x = Sequential([(Recurrence(LSTM(param2)),
                     Recurrence(LSTM(param2), go_backwards=True)),
                    cntk.splice])(x)
    x = Sequential([(Recurrence(LSTM(param2)),
                     Recurrence(LSTM(param2), go_backwards=True)),
                    cntk.splice])(x)
    x = Dense(self.dim_y)(x)
    return x
def test_pyramidal_bi_recurrence():
    dim = 10
    width = 2
    hidden_dim = 30
    seq_length = 16

    a = C.sequence.input_variable(dim)
    b = PyramidalBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), width)(a)

    assert b.shape == (hidden_dim * 2 * width, )

    n = np.random.random((1, 16, 10)).astype(np.float32)
    result = b.eval({a: n})[0]

    assert result.shape == (seq_length / width, hidden_dim * 2 * width)
def create_model(input_sequence, label_sequence, vocab_dim, hidden_dim):
    # Create the rnn that computes the latent representation for the next token.
    rnn_with_latent_output = Sequential([
        C.layers.Embedding(hidden_dim),
        For(
            range(num_layers), lambda: Sequential([
                Stabilizer(),
                Recurrence(LSTM(hidden_dim), go_backwards=False)
            ])),
    ])

    # Apply it to the input sequence.
    latent_vector = rnn_with_latent_output(input_sequence)

    # Connect the latent output to (sampled/full) softmax.
    if use_sampled_softmax:
        weights = load_sampling_weights(token_frequencies_file_path)
        smoothed_weights = np.float32(np.power(weights, alpha))
        sampling_weights = C.reshape(C.Constant(smoothed_weights),
                                     shape=(1, vocab_dim))
        z, ce, errs = cross_entropy_with_sampled_softmax(
            latent_vector, label_sequence, vocab_dim, hidden_dim,
            softmax_sample_size, sampling_weights)
    else:
        z, ce, errs = cross_entropy_with_full_softmax(latent_vector,
                                                      label_sequence,
                                                      vocab_dim, hidden_dim)

    return z, ce, errs
def LSTM_sequence_classifier_net(input, num_output_classes, embedding_dim,
                                 LSTM_dim, cell_dim):
    lstm_classifier = Sequential([Embedding(embedding_dim),
                                  Recurrence(LSTM(LSTM_dim, cell_dim)),
                                  sequence.last,
                                  Dense(num_output_classes)])
    return lstm_classifier(input)
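
# Hypothetical usage sketch for LSTM_sequence_classifier_net, assuming `import cntk as C`,
# `import numpy as np` and the cntk.layers / cntk.ops.sequence names imported above.
# The feature dimension (50) and the layer sizes below are made-up values for illustration.
x = C.sequence.input_variable(50)
classifier = LSTM_sequence_classifier_net(x, num_output_classes=5,
                                          embedding_dim=30, LSTM_dim=25, cell_dim=25)
sample = [np.random.random((7, 50)).astype(np.float32)]   # one sequence of length 7
print(classifier.eval({x: sample})[0])                    # 5 unnormalized class scores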
def test_ctc_encoder_train_and_network_output_to_labels():
    # test CTC encoder in training loop and CTCEncoder.network_output_to_labels
    a = C.sequence.input_variable(10)
    labels = ['a', 'b', 'c']
    encoder = CTCEncoder(labels)

    labels_tensor = C.sequence.input_variable(len(encoder.classes_))  # number of classes = 4
    input_tensor = C.sequence.input_variable(100)

    prediction_tensor = Dense(4)(Recurrence(LSTM(100))(C.ones_like(input_tensor)))

    labels_graph = C.labels_to_graph(labels_tensor)

    fb = C.forward_backward(labels_graph, prediction_tensor,
                            blankTokenId=encoder.blankTokenId)

    ground_truth = ['a', 'b', 'b', 'b', 'c']
    seq_length = 10  # must be the same length as the sequence length in network_out

    pred = np.array([
        [0., 2., 0., 0.],
        [0., 2., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 0., 2.],
        [0., 0., 0., 2.],
    ]).astype(np.float32)

    n = np.random.random((10, 100)).astype(np.float32)

    # result = fb.eval({labels_tensor: [encoder.transform(ground_truth, seq_length=seq_length)],
    #                   input_tensor: [n]})
    # print(result)

    adam = C.adam(prediction_tensor.parameters, 0.01, 0.912)
    trainer = C.Trainer(prediction_tensor, (fb, ), [adam])

    for i in range(300):
        trainer.train_minibatch({
            labels_tensor: [encoder.transform(ground_truth, seq_length=seq_length)],
            input_tensor: [n]
        })
        # print(trainer.previous_minibatch_loss_average)

    result = prediction_tensor.eval({input_tensor: [n]})

    assert encoder.network_output_to_labels(result[0], squash_repeat=True) == ground_truth
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.input_variable(((2 * context + 1) * feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential(
        [For(range(3), lambda: Recurrence(LSTM(256))),
         Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15,
                         gradient_clipping_with_truncation=True)
    progress_printer = C.ProgressPrinter(freq=0)
    trainer = C.Trainer(z, (ce, errs), learner, progress_printer)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
    assert True

    os.chdir(abs_path)
def create_model(output_dim):
    return Sequential([
        For(
            range(num_layers), lambda: Sequential([
                Stabilizer(),
                Recurrence(LSTM(hidden_dim), go_backwards=False)
            ])),
        Dense(output_dim)
    ])
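
# Hypothetical usage sketch for create_model, assuming `import cntk as C` and
# `import numpy as np`. The function reads the module-level globals num_layers and
# hidden_dim; the values below and the input/output dimensions are placeholders
# for illustration.
num_layers = 2
hidden_dim = 128
inp = C.sequence.input_variable(40)
net = create_model(output_dim=10)(inp)
seq = [np.random.random((6, 40)).astype(np.float32)]      # one sequence of length 6
print(net.eval({inp: seq})[0].shape)                      # per-step outputs: (6, 10)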
def create_model():
    '''
    Creates the model to train
    :return: Returns the last output of a sequential model using LSTMs
    '''
    return Sequential([
        For(range(NUMBER_LAYERS),
            lambda: Sequential([Recurrence(LSTM(HIDDEN_LAYER_DIMENSIONS))])),
        sequence.last,
        Dense(NUM_OUTPUT_CLASSES)
    ])
def create_network():
    input_var = cntk.sequence.input_variable((num_channels, frame_height, frame_width),
                                             name='input_var')
    target_var = cntk.input_variable((num_classes,), is_sparse=True, name='target_var')

    with cntk.layers.default_options(enable_self_stabilization=True):
        model = Sequential([
            resnet_model(cntk.placeholder()), Label('resnet'),
            Dense(hidden_dim, name='cnn_fc'),
            cntk.layers.Stabilizer(),
            bidirectional_recurrence(LSTM(hidden_dim // 2), LSTM(hidden_dim // 2)),
            cntk.sequence.last,
            BatchNormalization(),
            Dense(num_classes)
        ])(input_var)

    return {
        'input': input_var,
        'target': target_var,
        'model': model,
        'loss': cntk.cross_entropy_with_softmax(model, target_var),
        'metric': cntk.classification_error(model, target_var)
    }
def test_large_model_serialization_float(tmpdir):
    import os
    from cntk.layers import Recurrence, LSTM, Dense

    type_size = np.dtype(np.float32).itemsize
    two_gb = 2**31
    size = (2097152 + 4, 256, 512, 4096)
    assert size[0] * size[1] * type_size > two_gb

    device = C.device.cpu()
    i = C.sequence.input(size[0])
    w = C.Parameter((size[0], size[1]),
                    init=C.uniform(3.0, seed=12345),
                    device=device)
    e = C.times(i, w)

    h_fwd = Recurrence(LSTM(size[2]))(e)
    h_bwd = Recurrence(LSTM(size[2]), go_backwards=True)(e)

    h_last_fwd = C.sequence.last(h_fwd)
    h_first_bwd = C.sequence.first(h_bwd)

    t = C.splice(h_last_fwd, h_first_bwd)

    z1 = Dense(size[2], activation=C.relu)(t)
    z = Dense(2, activation=None)(z1)

    filename = str(tmpdir / 'test_large_model_serialization_float.out')
    z.save(filename)
    assert os.path.getsize(filename) > two_gb

    y = C.Function.load(filename, device=device)
    assert (len(z.parameters) == len(y.parameters))

    for param_pair in zip(z.parameters, y.parameters):
        assert param_pair[0].shape == param_pair[1].shape
        assert np.allclose(param_pair[0].value, param_pair[1].value)
def __init__(self, n_in, n_out, init_lr, momentum):
    self.param1 = 512
    self.param2 = 256

    self.n_in = int(n_in)
    self.n_out = int(n_out)
    self.input = C.sequence.input_variable(shape=(self.n_in, ))
    self.label = C.sequence.input_variable(shape=(self.n_out, ))

    self.three_dnn = Sequential([
        Dense(self.param1, activation=C.tanh),
        Dense(self.param1, activation=C.tanh),
        Dense(self.param1, activation=C.tanh)
    ])
    self.rnn_layer1 = Sequential([(Recurrence(LSTM(self.param2)),
                                   Recurrence(LSTM(self.param2), go_backwards=True)),
                                  C.splice])
    self.rnn_layer2 = Sequential([(Recurrence(LSTM(self.param2)),
                                   Recurrence(LSTM(self.param2), go_backwards=True)),
                                  C.splice])
    self.final_dnn = Dense(self.n_out)

    self.output = self.model(self.input)

    self.loss = loss_fun(self.output, self.label)
    self.eval_err = loss_fun(self.output, self.label)

    self.lr_s = C.learning_rate_schedule(init_lr, C.UnitType.sample)
    self.mom_s = C.momentum_schedule(momentum)
    self.learner = C.momentum_sgd(self.output.parameters,
                                  lr=self.lr_s,
                                  momentum=self.mom_s)
    self.trainer = C.Trainer(self.output, (self.loss, self.eval_err),
                             [self.learner])
def create_recurrent_network():
    # Input variables denoting the features and label data
    features = sequence.input(((2 * context + 1) * feature_dim))
    labels = sequence.input((num_classes))

    # create network
    model = Sequential([For(range(3), lambda: Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    return {
        'feature': features,
        'label': labels,
        'ce': ce,
        'errs': errs,
        'output': z
    }
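
# Hypothetical smoke-test sketch for create_recurrent_network, assuming `import numpy as np`
# plus the module-level globals context, feature_dim and num_classes that the function
# relies on; the values below are placeholders for illustration.
context, feature_dim, num_classes = 2, 33, 132
net = create_recurrent_network()
feat = [np.random.random((5, (2 * context + 1) * feature_dim)).astype(np.float32)]
print(net['output'].eval({net['feature']: feat})[0].shape)   # (5, num_classes) = (5, 132)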
    X = np.eye(vocab_size, dtype=np.float32)[xi]
    Y = np.eye(vocab_size, dtype=np.float32)[yi]
    return [X], [Y]

get_sample(0)

input_sequence = sequence.input_variable(shape=vocab_size)
label_sequence = sequence.input_variable(shape=vocab_size)

model = Sequential([
    For(
        range(2),
        lambda: Sequential(
            [Stabilizer(), Recurrence(LSTM(256), go_backwards=False)])),
    Dense(vocab_size)
])
z = model(input_sequence)
z_sm = cntk.softmax(z)

ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)

lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
momentum_time_constant = momentum_as_time_constant_schedule(1100)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
learner = momentum_sgd(
    z.parameters,
    X = np.eye(vocab_size, dtype=np.float32)[xi]
    Y = np.eye(vocab_size, dtype=np.float32)[yi]
    return [X], [Y]

sample(0)

input_seq_axis = Axis('inputAxis')
input_sequence = sequence.input_variable(shape=vocab_size,
                                         sequence_axis=input_seq_axis)
label_sequence = sequence.input_variable(shape=vocab_size,
                                         sequence_axis=input_seq_axis)

# model = Sequential([Dense(300),Dense(vocab_size)])
model = Sequential([
    For(range(2),
        lambda: Sequential([Stabilizer(), Recurrence(LSTM(256), go_backwards=False)])),
    Dense(vocab_size)])

z = model(input_sequence)
ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)

lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
momentum_time_constant = momentum_as_time_constant_schedule(1100)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                       gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                       gradient_clipping_with_truncation=gradient_clipping_with_truncation)
progress_printer = ProgressPrinter(freq=100, tag='Training')
def model_lstm(input_tensor, hidden_dim):
    hidden = Recurrence(LSTM(hidden_dim))(input_tensor)
    prediction = Dense(1)(C.sequence.last(hidden))
    return prediction
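
# Hypothetical usage sketch for model_lstm, assuming `import cntk as C`, `import numpy as np`
# and the cntk.layers imports used above; the feature dimension (5) and hidden size (16)
# are made-up values for illustration.
features = C.sequence.input_variable(5)
regressor = model_lstm(features, hidden_dim=16)
batch = [np.random.random((8, 5)).astype(np.float32)]     # one sequence of length 8
print(regressor.eval({features: batch}))                  # one scalar prediction per sequence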