def test_cos_distance_backward():
    x = sequence.input(shape=(2,), sequence_axis=Axis("B"), needs_gradient=True)
    y = sequence.input(shape=(2,), sequence_axis=Axis("B"), needs_gradient=True)
    z = cosine_distance(x, y)
    a = np.reshape(np.float32([0.25, 0.5, 0.1, 1]), (1, 2, 2))
    b = np.reshape(np.float32([-0.5, 1.5, -0.3, -1]), (1, 2, 2))
    bwd, fwd = z.forward({x: a, y: b}, [z.output], set([z.output]))
    value = list(fwd.values())[0]
    expected = [[0.707107, -0.981665]]
    assert np.allclose(value, expected)
    grad = z.backward(bwd, {z.output: np.ones_like(value)}, set([x, y]))
    x_driv_expected = np.ndarray(
        (1, 2, 2),
        dtype=np.float32,
        buffer=np.float32([-1.131371, 0.565686, -0.188727, 0.018873]))
    y_driv_expected = np.ndarray(
        (1, 2, 2),
        dtype=np.float32,
        buffer=np.float32([0.424264, 0.141421, -0.174876, 0.052463]))
    assert np.all(np.absolute(grad[x] - x_driv_expected) < 1e-6)
    assert np.all(np.absolute(grad[y] - y_driv_expected) < 1e-6)

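# For reference, the hard-coded forward expectations in the test above can be
# reproduced with plain NumPy. The helper below is a hypothetical sketch added
# for illustration and is not part of the original suite:
def _np_cosine(u, v):
    # cosine similarity of two 1-D vectors
    return u.dot(v) / (np.linalg.norm(u) * np.linalg.norm(v))

# _np_cosine(np.float32([0.25, 0.5]), np.float32([-0.5, 1.5]))   # ~  0.707107
# _np_cosine(np.float32([0.1, 1.0]),  np.float32([-0.3, -1.0]))  # ~ -0.981665
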
def test_op_times_reduce_sequence_axis(device_id, precision):
    dt_precision = PRECISION_TO_TYPE[precision]

    from cntk import times, Value, TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK
    from cntk import sequence

    dim = 10
    seq = [[0, 1, 2], [3], [4, 5, 6, 7, 8, 9]]
    right_data = Value.one_hot(seq, dim, dtype=dt_precision)
    right_var = sequence.input(shape=(dim), is_sparse=True, dtype=dt_precision)

    left_data = [AA([1, 1, 1], dtype=dt_precision),
                 AA([1], dtype=dt_precision),
                 AA([1, 1, 1, 1, 1, 1], dtype=dt_precision)]
    left_var = sequence.input(shape=(1), dtype=dt_precision)

    func = times(left_var, right_var,
                 infer_input_rank_to_map=TIMES_REDUCE_SEQUENCE_AXIS_WITHOUT_INFERRED_INPUT_RANK)
    func2 = sequence.reduce_sum(times(left_var, right_var))
    assert func.dynamic_axes == func2.dynamic_axes

    _, forward_output = func.forward({left_var: left_data, right_var: right_data})
    actual_forward = forward_output[func.output]

    expected_forward = AA([[[1, 1, 1, 0, 0, 0, 0, 0, 0, 0]],
                           [[0, 0, 0, 1, 0, 0, 0, 0, 0, 0]],
                           [[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]])
    assert np.allclose(actual_forward, expected_forward)

def test_model_not_criterion_subset():
    input_dim = 2
    proj_dim = 11
    model1_dim = 3
    model2_dim = 4
    x = sequence.input((input_dim,))

    core = Embedding(proj_dim)
    model1 = Dense(model1_dim)(sequence.last(core(x)))
    model1_label = input((model1_dim,))
    ce_model1 = cross_entropy_with_softmax(model1, model1_label)
    pe_model1 = classification_error(model1, model1_label)

    model2 = Dense(model2_dim)(core(x))
    model2_label = sequence.input((model2_dim,))
    ce_model2 = cross_entropy_with_softmax(model2, model2_label)
    pe_model2 = classification_error(model2, model2_label)

    ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1

    lr_schedule = learning_rate_schedule(0.003, UnitType.sample)
    trainer_multitask = Trainer(model1, (ce, pe_model1),
                                sgd(ce.parameters, lr=lr_schedule))

    x_data = np.asarray([[2., 1.], [1., 2.]], np.float32)
    model1_label_data = np.asarray([1., 0., 0.], np.float32)
    model2_label_data = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]], np.float32)
    trainer_multitask.train_minibatch({
        x: [x_data],
        model1_label: [model1_label_data],
        model2_label: [model2_label_data]
    })

def test_cosine_distance_with_negative_samples():
    a = np.array(
        [[1., 1., 0., 0., 0.],
         [0., 1., 1., 0., 0.],
         [0., 0., 1., 1., 0.],
         [0., 0., 0., 1., 1.],
         [1., 0., 0., 0., 1.]],
        dtype=np.float32)
    b = np.array(
        [[1., 1., 0., 0., 0.],
         [0., 1., 1., 0., 0.],
         [0., 0., 1., 1., 0.],
         [0., 0., 0., 1., 1.],
         [1., 0., 0., 0., 1.]],
        dtype=np.float32)

    qry = sequence.input(shape=(5))
    doc = sequence.input(shape=(5))
    num_neg_samples = 2
    model = cosine_distance_with_negative_samples(
        qry, doc, shift=1, num_negative_samples=num_neg_samples)
    result = model.eval({qry: [a], doc: [b]})

    # We expect one output row per input row
    assert result[0].shape[0] == a.shape[0]
    # We expect the number of columns to be the number of negative samples + 1
    assert result[0].shape[1] == num_neg_samples + 1
    # The first value is an exact match, the second matches in only one
    # element, and the last one matches in none
    assert np.allclose(result[0], np.tile([1, 0.5, 0.], (a.shape[0], 1)))

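# A hypothetical NumPy reference (not part of the original suite) for the
# expected [1, 0.5, 0] pattern above: with shift=1 and two negative samples,
# row i of qry is compared against doc rows i, i+1 and i+2 (cyclically).
# For the circulant matrices used in the test, adjacent rows share exactly one
# element (cosine 0.5) and rows two apart share none (cosine 0).
def _np_cos_with_neg_samples(q, d, shift, num_neg):
    rows = []
    for i in range(q.shape[0]):
        row = []
        for k in range(num_neg + 1):
            j = (i + k * shift) % d.shape[0]
            row.append(q[i].dot(d[j]) /
                       (np.linalg.norm(q[i]) * np.linalg.norm(d[j])))
        rows.append(row)
    return np.array(rows, dtype=np.float32)
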
def test_rank0_output():
    x = sequence.input(shape=(768,), sequence_axis=Axis("B"), needs_gradient=True)
    y = sequence.input(shape=(768,), sequence_axis=Axis("B"), needs_gradient=True)
    z = cosine_distance(x, y)
    batch_num = 2
    batch_size = 30
    a = np.float32(np.random.rand(batch_num * batch_size, 1500, 768))
    b = np.float32(np.random.rand(batch_num * batch_size, 1500, 768))
    for i in range(batch_num):
        bwd, fwd = z.forward({x: a[i * batch_size:(i + 1) * batch_size],
                              y: b[i * batch_size:(i + 1) * batch_size]},
                             [z.output], set([z.output]))
        grad = z.backward(bwd, {z.output: np.ones_like(fwd[z.output])},
                          set([x, y]))

def create_sample_model(device, writer=None):
    in1 = sequence.input(shape=(input_dim,))
    labels = sequence.input(shape=(input_dim,))
    p = parameter(shape=(input_dim,), init=10, device=device)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner], writer)
    return (trainer, in1, labels)

def test_usermbsource_training(tmpdir):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)

    from cntk import (sequence, parameter, plus, cross_entropy_with_softmax,
                      classification_error, learning_rate_schedule, sgd,
                      Trainer, training_session, times, UnitType, input)

    feature = sequence.input(shape=(input_dim,))
    label = input(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {feature: mbs.fsi, label: mbs.lsi}

    session = training_session(trainer=trainer, mb_source=mbs,
                               model_inputs_to_streams=input_map,
                               mb_size=4, max_samples=20)
    session.train()

    assert trainer.total_number_of_samples_seen == 20

def test_not_replaced_placeholders():

    def wrap_in_block(fun_args, name):
        block_args = [placeholder(name=arg.name) for arg in fun_args]  # placeholders inside the BlockFunction
        combined_block_args = combine(block_args)                      # the content of the BlockFunction
        arg_map = list(zip(block_args, fun_args))                      # after wrapping, the block_args map to args
        combined_args = as_block(composite=combined_block_args,
                                 block_arguments_map=arg_map,
                                 block_op_name=name)
        return combined_args

    input_dim = 2
    x = sequence.input(shape=(input_dim,))
    p1 = placeholder()
    p2 = placeholder()

    a = abs(x)
    b = wrap_in_block(list(a.outputs) + [p1], "my_first_block")
    b = wrap_in_block(list(b.outputs) + [p2], "my_second_block")
    b = past_value(b.outputs[0])

    model = b.replace_placeholders({p1: b.outputs[0], p2: b.outputs[0]})

    x0 = [[1, 1], [2, 2]]
    with pytest.raises(RuntimeError):
        model.forward({x: x0}, model.outputs)

def test_sanitize_batch_sparse():
    batch = [csr([[1, 0, 2], [2, 3, 0]]),
             csr([5, 0, 1])]

    var = sequence.input(3, is_sparse=True)
    b = sanitize_batch(var, batch)
    # 2 sequences with a max sequence length of 2 and dimension 3
    assert b.shape == (2, 2, 3)

def test_eval_sparse_dense(tmpdir, device_id):
    from cntk import Axis
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk.ops import input, times

    input_vocab_dim = label_vocab_dim = 69

    ctf_data = '''\
0 |S0 3:1 |# <s> |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir / '2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features=StreamDef(field='S0', shape=input_vocab_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=label_vocab_dim, is_sparse=True)
    )), randomize=False, epoch_size=2)

    raw_input = sequence.input(shape=input_vocab_dim,
                               sequence_axis=Axis('inputAxis'),
                               name='raw_input', is_sparse=True)

    mb_valid = mbs.next_minibatch(minibatch_size_in_samples=100,
                                  input_map={raw_input: mbs.streams.features},
                                  device=cntk_device(device_id))

    z = times(raw_input, np.eye(input_vocab_dim))
    e_reader = z.eval(mb_valid, device=cntk_device(device_id))

    # CSR with the raw_input encoding in ctf_data
    one_hot_data = [[3, 4, 5, 4, 7, 12, 1], [60, 61]]
    data = [csr(np.eye(input_vocab_dim, dtype=np.float32)[d])
            for d in one_hot_data]
    e_csr = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_csr)])

    # One-hot with the raw_input encoding in ctf_data
    data = Value.one_hot(one_hot_data, num_classes=input_vocab_dim,
                         device=cntk_device(device_id))
    e_hot = z.eval({raw_input: data}, device=cntk_device(device_id))
    assert np.all([np.allclose(a, b) for a, b in zip(e_reader, e_hot)])

def test_cos_distance_backward2():
    x = sequence.input(shape=(100,), sequence_axis=Axis("B"), needs_gradient=True)
    y = sequence.input(shape=(100,), sequence_axis=Axis("B"), needs_gradient=True)
    z = cosine_distance(x, y)

    np.random.seed(0)
    a = np.float32(np.random.rand(10, 50, 100))
    b = np.float32(np.random.rand(10, 50, 100))
    bwd, fwd = z.forward({x: a, y: b}, [z.output], set([z.output]))
    value = list(fwd.values())[0]
    expected_cos = numpy_cos(a, b)
    expected = expected_cos.forward()
    assert np.allclose(value, expected)

    grad = z.backward(bwd, {z.output: np.ones_like(value)}, set([x, y]))
    bwd = expected_cos.backward()
    x_driv_expected = bwd['a']
    y_driv_expected = bwd['b']
    assert np.all(np.absolute(grad[x] - x_driv_expected) < 1e-6)
    assert np.all(np.absolute(grad[y] - y_driv_expected) < 1e-6)

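# A minimal sketch of what the numpy_cos reference helper used above computes.
# The real helper lives elsewhere in this module and may differ; this copy is
# named _numpy_cos_sketch to avoid clashing with it. forward() returns the
# per-timestep cosine similarity along the feature axis; backward() returns
# d cos/da and d cos/db for a root gradient of ones (forward() must run first).
class _numpy_cos_sketch(object):
    def __init__(self, a, b):
        self.a, self.b = a, b

    def forward(self):
        dot = np.sum(self.a * self.b, axis=-1)
        self.na2 = np.sum(self.a * self.a, axis=-1, keepdims=True)
        self.nb2 = np.sum(self.b * self.b, axis=-1, keepdims=True)
        self.cos = dot / np.squeeze(np.sqrt(self.na2 * self.nb2), axis=-1)
        return self.cos

    def backward(self):
        # d cos(a,b)/da = b/(|a||b|) - cos(a,b) * a/|a|^2, and symmetrically for b
        nanb = np.sqrt(self.na2 * self.nb2)
        cos = self.cos[..., None]
        return {'a': self.b / nanb - cos * self.a / self.na2,
                'b': self.a / nanb - cos * self.b / self.nb2}
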
def test_mask(batch, seq_starts, expected):
    shape = ()
    var = sequence.input(shape)
    if type(expected) == type(ValueError):
        with pytest.raises(expected):
            s = sanitize_batch(var, batch, seq_starts)
    else:
        s = sanitize_batch(var, batch, seq_starts)
        assert np.allclose(s.mask, expected)

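# A hypothetical case illustrating the mask convention checked above (in
# CNTK's NDMask, 2 marks a sequence begin, 1 a valid continuation and 0
# padding); the real @pytest.mark.parametrize list lives elsewhere in this
# module:
#
#   batch=[[1, 2, 3], [4, 5]], seq_starts=None
#   -> expected mask [[2, 1, 1], [2, 1, 0]]
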
def run_distributed_training(tmpdir, create_func):

    in1 = sequence.input(shape=1)
    labels = sequence.input(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    dist_learner = create_func(momentum_sgd(z.parameters, lr_per_sample,
                                            momentum_time_constant, True))

    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    found_rank = False
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            found_rank = True
    assert found_rank

    trainer = Trainer(z, (ce, errs), [dist_learner])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__

def create_recurrent_network():
    # Input variables denoting the features and label data
    features = sequence.input(((2 * context + 1) * feature_dim))
    labels = sequence.input((num_classes))

    # create network
    model = Sequential([For(range(3), lambda: Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    return {
        'feature': features,
        'label': labels,
        'ce': ce,
        'errs': errs,
        'output': z
    }

def test_one_hot_int_types(dtype):
    data = [[0, 2, 1], [1]]
    if dtype is not None:
        data = [np.asarray(d, dtype=dtype) for d in data]
    a = Value.one_hot(data, 3)
    i = sequence.input(shape=(3,))
    b = i * 1

    expected = [[[1., 0., 0.], [0., 0., 1.], [0., 1., 0.]],
                [[0., 1., 0.]]]
    # avoid shadowing a/b inside the loop
    for actual, exp in zip(b.eval({i: a}), expected):
        assert np.allclose(actual, exp)

def test_eval_sparse_no_seq(batch_index_data, device_id):
    dim = 10
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = sequence.input(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier * np.eye(dim))
        batch = np.eye(dim)[batch_index_data]
        expected = batch * multiplier
        sparse_val = csr(batch.astype('f'))
        result = z.eval({in1: [sparse_val]}, device=cntk_device(device_id))
        assert np.allclose(result, [expected])

def test_sanitize_batch_contiguity():
    a1 = AA([[1, 2], [3, 4]])
    a2 = AA([[5, 6], [7, 8]])
    var = sequence.input((2, 2), is_sparse=True)

    # transposed views are not C-contiguous, so sanitize_batch has to copy
    # the data and warns about it
    batch = [a1.T, a2.T]
    with pytest.warns(RuntimeWarning):
        b = sanitize_batch(var, batch)
    assert b.shape == (2, 1, 2, 2)

    batch = [a1, a2]
    b = sanitize_batch(var, batch)
    assert b.shape == (2, 1, 2, 2)

def test_eval_one_hot_seq(one_hot_batch, device_id):
    dim = 10
    multiplier = 2

    for var_is_sparse in [True, False]:
        in1 = sequence.input(shape=(dim,), is_sparse=var_is_sparse)
        # Convert CNTK node value to dense so that we can compare it later
        z = times(in1, np.eye(dim) * multiplier)
        # Convert expectation to dense
        expected = [np.eye(dim)[seq] * multiplier for seq in one_hot_batch]
        batch = Value.one_hot(one_hot_batch, num_classes=dim,
                              device=cntk_device(device_id))
        result = z.eval({in1: batch}, device=cntk_device(device_id))
        assert np.all([np.allclose(a, b) for a, b in zip(result, expected)])

def test_cosine_distance():
    a = np.reshape(np.arange(25.0, dtype=np.float32), (5, 5))
    b = np.reshape(np.arange(0, 5, dtype=np.float32), (1, 5))

    src = sequence.input(shape=(5), sequence_axis=Axis("Seq"))
    tgt = input(shape=(5))
    tgt_br = sequence.broadcast_as(tgt, src)
    cos_seq = cosine_distance(src, tgt_br)
    assert len(cos_seq.dynamic_axes) == 2
    assert cos_seq.dynamic_axes[1].name == "Seq"

    val = cos_seq.eval({src: [a], tgt: [b]})
    expected = [[1., 0.914659, 0.878459, 0.86155, 0.851852]]
    assert np.allclose(val, expected)

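# For reference, the expected row above can be reproduced with plain NumPy
# (a sketch added for illustration, not part of the original suite):
#
#   a = np.reshape(np.arange(25.0, dtype=np.float32), (5, 5))
#   b = np.arange(0, 5, dtype=np.float32)
#   [a[i].dot(b) / (np.linalg.norm(a[i]) * np.linalg.norm(b)) for i in range(5)]
#   # -> [1.0, 0.914659, 0.878459, 0.86155, 0.851852]
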
def test_eval_sparse_seq_1(batch, device_id):
    dim = 4
    multiplier = 2
    for var_is_sparse in [True, False]:
        in1 = sequence.input(shape=(dim,), is_sparse=var_is_sparse)
        z = times(in1, multiplier * np.eye(dim))
        if isinstance(batch[0], list):
            expected = [np.vstack([m.todense() * multiplier for m in seq])
                        for seq in batch]
        else:
            expected = [seq.todense() * multiplier for seq in batch]
        result = z.eval({in1: batch}, device=cntk_device(device_id))
        assert np.all([np.allclose(a, b) for a, b in zip(result, expected)]), \
            "%s != %s" % (result, expected)

def test_op_times_sparse_grad(device_id, precision):
    dt_precision = PRECISION_TO_TYPE[precision]

    from cntk import times, times_transpose, parameter, reshape, Value, sequence
    dim = 5
    num_sequences = 2
    seq = [i for i in range(dim)]
    identity = np.identity(dim, dtype=dt_precision)
    input_data = Value.one_hot([seq] * num_sequences, dim, dtype=dt_precision)
    input_var = sequence.input(shape=(dim), is_sparse=True,
                               needs_gradient=False, dtype=dt_precision)
    e = parameter(shape=(dim, dim), init=identity, dtype=dt_precision)
    z = reshape(times_transpose(e, times(input_var, e)), dim)
    e_grad = z.grad({input_var: input_data}, [e])

    assert np.allclose(e_grad, np.ones((dim, dim)) * 4)

def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input(shape=input_dim, is_sparse=True)
    label = input(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)

    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(0)
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample),
                      progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average

def create_network(input_vocab_dim, label_vocab_dim):
    # network complexity; initially low for faster testing
    hidden_dim = 256
    num_layers = 1

    # Source and target inputs to the model
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')
    raw_input = sequence.input(shape=(input_vocab_dim),
                               sequence_axis=input_seq_axis, name='raw_input')
    raw_labels = sequence.input(shape=(label_vocab_dim),
                                sequence_axis=label_seq_axis, name='raw_labels')

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = sequence.slice(raw_labels, 1, 0)  # <s> A B C </s> --> A B C </s>
    label_sentence_start = sequence.first(raw_labels)  # <s>

    is_first_label = sequence.is_first(label_sequence)  # <s> 0 0 0 ...
    label_sentence_start_scattered = sequence.scatter(
        label_sentence_start, is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            encoder_outputH.output, hidden_dim, hidden_dim,
            future_value, future_value)

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(
        thought_vectorH, label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(
        thought_vectorC, label_sequence)

    # Decoder
    decoder_history_hook = alias(label_sequence, name='decoder_history_hook')  # copy label_sequence

    decoder_input = element_select(is_first_label,
                                   label_sentence_start_scattered,
                                   past_value(decoder_history_hook))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i > 0):
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(
                isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(
                isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            decoder_outputH.output, hidden_dim, hidden_dim,
            recurrence_hookH, recurrence_hookC)

    decoder_output = decoder_outputH

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)

    # Criterion nodes
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # network output for decoder history
    net_output = hardmax(z)

    # make a clone of the graph where the ground truth is replaced by the network output
    ng = z.clone(CloneMethod.share,
                 {decoder_history_hook.output: net_output.output})

    return {
        'raw_input': raw_input,
        'raw_labels': raw_labels,
        'ce': ce,
        'pe': errs,
        'ng': ng,
        'output': z
    }

def test_sweep_based_schedule(tmpdir, device_id):
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk import cross_entropy_with_softmax, classification_error, plus, reduce_sum, sequence
    from cntk import Trainer

    input_dim = 69

    ctf_data = '''\
0 |S0 3:1 |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir / '2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features=StreamDef(field='S0', shape=input_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=input_dim, is_sparse=True)
    )), randomize=False)

    in1 = sequence.input(shape=(input_dim,))
    labels = sequence.input(shape=(input_dim,))
    p = parameter(shape=(input_dim,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])

    input_map = {in1: mbs.streams.features, labels: mbs.streams.labels}

    # fetch minibatch (first sequence)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.3

    # fetch minibatch (second sequence, sweep ends at this point)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.2

    # fetch minibatch (both sequences -- entire sweep in one go)
    data = mbs.next_minibatch(9, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.1

    # fetch minibatch (multiple sweeps)
    data = mbs.next_minibatch(30, input_map=input_map)
    trainer.train_minibatch(data, outputs=[z.output])
    assert learner.learning_rate() == 0.0

def test_distributed_mb_source(tmpdir):
    input_dim = 69

    ctf_data = '''\
0 |S0 3:1 |# <s> |S1 3:1 |# <s>
0 |S0 4:1 |# A |S1 32:1 |# ~AH
0 |S0 5:1 |# B |S1 36:1 |# ~B
0 |S0 4:1 |# A |S1 31:1 |# ~AE
0 |S0 7:1 |# D |S1 38:1 |# ~D
0 |S0 12:1 |# I |S1 47:1 |# ~IY
0 |S0 1:1 |# </s> |S1 1:1 |# </s>
2 |S0 60:1 |# <s> |S1 3:1 |# <s>
2 |S0 61:1 |# A |S1 32:1 |# ~AH
2 |S0 61:1 |# A |S1 32:1 |# ~AH
3 |S0 60:1 |# <s> |S1 3:1 |# <s>
3 |S0 61:1 |# A |S1 32:1 |# ~AH
3 |S0 61:1 |# A |S1 32:1 |# ~AH
3 |S0 61:1 |# A |S1 32:1 |# ~AH
4 |S0 60:1 |# <s> |S1 3:1 |# <s>
5 |S0 60:1 |# <s> |S1 3:1 |# <s>
5 |S0 61:1 |# A |S1 32:1 |# ~AH
6 |S0 60:1 |# <s> |S1 3:1 |# <s>
6 |S0 61:1 |# A |S1 32:1 |# ~AH
7 |S0 60:1 |# <s> |S1 3:1 |# <s>
8 |S0 60:1 |# <s> |S1 3:1 |# <s>
8 |S0 61:1 |# A |S1 32:1 |# ~AH
9 |S0 60:1 |# <s> |S1 3:1 |# <s>
9 |S0 61:1 |# A |S1 32:1 |# ~AH
10 |S0 61:1 |# A |S1 32:1 |# ~AH
'''
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, FULL_DATA_SWEEP
    ctf_file = str(tmpdir / '2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    # No randomization
    mb0 = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features=StreamDef(field='S0', shape=input_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=input_dim, is_sparse=True)
    )), randomize=False, epoch_size=36)  # a bit more than a sweep
    mb1 = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features=StreamDef(field='S0', shape=input_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=input_dim, is_sparse=True)
    )), randomize=False, epoch_size=36)  # a bit more than a sweep

    input = sequence.input(shape=(input_dim,))
    label = sequence.input(shape=(input_dim,))
    input_map = {input: mb0.streams.features, label: mb0.streams.labels}

    # Because we are emulating two workers here, the minibatch_size_in_samples
    # is split in two, so below we expect 5 samples per worker.
    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert data[input].num_samples == 7  # Sequence 0

    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert data[input].num_samples == 4  # Sequence 3

    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert data[input].num_samples == 5  # Sequences 5, 7, 9

    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert data[input].num_samples == 7  # Sequence 0

    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert data[input].num_samples == 4  # Sequence 3

    data = mb0.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert len(data) == 0  # No data

    data = mb1.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=1)
    assert data[input].num_samples == 4  # Sequences 2, 4

    data = mb1.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=1)
    assert data[input].num_samples == 5  # Sequences 6, 8, 10

    data = mb1.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=1)
    assert data[input].num_samples == 3  # Sequence 2

    data = mb1.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=1)
    assert len(data) == 0  # No data

    # Randomization
    mb3 = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features=StreamDef(field='S0', shape=input_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=input_dim, is_sparse=True)
    )), randomize=True, epoch_size=FULL_DATA_SWEEP)
    mb4 = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features=StreamDef(field='S0', shape=input_dim, is_sparse=True),
        labels=StreamDef(field='S1', shape=input_dim, is_sparse=True)
    )), randomize=True, epoch_size=FULL_DATA_SWEEP)

    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert data[input].num_samples == 5
    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert data[input].num_samples == 4
    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert data[input].num_samples == 4
    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert data[input].num_samples == 5
    data = mb3.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=0)
    assert data[input].num_samples == 7

    data = mb4.next_minibatch(minibatch_size_in_samples=10, input_map=input_map,
                              num_data_partitions=2, partition_index=1)
    assert len(data) == 0  # Due to chunking we do not expect any data for rank 1

def test_one_hot_skip():
    a = Value.one_hot([[0, 1, Value.ONE_HOT_SKIP]], 3)
    i = sequence.input(shape=(3,))
    b = i * 1
    # a skipped entry yields an all-zero row
    expected = [[[1., 0., 0.], [0., 1., 0.], [0., 0., 0.]]]
    assert np.allclose(b.eval({i: a}), expected)