def test_gather_op(device_id, precision):
    """Check ``C.gather`` forward values, gradients and the memory-reuse path.

    The test covers, in order:
      * gathering rows of a (6, 2) parameter with (2, 1) index inputs,
      * the gradient with respect to the gathered parameter,
      * indices that are computed from a learnable parameter,
      * gathering with (2, 2) index inputs,
      * a small trainable model whose ``sequence.unpack`` node is needed to
        trigger memory reuse around the gather node.

    Args:
        device_id: device fixture value (not read by this test body).
        precision: key into ``PRECISION_TO_TYPE`` selecting the index dtype.
    """
    dtype = PRECISION_TO_TYPE[precision]

    a_data = [AA([[0], [1]], dtype=dtype),
              AA([[3], [4]], dtype=dtype)]
    a = C.input_variable((2, 1))
    r_data = np.arange(12).reshape(6, 2).astype('f')
    # Fixed: the original passed ``r_data.data`` (the raw buffer object) as
    # the shape; the intended value is the array's shape.
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a: a_data})
    expectd = np.asarray([[[[0., 1.]], [[2., 3.]]], [[[6., 7.]], [[8., 9.]]]])
    assert np.array_equal(res, expectd)

    grads = C.gather(r, a).grad({a: a_data}, [r])
    expectd_grad = np.asarray([[1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [0, 0]], dtype=np.float32)
    assert np.array_equal(grads, expectd_grad)

    # gather with indices from a learning parameter (no gradients should be
    # passed through the indices -- 0s should be passed)
    indices_params = C.parameter(shape=(1,), init=1.0)
    grads = C.gather(r, (indices_params * a)).grad({a: a_data}, [r, indices_params])
    assert np.array_equal(grads[r], expectd_grad)
    assert np.array_equal(grads[indices_params], np.asarray([0.0], dtype=np.float32))

    b_data = [AA([[0, 2], [1, 3]], dtype=dtype),
              AA([[2, 4], [3, 5]], dtype=dtype)]
    b = C.input_variable((2, 2))
    res2 = C.gather(r, b).eval({b: b_data})
    expectd2 = np.asarray([[[[0., 1.], [4., 5.]], [[2., 3.], [6., 7.]]],
                           [[[4., 5.], [8., 9.]], [[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expectd2)

    # the following small model is to test the memory reuse issue of gather node.
    x = C.input((3, 4))
    x1 = C.to_sequence(x)
    w = C.parameter((5, 6), init=1)
    z = C.gather(w, x1)
    assert z.shape == (4, 6)
    # need the unpack node to trigger memory reuse.
    f = C.sequence.unpack(z, 0, no_mask_output=True)
    y = C.input((3, 4, 6))
    loss = C.reduce_mean(C.square(f - y), axis=-1)
    loss = C.reduce_mean(loss, axis=C.Axis.all_axes())

    g = C.constant(0, shape=w.shape)
    u = C.assign(w, g + 1)
    learner = C.cntk_py.universal_learner([w], [g], u)
    trainer = C.trainer.Trainer(loss, [loss], [learner])

    indices = np.asarray([[[1, 2, 1, 2]]])
    # Renamed from ``input`` / ``lable``: avoid shadowing the builtin and fix
    # the typo. Shapes are (10, 3, 4) and (10, 3, 4, 6) respectively.
    index_batch = np.repeat(np.repeat(indices, 3, axis=1), 10, axis=0)
    label_batch = np.full((10, 3, 4, 6), 2)
    trainer.train_minibatch({x: index_batch, y: label_batch})

    # the 2nd and 3rd rows should be updated by gradients.
    assert np.mean(w.value[1, :]) < 1
    assert np.mean(w.value[2, :]) < 1
    # the other three rows should stay at 1
    assert np.isclose(np.mean(w.value[0, :]), 1)
    assert np.isclose(np.mean(w.value[3, :]), 1)
    assert np.isclose(np.mean(w.value[4, :]), 1)
def gradFunc(self, arg):
    """Build the gradient graph: pass the incoming gradient where |input| <= 1.

    Args:
        arg: object whose ``shape`` and ``dynamic_axes`` describe the
            forward-pass input.

    Returns:
        A 3-tuple ``(gradient_expression, forward_input, upstream_gradient)``
        where the last two are the freshly created input variables.
    """
    dims = arg.shape
    axes = arg.dynamic_axes

    # Input variable standing in for the input of the forward function.
    forward_in = C.input(shape=dims, dynamic_axes=axes)
    # Input variable standing in for the gradient handed back by the next stage.
    upstream = C.input(shape=dims, dynamic_axes=axes)

    # Compare the magnitude of the forward input against 1 ...
    within_unit = less_equal(abs(forward_in), 1)
    # ... and multiply that comparison result with the incoming gradient.
    return element_times(upstream, within_unit), forward_in, upstream
def train_eval_mnist_onelayer_from_file(criterion_name=None, eval_name=None):
    """Train and test a one-hidden-layer MNIST network read from text files.

    Args:
        criterion_name: name assigned to the cross-entropy criterion node.
        eval_name: name assigned to the squared-error evaluation node.

    Returns:
        Whatever ``ctx.test`` returns for the criterion and evaluation nodes.
    """
    # Network definition
    feat_dim, label_dim, hidden_dim = 784, 10, 200

    data_dir = os.path.join(os.path.dirname(__file__), "Data")
    training_filename = os.path.join(data_dir, "Train-28x28_text.txt")
    test_filename = os.path.join(data_dir, "Test-28x28_text.txt")

    features = C.input(feat_dim)
    features.name = 'features'

    # 0.00390625 == 1 / 256
    scaled_features = C.element_times(features, C.constant(0.00390625))

    labels = C.input(label_dim)
    labels.tag = 'label'
    labels.name = 'labels'

    train_reader = C.CNTKTextFormatReader(training_filename)
    test_reader = C.CNTKTextFormatReader(test_filename)

    hidden = add_dnn_sigmoid_layer(feat_dim, hidden_dim, scaled_features, 1)
    out = add_dnn_layer(hidden_dim, label_dim, hidden, 1)
    out.tag = 'output'

    criterion = C.cross_entropy_with_softmax(labels, out)
    criterion.name = criterion_name
    criterion.tag = 'criterion'

    evaluation = C.ops.square_error(labels, out)
    evaluation.name = eval_name
    evaluation.tag = 'eval'

    # Specify the training parameters (settings are scaled down)
    sgd_params = C.SGDParams(epoch_size=600, minibatch_size=32,
                             learning_rates_per_mb=0.1, max_epochs=5,
                             momentum_per_mb=0)

    def bind_streams(reader):
        # Same label/feature mapping is used for training and for testing.
        mapped = reader.map(labels, alias='labels', dim=label_dim)
        return mapped.map(features, alias='features', dim=feat_dim)

    roots = [criterion, evaluation]

    # Create a context or re-use if already there
    with C.LocalExecutionContext('mnist_one_layer', clean_up=True) as ctx:
        # CNTK actions
        ctx.train(root_nodes=roots,
                  training_params=sgd_params,
                  input_map=bind_streams(train_reader))
        return ctx.test(root_nodes=roots,
                        input_map=bind_streams(test_reader))
def test_validation_before_eval():
    """Evaluating ``times(w, v)`` must raise ``ValueError``.

    Both parameters are declared with one ``C.InferredDimension`` each.
    """
    left = C.parameter((4, C.InferredDimension))
    right = C.parameter((C.InferredDimension, 5))
    product = C.times(left, right)

    # The two products below are never evaluated; presumably building them is
    # what pins the inferred dimensions of the parameters -- TODO confirm.
    col_input = C.input((4, 1))
    left_times_col = C.times(left, col_input)

    row_input = C.input((1, 5))
    row_times_right = C.times(row_input, right)

    with pytest.raises(ValueError):
        product.eval()
def test_free_dimension_broadcast():
    """Multiplying an unpacked sequence with a static input keeps shape (-3, 5)."""
    seq_in = C.sequence.input(shape=(5,))
    unpacked, _ = C.sequence.unpack(seq_in, padding_value=0).outputs

    # Same check for a rank-1 and a rank-2 static operand.
    vector_in = C.input(shape=(5,))
    assert (unpacked * vector_in).shape == (-3, 5)

    row_in = C.input(shape=(1, 5,))
    assert (unpacked * row_in).shape == (-3, 5)
def test_validation_before_eval():
    """``wv.eval()`` is expected to fail with ``ValueError``.

    ``w`` and ``v`` each carry one inferred dimension; further products with
    fixed-shape inputs are built on them before the evaluation is attempted.
    """
    inferred = C.InferredDimension

    w = C.parameter((4, inferred))
    v = C.parameter((inferred, 5))
    wv = C.times(w, v)

    # Graph nodes built only for their construction; they are not evaluated.
    four_by_one = C.input((4, 1))
    w_applied = C.times(w, four_by_one)
    one_by_five = C.input((1, 5))
    v_applied = C.times(one_by_five, v)

    with pytest.raises(ValueError):
        wv.eval()
def __init__(self, num_bandits: int, num_arms: int, hp: Hyperparameters) -> None:
    """Set up the bandit gang, the CNTK input variables and bookkeeping state.

    Args:
        num_bandits: forwarded to ``BanditGang``.
        num_arms: forwarded to ``BanditGang``; also the length of ``self.actions``.
        hp: hyperparameters, passed to ``self.create_model`` and stored on ``self.hp``.
    """
    self.gang = BanditGang(num_bandits, num_arms)
    self.input_var = C.input(2, dtype=np.float32, name="input_var") #state and proposed action
    self.output_var = C.input(1, name="output_var")
    self.label_var = C.input(1, name="label_var")
    # NOTE: create_model runs before self.hp is assigned, so it must rely on
    # the ``hp`` argument rather than on ``self.hp``.
    self.create_model(hp)
    # Action ids 0 .. num_arms - 1.
    self.actions = np.arange(num_arms, dtype=np.int32)
    self.softmax = C.softmax(self.output_var)
    # NOTE(review): np.array((2,)) yields the one-element array [2.], not an
    # array of shape (2,) -- confirm whether np.zeros((2,)) was intended.
    self.in_data = np.array((2,), dtype=np.float32) #dummy input for network, for now.
    #self.truth = self.softmax.eval(np.array(self.bandit.arms, dtype=np.float32))
    self.hp = hp
    # self.error = self.get_squared_error()
    # Loss history container (initially empty).
    self.plotdata = {"loss":[]}
def ffnet():
    """Train a small feed-forward classifier on random data.

    Builds ``Dense(3, sigmoid) -> Dense(3)``, trains it for 100 minibatches of
    25 samples produced by ``generate_random_data`` and then tests it on one
    more freshly generated minibatch.

    Returns:
        A tuple ``(last_avg_error, avg_error)``: the sample-weighted average
        training loss over all minibatches seen, and the value returned by
        ``trainer.test_minibatch`` on the held-out minibatch.
    """
    inputs = 3
    outputs = 3
    hidden_dimension = 3

    # input variables denoting the features and label data
    features = C.input((inputs), np.float32)
    label = C.input((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential(
        [Dense(hidden_dimension, activation=C.sigmoid),
         Dense(outputs)])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
    progress_printer = ProgressPrinter(0)
    trainer = C.Trainer(
        z, (ce, pe),
        [sgd(z.parameters, lr=lr_per_minibatch,
             gaussian_noise_injection_std_dev=0.01)],
        [progress_printer])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 100

    aggregate_loss = 0.0
    for _ in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({features: train_features, label: labels})
        sample_count = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

    # Only the value after the final minibatch is returned, so compute the
    # running average once instead of on every iteration.
    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({
        features: test_features,
        label: test_labels
    })
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
def train_eval_logistic_regression_from_file(criterion_name=None, eval_name=None, device_id=-1):
    """Train and test a 3-class logistic regression from text-format files.

    Args:
        criterion_name: name assigned to the cross-entropy node.
        eval_name: name assigned to the squared-error node.
        device_id: value stored on ``ctx.device_id`` before training.

    Returns:
        Whatever ``ctx.test`` returns for the criterion and evaluation nodes.
    """
    here = os.path.dirname(__file__)

    # Using data from https://github.com/Microsoft/CNTK/wiki/Tutorial
    train_file = os.path.join(here, "Train-3Classes.txt")
    test_file = os.path.join(here, "Test-3Classes.txt")

    X = C.input(2)
    y = C.input(3)

    # Zero-initialised weights (3 x 2) and bias (3 x 1).
    W = C.parameter(value=np.zeros(shape=(3, 2)))
    b = C.parameter(value=np.zeros(shape=(3, 1)))

    out = C.times(W, X) + b
    out.tag = 'output'

    criterion = C.cross_entropy_with_softmax(y, out)
    criterion.name = criterion_name
    criterion.tag = 'criterion'

    evaluation = C.ops.square_error(y, out)
    evaluation.tag = 'eval'
    evaluation.name = eval_name

    # training data readers
    train_reader = C.CNTKTextFormatReader(train_file, randomize=None)
    # testing data readers
    test_reader = C.CNTKTextFormatReader(test_file, randomize=None)

    sgd_params = C.SGDParams(epoch_size=0, minibatch_size=25,
                             learning_rates_per_mb=0.1, max_epochs=3)

    def bind_streams(reader):
        # Stream 'I' feeds X (dim 2), stream 'L' feeds y (dim 3).
        return reader.map(X, alias='I', dim=2).map(y, alias='L', dim=3)

    roots = [criterion, evaluation]
    with C.LocalExecutionContext('logreg') as ctx:
        ctx.device_id = device_id
        ctx.train(root_nodes=roots,
                  training_params=sgd_params,
                  input_map=bind_streams(train_reader))
        return ctx.test(root_nodes=roots,
                        input_map=bind_streams(test_reader))
def test_eval_not_all_outputs():
    """Evaluate one output of a combined function while feeding only its input."""
    x = input(1)
    x_batch = [AA([3], dtype=np.float32)]
    y = input(1)
    y_batch = [AA([2], dtype=np.float32)]

    incremented = x + 1
    decremented = y - 1
    both = combine([incremented, decremented])

    # Only ``x`` is fed and only the ``x + 1`` output is requested.
    got = both.eval({x: x_batch}, [incremented])
    assert np.array_equal(got, np.asarray([[4.]]))

    # Only ``y`` is fed and only the ``y - 1`` output is requested.
    got = both.eval({y: y_batch}, [decremented])
    assert np.array_equal(got, np.asarray([[1.]]))
def gradFunc(self, arg):
    """Build the gradient graph, clipping against ``self.bit_map``.

    The incoming gradient is multiplied by the result of
    ``|input| <= bit_map``.

    Args:
        arg: object whose ``shape`` and ``dynamic_axes`` describe the
            forward-pass input.

    Returns:
        A 3-tuple ``(gradient_expression, forward_input, upstream_gradient)``.
    """
    # Input variable corresponding to the input of the forward function.
    forward_in = C.input(shape=arg.shape, dynamic_axes=arg.dynamic_axes)
    # Input variable for the gradient passed back from the next stage.
    upstream = C.input(shape=arg.shape, dynamic_axes=arg.dynamic_axes)

    magnitude = C.abs(forward_in)
    # The clipping bound is a function of the bit map, since higher bit
    # counts can represent higher numbers.
    bound = C.constant(self.bit_map)
    pass_through = C.less_equal(magnitude, bound)

    return element_times(upstream, pass_through), forward_in, upstream
def modelInit(self):
    """Create the output folder, the VGG13 model, its inputs and the trainer.

    Side effects:
        * creates ``<base_folder>/models`` on disk if it is missing,
        * sets ``output_model_folder``, ``model``, ``input_var``,
          ``label_var``, ``z`` and ``trainer`` on ``self``,
        * calls ``self.genData()``,
        * prints two progress messages.
    """
    # Create the output model folder. ``exist_ok=True`` replaces the original
    # exists()-then-makedirs() pair, which could fail if the directory was
    # created between the check and the call.
    self.output_model_folder = os.path.join(self.base_folder, R'models')
    os.makedirs(self.output_model_folder, exist_ok=True)

    self.model = VGG13(self.num_classes)
    self.input_var = ct.input(
        (1, self.model.input_height, self.model.input_width), np.float32)
    self.label_var = ct.input((self.num_classes), np.float32)
    print("initialized model")

    self.genData()
    # ct.input_variables takes the no. of dimensions. and automatically creates
    # 1-hot encoded. ct.input doesn't.

    # criterion of model: loss, metric:
    #   loss   = cross_entropy_with_softmax
    #   metric = classification error
    self.z = self.model.model(self.input_var)
    loss = ct.cross_entropy_with_softmax(self.z, self.label_var)
    metric = ct.classification_error(self.z, self.label_var)
    # Alternative hand-written loss, kept for reference (was a bare string
    # literal in the original, i.e. dead code):
    #   pred = ct.softmax(z)
    #   loss = ct.negate(ct.reduce_sum(ct.element_times(label_var, ct.log(pred)), axis=-1))

    minibatch_size = 32
    epoch_size = self.trainingValues.getLengthOfData()

    # Learning-rate plan: eta for the first 20 schedule units, eta/2 for the
    # next 20, then eta/10 for the remainder. Because ``epoch_size`` is passed
    # to the schedule, one unit is ``epoch_size`` samples (an epoch), not a
    # single minibatch as the original comment claimed.
    base_lr = self.model.learning_rate
    lr_per_minibatch = [base_lr] * 20 + [base_lr / 2.0] * 20 + [base_lr / 10.0]
    mm_time_constant = -minibatch_size / np.log(0.9)
    lr_schedule = ct.learning_rate_schedule(lr_per_minibatch,
                                            unit=ct.UnitType.minibatch,
                                            epoch_size=epoch_size)
    mm_schedule = ct.momentum_as_time_constant_schedule(mm_time_constant)

    # construct the trainer
    # learner performs model updates. can be adam() or sgd()
    learner = ct.momentum_sgd(self.z.parameters, lr_schedule, mm_schedule)

    # The Trainer optimizes the loss by SGD, and logs the metric
    self.trainer = ct.Trainer(self.z, (loss, metric), learner)
    print("created trainer and learner")
def test_debug_multi_output():
    """Run ``debug_model`` on a ``times`` node fed by a two-output ``combine``.

    The debugger is driven by five scripted ``'n'`` commands; the test then
    checks the structure of what was written to the output stream: eight
    entries, a "forward" banner first, a "backward" banner fifth, and in the
    backward section the Times line followed by the parameter and input lines
    in either order.
    """
    input_dim = 2
    num_output_classes = 2

    f_input = input(input_dim, np.float32, needs_gradient=True, name='features')
    p = parameter(shape=(input_dim, ), init=10, name='p')
    comb = combine([f_input, p])

    ins = InStream(['n', 'n', 'n', 'n', 'n'])
    outs = OutStream()

    z = times(comb.outputs[0], comb.outputs[1], name='z')
    z = debug_model(z, ins, outs)

    l_input = input(num_output_classes, np.float32, name='labels')
    loss = cross_entropy_with_softmax(z, l_input)
    eval_error = classification_error(z, l_input)

    _train(z, loss, eval_error,
           loss.find_by_name('features'),
           loss.find_by_name('labels'),
           num_output_classes, 1)

    # outs.written contains something like
    # =================================== forward ===================================
    # Parameter('p', [], [2]) with uid 'Parameter4'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # =================================== backward ===================================
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Parameter('p', [], [2]) with uid 'Parameter4'
    #
    # NOTE(review): the text ``assert outs.written == out_stuff`` followed the
    # sample above. ``out_stuff`` is not defined in this function and an exact
    # comparison would contradict the uid- and order-tolerant checks below, so
    # it is kept as a comment rather than executed -- confirm against history.

    assert len(outs.written) == 8

    v_p = "Parameter('p', "
    v_i = "Input('features'"
    v_t = 'Times: '

    assert outs.written[0].startswith('=') and 'forward' in outs.written[0]
    # Entries 1-3 are the forward-pass lines; they are not checked here.

    assert outs.written[4].startswith('=') and 'backward' in outs.written[4]
    line_5, line_6, line_7 = outs.written[5:8]
    assert line_5.startswith(v_t)
    # Parameter and input may be reported in either order.
    assert ((line_6.startswith(v_p) and line_7.startswith(v_i)) or
            (line_6.startswith(v_i) and line_7.startswith(v_p)))
def test_gather_op(device_id, precision):
    """Check ``C.gather`` forward values, its gradient and the memory-reuse path.

    The test covers gathering rows of a (6, 2) parameter with (2, 1) and
    (2, 2) index inputs, the gradient with respect to the parameter, and a
    small trainable model whose ``sequence.unpack`` node is needed to trigger
    memory reuse around the gather node.

    Args:
        device_id: device fixture value (not read by this test body).
        precision: key into ``PRECISION_TO_TYPE`` selecting the index dtype.
    """
    dtype = PRECISION_TO_TYPE[precision]

    a_data = [AA([[0], [1]], dtype=dtype),
              AA([[3], [4]], dtype=dtype)]
    a = C.input_variable((2, 1))
    r_data = np.arange(12).reshape(6, 2).astype('f')
    # Fixed: the original passed ``r_data.data`` (the raw buffer object) as
    # the shape; the intended value is the array's shape.
    r = C.parameter(shape=r_data.shape, init=r_data)
    res = C.gather(r, a).eval({a: a_data})
    expectd = np.asarray([[[[0., 1.]], [[2., 3.]]], [[[6., 7.]], [[8., 9.]]]])
    assert np.array_equal(res, expectd)

    grads = C.gather(r, a).grad({a: a_data}, [r])
    expectd_grad = np.asarray([[1, 1], [1, 1], [0, 0], [1, 1], [1, 1], [0, 0]], dtype=np.float32)
    assert np.array_equal(grads, expectd_grad)

    b_data = [AA([[0, 2], [1, 3]], dtype=dtype),
              AA([[2, 4], [3, 5]], dtype=dtype)]
    b = C.input_variable((2, 2))
    res2 = C.gather(r, b).eval({b: b_data})
    expectd2 = np.asarray([[[[0., 1.], [4., 5.]], [[2., 3.], [6., 7.]]],
                           [[[4., 5.], [8., 9.]], [[6., 7.], [10., 11.]]]])
    assert np.array_equal(res2, expectd2)

    # the following small model is to test the memory reuse issue of gather node.
    x = C.input((3, 4))
    x1 = C.to_sequence(x)
    w = C.parameter((5, 6), init=1)
    z = C.gather(w, x1)
    assert z.shape == (4, 6)
    # need the unpack node to trigger memory reuse.
    f = C.sequence.unpack(z, 0, no_mask_output=True)
    y = C.input((3, 4, 6))
    loss = C.reduce_mean(C.square(f - y), axis=-1)
    loss = C.reduce_mean(loss, axis=C.Axis.all_axes())

    g = C.constant(0, shape=w.shape)
    u = C.assign(w, g + 1)
    learner = C.cntk_py.universal_learner([w], [g], u)
    trainer = C.trainer.Trainer(loss, [loss], [learner])

    indices = np.asarray([[[1, 2, 1, 2]]])
    # Renamed from ``input`` / ``lable``: avoid shadowing the builtin and fix
    # the typo. Shapes are (10, 3, 4) and (10, 3, 4, 6) respectively.
    index_batch = np.repeat(np.repeat(indices, 3, axis=1), 10, axis=0)
    label_batch = np.full((10, 3, 4, 6), 2)
    trainer.train_minibatch({x: index_batch, y: label_batch})

    # the 2nd and 3rd rows should be updated by gradients.
    assert np.mean(w.value[1, :]) < 1
    assert np.mean(w.value[2, :]) < 1
    # the other three rows should stay at 1
    assert np.isclose(np.mean(w.value[0, :]), 1)
    assert np.isclose(np.mean(w.value[3, :]), 1)
    assert np.isclose(np.mean(w.value[4, :]), 1)
def test_reshape_free_static_axis():
    """Reshape sub-ranges of axes on an input whose first axis is free."""
    x = C.input((C.FreeDimension, 2, 3))

    # Collapse axes [0, 2) into one inferred axis.
    merged_front = C.reshape(x, -1, 0, 2)
    assert merged_front.shape == (C.FreeDimension, 3)

    batch = np.arange(12).reshape(2, 2, 3)
    out = merged_front.eval({x: batch})
    assert np.array_equal(out[0], batch.reshape(4, 3))

    batch = np.arange(18).reshape(3, 2, 3)
    out = merged_front.eval({x: batch})
    assert np.array_equal(out[0], batch.reshape(6, 3))

    # Collapse axes [1, 3) into one inferred axis.
    merged_back = C.reshape(x, -1, 1, 3)
    assert merged_back.shape == (C.FreeDimension, 6)

    batch = np.arange(12).reshape(2, 2, 3)
    out = merged_back.eval({x: batch})
    assert np.array_equal(out[0], batch.reshape(2, 6))

    # Replace axes [0, 2) by a fixed extent of 4.
    fixed_front = C.reshape(x, 4, 0, 2)
    assert fixed_front.shape == (4, 3)

    batch = np.arange(12).reshape(2, 2, 3)
    out = fixed_front.eval({x: batch})
    assert np.array_equal(out[0], batch.reshape(4, 3))

    # Data with only 6 elements cannot satisfy the fixed (4, 3) target.
    batch = np.arange(6).reshape(1, 2, 3)
    with pytest.raises(ValueError):
        out = fixed_front.eval({x: batch})
def signFunc(self, arg):
    """Build a sign-function graph for an input shaped like ``arg``.

    Args:
        arg: object providing ``shape`` and ``dynamic_axes``.

    Returns:
        A tuple ``(sign_expression, input_variable)``.
    """
    # Input variable matching the dimensions of the argument.
    sign_in = C.input(shape=arg.shape, dynamic_axes=arg.dynamic_axes)
    # Stage one: test whether the input is greater than zero.
    positive = greater(sign_in, 0)
    # Stage two: keep the comparison result where it is set, use -1 elsewhere.
    signed = element_select(positive, positive, -1)
    return signed, sign_in
def test_convolution_transpose(input_size, conv_size, result, device_id, precision):
    """Compare ``convolution_transpose`` forward output with ``result``.

    Input and kernel are both filled with 0, 1, 2, ... and reshaped to
    ``input_size`` / ``conv_size``. No backward values are checked.
    """
    from cntk import convolution_transpose

    dt = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    # Input operand: running sequence resized to input_size.
    input_operand = np.arange(np.prod(input_size), dtype=dt).reshape(input_size)

    a = C.input(shape=input_operand.shape[1:],
                dtype=sanitize_dtype_cntk(precision),
                needs_gradient=False,
                name='a')

    # Convolution kernel: built the same way.
    kernel_values = np.arange(np.prod(conv_size), dtype=dt).reshape(conv_size)
    conv_map = constant(value=kernel_values, device=dev)

    input_op = convolution_transpose(conv_map, a, auto_padding=[False])

    unittest_helper(input_op, {a: input_operand}, AA(result),
                    None, device_id=device_id, precision=precision)
def test_op_average_pooling_include_pad(input_size, pooling_window, strides, result, device_id, precision):
    """Check average pooling with ``include_pad=True`` forward and backward.

    The input holds 1, 2, 3, ... reshaped to ``input_size``; the expected
    gradient is ``1 / prod(pooling_window)`` for every input element.
    """
    from cntk import pooling

    dt = PRECISION_TO_TYPE[precision]

    element_count = np.prod(input_size)
    input_operand = np.arange(1, element_count + 1, 1, dtype=dt).reshape(input_size)

    a = C.input(shape=input_operand.shape[1:],
                dtype=sanitize_dtype_cntk(precision),
                needs_gradient=True,
                name='a')

    backward = (1 / np.prod(pooling_window)) * np.ones_like(input_operand)

    input_op = pooling(a, AVG_POOLING, pooling_window, strides,
                       auto_padding=[True], include_pad=True)

    unittest_helper(input_op, {a: input_operand}, AA(result), {a: backward},
                    device_id=device_id, precision=precision)
def test_free_and_inferred_static_dimension():
    """``times`` with a (free, inferred) input against a fixed 2x2 weight.

    Runs one-row, two-row and again one-row data through the same function,
    then checks that three-column data is rejected with ``ValueError``.
    """
    x = C.input((C.FreeDimension, -1))
    w = C.parameter(init=np.asarray([[2, 5], [1, 3]], dtype=np.float32))
    t = C.times(x, w)

    def grad_and_value(rows):
        # Returns (gradient w.r.t. w, value of t) for the given data.
        return t.grad({x: rows}, wrt=[w], outputs=[t])

    f32 = np.float32
    single_row = np.asarray([[0.5, 0.2]], f32)
    single_val = np.asarray([[[1.2, 3.1]]], dtype=f32)
    single_grad = np.asarray([[0.5, .5], [.2, .2]], dtype=f32)

    w_grad, t_val = grad_and_value(single_row)
    assert np.array_equal(t_val, single_val)
    assert np.array_equal(w_grad, single_grad)

    two_rows = np.asarray([[0.5, 0.2], [0.1, .6]], f32)
    w_grad, t_val = grad_and_value(two_rows)
    assert np.allclose(t_val, np.asarray([[[1.2, 3.1], [0.8, 2.3]]], dtype=f32))
    assert np.array_equal(w_grad, np.asarray([[0.6, .6], [.8, .8]], dtype=f32))

    # Back to a single row after the free dimension has been 2.
    w_grad, t_val = grad_and_value(np.asarray([[0.5, 0.2]], f32))
    assert np.array_equal(t_val, single_val)
    assert np.array_equal(w_grad, single_grad)

    three_columns = np.asarray([[0.5, 0.2, 0.9]], f32)
    with pytest.raises(ValueError):
        w_grad, t_val = grad_and_value(three_columns)
def test_per_dim_mean_var_norm():
    """(x - 2) * 0.5 for x in {3, 1} must give {0.5, -0.5}."""
    f32 = np.float32
    mean = np.asarray([2.], dtype=f32)
    inv_stddev = np.asarray([0.5], dtype=f32)

    x = C.input((1,))
    normalized = C.per_dim_mean_variance_normalize(x, mean, inv_stddev)

    samples = np.asarray([[3.], [1.]], dtype=f32)
    assert np.array_equal(normalized.eval({x: samples}), [[.5], [-.5]])
def test_layers_conv_pool_unpool_deconv():
    """Chain Convolution -> MaxPooling -> MaxUnpooling -> ConvolutionTranspose.

    Checks that the output shape equals the input shape and compares the
    lower-right 3x3 corner of the result with known values.
    """
    channels, height, width = 1, 4, 4
    y = input((channels, height, width))

    num_maps = 1
    use_padding = True
    kernel_init = 1
    window = (2, 2)
    step = (2, 2)

    # 0 .. 15 laid out as one 1x4x4 sample.
    dat = np.arange(0, 16, dtype=np.float32).reshape(1, 1, 4, 4)

    conv = Convolution(window, num_maps, pad=use_padding, init=kernel_init, activation=None)(y)
    pool = MaxPooling(window, step)(conv)
    unpool = MaxUnpooling(window, step)(pool, conv)
    z = ConvolutionTranspose(window, num_maps, init=kernel_init, pad=use_padding)(unpool)

    assert z.shape == y.shape

    res = z(dat)
    expected_res = np.asarray([[30, 64, 34],
                               [76, 160, 84],
                               [46, 96, 50]], np.float32)
    np.testing.assert_array_almost_equal(
        res[0][0][1:, 1:], expected_res, decimal=6,
        err_msg="Wrong values in conv/pooling/unpooling/conv_transposed")
def test_sequential_convolution_without_reduction_dim():
    """Sequential convolution with ``reduction_rank=0`` on scalar and dim-1 sequences.

    The first two cases compare numeric output for a length-5 sequence and a
    3-tap filter [4, 2, 1] without padding (three output steps). The remaining
    statements only construct graphs.
    """
    c = Convolution(3, init=np.array([4., 2., 1.], dtype=np.float32), sequential=True, pad=False, reduction_rank=0, bias=False)
    c.update_signature(Sequence[Tensor[()]])  # input is a sequence of scalars
    data = [np.array([2., 6., 4., 8., 6.])]   # like a short audio sequence, in the dynamic dimension
    out = c(data)
    exp = [[24., 40., 38.]]
    np.testing.assert_array_equal(out, exp, err_msg='Error in sequential convolution without reduction dimension')

    c = Convolution(3, init=np.array([4., 2., 1.], dtype=np.float32), sequential=True, pad=False, reduction_rank=0, bias=False)
    c.update_signature(Sequence[Tensor[1]])  # input is a sequence of dim-1 vectors
    data = [np.array([[2.], [6], [4.], [8.], [6.]])]
    out = c(data)
    exp = [[[24.], [40.], [38]]]  # not reducing; hence, output is also a sequence of dim-1 vectors
    np.testing.assert_array_equal(out, exp, err_msg='Error in sequential convolution without reduction dimension')

    # these cases failed before
    # (construction only: the resulting functions are not evaluated)
    emb_dim = 10
    x = input(**Sequence[Tensor[20]])
    m = Embedding(emb_dim)(x)
    m = Convolution(filter_shape=3, sequential=True)(m)

    # this one still fails
    # Reshape: Operand (sub-)dimensions '[3]' incompatible with desired replacement (sub-)dimensions '[]'. Number of elements must be the same..
    # NOTE(review): the statements below are live code, so "still fails"
    # presumably describes an older state -- TODO confirm.
    m = Embedding(emb_dim)(x)
    m = reshape(m, (emb_dim,1))
    m = Convolution(filter_shape=(3,1), num_filters=13, pad=True, sequential=True)(m)

    m = Embedding(emb_dim)(x)
    m = Convolution(filter_shape=3, pad=True, sequential=True)(m)
def test_op_pooling_geometry(input_size, pooling_window, strides, padding, result, device_id, precision):
    """Compare max-pooling forward output with ``result`` for a given geometry.

    The input is 0, 1, 2, ... reshaped to ``input_size``; no backward values
    are checked.
    """
    from cntk import pooling

    dt = PRECISION_TO_TYPE[precision]

    # Running sequence up to the total size, resized to input_size.
    input_operand = np.arange(np.prod(input_size), dtype=dt).reshape(input_size)

    a = C.input(shape=input_operand.shape[1:],
                dtype=sanitize_dtype_cntk(precision),
                needs_gradient=False,
                name='a')

    input_op = pooling(a, MAX_POOLING, pooling_window, strides,
                       auto_padding=padding)

    unittest_helper(input_op, {a: input_operand}, AA(result),
                    None, device_id=device_id, precision=precision)
def interactive_session(s2smodel, vocab, i2w, show_attention=False):
    """Read words from stdin and print their phonetic transcription.

    Each word of an entered line is upper-cased letter by letter, decoded
    with a greedy decoder wrapped around ``s2smodel`` and the resulting
    tokens are printed on one line. The loop ends when the user types
    ``quit`` (case-insensitive) or when stdin reaches end-of-file.

    Args:
        s2smodel: sequence-to-sequence model passed to ``create_model_greedy``.
        vocab: vocabulary forwarded to ``translate``.
        i2w: index-to-word mapping forwarded to ``translate``.
        show_attention: forwarded to ``translate``. The original code ignored
            this argument and always passed ``True``.
    """
    import sys

    # wrap the greedy decoder around the model
    model_decoding = create_model_greedy(s2smodel)

    print('Enter one or more words to see their phonetic transcription.')
    while True:
        try:
            line = input("> ")
        except EOFError:
            # Closed or exhausted stdin: end the session instead of crashing.
            break
        if line.lower() == "quit":
            break
        # tokenize. Our task is letter to sound.
        out_line = []
        for word in line.split():
            in_tokens = [c.upper() for c in word]
            out_tokens = translate(in_tokens, model_decoding, vocab, i2w,
                                   show_attention=show_attention)
            out_line.extend(out_tokens)
        # '</s>' becomes a blank; every other token loses its first character
        # (presumably a marker prefix on output tokens -- TODO confirm).
        out_line = [" " if tok == '</s>' else tok[1:] for tok in out_line]
        print("=", " ".join(out_line))
        sys.stdout.flush()
def test_model_not_criterion_subset():
    """Train with a combined criterion that spans more than the trainer's model.

    Two heads share one embedding; the trainer is given head 1 as the model
    while the loss mixes the criteria of both heads.
    """
    input_dim, proj_dim = 2, 11
    model1_dim, model2_dim = 3, 4

    x = sequence.input((input_dim,))
    core = Embedding(proj_dim)

    # Head 1: classify from the last step of the embedded sequence.
    model1 = Dense(model1_dim)(sequence.last(core(x)))
    model1_label = input((model1_dim,))
    ce_model1 = cross_entropy_with_softmax(model1, model1_label)
    pe_model1 = classification_error(model1, model1_label)

    # Head 2: classify every step of the embedded sequence.
    model2 = Dense(model2_dim)(core(x))
    model2_label = sequence.input((model2_dim,))
    ce_model2 = cross_entropy_with_softmax(model2, model2_label)
    pe_model2 = classification_error(model2, model2_label)

    combined_ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1

    lr_schedule = learning_rate_schedule(0.003, UnitType.sample)
    trainer_multitask = Trainer(model1, (combined_ce, pe_model1),
                                sgd(combined_ce.parameters, lr=lr_schedule))

    f32 = np.float32
    feed = {
        x: [np.asarray([[2., 1.], [1., 2.]], f32)],
        model1_label: [np.asarray([1., 0., 0.], f32)],
        model2_label: [np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]], f32)],
    }
    trainer_multitask.train_minibatch(feed)
def test_empty_minibatch():
    """Calling ``train_minibatch`` with an empty mapping must not raise."""
    scalar = input((1,), dtype=np.float32, name='tscalar')
    op = scalar + 1

    schedule = learning_rate_schedule(0.1, UnitType.sample)
    learner = sgd(op.parameters, schedule)
    trainer = Trainer(op, (op, None), learner)

    trainer.train_minibatch({})
def test_op_broadcast_as(device_id, precision):
    """``sequence.broadcast_as`` repeats each ``a`` sample along ``b``'s sequence.

    Three samples with sequence lengths 1, 2 and 3 are used.
    """
    np_dtype = PRECISION_TO_TYPE[precision]

    a_data = [AA([value], dtype=np_dtype) for value in (1, 2, 3)]
    b_data = [
        AA([[2]], dtype=np_dtype),
        AA([[2], [3]], dtype=np_dtype),
        AA([[2], [3], [4]], dtype=np_dtype),
    ]

    a = C.input(shape=(1,), dtype=sanitize_dtype_cntk(np_dtype), name='a')
    b = sequence.input(shape=(1,), dtype=sanitize_dtype_cntk(np_dtype), name='b')

    broadcast_a_as_b = sequence.broadcast_as(a, b)
    res = broadcast_a_as_b.eval({a: a_data, b: b_data})

    assert np.array_equal(res[0], np.asarray([[1.]]))
    assert np.array_equal(res[1], np.asarray([[2.], [2.]]))
    assert np.array_equal(res[2], np.asarray([[3.], [3.], [3.]]))
def test_op_dropout(shape, dropout_rate, device_id, precision):
    """Average non-zero count after dropout must be near ``(1 - rate) * size``.

    Dropout is stochastic, so the node is rebuilt and evaluated ten times and
    the non-zero counts are averaged; the tolerance is 20% of the element
    count.
    """
    from cntk import dropout, input

    runs = 10
    np_dtype = PRECISION_TO_TYPE[precision]

    non_zero_sum = 0
    for _ in range(runs):
        ones = np.ones(shape=shape, dtype=np_dtype)

        a = input(shape=ones.shape,
                  dtype=sanitize_dtype_cntk(np_dtype),
                  needs_gradient=True,
                  name='a')
        dropout_node = dropout(a, dropout_rate=dropout_rate)

        # Prepend a leading axis of length 1 in place.
        ones.shape = (1,) + ones.shape

        forward, backward = cntk_eval(dropout_node, {a: ones}, precision,
                                      cntk_device(device_id), backward_pass=True)
        non_zero_sum += np.count_nonzero(forward[dropout_node.output])

    non_zero_sum /= runs

    num_elements = np.multiply.reduce(shape)
    expected_non_zeros = num_elements * (1 - dropout_rate)
    max_off = 0.2 * num_elements

    assert (abs(non_zero_sum - expected_non_zeros) < max_off)
def test_convolution_attributes():
    """Check the attribute dictionary of convolution root functions.

    Two convolutions are built that differ only in ``auto_padding``; the
    expected attributes differ only in ``'autoPadding'``.
    """
    x = C.input((1, 5, 5))
    weights = np.reshape(np.array([2, -1, -1, 2], dtype=np.float32), (1, 2, 2))
    kernel = C.constant(value=weights)

    def expected_for(auto_padding):
        # Attribute set shared by both cases, apart from 'autoPadding'.
        return {
            'autoPadding': auto_padding,
            'sharing': [True, True, True],
            'strides': (1, 1, 1),
            'maxTempMemSizeInSamples': 0,
            'upperPad': (0, 0, 0),
            'lowerPad': (0, 0, 0),
            'transpose': False,
            'outputShape': (0,)
        }

    conv = C.convolution(kernel, x, auto_padding=[False])
    _check(expected_for([False, False, False]), conv.root_function.attributes)

    conv = C.convolution(kernel, x, auto_padding=[False, True])
    _check(expected_for([False, False, True]), conv.root_function.attributes)
def test_changing_dropout_rate():
    """Change ``dropoutRate`` on an existing node and check the keep ratio.

    For each rate the fraction of non-zero outputs on a 100x100 block of ones
    must be within 0.01 of ``1 - rate``; at rate 0 every element must survive.
    """
    from cntk import dropout, input

    dims = (100, 100)
    f32 = np.float32
    ones = np.ones(shape=dims, dtype=f32)

    a = input(shape=dims, needs_gradient=True, dtype=f32)
    dropout_node = dropout(a, dropout_rate=0.1)

    # Prepend a leading axis of length 1 in place.
    ones.shape = (1,) + ones.shape

    for rate in [0.0, 0.25, 0.5, 0.78, 0.99999]:
        dropout_node.set_attribute('dropoutRate', rate)
        forward, _ = cntk_eval(dropout_node, {a: ones}, f32, backward_pass=True)
        kept = np.count_nonzero(forward[dropout_node.output])
        if rate == 0:
            assert kept == ones.size
        assert np.isclose((1 - rate), kept * 1.0 / ones.size, atol=0.01)
def test_op_dropout_bad_input(dropout_rate):
    """Constructing dropout with the given rate must raise ``ValueError``."""
    from cntk import dropout, input

    operand = input(shape=(1, 2), dtype='float', needs_gradient=True, name='a')

    with pytest.raises(ValueError):
        dropout_node = dropout(operand, dropout_rate=dropout_rate)
def multiFunc(self, arg1):
    """Build a multi-bit binarization graph driven by ``self.bit_map``.

    For each bit level ``i`` below ``self.bit_map.max()`` the remaining
    residual (``carry_over``) is binarized to +/-1 where ``bit_map > i``,
    scaled by a mean of absolute values, added to the running approximation
    and subtracted from the residual.

    Args:
        arg1: object providing ``shape`` and ``dynamic_axes`` for the input.

    Returns:
        A tuple ``(approximation_expression, input_variable)``.
    """
    # load or create the inputs we need
    multiIn = C.input(shape=arg1.shape, dynamic_axes = arg1.dynamic_axes)
    bit_map = C.constant(self.bit_map)
    # NOTE(review): used as the bound of range() below, so this assumes
    # self.bit_map holds integers -- TODO confirm.
    max_bits = self.bit_map.max()
    # NOTE(review): ``shape`` and ``reformed`` are not used later in this function.
    shape = multiIn.shape
    reformed = C.reshape(multiIn, (-1,))
    # lets compute the means we need
    # carry over represents the remaining value that needs to binarized. For a single bit, this is just the input. For more bits,
    # it is the difference between the previous bits approximation and the true value.
    carry_over = multiIn
    # Zero-valued expression with the input's shape, used as the accumulator.
    approx = C.element_times(multiIn, 0)
    # iterate through the maximum number of bits specified by the bit maps, basically compute each level of binarization
    for i in range(max_bits):
        # determine which values of the input should be binarized to i bits or more
        hot_vals = C.greater(bit_map, i)
        # select only the values which we need to binarize
        valid_vals = C.element_select(hot_vals, carry_over, 0)
        # compute mean on a per kernel basis, reshaping is done to allow for sum reduction along only axis 0 (the kernels)
        # NOTE(review): the reductions below use axis=1 on a (shape[0], -1)
        # reshape, while the comment above speaks of axis 0 -- confirm intent.
        mean = C.element_divide(C.reduce_sum(C.reshape(C.abs(valid_vals), (valid_vals.shape[0], -1)), axis=1),
                                C.reduce_sum(C.reshape(hot_vals, (hot_vals.shape[0], -1)), axis=1))
        # reshape the mean to match the dimensionality of the input
        # (assumes a rank-4 input -- TODO confirm)
        mean = C.reshape(mean, (mean.shape[0], mean.shape[1], 1, 1))
        # binarize the carry over
        bits = C.greater(carry_over, 0)
        # keep the comparison result where set, -1 elsewhere
        bits = C.element_select(bits, bits, -1)
        # zero out positions that do not use this bit level
        bits = C.element_select(hot_vals, bits, 0)
        # add in the equivalent binary representation to the approximation
        approx = C.plus(approx, C.element_times(mean, bits))
        # compute the new carry over
        carry_over = C.plus(C.element_times(C.element_times(-1, bits), mean), carry_over)

    return approx, multiIn
def test_sequence_auto_broadcast():
    """A non-sequence operand is multiplied into every step of a sequence."""
    x = C.sequence.input((3,))
    y = C.input((3,))
    product = x * y

    f32 = np.float32
    feed = {
        x: np.asarray([[1, 2, 3], [4, 5, 6]], dtype=f32),
        y: np.asarray([[1, 2, 3]], dtype=f32),
    }
    result = product.eval(feed)

    expected = np.asarray([[1., 4., 9.], [4., 10., 18.]], dtype=f32)
    assert np.array_equal(result[0], expected)
def test_op_dropout_with_explicit_seed(device_id, precision):
    """Equal seeds give equal dropout masks; other seeds give different ones.

    Four dropout nodes are built on the same input: two with the same seed,
    one with ``seed + 1`` and one without an explicit seed.
    """
    from cntk import combine, dropout, input

    np_dtype = PRECISION_TO_TYPE[precision]
    ones = np.ones(shape=(10, 10), dtype=np_dtype)

    a = input(shape=ones.shape,
              dtype=sanitize_dtype_cntk(np_dtype),
              needs_gradient=True,
              name='a')

    seed = 123
    dropout_nodes = [
        dropout(a, dropout_rate=0.5, seed=seed),
        dropout(a, dropout_rate=0.5, seed=seed),
        dropout(a, dropout_rate=0.5, seed=seed + 1),
        dropout(a, dropout_rate=0.5),
    ]

    # Prepend two leading axes of length 1 in place.
    ones.shape = (1, 1) + ones.shape
    feed = {a: ones}

    results = []
    for node in dropout_nodes:
        forward, backward = cntk_eval(node, feed, precision,
                                      cntk_device(device_id), backward_pass=True)
        results.append(forward[node.output])

    same_seed, same_seed_again, other_seed, no_seed = results
    assert np.allclose(same_seed, same_seed_again)
    assert not np.allclose(same_seed, other_seed)
    assert not np.allclose(same_seed, no_seed)
def test_op_batch_normalization(use_cudnn, sample, device_id, precision):
    """Batch normalization forward pass with fixed running statistics.

    Expected values are ``(x - mean) / sqrt(var + epsilon) * scale + bias``
    with mean 1, variance 2, scale 3 and bias 4. A call without
    ``running_count`` is additionally expected to emit a warning.
    """
    from cntk import batch_normalization, input

    dtype = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)
    epsilon = 0.00001

    # One column of samples.
    t = AA(sample, dtype=dtype).reshape(-1, 1)

    mean, var = 1, 2
    init_scale, init_bias = 3, 4

    denom = np.sqrt(var + epsilon)
    expected_forward = AA([(x - mean) / denom * init_scale + init_bias for x in t])

    scale = Parameter(init=AA([init_scale], dtype=dtype), dtype=dtype, device=dev)
    bias = Parameter(init=AA([init_bias], dtype=dtype), dtype=dtype, device=dev)
    run_mean = constant(mean, shape=(1), dtype=dtype, device=dev)
    run_variance = constant(var, shape=(1), dtype=dtype, device=dev)
    run_count = constant(0, dtype=dtype, device=dev)

    a = input(shape=(1), dtype=dtype, needs_gradient=False, name='a')

    with pytest.warns(Warning):
        # no running_count here
        op = batch_normalization(a, scale, bias, run_mean, run_variance, False,
                                 epsilon=epsilon, use_cudnn_engine=use_cudnn)

    op_node = batch_normalization(a, scale, bias, run_mean, run_variance,
                                  running_count=run_count, spatial=False,
                                  epsilon=epsilon, use_cudnn_engine=use_cudnn)

    unittest_helper(op_node, {a: t}, expected_forward,
                    expected_backward=None,
                    device_id=device_id, precision=precision)
def seqcla():
    """Train an embedding + LSTM sequence classifier and check its accuracy.

    Builds the network, trains it inside a ``LocalExecutionContext``, writes
    the predictions, and asserts that the accuracy reported by
    ``calc_accuracy`` is within 1e-2 of 0.6006415396952687.
    """
    # LSTM params
    input_dim = 50
    output_dim = 128
    cell_dim = 128

    # model
    num_labels = 5
    vocab = 2000
    embed_dim = 50

    t = C.dynamic_axis(name='t')
    features = C.sparse_input(vocab, dynamic_axis=t, name='features')
    labels = C.input(num_labels, name='labels')

    train_reader = C.CNTKTextFormatReader(train_file)

    def _reader_input_map():
        # Bind the reader streams 'x' and 'y' to the feature and label inputs.
        feature_map = train_reader.map(
            features, alias='x', dim=vocab, format='Sparse')
        return feature_map.map(
            labels, alias='y', dim=num_labels, format='Dense')

    # setup embedding matrix
    embedding = C.parameter(
        (embed_dim, vocab),
        learning_rate_multiplier=0.0,
        init_from_file_path=embedding_file)

    # get the vector representing the word
    sequence = C.times(embedding, features, name='sequence')

    # add an LSTM layer
    L = lstm_layer(output_dim, cell_dim, sequence, input_dim)

    # add a softmax layer on top
    w = C.parameter((num_labels, output_dim), name='w')
    b = C.parameter((num_labels), name='b')
    z = C.times(w, L) + b
    z.name = 'z'
    z.tag = "output"

    # and reconcile the shared dynamic axis
    pred = C.reconcile_dynamic_axis(z, labels, name='pred')

    ce = C.cross_entropy_with_softmax(labels, pred)
    ce.tag = "criterion"

    my_sgd = C.SGDParams(
        epoch_size=0,
        minibatch_size=10,
        learning_rates_per_mb=0.1,
        max_epochs=3)

    with C.LocalExecutionContext('seqcla') as ctx:
        # train the model
        ctx.train(root_nodes=[ce], training_params=my_sgd,
                  input_map=_reader_input_map())

        # write out the predictions
        ctx.write(input_map=_reader_input_map())

        # do some manual accuracy testing
        acc = calc_accuracy(train_file, ctx.output_filename_base)

        # and test for the same number...
        TOLERANCE_ABSOLUTE = 1E-02
        assert np.allclose(acc, 0.6006415396952687, atol=TOLERANCE_ABSOLUTE)
def test_asarray_method():
    """Sparse data survives the round trip through a Value.

    The first half checks ``as_sequences`` on a sequence input. The second
    half checks ``asarray`` on the Value, on its NDArrayView, and on the
    ``super`` proxies of both.
    """
    def _assert_elementwise_equal(actual, expected):
        for got, want in zip(actual, expected):
            assert (got == want).toarray().all()

    shape = (3,)

    # Sequence input: a batch holding a single sparse sequence.
    var = sequence.input(shape, is_sparse=True)
    data = [csr([[1, 0, 2], [5, 0, 1]])]
    # conversion array -> value
    val = asvalue(var, data)
    _assert_elementwise_equal(val.as_sequences(var), data)

    # Plain input: one sparse matrix.
    var = C.input(shape, is_sparse=True)
    data = csr([[1, 0, 2], [5, 0, 1]])
    # conversion array -> value
    val = asvalue(var, data)

    candidates = [
        val,                           # Value
        super(Value, val),             # cntk_py.Value
        val.data,                      # NDArrayView
        super(NDArrayView, val.data),  # cntk_py.NDArrayView
    ]
    for v in candidates:
        _assert_elementwise_equal(v.asarray(), data)
def test_auto_broadcast_reconcile_issue():
    """The owner of a reconcile_dynamic_axes result has exactly inputs y, x."""
    seq_var = C.sequence.input((3,), name='x')
    plain_var = C.input((3,), name='y')

    reconciled = C.reconcile_dynamic_axes(plain_var, seq_var)
    inputs = reconciled.owner.inputs

    # check does the reconcile_dynamic_axes call trigger the auto broadcast
    assert len(inputs) == 2
    first, second = inputs[0], inputs[1]
    assert first.name == 'y' and second.name == 'x'
def train_eval_logistic_regression_from_file(criterion_name=None,
                                             eval_name=None, device_id=-1):
    """Train and test a 3-class logistic regression from text files.

    Args:
        criterion_name: name assigned to the cross-entropy criterion node.
        eval_name: name assigned to the square-error evaluation node.
        device_id: value stored on the execution context's ``device_id``.

    Returns:
        Whatever ``ctx.test`` returns for the criterion and evaluation nodes.
    """
    # Using data from https://github.com/Microsoft/CNTK/wiki/Tutorial
    here = os.path.dirname(__file__)
    train_file = os.path.join(here, "Train-3Classes.txt")
    test_file = os.path.join(here, "Test-3Classes.txt")

    X = C.input(2)
    y = C.input(3)

    W = C.parameter(value=np.zeros(shape=(3, 2)))
    b = C.parameter(value=np.zeros(shape=(3, 1)))

    out = C.times(W, X) + b
    out.tag = 'output'

    ce = C.cross_entropy_with_softmax(y, out)
    ce.name = criterion_name
    ce.tag = 'criterion'

    sq_err = C.ops.square_error(y, out)
    sq_err.tag = 'eval'
    sq_err.name = eval_name

    roots = [ce, sq_err]

    def _input_map(reader):
        # Stream 'I' feeds the features, stream 'L' feeds the labels.
        return reader.map(X, alias='I', dim=2).map(y, alias='L', dim=3)

    # training data readers
    train_reader = C.CNTKTextFormatReader(train_file, randomize=None)

    # testing data readers
    test_reader = C.CNTKTextFormatReader(test_file, randomize=None)

    my_sgd = C.SGDParams(
        epoch_size=0,
        minibatch_size=25,
        learning_rates_per_mb=0.1,
        max_epochs=3)

    with C.LocalExecutionContext('logreg') as ctx:
        ctx.device_id = device_id

        ctx.train(
            root_nodes=list(roots),
            training_params=my_sgd,
            input_map=_input_map(train_reader))

        return ctx.test(
            root_nodes=list(roots),
            input_map=_input_map(test_reader))
def instance_input(self, data_providers):
    '''
    Create one CNTK input variable per network input and register it.

    When the model solver carries a ``cntk_tensor`` mapping, its entries
    define the inputs and ``data_providers`` is ignored. Otherwise each
    data provider contributes an input named after its ``op_name``.

    Args:
        data_providers (list): the list contains the definition of inputs

    Return:
        None
    '''
    solver_tensors = self._model_solver.cntk_tensor
    if solver_tensors is None:
        named_shapes = ((provider.op_name, provider.tensor[:])
                        for provider in data_providers)
    else:
        named_shapes = solver_tensors.items()

    for name, dims in named_shapes:
        self._functions[name] = cntk.input(tuple(dims), name=name)
def create_binary_convolution_model():
    """Build the binary-convolution classifier and its loss/metric nodes.

    Returns:
        ``C.combine([z, ce, pe])``: the scaled network output, the
        cross-entropy loss plus the binary regularizer, and the
        classification error.
    """
    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    net = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    net = C.layers.MaxPooling((3, 3), strides=(2, 2))(net)

    # Two identical binary stages that differ only in input channel count.
    for in_channels in (32, 128):
        net = C.layers.BatchNormalization(map_rank=1)(net)
        net = BinaryConvolution(net, (3, 3), 128, channels=in_channels, pad=True)
        net = C.layers.MaxPooling((3, 3), strides=(2, 2))(net)

    net = C.layers.BatchNormalization(map_rank=1)(net)
    net = BinaryConvolution(net, (1, 1), num_classes, channels=128, pad=True)
    # Average over the two trailing axes, then flatten to one value per class.
    net = C.layers.AveragePooling((net.shape[1], net.shape[2]))(net)
    net = C.reshape(net, (num_classes,))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    for param in net.parameters:
        if param.name != "filter":
            continue
        penalty = C.reduce_sum(C.minus(1, C.square(param)))
        weight_sum = C.plus(weight_sum, penalty)
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=net.shape, init=0.001)
    net = C.element_times(net, SP)

    # loss and metric
    ce = C.plus(C.cross_entropy_with_softmax(net, label_var), bin_reg)
    pe = C.classification_error(net, label_var)

    return C.combine([net, ce, pe])
def test_pad():
    """Compare C.pad with NumPy padding and check its gradients.

    Covers the default mode and modes 1 and 2 on a constant, which the
    references equate with NumPy's 'constant', 'reflect' and 'symmetric'.
    Also covers inputs declared with an inferred and a free leading
    dimension, and the gradient with respect to a padded parameter.

    The references use ``np.pad`` rather than ``np.lib.pad``, because the
    latter is not part of the public ``numpy.lib`` namespace in NumPy 2.x.
    """
    base = [[0, 1, 2], [3, 4, 5]]
    pattern = [(1, 1), (2, 2)]
    np_pattern = ((1, 1), (2, 2))

    x = C.constant(value=np.arange(6).reshape((2, 3)))

    pad1 = C.pad(x, pattern).eval()
    expect1 = np.pad(base, np_pattern, 'constant')
    assert np.array_equal(pad1, expect1)

    pad2 = C.pad(x, pattern, mode=1).eval()
    expect2 = np.pad(base, np_pattern, 'reflect')
    assert np.array_equal(pad2, expect2)

    pad3 = C.pad(x, pattern, mode=2).eval()
    expect3 = np.pad(base, np_pattern, 'symmetric')
    assert np.array_equal(pad3, expect3)

    # test inferred dimension and free dimension
    batch = [[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]]
    # The leading axis of the data is left unpadded in the reference.
    batch_pattern = ((0, 0), (1, 1), (2, 2))
    data = np.arange(12).reshape((2, 2, 3))

    x = C.input((C.InferredDimension, 3))
    pad4 = C.pad(x, pattern, mode=1).eval({x: data})
    expect4 = np.pad(batch, batch_pattern, 'reflect')
    assert np.array_equal(pad4, expect4)

    x = C.input((C.FreeDimension, 3))
    pad5 = C.pad(x, pattern, mode=2).eval({x: data})
    expect5 = np.pad(batch, batch_pattern, 'symmetric')
    assert np.array_equal(pad5, expect5)

    # test grad
    x = C.parameter(init=np.arange(6).reshape((2, 3)))
    p = C.pad(x, mode=C.ops.SYMMETRIC_PAD, pattern=[(1, 0), (2, 1)])
    grad = p.grad({}, [x])
    expect_grad = np.asarray([[4., 4., 4.], [2., 2., 2.]])
    assert np.array_equal(grad, expect_grad)

    p2 = C.pad(x, mode=C.ops.REFLECT_PAD, pattern=[(1, 1), (2, 2)])
    grad2 = p2.grad({}, [x])
    expect_grad2 = np.asarray([[4., 6., 4.], [4., 6., 4.]])
    assert np.array_equal(grad2, expect_grad2)
def test_set_rng_seed_attribute():
    """'rngSeed' can be read back after being set on the root or the node.

    The values cover the constructor seed, a large 64-bit value set on the
    root function, and ``2**31`` set through the node itself.
    """
    from cntk import input, random_sample

    key = 'rngSeed'
    random_sample_node = random_sample(input(1), 1, True, seed=123)
    root = random_sample_node.root_function

    # The seed passed at construction time is visible on the root function.
    assert root.attributes[key] == 123

    updates = (
        (root, 11530328594546889191),
        (random_sample_node, 2**31),
    )
    for target, new_seed in updates:
        target.set_attribute(key, new_seed)
        assert root.attributes[key] == new_seed
def test_set_dropout_rate_attribute():
    """'dropoutRate' can be read back after being set on the root or the node."""
    from math import pi

    from cntk import dropout, input

    key = 'dropoutRate'
    dropout_node = dropout(input(1), dropout_rate=0.3)
    root = dropout_node.root_function

    # The rate passed at construction time is visible on the root function.
    assert np.isclose(root.attributes[key], 0.3)

    updates = (
        (root, 0.4),
        (dropout_node, 0.777),
        (dropout_node, pi),
    )
    for target, rate in updates:
        target.set_attribute(key, rate)
        assert np.isclose(root.attributes[key], rate)
def multiFunc(self, arg1):
    """Build a multi-bit approximation graph of an input shaped like arg1.

    For each bit index ``i`` below ``self.bit_map.max()``, the entries whose
    bit-map value is greater than ``i`` ("hot" entries) are approximated by
    ``mean * sign``. ``mean`` is the sum of absolute residuals over hot
    entries divided by the number of hot entries; ``sign`` is +1 where the
    residual is positive and -1 elsewhere. The approximation is accumulated
    and subtracted from the residual before the next bit.

    The dead locals ``shape`` and ``reformed`` (an unused reshape node) from
    the earlier version have been removed; the returned graph is unchanged.

    Args:
        arg1: object whose ``shape`` and ``dynamic_axes`` define the new
            placeholder input.

    Returns:
        tuple: ``(approx, multiIn)``, the approximation function and the
        input variable it was built on.
    """
    multiIn = C.input(shape=arg1.shape, dynamic_axes=arg1.dynamic_axes)
    bit_map = C.constant(self.bit_map)
    max_bits = self.bit_map.max()

    carry_over = multiIn                  # residual still to be approximated
    approx = C.element_times(multiIn, 0)  # running approximation, starts at 0
    for i in range(max_bits):
        # Entries that still have a bit available at this index.
        hot_vals = C.greater(bit_map, i)
        valid_vals = C.element_select(hot_vals, carry_over, 0)
        mean = C.element_divide(C.reduce_sum(C.abs(valid_vals)),
                                C.reduce_sum(hot_vals))
        # +1 where the residual is positive, -1 elsewhere, 0 on cold entries.
        bits = C.greater(carry_over, 0)
        bits = C.element_select(bits, bits, -1)
        bits = C.element_select(hot_vals, bits, 0)
        approx = C.plus(approx, C.element_times(mean, bits))
        carry_over = C.plus(
            C.element_times(C.element_times(-1, bits), mean), carry_over)

    return approx, multiIn
def test_native_binary_function(): # user functions need to be registered before being callable by python if not nopt.native_convolve_function_registered: pytest.skip("Could not find {0} library. " "Please check if HALIDE_PATH is configured properly " "and try building {1} again" .format('Cntk.BinaryConvolution-' + C.__version__.rstrip('+'), 'Extnsibiliy\\BinaryConvolution')) # be sure to only run on CPU, binary convolution does not have GPU support for now dev = C.cpu() # create an arbitrary input mimicking a realistic cifar input x = input((64, 28, 28)) # random filter weights for testing w = parameter((64, 64, 3, 3), init=np.reshape(2*(np.random.rand(64*64*3*3)-.5), (64, 64, 3, 3)), dtype=np.float32, device=dev) # set the convolution parameters by passing in an attribute dictionary #attributes = {'stride' : 1, 'padding' : False, 'size' : 3} attributes = {'stride' : 1, 'padding' : False, 'size' : 3, 'h' : 28, 'w' : 28, 'channels' : 64, 'filters' : 64 } # define the binary convolution op op = ops.native_user_function('NativeBinaryConvolveFunction', [w, x], attributes, 'native_binary_convolve') # also define an op using python custom functions that should have the same output op2 = C.convolution(CustomMultibitKernel(w, 1), CustomSign(x), auto_padding = [False]) # create random input data x_data = NDArrayView.from_dense(np.asarray(np.reshape(2*(np.random.rand(64*28*28)-.5), (64, 28, 28)),dtype=np.float32), device=dev) # evaluate the CPP binary convolve result = op.eval({x : x_data}, device=dev) # evaluate the python emulator result2 = op2.eval({x : x_data}, device=dev) native_times_primitive = op.find_by_name('native_binary_convolve') # assert that both have the same result '''
def interactive_session(s2smodel, vocab, i2w, show_attention=False):
    """Run a read-translate-print loop on standard input.

    Each entered line is split on whitespace. Every word is upper-cased
    letter by letter and passed to ``translate`` with a greedy decoder
    wrapped around ``s2smodel``. In the printed result, ``'</s>'`` tokens
    become spaces and every other token loses its first character.
    Entering ``quit`` (any letter case) ends the session.

    Args:
        s2smodel: model handed to ``create_model_greedy``.
        vocab: forwarded unchanged to ``translate``.
        i2w: forwarded unchanged to ``translate``.
        show_attention (bool): forwarded to ``translate``. This argument
            used to be ignored because the call hard-coded
            ``show_attention=True``; it is now honoured.
    """
    import sys

    model_decoding = create_model_greedy(s2smodel)  # wrap the greedy decoder around the model

    print('Enter one or more words to see their phonetic transcription.')
    while True:
        line = input("> ")
        if line.lower() == "quit":
            break
        # tokenize. Our task is letter to sound.
        out_line = []
        for word in line.split():
            in_tokens = [c.upper() for c in word]
            out_tokens = translate(in_tokens, model_decoding, vocab, i2w,
                                   show_attention=show_attention)
            out_line.extend(out_tokens)
        out_line = [" " if tok == '</s>' else tok[1:] for tok in out_line]
        print("=", " ".join(out_line))
        sys.stdout.flush()
def test_dropout_random_mask_is_recomputed_on_forward_pass():
    """Two forward passes through one dropout network give different masks."""
    from cntk import dropout, input

    shape = (100, 100)
    dtype = np.float32

    a = input(shape=shape, needs_gradient=True, dtype=dtype)
    network = dropout(a, dropout_rate=0.1) + constant(0)

    # All-ones sample with one leading unit axis.
    value = np.ones(shape=(1,) + shape, dtype=dtype)

    def _surviving_mask():
        # Boolean mask of the entries that were not dropped in this pass.
        _, outputs = network.forward({a: value}, network.outputs, network.outputs)
        return outputs[network.output] > 0.0

    non_zeros_1 = _surviving_mask()
    non_zeros_2 = _surviving_mask()

    assert not (non_zeros_1 == non_zeros_2).all()
def _install_test_layer(op_type, parameters, weights, input_data):
    """Assemble a layer definition and matching CNTK inputs for a test.

    Args:
        op_type (str): operation name in camel case. It selects the
            ``cntkmodel.Cntk<op_type>Parameters`` class and, after
            ``camel_to_snake`` conversion, the ``CntkLayerType`` member.
        parameters (dict): attribute name -> value pairs copied onto the
            parameters instance.
        weights: iterable of weight arrays, or ``None`` for a layer without
            parameter tensors.
        input_data: iterable of objects with a ``shape`` attribute; one
            ``cntk.input`` is created per element.

    Returns:
        tuple: ``(layer_def, inputs_variable)``.

    Raises:
        AttributeError: if ``cntkmodel`` has no parameters class for
            ``op_type``.
    """
    para_cls_id = 'Cntk' + op_type + 'Parameters'
    # Attribute lookup instead of eval() on a built string: same class is
    # resolved, but op_type can no longer be executed as code.
    para_instance = getattr(cntkmodel, para_cls_id)()
    for key, value in parameters.items():
        setattr(para_instance, key, value)

    layer_def = cntkmodel.CntkLayersDefinition()
    layer_def.parameters = para_instance
    layer_def.op_type = getattr(cntkmodel.CntkLayerType,
                                utils.format.camel_to_snake(op_type))
    layer_def.op_name = '_'.join(('test', op_type))
    layer_def.parameter_tensor = []
    if weights is not None:
        for weight in weights:
            weight_tensor = cntkmodel.CntkTensorDefinition()
            weight_tensor.tensor = np.array(weight).shape
            weight_tensor.data = weight
            layer_def.parameter_tensor.append(weight_tensor)

    inputs_variable = [cntk.input(input_tensor.shape)
                       for input_tensor in input_data]
    return layer_def, inputs_variable
def test_changing_dropout_rate():
    """Changing 'dropoutRate' changes the fraction of surviving entries.

    For each rate, the share of non-zero outputs on an all-ones input must
    be within 0.01 of ``1 - rate``. For rate 0 every entry must survive.
    """
    from cntk import dropout, input

    shape = (100, 100)
    dtype = np.float32

    a = input(shape=shape, needs_gradient=True, dtype=dtype)
    dropout_node = dropout(a, dropout_rate=0.1)

    # All-ones sample with one leading unit axis.
    value = np.ones(shape=shape, dtype=dtype)
    value.shape = (1,) + value.shape

    rates = [0.0, 0.25, 0.5, 0.78, 0.99999]
    for dropout_rate in rates:
        dropout_node.set_attribute('dropoutRate', dropout_rate)
        outputs, _ = cntk_eval(dropout_node, {a: value}, dtype, backward_pass=True)
        survivors = np.count_nonzero(outputs[dropout_node.output])

        if dropout_rate == 0:
            assert survivors == value.size

        kept_fraction = survivors * 1.0 / value.size
        assert np.isclose((1 - dropout_rate), kept_fraction, atol=0.01)
def test_native_binary_function():
    """The native binary convolution matches its Python emulation.

    The native user function is registered and evaluated on random data on
    the CPU. Its output is compared (atol 0.001) with ``C.convolution``
    applied to ``CustomMultibitKernel`` and ``CustomSign`` wrappers.
    """
    def _signed_uniform(shape):
        # Values from 2 * (rand - 0.5), drawn flat and reshaped.
        count = 1
        for extent in shape:
            count *= extent
        return np.reshape(2 * (np.random.rand(count) - .5), shape)

    native_name = 'NativeBinaryConvolveFunction'
    node_name = 'native_binary_convolve_function'
    input_shape = (64, 30, 30)
    filter_shape = (64, 64, 3, 3)

    # user functions need to be registered before being callable by python
    ops.register_native_user_function(
        native_name,
        'Cntk.BinaryConvolutionExample-' + C.__version__.rstrip('+'),
        'CreateBinaryConvolveFunction')

    # be sure to only run on CPU, binary convolution does not have GPU support for now
    dev = cpu()

    # create an arbitrary input mimicking a realistic cifar input
    x = input(input_shape)

    # random filter weights for testing
    w = parameter(filter_shape, init=_signed_uniform(filter_shape),
                  dtype=np.float32, device=dev)

    # set the convolution parameters by passing in an attribute dictionary
    attributes = {'stride': 1, 'padding': False, 'size': 3}

    # define the binary convolution op
    op = ops.native_user_function(native_name, [w, x], attributes, node_name)

    # also define an op using python custom functions that should have the same output
    op2 = C.convolution(CustomMultibitKernel(w, 1), CustomSign(x),
                        auto_padding=[False])

    # create random input data
    x_data = NDArrayView.from_dense(
        np.asarray(_signed_uniform(input_shape), dtype=np.float32),
        device=dev)

    # evaluate the CPP binary convolve
    result = op.eval({x: x_data}, device=dev)

    # evaluate the python emulator
    result2 = op2.eval({x: x_data}, device=dev)

    # Look up the native node by name; the result is not used further.
    native_times_primitive = op.find_by_name(node_name)

    # assert that both have the same result
    assert np.allclose(result, result2, atol=0.001)
import numpy as np import pytest import cntk as C import cntk.contrib.netopt.quantization as qc C.cntk_py.set_fixed_random_seed(1) inC, inH, inW = 1, 28, 28 num_classes = 10 feature_var = C.input_variable((inC, inH, inW)) label_var = C.input((num_classes)) dat = np.ones([1, inC, inH, inW], dtype = np.float32) # create a network with convolutions for the tests def _create_convolution_model(): with C.layers.default_options(init=C.glorot_uniform(), activation=C.relu): h = feature_var # The first two layers has bias=False to test, the conversion # work with and without bias in the Convolution. h = C.layers.Convolution2D(filter_shape=(5,5), num_filters=64, strides=(2,2), pad=True, bias=False, name='first_convo')(h) h = C.layers.Convolution2D(filter_shape=(5,5), num_filters=64, strides=(2,2), pad=True, bias=False, name='second_convo')(h) h = C.layers.Convolution2D(filter_shape=(5,5), num_filters=64,