import math

import numpy as np
import cntk as C


def test_convert_dynamic_axis():
    # test fixed batch size
    batch_size = 4
    a = C.parameter(shape=(batch_size, 2, 3), init=1)
    dynamic_a = C.to_batch(a)
    assert len(dynamic_a.dynamic_axes) == 1
    assert dynamic_a.shape == (2, 3)

    x = C.input_variable((2, 3))
    y = x * dynamic_a

    # test gradient
    data = np.arange(batch_size * 2 * 3).reshape(batch_size, 2, 3).astype('f')
    assert np.array_equal(y.grad({x: data}, [a]), data)

    const_a = C.unpack_batch(y)
    assert len(const_a.dynamic_axes) == 0
    assert const_a.shape == (C.FreeDimension, 2, 3)

    f = C.assign(a, const_a)
    f.eval({x: data})
    assert np.array_equal(a.value, data)

    # test reshape with batch axis
    x = C.input_variable((2, 3))
    const_x = C.unpack_batch(x)
    assert len(const_x.dynamic_axes) == 0
    assert const_x.shape == (C.FreeDimension, 2, 3)
    const_y = C.reshape(const_x, (-1, 3))
    assert const_y.shape == (C.FreeDimension, 3)
    y = C.to_batch(const_y)
    assert len(y.dynamic_axes) == 1
    assert y.shape == (3,)
    z = y * 2
    expected = data.reshape((8, 3)) * 2
    assert np.array_equal(z.eval({x: data}), expected)

    # test inferred dimension
    x = C.input_variable((C.InferredDimension, 3))
    const_x = C.unpack_batch(x)
    assert len(const_x.dynamic_axes) == 0
    assert const_x.shape == (C.FreeDimension, C.InferredDimension, 3)
    const_y = const_x * 2
    y = C.to_batch(const_y)
    assert len(y.dynamic_axes) == 1
    assert y.shape == (C.InferredDimension, 3)
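# A minimal sketch (not part of the original test suite) of the pattern the test above
# exercises: `unpack_batch` exposes the dynamic batch axis as a leading FreeDimension
# static axis, a static-axis op is applied across the whole minibatch, and `to_batch`
# re-attaches the batch axis. The helper name `_batch_round_trip_demo` and the sample
# data are illustrative assumptions, not CNTK API.
def _batch_round_trip_demo():
    x = C.input_variable((2, 3))
    static_x = C.unpack_batch(x)    # shape (FreeDimension, 2, 3), no dynamic axes
    doubled = static_x * 2          # static-axis ops now see the whole minibatch
    y = C.to_batch(doubled)         # shape (2, 3) with the batch axis restored
    data = np.arange(12, dtype=np.float32).reshape(2, 2, 3)
    return y.eval({x: data})        # same values as data * 2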
def hierarchical_softmax_layer(input_var, num_output_classes, target_class,
                               target_output_in_class, batch_size, w1, b1, w2s, b2s):
    '''
    A two-layer hierarchical softmax function.

    Example:
        >>> input_dim = 2
        >>> num_output_classes = 4
        >>> minibatch_size = 3
        >>> n_classes = int(math.ceil(math.sqrt(num_output_classes)))
        >>> n_outputs_per_class = n_classes

        >>> w1 = C.parameter(shape=(input_dim, n_classes), init=C.glorot_normal(seed=1), name='w1')
        >>> b1 = C.parameter(shape=(n_classes), init=C.glorot_normal(seed=2), name='b1')
        >>> w2s = C.parameter(shape=(n_classes, input_dim, n_outputs_per_class), init=C.glorot_normal(seed=3), name='w2s')
        >>> b2s = C.parameter(shape=(n_classes, n_outputs_per_class), init=C.glorot_normal(seed=4), name='b2s')

        # neural network structure for hierarchical softmax
        >>> h_input = C.input_variable(input_dim)
        >>> h_target_class = C.input_variable([1])
        >>> h_target_output_in_class = C.input_variable([1])
        >>> h_z, class_probs, all_probs = hierarchical_softmax_layer(h_input, num_output_classes, h_target_class, h_target_output_in_class, minibatch_size, w1, b1, w2s, b2s)

        >>> a = np.reshape(np.arange(minibatch_size * input_dim, dtype=np.float32), (minibatch_size, input_dim))
        >>> labels = np.reshape(np.arange(minibatch_size, dtype=np.float32), (minibatch_size, 1)) % num_output_classes
        >>> target_labels = labels // n_outputs_per_class
        >>> target_output_in_labels = labels % n_outputs_per_class
        >>> h_z.eval({h_input: a, h_target_class: target_labels, h_target_output_in_class: target_output_in_labels})
        array([[ 0.031305],
               [ 0.003239],
               [ 0.990064]], dtype=float32)

    Args:
        input_var: :class:`~cntk.ops.functions.Function` that outputs a tensor with batch axis
        num_output_classes: int
        target_class: :class:`~cntk.ops.functions.Function` that outputs a tensor with batch axis
        target_output_in_class: :class:`~cntk.ops.functions.Function` that outputs a tensor with batch axis
        batch_size: int
        w1: C.parameter
        b1: C.parameter
        w2s: C.parameter
        b2s: C.parameter

    Returns:
        output_prob: :class:`~cntk.ops.functions.Function`
        class_probs: :class:`~cntk.ops.functions.Function`
        all_probs: a list of :class:`~cntk.ops.functions.Function`
    '''
    input_dim = input_var.shape[0]

    n_classes = int(math.ceil(math.sqrt(num_output_classes)))
    n_outputs_per_class = n_classes

    # first layer: probability of each class given the input
    class_probs = C.softmax(b1 + C.times(input_var, w1))

    # second layer: probability of each output within the target class
    w2_temp = C.gather(w2s, target_class)
    w2 = C.reshape(w2_temp, (input_dim, n_outputs_per_class))
    w2 = C.sequence.broadcast_as(w2, input_var)
    b2 = C.reshape(C.gather(b2s, target_class), (n_outputs_per_class))
    b2 = C.sequence.broadcast_as(b2, input_var)

    times_result = C.times(input_var, w2)
    probs_in_class = C.softmax(b2 + times_result)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    target_output_in_class = C.one_hot(target_output_in_class, n_outputs_per_class, False)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    prob_in_class = C.times_transpose(probs_in_class, target_output_in_class)

    target_class = C.one_hot(target_class, n_classes, False)
    class_probs = C.sequence.broadcast_as(class_probs, target_class)
    class_prob = C.times_transpose(class_probs, target_class)

    # joint probability of the target output: P(class) * P(output | class)
    output_prob = C.element_times(class_prob, prob_in_class)

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for i in range(n_classes):
        ci = C.constant(i)
        w2a = C.reshape(C.gather(w2s, ci), (input_dim, n_outputs_per_class))
        w2a = C.sequence.broadcast_as(w2a, input_var)
        b2a = C.reshape(C.gather(b2s, ci), (n_outputs_per_class))
        b2a = C.sequence.broadcast_as(b2a, input_var)
        probs_in_classa = C.softmax(b2a + C.times(input_var, w2a))

        cia = C.constant(i, shape=[batch_size, 1])
        cia = C.to_batch(cia)
        cia = C.one_hot(cia, n_outputs_per_class, False)
        cia = C.sequence.broadcast_as(cia, class_probs)
        class_proba = C.times_transpose(class_probs, cia)
        class_proba = C.sequence.broadcast_as(class_proba, probs_in_classa)

        output_proba = C.element_times(class_proba, probs_in_classa)
        all_probs.append(output_proba)

    return output_prob, class_probs, all_probs
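# Illustrative usage sketch (an assumption, not part of the original source): wiring the
# layer into a training criterion by taking the negative log of the joint probability
# `output_prob`. Parameter shapes mirror the docstring example above; the helper name
# `hierarchical_softmax_criterion` is hypothetical.
def hierarchical_softmax_criterion(input_dim, num_output_classes, minibatch_size):
    n_classes = int(math.ceil(math.sqrt(num_output_classes)))
    n_outputs_per_class = n_classes

    w1 = C.parameter(shape=(input_dim, n_classes), init=C.glorot_normal())
    b1 = C.parameter(shape=(n_classes), init=C.glorot_normal())
    w2s = C.parameter(shape=(n_classes, input_dim, n_outputs_per_class), init=C.glorot_normal())
    b2s = C.parameter(shape=(n_classes, n_outputs_per_class), init=C.glorot_normal())

    features = C.input_variable(input_dim)
    target_class = C.input_variable([1])
    target_output_in_class = C.input_variable([1])

    output_prob, _, _ = hierarchical_softmax_layer(
        features, num_output_classes, target_class, target_output_in_class,
        minibatch_size, w1, b1, w2s, b2s)

    # minimizing -log(P(class) * P(output | class)) maximizes the target's joint probability
    loss = -C.log(output_prob)
    return features, target_class, target_output_in_class, loss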