def Times(self, cntk_op, inputs):
    """
    Returns input[0] x input[1] (matrix multiplication).

    Arguments:
        cntk_op: CNTK operation to be imported.
        inputs: List of inputs to this node.

    Returns:
        A ngraph Op.
    """
    cast_0, cast_1 = inputs

    cast_0_len = len(cast_0.axes)
    cast_1_len = len(cast_1.axes)

    if cast_0_len == cast_1_len == 1:
        if cast_0.axes[0] != cast_1.axes[0]:
            cast_0 = ng.cast_axes(cast_0, cast_1.axes)
    elif cast_0_len == cast_1_len:
        if cast_0.axes[1].length == cast_1.axes[0].length:
            axes = [cast_0.axes[0], cast_1.axes[0]]
            axes.extend(cast_0.axes[2:])
            cast_0 = ng.cast_axes(cast_0, axes=axes)
        else:
            axes = self._match_axes(cast_0.axes, cast_1.axes)
            cast_0 = ng.cast_axes(cast_0, axes=axes)
    elif cast_0_len > cast_1_len:
        axes = self._match_axes(cast_0.axes, cast_1.axes)
        cast_0 = ng.cast_axes(cast_0, axes=axes)
    else:
        axes = self._match_axes(cast_1.axes, cast_0.axes)
        cast_1 = ng.cast_axes(cast_1, axes=axes)

    return ng.dot(cast_0, cast_1).named(cntk_op.uid)
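# --- Hedged usage sketch (not part of the importer above) ---
# ng.dot contracts axes that its two arguments share, so tensors built with
# independently created axis objects must have one side cast first. Axis
# names below are illustrative; this assumes the ngraph-python API used
# throughout these snippets.
import ngraph as ng

M = ng.make_axis(length=2)
K1 = ng.make_axis(length=3)
K2 = ng.make_axis(length=3)   # same length as K1 but a distinct axis object
N = ng.make_axis(length=4)

a = ng.placeholder([M, K1])
b = ng.placeholder([K2, N])

# Without the cast, K1 and K2 would not be contracted. Casting `a` so that
# its second axis *is* K2 makes ng.dot yield a result with axes [M, N].
a_cast = ng.cast_axes(a, [M, K2])
c = ng.dot(a_cast, b)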
def _expand_filters_axes(self, filters, C):
    """
    Expand and cast a filter with 1 to 5 axes into a 5D filter
    with axes (C, T, M1, M2, O).

    Arguments:
        filters: Convolution filter to be expanded.
        C: Axis to use as the filter's input channel axis.

    Returns:
        Filter broadcast to the expanded set of axes.
    """
    axes = filters.axes
    dim = len(axes)

    if dim == 5:
        O, _, T, M1, M2 = axes
        filters = ng.cast_axes(filters, [O, C, T, M1, M2])
    elif dim == 4:
        O, _, M1, M2 = axes
        filters = ng.cast_axes(filters, [O, C, M1, M2])
        T = ng.make_axis(1)
    elif dim == 3:
        O, M1, M2 = axes
        T = ng.make_axis(1)
    elif dim == 2:
        O, M1 = axes
        T = ng.make_axis(1)
        M2 = ng.make_axis(1)
    elif dim == 1:
        O, = axes
        T = ng.make_axis(1)
        M1 = ng.make_axis(1)
        M2 = ng.make_axis(1)
    else:
        raise ValueError("Convolution filter must have 1 to 5 axes.")

    return ng.broadcast(filters, [C, T, M1, M2, O])
def FC(self, c2_op, inputs):
    """
    Fully connected layer: multiplies the input by the weight matrix and
    adds the bias. The input and weights must be two-dimensional and their
    inner dimensions must match.

    Arguments:
        c2_op: OperatorDef object, the caffe2 node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the caffe2 node.

    Inputs to c2_op:
        X, W, b
    """
    # get inputs
    left, right, bias = inputs

    # check that the contracted axis lengths match
    assert left.axes[1].length == right.axes[1].length

    # cast the input's feature axis onto the weight's feature axis
    left_cast = ng.cast_axes(left, [left.axes[0], right.axes[1]])

    # matrix multiply
    dot_op = ng.dot(left_cast, right)

    # cast bias onto the output axis and add
    bias_cast = ng.cast_axes(bias, [dot_op.axes[-1]])
    result_op = ng.add(dot_op, bias_cast)
    return result_op
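# --- Hedged usage sketch for FC (axis names and lengths are illustrative) ---
N = ng.make_axis(length=8)     # batch
D = ng.make_axis(length=16)    # input features
O = ng.make_axis(length=4)     # output features
D2 = ng.make_axis(length=16)   # weight feature axis, same length as D
O2 = ng.make_axis(length=4)    # bias axis, same length as O

X = ng.placeholder([N, D])
W = ng.placeholder([O, D2])
b = ng.placeholder([O2])

# Mirrors the importer: cast X's feature axis onto W's, contract it with
# ng.dot, then cast the bias onto the resulting output axis.
X_cast = ng.cast_axes(X, [N, W.axes[1]])
dot = ng.dot(X_cast, W)    # axes: [N, O]
out = ng.add(dot, ng.cast_axes(b, [dot.axes[-1]]))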
def test_cast_axes(transformer_factory):
    C = ng.make_axis(length=2)
    D = ng.make_axis(length=3)

    x = ng.placeholder([C, D])

    with pytest.raises(ValueError):
        ng.cast_axes(x, [D, C])

    x_slice = x[1, :]
    # Cast back to known axes
    x_cast = ng.cast_axes(x_slice, [D])

    # Verify that the tensor broadcasts along D
    y = (x + x_cast).named('y')
    with ExecutorFactory() as ex:
        y_fun = ex.executor(y, x)
        num_deriv_fun = ex.numeric_derivative(y, x, delta)
        sym_deriv_fun = ex.derivative(y, x)

        x_np = np.array([[10, 20, 30], [1, 2, 3]], dtype='float32')
        y_fun_np = np.array([[11, 22, 33], [2, 4, 6]], dtype='float32')
        y_fun_ng = y_fun(x_np)
        assert ng.testing.allclose(y_fun_ng, y_fun_np)

        deriv_num = num_deriv_fun(x_np)
        deriv_sym = sym_deriv_fun(x_np)
        assert ng.testing.allclose(deriv_num, deriv_sym, rtol=rtol, atol=atol)
def __call__(self, inputs):
    query = ng.cast_axes(inputs['query'],
                         [self.batch_axis, self.sentence_rec_axis])

    # Query embedding [batch, sentence_axis, F]
    q_emb = self.LUT_A(query)

    # Multiply by position encoding and sum
    u_0 = ng.sum(q_emb * self.pos_enc,
                 reduction_axes=[self.sentence_rec_axis])  # [batch, F]

    # Start a list of the internal states of the model.
    # Will be appended to after each memory hop
    u = [u_0]

    for hopn in range(self.nhops):
        keys = ng.cast_axes(inputs['keys'],
                            [self.batch_axis, self.memory_axis,
                             self.sentence_rec_axis])
        value = ng.cast_axes(inputs['values'],
                             [self.batch_axis, self.memory_axis,
                              self.val_len_axis])

        # Embed keys
        m_emb_A = self.LUT_A(keys)
        m_A = ng.sum(m_emb_A * self.pos_enc,
                     reduction_axes=[self.sentence_rec_axis])  # [batch, memory_axis, F]

        # Compute scalar similarity between internal state and each memory
        # Equivalent to dot product between u[-1] and each memory in m_A
        dotted = ng.sum(u[-1] * m_A, reduction_axes=[self.embedding_axis])

        probs = ng.softmax(dotted, self.memory_axis)  # [batch, memory_axis]

        # Embed values with same embedding as keys, or new LUTs
        if self.use_v_luts:
            m_emb_C = self.LUTs_C[hopn](value)
        else:
            m_emb_C = self.LUT_A(value)

        m_C = ng.sum(m_emb_C * self.pos_enc,
                     reduction_axes=[self.sentence_rec_axis])

        # Compute weighted sum of output embeddings
        o_k = ng.sum(probs * m_C,
                     reduction_axes=[self.memory_axis])  # [batch, F]

        u_k = u[-1] + o_k  # [batch, F]

        # Add new internal state
        u.append(u_k)

    # Compute predicted answer from product of final internal state
    # and final LUT weight matrix
    if self.use_v_luts:
        a_logits = ng.dot(self.LUTs_C[-1].W, u[-1])  # [batch, V]
    else:
        a_logits = ng.dot(self.LUT_A.W, u[-1])  # [batch, V]

    # rename V to vocab_axis to match answer
    a_logits = ng.cast_axes(a_logits, [self.vocab_axis, self.batch_axis])
    a_pred = ng.softmax(a_logits, self.vocab_axis)

    return a_pred, a_logits
def _conv_bias_add(c2_op, inputs):
    X, bias = inputs
    # The bias is applied along the channel axis:
    # axis 1 for NCHW layouts, axis 3 for NHWC.
    bias = ng.cast_axes(bias, axes=ng.make_axes(
        [X.axes[1 if order == 'NCHW' else 3]]))
    Y = ng.Add(X, bias)
    return Y
def MatMul(self, tf_node, inputs):
    """
    Multiplies matrix `a` by matrix `b`. The inputs must be two-dimensional,
    the inner dimensions must match (possibly after transpose).

    Arguments:
        tf_node: NodeDef object, the tensorflow node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the tensorflow node.

    Inputs to tf_node:
        a, b, transpose_a, transpose_b, a_is_sparse, b_is_sparse, name
    """
    # get inputs
    left, right = inputs

    if tf_node.attr['transpose_a'].b:
        left = ng.Transpose(left)
    if tf_node.attr['transpose_b'].b:
        right = ng.Transpose(right)

    # check shape
    assert len(left.axes) == len(right.axes) == 2
    assert left.axes[1].length == right.axes[0].length

    # cast axis
    left_casted = ng.cast_axes(left, [left.axes[0], right.axes[0] - 1])

    # result op
    result_op = ng.dot(left_casted, right, name=tf_node.name)

    # return
    return result_op
def matmul(left, right, transpose_a=False, transpose_b=False, name=None):
    """
    Only support 2d matmul for now.
    """
    # Transpose
    if transpose_a:
        left = ng.Transpose(left)
    if transpose_b:
        right = ng.Transpose(right)

    # Check shape
    assert len(left.axes) == len(right.axes) == 2
    assert left.axes[1].length == right.axes[0].length

    # step 1: cast left (pos_1, pos_0), right (pos_1, pos_0) =>
    #         left (temp, pos_1), right (pos_1, pos_0)
    # step 2: perform left dot right, result (temp, pos_0)
    # step 3: cast back to (pos_1, pos_0)
    left_temp_axes = ng.make_axes(
        [ng.make_axis(left.axes[0].length), right.axes[0]])
    left = ng.cast_axes(left, axes=left_temp_axes)

    # Result op
    result_op = ng.dot(left, right).named(name)
    result_op = cast_to_pos_axes(result_op)

    # Return
    return result_op.named(name)
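# --- Hedged usage sketch for the matmul helper above (assumes the same
# importer module's cast_to_pos_axes / make_pos_axes are in scope) ---
A = ng.make_axis(length=2)
B = ng.make_axis(length=3)
C = ng.make_axis(length=4)

lhs = ng.placeholder([A, B])
rhs = ng.placeholder([B, C])

# The helper re-casts lhs internally so the contraction lines up; the
# result comes back on positional (pos_*) axes.
product = matmul(lhs, rhs, name='product')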
def Assign(self, tf_node, inputs):
    """
    Assign `value` to `ref`.

    Arguments:
        tf_node: NodeDef object, the tensorflow node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the tensorflow node.

    Inputs to tf_node:
        ref, value, validate_shape, use_locking, name

    TODO: currently cannot fully support the TensorFlow semantics.
    1. Assign in TF returns the assigned tensor; in ngraph, it returns None.
    2. In TF, if the assigned tensor is not used, then it retains the
       original value.
    """
    ref, value = inputs
    assert ref.axes.lengths == value.axes.lengths, "shape not the same"
    value = ng.cast_axes(value, ref.axes)

    return ng.assign(ref, value)
def test_cast_axes(transformer_factory):
    C = ng.make_axis(name='C')
    D = ng.make_axis(name='D')

    ex = ExecutorFactory()

    C.length = 2
    D.length = 3
    x = ng.placeholder((C, D))

    x_slice = x[1, :]
    # Cast back to known axes
    x_cast = ng.cast_axes(x_slice, [D])

    # Verify that the tensor broadcasts along D
    y = x + x_cast
    y_fun = ex.executor(y, x)
    num_deriv_fun = ex.numeric_derivative(y, x, delta)
    sym_deriv_fun = ex.derivative(y, x)

    x_np = np.array([[10, 20, 30], [1, 2, 3]], dtype='float32')
    assert np.allclose(y_fun(x_np),
                       np.array([[11, 22, 33], [2, 4, 6]], dtype='float32'))

    assert np.allclose(num_deriv_fun(x_np), sym_deriv_fun(x_np),
                       rtol=rtol, atol=atol)
def create_loss_and_learner(model, labels, learning_rate,
                            momentum_coef=0.0, wdecay=0.0, nesterov=False,
                            gradient_clip_norm=None,
                            gradient_clip_value=None):
    """
    Auxiliary function to create loss function (cross entropy and softmax)
    and trainer using stochastic gradient descent with momentum.

    Arguments:
        model - imported model
        labels - placeholder for one-hot labels array
        learning_rate - learning rate for trainer
        momentum_coef - coefficient of momentum (default 0.0)
        wdecay - amount of weight decay (default 0.0)
        nesterov - use nesterov accelerated gradient (default False)
        gradient_clip_norm - target gradient norm (default None)
        gradient_clip_value - value to element-wise clip gradients
            (default None)

    Returns:
        Loss function (mean for batch)
    """
    if model.axes.lengths != labels.axes.lengths:
        labels = ng.Transpose(labels)
    assert model.axes.lengths == labels.axes.lengths
    model = ng.cast_axes(model, axes=labels.axes)

    loss = ng.cross_entropy_multi(ng.softmax(model), labels)
    optimizer = GradientDescentMomentum(learning_rate, momentum_coef, wdecay,
                                        gradient_clip_norm,
                                        gradient_clip_value, nesterov)
    return ng.sequential([optimizer(loss), ng.mean(loss, out_axes=())])
def Times(self, cntk_op, inputs):
    """
    Returns input[0] x input[1] (matrix multiplication).

    Arguments:
        cntk_op: CNTK operation to be imported.
        inputs: List of inputs to this node.

    Returns:
        A ngraph Op.
    """
    cast_0, cast_1 = inputs

    if len(cast_0.axes) == 1 and len(cast_1.axes) == 1:
        pass
    elif len(cast_0.axes) == 1:
        # reuse an existing length-1 axis from the other input if available
        temp = next((x for x in cast_1.axes if x.length == 1), None)
        if temp is None:
            temp = ng.make_axis(1)
        cast_0 = ng.broadcast(cast_0, [temp, cast_0.axes])
    elif len(cast_1.axes) == 1:
        temp = next((x for x in cast_0.axes if x.length == 1), None)
        if temp is None:
            temp = ng.make_axis(1)
        cast_1 = ng.broadcast(cast_1, [temp, cast_1.axes])

    cast_0 = ng.cast_axes(cast_0, [cast_0.axes[0], cast_1.axes[0]])
    return ng.dot(cast_0, cast_1).named(cntk_op.uid)
def cast_axes_for_compound_op(self, inputs):
    left, right = inputs

    left_dim = len(left.axes)
    right_dim = len(right.axes)

    # pad left and right axis to be the same length, align right
    result_dim = max(left_dim, right_dim)
    left_axes_pad = [
        ng.make_axis(length=1) for _ in range(result_dim - left_dim)
    ] + list(left.axes)
    right_axes_pad = [
        ng.make_axis(length=1) for _ in range(result_dim - right_dim)
    ] + list(right.axes)
    result_axes = [
        ng.make_axis(length=max(l.length, r.length))
        for l, r in zip(left_axes_pad, right_axes_pad)
    ]

    # broadcast left / right, introducing dummy length 1 axes
    left = ng.broadcast(left, left_axes_pad)
    right = ng.broadcast(right, right_axes_pad)

    # make two-way map of lr matching axes and map for result axes
    lr_axes_map = dict()
    result_axes_map = dict()
    for l, r, re in zip(left.axes, right.axes, result_axes):
        lr_axes_map[l] = r
        lr_axes_map[r] = l
        result_axes_map[l] = re
        result_axes_map[r] = re

    # get left / right slice
    left_slice = []
    right_slice = []
    for l, r in zip(left.axes, right.axes):
        if l.length == 1 and r.length != 1:
            left_slice.append(0)
        else:
            left_slice.append(slice(None))
        if r.length == 1 and l.length != 1:
            right_slice.append(0)
        else:
            right_slice.append(slice(None))

    # perform slicing
    left_sliced = ng.tensor_slice(left, left_slice)
    right_sliced = ng.tensor_slice(right, right_slice)

    # now cast the right_sliced to left_sliced from the axis map
    right_casted_axes = []
    for r in right_sliced.axes:
        if r in lr_axes_map and lr_axes_map[r] in left_sliced.axes:
            right_casted_axes.append(lr_axes_map[r])
        else:
            right_casted_axes.append(r)
    right_sliced_casted = ng.cast_axes(right_sliced, right_casted_axes)

    return left_sliced, right_sliced_casted
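# --- Hedged sketch of the numpy-style broadcast this helper arranges,
# shown inline rather than through the method; axis names illustrative ---
A = ng.make_axis(length=3)
one_l = ng.make_axis(length=1)
one_r = ng.make_axis(length=1)
B = ng.make_axis(length=4)

lhs = ng.placeholder([A, one_l])    # shape (3, 1)
rhs = ng.placeholder([one_r, B])    # shape (1, 4)

# slice away the length-1 axes, as the helper does
lhs_s = ng.tensor_slice(lhs, [slice(None), 0])   # axes: [A]
rhs_s = ng.tensor_slice(rhs, [0, slice(None)])   # axes: [B]

# an elementwise op on disjoint named axes broadcasts to their union,
# giving a result with axes [A, B], matching numpy's (3, 1) + (1, 4) -> (3, 4)
total = lhs_s + rhs_s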
def cast_axes_for_matmul(ng_input_left, ng_input_right):
    # type: (TensorOp, TensorOp) -> Tuple[TensorOp, TensorOp]
    """
    Prepare two ngraph tensors for matrix multiplication by casting axes.

    Matching axes will be cast to enable matrix @ matrix or vector @ matrix
    dot multiply.

    :param ng_input_left: first input to matrix multiplication
    :param ng_input_right: second input to matrix multiplication
    :return: tuple with the first and second input tensor with axes cast
        for matrix multiplication
    """
    left, right = ng_input_left, ng_input_right
    left_num_axes = len(left.axes)
    right_num_axes = len(right.axes)

    if left_num_axes == right_num_axes == 1:
        # vector @ vector
        # cast to axes: i, i
        assert left.shape.lengths == right.shape.lengths, \
            'Vector lengths must be equal for multiplication.'
        if left.shape != right.shape:
            right = ng.cast_axes(right, axes=left.axes)

    elif left_num_axes == 1:
        # vector @ matrix
        # cast to axes: i, ...ij
        if left.axes[0] != right.axes[-2]:
            left = ng.cast_axes(left, axes=right.axes[-2])

    elif right_num_axes == 1:
        # matrix @ vector
        # cast to axes: ...i, i
        if left.axes[-1] != right.axes[0]:
            right = ng.cast_axes(right, axes=left.axes[-1])

    else:
        # matrix @ matrix
        # cast to axes: ...ij, ...jk
        right_axes = [
            ng.make_axis(name='DOT_{}'.format(i), length=axis.length)
            for i, axis in enumerate(right.shape)
        ]
        right_axes[-2] = left.axes[-1]
        right = ng.cast_axes(right, axes=right_axes)

    return left, right
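# --- Hedged usage sketch for cast_axes_for_matmul: the matrix @ vector
# case; axis names are illustrative ---
I = ng.make_axis(length=3)
J = ng.make_axis(length=4)
J2 = ng.make_axis(length=4)   # distinct axis object with a matching length

mat = ng.placeholder([I, J])
vec = ng.placeholder([J2])

mat, vec = cast_axes_for_matmul(mat, vec)   # vec is cast onto J
out = ng.dot(mat, vec)                      # contracts J, leaving axes [I]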
def test_conv_flatten_deriv(n4_hw12_c3_5x5):
    """
    Test deriv of conv followed by flatten
    """
    cf = ConvParams(**n4_hw12_c3_5x5)

    axes_rsck = ng.make_axes([cf.ax_f[2], cf.ax_f[3], cf.ax_f[0],
                              cf.ax_f[-1]])
    axes_rsck_prime = ng.make_axes([ng.make_axis(name=ax.name + 'p',
                                                 length=ax.length)
                                    for ax in axes_rsck])
    axes_nmpqk = ng.make_axes([cf.ax_o[-1], cf.ax_o[1], cf.ax_o[2],
                               cf.ax_o[3], cf.ax_o[0]])

    # broadcast input / filter axes
    input_var = ng.variable(cf.ax_i).named('input')
    input_val = np.ones(input_var.axes.lengths)

    filter_rsck_prime = ng.variable(axes_rsck_prime).named('filter')
    filter_var = filter_rsck_prime
    filter_rsck = ng.cast_axes(filter_rsck_prime, axes_rsck).named('frsck')
    filter_trsck = ng.expand_dims(filter_rsck, cf.ax_f[1], 0).named('ftrsck')
    filter_ctrsk = ng.axes_with_order(filter_trsck,
                                      axes=cf.ax_f).named('ctrsk')

    # convolution
    output_kmpqn = ng.convolution(cf.conv_params, input_var, filter_ctrsk,
                                  axes=cf.ax_o)
    output_nmpqk = ng.axes_with_order(output_kmpqn, axes=axes_nmpqk)

    # slice away the oD
    out_slicing = [slice(None), 0, slice(None), slice(None), slice(None)]
    output_npqk = ng.tensor_slice(output_nmpqk, out_slicing)

    output = ng.flatten_at(output_npqk, idx=1)

    # cost and grad
    cost = ng.sum(output, out_axes=())

    filter_val = np.ones(filter_var.axes.lengths)

    with ExecutorFactory() as factory:
        conv_comp = factory.executor(output, filter_var, input_var)
        grad_filter_num_comp = factory.numeric_derivative(cost, filter_var,
                                                          1.0, input_var)
        grad_filter_sym_comp = factory.derivative(cost, filter_var,
                                                  input_var)
        grad_input_num_comp = factory.numeric_derivative(cost, input_var,
                                                         1.0, filter_var)
        grad_input_sym_comp = factory.derivative(cost, input_var, filter_var)

        conv_val = conv_comp(filter_val, input_val)
        conv_val_num = np.empty_like(conv_val)
        conv_val_num.fill(np.prod(cf.ax_f.lengths[:-1]))
        ng.testing.assert_allclose(conv_val, conv_val_num)

        grad_filter_num_val = grad_filter_num_comp(filter_val, input_val)
        grad_filter_sym_val = grad_filter_sym_comp(filter_val, input_val)
        ng.testing.assert_allclose(grad_filter_num_val, grad_filter_sym_val)

        grad_input_num_val = grad_input_num_comp(input_val, filter_val)
        grad_input_sym_val = grad_input_sym_comp(input_val, filter_val)
        ng.testing.assert_allclose(grad_input_num_val, grad_input_sym_val)
def test_idempotent_axes_c():
    """
    Test axes transformations with autodiff, case c, with broadcast,
    slice, cast and dim-shuffle
    """
    with ExecutorFactory() as ex:
        axes = ng.make_axes([ng.make_axis(3), ng.make_axis(1)])
        result_axes = [ng.make_axis(length=axis.length) for axis in axes]

        # variable
        w = ng.variable(axes, initial_value=np.ones((3, 1)))

        # broadcast l / r, introducing dummy length 1 axes
        l = ng.broadcast(w, axes)
        r = ng.broadcast(w, axes)

        # slice
        axes_slice = [slice(None, None, None), slice(None, None, None)]
        l_sliced = ng.tensor_slice(l, axes_slice)
        r_sliced = ng.tensor_slice(r, axes_slice)

        # cast r
        r_sliced_casted = ng.cast_axes(r_sliced, axes)

        # perform add
        result = ng.add(l_sliced, r_sliced_casted)

        # cast / dimshuffle
        result = ng.cast_axes(result, result_axes)
        result = ng.axes_with_order(result, result_axes)

        # cost and grad
        cost = ng.sum(result, reduction_axes=result.axes)
        grad = ng.deriv(cost, w)

        grad_comp = ex.executor(grad)
        cost_comp = ex.executor(cost)

        cost_comp_ng = cost_comp()
        grad_comp_ng = grad_comp()
        grad_comp_np = np.ones((3, 1)) * 2.

        assert cost_comp_ng == 6.0
        assert np.array_equal(grad_comp_ng, grad_comp_np)
def Eltwise(self, layer, inputs):
    """
    To support the Eltwise layer of caffe.

    Arguments:
        layer: Layer which needs to be mapped to ngraph op
        inputs: input ops on which the current op depends

    Returns:
        ngraph output operation corresponding to the given layer
    """
    operation = layer.eltwise_param.operation

    if operation == caffe_pb2.EltwiseParameter.SUM:
        ax = inputs[0].axes
        out = ng.add(inputs[0], ng.cast_axes(inputs[1], ax))
        for inp in inputs[2:]:
            out = ng.add(out, ng.cast_axes(inp, ax))
        out = out.named(layer.name)

    return out
def Reshape(self, tf_node, inputs):
    """
    Reshapes a tensor.

    Arguments:
        tf_node: NodeDef object, the tensorflow node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the tensorflow node.

    Inputs to tf_node:
        tensor, shape, name
    """
    # TODO: currently only support constants and flatten to 1d and 2d
    # get inputs
    tensor, shape = inputs

    def get_flatten_idx(shape_i, shape_o):
        """
        check if flattening shape is valid

        Args:
            shape_i: input tensor shape
            shape_o: output flattened tensor shape

        Returns:
            None if flatten not valid, otherwise the flatten_at index
        """
        return None

    # get input and output shape
    shape_i = tensor.shape.lengths
    shape_o = tuple(shape.const.astype(int))
    if np.prod(shape_i) != np.prod(shape_o):
        raise ValueError("Total size of input and output dimension "
                         "mismatch.")

    if tensor.const is not None:
        # reshape const
        np_val = np.reshape(tensor.const, shape_o)
        return ng.constant(np_val,
                           make_pos_axes(np_val.shape)).named(tf_node.name)
    else:
        ndims_o = len(shape_o)
        if ndims_o != 1 and ndims_o != 2:
            raise NotImplementedError("Reshape can only support flatten "
                                      "to 1d or 2d.")
        if ndims_o == 1:
            tensor = ng.flatten(tensor)
        else:
            cumprods = list(np.cumprod(shape_i))
            flatten_at_idx = cumprods.index(shape_o[0]) + 1
            tensor = ng.flatten_at(tensor, flatten_at_idx)
        res = ng.cast_axes(tensor, make_pos_axes(shape_o))
    return res.named(tf_node.name)
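# --- Hedged sketch of the flatten_at index computation used above ---
# For a 2-d target shape, the split point is where the running product of
# the input lengths first equals the target's leading length.
import numpy as np

shape_i = (2, 3, 4)
shape_o = (6, 4)
cumprods = list(np.cumprod(shape_i))             # [2, 6, 24]
flatten_at_idx = cumprods.index(shape_o[0]) + 1  # 2: split into [0:2], [2:]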
def _cast_for_binary_op(self, inputs):
    """
    Cast the axes of the input with more axes to match the axes of the
    other input.

    Arguments:
        inputs: List of inputs to be casted.

    Returns:
        Casted inputs.
    """
    cast_0, cast_1 = remove_ones_axes(inputs)

    if len(cast_0.axes) >= len(cast_1.axes):
        axes = self._match_axes(cast_0.axes, cast_1.axes)
        cast_0 = ng.cast_axes(cast_0, axes)
    else:
        axes = self._match_axes(cast_1.axes, cast_0.axes)
        cast_1 = ng.cast_axes(cast_1, axes)

    return cast_0, cast_1
def test_conv_flatten_deriv(transformer_factory):
    """
    Test deriv of conv followed by flatten
    """
    # set shape
    C, D, H, W, N = (3, 1, 28, 28, 8)
    C, T, R, S, K = (3, 1, 5, 5, 32)

    # i, f, o axes
    ax_i = ng.make_axes([ax.C, ax.D, ax.H, ax.W, ax.N])
    ax_f = ng.make_axes([ax.C, ax.T, ax.R, ax.S, ax.K])
    ax_o = ng.make_axes([
        ng.make_axis(32, roles=[ar.Channel]),
        ng.make_axis(1, roles=[ar.Depth]),
        ng.make_axis(24, roles=[ar.Height]),
        ng.make_axis(24, roles=[ar.Width]),
        ax.N
    ])
    ax_i.set_shape((C, D, H, W, N))
    ax_f.set_shape((C, T, R, S, K))
    params = dict(pad_d=0, pad_h=0, pad_w=0, str_d=1, str_h=1, str_w=1)

    axes_rsck = ng.make_axes([ax.R, ax.S, ax.C, ax.K])
    axes_rsck_prime = ng.make_axes(
        [ng.make_axis(l) for l in axes_rsck.lengths])

    # broadcast input / filter axes
    image = ng.constant(np.ones(ax_i.lengths), ax_i)
    filter = ng.variable(axes_rsck_prime,
                         initial_value=np.ones((R, S, C, K)))
    filter_casted = ng.cast_axes(filter, axes_rsck)
    filter_casted = ng.expand_dims(filter_casted, ax.T, 0)
    filter_casted = ng.axes_with_order(filter_casted, axes=ax_f)

    # convolution
    output = ng.convolution(params, image, filter_casted, axes=ax_o)
    oC, oD, oH, oW, oN = output.axes
    output = ng.axes_with_order(output,
                                axes=ng.make_axes([oN, oD, oH, oW, oC]))

    # slice away the oD
    out_slicing = [slice(None), 0, slice(None), slice(None), slice(None)]
    conv = ng.Slice(output, out_slicing)
    flatten = ng.flatten_at(conv, idx=1)

    # cost and grad
    cost = ng.sum(flatten, reduction_axes=flatten.axes)
    grad = ng.deriv(cost, filter)

    # compute
    conv_grad_comp = executor([conv, grad])
    conv_val, grad_val = conv_grad_comp()

    assert np.allclose(conv_val, np.zeros_like(conv_val) + 75.)
    assert np.allclose(grad_val, np.zeros_like(grad_val) + 4608.)
def SparseSoftmaxCrossEntropyWithLogits(self, tf_node, inputs):
    """
    Computes softmax cross entropy. The inputs `logits` are unscaled log
    probabilities, and each row of `labels[i]` must be a valid distribution.
    Reference: https://goo.gl/z5T2my

    Arguments:
        tf_node: NodeDef object, the tensorflow node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the tensorflow node.

    Inputs to tf_node:
        logits, labels, name
    """
    # logits: (N1, Y1), labels: (N2,)
    logits, labels = inputs

    # check input dimension
    try:
        assert len(logits.axes) == 2
        assert len(labels.axes) == 1
        assert logits.axes[0].length == labels.axes[0].length
    except:
        raise NotImplementedError("logits' shape must be (N, Y), "
                                  "labels' shape must be (N,), "
                                  "other shapes not supported yet.")

    # get axis
    axis_y = logits.axes[1]

    # labels_one_hot: (Y2, N2)
    labels_one_hot = ng.one_hot(labels, axis=axis_y)

    # predicts: (N1, Y1)
    predicts = ng.softmax(logits, normalization_axes=axis_y)

    # dim-shuffle / cast to (Y1, N1)
    predicts_axes = ng.make_axes(
        [axis for axis in reversed(predicts.axes)])
    predicts = ng.axes_with_order(predicts, axes=predicts_axes)
    labels_one_hot = ng.cast_axes(labels_one_hot, predicts_axes)

    # cross_entropy: (N1,)
    cross_entropy = ng.cross_entropy_multi(
        predicts, labels_one_hot, out_axes=(logits.axes[0],))

    return cross_entropy
def cast_axes_for_matmul(ng_input_left, ng_input_right):
    # type: (Op, Op) -> (Op, Op)
    left, right = ng_input_left, ng_input_right
    left_num_axes = len(left.axes)
    right_num_axes = len(right.axes)

    if left_num_axes == right_num_axes == 1:
        # vector @ vector
        # cast to axes: i, i
        assert left.shape.lengths == right.shape.lengths, \
            "Vector lengths must be equal for multiplication."
        if left.shape != right.shape:
            right = ng.cast_axes(right, axes=left.axes)

    elif left_num_axes == 1:
        # vector @ matrix
        # cast to axes: i, ...ij
        if left.axes[0] != right.axes[-2]:
            left = ng.cast_axes(left, axes=right.axes[-2])

    elif right_num_axes == 1:
        # matrix @ vector
        # cast to axes: ...i, i
        if left.axes[-1] != right.axes[0]:
            right = ng.cast_axes(right, axes=left.axes[-1])

    else:
        # matrix @ matrix
        # cast to axes: ...ij, ...jk
        right_axes = [
            ng.make_axis(name='DOT_{}'.format(i), length=axis.length)
            for i, axis in enumerate(right.shape)
        ]
        right_axes[-2] = left.axes[-1]
        right = ng.cast_axes(right, axes=right_axes)

    return left, right
def cast_to_pos_axes(x, prefix=POS_AXIS_PREFIX):
    """
    Cast an op to positional axes.

    E.g. before: x.axes == ['H', 'W']
         after: x.axes == ['pos_1', 'pos_0']

    Args:
        x: ngraph op

    Returns:
        x casted to positional axes
    """
    return ng.cast_axes(x, make_pos_axes(x.axes.lengths, prefix=prefix))
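# --- Hedged usage sketch for cast_to_pos_axes (assumes make_pos_axes from
# the same importer module; axis lengths are illustrative) ---
H = ng.make_axis(length=4, name='H')
W = ng.make_axis(length=5, name='W')
x = ng.placeholder([H, W])

x_pos = cast_to_pos_axes(x)   # same lengths, axes renamed to pos_* names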
def LabelCrossEntropy(self, c2_op, inputs):
    """
    Computes the cross entropy between the input and the label set.

    Arguments:
        c2_op: OperatorDef object, the caffe2 node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the caffe2 node.
    """
    y, labels = inputs
    labels_one_hot = ng.one_hot(labels, axis=y.axes[1])
    labels_one_hot = ng.cast_axes(labels_one_hot,
                                  [labels_one_hot.axes[0], y.axes[0]])
    return ng.cross_entropy_multi(y, labels_one_hot, out_axes=y.axes[0])
def test_flatten_deriv():
    from ngraph.frontends.neon import ax
    np.random.seed(0)

    # set shape
    C, D, H, W, N = (3, 1, 28, 28, 8)  # image
    Y = 10

    ax.C.length = C
    ax.D.length = D
    ax.H.length = H
    ax.W.length = W
    ax.N.length = N
    ax.Y.length = Y

    # conv output
    conv = ng.placeholder(ng.make_axes([ax.N, ax.H, ax.W, ax.C]))

    # flatten
    flatten = ng.flatten_at(conv, idx=1)
    num_flatten = flatten.axes.lengths[1]
    flatten = ng.cast_axes(flatten,
                           ng.make_axes([ax.N, ng.make_axis(num_flatten)]))

    # fc
    fc_weights_axes = ng.make_axes([ng.make_axis(num_flatten), ax.Y])
    fc_weights = ng.constant(np.random.randn(num_flatten, Y),
                             axes=fc_weights_axes)
    flatten_casted = ng.cast_axes(
        flatten, ng.make_axes([flatten.axes[0], fc_weights_axes[0] - 1]))
    logits = ng.dot(flatten_casted, fc_weights)
    cost = ng.sum(logits, reduction_axes=logits.axes)

    delta = 0.001
    u = rng.uniform(.1, 5.0, conv.axes)
    check_derivative(cost, conv, delta, u, atol=1e-2, rtol=1e-2)
def SquaredL2Distance(self, c2_op, inputs):
    """
    Computes squared L2 distance between two inputs.

    Arguments:
        c2_op: OperatorDef object, the caffe2 node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the caffe2 node.
    """
    x, y = inputs
    y = ng.cast_axes(y, x.axes)
    out_axes = y.axes.batch_axes() if y.axes.batch_axes() else y.axes[0]
    return 0.5 * ng.squared_L2(x - y, out_axes=out_axes)
def rename_axes(input_tensor, output_template):
    # type: (TensorOp, str) -> TensorOp
    """
    Rename tensor axes according to letter names given in `output_template`.

    Example: if `output_template` is 'NHWC', then axes will be renamed to
    'N', 'H', 'W' and 'C'.

    :param input_tensor: ngraph TensorOp
    :param output_template: string with one letter per axis in `input_tensor`
    :return: ngraph TensorOp with renamed axes
    """
    output_axes = [
        ng.make_axis(length=input_tensor.axes[i].length,
                     name=output_template[i])
        for i in range(len(input_tensor.axes))
    ]
    return ng.cast_axes(input_tensor, axes=ng.make_axes(output_axes))
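# --- Hedged usage sketch for rename_axes (axis lengths illustrative) ---
a0 = ng.make_axis(length=8)
a1 = ng.make_axis(length=24)
a2 = ng.make_axis(length=24)
a3 = ng.make_axis(length=3)

t = ng.placeholder([a0, a1, a2, a3])
t_nhwc = rename_axes(t, 'NHWC')   # axes now named 'N', 'H', 'W', 'C'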
def cross_entropy_with_softmax(model, labels):
    """
    Auxiliary function to add cross entropy and softmax (loss function)
    to imported model for training.

    Arguments:
        model - imported model
        labels - placeholder for one-hot labels array

    Returns:
        Loss function (mean for batch)
    """
    if model.axes.lengths != labels.axes.lengths:
        model = ng.Transpose(model)
    assert model.axes.lengths == labels.axes.lengths
    model = ng.cast_axes(model, axes=labels.axes)

    loss = ng.cross_entropy_multi(ng.softmax(model), labels)
    return ng.mean(loss, out_axes=())
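# --- Hedged usage sketch: attaching the loss to an imported model output
# (axis names and lengths are illustrative) ---
Y = ng.make_axis(length=10)   # classes
N = ng.make_axis(length=32)   # batch
model_out = ng.placeholder([Y, N])
labels = ng.placeholder([Y, N])   # one-hot targets

loss = cross_entropy_with_softmax(model_out, labels)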
def test_shuffled_deriv(transformer_factory):
    # This gets the axes of a delta in a generate_add_delta in a different
    # order than the value being updated
    C = ng.make_axis(length=3)
    T = ng.make_axis(length=1)
    R = ng.make_axis(length=5)
    S = ng.make_axis(length=5)

    axes = [R, S, C]
    v = ng.variable([ng.make_axis(_.length) for _ in axes])
    rsc = ng.cast_axes(v, axes)
    trsc = ng.expand_dims(rsc, T, 0)
    ctrs = ng.axes_with_order(trsc, axes=[C, T, R, S])
    cost = ng.sum(ctrs, out_axes=None)
    grad = ng.deriv(cost, v)

    with ExecutorFactory() as ex:
        d_fun = ex.executor(grad)
        d_fun()
def test_idempotent_axes_a():
    """
    Test axes transformations with autodiff, case a, reference test
    """
    with ExecutorFactory() as ex:
        axes = ng.make_axes([ng.make_axis(3), ng.make_axis(1)])

        w = ng.variable(axes, initial_value=np.ones((3, 1)))
        result = w + w

        result = ng.cast_axes(result, axes)
        cost = ng.sum(result, reduction_axes=axes)
        grad = ng.deriv(cost, w)

        grad_comp = ex.executor(grad)
        cost_comp = ex.executor(cost)

        assert cost_comp() == 6.0
        assert np.array_equal(grad_comp(), np.ones((3, 1)) * 2.)
def AssignAdd(self, tf_node, inputs):
    """
    Assign `ref` + `value` to `ref`. Update 'ref' by adding 'value' to it.

    Arguments:
        tf_node: NodeDef object, the tensorflow node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the tensorflow node.

    Inputs to tf_node:
        ref, value, use_locking, name
    """
    ref, value = inputs
    assert ref.axes.lengths == value.axes.lengths, "shape not the same"
    value = ng.cast_axes(value, ref.axes)

    # add `value` to `ref` before assigning, per the op's semantics
    return ng.assign(ref, ref + value)
const_loss = ng.constant(axes=[ax.Y, dummy_axis], const=1)
const_LSTM_embed = ng.constant(axes=[F_embed, dummy_axis], const=1)

# Create masks
reorder_para_mask = ng.axes_with_order(
    inputs['para_len'],
    axes=[dummy_axis, inputs['para_len'].axes[2], N])

reorder_ques_mask = ng.axes_with_order(
    inputs['question_len'],
    axes=[dummy_axis, inputs['question_len'].axes[2], N])

# Masks for question and para after encoding layer
mask_para = ng.dot(const_LSTM, reorder_para_mask)
mask_question = ng.dot(const_LSTM,
                       ng.cast_axes(reorder_ques_mask,
                                    [dummy_axis, REC, N]))

# Masks for question and para after embedding/LookupTable layer
mask_para_embed = ng.dot(const_LSTM_embed, reorder_para_mask)
mask_question_embed = ng.dot(const_LSTM_embed,
                             ng.cast_axes(reorder_ques_mask,
                                          [dummy_axis, REC, N]))

# Pass question and para through embedding layer and dropout layers
embed_output_para_1 = embed_layer(inputs['para'])
embed_output_para = dropout_1(embed_output_para_1, keep=dropout_val)

question_inps = ng.cast_axes(inputs['question'], [N, REC])
embed_output_ques_1 = embed_layer(question_inps)
embed_output_ques = dropout_2(embed_output_ques_1, keep=dropout_val)
def __call__(self, inputs):
    query = ng.cast_axes(inputs['user_utt'],
                         [self.batch_axis, self.sentence_rec_axis])

    # Query embedding [batch, sentence_axis, F]
    q_emb = self.LUT_A(query)

    # Multiply by position encoding and sum
    u_0 = ng.sum(q_emb, reduction_axes=[self.sentence_rec_axis])

    # Start a list of the internal states of the model. Will be appended to
    # after each memory hop
    u = [u_0]

    for hopn in range(self.nhops):
        story = ng.cast_axes(inputs['memory'],
                             [self.batch_axis, self.memory_axis,
                              self.sentence_rec_axis])

        # Re-use the query embedding matrix to embed the memory sentences
        # [batch, memory_axis, sentence_axis, F]
        m_emb_A = self.LUT_A(story)
        m_A = ng.sum(m_emb_A,
                     reduction_axes=[self.sentence_rec_axis])  # [batch, memory_axis, F]

        # Compute scalar similarity between internal state and each memory
        # Equivalent to dot product between u[-1] and each memory in m_A
        # [batch, memory_axis]
        dotted = ng.sum(u[-1] * m_A, reduction_axes=[self.embedding_axis])

        # [batch, memory_axis]
        probs = ng.softmax(dotted, self.memory_axis)

        # Renormalize probabilities according to non-empty memories
        probs_masked = probs * inputs['memory_mask']
        renorm_sum = ng.sum(probs_masked,
                            reduction_axes=[self.memory_axis]) + self.eps
        probs_renorm = (probs_masked + self.eps) / renorm_sum

        # Compute weighted sum of memory embeddings
        o_k = ng.sum(probs_renorm * m_A,
                     reduction_axes=[self.memory_axis])  # [batch, F]

        # Add the output back into the internal state and project
        u_k = ng.cast_axes(ng.dot(self.R_proj, o_k),
                           [self.embedding_axis,
                            self.batch_axis]) + u[-1]  # [batch, F_proj]

        # Add new internal state
        u.append(u_k)

    if self.use_match_type:
        # [batch_axis, cand_axis, cand_rec_axis, F]
        self.cands_mat = inputs['cands_mat']

    # Embed all candidate responses using LUT_W
    # [<batch_axis>, cand_axis, cand_rec_axis, F]
    cand_emb_W = self.LUT_W(self.cands_mat)
    # No position encoding added yet
    cands_mat_emb = ng.sum(
        cand_emb_W,
        reduction_axes=[self.cand_rec_axis])  # [<batch_axis>, cand_axis, F]

    # Compute predicted answer from product of final internal state
    # and embedded candidate answers
    # a_logits = ng.dot(cands_mat_emb, u[-1])  # [batch, cand_axis]
    # [batch, cand_axis]
    a_logits = ng.sum(u[-1] * cands_mat_emb,
                      reduction_axes=[self.embedding_axis])

    # rename V to vocab_axis to match answer
    a_logits = ng.cast_axes(a_logits, [self.batch_axis, self.cand_axis])
    a_pred = ng.softmax(a_logits, self.cand_axis)

    return a_pred, probs_renorm