def attention(query, key, value, obey_sequence_order=False, max_seq_len=None):
    """ Scaled dot-product attention over CNTK dynamic sequence axes.

    Implements softmax(Q K^T / sqrt(dk)) V as in "Attention Is All You Need"
    (Vaswani et al., 2017). Fixes two issues in the previous revision:
    `obey_sequence_order` and `max_seq_len` were referenced but never defined
    (NameError), and the scores were divided by dk instead of sqrt(dk).

    Arguments:
        query: sequence tensor; last static axis is the query dimension
        key: sequence tensor; key dimension must equal query dimension
        value: sequence tensor of values to be attended over
        obey_sequence_order (bool, default False): when True, mask out
            connections to future positions (causal/auto-regressive attention)
        max_seq_len (int, optional): upper bound on sequence length; required
            when ``obey_sequence_order`` is True (sizes the causal mask)

    Returns:
        :class:`~cntk.ops.functions.Function`: the attended values, one per
        query position

    Raises:
        ValueError: if ``obey_sequence_order`` is True but ``max_seq_len``
            is not provided
    """
    # dk == query dim as a scalar; cannot use sequence.last, will conflict with recurrence
    dk = C.reduce_sum(C.ones_like(query))  # dk: [#, *] [1, ]
    unpacked_key = C.sequence.unpack(key, padding_value=0, no_mask_output=True)  # [#] [-3, key_dim]
    unpacked_value = C.sequence.unpack(value, padding_value=0, no_mask_output=True)  # [#] [-3, value_dim]
    broadcasted_key = C.sequence.broadcast_as(unpacked_key, query)  # [#, *] [-3, key_dim]

    # [#, *] [q_dim] @ [#, *] [key_dim, -3], assert q_dim == key_dim.
    # Divide by sqrt(dk) per the paper (previously divided by dk, which
    # over-damps the logits for large dimensions).
    scaled = C.times_transpose(query, broadcasted_key) / C.sqrt(dk)
    # scaled: [#, *] [-3, ] => for every key seq element, there is a corresponding score

    # Mask out invalid temporal connections to obey_sequence_order
    if obey_sequence_order and max_seq_len:
        # unpacked_scaled: [#] [-3, -3] <== matrix will be top right diagonally zero-ed
        # scaled_mask: [#] [-3,]
        unpacked_scaled, scaled_mask = C.sequence.unpack(scaled, padding_value=0).outputs
        minus_inf = C.constant(-1e+30)
        # Lower-triangular matrix of ones: position i may attend to j only when j <= i.
        valid_connections = C.Constant(np.tril(np.ones((max_seq_len, max_seq_len)), k=0))  # [] [max_seq, max_seq]
        valid_connections = C.reconcile_dynamic_axes(valid_connections, unpacked_scaled)  # [#] [max_seq, max_seq]
        valid_connections = C.crop_manual(valid_connections, unpacked_scaled, 0, 0)  # [#] [-3, -3]
        # Disallowed scores become ~-inf so softmax assigns them ~zero weight.
        unpacked_scaled = C.element_select(valid_connections, unpacked_scaled, minus_inf)  # [#] [-3, -3]
        scaled = C.to_sequence_like(unpacked_scaled, query)  # [#, *] [-3]
    elif obey_sequence_order and not max_seq_len:
        raise ValueError("max_seq_len must be defined when obey_sequence_order is True")

    attended = C.times(C.softmax(scaled, axis=-1), C.sequence.broadcast_as(unpacked_value, query))  # [#, *] [value_dim,]
    return attended
def centre_crop(larger_image, smaller_image, name: str = ''):
    """ Centre crop spatial dimensions only.

    Arguments:
        larger_image: class:`~cntk.ops.functions.Function` that outputs the tensor to be centre cropped
        smaller_image: class:`~cntk.ops.functions.Function` that outputs the reference tensor
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`
    """
    src_shape = larger_image.shape       # the larger tensor, assumed (C, H, W)
    ref_shape = smaller_image.shape      # the smaller reference tensor

    # Truncating division towards zero (matches int(x / 2) semantics).
    row_offset = int((src_shape[1] - ref_shape[1]) / 2)
    col_offset = int((src_shape[2] - ref_shape[2]) / 2)

    # Guard clause: a negative offset means the "larger" image is actually smaller.
    if row_offset < 0 or col_offset < 0:
        raise ValueError(
            f"offset became negative, check if image was passed correctly. "
            f"larger image {larger_image.shape}, smaller image {smaller_image.shape}"
        )

    # Shapes already match spatially — nothing to crop.
    if row_offset == 0 and col_offset == 0:
        return larger_image

    return C.crop_manual(larger_image, smaller_image, row_offset, col_offset, name=name)
def test_crop():
    """Exercise manual and automatic crop nodes on a tiny conv network."""
    # Build a conv -> pool -> deconv pipeline whose output is larger than the
    # 5x5 reference, so cropping is required to compare against it.
    x = C.input_variable((1, 5, 5))
    reference = C.input_variable((1, 5, 5))
    model = C.layers.Sequential([
        C.layers.Convolution2D(filter_shape=(3, 3), num_filters=1, init=1,
                               strides=(2, 2), pad=True, bias=False),
        C.layers.MaxPooling(filter_shape=(3, 3), strides=(2, 2), pad=True),
        C.layers.ConvolutionTranspose(filter_shape=(4, 4), num_filters=1,
                                      strides=(4, 4), init=1, bias=False),
    ])
    y = model(x)

    # Input data.
    feed = {
        x: -np.arange(25).reshape(1, 1, 5, 5).astype(np.float32),
        reference: np.zeros([1, 1, 5, 5]).astype(np.float32),
    }

    # Expected cropped output.
    values = [-12, -12, -12, -24, -24] * 3 + [-63, -63, -63, -81, -81] * 2
    expected = np.asarray(values, dtype=np.float32).reshape(1, 1, 5, 5)

    # Crop with explicitly specified offsets.
    assert np.array_equal(C.crop_manual(y, reference, 1, 1).eval(feed), expected)

    # Crop with automatically computed offsets; inputs share a common ancestor.
    assert np.array_equal(C.crop_automatic(y, x).eval(feed), expected)

    # Crop with automatically computed offsets; inputs have no common
    # ancestor, so the ancestor pair is supplied explicitly.
    cropped = C.crop_automatic_with_ancestors(y, reference, x, reference).eval(feed)
    assert np.array_equal(cropped, expected)
def test_crop():
    """Exercise manual and automatic crop nodes on a tiny conv network.

    Style fix: keyword arguments previously used spaces around ``=``
    (``filter_shape = (3, 3)``), which violates PEP 8 and diverges from the
    sibling crop test; normalized to ``kwarg=value`` with no behavior change.
    """
    # Small network: conv -> pool -> deconv, output larger than the 5x5
    # reference so cropping is needed.
    node_input = C.input_variable((1, 5, 5))
    node_referent = C.input_variable((1, 5, 5))
    node_output = C.layers.Sequential([
        C.layers.Convolution2D(filter_shape=(3, 3), num_filters=1, init=1,
                               strides=(2, 2), pad=True, bias=False),
        C.layers.MaxPooling(filter_shape=(3, 3), strides=(2, 2), pad=True),
        C.layers.ConvolutionTranspose(filter_shape=(4, 4), num_filters=1,
                                      strides=(4, 4), init=1, bias=False)])(node_input)

    # Input data.
    input_map = {
        node_input: -np.arange(25).reshape(1, 1, 5, 5).astype(np.float32),
        node_referent: np.zeros([1, 1, 5, 5]).astype(np.float32)
    }

    # Expected cropped output.
    expected = [-12, -12, -12, -24, -24] * 3 + [-63, -63, -63, -81, -81] * 2
    expected = np.asarray(expected, dtype=np.float32).reshape(1, 1, 5, 5)

    # Test crop with explicitly specified offsets.
    cropped = C.crop_manual(node_output, node_referent, 1, 1).eval(input_map)
    assert np.array_equal(cropped, expected)

    # Test crop with automatically computed offsets where inputs
    # have common ancestor.
    cropped = C.crop_automatic(node_output, node_input).eval(input_map)
    assert np.array_equal(cropped, expected)

    # Test crop with automatically computed offsets where inputs do not
    # have common ancestor.
    cropped = C.crop_automatic_with_ancestors(
        node_output, node_referent, node_input, node_referent).eval(input_map)
    assert np.array_equal(cropped, expected)