def get_icecube_kernel(shape, get_ones=False):
    '''Get a kernel of shape 'shape' for IceCube where the coordinates of
    non-existing strings are set to constant zeros.

    Parameters
    ----------
    shape : list of int
        The shape of the desired kernel.
    get_ones : bool, optional
        If True, return constant ones for real DOMs and zeros for virtual
        DOMs.
        If False, return trainable parameters for real DOMs and zeros for
        virtual DOMs.

    Returns
    -------
    tf.Tensor
        The IceCube kernel with the desired shape.
    '''
    zeros = tf.zeros(shape, dtype=FLOAT_PRECISION)
    ones = tf.ones(shape, dtype=FLOAT_PRECISION)

    a_list = []
    for a in range(-4, 6):
        b_list = []
        for b in range(-5, 5):
            if (a, b) in hex_string_coord_dict:
                # String exists
                if get_ones:
                    weights = ones
                else:
                    weights = new_weights(shape)
            else:
                # Virtual string: this string does not actually exist
                weights = zeros
            b_list.append(weights)
        a_list.append(tf.stack(b_list))
    icecube_kernel = tf.stack(a_list)
    return icecube_kernel
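# Usage sketch (not part of the original source): assuming the usual module
# setup (new_weights, FLOAT_PRECISION and hex_string_coord_dict defined
# elsewhere in tfscripts, TF1.x graph mode), a trainable IceCube kernel
# with 3 input and 5 output channels per string could be built as:
#
#     kernel = get_icecube_kernel([3, 5])
#     # kernel has shape [10, 10, 3, 5]; entries at virtual string
#     # positions are constant zeros.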
def get_hex_kernel(filter_size, print_kernel=False, get_ones=False):
    '''Get a hexagonal convolution kernel.

    Create weights for a hexagonal kernel. The kernel will be of hexagonal
    shape in the first two dimensions, while the remaining dimensions are
    normal. The hexagonal kernel is of the shape:
        [kernel_edge_points, kernel_edge_points, *filter_size[2:]]
    Elements whose coordinates in the first two dimensions do not belong to
    the hexagon are set to a constant zero tensor.
    The hexagon is defined by filter_size[0:2]: filter_size[0] defines the
    size of the hexagon and filter_size[1] its orientation.

    Parameters
    ----------
    filter_size : list of int
        filter_size = [s, o, 3. dim (e.g. z), 4. dim (e.g. t), ...]
        s: size of hexagon
        o: orientation of hexagon

        Examples:

            s = 2, o = 0 (rows: a = 0..2, columns: b = 0..2):

                0 1 1        1 1
                1 1 1   ->  1 1 1
                1 1 0        1 1

            s = 3, o = 2 (rows: a = 0..6, columns: b = 0..6):

                0 0 0 0 0 1 0
                0 0 1 1 1 1 0
                0 0 1 1 1 1 1
                0 1 1 1 1 1 0
                1 1 1 1 1 0 0
                0 1 1 1 1 0 0
                0 1 0 0 0 0 0

    print_kernel : bool, optional
        If True, print the first two dimensions of the kernel:
            0 represents a constant zero tensor of shape filter_size[2:]
            1 represents a trainable tensor of shape filter_size[2:]
        This can be used to verify the shape of the hex kernel.
        If False, do not print.
    get_ones : bool, optional
        If True, return constant ones for elements in the hexagon.
        If False, return trainable tensors for elements in the hexagon.
        In both cases, constant zeros are returned for elements outside of
        the hexagon.

    Returns
    -------
    tf.Tensor
        A tensor with shape: [s, s, *filter_size[2:]]
        where s = 2*filter_size[0] - 1 if o == 0
                  [hexagon is parallel to axis of first dimension]
                = 2*filter_size[0] + 1 if o != 0
                  [hexagon is tilted to axis of first dimension]

    Raises
    ------
    ValueError
        If the orientation o is not smaller than the hexagon size s.
    '''
    k = filter_size[0]
    x = filter_size[1]

    if x >= k:
        raise ValueError("get_hex_kernel: filter_size must fulfill o < s, "
                         "but got s={}, o={}".format(k, x))

    if x == 0:
        kernel_edge_points = 2 * k - 1
    else:
        kernel_edge_points = 2 * k + 1

    zeros = tf.zeros(filter_size[2:], dtype=FLOAT_PRECISION)
    ones = tf.ones(filter_size[2:], dtype=FLOAT_PRECISION)

    a_list = []
    test_hex_dict = {}
    for a in range(kernel_edge_points):
        b_list = []
        for b in range(kernel_edge_points):

            # -------------------------
            # regular aligned hexagons
            # -------------------------
            if x == 0:
                if a + b < k - 1 or a + b > 3 * k - 3:
                    weights = zeros
                    test_hex_dict[(a, b)] = 0
                else:
                    if get_ones:
                        weights = ones
                    else:
                        weights = new_weights(filter_size[2:])
                    test_hex_dict[(a, b)] = 1

            # -------------------------
            # tilted hexagons
            # -------------------------
            else:
                inHexagon = False
                # check if inside the regular o == 0 aligned hexagon
                #   |--- inside o == 0 aligned rhombus ---|
                if ((a > 0 and a < 2 * k) and (b > 0 and b < 2 * k)
                        # |-- in o == 0 aligned hexagon --|
                        and (a + b > k and a + b < 3 * k)):
                    inHexagon = True

                else:
                    # add the 6 additional edges outside of the o == 0
                    # aligned hexagon
                    if a == 2 * k - x and b == 0:  # Edge 1
                        inHexagon = True
                    elif a == k - x and b == x:  # Edge 2
                        inHexagon = True
                    elif a == 0 and b == k + x:  # Edge 3
                        inHexagon = True
                    elif a == x and b == 2 * k:  # Edge 4
                        inHexagon = True
                    elif a == k + x and b == 2 * k - x:  # Edge 5
                        inHexagon = True
                    elif a == 2 * k and b == k - x:  # Edge 6
                        inHexagon = True

                # get trainable weights or constant zeros depending on
                # whether the point is inside the hexagon
                if inHexagon:
                    if get_ones:
                        weights = ones
                    else:
                        weights = new_weights(filter_size[2:])
                    test_hex_dict[(a, b)] = 1
                else:
                    weights = zeros
                    test_hex_dict[(a, b)] = 0

            b_list.append(weights)
        a_list.append(tf.stack(b_list))
    hexKernel = tf.stack(a_list)

    if print_kernel:
        print_hex_data(test_hex_dict)

    return hexKernel
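# Usage sketch (illustrative, not from the original source): a hexagonal
# kernel of size s=2, orientation o=0, with 3 input and 8 output channels:
#
#     kernel = get_hex_kernel([2, 0, 3, 8], print_kernel=True)
#     # kernel has shape [3, 3, 3, 8]; the two corner elements outside
#     # the hexagon are constant zeros.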
def add_residual(input, residual, strides=None, use_scale_factor=True,
                 scale_factor=0.001):
    '''Convenience function to add a residual.

    Adds input + scale*residual where the two tensors overlap in the last
    dimension. Currently only supports input and residual tensors of the
    same shape in the other dimensions.

    Parameters
    ----------
    input : tf.Tensor
        Input tensor.
    residual : tf.Tensor
        Residual to be added to the input tensor.
    strides : list of int, optional
        strides must define a stride (int) for each dimension of input.
    use_scale_factor : bool, optional
        If True, the residuals will be scaled by the scale_factor prior
        to addition.
    scale_factor : float, optional
        Defines how much the residuals will be scaled prior to addition
        if use_scale_factor is True.

    Returns
    -------
    tf.Tensor
        The output tensor: input + scale * residual (if use_scale_factor).
    '''
    # ----------------------
    # strides for mismatching dimensions other than the channel dimension
    # (Post Masterthesis)
    # ----------------------
    if strides is not None:

        assert len(strides) == len(input.get_shape().as_list()), \
            'Number of dimensions of strides and input must match'
        assert strides[0] == 1, 'stride in batch dimension must be 1'

        if not strides == [1 for s in strides]:
            begin = [0 for s in strides]
            end = [0] + input.get_shape().as_list()[1:]
            input = tf.strided_slice(input,
                                     begin=begin,
                                     end=end,
                                     strides=strides,
                                     begin_mask=1,
                                     end_mask=1,
                                     )
    # ----------------------

    num_outputs = residual.get_shape().as_list()[-1]
    num_inputs = input.get_shape().as_list()[-1]

    # Residuals added over multiple layers accumulate.
    # A scale factor < 1 reduces instabilities in the beginning.
    if use_scale_factor:
        scale = new_weights([num_outputs], stddev=scale_factor)
        residual = residual * scale
        if num_inputs == num_outputs:
            output = residual + input
        elif num_inputs > num_outputs:
            output = residual + input[..., :num_outputs]
        elif num_inputs < num_outputs:
            output = tf.concat([residual[..., :num_inputs] + input,
                                residual[..., num_inputs:]], axis=-1)
    else:
        if num_inputs == num_outputs:
            output = (residual + input) / np.sqrt(2.)
        elif num_inputs > num_outputs:
            output = (residual + input[..., :num_outputs]) / np.sqrt(2.)
        elif num_inputs < num_outputs:
            output = tf.concat(
                [(residual[..., :num_inputs] + input) / np.sqrt(2.),
                 residual[..., num_inputs:]],
                axis=-1)

    return output
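# Usage sketch (assumption: 'layer_input' and 'conv_output' are tensors of
# equal shape apart from the channel dimension):
#
#     output = add_residual(input=layer_input, residual=conv_output)
#     # With use_scale_factor=True (default), the residual is scaled by a
#     # trainable factor initialised close to zero before being added.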
def activation(layer, activation_type,
               use_batch_normalisation=False,
               is_training=None,
               verbose=True):
    '''Helper-function to perform activation on a layer.

    For parametric activation functions, this assumes that the first
    dimension is the batch size and that a separate parametrisation should
    be learned for each of the other dimensions.

    Parameters
    ----------
    layer : tf.Tensor
        Input tensor.
    activation_type : str or callable
        The activation type to be used.
    use_batch_normalisation : bool, optional
        True: use batch normalisation
    is_training : None, optional
        Indicates whether currently in training or inference mode.
        True: in training mode
        False: inference mode.
    verbose : bool, optional
        If True, more verbose output is printed.

    Returns
    -------
    tf.Tensor
        The output tensor.

    Raises
    ------
    ValueError
        If wrong settings are passed.
    '''
    # Use batch normalisation?
    if use_batch_normalisation:
        if verbose:
            print('Using Batch Normalisation')
        if is_training is None:
            raise ValueError('To use batch normalisation a boolean '
                             'is_training needs to be passed')
        layer = batch_norm_wrapper(layer, is_training)

    if activation_type == '':
        return layer

    # Check for callables first, so that custom activation functions can be
    # passed directly (hasattr would fail on a non-string attribute name).
    if callable(activation_type):
        layer = activation_type(layer)

    elif hasattr(tf.nn, activation_type):
        layer = getattr(tf.nn, activation_type)(layer)

    elif hasattr(tf, activation_type):
        layer = getattr(tf, activation_type)(layer)

    elif activation_type == 'leaky':
        layer = tf.multiply(tf.maximum(-0.01 * layer, layer), tf.sign(layer))
    # todo: NecroRelu
    # https://stats.stackexchange.com/questions/176794/
    #   how-does-rectilinear-activation-function-solve-the-
    #   vanishing-gradient-problem-in
    # https://github.com/ibmua/learning-to-make-nn-in-python/
    #   blob/master/nn_classifier.py
    elif activation_type == 'requ':
        layer = tf.where(tf.less(layer,
                                 tf.constant(0, dtype=FLOAT_PRECISION)),
                         tf.zeros_like(layer, dtype=FLOAT_PRECISION),
                         tf.square(layer))

    elif activation_type == 'selu':
        lam = 1.0507
        alpha = 1.6733
        # from https://arxiv.org/abs/1706.02515
        #   self-normalizing networks
        layer = tf.where(tf.less(layer,
                                 tf.constant(0, dtype=FLOAT_PRECISION)),
                         tf.exp(layer) * tf.constant(alpha,
                                                     dtype=FLOAT_PRECISION)
                         - tf.constant(alpha, dtype=FLOAT_PRECISION),
                         layer)
        layer = layer * tf.constant(lam, dtype=FLOAT_PRECISION)

    elif activation_type == 'centeredRelu':
        layer = tf.nn.relu6(layer) - tf.constant(3, dtype=FLOAT_PRECISION)

    elif activation_type == 'negrelu':
        layer = -tf.nn.relu(layer)

    elif activation_type == 'invrelu':
        layer = tf.where(tf.less(layer,
                                 tf.constant(0, dtype=FLOAT_PRECISION)),
                         layer,
                         (layer + 1e-8)**-1)

    elif activation_type == 'sign':
        layer = tf.where(tf.less(layer,
                                 tf.constant(0, dtype=FLOAT_PRECISION)),
                         layer,
                         tf.sign(layer))

    elif activation_type == 'prelu':
        slope = new_weights(layer.get_shape().as_list()[1:]) + 1.0
        layer = tf.where(tf.less(layer,
                                 tf.constant(0, dtype=FLOAT_PRECISION)),
                         layer * slope,
                         layer)

    elif activation_type == 'pelu':
        a = new_weights(layer.get_shape().as_list()[1:]) + 1.0
        b = new_weights(layer.get_shape().as_list()[1:]) + 1.0
        layer = tf.where(tf.less(layer,
                                 tf.constant(0, dtype=FLOAT_PRECISION)),
                         (tf.exp(layer / b) - 1) * a,
                         layer * (a / b))

    elif activation_type == 'gaussian':
        layer = tf.exp(-tf.square(layer))

    elif activation_type == 'pgaussian':
        sigma = new_weights(layer.get_shape().as_list()[1:]) + \
            tf.constant(1.0, dtype=FLOAT_PRECISION)
        mu = new_weights(layer.get_shape().as_list()[1:])
        layer = tf.exp(tf.square((layer - mu) / sigma) *
                       tf.constant(-0.5, dtype=FLOAT_PRECISION)) / sigma

    else:
        raise ValueError('activation: Unknown activation type: {!r}'.format(
            activation_type))

    return layer
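# Usage sketch (hypothetical tensor 'layer'): any attribute of tf.nn or tf
# can be selected by name, e.g. 'elu' or 'tanh', in addition to the custom
# activation types handled above:
#
#     layer = activation(layer, 'elu')
#     layer = activation(layer, 'prelu')  # adds trainable slope parameters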
def new_channel_wise_fc_layer(input,
                              num_outputs,
                              use_dropout=False,
                              keep_prob=None,
                              activation='elu',
                              use_batch_normalisation=False,
                              use_residual=False,
                              is_training=None,
                              weights=None,
                              biases=None,
                              max_out_size=None,
                              ):
    '''Helper-function for creating a new channel-wise Fully-Connected Layer

    input: 3-dim tensor of shape [batch_size, num_inputs, num_channel]
    output: 3-dim tensor of shape [batch_size, num_outputs, num_channel]

    Parameters
    ----------
    input : tf.Tensor
        Input layer. Shape: [batch, num_inputs, num_channel]
    num_outputs : int
        The number of output nodes.
    use_dropout : bool, optional
        If True, dropout will be used.
    keep_prob : None, optional
        The keep probability to be used for dropout.
        Can either be a float or a scalar float tf.Tensor.
    activation : str or callable, optional
        The type of activation function to be used.
    use_batch_normalisation : bool, optional
        If True, batch normalisation will be used.
    use_residual : bool, optional
        If True, layer result will be added as a residual to the input
        layer.
    is_training : None, optional
        Indicates whether currently in training or inference mode.
        Must be provided if batch normalisation is used.
        True: in training mode
        False: inference mode.
    weights : None or tf.Tensor, optional
        Optionally, the weights to be used in the layer can be provided.
        If None, new weights are created.
    biases : None or tf.Tensor, optional
        Optionally, the biases to be used in the layer can be provided.
        If None, new biases are created.
    max_out_size : None or int, optional
        The max_out_size for the layer.
        If None, no max_out is used in the layer.

    Returns
    -------
    tf.Tensor, tf.Tensor, tf.Tensor
        The layer, weights, and biases are returned as tf.Tensor
        The shape of the output layer is:
            [batch, num_outputs, num_channel]
        where num_channel is the same as the input number of channels.
    '''
    input_shape = input.get_shape().as_list()

    # input: [batch, num_inputs, num_channel]
    assert len(input_shape) == 3, \
        '{} != [batch, num_inputs, num_channel]'.format(input_shape)

    num_inputs = input_shape[1]
    num_channels = input_shape[2]

    # input_transpose: [num_channel, batch, num_inputs]
    input_transpose = tf.transpose(input, [2, 0, 1])

    # Create new weights and biases.
    if weights is None:
        weights = new_weights(shape=[num_channels, num_inputs, num_outputs])
    if biases is None:
        biases = new_weights(shape=[num_outputs, num_channels])

    # Calculate the layer as the matrix multiplication of
    # the input and weights, and then add the bias-values.
    # output: [num_channel, batch, num_outputs]
    output = tf.matmul(input_transpose, weights)
    layer = tf.transpose(output, [1, 2, 0])
    # layer: [batch, num_outputs, num_channel]

    # repair to get std dev of 1
    layer = layer / np.sqrt(num_inputs)

    layer = (layer + biases) / np.sqrt(2.)

    # Apply activation and batch normalisation
    layer = core.activation(layer, activation, use_batch_normalisation,
                            is_training)

    # Use as Residual
    if use_residual:
        # convert to [batch, num_channel, num_outputs]
        layer = tf.transpose(layer, [0, 2, 1])
        layer = core.add_residual(input=tf.transpose(input, [0, 2, 1]),
                                  residual=layer)
        # convert back to [batch, num_outputs, num_channel]
        layer = tf.transpose(layer, [0, 2, 1])

    if max_out_size is not None:
        layer = tf.contrib.layers.maxout(inputs=layer,
                                         num_units=max_out_size,
                                         axis=-1,
                                         )

    if use_dropout:
        layer = tf.nn.dropout(layer, keep_prob)

    return layer, weights, biases
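# Usage sketch (hypothetical input 'x' of shape [batch, 86, 20], i.e. 86
# inputs and 20 channels): a channel-wise layer with 12 output nodes that
# are computed independently for each channel:
#
#     layer, w, b = new_channel_wise_fc_layer(input=x, num_outputs=12)
#     # layer has shape [batch, 12, 20]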
def new_fc_layer(input,
                 num_outputs,
                 use_dropout=False,
                 keep_prob=None,
                 activation='elu',
                 use_batch_normalisation=False,
                 use_residual=False,
                 is_training=None,
                 weights=None,
                 biases=None,
                 max_out_size=None,
                 ):
    '''Helper-function for creating a new Fully-Connected Layer

    input: 2-dim tensor of shape [batch_size, num_inputs]
    output: 2-dim tensor of shape [batch_size, num_outputs]

    Parameters
    ----------
    input : tf.Tensor
        Input layer. Shape: [batch, num_inputs]
    num_outputs : int
        The number of output nodes.
    use_dropout : bool, optional
        If True, dropout will be used.
    keep_prob : None, optional
        The keep probability to be used for dropout.
        Can either be a float or a scalar float tf.Tensor.
    activation : str or callable, optional
        The type of activation function to be used.
    use_batch_normalisation : bool, optional
        If True, batch normalisation will be used.
    use_residual : bool, optional
        If True, layer result will be added as a residual to the input
        layer.
    is_training : None, optional
        Indicates whether currently in training or inference mode.
        Must be provided if batch normalisation is used.
        True: in training mode
        False: inference mode.
    weights : None or tf.Tensor, optional
        Optionally, the weights to be used in the layer can be provided.
        If None, new weights are created.
    biases : None or tf.Tensor, optional
        Optionally, the biases to be used in the layer can be provided.
        If None, new biases are created.
    max_out_size : None or int, optional
        The max_out_size for the layer.
        If None, no max_out is used in the layer.

    Returns
    -------
    tf.Tensor, tf.Tensor, tf.Tensor
        The layer, weights, and biases are returned as tf.Tensor
        The shape of the output layer is:
            [batch, num_outputs]
    '''
    num_inputs = input.get_shape().as_list()[-1]

    # Create new weights and biases.
    if weights is None:
        weights = new_weights(shape=[num_inputs, num_outputs])
    if biases is None:
        biases = new_biases(length=num_outputs)

    # Calculate the layer as the matrix multiplication of
    # the input and weights, and then add the bias-values.
    layer = tf.matmul(input, weights)

    # repair to get std dev of 1
    layer = layer / np.sqrt(num_inputs)

    layer = (layer + biases) / np.sqrt(2.)

    # Apply activation and batch normalisation
    layer = core.activation(layer, activation, use_batch_normalisation,
                            is_training)

    if max_out_size is not None:
        layer_shape = layer.get_shape().as_list()
        assert layer_shape[-1] % max_out_size == 0, \
            "max out needs to match dim"
        layer_shape[-1] = layer_shape[-1] // max_out_size
        layer = tf.contrib.layers.maxout(inputs=layer,
                                         num_units=max_out_size,
                                         axis=-1,
                                         )
        channel_stride = max(1, num_inputs // layer_shape[-1])
        res_strides = [1 for i in input.get_shape()[:-1]] + [channel_stride]

    else:
        res_strides = None

    # Use as Residual
    if use_residual:
        layer = core.add_residual(input=input,
                                  residual=layer,
                                  strides=res_strides)

    if use_dropout:
        layer = tf.nn.dropout(layer, keep_prob)

    return layer, weights, biases
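# Usage sketch (hypothetical flattened input 'x' of shape [batch, 256];
# keep_prob is assumed to be a float or scalar tf.Tensor defined elsewhere):
#
#     layer, w, b = new_fc_layer(input=x, num_outputs=64,
#                                activation='elu', use_dropout=True,
#                                keep_prob=keep_prob)
#     # layer has shape [batch, 64]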
def new_conv_nd_layer(input,
                      filter_size,
                      num_filters,
                      pooling_type=None,
                      pooling_strides=None,
                      pooling_ksize=None,
                      pooling_padding='SAME',
                      use_dropout=False,
                      keep_prob=None,
                      activation='elu',
                      strides=None,
                      padding='SAME',
                      use_batch_normalisation=False,
                      dilation_rate=None,
                      use_residual=False,
                      method='convolution',
                      weights=None,
                      biases=None,
                      trafo=None,
                      is_training=None,
                      hex_num_rotations=1,
                      hex_azimuth=None,
                      hex_zero_out=False,
                      ):
    '''Helper-function for creating a new nD Convolutional Layer

    2 <= n <= 4 are supported.
    For n == 3 (3 spatial dimensions x, y, and z):
        input: (n+2)-dim tensor of shape
            [batch, x, y, z, num_input_channels]
        output: (n+2)-dim tensor of shape
            [batch, x_p, y_p, z_p, num_filters]
        where x_p, y_p, z_p may differ from the input spatial dimensions
        due to downsizing in the pooling operation.

    Parameters
    ----------
    input : tf.Tensor
        Input layer. Shape: [batch, ..., num_input_channels]
        where ... are the n spatial axes.
    filter_size : list of int
        The size of the convolution kernel: [filter_1, ..., filter_n]
        Example n == 3:
            [3, 3, 5] will perform a convolution with a 3x3x5 kernel.

        If method == 'hex_convolution':
            filter_size = [s, o, z, t]
            s: size of hexagon
            o: orientation of hexagon
            z: size along z axis [if n >= 3]
            t: size along t axis [if n >= 4]

            The hexagonal filter along the x and y axes is put together
            from s and o.

            Examples:

                s = 2, o = 0 (rows: a = 0..2, columns: b = 0..2):

                    0 1 1        1 1
                    1 1 1   ->  1 1 1
                    1 1 0        1 1

                s = 3, o = 2 (rows: a = 0..6, columns: b = 0..6):

                    0 0 0 0 0 1 0
                    0 0 1 1 1 1 0
                    0 0 1 1 1 1 1
                    0 1 1 1 1 1 0
                    1 1 1 1 1 0 0
                    0 1 1 1 1 0 0
                    0 1 0 0 0 0 0

    num_filters : int
        The number of filters to use in the convolution operation.
    pooling_type : str, optional
        The pooling method to use, e.g. 'max', 'avg', 'max_avg'.
        If None, no pooling is applied.
    pooling_strides : list, optional
        The strides to be used for the pooling operation.
        Shape: [1, stride_1, ..., stride_n, stride_channel]
        Example n == 3:
            [1, 2, 2, 2, 1]: a stride of 2 is used along the x, y, and z
            axes.
    pooling_ksize : list, optional
        The pooling window size to be used.
        Shape: [1, pool_1, ..., pool_n, pool_channel]
        Example n == 3:
            [1, 2, 2, 2, 1] will apply a pooling window of 2x2x2 in the
            spatial coordinates.
    pooling_padding : str, optional
        The padding method to be used for the pooling operation.
        Options are 'VALID', 'SAME'.
    use_dropout : bool, optional
        If True, dropout will be used.
    keep_prob : None, optional
        The keep probability to be used for dropout.
        Can either be a float or a scalar float tf.Tensor.
    activation : str or callable, optional
        The type of activation function to be used.
    strides : list, optional
        The strides to be used for the convolution operation.
        Shape: [1, stride_1, ..., stride_n, stride_channel]
        Examples n == 3:
            [1, 1, 1, 1, 1]: a stride of 1 is used along all axes.
            [1, 1, 2, 1, 1]: a stride of 2 is used along the y axis.
    padding : str, optional
        The padding method to be used for the convolution operation.
        Options are 'VALID', 'SAME'.
    use_batch_normalisation : bool, optional
        If True, batch normalisation will be used.
    dilation_rate : None or list of int, optional
        The dilation rate to be used for the layer.
        The dilation rate is given by: [dilation_1, ..., dilation_n]
        where dilation_i specifies the dilation rate for axis_i.
        If the dilation rate is None, no dilation is applied in the
        convolution.
    use_residual : bool, optional
        If True, layer result will be added as a residual to the input
        layer.
    method : str, optional
        Which convolution method to use for the layer, e.g.:
            'convolution', 'hex_convolution', 'locally_connected',
            'local_trafo', 'dynamic_convolution'.
        For details on 'local_trafo', see: tfscripts.conv.trans3d_op
    weights : None or tf.Tensor, optional
        Optionally, the weights to be used in the layer can be provided.
        If None, new weights are created.
    biases : None or tf.Tensor, optional
        Optionally, the biases to be used in the layer can be provided.
        If None, new biases are created.
    trafo : None or callable, optional
        If the convolution method is 'local_trafo', the callable provided
        by the 'trafo_list' will be used as the transformation on the
        input patch.
    is_training : None, optional
        Indicates whether currently in training or inference mode.
        Must be provided if batch normalisation is used.
        True: in training mode
        False: inference mode.
    hex_num_rotations : int, optional
        Only used if method == 'hex_convolution'.
        If num_rotations >= 1: weights of a kernel will be shared over
        'num_rotations' many rotated versions of that kernel.
    hex_azimuth : None or float or scalar float tf.Tensor
        Only used if method == 'hex_convolution'.
        The hexagonal kernel is turned by the angle 'azimuth'
        [given in degrees] in counterclockwise direction.
        If azimuth is None, the kernel will not be rotated dynamically.
    hex_zero_out : bool, optional
        Only used if method == 'hex_convolution'.
        If True, elements in the result tensor which are not part of the
        hexagon or of the IceCube strings (if the shape in the x and y
        dimensions is 10x10) will be set to zero.

    Returns
    -------
    tf.Tensor, tf.Tensor, tf.Tensor
        The layer, weights, and biases are returned as tf.Tensor
        The shape of the output layer is:
            [batch, s_1, ..., s_n, num_filters]
        where the s_i may differ from the input spatial dimensions
        due to downsizing in the pooling operation.

    Raises
    ------
    NotImplementedError
        If a requested option is not (yet) implemented, e.g. 4D locally
        connected layers or an unsupported pooling type.
    ValueError
        If the number of input dimensions, the filter size, or the
        convolution method is not supported.
    '''
    # check dimension of input
    num_dims = len(input.shape) - 2

    if num_dims == 4:
        # 4D convolution
        if pooling_strides is None:
            pooling_strides = [1, 2, 2, 2, 2, 1]
        if pooling_ksize is None:
            pooling_ksize = [1, 2, 2, 2, 2, 1]
        if strides is None:
            strides = [1, 1, 1, 1, 1, 1]

    elif num_dims == 3:
        # 3D convolution
        if pooling_strides is None:
            pooling_strides = [1, 2, 2, 2, 1]
        if pooling_ksize is None:
            pooling_ksize = [1, 2, 2, 2, 1]
        if strides is None:
            strides = [1, 1, 1, 1, 1]

    elif num_dims == 2:
        # 2D convolution
        if pooling_strides is None:
            pooling_strides = [1, 2, 2, 1]
        if pooling_ksize is None:
            pooling_ksize = [1, 2, 2, 1]
        if strides is None:
            strides = [1, 1, 1, 1]

    else:
        raise ValueError(
            'Currently only 2D, 3D or 4D supported {!r}'.format(input))

    # make sure the inferred dimension matches filter_size
    if not len(filter_size) == num_dims:
        err_msg = 'Filter size {!r} does not fit to input shape {!r}'.format(
            filter_size, input.shape)
        raise ValueError(err_msg)

    num_input_channels = input.get_shape().as_list()[-1]

    # Shape of the filter-weights for the convolution.
    shape = list(filter_size) + [num_input_channels, num_filters]

    # Create new weights aka. filters with the given shape.
    if method.lower() == 'convolution':
        if weights is None:
            # weights = new_kernel_weights(shape=shape)
            weights = new_weights(shape=shape)
        # Create new biases, one for each filter.
        if biases is None:
            biases = new_biases(length=num_filters)

    # -------------------
    # Perform convolution
    # -------------------
    if method.lower() == 'convolution':
        if num_dims == 2 or num_dims == 3:
            layer = tf.nn.convolution(input=input,
                                      filter=weights,
                                      strides=strides[1:-1],
                                      padding=padding,
                                      dilation_rate=dilation_rate)
        elif num_dims == 4:
            layer = conv.conv4d_stacked(input=input,
                                        filter=weights,
                                        strides=strides,
                                        padding=padding,
                                        dilation_rate=dilation_rate)

    # ---------------------
    # Hexagonal convolution
    # ---------------------
    elif method.lower() == 'hex_convolution':
        if num_dims == 2 or num_dims == 3:
            layer, weights = hx.conv_hex(input_data=input,
                                         filter_size=filter_size,
                                         num_filters=num_filters,
                                         padding=padding,
                                         strides=strides,
                                         num_rotations=hex_num_rotations,
                                         azimuth=hex_azimuth,
                                         dilation_rate=dilation_rate,
                                         zero_out=hex_zero_out,
                                         kernel=weights,
                                         )
        elif num_dims == 4:
            layer, weights = hx.conv_hex4d(input_data=input,
                                           filter_size=filter_size,
                                           num_filters=num_filters,
                                           padding=padding,
                                           strides=strides,
                                           num_rotations=hex_num_rotations,
                                           azimuth=hex_azimuth,
                                           dilation_rate=dilation_rate,
                                           zero_out=hex_zero_out,
                                           kernel=weights,
                                           )

        # Create new biases, one for each filter.
        if biases is None:
            biases = new_biases(length=num_filters * hex_num_rotations)

    # -------------------
    # locally connected
    # -------------------
    elif method.lower() == 'locally_connected':

        if (weights is not None) or (biases is not None):
            raise NotImplementedError("Locally connected layers currently "
                                      "do not support predefined weights")

        if num_dims == 2:
            layer, weights = conv.locally_connected_2d(
                input=input,
                num_outputs=num_filters,
                filter_size=filter_size,
                strides=strides[1:-1],
                padding=padding,
                dilation_rate=dilation_rate)
        elif num_dims == 3:
            layer, weights = conv.locally_connected_3d(
                input=input,
                num_outputs=num_filters,
                filter_size=filter_size,
                strides=strides[1:-1],
                padding=padding,
                dilation_rate=dilation_rate)
        elif num_dims == 4:
            raise NotImplementedError('4D locally connected not implemented!')

        # Create new biases, one for each filter and position
        biases = new_weights(shape=layer.get_shape().as_list()[1:])

    # -------------------
    # local trafo
    # -------------------
    elif method.lower() == 'local_trafo':

        assert (weights is None and biases is None)

        if num_dims == 3:
            layer = conv.trans3d_op(
                input=input,
                num_out_channel=num_filters,
                filter_size=filter_size,
                method=method,
                trafo=trafo,
                filter=weights,
                strides=strides[1:-1],
                padding=padding,
                dilation_rate=dilation_rate,
                stack_axis=None,
            )
        else:
            raise NotImplementedError('local_trafo currently only for 3D')

    # --------------------
    # dynamic convolution
    # --------------------
    elif method.lower() == 'dynamic_convolution':

        assert weights is not None

        if num_dims == 2 or num_dims == 3:
            layer = conv.dynamic_conv(input=input,
                                      filter=weights,
                                      strides=strides[1:-1],
                                      padding=padding,
                                      dilation_rate=dilation_rate)
        elif num_dims == 4:
            raise NotImplementedError('4D dynamic_convolution not '
                                      'implemented')

        if biases is None:
            biases = new_biases(length=num_filters)

    else:
        raise ValueError('Unknown method: {!r}'.format(method))

    # repair to get std dev of 1
    # In the convolution operation, a matrix multiplication is performed
    # over the image patch and the kernel. Afterwards, a reduce_sum is
    # called. Assuming that all inputs and weights are normalized, the
    # result of the matrix multiplication will approximately still have
    # std dev 1.
    # However, the reduce_sum operation over the filter size and number
    # of input channels adds up
    #     n = np.prod(filter_size) * num_input_channels
    # variables which each have a std dev of 1. In the case of normally
    # distributed values, this results in a std deviation of
    # np.sqrt(np.prod(filter_size) * num_input_channels). To ensure that
    # the result of the convolutional layer is still normalized, the
    # values need to be divided by this factor.
    # In the case of the hex_convolution, this factor gets reduced to
    # the number of non-zero elements in the hex kernel.
    if method.lower() == 'hex_convolution':

        num_filter_vars = hx.get_num_hex_points(filter_size[0])
        if len(filter_size) > 2:
            # This should probably be *=, but empirically this provides
            # better results... [At least for IceCube applications]
            # Possibly because the variance is actually a lot lower in the
            # input, since it will be padded with zeros and these will
            # propagate to later layers.
            num_filter_vars += np.prod(filter_size[2:])

        layer = layer / np.sqrt(num_filter_vars * num_input_channels)
    else:
        layer = layer / np.sqrt(np.prod(filter_size) * num_input_channels)

    # Add the biases to the results of the convolution.
    # A bias-value is added to each filter-channel.
    if biases is not None:
        layer = (layer + biases) / np.sqrt(2.)

    # Apply activation and batch normalisation
    layer = core.activation(layer, activation, use_batch_normalisation,
                            is_training)

    # Use as Residual
    if use_residual:
        layer = core.add_residual(input=input,
                                  residual=layer,
                                  strides=strides)

    # Use pooling to down-sample the image resolution?
    if num_dims == 2:
        layer = pooling.pool(layer=layer,
                             ksize=pooling_ksize,
                             strides=pooling_strides,
                             padding=pooling_padding,
                             pooling_type=pooling_type,
                             )
    elif num_dims == 3:
        layer = pooling.pool3d(layer=layer,
                               ksize=pooling_ksize,
                               strides=pooling_strides,
                               padding=pooling_padding,
                               pooling_type=pooling_type,
                               )
    elif num_dims == 4:
        if pooling_type == 'max':
            layer = pooling.max_pool4d_stacked(input=layer,
                                               ksize=pooling_ksize,
                                               strides=pooling_strides,
                                               padding=pooling_padding)
        elif pooling_type == 'avg':
            layer = pooling.avg_pool4d_stacked(input=layer,
                                               ksize=pooling_ksize,
                                               strides=pooling_strides,
                                               padding=pooling_padding)
        elif pooling_type is not None:
            # pooling_type None means no pooling (see docstring)
            raise NotImplementedError("Pooling type not supported: "
                                      "{!r}".format(pooling_type))
    else:
        raise NotImplementedError('Only supported 2d, 3d, 4d!')

    if use_dropout:
        layer = tf.nn.dropout(layer, keep_prob)

    return layer, weights, biases
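# Usage sketch (hypothetical 5D input 'x' of shape
# [batch, 10, 10, 60, num_channels]): a plain 3D convolution followed by
# 2x2x2 max-pooling:
#
#     layer, w, b = new_conv_nd_layer(input=x,
#                                     filter_size=[3, 3, 5],
#                                     num_filters=16,
#                                     pooling_type='max')
#
# For IceCube-like data, method='hex_convolution' with
# filter_size=[2, 0, 5] would instead build a hexagonal kernel in the
# first two (string) dimensions.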
def get_dynamic_rotation_hex_kernel(filter_size, azimuth):
    '''Dynamically azimuthally rotated hexagonal kernels.

    Create weights for a hexagonal kernel. The kernel is dynamically
    rotated by the 'azimuth' angle. The kernel will be of hexagonal shape
    in the first two dimensions, while the remaining dimensions are normal.
    The hexagonal kernel is of the shape:
        [kernel_edge_points, kernel_edge_points, *filter_size[2:]]
    Elements whose coordinates in the first two dimensions do not belong to
    the hexagon are set to a constant zero tensor.
    The hexagon is defined by filter_size[0:2]: filter_size[0] defines the
    size of the hexagon and filter_size[1] its orientation.

    Parameters
    ----------
    filter_size : list of int
        filter_size = [s, o, 3. dim (e.g. z), 4. dim (e.g. t), ...]
        filter_size[-2:] = [no_in_channels, no_out_channels]
        s: size of hexagon
        o: orientation of hexagon

        Example: s = 2, o = 0:

            0 1 1        1 1
            1 1 1   ->  1 1 1
            1 1 0        1 1

    azimuth : tf.Tensor
        A float tf.Tensor denoting the angle by which the kernel will be
        dynamically rotated. The azimuth angle is given in degrees.

    Returns
    -------
    tf.Tensor
        A tensor with shape: [s, s, *filter_size[2:]]
        where s = 2*filter_size[0] - 1 if o == 0
                  [hexagon is parallel to axis of first dimension]
                = 2*filter_size[0] + 1 if o != 0
                  [hexagon is tilted to axis of first dimension]

    Raises
    ------
    ValueError
        If the hexagonal filter_size is not supported.
    '''
    no_of_dims = len(filter_size)
    rotated_filter_size = filter_size[2:-1] + [filter_size[-1]]
    Z = tf.zeros([tf.shape(azimuth)[0]] + filter_size[2:],
                 dtype=FLOAT_PRECISION)

    center_weight = new_weights([1] + filter_size[2:])
    multiples = [tf.shape(azimuth)[0]] + [1] * (no_of_dims - 2)
    center_weight = tf.tile(center_weight, multiples)

    # HARDCODE MAGIC... ToDo: Generalize
    if filter_size[0:2] == [2, 0]:
        # hexagonal 2,0 filter
        corner_weights1 = new_weights([6] + filter_size[2:])

    elif filter_size[0:2] == [2, 1]:
        # hexagonal 2,1 filter
        corner_weights1 = new_weights([6] + filter_size[2:])
        corner_weights2 = []
        for i in range(6):
            corner_weights2.extend([Z, new_weights(filter_size[2:])])
        corner_weights2 = tf.stack(corner_weights2)

    elif filter_size[0:2] == [3, 0]:
        # hexagonal 3,0 filter
        corner_weights1 = new_weights([6] + filter_size[2:])
        corner_weights2 = new_weights([12] + filter_size[2:])

    elif filter_size[0:2] == [3, 1]:
        # hexagonal 3,1 filter
        corner_weights1 = new_weights([6] + filter_size[2:])
        corner_weights2 = new_weights([12] + filter_size[2:])
        corner_weights3 = []
        for i in range(6):
            corner_weights3.extend([Z, new_weights(filter_size[2:]), Z])
        corner_weights3 = tf.stack(corner_weights3)

    elif filter_size[0:2] == [3, 2]:
        # hexagonal 3,2 filter
        corner_weights1 = new_weights([6] + filter_size[2:])
        corner_weights2 = new_weights([12] + filter_size[2:])
        corner_weights3 = []
        for i in range(6):
            corner_weights3.extend([Z, Z, new_weights(filter_size[2:])])
        corner_weights3 = tf.stack(corner_weights3)

    elif filter_size[0:2] == [4, 0]:
        # hexagonal 4,0 filter
        corner_weights1 = new_weights([6] + filter_size[2:])
        corner_weights2 = new_weights([12] + filter_size[2:])
        corner_weights3 = new_weights([18] + filter_size[2:])

    else:
        raise ValueError("get_dynamic_rotation_hex_kernel: Unsupported "
                         "hexagonal filter_size: {!r}".format(
                             filter_size[0:2]))

    rotated_kernel_rows = []
    if filter_size[0:2] == [2, 0]:
        # hexagonal 2,0 filter
        A = tf_get_rotated_corner_weights(corner_weights1, azimuth)
        rotated_kernel_rows.append(tf.stack([Z, A[5], A[0]], axis=1))
        rotated_kernel_rows.append(
            tf.stack([A[4], center_weight, A[1]], axis=1))
        rotated_kernel_rows.append(tf.stack([A[3], A[2], Z], axis=1))

    elif filter_size[0:2] == [2, 1] or filter_size[0:2] == [3, 0]:
        # hexagonal 2,1 and 3,0 filter
        A = tf_get_rotated_corner_weights(corner_weights1, azimuth)
        B = tf_get_rotated_corner_weights(corner_weights2, azimuth)
        rotated_kernel_rows.append(
            tf.stack([Z, Z, B[9], B[10], B[11]], axis=1))
        rotated_kernel_rows.append(
            tf.stack([Z, B[8], A[5], A[0], B[0]], axis=1))
        rotated_kernel_rows.append(
            tf.stack([B[7], A[4], center_weight, A[1], B[1]], axis=1))
        rotated_kernel_rows.append(
            tf.stack([B[6], A[3], A[2], B[2], Z], axis=1))
        rotated_kernel_rows.append(
            tf.stack([B[5], B[4], B[3], Z, Z], axis=1))

    elif (filter_size[0:2] == [3, 1] or filter_size[0:2] == [3, 2]
            or filter_size[0:2] == [4, 0]):
        # hexagonal 3,1, 3,2, and 4,0 filter
        A = tf_get_rotated_corner_weights(corner_weights1, azimuth)
        B = tf_get_rotated_corner_weights(corner_weights2, azimuth)
        C = tf_get_rotated_corner_weights(corner_weights3, azimuth)
        rotated_kernel_rows.append(
            tf.stack([Z, Z, Z, C[15], C[16], C[17], C[0]], axis=1))
        rotated_kernel_rows.append(
            tf.stack([Z, Z, C[14], B[9], B[10], B[11], C[1]], axis=1))
        rotated_kernel_rows.append(
            tf.stack([Z, C[13], B[8], A[5], A[0], B[0], C[2]], axis=1))
        rotated_kernel_rows.append(
            tf.stack([C[12], B[7], A[4], center_weight, A[1], B[1], C[3]],
                     axis=1))
        rotated_kernel_rows.append(
            tf.stack([C[11], B[6], A[3], A[2], B[2], C[4], Z], axis=1))
        rotated_kernel_rows.append(
            tf.stack([C[10], B[5], B[4], B[3], C[5], Z, Z], axis=1))
        rotated_kernel_rows.append(
            tf.stack([C[9], C[8], C[7], C[6], Z, Z, Z], axis=1))

    else:
        raise ValueError("get_dynamic_rotation_hex_kernel: Unsupported "
                         "hexagonal filter_size: {!r}".format(
                             filter_size[0:2]))

    rotated_kernel = tf.stack(rotated_kernel_rows, axis=1)
    return rotated_kernel
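# Usage sketch (hypothetical; azimuth is assumed here to be a 1-D tensor of
# angles in degrees, one per batch element, since the code tiles by
# tf.shape(azimuth)[0]): a dynamically rotated hexagonal 2,0 kernel with
# 3 input and 8 output channels:
#
#     azimuth = tf.placeholder(FLOAT_PRECISION, shape=[None])
#     kernel = get_dynamic_rotation_hex_kernel([2, 0, 5, 3, 8], azimuth)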
def get_rotated_hex_kernel(filter_size, num_rotations):
    '''Create weights for a rotated hexagonal kernel.

    The kernel is rotated 'num_rotations' many times and the weights are
    shared over the rotated versions.
    The kernel will be of hexagonal shape in the first two dimensions,
    while the remaining dimensions are normal. The hexagonal kernel is of
    the shape:
        [kernel_edge_points, kernel_edge_points, *filter_size[2:]]
    Elements whose coordinates in the first two dimensions do not belong to
    the hexagon are set to a constant zero tensor.
    The hexagon is defined by filter_size[0:2]: filter_size[0] defines the
    size of the hexagon and filter_size[1] its orientation.

    Parameters
    ----------
    filter_size : list of int
        filter_size = [s, o, 3. dim (e.g. z), 4. dim (e.g. t), ...]
        filter_size[-2:] = [no_in_channels, no_out_channels]
        s: size of hexagon
        o: orientation of hexagon

        Example: s = 2, o = 0:

            0 1 1        1 1
            1 1 1   ->  1 1 1
            1 1 0        1 1

    num_rotations : int
        The number of rotated kernels to create. Kernels will be rotated
        by 360 degrees / num_rotations.

    Returns
    -------
    tf.Tensor
        A tensor with shape:
            [s, s, *filter_size[2:-1], filter_size[-1]*num_rotations]
        where s = 2*filter_size[0] - 1 if o == 0
                  [hexagon is parallel to axis of first dimension]
                = 2*filter_size[0] + 1 if o != 0
                  [hexagon is tilted to axis of first dimension]

    Raises
    ------
    ValueError
        If the hexagonal filter_size is not supported.
    '''
    no_of_dims = len(filter_size)
    rotated_filter_size = filter_size[2:-1] + \
        [filter_size[-1] * num_rotations]
    azimuths = np.linspace(0, 360, num_rotations + 1)[:-1]
    Z = tf.zeros(filter_size[2:-2], dtype=FLOAT_PRECISION)
    center_weight = new_weights(filter_size[2:-2])

    # HARDCODE MAGIC... ToDo: Generalize
    if filter_size[0:2] == [2, 0]:
        # hexagonal 2,0 filter
        corner_weights1 = [new_weights(filter_size[2:-2]) for i in range(6)]

    elif filter_size[0:2] == [2, 1]:
        # hexagonal 2,1 filter
        corner_weights1 = [new_weights(filter_size[2:-2]) for i in range(6)]
        corner_weights2 = []
        for i in range(6):
            corner_weights2.extend([Z, new_weights(filter_size[2:-2])])

    elif filter_size[0:2] == [3, 0]:
        # hexagonal 3,0 filter
        corner_weights1 = [new_weights(filter_size[2:-2]) for i in range(6)]
        corner_weights2 = [new_weights(filter_size[2:-2]) for i in range(12)]

    elif filter_size[0:2] == [3, 1]:
        # hexagonal 3,1 filter
        corner_weights1 = [new_weights(filter_size[2:-2]) for i in range(6)]
        corner_weights2 = [new_weights(filter_size[2:-2]) for i in range(12)]
        corner_weights3 = []
        for i in range(6):
            corner_weights3.extend([Z, new_weights(filter_size[2:-2]), Z])

    elif filter_size[0:2] == [3, 2]:
        # hexagonal 3,2 filter
        corner_weights1 = [new_weights(filter_size[2:-2]) for i in range(6)]
        corner_weights2 = [new_weights(filter_size[2:-2]) for i in range(12)]
        corner_weights3 = []
        for i in range(6):
            corner_weights3.extend([Z, Z, new_weights(filter_size[2:-2])])

    elif filter_size[0:2] == [4, 0]:
        # hexagonal 4,0 filter
        corner_weights1 = [new_weights(filter_size[2:-2]) for i in range(6)]
        corner_weights2 = [new_weights(filter_size[2:-2]) for i in range(12)]
        corner_weights3 = [new_weights(filter_size[2:-2]) for i in range(18)]

    else:
        raise ValueError("get_rotated_hex_kernel: Unsupported "
                         "hexagonal filter_size: {!r}".format(
                             filter_size[0:2]))

    rotated_kernels = []
    in_out_channel_weights = new_weights([num_rotations] + filter_size[-2:])

    for i, azimuth in enumerate(azimuths):
        rotated_kernel_rows = []
        if filter_size[0:2] == [2, 0]:
            # hexagonal 2,0 filter
            A = get_rotated_corner_weights(corner_weights1, azimuth)
            rotated_kernel_rows.append(tf.stack([Z, A[5], A[0]]))
            rotated_kernel_rows.append(
                tf.stack([A[4], center_weight, A[1]]))
            rotated_kernel_rows.append(tf.stack([A[3], A[2], Z]))

        elif filter_size[0:2] == [2, 1] or filter_size[0:2] == [3, 0]:
            # hexagonal 2,1 and 3,0 filter
            A = get_rotated_corner_weights(corner_weights1, azimuth)
            B = get_rotated_corner_weights(corner_weights2, azimuth)
            rotated_kernel_rows.append(
                tf.stack([Z, Z, B[9], B[10], B[11]]))
            rotated_kernel_rows.append(
                tf.stack([Z, B[8], A[5], A[0], B[0]]))
            rotated_kernel_rows.append(
                tf.stack([B[7], A[4], center_weight, A[1], B[1]]))
            rotated_kernel_rows.append(
                tf.stack([B[6], A[3], A[2], B[2], Z]))
            rotated_kernel_rows.append(
                tf.stack([B[5], B[4], B[3], Z, Z]))

        elif (filter_size[0:2] == [3, 1] or filter_size[0:2] == [3, 2]
                or filter_size[0:2] == [4, 0]):
            # hexagonal 3,1, 3,2, and 4,0 filter
            A = get_rotated_corner_weights(corner_weights1, azimuth)
            B = get_rotated_corner_weights(corner_weights2, azimuth)
            C = get_rotated_corner_weights(corner_weights3, azimuth)
            rotated_kernel_rows.append(
                tf.stack([Z, Z, Z, C[15], C[16], C[17], C[0]]))
            rotated_kernel_rows.append(
                tf.stack([Z, Z, C[14], B[9], B[10], B[11], C[1]]))
            rotated_kernel_rows.append(
                tf.stack([Z, C[13], B[8], A[5], A[0], B[0], C[2]]))
            rotated_kernel_rows.append(
                tf.stack([C[12], B[7], A[4], center_weight, A[1], B[1],
                          C[3]]))
            rotated_kernel_rows.append(
                tf.stack([C[11], B[6], A[3], A[2], B[2], C[4], Z]))
            rotated_kernel_rows.append(
                tf.stack([C[10], B[5], B[4], B[3], C[5], Z, Z]))
            rotated_kernel_rows.append(
                tf.stack([C[9], C[8], C[7], C[6], Z, Z, Z]))

        else:
            raise ValueError("get_rotated_hex_kernel: Unsupported hexagonal "
                             "filter_size: {!r}".format(filter_size[0:2]))

        rotated_kernel_single = tf.stack(rotated_kernel_rows)

        # Add free parameters for in and out channels:
        # tile to the correct format ...
        rotated_kernel_single = tf.expand_dims(rotated_kernel_single, -1)
        rotated_kernel_single = tf.expand_dims(rotated_kernel_single, -1)
        multiples = [1 for i in range(no_of_dims - 2)] + filter_size[-2:]
        rotated_kernel_tiled = tf.tile(rotated_kernel_single, multiples)

        # ... and multiply with weights to make the in and out channels
        # independent.
        rotated_kernel = rotated_kernel_tiled * in_out_channel_weights[i]
        rotated_kernels.append(rotated_kernel)

    rotated_kernels = tf.concat(values=rotated_kernels,
                                axis=len(filter_size) - 1)
    return rotated_kernels
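# Usage sketch (hypothetical): a hexagonal 3,0 kernel with weights shared
# over 4 rotated copies, for 3 input and 8 output channels:
#
#     kernel = get_rotated_hex_kernel([3, 0, 5, 3, 8], num_rotations=4)
#     # kernel has shape [5, 5, 5, 3, 32]; the last dimension holds
#     # 8 * 4 = 32 output channels (num_out_channels * num_rotations).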