def forward(self, x):
    out = utils.activation(self.bn1(self.conv1(x)))
    if self.dropout > 0:
        out = F.dropout2d(out, self.dropout)
    out = self.bn2(self.conv2(out))
    out += self.shortcut(x)
    out = utils.activation(out)
    return out

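# A minimal module sketch that the forward() above could belong to. This is an
# assumption, not taken from the source: the attribute names conv1/bn1/conv2/
# bn2/shortcut and the dropout field match the usage above, but the kernel
# sizes and the identity-vs-1x1 shortcut choice are illustrative only.
import torch.nn as nn


class BasicBlock(nn.Module):
    def __init__(self, in_planes, planes, stride=1, dropout=0.0):
        super().__init__()
        self.dropout = dropout
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        # Projection shortcut only when the shape changes, identity otherwise.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(planes))
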
def build_heading_resnet_2(opts, inputs, regularizer=None):
    """Build generic heading estimation resnet architecture (type 2)."""
    nlayers = 2
    noutputs = opts.noutputs
    filters = [[256] * 2, [512] * 2]
    strides = [4] * nlayers
    kernel_sizes = [7] * nlayers
    paddings = ['SAME'] * nlayers
    activations = ['relu'] * nlayers
    layers = [inputs]
    # Build network
    with tf.variable_scope('weights'):
        for i in range(nlayers):
            with tf.variable_scope('block%02d' % i):
                print('block%02d' % i)
                layer = utils.resnet_conv(inputs=layers[-1],
                                          filters=filters[i],
                                          kernel_size=kernel_sizes[i],
                                          strides=[strides[i]] * 2,
                                          activation=utils.activation(activations[i]),
                                          batch_norm=opts.use_batch_norm,
                                          kernel_regularizer=regularizer)
                layers.append(layer)
        # Flatten conv features; `reduce` and `mul` come from functools/operator.
        in_size = reduce(mul, layers[-1].get_shape()[1:].as_list(), 1)
        reshape = tf.reshape(layers[-1], [-1, in_size])
        layers.append(reshape)
        if opts.use_fully_connected:
            print("fully_connected_layer")
            fc = utils.fully_connected_layer(inputs=layers[-1],
                                             mid_size=opts.fully_connected_size,
                                             out_size=noutputs,
                                             activation=utils.activation(activations[-1]),
                                             regularizer=regularizer)
            layers.append(fc)
        else:
            with tf.name_scope('linear_block'):
                linear_layer = tf.layers.dense(inputs=layers[-1],
                                               units=noutputs,
                                               kernel_regularizer=regularizer)
                layers.append(linear_layer)
    if opts.output_type == 'foe':
        with tf.name_scope('normalize'):
            layers.append(tf.nn.l2_normalize(layers[-1], 1))
    elif opts.output_type == 'foeomega':
        with tf.name_scope('halfnormalize'):
            o = layers[-1]
            layers.append(
                tf.concat([tf.nn.l2_normalize(o[:, :3], 1), o[:, 3:]], 1))
    return layers

def forward(self, x):
    out = utils.activation(self.conv1(x))
    out = utils.activation(self.conv2(out))
    out = F.max_pool2d(out, kernel_size=2)
    out = utils.activation(self.conv3(out))
    out = utils.activation(self.conv4(out))
    out = F.max_pool2d(out, kernel_size=2)
    out = utils.activation(self.lin1(out.view(out.size(0), -1)))
    out = self.lin2(out)
    return out

def __init__(self, layer, name, shape, bias=True, activation=None):
    super().__init__(name)
    with self.name_scope:
        self.kernel = layer.add_weight('kernel', shape=shape)
        if bias:
            self.bias = layer.add_weight('bias', shape=shape[1:])
        if activation:
            self.activate = qu.activation(activation)

def forward(self, x):
    out = utils.activation(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = F.avg_pool2d(out, out.size()[3])
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out

def forward_propagation(self, X, Y, output_activation='sigmoid'):
    '''
    This method performs the forward propagation step of the neural network.

    Arguments:
        output_activation: activation function applied to the output layer
            'sigmoid': apply sigmoid activation to the output layer with
                cross-entropy cost function.
            'softmax': apply softmax activation to the output layer with
                log-likelihood cost function.

    Returns:
        forward_cache: dictionary containing values A and Z for the current
            mini-batch
    '''
    L = self.L - 1
    A = self.A.copy()
    A[0] = X
    Z = {}
    W = self.W
    b = self.b
    # cache to pass intermediate results to the backprop step
    forward_cache = {}
    for l in range(L - 1):
        Z[l + 1] = np.dot(W[l + 1], A[l]) + b[l + 1]
        A[l + 1] = activation(Z[l + 1], 'relu')
    Z[L] = np.dot(W[L], A[L - 1]) + b[L]
    A[L] = activation(Z[L], output_activation)
    # store intermediate results in cache
    forward_cache['A'] = A
    forward_cache['Z'] = Z
    forward_cache['Y'] = Y
    return forward_cache

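# forward_propagation() above and predict() below rely on a free function
# activation(Z, name). A minimal NumPy sketch consistent with the names used
# here ('relu', 'sigmoid', 'softmax'); the project's own helper may differ.
import numpy as np


def activation(Z, name):
    if name == 'relu':
        return np.maximum(0.0, Z)
    if name == 'sigmoid':
        return 1.0 / (1.0 + np.exp(-Z))
    if name == 'softmax':
        # columns are samples, matching np.dot(W, A) + b above
        e = np.exp(Z - np.max(Z, axis=0, keepdims=True))
        return e / np.sum(e, axis=0, keepdims=True)
    raise ValueError('unknown activation: {}'.format(name))
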
def predict(self, X):
    '''
    This method predicts the label for a given image X.

    Arguments:
        X: the given image vector for which prediction has to be done.

    Returns:
        Y_hat: the predicted label for the input X.
    '''
    L = self.L - 1
    self.A[0] = X
    for l in range(L - 1):
        self.Z[l + 1] = np.dot(self.W[l + 1], self.A[l]) + self.b[l + 1]
        self.A[l + 1] = activation(self.Z[l + 1], 'relu')
    self.Z[L] = np.dot(self.W[L], self.A[L - 1]) + self.b[L]
    self.A[L] = activation(self.Z[L], 'sigmoid')
    Y_hat = self.A[L]
    return Y_hat

    # (tail of a network-builder function: log each layer and return the list)
    # layers.append(softmax)
    for i, l in enumerate(layers):
        pr("{}: {}".format(i, l))
    return layers


def build_architecture(opts, sample):
    opts.architecture = {}
    opts.architecture['nlayers'] = 6
    opts.architecture['kernel_sizes'] = [5, 5, 3, 3, 3, 3]
    opts.architecture['filters'] = [64, 128, 256, 256, 512, 1024]
    opts.architecture['paddings'] = ['same'] * 7
    opts.architecture['activation'] = utils.activation(opts.activation_type)
    opts.architecture['embedding_size'] = 2048
    return network(sample['image'], opts, regularizer=None)


if __name__ == '__main__':
    opts = options.get_opts()
    opts.architecture = {}
    opts.architecture['nlayers'] = 6
    opts.architecture['kernel_sizes'] = [5, 5, 3, 3, 3, 3]
    opts.architecture['filters'] = [64, 128, 256, 256, 512, 1024]
    opts.architecture['paddings'] = ['same'] * 7
    opts.architecture['activation'] = utils.activation(opts.activation_type)
    opts.architecture['embedding_size'] = 2048
    size = [None, opts.net_img_size, opts.net_img_size, opts.nchannels]
    x = tf.placeholder(tf.float32, size)
    network(x, opts, debug=True)

def _add_layers(self, x):
    def residual(x, in_channel, out_channel, is_training, norm):
        """residual unit with 2 layers

        convolution:
            width filter: 1
            height filter: 3
        """
        orig_x = x
        with tf.variable_scope('conv1'):
            conv1 = utils.conv(x, [1, 3, in_channel, out_channel],
                               [out_channel], padding='SAME')
            if norm:
                conv1 = utils.batch_norm(conv1, is_training)
            relu1 = utils.activation(conv1)
        with tf.variable_scope('conv2'):
            conv2 = utils.conv(relu1, [1, 3, out_channel, out_channel],
                               [out_channel], padding='SAME')
            if norm:
                conv2 = utils.batch_norm(conv2, is_training)
        with tf.variable_scope('add'):
            # 1x1 projection on the skip path when the channel count changes
            if in_channel != out_channel:
                orig_x = utils.conv(x, [1, 1, in_channel, out_channel],
                                    [out_channel], padding='SAME')
            return utils.activation(conv2 + orig_x)

    x_shape = x.get_shape()
    with tf.variable_scope('residual1'):
        r1 = residual(x, x_shape[-1], 32, self.is_training, self.norm)
        tf.summary.histogram('res_output1', r1)
    with tf.variable_scope('residual2'):
        r2 = residual(r1, r1.get_shape()[-1], 32, self.is_training, self.norm)
        tf.summary.histogram('res_output2', r2)
    with tf.variable_scope('pool0'):
        h_pool0 = utils.max_pool(r2, 1, 2, 1, 2, padding='SAME')
    with tf.variable_scope('residual3'):
        r3 = residual(h_pool0, h_pool0.get_shape()[-1], 64,
                      self.is_training, self.norm)
        tf.summary.histogram('res_output3', r3)
    with tf.variable_scope('residual4'):
        r4 = residual(r3, r3.get_shape()[-1], 64, self.is_training, self.norm)
        tf.summary.histogram('res_output4', r4)
    with tf.variable_scope('pool1'):
        h_pool1 = utils.max_pool(r4, 1, 5, 1, 5, padding='SAME')
    with tf.variable_scope('full_conn_1'):
        flat_size = 5 * 64  # pooled width of 5 times 64 channels
        h_pool2_flat = tf.reshape(h_pool1, [-1, flat_size])
        h_fc1 = utils.full_conn(h_pool2_flat, [flat_size, 1024], [1024])
        h_fc1 = utils.activation(h_fc1)
    with tf.variable_scope('full_conn_2'):
        h_fc2 = utils.full_conn(h_fc1, [1024, 128], [128])
        h_fc2 = utils.activation(h_fc2)
    return h_fc2

def build_heading_resnet(opts, inputs, regularizer=None):
    """Build generic heading estimation architecture.

    Args:
        opts: dictionary with required members (nlayers, noutputs), and
            optional fields (filters, strides, kernel_sizes, paddings,
            activations) in it.
            filters: List of positive integers, number of hidden features per layer
            strides: List of positive integers, strides of convolution per layer
            kernel_sizes: List of positive integers, width of filters per layer
            paddings: List of strings in ('VALID', 'SAME')
            activations: List of strings in ('relu', 'leakyRelu', or 'reluSq')
        inputs: Input tensor for the network
        regularizer: Regularization function to apply to weights

    Returns:
        List of tf.Tensors, each corresponding to a layer of the network.
        The final entry of the list is the output.
    """
    nlayers = opts.architecture.get('nlayers')
    noutputs = opts.architecture.get('noutputs')
    filters = opts.architecture.get('filters',
                                    [[2.0**(6 + i)] * 2 for i in range(nlayers)])
    strides = opts.architecture.get('strides', [2 for i in range(nlayers)])
    kernel_sizes = opts.architecture.get('kernel_sizes',
                                         [3 for i in range(nlayers)])
    paddings = opts.architecture.get('paddings',
                                     ["VALID" for i in range(nlayers)])
    activations = opts.architecture.get('activations',
                                        ['relu' for i in range(nlayers)])
    layers = [inputs]
    # Build network
    with tf.variable_scope('weights'):
        for i in range(nlayers):
            with tf.variable_scope('block%02d' % i):
                layer = utils.resnet_conv(inputs=layers[-1],
                                          filters=filters[i],
                                          kernel_size=kernel_sizes[i],
                                          strides=[strides[i]] * 2,
                                          activation=utils.activation(activations[i]),
                                          batch_norm=opts.use_batch_norm,
                                          kernel_regularizer=regularizer)
                layers.append(layer)
        with tf.name_scope('linear_block'):
            # Flatten conv features; `reduce` and `mul` come from functools/operator.
            in_size = reduce(mul, layers[-1].get_shape()[1:].as_list(), 1)
            reshape = tf.reshape(layers[-1], [-1, in_size])
            linear_layer = tf.layers.dense(inputs=reshape,
                                           units=noutputs,
                                           kernel_regularizer=regularizer)
            layers.append(reshape)
            layers.append(linear_layer)
    # TODO: Check for another fully connected output
    if opts.output_type == 'foe':
        with tf.name_scope('normalize'):
            layers.append(tf.nn.l2_normalize(layers[-1], 1))
    elif opts.output_type == 'foeomega':
        with tf.name_scope('halfnormalize'):
            o = layers[-1]
            layers.append(
                tf.concat([tf.nn.l2_normalize(o[:, :3], 1), o[:, 3:]], 1))
    return layers

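# Hedged usage sketch for build_heading_resnet(). The opts object, image size,
# and channel count below are illustrative only (the real values come from the
# project's options); only the keys read by the function above are set, so the
# remaining architecture fields fall back to their defaults.
import tensorflow as tf


class _Opts(object):
    use_batch_norm = True
    output_type = 'foe'
    architecture = {'nlayers': 4, 'noutputs': 3}


opts = _Opts()
images = tf.placeholder(tf.float32, [None, 128, 128, 3])  # assumed input shape
layers = build_heading_resnet(opts, images)
heading = layers[-1]  # final list entry is the (l2-normalized) network output
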
def main():
    from vgg16 import model

    dirname = os.path.dirname(os.path.abspath(__file__))
    layer_idx = 18

    img_disp = cv2.imread('{}/images/woh.png'.format(dirname))
    img_disp = cv2.resize(img_disp, (224, 224))
    img = img_disp[np.newaxis, :, :, :]
    img = img.astype(np.float32)
    img = img - np.array([103.939, 116.779, 123.68])  # bgr

    for i, layer in enumerate(model.layers):
        print(i, layer)

    compute_weight = True
    is_changed = True
    while True:
        if is_changed:
            is_changed = False
            out = utils.activation(img, model, layer_idx)
            if len(out.shape) == 4:
                is_conv = True
                is_fc = False
                out = np.transpose(out, (3, 1, 2, 0))
            else:
                is_conv = False
                is_fc = True
                out = np.transpose(out, (1, 0))
            out = utils.normalize(out)
            disp = utils.combine_and_fit(out, is_conv=is_conv, is_fc=is_fc,
                                         disp_w=800)
            cv2.imshow('input', img_disp)
            cv2.imshow('disp', disp)

        if compute_weight:
            compute_weight = False
            weight = model.get_weights()[0]  # only first layer is interpretable for *me*
            weight = utils.normalize_weights(weight, 'conv')
            weight = np.transpose(weight, (3, 0, 1, 2))
            weight_disp = utils.combine_and_fit(weight, is_weights=True,
                                                disp_w=400)
            cv2.imshow('weight_disp', weight_disp)

        val = cv2.waitKey(1) & 0xFF
        if val == ord('q'):  # quit
            break
        elif val == ord('w'):  # step to the next layer
            if layer_idx < 22:
                layer_idx += 1
                is_changed = True
                print(model.layers[layer_idx].name)
        elif val == ord('s'):  # step to the previous layer
            if layer_idx > 1:
                layer_idx -= 1
                is_changed = True
                print(model.layers[layer_idx].name)