def _enas_cell(self, x, curr_cell, prev_cell, op_id, out_filters):
    """Performs an enas operation specified by op_id."""

    num_possible_inputs = curr_cell + 1

    with tf.variable_scope("avg_pool"):  # x is prev_layer[x_id]  # [None, 48, 32, 32]
        avg_pool = tf.layers.average_pooling2d(
            x, [3, 3], [1, 1], "SAME", data_format=self.actual_data_format)
        avg_pool_c = self._get_C(avg_pool)
        if avg_pool_c != out_filters:
            with tf.variable_scope("conv"):
                w = create_weight(
                    "w", [num_possible_inputs, avg_pool_c * out_filters])
                w = w[prev_cell]
                w = tf.reshape(w, [1, 1, avg_pool_c, out_filters])
                avg_pool = tf.nn.relu(avg_pool)
                avg_pool = tf.nn.conv2d(avg_pool, w, strides=[1, 1, 1, 1],
                                        padding="SAME",
                                        data_format=self.data_format)
                avg_pool = batch_norm(avg_pool, is_training=True,
                                      data_format=self.data_format)

    with tf.variable_scope("max_pool"):
        max_pool = tf.layers.max_pooling2d(
            x, [3, 3], [1, 1], "SAME", data_format=self.actual_data_format)
        max_pool_c = self._get_C(max_pool)
        if max_pool_c != out_filters:
            with tf.variable_scope("conv"):
                w = create_weight(
                    "w", [num_possible_inputs, max_pool_c * out_filters])
                w = w[prev_cell]
                w = tf.reshape(w, [1, 1, max_pool_c, out_filters])
                max_pool = tf.nn.relu(max_pool)
                max_pool = tf.nn.conv2d(max_pool, w, strides=[1, 1, 1, 1],
                                        padding="SAME",
                                        data_format=self.data_format)
                max_pool = batch_norm(max_pool, is_training=True,
                                      data_format=self.data_format)

    x_c = self._get_C(x)
    if x_c != out_filters:
        with tf.variable_scope("x_conv"):
            w = create_weight("w", [num_possible_inputs, x_c * out_filters])
            w = w[prev_cell]
            w = tf.reshape(w, [1, 1, x_c, out_filters])
            x = tf.nn.relu(x)
            x = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="SAME",
                             data_format=self.data_format)
            x = batch_norm(x, is_training=True, data_format=self.data_format)

    out = [
        self._enas_conv(x, curr_cell, prev_cell, 3, out_filters),
        self._enas_conv(x, curr_cell, prev_cell, 5, out_filters),
        avg_pool,
        max_pool,
        x,
    ]

    out = tf.stack(out, axis=0)
    out = out[op_id, :, :, :, :]
    return out
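# Illustration only, not part of the ENAS code: how indexing a stacked tensor
# with the (tensor-valued) op_id picks exactly one of the five branches built
# above. The shapes below are made up; only the stack-then-index pattern matters.
import tensorflow as tf

candidates = [tf.fill([2, 4, 8, 8], float(i)) for i in range(5)]  # stand-ins for
                                                                  # conv3, conv5, avg, max, identity
op_id = tf.constant(3)               # e.g. sampled by the controller
out = tf.stack(candidates, axis=0)   # [5, 2, 4, 8, 8]
out = out[op_id]                     # dynamic selection; here the max_pool branch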
def _maybe_calibrate_size(self, layers, out_filters, is_training):
    """Makes sure layers[0] and layers[1] have the same shapes."""
    # Calibrate both inputs so each ends up with out_filters channels.

    hw = [self._get_HW(layer) for layer in layers]  # e.g. [32, 32] or [32, 16]
    c = [self._get_C(layer) for layer in layers]    # e.g. [48 * 3 = 144, 144] or [48, 96]

    with tf.variable_scope("calibrate"):
        x = layers[0]  # the earlier of the two input layers
        if hw[0] != hw[1]:
            # spatial sizes differ: layers[1] has already been reduced, so
            # layers[0] must be exactly twice as large and gets reduced here
            assert hw[0] == 2 * hw[1]
            with tf.variable_scope("pool_x"):
                x = tf.nn.relu(x)
                x = self._factorized_reduction(x, out_filters, 2, is_training)
        elif c[0] != out_filters:
            # same spatial size but wrong channel count, e.g. 144 != 48:
            # project with a 1x1 convolution
            with tf.variable_scope("pool_x"):
                w = create_weight("w", [1, 1, c[0], out_filters])
                x = tf.nn.relu(x)
                x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
                                 data_format=self.data_format)
                x = batch_norm(x, is_training, data_format=self.data_format)
                # now [?, out_filters, 32, 32]

        y = layers[1]  # the later of the two input layers
        if c[1] != out_filters:
            # e.g. 144 != 48: project with a 1x1 convolution
            with tf.variable_scope("pool_y"):
                w = create_weight("w", [1, 1, c[1], out_filters])
                y = tf.nn.relu(y)
                y = tf.nn.conv2d(y, w, [1, 1, 1, 1], "SAME",
                                 data_format=self.data_format)
                y = batch_norm(y, is_training, data_format=self.data_format)
    return [x, y]
def _factorized_reduction(self, x, out_filters, stride, is_training):
    # e.g. x = [?, 48, 32, 32], out_filters = 96, stride = 2
    """Reduces the shape of x without information loss due to striding."""
    assert out_filters % 2 == 0, (
        "Need even number of filters when using this factorized reduction.")
    if stride == 1:
        with tf.variable_scope("path_conv"):
            inp_c = self._get_C(x)
            w = create_weight("w", [1, 1, inp_c, out_filters])
            x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
                             data_format=self.data_format)
            x = batch_norm(x, is_training, data_format=self.data_format)
            return x

    stride_spec = self._get_strides(stride)  # e.g. [1, 1, 2, 2] for NCHW
    # Skip path 1
    path1 = tf.nn.avg_pool(
        x, [1, 1, 1, 1], stride_spec, "VALID", data_format=self.data_format)
    # path1 keeps every other pixel, e.g. [None, 48, 16, 16]
    with tf.variable_scope("path1_conv"):
        inp_c = self._get_C(path1)  # e.g. 48
        w = create_weight("w", [1, 1, inp_c, out_filters // 2])  # e.g. [1, 1, 48, 48]
        path1 = tf.nn.conv2d(path1, w, [1, 1, 1, 1], "VALID",
                             data_format=self.data_format)

    # Skip path 2
    # First pad with 0's on the right and bottom, then shift the filter to
    # include those 0's that were added.
    if self.data_format == "NHWC":
        pad_arr = [[0, 0], [0, 1], [0, 1], [0, 0]]
        path2 = tf.pad(x, pad_arr)[:, 1:, 1:, :]
        concat_axis = 3
    else:
        pad_arr = [[0, 0], [0, 0], [0, 1], [0, 1]]
        path2 = tf.pad(x, pad_arr)[:, :, 1:, 1:]
        concat_axis = 1

    path2 = tf.nn.avg_pool(
        path2, [1, 1, 1, 1], stride_spec, "VALID", data_format=self.data_format)
    with tf.variable_scope("path2_conv"):
        inp_c = self._get_C(path2)
        w = create_weight("w", [1, 1, inp_c, out_filters // 2])
        path2 = tf.nn.conv2d(path2, w, [1, 1, 1, 1], "VALID",
                             data_format=self.data_format)

    # Concat and apply BN
    final_path = tf.concat(values=[path1, path2], axis=concat_axis)
    final_path = batch_norm(final_path, is_training,
                            data_format=self.data_format)

    return final_path
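# Illustration only (NumPy, 4x4 toy input), not part of the ENAS code: path1
# samples the even rows/cols, while the pad-and-shift trick makes path2 sample
# the odd rows/cols, so the two concatenated paths together see more of the
# input than a single stride-2 pool would.
import numpy as np

x = np.arange(16).reshape(4, 4)
path1 = x[::2, ::2]                                        # 1x1 "avg pool" with stride 2
shifted = np.pad(x, ((0, 1), (0, 1)), mode="constant")[1:, 1:]  # pad bottom/right, drop top/left
path2 = shifted[::2, ::2]                                  # same pooling on the shifted input
print(path1)  # [[ 0  2]
              #  [ 8 10]]
print(path2)  # [[ 5  7]
              #  [13 15]]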
def _fixed_conv(self, x, f_size, out_filters, stride, is_training,
                stack_convs=2):
    """Apply a fixed convolution.

    Args:
      stack_convs: number of separable convs to apply.
    """

    for conv_id in range(stack_convs):
        inp_c = self._get_C(x)
        if conv_id == 0:
            strides = self._get_strides(stride)
        else:
            strides = [1, 1, 1, 1]

        with tf.variable_scope("sep_conv_{}".format(conv_id)):
            w_depthwise = create_weight("w_depth", [f_size, f_size, inp_c, 1])
            w_pointwise = create_weight("w_point", [1, 1, inp_c, out_filters])
            x = tf.nn.relu(x)
            x = tf.nn.separable_conv2d(x,
                                       depthwise_filter=w_depthwise,
                                       pointwise_filter=w_pointwise,
                                       strides=strides, padding="SAME",
                                       data_format=self.data_format)
            x = batch_norm(x, is_training, data_format=self.data_format)

    return x
def _fixed_layer(self, layer_id, prev_layers, arc, out_filters, stride,
                 is_training, normal_or_reduction_cell="normal"):
    """
    Args:
      prev_layers: cache of previous layers. for skip connections
      is_training: for batch_norm
    """

    assert len(prev_layers) == 2
    layers = [prev_layers[0], prev_layers[1]]
    layers = self._maybe_calibrate_size(layers, out_filters,
                                        is_training=is_training)

    with tf.variable_scope("layer_base"):
        x = layers[1]
        inp_c = self._get_C(x)
        w = create_weight("w", [1, 1, inp_c, out_filters])
        x = tf.nn.relu(x)
        x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
                         data_format=self.data_format)
        x = batch_norm(x, is_training, data_format=self.data_format)
        layers[1] = x

    used = np.zeros([self.num_cells + 2], dtype=np.int32)
    f_sizes = [3, 5]
    for cell_id in range(self.num_cells):
        with tf.variable_scope("cell_{}".format(cell_id)):
            x_id = arc[4 * cell_id]
            used[x_id] += 1
            x_op = arc[4 * cell_id + 1]
            x = layers[x_id]
            x_stride = stride if x_id in [0, 1] else 1
            with tf.variable_scope("x_conv"):
                if x_op in [0, 1]:
                    f_size = f_sizes[x_op]
                    x = self._fixed_conv(x, f_size, out_filters, x_stride,
                                         is_training)
                elif x_op in [2, 3]:
                    inp_c = self._get_C(x)
                    if x_op == 2:
                        x = tf.layers.average_pooling2d(
                            x, [3, 3], [x_stride, x_stride], "SAME",
                            data_format=self.actual_data_format)
                    else:
                        x = tf.layers.max_pooling2d(
                            x, [3, 3], [x_stride, x_stride], "SAME",
                            data_format=self.actual_data_format)
                    if inp_c != out_filters:
                        w = create_weight("w", [1, 1, inp_c, out_filters])
                        x = tf.nn.relu(x)
                        x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
                                         data_format=self.data_format)
                        x = batch_norm(x, is_training,
                                       data_format=self.data_format)
                else:
                    inp_c = self._get_C(x)
                    if x_stride > 1:
                        assert x_stride == 2
                        x = self._factorized_reduction(x, out_filters, 2,
                                                       is_training)
                    if inp_c != out_filters:
                        w = create_weight("w", [1, 1, inp_c, out_filters])
                        x = tf.nn.relu(x)
                        x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
                                         data_format=self.data_format)
                        x = batch_norm(x, is_training,
                                       data_format=self.data_format)
                if (x_op in [0, 1, 2, 3] and
                        self.drop_path_keep_prob is not None and
                        is_training):
                    x = self._apply_drop_path(x, layer_id)

            y_id = arc[4 * cell_id + 2]
            used[y_id] += 1
            y_op = arc[4 * cell_id + 3]
            y = layers[y_id]
            y_stride = stride if y_id in [0, 1] else 1
            with tf.variable_scope("y_conv"):
                if y_op in [0, 1]:
                    f_size = f_sizes[y_op]
                    y = self._fixed_conv(y, f_size, out_filters, y_stride,
                                         is_training)
                elif y_op in [2, 3]:
                    inp_c = self._get_C(y)
                    if y_op == 2:
                        y = tf.layers.average_pooling2d(
                            y, [3, 3], [y_stride, y_stride], "SAME",
                            data_format=self.actual_data_format)
                    else:
                        y = tf.layers.max_pooling2d(
                            y, [3, 3], [y_stride, y_stride], "SAME",
                            data_format=self.actual_data_format)
                    if inp_c != out_filters:
                        w = create_weight("w", [1, 1, inp_c, out_filters])
                        y = tf.nn.relu(y)
                        y = tf.nn.conv2d(y, w, [1, 1, 1, 1], "SAME",
                                         data_format=self.data_format)
                        y = batch_norm(y, is_training,
                                       data_format=self.data_format)
                else:
                    inp_c = self._get_C(y)
                    if y_stride > 1:
                        assert y_stride == 2
                        y = self._factorized_reduction(y, out_filters, 2,
                                                       is_training)
                    if inp_c != out_filters:
                        w = create_weight("w", [1, 1, inp_c, out_filters])
                        y = tf.nn.relu(y)
                        y = tf.nn.conv2d(y, w, [1, 1, 1, 1], "SAME",
                                         data_format=self.data_format)
                        y = batch_norm(y, is_training,
                                       data_format=self.data_format)
                if (y_op in [0, 1, 2, 3] and
                        self.drop_path_keep_prob is not None and
                        is_training):
                    y = self._apply_drop_path(y, layer_id)

            out = x + y
            layers.append(out)

    out = self._fixed_combine(layers, used, out_filters, is_training,
                              normal_or_reduction_cell)
    return out
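# Illustration only, not part of the ENAS code: how a fixed architecture list
# is decoded by the loop above, 4 integers per cell as (x_id, x_op, y_id, y_op).
# The op codes follow the order used in _enas_cell / _fixed_layer:
# 0 = sep conv 3x3, 1 = sep conv 5x5, 2 = avg pool, 3 = max pool, 4 = identity.
# The numbers below are made up for illustration.
arc = [0, 0, 1, 2,  1, 4, 0, 3]  # two cells
for cell_id in range(len(arc) // 4):
    x_id, x_op, y_id, y_op = arc[4 * cell_id: 4 * cell_id + 4]
    print("cell {}: x <- layer {} (op {}), y <- layer {} (op {})".format(
        cell_id, x_id, x_op, y_id, y_op))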
def _model(self, images, is_training, reuse=False):
    """Compute the logits given the images."""

    if self.fixed_arc is None:
        is_training = True

    with tf.variable_scope(self.name, reuse=reuse):
        with tf.variable_scope("stem_conv"):
            w = create_weight("w", [3, 3, self.channel, self.out_filters * 3])
            x = tf.nn.conv2d(images, w, [1, 1, 1, 1], "SAME",
                             data_format=self.data_format)
            x = batch_norm(x, is_training, data_format=self.data_format)
        if self.data_format == "NHWC":
            split_axis = 3
        elif self.data_format == "NCHW":
            split_axis = 1
        else:
            raise ValueError("Unknown data_format '{0}'".format(
                self.data_format))
        layers = [x, x]  # building layers in the micro space

        out_filters = self.out_filters
        for layer_id in range(self.num_layers + 2):
            with tf.variable_scope("layer_{0}".format(layer_id)):
                if layer_id not in self.pool_layers:
                    if self.fixed_arc is None:
                        x = self._enas_layer(layer_id, layers,
                                             self.normal_arc, out_filters)
                    else:
                        x = self._fixed_layer(
                            layer_id, layers, self.normal_arc, out_filters, 1,
                            is_training, normal_or_reduction_cell="normal")
                else:
                    out_filters *= 2
                    if self.fixed_arc is None:
                        x = self._factorized_reduction(x, out_filters, 2,
                                                       is_training)
                        layers = [layers[-1], x]
                        x = self._enas_layer(layer_id, layers,
                                             self.reduce_arc, out_filters)
                    else:
                        x = self._fixed_layer(
                            layer_id, layers, self.reduce_arc, out_filters, 2,
                            is_training, normal_or_reduction_cell="reduction")
                print("Layer {0:>2d}: {1}".format(layer_id, x))
                layers = [layers[-1], x]

            # auxiliary heads
            self.num_aux_vars = 0
            if (self.use_aux_heads and
                    layer_id in self.aux_head_indices and
                    is_training):
                print("Using aux_head at layer {0}".format(layer_id))
                with tf.variable_scope("aux_head"):
                    aux_logits = tf.nn.relu(x)
                    if (aux_logits.get_shape()[2].value - 3) % 5 == 0:
                        aux_logits = tf.layers.average_pooling2d(
                            aux_logits, [5, 5], [3, 3], "VALID",
                            data_format=self.actual_data_format)
                    else:
                        aux_logits = tf.layers.average_pooling2d(
                            aux_logits, [5, 5], [3, 3], "SAME",
                            data_format=self.actual_data_format)
                    with tf.variable_scope("proj"):
                        inp_c = self._get_C(aux_logits)
                        w = create_weight("w", [1, 1, inp_c, 128])
                        aux_logits = tf.nn.conv2d(
                            aux_logits, w, [1, 1, 1, 1], "SAME",
                            data_format=self.data_format)
                        aux_logits = batch_norm(
                            aux_logits, is_training=True,
                            data_format=self.data_format)
                        aux_logits = tf.nn.relu(aux_logits)

                    with tf.variable_scope("avg_pool"):
                        inp_c = self._get_C(aux_logits)
                        hw = self._get_HW(aux_logits)
                        w = create_weight("w", [hw, hw, inp_c, 768])
                        aux_logits = tf.nn.conv2d(
                            aux_logits, w, [1, 1, 1, 1], "SAME",
                            data_format=self.data_format)
                        aux_logits = batch_norm(
                            aux_logits, is_training=True,
                            data_format=self.data_format)
                        aux_logits = tf.nn.relu(aux_logits)

                    with tf.variable_scope("fc"):
                        aux_logits = global_avg_pool(
                            aux_logits, data_format=self.data_format)
                        inp_c = aux_logits.get_shape()[1].value
                        w = create_weight("w", [inp_c, 10])
                        aux_logits = tf.matmul(aux_logits, w)
                        self.aux_logits = aux_logits

                aux_head_variables = [
                    var for var in tf.trainable_variables()
                    if (var.name.startswith(self.name) and
                        "aux_head" in var.name)]
                self.num_aux_vars = count_model_params(aux_head_variables)
                print("Aux head uses {0} params".format(self.num_aux_vars))

        x = tf.nn.relu(x)
        x = global_avg_pool(x, data_format=self.data_format)
        if is_training and self.keep_prob is not None and self.keep_prob < 1.0:
            x = tf.nn.dropout(x, self.keep_prob)
        with tf.variable_scope("fc"):
            inp_c = x.get_shape()[1].value
            w = create_weight("w", [inp_c, 10])
            x = tf.matmul(x, w)
    return x
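# Illustration only, not part of the ENAS code: the channel schedule of the
# loop above. out_filters doubles at every reduction (pool) layer. num_layers
# and pool_layers below are assumed values chosen just to show the schedule.
num_layers, out_filters = 6, 48
pool_layers = [2, 5]  # hypothetical positions of the reduction layers
for layer_id in range(num_layers + 2):
    if layer_id in pool_layers:
        out_filters *= 2
        kind = "reduction"
    else:
        kind = "normal"
    print("layer {:>2d}: {:9s} out_filters = {}".format(layer_id, kind,
                                                         out_filters))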
def _enas_layer(self, layer_id, prev_layers, arc, out_filters):
    """
    Args:
      layer_id: current layer
      prev_layers: cache of previous layers. for skip connections
    """

    assert len(prev_layers) == 2, "need exactly 2 inputs"
    layers = [prev_layers[0], prev_layers[1]]
    # give both inputs out_filters channels (e.g. 48 at the first layer)
    layers = self._maybe_calibrate_size(layers, out_filters, is_training=True)
    used = []
    for cell_id in range(self.num_cells):  # e.g. self.num_cells = 5
        # stack the candidate inputs: [2, None, 48, 32, 32] on the first
        # iteration, then [3, None, 48, 32, 32], and so on
        prev_layers = tf.stack(layers, axis=0)
        with tf.variable_scope("cell_{0}".format(cell_id)):
            with tf.variable_scope("x"):
                # arc holds 4 entries per cell: x_id picks which previous
                # layer feeds branch x, x_op picks its operation
                x_id = arc[4 * cell_id]
                x_op = arc[4 * cell_id + 1]
                x = prev_layers[x_id, :, :, :, :]
                x = self._enas_cell(x, cell_id, x_id, x_op, out_filters)
                x_used = tf.one_hot(x_id, depth=self.num_cells + 2,
                                    dtype=tf.int32)
            with tf.variable_scope("y"):
                y_id = arc[4 * cell_id + 2]
                y_op = arc[4 * cell_id + 3]
                y = prev_layers[y_id, :, :, :, :]
                y = self._enas_cell(y, cell_id, y_id, y_op, out_filters)
                # depth is num_cells + 2 because the two calibrated inputs
                # are also candidates
                y_used = tf.one_hot(y_id, depth=self.num_cells + 2,
                                    dtype=tf.int32)
            out = x + y  # element-wise add, not concatenate
            used.extend([x_used, y_used])
            layers.append(out)  # one cell of the block is finished

    # concatenate the "loose ends": layers that were never used as inputs.
    # sum the one-hot vectors into one usage-count vector of shape
    # [num_cells + 2]
    used = tf.add_n(used)
    indices = tf.where(tf.equal(used, 0))  # layers never chosen as an input
    indices = tf.to_int32(indices)
    indices = tf.reshape(indices, [-1])
    # number of loose ends; depends on the sampled architecture
    num_outs = tf.size(indices)
    out = tf.stack(layers, axis=0)  # all candidates, e.g. [7, ?, 48, 32, 32]
    out = tf.gather(out, indices, axis=0)  # keep only the loose ends

    inp = prev_layers[0]  # first layer, used only to read N, C, H, W
    if self.data_format == "NHWC":
        N = tf.shape(inp)[0]
        H = tf.shape(inp)[1]
        W = tf.shape(inp)[2]
        C = tf.shape(inp)[3]
        out = tf.transpose(out, [1, 2, 3, 0, 4])
        out = tf.reshape(out, [N, H, W, num_outs * out_filters])
    elif self.data_format == "NCHW":
        N = tf.shape(inp)[0]
        C = tf.shape(inp)[1]
        H = tf.shape(inp)[2]
        W = tf.shape(inp)[3]
        out = tf.transpose(out, [1, 0, 2, 3, 4])  # [N, num_outs, C, H, W]
        # merge the num_outs loose ends along the channel axis
        out = tf.reshape(out, [N, num_outs * out_filters, H, W])
    else:
        raise ValueError("Unknown data_format '{0}'".format(self.data_format))

    with tf.variable_scope("final_conv"):
        # one 1x1 filter block per candidate input, e.g. [7, 48 * 48];
        # only the loose-end blocks are gathered and used
        w = create_weight("w", [self.num_cells + 2, out_filters * out_filters])
        w = tf.gather(w, indices, axis=0)
        w = tf.reshape(w, [1, 1, num_outs * out_filters, out_filters])
        out = tf.nn.relu(out)
        out = tf.nn.conv2d(out, w, strides=[1, 1, 1, 1], padding="SAME",
                           data_format=self.data_format)
        out = batch_norm(out, is_training=True, data_format=self.data_format)

    out = tf.reshape(out, tf.shape(prev_layers[0]))

    return out
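# Illustration only (NumPy), not part of the ENAS code: the "loose ends"
# bookkeeping above. Any of the num_cells + 2 candidate inputs that is never
# consumed by a later cell ends up in `indices` and is concatenated by
# final_conv. The toy arc is made up for illustration.
import numpy as np

num_cells = 2
arc = [0, 0, 1, 2,  2, 4, 0, 3]  # (x_id, x_op, y_id, y_op) per cell
used = np.zeros(num_cells + 2, dtype=np.int32)
for cell_id in range(num_cells):
    used[arc[4 * cell_id]] += 1      # input chosen for branch x
    used[arc[4 * cell_id + 2]] += 1  # input chosen for branch y
indices = np.where(used == 0)[0]     # never-used layers ("loose ends")
print(used)          # [2 1 1 0]
print(indices)       # [3]
print(indices.size)  # 1 == num_outs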