def yolo_convolutional(inputs, filters, trainable, data_format, name):
    """Stack six `convolutional` layers under the variable scope `name`.

    Layers are named 'conv0' .. 'conv5'. Even-numbered layers use
    kernel_size=1 with `filters` output filters; odd-numbered layers use
    kernel_size=3 with `2 * filters` output filters.

    Args:
        inputs: tensor fed to the first convolution.
        filters: base filter count (doubled for the kernel_size=3 layers).
        trainable: forwarded unchanged to every `convolutional` call.
        data_format: forwarded unchanged to every `convolutional` call.
        name: variable scope that wraps the six layers.

    Returns:
        A pair `(route, inputs)`: the output of 'conv4' and the output of
        'conv5'.
    """
    route = None
    with tf.variable_scope(name):
        for layer_idx in range(6):
            pointwise = layer_idx % 2 == 0
            inputs = convolutional(
                inputs=inputs,
                filters=filters if pointwise else 2 * filters,
                kernel_size=1 if pointwise else 3,
                trainable=trainable,
                name='conv%d' % layer_idx,
                data_format=data_format)
            if layer_idx == 4:
                # The output of conv4 is handed back separately as the route.
                route = inputs
        return route, inputs
def __build(self, inputs):
    """Assemble the three detection heads under scope 'yolo_v3_model'.

    Reads `self.data_format`, `self.trainable`, `self.n_classes` and
    `self.model_size`, and the module-level `_ANCHORS`.

    Args:
        inputs: input tensor; transposed with perm [0, 3, 1, 2] when
            `self.data_format` is 'channels_first'.

    Returns:
        Three lists, each ordered (l, m, s) head:
        the first, second and third values returned by `yolo_detection`
        (named conv_*bbox, pred_*bbox and xy_offset_* here).
    """
    trainable = self.trainable
    data_format = self.data_format
    channels_first = data_format == 'channels_first'
    # Axis along which feature maps are concatenated with the routes.
    concat_axis = 1 if channels_first else 3

    with tf.variable_scope('yolo_v3_model'):
        if channels_first:
            inputs = tf.transpose(inputs, [0, 3, 1, 2])

        # NOTE: pixel scaling (inputs / 255) is not performed in this method.
        route1, route2, features = darknet53(
            inputs=inputs, trainable=trainable, data_format=data_format)

        # ---- head using anchors 6..8 ('conv_lbbox') ----
        branch, features = yolo_convolutional(
            inputs=features, filters=512, trainable=trainable,
            data_format=data_format, name='yolo_conv0')
        conv_lbbox, pred_lbbox, xy_offset_l = yolo_detection(
            inputs=features, n_classes=self.n_classes,
            anchors=_ANCHORS[6:9], img_size=self.model_size,
            trainable=trainable, data_format=data_format,
            name='conv_lbbox')

        # ---- head using anchors 3..5 ('conv_mbbox') ----
        features = convolutional(
            inputs=branch, filters=256, kernel_size=1,
            trainable=trainable, name='conv57', data_format=data_format)
        features = upsample(
            inputs=features, out_shape=route2.get_shape().as_list(),
            data_format=data_format, name='upsample0')
        with tf.variable_scope('route_1'):
            features = tf.concat([features, route2], axis=concat_axis)
        branch, features = yolo_convolutional(
            inputs=features, filters=256, trainable=trainable,
            data_format=data_format, name='yolo_conv1')
        conv_mbbox, pred_mbbox, xy_offset_m = yolo_detection(
            inputs=features, n_classes=self.n_classes,
            anchors=_ANCHORS[3:6], img_size=self.model_size,
            trainable=trainable, data_format=data_format,
            name='conv_mbbox')

        # ---- head using anchors 0..2 ('conv_sbbox') ----
        features = convolutional(
            inputs=branch, filters=128, kernel_size=1,
            trainable=trainable, name='conv63', data_format=data_format)
        features = upsample(
            features, out_shape=route1.get_shape().as_list(),
            data_format=data_format, name='upsample1')
        with tf.variable_scope('route_2'):
            features = tf.concat([features, route1], axis=concat_axis)
        branch, features = yolo_convolutional(
            inputs=features, filters=128, trainable=trainable,
            data_format=data_format, name='yolo_conv2')
        conv_sbbox, pred_sbbox, xy_offset_s = yolo_detection(
            inputs=features, n_classes=self.n_classes,
            anchors=_ANCHORS[0:3], img_size=self.model_size,
            trainable=trainable, data_format=data_format,
            name='conv_sbbox')

        raw_heads = [conv_lbbox, conv_mbbox, conv_sbbox]
        decoded_heads = [pred_lbbox, pred_mbbox, pred_sbbox]
        cell_offsets = [xy_offset_l, xy_offset_m, xy_offset_s]
        return raw_heads, decoded_heads, cell_offsets
def darknet53(inputs, trainable, data_format):
    """Build the feature extractor under the 'darknet' variable scope.

    The network is an initial 32-filter convolution ('conv0') followed by
    five stages. Each stage is one `convolutional` call with strides=2 and
    then a run of `residual` blocks whose `filters` argument is half of the
    stage convolution's filter count.

    Args:
        inputs: input tensor.
        trainable: forwarded to every `convolutional` / `residual` call.
        data_format: forwarded to every `convolutional` / `residual` call.

    Returns:
        `(route1, route2, inputs)`: the outputs of the third stage
        (after 'residual10'), the fourth stage (after 'residual18') and the
        fifth stage (after 'residual22').
    """
    # (stage conv name, stage conv filters, residual count, first residual index)
    stage_specs = (
        ('conv1', 64, 1, 0),
        ('conv4', 128, 2, 1),
        ('conv9', 256, 8, 3),
        ('conv26', 512, 8, 11),
        ('conv43', 1024, 4, 19),
    )

    with tf.variable_scope('darknet'):
        net = convolutional(
            inputs=inputs, filters=32, kernel_size=3,
            trainable=trainable, name='conv0', data_format=data_format)

        stage_outputs = []
        for conv_name, conv_filters, n_blocks, first_idx in stage_specs:
            net = convolutional(
                inputs=net, filters=conv_filters, kernel_size=3,
                trainable=trainable, name=conv_name, strides=2,
                data_format=data_format)
            for block_idx in range(first_idx, first_idx + n_blocks):
                net = residual(
                    inputs=net, filters=conv_filters // 2,
                    trainable=trainable, data_format=data_format,
                    name='residual%d' % block_idx)
            stage_outputs.append(net)

        # Stages 3 and 4 are exposed as routes; stage 5 is the final output.
        route1, route2 = stage_outputs[2], stage_outputs[3]
        return route1, route2, net
# Model hyper-parameters.
n_channel, n_filter = 1, 32
HF, HW = 5, 5
n_input, n_hidden, n_output = 2, 100, 10

# Graph entry points, fed at run time.
X = graph.Placeholder(name='inputs')  # to feed with attributes
Y = graph.Placeholder(name='labels')  # to feed with labels

# Signature reminder:
# convolutional(X, channel_in, channel_out,
#               filter_height=3, filter_width=3, stride=1, pad=1)
conv_1 = layers.convolutional(
    X, 1, 16,
    filter_height=3, filter_width=3, stride=1, pad=1)
conv_2 = layers.convolutional(
    conv_1, 16, 32,
    filter_height=3, filter_width=3, stride=3, pad=1)

flat = op.Flatten(conv_2)

# NOTE(review): 10 * 10 * 32 is presumably the flattened size of conv_2's
# output (32 channels) -- confirm against the actual input resolution.
fc_1 = layers.fully_connected(
    flat, 10 * 10 * 32, n_hidden, activation='sigmoid')
def yolo_detection(inputs, n_classes, anchors, img_size, trainable,
                   data_format, name):
    """Apply the detection convolution and decode its output into boxes.

    Args:
        inputs: feature-map tensor fed to the detection convolution.
        n_classes: number of labels.
        anchors: a list of anchor sizes (one two-element entry per anchor).
        img_size: the input size of the model; `img_size[0]` and
            `img_size[1]` are divided by the grid's first and second
            spatial dimensions to obtain the strides.
        trainable: forwarded to `convolutional`.
        data_format: 'channels_first' (NCHW); any other value is handled
            as channels_last (NHWC).
        name: name given to the detection convolution.

    Returns:
        A tuple `(raw_output, decoded, xy_offset_output)`:
        raw_output: the detection convolution's output in NHWC layout.
        decoded: tensor reshaped to
            [-1, grid_h, grid_w, n_anchors, 5 + n_classes] whose last axis
            holds box centers, box shapes, confidence and class scores
            after decoding.
        xy_offset_output: the (x, y) cell offsets reshaped to
            [grid_h, grid_w, 1, 2].
    """
    n_anchors = len(anchors)
    filters = n_anchors * (5 + n_classes)

    inputs = convolutional(inputs=inputs, filters=filters, kernel_size=1,
                           trainable=trainable, name=name,
                           data_format=data_format, act=False, bn=False)

    # raw output of detection conv layer
    raw_output = inputs

    shape = inputs.get_shape().as_list()
    # channels_first: NCHW
    # channels_last:  NHWC
    if data_format == 'channels_first':
        grid_shape = shape[2:4]
        # bring both tensors to NHWC before flattening
        inputs = tf.transpose(inputs, [0, 2, 3, 1])
        raw_output = tf.transpose(raw_output, [0, 2, 3, 1])
    else:
        grid_shape = shape[1:3]

    # Flatten row-major: prediction index = (h * grid_w + w) * n_anchors + a.
    inputs = tf.reshape(
        inputs,
        [-1, n_anchors * grid_shape[0] * grid_shape[1], 5 + n_classes])

    # NOTE(review): strides[0] scales x and strides[1] scales y below; this
    # matches only when both strides are equal -- confirm the ordering of
    # img_size if non-uniform strides are ever used.
    strides = (img_size[0] // grid_shape[0], img_size[1] // grid_shape[1])

    # split & get the 4 components of output
    box_centers, box_shapes, confidence, classes = \
        tf.split(inputs, [2, 2, 1, n_classes], axis=-1)

    # Cell offsets must follow the same row-major order as the flattened
    # predictions: x runs over the grid width (second spatial dim) and y
    # over the grid height (first spatial dim). tf.meshgrid(x, y) returns
    # arrays of shape (len(y), len(x)), so flattening them yields index
    # h * grid_w + w with x == w and y == h. Building x from grid_shape[0]
    # is only correct for square grids.
    x = tf.range(grid_shape[1], dtype=tf.float32)
    y = tf.range(grid_shape[0], dtype=tf.float32)
    x_offset, y_offset = tf.meshgrid(x, y)
    x_offset = tf.reshape(x_offset, (-1, 1))
    y_offset = tf.reshape(y_offset, (-1, 1))
    x_y_offset = tf.concat([x_offset, y_offset], axis=-1)

    xy_offset_output = tf.identity(x_y_offset)
    xy_offset_output = tf.reshape(
        xy_offset_output, [grid_shape[0], grid_shape[1], 1, 2])

    # Repeat every cell offset once per anchor, matching the flattened order.
    x_y_offset = tf.tile(x_y_offset, [1, n_anchors])
    x_y_offset = tf.reshape(x_y_offset, [1, -1, 2])

    box_centers = tf.nn.sigmoid(box_centers)
    box_centers = (box_centers + x_y_offset) * strides

    # One copy of the anchor list per grid cell.
    anchors = tf.tile(anchors, [grid_shape[0] * grid_shape[1], 1])
    box_shapes = tf.exp(box_shapes) * tf.cast(anchors, tf.float32)

    confidence = tf.nn.sigmoid(confidence)
    classes = tf.nn.sigmoid(classes)

    inputs = tf.concat([box_centers, box_shapes, confidence, classes],
                       axis=-1)
    inputs = tf.reshape(
        inputs,
        [-1, grid_shape[0], grid_shape[1], n_anchors, 5 + n_classes])

    return raw_output, inputs, xy_offset_output