def main(_):
    """Detect objects in ``./demo/test.jpg`` and save the annotated result.

    Builds the inference graph (evaluation preprocessing, VGG16 backbone,
    multibox head, anchor decoding and per-class box selection), restores the
    weights from the path returned by ``get_checkpoint()``, runs one forward
    pass and writes the image with the drawn detections to
    ``./demo/test_out.jpg``.

    Args:
        _: Ignored. Presumably the argv list supplied by the application
            runner -- confirm against the caller.
    """
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # No op built below reads this placeholder; it is only fed at run time.
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features = tf.expand_dims(
            ssd_preprocessing.preprocess_for_eval(
                image_input, out_shape,
                data_format=FLAGS.data_format, output_rgb=False),
            axis=0)

        # Anchor configuration: every list carries one entry per layer.
        feature_map_sizes = [(38, 38), (19, 19), (10, 10),
                             (5, 5), (3, 3), (1, 1)]
        base_scales = [(0.1, ), (0.2, ), (0.375, ),
                       (0.55, ), (0.725, ), (0.9, )]
        extra_scales = [(0.1414, ), (0.2739, ), (0.4541, ),
                        (0.6315, ), (0.8078, ), (0.9836, )]
        aspect_ratios = [(2., .5),
                         (2., 3., .5, 0.3333),
                         (2., 3., .5, 0.3333),
                         (2., 3., .5, 0.3333),
                         (2., .5),
                         (2., .5)]
        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=feature_map_sizes,
            anchor_scales=base_scales,
            extra_anchor_scales=extra_scales,
            anchor_ratios=aspect_ratios,
            layer_steps=[8, 16, 32, 64, 100, 300])
        (all_anchors,
         all_num_anchors_depth,
         all_num_anchors_spatial) = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        with tf.variable_scope(FLAGS.model_scope, default_name=None,
                               values=[features], reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers, FLAGS.num_classes, all_num_anchors_depth,
                data_format=FLAGS.data_format)

            if FLAGS.data_format == 'channels_first':
                # Move axis 1 to the end so the reshapes below can flatten
                # every prediction map the same way.
                channels_last = [0, 2, 3, 1]
                cls_pred = [tf.transpose(t, channels_last) for t in cls_pred]
                location_pred = [tf.transpose(t, channels_last)
                                 for t in location_pred]

            # Flatten each layer's output to one row per anchor, then stack
            # all layers into a single tensor.
            cls_pred = [tf.reshape(t, [-1, FLAGS.num_classes])
                        for t in cls_pred]
            location_pred = [tf.reshape(t, [-1, 4]) for t in location_pred]
            cls_pred = tf.concat(cls_pred, axis=0)
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            decoded = anchor_encoder_decoder.ext_decode_all_anchors(
                location_pred, all_anchors,
                all_num_anchors_depth, all_num_anchors_spatial)
            bboxes_pred = tf.concat(decoded, axis=0)
            selected_bboxes, selected_scores = parse_by_class(
                cls_pred, bboxes_pred, FLAGS.num_classes,
                FLAGS.select_threshold, FLAGS.min_size,
                FLAGS.keep_topk, FLAGS.nms_topk, FLAGS.nms_threshold)

            # Merge the per-class results into flat tensors; each score gets
            # a label equal to the key it was stored under.
            labels_list = [tf.ones_like(score, tf.int32) * class_id
                           for class_id, score in selected_scores.items()]
            scores_list = list(selected_scores.values())
            bboxes_list = [selected_bboxes[class_id]
                           for class_id in selected_scores]
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, get_checkpoint())

            np_image = imread('./demo/test.jpg')
            feeds = {
                image_input: np_image,
                shape_input: np_image.shape[:-1],
            }
            labels_, scores_, bboxes_ = sess.run(
                [all_labels, all_scores, all_bboxes], feed_dict=feeds)

            annotated = draw_toolbox.bboxes_draw_on_img(
                np_image, labels_, scores_, bboxes_, thickness=2)
            imsave('./demo/test_out.jpg', annotated)
def main(_):
    """Run the detector on ``demo/test.jpg`` and re-save the restored model.

    Builds the inference graph with a named image placeholder, restores the
    weights from the path returned by ``get_checkpoint()``, runs one forward
    pass, writes the annotated image to ``demo/test_out.jpg`` and finally
    saves the session to ``model/ssd300_vgg16/ssd300_vgg16`` with
    ``global_step=0``.

    Args:
        _: Ignored. Presumably the argv list supplied by the application
            runner -- confirm against the caller.
    """
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        # NOTE(review): the name scope is assumed to wrap only the
        # placeholder -- confirm against the original layout.
        with tf.name_scope('define_input'):
            image_input = tf.placeholder(
                tf.uint8, shape=(None, None, 3), name='image_input')

        features = tf.expand_dims(
            ssd_preprocessing.preprocess_for_eval(
                image_input, out_shape,
                data_format=FLAGS.data_format, output_rgb=False),
            axis=0)

        # Anchor configuration: every list carries one entry per layer.
        # An alternative ratio set without the leading 1. was previously
        # used: [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
        #        (2., 3., .5, 0.3333), (2., .5), (2., .5)]
        aspect_ratios = [(1., 2., .5),
                         (1., 2., 3., .5, 0.3333),
                         (1., 2., 3., .5, 0.3333),
                         (1., 2., 3., .5, 0.3333),
                         (1., 2., .5),
                         (1., 2., .5)]
        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=[(38, 38), (19, 19), (10, 10),
                           (5, 5), (3, 3), (1, 1)],
            anchor_scales=[(0.1, ), (0.2, ), (0.375, ),
                           (0.55, ), (0.725, ), (0.9, )],
            extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ),
                                 (0.6315, ), (0.8078, ), (0.9836, )],
            anchor_ratios=aspect_ratios,
            layer_steps=[8, 16, 32, 64, 100, 300])
        (all_anchors,
         all_num_anchors_depth,
         all_num_anchors_spatial) = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        with tf.variable_scope(FLAGS.model_scope, default_name=None,
                               values=[features], reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers, FLAGS.num_classes, all_num_anchors_depth,
                data_format=FLAGS.data_format)

            if FLAGS.data_format == 'channels_first':
                # Move axis 1 to the end before flattening.
                channels_last = [0, 2, 3, 1]
                cls_pred = [tf.transpose(t, channels_last) for t in cls_pred]
                location_pred = [tf.transpose(t, channels_last)
                                 for t in location_pred]

            cls_pred = [tf.reshape(t, [-1, FLAGS.num_classes])
                        for t in cls_pred]
            location_pred = [tf.reshape(t, [-1, 4]) for t in location_pred]

            # The two concatenations live in their own scopes so the merged
            # tensors get recognisable names in the saved graph.
            with tf.variable_scope('cls_pred'):
                cls_pred = tf.concat(cls_pred, axis=0)
            with tf.variable_scope('location_pred'):
                location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            decoded = anchor_encoder_decoder.ext_decode_all_anchors(
                location_pred, all_anchors,
                all_num_anchors_depth, all_num_anchors_spatial)
            bboxes_pred = tf.concat(decoded, axis=0)
            selected_bboxes, selected_scores = parse_by_class(
                cls_pred, bboxes_pred, FLAGS.num_classes,
                FLAGS.select_threshold, FLAGS.min_size,
                FLAGS.keep_topk, FLAGS.nms_topk, FLAGS.nms_threshold)

            # Merge the per-class results into flat tensors; each score gets
            # a label equal to the key it was stored under.
            labels_list = [tf.ones_like(score, tf.int32) * class_id
                           for class_id, score in selected_scores.items()]
            scores_list = list(selected_scores.values())
            bboxes_list = [selected_bboxes[class_id]
                           for class_id in selected_scores]
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()

        # Disabled alternative session configuration for MLU270 devices:
        #   config = tf.ConfigProto(allow_soft_placement=True,
        #                           inter_op_parallelism_threads=1,
        #                           intra_op_parallelism_threads=1)
        #   config.mlu_options.data_parallelism = 1
        #   config.mlu_options.model_parallelism = 1
        #   config.mlu_options.core_num = 1
        #   config.mlu_options.core_version = 'MLU270'
        #   config.mlu_options.precision = 'float'
        #   with tf.Session(config = config) as sess:
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, get_checkpoint())

            np_image = imread('demo/test.jpg')
            labels_, scores_, bboxes_ = sess.run(
                [all_labels, all_scores, all_bboxes],
                feed_dict={image_input: np_image})

            annotated = draw_toolbox.bboxes_draw_on_img(
                np_image, labels_, scores_, bboxes_, thickness=2)
            imsave('demo/test_out.jpg', annotated)

            # Write the restored variables back out under a fixed path.
            saver.save(sess, 'model/ssd300_vgg16/ssd300_vgg16', global_step=0)
def main(_):
    """Detect objects in ``./demo/test.jpg`` using anchors built in-graph.

    Unlike a fixed anchor grid, the anchors here are derived from the runtime
    shapes of the backbone feature maps. After one forward pass the selected
    boxes are rescaled by ``image size / preprocessed output shape`` and the
    annotated image is written to ``./demo/test_out.jpg``.

    Args:
        _: Ignored. Presumably the argv list supplied by the application
            runner -- confirm against the caller.
    """
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # No op built below reads this placeholder; it is only fed at run time.
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features, output_shape = ssd_preprocessing.preprocess_for_eval(
            image_input, out_shape,
            data_format=FLAGS.data_format, output_rgb=False)
        features = tf.expand_dims(features, axis=0)
        output_shape = tf.expand_dims(output_shape, axis=0)

        # Anchor configuration: every list carries one entry per layer.
        all_anchor_scales = [(30., ), (60., ), (112.5, ),
                             (165., ), (217.5, ), (270., )]
        all_extra_scales = [(42.43, ), (82.17, ), (136.23, ),
                            (189.45, ), (242.34, ), (295.08, )]
        # An alternative ratio set without the leading 1. was previously
        # used: [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
        #        (2., 3., .5, 0.3333), (2., .5), (2., .5)]
        all_anchor_ratios = [(1., 2., .5),
                             (1., 2., 3., .5, 0.3333),
                             (1., 2., 3., .5, 0.3333),
                             (1., 2., 3., .5, 0.3333),
                             (1., 2., .5),
                             (1., 2., .5)]

        with tf.variable_scope(FLAGS.model_scope, default_name=None,
                               values=[features], reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)

            with tf.device('/cpu:0'):
                anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
                    positive_threshold=None,
                    ignore_threshold=None,
                    prior_scaling=[0.1, 0.1, 0.2, 0.2])

                # The two spatial axes sit at [2:] for channels_first and at
                # [1:3] otherwise.
                channels_first = FLAGS.data_format == 'channels_first'
                if channels_first:
                    all_layer_shapes = [tf.shape(feat)[2:]
                                        for feat in feature_layers]
                else:
                    all_layer_shapes = [tf.shape(feat)[1:3]
                                        for feat in feature_layers]
                all_layer_strides = [8, 16, 32, 64, 100, 300]
                total_layers = len(all_layer_shapes)

                # One (height, width, depth) triple per layer.
                per_layer = [
                    anchor_encoder_decoder.get_anchors_width_height(
                        all_anchor_scales[ind],
                        all_extra_scales[ind],
                        all_anchor_ratios[ind],
                        name='get_anchors_width_height{}'.format(ind))
                    for ind in range(total_layers)
                ]
                anchors_height = [triple[0] for triple in per_layer]
                anchors_width = [triple[1] for triple in per_layer]
                anchors_depth = [triple[2] for triple in per_layer]

                (anchors_ymin, anchors_xmin,
                 anchors_ymax, anchors_xmax, _) = \
                    anchor_encoder_decoder.get_all_anchors(
                        tf.squeeze(output_shape, axis=0),
                        anchors_height, anchors_width, anchors_depth,
                        [0.5] * total_layers,
                        all_layer_shapes, all_layer_strides,
                        [0.] * total_layers,
                        [False] * total_layers)

            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers, FLAGS.num_classes, anchors_depth,
                data_format=FLAGS.data_format)

            if FLAGS.data_format == 'channels_first':
                # Move axis 1 to the end before flattening.
                channels_last = [0, 2, 3, 1]
                cls_pred = [tf.transpose(t, channels_last) for t in cls_pred]
                location_pred = [tf.transpose(t, channels_last)
                                 for t in location_pred]

            cls_pred = [tf.reshape(t, [-1, FLAGS.num_classes])
                        for t in cls_pred]
            location_pred = [tf.reshape(t, [-1, 4]) for t in location_pred]
            cls_pred = tf.concat(cls_pred, axis=0)
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = anchor_encoder_decoder.decode_anchors(
                location_pred,
                anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax)
            selected_bboxes, selected_scores = bbox_util.parse_by_class(
                tf.squeeze(output_shape, axis=0), cls_pred, bboxes_pred,
                FLAGS.num_classes, FLAGS.select_threshold, FLAGS.min_size,
                FLAGS.keep_topk, FLAGS.nms_topk, FLAGS.nms_threshold)

            # Merge the per-class results into flat tensors; each score gets
            # a label equal to the key it was stored under.
            labels_list = [tf.ones_like(score, tf.int32) * class_id
                           for class_id, score in selected_scores.items()]
            scores_list = list(selected_scores.values())
            bboxes_list = [selected_bboxes[class_id]
                           for class_id in selected_scores]
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, get_checkpoint())

            np_image = imread('./demo/test.jpg')
            feeds = {
                image_input: np_image,
                shape_input: np_image.shape[:-1],
            }
            labels_, scores_, bboxes_, output_shape_ = sess.run(
                [all_labels, all_scores, all_bboxes, output_shape],
                feed_dict=feeds)

            # Rescale each box column by image size / preprocessed size:
            # columns 0 and 2 use axis 0, columns 1 and 3 use axis 1.
            for column, axis in ((0, 0), (1, 1), (2, 0), (3, 1)):
                bboxes_[:, column] = (bboxes_[:, column]
                                      * np_image.shape[axis]
                                      / output_shape_[0, axis])

            annotated = draw_toolbox.bboxes_draw_on_img(
                np_image, labels_, scores_, bboxes_, thickness=2)
            imsave('./demo/test_out.jpg', annotated)
def _pixel_box(box, image_shape):
    """Scale a relative box to integer pixel corners ``(x1, y1, x2, y2)``.

    Elements 0 and 2 of ``box`` are multiplied by ``image_shape[0]`` and
    elements 1 and 3 by ``image_shape[1]``; every product is truncated with
    ``int``.

    Returns:
        The tuple ``(x1, y1, x2, y2)``, or ``None`` when the scaled box is
        less than one pixel along either axis.
    """
    y1 = int(box[0] * image_shape[0])
    x1 = int(box[1] * image_shape[1])
    y2 = int(box[2] * image_shape[0])
    x2 = int(box[3] * image_shape[1])
    if y2 - y1 < 1 or x2 - x1 < 1:
        return None
    return x1, y1, x2, y2


def ssd(path):
    """Run the detector on ``path`` and dump crops, coordinates and labels.

    Steps performed after the graph is built and the checkpoint restored:

    1. Every decoded box that covers at least one pixel is cropped from the
       image and saved as ``./res/img/<index>.jpg``; its pixel coordinates
       are appended to ``./res/cor.txt`` and a line ``<index>,0`` is appended
       to ``./res/label.txt``.
    2. For every selected detection, ``./res/cor.txt`` is scanned for its
       coordinate string and the matching entry in ``./res/label.txt`` is
       rewritten from ``<id>,0`` to ``<id>,<label>``.
    3. The detections are drawn on the image, which is written to
       ``./demo/out.jpg``.

    Args:
        path: Path of the image file to process.
    """
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # No op built below reads this placeholder; it is only fed at run time.
        shape_input = tf.placeholder(tf.int32, shape=(2,))

        features = ssd_preprocessing.preprocess_for_eval(
            image_input, out_shape,
            data_format=FLAGS.data_format, output_rgb=False)
        features = tf.expand_dims(features, axis=0)

        # Fixed: the class name had stray text pasted into the middle of it
        # ("AnchorCrealog_device_placementtor"), which fails at attribute
        # lookup.
        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=[(38, 38), (19, 19), (10, 10),
                           (5, 5), (3, 3), (1, 1)],
            anchor_scales=[(0.1,), (0.2,), (0.375,),
                           (0.55,), (0.725,), (0.9,)],
            extra_anchor_scales=[(0.1414,), (0.2739,), (0.4541,),
                                 (0.6315,), (0.8078,), (0.9836,)],
            anchor_ratios=[(1., 2., .5),
                           (1., 2., 3., .5, 0.3333),
                           (1., 2., 3., .5, 0.3333),
                           (1., 2., 3., .5, 0.3333),
                           (1., 2., .5),
                           (1., 2., .5)],
            layer_steps=[8, 16, 32, 64, 100, 300])
        (all_anchors,
         all_num_anchors_depth,
         all_num_anchors_spatial) = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        with tf.variable_scope(FLAGS.model_scope, default_name=None,
                               values=[features], reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers, FLAGS.num_classes, all_num_anchors_depth,
                data_format=FLAGS.data_format)

            if FLAGS.data_format == 'channels_first':
                # Move axis 1 to the end before flattening.
                cls_pred = [tf.transpose(pred, [0, 2, 3, 1])
                            for pred in cls_pred]
                location_pred = [tf.transpose(pred, [0, 2, 3, 1])
                                 for pred in location_pred]

            cls_pred = [tf.reshape(pred, [-1, FLAGS.num_classes])
                        for pred in cls_pred]
            location_pred = [tf.reshape(pred, [-1, 4])
                             for pred in location_pred]
            cls_pred = tf.concat(cls_pred, axis=0)
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = anchor_encoder_decoder.ext_decode_all_anchors(
                location_pred, all_anchors,
                all_num_anchors_depth, all_num_anchors_spatial)
            bboxes_pred = tf.concat(bboxes_pred, axis=0)
            selected_bboxes, selected_scores = parse_by_class(
                cls_pred, bboxes_pred, FLAGS.num_classes,
                FLAGS.select_threshold, FLAGS.min_size,
                FLAGS.keep_topk, FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            saver.restore(sess, get_checkpoint())

            np_image = imread(path)
            im = Image.open(path)
            print(np_image.shape)

            # Fetch the selected detections and the full decoded box set in
            # one run instead of evaluating the network twice on the same
            # feed.
            labels_, scores_, bboxes_, decoded_boxes = sess.run(
                [all_labels, all_scores, all_bboxes, bboxes_pred],
                feed_dict={image_input: np_image,
                           shape_input: np_image.shape[:-1]})
            shape = np_image.shape

            # Both files are closed (and therefore flushed) before the
            # second pass reads them back.
            with open('./res/cor.txt', 'a') as cor_file, \
                    open('./res/label.txt', 'a') as label_file:
                for j, raw_box in enumerate(decoded_boxes):
                    corners = _pixel_box(raw_box, shape)
                    if corners is None:
                        continue
                    num_str = str(j).zfill(5)
                    im.crop(corners).save('./res/img/{}.jpg'.format(num_str))
                    cor_file.write(','.join(str(c) for c in corners) + '\n')
                    label_file.write(num_str + ',' + str(0) + '\n')

            # NOTE(review): the id used below is the 1-based line number in
            # cor.txt, while the crops above are named after the box index j
            # (0-based, with skipped boxes leaving gaps). `cor1 in line` is
            # also a substring match. Both look unintended -- confirm before
            # relying on label.txt.
            for label, bbox in zip(labels_, bboxes_):
                corners = _pixel_box(bbox, shape)
                if corners is None:
                    continue
                cor1 = ','.join(str(c) for c in corners)
                with open('./res/cor.txt', 'r') as f11, \
                        open('./res/label.txt', 'r+') as f22:
                    for num, line in enumerate(f11, start=1):
                        if cor1 not in line:
                            continue
                        num11 = str(num)
                        print(num11 + '\n')
                        num11 = num11.zfill(5)
                        ber = num11 + ',' + str(0)
                        aft = num11 + ',' + str(label)
                        # Rewrite the label file from its current position;
                        # after the first rewrite the handle sits at EOF, so
                        # later matches for the same box read nothing.
                        t = f22.read()
                        t = t.replace(ber, aft)
                        f22.seek(0, 0)
                        f22.write(t)
            print(bboxes_.shape[0])

            img_to_draw = draw_toolbox.bboxes_draw_on_img(
                np_image, labels_, scores_, bboxes_, thickness=2)
            imsave('./demo/out.jpg', img_to_draw)
def main(_):
    """Run the detector frame by frame over a video and write the result.

    Reads up to ``video_frame_cnt`` frames from ``vid``, runs the detection
    graph on each one, draws the detections and the inference time, shows the
    frame in a window and appends it to ``videoWriter``. Pressing ``q`` stops
    early. ``vid``, ``videoWriter`` and ``video_frame_cnt`` are read from the
    enclosing scope and are not defined in this function.

    Args:
        _: Ignored. Presumably the argv list supplied by the application
            runner -- confirm against the caller.
    """
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # No op built below reads this placeholder; it is only fed at run time.
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features = ssd_preprocessing.preprocess_for_eval(
            image_input, out_shape,
            data_format=FLAGS.data_format, output_rgb=False)
        features = tf.expand_dims(features, axis=0)

        # An alternative ratio set without the leading 1. was previously
        # used: [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
        #        (2., 3., .5, 0.3333), (2., .5), (2., .5)]
        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=[(38, 38), (19, 19), (10, 10),
                           (5, 5), (3, 3), (1, 1)],
            anchor_scales=[(0.1, ), (0.2, ), (0.375, ),
                           (0.55, ), (0.725, ), (0.9, )],
            extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ),
                                 (0.6315, ), (0.8078, ), (0.9836, )],
            anchor_ratios=[(1., 2., .5),
                           (1., 2., 3., .5, 0.3333),
                           (1., 2., 3., .5, 0.3333),
                           (1., 2., 3., .5, 0.3333),
                           (1., 2., .5),
                           (1., 2., .5)],
            layer_steps=[8, 16, 32, 64, 100, 300])
        (all_anchors,
         all_num_anchors_depth,
         all_num_anchors_spatial) = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        with tf.variable_scope(FLAGS.model_scope, default_name=None,
                               values=[features], reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers, FLAGS.num_classes, all_num_anchors_depth,
                data_format=FLAGS.data_format)

            if FLAGS.data_format == 'channels_first':
                # Move axis 1 to the end before flattening.
                cls_pred = [tf.transpose(pred, [0, 2, 3, 1])
                            for pred in cls_pred]
                location_pred = [tf.transpose(pred, [0, 2, 3, 1])
                                 for pred in location_pred]

            cls_pred = [tf.reshape(pred, [-1, FLAGS.num_classes])
                        for pred in cls_pred]
            location_pred = [tf.reshape(pred, [-1, 4])
                             for pred in location_pred]
            cls_pred = tf.concat(cls_pred, axis=0)
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = anchor_encoder_decoder.ext_decode_all_anchors(
                location_pred, all_anchors,
                all_num_anchors_depth, all_num_anchors_spatial)
            bboxes_pred = tf.concat(bboxes_pred, axis=0)
            selected_bboxes, selected_scores = parse_by_class(
                cls_pred, bboxes_pred, FLAGS.num_classes,
                FLAGS.select_threshold, FLAGS.min_size,
                FLAGS.keep_topk, FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            saver.restore(sess, get_checkpoint())

            # try/finally guarantees the capture and the writer are released
            # even when a frame fails mid-stream.
            try:
                for _frame_index in range(video_frame_cnt):
                    ret, img_ori = vid.read()
                    if not ret:
                        # No frame was grabbed (stream ended early or the
                        # reported frame count was too high); stop instead
                        # of passing None to cvtColor.
                        break

                    img = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB)
                    # NOTE(review): image_input is declared uint8 but the
                    # frame is fed as float32 -- confirm the intended dtype.
                    np_image = np.asarray(img, np.float32)

                    start_time = time.time()
                    labels_, scores_, bboxes_ = sess.run(
                        [all_labels, all_scores, all_bboxes],
                        feed_dict={
                            image_input: np_image,
                            shape_input: np_image.shape[:-1]
                        })
                    end_time = time.time()

                    img_to_draw = draw_toolbox.bboxes_draw_on_img(
                        np_image, labels_, scores_, bboxes_, thickness=2)
                    cv2.putText(
                        img_to_draw,
                        '{:.2f}ms'.format((end_time - start_time) * 1000),
                        (40, 40), 0,
                        fontScale=1, color=(0, 255, 0), thickness=2)

                    # The annotated frame is written to disk and read back
                    # through OpenCV before display (presumably to get it
                    # into the layout and dtype cv2 expects -- confirm).
                    imsave('./test_out.jpg', img_to_draw)
                    new_img = cv2.imread('./test_out.jpg')
                    cv2.imshow('image', new_img)
                    videoWriter.write(new_img)

                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                vid.release()
                videoWriter.release()