def read_and_prepocess_single_img(filename_queue, shortside_len, is_training):
    """Decode one example and turn it into a network-ready image tensor.

    The decoded image is cast to float32, passed through
    ``image_preprocess.short_side_resize`` (in both modes) and, when training,
    through ``image_preprocess.random_flip_left_right``.  The pixel mean is
    subtracted as the very last step.

    :param filename_queue: forwarded unchanged to
        ``read_single_example_and_decode``.
    :param shortside_len: passed as ``target_shortside_len`` to the resize
        helper.
    :param is_training: when truthy, the random left/right flip is applied.
    :return: ``(img_name, img, gtboxes_and_label, num_objects)``.
    """
    decoded = read_single_example_and_decode(filename_queue)
    img_name, raw_img, boxes, num_objects = decoded

    image = tf.cast(raw_img, tf.float32)

    # The resize is identical for training and evaluation.
    image, boxes = image_preprocess.short_side_resize(
        img_tensor=image,
        gtboxes_and_label=boxes,
        target_shortside_len=shortside_len,
        length_limitation=cfgs.IMG_MAX_LENGTH)

    # Only training data is augmented with the random flip.
    if is_training:
        image, boxes = image_preprocess.random_flip_left_right(
            img_tensor=image,
            gtboxes_and_label=boxes)

    v1d_backbones = ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']
    if cfgs.NET_NAME in v1d_backbones:
        # These backbones get the image divided by 255 before the mean is
        # removed (presumably PIXEL_MEAN_ lives on that scale -- confirm in cfgs).
        image = image / 255 - tf.constant([[cfgs.PIXEL_MEAN_]])
    else:
        # Subtract the pixel mean last.
        image = image - tf.constant([[cfgs.PIXEL_MEAN]])

    return img_name, image, boxes, num_objects
def preprocess_img(img_plac, gtbox_plac):
    '''
    Resize, randomly flip and mean-normalise a single image, then add a
    leading batch axis.

    :param img_plac: [H, W, 3] uint 8 img. In RGB.
    :param gtbox_plac: shape of [-1, 5]. [xmin, ymin, xmax, ymax, label]
    :return: (img_batch, gtboxes_and_label); img_batch is the processed image
        after tf.expand_dims(..., axis=0), gtboxes_and_label is whatever the
        resize/flip helpers returned for the boxes.
    '''
    image = tf.cast(img_plac, tf.float32)

    # The boxes are handed to the helpers as received (no float cast here).
    image, boxes = image_preprocess.short_side_resize(
        img_tensor=image,
        gtboxes_and_label=gtbox_plac,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        length_limitation=cfgs.IMG_MAX_LENGTH)
    image, boxes = image_preprocess.random_flip_left_right(
        img_tensor=image,
        gtboxes_and_label=boxes)

    # v1d backbones divide by 255 before removing their own mean constant;
    # every other backbone subtracts cfgs.PIXEL_MEAN from the raw values.
    is_v1d = cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']
    image = (image / 255 - tf.constant([[cfgs.PIXEL_MEAN_]]) if is_v1d
             else image - tf.constant([[cfgs.PIXEL_MEAN]]))

    return tf.expand_dims(image, axis=0), boxes
def read_and_prepocess_single_img(filename_queue, shortside_len, is_training):
    """Decode one example, subtract the pixel mean, resize and augment it.

    In this variant the mean (``cfgs.PIXEL_MEAN``) is removed *before* the
    resize.  Training images are additionally flipped at random and perturbed
    with zero-mean Gaussian noise.

    :param filename_queue: forwarded unchanged to
        ``read_single_example_and_decode``.
    :param shortside_len: passed as ``target_shortside_len`` to the resize
        helper.
    :param is_training: when truthy, the flip and the noise are applied.
    :return: ``(img_name, img, gtboxes_and_label, num_objects)``.
    """
    img_name, raw_img, boxes, num_objects = read_single_example_and_decode(
        filename_queue)

    image = tf.cast(raw_img, tf.float32) - tf.constant(cfgs.PIXEL_MEAN)

    # Both modes resize the same way.
    image, boxes = image_preprocess.short_side_resize(
        img_tensor=image,
        gtboxes_and_label=boxes,
        target_shortside_len=shortside_len)

    if is_training:
        image, boxes = image_preprocess.random_flip_left_right(
            img_tensor=image,
            gtboxes_and_label=boxes)

        # Additive noise whose standard deviation is 20% of 255.
        noise_stddev = 255.0 * 0.2
        image = image + tf.random_normal(
            shape=tf.shape(image), mean=0.0, stddev=noise_stddev)

    return img_name, image, boxes, num_objects
def train_parse_fn(example):
    """
    Parse one serialized example into the model inputs and RPN training targets.

    :param example: serialized input
    :return: a pair of dicts.  The first holds "image_name", "image" and
        "image_window"; the second holds "gt_box_labels",
        "minibatch_indices", "minibatch_encode_gtboxes" and
        "minibatch_objects_one_hot".
    """
    config = Config()

    feature_spec = {
        'img_name': tf.FixedLenFeature([], tf.string),
        'img_height': tf.FixedLenFeature([], tf.int64),
        'img_width': tf.FixedLenFeature([], tf.int64),
        'img': tf.FixedLenFeature([], tf.string),
        'gtboxes_and_label': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized=example, features=feature_spec)

    img_name = parsed['img_name']
    height = tf.cast(parsed['img_height'], tf.int32)
    width = tf.cast(parsed['img_width'], tf.int32)

    # The image is stored as raw uint8 bytes; rebuild [H, W, 3] and go float.
    image = tf.decode_raw(parsed['img'], tf.uint8)
    image = tf.reshape(image, shape=[height, width, 3])
    image = tf.cast(image, tf.float32)

    # Boxes are stored as raw int32, five values per object.
    boxes = tf.decode_raw(parsed['gtboxes_and_label'], tf.int32)
    boxes = tf.reshape(boxes, [-1, 5])

    # Per the original note: img becomes (1024, 1024, 3) and image_window is
    # (4,) [y1, x1, y2, x2] -- the actual size depends on config.TARGET_SIDE.
    image, boxes, image_window = image_preprocess.image_resize_pad(
        img_tensor=image,
        gtboxes_and_label=boxes,
        target_side=config.TARGET_SIDE)
    image, boxes = image_preprocess.random_flip_left_right(
        img_tensor=image,
        gtboxes_and_label=boxes)

    # Keep at most FAST_RCNN_MAX_INSTANCES objects, chosen by shuffling the
    # row indices and truncating.
    num_objects = tf.shape(boxes)[0]
    keep = tf.random_shuffle(tf.range(num_objects))
    keep = keep[:config.FAST_RCNN_MAX_INSTANCES]
    boxes = tf.gather(boxes, keep)

    # RPN targets are built from the (unpadded) box coordinates only.
    anchors = make_anchor.generate_pyramid_anchors(config)
    (minibatch_indices,
     minibatch_encode_gtboxes,
     rpn_objects_one_hot) = boxes_utils.build_rpn_target(
         boxes[:, :4], anchors, config)

    # Zero-pad the box table up to FAST_RCNN_MAX_INSTANCES rows:
    # (FAST_RCNN_MAX_INSTANCES, 5) [y1, x1, y2, x2, label]
    pad_rows = config.FAST_RCNN_MAX_INSTANCES - tf.shape(boxes)[0]
    pad_rows = tf.maximum(pad_rows, 0)
    zero_rows = tf.zeros((pad_rows, 5), dtype=tf.int32)
    boxes = tf.concat([boxes, zero_rows], axis=0)

    inputs = {
        "image_name": img_name,
        "image": image,
        "image_window": image_window,
    }
    targets = {
        "gt_box_labels": boxes,
        "minibatch_indices": minibatch_indices,
        "minibatch_encode_gtboxes": minibatch_encode_gtboxes,
        "minibatch_objects_one_hot": rpn_objects_one_hot,
    }
    return inputs, targets
def read_and_prepocess_single_img(filename_queue, shortside_len, is_training):
    """Decode one example, subtract a fixed per-channel mean and resize it.

    The mean ``[103.939, 116.779, 123.68]`` is hard-coded in this variant and
    removed before the resize.  Training images are also flipped at random.

    :param filename_queue: forwarded unchanged to
        ``read_single_example_and_decode``.
    :param shortside_len: passed as ``target_shortside_len`` to the resize
        helper.
    :param is_training: when truthy, the random left/right flip is applied.
    :return: ``(img_name, img, gtboxes_and_label, num_objects)``.
    """
    img_name, raw_img, boxes, num_objects = read_single_example_and_decode(
        filename_queue)

    channel_mean = [103.939, 116.779, 123.68]
    image = tf.cast(raw_img, tf.float32) - tf.constant(channel_mean)

    # The resize does not depend on the mode; only the flip does.
    image, boxes = image_preprocess.short_side_resize(
        img_tensor=image,
        gtboxes_and_label=boxes,
        target_shortside_len=shortside_len)
    if is_training:
        image, boxes = image_preprocess.random_flip_left_right(
            img_tensor=image,
            gtboxes_and_label=boxes)

    return img_name, image, boxes, num_objects
def read_and_prepocess_single_img(filename_queue, shortside_len, is_training):
    """Read a single example and apply mean subtraction, resize and flip.

    A hard-coded per-channel mean (``[103.939, 116.779, 123.68]``) is removed
    from the float32 image first; the image and its boxes are then resized,
    and flipped at random when training.

    :param filename_queue: forwarded unchanged to
        ``read_single_example_and_decode``.
    :param shortside_len: passed as ``target_shortside_len`` to the resize
        helper.
    :param is_training: enables the random left/right flip.
    :return: ``(img_name, img, gtboxes_and_label, num_objects)``.
    """
    example = read_single_example_and_decode(filename_queue)
    img_name, pixels, gt, num_objects = example

    pixels = tf.cast(pixels, tf.float32)
    pixels = pixels - tf.constant([103.939, 116.779, 123.68])

    resize_kwargs = dict(img_tensor=pixels,
                         gtboxes_and_label=gt,
                         target_shortside_len=shortside_len)
    pixels, gt = image_preprocess.short_side_resize(**resize_kwargs)

    if not is_training:
        # Evaluation stops after the resize.
        return img_name, pixels, gt, num_objects

    pixels, gt = image_preprocess.random_flip_left_right(
        img_tensor=pixels, gtboxes_and_label=gt)
    return img_name, pixels, gt, num_objects
def read_and_prepocess_single_img(raw_dataset, shortside_len, is_training):
    """Decode one example, subtract ``cfgs.PIXEL_MEAN`` and resize it.

    :param raw_dataset: forwarded unchanged to
        ``read_single_example_and_decode``.
    :param shortside_len: passed as ``target_shortside_len`` to the resize
        helper.
    :param is_training: when truthy, a random left/right flip follows the
        resize.
    :return: ``(img_name, img, gtboxes_and_label, num_objects)``.
    """
    img_name, raw_img, boxes, num_objects = read_single_example_and_decode(
        raw_dataset)

    # Mean subtraction happens before the resize in this variant.
    image = tf.cast(raw_img, tf.float32)
    image = image - tf.constant(cfgs.PIXEL_MEAN)

    image, boxes = image_preprocess.short_side_resize(
        img_tensor=image,
        gtboxes_and_label=boxes,
        target_shortside_len=shortside_len)

    if is_training:
        image, boxes = image_preprocess.random_flip_left_right(
            img_tensor=image,
            gtboxes_and_label=boxes)

    return img_name, image, boxes, num_objects
def read_and_preprocess_single_image(filename_queue, shortside_len,
                                     longside_len, is_training):
    """Decode one example, resize it and (when training) flip it at random.

    No mean subtraction or 1/255 scaling is applied here; both were left
    disabled in this variant, so the image keeps its raw value range as
    float32.

    :param filename_queue: forwarded unchanged to
        ``read_single_example_and_decode``.
    :param shortside_len: third positional argument of
        ``image_preprocess.short_side_resize``.
    :param longside_len: fourth positional argument of
        ``image_preprocess.short_side_resize``.
    :param is_training: when truthy, the random horizontal flip is applied.
    :return: ``(image_name, image, gt_boxes_and_label)``.
    """
    image_name, raw_image, boxes = read_single_example_and_decode(
        filename_queue)
    image = tf.cast(raw_image, tf.float32)

    # Training and evaluation share the same resize call.
    image, boxes = image_preprocess.short_side_resize(
        image, boxes, shortside_len, longside_len)

    if is_training:
        # Random horizontal flip.
        image, boxes = image_preprocess.random_flip_left_right(image, boxes)

    return image_name, image, boxes
def read_and_prepocess_single_img(filename_queue, shortside_len, is_training):
    """Decode one example, augment it (training only) and normalise it.

    Training pipeline, in order: random grayscale conversion, random
    rotation, short-side resize capped by ``cfgs.IMG_MAX_LENGTH``, random
    left/right flip, random up/down flip.  Evaluation only resizes.
    Normalisation comes last and follows ``cfgs.MXNET_NORM``.

    :param filename_queue: forwarded unchanged to
        ``read_single_example_and_decode``.
    :param shortside_len: passed as ``target_shortside_len`` to the resize
        helper.
    :param is_training: selects the augmented training pipeline.
    :return: ``(img_name, img, gtboxes_and_label, num_objects)``.
    """
    img_name, raw_img, boxes, num_objects = read_single_example_and_decode(
        filename_queue)
    image = tf.cast(raw_img, tf.float32)

    if not is_training:
        # NOTE(review): unlike the training branch, no max_len is passed here,
        # so the helper's default applies -- confirm this is intentional.
        image, boxes = image_preprocess.short_side_resize(
            img_tensor=image,
            gtboxes_and_label=boxes,
            target_shortside_len=shortside_len)
    else:
        # prob is 0.3: convert to gray (only the image comes back).
        image = image_preprocess.random_rgb2gray(
            img_tensor=image,
            gtboxes_and_label=boxes)

        # Rotate with 0.5 prob.; when rotating, a theta is chosen at random
        # from tf.range(-90, 90+16, delta=15).
        image, boxes = image_preprocess.random_rotate_img(
            img_tensor=image,
            gtboxes_and_label=boxes)

        image, boxes = image_preprocess.short_side_resize(
            img_tensor=image,
            gtboxes_and_label=boxes,
            target_shortside_len=shortside_len,
            max_len=cfgs.IMG_MAX_LENGTH)

        image, boxes = image_preprocess.random_flip_left_right(
            img_tensor=image,
            gtboxes_and_label=boxes)
        image, boxes = image_preprocess.random_flip_up_dowm(
            img_tensor=image,
            gtboxes_and_label=boxes)

    if cfgs.MXNET_NORM:
        # MXNet-style normalisation: divide by 255, subtract mean, divide by std.
        print("Use Mxnet Norm")
        image = image / 255.0
        image = image - tf.constant([[cfgs.MXNET_MEAN]])
        image = image / tf.constant(cfgs.MXNET_STD)
        return img_name, image, boxes, num_objects

    # Plain mean subtraction, applied last.
    image = image - tf.constant([[cfgs.PIXEL_MEAN]])
    if cfgs.NET_NAME.endswith(('b', 'd')):
        # Warn that a 'b'/'d' backbone is running without MXNet normalisation.
        print("Note: Use Mxnet ResNet, But Do Not Norm Img like MxNet....")
        print('\n')
    return img_name, image, boxes, num_objects
def preprocess_img(img_plac, gtbox_plac):
    '''
    Resize, randomly flip and mean-subtract a single image, then add a
    leading batch axis.

    :param img_plac: [H, W, 3] uint 8 img. In RGB.
    :param gtbox_plac: shape of [-1, 5]. [xmin, ymin, xmax, ymax, label]
    :return: (img_batch, gtboxes_and_label); img_batch is the processed image
        after tf.expand_dims(..., axis=0).
    '''
    image = tf.cast(img_plac, tf.float32)

    # The boxes are handed to the helpers as received (no float cast here).
    image, boxes = image_preprocess.short_side_resize(
        img_tensor=image,
        gtboxes_and_label=gtbox_plac,
        target_shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
        length_limitation=cfgs.IMG_MAX_LENGTH)
    image, boxes = image_preprocess.random_flip_left_right(
        img_tensor=image,
        gtboxes_and_label=boxes)

    # Mean subtraction comes after the geometric transforms.
    centered = image - tf.constant([[cfgs.PIXEL_MEAN]])

    return tf.expand_dims(centered, axis=0), boxes
def next_batch(dataset_name, batch_size, shortside_len, is_training):
    '''
    Build the input pipeline for one dataset and return batched tensors.

    :param dataset_name: one of 'jyzdata', 'DOTA', 'ship', 'ICDAR2015',
        'pascal', 'coco', 'DOTA_TOTAL', 'WIDER'; it selects the tfrecord file.
    :param batch_size: must be 1.
    :param shortside_len: sequence of candidate short-side lengths; it is
        turned into a tensor, shuffled, and element 0 is used as the resize
        target.
    :param is_training: selects the tfrecord path and enables the random
        left/right flip.
    :return: (img_name_batch, img_batch, gtboxes_and_label_batch, num_obs_batch)
        as produced by tf.train.batch with dynamic padding.
        img_batch: shape (1, new_imgH, new_imgW, C)
        gtboxes_and_label_batch: shape (1, Num_Of_objects, 9) -- each row holds
            9 int32 values (presumably 8 box coordinates + label; confirm
            against the tfrecord writer).
    :raises ValueError: if dataset_name is not one of the supported names.
    '''
    assert batch_size == 1, "we only support batch_size is 1.We may support large batch_size in the future"

    valid_names = ['jyzdata', 'DOTA', 'ship', 'ICDAR2015', 'pascal', 'coco', 'DOTA_TOTAL', 'WIDER']
    if dataset_name not in valid_names:
        # Report the names that are actually accepted, and the offending value.
        raise ValueError('dataSet name must be one of {}, got {!r}'.format(
            ', '.join(valid_names), dataset_name))

    # NOTE(review): the training branch reads the '_val' record from a
    # hard-coded directory -- confirm this is the intended split and location.
    if is_training:
        pattern = os.path.join('/home/work/tfrecord/', dataset_name + '_val.tfrecord')
    else:
        pattern = os.path.join('/content/drive/', dataset_name + '_test.tfrecord')

    print('tfrecord path is -->', os.path.abspath(pattern))

    # tf.data pipeline: parse -> repeat -> shuffle(1000) -> one-shot iterator.
    raw_dataset = tf.data.TFRecordDataset(pattern)
    raw_dataset = raw_dataset.map(_parse_image_function)
    raw_dataset = raw_dataset.repeat()
    raw_dataset = raw_dataset.shuffle(1000)
    raw_dataset = tf.data.make_one_shot_iterator(raw_dataset)

    # Pick one of the candidate short-side lengths.
    shortside_len = tf.constant(shortside_len)
    shortside_len = tf.random_shuffle(shortside_len)[0]

    single_ex = raw_dataset.get_next()
    img_name = single_ex['img_name']
    img_height = tf.cast(single_ex['img_height'], tf.int32)
    img_width = tf.cast(single_ex['img_width'], tf.int32)

    # The image is stored as raw uint8 bytes; rebuild [H, W, 3].
    img = tf.decode_raw(single_ex['img'], tf.uint8)
    img = tf.reshape(img, shape=[img_height, img_width, 3])

    # Boxes are stored as raw int32, nine values per object.
    gtboxes_and_label = tf.decode_raw(single_ex['gtboxes_and_label'], tf.int32)
    gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 9])
    num_obs = tf.cast(single_ex['num_objects'], tf.int32)

    # Mean subtraction happens before the resize.
    img = tf.cast(img, tf.float32)
    img = img - tf.constant(cfgs.PIXEL_MEAN)

    # Both modes resize identically; only training adds the random flip.
    img, gtboxes_and_label = image_preprocess.short_side_resize(
        img_tensor=img,
        gtboxes_and_label=gtboxes_and_label,
        target_shortside_len=shortside_len)
    if is_training:
        img, gtboxes_and_label = image_preprocess.random_flip_left_right(
            img_tensor=img,
            gtboxes_and_label=gtboxes_and_label)

    # dynamic_pad=True lets tf.train.batch accept the variable image and box
    # shapes.
    img_name_batch, img_batch, gtboxes_and_label_batch, num_obs_batch = \
        tf.train.batch(
            [img_name, tf.to_float(img), gtboxes_and_label, num_obs],
            batch_size=batch_size,
            capacity=1,
            num_threads=1,
            dynamic_pad=True)

    return img_name_batch, img_batch, gtboxes_and_label_batch, num_obs_batch