def prepare_dataloader(tfrecord_dir, batch_size, subset="train"):
    anchorobj = anchor.Anchor(img_size=550,
                              feature_map_size=[69, 35, 18, 9, 5],
                              aspect_ratio=[1, 0.5, 2],
                              scale=[24, 48, 96, 192, 384])

    parser = yolact_parser.Parser(output_size=550,
                                  anchor_instance=anchorobj,
                                  match_threshold=0.5,
                                  unmatched_threshold=0.5,
                                  mode=subset)

    files = tf.io.matching_files(os.path.join(tfrecord_dir, "coco_%s.*" % subset))
    num_shards = tf.cast(tf.shape(files)[0], tf.int64)
    shards = tf.data.Dataset.from_tensor_slices(files)
    shards = shards.shuffle(num_shards)
    shards = shards.repeat()
    dataset = shards.interleave(tf.data.TFRecordDataset,
                                cycle_length=num_shards,
                                num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.shuffle(buffer_size=2048)
    dataset = dataset.map(map_func=parser, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return dataset
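A quick smoke test for this loader, assuming a hypothetical `data/coco` directory that holds the `coco_train.*` shards; the exact batch structure depends on what `yolact_parser.Parser` emits:

# Hypothetical usage; "data/coco" is a placeholder for the real shard directory.
train_loader = prepare_dataloader("data/coco", batch_size=8, subset="train")
for batch in train_loader.take(1):  # the dataset repeats forever, so take() is required
    print([t.shape for t in tf.nest.flatten(batch)])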
def prepare_dataloader(img_h, img_w, feature_map_size, protonet_out_size, aspect_ratio, scale,
                       tfrecord_dir, batch_size, subset="train"):
    anchorobj = anchor.Anchor(img_size_h=img_h, img_size_w=img_w,
                              feature_map_size=feature_map_size,
                              aspect_ratio=aspect_ratio,
                              scale=scale)

    parser = yolact_parser.Parser(output_size=[img_h, img_w],  # (h, w)
                                  anchor_instance=anchorobj,
                                  match_threshold=0.5,
                                  unmatched_threshold=0.5,
                                  mode=subset,
                                  proto_output_size=[int(protonet_out_size[0]),
                                                     int(protonet_out_size[1])])

    files = tf.io.matching_files(os.path.join(tfrecord_dir, "%s.*" % subset))
    num_shards = tf.cast(tf.shape(files)[0], tf.int64)
    shards = tf.data.Dataset.from_tensor_slices(files)
    shards = shards.shuffle(num_shards)
    shards = shards.repeat()
    dataset = shards.interleave(tf.data.TFRecordDataset,
                                cycle_length=num_shards,
                                num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.shuffle(buffer_size=2048)
    dataset = dataset.map(map_func=parser, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return dataset
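The parameterized version is meant to be fed the sizes the model itself computes (see `Yolact.__init__` below). A sketch of that wiring; the hyperparameter values (`fpn_channels=256`, `num_class=81`, `num_mask=32`) and the TFRecord directory are illustrative assumptions, not values fixed by this code:

# Illustrative wiring only: hyperparameters and paths below are assumptions.
model = Yolact(img_h=550, img_w=550, fpn_channels=256, num_class=81, num_mask=32,
               aspect_ratio=[1, 0.5, 2], scales=[24, 48, 96, 192, 384])
train_loader = prepare_dataloader(img_h=550, img_w=550,
                                  feature_map_size=model.feature_map_size,
                                  protonet_out_size=model.protonet_out_size,
                                  aspect_ratio=[1, 0.5, 2],
                                  scale=[24, 48, 96, 192, 384],
                                  tfrecord_dir="data/coco",  # placeholder path
                                  batch_size=8,
                                  subset="train")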
def __init__(self, img_h, img_w, fpn_channels, num_class, num_mask, aspect_ratio, scales):
    super(Yolact, self).__init__()
    out = ['conv3_block4_out', 'conv4_block6_out', 'conv5_block3_out']
    # use pre-trained ResNet50
    # Keras BatchNormalization problem
    # https://github.com/keras-team/keras/pull/9965#issuecomment-501933060
    tf.keras.layers.BatchNormalization = FrozenBatchNormalization
    base_model = tf.keras.applications.ResNet50(input_shape=(img_h, img_w, 3),
                                                include_top=False,
                                                layers=tf.keras.layers,
                                                weights='imagenet')
    # extract certain feature maps for FPN
    self.backbone_resnet = tf.keras.Model(inputs=base_model.input,
                                          outputs=[base_model.get_layer(x).output for x in out])

    # Calculating feature map size
    # https://stackoverflow.com/a/44242277/4582711
    # https://github.com/tensorflow/tensorflow/issues/4297#issuecomment-246080982
    self.feature_map_size = np.array(
        [list(base_model.get_layer(x).output.shape[1:3]) for x in out])
    out_height_p6 = np.ceil((self.feature_map_size[-1, 0]).astype(np.float32) / float(2))
    out_width_p6 = np.ceil((self.feature_map_size[-1, 1]).astype(np.float32) / float(2))
    out_height_p7 = np.ceil(out_height_p6 / float(2))
    out_width_p7 = np.ceil(out_width_p6 / float(2))
    self.feature_map_size = np.concatenate(
        (self.feature_map_size, [[out_height_p6, out_width_p6], [out_height_p7, out_width_p7]]),
        axis=0)
    self.protonet_out_size = self.feature_map_size[0] * 2  # Only one upsampling on p3

    self.backbone_fpn = FeaturePyramidNeck(fpn_channels)
    self.protonet = ProtoNet(num_mask)

    # semantic segmentation branch to boost feature richness
    self.semantic_segmentation = tf.keras.layers.Conv2D(
        num_class - 1, (1, 1), 1, padding="same",
        kernel_initializer=tf.keras.initializers.glorot_uniform())

    anchorobj = anchor.Anchor(img_size_h=img_h, img_size_w=img_w,
                              feature_map_size=self.feature_map_size,
                              aspect_ratio=aspect_ratio,
                              scale=scales)

    self.num_anchors = anchorobj.num_anchors
    self.priors = anchorobj.anchors
    # print("prior shape:", self.priors.shape)
    # print("num anchor per feature map: ", self.num_anchor)

    # shared prediction head
    # len(aspect_ratio) is passed because, during prior calculation, an individual
    # scale is selected for each layer. So when the scales are [24, 48, 96, 130, 192],
    # 24 is for p3, 48 is for p4, and so on. The number of priors for a layer is
    # therefore H x W x len(aspect_ratio), hence passing len(aspect_ratio).
    # This implementation differs from the original YOLACT.
    self.predictionHead = PredictionModule(256, len(aspect_ratio), num_class, num_mask)

    # post-processing for evaluation
    self.detect = Detect(num_class, bkg_label=0, top_k=200, conf_thresh=0.05, nms_thresh=0.5)
    self.max_output_size = 300
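With "same" padding each ResNet stage produces ceil(input / stride) pixels, which is what the StackOverflow and TensorFlow links above describe. For a 550x550 input this reproduces the [69, 35, 18, 9, 5] sizes hard-coded in the 550-pixel loader, plus the 138x138 protonet output; a standalone check, assuming strides 8/16/32 for C3/C4/C5:

import numpy as np

img = 550
# C3, C4, C5 come out of ResNet50 at strides 8, 16, 32 with "same" padding.
c3, c4, c5 = (int(np.ceil(img / s)) for s in (8, 16, 32))  # 69, 35, 18
p6 = int(np.ceil(c5 / 2))   # 9
p7 = int(np.ceil(p6 / 2))   # 5
protonet_out = c3 * 2       # 138; only one upsampling on p3
print([c3, c4, c5, p6, p7], protonet_out)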
import numpy as np
import tensorflow as tf

from data import anchor

test_bbox = tf.convert_to_tensor(
    np.array([[204.044, 253.8351, 487.8226, 427.06363],
              [0, 140.01741, 550, 290.21936],
              [40.005028, 117.37102, 255.7913, 205.13097],
              [263.31314, 67.0434, 514.04736, 124.48139],
              [0, 503.79834, 487.0279, 550]]),
    dtype=tf.float32)
test_labels = tf.convert_to_tensor(np.array([[1], [2], [3], [4], [5]]), dtype=tf.float32)

anchorobj = anchor.Anchor(img_size=550,
                          feature_map_size=[69, 35, 18, 9, 5],
                          aspect_ratio=[1, 0.5, 2],
                          scale=[24, 48, 96, 192, 384])

print(anchorobj.get_anchors())

target_cls, target_loc, max_id_for_anchors, match_positiveness = anchorobj.matching(
    threshold_pos=0.5, threshold_neg=0.4, gt_bbox=test_bbox, gt_labels=test_labels)
print(target_loc)
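Since every cell of every feature map carries len(aspect_ratio) priors, the targets returned by matching() should have one row per prior. For the settings above that is 19,248 priors:

# One prior per (cell, aspect ratio): sum of H * W * len(aspect_ratio).
feature_map_size = [69, 35, 18, 9, 5]
num_priors = sum(f * f * 3 for f in feature_map_size)
print(num_priors)  # (4761 + 1225 + 324 + 81 + 25) * 3 = 19248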
               aspect_ratio=[1, 0.5, 2],
               scales=[24, 48, 96, 192, 384])
model = YOLACT.gen()

ckpt_dir = "checkpoints-SGD"
latest = tf.train.latest_checkpoint(ckpt_dir)
checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
status = checkpoint.restore(latest)
print("Restore Ckpt Successfully!!")

# Load Validation Images and do Detection
# -----------------------------------------------------------------------------------------------
# Need default anchor
anchorobj = anchor.Anchor(img_size=256,
                          feature_map_size=[32, 16, 8, 4, 2],
                          aspect_ratio=[1, 0.5, 2],
                          scale=[24, 48, 96, 192, 384])

valid_dataset = dataset_coco.prepare_evalloader(img_size=256,
                                                tfrecord_dir='data/obj_tfrecord_256x256_20200921',
                                                subset='val')
anchors = anchorobj.get_anchors()

detect_layer = Detect(num_cls=13, label_background=0, top_k=200, conf_threshold=0.3,
                      nms_threshold=0.5, anchors=anchors)

remapping = [
    'Background',
    'Face',
    'Body',
    'Bicycle',
    'Car',
    'Motorbike',
    'Airplane',
    'Ship',