def get_dataflow(annot_path, img_dir, strict, x_size=224, y_size=28):
    """
    This function initializes the tensorpack dataflow and serves generator
    for training operation.

    :param annot_path: path to the annotation file
    :param img_dir: path to the images
    :return: dataflow object
    """
    coco_crop_size = 368

    # configure augmentors

    augmentors = [
        ScaleAug(scale_min=0.5,
                 scale_max=1.1,
                 target_dist=0.6,
                 interp=cv2.INTER_CUBIC),

        RotateAug(rotate_max_deg=40,
                  interp=cv2.INTER_CUBIC,
                  border=cv2.BORDER_CONSTANT,
                  border_value=(128, 128, 128), mask_border_val=1),

        CropAug(coco_crop_size, coco_crop_size, center_perterb_max=40, border_value=128,
                mask_border_val=1),

        FlipAug(num_parts=18, prob=0.5),

        ResizeAug(x_size, x_size)

    ]

    # prepare augment function

    augment_func = functools.partial(augment,
                                     augmentors=augmentors)

    # prepare building sample function

    build_sample_func = functools.partial(build_sample,
                                          y_size=y_size)

    # build the dataflow

    df = CocoDataFlow((coco_crop_size, coco_crop_size), annot_path, img_dir)
    df.prepare()
    size = df.size()
    df = MapData(df, read_img)
    df = MapData(df, augment_func)
    df = MultiProcessMapDataZMQ(df, num_proc=4, map_func=build_sample_func, buffer_size=200, strict=strict)

    return df, size
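
# Hedged usage sketch (assumed, not from the original source): consuming the
# dataflow returned by get_dataflow() above. The paths are placeholders.
def _demo_get_dataflow():
    df, num_samples = get_dataflow('annotations/train.json', 'images/',
                                   strict=False)
    df.reset_state()  # required once before iterating any tensorpack DataFlow
    for dp in df.get_data():  # dp is one training sample built by build_sample()
        pass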
Example #2
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    roidbs = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    preprocess = TrainingDataPreprocessor(cfg)

    if cfg.DATA.NUM_WORKERS > 0:
        if cfg.TRAINER == 'horovod':
            buffer_size = cfg.DATA.NUM_WORKERS * 10  # one dataflow for each process, therefore don't need large buffer
            ds = MultiThreadMapData(ds,
                                    cfg.DATA.NUM_WORKERS,
                                    preprocess,
                                    buffer_size=buffer_size)
            # MPI does not like fork()
        else:
            buffer_size = cfg.DATA.NUM_WORKERS * 20
            ds = MultiProcessMapDataZMQ(ds,
                                        cfg.DATA.NUM_WORKERS,
                                        preprocess,
                                        buffer_size=buffer_size)
    else:
        ds = MapData(ds, preprocess)
    return ds
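
# Hedged sketch (assumed): the `preprocess` passed to MultiThreadMapData /
# MultiProcessMapDataZMQ above is any callable that maps one roidb dict to a
# training datapoint, returning None to drop bad inputs (non-strict mode skips
# None). A minimal stand-in with the same contract as TrainingDataPreprocessor:
class MinimalPreprocessor:
    def __init__(self, cfg):
        self.cfg = cfg

    def __call__(self, roidb):
        if len(roidb['boxes']) == 0:
            return None  # dropped by the mapper
        return roidb  # a real preprocessor loads the image, augments, builds anchors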
Example #3
def build_dataflow(files):
    train_ds = DataFromList(files)
    aug = imgaug.AugmentorList(get_basic_augmentor(isTrain=False))

    def mapper(dp):
        idx, fname, label = dp
        img = cv2.imread(fname)
        img = aug.augment(img)
        return img, idx

    train_ds = MultiProcessMapDataZMQ(train_ds,
                                      num_proc=8,
                                      map_func=mapper,
                                      strict=True)
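    # NOTE: `local_batch_size` is not defined in this snippet; it is assumed to
    # be a module-level constant set elsewhere in the original source.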
    train_ds = BatchData(train_ds, local_batch_size)
    train_ds.reset_state()
    return train_ds
def get_sequential_loader(ds, isTrain, batch_size, augmentors, parallel=None):
    """ Load a Single-File LMDB (Sequential Read)
    Args:
        augmentors (list[imgaug.Augmentor]): Defaults to `fbresnet_augmentor(isTrain)`

    Returns: An LMDBData which produces BGR images and labels.

    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/tutorial/efficient-dataflow.html
    """
    assert isinstance(augmentors, list)
    aug = imgaug.AugmentorList(augmentors)

    if parallel is None:
        parallel = min(40,
                       multiprocessing.cpu_count() //
                       2)  # assuming hyperthreading

    if isTrain:
        ds = LocallyShuffleData(ds, 50000)
        ds = MapDataComponent(ds, lambda x: cv2.imdecode(x, cv2.IMREAD_COLOR),
                              0)
        ds = AugmentImageComponent(ds, aug, copy=False)
        if parallel < 16:
            logger.warn(
                "DataFlow may become the bottleneck when too few processes are used."
            )
        ds = BatchData(ds, batch_size, remainder=False, use_list=True)
        ds = MultiProcessRunnerZMQ(ds, parallel)
    else:

        def mapper(data):
            im, label = data
            im = cv2.imdecode(im, cv2.IMREAD_COLOR)
            im = aug.augment(im)
            return im, label

        ds = MultiProcessMapDataZMQ(ds,
                                    parallel,
                                    mapper,
                                    buffer_size=2000,
                                    strict=True)
        ds = BatchData(ds, batch_size, remainder=True, use_list=True)
    return ds
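
# Hedged usage sketch (assumed): feeding get_sequential_loader() with an LMDB
# produced by tensorpack's LMDBSerializer. Path and batch size are placeholders.
def _demo_sequential_loader():
    ds = LMDBSerializer.load('/path/to/ILSVRC-train.lmdb', shuffle=False)
    ds = get_sequential_loader(ds, isTrain=True, batch_size=64,
                               augmentors=fbresnet_augmentor(True))
    ds.reset_state()
    for ims, labels in ds.get_data():  # lists, since BatchData uses use_list=True
        pass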
    def __init__(self,
                 mode,
                 batch_size=256,
                 shuffle=False,
                 num_workers=25,
                 cache=50000,
                 device='cuda'):
        # enumerate standard imagenet augmentors
        imagenet_augmentors = fbresnet_augmentor(mode == 'train')

        # load the lmdb if we can find it
        base_dir = '/userhome/cs/u3003679/'
        lmdb_loc = os.path.join(base_dir, 'ILSVRC-{}.lmdb'.format(mode))
        #lmdb_loc = os.path.join(os.environ['IMAGENET'], 'ILSVRC-%s.lmdb'%mode)
        ds = LMDBSerializer.load(lmdb_loc, shuffle=shuffle)
        ds = LocallyShuffleData(ds, cache)

        # ds = td.LMDBDataPoint(ds)

        def f(dp):
            x, label = dp
            x = cv2.imdecode(x, cv2.IMREAD_COLOR)
            for aug in imagenet_augmentors:
                x = aug.augment(x)
            return x, label

        ds = MultiProcessMapDataZMQ(ds, num_proc=8, map_func=f)
        # ds = MapDataComponent(ds, lambda x: cv2.imdecode(x, cv2.IMREAD_COLOR), 0)
        # ds = AugmentImageComponent(ds, imagenet_augmentors)

        # ds = td.PrefetchData(ds, 5000, 1)

        # ds = td.MapDataComponent(ds, lambda x: cv2.imdecode(x, cv2.IMREAD_COLOR), 0)
        # ds = td.AugmentImageComponent(ds, imagenet_augmentors)
        # ds = td.PrefetchDataZMQ(ds, num_workers)
        self.ds = BatchData(ds, batch_size)
        # self.ds = MultiProcessRunnerZMQ(self.ds, 4)
        self.ds.reset_state()

        self.batch_size = batch_size
        self.num_workers = num_workers
        self.device = device
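
    # Hedged sketch (assumed, not in the original fragment): a generator such a
    # loader wrapper commonly exposes so training code can iterate batches.
    def __iter__(self):
        for batch in self.ds.get_data():
            yield batch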
Example #6
def get_train_dataflow(src):
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    #imgs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)

    classes = (
        'BG',  # always index 0
        'bathtub',
        'bed',
        'bookshelf',
        'box',
        'chair',
        'counter',
        'desk',
        'door',
        'dresser',
        'garbage_bin',
        'lamp',
        'monitor',
        'night_stand',
        'pillow',
        'sink',
        'sofa',
        'table',
        'toilet',
        'tv')

    class_to_ind = dict(list(zip(classes, list(range(len(classes))))))
    #src = '/media/ayan/Drive/IMI-Research/Datasets/Datasets_OP_Train/'
    textfile_index = natsorted(
        [src + f for f in np.sort(os.listdir(src)) if f.endswith('.txt')])
    imgs = []
    count = 0
    for fn in textfile_index:
        each_file = {}
        count = count + 1
        print(str(count) + ':::', fn)
        with open(fn, 'r') as F:
            file_F = F.read().split('\n')
        each_file['file_name'] = file_F[0]
        im = cv2.imread(each_file['file_name'])
        assert im is not None, each_file['file_name']
        each_file['height'] = im.shape[0]
        each_file['width'] = im.shape[1]
        objects = file_F[2:len(file_F) - 1]
        boxes = []
        class_ = []
        for obj in objects:
            objs_line = obj.split(' ')
            x1 = float(objs_line[1]) - 1.0
            y1 = float(objs_line[2]) - 1.0
            x2 = float(objs_line[3]) - 1.0
            y2 = float(objs_line[4]) - 1.0
            if x1 >= x2:
                x2 = x1 + 1
            boxes.append([x1, y1, x2, y2])
            cls = class_to_ind[objs_line[0]]
            class_.append(cls)
        each_file['boxes'] = np.array(boxes).astype(np.float32)
        each_file['class'] = np.array(class_).astype(np.int32)
        each_file['is_crowd'] = np.zeros_like(each_file['class']).astype(
            np.int8)
        imgs.append(each_file)
    """ 
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = (img['file_name'], img['boxes'],
                                         img['class'], img['is_crowd'])
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(
                    multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
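
# Hedged sketch (inferred from the parser above, not from the original source):
# the per-image annotation .txt layout it expects. Line 1 is the image path,
# line 2 is skipped by the slice file_F[2:len(file_F) - 1] (e.g. an object
# count), and every remaining line is "<class> <x1> <y1> <x2> <y2>" with
# 1-indexed coordinates:
#
#     /path/to/img_0001.jpg
#     2
#     chair 34.0 58.0 120.0 200.0
#     table 10.0 12.0 300.0 240.0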
Example #7
def get_sniper_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    scale_index: i

    If MODE_MASK, gt_masks: (N, h, w)
    """

    OUTPUT_FILE = 'train_512_annotation.txt'
    OUTPUT_IMG_DIR = 'out'
    out_file = open(OUTPUT_FILE, 'w')
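    # NOTE: this handle is also written to inside preprocess(), which runs in
    # forked worker processes below; writes from several workers may interleave.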

    class SniperDataFlow(ProxyDataFlow):
        def __init__(self, ds):
            super(SniperDataFlow, self).__init__(ds)
            # self.ds = ds
        def size(self):
            raise NotImplementedError()

        def get_data(self):
            for img in self.ds.get_data():
                for chip in img:
                    yield chip

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)
    # aug = imgaug.AugmentorList([
    #     CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
    #     imgaug.Flip(horiz=True)
    # ])

    assert os.path.isfile(cfg.SNIPER.PRN_PRE)
    proposal_pickle = pandas.read_pickle(cfg.SNIPER.PRN_PRE)

    def preprocess(img):

        fname, boxes, klass, is_crowd = (img['file_name'], img['boxes'],
                                         img['class'], img['is_crowd'])
        img_name = fname.split('/')[-1]
        img_id = int(img_name[3:-4])
        # pretrained RPN proposals are used for negative chip extraction

        proposals = proposal_pickle['boxes'][proposal_pickle['ids'].index(
            img_id)]
        proposals[2:4] += proposals[0:2]  # from [x,y,w,h] to [x1,y1,x2,y2]

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
        chip_generator = Im2Chip(im,
                                 boxes,
                                 klass,
                                 proposals,
                                 cfg.SNIPER.SCALES,
                                 cfg.SNIPER.VALID_RANGES,
                                 is_crowd=is_crowd,
                                 chip_size=cfg.SNIPER.CHIP_SIZE,
                                 chip_stride=cfg.SNIPER.CHIP_STRIDE)
        im, boxes, klass, scale_indices, is_crowd = chip_generator.genChipMultiScale()
        rets = []
        for i in range(len(im)):
            if len(boxes[i]) == 0:
                # no valid gt boxes in this chip; skip it
                continue
            # ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]
            #                                        ] + [scale_indices[i]*len(boxes[i])]
            new_name = '%s_%d' % (img_name, i)
            cv2.imwrite('%s/%s' % (OUTPUT_IMG_DIR, new_name), im[i])

            ret = [im[i]] + [boxes[i], klass[i]]
            for j in range(len(klass[i])):
                if j == 0:
                    out_file.write(new_name)
                out_file.write(' %d %f %f %f %f' %
                               (klass[i][j], boxes[i][j][0], boxes[i][j][1],
                                boxes[i][j][2], boxes[i][j][3]))
                if j == len(klass[i]) - 1:
                    out_file.write('\n')
            rets.append(ret)
        return rets

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # ds = PrefetchDataZM
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    # ds = SniperDataFlow(ds)
    return ds
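
# Hedged sketch (inferred from the writer loop above): each line of
# train_512_annotation.txt is one chip name followed by
# "<class> <x1> <y1> <x2> <y2>" repeated for every ground-truth box, e.g.:
#
#     img_000123.jpg_0 5 10.0 20.0 50.0 80.0 7 60.0 60.0 90.0 120.0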
Example #8
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    input image: (h, w, 3),
    semantic label image: (h, w, 1)
    """
    # imgs is a list, where each element is a dict containing 'fn_img', and 'fn_label'
    imgs = load_many_from_db(cfg.DATA.NAME, add_gt=True, is_train=True)
    # imgs = COCODetection.load_many(
    #     cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str
    boxes: kx4 floats
    class: k integers
    difficult: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    # log invalid training

    ds = DataFromList(imgs, shuffle=True)

    mean_bgr = np.array(cfg.PREPROC.PIXEL_MEAN[::-1])

    if cfg.DATA.NAME == 'cityscapes':
        aspect_exp = 1.1
    elif cfg.DATA.NAME == 'cocostuff':
        aspect_exp = 1.1  # 2.0
    else:
        raise ValueError('Unknown dataset name: {}'.format(cfg.DATA.NAME))

    aug = imgaug.AugmentorList([
        SSDCropRandomShape(cfg.PREPROC.INPUT_SHAPE_TRAIN, aspect_exp=aspect_exp,
                           mean_rgbgr=mean_bgr),
        SSDResize(cfg.PREPROC.INPUT_SHAPE_TRAIN),
        imgaug.Flip(horiz=True),
        SSDColorJitter(mean_rgbgr=mean_bgr)
    ])
    aug_label = imgaug.AugmentorList([
        SSDCropRandomShape(cfg.PREPROC.INPUT_SHAPE_TRAIN, aspect_exp=aspect_exp,
                           mean_rgbgr=[255, ]),
        SSDResize(cfg.PREPROC.INPUT_SHAPE_TRAIN, interp=cv2.INTER_NEAREST),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fn_img, fn_label = img['fn_img'], img['fn_label']
        # load head (and landmark) data as well
        im = cv2.imread(fn_img, cv2.IMREAD_COLOR)
        if fn_label.endswith('.mat'):  # cocostuff
            label = loadmat(fn_label)['S'].astype(int)
            label = (label - 1).astype(np.uint8)  # -1 becomes 255
        else:
            label = cv2.imread(fn_label, cv2.IMREAD_GRAYSCALE)
        assert (im is not None) and (label is not None), fn_img
        label = np.expand_dims(label, 2)
        im = im.astype('float32')
        # label = label.astype('int32')
        # augmentation
        im, params = aug.augment_return_params(im)
        # TODO: better way to adjust label?
        params_label = deepcopy(params[:-1])
        params_label[0].mean_rgbgr = [255,]
        params_label[1].interp = cv2.INTER_NEAREST
        label = aug_label.augment_with_params(label, params_label)
        label = label.astype('int32')

        ret = [im, label]
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        # ds = MapData(ds, preprocess) # for debugging
        ds = MultiProcessMapDataZMQ(ds, cfg.PREPROC.NUM_WORKERS, preprocess)
    ds = BatchData(ds, cfg.PREPROC.BATCH_SIZE)
    return ds
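
# Hedged usage sketch (assumed): one batched datapoint from the dataflow above
# is a pair of stacked arrays, images of shape (BS, h, w, 3) float32 and labels
# of shape (BS, h, w, 1) int32.
def _demo_semantic_dataflow():
    ds = get_train_dataflow()
    ds.reset_state()
    for ims, labels in ds.get_data():
        pass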
Example #9
def get_batch_train_dataflow(batch_size):
    """
    Return a training dataflow. Each datapoint consists of the following:

    A batch of images: (BS, h, w, 3),

    For each image

    1 or more pairs of (anchor_labels, anchor_boxes) :
    anchor_labels: (BS, h', w', maxNumAnchors)
    anchor_boxes: (BS, h', w', maxNumAnchors, 4)

    gt_boxes: (BS, maxNumGTs, 4)
    gt_labels: (BS, maxNumGTs)

    If MODE_MASK, gt_masks: (BS, maxNumGTs, h, w)
    """
    print("In train dataflow")
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print("Done loading roidbs")

    # print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info("Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
        num - len(roidbs), len(roidbs)))

    roidbs = sorted(roidbs, key=lambda x: float(x['width']) / float(x['height']), reverse=True)     # will shuffle it later at every rank

    print("Batching roidbs")
    batched_roidbs = []

    if cfg.PREPROC.PREDEFINED_PADDING:
        taken = [False for _ in roidbs]
        done = False

        for i, d in enumerate(roidbs):
            batch = []
            if not taken[i]:
                batch.append(d)
                padding_shape = get_padding_shape(d['height'], d['width'])
                while len(batch) < batch_size:
                    k = get_next_roidb(roidbs, i, padding_shape, taken)
                    if k is None:
                        done = True
                        break
                    batch.append(roidbs[k])
                    taken[i], taken[k] = True, True
                if not done:
                    batched_roidbs.append(batch)
    else:
        batch = []
        for i, d in enumerate(roidbs):
            if i % batch_size == 0:
                if len(batch) == batch_size:
                    batched_roidbs.append(batch)
                batch = []
            batch.append(d)
        if len(batch) == batch_size:
            batched_roidbs.append(batch)  # keep the final batch when it is full

    #batched_roidbs = sort_by_aspect_ratio(roidbs, batch_size)
    #batched_roidbs = group_by_aspect_ratio(roidbs, batch_size)
    print("Done batching roidbs")


    # Notes:
    #   - discard any leftover images
    #   - The batches will be shuffled, but the contents of each batch will always be the same
    #   - TODO: Fix lack of batch contents shuffling


    aug = imgaug.AugmentorList(
         [CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
          imgaug.Flip(horiz=True)])

    # aug = imgaug.AugmentorList([CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])


    def preprocess(roidb_batch):
        datapoint_list = []
        for roidb in roidb_batch:
            fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
            boxes = np.copy(boxes)
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            assert im is not None, fname
            im = im.astype('float32')
            # assume floatbox as input
            assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

            # augmentation:
            im, params = aug.augment_return_params(im)
            points = box_to_point8(boxes)
            points = aug.augment_coords(points, params)
            boxes = point8_to_box(points)
            assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

            ret = {'images': im}
            # rpn anchor:
            try:
                if cfg.MODE_FPN:
                    multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                    for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                        ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                        ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
                else:
                    raise NotImplementedError("[armand] Batch mode only available for FPN")

                boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
                klass = klass[is_crowd == 0]
                ret['gt_boxes'] = boxes
                ret['gt_labels'] = klass
                ret['filename'] = fname
                if not len(boxes):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
                return None

            if cfg.MODE_MASK:
                # augmentation will modify the polys in-place
                segmentation = copy.deepcopy(roidb['segmentation'])
                segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
                assert len(segmentation) == len(boxes)

                # Apply augmentation on polygon coordinates.
                # And produce one image-sized binary mask per box.
                masks = []
                for polys in segmentation:
                    polys = [aug.augment_coords(p, params) for p in polys]
                    masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
                masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
                ret['gt_masks'] = masks

            datapoint_list.append(ret)

        #################################################################################################################
        # Batchify the output
        #################################################################################################################

        # Now we need to batch the various fields

        # Easily stackable:
        # - anchor_labels_lvl2
        # - anchor_boxes_lvl2
        # - anchor_labels_lvl3
        # - anchor_boxes_lvl3
        # - anchor_labels_lvl4
        # - anchor_boxes_lvl4
        # - anchor_labels_lvl5
        # - anchor_boxes_lvl5
        # - anchor_labels_lvl6
        # - anchor_boxes_lvl6

        batched_datapoint = {}
        for stackable_field in ["anchor_labels_lvl2",
                                "anchor_boxes_lvl2",
                                "anchor_labels_lvl3",
                                "anchor_boxes_lvl3",
                                "anchor_labels_lvl4",
                                "anchor_boxes_lvl4",
                                "anchor_labels_lvl5",
                                "anchor_boxes_lvl5",
                                "anchor_labels_lvl6",
                                "anchor_boxes_lvl6"]:
            batched_datapoint[stackable_field] = np.stack([d[stackable_field] for d in datapoint_list])



        # Require padding and original dimension storage
        # - image (HxWx3)
        # - gt_boxes (?x4)
        # - gt_labels (?)
        # - gt_masks (?xHxW)

        """
        Find the minimum container size for images (maxW x maxH)
        Find the maximum number of ground truth boxes
        For each image, save original dimension and pad
        """

        if cfg.PREPROC.PREDEFINED_PADDING:
            padding_shapes = [get_padding_shape(*(d["images"].shape[:2])) for d in datapoint_list]
            max_height = max([shp[0] for shp in padding_shapes])
            max_width = max([shp[1] for shp in padding_shapes])
        else:
            image_dims = [d["images"].shape for d in datapoint_list]
            heights = [dim[0] for dim in image_dims]
            widths = [dim[1] for dim in image_dims]

            max_height = max(heights)
            max_width = max(widths)


        # image
        padded_images = []
        original_image_dims = []
        for datapoint in datapoint_list:
            image = datapoint["images"]
            original_image_dims.append(image.shape)

            h_padding = max_height - image.shape[0]
            w_padding = max_width - image.shape[1]

            padded_image = np.pad(image,
                                  [[0, h_padding],
                                   [0, w_padding],
                                   [0, 0]],
                                  'constant')

            padded_images.append(padded_image)

        batched_datapoint["images"] = np.stack(padded_images)
        #print(batched_datapoint["images"].shape)
        batched_datapoint["orig_image_dims"] = np.stack(original_image_dims)


        # gt_boxes and gt_labels
        max_num_gts = max([d["gt_labels"].size for d in datapoint_list])

        gt_counts = []
        padded_gt_labels = []
        padded_gt_boxes = []
        padded_gt_masks = []
        for datapoint in datapoint_list:
            gt_count_for_image = datapoint["gt_labels"].size
            gt_counts.append(gt_count_for_image)

            gt_padding = max_num_gts - gt_count_for_image

            padded_gt_labels_for_img = np.pad(datapoint["gt_labels"], [0, gt_padding], 'constant', constant_values=-1)
            padded_gt_labels.append(padded_gt_labels_for_img)

            padded_gt_boxes_for_img = np.pad(datapoint["gt_boxes"],
                                             [[0, gt_padding],
                                              [0,0]],
                                             'constant')
            padded_gt_boxes.append(padded_gt_boxes_for_img)

            h_padding = max_height - datapoint["images"].shape[0]
            w_padding = max_width - datapoint["images"].shape[1]

            if cfg.MODE_MASK:
                padded_gt_masks_for_img = np.pad(datapoint["gt_masks"],
                                         [[0, gt_padding],
                                          [0, h_padding],
                                          [0, w_padding]],
                                         'constant')
                padded_gt_masks.append(padded_gt_masks_for_img)


        batched_datapoint["orig_gt_counts"] = np.stack(gt_counts)
        batched_datapoint["gt_labels"] = np.stack(padded_gt_labels)
        batched_datapoint["gt_boxes"] = np.stack(padded_gt_boxes)
        batched_datapoint["filenames"] = [d["filename"] for d in datapoint_list]

        if cfg.MODE_MASK:
            batched_datapoint["gt_masks"] = np.stack(padded_gt_masks)



        return batched_datapoint

    ds = DataFromList(batched_roidbs, shuffle=True)



    if cfg.TRAINER == 'horovod':
        # ds = MapData(ds, preprocess)
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
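
# Hedged sketch (assumed): undoing the image padding downstream with the
# original dimensions stored by preprocess() in the batched datapoint.
def _demo_unpad(batched_datapoint):
    for img, dims in zip(batched_datapoint['images'],
                         batched_datapoint['orig_image_dims']):
        h, w = int(dims[0]), int(dims[1])
        unpadded = img[:h, :w, :]  # recover the pre-padding image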
Example #10
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    roidbs = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = (roidb['file_name'], roidb['boxes'],
                                         roidb['class'], roidb['is_crowd'])
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # Add rpn data to dataflow:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(
                    im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.DATA.NUM_WORKERS > 0:
        if cfg.TRAINER == 'horovod':
            buffer_size = cfg.DATA.NUM_WORKERS * 10  # one dataflow for each process, therefore don't need large buffer
            ds = MultiThreadMapData(ds,
                                    cfg.DATA.NUM_WORKERS,
                                    preprocess,
                                    buffer_size=buffer_size)
            # MPI does not like fork()
        else:
            buffer_size = cfg.DATA.NUM_WORKERS * 20
            ds = MultiProcessMapDataZMQ(ds,
                                        cfg.DATA.NUM_WORKERS,
                                        preprocess,
                                        buffer_size=buffer_size)
    else:
        ds = MapData(ds, preprocess)
    return ds
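
# Hedged sketch: when cfg.DATA.ABSOLUTE_COORD is False, the loader provides
# boxes (and polygons) normalized to [0, 1] and preprocess() above rescales
# them to pixels, e.g.:
#
#     boxes = np.array([[0.1, 0.2, 0.5, 0.8]], dtype=np.float32)  # x1 y1 x2 y2
#     boxes[:, 0::2] *= width   # x coordinates
#     boxes[:, 1::2] *= height  # y coordinates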
Example #11
def get_sniper_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    scale_index: i

    If MODE_MASK, gt_masks: (N, h, w)
    """
    class SniperDataFlow(ProxyDataFlow):
        def __init__(self, ds):
            super(SniperDataFlow, self).__init__(ds)
            # self.ds = ds
        def size(self):
            raise NotImplementedError()

        def get_data(self):
            for img in self.ds.get_data():
                for chip in img:
                    yield chip

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([imgaug.Flip(horiz=True)])

    assert os.path.isfile(cfg.SNIPER.PRN_PRE)
    proposal_pickle = pandas.read_pickle(cfg.SNIPER.PRN_PRE)

    def preprocess(img):

        fname, boxes, klass, is_crowd = (img['file_name'], img['boxes'],
                                         img['class'], img['is_crowd'])
        img_name = fname.split('/')[-1]
        img_id = int(img_name[3:-4])
        # pretrained RPN proposals are used for negative chip extraction

        proposals = proposal_pickle['boxes'][proposal_pickle['ids'].index(
            img_id)]
        proposals[2:4] += proposals[0:2]  # from [x,y,w,h] to [x1,y1,x2,y2]

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
        chip_generator = Im2Chip(im,
                                 boxes,
                                 klass,
                                 proposals,
                                 cfg.SNIPER.SCALES,
                                 cfg.SNIPER.VALID_RANGES,
                                 is_crowd=is_crowd,
                                 chip_size=cfg.SNIPER.CHIP_SIZE,
                                 chip_stride=cfg.SNIPER.CHIP_STRIDE)
        im, boxes, klass, scale_indices, is_crowd = chip_generator.genChipMultiScale()
        rets = []
        for i in range(len(im)):
            try:
                if len(boxes[i]) == 0:
                    continue
                # anchor_labels, anchor_boxes
                gt_invalid = []
                maxbox = cfg.SNIPER.VALID_RANGES[scale_indices[i]][0]
                minbox = cfg.SNIPER.VALID_RANGES[scale_indices[i]][1]
                maxbox = sys.maxsize if maxbox == -1 else maxbox
                minbox = 0 if minbox == -1 else minbox
                for box in boxes[i]:
                    w = box[2] - box[0]
                    h = box[3] - box[1]
                    if w >= maxbox or h >= maxbox or (w < minbox
                                                      and h < minbox):
                        gt_invalid.append(box)
                anchor_inputs = get_sniper_rpn_anchor_input(
                    im[i], boxes[i], is_crowd[i], gt_invalid)
                assert len(anchor_inputs) == 2

                boxes[i] = boxes[i][is_crowd[i] == 0]  # skip crowd boxes in training target
                klass[i] = klass[i][is_crowd[i] == 0]

                if not len(boxes[i]):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once(
                    "Input {} is filtered for training: {}".format(
                        fname, str(e)), 'warn')
                ret = None
                continue

            # ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]
            #                                        ] + [scale_indices[i]*len(boxes[i])]
            ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]]
            rets.append(ret)
        return rets

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # ds = PrefetchDataZM
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    ds = SniperDataFlow(ds)
    return ds
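
# Hedged note (from the code above): preprocess() returns a list of chips per
# source image, so SniperDataFlow flattens the stream and yields one chip
# datapoint [im, anchor_labels, anchor_boxes, boxes, klass] at a time.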
Example #12
def get_wider_dataflow(augment=False):
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    logger.info("loading wider attributes dataset...")
    roidbs_train = load_many(cfg.WIDER.BASEDIR, 'train', augment)
    roidbs_test = load_many(cfg.WIDER.BASEDIR, 'test', augment)
    roidbs = roidbs_train + roidbs_test

    logger.info("load finished!")
    """
    To train on your own data, change this to your loader.
    Produce "roidbs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.

    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img['bbox']) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname = roidb['img']
        x1, y1, w, h = np.split(roidb['bbox'], 4, axis=1)
        boxes = np.concatenate([x1, y1, x1 + w, y1 + h], axis=1)

        klass = np.ones(len(roidb['bbox']), dtype=np.int32)

        male = roidb['male']
        longhair = roidb['longhair']
        sunglass = roidb['sunglass']
        hat = roidb['hat']
        tshirt = roidb['tshirt']
        longsleeve = roidb['longsleeve']
        formal = roidb['formal']
        shorts = roidb['shorts']
        jeans = roidb['jeans']
        longpants = roidb['longpants']
        skirt = roidb['skirt']
        facemask = roidb['facemask']
        logo = roidb['logo']
        stripe = roidb['stripe']

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:

            # anchor_labels, anchor_boxes
            ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(
                im, boxes, np.zeros(len(boxes), dtype=int))
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass

            ret['male'] = male
            ret['longhair'] = longhair
            ret['sunglass'] = sunglass
            ret['hat'] = hat
            ret['tshirt'] = tshirt
            ret['longsleeve'] = longsleeve
            ret['formal'] = formal
            ret['shorts'] = shorts
            ret['jeans'] = jeans
            ret['longpants'] = longpants
            ret['skirt'] = skirt
            ret['facemask'] = facemask
            ret['logo'] = logo
            ret['stripe'] = stripe

            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
Example #13
def get_attributes_dataflow(augment=False, two_cls=False):
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    # roidbs = load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN)
    logger.info("loading wider attributes dataset...")
    roidbs_train = load_many('/root/datasets/WiderAttribute', 'train', augment,
                             two_cls)
    roidbs_val = load_many('/root/datasets/WiderAttribute', 'val', augment,
                           two_cls)
    roidbs_wider = roidbs_train + roidbs_val

    def attr_augment(names, multiple):
        datalist = []
        for name in names:
            datalist += [
                roidb for roidb in roidbs_wider if np.sum(roidb[name] == 1) > 0
            ]
        return datalist * multiple

    attr_names = ['sunglass', 'stripe', 'facemask', 'jeans']
    roidbs_wider += attr_augment(attr_names, 2)

    logger.info("load finished!")
    """
    To train on your own data, change this to your loader.
    Produce "roidbs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    attrs: numpy array of k integers; -1 -> negative, 0 -> ignore, 1 -> positive
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    # num = len(roidbs)
    # roidbs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    # logger.info("Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
    #    num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs_wider, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname = roidb['img']
        boxes = roidb['bbox']
        male = roidb['male']
        longhair = roidb['longhair']
        sunglass = roidb['sunglass']
        hat = roidb['hat']
        tshirt = roidb['tshirt']
        longsleeve = roidb['longsleeve']
        formal = roidb['formal']
        shorts = roidb['shorts']
        jeans = roidb['jeans']
        longpants = roidb['longpants']
        skirt = roidb['skirt']
        facemask = roidb['facemask']
        logo = roidb['logo']
        stripe = roidb['stripe']

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        # assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # compute the anchor targets once instead of calling the function twice
        anchor_labels, anchor_boxes = get_rpn_anchor_input(
            im, boxes, np.zeros(len(boxes), dtype=int))
        ret = {
            'image': im,
            'gt_boxes': boxes,
            'male': male,
            'longhair': longhair,
            'sunglass': sunglass,
            'hat': hat,
            'tshirt': tshirt,
            'longsleeve': longsleeve,
            'formal': formal,
            'shorts': shorts,
            'jeans': jeans,
            'longpants': longpants,
            'skirt': skirt,
            'facemask': facemask,
            'logo': logo,
            'stripe': stripe,
            'anchor_labels': anchor_labels,
            'anchor_boxes': anchor_boxes
        }
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
Example #14
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)
    """

    prw = PRWDataset(cfg.DATA.BASEDIR)
    imgs = prw.load()
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    """

    ds = DataFromList(imgs, shuffle=cfg.DATA.TEST.SHUFFLE)

    # imgaug.Flip(horiz=True)
    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(img):
        fname, boxes, klass, is_crowd, re_id_class = (
            img['file_name'], img['boxes'], img['class'], img['is_crowd'],
            img['re_id_class'])
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        orig_shape = im.shape[:2]
        orig_im = np.copy(im)
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            # anchor_labels, anchor_boxes
            anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
            assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [
            boxes, klass, re_id_class, orig_shape, orig_im
        ]

        return ret

    ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
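
# Hedged note (from the return statement above): each datapoint is the list
# [im, anchor_labels, anchor_boxes, boxes, klass, re_id_class, orig_shape, orig_im].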
Example #15
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    #roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    #print_class_histogram(roidbs)

    roidbs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                     cfg.DATA.TRAIN,
                                     add_gt=True,
                                     add_mask=cfg.MODE_MASK)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = (roidb['file_name'], roidb['boxes'],
                                         roidb['class'], roidb['is_crowd'])
        boxes = np.copy(boxes)
        im = imread(fname)
        assert im is not None, fname
        im = np.expand_dims(im, axis=2)
        im = np.repeat(im, 3, axis=2)
        im = im.astype('float32')
        height, width = im.shape[:2]  # needed below for normalized coordinates
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        #source_image = Image.fromarray(im.astype('uint8'))
        #imsave('./input_image1', im[:,:,1].astype(np.float32), imagej=True)
        """
        draw = ImageDraw.Draw(source_image)
        for i, bbox in enumerate(boxes):
            # tmp_x = bbox[2] - bbox[0]
            # tmp_y = bbox[3] - bbox[1]
            # draw.rectangle((bbox[0], bbox[1], tmp_x, tmp_y), outline='red')
            draw.rectangle((bbox[0], bbox[1], bbox[2], bbox[3]), outline='red')
            #draw.text((bbox[0] + 5, bbox[1] + 5), str(klass_tmp[i]))
        source_image.save('./input_image1', "JPEG")

        """
        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret[
                    'anchor_boxes'] = get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess, buffer_size=1)
    return ds
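
The FPN branch above emits one (anchor_labels, anchor_boxes) pair per pyramid level, keyed as 'anchor_labels_lvl{k}' and 'anchor_boxes_lvl{k}' starting at level 2. A quick sanity check of a produced datapoint might look like the sketch below; five levels (P2 to P6) is an assumption that depends on the config:

def check_fpn_datapoint(dp, num_levels=5):
    # Levels are conventionally numbered 2..6, matching the 'lvl{}'.format(i + 2) keys above.
    expected = {'image', 'gt_boxes', 'gt_labels'}
    for lvl in range(2, 2 + num_levels):
        expected.add('anchor_labels_lvl{}'.format(lvl))
        expected.add('anchor_boxes_lvl{}'.format(lvl))
    missing = expected - set(dp)
    assert not missing, 'datapoint is missing: {}'.format(sorted(missing))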
Example No. 16
def get_train_dataflow_YCBV():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    img_ids = YCBVDetectionDataset().load_training_image_ids(cfg.DATA.TRAIN)
    # Valid training images should have at least one fg box, but because this
    # variant loads each roidb lazily by image id inside preprocess(), the
    # usual up-front filtering of images without non-crowd ground-truth boxes
    # is skipped here; malformed samples are still dropped via MalformedData.

    ds = DataFromList(img_ids, shuffle=True)

    # aug = imgaug.AugmentorList(
    #     [CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(image_id):
        roidb = YCBVDetectionDataset().load_single_roidb(image_id)
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
            'boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation is disabled in this variant; the box round-trip below is
        # then a no-op, kept only for parity with the other loaders:
        # im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        # points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret[
                    'anchor_boxes'] = get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # No augmentation is applied in this variant, so the polygons are
            # passed through unchanged; rasterizing them into image-sized
            # binary masks is left to downstream code (see the sketch below).
            ret['gt_masks'] = segmentation

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
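
Unlike the other loaders, this variant stores the raw polygons under 'gt_masks' instead of rasterized masks. A hedged sketch of converting them into image-sized binary masks downstream, reusing the segmentation_to_mask helper that the other examples already call:

import numpy as np

def polygons_to_masks(polygons_per_instance, h, w):
    # Rasterize each instance's list of Nx2 polygons into one (h, w) binary mask.
    masks = [segmentation_to_mask(polys, h, w) for polys in polygons_per_instance]
    return np.asarray(masks, dtype='uint8')  # (N, h, w), values in {0, 1}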
Example No. 17
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy arrays corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert them, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(
                    multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
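
All of these loaders push boxes through the box_to_point8 / point8_to_box round-trip: each box is expanded into its four corner points, the points go through the same coordinate augmentation as the image, and a tight axis-aligned box is refit around the result, which is what makes horizontal flips and resizes apply cleanly to boxes. A sketch consistent with how these helpers typically behave; treat it as illustrative rather than the canonical implementation:

import numpy as np

def box_to_point8_sketch(boxes):
    # (k, 4) XYXY boxes -> (k*4, 2) corner points: x1y1, x2y2, x1y2, x2y1
    return boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape((-1, 2))

def point8_to_box_sketch(points):
    # (k*4, 2) augmented corners -> (k, 4) tight axis-aligned boxes
    p = points.reshape((-1, 4, 2))
    return np.concatenate((p.min(axis=1), p.max(axis=1)), axis=1)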
Example No. 18
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    roidbs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                     cfg.DATA.TRAIN,
                                     add_gt=True,
                                     add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "roidbs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy arrays corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert them, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
            'boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(
                im, boxes, is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
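
For completeness, a minimal way to exercise any of these dataflows; TestDataSpeed is tensorpack's built-in benchmark, and reset_state() must be called before iterating a dataflow manually (recent tensorpack versions make dataflows directly iterable; older ones expose get_data() instead):

from tensorpack.dataflow import TestDataSpeed

ds = get_train_dataflow()
TestDataSpeed(ds, size=100).start()  # benchmark the first 100 datapoints

ds.reset_state()                     # required before manual iteration
for datapoint in ds:
    print({k: getattr(v, 'shape', v) for k, v in datapoint.items()})
    break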