Example #1
0
def main():
    """Segment the test set using the core config stored beside the model.

    Results are written into ``args.dir``; the core configuration is read
    from its parent directory at ``configs/core.config``.
    """
    global args
    args = parser.parse_args()
    args.batch_size = 10  # force a fixed batch size for this experiment

    # The model directory is the parent of the segmentation output dir.
    model_dir = os.path.dirname(args.dir)
    cfg_path = os.path.join(model_dir, 'configs/core.config')

    cfg = CoreConfig()
    cfg.read(cfg_path)
    print('Using core configuration from {}'.format(cfg_path))

    offset_list = cfg.offsets
    print("offsets are: {}".format(offset_list))

    test_set = WaldoDataset(args.test_data, cfg, args.train_image_size)
    print('Total samples in the test set: {0}'.format(len(test_set)))

    loader = torch.utils.data.DataLoader(test_set,
                                         num_workers=1,
                                         batch_size=args.batch_size)

    out_dir = args.dir
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    segment(loader, out_dir, cfg)
Example #2
0
File: segment.py  Project: rkshuai/waldo
def main():
    """Segment the test set with a trained UNet and write a submission CSV.

    The model directory is the parent of ``args.dir``; the core/unet
    configurations and the checkpoint are loaded from there, the test set
    is segmented in batches of 1, and results are summarized via
    ``make_submission``.
    """
    global args
    args = parser.parse_args()
    args.batch_size = 1  # only segment one image for experiment

    model_dir = os.path.dirname(args.dir)
    core_config_path = os.path.join(model_dir, 'configs/core.config')
    unet_config_path = os.path.join(model_dir, 'configs/unet.config')

    core_config = CoreConfig()
    core_config.read(core_config_path)
    print('Using core configuration from {}'.format(core_config_path))

    # loading Unet configuration
    unet_config = UnetConfig()
    unet_config.read(unet_config_path, args.train_image_size)
    print('Using unet configuration from {}'.format(unet_config_path))

    offset_list = core_config.offsets
    print("offsets are: {}".format(offset_list))

    # model configurations from core config
    num_classes = core_config.num_classes
    num_colors = core_config.num_colors
    num_offsets = len(core_config.offsets)
    # model configurations from unet config
    start_filters = unet_config.start_filters
    up_mode = unet_config.up_mode
    merge_mode = unet_config.merge_mode
    depth = unet_config.depth

    model = UNet(num_classes, num_offsets,
                 in_channels=num_colors, depth=depth,
                 start_filts=start_filters,
                 up_mode=up_mode,
                 merge_mode=merge_mode)

    model_path = os.path.join(model_dir, args.model)
    if os.path.isfile(model_path):
        print("=> loading checkpoint '{}'".format(model_path))
        # map_location keeps CPU-only machines working with GPU checkpoints
        checkpoint = torch.load(model_path,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['state_dict'])
        print("loaded.")
    else:
        print("=> no checkpoint found at '{}'".format(model_path))

    # BUGFIX: switch to inference mode so dropout/batch-norm layers behave
    # deterministically during segmentation (the other segment.py variant
    # in this project does this; it was missing here).
    model.eval()

    testset = WaldoTestset(args.test_data, args.train_image_size,
                           job=args.job, num_jobs=args.num_jobs)
    print('Total samples in the test set: {0}'.format(len(testset)))

    dataloader = torch.utils.data.DataLoader(
        testset, num_workers=1, batch_size=args.batch_size)

    segment_dir = args.dir
    if not os.path.exists(segment_dir):
        os.makedirs(segment_dir)
    segment(dataloader, segment_dir, model, core_config)
    make_submission(segment_dir, args.csv)
Example #3
0
def main():
    """Convert every listed page image to a mask and save it to args.out_dir."""
    config = CoreConfig()
    config.num_colors = 3
    saver = DataSaver(args.out_dir, config)
    # Each entry is (madcat xml path, image path, image id).
    for entry in get_file_list():
        mask = get_mask_from_page_image(entry[0], entry[1],
                                        args.max_image_size)
        saver.write_image(entry[2], mask)
    saver.write_index()
Example #4
0
File: visualize.py  Project: danpovey/waldo
def visualize_object(x, transparency):
    """Overlay a translucent mask on an image and display it.

    ``x`` is a dict with:
      x['img']:  numpy array of shape (num_class, width, height)
      x['mask']: numpy array of the same dimensions, each element holding
                 the object id the pixel belongs to.
    """
    config = CoreConfig()
    # Channel count is taken from the image's leading axis.
    config.num_colors = x['img'].shape[0]
    visualize_mask(x, config, transparency)
Example #5
0
def get_mask_from_page_image(madcat_file_path, image_file_name, max_size):
    """Given a page image, extract the page image mask from it.

    Input
    -----
    madcat_file_path (string): complete path and name of the madcat xml file
                               corresponding to the page image.
    image_file_name (string): complete path and name of the page image.
    max_size: maximum image size, forwarded to
              scale_down_image_with_objects.
    """
    objects = _get_bounding_box(madcat_file_path)
    img = Image.open(image_file_name).convert("RGB")
    im_arr = np.array(img)

    config = CoreConfig()
    config.num_colors = 3  # RGB

    im_height = im_arr.shape[0]
    im_width = im_arr.shape[1]

    # Clamp every polygon point into the image bounds.
    # BUGFIX: the per-object dict was named 'object', shadowing the builtin;
    # the append loop is now a comprehension.
    validated_objects = []
    for original_object in objects:
        resized_pp = [
            _validate_and_update_point(point, im_width, im_height)
            for point in original_object['polygon']
        ]
        validated_objects.append({'polygon': resized_pp})

    validated_image_with_objects = {
        'img': im_arr,
        'objects': validated_objects
    }

    # Scale down, then pad to a square before rasterizing the mask.
    scaled_image_with_objects = scale_down_image_with_objects(
        validated_image_with_objects, config, max_size)

    img_padded = make_square_image_with_padding(
        scaled_image_with_objects['img'], config)

    padded_image_with_objects = {
        'img': img_padded,
        'objects': scaled_image_with_objects['objects']
    }

    return convert_to_mask(padded_image_with_objects, config)
Example #6
0
def test():
    """Very non-thorough smoke test: write a default unet config to disk,
    read it back, and write it again."""
    core_cfg = CoreConfig()
    unet_cfg = UnetConfig()

    unet_cfg.write('foo')
    # NOTE(review): second argument looks like the core config here, while
    # other callers pass a train_image_size — confirm UnetConfig.read's
    # expected signature.
    unet_cfg.read('foo', core_cfg)
    unet_cfg.write('foo')
Example #7
0
    def setUp(self):
        """Prepare shared fixtures used by every test case."""
        dataset = DatasetICDAR2015(DL_DIR)
        loaded = dataset.load_data()
        # Pick one random test sample to exercise the visualizer on.
        self.test_object = random.choice(loaded['test'])

        self.c = CoreConfig()
        # Channel count comes from the sampled image's last axis.
        self.c.num_colors = self.test_object['img'].shape[2]

        self.transparency = TRANSPARENCY
Example #8
0
    def __init__(self, data_dir):
        """Constructor for the class.

        Validates the path to ICDAR 2015 data and stores it on success.

        Raises:
            ValueError: if the path is missing, not a directory, empty, or
                has mismatched image/label counts.
        """
        self.data_dir = ""

        self.core_config = CoreConfig()
        self.core_config.num_colors = 3  # ICDAR page images are RGB

        if not self._validate_path(data_dir):
            # BUGFIX: the last two literals concatenated to
            # "unequal numberof images" — a space was missing.
            raise ValueError(
                "The path is invalid. Either of the following"
                " could be wrong:\n"
                "- Path does not exist.\n"
                "- Path does not point to a directory.\n"
                "- Directory is empty.\n"
                "- The training or test directories have unequal number"
                " of images and labels leading to a mismatch.")
        else:
            self.data_dir = data_dir
Example #9
0
File: train.py  Project: jtrmal/waldo
def main():
    """Train a UNet segmentation model.

    Loads core/unet configurations (or defaults when none are given),
    builds train/val loaders, optionally resumes from a checkpoint, runs
    the epoch loop, and finally writes example outputs to ``args.dir/imgs``.
    """
    global args, best_loss
    args = parser.parse_args()

    if args.tensorboard:
        from tensorboard_logger import configure
        print("Using tensorboard")
        configure("%s" % (args.dir))

    # loading core configuration
    c_config = CoreConfig()
    if args.core_config == '':
        # BUGFIX: previously this fell through to the existence check,
        # where os.path.exists('') is False, so the script exited and the
        # advertised default configuration was unreachable.
        print('No core config file given, using default core configuration')
    elif not os.path.exists(args.core_config):
        sys.exit('Cannot find the config file: {}'.format(args.core_config))
    else:
        c_config.read(args.core_config)
        print('Using core configuration from {}'.format(args.core_config))

    # loading Unet configuration
    u_config = UnetConfig()
    if args.unet_config == '':
        # BUGFIX: same unreachable-default problem as the core config.
        print('No unet config file given, using default unet configuration')
    elif not os.path.exists(args.unet_config):
        sys.exit('Cannot find the unet configuration file: {}'.format(
            args.unet_config))
    else:
        # need train_image_size for validation
        u_config.read(args.unet_config, args.train_image_size)
        print('Using unet configuration from {}'.format(args.unet_config))

    offset_list = c_config.offsets
    print("offsets are: {}".format(offset_list))

    # model configurations from core config
    num_classes = c_config.num_classes
    num_colors = c_config.num_colors
    num_offsets = len(c_config.offsets)
    # model configurations from unet config
    start_filters = u_config.start_filters
    up_mode = u_config.up_mode
    merge_mode = u_config.merge_mode
    depth = u_config.depth

    train_data = args.train_dir + '/train'
    val_data = args.train_dir + '/val'

    trainset = WaldoDataset(train_data, c_config, args.train_image_size)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              num_workers=4,
                                              batch_size=args.batch_size,
                                              shuffle=True)

    valset = WaldoDataset(val_data, c_config, args.train_image_size)
    valloader = torch.utils.data.DataLoader(valset,
                                            num_workers=4,
                                            batch_size=args.batch_size)

    NUM_TRAIN = len(trainset)
    NUM_VAL = len(valset)
    NUM_ALL = NUM_TRAIN + NUM_VAL
    print('Total samples: {0} \n'
          'Using {1} samples for training, '
          '{2} samples for validation'.format(NUM_ALL, NUM_TRAIN, NUM_VAL))

    # create model
    model = UNet(num_classes,
                 num_offsets,
                 in_channels=num_colors,
                 depth=depth,
                 start_filts=start_filters,
                 up_mode=up_mode,
                 merge_mode=merge_mode).cuda()

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # define optimizer
    # optimizer = t.optim.Adam(model.parameters(), lr=1e-3)
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                nesterov=args.nesterov,
                                weight_decay=args.weight_decay)

    # Train
    for epoch in range(args.start_epoch, args.epochs):
        Train(trainloader, model, optimizer, epoch)
        val_loss = Validate(valloader, model, epoch)
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                # BUGFIX: the resume path above reads
                # checkpoint['best_loss'], but only 'best_prec1' was saved,
                # so resuming raised KeyError. Write both keys so any
                # external reader of 'best_prec1' keeps working.
                'best_loss': best_loss,
                'best_prec1': best_loss,
            }, is_best)
    print('Best validation loss: ', best_loss)

    # visualize some example outputs
    outdir = '{}/imgs'.format(args.dir)
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    sample(model, valloader, outdir, c_config)
Example #10
0
            img = np.array(Image.open(path + '/images/' + id_ + '.png'))
            if len(img.shape) == 2 and channels == 3:
                # expand and reshape it to size:(height, width, channels)
                img = np.moveaxis(np.array([img, img, img]), 0, -1)
            else:
                img = img[:, :, :channels]
            test_item['img'] = img
            test_saver.write_image(id_, test_item)
        test_saver.write_index()
        print('Done with {} set!'.format(split_name))


if __name__ == '__main__':
    # NOTE: the original 'global args' here was dropped — 'global' is a
    # no-op at module scope; module-level assignment already creates the
    # global that functions using 'global args' will see.
    args = parser.parse_args()
    cfg = CoreConfig()
    cfg.read(args.cfg)

    # Process each split unless its image-id index file already exists.
    split_names = ['train', 'stage1_test', 'stage2_test_final']
    for split in split_names:
        ids_file = "{0}/{1}/image_ids.txt".format(args.outdir, split)
        if not os.path.exists(ids_file):
            # Re-seed per split so each split's sampling is reproducible
            # independently of how many splits were processed before it.
            random.seed(args.seed)
            DataProcess(args.indir,
                        args.outdir,
                        split,
                        cfg,
                        train_prop=args.train_prop)
        else:
            print(
                'Not processing {} data as it is already there.'.format(split))
Example #11
0
File: segment.py  Project: adelra/waldo
def main():
    """Segment the test set with a trained UNet and visualize one result.

    Loads core/unet configs and a checkpoint from ``args.dir``, runs the
    sampler over the test loader, performs object segmentation on the
    first image, and displays the predicted mask.
    """
    global args
    args = parser.parse_args()
    args.batch_size = 1  # only segment one image for experiment

    cfg_path = os.path.join(args.dir, 'configs/core.config')
    unet_cfg_path = os.path.join(args.dir, 'configs/unet.config')

    c_cfg = CoreConfig()
    c_cfg.read(cfg_path)
    print('Using core configuration from {}'.format(cfg_path))

    # loading Unet configuration
    u_cfg = UnetConfig()
    u_cfg.read(unet_cfg_path, args.train_image_size)
    print('Using unet configuration from {}'.format(unet_cfg_path))

    offset_list = c_cfg.offsets
    print("offsets are: {}".format(offset_list))

    # Model hyperparameters come straight from the two config objects.
    num_classes = c_cfg.num_classes
    model = UNet(num_classes,
                 len(c_cfg.offsets),
                 in_channels=c_cfg.num_colors,
                 depth=u_cfg.depth,
                 start_filts=u_cfg.start_filters,
                 up_mode=u_cfg.up_mode,
                 merge_mode=u_cfg.merge_mode)

    model_path = os.path.join(args.dir, args.model)
    if os.path.isfile(model_path):
        print("=> loading checkpoint '{}'".format(model_path))
        # map_location keeps CPU-only machines working with GPU checkpoints
        checkpoint = torch.load(model_path,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['state_dict'])
        print("loaded.")
    else:
        print("=> no checkpoint found at '{}'".format(model_path))

    model.eval()  # convert the model into evaluation mode

    testset = WaldoDataset(args.test_data, c_cfg, args.train_image_size)
    print('Total samples in the test set: {0}'.format(len(testset)))

    loader = torch.utils.data.DataLoader(testset,
                                         num_workers=1,
                                         batch_size=args.batch_size)

    segment_dir = '{}/segment'.format(args.dir)
    if not os.path.exists(segment_dir):
        os.makedirs(segment_dir)
    img, class_pred, adj_pred = sample(model, loader, segment_dir, c_cfg)

    # Segment the first (and only) image of the batch.
    seg = ObjectSegmenter(class_pred[0].detach().numpy(),
                          adj_pred[0].detach().numpy(), num_classes,
                          offset_list)
    mask_pred, object_class = seg.run_segmentation()
    # Repack as (height, width, color) for the visualizer.
    x = {
        'img': np.moveaxis(img[0].numpy(), 0, -1),
        'mask': mask_pred.astype(int),
        'object_class': object_class,
    }
    visualize_mask(x, c_cfg)