Exemplo n.º 1
0
def test(model, device, test_loader, criterion, padding_value=32):
    """Evaluate *model* on *test_loader*.

    Each batch is padded to a multiple of *padding_value* before the
    forward pass and the output is unpadded back to the original
    spatial shape before computing the loss.

    Args:
        model: network to evaluate (switched to eval mode).
        device: torch device for data and targets.
        test_loader: yields (data, target, id) batches.
        criterion: loss function on (output, target).
        padding_value: pad inputs to a multiple of this value.

    Returns:
        (accuracy_percent, avg_loss): pixel accuracy in percent and the
        dataset-averaged loss as a numpy scalar.
    """
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for idx, (data, target, _id) in enumerate(test_loader):
            data, target = data.to(device), target.to(device,
                                                      dtype=torch.int64)
            shape = list(data.shape)[2:]  # original (H, W) before padding
            padded = pad(data, padding_value)

            model_input = padded.float()

            output = model(model_input)
            output = unpad(output, shape)
            test_loss += criterion(output, target)
            _, predicted = torch.max(output.data, 1)

            total += target.nelement()
            correct += predicted.eq(target.data).sum().item()
            # Fixed: the format string was missing its closing bracket.
            logger.info('\r Image [{}/{}]'.format(idx * len(data),
                                                  len(test_loader.dataset)))

    test_loss /= len(test_loader.dataset)

    logger.info(
        '\nTest set: Average loss: {:.4f}, Length of Test Set: {} (Accuracy{:.6f}%)\n'
        .format(test_loss, len(test_loader.dataset), 100. * correct / total))

    return 100. * correct / total, test_loss.data.cpu().numpy()
Exemplo n.º 2
0
def main():
    """CLI entry point for baseline and layout segmentation.

    Loads one or more trained models into an ensemble, predicts a
    probability map per input image, extracts baselines, optionally
    derives layout regions, draws debug images and writes PageXML
    output.  When the measured median line height lies outside the
    configured bounds, the image is rescaled once and processed again
    at the new resolution.
    """
    from segmentation.network import TrainSettings, dirs_to_pandaframe, load_image_map_from_file, MaskSetting, MaskType, \
        PCGTSVersion, XMLDataset, Network, compose, MaskGenerator, MaskDataset
    from segmentation.settings import Architecture
    from segmentation.modules import ENCODERS
    # Palette cycled over when drawing baselines / layout regions.
    colors = [(255, 0, 0),
              (0, 255, 0),
              (0, 0, 255),
              (255, 255, 0),
              (0, 255, 255),
              (255, 0, 255)]

    parser = argparse.ArgumentParser()
    parser.add_argument("--load", type=str, nargs="*", default=[],
                        help="load models and use it for inference")
    parser.add_argument("--image_path", type=str, nargs="*", default=[],
                        help="load models and use it for inference")
    parser.add_argument("--scale_area", type=int, default=1000000,
                        help="max pixel amount of an image")
    parser.add_argument("--output_path_debug_images", type=str, default=None, help="Directory of the debug images")
    parser.add_argument("--layout_prediction", action="store_true", help="Generates Layout of the page "
                                                                         "based on the baselines")
    parser.add_argument("--show_baselines", action="store_true", help="Draws baseline to the debug image")
    parser.add_argument("--show_layout", action="store_true", help="Draws layout regions to the debug image")
    parser.add_argument("--output_xml", action="store_true", help="Outputs Xml Files")
    parser.add_argument("--output_xml_path", type=str, default=None, help="Directory of the XML output")
    parser.add_argument("--max_line_height", type=int, default=None,
                        help="If the average line_height of an document is bigger then the specified value, "
                             "the document is scaled down an processed again on the new resolution. "
                             "Proposed Value == 22")
    parser.add_argument("--min_line_height", type=int, default=None,
                        help="If the average line_height of an document is smaller then the specified value, "
                             "the document is scaled up an processed again on the new resolution")
    parser.add_argument("--marginalia_postprocessing", action="store_true", help="Enables marginalia postprocessing")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--processes", type=int, default=8)

    args = parser.parse_args()
    files = list(itertools.chain.from_iterable([glob.glob(x) for x in args.image_path]))
    networks = []
    bboxs = None
    for x in args.load:
        p_setting = PredictorSettings(MODEL_PATH=x)
        network = Network(p_setting)
        networks.append(network)
    ensemble = Ensemble(networks)
    for file in files:
        baselines = None
        logger.info("Processing: {} \n".format(file))
        img = Image.open(file)  # open image
        scale_factor_multiplier = 1
        while True:
            p_map, scale_factor = ensemble(file, scale_area=args.scale_area,
                                           additional_scale_factor=scale_factor_multiplier)
            baselines = extract_baselines_from_probability_map(p_map, processes=args.processes)
            # `image` is at prediction resolution; `img` stays at the
            # original resolution for drawing and output.
            image = img.resize((int(scale_factor * img.size[0]), int(scale_factor * img.size[1])))
            img = img.convert('RGB')
            draw = ImageDraw.Draw(img)
            if baselines is not None:
                from segmentation.preprocessing.ocrupus import binarize
                binary = (binarize(np.array(image).astype("float64"))).astype("uint8")
                with PerformanceCounter(function_name="Baseline Height Calculation mp"):
                    out = get_top_of_baselines(baselines, image=1 - binary, processes=1)
                heights = [x[2] for x in out]

                # Rescale-and-retry at most once: only while the
                # multiplier still has its initial value of 1.
                if (args.max_line_height is not None or args.min_line_height is not None) \
                        and scale_factor_multiplier == 1:

                    if (args.max_line_height is not None and np.median(heights) > args.max_line_height) or \
                            (args.min_line_height is not None and np.median(heights) < args.min_line_height):
                        # Fixed: the original always read args.max_line_height
                        # here and crashed (None - 7) when only
                        # --min_line_height was supplied.
                        target_height = args.max_line_height \
                            if args.max_line_height is not None else args.min_line_height
                        scale_factor_multiplier = (target_height - 7) / np.median(heights)
                        logger.info("Resizing image Avg:{}, Med:{} \n".format(np.mean(heights), np.median(heights)))
                        continue
                if args.layout_prediction:
                    with PerformanceCounter(function_name="Baseline Height Calculation "):
                        bboxs = analyse(baselines=baselines, image=(1 - binary), image2=image)
                    from segmentation.postprocessing.marginialia_detection import marginalia_detection
                    if args.marginalia_postprocessing:
                        bboxs = marginalia_detection(bboxs, image)
                        baselines = [bl.baseline for cluster in bboxs for bl in cluster.baselines]
                        bboxs = analyse(baselines=baselines, image=(1 - binary), image2=image)
                    bboxs = connect_bounding_box(bboxs)
                    # Map region coordinates back to the original resolution.
                    bboxs = [x.scale(1 / scale_factor) for x in bboxs]
                    if args.show_layout:
                        for ind, x in enumerate(bboxs):
                            if x.bbox:
                                draw.line(x.bbox + [x.bbox[0]], fill=colors[ind % len(colors)], width=3)
                                draw.text((x.bbox[0]), "type:{}".format(x.baselines[0].cluster_type))

            scale_baselines(baselines, 1 / scale_factor)
            # Fixed: the original tested args.show_baselines twice (outer
            # and inner if).
            if args.show_baselines and baselines is not None and len(baselines) > 0:
                for ind, x in enumerate(baselines):
                    flat = list(itertools.chain.from_iterable(x))
                    draw.line(flat, fill=colors[ind % len(colors)], width=4)

            if args.output_path_debug_images:
                basename = "debug_" + os.path.basename(file)
                file_path = os.path.join(args.output_path_debug_images, basename)
                img.save(file_path)

            if args.output_xml and args.output_xml_path is not None:
                from segmentation.gui.xml_util import TextRegion, BaseLine, TextLine, XMLGenerator
                regions = []

                if bboxs is not None:
                    for box in bboxs:
                        text_lines = []
                        for b_line in box.baselines:
                            # Close the text-region polygon: baseline plus the
                            # reversed top edge shifted up by the line height.
                            text_region_coord = b_line.baseline + list(reversed(
                                [(x, y - b_line.height) for x, y in b_line.baseline]))
                            text_lines.append(TextLine(coords=text_region_coord, baseline=BaseLine(b_line.baseline)))
                        regions.append(TextRegion(text_lines, coords=box.bbox))
                elif baselines is not None:
                    text_lines = []
                    for b_line in baselines:
                        text_lines.append(TextLine(coords=None, baseline=BaseLine(b_line)))
                    regions.append(TextRegion(text_lines, coords=None))

                # Fixed: the original generated and saved the XML twice
                # when layout regions were available.
                xml_gen = XMLGenerator(img.size[0], img.size[1], os.path.basename(file), regions=regions)
                xml_gen.save_textregions_as_xml(args.output_xml_path)

            if args.debug:
                from matplotlib import pyplot
                pyplot.imshow(np.array(img))
                pyplot.show()
            break
Exemplo n.º 3
0
def extract_baselines(image_map: np.array,
                      base_line_index=1,
                      base_line_border_index=2,
                      original=None,
                      processes=1):
    """Extract baseline polylines from a labelled probability map.

    Connected components of the baseline class are clustered with
    DBSCAN on a pairwise distance matrix, merged per cluster, and then
    normalized so that each x-column contributes exactly one point.

    Args:
        image_map: 2D array of per-pixel class indices.
        base_line_index: class index marking baseline pixels.
        base_line_border_index: class index marking baseline-border pixels.
        original: unused; kept for interface compatibility.
        processes: worker processes for the distance-matrix computation.

    Returns:
        A list of baselines, each a list of (x, y) points, or ``None``
        when no usable baseline components were found.
    """
    # scipy.ndimage.measurements is a deprecated alias; import directly.
    from scipy.ndimage import label

    base_ind = np.where(image_map == base_line_index)
    base_border_ind = np.where(image_map == base_line_border_index)

    baseline = np.zeros(image_map.shape)
    baseline_border = np.zeros(image_map.shape)
    baseline[base_ind] = 1
    baseline_border[base_border_ind] = 1
    # 8-connectivity labelling of the baseline pixels.
    baseline_ccs, n_baseline_ccs = label(baseline,
                                         structure=[[1, 1, 1], [1, 1, 1],
                                                    [1, 1, 1]])

    baseline_ccs = [
        np.where(baseline_ccs == x) for x in range(1, n_baseline_ccs + 1)
    ]
    # Drop tiny components (noise) of 10 pixels or fewer.
    baseline_ccs = [
        BaseLineCCs(x, 'baseline') for x in baseline_ccs if len(x[0]) > 10
    ]

    all_ccs = baseline_ccs  # + baseline_border_ccs
    logger.info("Extracted {} CCs from probability map \n".format(
        len(all_ccs)))

    def calculate_distance_matrix(ccs, maximum_angle=5, processes=8):
        # Pairwise distances between components, computed in parallel.
        distance_matrix = np.zeros((len(ccs), len(ccs)))

        from functools import partial
        distance_func = partial(calculate_distance,
                                ccs=ccs,
                                maximum_angle=maximum_angle,
                                baseline_border_image=baseline_border)
        indexes_ccs = list(range(len(ccs)))
        with multiprocessing.Pool(processes=processes,
                                  maxtasksperchild=100) as p:
            out = list(p.map(distance_func, indexes_ccs))
        for x in out:
            indexes, values = x
            distance_matrix[indexes] = values
        return distance_matrix

    with PerformanceCounter(function_name="calculate_distance_matrix"):
        matrix = calculate_distance_matrix(all_ccs, processes=processes)

    from sklearn.cluster import DBSCAN
    if np.sum(matrix) == 0:
        # Fixed: was a bare print(); use the module logger like the rest
        # of the file.
        logger.info("Empty Image \n")
        return
    t = DBSCAN(eps=100, min_samples=1, metric="precomputed").fit(matrix)

    # Merge the pixel coordinates of all components within each cluster.
    ccs = []
    for x in np.unique(t.labels_):
        ind = np.where(t.labels_ == x)
        line = []
        for d in ind[0]:
            if all_ccs[d].type == 'baseline':
                line.append(all_ccs[d])
        if len(line) > 0:
            ccs.append((np.concatenate([x.cc[0] for x in line]),
                        np.concatenate([x.cc[1] for x in line])))

    ccs = [list(zip(x[0], x[1])) for x in ccs]

    from typing import List, Tuple
    from collections import defaultdict

    def normalize_connected_components(cc_list: List[List[Tuple[int, int]]]):
        # Normalize the CCs (line segments), so that the height of each cc is normalized to one pixel
        def normalize(point_list):
            normalized_cc_list = []
            for cc in point_list:
                cc_dict = defaultdict(list)
                for y, x in cc:
                    cc_dict[x].append(y)
                normalized_cc = []
                for key in sorted(cc_dict.keys()):
                    value = cc_dict[key]
                    # Round-half-up mean of the y values in this column.
                    normalized_cc.append(
                        [int(np.floor(np.mean(value) + 0.5)), key])
                normalized_cc_list.append(normalized_cc)
            return normalized_cc_list

        return normalize(cc_list)

    ccs = normalize_connected_components(ccs)
    # Flip (y, x) points to (x, y) for the caller.
    new_ccs = []
    for cc_points in ccs:
        new_ccs.append([coord_tup[::-1] for coord_tup in cc_points])

    return new_ccs
Exemplo n.º 4
0
def train(model,
          device,
          train_loader,
          optimizer,
          epoch,
          criterion,
          accumulation_steps=8,
          color_map=None,
          callback: TrainProgressCallbackWrapper = None,
          padding_value=32,
          debug=False):
    """Train *model* for one epoch with gradient accumulation.

    Args:
        model: network to train (modified in place).
        device: torch device for data and targets.
        train_loader: yields (data, target, id) batches.
        optimizer: a single optimizer or an iterable of optimizers.
        epoch: current epoch number (logging only).
        criterion: loss function on (output, target).
        accumulation_steps: backward passes per optimizer step.
        color_map: optional colormap for debug visualisation.
        callback: optional progress callback, notified per batch.
        padding_value: pad inputs to a multiple of this value.
        debug: when True, show a debug plot for every batch.
    """
    def debug_img(mask, target, original, color_map):
        # Show prediction vs. target next to the de-normalised input.
        if color_map is not None:
            from matplotlib import pyplot as plt
            mean = [0.485, 0.456, 0.406]   # ImageNet normalisation stats
            stds = [0.229, 0.224, 0.225]
            mask = torch.argmax(mask, dim=1)
            mask = torch.squeeze(mask).cpu()
            original = original.permute(0, 2, 3, 1)
            original = torch.squeeze(original).cpu().numpy()
            original = original * stds
            original = original + mean
            original = original * 255
            original = original.astype(int)
            f, ax = plt.subplots(1, 3, True, True)
            target = torch.squeeze(target).cpu()
            ax[0].imshow(label_to_colors(mask=target, colormap=color_map))
            ax[1].imshow(label_to_colors(mask=mask, colormap=color_map))
            ax[2].imshow(original)

            plt.show()

    model.train()
    total_train = 0
    correct_train = 0

    for batch_idx, (data, target, _id) in enumerate(train_loader):

        data, target = data.to(device), target.to(device, dtype=torch.int64)

        shape = list(data.shape)[2:]  # original (H, W) before padding
        padded = pad(data, padding_value)

        model_input = padded.float()

        output = model(model_input)
        output = unpad(output, shape)
        loss = criterion(output, target)
        loss = loss / accumulation_steps  # scale for gradient accumulation
        loss.backward()
        _, predicted = torch.max(output.data, 1)
        total_train += target.nelement()
        correct_train += predicted.eq(target.data).sum().item()
        train_accuracy = 100 * correct_train / total_train
        logger.info(
            '\r Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.6f}'
            .format(epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item(),
                    train_accuracy))
        if debug:
            # Fixed: the debug flag previously had no effect — debug_img
            # was defined but never called.
            debug_img(output, target, data, color_map)
        if (batch_idx + 1) % accumulation_steps == 0:  # Wait for several backward steps
            if isinstance(optimizer, Iterable):  # Now we can do an optimizer step
                for opt in optimizer:
                    opt.step()
            else:
                optimizer.step()
            model.zero_grad()  # Reset gradients tensors
        if callback:
            callback.on_batch_end(batch_idx,
                                  loss=loss.item(),
                                  acc=train_accuracy)
        gc.collect()
Exemplo n.º 5
0
    def train(self, callback=None):
        """Run the configured training loop, validating and checkpointing per epoch.

        Requires ``self.settings`` to be a ``TrainSettings`` instance;
        otherwise a warning is logged and the call returns immediately.
        After each epoch the model is evaluated on the validation set,
        and the weights with the highest accuracy so far are saved to
        ``OUTPUT_PATH + '.torch'`` along with the settings as JSON.

        Args:
            callback: optional progress callback; wrapped in a
                TrainProgressCallbackWrapper and notified per batch/epoch.
        """

        if not isinstance(self.settings, TrainSettings):
            logger.warning(
                'Settings is of type: {}. Pass settings to network object of type Train to train'
                .format(str(type(self.settings))))
            return

        if callback:
            callback = TrainProgressCallbackWrapper(
                len(self.settings.TRAIN_DATASET), callback)

        criterion = nn.CrossEntropyLoss()
        self.model.float()
        opt = self.settings.OPTIMIZER.getOptimizer()
        try:
            # Encoder / decoder / head may train at different rates.
            optimizer1 = opt(self.model.encoder.parameters(),
                             lr=self.settings.LEARNINGRATE_ENCODER)
            optimizer2 = opt(self.model.decoder.parameters(),
                             lr=self.settings.LEARNINGRATE_DECODER)
            optimizer3 = opt(self.model.segmentation_head.parameters(),
                             lr=self.settings.LEARNINGRATE_SEGHEAD)
            optimizer = [optimizer1, optimizer2, optimizer3]
        except AttributeError:
            # Fixed: was a bare `except:`. Models without separate
            # encoder/decoder/segmentation_head attributes fall back to a
            # single optimizer over all parameters.
            optimizer = opt(self.model.parameters(),
                            lr=self.settings.LEARNINGRATE_SEGHEAD)

        train_loader = data.DataLoader(
            dataset=self.settings.TRAIN_DATASET,
            batch_size=self.settings.TRAIN_BATCH_SIZE,
            shuffle=True,
            num_workers=self.settings.PROCESSES)
        val_loader = data.DataLoader(dataset=self.settings.VAL_DATASET,
                                     batch_size=self.settings.VAL_BATCH_SIZE,
                                     shuffle=False)
        pseudo_loader = None
        if self.settings.PSEUDO_DATASET is not None:
            pseudo_loader = data.DataLoader(
                dataset=self.settings.PSEUDO_DATASET,
                batch_size=self.settings.TRAIN_BATCH_SIZE,
                shuffle=True)
        highest_accuracy = -1
        logger.info(str(self.model) + "\n")
        logger.info(str(self.model_params) + "\n")
        # Fixed: stray '"' inside the log message.
        logger.info('Training started ...\n')
        for epoch in range(0, self.settings.EPOCHS):
            if self.settings.PSEUDO_DATASET is not None:
                train_unlabeled(
                    self.model,
                    device=self.device,
                    train_loader=train_loader,
                    unlabeled_loader=pseudo_loader,
                    optimizer=optimizer,
                    epoch=epoch,
                    criterion=criterion,
                    accumulation_steps=self.settings.BATCH_ACCUMULATION,
                    color_map=self.color_map,
                    train_step=50,
                    alpha_factor=3,
                    epoch_conv=15,
                    padding_value=self.padding_value)
            else:
                train(self.model,
                      self.device,
                      train_loader,
                      optimizer,
                      epoch,
                      criterion,
                      accumulation_steps=self.settings.BATCH_ACCUMULATION,
                      color_map=self.color_map,
                      callback=callback,
                      padding_value=self.padding_value)
            accuracy, loss = test(self.model,
                                  self.device,
                                  val_loader,
                                  criterion=criterion,
                                  padding_value=self.padding_value)
            if self.settings.OUTPUT_PATH is not None:

                if accuracy > highest_accuracy:
                    logger.info('Saving model to {}\n'.format(
                        self.settings.OUTPUT_PATH + ".torch"))
                    torch.save(self.model.state_dict(),
                               self.settings.OUTPUT_PATH + ".torch")
                    file = self.settings.OUTPUT_PATH + '.json'
                    with open(file, 'w') as filetowrite:
                        filetowrite.write(self.settings.to_json())

                    highest_accuracy = accuracy
                if callback:
                    callback.on_epoch_end(epoch=epoch, acc=highest_accuracy)
Exemplo n.º 6
0
    def __init__(self,
                 settings: Union[TrainSettings, PredictorSettings],
                 color_map=None):
        """Build the segmentation network from train or predictor settings.

        For PredictorSettings, architecture/encoder/classes are read from
        a '.meta' or '.json' sidecar next to MODEL_PATH; for
        TrainSettings they come from the settings object itself. Weights
        are loaded from settings.MODEL_PATH when present (failures are
        logged and skipped).

        Args:
            settings: TrainSettings or PredictorSettings instance.
            color_map: optional colormap for visualising mask data.
        """
        from segmentation.modules import Architecture
        self.settings = settings
        # Defaults; filled in from settings / sidecar files below.
        json_file = None
        architecture: Architecture = None
        encoder: str = None
        classes: int = None
        encoder_depth: int = None
        decoder_channel: Tuple[int, ...] = None
        padding_value = None
        custom_model = None
        if isinstance(settings, PredictorSettings):

            import os
            # Legacy '.meta' sidecar: simple "Key value" lines.
            if os.path.exists(
                    os.path.splitext(settings.MODEL_PATH)[0] + '.meta'):
                with open(
                        str(os.path.splitext(settings.MODEL_PATH)[0]) +
                        '.meta', 'r') as f:
                    for x in f.readlines():
                        x = x.strip('\n')
                        if x.startswith('Encoder'):
                            encoder = x.split(" ")[1]
                        if x.startswith('Architecture'):
                            architecture = Architecture(x.split(" ")[1])
                        if x.startswith('Classes'):
                            classes = int(x.split(" ")[1])
            # Newer '.json' sidecar: full settings dump.
            elif os.path.exists(
                    os.path.splitext(settings.MODEL_PATH)[0] + '.json'):
                with open(
                        str(os.path.splitext(settings.MODEL_PATH)[0]) +
                        '.json', 'r') as f:
                    import json
                    json_file = json.load(f)
            if self.settings.PREDICT_DATASET is not None:
                self.settings.PREDICT_DATASET.preprocessing = sm.encoders.get_preprocessing_fn(
                    encoder if encoder else json_file["ENCODER"])
        elif isinstance(settings, TrainSettings):
            custom_model = self.settings.CUSTOM_MODEL
            encoder = self.settings.ENCODER
            architecture = self.settings.ARCHITECTURE
            classes = self.settings.CLASSES
            encoder_depth = self.settings.ENCODER_DEPTH
            decoder_channel = self.settings.DECODER_CHANNELS
            padding_value = self.settings.PADDING_VALUE
            self.settings.TRAIN_DATASET.preprocessing = sm.encoders.get_preprocessing_fn(
                self.settings.ENCODER)
            self.settings.VAL_DATASET.preprocessing = sm.encoders.get_preprocessing_fn(
                self.settings.ENCODER)
        device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(
            'Device: {} is used for training/prediction\n'.format(device))
        # NOTE(review): json_file is None when a '.meta' sidecar was used,
        # so this lookup would raise TypeError in that case — confirm.
        custom_model = custom_model if custom_model else json_file[
            "CUSTOM_MODEL"]
        self.device = torch.device(device)
        self.model_params = None
        if not custom_model:
            architecture = architecture if architecture else Architecture(
                json_file["ARCHITECTURE"])

            self.model_params = architecture.get_architecture_params()
            self.model_params[
                'classes'] = classes if classes else json_file["CLASSES"]
            self.model_params['decoder_use_batchnorm'] = False
            self.model_params[
                'encoder_name'] = encoder if encoder else json_file["ENCODER"]
            self.model_params['encoder_depth'] = json_file[
                "ENCODER_DEPTH"] if json_file else encoder_depth
            # NOTE(review): decoder_channel is collected above but never
            # forwarded; the original assignment was left disabled:
            #   self.model_params['decoder_channels'] = json_file["DECODER_CHANNELS"] if json_file else decoder_channel
            self.model = get_model(architecture, self.model_params)
        else:
            from segmentation.model import CustomModel
            import json
            if isinstance(custom_model, dict):
                from segmentation.settings import CustomModelSettings
                custom_model = CustomModelSettings(**custom_model)
            kwargs = custom_model.get_kwargs()
            self.model = CustomModel(custom_model.TYPE)()(**kwargs)

        if self.settings.MODEL_PATH:
            try:
                self.model.load_state_dict(
                    torch.load(self.settings.MODEL_PATH,
                               map_location=torch.device(device)))
            except Exception:
                logger.warning('Could not load model weights, ... Skipping\n')

        self.color_map = color_map  # Optional for visualisation of mask data
        self.model.to(self.device)
        self.encoder = encoder if encoder else json_file["ENCODER"]
        # Falls back to the json value when padding_value is unset (note:
        # a padding_value of 0 also triggers the fallback via truthiness).
        self.padding_value = padding_value if padding_value else json_file[
            "PADDING_VALUE"]
Exemplo n.º 7
0
def train_unlabeled(model,
                    device,
                    train_loader,
                    unlabeled_loader,
                    optimizer,
                    epoch,
                    criterion,
                    accumulation_steps=8,
                    color_map=None,
                    train_step=50,
                    alpha_factor=3,
                    epoch_conv=15,
                    debug=False,
                    padding_value=32):
    """Pseudo-label training epoch over *unlabeled_loader*.

    The model first predicts pseudo labels for each unlabeled batch
    (eval mode, no gradients), then trains on them; the pseudo-label
    loss is ramped up linearly by ``alpha_weight`` until ``epoch_conv``
    epochs. Every ``train_step`` batches, one supervised pass over
    *train_loader* is run as a correction with real labels.

    Args are as in ``train``; additionally:
        unlabeled_loader: yields (data, target, id) without trusted labels.
        train_step: interval (in batches) for the supervised correction.
        alpha_factor: maximum pseudo-label loss weight.
        epoch_conv: epoch at which the weight reaches alpha_factor.
    """
    def alpha_weight(epoch):
        # Linear ramp of the pseudo-label weight, capped at alpha_factor.
        return min((epoch / epoch_conv) * alpha_factor, alpha_factor)

    def _debug_plot(mask, target, original, color_map):
        # Fixed: this helper was named `debug`, shadowing the parameter,
        # so `if debug:` below tested the (always-truthy) function object.
        if color_map is not None:
            from matplotlib import pyplot as plt
            mean = [0.485, 0.456, 0.406]   # ImageNet normalisation stats
            stds = [0.229, 0.224, 0.225]
            mask = torch.argmax(mask, dim=1)
            mask = torch.squeeze(mask)
            original = original.permute(0, 2, 3, 1)
            original = torch.squeeze(original).cpu().numpy()
            original = original * stds
            original = original + mean
            original = original * 255
            original = original.astype(int)
            f, ax = plt.subplots(1, 3, True, True)
            target = torch.squeeze(target)
            ax[0].imshow(label_to_colors(mask=target, colormap=color_map))
            ax[1].imshow(label_to_colors(mask=mask, colormap=color_map))
            ax[2].imshow(original)

            plt.show()

    model.train()
    total_train = 0
    correct_train = 0
    for batch_idx, (data, target, _id) in enumerate(unlabeled_loader):
        data = data.to(device)
        shape = list(data.shape)[2:]  # original (H, W) before padding
        padded = pad(data, padding_value)

        model_input = padded.float()
        # Generate pseudo labels without tracking gradients.
        model.eval()
        with torch.no_grad():
            output_unlabeled = model(model_input)
            output_unlabeled = unpad(output_unlabeled, shape)
            pseudo_labeled = torch.argmax(output_unlabeled, dim=1)

        model.train()
        output = model(model_input)
        output = unpad(output, shape)
        if debug:
            _debug_plot(output, pseudo_labeled, data, color_map)
        loss = criterion(output, pseudo_labeled)
        loss = (loss * alpha_weight(epoch)) / accumulation_steps
        loss.backward()
        _, predicted = torch.max(output.data, 1)
        # NOTE(review): the denominator counts `target` elements while the
        # numerator compares against pseudo labels — assumes equal shapes.
        total_train += target.nelement()
        correct_train += predicted.eq(pseudo_labeled.data).sum().item()
        train_accuracy = 100 * correct_train / total_train
        logger.info(
            '\r Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.6f}'
            .format(epoch, batch_idx * len(data),
                    len(unlabeled_loader.dataset),
                    100. * batch_idx / len(unlabeled_loader), loss.item(),
                    train_accuracy))
        if (batch_idx + 1) % accumulation_steps == 0:  # Wait for several backward steps
            if isinstance(optimizer, Iterable):  # Now we can do an optimizer step
                for opt in optimizer:
                    opt.step()
            else:
                optimizer.step()
            model.zero_grad()  # Reset gradients tensors
        gc.collect()

        # Fixed: `batch_idx + 1 % train_step` parsed as
        # `batch_idx + (1 % train_step)`, so the correction never ran.
        if (batch_idx + 1) % train_step == 0:  # used as correction with real data
            print('\n')
            train(model=model,
                  device=device,
                  optimizer=optimizer,
                  train_loader=train_loader,
                  epoch=epoch,
                  criterion=criterion,
                  accumulation_steps=accumulation_steps,
                  color_map=color_map)