Example #1
    def __init__(self, labels):
        super().__init__()

        self.labels = labels
        self.output_numbers = max(labels.values()) + 1
        self.rnn_size = self.output_numbers

        print_normal("Creating resSru with " + str(self.output_numbers) + " labels")

        self.convolutions = torch.nn.Sequential(OrderedDict([
            ('conv1', torch.nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)),
            ('bn1', torch.nn.BatchNorm2d(64)),
            ('activation', torch.nn.ReLU(inplace=True)),
            ('maxpool', torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=(1, 1))),
            ('resnet', ResNet(BasicBlock, [2, 2, 2, 2], strides=[1, (2, 1), (2, 1), (2, 1)], bn=True)),
        ]))
        self.convolutions_output_size = self.get_cnn_output_size()

        self.rnn = sru.SRU(
            self.convolutions_output_size[1] * self.convolutions_output_size[2],
            self.output_numbers,
            num_layers=4,
            bidirectional=False,
            rnn_dropout=0.3,
            use_tanh=1,
            use_relu=0,
            layer_norm=False,
            weight_norm=True)

        # self.rnn = torch.nn.GRU(self.convolutions_output_size[1] * self.convolutions_output_size[2], self.rnn_size, num_layers=1, bidirectional=True)
        # self.rnn = IndRNN(self.convolutions_output_size[1] * self.convolutions_output_size[2], self.rnn_size, n_layer=3, bidirectional=True, batch_norm=True, batch_first=True, dropout=0.1, nonlinearity='relu')
        # self.fc = torch.nn.Linear(2 * self.rnn_size, self.output_numbers)

        self.softmax = torch.nn.Softmax(dim=2)
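The forward pass is not included in this excerpt; the sketch below shows how the convolutional feature map is typically flattened into a sequence for the SRU, assuming get_cnn_output_size() returns the (batch, channels, height, width) shape of the convolutional output and that the image width becomes the time axis. It is an illustrative sketch of the tensor shapes, not the model's actual forward() implementation.

    def forward(self, x):
        # x: (batch, 3, height, width)
        x = self.convolutions(x)                      # (batch, C, H', W')
        batch_size, channels, height, width = x.size()
        # Collapse channels and height into the feature axis; width becomes time.
        x = x.permute(3, 0, 1, 2).contiguous()        # (W', batch, C, H')
        x = x.view(width, batch_size, channels * height)
        x, _ = self.rnn(x)                            # (W', batch, output_numbers)
        return self.softmax(x)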
Example #2
    def train(self, batch_size, overlr=None):
        """
        Train the network with the given batch size

        :param overlr: Override the current learning rate
        :param batch_size: The batch size
        """
        if overlr is not None:
            self.set_lr(overlr)

        train_database = parse_datasets_configuration_file(
            self.database_helper,
            with_document=True,
            training=True,
            testing=False,
            args={
                "loss": self.loss,
                "transform": True
            })
        print_normal("Train database length : " +
                     str(train_database.__len__()))
        self.trainer.train(train_database,
                           batch_size=batch_size,
                           callback=self.callback,
                           epoch_limit=int(
                               self.settings.get("line", "epoch_limit")))
Example #3
    def __init__(self,
                 model,
                 loss,
                 optimizer,
                 name=None,
                 clip_gradient=None,
                 checkpoint_userdata=None):
        """
        Create a trainer with the given model, loss and optimizer

        :param model: The model to use. It must inherit from the abstract model class :class:`socr.models.models.Model`.
        :param loss: The loss to use. It must inherit from the abstract loss class :class:`socr.models.loss.loss.Loss`.
        :param optimizer: The optimizer to use.
        :param name: The name of the checkpoint to load and to save. By default, it is the model's name.
        :param clip_gradient: The value to clip gradients to during training, if not None.
        :param checkpoint_userdata: Complementary data to save with the checkpoint.
        """
        os.makedirs('checkpoints', exist_ok=True)

        if checkpoint_userdata is None:
            checkpoint_userdata = {}
        self.original_model = model

        is_cuda = next(model.parameters()).is_cuda
        if is_cuda:
            print_normal("Using GPU Data Parallel")
            self.model = torch.nn.DataParallel(model)
        else:
            print_warning("Using CPU")
            self.model = CPUParallel(model)
        self.loss = loss
        self.optimizer = optimizer
        self.checkpoint_userdata = checkpoint_userdata

        if name is None:
            name = model.get_name()

        self.checkpoint_name = "checkpoints/" + name + ".pth.tar"
        self.csv_name_acc = "checkpoints/" + name + ".acc.txt"
        self.csv_name_lr = "checkpoints/" + name + ".lr.txt"
        self.csv_name_loss = "checkpoints/" + name + ".loss.txt"
        self.adaptative_optimizer = model.adaptative_learning_rate(
            self.optimizer)
        self.epoch = 0
        self.clip_gradient = clip_gradient
        if self.clip_gradient is not None:
            print_normal("Clipping the gradient to " + str(clip_gradient))

        self.start_time = None
        self.elapsed = 0.0
        self.error = None
        self.best_error = None

        if os.path.exists(self.checkpoint_name):
            self.restore()
        else:
            print_warning("Can't find '" + self.checkpoint_name + "'")
Example #4
    def set_lr(self, lr):
        """
        Override the current learning rate

        :param lr: The new learning rate
        """
        print_normal("Overwriting the lr to " + str(lr))
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
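For reference, the same param_groups mechanism can be exercised on its own; the snippet below is a standalone illustration with a hypothetical model and values, not part of the project code.

    import torch

    model = torch.nn.Linear(10, 2)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Overriding the learning rate mutates every parameter group,
    # which is exactly what set_lr() does above.
    for param_group in optimizer.param_groups:
        param_group['lr'] = 0.0001

    print(optimizer.param_groups[0]['lr'])  # 0.0001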
Example #5
    def __init__(self,
                 model_name="dhSegment",
                 lr=0.0001,
                 name=None,
                 is_cuda=True):
        """
        Create a line localizator with the given model name

        :param model_name: The model name to use
        :param lr: The initial learning rate to use, if the training has not started yet
        """
        self.settings = ConfigParser()
        self.settings.read("settings.cfg")

        # Load the model, the loss and the optimizer, and create a trainer from these three. The Trainer class will
        # automatically restore the weights if a checkpoint exists.
        self.model = get_model_by_name(model_name)(
            self.settings.get("line", "loss_type"),
            float(self.settings.get("line", "hysteresis_minimum")),
            float(self.settings.get("line", "hysteresis_maximum")),
            int(self.settings.get("line", "thicknesses")),
            float(self.settings.get("line", "height_importance")),
            float(self.settings.get("line", "exponential_decay")),
            float(self.settings.get("line", "bn_momentum")))
        self.loss = self.model.create_loss()
        if is_cuda:
            self.model = self.model.cuda()
            self.loss = self.loss.cuda()
        else:
            print_warning("Using the CPU")
            self.model = self.model.cpu()
            self.loss = self.loss.cpu()

        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=lr,
            weight_decay=float(self.settings.get("line", "weight_decay")))
        self.trainer = Trainer(self.model, self.loss, self.optimizer, name)

        # Parse and load all the test datasets specified into datasets.cfg
        load_default_datasets_cfg_if_not_exist()
        self.database_helper = None
        self.test_database = parse_datasets_configuration_file(
            self.database_helper,
            with_document=True,
            training=False,
            testing=True,
            args={
                "loss": self.loss,
                "transform": False
            })
        print_normal("Test database length : " +
                     str(self.test_database.__len__()))
Example #6
    def __init__(self,
                 loss_type="mse",
                 hysteresis_minimum=0.5,
                 hysteresis_maximum=0.5,
                 thicknesses=2,
                 height_importance=1.0):
        """

        :param s: grid division, assuming we have only 1 bounding box per cell
        """
        super().__init__()

        self.add_activation = None
        self.loss_type = loss_type
        if loss_type == "mse":
            print_normal("Using MSE Loss with Hysteresis=(" +
                         str(hysteresis_minimum) + "," +
                         str(hysteresis_maximum) + "), thicknesses=" +
                         str(thicknesses) + ", height_importance=" +
                         str(height_importance))
            self.mse = torch.nn.MSELoss()
        elif loss_type == "bce":
            print_normal("Using Binary Cross Entropy Loss Hysteresis=(" +
                         str(hysteresis_minimum) + "," +
                         str(hysteresis_maximum) + "), thicknesses=" +
                         str(thicknesses) + ", height_importance=" +
                         str(height_importance))
            self.mse = torch.nn.BCELoss()
            # self.mse = torch.nn.BCEWithLogitsLoss()
        elif loss_type == "norm":
            self.mse = None
        else:
            raise AssertionError
        self.mseh = torch.nn.MSELoss()

        self.hysteresis_minimum = hysteresis_minimum
        self.hysteresis_maximum = hysteresis_maximum
        self.thicknesses = thicknesses

        self.height_factor = 1.0
        self.height_importance = height_importance
        self.decoder = BaselineDecoder(self.height_factor,
                                       self.hysteresis_minimum,
                                       self.hysteresis_maximum)
        self.encoder = BaselineEncoder(self.height_factor, self.thicknesses)
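The BaselineDecoder itself is not reproduced in these excerpts. Hysteresis thresholding in general keeps the regions that pass the low threshold only when they touch at least one pixel above the high threshold; the sketch below shows that generic technique with numpy and scipy, as an illustration of the idea rather than the decoder's actual code.

    import numpy as np
    from scipy import ndimage

    def hysteresis_threshold(probs, low, high):
        # Keep connected components of the low-threshold mask that contain
        # at least one pixel above the high threshold.
        low_mask = probs >= low
        high_mask = probs >= high
        labeled, _ = ndimage.label(low_mask)
        keep = np.unique(labeled[high_mask])
        keep = keep[keep != 0]
        return np.isin(labeled, keep)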
Example #7
    def restore(self):
        """
        Restore the checkpoint

        :return: The complementary data saved with the checkpoint (as given in the constructor).
        """

        print_normal("Restoring the weights...")
        checkpoint = torch.load(self.checkpoint_name)
        self.epoch = checkpoint['epoch']
        self.checkpoint_userdata = checkpoint['userdata']
        self.model.load_state_dict(checkpoint['state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.adaptative_optimizer.load_state_dict(
            checkpoint['adaptative_optimizer'])
        self.elapsed = checkpoint['elapsed']
        self.best_error = checkpoint['best_error']
        return self.checkpoint_userdata
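The matching save() method is not part of these excerpts; a minimal sketch consistent with the keys restore() reads back would look like the following (an assumption about its shape, not the project's actual code).

    def save(self):
        # Mirror of restore(): persist every field that restore() expects to find.
        checkpoint = {
            'epoch': self.epoch,
            'userdata': self.checkpoint_userdata,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'adaptative_optimizer': self.adaptative_optimizer.state_dict(),
            'elapsed': self.elapsed,
            'best_error': self.best_error,
        }
        torch.save(checkpoint, self.checkpoint_name)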
Example #8
def parse_datasets_configuration_file(helper,
                                      path="datasets.cfg",
                                      with_document=False,
                                      with_line=False,
                                      training=False,
                                      testing=False,
                                      args=None):

    config = ConfigParser()
    config.read(path)

    datasets = []

    for section in config.sections():
        dict = {}
        options = config.options(section)
        for option in options:
            dict[option] = config.get(section, option)

        if dict["for"] != "Document" and dict["for"] != "Line":
            print("Invalid for : '" + dict["for"] + "'")

        if dict["for"] == "Document" and with_document == False:
            continue

        if dict["for"] == "Line" and with_line == False:
            continue

        if training and "train" in dict:
            print_normal("Loading train database " + str(dict["type"]) + "...")
            dataset = parse_dataset(helper, dict["type"], dict["train"], args)
            if dataset is not None:
                datasets.append(dataset)

        if testing and "test" in dict:
            print_normal("Loading test database " + str(dict["type"]) + "...")
            dataset = parse_dataset(helper, dict["type"], dict["test"], args)
            if dataset is not None:
                datasets.append(dataset)

    if len(datasets) == 0:
        return None

    return torch.utils.data.ConcatDataset(datasets)
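The expected datasets.cfg layout follows from the keys read above ("for", "type", "train" and "test"); a hypothetical section might look like the following, where the section name, type value and paths are purely illustrative.

    [MyDataset]
    for = Document
    type = icdar
    train = datasets/mydataset/train
    test = datasets/mydataset/test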
Example #9
    def __init__(self,
                 loss_type="mse",
                 hysteresis_minimum=0.5,
                 hysteresis_maximum=0.5,
                 thicknesses=2,
                 height_importance=1.0,
                 exponential_decay=1.0,
                 bn_momentum=0.1):
        super(dhSegment, self).__init__()

        self.loss_type = loss_type
        self.hysteresis_minimum = hysteresis_minimum
        self.hysteresis_maximum = hysteresis_maximum
        self.thicknesses = thicknesses
        self.height_importance = height_importance
        self.exponential_decay = exponential_decay
        self.bn_momentum = bn_momentum

        self.inplanes = 64

        self.conv1 = torch.nn.Conv2d(3, 64, kernel_size=7, padding=3, stride=2, bias=False)
        self.bn1 = torch.nn.BatchNorm2d(64)
        self.act1 = torch.nn.ReLU(inplace=True)

        self.layer1 = self._make_layer(Bottleneck, 64, 3, stride=1)
        self.layer2 = self._make_layer(Bottleneck, 128, 4, stride=2)
        self.layer3 = self._make_layer(Bottleneck, 256, 6, stride=2)
        self.layer4 = self._make_layer(Bottleneck, 512, 3, stride=2)

        self.layer4_reduce = torch.nn.Conv2d(2048, 512, kernel_size=1, bias=False)
        self.layer4_reduce_bn = torch.nn.BatchNorm2d(512)
        self.layer4_reduce_act = torch.nn.ReLU(inplace=True)

        self.layer3_reduce = torch.nn.Conv2d(1024, 512, kernel_size=1, bias=False)
        self.layer3_reduce_bn = torch.nn.BatchNorm2d(512)
        self.layer3_reduce_act = torch.nn.ReLU(inplace=True)

        self.up1 = PSPUpsample(512 + 512, 512, bn=True)
        self.up2 = PSPUpsample(512 + 512, 256, bn=True)
        self.up3 = PSPUpsample(256 + 64, 128, bn=True)
        self.up4 = PSPUpsample(128 + 3, 64, bn=True)

        self.last_conv_prob = torch.nn.Conv2d(64, 2, kernel_size=(1, 1), dilation=(1, 1), padding=0, bias=True)
        self.last_h_prob = torch.nn.ReLU(inplace=True)

        self.last_act_prob = torch.nn.Sigmoid()

        print_normal("Applying xavier initialization...")
        self.apply(self.weights_init)

        print_normal("Downloading pretrained model from pytorch model zoo...")
        pretrained_model = model_zoo.load_url("https://download.pytorch.org/models/resnet50-19c8e357.pth")

        print_normal("Loading pretrained resnet...")
        self.load_my_state_dict(pretrained_model)

        print_normal("Adjusting Batch Normalization momentum to " + str(self.bn_momentum))
        self.apply(self.adjust_bn_decay(self.bn_momentum))
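load_my_state_dict() is not shown in these excerpts. Loading a pretrained ResNet state dict into a model whose head differs is usually done by copying only the tensors whose names and shapes match; the sketch below shows that common pattern and is an assumption about this helper, not its actual body.

    def load_my_state_dict(self, pretrained_state_dict):
        # Copy only the pretrained tensors that have a same-named, same-shaped
        # counterpart here, skipping e.g. the ImageNet classifier head.
        own_state = self.state_dict()
        for name, param in pretrained_state_dict.items():
            if name in own_state and own_state[name].shape == param.shape:
                own_state[name].copy_(param)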
Example #10
    def test(self):
        """
        Test the network and return the average loss and the test data-set
        :return: The average error and the test data-set
        """
        error = 0
        test_len = min(32, len(self.test_database) - 1)

        for i in range(0, test_len):
            image, label = self.test_database.__getitem__(i)

            result = self.model(
                torch.autograd.Variable(image.unsqueeze(0).float().cuda()))
            segmentation = self.loss.ytrue_to_segmentation(result)

            error = error + mIoU(result, segmentation)
            sys.stdout.write("Testing..." + str(i * 100 // test_len) + "%\r")

        error = error / test_len
        print_normal("Testing...100%. Error : " + str(error) + "\n")
        return error
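The mIoU helper is not reproduced here; for a binary segmentation map it typically reduces to the intersection-over-union of the predicted and reference masks, as in the minimal numpy sketch below (an illustration of the metric, not the project's implementation).

    import numpy as np

    def binary_iou(prediction, target, threshold=0.5):
        # Binarize both maps, then divide the overlap by the union.
        prediction = np.asarray(prediction) >= threshold
        target = np.asarray(target) >= threshold
        intersection = np.logical_and(prediction, target).sum()
        union = np.logical_or(prediction, target).sum()
        return intersection / union if union > 0 else 1.0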
Example #11
def download_resources():
    if not os.path.isdir("resources/fonts"):
        url = "https://www.dropbox.com/s/3wcp26el8x5na4j/resources.zip?dl=1"

        print_normal("Dowloading resources...")
        wget.download(url)

        print_normal("Extracting resources...")
        zip_ref = zipfile.ZipFile("resources.zip", 'r')
        zip_ref.extractall(".")
        zip_ref.close()

        print_normal("Cleaing up...")
        os.remove("resources.zip")

        print_normal("Resources downloaded successfully.")
Example #12
    def adaptative_learning_rate(self, optimizer):
        print_normal("Using an exponential decay of " + str(self.exponential_decay))
        return torch.optim.lr_scheduler.ExponentialLR(optimizer, self.exponential_decay)
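For context, ExponentialLR multiplies the learning rate by the decay factor on every scheduler.step() call, i.e. lr_t = lr_0 * gamma^t. The standalone snippet below illustrates that behaviour with hypothetical values.

    import torch

    model = torch.nn.Linear(4, 1)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)

    for epoch in range(3):
        # ... one training epoch would run here ...
        scheduler.step()
        print(optimizer.param_groups[0]['lr'])  # 0.00098, 0.0009604, ...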
Example #13
    def __init__(self, model_name="resRnn", lr=0.001, name=None, is_cuda=True):
        """
        Create a text recognizer with the given model name

        :param model_name: The model name to use
        :param lr: The learning rate
        :param name: The name where to save the model
        :param is_cuda: True to use cuda
        """
        self.settings = ConfigParser()
        self.settings.read("settings.cfg")

        self.ngram = int(self.settings.get("text", "ngram"))

        if self.ngram == 1:
            print_normal("Using 1-Gram")
            with open("resources/characters.txt", "r") as content_file:
                lst = content_file.read() + " "

            self.labels = {"": 0}
            for i in range(0, len(lst)):
                self.labels[lst[i]] = i + 1

        elif self.ngram == 2:
            print_normal("Using 2-Gram")
            analyser = N2GramAnalyzer()
            analyser.parse_xml_file("resources/texts/fr.xml.gz")
            analyser.parse_xml_file("resources/texts/en.xml.gz")

            self.labels = analyser.get_bests(
                num=int(self.settings.get("text", "max_gram")))

        else:
            print_error(str(self.ngram) + "-gram not implemented !")
            raise NotImplementedError()

        print(self.labels)

        # with open("resources/word_characters.txt", "r") as content_file:
        #    self.word_labels = content_file.read()

        self.document_helper = DocumentGeneratorHelper()

        self.model = get_model_by_name(model_name)(self.labels)
        self.loss = self.model.create_loss()
        if is_cuda:
            self.model = self.model.cuda()
            self.loss = self.loss.cuda()
        else:
            print_warning("Using the CPU")
            self.model = self.model.cpu()
            self.loss = self.loss.cpu()

        print_normal("Using Adam with a Learning Rate of " + str(lr))
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        self.trainer = Trainer(self.model, self.loss, self.optimizer, name)

        load_default_datasets_cfg_if_not_exist()
        self.database_helper = DocumentGeneratorHelper()
        self.test_database = parse_datasets_configuration_file(
            self.database_helper,
            with_line=True,
            training=False,
            testing=True,
            args={
                "height": self.model.get_input_image_height(),
                "labels": self.labels,
                "transform": False,
                "loss": self.loss
            })
        print_normal("Test database length : " +
                     str(self.test_database.__len__()))
Example #14
    def train(self,
              data_set,
              batch_size=1,
              callback=None,
              epoch_limit=None,
              alternative_loss=None):
        """
        Train the network until the loss stops decreasing.

        :param data_set: The data-set which will be used to train the network
        :param batch_size: The batch size
        :param callback: A test function to call after every epoch. The value it returns will be written to the CSV as the test accuracy.
        """
        self.moving_average = MovingAverage(
            max(data_set.__len__() // batch_size, 1024))
        self.alt_moving_average = MovingAverage(
            max(data_set.__len__() // (batch_size * 32), 1024))

        loader = torch.utils.data.DataLoader(data_set,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=4,
                                             collate_fn=self.loss.collate)

        try:
            if os.path.exists(self.csv_name_acc):
                append_write = 'a'
            else:
                append_write = 'w'

            with open(self.csv_name_acc, append_write) as csv_acc, open(
                    self.csv_name_loss,
                    append_write) as csv_loss, open(self.csv_name_lr,
                                                    append_write) as csv_lr:
                while self.optimizer.state_dict(
                )['param_groups'][0]['lr'] > 1e-7:
                    if epoch_limit is not None and self.epoch > epoch_limit:
                        print_normal("Epoch " + str(epoch_limit) + "reached !")
                        break

                    self.do_one_epoch(loader, batch_size, alternative_loss)
                    if callback is not None:
                        self.error = callback()

                        if self.error is not None:
                            if self.best_error is None or self.error < self.best_error:
                                print_normal("Best score ! Saving !")
                                self.best_error = self.error
                                self.save()
                    self.write_to_file(csv_loss, csv_acc, csv_lr)

            print_normal("Done training ! Saving...")
            self.save()

        except KeyboardInterrupt:
            while True:
                sys.stdout.write(
                    "\n\n\nDo you want to save the weight ? [yes/no]")
                i = input()
                if i == "yes":
                    sys.stdout.write("Saving... \n")
                    self.save()
                    sys.stdout.write("Done! \n")
                    break
                if i == "no":
                    break
Example #15
    def test(self, limit=None):
        """
        Test the network

        :param limit: The maximum number of images to test
        :return: The average word error rate (WER)
        """
        is_cuda = next(self.model.parameters()).is_cuda

        loader = torch.utils.data.DataLoader(self.test_database,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1)

        test_len = len(self.test_database)
        if limit is not None:
            test_len = min(limit, test_len)

        wer_s, wer_i, wer_d, wer_n = 0, 0, 0, 0
        cer_s, cer_i, cer_d, cer_n = 0, 0, 0, 0

        sen_err = 0
        count = 0

        for i, data in enumerate(loader, 0):
            image, label = self.test_database.__getitem__(i)
            label = label[1]

            if image.shape[2] < 8:
                continue

            if is_cuda:
                result = self.model(
                    torch.autograd.Variable(image.unsqueeze(0).float().cuda()))
            else:
                result = self.model(
                    torch.autograd.Variable(image.unsqueeze(0).float().cpu()))

            text = self.loss.ytrue_to_lines(result.cpu().detach().numpy())

            # update CER statistics
            _, (s, i, d) = levenshtein(label, text)
            cer_s += s
            cer_i += i
            cer_d += d
            cer_n += len(label)
            # update WER statistics
            _, (s, i, d) = levenshtein(label.split(), text.split())
            wer_s += s
            wer_i += i
            wer_d += d
            wer_n += len(label.split())
            # update SER statistics
            if s + i + d > 0:
                sen_err += 1

            count = count + 1

            sys.stdout.write("Testing..." + str(count * 100 // test_len) +
                             "%\r")

            if count == test_len:
                break

        cer = (100.0 * (cer_s + cer_i + cer_d)) / cer_n
        wer = (100.0 * (wer_s + wer_i + wer_d)) / wer_n
        ser = (100.0 * sen_err) / count

        print_normal("CER : %.3f; WER : %.3f; SER : %.3f \n" % (cer, wer, ser))
        return wer
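The levenshtein helper used above returns the edit distance together with the (substitutions, insertions, deletions) counts. The dynamic-programming sketch below matches that call signature and is only an illustration consistent with the call sites, not necessarily the project's implementation.

    def levenshtein(reference, hypothesis):
        # dp[i][j] = (distance, (sub, ins, del)) for reference[:i] vs hypothesis[:j]
        rows, cols = len(reference) + 1, len(hypothesis) + 1
        dp = [[None] * cols for _ in range(rows)]
        dp[0][0] = (0, (0, 0, 0))
        for i in range(1, rows):
            dp[i][0] = (i, (0, 0, i))   # delete every reference symbol
        for j in range(1, cols):
            dp[0][j] = (j, (0, j, 0))   # insert every hypothesis symbol
        for i in range(1, rows):
            for j in range(1, cols):
                if reference[i - 1] == hypothesis[j - 1]:
                    dp[i][j] = dp[i - 1][j - 1]
                    continue
                d_sub, (s1, i1, del1) = dp[i - 1][j - 1]
                d_ins, (s2, i2, del2) = dp[i][j - 1]
                d_del, (s3, i3, del3) = dp[i - 1][j]
                best = min(d_sub, d_ins, d_del)
                if best == d_sub:
                    dp[i][j] = (d_sub + 1, (s1 + 1, i1, del1))
                elif best == d_ins:
                    dp[i][j] = (d_ins + 1, (s2, i2 + 1, del2))
                else:
                    dp[i][j] = (d_del + 1, (s3, i3, del3 + 1))
        return dp[rows - 1][cols - 1]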
Example #16
    def callback(self):
        self.eval()
        subprocess.run(['rm', '-R', 'results'])

        config = ConfigParser()
        config.read("datasets.cfg")

        for section in config.sections():
            dict = {}
            options = config.options(section)
            for option in options:
                dict[option] = config.get(section, option)

            if dict["for"] == "Document" and "test" in dict:
                self.evaluate(dict["test"])

        result = self.run_transkribus()
        lines = result.split("\n")
        probs = [line.split(",") for line in lines]
        probs = [[prob.replace(" ", "") for prob in problist]
                 for problist in probs]

        new_probs = []
        total = None

        for i in range(0, len(probs)):
            try:
                id = probs[i][3].split(".")[0]
                if id == "TOTAL":
                    total = probs[i]
            except Exception as e:
                pass

        print_normal("P : " + str(total[0]) + "; F : " + str(total[1]) +
                     "; F1 : " + str(total[2]))

        for i in range(0, len(probs)):
            try:
                new_probs.append([
                    float(probs[i][0]),
                    float(probs[i][1]),
                    float(probs[i][2]), probs[i][3], probs[i][4]
                ])
            except Exception as e:
                pass

        new_probs.sort(key=lambda x: x[2])

        for i in range(0, len(new_probs)):
            id = new_probs[i][3].split(".")[0]
            if id != "TOTAL":
                for ext in [
                        ".jpg", ".probs.jpg", ".probs.gt.jpg",
                        ".components.jpg", ".txt", ".xml"
                ]:
                    os.rename("results/" + id + ext,
                              'results/%.4f%s' % (new_probs[i][2], ext))
            else:
                print(new_probs[i])

        return total[2]
Example #17
    def evaluate(self, path):
        """
        Evaluate the line localizator. Output all the results to the 'results' directory.

        :param path: The path of the images, with or without associated XMLs
        """
        print_normal("Evaluating " + path)

        if not os.path.exists("results"):
            os.makedirs("results")

        data_set = ICDARDocumentEvalSet(path, self.loss)

        loader = torch.utils.data.DataLoader(data_set,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1)

        count = 0

        for i, data in enumerate(loader, 0):
            resized, image, path, label = data

            percent = i * 100 // data_set.__len__()
            sys.stdout.write(str(percent) + "%... Processing \r")

            lines, positions, probsmap, components = self.extract(
                image, resized, with_images=False)

            self.output_image_bloc(image, positions).save(
                "results/" + str(count) + ".jpg", "JPEG")

            save_connected_components(
                components, "results/" + str(count) + ".components.jpg")

            image_numpy_to_pillow_bw(
                probsmap[0].cpu().detach().numpy()).save("results/" +
                                                         str(count) +
                                                         ".probs.jpg")
            del probsmap

            image_numpy_to_pillow_bw(
                label[0][0].cpu().detach().numpy()).save("results/" +
                                                         str(count) +
                                                         ".probs.gt.jpg")

            xml_path = os.path.join(
                os.path.dirname(path[0]),
                os.path.splitext(os.path.basename(path[0]))[0] + ".xml")
            if not os.path.exists(xml_path):
                xml_path = os.path.join(
                    os.path.dirname(path[0]), "page/" +
                    os.path.splitext(os.path.basename(path[0]))[0] + ".xml")

            if os.path.exists(xml_path):
                shutil.copy2(xml_path, "results/" + str(count) + ".xml")
                with open("results/" + str(count) + ".txt", "w") as text_file:
                    text_file.write(self.output_baseline(positions))
            else:
                print_warning("Can't find : '" + xml_path + "'")

            count = count + 1