def __init__(self, labels):
    super().__init__()
    self.labels = labels
    self.output_numbers = max(labels.values()) + 1
    self.rnn_size = self.output_numbers

    print_normal("Creating resSru with " + str(self.output_numbers) + " labels")

    self.convolutions = torch.nn.Sequential(OrderedDict([
        ('conv1', torch.nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)),
        ('bn1', torch.nn.BatchNorm2d(64)),
        ('activation', torch.nn.ReLU(inplace=True)),
        ('maxpool', torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=(1, 1))),
        ('resnet', ResNet(BasicBlock, [2, 2, 2, 2], strides=[1, (2, 1), (2, 1), (2, 1)], bn=True)),
    ]))
    self.convolutions_output_size = self.get_cnn_output_size()

    self.rnn = sru.SRU(self.convolutions_output_size[1] * self.convolutions_output_size[2],
                       self.output_numbers,
                       num_layers=4,
                       bidirectional=False,
                       rnn_dropout=0.3,
                       use_tanh=1,
                       use_relu=0,
                       layer_norm=False,
                       weight_norm=True)
    # self.rnn = torch.nn.GRU(self.convolutions_output_size[1] * self.convolutions_output_size[2], self.rnn_size, num_layers=1, bidirectional=True)
    # self.rnn = IndRNN(self.convolutions_output_size[1] * self.convolutions_output_size[2], self.rnn_size, n_layer=3, bidirectional=True, batch_norm=True, batch_first=True, dropout=0.1, nonlinearity='relu')
    # self.fc = torch.nn.Linear(2 * self.rnn_size, self.output_numbers)
    self.softmax = torch.nn.Softmax(dim=2)
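# A minimal sketch (an assumption, not the repository's actual forward pass) of how the
# convolutional feature map could be reshaped before being fed to sru.SRU, which expects
# input of shape (seq_len, batch, input_size). Here the image width is taken as the time
# axis and channels * height as the feature size, which matches the
# convolutions_output_size[1] * convolutions_output_size[2] input size used above.
import torch


def features_to_sequence(feature_map):
    # feature_map: (batch, channels, height, width) produced by self.convolutions
    batch, channels, height, width = feature_map.size()
    # (batch, channels, height, width) -> (width, batch, channels * height)
    sequence = feature_map.permute(3, 0, 1, 2).contiguous()
    return sequence.view(width, batch, channels * height)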
def train(self, batch_size, overlr=None):
    """
    Train the network with the given batch size

    :param overlr: Override the current learning rate
    :param batch_size: The batch size
    """
    if overlr is not None:
        self.set_lr(overlr)

    train_database = parse_datasets_configuration_file(
        self.database_helper, with_document=True, training=True, testing=False,
        args={"loss": self.loss, "transform": True})

    print_normal("Train database length : " + str(len(train_database)))

    self.trainer.train(train_database, batch_size=batch_size, callback=self.callback,
                       epoch_limit=int(self.settings.get("line", "epoch_limit")))
def __init__(self, model, loss, optimizer, name=None, clip_gradient=None, checkpoint_userdata=None):
    """
    Create a trainer with the given model, loss and optimizer

    :param model: The model to use. It must inherit from the model abstract class. :class:`socr.models.models.Model`
    :param loss: The loss to use. It must inherit from the loss abstract class. :class:`socr.models.loss.loss.Loss`
    :param optimizer: The optimizer to use.
    :param name: The name of the checkpoint to load and to save. By default, it is the model name.
    :param clip_gradient: The value to clip the gradient to during training, if not None.
    :param checkpoint_userdata: Complementary data to save with the checkpoint.
    """
    os.makedirs('checkpoints', exist_ok=True)

    if checkpoint_userdata is None:
        checkpoint_userdata = {}
    self.original_model = model

    is_cuda = next(model.parameters()).is_cuda
    if is_cuda:
        print_normal("Using GPU Data Parallel")
        self.model = torch.nn.DataParallel(model)
    else:
        print_warning("Using CPU")
        self.model = CPUParallel(model)

    self.loss = loss
    self.optimizer = optimizer
    self.checkpoint_userdata = checkpoint_userdata

    if name is None:
        name = model.get_name()
    self.checkpoint_name = "checkpoints/" + name + ".pth.tar"
    self.csv_name_acc = "checkpoints/" + name + ".acc.txt"
    self.csv_name_lr = "checkpoints/" + name + ".lr.txt"
    self.csv_name_loss = "checkpoints/" + name + ".loss.txt"

    self.adaptative_optimizer = model.adaptative_learning_rate(self.optimizer)
    self.epoch = 0

    self.clip_gradient = clip_gradient
    if self.clip_gradient is not None:
        print_normal("Clipping the gradient to " + str(clip_gradient))

    self.start_time = None
    self.elapsed = 0.0
    self.error = None
    self.best_error = None

    if os.path.exists(self.checkpoint_name):
        self.restore()
    else:
        print_warning("Can't find '" + self.checkpoint_name + "'")
def set_lr(self, lr):
    """
    Override the current learning rate

    :param lr: The new learning rate
    """
    print_normal("Overwriting the lr to " + str(lr))
    for param_group in self.optimizer.param_groups:
        param_group['lr'] = lr
def __init__(self, model_name="dhSegment", lr=0.0001, name=None, is_cuda=True): """ Creae a line localizator with the given models name :param model_name: The models name to use :param lr: The intial learning rate to use, if the training has not started yet """ self.settings = ConfigParser() self.settings.read("settings.cfg") # Load the models, the loss, the optimizer and create a trainer from these three. The Trainer class will # automatically restore the weight if it exist. self.model = get_model_by_name(model_name)( self.settings.get("line", "loss_type"), float(self.settings.get("line", "hysteresis_minimum")), float(self.settings.get("line", "hysteresis_maximum")), int(self.settings.get("line", "thicknesses")), float(self.settings.get("line", "height_importance")), float(self.settings.get("line", "exponential_decay")), float(self.settings.get("line", "bn_momentum"))) self.loss = self.model.create_loss() if is_cuda: self.model = self.model.cuda() self.loss = self.loss.cuda() else: print_warning("Using the CPU") self.model = self.model.cpu() self.loss = self.loss.cpu() self.optimizer = torch.optim.Adam( self.model.parameters(), lr=lr, weight_decay=float(self.settings.get("line", "weight_decay"))) self.trainer = Trainer(self.model, self.loss, self.optimizer, name) # Parse and load all the test datasets specified into datasets.cfg load_default_datasets_cfg_if_not_exist() self.database_helper = None self.test_database = parse_datasets_configuration_file( self.database_helper, with_document=True, training=False, testing=True, args={ "loss": self.loss, "transform": False }) print_normal("Test database length : " + str(self.test_database.__len__()))
def __init__(self, loss_type="mse", hysteresis_minimum=0.5, hysteresis_maximum=0.5, thicknesses=2, height_importance=1.0): """ :param s: grid division, assuming we have only 1 bounding box per cell """ super().__init__() self.add_activation = None self.loss_type = loss_type if loss_type == "mse": print_normal("Using MSE Loss with Hysteresis=(" + str(hysteresis_minimum) + "," + str(hysteresis_maximum) + "), thicknesses=" + str(thicknesses) + ", height_importance=" + str(height_importance)) self.mse = torch.nn.MSELoss() elif loss_type == "bce": print_normal("Using Binary Cross Entropy Loss Hysteresis=(" + str(hysteresis_minimum) + "," + str(hysteresis_maximum) + "), thicknesses=" + str(thicknesses) + ", height_importance=" + str(height_importance)) self.mse = torch.nn.BCELoss() # self.mse = torch.nn.BCEWithLogitsLoss() elif loss_type == "norm": self.mse = None else: raise AssertionError self.mseh = torch.nn.MSELoss() self.hysteresis_minimum = hysteresis_minimum self.hysteresis_maximum = hysteresis_maximum self.thicknesses = thicknesses self.height_factor = 1.0 self.height_importance = height_importance self.decoder = BaselineDecoder(self.height_factor, self.hysteresis_minimum, self.hysteresis_maximum) self.encoder = BaselineEncoder(self.height_factor, self.thicknesses)
def restore(self):
    """
    Restore the checkpoint

    :return: The complementary data saved with the checkpoint (given in the constructor parameters).
    """
    print_normal("Restoring the weights...")
    checkpoint = torch.load(self.checkpoint_name)
    self.epoch = checkpoint['epoch']
    self.checkpoint_userdata = checkpoint['userdata']
    self.model.load_state_dict(checkpoint['state_dict'])
    self.optimizer.load_state_dict(checkpoint['optimizer'])
    self.adaptative_optimizer.load_state_dict(checkpoint['adaptative_optimizer'])
    self.elapsed = checkpoint['elapsed']
    self.best_error = checkpoint['best_error']
    return self.checkpoint_userdata
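# A minimal sketch (an assumption; the repository's own save() is not shown in this
# section) of the checkpoint layout that restore() expects, i.e. the keys read above:
import torch


def save(self):
    checkpoint = {
        'epoch': self.epoch,
        'userdata': self.checkpoint_userdata,
        'state_dict': self.model.state_dict(),
        'optimizer': self.optimizer.state_dict(),
        'adaptative_optimizer': self.adaptative_optimizer.state_dict(),
        'elapsed': self.elapsed,
        'best_error': self.best_error,
    }
    torch.save(checkpoint, self.checkpoint_name)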
def parse_datasets_configuration_file(helper, path="datasets.cfg", with_document=False, with_line=False, training=False, testing=False, args=None):
    config = ConfigParser()
    config.read(path)

    datasets = []
    for section in config.sections():
        entry = {}
        for option in config.options(section):
            entry[option] = config.get(section, option)

        if entry["for"] != "Document" and entry["for"] != "Line":
            print("Invalid 'for' value : '" + entry["for"] + "'")
        if entry["for"] == "Document" and not with_document:
            continue
        if entry["for"] == "Line" and not with_line:
            continue

        if training and "train" in entry:
            print_normal("Loading train database " + str(entry["type"]) + "...")
            dataset = parse_dataset(helper, entry["type"], entry["train"], args)
            if dataset is not None:
                datasets.append(dataset)

        if testing and "test" in entry:
            print_normal("Loading test database " + str(entry["type"]) + "...")
            dataset = parse_dataset(helper, entry["type"], entry["test"], args)
            if dataset is not None:
                datasets.append(dataset)

    if len(datasets) == 0:
        return None
    return torch.utils.data.ConcatDataset(datasets)
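# Hypothetical example of a datasets.cfg entry accepted by this parser. The section
# layout and key names ("for", "type", "train", "test") follow the options read above;
# the section name, type value and paths are illustrative assumptions:
#
#   [ICDAR-Documents]
#   for = Document
#   type = icdar
#   train = datasets/icdar/train
#   test = datasets/icdar/test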
def __init__(self, loss_type="mse", hysteresis_minimum=0.5, hysteresis_maximum=0.5, thicknesses=2, height_importance=1.0, exponential_decay=1.0, bn_momentum=0.1): super(dhSegment, self).__init__() self.loss_type = loss_type self.hysteresis_minimum = hysteresis_minimum self.hysteresis_maximum = hysteresis_maximum self.thicknesses = thicknesses self.height_importance = height_importance self.exponential_decay = exponential_decay self.bn_momentum = bn_momentum self.inplanes = 64 self.conv1 = torch.nn.Conv2d(3, 64, kernel_size=7, padding=3, stride=2, bias=False) self.bn1 = torch.nn.BatchNorm2d(64) self.act1 = torch.nn.ReLU(inplace=True) self.layer1 = self._make_layer(Bottleneck, 64, 3, stride=1) self.layer2 = self._make_layer(Bottleneck, 128, 4, stride=2) self.layer3 = self._make_layer(Bottleneck, 256, 6, stride=2) self.layer4 = self._make_layer(Bottleneck, 512, 3, stride=2) self.layer4_reduce = torch.nn.Conv2d(2048, 512, kernel_size=1, bias=False) self.layer4_reduce_bn = torch.nn.BatchNorm2d(512) self.layer4_reduce_act = torch.nn.ReLU(inplace=True) self.layer3_reduce = torch.nn.Conv2d(1024, 512, kernel_size=1, bias=False) self.layer3_reduce_bn = torch.nn.BatchNorm2d(512) self.layer3_reduce_act = torch.nn.ReLU(inplace=True) self.up1 = PSPUpsample(512 + 512, 512, bn=True) self.up2 = PSPUpsample(512 + 512, 256, bn=True) self.up3 = PSPUpsample(256 + 64, 128, bn=True) self.up4 = PSPUpsample(128 + 3, 64, bn=True) self.last_conv_prob = torch.nn.Conv2d(64, 2, kernel_size=(1, 1), dilation=(1, 1), padding=0, bias=True) self.last_h_prob = torch.nn.ReLU(inplace=True) self.last_act_prob = torch.nn.Sigmoid() print_normal("Applying xavier initialization...") self.apply(self.weights_init) print_normal("Downloading pretrained model from pytorch model zoo...") pretrained_model = model_zoo.load_url("https://download.pytorch.org/models/resnet50-19c8e357.pth") print_normal("Loading pretrained resnet...") self.load_my_state_dict(pretrained_model) print_normal("Adjusting Batch Normalization momentum to " + str(self.bn_momentum)) self.apply(self.adjust_bn_decay(self.bn_momentum))
def test(self):
    """
    Test the network on the test data-set

    :return: The average error
    """
    error = 0
    test_len = min(32, len(self.test_database) - 1)
    for i in range(0, test_len):
        image, label = self.test_database[i]

        result = self.model(torch.autograd.Variable(image.unsqueeze(0).float().cuda()))
        segmentation = self.loss.ytrue_to_segmentation(result)
        error = error + mIoU(result, segmentation)

        sys.stdout.write("Testing..." + str(i * 100 // test_len) + "%\r")

    error = error / test_len
    print_normal("Testing...100%. Error : " + str(error) + "\n")
    return error
def download_resources():
    if not os.path.isdir("resources/fonts"):
        url = "https://www.dropbox.com/s/3wcp26el8x5na4j/resources.zip?dl=1"
        print_normal("Downloading resources...")
        wget.download(url)

        print_normal("Extracting resources...")
        zip_ref = zipfile.ZipFile("resources.zip", 'r')
        zip_ref.extractall(".")
        zip_ref.close()

        print_normal("Cleaning up...")
        os.remove("resources.zip")

        print_normal("Resources downloaded successfully.")
def adaptative_learning_rate(self, optimizer):
    print_normal("Using an exponential decay of " + str(self.exponential_decay))
    return torch.optim.lr_scheduler.ExponentialLR(optimizer, self.exponential_decay)
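# Quick standalone illustration (not part of the model) of what ExponentialLR does with
# the decay factor returned above: after each scheduler.step() the learning rate is
# multiplied by the decay factor, so lr_k = lr_0 * exponential_decay ** k.
# The lr and gamma values below are illustrative assumptions.
import torch

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.Adam(params, lr=0.001)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
for epoch in range(3):
    optimizer.step()
    scheduler.step()
    print(optimizer.param_groups[0]['lr'])  # ~9.80e-4, 9.60e-4, 9.41e-4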
def __init__(self, model_name="resRnn", lr=0.001, name=None, is_cuda=True): """ Creae a text recognizer with the given models name :param model_name: The model name to use :param optimizer_name: The optimizer name to use :param lr: The learning rate :param name: The name where to save the model :param is_cuda: True to use cuda """ self.settings = ConfigParser() self.settings.read("settings.cfg") self.ngram = int(self.settings.get("text", "ngram")) if self.ngram == 1: print_normal("Using 1-Gram") with open("resources/characters.txt", "r") as content_file: lst = content_file.read() + " " self.labels = {"": 0} for i in range(0, len(lst)): self.labels[lst[i]] = i + 1 elif self.ngram == 2: print_normal("Using 2-Gram") analyser = N2GramAnalyzer() analyser.parse_xml_file("resources/texts/fr.xml.gz") analyser.parse_xml_file("resources/texts/en.xml.gz") self.labels = analyser.get_bests( num=int(self.settings.get("text", "max_gram"))) else: print_error(str(self.ngram) + "-gram not implemented !") raise NotImplementedError() print(self.labels) # with open("resources/word_characters.txt", "r") as content_file: # self.word_labels = content_file.read() self.document_helper = DocumentGeneratorHelper() self.model = get_model_by_name(model_name)(self.labels) self.loss = self.model.create_loss() if is_cuda: self.model = self.model.cuda() self.loss = self.loss.cuda() else: print_warning("Using the CPU") self.model = self.model.cpu() self.loss = self.loss.cpu() print_normal("Using Adam with a Learning Rate of " + str(lr)) self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr) self.trainer = Trainer(self.model, self.loss, self.optimizer, name) load_default_datasets_cfg_if_not_exist() self.database_helper = DocumentGeneratorHelper() self.test_database = parse_datasets_configuration_file( self.database_helper, with_line=True, training=False, testing=True, args={ "height": self.model.get_input_image_height(), "labels": self.labels, "transform": False, "loss": self.loss }) print_normal("Test database length : " + str(self.test_database.__len__()))
def train(self, data_set, batch_size=1, callback=None, epoch_limit=None, alternative_loss=None):
    """
    Train the network until the loss stops decreasing.

    :param data_set: The data-set which will be used to train the network
    :param batch_size: The batch size
    :param callback: A test function to call after every epoch. The value returned by this function will be written into the CSV as the test accuracy value.
    """
    self.moving_average = MovingAverage(max(len(data_set) // batch_size, 1024))
    self.alt_moving_average = MovingAverage(max(len(data_set) // (batch_size * 32), 1024))

    loader = torch.utils.data.DataLoader(data_set, batch_size=batch_size, shuffle=True,
                                         num_workers=4, collate_fn=self.loss.collate)

    try:
        if os.path.exists(self.csv_name_acc):
            append_write = 'a'
        else:
            append_write = 'w'

        with open(self.csv_name_acc, append_write) as csv_acc, \
                open(self.csv_name_loss, append_write) as csv_loss, \
                open(self.csv_name_lr, append_write) as csv_lr:
            while self.optimizer.state_dict()['param_groups'][0]['lr'] > 1e-7:
                if epoch_limit is not None and self.epoch > epoch_limit:
                    print_normal("Epoch " + str(epoch_limit) + " reached !")
                    break

                self.do_one_epoch(loader, batch_size, alternative_loss)

                if callback is not None:
                    self.error = callback()

                if self.error is not None:
                    if self.best_error is None or self.error < self.best_error:
                        print_normal("Best score ! Saving !")
                        self.best_error = self.error
                        self.save()

                self.write_to_file(csv_loss, csv_acc, csv_lr)

        print_normal("Done training ! Saving...")
        self.save()
    except KeyboardInterrupt:
        while True:
            sys.stdout.write("\n\n\nDo you want to save the weights ? [yes/no]")
            i = input()
            if i == "yes":
                sys.stdout.write("Saving... \n")
                self.save()
                sys.stdout.write("Done! \n")
                break
            if i == "no":
                break
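# A minimal sketch (an assumption; the project's MovingAverage class is imported from
# elsewhere and its method names are hypothetical here) of a windowed moving average
# matching how it is constructed above, i.e. with a window size derived from the
# number of batches per epoch:
from collections import deque


class MovingAverage:
    def __init__(self, window):
        self.values = deque(maxlen=window)

    def addn(self, value):
        # hypothetical method name; only the windowed-mean behaviour is assumed
        self.values.append(value)

    def moving_average(self):
        return sum(self.values) / max(len(self.values), 1)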
def test(self, limit=None):
    """
    Test the network

    :param limit: Limit on the number of test images
    :return: The average wer
    """
    is_cuda = next(self.model.parameters()).is_cuda

    loader = torch.utils.data.DataLoader(self.test_database, batch_size=1, shuffle=False, num_workers=1)

    test_len = len(self.test_database)
    if limit is not None:
        test_len = min(limit, test_len)

    wer_s, wer_i, wer_d, wer_n = 0, 0, 0, 0
    cer_s, cer_i, cer_d, cer_n = 0, 0, 0, 0
    sen_err = 0
    count = 0

    for index, data in enumerate(loader, 0):
        image, label = self.test_database[index]
        label = label[1]

        if image.shape[2] < 8:
            continue

        if is_cuda:
            result = self.model(torch.autograd.Variable(image.unsqueeze(0).float().cuda()))
        else:
            result = self.model(torch.autograd.Variable(image.unsqueeze(0).float().cpu()))

        text = self.loss.ytrue_to_lines(result.cpu().detach().numpy())

        # update CER statistics
        _, (s, i, d) = levenshtein(label, text)
        cer_s += s
        cer_i += i
        cer_d += d
        cer_n += len(label)

        # update WER statistics
        _, (s, i, d) = levenshtein(label.split(), text.split())
        wer_s += s
        wer_i += i
        wer_d += d
        wer_n += len(label.split())

        # update SER statistics
        if s + i + d > 0:
            sen_err += 1

        count = count + 1

        sys.stdout.write("Testing..." + str(count * 100 // test_len) + "%\r")

        if count == test_len:
            break

    cer = (100.0 * (cer_s + cer_i + cer_d)) / cer_n
    wer = (100.0 * (wer_s + wer_i + wer_d)) / wer_n
    ser = (100.0 * sen_err) / count

    print_normal("CER : %.3f; WER : %.3f; SER : %.3f \n" % (cer, wer, ser))
    return wer
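# Worked illustration of the CER accumulated in test(), assuming the levenshtein()
# helper returns (distance, (substitutions, insertions, deletions)) as it is unpacked
# above. The strings and counts below are illustrative only.
reference = "hello world"
hypothesis = "helo worlds"
# relative to the reference, one character is missing and one extra character appears;
# whether each counts as an insertion or a deletion depends on the helper's convention
s, i, d = 0, 1, 1
cer = 100.0 * (s + i + d) / len(reference)  # ~18.2 %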
def callback(self):
    self.eval()

    subprocess.run(['rm', '-R', 'results'])

    config = ConfigParser()
    config.read("datasets.cfg")

    for section in config.sections():
        entry = {}
        for option in config.options(section):
            entry[option] = config.get(section, option)

        if entry["for"] == "Document" and "test" in entry:
            self.evaluate(entry["test"])

    result = self.run_transkribus()
    lines = result.split("\n")
    probs = [line.split(",") for line in lines]
    probs = [[prob.replace(" ", "") for prob in problist] for problist in probs]

    new_probs = []
    total = None

    for i in range(0, len(probs)):
        try:
            file_id = probs[i][3].split(".")[0]
            if file_id == "TOTAL":
                total = probs[i]
        except Exception:
            pass

    print_normal("P : " + str(total[0]) + "; F : " + str(total[1]) + "; F1 : " + str(total[2]))

    for i in range(0, len(probs)):
        try:
            new_probs.append([float(probs[i][0]), float(probs[i][1]), float(probs[i][2]),
                              probs[i][3], probs[i][4]])
        except Exception:
            pass

    new_probs.sort(key=lambda x: x[2])

    for i in range(0, len(new_probs)):
        file_id = new_probs[i][3].split(".")[0]
        if file_id != "TOTAL":
            for ext in [".jpg", ".probs.jpg", ".probs.gt.jpg", ".components.jpg", ".txt", ".xml"]:
                os.rename("results/" + file_id + ext, 'results/%.4f%s' % (new_probs[i][2], ext))
        else:
            print(new_probs[i])

    return total[2]
def evaluate(self, path):
    """
    Evaluate the line localizator. Output all the results to the 'results' directory.

    :param path: The path of the images, with or without associated XMLs
    """
    print_normal("Evaluating " + path)

    if not os.path.exists("results"):
        os.makedirs("results")

    data_set = ICDARDocumentEvalSet(path, self.loss)
    loader = torch.utils.data.DataLoader(data_set, batch_size=1, shuffle=False, num_workers=1)

    count = 0

    for i, data in enumerate(loader, 0):
        resized, image, path, label = data

        percent = i * 100 // len(data_set)
        sys.stdout.write(str(percent) + "%... Processing \r")

        lines, positions, probsmap, components = self.extract(image, resized, with_images=False)

        self.output_image_bloc(image, positions).save("results/" + str(count) + ".jpg", "JPEG")

        save_connected_components(components, "results/" + str(count) + ".components.jpg")

        image_numpy_to_pillow_bw(probsmap[0].cpu().detach().numpy()).save("results/" + str(count) + ".probs.jpg")
        del probsmap

        image_numpy_to_pillow_bw(label[0][0].cpu().detach().numpy()).save("results/" + str(count) + ".probs.gt.jpg")

        xml_path = os.path.join(os.path.dirname(path[0]),
                                os.path.splitext(os.path.basename(path[0]))[0] + ".xml")
        if not os.path.exists(xml_path):
            xml_path = os.path.join(os.path.dirname(path[0]),
                                    "page/" + os.path.splitext(os.path.basename(path[0]))[0] + ".xml")

        if os.path.exists(xml_path):
            shutil.copy2(xml_path, "results/" + str(count) + ".xml")
            with open("results/" + str(count) + ".txt", "w") as text_file:
                text_file.write(self.output_baseline(positions))
        else:
            print_warning("Can't find : '" + xml_path + "'")

        count = count + 1