file=fileout, end=' ') print('', file=fileout) if add_boundary_blank: print('{:.10g}'.format(0.0), file=fileout, end=' ') for k in range(1, output.size(1)): print('{:.10g}'.format(-1e30), file=fileout, end=' ') print('', file=fileout) print(']', file=fileout) if __name__ == '__main__': add_defaults('gpu') add_argument('--adaptive_pool_height', type=int, default=16, help='Average adaptive pooling of the images before the ' 'LSTM layers') add_argument('--lstm_hidden_size', type=int, default=128) add_argument('--lstm_num_layers', type=int, default=1) add_argument('--add_softmax', action='store_true') add_argument('--add_boundary_blank', action='store_true') add_argument('syms', help='Symbols table mapping from strings to integers') add_argument('img_dir', help='Directory containing word images') add_argument('gt_file', help='') add_argument('checkpoint', help='') add_argument('output', type=argparse.FileType('w')) args = args() # Build neural network syms = SymbolsTable(args.syms)
from __future__ import absolute_import import argparse import torch.nn as nn from laia.models.htr.gated_crnn import GatedCRNN from laia.common import ModelSaver from laia.common.arguments import add_argument, args, add_defaults from laia.common.arguments_types import NumberInClosedRange, TupleList, \ str2bool from laia.utils import SymbolsTable if __name__ == '__main__': add_defaults('train_path') add_argument('num_input_channels', type=NumberInClosedRange(int, vmin=1), help='Number of channels of the input images') add_argument('syms', type=argparse.FileType('r'), help='Symbols table mapping from strings to integers') add_argument('--cnn_num_features', type=NumberInClosedRange(int, vmin=1), nargs='+', default=[8, 16, 32, 64, 128], help='Number of features in each convolutional layer') add_argument('--cnn_kernel_size', type=TupleList(int, dimensions=2), nargs='+', default=[3, (2, 4), 3, (2, 4), 3], help='Kernel size of each convolution. ' 'Use a list of integers, or a list of strings '
from torch.utils.data import DataLoader from tqdm import tqdm import laia import laia.common.logging as log from dortmund_utils import build_dortmund_model from laia.data import TextImageFromTextTableDataset from laia.common.arguments import add_argument, add_defaults, args from laia.utils import ImageToTensor from laia.utils.phoc import pphoc if __name__ == '__main__': add_defaults('gpu') add_argument('--phoc_levels', type=int, default=[1, 2, 3, 4, 5], nargs='+', help='PHOC levels used to encode the transcript') add_argument('syms', help='Symbols table mapping from strings to integers') add_argument('img_dir', help='Directory containing word images') add_argument('queries', help='Transcription of each query image') add_argument('model_checkpoint', help='Filepath of the model checkpoint') add_argument('output', type=argparse.FileType('w'), help='Filepath of the output file') args = args() syms = laia.utils.SymbolsTable(args.syms) phoc_size = sum(args.phoc_levels) * len(syms) model = build_dortmund_model(phoc_size) log.info('Model has {} parameters',
import torch
from dortmund_utils import build_ctc_model
from laia.data import ImageDataLoader
from laia.data import TextImageFromTextTableDataset
from laia.decoders import CTCGreedyDecoder
from laia.common.arguments import add_argument, add_defaults, args
from laia.common.arguments_types import str2bool
from laia.utils import ImageToTensor, TextToTensor
from laia.utils.symbols_table import SymbolsTable

if __name__ == "__main__":
    # Command-line interface of the script. Registration order is kept
    # exactly as before because positional arguments are order-sensitive.
    add_defaults("gpu")

    # Optional arguments.
    add_argument(
        "--adaptive_pool_height",
        type=int,
        default=16,
        help="Average adaptive pooling of the images before the "
        "LSTM layers",
    )
    # Integer-valued LSTM options that differ only in name and default.
    for _option, _default in (
        ("--lstm_hidden_size", 128),
        ("--lstm_num_layers", 1),
    ):
        add_argument(_option, type=int, default=_default)
    # With nargs="?" and const=True the bare flag yields True; an explicit
    # value is converted by str2bool.
    add_argument(
        "--output_symbols",
        type=str2bool,
        nargs="?",
        const=True,
        default=False,
        help="Print the output with symbols instead of integers",
    )

    # Positional arguments, in the order they must appear on the command line.
    for _name, _help in (
        ("syms", "Symbols table mapping from strings to integers"),
        ("img_dir", "Directory containing word images"),
        ("gt_file", ""),
        ("checkpoint", ""),
    ):
        add_argument(_name, help=_help)
'max_epochs', 'max_updates', 'train_samples_per_epoch', 'valid_samples_per_epoch', 'seed', 'train_path', # Override default values for these arguments, but use the # same help/checks: learning_rate=0.0001, momentum=0.9, iterations_per_update=10, show_progress_bar=True, use_distortions=True, weight_l2_penalty=0.00005) add_argument('--load_checkpoint', type=str, help='Path to the checkpoint to load.') add_argument('--continue_epoch', type=int) add_argument('--phoc_levels', type=int, default=[1, 2, 3, 4, 5], nargs='+', help='PHOC levels used to encode the transcript') add_argument('--exclude_words_ap', type=FileType('r'), help='List of words to exclude in the Average Precision ' 'computation') add_argument('--use_new_phoc', action='store_true') add_argument('syms', help='Symbols table mapping from strings to integers') add_argument('tr_img_dir', help='Directory containing word images') add_argument('tr_txt_table',
import editdistance def cer_score_dict(decoded, target, ids): cer_dict = {} for ref, hyp, x in zip(target, decoded, ids): cer_dict[x] = editdistance.eval(ref, hyp) / len(ref) return cer_dict if __name__ == "__main__": add_defaults("batch_size", "gpu", "train_path", logging_level="WARNING") add_argument( "--syms", type=argparse.FileType("r"), help="Symbols table mapping from strings to integers", ) add_argument("--img_dirs", type=str, nargs="+", help="Directory containing word images") add_argument( "--txt_table", type=argparse.FileType("r"), help="Character transcriptions of each image.", ) add_argument( "--delimiters", type=str, nargs="+",
'gpu', 'max_epochs', 'max_updates', 'train_samples_per_epoch', 'seed', 'train_path', # Override default values for these arguments, but use the # same help/checks: learning_rate=0.0001, momentum=0.9, iterations_per_update=10, show_progress_bar=True, use_distortions=True, weight_l2_penalty=0.00005) add_argument('--load_checkpoint', type=str, help='Path to the checkpoint to load.') add_argument('--continue_epoch', type=int) add_argument('--phoc_levels', type=int, default=[1, 2, 3, 4, 5], nargs='+', help='PHOC levels used to encode the transcript') add_argument('syms', help='Symbols table mapping from strings to integers') add_argument('img_dir', help='Directory containing word images') add_argument('tr_txt_table', help='Character transcriptions of each training image') add_argument('qry_txt_table', help='Character transcriptions of each test query image') add_argument('doc_txt_table', help='Character transcriptions of each test document image')
file=fileout, end=" ") print("", file=fileout) if add_boundary_blank: print("{:.10g}".format(0.0), file=fileout, end=" ") for k in range(1, output.size(1)): print("{:.10g}".format(-1e30), file=fileout, end=" ") print("", file=fileout) print("]", file=fileout) if __name__ == "__main__": add_defaults("gpu") add_argument( "--image_sequencer", type=str, default="avgpool-16", help="Average adaptive pooling of the images before the LSTM layers", ) add_argument("--lstm_hidden_size", type=int, default=128) add_argument("--lstm_num_layers", type=int, default=1) add_argument("--add_softmax", action="store_true") add_argument("--add_boundary_blank", action="store_true") add_argument("syms", help="Symbols table mapping from strings to integers") add_argument("img_dir", help="Directory containing word images") add_argument("gt_file", help="") add_argument("checkpoint", help="") add_argument("output", type=argparse.FileType("w")) args = args() # Build neural network syms = SymbolsTable(args.syms)
p0 = -math.expm1(lp1) lp0 = math.log(p0) if p0 > 0 else float('-inf') for k, p in enumerate([lp0, lp1], 1): if not math.isinf(p): print('{:d}\t{:d}\t{:d}\t0,{:.10g},{:d}'.format( t, t + 1, k, -float(p), k), file=fileout) print(output.size(0), file=fileout) print('', file=fileout) if __name__ == '__main__': add_defaults('gpu') add_argument('--phoc_levels', type=int, default=[1, 2, 3, 4, 5], nargs='+', help='PHOC levels used to encode the transcript') add_argument('--add_sigmoid', action='store_true') add_argument('syms', help='Symbols table mapping from strings to integers') add_argument('img_dir', help='Directory containing word images') add_argument('gt_file', help='') add_argument('checkpoint', help='') add_argument('output', type=argparse.FileType('w')) args = args() # Build neural network syms = SymbolsTable(args.syms) phoc_size = sum(args.phoc_levels) * len(syms) model = build_dortmund_model(phoc_size)
from laia.utils import SymbolsTable, ImageToTensor, TextToTensor
from laia.utils.dortmund_image_to_tensor import DortmundImageToTensor
from torch.optim import SGD

if __name__ == "__main__":
    # Predefined arguments requested by name; `momentum` and
    # `show_progress_bar` are additionally passed with explicit values.
    _default_names = (
        "batch_size",
        "learning_rate",
        "gpu",
        "max_epochs",
        "train_path",
        "train_samples_per_epoch",
        "iterations_per_update",
    )
    add_defaults(*_default_names, momentum=0.9, show_progress_bar=True)

    # Positional arguments, registered in command-line order.
    add_argument(
        "fixed_height",
        type=int,
        help="Resize images to this fixed height size",
    )
    add_argument(
        "syms",
        type=argparse.FileType("r"),
        help="Symbols table mapping from strings to integers",
    )
    add_argument("img_dir", help="Directory containing word images")
    # Both transcription tables are opened for reading by argparse.
    for _table, _description in (
        ("tr_txt_table", "Character transcriptions of each training image"),
        ("va_txt_table", "Character transcriptions of each validation image"),
    ):
        add_argument(_table, type=argparse.FileType("r"), help=_description)

    # From here on `args` refers to the value returned by the imported
    # `args()` callable, no longer to the callable itself.
    args = args()

    syms = SymbolsTable(args.syms)
logger = laia.common.logging.get_logger('laia.egs.washington.train_ctc') if __name__ == '__main__': add_defaults('gpu', 'max_epochs', 'max_updates', 'train_samples_per_epoch', 'valid_samples_per_epoch', 'seed', 'train_path', # Override default values for these arguments, but use the # same help/checks: batch_size=1, learning_rate=0.0001, momentum=0.9, iterations_per_update=10, show_progress_bar=True, use_distortions=True, weight_l2_penalty=0.00005) add_argument('--load_checkpoint', type=str, help='Path to the checkpoint to load.') add_argument('--continue_epoch', type=int) add_argument('--train_laia', action='store_true', help='Train Laia-based model') add_argument('--adaptive_pool_height', type=int, default=16, help='Average adaptive pooling of the images before the ' 'LSTM layers') add_argument('--cnn_num_filters', type=int, nargs='+', default=[16, 32, 48, 64]) add_argument('--cnn_maxpool_size', type=int, nargs='*', default=[2, 2]) add_argument('--lstm_hidden_size', type=int, default=128) add_argument('--lstm_num_layers', type=int, default=1) add_argument('--min_size', type=int, default=None) add_argument('syms', help='Symbols table mapping from strings to integers') add_argument('tr_img_dir', help='Directory containing word images') add_argument('tr_txt_table',
from tqdm import tqdm import laia from laia.models.kws.dortmund_phocnet import DortmundPHOCNet import laia.common.logging as log from dortmund_utils import build_dortmund_model from laia.data import TextImageFromTextTableDataset from laia.common.arguments import add_argument, add_defaults, args from laia.utils import ImageToTensor if __name__ == "__main__": add_defaults("gpu") add_argument( "--phoc_levels", type=int, default=[1, 2, 3, 4, 5], nargs="+", help="PHOC levels used to encode the transcript", ) add_argument( "--tpp_levels", type=int, default=[1, 2, 3, 4, 5], nargs="*", help="Temporal Pyramid Pooling levels", ) add_argument( "--spp_levels", type=int, default=None, nargs="*",