Example #1
                      file=fileout,
                      end=' ')
            print('', file=fileout)
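        # Optionally emit one extra frame where the blank symbol (index 0) gets
        # log-probability 0.0 (probability 1) and every other symbol gets -1e30,
        # effectively minus infinity, acting as a boundary blank.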
        if add_boundary_blank:
            print('{:.10g}'.format(0.0), file=fileout, end=' ')
            for k in range(1, output.size(1)):
                print('{:.10g}'.format(-1e30), file=fileout, end=' ')
            print('', file=fileout)
        print(']', file=fileout)


if __name__ == '__main__':
    add_defaults('gpu')
    add_argument('--adaptive_pool_height',
                 type=int,
                 default=16,
                 help='Average adaptive pooling of the images before the '
                 'LSTM layers')
    add_argument('--lstm_hidden_size', type=int, default=128)
    add_argument('--lstm_num_layers', type=int, default=1)
    add_argument('--add_softmax', action='store_true')
    add_argument('--add_boundary_blank', action='store_true')
    add_argument('syms', help='Symbols table mapping from strings to integers')
    add_argument('img_dir', help='Directory containing word images')
    add_argument('gt_file', help='')
    add_argument('checkpoint', help='')
    add_argument('output', type=argparse.FileType('w'))
    args = args()

    # Build neural network
    syms = SymbolsTable(args.syms)
Example #2
from __future__ import absolute_import

import argparse

import torch.nn as nn
from laia.models.htr.gated_crnn import GatedCRNN
from laia.common import ModelSaver
from laia.common.arguments import add_argument, args, add_defaults
from laia.common.arguments_types import NumberInClosedRange, TupleList, \
    str2bool
from laia.utils import SymbolsTable

if __name__ == '__main__':
    add_defaults('train_path')
    add_argument('num_input_channels',
                 type=NumberInClosedRange(int, vmin=1),
                 help='Number of channels of the input images')
    add_argument('syms',
                 type=argparse.FileType('r'),
                 help='Symbols table mapping from strings to integers')
    add_argument('--cnn_num_features',
                 type=NumberInClosedRange(int, vmin=1),
                 nargs='+',
                 default=[8, 16, 32, 64, 128],
                 help='Number of features in each convolutional layer')
    add_argument('--cnn_kernel_size',
                 type=TupleList(int, dimensions=2),
                 nargs='+',
                 default=[3, (2, 4), 3, (2, 4), 3],
                 help='Kernel size of each convolution. '
                 'Use a list of integers, or a list of strings '
Example #3
import argparse  # needed for argparse.FileType used below

from torch.utils.data import DataLoader
from tqdm import tqdm

import laia
import laia.common.logging as log
from dortmund_utils import build_dortmund_model
from laia.data import TextImageFromTextTableDataset
from laia.common.arguments import add_argument, add_defaults, args
from laia.utils import ImageToTensor
from laia.utils.phoc import pphoc

if __name__ == '__main__':
    add_defaults('gpu')
    add_argument('--phoc_levels',
                 type=int,
                 default=[1, 2, 3, 4, 5],
                 nargs='+',
                 help='PHOC levels used to encode the transcript')
    add_argument('syms', help='Symbols table mapping from strings to integers')
    add_argument('img_dir', help='Directory containing word images')
    add_argument('queries', help='Transcription of each query image')
    add_argument('model_checkpoint', help='Filepath of the model checkpoint')
    add_argument('output',
                 type=argparse.FileType('w'),
                 help='Filepath of the output file')
    args = args()

    syms = laia.utils.SymbolsTable(args.syms)
    phoc_size = sum(args.phoc_levels) * len(syms)
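    # e.g. with the default levels [1, 2, 3, 4, 5] and a hypothetical 36-symbol
    # table, phoc_size = (1 + 2 + 3 + 4 + 5) * 36 = 15 * 36 = 540.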
    model = build_dortmund_model(phoc_size)
    log.info('Model has {} parameters',
Example #4
import torch
from dortmund_utils import build_ctc_model
from laia.data import ImageDataLoader
from laia.data import TextImageFromTextTableDataset
from laia.decoders import CTCGreedyDecoder
from laia.common.arguments import add_argument, add_defaults, args
from laia.common.arguments_types import str2bool
from laia.utils import ImageToTensor, TextToTensor
from laia.utils.symbols_table import SymbolsTable


if __name__ == '__main__':
    add_defaults('gpu')
    add_argument('--adaptive_pool_height', type=int, default=16,
                 help='Average adaptive pooling of the images before the '
                      'LSTM layers')
    add_argument('--lstm_hidden_size', type=int, default=128)
    add_argument('--lstm_num_layers', type=int, default=1)
    add_argument(
        "--output_symbols",
        type=str2bool,
        nargs="?",
        const=True,
        default=False,
        help="Print the output with symbols instead of integers",
    )
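    # With this argparse pattern (hypothetical invocations): passing
    # "--output_symbols" with no value yields const=True, "--output_symbols false"
    # is parsed through str2bool, and omitting the flag leaves default=False.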
    add_argument('syms', help='Symbols table mapping from strings to integers')
    add_argument('img_dir', help='Directory containing word images')
    add_argument('gt_file', help='')
    add_argument('checkpoint', help='')
Example #5
     'max_epochs',
     'max_updates',
     'train_samples_per_epoch',
     'valid_samples_per_epoch',
     'seed',
     'train_path',
     # Override default values for these arguments, but use the
     # same help/checks:
     learning_rate=0.0001,
     momentum=0.9,
     iterations_per_update=10,
     show_progress_bar=True,
     use_distortions=True,
     weight_l2_penalty=0.00005)
 add_argument('--load_checkpoint',
              type=str,
              help='Path to the checkpoint to load.')
 add_argument('--continue_epoch', type=int)
 add_argument('--phoc_levels',
              type=int,
              default=[1, 2, 3, 4, 5],
              nargs='+',
              help='PHOC levels used to encode the transcript')
 add_argument('--exclude_words_ap',
              type=FileType('r'),
              help='List of words to exclude in the Average Precision '
              'computation')
 add_argument('--use_new_phoc', action='store_true')
 add_argument('syms', help='Symbols table mapping from strings to integers')
 add_argument('tr_img_dir', help='Directory containing word images')
 add_argument('tr_txt_table',
Example #6
import argparse

import editdistance

# The argument helpers used below are assumed to come from laia.common.arguments,
# matching the other examples on this page:
from laia.common.arguments import add_argument, add_defaults, args


def cer_score_dict(decoded, target, ids):
    cer_dict = {}
    for ref, hyp, x in zip(target, decoded, ids):
        cer_dict[x] = editdistance.eval(ref, hyp) / len(ref)
    return cer_dict
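
# Hypothetical usage sketch: one reference "hello" against one hypothesis "helo".
# editdistance.eval("hello", "helo") == 1, so the CER for that id is 1 / 5 = 0.2:
#   cer_score_dict(decoded=["helo"], target=["hello"], ids=["img-001"])  # {"img-001": 0.2}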


if __name__ == "__main__":
    add_defaults("batch_size", "gpu", "train_path", logging_level="WARNING")
    add_argument(
        "--syms",
        type=argparse.FileType("r"),
        help="Symbols table mapping from strings to integers",
    )
    add_argument("--img_dirs",
                 type=str,
                 nargs="+",
                 help="Directory containing word images")
    add_argument(
        "--txt_table",
        type=argparse.FileType("r"),
        help="Character transcriptions of each image.",
    )
    add_argument(
        "--delimiters",
        type=str,
        nargs="+",
Example #7
     'gpu',
     'max_epochs',
     'max_updates',
     'train_samples_per_epoch',
     'seed',
     'train_path',
     # Override default values for these arguments, but use the
     # same help/checks:
     learning_rate=0.0001,
     momentum=0.9,
     iterations_per_update=10,
     show_progress_bar=True,
     use_distortions=True,
     weight_l2_penalty=0.00005)
 add_argument('--load_checkpoint',
              type=str,
              help='Path to the checkpoint to load.')
 add_argument('--continue_epoch', type=int)
 add_argument('--phoc_levels',
              type=int,
              default=[1, 2, 3, 4, 5],
              nargs='+',
              help='PHOC levels used to encode the transcript')
 add_argument('syms', help='Symbols table mapping from strings to integers')
 add_argument('img_dir', help='Directory containing word images')
 add_argument('tr_txt_table',
              help='Character transcriptions of each training image')
 add_argument('qry_txt_table',
              help='Character transcriptions of each test query image')
 add_argument('doc_txt_table',
              help='Character transcriptions of each test document image')
Example #8
                      file=fileout,
                      end=" ")
            print("", file=fileout)
        if add_boundary_blank:
            print("{:.10g}".format(0.0), file=fileout, end=" ")
            for k in range(1, output.size(1)):
                print("{:.10g}".format(-1e30), file=fileout, end=" ")
            print("", file=fileout)
        print("]", file=fileout)


if __name__ == "__main__":
    add_defaults("gpu")
    add_argument(
        "--image_sequencer",
        type=str,
        default="avgpool-16",
        help="Average adaptive pooling of the images before the LSTM layers",
    )
    add_argument("--lstm_hidden_size", type=int, default=128)
    add_argument("--lstm_num_layers", type=int, default=1)
    add_argument("--add_softmax", action="store_true")
    add_argument("--add_boundary_blank", action="store_true")
    add_argument("syms", help="Symbols table mapping from strings to integers")
    add_argument("img_dir", help="Directory containing word images")
    add_argument("gt_file", help="")
    add_argument("checkpoint", help="")
    add_argument("output", type=argparse.FileType("w"))
    args = args()

    # Build neural network
    syms = SymbolsTable(args.syms)
Example #9
            p0 = -math.expm1(lp1)
            lp0 = math.log(p0) if p0 > 0 else float('-inf')
            for k, p in enumerate([lp0, lp1], 1):
                if not math.isinf(p):
                    print('{:d}\t{:d}\t{:d}\t0,{:.10g},{:d}'.format(
                        t, t + 1, k, -float(p), k),
                          file=fileout)
        print(output.size(0), file=fileout)
        print('', file=fileout)


if __name__ == '__main__':
    add_defaults('gpu')
    add_argument('--phoc_levels',
                 type=int,
                 default=[1, 2, 3, 4, 5],
                 nargs='+',
                 help='PHOC levels used to encode the transcript')
    add_argument('--add_sigmoid', action='store_true')
    add_argument('syms', help='Symbols table mapping from strings to integers')
    add_argument('img_dir', help='Directory containing word images')
    add_argument('gt_file', help='')
    add_argument('checkpoint', help='')
    add_argument('output', type=argparse.FileType('w'))
    args = args()

    # Build neural network
    syms = SymbolsTable(args.syms)
    phoc_size = sum(args.phoc_levels) * len(syms)
    model = build_dortmund_model(phoc_size)
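In the snippet above, -math.expm1(lp1) recovers 1 - p1 from a log-probability without the cancellation error of computing 1 - math.exp(lp1) directly when p1 is close to 1; a small standalone illustration (the value of lp1 is made up):

import math

lp1 = -1e-12                 # log-probability of the positive class, p1 ~ 1
p0 = -math.expm1(lp1)        # ~1e-12; 1 - math.exp(lp1) would round this badly
lp0 = math.log(p0) if p0 > 0 else float('-inf')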
Example #10
from laia.utils import SymbolsTable, ImageToTensor, TextToTensor
from laia.utils.dortmund_image_to_tensor import DortmundImageToTensor
from torch.optim import SGD

if __name__ == '__main__':
    add_defaults('batch_size',
                 'learning_rate',
                 'gpu',
                 'max_epochs',
                 'train_path',
                 'train_samples_per_epoch',
                 'iterations_per_update',
                 momentum=0.9,
                 show_progress_bar=True)
    add_argument('fixed_height',
                 type=int,
                 help='Resize images to this fixed height size')
    add_argument('syms',
                 type=argparse.FileType('r'),
                 help='Symbols table mapping from strings to integers')
    add_argument('img_dir', help='Directory containing word images')
    add_argument('tr_txt_table',
                 type=argparse.FileType('r'),
                 help='Character transcriptions of each training image')
    add_argument('va_txt_table',
                 type=argparse.FileType('r'),
                 help='Character transcriptions of each validation image')
    args = args()

    syms = SymbolsTable(args.syms)
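Example #10 parses the optimizer hyper-parameters but the excerpt ends before the optimizer is created; a minimal sketch of how the imported SGD class could be wired to them (an assumption, not the original script; the model variable would be built later from the parsed arguments):

# optimizer = SGD(model.parameters(),
#                 lr=args.learning_rate,
#                 momentum=args.momentum)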
Example #11
logger = laia.common.logging.get_logger('laia.egs.washington.train_ctc')

if __name__ == '__main__':
    add_defaults('gpu', 'max_epochs', 'max_updates', 'train_samples_per_epoch',
                 'valid_samples_per_epoch', 'seed', 'train_path',
                 # Override default values for these arguments, but use the
                 # same help/checks:
                 batch_size=1,
                 learning_rate=0.0001,
                 momentum=0.9,
                 iterations_per_update=10,
                 show_progress_bar=True,
                 use_distortions=True,
                 weight_l2_penalty=0.00005)
    add_argument('--load_checkpoint', type=str,
                 help='Path to the checkpoint to load.')
    add_argument('--continue_epoch', type=int)
    add_argument('--train_laia', action='store_true',
                 help='Train Laia-based model')
    add_argument('--adaptive_pool_height', type=int, default=16,
                 help='Average adaptive pooling of the images before the '
                      'LSTM layers')
    add_argument('--cnn_num_filters', type=int, nargs='+',
                 default=[16, 32, 48, 64])
    add_argument('--cnn_maxpool_size', type=int, nargs='*', default=[2, 2])
    add_argument('--lstm_hidden_size', type=int, default=128)
    add_argument('--lstm_num_layers', type=int, default=1)
    add_argument('--min_size', type=int, default=None)
    add_argument('syms', help='Symbols table mapping from strings to integers')
    add_argument('tr_img_dir', help='Directory containing word images')
    add_argument('tr_txt_table',
Example #12
from tqdm import tqdm

import laia
from laia.models.kws.dortmund_phocnet import DortmundPHOCNet
import laia.common.logging as log
from dortmund_utils import build_dortmund_model
from laia.data import TextImageFromTextTableDataset
from laia.common.arguments import add_argument, add_defaults, args
from laia.utils import ImageToTensor

if __name__ == "__main__":
    add_defaults("gpu")
    add_argument(
        "--phoc_levels",
        type=int,
        default=[1, 2, 3, 4, 5],
        nargs="+",
        help="PHOC levels used to encode the transcript",
    )
    add_argument(
        "--tpp_levels",
        type=int,
        default=[1, 2, 3, 4, 5],
        nargs="*",
        help="Temporal Pyramid Pooling levels",
    )
    add_argument(
        "--spp_levels",
        type=int,
        default=None,
        nargs="*",