Example #1
     "valid_samples_per_epoch",
     "seed",
     "train_path",
     # Override default values for these arguments, but use the
     # same help/checks:
     learning_rate=0.0001,
     momentum=0.9,
     num_rolling_checkpoints=5,
     iterations_per_update=10,
     save_checkpoint_interval=5,
     show_progress_bar=True,
     use_distortions=True,
     weight_l2_penalty=0.00005,
 )
 add_argument("--load_checkpoint",
              type=str,
              help="Path to the checkpoint to load.")
 add_argument("--continue_epoch", type=int)
 add_argument(
     "--phoc_levels",
     type=int,
     default=[1, 2, 3, 4, 5],
     nargs="+",
     help="PHOC levels used to encode the transcript",
 )
 add_argument(
     "--tpp_levels",
     type=int,
     default=[1, 2, 3, 4, 5],
     nargs="*",
     help="Temporal Pyramid Pooling levels",
Example #2
        # Write one arc per (time-step, symbol) pair: source state, destination
        # state, label, and a weight string containing the negated network
        # output as the cost (Kaldi-style lattice text format).
        for t in range(output.size(0)):
            for k in range(output.size(1)):
                print(
                    "{:d}\t{:d}\t{:d}\t0,{:.10g},{:d}".format(
                        t, t + 1, k + 1, -float(output[t, k]), k + 1),
                    file=fileout,
                )
        # Mark the last state as final and end the lattice with a blank line.
        print(output.size(0), file=fileout)
        print("", file=fileout)


if __name__ == "__main__":
    add_defaults("gpu")
    add_argument(
        "--image_sequencer",
        type=str,
        default="avgpool-16",
        help="Average adaptive pooling of the images before the LSTM layers",
    )
    add_argument("--lstm_hidden_size", type=int, default=128)
    add_argument("--lstm_num_layers", type=int, default=1)
    add_argument("--add_softmax", action="store_true")
    add_argument("syms", help="Symbols table mapping from strings to integers")
    add_argument("img_dir", help="Directory containing word images")
    add_argument("gt_file", help="")
    add_argument("checkpoint", help="")
    add_argument("output", type=argparse.FileType("w"))
    args = args()

    # Build neural network
    syms = SymbolsTable(args.syms)
    model = DortmundCRNN(
Example #3
     "seed",
     "train_path",
     # Override default values for these arguments, but use the
     # same help/checks:
     batch_size=1,
     learning_rate=0.0001,
     momentum=0.9,
     num_rolling_checkpoints=5,
     iterations_per_update=10,
     save_checkpoint_interval=5,
     show_progress_bar=True,
     use_distortions=True,
     weight_l2_penalty=0.00005,
 )
 add_argument("--load_checkpoint",
              type=str,
              help="Path to the checkpoint to load.")
 add_argument("--continue_epoch", type=int)
 add_argument(
     "--image_sequencer",
     type=str,
     default="avgpool-16",
     help="Average adaptive pooling of the images before the LSTM layers",
 )
 add_argument(
     "--use_adam_optim",
     type=str2bool,
     nargs="?",
     const=True,
     default=False,
     help="If true, use Adam optimizer instead of SGD",
Example #4
from torch.utils.data import DataLoader
from tqdm import tqdm

import laia
import laia.logging as log
from laia.data import TextImageFromTextTableDataset
from laia.models.kws.dortmund_phocnet import DortmundPHOCNet
from laia.plugins.arguments import add_argument, add_defaults, args
from laia.utils import ImageToTensor

if __name__ == "__main__":
    add_defaults("gpu")
    add_argument(
        "--phoc_levels",
        type=int,
        default=[1, 2, 3, 4, 5],
        nargs="+",
        help="PHOC levels used to encode the transcript",
    )
    add_argument("syms", help="Symbols table mapping from strings to integers")
    add_argument("img_dir", help="Directory containing word images")
    add_argument("candidates", help="Transcription of each candidate image")
    add_argument("queries", help="Transcription of each query image")
    add_argument("model_checkpoint", help="Filepath of the model checkpoint")
    add_argument("output",
                 type=argparse.FileType("w"),
                 help="Filepath of the output file")
    args = args()

    syms = laia.utils.SymbolsTable(args.syms)
    phoc_size = sum(args.phoc_levels) * len(syms)
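
For reference, the phoc_size computed on the last line is simply the sum of the PHOC levels times the size of the symbols table: level l splits the word into l regions, each holding one attribute per symbol. A small sketch of that arithmetic with a hypothetical 26-symbol table:

phoc_levels = [1, 2, 3, 4, 5]   # the default --phoc_levels value above
num_symbols = 26                # hypothetical size of the symbols table
phoc_size = sum(phoc_levels) * num_symbols
print(phoc_size)                # 15 * 26 = 390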
Example #5
from laia.data import ImageDataLoader
from laia.data import TextImageFromTextTableDataset
from laia.decoders import CTCGreedyDecoder
from laia.models.htr.dortmund_crnn import DortmundCRNN
from laia.plugins.arguments import add_argument, add_defaults, args
from laia.plugins.arguments_types import str2bool
from laia.utils import ImageToTensor, TextToTensor
from laia.utils.symbols_table import SymbolsTable

if __name__ == "__main__":
    add_defaults("gpu")
    add_argument(
        "--output_symbols",
        type=str2bool,
        nargs="?",
        const=True,
        default=False,
        help="Print the output with symbols instead of integers",
    )
    add_argument(
        "--image_sequencer",
        type=str,
        default="avgpool-16",
        help="Average adaptive pooling of the images before the LSTM layers",
    )
    add_argument("--lstm_hidden_size", type=int, default=128)
    add_argument("--lstm_num_layers", type=int, default=1)
    add_argument("syms", help="Symbols table mapping from strings to integers")
    add_argument("img_dir", help="Directory containing word images")
    add_argument("gt_file", help="")
    add_argument("checkpoint", help="")
Example #6
        if x.ndim != 3:  # grayscale (H, W) array: add a trailing channel axis
            x = np.expand_dims(x, axis=-1)
        x = np.transpose(x, (2, 0, 1))  # (H, W, C) -> (C, H, W)
        return torch.from_numpy(x)


if __name__ == "__main__":
    import matplotlib.pyplot as plt

    import laia.random
    from laia.data import TextImageFromTextTableDataset, ImageDataLoader
    from laia.plugins.arguments import add_argument, add_defaults, args

    add_defaults("seed")
    add_argument("--num_images",
                 type=int,
                 help="Show only this number of images")
    add_argument("--shuffle",
                 action="store_true",
                 help="Shuffle the list of images")
    add_argument("img_dir", help="Directory containing images")
    add_argument("txt_table", help="Transcriptions of each image")
    args = args()
    laia.random.manual_seed(args.seed)

    dataset = TextImageFromTextTableDataset(
        args.txt_table, args.img_dir, img_transform=DortmundImageToTensor())
    dataset_loader = ImageDataLoader(dataset=dataset,
                                     image_channels=1,
                                     shuffle=args.shuffle)
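
The __call__ tail at the top of this example converts a NumPy image into a channel-first tensor; a self-contained sketch of that step, with an illustrative helper name and a toy input:

import numpy as np
import torch

def to_chw_tensor(x):
    # Grayscale (H, W) arrays get a trailing channel axis first.
    if x.ndim != 3:
        x = np.expand_dims(x, axis=-1)
    # Reorder (H, W, C) -> (C, H, W), as PyTorch image models expect.
    x = np.transpose(x, (2, 0, 1))
    return torch.from_numpy(x)

t = to_chw_tensor(np.zeros((32, 64), dtype=np.float32))
print(t.size())  # torch.Size([1, 32, 64])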
Example #7
from tqdm import tqdm

import laia
import laia.logging as log
from laia.data import TextImageFromTextTableDataset
from laia.models.kws.dortmund_phocnet import DortmundPHOCNet
from laia.plugins.arguments import add_argument, add_defaults, args
from laia.utils import ImageToTensor
from laia.utils.phoc import pphoc

if __name__ == "__main__":
    add_defaults("gpu")
    add_argument(
        "--phoc_levels",
        type=int,
        default=[1, 2, 3, 4, 5],
        nargs="+",
        help="PHOC levels used to encode the transcript",
    )
    add_argument(
        "--tpp_levels",
        type=int,
        default=[1, 2, 3, 4, 5],
        nargs="*",
        help="Temporal Pyramid Pooling levels",
    )
    add_argument(
        "--spp_levels",
        type=int,
        default=None,
        nargs="*",