Example 1
    def __init__(self, flags):
        super(SATRN, self).__init__()
        self.inplanes = 1 if flags.Global.image_shape[0] == 1 else 3
        self.converter = AttnLabelConverter(flags)
        self.num_classes = self.converter.char_num
        self.d_model = flags.Transformer.model_dims
        self.d_ff = flags.Transformer.feedforward_dims
        self.num_encoder = flags.Transformer.num_encoder
        self.num_decoder = flags.Transformer.num_decoder
        self.h = flags.Transformer.num_head
        self.dropout = flags.Transformer.dropout_rate

        c = copy.deepcopy
        self.attn = MultiHeadedAttention(self.h, self.d_model)
        self.laff = LocalityAwareFeedForward(self.d_model, self.d_ff,
                                             self.d_model)
        self.ff = PointwiseFeedForward(self.d_model, self.d_ff)
        self.position1d = PositionalEncoding(self.d_model, self.dropout)
        self.position2d = A2DPE(self.d_model, self.dropout)

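        # Encoder layers pair self-attention with the locality-aware feed-forward;
        # decoder layers stack self- and cross-attention over the pointwise feed-forward.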
        self.encoder = Encoder(
            EncoderLayer(self.d_model, c(self.attn), c(self.laff),
                         self.dropout), self.num_encoder)
        self.decoder = Decoder(
            DecoderLayer(self.d_model, c(self.attn), c(self.attn), c(self.ff),
                         self.dropout), self.num_decoder)
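        # Source embedding: shallow CNN features + adaptive 2D positional encoding (A2DPE);
        # target embedding: token embeddings + 1D positional encoding.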
        self.src_embed = nn.Sequential(ShallowCNN(self.inplanes, self.d_model),
                                       self.position2d)
        self.tgt_embed = nn.Sequential(
            Embeddings(self.num_classes, self.d_model), self.position1d)
        self.generator = Generator(self.d_model, self.num_classes)
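
Each constructor in these examples pulls its hyperparameters from an attribute-style flags/config object. As a rough sketch (not from the source), the fields the SATRN constructor above reads could be mocked with types.SimpleNamespace; the values below are placeholders, and AttnLabelConverter will read further Global fields (for example the character set) that this snippet does not show.

from types import SimpleNamespace

# Illustrative only: field names mirror the attributes read above; values are guesses.
flags = SimpleNamespace(
    Global=SimpleNamespace(image_shape=(1, 32, 100)),  # (channels, height, width)
    Transformer=SimpleNamespace(
        model_dims=512,
        feedforward_dims=2048,
        num_encoder=12,
        num_decoder=6,
        num_head=8,
        dropout_rate=0.1,
    ),
)
# model = SATRN(flags)  # assumes the repository's modules are importable
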
Example 2
    def __init__(self, flags):
        super(DAN, self).__init__()
        self.input_shape = flags.Global.image_shape
        self.inplanes = self.input_shape[0]
        self.strides = [(2, 2), (1, 1), (2, 2), (1, 1), (1, 1)]
        self.compress_layer = flags.Architecture.compress_layer
        self.block = BasicBlock
        self.layers = flags.Architecture.layers
        self.maxT = flags.Global.batch_max_length
        self.depth = flags.CAM.depth
        self.num_channel = flags.CAM.num_channel
        self.converter = AttnLabelConverter(flags)
        self.num_class = self.converter.char_num
        self.num_steps = flags.Global.batch_max_length
        self.is_train = flags.Global.is_train

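        # ResNet backbone, a second pass just to read the per-stage feature shapes,
        # the convolutional alignment module (CAM), and the decoupled text decoder (DTD).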
        self.feature_extractor = ResNet(self.inplanes, self.block,
                                        self.strides, self.layers,
                                        self.compress_layer)
        self.scales = Feature_Extractor(self.input_shape, self.block,
                                        self.strides, self.layers,
                                        self.compress_layer).Iwantshapes()
        self.cam_module = CAM(self.scales, self.maxT, self.depth,
                              self.num_channel)
        self.decoder = DTD(self.num_class, self.num_channel)
Example 3
    def __init__(self, device, model, optimizer, loss, val_loader,
                 train_loader, flags, global_state):
        self.model = model
        self.optimizer = optimizer
        self.loss_func = loss
        self.train_loader = train_loader
        self.eval_loader = val_loader
        self.to_use_device = device
        self.flags = flags.Global
        self.global_state = global_state
        if flags.Global.loss_type == 'ctc':
            self.converter = CTCLabelConverter(flags)
        else:
            self.converter = AttnLabelConverter(flags)
Example 4
    def __init__(self, flags):
        super(FAN, self).__init__()
        self.inplanes = 1 if flags.Global.image_shape[0] == 1 else 3
        self.num_inputs = flags.SeqRNN.input_size
        self.num_hiddens = flags.SeqRNN.hidden_size
        self.converter = AttnLabelConverter(flags)
        self.num_classes = self.converter.char_num

        self.block = BasicBlock
        self.layers = flags.Architecture.layers
        self.feature_extractor = ResNet(self.inplanes, self.num_inputs,
                                        self.block, self.layers)
        self.reshape_layer = ReshapeLayer()
        self.sequence_layer = Attention(self.num_inputs, self.num_hiddens,
                                        self.num_classes)
Example 5
    def __init__(self, flags):
        super(SAR, self).__init__()
        self.inplanes = 1 if flags.Global.image_shape[0] == 1 else 3
        self.input_size = flags.SeqRNN.input_size
        self.en_hidden_size = flags.SeqRNN.en_hidden_size
        self.de_hidden_size = flags.SeqRNN.de_hidden_size
        self.converter = AttnLabelConverter(flags)
        self.num_classes = self.converter.char_num

        self.block = BasicBlock
        self.layers = flags.Architecture.layers
        self.feature_extractor = ResNet(self.inplanes, self.input_size,
                                        self.block, self.layers)
        self.lstm_encoder = LSTMEncoder(self.input_size, self.en_hidden_size)
        self.lstm_decoder = LSTMDecoder(self.input_size, self.en_hidden_size,
                                        self.de_hidden_size, self.num_classes)
Example 6
    def __init__(self, device, model, optimizer, loss, val_loader,
                 train_loader, flags, global_state):
        self.model = model
        self.optimizer = optimizer
        self.loss_func = loss
        self.train_loader = train_loader
        self.eval_loader = val_loader
        self.to_use_device = device
        self.flags = flags.Global
        self.global_state = global_state
        if flags.Global.loss_type == 'ctc':
            self.converter = CTCLabelConverter(flags)
        elif flags.Global.loss_type == 'attn':
            self.converter = AttnLabelConverter(flags)
        else:
            raise Exception('Not implemented error!')
        logging.info(self.flags)
Example 7
    def __init__(self):
        self.config = build_config()
        model = build_model(self.config)
        device, gpu_count = build_device(self.config)
        optimizer = build_optimizer(self.config, model)
        model, optimizer, global_state = build_pretrained_weights(
            self.config, model, optimizer)
        self.device = device
        if self.config.Global.loss_type == 'ctc':
            self.converter = CTCLabelConverter(self.config)
        else:
            self.converter = AttnLabelConverter(self.config)
        self.model = model.to(self.device)

        self.keep_ratio_with_pad = self.config.TrainReader.padding
        self.channel = self.config.Global.image_shape[0]
        self.imgH = self.config.Global.image_shape[1]
        self.imgW = self.config.Global.image_shape[2]
        self.num_steps = self.config.Global.batch_max_length + 1
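
The predictor above only stores the image geometry (channel, imgH, imgW), the padding flag, and the decoding step count; the transform itself lives elsewhere in the repository. Purely as an illustration of how such fields are commonly used in text-recognition pipelines, a hypothetical helper might look like this (PIL/torchvision based; names and normalization are assumptions, not the repository's code):

import torch
from PIL import Image
import torchvision.transforms.functional as TF

def preprocess(image: Image.Image, imgH: int, imgW: int,
               channel: int, keep_ratio_with_pad: bool) -> torch.Tensor:
    # Match the configured channel count.
    image = image.convert('L' if channel == 1 else 'RGB')
    if keep_ratio_with_pad:
        # Keep the aspect ratio: scale to height imgH, then right-pad to width imgW.
        w, h = image.size
        new_w = min(imgW, max(1, int(w * imgH / h)))
        image = image.resize((new_w, imgH), Image.BICUBIC)
        tensor = TF.to_tensor(image)            # (channel, imgH, new_w), values in [0, 1]
        padded = torch.zeros(channel, imgH, imgW)
        padded[:, :, :new_w] = tensor
        tensor = padded
    else:
        # Stretch to the fixed (imgH, imgW) size.
        image = image.resize((imgW, imgH), Image.BICUBIC)
        tensor = TF.to_tensor(image)
    # Normalize to [-1, 1] and add a batch dimension.
    return tensor.sub_(0.5).div_(0.5).unsqueeze(0)
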
Example 8
    def __init__(self):
        config = build_config()
        model = build_model(config)
        device, gpu_count = build_device(config)
        optimizer = build_optimizer(config, model)
        if gpu_count > 1:
            model = nn.DataParallel(model)
        model, optimizer, global_state = build_pretrained_weights(
            config, model, optimizer)
        self.device = device
        if config.Global.loss_type == 'ctc':
            self.converter = CTCLabelConverter(config)
        else:
            self.converter = AttnLabelConverter(config)
        self.model = model.to(self.device)

        self.keep_ratio_with_pad = config.TrainReader.padding
        self.channel = config.Global.image_shape[0]
        self.imgH = config.Global.image_shape[1]
        self.imgW = config.Global.image_shape[2]