示例#1
0
    def predictString(self, line):
        """
        Runs the network on a line image and decodes the recognized text.

        The output matrix of the forward pass is kept in ``self.outputs``.

        Args:
            line (numpy.array): Input image

        Returns:
            A unicode string containing the recognition result.
        """
        # reshape the image into frames of ``ninput`` values each
        frames = line.reshape(-1, self.rnn.ninput(), 1)
        self.rnn.inputs.aset(frames.astype('float32'))
        self.rnn.forward()
        raw = self.rnn.outputs.array()
        self.outputs = raw.reshape(frames.shape[0], self.rnn.noutput())

        # keep only the first element of every located entry; it is used as
        # the class code of that entry
        label_ids = [
            int(hit[0])
            for hit in kraken.lib.lstm.translate_back_locations(self.outputs)
        ]

        # copy the codes into a clstm container so the network can decode them
        result = clstm.Classes()
        result.resize(len(label_ids))
        for pos, label in enumerate(label_ids):
            result[pos] = label
        return self.rnn.decode(result)
示例#2
0
    def trainString(self, line, s, update=1):
        """
        Trains the network using an input numpy array and a unicode string.

        Strings are assumed to be in ``display`` order, i.e. as produced by
        the BiDi algorithm.

        Args:
            line (numpy.array): Input image
            s (str): Expected output string
            update (bool): Switch to disable weight updates; forwarded
                           unchanged to ``trainSequence``

        Returns:
            A unicode string containing the recognized sequence.
        """
        # encode() receives an empty container and fills it with the labels
        # corresponding to ``s``
        labels = clstm.Classes()
        self.rnn.encode(labels, s)

        # Forward ``update`` so that passing a false value really suppresses
        # the weight update instead of being silently ignored.
        cls = self.trainSequence(line, labels, update=update)
        return self.rnn.decode(cls)
示例#3
0
    def trainSequence(self, line, labels, update=1):
        """
        Runs one training step on a line image against a label sequence.

        The network is run forward, the targets are CTC-aligned against its
        output, the resulting deltas are backpropagated and, unless disabled
        through ``update``, the weights are updated. The output matrix of the
        forward pass is kept in ``self.outputs``.

        Args:
            line (numpy.array): Input image
            labels (clstm.Classes): Label sequence
            update (bool): Switch to disable weight updates

        Returns:
            clstm.Classes containing the recognized label sequence.
        """
        net = self.rnn

        # forward pass over the image, reshaped into frames of ``ninput``
        # values each
        frames = line.reshape(-1, net.ninput(), 1)
        net.inputs.aset(frames.astype('float32'))
        net.forward()
        raw = net.outputs.array()
        self.outputs = raw.reshape(frames.shape[0], net.noutput())

        # CTC-align the target sequence against the network output
        ctc_targets = clstm.Sequence()
        alignment = clstm.Sequence()
        clstm.mktargets(ctc_targets, labels, net.noutput())
        clstm.seq_ctc_align(alignment, net.outputs, ctc_targets)

        # deltas are alignment minus output; backpropagate them and apply the
        # weight update only when requested
        net.d_outputs.aset(alignment.array() - net.outputs.array())
        net.backward()
        if update:
            net.update()

        # translate the stored forward-pass output back into a label sequence
        decoded = kraken.lib.lstm.translate_back(self.outputs)
        recognized = clstm.Classes()
        recognized.resize(len(decoded))
        for pos, code in enumerate(decoded):
            recognized[pos] = code

        return recognized