Пример #1
0
    def test_simple_decode(self):
        """Decode one five-frame input with and without ``merge_repeated``.

        The input array has shape (5, 1, 2).  Frames 0, 1, 3 and 4 put all
        probability on index 0 ("A" in the label string) and frame 2 puts it
        on index 1, which is passed to the decoders as ``blank_index``.  The
        decoder built with ``merge_repeated=True`` is expected to return one
        symbol, the decoder built with ``merge_repeated=False`` two.
        """
        frame_probs = np.array(
            [
                [[1.0, 0.0]],
                [[1.0, 0.0]],
                [[0.0, 1.0]],
                [[1.0, 0.0]],
                [[1.0, 0.0]],
            ],
            dtype=np.float32)
        # The decoders are fed log-probabilities; zero entries become -inf.
        log_probs = torch.FloatTensor(frame_probs).log()
        lengths = torch.IntTensor(np.array([5], dtype=np.int32))

        alphabet = "A_"
        shared_scorer = pytorch_ctc.Scorer()
        # Settings common to both decoders; only merge_repeated differs.
        common = dict(blank_index=1, space_index=-1, top_paths=1, beam_width=1)
        merging = pytorch_ctc.CTCBeamDecoder(
            shared_scorer, alphabet, merge_repeated=True, **common)
        non_merging = pytorch_ctc.CTCBeamDecoder(
            shared_scorer, alphabet, merge_repeated=False, **common)

        # NOTE(review): other tests in this file unpack a fourth value from
        # decode(); this one expects exactly three - confirm against the
        # pytorch_ctc version this test targets.
        merged, _, merged_lens = merging.decode(log_probs, lengths)
        unmerged, _, unmerged_lens = non_merging.decode(log_probs, lengths)

        merged_n = merged_lens[0][0]
        unmerged_n = unmerged_lens[0][0]
        self.assertEqual(merged_n, 1)
        self.assertEqual(unmerged_n, 2)
        self.assertEqual(merged.numpy()[0, 0, :merged_n].tolist(), [0])
        self.assertEqual(unmerged.numpy()[0, 0, :unmerged_n].tolist(), [0, 0])
Пример #2
0
    def test_ctc_decoder_beam_search(self):
        """Check the two best beam-search hypotheses for a fixed input.

        A 6-column probability table is converted to log space, shifted by a
        constant, truncated to the first five rows and followed by two
        all-zero frames (seven time steps in total, batch size one).  With
        ``top_paths=2`` and ``beam_width=2`` the decoder is expected to
        return the label sequences [1, 0] and [0, 1, 0] together with the
        scores asserted at the end of the test.
        """
        num_labels = 6
        valid_steps = 5
        probs = np.asarray(
            [
                [0.30999, 0.309938, 0.0679938, 0.0673362, 0.0708352, 0.173908],
                [0.215136, 0.439699, 0.0370931, 0.0393967, 0.0381581, 0.230517],
                [0.199959, 0.489485, 0.0233221, 0.0251417, 0.0233289, 0.238763],
                [0.279611, 0.452966, 0.0204795, 0.0209126, 0.0194803, 0.20655],
                [0.51286, 0.288951, 0.0243026, 0.0220788, 0.0219297, 0.129878],
                # Extra row at time=5; it lies beyond valid_steps and is
                # never copied into the decoder input.
                [0.155251, 0.164444, 0.173517, 0.176138, 0.169979, 0.160671],
            ],
            dtype=np.float32)
        # The constant offset is arbitrary; the expected scores below were
        # written with it in place.
        shifted_log_probs = np.log(probs) + 2.0

        # Time-major layout: one (batch_size=1, num_labels) slice per step,
        # then two zero-filled frames after the valid ones.
        valid_frames = shifted_log_probs[:valid_steps, np.newaxis, :]
        padding = np.zeros((2, 1, num_labels), dtype=np.float32)
        padded_inputs = np.concatenate([valid_frames, padding], axis=0)

        # One sequence length per batch entry.
        lengths = np.array([valid_steps], dtype=np.int32)

        th_inputs = torch.from_numpy(padded_inputs)
        th_lengths = torch.IntTensor(lengths)

        alphabet = "ABCDE_"
        decoder = pytorch_ctc.CTCBeamDecoder(
            pytorch_ctc.Scorer(),
            alphabet,
            blank_index=5,
            space_index=-1,
            top_paths=2,
            beam_width=2)

        hyps, hyp_scores, hyp_lens, _ = decoder.decode(th_inputs, th_lengths)

        best_len = hyp_lens[0][0]
        self.assertEqual(best_len, 2)
        second_len = hyp_lens[1][0]
        self.assertEqual(second_len, 3)

        hyps_np = hyps.numpy()
        self.assertEqual(hyps_np[0, 0, :best_len].tolist(), [1, 0])
        self.assertEqual(hyps_np[1, 0, :second_len].tolist(), [0, 1, 0])

        expected_scores = np.array([[-0.584855], [-0.389139]])
        np.testing.assert_almost_equal(hyp_scores.numpy(), expected_scores, 5)
Пример #3
0
    def test_simple_decode_different_blank_idx(self):
        """Decode a five-frame input whose blank symbol sits at index 0.

        The input array has shape (5, 1, 2).  Frames 0, 1, 3 and 4 put all
        probability on index 1 ("A" in the label string "_A") and frame 2
        puts it on index 0, which is passed as ``blank_index``.  The decoder
        is expected to return two symbols, both with label index 1.
        """
        frame_probs = np.array(
            [
                [[0.0, 1.0]],
                [[0.0, 1.0]],
                [[1.0, 0.0]],
                [[0.0, 1.0]],
                [[0.0, 1.0]],
            ],
            dtype=np.float32)
        # The decoder is fed log-probabilities; zero entries become -inf.
        log_probs = torch.FloatTensor(frame_probs).log()
        lengths = torch.IntTensor(np.array([5], dtype=np.int32))

        alphabet = "_A"
        decoder = pytorch_ctc.CTCBeamDecoder(
            pytorch_ctc.Scorer(),
            alphabet,
            blank_index=0,
            space_index=-1,
            top_paths=1,
            beam_width=1)

        # Scores and alignments are returned as well but not checked here.
        decoded, _, decoded_lens, _ = decoder.decode(log_probs, lengths)

        decoded_n = decoded_lens[0][0]
        self.assertEqual(decoded_n, 2)
        self.assertEqual(decoded.numpy()[0, 0, :decoded_n].tolist(), [1, 1])
    def __init__(self,
                 labels,
                 scorer,
                 top_paths=1,
                 beam_width=20,
                 output='char',
                 space_idx=-1):
        """Set up a beam-search decoder backed by ``pytorch_ctc``.

        Args:
            labels: label set handed to ``pytorch_ctc.CTCBeamDecoder``.
            scorer: scorer object handed to ``pytorch_ctc.CTCBeamDecoder``.
            top_paths: number of hypotheses to request; only 1 is accepted.
            beam_width: beam width handed to the underlying decoder.
            output: stored on ``self.output``; not otherwise used here.
            space_idx: forwarded to the parent initializer.

        Raises:
            AssertionError: if ``top_paths`` is not 1.
            ImportError: if the ``pytorch_ctc`` package cannot be imported.
        """
        super(BeamDecoder_test, self).__init__(space_idx)
        self.labels = labels
        self.output = output
        # Raised explicitly instead of via ``assert`` so the check is still
        # enforced when Python runs with -O (which strips assert statements).
        # The exception type and message are the same as before.
        if top_paths != 1:
            raise AssertionError(
                "Only supports top 1 path in the current version")

        # Imported lazily so the dependency is only needed when this decoder
        # is actually constructed.
        try:
            import pytorch_ctc
        except ImportError:
            raise ImportError("BeamCTCDecoder requires pytorch_ctc package.")

        # The blank symbol is fixed at index 0 and repeated symbols are not
        # merged.  self.space_idx is presumably set by the parent
        # initializer - TODO confirm.
        self._decoder = pytorch_ctc.CTCBeamDecoder(scorer=scorer,
                                                   labels=self.labels,
                                                   top_paths=top_paths,
                                                   beam_width=beam_width,
                                                   blank_index=0,
                                                   space_index=self.space_idx,
                                                   merge_repeated=False)