Пример #1
0
 def test_wer_4(self):
     """Check the WER of a hypothesis that differs from the reference in
     four words while keeping the same word count."""
     ref = ('the wood flamed up splendidly under the large brewing copper '
            'and it sighed so deeply')
     hyp = ('the wood flame do splendidly under the large brewing copper '
            'and its side so deeply')
     word_error_rate = error_rate.wer(ref, hyp)
     # assertAlmostEqual reports the actual and expected values on failure,
     # which a bare assertTrue on a boolean comparison cannot do.
     self.assertAlmostEqual(word_error_rate, 0.2666666667, delta=1e-6)
Пример #2
0
 def test_wer_2(self):
     """Check the WER when one reference word ('gamewell') is recognised
     as two words ('came well') in the hypothesis."""
     ref = ('as any in england i would say said gamewell proudly that is '
            'in his day')
     hyp = ('as any in england i would say said came well proudly that is '
            'in his day')
     word_error_rate = error_rate.wer(ref, hyp)
     # assertAlmostEqual reports the actual and expected values on failure,
     # which a bare assertTrue on a boolean comparison cannot do.
     self.assertAlmostEqual(word_error_rate, 0.1333333, delta=1e-6)
Пример #3
0
def tune():
    """Grid-search the decoding parameters alpha and beta on one minibatch.

    Every setting is read from the module-level ``args`` object. One batch
    (``batch_size=args.num_samples``) is drawn from ``args.tune_manifest``,
    the model is loaded from ``args.model_path``, and CTC beam search
    decoding is run once per (alpha, beta) pair of the grid. For each pair
    one line with the mean WER over the batch is printed.

    Raises:
        ValueError: if ``args.num_alphas`` or ``args.num_betas`` is negative.
    """
    if not args.num_alphas >= 0:
        raise ValueError("num_alphas must be non-negative!")
    if not args.num_betas >= 0:
        raise ValueError("num_betas must be non-negative!")

    data_generator = DataGenerator(
        vocab_filepath=args.vocab_path,
        mean_std_filepath=args.mean_std_path,
        augmentation_config='{}',
        specgram_type=args.specgram_type,
        num_threads=1)
    batch_reader = data_generator.batch_reader_creator(
        manifest_path=args.tune_manifest,
        batch_size=args.num_samples,
        sortagrad=False,
        shuffle_method=None)
    # Use the builtin next(): the .next() method exists only on Python 2
    # iterators, while next() works on Python 2.6+ and Python 3 alike.
    tune_data = next(batch_reader())
    # Turn each token-index transcript back into text via the vocabulary.
    target_transcripts = [
        ''.join([data_generator.vocab_list[token] for token in transcript])
        for _, transcript in tune_data
    ]

    ds2_model = DeepSpeech2Model(
        vocab_size=data_generator.vocab_size,
        num_conv_layers=args.num_conv_layers,
        num_rnn_layers=args.num_rnn_layers,
        rnn_layer_size=args.rnn_layer_size,
        use_gru=args.use_gru,
        pretrained_model_path=args.model_path,
        share_rnn_weights=args.share_rnn_weights)

    # Create the search grid: every alpha candidate paired with every beta.
    cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas)
    cand_betas = np.linspace(args.beta_from, args.beta_to, args.num_betas)
    params_grid = [(alpha, beta) for alpha in cand_alphas
                   for beta in cand_betas]

    # Decode the same batch once per grid point and report its mean WER.
    for alpha, beta in params_grid:
        result_transcripts = ds2_model.infer_batch(
            infer_data=tune_data,
            decoding_method='ctc_beam_search',
            beam_alpha=alpha,
            beam_beta=beta,
            beam_size=args.beam_size,
            cutoff_prob=args.cutoff_prob,
            vocab_list=data_generator.vocab_list,
            language_model_path=args.lang_model_path,
            num_processes=args.num_proc_bsearch)
        wers = [
            wer(target, result)
            for target, result in zip(target_transcripts, result_transcripts)
        ]
        # Start the sum at 0.0 so the mean is a true float division even
        # under Python 2 integer semantics.
        print("alpha = %f\tbeta = %f\tWER = %f" %
              (alpha, beta, sum(wers, 0.0) / len(wers)))
Пример #4
0
 def test_wer_5(self):
     """Check the WER of a long sentence in which a single word differs
     ('and' in the reference versus 'in' in the hypothesis)."""
     ref = ('all the morning they trudged up the mountain path and at noon '
            'unc and ojo sat on a fallen tree trunk and ate the last of '
            'the bread which the old munchkin had placed in his pocket')
     hyp = ('all the morning they trudged up the mountain path and at noon '
            'unc in ojo sat on a fallen tree trunk and ate the last of '
            'the bread which the old munchkin had placed in his pocket')
     word_error_rate = error_rate.wer(ref, hyp)
     # assertAlmostEqual reports the actual and expected values on failure,
     # which a bare assertTrue on a boolean comparison cannot do.
     self.assertAlmostEqual(word_error_rate, 0.027027027, delta=1e-6)
Пример #5
0
 def test_wer_3(self):
     """Check the WER of a hypothesis whose word count and several words
     differ from the reference."""
     ref = ('the lieutenant governor lilburn w boggs afterward governor '
            'was a pronounced mormon hater and throughout the period of '
            'the troubles he manifested sympathy with the persecutors')
     hyp = ('the lieutenant governor little bit how bags afterward '
            'governor was a pronounced warman hater and throughout the '
            'period of th troubles he manifests sympathy with the '
            'persecutors')
     word_error_rate = error_rate.wer(ref, hyp)
     # assertAlmostEqual reports the actual and expected values on failure,
     # which a bare assertTrue on a boolean comparison cannot do.
     self.assertAlmostEqual(word_error_rate, 0.2692307692, delta=1e-6)
Пример #6
0
 def test_wer_7(self):
     """Expect wer() to raise ValueError for a whitespace-only reference."""
     reference = ' '
     hypothesis = 'Hypothesis sentence'
     self.assertRaises(ValueError, error_rate.wer, reference, hypothesis)
Пример #7
0
 def test_wer_6(self):
     """Expect a WER of exactly 0.0 when a sentence is scored against
     itself."""
     sentence = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night'
     self.assertEqual(error_rate.wer(sentence, sentence), 0.0)
Пример #8
0
 def test_wer_1(self):
     """Check the WER of a mixed-case hypothesis that differs from the
     reference in most of its words."""
     ref = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night'
     hyp = ('i GOT IT TO the FULLEST i LOVE TO portable FROM OF STORES last '
            'night')
     word_error_rate = error_rate.wer(ref, hyp)
     # assertAlmostEqual reports the actual and expected values on failure,
     # which a bare assertTrue on a boolean comparison cannot do.
     self.assertAlmostEqual(word_error_rate, 0.769230769231, delta=1e-6)