示例#1
0
 def batch_reader():
     """Generate batches of instances read from the manifest.

     This is a closure: ``self``, ``manifest_path``, ``sortagrad``,
     ``shuffle_method``, ``batch_size``, ``min_batch_size``, ``padding_to``
     and ``flatten`` are all taken from the enclosing scope.

     Ordering of the manifest:
       * when ``self._epoch == 0`` and ``sortagrad`` is truthy, entries are
         sorted by their ``"duration"`` value in ascending order;
       * otherwise the order is decided by ``shuffle_method``
         (``"batch_shuffle"``, ``"batch_shuffle_clipped"``,
         ``"instance_shuffle"`` or ``None`` for "leave as read").

     Yields:
         The result of ``self._padding_batch(batch, padding_to, flatten)``
         for every full batch of ``batch_size`` instances, plus one final
         partial batch if it holds at least ``min_batch_size`` instances.

     Raises:
         ValueError: if shuffling is requested with an unrecognised
             ``shuffle_method``.
     """
     # Read the manifest, passing the configured duration bounds through.
     manifest = read_manifest(manifest_path=manifest_path,
                              max_duration=self._max_duration,
                              min_duration=self._min_duration)
     # Sort (by duration) or shuffle the manifest.
     if self._epoch == 0 and sortagrad:
         manifest.sort(key=lambda x: x["duration"])
     else:
         if shuffle_method == "batch_shuffle":
             manifest = self._batch_shuffle(manifest,
                                            batch_size,
                                            clipped=False)
         elif shuffle_method == "batch_shuffle_clipped":
             manifest = self._batch_shuffle(manifest,
                                            batch_size,
                                            clipped=True)
         elif shuffle_method == "instance_shuffle":
             self._rng.shuffle(manifest)
         elif shuffle_method is None:
             # Identity check: None is a singleton, so ``is`` is the
             # correct comparison (``== None`` can be hijacked by __eq__).
             pass
         else:
             raise ValueError("Unknown shuffle method %s." %
                              shuffle_method)
     # Group the instances into batches.
     instance_reader = self._instance_reader_creator(manifest)
     batch = []
     for instance in instance_reader():
         batch.append(instance)
         if len(batch) == batch_size:
             yield self._padding_batch(batch, padding_to, flatten)
             batch = []
     # Emit the trailing partial batch only if it is large enough.
     if len(batch) >= min_batch_size:
         yield self._padding_batch(batch, padding_to, flatten)
     # Reached only once the generator has been fully consumed.
     self._epoch += 1
 def _compute_mean_std(self, manifest_path, num_samples, num_workers):
     """Compute mean and std from randomly sampled manifest instances.

     The whole manifest is used when ``num_samples`` is negative or larger
     than the manifest; otherwise ``num_samples`` entries are drawn with
     ``self._rng.sample``. The results are stored as column vectors in
     ``self._mean`` and ``self._std``.

     :param manifest_path: Path handed to ``read_manifest``.
     :param num_samples: Number of entries to sample (see above).
     :param num_workers: Forwarded to ``DataLoader`` as ``num_workers``.
     """
     entries = read_manifest(manifest_path)
     take_everything = num_samples < 0 or num_samples > len(entries)
     if take_everything:
         sampled_manifest = entries
     else:
         sampled_manifest = self._rng.sample(entries, num_samples)
     loader = DataLoader(dataset=NormalizerDataset(sampled_manifest),
                         batch_size=64,
                         collate_fn=collate_fn,
                         num_workers=num_workers)
     # Accumulate the per-batch statistics into running totals.
     # NOTE(review): the first element of each batch is treated below as a
     # sum of squares and the second as a plain sum -- confirm against
     # collate_fn.
     sq_acc = None
     sum_acc = None
     count = 0
     for batch_sq, batch_sum, batch_count in tqdm(loader()):
         count += batch_count
         if sum_acc is None:
             sum_acc = batch_sum
         else:
             sum_acc += batch_sum
         if sq_acc is None:
             sq_acc = batch_sq
         else:
             sq_acc += batch_sq
     # Turn the totals into mean and standard deviation, element by element.
     for k in range(len(sum_acc)):
         sum_acc[k] /= count
         sq_acc[k] = sq_acc[k] / count - sum_acc[k] * sum_acc[k]
         # Floor the variance so the square root stays well defined.
         if sq_acc[k] < 1.0e-20:
             sq_acc[k] = 1.0e-20
         sq_acc[k] = math.sqrt(sq_acc[k])
     self._mean = sum_acc.reshape([-1, 1])
     self._std = sq_acc.reshape([-1, 1])
示例#3
0
 def _compute_mean_std(self, manifest_path, featurize_func, num_samples):
     """Compute mean and std from randomly sampled manifest instances.

     ``num_samples`` entries are drawn with ``self._rng.sample``; each one
     is loaded via ``AudioSegment.from_file`` and passed to
     ``featurize_func``. The feature arrays are stacked horizontally and
     the statistics taken along axis 1 are stored as column vectors in
     ``self._mean`` and ``self._std``.
     """
     entries = read_manifest(manifest_path)
     picked = self._rng.sample(entries, num_samples)
     # tqdm only wraps the iteration to display progress.
     stacked = np.hstack([
         featurize_func(AudioSegment.from_file(item["audio_filepath"]))
         for item in tqdm(picked)
     ])
     column = [-1, 1]
     self._mean = np.mean(stacked, axis=1).reshape(column)
     self._std = np.std(stacked, axis=1).reshape(column)
示例#4
0
 def _compute_mean_std(self, manifest_path, featurize_func, num_samples):
     """Compute mean and std from randomly sampled instances.

     Draws ``num_samples`` manifest entries with ``self._rng.sample``,
     featurizes the audio file of each, stacks the results horizontally
     and stores the axis-1 mean and standard deviation as column vectors
     in ``self._mean`` and ``self._std``.
     """

     def featurize_entry(entry):
         # Load the audio referenced by the entry and featurize it.
         segment = AudioSegment.from_file(entry["audio_filepath"])
         return featurize_func(segment)

     population = read_manifest(manifest_path)
     chosen = self._rng.sample(population, num_samples)
     all_features = np.hstack([featurize_entry(entry) for entry in chosen])
     self._mean = np.mean(all_features, axis=1).reshape([-1, 1])
     self._std = np.std(all_features, axis=1).reshape([-1, 1])
示例#5
0
def warm_up_test(audio_process_handler,
                 manifest_path,
                 num_test_cases,
                 random_seed=0):
    """Run a warm-up pass over randomly chosen manifest entries.

    ``num_test_cases`` entries are sampled from the manifest with a
    ``random.Random`` seeded by ``random_seed``. For each one the audio
    path is handed to ``audio_process_handler`` and the elapsed wall-clock
    time plus the returned transcript are printed.
    """
    entries = read_manifest(manifest_path)
    chosen = random.Random(random_seed).sample(entries, num_test_cases)
    for case_no, entry in enumerate(chosen):
        audio_path = entry['audio_filepath']
        print("Warm-up Test Case %d: %s" % (case_no, audio_path))
        began = time.time()
        transcript = audio_process_handler(audio_path)
        elapsed = time.time() - began
        print("Response Time: %f, Transcript: %s" % (elapsed, transcript))
示例#6
0
    def _warm_up_test(self, num_test_cases, random_seed=0):
        """Run a warm-up pass over randomly chosen manifest entries.

        The manifest is read from the module-level ``args.warmup_manifest``.
        ``num_test_cases`` entries are sampled with a ``random.Random``
        seeded by ``random_seed``; each audio file is transcribed with
        ``self.file_speech_to_text`` and the elapsed wall-clock time and
        transcript are printed.

        :param num_test_cases: Number of manifest entries to sample.
        :param random_seed: Seed for the sampling RNG.
        """
        manifest = read_manifest(args.warmup_manifest)
        rng = random.Random(random_seed)
        samples = rng.sample(manifest, num_test_cases)

        for idx, sample in enumerate(samples):
            # Interpolate with ``%``: passing the values as extra print()
            # arguments (logging style) would emit the raw format string
            # followed by the values instead of substituting them.
            print("Warm-up Test Case %d: %s" %
                  (idx, sample['audio_filepath']))

            start_time = time.time()
            transcript = self.file_speech_to_text(sample['audio_filepath'])
            finish_time = time.time()

            print("Response Time: %f, Transcript: %s" %
                  (finish_time - start_time, transcript))
示例#7
0
        def batch_reader():
            """Generate batches of instances read from the manifest.

            Closure over ``self``, ``manifest_path``, ``shuffle_method``,
            ``batch_size``, ``padding_to`` and ``flatten``. While
            ``self._epoch`` is 0 the manifest is ordered by ``"duration"``
            from longest to shortest; afterwards ``shuffle_method`` decides
            the order. Every batch is passed through
            ``self._padding_batch`` before being yielded, and a non-empty
            trailing partial batch is yielded as well.

            Raises ``ValueError`` for an unrecognised ``shuffle_method``.
            """
            # Load the data list.
            entries = read_manifest(manifest_path=manifest_path,
                                    max_duration=self._max_duration,
                                    min_duration=self._min_duration)
            first_epoch = self._epoch == 0
            if first_epoch:
                # Order the data list from long to short.
                entries.sort(key=lambda item: item["duration"])
                entries.reverse()
            elif shuffle_method in ("batch_shuffle", "batch_shuffle_clipped"):
                entries = self._batch_shuffle(
                    entries,
                    batch_size,
                    clipped=(shuffle_method == "batch_shuffle_clipped"))
            elif shuffle_method == "instance_shuffle":
                self._rng.shuffle(entries)
            elif shuffle_method is not None:
                raise ValueError("Unknown shuffle method %s." %
                                 shuffle_method)
            # Assemble the batches.
            pending = []
            read_instances = self._instance_reader_creator(entries)

            for item in read_instances():
                pending.append(item)
                if len(pending) == batch_size:
                    yield self._padding_batch(pending, padding_to, flatten)
                    pending = []
            if pending:
                yield self._padding_batch(pending, padding_to, flatten)
            # Reached only once the generator has been fully consumed.
            self._epoch += 1
def count_manifest(counter, manifest_path):
    """Feed every item of each manifest entry's ``'text'`` into ``counter``.

    ``counter.update`` is called once per item found while iterating the
    ``'text'`` value of each entry returned by ``read_manifest``.
    """
    for entry in read_manifest(manifest_path):
        transcript = entry['text']
        for symbol in transcript:
            counter.update(symbol)
示例#9
0
 def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest_path):
     """Store the SNR bounds and RNG, and load the noise manifest.

     :param rng: Random generator object kept as ``self._rng``.
     :param min_snr_dB: Lower SNR bound, stored as given.
     :param max_snr_dB: Upper SNR bound, stored as given.
     :param noise_manifest_path: Path handed to ``read_manifest``.
     """
     self._min_snr_dB, self._max_snr_dB = min_snr_dB, max_snr_dB
     self._rng = rng
     noise_entries = read_manifest(manifest_path=noise_manifest_path)
     self._noise_manifest = noise_entries
 def __init__(self, rng, impulse_manifest_path):
     """Keep the RNG and load the impulse manifest.

     :param rng: Random generator object kept as ``self._rng``.
     :param impulse_manifest_path: Path handed to ``read_manifest``.
     """
     self._rng = rng
     impulse_entries = read_manifest(impulse_manifest_path)
     self._impulse_manifest = impulse_entries
示例#11
0
def evaluate():
    """Transcribe every manifest utterance with DeepSpeech2 and save results.

    All configuration is read from the module-level ``args`` object.
    Each entry of the manifest at ``args.manifest_path`` is transcribed
    (greedy CTC when ``args.decoding_method == "ctc_greedy"``, beam search
    otherwise). Three files are then written under ``args.src_path``:

      * ``transcripts_list_<HH:MM:SS>.b`` -- pickled list of
        ``(wav_path, wav_name, transcript)`` tuples in manifest order;
      * ``ds2_stt_complete.csv`` -- the same data sorted by ``wav_name``;
      * ``ds2_stt.txt`` -- the sorted transcripts, each passed through
        ``pre_process_srt`` and followed by a single space.

    Writing is best-effort: a failure on one file is logged and the
    remaining files are still attempted.
    """
    data_generator = DataGenerator(
        vocab_filepath=args.vocab_path,
        mean_std_filepath=args.mean_std_path,
        augmentation_config='{}',
        specgram_type=args.specgram_type,
        num_threads=args.num_proc_data,
        keep_transcription_text=True)

    ds2_model = DeepSpeech2Model(
        vocab_size=data_generator.vocab_size,
        num_conv_layers=args.num_conv_layers,
        num_rnn_layers=args.num_rnn_layers,
        rnn_layer_size=args.rnn_layer_size,
        use_gru=args.use_gru,
        pretrained_model_path=args.model_path,
        share_rnn_weights=args.share_rnn_weights)

    # decoders only accept string encoded in utf-8
    vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list]

    if args.decoding_method == "ctc_beam_search":
        ds2_model.init_ext_scorer(args.alpha, args.beta, args.lang_model_path,
                                  vocab_list)

    # prepare ASR inference handler
    def file_to_transcript(filename):
        """Return the decoded transcript for a single audio file."""
        feature = data_generator.process_utterance(filename, "")
        probs_split = ds2_model.infer_batch_probs(
            infer_data=[feature],
            feeding_dict=data_generator.feeding)

        if args.decoding_method == "ctc_greedy":
            result_transcript = ds2_model.decode_batch_greedy(
                probs_split=probs_split,
                vocab_list=vocab_list)
        else:
            result_transcript = ds2_model.decode_batch_beam_search(
                probs_split=probs_split,
                beam_alpha=args.alpha,
                beam_beta=args.beta,
                beam_size=args.beam_size,
                cutoff_prob=args.cutoff_prob,
                cutoff_top_n=args.cutoff_top_n,
                vocab_list=vocab_list,
                num_processes=1)
        # A one-element batch was submitted, so take its only result.
        return result_transcript[0]

    parentdir = os.path.join(args.src_path)
    manifest_path = args.manifest_path
    manifest = read_manifest(
        manifest_path=manifest_path)
    transcripts = []
    for entry in manifest:
        fname = entry["audio_filepath"]
        transcript = file_to_transcript(fname)
        transcripts.append((fname, fname.split("/")[-1], transcript))

    df = pd.DataFrame(data=transcripts, columns=["wav_path", "wav_name", "transcripts"])
    df.sort_values("wav_name", inplace=True)

    def save_best_effort(filename, mode, write):
        """Open ``filename`` under ``parentdir`` and call ``write(f)``.

        Failures are logged rather than raised so one bad output does not
        prevent the others. Only ``Exception`` is caught, so Ctrl-C and
        ``SystemExit`` still propagate.
        """
        try:
            with open(os.path.join(parentdir, filename), mode) as f:
                write(f)
        except Exception as err:
            # ``logger.info`` is the only logger method known to be
            # available on the model object from this function.
            ds2_model.logger.info(
                "skipped writing %s: %s" % (filename, err))

    def write_plain_text(f):
        for trans in df["transcripts"]:
            f.write(pre_process_srt(trans) + " ")

    stamp = datetime.datetime.now().strftime("%H:%M:%S")
    save_best_effort('transcripts_list_' + stamp + ".b", 'wb',
                     lambda f: pickle.dump(transcripts, f))
    save_best_effort('ds2_stt_complete.csv', 'w',
                     lambda f: df.to_csv(f, index=False))
    save_best_effort('ds2_stt.txt', 'w', write_plain_text)
    ds2_model.logger.info("finish evaluation")
def get_data_len(manifest_path, max_duration, min_duration):
    """Return how many entries ``read_manifest`` yields for these arguments.

    ``max_duration`` and ``min_duration`` are passed straight through to
    ``read_manifest``.
    """
    return len(
        read_manifest(manifest_path=manifest_path,
                      max_duration=max_duration,
                      min_duration=min_duration))