def batch_reader():
    """Yield padded batches for one epoch, then advance the epoch counter.

    This is a closure: ``self``, ``manifest_path``, ``sortagrad``,
    ``shuffle_method``, ``batch_size``, ``min_batch_size``, ``padding_to``
    and ``flatten`` are all taken from the enclosing scope.

    Yields:
        The value returned by ``self._padding_batch`` for every full batch
        of ``batch_size`` instances, followed by one trailing batch when
        the leftover instances number at least ``min_batch_size``.

    Raises:
        ValueError: If ``shuffle_method`` is neither ``None`` nor one of
            "batch_shuffle", "batch_shuffle_clipped", "instance_shuffle".
    """
    # Read the manifest, passing the configured duration bounds.
    manifest = read_manifest(
        manifest_path=manifest_path,
        max_duration=self._max_duration,
        min_duration=self._min_duration)

    # First epoch with sortagrad: sort by duration. Otherwise shuffle
    # according to the requested method.
    if self._epoch == 0 and sortagrad:
        manifest.sort(key=lambda x: x["duration"])
    elif shuffle_method == "batch_shuffle":
        manifest = self._batch_shuffle(manifest, batch_size, clipped=False)
    elif shuffle_method == "batch_shuffle_clipped":
        manifest = self._batch_shuffle(manifest, batch_size, clipped=True)
    elif shuffle_method == "instance_shuffle":
        self._rng.shuffle(manifest)
    elif shuffle_method is not None:
        # None means "keep manifest order"; anything else is a caller error.
        raise ValueError("Unknown shuffle method %s." % shuffle_method)

    # Group instances into batches.
    instance_reader = self._instance_reader_creator(manifest)
    batch = []
    for instance in instance_reader():
        batch.append(instance)
        if len(batch) == batch_size:
            yield self._padding_batch(batch, padding_to, flatten)
            batch = []
    if len(batch) >= min_batch_size:
        yield self._padding_batch(batch, padding_to, flatten)

    # Only reached once the generator has been fully consumed.
    self._epoch += 1
def _compute_mean_std(self, manifest_path, num_samples, num_workers):
    """Compute mean and standard deviation from randomly sampled instances.

    The whole manifest is used when ``num_samples`` is negative or larger
    than the manifest; otherwise ``num_samples`` entries are drawn with
    ``self._rng``. Results are stored in ``self._mean`` and ``self._std``,
    each reshaped to ``[-1, 1]``.
    """
    entries = read_manifest(manifest_path)
    if 0 <= num_samples <= len(entries):
        chosen = self._rng.sample(entries, num_samples)
    else:
        chosen = entries

    loader = DataLoader(dataset=NormalizerDataset(chosen),
                        batch_size=64,
                        collate_fn=collate_fn,
                        num_workers=num_workers)

    # Accumulate the per-batch statistics and the instance count.
    # NOTE(review): the formula below treats ``std`` as a running sum of
    # squares and ``means`` as a running sum -- confirm against collate_fn.
    std, means = None, None
    number = 0
    for batch_std, batch_means, batch_count in tqdm(loader()):
        number += batch_count
        if means is None:
            means = batch_means
        else:
            means += batch_means
        if std is None:
            std = batch_std
        else:
            std += batch_std

    # Turn the accumulated sums into a mean and a standard deviation,
    # flooring the variance at 1.0e-20 before taking the square root.
    for dim in range(len(means)):
        means[dim] /= number
        std[dim] = std[dim] / number - means[dim] * means[dim]
        if std[dim] < 1.0e-20:
            std[dim] = 1.0e-20
        std[dim] = math.sqrt(std[dim])

    self._mean = means.reshape([-1, 1])
    self._std = std.reshape([-1, 1])
def _compute_mean_std(self, manifest_path, featurize_func, num_samples):
    """Compute mean and std from randomly sampled instances.

    Draws ``num_samples`` manifest entries with ``self._rng``, featurizes
    each audio file (showing a tqdm progress bar), horizontally stacks the
    features and stores the axis-1 mean and std in ``self._mean`` and
    ``self._std``, each reshaped to ``[-1, 1]``.
    """
    entries = read_manifest(manifest_path)
    chosen = self._rng.sample(entries, num_samples)
    stacked = np.hstack([
        featurize_func(AudioSegment.from_file(entry["audio_filepath"]))
        for entry in tqdm(chosen)
    ])
    self._mean = np.mean(stacked, axis=1).reshape([-1, 1])
    self._std = np.std(stacked, axis=1).reshape([-1, 1])
def _compute_mean_std(self, manifest_path, featurize_func, num_samples):
    """Estimate feature mean and std from a random sample of the manifest.

    ``num_samples`` entries are drawn with ``self._rng``; each one's
    ``"audio_filepath"`` is loaded and passed through ``featurize_func``.
    The stacked features' axis-1 mean and std are stored in ``self._mean``
    and ``self._std``, each reshaped to ``[-1, 1]``.
    """
    chosen = self._rng.sample(read_manifest(manifest_path), num_samples)

    per_utterance = []
    for entry in chosen:
        audio = AudioSegment.from_file(entry["audio_filepath"])
        per_utterance.append(featurize_func(audio))

    stacked = np.hstack(per_utterance)
    column_shape = [-1, 1]
    self._mean = np.mean(stacked, axis=1).reshape(column_shape)
    self._std = np.std(stacked, axis=1).reshape(column_shape)
def warm_up_test(audio_process_handler,
                 manifest_path,
                 num_test_cases,
                 random_seed=0):
    """Run the handler on a few random manifest entries and time each call.

    Args:
        audio_process_handler: Callable taking an audio file path and
            returning a transcript.
        manifest_path: Path handed to ``read_manifest``.
        num_test_cases: Number of entries to sample.
        random_seed: Seed for the private ``random.Random`` used to sample.
    """
    entries = read_manifest(manifest_path)
    chosen = random.Random(random_seed).sample(entries, num_test_cases)
    for case_no, entry in enumerate(chosen):
        audio_path = entry['audio_filepath']
        print("Warm-up Test Case %d: %s" % (case_no, audio_path))
        began = time.time()
        transcript = audio_process_handler(audio_path)
        elapsed = time.time() - began
        print("Response Time: %f, Transcript: %s" % (elapsed, transcript))
def _warm_up_test(self, num_test_cases, random_seed=0):
    """Transcribe a few random warm-up files and print the response times.

    The manifest is read from the module-level ``args.warmup_manifest``.

    Args:
        num_test_cases: Number of manifest entries to sample.
        random_seed: Seed for the private ``random.Random`` used to sample.
    """
    manifest = read_manifest(args.warmup_manifest)
    rng = random.Random(random_seed)
    samples = rng.sample(manifest, num_test_cases)
    for idx, sample in enumerate(samples):
        # print() does not interpolate logging-style arguments, so the
        # message has to be formatted explicitly with %.
        print("Warm-up Test Case %d: %s" % (idx, sample['audio_filepath']))
        start_time = time.time()
        transcript = self.file_speech_to_text(sample['audio_filepath'])
        finish_time = time.time()
        print("Response Time: %f, Transcript: %s" %
              (finish_time - start_time, transcript))
def batch_reader():
    """Yield padded batches for one epoch, then advance the epoch counter.

    This is a closure: ``self``, ``manifest_path``, ``shuffle_method``,
    ``batch_size``, ``padding_to`` and ``flatten`` come from the enclosing
    scope. A ValueError is raised for an unrecognised ``shuffle_method``.
    """
    # Read the data list.
    entries = read_manifest(manifest_path=manifest_path,
                            max_duration=self._max_duration,
                            min_duration=self._min_duration)

    if self._epoch == 0:
        # First epoch: order the data list from longest to shortest.
        entries.sort(key=lambda item: item["duration"])
        entries.reverse()
    elif shuffle_method in ("batch_shuffle", "batch_shuffle_clipped"):
        clip = shuffle_method == "batch_shuffle_clipped"
        entries = self._batch_shuffle(entries, batch_size, clipped=clip)
    elif shuffle_method == "instance_shuffle":
        self._rng.shuffle(entries)
    elif shuffle_method is not None:
        raise ValueError("Unknown shuffle method %s." % shuffle_method)

    # Prepare the batched data.
    pending = []
    read_instances = self._instance_reader_creator(entries)
    for instance in read_instances():
        pending.append(instance)
        if len(pending) != batch_size:
            continue
        yield self._padding_batch(pending, padding_to, flatten)
        pending = []
    # Flush whatever is left over as a final, smaller batch.
    if pending:
        yield self._padding_batch(pending, padding_to, flatten)
    self._epoch += 1
def count_manifest(counter, manifest_path):
    """Feed every element of each manifest entry's 'text' into ``counter``.

    Args:
        counter: Object exposing ``update``; it is called once per element
            of each entry's ``'text'`` value.
        manifest_path: Path handed to ``read_manifest``.
    """
    for entry in read_manifest(manifest_path):
        text = entry['text']
        for symbol in text:
            counter.update(symbol)
def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest_path):
    """Store the SNR bounds and random source, then load the noise manifest.

    Args:
        rng: Random source kept on ``self._rng``.
        min_snr_dB: Lower bound kept on ``self._min_snr_dB``.
        max_snr_dB: Upper bound kept on ``self._max_snr_dB``.
        noise_manifest_path: Path passed to ``read_manifest``; the result
            is kept on ``self._noise_manifest``.
    """
    self._min_snr_dB, self._max_snr_dB = min_snr_dB, max_snr_dB
    self._rng = rng
    noise_entries = read_manifest(manifest_path=noise_manifest_path)
    self._noise_manifest = noise_entries
def __init__(self, rng, impulse_manifest_path):
    """Keep the random source and load the impulse manifest.

    Args:
        rng: Random source kept on ``self._rng``.
        impulse_manifest_path: Path passed to ``read_manifest``; the result
            is kept on ``self._impulse_manifest``.
    """
    self._rng = rng
    impulse_entries = read_manifest(impulse_manifest_path)
    self._impulse_manifest = impulse_entries
def evaluate():
    """Transcribe every manifest entry with DeepSpeech2 and save the results.

    All configuration comes from the module-level ``args``. After
    transcribing each ``"audio_filepath"`` in the manifest, three outputs
    are written into ``args.src_path`` on a best-effort basis:

    * ``transcripts_list_<HH:MM:SS>.b`` -- pickled list of
      ``(wav_path, wav_name, transcript)`` tuples,
    * ``ds2_stt_complete.csv`` -- the same data sorted by ``wav_name``,
    * ``ds2_stt.txt`` -- the post-processed transcripts joined by spaces.

    A failure while writing one output is logged and does not prevent the
    remaining outputs from being attempted.
    """
    data_generator = DataGenerator(
        vocab_filepath=args.vocab_path,
        mean_std_filepath=args.mean_std_path,
        augmentation_config='{}',
        specgram_type=args.specgram_type,
        num_threads=args.num_proc_data,
        keep_transcription_text=True)
    ds2_model = DeepSpeech2Model(
        vocab_size=data_generator.vocab_size,
        num_conv_layers=args.num_conv_layers,
        num_rnn_layers=args.num_rnn_layers,
        rnn_layer_size=args.rnn_layer_size,
        use_gru=args.use_gru,
        pretrained_model_path=args.model_path,
        share_rnn_weights=args.share_rnn_weights)

    # decoders only accept string encoded in utf-8
    vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list]

    if args.decoding_method == "ctc_beam_search":
        ds2_model.init_ext_scorer(args.alpha, args.beta, args.lang_model_path,
                                  vocab_list)

    # prepare ASR inference handler
    def file_to_transcript(filename):
        """Return the decoded transcript for a single audio file."""
        feature = data_generator.process_utterance(filename, "")
        probs_split = ds2_model.infer_batch_probs(
            infer_data=[feature], feeding_dict=data_generator.feeding)

        if args.decoding_method == "ctc_greedy":
            result_transcript = ds2_model.decode_batch_greedy(
                probs_split=probs_split, vocab_list=vocab_list)
        else:
            result_transcript = ds2_model.decode_batch_beam_search(
                probs_split=probs_split,
                beam_alpha=args.alpha,
                beam_beta=args.beta,
                beam_size=args.beam_size,
                cutoff_prob=args.cutoff_prob,
                cutoff_top_n=args.cutoff_top_n,
                vocab_list=vocab_list,
                num_processes=1)
        return result_transcript[0]

    parentdir = os.path.join(args.src_path)
    manifest_path = args.manifest_path
    manifest = read_manifest(manifest_path=manifest_path)

    transcripts = []
    for entry in manifest:
        fname = entry["audio_filepath"]
        transcript = file_to_transcript(fname)
        transcripts.append((fname, fname.split("/")[-1], transcript))

    df = pd.DataFrame(
        data=transcripts, columns=["wav_path", "wav_name", "transcripts"])
    df.sort_values("wav_name", inplace=True)

    # Each output below is best-effort: a failure is reported instead of
    # being silently discarded, and the remaining outputs are still tried.
    # `except Exception` (rather than a bare except) lets KeyboardInterrupt
    # and SystemExit propagate.
    # NOTE(review): assumes ds2_model.logger is a standard logging.Logger
    # (it is already used via .info below) -- confirm .warning is available.
    try:
        pickle_name = ('transcripts_list_' +
                       datetime.datetime.now().strftime("%H:%M:%S") + ".b")
        with open(os.path.join(parentdir, pickle_name), 'wb') as f:
            pickle.dump(transcripts, f)
    except Exception as exc:
        ds2_model.logger.warning(
            "failed to write pickled transcripts: %s", exc)

    try:
        with open(os.path.join(parentdir, 'ds2_stt_complete.csv'), 'w') as f:
            df.to_csv(f, index=False)
    except Exception as exc:
        ds2_model.logger.warning(
            "failed to write ds2_stt_complete.csv: %s", exc)

    try:
        with open(os.path.join(parentdir, 'ds2_stt.txt'), 'w') as f:
            for trans in df["transcripts"]:
                f.write(pre_process_srt(trans) + " ")
    except Exception as exc:
        ds2_model.logger.warning("failed to write ds2_stt.txt: %s", exc)

    ds2_model.logger.info("finish evaluation")
def get_data_len(manifest_path, max_duration, min_duration):
    """Return how many entries ``read_manifest`` yields for these bounds.

    Args:
        manifest_path: Path handed to ``read_manifest``.
        max_duration: Forwarded as ``max_duration``.
        min_duration: Forwarded as ``min_duration``.

    Returns:
        ``len()`` of the manifest returned by ``read_manifest``.
    """
    return len(
        read_manifest(
            manifest_path=manifest_path,
            max_duration=max_duration,
            min_duration=min_duration))