def main(models, experiments, dataset_dir, phi, low, high):
    """Decode a dataset with a trained model and print the resulting WERs.

    Args:
        models: Forwarded to ``select_model`` together with ``phi``, ``low``
            and ``high`` to pick the model directory.
        experiments: Path-like parent directory; a new sub-directory named
            ``<YYYY-MM-DD>_<generated name>`` becomes the Kaldi base dir.
        dataset_dir: Location handed to ``Dataset``.
        phi: Model selection parameter (only used by ``select_model`` here).
        low: Model selection parameter (only used by ``select_model`` here).
        high: Model selection parameter (only used by ``select_model`` here).
    """
    # Kaldi instance living in a fresh, date-stamped experiment directory.
    # NOTE(review): phi is used for model selection only and is not passed on
    # to decode_wavs -- confirm this is intended.
    trained_model_dir = select_model(models, phi, low, high)
    run_name = f'{time.strftime("%Y-%m-%d")}_{pydng.generate_name()}'
    kaldi = Kaldi.from_trained_model(
        model_dir=trained_model_dir,
        base_dir=experiments.joinpath(run_name),
    )

    # Export the dataset in Kaldi layout below <base_dir>/data/<dataset name>.
    dataset = Dataset(dataset_dir)
    export_dir = kaldi.base_dir.joinpath("data", dataset.name)
    dataset.dump_as_kaldi_dataset(
        export_dir,
        wavs_prefix=f'data/{dataset.name}',
    )

    # Decode against the reference transcriptions and report the results.
    overall_wer, per_utterance = kaldi.decode_wavs(
        data_dir=dataset.data_dir,
        text=dataset.text,
    )
    print(f'\n[+] WER {dataset}: {overall_wer:03.2f}%')
    for entry in per_utterance:
        print(f"\n[+] {entry['wav_name']}")
        print(f" REF: {entry['ref']}")
        print(f" HYP: {entry['hyp']}")
        # the per-utterance value is scaled by 100 for display as a percentage
        print(f" WER: {entry['wer']*100:5.2f}%")
def decode_wavs(self, data_dir, text, phi=None):
    """Decode the data in *data_dir* and return the WER plus per-utterance meta.

    A timestamped decode directory is created below
    ``<base_dir>/exp/nnet5d_gpu_time``, the data is dumped there as a Kaldi
    dataset and ``decode_wavs.sh`` is run in the container.

    Args:
        data_dir: Directory handed to ``Dataset`` (str or Path).
        text: Transcriptions assigned to ``dataset.text`` before dumping.
        phi: Value exported to the decode script as ``PHI``. The default
            ``None`` is formatted into the command as the string ``None``.

    Returns:
        Tuple ``(best_wer, decoding_meta)``: ``best_wer`` is the float parsed
        from ``scoring_kaldi/best_wer``; ``decoding_meta`` is the result of
        ``parse_per_utt_file`` for the decode directory.

    Raises:
        IndexError: If the ``best_wer`` file has no ``%WER <value> [`` entry.
    """
    data_dir = Path(data_dir)

    # create decode dir (name is made unique with the current unix time)
    decode_name = f'decode_job_{data_dir.name}_{int(time.time())}'
    # path relative to the base dir; reused for the wav prefix and the script
    decode_rel_dir = f"exp/nnet5d_gpu_time/{decode_name}"
    decode_dir = self.base_dir.joinpath(decode_rel_dir)

    # create kaldi dataset
    dataset = Dataset(data_dir, name=decode_name)
    dataset.text = text
    dataset.dump_as_kaldi_dataset(decode_dir, wavs_prefix=decode_rel_dir)

    # invoke decode script in container
    self.run_in_container(
        f'./decode_wavs.sh {decode_rel_dir}',
        additional_cmds=[
            # never request more jobs than there are items or CPUs
            f'-e NUMJOBS={min(len(dataset), cpu_count())}',
            f'-e PHI={phi} -e LOG_CLAMP=0'
        ])

    # get best_wer (context manager closes the file handle deterministically)
    with open(f'{decode_dir}/scoring_kaldi/best_wer') as wer_file:
        best_wer = wer_file.read().strip()
    best_wer = float(re.findall(r'%WER (.*) \[', best_wer)[0])

    # get decoding meta
    decoding_meta = parse_per_utt_file(decode_dir)
    return best_wer, decoding_meta
def main(models, experiments, dataset_dir, inner_itr, max_itr, learning_rate,
         psycho_hiding_thresh, phi, low, high, attacker):
    """Compute adversarial examples for a dataset and score them.

    The run configuration, the initial WER against the target transcriptions,
    the running time and the parsed result history are stored in
    ``kaldi.results``. A ``KeyboardInterrupt`` during the attack stops the
    attack only; the examples produced so far are still scored.

    Args:
        models: Forwarded to ``select_model`` together with ``phi``, ``low``
            and ``high`` to pick the model directory.
        experiments: Path-like parent directory; a new sub-directory named
            ``<YYYY-MM-DD>_<generated name>`` becomes the Kaldi base dir.
        dataset_dir: Location handed to ``Dataset``.
        inner_itr: Iterations per outer step.
        max_itr: Total iteration budget; ``max_itr // inner_itr`` outer steps
            are run.
        learning_rate: Exported to the attack script as ``LEARNING_RATE``.
        psycho_hiding_thresh: Threshold passed to the attack script (printed
            with a ``dB`` suffix).
        phi: Exported as ``PHI``; the string ``"None"`` disables log clamping.
        low: Lower bandpass bound, used for model selection.
        high: Upper bandpass bound, used for model selection.
        attacker: Attacker identifier passed to the attack script.
    """
    # create kaldi instance
    model_dir = select_model(models, phi, low, high)
    kaldi = Kaldi.from_trained_model(
        model_dir=model_dir,
        base_dir=experiments.joinpath(
            f'{time.strftime("%Y-%m-%d")}_{pydng.generate_name()}'))

    # prepare dataset
    dataset = Dataset(dataset_dir)
    kaldi_dataset_dir = kaldi.base_dir.joinpath("data", dataset.name)
    dataset.dump_as_kaldi_dataset(kaldi_dataset_dir,
                                  wavs_prefix=f'data/{dataset.name}')

    # optimization steps
    max_outer_itr = max_itr // inner_itr

    # dump config
    print('\n[+] Compute adversarial examples')
    print(f' -> attacker "{attacker}"')
    print(f' -> dataset "{dataset}"')
    print(f' -> psycho_hiding_thresh "{psycho_hiding_thresh}dB"')
    print(f' -> {max_outer_itr} * {inner_itr} = {inner_itr*max_outer_itr} itr')
    print(f' -> phi={phi} bandpass={low}-{high}')
    print(f' -> learning_rate={learning_rate}')
    kaldi.results['ae_config'] = {
        'phi' : phi,
        'low' : low,
        'high': high,
        'learning_rate' : learning_rate,
        'attacker' : attacker,
        'dataset' : dataset.name,
        'psycho_hiding_thresh' : psycho_hiding_thresh,
        'inner_itr' : inner_itr,
        'max_outer_itr' : max_outer_itr
    }

    # decode the unmodified data against the *target* transcriptions
    wer, _ = kaldi.decode_wavs(data_dir=dataset.data_dir, text=dataset.target)
    print(f' -> inital WER: {wer:03.2f}%')
    kaldi.results['inital_wer'] = f'{wer:03.2f}%'

    # psychoacoustic filter causes an unstable gradient for backpropping to raw audio
    # => caused by small input values to the log component
    # => thus, for backprop we clamp input before the log component (@ nnet-component.cc)
    log_clamp = 1 if phi != "None" else 0

    # invoke adversarial examples script
    logger = KaldiLogger(kaldi.base_dir)
    logger.log_ae(inner_itr, max_outer_itr)
    interrupted = False
    try:
        running_time = kaldi.run_in_container(
            f'./compute_adversarial_examples.sh {dataset} {psycho_hiding_thresh} {inner_itr} '
            f'{max_outer_itr} {len(dataset)} {len(dataset)} {attacker}',
            additional_cmds=[
                f'-v {kaldi_dataset_dir.joinpath("target_utterances")}:/root/kaldi/wsj_recipe/targets',
                f'-e NUMJOBS={len(dataset)}',
                f'-e PHI={phi} -e LOG_CLAMP={log_clamp} -e LEARNING_RATE={learning_rate}'
            ])
    except KeyboardInterrupt:
        # deliberate best-effort: a manual abort still proceeds to scoring
        interrupted = True
    finally:
        # stop the logger on every exit path, including unexpected errors,
        # and before any of the status lines below are printed
        logger.stop()

    if interrupted:
        print(" terminated prematurely")
    else:
        kaldi.results['running_time'] = f'{running_time // 3600}h {(running_time % 3600) // 60}m {(running_time % 60)}s'
        print(f" completed in {kaldi.results['running_time']}")

    # score AEs
    kaldi.results['history'] = parse_results_file(kaldi.base_dir)
    score_AEs(kaldi=kaldi,
              ae_dir=kaldi.base_dir.joinpath("adversarial_examples/wavs"),
              ref_dir=dataset.data_dir,
              stats_dir=kaldi.base_dir.joinpath("adversarial_examples/stats"),
              original_text=dataset.text,
              target_text=dataset.target,
              phi=phi)