示例#1
0
def main(models, experiments, dataset_dir, phi, low, high):
    """Decode a dataset with a trained model and print the word error rates.

    A model directory is picked via ``select_model(models, phi, low, high)``
    and a Kaldi instance is created from it inside a fresh sub-directory of
    ``experiments`` named ``<YYYY-MM-DD>_<generated name>``. The dataset at
    ``dataset_dir`` is dumped in Kaldi format below that directory, decoded,
    and the overall WER plus the per-utterance reference, hypothesis and WER
    are printed to stdout.
    """
    # set up the kaldi instance in a dated, uniquely named experiment dir
    chosen_model = select_model(models, phi, low, high)
    date_stamp = time.strftime("%Y-%m-%d")
    experiment_dir = experiments.joinpath(
        f'{date_stamp}_{pydng.generate_name()}')
    kaldi = Kaldi.from_trained_model(model_dir=chosen_model,
                                     base_dir=experiment_dir)

    # dump the dataset into the experiment's data directory
    dataset = Dataset(dataset_dir)
    wavs_prefix = f'data/{dataset.name}'
    target_dir = kaldi.base_dir.joinpath("data", dataset.name)
    dataset.dump_as_kaldi_dataset(target_dir, wavs_prefix=wavs_prefix)

    # decode and report the overall result, then one entry per utterance
    wer, meta = kaldi.decode_wavs(data_dir=dataset.data_dir, text=dataset.text)
    print(f'\n[+] WER {dataset}: {wer:03.2f}%')
    for entry in meta:
        print(f"\n[+] {entry['wav_name']}")
        print(f"    REF: {entry['ref']}")
        print(f"    HYP: {entry['hyp']}")
        # per-utterance WER is scaled by 100 here to print it as a percentage
        print(f"    WER: {entry['wer']*100:5.2f}%")
示例#2
0
 def decode_wavs(self, data_dir, text, phi=None):
     """Decode the wavs in ``data_dir`` and return the WER with per-utterance meta.

     A uniquely named decode directory (``decode_job_<dir name>_<unix time>``)
     is created below ``exp/nnet5d_gpu_time`` in ``self.base_dir``, the data
     is dumped there as a Kaldi dataset with ``text`` as its transcription,
     and ``./decode_wavs.sh`` is run in the container.

     Args:
         data_dir: Directory passed to ``Dataset``; converted to ``Path``.
         text: Transcriptions assigned to the dataset before dumping.
         phi: Value forwarded to the container as the ``PHI`` environment
             variable. With the default ``None`` the container receives the
             literal string ``None``.

     Returns:
         Tuple ``(best_wer, decoding_meta)``: the WER parsed as a float from
         ``scoring_kaldi/best_wer`` and the result of
         ``parse_per_utt_file(decode_dir)``.

     Raises:
         IndexError: If the ``best_wer`` file holds no ``%WER ... [`` entry.
     """
     data_dir = Path(data_dir)
     # create decode dir; the timestamp keeps repeated decodes of the same
     # data directory apart
     decode_name = f'decode_job_{data_dir.name}_{int(time.time())}'
     # path relative to base_dir, shared by the dump and the decode script
     rel_decode_dir = f"exp/nnet5d_gpu_time/{decode_name}"
     decode_dir = self.base_dir.joinpath(rel_decode_dir)
     # create kaldi dataset
     dataset = Dataset(data_dir, name=decode_name)
     dataset.text = text
     dataset.dump_as_kaldi_dataset(decode_dir, wavs_prefix=rel_decode_dir)
     # invoke decode script in container
     self.run_in_container(
         f'./decode_wavs.sh {rel_decode_dir}',
         additional_cmds=[
             # never request more jobs than utterances or available CPUs
             f'-e NUMJOBS={min(len(dataset), cpu_count())}',
             f'-e PHI={phi} -e LOG_CLAMP=0'
         ])
     # get best_wer; the context manager closes the file handle right away
     with open(f'{decode_dir}/scoring_kaldi/best_wer') as wer_file:
         best_wer_line = wer_file.read().strip()
     best_wer = float(re.findall(r'%WER (.*) \[', best_wer_line)[0])
     # get decoding meta
     decoding_meta = parse_per_utt_file(decode_dir)
     return best_wer, decoding_meta
示例#3
0
def main(models, experiments, dataset_dir, inner_itr, max_itr, learning_rate,
         psycho_hiding_thresh, phi, low, high, attacker):
    """Compute adversarial examples for a dataset and score them.

    Steps, in order:
      1. Select a model via ``select_model(models, phi, low, high)`` and
         create a Kaldi instance in ``experiments/<YYYY-MM-DD>_<generated name>``.
      2. Dump the dataset at ``dataset_dir`` in Kaldi format.
      3. Print the attack configuration and store it in
         ``kaldi.results['ae_config']``.
      4. Decode the unmodified data against the target transcriptions and
         record the initial WER.
      5. Run ``./compute_adversarial_examples.sh`` in the container while a
         ``KaldiLogger`` is active. ``Ctrl-C`` ends this step early without
         aborting the script.
      6. Parse the results file and score the adversarial examples.

    The optimisation runs ``max_itr // inner_itr`` outer iterations of
    ``inner_itr`` steps each, so a remainder of ``max_itr`` is dropped.
    """
    # create kaldi instance
    model_dir = select_model(models, phi, low, high)
    kaldi = Kaldi.from_trained_model(model_dir=model_dir,
                                     base_dir=experiments.joinpath(f'{time.strftime("%Y-%m-%d")}_{pydng.generate_name()}'))

    # prepare dataset
    dataset = Dataset(dataset_dir)
    kaldi_dataset_dir = kaldi.base_dir.joinpath("data", dataset.name)
    dataset.dump_as_kaldi_dataset(kaldi_dataset_dir, wavs_prefix=f'data/{dataset.name}')

    # optimization steps
    max_outer_itr = max_itr // inner_itr

    # dump config
    print('\n[+] Compute adversarial examples')
    print(f'    -> attacker "{attacker}"')
    print(f'    -> dataset "{dataset}"')
    print(f'    -> psycho_hiding_thresh "{psycho_hiding_thresh}dB"')
    print(f'    -> {max_outer_itr} * {inner_itr} = {inner_itr*max_outer_itr} itr')
    print(f'    -> phi={phi} bandpass={low}-{high}')
    print(f'    -> learning_rate={learning_rate}')
    kaldi.results['ae_config'] = {
        'phi': phi,
        'low': low,
        'high': high,
        'learning_rate': learning_rate,
        'attacker': attacker,
        'dataset': dataset.name,
        'psycho_hiding_thresh': psycho_hiding_thresh,
        'inner_itr': inner_itr,
        'max_outer_itr': max_outer_itr,
    }

    # decode
    wer, _ = kaldi.decode_wavs(data_dir=dataset.data_dir, text=dataset.target)
    # NOTE(review): the 'inital' spelling is kept on purpose, since the
    # results key may be read by other tooling -- confirm before renaming
    print(f'    -> inital WER: {wer:03.2f}%')
    kaldi.results['inital_wer'] = f'{wer:03.2f}%'

    # psychoacoustic filter causes an unstable gradient for backpropping to raw audio
    # => caused by small input values to the log component
    # => thus, for backprop we clamp input before the log component (@ nnet-component.cc)
    # phi is compared against the string "None", i.e. it arrives as text here
    log_clamp = 1 if phi != "None" else 0

    # invoke adversarial examples script
    logger = KaldiLogger(kaldi.base_dir)
    logger.log_ae(inner_itr, max_outer_itr)
    interrupted = False
    try:
        running_time = kaldi.run_in_container(
            f'./compute_adversarial_examples.sh {dataset} {psycho_hiding_thresh} {inner_itr} '
            f'{max_outer_itr} {len(dataset)} {len(dataset)} {attacker}',
            additional_cmds=[f'-v {kaldi_dataset_dir.joinpath("target_utterances")}:/root/kaldi/wsj_recipe/targets',
                             f'-e NUMJOBS={len(dataset)}',
                             f'-e PHI={phi} -e LOG_CLAMP={log_clamp} -e LEARNING_RATE={learning_rate}']
        )
    except KeyboardInterrupt:
        # a manual abort is not an error: whatever was produced so far is
        # still scored below
        interrupted = True
    finally:
        # stop the logger on every exit path, including unexpected
        # exceptions, which then propagate to the caller
        logger.stop()

    if interrupted:
        print("    terminated prematurely")
    else:
        kaldi.results['running_time'] = f'{running_time // 3600}h {(running_time % 3600) // 60}m {(running_time % 60)}s'
        print(f"    completed in {kaldi.results['running_time']}")

    # score AEs
    kaldi.results['history'] = parse_results_file(kaldi.base_dir)
    score_AEs(kaldi=kaldi,
              ae_dir=kaldi.base_dir.joinpath("adversarial_examples/wavs"),
              ref_dir=dataset.data_dir,
              stats_dir=kaldi.base_dir.joinpath("adversarial_examples/stats"),
              original_text=dataset.text,
              target_text=dataset.target,
              phi=phi)