def __init__(
        self,
        o_audio_file,
        t_audio_file,
        save_p_fname,
        save_info_fname,
        MODE="TIMIE",
        dct_field=0.65,
    ):
        """Load the original/target audio pair and initialise attack state.

        Args:
            o_audio_file: Original audio; passed to ``sf.read`` and stored.
            t_audio_file: Target audio; passed to ``sf.read`` and stored.
            save_p_fname: Stored as ``self.save_p_fname``; not used in this
                method (presumably the perturbation output file -- confirm).
            save_info_fname: Stored as ``self.save_info_fname``; not used in
                this method (presumably the run-info output file -- confirm).
            MODE: Forwarded to ``Sincnet.get_speaker_model`` and
                ``Sincnet.get_speaker_label``.
                NOTE(review): the default "TIMIE" looks like a typo for
                "TIMIT" -- confirm how Sincnet treats unrecognised modes.
            dct_field: Stored as ``self.dct_field``; not otherwise used here.
        """
        self.o_audio_file = o_audio_file
        self.t_audio_file = t_audio_file
        # Speaker model for the selected mode; ``.eval()`` is called on it
        # (presumably a torch module switched to inference mode -- confirm).
        self.model = Sincnet.get_speaker_model(MODE)
        self.model = self.model.eval()
        # Two lookup objects returned by Sincnet for the selected mode.
        self.speaker_label, self.label_speaker = Sincnet.get_speaker_label(
            MODE)
        self.save_p_fname = save_p_fname
        self.save_info_fname = save_info_fname

        # DCT / inverse-DCT helpers with orthonormal scaling (norm='ortho').
        # ``ffp`` is not imported in the visible source (presumably
        # scipy.fftpack -- confirm).
        self.dct = lambda x: ffp.dct(x, norm='ortho')
        self.idct = lambda ix: ffp.idct(ix, norm='ortho')
        # NOTE(review): ``self.sr`` is assigned twice; the sample rate of the
        # target file overwrites that of the original file without any check
        # that the two match.
        self.o_audio, self.sr = sf.read(o_audio_file)
        self.t_audio, self.sr = sf.read(t_audio_file)
        # Target label, original label, and the corresponding names.
        # These are predicted on the audio exactly as loaded, i.e. before the
        # truncation and normalisation performed below.
        self.o_label = self.predict_one_label(self.o_audio)
        self.t_label = self.predict_one_label(self.t_audio)
        self.o_name = self.get_name_by_label(self.o_label)
        self.t_name = self.get_name_by_label(self.t_label)
        # Initialise the audio: truncate both signals to the shorter length.
        self.audio_len = min(len(self.o_audio), len(self.t_audio))
        self.o_audio = self.o_audio[:self.audio_len]
        self.t_audio = self.t_audio[:self.audio_len]
        # Divide each signal in place by its infinity norm (for a 1-D array
        # this is the largest absolute sample value).
        # NOTE(review): an all-zero signal would divide by zero here.
        self.o_audio /= np.linalg.norm(self.o_audio, np.inf)
        self.t_audio /= np.linalg.norm(self.t_audio, np.inf)
        # Shrink the initial audio slightly (factor 0.95).
        self.o_audio *= 0.95
        self.t_audio *= 0.95
        # Placeholder; assigned elsewhere.
        self.o2_audio = None
        # Perturbation-related state. The "best" scales start at +inf and the
        # remaining fields start empty.
        self.best_pretub_scale = float('inf')
        self.best_pretub = None
        self.best_clip_scale = float('inf')
        self.best_clip_perturb = None
        self.interval = [None, None]
        self.clip_perturb_len = None
        # Hyperparameters.
        self.query_num = 0
        self.theta = 1e-4  # binary-search termination condition
        self.dct_field = dct_field
from utils import Sincnet
import torch
import numpy as np
import soundfile as sf
import librosa.display
import matplotlib.pyplot as plt
import pickle
import utils
import os
import LOCAL_ATT_HSJA_ATTACK

# Dataset selector used by the configuration below.
MODE = "TIMIT"
# Working directory at import time; base for the directories made by mkd().
abs_path = os.getcwd()

# Speaker model and the two label lookups for the selected dataset.
model = Sincnet.get_speaker_model(MODE)
speaker_label, label_speaker = Sincnet.get_speaker_label(MODE)
def mkd(name):
    """Create directory ``name`` (and any missing parents) if it is absent.

    Relative names are resolved against the module-level ``abs_path``;
    absolute names are used unchanged.

    Args:
        name: Directory path to create.

    Raises:
        OSError: If the path cannot be created (for example, it already
            exists as a regular file).
    """
    # The previous code called ``os.path.exist``, which does not exist and
    # raised AttributeError on every call. It also tested ``name`` but created
    # ``abs_path/name``; resolve the path once and use it for both.
    target = name if os.path.isabs(name) else os.path.join(abs_path, name)
    # makedirs creates missing parents (e.g. "lahresult") and, with
    # exist_ok=True, does nothing when the directory is already there.
    os.makedirs(target, exist_ok=True)
# Select result and dataset directories for the chosen dataset.
# All paths are raw strings so the Windows-style backslashes stay literal;
# the non-raw "lahresult\lib" form relied on the invalid escape "\l".
if MODE == "Librispeech":
    save_dir = r"lahresult\lib"
    save_adv_dir = r"lahresult\libaudio"
    mkd(save_dir)
    mkd(save_adv_dir)
    attackdir = r"AttackDataset\lib-attack-audio"
    targetdir = r"AttackDataset\lib-target-audio"
else:
    save_dir = r"lahresult\timit"
    save_adv_dir = r"lahresult\timitaudio"
    mkd(save_dir)
    mkd(save_adv_dir)