示例#1
0
def compute_vgg13_features(waveform, hparams):
    """Build the graph ops that turn a waveform into standardized log-mel patches.

    Pipeline: STFT (frame length 1024, hop 512) -> power spectrogram ->
    64-band mel projection -> dB relative to the maximum of the whole
    mel spectrogram -> non-overlapping patches of 128 frames ->
    ``generator.standardize``.

    Args:
        waveform: Audio tensor passed to ``tf.contrib.signal.stft``.
        hparams: Hyper-parameter object. Not read by this function; kept so
            the signature stays compatible with existing callers.

    Returns:
        The standardized feature tensor, with its static shape set to
        ``[None, 128, 64]``.
    """
    # The scaler is fitted on the dataset registered as 'training' in cfg.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)

    # Transposed so the filterbank can be applied on the right-hand side of
    # the tensordot below.
    mel_filt = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
    # Pin the dtype to float32, like every other filterbank conversion in
    # this file, instead of inheriting whatever dtype librosa returned.
    mel_filt = tf.convert_to_tensor(mel_filt, dtype=tf.float32)

    stfts = tf.contrib.signal.stft(waveform,
                                   frame_length=1024,
                                   frame_step=512,
                                   fft_length=1024,
                                   pad_end=True)
    spectrograms = tf.abs(stfts)

    # Warp the linear scale power spectrograms into the mel-scale.
    mel_spectrograms = tf.tensordot(tf.pow(spectrograms, 2), mel_filt, 1)
    # tensordot loses the static shape; restore it from the two operands.
    mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
        mel_filt.shape[-1:]))

    max_val = tf.reduce_max(mel_spectrograms, axis=None)

    # Stabilized log: 10 * log10((mel + eps) / (max + eps)), written with
    # natural logs divided by ln(10).
    log_mel_spectrograms = 10 * (
        (tf.log(mel_spectrograms + 1e-6) - tf.log(max_val + 1e-6)) /
        tf.log(tf.constant(10, dtype=tf.float32)))

    # Cut along axis 0 into 128-frame patches with a step of 128 (no
    # overlap); pad_end=True pads the last, shorter patch.
    log_mel_spectrograms = tf.contrib.signal.frame(log_mel_spectrograms,
                                                   128,
                                                   128,
                                                   axis=0,
                                                   pad_end=True)
    features = generator.standardize(log_mel_spectrograms)
    features.set_shape(shape=[None, 128, 64])
    return features
示例#2
0
def dataset_iterator(train_csv_file, train_audio_dir, label_data_file,
                     hparams):
    """Create the input pipeline and iterator for the training process.

    Args:
        train_csv_file: CSV listing the training clips. It is parsed by
            ``load_data`` and also streamed line by line, skipping the
            first line.
        train_audio_dir: Directory forwarded to the per-line map function.
        label_data_file: ``.npy`` file loaded with ``np.load`` and forwarded
            to the map function as ``label_data``.
        hparams: Hyper-parameter object; ``vgg13_features`` selects the map
            function and ``batch_size`` sets the batch size.

    Returns:
        Tuple ``(features, label, num_classes, initializer)`` where the
        first two are the iterator's next-element tensors and the last is
        the op that (re)initializes the iterator.
    """
    label_index_table = load_data(train_csv_file)
    soft_labels = np.load(label_data_file)
    print(soft_labels.shape)
    num_classes = 41

    dataset = tf.data.TextLineDataset(train_csv_file).skip(1)
    dataset = dataset.shuffle(buffer_size=10000)

    # Keyword arguments shared by both per-line map functions.
    common_kwargs = dict(train_audio_dir=train_audio_dir,
                         hparams=hparams,
                         label_index_table=label_index_table,
                         label_data=soft_labels)

    if hparams.vgg13_features:
        x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
        generator = utils.fit_scaler(x)
        mel_filt = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
        map_func = functools.partial(get_vgg13_data,
                                     generator=generator,
                                     mel_filt=mel_filt,
                                     **common_kwargs)
        dataset = dataset.map(map_func=map_func, num_parallel_calls=6)
    else:
        dataset = dataset.map(
            map_func=functools.partial(get_data, **common_kwargs))

    # Each mapped element is a batch of examples; flatten it so that
    # shuffling and batching operate on individual examples.
    dataset = dataset.apply(tf.contrib.data.unbatch())
    dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.repeat(6)
    dataset = dataset.batch(hparams.batch_size)
    dataset = dataset.prefetch(10)

    iterator = dataset.make_initializable_iterator()
    features, label = iterator.get_next()

    return features, label, num_classes, iterator.initializer
示例#3
0
def label_data(model_path, train_csv_file, train_audio_dir):
    """Run a saved model over every listed file and collect its probabilities.

    One probability vector is produced per file; when the model returns
    several rows for a file they are averaged.

    Args:
        model_path: Checkpoint path handed to ``saver.restore``;
            ``model_path + '.meta'`` is handed to ``CleverHansModel``.
        train_csv_file: CSV whose first column holds the audio file names.
        train_audio_dir: Directory forwarded to
            ``utils_tf._preprocess_data``.

    Returns:
        ``numpy.ndarray`` of shape ``(number_of_files, 41)``; row ``i``
        belongs to the ``i``-th file of the CSV. Nothing is written to
        disk, so callers that want to persist the result must save it
        themselves (e.g. with ``np.save``).
    """
    sr = 32000
    df = pd.read_csv(train_csv_file)
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)
    file_names = df.iloc[:, 0].values
    print(file_names)

    with tf.Graph().as_default() as graph:
        mel_filt = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
        mel_filt = tf.convert_to_tensor(mel_filt, dtype=tf.float32)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        model = CleverHansModel(model_path + '.meta', sr, generator, mel_filt)
        saver = model.build_graph(pcm)

    all_probs = np.zeros((len(file_names), 41))
    print(all_probs.shape)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        print(len(file_names))
        for i, file_name in enumerate(file_names):
            data, _ = utils_tf._preprocess_data(train_audio_dir, file_name)
            file_probs = sess.run([model.get_probs()], feed_dict={pcm: data})
            file_probs = np.squeeze(file_probs)
            # More than one row left after squeezing: average them into a
            # single vector for the file.
            if file_probs.ndim != 1:
                file_probs = np.mean(file_probs, axis=0)

            all_probs[i, :] = file_probs
            print(i)

    # Previously the matrix was discarded (every save was commented out and
    # the function returned None); hand it back to the caller instead.
    return all_probs
示例#4
0
def target():
    """Print how often the saved model predicts the labelled class.

    Flags are read with ``parse_flags``. The model at
    ``flags.save_model_dir`` is restored and at most the first 100 files of
    ``flags.infer_csv_file`` (file name in column 0, label name in column 1)
    are classified. For every correct prediction the label is printed, and
    after every file the number of correct predictions so far divided by
    the number of files to be evaluated is printed.
    """
    flags = parse_flags()

    sr = 32000
    max_files = 100

    df = pd.read_csv(flags.infer_csv_file)
    file_names = df.iloc[:, 0].values
    # Never index past the end of a CSV that lists fewer than 100 files.
    num_samples = min(max_files, len(file_names))

    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)

    with tf.Graph().as_default() as graph:
        mel_filt = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
        mel_filt = tf.convert_to_tensor(mel_filt, dtype=tf.float32)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        model = CleverHansModel(flags.save_model_dir + '.meta', sr, generator,
                                mel_filt)
        saver = model.build_graph(pcm)

    count = 0
    with tf.Session(graph=graph) as sess:
        saver.restore(sess, flags.save_model_dir)
        print(len(file_names))
        for i in range(num_samples):
            data, _ = utils_tf._preprocess_data(flags.infer_audio_dir,
                                                file_names[i])
            probs = sess.run([model.get_probs()], feed_dict={pcm: data})
            probs = np.squeeze(probs)
            # More than one row left after squeezing: average them into a
            # single vector for the file.
            if probs.ndim != 1:
                probs = np.mean(probs, axis=0)
            predicted = np.argmax(probs)

            lab = utils_tf._convert_label_name_to_label(df.iloc[i, 1])
            if lab == predicted:
                count += 1
                print(lab, predicted)

            print(count / num_samples)
示例#5
0
def deepfoolattack(audio_path,metadata_path,model_path,exp_data_path,adv_audio_path,save_data=False):
    """Run the DeepFool attack on every clip listed in a metadata CSV.

    For each clip the model's prediction is printed before and after the
    attack, and the per-clip statistics are collected. Clips whose
    preprocessing raises ``EOFError`` are reported and skipped.

    Args:
        audio_path: Directory forwarded to ``utils_tf._preprocess_data``.
        metadata_path: CSV whose column 1 holds the audio file names.
        model_path: Checkpoint path handed to ``saver.restore``;
            ``model_path + '.meta'`` is handed to ``CleverHansModel``.
        exp_data_path: CSV file the statistics are appended to (without a
            header row) when ``save_data`` is true.
        adv_audio_path: String prefix for the adversarial wav files. It is
            concatenated as-is with ``'adv-' + file name``, so it must
            already end with a path separator if it names a directory.
        save_data: When true, write the adversarial audio and the
            statistics to disk; otherwise only print.
    """
    # Load dataset to normalize new data
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)
    df = pd.read_csv(metadata_path)
    file_names = df.iloc[:, 1].values
    mel_fb = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
    sample_rate = 32000

    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    new_o_label_conf = []
    snr = []

    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator, mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        saver = model.build_graph(pcm)
        deepfool = DFM.DeepFool(model)
        deepfool.build_attack(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        for i, audio_file_name in enumerate(file_names):
            try:
                data, q = utils_tf._preprocess_data(audio_path, audio_file_name)
            except EOFError:
                print("EOF Error")
                # Without this, the loop would go on with `data`/`q` either
                # undefined or left over from the previous clip.
                continue

            s = sess.run([model.get_probs()], feed_dict={'input_audio:0': data})
            s = np.squeeze(s)
            # More than one row left after squeezing: average them into a
            # single vector for the clip.
            if s.ndim != 1:
                s = np.mean(s, axis=0)
            clean_label = np.argmax(s)

            print('Original label number:', clean_label)
            print('Original label confidence:', np.max(s))

            tic = time.process_time()
            adv = deepfool.attack(sess, data, int(q))
            toc = time.process_time()

            print('Time for processing sample:', toc - tic, 'for iteration:', i)
            preds = sess.run([model.get_probs()], feed_dict={pcm: adv})
            preds = np.squeeze(preds)
            if preds.ndim != 1:
                preds = np.mean(preds, axis=0)
            print('New label number:', np.argmax(preds))
            print('New label confidence:', np.max(preds))

            if save_data:
                librosa.output.write_wav(adv_audio_path + 'adv-' + audio_file_name, adv, sample_rate)

            audio_name.append(audio_file_name)
            audio_length.append(int(q))
            original_label.append(clean_label)
            original_confidence.append(np.max(s))
            new_label.append(np.argmax(preds))
            new_confidence.append(np.max(preds))
            # Confidence the attacked clip still gets for the originally
            # predicted class.
            new_o_label_conf.append(preds[clean_label])
            # Signal-to-noise ratio in dB: clean power over perturbation power.
            snr.append(10 * np.log10(np.mean(data**2) / (np.mean((adv - data)**2))))

        if save_data:
            df_deepfool = pd.DataFrame({
                'audio_name': audio_name,
                'audio_length': audio_length,
                'original_label': original_label,
                'original_confidence': original_confidence,
                'new_label': new_label,
                'new_confidence': new_confidence,
                'new_orig_conf': new_o_label_conf,
                'SNR': snr,
            })

            with open(exp_data_path, 'a') as f:
                df_deepfool.to_csv(f, header=False)
示例#6
0
def inferenceiqbal(audio_path,
                   metadata_path,
                   model_path,
                   exp_data_path,
                   adv_audio_path,
                   save_data=False):
    """Classify every clip listed in a metadata CSV and track the accuracy.

    Clips whose preprocessing raises ``EOFError`` are reported and skipped;
    they count neither as correct nor towards the total.

    Args:
        audio_path: Directory forwarded to ``utils_tf._preprocess_data``.
        metadata_path: CSV with the audio file name in column 0 and the
            label name in column 1.
        model_path: Checkpoint path handed to ``saver.restore``;
            ``model_path + '.meta'`` is handed to ``CleverHansModel``.
        exp_data_path: CSV file the per-clip results are written to
            (overwritten, no header row) when ``save_data`` is true.
        adv_audio_path: Not used by this function; kept so the signature
            matches the attack functions.
        save_data: When true, write the per-clip results to
            ``exp_data_path``.
    """
    # Load dataset to normalize new data
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)
    df = pd.read_csv(metadata_path)
    label_names = df.iloc[:, 1].values
    file_names = df.iloc[:, 0].values

    mel_fb = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
    sample_rate = 32000

    audio_name = []
    ground_truth = []
    inferred_label = []
    inferred_confidence = []

    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator,
                                mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        saver = model.build_graph(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        count = 0
        count_tot = 0
        for i, audio_file_name in enumerate(file_names):
            try:
                data, q = utils_tf._preprocess_data(audio_path,
                                                    audio_file_name)
            except EOFError:
                print("EOF Error")
                # Without this, the loop would go on with `data` either
                # undefined or left over from the previous clip, silently
                # corrupting the accuracy figures.
                continue

            gt_label = utils_tf._convert_label_name_to_label(label_names[i])
            s = sess.run([model.get_probs()],
                         feed_dict={'input_audio:0': data})

            s = np.squeeze(s)
            # More than one row left after squeezing: average them into a
            # single vector for the clip.
            if s.ndim != 1:
                s = np.mean(s, axis=0)
            label = np.argmax(s)

            count_tot += 1
            if label == gt_label:
                count += 1

            if i % 1000 == 0:
                print('Iteration number:', i)
                print('Current accuracy:', float(count / count_tot))

            audio_name.append(audio_file_name)
            ground_truth.append(gt_label)
            inferred_label.append(label)
            inferred_confidence.append(np.max(s))

        if save_data:
            df_deepfool = pd.DataFrame({
                'audio_name': audio_name,
                'ground_truth': ground_truth,
                'inferred_label': inferred_label,
                'inferred_confidence': inferred_confidence,
            })

            with open(exp_data_path, 'w') as f:
                df_deepfool.to_csv(f, header=False)
def carliniwagneruntargeted(audio_path,
                            metadata_path,
                            model_path,
                            exp_data_path,
                            adv_audio_path,
                            save_data=False):
    """Run the untargeted Carlini-Wagner attack on every clip of a metadata CSV.

    Clips whose preprocessing raises ``EOFError`` are reported and skipped.

    Args:
        audio_path: Directory forwarded to ``utils_tf._preprocess_data``.
        metadata_path: CSV with the audio file name in column 1 and the
            label name in column 2.
        model_path: Checkpoint path handed to ``saver.restore``;
            ``model_path + '.meta'`` is handed to ``CleverHansModel``.
        exp_data_path: CSV file the statistics are written to (overwritten,
            no header row) when ``save_data`` is true.
        adv_audio_path: String prefix for the adversarial wav files. It is
            concatenated as-is with ``'adv-' + file name``, so it must
            already end with a path separator if it names a directory.
        save_data: When true, write the adversarial audio and the
            statistics to disk.
    """
    # Load dataset to normalize new data
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)
    df = pd.read_csv(metadata_path)
    label_names = df.iloc[:, 2].values
    file_names = df.iloc[:, 1].values
    mel_fb = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
    sample_rate = 32000

    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    new_o_label_conf = []
    snr = []

    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator,
                                mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        carliniwagner = CW.CarliniWagnerAttack(model,
                                               learning_rate=1e-5,
                                               initial_const=1e-2,
                                               max_iterations=1000,
                                               confidence=500,
                                               binary_search_steps=2)
        saver = carliniwagner.build_attack(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        for i, audio_file_name in enumerate(file_names):
            try:
                data, q = utils_tf._preprocess_data(audio_path,
                                                    audio_file_name)
            except EOFError:
                print("EOF Error")
                # Without this, the loop would go on with `data`/`q` either
                # undefined or left over from the previous clip.
                continue

            label = utils_tf._convert_label_name_to_label(label_names[i])

            print('Ground truth label:', label_names[i])

            # One copy of the label per entry of the batch of size q.
            labels_batchwise = np.repeat(label, int(q))

            tic = time.process_time()
            adv, o_label, o_conf, n_label, n_conf, n_conf_gt = carliniwagner.attack(
                sess,
                data,
                label,
                labels_batchwise,
                int(q),
                prob_thresh=0.0244)
            toc = time.process_time()

            print('Time for iteration:', i, 'is', toc - tic)
            if save_data:
                librosa.output.write_wav(
                    adv_audio_path + 'adv-' + audio_file_name, adv,
                    sample_rate)

            audio_name.append(audio_file_name)
            audio_length.append(int(q))
            original_label.append(o_label)
            original_confidence.append(o_conf)
            new_label.append(n_label)
            new_confidence.append(n_conf)
            new_o_label_conf.append(n_conf_gt)
            # Signal-to-noise ratio in dB: clean power over perturbation power.
            snr.append(10 *
                       np.log10(np.mean(data**2) / (np.mean((adv - data)**2))))

        if save_data:
            df_cw = pd.DataFrame({
                'audio_name': audio_name,
                'audio_length': audio_length,
                'original_label': original_label,
                'original_confidence': original_confidence,
                'new_label': new_label,
                'new_confidence': new_confidence,
                'new_orig_conf': new_o_label_conf,
                'SNR': snr,
            })

            with open(exp_data_path, 'w') as f:
                df_cw.to_csv(f, header=False)
def carliniwagnertargeted(audio_path,
                          metadata_path,
                          model_path,
                          exp_data_path,
                          adv_audio_path,
                          save_data=False):
    """Run the targeted Carlini-Wagner attack on every clip of a metadata CSV.

    Every clip is attacked once per entry of a fixed list of six target
    classes, except for the target that equals the clip's own ground-truth
    class. Clips whose preprocessing raises ``EOFError`` are reported and
    skipped.

    Args:
        audio_path: Directory forwarded to ``utils_tf._preprocess_data``.
        metadata_path: CSV with the audio file name in column 1 and the
            ground-truth label in column 2.
        model_path: Checkpoint path handed to ``saver.restore``;
            ``model_path + '.meta'`` is handed to ``CleverHansModel``.
        exp_data_path: CSV file the statistics are appended to (with the
            default header row) when ``save_data`` is true.
        adv_audio_path: String prefix for the adversarial wav files. It is
            concatenated as-is with ``'adv-<target>-' + file name``, so it
            must already end with a path separator if it names a directory.
        save_data: When true, write the adversarial audio and the
            statistics to disk.
    """
    # Load dataset to normalize new data
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)
    df = pd.read_csv(metadata_path)
    gt_labels = df.iloc[:, 2].values
    file_names = df.iloc[:, 1].values
    mel_fb = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
    sample_rate = 32000
    label_list = [
        "Bass_drum", "Cello", "Clarinet", "Oboe", "Snare_drum",
        "Violin_or_fiddle"
    ]

    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    # NOTE(review): collected but not written to the CSV below, unlike the
    # 'new_orig_conf' column of the untargeted attack - confirm whether the
    # omission is intended before changing the output schema.
    new_o_label_conf = []
    snr = []

    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator,
                                mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        carliniwagner = CW.CarliniWagnerAttack(model,
                                               learning_rate=1e-5,
                                               confidence=500,
                                               targeted=True,
                                               max_iterations=1000,
                                               binary_search_steps=2)
        saver = carliniwagner.build_attack(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        for i, audio_file_name in enumerate(file_names):
            try:
                data, q = utils_tf._preprocess_data(audio_path,
                                                    audio_file_name)
            except EOFError:
                print("EOF Error")
                # Without this, the loop would go on with `data`/`q` either
                # undefined or left over from the previous clip.
                continue

            gt_label = gt_labels[i]

            print('Ground truth label:', gt_label, 'Audio_file:',
                  audio_file_name)
            for target_name in label_list:
                label = utils_tf._convert_label_name_to_label(target_name)
                # Do not attack towards the clip's own class. The metadata
                # value is matched against the target's name as well as its
                # converted index: comparing only the index against a
                # class-name string would never be true.
                if gt_label == target_name or gt_label == label:
                    continue

                adv, o_label, o_conf, n_label, n_conf, n_gt_conf = carliniwagner.attack(
                    sess,
                    data,
                    label,
                    np.repeat(label, int(q)),
                    int(q),
                    prob_thresh=0.975)

                if save_data:
                    librosa.output.write_wav(
                        adv_audio_path + 'adv-' + target_name + '-' +
                        audio_file_name, adv, sample_rate)

                audio_name.append(audio_file_name)
                audio_length.append(int(q))
                original_label.append(o_label)
                original_confidence.append(o_conf)
                new_label.append(n_label)
                new_confidence.append(n_conf)
                new_o_label_conf.append(n_gt_conf)

                # Signal-to-noise ratio in dB. The noise power is the mean
                # of the squared perturbation, not the square of its mean.
                snr.append(
                    10 * np.log10(np.mean(data**2) / np.mean((adv - data)**2)))

        if save_data:
            df_cw = pd.DataFrame(
                {
                    'audio_name': audio_name,
                    'audio_length': audio_length,
                    'original_label': original_label,
                    'original_confidence': original_confidence,
                    'new_label': new_label,
                    'new_confidence': new_confidence,
                    'SNR': snr
                },
                columns=[
                    'audio_name', 'audio_length', 'original_label',
                    'original_confidence', 'new_label', 'new_confidence', 'SNR'
                ])

            with open(exp_data_path, 'a') as f:
                df_cw.to_csv(f)