def compute_vgg13_features(waveform, hparams):
    """Build the ops that turn a waveform into standardized log-mel frames.

    The 'training' dataset is loaded and a scaler is fitted on it every time
    this function is called; the fitted scaler standardizes the output.

    Args:
        waveform: Audio tensor handed to ``tf.contrib.signal.stft``.
        hparams: Hyper-parameter object. It is not consulted here (the sample
            rate used for the mel filter bank is fixed at 32000); the
            parameter is kept so existing callers keep working.

    Returns:
        The standardized feature tensor, with its static shape set to
        ``[None, 128, 64]``.
    """
    # Scaler fitted on the training data, used for the final standardization.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)

    # Transposed mel filter bank so it can be right-multiplied with the
    # spectrogram. The dtype is pinned to float32 like the other functions in
    # this file do, so the tensordot below never mixes float types.
    mel_filt = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
    mel_filt = tf.convert_to_tensor(mel_filt, dtype=tf.float32)

    stfts = tf.contrib.signal.stft(waveform,
                                   frame_length=1024,
                                   frame_step=512,
                                   fft_length=1024,
                                   pad_end=True)
    spectrograms = tf.abs(stfts)

    # Warp the linear scale spectrograms into the mel-scale.
    mel_spectrograms = tf.tensordot(tf.pow(spectrograms, 2), mel_filt, 1)
    mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
        mel_filt.shape[-1:]))
    max_val = tf.reduce_max(mel_spectrograms, axis=None)

    # Compute a stabilized log to get log-magnitude mel-scale spectrograms,
    # expressed relative to the maximum value (10 * log10(mel / max)).
    log_ratio = tf.log(mel_spectrograms + 1e-6) - tf.log(max_val + 1e-6)
    log_mel_spectrograms = 10 * (
        log_ratio / tf.log(tf.constant(10, dtype=tf.float32)))

    # Cut the time axis into non-overlapping chunks of 128 frames,
    # padding the last chunk.
    log_mel_spectrograms = tf.contrib.signal.frame(log_mel_spectrograms,
                                                   128,
                                                   128,
                                                   axis=0,
                                                   pad_end=True)

    features = generator.standardize(log_mel_spectrograms)
    features.set_shape(shape=[None, 128, 64])
    return features
def dataset_iterator(train_csv_file, train_audio_dir, label_data_file,
                     hparams):
    """Create an iterator for the training process.

    Args:
        train_csv_file: CSV file read line by line (the first line is
            skipped) and also passed to ``load_data``.
        train_audio_dir: Directory forwarded to the per-line map function.
        label_data_file: File loaded with ``np.load`` and forwarded to the
            per-line map function.
        hparams: Hyper-parameter object; ``vgg13_features`` selects the map
            function and ``batch_size`` sets the batch size.

    Returns:
        A tuple ``(features, label, num_classes, initializer)`` where
        ``features`` and ``label`` come from ``iterator.get_next()`` and
        ``initializer`` is the iterator's initializer op.
    """
    label_index_table = load_data(train_csv_file)
    label_data = np.load(label_data_file)
    print(label_data.shape)
    num_classes = 41

    dataset = tf.data.TextLineDataset(train_csv_file).skip(1)
    dataset = dataset.shuffle(buffer_size=10000)

    if hparams.vgg13_features:
        # The VGG13 path needs a scaler fitted on the training data and a
        # transposed mel filter bank.
        x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
        generator = utils.fit_scaler(x)
        mel_filt = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
        map_func = functools.partial(get_vgg13_data,
                                     train_audio_dir=train_audio_dir,
                                     hparams=hparams,
                                     label_index_table=label_index_table,
                                     label_data=label_data,
                                     generator=generator,
                                     mel_filt=mel_filt)
        dataset = dataset.map(map_func=map_func, num_parallel_calls=6)
    else:
        map_func = functools.partial(get_data,
                                     train_audio_dir=train_audio_dir,
                                     hparams=hparams,
                                     label_index_table=label_index_table,
                                     label_data=label_data)
        dataset = dataset.map(map_func=map_func)

    # Flatten the per-file batches into single examples, then reshuffle,
    # repeat and re-batch them.
    dataset = dataset.apply(tf.contrib.data.unbatch())
    dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.repeat(6)
    dataset = dataset.batch(hparams.batch_size)
    dataset = dataset.prefetch(10)

    iterator = dataset.make_initializable_iterator()
    features, label = iterator.get_next()
    return features, label, num_classes, iterator.initializer
def label_data(model_path, train_csv_file, train_audio_dir):
    """Label the data with a saved model, one softmax vector per file.

    Args:
        model_path: Checkpoint path; ``model_path + '.meta'`` is given to
            ``CleverHansModel`` and ``model_path`` to ``saver.restore``.
        train_csv_file: CSV whose first column holds the audio file names.
        train_audio_dir: Directory passed to ``utils_tf._preprocess_data``.

    Returns:
        A ``(number_of_files, 41)`` array whose row ``i`` holds the model
        output for file ``i`` (averaged over axis 0 when the model returns
        more than one vector for a file). Nothing is written to disk; the
        caller can persist the array, e.g. with ``np.save``.
    """
    sr = 32000
    df = pd.read_csv(train_csv_file)

    # Scaler fitted on the training data, handed to the model wrapper.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)

    file_names = df.iloc[:, 0].values
    print(file_names)

    with tf.Graph().as_default() as graph:
        mel_filt = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
        mel_filt = tf.convert_to_tensor(mel_filt, dtype=tf.float32)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        model = CleverHansModel(model_path + '.meta', sr, generator, mel_filt)
        saver = model.build_graph(pcm)

    softmax_per_file = np.zeros((len(file_names), 41))
    print(softmax_per_file.shape)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        print(len(file_names))
        for i, file_name in enumerate(file_names):
            data, _ = utils_tf._preprocess_data(train_audio_dir, file_name)
            file_probs = sess.run([model.get_probs()], feed_dict={pcm: data})
            file_probs = np.squeeze(file_probs)
            # Several vectors for one file are collapsed into their mean.
            if file_probs.ndim != 1:
                file_probs = np.mean(file_probs, axis=0)
            softmax_per_file[i, :] = file_probs
            print(i)

    # Previously the matrix was computed and then dropped; hand it back so
    # the work is not lost.
    return softmax_per_file
def target():
    """Print the accuracy of a saved model on the first files of a CSV.

    Flags are read with ``parse_flags()``: ``infer_csv_file`` (first column
    file names, second column label names), ``infer_audio_dir`` and
    ``save_model_dir``. At most 100 files are evaluated. For each one the
    true and predicted label indices are printed, followed by the fraction
    of correct predictions.
    """
    flags = parse_flags()
    # NOTE(review): the parsed hparams are not used below; the call is kept
    # only so that any error it raised before is still raised. Drop it if
    # that is not needed.
    parse_hparams(flags.hparams)

    max_files = 100
    sr = 32000

    df = pd.read_csv(flags.infer_csv_file)
    file_names = df.iloc[:, 0].values

    # Scaler fitted on the training data, handed to the model wrapper.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)

    with tf.Graph().as_default() as graph:
        mel_filt = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
        mel_filt = tf.convert_to_tensor(mel_filt, dtype=tf.float32)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        model = CleverHansModel(flags.save_model_dir + '.meta', sr, generator,
                                mel_filt)
        saver = model.build_graph(pcm)

    # Never index past the end of the CSV when it has fewer than 100 rows.
    n_eval = min(max_files, len(file_names))
    count = 0

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, flags.save_model_dir)
        print(len(file_names))
        for i in range(n_eval):
            data, _ = utils_tf._preprocess_data(flags.infer_audio_dir,
                                                file_names[i])
            probs = sess.run([model.get_probs()], feed_dict={pcm: data})
            probs = np.squeeze(probs)
            # Several vectors for one file are collapsed into their mean.
            if probs.ndim != 1:
                probs = np.mean(probs, axis=0)
            predicted = np.argmax(probs)
            lab = utils_tf._convert_label_name_to_label(df.iloc[i, 1])
            if lab == predicted:
                count += 1
            print(lab, predicted)

    # Guard the empty-CSV case instead of dividing by zero.
    print(count / n_eval if n_eval else 0.0)
def deepfoolattack(audio_path, metadata_path, model_path, exp_data_path,
                   adv_audio_path, save_data=False):
    """Run the DeepFool attack on every file listed in a metadata CSV.

    For each file the model output is computed before and after the attack
    and both are printed. Files whose loading raises ``EOFError`` are
    reported and skipped.

    Args:
        audio_path: Directory passed to ``utils_tf._preprocess_data``.
        metadata_path: CSV whose second column holds the audio file names.
        model_path: Checkpoint path; ``model_path + '.meta'`` is given to
            ``CleverHansModel`` and ``model_path`` to ``saver.restore``.
        exp_data_path: CSV file the per-file results are appended to
            (without a header) when ``save_data`` is true.
        adv_audio_path: Path prefix for the adversarial WAV files, written
            as ``adv_audio_path + 'adv-' + <file name>``.
        save_data: When true, write the adversarial audio and the results.
    """
    # Load dataset to normalize new data.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)

    df = pd.read_csv(metadata_path)
    file_names = df.iloc[:, 1].values
    mel_fb = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
    sample_rate = 32000

    # One entry per successfully processed file.
    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    new_o_label_conf = []
    snr = []

    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator,
                                mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        saver = model.build_graph(pcm)
        deepfool = DFM.DeepFool(model)
        deepfool.build_attack(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        for i, audio_file_name in enumerate(file_names):
            try:
                data, q = utils_tf._preprocess_data(audio_path,
                                                    audio_file_name)
            except EOFError:
                print("EOF Error")
                # Without this the loop would go on with the previous
                # file's data (or with undefined names on the first file).
                continue

            s = sess.run([model.get_probs()], feed_dict={pcm: data})
            s = np.squeeze(s)
            # Several vectors for one file are collapsed into their mean.
            if s.ndim != 1:
                s = np.mean(s, axis=0)
            print('Original label number:', np.argmax(s))
            print('Original label confidence:', np.max(s))

            tic = time.process_time()
            adv = deepfool.attack(sess, data, int(q))
            toc = time.process_time()
            print('Time for processing sample:', toc - tic,
                  'for iteration:', i)

            preds = sess.run([model.get_probs()], feed_dict={pcm: adv})
            preds = np.squeeze(preds)
            if preds.ndim != 1:
                preds = np.mean(preds, axis=0)
            print('New label number:', np.argmax(preds))
            print('New label confidence:', np.max(preds))

            if save_data:
                librosa.output.write_wav(
                    adv_audio_path + 'adv-' + audio_file_name, adv,
                    sample_rate)

            audio_name.append(audio_file_name)
            audio_length.append(int(q))
            original_label.append(np.argmax(s))
            original_confidence.append(np.max(s))
            new_label.append(np.argmax(preds))
            new_confidence.append(np.max(preds))
            # Post-attack output for the class that was predicted originally.
            new_o_label_conf.append(preds[np.argmax(s)])
            # Signal-to-noise ratio in dB of the perturbation.
            snr.append(10 * np.log10(
                np.mean(data**2) / (np.mean((adv - data)**2))))

    if save_data:
        df_deepfool = pd.DataFrame({
            'audio_name': audio_name,
            'audio_length': audio_length,
            'original_label': original_label,
            'original_confidence': original_confidence,
            'new_label': new_label,
            'new_confidence': new_confidence,
            'new_orig_conf': new_o_label_conf,
            'SNR': snr
        })
        with open(exp_data_path, 'a') as f:
            df_deepfool.to_csv(f, header=False)
def inferenceiqbal(audio_path,
                   metadata_path,
                   model_path,
                   exp_data_path,
                   adv_audio_path,
                   save_data=False):
    """Run the saved model on every file of a metadata CSV and track accuracy.

    Files whose loading raises ``EOFError`` are reported and skipped; they
    do not count towards the accuracy. The running accuracy is printed
    whenever the row index is a multiple of 1000.

    Args:
        audio_path: Directory passed to ``utils_tf._preprocess_data``.
        metadata_path: CSV whose first column holds the audio file names and
            whose second column holds the label names.
        model_path: Checkpoint path; ``model_path + '.meta'`` is given to
            ``CleverHansModel`` and ``model_path`` to ``saver.restore``.
        exp_data_path: CSV file the per-file results are written to
            (overwritten, without a header) when ``save_data`` is true.
        adv_audio_path: Unused; present to keep the same signature as the
            attack functions.
        save_data: When true, write the results to ``exp_data_path``.
    """
    # Load dataset to normalize new data.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)

    df = pd.read_csv(metadata_path)
    label_names = df.iloc[:, 1].values
    file_names = df.iloc[:, 0].values
    mel_fb = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
    sample_rate = 32000

    # One entry per successfully processed file.
    audio_name = []
    ground_truth = []
    inferred_label = []
    inferred_confidence = []

    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator,
                                mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        saver = model.build_graph(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        count = 0
        count_tot = 0
        for i, audio_file_name in enumerate(file_names):
            try:
                data, q = utils_tf._preprocess_data(audio_path,
                                                    audio_file_name)
            except EOFError:
                print("EOF Error")
                # Without this the loop would go on with the previous
                # file's data (or with undefined names on the first file).
                continue

            gt_label = utils_tf._convert_label_name_to_label(label_names[i])
            s = sess.run([model.get_probs()], feed_dict={pcm: data})
            s = np.squeeze(s)
            # Several vectors for one file are collapsed into their mean.
            if s.ndim != 1:
                s = np.mean(s, axis=0)

            label = np.argmax(s)
            count_tot += 1
            if label == gt_label:
                count += 1
            if i % 1000 == 0:
                print('Iteration number:', i)
                print('Current accuracy:', float(count / count_tot))

            audio_name.append(audio_file_name)
            ground_truth.append(gt_label)
            inferred_label.append(label)
            inferred_confidence.append(np.max(s))

    if save_data:
        df_results = pd.DataFrame({
            'audio_name': audio_name,
            'ground_truth': ground_truth,
            'inferred_label': inferred_label,
            'inferred_confidence': inferred_confidence
        })
        with open(exp_data_path, 'w') as f:
            df_results.to_csv(f, header=False)
def carliniwagneruntargeted(audio_path,
                            metadata_path,
                            model_path,
                            exp_data_path,
                            adv_audio_path,
                            save_data=False):
    """Run the untargeted Carlini-Wagner attack on every file of a CSV.

    Files whose loading raises ``EOFError`` are reported and skipped.

    Args:
        audio_path: Directory passed to ``utils_tf._preprocess_data``.
        metadata_path: CSV whose second column holds the audio file names
            and whose third column holds the label names.
        model_path: Checkpoint path; ``model_path + '.meta'`` is given to
            ``CleverHansModel`` and ``model_path`` to ``saver.restore``.
        exp_data_path: CSV file the per-file results are written to
            (overwritten, without a header) when ``save_data`` is true.
        adv_audio_path: Path prefix for the adversarial WAV files, written
            as ``adv_audio_path + 'adv-' + <file name>``.
        save_data: When true, write the adversarial audio and the results.
    """
    # Load dataset to normalize new data.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)

    df = pd.read_csv(metadata_path)
    label_names = df.iloc[:, 2].values
    file_names = df.iloc[:, 1].values
    mel_fb = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
    sample_rate = 32000

    # One entry per successfully processed file.
    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    new_o_label_conf = []
    snr = []

    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator,
                                mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        carliniwagner = CW.CarliniWagnerAttack(model,
                                               learning_rate=1e-5,
                                               initial_const=1e-2,
                                               max_iterations=1000,
                                               confidence=500,
                                               binary_search_steps=2)
        saver = carliniwagner.build_attack(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        for i, audio_file_name in enumerate(file_names):
            try:
                data, q = utils_tf._preprocess_data(audio_path,
                                                    audio_file_name)
            except EOFError:
                print("EOF Error")
                # Without this the loop would go on with the previous
                # file's data (or with undefined names on the first file).
                continue

            label = utils_tf._convert_label_name_to_label(label_names[i])
            print('Ground truth label:', label_names[i])
            # The same label repeated int(q) times, as the attack expects a
            # per-batch label array next to the single label.
            labels_batchwise = np.repeat(label, int(q))

            tic = time.process_time()
            adv, o_label, o_conf, n_label, n_conf, n_conf_gt = (
                carliniwagner.attack(sess,
                                     data,
                                     label,
                                     labels_batchwise,
                                     int(q),
                                     prob_thresh=0.0244))
            toc = time.process_time()
            print('Time for iteration:', i, 'is', toc - tic)

            if save_data:
                librosa.output.write_wav(
                    adv_audio_path + 'adv-' + audio_file_name, adv,
                    sample_rate)

            # Values returned by the attack are recorded as-is.
            audio_name.append(audio_file_name)
            audio_length.append(int(q))
            original_label.append(o_label)
            original_confidence.append(o_conf)
            new_label.append(n_label)
            new_confidence.append(n_conf)
            new_o_label_conf.append(n_conf_gt)
            # Signal-to-noise ratio in dB of the perturbation.
            snr.append(10 * np.log10(
                np.mean(data**2) / (np.mean((adv - data)**2))))

    if save_data:
        df_cw = pd.DataFrame({
            'audio_name': audio_name,
            'audio_length': audio_length,
            'original_label': original_label,
            'original_confidence': original_confidence,
            'new_label': new_label,
            'new_confidence': new_confidence,
            'new_orig_conf': new_o_label_conf,
            'SNR': snr
        })
        with open(exp_data_path, 'w') as f:
            df_cw.to_csv(f, header=False)
def carliniwagnertargeted(audio_path,
                          metadata_path,
                          model_path,
                          exp_data_path,
                          adv_audio_path,
                          save_data=False):
    """Run the targeted Carlini-Wagner attack on every file of a CSV.

    Every file is attacked once per entry of a fixed list of six target
    classes, skipping the target that equals the file's ground truth. Files
    whose loading raises ``EOFError`` are reported and skipped.

    Args:
        audio_path: Directory passed to ``utils_tf._preprocess_data``.
        metadata_path: CSV whose second column holds the audio file names
            and whose third column holds the ground-truth labels.
        model_path: Checkpoint path; ``model_path + '.meta'`` is given to
            ``CleverHansModel`` and ``model_path`` to ``saver.restore``.
        exp_data_path: CSV file the results are appended to (with a header
            row) when ``save_data`` is true.
        adv_audio_path: Path prefix for the adversarial WAV files, written
            as ``adv_audio_path + 'adv-' + <target name> + '-' + <file name>``.
        save_data: When true, write the adversarial audio and the results.
    """
    # Load dataset to normalize new data.
    x, _ = utils_tf._load_dataset(cfg.to_dataset('training'))
    generator = utils.fit_scaler(x)

    df = pd.read_csv(metadata_path)
    gt_labels = df.iloc[:, 2].values
    file_names = df.iloc[:, 1].values
    mel_fb = librosa.filters.mel(sr=32000, n_fft=1024, n_mels=64).T
    sample_rate = 32000

    # Target classes every file is pushed towards.
    label_list = [
        "Bass_drum", "Cello", "Clarinet", "Oboe", "Snare_drum",
        "Violin_or_fiddle"
    ]

    # One entry per (file, target) attack.
    audio_name = []
    audio_length = []
    original_label = []
    original_confidence = []
    new_label = []
    new_confidence = []
    # Collected for parity with the other attacks; not part of the CSV
    # written below.
    new_o_label_conf = []
    snr = []

    with tf.Graph().as_default() as graph:
        mel_filt = tf.convert_to_tensor(mel_fb, dtype=tf.float32)
        model = CleverHansModel(model_path + '.meta', sample_rate, generator,
                                mel_filt)
        pcm = tf.placeholder(tf.float32, shape=[None], name='input_audio')
        carliniwagner = CW.CarliniWagnerAttack(model,
                                               learning_rate=1e-5,
                                               confidence=500,
                                               targeted=True,
                                               max_iterations=1000,
                                               binary_search_steps=2)
        saver = carliniwagner.build_attack(pcm)

    with tf.Session(graph=graph) as sess:
        saver.restore(sess, model_path)
        for i, audio_file_name in enumerate(file_names):
            try:
                data, q = utils_tf._preprocess_data(audio_path,
                                                    audio_file_name)
            except EOFError:
                print("EOF Error")
                # Without this the loop would go on with the previous
                # file's data (or with undefined names on the first file).
                continue

            gt_label = gt_labels[i]
            print('Ground truth label:', gt_label, 'Audio_file:',
                  audio_file_name)

            for target_name in label_list:
                label = utils_tf._convert_label_name_to_label(target_name)
                # Skip the target that is the file's own class. The raw CSV
                # value is compared against both the converted label and the
                # target name, so the skip works whichever of the two the
                # column stores.
                # NOTE(review): confirm which representation the metadata
                # CSV actually uses.
                if gt_label == label or gt_label == target_name:
                    continue

                adv, o_label, o_conf, n_label, n_conf, n_gt_conf = (
                    carliniwagner.attack(sess,
                                         data,
                                         label,
                                         np.repeat(label, int(q)),
                                         int(q),
                                         prob_thresh=0.975))

                if save_data:
                    librosa.output.write_wav(
                        adv_audio_path + 'adv-' + target_name + '-' +
                        audio_file_name, adv, sample_rate)

                # Values returned by the attack are recorded as-is.
                audio_name.append(audio_file_name)
                audio_length.append(int(q))
                original_label.append(o_label)
                original_confidence.append(o_conf)
                new_label.append(n_label)
                new_confidence.append(n_conf)
                new_o_label_conf.append(n_gt_conf)
                # Signal-to-noise ratio in dB. The noise power is the mean of
                # the squared perturbation (as in the other attack
                # functions), not the square of its mean.
                snr.append(10 * np.log10(
                    np.mean(data**2) / (np.mean((adv - data)**2))))

    if save_data:
        df_cw = pd.DataFrame(
            {
                'audio_name': audio_name,
                'audio_length': audio_length,
                'original_label': original_label,
                'original_confidence': original_confidence,
                'new_label': new_label,
                'new_confidence': new_confidence,
                'SNR': snr
            },
            columns=[
                'audio_name', 'audio_length', 'original_label',
                'original_confidence', 'new_label', 'new_confidence', 'SNR'
            ])
        with open(exp_data_path, 'a') as f:
            df_cw.to_csv(f)