def createMelSpectrogram(input_path, fileName, output_path, saveOrShow=0):
    """Render a decibel mel spectrogram for the start of an audio file.

    At most the first 10 seconds of the file are loaded at 16 kHz and
    converted into a 224-band mel spectrogram (1400 Hz up to sr / 2). The
    result is either written as a PNG image or displayed on screen.

    Args:
        input_path: Directory containing the audio file.
        fileName: Name of the audio file inside ``input_path``.
        output_path: Directory that receives the PNG when saving.
        saveOrShow: ``0`` (default) saves ``<stem>.png`` into ``output_path``
            with the 'inferno' colormap; any other value displays the
            spectrogram with ``display.specshow`` instead.
    """
    # Load at most the first 10 seconds, resampled to 16 kHz.
    signal, sr = librosa.load(os.path.join(input_path, fileName), duration=10, sr=16000)

    mel_layer = Melspectrogram(
        n_dft=1024,
        n_hop=320,
        input_shape=(1, signal.shape[0]),
        padding='same',
        sr=sr,
        n_mels=224,
        fmin=1400,
        fmax=sr / 2,
        power_melgram=2.0,
        return_decibel_melgram=True,
        trainable_fb=False,
        trainable_kernel=False,
    )
    # The samples are fed to the layer as a (1, 1, n_samples) array.
    S = mel_layer(signal.reshape(1, 1, -1)).numpy()
    # Keep only axes 1 and 2 of the layer output to obtain a 2-D image
    # (assumes the remaining axes all have size 1 -- TODO confirm for the
    # image data format in use).
    S = S.reshape(S.shape[1], S.shape[2])
    print(S.shape)

    if saveOrShow == 0:
        # splitext strips only the final extension, so "rec.2020.wav" becomes
        # "rec.2020.png" rather than being truncated to "rec.png" (which made
        # dotted file names overwrite each other).
        stem = os.path.splitext(fileName)[0]
        matplotlib.image.imsave(os.path.join(output_path, stem + ".png"), S, cmap='inferno')
    else:
        display.specshow(S, sr=sr)
        plt.show()
def _melspectrogram_image(clip, sr):
    """Return the decibel mel spectrogram of ``clip`` as a 2-D array."""
    mel_layer = Melspectrogram(
        n_dft=1024,
        n_hop=320,
        # Declare the shape of the clip that is actually processed, not the
        # length of the full recording it was cut from.
        input_shape=(1, clip.shape[0]),
        padding='same',
        sr=sr,
        n_mels=224,
        fmin=1400,
        fmax=sr / 2,
        power_melgram=2.0,
        return_decibel_melgram=True,
        trainable_fb=False,
        trainable_kernel=False,
    )
    # The samples are fed to the layer as a (1, 1, n_samples) array.
    S = mel_layer(clip.reshape(1, 1, -1)).numpy()
    # Keep only axes 1 and 2 of the layer output (assumes the remaining axes
    # all have size 1 -- TODO confirm for the image data format in use).
    return S.reshape(S.shape[1], S.shape[2])


def _save_train_or_val_image(clip, sr, stem, output_path_train, output_path_val):
    """Randomly route ``clip`` to the train or validation set and save its image."""
    # randint(0, 10) yields 0..9; values 0..7 (8 out of 10) go to training.
    if np.random.randint(0, 10) <= 7:
        # Training clips are augmented and tagged with a "_noise" suffix.
        clip = augmenter(clip, sr)
        target = os.path.join(output_path_train, stem + "_noise.png")
    else:
        # Validation clips are saved unmodified.
        target = os.path.join(output_path_val, stem + ".png")
    matplotlib.image.imsave(target, _melspectrogram_image(clip, sr), cmap='inferno')


def createMelSpectrogramNew(input_path, fileName, output_path_train, output_path_val):
    """Turn one recording into 5-second mel-spectrogram images for train/val.

    The file is loaded as mono audio at 16 kHz.

    * Recordings of at most 5 seconds are zero-padded to exactly 5 seconds and
      produce a single image named after the file stem.
    * Longer recordings are cut into consecutive, non-overlapping 5-second
      windows (a trailing partial window is dropped). Only windows whose
      summed absolute amplitude reaches the 0.75 quantile over all windows
      are kept; their images are named ``<stem>_<count>`` where ``count``
      numbers the kept windows in order.

    Every kept clip is independently assigned at random (NumPy global random
    state): with probability 8/10 it is passed through ``augmenter`` and saved
    to ``output_path_train`` as ``*_noise.png``; otherwise it is saved
    unmodified to ``output_path_val`` as ``*.png``.

    Args:
        input_path: Directory containing the audio file.
        fileName: Name of the audio file inside ``input_path``.
        output_path_train: Directory receiving the augmented training images.
        output_path_val: Directory receiving the validation images.
    """
    signal, sr = librosa.load(os.path.join(input_path, fileName), sr=16000, mono=True)
    # splitext strips only the final extension, so dotted names such as
    # "rec.2020.wav" no longer collapse to "rec" and overwrite each other.
    stem = os.path.splitext(fileName)[0]
    window = 5 * sr

    if len(signal) <= window:
        # Short recording: right-pad with zeros up to exactly one window.
        padded = np.zeros(window, dtype=np.float32)
        padded[:len(signal)] = signal
        _save_train_or_val_image(padded, sr, stem, output_path_train, output_path_val)
        return

    # Summed absolute amplitude of every complete 5-second window, computed
    # once and reused both for the threshold and for the selection.
    n_windows = len(signal) // window
    energies = (
        np.abs(signal[:n_windows * window])
        .reshape(n_windows, window)
        .sum(axis=1, dtype=np.float64)
    )
    threshold = np.quantile(energies, 0.75)

    count = 0
    for idx in np.flatnonzero(energies >= threshold):
        start = int(idx) * window
        _save_train_or_val_image(
            signal[start:start + window],
            sr,
            f"{stem}_{count}",
            output_path_train,
            output_path_val,
        )
        count += 1
d.extend(data_raw) frames = (np.hstack(d)).flatten() melspec = Melspectrogram(n_dft=1024, n_hop=256, input_shape=(1, frames.shape[0]), padding='same', sr=sr, n_mels=224, fmin=1400, fmax=sr / 2, power_melgram=2.0, return_decibel_melgram=True, trainable_fb=False, trainable_kernel=False)( frames.reshape(1, 1, -1)).numpy() melspec = melspec.reshape(melspec.shape[1], melspec.shape[2]) print( f"Frames array: {frames.shape}, Melspec array: {melspec.shape}" ) melplot = display.specshow(melspec, sr=sr) melplot.set_frame_on(False) plt.tight_layout(pad=0) plt.draw() plt.pause(0.0001) plt.clf() if (melspec.shape[1] >= IM_SIZE[0]): img = Image.frombuffer("RGBA", fig.canvas.get_width_height(), fig.canvas.buffer_rgba(), "raw", "RGBA", 0, 1) img = img.convert('RGB').resize(IM_SIZE[0:2])