def get_wav_and_labels(filename, data_dir):
    """Load a wav file and the labels parsed from its annotation file.

    Args:
        filename: wav file name; the annotation file name is derived by
            stripping the last 9 characters (presumably a "_XNNN.wav"
            style suffix -- TODO confirm against the dataset layout).
        data_dir: dataset root containing "audio/" and "Annotations/"
            subdirectories.

    Returns:
        (audio, labels): the audio as a float32 numpy array, and the
        label vector produced by concatenate_labels from the emotion
        category, speaker id, and continuous/binned dimension values.
    """
    wav_path = data_dir + "/audio/" + filename
    label_path = data_dir + "/Annotations/" + filename[:-9] + ".txt"

    category = ""
    dimensions = ""
    # Fix: dimensions_dis was previously only assigned inside the matching
    # branch, causing an UnboundLocalError when no annotation row matched.
    dimensions_dis = ""
    speaker = ""
    with open(label_path, 'r') as label_file:
        for row in label_file:
            # Annotation rows of interest start with '[' (the timestamp
            # field); guard against empty lines before indexing row[0].
            if row and row[0] == '[':
                split = row.split("\t")
                if split[1] == filename[:-4]:
                    category = get_emotion_from_label(split[2])
                    dimensions = cont2list(split[3])
                    dimensions_dis = cont2list(split[3], binned=True)
                    speaker = get_speaker_from_filename(filename)

    audio = audio_utils.load_wav(wav_path)
    audio = np.array(audio, dtype=np.float32)
    labels = concatenate_labels(category, speaker, dimensions, dimensions_dis)
    return audio, labels
def get_samples_and_labels(filename, config):
    """Load a wav from the sample set and the labels from its annotation file.

    Args:
        filename: wav file name inside config['data']['sample_set_dir'];
            the annotation file name is derived by stripping the last 9
            characters (presumably a "_XNNN.wav" style suffix -- TODO
            confirm against the dataset layout).
        config: configuration mapping providing
            config['data']['sample_set_dir'] and
            config['data']['dataset_dir'].

    Returns:
        (audio, labels): the audio as a float32 numpy array, and the
        label vector produced by concatenate_labels from the emotion
        category, speaker id, and continuous/binned dimension values.
    """
    wav_path = config['data']['sample_set_dir'] + "/" + filename
    folder = filename[:-9]
    label_path = config['data']['dataset_dir'] + "/Annotations/" + folder + ".txt"

    category = ""
    dimensions = ""
    # Fix: dimensions_dis was previously only assigned inside the matching
    # branch, causing an UnboundLocalError when no annotation row matched.
    dimensions_dis = ""
    speaker = ""
    with open(label_path, 'r') as label_file:
        for row in label_file:
            # Annotation rows of interest start with '[' (the timestamp
            # field); guard against empty lines before indexing row[0].
            if row and row[0] == '[':
                split = row.split("\t")
                if split[1] == filename[:-4]:
                    category = get_emotion_from_label(split[2])
                    dimensions = cont2list(split[3])
                    dimensions_dis = cont2list(split[3], binned=True)
                    speaker = get_speaker_from_filename(filename)

    audio = audio_utils.load_wav(wav_path)
    audio = np.array(audio, dtype=np.float32)
    labels = concatenate_labels(category, speaker, dimensions, dimensions_dis)
    return audio, labels
# for f in filenames: # print(f[:-4]) # in_dir = '../data/labels' # files = find_files(in_dir, ext = 'npy') # filenames = [os.path.basename(f)[:-4] + ".wav" for f in files] # print("Found", len(filenames), " files.") # # filenames = [f for f in filenames if pp.get_wav_and_labels(f, config['data']['dataset_dir'])[1][1] in range(0,6)] # random.shuffle(filenames) # filenames = filenames[:10] # print(filenames) # print("Number of files to be converted = ", len(filenames)) filenames = ["../data/stairway.wav"] wav = audio_utils.load_wav(filenames[0]) labels = [3, 1, 0,0,0,0,0,0] ######################################## # BASELINE SYNTHESIS LOOP # ######################################## # for f in filenames: # # wav, labels = pp.get_wav_and_labels(f, config['data']['dataset_dir']) # wav = np.array(wav, dtype = np.float64) # labels = np.array(labels) # f0, ap, sp, coded_sp = preprocess_world.cal_mcep(wav) # coded_sp = coded_sp.T # # coded_sp_temp = np.copy(coded_sp).T # # print(coded_sp_temp.shape) # filename_wav = f[0:-4] + "_" + str(int(labels[0].item())) + ".wav" # print(coded_sp.shape)
import librosa
import audio_utils
import numpy as np

# Smoke test: load the first wav found in the dataset directory, convert
# it to a mel-spectrogram, and save the result for manual inspection.

# Renamed from `dir` to avoid shadowing the `dir` builtin.
data_dir = "../datasets/IEMOCAP/All"
filenames = librosa.util.find_files(data_dir)
filename = filenames[0]

audio = audio_utils.load_wav(filename)
print(audio.shape)
audio = np.array(audio, dtype=np.float32)

mel = audio_utils.wav2melspectrogram(audio)
# NOTE(review): assumes ./samples/Test/ already exists -- np.save will not
# create intermediate directories.
np.save('./samples/Test/mel.npy', mel)