import numpy as np

import audio_utils


def get_wav_and_labels(filename, data_dir):
    # filename[:-9] strips the utterance suffix (e.g. "_F000.wav") to recover
    # the dialogue name that the annotation file is named after.
    wav_path = data_dir + "/audio/" + filename
    label_path = data_dir + "/Annotations/" + filename[:-9] + ".txt"

    with open(label_path, 'r') as label_file:

        category = ""
        dimensions = ""
        dimensions_dis = ""  # bind the name even if no row matches below
        speaker = ""

        # Label rows start with '[' (the time interval); field 1 is the
        # utterance id, field 2 the emotion code, field 3 the dimensional
        # (valence/arousal/dominance) values.
        for row in label_file:
            if row[0] == '[':
                split = row.split("\t")
                if split[1] == filename[:-4]:  # filename without ".wav"
                    category = get_emotion_from_label(split[2])
                    dimensions = cont2list(split[3])
                    dimensions_dis = cont2list(split[3], binned=True)
                    speaker = get_speaker_from_filename(filename)

    audio = audio_utils.load_wav(wav_path)
    audio = np.array(audio, dtype=np.float32)
    labels = concatenate_labels(category, speaker, dimensions, dimensions_dis)

    return audio, labels
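
# Usage sketch (assumption: IEMOCAP-style names such as
# "Ses01F_impro01_F000.wav", where the last 9 characters are the "_F000.wav"
# utterance suffix and the last 4 are the ".wav" extension):
#
#     audio, labels = get_wav_and_labels("Ses01F_impro01_F000.wav",
#                                        "../datasets/IEMOCAP")
#     # reads .../audio/Ses01F_impro01_F000.wav and
#     # .../Annotations/Ses01F_impro01.txt
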
def get_samples_and_labels(filename, config):
    # config is the parsed YAML configuration,
    # e.g. yaml.safe_load(open('./config.yaml', 'r')).
    wav_path = config['data']['sample_set_dir'] + "/" + filename
    folder = filename[:-9]
    label_path = config['data']['dataset_dir'] + "/Annotations/" + folder + ".txt"

    with open(label_path, 'r') as label_file:

        category = ""
        dimensions = ""
        dimensions_dis = ""  # bind the name even if no row matches below
        speaker = ""

        # Same annotation format as in get_wav_and_labels above.
        for row in label_file:
            if row[0] == '[':
                split = row.split("\t")
                if split[1] == filename[:-4]:  # filename without ".wav"
                    category = get_emotion_from_label(split[2])
                    dimensions = cont2list(split[3])
                    dimensions_dis = cont2list(split[3], binned=True)
                    speaker = get_speaker_from_filename(filename)

    audio = audio_utils.load_wav(wav_path)
    audio = np.array(audio, dtype=np.float32)
    labels = concatenate_labels(category, speaker, dimensions, dimensions_dis)

    return audio, labels
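
# Usage sketch (assumption: config.yaml defines the 'sample_set_dir' and
# 'dataset_dir' keys read above; yaml.safe_load stands in for the deprecated
# bare yaml.load):
#
#     import yaml
#     config = yaml.safe_load(open('./config.yaml', 'r'))
#     audio, labels = get_samples_and_labels("Ses01F_impro01_F000.wav", config)
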
# Script fragment: file selection for conversion (kept commented out; the
# live code below synthesizes from a single hard-coded file instead).
#
# in_dir = '../data/labels'
# files = find_files(in_dir, ext='npy')
# filenames = [os.path.basename(f)[:-4] + ".wav" for f in files]
# print("Found", len(filenames), "files.")
#
# # Keep only files whose second label entry falls in range(0, 6).
# filenames = [f for f in filenames
#              if pp.get_wav_and_labels(f, config['data']['dataset_dir'])[1][1] in range(0, 6)]
# random.shuffle(filenames)
# filenames = filenames[:10]
# print(filenames)
# print("Number of files to be converted =", len(filenames))

filenames = ["../data/stairway.wav"]
wav = audio_utils.load_wav(filenames[0])
labels = [3, 1, 0, 0, 0, 0, 0, 0]  # hard-coded label vector for synthesis
########################################
#       BASELINE SYNTHESIS LOOP        #
########################################
# for f in filenames:
#
#     wav, labels = pp.get_wav_and_labels(f, config['data']['dataset_dir'])
#     wav = np.array(wav, dtype=np.float64)
#     labels = np.array(labels)
#     f0, ap, sp, coded_sp = preprocess_world.cal_mcep(wav)
#     coded_sp = coded_sp.T
#     filename_wav = f[0:-4] + "_" + str(int(labels[0].item())) + ".wav"
#     print(coded_sp.shape)
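
# A minimal sketch of the WORLD analysis that preprocess_world.cal_mcep
# presumably wraps (assumption: the sample rate and the 36 coding dimensions
# are illustrative; the pyworld calls themselves are the standard pyworld API).
import numpy as np
import pyworld

fs = 16000  # assumed sample rate of the loaded wav
wav64 = np.asarray(wav, dtype=np.float64)  # pyworld expects float64 input
f0, timeaxis = pyworld.harvest(wav64, fs)              # pitch (F0) contour
sp = pyworld.cheaptrick(wav64, f0, timeaxis, fs)       # smoothed spectral envelope
ap = pyworld.d4c(wav64, f0, timeaxis, fs)              # aperiodicity
coded_sp = pyworld.code_spectral_envelope(sp, fs, 36)  # compress envelope to 36 dims
print(coded_sp.shape)  # (n_frames, 36)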
Example #4

import os

import librosa
import numpy as np

import audio_utils

data_dir = "../datasets/IEMOCAP/All"  # directory of wav files to index

filenames = librosa.util.find_files(data_dir)

# Convert the first file to a mel spectrogram and cache it to disk.
filename = filenames[0]

audio = audio_utils.load_wav(filename)
print(audio.shape)
audio = np.array(audio, dtype=np.float32)
mel = audio_utils.wav2melspectrogram(audio)

os.makedirs('./samples/Test', exist_ok=True)  # np.save does not create missing directories
np.save('./samples/Test/mel.npy', mel)
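
# For reference, a plausible librosa equivalent of audio_utils.wav2melspectrogram
# (assumption: the helper wraps something like this; the sr / n_fft /
# hop_length / n_mels values here are illustrative, not the repo's settings):
mel_ref = librosa.feature.melspectrogram(
    y=audio, sr=16000, n_fft=1024, hop_length=256, n_mels=80)
print(mel_ref.shape)  # (n_mels, n_frames)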