示例#1
0
        def f(n):
            """Load the n-th clip listed in `meta_dict` and store it in `hf`.

            Writes the encoded audio name, the int16 waveform and the target
            into row n of the corresponding datasets. When the audio file is
            not on disk, only a log message is emitted and row n is left
            unwritten.

            Args:
              n: int, index of the clip in `meta_dict`
            """
            clip_name = meta_dict['audio_name'][n]
            audio_path = os.path.join(audios_dir, clip_name)

            # Guard clause: nothing to pack for a missing file.
            if not os.path.isfile(audio_path):
                logging.info(
                    '{} File does not exist! {}'.format(n, audio_path))
                return

            logging.info('{} {}'.format(n, audio_path))

            # Resample to `sample_rate`, downmix to mono, then force a fixed
            # length of `clip_samples`.
            loaded = librosa.core.load(audio_path, sr=sample_rate, mono=True)
            audio = pad_or_truncate(loaded[0], clip_samples)

            hf['audio_name'][n] = clip_name.encode()
            hf['waveform'][n] = float32_to_int16(audio)
            hf['target'][n] = meta_dict['target'][n]
def pack_waveforms_to_hdf5(args):
    """Pack waveforms and targets of audio clips into hdf5 files.

    Two files are written:
      * the waveform hdf5, with datasets `audio_name`, `waveform` (int16) and
        `target` (bool) plus the attribute `sample_rate`;
      * the target hdf5, with datasets `audio_name`, `hdf5_name`,
        `index_in_hdf5` and `target`.

    Clips listed in the csv whose audio file is missing are logged and their
    rows are left unwritten.

    Args:
      args: object with the following attributes
        audios_dir: str, directory of the audio files
        csv_path: str, path of the metadata csv
        waveform_hdf5_path: str, path of the waveform hdf5 to write
        target_hdf5_path: str, path of the target hdf5 to write
        mini_data: bool, set True to pack only the first 10 clips; '.mini'
          is then appended to both output paths
    """

    # Arguments & parameters
    audios_dir = args.audios_dir
    csv_path = args.csv_path
    waveform_hdf5_path = args.waveform_hdf5_path
    target_hdf5_path = args.target_hdf5_path
    mini_data = args.mini_data

    audio_length = config.audio_length
    classes_num = config.classes_num
    sample_rate = config.sample_rate

    # Paths
    if mini_data:
        prefix = 'mini_'
        waveform_hdf5_path += '.mini'
        target_hdf5_path += '.mini'
    else:
        prefix = ''

    create_folder(os.path.dirname(waveform_hdf5_path))
    create_folder(os.path.dirname(target_hdf5_path))

    logs_dir = '_logs/pack_waveforms_to_hdf5/{}{}'.format(
        prefix, get_filename(csv_path))
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Write logs to {}'.format(logs_dir))

    # Read csv file
    meta_dict = read_metadata(csv_path)

    if mini_data:
        mini_num = 10
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][0:mini_num]

    audios_num = len(meta_dict['audio_name'])

    # Pack waveform to hdf5
    total_time = time.time()

    with h5py.File(waveform_hdf5_path, 'w') as hf:
        hf.create_dataset('audio_name', shape=(audios_num, ), dtype='S20')
        hf.create_dataset('waveform',
                          shape=(audios_num, audio_length),
                          dtype=np.int16)
        # np.bool_ instead of the np.bool alias, which was deprecated in
        # NumPy 1.20 and removed in NumPy 1.24.
        hf.create_dataset('target',
                          shape=(audios_num, classes_num),
                          dtype=np.bool_)
        hf.attrs.create('sample_rate', data=sample_rate, dtype=np.int32)

        # Read audio
        for n in range(audios_num):
            audio_path = os.path.join(audios_dir, meta_dict['audio_name'][n])

            if os.path.isfile(audio_path):
                logging.info('{} {}'.format(n, audio_path))
                (audio, _) = librosa.core.load(audio_path,
                                               sr=sample_rate,
                                               mono=True)
                audio = pad_or_truncate(audio, audio_length)

                hf['audio_name'][n] = meta_dict['audio_name'][n].encode()
                hf['waveform'][n] = float32_to_int16(audio)
                hf['target'][n] = meta_dict['target'][n]
            else:
                logging.info('{} File does not exist! {}'.format(
                    n, audio_path))

        # Pack target to hdf5. This happens while the waveform file is still
        # open because the names and targets are copied from it.
        # NOTE(review): hdf5_name is derived from the target file path, not
        # the waveform file path - confirm this is what readers expect.
        hdf5_name = os.path.basename(target_hdf5_path)

        with h5py.File(target_hdf5_path, 'w') as target_hf:
            target_hf.create_dataset('audio_name',
                                     data=hf['audio_name'][:],
                                     dtype='S20')
            target_hf.create_dataset('hdf5_name',
                                     data=[hdf5_name.encode()] * audios_num,
                                     dtype='S40')
            target_hf.create_dataset('index_in_hdf5',
                                     data=np.arange(audios_num),
                                     dtype=np.int32)
            target_hf.create_dataset('target',
                                     data=hf['target'][:],
                                     dtype=np.bool_)

    logging.info('Write to {}'.format(waveform_hdf5_path))
    logging.info('Write to {}'.format(target_hdf5_path))
    logging.info('Pack hdf5 time: {:.3f}'.format(time.time() - total_time))
示例#3
0
def pack_audio_files_to_hdf5_ramas(args):
    """Pack the RAMAS test clips listed in the metadata csv to one hdf5 file.

    Only rows whose `cur_label` is one of 'hap', 'ang', 'neu', 'sad' are
    packed. The output file holds the datasets `audio_name`, `waveform`
    (int16), `target` (one-hot float32) and `fold` (always 1).

    NOTE(review): the metadata csv and the audio directory are hard-coded
    absolute paths; `args.dataset_dir` is not used to locate the data.

    Args:
      args: object with the following attributes
        workspace: str, directory of your workspace
        mini_data: bool, set True for debugging on a small part of data
    """
    # Arguments & parameters
    workspace = args.workspace
    mini_data = args.mini_data

    sample_rate = config.sample_rate
    clip_samples = config.clip_samples
    classes_num = config.classes_num
    lb_to_idx = config.lb_to_idx

    # Paths
    if mini_data:
        packed_hdf5_path = os.path.join(workspace, 'features_ramas',
                                        'minidata_waveform.h5')
    else:
        packed_hdf5_path = os.path.join(workspace, 'features_ramas',
                                        'waveform_meta_test.h5')
    create_folder(os.path.dirname(packed_hdf5_path))

    meta_df = pd.read_csv(
        '/home/den/DATASETS/AUDIO/preprocessed/ramas/meta_test.csv', sep=',')
    meta_df = meta_df[meta_df.cur_label.isin(['hap', 'ang', 'neu', 'sad'])]

    audio_names = list(meta_df.cur_name)
    audio_paths = [
        os.path.join('/home/den/DATASETS/AUDIO/preprocessed/ramas/data',
                     audio_name) for audio_name in audio_names
    ]

    # Map every clip name to the label of its first row in a single pass,
    # instead of filtering the whole DataFrame once per clip.
    label_by_name = {}
    for name, label in zip(meta_df.cur_name, meta_df.cur_label):
        label_by_name.setdefault(name, label)

    meta_dict = {
        'audio_name':
        np.array(audio_names),
        'audio_path':
        np.array(audio_paths),
        'target':
        np.array([
            lb_to_idx[label_by_name[audio_name]] for audio_name in audio_names
        ]),
        'fold':
        np.array([1 for _ in audio_names])
    }

    if mini_data:
        mini_num = 10
        total_num = len(meta_dict['audio_name'])
        random_state = np.random.RandomState(1234)
        # Cap the sample size so fewer than mini_num clips does not raise.
        indexes = random_state.choice(total_num,
                                      size=min(mini_num, total_num),
                                      replace=False)
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][indexes]

    audios_num = len(meta_dict['audio_name'])

    feature_time = time.time()
    with h5py.File(packed_hdf5_path, 'w') as hf:
        hf.create_dataset(name='audio_name', shape=(audios_num, ), dtype='S80')

        hf.create_dataset(name='waveform',
                          shape=(audios_num, clip_samples),
                          dtype=np.int16)

        hf.create_dataset(name='target',
                          shape=(audios_num, classes_num),
                          dtype=np.float32)

        hf.create_dataset(name='fold', shape=(audios_num, ), dtype=np.int32)

        for n in range(audios_num):
            print(n)
            audio_name = meta_dict['audio_name'][n]
            audio_path = meta_dict['audio_path'][n]
            (audio, _) = librosa.core.load(audio_path,
                                           sr=sample_rate,
                                           mono=True)

            # Force every clip to exactly clip_samples samples.
            audio = pad_truncate_sequence(audio, clip_samples)

            hf['audio_name'][n] = audio_name.encode()
            hf['waveform'][n] = float32_to_int16(audio)
            hf['target'][n] = to_one_hot(meta_dict['target'][n], classes_num)
            hf['fold'][n] = meta_dict['fold'][n]

    print('Write hdf5 to {}'.format(packed_hdf5_path))
    print('Time: {:.3f} s'.format(time.time() - feature_time))
示例#4
0
def pack_audio_files_to_hdf5(args):
    """Pack all audio files found under the dataset directory to one hdf5.

    The label of a clip is taken from the part of its file name before the
    first '.', looked up in `config.lb_to_idx`. Folds 1..10 are assigned
    round-robin over the name-sorted clips. The output file holds the
    datasets `audio_name`, `waveform` (int16), `target` (one-hot float32)
    and `fold`.

    Args:
      args: object with the following attributes
        dataset_dir: str, directory of dataset
        workspace: str, directory of your workspace
        mini_data: bool, set True for debugging on a small part of data
    """

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    mini_data = args.mini_data

    sample_rate = config.sample_rate
    clip_samples = config.clip_samples
    classes_num = config.classes_num
    lb_to_idx = config.lb_to_idx

    # Paths
    audios_dir = os.path.join(dataset_dir)

    if mini_data:
        packed_hdf5_path = os.path.join(workspace, 'features',
                                        'minidata_waveform.h5')
    else:
        packed_hdf5_path = os.path.join(workspace, 'features', 'waveform.h5')
    create_folder(os.path.dirname(packed_hdf5_path))

    (audio_names, audio_paths) = traverse_folder(audios_dir)

    # Sort names and paths together. Sorting the two lists independently can
    # pair a name with another file's path when the ordering by full path
    # differs from the ordering by file name (e.g. files in sub-directories).
    sorted_pairs = sorted(zip(audio_names, audio_paths))
    audio_names = [name for (name, _) in sorted_pairs]
    audio_paths = [path for (_, path) in sorted_pairs]

    meta_dict = {
        'audio_name':
        np.array(audio_names),
        'audio_path':
        np.array(audio_paths),
        'target':
        np.array([
            lb_to_idx[audio_name.split('.')[0]] for audio_name in audio_names
        ]),
        'fold':
        np.arange(len(audio_names)) % 10 + 1
    }

    if mini_data:
        mini_num = 10
        total_num = len(meta_dict['audio_name'])
        random_state = np.random.RandomState(1234)
        # Cap the sample size so fewer than mini_num clips does not raise.
        indexes = random_state.choice(total_num,
                                      size=min(mini_num, total_num),
                                      replace=False)
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][indexes]

    audios_num = len(meta_dict['audio_name'])

    feature_time = time.time()
    with h5py.File(packed_hdf5_path, 'w') as hf:
        hf.create_dataset(name='audio_name', shape=(audios_num, ), dtype='S80')

        hf.create_dataset(name='waveform',
                          shape=(audios_num, clip_samples),
                          dtype=np.int16)

        hf.create_dataset(name='target',
                          shape=(audios_num, classes_num),
                          dtype=np.float32)

        hf.create_dataset(name='fold', shape=(audios_num, ), dtype=np.int32)

        for n in range(audios_num):
            print(n)
            audio_name = meta_dict['audio_name'][n]
            audio_path = meta_dict['audio_path'][n]
            (audio, _) = librosa.core.load(audio_path,
                                           sr=sample_rate,
                                           mono=True)

            # Force every clip to exactly clip_samples samples.
            audio = pad_truncate_sequence(audio, clip_samples)

            hf['audio_name'][n] = audio_name.encode()
            hf['waveform'][n] = float32_to_int16(audio)
            hf['target'][n] = to_one_hot(meta_dict['target'][n], classes_num)
            hf['fold'][n] = meta_dict['fold'][n]

    print('Write hdf5 to {}'.format(packed_hdf5_path))
    print('Time: {:.3f} s'.format(time.time() - feature_time))
示例#5
0
def pack_waveforms_to_hdf5(args):
    """Pack waveform and target of several audio clips to a single hdf5 file.
    This can speed up loading and training.

    The output file holds the datasets `audio_name`, `waveform` (int16) and
    `target` (bool) plus the attribute `sample_rate`. Clips listed in the csv
    whose audio file is missing are logged and their rows are left unwritten.

    Args:
      args: object with the following attributes
        audios_dir: str, directory of the audio files
        csv_path: str, path of the metadata csv
        waveforms_hdf5_path: str, path of the hdf5 file to write
        mini_data: bool, set True to pack only the first 10 clips; '.mini'
          is then appended to the output path
    """

    # Arguments & parameters
    audios_dir = args.audios_dir
    csv_path = args.csv_path
    waveforms_hdf5_path = args.waveforms_hdf5_path
    mini_data = args.mini_data

    clip_samples = config.clip_samples
    classes_num = config.classes_num
    sample_rate = config.sample_rate
    id_to_ix = config.id_to_ix

    # Paths
    if mini_data:
        prefix = 'mini_'
        waveforms_hdf5_path += '.mini'
    else:
        prefix = ''

    create_folder(os.path.dirname(waveforms_hdf5_path))

    logs_dir = '_logs/pack_waveforms_to_hdf5/{}{}'.format(
        prefix, get_filename(csv_path))
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Write logs to {}'.format(logs_dir))

    # Read csv file
    meta_dict = read_metadata(csv_path, classes_num, id_to_ix)

    if mini_data:
        mini_num = 10
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][0:mini_num]

    audios_num = len(meta_dict['audio_name'])

    # Pack waveform to hdf5
    total_time = time.time()

    with h5py.File(waveforms_hdf5_path, 'w') as hf:
        hf.create_dataset('audio_name', shape=(audios_num, ), dtype='S20')
        hf.create_dataset('waveform',
                          shape=(audios_num, clip_samples),
                          dtype=np.int16)
        # np.bool_ instead of the np.bool alias, which was deprecated in
        # NumPy 1.20 and removed in NumPy 1.24.
        hf.create_dataset('target',
                          shape=(audios_num, classes_num),
                          dtype=np.bool_)
        hf.attrs.create('sample_rate', data=sample_rate, dtype=np.int32)

        # Pack waveform & target of several audio clips to a single hdf5 file
        for n in range(audios_num):
            audio_path = os.path.join(audios_dir, meta_dict['audio_name'][n])

            if os.path.isfile(audio_path):
                logging.info('{} {}'.format(n, audio_path))
                (audio, _) = librosa.core.load(audio_path,
                                               sr=sample_rate,
                                               mono=True)
                audio = pad_or_truncate(audio, clip_samples)

                hf['audio_name'][n] = meta_dict['audio_name'][n].encode()
                hf['waveform'][n] = float32_to_int16(audio)
                hf['target'][n] = meta_dict['target'][n]
            else:
                logging.info('{} File does not exist! {}'.format(
                    n, audio_path))

    logging.info('Write to {}'.format(waveforms_hdf5_path))
    logging.info('Pack hdf5 time: {:.3f}'.format(time.time() - total_time))
def pack_audio_files_to_hdf5(args):
    """Pack waveform to hdf5 file. 

    The output file holds the datasets `audio_name`, `waveform` and
    `weak_target`; for the 'testing' and 'evaluation' data types a
    frame-wise `strong_target` dataset is written as well.

    Args:
      args: object with the following attributes
        dataset_dir: str, directory of dataset
        workspace: str, Directory of your workspace
        data_type: 'training' | 'testing' | 'evaluation'
        mini_data: bool, set True for debugging on a small part of data
    """

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    data_type = args.data_type
    mini_data = args.mini_data

    sample_rate = config.sample_rate
    audio_length = config.audio_length
    classes_num = config.classes_num
    lb_to_idx = config.lb_to_idx
    frames_per_second = config.frames_per_second
    frames_num = frames_per_second * config.audio_duration

    # Strong (frame-wise) labels are only read for these data types.
    has_strong_target = data_type in ['testing', 'evaluation']

    # Paths
    audios_dir = os.path.join(dataset_dir, data_type)
    weak_label_csv_path = os.path.join(dataset_dir, 'metadata', 
        get_weak_csv_filename(data_type))

    # strong_label_csv_path is only defined (and only used) when
    # has_strong_target is True.
    if data_type == 'testing':
        strong_label_csv_path = os.path.join(dataset_dir, 'metadata', 
            'groundtruth_strong_label_testing_set.csv')
    elif data_type == 'evaluation':
        strong_label_csv_path = os.path.join(dataset_dir, 'metadata', 
            'groundtruth_strong_label_evaluation_set.csv')

    if mini_data:
        packed_hdf5_path = os.path.join(workspace, 'features', 
            'minidata_{}.waveform.h5'.format(data_type))
    else:
        packed_hdf5_path = os.path.join(workspace, 'features', 
            '{}.waveform.h5'.format(data_type))
    create_folder(os.path.dirname(packed_hdf5_path))

    # Read metadata
    weak_meta_list = read_weak_csv(weak_label_csv_path, data_type)

    # Use a small amount of data for debugging
    if mini_data:
        random.seed(1234)
        random.shuffle(weak_meta_list)
        weak_meta_list = weak_meta_list[0 : 100]

    audios_num = len(weak_meta_list)

    feature_time = time.time()
    with h5py.File(packed_hdf5_path, 'w') as hf:
        hf.create_dataset(
            name='audio_name', 
            shape=(audios_num,), 
            dtype='S80')

        # NOTE(review): the values written below come from float32_to_int16,
        # yet the dataset is declared int32 - confirm whether int16 was
        # intended before changing the on-disk format.
        hf.create_dataset(
            name='waveform', 
            shape=(audios_num, audio_length), 
            dtype=np.int32)

        hf.create_dataset(
            name='weak_target', 
            shape=(audios_num, classes_num), 
            dtype=np.float32)

        if has_strong_target:
            strong_meta_dict = read_strong_csv(strong_label_csv_path)        
            
            # Starts empty and grows by one row per clip in the loop below.
            # np.bool_ instead of the np.bool alias, which was deprecated in
            # NumPy 1.20 and removed in NumPy 1.24.
            hf.create_dataset(
                name='strong_target', 
                shape=(0, frames_num, classes_num), 
                maxshape=(None, frames_num, classes_num), 
                dtype=np.bool_)

        for n in range(audios_num):
            print(n)
            weak_meta_dict = weak_meta_list[n]
            audio_name = weak_meta_dict['audio_name']
            audio_path = os.path.join(audios_dir, audio_name)
            (audio, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)
            audio = pad_truncate_sequence(audio, audio_length)

            hf['audio_name'][n] = audio_name.encode()
            hf['waveform'][n] = float32_to_int16(audio)
            hf['weak_target'][n] = get_weak_target(
                weak_meta_dict['labels'], lb_to_idx)

            if has_strong_target:
                # The first character of the audio name is dropped for the
                # strong-label lookup (presumably a prefix that is absent
                # from the strong-label csv - TODO confirm).
                strong_target = get_strong_target(
                    weak_meta_dict['audio_name'][1:], strong_meta_dict, 
                    frames_num, frames_per_second, lb_to_idx)
                
                hf['strong_target'].resize((n + 1, frames_num, classes_num))
                hf['strong_target'][n] = strong_target

    print('Write hdf5 to {}'.format(packed_hdf5_path))
    print('Time: {:.3f} s'.format(time.time() - feature_time))
def pack_maps_dataset_to_hdf5(args):
    """MAPS is a piano dataset only used for evaluating our piano transcription
    system (optional). Ref:

    [1] Emiya, Valentin. "MAPS Database A piano database for multipitch 
    estimation and automatic transcription of music." 2016

    Load & resample MAPS audio files, then write to hdf5 files. One hdf5
    file is written per piece, holding the attributes `split`,
    `midi_filename`, `audio_filename` and the datasets `midi_event`,
    `midi_event_time`, `waveform` (int16).

    Args:
      args: object with the following attributes
        dataset_dir: str, directory of dataset
        workspace: str, directory of your workspace
    """

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace

    sample_rate = config.sample_rate
    pianos = ['ENSTDkCl', 'ENSTDkAm']

    # Paths
    waveform_hdf5s_dir = os.path.join(workspace, 'hdf5s', 'maps')

    logs_dir = os.path.join(workspace, 'logs', get_filename(__file__))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    feature_time = time.time()
    count = 0

    # Load & resample each audio file to a hdf5 file
    for piano in pianos:
        sub_dir = os.path.join(dataset_dir, piano, 'MUS')

        # Pieces are discovered through their .mid files; the .wav with the
        # same stem is loaded below.
        audio_names = [os.path.splitext(name)[0] for name in os.listdir(sub_dir) 
            if os.path.splitext(name)[-1] == '.mid']
        
        for audio_name in audio_names:
            print('{} {}'.format(count, audio_name))
            audio_path = '{}.wav'.format(os.path.join(sub_dir, audio_name))
            midi_path = '{}.mid'.format(os.path.join(sub_dir, audio_name))

            (audio, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)
            midi_dict = read_maps_midi(midi_path)
            
            packed_hdf5_path = os.path.join(waveform_hdf5s_dir, '{}.h5'.format(audio_name))
            create_folder(os.path.dirname(packed_hdf5_path))

            with h5py.File(packed_hdf5_path, 'w') as hf:
                hf.attrs.create('split', data='test'.encode(), dtype='S20')
                hf.attrs.create('midi_filename', data='{}.mid'.format(audio_name).encode(), dtype='S100')
                hf.attrs.create('audio_filename', data='{}.wav'.format(audio_name).encode(), dtype='S100')
                hf.create_dataset(name='midi_event', data=[e.encode() for e in midi_dict['midi_event']], dtype='S100')
                hf.create_dataset(name='midi_event_time', data=midi_dict['midi_event_time'], dtype=np.float32)
                hf.create_dataset(name='waveform', data=float32_to_int16(audio), dtype=np.int16)
            
            count += 1

    # Log the output directory rather than the last per-file path: the loop
    # variable is unbound when no piece was found and would otherwise name
    # only the final file.
    logging.info('Write hdf5 to {}'.format(waveform_hdf5s_dir))
    logging.info('Time: {:.3f} s'.format(time.time() - feature_time))
def pack_maestro_dataset_to_hdf5(args):
    """Load & resample MAESTRO audio files, then write to hdf5 files.

    One hdf5 file is written per recording listed in 'maestro-v2.0.0.csv',
    at the recording's relative audio path under the output directory with
    the extension replaced by '.h5'. Each file holds the csv metadata as
    attributes and the datasets `midi_event`, `midi_event_time`, `waveform`
    (int16).

    Args:
      args: object with the following attributes
        dataset_dir: str, directory of dataset
        workspace: str, directory of your workspace
    """

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace

    sample_rate = config.sample_rate

    # Paths
    csv_path = os.path.join(dataset_dir, 'maestro-v2.0.0.csv')
    waveform_hdf5s_dir = os.path.join(workspace, 'hdf5s', 'maestro')

    logs_dir = os.path.join(workspace, 'logs', get_filename(__file__))
    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Read meta dict
    meta_dict = read_metadata(csv_path)

    audios_num = len(meta_dict['canonical_composer'])
    logging.info('Total audios number: {}'.format(audios_num))

    feature_time = time.time()

    # Load & resample each audio file to a hdf5 file
    for n in range(audios_num):
        logging.info('{} {}'.format(n, meta_dict['midi_filename'][n]))

        # Read midi
        midi_path = os.path.join(dataset_dir, meta_dict['midi_filename'][n])
        midi_dict = read_midi(midi_path)

        # Load audio
        audio_path = os.path.join(dataset_dir, meta_dict['audio_filename'][n])
        (audio, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)

        packed_hdf5_path = os.path.join(waveform_hdf5s_dir, '{}.h5'.format(
            os.path.splitext(meta_dict['audio_filename'][n])[0]))

        # The audio filename may carry sub-directories, so the target folder
        # is created per file.
        create_folder(os.path.dirname(packed_hdf5_path))

        with h5py.File(packed_hdf5_path, 'w') as hf:
            hf.attrs.create('canonical_composer', data=meta_dict['canonical_composer'][n].encode(), dtype='S100')
            hf.attrs.create('canonical_title', data=meta_dict['canonical_title'][n].encode(), dtype='S100')
            hf.attrs.create('split', data=meta_dict['split'][n].encode(), dtype='S20')
            hf.attrs.create('year', data=meta_dict['year'][n].encode(), dtype='S10')
            hf.attrs.create('midi_filename', data=meta_dict['midi_filename'][n].encode(), dtype='S100')
            hf.attrs.create('audio_filename', data=meta_dict['audio_filename'][n].encode(), dtype='S100')
            hf.attrs.create('duration', data=meta_dict['duration'][n], dtype=np.float32)

            hf.create_dataset(name='midi_event', data=[e.encode() for e in midi_dict['midi_event']], dtype='S100')
            hf.create_dataset(name='midi_event_time', data=midi_dict['midi_event_time'], dtype=np.float32)
            hf.create_dataset(name='waveform', data=float32_to_int16(audio), dtype=np.int16)
        
    # Log the output directory rather than the last per-file path: the loop
    # variable is unbound when the csv lists no recordings and would
    # otherwise name only the final file.
    logging.info('Write hdf5 to {}'.format(waveform_hdf5s_dir))
    logging.info('Time: {:.3f} s'.format(time.time() - feature_time))