forked from craffel/midi-dataset
/
create_hashing_dataset.py
117 lines (101 loc) · 4.46 KB
/
create_hashing_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>
# <codecell>
'''
Functions for creating the dataset to be used for cross-modality hashing experiments.
'''
# <codecell>
import numpy as np
import csv
import os
import midi
import pretty_midi
import hdf5_getters
import beat_aligned_feats
import glob
# <codecell>
def hms_to_s(hms):
    ''' Convert an "hour:minute:second" string into a total number of seconds '''
    hours, minutes, seconds = hms.split(':')
    # Accumulate in base 60: ((h*60) + m)*60 + s
    total = 0
    for field in (hours, minutes, seconds):
        total = 60*total + int(field)
    return total
# <codecell>
def load_results(path_to_tsv):
    '''
    Read a tab-separated value file whose rows look like
    Bryan Adams - Summer Of '69.mp3 70.3806 0:00:03 0:03:12 Some egregious pitch bends
    and collect the rows which have exactly five fields and whose third and
    fourth fields are both h:m:s strings.

    Returns the list of filenames (first field) of those rows, plus two numpy
    arrays holding the start and end times converted to seconds.
    '''
    def looks_like_hms(field):
        ''' True when the field splits into exactly three colon-separated parts '''
        return len(field.split(':')) == 3

    kept = []
    with open(path_to_tsv) as tsv_file:
        for row in csv.reader(tsv_file, delimiter='\t'):
            # Rows without all five columns carry no usable start/end times
            if len(row) != 5:
                continue
            if not (looks_like_hms(row[2]) and looks_like_hms(row[3])):
                continue
            kept.append((row[0], hms_to_s(row[2]), hms_to_s(row[3])))
    files = [name for name, _, _ in kept]
    start_times = [start for _, start, _ in kept]
    end_times = [end for _, _, end in kept]
    return files, np.array(start_times), np.array(end_times)
# <codecell>
def get_data_folder(filename):
    ''' Guess the dataset folder for an mp3 filename (a hack): names containing a space map to cal10k, all others to cal500 '''
    has_space = ' ' in filename
    return 'cal10k' if has_space else 'cal500'
# <codecell>
# Create the hashing dataset based on which files are aligned, and when they are
if __name__=='__main__':
    # Set up paths
    base_data_path = 'data'
    aligned_path = os.path.join(base_data_path, 'aligned')
    tsv_path = os.path.join(aligned_path, 'results.tsv')
    output_path = os.path.join(base_data_path, 'hash_dataset')
    midi_directory = 'midi-aligned-new-new-dpmod-multiple-files'
    # np.save does not create missing directories, so make sure the target exists
    if not os.path.isdir(output_path):
        os.makedirs(output_path)

    def swap_extension(filename, new_suffix):
        ''' Given a filename, return it with its extension replaced by new_suffix '''
        # splitext only touches the final extension, unlike str.replace which
        # would also rewrite any earlier 'mp3' appearing in the name
        base, _ = os.path.splitext(filename)
        return base + new_suffix

    def to_numbered_mid(filename):
        '''
        Given an mp3 filename, return the corresponding best alignment .mid name according to the .pdf present.
        If <base>.pdf exists the result is <base>.mid; otherwise the smallest n >= 1 for which
        <base>.<n>.pdf exists is used, giving <base>.<n>.mid.
        Returns None when no matching .pdf exists at all.
        '''
        base, _ = os.path.splitext(filename)
        if os.path.exists(os.path.join(aligned_path, base + '.pdf')):
            return base + '.mid'
        # Scan the directory once rather than probing n = 1, 2, 3, ... which
        # would never terminate when no numbered .pdf is present
        pdf_directory, stem = os.path.split(os.path.join(aligned_path, base))
        if not os.path.isdir(pdf_directory):
            return None
        prefix = stem + '.'
        suffix = '.pdf'
        numbers = []
        for name in os.listdir(pdf_directory):
            if not (name.startswith(prefix) and name.endswith(suffix)):
                continue
            middle = name[len(prefix):-len(suffix)]
            # Accept only names that '{}.{}.pdf'.format(base, n) would produce for n >= 1
            if middle.isdigit() and str(int(middle)) == middle and int(middle) >= 1:
                numbers.append(int(middle))
        if not numbers:
            return None
        return '{}.{}.mid'.format(base, min(numbers))

    def to_h5_path(filename):
        ''' Given an mp3 filename, returns the path to the corresponding .h5 file '''
        return os.path.join(base_data_path, get_data_folder(filename), 'msd', swap_extension(filename, '.h5'))

    def to_midi_path(filename):
        ''' Given an mp3 filename, returns the path to the corresponding midi file, or None if no alignment .pdf exists '''
        mid_name = to_numbered_mid(filename)
        if mid_name is None:
            return None
        return os.path.join(base_data_path, get_data_folder(filename), midi_directory, mid_name)

    # Load in list of files which were aligned correctly, and the start/end times of the good alignment
    files, start_times, end_times = load_results(tsv_path)
    for filename, start_time, end_time in zip(files, start_times, end_times):
        # Load in MSD hdf5 file
        h5 = hdf5_getters.open_h5_file_read(to_h5_path(filename))
        # try/finally so the file is closed on every exit path, including the early continue
        try:
            # Load in beat times from MSD
            beats = hdf5_getters.get_beats_start(h5)
            # Some files have no EN analysis
            if beats.size == 0:
                continue
            # Get indices which fall within the range of correct alignment
            time_mask = np.logical_and(beats > start_time, beats < end_time)
            beats = beats[time_mask]
            # and beat-synchronous feature matrices, within the time range of correct alignment
            chroma = beat_aligned_feats.get_btchromas(h5)[:, time_mask]
            timbre = beat_aligned_feats.get_bttimbre(h5)[:, time_mask]
            loudness = beat_aligned_feats.get_btloudnessmax(h5)[:, time_mask]
        finally:
            h5.close()
        # Stack it
        msd_features = np.vstack([chroma, timbre, loudness])
        # Report and skip files whose features contain NaNs
        if np.isnan(msd_features).any():
            print(filename)
            continue
        midi_path = to_midi_path(filename)
        if midi_path is None:
            print('No alignment .pdf found for {}'.format(filename))
            continue
        # Load in pretty midi object
        pm = pretty_midi.PrettyMIDI(midi.read_midifile(midi_path))
        # Construct piano roll, aligned to the msd beat times
        piano_roll = pm.get_piano_roll(times=beats)
        # Keep only piano roll rows 36 through 83 (the slice end is exclusive)
        piano_roll = piano_roll[36:84, :]
        # Write out
        np.save(os.path.join(output_path, swap_extension(filename, '-msd.npy')), msd_features)
        np.save(os.path.join(output_path, swap_extension(filename, '-midi.npy')), piano_roll)