def match_one_midi(midi_file):
    '''
    Match one MIDI file against the MSD hash sequences.

    The MIDI file is turned into a beat-synchronous piano roll, standardized
    with the module-level ``train_stats``, hashed with the module-level
    ``hash`` function and then matched against the module-level
    ``sequences``.  Candidates are first pruned by sequence length and by
    mean chroma (using the module-level ``mean_chromas``).

    :parameters:
        - midi_file : str
            Path to a MIDI file to match

    :returns:
        - matches, scores
            The pair of values returned by
            ``hash_match.match_one_sequence`` for this query
    '''
    midi_data = pretty_midi.PrettyMIDI(midi_file)

    # Piano roll sampled at the beat times, transposed so beats come first;
    # a leading axis is added and only columns 36..83 are retained
    beat_times = midi_data.get_beats()
    beat_roll = midi_data.get_piano_roll(times=beat_times).T
    beat_roll = beat_roll[np.newaxis, :, 36:84]

    # Standardize with the training statistics so the input resembles what
    # the hasher saw during training
    normalized_roll = (beat_roll - train_stats['mean'])/train_stats['std']

    # Run the hasher on the roll (with one more leading axis) and pack the
    # thresholded outputs into a sequence of 16-bit integers
    hasher_input = normalized_roll[np.newaxis].astype(theano.config.floatX)
    hasher_output = hash(hasher_input)
    query = hash_match.vectors_to_ints(hasher_output > 0).astype('uint16')

    # Candidate pruning, step 1: sequences of comparable length
    # (tolerance .4 is handed to hash_match.filter_by_length)
    length_candidates = hash_match.filter_by_length(query, sequences, .4)

    # Candidate pruning, step 2: sequences passing the mean chroma filter
    # (threshold 20 is handed to hash_match.filter_by_mean_chroma)
    midi_mean_chroma = midi_data.get_chroma().mean(axis=1)
    chroma_candidates = hash_match.filter_by_mean_chroma(
        midi_mean_chroma, mean_chromas, 20)

    # Only sequences surviving both filters are matched
    candidates = np.intersect1d(length_candidates, chroma_candidates)

    # Match the query hash sequence against the surviving candidates
    found_matches, match_scores = hash_match.match_one_sequence(
        query, sequences, .9, 4, candidates)
    return found_matches, match_scores
def process_one_file(index_entry, base_path='msd', train_stats=train_stats,
                     hash=hash):
    '''
    Hash the features in a single npz file and pickle the result.

    Reads ``<BASE_DATA_PATH>/<base_path>/npz/<path>.npz``, standardizes its
    ``'sync_gram'`` array, hashes it, and writes a pickled dict to
    ``<BASE_DATA_PATH>/<base_path>/pkl/<path>.pkl``.  The dict holds
    ``'hash_list'`` (uint16 hash sequence) and ``'mean_cqt'`` (mean of the
    raw ``sync_gram`` over its first axis), plus every key of
    ``index_entry`` (which take precedence on a name clash).

    Nothing is written when the output file already exists, when
    ``sync_gram`` has fewer than 6 rows, or when the standardized features
    contain NaN.  Any exception is reported on stdout and swallowed so that
    one bad file does not abort a batch run.

    :parameters:
        - index_entry : dict
            Entry in an index with keys 'path', 'artist', and 'title'
        - base_path : str
            Which dataset are we processing?
        - train_stats : dict
            Dict where train_stats['mean'] is the training set mean feature
            and train_stats['std'] is the per-feature std dev
        - hash : theano.function
            Theano function which takes in feature matrix and outputs hashes

    :returns:
        None
    '''
    try:
        entry_path = index_entry['path']
        dataset_dir = os.path.join(BASE_DATA_PATH, base_path)
        npz_file = os.path.join(dataset_dir, 'npz', entry_path + '.npz')
        # Build the output path explicitly rather than with
        # str.replace('npz', 'pkl'), which would also rewrite any other
        # "npz" substring occurring in the base directory or entry path
        output_filename = os.path.join(dataset_dir, 'pkl',
                                       entry_path + '.pkl')
        # Already processed
        if os.path.exists(output_filename):
            return
        # np.load on an .npz keeps the archive open; close it as soon as the
        # array has been read so handles don't pile up over a large batch
        with np.load(npz_file) as features:
            sync_gram = features['sync_gram']
        # Too short to be useful
        if sync_gram.shape[0] < 6:
            return
        # Mean is taken on the raw features, before standardization
        mean_cqt = sync_gram.mean(axis=0)
        sync_gram = sync_gram[np.newaxis]
        sync_gram = (sync_gram - train_stats['mean'])/train_stats['std']
        # NaNs here would poison the hashes
        if np.isnan(sync_gram).any():
            return
        hashed_features = hash(
            sync_gram[np.newaxis].astype(theano.config.floatX))
        hashes = hash_match.vectors_to_ints(hashed_features > 0)
        hashes = hashes.astype('uint16')
        # index_entry keys are applied last, so they win on a name clash
        output_dict = {'hash_list': hashes, 'mean_cqt': mean_cqt}
        output_dict.update(index_entry)
        output_dir = os.path.dirname(output_filename)
        try:
            os.makedirs(output_dir)
        except OSError:
            # The directory may already exist, possibly created by another
            # worker between a check and the call; only re-raise when it is
            # genuinely missing
            if not os.path.isdir(output_dir):
                raise
        with open(output_filename, 'wb') as f:
            pickle.dump(output_dict, f)
    except Exception as e:
        # Deliberate best-effort: report and move on to the next file
        print("Error creating {}: {}".format(index_entry['path'], e))