import math
import sys

import numpy as np
import pretty_midi

# Project-local helpers (load_data, instrument_representation_to_programs,
# rolls_to_midi and the module aliased as `mf`) are assumed to be imported
# elsewhere in this module.


def reconstruct(sampler, model, evaluation_params):
    # Load data
    data_path = evaluation_params['test_data']
    song_names = evaluation_params['test_songs']
    tempos = evaluation_params['test_tempos']
    instruments = evaluation_params['test_instruments']
    batch_size = evaluation_params['batch_size']
    data = load_data(data_path, batch_size, song_names, instruments, tempos)

    # Reconstruct the specified song
    reconstruction_params = evaluation_params['reconstruction']
    song_id = reconstruction_params['song_name']
    temperature = evaluation_params['temperature']
    attach_method = reconstruction_params['attach_method']
    reconstruction_path = reconstruction_params['reconstruction_path']
    song = data.dataset.get_tensor_by_name(song_id)

    # Generate the reconstruction from the samples
    reconstructed = sampler.reconstruct(model, song, temperature)

    # Write the reconstruction out in MIDI form. The trailing constants are
    # low_crop, high_crop, num_notes and velocity_threshold (cf. the
    # rolls_to_midi call in load_rolls below).
    I, tempo = data.dataset.get_aux_by_names(song_id)
    programs = instrument_representation_to_programs(I, attach_method)
    rolls_to_midi(reconstructed, programs, reconstruction_path, song_id,
                  tempo, 24, 84, 128, 0.5)
    print('Saved reconstruction for %s' % song_id)
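
# A minimal sketch of the evaluation_params layout that reconstruct() expects.
# All concrete values below are hypothetical placeholders; only the key names
# mirror the lookups performed above.
EXAMPLE_EVALUATION_PARAMS = {
    'test_data': 'data/test/',
    'test_songs': ['song_a.mid'],
    'test_tempos': [120],
    'test_instruments': [[0, 30, 33, 19]],
    'batch_size': 1,
    'temperature': 1.0,
    'reconstruction': {
        'song_name': 'song_a.mid',
        'attach_method': '1hot-category',
        'reconstruction_path': 'midi_reconstruct',
    },
}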

def generate_midi(sampler, model, evaluation_params):
    generate_path = 'midi_generate'
    song_id = 'g'
    temperature = evaluation_params['temperature']

    # Sample a latent vector from a standard normal prior and decode it
    glow_z = np.random.randn(1, 128, 1, 1)
    generated = sampler.generate(model, glow_z, temperature)
    generated = generated.cpu()

    # Fixed metadata for the generated song: 120 BPM and a hard-coded one-hot
    # instrument matrix with four tracks over 16 instrument categories
    tempo = 120
    attach_method = '1hot-category'
    I = np.zeros((4, 16))
    I[0, 0] = 1  # track 0 -> category 0
    I[1, 3] = 1  # track 1 -> category 3
    I[2, 4] = 1  # track 2 -> category 4
    I[3, 2] = 1  # track 3 -> category 2
    programs = instrument_representation_to_programs(I, attach_method)
    rolls_to_midi(generated, programs, generate_path, song_id, tempo,
                  24, 84, 128, 0.5)
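
# A minimal sketch of how an instrument matrix like I above can be built from
# General MIDI program numbers, assuming the '1hot-category' attach method
# one-hot encodes the 16 GM instrument families (family index = program // 8).
# make_category_matrix is a hypothetical helper, not part of this module.
def make_category_matrix(programs, num_categories=16):
    """Return a one-hot (len(programs) x num_categories) category matrix."""
    I = np.zeros((len(programs), num_categories))
    for track, program in enumerate(programs):
        I[track, program // 8] = 1
    return I

# Example: make_category_matrix([0, 30, 33, 19]) (piano, guitar, bass, organ)
# would yield the same matrix as the hard-coded assignments in generate_midi().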

# Note: load_rolls is a method (it takes self); its enclosing preprocessing
# class is not shown here.
def load_rolls(self, path, name, save_preprocessed_midi):
    # Try loading the MIDI file; if it fails, return all-None objects
    try:
        mid = pretty_midi.PrettyMIDI(path + name)
    except (ValueError, EOFError, IndexError, OSError, KeyError,
            ZeroDivisionError, AttributeError) as e:
        print('Unexpected error in %s:\n%s %s' % (name, e, sys.exc_info()[0]))
        return None, None, None, None, None, None

    # Determine the start and end of the song. If there are tempo changes,
    # only keep the longest part where the tempo is steady. This cuts off
    # silent starts and extended ends, and it also makes sure the bar starts
    # stay aligned throughout the song.
    tempo_change_times, tempo_change_bpm = mid.get_tempo_changes()
    song_start = 0
    song_end = mid.get_end_time()

    # There is always at least one tempo event (the initial tempo); more than
    # one means the tempo actually changes during the song
    if len(tempo_change_times) > 1:
        longest_part = 0
        longest_part_start_time = 0
        longest_part_end_time = song_end
        longest_part_tempo = 0
        for i, tempo_change_time in enumerate(tempo_change_times):
            if i == len(tempo_change_times) - 1:
                end_time = song_end
            else:
                end_time = tempo_change_times[i + 1]
            current_part_length = end_time - tempo_change_time
            if current_part_length > longest_part:
                longest_part = current_part_length
                longest_part_start_time = tempo_change_time
                longest_part_end_time = end_time
                longest_part_tempo = tempo_change_bpm[i]
        song_start = longest_part_start_time
        song_end = longest_part_end_time
        tempo = longest_part_tempo
    else:
        tempo = tempo_change_bpm[0]

    # Drop the notes that fall outside the longest steady-tempo part
    for instrument in mid.instruments:
        new_notes = []  # notes that survive the cut
        for note in instrument.notes:
            if note.start >= song_start and note.end <= song_end:
                # shift to the new time origin
                note.start -= song_start
                note.end -= song_start
                new_notes.append(note)
        instrument.notes = new_notes

    # Order the tracks by the number of notes they contain (descending)
    piano_rolls = [i.get_piano_roll(fs=100) for i in mid.instruments]
    number_of_notes = [np.count_nonzero(roll) for roll in piano_rolls]
    permutation = np.argsort(number_of_notes)[::-1]
    mid.instruments = [mid.instruments[i] for i in permutation]

    quarter_note_length = 1. / (tempo / 60.)
    # fs is the sampling frequency: the rate (in Hz) at which the song is
    # sampled into ticks, i.e. over np.arange(0, song_length_in_seconds,
    # 1./fs). It is the inverse of the duration of the smallest note that
    # should be sampled.
    fs = 1. / (quarter_note_length * 4. / self.smallest_note)
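    # Worked example (illustrative values): at tempo = 120 BPM a quarter note
    # lasts 0.5 s; with self.smallest_note = 16 (sixteenth notes),
    # fs = 1 / (0.5 * 4 / 16) = 8 ticks per second, so each tick covers
    # exactly one sixteenth note.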
    total_ticks = math.ceil(song_end * fs)

    # Assemble piano_rolls, velocity_rolls and held_note_rolls
    piano_rolls = []
    velocity_rolls = []
    held_note_rolls = []
    max_concurrent_notes_per_track_list = []
    for instrument in mid.instruments:
        piano_roll = np.zeros((total_ticks, 128))

        # Counts how many notes this instrument plays at most at any given
        # tick; this determines the depth of velocity_roll and held_note_roll
        concurrent_notes_count = np.zeros((total_ticks,))

        # Keys are tuples of the form (tick_start_of_the_note, pitch). This
        # uniquely identifies a note, since one instrument cannot play two
        # notes on the same pitch at the same time.
        note_to_velocity_dict = dict()
        note_to_duration_dict = dict()

        for note in instrument.notes:
            note_tick_start = note.start * fs
            note_tick_end = note.end * fs
            absolute_start = int(round(note_tick_start))
            absolute_end = int(round(note_tick_end))
            decimal = note_tick_start - absolute_start
            # If the note starts on a tick, or it doesn't (decimal > 10e-3)
            # but is at least one tick long, include it anyway
            if decimal < 10e-3 or absolute_end - absolute_start >= 1:
                piano_roll[absolute_start:absolute_end, note.pitch] = 1
                concurrent_notes_count[absolute_start:absolute_end] += 1
                # Store velocity and duration for later use; this cannot be
                # done right now because the notes may not be ordered
                note_to_velocity_dict[(absolute_start, note.pitch)] = note.velocity
                note_to_duration_dict[(absolute_start, note.pitch)] = \
                    absolute_end - absolute_start

        max_concurrent_notes = int(np.max(concurrent_notes_count))
        max_concurrent_notes_per_track_list.append(max_concurrent_notes)

        velocity_roll = np.zeros((total_ticks, max_concurrent_notes))
        held_note_roll = np.zeros((total_ticks, max_concurrent_notes))
        for step, note_vector in enumerate(piano_roll):
            pitches = list(note_vector.nonzero()[0])
            sorted_pitches_from_highest_to_lowest = sorted(pitches)[::-1]
            for voice_number, pitch in enumerate(
                    sorted_pitches_from_highest_to_lowest):
                if (step, pitch) in note_to_velocity_dict:
                    velocity_roll[step, voice_number] = \
                        note_to_velocity_dict[(step, pitch)]
                if (step, pitch) not in note_to_duration_dict:
                    # A key in the dictionary marks the start of a note; if
                    # this step is not a note start, the note is being held
                    held_note_roll[step, voice_number] = 1

        piano_rolls.append(piano_roll)
        velocity_rolls.append(velocity_roll)
        held_note_rolls.append(held_note_roll)

    # Program numbers lie between 0 and 127 and map 1:1 to the instruments
    # described in the settings file
    programs = [i.program for i in mid.instruments]

    # We may want to override max_voices_per_track: if the remaining tracks
    # are all silent, it makes no sense to cap the voices of the first
    # instrument and end up with a song that uses only one voice
    override_max_notes_per_track_list = [
        self.max_voices_per_track for _ in max_concurrent_notes_per_track_list
    ]
    silent_tracks_if_we_dont_override = self.max_voices - sum(
        min(self.max_voices_per_track, x) if x > 0 else 0
        for x in max_concurrent_notes_per_track_list[:self.max_voices])
    for voice in range(
            min(self.max_voices, len(max_concurrent_notes_per_track_list))):
        if silent_tracks_if_we_dont_override > 0 and \
                max_concurrent_notes_per_track_list[voice] > self.max_voices:
            additional_voices = min(
                silent_tracks_if_we_dont_override,
                max_concurrent_notes_per_track_list[voice] - self.max_voices)
            override_max_notes_per_track_list[voice] += additional_voices
            silent_tracks_if_we_dont_override -= additional_voices
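    # Worked example (illustrative values): with self.max_voices = 4,
    # self.max_voices_per_track = 1 and concurrent-note counts [6, 0, 0, 0],
    # three of the four available voices would stay silent, so the first
    # track's cap is raised by min(3, 6 - 4) = 2, from 1 to 3.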
    # Choose the most important piano_rolls; each chosen one is monophonic
    chosen_piano_rolls = []
    chosen_velocity_rolls = []
    chosen_held_note_rolls = []
    chosen_programs = []
    max_song_length = 0

    # Go through all piano_rolls in descending order of their total note count
    for batch in zip(piano_rolls, velocity_rolls, held_note_rolls, programs,
                     max_concurrent_notes_per_track_list,
                     override_max_notes_per_track_list):
        piano_roll, velocity_roll, held_note_roll, program, \
            max_concurrent_notes, override_max_notes_per_track = batch

        # Check whether any note is played at all in this piano_roll
        if max_concurrent_notes > 0:
            if self.include_only_monophonic_instruments:
                # Skip polyphonic tracks if only monophonic instruments
                # are wanted
                if max_concurrent_notes > 1:
                    continue
                monophonic_piano_roll = piano_roll

                # Append them to the chosen ones
                if len(chosen_piano_rolls) < self.max_voices:
                    chosen_piano_rolls.append(monophonic_piano_roll)
                    chosen_velocity_rolls.append(velocity_roll)
                    chosen_held_note_rolls.append(held_note_roll)
                    chosen_programs.append(program)
                    if monophonic_piano_roll.shape[0] > max_song_length:
                        max_song_length = monophonic_piano_roll.shape[0]
                else:
                    break
            else:
                # Limit the voices per track to the minimum of the actual
                # concurrent voices and the (possibly overridden) maximum
                # allowed by the settings file
                for voice in range(
                        min(max_concurrent_notes,
                            max(self.max_voices_per_track,
                                override_max_notes_per_track))):
                    # Take the highest note for voice 0, the second highest
                    # for voice 1, and so on
                    monophonic_piano_roll = np.zeros(piano_roll.shape)
                    for step in range(piano_roll.shape[0]):
                        # sort the notes at this step from highest to lowest
                        notes = np.nonzero(piano_roll[step, :])[0][::-1]
                        if len(notes) > voice:
                            monophonic_piano_roll[step, notes[voice]] = 1

                    # Append them to the chosen ones
                    if len(chosen_piano_rolls) < self.max_voices:
                        chosen_piano_rolls.append(monophonic_piano_roll)
                        chosen_velocity_rolls.append(velocity_roll[:, voice])
                        chosen_held_note_rolls.append(held_note_roll[:, voice])
                        chosen_programs.append(program)
                        if monophonic_piano_roll.shape[0] > max_song_length:
                            max_song_length = monophonic_piano_roll.shape[0]
                    else:
                        break
        if len(chosen_piano_rolls) == self.max_voices:
            break

    assert len(chosen_piano_rolls) == len(chosen_velocity_rolls)
    assert len(chosen_piano_rolls) == len(chosen_held_note_rolls)
    assert len(chosen_piano_rolls) == len(chosen_programs)
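    # Unrolling layout (illustration): with max_voices = 4, the voices are
    # interleaved tick by tick, so row t of voice v lands in row
    # v + t * 4 of the unrolled matrix:
    #   Y[0] = voice0[0], Y[1] = voice1[0], ..., Y[4] = voice0[1], ...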
    # Do the unrolling and prepare the model input
    if len(chosen_piano_rolls) > 0:
        song_length = max_song_length * self.max_voices

        # Y will be the target notes: unroll the piano_rolls into one matrix
        Y = np.zeros((song_length, chosen_piano_rolls[0].shape[1]))
        for i, piano_roll in enumerate(chosen_piano_rolls):
            for step in range(piano_roll.shape[0]):
                Y[i + step * self.max_voices, :] += piano_roll[step, :]
        # Assert that at most one note is played at every step
        for step in range(Y.shape[0]):
            assert np.sum(Y[step, :]) <= 1

        # Cut off very uncommon pitch values; this reduces the feature space
        # significantly
        Y = Y[:, self.low_crop:self.high_crop]

        # Append a silent note if desired; it always occupies the last column
        if self.include_silent_note:
            Y = np.append(Y, np.zeros((Y.shape[0], 1)), axis=1)
            for step in range(Y.shape[0]):
                if np.sum(Y[step]) == 0:
                    Y[step, -1] = 1
            # Assert that there is now exactly one 1 at every step
            for step in range(Y.shape[0]):
                assert np.sum(Y[step, :]) == 1

        # Unroll the velocity rolls. V has shape (song_length,) and its
        # values lie between 0 and 1 (scaled by max_velocity).
        V = np.zeros((song_length,))
        for i, velocity_roll in enumerate(chosen_velocity_rolls):
            for step in range(velocity_roll.shape[0]):
                if velocity_roll[step] > 0:
                    velocity = self.velocity_threshold + \
                        (velocity_roll[step] / self.max_velocity) * \
                        (1.0 - self.velocity_threshold)
                    # A played note is therefore at least velocity_threshold
                    # loud, which is good: silent and played notes can now be
                    # distinguished more clearly
                    assert velocity <= 1.0
                    V[i + step * self.max_voices] = velocity

        # Unroll the held_note_rolls. D has shape (song_length,) and its
        # values are 0 or 1 (1 if held). It is named D for duration to avoid
        # a name clash with the history (H).
        D = np.zeros((song_length,))
        for i, held_note_roll in enumerate(chosen_held_note_rolls):
            for step in range(held_note_roll.shape[0]):
                D[i + step * self.max_voices] = held_note_roll[step]

        instrument_feature_matrix = mf.programs_to_instrument_matrix(
            chosen_programs, self.instrument_attach_method, self.max_voices)

        if self.attach_instruments:
            instrument_feature_matrix = np.transpose(
                np.tile(np.transpose(instrument_feature_matrix),
                        song_length // self.max_voices))
            Y = np.append(Y, instrument_feature_matrix, axis=1)

        X = Y

        if save_preprocessed_midi:
            # 't' is assumed to be a run identifier (e.g. a timestamp string)
            # defined elsewhere in this module
            mf.rolls_to_midi(Y, chosen_programs,
                             'preprocess_midi_data/' + t + '/', name, tempo,
                             self.low_crop, self.high_crop, self.num_notes,
                             self.velocity_threshold, V, D)
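        # Worked example (illustrative values): with self.input_length = 64
        # and X holding 150 rows, padding_length = 64 - (150 % 64) = 42, so X
        # is padded to 192 rows and split into 3 chunks of length 64.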
        # Split the song into chunks of size input_length / output_length and
        # pad them with silent notes where necessary
        if self.input_length > 0:
            # Split X
            padding_length = self.input_length - (X.shape[0] % self.input_length)
            if self.input_length == padding_length:
                padding_length = 0

            # Pad to the right
            X = np.pad(X, ((0, padding_length), (0, 0)), 'constant',
                       constant_values=(0, 0))
            # Guard against padding_length == 0: X[-0:] would select the
            # whole array and mark every step as silent
            if self.include_silent_note and padding_length > 0:
                X[-padding_length:, -1] = 1
            number_of_splits = X.shape[0] // self.input_length
            X = np.split(X, number_of_splits)
            X = np.asarray(X)

        if self.output_length > 0:
            # Split Y
            padding_length = self.output_length - (Y.shape[0] % self.output_length)
            if self.output_length == padding_length:
                padding_length = 0

            # Pad to the right
            Y = np.pad(Y, ((0, padding_length), (0, 0)), 'constant',
                       constant_values=(0, 0))
            if self.include_silent_note and padding_length > 0:
                Y[-padding_length:, -1] = 1
            number_of_splits = Y.shape[0] // self.output_length
            Y = np.split(Y, number_of_splits)
            Y = np.asarray(Y)

            # Split V, padding to the right with zeros
            V = np.pad(V, (0, padding_length), 'constant', constant_values=0)
            number_of_splits = V.shape[0] // self.output_length
            V = np.split(V, number_of_splits)
            V = np.asarray(V)

            # Split D, padding to the right with zeros
            D = np.pad(D, (0, padding_length), 'constant', constant_values=0)
            number_of_splits = D.shape[0] // self.output_length
            D = np.split(D, number_of_splits)
            D = np.asarray(D)

        return X, Y, instrument_feature_matrix, tempo, V, D
    else:
        return None, None, None, None, None, None
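
# A minimal usage sketch for load_rolls(), assuming `processor` is an instance
# of the (hypothetical here) preprocessing class this method belongs to, with
# attributes such as smallest_note, max_voices and input_length already set:
#
#     X, Y, instruments, tempo, V, D = processor.load_rolls(
#         'data/midi/', 'song_a.mid', save_preprocessed_midi=False)
#     if X is not None:
#         print(X.shape)  # (num_chunks, input_length, num_features)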