def testDecode(self):
  encoder = music_encoders.MidiPerformanceEncoder(
      steps_per_second=100, num_velocity_bins=32, min_pitch=21, max_pitch=108,
      ngrams=[(277, 129)])

  ids = [
      302,  # VELOCITY(25)
      41,   # NOTE-ON(60)
      310   # TIME-SHIFT(100), NOTE-OFF(60)
  ]

  # Decode method returns MIDI filename; read and convert to NoteSequence.
  filename = encoder.decode(ids)
  ns = note_seq.midi_file_to_sequence_proto(filename)

  # Remove default tempo & time signature.
  del ns.tempos[:]
  del ns.time_signatures[:]

  expected_ns = note_seq.NoteSequence(ticks_per_quarter=220)
  testing_lib.add_track_to_sequence(expected_ns, 0, [(60, 97, 0.0, 1.0)])

  # Add source info fields.
  expected_ns.source_info.encoding_type = (
      note_seq.NoteSequence.SourceInfo.MIDI)
  expected_ns.source_info.parser = (
      note_seq.NoteSequence.SourceInfo.PRETTY_MIDI)

  self.assertEqual(expected_ns, ns)
def testEncodeEmptyNoteSequence(self):
  encoder = music_encoders.MidiPerformanceEncoder(
      steps_per_second=100, num_velocity_bins=32, min_pitch=21, max_pitch=108)
  ids = encoder.encode_note_sequence(note_seq.NoteSequence())
  self.assertEqual([], ids)
def testEncodeNoteSequenceAddEos(self):
  encoder = music_encoders.MidiPerformanceEncoder(
      steps_per_second=100, num_velocity_bins=32, min_pitch=21, max_pitch=108,
      add_eos=True)

  ns = note_seq.NoteSequence()
  testing_lib.add_track_to_sequence(
      ns, 0, [(60, 100, 0.0, 4.0), (64, 100, 0.0, 3.0), (67, 127, 1.0, 2.0)])

  ids = encoder.encode_note_sequence(ns)

  expected_ids = [
      302,  # VELOCITY(25)
      41,   # NOTE-ON(60)
      45,   # NOTE-ON(64)
      277,  # TIME-SHIFT(100)
      309,  # VELOCITY(32)
      48,   # NOTE-ON(67)
      277,  # TIME-SHIFT(100)
      136,  # NOTE-OFF(67)
      277,  # TIME-SHIFT(100)
      133,  # NOTE-OFF(64)
      277,  # TIME-SHIFT(100)
      129,  # NOTE-OFF(60)
      1     # EOS
  ]

  self.assertEqual(expected_ids, ids)
def testEncodeNoteSequenceNGrams(self):
  encoder = music_encoders.MidiPerformanceEncoder(
      steps_per_second=100, num_velocity_bins=32, min_pitch=21, max_pitch=108,
      ngrams=[(41, 45), (277, 309, 300), (309, 48), (277, 129, 130)])

  ns = note_seq.NoteSequence()
  testing_lib.add_track_to_sequence(
      ns, 0, [(60, 100, 0.0, 4.0), (64, 100, 0.0, 3.0), (67, 127, 1.0, 2.0)])

  ids = encoder.encode_note_sequence(ns)

  expected_ids = [
      302,  # VELOCITY(25)
      310,  # NOTE-ON(60), NOTE-ON(64)
      277,  # TIME-SHIFT(100)
      312,  # VELOCITY(32), NOTE-ON(67)
      277,  # TIME-SHIFT(100)
      136,  # NOTE-OFF(67)
      277,  # TIME-SHIFT(100)
      133,  # NOTE-OFF(64)
      277,  # TIME-SHIFT(100)
      129   # NOTE-OFF(60)
  ]

  self.assertEqual(expected_ids, ids)
def testEncodeNoteSequence(self):
  encoder = music_encoders.TextMelodyEncoder(
      steps_per_quarter=4, min_pitch=21, max_pitch=108)
  encoder_absolute = music_encoders.TextMelodyEncoderAbsolute(
      steps_per_second=4, min_pitch=21, max_pitch=108)

  ns = note_seq.NoteSequence()
  ns.tempos.add(qpm=60)
  testing_lib.add_track_to_sequence(
      ns, 0,
      [(60, 127, 0.0, 0.25), (62, 127, 0.25, 0.75), (64, 127, 1.25, 2.0)])

  ids = encoder.encode_note_sequence(ns)
  ids_absolute = encoder_absolute.encode_note_sequence(ns)

  expected_ids = [
      43,  # ON(60)
      45,  # ON(62)
      2,   # HOLD(62)
      3,   # OFF(62)
      2,   # REST
      47,  # ON(64)
      2,   # HOLD(64)
      2    # HOLD(64)
  ]

  self.assertEqual(expected_ids, ids)
  self.assertEqual(expected_ids, ids_absolute)
def load_seq_files(files):
  """Reads serialized NoteSequence protos from the given file paths."""
  res = []
  for fname in files:
    with open(fname, 'rb') as f:
      ns = note_seq.NoteSequence()
      ns.ParseFromString(f.read())
      res.append(ns)
  return res
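# A minimal usage sketch for load_seq_files; the file names below are
# hypothetical. Each input file is assumed to hold one NoteSequence proto
# as written by NoteSequence.SerializeToString().
sequences = load_seq_files(['parsed/train_0.pb', 'parsed/train_1.pb'])
for ns in sequences:
  print(len(ns.notes), ns.total_time)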
def predict(self, path: str, wav_data=None):
  """Using the model, return the predicted note sequence of a .wav file at the
  given path.

  Args:
    path (str): The path to a .wav audio file. If path is "binary", then
      wav_data must be specified.
    wav_data (bytes): The raw bytes of the .wav file, if that is easier to
      provide. Defaults to None when path is provided.

  Returns:
    NoteSequence object containing the prediction. Convertible to MIDI.
  """
  if path == "binary":
    if wav_data is None:
      raise ValueError(
          "The binary option is chosen but a binary is not provided.")
  else:
    with open(path, "rb") as f:
      wav_data = f.read()

  ns = note_seq.NoteSequence()
  example_list = [
      audio_label_data_utils.create_example(
          path, ns, wav_data,
          velocity_range=audio_label_data_utils.velocity_range_from_sequence(
              ns))
  ]
  to_process = [example_list[0].SerializeToString()]
  print('Processing complete for', path)

  sess = tf.Session()
  sess.run([
      tf.initializers.global_variables(),
      tf.initializers.local_variables()
  ])
  sess.run(self.iterator.initializer, {self.examples: to_process})

  def transcription_data(params):
    del params
    return tf.data.Dataset.from_tensors(sess.run(self.next_record))

  input_fn = infer_util.labels_to_features_wrapper(transcription_data)
  prediction_list = list(
      self.estimator.predict(input_fn, yield_single_examples=False))
  assert len(prediction_list) == 1

  sequence_prediction = note_seq.NoteSequence.FromString(
      prediction_list[0]['sequence_predictions'][0])
  return sequence_prediction
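# A hedged usage sketch for predict(), assuming `transcriber` is an instance
# of the transcription class defining the method and 'piano.wav' exists.
transcribed = transcriber.predict('piano.wav')

# Alternatively, pass the raw bytes with the "binary" sentinel path:
with open('piano.wav', 'rb') as f:
  transcribed = transcriber.predict('binary', wav_data=f.read())

# The returned NoteSequence can be written out as MIDI.
note_seq.sequence_proto_to_midi_file(transcribed, 'piano_transcribed.mid')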
def encode(self, s):
  """Transform a MIDI filename into a list of performance event indices.

  Args:
    s: Path to the MIDI file.

  Returns:
    ids: List of performance event indices.
  """
  if s:
    ns = note_seq.midi_file_to_sequence_proto(s)
  else:
    ns = note_seq.NoteSequence()
  return self.encode_note_sequence(ns)
def encode(self, s):
  """Transform a MusicXML filename into a list of score event index tuples.

  Args:
    s: Path to the MusicXML file.

  Returns:
    ids: List of score event index tuples.
  """
  if s:
    ns = note_seq.musicxml_file_to_sequence_proto(s)
  else:
    ns = note_seq.NoteSequence()
  return self.encode_note_sequence(ns)
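# A minimal sketch of calling the two encode() methods above; the encoder
# instance names and file names are assumptions, not part of the source.
performance_ids = midi_performance_encoder.encode('example.mid')
score_id_tuples = musicxml_score_encoder.encode('example.xml')
# An empty/falsy path encodes an empty NoteSequence instead of reading a file.
empty_ids = midi_performance_encoder.encode('')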
def segment_notes(binarize_f, pick_f0_f, pick_amps_f, controls,
                  frame_rate=DDSP_DEFAULT_FRAME_RATE):
  """Splits a controls dict into discrete notes.

  Args:
    binarize_f: Returns a binary vector that is True when a note is on.
    pick_f0_f: Returns a single f0 for a vector of f0s of a single note.
    pick_amps_f: Returns a single amplitude for a vector of amplitudes of a
      single note.
    controls: The controls as returned from model inference.
    frame_rate: Frame rate for the notes found.

  Returns:
    NoteSequence object with discretized note information.
  """
  sequence = note_seq.NoteSequence()

  def construct_note(curr_ind, duration):
    note_start = curr_ind - duration
    f0 = pick_f0_f(controls, start=note_start, stop=curr_ind)
    amplitude = pick_amps_f(controls, start=note_start, stop=curr_ind)  # pylint:disable=unused-variable
    note = sequence.notes.add()
    note.pitch = int(np.round(ddsp.core.hz_to_midi(f0)))
    note.start_time = note_start / frame_rate
    note.end_time = (note_start + duration) / frame_rate
    # TODO(rigeljs): convert amplitude to velocity and add to note.
    note.velocity = 127

  binary_sample = binarize_f(controls)
  has_been_on = 0
  for i, sample_i in enumerate(np.nditer(binary_sample)):
    if sample_i:
      has_been_on += 1
    elif has_been_on > 0:
      # The note just ended; emit it.
      construct_note(i, has_been_on)
      has_been_on = 0
  if has_been_on > 0:
    # Close out a note that was still on at the end of the sample.
    construct_note(len(binary_sample), has_been_on)
  sequence.total_time = len(binary_sample) / frame_rate
  return sequence
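# A minimal sketch of the three callables segment_notes() expects, assuming
# `controls` is a dict with 'f0_hz' and 'amps' arrays sampled at the frame
# rate. The key names and threshold are assumptions, not the DDSP API.
def binarize_f(controls, threshold=0.5):
  # A note is "on" wherever the amplitude exceeds the threshold.
  return np.asarray(controls['amps']) > threshold

def pick_f0_f(controls, start, stop):
  # Summarize the note's pitch with the median f0 over its frames.
  return np.median(controls['f0_hz'][start:stop])

def pick_amps_f(controls, start, stop):
  # Summarize the note's loudness with the mean amplitude over its frames.
  return np.mean(controls['amps'][start:stop])

sequence = segment_notes(binarize_f, pick_f0_f, pick_amps_f, controls)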
def testEncodeNoteSequence(self):
  encoder = music_encoders.TextChordsEncoder(steps_per_quarter=1)

  ns = note_seq.NoteSequence()
  ns.tempos.add(qpm=60)
  testing_lib.add_chords_to_sequence(ns, [('C', 1), ('Dm', 3), ('Bdim', 4)])
  ns.total_time = 5.0
  ids = encoder.encode_note_sequence(ns)

  expected_ids = [
      2,   # no-chord
      3,   # C major
      3,   # C major
      17,  # D minor
      50   # B diminished
  ]

  self.assertEqual(expected_ids, ids)
def testEncode(self):
  encoder = music_encoders.MidiPerformanceEncoder(
      steps_per_second=100, num_velocity_bins=32, min_pitch=21, max_pitch=108,
      ngrams=[(277, 129)])

  ns = note_seq.NoteSequence()
  testing_lib.add_track_to_sequence(ns, 0, [(60, 97, 0.0, 1.0)])

  # Write NoteSequence to MIDI file as encoder takes in filename.
  with tempfile.NamedTemporaryFile(suffix='.mid') as f:
    note_seq.sequence_proto_to_midi_file(ns, f.name)
    ids = encoder.encode(f.name)

  expected_ids = [
      302,  # VELOCITY(25)
      41,   # NOTE-ON(60)
      310   # TIME-SHIFT(100), NOTE-OFF(60)
  ]

  self.assertEqual(expected_ids, ids)
def testEncodeNoteSequence(self):
  encoder = music_encoders.CompositeScoreEncoder([
      music_encoders.TextChordsEncoder(steps_per_quarter=4),
      music_encoders.TextMelodyEncoder(
          steps_per_quarter=4, min_pitch=21, max_pitch=108)
  ])

  ns = note_seq.NoteSequence()
  ns.tempos.add(qpm=60)
  testing_lib.add_chords_to_sequence(ns, [('C', 0.5), ('Dm', 1.0)])
  testing_lib.add_track_to_sequence(
      ns, 0,
      [(60, 127, 0.0, 0.25), (62, 127, 0.25, 0.75), (64, 127, 1.25, 2.0)])

  chord_ids, melody_ids = zip(*encoder.encode_note_sequence(ns))

  expected_chord_ids = [
      2,   # no-chord
      2,   # no-chord
      3,   # C major
      3,   # C major
      17,  # D minor
      17,  # D minor
      17,  # D minor
      17   # D minor
  ]
  expected_melody_ids = [
      43,  # ON(60)
      45,  # ON(62)
      2,   # HOLD(62)
      3,   # OFF(62)
      2,   # REST
      47,  # ON(64)
      2,   # HOLD(64)
      2    # HOLD(64)
  ]

  self.assertEqual(expected_chord_ids, list(chord_ids))
  self.assertEqual(expected_melody_ids, list(melody_ids))
def testPerformanceRnnPipeline(self):
  note_sequence = note_seq.NoteSequence()
  testing_lib.add_track_to_sequence(
      note_sequence, 0,
      [(36, 100, 0.00, 2.0), (40, 55, 2.1, 5.0), (44, 80, 3.6, 5.0),
       (41, 45, 5.1, 8.0), (64, 100, 6.6, 10.0), (55, 120, 8.1, 11.0),
       (39, 110, 9.6, 9.7), (53, 99, 11.1, 14.1), (51, 40, 12.6, 13.0),
       (55, 100, 14.1, 15.0), (54, 90, 15.6, 17.0), (60, 100, 17.1, 18.0)])

  pipeline_inst = performance_pipeline.get_pipeline(
      min_events=32, max_events=512, eval_ratio=0, config=self.config)
  result = pipeline_inst.transform(note_sequence)
  self.assertTrue(len(result['training_performances']))
def testEncodeNoteSequence(self):
  encoder = music_encoders.FlattenedTextMelodyEncoderAbsolute(
      steps_per_second=4, num_velocity_bins=127)

  ns = note_seq.NoteSequence()
  ns.tempos.add(qpm=60)
  testing_lib.add_track_to_sequence(
      ns, 0,
      [(60, 127, 0.0, 0.25), (62, 15, 0.25, 0.75), (64, 32, 1.25, 2.0)])
  ids = encoder.encode_note_sequence(ns)

  expected_ids = [
      130,  # ON(vel=127)
      18,   # ON(vel=15)
      2,    # HOLD(62)
      2,    # REST
      2,    # REST
      35,   # ON(vel=32)
      2,    # HOLD(64)
      2     # HOLD(64)
  ]

  self.assertEqual(expected_ids, ids)
def get_new_ns(max_instrument, ns):
  if max_instrument is None:
    return None
  seq = note_seq.NoteSequence()
  seq.source_info.parser = ns.source_info.parser
  seq.source_info.encoding_type = ns.source_info.encoding_type
  for tempo in ns.tempos:
    seq.tempos.add().qpm = tempo.qpm
  seq.ticks_per_quarter = ns.ticks_per_quarter
  for ns_time_signature in ns.time_signatures:
    time_signature = seq.time_signatures.add()
    try:
      time_signature.numerator = ns_time_signature.numerator
      time_signature.denominator = ns_time_signature.denominator
    except ValueError:
      # Fall back to 4/4 if the source time signature is invalid.
      time_signature.numerator = 4
      time_signature.denominator = 4
  for ns_key_signature in ns.key_signatures:
    seq.key_signatures.add().CopyFrom(ns_key_signature)
  # Keep only non-drum notes on the selected instrument.
  new_notes = [note for note in ns.notes
               if not note.is_drum and note.instrument == max_instrument]
  seq.notes.extend(new_notes)
  # Shift the copied notes so the track starts at time zero, without
  # mutating the source sequence.
  timing = min((note.start_time for note in seq.notes), default=0.0)
  for note in seq.notes:
    note.start_time -= timing
    note.end_time -= timing
  seq.total_time = ns.total_time - timing
  return seq
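# A hedged usage sketch for get_new_ns(): pick the non-drum instrument with
# the most notes and extract it as its own sequence. `ns` is assumed to be an
# existing NoteSequence; `collections` is from the standard library.
import collections

counts = collections.Counter(
    note.instrument for note in ns.notes if not note.is_drum)
max_instrument = counts.most_common(1)[0][0] if counts else None
melody_ns = get_new_ns(max_instrument, ns)  # None if no melodic notes exist.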
def _to_single_notesequence(self, samples, controls):
  qpm = note_seq.DEFAULT_QUARTERS_PER_MINUTE
  seconds_per_step = 60.0 / (self._steps_per_quarter * qpm)
  chunk_size_steps = self._steps_per_bar * self._chunk_size_bars

  seq = note_seq.NoteSequence()
  seq.tempos.add().qpm = qpm
  seq.ticks_per_quarter = note_seq.STANDARD_PPQ

  tracks = [[] for _ in range(self._max_num_instruments)]
  all_timed_chords = []

  for chunk_index, encoded_chunk in enumerate(samples):
    chunk_step_offset = chunk_index * chunk_size_steps

    # Decode all tracks in this chunk into performance representation.
    # We don't immediately convert to NoteSequence as we first want to group
    # by track and concatenate.
    for instrument, encoded_track in enumerate(encoded_chunk):
      track_tokens = np.argmax(encoded_track, axis=-1)

      # Trim to end token.
      if self.end_token in track_tokens:
        idx = track_tokens.tolist().index(self.end_token)
        track_tokens = track_tokens[:idx]

      # Handle program token. If there are extra program tokens, just use
      # the first one.
      program_tokens = [token for token in track_tokens
                        if token >= self._performance_encoding.num_classes]
      track_token_indices = [idx for idx, t in enumerate(track_tokens)
                             if t < self._performance_encoding.num_classes]
      track_tokens = [track_tokens[idx] for idx in track_token_indices]
      if not program_tokens:
        program = 0
        is_drum = False
      else:
        program = program_tokens[0] - self._performance_encoding.num_classes
        if program == note_seq.MAX_MIDI_PROGRAM + 1:
          # This is the drum program.
          program = 0
          is_drum = True
        else:
          is_drum = False

      # Decode the tokens into a performance track.
      track = performance_lib.MetricPerformance(
          quantized_sequence=None,
          steps_per_quarter=self._steps_per_quarter,
          start_step=0,
          num_velocity_bins=self._num_velocity_bins,
          program=program,
          is_drum=is_drum)
      for token in track_tokens:
        track.append(self._performance_encoding.decode_event(token))

      if controls is not None:
        # Get the corresponding chord and time for each event in the track.
        # This is a little tricky since we removed extraneous program tokens
        # when constructing the track.
        track_chord_tokens = np.argmax(controls[chunk_index][instrument],
                                       axis=-1)
        track_chord_tokens = [track_chord_tokens[idx]
                              for idx in track_token_indices]
        chords = [self._chord_encoding.decode_event(token)
                  for token in track_chord_tokens]
        chord_times = [(chunk_step_offset + step) * seconds_per_step
                       for step in track.steps if step < chunk_size_steps]
        all_timed_chords += zip(chord_times, chords)

      # Make sure the track has the proper length in time steps.
      track.set_length(chunk_size_steps)

      # Aggregate by instrument.
      tracks[instrument].append(track)

  # Concatenate all of the track chunks for each instrument.
  for instrument, track_chunks in enumerate(tracks):
    if track_chunks:
      track = track_chunks[0]
      for t in track_chunks[1:]:
        for e in t:
          track.append(e)
      track_seq = track.to_sequence(instrument=instrument, qpm=qpm)
      seq.notes.extend(track_seq.notes)

  # Set total time.
  if seq.notes:
    seq.total_time = max(note.end_time for note in seq.notes)

  if self._chord_encoding:
    # Sort chord times from all tracks and add to the sequence.
    all_chord_times, all_chords = zip(*sorted(all_timed_chords))
    chords_lib.add_chords_to_sequence(seq, all_chords, all_chord_times)

  return seq
# The purpose of this file is to test whether the parsed data from
# magentaparser.py can be converted back into a MIDI sequence and exported as
# a MIDI file. This is purely to make sure that notes predicted by the model
# can be properly formatted so that they can also be converted back into a
# MIDI file.
import pandas as pd
import magenta
import note_seq
import os

directory = 'parsed_songs'
df = pd.read_csv(directory + '/Time.csv')

seq = note_seq.NoteSequence()
for i in range(len(df)):
  seq.notes.add(
      pitch=df.at[i, 'pitch'],
      velocity=df.at[i, 'velocity'],
      start_time=df.at[i, 'start_time'],
      end_time=df.at[i, 'end_time'])

tempo = df['tempo'].iloc[0]
seq.tempos.add(qpm=tempo)

note_seq.sequence_proto_to_midi_file(seq, 'songs_test/Tim_reconstruct.mid')
def setUp(self):
  super().setUp()
  self.sequence = note_seq.NoteSequence()
  self.sequence.ticks_per_quarter = 220
  self.sequence.tempos.add().qpm = 120.0