  def testMixSequences(self):
    sample_rate = 10

    sequence1 = music_pb2.NoteSequence()
    sequence1.notes.add(pitch=60, start_time=0.5, end_time=1.0, velocity=90)
    sequence1.notes.add(pitch=62, start_time=1.0, end_time=2.0, velocity=90)
    sequence1.total_time = 2.0
    samples1 = np.linspace(0, 1, int(sample_rate * sequence1.total_time))

    sequence2 = music_pb2.NoteSequence()
    sequence2.notes.add(pitch=64, start_time=0.5, end_time=1.0, velocity=90)
    sequence2.total_time = 1.0
    samples2 = np.linspace(0, 1, int(sample_rate * sequence2.total_time))

    mixed_samples, mixed_sequence = audio_label_data_utils.mix_sequences(
        [samples1, samples2], sample_rate, [sequence1, sequence2])

    expected_sequence = music_pb2.NoteSequence()
    expected_sequence.ticks_per_quarter = constants.STANDARD_PPQ
    expected_sequence.notes.add(
        pitch=60, start_time=0.5, end_time=1.0, velocity=127)
    expected_sequence.notes.add(
        pitch=62, start_time=1.0, end_time=2.0, velocity=127)
    expected_sequence.notes.add(
        pitch=64, start_time=0.5, end_time=1.0, velocity=127)
    expected_sequence.notes.add(
        pitch=64, start_time=1.5, end_time=2.0, velocity=127)
    expected_sequence.total_time = 2.0

    self.assertProtoEquals(expected_sequence, mixed_sequence)

    expected_samples = (
        np.concatenate([samples2, samples2]) * .5 + samples1 * .5)
    np.testing.assert_array_equal(expected_samples, mixed_samples)

  def testMixSequencesTotalTime(self):
    sample_rate = 10

    sequence1 = music_pb2.NoteSequence()
    sequence1.notes.add(pitch=60, start_time=0.5, end_time=1.0, velocity=90)
    sequence1.notes.add(pitch=62, start_time=1.0, end_time=1.5, velocity=90)
    sequence1.total_time = 1.5
    samples1 = np.linspace(0, 1, int(sample_rate * 2))

    sequence2 = music_pb2.NoteSequence()
    sequence2.notes.add(pitch=64, start_time=0.5, end_time=0.9, velocity=90)
    sequence2.total_time = 0.9
    samples2 = np.linspace(0, 1, int(sample_rate * 1))

    mixed_samples, mixed_sequence = audio_label_data_utils.mix_sequences(
        [samples1, samples2], sample_rate, [sequence1, sequence2])

    expected_sequence = music_pb2.NoteSequence()
    expected_sequence.ticks_per_quarter = constants.STANDARD_PPQ
    expected_sequence.notes.add(
        pitch=60, start_time=0.5, end_time=1.0, velocity=127)
    expected_sequence.notes.add(
        pitch=62, start_time=1.0, end_time=1.5, velocity=127)
    expected_sequence.notes.add(
        pitch=64, start_time=0.5, end_time=0.9, velocity=127)
    expected_sequence.notes.add(
        pitch=64, start_time=1.5, end_time=1.9, velocity=127)

    # The expected total_time is 1.9 because each sequence is repeated
    # according to the length of its associated audio. sequence1 is not
    # repeated at all (its audio is 2 seconds), and sequence2 is repeated once
    # after shifting all of its notes by its audio length of 1 second. The
    # final total_time is left as is after the last repeat, so it ends up
    # being 1 + .9 seconds.
    expected_sequence.total_time = 1.9

    self.assertProtoEquals(expected_sequence, mixed_sequence)

    expected_samples = (
        np.concatenate([samples2, samples2]) * .5 + samples1 * .5)
    np.testing.assert_array_equal(expected_samples, mixed_samples)

  def testMixSequencesWithSustain(self):
    sample_rate = 10

    sequence1 = music_pb2.NoteSequence()
    sequence1.notes.add(pitch=60, start_time=0.5, end_time=0.6, velocity=90)
    sequence1.notes.add(pitch=62, start_time=1.0, end_time=2.0, velocity=90)
    sequence1.total_time = 2.0
    testing_lib.add_control_changes_to_sequence(
        sequence1, 0, [(0.0, 64, 127), (1.0, 64, 0)])
    samples1 = np.linspace(0, 1, int(sample_rate * sequence1.total_time))

    sequence2 = music_pb2.NoteSequence()
    sequence2.notes.add(pitch=64, start_time=0.5, end_time=0.6, velocity=90)
    sequence2.total_time = 1.0
    testing_lib.add_control_changes_to_sequence(
        sequence2, 0, [(0.0, 64, 127), (0.9, 64, 0)])
    samples2 = np.linspace(0, 1, int(sample_rate * sequence2.total_time))

    mixed_samples, mixed_sequence = audio_label_data_utils.mix_sequences(
        [samples1, samples2], sample_rate, [sequence1, sequence2])

    expected_sequence = music_pb2.NoteSequence()
    expected_sequence.ticks_per_quarter = constants.STANDARD_PPQ
    expected_sequence.notes.add(
        pitch=60, start_time=0.5, end_time=1.0, velocity=90)
    expected_sequence.notes.add(
        pitch=62, start_time=1.0, end_time=2.0, velocity=90)
    expected_sequence.notes.add(
        pitch=64, start_time=0.5, end_time=0.9, velocity=90)
    expected_sequence.notes.add(
        pitch=64, start_time=1.5, end_time=1.9, velocity=90)
    expected_sequence.total_time = 2.0

    self.assertProtoEquals(expected_sequence, mixed_sequence)

    expected_samples = (
        np.concatenate([samples2, samples2]) * .5 + samples1 * .5)
    np.testing.assert_array_equal(expected_samples, mixed_samples)

  def testMixSequencesLongerNoteSequence(self):
    sample_rate = 10

    sequence1 = music_pb2.NoteSequence()
    sequence1.notes.add(pitch=60, start_time=0.5, end_time=1.0, velocity=90)
    sequence1.notes.add(pitch=62, start_time=1.0, end_time=2.0, velocity=90)
    sequence1.total_time = 2.0
    # samples1 will be .1 seconds shorter than sequence1.
    samples1 = np.linspace(0, 1, int(sample_rate * (sequence1.total_time - .1)))

    sequence2 = music_pb2.NoteSequence()
    sequence2.notes.add(pitch=64, start_time=0.5, end_time=1.0, velocity=90)
    sequence2.total_time = 1.0
    samples2 = np.linspace(0, 1, int(sample_rate * sequence2.total_time))

    mixed_samples, mixed_sequence = audio_label_data_utils.mix_sequences(
        [samples1, samples2], sample_rate, [sequence1, sequence2])

    expected_sequence = music_pb2.NoteSequence()
    expected_sequence.ticks_per_quarter = constants.STANDARD_PPQ
    expected_sequence.notes.add(
        pitch=60, start_time=0.5, end_time=1.0, velocity=127)
    expected_sequence.notes.add(
        pitch=62, start_time=1.0, end_time=2.0, velocity=127)
    expected_sequence.notes.add(
        pitch=64, start_time=0.5, end_time=1.0, velocity=127)
    expected_sequence.notes.add(
        pitch=64, start_time=1.5, end_time=2.0, velocity=127)
    expected_sequence.total_time = 2.0

    self.assertProtoEquals(expected_sequence, mixed_sequence)

    # We expect samples1 to get 2 samples of padding and samples2 to be
    # repeated once fully and once with only a single sample.
    expected_samples = (
        np.concatenate([samples2, samples2, [samples2[0]]]) * .5 +
        np.concatenate([samples1, [0, 0]]) * .5)
    np.testing.assert_array_equal(expected_samples, mixed_samples)

def mix_examples(mixid_exs, sample_rate, load_audio_with_librosa):
  """Mix several Examples together to create a new example."""
  mixid, exs = mixid_exs
  del mixid

  # Decode the audio samples and NoteSequence from each serialized Example.
  example_samples = []
  example_sequences = []
  for ex_str in exs:
    ex = tf.train.Example.FromString(ex_str)

    wav_data = ex.features.feature['audio'].bytes_list.value[0]
    if load_audio_with_librosa:
      samples = audio_io.wav_data_to_samples_librosa(wav_data, sample_rate)
    else:
      samples = audio_io.wav_data_to_samples(wav_data, sample_rate)
    example_samples.append(samples)

    ns = music_pb2.NoteSequence.FromString(
        ex.features.feature['sequence'].bytes_list.value[0])
    example_sequences.append(ns)

  # Mix the audio and merge the label sequences.
  mixed_samples, mixed_sequence = audio_label_data_utils.mix_sequences(
      individual_samples=example_samples,
      sample_rate=sample_rate,
      individual_sequences=example_sequences)

  mixed_wav_data = audio_io.samples_to_wav_data(mixed_samples, sample_rate)

  # Derive the mix's id and filename from those of its component sequences.
  mixed_id = '::'.join(['mixed'] + [ns.id for ns in example_sequences])
  mixed_sequence.id = mixed_id
  mixed_filename = '::'.join(
      ['mixed'] + [ns.filename for ns in example_sequences])
  mixed_sequence.filename = mixed_filename

  # Expect a single un-split Example (min_length=0, max_length=-1).
  examples = list(
      audio_label_data_utils.process_record(
          mixed_wav_data,
          mixed_sequence,
          mixed_id,
          min_length=0,
          max_length=-1,
          sample_rate=sample_rate))
  assert len(examples) == 1
  return examples[0]
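

# A minimal usage sketch for mix_examples: as the code above shows, it takes a
# (mix_id, serialized_examples) pair and ignores the id. The helper
# _serialized_example and the concrete values below (ids, filenames, sample
# rate, mix id) are illustrative assumptions, not part of the real pipeline.
def _serialized_example(samples, ns, sample_rate):
  """Packs samples and a NoteSequence into a serialized tf.train.Example."""
  wav_data = audio_io.samples_to_wav_data(samples, sample_rate)
  example = tf.train.Example(
      features=tf.train.Features(
          feature={
              'audio':
                  tf.train.Feature(
                      bytes_list=tf.train.BytesList(value=[wav_data])),
              'sequence':
                  tf.train.Feature(
                      bytes_list=tf.train.BytesList(
                          value=[ns.SerializeToString()])),
          }))
  return example.SerializeToString()


def _mix_examples_usage_sketch():
  sample_rate = 16000
  ns1 = music_pb2.NoteSequence(id='a', filename='a.wav', total_time=1.0)
  ns1.notes.add(pitch=60, start_time=0.0, end_time=1.0, velocity=90)
  ns2 = music_pb2.NoteSequence(id='b', filename='b.wav', total_time=1.0)
  ns2.notes.add(pitch=64, start_time=0.0, end_time=1.0, velocity=90)
  samples = np.linspace(-1, 1, sample_rate)
  exs = [_serialized_example(samples, ns, sample_rate) for ns in (ns1, ns2)]
  # mix_examples unpacks the (mix_id, serialized_examples) pair itself and
  # returns a single mixed tf.train.Example.
  return mix_examples(('mix-0', exs), sample_rate,
                      load_audio_with_librosa=False)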