Example #1
def split_wav(input_example, min_length, max_length, sample_rate,
              debug_output_directory, split_example, load_audio_with_librosa):
    """Splits wav and midi files for the dataset."""
    tf.logging.info('Splitting %s',
                    input_example.features.feature['id'].bytes_list.value[0])

    wav_data = input_example.features.feature['audio'].bytes_list.value[0]

    ns = music_pb2.NoteSequence.FromString(
        input_example.features.feature['sequence'].bytes_list.value[0])

    Metrics.counter('split_wav', 'read_midi_wav_to_split').inc()

    if not split_example:
        split_examples = audio_label_data_utils.process_record(
            wav_data,
            ns,
            ns.id,
            min_length=0,
            max_length=-1,
            sample_rate=sample_rate,
            allow_empty_notesequence=True,
            load_audio_with_librosa=load_audio_with_librosa)

        for example in split_examples:
            Metrics.counter('split_wav', 'full_example').inc()
            yield example
    else:
        try:
            split_examples = audio_label_data_utils.process_record(
                wav_data,
                ns,
                ns.id,
                min_length=min_length,
                max_length=max_length,
                sample_rate=sample_rate,
                load_audio_with_librosa=load_audio_with_librosa)

            for example in split_examples:
                Metrics.counter('split_wav', 'split_example').inc()
                yield example
        except AssertionError:
            output_file = 'badexample-' + hashlib.md5(
                ns.id.encode('utf-8')).hexdigest() + '.proto'
            output_path = os.path.join(debug_output_directory, output_file)
            tf.logging.error('Exception processing %s. Writing file to %s',
                             ns.id, output_path)
            with tf.gfile.Open(output_path, 'wb') as f:
                f.write(input_example.SerializeToString())
            raise
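
split_wav relies on process_record's convention that min_length=0 together with max_length=-1 means "do not split". A minimal, self-contained sketch of that behavior on one second of synthetic silence (the import paths assume the Magenta onsets_frames_transcription layout and may differ between versions):

import numpy as np

from magenta.models.onsets_frames_transcription import audio_label_data_utils
from magenta.music import audio_io
from magenta.music.protobuf import music_pb2

sample_rate = 16000
silence = np.zeros(sample_rate, dtype=np.float32)  # one second of silence
wav_bytes = audio_io.samples_to_wav_data(silence, sample_rate)

examples = list(
    audio_label_data_utils.process_record(
        wav_data=wav_bytes,
        ns=music_pb2.NoteSequence(),    # no labels
        example_id='synthetic-silence',
        min_length=0,
        max_length=-1,                  # -1 disables chunking
        sample_rate=sample_rate,
        allow_empty_notesequence=True))

assert len(examples) == 1  # the whole clip comes back as a single tf.train.Example
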
def generate_train_set(exclude_ids):
    """Generate the train TFRecord."""
    train_file_pairs = []
    for directory in train_dirs:
        path = os.path.join(FLAGS.input_dir, directory)
        path = os.path.join(path, '*.wav')
        wav_files = glob.glob(path)
        # find matching mid files
        for wav_file in wav_files:
            base_name_root, _ = os.path.splitext(wav_file)
            mid_file = base_name_root + '.mid'
            if filename_to_id(wav_file) not in exclude_ids:
                train_file_pairs.append((wav_file, mid_file))

    train_output_name = os.path.join(FLAGS.output_dir,
                                     'maps_config2_train.tfrecord')

    with tf.python_io.TFRecordWriter(train_output_name) as writer:
        for idx, pair in enumerate(train_file_pairs):
            print('{} of {}: {}'.format(idx, len(train_file_pairs), pair[0]))
            # load the wav data
            wav_data = tf.gfile.Open(pair[0], 'rb').read()
            # load the midi data and convert to a notesequence
            ns = midi_io.midi_file_to_note_sequence(pair[1])
            for example in audio_label_data_utils.process_record(
                    wav_data, ns, pair[0], FLAGS.min_length, FLAGS.max_length,
                    FLAGS.sample_rate):
                writer.write(example.SerializeToString())
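
generate_train_set above calls a filename_to_id helper that is not shown in this snippet. A hypothetical stand-in (not the original implementation) that keys matching .wav/.mid pairs by their shared basename:

def filename_to_id(filename):
    # Hypothetical placeholder: use the basename without extension as the id,
    # so 'MAPS_MUS-foo.wav' and 'MAPS_MUS-foo.mid' map to the same id.
    return os.path.splitext(os.path.basename(filename))[0]
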
Example #3
def infer(filename):
    # Read the WAV file as binary
    wav = open(filename, 'rb')
    wav_data = wav.read()
    wav.close()

    tf.logging.info('User .WAV file %s length %s bytes', filename,
                    len(wav_data))

    ## Preprocessing
    # Split into chunks, then convert to Protocol Buffers
    to_process = []
    examples = list(
        audio_label_data_utils.process_record(wav_data=wav_data,
                                              ns=music_pb2.NoteSequence(),
                                              example_id=filename,
                                              min_length=0,
                                              max_length=-1,
                                              allow_empty_notesequence=True))

    # Serialize the split buffers
    to_process.append(examples[0].SerializeToString())

    #############################################################

    # Feed the serialized buffers into the iterator
    sess.run(iterator.initializer, {example: to_process})

    # Inference
    predictions = list(estimator.predict(input_fn,
                                         yield_single_examples=False))
    # Make sure exactly one prediction was produced
    assert len(predictions) == 1

    # Fetch the prediction results
    frame_predictions = predictions[0]['frame_predictions'][0]
    onset_predictions = predictions[0]['onset_predictions'][0]  # note onsets
    velocity_values = predictions[0]['velocity_values'][0]  # note velocities (dynamics)

    # Encode as MIDI
    sequence_prediction = sequences_lib.pianoroll_to_note_sequence(
        frame_predictions,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_duration_ms=0,
        min_midi_pitch=constants.MIN_MIDI_PITCH,
        onset_predictions=onset_predictions,
        velocity_values=velocity_values)

    basename = os.path.split(os.path.splitext(filename)[0])[1] + '.mid'
    output_filename = os.path.join('', basename)

    midi_filename = (output_filename)
    midi_io.sequence_proto_to_midi_file(sequence_prediction, midi_filename)

    print('Program Ended, Your MIDI File is in', output_filename)

    sess.close()
Example #4
    def process(files):
        for fn in files:
            print('**\n\n', fn, '\n\n**')
            with open(fn, 'rb', buffering=0) as f:
                wav_data = f.read()
            example_list = list(
                audio_label_data_utils.process_record(
                    wav_data=wav_data,
                    ns=music_pb2.NoteSequence(),
                    example_id=fn,
                    min_length=0,
                    max_length=-1,
                    allow_empty_notesequence=True))
            assert len(example_list) == 1
            to_process.append(example_list[0].SerializeToString())
            print('Processing complete for', fn)

            sess = tf.Session()

            sess.run([
                tf.initializers.global_variables(),
                tf.initializers.local_variables()
            ])

            sess.run(iterator.initializer, {examples: to_process})

            def transcription_data(params):
                del params
                return tf.data.Dataset.from_tensors(sess.run(next_record))


            input_fn = infer_util.labels_to_features_wrapper(transcription_data)

            #@title Run inference
            prediction_list = list(
                estimator.predict(
                    input_fn,
                    yield_single_examples=False))
            assert len(prediction_list) == 1

            # Ignore warnings caused by pyfluidsynth
            import warnings
            warnings.filterwarnings("ignore", category=DeprecationWarning) 

            sequence_prediction = music_pb2.NoteSequence.FromString(
                prediction_list[0]['sequence_predictions'][0])

            pathname = fn.split('/').pop()
            print('**\n\n', pathname, '\n\n**')
            midi_filename = '{outputs}/{file}.mid'.format(outputs=output,file=pathname)
            midi_io.sequence_proto_to_midi_file(sequence_prediction, midi_filename)
def create_example(filename):
    """Processes an audio file into an Example proto."""
    wav_data = tf.gfile.Open(filename, 'rb').read()
    example_list = list(
        audio_label_data_utils.process_record(
            wav_data=wav_data,
            ns=music_pb2.NoteSequence(),
            # decode to handle filenames with extended characters.
            example_id=six.ensure_text(filename, 'utf-8'),
            min_length=0,
            max_length=-1,
            allow_empty_notesequence=True))
    assert len(example_list) == 1
    return example_list[0].SerializeToString()
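
A hedged usage sketch (not part of the example above): write the serialized Example to a TFRecord file that an inference pipeline can consume. The paths 'input.wav' and 'inference_input.tfrecord' are placeholders.

serialized = create_example('input.wav')
with tf.python_io.TFRecordWriter('inference_input.tfrecord') as writer:
    writer.write(serialized)
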
Example #6
    def process(self, paths):
        wav_path, midi_path = paths

        if midi_path:
            if FLAGS.use_midi_stems:
                base_ns = note_sequence_from_directory(
                    os.path.dirname(midi_path))
            else:
                base_ns = midi_io.midi_file_to_note_sequence(midi_path)
            base_ns.filename = midi_path
        else:
            base_ns = music_pb2.NoteSequence()

        logging.info('Creating Example %s:%s', midi_path, wav_path)
        if FLAGS.convert_flac:
            samples, sr = librosa.load(wav_path, FLAGS.sample_rate)
            wav_data = audio_io.samples_to_wav_data(samples, sr)
        else:
            wav_data = tf.io.gfile.GFile(wav_path, 'rb').read()

        ns = copy.deepcopy(base_ns)

        # Use the wav and midi paths to form the example id.
        ns.id = '%s:%s' % (wav_path, midi_path)

        Metrics.counter('create_example', 'read_midi_wav').inc()

        if FLAGS.max_length > 0:
            split_examples = audio_label_data_utils.process_record(
                wav_data,
                ns,
                ns.id,
                min_length=FLAGS.min_length,
                max_length=FLAGS.max_length,
                sample_rate=FLAGS.sample_rate,
                load_audio_with_librosa=False)

            for example in split_examples:
                Metrics.counter('split_wav', 'split_example').inc()
                yield example
        else:

            example = audio_label_data_utils.create_example(
                ns.id, ns, wav_data)

            Metrics.counter('create_example', 'created_example').inc()
            yield example
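
note_sequence_from_directory (used when FLAGS.use_midi_stems is set) is not defined in this snippet. A hypothetical reconstruction that merges every .mid stem in a directory into one NoteSequence; the merge logic is illustrative only:

def note_sequence_from_directory(dirname):
    merged = music_pb2.NoteSequence()
    for midi_path in sorted(tf.io.gfile.glob(os.path.join(dirname, '*.mid'))):
        stem = midi_io.midi_file_to_note_sequence(midi_path)
        merged.notes.extend(stem.notes)
        merged.total_time = max(merged.total_time, stem.total_time)
    return merged
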
Example #7
def inference(filename):
    # Read the audio (.wav) file
    wav_file = open(filename, mode='rb')
    wav_data = wav_file.read()
    wav_file.close()
    
    print('User uploaded file "{name}" with length {length} bytes'.format(name=filename, length=len(wav_data)))

    # Split into chunks, then build the data as protobufs
    to_process = []
    example_list = list(
        audio_label_data_utils.process_record(
            wav_data=wav_data,
            ns=music_pb2.NoteSequence(),
            example_id=filename,
            min_length=0,
            max_length=-1,
            allow_empty_notesequence=True))
    
    # Serialize
    to_process.append(example_list[0].SerializeToString())

    # Run the session
    sess.run(iterator.initializer, {examples: to_process})

    # Predict
    prediction_list = list(estimator.predict(input_fn, yield_single_examples=False))
    assert len(prediction_list) == 1

    # Fetch the prediction results
    frame_predictions = prediction_list[0]['frame_predictions'][0]
    onset_predictions = prediction_list[0]['onset_predictions'][0]
    velocity_values = prediction_list[0]['velocity_values'][0]

    # Build a MIDI note sequence from the prediction results
    sequence_prediction = sequences_lib.pianoroll_to_note_sequence(
        frame_predictions,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_duration_ms=0,
        min_midi_pitch=constants.MIN_MIDI_PITCH,
        onset_predictions=onset_predictions,
        velocity_values=velocity_values)

    basename = os.path.split(os.path.splitext(filename)[0])[1] + '.mid'
    output_filename = os.path.join(env.MIDI_DIRECTORY, basename)

    # Export the note sequence as a MIDI file
    midi_filename = (output_filename)
    midi_io.sequence_proto_to_midi_file(sequence_prediction, midi_filename)

    return basename
Example #8
def mix_examples(mixid_exs, sample_rate, load_audio_with_librosa):
    """Mix several Examples together to create a new example."""
    mixid, exs = mixid_exs
    del mixid

    example_samples = []
    example_sequences = []

    for ex_str in exs:
        ex = tf.train.Example.FromString(ex_str)
        wav_data = ex.features.feature['audio'].bytes_list.value[0]
        if load_audio_with_librosa:
            samples = audio_io.wav_data_to_samples_librosa(
                wav_data, sample_rate)
        else:
            samples = audio_io.wav_data_to_samples(wav_data, sample_rate)
        example_samples.append(samples)
        ns = music_pb2.NoteSequence.FromString(
            ex.features.feature['sequence'].bytes_list.value[0])
        example_sequences.append(ns)

    mixed_samples, mixed_sequence = audio_label_data_utils.mix_sequences(
        individual_samples=example_samples,
        sample_rate=sample_rate,
        individual_sequences=example_sequences)

    mixed_wav_data = audio_io.samples_to_wav_data(mixed_samples, sample_rate)

    mixed_id = '::'.join(['mixed'] + [ns.id for ns in example_sequences])
    mixed_sequence.id = mixed_id
    mixed_filename = '::'.join(['mixed'] +
                               [ns.filename for ns in example_sequences])
    mixed_sequence.filename = mixed_filename

    examples = list(
        audio_label_data_utils.process_record(mixed_wav_data,
                                              mixed_sequence,
                                              mixed_id,
                                              min_length=0,
                                              max_length=-1,
                                              sample_rate=sample_rate))
    assert len(examples) == 1
    return examples[0]
    def testSplitAudioLabelData(self):
        wav_data, sequence = self._CreateSyntheticExample()
        records = audio_label_data_utils.process_record(
            wav_data, sequence, 'test', sample_rate=SAMPLE_RATE)

        for record in records:
            audio = record.features.feature['audio'].bytes_list.value[0]
            velocity_range = music_pb2.VelocityRange.FromString(
                record.features.feature['velocity_range'].bytes_list.value[0])
            note_sequence = music_pb2.NoteSequence.FromString(
                record.features.feature['sequence'].bytes_list.value[0])

            expected_samples = np.zeros(10 * SAMPLE_RATE)
            np.testing.assert_array_equal(
                expected_samples,
                audio_io.wav_data_to_samples(audio, sample_rate=SAMPLE_RATE))
            self.assertEqual(velocity_range.min, 20)
            self.assertEqual(velocity_range.max, 80)
            self.assertEqual(note_sequence.notes[0].velocity, 20)
            self.assertEqual(note_sequence.notes[0].end_time, 5.)
            self.assertEqual(note_sequence.notes[1].velocity, 80)
            self.assertEqual(note_sequence.notes[1].end_time, 10.)
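
The test above depends on a _CreateSyntheticExample helper that is not included here. A plausible reconstruction, inferred only from the assertions (10 seconds of silent audio, two notes with velocities 20 and 80 ending at 5 s and 10 s); the pitches and exact layout are guesses:

    def _CreateSyntheticExample(self):
        sequence = music_pb2.NoteSequence()
        sequence.notes.add(pitch=60, velocity=20, start_time=0.0, end_time=5.0)
        sequence.notes.add(pitch=64, velocity=80, start_time=5.0, end_time=10.0)
        sequence.total_time = 10.0
        samples = np.zeros(10 * SAMPLE_RATE)
        wav_data = audio_io.samples_to_wav_data(samples, SAMPLE_RATE)
        return wav_data, sequence
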
def transcribe(audio, sr, cuda=False):
    """
    Google sucks and want to use audio path (raw wav) instead of decoded
    samples loosing in decoupling between file format and DSP

    input audio and sample rate, output mat like asmd with (pitch, ons, offs, velocity)
    """

    # Simple hack: temporarily monkey-patch wav_data_to_samples so that audio data
    # that is already loaded can be accepted, keeping a reasonable interface
    # (and decoupling I/O from processing).
    original_google_sucks = audio_io.wav_data_to_samples
    audio_io.wav_data_to_samples = google_sucks
    audio = np.array(audio)
    config = configs.CONFIG_MAP['onsets_frames']
    hparams = config.hparams
    hparams.use_cudnn = cuda
    hparams.batch_size = 1
    examples = tf.placeholder(tf.string, [None])

    dataset = data.provide_batch(examples=examples,
                                 preprocess_examples=True,
                                 params=hparams,
                                 is_training=False,
                                 shuffle_examples=False,
                                 skip_n_initial_records=0)

    estimator = train_util.create_estimator(config.model_fn, CHECKPOINT_DIR,
                                            hparams)

    iterator = dataset.make_initializable_iterator()
    next_record = iterator.get_next()

    example_list = list(
        audio_label_data_utils.process_record(wav_data=audio,
                                              sample_rate=sr,
                                              ns=music_pb2.NoteSequence(),
                                              example_id="fakeid",
                                              min_length=0,
                                              max_length=-1,
                                              allow_empty_notesequence=True,
                                              load_audio_with_librosa=False))
    assert len(example_list) == 1
    to_process = [example_list[0].SerializeToString()]

    sess = tf.Session()

    sess.run([
        tf.initializers.global_variables(),
        tf.initializers.local_variables()
    ])

    sess.run(iterator.initializer, {examples: to_process})

    def transcription_data(params):
        del params
        return tf.data.Dataset.from_tensors(sess.run(next_record))

    # Put back the original function (it still writes and reloads a file
    # internally, which is wasteful).
    audio_io.wav_data_to_samples = original_google_sucks
    input_fn = infer_util.labels_to_features_wrapper(transcription_data)

    prediction_list = list(
        estimator.predict(input_fn, yield_single_examples=False))

    assert len(prediction_list) == 1

    notes = music_pb2.NoteSequence.FromString(
        prediction_list[0]['sequence_predictions'][0]).notes

    out = np.empty((len(notes), 4))
    for i, note in enumerate(notes):
        out[i] = [note.pitch, note.start_time, note.end_time, note.velocity]
    return out
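
transcribe temporarily replaces audio_io.wav_data_to_samples with a module-level google_sucks function that is not shown above. Its only job is to pass the already-decoded samples straight through; a hedged sketch of what it presumably looks like:

def google_sucks(samples, sr):
    # The patched wav_data_to_samples receives the numpy array that transcribe()
    # passed in as `wav_data`; return it unchanged instead of decoding WAV bytes.
    del sr
    return samples
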
Example #11
"""# Upload Audio

Run the following cell to upload audio files.
"""
# pianoAudio = 'misty.wav'
pianoAudio = 'lalala.wav'
to_process = []

with open(pianoAudio, mode='rb') as file:
    wav_data = file.read()

example_list = list(
    audio_label_data_utils.process_record(wav_data=wav_data,
                                          ns=music_pb2.NoteSequence(),
                                          example_id='accompaniment.wav',
                                          min_length=0,
                                          max_length=-1,
                                          allow_empty_notesequence=True))

to_process.append(example_list[0].SerializeToString())
sess = tf.Session()

sess.run(
    [tf.initializers.global_variables(),
     tf.initializers.local_variables()])

sess.run(iterator.initializer, {examples: to_process})

input_fn = infer_util.labels_to_features_wrapper(transcription_data)
"""# Inference