Example #1
0
  def _ExampleToInputs(self,
                       ex,
                       truncated_length=0,
                       crop_training_sequence_to_notes=False):
    """Decodes a tf.train.Example and forwards the pieces to _DataToInputs.

    Args:
      ex: tf.train.Example with 'id', 'sequence', and 'audio' bytes features.
      crop_training_sequence_to_notes: If True, crop the audio to the span
        covered by the note sequence before computing the spectrogram.
      truncated_length: Passed through to self._DataToInputs unchanged.

    Returns:
      The result of self._DataToInputs for the decoded example.
    """
    hparams = copy.deepcopy(constants.DEFAULT_HPARAMS)
    hparams.crop_training_sequence_to_notes = crop_training_sequence_to_notes

    features = ex.features.feature
    filename = features['id'].bytes_list.value[0]
    wav_data = features['audio'].bytes_list.value[0]
    sequence, crop_beginning_seconds = data.preprocess_sequence(
        features['sequence'].bytes_list.value[0], hparams)

    if crop_training_sequence_to_notes:
      # Restrict the audio to the interval actually covered by notes.
      wav_data = audio_io.crop_wav_data(wav_data, hparams.sample_rate,
                                        crop_beginning_seconds,
                                        sequence.total_time)

    frames_per_second = data.hparams_frames_per_second(hparams)
    spec = data.wav_to_spec(wav_data, hparams=hparams)
    roll = sequences_lib.sequence_to_pianoroll(
        sequence,
        frames_per_second=frames_per_second,
        min_pitch=constants.MIN_MIDI_PITCH,
        max_pitch=constants.MAX_MIDI_PITCH,
        min_frame_occupancy_for_label=0.0,
        onset_mode='length_ms',
        onset_length_ms=32.,
        onset_delay_ms=0.)
    length = data.wav_to_num_frames(
        wav_data, frames_per_second=frames_per_second)

    return self._DataToInputs(spec, roll.active, roll.weights, length, filename,
                              truncated_length)
Example #2
0
    def _ExampleToInputs(self,
                         ex,
                         truncated_length=0,
                         crop_training_sequence_to_notes=False):
        """Turns a serialized example into the inputs _DataToInputs expects."""
        hparams = copy.deepcopy(constants.DEFAULT_HPARAMS)
        hparams.crop_training_sequence_to_notes = crop_training_sequence_to_notes

        feature_map = ex.features.feature
        filename = feature_map['id'].bytes_list.value[0]
        sequence, crop_start_sec = data.preprocess_sequence(
            feature_map['sequence'].bytes_list.value[0], hparams)
        wav_data = feature_map['audio'].bytes_list.value[0]

        if crop_training_sequence_to_notes:
            # Trim audio to the note sequence's time span.
            wav_data = audio_io.crop_wav_data(wav_data, hparams.sample_rate,
                                              crop_start_sec,
                                              sequence.total_time)

        fps = data.hparams_frames_per_second(hparams)
        spec = data.wav_to_spec(wav_data, hparams=hparams)
        roll = sequences_lib.sequence_to_pianoroll(
            sequence,
            frames_per_second=fps,
            min_pitch=constants.MIN_MIDI_PITCH,
            max_pitch=constants.MAX_MIDI_PITCH,
            min_frame_occupancy_for_label=0.0,
            onset_mode='length_ms',
            onset_length_ms=32.,
            onset_delay_ms=0.)
        length = data.wav_to_num_frames(wav_data, frames_per_second=fps)

        return self._DataToInputs(spec, roll.active, roll.weights, length,
                                  filename, truncated_length)
def generate_train_set():
  """Generate the train TFRecord.

  Scans every directory in `train_dirs` for .wav files, pairs each with the
  .mid file sharing its basename, splits each recording into chunks between
  FLAGS.min_length and FLAGS.max_length seconds, and writes one
  tf.train.Example ('id' / 'sequence' / 'audio' bytes features) per chunk to
  maps_config2_train.tfrecord in FLAGS.output_dir.
  """
  train_file_pairs = []
  for directory in train_dirs:
    path = os.path.join(FLAGS.input_dir, directory)
    path = os.path.join(path, '*.wav')
    wav_files = glob.glob(path)
    # Find the matching .mid file for each wav.
    for wav_file in wav_files:
      base_name_root, _ = os.path.splitext(wav_file)
      mid_file = base_name_root + '.mid'
      train_file_pairs.append((wav_file, mid_file))

  train_output_name = os.path.join(FLAGS.output_dir,
                                   'maps_config2_train.tfrecord')

  with tf.python_io.TFRecordWriter(train_output_name) as writer:
    for pair in train_file_pairs:
      print(pair)
      # Load the wav data. Open in binary mode: wav files are binary, and a
      # text-mode read corrupts them (and raises under Python 3).
      wav_data = tf.gfile.Open(pair[0], 'rb').read()
      samples = audio_io.wav_data_to_samples(wav_data, FLAGS.sample_rate)

      # Load the midi data (also binary) and convert to a NoteSequence.
      midi_data = tf.gfile.Open(pair[1], 'rb').read()
      ns = midi_io.midi_to_sequence_proto(midi_data)

      splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                 FLAGS.min_length, FLAGS.max_length)

      for start, end in zip(splits[:-1], splits[1:]):
        # Drop chunks shorter than the configured minimum.
        if end - start < FLAGS.min_length:
          continue

        new_ns = sequences_lib.extract_subsequence(ns, start, end)
        new_wav_data = audio_io.crop_wav_data(wav_data, FLAGS.sample_rate,
                                              start, end - start)
        example = tf.train.Example(features=tf.train.Features(feature={
            'id':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[pair[0]]
                )),
            'sequence':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[new_ns.SerializeToString()]
                )),
            'audio':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[new_wav_data]
                ))

            }))
        writer.write(example.SerializeToString())
def generate_train_set():
    """Generate the train TFRecord.

    Pairs each .wav under the `train_dirs` directories with the .mid file of
    the same basename, splits each recording into chunks of between
    FLAGS.min_length and FLAGS.max_length seconds, and writes one
    tf.train.Example ('id' / 'sequence' / 'audio' bytes features) per chunk
    to maps_config2_train.tfrecord in FLAGS.output_dir.
    """
    train_file_pairs = []
    for directory in train_dirs:
        path = os.path.join(FLAGS.input_dir, directory)
        path = os.path.join(path, '*.wav')
        wav_files = glob.glob(path)
        # Find the matching .mid file for each wav.
        for wav_file in wav_files:
            base_name_root, _ = os.path.splitext(wav_file)
            mid_file = base_name_root + '.mid'
            train_file_pairs.append((wav_file, mid_file))

    train_output_name = os.path.join(FLAGS.output_dir,
                                     'maps_config2_train.tfrecord')

    with tf.python_io.TFRecordWriter(train_output_name) as writer:
        for pair in train_file_pairs:
            print(pair)
            # Load the wav data. Open in binary mode: wav files are binary,
            # and a text-mode read corrupts them (and raises under Python 3).
            wav_data = tf.gfile.Open(pair[0], 'rb').read()
            samples = audio_io.wav_data_to_samples(wav_data, FLAGS.sample_rate)

            # Load the midi data (also binary) and convert to a NoteSequence.
            midi_data = tf.gfile.Open(pair[1], 'rb').read()
            ns = midi_io.midi_to_sequence_proto(midi_data)

            splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                       FLAGS.min_length, FLAGS.max_length)

            for start, end in zip(splits[:-1], splits[1:]):
                # Drop chunks shorter than the configured minimum.
                if end - start < FLAGS.min_length:
                    continue

                new_ns = sequences_lib.extract_subsequence(ns, start, end)
                new_wav_data = audio_io.crop_wav_data(wav_data,
                                                      FLAGS.sample_rate, start,
                                                      end - start)
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'id':
                        tf.train.Feature(bytes_list=tf.train.BytesList(
                            value=[pair[0]])),
                        'sequence':
                        tf.train.Feature(bytes_list=tf.train.BytesList(
                            value=[new_ns.SerializeToString()])),
                        'audio':
                        tf.train.Feature(bytes_list=tf.train.BytesList(
                            value=[new_wav_data]))
                    }))
                writer.write(example.SerializeToString())
Example #5
0
  def transform_wav_data(wav_data, sequence_tensor):
    """Transforms with sox."""
    sequence, cropped_beginning_seconds = preprocess_sequence(
        sequence_tensor, hparams)

    # Audio augmentation is applied only while training.
    if is_training:
      jittered = audio_io.jitter_wav_data(wav_data, hparams.sample_rate,
                                          jitter_amount_sec)
      wav_data = audio_transform.transform_wav_audio(jittered, hparams)

    # Optionally trim the audio down to the span covered by the notes.
    if hparams.crop_training_sequence_to_notes:
      wav_data = audio_io.crop_wav_data(wav_data, hparams.sample_rate,
                                        cropped_beginning_seconds,
                                        sequence.total_time)

    # Optional loudness normalization.
    if hparams.normalize_audio:
      wav_data = audio_io.normalize_wav_data(wav_data, hparams.sample_rate)

    return [wav_data]
Example #6
0
  def transform_wav_data(wav_data, sequence_tensor):
    """Transforms with sox."""
    ns, crop_offset_sec = preprocess_sequence(sequence_tensor, hparams)

    if is_training:
      # Jitter, then run the sox-based audio transformations; both are
      # training-time augmentations only.
      wav_data = audio_io.jitter_wav_data(wav_data, hparams.sample_rate,
                                          jitter_amount_sec)
      wav_data = audio_transform.transform_wav_audio(wav_data, hparams)

    if hparams.crop_training_sequence_to_notes:
      # Keep only the audio interval the note sequence covers.
      wav_data = audio_io.crop_wav_data(wav_data, hparams.sample_rate,
                                        crop_offset_sec, ns.total_time)

    if hparams.normalize_audio:
      wav_data = audio_io.normalize_wav_data(wav_data, hparams.sample_rate)

    return [wav_data]
def generate_train_set(exclude_ids):
  """Generate the train TFRecord."""

  def _bytes_feature(value):
    # Wrap a single bytes value in a tf.train.Feature.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

  train_file_pairs = []
  for directory in train_dirs:
    wav_glob = os.path.join(FLAGS.input_dir, directory, '*.wav')
    for wav_file in glob.glob(wav_glob):
      # Skip wavs whose id is excluded; pair the rest with their .mid file.
      if filename_to_id(wav_file) in exclude_ids:
        continue
      root, _ = os.path.splitext(wav_file)
      train_file_pairs.append((wav_file, root + '.mid'))

  train_output_name = os.path.join(FLAGS.output_dir,
                                   'maps_config2_train.tfrecord')

  with tf.python_io.TFRecordWriter(train_output_name) as writer:
    for pair in train_file_pairs:
      print(pair)
      wav_path, mid_path = pair
      # Raw wav bytes plus a peak-normalized sample array.
      wav_data = tf.gfile.Open(wav_path, 'rb').read()
      samples = librosa.util.normalize(
          audio_io.wav_data_to_samples(wav_data, FLAGS.sample_rate),
          norm=np.inf)

      # Read the midi file as a NoteSequence.
      ns = midi_io.midi_file_to_note_sequence(mid_path)

      splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                 FLAGS.min_length, FLAGS.max_length)

      note_velocities = [note.velocity for note in ns.notes]
      new_velocity_tuple = music_pb2.VelocityRange(
          min=np.min(note_velocities), max=np.max(note_velocities))

      for start, end in zip(splits[:-1], splits[1:]):
        if end - start < FLAGS.min_length:
          continue

        new_ns = sequences_lib.extract_subsequence(ns, start, end)
        new_wav_data = audio_io.crop_wav_data(wav_data, FLAGS.sample_rate,
                                              start, end - start)
        example = tf.train.Example(features=tf.train.Features(feature={
            'id': _bytes_feature(wav_path),
            'sequence': _bytes_feature(new_ns.SerializeToString()),
            'audio': _bytes_feature(new_wav_data),
            'velocity_range': _bytes_feature(
                new_velocity_tuple.SerializeToString()),
        }))
        writer.write(example.SerializeToString())
def generate_train_set(exclude_ids):
    """Generate the train TFRecord."""
    train_file_pairs = []
    for directory in train_dirs:
        wav_pattern = os.path.join(FLAGS.input_dir, directory, '*.wav')
        for wav_file in glob.glob(wav_pattern):
            # Only keep wavs whose id is not excluded; the matching midi
            # file shares the wav's basename.
            if filename_to_id(wav_file) in exclude_ids:
                continue
            stem, _ = os.path.splitext(wav_file)
            train_file_pairs.append((wav_file, stem + '.mid'))

    train_output_name = os.path.join(FLAGS.output_dir,
                                     'maps_config2_train.tfrecord')

    with tf.python_io.TFRecordWriter(train_output_name) as writer:
        for pair in train_file_pairs:
            print(pair)
            wav_path, mid_path = pair
            # Raw wav bytes, plus a peak-normalized sample array for
            # split-point detection.
            wav_data = tf.gfile.Open(wav_path, 'rb').read()
            samples = audio_io.wav_data_to_samples(wav_data, FLAGS.sample_rate)
            samples = librosa.util.normalize(samples, norm=np.inf)

            ns = midi_io.midi_file_to_note_sequence(mid_path)

            splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                       FLAGS.min_length, FLAGS.max_length)

            velocities = [note.velocity for note in ns.notes]
            velocity_range = music_pb2.VelocityRange(min=np.min(velocities),
                                                     max=np.max(velocities))

            for start, end in zip(splits[:-1], splits[1:]):
                if end - start < FLAGS.min_length:
                    continue

                new_ns = sequences_lib.extract_subsequence(ns, start, end)
                new_wav_data = audio_io.crop_wav_data(wav_data,
                                                      FLAGS.sample_rate, start,
                                                      end - start)
                feature = {
                    'id':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[wav_path])),
                    'sequence':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[new_ns.SerializeToString()])),
                    'audio':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[new_wav_data])),
                    'velocity_range':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[velocity_range.SerializeToString()])),
                }
                example = tf.train.Example(
                    features=tf.train.Features(feature=feature))
                writer.write(example.SerializeToString())