Exemplo n.º 1
0
def process_record(wav_data,
                   ns,
                   example_id,
                   min_length=5,
                   max_length=20,
                   sample_rate=16000,
                   allow_empty_notesequence=False,
                   load_audio_with_librosa=False):
  """Split a record into chunks and create an example proto.

  To use the full length audio and notesequence, set min_length=0 and
  max_length=-1.

  Args:
    wav_data: audio data in WAV format.
    ns: corresponding NoteSequence.
    example_id: id for the example proto
    min_length: minimum length in seconds for audio chunks.
    max_length: maximum length in seconds for audio chunks.
    sample_rate: desired audio sample rate.
    allow_empty_notesequence: whether an empty NoteSequence is allowed.
    load_audio_with_librosa: Use librosa for sampling. Works with 24-bit wavs.

  Yields:
    Example protos.
  """
  try:
    if load_audio_with_librosa:
      samples = audio_io.wav_data_to_samples_librosa(wav_data, sample_rate)
    else:
      samples = audio_io.wav_data_to_samples(wav_data, sample_rate)
  except audio_io.AudioIOReadError as e:
    print('Exception %s', e)
    return
  samples = librosa.util.normalize(samples, norm=np.inf)

  # Add padding to samples if notesequence is longer.
  pad_to_samples = int(math.ceil(ns.total_time * sample_rate))
  padding_needed = pad_to_samples - samples.shape[0]
  if padding_needed > 5 * sample_rate:
    raise ValueError(
        'Would have padded {} more than 5 seconds to match note sequence total '
        'time. ({} original samples, {} sample rate, {} sample seconds, '
        '{} sequence seconds) This likely indicates a problem with the source '
        'data.'.format(
            example_id, samples.shape[0], sample_rate,
            samples.shape[0] / sample_rate, ns.total_time))
  samples = np.pad(samples, (0, max(0, padding_needed)), 'constant')

  if max_length == min_length:
    splits = np.arange(0, ns.total_time, max_length)
  elif max_length > 0:
    splits = find_split_points(ns, samples, sample_rate, min_length, max_length)
  else:
    splits = [0, ns.total_time]
  velocity_range = velocity_range_from_sequence(ns)

  for start, end in zip(splits[:-1], splits[1:]):
    if end - start < min_length:
      continue

    if start == 0 and end == ns.total_time:
      new_ns = ns
    else:
      new_ns = sequences_lib.extract_subsequence(ns, start, end)

    if not new_ns.notes and not allow_empty_notesequence:
      tf.logging.warning('skipping empty sequence')
      continue

    if start == 0 and end == ns.total_time:
      new_samples = samples
    else:
      # the resampling that happen in crop_wav_data is really slow
      # and we've already done it once, avoid doing it twice
      new_samples = audio_io.crop_samples(samples, sample_rate, start,
                                          end - start)
    new_wav_data = audio_io.samples_to_wav_data(new_samples, sample_rate)
    yield create_example(
        example_id, new_ns, new_wav_data, velocity_range=velocity_range)
Exemplo n.º 2
0
    def process(self, kv):
        # Seed random number generator based on key so that hop times are
        # deterministic.
        key, ns_str = kv
        m = hashlib.md5(key.encode('utf-8'))
        random.seed(int(m.hexdigest(), 16))

        # Deserialize NoteSequence proto.
        ns = note_seq.NoteSequence.FromString(ns_str)

        # Apply sustain pedal.
        ns = sequences_lib.apply_sustain_control_changes(ns)

        # Remove control changes as there are potentially a lot of them and they are
        # no longer needed.
        del ns.control_changes[:]

        if (self._min_hop_size_seconds
                and ns.total_time < self._min_hop_size_seconds):
            Metrics.counter('extract_examples', 'sequence_too_short').inc()
            return

        sequences = []
        for _ in range(self._num_replications):
            if self._max_hop_size_seconds:
                if self._max_hop_size_seconds == self._min_hop_size_seconds:
                    # Split using fixed hop size.
                    sequences += sequences_lib.split_note_sequence(
                        ns, self._max_hop_size_seconds)
                else:
                    # Sample random hop positions such that each segment size is within
                    # the specified range.
                    hop_times = [0.0]
                    while hop_times[
                            -1] <= ns.total_time - self._min_hop_size_seconds:
                        if hop_times[
                                -1] + self._max_hop_size_seconds < ns.total_time:
                            # It's important that we get a valid hop size here, since the
                            # remainder of the sequence is too long.
                            max_offset = min(
                                self._max_hop_size_seconds, ns.total_time -
                                self._min_hop_size_seconds - hop_times[-1])
                        else:
                            # It's okay if the next hop time is invalid (in which case we'll
                            # just stop).
                            max_offset = self._max_hop_size_seconds
                        offset = random.uniform(self._min_hop_size_seconds,
                                                max_offset)
                        hop_times.append(hop_times[-1] + offset)
                    # Split at the chosen hop times (ignoring zero and the final invalid
                    # time).
                    sequences += sequences_lib.split_note_sequence(
                        ns, hop_times[1:-1])
            else:
                sequences += [ns]

        for performance_sequence in sequences:
            if self._encode_score_fns:
                # We need to extract a score.
                if not self._absolute_timing:
                    # Beats are required to extract a score with metric timing.
                    beats = [
                        ta for ta in performance_sequence.text_annotations
                        if ta.annotation_type == BEAT
                        and ta.time <= performance_sequence.total_time
                    ]
                    if len(beats) < 2:
                        Metrics.counter('extract_examples',
                                        'not_enough_beats').inc()
                        continue

                    # Ensure the sequence starts and ends on a beat.
                    performance_sequence = sequences_lib.extract_subsequence(
                        performance_sequence,
                        start_time=min(beat.time for beat in beats),
                        end_time=max(beat.time for beat in beats))

                    # Infer beat-aligned chords (only for relative timing).
                    try:
                        chord_inference.infer_chords_for_sequence(
                            performance_sequence,
                            chord_change_prob=0.25,
                            chord_note_concentration=50.0,
                            add_key_signatures=True)
                    except chord_inference.ChordInferenceError:
                        Metrics.counter('extract_examples',
                                        'chord_inference_failed').inc()
                        continue

                # Infer melody regardless of relative/absolute timing.
                try:
                    melody_instrument = melody_inference.infer_melody_for_sequence(
                        performance_sequence,
                        melody_interval_scale=2.0,
                        rest_prob=0.1,
                        instantaneous_non_max_pitch_prob=1e-15,
                        instantaneous_non_empty_rest_prob=0.0,
                        instantaneous_missing_pitch_prob=1e-15)
                except melody_inference.MelodyInferenceError:
                    Metrics.counter('extract_examples',
                                    'melody_inference_failed').inc()
                    continue

                if not self._absolute_timing:
                    # Now rectify detected beats to occur at fixed tempo.
                    # TODO(iansimon): also include the alignment
                    score_sequence, unused_alignment = sequences_lib.rectify_beats(
                        performance_sequence, beats_per_minute=SCORE_BPM)
                else:
                    # Score uses same timing as performance.
                    score_sequence = copy.deepcopy(performance_sequence)

                # Remove melody notes from performance.
                performance_notes = []
                for note in performance_sequence.notes:
                    if note.instrument != melody_instrument:
                        performance_notes.append(note)
                del performance_sequence.notes[:]
                performance_sequence.notes.extend(performance_notes)

                # Remove non-melody notes from score.
                score_notes = []
                for note in score_sequence.notes:
                    if note.instrument == melody_instrument:
                        score_notes.append(note)
                del score_sequence.notes[:]
                score_sequence.notes.extend(score_notes)

                # Remove key signatures and beat/chord annotations from performance.
                del performance_sequence.key_signatures[:]
                del performance_sequence.text_annotations[:]

                Metrics.counter('extract_examples', 'extracted_score').inc()

            for augment_fn in self._augment_fns:
                # Augment and encode the performance.
                try:
                    augmented_performance_sequence = augment_fn(
                        performance_sequence)
                except DataAugmentationError:
                    Metrics.counter('extract_examples',
                                    'augment_performance_failed').inc()
                    continue
                example_dict = {
                    'targets':
                    self._encode_performance_fn(augmented_performance_sequence)
                }
                if not example_dict['targets']:
                    Metrics.counter('extract_examples',
                                    'skipped_empty_targets').inc()
                    continue

                if (self._random_crop_length and len(example_dict['targets']) >
                        self._random_crop_length):
                    # Take a random crop of the encoded performance.
                    max_offset = len(
                        example_dict['targets']) - self._random_crop_length
                    offset = random.randrange(max_offset + 1)
                    example_dict['targets'] = example_dict['targets'][
                        offset:offset + self._random_crop_length]

                if self._encode_score_fns:
                    # Augment the extracted score.
                    try:
                        augmented_score_sequence = augment_fn(score_sequence)
                    except DataAugmentationError:
                        Metrics.counter('extract_examples',
                                        'augment_score_failed').inc()
                        continue

                    # Apply all score encoding functions.
                    skip = False
                    for name, encode_score_fn in self._encode_score_fns.items(
                    ):
                        example_dict[name] = encode_score_fn(
                            augmented_score_sequence)
                        if not example_dict[name]:
                            Metrics.counter('extract_examples',
                                            'skipped_empty_%s' % name).inc()
                            skip = True
                            break
                    if skip:
                        continue

                Metrics.counter('extract_examples', 'encoded_example').inc()
                Metrics.distribution(
                    'extract_examples',
                    'performance_length_in_seconds').update(
                        int(augmented_performance_sequence.total_time))

                yield generator_utils.to_example(example_dict)