Пример #1
0
  def __init__(self,
               min_batch_size=1,
               max_batch_size=1000,
               target_batch_overhead=.1,
               target_batch_duration_secs=1,
               clock=time.time):
    """Create a batch-size estimator.

    Args:
      min_batch_size: lower bound (inclusive) on the batch size.
      max_batch_size: upper bound (inclusive) on the batch size.
      target_batch_overhead: desired maximum fraction of time spent on
        per-batch fixed overhead; must be in (0, 1]. Pass a falsy value
        (0 or None) to disable this target.
      target_batch_duration_secs: desired upper bound on seconds per batch;
        must be positive. Pass a falsy value (0 or None) to disable.
      clock: callable returning the current time in seconds; injectable to
        make timing deterministic in tests.

    Raises:
      ValueError: if min > max, a target is out of range, or neither
        target is enabled.
    """
    if min_batch_size > max_batch_size:
      raise ValueError("Minimum (%s) must not be greater than maximum (%s)" % (
          min_batch_size, max_batch_size))
    if target_batch_overhead and not 0 < target_batch_overhead <= 1:
      raise ValueError("target_batch_overhead (%s) must be between 0 and 1" % (
          target_batch_overhead))
    if target_batch_duration_secs and target_batch_duration_secs <= 0:
      raise ValueError("target_batch_duration_secs (%s) must be positive" % (
          target_batch_duration_secs))
    # `or 0` guards against None: on Python 3, max(0, None, ...) raises
    # TypeError instead of reporting the intended ValueError below.
    if max(0, target_batch_overhead or 0,
           target_batch_duration_secs or 0) == 0:
      raise ValueError("At least one of target_batch_overhead or "
                       "target_batch_duration_secs must be positive.")
    self._min_batch_size = min_batch_size
    self._max_batch_size = max_batch_size
    self._target_batch_overhead = target_batch_overhead
    self._target_batch_duration_secs = target_batch_duration_secs
    self._clock = clock
    self._data = []
    self._ignore_next_timing = False

    self._size_distribution = Metrics.distribution(
        'BatchElements', 'batch_size')
    self._time_distribution = Metrics.distribution(
        'BatchElements', 'msec_per_batch')
    # Beam distributions only accept integer values, so we use this to
    # accumulate under-reported values until they add up to whole milliseconds.
    # (Milliseconds are chosen because that's conventionally used elsewhere in
    # profiling-style counters.)
    self._remainder_msecs = 0
Пример #2
0
 def __init__(self):
   """Create the Beam metrics tracking Bitcoin transaction statistics."""
   super(BitcoinTxnCountDoFn, self).__init__()
   # Bind the factories and namespace once so each declaration stays short.
   counter = Metrics.counter
   distribution = Metrics.distribution
   namespace = self.__class__
   self.txn_counter = counter(namespace, 'txns')
   self.inputs_dist = distribution(namespace, 'inputs_per_txn')
   self.outputs_dist = distribution(namespace, 'outputs_per_txn')
   self.output_amts_dist = distribution(namespace, 'output_amts')
   self.txn_amts_dist = distribution(namespace, 'txn_amts')
Пример #3
0
 def __init__(self):
   """Create the word-extraction metrics this DoFn updates."""
   super(WordExtractingDoFn, self).__init__()
   namespace = self.__class__
   self.words_counter = Metrics.counter(namespace, 'words')
   self.word_lengths_counter = Metrics.counter(namespace, 'word_lengths')
   self.word_lengths_dist = Metrics.distribution(namespace, 'word_len_dist')
   self.empty_line_counter = Metrics.counter(namespace, 'empty_lines')
 def __init__(self):
   """Declare the counter, distribution, and gauge metrics to verify."""
   namespace = self.__class__
   self.total_metric = Metrics.counter(namespace, 'total_values')
   self.dist_metric = Metrics.distribution(namespace, 'distribution_values')
   # TODO(ajamato): Add a verifier for gauge once it is supported by the SDKs
   # and runners.
   self.latest_metric = Metrics.gauge(namespace, 'latest_value')
Пример #5
0
 def __init__(self, pattern):
   """Store the match pattern and create the custom metrics.

   Custom metrics track values in the pipeline as it runs: here, a counter
   of unmatched words and a distribution of word lengths in the input
   PCollection.
   """
   self.pattern = pattern
   namespace = self.__class__
   self.word_len_dist = Metrics.distribution(namespace, 'word_len_dist')
   self.unmatched_words = Metrics.counter(namespace, 'unmatched_words')
 def __init__(self):
   """Create word-count metrics under the shared 'main' namespace."""
   make_counter = Metrics.counter
   self.empty_line_counter = make_counter('main', 'empty_lines')
   self.word_length_counter = make_counter('main', 'word_lengths')
   self.word_counter = make_counter('main', 'total_words')
   self.word_lengths_dist = Metrics.distribution('main', 'word_len_dist')
 def __init__(self):
   """Create distributions recording ParDo runtime start/end timestamps."""
   make_dist = Metrics.distribution
   self.runtime_start = make_dist('pardo', 'runtime.start')
   self.runtime_end = make_dist('pardo', 'runtime.end')
Пример #8
0
  def process(self, kv):
    """Extract encoded performance (and optional score) examples.

    Splits the deserialized NoteSequence into hop-sized segments (replicated
    self._num_replications times), optionally extracts a melody-based score,
    applies each augmentation function, and encodes the results.

    Args:
      kv: pair of (key, serialized NoteSequence proto bytes).

    Yields:
      Examples produced by generator_utils.to_example from the encoded
      example dicts.
    """
    # Seed random number generator based on key so that hop times are
    # deterministic.
    key, ns_str = kv
    # NOTE(review): hashlib.md5 requires bytes; this assumes `key` is bytes
    # (Python 2 str). On Python 3 a str key would raise TypeError — confirm.
    m = hashlib.md5(key)
    random.seed(int(m.hexdigest(), 16))

    # Deserialize NoteSequence proto.
    ns = music_pb2.NoteSequence.FromString(ns_str)

    # Apply sustain pedal.
    ns = sequences_lib.apply_sustain_control_changes(ns)

    # Remove control changes as there are potentially a lot of them and they are
    # no longer needed.
    del ns.control_changes[:]

    if (self._min_hop_size_seconds and
        ns.total_time < self._min_hop_size_seconds):
      Metrics.counter('extract_examples', 'sequence_too_short').inc()
      return

    sequences = []
    for _ in range(self._num_replications):
      if self._max_hop_size_seconds:
        if self._max_hop_size_seconds == self._min_hop_size_seconds:
          # Split using fixed hop size.
          sequences += sequences_lib.split_note_sequence(
              ns, self._max_hop_size_seconds)
        else:
          # Sample random hop positions such that each segment size is within
          # the specified range.
          hop_times = [0.0]
          while hop_times[-1] <= ns.total_time - self._min_hop_size_seconds:
            if hop_times[-1] + self._max_hop_size_seconds < ns.total_time:
              # It's important that we get a valid hop size here, since the
              # remainder of the sequence is too long.
              max_offset = min(
                  self._max_hop_size_seconds,
                  ns.total_time - self._min_hop_size_seconds - hop_times[-1])
            else:
              # It's okay if the next hop time is invalid (in which case we'll
              # just stop).
              max_offset = self._max_hop_size_seconds
            offset = random.uniform(self._min_hop_size_seconds, max_offset)
            hop_times.append(hop_times[-1] + offset)
          # Split at the chosen hop times (ignoring zero and the final invalid
          # time).
          sequences += sequences_lib.split_note_sequence(ns, hop_times[1:-1])
      else:
        # No hop size configured: use the whole sequence per replication.
        sequences += [ns]

    for performance_sequence in sequences:
      if self._encode_score_fns:
        # We need to extract a score.
        if not self._absolute_timing:
          # Beats are required to extract a score with metric timing.
          beats = [
              ta for ta in performance_sequence.text_annotations
              if (ta.annotation_type ==
                  music_pb2.NoteSequence.TextAnnotation.BEAT)
              and ta.time <= performance_sequence.total_time
          ]
          if len(beats) < 2:
            Metrics.counter('extract_examples', 'not_enough_beats').inc()
            continue

          # Ensure the sequence starts and ends on a beat.
          performance_sequence = sequences_lib.extract_subsequence(
              performance_sequence,
              start_time=min(beat.time for beat in beats),
              end_time=max(beat.time for beat in beats)
          )

          # Infer beat-aligned chords (only for relative timing).
          try:
            chord_inference.infer_chords_for_sequence(
                performance_sequence,
                chord_change_prob=0.25,
                chord_note_concentration=50.0,
                add_key_signatures=True)
          except chord_inference.ChordInferenceError:
            Metrics.counter('extract_examples', 'chord_inference_failed').inc()
            continue

        # Infer melody regardless of relative/absolute timing.
        try:
          melody_instrument = melody_inference.infer_melody_for_sequence(
              performance_sequence,
              melody_interval_scale=2.0,
              rest_prob=0.1,
              instantaneous_non_max_pitch_prob=1e-15,
              instantaneous_non_empty_rest_prob=0.0,
              instantaneous_missing_pitch_prob=1e-15)
        except melody_inference.MelodyInferenceError:
          Metrics.counter('extract_examples', 'melody_inference_failed').inc()
          continue

        if not self._absolute_timing:
          # Now rectify detected beats to occur at fixed tempo.
          # TODO(iansimon): also include the alignment
          score_sequence, unused_alignment = sequences_lib.rectify_beats(
              performance_sequence, beats_per_minute=SCORE_BPM)
        else:
          # Score uses same timing as performance.
          score_sequence = copy.deepcopy(performance_sequence)

        # Remove melody notes from performance.
        performance_notes = []
        for note in performance_sequence.notes:
          if note.instrument != melody_instrument:
            performance_notes.append(note)
        del performance_sequence.notes[:]
        performance_sequence.notes.extend(performance_notes)

        # Remove non-melody notes from score.
        score_notes = []
        for note in score_sequence.notes:
          if note.instrument == melody_instrument:
            score_notes.append(note)
        del score_sequence.notes[:]
        score_sequence.notes.extend(score_notes)

        # Remove key signatures and beat/chord annotations from performance.
        del performance_sequence.key_signatures[:]
        del performance_sequence.text_annotations[:]

        Metrics.counter('extract_examples', 'extracted_score').inc()

      for augment_fn in self._augment_fns:
        # Augment and encode the performance.
        try:
          augmented_performance_sequence = augment_fn(performance_sequence)
        except DataAugmentationError:
          Metrics.counter(
              'extract_examples', 'augment_performance_failed').inc()
          continue
        example_dict = {
            'targets': self._encode_performance_fn(
                augmented_performance_sequence)
        }
        if not example_dict['targets']:
          Metrics.counter('extract_examples', 'skipped_empty_targets').inc()
          continue

        if self._encode_score_fns:
          # Augment the extracted score.
          try:
            augmented_score_sequence = augment_fn(score_sequence)
          except DataAugmentationError:
            Metrics.counter('extract_examples', 'augment_score_failed').inc()
            continue

          # Apply all score encoding functions.
          skip = False
          for name, encode_score_fn in self._encode_score_fns.items():
            example_dict[name] = encode_score_fn(augmented_score_sequence)
            if not example_dict[name]:
              Metrics.counter('extract_examples',
                              'skipped_empty_%s' % name).inc()
              skip = True
              break
          if skip:
            continue

        Metrics.counter('extract_examples', 'encoded_example').inc()
        Metrics.distribution(
            'extract_examples', 'performance_length_in_seconds').update(
                int(augmented_performance_sequence.total_time))

        yield generator_utils.to_example(example_dict)
Пример #9
0
 def __init__(self):
   """Create the word-counting metrics updated during processing."""
   namespace = self.__class__
   self.words_counter = Metrics.counter(namespace, 'words')
   self.word_lengths_counter = Metrics.counter(namespace, 'word_lengths')
   self.word_lengths_dist = Metrics.distribution(namespace, 'word_len_dist')
   self.empty_line_counter = Metrics.counter(namespace, 'empty_lines')
 def __init__(self):
      # NOTE(review): despite the "_counter" attribute name, this is a
      # distribution metric. Both the attribute name and the metric type are
      # kept as-is to preserve the public interface and reported metrics.
      length_metric = Metrics.distribution('main', 'word_lengths')
      self.word_length_counter = length_metric
Пример #11
0
    def process(self, kv):
        """Extract fixed-length (2048-token) performance examples.

        Replicates the input NoteSequence, augments and encodes each copy,
        and yields examples containing a random 2048-token crop of the
        encoding (shorter encodings are dropped). When self._melody is set,
        a melody is inferred from the cropped performance and added to the
        example; when self._noisy is also set, a randomly transposed
        ("noisy") performance is paired with the clean targets.

        Args:
            kv: pair of (key, serialized NoteSequence proto bytes).

        Yields:
            Examples produced by generator_utils.to_example.
        """
        # Seed random number generator based on key so that hop times are
        # deterministic.
        key, ns_str = kv
        # NOTE(review): hashlib.md5 requires bytes; this assumes `key` is
        # bytes. On Python 3 a str key would need key.encode('utf-8').
        m = hashlib.md5(key)
        random.seed(int(m.hexdigest(), 16))

        # Deserialize NoteSequence proto.
        ns = music_pb2.NoteSequence.FromString(ns_str)

        # Apply sustain pedal.
        ns = sequences_lib.apply_sustain_control_changes(ns)

        # Remove control changes as there are potentially a lot of them and they are
        # no longer needed.
        del ns.control_changes[:]

        for _ in range(self._num_replications):
            for augment_fn in self._augment_fns:
                # Augment and encode the performance.
                try:
                    augmented_performance_sequence = augment_fn(ns)
                except DataAugmentationError:
                    Metrics.counter('extract_examples',
                                    'augment_performance_failed').inc()
                    continue
                seq = self._encode_performance_fn(
                    augmented_performance_sequence)
                # feed in performance as both input/output to music transformer
                # chopping sequence into length 2048 (throw out shorter sequences)
                if len(seq) >= 2048:
                    max_offset = len(seq) - 2048
                    offset = random.randrange(max_offset + 1)
                    cropped_seq = seq[offset:offset + 2048]

                    example_dict = {
                        'inputs': cropped_seq,
                        'targets': cropped_seq
                    }

                    if self._melody:
                        # decode truncated performance sequence for melody inference
                        decoded_midi = self._decode_performance_fn(cropped_seq)
                        decoded_ns = mm.midi_io.midi_file_to_note_sequence(
                            decoded_midi)

                        # extract melody from cropped performance sequence
                        melody_instrument = melody_inference.infer_melody_for_sequence(
                            decoded_ns,
                            melody_interval_scale=2.0,
                            rest_prob=0.1,
                            instantaneous_non_max_pitch_prob=1e-15,
                            instantaneous_non_empty_rest_prob=0.0,
                            instantaneous_missing_pitch_prob=1e-15)

                        # remove non-melody notes from score
                        score_sequence = copy.deepcopy(decoded_ns)
                        score_notes = []
                        for note in score_sequence.notes:
                            if note.instrument == melody_instrument:
                                score_notes.append(note)
                        del score_sequence.notes[:]
                        score_sequence.notes.extend(score_notes)

                        # encode melody
                        encode_score_fn = self._encode_score_fns['melody']
                        example_dict['melody'] = encode_score_fn(
                            score_sequence)
                        # make sure performance input also matches targets; needed for
                        # compatibility of both perf and (mel & perf) autoencoders

                        if self._noisy:
                            # Randomly sample a pitch shift (within the piano
                            # range 21..108) to construct a noisy performance.
                            all_pitches = [x.pitch for x in decoded_ns.notes]
                            min_val = min(all_pitches)
                            max_val = max(all_pitches)
                            # Materialize a list: on Python 3, range objects
                            # are immutable and have no .remove(), which would
                            # raise AttributeError — not the ValueError the
                            # handler below catches.
                            transpose_range = list(
                                range(-(min_val - 21), 108 - max_val + 1))
                            try:
                                # Drop 0 so we always actually transpose.
                                transpose_range.remove(0)
                            except ValueError:
                                pass
                            transpose_amount = random.choice(transpose_range)
                            augmented_ns, _ = sequences_lib.transpose_note_sequence(
                                decoded_ns,
                                transpose_amount,
                                min_allowed_pitch=21,
                                max_allowed_pitch=108,
                                in_place=False)
                            aug_seq = self._encode_performance_fn(augmented_ns)
                            example_dict['performance'] = aug_seq
                        else:
                            example_dict['performance'] = example_dict[
                                'targets']
                        del example_dict['inputs']

                    Metrics.counter('extract_examples',
                                    'encoded_example').inc()
                    Metrics.distribution(
                        'extract_examples',
                        'performance_length_in_seconds').update(
                            int(augmented_performance_sequence.total_time))

                    yield generator_utils.to_example(example_dict)
Пример #12
0
    def process(self, kv):
        """Extract encoded performance (and optional score) examples.

        Splits the deserialized NoteSequence into hop-sized segments
        (replicated self._num_replications times), optionally extracts a
        melody-based score, applies each augmentation function, encodes the
        results, and optionally takes a random crop of the encoded
        performance.

        Args:
            kv: pair of (key, serialized NoteSequence proto bytes).

        Yields:
            Examples produced by generator_utils.to_example.
        """
        # Seed random number generator based on key so that hop times are
        # deterministic.
        key, ns_str = kv
        m = hashlib.md5(key.encode('utf-8'))
        random.seed(int(m.hexdigest(), 16))

        # Deserialize NoteSequence proto.
        ns = music_pb2.NoteSequence.FromString(ns_str)

        # Apply sustain pedal.
        ns = sequences_lib.apply_sustain_control_changes(ns)

        # Remove control changes as there are potentially a lot of them and they are
        # no longer needed.
        del ns.control_changes[:]

        if (self._min_hop_size_seconds
                and ns.total_time < self._min_hop_size_seconds):
            Metrics.counter('extract_examples', 'sequence_too_short').inc()
            return

        sequences = []
        for _ in range(self._num_replications):
            if self._max_hop_size_seconds:
                if self._max_hop_size_seconds == self._min_hop_size_seconds:
                    # Split using fixed hop size.
                    sequences += sequences_lib.split_note_sequence(
                        ns, self._max_hop_size_seconds)
                else:
                    # Sample random hop positions such that each segment size is within
                    # the specified range.
                    hop_times = [0.0]
                    while hop_times[
                            -1] <= ns.total_time - self._min_hop_size_seconds:
                        if hop_times[
                                -1] + self._max_hop_size_seconds < ns.total_time:
                            # It's important that we get a valid hop size here, since the
                            # remainder of the sequence is too long.
                            max_offset = min(
                                self._max_hop_size_seconds, ns.total_time -
                                self._min_hop_size_seconds - hop_times[-1])
                        else:
                            # It's okay if the next hop time is invalid (in which case we'll
                            # just stop).
                            max_offset = self._max_hop_size_seconds
                        offset = random.uniform(self._min_hop_size_seconds,
                                                max_offset)
                        hop_times.append(hop_times[-1] + offset)
                    # Split at the chosen hop times (ignoring zero and the final invalid
                    # time).
                    sequences += sequences_lib.split_note_sequence(
                        ns, hop_times[1:-1])
            else:
                # No hop size configured: use the whole sequence per replication.
                sequences += [ns]

        for performance_sequence in sequences:
            if self._encode_score_fns:
                # We need to extract a score.
                if not self._absolute_timing:
                    # Beats are required to extract a score with metric timing.
                    beats = [
                        ta for ta in performance_sequence.text_annotations
                        if ta.annotation_type == BEAT
                        and ta.time <= performance_sequence.total_time
                    ]
                    if len(beats) < 2:
                        Metrics.counter('extract_examples',
                                        'not_enough_beats').inc()
                        continue

                    # Ensure the sequence starts and ends on a beat.
                    performance_sequence = sequences_lib.extract_subsequence(
                        performance_sequence,
                        start_time=min(beat.time for beat in beats),
                        end_time=max(beat.time for beat in beats))

                    # Infer beat-aligned chords (only for relative timing).
                    try:
                        chord_inference.infer_chords_for_sequence(
                            performance_sequence,
                            chord_change_prob=0.25,
                            chord_note_concentration=50.0,
                            add_key_signatures=True)
                    except chord_inference.ChordInferenceError:
                        Metrics.counter('extract_examples',
                                        'chord_inference_failed').inc()
                        continue

                # Infer melody regardless of relative/absolute timing.
                try:
                    melody_instrument = melody_inference.infer_melody_for_sequence(
                        performance_sequence,
                        melody_interval_scale=2.0,
                        rest_prob=0.1,
                        instantaneous_non_max_pitch_prob=1e-15,
                        instantaneous_non_empty_rest_prob=0.0,
                        instantaneous_missing_pitch_prob=1e-15)
                except melody_inference.MelodyInferenceError:
                    Metrics.counter('extract_examples',
                                    'melody_inference_failed').inc()
                    continue

                if not self._absolute_timing:
                    # Now rectify detected beats to occur at fixed tempo.
                    # TODO(iansimon): also include the alignment
                    score_sequence, unused_alignment = sequences_lib.rectify_beats(
                        performance_sequence, beats_per_minute=SCORE_BPM)
                else:
                    # Score uses same timing as performance.
                    score_sequence = copy.deepcopy(performance_sequence)

                # Remove melody notes from performance.
                performance_notes = []
                for note in performance_sequence.notes:
                    if note.instrument != melody_instrument:
                        performance_notes.append(note)
                del performance_sequence.notes[:]
                performance_sequence.notes.extend(performance_notes)

                # Remove non-melody notes from score.
                score_notes = []
                for note in score_sequence.notes:
                    if note.instrument == melody_instrument:
                        score_notes.append(note)
                del score_sequence.notes[:]
                score_sequence.notes.extend(score_notes)

                # Remove key signatures and beat/chord annotations from performance.
                del performance_sequence.key_signatures[:]
                del performance_sequence.text_annotations[:]

                Metrics.counter('extract_examples', 'extracted_score').inc()

            for augment_fn in self._augment_fns:
                # Augment and encode the performance.
                try:
                    augmented_performance_sequence = augment_fn(
                        performance_sequence)
                except DataAugmentationError:
                    Metrics.counter('extract_examples',
                                    'augment_performance_failed').inc()
                    continue
                example_dict = {
                    'targets':
                    self._encode_performance_fn(augmented_performance_sequence)
                }
                if not example_dict['targets']:
                    Metrics.counter('extract_examples',
                                    'skipped_empty_targets').inc()
                    continue

                if (self._random_crop_length and len(example_dict['targets']) >
                        self._random_crop_length):
                    # Take a random crop of the encoded performance.
                    max_offset = len(
                        example_dict['targets']) - self._random_crop_length
                    offset = random.randrange(max_offset + 1)
                    example_dict['targets'] = example_dict['targets'][
                        offset:offset + self._random_crop_length]

                if self._encode_score_fns:
                    # Augment the extracted score.
                    try:
                        augmented_score_sequence = augment_fn(score_sequence)
                    except DataAugmentationError:
                        Metrics.counter('extract_examples',
                                        'augment_score_failed').inc()
                        continue

                    # Apply all score encoding functions.
                    skip = False
                    for name, encode_score_fn in self._encode_score_fns.items(
                    ):
                        example_dict[name] = encode_score_fn(
                            augmented_score_sequence)
                        if not example_dict[name]:
                            Metrics.counter('extract_examples',
                                            'skipped_empty_%s' % name).inc()
                            skip = True
                            break
                    if skip:
                        continue

                Metrics.counter('extract_examples', 'encoded_example').inc()
                Metrics.distribution(
                    'extract_examples',
                    'performance_length_in_seconds').update(
                        int(augmented_performance_sequence.total_time))

                yield generator_utils.to_example(example_dict)
Пример #13
0
 def __init__(self, namespace):
   """Create the runtime distribution under the given metrics namespace."""
   self.namespace = namespace
   # Passing the local is equivalent to self.namespace — just assigned above.
   self.runtime = Metrics.distribution(namespace, RUNTIME_LABEL)
Пример #14
0
 def __init__(self):
     """Create the doubled-message counter and message-length distribution."""
     namespace = self.__class__
     self.double_message_counter = Metrics.counter(
         namespace, 'double_msg_counter_name')
     self.msg_len_dist_metric = Metrics.distribution(
         namespace, 'msg_len_dist_metric_name')