  def test_compute_embedding_dofn(self, average_over_time, sample_rate_key,
                                  sample_rate):
    # Establish required key names.
    audio_key = 'audio_key'

    # Construct the tf.train.Example test data.
    ex = tf.train.Example()
    ex.features.feature[audio_key].float_list.value.extend(
        np.zeros(2000, np.float32))
    if sample_rate_key:
      ex.features.feature[sample_rate_key].int64_list.value.append(8000)

    old_k = 'oldkey'

    do_fn = beam_dofns.ComputeEmbeddingMapFn(
        name='module_name',
        module='@loc',
        output_key='output_key',
        audio_key=audio_key,
        sample_rate_key=sample_rate_key,
        sample_rate=sample_rate,
        average_over_time=average_over_time,
        setup_fn=lambda _: FakeMod())
    do_fn.setup()
    new_k, new_v = next(do_fn.process((old_k, ex)))

    self.assertEqual(new_k, old_k)
    expected_shape = (1, BASE_SHAPE_[1]) if average_over_time else BASE_SHAPE_
    self.assertEqual(new_v.shape, expected_shape)

  def test_compute_embedding_map_fn_tflite(
      self, average_over_time, sample_rate_key, sample_rate):
    # Establish required key names.
    audio_key = 'audio_key'

    # Construct the tf.train.Example test data.
    ex = tf.train.Example()
    ex.features.feature[audio_key].float_list.value.extend(
        np.zeros(2000, np.float32))
    if sample_rate_key:
      ex.features.feature[sample_rate_key].int64_list.value.append(8000)

    old_k = 'oldkey'

    def _feature_fn(x, s):
      return tf.expand_dims(
          tf_frontend.compute_frontend_features(x, s, frame_hop=17),
          axis=-1).numpy().astype(np.float32)
    do_fn = beam_dofns.ComputeEmbeddingMapFn(
        name='module_name',
        module='file.tflite',
        output_key=0,
        audio_key=audio_key,
        sample_rate_key=sample_rate_key,
        sample_rate=sample_rate,
        average_over_time=average_over_time,
        feature_fn=_feature_fn,
        module_call_fn=_s2e,
        setup_fn=build_tflite_interpreter_dummy)
    do_fn.setup()
    new_k, new_v = next(do_fn.process((old_k, ex)))

    self.assertEqual(new_k, old_k)
    expected_shape = (1, BASE_SHAPE_[1]) if average_over_time else BASE_SHAPE_
    self.assertEqual(new_v.shape, expected_shape)
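
# ---------------------------------------------------------------------------
# The tests above reference helpers defined elsewhere in the test module. The
# following is a minimal sketch of plausible stand-ins (assumptions for
# illustration, not the original definitions): `BASE_SHAPE_` is the assumed
# (time, depth) shape of the fake embedding, `FakeMod` mimics a TF-Hub-style
# module returning a dict of named outputs, `_s2e` is a fake
# samples-to-embedding call matching the `module_call_fn` signature used in
# the custom-call test below, and `build_tflite_interpreter_dummy` stands in
# for TFLite interpreter construction.

BASE_SHAPE_ = (15, 5)  # Assumed (time, depth) shape of the fake embedding.


class FakeMod(object):
  """Fake TF-Hub-style module: ignores inputs, returns constant outputs."""

  def __call__(self, *args, **kwargs):
    del args, kwargs
    return {'output_key': tf.zeros(BASE_SHAPE_, tf.float32)}


def _s2e(audio_samples, sample_rate, module, output_key, name):
  """Fake samples-to-embedding call; ignores inputs, returns constants."""
  del audio_samples, sample_rate, module, output_key, name
  return np.zeros(BASE_SHAPE_, dtype=np.float32)


def build_tflite_interpreter_dummy(unused_tflite_model_path):
  """Stands in for TFLite interpreter setup; no real model file is needed."""
  return None
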
Example #3
    def test_pipeline_padding(self, process_fn, chunk_len):
        """Check that the model input is of sufficient length."""
        k, ex = 'key', make_tfexample(100)
        common_args = dict(name='name',
                           module=None,
                           output_key=['output_key'],
                           audio_key='audio',
                           sample_rate_key='sample_rate',
                           sample_rate=None,
                           average_over_time=True,
                           model_input_min_length=400,
                           setup_fn=lambda _: FakeMod())
        if process_fn == 'ComputeEmbeddingMapFn':
            beam_dofn = beam_dofns.ComputeEmbeddingMapFn(**common_args)
        elif process_fn == 'ComputeMultipleEmbeddings':
            beam_dofn = beam_dofns.ComputeMultipleEmbeddingsFromSingleModel(
                embedding_names=['em1'], chunk_len=chunk_len, **common_args)
        elif process_fn == 'ChunkAudioAndComputeEmbeddings':
            beam_dofn = beam_dofns.ChunkAudioAndComputeEmbeddings(
                embedding_names=['em1'], chunk_len=chunk_len, **common_args)
        else:
            assert process_fn == 'ComputeBatchedChunkedSingleEmbeddings'
            beam_dofn = beam_dofns.ComputeBatchedChunkedSingleEmbeddings(
                **common_args)

        # Run preprocessing step.
        beam_dofn.setup()
        if process_fn == 'ComputeEmbeddingMapFn':
            model_input, sample_rate = beam_dofn.read_and_preprocess_audio(
                k, ex)
            expected_output_shape = (400,)
        elif process_fn == 'ComputeBatchedChunkedSingleEmbeddings':
            model_input, _, sample_rate = beam_dofn.read_and_preprocess_batched_audio(
                [k, k], [ex, ex])
            expected_output_shape = (2, 400)
        else:
            model_input, sample_rate = beam_dofn.tfex_to_chunked_audio(k, ex)
            expected_output_shape = (2, chunk_len) if chunk_len else (1, 400)

        # Original audio is too short, so it should be padded to
        # `model_input_min_length`.

        self.assertEqual(model_input.shape, expected_output_shape)

        # Having a non-standard sample rate should trigger resampling and cause the
        # output to be 16kHz.
        self.assertEqual(sample_rate, 16000)
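
# A plausible sketch of the `make_tfexample` helper used above (an assumption;
# the real helper lives elsewhere in the test module). It builds a
# tf.train.Example with `length` zero-valued samples under the 'audio' key,
# plus a deliberately non-16kHz sample rate so that the resampling branch in
# the padding test is exercised.
def make_tfexample(length):
    ex = tf.train.Example()
    ex.features.feature['audio'].float_list.value.extend(
        np.zeros(length, np.float32))
    ex.features.feature['sample_rate'].int64_list.value.append(32000)
    return ex
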
Example #4
    def test_compute_embedding_dofn_custom_call(self, average_over_time,
                                                sample_rate_key, sample_rate):
        # Establish required key names.
        audio_key = 'audio_key'
        custom_call_shape = (5, 25)

        # Custom call function for embedding generation.
        def test_call_fn(audio_samples, sample_rate, module_location,
                         output_key, name):
            """Mock waveform-to-embedding computation."""
            del audio_samples, sample_rate, module_location, output_key, name
            return np.zeros(custom_call_shape, dtype=np.float32)

        # Construct the tf.train.Example test data.
        ex = tf.train.Example()
        ex.features.feature[audio_key].float_list.value.extend(
            np.zeros(2000, np.float32))
        if sample_rate_key:
            ex.features.feature[sample_rate_key].int64_list.value.append(8000)

        old_k = 'oldkey'

        do_fn = beam_dofns.ComputeEmbeddingMapFn(
            name='module_name',
            module='@loc',
            output_key='unnecessary',
            audio_key=audio_key,
            sample_rate_key=sample_rate_key,
            sample_rate=sample_rate,
            average_over_time=average_over_time,
            module_call_fn=test_call_fn,
            setup_fn=lambda _: None)
        do_fn.setup()
        new_k, new_v = next(do_fn.process((old_k, ex)))

        self.assertEqual(new_k, old_k)
        expected_shape = ((1, custom_call_shape[1])
                          if average_over_time else custom_call_shape)
        self.assertEqual(new_v.shape, expected_shape)
Example #5
def make_many_models_beam_pipeline(
        root,
        input_filenames,
        output_filename,
        sample_rate,
        debug,
        embedding_names,
        embedding_modules,
        module_output_keys,
        audio_key,
        sample_rate_key,
        label_key,
        speaker_id_key,
        average_over_time,
        delete_audio_from_output,
        split_embeddings_into_separate_tables=False,
        use_frontend_fn=False,
        normalize_to_pm_one=True,
        model_input_min_length=None,
        input_format='tfrecord',
        output_format='tfrecord',
        suffix='Main',
        module_call_fn=utils.samples_to_embedding_tfhub,
        setup_fn=hub.load):
    """Construct beam pipeline for mapping from audio to embeddings.

  Args:
    root: The beam root node.
    input_filenames: Python list. List of input files.
    output_filename: Python string. Output filename.
    sample_rate: Python int, or `None`. The sample rate for all embeddings, or
      `None` if this is a TFDS dataset, or if each example has its own sample
      rate.
    debug: Python bool. Whether to operate in debug mode.
    embedding_names: Python list of embeddings.
    embedding_modules: Python list of TF-Hub modules.
    module_output_keys: Python list of strings, names of output modules.
    audio_key: Python string, the key of the audio.
    sample_rate_key: Python string or `None`, the key for.
    label_key: Python string. Field for label.
    speaker_id_key: Python string or `None`. Key for speaker ID, or `None`.
    average_over_time: Python bool. If `True`, average over the time axis.
    delete_audio_from_output: Python bool. Whether to remove audio fromm
      outputs.
    split_embeddings_into_separate_tables: Python bool. If true, write each
      embedding to a separate table.
    use_frontend_fn: If `true`, call frontend fn on audio before passing to the
      model.
    normalize_to_pm_one: Whether to normalize input to +- 1 before passing to
      model.
    model_input_min_length: Min length to the model, or `None`. 0-pad inputs to
      this length, if necessary. Note that frontends usually contain their own
      length logic, unless the model is in TFLite format.
    input_format: Python string. Must correspond to a function in
      `reader_functions`.
    output_format: Python string. Must correspond to a function
      `writer_functions`.
    suffix: Python string. Suffix to stage names to make them unique.
    module_call_fn: Function for inference on audio.
    setup_fn: Function for creating audio inference model.
  """
    tf_examples_key_ = 'tf_examples'
    if tf_examples_key_ in embedding_names:
        raise ValueError(
            f'"{tf_examples_key_}" is reserved, cannot be embedding name.')
    s = suffix  # for code brevity.

    # Read from input.
    input_examples = _common_pipeline_beginning(root, input_format,
                                                input_filenames, s, debug)

    # Compute all the embeddings simultaneously.
    embedding_tables = {}
    for name, mod, out_key in zip(embedding_names, embedding_modules,
                                  module_output_keys):
        logging.info('Adding signal: %s %s, %s', name, mod, out_key)
        tbl = input_examples | f'ComputeEmbedding-{name}-{s}' >> beam.ParDo(
            beam_dofns.ComputeEmbeddingMapFn(
                name=name,
                module=mod,
                output_key=out_key,
                audio_key=audio_key,
                sample_rate_key=sample_rate_key,
                sample_rate=sample_rate,
                average_over_time=average_over_time,
                feature_fn=(utils.default_feature_fn
                            if use_frontend_fn else None),
                normalize_to_pm_one=normalize_to_pm_one,
                model_input_min_length=model_input_min_length,
                module_call_fn=module_call_fn,
                setup_fn=setup_fn))
        embedding_tables[name] = tbl
    assert tf_examples_key_ not in embedding_tables
    embedding_tables[tf_examples_key_] = input_examples
    logging.info('embedding_tables: %s', embedding_tables)

    # Either write to one table with all embeddings, or one table per embedding.
    if split_embeddings_into_separate_tables:
        output_table_dicts = [(k, {
            k: v,
            tf_examples_key_: input_examples
        }) for k, v in embedding_tables.items() if k != tf_examples_key_]
    else:
        output_table_dicts = [('all', embedding_tables)]

    # Combine embeddings and tf.train.Example, using the common key.
    writer_function = utils.writer_functions[output_format]
    for name, cur_embedding_tables in output_table_dicts:
        if split_embeddings_into_separate_tables:
            cur_s = f'{name}-{s}'
            # Add `name` as a subdir.
            dirname, basename = os.path.split(output_filename)
            cur_output_filename = os.path.join(dirname, name, f'{basename}@*')
        else:
            cur_s = s
            cur_output_filename = f'{output_filename}@*'
        combined_tbl = (
            cur_embedding_tables
            | f'CombineEmbeddingTables-{cur_s}' >> beam.CoGroupByKey()
            | f'AddEmbeddings-{cur_s}' >> beam.Map(
                utils.add_embeddings_to_tfex,
                original_example_key=tf_examples_key_,
                delete_audio_from_output=delete_audio_from_output,
                audio_key=audio_key,
                label_key=label_key,
                speaker_id_key=speaker_id_key))
        logging.info('Writing to %s', cur_output_filename)
        writer_function(combined_tbl, cur_output_filename, cur_s)
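
# ---------------------------------------------------------------------------
# A hedged usage sketch of `make_many_models_beam_pipeline` (illustrative
# only): the file paths, module handle, and feature keys below are
# hypothetical placeholders, not values taken from the code above.
def _example_usage():
    with beam.Pipeline() as root:
        make_many_models_beam_pipeline(
            root,
            input_filenames=['/tmp/audio_examples.tfrecord'],  # hypothetical
            output_filename='/tmp/embeddings/table',           # hypothetical
            sample_rate=16000,
            debug=False,
            embedding_names=['my_embedding'],            # hypothetical name
            embedding_modules=['path/to/tfhub_module'],  # hypothetical handle
            module_output_keys=['embedding'],            # hypothetical key
            audio_key='audio',
            sample_rate_key=None,
            label_key='label',
            speaker_id_key=None,
            average_over_time=True,
            delete_audio_from_output=True)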