def test_make_tfexample_and_write(self):
        """Round-trips embeddings and labels through a TFRecord file.

        Writes three (embedding, label) pairs with
        `eval_downstream_embedding_fidelity.make_tfexample_and_write`, reads
        them back with `sklearn_utils.tfexamples_to_nps`, and checks that the
        embeddings come back unchanged and that the labels '1', '1', '2' are
        returned as 0, 0, 1 for `label_list=['1', '2']`.
        """
        expected = [[1., 2., 3.], [4., 5., 6.], [-1., -2., -3.]]
        embs = tf.constant(expected, tf.float32)
        # Despite the variable name, this is the path of the TFRecord file
        # itself, not of a directory.
        embeddings_output_dir = os.path.join(
            absltest.get_default_test_tmpdir(), 'embeddings2')

        speaker_id = tf.constant('speakr')
        expected_lbl = ['1', '1', '2']
        lbls = tf.constant(expected_lbl, tf.string)

        # The context manager closes the writer even when writing an example
        # raises, so a failing test does not leak the open file.
        with tf.io.TFRecordWriter(embeddings_output_dir) as tfrecord_writer:
            for emb, lbl in zip(embs, lbls):
                eval_downstream_embedding_fidelity.make_tfexample_and_write(
                    emb, lbl, speaker_id, 'speaker_id_name', tfrecord_writer)
        self.assertTrue(tf.io.gfile.exists(embeddings_output_dir))

        # Check that they can be read correctly.
        embs_np, lbls_np, _ = sklearn_utils.tfexamples_to_nps(
            path=f'{embeddings_output_dir}*',
            embedding_name=eval_downstream_embedding_fidelity.EMBEDDING_KEY_,
            label_name=eval_downstream_embedding_fidelity.LABEL_KEY_,
            label_list=['1', '2'],
            l2_normalization=False)

        # Check correctness.
        np.testing.assert_array_equal(expected, embs_np)
        np.testing.assert_array_equal([0, 0, 1], lbls_np)
示例#2
0
 def _read_glob(glob, name):
     """Reads embeddings and labels for one data split and logs the timing.

     Args:
       glob: Path/glob handed to `sklearn_utils.tfexamples_to_nps` as its
         first argument.
       name: Human-readable split name, used only in the log message.

     Returns:
       A `(npx, npy)` tuple: the first two values returned by
       `sklearn_utils.tfexamples_to_nps` (presumably embeddings and labels,
       going by the argument names -- confirm against that helper).
     """
     s = time.time()
     # Other call sites of `tfexamples_to_nps` unpack a third return value.
     # `*_` discards any values after the first two, so this works whether
     # the helper returns two or three items instead of raising ValueError.
     npx, npy, *_ = sklearn_utils.tfexamples_to_nps(glob, embedding_name,
                                                    label_name, label_list,
                                                    l2_normalization,
                                                    speaker_id_name)
     logging.info('Finished reading %s data: %.2f sec.', name, _cur_s(s))
     return npx, npy
示例#3
0
    def test_tfexample_to_nps(self, l2_normalization,
                              int64_label_instead_of_bytes):
        """Checks that `tfexamples_to_nps` recovers embeddings and labels.

        Three random 100-element embeddings with label indices 1, 0, 1 are
        packaged as `tf.Example`s, written to a TFRecord file, and read back
        through `sklearn_utils.tfexamples_to_nps`.

        Args:
          l2_normalization: Forwarded to `tfexamples_to_nps`. When true, the
            expected embeddings are divided by their row-wise L2 norm before
            the comparison.
          int64_label_instead_of_bytes: When true, labels are stored in the
            example's `int64_list` (with label_list ['0', '1']); otherwise
            they are stored UTF-8 encoded in its `bytes_list` (with
            label_list ['yes', 'no']).
        """
        path = os.path.join(absltest.get_default_test_tmpdir(),
                            'dummy_tfrecords')
        embedding_name = 'fake_emb'
        label_name = 'label/fake_lbl'
        if int64_label_instead_of_bytes:
            label_list = ['0', '1']
        else:
            label_list = ['yes', 'no']

        # Fixed seed so the fake embeddings are reproducible across runs.
        np.random.seed(10)
        # Generate fake embeddings and labels.

        fake_data = [
            (np.random.rand(100), 1),
            (np.random.rand(100), 0),
            (np.random.rand(100), 1),
        ]

        def _emb_lbl_i_to_tfexample(emb, label_index):
            """Package fake data as a tf.Example."""
            ex = tf.train.Example()
            ex.features.feature[
                f'embedding/{embedding_name}'].float_list.value.extend(emb)
            if int64_label_instead_of_bytes:
                ex.features.feature[label_name].int64_list.value.append(
                    int(label_list[label_index]))
            else:
                ex.features.feature[label_name].bytes_list.value.append(
                    label_list[label_index].encode('utf-8'))
            return ex

        # Write TFRecord of tf.Examples to disk. `tf.io.TFRecordWriter` is
        # used instead of the TF1-only `tf.python_io` alias, which is not
        # available on the TF2 `tf` namespace.
        with tf.io.TFRecordWriter(path) as writer:
            for emb, label_index in fake_data:
                ex = _emb_lbl_i_to_tfexample(emb, label_index)
                writer.write(ex.SerializeToString())

        # Convert them back.
        npx, npy, _ = sklearn_utils.tfexamples_to_nps(path, embedding_name,
                                                      label_name, label_list,
                                                      l2_normalization)

        # Check that output is correct. The expected values are cast to
        # float32 because the embeddings were stored in a `float_list`.
        expected_embs = np.array([d[0] for d in fake_data], np.float32)
        if l2_normalization:
            expected_embs /= np.linalg.norm(expected_embs,
                                            axis=1,
                                            ord=2,
                                            keepdims=True)
        self.assertAllEqual(npx, expected_embs)
        self.assertAllEqual(npy, (1, 0, 1))