Exemplo n.º 1
0
    def testConfusionMatrix(self):
        """Checks the confusion matrix produced by a trained classifier."""
        spec = audio_spec.BrowserFFTSpec()

        tmp_dir = self.get_temp_dir()
        # Two classes; each file's duration (in seconds) determines how many
        # samples it contributes.
        wav_paths = [
            write_sample(tmp_dir, 'cat', '1.wav', 44100, duration_sec=1),
            write_sample(tmp_dir, 'cat', '2.wav', 44100, duration_sec=2),
            write_sample(tmp_dir, 'dog', '1.wav', 44100, duration_sec=3),
            write_sample(tmp_dir, 'dog', '2.wav', 44100, duration_sec=4),
        ]
        index_to_labels = ['cat', 'dog']

        # Prepare data.
        ds = tf.data.Dataset.from_tensor_slices((wav_paths, [0, 0, 1, 1]))
        data_loader = audio_dataloader.DataLoader(ds, len(ds), index_to_labels,
                                                  spec)

        # Train a floating point model.
        task = audio_classifier.create(
            data_loader, spec, batch_size=1, epochs=15)

        confusion_matrix = task.confusion_matrix(data_loader)

        # BrowserFFTSpec generates 1 sample per second of audio, so the files
        # above contribute 1 + 2 + 3 + 4 = 10 samples in total.
        self.assertEqual(tf.math.reduce_sum(confusion_matrix), 10)
        # confusion_matrix is of shape (truth, prediction).
        # We have 2 classes, 3 cat samples and 7 dog samples.
        self.assertEqual(confusion_matrix.shape, (2, 2))
        self.assertAllEqual(
            tf.math.reduce_sum(confusion_matrix, axis=-1).numpy(),
            np.array([3, 7]))
Exemplo n.º 2
0
 def test_dynamic_range_quantization(self):
     """Trains and exports a dynamic-range-quantized binary classifier."""
     quantization = configs.QuantizationConfig.for_dynamic()
     self._train_and_export(
         audio_spec.BrowserFFTSpec(),
         num_classes=2,
         filename='binary_classification.tflite',
         expected_model_size=1 * 1000 * 1000,
         quantization_config=quantization)
Exemplo n.º 3
0
    def testBrowserFFT(self):
        """Trains on synthetic audio and exports a SavedModel."""

        def pcm(shape):
            # Scale random floats in (0, 1] up to the int16 range.
            return np.random.rand(*shape) * (1 << 15)

        np.random.seed(123)

        spec = audio_spec.BrowserFFTSpec()
        dataset_shape = (1, spec.expected_waveform_len)
        num_classes = 2
        sounds = [pcm(dataset_shape) for _ in range(num_classes)]
        labels = list(range(num_classes))
        index_to_labels = ['sound1', 'sound2']
        sounds_ds = tf.data.Dataset.from_tensor_slices(sounds)
        labels_ds = tf.data.Dataset.from_tensor_slices(labels)
        ds = tf.data.Dataset.zip((sounds_ds, labels_ds)).map(spec.preprocess)
        data_loader = audio_dataloader.DataLoader(ds, len(ds), index_to_labels)

        task = audio_classifier.create(
            data_loader, spec, batch_size=1, epochs=100)

        _, acc = task.evaluate(data_loader)
        # Accuracy should beat random guessing (0.5 for two classes).
        self.assertGreater(acc, .5)

        # Export to SavedModel and verify the output directory is non-empty.
        saved_model_output_path = os.path.join(spec.model_dir, 'saved_model')
        task.export(spec.model_dir, export_format=ExportFormat.SAVED_MODEL)
        self.assertTrue(os.path.isdir(saved_model_output_path))
        self.assertNotEqual(len(os.listdir(saved_model_output_path)), 0)
Exemplo n.º 4
0
  def test_spec(self):
    """Both specs should load samples from a plain folder layout."""
    folder_path = self._get_folder_path('test_examples_helper')
    write_sample(folder_path, 'unknown', '2s.wav', 44100, 2, value=1)

    # from_folder must work with either spec; order matches the originals.
    for spec in (audio_spec.YAMNetSpec(), audio_spec.BrowserFFTSpec()):
      audio_dataloader.DataLoader.from_folder(spec, folder_path)
Exemplo n.º 5
0
    def test_spec(self):
        """ESC-50 loading works with YAMNet but rejects BrowserFFT."""
        folder_path = self._get_folder_path('test_examples_helper')

        # YAMNet supports the ESC-50 dataset layout.
        audio_dataloader.DataLoader.from_esc50(audio_spec.YAMNetSpec(),
                                               folder_path)

        # BrowserFFT does not, and should trip the loader's assertion.
        with self.assertRaises(AssertionError):
            audio_dataloader.DataLoader.from_esc50(audio_spec.BrowserFFTSpec(),
                                                   folder_path)
Exemplo n.º 6
0
  def testBrowserFFT(self):
    """Full pipeline: train, export SavedModel and TFLite, evaluate TFLite."""

    def pcm(shape):
      # Scale random floats in (0, 1] up to the int16 range.
      return np.random.rand(*shape) * (1 << 15)

    np.random.seed(123)
    tf.random.set_seed(123)

    # Prepare data: one synthetic waveform per class.
    spec = audio_spec.BrowserFFTSpec()
    dataset_shape = (1, spec.expected_waveform_len)
    num_classes = 2
    sounds = [pcm(dataset_shape) for _ in range(num_classes)]
    labels = list(range(num_classes))
    index_to_labels = ['sound1', 'sound2']
    ds = tf.data.Dataset.zip((tf.data.Dataset.from_tensor_slices(sounds),
                              tf.data.Dataset.from_tensor_slices(labels)))
    preprocessed_ds = ds.map(spec.preprocess)
    data_loader = audio_dataloader.DataLoader(preprocessed_ds,
                                              len(preprocessed_ds),
                                              index_to_labels)

    # Train a floating point model.
    task = audio_classifier.create(data_loader, spec, batch_size=1, epochs=50)

    # Evaluate trained model; it should beat random guessing (0.5 for
    # two classes).
    _, acc = task.evaluate(data_loader)
    self.assertGreater(acc, .5)

    # Export to SavedModel and check that the directory is non-empty.
    saved_model_dir = os.path.join(spec.model_dir, 'saved_model')
    task.export(spec.model_dir, export_format=ExportFormat.SAVED_MODEL)
    self.assertTrue(os.path.isdir(saved_model_dir))
    self.assertNotEqual(len(os.listdir(saved_model_dir)), 0)

    # Export to TFLite and check that the file is non-empty.
    tflite_path = os.path.join(spec.model_dir, 'float.tflite')
    task.export(
        spec.model_dir,
        tflite_filename='float.tflite',
        export_format=ExportFormat.TFLITE)
    self.assertTrue(tf.io.gfile.exists(tflite_path))
    self.assertGreater(os.path.getsize(tflite_path), 0)

    # Evaluate accuracy on the TFLite model. Build a dataset without
    # preprocessing, since preprocessing has been packaged inside the
    # TFLite model.
    squeezed_ds = ds.map(lambda x, y: (tf.squeeze(tf.cast(x, tf.float32)), y))
    tflite_dataloader = audio_dataloader.DataLoader(squeezed_ds,
                                                    len(squeezed_ds),
                                                    index_to_labels)

    result = task.evaluate_tflite(tflite_path, tflite_dataloader)
    self.assertGreater(result['accuracy'], .5)
    def testBrowserFFT(self):
        """Trains on wav files, exports SavedModel + TFLite, evaluates both."""
        temp_folder = self.get_temp_dir()
        # Two classes; each file's duration determines its sample count.
        wav_files = [
            write_sample(temp_folder, 'cat', '1.wav', 44100, duration_sec=1),
            write_sample(temp_folder, 'cat', '2.wav', 44100, duration_sec=2),
            write_sample(temp_folder, 'dog', '1.wav', 44100, duration_sec=3),
            write_sample(temp_folder, 'dog', '2.wav', 44100, duration_sec=4),
        ]
        index_to_labels = ['cat', 'dog']

        np.random.seed(123)
        tf.random.set_seed(123)

        # Prepare data.
        spec = audio_spec.BrowserFFTSpec()
        ds = tf.data.Dataset.from_tensor_slices((wav_files, [0, 0, 1, 1]))
        data_loader = audio_dataloader.DataLoader(ds, len(ds), index_to_labels,
                                                  spec)

        # Train a floating point model.
        task = audio_classifier.create(
            data_loader, spec, batch_size=1, epochs=15)

        # Evaluate trained model; it should beat random guessing (0.5 for
        # two classes).
        _, acc = task.evaluate(data_loader)
        self.assertGreater(acc, .5)

        # Export to SavedModel and verify the directory is non-empty.
        saved_model_dir = os.path.join(spec.model_dir, 'saved_model')
        task.export(spec.model_dir, export_format=ExportFormat.SAVED_MODEL)
        self.assertTrue(os.path.isdir(saved_model_dir))
        self.assertNotEqual(len(os.listdir(saved_model_dir)), 0)

        # Export to TFLite and verify the file is non-empty.
        tflite_path = os.path.join(spec.model_dir, 'float.tflite')
        task.export(spec.model_dir,
                    tflite_filename='float.tflite',
                    export_format=ExportFormat.TFLITE)
        self.assertTrue(tf.io.gfile.exists(tflite_path))
        self.assertGreater(os.path.getsize(tflite_path), 0)

        # Evaluate accuracy on the TFLite model using a spec that skips
        # preprocessing, since preprocessing has been packaged inside the
        # TFLite model.
        spec = BrowserFFTWithoutPreprocessing()
        tflite_dataloader = audio_dataloader.DataLoader(
            ds, len(ds), index_to_labels, spec)

        result = task.evaluate_tflite(tflite_path, tflite_dataloader)
        self.assertGreaterEqual(result['accuracy'], .5)
Exemplo n.º 8
0
    def test_basic_training(self):
        """Trains a 5-class model and verifies the exported TFLite metadata.

        The expected metadata JSON below must match the exporter's output
        byte-for-byte, including field order and indentation.
        """
        tflite_path = self._train_and_export(
            audio_spec.BrowserFFTSpec(),
            num_classes=5,
            filename='basic_5_classes_training.tflite',
            expected_model_size=6 * 1000 * 1000)
        # Compare the metadata embedded in the exported .tflite file against
        # the golden JSON string.
        self.assertEqual(
            model_util.extract_tflite_metadata_json(tflite_path), """{
  "name": "AudioClassifier",
  "description": "Identify the most prominent type in the audio clip from a known set of categories.",
  "version": "v1",
  "subgraph_metadata": [
    {
      "input_tensor_metadata": [
        {
          "name": "audio_clip",
          "description": "Input audio clip to be classified.",
          "content": {
            "content_properties_type": "AudioProperties",
            "content_properties": {
              "sample_rate": 44100,
              "channels": 1
            }
          },
          "stats": {
          }
        }
      ],
      "output_tensor_metadata": [
        {
          "name": "probability",
          "description": "Scores of the labels respectively.",
          "content": {
            "content_properties_type": "FeatureProperties",
            "content_properties": {
            }
          },
          "stats": {
            "max": [
              1.0
            ],
            "min": [
              0.0
            ]
          },
          "associated_files": [
            {
              "name": "probability_labels.txt",
              "description": "Labels for categories that the model can recognize.",
              "type": "TENSOR_AXIS_LABELS"
            }
          ]
        }
      ]
    }
  ],
  "author": "TensorFlow Lite Model Maker",
  "license": "Apache License. Version 2.0 http://www.apache.org/licenses/LICENSE-2.0.",
  "min_parser_version": "1.3.0"
}
""")
Exemplo n.º 9
0
 def test_binary_classification(self):
     """Trains and exports an unquantized two-class model."""
     self._train_and_export(
         audio_spec.BrowserFFTSpec(),
         num_classes=2,
         filename='binary_classification.tflite',
         expected_model_size=6 * 1000 * 1000)
Exemplo n.º 10
0
 def setUpClass(cls):
     """Creates one shared BrowserFFTSpec for the whole test class."""
     super().setUpClass()
     cls._spec = audio_spec.BrowserFFTSpec()
Exemplo n.º 11
0
 def testBrowserFFT(self):
     """Runs the shared spec checks on the BrowserFFT spec pair."""
     spec = audio_spec.BrowserFFTSpec()
     spec_without_preprocessing = BrowserFFTWithoutPreprocessing()
     self._test_spec(spec, spec_without_preprocessing)