 def testPrepareDataIndexEmpty(self):
     tmp_dir = self.get_temp_dir()
     self._saveWavFolders(tmp_dir, ["a", "b", "c"], 0)
     with self.assertRaises(Exception) as e:
         _ = input_data.AudioProcessor("", tmp_dir, 10, 10, ["a", "b"], 10,
                                       10, self._model_settings(), tmp_dir)
     self.assertTrue("No .wavs found" in str(e.exception))
 def testPrepareProcessingGraph(self):
     tmp_dir = self.get_temp_dir()
     wav_dir = os.path.join(tmp_dir, "wavs")
     os.mkdir(wav_dir)
     self._saveWavFolders(wav_dir, ["a", "b", "c"], 100)
     background_dir = os.path.join(wav_dir, "_background_noise_")
     os.mkdir(background_dir)
     wav_data = self._getWavData()
     for i in range(10):
         file_path = os.path.join(background_dir,
                                  "background_audio_%d.wav" % i)
         self._saveTestWavFile(file_path, wav_data)
     model_settings = {
         "desired_samples": 160,
         "fingerprint_size": 40,
         "label_count": 4,
         "window_size_samples": 100,
         "window_stride_samples": 100,
         "fingerprint_width": 40,
         "preprocess": "mfcc",
     }
     audio_processor = input_data.AudioProcessor("", wav_dir, 10, 10,
                                                 ["a", "b"], 10, 10,
                                                 model_settings, tmp_dir)
     self.assertIsNotNone(audio_processor.wav_filename_placeholder_)
     self.assertIsNotNone(audio_processor.foreground_volume_placeholder_)
     self.assertIsNotNone(audio_processor.time_shift_padding_placeholder_)
     self.assertIsNotNone(audio_processor.time_shift_offset_placeholder_)
     self.assertIsNotNone(audio_processor.background_data_placeholder_)
     self.assertIsNotNone(audio_processor.background_volume_placeholder_)
     self.assertIsNotNone(audio_processor.output_)
 def testPrepareDataIndexMissing(self):
     tmp_dir = self.get_temp_dir()
     self._saveWavFolders(tmp_dir, ["a", "b", "c"], 100)
     with self.assertRaises(Exception) as e:
         _ = input_data.AudioProcessor("", tmp_dir, 10, 10, ["a", "b", "d"], 10,
                                       10, self._model_settings(), tmp_dir)
     self.assertIn("Expected to find", str(e.exception))
Example #4
 def testGetData(self):
     tmp_dir = self.get_temp_dir()
     wav_dir = os.path.join(tmp_dir, "wavs")
     os.mkdir(wav_dir)
     self._saveWavFolders(wav_dir, ["a", "b", "c"], 100)
     background_dir = os.path.join(wav_dir, "_background_noise_")
     os.mkdir(background_dir)
     wav_data = self._getWavData()
     for i in range(10):
         file_path = os.path.join(background_dir,
                                  "background_audio_%d.wav" % i)
         self._saveTestWavFile(file_path, wav_data)
     model_settings = {
         "desired_samples": 160,
         "fingerprint_size": 40,
         "label_count": 4,
         "window_size_samples": 100,
         "window_stride_samples": 100,
         "dct_coefficient_count": 40,
     }
     audio_processor = input_data.AudioProcessor("", wav_dir, 10, 10,
                                                 ["a", "b"], 10, 10,
                                                 model_settings)
     with self.test_session() as sess:
         result_data, result_labels = audio_processor.get_data(
             10, 0, model_settings, 0.3, 0.1, 100, "training", sess)
     self.assertEqual(10, len(result_data))
     self.assertEqual(10, len(result_labels))
 def testPrepareDataIndex(self):
     tmp_dir = self.get_temp_dir()
     self._saveWavFolders(tmp_dir, ["a", "b", "c"], 100)
     audio_processor = input_data.AudioProcessor("", tmp_dir, 10, 10,
                                                 ["a", "b"], 10, 10,
                                                 self._model_settings(), tmp_dir)
     self.assertLess(0, audio_processor.set_size("training"))
     self.assertIn("training", audio_processor.data_index)
     self.assertIn("validation", audio_processor.data_index)
     self.assertIn("testing", audio_processor.data_index)
     self.assertEqual(input_data.UNKNOWN_WORD_INDEX,
                      audio_processor.word_to_index["c"])
 def testPrepareBackgroundData(self):
     tmp_dir = self.get_temp_dir()
     background_dir = os.path.join(tmp_dir, "_background_noise_")
     os.mkdir(background_dir)
     wav_data = self._getWavData()
     for i in range(10):
         file_path = os.path.join(background_dir, "background_audio_%d.wav" % i)
         self._saveTestWavFile(file_path, wav_data)
     self._saveWavFolders(tmp_dir, ["a", "b", "c"], 100)
     audio_processor = input_data.AudioProcessor("", tmp_dir, 10, 10,
                                                 ["a", "b"], 10, 10,
                                                 self._model_settings(), tmp_dir)
     self.assertEqual(10, len(audio_processor.background_data))
 def testGetFeaturesForWav(self):
     tmp_dir = self.get_temp_dir()
     wav_dir = os.path.join(tmp_dir, "wavs")
     os.mkdir(wav_dir)
     self._saveWavFolders(wav_dir, ["a", "b", "c"], 1)
     desired_samples = 1600
     model_settings = {
         "desired_samples": desired_samples,
         "fingerprint_size": 40,
         "label_count": 4,
         "window_size_samples": 100,
         "window_stride_samples": 100,
         "fingerprint_width": 40,
         "average_window_width": 6,
         "preprocess": "average",
     }
     with self.cached_session() as sess:
         audio_processor = input_data.AudioProcessor(
             "", wav_dir, 10, 10, ["a", "b"], 10, 10, model_settings,
             tmp_dir)
         sample_data = np.zeros([desired_samples, 1])
         for i in range(desired_samples):
             phase = i % 4
             if phase == 0:
                 sample_data[i, 0] = 0
             elif phase == 1:
                 sample_data[i, 0] = -1
             elif phase == 2:
                 sample_data[i, 0] = 0
             elif phase == 3:
                 sample_data[i, 0] = 1
         test_wav_path = os.path.join(tmp_dir, "test_wav.wav")
         input_data.save_wav_file(test_wav_path, sample_data, 16000)

         results = audio_processor.get_features_for_wav(
             test_wav_path, model_settings, sess)
         spectrogram = results[0]
         self.assertEqual(1, spectrogram.shape[0])
         self.assertEqual(16, spectrogram.shape[1])
         self.assertEqual(11, spectrogram.shape[2])
         self.assertNear(0, spectrogram[0, 0, 0], 0.1)
         self.assertNear(200, spectrogram[0, 0, 5], 0.1)
 def _runGetDataTest(self, preprocess, window_length_ms):
     tmp_dir = self.get_temp_dir()
     wav_dir = os.path.join(tmp_dir, "wavs")
     os.mkdir(wav_dir)
     self._saveWavFolders(wav_dir, ["a", "b", "c"], 100)
     background_dir = os.path.join(wav_dir, "_background_noise_")
     os.mkdir(background_dir)
     wav_data = self._getWavData()
     for i in range(10):
         file_path = os.path.join(background_dir, "background_audio_%d.wav" % i)
         self._saveTestWavFile(file_path, wav_data)
     model_settings = models.prepare_model_settings(
         4, 16000, 1000, window_length_ms, 20, 40, preprocess)
     with self.cached_session() as sess:
         audio_processor = input_data.AudioProcessor(
             "", wav_dir, 10, 10, ["a", "b"], 10, 10, model_settings, tmp_dir)
         result_data, result_labels = audio_processor.get_data(
             10, 0, model_settings, 0.3, 0.1, 100, "training", sess)
         self.assertEqual(10, len(result_data))
         self.assertEqual(10, len(result_labels))
 def testGetUnprocessedData(self):
     tmp_dir = self.get_temp_dir()
     wav_dir = os.path.join(tmp_dir, "wavs")
     os.mkdir(wav_dir)
     self._saveWavFolders(wav_dir, ["a", "b", "c"], 100)
     model_settings = {
         "desired_samples": 160,
         "fingerprint_size": 40,
         "label_count": 4,
         "window_size_samples": 100,
         "window_stride_samples": 100,
         "fingerprint_width": 40,
         "preprocess": "mfcc",
     }
     audio_processor = input_data.AudioProcessor("", wav_dir, 10, 10, ["a", "b"],
                                                 10, 10, model_settings, tmp_dir)
     result_data, result_labels = audio_processor.get_unprocessed_data(
         10, model_settings, "training")
     self.assertEqual(10, len(result_data))
     self.assertEqual(10, len(result_labels))
Example #10
def main(_):
    # Set the verbosity based on flags (default is INFO, so we see all messages)
    tf.compat.v1.logging.set_verbosity(FLAGS.verbosity)

    # Start a new TensorFlow session.
    sess = tf.compat.v1.InteractiveSession()

    # Begin by making sure we have the training data we need. If you already have
    # training data of your own, use `--data_url= ` on the command line to avoid
    # downloading.
    model_settings = models.prepare_model_settings(
        len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
        FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
        FLAGS.window_stride_ms, FLAGS.feature_bin_count, FLAGS.preprocess)
    audio_processor = input_data.AudioProcessor(
        FLAGS.data_url, FLAGS.data_dir,
        FLAGS.silence_percentage, FLAGS.unknown_percentage,
        FLAGS.wanted_words.split(','), FLAGS.validation_percentage,
        FLAGS.testing_percentage, model_settings, FLAGS.summaries_dir)
    fingerprint_size = model_settings['fingerprint_size']
    label_count = model_settings['label_count']
    time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)
    # Figure out the learning rates for each training phase. Since it's often
    # effective to have high learning rates at the start of training, followed by
    # lower levels towards the end, the number of steps and learning rates can be
    # specified as comma-separated lists to define the rate at each stage. For
    # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
    # will run 13,000 training loops in total, with a rate of 0.001 for the first
    # 10,000, and 0.0001 for the final 3,000.
    training_steps_list = list(
        map(int, FLAGS.how_many_training_steps.split(',')))
    learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' %
            (len(training_steps_list), len(learning_rates_list)))

    input_placeholder = tf.compat.v1.placeholder(tf.float32,
                                                 [None, fingerprint_size],
                                                 name='fingerprint_input')
    if FLAGS.quantize:
        fingerprint_min, fingerprint_max = input_data.get_features_range(
            model_settings)
        fingerprint_input = tf.quantization.fake_quant_with_min_max_args(
            input_placeholder, fingerprint_min, fingerprint_max)
    else:
        fingerprint_input = input_placeholder

    logits, dropout_prob = models.create_model(fingerprint_input,
                                               model_settings,
                                               FLAGS.model_architecture,
                                               is_training=True)

    # Define loss and optimizer
    ground_truth_input = tf.compat.v1.placeholder(tf.int64, [None],
                                                  name='groundtruth_input')

    # Optionally we can add runtime checks to spot when NaNs or other symptoms of
    # numerical errors start occurring during training.
    control_dependencies = []
    if FLAGS.check_nans:
        checks = tf.compat.v1.add_check_numerics_ops()
        control_dependencies = [checks]

    # Create the back propagation and training evaluation machinery in the graph.
    with tf.compat.v1.name_scope('cross_entropy'):
        cross_entropy_mean = tf.compat.v1.losses.sparse_softmax_cross_entropy(
            labels=ground_truth_input, logits=logits)

    if FLAGS.quantize:
        try:
            tf.contrib.quantize.create_training_graph(quant_delay=0)
        except AttributeError as e:
            msg = e.args[0]
            msg += (
                '\n\n The --quantize option still requires contrib, which is not '
                'part of TensorFlow 2.0. Please install a previous version:'
                '\n    `pip install tensorflow<=1.15`')
            e.args = (msg, )
            raise e

    with tf.compat.v1.name_scope('train'), tf.control_dependencies(
            control_dependencies):
        learning_rate_input = tf.compat.v1.placeholder(
            tf.float32, [], name='learning_rate_input')
        if FLAGS.optimizer == 'gradient_descent':
            train_step = tf.compat.v1.train.GradientDescentOptimizer(
                learning_rate_input).minimize(cross_entropy_mean)
        elif FLAGS.optimizer == 'momentum':
            train_step = tf.compat.v1.train.MomentumOptimizer(
                learning_rate_input, .9,
                use_nesterov=True).minimize(cross_entropy_mean)
        else:
            raise Exception('Invalid Optimizer')
    predicted_indices = tf.argmax(input=logits, axis=1)
    correct_prediction = tf.equal(predicted_indices, ground_truth_input)
    confusion_matrix = tf.math.confusion_matrix(labels=ground_truth_input,
                                                predictions=predicted_indices,
                                                num_classes=label_count)
    evaluation_step = tf.reduce_mean(
        input_tensor=tf.cast(correct_prediction, tf.float32))
    with tf.compat.v1.get_default_graph().name_scope('eval'):
        tf.compat.v1.summary.scalar('cross_entropy', cross_entropy_mean)
        tf.compat.v1.summary.scalar('accuracy', evaluation_step)

    global_step = tf.compat.v1.train.get_or_create_global_step()
    increment_global_step = tf.compat.v1.assign(global_step, global_step + 1)

    saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables())

    # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
    merged_summaries = tf.compat.v1.summary.merge_all(scope='eval')
    train_writer = tf.compat.v1.summary.FileWriter(
        FLAGS.summaries_dir + '/train', sess.graph)
    validation_writer = tf.compat.v1.summary.FileWriter(FLAGS.summaries_dir +
                                                        '/validation')

    tf.compat.v1.global_variables_initializer().run()

    start_step = 1

    if FLAGS.start_checkpoint:
        models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
        start_step = global_step.eval(session=sess)

    tf.compat.v1.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.io.write_graph(sess.graph_def, FLAGS.train_dir,
                      FLAGS.model_architecture + '.pbtxt')

    # Save list of words.
    with gfile.GFile(
            os.path.join(FLAGS.train_dir,
                         FLAGS.model_architecture + '_labels.txt'), 'w') as f:
        f.write('\n'.join(audio_processor.words_list))

    # Training loop.
    training_steps_max = np.sum(training_steps_list)
    for training_step in xrange(start_step, training_steps_max + 1):
        # Figure out what the current learning rate is.
        training_steps_sum = 0
        for i in range(len(training_steps_list)):
            training_steps_sum += training_steps_list[i]
            if training_step <= training_steps_sum:
                learning_rate_value = learning_rates_list[i]
                break
        # Pull the audio samples we'll use for training.
        train_fingerprints, train_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
            FLAGS.background_volume, time_shift_samples, 'training', sess)
        # Run the graph with this batch of training data.
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            [
                merged_summaries,
                evaluation_step,
                cross_entropy_mean,
                train_step,
                increment_global_step,
            ],
            feed_dict={
                fingerprint_input: train_fingerprints,
                ground_truth_input: train_ground_truth,
                learning_rate_input: learning_rate_value,
                dropout_prob: 0.5
            })
        train_writer.add_summary(train_summary, training_step)
        tf.compat.v1.logging.info(
            'Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
            (training_step, learning_rate_value, train_accuracy * 100,
             cross_entropy_value))
        is_last_step = (training_step == training_steps_max)
        if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
            set_size = audio_processor.set_size('validation')
            total_accuracy = 0
            total_conf_matrix = None
            for i in xrange(0, set_size, FLAGS.batch_size):
                validation_fingerprints, validation_ground_truth = (
                    audio_processor.get_data(FLAGS.batch_size, i,
                                             model_settings, 0.0, 0.0, 0,
                                             'validation', sess))
                # Run a validation step and capture training summaries for TensorBoard
                # with the `merged` op.
                validation_summary, validation_accuracy, conf_matrix = sess.run(
                    [merged_summaries, evaluation_step, confusion_matrix],
                    feed_dict={
                        fingerprint_input: validation_fingerprints,
                        ground_truth_input: validation_ground_truth,
                        dropout_prob: 1.0
                    })
                validation_writer.add_summary(validation_summary,
                                              training_step)
                batch_size = min(FLAGS.batch_size, set_size - i)
                total_accuracy += (validation_accuracy * batch_size) / set_size
                if total_conf_matrix is None:
                    total_conf_matrix = conf_matrix
                else:
                    total_conf_matrix += conf_matrix
            tf.compat.v1.logging.info('Confusion Matrix:\n %s' %
                                      (total_conf_matrix))
            tf.compat.v1.logging.info(
                'Step %d: Validation accuracy = %.1f%% (N=%d)' %
                (training_step, total_accuracy * 100, set_size))

        # Save the model checkpoint periodically.
        if (training_step % FLAGS.save_step_interval == 0
                or training_step == training_steps_max):
            checkpoint_path = os.path.join(FLAGS.train_dir,
                                           FLAGS.model_architecture + '.ckpt')
            tf.compat.v1.logging.info('Saving to "%s-%d"', checkpoint_path,
                                      training_step)
            saver.save(sess, checkpoint_path, global_step=training_step)

    set_size = audio_processor.set_size('testing')
    tf.compat.v1.logging.info('set_size=%d', set_size)
    total_accuracy = 0
    total_conf_matrix = None
    for i in xrange(0, set_size, FLAGS.batch_size):
        test_fingerprints, test_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'testing', sess)
        test_accuracy, conf_matrix = sess.run(
            [evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: test_fingerprints,
                ground_truth_input: test_ground_truth,
                dropout_prob: 1.0
            })
        batch_size = min(FLAGS.batch_size, set_size - i)
        total_accuracy += (test_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
            total_conf_matrix = conf_matrix
        else:
            total_conf_matrix += conf_matrix
    tf.compat.v1.logging.warn('Confusion Matrix:\n %s' % (total_conf_matrix))
    tf.compat.v1.logging.warn('Final test accuracy = %.1f%% (N=%d)' %
                              (total_accuracy * 100, set_size))
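The comment at the start of main() explains that --how_many_training_steps and --learning_rate are parallel comma-separated lists defining a piecewise-constant learning-rate schedule, which the lookup loop inside the training loop then applies. A minimal standalone sketch of that lookup (the function name and example values are illustrative only, not part of the script):

def learning_rate_for_step(step, steps_list, rates_list):
    # Piecewise-constant schedule: rates_list[i] applies until the running
    # total of steps_list[:i + 1] has been reached.
    total = 0
    for steps, rate in zip(steps_list, rates_list):
        total += steps
        if step <= total:
            return rate
    return rates_list[-1]

# e.g. --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
assert learning_rate_for_step(10000, [10000, 3000], [0.001, 0.0001]) == 0.001
assert learning_rate_for_step(10001, [10000, 3000], [0.001, 0.0001]) == 0.0001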
Example #11
def main(_):
    words_list = input_data.prepare_words_list(FLAGS.wanted_words.split(','))
    model_settings = models.prepare_model_settings(len(words_list),
                                                   FLAGS.sample_rate,
                                                   FLAGS.clip_duration_ms,
                                                   FLAGS.window_size_ms,
                                                   FLAGS.window_stride_ms,
                                                   FLAGS.dct_coefficient_count)
    audio_processor = input_data.AudioProcessor('', FLAGS.data_dir,
                                                FLAGS.silence_percentage, 10,
                                                FLAGS.wanted_words.split(','),
                                                FLAGS.validation_percentage,
                                                FLAGS.testing_percentage,
                                                model_settings)

    output_audio_sample_count = FLAGS.sample_rate * FLAGS.test_duration_seconds
    output_audio = np.zeros((output_audio_sample_count, ), dtype=np.float32)

    # Set up background audio.
    background_crossover_ms = 500
    background_segment_duration_ms = (FLAGS.clip_duration_ms +
                                      background_crossover_ms)
    background_segment_duration_samples = int(
        (background_segment_duration_ms * FLAGS.sample_rate) / 1000)
    background_segment_stride_samples = int(
        (FLAGS.clip_duration_ms * FLAGS.sample_rate) / 1000)
    background_ramp_samples = int(
        ((background_crossover_ms / 2) * FLAGS.sample_rate) / 1000)

    # Mix the background audio into the main track.
    how_many_backgrounds = int(
        math.ceil(output_audio_sample_count /
                  background_segment_stride_samples))
    for i in range(how_many_backgrounds):
        output_offset = int(i * background_segment_stride_samples)
        background_index = np.random.randint(
            len(audio_processor.background_data))
        background_samples = audio_processor.background_data[background_index]
        background_offset = np.random.randint(
            0,
            len(background_samples) - model_settings['desired_samples'])
        background_volume = np.random.uniform(0, FLAGS.background_volume)
        mix_in_audio_sample(output_audio, output_offset, background_samples,
                            background_offset,
                            background_segment_duration_samples,
                            background_volume, background_ramp_samples,
                            background_ramp_samples)

    # Mix the words into the main track, noting their labels and positions.
    output_labels = []
    word_stride_ms = FLAGS.clip_duration_ms + FLAGS.word_gap_ms
    word_stride_samples = int((word_stride_ms * FLAGS.sample_rate) / 1000)
    clip_duration_samples = int(
        (FLAGS.clip_duration_ms * FLAGS.sample_rate) / 1000)
    word_gap_samples = int((FLAGS.word_gap_ms * FLAGS.sample_rate) / 1000)
    how_many_words = int(
        math.floor(output_audio_sample_count / word_stride_samples))
    all_test_data, all_test_labels = audio_processor.get_unprocessed_data(
        -1, model_settings, 'testing')
    for i in range(how_many_words):
        output_offset = (int(i * word_stride_samples) +
                         np.random.randint(word_gap_samples))
        output_offset_ms = (output_offset * 1000) / FLAGS.sample_rate
        is_unknown = np.random.randint(100) < FLAGS.unknown_percentage
        if is_unknown:
            wanted_label = input_data.UNKNOWN_WORD_LABEL
        else:
            wanted_label = words_list[2 +
                                      np.random.randint(len(words_list) - 2)]
        test_data_start = np.random.randint(len(all_test_data))
        found_sample_data = None
        index_lookup = np.arange(len(all_test_data), dtype=np.int32)
        np.random.shuffle(index_lookup)
        for test_data_offset in range(len(all_test_data)):
            test_data_index = index_lookup[(test_data_start + test_data_offset)
                                           % len(all_test_data)]
            current_label = all_test_labels[test_data_index]
            if current_label == wanted_label:
                found_sample_data = all_test_data[test_data_index]
                break
        mix_in_audio_sample(output_audio, output_offset, found_sample_data, 0,
                            clip_duration_samples, 1.0, 500, 500)
        output_labels.append({'label': wanted_label, 'time': output_offset_ms})

    input_data.save_wav_file(FLAGS.output_audio_file, output_audio,
                             FLAGS.sample_rate)
    tf.logging.info('Saved streaming test wav to %s', FLAGS.output_audio_file)

    with open(FLAGS.output_labels_file, 'w') as f:
        for output_label in output_labels:
            f.write('%s, %f\n' % (output_label['label'], output_label['time']))
    tf.logging.info('Saved streaming test labels to %s',
                    FLAGS.output_labels_file)
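Each line of the labels file written above has the form "label, time_in_ms". A small reader sketch for consuming that file (the helper below is illustrative and not part of the script):

def load_streaming_labels(labels_path):
    # Parses "label, time_in_ms" lines as written by main() above.
    expected = []
    with open(labels_path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            label, time_ms = line.rsplit(',', 1)
            expected.append({'label': label.strip(), 'time': float(time_ms)})
    return expected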
Example #12
def wav_to_features(sample_rate, clip_duration_ms, window_size_ms,
                    window_stride_ms, feature_bin_count, quantize, preprocess,
                    input_wav, output_c_file):
  """Converts an audio file into its corresponding feature map.

  Args:
    sample_rate: Expected sample rate of the wavs.
    clip_duration_ms: Expected duration in milliseconds of the wavs.
    window_size_ms: How long each spectrogram timeslice is.
    window_stride_ms: How far to move in time between spectrogram timeslices.
    feature_bin_count: How many bins to use for the feature fingerprint.
    quantize: Whether to train the model for eight-bit deployment.
    preprocess: Spectrogram processing mode; "mfcc", "average" or "micro".
    input_wav: Path to the audio WAV file to read.
    output_c_file: Where to save the generated C source file.
  """

  # Start a new TensorFlow session.
  sess = tf.compat.v1.InteractiveSession()

  model_settings = models.prepare_model_settings(
      0, sample_rate, clip_duration_ms, window_size_ms, window_stride_ms,
      feature_bin_count, preprocess)
  audio_processor = input_data.AudioProcessor(None, None, 0, 0, '', 0, 0,
                                              model_settings, None)

  results = audio_processor.get_features_for_wav(input_wav, model_settings,
                                                 sess)
  features = results[0]

  variable_base = os.path.splitext(os.path.basename(input_wav).lower())[0]

  # Save a C source file containing the feature data as an array.
  with gfile.GFile(output_c_file, 'w') as f:
    f.write('/* File automatically created by\n')
    f.write(' * tensorflow/examples/speech_commands/wav_to_features.py \\\n')
    f.write(' * --sample_rate=%d \\\n' % sample_rate)
    f.write(' * --clip_duration_ms=%d \\\n' % clip_duration_ms)
    f.write(' * --window_size_ms=%d \\\n' % window_size_ms)
    f.write(' * --window_stride_ms=%d \\\n' % window_stride_ms)
    f.write(' * --feature_bin_count=%d \\\n' % feature_bin_count)
    if quantize:
      f.write(' * --quantize=1 \\\n')
    f.write(' * --preprocess="%s" \\\n' % preprocess)
    f.write(' * --input_wav="%s" \\\n' % input_wav)
    f.write(' * --output_c_file="%s" \\\n' % output_c_file)
    f.write(' */\n\n')
    f.write('const int g_%s_width = %d;\n' %
            (variable_base, model_settings['fingerprint_width']))
    f.write('const int g_%s_height = %d;\n' %
            (variable_base, model_settings['spectrogram_length']))
    if quantize:
      features_min, features_max = input_data.get_features_range(model_settings)
      f.write('const unsigned char g_%s_data[] = {' % variable_base)
      i = 0
      for value in features.flatten():
        quantized_value = int(
            round(
                (255 * (value - features_min)) / (features_max - features_min)))
        if quantized_value < 0:
          quantized_value = 0
        if quantized_value > 255:
          quantized_value = 255
        if i == 0:
          f.write('\n  ')
        f.write('%d, ' % (quantized_value))
        i = (i + 1) % 10
    else:
      f.write('const float g_%s_data[] = {\n' % variable_base)
      i = 0
      for value in features.flatten():
        if i == 0:
          f.write('\n  ')
        f.write('%f, ' % value)
        i = (i + 1) % 10
    f.write('\n};\n')
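For reference, the per-value quantization in the branch above maps the feature range reported by input_data.get_features_range() onto 0..255 with clamping. A minimal sketch of that mapping as a standalone helper (the function is illustrative, not part of wav_to_features.py):

def quantize_feature(value, features_min, features_max):
  # Linearly rescale [features_min, features_max] to [0, 255], then clamp,
  # mirroring the computation inside the quantized branch above.
  scaled = int(round((255 * (value - features_min)) /
                     (features_max - features_min)))
  return max(0, min(255, scaled))

# Endpoints map to the extremes of the eight-bit range:
# quantize_feature(features_min, ...) == 0, quantize_feature(features_max, ...) == 255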