Example #1
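Examples #1-#4 below are variants of do_single_file_inference from Mozilla DeepSpeech's training script, captured at different releases; Example #5 builds on the same helpers (create_inference_graph, create_overlapping_windows, try_loading are defined in that script itself). A hedged sketch of the imports the snippets rely on, following the v0.6-era module layout (paths differ between releases):

import sys

import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tfv1

from ds_ctcdecoder import ctc_beam_search_decoder, Scorer
from util.config import Config
from util.feeding import audiofile_to_features
from util.flags import FLAGS
from util.logging import log_error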
def do_single_file_inference(input_file_path):
    with tfv1.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph
        saver = tfv1.train.Saver()

        # Restore variables from training checkpoint
        # TODO: This restores the most recent checkpoint, but if we use validation to counteract
        #       over-fitting, we may want to restore an earlier checkpoint.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error(
                'Checkpoint directory ({}) does not contain a valid checkpoint state.'
                .format(FLAGS.checkpoint_dir))
            exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        features, features_len = audiofile_to_features(input_file_path)
        previous_state_c = np.zeros([1, Config.n_cell_dim])
        previous_state_h = np.zeros([1, Config.n_cell_dim])

        # Add batch dimension
        features = tf.expand_dims(features, 0)
        features_len = tf.expand_dims(features_len, 0)

        # Evaluate
        features = create_overlapping_windows(features).eval(session=session)
        features_len = features_len.eval(session=session)

        logits = outputs['outputs'].eval(feed_dict={
            inputs['input']: features,
            inputs['input_lengths']: features_len,
            inputs['previous_state_c']: previous_state_c,
            inputs['previous_state_h']: previous_state_h,
        }, session=session)

        logits = np.squeeze(logits)

        if FLAGS.lm_binary_path:
            scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                            FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                            Config.alphabet)
        else:
            scorer = None
        decoded = ctc_beam_search_decoder(logits,
                                          Config.alphabet,
                                          FLAGS.beam_width,
                                          scorer=scorer,
                                          cutoff_prob=FLAGS.cutoff_prob,
                                          cutoff_top_n=FLAGS.cutoff_top_n)
        # Print highest probability result
        print(decoded[0][1])
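Upstream, this function is wired to the --one_shot_infer flag; a minimal driver in the same style (the flag helpers below follow the DeepSpeech training script and are assumptions here, not part of the example):

import absl.app
from util.config import initialize_globals
from util.flags import create_flags


def main(_):
    initialize_globals()
    if FLAGS.one_shot_infer:
        do_single_file_inference(FLAGS.one_shot_infer)


if __name__ == '__main__':
    create_flags()
    absl.app.run(main)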
Example #2
def do_single_file_inference(input_file_path):
    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph
        mapping = {
            v.op.name: v
            for v in tf.global_variables()
            if not v.op.name.startswith('previous_state_')
        }
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        # TODO: This restores the most recent checkpoint, but if we use validation to counteract
        #       over-fitting, we may want to restore an earlier checkpoint.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error(
                'Checkpoint directory ({}) does not contain a valid checkpoint state.'
                .format(FLAGS.checkpoint_dir))
            exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)
        session.run(outputs['initialize_state'])

        features, features_len = audiofile_to_features(input_file_path)

        # Add batch dimension
        features = tf.expand_dims(features, 0)
        features_len = tf.expand_dims(features_len, 0)

        # Evaluate
        features = create_overlapping_windows(features).eval(session=session)
        features_len = features_len.eval(session=session)

        logits = outputs['outputs'].eval(feed_dict={
            inputs['input']: features,
            inputs['input_lengths']: features_len,
        }, session=session)

        logits = np.squeeze(logits)

        scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.lm_binary_path,
                        FLAGS.lm_trie_path, Config.alphabet)
        decoded = ctc_beam_search_decoder(logits,
                                          Config.alphabet,
                                          FLAGS.beam_width,
                                          scorer=scorer)
        # Print highest probability result
        print(decoded[0][1])
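Example #2 predates the stateless-graph refactor: the LSTM state lives in graph variables that outputs['initialize_state'] resets, so the state is neither fed per call nor restored from the checkpoint, which is why the Saver is built from an explicit name mapping. A runnable toy of that exclusion pattern (variable names are illustrative only):

import tensorflow.compat.v1 as tfv1

tfv1.disable_eager_execution()

# Toy graph: one checkpointed weight, one graph-local state variable.
weights = tfv1.get_variable('layer_1/weights', shape=[2, 2])
state = tfv1.get_variable('previous_state_c', shape=[1, 2], trainable=False)

# Restore everything except variables whose names mark them as local state.
mapping = {
    v.op.name: v
    for v in tfv1.global_variables()
    if not v.op.name.startswith('previous_state_')
}
saver = tfv1.train.Saver(mapping)  # restores 'layer_1/weights' only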
Example #3
def do_single_file_inference(input_file_path):
    with tfv1.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Restore variables from training checkpoint
        if FLAGS.load == 'auto':
            method_order = ['best', 'last']
        else:
            method_order = [FLAGS.load]
        load_or_init_graph(session, method_order)

        features, features_len = audiofile_to_features(input_file_path)
        previous_state_c = np.zeros([1, Config.n_cell_dim])
        previous_state_h = np.zeros([1, Config.n_cell_dim])

        # Add batch dimension
        features = tf.expand_dims(features, 0)
        features_len = tf.expand_dims(features_len, 0)

        # Evaluate
        features = create_overlapping_windows(features).eval(session=session)
        features_len = features_len.eval(session=session)

        logits = outputs['outputs'].eval(feed_dict={
            inputs['input']: features,
            inputs['input_lengths']: features_len,
            inputs['previous_state_c']: previous_state_c,
            inputs['previous_state_h']: previous_state_h,
        }, session=session)

        logits = np.squeeze(logits)

        if FLAGS.scorer_path:
            scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.scorer_path,
                            Config.alphabet)
        else:
            scorer = None
        decoded = ctc_beam_search_decoder(logits,
                                          Config.alphabet,
                                          FLAGS.beam_width,
                                          scorer=scorer,
                                          cutoff_prob=FLAGS.cutoff_prob,
                                          cutoff_top_n=FLAGS.cutoff_top_n)
        # Print highest probability result
        print(decoded[0][1])
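Example #3 targets DeepSpeech v0.7+, where the separate LM binary and trie of the earlier examples were merged into a single .scorer package, so Scorer now takes one path. A construction sketch (the path is a placeholder; alpha/beta echo the upstream defaults of roughly 0.75/1.85):

from ds_ctcdecoder import Scorer

scorer = Scorer(0.75, 1.85, 'deepspeech-0.7.0-models.scorer', Config.alphabet)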
Example #4
def do_single_file_inference(input_file_path):
    with tfv1.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph
        saver = tfv1.train.Saver()

        # Restore variables from training checkpoint
        loaded = False
        if not loaded and FLAGS.load in ['auto', 'last']:
            loaded = try_loading(session, saver, 'checkpoint', 'most recent', load_step=False)
        if not loaded and FLAGS.load in ['auto', 'best']:
            loaded = try_loading(session, saver, 'best_dev_checkpoint', 'best validation', load_step=False)
        if not loaded:
            print('Could not load checkpoint from {}'.format(FLAGS.checkpoint_dir))
            sys.exit(1)

        features, features_len = audiofile_to_features(input_file_path)
        previous_state_c = np.zeros([1, Config.n_cell_dim])
        previous_state_h = np.zeros([1, Config.n_cell_dim])

        # Add batch dimension
        features = tf.expand_dims(features, 0)
        features_len = tf.expand_dims(features_len, 0)

        # Evaluate
        features = create_overlapping_windows(features).eval(session=session)
        features_len = features_len.eval(session=session)

        logits = outputs['outputs'].eval(feed_dict={
            inputs['input']: features,
            inputs['input_lengths']: features_len,
            inputs['previous_state_c']: previous_state_c,
            inputs['previous_state_h']: previous_state_h,
        }, session=session)

        logits = np.squeeze(logits)

        if FLAGS.lm_binary_path:
            scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                            FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                            Config.alphabet)
        else:
            scorer = None
        decoded = ctc_beam_search_decoder(logits, Config.alphabet, FLAGS.beam_width,
                                          scorer=scorer, cutoff_prob=FLAGS.cutoff_prob,
                                          cutoff_top_n=FLAGS.cutoff_top_n)
        # Print highest probability result
        print(decoded[0][1])
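try_loading is a helper defined alongside this function in the training script; a simplified sketch of its behavior under the v0.6-era sources (the global-step restoration behind load_step is omitted, so treat this as an approximation):

def try_loading(session, saver, checkpoint_filename, caption, load_step=True):
    try:
        # Each checkpoint family ('checkpoint', 'best_dev_checkpoint') has
        # its own state file inside FLAGS.checkpoint_dir.
        checkpoint = tfv1.train.get_checkpoint_state(FLAGS.checkpoint_dir,
                                                     checkpoint_filename)
        if not checkpoint:
            return False
        saver.restore(session, checkpoint.model_checkpoint_path)
        return True
    except tf.errors.InvalidArgumentError as e:
        log_error(str(e))
        return False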
Example #5
def activations_common_voice_pertubed_sets(input_dir,
                                           output_dir,
                                           test_only=False,
                                           prune_percentage=0,
                                           scores_file=None,
                                           random=False,
                                           verbose=True,
                                           randomly_initialized=False):
    '''Obtains activations for wavs in input_dir and saves them to output_dir'''
    inputs, outputs, layers = create_inference_graph(batch_size=1, n_steps=-1)
    intermediate_layer_names = [
        'layer_1', 'layer_2', 'layer_3', 'rnn_output', 'layer_4', 'layer_5'
    ]
    intermediate_layers = [
        l for n, l in layers.items() if n in intermediate_layer_names
    ]

    pertubed_sets = json.load(open('data/pertubed_input_sets_balanced.json'))
    skip_sets = []
    if test_only:
        skip_sets = json.load(open('./results/set_ids_used.json'))

    if not prune_percentage:
        base_path = '{}/activations'.format(output_dir)
    else:
        base_path = '{}/activations/pruned-{}'.format(output_dir,
                                                      prune_percentage * 100)
    if random:
        base_path += '-random'

    with tfv1.Session(config=Config.session_config) as session:
        # Create a saver using variables from the above newly created graph

        if not randomly_initialized:
            saver = tfv1.train.Saver()

            # Restore variables from training checkpoint
            loaded = False
            if not loaded and FLAGS.load in ['auto', 'last']:
                loaded = try_loading(session,
                                     saver,
                                     'checkpoint',
                                     'most recent',
                                     load_step=False)
            if not loaded and FLAGS.load in ['auto', 'best']:
                loaded = try_loading(session,
                                     saver,
                                     'best_dev_checkpoint',
                                     'best validation',
                                     load_step=False)
            if not loaded:
                print('Could not load checkpoint from {}'.format(
                    FLAGS.checkpoint_dir))
                sys.exit(1)
        else:
            initializer = tfv1.global_variables_initializer()
            session.run(initializer)

        ###### PRUNING PART ######

        if not prune_percentage:
            if verbose:
                print('No pruning done.')
        else:
            if verbose:
                print('-' * 80)
                print('pruning with {}%...'.format(prune_percentage))
            scores_per_layer = np.load(scores_file)
            # Hoisted so the skip check inside the loop below is defined;
            # False matches the intent of pruning the LSTM kernel as well.
            skip_lstm = False
            layer_masks = prune_matrices(scores_per_layer,
                                         prune_percentage=prune_percentage,
                                         random=random,
                                         verbose=verbose,
                                         skip_lstm=skip_lstm)

            n_layers_to_prune = len(layer_masks)
            i = 0
            for index, v in enumerate(tf.trainable_variables()):
                lstm_layer_name = 'cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel:0'
                if 'weights' not in v.name and v.name != lstm_layer_name:
                    continue
                if i >= n_layers_to_prune:
                    break  # all masks applied; leave remaining variables unpruned
                # make mask into the shape of the weights
                if v.name == lstm_layer_name:
                    if skip_lstm: continue
                    # Shape of LSTM weights: [(2*neurons), (4*neurons)]
                    cell_template = np.ones((2, 4))
                    mask = np.repeat(layer_masks[i], v.shape[0] // 2, axis=0)
                    mask = mask.reshape(
                        [layer_masks[i].shape[0], v.shape[0] // 2])
                    mask = np.swapaxes(mask, 0, 1)
                    mask = np.kron(mask, cell_template)
                else:
                    mask = np.repeat(layer_masks[i], v.shape[0], axis=0)
                    mask = mask.reshape([layer_masks[i].shape[0], v.shape[0]])
                    mask = np.swapaxes(mask, 0, 1)

                # apply mask to weights
                session.run(v.assign(tf.multiply(v, mask)))
                i += 1

        ###### END PRUNING PART ######
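The mask expansion in the loop above is easy to misread; this standalone numpy sketch reproduces the repeat/reshape/swapaxes/kron shape mechanics for a hypothetical 3-unit layer (it demonstrates shapes only, not the CuDNN gate ordering):

import numpy as np

n = 3                               # hypothetical layer width
layer_mask = np.array([1, 0, 1])    # prune the middle unit

# Dense layer: expand to [n_in, n] so column j is zeroed when unit j is pruned.
n_in = 5
dense = np.repeat(layer_mask, n_in).reshape(n, n_in)
dense = np.swapaxes(dense, 0, 1)
assert dense.shape == (n_in, n) and (dense[:, 1] == 0).all()

# LSTM kernel: expand to [2n, 4n] by tiling each mask entry over a (2, 4)
# block, mirroring np.kron(mask, cell_template) above.
lstm = np.swapaxes(np.repeat(layer_mask, n).reshape(n, n), 0, 1)
lstm = np.kron(lstm, np.ones((2, 4)))
assert lstm.shape == (2 * n, 4 * n) and (lstm[:, 4:8] == 0).all()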

        # Default states for LSTM cell
        previous_state_c = np.zeros([1, Config.n_cell_dim])
        previous_state_h = np.zeros([1, Config.n_cell_dim])

        sets_to_process = [
            s for s in pertubed_sets if str(s['set_id']) not in skip_sets
        ]
        print('{} sets found'.format(len(sets_to_process)))
        for wav_set in sets_to_process:
            print('Processing set {}, {} items...'.format(
                wav_set['set_id'], wav_set['set_length']))

            # Only process files that are not yet available in results directory
            create_dir_if_not_exists('{}/{}'.format(base_path,
                                                    wav_set['set_id']))
            files_done = [
                f[:-4]
                for f in os.listdir('{}/{}'.format(base_path, wav_set['set_id']))
                if f.endswith('.npy')
            ]

            for item in wav_set['set_items']:
                file_name = item['path'][:-4]
                if file_name in files_done:
                    print('{}: skipped, already processed.'.format(file_name))
                    continue
                print('current file: {}'.format(file_name))

                input_file_path = '{}/{}.wav'.format(input_dir, file_name)

                # Prepare features
                features, features_len = audiofile_to_features(input_file_path)
                features = tf.expand_dims(features, 0)
                features_len = tf.expand_dims(features_len, 0)
                features = create_overlapping_windows(features).eval(
                    session=session)
                features_len = features_len.eval(session=session)

                feed_dict = {
                    inputs['input']: features,
                    inputs['input_lengths']: features_len,
                    inputs['previous_state_c']: previous_state_c,
                    inputs['previous_state_h']: previous_state_h,
                }
                intermediate_activations = session.run(intermediate_layers,
                                                       feed_dict=feed_dict)

                # Save activations of actual input
                save_to_path_activations = '{}/{}/{}.npy'.format(
                    base_path, wav_set['set_id'], file_name)
                write_numpy_to_file(save_to_path_activations,
                                    np.array(intermediate_activations))
                print('Activations for {} are saved to: {}'.format(
                    file_name, save_to_path_activations))

    return True
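A hypothetical invocation (all paths are placeholders; flags must be initialized first because the checkpoint loading reads FLAGS):

from util.config import initialize_globals
from util.flags import create_flags

if __name__ == '__main__':
    create_flags()
    initialize_globals()
    activations_common_voice_pertubed_sets(
        'data/common-voice/wavs',    # placeholder input_dir
        'results',                   # placeholder output_dir
        prune_percentage=0.1,
        scores_file='results/scores_per_layer.npy',
        verbose=True)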