def levenshtein100(dictA, dictB):
    # Compare the ten most frequent keys of each dict, weighting each pairwise
    # Levenshtein distance by the product of the two counts
    dictA = OrderedDict(sorted(dictA.items(), key=lambda t: t[1], reverse=True)[:10])
    dictB = OrderedDict(sorted(dictB.items(), key=lambda t: t[1], reverse=True)[:10])
    return sum(v1 * v2 * textutil.levenshtein(k1, k2)
               for k1, v1 in dictA.items()
               for k2, v2 in dictB.items())
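# The snippets in this file all call a levenshtein()/textutil.levenshtein() helper
# that is not defined here. As a point of reference, a minimal sketch of the standard
# dynamic-programming edit distance, assuming it accepts any pair of sequences
# (character strings or lists of words); the project's real helper may differ.
def _levenshtein_sketch(a, b):
    # Classic Wagner-Fischer: previous[j] holds the edit distance between a[:i-1] and b[:j]
    if len(a) < len(b):
        a, b = b, a
    previous = list(range(len(b) + 1))
    for i, item_a in enumerate(a, start=1):
        current = [i]
        for j, item_b in enumerate(b, start=1):
            insert_cost = current[j - 1] + 1
            delete_cost = previous[j] + 1
            substitute_cost = previous[j - 1] + (item_a != item_b)
            current.append(min(insert_cost, delete_cost, substitute_cost))
        previous = current
    return previous[-1]

# Example (hypothetical): _levenshtein_sketch("kitten", "sitting") == 3,
# _levenshtein_sketch("a b c".split(), "a c".split()) == 1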
def calculate_report(results_tuple):
    r'''
    This routine will calculate a WER report.
    It'll compute the `mean` WER and create ``Sample`` objects of the
    ``report_count`` top lowest loss items from the provided WER results tuple
    (only items with WER!=0 and ordered by their WER).
    '''
    samples = []
    items = list(zip(*results_tuple))
    total_levenshtein = 0.0
    total_label_length = 0.0
    for label, decoding, distance, loss in items:
        sample_wer = wer(label, decoding)
        sample = Sample(label, decoding, loss, distance, sample_wer)
        samples.append(sample)
        total_levenshtein += levenshtein(label.split(), decoding.split())
        total_label_length += float(len(label.split()))

    # Getting the WER from the accumulated levenshteins and lengths
    samples_wer = total_levenshtein / total_label_length

    # Filter out all items with WER=0
    samples = [s for s in samples if s.wer > 0]

    # Order the remaining items by their loss (lowest loss on top)
    samples.sort(key=lambda s: s.loss)

    # Take only the first report_count items
    samples = samples[:FLAGS.report_count]

    # Order this top FLAGS.report_count items by their WER (lowest WER on top)
    samples.sort(key=lambda s: s.wer)

    return samples_wer, samples
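# calculate_report() above leans on a Sample record and a word-level wer() helper
# that are not shown in this file. A plausible minimal sketch, assuming wer() is
# simply word-level edit distance normalised by reference length; the actual
# project definitions may differ.
from collections import namedtuple

Sample = namedtuple('Sample', ['src', 'res', 'loss', 'distance', 'wer'])

def wer(original, result):
    # Word Error Rate: word-level Levenshtein distance divided by the number
    # of words in the reference transcript
    original_words = original.split()
    result_words = result.split()
    return levenshtein(original_words, result_words) / float(len(original_words))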
def process_decode_result(item):
    ground_truth, prediction, loss = item
    char_distance = levenshtein(ground_truth, prediction)
    char_length = len(ground_truth)
    word_distance = levenshtein(ground_truth.split(), prediction.split())
    word_length = len(ground_truth.split())
    return AttrDict({
        'src': ground_truth,
        'res': prediction,
        'loss': loss,
        'char_distance': char_distance,
        'char_length': char_length,
        'word_distance': word_distance,
        'word_length': word_length,
        'cer': char_distance / char_length,
        'wer': word_distance / word_length,
    })
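# The per-sample fields returned by this variant of process_decode_result() make it
# easy to aggregate corpus-level rates the usual way: sum the edit distances and
# divide by the summed reference lengths, rather than averaging per-sample rates.
# A small illustrative sketch (not part of the original code):
def aggregate_rates(results):
    total_char_distance = sum(r.char_distance for r in results)
    total_char_length = sum(r.char_length for r in results)
    total_word_distance = sum(r.word_distance for r in results)
    total_word_length = sum(r.word_length for r in results)
    cer = total_char_distance / float(total_char_length)
    wer = total_word_distance / float(total_word_length)
    return cer, wer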
def process_decode_result(item):
    label, decoding, distance, loss = item
    word_distance = levenshtein(label.split(), decoding.split())
    word_length = float(len(label.split()))
    return AttrDict({
        'src': label,
        'res': decoding,
        'loss': loss,
        'distance': distance,
        'wer': word_distance / word_length,
    })
def process_decode_result(item):
    label, decoding, distance, loss = item
    sample_wer = wer(label, decoding)
    return AttrDict({
        'src': label,
        'res': decoding,
        'loss': loss,
        'distance': distance,
        'wer': sample_wer,
        'levenshtein': levenshtein(label.split(), decoding.split()),
        'label_length': float(len(label.split())),
    })
def main():
    parser = argparse.ArgumentParser(description='Computing TFLite accuracy')
    parser.add_argument('--model', required=True,
                        help='Path to the model (protocol buffer binary file)')
    parser.add_argument('--alphabet', required=True,
                        help='Path to the configuration file specifying the alphabet used by the network')
    parser.add_argument('--lm', required=True,
                        help='Path to the language model binary file')
    parser.add_argument('--trie', required=True,
                        help='Path to the language model trie file created with native_client/generate_trie')
    parser.add_argument('--csv', required=True,
                        help='Path to the CSV source file')
    parser.add_argument('--proc', required=False, default=cpu_count(), type=int,
                        help='Number of processes to spawn, defaulting to number of CPUs')
    args = parser.parse_args()

    work_todo = JoinableQueue()   # this is where we are going to store input data
    work_done = Queue()           # this is where we are going to push results out

    processes = []
    for i in range(args.proc):
        worker_process = Process(target=tflite_worker,
                                 args=(args.model, args.alphabet, args.lm, args.trie, work_todo, work_done, i),
                                 daemon=True,
                                 name='tflite_process_{}'.format(i))
        worker_process.start()    # Launch tflite_worker() as a separate python process
        processes.append(worker_process)
    print([x.name for x in processes])

    ground_truths = []
    predictions = []
    losses = []

    with open(args.csv, 'r') as csvfile:
        csvreader = csv.DictReader(csvfile)
        for row in csvreader:
            work_todo.put({'filename': row['wav_filename'], 'transcript': row['transcript']})
    work_todo.join()

    while not work_done.empty():
        msg = work_done.get()
        losses.append(0.0)
        ground_truths.append(msg['ground_truth'])
        predictions.append(msg['prediction'])

    distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

    wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
    mean_loss = np.mean(losses)

    print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
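# main() above spawns tflite_worker() processes that are not defined in this file.
# A minimal sketch of the queue protocol those workers are expected to follow;
# load_model() and run_inference() are hypothetical placeholders standing in for
# the real TFLite model loading and decoding calls.
def tflite_worker_sketch(model, alphabet, lm, trie, queue_in, queue_out, gpu_index):
    engine = load_model(model, alphabet, lm, trie)  # hypothetical helper
    while True:
        msg = queue_in.get()
        prediction = run_inference(engine, msg['filename'])  # hypothetical helper
        # Push the result back in the shape main() expects to read it
        queue_out.put({'wav': msg['filename'],
                       'prediction': prediction,
                       'ground_truth': msg['transcript']})
        queue_in.task_done()  # required so work_todo.join() in main() can return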
def run_test(init_op, dataset):
    logitses = []
    losses = []
    seq_lengths = []
    ground_truths = []

    bar = create_progressbar(prefix='Computing acoustic model predictions | ',
                             widgets=['Steps: ', progressbar.Counter(), ' | ', progressbar.Timer()]).start()
    log_progress('Computing acoustic model predictions...')

    step_count = 0

    # Initialize iterator to the appropriate dataset
    session.run(init_op)

    # First pass, compute losses and transposed logits for decoding
    while True:
        try:
            logits, loss_, lengths, transcripts = session.run([transposed, loss, batch_x_len, batch_y])
        except tf.errors.OutOfRangeError:
            break

        step_count += 1
        bar.update(step_count)

        logitses.append(logits)
        losses.extend(loss_)
        seq_lengths.append(lengths)
        ground_truths.extend(sparse_tensor_value_to_texts(transcripts, Config.alphabet))

    bar.finish()

    predictions = []

    bar = create_progressbar(max_value=step_count,
                             prefix='Decoding predictions | ').start()
    log_progress('Decoding predictions...')

    # Second pass, decode logits and compute WER and edit distance metrics
    for logits, seq_length in bar(zip(logitses, seq_lengths)):
        decoded = ctc_beam_search_decoder_batch(logits, seq_length, Config.alphabet, FLAGS.beam_width,
                                                num_processes=num_processes, scorer=scorer)
        predictions.extend(d[0][1] for d in decoded)

    distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

    wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
    mean_loss = np.mean(losses)

    # Take only the first report_count items
    report_samples = itertools.islice(samples, FLAGS.report_count)

    print('Test on %s - WER: %f, CER: %f, loss: %f' % (dataset, wer, cer, mean_loss))
    print('-' * 80)
    for sample in report_samples:
        print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.distance, sample.loss))
        print(' - src: "%s"' % sample.src)
        print(' - res: "%s"' % sample.res)
        print('-' * 80)

    return samples
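# run_test() above (and several snippets below) call a calculate_report() with the
# signature (ground_truths, predictions, distances, losses) returning (wer, cer, samples),
# which differs from the results_tuple version earlier in this file. A hedged sketch of
# what such a variant presumably does, reusing the 4-tuple process_decode_result() and
# levenshtein() defined above; the real implementation may differ.
def calculate_report_sketch(labels, decodings, distances, losses):
    samples = [process_decode_result(item) for item in zip(labels, decodings, distances, losses)]
    # Corpus-level rates from summed edit distances over summed reference lengths
    total_wer = (sum(levenshtein(l.split(), d.split()) for l, d in zip(labels, decodings))
                 / float(sum(len(l.split()) for l in labels)))
    total_cer = (sum(levenshtein(l, d) for l, d in zip(labels, decodings))
                 / float(sum(len(l) for l in labels)))
    # Lowest loss first, then lowest WER first, mirroring the report ordering above
    samples.sort(key=lambda s: s.loss)
    samples.sort(key=lambda s: s.wer)
    return total_wer, total_cer, samples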
def main(_):
    initialize_globals()

    if not FLAGS.test_files:
        log_error('You need to specify what files to use for evaluation via '
                  'the --test_files flag.')
        exit(1)

    global alphabet
    alphabet = Alphabet(FLAGS.alphabet_config_path)

    scorer = Scorer(FLAGS.lm_weight, FLAGS.valid_word_count_weight,
                    FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                    alphabet)

    # sort examples by length, improves packing of batches and timesteps
    test_data = preprocess(
        FLAGS.test_files.split(','),
        FLAGS.test_batch_size,
        alphabet=alphabet,
        numcep=N_FEATURES,
        numcontext=N_CONTEXT,
        hdf5_cache_path=FLAGS.hdf5_test_set).sort_values(
        by="features_len",
        ascending=False)

    def create_windows(features):
        num_strides = len(features) - (N_CONTEXT * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2*N_CONTEXT+1
        features = np.lib.stride_tricks.as_strided(
            features,
            (num_strides, window_size, N_FEATURES),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        return features

    # Create overlapping windows over the features
    test_data['features'] = test_data['features'].apply(create_windows)

    with tf.Session() as session:
        inputs, outputs, layers = create_inference_graph(batch_size=FLAGS.test_batch_size, n_steps=-1)

        # Transpose to batch major for decoder
        transposed = tf.transpose(outputs['outputs'], [1, 0, 2])

        labels_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size, None], name="labels")
        label_lengths_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size], name="label_lengths")

        sparse_labels = tf.cast(ctc_label_dense_to_sparse(labels_ph, label_lengths_ph, FLAGS.test_batch_size), tf.int32)
        loss = tf.nn.ctc_loss(labels=sparse_labels,
                              inputs=layers['raw_logits'],
                              sequence_length=inputs['input_lengths'])

        # Create a saver using variables from the above newly created graph
        mapping = {v.op.name: v for v in tf.global_variables() if not v.op.name.startswith('previous_state_')}
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'.format(FLAGS.checkpoint_dir))
            exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        logitses = []
        losses = []

        print('Computing acoustic model predictions...')
        batch_count = len(test_data) // FLAGS.test_batch_size
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # First pass, compute losses and transposed logits for decoding
        for batch in bar(split_data(test_data, FLAGS.test_batch_size)):
            session.run(outputs['initialize_state'])

            features = pad_to_dense(batch['features'].values)
            features_len = batch['features_len'].values
            labels = pad_to_dense(batch['transcript'].values)
            label_lengths = batch['transcript_len'].values

            # Keep the fetched loss values in loss_ so the loss tensor above is not shadowed
            logits, loss_ = session.run([transposed, loss], feed_dict={
                inputs['input']: features,
                inputs['input_lengths']: features_len,
                labels_ph: labels,
                label_lengths_ph: label_lengths
            })

            logitses.append(logits)
            losses.extend(loss_)

        ground_truths = []
        predictions = []
        distances = []

        print('Decoding predictions...')
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # Get number of accessible CPU cores for this process
        num_processes = len(os.sched_getaffinity(0))

        # Second pass, decode logits and compute WER and edit distance metrics
        for logits, batch in bar(zip(logitses, split_data(test_data, FLAGS.test_batch_size))):
            seq_lengths = batch['features_len'].values.astype(np.int32)
            decoded = ctc_beam_search_decoder_batch(logits, seq_lengths, alphabet, FLAGS.beam_width,
                                                    num_processes=num_processes, scorer=scorer)

            ground_truths.extend(alphabet.decode(l) for l in batch['transcript'])
            predictions.extend(d[0][1] for d in decoded)

        # Compare decoded transcripts against the ground truth texts, not the padded numeric labels
        distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

        wer, samples = calculate_report(ground_truths, predictions, distances, losses)
        mean_edit_distance = np.mean(distances)
        mean_loss = np.mean(losses)

        # Take only the first report_count items
        report_samples = itertools.islice(samples, FLAGS.report_count)

        print('Test - WER: %f, loss: %f, mean edit distance: %f' % (wer, mean_loss, mean_edit_distance))
        print('-' * 80)
        for sample in report_samples:
            print('WER: %f, loss: %f, edit distance: %f' % (sample.wer, sample.loss, sample.distance))
            print(' - src: "%s"' % sample.src)
            print(' - res: "%s"' % sample.res)
            print('-' * 80)

        if FLAGS.test_output_file:
            json.dump(samples, open(FLAGS.test_output_file, 'w'), default=lambda x: float(x))
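# The evaluation loops above rely on a split_data() helper that is not part of this
# file; a minimal sketch of the assumed behaviour: yield successive batch-sized
# slices of the length-sorted test DataFrame. pad_to_dense() is assumed to right-pad
# a ragged batch of arrays to a common length and is not sketched here.
def split_data_sketch(dataset, batch_size):
    # Yield successive batch_size-row slices of the DataFrame (full batches only)
    for i in range(0, len(dataset) - batch_size + 1, batch_size):
        yield dataset.iloc[i:i + batch_size]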
def evaluate(test_data, inference_graph, alphabet):
    scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                    FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                    Config.alphabet)

    def create_windows(features):
        num_strides = len(features) - (Config.n_context * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2*Config.n_context+1
        features = np.lib.stride_tricks.as_strided(
            features,
            (num_strides, window_size, Config.n_input),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        return features

    # Create overlapping windows over the features
    test_data['features'] = test_data['features'].apply(create_windows)

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, layers = inference_graph

        # Transpose to batch major for decoder
        transposed = tf.transpose(outputs['outputs'], [1, 0, 2])

        labels_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size, None], name="labels")
        label_lengths_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size], name="label_lengths")

        sparse_labels = tf.cast(ctc_label_dense_to_sparse(labels_ph, label_lengths_ph, FLAGS.test_batch_size), tf.int32)
        loss = tf.nn.ctc_loss(labels=sparse_labels,
                              inputs=layers['raw_logits'],
                              sequence_length=inputs['input_lengths'])

        # Create a saver using variables from the above newly created graph
        mapping = {v.op.name: v for v in tf.global_variables() if not v.op.name.startswith('previous_state_')}
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        if FLAGS.checkpoint_dir is not None:
            checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if not checkpoint:
                log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'.format(FLAGS.checkpoint_dir))
                exit(1)

            checkpoint_path = checkpoint.model_checkpoint_path
            saver.restore(session, checkpoint_path)

        logitses = []
        losses = []

        print('Computing acoustic model predictions...')
        batch_count = len(test_data) // FLAGS.test_batch_size
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # First pass, compute losses and transposed logits for decoding
        for batch in bar(split_data(test_data, FLAGS.test_batch_size)):
            session.run(outputs['initialize_state'])

            features = pad_to_dense(batch['features'].values)
            features_len = batch['features_len'].values
            labels = pad_to_dense(batch['transcript'].values)
            label_lengths = batch['transcript_len'].values

            logits, loss_ = session.run([transposed, loss], feed_dict={
                inputs['input']: features,
                inputs['input_lengths']: features_len,
                labels_ph: labels,
                label_lengths_ph: label_lengths
            })

            logitses.append(logits)
            losses.extend(loss_)

        ground_truths = []
        predictions = []

        print('Decoding predictions...')
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # Get number of accessible CPU cores for this process
        try:
            num_processes = cpu_count()
        except:
            num_processes = 1

        # Second pass, decode logits and compute WER and edit distance metrics
        for logits, batch in bar(zip(logitses, split_data(test_data, FLAGS.test_batch_size))):
            seq_lengths = batch['features_len'].values.astype(np.int32)
            decoded = ctc_beam_search_decoder_batch(logits, seq_lengths, alphabet, FLAGS.beam_width,
                                                    num_processes=num_processes, scorer=scorer)

            ground_truths.extend(alphabet.decode(l) for l in batch['transcript'])
            predictions.extend(d[0][1] for d in decoded)

        distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

        wer, samples = calculate_report(ground_truths, predictions, distances, losses)
        mean_edit_distance = np.mean(distances)
        mean_loss = np.mean(losses)

        # Take only the first report_count items
        report_samples = itertools.islice(samples, FLAGS.report_count)

        print('Test - WER: %f, CER: %f, loss: %f' % (wer, mean_edit_distance, mean_loss))
        print('-' * 80)
        for sample in report_samples:
            print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.distance, sample.loss))
            print(' - src: "%s"' % sample.src)
            print(' - res: "%s"' % sample.res)
            print('-' * 80)

        return samples
def evaluate(test_csvs, create_model, try_loading):
    scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                    FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                    Config.alphabet)

    test_set = create_dataset(test_csvs,
                              batch_size=FLAGS.test_batch_size,
                              cache_path=FLAGS.test_cached_features_path)
    it = test_set.make_one_shot_iterator()

    (batch_x, batch_x_len), batch_y = it.get_next()

    # One rate per layer
    no_dropout = [None] * 6
    logits, _ = create_model(batch_x=batch_x,
                             seq_length=batch_x_len,
                             dropout=no_dropout)

    # Transpose to batch major and apply softmax for decoder
    transposed = tf.nn.softmax(tf.transpose(logits, [1, 0, 2]))

    loss = tf.nn.ctc_loss(labels=batch_y,
                          inputs=logits,
                          sequence_length=batch_x_len)

    global_step = tf.train.get_or_create_global_step()

    with tf.Session(config=Config.session_config) as session:
        # Create a saver using variables from the above newly created graph
        saver = tf.train.Saver()

        # Restore variables from training checkpoint
        loaded = try_loading(session, saver, 'best_dev_checkpoint', 'best validation')
        if not loaded:
            loaded = try_loading(session, saver, 'checkpoint', 'most recent')
        if not loaded:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'
                      .format(FLAGS.checkpoint_dir))
            exit(1)

        logitses = []
        losses = []
        seq_lengths = []
        ground_truths = []

        print('Computing acoustic model predictions...')
        bar = progressbar.ProgressBar(widgets=['Steps: ',
                                               progressbar.Counter(),
                                               ' | ',
                                               progressbar.Timer()])

        step_count = 0

        # First pass, compute losses and transposed logits for decoding
        while True:
            try:
                logits, loss_, lengths, transcripts = session.run(
                    [transposed, loss, batch_x_len, batch_y])
            except tf.errors.OutOfRangeError:
                break

            step_count += 1
            bar.update(step_count)

            logitses.append(logits)
            losses.extend(loss_)
            seq_lengths.append(lengths)
            ground_truths.extend(sparse_tensor_value_to_texts(transcripts, Config.alphabet))

        bar.finish()

        predictions = []

        # Get number of accessible CPU cores for this process
        try:
            num_processes = cpu_count()
        except:
            num_processes = 1

        print('Decoding predictions...')
        bar = progressbar.ProgressBar(max_value=step_count,
                                      widget=progressbar.AdaptiveETA)

        # Second pass, decode logits and compute WER and edit distance metrics
        for logits, seq_length in bar(zip(logitses, seq_lengths)):
            decoded = ctc_beam_search_decoder_batch(logits, seq_length, Config.alphabet,
                                                    FLAGS.beam_width,
                                                    num_processes=num_processes,
                                                    scorer=scorer)
            predictions.extend(d[0][1] for d in decoded)

        distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

        wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
        mean_loss = np.mean(losses)

        # Take only the first report_count items
        report_samples = itertools.islice(samples, FLAGS.report_count)

        print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
        print('-' * 80)
        for sample in report_samples:
            print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.distance, sample.loss))
            print(' - src: "%s"' % sample.src)
            print(' - res: "%s"' % sample.res)
            print('-' * 80)

        return samples
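# The evaluate() variant above receives a try_loading() callback. A plausible minimal
# sketch, assuming it looks up a named checkpoint file in FLAGS.checkpoint_dir and
# restores it into the session, returning whether that succeeded; the real helper in
# the training code may behave differently.
def try_loading_sketch(session, saver, checkpoint_filename, caption):
    checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir,
                                               latest_filename=checkpoint_filename)
    if not checkpoint:
        return False
    print('Restoring {} checkpoint from {}'.format(caption, checkpoint.model_checkpoint_path))
    saver.restore(session, checkpoint.model_checkpoint_path)
    return True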
def evaluate(test_data, inference_graph):
    scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                    FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                    Config.alphabet)

    def create_windows(features):
        num_strides = len(features) - (Config.n_context * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2 * Config.n_context + 1
        features = np.lib.stride_tricks.as_strided(
            features, (num_strides, window_size, Config.n_input),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        return features

    # Create overlapping windows over the features
    test_data['features'] = test_data['features'].apply(create_windows)

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, layers = inference_graph

        layer_4 = layers['rnn_output']
        layer_5 = layers['layer_5']
        layer_6 = layers['layer_6']

        # Transpose to batch major for decoder
        transposed = tf.transpose(outputs['outputs'], [1, 0, 2])

        labels_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size, None], name="labels")
        label_lengths_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size], name="label_lengths")

        # We add 1 to all elements of the transcript to avoid any zero values
        # since we use that as an end-of-sequence token for converting the batch
        # into a SparseTensor. So here we convert the placeholder back into a
        # SparseTensor and subtract ones to get the real labels.
        sparse_labels = tf.contrib.layers.dense_to_sparse(labels_ph)
        neg_ones = tf.SparseTensor(sparse_labels.indices,
                                   -1 * tf.ones_like(sparse_labels.values),
                                   sparse_labels.dense_shape)
        sparse_labels = tf.sparse_add(sparse_labels, neg_ones)

        loss = tf.nn.ctc_loss(labels=sparse_labels,
                              inputs=layers['raw_logits'],
                              sequence_length=inputs['input_lengths'])

        # Create a saver using variables from the above newly created graph
        mapping = {
            v.op.name: v
            for v in tf.global_variables()
            if not v.op.name.startswith('previous_state_')
        }
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'
                      .format(FLAGS.checkpoint_dir))
            exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        logitses = []
        losses = []

        ## To Print the embeddings
        layer_4s = []
        layer_5s = []
        layer_6s = []

        print('Computing acoustic model predictions...')
        batch_count = len(test_data) // FLAGS.test_batch_size
        print('Batch Count: ', batch_count)

        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # First pass, compute losses and transposed logits for decoding
        for batch in bar(split_data(test_data, FLAGS.test_batch_size)):
            session.run(outputs['initialize_state'])

            # TODO: Need to remove it to generalize for greater batch size!
            assert FLAGS.test_batch_size == 1, 'Embedding Extraction will only work for Batch Size = 1 for now!'

            features = pad_to_dense(batch['features'].values)
            features_len = batch['features_len'].values
            labels = pad_to_dense(batch['transcript'].values + 1)
            label_lengths = batch['transcript_len'].values

            logits, loss_, lay4, lay5, lay6 = session.run(
                [transposed, loss, layer_4, layer_5, layer_6],
                feed_dict={
                    inputs['input']: features,
                    inputs['input_lengths']: features_len,
                    labels_ph: labels,
                    label_lengths_ph: label_lengths
                })

            logitses.append(logits)
            losses.extend(loss_)

            layer_4s.append(lay4)
            layer_5s.append(lay5)
            layer_6s.append(lay6)

            print('Saving to Files: ')
            #lay4.tofile('embeddings/lay4.txt')
            #lay5.tofile('embeddings/lay5.txt')
            #lay6.tofile('embeddings/lay6.txt')
            # np.save('embeddings/lay41.npy', lay4)
            filename = batch.fname.iloc[0]
            save_np_array(lay4, Config.LAYER4 + filename + '.npy')
            save_np_array(lay5, Config.LAYER5 + filename + '.npy')
            save_np_array(lay6, Config.LAYER6 + filename + '.npy')
            # print('\nLayer 4 Shape: ', load_np_array('embeddings/lay41.npy').shape)
            # print('\nLayer 4 Shape: ', np.load('embeddings/lay41.npy').shape)
            print('Layer 5 Shape: ', lay5.shape)
            print('Layer 6 Shape: ', lay6.shape)
            print('LAYER4: ', Config.LAYER4)

        ground_truths = []
        predictions = []
        fnames = []

        print('Decoding predictions...')
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # Get number of accessible CPU cores for this process
        try:
            num_processes = cpu_count()
        except:
            num_processes = 1

        # Second pass, decode logits and compute WER and edit distance metrics
        for logits, batch in bar(zip(logitses, split_data(test_data, FLAGS.test_batch_size))):
            seq_lengths = batch['features_len'].values.astype(np.int32)
            decoded = ctc_beam_search_decoder_batch(logits, seq_lengths, Config.alphabet,
                                                    FLAGS.beam_width,
                                                    num_processes=num_processes,
                                                    scorer=scorer)
            #print('Batch\n', batch)
            ground_truths.extend(Config.alphabet.decode(l) for l in batch['transcript'])
            fnames.extend([l for l in batch['fname']])
            #fnames.append(batch['fname'])
            #print(fnames)
            predictions.extend(d[0][1] for d in decoded)

        distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

        wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses, fnames)
        print('Sample Lengths: ', len(samples))
        mean_loss = np.mean(losses)

        # Take only the first report_count items
        report_samples = itertools.islice(samples, FLAGS.report_count)
        print(report_samples)

        print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
        print('-' * 80)

        count = 0
        for sample in report_samples:
            count += 1
            with open(Config.TEXT + sample.fname + '.txt', 'w') as f:
                f.write(sample.res)
            print("File Name: ", sample.fname)
            print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.distance, sample.loss))
            print(' - src: "%s"' % sample.src)
            print(' - res: "%s"' % sample.res)
            print('-' * 80)
        print('Total Count: ', count)

        return samples
def process_sample(row):
    row = list(row)
    thread_name = threading.current_thread().getName()
    thread_num = int(thread_name.replace("Thread-", ""))
    #print "processing in thread %s" % (thread_name)

    if not (thread_name in sessions_per_thread):
        gpu_id = thread_num % 2
        print "init session with GPU id = %i" % (gpu_id)
        with tf.device('/device:GPU:%i' % (gpu_id)):
            session_tuple = infer.init_session()
            sessions_per_thread[thread_name] = session_tuple
    #else:
        #print "using saved session for thread %s" % (thread_name)

    session_tuple = sessions_per_thread[thread_name]

    #print "process item %i in %s" % (index, str())
    global total_passed_num
    global approved_num

    total_passed_num += 1

    original = row[2].strip()
    decoded = infer.infer(row[0], session_tuple)
    decoded = decoded.strip()

    print "-------------------"
    print original
    print decoded

    original_words = original.split()
    decoded_words = decoded.split()

    start_take_num = max(CER_CALC_NUM, len(original_words[0]))
    end_take_num = max(CER_CALC_NUM, len(original_words[-1]))

    original_start = list(original)[:start_take_num]
    decoded_start = list(decoded)[:start_take_num]
    start_cer = text_utils.levenshtein(list(original_start), list(decoded_start)) / float(len(original_start))

    original_end = list(original)[-end_take_num:]
    decoded_end = list(decoded)[-end_take_num:]
    end_cer = text_utils.levenshtein(list(original_end), list(decoded_end)) / float(len(original_end))

    print "start: %s vs %s" % ("".join(original_start), "".join(decoded_start))
    print "end: %s vs %s" % ("".join(original_end), "".join(decoded_end))
    print "start_cer: %.3f, end_cer: %.3f" % (start_cer, end_cer)

    if start_cer < 0.5 and end_cer < 0.5:
        approved_num += 1
        row.append(1)
    else:
        print "SKIP"
        row.append(0)

    with csv_writer_lock:
        csv_writer.writerow(row)

    print "%.1f%% approved (%.2f%% processed of %i)" % (float(approved_num)/float(total_passed_num)*100,
                                                        float(total_passed_num)/float(total_rows_to_process)*100,
                                                        total_rows_to_process)
    p_bar.update(1)
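# A tiny worked example of the start/end CER gate used in process_sample() above,
# assuming CER_CALC_NUM = 10 (the real constant is defined elsewhere). Written to be
# valid in both Python 2 and 3; it reuses the text_utils.levenshtein helper.
def _start_end_cer_example():
    original = "hello world how are you"
    decoded = "hello word how are you"
    take = max(10, len(original.split()[0]))   # CER_CALC_NUM assumed to be 10
    start_cer = text_utils.levenshtein(list(original[:take]),
                                       list(decoded[:take])) / float(take)
    # "hello worl" vs "hello word" -> one substitution -> start_cer == 0.1,
    # which is below the 0.5 threshold, so this sample would be approved
    return start_cer < 0.5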