def levenshtein100(dictA, dictB):
    # Compare the ten most frequent keys of each dict, weighting each pairwise
    # Levenshtein distance by the product of the two counts
    dictA = OrderedDict(sorted(dictA.items(), key=lambda t: t[1], reverse=True)[:10])
    dictB = OrderedDict(sorted(dictB.items(), key=lambda t: t[1], reverse=True)[:10])
    return sum(v1 * v2 * textutil.levenshtein(k1, k2)
               for k1, v1 in dictA.items()
               for k2, v2 in dictB.items())
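# The snippets in this file all call a levenshtein()/textutil.levenshtein() helper
# that is not defined here. As a point of reference, a minimal sketch of the standard
# dynamic-programming edit distance, assuming it accepts any pair of sequences
# (character strings or lists of words); the project's real helper may differ.
def _levenshtein_sketch(a, b):
    # Classic Wagner-Fischer: previous[j] holds the edit distance between a[:i-1] and b[:j]
    if len(a) < len(b):
        a, b = b, a
    previous = list(range(len(b) + 1))
    for i, item_a in enumerate(a, start=1):
        current = [i]
        for j, item_b in enumerate(b, start=1):
            insert_cost = current[j - 1] + 1
            delete_cost = previous[j] + 1
            substitute_cost = previous[j - 1] + (item_a != item_b)
            current.append(min(insert_cost, delete_cost, substitute_cost))
        previous = current
    return previous[-1]

# Example (hypothetical): _levenshtein_sketch("kitten", "sitting") == 3,
# _levenshtein_sketch("a b c".split(), "a c".split()) == 1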
def calculate_report(results_tuple):
    r'''
    This routine will calculate a WER report.
    It'll compute the `mean` WER and create ``Sample`` objects of the
    ``report_count`` top lowest loss items from the provided WER results tuple
    (only items with WER!=0 and ordered by their WER).
    '''
    samples = []
    items = list(zip(*results_tuple))
    total_levenshtein = 0.0
    total_label_length = 0.0
    for label, decoding, distance, loss in items:
        sample_wer = wer(label, decoding)
        sample = Sample(label, decoding, loss, distance, sample_wer)
        samples.append(sample)
        total_levenshtein += levenshtein(label.split(), decoding.split())
        total_label_length += float(len(label.split()))

    # Getting the WER from the accumulated levenshteins and lengths
    samples_wer = total_levenshtein / total_label_length

    # Filter out all items with WER=0
    samples = [s for s in samples if s.wer > 0]

    # Order the remaining items by their loss (lowest loss on top)
    samples.sort(key=lambda s: s.loss)

    # Take only the first report_count items
    samples = samples[:FLAGS.report_count]

    # Order this top FLAGS.report_count items by their WER (lowest WER on top)
    samples.sort(key=lambda s: s.wer)

    return samples_wer, samples
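# calculate_report() above leans on a Sample record and a word-level wer() helper
# that are not shown in this file. A plausible minimal sketch, assuming wer() is
# simply word-level edit distance normalised by reference length; the actual
# project definitions may differ.
from collections import namedtuple

Sample = namedtuple('Sample', ['src', 'res', 'loss', 'distance', 'wer'])

def wer(original, result):
    # Word Error Rate: word-level Levenshtein distance divided by the number
    # of words in the reference transcript
    original_words = original.split()
    result_words = result.split()
    return levenshtein(original_words, result_words) / float(len(original_words))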
def process_decode_result(item):
    ground_truth, prediction, loss = item
    char_distance = levenshtein(ground_truth, prediction)
    char_length = len(ground_truth)
    word_distance = levenshtein(ground_truth.split(), prediction.split())
    word_length = len(ground_truth.split())
    return AttrDict({
        'src': ground_truth,
        'res': prediction,
        'loss': loss,
        'char_distance': char_distance,
        'char_length': char_length,
        'word_distance': word_distance,
        'word_length': word_length,
        'cer': char_distance / char_length,
        'wer': word_distance / word_length,
    })
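# The per-sample fields returned by this variant of process_decode_result() make it
# easy to aggregate corpus-level rates the usual way: sum the edit distances and
# divide by the summed reference lengths, rather than averaging per-sample rates.
# A small illustrative sketch (not part of the original code):
def aggregate_rates(results):
    total_char_distance = sum(r.char_distance for r in results)
    total_char_length = sum(r.char_length for r in results)
    total_word_distance = sum(r.word_distance for r in results)
    total_word_length = sum(r.word_length for r in results)
    cer = total_char_distance / float(total_char_length)
    wer = total_word_distance / float(total_word_length)
    return cer, wer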
def process_decode_result(item):
    label, decoding, distance, loss = item
    word_distance = levenshtein(label.split(), decoding.split())
    word_length = float(len(label.split()))
    return AttrDict({
        'src': label,
        'res': decoding,
        'loss': loss,
        'distance': distance,
        'wer': word_distance / word_length,
    })
def process_decode_result(item):
    label, decoding, distance, loss = item
    sample_wer = wer(label, decoding)
    return AttrDict({
        'src': label,
        'res': decoding,
        'loss': loss,
        'distance': distance,
        'wer': sample_wer,
        'levenshtein': levenshtein(label.split(), decoding.split()),
        'label_length': float(len(label.split())),
    })
def main():
    parser = argparse.ArgumentParser(description='Computing TFLite accuracy')
    parser.add_argument('--model', required=True,
                        help='Path to the model (protocol buffer binary file)')
    parser.add_argument('--alphabet', required=True,
                        help='Path to the configuration file specifying the alphabet used by the network')
    parser.add_argument('--lm', required=True,
                        help='Path to the language model binary file')
    parser.add_argument('--trie', required=True,
                        help='Path to the language model trie file created with native_client/generate_trie')
    parser.add_argument('--csv', required=True,
                        help='Path to the CSV source file')
    parser.add_argument('--proc', required=False, default=cpu_count(), type=int,
                        help='Number of processes to spawn, defaulting to number of CPUs')
    args = parser.parse_args()

    work_todo = JoinableQueue()   # this is where we are going to store input data
    work_done = Queue()           # this is where we are going to push results out

    processes = []
    for i in range(args.proc):
        worker_process = Process(target=tflite_worker,
                                 args=(args.model, args.alphabet, args.lm, args.trie, work_todo, work_done, i),
                                 daemon=True,
                                 name='tflite_process_{}'.format(i))
        worker_process.start()    # Launch tflite_worker() as a separate python process
        processes.append(worker_process)
    print([x.name for x in processes])

    ground_truths = []
    predictions = []
    losses = []

    with open(args.csv, 'r') as csvfile:
        csvreader = csv.DictReader(csvfile)
        for row in csvreader:
            work_todo.put({'filename': row['wav_filename'], 'transcript': row['transcript']})
    work_todo.join()

    while not work_done.empty():
        msg = work_done.get()
        losses.append(0.0)
        ground_truths.append(msg['ground_truth'])
        predictions.append(msg['prediction'])

    distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

    wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
    mean_loss = np.mean(losses)

    print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
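# main() above spawns tflite_worker() processes that are not defined in this file.
# A minimal sketch of the queue protocol those workers are expected to follow;
# load_model() and run_inference() are hypothetical placeholders standing in for
# the real TFLite model loading and decoding calls.
def tflite_worker_sketch(model, alphabet, lm, trie, queue_in, queue_out, gpu_index):
    engine = load_model(model, alphabet, lm, trie)  # hypothetical helper
    while True:
        msg = queue_in.get()
        prediction = run_inference(engine, msg['filename'])  # hypothetical helper
        # Push the result back in the shape main() expects to read it
        queue_out.put({'wav': msg['filename'],
                       'prediction': prediction,
                       'ground_truth': msg['transcript']})
        queue_in.task_done()  # required so work_todo.join() in main() can return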
def run_test(init_op, dataset):
    logitses = []
    losses = []
    seq_lengths = []
    ground_truths = []

    bar = create_progressbar(prefix='Computing acoustic model predictions | ',
                             widgets=['Steps: ', progressbar.Counter(), ' | ', progressbar.Timer()]).start()
    log_progress('Computing acoustic model predictions...')

    step_count = 0

    # Initialize iterator to the appropriate dataset
    session.run(init_op)

    # First pass, compute losses and transposed logits for decoding
    while True:
        try:
            logits, loss_, lengths, transcripts = session.run([transposed, loss, batch_x_len, batch_y])
        except tf.errors.OutOfRangeError:
            break

        step_count += 1
        bar.update(step_count)

        logitses.append(logits)
        losses.extend(loss_)
        seq_lengths.append(lengths)
        ground_truths.extend(sparse_tensor_value_to_texts(transcripts, Config.alphabet))

    bar.finish()

    predictions = []

    bar = create_progressbar(max_value=step_count,
                             prefix='Decoding predictions | ').start()
    log_progress('Decoding predictions...')

    # Second pass, decode logits and compute WER and edit distance metrics
    for logits, seq_length in bar(zip(logitses, seq_lengths)):
        decoded = ctc_beam_search_decoder_batch(logits, seq_length, Config.alphabet, FLAGS.beam_width,
                                                num_processes=num_processes, scorer=scorer)
        predictions.extend(d[0][1] for d in decoded)

    distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

    wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
    mean_loss = np.mean(losses)

    # Take only the first report_count items
    report_samples = itertools.islice(samples, FLAGS.report_count)

    print('Test on %s - WER: %f, CER: %f, loss: %f' % (dataset, wer, cer, mean_loss))
    print('-' * 80)
    for sample in report_samples:
        print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.distance, sample.loss))
        print(' - src: "%s"' % sample.src)
        print(' - res: "%s"' % sample.res)
        print('-' * 80)

    return samples
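# run_test() above (and several snippets below) call a calculate_report() with the
# signature (ground_truths, predictions, distances, losses) returning (wer, cer, samples),
# which differs from the results_tuple version earlier in this file. A hedged sketch of
# what such a variant presumably does, reusing the 4-tuple process_decode_result() and
# levenshtein() defined above; the real implementation may differ.
def calculate_report_sketch(labels, decodings, distances, losses):
    samples = [process_decode_result(item) for item in zip(labels, decodings, distances, losses)]
    # Corpus-level rates from summed edit distances over summed reference lengths
    total_wer = (sum(levenshtein(l.split(), d.split()) for l, d in zip(labels, decodings))
                 / float(sum(len(l.split()) for l in labels)))
    total_cer = (sum(levenshtein(l, d) for l, d in zip(labels, decodings))
                 / float(sum(len(l) for l in labels)))
    # Lowest loss first, then lowest WER first, mirroring the report ordering above
    samples.sort(key=lambda s: s.loss)
    samples.sort(key=lambda s: s.wer)
    return total_wer, total_cer, samples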
def main(_):
    initialize_globals()

    if not FLAGS.test_files:
        log_error('You need to specify what files to use for evaluation via '
                  'the --test_files flag.')
        exit(1)

    global alphabet
    alphabet = Alphabet(FLAGS.alphabet_config_path)

    scorer = Scorer(FLAGS.lm_weight, FLAGS.valid_word_count_weight,
                    FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                    alphabet)

    # sort examples by length, improves packing of batches and timesteps
    test_data = preprocess(
        FLAGS.test_files.split(','),
        FLAGS.test_batch_size,
        alphabet=alphabet,
        numcep=N_FEATURES,
        numcontext=N_CONTEXT,
        hdf5_cache_path=FLAGS.hdf5_test_set).sort_values(
        by="features_len",
        ascending=False)

    def create_windows(features):
        num_strides = len(features) - (N_CONTEXT * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2*N_CONTEXT+1
        features = np.lib.stride_tricks.as_strided(
            features,
            (num_strides, window_size, N_FEATURES),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        return features

    # Create overlapping windows over the features
    test_data['features'] = test_data['features'].apply(create_windows)

    with tf.Session() as session:
        inputs, outputs, layers = create_inference_graph(batch_size=FLAGS.test_batch_size, n_steps=-1)

        # Transpose to batch major for decoder
        transposed = tf.transpose(outputs['outputs'], [1, 0, 2])

        labels_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size, None], name="labels")
        label_lengths_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size], name="label_lengths")

        sparse_labels = tf.cast(ctc_label_dense_to_sparse(labels_ph, label_lengths_ph, FLAGS.test_batch_size), tf.int32)
        loss = tf.nn.ctc_loss(labels=sparse_labels,
                              inputs=layers['raw_logits'],
                              sequence_length=inputs['input_lengths'])

        # Create a saver using variables from the above newly created graph
        mapping = {v.op.name: v for v in tf.global_variables() if not v.op.name.startswith('previous_state_')}
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'.format(FLAGS.checkpoint_dir))
            exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        logitses = []
        losses = []

        print('Computing acoustic model predictions...')
        batch_count = len(test_data) // FLAGS.test_batch_size
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # First pass, compute losses and transposed logits for decoding
        for batch in bar(split_data(test_data, FLAGS.test_batch_size)):
            session.run(outputs['initialize_state'])

            features = pad_to_dense(batch['features'].values)
            features_len = batch['features_len'].values
            labels = pad_to_dense(batch['transcript'].values)
            label_lengths = batch['transcript_len'].values

            # Keep the fetched loss values in loss_ so the loss tensor above is not shadowed
            logits, loss_ = session.run([transposed, loss], feed_dict={
                inputs['input']: features,
                inputs['input_lengths']: features_len,
                labels_ph: labels,
                label_lengths_ph: label_lengths
            })

            logitses.append(logits)
            losses.extend(loss_)

        ground_truths = []
        predictions = []
        distances = []

        print('Decoding predictions...')
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # Get number of accessible CPU cores for this process
        num_processes = len(os.sched_getaffinity(0))

        # Second pass, decode logits and compute WER and edit distance metrics
        for logits, batch in bar(zip(logitses, split_data(test_data, FLAGS.test_batch_size))):
            seq_lengths = batch['features_len'].values.astype(np.int32)
            decoded = ctc_beam_search_decoder_batch(logits, seq_lengths, alphabet, FLAGS.beam_width,
                                                    num_processes=num_processes, scorer=scorer)

            ground_truths.extend(alphabet.decode(l) for l in batch['transcript'])
            predictions.extend(d[0][1] for d in decoded)

        # Compare decoded transcripts against the ground truth texts, not the padded numeric labels
        distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

        wer, samples = calculate_report(ground_truths, predictions, distances, losses)
        mean_edit_distance = np.mean(distances)
        mean_loss = np.mean(losses)

        # Take only the first report_count items
        report_samples = itertools.islice(samples, FLAGS.report_count)

        print('Test - WER: %f, loss: %f, mean edit distance: %f' % (wer, mean_loss, mean_edit_distance))
        print('-' * 80)
        for sample in report_samples:
            print('WER: %f, loss: %f, edit distance: %f' % (sample.wer, sample.loss, sample.distance))
            print(' - src: "%s"' % sample.src)
            print(' - res: "%s"' % sample.res)
            print('-' * 80)

        if FLAGS.test_output_file:
            json.dump(samples, open(FLAGS.test_output_file, 'w'), default=lambda x: float(x))
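# The evaluation loops above rely on a split_data() helper that is not part of this
# file; a minimal sketch of the assumed behaviour: yield successive batch-sized
# slices of the length-sorted test DataFrame. pad_to_dense() is assumed to right-pad
# a ragged batch of arrays to a common length and is not sketched here.
def split_data_sketch(dataset, batch_size):
    # Yield successive batch_size-row slices of the DataFrame (full batches only)
    for i in range(0, len(dataset) - batch_size + 1, batch_size):
        yield dataset.iloc[i:i + batch_size]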
def evaluate(test_data, inference_graph, alphabet):
    scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                    FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                    Config.alphabet)

    def create_windows(features):
        num_strides = len(features) - (Config.n_context * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2*Config.n_context+1
        features = np.lib.stride_tricks.as_strided(
            features,
            (num_strides, window_size, Config.n_input),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        return features

    # Create overlapping windows over the features
    test_data['features'] = test_data['features'].apply(create_windows)

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, layers = inference_graph

        # Transpose to batch major for decoder
        transposed = tf.transpose(outputs['outputs'], [1, 0, 2])

        labels_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size, None], name="labels")
        label_lengths_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size], name="label_lengths")

        sparse_labels = tf.cast(ctc_label_dense_to_sparse(labels_ph, label_lengths_ph, FLAGS.test_batch_size), tf.int32)
        loss = tf.nn.ctc_loss(labels=sparse_labels,
                              inputs=layers['raw_logits'],
                              sequence_length=inputs['input_lengths'])

        # Create a saver using variables from the above newly created graph
        mapping = {v.op.name: v for v in tf.global_variables() if not v.op.name.startswith('previous_state_')}
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        if FLAGS.checkpoint_dir is not None:
            checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if not checkpoint:
                log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'.format(FLAGS.checkpoint_dir))
                exit(1)

            checkpoint_path = checkpoint.model_checkpoint_path
            saver.restore(session, checkpoint_path)

        logitses = []
        losses = []

        print('Computing acoustic model predictions...')
        batch_count = len(test_data) // FLAGS.test_batch_size
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # First pass, compute losses and transposed logits for decoding
        for batch in bar(split_data(test_data, FLAGS.test_batch_size)):
            session.run(outputs['initialize_state'])

            features = pad_to_dense(batch['features'].values)
            features_len = batch['features_len'].values
            labels = pad_to_dense(batch['transcript'].values)
            label_lengths = batch['transcript_len'].values

            logits, loss_ = session.run([transposed, loss], feed_dict={
                inputs['input']: features,
                inputs['input_lengths']: features_len,
                labels_ph: labels,
                label_lengths_ph: label_lengths
            })

            logitses.append(logits)
            losses.extend(loss_)

        ground_truths = []
        predictions = []

        print('Decoding predictions...')
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # Get number of accessible CPU cores for this process
        try:
            num_processes = cpu_count()
        except:
            num_processes = 1

        # Second pass, decode logits and compute WER and edit distance metrics
        for logits, batch in bar(zip(logitses, split_data(test_data, FLAGS.test_batch_size))):
            seq_lengths = batch['features_len'].values.astype(np.int32)
            decoded = ctc_beam_search_decoder_batch(logits, seq_lengths, alphabet, FLAGS.beam_width,
                                                    num_processes=num_processes, scorer=scorer)

            ground_truths.extend(alphabet.decode(l) for l in batch['transcript'])
            predictions.extend(d[0][1] for d in decoded)

        distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

        wer, samples = calculate_report(ground_truths, predictions, distances, losses)
        mean_edit_distance = np.mean(distances)
        mean_loss = np.mean(losses)

        # Take only the first report_count items
        report_samples = itertools.islice(samples, FLAGS.report_count)

        print('Test - WER: %f, CER: %f, loss: %f' % (wer, mean_edit_distance, mean_loss))
        print('-' * 80)
        for sample in report_samples:
            print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.distance, sample.loss))
            print(' - src: "%s"' % sample.src)
            print(' - res: "%s"' % sample.res)
            print('-' * 80)

        return samples
def evaluate(test_csvs, create_model, try_loading):
    scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                    FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                    Config.alphabet)

    test_set = create_dataset(test_csvs,
                              batch_size=FLAGS.test_batch_size,
                              cache_path=FLAGS.test_cached_features_path)
    it = test_set.make_one_shot_iterator()

    (batch_x, batch_x_len), batch_y = it.get_next()

    # One rate per layer
    no_dropout = [None] * 6
    logits, _ = create_model(batch_x=batch_x,
                             seq_length=batch_x_len,
                             dropout=no_dropout)

    # Transpose to batch major and apply softmax for decoder
    transposed = tf.nn.softmax(tf.transpose(logits, [1, 0, 2]))

    loss = tf.nn.ctc_loss(labels=batch_y,
                          inputs=logits,
                          sequence_length=batch_x_len)

    global_step = tf.train.get_or_create_global_step()

    with tf.Session(config=Config.session_config) as session:
        # Create a saver using variables from the above newly created graph
        saver = tf.train.Saver()

        # Restore variables from training checkpoint
        loaded = try_loading(session, saver, 'best_dev_checkpoint', 'best validation')
        if not loaded:
            loaded = try_loading(session, saver, 'checkpoint', 'most recent')
        if not loaded:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'
                      .format(FLAGS.checkpoint_dir))
            exit(1)

        logitses = []
        losses = []
        seq_lengths = []
        ground_truths = []

        print('Computing acoustic model predictions...')
        bar = progressbar.ProgressBar(widgets=['Steps: ',
                                               progressbar.Counter(),
                                               ' | ',
                                               progressbar.Timer()])

        step_count = 0

        # First pass, compute losses and transposed logits for decoding
        while True:
            try:
                logits, loss_, lengths, transcripts = session.run(
                    [transposed, loss, batch_x_len, batch_y])
            except tf.errors.OutOfRangeError:
                break

            step_count += 1
            bar.update(step_count)

            logitses.append(logits)
            losses.extend(loss_)
            seq_lengths.append(lengths)
            ground_truths.extend(sparse_tensor_value_to_texts(transcripts, Config.alphabet))

        bar.finish()

        predictions = []

        # Get number of accessible CPU cores for this process
        try:
            num_processes = cpu_count()
        except:
            num_processes = 1

        print('Decoding predictions...')
        bar = progressbar.ProgressBar(max_value=step_count,
                                      widget=progressbar.AdaptiveETA)

        # Second pass, decode logits and compute WER and edit distance metrics
        for logits, seq_length in bar(zip(logitses, seq_lengths)):
            decoded = ctc_beam_search_decoder_batch(logits, seq_length, Config.alphabet,
                                                    FLAGS.beam_width,
                                                    num_processes=num_processes,
                                                    scorer=scorer)
            predictions.extend(d[0][1] for d in decoded)

        distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

        wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
        mean_loss = np.mean(losses)

        # Take only the first report_count items
        report_samples = itertools.islice(samples, FLAGS.report_count)

        print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
        print('-' * 80)
        for sample in report_samples:
            print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.distance, sample.loss))
            print(' - src: "%s"' % sample.src)
            print(' - res: "%s"' % sample.res)
            print('-' * 80)

        return samples
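# The evaluate() variant above receives a try_loading() callback. A plausible minimal
# sketch, assuming it looks up a named checkpoint file in FLAGS.checkpoint_dir and
# restores it into the session, returning whether that succeeded; the real helper in
# the training code may behave differently.
def try_loading_sketch(session, saver, checkpoint_filename, caption):
    checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir,
                                               latest_filename=checkpoint_filename)
    if not checkpoint:
        return False
    print('Restoring {} checkpoint from {}'.format(caption, checkpoint.model_checkpoint_path))
    saver.restore(session, checkpoint.model_checkpoint_path)
    return True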
def evaluate(test_data, inference_graph):
    scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                    FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                    Config.alphabet)

    def create_windows(features):
        num_strides = len(features) - (Config.n_context * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2 * Config.n_context + 1
        features = np.lib.stride_tricks.as_strided(
            features, (num_strides, window_size, Config.n_input),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        return features

    # Create overlapping windows over the features
    test_data['features'] = test_data['features'].apply(create_windows)

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, layers = inference_graph

        layer_4 = layers['rnn_output']
        layer_5 = layers['layer_5']
        layer_6 = layers['layer_6']

        # Transpose to batch major for decoder
        transposed = tf.transpose(outputs['outputs'], [1, 0, 2])

        labels_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size, None], name="labels")
        label_lengths_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size], name="label_lengths")

        # We add 1 to all elements of the transcript to avoid any zero values
        # since we use that as an end-of-sequence token for converting the batch
        # into a SparseTensor. So here we convert the placeholder back into a
        # SparseTensor and subtract ones to get the real labels.
        sparse_labels = tf.contrib.layers.dense_to_sparse(labels_ph)
        neg_ones = tf.SparseTensor(sparse_labels.indices,
                                   -1 * tf.ones_like(sparse_labels.values),
                                   sparse_labels.dense_shape)
        sparse_labels = tf.sparse_add(sparse_labels, neg_ones)

        loss = tf.nn.ctc_loss(labels=sparse_labels,
                              inputs=layers['raw_logits'],
                              sequence_length=inputs['input_lengths'])

        # Create a saver using variables from the above newly created graph
        mapping = {
            v.op.name: v
            for v in tf.global_variables()
            if not v.op.name.startswith('previous_state_')
        }
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'
                      .format(FLAGS.checkpoint_dir))
            exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        logitses = []
        losses = []

        ## To Print the embeddings
        layer_4s = []
        layer_5s = []
        layer_6s = []

        print('Computing acoustic model predictions...')
        batch_count = len(test_data) // FLAGS.test_batch_size
        print('Batch Count: ', batch_count)

        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # First pass, compute losses and transposed logits for decoding
        for batch in bar(split_data(test_data, FLAGS.test_batch_size)):
            session.run(outputs['initialize_state'])

            # TODO: Need to remove it to generalize for greater batch size!
            assert FLAGS.test_batch_size == 1, 'Embedding Extraction will only work for Batch Size = 1 for now!'

            features = pad_to_dense(batch['features'].values)
            features_len = batch['features_len'].values
            labels = pad_to_dense(batch['transcript'].values + 1)
            label_lengths = batch['transcript_len'].values

            logits, loss_, lay4, lay5, lay6 = session.run(
                [transposed, loss, layer_4, layer_5, layer_6],
                feed_dict={
                    inputs['input']: features,
                    inputs['input_lengths']: features_len,
                    labels_ph: labels,
                    label_lengths_ph: label_lengths
                })

            logitses.append(logits)
            losses.extend(loss_)

            layer_4s.append(lay4)
            layer_5s.append(lay5)
            layer_6s.append(lay6)

            print('Saving to Files: ')
            #lay4.tofile('embeddings/lay4.txt')
            #lay5.tofile('embeddings/lay5.txt')
            #lay6.tofile('embeddings/lay6.txt')
            # np.save('embeddings/lay41.npy', lay4)
            filename = batch.fname.iloc[0]
            save_np_array(lay4, Config.LAYER4 + filename + '.npy')
            save_np_array(lay5, Config.LAYER5 + filename + '.npy')
            save_np_array(lay6, Config.LAYER6 + filename + '.npy')
            # print('\nLayer 4 Shape: ', load_np_array('embeddings/lay41.npy').shape)
            # print('\nLayer 4 Shape: ', np.load('embeddings/lay41.npy').shape)
            print('Layer 5 Shape: ', lay5.shape)
            print('Layer 6 Shape: ', lay6.shape)
            print('LAYER4: ', Config.LAYER4)

        ground_truths = []
        predictions = []
        fnames = []

        print('Decoding predictions...')
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # Get number of accessible CPU cores for this process
        try:
            num_processes = cpu_count()
        except:
            num_processes = 1

        # Second pass, decode logits and compute WER and edit distance metrics
        for logits, batch in bar(zip(logitses, split_data(test_data, FLAGS.test_batch_size))):
            seq_lengths = batch['features_len'].values.astype(np.int32)
            decoded = ctc_beam_search_decoder_batch(logits, seq_lengths, Config.alphabet,
                                                    FLAGS.beam_width,
                                                    num_processes=num_processes,
                                                    scorer=scorer)
            #print('Batch\n', batch)
            ground_truths.extend(Config.alphabet.decode(l) for l in batch['transcript'])
            fnames.extend([l for l in batch['fname']])
            #fnames.append(batch['fname'])
            #print(fnames)
            predictions.extend(d[0][1] for d in decoded)

        distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

        wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses, fnames)
        print('Sample Lengths: ', len(samples))
        mean_loss = np.mean(losses)

        # Take only the first report_count items
        report_samples = itertools.islice(samples, FLAGS.report_count)
        print(report_samples)

        print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
        print('-' * 80)

        count = 0
        for sample in report_samples:
            count += 1
            with open(Config.TEXT + sample.fname + '.txt', 'w') as f:
                f.write(sample.res)
            print("File Name: ", sample.fname)
            print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.distance, sample.loss))
            print(' - src: "%s"' % sample.src)
            print(' - res: "%s"' % sample.res)
            print('-' * 80)
        print('Total Count: ', count)

        return samples
def process_sample(row):
    row = list(row)
    thread_name = threading.current_thread().getName()
    thread_num = int(thread_name.replace("Thread-", ""))
    #print "processing in thread %s" % (thread_name)

    if not (thread_name in sessions_per_thread):
        gpu_id = thread_num % 2
        print "init session with GPU id = %i" % (gpu_id)
        with tf.device('/device:GPU:%i' % (gpu_id)):
            session_tuple = infer.init_session()
            sessions_per_thread[thread_name] = session_tuple
    #else:
        #print "using saved session for thread %s" % (thread_name)

    session_tuple = sessions_per_thread[thread_name]

    #print "process item %i in %s" % (index, str())
    global total_passed_num
    global approved_num

    total_passed_num += 1

    original = row[2].strip()
    decoded = infer.infer(row[0], session_tuple)
    decoded = decoded.strip()

    print "-------------------"
    print original
    print decoded

    original_words = original.split()
    decoded_words = decoded.split()

    start_take_num = max(CER_CALC_NUM, len(original_words[0]))
    end_take_num = max(CER_CALC_NUM, len(original_words[-1]))

    original_start = list(original)[:start_take_num]
    decoded_start = list(decoded)[:start_take_num]
    start_cer = text_utils.levenshtein(list(original_start), list(decoded_start)) / float(len(original_start))

    original_end = list(original)[-end_take_num:]
    decoded_end = list(decoded)[-end_take_num:]
    end_cer = text_utils.levenshtein(list(original_end), list(decoded_end)) / float(len(original_end))

    print "start: %s vs %s" % ("".join(original_start), "".join(decoded_start))
    print "end: %s vs %s" % ("".join(original_end), "".join(decoded_end))
    print "start_cer: %.3f, end_cer: %.3f" % (start_cer, end_cer)

    if start_cer < 0.5 and end_cer < 0.5:
        approved_num += 1
        row.append(1)
    else:
        print "SKIP"
        row.append(0)

    with csv_writer_lock:
        csv_writer.writerow(row)

    print "%.1f%% approved (%.2f%% processed of %i)" % (float(approved_num)/float(total_passed_num)*100,
                                                        float(total_passed_num)/float(total_rows_to_process)*100,
                                                        total_rows_to_process)
    p_bar.update(1)
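# A tiny worked example of the start/end CER gate used in process_sample() above,
# assuming CER_CALC_NUM = 10 (the real constant is defined elsewhere). Written to be
# valid in both Python 2 and 3; it reuses the text_utils.levenshtein helper.
def _start_end_cer_example():
    original = "hello world how are you"
    decoded = "hello word how are you"
    take = max(10, len(original.split()[0]))   # CER_CALC_NUM assumed to be 10
    start_cer = text_utils.levenshtein(list(original[:take]),
                                       list(decoded[:take])) / float(take)
    # "hello worl" vs "hello word" -> one substitution -> start_cer == 0.1,
    # which is below the 0.5 threshold, so this sample would be approved
    return start_cer < 0.5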