def __main():
    def word2example_count_group(word):
        examples = example_counts[word]
        if examples <= 5:
            group = 0
        elif examples <= 10:
            group = 1
        elif examples <= 25:
            group = 2
        elif examples <= 50:
            group = 3
        elif examples <= 100:
            group = 4
        else:
            group = 5
        return group

    net_output_file = '/home/aleks/projects/thesis/auto_rating/augparts-test_vad3_bycleaned1_novoicefix0.netrating'
    beta_file = 'output/siamese_53_20_12_2018_epoch_66.beta'
    human_ratings_file = os.path.join(processed_data_dir, 'all_snodgrass_cleaned_v5_test_ratings_full')
    word2id_file = os.path.join(processed_data_dir, 'all_snodgrass_cleaned_v5_train_word2id')

    word2id = load_pickled(word2id_file)
    example_counts = load_pickled('train_counts.pckl')

    thresholded_by_beta = threshold_net_output_by_beta(net_output_file, beta_file, word2id_file,
                                                       max_dist_rise=0.001, min_frame_rise_len=None,
                                                       check_dists_for_end=True)
    net_rated_words = compare_to_human_correctness(load_pickled(human_ratings_file), thresholded_by_beta[0][-1],
                                                   leeway_start=0.2, leeway_end=0.2)

    counts = Counter([x.word for x in net_rated_words])
    correct = np.zeros(len(word2id))
    incorrect = np.zeros(len(word2id))
    for x in net_rated_words:
        if x.correct:
            correct[word2id[x.word]] += 1
        else:
            incorrect[word2id[x.word]] += 1

    # for word in counts:
    #     accuracy = correct[word2id[word]] / (correct[word2id[word]] + incorrect[word2id[word]]) * 100
    #     print('{0}: {1:.3f}% accuracy'.format(word, accuracy))

    group_analysis(net_rated_words, word2character_count_group, 6)
    group_analysis(net_rated_words, word2example_count_group, 6)
def threshold_net_output_by_beta(net_output_file_or_data, beta_file_or_data, word2id_file, max_dist_rise=0.001,
                                 min_frame_rise_len=None, check_dists_for_end=True):
    if isinstance(net_output_file_or_data, list):
        net_annotated_recordings = net_output_file_or_data
        net_output_name = 'no_file'
    else:
        net_annotated_recordings: List[List[NetAnnotatedSegment]] = load_pickled(net_output_file_or_data)
        net_output_name = net_output_file_or_data

    if isinstance(beta_file_or_data, np.ndarray):
        beta = beta_file_or_data
    else:
        beta = np.load(beta_file_or_data)

    word2id = load_pickled(word2id_file)

    all_ratings = []
    net_ratings = [None] * len(net_annotated_recordings)
    for rec_idx, rec_segments in enumerate(net_annotated_recordings):
        for k, segment_rating in enumerate(rec_segments):
            dists = segment_rating.dists
            start_sec = segment_rating.start_sec
            segment_idx = segment_rating.segment_idx

            thr_immediate = beta[word2id[segment_rating.word]] - 0.20
            thr_exhaustive = beta[word2id[segment_rating.word]] + 0.20

            best_frame, frames_before_rise, end_frame, time = \
                get_best_frame_with_rise(dists, max_dist_rise)

            if not check_dists_for_end:
                # when using the backwards-trained net the end is not determined here
                time = segment_rating.end_sec - segment_rating.start_sec
                frames_before_rise = -1

            if min_frame_rise_len is None or (frames_before_rise >= min_frame_rise_len):
                if dists[best_frame] <= thr_immediate:
                    net_ratings[rec_idx] = (start_sec, time, dists[best_frame], segment_idx, frames_before_rise)
                    break
                elif dists[best_frame] <= thr_exhaustive:
                    if net_ratings[rec_idx] is None:
                        net_ratings[rec_idx] = (start_sec, time, dists[best_frame], segment_idx, frames_before_rise)
                    elif net_ratings[rec_idx][2] > dists[best_frame]:
                        net_ratings[rec_idx] = (start_sec, time, dists[best_frame], segment_idx, frames_before_rise)

            if segment_idx == len(rec_segments) - 1 and net_ratings[rec_idx] is None:
                net_ratings[rec_idx] = (None, None, None, len(rec_segments), 0)

    all_ratings.append((-1, -1, net_output_name, net_ratings))
    return all_ratings
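# Hedged usage sketch (not part of the original pipeline): mirrors the call in
# __main above. threshold_net_output_by_beta returns a single-element list, so
# the per-recording ratings sit at [0][-1]; the first two tuple entries are -1
# placeholders, since the thresholds are derived per word from beta.
def __example_threshold_by_beta(net_output_file, beta_file, word2id_file):
    thresholded = threshold_net_output_by_beta(net_output_file, beta_file, word2id_file,
                                               max_dist_rise=0.001)
    _, _, source_name, per_recording_ratings = thresholded[0]
    return per_recording_ratings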
def correlate_to_human_scores(human_ratings_file, ratings_net, leeway_start=0.3, leeway_end=0.3):
    ratings: List[SnodgrassWordRating] = load_pickled(human_ratings_file)

    dists = []
    human_scores = []
    for i, (rating, rating_net) in enumerate(zip(ratings, ratings_net)):
        # unpacked for completeness; only net_start, net_duration and net_dist are used below
        net_start = rating_net[0] if rating_net is not None else None
        net_duration = rating_net[1] if rating_net is not None else None
        net_dist = rating_net[2] if rating_net is not None else None
        net_n_segments = rating_net[3] + 1 if rating_net is not None else 0
        net_frames_before_rise = rating_net[4] + 1 if rating_net is not None and len(rating_net) > 4 else None

        if net_start is not None:
            if not response_missing(rating) and not response_with_synonym(rating):
                if abs(rating.p_delay - net_start) <= leeway_start and \
                        abs(rating.p_delay + rating.duration - (net_start + net_duration)) <= leeway_end:
                    dists.append(net_dist)
                    human_scores.append(rating.p_score)

    dists = np.array(dists)
    human_scores = np.array(human_scores)
    return pearsonr(1 / dists, human_scores)
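# Hedged usage sketch (illustrative only): correlate_to_human_scores returns
# scipy.stats.pearsonr's (r, p_value) pair. Distances are inverted (1 / dists)
# so that smaller embedding distances correspond to higher predicted quality,
# giving a positive r when the net agrees with the human p_score.
def __example_correlation(human_ratings_file, net_ratings):
    r, p_value = correlate_to_human_scores(human_ratings_file, net_ratings,
                                           leeway_start=0.3, leeway_end=0.3)
    print('Pearson r = {0:.3f} (p = {1:.3g})'.format(r, p_value))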
def get_emu_word_counts(exclude_snodgrass=True, cache_path='counts_per_db.pckl'):
    def emu2word_counts_except(db, words):
        db_path = os.path.join(raw_data_dir, db)
        seq_rds_path = os.path.join(raw_data_dir, '{0}.rds'.format(db))
        return emu2word_counts(db, db_path, seq_rds_path,
                               partial(exclude_words, words_to_exclude=words), verbose=True)

    snodgrass_words = load_snodgrass_words() if exclude_snodgrass else None

    out_path = cache_path
    if not os.path.exists(out_path):
        emus = filter(lambda x: os.path.isdir(os.path.join(raw_data_dir, x)) and x.endswith('emuDB'),
                      os.listdir(raw_data_dir))
        skip_dbs = ['BROTHERS_emuDB']

        counts_per_db = {}
        for emu in emus:
            if emu not in skip_dbs:
                counts = emu2word_counts_except(emu, snodgrass_words)
                counts_per_db[emu] = counts

        with open(out_path, 'wb') as f:
            pickle.dump(counts_per_db, f)
        return counts_per_db
    else:
        return load_pickled(out_path)
def __main():
    selected_words_file = 'selected_words.pckl'
    if not os.path.exists(selected_words_file):
        selected_words = select_independent_words()
        with open(selected_words_file, 'wb') as f:
            pickle.dump(selected_words, f)
    else:
        selected_words = load_pickled(selected_words_file)
def get_dataset_word_counts(scp_path):
    out_path = os.path.splitext(os.path.basename(scp_path))[0] + '_word_counts.pckl'
    if not os.path.exists(out_path):
        dataset = KaldiDataset(scp_path)
        with open(out_path, 'wb') as f:
            pickle.dump(dataset.counts, f)
        return dataset.counts
    else:
        return load_pickled(out_path)
def load_common_rating_data(ratings_file_or_data, run_dir, run_epoch):
    train_epoch_embeddings, _, _ = get_or_generate_embeddings(run_dir, run_epoch)
    words_train, datasets_train, vecs_train, counts_train, word_idxs_train = load_embeddings(
        train_epoch_embeddings[run_epoch])

    if isinstance(ratings_file_or_data, list):
        ratings = ratings_file_or_data
    else:
        ratings: List[SnodgrassWordRating] = load_pickled(
            os.path.join(processed_data_dir, ratings_file_or_data))

    return ratings, vecs_train, word_idxs_train
def clean(net_annotations_file, name, vad_aggressiveness=3, fix_no_voice=True, load_cleaned=True):
    vad = webrtcvad.Vad(vad_aggressiveness)

    net_annotated_recordings: List[List[NetAnnotatedSegment]] = load_pickled(net_annotations_file)
    net_annotated_recordings_filtered: List[List[NetAnnotatedSegment]] = []
    for rec_idx, rec_segments in enumerate(net_annotated_recordings):
        new_rec_annotations = []
        for k, segment_rating in enumerate(rec_segments):
            audio, sample_rate, bytes_for_vad, vad_rate = read_wav_plus_resampled_segment(
                segment_rating.source_path, segment_rating.start_sec, segment_rating.end_sec,
                load_cleaned=load_cleaned)

            frames = frame_generator(10, bytes_for_vad, vad_rate)
            segment_gen = vad_collector(vad_rate, 10, 100, vad, list(frames))

            segments = []
            for _, start_sec, end_sec in segment_gen:
                segments.append((start_sec, end_sec))

            dists = segment_rating.dists
            workaround_frame_means = segment_rating.frame_means
            if len(segments) > 0:
                voice_end = segments[0][1]
                for i in range(workaround_frame_means.shape[0]):
                    workaround_frame_means[i] = 1 if frames2time(i + 1) <= voice_end else -1000
            else:
                if fix_no_voice:
                    dists[:] = 1.5  # invalidate dists if no voice detected

            new_rec_annotations.append(
                NetAnnotatedSegment(segment_rating.start_sec, segment_rating.end_sec, segment_rating.segment_idx,
                                    dists, workaround_frame_means, segment_rating.word, segment_rating.vp,
                                    segment_rating.date, segment_rating.source_path))
        net_annotated_recordings_filtered.append(new_rec_annotations)

    output_name = '{name}_vad{0}_bycleaned{1}_novoicefix{2}.netrating'.format(
        vad_aggressiveness, 1 if load_cleaned else 0, 1 if fix_no_voice else 0, name=name)
    save_pickled(net_annotated_recordings_filtered, output_name)
def evaluate_net_ratings_list(human_ratings_file_or_list, net_ratings_list, leeway_start=0.3, leeway_end=0.3,
                              verbose=False):
    if isinstance(human_ratings_file_or_list, list):
        ratings = human_ratings_file_or_list
    else:
        ratings: List[SnodgrassWordRating] = load_pickled(human_ratings_file_or_list)

    evaluations = []  # thr_immediate, thr_exhaustive, TPR, FPR, tp, fp, tn, fn, precision, recall, accuracy
    for thr_immediate, thr_exhaustive, net_output_file, net_ratings in net_ratings_list:
        if verbose:
            print('{0} thresholded at {1} (immediate), {2} (exhaustive)'.format(net_output_file, thr_immediate,
                                                                                thr_exhaustive))

        false_negative, false_positive_wasnt_there, false_positive_wrong_time, real_negative, real_positive, \
            true_negative, true_positive, _, _, _ = compare_to_human(ratings, net_ratings,
                                                                     leeway_start=leeway_start,
                                                                     leeway_end=leeway_end)

        false_positive = false_positive_wrong_time + false_positive_wasnt_there
        total_good = true_positive + true_negative
        total_ratings = real_positive + real_negative

        net_positive = true_positive + false_positive
        if net_positive == 0:
            net_positive += 1  # avoid division by zero in precision when the net made no positive calls

        result = Evaluation(thr_immediate, thr_exhaustive,
                            true_positive / (true_positive + false_negative),
                            false_positive / (false_positive + true_negative),
                            true_positive, false_positive, true_negative, false_negative,
                            true_positive / net_positive,
                            true_positive / (true_positive + false_negative),
                            total_good / total_ratings * 100)
        evaluations.append(result)

        if verbose:
            print('Positive: true {0}, false wrong time {1}, false was not there: {2}'
                  .format(true_positive, false_positive_wrong_time, false_positive_wasnt_there))
            print('Negative: true {0}, false {1}'.format(true_negative, false_negative))
            print('Real positive: {0}, real negative: {1}'.format(real_positive, real_negative))
            print('Total correct: {0}/{1} ({2:.3f}%)'.format(total_good, total_ratings,
                                                             total_good / total_ratings * 100))
            print('')

    return evaluations
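# For reference, the Evaluation result filled above is consistent with a
# namedtuple of this shape (a sketch inferred from the positional arguments;
# the actual definition lives elsewhere in the repo and may differ):
#
# Evaluation = namedtuple('Evaluation', ['thr_immediate', 'thr_exhaustive', 'tpr', 'fpr',
#                                        'tp', 'fp', 'tn', 'fn',
#                                        'precision', 'recall', 'accuracy'])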
def threshold_net_output(net_output_file, thresholds_immediate, thresholds_exhaustive, max_dist_rise=0.001,
                         min_frame_rise_len=None, check_dists_for_end=True):
    # XXX: for sliding window segmentation the two-stage thresholding idea does not really apply,
    # as there is only one potential detection at the end, and so only the higher threshold applies
    net_annotated_recordings: List[List[NetAnnotatedSegment]] = load_pickled(net_output_file)

    all_ratings = []
    for thr_immediate, thr_exhaustive in zip(thresholds_immediate, thresholds_exhaustive):
        net_ratings = [None] * len(net_annotated_recordings)
        for rec_idx, rec_segments in enumerate(net_annotated_recordings):
            for k, segment_rating in enumerate(rec_segments):
                dists = segment_rating.dists if isinstance(segment_rating.dists, np.ndarray) \
                    else np.array(segment_rating.dists)
                start_sec = segment_rating.start_sec
                segment_idx = segment_rating.segment_idx

                best_frame, frames_before_rise, end_frame, time = \
                    get_best_frame_with_rise(dists, max_dist_rise)

                if not check_dists_for_end:
                    # when using the backwards-trained net the end is not determined here
                    time = segment_rating.end_sec - segment_rating.start_sec
                    frames_before_rise = -1

                if min_frame_rise_len is None or (frames_before_rise >= min_frame_rise_len):
                    if dists[best_frame] <= thr_immediate:
                        net_ratings[rec_idx] = (start_sec, time, dists[best_frame], segment_idx, frames_before_rise)
                        break
                    elif dists[best_frame] <= thr_exhaustive:
                        if net_ratings[rec_idx] is None:
                            net_ratings[rec_idx] = (start_sec, time, dists[best_frame], segment_idx,
                                                    frames_before_rise)
                        elif net_ratings[rec_idx][2] > dists[best_frame]:
                            net_ratings[rec_idx] = (start_sec, time, dists[best_frame], segment_idx,
                                                    frames_before_rise)

                if segment_idx == len(rec_segments) - 1 and net_ratings[rec_idx] is None:
                    net_ratings[rec_idx] = (None, None, None, len(rec_segments), 0)

        all_ratings.append((thr_immediate, thr_exhaustive, net_output_file, net_ratings))

    return all_ratings
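# Hedged usage sketch (hypothetical threshold values): sweep paired threshold
# grids and evaluate each setting against the human ratings. The immediate
# threshold is kept below the exhaustive one, matching the two-stage logic above.
def __example_threshold_sweep(net_output_file, human_ratings_file):
    centers = np.linspace(0.3, 0.7, 9)
    rated = threshold_net_output(net_output_file, centers - 0.1, centers + 0.1)
    return evaluate_net_ratings_list(human_ratings_file, rated, verbose=True)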
def split_independent_words(output_name, data_sub_dir, dataset_comparable_to):
    output_path = os.path.join(processed_data_dir, output_name)

    train_path, dev_path, _ = get_dataset_paths(dataset_comparable_to)
    counts_train = get_dataset_word_counts(train_path)
    counts_dev = get_dataset_word_counts(dev_path)

    selected_words = load_pickled('selected_words.pckl')

    all_scp = glob.glob(os.path.join(processed_data_dir, data_sub_dir, '*independent_test.scp'))
    swc_scp = [x for x in all_scp if os.path.basename(x).startswith('SWC')][0]
    all_scp.remove(swc_scp)

    emu_lines = []  # this will be the train data
    swc_lines = []  # this will be the test data
    for scp in all_scp:
        read_scp_lines(scp, emu_lines)
    read_scp_lines(swc_scp, swc_lines)

    emu_lines = np.array(emu_lines)
    swc_lines = np.array(swc_lines)
    emu_words = np.array([key2word(x) for x in emu_lines])
    swc_words = np.array([key2word(x) for x in swc_lines])

    emu_counts = Counter(emu_words)
    swc_counts = Counter(swc_words)

    # for word in emu_counts:
    #     print('{0:<20}: train {1}, test {2}'.format(word, emu_counts[word], swc_counts.get(word, 0)))

    # for word in counts_train:
    #     new_word = selected_words[word]
    #     print('{0}, train: {1}, dev: {2}'.format(word, counts_train[word], counts_dev.get(word, 0)))
    #     print('{word}: new train count: {0}, new test count: {1}'.format(emu_counts[new_word], swc_counts[new_word],
    #                                                                      word=new_word))

    new_train = []
    new_dev = []
    for word, new_word in selected_words.items():
        train_new_lines = emu_lines[emu_words == new_word]
        np.random.shuffle(train_new_lines)
        new_train.extend(train_new_lines[:counts_train[word]])

        dev_new_lines = swc_lines[swc_words == new_word]
        np.random.shuffle(dev_new_lines)
        # new_dev.extend(dev_new_lines[:counts_dev.get(word, 5)])  # didn't work at all, maybe bad labels?
        new_dev.extend(dev_new_lines[:35])

    train_scp = '{0}_train.scp'.format(output_path)
    dev_scp = '{0}_dev.scp'.format(output_path)
    with open(train_scp, 'w') as train_file:
        for line in new_train:
            train_file.write(line)
    with open(dev_scp, 'w') as dev_file:
        for line in new_dev:
            dev_file.write(line)

    return train_scp, dev_scp, None
def patient_adaptation_test_on_dev(args):
    run_dir = args.run_dir
    run_epoch = args.run_epoch
    ratings_file = args.ratings_file
    patient = args.patient

    out_dir = 'patient_adaptation_test_output_{0}'.format(patient)
    util.ensure_exists(out_dir)

    if '_dev_' not in ratings_file:
        raise RuntimeError('Only ratings available in the dev dataset are currently supported')

    train_epoch_embeddings, dev_epoch_embeddings, _ = \
        get_or_generate_embeddings(run_dir, run_epoch, dev_needed=True, test_needed=False)
    words_train, datasets_train, vecs_train, counts_train, word_idxs_train = load_embeddings(
        train_epoch_embeddings[run_epoch])
    words_dev, datasets_dev, vecs_dev, counts_dev, word_idxs_dev, keys_dev = load_embeddings(
        dev_epoch_embeddings[run_epoch], return_keys=True)

    all_ratings: List[SnodgrassWordRating] = load_pickled(os.path.join(processed_data_dir, ratings_file))
    ratings_patient = [r for r in all_ratings if r.vp == patient]
    all_sessions = np.unique([r.date for r in ratings_patient])
    print('{0} sessions for patient {1}'.format(len(all_sessions), patient))

    adaptation_functions = {
        'only_new_session': adaptation_only_session_examples_if_available,
        'add_new_session': adaptation_add_session_examples,
        'average_with_new_session': adaptation_average_with_session_examples
    }

    for session in all_sessions:
        fold_ratings = [r for r in ratings_patient if r.date != session]
        sessions_vecs, session_word_idxs = collect_session_embeddings_data(session, vecs_dev, keys_dev)

        ratings_name = '{0}_patient_{1}_except_{2}'.format(os.path.basename(ratings_file), patient, session)
        net_annotate_sliding_window_framewise(run_dir=run_dir, run_epoch=run_epoch,
                                              ratings_file_or_object=fold_ratings, skip_starting=0.3,
                                              save=True, ratings_name=ratings_name, output_dir=out_dir)

        for adaptation_type, method in adaptation_functions.items():
            reference_vecs, reference_word_idxs = method(vecs_train, word_idxs_train,
                                                         sessions_vecs, session_word_idxs)
            ratings_name = '{0}_patient_{1}_except_{2}_adaptation_{3}'.format(os.path.basename(ratings_file),
                                                                              patient, session, adaptation_type)
            net_annotate_sliding_window_framewise(run_dir=run_dir, run_epoch=run_epoch,
                                                  ratings_file_or_object=fold_ratings, skip_starting=0.3,
                                                  reference_vecs_override=reference_vecs,
                                                  reference_word_idxs_override=reference_word_idxs,
                                                  save=True, ratings_name=ratings_name, output_dir=out_dir)
def net_annotate_sliding_window_framewise(run_dir, run_epoch, ratings_file_or_object, skip_starting=0,
                                          reference_vecs_override=None, reference_word_idxs_override=None,
                                          save=True, ratings_name=None, output_dir=None, plot_mode=False):
    time_start = time.time()

    if save:
        if ratings_name is None:
            ratings_name = os.path.basename(ratings_file_or_object)
        if output_dir is None:
            output_dir = 'output'
        util.ensure_exists(output_dir)
        output_file = '{0}_epoch_{1}_{2}_full{3}_skip{4:.3f}.netrating_faster' \
            .format(os.path.basename(run_dir), run_epoch, ratings_name, 'own_segmentation', skip_starting)
        output_file = os.path.join(output_dir, output_file)

    net, config, _, _, _, _, train_scp, feature_mean, feature_std, word2id, mean_sub, var_norm = \
        load_net(run_dir, epoch=run_epoch, logger=None, train=False)
    ratings, vecs_train, word_idxs_train = load_common_rating_data(ratings_file_or_object, run_dir, run_epoch)
    word_lengths = load_pickled(scp2word_lengths_file(train_scp))

    reference_vecs = reference_vecs_override if reference_vecs_override is not None else vecs_train
    reference_word_idxs = reference_word_idxs_override if reference_word_idxs_override is not None \
        else word_idxs_train

    output: List[List[NetAnnotatedSegment]] = [[] for _ in range(len(ratings))]
    for rating, rating_idx, start_sec, end_sec, audio, sample_rate in \
            plain_audio_generator(ratings, skip_starting=skip_starting):
        if audio.shape[0] == 0:
            output[rating_idx].append(
                NetAnnotatedSegment(0, 0, 0, np.array([1000]), np.array([1000]),
                                    rating.word, rating.vp, rating.date, rating.wav_path))
        else:
            mean_length, max_length = word_lengths[rating.word]

            spacing_frames = 5
            # TODO: half of mean duration may not be the best choice for every word
            duration_frames = time2frames(mean_length / 2)

            full_features = new_features(audio, sample_rate, feature_mean, feature_std, mean_sub, var_norm)
            starts = subsegment_starts(full_features.shape[0], duration_frames, spacing_frames)
            # much faster than segmenting first and then getting the features of each small segment
            features = [(full_features[s:s + duration_frames]) for s in starts]
            num_segments = len(features)

            subsegment_portions = [0.75, 0.5, 0.25]
            size_multiplier = len(subsegment_portions) + 1
            features_plus_extra, lengths = stack_features(features, num_segments, duration_frames,
                                                          subsegment_portions, size_multiplier)

            stacked_dists = evaluate_stacked_features(net, config, features_plus_extra, lengths, rating,
                                                      reference_vecs, reference_word_idxs, num_segments,
                                                      size_multiplier)

            bs_idx, best_start, best_end, best_duration = \
                select_best_segmentation(stacked_dists, starts, end_sec, max_length, skip_starting)
            best_duration_frames = time2frames(best_duration)

            def plot_dists(savefig=False):
                import matplotlib.pyplot as plt
                from matplotlib import rc
                rc('text', usetex=True)
                rc('font', size=12)
                rc('legend', fontsize=12)
                font = {'family': 'serif', 'serif': ['cmr10']}
                rc('font', **font)

                if not response_missing(rating) and not response_with_synonym(rating):
                    p_delay_adjusted = rating.p_delay
                    plt.axvline(p_delay_adjusted, color='xkcd:bright lavender', dashes=[5, 5], zorder=2,
                                label='Word start')
                    plt.axvline(p_delay_adjusted + rating.duration, color='xkcd:light grass green',
                                dashes=[1, 1], zorder=2, label='Word end')

                plt.plot([frames2time(x) + skip_starting for x in starts], stacked_dists[:, -1], zorder=1,
                         color='xkcd:cobalt blue')
                plt.axvline(best_start, color='xkcd:lightish red', dashes=[1, 0], zorder=2,
                            label='Word start guess')
                plt.xlabel('Time (s)')
                plt.ylabel('Avg distance to reference examples')
                plt.legend()
                if savefig:
                    plt.savefig('plots_output/recording_dists_{0:04}.pdf'.format(rating_idx), dpi=300,
                                bbox_inches='tight', pad_inches=0)
                else:
                    plt.show()
                plt.clf()

            if plot_mode:
                if not response_missing(rating) and not response_with_synonym(rating):
                    plot_dists(savefig=True)
                if rating_idx >= 10:
                    break

            dists_best_guess, features_best_guess = \
                evaluate_stepwise(net, config,
                                  full_features[starts[bs_idx]:starts[bs_idx] + best_duration_frames],
                                  rating, reference_vecs, reference_word_idxs)

            output[rating_idx].append(
                NetAnnotatedSegment(best_start, best_end, 0, dists_best_guess,
                                    features_best_guess[:, 0, :].mean(axis=1),
                                    rating.word, rating.vp, rating.date, rating.wav_path))

        print('Finished rating number {0}'.format(rating_idx + 1))

    if save and not plot_mode:
        save_pickled(output, output_file)
        if hasattr(net, 'beta'):
            beta = net.beta.detach().cpu().numpy()
            beta_out_file = os.path.join(output_dir,
                                         '{0}_epoch_{1}.beta'.format(os.path.basename(run_dir), run_epoch))
            with open(beta_out_file, 'wb') as f:
                np.save(f, beta)

    print('Elapsed sec: {0:.3f}'.format(time.time() - time_start))
    return output, net.beta.detach().cpu().numpy() if hasattr(net, 'beta') else None
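# Hedged usage sketch: annotate a set of ratings with the sliding-window
# approach above. The run_dir matches the beta file name used in __main;
# the dev ratings file name is hypothetical.
# output, beta = net_annotate_sliding_window_framewise(
#     run_dir='output/siamese_53_20_12_2018', run_epoch=66,
#     ratings_file_or_object='all_snodgrass_cleaned_v5_dev_ratings_full',
#     skip_starting=0.3, save=True)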