def plot_l1_with_code_switches(results_dir='../../simulations/messageless/messageless_balanced',
                               languages=('en', 'es')):
    df = pd.read_csv(f'{results_dir}/performance_per_lang.csv', index_col=None, header=0,
                     skipinitialspace=True, dtype={'epoch': int})
    plt = Plotter(results_dir=results_dir)
    for l in languages:
        plt.performance(df[df.switch_from == l], fname=f'l1_performance_{l}',
                        include_code_switches=True, max_epochs=40)
def cognate_simulations(results_dir, models, only_last_epoch, create_files, create_csv, plot_items,
                        per_switch_direction):
    if create_files:
        create_all_model_csv_files(results_dir, models=models, create_csv=create_csv,
                                   per_switch_direction=per_switch_direction,
                                   only_last_epoch=only_last_epoch)
    if plot_items:
        plt = Plotter(results_dir=results_dir)
        # plt.plot_cognate_last_epoch()
        all_models = []
        for m in models:
            all_models.extend([f'{m}1', f'{m}2'])
        # plt.performance_all_models(models=all_models)
        # plt.plot_cognate_effect_over_time(df_name=f'count_{fname}', ci=68)
        # for i in range(10, 31, 10):
        #     plt.plot_cognate_effect_over_time(df_name=f'count_{i}cog_models_merged.csv', ci=68,
        #                                       ignore_baseline=True, info_to_plot=('code_switched',))
        #     plt.print_switches_around_switch_point(df_name=f'count_{i}cog_models_merged.csv')
        for i in models:
            print(i)
            i = i.replace("/", "_")
            print(i, results_dir)
            info_to_plot = (('code_switched', 'switched_before', 'switched_at', 'switched_right_after',
                             'switched_second_after', 'switched_after_anywhere')
                            if ('eos' in results_dir or 'bos' in results_dir)
                            else ('code_switched',))
            plt.plot_cognate_last_epoch(df_name=f'{"per_lang" if per_switch_direction else ""}'
                                                f'count_{i}_models_merged.csv',
                                        ci=68, info_to_plot=info_to_plot, lineplot=False)
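# Usage sketch (hypothetical call, not the original driver): cognate_simulations() assumes that
# results_dir already holds one 'count_<model>_models_merged.csv' file per model (or the
# 'per_lang' variant when per_switch_direction is set), as produced by create_all_model_csv_files().
# The path and model names below are placeholders.
# cognate_simulations(results_dir='../../simulations/cog_paper/eos',  # hypothetical path
#                     models=('cog',), only_last_epoch=True, create_files=True, create_csv=True,
#                     plot_items=True, per_switch_direction=False)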
def pairwise_cross_model_comparison(create_files, fname='cog_models_merged', only_last_epoch=True,
                                    results_dir='../../simulations/cog_paper/cognate_percentage/'
                                                'pairwise_training/evaluation',
                                    models=('10/cog', '15/cog', '20/cog', '25/cog', '30/cog')):
    if create_files:
        create_all_model_csv_files(results_dir, models=models, create_csv=True,
                                   only_last_epoch=only_last_epoch)
    all_df = []
    for m in models:
        m = m.replace('/', '_')
        print(m, f'{results_dir}/{m}_models_merged.csv')
        df = pd.read_csv(f'{results_dir}/{m}_models_merged.csv')
        df['model_name'] = m.split('_')[0]
        all_df.append(df)
    sentences_to_test = pd.concat(all_df, sort=False)
    only_common_sentences = True
    if only_common_sentences:
        count_correct = sentences_to_test.groupby('sentence_idx').count()
        num_per_unique_sentence = len(models) * 2  # models * (cognate + non_cognate sentences)
        sentences_to_test = sentences_to_test.loc[sentences_to_test['sentence_idx'].isin(
            count_correct[count_correct['model_name'] == num_per_unique_sentence].index)]
    sentences_to_test.to_csv(f'{results_dir}/{fname}.csv')
    group_models(results_dir, f'{fname}.csv')
    plt = Plotter(results_dir=results_dir)
    plt.plot_cognate_last_epoch(df_name=f'count_{fname}.csv', ci=68, xaxis_step=5,
                                info_to_plot=('code_switched',), lineplot=True)
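# Example invocation (sketch): reuse existing per-model merged CSVs under the default results_dir
# and only regenerate the combined plot.
# pairwise_cross_model_comparison(create_files=False)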
def non_pairwise_cross_model_comparison(results_dir='../../simulations/cog_paper/cognate_percentage/'
                                                    'non_pairwise_training/',
                                        create_csv=True, num_simulations=70, remove_incorrect_meaning=True):
    if create_csv:
        df_per_test_set = []
        for fname in ['non_cognate_evaluation', 'cognate_evaluation']:
            models = list(range(0 if 'non' in fname else 10, num_simulations + 1, 10))
            num_per_unique_sentence = len(models)
            all_dfs = []
            for m in models:
                print(m)
                df = pd.read_csv(f'{results_dir}/{fname}/{m}/all_results.csv')
                df = df[df.epoch == df.epoch.max()]
                if remove_incorrect_meaning:
                    df.drop(df.loc[df.meaning == 0].index, inplace=True)
                if fname == 'cognate_evaluation':
                    assert len(df) == len(df[df.target_has_cognate == True])
                df['model_name'] = m
                df['sentence_idx'] = df.apply(lambda x: hash_string(f'{x.message}{x.network_num}'), axis=1)
                df['test_set'] = fname
                all_dfs.append(df)
            non_pairwise_merged = pd.concat(all_dfs, sort=False)
            only_common_sentences = True
            if only_common_sentences:
                count_correct = non_pairwise_merged.groupby('sentence_idx').count()
                print(num_per_unique_sentence, 'total:', len(non_pairwise_merged))
                non_pairwise_merged = non_pairwise_merged.loc[non_pairwise_merged['sentence_idx'].isin(
                    count_correct[count_correct['model_name'] == num_per_unique_sentence].index)]
                print('clean:', len(non_pairwise_merged))
            df_per_test_set.append(non_pairwise_merged)
        test_sets_merged = pd.concat(df_per_test_set, sort=False)
        test_sets_merged.to_csv(f'{results_dir}/non_pairwise_cognate_non_cognate.csv')
        gb = test_sets_merged.groupby(['epoch', 'network_num', 'model_name', 'test_set']).apply(
            lambda dft: pd.Series({'code_switched': dft.is_code_switched.sum(),
                                   'total_sentences': len(dft.meaning),
                                   'code_switched_percentage': dft.is_code_switched.sum() * 100 / len(dft.meaning)}))
        gb.to_csv(f'{results_dir}/non_pairwise_test_sets_grouped'
                  f'{"_include_incorrect" if not remove_incorrect_meaning else ""}.csv')
    plt = Plotter(results_dir=results_dir)
    plt.plot_cognate_last_epoch(df_name='non_pairwise_test_sets_grouped.csv', hue='test_set',
                                include_annotations=False, lineplot=True, ci=68, xrow='model_name')
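# Example invocation (sketch): with create_csv=True the function expects
# '<results_dir>/<test_set>/<m>/all_results.csv' for every m in range(0 or 10, num_simulations + 1, 10);
# with create_csv=False it only re-plots the previously grouped CSV.
# non_pairwise_cross_model_comparison(create_csv=False)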
def code_switching_patterns_model_comparison():
    results_dir = '../../simulations/patterns'
    plot_code_switches = True
    create_csv_files = False
    if create_csv_files:
        num_sim = 40
        for m in ['early', 'esen', 'enes']:
            create_dataframes_for_plots(results_dir=f'{results_dir}/{m}', epoch_from=0, epoch_to=40,
                                        simulation_range=range(1, num_sim + 1))
    plt = Plotter(results_dir=results_dir)
    if plot_code_switches:
        plt.l1_performance_all_models()
        plt.l2_performance_all_models()
def plot_l1_l2_performance_cognate_models(results_dir='../../simulations/cog_paper/'):
    plt = Plotter(results_dir=results_dir)
    for testset in ['generic']:  # ['bos', 'eos', 'generic']:
        for model in ['balanced', 'enes', 'esen']:
            for m in ['cog1', 'cog2']:
                print(f'{results_dir}/{testset}/{model}/{m}/performance_per_lang.csv')
                df = pd.read_csv(f'{results_dir}/{testset}/{model}/{m}/performance_per_lang.csv',
                                 index_col=None, header=0, skipinitialspace=True, dtype={'epoch': int})
                l2_lang = model[-2:] if model != 'balanced' else None
                lang = [model[-4:-2]] if model != 'balanced' else ['en', 'es']
                if model != 'balanced':
                    df['l2_epoch'] = 10
                print(lang, l2_lang)
                for l in lang:
                    plt.performance(df[df.switch_from == l], fname=f'l1_{testset}_{model}_{m}_{l}',
                                    include_code_switches=True)
                if l2_lang:
                    plt.l2_performance(df, l2_lang=l2_lang, fname=f'l2_{testset}_{model}_{m}_{l2_lang}',
                                       include_code_switches=True)
elif platform.system() == 'Darwin':
    messagebox.showinfo("Alert", "Platform is macOS - Auto load dataset")
    try:
        omega = LoadOmega.Load_Omega("../../data/Dataset 2/run2/run2.omega.pasco.csv")
        print("Loaded omega run 2 from dataset 2")
    except Exception:
        print("Unable to load dataset. Please contact maintainer HA.")
else:
    messagebox.showinfo("ALERT", "Select an omega dataset from data/dataset/")
    try:
        omega = LoadOmega.Load_Omega()
    except Exception:
        print("No files were chosen. Exiting with code 69-420-247...")

accel = Simulate.AccelData_Rotate(
    Simulate.AccelData_CreateFromRotary(omega, random.randint(0, 10)),
    random.randint(0, 2))
plt1 = Plotter.MultiPlotter([accel], [omega])
a = []  # random 3-axis samples; currently unused
for i in range(100):
    a.append([random.randint(0, 20), random.randint(0, 20), random.randint(0, 20)])
plt1.display()
])
eval_sets = set()
if args.eval_test:
    eval_sets.add('test')
if args.eval_training:
    eval_sets.add('training')
results_mean_and_std = compute_mean_and_std(valid_results, evaluated_sets=eval_sets, epochs=args.epochs)

with lz4.open(f"{results_dir}/summary_results.pickled", 'wb') as pckl:
    pickle.dump(results_mean_and_std, pckl, protocol=-1)

plot = Plotter(results_dir=results_dir, summary_sim=num_valid_simulations, title=args.title,
               epochs=args.epochs, num_training=num_training, num_test=num_test)
plot.plot_results(results_mean_and_std, cognate_experiment=args.cognate_experiment,
                  test_sentences_with_pronoun=test_sentences_with_pronoun,
                  auxiliary_experiment=args.auxiliary_experiment, evaluated_datasets=eval_sets)

if not isinstance(results_mean_and_std['correct_code_switches']['test'], int):
    with open(f"{results_dir}/results.log", 'w') as f:
        f.write(f"Code-switched percentage (test set): "
                f"{Plotter.percentage(results_mean_and_std['correct_code_switches']['test'], num_test)}"
def plot_regression_analysis_results(results_dir='../../mixed_effects_regression_analysis'):
    plt = Plotter(results_dir=results_dir)
    for fname, ylim in [('cog_enes_esen_sim_per_L1', 3)]:
        # , ('cog_enes_esen_sim_per_L2', 30), ('cog_balanced_enes_esen_sim', 26)]:
        plt.plot_merged_cognate_csv(df_name=fname, ylim=ylim)
    # cog_enes_all_esen_all_sim_per_L1')  # _per_L2
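# Minimal driver sketch (hypothetical entry point, not part of the original module):
# if __name__ == '__main__':
#     plot_regression_analysis_results()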