# Plot the 'Average' row of overview.tsv as a grouped bar chart:
# three blue hatched bars for the VAD dimensions, red bars for the BE5
# (basic emotion) dimensions.  Runs headless via the Agg backend.
import pandas as pd
import matplotlib
matplotlib.rc('xtick', labelsize=15)
matplotlib.rc('ytick', labelsize=15)
matplotlib.use('Agg') #change backend for headless operation
import matplotlib.pyplot as plt
from framework import util
import numpy as np
from main.data import VAD, BE5

# overview.tsv holds one column per emotion variable; the 'Average' row is
# the aggregate value plotted below.
df = util.load_tsv('overview.tsv')
ind = np.arange(1, 9)  # x positions: 3 VAD bars + 5 BE5 bars
print(ind)
fig, ax = plt.subplots()
ax.bar(ind[:3], df.loc['Average', VAD], color='blue', edgecolor='black',
       linewidth=1, hatch='/', zorder=3,
       label='VAD') #zorder determines foreground-background ordering
ax.bar(ind[3:], df.loc['Average', BE5], color='red', edgecolor='black',
       linewidth=1,
       # NOTE(review): this call is truncated in this chunk — the remaining
       # keyword arguments (and the rest of the script) continue beyond view.
# Count, for each language, how many words the newer (monolingual) lexicon
# adds on top of the previously available gold lexicons, and write the
# counts to new_entries_monolingual.tsv (columns: Lexicon, N).
import pandas as pd

from framework.util import load_tsv
from framework import prepare_data


def _count_new(new_lexicon_path, old_loaders):
    """Return how many words in the lexicon at *new_lexicon_path* are absent
    from the union of the lexicons produced by the callables in *old_loaders*.
    """
    set_new = set(load_tsv(new_lexicon_path).index)
    set_old = set()
    for loader in old_loaders:
        # union() returns a new set, so repeated application mirrors the
        # original chained-union behavior exactly
        set_old = set_old.union(set(loader().index))
    return len(set_new.difference(set_old))


# (name in output file, path of the new lexicon, loaders of the lexicons
#  previously available for that language)
_CASES = [
    # english
    ('Warriner_BE', 'lexicons/Warriner_BE.tsv',
     [prepare_data.load_anew99]),
    # spanish (two prior lexicons)
    ('Stadthagen_Dominance', 'lexicons/Stadthagen_Dominance.tsv',
     [prepare_data.load_redondo07, prepare_data.load_hinojosa16]),
    # german (Vo)
    ('Vo_BE', 'lexicons/Vo_BE.tsv',
     [prepare_data.load_briesemeister11]),
    # polish
    ('Imbir_BE', 'lexicons/Imbir_BE.tsv',
     [prepare_data.load_wierzba15]),
]

# compute additional entries for each data set in the monolingual approach
with open('new_entries_monolingual.tsv', 'w') as f:
    # bug fix: header previously read 'Lexcion'
    print('Lexicon\tN', file=f)
    for name, path, old_loaders in _CASES:
        print('{}\t{}'.format(name, _count_new(path, old_loaders)), file=f)
# Build a LaTeX results table from results.tsv: rename rows/columns to the
# names used in the paper, round to 3 decimals, and mark (via the boolean
# frames below) which values exceed the normalized split-half reliability.
import framework.util as util
from main.data import IN_PAPER_NAMES, VA, BE5, SHORT_COLUMNS
import datetime
import pandas as pd

df=util.load_tsv('results.tsv')
df=df[VA+BE5]  # keep only the VA and BE5 variable columns
df.rename(index=IN_PAPER_NAMES, inplace=True)
df.rename(index=str, columns=SHORT_COLUMNS, inplace=True)
#reorder index to the order in which the data sets appear in the paper
# (relies on IN_PAPER_NAMES preserving insertion order — dicts do in 3.7+)
df=df.reindex([value for key,value in IN_PAPER_NAMES.items()])
# split-half reliabilities serve as an upper-bound reference; Dominance is
# dropped because the table has no Dom column
df_shr=util.load_tsv('../../analysis/shr/shr_normalized.tsv').drop('Dom', axis=1)
df=df.round(3)
# element-wise comparison against the reliability ceiling (used later for
# bold-facing; the rest of the table generation continues beyond this chunk)
df_lesser=df<df_shr
df_greater=df>df_shr
lines=[]
lines.append('%%%%%% Automatic Python output from {} &%%%%%%%%%%'.format(datetime.datetime.now()))
lines.append('\\begin{tabular}{|l|rr|rrrrr|}')
                base_model=base_model, source_lexicon=source_lexicon)
            # NOTE(review): the line above completes a model-construction call
            # that begins before this chunk — indentation here is reconstructed
            # and should be confirmed against the full file.
            # Ablation: one model per source variable, trained with that
            # variable removed, so its importance can be measured against the
            # full model.
            for var in list(source_lexicon):
                models[var] = framework.models.SKlearn_Mapping_Model(
                    base_model=base_model,
                    source_lexicon=source_lexicon.drop(var, axis=1))
            # Run actual evaluation
            ev = framework.models.Evaluator(models=models)
            ev.crossvalidate(words=target_lexicon.index,
                             labels=target_lexicon,
                             k_splits=k_fold,
                             outpath='results/{}/{}/{}/'.format(
                                 curr_dir, base_model_name, setting.name))
            ### compute difference to full model:
            df_full = util.load_tsv('results/{}/{}/{}/full.tsv'.format(
                curr_dir, base_model_name, setting.name))
            print(df_full)
            for var in list(source_lexicon):
                df_var = util.load_tsv('results/{}/{}/{}/{}.tsv'.format(
                    curr_dir, base_model_name, setting.name, var))
                print(df_var)
                # negative diff => performance dropped when 'var' was ablated
                df_diff = df_var - df_full
                print(df_diff)
                util.save_tsv(df=df_diff,
                              path='results/{}/{}/{}/diff_{}.tsv'.format(
                                  curr_dir, base_model_name, setting.name, var))

### compute average values
# NOTE(review): presumably executed once after all settings are processed —
# confirm these two calls sit at module level in the original file.
average_subdirs('results/be2vad/lm')
average_subdirs('results/vad2be/lm')
# Collect the 'Average' row of my_model's per-variable results for every
# setting and both mapping directions into one overview table, rename it to
# paper naming, and save it; then compare against split-half reliabilities.
import pandas as pd
from main.data import SETTINGS, IN_PAPER_NAMES, VAD, BE5, SHORT_COLUMNS
from framework.util import get_average_result_from_df, save_tsv, no_zeros_formatter, load_tsv
import datetime
import framework.util as util

directions = ['be2vad', 'vad2be']
# NOTE(review): 'Reference_KNN' is capitalized unlike 'reference_LM' —
# verify this matches the actual result-file names on disk.
models = ['baseline', 'reference_LM', 'Reference_KNN', 'my_model']
VARS = VAD + BE5

df = pd.DataFrame(index=[setting.name for setting in SETTINGS], columns=VARS)
for d in directions:
    for s in SETTINGS:
        results = load_tsv('results/{}/{}/my_model.tsv'.format(d, s.name))
        # each direction produces only a subset of VARS, so the membership
        # test keeps the two directions from overwriting each other
        for var in VARS:
            if var in list(results):
                df.loc[s.name, var] = results.loc['Average', var]
df.rename(index=IN_PAPER_NAMES, inplace=True)
df.rename(index=str, columns=SHORT_COLUMNS, inplace=True)
save_tsv(df, 'overview_individual.tsv')

# read normalized split half reliabilites to make larger values bold
# (df_greater / df_lesser are consumed by the table-formatting code that
# continues beyond this chunk)
df_shr = load_tsv('../../analysis/shr/shr_normalized.tsv')
df_greater = df > df_shr
df_lesser = df < df_shr
print(df_greater)
print(df_lesser)
with open('overview.tex', 'w') as f: print(string, file=f) #################################################### ### Significance tests settings = [s.name for s in SETTINGS] star_df = pd.DataFrame(columns=directions) for d in directions: for s in settings: ### load all individual data frames dfs = {} for m in models: dfs[m] = load_tsv('results/{}/{}/{}.tsv'.format(d, s, m)) # write average results into single data frame to determine the two best systems average_results = pd.DataFrame(columns=['r']) for key, value in dfs.items(): average_results.loc[key, 'r'] = value.loc['Average', 'Average'] # sort by performance and get name of the best two systems average_results = average_results.sort_values(by='r', axis=0, ascending=False) best_2 = list(average_results.index)[:2] # compute paired t-test on individual results of cross-validation pvalue = st.ttest_rel(a=dfs[best_2[0]].drop(['SD', 'Average'], axis=0)['Average'], b=dfs[best_2[1]].drop(['SD', 'Average'], axis=0)['Average'])[1] # compute the number of stars