- ((x['lambda_exp']*x['min_crowd_sat'] + (1 - x['lambda_exp']) * x['max_crowd_sat'])/x['max_crowd_sat'])) , axis = 1) res_kalai['gain_ratio'] = pd.merge(ref_satisfaction, res_kalai, on = 'alternative_id').apply( lambda x: np.abs(( ( x['lambda_exp']*x['max_expert_sat'] + (1 - x['lambda_exp']) * x['min_expert_sat'])/x['max_expert_sat']) - ((x['lambda_exp']*x['min_crowd_sat'] + (1 - x['lambda_exp']) * x['max_crowd_sat'])/x['max_crowd_sat'])) , axis = 1) res_nash.to_csv('results/results_nash_' + expert_type +'.csv') res_kalai.to_csv('results/results_kalai_' + expert_type +'.csv') res_baseline.to_csv('results/results_baseline_'+ expert_type +'.csv') ## ---------------------------------------------------------------------------- ######### SUMMARIZE RESULTS res_relative_sat = relative_overall_satisfaction(res_nash, res_kalai, res_baseline, ref_satisfaction) res_relative_sat = res_relative_sat.rename(columns = ({'crowd_sat': 'rel-crowd_sat', 'expert_sat':'rel-expert_sat', 'satisfaction_area' : 'rel-satisfaction_area', 'satisfaction_sum': 'rel-satisfaction_sum'})) all_sum_res = pd.merge(res_relative_sat, res_overal_sat, on = 'method') all_sum_res.to_csv('results/results_overall_relative_'+ expert_type +'.csv') # a = pd.merge(res_baseline[['alternative_id','crowd_sat-crowd_median']], # res_kalai[['alternative_id', 'crowd_sat']], on = 'alternative_id') #################### Result analysis - lower uncertanty #df_crowd_sample = df_crowd.groupby('vote', group_keys = False).apply(lambda x: x.sample(min(len(x),3)))
# ---------------------------------------------------------------------------
# Script fragment: export the overall satisfaction summary, then rebuild the
# summary restricted to "high-disagreement" alternatives (those whose
# expert/crowd median difference exceeds a threshold).
# NOTE(review): relies on names defined earlier in the file (res_overal_sat,
# res_baseline, res_kalai, res_nash, res_weighted, ref_satisfaction,
# expert_type, diff_med) — confirm against the full script.

# Persist the overall average-satisfaction table for this expert type.
res_overal_sat.to_csv('results/results_overall_avg_satisfaction_'+ expert_type +'.csv')

diff_grade = 1.0
# NOTE(review): the filter below uses `diff_med`, while `diff_grade` assigned
# just above is never used here — verify `diff_med` is defined upstream and
# whether `diff_grade` was the intended threshold.
alts_diff = list(res_baseline[res_baseline['median_diff']>= diff_med]['alternative_id'])
print(len(alts_diff))

# Restrict every per-method result table to the high-disagreement subset.
kalai = res_kalai[res_kalai['alternative_id'].isin(alts_diff)]
nash = res_nash[res_nash['alternative_id'].isin(alts_diff)]
baseline = res_baseline[res_baseline['alternative_id'].isin(alts_diff)]
weighted = res_weighted[res_weighted['alternative_id'].isin(alts_diff)]

# Recompute the average and relative satisfaction summaries on the subset.
res_overal_sat = avg_satisfaction_by_group(kalai, nash, baseline, weighted).reset_index()
res_relative_sat = relative_overall_satisfaction(nash, kalai, baseline, weighted, ref_satisfaction)
# Prefix the relative columns so they don't collide with the absolute ones
# when both tables are merged below.
res_relative_sat = res_relative_sat.rename(columns = ({'crowd_sat': 'rel-crowd_sat',
                                                       'expert_sat':'rel-expert_sat',
                                                       'satisfaction_area' : 'rel-satisfaction_area',
                                                       'satisfaction_sum': 'rel-satisfaction_sum'}))
all_sum_res = pd.merge(res_relative_sat, res_overal_sat, on = 'method')
all_sum_res = all_sum_res.drop('index', axis = 1)
# NOTE(review): this writes to the same path as the earlier unfiltered summary
# ('results_overall_relative_<expert_type>.csv'), overwriting it — confirm
# that is intended.
all_sum_res.to_csv('results/results_overall_relative_'+ expert_type +'.csv')

###### Statistic
# Mann-Whitney U test
def experiment_artifical_data(df_expert_crowd, max_grade=10):
    """Run the full expert/crowd aggregation experiment on artificial data.

    Normalizes the raw vote table, computes per-alternative optimal grades
    (crowd-only and expert-only medians), evaluates Nash, Kalai and baseline
    aggregation methods, and derives absolute/relative satisfaction summaries.

    Parameters
    ----------
    df_expert_crowd : pandas.DataFrame
        Raw votes with columns 'votes', 'group', 'case' and 'id'
        (renamed internally to 'rate', 'voter', 'alternative_name').
    max_grade : int, default 10
        Maximum grade on the rating scale, forwarded to the satisfaction
        helpers.

    Returns
    -------
    tuple
        (res_kalai, res_nash, res_baseline, res_overal_sat, res_relative_sat)
        — per-alternative result tables for each method plus the overall
        average and relative satisfaction summaries.

    NOTE(review): depends on project helpers defined elsewhere in the file
    (crete_alternatives_map — name appears to be a typo of "create",
    get_user_ids_from_mapping, get_aggregated_data,
    calculate_satisfaction_absolute, nash_results, kalai_results, etc.);
    their exact semantics are assumed here — confirm against their
    definitions.
    """
    # --- Normalize the raw input schema -----------------------------------
    df_expert_crowd = df_expert_crowd.rename(columns={
        'votes': 'rate',
        'group': 'voter',
        'case': 'alternative_name'
    })
    # Make voter labels unique per group member: "<group>_<id>".
    df_expert_crowd['voter'] = df_expert_crowd[
        'voter'] + '_' + df_expert_crowd['id'].astype(str)
    df_expert_crowd['rate'] = df_expert_crowd['rate'].astype('float')

    # Map alternative names to numeric alternative_ids.
    alternative_map = crete_alternatives_map(
        df_expert_crowd, alternative_name='alternative_name')
    #alt_names = list(alternative_map['alternative_id'].unique())

    # Build a voter -> voter_id lookup (ngroup assigns one id per voter).
    voter_lookup = df_expert_crowd.copy()
    voter_lookup['voter_id'] = voter_lookup.groupby('voter').ngroup()
    voter_lookup = voter_lookup[[
        'voter', 'voter_id'
    ]].drop_duplicates().reset_index().drop('index', axis=1)
    voter_lookup = voter_lookup.sort_values('voter_id')

    # Replace names with ids: keep only (voter_id, alternative_id, rate).
    df_expert_crowd = pd.merge(df_expert_crowd, alternative_map,
                               on='alternative_name')[[
                                   'voter', 'alternative_id', 'rate'
                               ]]
    df_expert_crowd = pd.merge(voter_lookup, df_expert_crowd,
                               on='voter').drop('voter', axis=1)

    # Split voter ids by role; the voter label encodes 'expert' / 'crowd'.
    expert_ids = get_user_ids_from_mapping(voter_lookup, 'expert')
    crowd_ids = get_user_ids_from_mapping(voter_lookup, 'crowd')
    #df_expert = df_expert_crowd[df_expert_crowd['voter_id'].isin(expert_ids)]
    #df_crowd = df_expert_crowd[df_expert_crowd['voter_id'].isin(crowd_ids)]

    ''' Optimize grade absolute '''
    # One row per alternative, one column per voter_id holding that voter's rate.
    df_alt_votes = get_aggregated_data(df_expert_crowd,
                                       voter_lookup['voter_id'],
                                       index_column='alternative_id',
                                       column='voter_id',
                                       value='rate')

    # Two extremes of the trade-off: alpha=0 -> crowd-only optimum (crowd
    # median), alpha=1 -> expert-only optimum (expert median).
    result_optm_abs0 = pd.DataFrame(df_alt_votes['alternative_id'],
                                    columns=(['alternative_id']))
    result_optm_abs1 = pd.DataFrame(df_alt_votes['alternative_id'],
                                    columns=(['alternative_id']))
    result_optm_abs0['optimal_grade'] = df_alt_votes[crowd_ids].apply(
        lambda x: np.median(x), axis=1)
    result_optm_abs0['alpha'] = 0.0
    result_optm_abs1['optimal_grade'] = df_alt_votes[expert_ids].apply(
        lambda x: np.median(x), axis=1)
    result_optm_abs1['alpha'] = 1.0
    result_optm_abs = pd.concat([result_optm_abs0, result_optm_abs1])
    # Attach crowd_sat / expert_sat columns for both extreme solutions.
    result_optm_abs = calculate_satisfaction_absolute(df_alt_votes,
                                                      result_optm_abs,
                                                      max_grade, expert_ids,
                                                      crowd_ids)
    # del(result_optm_abs0)
    # del(result_optm_abs1)

    ''' ################################ Results '''
    ###### nash
    # Optimizer setup: equality constraint plus box bounds for the three
    # decision variables.
    cons = [{'type': 'eq', 'fun': lambda_const}]
    # NOTE(review): the third bound (1, 10) looks like it mirrors the grade
    # scale but is hard-coded rather than using max_grade — confirm.
    bnds = ((0.01, 0.99), (0.01, 0.99), (1, 10))
    res_nash = nash_results(df_alt_votes, max_grade, crowd_ids, expert_ids,
                            cons, bnds, lambda_expert=0.5)
    #res_nash.to_csv('results/results_nash' + ' .csv')

    ###### kalai
    res_kalai = kalai_results(df_alt_votes, result_optm_abs, max_grade,
                              crowd_ids, expert_ids)
    # Baseline: simple summary statistics as the aggregated grade.
    res_baseline = calculate_baseline_stats_satisfaction(
        df_alt_votes, max_grade, crowd_ids, expert_ids,
        stats=['np.mean', 'np.median', 'mode'])
    res_overal_sat = avg_satisfaction_by_group(res_kalai, res_nash,
                                               res_baseline).reset_index()

    # Per-alternative best-case satisfaction over the two extreme solutions.
    max_satisfaction = result_optm_abs[[
        'alternative_id', 'crowd_sat', 'expert_sat'
    ]].groupby(by='alternative_id').agg('max').reset_index()
    max_satisfaction = max_satisfaction.rename(columns={
        'crowd_sat': 'max_crowd_sat',
        'expert_sat': 'max_expert_sat'
    })
    max_satisfaction['max_satisfaction_sum'] = max_satisfaction[
        'max_crowd_sat'] + max_satisfaction['max_expert_sat']
    max_satisfaction['max_satisfaction_area'] = max_satisfaction[
        'max_crowd_sat'] * max_satisfaction['max_expert_sat']

    # Per-alternative worst-case satisfaction over the two extreme solutions.
    min_satisfaction = result_optm_abs[[
        'alternative_id', 'crowd_sat', 'expert_sat'
    ]].groupby(by='alternative_id').agg('min').reset_index()
    min_satisfaction = min_satisfaction.rename(columns={
        'crowd_sat': 'min_crowd_sat',
        'expert_sat': 'min_expert_sat'
    })
    min_satisfaction['min_satisfaction_sum'] = min_satisfaction[
        'min_crowd_sat'] + min_satisfaction['min_expert_sat']
    min_satisfaction['min_satisfaction_area'] = min_satisfaction[
        'min_crowd_sat'] * min_satisfaction['min_expert_sat']

    # Reference envelope: max and min satisfaction side by side per alternative.
    ref_satisfaction = pd.merge(max_satisfaction, min_satisfaction,
                                on='alternative_id')

    # Normalize each method's per-alternative satisfaction by the maxima.
    res_nash = relative_detail_satisfaction_nash(res_nash, max_satisfaction)
    res_kalai = relative_detail_satisfaction_kalai(res_kalai, max_satisfaction)
    res_baseline = relative_detail_satisfaction_baseline(
        res_baseline, max_satisfaction)

    ##### Calculate gain
    # gain_ratio: |normalized expert mix - normalized crowd mix| where each
    # side interpolates between its min/max satisfaction with weight
    # lambda_exp, then divides by the respective maximum.
    res_nash['gain_ratio'] = pd.merge(
        ref_satisfaction, res_nash, on='alternative_id'
    ).apply(lambda x: np.abs(
        ((x['lambda_exp'] * x['max_expert_sat'] +
          (1 - x['lambda_exp']) * x['min_expert_sat']) / x['max_expert_sat']) -
        ((x['lambda_exp'] * x['min_crowd_sat'] +
          (1 - x['lambda_exp']) * x['max_crowd_sat']) / x['max_crowd_sat'])),
            axis=1)
    # Same gain definition applied to the Kalai results.
    res_kalai['gain_ratio'] = pd.merge(
        ref_satisfaction, res_kalai, on='alternative_id'
    ).apply(lambda x: np.abs(
        ((x['lambda_exp'] * x['max_expert_sat'] +
          (1 - x['lambda_exp']) * x['min_expert_sat']) / x['max_expert_sat']) -
        ((x['lambda_exp'] * x['min_crowd_sat'] +
          (1 - x['lambda_exp']) * x['max_crowd_sat']) / x['max_crowd_sat'])),
            axis=1)

    ## ----------------------------------------------------------------------------
    # res_relative_sat_ext = relative_overall_satisfaction(res_nash_extreme, res_kalai_extreme, res_baseline_extreme, max_satisfaction)
    res_relative_sat = relative_overall_satisfaction(res_nash, res_kalai,
                                                     res_baseline,
                                                     ref_satisfaction)
    # NOTE(review): bare expression below is a no-op inside a function
    # (leftover from notebook-style display) — harmless but dead code.
    res_relative_sat

    #################### Result analysis - lower uncertanty
    #df_crowd_sample = df_crowd.groupby('vote', group_keys = False).apply(lambda x: x.sample(min(len(x),3)))

    # Re-attach human-readable alternative names to each result table.
    res_kalai = pd.merge(alternative_map, res_kalai, on='alternative_id')
    res_nash = pd.merge(alternative_map, res_nash, on='alternative_id')
    res_baseline = pd.merge(alternative_map, res_baseline, on='alternative_id')
    # res_kalai_extreme = pd.merge(alternative_map, res_kalai_extreme, on = 'alternative_id')
    # res_nash_extreme = pd.merge(alternative_map, res_nash_extreme, on = 'alternative_id')

    return res_kalai, res_nash, res_baseline, res_overal_sat, res_relative_sat