# ----- Nash bargaining results -----
# FIX: filename previously lacked the '_' separator used by every sibling
# results file ('results_kalai_', 'results_overall_avg_satisfaction_');
# made consistent so downstream readers find files under a uniform pattern.
res_nash.to_csv('results/results_nash_' + expert_type + '.csv')

# ----- Kalai results -----
res_kalai = kalai_results(df_alt_votes, result_optm_abs, max_grade,
                          crowd_ids, expert_ids)
res_kalai.to_csv('results/results_kalai_' + expert_type + '.csv')

# ----- Baseline aggregation methods (mean / median / mode) -----
res_baseline = calculate_baseline_stats_satisfaction(
    df_alt_votes, max_grade, crowd_ids, expert_ids,
    stats=['np.mean', 'np.median', 'mode'])

# Average satisfaction per group over ALL alternatives (unfiltered).
res_overal_sat = avg_satisfaction_by_group(res_kalai, res_nash,
                                           res_baseline).reset_index()
res_overal_sat.to_csv('results/results_overall_avg_satisfaction_'
                      + expert_type + '.csv')

# ----- Relative-satisfaction reference values -----
# Per-alternative maxima of crowd/expert satisfaction, used as the
# normalization reference for relative satisfaction measures.
max_satisfaction = result_optm_abs[
    ['alternative_id', 'crowd_sat', 'expert_sat']
].groupby(by='alternative_id').agg('max').reset_index()
max_satisfaction = max_satisfaction.rename(
    columns={'crowd_sat': 'max_crowd_sat', 'expert_sat': 'max_expert_sat'})
max_satisfaction['max_satisfaction_sum'] = (
    max_satisfaction['max_crowd_sat'] + max_satisfaction['max_expert_sat'])
max_satisfaction['max_satisfaction_area'] = (
    max_satisfaction['max_crowd_sat'] * max_satisfaction['max_expert_sat'])

# Per-alternative minima, mirroring the maxima above.
min_satisfaction = result_optm_abs[
    ['alternative_id', 'crowd_sat', 'expert_sat']
].groupby(by='alternative_id').agg('min').reset_index()
min_satisfaction = min_satisfaction.rename(
    columns={'crowd_sat': 'min_crowd_sat', 'expert_sat': 'min_expert_sat'})
min_satisfaction['min_satisfaction_sum'] = (
    min_satisfaction['min_crowd_sat'] + min_satisfaction['min_expert_sat'])
min_satisfaction['min_satisfaction_area'] = (
    min_satisfaction['min_crowd_sat'] * min_satisfaction['min_expert_sat'])
######### SUMMARIZE RESULTS #########
# Keep only alternatives whose expert and crowd medians differ by at least
# a given threshold, then recompute the per-group average satisfaction on
# that disagreement subset.
res_baseline['median_diff'] = np.abs(res_baseline['expert_median']
                                     - res_baseline['crowd_median'])

diff_med = 1.0
alts_diff = list(
    res_baseline[res_baseline['median_diff'] >= diff_med]['alternative_id'])
print(len(alts_diff))
kalai = res_kalai[res_kalai['alternative_id'].isin(alts_diff)]
nash = res_nash[res_nash['alternative_id'].isin(alts_diff)]
baseline = res_baseline[res_baseline['alternative_id'].isin(alts_diff)]
weighted = res_weighted[res_weighted['alternative_id'].isin(alts_diff)]
res_overal_sat = avg_satisfaction_by_group(kalai, nash, baseline,
                                           weighted).reset_index()
# NOTE(review): this overwrites the unfiltered overall-satisfaction CSV
# written earlier with the filtered subset — confirm whether a distinct
# filename (e.g. with a '_diff' suffix) was intended.
res_overal_sat.to_csv('results/results_overall_avg_satisfaction_'
                      + expert_type + '.csv')

diff_grade = 1.0
# BUG FIX: this second pass previously filtered on `diff_med` again, leaving
# `diff_grade` unused and duplicating the first pass. Filter on `diff_grade`
# as evidently intended (behavior is unchanged while both thresholds are 1.0).
alts_diff = list(
    res_baseline[res_baseline['median_diff'] >= diff_grade]['alternative_id'])
print(len(alts_diff))
kalai = res_kalai[res_kalai['alternative_id'].isin(alts_diff)]
nash = res_nash[res_nash['alternative_id'].isin(alts_diff)]
baseline = res_baseline[res_baseline['alternative_id'].isin(alts_diff)]
weighted = res_weighted[res_weighted['alternative_id'].isin(alts_diff)]
res_overal_sat = avg_satisfaction_by_group(kalai, nash, baseline,
                                           weighted).reset_index()
def experiment_artifical_data(df_expert_crowd, max_grade=10):
    """Run the full expert-vs-crowd satisfaction experiment on artificial data.

    Parameters
    ----------
    df_expert_crowd : pd.DataFrame
        Raw votes with columns 'votes', 'group', 'case', 'id'
        (renamed internally to 'rate', 'voter', 'alternative_name').
    max_grade : int, optional
        Maximum grade of the rating scale (default 10).

    Returns
    -------
    tuple
        (res_kalai, res_nash, res_baseline, res_overal_sat, res_relative_sat)
    """
    df_expert_crowd = df_expert_crowd.rename(columns={
        'votes': 'rate',
        'group': 'voter',
        'case': 'alternative_name'
    })
    # Make voter names unique per individual: '<group>_<id>'.
    df_expert_crowd['voter'] = df_expert_crowd[
        'voter'] + '_' + df_expert_crowd['id'].astype(str)
    df_expert_crowd['rate'] = df_expert_crowd['rate'].astype('float')
    alternative_map = crete_alternatives_map(
        df_expert_crowd, alternative_name='alternative_name')
    # Build a voter -> voter_id lookup (stable integer ids via ngroup).
    voter_lookup = df_expert_crowd.copy()
    voter_lookup['voter_id'] = voter_lookup.groupby('voter').ngroup()
    voter_lookup = voter_lookup[['voter', 'voter_id'
                                 ]].drop_duplicates().reset_index(drop=True)
    voter_lookup = voter_lookup.sort_values('voter_id')
    df_expert_crowd = pd.merge(df_expert_crowd,
                               alternative_map,
                               on='alternative_name')[[
                                   'voter', 'alternative_id', 'rate'
                               ]]
    df_expert_crowd = pd.merge(voter_lookup, df_expert_crowd,
                               on='voter').drop('voter', axis=1)
    expert_ids = get_user_ids_from_mapping(voter_lookup, 'expert')
    crowd_ids = get_user_ids_from_mapping(voter_lookup, 'crowd')
    '''
    Optimize grade absolute
    '''
    # Pivot to one row per alternative, one column per voter.
    df_alt_votes = get_aggregated_data(df_expert_crowd,
                                       voter_lookup['voter_id'],
                                       index_column='alternative_id',
                                       column='voter_id',
                                       value='rate')
    # Boundary solutions: alpha=0.0 -> crowd-only median grade,
    # alpha=1.0 -> expert-only median grade.
    result_optm_abs0 = pd.DataFrame(df_alt_votes['alternative_id'],
                                    columns=(['alternative_id']))
    result_optm_abs1 = pd.DataFrame(df_alt_votes['alternative_id'],
                                    columns=(['alternative_id']))
    result_optm_abs0['optimal_grade'] = df_alt_votes[crowd_ids].apply(
        np.median, axis=1)
    result_optm_abs0['alpha'] = 0.0
    result_optm_abs1['optimal_grade'] = df_alt_votes[expert_ids].apply(
        np.median, axis=1)
    result_optm_abs1['alpha'] = 1.0
    result_optm_abs = pd.concat([result_optm_abs0, result_optm_abs1])
    # NOTE(review): expert_ids precedes crowd_ids here while other helpers
    # take (crowd_ids, expert_ids) — confirm against the helper's signature.
    result_optm_abs = calculate_satisfaction_absolute(df_alt_votes,
                                                      result_optm_abs,
                                                      max_grade, expert_ids,
                                                      crowd_ids)
    '''
    ################################
    Results
    '''
    ###### nash
    cons = [{'type': 'eq', 'fun': lambda_const}]
    bnds = ((0.01, 0.99), (0.01, 0.99), (1, 10))
    res_nash = nash_results(df_alt_votes,
                            max_grade,
                            crowd_ids,
                            expert_ids,
                            cons,
                            bnds,
                            lambda_expert=0.5)
    ###### kalai
    res_kalai = kalai_results(df_alt_votes, result_optm_abs, max_grade,
                              crowd_ids, expert_ids)
    res_baseline = calculate_baseline_stats_satisfaction(
        df_alt_votes,
        max_grade,
        crowd_ids,
        expert_ids,
        stats=['np.mean', 'np.median', 'mode'])
    res_overal_sat = avg_satisfaction_by_group(res_kalai, res_nash,
                                               res_baseline).reset_index()

    def _ref_satisfaction(agg, prefix):
        # One line per alternative with '<prefix>_crowd_sat',
        # '<prefix>_expert_sat' and their sum/product; shared by the
        # max- and min-reference computations below.
        ref = result_optm_abs[[
            'alternative_id', 'crowd_sat', 'expert_sat'
        ]].groupby(by='alternative_id').agg(agg).reset_index()
        ref = ref.rename(columns={
            'crowd_sat': prefix + '_crowd_sat',
            'expert_sat': prefix + '_expert_sat'
        })
        ref[prefix + '_satisfaction_sum'] = ref[prefix + '_crowd_sat'] + ref[
            prefix + '_expert_sat']
        ref[prefix + '_satisfaction_area'] = ref[prefix + '_crowd_sat'] * ref[
            prefix + '_expert_sat']
        return ref

    max_satisfaction = _ref_satisfaction('max', 'max')
    min_satisfaction = _ref_satisfaction('min', 'min')
    ref_satisfaction = pd.merge(max_satisfaction,
                                min_satisfaction,
                                on='alternative_id')
    res_nash = relative_detail_satisfaction_nash(res_nash, max_satisfaction)
    res_kalai = relative_detail_satisfaction_kalai(res_kalai,
                                                   max_satisfaction)
    res_baseline = relative_detail_satisfaction_baseline(
        res_baseline, max_satisfaction)

    def _gain_ratio(x):
        # |lambda-weighted relative expert gain - lambda-weighted relative
        # crowd gain|; identical formula was previously duplicated inline
        # for the nash and kalai frames.
        expert_part = (x['lambda_exp'] * x['max_expert_sat'] +
                       (1 - x['lambda_exp']) * x['min_expert_sat']
                       ) / x['max_expert_sat']
        crowd_part = (x['lambda_exp'] * x['min_crowd_sat'] +
                      (1 - x['lambda_exp']) * x['max_crowd_sat']
                      ) / x['max_crowd_sat']
        return np.abs(expert_part - crowd_part)

    ##### Calculate gain
    # NOTE(review): assignment relies on index alignment between the merged
    # frame and res_nash/res_kalai — holds for default RangeIndex; verify.
    res_nash['gain_ratio'] = pd.merge(ref_satisfaction,
                                      res_nash,
                                      on='alternative_id').apply(_gain_ratio,
                                                                 axis=1)
    res_kalai['gain_ratio'] = pd.merge(ref_satisfaction,
                                       res_kalai,
                                       on='alternative_id').apply(_gain_ratio,
                                                                  axis=1)
    res_relative_sat = relative_overall_satisfaction(res_nash, res_kalai,
                                                     res_baseline,
                                                     ref_satisfaction)
    # Attach human-readable alternative names to each result frame.
    res_kalai = pd.merge(alternative_map, res_kalai, on='alternative_id')
    res_nash = pd.merge(alternative_map, res_nash, on='alternative_id')
    res_baseline = pd.merge(alternative_map, res_baseline,
                            on='alternative_id')
    return res_kalai, res_nash, res_baseline, res_overal_sat, res_relative_sat