def get_df(max_date=-1):
    """Build the avatar-info dataframe, trimmed for the first sensitivity test.

    Pipeline: load the avatar info through ``CsvL.get_avatar_info``, enrich it
    with ``dh.add_avatar_data`` (``cut_pce=[5, 6, 7]``) and
    ``dh.add_session_date``, drop ``AvatarId``, filter on the ``'YMD'`` column
    with ``dm.filter_date``, merge user data with ``Users.merge_users_clean``
    and finally drop the columns not needed by the first sensitivity test.

    Args:
        max_date: Forwarded unchanged to ``dm.filter_date``; the meaning of
            the default ``-1`` is defined by that helper.

    Returns:
        The resulting dataframe.
    """
    # Columns left out of the first sensitivity test.
    unused_cols = ['Age', 'YearMonth', 'ClientCode', 'FarmaId', 'Latitudine']

    data = CsvL.get_avatar_info()
    data = dh.add_avatar_data(data, cut_pce=[5, 6, 7])
    data = dh.add_session_date(data)
    # Kept in place so the object returned by the helper is the one modified.
    data.drop(columns=['AvatarId'], inplace=True)
    data = dm.filter_date(data, 'YMD', max_date)
    data = Users.merge_users_clean(data)
    return data.drop(columns=unused_cols)
def get_df(max_date=-1):
    """Build the product-history dataframe with user data and product names.

    Pipeline: load the history through ``CsvL.get_prod_history``, enrich it
    with ``dh.add_avatar_data`` (``cut_pce=[5, 6, 7]``), apply
    ``__hist_hardfix``, add the session date, drop ``AvatarId``, filter on the
    ``'YMD'`` column with ``dm.filter_date``, merge user data with
    ``Users.merge_users_clean``, keep a fixed set of columns and add product
    names with ``dh.add_prod_name``.

    Args:
        max_date: Forwarded unchanged to ``dm.filter_date``; the meaning of
            the default ``-1`` is defined by that helper.

    Returns:
        The resulting dataframe.
    """
    # Columns kept (and their order) before product names are attached.
    kept_cols = [
        'Id', 'UserId', 'SessionId', 'ActionType', 'ProductId',
        'AvSessId', 'AvatarPce', 'YMD', 'NameSurname', 'Regione',
        'RoleId', 'FarmaId', 'Latitudine', 'ProvId', 'Sex',
    ]

    history = CsvL.get_prod_history()
    history = dh.add_avatar_data(history, cut_pce=[5, 6, 7])
    # Return value is ignored: presumably fixes the frame in place -- confirm.
    __hist_hardfix(history)
    history = dh.add_session_date(history)
    history.drop(columns=['AvatarId'], inplace=True)
    history = dm.filter_date(history, 'YMD', max_date)
    history = Users.merge_users_clean(history)
    history = history[kept_cols]
    return dh.add_prod_name(history)
def progresso(df, soglia, ax=None):
    """Plot the per-session ``Ratio`` for users split by engagement.

    Users are split by their maximum ``SessionId``: those reaching at least
    ``soglia`` sessions and those staying below it. Three series are drawn,
    all taken from ``dh.rwcount_base(..., 'SessionId', 'Product')``:

    * bars: users with at least ``soglia`` sessions;
    * triangles: users with fewer than ``soglia`` sessions;
    * circles: all users, restricted to sessions below ``soglia``.

    Args:
        df: Dataframe with at least ``UserId`` and ``SessionId`` columns.
        soglia: Session-count threshold used for the split.
        ax: Axes to draw on; a new 9x8 figure is created when ``None``.
    """
    if ax is None:
        fig = plt.figure(figsize=(9, 8))
        ax = fig.add_subplot(111)

    # Highest session reached by each user, joined with the per-user counts.
    max_session = df.groupby('UserId')['SessionId'].max().reset_index()
    per_user_rw = dh.rwcount_base(df, 'UserId', 'Product')
    per_user = pd.merge(max_session, per_user_rw, on='UserId')

    engaged_ids = per_user[per_user.SessionId >= soglia].UserId.values
    casual_ids = per_user[per_user.SessionId < soglia].UserId.values
    engaged_rows = df[df.UserId.isin(engaged_ids)]
    casual_rows = df[df.UserId.isin(casual_ids)]

    rw_engaged = dh.rwcount_base(
        engaged_rows, 'SessionId', 'Product').reset_index()
    rw_casual = dh.rwcount_base(
        casual_rows, 'SessionId', 'Product').reset_index()
    rw_all = dh.rwcount_base(df, 'SessionId', 'Product').reset_index()
    rw_all = rw_all[rw_all.SessionId < soglia]

    bars = ax.bar(rw_engaged.SessionId, rw_engaged.Ratio, zorder=1)
    casual_pts = ax.scatter(
        rw_casual.SessionId, rw_casual.Ratio,
        zorder=3, s=40, facecolor=co.ab_colors['rosso'], marker='v')
    all_pts = ax.scatter(
        rw_all.SessionId, rw_all.Ratio,
        marker='o', zorder=2, facecolor=co.ab_colors['giallo'], s=40)

    legend_labels = [
        'Almeno {0} sessioni'.format(soglia),
        'Meno di {0} sessioni'.format(soglia),
        'Tutti',
    ]
    ax.legend([bars, casual_pts, all_pts], legend_labels,
              fontsize=14, loc='lower right')

    ax.set_title('Correttezza media al crescere dell\'impegno', size=20)
    ax.set_ylabel('Correttezza', size=16)
    ax.set_xlabel('Sessioni', size=16)

    y_ticks = [0, 0.25, 0.5, 0.75, 1]
    ax.set_yticks(y_ticks)
    # NOTE(review): the explicit tick labels set right after appear to take
    # precedence over this formatter -- confirm whether it is still needed.
    ax.yaxis.set_major_formatter(ticker.PercentFormatter())
    ax.set_yticklabels(['{:,.0%}'.format(tick) for tick in y_ticks])
def av_freq_hist(df, title="", ylbl="", legend=True):
    """Draw a horizontal bar chart of ``Ratio`` per row, coloured by pce.

    Rows are sorted by ``nTot`` and drawn bottom-to-top. Each bar is
    ``Ratio * 100`` long, coloured with ``co.abc_l[AvatarPce - 1]`` and
    annotated with ``"<nTot> - <AvName> - <SessionId>"``.

    Args:
        df: Dataframe with ``nTot``, ``Ratio``, ``AvatarPce``, ``AvName`` and
            ``SessionId`` columns. ``Ratio`` is presumably a fraction in
            [0, 1] (it is scaled by 100 onto a 0-100 axis) -- confirm.
        title: Chart title; an empty string selects the default title.
        ylbl: Y-axis label; an empty string selects the default label.
        legend: When true, add a legend with one patch per pce (1 to 4),
            labelled through ``dh.pce_descr``.
    """
    if title == "":
        title = 'Gli avatar più giocati hanno ricevuto prodotti corretti?'
    if ylbl == "":
        ylbl = 'Numero di prodotti consigliati'

    f = plt.figure(figsize=(9, 8))
    ax = f.add_subplot(111)

    df = df.sort_values('nTot')
    # Style the bars through the container returned by barh rather than by
    # indexing ax.get_children(), whose artist ordering is not guaranteed.
    bars = ax.barh(range(len(df)), df.Ratio * 100)
    for bar, pce in zip(bars, df.AvatarPce):
        bar.set_color(co.abc_l[pce - 1])
        bar.set_height(0.8)
        bar.set_edgecolor('k')
        bar.set_linewidth(1)

    if legend:
        handles = [
            mpatches.Patch(color=co.abc_l[i], lw=1, ec='k',
                           label=dh.pce_descr(i + 1))
            for i in range(4)
        ]
        ax.legend(handles=handles, fontsize=16, edgecolor='k')

    # One annotation per bar, anchored near the left edge of the axis.
    for i, (name, tot, sess) in enumerate(
            zip(df.AvName, df.nTot, df.SessionId)):
        label = "{0} - {1} - {2}".format(int(tot), name, sess)
        ax.text(1, i - 0.1, label, color="k", va="center", size=14)

    ax.xaxis.set_major_formatter(ticker.PercentFormatter())
    __barh_ax_set(ax, title)
    ax.set_xticks([0, 25, 50, 75, 100])
    ax.set_yticks([])

    # Emphasise the inner gridlines (25, 50, 75); the axis limits are left
    # untouched.
    for grid_line in ax.xaxis.get_gridlines()[1:4]:
        grid_line.set_color('k')
        grid_line.set_ls('--')
        grid_line.set_lw(1)

    ax.set_xlim([0, 100])
    ax.set_ylim([-0.75, len(df) - 0.25])
    ax.set_xlabel(r'Correttezza', size=18)
    ax.set_ylabel(ylbl, size=18)
    f.tight_layout()
def plot_uhist(uid, type_suffix):
    """Plot the per-session history of a single user.

    Loads the user's history with ``Info.get_user_history``, aggregates it by
    ``SessionId`` through ``rwcount`` (missing values replaced by 0), gets the
    session starts from ``dh.sess_start`` and hands everything to
    ``gm.plot_uhist``.

    Args:
        uid: User identifier passed to ``Info.get_user_history``.
        type_suffix: Type suffix forwarded to ``rwcount``.
    """
    history = Info.get_user_history(uid)
    per_session = rwcount(history, 'SessionId', type_suffix)
    per_session = per_session.fillna(0)
    session_starts = dh.sess_start(history)
    gm.plot_uhist(uid, per_session, session_starts)
def rwcount(df, group, type_suffix='Info', count_col='Id'):
    """Call ``dh.rwcount_base`` with this module's default arguments.

    Args:
        df: Dataframe to aggregate.
        group: Grouping key forwarded to ``dh.rwcount_base``.
        type_suffix: Type suffix forwarded as-is (default ``'Info'``).
        count_col: Column forwarded as the count column (default ``'Id'``).

    Returns:
        Whatever ``dh.rwcount_base`` returns.
    """
    counts = dh.rwcount_base(df, group, type_suffix, count_col)
    return counts
# Global seaborn look: defaults first, then tick style and project palette.
sns.set()
sns.set_style('ticks')
sns.set_palette(Constants.abc_l)


def rwcount(df, group, type_suffix='Info', count_col='Id'):
    """Call ``dh.rwcount_base`` with this script's default arguments."""
    return dh.rwcount_base(df, group, type_suffix, count_col)


def _top_products_chart(type_suffix, title, ylbl):
    """Draw ``gm.freq_hist`` for the 20 products with the largest ``nTot``."""
    rw_info = rwcount(df, 'ProductId', type_suffix)
    rw_info_freq = dh.add_prod_name(rw_info.nlargest(20, 'nTot'))
    gm.freq_hist(rw_info_freq, title, ylbl)


df = Info.get_df()  # base dataframe

# {{{ Most frequent products charts
# Every chart below is switched off with ``if 0``; flip the guard to draw it.
if 0:
    _top_products_chart(
        "Info",
        "Le informazioni sui prodotti più frequenti "
        "sono corrette?",
        "Numero di informazioni date")

if 0:
    _top_products_chart(
        "Benefit",
        "I benefici indicati per i prodotti "
        "più frequenti sono corretti?",
        "Numero di benefici indicati")

if 0:
    _top_products_chart(
        "Advantages",
        "I vantaggi indicati per i prodotti "
        "più frequenti sono corretti?",
        "Numero di vantaggi indicati")
# }}}