示例#1
0
文件: Info.py 项目: DLongoni/Aboca
def get_df(max_date=-1):
    """Build the avatar-info dataframe.

    Loads the data with ``CsvL.get_avatar_info``, adds avatar data and the
    session date, drops ``AvatarId``, applies ``dm.filter_date`` on the
    ``YMD`` column, merges in the user data and finally removes the columns
    that are not needed for the first sensitivity test.

    Args:
        max_date: Forwarded unchanged to ``dm.filter_date``. The default
            ``-1`` presumably means "no upper bound" -- confirm in ``dm``.

    Returns:
        The filtered and merged dataframe.
    """
    frame = CsvL.get_avatar_info()
    frame = dh.add_avatar_data(frame, cut_pce=[5, 6, 7])
    frame = dh.add_session_date(frame)
    frame.drop(columns=['AvatarId'], inplace=True)
    frame = dm.filter_date(frame, 'YMD', max_date)
    frame = Users.merge_users_clean(frame)
    # Column filter for the first sensitivity test.
    unused_cols = ['Age', 'YearMonth', 'ClientCode', 'FarmaId', 'Latitudine']
    return frame.drop(columns=unused_cols)
示例#2
0
def get_df(max_date=-1):
    """Build the product-history dataframe.

    Loads the data with ``CsvL.get_prod_history``, adds avatar data, applies
    ``__hist_hardfix`` to the frame, adds the session date, drops
    ``AvatarId``, applies ``dm.filter_date`` on the ``YMD`` column, merges in
    the user data, keeps a fixed set of columns and adds the product name.

    Args:
        max_date: Forwarded unchanged to ``dm.filter_date``. The default
            ``-1`` presumably means "no upper bound" -- confirm in ``dm``.

    Returns:
        The resulting dataframe.
    """
    # Columns retained after the merge with the user data.
    kept_cols = [
        'Id', 'UserId', 'SessionId', 'ActionType', 'ProductId', 'AvSessId',
        'AvatarPce', 'YMD', 'NameSurname', 'Regione', 'RoleId', 'FarmaId',
        'Latitudine', 'ProvId', 'Sex'
    ]

    history = CsvL.get_prod_history()
    history = dh.add_avatar_data(history, cut_pce=[5, 6, 7])
    # Return value is ignored, so the fix is expected to act in place.
    __hist_hardfix(history)
    history = dh.add_session_date(history)
    history.drop(columns=['AvatarId'], inplace=True)
    history = dm.filter_date(history, 'YMD', max_date)
    history = Users.merge_users_clean(history)
    history = history[kept_cols]
    return dh.add_prod_name(history)
示例#3
0
def progresso(df, soglia, ax=None):
    """Plot the per-session correctness ratio, split by user engagement.

    Users are split on their highest ``SessionId``: those reaching at least
    ``soglia`` and those below it. For each group, and for all users
    together (limited to sessions below ``soglia``), the ``Ratio`` returned
    by ``dh.rwcount_base`` grouped by ``SessionId`` is drawn against the
    session number.

    Args:
        df: Dataframe with at least ``UserId`` and ``SessionId`` columns,
            plus whatever ``dh.rwcount_base`` needs for the ``'Product'``
            suffix.
        soglia: Session-count threshold separating the two user groups.
        ax: Matplotlib axes to draw on. A new 9x8 figure is created when
            omitted.
    """
    if ax is None:
        f = plt.figure(figsize=(9, 8))
        ax = f.add_subplot(111)

    # Highest session reached by each user, restricted (by the inner merge)
    # to the users that also appear in the per-user counts.
    dsess_max = df.groupby('UserId')['SessionId'].max().reset_index()
    urw = dh.rwcount_base(df, 'UserId', 'Product')
    sessratio = pd.merge(dsess_max, urw, on='UserId')

    utanti = sessratio[sessratio.SessionId >= soglia].UserId.values
    upochi = sessratio[sessratio.SessionId < soglia].UserId.values
    dtanti = df[df.UserId.isin(utanti)]
    dpochi = df[df.UserId.isin(upochi)]

    rwta = dh.rwcount_base(dtanti, 'SessionId', 'Product').reset_index()
    rwpo = dh.rwcount_base(dpochi, 'SessionId', 'Product').reset_index()
    rwtutti = dh.rwcount_base(df, 'SessionId', 'Product').reset_index()
    rwtutti = rwtutti[rwtutti.SessionId < soglia]

    l1 = ax.bar(rwta.SessionId, rwta.Ratio, zorder=1)
    l2 = ax.scatter(rwpo.SessionId,
                    rwpo.Ratio,
                    zorder=3,
                    s=40,
                    facecolor=co.ab_colors['rosso'],
                    marker='v')
    l3 = ax.scatter(rwtutti.SessionId,
                    rwtutti.Ratio,
                    marker='o',
                    zorder=2,
                    facecolor=co.ab_colors['giallo'],
                    s=40)
    ax.legend([l1, l2, l3], [
        'Almeno {0} sessioni'.format(soglia),
        'Meno di {0} sessioni'.format(soglia), 'Tutti'
    ],
              fontsize=14,
              loc='lower right')
    ax.set_title('Correttezza media al crescere dell\'impegno', size=20)
    ax.set_ylabel('Correttezza', size=16)
    ax.set_xlabel('Sessioni', size=16)

    # The ratios are fractions in [0, 1], so the formatter needs xmax=1. The
    # default (xmax=100) would label 0.25 as "0.25%"; it used to go unnoticed
    # only because hard-coded tick labels replaced the formatter right after.
    ax.set_yticks([0, 0.25, 0.5, 0.75, 1])
    ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=0))
示例#4
0
def av_freq_hist(df, title="", ylbl="", legend=True):
    """Draw a horizontal bar chart of the correctness ratio per avatar.

    Rows are sorted by ``nTot`` and one bar per row is drawn with length
    ``Ratio * 100``. Each bar is coloured from ``co.abc_l`` using the row's
    ``AvatarPce`` and labelled with "<nTot> - <AvName> - <SessionId>".

    Args:
        df: Dataframe with ``nTot``, ``Ratio``, ``AvatarPce``, ``AvName`` and
            ``SessionId`` columns. ``AvatarPce`` is used as a 1-based index
            into ``co.abc_l``.
        title: Chart title; an empty string selects the default title.
        ylbl: Y-axis label; an empty string selects the default label.
        legend: When true, add a legend with one entry for each of the first
            four colours of ``co.abc_l``.
    """
    if title == "":
        title = 'Gli avatar più giocati hanno ricevuto prodotti corretti?'
    if ylbl == "":
        ylbl = 'Numero di prodotti consigliati'
    f = plt.figure(figsize=(9, 8))
    ax = f.add_subplot(111)
    df = df.sort_values('nTot')

    # Style the bars through the container returned by barh() rather than
    # through ax.get_children(), whose ordering is an implementation detail.
    bars = ax.barh(range(len(df)), df.Ratio * 100)
    for bar, pce in zip(bars, df.AvatarPce):
        bar.set_color(co.abc_l[pce - 1])
        bar.set_height(0.8)
        # Must come after set_color(), which also resets the edge colour.
        bar.set_edgecolor('k')
        bar.set_linewidth(1)

    if legend:
        l_hand = [
            mpatches.Patch(color=co.abc_l[i],
                           lw=1,
                           ec='k',
                           label=dh.pce_descr(i + 1))
            for i in range(4)
        ]
        ax.legend(handles=l_hand, fontsize=16, edgecolor='k')

    # Text label written over each bar, near the left edge.
    row_info = zip(df.AvName, df.nTot, df.SessionId)
    for i, (name, tot, sess) in enumerate(row_info):
        lbl = "{0} - {1} - {2}".format(int(tot), name, sess)
        ax.text(1, i - 0.1, lbl, color="k", va="center", size=14)

    ax.xaxis.set_major_formatter(ticker.PercentFormatter())
    __barh_ax_set(ax, title)
    ax.set_xticks([0, 25, 50, 75, 100])
    ax.set_yticks([])

    # Dashed black grid lines at the inner ticks (25, 50 and 75).
    for gridline in ax.xaxis.get_gridlines()[1:4]:
        gridline.set_color('k')
        gridline.set_ls('--')
        gridline.set_lw(1)

    ax.set_xlim([0, 100])
    ax.set_ylim([-0.75, len(df) - 0.25])
    ax.set_xlabel(r'Correttezza', size=18)
    ax.set_ylabel(ylbl, size=18)
    f.tight_layout()
示例#5
0
def plot_uhist(uid, type_suffix):
    """Plot the per-session history of a single user.

    Fetches the user's history from ``Info.get_user_history``, counts it per
    ``SessionId`` with ``rwcount`` (missing values replaced by 0) and hands
    the result, together with the output of ``dh.sess_start``, to
    ``gm.plot_uhist``.

    Args:
        uid: User identifier passed to ``Info.get_user_history`` and
            ``gm.plot_uhist``.
        type_suffix: Type suffix forwarded to ``rwcount``.
    """
    history = Info.get_user_history(uid)
    per_session = rwcount(history, 'SessionId', type_suffix)
    per_session = per_session.fillna(0)
    session_starts = dh.sess_start(history)
    gm.plot_uhist(uid, per_session, session_starts)
示例#6
0
def rwcount(df, group, type_suffix='Info', count_col='Id'):
    """Call ``dh.rwcount_base`` with this module's default arguments.

    Args:
        df: Dataframe to count on.
        group: Grouping key forwarded to ``dh.rwcount_base``.
        type_suffix: Type suffix forwarded to ``dh.rwcount_base``.
        count_col: Column forwarded to ``dh.rwcount_base`` as the count
            column.

    Returns:
        Whatever ``dh.rwcount_base`` returns.
    """
    call_args = (df, group, type_suffix, count_col)
    return dh.rwcount_base(*call_args)
示例#7
0
# Module-wide plotting setup. Order matters: sns.set() applies seaborn's
# default theme first, then the style is switched to 'ticks' and the default
# colour cycle is replaced with the project palette in Constants.abc_l.
sns.set()
sns.set_style('ticks')
sns.set_palette(Constants.abc_l)


def rwcount(df, group, type_suffix='Info', count_col='Id'):
    """Call ``dh.rwcount_base`` with this module's default arguments.

    Args:
        df: Dataframe to count on.
        group: Grouping key forwarded to ``dh.rwcount_base``.
        type_suffix: Type suffix forwarded to ``dh.rwcount_base``.
        count_col: Column forwarded to ``dh.rwcount_base`` as the count
            column.

    Returns:
        Whatever ``dh.rwcount_base`` returns.
    """
    call_args = (df, group, type_suffix, count_col)
    return dh.rwcount_base(*call_args)


df = Info.get_df()  # base dataframe


# {{{ Most frequent products chart
def _freq_hist_top_products(type_suffix, title, ylbl):
    """Pass the 20 products with the largest ``nTot`` to ``gm.freq_hist``.

    Counts are computed on the module-level ``df`` per ``ProductId`` for the
    given ``type_suffix``; product names are added before plotting.
    """
    counts = rwcount(df, 'ProductId', type_suffix)
    most_frequent = dh.add_prod_name(counts.nlargest(20, 'nTot'))
    gm.freq_hist(most_frequent, title, ylbl)


# Every chart below is switched off; change a guard to True to draw it.
if False:
    _freq_hist_top_products(
        "Info",
        "Le informazioni sui prodotti più frequenti "
        "sono corrette?",
        "Numero di informazioni date")

if False:
    _freq_hist_top_products(
        "Benefit",
        "I benefici indicati per i prodotti "
        "più frequenti sono corretti?",
        "Numero di benefici indicati")

if False:
    _freq_hist_top_products(
        "Advantages",
        "I vantaggi indicati per i prodotti "
        "più frequenti sono corretti?",
        "Numero di vantaggi indicati")
# }}}