Exemplo n.º 1
0
def get_users_table():
    """Build the flat users table from the CSV lookups.

    Users are left-joined with the customer registry on
    (ClientCode, PdcCode) == (Code, PdcCode); the result is then joined, in
    order, with the province, region and user-role tables using pandas'
    default merge (inner join on the column names the two frames share).

    Returns:
        DataFrame without the helper columns ``Id_Regione``, ``Code`` and
        ``PdcCode``, with ``Nome`` renamed to ``Regione`` and with the rows
        of ``UserId`` 12 removed.
    """
    users = CsvL.get_users_anag()
    customers = CsvL.get_customers_anag()
    provinces = CsvL.get_province()
    regions = CsvL.get_regions()
    roles = CsvL.get_user_roles()

    # Customer registry: keep only the join keys and rename them to the
    # names used in the final table.
    customers = customers[['Id', 'Code', 'PdcCode', 'Province']].rename(
        columns={'Id': 'FarmaId', 'Province': 'ProvId'})

    # Provinces: the plate code ('Sigla_automobilistica') is the province key.
    provinces = provinces.drop(columns=['Id', 'Nome', 'Longitudine'])
    provinces = provinces[['Id_Regione', 'Sigla_automobilistica',
                           'Latitudine']]
    provinces = provinces.rename(
        columns={'Sigla_automobilistica': 'ProvId'})
    # regions['Nome'] = regions.Nome.apply(lambda n: n[0:4])

    # Some users (e.g. 208) have more than one role: keep the last one.
    roles = roles.drop(columns=['Id', 'CreatorUserId', 'TenantId'])
    roles = roles.groupby('UserId')['RoleId'].last().reset_index()

    table = pd.merge(users, customers, how='left',
                     left_on=['ClientCode', 'PdcCode'],
                     right_on=['Code', 'PdcCode'])
    for lookup in (provinces, regions, roles):
        table = pd.merge(table, lookup)

    table = (table.drop(columns=['Id_Regione'])
                  .rename(columns={'Nome': 'Regione'})
                  .drop(columns=['Code', 'PdcCode']))
    return table[table.UserId != 12]  # Andrea Dini
Exemplo n.º 2
0
def add_avatar_data(df, cut_pce=None):
    """Attach avatar information to ``df`` and keep only the matched rows.

    The avatar PCE table is joined with the avatar registry on ``AvatarId``,
    optionally filtered, and then left-merged onto ``df`` using the column(s)
    returned by ``__av_merge_col(df)``.

    Args:
        df: DataFrame to enrich.
        cut_pce: optional list-like of ``AvatarPce`` values to exclude before
            merging. ``None`` or an empty sequence disables the filter.

    Returns:
        The merged DataFrame restricted to rows whose ``AvSessId`` is not
        null.
    """
    avatar_pce = pd.merge(CsvL.get_avatar_pce(),
                          CsvL.get_avatar_anag(),
                          on='AvatarId')

    # ``None`` replaces the former mutable ``[]`` default; ``len`` is kept
    # (rather than truthiness) so numpy arrays are still accepted.
    if cut_pce is not None and len(cut_pce) > 0:
        avatar_pce = avatar_pce[~avatar_pce.AvatarPce.isin(cut_pce)]

    on_col = __av_merge_col(df)
    df = pd.merge(df, avatar_pce, on=on_col, how='left')
    # The left merge leaves AvSessId null where no avatar row matched.
    return df[df.AvSessId.notnull()]
Exemplo n.º 3
0
def get_df_group_prod(include_rare=False):
    """Aggregate the history returned by ``get_df()`` into one row per product.

    Args:
        include_rare: when False (default) products with ``nTot <= 2`` are
            removed and the index is reset.

    Returns:
        The product registry merged (pandas default merge) with the
        per-product feature table.
    """
    history = get_df()
    registry = CsvL.get_prod_anag()

    def _product_features(rows):
        # Feature vector for the rows of a single ProductId.
        n_rows = rows.Id.count()
        n_users = rows.UserId.nunique()
        n_right = sum(rows.ActionType == "RightProduct")
        latest_day = rows.YMD.max()
        return pd.Series({
            'nUsers': n_users,
            'nFarma': rows.FarmaId.nunique(),
            'nProv': rows.ProvId.nunique(),
            'nReg': rows.Regione.nunique(),
            'nAvSess': rows.AvSessId.nunique(),
            'nSess': rows.SessionId.nunique(),
            'nTot': n_rows,
            'MedianPce': rows.AvatarPce.median(),
            'MeanPce': rows.AvatarPce.mean(),
            'nRight': n_right,
            # Mean latitude expressed as an offset from 42.
            'NordSud': rows.Latitudine.mean() - 42,
            'UserRatio': n_rows / n_users,
            'Ratio': n_right / n_rows,
            # Days between dm.MAXDATE and the latest YMD, counted from 1.
            'Recency': (dm.MAXDATE - latest_day).days + 1,
            'Frequency': rows.YMD.nunique(),
        })

    features = history.groupby('ProductId').apply(_product_features)
    products = pd.merge(registry, features.reset_index())
    if include_rare:
        return products
    return products[products.nTot > 2].reset_index(drop=True)
Exemplo n.º 4
0
def get_df(max_date=-1):
    """Load the avatar info table enriched with avatar, session and user data.

    Args:
        max_date: forwarded to ``dm.filter_date`` as the limit applied to the
            ``YMD`` column (default -1).

    Returns:
        DataFrame without the ``AvatarId``, ``Age``, ``YearMonth``,
        ``ClientCode``, ``FarmaId`` and ``Latitudine`` columns.
    """
    # Column filter for the first sensitivity test.
    excluded = ['Age', 'YearMonth', 'ClientCode', 'FarmaId', 'Latitudine']

    data = dh.add_avatar_data(CsvL.get_avatar_info(), cut_pce=[5, 6, 7])
    data = dh.add_session_date(data)
    data.drop(columns=['AvatarId'], inplace=True)
    data = dm.filter_date(data, 'YMD', max_date)
    data = Users.merge_users_clean(data)
    return data.drop(columns=excluded)
Exemplo n.º 5
0
def get_df(max_date=-1):
    """Load the product history enriched with avatar, session, user and
    product-name data.

    Args:
        max_date: forwarded to ``dm.filter_date`` as the limit applied to the
            ``YMD`` column (default -1).

    Returns:
        DataFrame restricted to a fixed set of columns, then passed through
        ``dh.add_prod_name``.
    """
    selected = [
        'Id', 'UserId', 'SessionId', 'ActionType', 'ProductId', 'AvSessId',
        'AvatarPce', 'YMD', 'NameSurname', 'Regione', 'RoleId', 'FarmaId',
        'Latitudine', 'ProvId', 'Sex',
    ]

    hist = dh.add_avatar_data(CsvL.get_prod_history(), cut_pce=[5, 6, 7])
    # Return value is ignored: presumably patches ``hist`` in place.
    __hist_hardfix(hist)
    hist = dh.add_session_date(hist)
    hist.drop(columns=['AvatarId'], inplace=True)
    hist = dm.filter_date(hist, 'YMD', max_date)
    hist = Users.merge_users_clean(hist)
    return dh.add_prod_name(hist[selected])
Exemplo n.º 6
0
def plot_uhist(uid, i_uh, arr_start):
    """Plot the performance history of one user on a new 9x8 figure.

    The chart contains:
      * a line with the 10-row rolling ratio
        ``sum(RightCount) / sum(nTot)`` (left axis);
      * bars with the per-row ``Ratio`` (left axis);
      * bars with the per-row ``nTot`` (twin right axis);
      * diamond markers, drawn only when ``CsvL.get_web_log(uid)`` returns
        something, at the positions where ``__count_occurrences`` is > 0.

    Args:
        uid: user identifier, passed to ``CsvL.get_web_log`` and
            ``Users.get_user_name``.
        i_uh: table exposing ``RightCount``, ``nTot`` and ``Ratio``
            (presumably a DataFrame with one row per session - confirm
            against the caller).
        arr_start: datetime-like Series (``.values`` and ``.dt.strftime`` are
            used) providing the x tick labels; presumably the session start
            times, one per row of ``i_uh`` - confirm against the caller.

    Returns:
        None. The figure is created with ``plt.figure`` and not returned.
    """
    i_uh_r = i_uh.rolling(10)
    i_sumR = i_uh_r.RightCount.sum()
    i_sumT = i_uh_r.nTot.sum()
    i_movavg = i_sumR / i_sumT
    r_obs = range(0, len(i_movavg))
    # Ratio bars are shifted left so they sit beside the nTot bars.
    r_obs2 = np.arange(-0.7, len(i_movavg) - 0.7, 1)
    f = plt.figure(figsize=(9, 8))
    ax = f.add_subplot(111)
    ax2 = ax.twinx()
    l_hand = []
    l_lab = []
    line_avg, = ax.plot(r_obs,
                        i_movavg,
                        lw=3,
                        zorder=10,
                        color=co.ab_colors['rosso'])
    bar_ratio = ax.bar(r_obs2,
                       i_uh.Ratio,
                       color=co.ab_colors['azzurro'],
                       alpha=0.5,
                       width=0.7,
                       align='edge',
                       zorder=1)
    bar_ntot = ax2.bar(r_obs,
                       i_uh.nTot,
                       color=co.ab_colors['verde'],
                       alpha=0.5,
                       width=0.25,
                       align='edge')
    weblog = CsvL.get_web_log(uid)
    # Draw the left axis above the twin axis and hide its background patch
    # so the right-axis bars remain visible underneath.
    ax.set_zorder(ax2.get_zorder() + 1)
    ax.patch.set_visible(False)
    l_hand.append(line_avg)
    l_lab.append('Rolling average')
    l_hand.append(bar_ratio)
    l_lab.append('Correctness')
    l_hand.append(bar_ntot)
    l_lab.append('Number of products')
    if weblog is not None:
        num_web_check = __count_occurrences(arr_start.values, weblog)
        # np.where returns a 1-tuple here; squeeze drops that leading axis.
        xval = np.add(np.where(num_web_check > 0), 0.25).squeeze(axis=0)
        yval = np.ones(len(xval)) * 0.5
        sca_web = ax.scatter(xval,
                             yval,
                             marker='d',
                             edgecolors='k',
                             s=150,
                             lw=1,
                             facecolor=co.ab_colors['giallo'],
                             zorder=2)
        l_hand.append(sca_web)
        l_lab.append('Website check')

    ax.legend(tuple(l_hand), tuple(l_lab), fontsize=16)
    ax.set_ylabel('% of correct recommendations', size=18)
    ax2.set_ylabel('Number of recommended products', size=18)
    ax.set_xlabel('Sessions', size=18)
    ax.set_xticks(r_obs)
    ax.tick_params(labelsize=16)
    vals = [0, 0.25, 0.5, 0.75, 1]
    ax.set_yticks(vals)
    # The plotted values are fractions in [0, 1], hence xmax=1. A single
    # formatter yields the labels 0%, 25%, ... without a second call that
    # overrides it with fixed label strings.
    ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=0))

    ax2.tick_params(labelsize=16)
    # Ticks every 5 units plus one at the maximum of nTot.
    yticks = np.arange(0, i_uh.nTot.max(), 5)
    yticks = np.append(yticks, i_uh.nTot.max())
    ax2.set_yticks(yticks)
    ax2.yaxis.set_major_formatter(ticker.ScalarFormatter())
    lbl = arr_start.dt.strftime('%d/%m/%Y')
    ax.set_xticklabels(lbl, rotation=45, ha='right', size=15)

    plt.title("Performance history for user"
              " {0}".format(Users.get_user_name(uid)),
              size=25,
              y=1.02)
Exemplo n.º 7
0
def add_session_date(df):
    """Join ``df`` with the avatar history table.

    The merge uses pandas' default join type on the key columns
    ``AvatarId``, ``SessionId`` and ``UserId``.

    Returns:
        The merged DataFrame.
    """
    join_keys = ['AvatarId', 'SessionId', 'UserId']
    avatar_history = CsvL.get_avatar_history()
    return pd.merge(df, avatar_history, on=join_keys)
Exemplo n.º 8
0
def add_prod_name(df):
    """Add the ``ProdName`` column to ``df`` by joining on ``ProductId``.

    Only the ``ProductId`` and ``ProdName`` columns of the product registry
    take part in the merge (pandas' default join type).

    Returns:
        The merged DataFrame.
    """
    names = CsvL.get_prod_anag()[['ProductId', 'ProdName']]
    return pd.merge(df, names, on='ProductId')
Exemplo n.º 9
0
def get_user_name(user_id):
    """Return the ``NameSurname`` of the first user row matching ``user_id``.

    Raises:
        IndexError: if no row of the users registry has that ``UserId``.
    """
    users = CsvL.get_users_anag()
    matches = users.loc[users.UserId == user_id, 'NameSurname']
    return matches.values[0]
Exemplo n.º 10
0
def get_product_name(product_id):
    """Return the ``ProdName`` of the first product row matching ``product_id``.

    Raises:
        IndexError: if no row of the product registry has that ``ProductId``.
    """
    products = CsvL.get_prod_anag()
    matches = products.loc[products.ProductId == product_id, 'ProdName']
    return matches.values[0]