示例#1
0
def prod_plot(uid, uhist, most_uprod):
    """Plot per-session recommendation accuracy for a user's top products.

    Draws a 2x2 grid with one panel per product in ``most_uprod`` (at most
    four are used; extra products are ignored, missing ones leave their
    panel empty). For every session of a product each panel shows:

    * left axis, wide bars: the share of rows whose ``ActionType`` equals
      ``'RightProduct'``;
    * right (twin) axis, narrow bars: the number of rows in the session.

    Side effect: for every plotted product the per-session table is written
    to ``./8-<ProductId>.csv`` in the current working directory.

    Args:
        uid: User identifier, forwarded to ``Users.get_user_name`` to build
            the figure title.
        uhist: DataFrame with the user's history. The columns read here are
            ``ProductId``, ``SessionId``, ``ActionType`` and ``Id``.
        most_uprod: Iterable of product ids. They must be strings because
            they are concatenated into the CSV file name.

    Returns:
        None. The figure is created on the current pyplot state.
    """
    f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)
    panels = [ax1, ax2, ax3, ax4]
    # Palette indices into co.abc_l, one per panel. Index 3 is skipped --
    # NOTE(review): presumably on purpose; confirm against the palette.
    cmap = [0, 1, 2, 4]
    # Twin axes keyed by panel position, so the labelling below does not
    # depend on how many products were actually plotted.
    twins = {}
    for i, (ip, ia) in enumerate(zip(most_uprod, panels)):
        idup = uhist[uhist.ProductId == ip]
        idup_rw = idup.groupby('SessionId').apply(lambda x: pd.Series(
            {
                'Ratio': (x.ActionType == 'RightProduct').sum() / x.Id.count(),
                'nTot': x.Id.count()
            })).reset_index()
        idup_rw.to_csv('./8-' + ip + '.csv')
        ic = co.abc_l[cmap[i]]
        # With align='edge' the ratio bar of session k spans [k - 0.7, k] and
        # the count bar spans [k, k + 0.25], so the two sit side by side.
        r_obs2 = range(0, len(idup_rw))
        r_obs = np.arange(-0.7, len(idup_rw) - 0.7, 1)
        ia.bar(r_obs, idup_rw.Ratio, color=ic, width=0.7, align='edge')
        ia2 = ia.twinx()
        twins[i] = ia2
        ia2.bar(r_obs2,
                idup_rw.nTot,
                color=co.ab_colors['giallo'],
                width=0.25,
                align='edge')
        # Integer ticks only on the count axis.
        ia2.set_yticks(range(0, int(idup_rw.nTot.max() + 1)))
        ipname = Prodotti.get_product_name(ip)
        ia.set_title('Progresso per {0}'.format(ipname), size=22)
        ia.set_xticks([])
        ia.tick_params(labelsize=16)
        vals = [0, 0.25, 0.5, 0.75, 1]
        ia.set_yticks(vals)
        # Explicit labels are the only formatter needed: set_yticklabels
        # replaces whatever major formatter was installed before it.
        ia.set_yticklabels(['{:,.0%}'.format(x) for x in vals])

    # Left column: accuracy label.
    for left_ax in (ax1, ax3):
        left_ax.set_ylabel('Correttezza', size=18)
    # Right column: count label on the twin axes, only where a product was
    # plotted (fewer than four products used to raise IndexError here).
    for pos in (1, 3):
        right_twin = twins.get(pos)
        if right_twin is not None:
            right_twin.set_ylabel('Numero prodotti consigliati',
                                  size=18,
                                  color=co.ab_colors['giallo'])
    # Bottom row: session label.
    for bottom_ax in (ax3, ax4):
        bottom_ax.set_xlabel('Sessioni', size=18)

    f.suptitle('Utente {0}'.format(Users.get_user_name(uid)), size=25)
示例#2
0
import Utils.Renders as rd  # NOQA
from DA import Prodotti
from Utils import Clust  # NOQA
from Utils import Constants
from Utils.ClAnalyzer import ClAnalyzer

from IPython import embed
# }}}

# Apply seaborn's default theme, then switch the colour cycle to the project
# palette Constants.abc_l.
sns.set()
sns.set_palette(Constants.abc_l)
# plt.ion()

# {{{ Dataset preparation
# Load the per-product grouped table (rare products included) and its
# processed counterpart; the latter is registered on the analyzer under the
# name 'scaled' (presumably a scaled/normalised version -- confirm in DA).
df = Prodotti.get_df_group_prod(include_rare=True)
df_scaled = Prodotti.get_df_group_prod_proc(include_rare=True)
CA = ClAnalyzer(df)
CA.add_df(df_scaled, 'scaled')
# Full feature set used by the analyzer; feats3 is a reduced set that is not
# used in the visible part of this script.
feats = ['nAvSess', 'Recency', 'nUsers', 'Ratio', 'UserRatio']
feats3 = ['Recency', 'nUsers', 'Ratio']
CA.features = feats
# Notes on the candidate sample products (presumably in the order of the
# commented-out list below -- confirm):
# 1: highly recommended
# 2: recommended in the north
# 3: correctly recommended and numerous
# samples = ['P0011AN', 'P0018AN', 'P0080AB']
samples = ['P0011AN']
CA.set_samples(samples, 'ProductId')

# Report feature relevance computed on the 'scaled' frame.
CA.print_relevance(df_name='scaled')
if 0:
示例#3
0
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
from sklearn.cluster import DBSCAN, KMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA
import Utils.Renders as rd  # NOQA
import pandas as pd
from DA import Prodotti
from Utils import Clust # NOQA
from Utils.ClAnalyzer import ClAnalyzer
from IPython import embed
# }}}

# {{{ Dataset preparation
# Load the per-product grouped table and its processed counterpart; the
# latter is registered on the analyzer under the name 'scaled' (presumably a
# scaled/normalised version -- confirm in DA).
prod = Prodotti.get_df_group_prod()
prod_proc = Prodotti.get_df_group_prod_proc()
CA = ClAnalyzer(prod)
CA.add_df(prod_proc, 'scaled')
# Full feature set used by the analyzer; feats3 is a reduced set that is not
# used in the visible part of this script.
feats = ['nAvSess', 'Recency', 'nUsers', 'Ratio', 'UserRatio']
feats3 = ['Recency', 'nUsers', 'Ratio']
CA.features = feats
# Report feature relevance computed on the 'scaled' frame.
CA.print_relevance(df_name='scaled')

# Notes on the sample products (presumably in the order of the list below
# -- confirm):
# 1: highly recommended
# 2: recommended in the north
# 3: correctly recommended and numerous
samples = ['P0011AN', 'P0018AN', 'P0080AB']
CA.set_samples(samples, 'ProductId')
# Disabled: `if 0` never executes; flip to 1 to print outliers.
if 0:
    CA.print_outliers()
示例#4
0
        return self._dataset

    @dataset.setter
    def dataset(self, value):
        """Replace the stored dataset with ``value`` (no validation is done)."""
        self._dataset = value

    @property
    def n_clust(self):
        """Return the value currently stored in ``_n_clust``."""
        return self._n_clust

    @n_clust.setter
    def n_clust(self, value):
        """Store ``value`` in ``_n_clust`` (no validation is done)."""
        self._n_clust = value

    @property
    def name(self):
        """Return the value stored in ``_name``; no setter is visible here."""
        return self._name
# }}}


if __name__ == '__main__':
    # Manual run: build an analyzer on the grouped product table, pick a
    # feature subset and three sample products, then print the reports.
    prod = Prodotti.get_df_group_prod()
    CA = ClAnalyzer("Kmeans", prod)
    samples = ['P0011AN', 'P0018AN', 'P0080AB']
    features = ['Ratio', 'nProv', 'NordSud']
    CA.features = features
    # Samples are looked up by their 'ProductId' value.
    CA.set_samples(samples, 'ProductId')
    CA.print_relevance()
    CA.print_outliers()
    # Presumably IPython's embed(): opens an interactive shell for manual
    # inspection -- the names defined above are kept as-is for that reason.
    embed()