def prod_plot(uid, uhist, most_uprod):
    """Plot per-session recommendation correctness for a user's products.

    Builds a 2x2 figure. For each product (at most four, because the products
    are zipped with the four panels) the history rows of that product are
    grouped by ``SessionId`` and two values are computed per session:

    * ``Ratio``: number of rows whose ``ActionType`` is ``'RightProduct'``
      divided by the count of ``Id`` values in the session;
    * ``nTot``: the count of ``Id`` values in the session.

    ``Ratio`` is drawn as wide bars on the left axis and ``nTot`` as narrow
    bars on a twin right axis. The per-session table of every product is also
    written to ``./8-<ProductId>.csv``.

    Args:
        uid: User identifier; passed to ``Users.get_user_name`` for the
            figure title.
        uhist: Table exposing ``ProductId``, ``SessionId``, ``ActionType``
            and ``Id`` columns (presumably a pandas DataFrame with the
            user's history -- confirm against the caller).
        most_uprod: Iterable of product ids to plot. Ids are concatenated
            into the CSV file name, so they must be strings.

    Returns:
        None. The figure is created through ``plt.subplots`` and is not
        returned.
    """
    f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2)
    panels = [ax1, ax2, ax3, ax4]
    cmap = [0, 1, 2, 4]  # index into co.abc_l for each panel

    # Loop-invariant tick positions and their percentage labels.
    vals = [0, 0.25, 0.5, 0.75, 1]
    val_labels = ['{:,.0%}'.format(x) for x in vals]

    # Twin (right-hand) axes in creation order, one per plotted product.
    twins = []

    for i, (ip, ia) in enumerate(zip(most_uprod, panels)):
        idup = uhist[uhist.ProductId == ip]
        idup_rw = idup.groupby('SessionId').apply(
            lambda x: pd.Series({
                'Ratio': (x.ActionType == 'RightProduct').sum() / x.Id.count(),
                'nTot': x.Id.count(),
            })
        ).reset_index()
        idup_rw.to_csv('./8-' + ip + '.csv')

        ic = co.abc_l[cmap[i]]
        n_sessions = len(idup_rw)
        r_obs2 = range(0, n_sessions)
        # Built from an integer range so the length always equals the number
        # of sessions; np.arange with a float stop can be off by one.
        r_obs = np.arange(n_sessions) - 0.7

        ia.bar(r_obs, idup_rw.Ratio, color=ic, width=0.7, align='edge')
        ia2 = ia.twinx()
        twins.append(ia2)
        ia2.bar(r_obs2, idup_rw.nTot, color=co.ab_colors['giallo'],
                width=0.25, align='edge')
        ia2.set_yticks(range(0, int(idup_rw.nTot.max() + 1)))

        ipname = Prodotti.get_product_name(ip)
        ia.set_title('Progresso per {0}'.format(ipname), size=22)
        ia.set_xticks([])
        ia.tick_params(labelsize=16)
        ia.set_yticks(vals)
        # NOTE(review): the explicit labels set right after presumably take
        # precedence over this formatter -- confirm whether it is still needed.
        ia.yaxis.set_major_formatter(ticker.PercentFormatter())
        ia.set_yticklabels(val_labels)

    # Left column: correctness label.
    ax1.set_ylabel('Correttezza', size=18)
    ax3.set_ylabel('Correttezza', size=18)

    # Right column: count label on the twin axes of the 2nd and 4th panels.
    # Those twins exist only when that many products were plotted, so guard
    # the lookup instead of indexing f.axes at fixed positions.
    for twin_pos in (1, 3):
        if twin_pos < len(twins):
            twins[twin_pos].set_ylabel('Numero prodotti consigliati', size=18,
                                       color=co.ab_colors['giallo'])

    # Bottom row: session label.
    ax3.set_xlabel('Sessioni', size=18)
    ax4.set_xlabel('Sessioni', size=18)

    f.suptitle('Utente {0}'.format(Users.get_user_name(uid)), size=25)
# NOTE(review): whitespace-collapsed script fragment. The original line breaks
# are lost, so everything after the first `#` on the line below is parsed as a
# comment. Statements it appears to contain, in order:
#   - imports: Utils.Renders (as rd), DA.Prodotti, Utils.Clust,
#     Utils.Constants, Utils.ClAnalyzer.ClAnalyzer, IPython.embed;
#   - seaborn setup: sns.set() and sns.set_palette(Constants.abc_l), with a
#     commented-out plt.ion();
#   - dataset preparation: Prodotti.get_df_group_prod(include_rare=True) is
#     wrapped in a ClAnalyzer, and the result of
#     Prodotti.get_df_group_prod_proc(include_rare=True) is added to it under
#     the name 'scaled';
#   - CA.features is set to the five-name `feats` list; `feats3` is defined
#     but not used within this fragment;
#   - one sample product id ('P0011AN') is selected on 'ProductId' (the
#     three-id list is commented out), then
#     CA.print_relevance(df_name='scaled') is called;
#   - the trailing `if 0:` has no visible body: the fragment is cut off here.
# `sns` and `plt` are not imported in the visible text; presumably they are
# imported earlier in the original file -- confirm.
import Utils.Renders as rd # NOQA from DA import Prodotti from Utils import Clust # NOQA from Utils import Constants from Utils.ClAnalyzer import ClAnalyzer from IPython import embed # }}} sns.set() sns.set_palette(Constants.abc_l) # plt.ion() # {{{ Dataset preparation df = Prodotti.get_df_group_prod(include_rare=True) df_scaled = Prodotti.get_df_group_prod_proc(include_rare=True) CA = ClAnalyzer(df) CA.add_df(df_scaled, 'scaled') feats = ['nAvSess', 'Recency', 'nUsers', 'Ratio', 'UserRatio'] feats3 = ['Recency', 'nUsers', 'Ratio'] CA.features = feats # 1: highly recommended # 2: recommended in the north # 3: correctly recommended and numerous # samples = ['P0011AN', 'P0018AN', 'P0080AB'] samples = ['P0011AN'] CA.set_samples(samples, 'ProductId') CA.print_relevance(df_name='scaled') if 0:
# NOTE(review): whitespace-collapsed script fragment. The original line breaks
# are lost, so everything after the first `#` on the line below is parsed as a
# comment. Statements it appears to contain, in order:
#   - imports: numpy, matplotlib.pyplot, scipy.spatial.distance (pdist,
#     squareform), sklearn (DBSCAN, KMeans, NearestNeighbors, PCA),
#     Utils.Renders (as rd), pandas, DA.Prodotti, Utils.Clust,
#     Utils.ClAnalyzer.ClAnalyzer, IPython.embed;
#   - dataset preparation: Prodotti.get_df_group_prod() is wrapped in a
#     ClAnalyzer, and the result of Prodotti.get_df_group_prod_proc() is
#     added to it under the name 'scaled';
#   - CA.features is set to the five-name `feats` list; `feats3` is defined
#     but not used within this fragment;
#   - CA.print_relevance(df_name='scaled') is called;
#   - three sample product ids are selected on 'ProductId';
#   - CA.print_outliers() sits behind `if 0:`, a constant-false guard, so it
#     is disabled.
import numpy as np import matplotlib.pyplot as plt from scipy.spatial.distance import pdist, squareform from sklearn.cluster import DBSCAN, KMeans from sklearn.neighbors import NearestNeighbors from sklearn.decomposition import PCA import Utils.Renders as rd # NOQA import pandas as pd from DA import Prodotti from Utils import Clust # NOQA from Utils.ClAnalyzer import ClAnalyzer from IPython import embed # }}} # {{{ Dataset preparation prod = Prodotti.get_df_group_prod() prod_proc = Prodotti.get_df_group_prod_proc() CA = ClAnalyzer(prod) CA.add_df(prod_proc, 'scaled') feats = ['nAvSess', 'Recency', 'nUsers', 'Ratio', 'UserRatio'] feats3 = ['Recency', 'nUsers', 'Ratio'] CA.features = feats CA.print_relevance(df_name='scaled') # 1: highly recommended # 2: recommended in the north # 3: correctly recommended and numerous samples = ['P0011AN', 'P0018AN', 'P0080AB'] CA.set_samples(samples, 'ProductId') if 0: CA.print_outliers()
# NOTE(review): whitespace-collapsed fragment. The line breaks are lost and the
# enclosing class header is outside this view.
# Visible members, in order:
#   - the tail of an accessor that returns self._dataset (its header is not
#     visible; presumably the `dataset` property getter -- confirm);
#   - `dataset` setter: stores the given value in self._dataset;
#   - `n_clust` property: getter and setter over self._n_clust;
#   - `name` property: returns self._name; no setter appears in this fragment.
# The `__main__` block loads Prodotti.get_df_group_prod(), builds
# ClAnalyzer("Kmeans", prod), sets three feature names and three sample
# product ids (selected on 'ProductId'), calls print_relevance() and
# print_outliers(), then calls embed().
# NOTE(review): the other script fragments in this file call ClAnalyzer with a
# single argument -- confirm which constructor signature is current.
return self._dataset @dataset.setter def dataset(self, value): self._dataset = value @property def n_clust(self): return self._n_clust @n_clust.setter def n_clust(self, value): self._n_clust = value @property def name(self): return self._name # }}} if __name__ == '__main__': prod = Prodotti.get_df_group_prod() CA = ClAnalyzer("Kmeans", prod) samples = ['P0011AN', 'P0018AN', 'P0080AB'] features = ['Ratio', 'nProv', 'NordSud'] CA.features = features CA.set_samples(samples, 'ProductId') CA.print_relevance() CA.print_outliers() embed()