def random():
    """Render 'reco.html' for a client id obtained from the recommender.

    Reads ``dropdown_table_new`` into a DataFrame, derives the dropdown
    structures passed to the template, asks ``recommender`` for a random
    client id, its recommendations and its purchase history, and renders
    the page. The elapsed wall-clock time is printed to stdout.

    Returns:
        Whatever ``render_template('reco.html', ...)`` returns.
    """
    start = time.time()

    qry = db_session.query(dropdown_table_new)
    df2 = pd.read_sql(qry.statement, qry.session.bind)

    # (prod_subfamily, array of unique prod_name) pairs, one per sub-family.
    list2 = [
        (subfamily,
         df2[df2['prod_subfamily'] == subfamily]['prod_name'].unique())
        for subfamily in df2['prod_subfamily'].unique()
    ]

    # Unique prod_subfamily values for each distinct (CAT1, prod_family).
    df4 = df2.sort_values(by=['prod_family'])
    df4 = (
        df4.groupby(['CAT1', 'prod_family'])['prod_subfamily']
        .unique()
        .apply(list)
        .reset_index()
    )

    client_id = recommender.random_client_id()
    df_reco_models = recommender.recommend1(client_id)

    # Keep only categories L/D/B when they cover exactly three distinct CAT1
    # values; otherwise widen the filter to include 'A' as well.
    # NOTE(review): the filter is on 'category1' while the count is on 'CAT1'
    # -- confirm that using two different columns is intended.
    core_categories = ['L', 'D', 'B']
    core_rows = df_reco_models[
        df_reco_models['category1'].isin(core_categories)
    ]
    if len(core_rows['CAT1'].unique()) == 3:
        df_reco_models = core_rows
    else:
        df_reco_models = df_reco_models[
            df_reco_models['category1'].isin(core_categories + ['A'])
        ]

    # Both branches sorted and re-indexed identically; do it once and by
    # assignment rather than in place on a filtered slice. reset_index()
    # deliberately keeps the old index as an 'index' column, as before.
    df_reco_models = df_reco_models.sort_values(
        'category1', ascending=False
    ).reset_index()

    # First row per CAT1 group, in the sorted order above.
    recomm_df = df_reco_models.groupby('CAT1').head(1)
    df_history = recommender.get_history(client_id)
    prod_list = recomm_df['prod_name'].tolist()

    # (DataFrame column, display header) pairs for the history table.
    columns = [
        ('transaction_date', 'ORDER DATE'),
        ('prod_family', 'FAMILY'),
        ('prod_subfamily', 'SUB-FAMILY'),
        ('prod_name', 'MODEL'),
        ('model_id', 'MODEL ID'),
        ('transaction_id', 'ORDER #'),
    ]

    end = time.time()
    print("********************************running time: " + str(end - start))

    return render_template(
        'reco.html',
        client_id=client_id,
        history_df=df_history,
        history_columns=columns,
        recomm_df=recomm_df,
        df2=df2,
        list2=list2,
        df4=df4,
        prod=prod_list,
    )
def loadData(self):
    """Load and clean the purchases table, returning it without suggestions.

    Reads ``purchases`` through ``db_session``, parses ``transaction_date``,
    drops rows whose ``prod_name`` contains 'spare' or 'service'
    (case-insensitive), casts ``cont_id`` to ``str`` and sorts by
    ``cont_id`` then ``transaction_date``.

    Side effects:
        ``self.df_with_inputs`` is set to the cleaned frame and
        ``self.df_without_inputs`` to the same frame minus rows whose
        ``transaction_id`` equals 'suggestion'.

    Returns:
        The frame stored in ``self.df_without_inputs``.
    """
    query = db_session.query(purchases)
    frame = pd.read_sql(query.statement, query.session.bind)

    frame['transaction_date'] = pd.to_datetime(frame['transaction_date'])

    # Remove rows whose prod_name mentions either keyword. The explicit
    # comparison with False is kept on purpose: rows where the match result
    # is missing do not compare equal to False and are dropped as well.
    for keyword in ('spare', 'service'):
        has_keyword = frame['prod_name'].str.lower().str.contains(keyword)
        frame = frame[has_keyword == False]  # noqa: E712

    # Client ids are handled as strings.
    frame['cont_id'] = frame['cont_id'].astype(str)

    frame = frame.sort_values(by=['cont_id', 'transaction_date'])
    self.df_with_inputs = frame

    without_suggestions = frame[frame['transaction_id'] != 'suggestion']
    self.df_without_inputs = without_suggestions
    return without_suggestions
def get_history(self, client_id):
    """Return the cleaned purchase rows belonging to ``client_id``.

    The purchases table is re-read on every call, since users could have
    inserted a new record in the meantime. Rows whose ``prod_name``
    contains 'spare' or 'service' (case-insensitive) are removed, and a
    ``qty`` column holds the number of rows per (cont_id, prod_name) pair.

    Args:
        client_id: Client identifier. It is converted to ``str`` before
            matching because ``cont_id`` is stored as ``str``; string
            arguments behave exactly as before.

    Side effects:
        ``self.df`` is replaced with the full cleaned frame (all clients).

    Returns:
        The rows of ``self.df`` whose ``cont_id`` equals ``str(client_id)``.
    """
    qry = db_session.query(purchases)
    df = pd.read_sql(qry.statement, qry.session.bind)

    df['transaction_date'] = pd.to_datetime(df['transaction_date'])

    # Drop spare/service rows. na=True marks rows with no usable prod_name
    # as matches so they are dropped too, as the former `== False` test did.
    names = df['prod_name'].str.lower()
    excluded = (
        names.str.contains('spare', na=True)
        | names.str.contains('service', na=True)
    )
    df = df[~excluded]

    df['cont_id'] = df['cont_id'].astype(str)
    df = df.sort_values(by=['cont_id', 'transaction_date'])

    # Number of rows for each (client, product) pair, repeated on every row.
    df['qty'] = df.groupby(['cont_id', 'prod_name'])['prod_name'].transform('size')
    self.df = df

    # cont_id holds strings, so a non-string id would never match.
    return self.df[self.df['cont_id'] == str(client_id)]
def suggestion():
    """Render 'reco.html' for the client id given in the 'query' argument.

    Reads ``dropdown_table_new`` into a DataFrame, derives the dropdown
    structures passed to the template, fetches recommendations and purchase
    history for ``request.args['query']`` from ``recommender`` and renders
    the page. The elapsed wall-clock time is printed to stdout.

    Returns:
        Whatever ``render_template('reco.html', ...)`` returns.
    """
    start = time.time()

    qry = db_session.query(dropdown_table_new)
    df2 = pd.read_sql(qry.statement, qry.session.bind)

    # (prod_subfamily, array of unique prod_name) pairs, one per sub-family.
    list2 = [
        (subfamily,
         df2[df2['prod_subfamily'] == subfamily]['prod_name'].unique())
        for subfamily in df2['prod_subfamily'].unique()
    ]

    # Unique prod_subfamily values for each distinct (CAT1, prod_family).
    df4 = df2.sort_values(by=['prod_family'])
    df4 = (
        df4.groupby(['CAT1', 'prod_family'])['prod_subfamily']
        .unique()
        .apply(list)
        .reset_index()
    )

    client_id = request.args['query']
    df_reco_models = recommender.recommend1(client_id)

    # Keep only categories L/D/B when they cover exactly three distinct CAT1
    # values; otherwise widen the filter to include 'A' as well.
    core_categories = ['L', 'D', 'B']
    core_rows = df_reco_models[
        df_reco_models['category1'].isin(core_categories)
    ]
    if len(core_rows['CAT1'].unique()) == 3:
        df_reco_models = core_rows
    else:
        df_reco_models = df_reco_models[
            df_reco_models['category1'].isin(core_categories + ['A'])
        ]

    # Purchasing history for the requested client.
    df_history = recommender.get_history(client_id)

    # First row per CAT1 group; computed once and reused for both the
    # product list and the template.
    recomm_df = df_reco_models.groupby('CAT1').head(1)
    prod_list = recomm_df['prod_name'].tolist()

    # (DataFrame column, display header) pairs for the history table.
    columns = [
        ('transaction_date', 'ORDER DATE'),
        ('prod_family', 'FAMILY'),
        ('prod_subfamily', 'SUB-FAMILY'),
        ('prod_name', 'MODEL'),
        ('model_id', 'MODEL ID'),
        ('transaction_id', 'ORDER #'),
    ]

    end = time.time()
    print("********************************running time: " + str(end - start))

    return render_template(
        'reco.html',
        client_id=client_id,
        history_df=df_history,
        history_columns=columns,
        recomm_df=recomm_df,
        df2=df2,
        list2=list2,
        df4=df4,
        prod=prod_list,
    )
def suggestion():
    """Render 'reco.html' for the client id given in the 'query' argument.

    Builds the dropdown structures from ``dropdown_table_new``, fetches
    recommendations and purchase history for ``request.args['query']`` from
    ``recommender``, and passes everything to the template.

    Returns:
        Whatever ``render_template('reco.html', ...)`` returns.
    """
    dropdown_query = db_session.query(dropdown_table_new)
    df2 = pd.read_sql(dropdown_query.statement, dropdown_query.session.bind)

    # Pair every sub-family with the unique product names listed under it.
    subfamily_col = df2['prod_subfamily']
    list2 = [
        (subfamily, df2[subfamily_col == subfamily]['prod_name'].unique())
        for subfamily in subfamily_col.unique()
    ]

    # For each (CAT1, prod_family) pair, the list of its unique sub-families.
    ordered = df2.sort_values(by=['prod_family'])
    subfamilies_by_family = ordered.groupby(['CAT1', 'prod_family'])['prod_subfamily']
    df4 = subfamilies_by_family.unique().apply(list).reset_index()

    client_id = request.args['query']
    recommendations = recommender.recommend1(client_id)

    # Use only L/D/B rows when they span exactly three distinct CAT1 values;
    # otherwise fall back to the wider L/D/B/A selection.
    ldb_rows = recommendations[recommendations['category1'].isin(['L', 'D', 'B'])]
    if len(ldb_rows['CAT1'].unique()) == 3:
        recommendations = ldb_rows
    else:
        recommendations = recommendations[
            recommendations['category1'].isin(['L', 'D', 'B', 'A'])
        ]

    df_history = recommender.get_history(client_id)

    # One recommendation (the first row) per CAT1 group.
    recomm_df = recommendations.groupby('CAT1').head(1)
    prod_list = [getattr(row, 'prod_name') for row in recomm_df.itertuples()]

    # (DataFrame column, display header) pairs for the history table.
    columns = [
        ('transaction_date', 'ORDER DATE'),
        ('prod_family', 'FAMILY'),
        ('prod_subfamily', 'SUB-FAMILY'),
        ('prod_name', 'MODEL'),
        ('model_id', 'MODEL ID'),
        ('transaction_id', 'ORDER #'),
    ]

    template_context = {
        'client_id': client_id,
        'history_df': df_history,
        'history_columns': columns,
        'recomm_df': recomm_df,
        'df2': df2,
        'list2': list2,
        'df4': df4,
        'prod': prod_list,
    }
    return render_template('reco.html', **template_context)
def update_model1(self):
    """Refit ``self.model_knn1`` from the current contents of ``purchases``.

    The table is read through ``db_session``, rows whose ``prod_name``
    contains 'spare' or 'service' (case-insensitive) are removed, and a
    product-by-client quantity matrix is built and fed to a brute-force
    cosine ``NearestNeighbors`` model. Unlike ``__init__`` in the same
    class, rows with ``transaction_id == 'suggestion'`` are not filtered
    out here.

    Side effects:
        ``self.model_knn1`` is replaced with the newly fitted model.
    """
    query = db_session.query(purchases)
    purchases_df = pd.read_sql(query.statement, query.session.bind)

    purchases_df['transaction_date'] = pd.to_datetime(purchases_df['transaction_date'])

    # Remove rows whose prod_name mentions either keyword. The explicit
    # comparison with False is kept on purpose: rows where the match result
    # is missing do not compare equal to False and are dropped as well.
    for keyword in ('spare', 'service'):
        has_keyword = purchases_df['prod_name'].str.lower().str.contains(keyword)
        purchases_df = purchases_df[has_keyword == False]  # noqa: E712

    # Client ids are handled as strings.
    purchases_df['cont_id'] = purchases_df['cont_id'].astype(str)
    purchases_df = purchases_df.sort_values(by=['cont_id', 'transaction_date'])

    pair_keys = ['cont_id', 'prod_name']
    # Every row receives the row count of its (client, product) pair ...
    purchases_df['qty'] = purchases_df.groupby(pair_keys)['prod_name'].transform('size')
    # ... and those per-row counts are then summed per pair.
    # NOTE(review): a pair with n rows therefore ends up with n * n here --
    # confirm the squared weighting is intended.
    pair_totals = purchases_df.groupby(pair_keys)['qty'].sum().reset_index()

    # Rows are products, columns are clients; missing pairs count as 0.
    product_by_client = pair_totals.pivot(
        index='prod_name', columns='cont_id', values='qty'
    ).fillna(0)

    # Compressed sparse row form of the matrix, used to fit the model.
    sparse_rows = csr_matrix(product_by_client.values)

    knn = NearestNeighbors(metric='cosine', algorithm='brute')
    knn.fit(sparse_rows)
    self.model_knn1 = knn
def __init__(self):
    """Load purchases, build the product-by-client matrix and fit the models.

    Reads ``purchases`` through ``db_session``, removes rows whose
    ``prod_name`` contains 'spare' or 'service' (case-insensitive) and rows
    whose ``transaction_id`` is 'suggestion', then fits a brute-force cosine
    ``NearestNeighbors`` model on the resulting matrix.

    Attributes set:
        client_ids: list of the distinct ``cont_id`` values (as ``str``).
        df: cleaned purchases frame with an added ``qty`` column.
        ndf: ``qty`` summed per (cont_id, prod_name) pair.
        items: pivot of ``ndf`` with products as rows and clients as columns.
        item_rows: ``items`` as a CSR sparse matrix.
        model_knn2: model fitted on ``item_rows``.

    ``self.split_data()`` is called once ``df``, ``ndf`` and ``client_ids``
    are in place, and ``self.update_model1()`` is called last.
    """
    qry = db_session.query(purchases)
    df = pd.read_sql(qry.statement, qry.session.bind)

    df['transaction_date'] = pd.to_datetime(df['transaction_date'])

    # Drop spare/service rows. na=True marks rows with no usable prod_name
    # as matches so they are dropped too, as the former `== False` test did.
    names = df['prod_name'].str.lower()
    excluded = (
        names.str.contains('spare', na=True)
        | names.str.contains('service', na=True)
    )
    df = df[~excluded]

    # Client ids are handled as strings.
    df['cont_id'] = df['cont_id'].astype(str)

    # Rows with transaction_id 'suggestion' are excluded from this model.
    df = df[df['transaction_id'] != 'suggestion']
    df = df.sort_values(by=['cont_id', 'transaction_date'])

    # Distinct client ids (previously assigned twice with the same value).
    self.client_ids = list(set(df['cont_id']))

    # Number of rows for each (client, product) pair, repeated on every row.
    df['qty'] = df.groupby(['cont_id', 'prod_name'])['prod_name'].transform('size')
    self.df = df

    # NOTE(review): summing the per-row counts gives n * n for a pair with
    # n rows -- confirm the squared weighting is intended.
    ndf = df.groupby(['cont_id', 'prod_name'])['qty'].sum().reset_index()
    self.ndf = ndf

    # Must run after df/ndf/client_ids are set and before the model is built,
    # matching the original call order.
    self.split_data()

    # Rows are products, columns are clients; missing pairs count as 0.
    items = ndf.pivot(index='prod_name', columns='cont_id', values='qty').fillna(0)
    self.items = items

    # Compressed sparse row form of the matrix, used to fit the model.
    item_rows = csr_matrix(items.values)
    self.item_rows = item_rows

    model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
    model_knn.fit(item_rows)
    self.model_knn2 = model_knn

    self.update_model1()