def objective_function(x_int):
    """Blend the five base-model scores with weights ``x_int`` and evaluate MAP@10.

    Args:
        x_int: iterable of 5 floats — blending weights for the similarity,
            xgboost-rank, xgboost-regression, SVM and lightFM predictions.

    Returns:
        float: minus the median weekly MAP score (negated so a minimiser
        maximises the median MAP).

    Side effects:
        Increments ``objective_function.n_iterations``, overwrites
        ``d_blend_pred`` (module-level state) and prints a progress report.
    """
    objective_function.n_iterations += 1
    w_sim, w_rank, w_reg, w_SVM, w_lightfm = x_int
    # (prediction key, blending weight) for each base model
    model_weights = [("sim", w_sim),
                     ("xgb_rank", w_rank),
                     ("xgb_reg", w_reg),
                     ("xgb_SVM", w_SVM),
                     ("lightfm", w_lightfm)]
    weekly_map = []
    for week_ID in list_week_ID:
        for user in d_user[week_ID]:
            preds = d_user_pred[week_ID][user]
            # Weighted sum of the per-model score arrays
            blended = sum(preds[key] * weight for key, weight in model_weights)
            d_blend_pred[week_ID][user] = blended
            # Keep only the 10 highest-scoring coupons
            top_k = np.argsort(-blended)[:10]
            d_blend_pred[week_ID][user] = d_coupon[week_ID][top_k]
        users = d_user_purchase[week_ID].keys()
        actual = [d_user_purchase[week_ID][u] for u in users]
        predicted = [d_blend_pred[week_ID][u] for u in users]
        weekly_map.append(mapr.mapk(actual, predicted))
    weekly_map = np.array(weekly_map)
    print(objective_function.n_iterations,
          "w_sim, w_rank, w_reg, w_SVM, w_lightfm =",
          w_sim, w_rank, w_reg, w_SVM, w_lightfm,
          "\nList_score = ", weekly_map,
          "\nMean of MAP = ", np.mean(weekly_map),
          "\n Std of MAP = ", np.std(weekly_map))
    return -np.median(weekly_map)
def objective_function(x_int):
    """Score a weighted cosine-similarity model for one vector of feature weights.

    Each weight in ``x_int`` scales one group of coupon feature columns
    (genre, discount, display, large area, small area, validity, usable
    summary, sex). The block-diagonal weight matrix is applied to the test
    coupon features, users are scored by inverse distance to the weighted
    coupons, and the top-10 coupons per user are evaluated with MAP@10.

    Args:
        x_int: iterable of 8 floats
            (gnr, disc, disp, large, small, val, us_sum, sex).

    Returns:
        float: minus the minimum weekly MAP score (negated so a minimiser
        maximises the worst-week performance).

    Side effects:
        Increments ``objective_function.n_iterations``, fills
        ``d_user_pred`` (module-level state) and prints a progress report.
    """
    objective_function.n_iterations += 1
    list_score = []
    # Parameters to optimise: one weight per coupon feature group
    gnr, disc, disp, large, small, val, us_sum, sex = x_int
    # Build a block-diagonal matrix of weights; each identity block spans
    # the one-hot columns of one feature group (13 genres, 9 large areas, ...)
    Wm = sps.block_diag((gnr * np.eye(13),
                         disc * np.eye(1),
                         disp * np.eye(1),
                         large * np.eye(9),
                         small * np.eye(55),
                         val * np.eye(2),
                         us_sum * np.eye(1),
                         sex * np.eye(2)))
    Wm_sparse = sps.csr_matrix(Wm)
    for week_ID in list_week:
        # Weight the coupon features, then score users by inverse distance
        WmT = Wm_sparse.dot(d_test[week_ID])
        score = 1. / distance.cdist(uchar_sparse.todense(), WmT.T.todense(), metric)
        # Store predictions in a dict
        d_user_pred[week_ID] = {}
        # Keep the 10 highest-scoring coupons for each user
        for i, user in enumerate(d_user_full[week_ID]):
            list_pred = np.ravel(score[i, :])
            list_index_top10 = list_pred.argsort()[-10:][::-1]
            d_user_pred[week_ID][user] = d_coupon[week_ID][list_index_top10]
        # Users who registered during the validation week have no features:
        # give them an empty prediction list
        for key in d_user_purchase[week_ID].keys():
            try:
                d_user_pred[week_ID][key]
            except KeyError:
                d_user_pred[week_ID][key] = []
        list_user = d_user_purchase[week_ID].keys()
        list_actual = [d_user_purchase[week_ID][key] for key in list_user]
        list_pred = [d_user_pred[week_ID][key] for key in list_user]
        list_score.append(mapr.mapk(list_actual, list_pred))
    list_score = np.array(list_score)
    # FIX: use the print() function — the Python 2 print statement is a
    # SyntaxError under Python 3, and other functions in this file already
    # call print() as a function.
    print(objective_function.n_iterations,
          "gnr, disc, disp, large, small, val, us_sum, sex =",
          gnr, disc, disp, large, small, val, us_sum, sex,
          "\nMean of MAP = ", np.mean(list_score),
          "\n Std of MAP = ", np.std(list_score))
    return -np.min(list_score)
def score_similarity_predictions():
    """Score cosine-similarity predictions with MAP@10 on the validation weeks.

    Returns:
        float: mean MAP@10 over the validation weeks (also printed together
        with its standard deviation).
    """
    list_score = []
    # Loop over validation weeks
    for week_ID in ["week51", "week52"]:
        script_utils.print_utility("Training until " + week_ID)
        # Get predictions
        d_user_pred, list_user_full, list_coupon = get_similarity_distance(week_ID, "1", "cosine")
        # Format predictions: keep the 10 highest-scoring coupons per user
        for index, user in enumerate(list_user_full):
            list_pred = d_user_pred[user]
            top_k = np.argsort(-list_pred)[:10]
            d_user_pred[user] = list_coupon[top_k]
        # Get actual purchases.
        # FIX: pickle files must be opened in binary mode ("rb"); text mode
        # fails under Python 3 and can corrupt reads on Windows.
        d_user_purchase = {}
        with open("../Data/Validation/" + week_ID + "/dict_purchase_validation_" + week_ID + ".pickle", "rb") as fp:
            d_user_purchase = pickle.load(fp)
        # Users who registered during the validation week get empty predictions
        for key in d_user_purchase.keys():
            try:
                d_user_pred[key]
            except KeyError:
                d_user_pred[key] = []
        # FIX: wrap keys() in list() — under Python 3, np.array(dict_keys)
        # builds a 0-d object array that cannot be fancy-indexed by permut.
        list_user = np.array(list(d_user_purchase.keys()))
        permut = np.random.permutation(len(list_user))
        # NOTE(review): the [:int(len(permut))] slice is a no-op (takes the
        # full permutation) — presumably left over from sub-sampling
        # experiments; kept to preserve behaviour.
        list_actual = [d_user_purchase[key] for key in list_user[permut][:int(len(permut))]]
        list_pred = [d_user_pred[key] for key in list_user[permut][:int(len(permut))]]
        list_score.append(mapr.mapk(list_actual, list_pred))
    list_score = np.array(list_score)
    # FIX: print() function instead of the Python 2 print statement
    print(list_score)
    print(str(np.mean(list_score)) + " +/- " + str(np.std(list_score)))
    return np.mean(list_score)
def objective_function(x_int):
    """Score a weighted cosine-similarity model for one vector of feature weights.

    Each weight in ``x_int`` scales one group of coupon feature columns
    (genre, discount, display, large area, small area, validity, usable
    summary, sex). The block-diagonal weight matrix is applied to the test
    coupon features, users are scored by inverse distance to the weighted
    coupons, and the top-10 coupons per user are evaluated with MAP@10.

    Args:
        x_int: iterable of 8 floats
            (gnr, disc, disp, large, small, val, us_sum, sex).

    Returns:
        float: minus the minimum weekly MAP score (negated so a minimiser
        maximises the worst-week performance).

    Side effects:
        Increments ``objective_function.n_iterations``, fills
        ``d_user_pred`` (module-level state) and prints a progress report.
    """
    objective_function.n_iterations += 1
    list_score = []
    # Parameters to optimise: one weight per coupon feature group
    gnr, disc, disp, large, small, val, us_sum, sex = x_int
    # Build a block-diagonal matrix of weights; each identity block spans
    # the one-hot columns of one feature group (13 genres, 9 large areas, ...)
    Wm = sps.block_diag((gnr * np.eye(13),
                         disc * np.eye(1),
                         disp * np.eye(1),
                         large * np.eye(9),
                         small * np.eye(55),
                         val * np.eye(2),
                         us_sum * np.eye(1),
                         sex * np.eye(2)))
    Wm_sparse = sps.csr_matrix(Wm)
    for week_ID in list_week:
        # Weight the coupon features, then score users by inverse distance
        WmT = Wm_sparse.dot(d_test[week_ID])
        score = 1. / distance.cdist(uchar_sparse.todense(), WmT.T.todense(), metric)
        # Store predictions in a dict
        d_user_pred[week_ID] = {}
        # Keep the 10 highest-scoring coupons for each user
        for i, user in enumerate(d_user_full[week_ID]):
            list_pred = np.ravel(score[i, :])
            list_index_top10 = list_pred.argsort()[-10:][::-1]
            d_user_pred[week_ID][user] = d_coupon[week_ID][list_index_top10]
        # Users who registered during the validation week have no features:
        # give them an empty prediction list
        for key in d_user_purchase[week_ID].keys():
            try:
                d_user_pred[week_ID][key]
            except KeyError:
                d_user_pred[week_ID][key] = []
        list_user = d_user_purchase[week_ID].keys()
        list_actual = [d_user_purchase[week_ID][key] for key in list_user]
        list_pred = [d_user_pred[week_ID][key] for key in list_user]
        list_score.append(mapr.mapk(list_actual, list_pred))
    list_score = np.array(list_score)
    # FIX: use the print() function — the Python 2 print statement is a
    # SyntaxError under Python 3, and other functions in this file already
    # call print() as a function.
    print(objective_function.n_iterations,
          "gnr, disc, disp, large, small, val, us_sum, sex =",
          gnr, disc, disp, large, small, val, us_sum, sex,
          "\nMean of MAP = ", np.mean(list_score),
          "\n Std of MAP = ", np.std(list_score))
    return -np.min(list_score)
def score_lightFM(no_comp, lr, ep):
    """Score the lightFM model with mean average precision at k = 10.

    Args:
        no_comp (int): number of latent components of the lightFM model.
        lr (float): learning rate.
        ep (int): number of training epochs.

    Returns:
        float: mean MAP@10 over the validation weeks (also printed together
        with its standard deviation).
    """
    list_score = []
    # Loop over validation weeks
    for week_ID in ["week51"]:
        # Get predictions, manually choose metric and classifier
        d_user_pred, list_user_full, list_coupon = fit_model(week_ID, no_comp, lr, ep)
        # Format predictions: keep the 10 highest-scoring coupons per user
        for index, user in enumerate(list_user_full):
            list_pred = d_user_pred[user]
            top_k = np.argsort(-list_pred)[:10]
            d_user_pred[user] = list_coupon[top_k]
        # Get actual purchases.
        # FIX: pickle files must be opened in binary mode ("rb"); text mode
        # fails under Python 3 and can corrupt reads on Windows.
        d_user_purchase = {}
        with open("../Data/Validation/" + week_ID + "/dict_purchase_validation_" + week_ID + ".pickle", "rb") as fp:
            d_user_purchase = pickle.load(fp)
        # Users who registered during the validation week get empty predictions
        for key in d_user_purchase.keys():
            try:
                d_user_pred[key]
            except KeyError:
                d_user_pred[key] = []
        list_user = d_user_purchase.keys()
        list_actual = [d_user_purchase[key] for key in list_user]
        list_pred = [d_user_pred[key] for key in list_user]
        list_score.append(mapr.mapk(list_actual, list_pred))
        # Per-week progress output
        print(list_score)
    list_score = np.array(list_score)
    print(list_score)
    print(str(np.mean(list_score)) + " +/- " + str(np.std(list_score)))
    # FIX: return the mean score so callers (e.g. a hyper-parameter search)
    # can use it — matches score_similarity_predictions, which returns its
    # mean MAP; previously this function returned None.
    return np.mean(list_score)
def score_similarity_predictions():
    """Score cosine-similarity predictions with MAP@10 on the validation weeks.

    Returns:
        float: mean MAP@10 over the validation weeks (also printed together
        with its standard deviation).
    """
    list_score = []
    # Loop over validation weeks
    for week_ID in ["week51", "week52"]:
        script_utils.print_utility("Training until " + week_ID)
        # Get predictions
        d_user_pred, list_user_full, list_coupon = get_similarity_distance(week_ID, "1", "cosine")
        # Format predictions: keep the 10 highest-scoring coupons per user
        for index, user in enumerate(list_user_full):
            list_pred = d_user_pred[user]
            top_k = np.argsort(-list_pred)[:10]
            d_user_pred[user] = list_coupon[top_k]
        # Get actual purchases.
        # FIX: pickle files must be opened in binary mode ("rb"); text mode
        # fails under Python 3 and can corrupt reads on Windows.
        d_user_purchase = {}
        with open("../Data/Validation/" + week_ID + "/dict_purchase_validation_" + week_ID + ".pickle", "rb") as fp:
            d_user_purchase = pickle.load(fp)
        # Users who registered during the validation week get empty predictions
        for key in d_user_purchase.keys():
            try:
                d_user_pred[key]
            except KeyError:
                d_user_pred[key] = []
        # FIX: wrap keys() in list() — under Python 3, np.array(dict_keys)
        # builds a 0-d object array that cannot be fancy-indexed by permut.
        list_user = np.array(list(d_user_purchase.keys()))
        permut = np.random.permutation(len(list_user))
        # NOTE(review): the [:int(len(permut))] slice is a no-op (takes the
        # full permutation) — presumably left over from sub-sampling
        # experiments; kept to preserve behaviour.
        list_actual = [d_user_purchase[key] for key in list_user[permut][:int(len(permut))]]
        list_pred = [d_user_pred[key] for key in list_user[permut][:int(len(permut))]]
        list_score.append(mapr.mapk(list_actual, list_pred))
    list_score = np.array(list_score)
    # FIX: print() function instead of the Python 2 print statement
    print(list_score)
    print(str(np.mean(list_score)) + " +/- " + str(np.std(list_score)))
    return np.mean(list_score)
def score_submission():
    """Score xgboost-regression predictions with MAP@10 on the validation weeks.

    Prints the per-week MAP scores and their mean +/- standard deviation.
    (Despite the generic docstring in the original, this scores the xgboost
    "reg:linear" model; the SVM call is kept commented out as an alternative.)
    """
    list_score = []
    # Loop over validation weeks
    for week_ID in ["week51", "week52"]:
        # FIX: use the print() function — the Python 2 print statement is a
        # SyntaxError under Python 3, and other functions in this file
        # already call print() as a function.
        print("Training " + week_ID)
        # Get predictions, manually choose metric and classifier
        d_user_pred, list_user_full, list_coupon = fit_xgboost(week_ID, "reg:linear")
        # d_user_pred, list_user_full, list_coupon = fit_SVM(week_ID)
        # Format predictions: keep the 10 highest-scoring coupons per user
        for index, user in enumerate(list_user_full):
            list_pred = d_user_pred[user]
            top_k = np.argsort(-list_pred)[:10]
            d_user_pred[user] = list_coupon[top_k]
        # Get actual purchases.
        # FIX: pickle files must be opened in binary mode ("rb"); text mode
        # fails under Python 3 and can corrupt reads on Windows.
        d_user_purchase = {}
        with open("../Data/Validation/" + week_ID + "/dict_purchase_validation_" + week_ID + ".pickle", "rb") as fp:
            d_user_purchase = pickle.load(fp)
        # Users who registered during the validation week get empty predictions
        for key in d_user_purchase.keys():
            try:
                d_user_pred[key]
            except KeyError:
                d_user_pred[key] = []
        list_user = d_user_purchase.keys()
        list_actual = [d_user_purchase[key] for key in list_user]
        list_pred = [d_user_pred[key] for key in list_user]
        list_score.append(mapr.mapk(list_actual, list_pred))
        # Per-week progress output
        print(list_score)
    list_score = np.array(list_score)
    print(list_score)
    print(str(np.mean(list_score)) + " +/- " + str(np.std(list_score)))