Exemplo n.º 1
0
    def objective_function(x_int):
        """Blend the base-model scores with weights *x_int* and return
        minus the median MAP@10 across validation weeks (minimised by
        the surrounding optimiser)."""
        objective_function.n_iterations += 1

        # Blending weights, one per base model
        w_sim, w_rank, w_reg, w_SVM, w_lightfm = x_int

        scores = []
        for week_ID in list_week_ID:
            for user in d_user[week_ID]:
                preds = d_user_pred[week_ID][user]
                blended = (preds["sim"] * w_sim
                           + preds["xgb_rank"] * w_rank
                           + preds["xgb_reg"] * w_reg
                           + preds["xgb_SVM"] * w_SVM
                           + preds["lightfm"] * w_lightfm)
                d_blend_pred[week_ID][user] = blended

                # Keep the 10 coupons with the highest blended score
                top_k = np.argsort(-blended)[:10]
                d_blend_pred[week_ID][user] = d_coupon[week_ID][top_k]

            list_user = d_user_purchase[week_ID].keys()
            list_actual = [d_user_purchase[week_ID][key] for key in list_user]
            list_pred = [d_blend_pred[week_ID][key] for key in list_user]

            scores.append(mapr.mapk(list_actual, list_pred))

        scores = np.array(scores)

        print(objective_function.n_iterations,
              "w_sim, w_rank, w_reg, w_SVM, w_lightfm =",
              w_sim, w_rank, w_reg, w_SVM, w_lightfm,
              "\nList_score = ", scores,
              "\nMean of MAP = ", np.mean(scores),
              "\n Std of MAP = ", np.std(scores))

        return -np.median(scores)
    def objective_function(x_int):
        """Objective for the feature-weight optimisation.

        *x_int* holds one weight per user-feature group. A block-diagonal
        weight matrix is built from them, users are scored against coupons
        by inverse distance for each validation week, and minus the minimum
        MAP@10 across weeks is returned (the optimiser minimises this).
        """
        objective_function.n_iterations += 1

        list_score = []

        # Parameters to optimise: one weight per feature group
        gnr, disc, disp, large, small, val, us_sum, sex = x_int

        # Build sparse block-diagonal matrix of weights
        # (block sizes match the one-hot widths of each feature group)
        Wm = sps.block_diag(
            (gnr * np.eye(13), disc * np.eye(1), disp * np.eye(1),
             large * np.eye(9), small * np.eye(55), val * np.eye(2),
             us_sum * np.eye(1), sex * np.eye(2)))
        Wm_sparse = sps.csr_matrix(Wm)

        for week_ID in list_week:

            # Weight the coupon features, then score users by inverse distance
            WmT = Wm_sparse.dot(d_test[week_ID])
            score = 1. / distance.cdist(uchar_sparse.todense(),
                                        WmT.T.todense(), metric)

            # Store top-10 coupon predictions per user
            d_user_pred[week_ID] = {}
            for i, user in enumerate(d_user_full[week_ID]):
                list_pred = np.ravel(score[i, :])
                list_index_top10 = list_pred.argsort()[-10:][::-1]
                d_user_pred[week_ID][user] = d_coupon[week_ID][
                    list_index_top10]

            # Users who registered during the validation week have no
            # features: give them an empty prediction list
            for key in d_user_purchase[week_ID]:
                if key not in d_user_pred[week_ID]:
                    d_user_pred[week_ID][key] = []

            list_user = d_user_purchase[week_ID].keys()
            list_actual = [d_user_purchase[week_ID][key] for key in list_user]
            list_pred = [d_user_pred[week_ID][key] for key in list_user]

            list_score.append(mapr.mapk(list_actual, list_pred))

        list_score = np.array(list_score)

        # FIX: was a Python 2 `print` statement (syntax error on Python 3,
        # and inconsistent with the print() call used elsewhere in this file)
        print(objective_function.n_iterations,
              "gnr, disc, disp, large, small, val, us_sum, sex =",
              gnr, disc, disp, large, small, val, us_sum, sex,
              "\nMean of MAP = ", np.mean(list_score),
              "\n Std of MAP = ", np.std(list_score))
        return -np.min(list_score)
Exemplo n.º 3
0
def score_similarity_predictions():
    """Score cosine similarity predictions with MAP@10 over validation weeks.

    Returns:
        float: mean MAP@10 across the validation weeks.
    """
    list_score = []

    # Loop over validation weeks
    for week_ID in ["week51", "week52"]:
        script_utils.print_utility("Training until " + week_ID)
        # Get predictions
        d_user_pred, list_user_full, list_coupon = get_similarity_distance(
            week_ID, "1", "cosine")
        # Keep the 10 highest-scoring coupons per user
        for user in list_user_full:
            list_pred = d_user_pred[user]
            top_k = np.argsort(-list_pred)[:10]
            d_user_pred[user] = list_coupon[top_k]

        # Get actual purchases
        # FIX: pickle files must be opened in binary mode on Python 3
        with open(
                "../Data/Validation/" + week_ID +
                "/dict_purchase_validation_" + week_ID + ".pickle", "rb") as fp:
            d_user_purchase = pickle.load(fp)

        # Users who registered during the validation week have no
        # predictions: give them an empty list
        for key in d_user_purchase:
            if key not in d_user_pred:
                d_user_pred[key] = []

        # FIX: list(...) needed — np.array over a Python 3 dict_keys view
        # yields a useless 0-d object array
        list_user = np.array(list(d_user_purchase.keys()))
        permut = np.random.permutation(len(list_user))

        # NOTE(review): the original sliced [:int(len(permut))], which is the
        # full permutation — kept as a plain shuffle (order does not affect MAP)
        shuffled = list_user[permut]
        list_actual = [d_user_purchase[key] for key in shuffled]
        list_pred = [d_user_pred[key] for key in shuffled]

        list_score.append(mapr.mapk(list_actual, list_pred))

    list_score = np.array(list_score)
    # FIX: were Python 2 print statements
    print(list_score)
    print(str(np.mean(list_score)) + " +/- " + str(np.std(list_score)))
    return np.mean(list_score)
Exemplo n.º 4
0
    def objective_function(x_int):
        """Objective for the feature-weight optimisation.

        *x_int* holds one weight per user-feature group. A block-diagonal
        weight matrix is built from them, users are scored against coupons
        by inverse distance for each validation week, and minus the minimum
        MAP@10 across weeks is returned (the optimiser minimises this).
        """
        objective_function.n_iterations += 1

        list_score = []

        # Parameters to optimise: one weight per feature group
        gnr, disc, disp, large, small, val, us_sum, sex = x_int

        # Build sparse block-diagonal matrix of weights
        # (block sizes match the one-hot widths of each feature group)
        Wm = sps.block_diag((gnr * np.eye(13), disc * np.eye(1),
                             disp * np.eye(1), large * np.eye(9),
                             small * np.eye(55), val * np.eye(2),
                             us_sum * np.eye(1), sex * np.eye(2)))
        Wm_sparse = sps.csr_matrix(Wm)

        for week_ID in list_week:

            # Weight the coupon features, then score users by inverse distance
            WmT = Wm_sparse.dot(d_test[week_ID])
            score = 1. / distance.cdist(uchar_sparse.todense(),
                                        WmT.T.todense(), metric)

            # Store top-10 coupon predictions per user
            d_user_pred[week_ID] = {}
            for i, user in enumerate(d_user_full[week_ID]):
                list_pred = np.ravel(score[i, :])
                list_index_top10 = list_pred.argsort()[-10:][::-1]
                d_user_pred[week_ID][user] = d_coupon[week_ID][list_index_top10]

            # Users who registered during the validation week have no
            # features: give them an empty prediction list
            for key in d_user_purchase[week_ID]:
                if key not in d_user_pred[week_ID]:
                    d_user_pred[week_ID][key] = []

            list_user = d_user_purchase[week_ID].keys()
            list_actual = [d_user_purchase[week_ID][key] for key in list_user]
            list_pred = [d_user_pred[week_ID][key] for key in list_user]

            list_score.append(mapr.mapk(list_actual, list_pred))

        list_score = np.array(list_score)

        # FIX: was a Python 2 `print` statement (syntax error on Python 3)
        print(objective_function.n_iterations,
              "gnr, disc, disp, large, small, val, us_sum, sex =",
              gnr, disc, disp, large, small, val, us_sum, sex,
              "\nMean of MAP = ", np.mean(list_score),
              "\n Std of MAP = ", np.std(list_score))
        return -np.min(list_score)
Exemplo n.º 5
0
def score_lightFM(no_comp, lr, ep):
    """Score the lightFM model for mean average precision at k = 10.

    Args:
        no_comp (int): number of components for the lightFM model.
        lr (float): learning rate.
        ep (int): number of training epochs.
    """
    list_score = []

    # Loop over validation weeks
    for week_ID in ["week51"]:
        # Get predictions, manually choose metric and classifier
        d_user_pred, list_user_full, list_coupon = fit_model(
            week_ID, no_comp, lr, ep)
        # Keep the 10 highest-scoring coupons per user
        for user in list_user_full:
            list_pred = d_user_pred[user]
            top_k = np.argsort(-list_pred)[:10]
            d_user_pred[user] = list_coupon[top_k]

        # Get actual purchases
        # FIX: pickle files must be opened in binary mode on Python 3
        with open("../Data/Validation/" + week_ID +
                  "/dict_purchase_validation_" + week_ID + ".pickle",
                  "rb") as fp:
            d_user_purchase = pickle.load(fp)

        # Users who registered during the validation week have no
        # predictions: give them an empty list
        for key in d_user_purchase:
            if key not in d_user_pred:
                d_user_pred[key] = []

        list_user = d_user_purchase.keys()
        list_actual = [d_user_purchase[key] for key in list_user]
        list_pred = [d_user_pred[key] for key in list_user]

        # FIX: these two lines were space-indented inside a tab-indented
        # function (TabError on Python 3); whole function re-indented with
        # spaces and the statements kept inside the week loop as intended
        list_score.append(mapr.mapk(list_actual, list_pred))
        print(list_score)

    list_score = np.array(list_score)
    print(list_score)
    print(str(np.mean(list_score)) + " +/- " + str(np.std(list_score)))
Exemplo n.º 6
0
def score_similarity_predictions():
    """Score cosine similarity predictions with MAP@10 over validation weeks.

    Returns:
        float: mean MAP@10 across the validation weeks.
    """
    list_score = []

    # Loop over validation weeks
    for week_ID in ["week51", "week52"]:
        script_utils.print_utility("Training until " + week_ID)
        # Get predictions
        d_user_pred, list_user_full, list_coupon = get_similarity_distance(
            week_ID, "1", "cosine")
        # Keep the 10 highest-scoring coupons per user
        for user in list_user_full:
            list_pred = d_user_pred[user]
            top_k = np.argsort(-list_pred)[:10]
            d_user_pred[user] = list_coupon[top_k]

        # Get actual purchases
        # FIX: pickle files must be opened in binary mode on Python 3
        with open("../Data/Validation/" + week_ID +
                  "/dict_purchase_validation_" + week_ID + ".pickle",
                  "rb") as fp:
            d_user_purchase = pickle.load(fp)

        # Users who registered during the validation week have no
        # predictions: give them an empty list
        for key in d_user_purchase:
            if key not in d_user_pred:
                d_user_pred[key] = []

        # FIX: list(...) needed — np.array over a Python 3 dict_keys view
        # yields a useless 0-d object array
        list_user = np.array(list(d_user_purchase.keys()))
        permut = np.random.permutation(len(list_user))

        # NOTE(review): the original sliced [:int(len(permut))], which is the
        # full permutation — kept as a plain shuffle (order does not affect MAP)
        shuffled = list_user[permut]
        list_actual = [d_user_purchase[key] for key in shuffled]
        list_pred = [d_user_pred[key] for key in shuffled]

        list_score.append(mapr.mapk(list_actual, list_pred))

    list_score = np.array(list_score)
    # FIX: were Python 2 print statements
    print(list_score)
    print(str(np.mean(list_score)) + " +/- " + str(np.std(list_score)))
    return np.mean(list_score)
Exemplo n.º 7
0
def score_submission():
    """Score xgboost regression predictions with MAP@10 over validation weeks.

    Prints the per-week scores and their mean +/- std.
    """
    list_score = []

    # Loop over validation weeks
    for week_ID in ["week51", "week52"]:
        # FIX: was a Python 2 print statement
        print("Training " + week_ID)
        # Get predictions, manually choose metric and classifier
        d_user_pred, list_user_full, list_coupon = fit_xgboost(
            week_ID, "reg:linear")
        # d_user_pred, list_user_full, list_coupon = fit_SVM(week_ID)
        # Keep the 10 highest-scoring coupons per user
        for user in list_user_full:
            list_pred = d_user_pred[user]
            top_k = np.argsort(-list_pred)[:10]
            d_user_pred[user] = list_coupon[top_k]

        # Get actual purchases
        # FIX: pickle files must be opened in binary mode on Python 3
        with open("../Data/Validation/" + week_ID +
                  "/dict_purchase_validation_" + week_ID + ".pickle",
                  "rb") as fp:
            d_user_purchase = pickle.load(fp)

        # Users who registered during the validation week have no
        # predictions: give them an empty list
        for key in d_user_purchase:
            if key not in d_user_pred:
                d_user_pred[key] = []

        list_user = d_user_purchase.keys()
        list_actual = [d_user_purchase[key] for key in list_user]
        list_pred = [d_user_pred[key] for key in list_user]

        list_score.append(mapr.mapk(list_actual, list_pred))
        print(list_score)

    list_score = np.array(list_score)
    # FIX: were Python 2 print statements
    print(list_score)
    print(str(np.mean(list_score)) + " +/- " + str(np.std(list_score)))