示例#1
0
def calculate_mapk_extended_true_pred(class_hierarchy, y_true, y_pred, k):
    """Average ``apk`` over samples after extending labels with ancestors.

    For every (true, pred) pair taken in lockstep from ``y_true`` and
    ``y_pred``, both label collections are extended with the result of
    ``class_hierarchy._get_ancestors_of`` and de-duplicated into sets; the
    two sets are then scored with ``apk(true_set, pred_set, k)``.

    Returns:
        ``np.mean`` of the per-sample scores (``nan`` when there are no
        samples, since the mean of an empty list is undefined).
    """
    # NOTE(review): sets are unordered, so whatever ranking ``pred`` had is
    # not preserved when handed to ``apk`` -- confirm this is intended.

    def _with_ancestors(labels):
        # The lookup result is concatenated to a list, so it is expected to
        # be a list itself.
        return set(list(labels) + class_hierarchy._get_ancestors_of(labels))

    per_sample_scores = [
        apk(_with_ancestors(true), _with_ancestors(pred), k)
        for true, pred in zip(y_true, y_pred)
    ]
    return np.mean(per_sample_scores)
示例#2
0
def AP(prediction, label, names):
    """Score one label column with ``apk`` over the entries labelled +1/-1.

    Entries whose label has absolute value 1 are kept; the kept names are
    ordered by descending ``prediction`` and compared, via ``apk``, against
    the names whose label is exactly 1. ``k`` is the number of kept entries.

    Returns:
        0.0 when no entry is labelled 1, otherwise the value of ``apk``.
    """
    labelled = np.abs(label) == 1
    positives = label == 1
    if not np.any(positives):
        return 0.0

    kept_scores = prediction[labelled]
    # argsort is ascending; reverse it so the highest score comes first.
    descending = np.argsort(kept_scores)[::-1]
    ranked_names = names[labelled][descending]
    return apk(names[positives], ranked_names, len(kept_scores))
示例#3
0
    def prediction_eval(self, prediction, reference):
        """Compare a ``prediction`` mapping against a ``reference`` mapping.

        Log-loss is computed over the keys of ``reference`` only (in sorted
        order), so keys that exist solely in ``prediction`` do not affect it.
        The ranking handed to ``apk`` is built from every item of
        ``prediction``, sorted by its value.

        Returns:
            A dict with ``log_loss`` and two ``apk`` scores: one cut at the
            number of reference entries equal to 1.0, one over the whole
            ranking.
        """
        ordered_keys = sorted(reference.keys())
        real_labels = [reference[key] for key in ordered_keys]
        positive_probs = [prediction[key] for key in ordered_keys]
        # Two-column [P(class 0), P(class 1)] rows for log_loss.
        predicted_labels = [[1 - prob, prob] for prob in positive_probs]

        actual_ev = [ev for ev in reference if reference[ev] == 1.0]

        # NOTE(review): sorted() is ascending, so the lowest-valued entries
        # come first in the ranking given to apk -- confirm this is intended.
        ranked_items = sorted(prediction.items(), key=itemgetter(1))
        predicted_ev, _ = zip(*ranked_items)

        at_actual = apk(actual_ev, predicted_ev, k=len(actual_ev))
        at_all = apk(actual_ev, predicted_ev, k=len(predicted_ev))
        return {
            u'log_loss': log_loss(real_labels, predicted_labels),
            u'avg_len_actual': at_actual,
            u'avg_len_predic': at_all,
        }
    pred_dv_file = "val_sub_d8_i500.csv"

    # Walk the actual and predicted CSV files row by row and accumulate the
    # per-user apk@10 score. ``actual_dv_file`` is set earlier, outside this
    # fragment.
    user_count = 0
    summation = 0
    # ``with`` closes both handles even when the assert below fails or the
    # prediction file runs out of rows early.
    with open(actual_dv_file) as actual_dv_handle, \
            open(pred_dv_file) as pred_dv_handle:
        actual_reader = csv.DictReader(actual_dv_handle)
        pred_reader = csv.DictReader(pred_dv_handle)

        for act_row in actual_reader:
            # The next() builtin works on Python 2.6+ and 3; .next() is
            # Python 2 only.
            pred_row = next(pred_reader)
            # Both files must list users in the same order.
            assert act_row["USER_ID_hash"] == pred_row["USER_ID_hash"]

            purchased = act_row["PURCHASED_COUPONS"]
            # "".split(" ") would yield [''], so map an empty field to [].
            actual_coupons = purchased.split(" ") if purchased else []
            pred_coupons = pred_row["PURCHASED_COUPONS"].split(" ")

            map_at_10 = apk(actual_coupons, pred_coupons, k=10)
            summation += map_at_10
            user_count += 1

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print(user_count)
    print(summation)
    overall_map_at_10 = summation / float(user_count)
    print(overall_map_at_10)
	pred_dv_file = "val_sub_d8_i500.csv"

	# Walk the actual and predicted CSV files row by row and accumulate the
	# per-user apk@10 score. ``actual_dv_file`` is set earlier, outside this
	# fragment.
	user_count = 0
	summation = 0
	# ``with`` closes both handles even when the assert below fails or the
	# prediction file runs out of rows early.
	with open(actual_dv_file) as actual_dv_handle, open(pred_dv_file) as pred_dv_handle:
		actual_reader = csv.DictReader(actual_dv_handle)
		pred_reader = csv.DictReader(pred_dv_handle)

		for act_row in actual_reader:
			# The next() builtin works on Python 2.6+ and 3; .next() is
			# Python 2 only.
			pred_row = next(pred_reader)
			# Both files must list users in the same order.
			assert act_row["USER_ID_hash"] == pred_row["USER_ID_hash"]

			purchased = act_row["PURCHASED_COUPONS"]
			# "".split(" ") would yield [''], so map an empty field to [].
			actual_coupons = purchased.split(" ") if purchased else []
			pred_coupons = pred_row["PURCHASED_COUPONS"].split(" ")

			map_at_10 = apk(actual_coupons, pred_coupons, k=10)
			summation += map_at_10
			user_count += 1

	# Single-argument print(...) emits the same text on Python 2 and 3.
	print(user_count)
	print(summation)
	overall_map_at_10 = summation / float(user_count)
	print(overall_map_at_10)
示例#6
0
from average_precision import apk,mapk

# This script exercises the MAP scoring functions: a single relevant item (1)
# is placed at ranks 1 through 5 of the prediction and AP@5 is printed for
# each placement, followed by MAP@5 over all five placements.
# NOTE(review): with the usual AP@k definition the scores should be 1/rank
# (1.0, 0.5, 0.333..., 0.25, 0.2) -- confirm against average_precision.apk.

actual = [1]

rankings = [
    [1, 2, 3, 4, 5],
    [2, 1, 3, 4, 5],
    [3, 2, 1, 4, 5],
    [4, 2, 3, 1, 5],
    [4, 2, 3, 5, 1],
]

for predicted in rankings:
    print('Answer=', actual, 'predicted=', predicted)
    print('AP@5 =', apk(actual, predicted, 5))

# The function form of print matches the lines above; the original print
# statement here was a SyntaxError on Python 3.
print(mapk([[1], [1], [1], [1], [1]], rankings, 5))
    return fscore

if __name__ == "__main__":
    # Script entry point: build predictions for the test sample, then print
    # the apk score and the F-score of predicted labels against actual ones.

    TEST_SAMPLE = [(31367867, 1329369)]
    POST_DATA_FILE = "../data/trainPosts.json"

    c = Predictions()
    # TEST_SAMPLE = new_test(POST_DATA_FILE)
    TEST_SAMPLE = c.test_sample
    predicted_list = c.no_logic(TEST_SAMPLE)

    actual = []
    predicted = []

    # Each entry is a 4-tuple; the third field is collected as the actual
    # value and the fourth as the predicted one.
    for (_first, _second, actual_item, predicted_item) in predicted_list:
        actual.append(actual_item)
        predicted.append(predicted_item)

    # NOTE(review): the label printed below says "at 10" but apk is called
    # with k=100 -- confirm which of the two is intended.
    map_val = apk(actual, predicted, 100)

    print("Mean Average Precision at 10")
    print(map_val)

    # Keep the result under its own name: assigning it to ``fscore`` would
    # replace the function and make any later call fail.
    fscore_val = fscore(actual, predicted)

    print("F-score")
    print(fscore_val)


    #logistic = c.logistic()
示例#8
0
from average_precision import apk, mapk

# This script exercises the MAP scoring functions: the only relevant item (1)
# is moved from rank 1 to rank 5 of the prediction, AP@5 is printed for each
# position, and MAP@5 over the five cases is printed last.
# NOTE(review): with the usual AP@k definition the scores should be 1/rank
# (1.0, 0.5, 0.333..., 0.25, 0.2) -- confirm against average_precision.apk.

actual = [1]

candidate_rankings = [
    [1, 2, 3, 4, 5],
    [2, 1, 3, 4, 5],
    [3, 2, 1, 4, 5],
    [4, 2, 3, 1, 5],
    [4, 2, 3, 5, 1],
]

for predicted in candidate_rankings:
    print('Answer=', actual, 'predicted=', predicted)
    print('AP@5 =', apk(actual, predicted, 5))

# Uses the print function like the lines above; the original print statement
# here could not be parsed by Python 3.
print(mapk([[1], [1], [1], [1], [1]], candidate_rankings, 5))
示例#9
0
# predicted = model.predict(X_test)
# print predicted

# Generate class probabilities and keep, for every test row, the indices of
# the 5 highest-probability columns.
probs = model.predict_proba(X_test)  # matrix
# ``.values`` replaces ``as_matrix()``, which pandas deprecated and then
# removed in 1.0; it returns the same underlying array.
y_test_ls = y_test.values
map5score = []

# NOTE(review): predict_top5 holds column positions, not class labels; this
# matches y_test only if the model's classes are 0..n-1 -- confirm.
for i, ls in enumerate(probs):
    # range() works on Python 2 and 3 (xrange is Python 2 only).
    predict_top5 = heapq.nlargest(5, range(len(ls)), key=ls.__getitem__)
    actual = [y_test_ls[i]]
    score = apk(actual, predict_top5, 5)
    map5score.append(score)

# map5score = mapk(y_test_ls,predict_top5,5)

# Single-argument print(...) emits the same text on Python 2 and 3.
print("the testing data MAP5 is: %f " % np.mean(map5score))

# Feature analysis: names of the 10 features with the largest importances.
ls_feat_val = model.feature_importances_
ls_feat_name = list(X_train.columns.values)

ls_index = heapq.nlargest(10,
                          range(len(ls_feat_val)),
                          key=ls_feat_val.__getitem__)
ls_feat_name_top = [ls_feat_name[idx] for idx in ls_index]
示例#10
0
# predicted = model.predict(X_test)
# print predicted

# Generate class probabilities and keep, for every test row, the indices of
# the 5 highest-probability columns.
probs = model.predict_proba(X_test)  # matrix
# ``.values`` replaces ``as_matrix()``, which pandas deprecated and then
# removed in 1.0; it returns the same underlying array.
y_test_ls = y_test.values
map5score = []

# NOTE(review): predict_top5 holds column positions, not class labels; this
# matches y_test only if the model's classes are 0..n-1 -- confirm.
for i, ls in enumerate(probs):
    # range() works on Python 2 and 3 (xrange is Python 2 only).
    predict_top5 = heapq.nlargest(5, range(len(ls)), key=ls.__getitem__)
    actual = [y_test_ls[i]]
    score = apk(actual, predict_top5, 5)
    map5score.append(score)

# map5score = mapk(y_test_ls,predict_top5,5)

# Single-argument print(...) emits the same text on Python 2 and 3.
print("the testing data MAP5 is: %f " % np.mean(map5score))

# Feature analysis: names of the 10 features with the largest importances.
ls_feat_val = model.feature_importances_
ls_feat_name = list(X_train.columns.values)

ls_index = heapq.nlargest(10, range(len(ls_feat_val)), key=ls_feat_val.__getitem__)
ls_feat_name_top = [ls_feat_name[idx] for idx in ls_index]
print(ls_feat_name_top)