def calculate_mapk_extended_true_pred(class_hierarchy, y_true, y_pred, k):
    """Average the ``apk`` score of hierarchy-extended label sets.

    For every (true, predicted) pair, each label collection is concatenated
    with the result of ``class_hierarchy._get_ancestors_of`` for that same
    collection, de-duplicated into a set, and the two sets are scored with
    ``apk``.

    Args:
        class_hierarchy: object exposing ``_get_ancestors_of(labels)``. Its
            result is added to a list, so it presumably returns a list --
            verify against the hierarchy class.
        y_true: iterable of per-sample true label collections.
        y_pred: iterable of per-sample predicted label collections, matched
            with ``y_true`` by position.
        k: cutoff passed through unchanged to ``apk``.

    Returns:
        ``np.mean`` of the per-sample scores.
    """

    def _with_ancestors(labels):
        # Labels plus their ancestors, with duplicates removed.
        return set(list(labels) + class_hierarchy._get_ancestors_of(labels))

    scores = [
        apk(_with_ancestors(truth), _with_ancestors(guess), k)
        for truth, guess in zip(y_true, y_pred)
    ]
    return np.mean(scores)
def AP(prediction, label, names):
    """Score one ranked retrieval with ``apk``.

    Entries whose label has absolute value 1 take part in the ranking;
    entries whose label equals 1 form the ground truth.

    Args:
        prediction: array of scores, indexable by a boolean mask.
        label: array compared element-wise against 1 (and, via ``np.abs``,
            against -1).
        names: array of identifiers, indexable by the same masks.

    Returns:
        ``0.0`` when no label equals 1. Otherwise the value of ``apk`` for
        the ground-truth names versus the retained names ordered by
        descending score, with the cutoff set to the number of retained
        entries.
    """
    labelled = np.abs(label) == 1
    positive = label == 1
    if not np.any(positive):
        return 0.0

    kept_scores = prediction[labelled]
    # argsort is ascending; reverse it so the highest score comes first.
    ranking = np.argsort(kept_scores)[::-1]
    ranked_names = names[labelled][ranking]
    return apk(names[positive], ranked_names, len(kept_scores))
def prediction_eval(self, prediction, reference):
    """Compare predicted positive-class probabilities with reference labels.

    Args:
        prediction: mapping from event to the predicted probability of the
            positive class. Every key of ``reference`` is looked up in it.
        reference: mapping from event to its real label. Events whose label
            equals ``1.0`` are the "actual" events for the ranking metrics.

    Returns:
        dict with three entries:
            ``log_loss``: ``log_loss`` of the reference labels against
                ``[1 - p, p]`` rows, taken in sorted key order.
            ``avg_len_actual``: ``apk`` with ``k`` equal to the number of
                actual events.
            ``avg_len_predic``: ``apk`` with ``k`` equal to the number of
                predicted events.

    Raises:
        KeyError: if ``prediction`` is a dict that lacks a key present in
            ``reference``.
    """
    # Only keys of ``reference`` are looked up for the log loss, so extra
    # entries in ``prediction`` do not affect it.
    keys = sorted(reference.keys())
    real_labels = [reference[key] for key in keys]
    predicted_labels = [[1 - prediction[key], prediction[key]] for key in keys]

    actual_ev = [ev for ev, s in reference.items() if s == 1.0]

    # A ranked average-precision metric rewards hits near the front of the
    # list, so the events must go from most to least probable. The previous
    # ascending sort put the least likely events first.
    # NOTE(review): assumes ``apk`` is the usual order-sensitive AP@k.
    ranked = sorted(prediction.items(), key=itemgetter(1), reverse=True)
    predicted_ev = [ev for ev, _ in ranked]

    return {
        u'log_loss': log_loss(real_labels, predicted_labels),
        u'avg_len_actual': apk(actual_ev, predicted_ev, k=len(actual_ev)),
        u'avg_len_predic': apk(actual_ev, predicted_ev, k=len(predicted_ev)),
    }
pred_dv_file = "val_sub_d8_i500.csv"

# Walk the actual and predicted CSV files in lockstep (one row per user, in
# the same order) and average the per-user AP@10.
user_count = 0
summation = 0

# ``with`` closes both files even if a row fails validation or scoring.
with open(actual_dv_file) as actual_dv_handle:
    with open(pred_dv_file) as pred_dv_handle:
        actual_reader = csv.DictReader(actual_dv_handle)
        pred_reader = csv.DictReader(pred_dv_handle)

        for act_row in actual_reader:
            # The next() builtin works on Python 2.6+ and Python 3, unlike
            # the Python-2-only ``reader.next()`` method.
            pred_row = next(pred_reader)

            # Explicit check instead of ``assert`` so it still runs under -O.
            if act_row["USER_ID_hash"] != pred_row["USER_ID_hash"]:
                raise ValueError(
                    "row mismatch: actual user %r vs predicted user %r"
                    % (act_row["USER_ID_hash"], pred_row["USER_ID_hash"])
                )

            if act_row["PURCHASED_COUPONS"]:
                actual_coupons = act_row["PURCHASED_COUPONS"].split(" ")
            else:
                actual_coupons = []
            pred_coupons = pred_row["PURCHASED_COUPONS"].split(" ")

            # Alternative scorer previously tried here:
            # map_at_10 = computeMAP(actual_coupons, pred_coupons, m=10, n=10)
            map_at_10 = apk(actual_coupons, pred_coupons, k=10)
            summation += map_at_10
            user_count += 1

print(user_count)
print(summation)

# Avoid ZeroDivisionError when the actual file has no data rows.
overall_map_at_10 = summation / float(user_count) if user_count else 0.0
print(overall_map_at_10)
from average_precision import apk, mapk

# Sanity check for the MAP scoring functions: the single correct answer is
# moved from rank 1 down to rank 5 and AP@5 is printed for each position,
# followed by MAP@5 over all five rankings.
actual = [1]
rankings = [
    [1, 2, 3, 4, 5],
    [2, 1, 3, 4, 5],
    [3, 2, 1, 4, 5],
    [4, 2, 3, 1, 5],
    [4, 2, 3, 5, 1],
]

for predicted in rankings:
    print('Answer=', actual, 'predicted=', predicted)
    print('AP@5 =', apk(actual, predicted, 5))

# Parenthesised call: identical output on Python 2, and valid on Python 3
# where the bare ``print x`` statement is a SyntaxError.
print(mapk([actual for _ in rankings], rankings, 5))
return fscore if __name__=="__main__": TEST_SAMPLE = [(31367867,1329369)] POST_DATA_FILE = "../data/trainPosts.json" c = Predictions() # TEST_SAMPLE = new_test(POST_DATA_FILE) TEST_SAMPLE = c.test_sample predicted_list = c.no_logic(TEST_SAMPLE) actual = [] predicted = [] for (i,j,l,label) in predicted_list: actual.append(l) predicted.append(label) map_val = apk(actual,predicted,100) print "Mean Average Precision at 10" print map_val fscore = fscore(actual, predicted) print "F-score" print fscore #logistic = c.logistic()
from average_precision import apk, mapk

# Sanity check for the MAP scoring functions: the one relevant item is placed
# at ranks 1 through 5 in turn, AP@5 is printed for each case, and MAP@5 over
# the five cases is printed last.
actual = [1]
cases = (
    [1, 2, 3, 4, 5],
    [2, 1, 3, 4, 5],
    [3, 2, 1, 4, 5],
    [4, 2, 3, 1, 5],
    [4, 2, 3, 5, 1],
)

for predicted in cases:
    print('Answer=', actual, 'predicted=', predicted)
    print('AP@5 =', apk(actual, predicted, 5))

# A single parenthesised argument prints the same on Python 2 and also parses
# on Python 3, unlike the former ``print mapk(...)`` statement.
print(mapk([[1] for _ in cases], list(cases), 5))
# predicted = model.predict(X_test)
# print predicted
# Generate class probabilities and score the five most probable classes of
# every test row with AP@5.
probs = model.predict_proba(X_test)  # matrix
# ``.values`` exists on every pandas version; ``as_matrix()`` was deprecated
# and later removed.
y_test_ls = y_test.values

# For scikit-learn style estimators, column j of predict_proba belongs to
# ``model.classes_[j]``. Translate column indices into labels before
# comparing with the true label, otherwise the score is only right when the
# labels are exactly 0..n_classes-1. Estimators without ``classes_`` keep the
# previous index-based behaviour.
class_labels = getattr(model, "classes_", None)

map5score = []
for i, ls in enumerate(probs):
    top5_columns = heapq.nlargest(5, range(len(ls)), key=ls.__getitem__)
    if class_labels is None:
        predict_top5 = top5_columns
    else:
        predict_top5 = [class_labels[col] for col in top5_columns]
    actual = [y_test_ls[i]]
    score = apk(actual, predict_top5, 5)
    map5score.append(score)

print("the testing data MAP5 is: %f " % np.mean(map5score))

# Feature analysis: names of the ten features with the largest importance.
ls_feat_val = model.feature_importances_
ls_feat_name = list(X_train.columns.values)
ls_index = heapq.nlargest(10, range(len(ls_feat_val)), key=ls_feat_val.__getitem__)
ls_feat_name_top = [ls_feat_name[idx] for idx in ls_index]
# predicted = model.predict(X_test)
# print predicted
# Generate class probabilities, take the five most probable classes for each
# test row, and score them against the true label with AP@5.
probs = model.predict_proba(X_test)  # matrix
# ``as_matrix()`` is gone from current pandas; ``.values`` works everywhere.
y_test_ls = y_test.values

# predict_proba columns follow ``model.classes_`` in scikit-learn style
# estimators, so column positions are mapped to class labels before scoring.
# Without this the comparison is only valid for labels 0..n_classes-1. If the
# estimator has no ``classes_`` attribute, the raw column positions are used
# as before.
class_labels = getattr(model, "classes_", None)

map5score = []
for i, ls in enumerate(probs):
    best_columns = heapq.nlargest(5, range(len(ls)), key=ls.__getitem__)
    if class_labels is not None:
        predict_top5 = [class_labels[col] for col in best_columns]
    else:
        predict_top5 = best_columns
    actual = [y_test_ls[i]]
    score = apk(actual, predict_top5, 5)
    map5score.append(score)

print("the testing data MAP5 is: %f " % np.mean(map5score))

# Feature analysis: report the ten features with the largest importance.
ls_feat_val = model.feature_importances_
ls_feat_name = list(X_train.columns.values)
ls_index = heapq.nlargest(10, range(len(ls_feat_val)), key=ls_feat_val.__getitem__)
ls_feat_name_top = [ls_feat_name[idx] for idx in ls_index]
print(ls_feat_name_top)