def compute_mapk(interactions_dict, recomendations_dict):
    """Compute MAP over the users present in *recomendations_dict*.

    Parameters
    ----------
    interactions_dict : mapping uid -> iterable of ground-truth item ids.
    recomendations_dict : mapping uid -> iterable of recommended item ids.

    Returns
    -------
    float -- mean average precision as computed by ``mapk`` (presumably
    ml_metrics' ``mapk``; verify against the module imports).

    Raises KeyError if a user in recomendations_dict is missing from
    interactions_dict (same as the original behavior).
    """
    # Only the keys are needed, so iterate the dict directly instead of
    # .items(); build both lists from the same key order so actual[i] and
    # pred[i] refer to the same user.
    users = list(recomendations_dict)
    actual = [list(interactions_dict[u]) for u in users]
    pred = [list(recomendations_dict[u]) for u in users]
    return mapk(actual, pred)
def objective(params): """ objective function for lightgbm. """ # hyperopt casts as float params['num_boost_round'] = int(params['num_boost_round']) params['num_leaves'] = int(params['num_leaves']) # need to be passed as parameter params['verbose'] = -1 params['seed'] = 1 cv_result = lgb.cv( params, train, nfold=3, metrics='rmse', num_boost_round=params['num_boost_round'], early_stopping_rounds=20, stratified=False, ) early_stop_dict[objective.i] = len(cv_result['rmse-mean']) params['num_boost_round'] = len(cv_result['rmse-mean']) model = lgb.LGBMRegressor(**params) model.fit(train.data,train.label,feature_name=all_cols,categorical_feature=cat_cols) preds = model.predict(X_valid) df_eval['interest'] = preds recomendations_dict = self.recomendations_dictionary(df_eval, ranking_metric='interest') actual = [] pred = [] for k,_ in recomendations_dict.items(): actual.append(list(interactions_valid_dict[k])) pred.append(list(recomendations_dict[k])) result = mapk(actual,pred) if verbose: print("INFO: iteration {} MAP {:.3f}".format(objective.i, result)) objective.i+=1 return 1-result
accuracy.rmse(predictions, verbose=True)


def get_top_n(predictions, n=10):
    """Group rating predictions by user and keep the n best per user.

    Returns a dict-like mapping uid -> list of (item_id, estimated_rating)
    pairs, sorted by estimated rating in descending order, truncated to n.
    """
    per_user = defaultdict(list)
    for uid, iid, _true_r, est, _details in predictions:
        per_user[uid].append((iid, est))
    # Replace each user's full list with its n highest-estimated entries.
    for uid in per_user:
        per_user[uid] = sorted(per_user[uid], key=lambda pair: pair[1], reverse=True)[:n]
    return per_user


top_n = get_top_n(predictions, n=7)

# Evaluate with MAP: the ground truth for each test row is the single
# project the user actually interacted with.
actual_list = [[pid] for pid in test['project_id'].values]
predicted_list = [
    [project_id for (project_id, _) in top_n[uid]]
    for uid in test['userCode'].values
]
print(average_precision.mapk(actual_list, predicted_list))
# recommend_items(sample_userCode, 7)
# Load the evaluation users: a tiny fixture when smoke-testing, the full
# semicolon-delimited file otherwise.
if is_test:
    test = pd.read_csv('./input/test_tiny.csv', nrows=50)
else:
    test = pd.read_csv('./input/testing_users.csv', delimiter=';')

# Build one ranked 7-item recommendation list per user, skipping projects
# the user already visited plus the globally ignored ones.
predicted_list = []
with tqdm.tqdm(total=len(test)) as bar:
    for user in test['userCode']:
        blocked = visited_dict[user] + ignore_project
        predicted_list.append(recommend_items(user, 7, items_to_ignore=blocked))
        bar.update(1)

evaluate = 1
if evaluate:
    # Ground truth: the single project each test user chose.
    actual_list = [[pid] for pid in test['project_id'].values]
    print('{:.10f}'.format(
        average_precision.mapk(actual_list, predicted_list, k=7)))

to_csv = 1
if to_csv:
    # Submission format: space-separated project ids per user.
    test['project_id'] = [' '.join(str(p) for p in recs) for recs in predicted_list]
    test.to_csv('submission.csv', index=False)
# Score every project per user with the LightFM model and keep the top 7
# unvisited ones.
predicted_list = []
with tqdm.tqdm(total=len(test)) as progress:
    for uid in test['userCode'].unique():
        # model.predict wants the internal integer user index, not the raw
        # userCode, plus the full range of item indices to score.
        predictions = model.predict(
            unique_user_list.index(uid),
            np.arange(num_project),
            user_features=user_feature_matrix,
            item_features=item_feature_matrix,
        )
        # Rank projects by descending score; collect the first 7 the user
        # has not already visited.
        top_items = unique_project.iloc[np.argsort(-predictions)]
        top_list = []
        for project_id in top_items.values:
            if project_id not in visited_dict[uid]:  # TODO: also skip ignore_project?
                top_list.append(project_id)
                if len(top_list) >= 7:
                    break
        predicted_list.append(top_list)
        progress.update(1)

# BUG FIX: evaluate BEFORE the submission branch, because that branch
# overwrites test['project_id'] with the predictions themselves -- with the
# original order, enabling to_csv would have made the MAP score meaningless.
# NOTE(review): predicted_list is built from unique() userCodes while
# actual_list is per test row; confirm test has one row per user.
if is_evaluate:
    actual_list = [[pid] for pid in test['project_id'].values]
    print('%.10f' % average_precision.mapk(actual_list, predicted_list, k=7))

to_csv = 0
if to_csv:
    # Submission format: space-separated project ids per user.
    test['project_id'] = [' '.join(map(str, pre)) for pre in predicted_list]
    test[['userCode', 'project_id']].to_csv(
        'submission_{}.csv'.format('lightfm'), index=False)