def one_fold():
    """Score every single-feature top-k prediction file and export to Excel.

    For each (paper field, feature type) combination the matching
    ``train.cos.<info>.<feature>.<sample>.top<size>`` CSV is loaded. The CSV
    must provide a ``paper_id`` column (the expected paper for each row) and a
    ``topk`` column holding a comma-separated list of predicted paper ids.
    ``mean_ap_k`` and ``mean_suc_k`` are then evaluated for every cut-off in
    ``k_list``; the two scores are printed and stored in the ``avg_map_k`` and
    ``avg_suc_k`` worksheets (one row per combination, one column per k).

    The ``var_map_k`` and ``var_suc_k`` worksheets only receive their header
    row and row labels; no values are written to them here.

    Returns:
        None. The results are written to an ``.xlsx`` workbook on disk.
    """
    path = 'D:/output/topk/random/'
    # path = '/mnt/sata_disk/hossein/wsdmcup2020/output/'
    # NOTE(review): with the active `path` the file names below resolve to
    # '.../topk/random/topk/random/...'; the extra 'topk/random/' only looks
    # right for the commented-out root above -- confirm the intended layout.
    k_list = [3, 5, 10, 100]
    random_sample_size = 1000
    top_relevant_paper_size = 100
    feature_types = [
        'bm.1_1_gram', 'bm.1_2_gram',
        'tf.1_1_gram', 'tf.1_2_gram',
        'tfidf.1_1_gram', 'tfidf.1_2_gram',
        'emd.50', 'emd.100', 'emd.200', 'emd.300',
        'glv.emd.50', 'glv.emd.100', 'glv.emd.200', 'glv.emd.300',
        'd2v.50', 'd2v.100', 'd2v.200', 'd2v.300',
    ]
    paper_info = ['title', 'abstract', 'journal']

    workbook = xlsxwriter.Workbook(
        path + 'topk/random/eval_one_fold.{}.{}.xlsx'.format(
            random_sample_size, top_relevant_paper_size))
    ws_avg_map_k = workbook.add_worksheet('avg_map_k')
    ws_var_map_k = workbook.add_worksheet('var_map_k')
    ws_avg_suc_k = workbook.add_worksheet('avg_suc_k')
    ws_var_suc_k = workbook.add_worksheet('var_suc_k')

    # Each sheet paired with the header template of its metric. The variance
    # sheet for suc@k is labelled 'var_suc' (it used to be mislabelled
    # 'avg_suc').
    sheet_headers = (
        (ws_avg_map_k, 'avg_map@{}'),
        (ws_var_map_k, 'var_map@{}'),
        (ws_avg_suc_k, 'avg_suc@{}'),
        (ws_var_suc_k, 'var_suc@{}'),
    )

    # The header row is identical for every data row, so write it once.
    for sheet, template in sheet_headers:
        for col, k in enumerate(k_list, start=1):
            sheet.write(0, col, template.format(k))

    row = 0
    for info in paper_info:
        for feature_type in feature_types:
            row += 1
            row_label = '{}.{}'.format(info, feature_type)
            for sheet, _ in sheet_headers:
                sheet.write(row, 0, row_label)

            # The prediction file does not depend on k: load and parse it
            # once per combination instead of once per cut-off.
            predictions = pd.read_csv(
                path + 'topk/random/train.cos.{}.{}.{}.top{}'.format(
                    info, feature_type, random_sample_size,
                    top_relevant_paper_size),
                header=0)
            # One single-element list of relevant ids per row.
            trues = [[t] for t in predictions['paper_id'].tolist()]
            # Ranked predictions, split out of the comma-separated column.
            preds = [str(p).split(',') for p in predictions['topk']]

            for col, k in enumerate(k_list, start=1):
                # Compute each metric once and reuse it for print and sheet.
                map_k = mean_ap_k(true=trues, pred=preds, k=k)
                suc_k = mean_suc_k(true=trues, pred=preds, k=k)
                print('map@{}:{}.{}:{}'.format(k, info, feature_type, map_k))
                print('suc@{}:{}.{}:{}'.format(k, info, feature_type, suc_k))
                ws_avg_map_k.write(row, col, map_k)
                ws_avg_suc_k.write(row, col, suc_k)

    workbook.close()
def one_fold_mixed():
    """Score every mixed (two-feature) top-k prediction file and export to Excel.

    Every ordered pair of distinct (paper field, feature type) sources is
    evaluated. For a pair the matching
    ``train.cos.<info1>.<feature1>.mix.<info2>.<feature2>.<sample>.top<size>``
    CSV is loaded; it must provide a ``paper_id`` column (the expected paper
    for each row) and a ``topk`` column holding a comma-separated list of
    predicted paper ids. ``mean_ap_k`` and ``mean_suc_k`` are evaluated for
    every cut-off in ``k_list`` and stored in the ``avg_map_k`` and
    ``avg_suc_k`` worksheets (one row per pair, one column per k).

    The ``var_map_k`` and ``var_suc_k`` worksheets only receive their header
    row and row labels; no values are written to them here.

    Returns:
        None. The results are written to an ``.xlsx`` workbook on disk.
    """
    path = 'D:/output/topk/random/'
    # path = '/mnt/sata_disk/hossein/wsdmcup2020/output/'
    k_list = [3, 5, 10, 100]
    random_sample_size = 1000
    top_relevant_paper_size = 100
    feature_types = [
        'bm.1_1_gram', 'bm.1_2_gram', 'tfidf.1_1_gram', 'tfidf.1_2_gram'
    ]
    paper_info = ['title', 'abstract']

    workbook = xlsxwriter.Workbook(
        path + 'eval_one_fold.mix.{}.{}.xlsx'.format(random_sample_size,
                                                     top_relevant_paper_size))
    ws_avg_map_k = workbook.add_worksheet('avg_map_k')
    ws_var_map_k = workbook.add_worksheet('var_map_k')
    ws_avg_suc_k = workbook.add_worksheet('avg_suc_k')
    ws_var_suc_k = workbook.add_worksheet('var_suc_k')

    # Each sheet paired with the header template of its metric. The variance
    # sheet for suc@k is labelled 'var_suc' (it used to be mislabelled
    # 'avg_suc').
    sheet_headers = (
        (ws_avg_map_k, 'avg_map@{}'),
        (ws_var_map_k, 'var_map@{}'),
        (ws_avg_suc_k, 'avg_suc@{}'),
        (ws_var_suc_k, 'var_suc@{}'),
    )

    # The header row is identical for every data row, so write it once.
    for sheet, template in sheet_headers:
        for col, k in enumerate(k_list, start=1):
            sheet.write(0, col, template.format(k))

    # All (field, feature) sources, field-major, so that pairing them below
    # visits combinations in the order info1, feature1, info2, feature2.
    sources = [(info, feature_type)
               for info in paper_info
               for feature_type in feature_types]

    row = 0
    for info1, feature_type1 in sources:
        for info2, feature_type2 in sources:
            # A source is never mixed with itself.
            if info1 == info2 and feature_type1 == feature_type2:
                continue

            logger.info(
                'train.cos.{}.{} mixed with train.cos.{}.{} ...'.
                format(info1, feature_type1, info2, feature_type2))
            row += 1
            row_label = '{}.{}.mix.{}.{}'.format(info1, feature_type1,
                                                 info2, feature_type2)
            for sheet, _ in sheet_headers:
                sheet.write(row, 0, row_label)

            # The prediction file does not depend on k: load and parse it
            # once per pair instead of once per cut-off.
            predictions = pd.read_csv(
                path + 'train.cos.{}.{}.mix.{}.{}.{}.top{}'.format(
                    info1, feature_type1, info2, feature_type2,
                    random_sample_size, top_relevant_paper_size))
            # One single-element list of relevant ids per row.
            trues = [[t] for t in predictions['paper_id'].tolist()]
            # Ranked predictions, split out of the comma-separated column.
            preds = [str(p).split(',') for p in predictions['topk']]

            for col, k in enumerate(k_list, start=1):
                ws_avg_map_k.write(
                    row, col, mean_ap_k(true=trues, pred=preds, k=k))
                ws_avg_suc_k.write(
                    row, col, mean_suc_k(true=trues, pred=preds, k=k))

    workbook.close()