def main(_log, _config):
    p = _config
    modelname = file2name[p['modelfn']]
    mod_model = importlib.import_module('models.%s' % p['modelfn'])
    model_cls = getattr(mod_model, modelname)
    model_params_raw = {k: v for k, v in p.items() if k in model_cls.params or k == 'modelfn'}
    list_of_model_params = get_model_param(model_params_raw)
    expids = list()
    for model_params in list_of_model_params:
        model = model_cls(model_params, rnd_seed=p['seed'])
        expid = model.params_to_string(model_params, True)
        expids.append(expid)
    raw_expid = model.params_to_string(model_params_raw, True)

    for train_years in train_test_years:
        for i in range(len(train_test_years[train_years])):
            test_year, val_year = train_test_years[train_years][i], train_test_years[train_years][1 - i]
            # train_years = 'wt09_10'
            # test_year = 'wt11'
            # val_year = 'wt11'
            pred_dirs, val_dirs = list(), list()
            for expid in expids:
                pred_dir = '%s/train_%s/%s/predict_per_epoch/test_%s/%s' % \
                    (p['parentdir'], train_years, p['expname'], test_year, expid)
                val_dir = '%s/train_%s/%s/predict_per_epoch/test_%s/%s' % \
                    (p['parentdir'], train_years, p['expname'], val_year, expid)
                if not os.path.isdir(pred_dir) or not os.path.isdir(val_dir):
                    warnings.warn('No such dir {0}/{1}'.format(pred_dir, val_dir), RuntimeWarning)
                    continue
                pred_dirs.append(pred_dir)
                val_dirs.append(val_dir)

            output_file = '%s/train_%s/%s/evaluations/statdocpair/%s_v-%s_t-%s/%s' % \
                (p['outdir'], train_years, p['expname'],
                 '-'.join(train_years.split('_')), val_year[2:], test_year[2:], raw_expid)
            try:
                if not os.path.isdir(os.path.dirname(output_file)):
                    os.makedirs(os.path.dirname(output_file))
            except OSError:
                pass
            _log.info('evaluate {0} on {1} based on val {2} '
                      'over docpairs benchmark and output to {3}'.format(expid, test_year, val_year, output_file))

            test_qids = year_qids[test_year]
            qrelf = get_qrelf(qrelfdir, test_year)
            qid_cwid_label = read_qrel(qrelf, test_qids, include_spam=False)
            year_pkey_docpairs = create_docpairs(qid_cwid_label, test_qids, qid_year)
            best_pred_dir, argmax_epoch, argmax_run, argmax_ndcg, argmax_err = \
                get_epoch_from_val(pred_dirs, val_dirs)
            qid_cwid_invrank, _, runid = read_run(os.path.join(best_pred_dir, argmax_run))
            pkey_qid_acc = eval_docpair_predaccuracy(qid_cwid_invrank, year_pkey_docpairs, test_year)
            sorted_data = sorted(pkey_qid_acc)
            print("pkey_qid_acc : ", sorted_data)
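# The helpers used above (get_epoch_from_val, read_run, eval_docpair_predaccuracy, ...) are
# defined elsewhere in the repository. For orientation only: a minimal sketch of an
# epoch-selection helper in the spirit of get_epoch_from_val, assuming the
# '<epoch>_<ndcg>_<map>_<err>_<loss>.run' filename convention that pred() uses when writing
# per-epoch runs. The real implementation may differ (e.g. in tie-breaking or in how it
# matches validation runs to test runs).
def _sketch_get_epoch_from_val(pred_dirs, val_dirs):
    best = None  # (ndcg, err, epoch, pred_dir, run_filename)
    for pred_dir, val_dir in zip(pred_dirs, val_dirs):
        for fn in os.listdir(val_dir):
            if not fn.endswith('.run'):
                continue
            epoch_s, ndcg_s, _map_s, err_s, _loss_s = fn[:-4].split('_')
            epoch, ndcg, err = int(epoch_s), float(ndcg_s), float(err_s)
            # pick the test-year run of the same epoch under pred_dir
            cand = [f for f in os.listdir(pred_dir)
                    if f.endswith('.run') and int(f.split('_')[0]) == epoch]
            if not cand:
                continue
            if best is None or ndcg > best[0]:
                best = (ndcg, err, epoch, pred_dir, cand[0])
    if best is None:
        raise ValueError('no validation runs found')
    argmax_ndcg, argmax_err, argmax_epoch, best_pred_dir, argmax_run = best
    return best_pred_dir, argmax_epoch, argmax_run, argmax_ndcg, argmax_err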
def pred(_log, _config):
    p = _config
    modelname = file2name[p['modelfn']]
    mod_model = importlib.import_module('models.%s' % p['modelfn'])
    model_cls = getattr(mod_model, modelname)
    model_params = {k: v for k, v in p.items() if k in model_cls.params or k == 'modelfn'}
    model = model_cls(model_params, rnd_seed=p['seed'])
    expid = model.params_to_string(model_params)

    outdir_plot = trunc_dir('%s/train_%s/%s/predict_per_epoch/test_%s' %
                            (p['parentdir'], p['train_years'], p['expname'], p['test_year']))
    outdir_run = trunc_dir('%s/%s' % (outdir_plot, expid))
    tmp_dir = trunc_dir(os.path.join(outdir_run, 'tmp'))
    weight_dir = trunc_dir('%s/train_%s/%s/model_weight/%s' %
                           (p['parentdir'], p['train_years'], p['expname'], expid))
    detail_outdir = trunc_dir('%s/train_%s/%s/model_detail/' %
                              (p['parentdir'], p['train_years'], p['expname']))
    assert os.path.isdir(weight_dir), \
        "weight_dir " + weight_dir + " does not exist. Make sure you trained the model."
    assert os.path.isdir(detail_outdir), \
        "detail_outdir " + detail_outdir + " does not exist. Make sure you trained the model."
    if len(os.listdir(weight_dir)) < 1:
        raise SoftFailure('weight dir empty')

    try:
        if not os.path.isdir(outdir_run):
            os.makedirs(outdir_run)
            os.makedirs(tmp_dir)
    except OSError:
        pass
    _log.info('Processing {0}'.format(outdir_run))

    label2tlabel = {4: 2, 3: 2, 2: 2, 1: 1, 0: 0, -2: 0}
    topk4eval = 20
    NGRAM_NFILTER, N_GRAMS = get_ngram_nfilter(p['winlen'], p['qproximity'], p['maxqlen'], p['xfilters'])
    _log.info('process {0} and output to {1}'.format(weight_dir, outdir_run))
    _log.info('{0} {1} {2} {3} {4}'.format(p['distill'], 'NGRAM_NFILTER', NGRAM_NFILTER, 'N_GRAMS', N_GRAMS))

    # prepare test data
    qids = get_train_qids(p['test_year'])
    qrelf = get_qrelf(qrelfdir, p['test_year'])
    qid_cwid_label = read_qrel(qrelf, qids, include_spam=False)
    test_qids = [qid for qid in qids if qid in qid_cwid_label]
    _log.info('%s test_num %d ' % (p['test_year'], len(test_qids)))

    f_ndcg = dict()
    f_epochs = set()
    # sort weights by time and only use the first weights for each epoch
    # (in case there are duplicate weights from a failed/re-run train)
    for f in sorted(os.listdir(weight_dir), key=lambda x: os.path.getctime(os.path.join(weight_dir, x))):
        if f.split('.')[-1] != 'h5':
            continue
        cols = f.split('.')[0].split('_')
        if len(cols) == 4:
            nb_epoch, loss, n_batch, n_samples = int(cols[0]), int(cols[1]), int(cols[2]), int(cols[3])
            if nb_epoch <= p['epochs'] and nb_epoch not in f_epochs:
                f_epochs.add(nb_epoch)
                f_ndcg[f] = (nb_epoch, loss, n_batch, n_samples)

    finished_epochs = {}
    for fn in sorted(os.listdir(outdir_run), key=lambda x: os.path.getctime(os.path.join(outdir_run, x))):
        if fn.endswith(".run"):
            fields = fn[:-4].split("_")  # trim .run
            assert len(fields) == 5
            epoch, loss = int(fields[0]), int(fields[4])
            ndcg, mapv, err = float(fields[1]), float(fields[2]), float(fields[3])
            if epoch in finished_epochs:
                _log.error("TODO two weights exist for same epoch")
            finished_epochs[epoch] = (epoch, err, ndcg, mapv, loss)
    _log.info('skipping finished epochs: {0}'.format(finished_epochs))

    def model_pred(NGRAM_NFILTER, weight_file, test_data, test_docids, test_qids):
        dump_modelplot(model.build(), detail_outdir + 'predplot_' + expid)
        model_predict = model.build_from_dump(weight_file)
        qid_cwid_pred = pred_label(model_predict, test_data, test_docids, test_qids)
        return qid_cwid_pred

    test_doc_vec, test_docids, test_qids = load_test_data(qids, rawdoc_mat_dir, qid_cwid_label, N_GRAMS, p)
    epoch_err_ndcg_loss = list()
    _log.info('start {0} {1} {2}'.format(expid, p['train_years'], p['test_year']))
    for f in sorted(f_ndcg, key=lambda x: f_ndcg[x][0]):
        nb_epoch, loss, n_batch, n_samples = f_ndcg[f]
        if nb_epoch in finished_epochs:
            epoch_err_ndcg_loss.append(finished_epochs[nb_epoch])
            continue
        weight_file = os.path.join(weight_dir, f)
        qid_cwid_pred = model_pred(NGRAM_NFILTER, weight_file, test_doc_vec, test_docids, test_qids)
        ndcg20, err20, mapv = eval_run(_log, qid_cwid_pred, expid, perlf, treceval, tmp_dir, topk4eval, qrelf)
        loss = int(loss)
        out_name = '%d_%0.4f_%0.4f_%0.4f_%d.run' % (nb_epoch, ndcg20, mapv, err20, loss)
        epoch_err_ndcg_loss.append((nb_epoch, err20, ndcg20, mapv, loss))
        print_run(qid_cwid_pred, outdir_run, out_name, expid)
        _log.info('finished {0}'.format(f))
    _log.info('finish {0} {1} {2}'.format(expid, p['train_years'], p['test_year']))
    plot_curve(epoch_err_ndcg_loss, outdir_plot, expid, p)

    if max(f_epochs) < p['epochs'] - 3:
        raise SoftFailure('prediction finished, but not all epochs are available yet. '
                          'last epoch found: %s' % max(f_epochs))
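# SoftFailure and print_run come from the repo's utilities. For orientation only: SoftFailure
# is presumably a plain exception used to signal a "retry later" condition, and print_run
# plausibly writes the per-epoch predictions in standard TREC run format
# ("qid Q0 docno rank score runid"). The following is a hedged sketch under those assumptions,
# not the repository's actual implementation.
class SoftFailureSketch(Exception):
    """Non-fatal failure: prediction should be retried once more weights/epochs exist."""
    pass


def _sketch_print_run(qid_cwid_pred, outdir, out_name, runid):
    # qid_cwid_pred: {qid: {cwid: score}}; documents are ranked by descending score
    with open(os.path.join(outdir, out_name), 'w') as outf:
        for qid in sorted(qid_cwid_pred):
            ranked = sorted(qid_cwid_pred[qid].items(), key=lambda cs: cs[1], reverse=True)
            for rank, (cwid, score) in enumerate(ranked, start=1):
                outf.write('%s Q0 %s %d %f %s\n' % (qid, cwid, rank, score, runid))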
def main(_log, _config):
    p = _config
    modelname = file2name[p['modelfn']]
    # load the model to be employed, say from models/pacrr.py
    mod_model = importlib.import_module('models.%s' % p['modelfn'])
    model_cls = getattr(mod_model, modelname)
    model_params = {k: v for k, v in p.items() if k in model_cls.params or k == 'modelfn'}
    model = model_cls(model_params, rnd_seed=p['seed'])
    # create an expid based on the configured parameters
    expid = model.params_to_string(model_params)
    # the model weight files
    outdir = '%s/train_%s/%s/model_weight/%s' % (p['parentdir'], p['train_years'], p['expname'], expid)
    # the plots for the model, the training loss etc.
    detail_outdir = '%s/train_%s/%s/model_detail/' % (p['parentdir'], p['train_years'], p['expname'])
    if not os.path.isdir(detail_outdir + 'outs'):
        print(detail_outdir + 'outs')
        os.makedirs(detail_outdir + 'outs')
    _log.info('Input parameters: {0}'.format(p))

    label2tlabel = {4: 2, 3: 2, 2: 2, 1: 1, 0: 0, -2: 0}
    sample_label_prob = dict()
    _log.info('{0} {1} {2}'.format(p['expname'], p['train_years'], sample_label_prob))
    NGRAM_NFILTER, N_GRAMS = get_ngram_nfilter(p['winlen'], p['qproximity'], p['maxqlen'], p['xfilters'])
    _log.info('process and output to %s' % outdir)
    _log.info('{0} {1} {2} {3} {4}'.format(p['distill'], 'NGRAM_NFILTER', NGRAM_NFILTER, 'N_GRAMS', N_GRAMS))

    if os.path.exists(outdir) and len(os.listdir(outdir)) == p['epochs']:
        _log.info("outdir already seems to be full... exiting early")
        return

    # prepare train data
    qids = get_train_qids(p['train_years'])
    qrelf = get_qrelf(qrelfdir, p['train_years'])
    qid_cwid_label = read_qrel(qrelf, qids, include_spam=False)
    train_qids = [qid for qid in qids if qid in qid_cwid_label]
    _log.info('%s train_num %d ' % (p['train_years'], len(train_qids)))

    def plot_curve_loss(epoch_train_loss, outdir, name, plot_id, series):
        epochs, losses = zip(*list(enumerate(epoch_train_loss)))
        argmin_loss_epoch = np.argmin(epoch_train_loss)
        fig = plt.figure()
        plt.plot(epochs, losses, 'k:')
        plt.ylabel('Training Loss')
        plt.tick_params('y')
        plt.xlabel('epochs')
        plt.title('loss:%d %.3f' % (argmin_loss_epoch, epoch_train_loss[argmin_loss_epoch]))
        fig.savefig(trunc_dir(outdir) + trunc_file(name + '_' + plot_id + '.pdf'), format='pdf')
        plt.close()

    # dump model plot
    built_model = model.build()
    model.build_predict()  # run build_predict to verify it's working
    dump_modelplot(built_model, detail_outdir + 'model_' + expid)

    # callback function, dump the model and compute ndcg/map
    dump_weight = DumpWeight(outdir, batch_size=p['batch'], nb_sample=p['nsamples'])
    # in keras 2, steps_per_epoch is the number of batches per epoch, not the number of samples per epoch
    steps_per_epoch = np.int(p['nsamples'] / p['batch'])
    # the generator for training data
    train_data_generator = \
        load_train_data_generator(qids, rawdoc_mat_dir, qid_cwid_label, N_GRAMS, p,
                                  label2tlabel=label2tlabel, sample_label_prob=sample_label_prob)
    history = built_model.fit_generator(train_data_generator,
                                        steps_per_epoch=steps_per_epoch,
                                        epochs=p['epochs'],
                                        verbose=0,
                                        callbacks=[dump_weight],
                                        max_q_size=15,
                                        workers=1,
                                        pickle_safe=False)
    epoch_train_loss = history.history['loss']
    # plot the training loss for debugging
    plot_curve_loss(epoch_train_loss, detail_outdir, 'train_', expid, ['loss'])

    historyfile = detail_outdir + 'hist_' + expid + '.history'
    with open(detail_outdir + 'hist_' + expid + '.p', 'wb') as handle:
        pickle.dump(history.history, handle, protocol=pickle.HIGHEST_PROTOCOL)
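# DumpWeight is the repo's Keras callback that writes one weight file per epoch; pred() above
# expects the resulting files to be named '<epoch>_<loss>_<n_batch>_<n_samples>.h5' with four
# integer fields. Below is a minimal sketch of such a callback using Keras' standard Callback
# API; the exact loss encoding used by the real DumpWeight (e.g. any scaling before casting to
# int) is an assumption here.
from keras.callbacks import Callback


class DumpWeightSketch(Callback):
    def __init__(self, outdir, batch_size, nb_sample):
        super(DumpWeightSketch, self).__init__()
        self.outdir, self.batch_size, self.nb_sample = outdir, batch_size, nb_sample

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        loss = int(logs.get('loss', 0))  # assumption: loss stored as an integer field
        fname = '%d_%d_%d_%d.h5' % (epoch, loss, self.batch_size, self.nb_sample)
        self.model.save_weights(os.path.join(self.outdir, fname))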
def main(_log, _config):
    p = _config
    modelname = file2name[p['modelfn']]
    mod_model = importlib.import_module('models.%s' % p['modelfn'])
    model_cls = getattr(mod_model, modelname)
    model_params_raw = {k: v for k, v in p.items() if k in model_cls.params or k == 'modelfn'}
    list_of_model_params = get_model_param(model_params_raw)
    expids = list()
    for model_params in list_of_model_params:
        model = model_cls(model_params, rnd_seed=p['seed'])
        expid = model.params_to_string(model_params, True)
        expids.append(expid)
    raw_expid = model.params_to_string(model_params_raw, True)

    for train_years in train_test_years:
        for i in range(len(train_test_years[train_years])):
            test_year, val_year = train_test_years[train_years][i], train_test_years[train_years][1 - i]
            pred_dirs, val_dirs = list(), list()
            for expid in expids:
                pred_dir = '%s/train_%s/%s/predict_per_epoch/test_%s/%s' % \
                    (p['parentdir'], train_years, p['expname'], test_year, expid)
                val_dir = '%s/train_%s/%s/predict_per_epoch/test_%s/%s' % \
                    (p['parentdir'], train_years, p['expname'], val_year, expid)
                if not os.path.isdir(pred_dir) or not os.path.isdir(val_dir):
                    warnings.warn('No such dir {0}/{1}'.format(pred_dir, val_dir), RuntimeWarning)
                    continue
                pred_dirs.append(pred_dir)
                val_dirs.append(val_dir)

            output_ql = '%s/train_%s/%s/evaluations/rerank-ql/%s_v-%s_t-%s/%s' % \
                (p['outdir'], train_years, p['expname'],
                 '-'.join(train_years.split('_')), val_year[2:], test_year[2:], raw_expid)
            output_rrall = '%s/train_%s/%s/evaluations/rerank-all/%s_v-%s_t-%s/%s' % \
                (p['outdir'], train_years, p['expname'],
                 '-'.join(train_years.split('_')), val_year[2:], test_year[2:], raw_expid)
            reranked_run_dir = '%s/train_%s/%s/reranking/trecrun/v%s-t%s_%s' % \
                (p['outdir'], train_years, p['expname'], val_year, test_year, raw_expid)
            reranked_metric_dir = '%s/train_%s/%s/reranking/ndcgerr/v%s-t%s_%s' % \
                (p['outdir'], train_years, p['expname'], val_year, test_year, raw_expid)
            for d in [reranked_run_dir, reranked_metric_dir,
                      os.path.dirname(output_ql), os.path.dirname(output_rrall)]:
                try:
                    if not os.path.isdir(d):
                        os.makedirs(d)
                except OSError as e:
                    print(e)

            _log.info('evaluate {0} on {1} based on val {2} '
                      'over the rerank benchmarks and output to {3}, {4}'.format(
                          expid, test_year, val_year, output_ql, output_rrall))
            trec_run_dir = '{0}/{1}'.format(trec_run_basedir, test_year)
            eval_trecrun_dir = '{0}/{1}'.format(eval_trec_run_basedir, test_year)
            test_qids = year_qids[test_year]
            qrelf = get_qrelf(qrelfdir, test_year)
            qid_cwid_label = read_qrel(qrelf, test_qids, include_spam=False)
            best_pred_dir, argmax_epoch, argmax_run, argmax_ndcg, argmax_err = \
                get_epoch_from_val(pred_dirs, val_dirs)

            # create re-ranked runs from the model's predicted scores
            qid_cwid_pred, _, pred_expid = read_run(os.path.join(best_pred_dir, argmax_run))
            sysn_qid_cwid_pred = trec_run_predscore(_log, trec_run_dir, qid_cwid_pred)
            print_rerun(sysn_qid_cwid_pred, reranked_run_dir, pred_expid, val_year, test_year)

            # evaluate the re-ranked runs
            for runfile in os.listdir(reranked_run_dir):
                outfile = '%s/%s.ndcg_err' % (reranked_metric_dir, runfile[:-3])
                with open(outfile, 'w') as outf:
                    subprocess.Popen([perlf, qrelf, '%s/%s' % (reranked_run_dir, runfile)], stdout=outf)
                _log.info('finished {0} train on {1}, val on {2}, test on {3}'.format(
                    runfile, train_years, val_year, test_year))

            # read in eval results; wait until the async eval subprocesses have written them all
            trecrun_qid_ndcgerr = read_eval_res(eval_trecrun_dir)
            while True:
                rr_trecrun_qid_ndcgerr = read_eval_res(reranked_metric_dir)
                if len(trecrun_qid_ndcgerr) == len(rr_trecrun_qid_ndcgerr):
                    break
                _log.error('mismatched #run {0} != {1}'.format(
                    len(trecrun_qid_ndcgerr), len(rr_trecrun_qid_ndcgerr)))
                # latency for subprocess.Popen
                time.sleep(2)

            # orig_rank, orig_score, qidscores, rr_rank, rr_score, qidscores
            orig_rr_ndcg_rank, orig_rr_err_rank = get_rank(rr_trecrun_qid_ndcgerr, trecrun_qid_ndcgerr)

            if test_year in fold_names:
                # query-likelihood benchmark
                cols = ['QL-Variants', 'Measures', 'TREC', 'Trec-Rank', 'Rerank',
                        'Rerank-Rank', 'Comparison', 'p-value']
                tabledict = dict()
                measure_ind = {'ERR': 1, 'nDCG': 0}
                for j, col in enumerate(cols):
                    tabledict[col] = list()
                    for method in ['cwindri']:
                        for measure in ['ERR', 'nDCG']:
                            if j == 0:
                                tabledict[col].append(method)
                            elif j == 1:
                                tabledict[col].append(measure)
                            elif j == 2:
                                # original trec score
                                if measure == 'ERR':
                                    tabledict[col].append('%.3f' % orig_rr_err_rank[method][1])
                                elif measure == 'nDCG':
                                    tabledict[col].append('%.3f' % orig_rr_ndcg_rank[method][1])
                            elif j == 3:
                                # original trec rank
                                if measure == 'ERR':
                                    tabledict[col].append(orig_rr_err_rank[method][0])
                                elif measure == 'nDCG':
                                    tabledict[col].append(orig_rr_ndcg_rank[method][0])
                            elif j == 4:
                                # reranked score
                                if measure == 'ERR':
                                    tabledict[col].append('%.3f' % orig_rr_err_rank[method][4])
                                elif measure == 'nDCG':
                                    tabledict[col].append('%.3f' % orig_rr_ndcg_rank[method][4])
                            elif j == 5:
                                # reranked rank
                                if measure == 'ERR':
                                    tabledict[col].append(orig_rr_err_rank[method][3])
                                elif measure == 'nDCG':
                                    tabledict[col].append(orig_rr_ndcg_rank[method][3])
                            elif j == 6:
                                # comparison: (reranked - trec) / trec * 100%
                                if measure == 'ERR':
                                    comp = (orig_rr_err_rank[method][4] - orig_rr_err_rank[method][1]) \
                                        / orig_rr_err_rank[method][1]
                                    tabledict[col].append('%.0f%%' % (comp * 100))
                                elif measure == 'nDCG':
                                    comp = (orig_rr_ndcg_rank[method][4] - orig_rr_ndcg_rank[method][1]) \
                                        / orig_rr_ndcg_rank[method][1]
                                    tabledict[col].append('%.0f%%' % (comp * 100))
                            elif j == 7:
                                # comparison: p-value of a paired t-test over per-query scores
                                if measure == 'ERR':
                                    _, p_err_diff = ttest_rel(orig_rr_err_rank[method][2],
                                                              orig_rr_err_rank[method][5])
                                    tabledict[col].append('%.3f' % p_err_diff)
                                elif measure == 'nDCG':
                                    _, p_ndcg_diff = ttest_rel(orig_rr_ndcg_rank[method][2],
                                                               orig_rr_ndcg_rank[method][5])
                                    tabledict[col].append('%.3f' % p_ndcg_diff)
                dftable = pd.DataFrame(tabledict, columns=cols, index=None)
                _log.info('\n' + dftable.to_string())
                dftable.to_csv(output_ql + '.csv', float_format='%.3f', header=True,
                               index=False, sep=',', mode='w')
                _log.info('finished ql benchmark {0} {1} {2} {3}'.format(
                    expid, train_years, val_year, test_year))

            # re-rank all benchmark
            def comparison(orig_rr_rank):
                count = 0
                percents = list()
                for r in orig_rr_rank:
                    orig_rank, orig_score, orig_scores, rr_rank, rr_score, rr_scores = orig_rr_rank[r]
                    if rr_rank < orig_rank:
                        count += 1
                    # compute micro avg of the relative change per run
                    qid_chg = (rr_score - orig_score) / orig_score
                    percents.append(qid_chg)
                return count, np.mean(percents), np.median(percents)

            cols = ['Measures', '#Total Runs', '#Improved', 'Avg', 'Median']
            orig_rr_ranks = [orig_rr_ndcg_rank, orig_rr_err_rank]
            tabledict = list()
            for i, measure in enumerate(['nDCG', 'ERR']):
                tabledict.append(dict())
                count, avg_chg, median_chg = comparison(orig_rr_ranks[i])
                for j, col in enumerate(cols):
                    if j == 0:
                        tabledict[i][col] = measure
                    elif j == 1:
                        tabledict[i][col] = len(orig_rr_ranks[i])
                    elif j == 2:
                        tabledict[i][col] = count
                    elif j == 3:
                        tabledict[i][col] = '%.0f%%' % (avg_chg * 100)
                    elif j == 4:
                        tabledict[i][col] = '%.0f%%' % (median_chg * 100)
            dftable = pd.DataFrame(tabledict, columns=cols, index=None)
            _log.info('\n' + dftable.to_string())
            dftable.to_csv(output_rrall + '.csv', float_format='%.3f', header=True,
                           index=False, sep=',', mode='w')
            _log.info('finished rerank all benchmark {0} {1} {2} {3}'.format(
                expid, train_years, val_year, test_year))

            # rank vs. percentage of change
            def rank_improve(orig_rr_rank):
                oscore_percent = list()
                for r in orig_rr_rank:
                    orig_rank, orig_score, orig_scores, rr_rank, rr_score, rr_scores = orig_rr_rank[r]
                    percent = (rr_score - orig_score) / orig_score
                    oscore_percent.append((orig_score, percent))
                return [p for s, p in sorted(oscore_percent, key=lambda s_p: s_p[0], reverse=True)]

            def plot_curve(ranks, ndcg_ps, err_ps, outfilename):
                fig, ax = plt.subplots()
                rects1 = ax.scatter(ranks, ndcg_ps, s=25, c='b', marker="^", lw=0)
                rects2 = ax.scatter(ranks, err_ps, s=25, c='r', marker="o", lw=0)
                vals = ax.get_yticks()
                ax.set_yticklabels(['{:3.0f}%'.format(x * 100) for x in vals])
                ax.set_xlabel('Rank of runs from TREC sorted by corresponding measures')
                ax.set_ylabel('Relative improvement based on ERR/nDCG')
                ax.legend((rects1, rects2),
                          ('Improvements based on nDCG', 'Improvements based on ERR'))
                plt.grid(b=False, linestyle='--')
                fig.savefig(outfilename + '.pdf', format='pdf')
                plt.close()

            ndcg_ps = rank_improve(orig_rr_ndcg_rank)
            err_ps = rank_improve(orig_rr_err_rank)
            ranks = range(1, len(ndcg_ps) + 1)
            plot_curve(ranks, ndcg_ps, err_ps, output_rrall)
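# read_eval_res and get_rank are provided by the repo. For context only: the eval loop above
# pipes each reranked run through the perl scorer (perlf) into a '.ndcg_err' file, so
# read_eval_res plausibly parses scorer output of the gdeval.pl flavour, i.e. comma-separated
# 'runid,topic,ndcg@20,err@20' lines. The sketch below is based on that assumption and skips
# the header and aggregate rows; the repository's parser may differ.
def _sketch_read_eval_res(metric_dir):
    run_qid_ndcgerr = dict()
    for fn in os.listdir(metric_dir):
        if not fn.endswith('.ndcg_err'):
            continue
        with open(os.path.join(metric_dir, fn)) as inf:
            for line in inf:
                cols = line.strip().split(',')
                if len(cols) != 4 or cols[1] in ('topic', 'amean'):
                    continue  # skip header and aggregate rows
                runid, qid, ndcg, err = cols[0], cols[1], float(cols[2]), float(cols[3])
                run_qid_ndcgerr.setdefault(runid, dict())[qid] = (ndcg, err)
    return run_qid_ndcgerr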
def main(_log, _config):
    p = _config
    modelname = file2name[p['modelfn']]
    mod_model = importlib.import_module('models.%s' % p['modelfn'])
    model_cls = getattr(mod_model, modelname)
    model_params_raw = {k: v for k, v in p.items() if k in model_cls.params or k == 'modelfn'}
    list_of_model_params = get_model_param(model_params_raw)
    expids = list()
    for model_params in list_of_model_params:
        model = model_cls(model_params, rnd_seed=p['seed'])
        expid = model.params_to_string(model_params, True)
        expids.append(expid)
    raw_expid = model.params_to_string(model_params_raw, True)

    for train_years in train_test_years:
        for i in range(len(train_test_years[train_years])):
            test_year, val_year = train_test_years[train_years][i], train_test_years[train_years][1 - i]
            pred_dirs, val_dirs = list(), list()
            for expid in expids:
                default_dir = lambda year: '%s/train_%s/%s/predict_per_epoch/test_%s/%s' % \
                    (p['parentdir'], train_years, p['expname'], year, expid)
                pred_dir = default_dir(test_year)
                while not os.path.isdir(pred_dir):
                    print(pred_dir, 'is not a valid pred_dir')
                    pred_dir = input('Enter new pred_dir: ')
                val_dir = default_dir(val_year)
                while not os.path.isdir(val_dir):
                    print(val_dir, 'is not a valid val_dir')
                    val_dir = input('Enter new val_dir: ')
                pred_dirs.append(pred_dir)
                val_dirs.append(val_dir)

            # we want to avoid making the output file too long, so we truncate it to the PATH_MAX
            output_file_long = '%s/train_%s/%s/evaluations/statdocpair/%s_v-%s_t-%s/%s' % \
                (p['outdir'], train_years, p['expname'],
                 '-'.join(train_years.split('_')), val_year[2:], test_year[2:], raw_expid)
            output_file = output_file_long[:PATH_MAX]
            if not os.path.isdir(os.path.dirname(output_file)):
                os.makedirs(os.path.dirname(output_file))
            _log.info('evaluate {0} on {1} based on val {2} '
                      'over docpairs benchmark and output to {3}'.format(expid, test_year, val_year, output_file))

            test_qids = year_qids[test_year]
            qrelf = get_qrelf(qrelfdir, test_year)
            qid_cwid_label = read_qrel(qrelf, test_qids, include_spam=False)
            year_pkey_docpairs = create_docpairs(qid_cwid_label, test_qids, qid_year)
            best_pred_dir, argmax_epoch, argmax_run, argmax_ndcg, argmax_err = \
                get_epoch_from_val(pred_dirs, val_dirs)
            qid_cwid_invrank, _, runid = read_run(os.path.join(best_pred_dir, argmax_run))
            pkey_qid_acc = eval_docpair_predaccuracy(qid_cwid_invrank, year_pkey_docpairs, test_year)
            dftable = df(pkey_qid_acc, index=sorted(list(qid_cwid_invrank.keys())) + [0, -1])
            _log.info('\n' + dftable.to_string())
            dftable.to_csv(output_file + '.csv', float_format='%.3f', header=True,
                           index=True, sep=',', mode='w')
            _log.info('finished {0} {1} {2} {3}'.format(expid, train_years, val_year, test_year))
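# get_model_param comes from the repo's utilities. Judging by its use above (one raw config
# dict in, a list of concrete parameter dicts out, each yielding its own expid), it plausibly
# expands any list-valued settings into their cartesian product. A minimal sketch under that
# assumption; the real helper may also handle ordering or filtering of combinations.
from itertools import product


def _sketch_get_model_param(model_params_raw):
    keys = sorted(model_params_raw)
    candidates = [model_params_raw[k] if isinstance(model_params_raw[k], list)
                  else [model_params_raw[k]] for k in keys]
    return [dict(zip(keys, combo)) for combo in product(*candidates)]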