Example #1
def main(_log, _config):
    p = _config
    modelname = file2name[p['modelfn']]
    mod_model = importlib.import_module('models.%s' % p['modelfn'])
    model_cls = getattr(mod_model, modelname)
    model_params_raw = {k: v for k, v in p.items() if k in model_cls.params or k == 'modelfn'}
    
    list_of_model_params = get_model_param(model_params_raw)
    expids = list()
    for model_params in list_of_model_params:
        model = model_cls(model_params, rnd_seed=p['seed'])
        expid = model.params_to_string(model_params, True)
        expids.append(expid)
    raw_expid = model.params_to_string(model_params_raw, True)
    
    for train_years in train_test_years:

        for i in range(len(train_test_years[train_years])):
            test_year, val_year = train_test_years[train_years][i], train_test_years[train_years][1 - i]
            # NOTE: the fold values computed from train_test_years above are immediately
            # overridden here with a hard-coded wt09_10 -> wt11 split
            train_years='wt09_10'
            test_year='wt11'
            val_year='wt11'
            pred_dirs, val_dirs = list(), list()
            
            for expid in expids:
                pred_dir = '%s/train_%s/%s/predict_per_epoch/test_%s/%s' % (p['parentdir'], train_years, p['expname'], test_year, expid)
                val_dir = '%s/train_%s/%s/predict_per_epoch/test_%s/%s' % (p['parentdir'], train_years, p['expname'], val_year, expid)
                if not os.path.isdir(pred_dir) or not os.path.isdir(val_dir):
                    warnings.warn('No such dir {0}/{1}'.format(pred_dir, val_dir), RuntimeWarning)
                    continue
                pred_dirs.append(pred_dir) 
                val_dirs.append(val_dir)
            output_file='%s/train_%s/%s/evaluations/statdocpair/%s_v-%s_t-%s/%s'%(p['outdir'], train_years,\
                    p['expname'], '-'.join(train_years.split('_')), val_year[2:], test_year[2:], raw_expid)

            try:
                if not os.path.isdir(os.path.dirname(output_file)):
                    os.makedirs(os.path.dirname(output_file))
            except OSError:
                pass
            _log.info('evaluate {0} on {1} based on val {2} '
                      'over docpairs benchmark and output to {3}'.format(expid, test_year, val_year, output_file))

            test_qids = year_qids[test_year]
            qrelf = get_qrelf(qrelfdir, test_year)
            qid_cwid_label = read_qrel(qrelf, test_qids, include_spam=False)
            year_pkey_docpairs = create_docpairs(qid_cwid_label, test_qids, qid_year)
            
            best_pred_dir, argmax_epoch, argmax_run, argmax_ndcg, argmax_err = get_epoch_from_val(pred_dirs, val_dirs)
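            # judging from the returned names, get_epoch_from_val appears to pick the
            # epoch/run that scored best on the validation year (nDCG/ERR), so the run
            # evaluated below is chosen without looking at the test-year labels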

            qid_cwid_invrank, _, runid = read_run(os.path.join(best_pred_dir, argmax_run))
            pkey_qid_acc = eval_docpair_predaccuracy(qid_cwid_invrank, year_pkey_docpairs, test_year)
            sorted_data = sorted(pkey_qid_acc)
            print("pkey_qid_acc : ", sorted_data)
Example #2
def pred(_log, _config):
    p = _config

    modelname = file2name[p['modelfn']]
    mod_model = importlib.import_module('models.%s' % p['modelfn'])
    model_cls = getattr(mod_model, modelname)
    model_params = {k: v for k, v in p.items() if k in model_cls.params or k == 'modelfn'}
    model = model_cls(model_params, rnd_seed=p['seed'])
    expid = model.params_to_string(model_params)

    outdir_plot=trunc_dir('%s/train_%s/%s/predict_per_epoch/test_%s' % (p['parentdir'], p['train_years'],
                                                              p['expname'], p['test_year']))
    outdir_run=trunc_dir('%s/%s'%(outdir_plot, expid))
    tmp_dir=trunc_dir(os.path.join(outdir_run,'tmp'))
    weight_dir=trunc_dir('%s/train_%s/%s/model_weight/%s' % (p['parentdir'], p['train_years'],p['expname'], expid))
    detail_outdir=trunc_dir('%s/train_%s/%s/model_detail/' % (p['parentdir'], p['train_years'], p['expname']))

    assert os.path.isdir(weight_dir), "weight_dir " + weight_dir + " does not exist. Make sure you trained the model."
    assert os.path.isdir(detail_outdir), "detail_outdir " + detail_outdir + " does not exist. Make sure you trained the model."

    if len(os.listdir(weight_dir)) < 1:
        raise SoftFailure('weight dir empty')

    try:
        if not os.path.isdir(outdir_run):
            os.makedirs(outdir_run)
            os.makedirs(tmp_dir)
    except OSError:
        pass
    _log.info('Processing {0}'.format(outdir_run))
    ###################
    label2tlabel={4:2,3:2,2:2,1:1,0:0,-2:0}
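    # collapse the graded qrel labels into three classes: 4/3/2 -> 2 (highly relevant),
    # 1 -> 1 (relevant), 0/-2 -> 0 (non-relevant/junk); this reading is inferred from
    # the mapping itself rather than stated anywhere in the code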
    topk4eval=20
    NGRAM_NFILTER, N_GRAMS = get_ngram_nfilter(p['winlen'], p['qproximity'], p['maxqlen'], p['xfilters'])

    _log.info('process {0} and output to {1}'.format(weight_dir, outdir_run))
    _log.info('{0} {1} {2} {3} {4}'.format(p['distill'], 'NGRAM_NFILTER', NGRAM_NFILTER, 'N_GRAMS', N_GRAMS))

    # prepare train data
    qids = get_train_qids(p['test_year'])
    qrelf = get_qrelf(qrelfdir, p['test_year'])
    qid_cwid_label = read_qrel(qrelf, qids, include_spam=False)
    test_qids =[qid for qid in qids if qid in qid_cwid_label]
    _log.info('%s test_num %d '%(p['test_year'], len(test_qids)))

    f_ndcg=dict()
    f_epochs = set()
    # sort weights by time and only use the first weights for each epoch
    # (in case there are duplicate weights from a failed/re-run train)
    for f in sorted(os.listdir(weight_dir),
                    key=lambda x: os.path.getctime(os.path.join(weight_dir, x))):
        if f.split('.')[-1] != 'h5':
            continue
        cols = f.split('.')[0].split('_')
        if len(cols) == 4:
            nb_epoch, loss, n_batch, n_samples = int(cols[0]), int(cols[1]), int(cols[2]), int(cols[3])
            if nb_epoch <= p['epochs'] and nb_epoch not in f_epochs:
                f_epochs.add(nb_epoch)
                f_ndcg[f]=(nb_epoch, loss, n_batch, n_samples)
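    # weight filenames are expected to look like '<epoch>_<loss>_<n_batch>_<n_samples>.h5'
    # with integer fields, e.g. an illustrative '12_0_320_20480.h5' -> (12, 0, 320, 20480);
    # the format is inferred from the parsing above, not documented elsewhere in this snippet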


    finished_epochs = {}
    for fn in sorted(os.listdir(outdir_run),
                     key=lambda x: os.path.getctime(os.path.join(outdir_run, x))):
        if fn.endswith(".run"):
            fields = fn[:-4].split("_") # trim .run
            assert len(fields) == 5

            epoch, loss = int(fields[0]), int(fields[4])
            ndcg, mapv, err = float(fields[1]), float(fields[2]), float(fields[3])
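            # the fields follow the out_name pattern written further below:
            # '<epoch>_<ndcg>_<map>_<err>_<loss>.run', e.g. an illustrative
            # '7_0.2816_0.1590_0.3102_42.run' -> epoch=7, ndcg=0.2816, mapv=0.1590, err=0.3102, loss=42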

            #assert epoch not in finished_epochs
            if epoch in finished_epochs:
                _log.error("TODO two weights exist for same epoch")
            finished_epochs[epoch] = (epoch, err, ndcg, mapv, loss)

    _log.info('skipping finished epochs: {0}'.format(finished_epochs))

    def model_pred(NGRAM_NFILTER, weight_file, test_data, test_docids, test_qids):
        dump_modelplot(model.build(), detail_outdir + 'predplot_' + expid)
        model_predict = model.build_from_dump(weight_file)
        qid_cwid_pred = pred_label(model_predict, test_data, test_docids, test_qids)
        return qid_cwid_pred

    test_doc_vec, test_docids, test_qids=load_test_data(qids, rawdoc_mat_dir, qid_cwid_label, N_GRAMS, p)
    epoch_err_ndcg_loss=list()
    _log.info('start {0} {1} {2}'.format(expid, p['train_years'], p['test_year']))
    for f in sorted(f_ndcg, key=lambda x:f_ndcg[x][0]):
        nb_epoch, loss, n_batch, n_samples = f_ndcg[f]
        if nb_epoch in finished_epochs:
            epoch_err_ndcg_loss.append(finished_epochs[nb_epoch])
            continue
        weight_file = os.path.join(weight_dir, f)
        qid_cwid_pred = model_pred(NGRAM_NFILTER, weight_file, test_doc_vec, test_docids, test_qids)
        ndcg20, err20, mapv = eval_run(_log, qid_cwid_pred, expid, perlf, treceval, tmp_dir, topk4eval, qrelf)
        loss = int(loss)
        out_name = '%d_%0.4f_%0.4f_%0.4f_%d.run' % (nb_epoch, ndcg20, mapv, err20, loss)
        epoch_err_ndcg_loss.append((nb_epoch, err20, ndcg20, mapv, loss))
        print_run(qid_cwid_pred, outdir_run, out_name, expid)
        _log.info('finished {0}'.format(f))
    _log.info('finish {0} {1} {2}'.format(expid, p['train_years'], p['test_year']))

    plot_curve(epoch_err_ndcg_loss, outdir_plot, expid, p)

    if max(f_epochs) < p['epochs'] - 3:
        raise SoftFailure("prediction finished, but not all epochs are available yet. last epoch found: %s" % max(f_epochs))
Example #3
def main(_log, _config):
    p = _config

    modelname = file2name[p['modelfn']]
    mod_model = importlib.import_module('models.%s' % p['modelfn'])
    # load the model to be employed, say from models/pacrr.py
    model_cls = getattr(mod_model, modelname)
    model_params = {k: v for k, v in p.items() if k in model_cls.params or k == 'modelfn'}
    model = model_cls(model_params, rnd_seed=p['seed'])
    # create a expid based on the configured parameters
    expid = model.params_to_string(model_params)

    # the model files
    outdir='%s/train_%s/%s/model_weight/%s'%(p['parentdir'], p['train_years'], p['expname'], expid)
    # the plots for the model, the training loss etc..
    detail_outdir='%s/train_%s/%s/model_detail/'%(p['parentdir'], p['train_years'], p['expname'])

    if not os.path.isdir(detail_outdir + 'outs'):
        print(detail_outdir + 'outs')
        os.makedirs(detail_outdir + 'outs')

    _log.info('Input parameters: {0}'.format(p))
    label2tlabel={4:2,3:2,2:2,1:1,0:0,-2:0}
    sample_label_prob=dict()
    _log.info('{0} {1} {2}'.format(p['expname'], p['train_years'], sample_label_prob))

    NGRAM_NFILTER, N_GRAMS = get_ngram_nfilter(p['winlen'], p['qproximity'], p['maxqlen'], p['xfilters'])

    _log.info('process and output to %s'%outdir)
    _log.info('{0} {1} {2} {3} {4}'.format(p['distill'], 'NGRAM_NFILTER', NGRAM_NFILTER, 'N_GRAMS', N_GRAMS))
    if os.path.exists(outdir) and len(os.listdir(outdir)) == p['epochs']:
        _log.info("outdir already seems to be full... exiting early")
        return

    # prepare train data
    qids = get_train_qids(p['train_years'])
    qrelf = get_qrelf(qrelfdir, p['train_years'])
    qid_cwid_label = read_qrel(qrelf, qids, include_spam=False)
    train_qids =[qid for qid in qids if qid in qid_cwid_label]
    _log.info('%s train_num %d '%(p['train_years'], len(train_qids)))

    def plot_curve_loss(epoch_train_loss, outdir, name, plot_id, series):
        epochs, losses = zip(*list(enumerate(epoch_train_loss)))
        argmin_loss_epoch =  np.argmin(epoch_train_loss)
        fig = plt.figure()
        plt.plot(epochs, losses, 'k:')
        plt.ylabel('Training Loss')
        plt.tick_params('y')
        plt.xlabel('epochs')
        plt.title('loss:%d %.3f'%(argmin_loss_epoch, epoch_train_loss[argmin_loss_epoch]))
        fig.savefig(trunc_dir(outdir) + trunc_file(name + '_' + plot_id + '.pdf'), format='pdf')
        plt.close()


    # dump model plot
    built_model = model.build()
    model.build_predict()  # run build_predict to verify it's working
    dump_modelplot(built_model, detail_outdir + 'model_' + expid)

    # callback function, dump the model and compute ndcg/map
    dump_weight = DumpWeight(outdir, batch_size=p['batch'], nb_sample=p['nsamples'])

    # keras 2 steps per epoch is number of batches per epoch, not number of samples per epoch
    steps_per_epoch = np.int(p['nsamples'] / p['batch'])
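    # e.g. with the (hypothetical) settings nsamples=16384 and batch=32 this gives
    # 16384 / 32 = 512 gradient-update steps per epoch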

    # the generator for training data
    train_data_generator=\
            load_train_data_generator(qids, rawdoc_mat_dir, qid_cwid_label, N_GRAMS, p,\
                    label2tlabel=label2tlabel, sample_label_prob=sample_label_prob)

    history = built_model.fit_generator(train_data_generator, steps_per_epoch=steps_per_epoch, epochs=p['epochs'],
                                        verbose=0, callbacks=[dump_weight], max_q_size=15, workers=1, pickle_safe=False)
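    # max_q_size/pickle_safe are the legacy Keras 1.x argument names
    # (Keras 2 later renamed them to max_queue_size/use_multiprocessing)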

    epoch_train_loss = history.history['loss']

    # plot the training loss for debugging
    plot_curve_loss(epoch_train_loss, detail_outdir, 'train_', expid, ['loss'])
    historyfile = detail_outdir + 'hist_' + expid + '.history'
    with open(detail_outdir + 'hist_' + expid + '.p', 'wb') as handle:
        pickle.dump(history.history, handle, protocol=pickle.HIGHEST_PROTOCOL)
Example #4
File: rerank.py  Project: amirvt/copacrr
def main(_log, _config):
    p = _config

    modelname = file2name[p['modelfn']]
    mod_model = importlib.import_module('models.%s' % p['modelfn'])
    model_cls = getattr(mod_model, modelname)

    model_params_raw = {
        k: v
        for k, v in p.items() if k in model_cls.params or k == 'modelfn'
    }

    list_of_model_params = get_model_param(model_params_raw)
    expids = list()
    for model_params in list_of_model_params:
        model = model_cls(model_params, rnd_seed=p['seed'])
        expid = model.params_to_string(model_params, True)
        expids.append(expid)
    raw_expid = model.params_to_string(model_params_raw, True)

    for train_years in train_test_years:

        for i in range(len(train_test_years[train_years])):
            test_year, val_year = train_test_years[train_years][
                i], train_test_years[train_years][1 - i]

            pred_dirs, val_dirs = list(), list()
            for expid in expids:
                pred_dir = '%s/train_%s/%s/predict_per_epoch/test_%s/%s' % (
                    p['parentdir'], train_years, p['expname'], test_year,
                    expid)
                val_dir = '%s/train_%s/%s/predict_per_epoch/test_%s/%s' % (
                    p['parentdir'], train_years, p['expname'], val_year, expid)
                if not os.path.isdir(pred_dir) or not os.path.isdir(val_dir):
                    warnings.warn(
                        'No such dir {0}/{1}'.format(pred_dir, val_dir),
                        RuntimeWarning)
                    continue
                pred_dirs.append(pred_dir)
                val_dirs.append(val_dir)
            output_ql='%s/train_%s/%s/evaluations/rerank-ql/%s_v-%s_t-%s/%s'%(p['outdir'], train_years,\
                    p['expname'], '-'.join(train_years.split('_')), val_year[2:], test_year[2:], raw_expid)
            output_rrall='%s/train_%s/%s/evaluations/rerank-all/%s_v-%s_t-%s/%s'%(p['outdir'], train_years,\
                    p['expname'], '-'.join(train_years.split('_')), val_year[2:], test_year[2:], raw_expid)
            reranked_run_dir='%s/train_%s/%s/reranking/trecrun/v%s-t%s_%s'%(p['outdir'],train_years,\
                        p['expname'],val_year, test_year,raw_expid)
            reranked_metric_dir='%s/train_%s/%s/reranking/ndcgerr/v%s-t%s_%s'%(p['outdir'],train_years,\
                        p['expname'],val_year, test_year,raw_expid)

            try:
                if not os.path.isdir(reranked_run_dir):
                    os.makedirs(reranked_run_dir)
            except OSError as e:
                print(e)
            try:
                if not os.path.isdir(reranked_metric_dir):
                    os.makedirs(reranked_metric_dir)
            except OSError as e:
                print(e)
            try:
                if not os.path.isdir(os.path.dirname(output_ql)):
                    os.makedirs(os.path.dirname(output_ql))
            except OSError as e:
                print(e)
            try:
                if not os.path.isdir(os.path.dirname(output_rrall)):
                    os.makedirs(os.path.dirname(output_rrall))
            except OSError as e:
                print(e)
            _log.info('evaluate {0} on {1} based on val {2} '
                      'over docpairs benchmark and output to {3}, {4}'.format(
                          expid, test_year, val_year, output_ql, output_rrall))

            trec_run_dir = '{0}/{1}'.format(trec_run_basedir, test_year)
            eval_trecrun_dir = '{0}/{1}'.format(eval_trec_run_basedir,
                                                test_year)

            test_qids = year_qids[test_year]
            qrelf = get_qrelf(qrelfdir, test_year)
            qid_cwid_label = read_qrel(qrelf, test_qids, include_spam=False)
            best_pred_dir, argmax_epoch, argmax_run, argmax_ndcg, argmax_err = get_epoch_from_val(
                pred_dirs, val_dirs)
            # create re-rank
            qid_cwid_pred, _, pred_expid = read_run(
                os.path.join(best_pred_dir, argmax_run))
            sysn_qid_cwid_pred = trec_run_predscore(_log, trec_run_dir,
                                                    qid_cwid_pred)
            print_rerun(sysn_qid_cwid_pred, reranked_run_dir, pred_expid,
                        val_year, test_year)
            # eval re-rank
            for runfile in os.listdir(reranked_run_dir):
                outfile = '%s/%s.ndcg_err' % (reranked_metric_dir,
                                              runfile[:-3])
                with open(outfile, 'w') as outf:
                    subprocess.Popen(
                        [perlf, qrelf,
                         '%s/%s' % (reranked_run_dir, runfile)],
                        stdout=outf)
                    _log.info(
                        'finished {0} train on {1}, val on {2}, test on {3}'.
                        format(runfile, train_years, val_year, test_year))
            # read in eval and generate results
            trecrun_qid_ndcgerr = read_eval_res(eval_trecrun_dir)
            while True:
                rr_trecrun_qid_ndcgerr = read_eval_res(reranked_metric_dir)
                if len(trecrun_qid_ndcgerr) == len(rr_trecrun_qid_ndcgerr):
                    break
                _log.error('mismatched #run {0} != {1}'.format(
                    len(trecrun_qid_ndcgerr), len(rr_trecrun_qid_ndcgerr)))
                # wait for the subprocess.Popen evaluations above to finish writing
                time.sleep(2)
            # orig_rank, orig_score, qidscores, rr_rank, rr_score, qidscores
            orig_rr_ndcg_rank, orig_rr_err_rank = get_rank(
                rr_trecrun_qid_ndcgerr, trecrun_qid_ndcgerr)

            if test_year in fold_names:
                # query likelihood benchmark
                cols = [
                    'QL-Variants', 'Measures', 'TREC', 'Trec-Rank', 'Rerank',
                    'Rerank-Rank', 'Comparison', 'p-value'
                ]
                tabledict = dict()
                measure_ind = {'ERR': 1, 'nDCG': 0}
                for j, col in enumerate(cols):
                    tabledict[col] = list()
                    for method in ['cwindri']:
                        for measure in ['ERR', 'nDCG']:
                            if j == 0:
                                tabledict[col].append(method)
                            elif j == 1:
                                tabledict[col].append(measure)
                            # original trec score
                            elif j == 2:
                                if measure == 'ERR':
                                    tabledict[col].append(
                                        '%.3f' % orig_rr_err_rank[method][1])
                                elif measure == 'nDCG':
                                    tabledict[col].append(
                                        '%.3f' % orig_rr_ndcg_rank[method][1])
                            # original trec rank
                            elif j == 3:
                                if measure == 'ERR':
                                    tabledict[col].append(
                                        orig_rr_err_rank[method][0])
                                elif measure == 'nDCG':
                                    tabledict[col].append(
                                        orig_rr_ndcg_rank[method][0])
                            # reranked score
                            elif j == 4:
                                if measure == 'ERR':
                                    tabledict[col].append(
                                        '%.3f' % orig_rr_err_rank[method][4])
                                elif measure == 'nDCG':
                                    tabledict[col].append(
                                        '%.3f' % orig_rr_ndcg_rank[method][4])
                            # reranked rank
                            elif j == 5:
                                if measure == 'ERR':
                                    tabledict[col].append(
                                        orig_rr_err_rank[method][3])
                                elif measure == 'nDCG':
                                    tabledict[col].append(
                                        orig_rr_ndcg_rank[method][3])
                            # comparison: (r-t)/t * 100 %
                            elif j == 6:
                                if measure == 'ERR':
                                    comp = (orig_rr_err_rank[method][4] -
                                            orig_rr_err_rank[method][1]
                                            ) / orig_rr_err_rank[method][1]
                                    tabledict[col].append('%.0f%%' %
                                                          (comp * 100))
                                elif measure == 'nDCG':
                                    comp = (orig_rr_ndcg_rank[method][4] -
                                            orig_rr_ndcg_rank[method][1]
                                            ) / orig_rr_ndcg_rank[method][1]
                                    tabledict[col].append('%.0f%%' %
                                                          (comp * 100))
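                            # e.g. a reranked ERR of 0.220 against a TREC ERR of 0.200 gives
                            # comp = (0.220 - 0.200) / 0.200 = 0.10, reported as '10%'
                            # (illustrative numbers only)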
                            # comparison: p-value
                            elif j == 7:
                                if measure == 'ERR':
                                    _, p_err_diff = ttest_rel(
                                        orig_rr_err_rank[method][2],
                                        orig_rr_err_rank[method][5])
                                    tabledict[col].append('%.3f' %
                                                          (p_err_diff))
                                elif measure == 'nDCG':
                                    _, p_ndcg_diff = ttest_rel(
                                        orig_rr_ndcg_rank[method][2],
                                        orig_rr_ndcg_rank[method][5])
                                    tabledict[col].append('%.3f' %
                                                          (p_ndcg_diff))

                dftable = pd.DataFrame(tabledict, columns=cols, index=None)
                _log.info('\n' + dftable.to_string())
                dftable.to_csv(output_ql + '.csv',
                               float_format='%.3f',
                               header=True,
                               index=False,
                               sep=',',
                               mode='w')
                _log.info('finished ql benchmark {0} {1} {2} {3}'.format(
                    expid, train_years, val_year, test_year))

            # re-rank all benchmark
            def comparison(orig_rr_rank):
                count = 0
                percents = list()
                for r in orig_rr_rank:
                    orig_rank, orig_score, orig_scores, rr_rank, rr_score, rr_scores = orig_rr_rank[
                        r]
                    if rr_rank < orig_rank:
                        count += 1
                    # compute micro avg
                    qid_chg = (rr_score - orig_score) / orig_score
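                    # e.g. orig_score=0.25 and rr_score=0.30 give qid_chg=0.20,
                    # i.e. a 20% relative improvement (illustrative values)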
                    percents.append(qid_chg)
                return count, np.mean(percents), np.median(percents)

            cols = ['Measures', '#Total Runs', '#Improved', 'Avg', 'Median']
            orig_rr_ranks = [orig_rr_ndcg_rank, orig_rr_err_rank]
            tabledict = list()
            for i, measure in enumerate(['nDCG', 'ERR']):
                tabledict.append(dict())
                count, avg_chg, median_chg = comparison(orig_rr_ranks[i])
                for j, col in enumerate(cols):
                    if j == 0:
                        tabledict[i][col] = measure
                    elif j == 1:
                        tabledict[i][col] = len(orig_rr_ranks[i])
                    elif j == 2:
                        tabledict[i][col] = count
                    elif j == 3:
                        tabledict[i][col] = '%.0f%%' % (avg_chg * 100)
                    elif j == 4:
                        tabledict[i][col] = '%.0f%%' % (median_chg * 100)

            dftable = pd.DataFrame(tabledict, columns=cols, index=None)
            _log.info('\n' + dftable.to_string())
            dftable.to_csv(output_rrall + '.csv',
                           float_format='%.3f',
                           header=True,
                           index=False,
                           sep=',',
                           mode='w')
            _log.info('finished rerank all benchmark {0} {1} {2} {3}'.format(
                expid, train_years, val_year, test_year))

            # rank vs percentage of change
            def rank_improve(orig_rr_rank):
                oscore_percent = list()
                for r in orig_rr_rank:
                    orig_rank, orig_score, orig_scores, rr_rank, rr_score, rr_scores = orig_rr_rank[
                        r]
                    percent = (rr_score - orig_score) / orig_score
                    oscore_percent.append((orig_score, percent))
                return [
                    p for s, p in sorted(
                        oscore_percent, key=lambda s_p: s_p[0], reverse=True)
                ]

            def plot_curve(ranks, ndcg_ps, err_ps, outfilename):
                fig, ax = plt.subplots()
                rects1 = ax.scatter(ranks,
                                    ndcg_ps,
                                    s=25,
                                    c='b',
                                    marker="^",
                                    lw=0)
                rects2 = ax.scatter(ranks,
                                    err_ps,
                                    s=25,
                                    c='r',
                                    marker="o",
                                    lw=0)
                vals = ax.get_yticks()
                ax.set_yticklabels(['{:3.0f}%'.format(x * 100) for x in vals])
                ax.set_xlabel(
                    'Rank of runs from TREC sorted by corresponding measures')
                ax.set_ylabel('Relative improvement based on Err/nDCG')
                ax.legend((rects1, rects2), ('Improvements based on nDCG',
                                             'Improvements based on ERR'))
                plt.grid(b=False, linestyle='--')
                fig.savefig(outfilename + '.pdf', format='pdf')
                plt.close()

            ndcg_ps = rank_improve(orig_rr_ndcg_rank)
            err_ps = rank_improve(orig_rr_err_rank)
            ranks = range(1, len(ndcg_ps) + 1)
            plot_curve(ranks, ndcg_ps, err_ps, output_rrall)
Example #5
def main(_log, _config):
    p = _config
    modelname = file2name[p['modelfn']]
    mod_model = importlib.import_module('models.%s' % p['modelfn'])
    model_cls = getattr(mod_model, modelname)
    model_params_raw = {
        k: v
        for k, v in p.items() if k in model_cls.params or k == 'modelfn'
    }

    list_of_model_params = get_model_param(model_params_raw)
    expids = list()
    for model_params in list_of_model_params:
        model = model_cls(model_params, rnd_seed=p['seed'])
        expid = model.params_to_string(model_params, True)
        expids.append(expid)
    raw_expid = model.params_to_string(model_params_raw, True)

    for train_years in train_test_years:

        for i in range(len(train_test_years[train_years])):
            test_year, val_year = train_test_years[train_years][
                i], train_test_years[train_years][1 - i]

            pred_dirs, val_dirs = list(), list()

            for expid in expids:
                default_dir = lambda year: '%s/train_%s/%s/predict_per_epoch/test_%s/%s' % \
                    (p['parentdir'], train_years, p['expname'], year, expid)

                pred_dir = default_dir(test_year)
                while not os.path.isdir(pred_dir):
                    print(pred_dir, 'is not a valid pred_dir')
                    pred_dir = input('Enter new pred_dir: ')

                val_dir = default_dir(val_year)
                while not os.path.isdir(val_dir):
                    print(val_dir, 'is not a valid val_dir')
                    val_dir = input('Enter new val_dir: ')

                pred_dirs.append(pred_dir)
                val_dirs.append(val_dir)

            # we want to avoid making the output file too long, so we truncate it to the PATH_MAX
            output_file_long='%s/train_%s/%s/evaluations/statdocpair/%s_v-%s_t-%s/%s'%(p['outdir'], train_years,\
                    p['expname'], '-'.join(train_years.split('_')), val_year[2:], test_year[2:], raw_expid)
            output_file = output_file_long[:PATH_MAX]
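            # PATH_MAX is not defined in this snippet and is assumed to come from elsewhere
            # in the module (e.g. a constant, or os.pathconf('/', 'PC_PATH_MAX') on POSIX);
            # slicing keeps the output path within that length limit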

            if not os.path.isdir(os.path.dirname(output_file)):
                os.makedirs(os.path.dirname(output_file))
            _log.info('evaluate {0} on {1} based on val {2} '
                      'over docpairs benchmark and output to {3}'.format(
                          expid, test_year, val_year, output_file))

            test_qids = year_qids[test_year]
            qrelf = get_qrelf(qrelfdir, test_year)
            qid_cwid_label = read_qrel(qrelf, test_qids, include_spam=False)
            year_pkey_docpairs = create_docpairs(qid_cwid_label, test_qids,
                                                 qid_year)

            best_pred_dir, argmax_epoch, argmax_run, argmax_ndcg, argmax_err = get_epoch_from_val(
                pred_dirs, val_dirs)

            qid_cwid_invrank, _, runid = read_run(
                os.path.join(best_pred_dir, argmax_run))
            pkey_qid_acc = eval_docpair_predaccuracy(qid_cwid_invrank,
                                                     year_pkey_docpairs,
                                                     test_year)

            dftable = df(pkey_qid_acc,
                         index=sorted(list(qid_cwid_invrank.keys())) + [0, -1])
            _log.info('\n' + dftable.to_string())
            dftable.to_csv(output_file + '.csv',
                           float_format='%.3f',
                           header=True,
                           index=True,
                           sep=',',
                           mode='w')
            _log.info('finished {0} {1} {2} {3}'.format(
                expid, train_years, val_year, test_year))