Example #1
def plotDifferenceWith(targetRun,
                       otherRuns,
                       qrels,
                       measure,
                       outputFile,
                       style=PLOT_STYLE):
    """
    Plots, for each topic, the difference of measure between each run in
    otherRuns and targetRun, and saves the chart to outputFile.
    """
    avg_baseline, baseline_scores = pytrec_eval.evaluate(
        targetRun, qrels, measure, True)

    bar_chart = pygal.Bar()
    bar_chart.style = style
    allTopics = list(qrels.getTopicIds())

    bar_chart.label_font_size = 8
    bar_chart.legend_at_bottom = True
    bar_chart.legend_font_size = 10
    bar_chart.legend_box_size = 8

    bar_chart.x_label_rotation = 90
    bar_chart.x_labels = allTopics
    bar_chart.x_title = 'Topic Id'
    bar_chart.y_title = 'Difference from ' + targetRun.name + ' (' + pytrec_eval.METRICS_NAMES[
        measure] + ')'

    for otherRun in otherRuns:
        _, other_scores = pytrec_eval.evaluate(otherRun, qrels, measure, True)
        points = [
            other_scores.get(topicId, 0) - baseline_scores.get(topicId, 0)
            for topicId in allTopics
        ]
        bar_chart.add(otherRun.name, points)
    bar_chart.render_to_file(outputFile)
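A minimal usage sketch for the function above; the run and qrels file paths are placeholders, and the measure constant is borrowed from the other examples on this page, not from this one:

# Hypothetical usage of plotDifferenceWith (paths and measure are placeholders):
import pytrec_eval

baseline = pytrec_eval.TrecRun('runs/baseline.txt')
other = pytrec_eval.TrecRun('runs/variant.txt')
qrels = pytrec_eval.QRels('qrels/qrels.txt')
# one bar per topic: variant score minus baseline score, rendered as an SVG
plotDifferenceWith(baseline, [other], qrels, pytrec_eval.ndcg, 'diff_ndcg.svg')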
Example #2
def plotEvaluation(trecRun, qrels, measure, outputFile=None, showPlot=True):
    """
    Plots an histogram with one bar per topic.
    Each bar represents the difference between measure computed on the topic
    and the average measure among all topics.
    If outputFile is a string specifying the name of a file, then the plot
    is saved into that file.
    If showPlot then the plot is shown to the user (but not necessarily stored
     into a file).
    """
    plt.clf()
    avg, details = pytrec_eval.evaluate(trecRun, qrels, measure, True)
    # make sure the qId order matches the score order (maybe not necessary...)
    lstDetails = sorted(details.items())
    qIds = [qId for qId, _ in lstDetails]
    scores = [score - avg for _, score in lstDetails]
    plt.figure(1)
    x = list(range(len(qIds)))
    plt.bar(x, scores, width=0.6)
    plt.xticks(x, qIds, rotation=90, size=5)
    plt.xlim(xmax=len(qIds))
    plt.xlabel('Topic Id')
    plt.ylabel('Difference of ' + pytrec_eval.METRICS_NAMES[measure] + ' from Average')
    # save before showing: some backends discard the figure after show()
    if outputFile is not None: plt.savefig(outputFile, bbox_inches='tight')
    if showPlot: plt.show()
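A minimal usage sketch for the matplotlib variant above; the file paths are placeholders, and the measure constant is taken from the other examples on this page:

# Hypothetical usage of plotEvaluation (paths and measure are placeholders):
import pytrec_eval

run = pytrec_eval.TrecRun('runs/baseline.txt')
qrels = pytrec_eval.QRels('qrels/qrels.txt')
# save the per-topic bars to a PNG without opening a window
plotEvaluation(run, qrels, pytrec_eval.ndcg, outputFile='per_topic_ndcg.png', showPlot=False)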
Example #3
def plotEvaluationAll(trecRuns, qrels, measure, outputFile, style=PLOT_STYLE):
    """
    Plots an histogram with one bar per topic.
    Each bar represents the difference between measure computed on the topic
    and the average measure among all topics.
    OutputFile is a string specifying the name of the file the plot
    is saved into.
    """
    qIds = list(qrels.getTopicIds())
    qIds.sort()

    bar_chart = pygal.Bar()
    # bar_chart.spacing = 50
    bar_chart.label_font_size = 8
    bar_chart.style = style
    bar_chart.x_label_rotation = 90
    bar_chart.x_labels = qIds
    bar_chart.x_title = 'Topic Id'
    bar_chart.legend_at_bottom = True
    bar_chart.legend_font_size = 10
    bar_chart.legend_box_size = 8

    bar_chart.y_title = pytrec_eval.METRICS_NAMES[measure]

    for trecRun in trecRuns:
        _, details = pytrec_eval.evaluate(trecRun, qrels, measure, True)
        lstDetails = [details.get(topicId, 0) for topicId in qIds]
        bar_chart.add(trecRun.name, lstDetails)

    bar_chart.render_to_file(outputFile)
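A minimal usage sketch for plotEvaluationAll; the run and qrels paths are placeholders, and the measure constant is borrowed from the other examples on this page:

# Hypothetical usage of plotEvaluationAll (paths and measure are placeholders):
import pytrec_eval

runs = [pytrec_eval.TrecRun(p) for p in ('runs/baseline.txt', 'runs/variant.txt')]
qrels = pytrec_eval.QRels('qrels/qrels.txt')
# one bar group per topic, one series per run, rendered as an SVG
plotEvaluationAll(runs, qrels, pytrec_eval.ndcg, 'per_topic_ndcg.svg')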
Example #4
def plotEvaluation(trecRun, qrels, measure, outputFile, style=PLOT_STYLE):
    """
    Plots an histogram with one bar per topic.
    Each bar represents the difference between measure computed on the topic
    and the average measure among all topics.
    OutputFile is a string specifying the name of the file the plot
    is saved into.
    """

    _, details = pytrec_eval.evaluate(trecRun, qrels, measure, True)
    # make sure the qId order matches the score order (maybe not necessary...)
    bar_chart = pygal.Bar()
    bar_chart.style = style
    lstDetails = sorted(details.items())
    qIds = [qId for qId, _ in lstDetails]
    scores = [score for _, score in lstDetails]
    bar_chart.add(trecRun.name, scores)

    bar_chart.label_font_size = 8
    bar_chart.legend_at_bottom = True
    bar_chart.legend_font_size = 10
    bar_chart.legend_box_size = 8

    bar_chart.x_label_rotation = 90
    bar_chart.x_labels = qIds
    bar_chart.x_title = 'query ids'
    bar_chart.y_title = pytrec_eval.METRICS_NAMES[measure]
    bar_chart.render_to_file(outputFile)
Example #5
def plotEvaluationAll(trecRuns, qrels, measure, outputFile, style=PLOT_STYLE):
    """
    Plots an histogram with one bar per topic.
    Each bar represents the difference between measure computed on the topic
    and the average measure among all topics.
    OutputFile is a string specifying the name of the file the plot
    is saved into.
    """
    qIds = list(qrels.getTopicIds())
    qIds.sort()

    bar_chart = pygal.Bar()
    # bar_chart.spacing = 50
    bar_chart.label_font_size = 8
    bar_chart.style = style
    bar_chart.x_label_rotation = 90
    bar_chart.x_labels = qIds
    bar_chart.x_title = 'Topic Id'
    bar_chart.legend_at_bottom = True
    bar_chart.legend_font_size = 10
    bar_chart.legend_box_size = 8

    bar_chart.y_title = pytrec_eval.METRICS_NAMES[measure]

    for trecRun in trecRuns:
        _, details = pytrec_eval.evaluate(trecRun, qrels, measure, True)
        lstDetails = [details.get(topicId, 0) for topicId in qIds]
        bar_chart.add(trecRun.name, lstDetails)

    bar_chart.render_to_file(outputFile)
Example #6
def plotEvaluation(trecRun, qrels, measure, outputFile, style=PLOT_STYLE):
    """
    Plots an histogram with one bar per topic.
    Each bar represents the difference between measure computed on the topic
    and the average measure among all topics.
    OutputFile is a string specifying the name of the file the plot
    is saved into.
    """

    _, details = pytrec_eval.evaluate(trecRun, qrels, measure, True)
    # make sure the qId order matches the score order (maybe not necessary...)
    bar_chart = pygal.Bar()
    bar_chart.style = style
    lstDetails = sorted(details.items())
    qIds = [qId for qId, _ in lstDetails]
    scores = [score for _, score in lstDetails]
    bar_chart.add(trecRun.name, scores)

    bar_chart.label_font_size = 8
    bar_chart.legend_at_bottom = True
    bar_chart.legend_font_size = 10
    bar_chart.legend_box_size = 8

    bar_chart.x_label_rotation = 90
    bar_chart.x_labels = qIds
    bar_chart.x_title = 'query ids'
    bar_chart.y_title = pytrec_eval.METRICS_NAMES[measure]
    bar_chart.render_to_file(outputFile)
Example #7
def rankRuns(runs, qrels, measure):
    """Ranks the runs based on measure.
     Returns a list of pairs (run, score) ordered by
     score descending.
    """
    rank = [(run, pytrec_eval.evaluate(run, qrels, [measure])[0]) for run in runs]
    rank.sort(key=lambda x: x[1], reverse=True)
    return rank


def ttest(victim_run, allTheOther_runs, qrels, metric):
    """
    Computes a t-test between victim_run and each run in allTheOther_runs,
    using the relevance judgements in qrels to compute the specified metric.
    Returns a dictionary d[otherRunName] = p-value
    """
    victimAvg, victimDetails = evaluation.evaluate(victim_run, qrels, metric, True)
    # to read the scores always in the same order
    keyList = list(victimDetails.keys())
    victimScores = [victimDetails[k] for k in keyList]
    result = {}
    for othertrun in allTheOther_runs:
        otherAvg, otherDetails = evaluation.evaluate(othertrun, qrels, metric, True)
        otherScores = [otherDetails[k] for k in keyList]
        _, p = stats.ttest_ind(victimScores, otherScores)
        result[othertrun.name] = p
    return result
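A minimal usage sketch combining the two helpers above; the run and qrels paths are placeholders, and the measure constant is taken from the other examples on this page:

# Hypothetical usage of rankRuns and ttest (paths and measure are placeholders):
import pytrec_eval

runs = [pytrec_eval.TrecRun(p) for p in ('runs/a.txt', 'runs/b.txt', 'runs/c.txt')]
qrels = pytrec_eval.QRels('qrels/qrels.txt')

# rank all runs by their average score on the chosen measure
ranked = rankRuns(runs, qrels, pytrec_eval.ndcg)
for run, score in ranked:
    print(run.name, score)

# compare the top-ranked run against the others; small p-values suggest the
# per-topic score distributions differ
best_run = ranked[0][0]
others = [r for r in runs if r is not best_run]
print(ttest(best_run, others, qrels, pytrec_eval.ndcg))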
Example #9
def rankRuns(runs, qrels, measure):
    """Ranks the runs based on measure.
     Returns a list of pairs (run, score) ordered by
     score descending.
    """
    rank = [(run, pytrec_eval.evaluate(run, qrels, [measure])[0])
            for run in runs]
    rank.sort(key=lambda x: x[1], reverse=True)
    return rank
Example #10
def ttest(victim_run, allTheOther_runs, qrels, metric):
    """
    Computes ttest between victim_run and all runs contained in allTheOther_runs
    using relevance judgements contained in qrels to compute the specified metric.
    Returns a dictionary d[otherRunName] = p-value
    """
    victimAvg, victimDetails = evaluation.evaluate(victim_run, qrels, metric,
                                                   True)
    # to read the scores always in the same order
    keyList = list(victimDetails.keys())
    victimScores = [victimDetails[k] for k in keyList]
    result = {}
    for othertrun in allTheOther_runs:
        otherAvg, otherDetails = evaluation.evaluate(othertrun, qrels, metric,
                                                     True)
        otherScores = [otherDetails[k] for k in keyList]
        _, p = stats.ttest_ind(victimScores, otherScores)
        result[othertrun.name] = p
    return result
Example #11
def plotDifferenceWith(targetRun, otherRuns, qrels, measure, outputFile, style=PLOT_STYLE):
    """
    Plots, for each topic, the difference of measure between each run in
    otherRuns and targetRun, and saves the chart to outputFile.
    """
    avg_baseline, baseline_scores = pytrec_eval.evaluate(targetRun, qrels, measure, True)

    bar_chart = pygal.Bar()
    bar_chart.style = style
    allTopics = list(qrels.getTopicIds())

    bar_chart.label_font_size = 8
    bar_chart.legend_at_bottom = True
    bar_chart.legend_font_size = 10
    bar_chart.legend_box_size = 8

    bar_chart.x_label_rotation = 90
    bar_chart.x_labels = allTopics
    bar_chart.x_title = 'Topic Id'
    bar_chart.y_title = 'Difference from ' + targetRun.name + ' (' + pytrec_eval.METRICS_NAMES[measure] + ')'

    for otherRun in otherRuns:
        _, other_scores = pytrec_eval.evaluate(otherRun, qrels, measure, True)
        points = [other_scores.get(topicId, 0) - baseline_scores.get(topicId, 0)
                  for topicId in allTopics]
        bar_chart.add(otherRun.name, points)
    bar_chart.render_to_file(outputFile)
Example #12
from pytrec_eval import TrecRun
from pytrec_eval import QRels
from pytrec_eval import evaluate
from pytrec_eval import metrics
from pytrec_eval import precisionAt

# load a TREC-format run and the corresponding relevance judgements
run = TrecRun(
    "/home/smsarwar/PycharmProjects/deep-siamese-text-similarity/results/results_multitask.txt"
)
qrel = QRels(
    '/home/smsarwar/PycharmProjects/deep-siamese-text-similarity/results/qrel.txt'
)
print(qrel)
# overall recall, precision@10 and average precision for the run
print(evaluate(run, qrel, metrics.recall))
print(evaluate(run, qrel, metrics.precisionAt(10)))
print(evaluate(run, qrel, metrics.avgPrec))
#print(metrics.recall(run, qrel, detailed=True))
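Building on the run and qrel objects above, the plotting examples on this page also pass True as a fourth argument to evaluate to get a per-topic breakdown alongside the average; a hedged sketch of that call (the shape of the return value is inferred from those examples):

# per-topic breakdown: evaluate(..., True) appears to return (average, {topicId: score})
avg, per_topic = evaluate(run, qrel, metrics.avgPrec, True)
print('mean average precision:', avg)
for topic_id, score in sorted(per_topic.items()):
    print(topic_id, score)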
Example #13
    # (excerpt: this block runs inside enclosing loops, apparently over lambda1/lambda2 and topic_number)
    sdm_doc_score_map = get_scores(sdm_docs[str(topic_number)])
    cur_topic_subgraph = scores[str(topic_number)]
    doc_count = 1
    for doc, score in sorted(cur_topic_subgraph.items(), key=lambda item: item[1], reverse=True):
      sdm_score = float(sdm_doc_score_map[doc])
      try:
        centrality_score = normalize(score, cur_topic_subgraph.values())
      except Exception:
        centrality_score = 0
      # interpolate the SDM retrieval score with the graph centrality score
      combined_score = lambda1 * sdm_score + lambda2 * centrality_score
      temp_results.append(str(topic_number) + " Q0 " + doc + " " + str(doc_count) + " " + str(combined_score) + " STANDARD")
      doc_count += 1
  with open("temp_file.test", "w") as outfile:
    outfile.write("\n".join(temp_results))
  run = pytrec_eval.TrecRun('temp_file.test')
  qrels = pytrec_eval.QRels('qrels_file.test')
  curr_result = pytrec_eval.evaluate(run, qrels, [pytrec_eval.ndcg])[0]
  if curr_result > best_score:
    if best_results is not None:
      best_results.clear()
    best_score = curr_result
    best_results = list(temp_results)
  print("Run completed with lambda1=" + str(lambda1) + ", lambda2=" + str(lambda2) + " and NDCG=" + str(curr_result) + ". Took: " + str(time() - start_load) +  " s")

for result in best_results:
  output.append(result)
with open("results_file.test", "w") as outfile:
    outfile.write("\n".join(output))
print ("Results took %.2f seconds to run." %(time() - start_project))