def handle_signature(self, signature, categories, form):
    """Write the report for all parses that share `signature`.

    The output consists of a summary line, a table of span-level and
    overall fact-matching counts, a recordio-generation form (only when
    the signature type is "full"), and a table with one row per category.

    Args:
      signature: signature string being reported on.
      categories: sized collection of (qid, category, parse) tuples whose
        parse has this signature.
      form: submitted form; options are read via `form.getvalue(...)`.

    NOTE(review): this source contains a second `handle_signature` with
    the same parameters further down; if both live in the same class the
    later one replaces this one -- confirm which is intended.
    """
    score_type = form.getvalue("main_form_sort_metric")
    fact_weights = self.fact_match_weights(form)
    signature_type = form.getvalue("main_form_signature_type")

    # Score every parse with this signature and order best-first. The
    # sort is stable, so equal scores keep their order from `categories`.
    output = [
        (qid, category, parse,
         self.parse_score(category, parse, score_type, fact_weights))
        for (qid, category, parse) in categories
    ]
    output.sort(key=lambda x: -x[3])

    # Get fact-matching statistics. Consider only the top parse for a
    # category if it has >1 parses with the same signature; `output` is
    # sorted, so the first parse seen for a qid is its best-scoring one.
    category_count = defaultdict(int)
    match_counts = util.MatchCounts()
    span_match_counts = defaultdict(util.MatchCounts)
    num_members = 0
    for qid, category, parse, _ in output:
        category_count[qid] += 1
        if category_count[qid] > 1:
            continue
        num_members += len(category.members)
        util.fact_matches_for_parse(parse, match_counts)
        for span in parse.spans:
            span_signature = util.span_signature(span, signature_type)
            util.fact_matches_for_span(
                span, span_match_counts[span_signature])

    # Write a summary.
    self._tag(
        "div",
        "<b>%s</b>: in %d (category, parse) pairs across %d categories" %
        (signature, len(categories), len(category_count)))
    self._tag("b", "#Items across categories: ")
    self._text("%d (incl. possible duplicates)" % num_members)
    self._br()
    self._tag("b", "Span-level fact-matching statistics")
    self.write_main_table_header(
        ["Span Signature"], [t.name for t in FactMatchType])
    # items() rather than the Python-2-only iteritems(), so this also
    # runs under Python 3.
    for span_signature, span_matches in span_match_counts.items():
        self._begin("tr")
        self._cell(span_signature)
        self._separator(header=False)
        self.write_fact_match_counts(span_matches)
        self._end("tr")
    self._begin("tr")
    self._cell("All")
    self._separator(header=False)
    self.write_fact_match_counts(match_counts)
    self._end("tr")
    self._end("table")

    # Give an option to generate a recordio file.
    if signature_type == "full":
        self._br()
        self._begin("table class='main_table'")
        self._begin("tr")
        self._begin("td")
        self._begin("form", id="recordio_form", method="POST", action="",
                    target="_blank")
        self._begin_end("input", type="hidden",
                        name="main_form_functionality", value="recordio")
        self._begin_end("input", type="hidden", name="recordio_signature",
                        id="recordio_signature", value=signature)
        self._tag("b", "Generate recordio for this signature")
        self._br()
        self._br()
        self._text("Filename: ")
        filename = \
            "local/data/e/wikicat/" + signature.replace("$", "_") + ".rec"
        self._begin_end("input", type="text", size=100,
                        name="recordio_filename", value=filename)
        self._br()
        self._text("Category QIDs ('ALL' for all): ")
        self._begin_end("input", type="text", size=100, value="ALL",
                        name="recordio_categories")
        self._br()
        self._text("Generate facts for these types:")
        self._begin_end("input", type="text", size=100,
                        value="NEW,ADDITIONAL,SUBSUMED_BY_EXISTING",
                        name="recordio_match_types")
        self._br()
        self._text("Generate the following facts:")
        self._br()

        # One checkbox per signature token that starts with '$' and has
        # at least one more '$' after the first character.
        count = 0
        for token in signature.split():
            if token.startswith("$") and "$" in token[1:]:
                name = "recordio_span%d" % count
                self._text(" " + token + " ")
                self._begin_end("input", type="checkbox", name=name,
                                id=name, checked="on")
                self._br()
                count += 1
        self._begin_end("input", type="hidden", name="recordio_num_spans",
                        id="recordio_total_spans", value=count)
        self._begin_end("input", type="submit")
        self._end(["form", "td", "tr", "table"])

    # Write the individual parses in a tabular form: one row per
    # category (its best-scoring parse), capped at `max_rows` rows.
    self._br()
    self._tag("b", "Categories with parses matching '" + signature + "'")
    seen = set()
    max_rows = 200
    self.write_main_table_header(
        ["Category", "Prelim parse score", "#Members",
         "Fact-matching score"],
        [t.name for t in FactMatchType])
    row_count = 0
    for qid, category, parse, _ in output:
        if row_count >= max_rows:
            break
        if qid in seen:
            continue
        row_count += 1
        seen.add(qid)

        self._begin("tr")
        self._begin("td")
        self._form_anchor(qid + ": " + category.name, qid)
        if category_count[qid] > 1:
            more = category_count[qid] - 1
            self._text(
                " (%d more parse%s)" % (more, "" if more == 1 else "s"))
        self._end("td")
        self._cell(parse.score, numeric=True)
        self._cell(len(category.members), numeric=True)
        self._cell(
            "%.4f" % self.parse_fact_score(parse, fact_weights), True)
        self._separator(header=False)
        counts = util.fact_matches_for_parse(parse)
        self.write_fact_match_counts(counts)
        self._end("tr")
    self._end("table")
def handle_category(self, qid, form):
    """Write the report for a single category: one table row per parse.

    Each row shows the parse signature, the parse's spans laid out under
    the category's document tokens, the value of the selected sort
    metric, and -- depending on the form's checkboxes -- preliminary
    scores, fact-matching counts, and other categories that share the
    parse's signature.

    Args:
      qid: key of the category in `browser_globals.category_frame`.
      form: submitted form; options are read via `form.getvalue(...)`.
    """
    def is_on(name):
        return form.getvalue("main_form_" + name) == "on"

    # Various options.
    show_span_qid = is_on("show_span_qid")
    show_prelim_parse_scores = is_on("show_prelim_parse_scores")
    show_span_scores = is_on("show_span_scores")
    show_fact_matches = is_on("show_fact_matching_statistics")
    show_span_fact_matches = is_on("show_span_fact_match_stats")
    show_similar_categories = is_on("show_similar_categories")
    signature_type = form.getvalue("main_form_signature_type")
    metric = form.getvalue("main_form_sort_metric")
    fact_weights = self.fact_match_weights(form)

    frame = browser_globals.category_frame[qid]
    document = sling.Document(frame=frame.document)
    # Enumerate the parses once; the list serves both the count in the
    # header line and the scoring below.
    category_parses = list(frame("parse"))
    self._tag("div", "<b>%s = %s</b>: %d members, %d parses" %
              (qid, frame.name, len(frame.members), len(category_parses)))
    self._br()

    # Write the parses in a tabular format. The prelim-score column is
    # dropped when that score is already the sort metric.
    show_prelim_parse_scores = (
        show_prelim_parse_scores and metric != "prelim_parse_score")
    self.write_main_table_header(
        "Signature",
        [t.word for t in document.tokens],
        "Metric",
        "Prelim Scores" if show_prelim_parse_scores else None,
        [t.name for t in FactMatchType] if show_fact_matches else None,
        "Matching Categories" if show_similar_categories else None)

    # Each parse is written as one row, best metric value first.
    parses = [
        (parse, self.parse_score(frame, parse, metric, fact_weights))
        for parse in category_parses
    ]
    parses.sort(key=lambda x: -x[1])
    num_tokens = len(document.tokens)
    for parse, metric_value in parses:
        signature = util.parse_signature(parse, signature_type)
        self._begin("tr")
        self._begin("td")
        self._form_anchor(signature, signature)
        self._end("td")
        self._separator(header=False)

        # Lay the spans out under the token columns: empty cells for the
        # tokens between spans, one multi-column cell per span.
        prev_span_end = -1
        for span in parse.spans:
            for _ in range(prev_span_end + 1, span.begin):
                self._empty_cell()
            self._begin(
                "td", colspan=span.end-span.begin, align='middle')
            text = util.span_signature(span, signature_type)
            if show_span_qid:
                text += " (" + str(span.qid) + ")"
            title = \
                '.'.join([str(p) for p in span.pids]) + ' = ' + str(span.qid)
            if "name" in span.qid:
                # Was `span.qid[name]`, which raised NameError because
                # `name` is undefined here; the slot key is "name".
                title += " (" + span.qid["name"] + ")"
            self._tag("span", text, title=title)

            if show_span_scores and "prior" in span:
                self._br()
                self._text("%s = %0.4f" % ("prior", span.prior))

            if show_span_fact_matches:
                local_counts = util.fact_matches_for_span(span)
                self._br()
                self._begin("table class='span_fact_match'")
                self._begin("thead")
                for t in FactMatchType:
                    self._tag("th", t.name)
                self._end("thead")
                self._begin("tr")
                self.write_fact_match_counts(local_counts)
                self._end(["tr", "table"])
            self._end("td")
            prev_span_end = span.end - 1

        # Pad the tokens after the last span.
        for _ in range(prev_span_end + 1, num_tokens):
            self._empty_cell()

        self._separator(header=False)
        # Exact-type check kept on purpose: only plain ints are written
        # as-is, everything else is formatted to four decimals.
        if type(metric_value) is int:
            self._cell(metric_value)
        else:
            self._cell("%.4f" % metric_value)

        if show_prelim_parse_scores:
            self._separator(header=False)
            self._begin("td class='numeric'")
            for score_type in ["prior", "member_score", "cover"]:
                if score_type in parse:
                    self._text(
                        "%s = %0.4f" % (score_type, parse[score_type]))
                    self._br()
            if "score" in parse:
                self._color_text("Overall = %0.4f" % parse.score, "blue")
            self._end("td")

        if show_fact_matches:
            self._separator(header=False)
            total_fact_counts = util.fact_matches_for_parse(parse)
            self.write_fact_match_counts(total_fact_counts)

        if show_similar_categories:
            self._separator(header=False)
            self._begin("td")
            limit = 5
            signature_mapping = browser_globals.full_signature_to_parse
            if signature_type == "coarse":
                signature_mapping = \
                    browser_globals.coarse_signature_to_parse
            # List up to `limit` distinct other categories that have a
            # parse with this signature.
            seen = set()
            for (other_qid, other_category, other_parse) in \
                    signature_mapping[signature]:
                if len(seen) >= limit:
                    break
                if other_qid != qid and other_qid not in seen:
                    seen.add(other_qid)
                    self._text(other_category.name)
                    self._form_anchor(" (= %s)" % other_qid, other_qid)
                    self._text(" (%0.4f)" % other_parse.score)
                    self._br()
            self._end("td")
        self._end("tr")
    self._end("table")
def handle_signature(self, signature, categories, form):
    """Write the report for all parses that share `signature`.

    Emits a summary line, a table of span-level and overall
    fact-matching counts, and a table with one row per category.

    Args:
      signature: signature string being reported on.
      categories: sized collection of (qid, category, parse) tuples whose
        parse has this signature.
      form: submitted form; options are read via `form.getvalue(...)`.

    NOTE(review): this source also contains an earlier `handle_signature`
    with the same parameters (that one additionally writes a recordio
    form); if both live in the same class this later definition replaces
    it -- confirm which is intended.
    """
    score_type = form.getvalue("main_form_sort_metric")
    fact_weights = self.fact_match_weights(form)
    signature_type = form.getvalue("main_form_signature_type")

    # Sort all parses with this signature, best score first. The sort is
    # stable, so ties keep their order from `categories`.
    output = []
    for (qid, category, parse) in categories:
        score = self.parse_score(category, parse, score_type, fact_weights)
        output.append((qid, category, parse, score))
    output.sort(key=lambda x: -x[3])

    # Get fact-matching statistics. Consider only the top parse for a
    # category if it has >1 parses with the same signature; since
    # `output` is sorted, that is the first parse seen for each qid.
    category_count = defaultdict(int)
    match_counts = defaultdict(int)
    span_match_counts = defaultdict(lambda: defaultdict(int))
    num_members = 0
    for qid, category, parse, _ in output:
        category_count[qid] += 1
        if category_count[qid] > 1:
            continue
        num_members += len(category.members)
        util.fact_matches_for_parse(parse, match_counts)
        for span in parse.spans:
            span_signature = util.span_signature(span, signature_type)
            util.fact_matches_for_span(
                span, span_match_counts[span_signature])

    # Write a summary.
    summary = \
        "<b>%s</b>: in %d (category, parse) pairs across %d categories" % \
        (signature, len(categories), len(category_count))
    self._tag("div", summary)
    self._tag("b", "#Items across categories: ")
    self._text("%d (incl. possible duplicates)" % num_members)
    self._br()
    self._tag("b", "Span-level fact-matching statistics")
    match_type_names = [t.name for t in FactMatchType]
    self.write_main_table_header(["Span Signature"], match_type_names)
    # items() instead of the Python-2-only iteritems(); behaves the same
    # on Python 2 and also works on Python 3.
    for span_signature, span_stats in span_match_counts.items():
        self._begin("tr")
        self._cell(span_signature)
        self._separator(header=False)
        self.write_fact_match_counts(span_stats)
        self._end("tr")
    self._begin("tr")
    self._cell("All")
    self._separator(header=False)
    self.write_fact_match_counts(match_counts)
    self._end("tr")
    self._end("table")

    # Write the individual parses in a tabular form: one row per
    # category (its best-scoring parse), capped at `max_rows` rows.
    self._br()
    self._tag("b", "Categories with parses matching '" + signature + "'")
    seen = set()
    max_rows = 200
    self.write_main_table_header(
        ["Category", "Prelim parse score", "#Members",
         "Fact-matching score"],
        [t.name for t in FactMatchType])
    row_count = 0
    for qid, category, parse, _ in output:
        if row_count >= max_rows:
            break
        if qid in seen:
            continue
        row_count += 1
        seen.add(qid)

        self._begin("tr")
        self._begin("td")
        self._form_anchor(qid + ": " + category.name, qid)
        extra = category_count[qid] - 1
        if extra > 0:
            self._text(
                " (%d more parse%s)" % (extra, "" if extra == 1 else "s"))
        self._end("td")
        self._cell(parse.score, numeric=True)
        self._cell(len(category.members), numeric=True)
        self._cell(
            "%.4f" % self.parse_fact_score(parse, fact_weights), True)
        self._separator(header=False)
        counts = util.fact_matches_for_parse(parse)
        self.write_fact_match_counts(counts)
        self._end("tr")
    self._end("table")