def __init__(self, signature):
    """Create an empty aggregate record for one parse signature.

    Args:
      signature: the signature this record is kept for; stored unchanged.
    """
    self.signature = signature

    # Aggregates start out empty: the score (computed as per the
    # user-supplied weights) is zero and no matches have been counted.
    self.score = 0.0
    self.counts = util.MatchCounts()

    # Exemplar category for the parse; unset until one is assigned.
    self.example_category = None

    # Number of parses allowed, respectively rejected, by the formula.
    self.selected = 0
    self.rejected = 0
def counts_for(self, span_subset=None, counts=None):
    """Merge the per-span counts of this object into a single accumulator.

    Args:
      span_subset: optional filter exposing `is_allowed(index)`. When it is
        None, every entry of `self.span_counts` is included.
      counts: optional accumulator exposing `merge(count)`. When it is None,
        a fresh `util.MatchCounts()` is created and used instead.

    Returns:
      The accumulator (the one passed in, or the freshly created one) after
      all allowed per-span counts have been merged into it.
    """
    total = util.MatchCounts() if counts is None else counts
    for position, span_count in enumerate(self.span_counts):
        # Skip spans that an explicit subset filters out.
        if span_subset is not None and not span_subset.is_allowed(position):
            continue
        total.merge(span_count)
    return total
def __init__(self, request):
    """Create an empty result container for `request`.

    Args:
      request: the request being answered; stored unchanged. Its `query`
        attribute is turned into span signatures via
        `util.parse_to_span_signatures`.
    """
    self.request = request
    self.error_message = None

    signatures = util.parse_to_span_signatures(request.query)
    self.span_signatures = signatures

    # Every count below is taken after the parse selector has been applied.
    self.counts_across_spans = util.MatchCounts()
    self.counts_across_selected_spans = util.MatchCounts()
    # One independent counter per span signature, in the same order.
    self.counts_by_span = [util.MatchCounts() for _ in signatures]

    # Top selected parses that are shown.
    self.top_parses = []

    # Statistics of the selected parses that are not shown.
    self.top_parses_not_shown = {
        "num": 0,
        "counts": util.MatchCounts(),
    }

    # Statistics of the parses that were not selected.
    self.unselected_parses = {
        "num": 0,
        "counts": util.MatchCounts(),
    }
def __init__(self):
    """Create an empty statistics record for one signature."""
    # Aggregates over all (category, parse) pairs with this signature. When
    # a category has two or more parses with this signature, only its
    # highest-scoring parse is taken into account.
    self.num = 0                            # total number of categories
    self.members = 0                        # member items, not deduplicated
    self.score = 0.0                        # total score across all parses
    self.fact_matches = util.MatchCounts()  # total fact-matching statistics

    # Exemplar for this signature: category QID, frame, and parse.
    self.example_qid = None
    self.example_category = None
    self.example_parse = None
def handle_signature(self, signature, categories, form):
    """Write the report for a single parse signature.

    Emits, in order:
      1. a one-line summary and the total number of member items,
      2. a table of span-level fact-matching statistics with a final "All"
         row for the parse-level totals,
      3. a form for generating a recordio file (only when the signature
         type read from the form is "full"),
      4. a table of at most 200 categories having a parse with this
         signature, one row per category, best-scoring parse first.

    Args:
      signature: signature string being reported on.
      categories: sized collection of (qid, category, parse) triples. The
        same QID may appear more than once, i.e. a category can have
        several parses with this signature.
      form: submitted form. Read through `getvalue` for the sort metric and
        the signature type, and handed to `self.fact_match_weights`.
    """
    score_type = form.getvalue("main_form_sort_metric")
    fact_weights = self.fact_match_weights(form)
    signature_type = form.getvalue("main_form_signature_type")

    # Sort all parses with this signature, highest score first.
    output = []
    for (qid, category, parse) in categories:
        score = self.parse_score(category, parse, score_type, fact_weights)
        output.append((qid, category, parse, score))
    output.sort(key=lambda x: -x[3])

    # Get fact-matching statistics. Consider only the top parse for a category
    # if it has >1 parses with the same signature. Because `output` is sorted
    # by descending score, the first parse seen for a QID is its top parse.
    category_count = defaultdict(int)
    match_counts = util.MatchCounts()
    span_match_counts = defaultdict(util.MatchCounts)
    num_members = 0
    for qid, category, parse, score in output:
        category_count[qid] += 1
        if category_count[qid] > 1:
            continue
        num_members += len(category.members)
        util.fact_matches_for_parse(parse, match_counts)
        for span in parse.spans:
            span_signature = util.span_signature(span, signature_type)
            util.fact_matches_for_span(span, span_match_counts[span_signature])

    # Write a summary.
    self._tag(
        "div",
        "<b>%s</b>: in %d (category, parse) pairs across %d categories"
        % (signature, len(categories), len(category_count)))
    self._tag("b", "#Items across categories: ")
    self._text("%d (incl. possible duplicates)" % num_members)
    self._br()
    self._tag("b", "Span-level fact-matching statistics")
    self.write_main_table_header(
        ["Span Signature"],
        [t.name for t in FactMatchType])
    # items() rather than iteritems(): the latter exists only on Python 2
    # dicts and raises AttributeError on Python 3.
    for span_signature, span_matches in span_match_counts.items():
        self._begin("tr")
        self._cell(span_signature)
        self._separator(header=False)
        self.write_fact_match_counts(span_matches)
        self._end("tr")
    self._begin("tr")
    self._cell("All")
    self._separator(header=False)
    self.write_fact_match_counts(match_counts)
    self._end("tr")
    self._end("table")

    # Give an option to generate a recordio file.
    if signature_type == "full":
        self._br()
        self._begin("table class='main_table'")
        self._begin("tr")
        self._begin("td")
        self._begin("form", id="recordio_form", method="POST", action="",
                    target="_blank")
        self._begin_end("input", type="hidden",
                        name="main_form_functionality", value="recordio")
        self._begin_end("input", type="hidden", name="recordio_signature",
                        id="recordio_signature", value=signature)
        self._tag("b", "Generate recordio for this signature")
        self._br()
        self._br()
        self._text("Filename: ")
        filename = ("local/data/e/wikicat/" + signature.replace("$", "_")
                    + ".rec")
        self._begin_end("input", type="text", size=100,
                        name="recordio_filename", value=filename)
        self._br()
        self._text("Category QIDs ('ALL' for all): ")
        self._begin_end("input", type="text", size=100, value="ALL",
                        name="recordio_categories")
        self._br()
        self._text("Generate facts for these types:")
        self._begin_end("input", type="text", size=100,
                        value="NEW,ADDITIONAL,SUBSUMED_BY_EXISTING",
                        name="recordio_match_types")
        self._br()
        self._text("Generate the following facts:")
        self._br()

        # One checkbox per token that starts with '$' and contains a second
        # '$' further in; `count` numbers those tokens consecutively.
        count = 0
        for token in signature.split():
            if token.startswith("$") and "$" in token[1:]:
                name = "recordio_span%d" % count
                self._text(" " + token + " ")
                self._begin_end("input", type="checkbox", name=name, id=name,
                                checked="on")
                self._br()
                count += 1
        self._begin_end("input", type="hidden", name="recordio_num_spans",
                        id="recordio_total_spans", value=count)
        self._begin_end("input", type="submit")
        self._end(["form", "td", "tr", "table"])

    # Write the individual parses in a tabular form.
    self._br()
    self._tag("b", "Categories with parses matching '" + signature + "'")
    seen = set()
    max_rows = 200
    self.write_main_table_header(
        ["Category", "Prelim parse score", "#Members", "Fact-matching score"],
        [t.name for t in FactMatchType])
    row_count = 0
    for qid, category, parse, score in output:
        if row_count >= max_rows:
            break
        # Only the first (best-scoring) parse of each category gets a row.
        if qid in seen:
            continue
        row_count += 1
        seen.add(qid)

        self._begin("tr")
        self._begin("td")
        self._form_anchor(qid + ": " + category.name, qid)
        if category_count[qid] > 1:
            more = category_count[qid] - 1
            self._text(" (%d more parse%s)" % (more, "" if more == 1 else "s"))
        self._end("td")

        self._cell(parse.score, numeric=True)
        self._cell(len(category.members), numeric=True)
        self._cell("%.4f" % self.parse_fact_score(parse, fact_weights), True)
        self._separator(header=False)
        counts = util.fact_matches_for_parse(parse)
        self.write_fact_match_counts(counts)
        self._end("tr")
    self._end("table")