def grid_diff_html(xd, compare_with=None):
    """Render xd's grid and headers as an HTML block.

    When compare_with is given, the xdid link includes the grid-similarity
    percentage, and grids less than 25% similar return an empty string.
    """
    if compare_with:
        r = mktag('div', tagclass='fullgrid')
    else:
        r = mktag('div', tagclass='fullgrid main')

    similarity_pct = ''
    if compare_with:
        real_pct = grid_similarity(xd, compare_with)
        if real_pct < 25:
            return ''

        similarity_pct = " (%d%%)" % real_pct

    xdlink = mktag('div', tagclass='xdid',
                   inner=mkhref("%s %s" % (xd.xdid(), similarity_pct), '/pub/' + xd.xdid()))
    if compare_with is not None:
        r += xdlink
    else:
        r += mktag('b', inner=xdlink)

    r += headers_to_html(xd)
    r += grid_to_html(xd, compare_with)

    r += '</div>'  # solution
    return r

def main():
    args = utils.get_args('generates .html diffs with deep clues for all puzzles in similar.tsv')
    outf = utils.open_output()

    similars = utils.parse_tsv('gxd/similar.tsv', 'Similar')

    xdids_todo = [parse_pathname(fn).base for fn in args.inputs]
    if not xdids_todo:
        xdids_todo = [xdid for xdid, matches in metadb.get_similar_grids().items() if matches]

    for mainxdid in xdids_todo:
        progress(mainxdid)

        mainxd = xdfile.get_xd(mainxdid)
        if not mainxd:
            continue

        matches = metadb.get_similar_grids().get(mainxdid, [])

        xddates = {}
        xddates[mainxdid] = mainxd.date()  # Dict to store XD dates for further sort
        html_grids = {}
        html_clues = {}

        # these are added directly to similar.tsv
        nstaleclues = 0
        nstaleanswers = 0
        ntotalclues = 0

        poss_answers = []  # TODO:
        pub_uses = {}  # [pubid] -> set(ClueAnswer)

        dcl_html = ''
        deepcl_html = []  # keep deep clues to parse later - per row
        for pos, mainclue, mainanswer in mainxd.iterclues():
            deepcl_html = []  # Temporary; to be replaced later

            mainca = ClueAnswer(mainxdid, mainxd.date(), mainanswer, mainclue)

            # 'grid position' column
            deepcl_html.append('<td class="pos">%s.</td>' % pos)

            # find other uses of this clue, and other answers, in a single pass
            for clueans in find_clue_variants(mainclue):
                if clueans.answer != mainanswer:
                    poss_answers.append(clueans)

                if clueans.answer == mainanswer:
                    if clueans.pubid in pub_uses:
                        otherpubs = pub_uses[clueans.pubid]
                    else:
                        otherpubs = set()  # set of ClueAnswer
                        pub_uses[clueans.pubid] = otherpubs
                    otherpubs.add(clueans)

            # add 'other uses' to clues_html
            stale = False
            deepcl_html.append('<td class="other-uses">')
            if len(pub_uses) > 0:
                sortable_uses = []
                for pubid, uses in pub_uses.items():
                    # show the earliest unboiled clue
                    for u in sorted(uses, key=lambda x: x.date or ""):
                        # only show those published earlier
                        if u.date and u.date <= mainxd.date():
                            if pubid == mainxdid and u.date == mainxd.date():
                                pass
                            else:
                                stale = True
                                sortable_uses.append((u.date, u, 1))

                deepcl_html.append(html_select([(clue, nuses) for dt, clue, nuses in sorted(sortable_uses, key=lambda x: x[0], reverse=True)], top_option=mainclue))
            else:
                deepcl_html.append('<div class="original">%s</div>' % esc(mainclue))

            deepcl_html.append('</td>')

            # add 'other answers' to clues_html
            deepcl_html.append('<td class="other-answers">')
            deepcl_html.append(html_select_options(poss_answers, strmaker=lambda ca: ca.answer, force_top=mainca))
            deepcl_html.append('</td>')

            # add 'other clues' to clues_html
            deepcl_html.append('<td class="other-clues">')

            # bclues is all boiled clues for this particular answer: { [bc] -> #uses }
            bclues = load_answers().get(mainanswer, [])
            stale_answer = False

            if bclues:
                uses = []
                for bc, nuses in bclues.items():
                    # then find all clues besides this one
                    clue_usages = [ca for ca in load_clues().get(bc, []) if ca.answer == mainanswer and ca.date < mainxd.date()]

                    if clue_usages:
                        stale_answer = True
                        if nuses > 1:
                            # only use one (the most recent) ClueAnswer per boiled clue
                            # but use the clue only (no xdid)
                            ca = sorted(clue_usages, key=lambda ca: ca.date or "z")[-1].clue
                        else:
                            ca = sorted(clue_usages, key=lambda ca: ca.date or "z")[-1]
                        uses.append((ca, nuses))

                if uses:
                    deepcl_html.append(html_select(uses))

            deepcl_html.append('</td>')  # end 'other-clues'

            if stale_answer:
                nstaleanswers += 1
            if stale:
                nstaleclues += 1
            ntotalclues += 1

            # Quick and dirty - to be replaced
            dcl_html += '<tr>' + ' '.join(deepcl_html) + '</tr>'

        # Store in list to make further formatting as html table easier
        mainxd = xdfile.get_xd(mainxdid)
        if mainxd:
            html_grids[mainxdid] = grid_diff_html(mainxd)

        # Add for main XD
        diff_l = []
        for pos, mainclue, mainanswer in mainxd.iterclues():
            diff_h = mktag('div', 'fullgrid main') + '%s. ' % pos
            diff_h += mainclue
            diff_h += mktag('span', tagclass='main', inner=' ~ ' + mainanswer.upper())
            diff_l.append(diff_h)
        html_clues[mainxdid] = diff_l

        # Process for all matches
        for xdid in matches:
            xd = xdfile.get_xd(xdid)
            if not xd:
                continue

            xddates[xdid] = xd.date()
            # output each grid
            html_grids[xdid] = grid_diff_html(xd, compare_with=mainxd)

            diff_l = []
            # output comparison of each set of clues
            for pos, clue, answer in xd.iterclues():
                diff_h = mktag('div', 'fullgrid') + '%s. ' % pos
                # get_clue() can return None, hence the `or ''` guard
                sm = difflib.SequenceMatcher(lambda x: x == ' ', mainxd.get_clue(pos) or '', clue)
                if sm.ratio() < 0.50:
                    diff_h += clue
                else:
                    # Compare based on op codes
                    for opcode in sm.get_opcodes():
                        c, a1, a2, b1, b2 = opcode
                        if c == 'equal':
                            diff_h += '<span class="match">%s</span>' % clue[b1:b2]
                        else:
                            diff_h += '<span class="diff">%s</span>' % clue[b1:b2]

                diff_h += mktag('span', tagclass=(answer == mainxd.get_answer(pos)) and 'match' or 'diff', inner=' ~ ' + answer.upper())
                diff_h += mktag('/div')
                diff_l.append(diff_h)
            html_clues[xdid] = diff_l

        # Wrap into table
        diff_h = mktag('table') + mktag('tr')
        # Sort by date
        sortedkeys = sorted(xddates.items(), key=operator.itemgetter(1))
        for w, dt in sortedkeys:
            diff_h += mktag('td') + html_grids[w] + mktag('/td')
        diff_h += mktag('/tr')

        for i, clue in enumerate(html_clues[sortedkeys[0][0]]):
            diff_h += mktag('tr')
            for w, dt in sortedkeys:
                if i < len(html_clues[w]):
                    diff_h += mktag('td') + html_clues[w][i] + mktag('/td')
            diff_h += mktag('/tr')

        # Process deepclues
        diff_h += mktag('table') + dcl_html + mktag('/table')
        diff_h += mktag('/table')

        outf.write_html('pub/deep/%s/index.html' % mainxdid, diff_h, title='Deep clue comparison for ' + mainxdid)

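# Illustrative sketch, not part of the original scripts: the clue-comparison loops
# above use difflib.SequenceMatcher to mark which parts of a matched clue are shared
# with the main clue.  The standalone helper below shows that technique in isolation;
# `highlight_clue_diff` and its `threshold` parameter are names introduced here for
# illustration only (the scripts above use 0.50 and 0.40 inline).
import difflib


def highlight_clue_diff(mainclue, clue, threshold=0.40):
    # Treat spaces as "junk" so word boundaries don't anchor matches,
    # mirroring the SequenceMatcher call in the loops above.
    sm = difflib.SequenceMatcher(lambda ch: ch == ' ', mainclue or '', clue)
    if not mainclue or sm.ratio() < threshold:
        return clue  # too dissimilar: show the whole clue unhighlighted

    out = ''
    for tag, a1, a2, b1, b2 in sm.get_opcodes():
        # 'equal' spans of the second string get a "match" class, everything else "diff";
        # a 'delete' opcode contributes an empty span, just as in the loops above.
        cls = 'match' if tag == 'equal' else 'diff'
        out += '<span class="%s">%s</span>' % (cls, clue[b1:b2])
    return out


# Example: highlight_clue_diff('Cut the lawn', 'Cuts the lawn') wraps the shared text
# in "match" spans and the extra 's' in a "diff" span.
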
def pubyear_html(pubyears=[], skip_decades=None):
    """ skip_decades, default { 'start': 1910, 'end': 1970 } """
    global g_all_pubyears
    if not g_all_pubyears:
        g_all_pubyears = utils.parse_tsv_data(open("pub/pubyears.tsv").read(), "pubyear")

    # Read similars to make background of widgets
    similar_d = defaultdict(dict)
    for xdid, v in utils.parse_tsv('gxd/similar.tsv', "similar").items():
        xd_split = utils.split_xdid(xdid)
        if xd_split:
            pubid, year, mon, day = xd_split
            if year in similar_d[pubid]:
                similar_d[pubid][year].append(int(v.similar_grid_pct))
            else:
                similar_d[pubid][year] = [int(v.similar_grid_pct)]

    b = []  # Body

    # Making collapsed decades depends on args
    skip_decades = skip_decades if skip_decades else {'start': 1910, 'end': 1970}

    allyears = []
    for i in range(skip_decades['start'] // 10, skip_decades['end'] // 10 + 1):
        allyears.append("%s0s" % i)
    allyears.extend([str(y) for y in range(skip_decades['end'] + 10, date.today().year + 1)])

    pubs = defaultdict(dict)
    # generate widget for each year
    for dowl in g_all_pubyears:
        dow = {}
        pubid, year, total = dowl[:3]
        hint = ''
        for d, v in zip(utils.WEEKDAYS, dowl[3:]):
            dow[d] = {'count': int(v) // 2, 'class': ''}
            dow[d]['class'] = 'red' if d == 'Sun' else 'ord'
            hint += '%s - %s\n' % (d, v)
        hint += 'Total: %s\n' % (total)

        # Define fill class based on average similarity
        fill_class = None  # default fill class for widget
        if year in similar_d[pubid]:
            s_avg = sum(similar_d[pubid][year]) / len(similar_d[pubid][year])
            hint += 'Avg similarity: %.2f%%' % (s_avg)
            # Highlight the year when average similarity is at least 10%
            fill_class = 'similar10' if s_avg >= 10 else None

        # Fill pubs with the different blocks to be used below
        pubs[pubid][year] = {
            'dow_data': dow,
            'widget': year_widget(dow, total, fill_class),
            'hint': hint,
            'total': int(total),
        }

    # Process for all decades
    for dec_year in [x for x in allyears if 's' in x]:
        for pubid in pubs:
            year_key = dec_year[:-2]  # Drop the final "0s" to get the decade prefix, e.g. "1910s" -> "191"
            total = 0
            for yf in [x for x in pubs[pubid] if year_key in x]:
                total += pubs[pubid][yf]['total']
            hint = 'Total: %s' % (total)
            if total > 0:
                pubs[pubid][dec_year] = {
                    'widget': decade_widget(total),
                    'hint': hint,
                    'total': int(total),
                }

    # main table
    b.append('<table class="pubyears">')
    yhdr = [' '] + [split_year(y) for y in allyears]
    yhdr.append("all")
    b.append(td_with_class(*yhdr, classes=get_pubheader_classes(*yhdr), rowclass="pubyearhead", tag="th"))
    b.append(tr_empty())

    # Process each pubid sorted by earliest year
    for pubid in sorted(pubs, key=lambda x: min(pubs[x])):
        pub = metadb.xd_publications().get(pubid)
        pubname = pub.PublicationName if pub else ''

        # Pub id to first column
        b.append(mktag('tr'))
        b.append(mktag('td', 'pub'))
        b.append(mkcell(space_with_nbsp(pubname or pubid), "/pub/" + pubid))
        b.append(mktag('/td'))

        # Process each year not collapsed into decade
        for yi in allyears:
            if yi in pubs[pubid] and pubs[pubid][yi]['total'] > 0:
                b.append(mktag('td', 'this'))
                # Put link directly to year or to decade
                href = "/pub/%s%s" % (pubid, yi) if 's' not in yi else "/pub/%s/index.html#%s" % (pubid, yi[:-1])
                b.append(mkcell(pubs[pubid][yi]['widget'], href=href, title=pubs[pubid][yi]['hint']))
                b.append(mktag('/td'))
            else:
                b.append(mktag('td', 'block'))
                b.append(' ')
                b.append(mktag('/td'))

        b.append(mktag('td'))
        b.append(str(sum([pubs[pubid][x]['total'] for x in pubs[pubid].keys()])))
        b.append(mktag('/td'))
        b.append(mktag('/tr'))

    b.append(mktag('/table'))
    return (" ".join(b))

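# Illustrative sketch, not part of the original scripts: pubyear_html() lays out its
# columns as collapsed decades for the skip_decades range followed by individual years
# up to the current year.  The helper below reproduces just that bucket construction;
# `year_buckets` is a name introduced here for illustration only.
from datetime import date


def year_buckets(skip_decades=None):
    skip_decades = skip_decades or {'start': 1910, 'end': 1970}
    # Decade labels ("1910s" .. "1970s"), built from the tens digit of each bound
    buckets = ['%s0s' % i for i in range(skip_decades['start'] // 10, skip_decades['end'] // 10 + 1)]
    # Then one entry per year from the decade after the skip range through today
    buckets.extend(str(y) for y in range(skip_decades['end'] + 10, date.today().year + 1))
    return buckets


# year_buckets() -> ['1910s', '1920s', ..., '1970s', '1980', '1981', ..., '<current year>']
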
def main():
    args = utils.get_args('generates .html diffs with deep clues for all puzzles in similar.tsv')
    outf = utils.open_output()

    similars = utils.parse_tsv('gxd/similar.tsv', 'Similar')

    xds_todo = []
    for fn, contents in find_files(*args.inputs, ext='.xd'):
        xd = xdfile.xdfile(contents.decode('utf-8'), fn)
        xds_todo.append(xd)

    for mainxd in xds_todo:
        mainxdid = mainxd.xdid()
        progress(mainxdid)

        matches = metadb.xd_similar(mainxdid)

        xddates = {}
        xddates[mainxdid] = mainxd.date()  # Dict to store XD dates for further sort
        html_grids = {}

        # these are added directly to similar.tsv
        nstaleclues = 0
        nstaleanswers = 0
        ntotalclues = 0

        dcl_html = '<tr>'
        dcl_html += '<th></th>'
        dcl_html += '<th>Clue</th>'
        dcl_html += '<th>ANSWERs</th>'
        dcl_html += '<th>Alt. clue possibilities</th>'
        dcl_html += '</tr>'

        deepcl_html = []  # keep deep clues to parse later - per row
        for pos, mainclue, mainanswer in mainxd.iterclues():
            if not pos:
                continue

            poss_answers = []  # TODO:
            pub_uses = {}  # [pubid] -> set(ClueAnswer)

            deepcl_html = []  # Temporary; to be replaced later

            mainca = ClueAnswer(mainxdid, mainxd.date(), mainanswer, mainclue)

            # 'grid position' column
            deepcl_html.append('<td class="pos">%s.</td>' % pos)

            # find other uses of this clue, and other answers, in a single pass
            for clueans in find_clue_variants(mainclue):
                if clueans.answer != mainanswer:
                    poss_answers.append(clueans)

                if clueans.answer == mainanswer:
                    if clueans.pubid in pub_uses:
                        otherpubs = pub_uses[clueans.pubid]
                    else:
                        otherpubs = set()  # set of ClueAnswer
                        pub_uses[clueans.pubid] = otherpubs
                    otherpubs.add(clueans)

            # add 'other uses' to clues_html
            deepcl_html.append('<td class="other-uses">')

            prev = prev_uses(pub_uses, mainxd, mainclue)
            if prev:
                deepcl_html.append('<a href="/pub/clue/%s">%s [x%s]</a>' % (boil(mainclue), mainclue, len(prev)))
                nstaleclues += 1
            else:
                deepcl_html.append(mainclue)

            deepcl_html.append('</td>')

            # add 'other answers' to clues_html
            deepcl_html.append('<td class="other-answers">')
            deepcl_html.append(html_select_options(poss_answers, strmaker=lambda ca: ca.answer, force_top=mainca, add_total=False))
            deepcl_html.append('</td>')

            # add 'other clues' to clues_html
            deepcl_html.append('<td class="other-clues">')

            other_clues = html_other_clues(mainanswer, mainclue, mainxd)
            if other_clues:
                deepcl_html.append(other_clues)
                nstaleanswers += 1

            deepcl_html.append('</td>')  # end 'other-clues'

            ntotalclues += 1
            # Quick and dirty - to be replaced
            dcl_html += '<tr>' + ' '.join(deepcl_html) + '</tr>'

        # Process deepclues
        diff_h = '<div class="main-container">'
        diff_h += grid_to_html(mainxd)
        diff_h += mktag('table', 'deepclues') + dcl_html + mktag('/table')
        diff_h += '</div>'

        info('writing deepclues for %s' % mainxdid)
        outf.write_html('pub/deep/%s/index.html' % mainxdid, diff_h, title='Deep clue analysis for ' + mainxdid)

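# Illustrative sketch, not part of the original scripts: both deep-clue loops above
# bucket ClueAnswer records by publication with a manual "if pubid in pub_uses" check.
# The same grouping can be written with collections.defaultdict(set), as sketched
# below; SimpleClueAnswer and group_uses_by_pub are stand-in names introduced here
# for illustration only, not the real ClueAnswer class or a helper in the scripts.
from collections import defaultdict, namedtuple

SimpleClueAnswer = namedtuple('SimpleClueAnswer', 'pubid date answer clue')


def group_uses_by_pub(clue_variants, mainanswer):
    pub_uses = defaultdict(set)  # [pubid] -> set of uses of the same answer
    for ca in clue_variants:
        if ca.answer == mainanswer:
            pub_uses[ca.pubid].add(ca)
    return pub_uses
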
def main():
    args = utils.get_args("generates .html diffs for all puzzles in similar.tsv")
    outf = utils.open_output()

    similars = utils.parse_tsv("gxd/similar.tsv", "Similar")

    xdids_todo = args.inputs or [xdid for xdid, matches in metadb.get_similar_grids().items() if matches]

    for mainxdid in xdids_todo:
        progress(mainxdid)

        mainxd = xdfile.get_xd(mainxdid)
        if not mainxd:
            continue

        matches = metadb.get_similar_grids().get(mainxdid, [])

        xddates = {}
        xddates[mainxdid] = mainxd.date()  # Dict to store XD dates for further sort
        html_grids = {}
        html_clues = {}

        # Store in list to make further formatting as html table easier
        html_grids[mainxdid] = grid_diff_html(xdfile.get_xd(mainxdid))

        # Add for main XD
        diff_l = []
        for pos, mainclue, mainanswer in mainxd.iterclues():
            diff_h = mktag("div", "fullgrid main") + "%s. " % pos
            diff_h += mainclue
            diff_h += mktag("span", tagclass="main", inner=" ~ " + mainanswer.upper())
            diff_l.append(diff_h)
        html_clues[mainxdid] = diff_l

        # Process for all matches
        for xdid in matches:
            xd = xdfile.get_xd(xdid)
            # Skip if the xd can't be loaded
            if not xd:
                continue

            xddates[xdid] = xd.date()
            # output each grid
            html_grids[xdid] = grid_diff_html(xd, compare_with=mainxd)

            diff_l = []
            # output comparison of each set of clues
            for pos, clue, answer in xd.iterclues():
                diff_h = mktag("div", "fullgrid") + "%s. " % pos
                # get_clue_for_answer() can return None
                mainclue = mainxd.get_clue_for_answer(answer)
                sm = difflib.SequenceMatcher(lambda x: x == " ", mainclue or "", clue)
                debug("MCLUE: %s [%s]" % (mainclue, sm.ratio()))
                if mainclue is None or sm.ratio() < 0.40:
                    diff_h += clue
                else:
                    # Compare based on op codes
                    for opcode in sm.get_opcodes():
                        c, a1, a2, b1, b2 = opcode
                        if c == "equal":
                            diff_h += '<span class="match">%s</span>' % clue[b1:b2]
                        else:
                            diff_h += '<span class="diff">%s</span>' % clue[b1:b2]

                tagclass = "match" if mainclue or answer == mainxd.get_answer(pos) else "diff"
                diff_h += mktag("span", tagclass=tagclass, inner=" ~ " + answer.upper())
                diff_h += mktag("/div")
                diff_l.append(diff_h)
            html_clues[xdid] = diff_l

        # Wrap into table
        diff_h = mktag("table") + mktag("tr")
        # Sort by date
        sortedkeys = sorted(xddates.items(), key=operator.itemgetter(1))
        for w, dt in sortedkeys:
            diff_h += mktag("td") + html_grids[w] + mktag("/td")
        diff_h += mktag("/tr")

        for i, clue in enumerate(html_clues[sortedkeys[0][0]]):
            diff_h += mktag("tr")
            for w, dt in sortedkeys:
                if i < len(html_clues[w]):
                    diff_h += mktag("td") + html_clues[w][i] + mktag("/td")
            diff_h += mktag("/tr")
        diff_h += mktag("/table")

        outf.write_html("pub/%s/index.html" % mainxdid, diff_h, title="Comparison for " + mainxdid)

def main():
    args = utils.get_args('generates .html diffs for all puzzles in similar.tsv')
    outf = utils.open_output()

    similars = utils.parse_tsv('gxd/similar.tsv', 'Similar')

    xdids_todo = {}
    for row in metadb.xd_similar_all():
        if row.xdid not in xdids_todo:
            xdids_todo[row.xdid] = []
        xdids_todo[row.xdid].append(row)

    for mainxdid in xdids_todo:
        progress(mainxdid)

        mainxd = xdfile.get_xd(mainxdid)
        if not mainxd:
            warn('%s not in corpus' % mainxdid)
            continue

        matches = xdids_todo[mainxdid]
        info('generating diffs for %s (%d matches)' % (mainxdid, len(matches)))

        xddates = {}
        xddates[mainxdid] = mainxd.date()  # Dict to store XD dates for further sort
        html_grids = {}
        html_clues = {}

        # Store in list to make further formatting as html table easier
        html_grids[mainxdid] = grid_diff_html(xdfile.get_xd(mainxdid))

        # Add for main XD
        diff_l = []
        for pos, mainclue, mainanswer in mainxd.iterclues():
            if not mainclue:
                continue
            diff_h = mktag('div', 'fullgrid main') + '%s. ' % pos
            diff_h += mainclue
            diff_h += mktag('span', tagclass='main', inner=' ~ ' + mainanswer.upper())
            diff_l.append(diff_h)
        html_clues[mainxdid] = diff_l

        # Process for all matches
        for row in matches:
            xdid = row.match_xdid
            xd = xdfile.get_xd(xdid)
            # Skip if the xd can't be loaded
            if not xd:
                continue

            xddates[xdid] = xd.date()
            # output each grid
            html_grids[xdid] = grid_diff_html(xd, compare_with=mainxd)

            diff_l = []
            # output comparison of each set of clues
            for pos, clue, answer in xd.iterclues():
                diff_h = mktag('div', 'fullgrid') + '%s. ' % pos
                if not clue:
                    continue
                # get_clue_for_answer() can return None
                mainclue = mainxd.get_clue_for_answer(answer)
                sm = difflib.SequenceMatcher(lambda x: x == ' ', mainclue or '', clue)
                debug('MCLUE: %s [%s]' % (mainclue, sm.ratio()))
                if mainclue is None or sm.ratio() < 0.40:
                    diff_h += clue
                else:
                    # Compare based on op codes
                    for opcode in sm.get_opcodes():
                        c, a1, a2, b1, b2 = opcode
                        if c == 'equal':
                            diff_h += '<span class="match">%s</span>' % clue[b1:b2]
                        else:
                            diff_h += '<span class="diff">%s</span>' % clue[b1:b2]

                tagclass = 'match' if mainclue or answer == mainxd.get_answer(pos) else 'diff'
                diff_h += mktag('span', tagclass=tagclass, inner=' ~ ' + answer.upper())
                diff_h += mktag('/div')
                diff_l.append(diff_h)
            html_clues[xdid] = diff_l

        # Wrap into table
        diff_h = mktag('table') + mktag('tr')
        # Sort by date
        sortedkeys = sorted(xddates.items(), key=operator.itemgetter(1))
        for w, dt in sortedkeys:
            diff_h += mktag('td') + html_grids[w] + mktag('/td')
        diff_h += mktag('/tr')

        for i, clue in enumerate(html_clues[sortedkeys[0][0]]):
            diff_h += mktag('tr')
            for w, dt in sortedkeys:
                if i < len(html_clues[w]):
                    diff_h += mktag('td') + html_clues[w][i] + mktag('/td')
            diff_h += mktag('/tr')
        diff_h += mktag('/table')

        outf.write_html('pub/%s/index.html' % mainxdid, diff_h, title='Comparison for ' + mainxdid)