def main():
    """Generate the clue index page and the individual clue pages.

    Loads the boiled-clue table into the module-level ``boiled_clues``
    (which ``mkwww_cluepage`` reads) and then writes:

    * ``pub/clue/<boiled clue>/index.html`` for every boiled clue found in
      the input .xd files, for the 100 most used clues and for the 100 clues
      with the most distinct answers -- skipping any clue for which
      ``mkwww_cluepage`` returns nothing;
    * ``pub/clue/index.html`` with the summary counts and both tables.
    """
    global boiled_clues

    args = get_args('create clue index')
    outf = open_output()

    boiled_clues = load_clues()
    ndistinct = len(boiled_clues)

    biggest_clues = "<li>%d total clues, which boil down to %d distinct clues" % (len(clues()), ndistinct)

    # One (number of uses, boiled clue, answers) tuple per boiled clue.
    bcs = [(len(v), bc, answers_from(v)) for bc, v in boiled_clues.items()]

    nreused = sum(1 for n, _, _ in bcs if n > 1)
    # An empty corpus must not abort the run with a ZeroDivisionError.
    pct_reused = nreused * 100 // ndistinct if ndistinct else 0
    biggest_clues += "<li>%d (%d%%) of these clues are used in more than one puzzle" % (nreused, pct_reused)

    cluepages_to_make = set()

    # add all boiled clues from all input .xd files
    for fn, contents in find_files(*args.inputs, ext='.xd'):
        progress(fn)
        xd = xdfile.xdfile(contents.decode('utf-8'), fn)
        for pos, mainclue, mainanswer in xd.iterclues():
            cluepages_to_make.add(boil(mainclue))

    # add top 100 most used boiled clues from corpus
    biggest_clues += '<h2>Most used clues</h2>'
    biggest_clues += '<table class="clues most-used-clues">'
    biggest_clues += th("clue", "# uses", "answers used with this clue")
    for n, bc, ans in sorted(bcs, reverse=True)[:100]:
        cluepages_to_make.add(bc)
        biggest_clues += td(mkhref(unboil(bc), bc), n, html_select_options(ans))
    biggest_clues += '</table>'

    most_ambig = "<h2>Most ambiguous clues</h2>"
    most_ambig += '(clues with the largest number of different answers)'
    most_ambig += '<table class="clues most-different-answers">'
    most_ambig += th("Clue", "answers")

    # Boiled clues containing one of these words get the "theme" row class.
    theme_words = ('quip', 'quote', 'theme', 'riddle')
    for n, bc, ans in sorted(bcs, reverse=True, key=lambda x: len(set(x[2])))[:100]:
        cluepages_to_make.add(bc)
        clue = mkhref(unboil(bc), bc)
        if any(word in bc for word in theme_words):
            most_ambig += td(clue, html_select_options(ans), rowclass="theme")
        else:
            most_ambig += td(clue, html_select_options(ans))
    most_ambig += '</table>'

    for bc in cluepages_to_make:
        contents = mkwww_cluepage(bc)
        # Clues taken from the input files may have no page content.
        if contents:
            outf.write_html('pub/clue/%s/index.html' % bc, contents, title=bc)

    outf.write_html('pub/clue/index.html', biggest_clues + most_ambig, title="Clues")
def mkwww_cluepage(bc):
    """Return the HTML body of the page for the boiled clue ``bc``.

    The page lists the clue variants and the answers recorded under ``bc``
    in the module-level ``boiled_clues`` table, followed by the
    ``pubyear.pubyear_html`` chart built from each use.

    Returns an empty string when ``bc`` is not in ``boiled_clues`` so that
    callers can skip the page instead of failing with a KeyError.
    """
    if bc not in boiled_clues:
        return ''

    bcs = boiled_clues[bc]

    clue_html = ''
    clue_html += '<div>Variants: ' + html_select_options([ca.clue for ca in bcs]) + '</div>'
    clue_html += '<hr/>'
    clue_html += '<div>Answers for this clue: ' + html_select_options([ca.answer for ca in bcs]) + '</div>'
    clue_html += '<hr/>'

    # One (pubyear()[0], pubyear()[1], 1) row per use; pubyear() is called
    # once per use rather than once per field.
    pubyear_rows = []
    for ca in bcs:
        py = ca.pubyear()
        pubyear_rows.append((py[0], py[1], 1))
    clue_html += pubyear.pubyear_html(pubyear_rows)

    return clue_html
def main():
    """Generate the word index page and the individual word pages.

    Groups every clue/answer pair returned by ``clues()`` by answer into the
    module-level ``all_uses`` (which ``mkwww_wordpage`` reads), then writes
    ``word/<WORD>/index.html`` for every word named on the command line plus
    the 100 answers with the most uses, and finally ``word/index.html``.
    """
    global all_uses

    args = get_args('create word pages and index')
    outf = open_output()

    # answer -> list of clue/answer records, in the order clues() yields them
    all_uses = {}
    for record in clues():
        all_uses.setdefault(record.answer, []).append(record)

    index_parts = [
        '<li>%d different words</li>' % len(all_uses),
        '<h2>Most used words</h2>',
        '<table class="clues most-used-words">',
        th("word", "# uses", "clues used with this answer"),
    ]

    wordpages_to_make = set(args.inputs)

    by_use_count = sorted(all_uses.items(), key=lambda item: len(item[1]), reverse=True)
    for answer, uses in by_use_count[:100]:
        wordpages_to_make.add(answer)
        link = mkhref(answer.upper(), answer.upper())
        options = html_select_options(uses, strmaker=lambda ca: ca.clue)
        index_parts.append(td(link, len(uses), options))

    index_parts.append('</table>')
    h = ''.join(index_parts)

    for word in wordpages_to_make:
        page_path = 'word/%s/index.html' % word.upper()
        outf.write_html(page_path, mkwww_wordpage(word), title=word)

    outf.write_html('word/index.html', h, title="Words")
def mkwww_cluepage(bc):
    """Return the HTML body of the page for the boiled clue ``bc``.

    The result lists the clue variants and the answers recorded under ``bc``
    in the module-level ``boiled_clues`` table. An empty string is returned
    when ``bc`` is not in that table.
    """
    if bc in boiled_clues:
        uses = boiled_clues[bc]

        variants = html_select_options([use.clue for use in uses])
        answers = html_select_options([use.answer for use in uses])

        # TODO: maybe add pubyear chart back in, using stats.tsv as source data (by day-of-week)
        # clue_html += pubyear.pubyear_html([ (ca.pubyear()[0], ca.pubyear()[1], 1) for ca in bcs ])
        sections = (
            '<div>Variants: ' + variants + '</div>',
            '<hr/>',
            '<div>Answers for this clue: ' + answers + '</div>',
            '<hr/>',
        )
        return ''.join(sections)

    return ''
def main():
    """Generate the clue index page and the pages of the featured clues.

    Loads the boiled-clue table into the module-level ``boiled_clues``
    (which ``mkwww_cluepage`` reads) and then writes:

    * ``pub/clue/<boiled clue>/index.html`` for the 100 most used clues and
      the 100 clues with the most distinct answers;
    * ``pub/clue/index.html`` with the summary counts and both tables.
    """
    global boiled_clues

    args = get_args('create clue index')
    outf = open_output()

    boiled_clues = load_clues()
    ndistinct = len(boiled_clues)

    biggest_clues = "<li>%d total clues, which boil down to %d distinct clues" % (len(clues()), ndistinct)

    # One (number of uses, boiled clue, answers) tuple per boiled clue.
    bcs = [(len(v), bc, answers_from(v)) for bc, v in boiled_clues.items()]

    nreused = sum(1 for n, _, _ in bcs if n > 1)
    # An empty corpus must not abort the run with a ZeroDivisionError.
    pct_reused = nreused * 100 // ndistinct if ndistinct else 0
    biggest_clues += "<li>%d (%d%%) of these clues are used in more than one puzzle" % (nreused, pct_reused)

    cluepages_to_make = set()

    biggest_clues += '<h2>Most used clues</h2>'
    biggest_clues += '<table class="clues most-used-clues">'
    biggest_clues += th("clue", "# uses", "answers used with this clue")
    for n, bc, ans in sorted(bcs, reverse=True)[:100]:
        cluepages_to_make.add(bc)
        biggest_clues += td(mkhref(unboil(bc), bc), n, html_select_options(ans))
    biggest_clues += '</table>'

    most_ambig = "<h2>Most ambiguous clues</h2>"
    most_ambig += '(clues with the largest number of different answers)'
    most_ambig += '<table class="clues most-different-answers">'
    most_ambig += th("Clue", "answers")

    # Boiled clues containing one of these words get the "theme" row class.
    theme_words = ('quip', 'quote', 'theme', 'riddle')
    for n, bc, ans in sorted(bcs, reverse=True, key=lambda x: len(set(x[2])))[:100]:
        cluepages_to_make.add(bc)
        clue = mkhref(unboil(bc), bc)
        if any(word in bc for word in theme_words):
            most_ambig += td(clue, html_select_options(ans), rowclass="theme")
        else:
            most_ambig += td(clue, html_select_options(ans))
    most_ambig += '</table>'

    for bc in cluepages_to_make:
        outf.write_html('pub/clue/%s/index.html' % bc, mkwww_cluepage(bc), title=bc)

    outf.write_html('pub/clue/index.html', biggest_clues + most_ambig, title="Clues")
def mkwww_wordpage(answer):
    """Return the HTML body of the word page for ``answer``.

    The page shows the clues recorded for ``answer`` in the module-level
    ``all_uses`` table, the number of uses, and one table row per use sorted
    by ``date`` in descending order. Each row carries the puzzle metadata
    looked up through ``metadb.xd_puzzle``; if that lookup (or reading its
    fields) fails, the row shows the error text instead.

    Raises:
        KeyError: if ``answer`` is not a key of ``all_uses``.
        Exception: a per-row failure is re-raised when the ``debug``
            argument is set.
    """
    uses = all_uses[answer]

    h = ''
    # TODO: pubyear chart is disabled for now:
    # h += pubyear.pubyear_html([ (ca.pubyear()[0], ca.pubyear()[1], 1) for ca in uses ])
    h += '<hr/>'
    h += '<div>Clued as: ' + html_select_options([ca.clue for ca in uses]) + '</div>'
    h += '<h2>%d uses</h2>' % len(uses)

    h += '<table>'
    for ca in sorted(uses, reverse=True, key=lambda ca: ca.date):
        try:
            md = metadb.xd_puzzle(ca.xdid())
            h += td(md.xdid, ca.clue, md.Author, md.Copyright)
        except Exception as e:
            # Best effort: keep the row and show the error. The success path
            # calls ca.xdid(), so call it here as well -- passing the bare
            # attribute would render the method object, not the puzzle id.
            xdid = ca.xdid() if callable(ca.xdid) else ca.xdid
            h += td(xdid, ca.clue, str(e))
            if utils.get_args().debug:
                raise
    h += '</table>'

    # TODO: a "Mutations" section was sketched here but never implemented.

    return h
def main():
    """Write a "deep clue comparison" page for each puzzle to process.

    The puzzles are those named by the input paths or, when none are given,
    every xdid that ``metadb.get_similar_grids()`` maps to a non-empty list
    of matches. For each puzzle that ``xdfile.get_xd`` can load, the page
    ``pub/deep/<xdid>/index.html`` contains:

    * the grid of the puzzle and of each similar puzzle, ordered by date;
    * the clues of all those puzzles row by row, with the parts of a similar
      puzzle's clue that match / differ from the main puzzle's marked up;
    * a "deep clues" table with, per clue, its position, the uses of the
      same clue/answer dated no later than this puzzle, the other answers
      seen for the clue and the earlier clues seen for the answer.
    """
    args = utils.get_args('generates .html diffs with deep clues for all puzzles in similar.tsv')
    outf = utils.open_output()

    similars = utils.parse_tsv('gxd/similar.tsv', 'Similar')
    xdids_todo = [parse_pathname(fn).base for fn in args.inputs]
    if not xdids_todo:
        xdids_todo = [xdid for xdid, matches in metadb.get_similar_grids().items() if matches]

    for mainxdid in xdids_todo:
        progress(mainxdid)

        mainxd = xdfile.get_xd(mainxdid)
        if not mainxd:
            continue

        matches = metadb.get_similar_grids().get(mainxdid, [])
        main_date = mainxd.date()

        xddates = {}
        xddates[mainxdid] = main_date  # Dict to store XD dates for further sort
        html_grids = {}
        html_clues = {}

        # these are added directly to similar.tsv
        nstaleclues = 0
        nstaleanswers = 0
        ntotalclues = 0

        dcl_html = ''

        for pos, mainclue, mainanswer in mainxd.iterclues():
            # Collected per clue. Creating these once per puzzle made every
            # row also list the answers and uses of all the clues before it.
            poss_answers = []
            pub_uses = {}  # [pubid] -> set(ClueAnswer)

            deepcl_html = []  # the cells of this clue's row
            mainca = ClueAnswer(mainxdid, main_date, mainanswer, mainclue)

            # 'grid position' column
            deepcl_html.append('<td class="pos">%s.</td>' % pos)

            # find other uses of this clue, and other answers, in a single pass
            for clueans in find_clue_variants(mainclue):
                if clueans.answer != mainanswer:
                    poss_answers.append(clueans)
                else:
                    pub_uses.setdefault(clueans.pubid, set()).add(clueans)

            # add 'other uses' to clues_html
            stale = False
            deepcl_html.append('<td class="other-uses">')

            if len(pub_uses) > 0:
                sortable_uses = []
                for pubid, uses in pub_uses.items():
                    for u in sorted(uses, key=lambda x: x.date or ""):
                        # only show those dated no later than this puzzle
                        if u.date and u.date <= main_date:
                            if pubid == mainxdid and u.date == main_date:
                                pass  # same id and date as the puzzle being analysed
                            else:
                                stale = True
                                sortable_uses.append((u.date, u, 1))

                newest_first = sorted(sortable_uses, key=lambda x: x[0], reverse=True)
                deepcl_html.append(html_select([(clue, nuses) for dt, clue, nuses in newest_first],
                                               top_option=mainclue))
            else:
                deepcl_html.append('<div class="original">%s</div>' % esc(mainclue))

            deepcl_html.append('</td>')

            # add 'other answers' to clues_html
            deepcl_html.append('<td class="other-answers">')
            deepcl_html.append(html_select_options(poss_answers, strmaker=lambda ca: ca.answer, force_top=mainca))
            deepcl_html.append('</td>')

            # add 'other clues' to clues_html
            deepcl_html.append('<td class="other-clues">')

            # bclues is all boiled clues for this particular answer: { [bc] -> #uses }
            bclues = load_answers().get(mainanswer, [])
            stale_answer = False

            if bclues:
                uses = []
                for bc, nuses in bclues.items():
                    # uses of this boiled clue for the same answer, dated
                    # before this puzzle
                    clue_usages = [ca for ca in load_clues().get(bc, [])
                                   if ca.answer == mainanswer and ca.date < main_date]

                    if clue_usages:
                        stale_answer = True
                        # only use one (the most recent) ClueAnswer per boiled clue
                        latest = sorted(clue_usages, key=lambda u: u.date or "z")[-1]
                        if nuses > 1:
                            # but use the clue only (no xdid)
                            uses.append((latest.clue, nuses))
                        else:
                            uses.append((latest, nuses))

                if uses:
                    deepcl_html.append(html_select(uses))

            deepcl_html.append('</td>')  # end 'other-clues'

            if stale_answer:
                nstaleanswers += 1
            if stale:
                nstaleclues += 1
            ntotalclues += 1

            # Quick and dirty - to be replaced
            dcl_html += '<tr>' + ' '.join(deepcl_html) + '</tr>'

        # Store in list to make further formatting as html table easier
        # NOTE(review): this second get_xd() looks redundant with the one at
        # the top of the loop; kept because get_xd() is not visible here.
        mainxd = xdfile.get_xd(mainxdid)
        if mainxd:
            html_grids[mainxdid] = grid_diff_html(mainxd)

            # Add for main XD
            diff_l = []
            for pos, mainclue, mainanswer in mainxd.iterclues():
                diff_h = mktag('div', 'fullgrid main') + '%s. ' % pos
                diff_h += mainclue
                diff_h += mktag('span', tagclass='main', inner=' ~ ' + mainanswer.upper())
                # Close the div, as is done for the clues of the matches below.
                diff_h += mktag('/div')
                diff_l.append(diff_h)
            html_clues[mainxdid] = diff_l

        # Process for all matches
        for xdid in matches:
            xd = xdfile.get_xd(xdid)
            if not xd:
                continue
            xddates[xdid] = xd.date()

            # output each grid
            html_grids[xdid] = grid_diff_html(xd, compare_with=mainxd)

            diff_l = []
            # output comparison of each set of clues
            for pos, clue, answer in xd.iterclues():
                diff_h = mktag('div', 'fullgrid') + '%s. ' % pos

                # get_clue() sometimes returns None, hence the `or ''`
                sm = difflib.SequenceMatcher(lambda x: x == ' ', mainxd.get_clue(pos) or '', clue)
                if sm.ratio() < 0.50:
                    # too different to be worth highlighting
                    diff_h += clue
                else:
                    # Compare based on op codes
                    for opcode in sm.get_opcodes():
                        c, a1, a2, b1, b2 = opcode
                        if c == 'equal':
                            diff_h += '<span class="match">%s</span>' % clue[b1:b2]
                        else:
                            diff_h += '<span class="diff">%s</span>' % clue[b1:b2]

                answer_class = 'match' if answer == mainxd.get_answer(pos) else 'diff'
                diff_h += mktag('span', tagclass=answer_class, inner=' ~ ' + answer.upper())
                diff_h += mktag('/div')
                diff_l.append(diff_h)
            html_clues[xdid] = diff_l

        # Wrap into table
        diff_h = mktag('table') + mktag('tr')

        # Sort by date
        sortedkeys = sorted(xddates.items(), key=operator.itemgetter(1))
        for w, dt in sortedkeys:
            diff_h += mktag('td') + html_grids[w] + mktag('/td')
        diff_h += mktag('/tr')

        # One row per clue of the earliest puzzle; a puzzle with fewer clues
        # simply gets no cell in the remaining rows.
        for i, clue in enumerate(html_clues[sortedkeys[0][0]]):
            diff_h += mktag('tr')
            for w, dt in sortedkeys:
                if i < len(html_clues[w]):
                    diff_h += mktag('td') + html_clues[w][i] + mktag('/td')
            diff_h += mktag('/tr')

        # Process deepclues
        diff_h += mktag('table') + dcl_html + mktag('/table')
        diff_h += mktag('/table')

        outf.write_html('pub/deep/%s/index.html' % mainxdid, diff_h,
                        title='Deep clue comparison for ' + mainxdid)
def main():
    """Write a "deep clue analysis" page for every input .xd puzzle.

    Each page, ``pub/deep/<xdid>/index.html``, holds the puzzle grid and a
    table with one row per positioned clue: the position, the clue (linked
    to its clue page when ``prev_uses`` reports earlier uses), the other
    answers found for the clue, and the output of ``html_other_clues`` for
    the answer.
    """
    args = utils.get_args(
        'generates .html diffs with deep clues for all puzzles in similar.tsv')
    outf = utils.open_output()

    similars = utils.parse_tsv('gxd/similar.tsv', 'Similar')

    # Parse every input puzzle up front.
    xds_todo = [
        xdfile.xdfile(contents.decode('utf-8'), fn)
        for fn, contents in find_files(*args.inputs, ext='.xd')
    ]

    header_cells = (
        '<th></th>',
        '<th>Clue</th>',
        '<th>ANSWERs</th>',
        '<th>Alt. clue possibilities</th>',
    )

    for mainxd in xds_todo:
        mainxdid = mainxd.xdid()
        progress(mainxdid)

        matches = metadb.xd_similar(mainxdid)

        # Dict to store XD dates for further sort
        xddates = {mainxdid: mainxd.date()}

        # these are added directly to similar.tsv
        nstaleclues = nstaleanswers = ntotalclues = 0

        dcl_html = '<tr>' + ''.join(header_cells) + '</tr>'

        for pos, mainclue, mainanswer in mainxd.iterclues():
            if not pos:
                continue

            mainca = ClueAnswer(mainxdid, mainxd.date(), mainanswer, mainclue)

            # 'grid position' column
            cells = ['<td class="pos">%s.</td>' % pos]

            # One pass over the variants of this clue: records with another
            # answer are alternatives, the rest are grouped by publication.
            poss_answers = []
            pub_uses = {}  # [pubid] -> set(ClueAnswer)
            for variant in find_clue_variants(mainclue):
                if variant.answer == mainanswer:
                    pub_uses.setdefault(variant.pubid, set()).add(variant)
                else:
                    poss_answers.append(variant)

            # 'other uses' column
            cells.append('<td class="other-uses">')
            prev = prev_uses(pub_uses, mainxd, mainclue)
            if not prev:
                cells.append(mainclue)
            else:
                cells.append('<a href="/pub/clue/%s">%s [x%s]</a>' %
                             (boil(mainclue), mainclue, len(prev)))
                nstaleclues += 1
            cells.append('</td>')

            # 'other answers' column
            cells.append('<td class="other-answers">')
            cells.append(
                html_select_options(poss_answers,
                                    strmaker=lambda ca: ca.answer,
                                    force_top=mainca,
                                    add_total=False))
            cells.append('</td>')

            # 'other clues' column
            cells.append('<td class="other-clues">')
            other_clues = html_other_clues(mainanswer, mainclue, mainxd)
            if other_clues:
                cells.append(other_clues)
                nstaleanswers += 1
            cells.append('</td>')  # end 'other-clues'

            ntotalclues += 1

            # Quick and dirty - to be replaced
            dcl_html += '<tr>' + ' '.join(cells) + '</tr>'

        # Grid first, then the deep clues table, inside one container.
        diff_h = '<div class="main-container">'
        diff_h += grid_to_html(mainxd)
        diff_h += mktag('table', 'deepclues') + dcl_html + mktag('/table')
        diff_h += '</div>'

        info('writing deepclues for %s' % mainxdid)
        outf.write_html('pub/deep/%s/index.html' % mainxdid,
                        diff_h,
                        title='Deep clue analysis for ' + mainxdid)