Пример #1
0
def main(filename):
    """Print a JSON description of the .xd puzzle stored in *filename*.

    The emitted object has the keys ``title``, ``by`` and ``clues``.  Each
    clue entry carries its direction (``d``), number (``n``), the ``x``/``y``
    pair looked up in the ``number_grid`` result, the answer (``a``) and the
    clue text (``c``).  Title and author stay ``'unknown'`` when the matching
    header is not present.
    """
    # Read through a context manager so the handle is closed immediately
    # instead of being left open until garbage collection.
    with open(filename) as fp:
        xd = xdfile(fp.read())

    title = 'unknown'
    author = 'unknown'
    for h in xd.headers:
        if h[0] == 'Title':
            title = h[1]
        elif h[0] == 'Author':
            author = h[1]

    # The lookup below indexes this result with (clue number - 1).
    number_index = number_grid(xd.grid)

    cluelist = []
    for (d, n), c, a in xd.clues:
        n = int(n)
        xy = number_index[n - 1]
        cluelist.append(
            {'d': d, 'n': n, 'x': xy[0], 'y': xy[1], 'a': a, 'c': c})

    puzzle = {'title': title, 'by': author, 'clues': cluelist}

    # A parenthesised single-argument print behaves the same as a Python 2
    # print statement and as the Python 3 print function.
    print(json.dumps(puzzle))
Пример #2
0
def parse_ujson(content):
    """Build an xdfile puzzle from a JSON crossword document.

    *content* is the JSON text.  The document is read for ``Height``, every
    key listed in ``POSSIBLE_META_DATA``, ``Solution`` (rows stored as
    ``Line1`` .. ``Line<Height>``), ``Layout`` and the ``AcrossClue`` /
    ``DownClue`` strings, whose entries look like ``number|text``.

    Returns the populated xdfile object.
    """
    doc = json.loads(content)

    # init crossword
    height = int(doc['Height'])
    xd = xdfile.xdfile()

    # add meta data: copy every known field that has a non-empty value
    for key in POSSIBLE_META_DATA:
        raw = doc.get(key)
        if not raw:
            continue
        xd.headers.append((key, unquote(raw).decode("utf-8")))

    # add puzzle: blanks in a solution line become block characters
    for lineno in range(1, height + 1):
        solution_line = doc['Solution']['Line' + str(lineno)]
        blocked = solution_line.replace(' ', xdfile.BLOCK_CHAR)
        xd.grid.append("".join(blocked))

    # add clues
    layout = doc['Layout']
    for clue_type in ('Across', 'Down'):
        direction = clue_type[0]
        for entry in doc[clue_type + 'Clue'].split(os.linesep):
            number, text = entry.split('|')
            solution = _get_solution(number, direction, layout, xd.grid)
            position = (direction, int(number))
            clue_text = unquote(text).decode("utf-8").strip()
            xd.clues.append((position, clue_text, solution))
            assert solution

    return xd
Пример #3
0
def main():
    """Reclue every input .xd grid with clues from other publications.

    For each input puzzle with a grid, the headers are replaced with
    placeholder values and every publication's clue set is tried in turn
    (except the puzzle's own publication).  A fully reclued puzzle is
    mutated, reclued again and written out; otherwise a row is added to the
    ``remix.tsv`` report.  The log is written to ``remix.log``.
    """
    args = get_args("reclue puzzle with clues from other publications")
    outf = open_output()

    all_clues = load_clues()

    # Pieces of the remix.tsv report; the first piece is the header row.
    tsv_parts = [
        COLUMN_SEPARATOR.join(["grid_xdid", "clues_pubid", "num_missing"])
        + EOL
    ]

    for fn, contents in find_files(*args.inputs, ext=".xd"):
        xd = xdfile(contents, fn)
        if not xd.grid:
            continue

        xd.set_header("Title", None)
        xd.set_header("Editor", "Timothy Parker Bot")
        fake_author = "%s %s" % (
            random.choice(fake_first), random.choice(fake_last))
        xd.set_header("Author", fake_author)
        xd.set_header("Copyright", None)
        xd.set_header("Date", iso8601())

        remixed = set()
        for pubid, pub_clues in list(all_clues.items()):
            try:
                if pubid == xd.publication_id():
                    continue  # don't use same publisher's clues

                nmissing = reclue(xd, pub_clues)
                outfn = "%s-%s.xd" % (xd.xdid(), pubid)

                if nmissing != 0:
                    debug("%s missing %d clues" % (outfn, nmissing))
                    tsv_parts.append(
                        COLUMN_SEPARATOR.join(
                            [xd.xdid(), pubid, str(nmissing)]) + EOL)
                    continue

                # Everything could be reclued: keep mutating until the
                # running total reaches 100, then reclue once more.
                nmutated = 0
                while nmutated < 100:
                    nmutated += mutate(xd, pub_clues)
                nmissing = reclue(xd, pub_clues)
                info("%s missing %d clues after %d mutations" %
                     (outfn, nmissing, nmutated))

                remixed.add(pubid)
                outf.write_file(outfn, xd.to_unicode())
            except Exception as e:
                error("remix error %s" % str(e))

        if not remixed:
            continue

        info("%d remixed: %s" % (len(remixed), " ".join(remixed)))
        try:
            outf.write_file(
                parse_pathname(fn).base + ".xd", contents.encode("utf-8"))
        except Exception as e:
            error("couldn't write: " + str(e))

    outf.write_file("remix.log", get_log().encode("utf-8"))
    outf.write_file("remix.tsv", "".join(tsv_parts))
Пример #4
0
def main(fn):
    """Print the .xd puzzle stored at *fn* as an ipuz JSON document.

    The document contains the fixed ipuz version/kind markers, the grid
    dimensions, every puzzle header (keys lower-cased), a ``puzzle`` matrix
    holding clue numbers, the ``Across``/``Down`` clue lists and the
    ``solution`` letters row by row.
    """
    with open(fn, 'r') as handle:
        text = handle.read()
    xd = xdfile.xdfile(text, fn)

    ipuz = {
        "version": "http://ipuz.org/v1",
        "kind": ["http://ipuz.org/crossword#1"],
        "dimensions": {"width": xd.width(), "height": xd.height()},
        "title": '',
    }

    # Headers override the defaults above (notably the empty title).
    for key, value in xd.headers.items():
        ipuz[key.lower()] = value

    # Matrix of clue numbers; cells that start no answer stay None.
    numbering = [[None] * xd.width() for _ in range(xd.height())]
    for _direction, cluenum, _answer, row, col in xd.iteranswers_full():
        numbering[row][col] = cluenum

    def clues_for(letter):
        # NOTE(review): pos is both sliced with [1] and tested with
        # startswith(); confirm what iterclues() yields for pos.
        return [(pos[1], markup_to_html(clue))
                for pos, clue, _answer in xd.iterclues()
                if pos.startswith(letter)]

    ipuz["puzzle"] = numbering
    ipuz["clues"] = {
        "Across": clues_for('A'),
        "Down": clues_for('D'),
    }
    ipuz["solution"] = [list(line) for line in xd.grid]

    print(json.dumps(ipuz))
Пример #5
0
def xd_clues(filename):
    """Print a placeholder clue line for every answer slot of an .xd grid.

    The grid is scanned row by row.  A cell gets the next number when it is
    a non-``#`` cell that starts an across and/or a down run of at least two
    cells.  One ``A<n>. xxx ~ ANSWER`` line is printed per across slot,
    followed by one ``D<n>. xxx ~ ANSWER`` line per down slot; answers come
    from ``answer_at``.
    """
    # Context manager closes the file right after reading.
    with open(filename) as fp:
        xd = xdfile(fp.read())

    grid = xd.grid
    maxx = len(grid[0])
    maxy = len(grid)

    next_n = 1
    across = []
    down = []
    for y in range(maxy):
        for x in range(maxx):
            light = grid[y][x] != '#'

            # A slot starts where the previous cell is the edge or a block
            # AND the following cell exists and is not a block.
            start_of_xlight = (light and (x == 0 or grid[y][x - 1] == '#')
                               and (x + 1 < maxx and grid[y][x + 1] != '#'))
            start_of_ylight = (light and (y == 0 or grid[y - 1][x] == '#')
                               and (y + 1 < maxy and grid[y + 1][x] != '#'))

            if start_of_xlight or start_of_ylight:
                if start_of_xlight:
                    across.append((next_n, answer_at(grid, (x, y), 'A')))
                if start_of_ylight:
                    down.append((next_n, answer_at(grid, (x, y), 'D')))
                next_n += 1

    # Parenthesised single-argument prints work as a Python 2 statement and
    # as the Python 3 function.
    for n, ans in across:
        print('A%d. xxx ~ %s' % (n, ans))
    for n, ans in down:
        print('D%d. xxx ~ %s' % (n, ans))
Пример #6
0
def main():
    """Clean the headers of every input .xd file and update its metadata row.

    Each entry of ``args.inputs`` is searched for ``.xd`` files; every file
    is decoded as UTF-8, parsed, passed through ``clean_headers`` and handed
    to ``metadb.update_puzzles_row``.
    """
    cli_args = utils.get_args(desc='outputs cleaned puzzle metadata rows')

    for source in cli_args.inputs:
        for path, raw in utils.find_files(source, ext='.xd'):
            puzzle = xdfile.xdfile(raw.decode('utf-8'), path)
            clean_headers(puzzle)
            metadb.update_puzzles_row(puzzle)
Пример #7
0
def main():
    """Update the puzzle metadata row of every .xd file found in the inputs.

    For each input source, the ``.xd`` files it contains are decoded as
    UTF-8 and parsed; the headers are cleaned with ``clean_headers`` before
    ``metadb.update_puzzles_row`` is called with the puzzle.
    """
    options = utils.get_args(desc='outputs cleaned puzzle metadata rows')

    for src in options.inputs:
        found = utils.find_files(src, ext='.xd')
        for xd_path, xd_bytes in found:
            parsed = xdfile.xdfile(xd_bytes.decode('utf-8'), xd_path)
            clean_headers(parsed)
            metadb.update_puzzles_row(parsed)
Пример #8
0
def parse_uxml(content, filename):
    """Parse a crossword XML document into an xdfile puzzle.

    *content* is decoded as UTF-8, then as cp1252, and is used unchanged if
    both attempts fail.  Several known markup problems are patched before
    parsing.  When the whole document does not parse, the first complete
    ``<tag>...</tag>`` span is parsed instead.

    *filename* is accepted but not used by this function.

    Returns the populated xdfile object.  Raises ``ValueError`` when the
    declared Width is not positive; parser errors propagate when no usable
    XML span can be found.
    """
    POSSIBLE_META_DATA = ['Title', 'Author', 'Editor', 'Copyright', 'Category']

    # Only decoding problems (or an input without .decode) are tolerated
    # here; a bare except would also swallow KeyboardInterrupt/SystemExit.
    try:
        content = content.decode("utf-8")
    except (UnicodeError, AttributeError):
        try:
            content = content.decode("cp1252")
        except (UnicodeError, AttributeError):
            pass  # last ditch effort, just try the original string

    content = content.replace("&", "&amp;")
    content = content.replace('"<"', '"&lt;"')
    content = content.replace("''", '&quot;')
    content = content.replace("\x12", "'")  # ^R seems to be '
    content = content.replace("\x05", "'")  # ^E seems to be junk

    content = re.sub(r'=""(\S)', r'="&quot;\1', content)  # one case has c=""foo"".  sheesh
    content = re.sub(r'(\.)""', r'\1&quot;"', content)

    try:
        root = etree.fromstring(content)
    except Exception:
        # Retry with the first complete <tag>...</tag> span of the text.
        match = re.search(r"<(\w+).*?</\1>", content, flags=re.DOTALL)
        if match is None:
            raise  # nothing to salvage: surface the original parse error
        root = etree.fromstring(match.group())

    # init crossword
    # rows is not used below; the lookup still fails early when the Height
    # element is missing.
    rows = int(root.xpath('//crossword/Height')[0].attrib['v'])
    cols = int(root.xpath('//crossword/Width')[0].attrib['v'])
    if cols <= 0:
        # A non-positive width would make the row slicing below never end.
        raise ValueError("invalid crossword Width: %d" % cols)
    xd = xdfile.xdfile()

    # add meta data (best effort: a field that is missing or cannot be
    # processed is skipped)
    for item in POSSIBLE_META_DATA:
        try:
            text = root.xpath('//crossword/' + item)[0].attrib['v']
            if text:
                xd.headers.append((item, unquote(text)))
        except Exception:
            pass

    # add puzzle: AllAnswer is one long string, cut into rows of `cols`
    all_answers = root.xpath('//crossword/AllAnswer')[0].attrib['v']
    all_answers = all_answers.replace('-', xdfile.BLOCK_CHAR)
    for index in range(0, len(all_answers), cols):
        row = all_answers[index:index + cols]
        xd.grid.append(u"".join(row))

    # add clues
    for clue_type in ('across', 'down'):
        # Iterating the element yields its children; getchildren() is
        # deprecated.
        for clue in root.xpath('//crossword/' + clue_type)[0]:
            number = int(clue.attrib['cn'])
            text = udecode(clue.attrib['c'].strip())
            solution = clue.attrib['a'].strip()
            xd.clues.append(((clue_type[0].upper(), number), text, solution))

    return xd
Пример #9
0
def main():
    """Generate the clue index page and the individual clue pages.

    The index (``pub/clue/index.html``) shows overall counts, the 100 most
    used boiled clues and the 100 clues with the most distinct answers.  A
    page is written for each of those clues and for every clue appearing in
    the input .xd files, whenever ``mkwww_cluepage`` returns content.
    """
    global boiled_clues
    args = get_args('create clue index')
    outf = open_output()

    boiled_clues = load_clues()

    # HTML fragments of the "most used" part of the index, in output order.
    used_html = [
        "<li>%d total clues, which boil down to %d distinct clues" % (len(clues()), len(boiled_clues))
    ]

    # (number of uses, boiled clue, answers) for every boiled clue
    bcs = [(len(uses), bc, answers_from(uses)) for bc, uses in boiled_clues.items()]

    nreused = sum(1 for n, _bc, _ans in bcs if n > 1)
    used_html.append("<li>%d (%d%%) of these clues are used in more than one puzzle" % (nreused, nreused*100/len(boiled_clues)))

    cluepages_to_make = set()

    # add all boiled clues from all input .xd files
    for fn, contents in find_files(*args.inputs, ext='.xd'):
        progress(fn)
        xd = xdfile.xdfile(contents.decode('utf-8'), fn)
        for _pos, mainclue, _mainanswer in xd.iterclues():
            cluepages_to_make.add(boil(mainclue))

    # add top 100 most used boiled clues from corpus
    used_html.append('<h2>Most used clues</h2>')
    used_html.append('<table class="clues most-used-clues">')
    used_html.append(th("clue", "# uses", "answers used with this clue"))
    for n, bc, ans in sorted(bcs, reverse=True)[:100]:
        cluepages_to_make.add(bc)
        used_html.append(td(mkhref(unboil(bc), bc), n, html_select_options(ans)))
    used_html.append('</table>')

    # HTML fragments of the "most ambiguous" part of the index.
    ambig_html = [
        "<h2>Most ambiguous clues</h2>",
        '(clues with the largest number of different answers)',
        '<table class="clues most-different-answers">',
        th("Clue", "answers"),
    ]

    def distinct_answers(entry):
        return len(set(entry[2]))

    theme_words = ('quip', 'quote', 'theme', 'riddle')
    for n, bc, ans in sorted(bcs, reverse=True, key=distinct_answers)[:100]:
        cluepages_to_make.add(bc)
        clue = mkhref(unboil(bc), bc)
        if any(word in bc for word in theme_words):
            ambig_html.append(td(clue, html_select_options(ans), rowclass="theme"))
        else:
            ambig_html.append(td(clue, html_select_options(ans)))

    ambig_html.append('</table>')

    for bc in cluepages_to_make:
        contents = mkwww_cluepage(bc)
        if not contents:
            continue
        outf.write_html('pub/clue/%s/index.html' % bc, contents, title=bc)

    biggest_clues = "".join(used_html)
    most_ambig = "".join(ambig_html)
    outf.write_html('pub/clue/index.html', biggest_clues + most_ambig, title="Clues")
Пример #10
0
def parse_uxml(content, filename):
    """Parse a crossword XML document into an xdfile puzzle.

    *content* is decoded as UTF-8, then as cp1252, and is used unchanged if
    both attempts fail.  It is escaped with ``xml_escape_table`` and doubled
    quotes around attribute values are rewritten before parsing.  When the
    whole document does not parse, the first complete ``<tag>...</tag>`` span
    is parsed instead.

    *filename* is passed to the xdfile constructor.

    Returns the populated xdfile object.  Raises ``ValueError`` when the
    declared Width is not positive; parser errors propagate when no usable
    XML span can be found.
    """
    POSSIBLE_META_DATA = ['Title', 'Author', 'Editor', 'Copyright', 'Category']

    # Only decoding problems (or an input without .decode) are tolerated
    # here; a bare except would also swallow KeyboardInterrupt/SystemExit.
    try:
        content = content.decode("utf-8")
    except (UnicodeError, AttributeError):
        try:
            content = content.decode("cp1252")
        except (UnicodeError, AttributeError):
            pass  # last ditch effort, just try the original string

    content = escape(content, xml_escape_table)
    content = re.sub(r'(=["]{2}([^"]+?)["]{2})+', r'="&quot;\2&quot;"',
                     content)  # Replace double quotes

    try:
        root = etree.fromstring(content.encode("utf-8"))
    except Exception:
        # Retry with the first complete <tag>...</tag> span of the text.
        match = re.search(r"<(\w+).*?</\1>", content, flags=re.DOTALL)
        if match is None:
            raise  # nothing to salvage: surface the original parse error
        root = etree.fromstring(match.group())

    # init crossword
    # rows = int(root.xpath('//crossword/Height')[0].attrib['v'])
    cols = int(root.xpath('//crossword/Width')[0].attrib['v'])
    if cols <= 0:
        # A non-positive width would make the row slicing below never end.
        raise ValueError("invalid crossword Width: %d" % cols)
    xd = xdfile.xdfile('', filename)

    # add meta data
    for item in POSSIBLE_META_DATA:
        elem = root.xpath('//crossword/' + item)
        if elem:
            text = elem[0].attrib['v']
            if text:
                text = escape(text, rev_xml_escape_table)
                xd.set_header(item, unquote(text))

    # add puzzle: AllAnswer is one long string, cut into rows of `cols`
    all_answers = root.xpath('//crossword/AllAnswer')[0].attrib['v']
    all_answers = all_answers.replace('-', xdfile.BLOCK_CHAR)
    for index in range(0, len(all_answers), cols):
        row = all_answers[index:index + cols]
        xd.grid.append("".join(row))

    # add clues
    for clue_type in ('across', 'down'):
        # Iterating the element yields its children; getchildren() is
        # deprecated.
        for clue in root.xpath('//crossword/' + clue_type)[0]:
            number = int(clue.attrib['cn'])
            text = udecode(clue.attrib['c'].strip())
            text = escape(text, rev_xml_escape_table)
            solution = clue.attrib['a'].strip()
            xd.clues.append(((clue_type[0].upper(), number), text, solution))

    return xd
Пример #11
0
def parse_uxml(content, filename):
    """Parse a crossword XML document into an xdfile puzzle.

    *content* is decoded as UTF-8, then as cp1252, and is used unchanged if
    both attempts fail.  It is escaped with ``xml_escape_table`` and doubled
    quotes around attribute values are rewritten before parsing.  When the
    whole document does not parse, the first complete ``<tag>...</tag>`` span
    is parsed instead.

    *filename* is passed to the xdfile constructor.

    Returns the populated xdfile object.  Raises ``ValueError`` when the
    declared Width is not positive; parser errors propagate when no usable
    XML span can be found.
    """
    POSSIBLE_META_DATA = ["Title", "Author", "Editor", "Copyright", "Category"]

    # Only decoding problems (or an input without .decode) are tolerated
    # here; a bare except would also swallow KeyboardInterrupt/SystemExit.
    try:
        content = content.decode("utf-8")
    except (UnicodeError, AttributeError):
        try:
            content = content.decode("cp1252")
        except (UnicodeError, AttributeError):
            pass  # last ditch effort, just try the original string

    content = escape(content, xml_escape_table)
    content = re.sub(r'(=["]{2}([^"]+?)["]{2})+', r'="&quot;\2&quot;"', content)  # Replace double quotes

    try:
        root = etree.fromstring(content.encode("utf-8"))
    except Exception:
        # Retry with the first complete <tag>...</tag> span of the text.
        match = re.search(r"<(\w+).*?</\1>", content, flags=re.DOTALL)
        if match is None:
            raise  # nothing to salvage: surface the original parse error
        root = etree.fromstring(match.group())

    # init crossword
    # rows = int(root.xpath('//crossword/Height')[0].attrib['v'])
    cols = int(root.xpath("//crossword/Width")[0].attrib["v"])
    if cols <= 0:
        # A non-positive width would make the row slicing below never end.
        raise ValueError("invalid crossword Width: %d" % cols)
    xd = xdfile.xdfile("", filename)

    # add meta data
    for item in POSSIBLE_META_DATA:
        elem = root.xpath("//crossword/" + item)
        if elem:
            text = elem[0].attrib["v"]
            if text:
                text = escape(text, rev_xml_escape_table)
                xd.set_header(item, unquote(text))

    # add puzzle: AllAnswer is one long string, cut into rows of `cols`
    all_answers = root.xpath("//crossword/AllAnswer")[0].attrib["v"]
    all_answers = all_answers.replace("-", xdfile.BLOCK_CHAR)
    for index in range(0, len(all_answers), cols):
        row = all_answers[index : index + cols]
        xd.grid.append("".join(row))

    # add clues
    for clue_type in ("across", "down"):
        # Iterating the element yields its children; getchildren() is
        # deprecated.
        for clue in root.xpath("//crossword/" + clue_type)[0]:
            number = int(clue.attrib["cn"])
            text = udecode(clue.attrib["c"].strip())
            text = escape(text, rev_xml_escape_table)
            solution = clue.attrib["a"].strip()
            xd.clues.append(((clue_type[0].upper(), number), text, solution))

    return xd
Пример #12
0
def main():
    """Remix input .xd grids using the clues of other publications.

    Every input puzzle that has a grid gets placeholder headers, then each
    publication's clues (other than the puzzle's own) are applied.  When no
    clue is missing the puzzle is mutated, reclued again and written out;
    otherwise the miss count is recorded for ``remix.tsv``.  The collected
    log goes to ``remix.log``.
    """
    args = get_args("reclue puzzle with clues from other publications")
    outf = open_output()

    all_clues = load_clues()

    header_row = COLUMN_SEPARATOR.join(["grid_xdid", "clues_pubid", "num_missing"]) + EOL
    report_rows = [header_row]

    for fn, contents in find_files(*args.inputs, ext=".xd"):
        xd = xdfile(contents, fn)
        if not xd.grid:
            continue

        xd.set_header("Title", None)
        xd.set_header("Editor", "Timothy Parker Bot")
        first_name = random.choice(fake_first)
        last_name = random.choice(fake_last)
        xd.set_header("Author", "%s %s" % (first_name, last_name))
        xd.set_header("Copyright", None)
        xd.set_header("Date", iso8601())

        remixed = set()
        for pubid, pub_clues in list(all_clues.items()):
            try:
                if pubid == xd.publication_id():
                    continue  # don't use same publisher's clues

                nmissing = reclue(xd, pub_clues)
                outfn = "%s-%s.xd" % (xd.xdid(), pubid)

                if nmissing == 0:
                    # Mutate until the running total reaches 100.
                    nmutated = 0
                    while nmutated < 100:
                        nmutated += mutate(xd, pub_clues)
                    nmissing = reclue(xd, pub_clues)
                    info("%s missing %d clues after %d mutations" % (outfn, nmissing, nmutated))

                    remixed.add(pubid)
                    outf.write_file(outfn, xd.to_unicode())
                else:
                    debug("%s missing %d clues" % (outfn, nmissing))
                    row = COLUMN_SEPARATOR.join([xd.xdid(), pubid, str(nmissing)]) + EOL
                    report_rows.append(row)
            except Exception as e:
                error("remix error %s" % str(e))

        if remixed:
            info("%d remixed: %s" % (len(remixed), " ".join(remixed)))
            try:
                original_name = parse_pathname(fn).base + ".xd"
                outf.write_file(original_name, contents.encode("utf-8"))
            except Exception as e:
                error("couldn't write: " + str(e))

    outf.write_file("remix.log", get_log().encode("utf-8"))
    outf.write_file("remix.tsv", "".join(report_rows))
Пример #13
0
def main():
    """Write one similarity row per match between the inputs and the corpus.

    The corpus is loaded into a list once.  After the header, every input
    file is decoded as UTF-8, parsed, and each result of ``find_similar_to``
    is written as a row.
    """
    options = get_args(desc="find similar grids")
    haystack = list(corpus())

    outf = open_output()
    outf.write(xd_similar_header)

    for path, raw in find_files(*options.inputs, strip_toplevel=False):
        needle = xdfile(raw.decode("utf-8"), path)
        for percent, left, right in find_similar_to(needle, haystack):
            outf.write(xd_similar_row(left, right, percent))
Пример #14
0
def xd_to_puz(filename, filename_out):
    """Convert the .xd puzzle in *filename* to a .puz file at *filename_out*.

    Title and author headers (default ``'unknown'``) become the puzzle
    metadata, clues are copied by direction and number, and every grid cell
    is marked either as a block (``#`` in the source) or as an empty cell
    with its solution letter.  Each clue is also printed while converting.
    """
    # Context manager closes the input file right after reading.
    with open(filename) as fp:
        xd = xdfile(fp.read())

    grid = xd.grid
    maxx = len(grid[0])
    maxy = len(grid)

    puzzle = crossword.Crossword(maxx, maxy)

    title = 'unknown'
    author = 'unknown'

    for h in xd.headers:
        if h[0] == 'Title':
            title = h[1]
        elif h[0] == 'Author':
            author = h[1]

    puzzle.meta.creator = author
    puzzle.meta.title = title

    for (d, n), c, _answer in xd.clues:
        if d == 'A':
            puzzle.clues.across[int(n)] = c
        else:
            puzzle.clues.down[int(n)] = c

    for direction, number, clue in puzzle.clues.all():
        print(direction, number, clue)

    for y in range(maxy):
        for x in range(maxx):
            ch = grid[y][x]
            if ch != '#':
                puzzle[y][x].cell = " "
                puzzle[y][x].solution = ch
            else:
                puzzle[y][x].cell = "."
                puzzle[y][x].block = None
                puzzle[y][x].solution = None

    puz = crossword.to_puz(puzzle)
    # The fill keeps '.' for blocks and uses '-' for every other cell.
    puz.fill = ''.join([x if x == '.' else '-' for x in puz.solution])
    puz.save(filename_out)
Пример #15
0
def main():
    """Print the potential grid and per-answer matches of each input puzzle.

    For every ``.xd`` file in the inputs, the potentials are computed against
    the word list and printed as a grid.  The answers are then listed in
    ascending order of their number of matches; the matches themselves are
    printed only when there are fewer than ten.
    """
    options = utils.get_args(desc='show grid potentials')

    wordlist = grid_potentials.get_wordlist()

    def match_count(item):
        # item is (key, (pattern, matches))
        return len(item[1][1])

    for source in options.inputs:
        for path, raw in xdfile.utils.find_files(source, ext='.xd'):
            xd = xdfile.xdfile(raw.decode('utf-8'), path)

            pots, answers = grid_potentials.get_potentials(xd, wordlist)
            print_potential_grid(xd, pots)

            for key, (pattern, matches) in sorted(answers.items(), key=match_count):
                if len(matches) >= 10:
                    print(key, pattern, len(matches))
                else:
                    print(key, pattern, len(matches), ' '.join(matches))
Пример #16
0
def main():
    """Fit every input puzzle to every distinct grid template of the corpus.

    A template is a corpus grid in which every cell other than
    ``BLOCK_CHAR`` is replaced by ``UNKNOWN_CHAR``.  For each input ``.xd``
    file and each template, a successful ``fit_template`` result is written
    to ``<args.output>-t<index>.xd``.
    """
    args = utils.get_args(desc='find grid templates')

    def to_template(grid):
        return tuple(
            ''.join(cell if cell == BLOCK_CHAR else UNKNOWN_CHAR
                    for cell in line)
            for line in grid)

    templates = set()
    for corpus_xd in xdfile.corpus():
        templates.add(to_template(corpus_xd.grid))

    print(len(templates), 'templates')

    for source in args.inputs:
        for path, raw in utils.find_files(source, ext='.xd'):
            xd = xdfile.xdfile(raw.decode('utf-8'), path)
            for index, template in enumerate(templates):
                griddedxd = fit_template(template, xd)
                if not griddedxd:
                    continue
                out_path = args.output + ('-t%s.xd' % index)
                with open(out_path, 'w') as fp:
                    fp.write(griddedxd.to_unicode())
Пример #17
0
def main():
    """Print potential statistics per puzzle, then a ranking by mean.

    For every input ``.xd`` file with at least one non-empty potential cell,
    a line with mean, population standard deviation, population variance and
    the smallest number of matches of any answer is printed.  After a
    ``--`` separator, all those puzzles are listed again, sorted by mean
    potential in descending order.
    """
    global args
    args = xdfile.utils.get_args(desc='show sorted list of grid potentials')

    wordlist = get_wordlist()

    # Must live outside the loops: it accumulates across ALL input files for
    # the ranking below, and must exist even when no file is found.
    all_pots = []

    print("filename mean stdev var min_answer")
    for input_source in args.inputs:
        for fn, contents in xdfile.utils.find_files(input_source, ext='.xd'):
            xd = xdfile.xdfile(contents.decode('utf-8'), fn)

            pots, answers = get_potentials(xd, wordlist)

            # Sizes of all non-empty cells of the potentials grid.
            unfixed = [len(ch) for row2 in pots for ch in row2 if ch]
            if unfixed:
                all_pots.append((xd, pots, answers, unfixed))
                matches_list = [x[1] for x in answers.values()]
                print("%s %.02f %.02f %.02f %d" % (xd, mean(unfixed), pstdev(unfixed), pvariance(unfixed), min(len(x) for x in matches_list)))

    print('\n--')

    for xd, pots, answers, unfixed in sorted(all_pots, key=lambda r: mean(r[3]), reverse=True):
        print("%20s %.02f %.02f %.02f" % (xd, mean(unfixed), pstdev(unfixed), pvariance(unfixed)))
Пример #18
0
def main():
    """Write a "deep clue analysis" HTML page for every input .xd puzzle.

    Each page (``pub/deep/<xdid>/index.html``) holds the puzzle grid and a
    table with one row per clue: the grid position, the clue (linked with a
    use count when ``prev_uses`` finds earlier uses), a selector of other
    answers seen for that clue, and other clues for the same answer.
    """
    args = utils.get_args(
        'generates .html diffs with deep clues for all puzzles in similar.tsv')
    outf = utils.open_output()

    # The parsed rows are not used below.
    similars = utils.parse_tsv('gxd/similar.tsv', 'Similar')

    puzzles = [
        xdfile.xdfile(contents.decode('utf-8'), fn)
        for fn, contents in find_files(*args.inputs, ext='.xd')
    ]

    table_header = ''.join([
        '<tr>',
        '<th></th>',
        '<th>Clue</th>',
        '<th>ANSWERs</th>',
        '<th>Alt. clue possibilities</th>',
        '</tr>',
    ])

    for mainxd in puzzles:
        mainxdid = mainxd.xdid()
        progress(mainxdid)

        # The two lookups below are made but their results are not used.
        matches = metadb.xd_similar(mainxdid)
        xddates = {mainxdid: mainxd.date()}

        # Counters maintained per puzzle; nothing below reads them.
        nstaleclues = 0
        nstaleanswers = 0
        ntotalclues = 0

        table_rows = [table_header]
        for pos, mainclue, mainanswer in mainxd.iterclues():
            if not pos:
                continue

            mainca = ClueAnswer(mainxdid, mainxd.date(), mainanswer, mainclue)

            # 'grid position' column
            pos_cell = '<td class="pos">%s.</td>' % pos

            # find other uses of this clue, and other answers, in one pass
            poss_answers = []
            pub_uses = {}  # [pubid] -> set(ClueAnswer)
            for clueans in find_clue_variants(mainclue):
                if clueans.answer != mainanswer:
                    poss_answers.append(clueans)

                if clueans.answer == mainanswer:
                    pub_uses.setdefault(clueans.pubid, set()).add(clueans)

            # 'other uses' column
            prev = prev_uses(pub_uses, mainxd, mainclue)
            if prev:
                clue_cell = '<a href="/pub/clue/%s">%s [x%s]</a>' % (
                    boil(mainclue), mainclue, len(prev))
                nstaleclues += 1
            else:
                clue_cell = mainclue

            # 'other answers' column
            answers_cell = html_select_options(
                poss_answers,
                strmaker=lambda ca: ca.answer,
                force_top=mainca,
                add_total=False)

            # 'other clues' column
            other_clues = html_other_clues(mainanswer, mainclue, mainxd)

            cells = [
                pos_cell,
                '<td class="other-uses">',
                clue_cell,
                '</td>',
                '<td class="other-answers">',
                answers_cell,
                '</td>',
                '<td class="other-clues">',
            ]
            if other_clues:
                cells.append(other_clues)
                nstaleanswers += 1
            cells.append('</td>')  # end 'other-clues'

            ntotalclues += 1
            table_rows.append('<tr>' + ' '.join(cells) + '</tr>')

        # Assemble the page body: grid followed by the clue table.
        diff_h = ''.join([
            '<div class="main-container">',
            grid_to_html(mainxd),
            mktag('table', 'deepclues'),
            ''.join(table_rows),
            mktag('/table'),
            '</div>',
        ])

        info('writing deepclues for %s' % mainxdid)
        outf.write_html('pub/deep/%s/index.html' % mainxdid,
                        diff_h,
                        title='Deep clue analysis for ' + mainxdid)
Пример #19
0
def xd_lint(filename):
    """
    Check some rules about xd files:
        - Filling in the grid using answers alone results in the same grid
        - All numbered locations have corresponding clues
        - All clues have answers

    Every problem is printed as ``<filename>: error: ...``.  The process
    exits with status 1 when any problem was found, otherwise it prints
    ``All checks passed.`` and exits with status 0.
    """
    error = False
    # Context manager closes the file right after reading.
    with open(filename) as fp:
        xd = xdfile(fp.read())

    grid = xd.grid
    maxx = len(grid[0])
    maxy = len(grid)

    # Grid rebuilt from the answers alone; cells nobody fills stay '#'.
    filled = [['#'] * maxx for _ in range(maxy)]

    direction = {'A': 'across', 'D': 'down'}

    across = {}
    down = {}
    for (d, n), c, a in xd.clues:
        if d == 'A':
            across[int(n)] = (c, a)
        else:
            down[int(n)] = (c, a)

        if not c:
            print('%s: error: no clue provided for %s %s' % (
                filename, n, direction[d]))
            error = True
        if not a:
            print('%s: error: no answer provided for %s %s' % (
                filename, n, direction[d]))
            error = True

    # Number the grid and check that every slot start has a clue.
    number_index = {}
    next_n = 1
    for y in range(maxy):
        for x in range(maxx):
            light = grid[y][x] != '#'

            start_of_xlight = (light and (x == 0 or grid[y][x - 1] == '#')
                               and (x + 1 < maxx and grid[y][x + 1] != '#'))
            start_of_ylight = (light and (y == 0 or grid[y - 1][x] == '#')
                               and (y + 1 < maxy and grid[y + 1][x] != '#'))

            if start_of_xlight and not across.get(next_n):
                print('%s: error: missing clue for %d %s' % (
                    filename, next_n, direction['A']))
                error = True

            if start_of_ylight and not down.get(next_n):
                print('%s: error: missing clue for %d %s' % (
                    filename, next_n, direction['D']))
                error = True

            if start_of_xlight or start_of_ylight:
                number_index[next_n] = (x, y)
                next_n += 1

    # Write every answer into the rebuilt grid at its numbered location.
    for (d, n), c, a in xd.clues:
        n = int(n)

        if n not in number_index:
            print('%s: error: clue %s %s does not correspond to a grid location' % (
                filename, n, direction[d]))
            error = True
            continue

        x, y = number_index[n]
        for i, letter in enumerate(a):
            xp, yp = x, y
            if d == 'A':
                xp = x + i
            else:
                yp = y + i
            if xp >= maxx or yp >= maxy:
                print('%s: error: clue %s %s extends beyond the grid' % (
                    filename, n, direction[d]))
                # This is reported as an error, so it must fail the lint
                # too; previously the flag was left untouched here.
                error = True
            else:
                filled[yp][xp] = letter

    filled = [''.join(x) for x in filled]

    for i, line in enumerate(filled):
        if line != grid[i]:
            print('%s: error: grids do not match on line %d' % (
                filename, i + 1))
            print('line: %s' % line)
            print('grid: %s' % grid[i])
            error = True
            break

    if error:
        sys.exit(1)

    print('All checks passed.')
    sys.exit(0)
Пример #20
0
def xd_to_html(filename, answers=False):
    """Print an HTML page that renders the .xd puzzle stored in *filename*.

    The page contains the across and down clue lists and the grid, with a
    number in every cell that starts an across and/or down slot.  When
    *answers* is true the solution letters are included in the grid cells.
    The page title is the puzzle's ``Title`` header (``'unknown'`` when
    absent), and the script block is initialised with *filename* minus its
    extension.
    """
    import os  # local import: only needed for the extension split below

    # Context manager closes the file right after reading.
    with open(filename) as fp:
        xd = xdfile(fp.read())

    title = 'unknown'

    # splitext removes only the final extension, so dots elsewhere in the
    # path (e.g. "./puzzles/x.xd") no longer truncate the name.
    filename_noext = os.path.splitext(filename)[0]

    for h in xd.headers:
        if h[0] == 'Title':
            title = h[1]

    html = '''
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
  "http://www.w3.org/TR/REC-html40/loose.dtd">
<html>
<head>
  <title>''' + title + '''</title>
  <link rel="stylesheet" type="text/css" href="style.css"/>
  <script type="text/javascript" src="lib/jquery.min.js"></script>
  <script type="text/javascript" src="lib/jquery.hotkeys.js"></script>
  <script type="text/javascript" src="lib/jquery.cookie.js"></script>
  <script type="text/javascript" src="xd.js"></script>
  <script type="text/javascript" src="crossword.js"></script>
  <script type="text/javascript">
  $(function() {
    var crossword;
    crossword = new Crossw1rd('container');
    crossword.init("''' + filename_noext + '''");
  });
  </script>
</head>
<body>
<div id="container">
'''
    grid = xd.grid
    maxx = len(grid[0])
    maxy = len(grid)

    # Each clue pane gets half the grid height minus 20px.  Floor division
    # keeps this an integer on Python 3 as well.
    pane_height = maxy * 28 // 2 - 20

    html += '''<div id="cross1wrd" style="height:%spx;width:%spx">''' % (
        maxy * 28 + 6, 200 + maxx * 28 + 20)

    html += '''<div class="clues" style="height:%spx;width:%spx;">''' % (
        maxy * 28, 200)
    html += '''<h4 class="cluelabel">Across</h4>'''
    html += '''<div class="across scroll-pane" style="height:%spx;">''' % (
        pane_height)
    for (d, n), c, _a in xd.clues:
        if d == 'A':
            html += '''<p class="c%s%s">%s. %s</p>''' % (d, n, n, c)
    html += '''</div>'''
    html += '''<h4 class="cluelabel">Down</h4>'''
    html += '''<div class="down scroll-pane" style="height:%spx;">''' % (
        pane_height)
    for (d, n), c, _a in xd.clues:
        if d == 'D':
            html += '''<p class="c%s%s">%s. %s</p>''' % (d, n, n, c)
    html += '''</div>'''
    html += '''</div>'''

    html += '''<div class="grid" style="height:%spx;width:%spx;">\n''' % (
        maxy * 28, maxx * 28)

    next_n = 1
    for y in range(maxy):
        html += '''<div class="row">'''
        for x in range(maxx):
            light = grid[y][x] != '#'

            # A slot starts where the previous cell is the edge or a block
            # AND the following cell exists and is not a block.
            start_of_xlight = (light and (x == 0 or grid[y][x - 1] == '#')
                               and (x + 1 < maxx and grid[y][x + 1] != '#'))
            start_of_ylight = (light and (y == 0 or grid[y - 1][x] == '#')
                               and (y + 1 < maxy and grid[y + 1][x] != '#'))

            num = ""
            if start_of_xlight or start_of_ylight:
                num = next_n
                next_n += 1

            letter_span = ""
            if answers and light:
                letter_span = '''<span class="letter">%s</span>''' % (
                    grid[y][x])

            html += '''<div%s><span class="num">%s</span>%s</div>''' % (
                ' class="blank"' if not light else "", num, letter_span)
        html += '''</div>\n'''

    html += '''</div>'''
    html += '''</div></div></body></html>'''
    # Parenthesised single-argument print works as a Python 2 statement and
    # as the Python 3 function.
    print(html)
Пример #21
0
def parse_xwordinfo(content, filename):
    """Parse an XWord Info puzzle page into an ``xdfile`` object.

    Args:
        content: Raw page bytes; decoded here as UTF-8.
        filename: Name handed to ``xdfile.xdfile`` for the new puzzle.

    Returns:
        The populated ``xdfile.xdfile`` (headers, grid and clues).

    Raises:
        XWordInfoParseError: If no across-clue container is found.
        IndexError, AttributeError: If a required element (title, author,
            editor, copyright) is missing or has no text.
    """
    content = content.decode('utf-8')

    # Spelled-out rebus values that map onto a fixed one-character shorthand.
    REBUS_LONG_HANDS = {
        'NINE': '9',
        'EIGHT': '8',
        'SEVEN': '7',
        'SIX': '6',
        'FIVE': '5',
        'FOUR': '4',
        'THREE': '3',
        'TWO': '2',
        'ONE': '1',
        'ZERO': '0',
        'AUGHT': '0',
        'AMPERSAND': '&',
        'AND': '&',
        'ASTERISK': '*',
        'PERCENT': '%',
        'STAR': '*',
        'AT': '@',
        'DOLLAR': '$',
        'PLUS': '+',
        'CENT': 'c',
        # 'DASH': '-',
        # 'DOT': '●'
    }
    # Pool of shorthand characters for every other rebus value; values are
    # handed out from the END of this list (pop()), i.e. '1' first.
    rsh = 'zyxwvutsrqponmlkjihgfedcba♚♛♜♝♞♟⚅⚄⚃⚂⚁⚀♣♦♥♠Фθиλπφя+&%$@?*0987654321'
    REBUS_SHORT_HANDS = list(rsh)

    # Turn inline HTML formatting into brace markup before the HTML is
    # parsed, and normalise two typographic quote characters.
    replacements = (
        ("<b>", "{*"),
        ("</b>", "*}"),
        ("<i>", "{/"),
        ("</i>", "/}"),
        ("<em>", "{/"),
        ("</em>", "/}"),
        ("<u>", "{_"),
        ("</u>", "_}"),
        ("<strike>", "{-"),
        ("</strike>", "-}"),
        ("’", "'"),
        ('“', '"'),
        # ('–', '-'),
    )
    for old, new in replacements:
        content = content.replace(old, new)

    # Element ids carry a "CPHContent_" prefix on some pages.
    if "CPHContent_" in content:
        xwiprefix = '#CPHContent_'
    else:
        xwiprefix = '#'

    root = html.fromstring(content)

    special_type = ''
    rebus = {}  # [rebus text from the page] -> shorthand used in the grid
    rebus_order = []  # rebus texts in order of first appearance

    xd = xdfile.xdfile('', filename)

    # get crossword info
    title = root.cssselect('#PuzTitle')[0].text.strip()
    try:
        subtitle = root.cssselect(xwiprefix + 'SubTitle')[0].text.strip()
        subtitle = ' [%s]' % subtitle
    except (IndexError, AttributeError):
        # No subtitle element, or it has no text: the subtitle is optional.
        subtitle = ""

    author = root.cssselect('.aegrid div')[1].text.strip()
    editor = root.cssselect('.aegrid div')[3].text.strip()

    copyright = root.cssselect(xwiprefix + 'Copyright')[0].text.strip()

    xd.set_header("Title", '%s%s' % (title, subtitle))
    xd.set_header("Author", author)
    xd.set_header("Editor", editor)
    xd.set_header("Copyright", copyright)

    # nyt title normally has date as e.g. January 1, 2020
    date_re = r"(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},\s+\d{4}"
    m = re.search(date_re, subtitle if subtitle else title)
    if m is not None:
        try:
            date = datetime.strptime(m.group(0), "%B %d, %Y")
            xd.set_header("Date", date.strftime("%Y-%m-%d"))
        except ValueError:
            # Looks like a date but is not a valid one (e.g. February 30);
            # the Date header is best-effort, so leave it out.
            pass

    _process_notes(xd, xwiprefix, root)  # add header for notes, if any

    puzzle_table = root.cssselect(xwiprefix + 'PuzTable tr') or root.cssselect(
        '#PuzTable tr')

    for row in puzzle_table:
        row_data = ""
        for cell in row.cssselect('td'):
            # check if the cell is special - with a shade or a circle
            cell_class = cell.get('class')
            cell_type = ''
            if cell_class == 'shade':
                cell_type = 'shaded'
            elif cell_class == 'bigcircle':
                cell_type = 'circle'

            # A cell without a non-empty "letter" div is treated as a block
            # until a rebus substitution proves otherwise.
            letter_divs = cell.cssselect('div.letter')
            letter = ((letter_divs[0].text if letter_divs else None)
                      or xdfile.BLOCK_CHAR)

            # handle rebuses
            if letter == xdfile.BLOCK_CHAR:
                subst_divs = cell.cssselect('div.subst2')
                subst = (subst_divs[0].text if subst_divs else None) or ''
                if not subst:
                    subst = cell.cssselect('div.subst')
                    if subst:
                        if title in SPLIT_REBUS_TITLES:
                            subst = "/".join(list(subst[0].text))
                        else:
                            subst = subst[0].text
                    else:  # check if color rebus
                        cell_string = etree.tostring(cell).decode('utf-8')
                        m = re.search("background-color:([A-Z]+);",
                                      cell_string)
                        if m:
                            subst = m.group(1)
                        else:
                            subst = ''

                if subst:
                    if subst not in rebus:
                        if subst in REBUS_LONG_HANDS:
                            rebus_val = REBUS_LONG_HANDS[subst]
                            # Reserve the shorthand so pop() cannot hand it
                            # out again for a different rebus value.
                            if rebus_val in REBUS_SHORT_HANDS:
                                REBUS_SHORT_HANDS.remove(rebus_val)
                        else:
                            rebus_val = REBUS_SHORT_HANDS.pop()
                        rebus[subst] = rebus_val
                        rebus_order.append(subst)
                    letter = rebus[subst]

            if cell_type:
                # the special cell's letter should be represented in lower case
                letter = letter.lower()
                if not special_type:
                    # hopefully there shouldn't be both shades and circles in
                    # the same puzzle - if that is the case, only the FIRST
                    # kind encountered is put in the header
                    special_type = cell_type

            row_data += letter
        xd.grid.append(row_data)

    if rebus:
        rebus = ["%s=%s" % (rebus[x], x.upper()) for x in rebus_order]
        xd.set_header("Rebus", ','.join(rebus))
    if special_type:
        xd.set_header("Special", special_type)

    across_div = root.cssselect('#ACluesPan') or root.cssselect(xwiprefix +
                                                                'ACluesPan')
    down_div = root.cssselect('#DCluesPan') or root.cssselect(xwiprefix +
                                                              'DCluesPan')

    if across_div and down_div:  # normal puzzle
        _process_clues(xd, 'A', across_div)  # add across clues
        _process_clues(xd, 'D', down_div)  # add down clues
    elif across_div:  # uniclue puzzle?
        _process_uniclues(xd, across_div)
    else:
        raise XWordInfoParseError("No clue divs found.")

    return xd
Пример #22
0
def main():
    """Analyze each input .xd puzzle and append a summary row to "gxd/similar".

    For every ``.xd`` file found in ``args.inputs`` that is not already listed
    in "gxd/similar", this finds similar grids in the corpus, walks the
    puzzle's clues, and appends one summary row via ``metadb.append_row``.
    Stops once more than ``--limit`` puzzles have been processed.

    NOTE(review): the ``--all`` option is declared but never read in this
    function, and ``outf`` is opened but not used here.
    """
    p = utils.args_parser(desc="annotate puzzle clues with earliest date used in the corpus")
    p.add_argument("-a", "--all", default=False, help="analyze all puzzles, even those already in similar.tsv")
    p.add_argument("-l", "--limit", default=100, help="limit amount of puzzles to be analyzed [default=100]")
    args = get_args(parser=p)
    outf = open_output()

    num_processed = 0
    prev_similar = metadb.read_rows("gxd/similar")
    for fn, contents in find_files(*args.inputs, ext=".xd"):
        progress(fn)
        mainxd = xdfile(contents.decode("utf-8"), fn)

        if mainxd.xdid() in prev_similar:
            continue  # skip reprocessing .xd that are already in similar.tsv

        """ find similar grids (pct, xd) for the mainxd in the corpus.
        Takes about 1 second per xd.  sorted by pct.
        """
        # Highest similarity first; items are unpacked below as (pct, xd1, xd2).
        similar_grids = sorted(find_similar_to(mainxd, corpus(), min_pct=0.20), key=lambda x: x[0], reverse=True)

        # The limit is checked only after the similarity search above, so the
        # puzzle that exceeds the limit is searched but never written out.
        num_processed += 1
        if num_processed > int(args.limit):
            break

        if similar_grids:
            info("similar: " + " ".join(("%s=%s" % (xd2.xdid(), pct)) for pct, xd1, xd2 in similar_grids))

        mainpubid = mainxd.publication_id()
        maindate = mainxd.date()

        # go over each clue/answer, find all other uses, other answers, other possibilities.
        # these are added directly to similar.tsv
        # NOTE(review): none of these three counters is incremented anywhere in
        # this function, so the summary row always records 0 for them - confirm
        # whether the counting code was lost.
        nstaleclues = 0
        nstaleanswers = 0
        ntotalclues = 0
        for pos, mainclue, mainanswer in mainxd.iterclues():
            progress(mainanswer)

            poss_answers = []
            pub_uses = {}  # [pubid] -> set(ClueAnswer)

            # NOTE(review): mainca is built but not read again in this function.
            mainca = ClueAnswer(mainpubid, maindate, mainanswer, mainclue)

            # find other uses of this clue, and other answers, in a single pass
            for clueans in find_clue_variants(mainclue):
                if clueans.answer != mainanswer:
                    poss_answers.append(clueans)

                if clueans.answer == mainanswer:
                    if clueans.pubid in pub_uses:
                        otherpubs = pub_uses[clueans.pubid]
                    else:
                        otherpubs = set()  # set of ClueAnswer
                        pub_uses[clueans.pubid] = otherpubs

                    otherpubs.add(clueans)

            # bclues is all boiled clues for this particular answer: { [bc] -> #uses }
            bclues = load_answers().get(mainanswer, [])
            stale_answer = False

            if bclues:
                uses = []
                for bc, nuses in bclues.items():
                    # then find all clues besides this one
                    # (same answer, dated strictly before this puzzle)
                    clue_usages = [
                        ca for ca in load_clues().get(bc, []) if ca.answer == mainanswer and ca.date < maindate
                    ]

                    if clue_usages:
                        stale_answer = True
                        if nuses > 1:
                            # only use one (the most recent) ClueAnswer per boiled clue
                            # but use the clue only (no xdid)
                            ca = sorted(clue_usages, key=lambda ca: ca.date or "z")[-1].clue
                        else:
                            ca = sorted(clue_usages, key=lambda ca: ca.date or "z")[-1]
                        uses.append((ca, nuses))
            # NOTE(review): poss_answers, pub_uses, stale_answer and uses are
            # computed per clue but nothing below reads them.

        # summary row to similar.tsv
        metadb.append_row(
            "gxd/similar",
            [
                mainxd.xdid(),  # xdid
                int(100 * sum(pct / 100.0 for pct, xd1, xd2 in similar_grids)),  # similar_grid_pct
                nstaleclues,  # reused_clues
                nstaleanswers,  # reused_answers
                ntotalclues,  # total_clues
                " ".join(("%s=%s" % (xd2.xdid(), pct)) for pct, xd1, xd2 in similar_grids),  # matches
            ],
        )
Пример #23
0
def parse_xwordinfo(content, filename):
    """Parse an XWord Info puzzle page (``*Label`` element ids) into an xdfile.

    Args:
        content: Raw page bytes; decoded here as UTF-8.
        filename: Name handed to ``xdfile.xdfile`` for the new puzzle.

    Returns:
        The populated ``xdfile.xdfile``: Title/Author/Editor headers, notes,
        grid, optional Rebus/Special headers, and the clues added by
        ``_fetch_clues``.

    Raises:
        IndexError, AttributeError: If the title, author or editor element
            is missing or has no text.
    """
    content = content.decode('utf-8')

    # Spelled-out rebus values that map onto a fixed one-character shorthand.
    REBUS_LONG_HANDS = {
        'NINE': '9',
        'EIGHT': '8',
        'SEVEN': '7',
        'SIX': '6',
        'FIVE': '5',
        'FOUR': '4',
        'THREE': '3',
        'TWO': '2',
        'ONE': '1',
        'ZERO': '0',
        'AUGHT': '0',
        'AMPERSAND': '&',
        'AND': '&',
        'ASTERISK': '*',
        'PERCENT': '%',
        'STAR': '*',
        'AT': '@',
        'DOLLAR': '$',
        'PLUS': '+',
        'CENT': 'c',
        # 'DASH': '-',
        # 'DOT': '●'
    }
    # Pool of shorthand characters for every other rebus value; values are
    # handed out from the END of this list (pop()), i.e. '1' first.
    rsh = 'zyxwvutsrqponmlkjihgfedcba♚♛♜♝♞♟⚅⚄⚃⚂⚁⚀♣♦♥♠Фθиλπφя+&%$@?*0987654321'
    REBUS_SHORT_HANDS = list(rsh)

    # Turn inline HTML formatting into brace markup before the HTML is
    # parsed, and normalise two typographic quote characters.
    replacements = (
        ("<b>", "{*"),
        ("</b>", "*}"),
        ("<i>", "{/"),
        ("</i>", "/}"),
        ("<em>", "{/"),
        ("</em>", "/}"),
        ("<u>", "{_"),
        ("</u>", "_}"),
        ("<strike>", "{-"),
        ("</strike>", "-}"),
        ("’", "'"),
        ('“', '"'),
        # ('–', '-'),
    )
    for old, new in replacements:
        content = content.replace(old, new)

    # Element ids carry a "CPHContent_" prefix on some pages.
    if "CPHContent_" in content:
        xwiprefix = '#CPHContent_'
    else:
        xwiprefix = '#'

    root = html.fromstring(content)

    special_type = ''
    rebus = {}  # [rebus text from the page] -> shorthand used in the grid
    rebus_order = []  # rebus texts in order of first appearance

    xd = xdfile.xdfile('', filename)

    # get crossword info
    title = root.cssselect(xwiprefix + 'TitleLabel')[0].text.strip()
    try:
        subtitle = root.cssselect(xwiprefix + 'SubTitleLabel')[0].text.strip()
        subtitle = ' [%s]' % subtitle
    except (IndexError, AttributeError):
        # No subtitle element, or it has no text: the subtitle is optional.
        subtitle = ""

    # Notes are optional too; any failure just leaves them empty.
    try:
        xd.notes = stringify_children(root.cssselect(xwiprefix + 'NotepadDiv')[0])
    except Exception as e:
        xd.notes = ""
        debug('Exception %s' % e)

    xd.set_header("Title", '%s%s' % (title, subtitle))
    xd.set_header("Author", root.cssselect(xwiprefix + 'AuthorLabel')[0].text.strip())
    xd.set_header("Editor", root.cssselect(xwiprefix + 'EditorLabel')[0].text.strip())

    # Replace leftover markup in the notes with newlines, then trim.
    xd.notes = xd.notes.replace("<br/>", "\n")
    xd.notes = xd.notes.replace("<b>Notepad:</b>", "\n")
    xd.notes = xd.notes.replace("&#13;", "\n")
    xd.notes = xd.notes.strip()

    puzzle_table = root.cssselect(xwiprefix + 'PuzTable tr') or root.cssselect('#PuzTable tr')

    for row in puzzle_table:
        row_data = ""
        for cell in row.cssselect('td'):
            # check if the cell is special - with a shade or a circle
            cell_class = cell.get('class')
            cell_type = ''
            if cell_class == 'bigshade':
                cell_type = 'shaded'
            elif cell_class == 'bigcircle':
                cell_type = 'circle'

            # A cell without a non-empty "letter" div is treated as a block
            # until a rebus substitution proves otherwise.
            letter_divs = cell.cssselect('div.letter')
            letter = (letter_divs[0].text if letter_divs else None) or xdfile.BLOCK_CHAR

            # handle rebuses
            if letter == xdfile.BLOCK_CHAR:
                subst_divs = cell.cssselect('div.subst2')
                subst = (subst_divs[0].text if subst_divs else None) or ''
                if not subst:
                    subst = cell.cssselect('div.subst')
                    if subst:
                        if title in SPLIT_REBUS_TITLES:
                            subst = "/".join(list(subst[0].text))
                        else:
                            subst = subst[0].text
                    else:
                        subst = ''

                if subst:
                    if subst not in rebus:
                        if subst in REBUS_LONG_HANDS:
                            rebus_val = REBUS_LONG_HANDS[subst]
                            # Reserve the shorthand so pop() cannot hand it
                            # out again for a different rebus value.
                            if rebus_val in REBUS_SHORT_HANDS:
                                REBUS_SHORT_HANDS.remove(rebus_val)
                        else:
                            rebus_val = REBUS_SHORT_HANDS.pop()
                        rebus[subst] = rebus_val
                        rebus_order.append(subst)
                    letter = rebus[subst]

            if cell_type:
                # the special cell's letter should be represented in lower case
                letter = letter.lower()
                if not special_type:
                    # hopefully there shouldn't be both shades and circles in
                    # the same puzzle - if that is the case, only the FIRST
                    # kind encountered is put in the header
                    special_type = cell_type

            row_data += letter
        xd.grid.append(row_data)

    if rebus:
        # From here on `rebus` is the list of "shorthand=VALUE" strings, and
        # that list (not the dict) is what _fetch_clues receives below.
        rebus = ["%s=%s" % (rebus[x], x.upper()) for x in rebus_order]
        xd.set_header("Rebus", ','.join(rebus))
    if special_type:
        xd.set_header("Special", special_type)

    # add clues (the return values are not needed here)
    _fetch_clues(xd, 'A', root, xwiprefix + 'AcrossClues', rebus)
    _fetch_clues(xd, 'D', root, xwiprefix + 'DownClues', rebus)

    return xd
Пример #24
0
def parse_puz(contents):
    """Convert the contents of a .puz file into an ``xdfile`` object.

    Args:
        contents: Data accepted by ``puz.load``.

    Returns:
        An ``xdfile.xdfile`` with headers built from the puzzle metadata,
        one grid string per row, and across/down clues paired with the
        answers read off the grid.

    Raises:
        KeyError: If a clue number from the puzzle has no matching answer
            computed from the grid.
    """
    def strip_prefix(text, prefix):
        # str.lstrip(chars) strips any leading characters from the SET
        # `chars`, so "By Barry".lstrip("By ") yields "arry".  Remove the
        # literal prefix instead.
        if text.startswith(prefix):
            return text[len(prefix):]
        return text

    puz_object = puz.load(contents)
    puzzle = crossword.from_puz(puz_object)

    # Plain letters map to themselves; anything else gets a rebus shorthand.
    grid_dict = dict(zip(string.ascii_uppercase, string.ascii_uppercase))

    xd = xdfile.xdfile()

    md = dict((hdr_renames.get(k.lower(), k), v) for k, v in puzzle.meta() if v)
    if " / " in md.get("author", ""):
        # "By <author> / Edited by <editor>" packed into the author field.
        author, editor = md.get("author").split(" / ")
        editor = editor.strip()
        author = author.strip()
        author = strip_prefix(author, "By ")
        editor = strip_prefix(editor, "Edited by ")
        md["author"] = author
        md["editor"] = editor

    if "Washington Post" in md.get("copyright", ""):
        # Here the author field is "<date> - <title> By <author>[, Edited by <editor>]".
        a = md["author"]
        if " - " in a:
            datestr, rest = a.split(" - ")
            md["date"] = reparse_date(datestr)
            if "By " in rest:
                md["title"], rest = rest.split(" By ")
            else:
                md["title"], rest = rest.split(" by ", 1)

            if "Edited by " in rest:
                md["author"], md["editor"] = rest.split(", Edited by ")
            elif "edited by " in rest:
                md["author"], md["editor"] = rest.split(", edited by ")
            else:
                md["author"] = rest

        md["copyright"] = strip_prefix(md["copyright"], "Copyright")

    for k, v in sorted(md.items(), key=lambda x: hdr_order.index(x[0])):
        if v:
            k = k[0].upper() + k[1:].lower()
            v = decode(v.strip())
            v = v.replace(u"© ", "")
            xd.headers.append((k, v))

    answers = {}  # ["A<num>" / "D<num>"] -> answer string read from the grid
    clue_num = 1

    for r, row in enumerate(puzzle):
        rowstr = ""
        for c, cell in enumerate(row):
            if puzzle.block is None and cell.solution == '.':
                rowstr += xdfile.BLOCK_CHAR
            elif puzzle.block == cell.solution:
                rowstr += xdfile.BLOCK_CHAR
            elif cell == puzzle.empty:
                rowstr += "."
            else:
                if cell.solution not in grid_dict:
                    grid_dict[cell.solution] = rebus_shorthands.pop()

                rowstr += grid_dict[cell.solution]

                # compute number shown in box
                new_clue = False
                if is_block(puzzle, c-1, r):  # across clue start
                    j = 0
                    answer = ""
                    while not is_block(puzzle, c+j, r):
                        answer += puzzle[c+j, r].solution
                        j += 1

                    # one-letter runs are not numbered entries
                    if len(answer) > 1:
                        new_clue = True
                        answers["A"+str(clue_num)] = answer

                if is_block(puzzle, c, r-1):  # down clue start
                    j = 0
                    answer = ""
                    while not is_block(puzzle, c, r+j):
                        answer += puzzle[c, r+j].solution
                        j += 1

                    if len(answer) > 1:
                        new_clue = True
                        answers["D"+str(clue_num)] = answer

                # across and down entries starting in one cell share a number
                if new_clue:
                    clue_num += 1
        xd.grid.append(rowstr)

    for number, clue in puzzle.clues.across():
        xd.clues.append((("A", number), decode(clue), answers["A"+str(number)]))

    for number, clue in puzzle.clues.down():
        xd.clues.append((("D", number), decode(clue), answers["D"+str(number)]))

    return xd
Пример #25
0
def parse_ccxml(data, filename):
    """Parse a crossword.info "rectangular-puzzle" XML document into an xdfile.

    Args:
        data: Raw document bytes; decoded as UTF-8 with undecodable bytes
            replaced.
        filename: Name handed to ``xdfile.xdfile`` for the new puzzle.

    Returns:
        The populated ``xdfile.xdfile``, or ``None`` when the document has
        no ``crossword/grid`` element.

    Raises:
        Exception: Whatever ``etree.fromstring`` raises for unparseable
            XML; it is logged and re-raised.
    """
    content = data.decode('utf-8', errors='replace')
    content = escape(content, xml_escape_table)
    content = consecutive(content)
    content = re.sub(r'(=["]{2}([^"]+?)["]{2})+', r'="&quot;\2&quot;"', content)  # Replace double quotes
    content_xml = content.encode('utf-8')

    ns = {
        'puzzle': 'http://crossword.info/xml/rectangular-puzzle'
    }
    try:
        root = etree.fromstring(content_xml)
    except Exception as e:
        error('Exception %s' % e)
        error(content)
        # The original had a bare `exit` here, which is a no-op expression:
        # execution fell through and failed on the unbound `root`.  Re-raise
        # the real parse error instead.
        raise

    # init crossword
    grid = root.xpath('//puzzle:crossword/puzzle:grid', namespaces=ns)
    if not grid:
        return None

    grid = grid[0]
    rows = int(grid.attrib['height'])
    cols = int(grid.attrib['width'])

    xd = xdfile.xdfile('', filename)

    # add metadata
    for metadata in root.xpath('//puzzle:metadata', namespaces=ns)[0]:
        text = metadata.text and metadata.text.strip()
        # drop the "{namespace}" part that prefixes the tag name
        title = re.sub(r'\{[^\}]*\}', '', metadata.tag.title())
        title = escape(title, rev_xml_escape_table)
        if text:
            text = escape(text, rev_xml_escape_table)
            xd.set_header(HEADER_RENAMES.get(title, title), text)

    # add puzzle: start from a rows x cols grid of " " placeholders
    puzzle = [[" "] * cols for _ in range(rows)]

    for cell in grid.xpath('./puzzle:cell', namespaces=ns):
        # cell coordinates in the document are 1-based
        x = int(cell.attrib['x']) - 1
        y = int(cell.attrib['y']) - 1
        if cell.attrib.get('type') == 'block':
            value = xdfile.BLOCK_CHAR
        elif 'solution' in cell.attrib:
            value = cell.attrib['solution']
        else:
            # Neither a block nor a solved cell: keep the placeholder rather
            # than reusing the previous cell's value (or hitting an unbound
            # name on the first cell).
            continue
        puzzle[y][x] = value

    xd.grid = ["".join(row) for row in puzzle]

    # add clues
    word_map = {}  # [word id] -> (x, y) attribute strings of that word
    for word in root.xpath('//puzzle:crossword/puzzle:word', namespaces=ns):
        word_map[word.attrib['id']] = (word.attrib['x'], word.attrib['y'])

    for clues in root.xpath('//puzzle:crossword/puzzle:clues', namespaces=ns):
        # The clue direction is the first ASCII letter of the upper-cased
        # section title text.
        title_node = clues.xpath('./puzzle:title', namespaces=ns)[0]
        title_bytes = etree.tostring(title_node, method='text').upper()
        direction = "".join(chr(x) for x in title_bytes if chr(x) in string.ascii_uppercase)
        direction = direction[0]

        for clue in clues.xpath('./puzzle:clue', namespaces=ns):
            word_id = clue.attrib['word']
            number = int(clue.attrib['number'])
            text = "|".join(clue.itertext()).strip()
            text = escape(text, rev_xml_escape_table)
            solution = get_solution(word_id, word_map, puzzle)
            xd.clues.append(((direction, number), text, solution))

    return xd
Пример #26
0
#!/usr/bin/env python

import time
import sys
import os.path
import mkwww
import xdfile

# Build an HTML index page with one list entry per publication meta file
# given on the command line, largest publication first.
outlines = []  # (num_xd, html list item) pairs
total_xd = 0
for metafn in sys.argv[1:]:
    # open() replaces the Python-2-only file() builtin, and the context
    # manager closes the handle instead of leaking it.
    with open(metafn) as metaf:
        pubxd = xdfile.xdfile(metaf.read(), metafn)

    num_xd = int(pubxd.get_header("num_xd"))
    total_xd += num_xd
    years = pubxd.get_header("years")
    # the publication id is the name of the directory holding the meta file
    pubid = metafn.split("/")[-2]

    outlines.append((num_xd, '<li><a href="{pubid}"><b>{pubid}</b></a>: {num_xd} crosswords from {years}</li>'.format(
        pubid=pubid,
        num_xd=num_xd,
        years=years,
    )))

# NOTE(review): `out` is assembled but not written or printed in the visible
# script - confirm where it is emitted.
out = mkwww.html_header.format(title=time.strftime("xd corpus grid similarity results [%Y-%m-%d]"))
out += "The xd corpus has %d crosswords total:" % total_xd
out += "<ul>"
out += "\n".join(line for _, line in sorted(outlines, reverse=True))
out += "</ul>"
out += '<a href="xd-xdiffs.zip">xd-xdiffs.zip</a> (7MB) has raw data for all puzzles that are at least 25% similar.  Source code for using <a href="https://github.com/century-arcade/xd">the .xd format is available on Github.</a><br/>'
out += mkwww.html_footer
Пример #27
0
def parse_puz(contents, filename):
    """Convert the bytes of a .puz file into an ``xdfile`` object.

    Args:
        contents: Raw file bytes, passed to ``puz.load``.
        filename: Name handed to ``xdfile.xdfile`` and used in warnings.

    Returns:
        The populated ``xdfile.xdfile`` (headers, grid and clues).

    Raises:
        xdfile.PuzzleParseError: If ``puz.load`` rejects the data.
        xdfile.IncompletePuzzleParse: If a clue number has no matching
            answer in the grid.
        AssertionError: If the built grid size differs from the puzzle's.
    """
    # Shorthand characters for rebus cells, handed out from the END of the
    # list (pop()), i.e. '1' first.
    rebus_shorthands = list(
        "⚷⚳♇♆⛢♄♃♂♁♀☿♹♸♷♶♵♴♳⅘⅗⅖⅕♚♛♜♝♞♟⚅⚄⚃⚂⚁⚀♣♦♥♠+&%$@?*zyxwvutsrqponmlkjihgfedcba0987654321"
    )

    try:
        puzobj = puz.load(contents)
        puzzle = crossword.from_puz(puzobj)
    except puz.PuzzleFormatError as e:
        emsg = e.message
        # Decode leniently: arbitrary binary data is not valid UTF-8, and a
        # UnicodeDecodeError here would mask the PuzzleParseError below.
        if "<html>" in contents.decode('utf-8', errors='replace').lower():
            emsg += " (looks like html)"
        raise xdfile.PuzzleParseError(emsg)

    # Plain letters map to themselves; other grid characters are added as
    # they are encountered.
    grid_dict = dict(list(zip(string.ascii_uppercase, string.ascii_uppercase)))

    xd = xdfile.xdfile('', filename)

    xd.set_header("Author", puzobj.author)
    xd.set_header("Copyright", puzobj.copyright)
    xd.set_header("Notes", puzobj.notes)
    # drop control characters (anything below a space) from the postscript
    xd.set_header("Postscript",
                  "".join(x for x in puzobj.postscript if ord(x) >= ord(' ')))
    xd.set_header("Preamble", puzobj.preamble)

    xd.set_header("Title", puzobj.title)

    used_rebuses = {}  # [puz_rebus_gridvalue_as_string] -> our_rebus_gridvalue
    rebus = {}  # [our_rebus_gridvalue] -> full_cell
    rebus_helper = puzobj.rebus()
    if rebus_helper.has_rebus():
        grbs = puzobj.extensions[b"GRBS"]
        if sum(x for x in grbs if x != 0) > 0:  # check for an actual rebus
            # RTBL is a ";"-separated list of "key:value" pairs
            for pair in puzobj.extensions[b"RTBL"].decode("cp1252").split(";"):
                pair = pair.strip()
                if not pair:
                    continue
                key, value = pair.split(":")
                rebuskey = rebus_shorthands.pop()
                used_rebuses[key] = rebuskey
                rebus[rebuskey] = decode(value)

            rebustr = xdfile.REBUS_SEP.join([
                ("%s=%s" % (k, v)) for k, v in sorted(rebus.items())
            ])
            xd.set_header("Rebus", rebustr)

    for r, row in enumerate(puzzle):
        rowstr = ""
        for c, cell in enumerate(row):
            if puzzle.block is None and cell.solution == '.':
                rowstr += xdfile.BLOCK_CHAR
            elif cell.solution == puzzle.block:
                rowstr += xdfile.BLOCK_CHAR
            elif cell.solution == ':':
                rowstr += xdfile.OPEN_CHAR
            elif cell == puzzle.empty:
                rowstr += xdfile.UNKNOWN_CHAR
            else:
                # index of this cell in the flat, row-major rebus table
                n = r * puzobj.width + c
                reb = puzobj.rebus()
                if reb.has_rebus() and n in reb.get_rebus_squares():
                    # table entries are offset by one from the RTBL keys
                    ch = str(reb.table[n] - 1)
                    rowstr += used_rebuses[ch]
                    cell.solution = rebus[used_rebuses[ch]]
                else:
                    ch = cell.solution
                    if ch not in grid_dict:
                        if ch in rebus_shorthands:
                            cellch = ch
                            rebus_shorthands.remove(ch)
                            warn(
                                "%s: unknown grid character '%s', assuming rebus of itself"
                                % (filename, ch))
                        else:
                            cellch = rebus_shorthands.pop()
                            warn(
                                "%s: unknown grid character '%s', assuming rebus (as '%s')"
                                % (filename, ch, cellch))

                        # NOTE(review): this assumes get_header("Rebus")
                        # returns a string even when no Rebus header has
                        # been set yet - confirm against xdfile.
                        xd.set_header(
                            "Rebus",
                            xd.get_header("Rebus") + " %s=%s" % (cellch, ch))

                        grid_dict[ch] = cellch
                    rowstr += grid_dict[ch]

        xd.grid.append(rowstr)

    assert xd.size() == (puzzle.width,
                         puzzle.height), "non-matching grid sizes"

    # clues
    answers = {}  # ["A<num>" / "D<num>"] -> answer string

    for posdir, posnum, answer in xd.iteranswers():
        answers[posdir[0] + str(posnum)] = answer

    try:
        for number, clue in puzzle.clues.across():
            cluenum = "A" + str(number)
            if cluenum not in answers:
                raise xdfile.IncompletePuzzleParse(
                    xd, "Clue number doesn't match grid: " + cluenum)
            xd.clues.append((("A", number), decode(clue), answers[cluenum]))

        # xd.append_clue_break()

        for number, clue in puzzle.clues.down():
            cluenum = "D" + str(number)
            if cluenum not in answers:
                raise xdfile.IncompletePuzzleParse(
                    xd, "Clue doesn't match grid: " + cluenum)
            xd.clues.append((("D", number), decode(clue), answers[cluenum]))
    except KeyError as e:
        raise xdfile.IncompletePuzzleParse(
            xd, "Clue doesn't match grid: " + str(e))

    return xd
Пример #28
0
def parse_xwordinfo(content, filename):
    """Parse an XWord Info puzzle page (``*Label`` element ids) into an xdfile.

    Args:
        content: Raw page bytes; decoded here as UTF-8.
        filename: Name handed to ``xdfile.xdfile`` for the new puzzle.

    Returns:
        The populated ``xdfile.xdfile``: Title/Author/Editor headers, notes,
        grid, optional Rebus/Special headers, and the clues added by
        ``_fetch_clues``.

    Raises:
        IndexError, AttributeError: If the title, author or editor element
            is missing or has no text.
    """
    content = content.decode('utf-8')

    # Spelled-out rebus values that map onto a fixed one-character shorthand.
    REBUS_LONG_HANDS = {
        'NINE': '9',
        'EIGHT': '8',
        'SEVEN': '7',
        'SIX': '6',
        'FIVE': '5',
        'FOUR': '4',
        'THREE': '3',
        'TWO': '2',
        'ONE': '1',
        'ZERO': '0',
        'AUGHT': '0',
        'AMPERSAND': '&',
        'AND': '&',
        'ASTERISK': '*',
        'PERCENT': '%',
        'STAR': '*',
        'AT': '@',
        'DOLLAR': '$',
        'PLUS': '+',
        'CENT': 'c',
        # 'DASH': '-',
        # 'DOT': '●'
    }
    # Pool of shorthand characters for every other rebus value; values are
    # handed out from the END of this list (pop()), i.e. '1' first.
    rsh = 'zyxwvutsrqponmlkjihgfedcba♚♛♜♝♞♟⚅⚄⚃⚂⚁⚀♣♦♥♠Фθиλπφя+&%$@?*0987654321'
    REBUS_SHORT_HANDS = list(rsh)

    # Turn inline HTML formatting into brace markup before the HTML is
    # parsed, and normalise two typographic quote characters.
    replacements = (
        ("<b>", "{*"),
        ("</b>", "*}"),
        ("<i>", "{/"),
        ("</i>", "/}"),
        ("<em>", "{/"),
        ("</em>", "/}"),
        ("<u>", "{_"),
        ("</u>", "_}"),
        ("<strike>", "{-"),
        ("</strike>", "-}"),
        ("’", "'"),
        ('“', '"'),
        # ('–', '-'),
    )
    for old, new in replacements:
        content = content.replace(old, new)

    # Element ids carry a "CPHContent_" prefix on some pages.
    if "CPHContent_" in content:
        xwiprefix = '#CPHContent_'
    else:
        xwiprefix = '#'

    root = html.fromstring(content)

    special_type = ''
    rebus = {}  # [rebus text from the page] -> shorthand used in the grid
    rebus_order = []  # rebus texts in order of first appearance

    xd = xdfile.xdfile('', filename)

    # get crossword info
    title = root.cssselect(xwiprefix + 'TitleLabel')[0].text.strip()
    try:
        subtitle = root.cssselect(xwiprefix + 'SubTitleLabel')[0].text.strip()
        subtitle = ' [%s]' % subtitle
    except (IndexError, AttributeError):
        # No subtitle element, or it has no text: the subtitle is optional.
        subtitle = ""

    # Notes are optional too; any failure just leaves them empty.
    try:
        xd.notes = stringify_children(
            root.cssselect(xwiprefix + 'NotepadDiv')[0])
    except Exception as e:
        xd.notes = ""
        debug('Exception %s' % e)

    xd.set_header("Title", '%s%s' % (title, subtitle))
    xd.set_header("Author",
                  root.cssselect(xwiprefix + 'AuthorLabel')[0].text.strip())
    xd.set_header("Editor",
                  root.cssselect(xwiprefix + 'EditorLabel')[0].text.strip())

    # Replace leftover markup in the notes with newlines, then trim.
    xd.notes = xd.notes.replace("<br/>", "\n")
    xd.notes = xd.notes.replace("<b>Notepad:</b>", "\n")
    xd.notes = xd.notes.replace("&#13;", "\n")
    xd.notes = xd.notes.strip()

    puzzle_table = root.cssselect(xwiprefix + 'PuzTable tr') or root.cssselect(
        '#PuzTable tr')

    for row in puzzle_table:
        row_data = ""
        for cell in row.cssselect('td'):
            # check if the cell is special - with a shade or a circle
            cell_class = cell.get('class')
            cell_type = ''
            if cell_class == 'bigshade':
                cell_type = 'shaded'
            elif cell_class == 'bigcircle':
                cell_type = 'circle'

            # A cell without a non-empty "letter" div is treated as a block
            # until a rebus substitution proves otherwise.
            letter_divs = cell.cssselect('div.letter')
            letter = ((letter_divs[0].text if letter_divs else None)
                      or xdfile.BLOCK_CHAR)

            # handle rebuses
            if letter == xdfile.BLOCK_CHAR:
                subst_divs = cell.cssselect('div.subst2')
                subst = (subst_divs[0].text if subst_divs else None) or ''
                if not subst:
                    subst = cell.cssselect('div.subst')
                    if subst:
                        if title in SPLIT_REBUS_TITLES:
                            subst = "/".join(list(subst[0].text))
                        else:
                            subst = subst[0].text
                    else:
                        subst = ''

                if subst:
                    if subst not in rebus:
                        if subst in REBUS_LONG_HANDS:
                            rebus_val = REBUS_LONG_HANDS[subst]
                            # Reserve the shorthand so pop() cannot hand it
                            # out again for a different rebus value.
                            if rebus_val in REBUS_SHORT_HANDS:
                                REBUS_SHORT_HANDS.remove(rebus_val)
                        else:
                            rebus_val = REBUS_SHORT_HANDS.pop()
                        rebus[subst] = rebus_val
                        rebus_order.append(subst)
                    letter = rebus[subst]

            if cell_type:
                # the special cell's letter should be represented in lower case
                letter = letter.lower()
                if not special_type:
                    # hopefully there shouldn't be both shades and circles in
                    # the same puzzle - if that is the case, only the FIRST
                    # kind encountered is put in the header
                    special_type = cell_type

            row_data += letter
        xd.grid.append(row_data)

    if rebus:
        # From here on `rebus` is the list of "shorthand=VALUE" strings, and
        # that list (not the dict) is what _fetch_clues receives below.
        rebus = ["%s=%s" % (rebus[x], x.upper()) for x in rebus_order]
        xd.set_header("Rebus", ','.join(rebus))
    if special_type:
        xd.set_header("Special", special_type)

    # add clues (the return values are not needed here)
    _fetch_clues(xd, 'A', root, xwiprefix + 'AcrossClues', rebus)
    _fetch_clues(xd, 'D', root, xwiprefix + 'DownClues', rebus)

    return xd
Пример #29
0
 def reload(self):
     """Re-read ``self.source`` and rebuild ``self.xd`` and ``self.rows``."""
     import xdfile

     # Parse the current text of the source, keeping the source as filename.
     parsed = xdfile.xdfile(
         xd_contents=self.source.read_text(),
         filename=self.source,
     )
     self.xd = parsed
     # The rows are the clues of the freshly parsed puzzle.
     self.rows = self.xd.clues
Пример #30
0
def parse_puz(contents, filename):
    """Parse the contents of a .puz file into an ``xdfile.xdfile``.

    Args:
        contents: raw puzzle data handed to ``puz.load``; it must support
            ``.decode`` (i.e. be bytes-like).
        filename: name stored on the resulting xdfile and used in warnings.

    Returns:
        The populated xdfile: headers, grid rows and across/down clues.

    Raises:
        xdfile.PuzzleParseError: if ``puz`` rejects the data.
        xdfile.IncompletePuzzleParse: if a clue number has no matching answer
            in the grid.
        AssertionError: if the built grid size differs from the puzzle's.
    """
    # Pool of single-character stand-ins for rebus cells; consumed from the end.
    rebus_shorthands = list("⚷⚳♇♆⛢♄♃♂♁♀☿♹♸♷♶♵♴♳⅘⅗⅖⅕♚♛♜♝♞♟⚅⚄⚃⚂⚁⚀♣♦♥♠+&%$@?*zyxwvutsrqponmlkjihgfedcba0987654321")

    try:
        puzobj = puz.load(contents)
        puzzle = crossword.from_puz(puzobj)
    except puz.PuzzleFormatError as e:
        emsg = getattr(e, "message", None) or str(e)
        # The payload is binary, so a strict decode could itself raise
        # UnicodeDecodeError and hide the real parse failure; decode leniently.
        if "<html>" in contents.decode('utf-8', errors='replace').lower():
            emsg += " (looks like html)"
        raise xdfile.PuzzleParseError(emsg)

    # Plain A-Z cells map to themselves; other cell values are added on demand.
    grid_dict = dict(zip(string.ascii_uppercase, string.ascii_uppercase))

    xd = xdfile.xdfile('', filename)

    xd.set_header("Author", puzobj.author)
    xd.set_header("Copyright", puzobj.copyright)
    xd.set_header("Notes", puzobj.notes)
    # Drop control characters (anything below a space) from the postscript.
    xd.set_header("Postscript", "".join(x for x in puzobj.postscript if ord(x) >= ord(' ')))
    xd.set_header("Preamble", puzobj.preamble)

    xd.set_header("Title", puzobj.title)

    used_rebuses = {}  # [puz_rebus_gridvalue_as_string] -> our_rebus_gridvalue
    rebus = {}  # [our_rebus_gridvalue] -> full_cell
    reb = puzobj.rebus()
    has_rebus = reb.has_rebus()
    if has_rebus:
        grbs = puzobj.extensions[b"GRBS"]
        if any(grbs):   # check for an actual rebus
            for pair in puzobj.extensions[b"RTBL"].decode("cp1252").split(";"):
                pair = pair.strip()
                if not pair:
                    continue
                key, value = pair.split(":")
                rebuskey = rebus_shorthands.pop()
                used_rebuses[key] = rebuskey
                rebus[rebuskey] = decode(value)

            rebustr = xdfile.REBUS_SEP.join([("%s=%s" % (k, v)) for k, v in sorted(rebus.items())])
            xd.set_header("Rebus", rebustr)

    # Computed once up front instead of once per cell.
    rebus_squares = frozenset(reb.get_rebus_squares()) if has_rebus else frozenset()

    for rownum, row in enumerate(puzzle):
        rowstr = ""
        for colnum, cell in enumerate(row):
            if puzzle.block is None and cell.solution == '.':
                rowstr += xdfile.BLOCK_CHAR
            elif cell.solution == puzzle.block:
                rowstr += xdfile.BLOCK_CHAR
            elif cell.solution == ':':
                rowstr += xdfile.OPEN_CHAR
            elif cell == puzzle.empty:
                rowstr += xdfile.UNKNOWN_CHAR
            else:
                n = rownum * puzobj.width + colnum  # row-major cell index
                if n in rebus_squares:
                    # The table value minus one is looked up among the RTBL keys.
                    ch = str(reb.table[n] - 1)
                    rowstr += used_rebuses[ch]
                    cell.solution = rebus[used_rebuses[ch]]
                else:
                    ch = cell.solution
                    if ch not in grid_dict:
                        if ch in rebus_shorthands:
                            cellch = ch
                            rebus_shorthands.remove(ch)
                            warn("%s: unknown grid character '%s', assuming rebus of itself" % (filename, ch))
                        else:
                            cellch = rebus_shorthands.pop()
                            warn("%s: unknown grid character '%s', assuming rebus (as '%s')" % (filename, ch, cellch))

                        xd.set_header("Rebus", xd.get_header("Rebus") + " %s=%s" % (cellch, ch))

                        grid_dict[ch] = cellch
                    rowstr += grid_dict[ch]

        xd.grid.append(rowstr)

    assert xd.size() == (puzzle.width, puzzle.height), "non-matching grid sizes"

    # clues
    answers = {}  # e.g. "A1" / "D1" -> answer read from the grid

    for posdir, posnum, answer in xd.iteranswers():
        answers[posdir[0] + str(posnum)] = answer

    try:
        for number, clue in puzzle.clues.across():
            cluenum = "A" + str(number)
            if cluenum not in answers:
                raise xdfile.IncompletePuzzleParse(xd, "Clue number doesn't match grid: " + cluenum)
            xd.clues.append((("A", number), decode(clue), answers[cluenum]))

        for number, clue in puzzle.clues.down():
            cluenum = "D" + str(number)
            if cluenum not in answers:
                raise xdfile.IncompletePuzzleParse(xd, "Clue doesn't match grid: " + cluenum)
            xd.clues.append((("D", number), decode(clue), answers[cluenum]))
    except KeyError as e:
        raise xdfile.IncompletePuzzleParse(xd, "Clue doesn't match grid: " + str(e))

    return xd
Пример #31
0
 def xd_from_grid(grid):
     """Wrap *grid* in an xdfile whose Creator header is the requesting client's IP."""
     creator = cherrypy.request.remote.ip
     contents = "Creator: %s\n\n\n%s" % (creator, grid)
     return xdfile.xdfile(contents)
Пример #32
0
def main():
    """Analyze .xd puzzles and append one summary row per puzzle to 'gxd/similar'.

    For each input puzzle not already listed in 'gxd/similar' (or for every
    puzzle when --all is given), up to --limit puzzles, this looks up similar
    grids in the corpus, walks the puzzle's clues against the clue/answer
    indexes, and appends a summary row via ``metadb.append_row``.
    """
    p = utils.args_parser(
        desc="annotate puzzle clues with earliest date used in the corpus")
    # store_true makes --all a real flag; without it argparse demands a value.
    p.add_argument(
        '-a',
        '--all',
        action='store_true',
        default=False,
        help='analyze all puzzles, even those already in similar.tsv')
    p.add_argument('-l',
                   '--limit',
                   default=100,
                   help='limit amount of puzzles to be analyzed [default=100]')
    args = get_args(parser=p)
    outf = open_output()

    limit = int(args.limit)
    num_processed = 0
    prev_similar = metadb.read_rows('gxd/similar')
    for fn, contents in find_files(*args.inputs, ext=".xd"):
        progress(fn)
        mainxd = xdfile(contents.decode('utf-8'), fn)

        if not args.all and mainxd.xdid() in prev_similar:
            continue  # skip reprocessing .xd that are already in similar.tsv

        # Enforce the limit before the expensive similarity search so the
        # puzzle that would exceed it is not searched and then thrown away.
        if num_processed >= limit:
            break
        num_processed += 1

        # find similar grids (pct, xd) for the mainxd in the corpus.
        # Takes about 1 second per xd.  sorted by pct.
        similar_grids = sorted(find_similar_to(mainxd, corpus(), min_pct=0.20),
                               key=lambda x: x[0],
                               reverse=True)

        matches = " ".join(("%s=%s" % (xd2.xdid(), pct))
                           for pct, xd1, xd2 in similar_grids)
        if similar_grids:
            info("similar: " + matches)

        mainpubid = mainxd.publication_id()
        maindate = mainxd.date()

        # go over each clue/answer, find all other uses, other answers, other possibilities.
        # these are added directly to similar.tsv
        # NOTE(review): none of these three counters is incremented anywhere in
        # this function, so the summary row always reports 0 for them, and the
        # per-clue results below (mainca, poss_answers, pub_uses, uses,
        # stale_answer) are never consumed. The consuming code looks to be
        # missing from this excerpt - confirm against the full script.
        nstaleclues = 0
        nstaleanswers = 0
        ntotalclues = 0
        for pos, mainclue, mainanswer in mainxd.iterclues():
            progress(mainanswer)

            poss_answers = []
            pub_uses = {}  # [pubid] -> set(ClueAnswer)

            mainca = ClueAnswer(mainpubid, maindate, mainanswer, mainclue)

            # find other uses of this clue, and other answers, in a single pass
            for clueans in find_clue_variants(mainclue):
                if clueans.answer != mainanswer:
                    poss_answers.append(clueans)
                else:
                    pub_uses.setdefault(clueans.pubid, set()).add(clueans)

            # bclues is all boiled clues for this particular answer: { [bc] -> #uses }
            bclues = load_answers().get(mainanswer, [])
            stale_answer = False

            if bclues:
                uses = []
                for bc, nuses in bclues.items():
                    # then find all clues besides this one
                    clue_usages = [
                        ca for ca in load_clues().get(bc, [])
                        if ca.answer == mainanswer and ca.date < maindate
                    ]

                    if clue_usages:
                        stale_answer = True
                        # only use one (the most recent) ClueAnswer per boiled clue
                        latest = sorted(clue_usages,
                                        key=lambda ca: ca.date or "z")[-1]
                        if nuses > 1:
                            # but use the clue only (no xdid)
                            latest = latest.clue
                        uses.append((latest, nuses))

        # summary row to similar.tsv
        metadb.append_row(
            'gxd/similar',
            [
                mainxd.xdid(),  # xdid
                int(100 * sum(
                    pct / 100.0
                    for pct, xd1, xd2 in similar_grids)),  # similar_grid_pct
                nstaleclues,  # reused_clues
                nstaleanswers,  # reused_answers
                ntotalclues,  # total_clues
                matches  # matches
            ])
Пример #33
0
    return out

# Script entry point (Python 2 syntax: `file()` builtin and `print` statement).
# Usage, as read from sys.argv below: <output_dir> [similar_txt ...]
if __name__ == "__main__":

    OUTPUT_DIR = sys.argv[1]
    # The publication id is the last path component of the output directory.
    pubid = OUTPUT_DIR.split("/")[-1]

    # Explicit similar.txt paths may follow the output dir; otherwise fall back
    # to the publication's default location.
    if len(sys.argv) > 2:
        similar_txts = sys.argv[2:]
    else:
        similar_txts = [ "crosswords/%s/similar.txt" % pubid ]

    # NOTE(review): called without checking whether OUTPUT_DIR already exists.
    os.makedirs(OUTPUT_DIR)

    pubxd = xdfile.xdfile(file("crosswords/%s/meta.txt" % pubid).read()) # just to parse some cached metadata

    left_index_list =  { } # [(olderfn, newerfn)] -> (pct, index_line)
    right_index_list =  { } # [(olderfn, newerfn)] -> (pct, index_line)

    for inputfn in similar_txts:
      for line in file(inputfn).read().splitlines():
        if not line: continue
        # At most three space-separated fields: two filenames and an optional
        # remainder of the line.
        parts = line.strip().split(' ', 2)
        if len(parts) == 2:
            fn1, fn2 = parts
        elif len(parts) == 3:
            fn1, fn2, rest = parts
        else:
            # A line with a single field cannot name a pair; report and skip it.
            print "ERROR in %s: %s" % (inputfn, line)
            continue
Пример #34
0
def parse_ipuz(contents, filename):
    """Parse the contents of an .ipuz file into an ``xdfile.xdfile``.

    Args:
        contents: UTF-8 encoded ipuz data (decoded here before ``ipuz.read``).
        filename: name stored on the resulting xdfile, used in warnings, and
            passed through ``parse_date_from_filename`` to derive a Date header.

    Returns:
        The populated xdfile: headers, grid rows and across/down clues.

    Raises:
        xdfile.IncompletePuzzleParse: if a clue number has no matching answer
            in the grid.
        AssertionError: if the built grid size differs from the puzzle's.
    """
    # Pool of single-character stand-ins for non A-Z cells; consumed from the end.
    rebus_shorthands = list("⚷⚳♇♆⛢♄♃♂♁♀☿♹♸♷♶♵♴♳⅘⅗⅖⅕♚♛♜♝♞♟⚅⚄⚃⚂⚁⚀♣♦♥♠+&%$@?*zyxwvutsrqponmlkjihgfedcba0987654321")

    ipuz_dict = ipuz.read(contents.decode("utf-8"))
    puzzle = crossword.from_ipuz(ipuz_dict)

    # Plain A-Z cells map to themselves; other cell values are added on demand.
    grid_dict = dict(zip(string.ascii_uppercase, string.ascii_uppercase))

    xd = xdfile.xdfile('', filename)

    xd.set_header("Author", puzzle.meta.creator)
    xd.set_header("Editor", puzzle.meta.contributor)
    xd.set_header("Copyright", puzzle.meta.rights)
    dt = parse_date_from_filename(parse_pathname(filename).base)
    if dt:
        xd.set_header("Date", dt)
    xd.set_header("Notes", puzzle.meta.description)

    xd.set_header("Title", puzzle.meta.title)

    for row in puzzle:
        rowstr = ""
        for cell in row:
            solution = cell.solution
            # '#' counts as a block only when the puzzle declares no block value.
            if (puzzle.block is None and solution == '#') or solution == puzzle.block:
                rowstr += xdfile.BLOCK_CHAR
            elif solution == ':':
                rowstr += xdfile.OPEN_CHAR
            elif cell == puzzle.empty:
                rowstr += xdfile.UNKNOWN_CHAR
            else:
                if solution not in grid_dict:
                    # First sighting of a non A-Z value: give it a one-character
                    # stand-in and record the mapping in the Rebus header.
                    if solution in rebus_shorthands:
                        cellch = solution
                        rebus_shorthands.remove(solution)
                        warn("%s: unknown grid character '%s', assuming rebus of itself" % (filename, solution))
                    else:
                        cellch = rebus_shorthands.pop()
                        warn("%s: unknown grid character '%s', assuming rebus (as '%s')" % (filename, solution, cellch))
                    xd.set_header("Rebus", xd.get_header("Rebus") + " %s=%s" % (cellch, solution))

                    grid_dict[solution] = cellch
                rowstr += grid_dict[solution]

        xd.grid.append(rowstr)

    assert xd.size() == (puzzle.width, puzzle.height), "non-matching grid sizes"

    # clues
    answers = {}  # e.g. "A1" / "D1" -> answer read from the grid

    for posdir, posnum, answer in xd.iteranswers():
        answers[posdir[0] + str(posnum)] = answer

    try:
        for number, clue in puzzle.clues.across():
            cluenum = "A" + str(number)
            if cluenum not in answers:
                raise xdfile.IncompletePuzzleParse(xd, "Clue number doesn't match grid: " + cluenum)
            xd.clues.append((("A", number), decode(clue), answers[cluenum]))

        for number, clue in puzzle.clues.down():
            cluenum = "D" + str(number)
            if cluenum not in answers:
                raise xdfile.IncompletePuzzleParse(xd, "Clue doesn't match grid: " + cluenum)
            xd.clues.append((("D", number), decode(clue), answers[cluenum]))
    except KeyError as e:
        raise xdfile.IncompletePuzzleParse(xd, "Clue doesn't match grid: " + str(e))

    return xd
Пример #35
0
def parse_ccxml(data, filename):
    """Parse Crossword Compiler XML bytes into an ``xdfile.xdfile``.

    Args:
        data: raw XML bytes; decoded as UTF-8 with undecodable bytes replaced.
        filename: name stored on the resulting xdfile.

    Returns:
        The populated xdfile, or None when the XML cannot be parsed or holds
        no ``crossword/grid`` element.
    """
    content = data.decode('utf-8', errors='replace')
    content = escape(content, xml_escape_table)
    content = consecutive(content)
    content = re.sub(r'(=["]{2}([^"]+?)["]{2})+', r'="&quot;\2&quot;"',
                     content)  # Replace double quotes
    content_xml = content.encode('utf-8')

    ns = {'puzzle': 'http://crossword.info/xml/rectangular-puzzle'}
    try:
        root = etree.fromstring(content_xml)
    except Exception as e:
        error('Exception %s' % e)
        error(content)
        # The original bare `exit` was a no-op, so execution fell through to a
        # NameError on `root`; report "no puzzle" the same way as a missing grid.
        return None

    # init crossword
    grid = root.xpath('//puzzle:crossword/puzzle:grid', namespaces=ns)
    if not grid:
        return None

    grid = grid[0]
    rows = int(grid.attrib['height'])
    cols = int(grid.attrib['width'])

    xd = xdfile.xdfile('', filename)

    # add metadata
    for metadata in root.xpath('//puzzle:metadata', namespaces=ns)[0]:
        text = metadata.text and metadata.text.strip()
        # Drop the "{namespace}" prefix from the tag name.
        title = re.sub(r'\{[^\}]*\}', '', metadata.tag.title())
        title = escape(title, rev_xml_escape_table)
        if text:
            text = escape(text, rev_xml_escape_table)
            xd.set_header(HEADER_RENAMES.get(title, title), text)

    # add puzzle
    puzzle = [[" "] * cols for _ in range(rows)]

    for cell in grid.xpath('./puzzle:cell', namespaces=ns):
        x = int(cell.attrib['x']) - 1
        y = int(cell.attrib['y']) - 1
        if cell.attrib.get('type') == 'block':
            value = xdfile.BLOCK_CHAR
        elif 'solution' in cell.attrib:
            value = cell.attrib['solution']
        else:
            # Neither a block nor a solved cell: keep the blank placeholder
            # rather than reusing the previous cell's value.
            continue
        puzzle[y][x] = value

    xd.grid = ["".join(row) for row in puzzle]

    # add clues
    word_map = {}
    for word in root.xpath('//puzzle:crossword/puzzle:word', namespaces=ns):
        word_map[word.attrib['id']] = (word.attrib['x'], word.attrib['y'])

    for clues in root.xpath('//puzzle:crossword/puzzle:clues', namespaces=ns):
        title_node = clues.xpath('./puzzle:title', namespaces=ns)[0]
        # tostring() returns bytes here, so iterating yields ints; the first
        # uppercase ASCII letter of the section title becomes the direction.
        title_letters = "".join(
            chr(x) for x in etree.tostring(title_node, method='text').upper()
            if chr(x) in string.ascii_uppercase)
        direction = title_letters[0]

        for clue in clues.xpath('./puzzle:clue', namespaces=ns):
            word_id = clue.attrib['word']
            number = int(clue.attrib['number'])
            text = "|".join(clue.itertext()).strip()
            text = escape(text, rev_xml_escape_table)
            solution = get_solution(word_id, word_map, puzzle)
            xd.clues.append(((direction, number), text, solution))

    return xd
Пример #36
0
def main(fn):
    """Print the HTML rendering of every .xd file named on the command line.

    The ``fn`` parameter is kept for interface compatibility but is not used:
    as before, the paths are taken from ``sys.argv[1:]``.
    """
    for path in sys.argv[1:]:
        # Close each file promptly instead of leaking the handle.
        with open(path) as xd_file:
            contents = xd_file.read()
        print(to_html(xdfile.xdfile(contents, path)))
Пример #37
0
def parse_ccxml(content):
    """Parse Crossword Compiler XML text into an ``xdfile.xdfile``.

    Inline formatting tags (<b>, <i>, <em>, <u>, <strike>) are first rewritten
    to brace markup so they survive XML parsing as plain text.

    Args:
        content: the XML document as a string.

    Returns:
        The populated xdfile: headers, grid rows and clues.
    """
    markup_replacements = (
        ("<b>", "{*"), ("</b>", "*}"),
        ("<i>", "{/"), ("</i>", "/}"),
        ("<em>", "{/"), ("</em>", "/}"),
        ("<u>", "{_"), ("</u>", "_}"),
        ("<strike>", "{-"), ("</strike>", "-}"),
    )
    for html_tag, brace_markup in markup_replacements:
        content = content.replace(html_tag, brace_markup)

    ns = {
        'puzzle': 'http://crossword.info/xml/rectangular-puzzle'
    }

    root = etree.fromstring(content)

    # init crossword
    grid = root.xpath('//puzzle:crossword/puzzle:grid', namespaces=ns)[0]
    rows = int(grid.attrib['height'])
    cols = int(grid.attrib['width'])

    xd = xdfile.xdfile()

    # add metadata
    for metadata in root.xpath('//puzzle:metadata', namespaces=ns)[0]:
        text = metadata.text and metadata.text.strip()
        # Drop the "{namespace}" prefix from the tag name.
        title = re.sub(r'\{[^\}]*\}', '', metadata.tag.title())
        if text:
            xd.headers.append((title, text))

    # add puzzle
    puzzle = [[" "] * cols for _ in range(rows)]

    for cell in grid.xpath('./puzzle:cell', namespaces=ns):
        x = int(cell.attrib['x']) - 1
        y = int(cell.attrib['y']) - 1
        if cell.attrib.get('type') == 'block':
            value = xdfile.BLOCK_CHAR
        elif 'solution' in cell.attrib:
            value = cell.attrib['solution']
        else:
            # Neither a block nor a solved cell: keep the blank placeholder
            # rather than reusing the previous cell's value.
            continue
        puzzle[y][x] = value

    xd.grid = ["".join(row) for row in puzzle]

    # add clues
    word_map = {}
    for word in root.xpath('//puzzle:crossword/puzzle:word', namespaces=ns):
        word_map[word.attrib['id']] = (word.attrib['x'], word.attrib['y'])

    for clues in root.xpath('//puzzle:crossword/puzzle:clues', namespaces=ns):
        title_node = clues.xpath('./puzzle:title', namespaces=ns)[0]
        # The first uppercase letter of the section title becomes the direction.
        # string.uppercase is the Python 2 spelling used by this snippet.
        title_letters = "".join(x for x in etree.tostring(title_node, method='text').upper() if x in string.uppercase)
        direction = title_letters[0]

        for clue in clues.xpath('./puzzle:clue', namespaces=ns):
            word_id = clue.attrib['word']
            number = int(clue.attrib['number'])
            text = "|".join(clue.itertext()).strip()
            solution = get_solution(word_id, word_map, puzzle)
            xd.clues.append(((direction, number), text, solution))

    return xd
Пример #38
0
def parse_puz(contents, filename):
    """Convert the contents of a .puz file into an xdfile (Python 2 code).

    Returns None when *filename* does not end in '.puz' (case-insensitive);
    otherwise returns an xdfile carrying headers built from the puzzle
    metadata, the grid rows, and the across/down clues paired with answers
    read from the grid.
    """
    # Stand-in characters for cell values outside A-Z; consumed from the end.
    rebus_shorthands = list(u"♚♛♜♝♞♟⚅⚄⚃⚂⚁⚀♣♦♥♠Фθиλπφя+&%$@?*zyxwvutsrqponmlkjihgfedcba0987654321")

    if not filename.lower().endswith('.puz'):
        return
    puz_object = puz.load(contents)
    puzzle = crossword.from_puz(puz_object)

    # Plain A-Z cells map to themselves (string.uppercase exists only in Python 2).
    grid_dict = dict(zip(string.uppercase, string.uppercase))

    xd = xdfile.xdfile()

    # Lower-cased metadata keys, keeping only entries with a truthy value.
    md = dict([ (k.lower(), v) for k, v in puzzle.meta() if v ])
    author = md.get("creator", "")
    # "Author / Editor" form.
    # NOTE(review): the two-name unpacking raises ValueError if " / " occurs
    # more than once.
    if " / " in author:
        author, editor = author.split(" / ")
    else:
        editor = ""

    author = author.strip()
    editor = editor.strip()

    # Pull an "edited by ..." / "ed. ..." credit out of the author string.
    # NOTE(review): the bare except covers the expected ValueError from
    # .index() when the separator is absent, but it also silently swallows the
    # IndexError from split(",")[1] and the AssertionError below, in which case
    # earlier assignments in the same iteration remain in effect.
    for editsep in [ "edited by ", "ed. " ]:
      try:
        i = author.lower().index(editsep)
        if i == 0:
            # Separator at the start: everything after it becomes the editor,
            # and the author is taken from after the first comma.
            editor = author[len(editsep):]
            author = editor.split(",")[1]
        elif i > 0:
            # Separator in the middle: author before it, editor after it.
            assert not editor
            editor = author[i+len(editsep):]
            author = author[:i]
      except:
        pass

    author = author.strip()
    editor = editor.strip()

    # Strip any leading "by " prefixes.
    while author.lower().startswith("by "):
        author = author[3:]

    # Strip one trailing comma or period.
    if author and author[-1] in ",.":
        author = author[:-1]

    md["creator"] = author
    md["editor"] = editor

    # Emit headers in hdr_order order.
    # NOTE(review): presumably hdr_order lists every possible metadata key;
    # .index() would raise ValueError for a key it lacks - confirm.
    for k, v in sorted(md.items(), key=lambda x: hdr_order.index(x[0])):
        if v:
            k = k[0].upper() + k[1:].lower()
            v = decode(v.strip())
            v = v.replace(u"©", "(c)")
            xd.headers.append((k, v))

    answers = { }  # "A<n>" / "D<n>" -> answer string read from the grid
    clue_num = 1   # number assigned to the next cell that starts an answer

    for r, row in enumerate(puzzle):
        rowstr = ""
        for c, cell in enumerate(row):
            if puzzle.block is None and cell.solution == '.':
                rowstr += xdfile.BLOCK_CHAR
            elif puzzle.block == cell.solution:
                rowstr += xdfile.BLOCK_CHAR
            elif cell == puzzle.empty:
                rowstr += "."
            else:
                # Give any value outside the known mapping a stand-in character.
                if cell.solution not in grid_dict:
                    grid_dict[cell.solution] = rebus_shorthands.pop()

                rowstr += grid_dict[cell.solution]

                # compute number shown in box
                new_clue = False
                if is_block(puzzle, c-1, r):  # across clue start
                    # Collect solutions rightwards until is_block() reports a stop.
                    j = 0
                    answer = ""
                    while not is_block(puzzle, c+j, r):
                        answer += puzzle[c+j, r].solution
                        j += 1

                    # Single-cell runs are not counted as answers.
                    if len(answer) > 1:
                        new_clue = True
                        answers["A"+str(clue_num)] = answer

                if is_block(puzzle, c, r-1):  # down clue start
                    # Collect solutions downwards until is_block() reports a stop.
                    j = 0
                    answer = ""
                    while not is_block(puzzle, c, r+j):
                        answer += puzzle[c, r+j].solution
                        j += 1

                    if len(answer) > 1:
                        new_clue = True
                        answers["D"+str(clue_num)] = answer

                # A cell starting an across and/or a down answer uses up one number.
                if new_clue:
                    clue_num += 1
        xd.grid.append(rowstr)

    # Pair each clue with the answer computed above; a clue number with no
    # computed answer raises KeyError here.
    for number, clue in puzzle.clues.across():
        xd.clues.append((("A", number), decode(clue), answers["A"+str(number)]))

    for number, clue in puzzle.clues.down():
        xd.clues.append((("D", number), decode(clue), answers["D"+str(number)]))

    return xd