Пример #1
0
def parse_uxml(content, filename):
    """Build an xdfile object from a uxml crossword document.

    Args:
        content: The raw document. It is decoded as UTF-8, then as cp1252;
            if neither works (for example because it is already a ``str``)
            it is used unchanged.
        filename: Passed to ``xdfile.xdfile`` as its second argument.

    Returns:
        The ``xdfile.xdfile`` instance with headers, grid rows and clues
        filled in from the document.

    Raises:
        etree.XMLSyntaxError: The document could not be parsed, and either
            no complete ``<tag>...</tag>`` span could be found for a retry
            or the retry failed as well.
        ValueError: The declared Width is 0, so the answer string cannot be
            cut into rows.
    """
    POSSIBLE_META_DATA = ['Title', 'Author', 'Editor', 'Copyright', 'Category']

    for encoding in ('utf-8', 'cp1252'):
        try:
            content = content.decode(encoding)
            break
        except (UnicodeDecodeError, AttributeError):
            # Wrong encoding, or not a bytes object at all: try the next
            # codec and, as a last ditch effort, keep the original value.
            continue

    content = escape(content, xml_escape_table)
    # Attributes written as =""text"" are not well-formed XML; turn the inner
    # pair of quotes into &quot; entities so the value survives parsing.
    content = re.sub(r'(=["]{2}([^"]+?)["]{2})+', r'="&quot;\2&quot;"',
                     content)

    try:
        root = etree.fromstring(content.encode("utf-8"))
    except (etree.XMLSyntaxError, ValueError):
        # Retry on the first complete <tag>...</tag> span, which drops any
        # junk surrounding the actual document.
        match = re.search(r"<(\w+).*?</\1>", content, flags=re.DOTALL)
        if match is None:
            raise  # nothing to retry with: surface the original parse error
        root = etree.fromstring(match.group())

    # init crossword
    cols = int(root.xpath('//crossword/Width')[0].attrib['v'])
    xd = xdfile.xdfile('', filename)

    # add meta data
    for item in POSSIBLE_META_DATA:
        elem = root.xpath('//crossword/' + item)
        if elem:
            text = elem[0].attrib['v']
            if text:
                text = escape(text, rev_xml_escape_table)
                xd.set_header(item, unquote(text))

    # add puzzle: AllAnswer holds every cell in one string, '-' marks a block
    all_answers = root.xpath('//crossword/AllAnswer')[0].attrib['v']
    all_answers = all_answers.replace('-', xdfile.BLOCK_CHAR)
    # range() rejects a zero step, so a zero Width fails fast instead of
    # looping forever.
    for start in range(0, len(all_answers), cols):
        xd.grid.append(all_answers[start:start + cols])

    # add clues
    for clue_type in ('across', 'down'):
        # Iterate the element directly; getchildren() is deprecated.
        for clue in root.xpath('//crossword/' + clue_type)[0]:
            number = int(clue.attrib['cn'])
            text = udecode(clue.attrib['c'].strip())
            text = escape(text, rev_xml_escape_table)
            solution = clue.attrib['a'].strip()
            xd.clues.append(((clue_type[0].upper(), number), text, solution))

    return xd
Пример #2
0
def parse_uxml(content, filename):
    """Parse a uxml crossword document and return it as an xdfile object.

    Args:
        content: Document to parse. A UTF-8 decode is attempted first, then
            cp1252; when both fail (e.g. the value is already a ``str``)
            the value is kept as it is.
        filename: Forwarded as the second argument of ``xdfile.xdfile``.

    Returns:
        The ``xdfile.xdfile`` object populated with the headers found in
        the document, the grid rows and the across/down clues.

    Raises:
        etree.XMLSyntaxError: Parsing failed and no complete element could
            be extracted for a second attempt, or that attempt failed too.
        ValueError: Width is 0, which makes splitting the answers into rows
            impossible.
    """
    POSSIBLE_META_DATA = ["Title", "Author", "Editor", "Copyright", "Category"]

    try:
        content = content.decode("utf-8")
    except (UnicodeDecodeError, AttributeError):
        try:
            content = content.decode("cp1252")
        except (UnicodeDecodeError, AttributeError):
            pass  # last ditch effort, just try the original string

    content = escape(content, xml_escape_table)
    # Replace doubled quotes (attr=""text"") with &quot; entities.
    content = re.sub(r'(=["]{2}([^"]+?)["]{2})+', r'="&quot;\2&quot;"', content)

    try:
        root = etree.fromstring(content.encode("utf-8"))
    except (etree.XMLSyntaxError, ValueError):
        # Second attempt: parse only the first complete <tag>...</tag> span.
        fragment = re.search(r"<(\w+).*?</\1>", content, flags=re.DOTALL)
        if fragment is None:
            # No element to fall back on; report the real parse failure
            # rather than an AttributeError on None.
            raise
        root = etree.fromstring(fragment.group())

    # init crossword
    cols = int(root.xpath("//crossword/Width")[0].attrib["v"])
    xd = xdfile.xdfile("", filename)

    # add meta data
    for item in POSSIBLE_META_DATA:
        elem = root.xpath("//crossword/" + item)
        if not elem:
            continue
        text = elem[0].attrib["v"]
        if text:
            text = escape(text, rev_xml_escape_table)
            xd.set_header(item, unquote(text))

    # add puzzle: one flat answer string, cut into rows of `cols` cells
    all_answers = root.xpath("//crossword/AllAnswer")[0].attrib["v"]
    all_answers = all_answers.replace("-", xdfile.BLOCK_CHAR)
    # A zero Width raises ValueError here instead of spinning forever.
    for offset in range(0, len(all_answers), cols):
        xd.grid.append(all_answers[offset : offset + cols])

    # add clues
    for clue_type in ("across", "down"):
        section = root.xpath("//crossword/" + clue_type)[0]
        # Direct iteration yields the child elements; getchildren() is
        # deprecated.
        for clue in section:
            number = int(clue.attrib["cn"])
            text = udecode(clue.attrib["c"].strip())
            text = escape(text, rev_xml_escape_table)
            solution = clue.attrib["a"].strip()
            xd.clues.append(((clue_type[0].upper(), number), text, solution))

    return xd
Пример #3
0
def parse_ccxml(data, filename):
    """Parse a rectangular-puzzle XML document into an xdfile object.

    Elements are looked up in the
    ``http://crossword.info/xml/rectangular-puzzle`` namespace.

    Args:
        data: Raw document bytes; decoded as UTF-8 with undecodable bytes
            replaced.
        filename: Passed to ``xdfile.xdfile`` as its second argument.

    Returns:
        The populated ``xdfile.xdfile`` object, or ``None`` when the
        document contains no ``crossword/grid`` element.

    Raises:
        Exception: Whatever ``etree.fromstring`` raised for an unparsable
            document, re-raised after it has been reported via ``error``.
    """
    content = data.decode('utf-8', errors='replace')
    content = escape(content, xml_escape_table)
    content = consecutive(content)
    # Replace doubled quotes (attr=""text"") with &quot; entities
    content = re.sub(r'(=["]{2}([^"]+?)["]{2})+', r'="&quot;\2&quot;"', content)
    content_xml = content.encode('utf-8')

    ns = {
        'puzzle': 'http://crossword.info/xml/rectangular-puzzle'
    }
    try:
        root = etree.fromstring(content_xml)
    except Exception as e:
        error('Exception %s' % e)
        error(content)
        # The original bare `exit` was a no-op and execution fell through to
        # a NameError on `root`; propagate the real parse error instead.
        raise

    # init crossword
    grid = root.xpath('//puzzle:crossword/puzzle:grid', namespaces=ns)
    if not grid:
        return None

    grid = grid[0]
    rows = int(grid.attrib['height'])
    cols = int(grid.attrib['width'])

    xd = xdfile.xdfile('', filename)

    # add metadata (a document without a metadata element has no headers)
    metadata_nodes = root.xpath('//puzzle:metadata', namespaces=ns)
    for metadata in (metadata_nodes[0] if metadata_nodes else ()):
        text = metadata.text and metadata.text.strip()
        if not text:
            continue
        # Drop the "{namespace}" prefix from the tag to get the header name
        title = re.sub(r'\{[^\}]*\}', '', metadata.tag.title())
        title = escape(title, rev_xml_escape_table)
        text = escape(text, rev_xml_escape_table)
        xd.set_header(HEADER_RENAMES.get(title, title), text)

    # add puzzle
    puzzle = [[" "] * cols for _ in range(rows)]

    for cell in grid.xpath('./puzzle:cell', namespaces=ns):
        x = int(cell.attrib['x']) - 1
        y = int(cell.attrib['y']) - 1
        if cell.attrib.get('type') == 'block':
            value = xdfile.BLOCK_CHAR
        elif 'solution' in cell.attrib:
            value = cell.attrib['solution']
        else:
            # Neither a block nor a solved cell: keep the blank placeholder
            # rather than reusing the previous cell's value.
            continue
        puzzle[y][x] = value

    xd.grid = ["".join(row) for row in puzzle]

    # add clues
    word_map = {
        word.attrib['id']: (word.attrib['x'], word.attrib['y'])
        for word in root.xpath('//puzzle:crossword/puzzle:word', namespaces=ns)
    }

    for clues in root.xpath('//puzzle:crossword/puzzle:clues', namespaces=ns):
        title_node = clues.xpath('./puzzle:title', namespaces=ns)[0]
        # tostring() returns bytes, so iterating yields ints; keep only the
        # ASCII letters and use the first one as the clue direction.
        title_bytes = etree.tostring(title_node, method='text').upper()
        letters = "".join(chr(x) for x in title_bytes if chr(x) in string.ascii_uppercase)
        direction = letters[0]

        for clue in clues.xpath('./puzzle:clue', namespaces=ns):
            word_id = clue.attrib['word']
            number = int(clue.attrib['number'])
            # Text fragments of the clue element are joined with "|"
            text = "|".join(clue.itertext()).strip()
            text = escape(text, rev_xml_escape_table)
            solution = get_solution(word_id, word_map, puzzle)
            xd.clues.append(((direction, number), text, solution))

    return xd
Пример #4
0
def parse_ccxml(data, filename):
    """Convert a rectangular-puzzle XML crossword into an xdfile object.

    All element lookups use the
    ``http://crossword.info/xml/rectangular-puzzle`` namespace.

    Args:
        data: The document as bytes. It is decoded as UTF-8, substituting
            the replacement character for invalid sequences.
        filename: Forwarded as the second argument of ``xdfile.xdfile``.

    Returns:
        An ``xdfile.xdfile`` carrying the headers, grid and clues read from
        the document, or ``None`` if there is no ``crossword/grid`` element.

    Raises:
        Exception: The error raised by ``etree.fromstring`` when the
            document cannot be parsed; it is logged through ``error`` and
            then re-raised.
    """
    content = data.decode('utf-8', errors='replace')
    content = escape(content, xml_escape_table)
    content = consecutive(content)
    content = re.sub(r'(=["]{2}([^"]+?)["]{2})+', r'="&quot;\2&quot;"',
                     content)  # Replace double quotes
    content_xml = content.encode('utf-8')

    ns = {'puzzle': 'http://crossword.info/xml/rectangular-puzzle'}
    try:
        root = etree.fromstring(content_xml)
    except Exception as e:
        error('Exception %s' % e)
        error(content)
        # A bare `exit` does nothing, so the old code carried on and hit a
        # NameError on `root`. Re-raise so callers see the actual failure.
        raise

    # init crossword
    grid_nodes = root.xpath('//puzzle:crossword/puzzle:grid', namespaces=ns)
    if not grid_nodes:
        return None

    grid = grid_nodes[0]
    rows = int(grid.attrib['height'])
    cols = int(grid.attrib['width'])

    xd = xdfile.xdfile('', filename)

    # add metadata; tolerate documents that carry no metadata element
    metadata_nodes = root.xpath('//puzzle:metadata', namespaces=ns)
    if metadata_nodes:
        for metadata in metadata_nodes[0]:
            text = metadata.text and metadata.text.strip()
            if text:
                # Strip the "{namespace}" prefix from the tag name
                title = re.sub(r'\{[^\}]*\}', '', metadata.tag.title())
                title = escape(title, rev_xml_escape_table)
                text = escape(text, rev_xml_escape_table)
                xd.set_header(HEADER_RENAMES.get(title, title), text)

    # add puzzle: start from a blank rows x cols grid
    puzzle = [[" "] * cols for _ in range(rows)]

    for cell in grid.xpath('./puzzle:cell', namespaces=ns):
        attrs = cell.attrib
        is_block = 'type' in attrs and attrs['type'] == 'block'
        if not is_block and 'solution' not in attrs:
            # No letter and not a block: leave the blank placeholder. The
            # old code reused the previous cell's value here, or failed on
            # the first cell.
            continue
        x = int(attrs['x']) - 1
        y = int(attrs['y']) - 1
        puzzle[y][x] = xdfile.BLOCK_CHAR if is_block else attrs['solution']

    xd.grid = ["".join(row) for row in puzzle]

    # add clues
    word_map = {}
    for word in root.xpath('//puzzle:crossword/puzzle:word', namespaces=ns):
        word_map[word.attrib['id']] = (word.attrib['x'], word.attrib['y'])

    for clues in root.xpath('//puzzle:crossword/puzzle:clues', namespaces=ns):
        title_elem = clues.xpath('./puzzle:title', namespaces=ns)[0]
        # tostring() gives bytes; iterating them yields ints, hence chr().
        # The first ASCII letter of the upper-cased title is the direction.
        clue_type = "".join(
            chr(x) for x in etree.tostring(title_elem, method='text').upper()
            if chr(x) in string.ascii_uppercase)
        clue_type = clue_type[0]

        for clue in clues.xpath('./puzzle:clue', namespaces=ns):
            word_id = clue.attrib['word']
            number = int(clue.attrib['number'])
            # Join the clue element's text fragments with "|"
            text = "|".join(clue.itertext()).strip()
            text = escape(text, rev_xml_escape_table)
            solution = get_solution(word_id, word_map, puzzle)
            xd.clues.append(((clue_type, number), text, solution))

    return xd