def parse_uxml(content, filename):
    """Parse a uxml crossword document and return it as an ``xdfile.xdfile``.

    Args:
        content: Raw document. It is decoded as UTF-8, then as cp1252; if both
            attempts fail the value is used unchanged.
        filename: Passed through to the ``xdfile.xdfile`` constructor.

    Returns:
        The populated ``xdfile.xdfile``: headers from the metadata elements,
        grid rows cut from ``AllAnswer`` using ``Width``, and one clue entry
        per child of the ``across`` and ``down`` elements.

    Raises:
        ValueError: If ``AllAnswer`` is non-empty but ``Width`` is not a
            positive integer (this previously looped forever).
        IndexError: If ``Width``, ``AllAnswer``, ``across`` or ``down`` is
            missing under ``crossword``.
        KeyError: If a required attribute is missing on one of those elements.
    """
    POSSIBLE_META_DATA = ['Title', 'Author', 'Editor', 'Copyright', 'Category']

    try:
        content = content.decode("utf-8")
    except (UnicodeDecodeError, AttributeError):
        # AttributeError covers input that has no .decode (e.g. already str).
        try:
            content = content.decode("cp1252")
        except (UnicodeDecodeError, AttributeError):
            pass  # last ditch effort, just try the original string

    content = escape(content, xml_escape_table)
    content = re.sub(r'(=["]{2}([^"]+?)["]{2})+', r'=""\2""', content)  # Replace double quotes

    try:
        root = etree.fromstring(content.encode("utf-8"))
    except Exception:  # TODO: catch the specific exception
        # Fallback: parse only the first complete <tag>...</tag> span.
        match = re.search(r"<(\w+).*?</\1>", content, flags=re.DOTALL)
        if match is None:
            # Nothing to salvage; surface the real parse error.
            raise
        root = etree.fromstring(match.group())

    # init crossword
    # Height is not read: the number of rows follows from len(AllAnswer) / Width.
    cols = int(root.xpath('//crossword/Width')[0].attrib['v'])
    xd = xdfile.xdfile('', filename)

    # add meta data
    for item in POSSIBLE_META_DATA:
        elem = root.xpath('//crossword/' + item)
        if not elem:
            continue
        text = elem[0].attrib['v']
        if text:
            text = escape(text, rev_xml_escape_table)
            xd.set_header(item, unquote(text))

    # add puzzle
    all_answers = root.xpath('//crossword/AllAnswer')[0].attrib['v']
    all_answers = all_answers.replace('-', xdfile.BLOCK_CHAR)
    if all_answers:
        if cols <= 0:
            raise ValueError('invalid crossword Width: %d' % cols)
        for start in range(0, len(all_answers), cols):
            xd.grid.append(all_answers[start:start + cols])

    # add clues
    for clue_type in ('across', 'down'):
        # Iterating the element yields its children (getchildren() is deprecated).
        for clue in root.xpath('//crossword/' + clue_type)[0]:
            number = int(clue.attrib['cn'])
            text = udecode(clue.attrib['c'].strip())
            text = escape(text, rev_xml_escape_table)
            solution = clue.attrib['a'].strip()
            xd.clues.append(((clue_type[0].upper(), number), text, solution))

    return xd
def parse_uxml(content, filename):
    """Parse a uxml crossword document and return it as an ``xdfile.xdfile``.

    Args:
        content: Raw document. It is decoded as UTF-8, then as cp1252; if both
            attempts fail the value is used unchanged.
        filename: Passed through to the ``xdfile.xdfile`` constructor.

    Returns:
        The populated ``xdfile.xdfile``: headers from the metadata elements,
        grid rows cut from ``AllAnswer`` using ``Width``, and one clue entry
        per child of the ``across`` and ``down`` elements.

    Raises:
        ValueError: If ``AllAnswer`` is non-empty but ``Width`` is not a
            positive integer (this previously looped forever).
        IndexError: If ``Width``, ``AllAnswer``, ``across`` or ``down`` is
            missing under ``crossword``.
        KeyError: If a required attribute is missing on one of those elements.
    """
    POSSIBLE_META_DATA = ["Title", "Author", "Editor", "Copyright", "Category"]

    try:
        content = content.decode("utf-8")
    except (UnicodeDecodeError, AttributeError):
        # AttributeError covers input that has no .decode (e.g. already str).
        try:
            content = content.decode("cp1252")
        except (UnicodeDecodeError, AttributeError):
            pass  # last ditch effort, just try the original string

    content = escape(content, xml_escape_table)
    content = re.sub(r'(=["]{2}([^"]+?)["]{2})+', r'=""\2""', content)  # Replace double quotes

    try:
        root = etree.fromstring(content.encode("utf-8"))
    except Exception:  # TODO: catch the specific exception
        # Fallback: parse only the first complete <tag>...</tag> span.
        match = re.search(r"<(\w+).*?</\1>", content, flags=re.DOTALL)
        if match is None:
            # Nothing to salvage; surface the real parse error.
            raise
        root = etree.fromstring(match.group())

    # init crossword
    # Height is not read: the number of rows follows from len(AllAnswer) / Width.
    cols = int(root.xpath("//crossword/Width")[0].attrib["v"])
    xd = xdfile.xdfile("", filename)

    # add meta data
    for item in POSSIBLE_META_DATA:
        elem = root.xpath("//crossword/" + item)
        if not elem:
            continue
        text = elem[0].attrib["v"]
        if text:
            text = escape(text, rev_xml_escape_table)
            xd.set_header(item, unquote(text))

    # add puzzle
    all_answers = root.xpath("//crossword/AllAnswer")[0].attrib["v"]
    all_answers = all_answers.replace("-", xdfile.BLOCK_CHAR)
    if all_answers:
        if cols <= 0:
            raise ValueError("invalid crossword Width: %d" % cols)
        for start in range(0, len(all_answers), cols):
            xd.grid.append(all_answers[start : start + cols])

    # add clues
    for clue_type in ("across", "down"):
        # Iterating the element yields its children (getchildren() is deprecated).
        for clue in root.xpath("//crossword/" + clue_type)[0]:
            number = int(clue.attrib["cn"])
            text = udecode(clue.attrib["c"].strip())
            text = escape(text, rev_xml_escape_table)
            solution = clue.attrib["a"].strip()
            xd.clues.append(((clue_type[0].upper(), number), text, solution))

    return xd
def parse_ccxml(data, filename):
    """Parse a rectangular-puzzle XML document into an ``xdfile.xdfile``.

    Args:
        data: Raw document bytes; decoded as UTF-8 with undecodable bytes
            replaced.
        filename: Passed through to the ``xdfile.xdfile`` constructor.

    Returns:
        The populated ``xdfile.xdfile``, or ``None`` when the document has no
        ``crossword/grid`` element.

    Raises:
        Exception: Whatever ``etree.fromstring`` raises on unparseable input;
            it is logged via ``error`` and then re-raised.
        IndexError: If there is no ``metadata`` element, a ``clues`` element
            has no ``title``, or a title contains no ASCII letters.
        KeyError: If a required attribute is missing.
    """
    content = data.decode('utf-8', errors='replace')
    content = escape(content, xml_escape_table)
    content = consecutive(content)
    content = re.sub(r'(=["]{2}([^"]+?)["]{2})+', r'=""\2""', content)  # Replace double quotes
    content_xml = content.encode('utf-8')

    ns = {
        'puzzle': 'http://crossword.info/xml/rectangular-puzzle'
    }

    try:
        root = etree.fromstring(content_xml)
    except Exception as e:
        error('Exception %s' % e)
        error(content)
        # Re-raise: carrying on would only fail later on an unbound `root`.
        raise

    # init crossword
    grids = root.xpath('//puzzle:crossword/puzzle:grid', namespaces=ns)
    if not grids:
        return None

    grid = grids[0]
    rows = int(grid.attrib['height'])
    cols = int(grid.attrib['width'])

    xd = xdfile.xdfile('', filename)

    # add metadata
    for metadata in root.xpath('//puzzle:metadata', namespaces=ns)[0]:
        text = metadata.text and metadata.text.strip()
        # Drop the "{namespace}" prefix from the title-cased tag name.
        title = re.sub(r'\{[^\}]*\}', '', metadata.tag.title())
        title = escape(title, rev_xml_escape_table)
        if text:
            text = escape(text, rev_xml_escape_table)
            xd.set_header(HEADER_RENAMES.get(title, title), text)

    # add puzzle
    puzzle = [[" "] * cols for _ in range(rows)]

    for cell in grid.xpath('./puzzle:cell', namespaces=ns):
        # Cell coordinates are 1-based in the document.
        x = int(cell.attrib['x']) - 1
        y = int(cell.attrib['y']) - 1
        if cell.attrib.get('type') == 'block':
            # A block wins over any solution attribute on the same cell.
            puzzle[y][x] = xdfile.BLOCK_CHAR
        elif 'solution' in cell.attrib:
            puzzle[y][x] = cell.attrib['solution']
        # Otherwise keep the " " placeholder instead of reusing the value
        # left over from the previously processed cell.

    xd.grid = ["".join(row) for row in puzzle]

    # add clues
    word_map = {
        word.attrib['id']: (word.attrib['x'], word.attrib['y'])
        for word in root.xpath('//puzzle:crossword/puzzle:word', namespaces=ns)
    }

    for clues in root.xpath('//puzzle:crossword/puzzle:clues', namespaces=ns):
        title_elem = clues.xpath('./puzzle:title', namespaces=ns)[0]
        # Keep only the ASCII letters of the upper-cased title text; the first
        # one is used as the clue direction key.
        title_letters = "".join(
            chr(x)
            for x in etree.tostring(title_elem, method='text').upper()
            if chr(x) in string.ascii_uppercase
        )
        direction = title_letters[0]

        for clue in clues.xpath('./puzzle:clue', namespaces=ns):
            word_id = clue.attrib['word']
            number = int(clue.attrib['number'])
            text = "|".join(clue.itertext()).strip()
            text = escape(text, rev_xml_escape_table)
            solution = get_solution(word_id, word_map, puzzle)
            xd.clues.append(((direction, number), text, solution))

    return xd
def parse_ccxml(data, filename):
    """Parse a rectangular-puzzle XML document into an ``xdfile.xdfile``.

    Args:
        data: Raw document bytes; decoded as UTF-8 with undecodable bytes
            replaced.
        filename: Passed through to the ``xdfile.xdfile`` constructor.

    Returns:
        The populated ``xdfile.xdfile``, or ``None`` when the document has no
        ``crossword/grid`` element.

    Raises:
        Exception: Whatever ``etree.fromstring`` raises on unparseable input;
            it is logged via ``error`` and then re-raised.
        IndexError: If there is no ``metadata`` element, a ``clues`` element
            has no ``title``, or a title contains no ASCII letters.
        KeyError: If a required attribute is missing.
    """
    content = data.decode('utf-8', errors='replace')
    content = escape(content, xml_escape_table)
    content = consecutive(content)
    content = re.sub(r'(=["]{2}([^"]+?)["]{2})+', r'=""\2""', content)  # Replace double quotes
    content_xml = content.encode('utf-8')

    ns = {'puzzle': 'http://crossword.info/xml/rectangular-puzzle'}

    try:
        root = etree.fromstring(content_xml)
    except Exception as e:
        error('Exception %s' % e)
        error(content)
        # Re-raise: carrying on would only fail later on an unbound `root`.
        raise

    # init crossword
    grids = root.xpath('//puzzle:crossword/puzzle:grid', namespaces=ns)
    if not grids:
        return None

    grid = grids[0]
    rows = int(grid.attrib['height'])
    cols = int(grid.attrib['width'])

    xd = xdfile.xdfile('', filename)

    # add metadata
    for metadata in root.xpath('//puzzle:metadata', namespaces=ns)[0]:
        text = metadata.text and metadata.text.strip()
        # Drop the "{namespace}" prefix from the title-cased tag name.
        title = re.sub(r'\{[^\}]*\}', '', metadata.tag.title())
        title = escape(title, rev_xml_escape_table)
        if text:
            text = escape(text, rev_xml_escape_table)
            xd.set_header(HEADER_RENAMES.get(title, title), text)

    # add puzzle
    puzzle = [[" "] * cols for _ in range(rows)]

    for cell in grid.xpath('./puzzle:cell', namespaces=ns):
        # Cell coordinates are 1-based in the document.
        x = int(cell.attrib['x']) - 1
        y = int(cell.attrib['y']) - 1
        if cell.attrib.get('type') == 'block':
            # A block wins over any solution attribute on the same cell.
            puzzle[y][x] = xdfile.BLOCK_CHAR
        elif 'solution' in cell.attrib:
            puzzle[y][x] = cell.attrib['solution']
        # Otherwise keep the " " placeholder instead of reusing the value
        # left over from the previously processed cell.

    xd.grid = ["".join(row) for row in puzzle]

    # add clues
    word_map = {
        word.attrib['id']: (word.attrib['x'], word.attrib['y'])
        for word in root.xpath('//puzzle:crossword/puzzle:word', namespaces=ns)
    }

    for clues in root.xpath('//puzzle:crossword/puzzle:clues', namespaces=ns):
        title_elem = clues.xpath('./puzzle:title', namespaces=ns)[0]
        # Keep only the ASCII letters of the upper-cased title text; the first
        # one is used as the clue direction key.
        title_letters = "".join(
            chr(x)
            for x in etree.tostring(title_elem, method='text').upper()
            if chr(x) in string.ascii_uppercase
        )
        direction = title_letters[0]

        for clue in clues.xpath('./puzzle:clue', namespaces=ns):
            word_id = clue.attrib['word']
            number = int(clue.attrib['number'])
            text = "|".join(clue.itertext()).strip()
            text = escape(text, rev_xml_escape_table)
            solution = get_solution(word_id, word_map, puzzle)
            xd.clues.append(((direction, number), text, solution))

    return xd