def read_from_ccj(self, f, title, author, puzzle_number, copyright_message,
                  verbose=False, date_string=None):
    """Parse a .ccj crossword from the file object f into this instance.

    The file layout is only partly understood, so the parser walks
    through the data with a single index and skips over the regions
    whose meaning is unknown.  On success it sets width, height, grid
    (including the answer letters), across_clues, down_clues, setter,
    puzzle_number, title, author and copyright_message on self.

    Parameters:
      f -- object whose read() returns the complete file contents
      title -- base title; "Crossword" is used when this is falsy
      author -- used as the setter when the clue label doesn't supply
                one, and as the author ("Unknown Setter" when falsy)
      puzzle_number -- used when the clue label doesn't supply one; it
                       is concatenated into the title, so pass a string
      copyright_message -- copyright text ("© Unknown" when falsy)
      verbose -- print progress information while parsing
      date_string -- optional date appended to the title in brackets;
                     when None, a module-level `date_string` is used if
                     one exists

    Raises Exception if a grid byte is not recognised or if one of the
    expected marker bytes (0x01 before the answers, 0x02 before the
    clues) is missing.
    """
    if date_string is None:
        # This method used to read `date_string` as a free (module-level)
        # name.  Keep honouring that name when it exists, but don't die
        # with a NameError when it doesn't.
        date_string = globals().get('date_string')

    d = f.read()
    # i is the index into the file for the rest of this method:
    i = 2
    # I think these must be the list of buttons on the left:
    while byte_at(d, i) != 0:
        s, i = read_string(d, i)
        if verbose:
            print("got button string:", s)
    # Then the congratulations message, I think:
    i += 1
    s, i = read_string(d, i)
    if verbose:
        print("got congratulations message:", s)
    # Skip another byte; 0x02 in the Independent it seems, but 0x00 in the
    # Herald puzzle I tried.
    i += 1
    # I think we get the grid dimensions in the next two:
    self.width = byte_at(d, i)
    i += 1
    self.height = byte_at(d, i)
    i += 1
    self.grid = Grid(self.width, self.height)

    # Now skip over everything until we think we see the grid, since I've
    # no idea what it's meant to mean:
    while byte_at(d, i) not in (0x3f, 0x23):
        i += 1

    for y in range(self.height):
        for x in range(self.width):
            value = byte_at(d, i)
            # Lights seem to be indicated by: '?' (or 'M' very occasionally)
            if value in (0x3f, 0x4d):
                self.grid.cells[y][x] = Cell(y, x)
            # Blocked-out squares seem to be always '#'
            elif value != 0x23:
                message = "Unknown value {0} at {1}"
                raise Exception(message.format(str(value), coord_str(x, y)))
            i += 1

    if verbose:
        print("grid is:\n" + self.grid.to_grid_string(True))

    # Next there's a grid structure the purpose of which I don't
    # understand (it is only built so that it can be shown in verbose
    # mode):
    grid_unknown_purpose = Grid(self.width, self.height)
    for y in range(self.height):
        for x in range(self.width):
            cell = Cell(y, x)
            grid_unknown_purpose.cells[y][x] = cell
            value = byte_at(d, i)
            if value == 0:
                cell.set_letter(' ')
            elif value < 10:
                cell.set_letter(str(value))
            else:
                # Only one character fits in a cell, so keep the last digit
                truncated = str(value % 10)
                if verbose:
                    message = "Warning, truncating {0} to {1} at {2}"
                    print(message.format(value, truncated, coord_str(x, y)))
                cell.set_letter(truncated)
            i += 1

    # Seem to need to skip over an extra byte (0x01) here before the
    # answers.  Maybe it indicates whether there are answers next or not:
    if byte_at(d, i) != 1:
        raise Exception("So far we expect a 0x01 before the answers...")
    i += 1

    if verbose:
        print("grid_unknown_purpose is:\n" +
              grid_unknown_purpose.to_grid_string(False))

    # Now there's the grid with the answers; there is a byte for every
    # square, but only the lights get a letter:
    for y in range(self.height):
        for x in range(self.width):
            cell = self.grid.cells[y][x]
            if cell:
                cell.set_letter(chr(byte_at(d, i)))
            i += 1

    if verbose:
        print("grid with answers is:\n" + self.grid.to_grid_string(False))

    skipped_blocks_of_four = 0
    while skippable_block_of_four(d, i):
        i += 4
        skipped_blocks_of_four += 1
    if verbose and skipped_blocks_of_four > 0:
        print("Skipped over", str(skipped_blocks_of_four), "ignorable blocks")

    # I expect the next one to be 0x02:
    if byte_at(d, i) != 0x02:
        message = ("Expect the first of the block of 16 always to be 0x02, "
                   "in fact was: {0}")
        raise Exception(message.format(byte_at(d, i)))

    # Always just 16?
    i += 16

    self.across_clues, i = parse_list_of_clues(d, i)
    if verbose:
        print("Now do down clues:")
    self.down_clues, i = parse_list_of_clues(d, i)

    # The across label may look like "<setter>-<number>"; if so it takes
    # precedence over the values passed in by the caller.
    m = re.search(r'^(.*)-([0-9]+)', self.across_clues.label)
    if m:
        self.setter = m.group(1)
        self.puzzle_number = m.group(2)

    if (not self.setter) and author:
        self.setter = author
    if (not self.puzzle_number) and puzzle_number:
        self.puzzle_number = puzzle_number

    self.title = title if title else "Crossword"
    if self.setter and self.puzzle_number:
        self.title += " " + self.puzzle_number + " / " + self.setter
    elif self.setter:
        self.title += " / " + self.setter
    elif self.puzzle_number:
        self.title += " " + self.puzzle_number
    if date_string:
        self.title += " (" + date_string + ")"

    self.author = author if author else "Unknown Setter"
    self.copyright_message = (copyright_message if copyright_message
                              else "© Unknown")
class ParsedCCJ:
    """A crossword read from a .ccj file that can be written out as .puz.

    Every attribute starts out as None; read_from_ccj() fills them in and
    write_to_puz_file() writes them out.
    """

    def __init__(self):
        self.width = None
        self.height = None
        self.across_clues = None
        self.down_clues = None
        self.grid = None
        self.title = None
        self.author = None
        self.copyright_message = None
        self.setter = None
        self.puzzle_number = None

    def read_from_ccj(self, f, title, author, puzzle_number,
                      copyright_message, verbose=False, date_string=None):
        """Parse a .ccj crossword from the file object f into this instance.

        The file layout is only partly understood, so the parser walks
        through the data with a single index and skips over the regions
        whose meaning is unknown.  On success it sets width, height, grid
        (including the answer letters), across_clues, down_clues, setter,
        puzzle_number, title, author and copyright_message.

        Parameters:
          f -- object whose read() returns the complete file contents
          title -- base title; "Crossword" is used when this is falsy
          author -- used as the setter when the clue label doesn't supply
                    one, and as the author ("Unknown Setter" when falsy)
          puzzle_number -- used when the clue label doesn't supply one; it
                           is concatenated into the title, so pass a string
          copyright_message -- copyright text ("© Unknown" when falsy)
          verbose -- print progress information while parsing
          date_string -- optional date appended to the title in brackets;
                         when None, a module-level `date_string` is used
                         if one exists

        Raises Exception if a grid byte is not recognised or if one of the
        expected marker bytes (0x01 before the answers, 0x02 before the
        clues) is missing.
        """
        if date_string is None:
            # This method used to read `date_string` as a free
            # (module-level) name.  Keep honouring that name when it
            # exists, but don't die with a NameError when it doesn't.
            date_string = globals().get('date_string')

        d = f.read()
        # i is the index into the file for the rest of this method:
        i = 2
        # I think these must be the list of buttons on the left:
        while byte_at(d, i) != 0:
            s, i = read_string(d, i)
            if verbose:
                print("got button string:", s)
        # Then the congratulations message, I think:
        i += 1
        s, i = read_string(d, i)
        if verbose:
            print("got congratulations message:", s)
        # Skip another byte; 0x02 in the Independent it seems, but 0x00 in
        # the Herald puzzle I tried.
        i += 1
        # I think we get the grid dimensions in the next two:
        self.width = byte_at(d, i)
        i += 1
        self.height = byte_at(d, i)
        i += 1
        self.grid = Grid(self.width, self.height)

        # Now skip over everything until we think we see the grid, since
        # I've no idea what it's meant to mean:
        while byte_at(d, i) not in (0x3f, 0x23):
            i += 1

        for y in range(self.height):
            for x in range(self.width):
                value = byte_at(d, i)
                # Lights seem to be indicated by: '?' (or 'M' very
                # occasionally)
                if value in (0x3f, 0x4d):
                    self.grid.cells[y][x] = Cell(y, x)
                # Blocked-out squares seem to be always '#'
                elif value != 0x23:
                    message = "Unknown value {0} at {1}"
                    raise Exception(message.format(str(value),
                                                   coord_str(x, y)))
                i += 1

        if verbose:
            print("grid is:\n" + self.grid.to_grid_string(True))

        # Next there's a grid structure the purpose of which I don't
        # understand (it is only built so that it can be shown in verbose
        # mode):
        grid_unknown_purpose = Grid(self.width, self.height)
        for y in range(self.height):
            for x in range(self.width):
                cell = Cell(y, x)
                grid_unknown_purpose.cells[y][x] = cell
                value = byte_at(d, i)
                if value == 0:
                    cell.set_letter(' ')
                elif value < 10:
                    cell.set_letter(str(value))
                else:
                    # Only one character fits in a cell, so keep the last
                    # digit
                    truncated = str(value % 10)
                    if verbose:
                        message = "Warning, truncating {0} to {1} at {2}"
                        print(message.format(value, truncated,
                                             coord_str(x, y)))
                    cell.set_letter(truncated)
                i += 1

        # Seem to need to skip over an extra byte (0x01) here before the
        # answers.  Maybe it indicates whether there are answers next or
        # not:
        if byte_at(d, i) != 1:
            raise Exception("So far we expect a 0x01 before the answers...")
        i += 1

        if verbose:
            print("grid_unknown_purpose is:\n" +
                  grid_unknown_purpose.to_grid_string(False))

        # Now there's the grid with the answers; there is a byte for every
        # square, but only the lights get a letter:
        for y in range(self.height):
            for x in range(self.width):
                cell = self.grid.cells[y][x]
                if cell:
                    cell.set_letter(chr(byte_at(d, i)))
                i += 1

        if verbose:
            print("grid with answers is:\n" +
                  self.grid.to_grid_string(False))

        skipped_blocks_of_four = 0
        while skippable_block_of_four(d, i):
            i += 4
            skipped_blocks_of_four += 1
        if verbose and skipped_blocks_of_four > 0:
            print("Skipped over", str(skipped_blocks_of_four),
                  "ignorable blocks")

        # I expect the next one to be 0x02:
        if byte_at(d, i) != 0x02:
            message = ("Expect the first of the block of 16 always to be "
                       "0x02, in fact was: {0}")
            raise Exception(message.format(byte_at(d, i)))

        # Always just 16?
        i += 16

        self.across_clues, i = parse_list_of_clues(d, i)
        if verbose:
            print("Now do down clues:")
        self.down_clues, i = parse_list_of_clues(d, i)

        # The across label may look like "<setter>-<number>"; if so it
        # takes precedence over the values passed in by the caller.
        m = re.search(r'^(.*)-([0-9]+)', self.across_clues.label)
        if m:
            self.setter = m.group(1)
            self.puzzle_number = m.group(2)

        if (not self.setter) and author:
            self.setter = author
        if (not self.puzzle_number) and puzzle_number:
            self.puzzle_number = puzzle_number

        self.title = title if title else "Crossword"
        if self.setter and self.puzzle_number:
            self.title += " " + self.puzzle_number + " / " + self.setter
        elif self.setter:
            self.title += " / " + self.setter
        elif self.puzzle_number:
            self.title += " " + self.puzzle_number
        if date_string:
            self.title += " (" + date_string + ")"

        self.author = author if author else "Unknown Setter"
        self.copyright_message = (copyright_message if copyright_message
                                  else "© Unknown")

    def write_to_puz_file(self, output_filename, verbose=False):
        """Write the crossword in AcrossLite .puz format to output_filename

        Note that the version for the file format that this outputs
        doesn't include checksums, so a strict loader will reject such a
        file - it's fine in xword, though.

        Parameters:
          output_filename -- path of the file to create or overwrite
          verbose -- print a line for every placeholder clue that is added
        """

        # In the AcrossLite .PUZ format we need to make sure that there's
        # one "clue" for each clue number, even if it's just "See 6" for
        # clues whose answers are split over different clue numbers in the
        # grid.  So, go through the clue dictionaries and make sure that
        # there is something for every clue.  (So we don't miss the
        # "See 6" type of clue.)

        # We take a deep copy of the across clues and down clues first
        # so that we don't add unnecessary fake clues to the
        # attributes of this instance.
        clue_groups = {
            True: copy.deepcopy(self.across_clues),
            False: copy.deepcopy(self.down_clues),
        }

        for group_across in (True, False):
            clue_dictionary = clue_groups[group_across].clue_dictionary
            # Iterate over a snapshot: placeholder clues may be inserted
            # into this very dictionary below, and changing a dictionary
            # while iterating over it raises RuntimeError.
            for clue in list(clue_dictionary.values()):
                first_clue_entry = str(clue.all_clue_numbers[0][0])
                for entry_n, entry_across in clue.all_clue_numbers:
                    expected_dictionary = \
                        clue_groups[entry_across].clue_dictionary
                    if entry_n in expected_dictionary:
                        continue
                    clue_string = "See " + first_clue_entry
                    if entry_across != group_across:
                        # The placeholder goes in the other list, so say
                        # which list the real clue is to be found in.
                        clue_string += (" across" if group_across
                                        else " down")
                    fake_clue = ParsedClue()
                    fake_clue.across = entry_across
                    fake_clue.text_including_enumeration = clue_string
                    fake_clue.set_number(str(entry_n))
                    expected_dictionary[entry_n] = fake_clue
                    if verbose:
                        print("**** Added missing clue with index ",
                              str(entry_n),
                              fake_clue.tidied_text_including_enumeration())

        total_clues = (clue_groups[True].real_number_of_clues() +
                       clue_groups[False].real_number_of_clues())

        # One byte per square, row by row: the solution grid holds the
        # answer letters, the empty grid '-' for each light, and both use
        # '.' for blocked squares.
        solutions = bytearray()
        empty_grid = bytearray()
        for y in range(self.height):
            for x in range(self.width):
                cell = self.grid.cells[y][x]
                if cell:
                    solutions.append(ord(cell.letter))
                    empty_grid.append(ord('-'))
                else:
                    solutions.append(ord('.'))
                    empty_grid.append(ord('.'))

        all_clues = clue_groups[True].ordered_list_of_clues()
        all_clues += clue_groups[False].ordered_list_of_clues()
        all_clues.sort(key=keyfunc_clues)

        nul = bytearray(1)

        # Now the file can be written out:
        with io.FileIO(output_filename, 'wb') as f:
            # 0x2C zero bytes stand in for the start of the header (this
            # is where the checksums are left out).
            f.write(bytearray(0x2C))
            f.write(bytearray((self.width, self.height)))
            f.write(struct.pack("<h", total_clues))
            f.write(bytearray(4))
            f.write(solutions)
            f.write(empty_grid)
            for text in (self.title, self.author, self.copyright_message):
                f.write(text.encode('UTF-8'))
                f.write(nul)
            for c in all_clues:
                number_string_tidied = \
                    c.number_string.replace('/', ',').lower()
                clue_text = c.tidied_text_including_enumeration()
                # We have to stick the number string at the beginning
                # otherwise it won't be clear when the answers to clues
                # cover several entries in the grid.
                f.write(("[" + number_string_tidied + "] ").encode('UTF-8'))
                # Encode the clue text as UTF-8, because it's not defined
                # what the character set should be anywhere that I've
                # seen.  (xword currently assumes ISO-8859-1, but that
                # doesn't strike me as a good enough reason in itself,
                # since it's easily patched.)
                f.write(clue_text.encode('UTF-8'))
                f.write(nul)
            # One more NUL after the last clue - presumably an empty final
            # string (the notes); TODO confirm against the format notes.
            f.write(nul)