def __init__(self):
    """Initialise the game with placeholder headers and an empty error list.

    After construction ``self.headers`` is an ordered dictionary holding
    seven keys (Event, Site, Date, Round, White, Black, Result), each set
    to a "value unknown" placeholder, and ``self.errors`` is an empty list.
    """
    super(Game, self).__init__()

    # An ordered mapping is used so the headers keep exactly this order
    # when iterated.
    self.headers = collections.OrderedDict([
        ("Event", "?"),
        ("Site", "?"),
        ("Date", "????.??.??"),
        ("Round", "?"),
        ("White", "?"),
        ("Black", "?"),
        ("Result", "*"),
    ])

    self.errors = []
def scan_headers(handle):
    """
    Scan a PGN file opened in text mode for game offsets and headers.

    Yields a tuple for each game. The first element is the offset (the
    value of ``handle.tell()`` just before the first header line of the
    game was read). The second element is an ordered dictionary of game
    headers.

    Since actually parsing many games from a big file is relatively
    expensive, this is a better way to look only for specific games and
    seek and parse them later.

    This example scans for the first game with Kasparov as the white player.

    >>> pgn = open("mega.pgn")
    >>> for offset, headers in chess.pgn.scan_headers(pgn):
    ...     if "Kasparov" in headers["White"]:
    ...         kasparov_offset = offset
    ...         break

    Then it can later be sought and parsed.

    >>> pgn.seek(kasparov_offset)
    >>> game = chess.pgn.read_game(pgn)

    This also works nicely with generators, scanning lazily only when the
    next offset is required.

    >>> white_win_offsets = (offset for offset, headers in chess.pgn.scan_headers(pgn)
    ...                            if headers["Result"] == "1-0")
    >>> first_white_win = next(white_win_offsets)
    >>> second_white_win = next(white_win_offsets)

    :warning: Be careful when seeking a game in the file while more
        offsets are being generated.
    """
    inside_comment = False

    # State of the game whose headers are currently being collected.
    # ``offset is None`` means no header has been seen since the last yield.
    headers = None
    offset = None

    line_start = handle.tell()
    line = handle.readline()

    while line:
        # Lines starting with "%" are single line comments and are ignored
        # entirely, whatever the current parser state is.
        is_percent_line = line.startswith("%")

        # Only attempt to parse a header tag outside of brace comments, so
        # that comment text which merely looks like a tag is not picked up.
        tag = None
        if not is_percent_line and not inside_comment and line.startswith("["):
            tag = TAG_REGEX.match(line)

        if is_percent_line:
            pass
        elif tag:
            if offset is None:
                # First header of a new game: start from the placeholder
                # values and remember where this line began.
                headers = collections.OrderedDict([
                    ("Event", "?"),
                    ("Site", "?"),
                    ("Date", "????.??.??"),
                    ("Round", "?"),
                    ("White", "?"),
                    ("Black", "?"),
                    ("Result", "*"),
                ])
                offset = line_start

            headers[tag.group(1)] = tag.group(2)
        else:
            # Anything else is treated as movetext. Track whether the line
            # leaves us inside a brace comment: the state only changes when
            # a comment can open (seen "{" while outside) or close (seen
            # "}" while inside), and is then decided by which brace occurs
            # last on the line.
            may_open = not inside_comment and "{" in line
            may_close = inside_comment and "}" in line
            if may_open or may_close:
                inside_comment = line.rfind("{") > line.rfind("}")

            # Movetext ends the header section, so any collected headers
            # are now complete and can be handed to the caller.
            if offset is not None:
                yield offset, headers
                offset = None

        # Record the position before reading, so it marks the start of the
        # line that is about to be inspected.
        line_start = handle.tell()
        line = handle.readline()

    # The file ended right after a header section: yield that last game.
    if offset is not None:
        yield offset, headers