def parse_header_lines(text):
    '''Split rfc822 style header lines off the start of a text.

    The headers can e.g. look like::

        Content-Type: text/x-zim-wiki
        Wiki-Format: zim 0.4
        Creation-Date: 2010-12-14T14:15:09.134955

    Lines matched by C{_is_continue_re} directly after a header are
    appended to the value of that header, joined with a newline. Both
    the header value and each continuation are stripped of surrounding
    whitespace. When at least one header was found, a single newline
    directly following the header block is dropped as well. When no
    header is found the text is returned unchanged.

    @param text: the text to parse, must be a C{str}
    @returns: the text minus the headers and a dict with the headers
    '''
    assert isinstance(text, str)
    headers = OrderedDict()
    offset = 0  # index just past the last header or continuation line

    found = _is_header_re.match(text)
    while found:
        key = found.group(1)
        pieces = [found.group(2).strip()]
        offset = found.end()

        # Gather all continuation lines belonging to this header
        extra = _is_continue_re.match(text, offset)
        while extra:
            pieces.append(extra.group(2).strip())
            offset = extra.end()
            extra = _is_continue_re.match(text, offset)

        headers[key] = '\n'.join(pieces)
        found = _is_header_re.match(text, offset)

    if offset > 0:
        # Headers were consumed; also swallow one separating newline,
        # if there is any text left at all
        if text.startswith('\n', offset):
            offset += 1
        text = text[offset:]

    return text, headers
def __init__(self, *arg, **kwarg):
    '''Constructor

    All arguments are passed on unchanged to
    C{ElementTreeModule.ElementTree()}; the resulting tree is kept
    in the C{_etree} attribute. The C{_object_cache} attribute starts
    out as an empty dict and the C{meta} attribute as an empty
    C{OrderedDict}.
    '''
    # Build the wrapped tree first, it is the only step taking arguments
    tree = ElementTreeModule.ElementTree(*arg, **kwarg)
    self._etree = tree

    # Per-instance containers, both start out empty
    self._object_cache = {}
    self.meta = OrderedDict()