class Serializer(object):
    """Walk a node graph and hand the corresponding events to the emitter.

    The serializer is a small state machine driven by ``self.closed``:
    ``None`` means "never opened", ``False`` means "open" and ``True`` means
    "closed".  ``serialize()`` may only be called while it is open.
    """

    # 'id' and 3+ numbers, but not 000
    ANCHOR_TEMPLATE = u'id%03d'
    ANCHOR_RE = RegExp(u'id(?!000$)\\d{3,}')

    def __init__(
        self,
        encoding=None,
        explicit_start=None,
        explicit_end=None,
        version=None,
        tags=None,
        dumper=None,
    ):
        # type: (Any, Optional[bool], Optional[bool], Optional[VersionType], Any, Any) -> None  # NOQA
        """Store the document options and register with ``dumper`` (if given).

        ``version`` may be a dotted string such as ``'1.2'``; it is then
        converted to a tuple of ints.  Any other value is stored unchanged.
        """
        self.dumper = dumper
        if self.dumper is not None:
            self.dumper._serializer = self
        self.use_encoding = encoding
        self.use_explicit_start = explicit_start
        self.use_explicit_end = explicit_end
        if isinstance(version, string_types):
            self.use_version = tuple(map(int, version.split('.')))
        else:
            self.use_version = version  # type: ignore
        self.use_tags = tags
        # nodes already emitted in the current document (node -> True)
        self.serialized_nodes = {}  # type: Dict[Any, Any]
        # node -> anchor name, or None while the node has been seen only once
        self.anchors = {}  # type: Dict[Any, Any]
        self.last_anchor_id = 0
        self.closed = None  # type: Optional[bool]
        self._templated_id = None

    @property
    def emitter(self):
        # type: () -> Any
        """Return the emitter of the dumper (``emitter`` if the dumper has a
        ``typ`` attribute, ``_emitter`` otherwise)."""
        if hasattr(self.dumper, 'typ'):
            return self.dumper.emitter
        return self.dumper._emitter

    @property
    def resolver(self):
        # type: () -> Any
        """Return the dumper's ``_resolver``."""
        if hasattr(self.dumper, 'typ'):
            # NOTE(review): the value is evaluated but not returned, unlike
            # `emitter` above -- presumably the attribute access is needed
            # for a side effect on the dumper (e.g. creating `_resolver`);
            # confirm against the dumper implementation before changing.
            self.dumper.resolver
        return self.dumper._resolver

    def open(self):
        # type: () -> None
        """Emit the stream start event and mark the serializer as open.

        Raises SerializerError if the serializer was opened before (whether
        or not it has been closed since).
        """
        if self.closed is None:
            self.emitter.emit(StreamStartEvent(encoding=self.use_encoding))
            self.closed = False
        elif self.closed:
            raise SerializerError('serializer is closed')
        else:
            raise SerializerError('serializer is already opened')

    def close(self):
        # type: () -> None
        """Emit the stream end event and mark the serializer as closed.

        Closing an already closed serializer is a no-op; closing one that
        was never opened raises SerializerError.
        """
        if self.closed is None:
            raise SerializerError('serializer is not opened')
        elif not self.closed:
            self.emitter.emit(StreamEndEvent())
            self.closed = True

    # def __del__(self):
    #     self.close()

    def serialize(self, node):
        # type: (Any) -> None
        """Emit one complete document for the graph rooted at ``node``.

        Raises SerializerError if the serializer is not open.  The per
        document bookkeeping (serialized nodes, anchors, anchor counter) is
        reset afterwards.
        """
        if dbg(DBG_NODE):
            nprint('Serializing nodes')
            node.dump()
        if self.closed is None:
            raise SerializerError('serializer is not opened')
        elif self.closed:
            raise SerializerError('serializer is closed')
        self.emitter.emit(
            DocumentStartEvent(
                explicit=self.use_explicit_start, version=self.use_version, tags=self.use_tags
            )
        )
        # first pass: find out which nodes need an anchor
        self.anchor_node(node)
        # second pass: emit the events
        self.serialize_node(node, None, None)
        self.emitter.emit(DocumentEndEvent(explicit=self.use_explicit_end))
        self.serialized_nodes = {}
        self.anchors = {}
        self.last_anchor_id = 0

    def anchor_node(self, node):
        # type: (Any) -> None
        """Record ``node`` and its children in ``self.anchors``.

        On the first visit the node is registered with ``None``, or with its
        own anchor value when ``node.anchor.always_dump`` is true, and its
        children are visited.  On a later visit a node still registered with
        ``None`` gets an anchor from ``generate_anchor()``.
        """
        if node in self.anchors:
            if self.anchors[node] is None:
                self.anchors[node] = self.generate_anchor(node)
        else:
            anchor = None
            try:
                if node.anchor.always_dump:
                    anchor = node.anchor.value
            except Exception:
                # best effort: a node without a usable anchor object simply
                # gets no explicit anchor (KeyboardInterrupt/SystemExit are
                # no longer swallowed here)
                pass
            self.anchors[node] = anchor
            if isinstance(node, SequenceNode):
                for item in node.value:
                    self.anchor_node(item)
            elif isinstance(node, MappingNode):
                for key, value in node.value:
                    self.anchor_node(key)
                    self.anchor_node(value)

    def generate_anchor(self, node):
        # type: (Any) -> Any
        """Return ``node.anchor.value`` if available, else a fresh anchor
        built from ``ANCHOR_TEMPLATE`` and an incremented counter."""
        try:
            anchor = node.anchor.value
        except Exception:
            # best effort, see anchor_node()
            anchor = None
        if anchor is None:
            self.last_anchor_id += 1
            return self.ANCHOR_TEMPLATE % self.last_anchor_id
        return anchor

    @staticmethod
    def _collection_comments(node):
        # type: (Any) -> Any
        """Return ``(start_comment, end_comment)`` for a collection node.

        ``start_comment`` is ``comment[0]`` only for flow style nodes with a
        non-empty comment; ``end_comment`` is ``comment[2]`` when the comment
        has more than two entries.  Either may be ``None``.
        """
        comment = node.comment
        start_comment = None
        end_comment = None
        if node.flow_style is True and comment:
            # eol comment on flow style collection
            start_comment = comment[0]
        if comment and len(comment) > 2:
            end_comment = comment[2]
        return start_comment, end_comment

    def serialize_node(self, node, parent, index):
        # type: (Any, Any, Any) -> None
        """Emit the events for ``node`` (recursively for collections).

        A node that has been serialized before in this document is emitted
        as an AliasEvent instead.  ``parent`` and ``index`` are passed on to
        the resolver's ``descend_resolver()``.
        """
        alias = self.anchors[node]
        if node in self.serialized_nodes:
            self.emitter.emit(AliasEvent(alias))
            return
        self.serialized_nodes[node] = True
        self.resolver.descend_resolver(parent, index)
        if isinstance(node, ScalarNode):
            # here check if the node.tag equals the one that would result from parsing
            # if not equal quoting is necessary for strings
            detected_tag = self.resolver.resolve(ScalarNode, node.value, (True, False))
            default_tag = self.resolver.resolve(ScalarNode, node.value, (False, True))
            implicit = (
                (node.tag == detected_tag),
                (node.tag == default_tag),
                node.tag.startswith('tag:yaml.org,2002:'),
            )
            self.emitter.emit(
                ScalarEvent(
                    alias,
                    node.tag,
                    implicit,
                    node.value,
                    style=node.style,
                    comment=node.comment,
                )
            )
        elif isinstance(node, SequenceNode):
            implicit = node.tag == self.resolver.resolve(SequenceNode, node.value, True)
            seq_comment, end_comment = self._collection_comments(node)
            self.emitter.emit(
                SequenceStartEvent(
                    alias,
                    node.tag,
                    implicit,
                    flow_style=node.flow_style,
                    comment=node.comment,
                )
            )
            for index, item in enumerate(node.value):
                self.serialize_node(item, node, index)
            self.emitter.emit(SequenceEndEvent(comment=[seq_comment, end_comment]))
        elif isinstance(node, MappingNode):
            implicit = node.tag == self.resolver.resolve(MappingNode, node.value, True)
            map_comment, end_comment = self._collection_comments(node)
            self.emitter.emit(
                MappingStartEvent(
                    alias,
                    node.tag,
                    implicit,
                    flow_style=node.flow_style,
                    comment=node.comment,
                    nr_items=len(node.value),
                )
            )
            for key, value in node.value:
                # keys are resolved without an index, values with their key
                self.serialize_node(key, node, None)
                self.serialize_node(value, node, key)
            self.emitter.emit(MappingEndEvent(comment=[map_comment, end_comment]))
        self.resolver.ascend_resolver()
from google.appengine._internal.ruamel.yaml.nodes import MappingNode, ScalarNode, SequenceNode # NOQA from google.appengine._internal.ruamel.yaml.util import RegExp # NOQA __all__ = ['BaseResolver', 'Resolver', 'VersionedResolver'] # fmt: off # resolvers consist of # - a list of applicable version # - a tag # - a regexp # - a list of first characters to match implicit_resolvers = [ ([(1, 2)], u'tag:yaml.org,2002:bool', RegExp(u'''^(?:true|True|TRUE|false|False|FALSE)$''', re.X), list(u'tTfF')), ([(1, 1)], u'tag:yaml.org,2002:bool', RegExp(u'''^(?:yes|Yes|YES|no|No|NO |true|True|TRUE|false|False|FALSE |on|On|ON|off|Off|OFF)$''', re.X), list(u'yYnNtTfFoO')), ([(1, 2)], u'tag:yaml.org,2002:float', RegExp(u'''^(?: [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) |[-+]?\\.[0-9_]+(?:[eE][-+][0-9]+)? |[-+]?\\.(?:inf|Inf|INF) |\\.(?:nan|NaN|NAN))$''', re.X),
class Reader(object):
    """Character source that decodes, validates and buffers the input."""

    # Reader:
    # - determines the data encoding and converts it to a unicode string,
    # - checks if characters are in allowed range,
    # - adds '\0' to the end.

    # Reader accepts
    #  - a `str` object (PY2) / a `bytes` object (PY3),
    #  - a `unicode` object (PY2) / a `str` object (PY3),
    #  - a file-like object with its `read` method returning `str`,
    #  - a file-like object with its `read` method returning `unicode`.

    # Yeah, it's ugly and slow.

    def __init__(self, stream, loader=None):
        # type: (Any, Any) -> None
        """Initialise the reader state and attach ``stream``.

        If ``loader`` is given and has no ``_reader`` yet, this reader
        registers itself there.
        """
        self.loader = loader
        if self.loader is not None and getattr(self.loader, '_reader', None) is None:
            self.loader._reader = self
        self.reset_reader()
        self.stream = stream  # type: Any  # as .read is called

    def reset_reader(self):
        # type: () -> None
        """Reset all buffer, position and encoding state to initial values."""
        self.name = None  # type: Any
        self.stream_pointer = 0  # number of raw items read from the stream
        self.eof = True
        self.buffer = ""  # decoded text not yet discarded
        self.pointer = 0  # read position inside self.buffer
        self.raw_buffer = None  # type: Any  # undecoded input, None when exhausted
        self.raw_decode = None
        self.encoding = None  # type: Optional[Text]
        self.index = 0  # absolute character index
        self.line = 0
        self.column = 0

    @property
    def stream(self):
        # type: () -> Any
        """Return the file-like input, or ``None`` for string/bytes input.

        Raises YAMLStreamError when no input was ever set (the setter
        ignores ``None``, leaving ``_stream`` undefined).
        """
        try:
            return self._stream
        except AttributeError:
            raise YAMLStreamError('input stream needs to be specified')

    @stream.setter
    def stream(self, val):
        # type: (Any) -> None
        """Attach the input ``val``: text, bytes or an object with ``read()``.

        ``None`` is ignored.  Text is validated and buffered directly, bytes
        and file-like objects go through encoding detection.  Raises
        YAMLStreamError for any other object without a ``read`` attribute.
        """
        if val is None:
            return
        self._stream = None
        if isinstance(val, text_type):
            self.name = '<unicode string>'
            self.check_printable(val)
            self.buffer = val + u'\0'  # type: ignore
        elif isinstance(val, binary_type):
            self.name = '<byte string>'
            self.raw_buffer = val
            self.determine_encoding()
        else:
            if not hasattr(val, 'read'):
                raise YAMLStreamError('stream argument needs to have a read() method')
            self._stream = val
            self.name = getattr(self.stream, 'name', '<file>')
            self.eof = False
            self.raw_buffer = None
            self.determine_encoding()

    def peek(self, index=0):
        # type: (int) -> Text
        """Return the character ``index`` positions ahead without consuming."""
        try:
            return self.buffer[self.pointer + index]
        except IndexError:
            # not buffered yet: try to load more input and retry once
            self.update(index + 1)
            return self.buffer[self.pointer + index]

    def prefix(self, length=1):
        # type: (int) -> Any
        """Return up to ``length`` upcoming characters without consuming."""
        if self.pointer + length >= len(self.buffer):
            self.update(length)
        return self.buffer[self.pointer:self.pointer + length]

    def forward_1_1(self, length=1):
        # type: (int) -> None
        """Consume ``length`` characters, updating index, line and column.

        Same as ``forward()`` but additionally treats ``\\x85``, ``\\u2028``
        and ``\\u2029`` as line breaks.
        """
        if self.pointer + length + 1 >= len(self.buffer):
            self.update(length + 1)
        while length != 0:
            ch = self.buffer[self.pointer]
            self.pointer += 1
            self.index += 1
            # a '\r' directly followed by '\n' is counted once, at the '\n'
            if ch in u'\n\x85\u2028\u2029' or (
                ch == u'\r' and self.buffer[self.pointer] != u'\n'
            ):
                self.line += 1
                self.column = 0
            elif ch != u'\uFEFF':
                # a byte order mark does not advance the column
                self.column += 1
            length -= 1

    def forward(self, length=1):
        # type: (int) -> None
        """Consume ``length`` characters, updating index, line and column."""
        if self.pointer + length + 1 >= len(self.buffer):
            self.update(length + 1)
        while length != 0:
            ch = self.buffer[self.pointer]
            self.pointer += 1
            self.index += 1
            # a '\r' directly followed by '\n' is counted once, at the '\n'
            if ch == u'\n' or (ch == u'\r' and self.buffer[self.pointer] != u'\n'):
                self.line += 1
                self.column = 0
            elif ch != u'\uFEFF':
                # a byte order mark does not advance the column
                self.column += 1
            length -= 1

    def get_mark(self):
        # type: () -> Any
        """Return a mark for the current position.

        A StringMark (which also carries buffer and pointer) is used for
        string/bytes input, a FileMark for file-like input.
        """
        if self.stream is None:
            return StringMark(
                self.name, self.index, self.line, self.column, self.buffer, self.pointer
            )
        else:
            return FileMark(self.name, self.index, self.line, self.column)

    def determine_encoding(self):
        # type: () -> None
        """Select the decoder from a UTF-16 BOM (default UTF-8) and load the
        first character.

        Only binary raw input gets a decoder; for other raw input
        ``raw_decode`` is left untouched.
        """
        # make sure at least two raw items are available for the BOM check
        while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
            self.update_raw()
        if isinstance(self.raw_buffer, binary_type):
            if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
                self.raw_decode = codecs.utf_16_le_decode  # type: ignore
                self.encoding = 'utf-16-le'
            elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
                self.raw_decode = codecs.utf_16_be_decode  # type: ignore
                self.encoding = 'utf-16-be'
            else:
                self.raw_decode = codecs.utf_8_decode  # type: ignore
                self.encoding = 'utf-8'
        self.update(1)

    # the range above the BMP is only included when UNICODE_SIZE is not 2
    if UNICODE_SIZE == 2:
        NON_PRINTABLE = RegExp(
            u'[^\x09\x0A\x0D\x20-\x7E\x85' u'\xA0-\uD7FF' u'\uE000-\uFFFD' u']'
        )
    else:
        NON_PRINTABLE = RegExp(
            u'[^\x09\x0A\x0D\x20-\x7E\x85'
            u'\xA0-\uD7FF'
            u'\uE000-\uFFFD'
            u'\U00010000-\U0010FFFF'
            u']'
        )

    # tab, LF, CR and 0x20-0x7E as bytes, used as the delete table below
    _printable_ascii = ('\x09\x0A\x0D' + "".join(map(chr, range(0x20, 0x7F)))).encode('ascii')

    @classmethod
    def _get_non_printable_ascii(cls, data):  # type: ignore
        # type: (Text, bytes) -> Optional[Tuple[int, Text]]
        """Check pure ASCII ``data``; return ``(position, character)`` of the
        first non-printable character or ``None``.

        Raises UnicodeEncodeError when ``data`` is not pure ASCII.
        """
        ascii_bytes = data.encode('ascii')
        # deleting all printable bytes leaves only the offending ones
        non_printables = ascii_bytes.translate(None, cls._printable_ascii)  # type: ignore
        if not non_printables:
            return None
        non_printable = non_printables[:1]
        return ascii_bytes.index(non_printable), non_printable.decode('ascii')

    @classmethod
    def _get_non_printable_regex(cls, data):
        # type: (Text) -> Optional[Tuple[int, Text]]
        """Return ``(position, character)`` of the first match of
        ``NON_PRINTABLE`` in ``data``, or ``None``."""
        match = cls.NON_PRINTABLE.search(data)
        if not bool(match):
            return None
        return match.start(), match.group()

    @classmethod
    def _get_non_printable(cls, data):
        # type: (Text) -> Optional[Tuple[int, Text]]
        """Find the first non-printable character, trying the ASCII check
        first and falling back to the regex for non-ASCII data."""
        try:
            return cls._get_non_printable_ascii(data)  # type: ignore
        except UnicodeEncodeError:
            return cls._get_non_printable_regex(data)

    def check_printable(self, data):
        # type: (Any) -> None
        """Raise ReaderError if ``data`` contains a non-printable character."""
        non_printable_match = self._get_non_printable(data)
        if non_printable_match is not None:
            start, character = non_printable_match
            # data has not been appended yet: offset past the unread buffer
            position = self.index + (len(self.buffer) - self.pointer) + start
            raise ReaderError(
                self.name,
                position,
                ord(character),
                'unicode',
                'special characters are not allowed',
            )

    def update(self, length):
        # type: (int) -> None
        """Try to make at least ``length`` characters available in the buffer.

        Consumed characters are dropped, then raw input is read, decoded and
        validated until enough is buffered or the input ends (a ``'\\0'``
        terminator is appended then).  Does nothing once ``raw_buffer`` is
        ``None``.  Raises ReaderError on undecodable or non-printable input.
        """
        if self.raw_buffer is None:
            return
        self.buffer = self.buffer[self.pointer:]
        self.pointer = 0
        while len(self.buffer) < length:
            if not self.eof:
                self.update_raw()
            if self.raw_decode is not None:
                try:
                    data, converted = self.raw_decode(self.raw_buffer, 'strict', self.eof)
                except UnicodeDecodeError as exc:
                    if PY3:
                        character = self.raw_buffer[exc.start]
                    else:
                        character = exc.object[exc.start]
                    if self.stream is not None:
                        # translate the offset in raw_buffer to a stream offset
                        position = self.stream_pointer - len(self.raw_buffer) + exc.start
                    else:
                        position = exc.start
                    raise ReaderError(self.name, position, character, exc.encoding, exc.reason)
            else:
                # no decoder selected: raw data is used as is
                data = self.raw_buffer
                converted = len(data)
            self.check_printable(data)
            self.buffer += data
            self.raw_buffer = self.raw_buffer[converted:]
            if self.eof:
                self.buffer += '\0'
                self.raw_buffer = None
                break

    def update_raw(self, size=None):
        # type: (Optional[int]) -> None
        """Read up to ``size`` items from the stream into ``raw_buffer``.

        ``size`` defaults to 4096 on PY3 and 1024 otherwise.  An empty read
        sets ``eof``.
        """
        if size is None:
            size = 4096 if PY3 else 1024
        data = self.stream.read(size)
        if self.raw_buffer is None:
            self.raw_buffer = data
        else:
            self.raw_buffer += data
        self.stream_pointer += len(data)
        if not data:
            self.eof = True