def test_emitter_styles(data_filename, canonical_filename, verbose=False):
    """Round-trip both input files through the emitter with forced styles.

    Each of ``data_filename`` and ``canonical_filename`` is parsed into a list
    of events.  For every combination of collection ``flow_style`` (False,
    True) and scalar ``style`` ('|', '>', '"', "'", ''), the scalar, sequence
    start and mapping start events are rebuilt with that styling, emitted,
    parsed again, and compared with the original events via
    ``_compare_events``.

    When ``verbose`` is true the emitted text of each combination is printed.
    """
    for filename in [data_filename, canonical_filename]:
        # Consume the whole event stream while the file is open, then close
        # it deterministically instead of leaking the handle.
        with open(filename, 'rb') as stream:
            events = list(hughml.parse(stream))
        for flow_style in [False, True]:
            for style in ['|', '>', '"', '\'', '']:
                styled_events = []
                for event in events:
                    styled = event
                    if isinstance(event, hughml.ScalarEvent):
                        styled = hughml.ScalarEvent(
                            event.anchor, event.tag, event.implicit,
                            event.value, style=style)
                    elif isinstance(event, hughml.SequenceStartEvent):
                        styled = hughml.SequenceStartEvent(
                            event.anchor, event.tag, event.implicit,
                            flow_style=flow_style)
                    elif isinstance(event, hughml.MappingStartEvent):
                        styled = hughml.MappingStartEvent(
                            event.anchor, event.tag, event.implicit,
                            flow_style=flow_style)
                    # Any other event type is passed through untouched.
                    styled_events.append(styled)
                output = hughml.emit(styled_events)
                if verbose:
                    print("OUTPUT (filename=%r, flow_style=%r, style=%r)" % (
                        filename, flow_style, style))
                    print(output)
                new_events = list(hughml.parse(output))
                _compare_events(events, new_events)
def _compare_emitters(data, verbose):
    """Emit ``data`` with the C dumper and check both loaders agree with it.

    ``data`` is parsed with ``PyLoader``, the resulting events are emitted with
    ``CDumper``, and the emitted text is parsed again with both ``PyLoader``
    and ``CLoader``.  The three event lists must have the same length and
    matching attributes, with two exceptions: a ``tag`` that is None or
    ``u'!'`` in all three events is not compared, and ``explicit`` is not
    compared when either re-parsed value is truthy.

    When ``verbose`` is true the emitted text is printed, and all three event
    lists are pretty-printed afterwards whether or not an assertion failed.
    """
    compared_attributes = (
        '__class__', 'anchor', 'tag', 'implicit',
        'value', 'explicit', 'version', 'tags',
    )
    implicit_tags = (None, u'!')

    events = list(hughml.parse(data, Loader=hughml.PyLoader))
    c_data = hughml.emit(events, Dumper=hughml.CDumper)
    if verbose:
        print(c_data)
    py_events = list(hughml.parse(c_data, Loader=hughml.PyLoader))
    c_events = list(hughml.parse(c_data, Loader=hughml.CLoader))
    try:
        assert len(events) == len(py_events), (len(events), len(py_events))
        assert len(events) == len(c_events), (len(events), len(c_events))
        for event, py_event, c_event in zip(events, py_events, c_events):
            for attribute in compared_attributes:
                value = getattr(event, attribute, None)
                py_value = getattr(py_event, attribute, None)
                c_value = getattr(c_event, attribute, None)
                if attribute == 'tag':
                    # Skip only when all three sides carry an implicit tag.
                    if all(candidate in implicit_tags
                           for candidate in (value, py_value, c_value)):
                        continue
                elif attribute == 'explicit':
                    if py_value or c_value:
                        continue
                assert value == py_value, (event, py_event, attribute)
                assert value == c_value, (event, c_event, attribute)
    finally:
        if verbose:
            print("EVENTS:")
            pprint.pprint(events)
            print("PY_EVENTS:")
            pprint.pprint(py_events)
            print("C_EVENTS:")
            pprint.pprint(c_events)
def test_emitter_on_data(data_filename, canonical_filename, verbose=False):
    """Check that emitting the events of ``data_filename`` round-trips.

    The file is parsed into events, the events are emitted with default
    settings, the output is parsed again, and the two event lists are compared
    with ``_compare_events``.

    ``canonical_filename`` is accepted but not used by this function.  When
    ``verbose`` is true the emitted text is printed.
    """
    # Read all events while the file is open so the handle is closed promptly
    # rather than leaked.
    with open(data_filename, 'rb') as stream:
        events = list(hughml.parse(stream))
    output = hughml.emit(events)
    if verbose:
        print("OUTPUT:")
        print(output)
    new_events = list(hughml.parse(output))
    _compare_events(events, new_events)
def test_emitter_on_canonical(canonical_filename, verbose=False):
    """Check that the events of ``canonical_filename`` survive emission.

    The file is parsed into events once.  The events are then emitted twice,
    with ``canonical=False`` and ``canonical=True``; each output is parsed
    again and compared with the original events via ``_compare_events``.

    When ``verbose`` is true each emitted text is printed.
    """
    # Read all events while the file is open so the handle is closed promptly
    # rather than leaked.
    with open(canonical_filename, 'rb') as stream:
        events = list(hughml.parse(stream))
    for canonical in [False, True]:
        output = hughml.emit(events, canonical=canonical)
        if verbose:
            print("OUTPUT (canonical=%s):" % canonical)
            print(output)
        new_events = list(hughml.parse(output))
        _compare_events(events, new_events)
def test_emitter_events(events_filename, verbose=False):
    """Check that events loaded from ``events_filename`` survive emission.

    The file is loaded with ``EventsLoader`` and the result is collected into
    a list of events.  Those events are emitted, the output is parsed again,
    and the two event lists are compared with ``_compare_events``.

    When ``verbose`` is true the emitted text is printed.
    """
    # Materialise the loaded events while the file is open so the handle is
    # closed promptly rather than leaked.
    with open(events_filename, 'rb') as stream:
        events = list(hughml.load(stream, Loader=EventsLoader))
    output = hughml.emit(events)
    if verbose:
        print("OUTPUT:")
        print(output)
    new_events = list(hughml.parse(output))
    _compare_events(events, new_events)
def test_unicode_transfer(unicode_filename, verbose=False):
    """Check the output type of ``emit`` for unicode and encoded inputs.

    The file is read and decoded as UTF-8.  The text is then fed to the
    parser as-is (``encoding`` None) and, prefixed with a byte order mark,
    encoded as 'utf-8', 'utf-16-be' and 'utf-16-le'.  For each variant:

    * emitting without a stream must return ``unicode`` for unencoded input,
      and otherwise a ``str`` that decodes with the input encoding;
    * emitting into a stream wrapped by ``_unicode_open(stream, 'utf-8')``
      must leave a ``str`` in the stream that decodes as UTF-8.

    ``verbose`` is accepted but not used by this function.
    """
    # Close the source file deterministically instead of leaking the handle.
    with open(unicode_filename, 'rb') as source:
        data = source.read().decode('utf-8')
    for encoding in [None, 'utf-8', 'utf-16-be', 'utf-16-le']:
        # Named ``document`` rather than ``input`` to avoid shadowing the
        # builtin of that name.
        document = data
        if encoding is not None:
            document = (u'\ufeff' + document).encode(encoding)
        output1 = hughml.emit(hughml.parse(document), allow_unicode=True)
        stream = StringIO.StringIO()
        hughml.emit(hughml.parse(document), _unicode_open(stream, 'utf-8'),
                    allow_unicode=True)
        output2 = stream.getvalue()
        if encoding is None:
            assert isinstance(output1, unicode), (type(output1), encoding)
        else:
            assert isinstance(output1, str), (type(output1), encoding)
            # Raises if the emitted bytes are not valid in the input encoding.
            output1.decode(encoding)
        assert isinstance(output2, str), (type(output2), encoding)
        # Raises if the stream contents are not valid UTF-8.
        output2.decode('utf-8')
def _compare_parsers(py_data, c_data, verbose):
    """Assert that ``PyLoader`` and ``CLoader`` produce matching events.

    ``py_data`` is parsed with ``PyLoader`` and ``c_data`` with ``CLoader``.
    Both event lists must have the same length, and each pair of events must
    agree on every compared attribute (attributes missing from an event are
    treated as None).

    When ``verbose`` is true both event lists are pretty-printed afterwards,
    whether or not parsing with ``CLoader`` or an assertion failed.
    """
    compared_attributes = (
        '__class__', 'anchor', 'tag', 'implicit',
        'value', 'explicit', 'version', 'tags',
    )
    py_events = list(hughml.parse(py_data, Loader=hughml.PyLoader))
    c_events = []
    try:
        # Append one event at a time so that whatever was produced before a
        # failure is still available to the verbose dump below.
        for parsed in hughml.parse(c_data, Loader=hughml.CLoader):
            c_events.append(parsed)
        assert len(py_events) == len(c_events), (len(py_events), len(c_events))
        for py_event, c_event in zip(py_events, c_events):
            for attribute in compared_attributes:
                py_value = getattr(py_event, attribute, None)
                c_value = getattr(c_event, attribute, None)
                assert py_value == c_value, (py_event, c_event, attribute)
    finally:
        if verbose:
            print("PY_EVENTS:")
            pprint.pprint(py_events)
            print("C_EVENTS:")
            pprint.pprint(c_events)
def test_parser_on_canonical(canonical_filename, verbose=False):
    """Compare ``parse`` and ``canonical_parse`` on the same canonical file.

    ``canonical_filename`` is parsed once with ``hughml.parse`` and once with
    ``hughml.canonical_parse``; the two event lists are compared with
    ``_compare_events(..., full=True)``.

    When ``verbose`` is true both event lists are pretty-printed afterwards,
    even if parsing or the comparison failed (a list that was never produced
    is printed as None).
    """
    events1 = None
    events2 = None
    try:
        # Each read uses its own handle, closed as soon as the events have
        # been collected, instead of leaking two open files.
        with open(canonical_filename, 'rb') as stream:
            events1 = list(hughml.parse(stream))
        with open(canonical_filename, 'rb') as stream:
            events2 = list(hughml.canonical_parse(stream))
        _compare_events(events1, events2, full=True)
    finally:
        if verbose:
            print("EVENTS1:")
            pprint.pprint(events1)
            print("EVENTS2:")
            pprint.pprint(events2)
def test_parser(data_filename, canonical_filename, verbose=False):
    """Compare the events of a data file with those of its canonical form.

    ``data_filename`` is parsed with ``hughml.parse`` and
    ``canonical_filename`` with ``hughml.canonical_parse``; the two event
    lists are compared with ``_compare_events``.

    When ``verbose`` is true both event lists are pretty-printed afterwards,
    even if parsing or the comparison failed (a list that was never produced
    is printed as None).
    """
    events1 = None
    events2 = None
    try:
        # Collect the events while each file is open so both handles are
        # closed promptly rather than leaked.
        with open(data_filename, 'rb') as stream:
            events1 = list(hughml.parse(stream))
        with open(canonical_filename, 'rb') as stream:
            events2 = list(hughml.canonical_parse(stream))
        _compare_events(events1, events2)
    finally:
        if verbose:
            print("EVENTS1:")
            pprint.pprint(events1)
            print("EVENTS2:")
            pprint.pprint(events2)
def highlight(self):
    """Read ``self.input``, insert style substitutions, write ``self.output``.

    The input is decoded as UTF-16 (LE or BE) when it starts with the
    matching byte order mark, and as UTF-8 otherwise.  It is then both
    scanned into tokens and parsed into events.  For every token or event
    class that has an entry in ``self.style.substitutions`` under
    ``(cls, -1)`` or ``(cls, +1)``, a marker is recorded at the start or end
    index respectively.  Tokens whose start and end index coincide are
    ignored.

    The text between markers has each ``self.style.replaces`` pair applied,
    and the substitutions are spliced in.  The result is written UTF-8
    encoded, preceded by ``self.style.header`` and followed by
    ``self.style.footer`` when those are set.
    """
    raw = self.input.read()
    if raw.startswith(codecs.BOM_UTF16_LE):
        encoding = 'utf-16-le'
    elif raw.startswith(codecs.BOM_UTF16_BE):
        encoding = 'utf-16-be'
    else:
        encoding = 'utf-8'
    text = unicode(raw, encoding)

    substitutions = self.style.substitutions
    tokens = hughml.scan(text)
    events = hughml.parse(text)

    # Each marker is [index, weight, sequence number, substitution]; weight
    # and sequence number only serve as sort tie-breakers at equal indexes.
    markers = []

    def add_markers(item, number, weight):
        cls = item.__class__
        if (cls, -1) in substitutions:
            markers.append([item.start_mark.index, +weight, number,
                            substitutions[cls, -1]])
        if (cls, +1) in substitutions:
            markers.append([item.end_mark.index, -weight, number,
                            substitutions[cls, +1]])

    # Token markers carry weight 2, event markers weight 1.
    for number, token in enumerate(tokens, 1):
        if token.start_mark.index == token.end_mark.index:
            continue
        add_markers(token, number, 2)
    for number, event in enumerate(events, 1):
        add_markers(event, number, 1)

    markers.sort()
    markers.reverse()

    # Walk the markers from the end of the text towards the start, collecting
    # pieces in reverse order.
    # NOTE: text located before the lowest marker index is never collected.
    chunks = []
    position = len(text)
    for index, _weight, _number, substitution in markers:
        if index < position:
            piece = text[index:position]
            for substring, replacement in self.style.replaces:
                piece = piece.replace(substring, replacement)
            chunks.append(piece)
            position = index
        chunks.append(substitution)
    chunks.reverse()
    result = u''.join(chunks)

    if self.style.header:
        self.output.write(self.style.header)
    self.output.write(result.encode('utf-8'))
    if self.style.footer:
        self.output.write(self.style.footer)
def canonical_parse(stream):
    """Parse ``stream`` with ``CanonicalLoader`` and return the result of ``hughml.parse``."""
    events = hughml.parse(stream, Loader=CanonicalLoader)
    return events