示例#1
0
def test_emitter_styles(data_filename, canonical_filename, verbose=False):
    """Round-trip both input files through the emitter under every style.

    For each of the two files the parsed events are rebuilt with every
    combination of collection flow style and scalar style, emitted, parsed
    again, and the result is handed to ``_compare_events`` together with
    the original events.

    Args:
        data_filename: path of the first file to parse.
        canonical_filename: path of the second file to parse.
        verbose: when true, print every emitted document.
    """
    for filename in [data_filename, canonical_filename]:
        # Exhaust the parser inside the ``with`` block so the file handle is
        # released as soon as the events have been collected.
        with open(filename, 'rb') as stream:
            events = list(hughml.parse(stream))
        for flow_style in [False, True]:
            for style in ['|', '>', '"', '\'', '']:
                styled_events = []
                for event in events:
                    # Rebuild only the event kinds that carry a style;
                    # every other event is passed through unchanged.
                    if isinstance(event, hughml.ScalarEvent):
                        event = hughml.ScalarEvent(event.anchor,
                                                   event.tag,
                                                   event.implicit,
                                                   event.value,
                                                   style=style)
                    elif isinstance(event, hughml.SequenceStartEvent):
                        event = hughml.SequenceStartEvent(
                            event.anchor,
                            event.tag,
                            event.implicit,
                            flow_style=flow_style)
                    elif isinstance(event, hughml.MappingStartEvent):
                        event = hughml.MappingStartEvent(
                            event.anchor,
                            event.tag,
                            event.implicit,
                            flow_style=flow_style)
                    styled_events.append(event)
                output = hughml.emit(styled_events)
                if verbose:
                    # Single-argument print calls behave the same whether
                    # print is a statement or a function.
                    print("OUTPUT (filename=%r, flow_style=%r, style=%r)" % (
                        filename, flow_style, style))
                    print(output)
                new_events = list(hughml.parse(output))
                _compare_events(events, new_events)
示例#2
0
def _compare_emitters(data, verbose):
    """Check that output of the C dumper parses back to the original events.

    ``data`` is parsed with ``PyLoader``, emitted with ``CDumper``, and the
    emitted text is parsed again with both ``PyLoader`` and ``CLoader``.
    Both re-parsed streams must have the same length as the original one
    and agree with it on a fixed set of event attributes.

    Args:
        data: input accepted by ``hughml.parse``.
        verbose: when true, print the emitted text and, on exit (success or
            failure), dump all three event lists.

    Raises:
        AssertionError: if the lengths or any compared attribute differ.
    """
    events = list(hughml.parse(data, Loader=hughml.PyLoader))
    c_data = hughml.emit(events, Dumper=hughml.CDumper)
    if verbose:
        print(c_data)
    py_events = list(hughml.parse(c_data, Loader=hughml.PyLoader))
    c_events = list(hughml.parse(c_data, Loader=hughml.CLoader))

    compared_attributes = ('__class__', 'anchor', 'tag', 'implicit', 'value',
                           'explicit', 'version', 'tags')
    # Tag values that are treated as interchangeable when comparing.
    blank_tags = (None, u'!')

    try:
        assert len(events) == len(py_events), (len(events), len(py_events))
        assert len(events) == len(c_events), (len(events), len(c_events))
        for event, py_event, c_event in zip(events, py_events, c_events):
            for attribute in compared_attributes:
                value = getattr(event, attribute, None)
                py_value = getattr(py_event, attribute, None)
                c_value = getattr(c_event, attribute, None)
                if attribute == 'tag':
                    # None and u'!' count as the same tag on all three sides.
                    if value in blank_tags and py_value in blank_tags \
                            and c_value in blank_tags:
                        continue
                elif attribute == 'explicit':
                    # A truthy 'explicit' on either re-parsed event is
                    # accepted without comparing it to the original.
                    if py_value or c_value:
                        continue
                assert value == py_value, (event, py_event, attribute)
                assert value == c_value, (event, c_event, attribute)
    finally:
        if verbose:
            for label, dumped in (("EVENTS:", events),
                                  ("PY_EVENTS:", py_events),
                                  ("C_EVENTS:", c_events)):
                print(label)
                pprint.pprint(dumped)
示例#3
0
def test_emitter_on_data(data_filename, canonical_filename, verbose=False):
    """Parse the data file, emit its events, re-parse and compare.

    Args:
        data_filename: path of the file whose events are round-tripped.
        canonical_filename: accepted but not used by this test.
        verbose: when true, print the emitted document.
    """
    # Collect all events while the file is open, then close it immediately.
    with open(data_filename, 'rb') as stream:
        events = list(hughml.parse(stream))
    output = hughml.emit(events)
    if verbose:
        print("OUTPUT:")
        print(output)
    new_events = list(hughml.parse(output))
    _compare_events(events, new_events)
示例#4
0
def test_emitter_on_canonical(canonical_filename, verbose=False):
    """Round-trip the canonical file with ``canonical`` emission off and on.

    Args:
        canonical_filename: path of the file whose events are round-tripped.
        verbose: when true, print each emitted document.
    """
    # Collect all events while the file is open, then close it immediately.
    with open(canonical_filename, 'rb') as stream:
        events = list(hughml.parse(stream))
    for canonical in [False, True]:
        output = hughml.emit(events, canonical=canonical)
        if verbose:
            print("OUTPUT (canonical=%s):" % canonical)
            print(output)
        new_events = list(hughml.parse(output))
        _compare_events(events, new_events)
示例#5
0
def test_emitter_events(events_filename, verbose=False):
    """Load events via ``EventsLoader``, emit them, re-parse and compare.

    Args:
        events_filename: path of the file loaded with ``EventsLoader``.
        verbose: when true, print the emitted document.
    """
    # Finish loading inside the ``with`` block so the handle is released
    # before the emit/parse round trip starts.
    with open(events_filename, 'rb') as stream:
        events = list(hughml.load(stream, Loader=EventsLoader))
    output = hughml.emit(events)
    if verbose:
        print("OUTPUT:")
        print(output)
    new_events = list(hughml.parse(output))
    _compare_events(events, new_events)
示例#6
0
def test_unicode_transfer(unicode_filename, verbose=False):
    """Check the type and encoding of emitter output for several encodings.

    The file is read as UTF-8 text. For ``None`` the text is parsed as-is;
    for each named encoding it is prefixed with a BOM and encoded first.
    The events are emitted twice: once returning the result, once writing
    through ``_unicode_open(stream, 'utf-8')``.

    Args:
        unicode_filename: path of a UTF-8 encoded input file.
        verbose: accepted but not used by this test.

    Raises:
        AssertionError: if an output has an unexpected type.
        UnicodeDecodeError: if an output is not valid in its encoding.
    """
    with open(unicode_filename, 'rb') as source:
        data = source.read().decode('utf-8')
    for encoding in [None, 'utf-8', 'utf-16-be', 'utf-16-le']:
        # ``text`` rather than ``input`` to avoid shadowing the builtin.
        text = data
        if encoding is not None:
            text = (u'\ufeff' + text).encode(encoding)
        output1 = hughml.emit(hughml.parse(text), allow_unicode=True)
        stream = StringIO.StringIO()
        hughml.emit(hughml.parse(text),
                    _unicode_open(stream, 'utf-8'),
                    allow_unicode=True)
        output2 = stream.getvalue()
        if encoding is None:
            assert isinstance(output1, unicode), (type(output1), encoding)
        else:
            assert isinstance(output1, str), (type(output1), encoding)
            # Decoding is the check: it raises if the bytes are not valid
            # in the encoding the input was supplied in.
            output1.decode(encoding)
        assert isinstance(output2, str), (type(output2), encoding)
        output2.decode('utf-8')
示例#7
0
def _compare_parsers(py_data, c_data, verbose):
    """Check that ``PyLoader`` and ``CLoader`` produce matching events.

    Args:
        py_data: input parsed with ``hughml.PyLoader``.
        c_data: input parsed with ``hughml.CLoader``.
        verbose: when true, dump both event lists on exit, whether the
            comparison succeeded or not.

    Raises:
        AssertionError: if the event counts or any compared attribute differ.
    """
    compared_attributes = ('__class__', 'anchor', 'tag', 'implicit', 'value',
                           'explicit', 'version', 'tags')
    py_events = list(hughml.parse(py_data, Loader=hughml.PyLoader))
    c_events = []
    try:
        # Append one event at a time so that whatever was parsed before a
        # failure is still available to the verbose dump below.
        for parsed in hughml.parse(c_data, Loader=hughml.CLoader):
            c_events.append(parsed)
        py_count = len(py_events)
        c_count = len(c_events)
        assert py_count == c_count, (py_count, c_count)
        for py_event, c_event in zip(py_events, c_events):
            for attribute in compared_attributes:
                py_value = getattr(py_event, attribute, None)
                c_value = getattr(c_event, attribute, None)
                assert py_value == c_value, (py_event, c_event, attribute)
    finally:
        if verbose:
            for label, dumped in (("PY_EVENTS:", py_events),
                                  ("C_EVENTS:", c_events)):
                print(label)
                pprint.pprint(dumped)
示例#8
0
def test_parser_on_canonical(canonical_filename, verbose=False):
    """Parse the canonical file with both parsers and compare the events.

    The same file is read once through ``hughml.parse`` and once through
    ``hughml.canonical_parse``; the two event lists are passed to
    ``_compare_events`` with ``full=True``.

    Args:
        canonical_filename: path of the file to parse twice.
        verbose: when true, dump both event lists on exit, whether the
            comparison succeeded or not.
    """
    events1 = None
    events2 = None
    try:
        # Each read gets its own handle, closed as soon as the events have
        # been collected.
        with open(canonical_filename, 'rb') as stream:
            events1 = list(hughml.parse(stream))
        with open(canonical_filename, 'rb') as stream:
            events2 = list(hughml.canonical_parse(stream))
        _compare_events(events1, events2, full=True)
    finally:
        if verbose:
            print("EVENTS1:")
            pprint.pprint(events1)
            print("EVENTS2:")
            pprint.pprint(events2)
示例#9
0
def test_parser(data_filename, canonical_filename, verbose=False):
    """Compare events of the data file with those of its canonical form.

    The data file is read through ``hughml.parse`` and the canonical file
    through ``hughml.canonical_parse``; both event lists are passed to
    ``_compare_events``.

    Args:
        data_filename: path of the file parsed with ``hughml.parse``.
        canonical_filename: path of the file parsed with
            ``hughml.canonical_parse``.
        verbose: when true, dump both event lists on exit, whether the
            comparison succeeded or not.
    """
    events1 = None
    events2 = None
    try:
        # Close each file as soon as its events have been collected.
        with open(data_filename, 'rb') as stream:
            events1 = list(hughml.parse(stream))
        with open(canonical_filename, 'rb') as stream:
            events2 = list(hughml.canonical_parse(stream))
        _compare_events(events1, events2)
    finally:
        if verbose:
            print("EVENTS1:")
            pprint.pprint(events1)
            print("EVENTS2:")
            pprint.pprint(events2)
示例#10
0
 def highlight(self):
     """Read ``self.input``, insert style substitutions, write ``self.output``.

     The input is decoded (UTF-16 LE/BE when it starts with the matching
     BOM, otherwise UTF-8), then scanned into tokens and parsed into events.
     For every token or event class that has an entry in
     ``self.style.substitutions`` under ``(cls, -1)`` or ``(cls, +1)``, the
     substitution is inserted at the start or end index respectively. Text
     between insertion points is passed through ``self.style.replaces``.
     The result is written UTF-8 encoded, between the optional style header
     and footer.
     """
     raw = self.input.read()
     if raw.startswith(codecs.BOM_UTF16_LE):
         encoding = 'utf-16-le'
     elif raw.startswith(codecs.BOM_UTF16_BE):
         encoding = 'utf-16-be'
     else:
         encoding = 'utf-8'
     text = unicode(raw, encoding)

     table = self.style.substitutions
     tokens = hughml.scan(text)
     events = hughml.parse(text)

     # Each marker is [index, weight, sequence number, substitution]; the
     # leading fields decide the order when the markers are sorted.
     markers = []
     for number, token in enumerate(tokens, 1):
         start = token.start_mark.index
         end = token.end_mark.index
         if start == end:
             # Tokens that cover no text get no markers.
             continue
         kind = token.__class__
         if (kind, -1) in table:
             markers.append([start, +2, number, table[kind, -1]])
         if (kind, +1) in table:
             markers.append([end, -2, number, table[kind, +1]])
     for number, event in enumerate(events, 1):
         kind = event.__class__
         if (kind, -1) in table:
             markers.append([event.start_mark.index, +1, number, table[kind, -1]])
         if (kind, +1) in table:
             markers.append([event.end_mark.index, -1, number, table[kind, +1]])
     markers.sort()

     # Walk the markers from the end of the text backwards, collecting the
     # pieces in reverse order. Text that precedes the earliest marker is
     # never collected.
     pieces = []
     cursor = len(text)
     for index, _weight, _number, substitution in reversed(markers):
         if index < cursor:
             piece = text[index:cursor]
             for substring, replacement in self.style.replaces:
                 piece = piece.replace(substring, replacement)
             pieces.append(piece)
             cursor = index
         pieces.append(substitution)
     result = u''.join(reversed(pieces))

     if self.style.header:
         self.output.write(self.style.header)
     self.output.write(result.encode('utf-8'))
     if self.style.footer:
         self.output.write(self.style.footer)
示例#11
0
def canonical_parse(stream):
    """Parse ``stream`` with ``CanonicalLoader`` and return what ``hughml.parse`` yields."""
    parsed_events = hughml.parse(stream, Loader=CanonicalLoader)
    return parsed_events