示例#1
0
def _compare_scanners(py_data, c_data, verbose):
    """Check that the Python and C loaders scan the same token stream.

    ``py_data`` is scanned with ``hughml.PyLoader`` and ``c_data`` with
    ``hughml.CLoader``.  The two token lists must have the same length and,
    pairwise, the same class, the same ``value`` (when the token has one)
    and the same start/end marks (index, line, column).  Marks are not
    compared for ``StreamEndToken``.

    Raises ``AssertionError`` on the first mismatch.  When ``verbose`` is
    true, both token lists are pretty-printed whether or not the comparison
    succeeded.
    """
    py_tokens = list(hughml.scan(py_data, Loader=hughml.PyLoader))
    c_tokens = []
    try:
        # Append one token at a time (instead of list(...)) so that the
        # tokens scanned before a failure are still available to the
        # verbose dump in the ``finally`` clause.
        for token in hughml.scan(c_data, Loader=hughml.CLoader):
            c_tokens.append(token)
        assert len(py_tokens) == len(c_tokens), (len(py_tokens), len(c_tokens))
        for py_token, c_token in zip(py_tokens, c_tokens):
            assert py_token.__class__ == c_token.__class__, (py_token, c_token)
            if hasattr(py_token, 'value'):
                assert py_token.value == c_token.value, (py_token, c_token)
            if isinstance(py_token, hughml.StreamEndToken):
                continue
            py_start = (py_token.start_mark.index, py_token.start_mark.line,
                        py_token.start_mark.column)
            py_end = (py_token.end_mark.index, py_token.end_mark.line,
                      py_token.end_mark.column)
            c_start = (c_token.start_mark.index, c_token.start_mark.line,
                       c_token.start_mark.column)
            c_end = (c_token.end_mark.index, c_token.end_mark.line,
                     c_token.end_mark.column)
            assert py_start == c_start, (py_start, c_start)
            assert py_end == c_end, (py_end, c_end)
    finally:
        if verbose:
            # A parenthesised single argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print("PY_TOKENS:")
            pprint.pprint(py_tokens)
            print("C_TOKENS:")
            pprint.pprint(c_tokens)
示例#2
0
def test_scanner(data_filename, canonical_filename, verbose=False):
    """Scan both files completely and fail if the scanner raises.

    Each of ``data_filename`` and ``canonical_filename`` is opened in binary
    mode and passed to ``hughml.scan``.  The class names of the produced
    tokens are collected and, when ``verbose`` is true, pretty-printed after
    each file (also when scanning raised part-way through).
    """
    for filename in [data_filename, canonical_filename]:
        tokens = []
        try:
            # The context manager closes the file even if scanning fails.
            with open(filename, 'rb') as stream:
                # Collected incrementally so a partial list is available to
                # the verbose dump if the scanner raises.
                for token in hughml.scan(stream):
                    tokens.append(token.__class__.__name__)
        finally:
            if verbose:
                pprint.pprint(tokens)
示例#3
0
def test_tokens(data_filename, tokens_filename, verbose=False):
    """Compare the scanned tokens of a data file with an expected listing.

    ``tokens_filename`` is read as text and split on whitespace to obtain
    the expected token codes.  ``data_filename`` is scanned with
    ``hughml.scan``; every token except the stream start/end tokens is
    translated through the module-level ``_replaces`` mapping (keyed by
    token class).  The two sequences must have equal length and be equal
    element by element, otherwise ``AssertionError`` is raised.

    When ``verbose`` is true both sequences are printed, also when scanning
    raised part-way through.
    """
    tokens1 = []
    # Context managers make sure both files are closed deterministically.
    with open(tokens_filename, 'r') as tokens_file:
        tokens2 = tokens_file.read().split()
    try:
        with open(data_filename, 'rb') as stream:
            for token in hughml.scan(stream):
                if not isinstance(
                        token,
                        (hughml.StreamStartToken, hughml.StreamEndToken)):
                    tokens1.append(_replaces[token.__class__])
    finally:
        if verbose:
            print("TOKENS1:", ' '.join(tokens1))
            print("TOKENS2:", ' '.join(tokens2))
    assert len(tokens1) == len(tokens2), (tokens1, tokens2)
    for token1, token2 in zip(tokens1, tokens2):
        assert token1 == token2, (token1, token2)
示例#4
0
 def highlight(self):
     """Write a highlighted rendering of ``self.input`` to ``self.output``.

     The whole input is read and decoded: as UTF-16 (LE or BE) when it
     starts with the corresponding BOM, otherwise as UTF-8.  The text is
     scanned into tokens and parsed into events; for every token/event
     class that has an entry in ``self.style.substitutions`` under the key
     ``(cls, -1)`` the substitution is placed at the start mark, and under
     ``(cls, +1)`` at the end mark.  Stretches of original text between
     markers are passed through the ``self.style.replaces`` pairs;
     substitutions are inserted verbatim.

     The output is the style header (if any), the UTF-8 encoded result and
     the style footer (if any).
     """
     raw = self.input.read()
     if raw.startswith(codecs.BOM_UTF16_LE):
         encoding = 'utf-16-le'
     elif raw.startswith(codecs.BOM_UTF16_BE):
         encoding = 'utf-16-be'
     else:
         encoding = 'utf-8'
     text = unicode(raw, encoding)

     substitutions = self.style.substitutions
     tokens = hughml.scan(text)
     events = hughml.parse(text)

     # Each marker is [index, weight, sequence number, substitution]; the
     # weight and sequence number only serve as tie-breakers when markers
     # at the same index are sorted.
     markers = []
     for number, token in enumerate(tokens, 1):
         start = token.start_mark.index
         end = token.end_mark.index
         if start == end:
             # Zero-width tokens get no markers.
             continue
         cls = token.__class__
         if (cls, -1) in substitutions:
             markers.append([start, +2, number, substitutions[cls, -1]])
         if (cls, +1) in substitutions:
             markers.append([end, -2, number, substitutions[cls, +1]])
     for number, event in enumerate(events, 1):
         cls = event.__class__
         if (cls, -1) in substitutions:
             markers.append(
                 [event.start_mark.index, +1, number, substitutions[cls, -1]])
         if (cls, +1) in substitutions:
             markers.append(
                 [event.end_mark.index, -1, number, substitutions[cls, +1]])

     # Walk the markers from the end of the text towards the beginning,
     # collecting pieces in reverse order.
     # NOTE: text located before the lowest marker index is never emitted.
     pieces = []
     cursor = len(text)
     for index, _weight, _number, substitution in reversed(sorted(markers)):
         if index < cursor:
             segment = text[index:cursor]
             for substring, replacement in self.style.replaces:
                 segment = segment.replace(substring, replacement)
             pieces.append(segment)
             cursor = index
         pieces.append(substitution)
     result = u''.join(reversed(pieces))

     if self.style.header:
         self.output.write(self.style.header)
     self.output.write(result.encode('utf-8'))
     if self.style.footer:
         self.output.write(self.style.footer)
示例#5
0
def canonical_scan(stream):
    """Scan ``stream`` with ``hughml.scan`` using ``CanonicalLoader``.

    Returns whatever ``hughml.scan`` returns for that loader.
    """
    token_stream = hughml.scan(stream, Loader=CanonicalLoader)
    return token_stream