This repository has been archived by the owner on Apr 25, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
csstokenizer.py
68 lines (64 loc) · 2.03 KB
/
csstokenizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# encoding: utf-8;
import re
from tokenizer import Token, TokenList, Tokenizer
from tokenexceptions import *
class MalformedIdentifier(TokenException):
    """Signal that an identifier token could not be formed.

    The constructor arguments are forwarded unchanged to
    ``TokenException.__init__``; afterwards ``message`` is set to a fixed
    description of the failure.
    """

    def __init__(self, partial_token="", start_char=0, end_char=0):
        # Explicit base-class call (not super()) is kept deliberately: the
        # base class is defined elsewhere and may not support super().
        TokenException.__init__(self, partial_token, start_char, end_char)

        # Assigned after the base initialiser has run, so this text takes
        # precedence over any message the base class may have stored.
        self.message = (
            "You've tried to create a identifier, but with a crap name"
        )
class CSSTokenizer(Tokenizer):
    """Tokenizer subclass meant to split a string into CSS tokens.

    As written, the constructor only forwards its argument to
    ``Tokenizer.__init__``; no CSS-specific patterns are installed yet.
    The comment block inside ``__init__`` records the token and macro
    grammar, as listed in the CSS spec, that those patterns are meant to
    follow.
    """

    def __init__(self, string_to_tokenize=""):
        Tokenizer.__init__(self, string_to_tokenize)

        # --- CSSTokenizer-specific regexes belong here ---------------------
        #
        # Everything written before reading through the CSS spec was thrown
        # away; the tokens defined by the spec are to be used instead.  They
        # are reproduced below for reference.
        #
        # Tokens
        # ------
        #   IDENT           {ident}
        #   ATKEYWORD       @{ident}
        #   STRING          {string}
        #   INVALID         {invalid}
        #   HASH            #{name}
        #   NUMBER          {num}
        #   PERCENTAGE      {num}%
        #   DIMENSION       {num}{ident}
        #   URI             url\({w}{string}{w}\)
        #                   |url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\)
        #   UNICODE-RANGE   U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
        #   CDO             <!--
        #   CDC             -->
        #   ;               ;
        #   {               \{
        #   }               \}
        #   (               \(
        #   )               \)
        #   [               \[
        #   ]               \]
        #   S               [ \t\r\n\f]+
        #   COMMENT         \/\*[^*]*\*+([^/*][^*]*\*+)*\/
        #   FUNCTION        {ident}\(
        #   INCLUDES        ~=
        #   DASHMATCH       |=
        #   DELIM           any other character not matched by the above
        #                   rules, and neither a single nor a double quote
        #
        # Macros (referenced above as {name})
        # -----------------------------------
        #   ident           [-]?{nmstart}{nmchar}*
        #   name            {nmchar}+
        #   nmstart         [_a-z]|{nonascii}|{escape}
        #   nonascii        [^\0-\177]
        #   unicode         \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
        #   escape          {unicode}|\\[^\n\r\f0-9a-f]
        #   nmchar          [_a-z0-9-]|{nonascii}|{escape}
        #   num             [0-9]+|[0-9]*\.[0-9]+
        #   string          {string1}|{string2}
        #   string1         \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
        #   string2         \'([^\n\r\f\\']|\\{nl}|{escape})*\'
        #   invalid         {invalid1}|{invalid2}
        #   invalid1        \"([^\n\r\f\\"]|\\{nl}|{escape})*
        #   invalid2        \'([^\n\r\f\\']|\\{nl}|{escape})*
        #   nl              \n|\r\n|\r|\f
        #   w               [ \t\r\n\f]*