示例#1
0
def compileCombinedRegex(expressions, ignoreList=None):
    """
    Join several single expressions into one alternation regex.

    expressions -- Sequence of tuples (r, s) where r is either a single
            compiled RE (its pattern text is taken) or any other object
            (converted with unicode() and used as pattern text),
            s is a number from FormatTypes
    ignoreList -- List of FormatTypes for which the related
            expression shouldn't be taken into the compiled expression.
            None (the default) means nothing is ignored
    returns: compiled combined RE to feed into StringOps.Tokenizer.
            Every alternative is wrapped in a named group "style<i>"
            where <i> is the position of the tuple in expressions;
            positions of ignored entries are left out, not renumbered.
            If no expression remains, the empty pattern is compiled.
    """
    if ignoreList is None:
        ignoreList = []

    result = []
    for i, (r, s) in enumerate(expressions):
        if s in ignoreList:
            continue

        # Compiled expressions contribute their source text, anything else
        # is taken as pattern text directly
        if isinstance(r, type(EMPTY_RE)):
            r = r.pattern
        else:
            r = unicode(r)
        result.append(u"(?P<style%i>%s)" % (i, r))

    return re.compile(u"|".join(result),
            re.DOTALL | re.UNICODE | re.MULTILINE)
示例#2
0
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'. If the argument does not match
    the pattern (e.g. it contains no ':'), (user, None) is returned.
    """
    global _passwdprog
    pattern = _passwdprog
    if pattern is None:
        # Compile on first use and keep the result in the module global
        import srePersistent as re
        pattern = _passwdprog = re.compile('^([^:]*):(.*)$')

    found = pattern.match(user)
    if not found:
        return user, None
    return found.group(1, 2)
示例#3
0
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part is everything between the leading '//' and the next
    '/'. If url does not match the pattern (e.g. it lacks the leading
    '//'), (None, url) is returned.
    """
    global _hostprog
    pattern = _hostprog
    if pattern is None:
        # Compile on first use and keep the result in the module global
        import srePersistent as re
        pattern = _hostprog = re.compile('^//([^/]*)(.*)$')

    found = pattern.match(url)
    if not found:
        return None, url
    return found.group(1, 2)
示例#4
0
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'; both parts are passed through
    unquote() and returned as a two-element list. If host does not match
    the pattern (e.g. it contains no '@'), the tuple (None, host) is
    returned with host left untouched.
    """
    global _userprog
    if _userprog is None:
        # Compile on first use and keep the result in the module global
        import srePersistent as re
        _userprog = re.compile('^(.*)@(.*)$')

    match = _userprog.match(host)
    if not match:
        return None, host
    # A list comprehension yields the same list map() gave on Python 2 and,
    # unlike map(), still yields a real list (not a lazy iterator) on Python 3
    return [unquote(part) for part in match.group(1, 2)]
示例#5
0
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='. If attr does not match the
    pattern (e.g. it contains no '='), (attr, None) is returned.
    """
    global _valueprog
    pattern = _valueprog
    if pattern is None:
        # Compile on first use and keep the result in the module global
        import srePersistent as re
        pattern = _valueprog = re.compile('^([^=]*)=(.*)$')

    found = pattern.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)
示例#6
0
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'. If url does not match the
    pattern (e.g. it contains no '#'), (url, None) is returned.
    """
    global _tagprog
    pattern = _tagprog
    if pattern is None:
        # Compile on first use and keep the result in the module global
        import srePersistent as re
        pattern = _tagprog = re.compile('^(.*)#([^#]*)$')

    found = pattern.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
示例#7
0
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'. If url does not match the
    pattern (e.g. it contains no '?'), (url, None) is returned.
    """
    global _queryprog
    if _queryprog is None:
        # Compile on first use and keep the result in the module global
        import srePersistent as re
        # Raw string: the backslash before '?' belongs to the regex and
        # must not depend on Python passing an unknown escape through
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if not match:
        return url, None
    return match.group(1, 2)
示例#8
0
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    Only a final ':' followed by one or more digits is recognised as a
    port; the port is returned as a string. If host does not match the
    pattern, (host, None) is returned.
    """
    global _portprog
    pattern = _portprog
    if pattern is None:
        # Compile on first use and keep the result in the module global
        import srePersistent as re
        pattern = _portprog = re.compile('^(.*):([0-9]+)$')

    found = pattern.match(host)
    if not found:
        return host, None
    return found.group(1, 2)
示例#9
0
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is the leading run of characters other than '/' and ':'
    that is followed by ':'; it is returned in lower case. If url does
    not start that way, (None, url) is returned.
    """
    global _typeprog
    pattern = _typeprog
    if pattern is None:
        # Compile on first use and keep the result in the module global
        import srePersistent as re
        pattern = _typeprog = re.compile('^([^/:]+):')

    found = pattern.match(url)
    if not found:
        return None, url

    scheme = found.group(1)
    # Skip the scheme and the ':' that follows it
    remainder = url[len(scheme) + 1:]
    return scheme.lower(), remainder
示例#10
0
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    The split happens at the last ':'.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None as port if there is a ':' but no valid number after it
    (this includes an empty port as in 'host:')."""
    global _nportprog
    if _nportprog is None:
        # Compile on first use and keep the result in the module global
        import srePersistent as re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if not match:
        return host, defport

    host, port = match.group(1, 2)
    try:
        # int('') raises ValueError as well, so an empty port needs no
        # special handling
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport
示例#11
0
    def rebuildFormatting(self, miscevt):
        """
        Called after a new wiki is loaded or options were changed.
        It rebuilds regexes and sets other variables according to
        the new settings
        """
        # In each list most specific single expressions first
        
        # These are the full lists with all possible expressions
        # they might be reduced afterwards

        self.formatExpressions = [
                (self.PlainEscapedCharacterRE, FormatTypes.EscapedChar),
                (self.TableRE, FormatTypes.Table),
                (self.PreBlockRE, FormatTypes.PreBlock),
                (self.SuppressHighlightingRE, FormatTypes.SuppressHighlight),
                (self.ScriptRE, FormatTypes.Script),
                (self.TitledUrlRE, FormatTypes.Url),
                (self.UrlRE, FormatTypes.Url),
                (self.ToDoREWithContent, FormatTypes.ToDo),
                (self.PropertyRE, FormatTypes.Property),
                (self.FootnoteRE, FormatTypes.Footnote),
                (self.WikiWordEditorRE2, FormatTypes.WikiWord2),
                (self.WikiWordEditorRE, FormatTypes.WikiWord),
                (self.BoldRE, FormatTypes.Bold),
                (self.ItalicRE, FormatTypes.Italic),
                (self.HtmlTagRE, FormatTypes.HtmlTag),
                (self.Heading4RE, FormatTypes.Heading4),
                (self.Heading3RE, FormatTypes.Heading3),
                (self.Heading2RE, FormatTypes.Heading2),
                (self.Heading1RE, FormatTypes.Heading1),
                (self.AnchorRE, FormatTypes.Anchor),
                (self.BulletRE, FormatTypes.Bullet),
                (self.NumericBulletRE, FormatTypes.Numeric),
                (self.HorizLineRE, FormatTypes.HorizLine),
                (self.InsertionRE, FormatTypes.Insertion)
#                 (self.PlainCharactersRE, FormatTypes.Default)
                ]
                
                
        self.formatTodoExpressions = [
                (self.PlainEscapedCharacterRE, FormatTypes.EscapedChar),
                (self.TitledUrlRE, FormatTypes.Url),
                (self.UrlRE, FormatTypes.Url),
                (self.PropertyRE, FormatTypes.Property),
                (self.FootnoteRE, FormatTypes.Footnote),
                (self.WikiWordEditorRE2, FormatTypes.WikiWord2),
                (self.WikiWordEditorRE, FormatTypes.WikiWord),
                (self.BoldRE, FormatTypes.Bold),
                (self.ItalicRE, FormatTypes.Italic),
                (self.HtmlTagRE, FormatTypes.HtmlTag)
#                 (self.PlainCharactersRE, FormatTypes.Default)
                ]
                

        self.formatTableContentExpressions = [
                (self.PlainEscapedCharacterRE, FormatTypes.EscapedChar),
                (self.TitleWikiWordDelimiterPAT, FormatTypes.TableCellSplit),
                (self.TableRowDelimiterPAT, FormatTypes.TableRowSplit),
                (self.TitledUrlRE, FormatTypes.Url),
                (self.UrlRE, FormatTypes.Url),
#                 (self.ToDoREWithContent, FormatTypes.ToDo),  # TODO Doesn't work
                (self.FootnoteRE, FormatTypes.Footnote),
                (self.WikiWordEditorRE2, FormatTypes.WikiWord2),
                (self.WikiWordEditorRE, FormatTypes.WikiWord),
                (self.BoldRE, FormatTypes.Bold),
                (self.ItalicRE, FormatTypes.Italic),
                (self.HtmlTagRE, FormatTypes.HtmlTag)
#                 (self.PlainCharactersRE, FormatTypes.Default)
                ]


        self.formatWwTitleExpressions = [
                (self.PlainEscapedCharacterRE, FormatTypes.EscapedChar),
                (self.BoldRE, FormatTypes.Bold),
                (self.ItalicRE, FormatTypes.Italic),
                (self.HtmlTagRE, FormatTypes.HtmlTag)
#                 (self.PlainCharactersRE, FormatTypes.Default)
                ]


        ignoreList = []  # List of FormatTypes not to compile into the comb. regex

        #if self.wikiDocument.getWikiConfig().getboolean(
        #            "main", "footnotes_as_wikiwords", False):
        #    ignoreList.append(FormatTypes.Footnote)
        #    self.footnotesAsWws = self.wikiDocument.getWikiConfig().getboolean(
        #            "main", "footnotes_as_wikiwords", False)


        self.combinedPageRE = compileCombinedRegex(self.formatExpressions,
                ignoreList)
        self.combinedTodoRE = compileCombinedRegex(self.formatTodoExpressions,
                ignoreList)
        self.combinedTableContentRE = compileCombinedRegex(
                self.formatTableContentExpressions, ignoreList)
        self.combinedWwTitleRE = compileCombinedRegex(
                self.formatWwTitleExpressions, ignoreList)


        self.wikiWordStart = u"["
        self.wikiWordEnd = u"]"
        
        self.wikiWordStartEsc = ur"\["
        self.wikiWordEndEsc = ur"\]"
        
#         if self.pWiki.wikiConfigFilename:
#             self.footnotesAsWws = self.pWiki.getConfig().getboolean(
#                     "main", "footnotes_as_wikiwords", False)

        # Needed in PageAst.Table.buildSubAst (placed here because of threading
        #   problem with re.compile            
        self.tableCutRe = re.compile(ur"\n|" + self.TitleWikiWordDelimiterPAT +
                ur"|" + self.PlainCharacterPAT + ur"+?(?=\n|" +
                self.TitleWikiWordDelimiterPAT + ur"|(?!.))", 
                re.DOTALL | re.UNICODE | re.MULTILINE)  # TODO Explain (if it works)
示例#12
0
# from Config import faces

import srePersistent as re

from StringOps import Tokenizer, matchWhole, Token, htmlColorToRgbTuple, \
        unescapeWithRe


FormatTypes = Enumeration("FormatTypes", ["Default", "WikiWord",
        "AvailWikiWord", "Bold", "Italic", "Heading4", "Heading3", "Heading2",
        "Heading1", "Url", "Script", "Property", "ToDo", "WikiWord2",
        "HorizLine", "Bullet", "Numeric", "Suppress", "Footnote", "Table",
        "EscapedChar", "HtmlTag", "TableCellSplit", "TableRowSplit", "PreBlock",
        "SuppressHighlight", "Insertion", "Anchor"], 0)

EMPTY_RE = re.compile(ur"", re.DOTALL | re.UNICODE | re.MULTILINE)


def compileCombinedRegex(expressions, ignoreList=None):
    """
    expressions -- List of tuples (r, s) where r is single compiled RE,
            s is a number from FormatTypes
    ignoreList -- List of FormatTypes for which the related
            expression shouldn't be taken into the compiled expression
    returns: compiled combined RE to feed into StringOps.Tokenizer
    """ 
    result = []
    if ignoreList == None:
        ignoreList = []

    for i in range(len(expressions)):
示例#13
0
from StringOps import mbcsDec

locale.setlocale(locale.LC_ALL, "")


# String containing the delimiter between the title of a wiki word (to show in
# HTML and the real word, as e.g. [title | WikiWord]
TitleWikiWordDelimiter = ur"|"

# Same, escaped for regular expression
TitleWikiWordDelimiterPAT = ur"\|"


PlainCharacterPAT = ur"(?:[^\\]|\\.)"

PlainEscapedCharacterRE = re.compile(ur"\\(.)", re.DOTALL | re.UNICODE | re.MULTILINE)

# PlainCharactersRE = re.compile(PlainCharacterPAT + "+",
#         re.DOTALL | re.UNICODE | re.MULTILINE)


# basic formatting
BoldRE = re.compile(ur"\*(?=\S)(?P<boldContent>" + PlainCharacterPAT + ur"+?)\*", re.DOTALL | re.UNICODE | re.MULTILINE)
ItalicRE = re.compile(ur"\b_(?P<italicContent>" + PlainCharacterPAT + ur"+?)_\b", re.DOTALL | re.UNICODE | re.MULTILINE)
HtmlTagRE = re.compile(ur"</?[A-Za-z][A-Za-z0-9]*(?:/| [^\n>]*)?>", re.DOTALL | re.UNICODE | re.MULTILINE)
Heading4RE = re.compile(
    u"^\\+\\+\\+\\+(?!\\+) ?(?P<h4Content>" + PlainCharacterPAT + ur"+?)\n", re.DOTALL | re.UNICODE | re.MULTILINE
)
Heading3RE = re.compile(
    u"^\\+\\+\\+(?!\\+) ?(?P<h3Content>" + PlainCharacterPAT + ur"+?)\n", re.DOTALL | re.UNICODE | re.MULTILINE
)
示例#14
0
文件: StringOps.py 项目: fsav/diffrev
import threading

from struct import pack, unpack

import difflib, codecs, os.path, random

import urllib_red as urllib

from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE

from Utilities import DUMBTHREADHOLDER

import srePersistent as _re

# Matches a single line ending: "\r\n", a lone "\r" or a lone "\n"
LINEEND_SPLIT_RE = _re.compile(r"\r\n?|\n")


from Configuration import isUnicode, isOSX, isLinux, isWindows, isWin9x


# To generate dependencies for py2exe/py2app
import encodings.utf_8, encodings.latin_1, encodings.utf_16, \
        encodings.utf_16_be, encodings.utf_16_le



# ---------- Encoding conversion ----------


# Encoder function for UTF-8 as returned by codecs.getencoder()
utf8Enc = codecs.getencoder("utf-8")