def compileCombinedRegex(expressions, ignoreList=None):
    """
    Join several single expressions into one big alternation regex in which
    every alternative is wrapped in a named group "style<index>".

    expressions -- List of tuples (r, s) where r is single compiled RE
            (anything else is converted to a pattern with unicode()),
            s is a number from FormatTypes
    ignoreList -- List of FormatTypes for which the related expression
            shouldn't be taken into the compiled expression
            (None means: ignore nothing)

    returns: compiled combined RE to feed into StringOps.Tokenizer
    """
    if ignoreList is None:
        ignoreList = []

    compiledType = type(EMPTY_RE)
    result = []

    # The number in the group name is the position in the *unfiltered*
    # expressions list, ignored entries do not renumber the later ones
    for i, (r, s) in enumerate(expressions):
        if s in ignoreList:
            continue

        if isinstance(r, compiledType):
            # Compiled RE: reuse its pattern source
            r = r.pattern
        else:
            r = unicode(r)

        result.append(u"(?P<style%i>%s)" % (i, r))

    return re.compile(u"|".join(result),
            re.DOTALL | re.UNICODE | re.MULTILINE)
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The string is split at the first ':'. If there is no ':' the result
    is (user, None). The password part may contain any character,
    including ':' and line breaks.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled lazily on first call and cached in the module global
        import srePersistent as re
        # DOTALL: without it '.' stops at a newline, so a password
        # containing a line break would not be split off correctly
        _passwdprog = re.compile('^([^:]*):(.*)$', re.DOTALL)

    match = _passwdprog.match(user)
    if match:
        return match.group(1, 2)
    return user, None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    If url does not match the '//host...' form, (None, url) is returned.
    """
    global _hostprog
    if _hostprog is None:
        # First call: compile the pattern and keep it in the module global
        import srePersistent as re
        _hostprog = re.compile('^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.group(1), found.group(2)
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'. Both parts are passed through
    unquote(). Without an '@' the result is (None, host).
    """
    global _userprog
    if _userprog is None:
        # First call: compile the pattern and keep it in the module global
        import srePersistent as re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    # Result is a two-element list here (not a tuple)
    return [unquote(part) for part in found.group(1, 2)]
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='. Without an '=' the result
    is (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # First call: compile the pattern and keep it in the module global
        import srePersistent as re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1), found.group(2)
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'. Without a '#' the result
    is (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # First call: compile the pattern and keep it in the module global
        import srePersistent as re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1), found.group(2)
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'. Without a '?' the result
    is (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # Compiled lazily on first call and cached in the module global
        import srePersistent as re
        # Raw string: '\?' is not a valid string escape, the backslash
        # must reach the regex engine unchanged
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match:
        return match.group(1, 2)
    return url, None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits. If there is no port,
    the result is (host, None). An empty port ('host:') is treated as
    "no port": the trailing ':' is removed from the returned host.
    If the text after the last ':' is not purely numeric, host is
    returned unchanged together with None.
    """
    global _portprog
    if _portprog is None:
        # Compiled lazily on first call and cached in the module global
        import srePersistent as re
        # '*' (not '+') so that an empty port after the colon is matched
        # as well instead of leaving the ':' glued to the host name
        _portprog = re.compile('^(.*):([0-9]*)$')

    match = _portprog.match(host)
    if match:
        host, port = match.group(1, 2)
        if port:
            return host, port
    return host, None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type (scheme) is returned in lower case. If url does not start
    with a scheme followed by ':', the result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # First call: compile the pattern and keep it in the module global
        import srePersistent as re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url

    scheme = found.group(1)
    # Skip the scheme and the ':' following it
    rest = url[len(scheme) + 1:]
    return scheme.lower(), rest
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compiled lazily on first call and cached in the module global
        import srePersistent as re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match is None:
        return host, defport

    host, port = match.group(1, 2)
    try:
        # int() raises ValueError for an empty string as well, so
        # "host:" needs no special handling to end up with None
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport
def rebuildFormatting(self, miscevt):
    """
    Called after a new wiki is loaded or options were changed.
    It rebuilds regexes and sets other variables according to
    the new settings.

    Sets the expression lists (formatExpressions, formatTodoExpressions,
    formatTableContentExpressions, formatWwTitleExpressions), the combined
    regexes built from them (combinedPageRE, combinedTodoRE,
    combinedTableContentRE, combinedWwTitleRE), the wiki word bracket
    strings and tableCutRe.

    miscevt -- not used inside this method
    """
    # In each list most specific single expressions first

    # These are the full lists with all possible expressions
    # they might be reduced afterwards

    # Expressions for a whole page
    self.formatExpressions = [
            (self.PlainEscapedCharacterRE, FormatTypes.EscapedChar),
            (self.TableRE, FormatTypes.Table),
            (self.PreBlockRE, FormatTypes.PreBlock),
            (self.SuppressHighlightingRE, FormatTypes.SuppressHighlight),
            (self.ScriptRE, FormatTypes.Script),
            (self.TitledUrlRE, FormatTypes.Url),
            (self.UrlRE, FormatTypes.Url),
            (self.ToDoREWithContent, FormatTypes.ToDo),
            (self.PropertyRE, FormatTypes.Property),
            (self.FootnoteRE, FormatTypes.Footnote),
            (self.WikiWordEditorRE2, FormatTypes.WikiWord2),
            (self.WikiWordEditorRE, FormatTypes.WikiWord),
            (self.BoldRE, FormatTypes.Bold),
            (self.ItalicRE, FormatTypes.Italic),
            (self.HtmlTagRE, FormatTypes.HtmlTag),
            (self.Heading4RE, FormatTypes.Heading4),
            (self.Heading3RE, FormatTypes.Heading3),
            (self.Heading2RE, FormatTypes.Heading2),
            (self.Heading1RE, FormatTypes.Heading1),
            (self.AnchorRE, FormatTypes.Anchor),
            (self.BulletRE, FormatTypes.Bullet),
            (self.NumericBulletRE, FormatTypes.Numeric),
            (self.HorizLineRE, FormatTypes.HorizLine),
            (self.InsertionRE, FormatTypes.Insertion)
#             (self.PlainCharactersRE, FormatTypes.Default)
            ]

    # Reduced list, judging by the name for the content of a todo entry
    # (no block-level expressions like tables, headings or bullets)
    self.formatTodoExpressions = [
            (self.PlainEscapedCharacterRE, FormatTypes.EscapedChar),
            (self.TitledUrlRE, FormatTypes.Url),
            (self.UrlRE, FormatTypes.Url),
            (self.PropertyRE, FormatTypes.Property),
            (self.FootnoteRE, FormatTypes.Footnote),
            (self.WikiWordEditorRE2, FormatTypes.WikiWord2),
            (self.WikiWordEditorRE, FormatTypes.WikiWord),
            (self.BoldRE, FormatTypes.Bold),
            (self.ItalicRE, FormatTypes.Italic),
            (self.HtmlTagRE, FormatTypes.HtmlTag)
#             (self.PlainCharactersRE, FormatTypes.Default)
            ]

    # List for the content of a table. The two *PAT entries are named like
    # pattern strings rather than compiled REs -- presumably plain strings,
    # verify against the class attributes
    self.formatTableContentExpressions = [
            (self.PlainEscapedCharacterRE, FormatTypes.EscapedChar),
            (self.TitleWikiWordDelimiterPAT, FormatTypes.TableCellSplit),
            (self.TableRowDelimiterPAT, FormatTypes.TableRowSplit),
            (self.TitledUrlRE, FormatTypes.Url),
            (self.UrlRE, FormatTypes.Url),
#             (self.ToDoREWithContent, FormatTypes.ToDo),  # TODO Doesn't work
            (self.FootnoteRE, FormatTypes.Footnote),
            (self.WikiWordEditorRE2, FormatTypes.WikiWord2),
            (self.WikiWordEditorRE, FormatTypes.WikiWord),
            (self.BoldRE, FormatTypes.Bold),
            (self.ItalicRE, FormatTypes.Italic),
            (self.HtmlTagRE, FormatTypes.HtmlTag)
#             (self.PlainCharactersRE, FormatTypes.Default)
            ]

    # Smallest list, judging by the name for the title part of a wiki word
    self.formatWwTitleExpressions = [
            (self.PlainEscapedCharacterRE, FormatTypes.EscapedChar),
            (self.BoldRE, FormatTypes.Bold),
            (self.ItalicRE, FormatTypes.Italic),
            (self.HtmlTagRE, FormatTypes.HtmlTag)
#             (self.PlainCharactersRE, FormatTypes.Default)
            ]

    # Currently always empty because the code filling it is commented out
    ignoreList = []  # List of FormatTypes not to compile into the comb. regex

    #if self.wikiDocument.getWikiConfig().getboolean(
    #        "main", "footnotes_as_wikiwords", False):
    #    ignoreList.append(FormatTypes.Footnote)
    #    self.footnotesAsWws = self.wikiDocument.getWikiConfig().getboolean(
    #            "main", "footnotes_as_wikiwords", False)

    # One combined regex per expression list
    self.combinedPageRE = compileCombinedRegex(self.formatExpressions,
            ignoreList)
    self.combinedTodoRE = compileCombinedRegex(self.formatTodoExpressions,
            ignoreList)
    self.combinedTableContentRE = compileCombinedRegex(
            self.formatTableContentExpressions, ignoreList)
    self.combinedWwTitleRE = compileCombinedRegex(
            self.formatWwTitleExpressions, ignoreList)

    # Brackets around a wiki word, plain and escaped for use in a regex
    self.wikiWordStart = u"["
    self.wikiWordEnd = u"]"

    self.wikiWordStartEsc = ur"\["
    self.wikiWordEndEsc = ur"\]"

#         if self.pWiki.wikiConfigFilename:
#             self.footnotesAsWws = self.pWiki.getConfig().getboolean(
#                     "main", "footnotes_as_wikiwords", False)

    # Needed in PageAst.Table.buildSubAst (placed here because of threading
    # problem with re.compile)
    # Matches either a newline, the delimiter pattern, or the shortest run
    # of plain characters which is followed by a newline, the delimiter
    # pattern or the end of the text ("(?!.)" with DOTALL: no character left)
    self.tableCutRe = re.compile(ur"\n|" + self.TitleWikiWordDelimiterPAT +
            ur"|" + self.PlainCharacterPAT + ur"+?(?=\n|" +
            self.TitleWikiWordDelimiterPAT + ur"|(?!.))",
            re.DOTALL | re.UNICODE | re.MULTILINE)  # TODO Explain (if it works)
# from Config import faces import srePersistent as re from StringOps import Tokenizer, matchWhole, Token, htmlColorToRgbTuple, \ unescapeWithRe FormatTypes = Enumeration("FormatTypes", ["Default", "WikiWord", "AvailWikiWord", "Bold", "Italic", "Heading4", "Heading3", "Heading2", "Heading1", "Url", "Script", "Property", "ToDo", "WikiWord2", "HorizLine", "Bullet", "Numeric", "Suppress", "Footnote", "Table", "EscapedChar", "HtmlTag", "TableCellSplit", "TableRowSplit", "PreBlock", "SuppressHighlight", "Insertion", "Anchor"], 0) EMPTY_RE = re.compile(ur"", re.DOTALL | re.UNICODE | re.MULTILINE) def compileCombinedRegex(expressions, ignoreList=None): """ expressions -- List of tuples (r, s) where r is single compiled RE, s is a number from FormatTypes ignoreList -- List of FormatTypes for which the related expression shouldn't be taken into the compiled expression returns: compiled combined RE to feed into StringOps.Tokenizer """ result = [] if ignoreList == None: ignoreList = [] for i in range(len(expressions)):
from StringOps import mbcsDec

# Use the locale settings of the user's environment for all categories
locale.setlocale(locale.LC_ALL, "")

# String containing the delimiter between the title of a wiki word (to show in
# HTML) and the real word, as e.g. [title | WikiWord]
TitleWikiWordDelimiter = ur"|"

# Same, escaped for regular expression
TitleWikiWordDelimiterPAT = ur"\|"

# Pattern for one "plain" character: either any character which is not
# a backslash or a backslash together with the character following it
PlainCharacterPAT = ur"(?:[^\\]|\\.)"

# A backslash followed by any single character, the character itself
# is captured in group 1
PlainEscapedCharacterRE = re.compile(ur"\\(.)",
        re.DOTALL | re.UNICODE | re.MULTILINE)

# PlainCharactersRE = re.compile(PlainCharacterPAT + "+",
#         re.DOTALL | re.UNICODE | re.MULTILINE)


# basic formatting

# *bold*: the opening star must be followed by a non-whitespace character,
# content is matched non-greedy up to the next star
BoldRE = re.compile(ur"\*(?=\S)(?P<boldContent>" + PlainCharacterPAT +
        ur"+?)\*",
        re.DOTALL | re.UNICODE | re.MULTILINE)

# _italic_: both underscores must sit at a word boundary
ItalicRE = re.compile(ur"\b_(?P<italicContent>" + PlainCharacterPAT +
        ur"+?)_\b",
        re.DOTALL | re.UNICODE | re.MULTILINE)

# Opening or closing HTML tag: tag name starts with a letter, optionally
# followed by "/" or by a space and further text without newline or ">"
HtmlTagRE = re.compile(ur"</?[A-Za-z][A-Za-z0-9]*(?:/| [^\n>]*)?>",
        re.DOTALL | re.UNICODE | re.MULTILINE)

# Headings: the given number of "+" at the beginning of a line (not followed
# by another "+"), an optional space and then the content up to the newline
Heading4RE = re.compile(
        u"^\\+\\+\\+\\+(?!\\+) ?(?P<h4Content>" + PlainCharacterPAT +
        ur"+?)\n",
        re.DOTALL | re.UNICODE | re.MULTILINE
        )
Heading3RE = re.compile(
        u"^\\+\\+\\+(?!\\+) ?(?P<h3Content>" + PlainCharacterPAT +
        ur"+?)\n",
        re.DOTALL | re.UNICODE | re.MULTILINE
        )
import threading

from struct import pack, unpack

import difflib, codecs, os.path, random
import urllib_red as urllib

from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE

from Utilities import DUMBTHREADHOLDER

import srePersistent as _re

# Matches one line ending in any of the forms "\r\n", "\r" or "\n"
LINEEND_SPLIT_RE = _re.compile(r"\r\n?|\n")

from Configuration import isUnicode, isOSX, isLinux, isWindows, isWin9x

# To generate dependencies for py2exe/py2app
import encodings.utf_8, encodings.latin_1, encodings.utf_16, \
        encodings.utf_16_be, encodings.utf_16_le


# ---------- Encoding conversion ----------

# Encoder function for UTF-8 as looked up in the codec registry
utf8Enc = codecs.getencoder("utf-8")