def _word_repl(self, word, groups):
    if self.in_dd:
        name = groups.get('word_name')
        current_page = self.formatter.page.page_name
        abs_name = AbsPageName(current_page, name)

        if abs_name == current_page:
            return self.__real_val(abs_name)
        else:
            # handle anchors
            try:
                abs_name, anchor = abs_name.rsplit("#", 1)
            except ValueError:
                anchor = ""

            if self.cat_re.match(abs_name):
                return self.__real_val(abs_name)
            else:
                if not anchor:
                    wholename = abs_name
                else:
                    wholename = "%s#%s" % (abs_name, anchor)

                return self.__real_val(wholename)

    return wikiParser._word_repl(self, word, groups)

def metas_to_abs_links(request, page, values):
    new_values = list()
    stripped = False

    for value in values:
        if is_meta_link(value) != 'link':
            new_values.append(value)
            continue

        if ((value.startswith('[[') and value.endswith(']]')) or
            (value.startswith('{{') and value.endswith('}}'))):
            stripped = True
            value = value.lstrip('[')
            value = value.lstrip('{')

        attachment = ''
        for scheme in ('attachment:', 'inline:', 'drawing:'):
            if value.startswith(scheme):
                if len(value.split('/')) == 1:
                    value = ':'.join(value.split(':')[1:])
                    if not '|' in value:
                        # If page does not have descriptive text, try
                        # to shorten the link to the attachment name.
                        value = "%s|%s" % (value.rstrip(']').rstrip('}'),
                                           value)
                    value = "%s%s/%s" % (scheme, page, value)
                else:
                    att_page = value.split(':')[1]
                    if (att_page.startswith('./') or
                        att_page.startswith('/') or
                        att_page.startswith('../')):
                        attachment = scheme
                        value = ':'.join(value.split(':')[1:])

        if (value.startswith('./') or
            value.startswith('/') or
            value.startswith('../')):
            value = AbsPageName(page, value)

        if value.startswith('#'):
            value = page + value

        value = attachment + value

        if stripped:
            if value.endswith(']'):
                value = '[[' + value
            elif value.endswith('}'):
                value = '{{' + value

        new_values.append(value)

    return new_values

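# A minimal usage sketch for metas_to_abs_links (illustrative only; the
# expected results assume MoinMoin's AbsPageName semantics and the usual
# is_meta_link() classification of link vs. non-link values):
#
#   metas_to_abs_links(request, u'ParentPage', [u'[[/SubPage]]'])
#       # -> [u'[[ParentPage/SubPage]]']   (child link made absolute)
#   metas_to_abs_links(request, u'ParentPage', [u'[[#section]]'])
#       # -> [u'[[ParentPage#section]]']   (bare anchor bound to the page)
#   metas_to_abs_links(request, u'ParentPage', [u'plain value'])
#       # -> [u'plain value']              (non-link values pass through)
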
def _metatable_parseargs(request, args, cat_re, temp_re):
    # Arg placeholders
    argset = set([])
    keyspec = list()
    excluded_keys = list()
    orderspec = list()
    limitregexps = dict()
    limitops = dict()

    # Storage for indirection keys used in metadata comparisons
    # and regexps, eg. k->c=/.+/
    indirection_keys = list()

    # list styles
    styles = dict()

    # Flag: were there page arguments?
    pageargs = False

    # Regex preprocessing
    for arg in (x.strip() for x in args.split(',') if x.strip()):
        # metadata key spec, move on
        if arg.startswith('||') and arg.endswith('||'):
            # take order, strip empty ones, look at styles
            for key in arg.split('||'):
                if not key:
                    continue

                # Grab styles
                if key.startswith('<') and '>' in key:
                    style = parseAttributes(request, key[1:], '>')
                    key = key[key.index('>') + 1:].strip()

                    if style:
                        styles[key] = style[0]

                # Grab key exclusions
                if key.startswith('!'):
                    excluded_keys.append(key.lstrip('!'))
                    continue

                keyspec.append(key.strip())

            continue

        op_match = False
        # Check for Python operator comparisons
        for op in OPERATORS:
            if op in arg:
                data = arg.rsplit(op)

                # If this is not a comparison but indirection,
                # continue. Good: k->s>3, bad: k->s=/.+/
                if op == '>' and data[0].endswith('-'):
                    continue

                # Must have a real comparison
                if not len(data) == 2:
                    if op == '==':
                        data.append('')
                    else:
                        continue

                key, comp = map(string.strip, data)

                # Add indirection key
                if '->' in key:
                    indirection_keys.append(key)

                limitops.setdefault(key, list()).append((comp, op))
                op_match = True

            # One of the operators matched, no need to go forward
            if op_match:
                break

        # One of the operators matched, process next arg
        if op_match:
            continue

        # Metadata regexp, move on
        if '=' in arg:
            data = arg.split("=")
            key = data[0]

            # Add indirection key
            if '->' in key:
                indirection_keys.append(key)

            val = '='.join(data[1:])

            # Assume that value limits are regexps; if
            # not, escape them into exact regexp matches
            if not REGEX_RE.match(val):
                from MoinMoin.parser.text_moin_wiki import Parser

                # If the value is a page, make it a non-matching
                # regexp so that all link variations will generate a
                # match. An alternative would be to match from links
                # also, but in this case old-style metalinks, which
                # cannot be edited, would appear in metatables, which
                # is not wanted (old-style eg. [[Page| key: Page]]).

                # Only allow non-matching regexp for values if they
                # are WikiWords. Eg. 'WikiWord some text' would match
                # 'WikiWord', emulating ye olde matching behaviour,
                # but 'nonwikiword some text' would not match
                # 'nonwikiword'
                if re.match(Parser.word_rule_js, val):
                    re_val = "(%s|" % (re.escape(val))
                else:
                    re_val = "(^%s$|" % (re.escape(val))

                # or as bracketed link
                re_val += r"(?P<sta>\[\[)%s(?(sta)\]\])|" % (re.escape(val))

                # or as commented bracketed link
                re_val += r"(?P<stb>\[\[)%s(?(stb)\|[^\]]*\]\]))" % \
                    (re.escape(val))

                limitregexps.setdefault(key, set()).add(
                    re.compile(re_val, re.UNICODE))

            # else strip the //:s
            else:
                if len(val) > 1:
                    val = val[1:-1]

                limitregexps.setdefault(key, set()).add(
                    re.compile(val, re.IGNORECASE | re.UNICODE))

            continue

        # order spec
        if arg.startswith('>>') or arg.startswith('<<'):
            # eg. [('<<', 'koo'), ('>>', 'kk')]
            orderspec = re.findall('(?:(<<|>>)([^<>]+))', arg)
            continue

        # Ok, we have a page arg, i.e. a page or page regexp in args
        pageargs = True

        # Normal pages, check perms, encode and move on
        if not REGEX_RE.match(arg):
            # Fix relative links
            if (arg.startswith('/') or
                arg.startswith('./') or
                arg.startswith('../')):
                arg = AbsPageName(request.page.page_name, arg)

            argset.add(arg)
            continue

        # Ok, it's a page regexp; if there's something wrong with the
        # regexp, ignore it and move on
        try:
            arg = arg[1:-1]
            # Fix relative links
            if (arg.startswith('/') or
                arg.startswith('./') or
                arg.startswith('../')):
                arg = AbsPageName(request.page.page_name, arg)

            page_re = re.compile("%s" % arg)
        except:
            continue

        # Get all pages, check which of them match the supplied regexp
        for page in request.graphdata:
            if page_re.match(page):
                argset.add(page)

    return (argset, pageargs, keyspec, excluded_keys, orderspec,
            limitregexps, limitops, indirection_keys, styles)

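# An illustrative sketch of how _metatable_parseargs above splits an
# argument string (page and key names are hypothetical; the exact OPERATORS
# and REGEX_RE definitions come from elsewhere in this module):
#
#   'CategoryTest, ||Status||!Secret||, Priority>2, <<Status'
#       argset        -> set(['CategoryTest'])      (plain page argument)
#       keyspec       -> ['Status']                 (column order from ||...||)
#       excluded_keys -> ['Secret']                 (keys prefixed with '!')
#       limitops      -> {'Priority': [('2', '>')]} (operator comparison)
#       orderspec     -> [('<<', 'Status')]         (ordering directive)
#
# A value limit such as 'Owner=/Some.*/' would instead add a compiled
# regexp to limitregexps['Owner'].
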
def add_matching_redirs(request, loadedPage, loadedOuts, loadedMeta,
                        metakeys, key, curpage, curkey, prev='',
                        formatLinks=False, linkdata=None):
    if not linkdata:
        linkdata = dict()

    args = curkey.split('->')

    inlink = False
    if args[0] == 'gwikiinlinks':
        inlink = True
        args = args[1:]

    newkey = '->'.join(args[2:])
    last = False

    if not args:
        return

    if len(args) in [1, 2]:
        last = True

    if len(args) == 1:
        linked, target_key = prev, args[0]
    else:
        linked, target_key = args[:2]

    if inlink:
        pages = request.graphdata.get_in(curpage).get(linked, set())
    else:
        pages = request.graphdata.get_out(curpage).get(linked, set())

    for indir_page in set(pages):
        # Relative pages etc
        indir_page = AbsPageName(request.page.page_name, indir_page)

        if request.user.may.read(indir_page):
            pagedata = request.graphdata.getpage(indir_page)

            outs = pagedata.get('out', dict())
            metas = pagedata.get('meta', dict())

            # Add matches at first round
            if last:
                if target_key in metas:
                    loadedMeta.setdefault(key, list())
                    linkdata.setdefault(key, dict())
                    if formatLinks:
                        values = metas_to_abs_links(
                            request, indir_page, metas[target_key])
                    else:
                        values = metas[target_key]
                    loadedMeta[key].extend(values)
                    linkdata[key].setdefault(indir_page, list()).extend(values)
                else:
                    linkdata.setdefault(key, dict())
                    linkdata[key].setdefault(indir_page, list())
                continue

            elif not target_key in outs:
                continue

            # Handle inlinks separately
            if 'gwikiinlinks' in metakeys:
                inLinks = inlinks_key(request, loadedPage)

                loadedOuts[key] = inLinks
                continue

            linkdata = add_matching_redirs(request, loadedPage, loadedOuts,
                                           loadedMeta, metakeys, key,
                                           indir_page, newkey, target_key,
                                           formatLinks, linkdata)

    return linkdata

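# A sketch of the indirection traversal performed by add_matching_redirs
# (key and page names are hypothetical):
#
#   add_matching_redirs(request, loadedPage, loadedOuts, loadedMeta,
#                       metakeys, u'friend->name', u'PageA', u'friend->name')
#
# follows the 'friend' out-links of 'PageA' and, for every readable target
# page, appends that page's 'name' metas to loadedMeta[u'friend->name'].
# A key starting with 'gwikiinlinks->' walks in-links instead of out-links,
# and longer chains such as 'friend->friend->name' recurse one hop at a time.
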
def parse_text(request, page, text):
    pagename = page.page_name

    newreq = request
    newreq.page = lcpage = LinkCollectingPage(newreq, pagename, text)
    parserclass = importPlugin(request.cfg, "parser",
                               'link_collect', "Parser")
    myformatter = importPlugin(request.cfg, "formatter",
                               'nullformatter', "Formatter")
    lcpage.formatter = myformatter(newreq)
    lcpage.formatter.page = lcpage
    p = parserclass(lcpage.get_raw_body(), newreq, formatter=lcpage.formatter)
    lcpage.parser = p
    lcpage.format(p)

    # These are the match types that really should be noted
    linktypes = ["wikiname_bracket", "word",
                 "interwiki", "url", "url_bracket"]

    new_data = dict_with_getpage()

    # Add the page categories as links too
    categories, _, _ = parse_categories(request, text)

    # Process ACLs
    pi, _ = get_processing_instructions(text)
    for verb, args in pi:
        if verb == u'acl':
            # Combine ACLs from multiple lines into a single one
            acls = new_data.get(pagename, dict()).get('acl', '')
            acls = acls.strip() + args
            new_data.setdefault(pagename, dict())['acl'] = acls

    for metakey, value in p.definitions.iteritems():
        for ltype, item in value:
            dnode = None

            if ltype in ['url', 'wikilink', 'interwiki', 'email']:
                dnode = item[1]
                if '#' in dnode:
                    # Fix anchor links to point to the anchor page
                    url = False
                    for schema in config.url_schemas:
                        if dnode.startswith(schema):
                            url = True
                    # Do not fix URLs
                    if not url:
                        if dnode.startswith('#'):
                            dnode = pagename
                        else:
                            dnode = dnode.split('#')[0]

                if (dnode.startswith('/') or
                    dnode.startswith('./') or
                    dnode.startswith('../')):
                    # Fix relative links
                    dnode = AbsPageName(pagename, dnode)

                hit = item[0]

            elif ltype == 'category':
                # print "adding cat", item, repr(categories)
                dnode = item
                hit = item
                if item in categories:
                    add_link(new_data, pagename, dnode, u"gwikicategory")

            elif ltype == 'meta':
                add_meta(new_data, pagename, (metakey, item))

            elif ltype == 'include':
                # No support for regexp includes, for now!
                if not item[0].startswith("^"):
                    included = AbsPageName(pagename, item[0])
                    add_link(new_data, pagename, included, u"gwikiinclude")

            if dnode:
                add_link(new_data, pagename, dnode, metakey)

    return new_data

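# A typical call site for parse_text (a sketch; request and page objects come
# from MoinMoin, and the exact result shape depends on the add_link/add_meta
# helpers used above):
#
#   page = Page(request, u'TestPage')
#   new_data = parse_text(request, page, page.get_raw_body())
#
# new_data maps the page name to the metas and links collected by the
# link_collect parser: categories found on the page are recorded as
# u'gwikicategory' links, non-regexp includes as u'gwikiinclude' links,
# and ACL processing instructions end up under the 'acl' key.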