def handle_templates(self, raw, title="Unnamed"):
    """Return a copy of ``raw`` where templates are expanded, removed or kept.

    The markup is parsed, every item of the parse result that is a
    ``nodes.Template`` is flattened through ``self.exp``, and all other
    items are passed through untouched. The resulting item list is handed
    to ``self._handle_marks`` and the list it fills is rendered by
    ``self._expand``.

    @param raw: wiki markup to process
    @param title: title passed to ``self._update_expander``
    @returns: the value produced by ``self._expand``
    """
    self._update_expander(title)
    tree = parser.parse(
        raw,
        included=False,
        replace_tags=self.exp.uniquifier.replace_tags,
    )

    flattened = []
    for node in tree:
        logger.debug(repr(node))
        if not isinstance(node, nodes.Template):
            flattened.append(node)
            continue
        # Templates are flattened into a scratch list first and then
        # spliced into the overall item list.
        pieces = []
        template_args = expander.get_template_args(node, self.exp)
        node.flatten(self.exp, template_args, pieces)
        flattened.extend(pieces)

    marked = []
    self._handle_marks(flattened, marked)

    cache_size = len(self.exp.parsedTemplateCache)
    logger.debug('cache size: ' + str(cache_size))
    return self._expand(marked)
def write_templates_into_f(raw, f):
    """Extract the templates from the raw text and write them to the file @p f.

    The parse tree of ``raw`` is walked with an explicit stack. For every
    template node whose first child is a string, a ``Template\\t<name>``
    header line is written, followed by one line per argument and a
    terminating blank line. Everything is written UTF-8 encoded.

    @param raw: wiki markup to scan for templates
    @param f: object with a ``write`` method that receives the output
    @returns: True if every argument line was written; False if writing
        at least one argument raised TypeError or AttributeError (that
        argument is skipped)
    """
    from mwlib.templ.misc import DictDB
    from mwlib.expander import get_template_args
    from mwlib.templ.evaluate import Expander
    from mwlib.templ.parser import parse
    from mwlib.templ.nodes import Template

    expander = Expander("", wikidb=DictDB())
    pending = [parse(raw, replace_tags=expander.replace_tags)]
    all_written = True

    while pending:
        node = pending.pop()
        if isinstance(node, basestring):
            # Plain text has no children to descend into.
            continue

        if isinstance(node, Template) and isinstance(node[0], basestring):
            args = get_template_args(node, expander)
            name = unicode(node[0]).strip().replace("\n", "")
            f.write(u"Template\t{0}\n".format(name).encode("utf-8"))
            # Arguments are fetched by position through indexing.
            for position in range(len(args)):
                try:
                    f.write(unicode(args[position]).encode("utf-8") + "\n")
                except (TypeError, AttributeError):
                    # AttributeError: handling some mwlib bug, it raises
                    # this exception somehow.
                    all_written = False
            f.write("\n")

        # Queue the children so nested templates are visited as well.
        pending.extend(node)

    return all_written
def write_templates_into_f(raw, f):
    """Extract the templates from the raw text and write them to the file @p f.

    Walks the parse tree of ``raw`` depth-first using a work list. Each
    template node whose first child is a string produces a
    ``Template\\t<name>`` line, one line per argument, and a closing blank
    line; all output is UTF-8 encoded before being written.

    @param raw: wiki markup to scan for templates
    @param f: object with a ``write`` method that receives the output
    @returns: False if writing any argument raised TypeError or
        AttributeError (such arguments are skipped), True otherwise
    """
    from mwlib.templ.misc import DictDB
    from mwlib.expander import get_template_args
    from mwlib.templ.evaluate import Expander
    from mwlib.templ.parser import parse
    from mwlib.templ.nodes import Template

    exp = Expander('', wikidb=DictDB())
    work = [parse(raw, replace_tags=exp.replace_tags)]
    ok = True

    while work:
        current = work.pop()
        if isinstance(current, basestring):
            # Strings are leaves; nothing to write or descend into.
            continue

        is_named_template = (
            isinstance(current, Template)
            and isinstance(current[0], basestring)
        )
        if is_named_template:
            template_args = get_template_args(current, exp)
            header = u"Template\t{0}\n".format(
                unicode(current[0]).strip().replace("\n", "")
            )
            f.write(header.encode("utf-8"))
            # Arguments are read by index rather than by iteration.
            for idx in range(len(template_args)):
                try:
                    f.write(unicode(template_args[idx]).encode("utf-8") + "\n")
                except (TypeError, AttributeError):
                    # AttributeError: handling some mwlib bug, it raises
                    # this exception somehow.
                    ok = False
            f.write("\n")

        # Children are pushed so nested templates get processed too.
        work.extend(current)

    return ok
def _expand(self, parsed): markup = u'' for token in parsed: if isinstance(token, MyTemplate): args = expander.get_template_args(token, self.exp) m = ["\n"] token.flatten(self.exp, args, m) m[0] = u'' m = u''.join(m) markup += m elif isinstance(token, basestring): markup += token elif isinstance(token, Sequence): for e in token: markup += self._expand(e) return markup
def get_authors_from_template_args(template):
    """Derive a list of authors from the arguments of ``template``.

    If the template has a non-empty "Author" argument, it is parsed and
    its stripped display text is returned as a one-element list, provided
    that text is non-empty. Otherwise, if the argument list has any
    ``args``, the values looked up under indices ``0 .. len(args.args) - 1``
    are joined with newlines and scanned with ``getUserLinks``. If neither
    yields anything, an empty list is returned.

    @param template: template node whose arguments are inspected
    @returns: list of author names or user-link targets, possibly empty
    """
    args = get_template_args(template, expander)

    if args.get("Author", None):
        # The Author value is not scanned for user links here (that code
        # path is disabled); only its display text is used.
        tree = uparser.parseString("", raw=args["Author"], wikidb=wikidb)
        advtree.extendClasses(tree)
        display_text = tree.getAllDisplayText().strip()
        if display_text:
            return [display_text]

    if not args.args:
        return []

    indexed_values = [args.get(idx, u"") for idx in range(len(args.args))]
    return getUserLinks("\n".join(indexed_values))
def get_authors_from_template_args(template):
    """Work out the authors named by the arguments of ``template``.

    A non-empty 'Author' argument wins: it is parsed and, if its stripped
    display text is non-empty, that text is returned as the only list
    element. Failing that, the values looked up under indices
    ``0 .. len(args.args) - 1`` (if ``args.args`` is non-empty) are joined
    with newlines and passed to ``getUserLinks``. With nothing to go on,
    the result is an empty list.

    @param template: template node whose arguments are inspected
    @returns: list of author names or user-link targets, possibly empty
    """
    args = get_template_args(template, expander)

    has_author = args.get('Author', None)
    if has_author:
        # User-link extraction on the Author value is disabled; the
        # display text of the parsed value is used instead.
        parsed_author = uparser.parseString('', raw=args['Author'], wikidb=wikidb)
        advtree.extendClasses(parsed_author)
        text = parsed_author.getAllDisplayText().strip()
        if text:
            return [text]

    if args.args:
        joined = '\n'.join(
            [args.get(pos, u'') for pos in range(len(args.args))]
        )
        return getUserLinks(joined)

    return []
def getContributors(self, name, wikidb=None):
    """Return list of image contributors

    Lookup order: user links found in the 'Author' argument of the
    'Information' template, then the display text of that argument, then
    user links anywhere in the description page, and finally
    ``wikidb.getAuthors``.

    @param name: image name without namespace (e.g. without "Image:")
    @type name: unicode

    @param wikidb: WikiDB instance (optional)
    @type wikidb: object

    @returns: list of contributors
    @rtype: [unicode] or None
    """
    desc_url = self.getDescriptionURL(name)
    if desc_url is None:
        return None

    # Note: We're always guessing the API helper b/c we'll get problems when
    # fetching from en.wp if we should've used commons.wikimedia.org instead.
    # A passed wikidb is only used as a fallback here.
    api_helper = get_api_helper(desc_url)
    if api_helper is not None:
        wikidb = WikiDB(api_helper=api_helper)
    elif wikidb is None:
        return None

    title = 'Image:%s' % name
    raw = wikidb.getRawArticle(title)
    if not raw:
        return None

    expander = Expander(u'', title, wikidb)

    def getUserLinks(text):
        """Return the sorted, de-duplicated user-link targets in ``text``."""
        def isUserLink(node):
            if not isinstance(node, parser.NamespaceLink):
                return False
            return node.namespace == namespace.NS_USER

        tree = uparser.parseString(title, raw=text, wikidb=wikidb)
        targets = set([link.target for link in tree.filter(isUserLink)])
        return sorted(targets)

    template = find_template(raw, 'Information')
    if template is not None:
        template_args = get_template_args(template, expander)
        author = template_args.get('Author', '').strip()
        if author:
            users = getUserLinks(author)
            if users:
                return sorted(set(users))

            # No user links in the Author value: fall back to its text.
            node = uparser.parseString('', raw=author, wikidb=wikidb)
            advtree.extendClasses(node)
            return [node.getAllDisplayText()]

    users = getUserLinks(raw)
    if users:
        return users

    return wikidb.getAuthors(title)