def standardizePageFooter(self, text):
    """
    Standardize page footer.

    Makes sure that interwiki links and categories are put to the
    correct position and into the right order. This combines the old
    instances standardizeInterwiki and standardizeCategories.

    The page footer has the following section in that sequence:
    1. categories
    2. ## TODO: template beyond categories ##
    3. additional information depending on local site policy
    4. interwiki links

    @param text: full wikitext of the page
    @return: wikitext with categories and interwiki links re-ordered
    """
    categories = None
    interwikiLinks = None

    # Pywikibot is no longer allowed to touch categories on the
    # German Wikipedia. See
    # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
    # ignoring nn-wiki of cause of the comment line above iw section
    if not self.template and '{{Personendaten' not in text and \
       '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \
       self.site.code not in ('et', 'it', 'bg', 'ru'):
        categories = textlib.getCategoryLinks(text, site=self.site)

    if not self.talkpage:
        subpage = False
        if self.template:
            # moved_links maps a site code to (template names, doc
            # subpage name); only the subpage location is needed here.
            try:
                tmpl, loc = moved_links[self.site.code]
                del tmpl
            except KeyError:
                loc = None
            if loc is not None and loc in self.title:
                subpage = True
        interwikiLinks = textlib.getLanguageLinks(
            text, insite=self.site, template_subpage=subpage)

        # Removing the interwiki
        text = textlib.removeLanguageLinks(text, site=self.site)

    # Adding categories
    if categories:
        # TODO: Sorting categories in alphabetic order.
        # e.g. using categories.sort()
        # TODO: Taking main cats to top
        text = textlib.replaceCategoryLinks(text, categories,
                                            site=self.site)
    # Adding the interwiki
    # (interwikiLinks is only non-None when not a talkpage, so the
    # `subpage` local is always defined when this branch runs)
    if interwikiLinks:
        text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                            site=self.site,
                                            template=self.template,
                                            template_subpage=subpage)
    return text
def add_template(self, source, dest, task, fromsite):
    """Place or remove the Link_GA/FA template on/from a page.

    @param source: page on the source wiki carrying the featured status
    @param dest: page on the target wiki to tag or untag
    @param task: featured-status task name (e.g. 'former')
    @param fromsite: site of *source*; its code is the template argument
    """
    def compile_link(site, templates):
        """compile one link template list."""
        findtemplate = '(%s)' % '|'.join(templates)
        return re.compile(
            r"\{\{%s\|%s\}\}" % (findtemplate.replace(u' ', u'[ _]'),
                                 site.code), re.IGNORECASE)

    tosite = dest.site
    add_tl, remove_tl = self.getTemplateList(tosite.code, task)
    re_Link_add = compile_link(fromsite, add_tl)
    re_Link_remove = compile_link(fromsite, remove_tl)

    text = dest.text
    # m1/m2 are None unless the respective template list is non-empty
    # AND the template already appears in the page text.
    m1 = add_tl and re_Link_add.search(text)
    m2 = remove_tl and re_Link_remove.search(text)
    changed = False
    interactive = self.getOption('interactive')
    if add_tl:
        if m1:
            pywikibot.output(u"(already added)")
        else:
            # insert just before interwiki
            if (not interactive or
                    pywikibot.input(
                        u'Connecting %s -> %s. Proceed? [Y/N]'
                        % (source.title(), dest.title())) in ['Y', 'y']):
                if self.getOption('side'):
                    # Placing {{Link FA|xx}} right next to
                    # corresponding interwiki
                    # NOTE(review): in this branch the search above
                    # failed, so m1 is None and m1.end() raises
                    # AttributeError when the 'side' option is set —
                    # TODO confirm and fix.
                    text = (text[:m1.end()] +
                            u" {{%s|%s}}" % (add_tl[0], fromsite.code) +
                            text[m1.end():])
                else:
                    # Moving {{Link FA|xx}} to top of interwikis
                    iw = textlib.getLanguageLinks(text, tosite)
                    text = textlib.removeLanguageLinks(text, tosite)
                    text += u"%s{{%s|%s}}%s" % (LS, add_tl[0],
                                                fromsite.code, LS)
                    text = textlib.replaceLanguageLinks(text, iw, tosite)
                changed = True
    if remove_tl:
        if m2:
            if (changed or  # Don't force the user to say "Y" twice
                    not interactive or
                    pywikibot.input(
                        u'Connecting %s -> %s. Proceed? [Y/N]'
                        % (source.title(), dest.title())) in ['Y', 'y']):
                text = re.sub(re_Link_remove, '', text)
                changed = True
        elif task == 'former':
            pywikibot.output(u"(already removed)")
    if changed:
        comment = i18n.twtranslate(tosite, 'featured-' + task,
                                   {'page': unicode(source)})
        try:
            dest.put(text, comment)
        except pywikibot.LockedPage:
            pywikibot.output(u'Page %s is locked!' % dest.title())
        except pywikibot.PageNotSaved:
            pywikibot.output(u"Page not saved")
def remove_links(self, object, qid, count): newtext = textlib.removeLanguageLinks(object['content'], self.enwp) #need to build a page object page = pywikibot.Page(self.enwp, object['title']) page.text = newtext summary = self.translate(editsummary).replace('$id', qid).replace( '$counter', str(count)) try: self.enwp.editpage( page, summary, nocreate=True, lastrev=int(object['revid']), token=self.enwp_token, skipec=True, useid=object['id'], timestamp=clean_timestamp(object['timestamp']), ) return except pywikibot.exceptions.LockedPage: print 'protected' return self.logger.error(object, 'Page was protected.', qid) except pywikibot.exceptions.EditConflict: print 'edit conflict' return self.logger.error(object, 'Edit conflict.', qid)
def getWordCount(self, text):
    """Return the number of words in *text* after stripping markup."""
    # Remove disabled parts, HTML, interwiki and category links before
    # counting, so only the remaining visible text is measured.
    stripped = text
    for cleaner in (textlib.removeDisabledParts,
                    textlib.removeHTMLParts,
                    textlib.removeLanguageLinks,
                    textlib.removeCategoryLinks):
        stripped = cleaner(stripped)
    return len(re.findall(r"[\w']+", stripped))
def removeEmptySections(self, text: str) -> str: """Cleanup empty sections.""" # userspace contains article stubs without nobots/in use templates if self.namespace == 2: return text skippings = ['comment', 'category'] skip_regexes = _get_regexes(skippings, self.site) # site defined templates skip_templates = { 'cs': ('Pahýl[ _]část',), # stub section } if self.site.code in skip_templates: for template in skip_templates[self.site.code]: skip_regexes.append( re.compile(r'\{\{\s*%s\s*\}\}' % template, re.I)) # empty lists skip_regexes.append(re.compile(r'(?m)^[\*#] *$')) # get stripped sections stripped_text = textlib.removeLanguageLinks(text, self.site, '\n') for reg in skip_regexes: stripped_text = reg.sub(r'', stripped_text) strip_sections = textlib.extract_sections( stripped_text, self.site)[1] # get proper sections header, sections, footer = textlib.extract_sections(text, self.site) # iterate stripped sections and create a new page body new_body = [] for i, strip_section in enumerate(strip_sections): current_heading = sections[i][0] try: next_heading = sections[i + 1][0] except IndexError: next_heading = '' current_dep = (len(current_heading) - len(current_heading.lstrip('='))) next_dep = len(next_heading) - len(next_heading.lstrip('=')) if strip_section[1].strip() or current_dep < next_dep: new_body.extend(sections[i]) return header + ''.join(new_body) + footer
def removeEmptySections(self, text):
    """Cleanup empty sections."""
    # Userspace contains article stubs without nobots/in-use
    # templates; leave those pages untouched.
    if self.namespace == 2:
        return text

    removal_regexes = _get_regexes(['comment', 'category'], self.site)
    # Site-defined stub-section templates that do not count as content.
    stub_templates = {'cs': ('Pahýl[ _]část',)}
    for tmpl in stub_templates.get(self.site.code, ()):
        removal_regexes.append(
            re.compile(r'\{\{\s*%s\s*\}\}' % tmpl, re.I))
    # A lone list bullet on its own line is not real content either.
    removal_regexes.append(re.compile(r'(?m)^[\*#] *$'))

    # Build a stripped copy of the page with ignorable markup removed,
    # then split both copies into sections.
    cleaned = textlib.removeLanguageLinks(text, self.site, '\n')
    for pattern in removal_regexes:
        cleaned = pattern.sub(r'', cleaned)
    cleaned_sections = textlib.extract_sections(cleaned, self.site)[1]
    header, sections, footer = textlib.extract_sections(text, self.site)

    # Keep a section when its stripped body still holds content, or
    # when it is the parent of a deeper subsection that follows it.
    kept = []
    for index, cleaned_section in enumerate(cleaned_sections):
        heading = sections[index][0]
        if index + 1 < len(sections):
            following = sections[index + 1][0]
        else:
            following = ''
        # Heading depth is the count of leading '=' characters.
        depth = len(heading) - len(heading.lstrip('='))
        following_depth = len(following) - len(following.lstrip('='))
        if cleaned_section[1].strip() or depth < following_depth:
            kept.extend(sections[index])
    return header + ''.join(kept) + footer
def remove_links(self, object, qid, count): newtext = textlib.removeLanguageLinks(object['content'], self.enwp) #need to build a page object page = pywikibot.Page(self.enwp, object['title']) page.text = newtext summary = self.translate(editsummary).replace('$id', qid).replace('$counter', str(count)) try: self.enwp.editpage( page, summary, nocreate=True, lastrev=int(object['revid']), token=self.enwp_token, skipec=True, useid=object['id'], timestamp=clean_timestamp(object['timestamp']), ) return except pywikibot.exceptions.LockedPage: print 'protected' return self.logger.error(object, 'Page was protected.',qid) except pywikibot.exceptions.EditConflict: print 'edit conflict' return self.logger.error(object, 'Edit conflict.',qid)
def standardizePageFooter(self, text):
    """
    Standardize page footer.

    Makes sure that interwiki links and categories are put into the
    correct position and into the right order. This combines the old
    instances of standardizeInterwiki and standardizeCategories.

    The page footer consists of the following parts in that sequence:
    1. categories
    2. additional information depending on the local site policy
    3. interwiki

    @param text: full wikitext of the page
    @return: wikitext with categories and interwiki links re-ordered
    """
    categories = []
    interwiki_links = []

    # get categories
    if not self.template:
        categories = textlib.getCategoryLinks(text, site=self.site)

    if not self.talkpage:
        subpage = False
        if self.template:
            # moved_links maps a site code to (template names, doc
            # subpage name); only the subpage location is needed here.
            try:
                tmpl, loc = moved_links[self.site.code]
                del tmpl
            except KeyError:
                loc = None
            if loc is not None and loc in self.title:
                subpage = True

        # get interwiki
        interwiki_links = textlib.getLanguageLinks(
            text, insite=self.site, template_subpage=subpage)

        # remove interwiki
        text = textlib.removeLanguageLinks(text, site=self.site)

    # add categories, main to top
    if categories:
        # TODO: Sort categories in alphabetic order, e.g. using
        # categories.sort()? (T100265)
        # TODO: Get main categories from Wikidata?
        main = pywikibot.Category(self.site, 'Category:' + self.title,
                                  sort_key=' ')
        if main in categories:
            # move the category matching the page title to the front
            categories.pop(categories.index(main))
            categories.insert(0, main)

        text = textlib.replaceCategoryLinks(text, categories,
                                            site=self.site)
    # add interwiki
    # (interwiki_links is only populated when not a talkpage, so the
    # `subpage` local is always defined when this branch runs)
    if interwiki_links:
        text = textlib.replaceLanguageLinks(text, interwiki_links,
                                            site=self.site,
                                            template=self.template,
                                            template_subpage=subpage)
    return text
def standardizePageFooter(self, text):
    """
    Standardize page footer.

    Makes sure that interwiki links, categories and star templates are
    put to the correct position and into the right order. This combines
    the old instances standardizeInterwiki and standardizeCategories

    The page footer has the following section in that sequence:
    1. categories
    2. ## TODO: template beyond categories ##
    3. additional information depending on local site policy
    4. stars templates for featured and good articles
    5. interwiki links

    @param text: full wikitext of the page
    @return: re-ordered wikitext
    """
    # Regex fragments matching the various per-language "star"
    # (featured/good article link) template names.
    starsList = [
        u'bueno',
        u'bom interwiki',
        u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
        u'destacado', u'destaca[tu]',
        u'enllaç[ _]ad',
        u'enllaz[ _]ad',
        u'leam[ _]vdc',
        u'legătură[ _]a[bcf]',
        u'liamm[ _]pub',
        u'lien[ _]adq',
        u'lien[ _]ba',
        u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
        u'liên[ _]kết[ _]chọn[ _]lọc',
        u'ligam[ _]adq',
        u'ligazón[ _]a[bd]',
        u'ligoelstara',
        u'ligoleginda',
        u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]',
        u'link[ _]km', u'link[ _]sm', u'linkfa',
        u'na[ _]lotura',
        u'nasc[ _]ar',
        u'tengill[ _][úg]g',
        u'ua',
        u'yüm yg',
        u'רא',
        u'وصلة مقالة جيدة',
        u'وصلة مقالة مختارة',
    ]

    categories = None
    interwikiLinks = None
    allstars = []

    # The PyWikipediaBot is no longer allowed to touch categories on the
    # German Wikipedia. See
    # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
    # ignoring nn-wiki of cause of the comment line above iw section
    if not self.template and '{{Personendaten' not in text and \
       '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \
       self.site.code not in ('et', 'it', 'bg', 'ru'):
        categories = textlib.getCategoryLinks(text, site=self.site)

    if not self.talkpage:  # and pywikibot.calledModuleName() <> 'interwiki':
        subpage = False
        if self.template:
            loc = None
            try:
                tmpl, loc = moved_links[self.site.code]
                del tmpl
            except KeyError:
                pass
            if loc is not None and loc in self.title:
                subpage = True
        interwikiLinks = textlib.getLanguageLinks(
            text, insite=self.site, template_subpage=subpage)

        # Removing the interwiki
        text = textlib.removeLanguageLinks(text, site=self.site)
        # Removing the stars' issue
        # search in a comment/nowiki-stripped copy, but substitute in
        # the real text so disabled occurrences are left alone
        starstext = textlib.removeDisabledParts(text)
        for star in starsList:
            regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                               % star, re.I)
            found = regex.findall(starstext)
            if found != []:
                text = regex.sub('', text)
                allstars += found

    # Adding categories
    if categories:
        # TODO: Sorting categories in alphabetic order.
        # e.g. using categories.sort()
        # TODO: Taking main cats to top
        # for name in categories:
        #     if re.search(u"(.+?)\|(.{,1}?)",name.title()) or name.title()==name.title().split(":")[0]+title:
        #         categories.remove(name)
        #         categories.insert(0, name)
        text = textlib.replaceCategoryLinks(text, categories,
                                            site=self.site)
    # Adding stars templates
    if allstars:
        text = text.strip() + self.site.family.interwiki_text_separator
        allstars.sort()
        for element in allstars:
            text += '%s%s' % (element.strip(), config.line_separator)
            pywikibot.log(u'%s' % element.strip())
    # Adding the interwiki
    if interwikiLinks:
        text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                            site=self.site,
                                            template=self.template,
                                            template_subpage=subpage)
    return text
def standardizePageFooter(self, text):
    """
    Standardize page footer.

    Makes sure that interwiki links and categories are put to the
    correct position and into the right order. This combines the old
    instances standardizeInterwiki and standardizeCategories.

    The page footer has the following section in that sequence:
    1. categories
    2. ## TODO: template beyond categories ##
    3. additional information depending on local site policy
    4. interwiki links

    @param text: full wikitext of the page
    @return: wikitext with categories and interwiki links re-ordered
    """
    categories = None
    interwikiLinks = None

    # Pywikibot is no longer allowed to touch categories on the
    # German Wikipedia. See
    # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
    # ignoring nn-wiki of cause of the comment line above iw section
    if not self.template and '{{Personendaten' not in text and \
       '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \
       self.site.code not in ('et', 'it', 'bg', 'ru'):
        categories = textlib.getCategoryLinks(text, site=self.site)

    if not self.talkpage:  # and pywikibot.calledModuleName() <> 'interwiki':
        subpage = False
        if self.template:
            # moved_links maps a site code to (template names, doc
            # subpage name); only the subpage location is needed here.
            loc = None
            try:
                tmpl, loc = moved_links[self.site.code]
                del tmpl
            except KeyError:
                pass
            if loc is not None and loc in self.title:
                subpage = True
        interwikiLinks = textlib.getLanguageLinks(
            text, insite=self.site, template_subpage=subpage)

        # Removing the interwiki
        text = textlib.removeLanguageLinks(text, site=self.site)

    # Adding categories
    if categories:
        # TODO: Sorting categories in alphabetic order.
        # e.g. using categories.sort()
        # TODO: Taking main cats to top
        # for name in categories:
        #     if (re.search(u"(.+?)\|(.{,1}?)",name.title()) or
        #             name.title() == name.title().split(":")[0] + title):
        #         categories.remove(name)
        #         categories.insert(0, name)
        text = textlib.replaceCategoryLinks(text, categories,
                                            site=self.site)
    # Adding the interwiki
    if interwikiLinks:
        text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                            site=self.site,
                                            template=self.template,
                                            template_subpage=subpage)
    return text
def add_template(self, source, dest, task, fromsite):
    """Place or remove the Link_GA/FA template on/from a page.

    @param source: page on the source wiki carrying the featured status
    @param dest: page on the target wiki to tag or untag
    @param task: featured-status task name (e.g. 'former')
    @param fromsite: site of *source*; its code is the template argument
    """
    def compile_link(site, templates):
        """Compile one link template list."""
        findtemplate = '(%s)' % '|'.join(templates)
        return re.compile(
            r'\{\{%s\|%s\}\}' % (findtemplate.replace(' ', '[ _]'),
                                 site.code), re.IGNORECASE)

    tosite = dest.site
    add_tl, remove_tl = self.getTemplateList(tosite.code, task)
    re_link_add = compile_link(fromsite, add_tl)
    re_link_remove = compile_link(fromsite, remove_tl)

    text = dest.text
    # m1/m2 are None unless the respective template list is non-empty
    # AND the template already appears in the page text.
    m1 = add_tl and re_link_add.search(text)
    m2 = remove_tl and re_link_remove.search(text)
    changed = False
    interactive = self.getOption('interactive')
    if add_tl:
        if m1:
            pywikibot.output('(already added)')
        else:
            # insert just before interwiki
            if (not interactive
                    or pywikibot.input_yn('Connecting %s -> %s. Proceed?'
                                          % (source.title(), dest.title()),
                                          default=False,
                                          automatic_quit=False)):
                if self.getOption('side'):
                    # Placing {{Link FA|xx}} right next to
                    # corresponding interwiki
                    # NOTE(review): in this branch the search above
                    # failed, so m1 is None and m1.end() raises
                    # AttributeError when the 'side' option is set —
                    # TODO confirm and fix.
                    text = (text[:m1.end()]
                            + ' {{%s|%s}}' % (add_tl[0], fromsite.code)
                            + text[m1.end():])
                else:
                    # Moving {{Link FA|xx}} to top of interwikis
                    iw = textlib.getLanguageLinks(text, tosite)
                    text = textlib.removeLanguageLinks(text, tosite)
                    text += '%s{{%s|%s}}%s' % (config.LS, add_tl[0],
                                               fromsite.code, config.LS)
                    text = textlib.replaceLanguageLinks(text, iw, tosite)
                changed = True
    if remove_tl:
        if m2:
            if (changed  # Don't force the user to say "Y" twice
                    or not interactive
                    or pywikibot.input_yn('Connecting %s -> %s. Proceed?'
                                          % (source.title(), dest.title()),
                                          default=False,
                                          automatic_quit=False)):
                text = re.sub(re_link_remove, '', text)
                changed = True
        elif task == 'former':
            pywikibot.output('(already removed)')
    if changed:
        comment = i18n.twtranslate(tosite, 'featured-' + task,
                                   {'page': source})
        try:
            dest.put(text, comment)
            self._save_counter += 1
        except pywikibot.LockedPage:
            pywikibot.output('Page %s is locked!' % dest.title())
        except pywikibot.PageSaveRelatedError:
            pywikibot.output('Page not saved')
def linkedImages(page):
    """Return a list of image pages that *page* links to.

    Only returns pages from "normal" internal links. Category links
    are omitted; embedded templates are omitted (but links within them
    are returned). All interwiki and external links are omitted.

    @param page: the page whose wikitext is scanned for image links
    @return: a list of Page objects.
    """
    Rlink = re.compile(r'\[\[(?P<title>[^\]\|\[]*)(\|[^\]]*)?\]\]')
    result = []
    try:
        thistxt = textlib.removeLanguageLinks(page.get(get_redirect=True),
                                              page.site)
    except pywikibot.NoPage:
        raise
    except pywikibot.IsRedirectPage:
        raise
    except pywikibot.SectionError:
        return []
    thistxt = textlib.removeCategoryLinks(thistxt, page.site)

    # remove HTML comments, pre, nowiki, and includeonly sections
    # from text before processing
    thistxt = textlib.removeDisabledParts(thistxt)

    # resolve {{ns:-1}} or {{ns:Help}}
    # thistxt = page.site.resolvemagicwords(thistxt)

    for match in Rlink.finditer(thistxt):
        try:
            title = match.group('title')
            title = title.replace("_", " ").strip(" ")
            if title == "":
                # empty link - problem in the page
                continue
            # convert relative link to absolute link
            if title.startswith(".."):
                # FIX: the original referenced self.title() although
                # this is a module-level function; *page* is the object
                # in scope here.
                parts = page.title().split('/')
                parts.pop()
                title = '/'.join(parts) + title[2:]
            elif title.startswith("/"):
                title = '%s/%s' % (page.title(), title[1:])
            if title.startswith("#"):
                # this is an internal section link
                continue
            if not page.site.isInterwikiLink(title):
                page2 = pywikibot.Page(page.site, title)
                try:
                    hash(str(page2))
                except Exception:
                    pywikibot.output(
                        "Page %s contains invalid link to [[%s]]."
                        % (page.title(), title))
                    continue
                if not page2.isImage():
                    continue
                if page2.title(withSection=False) and page2 not in result:
                    result.append(page2)
        except pywikibot.NoUsername:
            # no permission to read the linked page; skip it
            continue
    return result
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @param page: the page to add text to
    @param addText: text to add
    @param summary: edit summary; if None, the start of addText is used
    @param regexSkip: skip the page if this regex matches its text
    @param regexSkipUrl: skip the page if this regex matches its URL body
    @param always: save without asking the user for confirmation
    @param up: if True add text at the top instead of the bottom
    @param putText: if True save the page, else just return the new text
    @param oldTextGiven: if None fetch the page text, else use this text
    @param reorderEnabled: if True keep categories/interwiki below the
        added text (only relevant when up is False)
    @param create: create the page if it does not exist
    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})
    if putText:
        pywikibot.output('Loading {}...'.format(page.title()))

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)

    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if (reorderEnabled):
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += '{}{}'.format(config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext,
                                                   categoriesInside,
                                                   site, True)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext,
                                                   interwikiInside, site)
        else:
            newtext += '{}{}'.format(config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text

    if putText and text != newtext:
        pywikibot.output(
            color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                         page.title()))
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    error_count = 0
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if not putText:
            return (text, newtext, always)
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')
            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                pywikibot.bot.open_webbrowser(page)
        # when always is set, choice is never read here
        if always or choice == 'y':
            result = put_text(page, newtext, summary, error_count,
                              asynchronous=not always)
            if result is not None:
                return (result, result, always)
            # put_text returned None: retry after counting the error
            error_count += 1
def add_template(self, source, dest, task, fromsite):
    """Place or remove the Link_GA/FA template on/from a page.

    @param source: page on the source wiki carrying the featured status
    @param dest: page on the target wiki to tag or untag
    @param task: featured-status task name (e.g. 'former')
    @param fromsite: site of *source*; its code is the template argument
    """
    def compile_link(site, templates):
        """Compile one link template list."""
        findtemplate = '(%s)' % '|'.join(templates)
        return re.compile(r"\{\{%s\|%s\}\}"
                          % (findtemplate.replace(u' ', u'[ _]'),
                             site.code), re.IGNORECASE)

    tosite = dest.site
    add_tl, remove_tl = self.getTemplateList(tosite.code, task)
    re_Link_add = compile_link(fromsite, add_tl)
    re_Link_remove = compile_link(fromsite, remove_tl)

    text = dest.text
    # m1/m2 are None unless the respective template list is non-empty
    # AND the template already appears in the page text.
    m1 = add_tl and re_Link_add.search(text)
    m2 = remove_tl and re_Link_remove.search(text)
    changed = False
    interactive = self.getOption('interactive')
    if add_tl:
        if m1:
            pywikibot.output(u"(already added)")
        else:
            # insert just before interwiki
            if (not interactive or
                    pywikibot.input_yn(
                        u'Connecting %s -> %s. Proceed?'
                        % (source.title(), dest.title()),
                        default=False, automatic_quit=False)):
                if self.getOption('side'):
                    # Placing {{Link FA|xx}} right next to
                    # corresponding interwiki
                    # NOTE(review): in this branch the search above
                    # failed, so m1 is None and m1.end() raises
                    # AttributeError when the 'side' option is set —
                    # TODO confirm and fix.
                    text = (text[:m1.end()] +
                            u" {{%s|%s}}" % (add_tl[0], fromsite.code) +
                            text[m1.end():])
                else:
                    # Moving {{Link FA|xx}} to top of interwikis
                    iw = textlib.getLanguageLinks(text, tosite)
                    text = textlib.removeLanguageLinks(text, tosite)
                    text += u"%s{{%s|%s}}%s" % (config.LS, add_tl[0],
                                                fromsite.code, config.LS)
                    text = textlib.replaceLanguageLinks(text, iw, tosite)
                changed = True
    if remove_tl:
        if m2:
            if (changed or  # Don't force the user to say "Y" twice
                    not interactive or
                    pywikibot.input_yn(
                        u'Connecting %s -> %s. Proceed?'
                        % (source.title(), dest.title()),
                        default=False, automatic_quit=False)):
                text = re.sub(re_Link_remove, '', text)
                changed = True
        elif task == 'former':
            pywikibot.output(u"(already removed)")
    if changed:
        comment = i18n.twtranslate(tosite, 'featured-' + task,
                                   {'page': source})
        try:
            dest.put(text, comment)
            self._save_counter += 1
        except pywikibot.LockedPage:
            pywikibot.output(u'Page %s is locked!' % dest.title())
        except pywikibot.PageNotSaved:
            pywikibot.output(u"Page not saved")
def add_text(
    page,
    addText: str,
    summary: Optional[str] = None,
    regexSkip: Optional[str] = None,
    regexSkipUrl: Optional[str] = None,
    always: bool = False,
    up: bool = False,
    putText: bool = True,
    oldTextGiven: Optional[str] = None,
    reorderEnabled: bool = True,
    create: bool = False
) -> Union[Tuple[bool, bool, bool], Tuple[str, str, bool]]:
    """
    Add text to a page.

    @param page: The page to add text to
    @type page: pywikibot.page.BasePage
    @param addText: Text to add
    @param summary: Summary of changes. If None, beginning of addText is used.
    @param regexSkip: Abort if text on page matches
    @param regexSkipUrl: Abort if full url matches
    @param always: Always add text without user confirmation
    @param up: If True, add text to top of page, else add at bottom.
    @param putText: If True, save changes to the page, else return
        (_, newtext, _)
    @param oldTextGiven: If None fetch page text, else use this text
    @param reorderEnabled: If True place text above categories and
        interwiki, else place at page bottom. No effect if up = False.
    @param create: Create page if it does not exist
    @return: If putText=True: (success, success, always)
        else: (_, newtext, _)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})
    if putText:
        pywikibot.output('Loading {}...'.format(page.title()))

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)

    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', '\n')
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += '\n' + addText
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext,
                                                   categoriesInside,
                                                   site, True)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext,
                                                   interwikiInside, site)
        else:
            newtext += '\n' + addText
    else:
        newtext = addText + '\n' + text

    if not putText:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        return (text, newtext, always)

    if text != newtext:
        pywikibot.output(
            color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                         page.title()))
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    error_count = 0
    while True:
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')
            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                pywikibot.bot.open_webbrowser(page)
                continue
        # either always or choice == 'y' is selected
        result = put_text(page, newtext, summary, error_count,
                          asynchronous=not always)
        if result is not None:
            return (result, result, always)
        error_count += 1
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @param page: the page to add text to
    @param addText: text to add
    @param summary: edit summary; if None, the start of addText is used
    @param regexSkip: skip the page if this regex matches its text
    @param regexSkipUrl: skip the page if this regex matches its URL body
    @param always: save without asking the user for confirmation
    @param up: if True add text at the top instead of the bottom
    @param putText: if True save the page, else just return the new text
    @param oldTextGiven: if None fetch the page text, else use this text
    @param reorderEnabled: if True keep categories, star templates and
        interwiki below the added text (only relevant when up is False)
    @param create: create the page if it does not exist
    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.
    # NOTE(review): the loop below reads `starsList`, which is presumably
    # a module-level list of those regex fragments — confirm it is
    # defined at module scope.
    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!"
                                 % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if (reorderEnabled):
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext,
                                                   categoriesInside,
                                                   site, True)
            # Dealing the stars' issue
            # search in a comment/nowiki-stripped copy, but substitute
            # in newtext so disabled occurrences are left alone
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(
                    '(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext,
                                                   interwikiInside, site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n',
                    automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    pywikibot.bot.open_webbrowser(page)
            # when always is set, choice is never read here
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(
                            u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s'
                                     % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @param page: the page to edit
    @param addText: the wikitext to add; literal '\\n' sequences are
        translated into the configured line separator
    @param summary: edit summary; defaults to the i18n 'add_text-adding'
        message built from the first 200 characters of addText
    @param regexSkip: skip the page if this regex matches the page text
    @param regexSkipUrl: skip the page if this regex matches the raw page HTML
    @param always: save without asking for confirmation
    @param up: if True, prepend the text at the top instead of appending
    @param putText: if True, save the page; if False, only compute and
        return (text, newtext, always) without saving
    @param oldTextGiven: use this text instead of fetching the page
    @param reorderEnabled: if True, keep categories, star templates and
        interwiki links in the canonical footer order after adding the text
    @param create: create the page if it does not exist
    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. starsList (module-level) holds the templates used
    # (in regex format) to make the stars appear.
    errorCount = 0

    if putText:
        pywikibot.output('Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output("%s doesn't exist, creating it!"
                                 % page.title())
                text = ''
            else:
                pywikibot.output("%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output("%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven

    # Understand if the bot has to skip the page or not.
    # In this way you can use both -except and -excepturl.
    if regexSkipUrl is not None:
        url = page.full_url()
        # NOTE(review): site.getUrl() is a deprecated compat-era API —
        # confirm the running pywikibot version still provides it.
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result:
            pywikibot.output(
                'Exception! regex (or word) used with -exceptUrl '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result:
            pywikibot.output(
                'Exception! regex (or word) used with -except '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)

    # Translating the \\n into binary \n.  Done before branching on `up`
    # so the translation also applies when the text is put on top
    # (previously it was skipped in the `up` branch).
    addText = addText.replace('\\n', config.line_separator)

    if not up:
        # If not up, text put below: detach the footer (categories and
        # interwiki), append the text, then re-attach the footer in order.
        newtext = text
        if reorderEnabled:
            # Getting and deleting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting and removing the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            newtext = textlib.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += '%s%s' % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Dealing the stars' issue: strip any Link FA/GA star templates
            # and re-add them sorted, just above the interwiki block.
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(
                    r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
                found = regex.findall(starstext)
                if found:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    # NOTE(review): config.LS is used here while the rest of
                    # this function uses config.line_separator — confirm both
                    # exist and hold the same value.
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += '%s%s' % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text

    if putText and text != newtext:
        pywikibot.output('\n\n>>> \03{lightpurple}%s\03{default} <<<'
                         % page.title())
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n',
                    automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    pywikibot.bot.open_webbrowser(page)
            if always or choice == 'y':
                try:
                    if always:
                        # Synchronous put so a failure is reported before
                        # the next page is processed in -always mode.
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output('Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output('Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError('Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output('Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output('Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)