class WsStatus(CanonicalBot):
    """Bot that refreshes the statistics page on de.wikisource.

    Loads the target page, injects fresh statistics rows after the
    ``<!--BOT:...-->`` placeholders, and saves the result.
    """

    def __init__(self, wiki, debug):
        CanonicalBot.__init__(self, wiki, debug)
        self.text = None        # working copy of the page text
        self.stat_page = None   # Page object of the statistics page

    def task(self):
        # In debug mode write to a user sandbox instead of the live page.
        if self.debug:
            lemma = 'Benutzer:THEbotIT/' + self.bot_name
        else:
            lemma = 'WS:Statistik'
        self.load_text_from_site(lemma)
        # self.new_row(str(RowSeitenstatistik(self.wiki, self.logger)), 'SEITENSTATISTIK')
        self.new_row(str(RowBearbeitungsstand(self.wiki, self.logger)), 'BEARBEITUNGSSTAND')
        self.save_text_to_site()
        return True

    def new_row(self, row, placeholder):
        """Insert *row* directly after its placeholder marker, keeping the
        marker in place for the next run."""
        self.text = re.sub('<!--BOT:{}-->'.format(placeholder),
                           '<!--BOT:{}-->\n{}'.format(placeholder, row),
                           self.text)

    def load_text_from_site(self, lemma):
        """Fetch *lemma* into self.text; keep the Page in self.stat_page."""
        self.logger.info('Load text from {}'.format(lemma))
        self.stat_page = Page(self.wiki, lemma)
        self.text = self.stat_page.text

    def save_text_to_site(self):
        """Write the edited text back to the page loaded earlier."""
        self.stat_page.text = self.text
        self.stat_page.save('Statistik wurde aktualisiert.', botflag=True)
def task(self):  # pragma: no cover
    """Update the workshop user page; the edit is only saved outside
    debug mode."""
    page = Page(self.wiki, "Benutzer:THE IT/Werkstatt")
    new_text = self.replace_in_page(page.text)
    page.text = new_text
    if not self.debug:
        page.save("Time is ticking")
    return True
def get_wiki_bearbeitungen(self):
    """Return the wiki's total edit count as a string.

    Saves ``{{subst:NUMBEROFEDITS}}`` to a dummy page so the magic word
    is substituted server-side, then re-reads the page to get the number.
    """
    dummypage = Page(self.wiki, 'Benutzer:THEbotIT/dummy')
    dummypage.text = '{{subst:NUMBEROFEDITS}}'
    dummypage.save('get_new_number')
    # Recreate the Page object to discard the cached text and fetch the
    # substituted value from the server.
    del dummypage
    dummypage = Page(self.wiki, 'Benutzer:THEbotIT/dummy')
    return dummypage.text
def finish_task(self):
    """Append collected error entries to the maintenance page (live runs
    only), then run the base class teardown."""
    if self.data:
        if not self.debug:
            page = Page(self.wiki, "RE:Wartung:Strukturfehler")
            page.text = page.text + self._build_entry()
            page.save("Neue Fehlermeldungen", botflag=True)
    super().finish_task()
def find_sub_templates(
        lookingfor: str, page: Page, wholeword: bool, matchcase: bool
):
    """Return the pages among *page* and its transcluded templates whose
    text contains *lookingfor*.

    :param lookingfor: search string
    :param page: start page (redirects are resolved first)
    :param wholeword: match only on word boundaries
    :param matchcase: match case-sensitively
    :return: dict-values view of matching pages, de-duplicated by title
    """
    if page.isRedirectPage():
        page = page.getRedirectTarget()
    if not matchcase:
        lookingfor = lookingfor.lower()
    # Compile the whole-word pattern once; None means plain substring
    # search.  (The original bound `pattern` conditionally and relied on a
    # lint suppression for the later use.)
    pattern = re.compile(r'\b' + re.escape(lookingfor) + r'\b') if wholeword else None

    def _contains(text: str) -> bool:
        # Case folding mirrors the treatment of `lookingfor` above.
        if not matchcase:
            text = text.lower()
        return bool(pattern.search(text)) if pattern else lookingfor in text

    found_templates = []
    if _contains(page.text):
        found_templates.append(page)
    for sub_template in page.templates(content=True):
        if sub_template.isRedirectPage():
            sub_template = sub_template.getRedirectTarget()
        if _contains(sub_template.text):
            found_templates.append(sub_template)
    # Remove duplicate templates: keep one Page per title.
    return {f.title(): f for f in found_templates}.values()
def get_all_sites(self):
    """Return the wiki's total article count as a string.

    Saves ``{{subst:NUMBEROFARTICLES}}`` to a dummy page so the magic
    word is substituted server-side, then re-reads the page.
    """
    dummypage = Page(self.wiki, 'Benutzer:THEbotIT/dummy')
    dummypage.text = '{{subst:NUMBEROFARTICLES}}'
    dummypage.save('get_new_number')
    # Recreate the Page object to discard the cached text and fetch the
    # substituted value from the server.
    del dummypage
    dummypage = Page(self.wiki, 'Benutzer:THEbotIT/dummy')
    # Page.text is already a str; the former '{}'.format(...) wrapper was
    # a no-op and has been dropped.
    return dummypage.text
def process(day):
    """Process one day of the bot run.

    Builds a dated report section listing pages recreated after having
    been deleted, then appends it to the monthly journal page.

    :param day: python date of the day to process
    """
    if params.verbose:
        print("processing Journal des recréations ({day})".format(day=format_date(day)))
    start = to_date(day)
    end = to_date(day+ONE_DAY)
    result = "\n== {} ==\n".format(format_date(day))
    for i,page in enumerate(creation_log(start,end),1):
        if params.verbose:
            print (i,page["timestamp"])
        # Only report creations whose title also appears in the delete log,
        # i.e. pages that were recreated after a deletion.
        dl = deletelog(page["title"])
        if dl:
            r = ("* {{{{a-court|{title}}}}} <small>([[{pas}|PàS]])</small> supprimé le {date} recréé par {{{{u|{user}}}}} \n"
                 .format(title = wiki_param(page["title"]) ,
                         pas = wiki_param("Discussion:"+page["title"]+"/Suppression"),
                         user = wiki_param(page["user"]),
                         date = format_date(from_date(dl["timestamp"]))))
            if params.verbose:
                print(r)
            result += r
    # Append the day's section to the monthly journal page.
    page = Page(Site(), params.prefix+"/"+format_date(day,skip_day=True))
    try:
        result = page.get()+result
    except NoPage:
        pass
    page.put(result,comment="Journal des recréations ({day})".format(day=format_date(day)))
def make_magazines(self, dictionary_of_magazines_by_year):
    """Create or update one wiki page per magazine issue.

    :param dictionary_of_magazines_by_year: mapping year -> iterable of
        magazine/issue identifiers
    """
    for idx_year, year in enumerate(dictionary_of_magazines_by_year):
        magazines = dictionary_of_magazines_by_year[year]
        self.logger.debug(f"make_mag_year {idx_year + 1}/"
                          f"{len(dictionary_of_magazines_by_year)}")
        for idx_mag, magazine in enumerate(magazines):
            self.logger.debug("make_mag_mag {idx}/{len} ... issue:{year}/{mag}"
                              .format(idx=idx_mag + 1, len=len(magazines),
                                      year=year, mag=magazine))
            # Known special case that must not be auto-created.
            if year == "1986" and magazine == "31":
                self.logger.warning("There is magazine 1986, 31, "
                                    "this is special, no creating here")
                continue
            if self.debug:
                lemma = Page(self.wiki, "Benutzer:THEbotIT/Test")
            else:
                lemma = Page(self.wiki, f"Die Gartenlaube ({year})/Heft {int(magazine):d}")
            new_text = self.make_magazine(year, magazine)
            if new_text:
                # Compare stripped texts (via hash) to skip no-op saves.
                if hash(new_text.strip()) != hash(lemma.text.strip()):
                    self.logger.debug("Print [[Die Gartenlaube ({year})/Heft {magazine}]]."
                                      .format(year=year, magazine=magazine))
                    # Different edit summaries for update vs. first creation.
                    if lemma.text != '':
                        lemma.text = new_text
                        lemma.save("Automatische Aktualisierung des Heftes", botflag=True)
                    else:
                        lemma.text = new_text
                        lemma.save("automatische Hefterstellung", botflag=True)
                else:
                    self.logger.debug("Keine Änderung im Text ({year}/{magazine})."
                                      .format(year=year, magazine=magazine))
def history(self, fertig, korrigiert, unkorrigiert):
    """Append one statistics row to the history table on the bot's page.

    Each argument is a two-element sequence per proofreading status;
    index [1] and [0] are written as-is plus their ratio [0]/[1].
    """
    page = Page(self.wiki, "Benutzer:THEbotIT/" + self.bot_name)
    temp_text = page.text
    composed_text = "".join(["|-\n", "|",
                             self.timestamp.start_of_run.strftime("%Y%m%d-%H%M"),
                             "||", str(unkorrigiert[1]),
                             "||", str(unkorrigiert[0]),
                             "||", str(int(unkorrigiert[0] / unkorrigiert[1])),
                             "||", str(korrigiert[1]),
                             "||", str(korrigiert[0]),
                             "||", str(int(korrigiert[0] / korrigiert[1])),
                             "||", str(fertig[1]),
                             "||", str(fertig[0]),
                             "||", str(int(fertig[0] / fertig[1])),
                             "\n<!--new line-->"])
    # Replace the marker so the row lands at the table's end while the
    # marker survives for the next run.
    temp_text = re.sub("<!--new line-->", composed_text, temp_text)
    page.text = temp_text
    page.save("RE Statistik aktualisiert", botflag=True)
def task(self):  # pragma: no cover
    """Rename the PND parameter to GND on all pages that use the
    {{ADBDaten}} template and are still categorised as lacking a GND link."""
    regex = re.compile(r"\n\|PND=")
    searcher = PetScan()
    searcher.add_yes_template("ADBDaten")
    searcher.add_positive_category("ADB:Ohne GND-Link")
    lemma_list = searcher.run()
    for lemma in lemma_list:
        page = Page(self.wiki, lemma["title"])
        temp_text = page.text
        # Only touch pages that actually contain a PND parameter line.
        if regex.search(temp_text):
            self.logger.info(f"change {lemma['title']}")
            temp_text = regex.sub("\n|GND=", temp_text)
            page.text = temp_text
            page.save("PND -> GND", botflag=True)
    return True
def task(self):
    """Rebuild the author list page and save it only when it changed."""
    lemma_list = self._run_searcher()
    self._build_database(lemma_list)
    # In debug mode write to a user subpage instead of the live list.
    if self.debug:
        dump = Page(self.wiki, f"Benutzer:THEbotIT/{self.bot_name}")
    else:
        dump = Page(self.wiki, "Liste der Autoren")
    old_text = dump.text
    new_text = self._convert_to_table()
    if new_text[150:] != old_text[150:]:  # compare all but the date
        dump.text = new_text
        dump.save("Die Liste wurde auf den aktuellen Stand gebracht.", botflag=True)
    else:
        self.logger.info("Heute gab es keine Änderungen, "
                         "daher wird die Seite nicht überschrieben.")
    return True
def task(self):  # pragma: no cover
    """Remove outdated templates from all matching RE pages, collecting
    conversion failures into an ERROTask report."""
    error_task = ERROTask(wiki=self.wiki, debug=False, logger=self.logger)
    for lemma in self.search_pages():
        page = Page(self.wiki, lemma["title"])
        temp_text = page.text
        try:
            temp_text = self.convert_all(temp_text)
            page.text = temp_text
            re_page = RePage(page)
            if not self.debug:
                re_page.save("Entfernen veralteter Vorlagen.")
        except (ReDatenException, ValueError):
            # Record only the final line of the traceback as the error text.
            error = traceback.format_exc().splitlines()[-1]
            error_task.task(lemma["title"], error)
    error_task.finish_task()
    # Report failure while any page still matches, so the job reruns.
    if self.search_pages():
        return False
    return True
def is_edited_by_bot_only(page: pywikibot.Page) -> bool:
    """Return True when every contributor to *page* looks like a bot.

    A contributor counts as a bot when its name starts or ends with
    'bot' (case-insensitive) or is listed in the module-level ``bots``.
    Every non-bot contributor found is printed.
    """
    # Renamed from `is_edited_by_bot_only`, which shadowed the function
    # name inside its own body.
    edited_by_bots_only = True
    for contributor in set(page.contributors()):
        name = contributor.lower()
        if not name.endswith('bot') and not name.startswith('bot') \
                and contributor not in bots:
            print(contributor, ' is not a bot!')
            # Keep scanning so every non-bot contributor gets reported.
            edited_by_bots_only = False
    return edited_by_bots_only
def task(self):
    """Normalize the BILD parameter on index pages: drop fixed pixel
    widths and ensure the image is rendered as a thumbnail."""
    lemma_list = self._search()
    for idx, lemma in enumerate(lemma_list):
        page = Page(self.wiki, title='Index:{}'.format(lemma['title']))
        self.logger.info('{}/{}:{}'.format(idx, len(lemma_list), page))
        match = regex_picture.search(page.text)
        if match:
            self.logger.info(match.group(1))
            # Raw strings fix the invalid-escape-sequence warnings the
            # original non-raw patterns produced.
            temp = re.sub(r'\|\d{2,3}px', '', match.group(1))
            if not re.search(r'thumb', match.group(1)):
                temp = temp + '|thumb'
            self.logger.info(temp)
            if temp == match.group(1):
                self.logger.info('nothing to do here.')
                continue
            temp = '|BILD=[[{}]]'.format(temp)
            temp_text = regex_picture.sub(temp, page.text)
            page.text = temp_text
            page.save(botflag=True, summary='set thumb as parameter')
    return True
def history(self, fertig: Tuple[int, int], korrigiert: Tuple[int, int],
            unkorrigiert: Tuple[int, int]):
    """Append one statistics row to the history table on the bot's page.

    Each argument is a (value, count) pair per proofreading status;
    index [1] and [0] are written as-is plus their ratio [0]/[1].
    """
    page = Page(self.wiki, "Benutzer:THEbotIT/" + self.bot_name)
    temp_text = page.text
    composed_text = "".join([
        "|-\n", "|", self.timestamp.start_of_run.strftime("%Y%m%d-%H%M"),
        "||", str(unkorrigiert[1]),
        "||", str(unkorrigiert[0]),
        "||", str(int(unkorrigiert[0] / unkorrigiert[1])),
        "||", str(korrigiert[1]),
        "||", str(korrigiert[0]),
        "||", str(int(korrigiert[0] / korrigiert[1])),
        "||", str(fertig[1]),
        "||", str(fertig[0]),
        "||", str(int(fertig[0] / fertig[1])),
        "\n<!--new line-->"
    ])
    # Replace the marker so the row lands at the table's end while the
    # marker survives for the next run.
    temp_text = re.sub("<!--new line-->", composed_text, temp_text)
    page.text = temp_text
    page.save("RE Statistik aktualisiert", botflag=True)
def load_Proximos_Eventos(self):
    """Load the 'Próximos Eventos' wiki page and parse its event lines
    into Evento objects stored on self.proximos."""
    from pywikibot import Page
    self.page_proximos = Page(self.site, "Próximos Eventos")
    self.proximos = []
    for line in self.page_proximos.text.split('\n'):
        # Event entries are the bold bullet lines ("*'''...").
        if line.startswith("*'''"):
            try:
                self.proximos.append(Evento(line))
            except Exception:
                # Was a bare `except:`, which would also swallow
                # KeyboardInterrupt/SystemExit.
                print(
                    f"Falha ao tentar parsear linha da página 'Próximos Eventos':\n===\n{line}\n==="
                )
def _print_author(self):
    """Write one register page per author plus the overview table page."""
    self.logger.info("Print author register.")
    overview = [
        "{|class =\"wikitable sortable\" style=\"text-align:right;\""
        "\n!Autor\n!Artikel\n!colspan=\"2\"|Erschließungsgrad"
    ]
    for register in self.registers.author:
        # Authors without a last name are skipped entirely.
        if register.author.last_name:
            self.logger.debug(register)
            self.save_if_changed(
                Page(
                    self.wiki,
                    f"Paulys Realencyclopädie der classischen "
                    f"Altertumswissenschaft/Register/{register.author.name}"
                ),
                register.get_register_str(),
                "Register aktualisiert")
            overview.append(register.overview_line)
    overview.append("|}")
    # Finally write the table collecting one overview line per author.
    self.save_if_changed(
        Page(
            self.wiki,
            "Paulys Realencyclopädie der classischen "
            "Altertumswissenschaft/Register/Autorenübersicht"),
        "\n".join(overview),
        "Register aktualisiert")
def getFilesFromPage(siteSrc, nbPages, iTitles):
    """Return the titles of the file pages linked from one wiki page.

    :param siteSrc: source wiki site
    :param nbPages: total number of pages processed (progress logging only)
    :param iTitles: (index, title) pair of the page to inspect
    """
    (i, title) = iTitles
    # Materialize the generator exactly once.  The original computed
    # len(list(pages)) — exhausting the imagelinks() generator — and then
    # passed the spent iterator to mapTitle(), which therefore always
    # received an empty sequence.
    pages = list(Page(siteSrc, title).imagelinks())
    nbFiles = len(pages)
    if (nbFiles > 0):
        log("%i/%i Process %s : %i files found" %
            (i + 1, nbPages, title, nbFiles))
    else:
        log("%i/%i Process %s : no files found" % (i + 1, nbPages, title))
    return mapTitle(pages)
def check_templates(page: pywikibot.Page) -> bool:
    """Returns true if page has no license tag and is not tagged for deletion"""
    # Templates whose presence means the page must be skipped; the list
    # can be overridden through the 'skip_templates' config key.
    default_skip = [
        "Template:Deletion_template_tag", "Template:License template tag"
    ]
    templates = {
        pywikibot.Page(site, title)
        for title in config.get("skip_templates", default_skip)
    }
    # Guard against a config value that resolves to fewer than two pages.
    assert len(templates) >= 2
    page_templates = set(page.itertemplates())
    return page_templates.isdisjoint(templates)
def template_title_regex(tpl_page: pywikibot.Page) -> Pattern:
    """
    Return a regex that matches to variations of the template title.

    It supports the transcluding variant as well as localized namespaces
    and case-insensitivity depending on the namespace.

    :param tpl_page: The template page
    :type tpl_page: pywikibot.page.Page
    """
    ns = tpl_page.site.namespaces[tpl_page.namespace()]
    # In the Template namespace (id 10) the prefix may be omitted when
    # transcluding, hence the optional '?' marker below.
    marker = '?' if ns.id == 10 else ''
    title = tpl_page.title(with_ns=False)
    if ns.case != 'case-sensitive':
        # First letter may match either case; the rest must match exactly.
        title = '[{}{}]{}'.format(re.escape(title[0].upper()),
                                  re.escape(title[0].lower()),
                                  re.escape(title[1:]))
    else:
        title = re.escape(title)
    # '|'.join(ns) expands to all aliases of the namespace.
    return re.compile(r'(?:(?:%s):)%s%s' % ('|'.join(ns), marker, title))
def _add_category_page(self, title, categories):
    """Add a page with categories.

    Existing pages are left untouched unless overwriting is enabled.

    Parameters
    ----------
    title : str
        Title of the page.
    categories : list
        The categories to add to the page.
    """
    page = Page(self._site, title, "Category")
    if page.exists() and not self._overwrite:
        logging.warning(
            "Category page '{}' already exists. It will not be created.".
            format(page.title())  # noqa: E501
        )
    else:
        page.text = ""
        for category in categories:
            # Avoid making the category page a member of itself.
            if category != title:
                page.text += "[[Kategori:{}]]\n".format(category)
        logging.info("Writing to category page '{}'".format(page.title()))
        logging.debug(page.text)
        self._write_page(page)
def process(day):
    """Process one day of the bot run.

    Builds a dated report section listing pages recreated after having
    been deleted, then appends it to the monthly journal page.

    :param day: python date of the day to process
    """
    if params.verbose:
        print("processing Journal des recréations ({day})".format(
            day=format_date(day)))
    start = to_date(day)
    end = to_date(day + ONE_DAY)
    result = "\n== {} ==\n".format(format_date(day))
    for i, page in enumerate(creation_log(start, end), 1):
        if params.verbose:
            print(i, page["timestamp"])
        # Only report creations whose title also appears in the delete
        # log, i.e. pages that were recreated after a deletion.
        dl = deletelog(page["title"])
        if dl:
            r = (
                "* {{{{a-court|{title}}}}} <small>([[{pas}|PàS]])</small> supprimé le {date} recréé par {{{{u|{user}}}}} \n"
                .format(title=wiki_param(page["title"]),
                        pas=wiki_param("Discussion:" + page["title"] +
                                       "/Suppression"),
                        user=wiki_param(page["user"]),
                        date=format_date(from_date(dl["timestamp"]))))
            if params.verbose:
                print(r)
            result += r
    # Append the day's section to the monthly journal page.
    page = Page(Site(), params.prefix + "/" + format_date(day, skip_day=True))
    try:
        result = page.get() + result
    except NoPage:
        pass
    page.put(
        result,
        comment="Journal des recréations ({day})".format(day=format_date(day)))
def userPut(
    self,
    page: pywikibot.Page,
    oldtext: str,
    newtext: str,
    summary: Optional[str] = None,
    minor: bool = True,
    botflag: Optional[bool] = None,
) -> None:
    """Save *newtext* to *page* after showing a colored diff.

    Does nothing when old and new text are equal.  Edit conflicts are
    re-raised for the caller; other pywikibot errors are only reported.
    """
    if oldtext == newtext:
        pywikibot.output("No changes were needed on %s" % page.title(as_link=True))
        return
    pywikibot.output("\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title(as_link=True))
    pywikibot.showDiff(oldtext, newtext)
    if summary:
        pywikibot.output("Summary: %s" % summary)
    page.text = newtext
    try:
        page.save(summary=summary, minor=minor, botflag=botflag)
    except pywikibot.EditConflict:
        # Conflicts need caller-level handling; don't swallow them.
        raise
    except pywikibot.Error as e:
        pywikibot.output("Failed to save %s: %r: %s" % (page.title(as_link=True), e, e))
def save_page(
    text: str,
    page: pywikibot.Page,
    summary: str,
    bot: bool = True,
    minor: bool = False,
    mode: str = "replace",
) -> None:
    """Save *text* to *page*.

    :param mode: how *text* combines with the current page text —
        'replace' overwrites it, 'append' adds after it, 'prepend' adds
        before it.
    :raises pywikibot.PageNotSaved: when text is blank or unchanged
    :raises ValueError: for an unknown mode
    """
    logger.info(f"Saving to {page.title()}")
    if not text:
        raise pywikibot.PageNotSaved(
            page, message="New page text is blank, page %s was not saved")
    if mode not in ("replace", "append", "prepend"):
        # The original message lacked the f-prefix, so "{mode}" was
        # emitted literally instead of the offending value.
        raise ValueError(
            f"mode must be 'replace', 'append', or 'prepend', not {mode}")
    # Fetch the live text once; used both for combining and for the
    # no-change check (the original fetched it up to twice).
    current = page.get(force=True)
    if mode == "append":
        text = current + text
    elif mode == "prepend":
        text = text + current
    if current == text:
        raise pywikibot.PageNotSaved(
            page, message="Page text did not change, page %s was not saved")
    page.text = text
    page.save(
        summary=summary,
        minor=minor,
        botflag=bot,
        quiet=True,
    )
    logger.info(f"Page {page.title(as_link=True)} saved")
def user_page_the_it(self, korrigiert: Tuple[int, int]):
    """Refresh the RE status snippet between the <!--RE--> markers on the
    user's workshop page.

    korrigiert is a pair of proofreading totals rendered with a
    background color computed by make_html_color.
    """
    status_string = []
    color = make_html_color(20e6, 22e6, korrigiert[0])
    status_string.append(
        f"<span style=\"background:#FF{color}{color}\">{korrigiert[0]}</span>"
    )
    color = make_html_color(5.0e3, 5.25e3, korrigiert[1])
    status_string.append(
        f"<span style=\"background:#FF{color}{color}\">{korrigiert[1]}</span>"
    )
    # Date of the oldest page that is (teil)korrigiert but not beyond.
    list_of_lemmas = self.petscan(["RE:Teilkorrigiert", "RE:Korrigiert"],
                                  ["RE:Unkorrigiert", "RE:Unvollständig"])
    date_page = Page(self.wiki, list_of_lemmas[0]["title"])
    date_of_first = str(date_page.oldest_revision.timestamp)[0:10]
    gap = datetime.now() - datetime.strptime(date_of_first, "%Y-%m-%d")
    color = make_html_color(3 * 365, 3.5 * 365, gap.days)
    status_string.append(
        f"<span style=\"background:#FF{color}{color}\">{date_of_first}</span>"
    )
    user_page = Page(self.wiki, "Benutzer:THE IT/Werkstatt")
    temp_text = user_page.text
    temp_text = re.sub(r"<!--RE-->.*<!--RE-->",
                       f"<!--RE-->{' ■ '.join(status_string)}<!--RE-->",
                       temp_text)
    user_page.text = temp_text
    user_page.save("todo RE aktualisiert")
def save_page(page_name: str, text: str, sheet_name: str,
              template_name: str) -> None:
    '''Write the given text to the wiki page *page_name*.'''
    if template_name is None:
        template_name = page_name
    # Append a footer noting that this page is maintained by a bot.
    sheet_url = get_sheet_url(sheet_name)
    footer = '\n\n{{bot/編集の注意|template_name = %s | url = %s}}' \
        % (template_name, sheet_url)
    text += footer
    # Skip the save when the page already holds exactly this content.
    page = Page(site, page_name)
    if page.text == text:
        return
    page.text = text
    if args.debug:
        print(page.text)
    else:
        page.save()
def check_page(self, pagename):
    """Check one page.

    Compares the original wiki's text against every replica site,
    records differences, and optionally pushes the original text.
    """
    pywikibot.output("\nChecking %s" % pagename)
    sys.stdout.flush()
    page1 = Page(self.original, pagename)
    txt1 = page1.text
    # Optional cross-namespace replication target.
    if self.options.dest_namespace:
        dest_ns = int(self.options.dest_namespace)
    else:
        dest_ns = None
    for site in self.sites:
        if dest_ns is not None:
            page2 = Page(site, page1.title(withNamespace=False), dest_ns)
            pywikibot.output("\nCross namespace, new title: %s" % page2.title())
        else:
            page2 = Page(site, pagename)
        if page2.exists():
            txt2 = page2.text
        else:
            txt2 = ''
        # Apply the per-site text replacements configured in
        # config.replicate_replace before comparing.
        if str(site) in config.replicate_replace:
            txt_new = multiple_replace(txt1, config.replicate_replace[str(site)])
            if txt1 != txt_new:
                pywikibot.output(
                    'NOTE: text replaced using config.sync_replace')
                pywikibot.output('%s %s %s' % (txt1, txt_new, txt2))
                txt1 = txt_new
        if txt1 != txt2:
            pywikibot.output("\n %s DIFFERS" % site)
            self.differences[site].append(pagename)
        if self.options.replace:
            page2.text = txt1
            page2.save(self.put_message(site))
        else:
            # Progress marker for unchanged/non-replaced pages.
            sys.stdout.write('.')
            sys.stdout.flush()
def extract_game_page_from_league_table_file(league_table_file_page):
    """Locate the game page corresponding to a league-table file page.

    Season and fixture number are parsed from the file title (Hebrew
    keywords: 'עונת' = season, 'מחזור' = fixture).
    """
    league_table_file_name = league_table_file_page.title()
    season = _get_word_after(league_table_file_name, "עונת").replace("-", "/")
    fixture_number = f'מחזור {_get_word_after(league_table_file_name, "מחזור")}'
    season_games = maccabi_games.get_games_by_season(season)
    game = [g for g in season_games if g.fixture == fixture_number]
    if len(game) != 1:
        # Zero or multiple hits means the title could not be resolved to
        # exactly one game.
        raise RuntimeError(f"Too much matching games: {game}")
    game_page_name = generate_page_name_from_game(game[0])
    return Page(site, game_page_name)
def generate_overviews(self):
    """Create page on wikis with overview of bot results.

    For every replica site, writes a summary of differing pages and
    missing admins to a 'sync.py overview' user subpage.
    """
    for site in self.sites:
        sync_overview_page = Page(site, 'User:%s/sync.py overview' % site.user())
        output = "== Pages that differ from original ==\n\n"
        if self.differences[site]:
            output += "".join('* [[:%s]]\n' % l for l in self.differences[site])
        else:
            output += "All important pages are the same"
        output += "\n\n== Admins from original that are missing here ==\n\n"
        if self.user_diff[site]:
            # Underscores in user names are displayed as spaces.
            output += "".join('* %s\n' % l.replace('_', ' ')
                              for l in self.user_diff[site])
        else:
            output += "All users from original are also present on this wiki"
        pywikibot.output(output)
        sync_overview_page.text = output
        sync_overview_page.save(self.put_message(site))
def make_magazines(self, dictionary_of_magazines_by_year):
    """Create or update one wiki page per magazine issue.

    :param dictionary_of_magazines_by_year: mapping year -> iterable of
        magazine/issue identifiers
    """
    for idx_year, year in enumerate(dictionary_of_magazines_by_year):
        magazines = dictionary_of_magazines_by_year[year]
        self.logger.debug(f"make_mag_year {idx_year + 1}/"
                          f"{len(dictionary_of_magazines_by_year)}")
        for idx_mag, magazine in enumerate(magazines):
            self.logger.debug(
                f"make_mag_mag {idx_mag + 1}/{len(magazines)} ... issue:{year}/{magazine}"
            )
            # Known special case that must not be auto-created.
            if year == "1986" and magazine == "31":
                self.logger.warning(
                    "There is magazine 1986, 31, this is special, no creating here"
                )
                continue
            if self.debug:
                lemma = Page(self.wiki, "Benutzer:THEbotIT/Test")
            else:
                lemma = Page(
                    self.wiki,
                    f"Die Gartenlaube ({year})/Heft {int(magazine):d}")
            new_text = self.make_magazine(year, magazine)
            if new_text:
                # Compare stripped texts (via hash) to skip no-op saves.
                if hash(new_text.strip()) != hash(lemma.text.strip()):
                    self.logger.debug(
                        f"Print [[Die Gartenlaube ({year})/Heft {magazine}]]."
                    )
                    # Different edit summaries for update vs. creation.
                    if lemma.text != '':
                        lemma.text = new_text
                        lemma.save(
                            "Automatische Aktualisierung des Heftes",
                            botflag=True)
                    else:
                        lemma.text = new_text
                        lemma.save("automatische Hefterstellung", botflag=True)
            else:
                self.logger.debug(
                    f"Keine Änderung im Text ({year}/{magazine}).")
def check_page(self, pagename):
    """Check one page.

    Compares the original wiki's text against every replica site,
    records differences, and optionally pushes the original text.
    """
    pywikibot.output("\nChecking %s" % pagename)
    sys.stdout.flush()
    page1 = Page(self.original, pagename)
    txt1 = page1.text
    for site in self.sites:
        # Map the page into the requested destination namespace.
        if self.options.dest_namespace:
            prefix = namespaces(site)[int(self.options.dest_namespace)]
            if prefix:
                prefix += ':'
            new_pagename = prefix + page1.titleWithoutNamespace()
            pywikibot.output("\nCross namespace, new title: %s" % new_pagename)
        else:
            new_pagename = pagename
        page2 = Page(site, new_pagename)
        if page2.exists():
            txt2 = page2.text
        else:
            txt2 = ''
        # Apply the per-site text replacements configured in
        # config.replicate_replace before comparing.
        if str(site) in config.replicate_replace:
            txt_new = multiple_replace(txt1, config.replicate_replace[str(site)])
            if txt1 != txt_new:
                pywikibot.output(
                    'NOTE: text replaced using config.sync_replace')
                pywikibot.output('%s %s %s' % (txt1, txt_new, txt2))
                txt1 = txt_new
        if txt1 != txt2:
            pywikibot.output("\n %s DIFFERS" % site)
            self.differences[site].append(pagename)
        if self.options.replace:
            page2.text = txt1
            page2.save(self.put_message(site))
        else:
            # Progress marker for unchanged/non-replaced pages.
            sys.stdout.write('.')
            sys.stdout.flush()
def _single_img_download(img: Page, img_dir: Path,
                         params: "QueryParams") -> Tuple[bool, str]:
    """Download one image into *img_dir* unless it is filtered out or a
    copy already exists.

    Returns (downloaded, filename); filename is '' when the image type
    was rejected.
    """
    img_name, img_path, img_path_orig = _get_img_path(img, img_dir)
    if not _valid_img_type(img_name, params.early_icons_removal):
        # Drop a stale local copy of a now-rejected image type.
        if img_path.exists():
            img_path.unlink()
        return (False, "")
    if img_path.exists():
        return (False, img_path.name)
    if img_path_orig.exists():
        return (False, img_path_orig.name)
    if params.debug_info:
        print('Downloading image', img_name)
    try:
        urlretrieve(_get_url(img_name, params.img_width), img_path)
        return (True, img_path.name)
    except Exception as e:
        # Resized URL failed; fall back to downloading the original file
        # through pywikibot.
        print(str(e))
        img.download(filename=img_path_orig, chunk_size=8 * 1024)
        return (True, img_path_orig.name)
def ParseWikiPagePushInfo(page: pywikibot.Page):
    """Build the push info (post text + cover image) for a wiki page."""
    parsed_text = ParsePage(page)
    # If the score of a trivia is higher than this,
    # we'll try to show it only, without leading text.
    triviaSignificance = float(GetConfig("Wiki", "PushedTermsTTL", 180))
    # Distill text
    bareTitle = BareDisambigTitle(page.title())
    distilled = WikiPageDistiller.DistillHtml(parsed_text)
    info = WikiPagePushInfo(page.title(), page.full_url())
    if distilled.trivia is not None:  # was `!= None`; identity check is correct here
        # Trivia only
        info.postText = distilled.trivia
        # Leading + trivia: prepend the introduction when the trivia is
        # not significant enough or does not mention the bare title.
        if distilled.triviaScore < triviaSignificance or bareTitle not in info.postText:
            info.postText = distilled.introduction + info.postText
    else:
        # Leading
        info.postText = distilled.introduction
    # Choose cover image
    info.postImageName, info.postImageUrl = GetCoverImage(page)
    return info
def user_page_the_it(self, korrigiert):
    """Refresh the RE status snippet between the <!--RE--> markers on the
    user's workshop page.

    korrigiert is a pair of proofreading totals rendered with a
    background color computed by make_html_color.
    """
    status_string = []
    color = make_html_color(20e6, 22e6, korrigiert[0])
    status_string.append(f"<span style=\"background:#FF{color}{color}\">{korrigiert[0]}</span>")
    color = make_html_color(5.0e3, 5.25e3, korrigiert[1])
    status_string.append(f"<span style=\"background:#FF{color}{color}\">{korrigiert[1]}</span>")
    # Date of the oldest page that is (teil)korrigiert but not beyond.
    list_of_lemmas = self.petscan(["RE:Teilkorrigiert", "RE:Korrigiert"],
                                  ["RE:Unkorrigiert", "RE:Unvollständig"])
    date_page = Page(self.wiki, list_of_lemmas[0]["title"])
    date_of_first = str(date_page.oldest_revision.timestamp)[0:10]
    gap = datetime.now() - datetime.strptime(date_of_first, "%Y-%m-%d")
    color = make_html_color(3 * 365, 3.5 * 365, gap.days)
    status_string.append(f"<span style=\"background:#FF{color}{color}\">{date_of_first}</span>")
    user_page = Page(self.wiki, "Benutzer:THE IT/Werkstatt")
    temp_text = user_page.text
    temp_text = re.sub(r"<!--RE-->.*<!--RE-->",
                       f"<!--RE-->{' ■ '.join(status_string)}<!--RE-->",
                       temp_text)
    user_page.text = temp_text
    user_page.save("todo RE aktualisiert")
def task(self):
    """Refresh the Gartenlaube project statistics page."""
    if self.debug:  # activate for debug purpose
        lemma = "Benutzer:THEbotIT/" + self.bot_name
    else:
        lemma = "Die Gartenlaube"
    page = Page(self.wiki, lemma)
    temp_text = page.text
    # Category counts for the different proofreading states.
    alle = self.petscan([])
    fertig = self.petscan(["Fertig"])
    korrigiert = self.petscan(["Korrigiert"])
    unkorrigiert = self.petscan(["Unkorrigiert"])
    articles = self.petscan([], article=True,
                            not_categories=["Die Gartenlaube Hefte"])
    temp_text = self.projektstand(temp_text, alle, fertig, korrigiert,
                                  unkorrigiert, articles)
    temp_text = self.alle_seiten(temp_text, alle)
    temp_text = self.korrigierte_seiten(temp_text, korrigiert)
    temp_text = self.fertige_seiten(temp_text, fertig)
    # Update the per-year sections for every publication year.
    for year in range(1853, 1900):
        temp_text = self.year(year, temp_text)
    page.text = temp_text
    page.save("Ein neuer Datensatz wurde eingefügt.", botflag=True)
    return True
def handle_maariv_paper_page(paper_page: pw.Page) -> None:
    """Add the publish-date parameter (derived from the page title) to a
    Maariv paper page; pages already marked are skipped."""
    publish_date = _extract_paper_page_from_title(paper_page.title())
    if publish_date is None:
        return
    parsed_mw_text = mwparserfromhell.parse(paper_page.text)
    paper_template = parsed_mw_text.filter_templates(
        mark_as_paper_template_name)[0]
    # `in` on a Template tests for an existing parameter with that name.
    if publish_date_param_name in paper_template:
        logger.info(
            f'Page: {paper_page} is already marked with publish date, skipping this paper'
        )
        return
    paper_template.add(publish_date_param_name, publish_date)
    logger.info(
        f'Added publish date: {publish_date} for page: {paper_page.title()}')
    # NOTE(review): this assigns the Wikicode object rather than
    # str(parsed_mw_text) — confirm pywikibot stringifies it on save.
    paper_page.text = parsed_mw_text
    if SHOULD_SAVE:
        paper_page.save(
            summary="MaccabiBotAdd publish dates for maariv papers",
            botflag=True)
def generate_overviews(self):
    """Create page on wikis with overview of bot results.

    For every replica site, writes a summary of differing pages and
    missing admins to a 'sync.py overview' user subpage.
    """
    for site in self.sites:
        sync_overview_page = Page(
            site, 'User:{0}/sync.py overview'.format(site.user()))
        output = '== Pages that differ from original ==\n\n'
        if self.differences[site]:
            output += ''.join('* [[:{}]]\n'.format(page_title)
                              for page_title in self.differences[site])
        else:
            output += 'All important pages are the same'
        output += (
            '\n\n== Admins from original that are missing here ==\n\n')
        if self.user_diff[site]:
            # Underscores in user names are displayed as spaces.
            output += ''.join('* {}\n'.format(user_name.replace('_', ' '))
                              for user_name in self.user_diff[site])
        else:
            output += (
                'All users from original are also present on this wiki')
        pywikibot.output(output)
        sync_overview_page.text = output
        sync_overview_page.save(self.put_message(site))
def remove_cfd_tpl(page: pywikibot.Page, summary: str) -> None:
    """
    Remove the CfD template from the page.

    @param page: Page to edit
    @param summary: Edit summary
    """
    # Strip the marker-delimited CFD section first (case-insensitive,
    # across lines) ...
    text = re.sub(
        r'<!--\s*BEGIN CFD TEMPLATE\s*-->.*?'
        r'<!--\s*END CFD TEMPLATE\s*-->\n*',
        '',
        page.get(force=True),
        flags=re.I | re.M | re.S,
    )
    # ... then remove any remaining CfD templates by name.
    wikicode = mwparserfromhell.parse(text, skip_style_tags=True)
    for tpl in wikicode.ifilter_templates():
        try:
            template = pywikibot.Page(page.site, str(tpl.name), ns=10)
            if template in TPL['cfd']:
                wikicode.remove(tpl)
        except pywikibot.InvalidTitle:
            # Malformed template names cannot be resolved; skip them.
            continue
    page.text = str(wikicode).strip()
    page.save(summary=summary)
def get_plain_text(self, page: pywikibot.Page):
    """Return the first sentences of *page* as plain text via the
    TextExtracts API, or None when no extract is available."""
    params = {
        'action': 'query',
        'prop': 'extracts',
        'exsentences': 7,  # limit the extract to seven sentences
        'explaintext': 1,
        'format': 'json',
        'titles': page.title()
    }
    request = self.site._simple_request(**params)
    response = request.submit()
    try:
        return self.parse_text(next(iter(response['query']['pages'].values()), None)['extract'])
    except (KeyError, TypeError):
        # Page missing from the response, or no 'extract' field.
        pass
def feed_archive(self, archive: pywikibot.Page, thread: DiscussionThread,
                 max_archive_size: Size, params=None) -> bool:
    """
    Feed the thread to one of the archives.

    Also check for security violations.

    @return: whether the archive is full
    """
    archive_page = self.get_archive_page(archive.title(with_ns=True), params)
    return archive_page.feed_thread(thread, max_archive_size)
def GetCoverImage(page: pywikibot.Page):
    '''
    Gets the cover image name and url for a specific Page.
    Returns (None, None) if no cover image is found.
    The result is cached as an attribute on the Page object.
    '''
    try:
        return page.__lmd_cover_image
    except AttributeError:
        # Not cached yet.  (Was a bare `except:`, which would also hide
        # unrelated errors and KeyboardInterrupt/SystemExit.)
        pass
    req = page.site._simple_request(action="query",
                                    titles=page.title(),
                                    prop="pageimages",
                                    piprop="thumbnail|name",
                                    pithumbsize=400)
    data = req.submit()
    assert "query" in data, "API request response lacks 'query' key"
    assert "pages" in data["query"], "API request response lacks 'pages' key"
    _, jpage = data["query"]["pages"].popitem()
    if "thumbnail" in jpage:
        page.__lmd_cover_image = (jpage["pageimage"],
                                  jpage["thumbnail"]["source"])
    else:
        page.__lmd_cover_image = (None, None)
    return page.__lmd_cover_image
def get_wikidata_id(self, page: pywikibot.Page):
    """Return the Wikidata item id (Q…) linked to *page*, or None when
    the page does not exist or has no item."""
    if not page.exists():
        return None
    # T256583, T87345
    page.get(get_redirect=True)
    if page.isRedirectPage():
        # Resolve the redirect and load the target's content.
        page = page.getRedirectTarget()
        page.get()
    item = pywikibot.ItemPage.fromPage(page)
    if not item or not item.exists():
        return None
    return item.title()
def make_sure_league_table_file_is_on_game_page(league_table_file_page):
    """Ensure the game page referenced by a league-table file actually
    carries that file in its football-game template.

    Adds the file when the parameter is missing or empty; otherwise only
    logs whether the existing value matches.
    """
    game_page = extract_game_page_from_league_table_file(
        league_table_file_page)
    if not game_page.exists():
        raise RuntimeError(
            f"Could not find this game page: {game_page.title()}."
            f"Created from this league table: {league_table_file_page.title()}"
        )
    parsed_mw_text = mwparserfromhell.parse(game_page.text)
    football_game_template = parsed_mw_text.filter_templates(
        football_games_template_name)[0]
    table_arg_dont_exist = league_table_file_argument_name not in football_game_template
    # Contain just \n or spaces:
    empty_table_arg_exist = (
        league_table_file_argument_name in football_game_template
    ) and not football_game_template.get(
        league_table_file_argument_name).value.strip()
    if table_arg_dont_exist or empty_table_arg_exist:
        logger.info(
            f"Adding league table file to the page: {game_page.title()}")
        football_game_template.add(league_table_file_argument_name,
                                   league_table_file_page.title(with_ns=False))
        # NOTE(review): assigns the Wikicode object rather than
        # str(parsed_mw_text) — confirm pywikibot stringifies it on save.
        game_page.text = parsed_mw_text
        game_page.save(
            summary=
            "MaccabiBot - Updating league tables files to the relevant game pages",
            botflag=True)
    else:
        # The current league table is a File (ns=6)
        current_league_table_file = Page(
            site,
            football_game_template.get(league_table_file_argument_name).value,
            ns=6)
        if current_league_table_file != league_table_file_page:
            logger.warning(
                f"Found an existing league table which is different from what we have."
                f"Current: {current_league_table_file}, We have: {league_table_file_page}"
            )
        else:
            logger.info(
                f"Page: {game_page.title()} has an existing league table and its a good one!"
            )
def has_template(
    page: pywikibot.Page,
    templates: Union[str, Iterable[Union[pywikibot.Page, str]]],
) -> bool:
    """
    Return True if the page has one of the templates. False otherwise.

    @param page: page to check
    @param templates: templates to check
    """
    if isinstance(templates, str):
        templates = [templates]
    # Normalize string titles to Template-namespace pages, then expand
    # each to the full set including its redirects.
    templates = get_redirects(
        tpl if isinstance(tpl, pywikibot.Page
                          ) else pywikibot.Page(page.site, tpl, ns=10)
        for tpl in templates)
    return bool(templates & set(page.templates()))
def save_if_changed(page: Page, text: str, change_msg: str):
    """Write *text* to *page* and save it with *change_msg*, unless the
    page already holds the same content (trailing whitespace of the new
    text is ignored for the comparison)."""
    unchanged = text.rstrip() == page.text
    if unchanged:
        return
    page.text = text
    page.save(change_msg, botflag=True)
# -*- coding: utf-8 -*-
# One-off maintenance script: replace uses of the {{Biel}} template with
# {{Bielefeld}} for a fixed set of known item ids.
__author__ = 'eso'
import sys
sys.path.append('../../')
import re
from pywikibot import Page, Site
from tools.petscan import PetScan

wiki = Site()
searcher = PetScan()
searcher.add_yes_template('Biel')
lemma_list = searcher.run()
for idx, lemma in enumerate(lemma_list):
    print(idx, len(lemma_list), lemma['title'])
    link_page = Page(wiki, lemma['title'])
    temp_text = link_page.text
    # Raw strings fix the invalid-escape warnings the original non-raw
    # regex literals produced; the patterns themselves are unchanged.
    if re.search(r'\{\{Biel\|', temp_text):
        temp_text = re.sub(r'\{\{Biel\|1240647\}\}', '{{Bielefeld|1240647}}', temp_text)
        temp_text = re.sub(r'\{\{Biel\|590504\}\}', '{{Bielefeld|590504}}', temp_text)
        temp_text = re.sub(r'\{\{Biel\|1732676\}\}', '{{Bielefeld|1732676}}', temp_text)
        temp_text = re.sub(r'\{\{Biel\|548435\}\}', '{{Bielefeld|548435}}', temp_text)
        temp_text = re.sub(r'\{\{Biel\|32920\}\}', '{{Bielefeld|32920}}', temp_text)
    # Save only when a substitution actually changed the text.
    if link_page.text != temp_text:
        link_page.text = temp_text
        link_page.save(botflag=True, summary='Biel -> Bielefeld')
def load_text_from_site(self, lemma):
    """Fetch the text of *lemma* into self.text and keep the Page object
    in self.stat_page for the later save."""
    self.logger.info('Load text from {}'.format(lemma))
    self.stat_page = Page(self.wiki, lemma)
    self.text = self.stat_page.text
def process(day):
    """Process one day of the bot run.

    Builds a dated report section listing pages recreated after having
    been deleted.  Recreations that happened despite a closed deletion
    discussion are bolded and collected into the edit comment.

    :param day: python date of the day to process
    """
    if params.verbose:
        print("processing Journal des recréations ({day})".format(day=format_date(day)))
    start = to_date(day)
    end = to_date(day+ONE_DAY)
    result = "\n\n== {} ==\n".format(format_date(day))
    comment = []
    for i,page in enumerate(creation_log(start,end),1):
        gras = ''
        date = ''
        if params.verbose:
            print (i,page["timestamp"])
        # Only report creations whose title also appears in the delete
        # log, i.e. pages recreated after a deletion.
        dl = deletelog(page["title"])
        if dl:
            page_pas = Page(Site(), "Discussion:" + page["title"] + "/Suppression")
            if page_pas.isRedirectPage():
                page_pas = page_pas.getRedirectTarget()
            # Bold the entry and note it in the edit comment when the
            # deletion discussion was closed as "article supprimé".
            if page_pas.exists() and re.search(r'article supprimé', page_pas.get(), re.I):
                if re.search(r'\{\{ ?article supprimé[^\}]*\d{1,2} (\S* \d{4}) à', page_pas.get(), re.I):
                    date = u' de %s' % re.search(r'\{\{ ?article supprimé[^\}]*\d{1,2} (\S* \d{4}) à', page_pas.get(), re.I).group(1)
                comment.append(u'[[%s]] (malgré [[%s|PàS]]%s)' % (page["title"], page_pas.title(), date))
                gras = "'''"
            r = (u"* {g}{{{{a-court|{title}}}}} <small>([[{pas}|PàS]])</small> supprimé le {date} puis recréé par {{{{u|{user}}}}}{g} \n"
                 .format(title = wiki_param(page["title"]),
                         pas = page_pas.title(),
                         user = wiki_param(page["user"]),
                         date = format_date(from_date(dl["timestamp"])),
                         g = gras))
            if params.verbose:
                print(r)
            result += r
    # Append the day's section to the monthly journal page.
    page = Page(Site(), params.prefix + u'/' + format_date(day, skip_day=True))
    try:
        result = page.get() + result
    except NoPage:
        # First entry: start the page with the bot-update template.
        result = u'{{mise à jour bot|Zérobot}}' + result
    if comment:
        comment.insert(0, '')
    page.put(result,comment="Journal des recréations ({day}) ".format(day=format_date(day)) + ' - '.join(comment))
import sys sys.path.append('../../') import re from pywikibot import Page, Site from tools.petscan import PetScan wiki = Site() searcher = PetScan() searcher.add_positive_category("RE:Korrigiert") lemma_list = searcher.run() list_for_pfaerrich = [] for idx_lem, lemma in enumerate(lemma_list): print(idx_lem) page = Page(wiki, lemma['title']) version_history = page.fullVersionHistory()[::-1] size_all_changes = 0 for idx_rev, revision in enumerate(version_history): user = revision.user if user == 'Pfaerrich': if idx_rev > 0: size_prev = len(version_history[idx_rev-1].text) else: size_prev = 0 size_all_changes += abs(len(version_history[idx_rev].text) - size_prev) korrigiert_flag = False if size_all_changes > 0: for version in page.getVersionHistory(): if version.user == 'Pfaerrich': if re.search('orrigiert', version.comment):
def send_log_to_wiki(self):
    """Append this run's log lines to the bot's wiki log page and save."""
    log_page = Page(self.wiki, f"Benutzer:THEbotIT/Logs/{self.bot_name}")
    log_page.text = log_page.text + self.logger.create_wiki_log_lines()
    log_page.save(f"Update of Bot {self.bot_name}", botflag=True)