def __init__(self, endpoint=None, entity_url=None, repo=None):
    """
    Create endpoint.

    @param endpoint: SPARQL endpoint URL
    @type endpoint: string
    @param entity_url: URL prefix for any entities returned in a query.
    @type entity_url: string
    @param repo: The Wikibase site which we want to run queries on.
        If provided this overrides any value in endpoint and entity_url.
        Defaults to Wikidata.
    @type repo: pywikibot.site.DataSite
    """
    # default to Wikidata
    if not repo and not endpoint:
        repo = Site('wikidata', 'wikidata')

    if repo:
        try:
            self.endpoint = repo.sparql_endpoint
            self.entity_url = repo.concept_base_uri
        except NotImplementedError:
            raise NotImplementedError(
                'Wiki version must be 1.28-wmf.23 or newer to '
                'automatically extract the sparql endpoint. '
                'Please provide the endpoint and entity_url '
                'parameters instead of a repo.')
        if not self.endpoint:
            raise Error('The site {0} does not provide a sparql endpoint.'
                        .format(repo))
    else:
        if not entity_url:
            raise Error('If initialised with an endpoint the entity_url '
                        'must be provided.')
        self.endpoint = endpoint
        self.entity_url = entity_url

    self.last_response = None
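# Usage sketch (illustrative, not from the original source; this __init__ is
# assumed to belong to pywikibot's SparqlQuery): the endpoint can be derived
# from a repo, or both URLs can be passed explicitly.
from pywikibot import Site
from pywikibot.data.sparql import SparqlQuery

query_via_repo = SparqlQuery(repo=Site('wikidata', 'wikidata'))
query_via_urls = SparqlQuery(
    endpoint='https://query.wikidata.org/sparql',
    entity_url='http://www.wikidata.org/entity/')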
def copy_delayed(src_item: ItemPage, dest_item: ItemPage,
                 props: Iterable[wp.WikidataProperty]) -> Iterable[api.Fix]:
    repo = Site().data_repository()
    src_item.get()
    dest_item.get()

    claims = []
    for prop in props:
        src_claims = src_item.claims.get(prop.pid, [])
        if len(src_claims) > 1:
            print(
                f"Cannot copy {prop} from {format(src_item)} to {format(dest_item)}. "
                "Only scalar properties can be copied"
            )
            continue

        if prop.pid in dest_item.claims:
            print(f"{prop} already has a value in {format(dest_item)}")
            continue

        targets = [claim.getTarget() for claim in src_claims]
        for target in targets:
            target.get()
            target_str = printable_target_value(target)
            print(
                f"Creating claim to copy {prop}={target_str} "
                f"from {format(src_item)} to {format(dest_item)}"
            )
            new_claim = Claim(repo, prop.pid)
            new_claim.setTarget(target)
            summary = f"Setting {prop.pid} ({prop.name})"
            claims.append(api.ClaimFix(new_claim, summary, dest_item))
    return claims
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    seen = set()
    for board_game_id, bgg_id in board_games_with_missing_labels():
        if board_game_id in seen:
            continue
        seen.add(board_game_id)

        board_game_name = utils.bgg_title(bgg_id)
        if board_game_name is None:
            print(f"Unable to fetch name for {board_game_id}.")
            continue

        wiki_url = f"https://www.wikidata.org/wiki/{board_game_id}"
        print(
            f"{dry_str}Setting label='{board_game_name}' for {board_game_id} ( {wiki_url} )"
        )
        if not dry:
            bg_item = ItemPage(repo, board_game_id)
            bg_item.get()
            bg_item.editLabels({"en": board_game_name})
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    for item_link, item_id, title in items_with_missing_labels_with_title():
        print(
            f"{dry_str} ( {str(item_link).ljust(40, ' ')} ) "
            f"Fixing {str(item_id).ljust(9, ' ')}: {title}"
        )
        if dry:
            continue

        # Labels have a character limit, so skip titles that would trigger an error
        if len(title) >= 250:
            continue

        item = ItemPage(repo, item_id)
        item.get()
        try:
            item.editLabels({"en": title})
        except (APIError, OtherPageSaveError) as e:
            print(f"An error occurred while adding label for {item_id}: {e}")
def main() -> None:
    """Run the bot."""
    logging.basicConfig(level=logging.WARNING)
    # Initialize pywikibot.
    assert Site().code == 'en'
    utils.initLimits(
        editsLimits={'default': 4000},
        brfaNumber=6,
        onlySimulateEdits=False,
        botTrial=False
    )
    EnglishWordList.init()

    journals: Set[str] = getCategoryAsSet('Academic journals by language')
    magazines: Set[str] = getCategoryAsSet('Magazines by language')

    # Let 'foreign' be the set of page titles in a language-category
    # other than English, or in the multilingual category.
    foreign: Set[str] = set()
    foreign = foreign | journals
    foreign = foreign | magazines
    foreign = foreign - getCategoryAsSet('English-language journals')
    foreign = foreign - getCategoryAsSet('English-language magazines')
    foreign = foreign | getCategoryAsSet('Multilingual journals')
    foreign = foreign | getCategoryAsSet('Multilingual magazines')

    for page in chain(journals, magazines,
                      getPagesWithTemplate('Infobox journal'),
                      getPagesWithTemplate('Infobox Journal'),
                      getPagesWithTemplate('Infobox magazine'),
                      getPagesWithTemplate('Infobox Magazine')):
        pageTitle = page if isinstance(page, str) else page.title()
        try:
            makeAmpersandRedirects(pageTitle, foreign)
            for rPage in getRedirectsToPage(pageTitle, namespaces=0):
                makeAmpersandRedirects(rPage.title(), foreign, pageTitle)
        except pywikibot.exceptions.TitleblacklistError:
            print('Skipping (title blacklist error): ', pageTitle)
def check_tv_show(tvshow_id=None, child_type="episode", autofix=False,
                  accumulate=False, always=False, filter=""):
    """Check constraints for season/episodes of this TV show.

    TVSHOW_ID is the ID of the television series, in the format Q######.
    """
    if child_type == "episode":
        instance_types = [wp.TELEVISION_SERIES_EPISODE]
    elif child_type == "season":
        instance_types = [wp.TELEVISION_SERIES_SEASON]
    elif child_type == "series":
        instance_types = [wp.TELEVISION_SERIES]
    elif child_type == "all":
        instance_types = [
            wp.TELEVISION_SERIES,
            wp.TELEVISION_SERIES_SEASON,
            wp.TELEVISION_SERIES_EPISODE,
        ]
    else:
        # Guard against silently falling through with an undefined instance_types.
        raise ValueError(f"Unknown child_type: {child_type}")

    for instance_of_type in instance_types:
        key_val_pairs = {
            wp.PART_OF_THE_SERIES.pid: tvshow_id,
            wp.INSTANCE_OF.pid: instance_of_type,
        }
        query = generate_sparql_query(key_val_pairs)
        gen = WikidataSPARQLPageGenerator(query)
        if instance_of_type == wp.TELEVISION_SERIES:
            gen = [ItemPage(Site().data_repository(), tvshow_id)]

        bot = getbot(gen, autofix=autofix, accumulate=accumulate,
                     always=always, property_filter=filter)
        bot.run()
def getBoundedCat(self, cat, currMinSize):
    print()
    print("getBoundedCat")
    site = Site("en")
    try:
        catObj = Category(site, title=cat)
    except UnicodeError:
        # Fallback for byte-string category titles (Python 2 era input).
        catObj = Category(site, title=cat.decode("utf-8"))
    subCats = self.recursiveCats(catObj)
    articleSet = set()
    for subCat in subCats:
        print("inside subCat", subCat)
        newArts = set(self.getArticles(subCat, recurse=False))
        articleSet.update(newArts)
        print(len(articleSet))
        if len(articleSet) > currMinSize:
            print("break")
            return currMinSize
    return len(articleSet)
def __init__(self):
    self.output_path = ""
    self.maccabipedia = Site()
    self.games = dict()
    self.games_events = dict()
          + ' - '.join(comment))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='recreation log bot for French Wikipedia.')
    parser.add_argument('--verbose', help="enable verbose output",
                        action='store_true')
    parser.add_argument(
        '--prefix',
        help="prefix of the recreation log page, "
             "for example: 'Utilisateur:ZéroBot/Journal_des_recréations'",
        action='store',
        required=True)
    params = parser.parse_args()

    if sys.version_info < (3, 0):
        params.prefix = params.prefix.decode('utf-8')  # params.prefix.decode(locale.getpreferredencoding())

    Site().forceLogin()
    end = datetime.today() - ONE_DAY
    with open("recreation.timestamp.txt") as ts:
        start = datetime.strptime(ts.read().strip(), "%Y-%m-%d") + ONE_DAY
    while start <= end:
        process(start)
        with open("recreation.timestamp.txt", "w") as ts:
            print(start.strftime("%Y-%m-%d"), file=ts)
        start += ONE_DAY
from pywikibot.pagegenerators import CategorizedPageGenerator
from pywikibot import Site, Category
from dataskakare import GoogleTranslate
import mwparserfromhell
import hashlib
import uuid
import json

site = Site('commons', 'commons')
cat = Category(site, 'Category:Media_contributed_by_the_Swedish_Performing_Arts_Agency:_2019-03')
translate = GoogleTranslate(input('google service account file:'))


def thumb_from_title(title):
    safe_title = title.encode('utf-8')
    md5_title = hashlib.md5(safe_title).hexdigest()
    return 'https://upload.wikimedia.org/wikipedia/commons/thumb/{}/{}/{}/500px-{}.jpg'.format(
        md5_title[:1], md5_title[:2], title, title)


final_pages = list()
for page in CategorizedPageGenerator(cat, recurse=False, namespaces=6):
    wikicode = mwparserfromhell.parse(page.text)

    template_to_parse = False
    for template in wikicode.filter_templates():
        if template.name.matches('Musikverket-image'):
            template_to_parse = template

    if not template_to_parse:
        print('failed to find given template')
        continue
# -*- coding: utf-8 -*-
__author__ = 'eso'
import sys
sys.path.append('../../')
import re
from pywikibot import Page, Site
from tools.petscan import PetScan

wiki = Site()

searcher = PetScan()
searcher.add_positive_category("RE:Korrigiert")
lemma_list = searcher.run()

list_for_pfaerrich = []
for idx_lem, lemma in enumerate(lemma_list):
    print(idx_lem)
    page = Page(wiki, lemma['title'])
    version_history = page.fullVersionHistory()[::-1]
    size_all_changes = 0
    for idx_rev, revision in enumerate(version_history):
        user = revision.user
        if user == 'Pfaerrich':
            if idx_rev > 0:
                size_prev = len(version_history[idx_rev - 1].text)
            else:
                size_prev = 0
            size_all_changes += abs(
                len(version_history[idx_rev].text) - size_prev)
    korrigiert_flag = False
    if size_all_changes > 0:
def __init__(self):
    site = Site('wikidata', 'wikidata')
    self.repo = site.data_repository()
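# Follow-up sketch (assumed usage, not part of the original snippet): the
# DataSite returned by data_repository() is what ItemPage expects.
from pywikibot import Site, ItemPage

repo = Site('wikidata', 'wikidata').data_repository()
sandbox = ItemPage(repo, 'Q4115189')  # the Wikidata sandbox item
sandbox.get()
print(sandbox.labels.get('en'))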
def move_through_topic(starting_topic='History'):
    # starting site
    site = Site()
    starting_page = Page(site, starting_topic)
    return (i for i in pagegenerators.LinkedPageGenerator(starting_page))
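# Usage sketch (assumed, not from the original source): the function returns a
# lazy generator over pages linked from the starting topic.
for linked_page in move_through_topic('History'):
    print(linked_page.title())
    break  # stop after the first linked page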
def create_episode(series_id, season_id, title, series_ordinal, season_ordinal, dry):
    """Creates an episode item on WikiData

    Arguments
    ---------
    series_id: str
        The Wiki ID of the series ItemPage
    season_id: str
        The Wiki ID of the season ItemPage
    title: str
        The title of this episode. This is used to set the label.
    series_ordinal: int
        The ordinal of this episode, within the series
    season_ordinal: int
        The ordinal of this episode, within the season
    dry: bool
        Whether or not this function should run in dry-run mode.
        In dry-run mode, no real changes are made to WikiData,
        they are only logged to stdout.

    Returns
    -------
    episode_id: str
        The Wiki ID of the episode item
    """
    dry_str = "[DRY-RUN] " if dry else ""
    print(f"{dry_str}Creating episode with label='{title}'")
    episode = None
    if not dry:
        repoutil = RepoUtils(Site().data_repository())

        season = ItemPage(repoutil.repo, season_id)
        season.get()

        # Check if season has part_of_the_series set to series_id
        if wp.PART_OF_THE_SERIES.pid not in season.claims:
            raise ValueError(
                f"The season {season_id} does not have a PART_OF_THE_SERIES "
                f"({wp.PART_OF_THE_SERIES.pid}) property. "
                "Check the input series and season IDs for correctness."
            )
        actual_series_id = str(
            season.claims[wp.PART_OF_THE_SERIES.pid][0].getTarget().getID())
        if actual_series_id != series_id:
            raise ValueError(
                f"The season {season_id} has PART_OF_THE_SERIES={actual_series_id} "
                f"but expected={series_id}. "
                "Check the input series and season IDs for correctness."
            )

        episode = ItemPage(repoutil.repo)
        episode.editLabels({"en": title}, summary="Setting label")
        print(f"Created a new Item: {episode.getID()}")

    print(f"{dry_str}Setting {wp.INSTANCE_OF}={wp.TELEVISION_SERIES_EPISODE}")
    if not dry:
        instance_claim = repoutil.new_claim(wp.INSTANCE_OF.pid)
        instance_claim.setTarget(
            ItemPage(repoutil.repo, wp.TELEVISION_SERIES_EPISODE))
        episode.addClaim(instance_claim, summary=f"Setting {wp.INSTANCE_OF.pid}")

    print(
        f"{dry_str}Setting {wp.PART_OF_THE_SERIES}={series_id}, "
        f"with {wp.SERIES_ORDINAL}={series_ordinal}"
    )
    if not dry:
        series_claim = repoutil.new_claim(wp.PART_OF_THE_SERIES.pid)
        series_claim.setTarget(ItemPage(repoutil.repo, series_id))

        series_ordinal_claim = repoutil.new_claim(wp.SERIES_ORDINAL.pid)
        series_ordinal_claim.setTarget(series_ordinal)
        series_claim.addQualifier(series_ordinal_claim)
        episode.addClaim(series_claim,
                         summary=f"Setting {wp.PART_OF_THE_SERIES.pid}")

    print(
        f"{dry_str}Setting {wp.SEASON}={season_id}, "
        f"with {wp.SERIES_ORDINAL}={season_ordinal}"
    )
    if not dry:
        season_claim = repoutil.new_claim(wp.SEASON.pid)
        season_claim.setTarget(ItemPage(repoutil.repo, season_id))

        season_ordinal_claim = repoutil.new_claim(wp.SERIES_ORDINAL.pid)
        season_ordinal_claim.setTarget(season_ordinal)
        season_claim.addQualifier(season_ordinal_claim)
        episode.addClaim(season_claim, summary=f"Setting {wp.SEASON.pid}")

    return episode.getID() if episode is not None else "Q-1"
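# Hypothetical invocation (all IDs and the title are placeholders, not taken
# from the original): dry-run mode only logs the claims that would be created.
# create_episode(series_id="Q00000", season_id="Q00001", title="Pilot",
#                series_ordinal=1, season_ordinal=1, dry=True)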
def setUp(self):
    wikidata = Site('test', 'wikidata')
    self.ref_1 = Claim(wikidata, 'P55')
    self.ref_1.setTarget('foo')
    self.ref_2 = Claim(wikidata, 'P55')
    self.ref_2.setTarget('bar')
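# Sketch of typical use for such reference claims (assumed, not part of the
# original test): references get attached to a statement with addSources().
from pywikibot import Claim, Site

wikidata = Site('test', 'wikidata')
statement = Claim(wikidata, 'P55')
statement.setTarget('baz')
# Once `statement` has been added to an item, both references from setUp could
# be attached as a single source group:
#     statement.addSources([ref_1, ref_2])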
def __init__(self):
    self._site = Site('bg', fam='wikipedia')
    self._page = Page(self._site, 'Потребител:Iliev/Мерсиметър')
    self._settings_page = Page(self._site,
                               'Потребител:Iliev/Мерсиметър/settings.json')
    self._settings = jload(self._settings_page.text)
    self._user_blocks = {}
def move_to_random_topic():
    # starting site
    site = Site()
    # page = pywikibot.Page(site, u"Whatever")
    return Page(next(pagegenerators.RandomRedirectPageGenerator(1)))
def fixRedirectAnchor(rTitle: str, anchor: str, target: str) -> bool:
    """Add an anchor to given redirect page."""
    rPage = pywikibot.Page(Site(), rTitle)
    addJournal = False
    if rPage.exists() and not rPage.isRedirectPage():
        addJournal = True
        if 'journal' in rTitle.lower():
            print(f'Skip: [[{rTitle}]] already exists, '
                  'title already has "journal".', flush=True)
            return False
        for cat in rPage.categories():
            if 'journal' in cat.title().lower():
                print(f'Skip: [[{rTitle}]] already exists, '
                      'has category containing "journal".', flush=True)
                return False
    if addJournal:
        rPage = pywikibot.Page(Site(), rTitle + ' (journal)')
    if not rPage.exists() or not rPage.isRedirectPage():
        print(f'Not exists/not a redirect: [[{rPage.title()}]]', flush=True)
        return False
    # Page.title() actually contains anchor, if redirect had one.
    actualTarget = rPage.getRedirectTarget().title().split('#', 1)
    if actualTarget[0] != target:
        print(f'Not a redirect to this list: '
              f'[[{rPage.title()}]] -> [[{actualTarget[0]}]]', flush=True)
        return False
    if len(actualTarget) > 1:
        if actualTarget[1] != anchor:
            print(f'WARNING: Anchor mismatch: '
                  f'[[{rPage.title()}]] -> [[{actualTarget[0]}]]. '
                  f'Is "{actualTarget[1]}", should be "{anchor}".')
            return False
        else:
            return True
    predictedAnchor = getPredictedAnchor(rTitle)
    if predictedAnchor != anchor:
        print(f'WARNING: Anchor mismatch: '
              f'[[{rPage.title()}]] -> [[{actualTarget[0]}]]. '
              f'Predicted "{predictedAnchor}", should be "{anchor}".')
        return False

    rText = rPage.text
    rNewText = re.sub(r'''(
                              \#\s*REDIRECT\s*\[\[
                              [^\]\#]+   # title
                          )
                          (\#[^\]]*)?    # anchor
                          \]\]''',
                      '\\1#' + anchor + ']]',
                      rText, count=1, flags=re.VERBOSE)
    if rText == rNewText:
        print(f'Nothing to do on: [[{rPage.title()}]]')
        return True
    print(f'===CHANGING [[{rPage.title()}]] FROM==================')
    print(rText)
    print('==========TO===========')
    print(rNewText + '\n\n', flush=True)
    trySaving(rPage, rNewText,
              'Add anchor to redirect, as it points to a long list.',
              overwrite=True)
    return True
#!/usr/bin/python3

from commonsbot import mysql
from commonsbot.state import DeletionStateStore, DeletionState
from pywikibot import Site, Category
from pywikibot.pagegenerators import CategorizedPageGenerator
from pywikibot.site import Namespace
from pprint import pprint

commons = Site('commons', 'commons')
userdb = mysql.connect()
store = DeletionStateStore(userdb)


def load_files(categories, depth):
    """
    Returns a list of unique files in categories

    @param categories: List of Commons category names as strings
    @type categories: list
    @param depth: Category recursion depth
    @type depth: int
    @rtype: list
    """
    files = set()
    for cat in categories:
        cat = Category(commons, cat)
        generator = CategorizedPageGenerator(cat,
                                             recurse=depth,
                                             namespaces=Namespace.FILE)
        for page in generator:
def fixPageRedirects(page: pywikibot.Page) -> int:
    """Fix redirects to given page."""
    title = page.title()
    pageData = state.getPageData(title)
    (requiredRedirects, skip) = getRequiredRedirects(page)
    nEditedPages = 0
    for rTitle, rCats in requiredRedirects.items():
        rNewContent = rcatSetToRedirectContent(title, rCats)
        # Attempt to create new redirect.
        if rTitle not in pageData['redirects']:
            try:
                exists = pywikibot.Page(Site(), rTitle).exists()
            except pywikibot.exceptions.InvalidTitle:
                exists = False
            if exists:
                print(f'--Skipping existing page [[{rTitle}]] '
                      f'(not a redirect to [[{title}]]).')
                if title == rTitle:
                    continue
                if title not in pywikibot.Page(Site(), rTitle).text:
                    reports.reportExistingOtherPage(title, rTitle)
            else:
                print(f'--Creating redirect '
                      f'from [[{rTitle}]] to [[{title}]]. '
                      f'Created content:\n{rNewContent}\n-----', flush=True)
                nEditedPages += 1
                rPage = pywikibot.Page(Site(), rTitle)
                trySaving(rPage, rNewContent,
                          'Creating redirect from standard abbreviation. ',
                          overwrite=False)
        else:
            rOldContent = pageData['redirects'][rTitle]
            if isValidISO4Redirect(rOldContent, title, rCats):
                print(f'--Skipping existing valid redirect '
                      f'from [[{rTitle}]] to [[{title}]].')
            elif isReplaceableRedirect(rOldContent, title,
                                       rCats | RCatSet.ISO4):
                # Don't log nor edit redirects that would be replaceable
                # except they have ISO4 and we're not sure it should have.
                if not (rCats & RCatSet.ISO4):
                    continue
                print(f'--Replacing existing redirect '
                      f'from [[{rTitle}]] to [[{title}]].\n'
                      f'RCatSet: {rCats}\n'
                      f'Original content:\n{rOldContent}\n----- '
                      f'New content:\n{rNewContent}\n-----', flush=True)
                nEditedPages += 1
                rPage = pywikibot.Page(Site(), rTitle)
                trySaving(rPage, rNewContent,
                          'Marking standard abbrev rcat. ',
                          overwrite=True)
            elif not skip:
                print(f'--Skipping existing dubious redirect '
                      f'from [[{rTitle}]] to [[{title}]].\n'
                      f'RCatSet: {rCats}\n'
                      f'Original content:\n{rOldContent}\n----- ')
                reports.reportExistingOtherRedirect(title, rTitle, rOldContent)
    # Purge page cache to remove warnings about missing redirects.
    if nEditedPages > 0:
        tryPurging(page)

    # Report redirects that we wouldn't add, but exist and are marked as ISO-4.
    if requiredRedirects and not skip:
        expectedAbbrevs = [r.replace('.', '') for r in requiredRedirects]
        potentialAbbrevs = []
        for rTitle, rContent in pageData['redirects'].items():
            if 'from former name' in rContent or '.' not in rTitle:
                cAbbrevEng = state.tryGetAbbrev(
                    abbrevUtils.stripTitle(rTitle), 'eng') or ''
                cAbbrevAll = state.tryGetAbbrev(
                    abbrevUtils.stripTitle(rTitle), 'all') or ''
                cAbbrevEng = cAbbrevEng.replace('.', '')
                cAbbrevAll = cAbbrevAll.replace('.', '')
                if 'from former name' in rContent:
                    if cAbbrevEng != rTitle.replace('.', ''):
                        expectedAbbrevs.append(cAbbrevEng)
                    if cAbbrevAll != rTitle.replace('.', ''):
                        expectedAbbrevs.append(cAbbrevAll)
                elif '.' not in rTitle:
                    if cAbbrevEng != rTitle.replace('.', ''):
                        potentialAbbrevs.append((cAbbrevEng, rTitle))
                    if cAbbrevAll != rTitle.replace('.', ''):
                        potentialAbbrevs.append((cAbbrevAll, rTitle))
        expectedAbbrevs = [a for a in expectedAbbrevs if a]
        potentialAbbrevs = [(a, t) for (a, t) in potentialAbbrevs if a]
        for rTitle, rContent in pageData['redirects'].items():
            if not re.search(r'R from ISO 4', rContent):
                continue
            # Ignore rTitle that contain a computed abbreviation as a
            # substring, assume that it's some valid variation on a subtitle.
            isExpected = False
            rTitleDotless = rTitle.replace('.', '')
            for computedAbbrev in expectedAbbrevs:
                if re.sub(r'\s*[:(].*', '', computedAbbrev) in rTitleDotless:
                    isExpected = True
                    break
            if not isExpected:
                # Find other titles in existing redirects
                # that would ISO-4 abbreviate to it
                potentials = [t for (a, t) in potentialAbbrevs
                              if abbrevUtils.isSoftMatch(rTitleDotless, a)]
                potentials = list(sorted(set(potentials)))
                # Find closest computed abbrev.
                bestAbbrev = ''
                bestDist = len(rTitle)
                for computedAbbrev in sorted(requiredRedirects):
                    dist = Levenshtein.distance(rTitle, computedAbbrev)
                    if dist < bestDist:
                        bestDist = dist
                        bestAbbrev = computedAbbrev
                # Skip if closest abbrev. is far (assume it's from a former
                # title, since there's a ton of cases like that).
                if bestDist <= 8:
                    reports.reportSuperfluousRedirect(
                        title, rTitle, rContent, bestAbbrev, potentials)
    return nEditedPages
import argparse
from typing import Dict

from pywikibot import Site, Page
from bs4 import BeautifulSoup
import requests
from tabulate import tabulate
from jinja2 import Template
import pandas as pd
import numpy as np
import io

site = Site()

sheet_ids = {
    "📺 シナリオ一覧": 788224352,
    "🎉 イベント": 195852940,
    "ℹ️ このファイルについて": 569209742,
    "🌏 translation help": 1795795580,
    "ジャンプ(スタァ別)": 607126312,
    "🌈 ジャンプ": 397595116,
    "🎴 ブロマイド": 1442344221,
    "ジャンプコマンド": 1371879367,
    "合体ジャンプ": 1223304644,
    "🎶 楽曲リスト": 1572242050,
    "🎫 Prismチケット入手条件": 451938944,
    "ブロマイド(PPガチャ)": 951128679,
    "📕 応援グッズ": 1833623005,
    "🎁 日替わりプレゼント": 1247915676,
    "⛸️ 練習経験値": 2109961420,
    "⛸️ 練習コスト": 585549272,
    "ファンレベル": 1956343157,
def makeLanguageMismatchPatch(
        page: pywikibot.Page,
        infoboxId: int,
        infoboxAbbrev: str,
        computedAbbrev: str,
        matchingPatterns: str
) -> Optional[Dict[str, Any]]:
    """Make patchset for Stitchpitch: infobox param and redirects rcats."""
    from unicodedata import normalize
    import mwparserfromhell
    startTimeStamp = datetime.now(timezone.utc).isoformat()
    diff = datetimeFromPWB(Site().server_time()) - datetime.now(timezone.utc)
    if diff > timedelta(minutes=2) or -diff > timedelta(minutes=2):
        raise Exception('Local zone misconfigured or server timezone not UTC!')
    latestRevision = page.latest_revision
    mainEdit = {
        'patchtype': 'edit',  # implies 'nocreate': True
        'slug': f'{infoboxAbbrev} → {computedAbbrev}',
        'details': matchingPatterns,
        'title': page.title(),
        'summary': 'Fix ISO-4 abbreviation to use all language rules.',
        'minor': True,
        'basetimestamp': datetimeFromPWB(latestRevision.timestamp).isoformat(),
        'starttimestamp': startTimeStamp,
        'oldtext': latestRevision.text,
        'oldrevid': latestRevision.revid
    }
    if datetime.fromisoformat(mainEdit['basetimestamp']) > \
            datetime.fromisoformat(startTimeStamp) - timedelta(hours=5):
        print(f'Skipping patch for "{page.title()}":'
              f' edited a short while ago.')
        return None
    code = mwparserfromhell.parse(normalize('NFC', latestRevision.text))
    foundInfobox = None  # type: Optional[mwparserfromhell.Template]
    foundId = -1
    for t in code.filter_templates():
        if t.name.matches('infobox journal') or \
           t.name.matches('Infobox Journal'):
            foundId += 1
            if foundId == infoboxId:
                foundInfobox = t
                break
    if not foundInfobox:
        print(f'Skipping patch for "{page.title()}":'
              f' infobox #{infoboxId} not found.')
        return None
    foundAbbrev = str(foundInfobox.get('abbreviation').value)
    if foundAbbrev.strip() != infoboxAbbrev:
        print(f'Skipping patch for "{page.title()}":'
              f' infobox abbrev mismatch (comments?).')
        return None
    foundInfobox.get('abbreviation').value = \
        foundAbbrev.replace(infoboxAbbrev, computedAbbrev, 1)
    mainEdit['text'] = str(code)
    patches = [mainEdit]
    groupDetails = ''
    regex = r' *{{\s*(r|R) from ISO ?4( abbreviation)?\s*}} *\n?'
    abbrevRegex = r'{{\s*(r|R)(edirect)? (from )?(common )?ab[a-z]*\s*}}'
    for rPage in getRedirectsToPage(page.title(), namespaces=0,
                                    total=100, content=True):
        rTitle = rPage.title()
        rRevision = rPage.latest_revision
        cAbbrev = abbrevUtils.stripTitle(computedAbbrev.lower())
        if cAbbrev + ' ' in rTitle.lower() + ' ' or \
           cAbbrev.replace('.', '') + ' ' in rTitle.lower() + ' ':
            newtext = rRevision.text
            if re.search(regex, newtext):
                print(f'Skipping patch for existing page, already marked: {rTitle}')
                groupDetails += 'ok: ' + rTitle + '\n'
                continue
            if not isReplaceableRedirect(rRevision.text, page.title(),
                                         RCatSet.ISO4):
                print(f'Skipping patch for unreplaceable page: {rTitle}')
                groupDetails += 'unrepl: ' + rTitle + '\n'
                continue
            if re.search(abbrevRegex, newtext):
                newtext = re.sub(abbrevRegex, '{{R from ISO 4}}', newtext, 1)
            else:
                newtext += '\n{{R from ISO 4}}'
            markPatch = {
                'patchtype': 'edit',
                'slug': 'mark new?',
                'title': rTitle,
                'summary': 'Fix ISO-4 abbreviation to use all language rules.',
                'minor': True,
                'basetimestamp': datetimeFromPWB(rRevision.timestamp).isoformat(),
                'starttimestamp': startTimeStamp,
                'oldtext': rRevision.text,
                'oldrevid': rRevision.revid,
                'text': newtext
            }
            patches.append(markPatch)
        elif re.search(regex, rRevision.text):
            unmarkPatch = {
                'patchtype': 'edit',
                'slug': 'unmark old',
                'title': rTitle,
                'summary': 'Fix ISO-4 abbreviation to use all language rules.',
                'minor': True,
                'basetimestamp': datetimeFromPWB(rRevision.timestamp).isoformat(),
                'starttimestamp': startTimeStamp,
                'oldtext': rRevision.text,
                'oldrevid': rRevision.revid,
                'text': re.sub(regex, '{{R from abbreviation}}\n',
                               rRevision.text)
            }
            if infoboxAbbrev.lower() in rTitle.lower() or \
               infoboxAbbrev.replace('.', '').lower() in rTitle.lower():
                patches.append(unmarkPatch)
            else:
                print(f'Skip patch unmark on unrecog ISO-4: {rTitle}')
                groupDetails += 'unrecog ISO-4: ' + rTitle + '\n'
        else:
            groupDetails += '??: ' + rTitle + '\n'
    shouldHave = [computedAbbrev]
    if computedAbbrev.replace('.', '') != computedAbbrev:
        shouldHave.append(computedAbbrev.replace('.', ''))
    for abbrev in shouldHave:
        rPage = pywikibot.Page(Site(), abbrev)
        if not rPage.exists():
            createPatch = {
                'patchtype': 'create',
                'slug': 'create',
                'title': rPage.title(),
                'summary': 'R from ISO-4 abbreviation of journal title.',
                'minor': True,
                'starttimestamp': startTimeStamp,
                'text': '#REDIRECT[[' + page.title() + ']]\n\n'
                        '{{R from ISO 4}}\n'
            }
            patches.append(createPatch)
    return {
        'patchtype': 'group',
        'slug': f'{infoboxAbbrev} → {computedAbbrev}',
        'details': groupDetails,
        'patches': patches
    }
def is_football_coach_wiki(page):
    try:
        p = Page(Site('en', 'wikipedia'), page).get(get_redirect=True)
        return 'football' in p.lower() and 'coach' in p.lower()
    except NoPage:
        return False
import daty
from pprint import pprint
from pywikibot import Site, ItemPage

site = Site('wikidata', 'wikidata')
repo = site.data_repository()
item_page = ItemPage(repo, 'Q4115189')
data = item_page.get()
target_page = ItemPage(repo, 'Q17')

for P in data['claims']:
    for c in data['claims'][P]:
        if hasattr(c, 'qualifiers'):
            # print(c.qualifiers)
            if 'P710' in c.qualifiers:
                for q in c.qualifiers['P710']:
                    print(q.hash)
                    q.setTarget(target_page)
                    print(q.hash)
                    repo.editQualifier(c, q)
                    # q.changeTarget(target_page)
                    # print(q)

# del item_page
# print(data)
# data['descriptions']['it'] = "LE MIE MANI SONO INCREDIBILI"
# for p in data['claims']:
def _test_link(self, link, title, namespace, site_code, site_fam):
    """Test the separate contents of the link."""
    self.assertEqual(link.title, title)
    self.assertEqual(link.namespace, namespace)
    self.assertEqual(link.site, Site(site_code, site_fam))
    self.assertEqual(link.badges, [])
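# Sketch of how this helper might be called from a test (assumed, not from the
# original suite): check a parsed Link against the English Wikipedia main page.
from pywikibot import Link, Site

link = Link('Main Page', source=Site('en', 'wikipedia'))
# self._test_link(link, 'Main Page', 0, 'en', 'wikipedia')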
def __init__(self, title: str, **kwargs):
    site_plwikt = Site('pl', 'wiktionary')
    parse = kwargs.pop('parse', True)
    Page.__init__(self, source=site_plwikt, title=title, **kwargs)
    PagePL.__init__(self, parse=parse)
class UploadBlitzBot(OneTimeBot):
    def task(self):
        root_dir = Path.home().joinpath("Dropbox/blitzlexikon")
        date_folder = root_dir.joinpath(datetime.now().strftime("%y%m%d"))
        try:
            os.makedirs(date_folder)
        except FileExistsError:
            pass
        file_list: list[str] = list(os.listdir(str(root_dir)))
        max_files = len(file_list)
        for idx, file in enumerate(file_list):
            if not re.match(r"LA2-Blitz-\d{4}_.+?\.jpg", file):
                continue
            self.logger.debug(f"{idx}/{max_files} ... {root_dir.joinpath(file)}")
            imagepage = pywikibot.FilePage(self.wiki, file)  # normalizes filename
            imagepage.text = file_description
            success = imagepage.upload(str(root_dir.joinpath(file)),
                                       comment="ausgeschnittenes Bild für Blitzlexikon")
            if success:
                os.rename(root_dir.joinpath(file), date_folder.joinpath(file))
        self.logger.info("THE END")


if __name__ == "__main__":
    WS_WIKI = Site(code="de", fam="wikisource", user="******")
    with UploadBlitzBot(wiki=WS_WIKI, debug=True,
                        log_to_screen=True, log_to_wiki=False) as bot:
        bot.run()
def create_seasons(series_id, number_of_seasons, quickstatements=False, dry=False):
    Site().login()
    commands.create_seasons(series_id, number_of_seasons, quickstatements, dry)
def __init__(self, repo=None):
    if repo is None:
        repo = Site().data_repository()
    self.repo = repo
def __init__(self, itempage: ItemPage, repo=None):
    self._itempage = itempage
    self._itempage.get()
    self._repo = Site().data_repository() if repo is None else repo