def __init__(self, endpoint=None, entity_url=None, repo=None):
    """
    Create endpoint.

    @param endpoint: SPARQL endpoint URL
    @type endpoint: string
    @param entity_url: URL prefix for any entities returned in a query.
    @type entity_url: string
    @param repo: The Wikibase site which we want to run queries on.
        If provided this overrides any value in endpoint and entity_url.
        Defaults to Wikidata.
    @type repo: pywikibot.site.DataSite
    """
    # default to Wikidata
    if not repo and not endpoint:
        repo = Site('wikidata', 'wikidata')

    if repo:
        try:
            self.endpoint = repo.sparql_endpoint
            self.entity_url = repo.concept_base_uri
        except NotImplementedError:
            raise NotImplementedError(
                'Wiki version must be 1.28-wmf.23 or newer to '
                'automatically extract the sparql endpoint. '
                'Please provide the endpoint and entity_url '
                'parameters instead of a repo.')
        if not self.endpoint:
            raise Error('The site {0} does not provide a sparql endpoint.'
                        .format(repo))
    else:
        if not entity_url:
            raise Error('If initialised with an endpoint the entity_url '
                        'must be provided.')
        self.endpoint = endpoint
        self.entity_url = entity_url

    self.last_response = None
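# Usage sketch (illustrative, not from the original source; this __init__ is
# assumed to belong to pywikibot's SparqlQuery): the endpoint can be derived
# from a repo, or both URLs can be passed explicitly.
from pywikibot import Site
from pywikibot.data.sparql import SparqlQuery

query_via_repo = SparqlQuery(repo=Site('wikidata', 'wikidata'))
query_via_urls = SparqlQuery(
    endpoint='https://query.wikidata.org/sparql',
    entity_url='http://www.wikidata.org/entity/')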
def copy_delayed(src_item: ItemPage, dest_item: ItemPage,
                 props: Iterable[wp.WikidataProperty]) -> Iterable[api.Fix]:
    repo = Site().data_repository()
    src_item.get()
    dest_item.get()

    claims = []
    for prop in props:
        src_claims = src_item.claims.get(prop.pid, [])
        if len(src_claims) > 1:
            print(
                f"Cannot copy {prop} from {format(src_item)} to {format(dest_item)}. "
                "Only scalar properties can be copied"
            )
            continue

        if prop.pid in dest_item.claims:
            print(f"{prop} already has a value in {format(dest_item)}")
            continue

        targets = [claim.getTarget() for claim in src_claims]
        for target in targets:
            target.get()
            target_str = printable_target_value(target)
            print(
                f"Creating claim to copy {prop}={target_str} "
                f"from {format(src_item)} to {format(dest_item)}"
            )
            new_claim = Claim(repo, prop.pid)
            new_claim.setTarget(target)
            summary = f"Setting {prop.pid} ({prop.name})"
            claims.append(api.ClaimFix(new_claim, summary, dest_item))
    return claims
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    seen = set()
    for board_game_id, bgg_id in board_games_with_missing_labels():
        if board_game_id in seen:
            continue
        seen.add(board_game_id)

        board_game_name = utils.bgg_title(bgg_id)
        if board_game_name is None:
            print(f"Unable to fetch name for {board_game_id}.")
            continue

        wiki_url = f"https://www.wikidata.org/wiki/{board_game_id}"
        print(
            f"{dry_str}Setting label='{board_game_name}' for {board_game_id} ( {wiki_url} )"
        )
        if not dry:
            bg_item = ItemPage(repo, board_game_id)
            bg_item.get()
            bg_item.editLabels({"en": board_game_name})
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    for item_link, item_id, title in items_with_missing_labels_with_title():
        print(
            f"{dry_str} ( {str(item_link).ljust(40, ' ')} ) "
            f"Fixing {str(item_id).ljust(9, ' ')}: {title}"
        )
        if dry:
            continue

        # Labels have a character limit, so skip titles that would trigger an error
        if len(title) >= 250:
            continue

        item = ItemPage(repo, item_id)
        item.get()
        try:
            item.editLabels({"en": title})
        except (APIError, OtherPageSaveError) as e:
            print(f"An error occurred while adding label for {item_id}: {e}")
def main() -> None:
    """Run the bot."""
    logging.basicConfig(level=logging.WARNING)
    # Initialize pywikibot.
    assert Site().code == 'en'
    utils.initLimits(
        editsLimits={'default': 4000},
        brfaNumber=6,
        onlySimulateEdits=False,
        botTrial=False
    )
    EnglishWordList.init()

    journals: Set[str] = getCategoryAsSet('Academic journals by language')
    magazines: Set[str] = getCategoryAsSet('Magazines by language')

    # Let 'foreign' be the set of page titles in a language-category
    # other than English, or in the multilingual category.
    foreign: Set[str] = set()
    foreign = foreign | journals
    foreign = foreign | magazines
    foreign = foreign - getCategoryAsSet('English-language journals')
    foreign = foreign - getCategoryAsSet('English-language magazines')
    foreign = foreign | getCategoryAsSet('Multilingual journals')
    foreign = foreign | getCategoryAsSet('Multilingual magazines')

    for page in chain(journals, magazines,
                      getPagesWithTemplate('Infobox journal'),
                      getPagesWithTemplate('Infobox Journal'),
                      getPagesWithTemplate('Infobox magazine'),
                      getPagesWithTemplate('Infobox Magazine')):
        pageTitle = page if isinstance(page, str) else page.title()
        try:
            makeAmpersandRedirects(pageTitle, foreign)
            for rPage in getRedirectsToPage(pageTitle, namespaces=0):
                makeAmpersandRedirects(rPage.title(), foreign, pageTitle)
        except pywikibot.exceptions.TitleblacklistError:
            print('Skipping (title blacklist error): ', pageTitle)
def check_tv_show(tvshow_id=None, child_type="episode", autofix=False,
                  accumulate=False, always=False, filter=""):
    """Check constraints for season/episodes of this TV show.

    TVSHOW_ID is the ID of the television series, in the format Q######.
    """
    if child_type == "episode":
        instance_types = [wp.TELEVISION_SERIES_EPISODE]
    elif child_type == "season":
        instance_types = [wp.TELEVISION_SERIES_SEASON]
    elif child_type == "series":
        instance_types = [wp.TELEVISION_SERIES]
    elif child_type == "all":
        instance_types = [
            wp.TELEVISION_SERIES,
            wp.TELEVISION_SERIES_SEASON,
            wp.TELEVISION_SERIES_EPISODE,
        ]
    else:
        # Guard against silently falling through with an undefined instance_types.
        raise ValueError(f"Unknown child_type: {child_type}")

    for instance_of_type in instance_types:
        key_val_pairs = {
            wp.PART_OF_THE_SERIES.pid: tvshow_id,
            wp.INSTANCE_OF.pid: instance_of_type,
        }
        query = generate_sparql_query(key_val_pairs)
        gen = WikidataSPARQLPageGenerator(query)
        if instance_of_type == wp.TELEVISION_SERIES:
            gen = [ItemPage(Site().data_repository(), tvshow_id)]

        bot = getbot(gen, autofix=autofix, accumulate=accumulate,
                     always=always, property_filter=filter)
        bot.run()
def getBoundedCat(self, cat, currMinSize):
    print()
    print("getBoundedCat")
    site = Site("en")
    try:
        catObj = Category(site, title=cat)
    except UnicodeError:
        # Fallback for byte-string category titles (Python 2 era input).
        catObj = Category(site, title=cat.decode("utf-8"))
    subCats = self.recursiveCats(catObj)
    articleSet = set()
    for subCat in subCats:
        print("inside subCat", subCat)
        newArts = set(self.getArticles(subCat, recurse=False))
        articleSet.update(newArts)
        print(len(articleSet))
        if len(articleSet) > currMinSize:
            print("break")
            return currMinSize
    return len(articleSet)
def __init__(self):
    self.output_path = ""
    self.maccabipedia = Site()
    self.games = dict()
    self.games_events = dict()
          + ' - '.join(comment))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='recreation log bot for French Wikipedia.')
    parser.add_argument('--verbose', help="enable verbose output",
                        action='store_true')
    parser.add_argument(
        '--prefix',
        help="prefix of the recreation log page, "
             "for example: 'Utilisateur:ZéroBot/Journal_des_recréations'",
        action='store',
        required=True)
    params = parser.parse_args()

    if sys.version_info < (3, 0):
        params.prefix = params.prefix.decode('utf-8')  # params.prefix.decode(locale.getpreferredencoding())

    Site().forceLogin()
    end = datetime.today() - ONE_DAY
    with open("recreation.timestamp.txt") as ts:
        start = datetime.strptime(ts.read().strip(), "%Y-%m-%d") + ONE_DAY
    while start <= end:
        process(start)
        with open("recreation.timestamp.txt", "w") as ts:
            print(start.strftime("%Y-%m-%d"), file=ts)
        start += ONE_DAY
from pywikibot.pagegenerators import CategorizedPageGenerator
from pywikibot import Site, Category
from dataskakare import GoogleTranslate
import mwparserfromhell
import hashlib
import uuid
import json

site = Site('commons', 'commons')
cat = Category(site, 'Category:Media_contributed_by_the_Swedish_Performing_Arts_Agency:_2019-03')
translate = GoogleTranslate(input('google service account file:'))


def thumb_from_title(title):
    safe_title = title.encode('utf-8')
    md5_title = hashlib.md5(safe_title).hexdigest()
    return 'https://upload.wikimedia.org/wikipedia/commons/thumb/{}/{}/{}/500px-{}.jpg'.format(
        md5_title[:1], md5_title[:2], title, title)


final_pages = list()
for page in CategorizedPageGenerator(cat, recurse=False, namespaces=6):
    wikicode = mwparserfromhell.parse(page.text)

    template_to_parse = False
    for template in wikicode.filter_templates():
        if template.name.matches('Musikverket-image'):
            template_to_parse = template

    if not template_to_parse:
        print('failed to find given template')
        continue
# -*- coding: utf-8 -*-
__author__ = 'eso'
import sys
sys.path.append('../../')
import re
from pywikibot import Page, Site
from tools.petscan import PetScan

wiki = Site()

searcher = PetScan()
searcher.add_positive_category("RE:Korrigiert")
lemma_list = searcher.run()

list_for_pfaerrich = []
for idx_lem, lemma in enumerate(lemma_list):
    print(idx_lem)
    page = Page(wiki, lemma['title'])
    version_history = page.fullVersionHistory()[::-1]
    size_all_changes = 0
    for idx_rev, revision in enumerate(version_history):
        user = revision.user
        if user == 'Pfaerrich':
            if idx_rev > 0:
                size_prev = len(version_history[idx_rev - 1].text)
            else:
                size_prev = 0
            size_all_changes += abs(
                len(version_history[idx_rev].text) - size_prev)
    korrigiert_flag = False
    if size_all_changes > 0:
def __init__(self):
    site = Site('wikidata', 'wikidata')
    self.repo = site.data_repository()
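# Follow-up sketch (assumed usage, not part of the original snippet): the
# DataSite returned by data_repository() is what ItemPage expects.
from pywikibot import Site, ItemPage

repo = Site('wikidata', 'wikidata').data_repository()
sandbox = ItemPage(repo, 'Q4115189')  # the Wikidata sandbox item
sandbox.get()
print(sandbox.labels.get('en'))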
def move_through_topic(starting_topic='History'):
    # starting site
    site = Site()
    starting_page = Page(site, starting_topic)
    return (i for i in pagegenerators.LinkedPageGenerator(starting_page))
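# Usage sketch (assumed, not from the original source): the function returns a
# lazy generator over pages linked from the starting topic.
for linked_page in move_through_topic('History'):
    print(linked_page.title())
    break  # stop after the first linked page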
def create_episode(series_id, season_id, title, series_ordinal, season_ordinal, dry):
    """Creates an episode item on WikiData

    Arguments
    ---------
    series_id: str
        The Wiki ID of the series ItemPage
    season_id: str
        The Wiki ID of the season ItemPage
    title: str
        The title of this episode. This is used to set the label.
    series_ordinal: int
        The ordinal of this episode, within the series
    season_ordinal: int
        The ordinal of this episode, within the season
    dry: bool
        Whether or not this function should run in dry-run mode.
        In dry-run mode, no real changes are made to WikiData,
        they are only logged to stdout.

    Returns
    -------
    episode_id: str
        The Wiki ID of the episode item
    """
    dry_str = "[DRY-RUN] " if dry else ""
    print(f"{dry_str}Creating episode with label='{title}'")
    episode = None
    if not dry:
        repoutil = RepoUtils(Site().data_repository())

        season = ItemPage(repoutil.repo, season_id)
        season.get()

        # Check if season has part_of_the_series set to series_id
        if wp.PART_OF_THE_SERIES.pid not in season.claims:
            raise ValueError(
                f"The season {season_id} does not have a PART_OF_THE_SERIES "
                f"({wp.PART_OF_THE_SERIES.pid}) property. "
                "Check the input series and season IDs for correctness."
            )
        actual_series_id = str(
            season.claims[wp.PART_OF_THE_SERIES.pid][0].getTarget().getID())
        if actual_series_id != series_id:
            raise ValueError(
                f"The season {season_id} has PART_OF_THE_SERIES={actual_series_id} "
                f"but expected={series_id}. "
                "Check the input series and season IDs for correctness."
            )

        episode = ItemPage(repoutil.repo)
        episode.editLabels({"en": title}, summary="Setting label")
        print(f"Created a new Item: {episode.getID()}")

    print(f"{dry_str}Setting {wp.INSTANCE_OF}={wp.TELEVISION_SERIES_EPISODE}")
    if not dry:
        instance_claim = repoutil.new_claim(wp.INSTANCE_OF.pid)
        instance_claim.setTarget(
            ItemPage(repoutil.repo, wp.TELEVISION_SERIES_EPISODE))
        episode.addClaim(instance_claim, summary=f"Setting {wp.INSTANCE_OF.pid}")

    print(
        f"{dry_str}Setting {wp.PART_OF_THE_SERIES}={series_id}, "
        f"with {wp.SERIES_ORDINAL}={series_ordinal}"
    )
    if not dry:
        series_claim = repoutil.new_claim(wp.PART_OF_THE_SERIES.pid)
        series_claim.setTarget(ItemPage(repoutil.repo, series_id))

        series_ordinal_claim = repoutil.new_claim(wp.SERIES_ORDINAL.pid)
        series_ordinal_claim.setTarget(series_ordinal)
        series_claim.addQualifier(series_ordinal_claim)
        episode.addClaim(series_claim,
                         summary=f"Setting {wp.PART_OF_THE_SERIES.pid}")

    print(
        f"{dry_str}Setting {wp.SEASON}={season_id}, "
        f"with {wp.SERIES_ORDINAL}={season_ordinal}"
    )
    if not dry:
        season_claim = repoutil.new_claim(wp.SEASON.pid)
        season_claim.setTarget(ItemPage(repoutil.repo, season_id))

        season_ordinal_claim = repoutil.new_claim(wp.SERIES_ORDINAL.pid)
        season_ordinal_claim.setTarget(season_ordinal)
        season_claim.addQualifier(season_ordinal_claim)
        episode.addClaim(season_claim, summary=f"Setting {wp.SEASON.pid}")

    return episode.getID() if episode is not None else "Q-1"
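# Hypothetical invocation (all IDs and the title are placeholders, not taken
# from the original): dry-run mode only logs the claims that would be created.
# create_episode(series_id="Q00000", season_id="Q00001", title="Pilot",
#                series_ordinal=1, season_ordinal=1, dry=True)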
def setUp(self):
    wikidata = Site('test', 'wikidata')
    self.ref_1 = Claim(wikidata, 'P55')
    self.ref_1.setTarget('foo')
    self.ref_2 = Claim(wikidata, 'P55')
    self.ref_2.setTarget('bar')
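# Sketch of typical use for such reference claims (assumed, not part of the
# original test): references get attached to a statement with addSources().
from pywikibot import Claim, Site

wikidata = Site('test', 'wikidata')
statement = Claim(wikidata, 'P55')
statement.setTarget('baz')
# Once `statement` has been added to an item, both references from setUp could
# be attached as a single source group:
#     statement.addSources([ref_1, ref_2])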
def __init__(self):
    self._site = Site('bg', fam='wikipedia')
    self._page = Page(self._site, 'Потребител:Iliev/Мерсиметър')
    self._settings_page = Page(self._site,
                               'Потребител:Iliev/Мерсиметър/settings.json')
    self._settings = jload(self._settings_page.text)
    self._user_blocks = {}
def move_to_random_topic():
    # starting site
    site = Site()
    # page = pywikibot.Page(site, u"Whatever")
    return Page(next(pagegenerators.RandomRedirectPageGenerator(1)))
def fixRedirectAnchor(rTitle: str, anchor: str, target: str) -> bool:
    """Add an anchor to given redirect page."""
    rPage = pywikibot.Page(Site(), rTitle)
    addJournal = False
    if rPage.exists() and not rPage.isRedirectPage():
        addJournal = True
        if 'journal' in rTitle.lower():
            print(f'Skip: [[{rTitle}]] already exists, '
                  'title already has "journal".', flush=True)
            return False
        for cat in rPage.categories():
            if 'journal' in cat.title().lower():
                print(f'Skip: [[{rTitle}]] already exists, '
                      'has category containing "journal".', flush=True)
                return False
    if addJournal:
        rPage = pywikibot.Page(Site(), rTitle + ' (journal)')
    if not rPage.exists() or not rPage.isRedirectPage():
        print(f'Not exists/not a redirect: [[{rPage.title()}]]', flush=True)
        return False
    # Page.title() actually contains anchor, if redirect had one.
    actualTarget = rPage.getRedirectTarget().title().split('#', 1)
    if actualTarget[0] != target:
        print(f'Not a redirect to this list: '
              f'[[{rPage.title()}]] -> [[{actualTarget[0]}]]', flush=True)
        return False
    if len(actualTarget) > 1:
        if actualTarget[1] != anchor:
            print(f'WARNING: Anchor mismatch: '
                  f'[[{rPage.title()}]] -> [[{actualTarget[0]}]]. '
                  f'Is "{actualTarget[1]}", should be "{anchor}".')
            return False
        else:
            return True
    predictedAnchor = getPredictedAnchor(rTitle)
    if predictedAnchor != anchor:
        print(f'WARNING: Anchor mismatch: '
              f'[[{rPage.title()}]] -> [[{actualTarget[0]}]]. '
              f'Predicted "{predictedAnchor}", should be "{anchor}".')
        return False

    rText = rPage.text
    rNewText = re.sub(r'''(
                              \#\s*REDIRECT\s*\[\[
                              [^\]\#]+   # title
                          )
                          (\#[^\]]*)?    # anchor
                          \]\]''',
                      '\\1#' + anchor + ']]',
                      rText, count=1, flags=re.VERBOSE)
    if rText == rNewText:
        print(f'Nothing to do on: [[{rPage.title()}]]')
        return True
    print(f'===CHANGING [[{rPage.title()}]] FROM==================')
    print(rText)
    print('==========TO===========')
    print(rNewText + '\n\n', flush=True)
    trySaving(rPage, rNewText,
              'Add anchor to redirect, as it points to a long list.',
              overwrite=True)
    return True
#!/usr/bin/python3

from commonsbot import mysql
from commonsbot.state import DeletionStateStore, DeletionState
from pywikibot import Site, Category
from pywikibot.pagegenerators import CategorizedPageGenerator
from pywikibot.site import Namespace
from pprint import pprint

commons = Site('commons', 'commons')
userdb = mysql.connect()
store = DeletionStateStore(userdb)


def load_files(categories, depth):
    """
    Returns a list of unique files in categories

    @param categories: List of Commons category names as strings
    @type categories: list
    @param depth: Category recursion depth
    @type depth: int
    @rtype: list
    """
    files = set()
    for cat in categories:
        cat = Category(commons, cat)
        generator = CategorizedPageGenerator(cat,
                                             recurse=depth,
                                             namespaces=Namespace.FILE)
        for page in generator:
def fixPageRedirects(page: pywikibot.Page) -> int:
    """Fix redirects to given page."""
    title = page.title()
    pageData = state.getPageData(title)
    (requiredRedirects, skip) = getRequiredRedirects(page)
    nEditedPages = 0
    for rTitle, rCats in requiredRedirects.items():
        rNewContent = rcatSetToRedirectContent(title, rCats)
        # Attempt to create new redirect.
        if rTitle not in pageData['redirects']:
            try:
                exists = pywikibot.Page(Site(), rTitle).exists()
            except pywikibot.exceptions.InvalidTitle:
                exists = False
            if exists:
                print(f'--Skipping existing page [[{rTitle}]] '
                      f'(not a redirect to [[{title}]]).')
                if title == rTitle:
                    continue
                if title not in pywikibot.Page(Site(), rTitle).text:
                    reports.reportExistingOtherPage(title, rTitle)
            else:
                print(f'--Creating redirect '
                      f'from [[{rTitle}]] to [[{title}]]. '
                      f'Created content:\n{rNewContent}\n-----', flush=True)
                nEditedPages += 1
                rPage = pywikibot.Page(Site(), rTitle)
                trySaving(rPage, rNewContent,
                          'Creating redirect from standard abbreviation. ',
                          overwrite=False)
        else:
            rOldContent = pageData['redirects'][rTitle]
            if isValidISO4Redirect(rOldContent, title, rCats):
                print(f'--Skipping existing valid redirect '
                      f'from [[{rTitle}]] to [[{title}]].')
            elif isReplaceableRedirect(rOldContent, title,
                                       rCats | RCatSet.ISO4):
                # Don't log nor edit redirects that would be replaceable
                # except they have ISO4 and we're not sure it should have.
                if not (rCats & RCatSet.ISO4):
                    continue
                print(f'--Replacing existing redirect '
                      f'from [[{rTitle}]] to [[{title}]].\n'
                      f'RCatSet: {rCats}\n'
                      f'Original content:\n{rOldContent}\n----- '
                      f'New content:\n{rNewContent}\n-----', flush=True)
                nEditedPages += 1
                rPage = pywikibot.Page(Site(), rTitle)
                trySaving(rPage, rNewContent,
                          'Marking standard abbrev rcat. ',
                          overwrite=True)
            elif not skip:
                print(f'--Skipping existing dubious redirect '
                      f'from [[{rTitle}]] to [[{title}]].\n'
                      f'RCatSet: {rCats}\n'
                      f'Original content:\n{rOldContent}\n----- ')
                reports.reportExistingOtherRedirect(title, rTitle, rOldContent)
    # Purge page cache to remove warnings about missing redirects.
    if nEditedPages > 0:
        tryPurging(page)

    # Report redirects that we wouldn't add, but exist and are marked as ISO-4.
    if requiredRedirects and not skip:
        expectedAbbrevs = [r.replace('.', '') for r in requiredRedirects]
        potentialAbbrevs = []
        for rTitle, rContent in pageData['redirects'].items():
            if 'from former name' in rContent or '.' not in rTitle:
                cAbbrevEng = state.tryGetAbbrev(
                    abbrevUtils.stripTitle(rTitle), 'eng') or ''
                cAbbrevAll = state.tryGetAbbrev(
                    abbrevUtils.stripTitle(rTitle), 'all') or ''
                cAbbrevEng = cAbbrevEng.replace('.', '')
                cAbbrevAll = cAbbrevAll.replace('.', '')
                if 'from former name' in rContent:
                    if cAbbrevEng != rTitle.replace('.', ''):
                        expectedAbbrevs.append(cAbbrevEng)
                    if cAbbrevAll != rTitle.replace('.', ''):
                        expectedAbbrevs.append(cAbbrevAll)
                elif '.' not in rTitle:
                    if cAbbrevEng != rTitle.replace('.', ''):
                        potentialAbbrevs.append((cAbbrevEng, rTitle))
                    if cAbbrevAll != rTitle.replace('.', ''):
                        potentialAbbrevs.append((cAbbrevAll, rTitle))
        expectedAbbrevs = [a for a in expectedAbbrevs if a]
        potentialAbbrevs = [(a, t) for (a, t) in potentialAbbrevs if a]
        for rTitle, rContent in pageData['redirects'].items():
            if not re.search(r'R from ISO 4', rContent):
                continue
            # Ignore rTitle that contain a computed abbreviation as a
            # substring, assume that it's some valid variation on a subtitle.
            isExpected = False
            rTitleDotless = rTitle.replace('.', '')
            for computedAbbrev in expectedAbbrevs:
                if re.sub(r'\s*[:(].*', '', computedAbbrev) in rTitleDotless:
                    isExpected = True
                    break
            if not isExpected:
                # Find other titles in existing redirects
                # that would ISO-4 abbreviate to it
                potentials = [t for (a, t) in potentialAbbrevs
                              if abbrevUtils.isSoftMatch(rTitleDotless, a)]
                potentials = list(sorted(set(potentials)))
                # Find closest computed abbrev.
                bestAbbrev = ''
                bestDist = len(rTitle)
                for computedAbbrev in sorted(requiredRedirects):
                    dist = Levenshtein.distance(rTitle, computedAbbrev)
                    if dist < bestDist:
                        bestDist = dist
                        bestAbbrev = computedAbbrev
                # Skip if closest abbrev. is far (assume it's from a former
                # title, since there's a ton of cases like that).
                if bestDist <= 8:
                    reports.reportSuperfluousRedirect(
                        title, rTitle, rContent, bestAbbrev, potentials)
    return nEditedPages
import argparse
from typing import Dict

from pywikibot import Site, Page
from bs4 import BeautifulSoup
import requests
from tabulate import tabulate
from jinja2 import Template
import pandas as pd
import numpy as np
import io

site = Site()

sheet_ids = {
    "📺 シナリオ一覧": 788224352,
    "🎉 イベント": 195852940,
    "ℹ️ このファイルについて": 569209742,
    "🌏 translation help": 1795795580,
    "ジャンプ(スタァ別)": 607126312,
    "🌈 ジャンプ": 397595116,
    "🎴 ブロマイド": 1442344221,
    "ジャンプコマンド": 1371879367,
    "合体ジャンプ": 1223304644,
    "🎶 楽曲リスト": 1572242050,
    "🎫 Prismチケット入手条件": 451938944,
    "ブロマイド(PPガチャ)": 951128679,
    "📕 応援グッズ": 1833623005,
    "🎁 日替わりプレゼント": 1247915676,
    "⛸️ 練習経験値": 2109961420,
    "⛸️ 練習コスト": 585549272,
    "ファンレベル": 1956343157,
def makeLanguageMismatchPatch(
        page: pywikibot.Page,
        infoboxId: int,
        infoboxAbbrev: str,
        computedAbbrev: str,
        matchingPatterns: str
) -> Optional[Dict[str, Any]]:
    """Make patchset for Stitchpitch: infobox param and redirects rcats."""
    from unicodedata import normalize
    import mwparserfromhell
    startTimeStamp = datetime.now(timezone.utc).isoformat()
    diff = datetimeFromPWB(Site().server_time()) - datetime.now(timezone.utc)
    if diff > timedelta(minutes=2) or -diff > timedelta(minutes=2):
        raise Exception('Local zone misconfigured or server timezone not UTC!')
    latestRevision = page.latest_revision
    mainEdit = {
        'patchtype': 'edit',  # implies 'nocreate': True
        'slug': f'{infoboxAbbrev} → {computedAbbrev}',
        'details': matchingPatterns,
        'title': page.title(),
        'summary': 'Fix ISO-4 abbreviation to use all language rules.',
        'minor': True,
        'basetimestamp': datetimeFromPWB(latestRevision.timestamp).isoformat(),
        'starttimestamp': startTimeStamp,
        'oldtext': latestRevision.text,
        'oldrevid': latestRevision.revid
    }
    if datetime.fromisoformat(mainEdit['basetimestamp']) > \
            datetime.fromisoformat(startTimeStamp) - timedelta(hours=5):
        print(f'Skipping patch for "{page.title()}":'
              f' edited a short while ago.')
        return None
    code = mwparserfromhell.parse(normalize('NFC', latestRevision.text))
    foundInfobox = None  # type: Optional[mwparserfromhell.Template]
    foundId = -1
    for t in code.filter_templates():
        if t.name.matches('infobox journal') or \
           t.name.matches('Infobox Journal'):
            foundId += 1
            if foundId == infoboxId:
                foundInfobox = t
                break
    if not foundInfobox:
        print(f'Skipping patch for "{page.title()}":'
              f' infobox #{infoboxId} not found.')
        return None
    foundAbbrev = str(foundInfobox.get('abbreviation').value)
    if foundAbbrev.strip() != infoboxAbbrev:
        print(f'Skipping patch for "{page.title()}":'
              f' infobox abbrev mismatch (comments?).')
        return None
    foundInfobox.get('abbreviation').value = \
        foundAbbrev.replace(infoboxAbbrev, computedAbbrev, 1)
    mainEdit['text'] = str(code)
    patches = [mainEdit]
    groupDetails = ''
    regex = r' *{{\s*(r|R) from ISO ?4( abbreviation)?\s*}} *\n?'
    abbrevRegex = r'{{\s*(r|R)(edirect)? (from )?(common )?ab[a-z]*\s*}}'
    for rPage in getRedirectsToPage(page.title(), namespaces=0,
                                    total=100, content=True):
        rTitle = rPage.title()
        rRevision = rPage.latest_revision
        cAbbrev = abbrevUtils.stripTitle(computedAbbrev.lower())
        if cAbbrev + ' ' in rTitle.lower() + ' ' or \
           cAbbrev.replace('.', '') + ' ' in rTitle.lower() + ' ':
            newtext = rRevision.text
            if re.search(regex, newtext):
                print(f'Skipping patch for existing page, already marked: {rTitle}')
                groupDetails += 'ok: ' + rTitle + '\n'
                continue
            if not isReplaceableRedirect(rRevision.text, page.title(),
                                         RCatSet.ISO4):
                print(f'Skipping patch for unreplaceable page: {rTitle}')
                groupDetails += 'unrepl: ' + rTitle + '\n'
                continue
            if re.search(abbrevRegex, newtext):
                newtext = re.sub(abbrevRegex, '{{R from ISO 4}}', newtext, 1)
            else:
                newtext += '\n{{R from ISO 4}}'
            markPatch = {
                'patchtype': 'edit',
                'slug': 'mark new?',
                'title': rTitle,
                'summary': 'Fix ISO-4 abbreviation to use all language rules.',
                'minor': True,
                'basetimestamp': datetimeFromPWB(rRevision.timestamp).isoformat(),
                'starttimestamp': startTimeStamp,
                'oldtext': rRevision.text,
                'oldrevid': rRevision.revid,
                'text': newtext
            }
            patches.append(markPatch)
        elif re.search(regex, rRevision.text):
            unmarkPatch = {
                'patchtype': 'edit',
                'slug': 'unmark old',
                'title': rTitle,
                'summary': 'Fix ISO-4 abbreviation to use all language rules.',
                'minor': True,
                'basetimestamp': datetimeFromPWB(rRevision.timestamp).isoformat(),
                'starttimestamp': startTimeStamp,
                'oldtext': rRevision.text,
                'oldrevid': rRevision.revid,
                'text': re.sub(regex, '{{R from abbreviation}}\n',
                               rRevision.text)
            }
            if infoboxAbbrev.lower() in rTitle.lower() or \
               infoboxAbbrev.replace('.', '').lower() in rTitle.lower():
                patches.append(unmarkPatch)
            else:
                print(f'Skip patch unmark on unrecog ISO-4: {rTitle}')
                groupDetails += 'unrecog ISO-4: ' + rTitle + '\n'
        else:
            groupDetails += '??: ' + rTitle + '\n'
    shouldHave = [computedAbbrev]
    if computedAbbrev.replace('.', '') != computedAbbrev:
        shouldHave.append(computedAbbrev.replace('.', ''))
    for abbrev in shouldHave:
        rPage = pywikibot.Page(Site(), abbrev)
        if not rPage.exists():
            createPatch = {
                'patchtype': 'create',
                'slug': 'create',
                'title': rPage.title(),
                'summary': 'R from ISO-4 abbreviation of journal title.',
                'minor': True,
                'starttimestamp': startTimeStamp,
                'text': '#REDIRECT[[' + page.title() + ']]\n\n'
                        '{{R from ISO 4}}\n'
            }
            patches.append(createPatch)
    return {
        'patchtype': 'group',
        'slug': f'{infoboxAbbrev} → {computedAbbrev}',
        'details': groupDetails,
        'patches': patches
    }
def is_football_coach_wiki(page):
    try:
        p = Page(Site('en', 'wikipedia'), page).get(get_redirect=True)
        return 'football' in p.lower() and 'coach' in p.lower()
    except NoPage:
        return False
import daty
from pprint import pprint
from pywikibot import Site, ItemPage

site = Site('wikidata', 'wikidata')
repo = site.data_repository()
item_page = ItemPage(repo, 'Q4115189')
data = item_page.get()
target_page = ItemPage(repo, 'Q17')

for P in data['claims']:
    for c in data['claims'][P]:
        if hasattr(c, 'qualifiers'):
            # print(c.qualifiers)
            if 'P710' in c.qualifiers:
                for q in c.qualifiers['P710']:
                    print(q.hash)
                    q.setTarget(target_page)
                    print(q.hash)
                    repo.editQualifier(c, q)
                    # q.changeTarget(target_page)
                    # print(q)

# del item_page
# print(data)
# data['descriptions']['it'] = "LE MIE MANI SONO INCREDIBILI"
# for p in data['claims']:
def _test_link(self, link, title, namespace, site_code, site_fam):
    """Test the separate contents of the link."""
    self.assertEqual(link.title, title)
    self.assertEqual(link.namespace, namespace)
    self.assertEqual(link.site, Site(site_code, site_fam))
    self.assertEqual(link.badges, [])
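# Sketch of how this helper might be called from a test (assumed, not from the
# original suite): check a parsed Link against the English Wikipedia main page.
from pywikibot import Link, Site

link = Link('Main Page', source=Site('en', 'wikipedia'))
# self._test_link(link, 'Main Page', 0, 'en', 'wikipedia')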
def __init__(self, title: str, **kwargs):
    site_plwikt = Site('pl', 'wiktionary')
    parse = kwargs.pop('parse', True)
    Page.__init__(self, source=site_plwikt, title=title, **kwargs)
    PagePL.__init__(self, parse=parse)
class UploadBlitzBot(OneTimeBot):
    def task(self):
        root_dir = Path.home().joinpath("Dropbox/blitzlexikon")
        date_folder = root_dir.joinpath(datetime.now().strftime("%y%m%d"))
        try:
            os.makedirs(date_folder)
        except FileExistsError:
            pass
        file_list: list[str] = list(os.listdir(str(root_dir)))
        max_files = len(file_list)
        for idx, file in enumerate(file_list):
            if not re.match(r"LA2-Blitz-\d{4}_.+?\.jpg", file):
                continue
            self.logger.debug(f"{idx}/{max_files} ... {root_dir.joinpath(file)}")
            imagepage = pywikibot.FilePage(self.wiki, file)  # normalizes filename
            imagepage.text = file_description
            success = imagepage.upload(str(root_dir.joinpath(file)),
                                       comment="ausgeschnittenes Bild für Blitzlexikon")
            if success:
                os.rename(root_dir.joinpath(file), date_folder.joinpath(file))
        self.logger.info("THE END")


if __name__ == "__main__":
    WS_WIKI = Site(code="de", fam="wikisource", user="******")
    with UploadBlitzBot(wiki=WS_WIKI, debug=True,
                        log_to_screen=True, log_to_wiki=False) as bot:
        bot.run()
def create_seasons(series_id, number_of_seasons, quickstatements=False, dry=False):
    Site().login()
    commands.create_seasons(series_id, number_of_seasons, quickstatements, dry)
def __init__(self, repo=None):
    if repo is None:
        repo = Site().data_repository()
    self.repo = repo
def __init__(self, itempage: ItemPage, repo=None):
    self._itempage = itempage
    self._itempage.get()
    self._repo = Site().data_repository() if repo is None else repo