Exemplo n.º 1
0
    def login(self, retry=False):
        """Log in to the site and store the returned cookie data.

        When no password is set, the user is prompted for one; the prompt
        is issued with password=True so the typed characters are not shown.

        @param retry: if True, a failed login clears the stored password and
            the whole procedure (prompt included) is started again; if
            False, a failed login makes this method return False.
        @return: True once the cookie data has been stored, False when the
            login failed and retry is not set.
        """
        names = {'name': self.username, 'site': self.site}
        if not self.password:
            prompt = (
                u'Password for user %(name)s on %(site)s (no characters will '
                u'be shown):' % names)
            self.password = pywikibot.input(prompt, password=True)

        pywikibot.output(u"Logging in to %(site)s as %(name)s" % names)
        try:
            cookiedata = self.getCookie()
        except pywikibot.data.api.APIError as e:
            pywikibot.error(u"Login failed (%s)." % e.code)
            if not retry:
                return False
            # Forget the rejected password so the next attempt asks again.
            self.password = None
            return self.login(retry=True)

        self.storecookiedata(cookiedata)
        pywikibot.log(u"Should be logged in now")
        # FIXME: the local bot policy check (botAllowed) is disabled here
        # due to recursion; it needs to move to the Site object after login.
        return True
Exemplo n.º 2
0
    def input_list_choice(self, question, answers, default=None, force=False):
        """Ask the user to select one entry from a list of entries.

        The entries are printed as a right-aligned numbered list. The user
        may answer either with the number of an entry or with the entry
        itself. The question is repeated until a valid answer is given.

        @param question: prompt passed to self.input
        @param answers: list of entries to choose from
        @param default: forwarded to self.input
        @param force: forwarded to self.input
        @return: the selected element of answers
        """
        message = question
        clist = answers

        # Width of the number column = number of digits of the largest index.
        line_template = '{{0: >{0}}}: {{1}}'.format(
            int(math.log10(len(clist)) + 1))
        for n, i in enumerate(clist):
            pywikibot.output(line_template.format(n + 1, i))

        while True:
            choice = self.input(message, default=default, force=force)
            try:
                choice = int(choice) - 1
            except ValueError:
                # Not a number: maybe the user typed the entry itself.
                try:
                    choice = clist.index(choice)
                except ValueError:
                    # list.index() signals "not found" with ValueError;
                    # map it to an out-of-range index so we ask again.
                    choice = -1

            # User typed choice number
            if 0 <= choice < len(clist):
                return clist[choice]
            else:
                pywikibot.error('Invalid response')
Exemplo n.º 3
0
 def revert(self, item):
     """Undo the latest change of the page named by item['title'].

     The two most recent history entries are fetched; the older one is the
     revert target. Of that entry, index 0 is used as revision id, 1 as
     timestamp, 2 as author, 3 as page text and 4 as rollback token.

     @param item: mapping with a 'title' key
     @return: False if the history has fewer than two entries or the
         rollback API request failed; the edit summary when the page was
         saved manually; a status message when the rollback succeeded.
     """
     title = item['title']
     history = pywikibot.Page(self.site, title).fullVersionHistory(
         total=2, rollback=self.rollback)
     if len(history) <= 1:
         # No earlier revision to go back to.
         return False
     rev = history[1]

     comment = i18n.twtranslate(
         pywikibot.Site(), 'revertbot-revert',
         {'revid': rev[0], 'author': rev[2], 'timestamp': rev[1]})
     if self.comment:
         comment += ': ' + self.comment

     page = pywikibot.Page(self.site, title)
     heading = page.title(asLink=True, forceInterwiki=True, textlink=True)
     pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % heading)

     if self.rollback:
         try:
             pywikibot.data.api.Request(
                 action="rollback", title=page.title(), user=self.user,
                 token=rev[4], markbot=1).submit()
         except pywikibot.data.api.APIError as e:
             if e.code == 'badtoken':
                 pywikibot.error(
                     "There was an API token error rollbacking the edit")
             else:
                 pywikibot.exception()
             return False
         return u"The edit(s) made in %s by %s was rollbacked" % (
             page.title(), self.user)

     # No rollback: restore the older text with a regular edit.
     old = page.text
     page.text = rev[3]
     pywikibot.showDiff(old, page.text)
     page.save(comment)
     return comment
Exemplo n.º 4
0
 def delete_redirect(self, page, summary_key):
     """Delete the redirect page, or tag it for speedy deletion.

     With a sysop login the page is deleted directly. Otherwise, if a
     'redirect-broken-redirect-template' translation exists for the site,
     that template is prepended to the page content and the page is saved.
     If neither is possible, only a message is printed.

     @param page: the redirect page; must be on self.site
     @param summary_key: i18n key of the deletion reason / edit summary
     """
     assert page.site == self.site, (
         'target page is on different site {0}'.format(page.site))
     reason = i18n.twtranslate(self.site, summary_key)

     if page.site.logged_in(sysop=True):
         page.delete(reason, prompt=False)
         return

     template_key = 'redirect-broken-redirect-template'
     if not i18n.twhas_key(page.site, template_key):
         pywikibot.output(
             u'No speedy deletion template available')
         return

     pywikibot.output(u"No sysop in user-config.py, "
                      u"put page to speedy deletion.")
     try:
         content = page.get(get_redirect=True)
     except pywikibot.SectionError:
         # Retry with the same title without its section part.
         base_page = pywikibot.Page(page.site,
                                    page.title(withSection=False))
         content = base_page.get(get_redirect=True)
     # TODO: Add bot's signature if needed (Bug: T131517)
     tagged = i18n.twtranslate(page.site, template_key) + '\n' + content
     try:
         page.put(tagged, reason)
     except pywikibot.PageSaveRelatedError as e:
         pywikibot.error(e)
Exemplo n.º 5
0
    def _template_link_target(self, item, link_text):
        """Find the Wikidata item of the page that link_text points to.

        Redirects are followed one step via getRedirectTarget().

        @param item: the item being worked on; a target with the same title
            is rejected as a self-link
        @param link_text: text of the link to resolve
        @return: the linked ItemPage, or None if the title is invalid, the
            page does not exist, it has no existing item, or the item is
            the same as item. The reason is printed in each case.
        """
        link = pywikibot.Link(link_text)
        try:
            target = pywikibot.Page(link)
        except pywikibot.exceptions.InvalidTitle:
            pywikibot.error('%s is not a valid title so it cannot be linked. '
                            'Skipping.' % link_text)
            return None

        if not target.exists():
            pywikibot.output('%s does not exist so it cannot be linked. '
                             'Skipping.' % target)
            return None

        if target.isRedirectPage():
            target = target.getRedirectTarget()

        try:
            target_item = pywikibot.ItemPage.fromPage(target)
        except pywikibot.NoPage:
            target_item = None

        if not target_item or not target_item.exists():
            pywikibot.output('%s does not have a wikidata item to link with. '
                             'Skipping.' % target)
            return None

        if target_item.title() == item.title():
            pywikibot.output('%s links to itself. Skipping.' % target)
            return None

        return target_item
Exemplo n.º 6
0
    def run(self):
        """Process today's AdT and purge yesterday's AdT talk page.

        If self.adtTitle is set, the steps addto_verwaltung, addto_chron and
        add_template are run one after another; an exception in one step is
        printed and does not stop the following steps. Afterwards
        self.get_adt() is called for the previous day and, if self.adtTitle
        is not None then, the page of that title in namespace 1 is purged.
        If self.adtTitle is None at the start, only an error is reported.
        """
        started = datetime.datetime.now().strftime('%d. %B %Y, %H:%M:%S')
        pywikibot.output(u'\n\ninit complete: ' + started.decode('utf-8'))

        if self.adtTitle is None:
            pywikibot.error(u'Konnte heutigen AdT nicht finden!')
            return

        pywikibot.output(u'Heutiger AdT: ' + self.adtTitle)
        for step_name in ('addto_verwaltung', 'addto_chron', 'add_template'):
            try:
                getattr(self, step_name)()
            except Exception as inst:
                pywikibot.output(u'ERROR: ' + str(type(inst)))
                pywikibot.output(inst)
        # self.cleanup_templates() is intentionally not run here.

        # Purge yesterday's AdT talk page.
        # NOTE(review): presumably get_adt() updates self.adtTitle - confirm.
        yesterday = self.today - datedelta.relativedelta(days=1)
        self.get_adt(yesterday)
        if self.adtTitle is not None:
            pywikibot.output(u'Purge Disc. von ' + self.adtTitle)
            pywikibot.Page(self.site, self.adtTitle, ns=1).purge()
def listchoice(clist, message=None, default=None):
    """Ask the user to select one entry from a list of entries.

    The entries are printed as a right-aligned numbered list. The user may
    answer with the number of an entry or with the entry itself; the prompt
    is repeated until the answer is valid.

    @param clist: list of entries to choose from
    @param message: prompt text; u"Select" is used when empty
    @param default: value returned on empty input; when truthy it is also
        shown in the prompt
    @return: the selected element of clist, or default on empty input
    """
    if not message:
        message = u"Select"

    if default:
        message += u" (default: %s)" % default

    message += u": "

    # Width of the number column = number of digits of the largest index.
    line_template = u"{{0: >{0}}}: {{1}}".format(int(math.log10(len(clist)) + 1))
    for n, i in enumerate(clist):
        pywikibot.output(line_template.format(n + 1, i))

    while True:
        choice = pywikibot.input(message)

        if choice == '' and default:
            return default
        try:
            choice = int(choice) - 1
        except ValueError:
            # Not a number: maybe the user typed the entry itself.
            try:
                choice = clist.index(choice)
            except ValueError:
                # list.index() signals "not found" with ValueError; map it
                # to an out-of-range index so the question is asked again.
                choice = -1

        # User typed choice number
        if 0 <= choice < len(clist):
            return clist[choice]
        else:
            pywikibot.error("Invalid response")
Exemplo n.º 8
0
 def treat_page(self):
     """Write today's Commons picture of the day to the current page.

     File name and caption are read from the Commons templates
     'Template:Potd/<date>' and 'Template:Potd/<date> (<lang>)'. If the
     caption page for the site language does not exist, the English one is
     used. Nothing is saved when the file name could not be parsed.
     """
     commons = pywikibot.Site(code=u'commons', fam=u'commons')
     day = datetime.date.today().isoformat()

     file_page = pywikibot.Page(commons, u'Template:Potd/%s' % day)
     filename = get_template_parameter_value(
         file_page, u'Potd filename', u'1')

     # TODO: use languages instead of lang
     lang = self.current_page.site.lang
     caption_page = pywikibot.Page(
         commons, u'Template:Potd/%s (%s)' % (day, lang))
     if lang != u'en' and not caption_page.exists():
         pywikibot.warning(
             u'%s does not exist' % caption_page.title(asLink=True))
         # try en instead
         caption_page = pywikibot.Page(
             commons, u'Template:Potd/%s (en)' % day)
     caption = get_template_parameter_value(
         caption_page, u'Potd description', u'1')

     # TODO: Complete caption parsing to fix links (if not an interwiki
     # then make it an interwiki to Commons)
     # Force links to start with ':'
     caption = re.sub(r"\[\[([^:])", r"[[:\1", caption, flags=re.UNICODE)
     # Make category links interwiki links
     caption = re.sub(r"\[\[(:Category:)", r"[[:c\1", caption,
                      flags=re.UNICODE | re.IGNORECASE)

     # TODO: Use [[d:Q4608595]] to get the local {{Documentation}}
     doc = u'Documentation'

     if filename == u'':
         pywikibot.error(
             u'Failed to parse parameter 1 from {{Potd filename}} on %s'
             % file_page.title(asLink=True))
         return

     summary = u'Updating Commons picture of the day'
     if caption != u'':
         summary += u', [[:c:%s|caption attribution]]' % caption_page.title()
     else:
         summary += u', failed to parse caption'
         pywikibot.error(
             u'Failed to parse parameter 1 from {{Potd description}} on %s'
             % caption_page.title(asLink=True))

     new_text = (
         u'<includeonly>{{#switch:{{{1|}}}|caption=%s|#default=%s}}</includeonly><noinclude>\n{{%s}}</noinclude>'
         % (caption, filename, doc))
     self.put_current(new_text, summary=summary, minor=False)
Exemplo n.º 9
0
 def translate(self, string):
     """Translate expiry time string into german.

     Every run of characters matching ``[DHIMSWYa-z]+`` is looked up
     case-insensitively in the translation table and replaced in a single
     pass. Each token is replaced exactly once, so a short token such as
     'day' or 'min' no longer corrupts a longer one ('days', 'minute')
     elsewhere in the string. Tokens without a translation are reported
     via pywikibot.error and left unchanged.

     @param string: the expiry time string
     @return: the string with all known tokens translated
     """
     table = {
         'gmt': 'UTC',
         'mon': 'Montag',
         'sat': 'Samstag',
         'sun': 'Sonntag',
         'second': 'Sekunde',
         'seconds': 'Sekunden',
         'min': 'Min.',
         'minute': 'Minute',
         'minutes': 'Minuten',
         'hour': 'Stunde',
         'hours': 'Stunden',
         'day': 'Tag',
         'days': 'Tage',
         'week': 'Woche',
         'weeks': 'Wochen',
         'month': 'Monat',
         'months': 'Monate',
         'year': 'Jahr',
         'years': 'Jahre',
         'infinite': 'unbeschränkt',
         'indefinite': 'unbestimmt',
     }

     def substitute(match):
         """Return the translation of one matched token."""
         token = match.group(0)
         try:
             return table[token.lower()]
         except KeyError:
             pywikibot.error(token + ' not found.')
             return token

     return re.sub('[DHIMSWYa-z]+', substitute, string)
Exemplo n.º 10
0
    def _ocr_callback(self, cmd_uri, parser_func=None):
        """OCR callback function.

        @return: tuple (error, text [error description in case of error]).
        """
        def id(x):
            return x

        if not cmd_uri:
            raise ValueError('Parameter cmd_uri is mandatory.')

        if parser_func is None:
            parser_func = id

        if not callable(parser_func):
            raise TypeError('Keyword parser_func must be callable.')

        # wrong link fail with Exceptions
        try:
            response = http.fetch(cmd_uri, charset='utf-8')
        except Exception as e:
            pywikibot.error('Querying %s: %s' % (cmd_uri, e))
            return (True, e)

        data = json.loads(response.content)

        assert 'error' in data, 'Error from phe-tools: %s' % data
        assert data['error'] in [0, 1], 'Error from phe-tools: %s' % data

        error = bool(data['error'])
        if error:
            pywikibot.error('Querying %s: %s' % (cmd_uri, data['text']))
            return (error, data['text'])
        else:
            return (error, parser_func(data['text']))
Exemplo n.º 11
0
def _call_cmd(args, lib='djvulibre'):
    """
    Tiny wrapper around subprocess.Popen().

    Runs the command, waits for it to finish and logs the outcome: the
    command line and stderr output are reported as errors on a non-zero
    return code, otherwise the command and its PID are logged.

    @param args: same as Popen(); non-string items of a sequence are
        converted with str() before the call
    @type args: sequence or string

    @param lib: library name used as prefix of the error message
    @type lib: string

    @return: returns a tuple (res, stdoutdata), where
        res is True if the return code is 0, else False
    """
    if isinstance(args, StringTypes):
        cmd = args
    else:
        # upcast if any param in sequence args is not in StringTypes
        args = [a if isinstance(a, StringTypes) else str(a) for a in args]
        cmd = ' '.join(args)

    dp = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdoutdata, stderrdata = dp.communicate()

    succeeded = dp.returncode == 0
    if succeeded:
        pywikibot.log('SUCCESS: {0} (PID: {1})'.format(cmd, dp.pid))
    else:
        pywikibot.error('{0} error; {1}'.format(lib, cmd))
        pywikibot.error('{0}'.format(stderrdata))

    return (succeeded, stdoutdata)
Exemplo n.º 12
0
 def new_from_site(cls, site):
     """Build an instance from the site's page linked to item Q10784379.

     @param site: site whose page for the repository item is read
     @return: the result of cls.new_from_text() for the page text and the
         site's dbName(); None if the repository lookup is not possible
         (the error is reported) or the site has no page for the item.
     """
     try:
         page = site.page_from_repository('Q10784379')
     except (NotImplementedError, UnknownExtension) as e:
         pywikibot.error(e)
         return None
     if page is None:
         # page_from_repository() yields None when the item has no
         # sitelink for this site; there is no text to build from.
         return None
     return cls.new_from_text(page.text, site.dbName())
def main(*args):
    """Parse the command line and run CommonscatCleaningBot.

    Arguments accepted by the generator factory select the pages. Any other
    argument starting with '-' becomes a bot option: '-name' sets it to
    True, '-name:value' to the value (as int when it is all digits).
    Without a page generator, the articles and subcategories of the site's
    category linked to item Q11925744 are used.

    @param args: command line arguments
    """
    options = {}
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if genFactory.handleArg(arg) or not arg.startswith('-'):
            continue
        name, _, value = arg.partition(':')
        key = name[1:]
        if value == '':
            options[key] = True
        elif value.isdigit():
            options[key] = int(value)
        else:
            options[key] = value

    generator = genFactory.getCombinedGenerator(preload=True)
    site = pywikibot.Site()
    if not generator:
        try:
            category = site.page_from_repository('Q11925744')
        except (NotImplementedError, UnknownExtension) as e:
            pywikibot.error(e)
            return

        if not category:
            pywikibot.output("%s doesn't have an appropriate category" % site)
            return

        sources = [category.articles(namespaces=0), category.subcategories()]
        generator = pagegenerators.WikibaseItemFilterPageGenerator(
            pagegenerators.CombinedPageGenerator(sources))

    CommonscatCleaningBot(generator, site=site, **options).run()
Exemplo n.º 14
0
    def login(self, retry=False, force=False):
        """
        Attempt to log into the server.

        When an access token is already present and force is not set, only
        a message is printed. Otherwise the OAuth handshake is started: the
        authorization URL is opened in the web browser, the user is asked
        for the response query string, and the resulting access token is
        stored. Exceptions during the handshake are reported, not raised.

        @param retry: infinitely retry if exception occurs during authentication.
        @type retry: bool
        @param force: force to re-authenticate
        @type force: bool
        """
        if self.access_token is not None and not force:
            pywikibot.output(
                'Logged in to {site!s} via consumer {key!s}'.format(
                    key=self.consumer_token[0], site=self.site))
            return

        pywikibot.output(
            'Logging in to {site!s} via OAuth consumer {key!s}'.format(
                key=self.consumer_token[0], site=self.site))
        consumer_token = mwoauth.ConsumerToken(self.consumer_token[0],
                                               self.consumer_token[1])
        handshaker = mwoauth.Handshaker(
            self.site.base_url(self.site.path()), consumer_token)
        try:
            redirect, request_token = handshaker.initiate()
            pywikibot.stdout('Authenticate via web browser..')
            webbrowser.open(redirect)
            pywikibot.stdout('If your web browser does not open '
                             'automatically, please point it to: %s'
                             % redirect)
            request_qs = pywikibot.input('Response query string: ')
            token = handshaker.complete(request_token, request_qs)
            self._access_token = (token.key, token.secret)
        except Exception as e:
            pywikibot.error(e)
            if retry:
                self.login(retry=True, force=force)
Exemplo n.º 15
0
def add_mbid_claim_to_item(pid, item, mbid, donefunc, simulate=False):
    """
    Add a claim for property `pid` with value `mbid` to `item`, attach the
    MusicBrainz source to it and call `donefunc` with `mbid` afterwards.

    With `simulate` set, only messages are printed. The process exits with
    status 1 when the user is blocked; any other pywikibot error is logged
    as a warning and neither the source is added nor `donefunc` called.

    :type pid: str
    :type mbid: str
    :type item: pywikibot.ItemPage
    """
    claim = wp.Claim(const.WIKIDATA, pid)
    claim.setTarget(mbid)
    announcement = u"Adding property {pid}, value {mbid} to {title}".format(
        pid=pid, mbid=mbid, title=item.title())
    wp.output(announcement)
    if simulate:
        wp.output("Simulation, no property has been added")
        return
    try:
        item.addClaim(claim, True)
    except wp.UserBlocked:
        wp.error("I have been blocked")
        exit(1)
    except wp.Error as err:
        wp.warning(err)
        return
    else:
        wp.output("Adding the source Claim")
        claim.addSource(const.MUSICBRAINZ_CLAIM, bot=True)
        donefunc(mbid)
Exemplo n.º 16
0
def _oauth_login(site):
    """Log in to site via OAuth and print the outcome.

    An error is reported when the login manager has no identity or when
    the identity's username differs from the username configured for the
    site. On success the user is told which line to copy into
    user-config.py to use the obtained token.

    @param site: the site to log in to
    """
    consumer_key, consumer_secret = _get_consumer_token(site)
    login_manager = OauthLoginManager(consumer_secret, False, site,
                                      consumer_key)
    login_manager.login()
    identity = login_manager.identity
    if identity is None:
        pywikibot.error('Invalid OAuth info for %(site)s.' %
                        {'site': site})
    elif site.username() != identity['username']:
        pywikibot.error('Logged in on %(site)s via OAuth as %(wrong)s, '
                        'but expect as %(right)s'
                        % {'site': site,
                           'wrong': identity['username'],
                           'right': site.username()})
    else:
        oauth_token = login_manager.consumer_token + login_manager.access_token
        # The trailing space keeps the user name and "via" apart; the two
        # literals are concatenated without any separator.
        pywikibot.output('Logged in on %(site)s as %(username)s '
                         'via OAuth consumer %(consumer)s'
                         % {'site': site,
                            'username': site.username(sysop=False),
                            'consumer': consumer_key})
        pywikibot.output('NOTE: To use OAuth, you need to copy the '
                         'following line to your user-config.py:')
        pywikibot.output('authenticate[\'%(hostname)s\'] = %(oauth_token)s' %
                         {'hostname': site.hostname(),
                          'oauth_token': oauth_token})
Exemplo n.º 17
0
    def upload_image(self, html, data, imgfile):
        """Upload imgfile to the target site unless the file exists already.

        The file name is built from the 'amh_id', the upper-cased
        'institution_shortcode' and the first 150 characters of 'title_en'
        in data. Upload warnings and errors are printed, not raised.

        @param html: text of the file description page
        @param data: mapping with the keys named above
        @param imgfile: passed to site.upload() as source_filename
        """
        site = self.targetSite

        # Construct the name
        name_parts = (
            data["amh_id"],
            data["institution_shortcode"].upper(),
            data["title_en"][:150],
        )
        commons_filename = "AMH-%s-%s_%s.jpg" % name_parts

        if self.page_exists(commons_filename):
            pywikibot.output("%s already exists, skipping" % commons_filename)
            return

        # ImagePage normalizes the filename
        imagepage = pywikibot.ImagePage(site, commons_filename)
        imagepage.text = html

        pywikibot.output(
            u'Uploading file %s to %s via API....' % (commons_filename, site))

        try:
            site.upload(imagepage, source_filename=imgfile)
        except pywikibot.UploadWarning as warn:
            pywikibot.output(u"We got a warning message: ", newline=False)
            pywikibot.output(str(warn))
        except Exception:
            pywikibot.error("Upload error: ", exc_info=True)
        else:
            # No warning, upload complete.
            pywikibot.output(u"Upload successful.")
Exemplo n.º 18
0
    def cleanup_templates(self):
        """Remove {{AdT-Vorschlag Hinweis}} from talk pages of done proposals.

        For every entry of self.erl_props that is not also in self.props,
        the page of that title in namespace 1 is loaded and all top-level
        'AdT-Vorschlag Hinweis' templates are removed. Changed pages are
        shown as a diff and saved unless self.dry is set.
        """
        for adt in self.erl_props:
            if adt in self.props:
                # proposed as AdT more than once
                continue

            page = pywikibot.Page(self.site, adt, ns=1)

            if not page.exists():
                pywikibot.error(u'ERROR: disc for AdT-Vorschlag ' + adt
                                + u' does not exist!')
                # NOTE(review): this stops processing of all remaining
                # entries - confirm that `continue` was not intended.
                return

            oldtext = page.text
            code = mwparser.parse(oldtext)

            for template in code.filter_templates(recursive=False):
                if not template.name.matches("AdT-Vorschlag Hinweis"):
                    continue
                code.remove(template)
                pywikibot.output(adt +
                                 u': {{AdT-Vorschlag Hinweis}} '
                                 u'gefunden, entfernt')

            page.text = unicode(code)
            if page.text == oldtext:
                continue

            page.text = page.text.lstrip(u'\n')
            pywikibot.showDiff(oldtext, page.text)
            if not self.dry:
                page.save(
                    comment=u'Bot: [[Vorlage:AdT-Vorschlag Hinweis]] entfernt',
                    botflag=True, minor=True)
Exemplo n.º 19
0
def main():
    """Print or delete API cache entries selected on the command line.

    Recognized arguments:
      -delete       delete the selected entries instead of printing them
      -password     select entries for which has_password(entry) is true
      -c <expr>     select entries for which the Python expression <expr>
                    (evaluated with the name ``entry`` bound) is true
    Every other argument is taken as a cache directory. Without any, the
    default 'apicache' directories are processed.
    """
    local_args = pywikibot.handleArgs()
    cache_paths = []
    delete = False
    command = None

    for arg in local_args:
        if command == '':
            # The previous argument was '-c'; this is its expression.
            command = arg
        elif arg == '-delete':
            delete = True
        elif arg == '-password':
            command = 'has_password(entry)'
        elif arg == '-c':
            if command:
                pywikibot.error('Only one command may be executed.')
                exit(1)
            command = ''
        else:
            cache_paths.append(arg)

    if not cache_paths:
        cache_paths = ['apicache', 'tests/apicache']

        # Also process the base directory, if it isnt the current directory
        if os.path.abspath(os.getcwd()) != pywikibot.config2.base_dir:
            cache_paths += [
                os.path.join(pywikibot.config2.base_dir, 'apicache')]

        # Also process the user home cache, if it isnt the config directory
        if os.path.expanduser('~/.pywikibot') != pywikibot.config2.base_dir:
            cache_paths += [
                os.path.join(os.path.expanduser('~/.pywikibot'), 'apicache')]

    if delete:
        def action_func(entry):
            return entry._delete()
    else:
        def action_func(entry):
            return pywikibot.output(entry)

    if command:
        try:
            # NOTE: eval() executes arbitrary code taken from the command
            # line. That is the purpose of '-c', but this must never be
            # fed with input from an untrusted source.
            command_func = eval('lambda entry: ' + command)
        except Exception:
            # Not a bare except: SystemExit and KeyboardInterrupt must not
            # be reported as a compile problem.
            pywikibot.exception()
            pywikibot.error(u'Can not compile command: %s' % command)
            exit(1)

        def func(entry):
            # Run the action only on entries the command selects.
            return command_func(entry) and action_func(entry)
    else:
        func = action_func

    for cache_path in cache_paths:
        if len(cache_paths) > 1:
            pywikibot.output(u'Processing %s' % cache_path)
        process_entries(cache_path, func)
Exemplo n.º 20
0
 def revert(self, item):
     """Undo the latest change of the page named by item["title"].

     The two most recent history entries are fetched; the older one is the
     revert target. Of that entry, index 0 is used as revision id, 1 as
     timestamp, 2 as author, 3 as page text and 4 as rollback token.

     @param item: mapping with a "title" key
     @return: False if the history has fewer than two entries or the
         rollback API request failed; the edit summary when the page was
         saved manually; a status message when the rollback succeeded.
     """
     title = item["title"]
     history = pywikibot.Page(self.site, title).fullVersionHistory(
         total=2, rollback=self.rollback)
     if len(history) <= 1:
         # No earlier revision to go back to.
         return False
     rev = history[1]

     comment = i18n.twtranslate(
         pywikibot.Site(),
         "revertbot-revert",
         {"revid": rev[0], "author": rev[2], "timestamp": rev[1]},
     )
     if self.comment:
         comment += ": " + self.comment

     page = pywikibot.Page(self.site, title)
     heading = page.title(asLink=True, forceInterwiki=True, textlink=True)
     pywikibot.output("\n\n>>> \03{lightpurple}%s\03{default} <<<" % heading)

     if self.rollback:
         rollback_params = {
             "action": "rollback",
             "title": page,
             "user": self.user,
             "token": rev[4],
             "markbot": True,
         }
         try:
             pywikibot.data.api.Request(
                 self.site, parameters=rollback_params).submit()
         except pywikibot.data.api.APIError as e:
             if e.code == "badtoken":
                 pywikibot.error(
                     "There was an API token error rollbacking the edit")
             else:
                 pywikibot.exception()
             return False
         return "The edit(s) made in %s by %s was rollbacked" % (
             page.title(), self.user)

     # No rollback: restore the older text with a regular edit.
     old = page.text
     page.text = rev[3]
     pywikibot.showDiff(old, page.text)
     page.save(comment)
     return comment
Exemplo n.º 21
0
    def run(self):
        """Run bot.

        Uploads self.url when it is a single string, otherwise every entry
        of self.url. Before that it is checked that uploads are enabled on
        the target site and that the user has the 'upload' right; if not,
        an error is reported and nothing else happens. Once the upload
        phase is entered, self.exit() is always called at the end.

        @return: the result of upload_file() when self.url is a single
            string, otherwise None.
        """
        site = self.targetSite

        # early check that upload is enabled
        if site.is_uploaddisabled():
            pywikibot.error(
                "Upload error: Local file uploads are disabled on %s."
                % site)
            return

        # early check that user has proper rights to upload
        if "upload" not in site.userinfo["rights"]:
            pywikibot.error(
                "User '%s' does not have upload rights on site %s."
                % (site.user(), site))
            return

        bot_name = self.__class__.__name__
        try:
            if isinstance(self.url, basestring):
                self._treat_counter = 1
                return self.upload_file(self.url)
            for file_url in self.url:
                self.upload_file(file_url)
                self._treat_counter += 1
        except QuitKeyboardInterrupt:
            pywikibot.output('\nUser quit %s bot run...' % bot_name)
        except KeyboardInterrupt:
            # Only propagate the interrupt in verbose mode.
            if config.verbose_output:
                raise
            pywikibot.output(
                '\nKeyboardInterrupt during %s bot run...' % bot_name)
        finally:
            self.exit()
Exemplo n.º 22
0
    def put_page(self, page, new):
        """Print diffs between original and new (text), put new text for page.

        Unless self.acceptall is set, the user is asked whether to accept
        the change: 'y' saves this page asynchronously, 'a' sets
        self.acceptall, anything else skips the page. With self.acceptall
        set, the page is saved synchronously and save problems are printed
        instead of being raised.

        @param page: the page to change
        @param new: the new page text
        """
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(page.get(), new)
        if not self.acceptall:
            choice = pywikibot.inputChoice(u'Do you want to accept ' +
                                           u'these changes?',
                                           ['Yes', 'No', 'All'],
                                           ['y', 'N', 'a'], 'N')
            if choice == 'a':
                self.acceptall = True
            if choice == 'y':
                page.text = new
                # ``async`` is a reserved word from Python 3.7 on, so the
                # keyword cannot be written literally; passing it through
                # a dict keeps the same call and parses on every version.
                page.save(self.msg, **{'async': True})
        if self.acceptall:
            try:
                page.text = new
                page.save(self.msg)
            except pywikibot.EditConflict:
                pywikibot.output(u'Skipping %s because of edit conflict'
                                  % (page.title(),))
            except pywikibot.SpamfilterError as e:
                pywikibot.output(
                    u'Cannot change %s because of blacklist entry %s'
                    % (page.title(), e.url))
            except pywikibot.PageNotSaved as error:
                pywikibot.error(u'putting page: %s' % (error.args,))
            except pywikibot.LockedPage:
                pywikibot.output(u'Skipping %s (locked page)'
                                  % (page.title(),))
            except pywikibot.ServerError as e:
                pywikibot.output(u'Server Error : %s' % e)
Exemplo n.º 23
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Arguments that are neither '-exists:' nor page generator arguments are
    read as alternating property / value pairs describing the claims.

    @param args: command line arguments
    @type args: list of unicode
    @rtype: bool
    @return: False when the property/value arguments are unpaired or no
        page generator was given, True after the bot has run
    @raise NotImplementedError: a property has an unsupported datatype
    """
    exists_arg = ''
    commandline_claims = []

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    gen = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg.startswith('-exists:'):
            # How to handle duplicate claims
            exists_arg = arg.split(':')[1]
        elif not gen.handleArg(arg):
            # Neither an option nor a page generator argument
            commandline_claims.append(arg)

    if len(commandline_claims) % 2:
        pywikibot.error('Incomplete command line property-value pair.')
        return False

    claims = []
    repo = pywikibot.Site().data_repository()
    # The length is even here, so the two slices pair up exactly.
    pairs = zip(commandline_claims[::2], commandline_claims[1::2])
    for prop, value in pairs:
        claim = pywikibot.Claim(repo, prop)
        if claim.type == 'wikibase-item':
            target = pywikibot.ItemPage(repo, value)
        elif claim.type == 'string':
            target = value
        elif claim.type == 'globe-coordinate':
            coord_args = [float(c) for c in value.split(',')]
            # Default precision value (~10 m at equator)
            precision = coord_args[2] if len(coord_args) >= 3 else 0.0001
            target = pywikibot.Coordinate(coord_args[0], coord_args[1],
                                          precision=precision)
        else:
            raise NotImplementedError(
                "%s datatype is not yet supported by claimit.py"
                % claim.type)
        claim.setTarget(target)
        claims.append(claim)

    generator = gen.getCombinedGenerator()
    if not generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    ClaimRobot(generator, claims, exists_arg).run()
    return True
Exemplo n.º 24
0
    def add_mbid_claim_to_item(self, item, mbid):
        """
        Add a claim for `self.property_id` with value `mbid` to `item`,
        attach the source claims to it and call `self.donefunc` with `mbid`
        to signal the completion.

        With `wp.config.simulate` set, only a message is printed. The
        process exits with status 1 when the user is blocked; any other
        pywikibot error is logged as a warning and neither the sources are
        added nor `self.donefunc` called.

        :type mbid: str
        :type item: pywikibot.ItemPage
        """
        claim = wp.Claim(const.WIKIDATA_DATASITE, self.property_id)
        claim.setTarget(mbid)
        announcement = u"Adding property {pid}, value {mbid} to {title}".format(
            pid=self.property_id, mbid=mbid, title=item.title())
        wp.debug(announcement, layer="")
        if wp.config.simulate:
            wp.output("Simulation, no property has been added")
            return
        try:
            item.addClaim(claim, True)
        except wp.UserBlocked:
            wp.error("I have been blocked")
            exit(1)
        except wp.Error as err:
            wp.warning(err)
            return
        else:
            wp.debug("Adding the source Claim", layer="")
            sources = [const.MUSICBRAINZ_CLAIM, const.RETRIEVED_CLAIM]
            claim.addSources(sources, bot=True)
            self.donefunc(mbid)
Exemplo n.º 25
0
def get_wikidata_itempage_from_wikilink(wikilink):
    """Resolve a Wikipedia or Wikidata URL to the matching Wikidata item page.

    For a Wikipedia URL the language is taken from the first label of the
    host name and the item is looked up through the linked article; for a
    Wikidata URL the item is built directly from the path.

    Returns the loaded item page, or None (after logging an error) when the
    article or the item does not exist.

    Raises ValueError when the host is neither a wikipedia nor a wikidata one.
    """
    url_parts = urlparse(wikilink)
    host = url_parts.netloc
    is_wikipedia = "wikipedia" in host

    # Reject anything that is neither of the two supported hosts up front.
    if not is_wikipedia and "wikidata" not in host:
        raise ValueError("%s is not a link to a wikipedia page" % wikilink)

    pagename = url_parts.path.replace(WIKI_PREFIX, "")

    if is_wikipedia:
        language_code = host.split(".")[0]
        article = wp.Page(wp.Site(language_code, "wikipedia"), pagename)
        check_url_needs_to_be_skipped(wikilink, article)
        try:
            wikidatapage = wp.ItemPage.fromPage(article)
        except wp.NoPage:
            wp.error("%s does not exist" % article)
            return None
    else:
        wikidatapage = wp.ItemPage(const.WIKIDATA_DATASITE, pagename)

    # Load the item (following redirects) to make sure it really exists.
    try:
        wikidatapage.get(get_redirect=True)
    except wp.NoPage:
        wp.error("%s does not exist" % pagename)
        return None

    check_url_needs_to_be_skipped(wikilink, wikidatapage)
    return wikidatapage
def main(*args):
    """
    Parse the command line and run a DataIngestionBot per configuration page.

    If args is an empty list, sys.argv is used.

    The only local option is "-csvdir:<dir>", the directory holding the CSV
    files; every other local argument is handed to the generator factory,
    whose combined generator yields the configuration pages.

    @param args: command line arguments
    @type args: list of unicode
    @return: False when the generator or -csvdir is missing, None otherwise
    """
    csv_option = "-csvdir:"

    # Process global args and prepare generator args parser
    remaining = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    csv_dir = None

    for option in remaining:
        if not option.startswith(csv_option):
            factory.handleArg(option)
            continue
        csv_dir = option[len(csv_option):]

    config_generator = factory.getCombinedGenerator()

    if not (config_generator and csv_dir):
        missing = [] if csv_dir else ["-csvdir"]
        pywikibot.bot.suggest_help(missing_parameters=missing,
                                   missing_generator=not config_generator)
        return False

    for config_page in config_generator:
        try:
            config_page.get()
        except pywikibot.NoPage:
            pywikibot.error("%s does not exist" % config_page)
            continue

        settings = DataIngestionBot.parseConfigurationPage(config_page)
        csv_path = os.path.join(csv_dir, settings["csvFile"])

        try:
            csv_file = codecs.open(csv_path, "r", settings["csvEncoding"])
        except (IOError, OSError) as e:
            pywikibot.error("%s could not be opened: %s" % (csv_path, e))
            continue

        # The context manager closes the file even if the bot raises.
        with csv_file:
            reader = CSVReader(
                csv_file,
                urlcolumn="url",
                site=config_page.site,
                dialect=settings["csvDialect"],
                delimiter=str(settings["csvDelimiter"]),
            )
            ingestion_bot = DataIngestionBot(reader,
                                             settings["titleFormat"],
                                             settings["formattingTemplate"],
                                             site=None)
            ingestion_bot.run()
Exemplo n.º 27
0
 def searchSirutaInWD(self, siruta):
     """Return the single SPARQL result whose P843 value equals `siruta`.

     The number is interpolated with %d into the query. When the query does
     not return exactly one result, an error is logged and None is returned.
     """
     sparql_text = "SELECT ?item WHERE { ?item wdt:P843 \"%d\" .     SERVICE wikibase:label { bd:serviceParam wikibase:language \"ro\" }}" % siruta
     matches = sparql.SparqlQuery().get_items(sparql_text, result_type=list)
     if len(matches) == 1:
         return matches[0]
     # Zero or several matches: the result is ambiguous, so report it.
     pywikibot.error("There are %d items with siruta %d" % (len(matches), siruta))
     return None
Exemplo n.º 28
0
def main():
    """
    Process command line arguments and run an action on cache entries.

    Recognised local arguments:

    -delete     call ``entry._delete()`` on every selected entry instead of
                printing it
    -password   select entries with the expression ``has_password(entry)``
    -c EXPR     select entries with EXPR, a Python expression in which the
                cache entry is available as ``entry``

    Every other argument is taken as a cache path. Without any path, the
    'apicache' and 'tests/apicache' directories are processed, plus the
    apicache of the pywikibot base directory and of ~/.pywikibot when those
    differ from the locations already covered.

    @raises SystemExit: with status 1 when a second command is given or the
        command expression cannot be compiled
    """
    local_args = pywikibot.handleArgs()
    cache_paths = []
    delete = False
    command = None

    for arg in local_args:
        if command == "":
            # The previous argument was -c: this one is the expression.
            command = arg
        elif arg == "-delete":
            delete = True
        elif arg == "-password":
            command = "has_password(entry)"
        elif arg == "-c":
            if command:
                pywikibot.error("Only one command may be executed.")
                raise SystemExit(1)
            # Empty string marks "expression expected in the next argument".
            command = ""
        else:
            cache_paths.append(arg)

    if not cache_paths:
        cache_paths = ["apicache", "tests/apicache"]
        base_dir = pywikibot.config2.base_dir
        home_dir = os.path.expanduser("~/.pywikibot")

        # Also process the base directory, if it isn't the current directory
        if os.path.abspath(os.getcwd()) != base_dir:
            cache_paths.append(os.path.join(base_dir, "apicache"))

        # Also process the user home cache, if it isn't the config directory
        if home_dir != base_dir:
            cache_paths.append(os.path.join(home_dir, "apicache"))

    if delete:
        def action_func(entry):
            return entry._delete()
    else:
        def action_func(entry):
            return pywikibot.output(entry)

    if command:
        # NOTE(security): the expression comes straight from the command
        # line and is evaluated with eval(); only run this tool with
        # expressions you trust.
        try:
            command_func = eval("lambda entry: " + command)
        except Exception:
            pywikibot.exception()
            pywikibot.error("Can not compile command: %s" % command)
            raise SystemExit(1)

        def func(entry):
            # Run the action only on entries accepted by the filter.
            return command_func(entry) and action_func(entry)
    else:
        func = action_func

    for cache_path in cache_paths:
        if len(cache_paths) > 1:
            pywikibot.output("Processing %s" % cache_path)
        process_entries(cache_path, func)
Exemplo n.º 29
0
    def login(self, retry=False):
        """
        Attempt to log into the server.

        @param retry: infinitely retry if the API returns an unknown error
        @type retry: bool

        @raises NoUsername: Username is not recognised by the site.
        """
        if not self.password:
            # First check that the username exists,
            # to avoid asking for a password that will not work.
            self.check_user_exists()

            # As we don't want the password to appear on the screen, we set
            # password = True
            self.password = pywikibot.input(
                u'Password for user %(name)s on %(site)s (no characters will '
                u'be shown):' % {'name': self.login_name, 'site': self.site},
                password=True)

        pywikibot.output(u"Logging in to %(site)s as %(name)s"
                         % {'name': self.login_name, 'site': self.site})
        try:
            cookiedata = self.getCookie()
        except pywikibot.data.api.APIError as e:
            pywikibot.error(u"Login failed (%s)." % e.code)
            if e.code == 'NotExists':
                raise NoUsername(u"Username '%s' does not exist on %s"
                                 % (self.login_name, self.site))
            elif e.code == 'Illegal':
                raise NoUsername(u"Username '%s' is invalid on %s"
                                 % (self.login_name, self.site))
            elif e.code == 'readapidenied':
                raise NoUsername(
                    'Username "{0}" does not have read permissions on '
                    '{1}'.format(self.login_name, self.site))
            # TODO: investigate other unhandled API codes (bug T75539)
            if retry:
                self.password = None
                return self.login(retry=True)
            else:
                return False
        self.storecookiedata(cookiedata)
        pywikibot.log(u"Should be logged in now")
#        # Show a warning according to the local bot policy
#   FIXME: disabled due to recursion; need to move this to the Site object after
#   login
#        if not self.botAllowed():
#            logger.error(
#                u"Username '%(name)s' is not listed on [[%(page)s]]."
#                 % {'name': self.username,
#                    'page': botList[self.site.family.name][self.site.code]})
#            logger.error(
# "Please make sure you are allowed to use the robot before actually using it!")
#            return False
        return True
Exemplo n.º 30
0
 def number_of_images(self):
     """Return the (cached) number of images in the djvu file.

     The count is read from the output of ``djvused -e n`` run on
     ``self.file_djvu`` and stored in ``self._image_count`` after the first
     successful call.

     @return: the number of images, or None when djvused exits with a
         non-zero return code (the error is logged and nothing is cached)
     """
     if not hasattr(self, '_image_count'):
         dp = subprocess.Popen(['djvused', '-e', 'n', self.file_djvu],
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         (stdoutdata, stderrdata) = dp.communicate()
         if dp.returncode != 0:
             pywikibot.error('djvulibre library error!\n%s' % stderrdata)
             # Do not parse the output of a failed run; leaving the cache
             # unset lets a later call try again.
             return None
         self._image_count = int(stdoutdata)
     return self._image_count
Exemplo n.º 31
0
def main():
    """Process command line arguments and run process_entries on each path.

    Local arguments: -delete (use CacheEntry._delete as action), -password
    (filter with 'has_password(entry)'), -c <command> (filter expression) and
    -o <command> (output expression). Anything else is a cache path.

    Returns False when the filter or output command could not be parsed.
    """
    cache_paths = []
    delete = False
    command = None
    output = None

    for arg in pywikibot.handleArgs():
        # An empty string means the previous argument was -c / -o and the
        # value is expected now.
        if command == '':
            command = arg
        elif output == '':
            output = arg
        elif arg == '-delete':
            delete = True
        elif arg == '-password':
            command = 'has_password(entry)'
        elif arg == '-c':
            if command:
                pywikibot.error('Only one command may be executed.')
                exit(1)
            command = ''
        elif arg == '-o':
            if output:
                pywikibot.error('Only one output may be defined.')
                exit(1)
            output = ''
        else:
            cache_paths.append(arg)

    if not cache_paths:
        base_dir = pywikibot.config2.base_dir
        home_dir = os.path.expanduser('~/.pywikibot')
        cache_paths = ['apicache', 'tests/apicache']

        # Also process the base directory, if it isn't the current directory
        if os.path.abspath(os.getcwd()) != base_dir:
            cache_paths.append(os.path.join(base_dir, 'apicache'))

        # Also process the user home cache, if it isn't the config directory
        if home_dir != base_dir:
            cache_paths.append(os.path.join(home_dir, 'apicache'))

    action_func = CacheEntry._delete if delete else None

    output_func = None
    if output:
        output_func = _parse_command(output, 'output')
        if output_func is None:
            return False

    filter_func = None
    if command:
        filter_func = _parse_command(command, 'filter')
        if filter_func is None:
            return False

    # Only announce the path when there is more than one to tell apart.
    announce = len(cache_paths) > 1
    for cache_path in cache_paths:
        if announce:
            pywikibot.output('Processing %s' % cache_path)
        process_entries(cache_path, filter_func,
                        output_func=output_func, action_func=action_func)
Exemplo n.º 32
0
def main():
    """
    Process command line arguments and archive the selected pages.

    Options are given as "-name:value":

    -file, -filename   file listing the pages to process, one per line
    -page              a single page to process
    -namespace         namespace passed to generate_transclusions
    -locale            locale to set for LC_TIME
    -timezone          value stored in the TZ environment variable
    -salt              salt handed to PageArchiver and used by -calc
    -calc              print the key computed from the salt and this value,
                       then return
    -force             passed to PageArchiver as the force flag

    Arguments not starting with "-" are collected; the first one is skipped
    (NOTE(review): presumably the script name, since sys.argv is passed in
    whole - confirm) and each remaining one is handled as a template name.
    """
    global site, language

    import sys

    filename = None
    pagename = None
    namespace = None
    salt = None
    force = False
    calc = None
    args = []

    def if_arg_value(arg, name):
        """Yield the text after "<name>:" when arg starts with name."""
        if arg.startswith(name):
            yield arg[len(name) + 1:]

    for arg in pywikibot.handleArgs(*sys.argv):
        # '-file' also matches '-filename:...'; the '-filename' check below
        # then overwrites the value with the correct one.
        for v in if_arg_value(arg, '-file'):
            filename = v
        for v in if_arg_value(arg, '-locale'):
            # Required for english month names
            locale.setlocale(locale.LC_TIME, v.encode('utf8'))
        for v in if_arg_value(arg, '-timezone'):
            # v is the option value itself (a string), not an object with a
            # .timezone attribute.
            os.environ['TZ'] = v
            # Or use the preset value
            if hasattr(time, 'tzset'):
                time.tzset()
        for v in if_arg_value(arg, '-calc'):
            calc = v
        for v in if_arg_value(arg, '-salt'):
            salt = v
        for _ in if_arg_value(arg, '-force'):
            force = True
        for v in if_arg_value(arg, '-filename'):
            filename = v
        for v in if_arg_value(arg, '-page'):
            pagename = v
        for v in if_arg_value(arg, '-namespace'):
            namespace = v
        if not arg.startswith('-'):
            args.append(arg)

    if calc:
        if not salt:
            pywikibot.error('Note: you must specify a salt to calculate a key')
            return

        s = new_hash()
        s.update(salt + '\n')
        s.update(calc + '\n')
        pywikibot.output(u'key = ' + s.hexdigest())
        return

    if not salt:
        salt = ''

    site = pywikibot.Site()
    language = site.language()

    if len(args) <= 1:
        pywikibot.output(u'NOTE: you must specify a template to run the bot')
        pywikibot.showHelp('archivebot')
        return

    for a in args[1:]:
        pagelist = []
        # NOTE(review): decode() assumes byte-string arguments (Python 2).
        a = a.decode('utf8')
        if not filename and not pagename:
            if namespace is not None:
                ns = [str(namespace)]
            else:
                ns = []
            for pg in generate_transclusions(site, a, ns):
                pagelist.append(pg)
        if filename:
            # Close the list file again instead of leaking the handle.
            with open(filename, 'r') as page_file:
                for pg in page_file:
                    pagelist.append(pywikibot.Page(site, pg, ns=10))
        if pagename:
            pagelist.append(pywikibot.Page(site, pagename, ns=3))
        pagelist = sorted(pagelist)
        for pg in pagelist:
            pywikibot.output(u'Processing %s' % pg)
            # Catching exceptions, so that errors in one page do not bail out
            # the entire process
            try:
                archiver = PageArchiver(pg, a, salt, force)
                archiver.run()
                time.sleep(10)
            except Exception as e:
                pywikibot.output(
                    u'Error occured while processing page %s: %s' % (pg, e))
                pywikibot.output(traceback.format_exc())
Exemplo n.º 33
0
def process_entries(cache_path,
                    func,
                    use_accesstime=None,
                    output_func=None,
                    action_func=None):
    """
    Load every cache entry below cache_path and filter, print or act on it.

    cache_path may be a single file or a directory; when it is empty the
    'apicache' directory of the pywikibot base directory is used.

    File access times are used to tell whether cache files are still in
    use, but they are not always usable: many modern filesystems have them
    disabled. On unix, check the filesystem mount options; a remount with
    'strictatime' may be needed.

    @param func: filter called with each entry; None selects every entry
    @param use_accesstime: Whether access times should be used.
    @type use_accesstime: bool tristate:
         - None  = detect
         - False = dont use
         - True  = always use
    @param output_func: called with a selected entry; a result other than
        None is printed. When it is None the entry itself is printed,
        but only if no action_func was given.
    @param action_func: called with each selected entry
    """
    if not cache_path:
        cache_path = os.path.join(pywikibot.config2.base_dir, 'apicache')

    if not os.path.exists(cache_path):
        pywikibot.error('%s: no such file or directory' % cache_path)
        return

    if not os.path.isdir(cache_path):
        filepaths = [cache_path]
    else:
        filepaths = []
        for name in os.listdir(cache_path):
            filepaths.append(os.path.join(cache_path, name))

    for filepath in filepaths:
        cache_dir, filename = os.path.split(filepath)

        # Remember the timestamps from before the entry is read.
        if use_accesstime is not False:
            stat_before = os.stat(filepath)

        entry = CacheEntry(cache_dir, filename)
        try:
            entry._load_cache()
        except ValueError as e:
            pywikibot.error('Failed loading {0}'.format(
                entry._cachefile_path()))
            pywikibot.exception(e, tb=True)
            continue

        if use_accesstime is None:
            # Detect: did reading the file change its access time?
            stat_after = os.stat(filepath)
            use_accesstime = stat_before.st_atime != stat_after.st_atime

        if use_accesstime:
            # Reset access times to values before loading cache entry.
            os.utime(filepath, (stat_before.st_atime, stat_before.st_mtime))
            entry.stinfo = stat_before

        try:
            entry.parse_key()
        except ParseError:
            pywikibot.error('Problems parsing %s with key %s' %
                            (entry.filename, entry.key))
            pywikibot.exception()
            continue

        try:
            entry._rebuild()
        except Exception as e:
            pywikibot.error('Problems loading %s with key %s, %r' %
                            (entry.filename, entry.key, entry._parsed_key))
            pywikibot.exception(e, tb=True)
            continue

        # Entries rejected by the filter need no further handling.
        if func is not None and not func(entry):
            continue

        if output_func or action_func is None:
            text = entry if output_func is None else output_func(entry)
            if text is not None:
                pywikibot.output(text)

        if action_func:
            action_func(entry)
def main(*args):
    """
    Process command line arguments and generate user-config.

    If args is an empty list, sys.argv is used.

    The user is asked which base directory to use, whether user files
    should be copied from an existing installation, and whether
    user-config.py and user-fixes.py should be created. When a username is
    configured for the current family and language, the files are generated
    automatically ("force" mode) and the questions are only asked with
    verbose output enabled.

    @param args: command line arguments
    @type args: list of unicode
    @return: False when unknown arguments were given, None otherwise
    """
    global base_dir

    default_args = (config.family, config.mylang, None)

    local_args = pywikibot.handle_args(args)
    if local_args:
        pywikibot.output('Unknown arguments: %s' % ' '.join(local_args))
        return False

    username = config.usernames[config.family].get(config.mylang)
    args = (config.family, config.mylang, username)

    # The tuples only differ when a username is configured.
    force = args != default_args
    if force:
        pywikibot.output(u'Automatically generating user-config.py')

    # Force default
    if config.family == 'wikipedia' and config.mylang == 'language':
        args = ('wikipedia', 'en', username)

    while not force or config.verbose_output:
        pywikibot.output(u'\nYour default user directory is "%s"' % base_dir)
        if pywikibot.input_yn("Do you want to use that directory?",
                              default=True,
                              automatic_quit=False,
                              force=force):
            break
        else:
            new_base = change_base_dir()
            if new_base:
                base_dir = new_base
                break

    # base_dir is final from here on, so the target paths can be fixed.
    user_config = os.path.join(base_dir, "user-config.py")
    user_fixes = os.path.join(base_dir, "user-fixes.py")

    copied_config = False
    copied_fixes = False
    while not force or config.verbose_output:
        if os.path.exists(user_config):
            break
        if not pywikibot.input_yn(
                "Do you want to copy user files from an existing Pywikibot "
                "installation?",
                default=False,
                force=force,
                automatic_quit=False):
            break

        oldpath = pywikibot.input("Path to existing user-config.py?")
        if not os.path.exists(oldpath):
            pywikibot.error("Not a valid path")
            continue
        if os.path.isfile(oldpath):
            # User probably typed /user-config.py at the end, so strip it
            oldpath = os.path.dirname(oldpath)
        old_config = os.path.join(oldpath, "user-config.py")
        if not os.path.isfile(old_config):
            # Name the file exactly as it is looked up on disk.
            pywikibot.error("No user-config.py found in that directory")
            continue
        shutil.copyfile(old_config, user_config)
        copied_config = True

        old_fixes = os.path.join(oldpath, "user-fixes.py")
        if os.path.isfile(old_fixes):
            shutil.copyfile(old_fixes, user_fixes)
            copied_fixes = True

    if not os.path.isfile(user_config):
        # In quiet force mode the file is created without asking.
        if ((force and not config.verbose_output) or pywikibot.input_yn(
                'Create user-config.py file? Required for '
                'running bots.',
                default=True,
                automatic_quit=False,
                force=force)):
            create_user_config(args, force=force)
    elif not copied_config:
        pywikibot.output("user-config.py already exists in the directory")
    if not os.path.isfile(user_fixes):
        if ((force and not config.verbose_output) or pywikibot.input_yn(
                'Create user-fixes.py file? Optional and '
                'for advanced users.',
                force=force,
                default=False,
                automatic_quit=False)):
            create_user_fixes()
    elif not copied_fixes:
        pywikibot.output("user-fixes.py already exists in the directory")
Exemplo n.º 35
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Arguments that are neither page generator options nor one of the
    options handled below are collected as (old text, new text) pairs.
    Further pairs can come from a -replacementfile (two lines per pair),
    from predefined fixes (-fix:) or from interactive input.

    @param args: command line arguments
    @type args: list of unicode
    @return: False for an incomplete pattern/replacement pair, a misplaced
        -replacementfile or an unreadable replacement file; None otherwise
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = u""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }  # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    # the dumped article to start with; only set by -xmlstart
    xmlStart = None
    useSql = False
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.

    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            # Must be tested before '-xml', which is a prefix of it.
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-manualinput'):
            manual_input = True
        elif arg.startswith('-replacementfile'):
            # An odd count means the option sits between an old text and
            # its replacement.
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True

            if arg == '-replacementfile':
                replacement_file = pywikibot.input(
                    u'Please enter the filename to read replacements from:')
            else:
                replacement_file = arg[len('-replacementfile:'):]
        else:
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error(
            '-replacementfile used between a pattern replacement pair.')
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, 'r', 'utf-8') as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error(u'Error loading {0}: {1}'.format(
                replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error(
                '{0} contains an incomplete pattern replacement pair.'.format(
                    replacement_file))
            return False

        # Strip BOM from first line. lstrip() returns a new string, so the
        # result has to be stored back; an empty file has no first line.
        if file_replacements:
            file_replacements[0] = file_replacements[0].lstrip(u'\uFEFF')
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input(
            u'Please enter the text that should be replaced:')
        while old:
            new = pywikibot.input(u'Please enter the new text:')
            commandline_replacements += [old, new]
            old = pywikibot.input(
                'Please enter another text that should be replaced,'
                '\nor press Enter to start:')

    # The automatic summary is built from the first pair only.
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(site, 'replace-replacing', {
                'description':
                ' (-%s +%s)' % (replacement.old, replacement.new)
            })
        replacements.append(replacement)

    if not edit_summary:
        if single_summary:
            pywikibot.output(u'The summary message for the command line '
                             'replacements will be something like: %s' %
                             single_summary)
        if fixes_set:
            pywikibot.output('If a summary is defined for the fix, this '
                             'default summary won\'t be applied.')
        edit_summary = pywikibot.input(
            'Press Enter to use this automatic message, or enter a '
            'description of the\nchanges your bot will make:')

    # Perform one of the predefined actions.
    for fix in fixes_set:
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s' %
                             ', '.join(fixes.fixes.keys()))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: '
                                 '{0}'.format(fixes.filename))
            return
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'), set_summary)
        for replacement in fix['replacements']:
            # An optional third element is the summary of this entry.
            summary = None if len(replacement) < 3 else replacement[2]
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning('The old string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[0])))
            if chars.contains_invisible(replacement[1]):
                pywikibot.warning('The new string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[1])))
            replacements.append(
                ReplacementListEntry(
                    old=replacement[0],
                    new=replacement[1],
                    fix_set=replacement_set,
                    edit_summary=summary,
                ))

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions, site)
    elif useSql:
        # NOTE(review): this branch unpacks each replacement as a 2-tuple and
        # iterates the keys of the 'exceptions' dict as if they were compiled
        # patterns - verify that it still works with Replacement objects.
        whereClause = 'WHERE (%s)' % ' OR '.join([
            "old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern)
            for (old_regexp, new_text) in replacements
        ])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                for exc in exceptions
            ])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen)

    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, edit_summary,
                       site)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme().
    # It will make sure the callback is triggered before replace.py is unloaded.
    pywikibot.stopme()
    pywikibot.output(u'\n%s pages changed.' % bot.changed_pages)
Exemplo n.º 36
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    The first non-option argument is taken as the URL, file or directory to
    upload; any further non-option arguments are appended to the
    description.

    @param args: command line arguments
    @type args: list of unicode
    @return: False if -always mode was requested but cannot run unattended;
        otherwise None once the bot has run.
    """
    url = u''
    description = ['Automatic upload by pywikibot']
    summary = None
    keepFilename = True
    always = False
    useFilename = None
    verifyDescription = False
    aborts = set()
    ignorewarn = set()
    chunk_size = 0
    chunk_size_regex = re.compile(
        r'^-chunked(?::(\d+(?:\.\d+)?)[ \t]*(k|ki|m|mi)?b?)?$', re.I)
    # Multipliers for the optional size suffix of -chunked (decimal and
    # binary prefixes).
    chunk_units = {
        'k': 1000,
        'm': 1000000,
        'ki': 1 << 10,
        'mi': 1 << 20,
    }
    recursive = False

    # process all global bot args
    # returns a list of non-global args, i.e. args for upload.py
    for arg in pywikibot.handle_args(args):
        if not arg:
            continue
        if arg == '-always':
            keepFilename = True
            always = True
            verifyDescription = False
        elif arg == '-recursive':
            recursive = True
        elif arg.startswith('-keep'):
            keepFilename = True
        elif arg.startswith('-filename:'):
            useFilename = arg[10:]
        elif arg.startswith('-summary'):
            summary = arg[9:]
        elif arg.startswith('-noverify'):
            verifyDescription = False
        elif arg.startswith('-abortonwarn'):
            # A bare option (or one given after "all" was already selected)
            # switches to "abort on every warning"; otherwise collect codes.
            if len(arg) > len('-abortonwarn:') and aborts is not True:
                aborts.add(arg[len('-abortonwarn:'):])
            else:
                aborts = True
        elif arg.startswith('-ignorewarn'):
            if len(arg) > len('-ignorewarn:') and ignorewarn is not True:
                ignorewarn.add(arg[len('-ignorewarn:'):])
            else:
                ignorewarn = True
        elif arg.startswith('-chunked'):
            match = chunk_size_regex.match(arg)
            if not match:
                pywikibot.error('Chunk size parameter is not valid.')
            elif not match.group(1):
                chunk_size = 1 << 20  # default to 1 MiB
            else:
                base = float(match.group(1))  # number was in there
                unit = match.group(2)
                # With re.I a non-ASCII letter may match the suffix group
                # without lower-casing to a known unit; report that instead
                # of multiplying a float by a string.
                multiplier = chunk_units.get(unit.lower()) if unit else 1
                if multiplier is None:
                    pywikibot.error('Chunk size parameter is not valid.')
                else:
                    chunk_size = math.trunc(base * multiplier)
        elif url == u'':
            url = arg
        else:
            description.append(arg)
    description = u' '.join(description)

    # Ask for the source until it looks like a URL or an existing path;
    # in -always mode there is nobody to ask, so give up instead.
    while not ("://" in url or os.path.exists(url)):
        if not url:
            error = 'No input filename given.'
        else:
            error = 'Invalid input filename given.'
            if not always:
                error += ' Try again.'
        if always:
            url = None
            break
        else:
            pywikibot.output(error)
        url = pywikibot.input(u'URL, file or directory where files are now:')

    if always and ((aborts is not True and ignorewarn is not True) or
                   not description or url is None):
        additional = ''
        missing = []
        if url is None:
            missing += ['filename']
            additional = error + ' '
        # description is a joined string at this point, so test for
        # emptiness (it can never be None).
        if not description:
            missing += ['description']
        if aborts is not True and ignorewarn is not True:
            additional += ('Either -ignorewarn or -abortonwarn must be '
                           'defined for all codes. ')
        additional += 'Unable to run in -always mode'
        suggest_help(missing_parameters=missing, additional_text=additional)
        return False

    if os.path.isdir(url):
        file_list = []
        for directory_info in os.walk(url):
            if not recursive:
                # Do not visit any subdirectories
                directory_info[1][:] = []
            for dir_file in directory_info[2]:
                # Only queue files that have no File: page on the wiki yet.
                if not (pywikibot.FilePage(pywikibot.Site(),
                                           "File:" + dir_file).exists()):
                    file_list.append(os.path.join(directory_info[0], dir_file))
                else:
                    pywikibot.output(
                        "File:{0} has been uploaded, skipping".format(dir_file))
        url = file_list
    else:
        url = [url]
    bot = UploadRobot(url, description=description, useFilename=useFilename,
                      keepFilename=keepFilename,
                      verifyDescription=verifyDescription,
                      aborts=aborts, ignoreWarning=ignorewarn,
                      chunk_size=chunk_size, always=always,
                      summary=summary)
    bot.run()
Exemplo n.º 37
0
    def _ocr_callback(self, cmd_uri, parser_func=None, ocr_tool=None):
        """OCR callback function.

        Fetch cmd_uri (retrying up to five times on read timeouts), decode
        the JSON reply and pass the returned text through parser_func.

        @param cmd_uri: URI of the OCR request; mandatory.
        @param parser_func: callable applied to the text on success;
            defaults to the identity function.
        @param ocr_tool: OCR backend name; must be in self._OCR_METHODS.
        @return: tuple (error, text [error description in case of error]).
        @raise ValueError: cmd_uri is empty.
        @raise TypeError: parser_func is not callable or ocr_tool is not a
            known method.
        """
        def identity(x):
            return x

        if not cmd_uri:
            raise ValueError('Parameter cmd_uri is mandatory.')

        if parser_func is None:
            parser_func = identity

        if not callable(parser_func):
            raise TypeError('Keyword parser_func must be callable.')

        if ocr_tool not in self._OCR_METHODS:
            raise TypeError("ocr_tool must be in %s, not '%s'." %
                            (self._OCR_METHODS, ocr_tool))

        # wrong link fail with Exceptions
        response = None
        last_timeout = None
        retry = 0
        while retry < 5:
            pywikibot.debug('{0}: get URI {1!r}'.format(ocr_tool, cmd_uri),
                            _logger)
            try:
                response = http.fetch(cmd_uri)
            except requests.exceptions.ReadTimeout as e:
                retry += 1
                # Keep the exception: the name bound by "except ... as" is
                # cleared when the handler exits.
                last_timeout = e
                pywikibot.warning('ReadTimeout %s: %s' % (cmd_uri, e))
                pywikibot.warning('retrying in %s seconds ...' % (retry * 5))
                time.sleep(retry * 5)
            except Exception as e:
                pywikibot.error('"%s": %s' % (cmd_uri, e))
                return (True, e)
            else:
                pywikibot.debug('{0}: {1}'.format(ocr_tool, response.text),
                                _logger)
                break

        if response is None:
            # Every attempt timed out, so there is no response to inspect;
            # report the failure instead of touching an unbound name.
            return (True, last_timeout)

        if 400 <= response.status < 600:
            return (True, 'Http response status {0}'.format(response.status))

        data = json.loads(response.text)

        if ocr_tool == self._PHETOOLS:  # phetools
            assert 'error' in data, 'Error from phetools: %s' % data
            assert data['error'] in [0, 1, 2,
                                     3], ('Error from phetools: %s' % data)
            error, _text = bool(data['error']), data['text']
        else:  # googleOCR
            if 'error' in data:
                error, _text = True, data['error']
            else:
                error, _text = False, data['text']

        if error:
            pywikibot.error('OCR query %s: %s' % (cmd_uri, _text))
            return (error, _text)
        else:
            return (error, parser_func(_text))
Exemplo n.º 38
0
    def treat(self, page, item):
        """Harvest template parameters of a page into claims on its item.

        For every wanted template on the page, each parameter that is
        mapped in self.fields and has no claim on the item yet is turned
        into a new claim, which is added together with a source if one is
        available.

        @param page: the page whose templates are read
        @param item: the item receiving the claims
        @raise KeyboardInterrupt: if the global stop flag is set
        """
        if willstop:
            raise KeyboardInterrupt
        self.current_page = page
        item.get()
        wanted_properties = set(self.fields.values())
        if wanted_properties.issubset(item.claims.keys()):
            pywikibot.output('%s item %s has claims for all properties. '
                             'Skipping.' % (page, item.title()))
            return

        parsed = textlib.extract_templates_and_params(page.get())
        for (raw_name, params) in parsed:
            # Normalise the template name through a Page in namespace 10
            try:
                template = pywikibot.Page(
                    page.site, raw_name, ns=10).title(withNamespace=False)
            except pywikibot.exceptions.InvalidTitle:
                pywikibot.error(
                    "Failed parsing template; '%s' should be the template "
                    "name." % raw_name)
                continue
            if template not in self.templateTitles:
                continue

            # This is one of the templates we were looking for
            for field, value in params.items():
                field = field.strip()
                value = value.strip()
                if not (field and value) or field not in self.fields:
                    continue

                # The field is mapped to a property; skip it when the item
                # already carries a claim for that property.
                claim = pywikibot.Claim(self.repo, self.fields[field])
                if claim.getID() in item.get().get('claims'):
                    pywikibot.output(
                        'A claim for %s already exists. Skipping.' %
                        claim.getID())
                    # TODO: Implement smarter approach to merging
                    # harvested values with existing claims esp.
                    # without overwriting humans unintentionally.
                    continue

                if claim.type == 'wikibase-item':
                    # The value has to contain a wikilink we can resolve
                    link = re.search(pywikibot.link_regex, value)
                    if not link:
                        pywikibot.output(
                            '%s field %s value %s is not a '
                            'wikilink. Skipping.' %
                            (claim.getID(), field, value))
                        continue

                    linked_item = self._template_link_target(
                        item, link.group(1))
                    if not linked_item:
                        continue

                    claim.setTarget(linked_item)
                elif claim.type in ('string', 'external-id'):
                    claim.setTarget(value.strip())
                elif claim.type == 'commonsMedia':
                    commons = pywikibot.Site('commons', 'commons')
                    image = pywikibot.FilePage(
                        pywikibot.Link(value, source=commons,
                                       defaultNamespace=6))
                    if image.isRedirectPage():
                        image = pywikibot.FilePage(image.getRedirectTarget())
                    if not image.exists():
                        pywikibot.output(
                            "{0} doesn't exist. I can't link to it".format(
                                image.title(asLink=True)))
                        continue
                    claim.setTarget(image)
                else:
                    pywikibot.output(
                        '%s is not a supported datatype.' % claim.type)
                    continue

                pywikibot.output(
                    'Adding %s --> %s' % (claim.getID(), claim.getTarget()))
                item.addClaim(claim)
                # A generator might yield pages from multiple sites
                source = self.getSource(page.site)
                if source:
                    claim.addSource(source, bot=True)
    def treat_page_and_item(self, page, item):
        """Harvest template parameters of a page into claims on its item.

        Each parameter of a wanted template that is mapped in self.fields
        is converted to a claim target according to the property datatype
        and handed to user_add_claim_unless_exists.

        @param page: the page whose extracted templates are read
        @param item: the item receiving the claims
        @raise KeyboardInterrupt: if the global stop flag is set
        """
        if willstop:
            raise KeyboardInterrupt

        for (raw_name, params) in page.raw_extracted_templates:
            # Normalise the template name through a Page in namespace 10
            try:
                template = pywikibot.Page(
                    page.site, raw_name, ns=10).title(with_ns=False)
            except pywikibot.exceptions.InvalidTitle:
                pywikibot.error("Failed parsing template; '{}' should be "
                                'the template name.'.format(raw_name))
                continue

            if template not in self.templateTitles:
                continue

            # This is one of the templates we were looking for
            for field, value in params.items():
                field = field.strip()
                # todo: extend the list of tags to ignore
                # todo: eventually we may want to import the references
                value = textlib.removeDisabledParts(
                    value, tags=['ref'], site=page.site).strip()
                if not (field and value) or field not in self.fields:
                    continue

                # The field is mapped to a property and a set of options
                prop, options = self.fields[field]
                claim = pywikibot.Claim(self.repo, prop)
                exists_arg = self._get_option_with_fallback(options, 'exists')
                if claim.type == 'wikibase-item':
                    allow_multiple = self._get_option_with_fallback(
                        options, 'multi')
                    found_link = False
                    # Walk over every wikilink contained in the value
                    for link in pywikibot.link_regex.finditer(value):
                        found_link = True
                        linked_item = self._template_link_target(
                            item, link.group(1))
                        added = False
                        if linked_item:
                            claim.setTarget(linked_item)
                            added = self.user_add_claim_unless_exists(
                                item, claim, exists_arg, page.site,
                                pywikibot.output)
                            # use a fresh claim object for the next link
                            claim = pywikibot.Claim(self.repo, prop)
                        # only the first link counts unless multiple
                        # values are wanted
                        if not allow_multiple:
                            break
                        # update exists_arg, so we can add more values
                        if 'p' not in exists_arg and added:
                            exists_arg += 'p'

                    if found_link:
                        continue

                    # No wikilink at all: the plain value may be used as a
                    # link target only when the 'islink' option allows it.
                    if not self._get_option_with_fallback(options, 'islink'):
                        pywikibot.output(
                            '{} field {} value {} is not a wikilink. Skipping.'
                            .format(claim.getID(), field, value))
                        continue

                    linked_item = self._template_link_target(item, value)
                    if not linked_item:
                        continue

                    claim.setTarget(linked_item)
                elif claim.type in ('string', 'external-id'):
                    claim.setTarget(value.strip())
                elif claim.type == 'url':
                    url_match = self.linkR.search(value)
                    if not url_match:
                        continue
                    claim.setTarget(url_match.group('url'))
                elif claim.type == 'commonsMedia':
                    commons = pywikibot.Site('commons', 'commons')
                    image = pywikibot.FilePage(
                        pywikibot.Link(value, source=commons,
                                       default_namespace=6))
                    if image.isRedirectPage():
                        image = pywikibot.FilePage(image.getRedirectTarget())
                    if not image.exists():
                        pywikibot.output(
                            "{} doesn't exist. I can't link to it".format(
                                image.title(as_link=True)))
                        continue
                    claim.setTarget(image)
                else:
                    pywikibot.output('{} is not a supported datatype.'.format(
                        claim.type))
                    continue

                # A generator might yield pages from multiple sites
                self.user_add_claim_unless_exists(item, claim, exists_arg,
                                                  page.site, pywikibot.output)
Exemplo n.º 40
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    @return: False if a precondition fails (missing index, missing djvu
        file, no text layer, no ProofreadPage extension); otherwise None.
    """
    index = None
    djvu_path = '.'  # default djvu file directory
    pages = '1-'
    options = {}

    # Parse command line arguments: "-name:value" options are split at the
    # first colon, bare flags are compared as a whole.
    for arg in pywikibot.handle_args(args):
        name, colon, value = arg.partition(':')
        if colon and name == '-index':
            index = value
        elif colon and name == '-djvu':
            djvu_path = value
        elif colon and name == '-pages':
            pages = value
        elif colon and name == '-summary':
            options['summary'] = value
        elif arg in ('-force', '-always'):
            options[arg[1:]] = True
        else:
            pywikibot.output('Unknown argument %s' % arg)

    # index is mandatory.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return False

    # If djvu_path is not a file, build djvu_path from dir+index.
    djvu_path = os.path.abspath(os.path.expanduser(djvu_path))
    if not os.path.exists(djvu_path):
        pywikibot.error('No such file or directory: %s' % djvu_path)
        return False
    if os.path.isdir(djvu_path):
        djvu_path = os.path.join(djvu_path, index)

    # Wrap the djvu file and make sure it carries a text layer.
    djvu = DjVuFile(djvu_path)
    if not djvu.has_text():
        pywikibot.error('No text layer in djvu file %s' % djvu.file_djvu)
        return False

    # Parse pages param: comma separated "start-end" intervals, where a
    # missing start means 1 and a missing end means the last image.
    page_ranges = []
    for interval in pages.split(','):
        first, dash, last = interval.partition('-')
        first = int(first) if first else 1
        if dash:
            last = int(last) if last else djvu.number_of_images()
        else:
            last = first
        page_ranges.append((first, last))

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error('Site %s must have ProofreadPage extension.' % site)
        return False

    index_page = pywikibot.Page(site, index, ns=site.proofread_index_ns)
    if not index_page.exists():
        raise pywikibot.NoPage(index)

    pywikibot.output('uploading text from %s to %s' %
                     (djvu.file_djvu, index_page.title(asLink=True)))

    DjVuTextBot(djvu, index_page, page_ranges, **options).run()
Exemplo n.º 41
0
#
# Distributed under the terms of the MIT license.
#
from __future__ import absolute_import, division, unicode_literals

import re
import sys

import pywikibot

from pywikibot import i18n

try:
    import pycountry
except ImportError:
    pywikibot.error('This script requires the python-pycountry module')
    pywikibot.error('See: https://pypi.org/project/pycountry')
    pywikibot.exception()
    sys.exit(1)


class StatesRedirectBot(pywikibot.Bot):
    """Bot class used for implementation of re-direction norms."""
    def __init__(self, start, force):
        """Initializer.

        Parameters:
            @param start:xxx Specify the place in the alphabet to start
            searching.
            @param force: Don't ask whether to create pages, just create
            them.
Exemplo n.º 42
0
        gens = [
            pagegenerators.ReferringPageGenerator(t,
                                                  onlyTemplateInclusion=True)
            for t in oldTemplates
        ]
        gen = pagegenerators.CombinedPageGenerator(gens)
        gen = pagegenerators.DuplicateFilterPageGenerator(gen)
    if user:
        gen = pagegenerators.UserEditFilterGenerator(gen,
                                                     user,
                                                     timestamp,
                                                     skip,
                                                     max_revision_depth=100,
                                                     show_filtered=True)

    if not genFactory.gens:
        # make sure that proper namespace filtering etc. is handled
        gen = genFactory.getCombinedGenerator(gen)

    preloadingGen = pagegenerators.PreloadingGenerator(gen)

    bot = TemplateRobot(preloadingGen, templates, **options)
    bot.run()


if __name__ == "__main__":
    # Script entry point: run main() and report any uncaught exception
    # through pywikibot's error log (exc_info=True passes the exception
    # details along) rather than letting it propagate.
    try:
        main()
    except Exception:
        pywikibot.error("Fatal error:", exc_info=True)
Exemplo n.º 43
0
 def replacements(self):
     """Yield the (pattern, callback) pair of this fix if it can be loaded.

     When self.can_load is false an error is logged and nothing is yielded.
     """
     if not self.can_load:
         pywikibot.error('Cannot run SectionsFix when mwparserfromhell '
                         'is not installed')
         return
     yield (FULL_ARTICLE_REGEX, self.replace)
Exemplo n.º 44
0
            text = rule.apply(text, replaced)
        page.text = text
        count = len(replaced)
        if count > 0:  # todo: separate function
            if count > 1:
                max_typos = self.maxsummarytypos
                summary = 'oprava překlepů: %s' % ', '.join(
                    replaced[:max_typos])
                if count > max_typos:
                    if count - max_typos > 1:
                        summary += ' a %s dalších' % (count - max_typos)
                    else:
                        summary += ' a jednoho dalšího'
            else:
                summary = 'oprava překlepu: %s' % replaced[0]

            summaries.append(summary)


# Registry of the "lazy" fix classes, keyed by each class's ``key``.
# NOTE(review): presumably "lazy" means these are only set up on demand;
# confirm against the code that consumes this mapping.
lazy_fixes = {
    fix.key: fix
    for fix in (CategoriesFix, CheckWikiFix, FilesFix, RedirectFix,
                RedirectsFromFileFix, RefSortFix, SectionsFix, TemplateFix,
                TypoFix)
}
# Registry of every fix class: the three listed here plus all lazy ones.
all_fixes = {fix.key: fix for fix in (AdataFix, InterwikiFix, StyleFix)}
all_fixes.update(lazy_fixes)

# This module is not meant to be executed directly.
if __name__ == '__main__':
    pywikibot.error('Run wikitext.py instead')
Exemplo n.º 45
0
 def test_error(self):
     # pywikibot.error() must leave the captured stdout empty and write
     # the message, prefixed with "ERROR: ", to the captured stderr.
     pywikibot.error('error')
     self.assertEqual(newstdout.getvalue(), '')
     self.assertEqual(newstderr.getvalue(), 'ERROR: error\n')
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Arguments that are neither script options nor generator options are
    read as "parameter property" pairs; dash-prefixed options that follow
    a pair apply to that pair, those before the first pair are bot options.

    @param args: command line arguments
    @type args: str
    @return: False for an incomplete parameter/property pair, otherwise
        None.
    """
    template_title = None

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    gen = pg.GeneratorFactory()

    # The pair being collected: [] -> [param] -> [param, prop, {options}]
    pending = []
    fields = {}
    options = {}

    def store_pending():
        """Move the completed pending pair into the fields mapping."""
        handler = PropertyOptionHandler(**pending[2])
        fields[pending[0]] = (pending[1], handler)

    for arg in local_args:
        if arg.startswith('-template'):
            if len(arg) == 9:
                template_title = pywikibot.input(
                    'Please enter the template to work on:')
            else:
                template_title = arg[10:]
            continue

        if arg.startswith('-create'):
            options['create'] = True
            continue

        if gen.handleArg(arg):
            if arg.startswith('-transcludes:'):
                template_title = arg[13:]
            continue

        if not arg.startswith('-'):
            # Positional argument: begin a new pair (after storing a
            # finished one) or complete the one being collected.
            if len(pending) == 3:
                store_pending()
                del pending[:]
            pending.append(arg)
            if len(pending) == 2:
                pending.append({})
            continue

        # A dash option that is neither a script nor a generator option.
        if len(pending) == 1:
            break  # will stop below

        name, _, value = arg[1:].partition(':')
        if not pending:
            assert not fields
            options[name] = value or True
        else:
            assert len(pending) == 3
            pending[2][name] = value or True

    # handle leftover
    if len(pending) == 3:
        store_pending()
    elif len(pending) == 1:
        pywikibot.error('Incomplete command line param-property pair.')
        return False

    if not template_title:
        pywikibot.error(
            'Please specify either -template or -transcludes argument')
        return

    generator = gen.getCombinedGenerator(preload=True)
    if not generator:
        # Fall back to all pages transcluding the template
        gen.handleArg('-transcludes:' + template_title)
        generator = gen.getCombinedGenerator(preload=True)

    HarvestRobot(generator, template_title, fields, **options).run()
Exemplo n.º 47
0
    def process_filename(self, file_url=None):
        """Return base filename portion of file_url.

        The name is derived from the URL or path, optionally replaced by
        self.use_filename and prefixed with self.filename_prefix, then
        checked (forbidden characters, allowed extensions, existing or
        shared file) in a loop until it is accepted or the file is skipped.
        Afterwards the description is verified; this may update
        self.description and resets self.verify_description to False.

        @param file_url: URL or local path of the file; if empty,
            self.url is used and a warning is issued.
        @return: the target filename, or None if the file is to be skipped.
        @raise QuitKeyboardInterrupt: if there is no description and the
            user declines to add one.
        """
        if not file_url:
            file_url = self.url
            pywikibot.warning('file_url is not given. '
                              'Set to self.url by default.')

        always = self.getOption('always')
        # Isolate the pure name
        filename = file_url
        # Filename may be either a URL or a local file path
        if '://' in filename:
            # extract the path portion of the URL
            filename = urlparse(filename).path
        filename = os.path.basename(filename)
        # An explicitly configured name overrides the derived one; the
        # prefix, if any, is applied to whichever name is in effect.
        if self.use_filename:
            filename = self.use_filename
        if self.filename_prefix:
            filename = self.filename_prefix + filename
        if not self.keep_filename:
            pywikibot.output(
                'The filename on the target wiki will default to: %s' %
                filename)
            # The prompt below needs a user; it must not be reached when
            # the 'always' option is set.
            assert not always
            newfn = pywikibot.input(
                'Enter a better name, or press enter to accept:')
            if newfn != '':
                filename = newfn
        # FIXME: these 2 belong somewhere else, presumably in family
        # forbidden characters are handled by pywikibot/page.py
        forbidden = ':*?/\\'  # to be extended
        try:
            allowed_formats = self.target_site.siteinfo.get('fileextensions',
                                                            get_default=False)
        except KeyError:
            # No extension list available: skip the extension check below.
            allowed_formats = []
        else:
            allowed_formats = [item['ext'] for item in allowed_formats]

        # ask until it's valid
        first_check = True
        while True:
            # Any pass after the first means the previous name was
            # rejected: ask for another one, or give up in 'always' mode.
            if not first_check:
                if always:
                    filename = None
                else:
                    filename = pywikibot.input('Enter a better name, or press '
                                               'enter to skip the file:')
                if not filename:
                    return None
            first_check = False
            ext = os.path.splitext(filename)[1].lower().strip('.')
            # are any chars in forbidden also in filename?
            invalid = set(forbidden) & set(filename)
            if invalid:
                c = ''.join(invalid)
                pywikibot.output('Invalid character(s): %s. Please try again' %
                                 c)
                continue
            if allowed_formats and ext not in allowed_formats:
                if always:
                    pywikibot.output('File format is not one of '
                                     '[{0}]'.format(' '.join(allowed_formats)))
                    continue
                elif not pywikibot.input_yn(
                        'File format is not one of [%s], but %s. Continue?' %
                    (' '.join(allowed_formats), ext),
                        default=False,
                        automatic_quit=False):
                    continue
            potential_file_page = pywikibot.FilePage(self.target_site,
                                                     filename)
            if potential_file_page.exists():
                # As used below: False means abort, None means the user
                # has to be asked, anything else truthy means overwrite.
                overwrite = self._handle_warning('exists')
                if overwrite is False:
                    pywikibot.output(
                        'File exists and you asked to abort. Skipping.')
                    return None
                if potential_file_page.has_permission():
                    if overwrite is None:
                        # The question asks whether to CHANGE the name, so
                        # a "yes" answer means "do not overwrite".
                        overwrite = not pywikibot.input_yn(
                            'File with name %s already exists. '
                            'Would you like to change the name? '
                            '(Otherwise file will be overwritten.)' % filename,
                            default=True,
                            automatic_quit=False)
                    if not overwrite:
                        continue
                    else:
                        break
                else:
                    pywikibot.output('File with name %s already exists and '
                                     'cannot be overwritten.' % filename)
                    continue
            else:
                # No local page of that name; a file of the same name in
                # the shared repository still blocks the upload.
                try:
                    if potential_file_page.fileIsShared():
                        pywikibot.output(
                            'File with name %s already exists in shared '
                            'repository and cannot be overwritten.' % filename)
                        continue
                    else:
                        break
                except pywikibot.NoPage:
                    break

        # A proper description for the submission.
        # Empty descriptions are not accepted.
        if self.description:
            pywikibot.output('The suggested description is:\n%s' %
                             self.description)

        while not self.description or self.verify_description:
            if not self.description:
                pywikibot.output(
                    color_format(
                        '{lightred}It is not possible to upload a file '
                        'without a description.{default}'))
            # Everything below is interactive and must not be reached
            # when the 'always' option is set.
            assert not always
            # if no description, ask if user want to add one or quit,
            # and loop until one is filled.
            # if self.verify_description, ask if user want to change it
            # or continue.
            if self.description:
                question = 'Do you want to change this description?'
            else:
                question = 'No description was given. Add one?'
            if pywikibot.input_yn(question,
                                  default=not self.description,
                                  automatic_quit=self.description):
                from pywikibot import editor as editarticle
                editor = editarticle.TextEditor()
                try:
                    new_description = editor.edit(self.description)
                except ImportError:
                    raise
                except Exception as e:
                    # Editing failed; log it and ask again.
                    pywikibot.error(e)
                    continue
                # if user saved / didn't press Cancel
                if new_description:
                    self.description = new_description
            elif not self.description:
                raise QuitKeyboardInterrupt
            # One verification round is enough; the loop only repeats
            # while the description is still empty.
            self.verify_description = False

        return filename
Exemplo n.º 48
0
        start = 1 if not start else int(start)
        if not sep:
            end = start
        else:
            end = int(end) if end else djvu.number_of_images()
        pages[i] = (start, end)

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error(
            'Site {} must have ProofreadPage extension.'.format(site))
        return

    index_page = pywikibot.Page(site, index, ns=site.proofread_index_ns)

    if not index_page.exists():
        raise NoPageError(index)

    pywikibot.output('uploading text from {} to {}'.format(
        djvu.file, index_page.title(as_link=True)))

    bot = DjVuTextBot(djvu, index_page, pages=pages, site=site, **options)
    bot.run()


if __name__ == '__main__':
    # Script entry point: run main() and report any uncaught exception
    # through pywikibot's error log (exc_info=True passes the exception
    # details along) rather than letting it propagate.
    try:
        main()
    except Exception:
        pywikibot.error('Fatal error:', exc_info=True)
Exemplo n.º 49
0
    def _ocr_callback(self, cmd_uri, parser_func=None, ocr_tool=None):
        """Query an OCR service and post-process its answer.

        The URI is fetched with increasing pauses after read timeouts; the
        JSON reply is then interpreted according to the selected tool.

        :param cmd_uri: URI of the OCR request; mandatory.
        :param parser_func: callable applied to the text on success;
            the text is returned unchanged if omitted.
        :param ocr_tool: OCR backend name; must be in self._OCR_METHODS.
        :return: tuple (error, text [error description in case of error]).
        :raise ValueError: cmd_uri is empty.
        :raise TypeError: parser_func is not callable or ocr_tool is not a
            known method.
        """
        def passthrough(text):
            return text

        if not cmd_uri:
            raise ValueError('Parameter cmd_uri is mandatory.')

        if parser_func is None:
            parser_func = passthrough
        if not callable(parser_func):
            raise TypeError('Keyword parser_func must be callable.')

        if ocr_tool not in self._OCR_METHODS:
            raise TypeError("ocr_tool must be in {}, not '{}'.".format(
                self._OCR_METHODS, ocr_tool))

        # wrong link fail with Exceptions
        for pause in range(5, 30, 5):
            pywikibot.debug('{}: get URI {!r}'.format(ocr_tool, cmd_uri),
                            _logger)
            try:
                response = http.fetch(cmd_uri)
            except ReadTimeout as timeout:
                pywikibot.warning(
                    'ReadTimeout {}: {}'.format(cmd_uri, timeout))
            except Exception as failure:
                pywikibot.error('"{}": {}'.format(cmd_uri, failure))
                return True, failure
            else:
                pywikibot.debug('{}: {}'.format(ocr_tool, response.text),
                                _logger)
                break

            # Only reached after a timeout: wait before the next attempt.
            pywikibot.warning('retrying in {} seconds ...'.format(pause))
            time.sleep(pause)
        else:
            # Every attempt timed out.
            return True, ReadTimeout

        status = response.status_code
        if HTTPStatus.BAD_REQUEST <= status < 600:
            return True, 'Http response status {}'.format(status)

        data = json.loads(response.text)

        if ocr_tool == self._PHETOOLS:  # phetools
            assert 'error' in data, 'Error from phetools: {}'.format(data)
            assert data['error'] in [0, 1, 2, 3], \
                'Error from phetools: {}'.format(data)
            failed, payload = bool(data['error']), data['text']
        elif 'error' in data:  # googleOCR, failed
            failed, payload = True, data['error']
        else:  # googleOCR, succeeded
            failed, payload = False, data['text']

        if not failed:
            return failed, parser_func(payload)

        pywikibot.error('OCR query {}: {}'.format(cmd_uri, payload))
        return failed, payload
Exemplo n.º 50
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    The function collects pattern/replacement pairs from the command line,
    from an optional ``-pairsfile`` and from interactive input, merges in
    the predefined fixes requested with ``-fix:``, settles on an edit
    summary, pre-compiles all patterns, builds the page generator (XML dump,
    SQL query or generator factory) and finally runs a ReplaceRobot.

    @param args: command line arguments
    @type args: str
    """
    options = {}
    gen = None
    # summary message
    edit_summary = ''
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }  # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    # Title of the dumped article to start with; only set by -xmlstart.
    xmlStart = None
    useSql = False
    sql_query = None
    # Set the default regular expression flags
    flags = 0
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    local_args = genFactory.handle_args(local_args)

    for arg in local_args:
        if arg == '-regex':
            regex = True
        # '-xmlstart' must be tested before '-xml' because both share a prefix
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg.startswith('-mysqlquery'):
            useSql = True
            sql_query = arg.partition(':')[2]
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            options['sleep'] = float(arg[7:])
        elif arg in ('-always', '-recursive', '-allowoverlap'):
            options[arg[1:]] = True
        elif arg == '-nocase':
            flags |= re.IGNORECASE
        elif arg == '-dotall':
            flags |= re.DOTALL
        elif arg == '-multiline':
            flags |= re.MULTILINE
        elif arg.startswith('-addcat:'):
            options['addcat'] = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-automaticsummary'):
            # True (instead of a string) marks "use the automatic summary"
            edit_summary = True
        elif arg.startswith('-manualinput'):
            manual_input = True
        elif arg.startswith('-pairsfile'):
            # An odd count means -pairsfile was given between an old text
            # and its replacement.
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True

            if arg == '-pairsfile':
                replacement_file = pywikibot.input(
                    'Please enter the filename to read replacements from:')
            else:
                replacement_file = arg[len('-pairsfile:'):]
        else:
            # Every other argument is part of an old/new text pair.
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return

    if replacement_file_arg_misplaced:
        pywikibot.error('-pairsfile used between a pattern replacement pair.')
        return

    if replacement_file:
        try:
            with codecs.open(replacement_file, 'r', 'utf-8') as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except OSError as e:
            pywikibot.error('Error loading {0}: {1}'.format(
                replacement_file, e))
            return

        if len(file_replacements) % 2:
            pywikibot.error(
                '{0} contains an incomplete pattern replacement pair.'.format(
                    replacement_file))
            return

        # Strip BOM from first line. str.lstrip returns a new string, so the
        # result has to be stored back; an empty file has no first line.
        if file_replacements:
            file_replacements[0] = file_replacements[0].lstrip('\uFEFF')
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input('Please enter the text that should be replaced:')
        while old:
            new = pywikibot.input('Please enter the new text:')
            commandline_replacements += [old, new]
            old = pywikibot.input(
                'Please enter another text that should be replaced,'
                '\nor press Enter to start:')

    # The summary stored here won't be actually used but is only an example
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing', {
                    'description':
                    ' (-{0} +{1})'.format(replacement.old, replacement.new)
                })
        replacements.append(replacement)

    # Perform one of the predefined actions.
    missing_fixes_summaries = []  # which a fixes/replacements miss a summary
    # Generators declared by a fix are only used when none came from the CLI.
    generators_given = bool(genFactory.gens)
    for fix_name in fixes_set:
        try:
            fix = fixes.fixes[fix_name]
        except KeyError:
            pywikibot.output('Available predefined fixes are: {0}'.format(
                ', '.join(fixes.fixes.keys())))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: '
                                 '{0}'.format(fixes.filename))
            return
        if not fix['replacements']:
            pywikibot.warning('No replacements defined for fix '
                              '"{0}"'.format(fix_name))
            continue
        if 'msg' in fix:
            if isinstance(fix['msg'], str):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        if not generators_given and 'generator' in fix:
            gen_args = fix['generator']
            if isinstance(gen_args, str):
                gen_args = [gen_args]
            for gen_arg in gen_args:
                genFactory.handle_arg(gen_arg)
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'),
                                          set_summary,
                                          name=fix_name)
        # Whether some replacements have a summary, if so only show which
        # have none, otherwise just mention the complete fix
        missing_fix_summaries = []
        for index, replacement in enumerate(fix['replacements'], start=1):
            # An optional third element carries a per-replacement summary.
            summary = None if len(replacement) < 3 else replacement[2]
            if not set_summary and not summary:
                missing_fix_summaries.append('"{0}" (replacement #{1})'.format(
                    fix_name, index))
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning('The old string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[0])))
            if (not callable(replacement[1])
                    and chars.contains_invisible(replacement[1])):
                pywikibot.warning('The new string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[1])))
            replacement_set.append(
                ReplacementListEntry(
                    old=replacement[0],
                    new=replacement[1],
                    fix_set=replacement_set,
                    edit_summary=summary,
                ))

        # Exceptions specified via 'fix' shall be merged to those via CLI.
        if replacement_set:
            replacements.extend(replacement_set)
            if replacement_set._exceptions is not None:
                for k, v in replacement_set._exceptions.items():
                    if k in exceptions:
                        exceptions[k] = list(set(exceptions[k]) | set(v))
                    else:
                        exceptions[k] = v

        if len(fix['replacements']) == len(missing_fix_summaries):
            missing_fixes_summaries.append(
                '"{0}" (all replacements)'.format(fix_name))
        else:
            missing_fixes_summaries += missing_fix_summaries

    if ((not edit_summary or edit_summary is True)
            and (missing_fixes_summaries or single_summary)):
        if single_summary:
            pywikibot.output('The summary message for the command line '
                             'replacements will be something like: ' +
                             single_summary)
        if missing_fixes_summaries:
            pywikibot.output('The summary will not be used when the fix has '
                             'one defined but the following fix(es) do(es) '
                             'not have a summary defined: '
                             '{0}'.format(', '.join(missing_fixes_summaries)))
        if edit_summary is not True:
            edit_summary = pywikibot.input(
                'Press Enter to use this automatic message, or enter a '
                'description of the\nchanges your bot will make:')
        else:
            edit_summary = ''

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions, site)
    elif useSql:
        if not sql_query:
            # NOTE(review): this unpacks every entry of `replacements` as a
            # 2-tuple and iterates the keys of the `exceptions` dict, which
            # looks inconsistent with how both are built above - confirm
            # before relying on -mysqlquery without an explicit query.
            whereClause = 'WHERE (%s)' % ' OR '.join(
                "old_text RLIKE '%s'" %
                prepareRegexForMySQL(old_regexp.pattern)
                for (old_regexp, new_text) in replacements)
            if exceptions:
                exceptClause = 'AND NOT (%s)' % ' OR '.join(
                    "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                    for exc in exceptions)
            else:
                exceptClause = ''
        # The template is only formatted when no query was given, because
        # '%' binds tighter than 'or' and 'or' short-circuits.
        query = sql_query or """
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen, preload=True)

    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return

    bot = ReplaceRobot(gen,
                       replacements,
                       exceptions,
                       site=site,
                       summary=edit_summary,
                       **options)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme(). It will make sure the callback is
    # triggered before replace.py is unloaded.
    pywikibot.stopme()
Exemplo n.º 51
0
    def treat_page_and_item(self, page, item):
        """
        Harvest template parameters of one page into claims on its item.

        Every template on the page whose normalized title is listed in
        ``self.templateTitles`` is scanned; each non-empty parameter that is
        configured in ``self.fields`` is converted to a claim target matching
        the claim's datatype and handed to ``user_add_claim_unless_exists``.

        @param page: page whose templates are examined
        @param item: item the resulting claims are added to
        @raise KeyboardInterrupt: if the ``willstop`` flag is set
        """
        if willstop:
            raise KeyboardInterrupt

        for raw_name, params in page.raw_extracted_templates:
            # Normalize the template name to a title without namespace
            try:
                template_title = pywikibot.Page(
                    page.site, raw_name, ns=10).title(withNamespace=False)
            except pywikibot.exceptions.InvalidTitle:
                pywikibot.error(
                    "Failed parsing template; '%s' should be the template name."
                    % raw_name)
                continue

            if template_title not in self.templateTitles:
                continue

            # This is one of the templates to harvest
            for field, value in params.items():
                field = field.strip()
                value = value.strip()
                # Skip empty entries and parameters nobody asked for
                if not (field and value) or field not in self.fields:
                    continue

                prop, options = self.fields[field]
                claim = pywikibot.Claim(self.repo, prop)
                datatype = claim.type

                if datatype == 'wikibase-item':
                    # The target has to be resolved from a wikilink
                    found = pywikibot.link_regex.search(value)
                    if found:
                        link_text = found.group(1)
                    elif self._get_option_with_fallback(options, 'islink'):
                        # Bare value is accepted as a link target on request
                        link_text = value
                    else:
                        pywikibot.output(
                            '%s field %s value %s is not a wikilink. '
                            'Skipping.' % (claim.getID(), field, value))
                        continue

                    linked_item = self._template_link_target(item, link_text)
                    if not linked_item:
                        continue

                    claim.setTarget(linked_item)
                elif datatype in ('string', 'external-id'):
                    claim.setTarget(value.strip())
                elif datatype == 'url':
                    found = self.linkR.search(value)
                    if not found:
                        continue
                    claim.setTarget(found.group('url'))
                elif datatype == 'commonsMedia':
                    commons = pywikibot.Site('commons', 'commons')
                    file_link = pywikibot.Link(value,
                                               source=commons,
                                               defaultNamespace=6)
                    image = pywikibot.FilePage(file_link)
                    # Follow a file redirect once before checking existence
                    if image.isRedirectPage():
                        image = pywikibot.FilePage(image.getRedirectTarget())
                    if not image.exists():
                        pywikibot.output(
                            "{0} doesn't exist. I can't link to it".format(
                                image.title(asLink=True)))
                        continue
                    claim.setTarget(image)
                else:
                    pywikibot.output('%s is not a supported datatype.' %
                                     datatype)
                    continue

                # A generator might yield pages from multiple sites
                exists_arg = self._get_option_with_fallback(options, 'exists')
                self.user_add_claim_unless_exists(
                    item, claim, exists_arg, page.site, pywikibot.output)
Exemplo n.º 52
0
    def handle_bad_page(self, *values):
        """
        Process one bad page.

        The page text is loaded and shown to the user unless it is a
        redirect, is missing, or already contains one of the ``done``
        markers. The user is then asked what to do:

        - ``q`` quits, ``d`` deletes the page, ``e`` opens the editor,
          ``b`` blanks the page and an empty answer accepts the page as is;
        - otherwise the answer is read as a comma separated list of
          template numbers (optionally prefixed with ``u``). The question is
          repeated until every number is an integer between 1 and
          ``len(self.questionlist)``.

        The selected templates are prepended or appended to the text
        according to their ``pos`` setting and the page is saved.

        @param values: unused; forwarded unchanged on an edit conflict retry
        @raise QuitKeyboardInterrupt: if the user answers ``q``
        @raise RuntimeError: if a template has an unknown ``pos`` setting
        """
        try:
            self.content = self.page.get()
        except IsRedirectPageError:
            pywikibot.output('Already redirected, skipping.')
            return
        except NoPageError:
            pywikibot.output('Already deleted')
            return

        for d in pywikibot.translate(self.site.code, done):
            if d in self.content:
                pywikibot.output(
                    'Found: "{}" in content, nothing necessary'.format(d))
                return
        pywikibot.output('---- Start content ----------------')
        pywikibot.output(self.content)
        pywikibot.output('---- End of content ---------------')

        # Loop until the user gives a usable answer
        while True:
            answer = pywikibot.input(self.question)

            if answer == 'q':
                raise QuitKeyboardInterrupt
            if answer == 'd':
                pywikibot.output('Trying to delete page [[{}]].'.format(
                    self.page.title()))
                self.page.delete()
                return
            if answer == 'e':
                old = self.content
                new = editor.TextEditor().edit(old)
                msg = pywikibot.input('Summary message:')
                self.userPut(self.page, old, new, summary=msg)
                return
            if answer == 'b':
                pywikibot.output('Blanking page [[{}]].'.format(
                    self.page.title()))
                try:
                    self.page.put('',
                                  summary=i18n.twtranslate(
                                      self.site.lang, 'followlive-blanking',
                                      {'content': self.content}))
                except EditConflictError:
                    pywikibot.output(
                        'An edit conflict occurred! Automatically retrying')
                    self.handle_bad_page(*values)
                return
            if answer == '':
                pywikibot.output('Page correct! Proceeding with next pages.')
                return
            # Check user input:
            if answer[0] == 'u':
                # Answer entered as string
                answer = answer[1:]

            # Every entry must be a valid number; checking only the last one
            # would let input like "1,a" or "0,1" through and fail later.
            try:
                numbers = [int(choice) for choice in answer.split(',')]
            except ValueError:
                numbers = []
            if numbers and all(1 <= number <= len(self.questionlist)
                               for number in numbers):
                break
            pywikibot.error('"{}" is not valid'.format(answer))

        summary = ''
        for number in numbers:
            # grab the template parameters
            tpl = pywikibot.translate(self.site,
                                      templates)[self.questionlist[number]]
            if tpl['pos'] == 'top':
                pywikibot.output('prepending {}...'.format(
                    self.questionlist[number]))
                self.content = self.questionlist[number] + '\n' + self.content
            elif tpl['pos'] == 'bottom':
                pywikibot.output('appending {}...'.format(
                    self.questionlist[number]))
                self.content += '\n' + self.questionlist[number]
            else:
                raise RuntimeError(
                    '"pos" should be "top" or "bottom" for template {}. '
                    'Contact a developer.'.format(self.questionlist[number]))
            summary += tpl['msg'] + ' '
            pywikibot.output('Probably added ' + self.questionlist[number])

        self.page.put(self.content, summary=summary)
        pywikibot.output('with comment {}\n'.format(summary))
Exemplo n.º 53
0
 def _closed_error(self, notice=''):
     """
     Report that this site is closed instead of doing a pointless API call.

     @param notice: additional text appended to the error message
     """
     message = 'Site {} has been closed. {}'.format(self.sitename, notice)
     pywikibot.error(message)
Exemplo n.º 54
0
def main():
    """
    Add the orphan template to pages nothing links to.

    Recognized arguments are -enable, -disambig, -limit, -newpages and
    -always; any other argument is passed to the generator factory. If no
    generator results, the site's lonely pages are used.

    A page is left alone when it is a redirect, has at least one
    reference, cannot be loaded, or matches one of the site's exception
    regexes. Disambiguation pages are never tagged: they are listed on the
    -disambig page if one was given and skipped otherwise. For every other
    page the template is prepended and, unless -always is active, the user
    is asked before saving.

    @raise Exception: if the site has no template or exception regex
        configuration
    """
    enablePage = None  # Check if someone set an enablePage or not
    limit = 50000    # Hope that there aren't so many lonely pages in a project
    generator = None  # Check if bot should use default generator or not
    nwpages = False  # Check variable for newpages
    always = False  # Check variable for always
    disambigPage = None  # If no disambigPage given, not use it.

    # Arguments!
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg.startswith('-enable'):
            if len(arg) == 7:
                enablePage = pywikibot.input(
                    u'Would you like to check if the bot should run or not?')
            else:
                enablePage = arg[8:]
        # Must be 'elif': otherwise an -enable argument would also fall
        # through to the generator factory below.
        elif arg.startswith('-disambig'):
            if len(arg) == 9:
                disambigPage = pywikibot.input(
                    u'In which page should the bot save the disambig pages?')
            else:
                disambigPage = arg[10:]
        elif arg.startswith('-limit'):
            if len(arg) == 6:
                limit = int(pywikibot.input(
                    u'How many pages do you want to check?'))
            else:
                limit = int(arg[7:])
        elif arg.startswith('-newpages'):
            if len(arg) == 9:
                nwlimit = 50  # Default: 50 pages
            else:
                nwlimit = int(arg[10:])
            generator = pywikibot.Site().newpages(number=nwlimit)
            nwpages = True
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)
    # Retrieve the site
    wikiSite = pywikibot.Site()

    if not generator:
        generator = genFactory.getCombinedGenerator()

    # If the generator is not given, use the default one
    if not generator:
        generator = wikiSite.lonelypages(repeat=True, number=limit)
    # Take the configurations according to our project
    comment = i18n.twtranslate(wikiSite, 'lonelypages-comment-add-template')
    commentdisambig = i18n.twtranslate(
        wikiSite, 'lonelypages-comment-add-disambig-template')
    template = i18n.translate(wikiSite, Template)
    exception = i18n.translate(wikiSite, exception_regex)
    if template is None or exception is None:
        raise Exception("Missing configuration for site %r" % wikiSite)
    # EnablePage part
    if enablePage is not None:
        # Define the Page Object
        enable = pywikibot.Page(wikiSite, enablePage)
        # Loading the page's data
        try:
            getenable = enable.text
        except pywikibot.NoPage:
            pywikibot.output(
                u"%s doesn't esist, I use the page as if it was blank!"
                % enable.title())
            getenable = ''
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % enable.title())
            getenable = ''
        # If the enable page is set to disable, turn off the bot
        # (useful when the bot is run on a server)
        if getenable != 'enable':
            pywikibot.output('The bot is disabled')
            return
    # DisambigPage part
    if disambigPage is not None:
        disambigpage = pywikibot.Page(wikiSite, disambigPage)
        try:
            disambigtext = disambigpage.get()
        except pywikibot.NoPage:
            pywikibot.output(u"%s doesn't esist, skip!" % disambigpage.title())
            disambigtext = ''
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, don't use it!"
                             % disambigpage.title())
            # Without a usable report page, disambigs are simply skipped
            disambigPage = None
    # Main Loop
    for page in generator:
        if nwpages:
            # The newpages generator returns a tuple, not a Page object.
            page = page[0]
        pywikibot.output(u"Checking %s..." % page.title())
        if page.isRedirectPage():  # If redirect, skip!
            pywikibot.output(u'%s is a redirect! Skip...' % page.title())
            continue
        # refs is a generator; collect it so emptiness can be tested
        refs = page.getReferences()
        refsList = []
        for j in refs:
            if j is None:
                # We have to find out why the function returns that value
                pywikibot.error(u'1 --> Skip page')
                continue
            refsList.append(j)
        if refsList:
            pywikibot.output(u"%s isn't orphan! Skip..." % page.title())
            continue

        # no refs, no redirect; check if there's already the template
        try:
            oldtxt = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"%s doesn't exist! Skip..." % page.title())
            continue
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect! Skip..." % page.title())
            continue

        # A match of any exception regex means the page must be skipped.
        # (The former for/else did the opposite: it went on after a match
        # and skipped every page without one.)
        lowered = oldtxt.lower()
        if any(re.search(regexp, lowered) for regexp in exception):
            pywikibot.output(
                u'Your regex has found something in %s, skipping...'
                % page.title())
            continue

        if page.isDisambig() and disambigPage is not None:
            pywikibot.output(u'%s is a disambig page, report..'
                             % page.title())
            if not page.title().lower() in disambigtext.lower():
                disambigtext = u"%s\n*[[%s]]" % (disambigtext, page.title())
                disambigpage.put(disambigtext, commentdisambig)
                continue
        # Is the page a disambig but there's not disambigPage? Skip!
        elif page.isDisambig():
            pywikibot.output(u'%s is a disambig page, skip...'
                             % page.title())
            continue
        else:
            # Ok, the page need the template. Let's put it there!
            # Adding the template in the text
            newtxt = u"%s\n%s" % (template, oldtxt)
            pywikibot.output(u"\t\t>>> %s <<<" % page.title())
            pywikibot.showDiff(oldtxt, newtxt)
            choice = 'y'
            if not always:
                choice = pywikibot.inputChoice(
                    u'Orphan page found, add template?',
                    ['Yes', 'No', 'All'], 'yna')
                if choice == 'a':
                    # 'All' answers yes now and for every following page
                    always = True
                    choice = 'y'
            if choice == 'y':
                page.text = newtxt
                try:
                    page.save(comment)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit Conflict! Skip...')
                    continue
Exemplo n.º 55
0
def main(*args: Tuple[str, ...]):
    """
    Parse the command line and upload a djvu text layer to an Index page.

    If args is an empty list, sys.argv is used.

    Understood options are -index (mandatory), -djvu, -pages, -summary,
    -force and -always; anything else is reported and ignored.

    :param args: command line arguments
    """
    options = {}
    index = None
    djvu_path = '.'  # directory searched for the djvu file by default
    pages = '1-'

    # Handle global arguments first, then the script specific ones.
    for arg in pywikibot.handle_args(args):
        name, _, value = arg.partition(':')
        if name in ('-force', '-always'):
            options[name[1:]] = True
        elif name == '-summary':
            options['summary'] = value
        elif name == '-pages':
            pages = value
        elif name == '-djvu':
            djvu_path = value
        elif name == '-index':
            index = value
        else:
            pywikibot.output('Unknown argument ' + arg)

    # Nothing can be done without an index.
    if not index:
        pywikibot.bot.suggest_help(missing_parameters=['-index'])
        return

    # Resolve the path; a directory is completed with the index name.
    djvu_path = os.path.abspath(os.path.expanduser(djvu_path))
    if not os.path.exists(djvu_path):
        pywikibot.error('No such file or directory: ' + djvu_path)
        return

    if os.path.isdir(djvu_path):
        djvu_path = os.path.join(djvu_path, index)

    # Wrap the djvu file and make sure it carries a text layer.
    djvu = DjVuFile(djvu_path)

    if not djvu.has_text():
        pywikibot.error('No text layer in djvu file {}'.format(djvu.file))
        return

    # Turn 'a-b,c,d-' style input into a list of (start, end) tuples.
    page_ranges = []
    for interval in pages.split(','):
        start, sep, end = interval.partition('-')
        first = int(start) if start else 1
        if not sep:
            # a single page number
            last = first
        elif end:
            last = int(end)
        else:
            # open ended range runs to the last image of the file
            last = djvu.number_of_images()
        page_ranges.append((first, last))

    site = pywikibot.Site()
    if not site.has_extension('ProofreadPage'):
        pywikibot.error(
            'Site {} must have ProofreadPage extension.'.format(site))
        return

    index_page = pywikibot.Page(site, index, ns=site.proofread_index_ns)

    if not index_page.exists():
        raise NoPageError(index)

    pywikibot.output('uploading text from {} to {}'.format(
        djvu.file, index_page.title(as_link=True)))

    bot = DjVuTextBot(djvu, index_page, pages=page_ranges, site=site,
                      **options)
    bot.run()
Exemplo n.º 56
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Arguments which are not recognized as options are positional: the first
    one is taken as the URL, file or directory to upload and all following
    ones are joined with single spaces to form the description.

    @param args: command line arguments
    @type args: str
    @return: False if the given arguments are contradictory or not
        sufficient to run; otherwise None after the bot has been run
    """
    url = ''
    description = []
    summary = None
    keep_filename = False
    always = False
    use_filename = None
    filename_prefix = None
    verify_description = True
    # aborts and ignorewarn are either a set of warning codes or the
    # constant True, the latter meaning "applies to every warning".
    aborts = set()
    ignorewarn = set()
    chunk_size = 0
    recursive = False
    description_file = None

    # process all global bot args
    # returns a list of non-global args, i.e. args for upload.py
    local_args = pywikibot.handle_args(args)
    for option in local_args:
        arg, _, value = option.partition(':')
        if arg == '-always':
            keep_filename = True
            always = True
            verify_description = False
        elif arg == '-recursive':
            recursive = True
        elif arg == '-keep':
            keep_filename = True
        elif arg == '-filename':
            use_filename = value
        elif arg == '-prefix':
            filename_prefix = value
        elif arg == '-summary':
            summary = value
        elif arg == '-noverify':
            verify_description = False
        elif arg == '-abortonwarn':
            # A bare option (no value) switches to "all warnings"; once
            # that happened further codes are irrelevant and it stays True.
            if value and aborts is not True:
                aborts.add(value)
            else:
                aborts = True
        elif arg == '-ignorewarn':
            if value and ignorewarn is not True:
                ignorewarn.add(value)
            else:
                ignorewarn = True
        elif arg == '-chunked':
            # The regex is applied to the whole option, not only its value.
            match = CHUNK_SIZE_REGEX.match(option)
            chunk_size = get_chunk_size(match)
        elif arg == '-descfile':
            description_file = value
        elif not url:
            url = option
        else:
            description.append(option)

    description = ' '.join(description)

    if description_file:
        if description:
            pywikibot.error('Both a description and a -descfile were '
                            'provided. Please specify only one of those.')
            return False
        with codecs.open(description_file,
                         encoding=pywikibot.config.textfile_encoding) as f:
            description = f.read().replace('\r\n', '\n')

    # Ask for the source until it looks like a URL or an existing path.
    # In -always mode nobody can be asked, so give up immediately and let
    # the check below report the problem.
    while not ('://' in url or os.path.exists(url)):
        if not url:
            error = 'No input filename given.'
        else:
            error = 'Invalid input filename given.'
            if not always:
                error += ' Try again.'
        if always:
            url = None
            break
        else:
            pywikibot.output(error)
        url = pywikibot.input('URL, file or directory where files are now:')

    if always and (aborts is not True and ignorewarn is not True
                   or not description or url is None):
        additional = ''
        missing = []
        if url is None:
            missing += ['filename']
            additional = error + ' '
        # description is always a str at this point, so test for emptiness;
        # comparing against None would never report a missing description.
        if not description:
            missing += ['description']
        if aborts is not True and ignorewarn is not True:
            additional += ('Either -ignorewarn or -abortonwarn must be '
                           'defined for all codes. ')
        additional += 'Unable to run in -always mode'
        suggest_help(missing_parameters=missing, additional_text=additional)
        return False

    if os.path.isdir(url):
        file_list = []
        for directory_info in os.walk(url):
            if not recursive:
                # Do not visit any subdirectories
                directory_info[1][:] = []
            for dir_file in directory_info[2]:
                file_list.append(os.path.join(directory_info[0], dir_file))
        url = file_list
    else:
        url = [url]

    bot = UploadRobot(url, description=description, useFilename=use_filename,
                      keepFilename=keep_filename,
                      verifyDescription=verify_description, aborts=aborts,
                      ignoreWarning=ignorewarn, chunk_size=chunk_size,
                      always=always, summary=summary,
                      filename_prefix=filename_prefix)
    bot.run()
Exemplo n.º 57
0
    def review_hunks(self) -> None:
        """Interactively review all pending hunks.

        The pending hunks of self.hunks are grouped into super hunks. Each
        round shows the diff of the current super hunk and asks for one of
        the single-letter commands listed in help_msg. The loop runs until
        no hunk of any super hunk is pending anymore. Answers that are not
        offered for the current position are treated like '?' (help).
        """
        def find_pending(start: int, end: int) -> Optional[int]:
            """Return index of the first pending super hunk or None.

            The indices of range(start, end) are scanned; the scan runs
            backwards when start is greater than end. end is exclusive.
            """
            step = -1 if start > end else +1
            for pending in range(start, end, step):
                if super_hunks[pending].reviewed == Hunk.PENDING:
                    return pending
            return None

        # TODO: Missing commands (compared to git --patch): edit and search
        # Maps each possible answer to the text printed by the help command.
        help_msg = {
            'y': 'accept this hunk',
            'n': 'do not accept this hunk',
            'q': 'do not accept this hunk and quit reviewing',
            'a': 'accept this hunk and all other pending',
            'd': 'do not apply this hunk or any of the later hunks in '
            'the file',
            'g': 'select a hunk to go to',
            'j': 'leave this hunk undecided, see next undecided hunk',
            'J': 'leave this hunk undecided, see next hunk',
            'k': 'leave this hunk undecided, see previous undecided '
            'hunk',
            'K': 'leave this hunk undecided, see previous hunk',
            's': 'split this hunk into smaller ones',
            '?': 'help',
        }

        # Only hunks which are still pending take part in the review.
        super_hunks = self._generate_super_hunks(h for h in self.hunks
                                                 if h.reviewed == Hunk.PENDING)
        position = 0  # type: Optional[int]

        while any(
                any(hunk.reviewed == Hunk.PENDING for hunk in super_hunk)
                for super_hunk in super_hunks):

            assert position is not None
            super_hunk = super_hunks[position]

            # Nearest pending super hunk after / before the current one.
            next_pending = find_pending(position + 1, len(super_hunks))
            prev_pending = find_pending(position - 1, -1)

            # Navigation and split answers are only offered when they have
            # a target resp. when there is more than one hunk to split.
            answers = ['y', 'n', 'q', 'a', 'd', 'g']
            if next_pending is not None:
                answers += ['j']
            if position < len(super_hunks) - 1:
                answers += ['J']
            if prev_pending is not None:
                answers += ['k']
            if position > 0:
                answers += ['K']
            if len(super_hunk) > 1:
                answers += ['s']
            answers += ['?']

            pywikibot.output(self._generate_diff(super_hunk))
            choice = pywikibot.input('Accept this hunk [{}]?'.format(
                ','.join(answers)))
            # Anything not currently offered falls through to the help text.
            if choice not in answers:
                choice = '?'

            if choice in ['y', 'n']:
                super_hunk.reviewed = \
                    Hunk.APPR if choice == 'y' else Hunk.NOT_APPR
                # Continue with the next pending super hunk; if there is
                # none behind, look for one in front of the current one.
                if next_pending is not None:
                    position = next_pending
                else:
                    position = find_pending(0, position)
            elif choice == 'q':
                # Reject everything still pending; afterwards the loop
                # condition is false and the review ends.
                for super_hunk in super_hunks:
                    for hunk in super_hunk:
                        if hunk.reviewed == Hunk.PENDING:
                            hunk.reviewed = Hunk.NOT_APPR
            elif choice in ['a', 'd']:
                # Applies to the current and all following super hunks;
                # hunks which were already decided keep their status.
                for super_hunk in super_hunks[position:]:
                    for hunk in super_hunk:
                        if hunk.reviewed == Hunk.PENDING:
                            hunk.reviewed = \
                                Hunk.APPR if choice == 'a' else Hunk.NOT_APPR
                position = find_pending(0, position)
            elif choice == 'g':
                # Print one line per super hunk and ask for the number of
                # the one to jump to. Each entry of hunk_list is a tuple
                # (status, 1-based index, header text, mode, first line).
                hunk_list = []
                rng_width = 18
                for index, super_hunk in enumerate(super_hunks, start=1):
                    assert -1 <= super_hunk.reviewed <= 1, \
                        "The super hunk's review status is unknown."
                    # The review value is used directly as index: 0 gives
                    # ' ', 1 gives '+' and -1 gives '-'.
                    status = ' +-'[super_hunk.reviewed]

                    # Show the first line of the a side if the first hunk
                    # covers any lines there, otherwise of the b side.
                    if super_hunk[0].a_rng[1] - super_hunk[0].a_rng[0] > 0:
                        mode = '-'
                        first = self.a[super_hunk[0].a_rng[0]]
                    else:
                        mode = '+'
                        first = self.b[super_hunk[0].b_rng[0]]
                    hunk_list += [(status, index,
                                   Hunk.get_header_text(
                                       *self._get_context_range(super_hunk),
                                       affix=''), mode, first)]
                    # Widen the header column to the longest header text.
                    rng_width = max(len(hunk_list[-1][2]), rng_width)
                # The index column is as wide as the number of digits of
                # the super hunk count.
                line_template = ('{0}{1} {2: >' +
                                 str(int(math.log10(len(super_hunks)) + 1)) +
                                 '}: {3: <' + str(rng_width) + '} {4}{5}')
                # the last entry is the first changed line which usually ends
                # with a \n (only the last may not, which is covered by the
                # if-condition following this block)
                # The current super hunk is marked with a leading '*'.
                hunk_list_str = ''.join(
                    line_template.format(
                        '*' if hunk_entry[1] == position +
                        1 else ' ', *hunk_entry) for hunk_entry in hunk_list)
                if hunk_list_str.endswith('\n'):
                    hunk_list_str = hunk_list_str[:-1]
                pywikibot.output(hunk_list_str)
                next_hunk = pywikibot.input('Go to which hunk?')
                # False (rather than None or -1) marks input which is not
                # an integer; 0 is a valid position and must stay usable.
                try:
                    next_hunk_position = int(next_hunk) - 1
                except ValueError:
                    next_hunk_position = False
                if (next_hunk_position is not False
                        and 0 <= next_hunk_position < len(super_hunks)):
                    position = next_hunk_position
                elif next_hunk:  # nothing entered is silently ignored
                    pywikibot.error(
                        'Invalid hunk number "{}"'.format(next_hunk))
            elif choice == 'j':
                assert next_pending is not None
                position = next_pending
            elif choice == 'J':
                position += 1
            elif choice == 'k':
                assert prev_pending is not None
                position = prev_pending
            elif choice == 'K':
                position -= 1
            elif choice == 's':
                # Replace the current super hunk by the result of its
                # split(); position then refers to the first of them.
                super_hunks = (super_hunks[:position] +
                               super_hunks[position].split() +
                               super_hunks[position + 1:])
                # super_hunk still is the super hunk before the split.
                pywikibot.output('Split into {} hunks'.format(
                    len(super_hunk._hunks)))
            else:  # choice == '?':
                # Print the help text of the currently offered answers only.
                pywikibot.output(
                    color_format(
                        '{purple}{0}{default}',
                        '\n'.join('{0} -> {1}'.format(answer, help_msg[answer])
                                  for answer in answers)))
Exemplo n.º 58
0
            (page.title(asLink=True), targetpage.title(asLink=True)))

        pywikibot.log("Getting page text.")
        text = page.get(get_redirect=True)
        text += ("<noinclude>\n\n<small>This page was moved from %s. It's "
                 "edit history can be viewed at %s</small></noinclude>" %
                 (page.title(asLink=True, insite=targetpage.site),
                  edithistpage.title(asLink=True, insite=targetpage.site)))

        pywikibot.log("Getting edit history.")
        historytable = page.getVersionHistoryTable()

        pywikibot.log("Putting page text.")
        targetpage.put(text, summary=summary)

        pywikibot.log("Putting edit history.")
        edithistpage.put(historytable, summary=summary)


if __name__ == "__main__":
    # Run the script; a missing mandatory parameter is reported with a
    # specific message, a usage hint and the logged exception.
    try:
        main()
    except (TargetSiteMissing, TargetPagesMissing) as missing:
        if isinstance(missing, TargetSiteMissing):
            reason = u'Need to specify a target site and/or language'
        else:
            reason = u'Need to specify a page range'
        pywikibot.error(reason)
        pywikibot.error(u'Try running this script with -help for help/usage')
        pywikibot.exception()
Exemplo n.º 59
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Every argument which does not start with '-' is taken as the name of an
    archive configuration template. For each template the pages to process
    are either the pages transcluding it (optionally limited by -namespace),
    the pages listed in the file given by -file/-filename, or the single
    page given by -page. With -calc only the key for the given page and
    -salt is printed and no page is processed.

    @param args: command line arguments
    @type args: list of unicode
    @return: False if a required parameter is missing, otherwise None
    """
    filename = None
    pagename = None
    namespace = None
    salt = ''
    force = False
    calc = None
    # Template names are collected in their own list. Reusing the name
    # ``args`` here would discard the arguments passed in by the caller
    # before they are handed over to handle_args().
    templates = []

    def if_arg_value(arg, name):
        """Yield the option value if arg starts with name.

        The value is everything behind the option name and one separator
        character. Nothing is yielded for other arguments, so the result
        can be used as ``for v in if_arg_value(...)``.
        """
        if arg.startswith(name):
            yield arg[len(name) + 1:]

    for arg in pywikibot.handle_args(args):
        # NOTE: '-file' is also a prefix of '-filename'; the bogus value
        # assigned here in that case is overwritten by the '-filename'
        # check further down.
        for v in if_arg_value(arg, '-file'):
            filename = v
        for v in if_arg_value(arg, '-locale'):
            # Required for english month names
            # setlocale() expects the native str type: encode only if the
            # value is not already one (i.e. unicode on Python 2).
            locale.setlocale(
                locale.LC_TIME, v if isinstance(v, str) else v.encode('utf8'))
        for v in if_arg_value(arg, '-timezone'):
            # The option value itself is the timezone name.
            os.environ['TZ'] = v
            # Or use the preset value
            if hasattr(time, 'tzset'):
                time.tzset()
        for v in if_arg_value(arg, '-calc'):
            calc = v
        for v in if_arg_value(arg, '-salt'):
            salt = v
        for v in if_arg_value(arg, '-force'):
            force = True
        for v in if_arg_value(arg, '-filename'):
            filename = v
        for v in if_arg_value(arg, '-page'):
            pagename = v
        for v in if_arg_value(arg, '-namespace'):
            namespace = v
        if not arg.startswith('-'):
            templates.append(arg)

    site = pywikibot.Site()

    if calc:
        if not salt:
            pywikibot.bot.suggest_help(missing_parameters=['-salt'])
            return False
        page = pywikibot.Page(site, calc)
        if page.exists():
            calc = page.title()
        else:
            pywikibot.output(
                u'NOTE: the specified page "%s" does not (yet) exist.' % calc)
        pywikibot.output('key = %s' % calc_md5_hexdigest(calc, salt))
        return

    if not templates:
        pywikibot.bot.suggest_help(
            additional_text='No template was specified.')
        return False

    for a in templates:
        pagelist = []
        a = pywikibot.Page(site, a, ns=10).title()
        if not filename and not pagename:
            if namespace is not None:
                ns = [str(namespace)]
            else:
                ns = []
            for pg in generate_transclusions(site, a, ns):
                pagelist.append(pg)
        if filename:
            # Close the file as soon as all titles have been read.
            with open(filename, 'r') as title_file:
                for pg in title_file.readlines():
                    pagelist.append(pywikibot.Page(site, pg, ns=10))
        if pagename:
            pagelist.append(pywikibot.Page(site, pagename, ns=3))
        pagelist = sorted(pagelist)
        for pg in pagelist:
            pywikibot.output(u'Processing %s' % pg)
            # Catching exceptions, so that errors in one page do not bail out
            # the entire process
            try:
                archiver = PageArchiver(pg, a, salt, force)
                archiver.run()
            except ArchiveBotSiteConfigError as e:
                # no stack trace for errors originated by pages on-site
                pywikibot.error(
                    'Missing or malformed template in page %s: %s' % (pg, e))
            except Exception:
                pywikibot.error(u'Error occurred while processing page %s' %
                                pg)
                pywikibot.exception(tb=True)
Exemplo n.º 60
0
def main(*args):
    """
    Parse the command line arguments and add the text to every page.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    def value_or_prompt(arg, option, question):
        """Return the value given with option, or ask the user for it.

        The user is asked when arg has exactly the length of the option
        name; otherwise everything behind the option name and one separator
        character is returned.
        """
        if len(arg) == len(option):
            return pywikibot.input(question)
        return arg[len(option) + 1:]

    # None means "not given"; this is checked after parsing.
    summary = None
    text_to_add = None
    skip_regex = None
    skip_url_regex = None
    textfile = None
    always = False
    with_talk_page = False
    reorder = True
    # Put the text above (True) or below (False) the existing text?
    on_top = False

    # Global arguments are handled first, the remaining ones are either
    # options of this script or are passed to the generator factory.
    gen_factory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handle_args(args):
        # The longer option of two sharing a prefix has to be tested first.
        if arg.startswith('-textfile'):
            textfile = value_or_prompt(
                arg, '-textfile', u'Which textfile do you want to add?')
        elif arg.startswith('-text'):
            text_to_add = value_or_prompt(
                arg, '-text', u'What text do you want to add?')
        elif arg.startswith('-summary'):
            summary = value_or_prompt(
                arg, '-summary', u'What summary do you want to use?')
        elif arg.startswith('-excepturl'):
            skip_url_regex = value_or_prompt(
                arg, '-excepturl', u'What text should I skip?')
        elif arg.startswith('-except'):
            skip_regex = value_or_prompt(
                arg, '-except', u'What text should I skip?')
        elif arg == '-up':
            on_top = True
        elif arg == '-noreorder':
            reorder = False
        elif arg == '-always':
            always = True
        elif arg in ('-talk', '-talkpage'):
            with_talk_page = True
        else:
            gen_factory.handleArg(arg)

    # An explicitly given text takes precedence over the text file.
    if textfile and not text_to_add:
        with codecs.open(textfile, 'r', config.textfile_encoding) as f:
            text_to_add = f.read()

    generator = gen_factory.getCombinedGenerator()
    if not generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
    if not text_to_add:
        pywikibot.error("The text to add wasn't given.")
        return
    if with_talk_page:
        generator = pagegenerators.PageWithTalkPageGenerator(generator, True)

    for page in generator:
        # always is fed back so the value returned for one page is used for
        # the following pages.
        (text, newtext, always) = add_text(
            page, text_to_add, summary, skip_regex, skip_url_regex, always,
            on_top, True, reorderEnabled=reorder, create=with_talk_page)