Exemplo n.º 1
0
def main(*args):
    """
    Parse the command line and refresh or print the watchlist.

    If args is an empty list, sys.argv is used.

    Recognised local options are -all (alias -update), -new and -sysop;
    every other local argument is ignored. -all wins over -new when both
    are given.

    @param args: command line arguments
    @type args: list of unicode
    """
    local_args = set(pywikibot.handle_args(args))
    as_sysop = '-sysop' in local_args

    if '-all' in local_args or '-update' in local_args:
        refresh_all(sysop=as_sysop)
        return
    if '-new' in local_args:
        refresh_new(sysop=as_sysop)
        return

    # Default action: reload the watchlist of the default site and list it.
    site = pywikibot.Site()
    watchlist = refresh(site, sysop=as_sysop)
    pywikibot.output(u'%i pages in the watchlist.' % len(watchlist))
    for page in watchlist:
        try:
            pywikibot.stdout(page.title())
        except pywikibot.InvalidTitle:
            # An invalid title must not abort the listing; report it and
            # carry on with the next page.
            pywikibot.exception()
Exemplo n.º 2
0
def main(*args):
    """
    Parse the command line and refresh or print the watchlist.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Map every recognised switch to the action it enables; -update is an
    # alias of -all. Unrecognised local arguments are ignored.
    switches = {
        '-all': 'all',
        '-update': 'all',
        '-new': 'new',
        '-sysop': 'sysop',
    }
    enabled = {switches[arg]
               for arg in pywikibot.handle_args(args)
               if arg in switches}
    use_sysop = 'sysop' in enabled

    if 'all' in enabled:
        refresh_all(sysop=use_sysop)
    elif 'new' in enabled:
        refresh_new(sysop=use_sysop)
    else:
        # No refresh mode requested: reload and print the watchlist of the
        # default site.
        default_site = pywikibot.Site()
        watchlist = refresh(default_site, sysop=use_sysop)
        pywikibot.output(
            u'{0:d} pages in the watchlist.'.format(len(watchlist)))
        for watched in watchlist:
            try:
                pywikibot.stdout(watched.title())
            except pywikibot.InvalidTitle:
                pywikibot.exception()
Exemplo n.º 3
0
def main(*args):
    """
    Print the pages produced by the generators named on the command line.

    Local option -notitle suppresses the numbered title line and -get
    additionally prints each page's text. All other local arguments are
    handed to the GeneratorFactory. When no generator results, the help
    text is shown instead.

    @param args: command line arguments
    """
    show_title = True
    dump_text = False

    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs(*args)
    factory = GeneratorFactory()

    for option in local_args:
        if option == '-get':
            dump_text = True
        elif option == '-notitle':
            show_title = False
        else:
            factory.handleArg(option)

    pages = factory.getCombinedGenerator()
    if not pages:
        pywikibot.showHelp()
        return

    # Pages are numbered from 1.
    for number, page in enumerate(pages, start=1):
        if show_title:
            pywikibot.stdout("%4d: %s" % (number, page.title()))
        if dump_text:
            # TODO: catch exceptions
            pywikibot.output(page.text, toStdout=True)
Exemplo n.º 4
0
def main(*args):
    """
    Parse the command line and refresh or print the watchlist.

    If args is an empty list, sys.argv is used.

    -all (alias -update) refreshes every watchlist, -new refreshes the
    new ones; with neither option the watchlist of the default site is
    reloaded and printed. -all takes precedence over -new.

    @param args: command line arguments
    @type args: str
    """
    requested = frozenset(pywikibot.handle_args(args))

    if requested & {'-all', '-update'}:
        refresh_all()
        return
    if '-new' in requested:
        refresh_new()
        return

    watchlist = refresh(pywikibot.Site())
    pywikibot.output('{} pages in the watchlist.'.format(len(watchlist)))
    for entry in watchlist:
        try:
            pywikibot.stdout(entry.title())
        except pywikibot.InvalidTitle:
            # Report the bad title and continue with the remaining pages.
            pywikibot.exception()
Exemplo n.º 5
0
def out(text, newline=True, date=False, color=None):
    """Just output some text to the console or log.

    @param text: the message to print
    @param newline: passed through to pywikibot.stdout as ``newline``
    @param date: if true, prefix the message with the current UTC time
        formatted as ``YYYY-MM-DD HH:MM:SS`` followed by a colon and space
    @param color: if given, the message is wrapped in colour markup naming
        this colour and then switching back to ``default``
    """
    if color:
        text = "\03{%s}%s\03{default}" % (color, text)

    if date:
        prefix = "%s: " % datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
    else:
        prefix = ""

    pywikibot.stdout("%s%s" % (prefix, text), newline=newline)
Exemplo n.º 6
0
def main(*args):
    """
    List the pages yielded by the command line generators.

    -notitle switches off the numbered title output and -get prints the
    text of every page; any other local argument goes to the
    GeneratorFactory. Without a resulting generator the help is shown.

    @param args: command line arguments
    """
    # Switches handled here; everything else belongs to the generators.
    flags = {'-notitle': False, '-get': False}

    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs(*args)
    genFactory = GeneratorFactory()

    for arg in local_args:
        if arg in flags:
            flags[arg] = True
        else:
            genFactory.handleArg(arg)

    gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp()
        return

    for index, page in enumerate(gen, start=1):
        if not flags['-notitle']:
            pywikibot.stdout("%4d: %s" % (index, page.title()))
        if flags['-get']:
            # TODO: catch exceptions
            pywikibot.output(page.text, toStdout=True)
Exemplo n.º 7
0
    def login(self, retry=False, force=False):
        """
        Attempt to log into the server.

        If an access token is already present and force is false, only a
        status message is printed. Otherwise the OAuth handshake is run:
        the authorisation URL is opened in a web browser, the user is
        asked for the response query string, and on success the resulting
        key/secret pair is stored in self._access_token. Any exception
        raised during the handshake is reported via pywikibot.error.

        @param retry: infinitely retry if exception occurs during authentication.
        @type retry: bool
        @param force: force to re-authenticate
        @type force: bool
        """
        # Retry with a loop rather than by calling login() recursively:
        # with retry=True the number of attempts is unbounded, and one
        # stack frame per failed attempt would end in a RecursionError.
        while True:
            if self.access_token is not None and not force:
                pywikibot.output(
                    'Logged in to {site!s} via consumer {key!s}'.format(
                        key=self.consumer_token[0], site=self.site))
                return

            pywikibot.output(
                'Logging in to {site!s} via OAuth consumer {key!s}'.format(
                    key=self.consumer_token[0], site=self.site))
            consumer_token = mwoauth.ConsumerToken(self.consumer_token[0],
                                                   self.consumer_token[1])
            handshaker = mwoauth.Handshaker(
                self.site.base_url(self.site.path()), consumer_token)
            try:
                redirect, request_token = handshaker.initiate()
                pywikibot.stdout('Authenticate via web browser..')
                webbrowser.open(redirect)
                pywikibot.stdout('If your web browser does not open '
                                 'automatically, please point it to: %s'
                                 % redirect)
                request_qs = pywikibot.input('Response query string: ')
                access_token = handshaker.complete(request_token,
                                                   request_qs)
                self._access_token = (access_token.key, access_token.secret)
            except Exception as e:
                pywikibot.error(e)
                if not retry:
                    return
                # retry requested: fall through to the next loop iteration
            else:
                return
Exemplo n.º 8
0
def main(*args):
    """
    Print formatted entries for the pages of the command line generators.

    Local options: -notitle (no formatted line per page), -format:<fmt>
    (format passed to Formatter.output, default '1'), -outputlang:<code>
    (passed to Formatter) and -get (also print the page text). Remaining
    local arguments are given to the GeneratorFactory; if no generator
    results, the help is shown.

    @param args: command line arguments
    """
    format_prefix = "-format:"
    lang_prefix = "-outputlang:"

    show_title = True
    fmt = '1'
    outputlang = None
    page_get = False

    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs(*args)
    genFactory = GeneratorFactory()

    for arg in local_args:
        if arg == '-notitle':
            show_title = False
        elif arg.startswith(format_prefix):
            # Turn the literal text "\03{{" typed on the command line into
            # the \x03 control character followed by "{{".
            fmt = arg[len(format_prefix):].replace(u'\\03{{', u'\03{{')
        elif arg.startswith(lang_prefix):
            outputlang = arg[len(lang_prefix):]
        elif arg == '-get':
            page_get = True
        else:
            genFactory.handleArg(arg)

    gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp()
        return

    for number, page in enumerate(gen, start=1):
        if show_title:
            entry = Formatter(page, outputlang).output(num=number, fmt=fmt)
            pywikibot.stdout(entry)
        if page_get:
            # TODO: catch exceptions
            pywikibot.output(page.text, toStdout=True)
    def param_Wikipedia(self, p, pname, m_wp_pagename_raw):
        """
        Process a parameter whose value names a Wikipedia page.

        The raw value is resolved to a site and page name, then to an item
        via self.wd. Depending on the instance settings the item may be
        linked to p.itemWD, and pname is appended to p.params_to_delete
        once the parameter is considered redundant. The method returns
        None on every path.

        @param p: object carrying itemWD, rootpagename and params_to_delete
        @param pname: name of the parameter being processed
        @param m_wp_pagename_raw: raw parameter value naming the page
        """
        WP, m_wp_pagename = self.wd.get_WPsite(m_wp_pagename_raw)
        if not WP:
            return

        m_wp_page_item = self.wd.get_item(WP, title=m_wp_pagename)
        if not m_wp_page_item:
            return

        # NOTE: a disabled earlier check keyed on
        # require_ruwiki_sitelink_in_item used to sit here.

        # The disambiguation check below looks at the redirect target when
        # the item is a redirect; everything else keeps using
        # m_wp_page_item itself.
        if m_wp_page_item.isRedirectPage():
            topic_item = m_wp_page_item.getRedirectTarget()
        else:
            topic_item = m_wp_page_item
        topic_item.get()

        # Do not process links that point to disambiguation pages.
        if self.skip_wd_links_to_disambigs:
            type_claims = topic_item.claims.get(self.wd.item_type, [])
            if any(claim.target and claim.target.id == self.wd.disambig
                   for claim in type_claims):
                pwb.stdout('ссылка на дизамбиг')
                return

        # The parameter is redundant when it names exactly the page that
        # the item's sitelink for this wiki already points to.
        if self.require_ruwiki_sitelink:
            sitelink = m_wp_page_item.sitelinks.get(f'{WP.lang}wiki')
            if sitelink and m_wp_pagename == sitelink.title:
                p.params_to_delete.append(pname)
                return

        # TODO: overly general Wikipedia pages are not worth linking to
        # the item.

        # TODO: duplicate descriptions get created in topics; verify in a
        # unit test.
        # Add the "main subject" property.
        if self.make_wd_links:
            if self.skip_existing_topics \
                    and self.wd.is_another_id_in_item_topics(
                        p.itemWD, m_wp_page_item.id):
                pwb.stdout(
                    'Item уже имеет темы, отличные от ручной ссылки. Возможно в ручной ссылке - дизамбиг'
                )
                return

            if not self.wd.is_id_in_item_topics(p.itemWD, m_wp_page_item.id):
                self.wd.add_main_subject(p.itemWD, target=m_wp_page_item)
            if not self.wd.is_id_in_item_describes(p.rootpagename, p.itemWD.id,
                                                   m_wp_page_item):
                self.wd.add_article_in_subjectitem(p.rootpagename,
                                                   m_wp_page_item, p.itemWD)

        # Both directions of the link exist, so the manual parameter is no
        # longer needed.
        linked_as_topic = self.wd.is_id_in_item_topics(p.itemWD,
                                                       m_wp_page_item.id)
        if linked_as_topic and self.wd.is_id_in_item_describes(
                p.rootpagename, p.itemWD.id, m_wp_page_item):
            p.params_to_delete.append(pname)
Exemplo n.º 10
0
 def treat(self, page):
     """
     Report the first match of the current rule on the page.

     The raw page text is searched first as a cheap pre-check; only if it
     matches is the text searched again after remove_disabled_parts. A
     match in the cleaned text is formatted with self.pattern, printed
     and appended to self.data.
     """
     if not self.current_rule.find.search(page.text):
         return

     cleaned = self.remove_disabled_parts(page.text)
     hit = self.current_rule.find.search(cleaned)
     if not hit:
         return

     line = self.pattern.format(page.title(as_link=True), hit.group(0))
     pywikibot.stdout(line)
     self.data.append(line)
Exemplo n.º 11
0
 def treat(self, page):
     """
     Report the first match of the current rule on the page.

     The raw page text is searched first; if it matches, the text is
     searched again after textlib.removeDisabledParts has been applied
     with TypoRule.exceptions. A match in the cleaned text is formatted
     with self.pattern, printed and appended to self.data.
     """
     if not self.current_rule.find.search(page.text):
         return

     cleaned = textlib.removeDisabledParts(
         page.text, TypoRule.exceptions, site=self.site)
     hit = self.current_rule.find.search(cleaned)
     if not hit:
         return

     line = self.pattern.format(page.title(as_link=True), hit.group(0))
     pywikibot.stdout(line)
     self.data.append(line)
Exemplo n.º 12
0
    def listTemplates(cls, templates, namespaces):
        """
        Print every page that transcludes one of the given templates.

        The requested template names are listed first, followed by the
        title of each page returned by cls.template_dict for the given
        'namespaces', the total number of printed pages and a UTC
        timestamp.

        @param templates: list of template names
        @type templates: list
        @param namespaces: list of namespace numbers
        @type namespaces: list
        """
        pages_by_template = cls.template_dict(templates, namespaces)

        pywikibot.stdout('\nList of pages transcluding templates:')
        for name in templates:
            pywikibot.output(u'* %s' % name)
        pywikibot.stdout('-' * 36)

        page_count = 0
        for name in pages_by_template:
            for transcluding_page in pages_by_template[name]:
                pywikibot.stdout(transcluding_page.title())
                page_count += 1

        pywikibot.output(u'Total page count: %d' % page_count)
        timestamp = datetime.datetime.utcnow().isoformat()
        pywikibot.stdout('Report generated on {0}'
                         ''.format(timestamp))
Exemplo n.º 13
0
def main(*args):
    """
    Print one formatted line per page of the command line generators.

    -format:<fmt> replaces the default format string, which is applied
    with str.format using the keywords ``num`` (running number, starting
    at 0) and ``page``. Every local argument, including -format, is also
    handed to the GeneratorFactory. Without a generator the help is
    shown.

    @param args: command line arguments
    """
    format_option = "-format:"
    line_format = "{num:4d} {page.title}"

    factory = GeneratorFactory()
    for option in pywikibot.handleArgs(*args):
        if option.startswith(format_option):
            line_format = option[len(format_option):]
        factory.handleArg(option)

    pages = factory.getCombinedGenerator()
    if not pages:
        pywikibot.showHelp()
        return

    for number, page in enumerate(pages):
        pywikibot.stdout(line_format.format(num=number, page=page))
Exemplo n.º 14
0
    def listTemplates(cls, templates, namespaces):
        """
        Print every page that transcludes one of the given templates.

        Lists the requested template names, then the title of each page
        returned by cls.template_dict for the given 'namespaces', then the
        number of printed pages and a UTC timestamp.

        @param templates: list of template names
        @type templates: list
        @param namespaces: list of namespace numbers
        @type namespaces: list
        """
        transclusions = cls.template_dict(templates, namespaces)

        pywikibot.stdout('\nList of pages transcluding templates:')
        for template_name in templates:
            pywikibot.output('* ' + template_name)
        pywikibot.stdout('-' * 36)

        printed = 0
        for template_name in transclusions:
            for transcluding_page in transclusions[template_name]:
                pywikibot.stdout(transcluding_page.title())
                printed += 1

        pywikibot.output('Total page count: {0}'.format(printed))
        generated_at = datetime.datetime.utcnow().isoformat()
        pywikibot.stdout('Report generated on {0}'
                         ''.format(generated_at))
Exemplo n.º 15
0
def main(*args):
    """
    Print the title of every page of the command line generators.

    All local arguments are handed to the GeneratorFactory; without a
    resulting generator the help is shown. Any exception is reported as
    a fatal error, and pywikibot.stopme() is called in every case.

    @param args: command line arguments
    """
    try:
        factory = GeneratorFactory()
        for option in pywikibot.handleArgs(*args):
            factory.handleArg(option)

        pages = factory.getCombinedGenerator()
        if not pages:
            pywikibot.showHelp()
        else:
            for page in pages:
                pywikibot.stdout(page.title())
    except Exception:
        pywikibot.error("Fatal error", exc_info=True)
    finally:
        pywikibot.stopme()
Exemplo n.º 16
0
 def treat(self, page):
     """
     Collect the distinct matches of the current rule on the page.

     The raw page text is searched first as a pre-check. The text is then
     passed through remove_disabled_parts and every distinct matched
     string is formatted with self.pattern. Lines whose text from the
     third character on is listed in self.false_positives are skipped;
     the others are printed and the matched string is stored in
     self.data under the page link. A link with no stored entry yet is
     also appended to self.order.
     """
     if not self.current_rule.find.search(page.text):
         return

     cleaned = self.remove_disabled_parts(page.text)
     seen = set()
     for hit in self.current_rule.find.finditer(cleaned):
         snippet = hit.group(0)
         if snippet in seen:
             # identical matched text was already handled for this page
             continue
         seen.add(snippet)

         link = page.title(as_link=True)
         report_line = self.pattern.format(link, snippet)
         if report_line[2:] in self.false_positives:
             continue

         pywikibot.stdout(report_line)
         if not self.data.get(link):
             self.order.append(link)
         self.data[link].append(snippet)
Exemplo n.º 17
0
def main(*args):
    """
    Print a numbered title list for the command line generators.

    All local arguments are handed to the GeneratorFactory; without a
    resulting generator the help is shown. Pages are numbered from 1.
    Any exception is reported as a fatal error, and pywikibot.stopme()
    is called in every case.

    @param args: command line arguments
    """
    try:
        factory = GeneratorFactory()
        for option in pywikibot.handleArgs(*args):
            factory.handleArg(option)

        pages = factory.getCombinedGenerator()
        if not pages:
            pywikibot.showHelp()
        else:
            for number, page in enumerate(pages, start=1):
                pywikibot.stdout("%4d: %s" % (number, page.title()))
    except Exception:
        pywikibot.error("Fatal error", exc_info=True)
    finally:
        pywikibot.stopme()
Exemplo n.º 18
0
    def login(self, retry=False, force=False):
        """
        Attempt to log into the server.

        If an access token is already present and force is false, only a
        status message is printed. Otherwise the OAuth handshake is run:
        the authorisation URL is opened in a web browser, the user is
        asked for the response query string, and on success the resulting
        key/secret pair is stored in self._access_token. Any exception
        raised during the handshake is reported via pywikibot.error.

        @see: U{https://www.mediawiki.org/wiki/API:Login}

        @param retry: infinitely retry if exception occurs during
            authentication.
        @type retry: bool
        @param force: force to re-authenticate
        @type force: bool
        """
        # Retry with a loop rather than by calling login() recursively:
        # with retry=True the number of attempts is unbounded, and one
        # stack frame per failed attempt would end in a RecursionError.
        while True:
            if self.access_token is not None and not force:
                pywikibot.output(
                    'Logged in to %(site)s via consumer %(key)s' % {
                        'key': self.consumer_token[0],
                        'site': self.site
                    })
                return

            pywikibot.output(
                'Logging in to %(site)s via OAuth consumer %(key)s' % {
                    'key': self.consumer_token[0],
                    'site': self.site
                })
            consumer_token = mwoauth.ConsumerToken(self.consumer_token[0],
                                                   self.consumer_token[1])
            handshaker = mwoauth.Handshaker(
                self.site.base_url(self.site.path()), consumer_token)
            try:
                redirect, request_token = handshaker.initiate()
                pywikibot.stdout('Authenticate via web browser..')
                webbrowser.open(redirect)
                pywikibot.stdout('If your web browser does not open '
                                 'automatically, please point it to: %s' %
                                 redirect)
                request_qs = pywikibot.input('Response query string: ')
                access_token = handshaker.complete(request_token, request_qs)
                self._access_token = (access_token.key, access_token.secret)
            except Exception as e:
                pywikibot.error(e)
                if not retry:
                    return
                # retry requested: fall through to the next loop iteration
            else:
                return
Exemplo n.º 19
0
    def check_page(self, pagename):
        """
        Check one page.

        The text of the page on self.original is compared with the text of
        the corresponding page on every site in self.sites. Differing
        pages are recorded in self.differences; if the replace option is
        set, the text is saved to the other site, otherwise a progress dot
        is printed.

        @param pagename: title of the page to compare
        """
        pywikibot.output('\nChecking ' + pagename)
        page1 = Page(self.original, pagename)
        txt1 = page1.text

        # An optional destination namespace moves the comparison target
        # into another namespace on the other sites.
        dest_ns = (int(self.options.dest_namespace)
                   if self.options.dest_namespace else None)

        for site in self.sites:
            if dest_ns is None:
                page2 = Page(site, pagename)
            else:
                page2 = Page(site, page1.title(with_ns=False), dest_ns)
                pywikibot.output('\nCross namespace, new title: ' +
                                 page2.title())

            txt2 = page2.text

            site_key = str(site)
            if site_key in config.replicate_replace:
                replacements = config.replicate_replace[site_key]
                txt_new = multiple_replace(txt1, replacements)
                if txt1 != txt_new:
                    pywikibot.output(
                        'NOTE: text replaced using config.sync_replace')
                    pywikibot.output('{0} {1} {2}'.format(txt1, txt_new, txt2))
                    # NOTE(review): txt1 is rebound here, so the replaced
                    # text is also what later sites in this loop are
                    # compared against - confirm this is intended.
                    txt1 = txt_new

            if txt1 != txt2:
                pywikibot.output('\n {0} DIFFERS'.format(site))
                self.differences[site].append(pagename)

            # NOTE(review): with the replace option the page is saved even
            # when the texts are equal - confirm this is intended.
            if self.options.replace:
                page2.text = txt1
                page2.save(self.put_message(site))
            else:
                pywikibot.stdout('.', newline=False)
Exemplo n.º 20
0
def main(*args):
    """
    Parse the command line and refresh, count or print the watchlist.

    If args is an empty list, sys.argv is used.

    Exactly one action runs, chosen in this order of precedence:
    -all / -update, -new, -count, -count:all. Without any of them the
    watchlist of the default site is counted, reloaded and printed.

    @param args: command line arguments
    @type args: str
    """
    requested = set(pywikibot.handle_args(args))

    if '-all' in requested or '-update' in requested:
        refresh_all()
        return
    if '-new' in requested:
        refresh_new()
        return
    if '-count' in requested:
        count_watchlist()
        return
    if '-count:all' in requested:
        count_watchlist_all()
        return

    site = pywikibot.Site()
    count_watchlist(site)
    # Fetch the complete list before printing anything.
    watchlist = list(site.watched_pages(force=True))
    for watched in watchlist:
        try:
            pywikibot.stdout(watched.title())
        except pywikibot.InvalidTitle:
            pywikibot.exception()
Exemplo n.º 21
0
    def login(self, retry: bool = False, force: bool = False) -> bool:
        """
        Attempt to log into the server.

        If an access token is already present and force is false, only a
        status message is printed. Otherwise the OAuth handshake is run:
        the authorisation URL is opened in a web browser, the user is
        asked for the response query string, and on success the resulting
        key/secret pair is stored in self._access_token. Any exception
        raised during the handshake is reported via pywikibot.error.

        :see: https://www.mediawiki.org/wiki/API:Login

        :param retry: infinitely retry if exception occurs during
            authentication.
        :param force: force to re-authenticate
        :return: True if a token was already present or the handshake
            succeeded, False if the handshake failed and retry is false
        """
        # Retry with a loop rather than by calling login() recursively:
        # with retry=True the number of attempts is unbounded, and one
        # stack frame per failed attempt would end in a RecursionError.
        while True:
            if self.access_token is not None and not force:
                pywikibot.output(
                    'Logged in to {site} via consumer {key}'.format(
                        key=self.consumer_token[0], site=self.site))
                return True

            pywikibot.output(
                'Logging in to {site} via OAuth consumer {key}'.format(
                    key=self.consumer_token[0], site=self.site))
            consumer_token = mwoauth.ConsumerToken(*self.consumer_token)
            handshaker = mwoauth.Handshaker(
                self.site.base_url(self.site.path()), consumer_token)
            try:
                redirect, request_token = handshaker.initiate()
                pywikibot.stdout('Authenticate via web browser..')
                webbrowser.open(redirect)
                pywikibot.stdout(
                    'If your web browser does not open '
                    'automatically, please point it to: {}'.format(redirect))
                request_qs = pywikibot.input('Response query string: ')
                access_token = handshaker.complete(request_token, request_qs)
                self._access_token = (access_token.key, access_token.secret)
            except Exception as e:
                pywikibot.error(e)
                if not retry:
                    return False
                # retry requested: fall through to the next loop iteration
            else:
                return True
Exemplo n.º 22
0
    def countTemplates(cls, templates, namespaces):
        """
        Print how often each of the given templates is transcluded.

        For every key returned by cls.template_dict for the given
        'templates' and 'namespaces', one row with the number of pages is
        printed, followed by a TOTAL row and a UTC timestamp.

        @param templates: list of template names
        @type templates: list
        @param namespaces: list of namespace numbers
        @type namespaces: list
        """
        row_template = '{0:<10}: {1:>5}'
        pages_by_template = cls.template_dict(templates, namespaces)

        pywikibot.stdout('\nNumber of transclusions per template')
        pywikibot.stdout('-' * 36)

        grand_total = 0
        for name in pages_by_template:
            transclusion_count = len(pages_by_template[name])
            pywikibot.stdout(row_template.format(name, transclusion_count))
            grand_total += transclusion_count

        pywikibot.stdout(row_template.format('TOTAL', grand_total))
        generated_at = datetime.datetime.utcnow().isoformat()
        pywikibot.stdout('Report generated on {0}'
                         ''.format(generated_at))
Exemplo n.º 23
0
    def countTemplates(cls, templates, namespaces):
        """
        Print how often each of the given templates is transcluded.

        For every key returned by cls.template_dict for the given
        'templates' and 'namespaces', one row with the number of pages is
        printed, followed by a TOTAL row and a UTC timestamp.

        @param templates: list of template names
        @type templates: list
        @param namespaces: list of namespace numbers
        @type namespaces: list
        """
        # Named row_fmt rather than "format" so the builtin stays usable.
        row_fmt = '{0:<10}: {1:>5}'
        transclusions = cls.template_dict(templates, namespaces)

        pywikibot.stdout('\nNumber of transclusions per template')
        pywikibot.stdout('-' * 36)

        running_total = 0
        for template_name in transclusions:
            page_count = len(transclusions[template_name])
            pywikibot.stdout(row_fmt.format(template_name, page_count))
            running_total += page_count

        pywikibot.stdout(row_fmt.format('TOTAL', running_total))
        report_time = datetime.datetime.utcnow().isoformat()
        pywikibot.stdout('Report generated on {0}'
                         ''.format(report_time))
Exemplo n.º 24
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Besides printing a formatted entry per page, the page texts can be
    printed (-get), saved to a directory (-save[:dir], encoded per
    -encode:) and the list of entries can be written to a wiki page
    (-put:, guarded by -overwrite, edit summary from -summary:).

    @param args: command line arguments
    @type args: list of unicode
    @return: True if a generator was available and processed, False if
        the target page already exists or no generator was given
    """
    notitle = False
    page_get = False
    overwrite = False
    fmt = '1'
    outputlang = None
    base_dir = None
    page_target = None
    encoding = config.textfile_encoding
    summary = 'listpages-save-list'

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = GeneratorFactory()

    for arg in local_args:
        # Text after the first colon; empty when the option has no value.
        value = arg.partition(':')[2]
        if arg == '-notitle':
            notitle = True
        elif arg.startswith('-format:'):
            fmt = arg[len('-format:'):]
            # Turn the literal text "\03{{" into the \x03 control
            # character followed by "{{".
            fmt = fmt.replace(u'\\03{{', u'\03{{')
        elif arg.startswith('-outputlang:'):
            outputlang = arg[len('-outputlang:'):]
        elif arg == '-get':
            page_get = True
        elif arg.startswith('-save'):
            # Without an explicit directory the current one is used.
            base_dir = value or '.'
        elif arg.startswith('-encode:'):
            encoding = value
        elif arg.startswith('-put:'):
            page_target = value
        elif arg.startswith('-overwrite'):
            overwrite = True
        elif arg.startswith('-summary:'):
            summary = value
        else:
            genFactory.handleArg(arg)

    if base_dir:
        base_dir = os.path.expanduser(base_dir)
        if not os.path.isabs(base_dir):
            base_dir = os.path.normpath(os.path.join(os.getcwd(), base_dir))

        if os.path.exists(base_dir):
            if not os.path.isdir(base_dir):
                # base_dir is a file.
                pywikibot.warning(u'Not a directory: "%s"\n'
                                  u'Skipping saving ...'
                                  % base_dir)
                base_dir = None
        else:
            pywikibot.output(u'Directory "{0!s}" does not exist.'.format(base_dir))
            create_it = pywikibot.input_yn(
                u'Do you want to create it ("No" to continue without saving)?')
            if create_it:
                os.makedirs(base_dir, mode=0o744)
            else:
                base_dir = None

    if page_target:
        site = pywikibot.Site()
        page_target = pywikibot.Page(site, page_target)
        if not overwrite and page_target.exists():
            pywikibot.bot.suggest_help(
                additional_text='Page "{0}" already exists.'.format(
                    page_target.title()))
            return False
        # A summary made only of lowercase letters, '_' and '-' is treated
        # as a translation key rather than literal text.
        if re.match('^[a-z_-]+$', summary):
            summary = i18n.twtranslate(site, summary)

    gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    # Stays 0 when the generator yields nothing, so the count below is
    # still correct.
    i = 0
    output_list = []
    for i, page in enumerate(gen, start=1):
        if not notitle:
            entry = Formatter(page, outputlang).output(num=i, fmt=fmt)
            output_list.append(entry)
            pywikibot.stdout(entry)
        if page_get:
            try:
                pywikibot.stdout(page.text)
            except pywikibot.Error as err:
                pywikibot.output(err)
        if base_dir:
            filename = os.path.join(base_dir, page.title(as_filename=True))
            pywikibot.output(u'Saving {0!s} to {1!s}'.format(page.title(), filename))
            with open(filename, mode='wb') as f:
                f.write(page.text.encode(encoding))
    pywikibot.output(u"{0:d} page(s) found".format(i))

    if page_target:
        page_target.text = '\n'.join(output_list)
        page_target.save(summary=summary)
    return True
Exemplo n.º 25
0
def showHelp(name=""):
    """
    Print the help text of the calling module, then the global help.

    The module name comes from calledModuleName(); if that is empty, the
    module of the ``main`` function of ``__main__`` is used, falling back
    to "no_module". The module's docstring is printed after applying its
    optional ``docuReplacements`` mapping. If it cannot be obtained, a
    short apology is printed and the reason is logged. The global options
    help is printed in every case.

    @param name: ignored; kept for backwards compatibility
    """
    # argument, if given, is ignored
    modname = calledModuleName()
    if not modname:
        try:
            modname = sys.modules['__main__'].main.__module__
        except (NameError, AttributeError, KeyError):
            # A __main__ without a main() raises AttributeError, not
            # NameError; a missing __main__ entry raises KeyError.
            modname = "no_module"

    globalHelp = u'''
Global arguments available for all bots:

-dir:PATH         Read the bot's configuration data from directory given by
                  PATH, instead of from the default directory.

-lang:xx          Set the language of the wiki you want to work on, overriding
                  the configuration in user-config.py. xx should be the
                  language code.

-family:xyz       Set the family of the wiki you want to work on, e.g.
                  wikipedia, wiktionary, wikitravel, ...
                  This will override the configuration in user-config.py.

-user:xyz         Log in as user 'xyz' instead of the default username.

-daemonize:xyz    Immediately return control to the terminal and redirect
                  stdout and stderr to xyz (only use for bots that require
                  no input from stdin).

-help             Show this help text.

-log              Enable the logfile, using the default filename
                  '%s-bot.log'
                  Logs will be stored in the logs subdirectory.

-log:xyz          Enable the logfile, using 'xyz' as the filename.

-nolog            Disable the logfile (if it is enabled by default).

-maxlag           Sets a new maxlag parameter to a number of seconds. Defer bot
                  edits during periods of database server lag. Default is set by
                  config.py

-putthrottle:n    Set the minimum time (in seconds) the bot will wait between
-pt:n             saving pages.
-put_throttle:n

-debug:item       Enable the logfile and include extensive debugging data
-debug            for component "item" (for all components if the second form
                  is used).

-verbose          Have the bot provide additional console output that may be
-v                useful in debugging.

-cosmeticchanges  Toggles the cosmetic_changes setting made in config.py or
-cc               user_config.py to its inverse and overrules it. All other
                  settings and restrictions are untouched.

-simulate         Disables writing to the server. Useful for testing and
                  debugging of new code (if given, doesn't do any real
                  changes, but only shows what would have been changed).

-<config var>:n   You may use all given numeric config variables as option and
                  modify it with command line.

''' % modname
    try:
        module = __import__('%s' % modname)
        helpText = module.__doc__
        if helpText is None:
            # Handled below like any other failure to obtain the help.
            raise ValueError('module %s has no docstring' % modname)
        # Only byte strings need decoding; a docstring that is already
        # text has no usable decode() and is used as it is.
        if isinstance(helpText, bytes):
            helpText = helpText.decode('utf-8')
        if hasattr(module, 'docuReplacements'):
            for key, value in module.docuReplacements.items():
                helpText = helpText.replace(key, value.strip('\n\r'))
        pywikibot.stdout(helpText)  # output to STDOUT
    except Exception:
        if modname:
            pywikibot.stdout(u'Sorry, no help available for %s' % modname)
        pywikibot.log('showHelp:', exc_info=True)
    pywikibot.stdout(globalHelp)
Exemplo n.º 26
0
def main(*args):
    """
    Read the CFD working page and run a category bot for each listed entry.

    If args is an empty list, sys.argv is used.

    The working page is scanned line by line. Section headers select the
    current mode, day headers set the current discussion day, and every
    move or delete entry that fits the current mode is handed to a
    CategoryMoveBot.

    @param args: command line arguments
    @type args: str
    """
    working_page_title = DEFAULT_CFD_PAGE

    for arg in pywikibot.handle_args(args):
        if not arg.startswith('-page'):
            continue
        if len(arg) == len('-page'):
            # Bare "-page": ask the user for the title.
            working_page_title = pywikibot.input(
                'Enter the CFD working page to use:')
        else:
            working_page_title = arg[len('-page:'):]

    page = pywikibot.Page(pywikibot.Site(), working_page_title)
    try:
        page.get()
    except pywikibot.NoPage:
        pywikibot.error('CFD working page "{0}" does not exist!'
                        .format(working_page_title))
        sys.exit(1)

    # Section header patterns, in the order they are tried, and the mode
    # each one switches to. It's probably best not to try to handle
    # maintenance sections in an automated fashion, hence mode 'None'.
    section_modes = (
        (speedymode, 'Speedy'),
        (movemode, 'Move'),
        (emptymode, 'Empty'),
        (deletemode, 'Delete'),
        (maintenance, 'None'),
    )

    day = 'None'
    mode = 'None'
    matcher = ReCheck()

    for line in page.text.split('\n'):
        if nobots.search(line) or example.search(line):
            # NO BOTS or example line
            continue

        new_mode = next((name for pattern, name in section_modes
                         if pattern.search(line)), None)
        if new_mode is not None:
            # A section header resets the day as well as the mode.
            mode = new_mode
            day = 'None'
            continue

        if matcher.check(dateheader, line):
            day = matcher.result.group(1)
            pywikibot.output('Found day header: {}'.format(day))
            continue

        if matcher.check(movecat, line):
            src = matcher.result.group(1)
            dest = matcher.result.group(2)
            this_day = findDay(src, day)
            if mode == 'Move' and this_day != 'None':
                summary = ''.join([
                    'Robot - Moving category ', src,
                    ' to [[:Category:', dest,
                    ']] per [[WP:CFD|CFD]] at ', this_day, '.'])
                action_summary = ''.join([
                    'Robot - Result of [[WP:CFD|CFD]] at ', this_day, '.'])
            elif mode == 'Speedy':
                summary = ''.join([
                    'Robot - Speedily moving category ', src,
                    ' to [[:Category:', dest,
                    ']] per [[WP:CFDS|CFDS]].'])
                action_summary = 'Robot - Speedily moved per [[WP:CFDS|CFDS]].'
            else:
                continue

            # If the category is redirect, we do NOT want to move articles
            # to it. The safest thing to do here is abort and wait for
            # human intervention.
            destpage = pywikibot.Page(page.site, dest, ns=14)
            if destpage.isCategoryRedirect():
                pywikibot.stdout(
                    'CANCELED. Destination is redirect: ' + summary)
                continue

            robot = CategoryMoveBot(
                oldcat=src,
                newcat=dest,
                batch=True,
                comment=summary,
                inplace=True,
                move_oldcat=True,
                delete_oldcat=True,
                deletion_comment=(
                    CategoryMoveBot.DELETION_COMMENT_SAME_AS_EDIT_COMMENT),
                move_comment=action_summary)
        elif matcher.check(deletecat, line):
            src = matcher.result.group(1)
            # I currently don't see any reason to handle the "Empty" and
            # "Delete" cases separately, though if we are guaranteed that
            # the category in the "Delete" case is empty, it might be
            # easier to call delete.py on it.
            this_day = findDay(src, day)
            if mode not in ('Empty', 'Delete') or this_day == 'None':
                continue
            summary = ('Robot - Removing category {0} per [[WP:CFD|CFD]] '
                       'at {1}.'.format(src, this_day))
            action_summary = ''.join([
                'Robot - Result of [[WP:CFD|CFD]] at ', this_day, '.'])
            robot = CategoryMoveBot(
                oldcat=src,
                batch=True,
                comment=summary,
                deletion_comment=action_summary,
                inplace=True)
        else:
            # The line does not fit any known pattern.
            continue

        pywikibot.stdout(summary)
        # Run, robot, run!
        robot.run()
Exemplo n.º 27
0
    def treat(self, page):
        """Process one page.

        Every match of ``linksInRef`` in the page text (with disabled
        parts removed) is looked up over HTTP. Depending on the outcome
        the matched reference is rewritten with the title of the linked
        document, rewritten as a plain link, marked as dead, or left
        untouched. Afterwards a references section is added when it is
        missing (template pages excepted), the text is passed through the
        deduplicator and the page is saved with ``userPut``.

        After a successful save the generator is closed when the edit
        limit has been reached, or when the stop page has been edited
        (checked on every 20th save).

        @param page: the page to process
        """
        # Load the page's text from the wiki
        new_text = page.text
        raw_text = textlib.removeDisabledParts(new_text)
        # for each link to change
        for match in linksInRef.finditer(raw_text):

            link = match.group('url')
            if 'jstor.org' in link:
                # TODO: Clean URL blacklist
                continue

            ref = RefLink(link, match.group('name'), site=self.site)

            try:
                r = comms.http.fetch(
                    ref.url, use_fake_user_agent=self._use_fake_user_agent)

                # Try to get Content-Type from server
                content_type = r.headers.get('content-type')
                if content_type and not self.MIME.search(content_type):
                    if ref.link.lower().endswith('.pdf') \
                       and not self.opt.ignorepdf:
                        # If file has a PDF suffix
                        self.getPDFTitle(ref, r)
                    else:
                        pywikibot.output(color_format(
                            '{lightyellow}WARNING{default} : media : {} ',
                            ref.link))

                    if not ref.title:
                        repl = ref.refLink()
                    elif not re.match('(?i) *microsoft (word|excel|visio)',
                                      ref.title):
                        ref.transform(ispdf=True)
                        repl = ref.refTitle()
                    else:
                        pywikibot.output(color_format(
                            '{lightyellow}WARNING{default} : '
                            'PDF title blacklisted : {0} ', ref.title))
                        repl = ref.refLink()

                    new_text = new_text.replace(match.group(), repl)
                    continue

                # Get the real url where we end (http redirects !)
                redir = r.url
                if redir != ref.link \
                   and domain.findall(redir) == domain.findall(link):
                    if soft404.search(redir) \
                       and not soft404.search(ref.link):
                        pywikibot.output(color_format(
                            '{lightyellow}WARNING{default} : '
                            'Redirect 404 : {0} ', ref.link))
                        continue

                    if dirIndex.match(redir) \
                       and not dirIndex.match(ref.link):
                        pywikibot.output(color_format(
                            '{lightyellow}WARNING{default} : '
                            'Redirect to root : {0} ', ref.link))
                        continue

                if r.status_code != codes.ok:
                    pywikibot.stdout('HTTP error ({}) for {} on {}'
                                     .format(r.status_code, ref.url,
                                             page.title(as_link=True)))
                    # 410 Gone, indicates that the resource has been
                    # purposely removed
                    if r.status_code == 410 \
                       or (r.status_code == 404
                           and '\t{}\t'.format(
                               ref.url) in self.dead_links):
                        repl = ref.refDead()
                        new_text = new_text.replace(match.group(), repl)
                    continue

            except UnicodeError:
                # example:
                # http://www.adminet.com/jo/20010615¦/ECOC0100037D.html
                # in [[fr:Cyanure]]
                pywikibot.output(color_format(
                    '{lightred}Bad link{default} : {0} in {1}',
                    ref.url, page.title(as_link=True)))
                continue

            except (URLError,
                    socket.error,
                    IOError,
                    httplib.error,
                    pywikibot.FatalServerError,
                    pywikibot.Server414Error,
                    pywikibot.Server504Error) as e:
                pywikibot.output("Can't retrieve page {} : {}"
                                 .format(ref.url, e))
                continue

            linkedpagetext = r.content
            # remove <script>/<style>/comments/CDATA tags
            linkedpagetext = self.NON_HTML.sub(b'', linkedpagetext)

            meta_content = self.META_CONTENT.search(linkedpagetext)
            s = None
            if content_type:
                # use charset from http header
                s = self.CHARSET.search(content_type)
            if meta_content:
                tag = meta_content.group().decode()
                # Prefer the contentType from the HTTP header :
                if not content_type:
                    content_type = tag
                if not s:
                    # use charset from html
                    s = self.CHARSET.search(tag)
            if s:
                # Use encoding if found. Else use chardet apparent encoding
                encoding = s.group('enc').strip('"\' ').lower()
                naked = re.sub(r'[ _\-]', '', encoding)
                # Convert to python correct encoding names
                if naked == 'xeucjp':
                    encoding = 'euc_jp'
                r.encoding = encoding

            if not content_type:
                pywikibot.output('No content-type found for ' + ref.link)
                continue

            if not self.MIME.search(content_type):
                pywikibot.output(color_format(
                    '{lightyellow}WARNING{default} : media : {0} ',
                    ref.link))
                repl = ref.refLink()
                new_text = new_text.replace(match.group(), repl)
                continue

            # Retrieves the first non empty string inside <title> tags
            for m in self.TITLE.finditer(r.text):
                t = m.group()
                if t:
                    ref.title = t
                    ref.transform()
                    if ref.title:
                        break

            if not ref.title:
                repl = ref.refLink()
                new_text = new_text.replace(match.group(), repl)
                pywikibot.output('{} : No title found...'.format(ref.link))
                continue

            if self.titleBlackList.match(ref.title):
                repl = ref.refLink()
                new_text = new_text.replace(match.group(), repl)
                pywikibot.output(color_format(
                    '{lightred}WARNING{default} {0} : '
                    'Blacklisted title ({1})', ref.link, ref.title))
                continue

            # Truncate long titles. The width of 178 is arbitrary
            # (presumably 175 characters plus the 3-character placeholder;
            # NOTE(review): confirm that shorten is textwrap.shorten).
            ref.title = shorten(ref.title, width=178, placeholder='...')

            repl = ref.refTitle()
            new_text = new_text.replace(match.group(), repl)

        # Add <references/> when needed, but ignore templates !
        # namespace is a method and has to be called: comparing the bound
        # method itself with 10 is always unequal, which made this guard
        # a no-op for template pages.
        if page.namespace() != 10 \
           and self.norefbot.lacksReferences(new_text):
            new_text = self.norefbot.addReferences(new_text)

        new_text = self.deduplicator.process(new_text)
        old_text = page.text

        if old_text == new_text:
            return

        self.userPut(page, old_text, new_text, summary=self.msg,
                     ignore_save_related_errors=True,
                     ignore_server_errors=True)

        if not self._save_counter:
            return

        if self.opt.limit and self._save_counter >= self.opt.limit:
            pywikibot.output('Edited {} pages, stopping.'
                             .format(self.opt.limit))
            self.generator.close()

        # Look at the stop page only on every 20th save.
        if self.site_stop_page and self._save_counter % 20 == 0:
            self.stop_page = pywikibot.Page(self.site, self.site_stop_page)
            if self.stop_page.exists():
                pywikibot.output(color_format(
                    '{lightgreen}Checking stop page...{default}'))
                actual_rev = self.stop_page.latest_revision_id
                if actual_rev != self.stop_page_rev_id:
                    pywikibot.output(
                        '{} has been edited: Someone wants us to stop.'
                        .format(self.stop_page.title(as_link=True)))
                    self.generator.close()
Exemplo n.º 28
0
 def print_with_time(self, message, end='\n'):
     """Write message to stdout, prefixed with the result of get_timeutc().

     @param message: text to print after the time prefix
     @param end: line terminator passed through to pwb.stdout
     """
     stamped = '%s %s' % (self.get_timeutc(), message)
     pwb.stdout(stamped, end=end)
Exemplo n.º 29
0
 def test_stdout(self):
     """Check that stdout() writes text plus newline to stdout only."""
     pywikibot.stdout('output')
     expectations = ((newstdout, 'output\n'), (newstderr, ''))
     for stream, expected in expectations:
         self.assertEqual(stream.getvalue(), expected)
Exemplo n.º 30
0
 def httpError(self, err_num, link, pagetitleaslink):
     """Print an HTTP error for a link found on a page to stdout.

     @param err_num: HTTP error number
     @param link: the link that caused the error
     @param pagetitleaslink: title of the page holding the link
     """
     template = 'HTTP error ({0}) for {1} on {2}'
     pywikibot.stdout(template.format(err_num, link, pagetitleaslink))
Exemplo n.º 31
0
def main(*args):
    """
    List the pages of a generator; optionally print, save or publish them.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    @return: True when a generator was available and has been processed,
        False when no generator was given or when the target page exists
        and -overwrite was not given
    @rtype: bool
    """
    notitle = False
    page_get = False
    overwrite = False
    fmt = '1'
    outputlang = None
    base_dir = None
    page_target = None
    encoding = config.textfile_encoding
    summary = 'listpages-save-list'

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    gen_factory = GeneratorFactory()

    for arg in local_args:
        # Text after the first colon; empty when there is no colon.
        value = arg.partition(':')[2]
        if arg == '-notitle':
            notitle = True
        elif arg.startswith('-format:'):
            fmt = value.replace(u'\\03{{', u'\03{{')
        elif arg.startswith('-outputlang:'):
            outputlang = value
        elif arg == '-get':
            page_get = True
        elif arg.startswith('-save'):
            base_dir = value or '.'
        elif arg.startswith('-encode:'):
            encoding = value
        elif arg.startswith('-put:'):
            page_target = value
        elif arg.startswith('-overwrite'):
            overwrite = True
        elif arg.startswith('-summary:'):
            summary = value
        else:
            gen_factory.handleArg(arg)

    if base_dir:
        base_dir = os.path.expanduser(base_dir)
        if not os.path.isabs(base_dir):
            base_dir = os.path.normpath(os.path.join(os.getcwd(), base_dir))

        if os.path.exists(base_dir):
            if not os.path.isdir(base_dir):
                # base_dir is a file.
                pywikibot.warning(u'Not a directory: "%s"\n'
                                  u'Skipping saving ...'
                                  % base_dir)
                base_dir = None
        else:
            pywikibot.output(u'Directory "%s" does not exist.' % base_dir)
            create_it = pywikibot.input_yn(
                u'Do you want to create it ("No" to continue without saving)?')
            if create_it:
                os.makedirs(base_dir, mode=0o744)
            else:
                base_dir = None

    if page_target:
        site = pywikibot.Site()
        page_target = pywikibot.Page(site, page_target)
        if page_target.exists() if not overwrite else False:
            pywikibot.bot.suggest_help(
                additional_text='Page "{0}" already exists.'.format(
                    page_target.title()))
            return False
        # A summary made only of lowercase letters, '_' and '-' is treated
        # as a translation key.
        if re.match('^[a-z_-]+$', summary):
            summary = i18n.twtranslate(site, summary)

    gen = gen_factory.getCombinedGenerator()
    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    count = 0  # stays 0 when the generator yields nothing
    output_list = []
    for count, page in enumerate(gen, start=1):
        if not notitle:
            formatted = Formatter(page, outputlang).output(num=count, fmt=fmt)
            output_list.append(formatted)
            pywikibot.stdout(formatted)
        if page_get:
            try:
                pywikibot.output(page.text, toStdout=True)
            except pywikibot.Error as err:
                pywikibot.output(err)
        if base_dir:
            filename = os.path.join(base_dir, page.title(as_filename=True))
            pywikibot.output(u'Saving %s to %s' % (page.title(), filename))
            with open(filename, mode='wb') as f:
                f.write(page.text.encode(encoding))

    pywikibot.output(u"%i page(s) found" % count)
    if page_target:
        page_target.text = '\n'.join(output_list)
        page_target.save(summary=summary)
    return True
Exemplo n.º 32
0
def showHelp(module_name=None):
    """Print the help of a module followed by the global bot options.

    The module's docstring is printed first, with every key of an optional
    module-level ``docuReplacements`` mapping replaced by its value. If the
    module help cannot be produced for any reason, a short notice is
    printed instead and the error is logged. The global help text is
    always printed last.

    @param module_name: name of the module to show help for; when empty,
        the called module name or the module of ``__main__.main`` is used
    """
    if not module_name:
        module_name = calledModuleName()
    if not module_name:
        try:
            module_name = sys.modules['__main__'].main.__module__
        except (AttributeError, NameError):
            # A __main__ module without a main() raises AttributeError,
            # which the previous NameError-only handler let escape.
            module_name = "no_module"

    globalHelp = u'''
Global arguments available for all bots:

-dir:PATH         Read the bot's configuration data from directory given by
                  PATH, instead of from the default directory.

-lang:xx          Set the language of the wiki you want to work on, overriding
                  the configuration in user-config.py. xx should be the
                  language code.

-family:xyz       Set the family of the wiki you want to work on, e.g.
                  wikipedia, wiktionary, wikitravel, ...
                  This will override the configuration in user-config.py.

-user:xyz         Log in as user 'xyz' instead of the default username.

-daemonize:xyz    Immediately return control to the terminal and redirect
                  stdout and stderr to xyz (only use for bots that require
                  no input from stdin).

-help             Show this help text.

-log              Enable the log file, using the default filename
                  '%s-bot.log'
                  Logs will be stored in the logs subdirectory.

-log:xyz          Enable the log file, using 'xyz' as the filename.

-nolog            Disable the log file (if it is enabled by default).

-maxlag           Sets a new maxlag parameter to a number of seconds. Defer bot
                  edits during periods of database server lag. Default is set by
                  config.py

-putthrottle:n    Set the minimum time (in seconds) the bot will wait between
-pt:n             saving pages.
-put_throttle:n

-debug:item       Enable the log file and include extensive debugging data
-debug            for component "item" (for all components if the second form
                  is used).

-verbose          Have the bot provide additional console output that may be
-v                useful in debugging.

-cosmeticchanges  Toggles the cosmetic_changes setting made in config.py or
-cc               user_config.py to its inverse and overrules it. All other
                  settings and restrictions are untouched.

-simulate         Disables writing to the server. Useful for testing and
                  debugging of new code (if given, doesn't do any real
                  changes, but only shows what would have been changed).

-<config var>:n   You may use all given numeric config variables as option and
                  modify it with command line.

''' % module_name
    try:
        module = __import__('%s' % module_name)
        helpText = module.__doc__
        if sys.version_info[0] < 3:
            # Python 2 docstrings are byte strings.
            helpText = helpText.decode('utf-8')
        if hasattr(module, 'docuReplacements'):
            for key, value in module.docuReplacements.items():
                helpText = helpText.replace(key, value.strip('\n\r'))
        pywikibot.stdout(helpText)  # output to STDOUT
    except Exception:
        # Deliberately broad: missing module help must never prevent the
        # global help from being shown.
        if module_name:
            pywikibot.stdout(u'Sorry, no help available for %s' % module_name)
        pywikibot.log('showHelp:', exc_info=True)
    pywikibot.stdout(globalHelp)
 def test_stdout(self):
     """Verify stdout() prints to stdout and leaves stderr empty."""
     pywikibot.stdout('output')
     written_to_stdout = newstdout.getvalue()
     self.assertEqual(written_to_stdout, 'output\n')
     written_to_stderr = newstderr.getvalue()
     self.assertEqual(written_to_stderr, '')
Exemplo n.º 34
0

if __name__ == '__main__':
    bot = CheckerBot()

    # Fetch the list of new pages
    bot.get_newpages(
        length_listpages=500,
        hours_offset_near=24,
        hours_offset_far=25,
    )
    # for debug
    # bot.newpages = [{'time_create': '2018-02-14 16:00', 'pagename': 'Название статьи', 'user': '******'}, ]

    if bot.newpages:
        # Filter the pages; the progress line is left open (end=' ') so
        # that '...done' can follow once filtering has finished
        progress_message = (
            '%s Отфильтровка ноднозначностей и уже проверенных страниц'
            % bot.get_timeutc())
        pwb.stdout(progress_message, end=' ')

        # Filter by category
        filterout_category = 'Категория:Страницы значений по алфавиту'
        bot.filter_pages_by_category(filterout_category)

        # Drop pages already checked, as stored in the file written by the
        # previous run
        bot.filter_already_checked_pages()
        bot.csv_save_dict(bot.last_newpages_filename, bot.newpages)
        pwb.stdout('...done')

        # Check the pages for copyright violations
        bot.req_copyvios(use_search_engine=True)

        # Write the check results to files, filtered by percentage
        bot.filter_by_persent_min_level_copivio()
        bot.save_results_to_files()
Exemplo n.º 35
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Every page of the combined generator is formatted and printed (unless
    titles are suppressed), optionally followed by its text, optionally
    saved to a file below the -save directory, and the formatted list is
    optionally written to the -put target page.

    @param args: command line arguments
    @type args: str
    @return: True when a generator was available and has been processed,
        False when no generator was given or when the target page exists
        and -overwrite was not given
    @rtype: bool
    """
    gen = None
    notitle = False
    fmt = '1'
    outputlang = None
    page_get = False
    base_dir = None
    encoding = config.textfile_encoding
    page_target = None
    overwrite = False
    summary = 'listpages-save-list'

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    gen_factory = GeneratorFactory()

    for arg in local_args:
        # partition() strips the colon, so every option name below is
        # compared without a trailing ':'.
        option, sep, value = arg.partition(':')
        if option == '-notitle':
            notitle = True
        elif option == '-format':
            fmt = value.replace('\\03{{', '\03{{')
            if not fmt.strip():
                # An empty format means there is nothing to print per page.
                notitle = True
        elif option == '-outputlang':
            # Was compared with '-outputlang:', which can never match.
            outputlang = value
        elif option == '-get':
            page_get = True
        elif option == '-save':
            base_dir = value or '.'
        elif option == '-encode':
            encoding = value
        elif option == '-put':
            page_target = value
        elif option == '-overwrite':
            overwrite = True
        elif option == '-summary':
            summary = value
        else:
            gen_factory.handleArg(arg)

    if base_dir:
        base_dir = os.path.expanduser(base_dir)
        if not os.path.isabs(base_dir):
            base_dir = os.path.normpath(os.path.join(os.getcwd(), base_dir))

        if not os.path.exists(base_dir):
            pywikibot.output(
                'Directory "{0}" does not exist.'.format(base_dir))
            choice = pywikibot.input_yn(
                'Do you want to create it ("No" to continue without saving)?')
            if choice:
                os.makedirs(base_dir, mode=0o744)
            else:
                base_dir = None
        elif not os.path.isdir(base_dir):
            # base_dir is a file.
            pywikibot.warning('Not a directory: "{0}"\n'
                              'Skipping saving ...'.format(base_dir))
            base_dir = None

    if page_target:
        site = pywikibot.Site()
        page_target = pywikibot.Page(site, page_target)
        if not overwrite and page_target.exists():
            pywikibot.bot.suggest_help(
                additional_text='Page {0} already exists.\n'
                'You can use the -overwrite argument to '
                'replace the content of this page.'.format(
                    page_target.title(as_link=True)))
            return False
        # A summary made only of lowercase letters, '_' and '-' is treated
        # as a translation key.
        if re.match('^[a-z_-]+$', summary):
            summary = i18n.twtranslate(site, summary)

    gen = gen_factory.getCombinedGenerator()
    if gen:
        i = 0  # stays 0 when the generator yields nothing
        output_list = []
        for i, page in enumerate(gen, start=1):
            if not notitle:
                page_fmt = Formatter(page, outputlang)
                output_list += [page_fmt.output(num=i, fmt=fmt)]
                pywikibot.stdout(output_list[-1])
            if page_get:
                try:
                    pywikibot.stdout(page.text)
                except pywikibot.Error as err:
                    pywikibot.output(err)
            if base_dir:
                filename = os.path.join(base_dir, page.title(as_filename=True))
                pywikibot.output('Saving {0} to {1}'.format(
                    page.title(), filename))
                with open(filename, mode='wb') as f:
                    f.write(page.text.encode(encoding))
        pywikibot.output('{0} page(s) found'.format(i))
        if page_target:
            page_target.text = '\n'.join(output_list)
            page_target.save(summary=summary)
        return True
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
Exemplo n.º 36
0
def main(*args):
    """
    Read the CFD working page and run a category bot for each listed entry.

    If args is an empty list, sys.argv is used.

    Nothing is done, apart from a warning, unless the configured site is
    the English Wikipedia.

    @param args: command line arguments
    @type args: list of unicode
    """
    pywikibot.handle_args(args)

    if not (config.family == 'wikipedia' and config.mylang == 'en'):
        pywikibot.warning('CFD does work only on the English Wikipedia.')
        return

    page = pywikibot.Page(pywikibot.Site(), cfdPage)

    # Parser state carried from line to line
    day = 'None'
    mode = 'None'

    checker = ReCheck()
    for line in page.text.split("\n"):
        if nobots.search(line) or example.search(line):
            # NO BOTS!!! / Example line
            continue

        if speedymode.search(line):
            mode, day = 'Speedy', 'None'
            continue
        if movemode.search(line):
            mode, day = 'Move', 'None'
            continue
        if emptymode.search(line):
            mode, day = 'Empty', 'None'
            continue
        if deletemode.search(line):
            mode, day = 'Delete', 'None'
            continue
        if maintenance.search(line):
            # It's probably best not to try to handle these in an
            # automated fashion.
            mode, day = 'None', 'None'
            continue

        if checker.check(dateheader, line):
            day = checker.result.group(1)
            pywikibot.output("Found day header: %s" % day)
            continue

        if checker.check(movecat, line):
            src = checker.result.group(1)
            dest = checker.result.group(2)
            this_day = findDay(src, day)
            if mode == 'Move' and this_day != 'None':
                summary = ''.join([
                    'Robot - Moving category ', src, ' to [[:Category:',
                    dest, ']] per [[WP:CFD|CFD]] at ', this_day, '.'])
                action_summary = ''.join([
                    'Robot - Result of [[WP:CFD|CFD]] at ', this_day, '.'])
            elif mode == 'Speedy':
                summary = ''.join([
                    'Robot - Speedily moving category ', src,
                    ' to [[:Category:', dest, ']] per [[WP:CFDS|CFDS]].'])
                action_summary = 'Robot - Speedily moved per [[WP:CFDS|CFDS]].'
            else:
                continue

            # If the category is redirect, we do NOT want to move articles
            # to it. The safest thing to do here is abort and wait for
            # human intervention.
            destpage = pywikibot.Page(page.site, dest, ns=14)
            if destpage.isCategoryRedirect():
                pywikibot.stdout(
                    'CANCELED. Destination is redirect: ' + summary)
                continue

            same_as_edit = (
                CategoryMoveBot.DELETION_COMMENT_SAME_AS_EDIT_COMMENT)
            robot = CategoryMoveBot(
                oldcat=src, newcat=dest, batch=True,
                comment=summary, inplace=True,
                move_oldcat=True, delete_oldcat=True,
                deletion_comment=same_as_edit,
                move_comment=action_summary)
        elif checker.check(deletecat, line):
            src = checker.result.group(1)
            # I currently don't see any reason to handle the "Empty" and
            # "Delete" cases separately, though if we are guaranteed that
            # the category in the "Delete" case is empty, it might be
            # easier to call delete.py on it.
            this_day = findDay(src, day)
            if mode not in ('Empty', 'Delete') or this_day == 'None':
                continue
            summary = (
                'Robot - Removing category {0} per [[WP:CFD|CFD]] '
                'at {1}.'.format(src, this_day))
            action_summary = ''.join([
                'Robot - Result of [[WP:CFD|CFD]] at ', this_day, '.'])
            robot = CategoryMoveBot(
                oldcat=src, batch=True, comment=summary,
                deletion_comment=action_summary,
                inplace=True)
        else:
            # This line does not fit any of our regular expressions,
            # so ignore it.
            continue

        pywikibot.stdout(summary)
        # Run, robot, run!
        robot.run()
Exemplo n.º 37
0
def showHelp(name=""):
    """Print the help of the called module followed by the global options.

    The module's docstring is printed first, with every key of an optional
    module-level ``docuReplacements`` mapping replaced by its value. If the
    module help cannot be produced for any reason, a short notice is
    printed instead and the error is logged. The global help text is
    always printed last.

    @param name: ignored; kept for backward compatibility
    """
    # argument, if given, is ignored
    modname = calledModuleName()
    if not modname:
        try:
            modname = sys.modules['__main__'].main.__module__
        except (AttributeError, NameError):
            # A __main__ module without a main() raises AttributeError,
            # which the previous NameError-only handler let escape.
            modname = "no_module"

    globalHelp = u'''\
Global arguments available for all bots:

-dir:PATH         Read the bot's configuration data from directory given by
                  PATH, instead of from the default directory.

-lang:xx          Set the language of the wiki you want to work on, overriding
                  the configuration in user-config.py. xx should be the
                  language code.

-family:xyz       Set the family of the wiki you want to work on, e.g.
                  wikipedia, wiktionary, wikitravel, ...
                  This will override the configuration in user-config.py.

-user:xyz         Log in as user 'xyz' instead of the default username.

-daemonize:xyz    Immediately return control to the terminal and redirect
                  stdout and stderr to xyz (only use for bots that require
                  no input from stdin).

-help             Show this help text.

-log              Enable the logfile, using the default filename
                  '%s-bot.log'

-log:xyz          Enable the logfile, using 'xyz' as the filename.

-nolog            Disable the logfile (if it is enabled by default).

-debug:item       Enable the logfile and include extensive debugging data
-debug            for component "item" (for all components if the second form
                  is used).

-putthrottle:n    Set the minimum time (in seconds) the bot will wait between
-pt:n             saving pages.

-verbose          Have the bot provide additional console output that may be
-v                useful in debugging.

''' % modname
    try:
        module = __import__('%s' % modname)
        helpText = module.__doc__
        if isinstance(helpText, bytes):
            # Only byte-string docstrings (Python 2) need decoding; text
            # strings have no decode() on Python 3.
            helpText = helpText.decode('utf-8')
        if hasattr(module, 'docuReplacements'):
            # items() exists on both Python 2 and 3, iteritems() does not.
            for key, value in module.docuReplacements.items():
                helpText = helpText.replace(key, value.strip('\n\r'))
        pywikibot.stdout(helpText)  # output to STDOUT
    except Exception:
        # Deliberately broad: missing module help must never prevent the
        # global help from being shown.
        if modname:
            pywikibot.stdout(u'Sorry, no help available for %s' % modname)
        pywikibot.log('showHelp:', exc_info=True)
    pywikibot.stdout(globalHelp)
Exemplo n.º 38
0
 def httpError(self, err_num, link, pagetitleaslink):
     """Write an HTTP error notice for a link on a page to stdout.

     @param err_num: HTTP error number
     @param link: the link that caused the error
     @param pagetitleaslink: title of the page holding the link
     """
     message = 'HTTP error ({0}) for {1} on {2}'.format(
         err_num, link, pagetitleaslink)
     pywikibot.stdout(message)
Exemplo n.º 39
0
def main(*args):
    """
    Parse the command line and list the pages yielded by the generator.

    Local options handled here: -notitle suppresses the formatted title
    line; -format:<fmt> is passed to Formatter.output and
    -outputlang:<lang> to Formatter; -get prints each page's text;
    -save[:<dir>] writes each page's text into <dir> ('.' if no directory
    is given); -encode:<name> selects the encoding used for saved files.
    Every other argument is handed to the GeneratorFactory. If no
    generator results, the help is shown instead.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    show_title = True
    fmt = '1'
    outputlang = None
    page_get = False
    base_dir = None
    encoding = config.textfile_encoding

    # Global options are consumed first; whatever is left is either one of
    # the local options below or an argument for the generator factory.
    local_args = pywikibot.handle_args(args)
    genFactory = GeneratorFactory()

    for arg in local_args:
        name, sep, value = arg.partition(':')
        if arg == '-notitle':
            show_title = False
        elif sep and name == '-format':
            # Replace the literal text backslash-03{{ by the \x03 control
            # character followed by {{.
            fmt = value.replace(u'\\03{{', u'\03{{')
        elif sep and name == '-outputlang':
            outputlang = value
        elif arg == '-get':
            page_get = True
        elif arg.startswith('-save'):
            # No (or an empty) directory means the current directory.
            base_dir = value or '.'
        elif sep and name == '-encode':
            encoding = value
        else:
            genFactory.handleArg(arg)

    if base_dir:
        # Work with an absolute, user-expanded path from here on.
        base_dir = os.path.expanduser(base_dir)
        if not os.path.isabs(base_dir):
            base_dir = os.path.normpath(os.path.join(os.getcwd(), base_dir))

        if not os.path.isdir(base_dir):
            if os.path.exists(base_dir):
                # Something that is not a directory is in the way.
                pywikibot.warning(
                    u'Not a directory: "%s"\nSkipping saving ...' % base_dir)
                base_dir = None
            else:
                pywikibot.output(u'Directory "%s" does not exist.' % base_dir)
                create = pywikibot.input_yn(
                    u'Do you want to create it ("No" to continue without saving)?')
                if create:
                    os.makedirs(base_dir, mode=0o744)
                else:
                    # Declined: carry on, but without saving anything.
                    base_dir = None

    gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp()
        return

    count = 0
    for page in gen:
        count += 1
        if show_title:
            formatter = Formatter(page, outputlang)
            pywikibot.stdout(formatter.output(num=count, fmt=fmt))
        if page_get:
            try:
                pywikibot.output(page.text, toStdout=True)
            except pywikibot.Error as err:
                pywikibot.output(err)
        if base_dir:
            filename = os.path.join(base_dir, page.title(as_filename=True))
            pywikibot.output(u'Saving %s to %s' % (page.title(), filename))
            with open(filename, mode='wb') as f:
                f.write(page.text.encode(encoding))
    pywikibot.output(u"%i page(s) found" % count)
Exemplo n.º 40
0
def main(*args):
    """
    Process command line arguments and perform task.

    The CFD working page is read line by line. Lines matching one of the
    module-level mode patterns (speedymode, movemode, emptymode,
    deletemode, maintenance) switch the current mode and clear the current
    day; a line matching dateheader sets the current day; a line matching
    movecat or deletecat builds a CategoryMoveBot for that category, which
    is run before the next line is examined.

    The only local option is -page: a bare '-page' prompts for the title
    of the working page, '-page:<title>' gives it directly. If the working
    page does not exist, an error is printed and the script exits with
    status 1.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    cfd_page = DEFAULT_CFD_PAGE
    local_args = pywikibot.handle_args(args)

    # A bare '-page' prompts for the working page title; '-page:<title>'
    # supplies it on the command line. Other local arguments are ignored.
    for arg in local_args:
        if arg.startswith('-page'):
            if len(arg) == len('-page'):
                cfd_page = pywikibot.input('Enter the CFD working page to use:')
            else:
                cfd_page = arg[len('-page:'):]

    page = pywikibot.Page(pywikibot.Site(), cfd_page)
    # Load the page up front so that a missing working page ends the run
    # here instead of failing later while parsing.
    try:
        page.get()
    except pywikibot.NoPage:
        pywikibot.error(
            'CFD working page "{0}" does not exist!'.format(cfd_page))
        sys.exit(1)

    # Parser state. Note that "unset" for day and mode is the string
    # 'None', not the None object; thisDay is compared against that same
    # string further down.
    day = 'None'
    mode = 'None'
    summary = ''
    action_summary = ''
    robot = None

    # NOTE(review): ReCheck is defined elsewhere. From its use below,
    # check(pattern, line) appears to test the pattern against the line and
    # keep the match object in m.result -- confirm against its definition.
    m = ReCheck()
    for line in page.text.split("\n"):
        if nobots.search(line):
            # Line matches the nobots pattern: deliberately left alone.
            pass
        elif example.search(line):
            # Line matches the example pattern: deliberately left alone.
            pass
        # Each mode pattern switches the current mode and forgets the day
        # that was in effect for the previous section.
        elif speedymode.search(line):
            mode = "Speedy"
            day = "None"
        elif movemode.search(line):
            mode = "Move"
            day = "None"
        elif emptymode.search(line):
            mode = "Empty"
            day = "None"
        elif deletemode.search(line):
            mode = "Delete"
            day = "None"
        elif maintenance.search(line):
            # It's probably best not to try to handle these in an automated
            # fashion, so both mode and day are cleared.
            mode = "None"
            day = "None"
        elif m.check(dateheader, line):
            day = m.result.group(1)
            pywikibot.output("Found day header: %s" % day)
        elif m.check(movecat, line):
            # Group 1 is the source category, group 2 the destination.
            src = m.result.group(1)
            dest = m.result.group(2)
            # NOTE(review): findDay is defined elsewhere; presumably it
            # determines the discussion day for src, using the current day
            # as a fallback -- confirm. It is called in every mode, although
            # its result is only used in "Move" mode.
            thisDay = findDay(src, day)
            if mode == "Move" and thisDay != "None":
                summary = (
                    'Robot - Moving category ' + src + ' to [[:Category:' +
                    dest + ']] per [[WP:CFD|CFD]] at ' + thisDay + '.')
                action_summary = 'Robot - Result of [[WP:CFD|CFD]] at ' + thisDay + '.'
            elif mode == "Speedy":
                summary = (
                    'Robot - Speedily moving category ' + src +
                    ' to [[:Category:' + dest + ']] per [[WP:CFDS|CFDS]].')
                action_summary = 'Robot - Speedily moved per [[WP:CFDS|CFDS]].'
            else:
                # Wrong mode, or no day known for a regular move: skip the
                # line. summary and robot are untouched at this point, so
                # bypassing the reset at the end of the loop is harmless.
                continue
            # If the category is a redirect, we do NOT want to move articles
            # to it. The safest thing to do here is abort and wait for human
            # intervention.
            # NOTE(review): ns=14 is presumably the Category namespace --
            # confirm.
            destpage = pywikibot.Page(page.site, dest, ns=14)
            if destpage.isCategoryRedirect():
                # Report the cancellation; with robot left as None nothing
                # is run for this line.
                summary = 'CANCELED. Destination is redirect: ' + summary
                pywikibot.stdout(summary)
                robot = None
            else:
                deletion_comment_same = (
                    CategoryMoveBot.DELETION_COMMENT_SAME_AS_EDIT_COMMENT)
                robot = CategoryMoveBot(oldcat=src, newcat=dest, batch=True,
                                        comment=summary, inplace=True,
                                        move_oldcat=True, delete_oldcat=True,
                                        deletion_comment=deletion_comment_same,
                                        move_comment=action_summary)
        elif m.check(deletecat, line):
            src = m.result.group(1)
            # I currently don't see any reason to handle the "Empty" and
            # "Delete" cases separately, though if we are guaranteed that
            # the category in the "Delete" case is empty, it might be easier
            # to call delete.py on it.
            thisDay = findDay(src, day)
            if (mode == "Empty" or mode == "Delete") and thisDay != "None":
                summary = (
                    'Robot - Removing category {0} per [[WP:CFD|CFD]] '
                    'at {1}.'.format(src, thisDay))
                action_summary = 'Robot - Result of [[WP:CFD|CFD]] at ' + thisDay + '.'
            else:
                # Wrong mode or no day known: skip the line (nothing has
                # been modified, so skipping the reset below is harmless).
                continue
            # No newcat is passed here, unlike in the move case above.
            robot = CategoryMoveBot(oldcat=src, batch=True, comment=summary,
                                    deletion_comment=action_summary,
                                    inplace=True)
        # Only act when this line produced both a summary and a bot.
        if summary and robot is not None:
            pywikibot.stdout(summary)
            # Run, robot, run!
            robot.run()
        # Reset the per-line state before looking at the next line.
        summary = ""
        robot = None