Exemplo n.º 1
0
def main():
    """Parse the command line and run NoReferencesBot.

    The pages to work on are taken, in order of precedence, from:

    1. a page title given as bare (unrecognised) arguments,
    2. an XML dump named with ``-xml``,
    3. the generators collected by ``pagegenerators.GeneratorFactory``,
    4. the entry for the current site in ``maintenance_category``.

    When none of these yields a generator the help text is shown.
    """
    # page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            # Namespaces may be given by number or by name
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            # Anything the factory does not recognise is part of a title
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        except KeyError:
            # No maintenance category is configured for this family or
            # language; fall through to the help text below.
            pass
        else:
            import catlib
            # Restrict the category fallback to namespace 0 unless the
            # user asked for specific namespaces.
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site,
                                  "%s:%s" % (site.category_namespace(), cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
Exemplo n.º 2
0
def main():
    """Read the command line options and start NoReferencesBot.

    A page title given as bare arguments wins over an ``-xml`` dump, which
    wins over the generators built by ``pagegenerators.GeneratorFactory``.
    If none of those is given, the category registered for the current
    site in ``maintenance_category`` is used; if there is none either, the
    help text is shown.
    """
    # page generator
    gen = None
    # Words of a single page title given directly on the command line.
    pageTitle = []
    # Namespaces to process; an empty list means all namespaces.
    namespaces = []
    # Never ask before changing a page
    always = False
    # Handles the page-selection arguments shared with other scripts.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            # Keep a non-numeric value as the namespace name
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        elif not genFactory.handleArg(arg):
            pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        except KeyError:
            # Only a missing family/language entry is expected here; any
            # other failure should surface instead of being swallowed.
            pass
        else:
            import catlib
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site, "%s:%s" % (site.category_namespace(),
                                                   cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
        return

    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = NoReferencesBot(preloadingGen, always)
    bot.run()
Exemplo n.º 3
0
def main(*args):
    """Run the hewiki replace bot over an XML dump.

    Recognised options are ``-summary:TEXT``, ``-xmlstart[:TITLE]`` and
    ``-xml[:FILENAME]``.  Without an XML dump the function prints a
    message and returns without doing anything.

    @param args: command line arguments, passed on to
        ``pywikibot.handle_args``
    """
    pywikibot.output('Starting hewiki-replacebot')
    editSummary = replaceConfig.defaultSummary
    xmlFilename = None
    xmlStart = None
    for arg in pywikibot.handle_args(*args):
        if arg.startswith('-summary:'):
            editSummary = arg[9:]
        # '-xmlstart' must be tested before '-xml', which is its prefix
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input('Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]

    if xmlFilename is None:
        pywikibot.output('no xml dump specified. please fill -xml and the xml file to be used')
        return
    replaceDict = fillReplementsDict()

    # Work on a copy so the shared configuration list is not extended in
    # place (which would accumulate duplicates on repeated calls).
    safe_templates = list(replaceConfig.safeTemplates)
    # add external links templates
    site = pywikibot.Site()
    for safeCategory in replaceConfig.safeTemplatesCategories:
        cite_templates = pywikibot.Category(site, safeCategory).articles(namespaces=10, recurse=True)
        safe_templates.extend(page.title(withNamespace=False) for page in cite_templates)

    file_usage_rgx = re.compile(replaceConfig.fileUsageRgx, re.I)
    # Raw strings keep the backslashes literal without relying on Python
    # passing unknown escape sequences through unchanged.
    yiRgx = re.compile(r'\[\[yi:.*?\]\]')
    # NOTE(review): template names are inserted into the pattern unescaped;
    # confirm none of them contains regex metacharacters.
    safeTemplatesRgx = re.compile(r'\{\{(' + '|'.join(safe_templates) + r').*?\}\}', re.I)
    exceptions = {
        'title': [],
        'text-contains': [re.compile(replaceConfig.redirectRgx, re.I)],
        'inside': [file_usage_rgx, safeTemplatesRgx, yiRgx],
        'inside-tags': ['nowiki', 'math', 'comment', 'pre', 'source', 'hyperlink', 'gallery'],
        'require-title': [],
    }

    # avoid searching in other namespaces in the xml: exclude every title
    # prefixed with a name of a namespace not listed in the configuration
    exceptions_with_title_ns = dict(exceptions)
    exceptions_with_title_ns['title'] = [re.compile('^'+re.escape(ns_name)+':') for ns_index, ns
                                         in site.namespaces.items() if ns_index not in replaceConfig.namespaces
                                         for ns_name in ns]
    gen = XmlDumpReplacePageGeneratorHe(replaceDict, xmlFilename, xmlStart, exceptions_with_title_ns, site)
    gen_factory = pywikibot.pagegenerators.GeneratorFactory()
    gen = gen_factory.getCombinedGenerator(gen)
    gen = pywikibot.pagegenerators.NamespaceFilterPageGenerator(gen, replaceConfig.namespaces, site)
    gen = pywikibot.pagegenerators.PreloadingGenerator(gen)
    pywikibot.output('starting replace')
    bot = ReplaceRobotHe(gen, replaceDict, exceptions, editSummary, site=site)
    site.login()
    bot.run()
    pywikibot.output('finished all replacements')
Exemplo n.º 4
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    When no generator results from the arguments, the category named by
    the ``maintenance_category`` MediaWiki message is used instead.

    @param args: command line arguments
    @type args: list of unicode
    @return: True if the bot was run, False if no generator was available
    """
    options = {}

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            genFactory.gens.append(
                XmlDumpNoReferencesPageGenerator(xmlFilename))
        elif arg == '-always':
            options['always'] = True
        elif arg == '-quiet':
            options['verbose'] = False
        else:
            genFactory.handleArg(arg)

    gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.Site()
        try:
            cat = site.expand_text(
                site.mediawiki_message(maintenance_category))
        except Exception:
            # Best effort: without a usable maintenance category there is
            # simply no fallback generator.  Unlike a bare ``except`` this
            # lets KeyboardInterrupt and SystemExit propagate.
            pass
        else:
            cat = pywikibot.Category(site,
                                     "%s:%s" % (site.namespaces.CATEGORY, cat))
            # Default to namespace 0 when none was requested
            gen = cat.articles(namespaces=genFactory.namespaces or [0])
    if gen:
        bot = NoReferencesBot(gen, **options)
        bot.run()
        return True
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
Exemplo n.º 5
0
    def getCategoryGen(self, arg, length, recurse=False):
        """Build a CategorizedPageGenerator from a category option.

        ``arg`` is the complete option text and ``length`` the length of
        the option name.  The category name is the text after the
        character following the option name; if ``arg`` is exactly
        ``length`` long the name is requested through ``i18n.input``.
        A ``#`` or ``|`` after a non-empty name separates it from the
        value passed on as ``start``.
        """
        site = pywikibot.getSite()
        if len(arg) != length:
            raw_name = arg[length + 1:]
        else:
            raw_name = i18n.input("pywikibot-enter-category-name")

        # "#" is treated exactly like the "|" separator
        normalized = raw_name.replace("#", "|")
        head, separator, tail = normalized.partition("|")
        if separator and head:
            name, start_title = head, tail
        else:
            # No separator, or nothing in front of it: use the text as is
            name, start_title = normalized, None

        full_title = "%s:%s" % (site.namespace(14), name)
        category = catlib.Category(site, full_title)
        return CategorizedPageGenerator(category, start=start_title,
                                        recurse=recurse)
Exemplo n.º 6
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    If the arguments do not produce a page generator, the articles of the
    category named by the ``maintenance_category`` MediaWiki message are
    processed instead.

    @param args: command line arguments
    @type args: list of unicode
    @return: True if the bot was run, False if no generator was available
    """
    options = {}

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            genFactory.gens.append(XmlDumpNoReferencesPageGenerator(xmlFilename))
        elif arg == '-always':
            options['always'] = True
        elif arg == '-quiet':
            options['verbose'] = False
        else:
            genFactory.handleArg(arg)

    gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.Site()
        try:
            cat = site.expand_text(
                site.mediawiki_message(maintenance_category))
        except Exception:
            # The category fallback is optional, so failures are ignored;
            # catching Exception (not everything) keeps Ctrl-C working.
            pass
        else:
            cat = pywikibot.Category(site, "%s:%s" % (
                site.category_namespace(), cat))
            # Default to namespace 0 when none was requested
            gen = cat.articles(namespaces=genFactory.namespaces or [0])
    if gen:
        bot = NoReferencesBot(gen, **options)
        bot.run()
        return True
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
Exemplo n.º 7
0
    def getCategoryGen(self, arg, length, recurse=False):
        """Return a CategorizedPageGenerator for a category option.

        The category name is read from ``arg`` after the option name of
        ``length`` characters and its separator, or asked for with
        ``i18n.input`` when ``arg`` carries no value.  Text after the
        first ``#`` or ``|`` (when preceded by a non-empty name) is passed
        as the generator's ``start`` argument.
        """
        site = pywikibot.getSite()
        has_value = len(arg) != length
        if has_value:
            entered = arg[length + 1:]
        else:
            entered = i18n.input('pywikibot-enter-category-name')

        # Treat '#' as another spelling of the '|' separator
        entered = entered.replace('#', '|')
        pieces = entered.split('|', 1)
        if len(pieces) == 2 and pieces[0]:
            cat_name, first_title = pieces
        else:
            cat_name, first_title = entered, None

        cat = catlib.Category(site,
                              "%s:%s" % (site.namespace(14), cat_name))
        return CategorizedPageGenerator(cat, start=first_title,
                                        recurse=recurse)
    def setSubCategoriesGen(self, arg, length, recurse=False, content=False):
        """Return a SubCategoriesPageGenerator for a subcategory option.

        The category name follows the option name (``length`` characters)
        and its separator in ``arg``; without a value it is requested via
        ``i18n.input``.  A ``|`` after a non-empty name introduces the
        value handed to the generator as ``start``.
        """
        if len(arg) != length:
            name = arg[length + 1:]
        else:
            name = i18n.input('pywikibot-enter-category-name')

        bar = name.find('|')
        has_start = bar > 0
        start_title = name[bar + 1:] if has_start else None
        if has_start:
            name = name[:bar]

        link = pywikibot.Link(name, defaultNamespace=14)
        category = pywikibot.Category(link)
        return SubCategoriesPageGenerator(
            category, start=start_title, recurse=recurse, content=content)
Exemplo n.º 9
0
    def setSubCategoriesGen(self, arg, length, recurse=False):
        """Create a SubCategoriesPageGenerator from a subcategory option.

        ``arg`` is the full option text, ``length`` the length of the
        option name.  The value after the separator is the category name,
        optionally followed by ``|`` and the value used as ``start``; a
        missing value is requested with ``i18n.input``.
        """
        site = pywikibot.getSite()
        if len(arg) == length:
            text = i18n.input('pywikibot-enter-category-name')
        else:
            text = arg[length + 1:]

        head, bar, tail = text.partition('|')
        if bar and head:
            # "name|start": split off the starting point
            text, first = head, tail
        else:
            first = None

        title = "%s:%s" % (site.namespace(14), text)
        return SubCategoriesPageGenerator(catlib.Category(site, title),
                                          start=first, recurse=recurse)
Exemplo n.º 10
0
    def setSubCategoriesGen(self, arg, length, recurse=False, content=False):
        """Build the subcategory generator for a subcategory option.

        The category name is the part of ``arg`` after the option name of
        ``length`` characters and its separator, or the answer to an
        ``i18n.input`` prompt when that part is absent.  ``name|start``
        splits into the category name and the ``start`` value.
        """
        if len(arg) == length:
            given = i18n.input('pywikibot-enter-category-name')
        else:
            given = arg[length + 1:]

        parts = given.split('|', 1)
        if len(parts) == 2 and parts[0]:
            given, begin = parts
        else:
            # Either no '|' at all or an empty name before it
            begin = None

        cat = pywikibot.Category(
            pywikibot.Link(given, defaultNamespace=14))
        return SubCategoriesPageGenerator(
            cat, start=begin, recurse=recurse, content=content)
    def getCategoryGen(self, arg, length, recurse=False, content=False):
        """Return a CategorizedPageGenerator for a category option.

        The category name follows the option name (``length`` characters)
        and its separator in ``arg``, or is requested via ``i18n.input``
        when missing.  ``#`` and ``|`` both separate a non-empty name from
        the value passed as ``start``.
        """
        if len(arg) != length:
            name = arg[length + 1:]
        else:
            name = i18n.input('pywikibot-enter-category-name')

        name = name.replace('#', '|')
        cut = name.find('|')
        if cut > 0:
            start_title = name[cut + 1:]
            name = name[:cut]
        else:
            start_title = None

        # Link constructor automatically prepends localized namespace
        # if not included in user's input
        link = pywikibot.Link(name, defaultNamespace=14)
        category = pywikibot.Category(link)
        return CategorizedPageGenerator(
            category, start=start_title, recurse=recurse, content=content)
Exemplo n.º 12
0
    def getCategoryGen(self, arg, length, recurse=False, content=False):
        """Create the category page generator for a category option.

        ``arg`` holds the whole option and ``length`` the length of the
        option name.  If no value follows, the category name is asked for
        with ``i18n.input``.  A ``#`` or ``|`` after a non-empty name
        splits off the value forwarded as ``start``.
        """
        if len(arg) == length:
            spec = i18n.input('pywikibot-enter-category-name')
        else:
            spec = arg[length + 1:]

        # Normalise the alternative '#' separator to '|'
        spec = spec.replace('#', '|')
        before, sep, after = spec.partition('|')
        if sep and before:
            cat_title, resume_at = before, after
        else:
            cat_title, resume_at = spec, None

        # Link constructor automatically prepends localized namespace
        # if not included in user's input
        cat = pywikibot.Category(
            pywikibot.Link(cat_title, defaultNamespace=14))
        return CategorizedPageGenerator(
            cat, start=resume_at, recurse=recurse, content=content)
Exemplo n.º 13
0
def main(*args) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Without any generator from the arguments, the articles of the page
    returned by ``site.page_from_repository(maintenance_category)`` are
    used; if that is unavailable too, help is suggested.

    :param args: command line arguments
    :type args: str
    """
    options = {}
    gen = None

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()

    # Simple switches: option name -> (bot option key, value to store)
    switches = {
        '-always': ('always', True),
        '-quiet': ('verbose', False),
    }

    for arg in local_args:
        name, _, param = arg.partition(':')
        if name == '-xml':
            dump_path = param or i18n.input('pywikibot-enter-xml-filename')
            gen = XmlDumpNoReferencesPageGenerator(dump_path)
            continue
        if name in switches:
            key, flag = switches[name]
            options[key] = flag
            continue
        # Everything else is offered to the generator factory
        factory.handle_arg(arg)

    gen = factory.getCombinedGenerator(gen, preload=True)
    if not gen:
        site = pywikibot.Site()
        cat = site.page_from_repository(maintenance_category)
        if cat:
            gen = cat.articles(namespaces=factory.namespaces or [0])

    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return

    bot = NoReferencesBot(generator=gen, **options)
    bot.run()
Exemplo n.º 14
0
def main():
    """Process command line arguments and run NoReferencesBot.

    Page generators come from ``-xml`` dumps and from the options handled
    by ``pagegenerators.GeneratorFactory``.  If none is given, the
    articles of the category named by the ``maintenance_category``
    MediaWiki message are used; if that fails too, the help is shown.
    """
    options = {}

    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            genFactory.gens.append(XmlDumpNoReferencesPageGenerator(xmlFilename))
        elif arg == '-always':
            options['always'] = True
        elif arg == '-quiet':
            options['verbose'] = False
        else:
            genFactory.handleArg(arg)

    gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.Site()
        try:
            cat = site.expand_text(
                site.mediawiki_message(maintenance_category))
        except Exception:
            # The fallback category is optional; ignore lookup failures
            # but let KeyboardInterrupt/SystemExit through.
            pass
        else:
            cat = pywikibot.Category(site, "%s:%s" % (
                site.category_namespace(), cat))
            # Default to namespace 0 when none was requested
            gen = cat.articles(namespaces=genFactory.namespaces or [0])
    if gen:
        bot = NoReferencesBot(gen, **options)
        bot.run()
    else:
        pywikibot.showHelp()
Exemplo n.º 15
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Replacement pairs are taken from the bare command line arguments, from
    interactive prompts when none were given, and from the predefined sets
    named with ``-fix:``.  Pages come from an XML dump (``-xml``), a MySQL
    query (``-sql``) and/or the generic page generator options.

    @param args: command line arguments
    @type args: list of unicode
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = u""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of Replacement objects built from the command line and fixes.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title':         [],
        'text-contains': [],
        'inside':        [],
        'inside-tags':   [],
        'require-title': [],  # using a separate requirements dict needs some
    }                        # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    # title of the dumped article to start with (-xmlstart); initialised
    # here so it is always defined when the XML generator is built
    xmlStart = None
    useSql = False
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None

    # Read commandline parameters.

    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        # Generator options take precedence over the script's own options
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        # '-xmlstart' must be tested before '-xml', which is its prefix
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        else:
            # Any other argument is an original or replacement text
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if (len(commandline_replacements) % 2):
        raise pywikibot.Error('require even number of replacements.')
    if not commandline_replacements:
        if fixes_set:
            manual = pywikibot.input_yn('Replacements via -fix: set. Apply '
                                        'also manual replacements?', default=False)
        else:
            manual = True
        if manual:
            # Keep asking for old/new pairs until an empty text is entered
            old = pywikibot.input(u'Please enter the text that should be replaced:')
            while old:
                new = pywikibot.input(u'Please enter the new text:')
                commandline_replacements += [old, new]
                old = pywikibot.input(
                    u'Please enter another text that should be replaced,' +
                    u'\nor press Enter to start:')

    # The automatic summary describes the first command line pair only
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing',
                {'description':
                 ' (-%s +%s)' % (replacement.old, replacement.new)}
            )
        replacements.append(replacement)

    if not edit_summary:
        if single_summary:
            pywikibot.output(u'The summary message for the command line '
                             'replacements will be something like: %s'
                             % single_summary)
        if fixes_set:
            pywikibot.output('If a summary is defined for the fix, this '
                             'default summary won\'t be applied.')
        edit_summary = pywikibot.input(
            u'Press Enter to use this automatic message, or enter a ' +
            u'description of the\nchanges your bot will make:')

    # Perform one of the predefined actions.
    for fix in fixes_set:
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s'
                             % ', '.join(fixes.fixes.keys()))
            return
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        for replacement in fix['replacements']:
            # A third element, if present, is the summary of that pair
            summary = set_summary if len(replacement) < 3 else replacement[2]
            replacements.append(Replacement(
                old=replacement[0],
                new=replacement[1],
                use_regex=fix.get('regex'),
                edit_summary=summary,
                exceptions=fix.get('exceptions'),
                case_insensitive=fix.get('nocase')
            ))

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions, site)
    elif useSql:
        # NOTE(review): this branch unpacks every replacement as a 2-tuple
        # and iterates the keys of the 'exceptions' dict, although above
        # 'replacements' is filled with Replacement objects and
        # 'exceptions' maps names to lists -- confirm that -sql still works.
        whereClause = 'WHERE (%s)' % ' OR '.join(
            ["old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern)
             for (old_regexp, new_text) in replacements])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                 for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen)

    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, edit_summary,
                       site)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme().
    # It will make sure the callback is triggered before replace.py is unloaded.
    pywikibot.stopme()
    pywikibot.output(u'\n%s pages changed.' % bot.changed_pages)
    def handleArg(self, arg):
        """Parse one argument at a time.

        If it is recognized as an argument that specifies a generator, a
        generator is created and added to the accumulation list, and the
        function returns true.  Otherwise, it returns false, so that caller
        can try parsing the argument. Call getCombinedGenerator() after all
        arguments have been parsed to get the final output generator.

        """
        site = pywikibot.getSite()
        gen = None
        if arg.startswith('-filelinks'):
            fileLinksPageTitle = arg[11:]
            if not fileLinksPageTitle:
                fileLinksPageTitle = i18n.input(
                    'pywikibot-enter-file-links-processing')
            if fileLinksPageTitle.startswith(site.namespace(6)
                                             + ":"):
                fileLinksPage = pywikibot.ImagePage(site,
                                                    fileLinksPageTitle)
            else:
                fileLinksPage = pywikibot.ImagePage(site,
                                                    'Image:' +
                                                    fileLinksPageTitle)
            gen = FileLinksGenerator(fileLinksPage)
        elif arg.startswith('-unusedfiles'):
            if len(arg) == 12:
                gen = UnusedFilesGenerator()
            else:
                gen = UnusedFilesGenerator(number = int(arg[13:]))
        elif arg.startswith('-unwatched'):
            if len(arg) == 10:
                gen = UnwatchedPagesPageGenerator()
            else:
                gen = UnwatchedPagesPageGenerator(number = int(arg[11:]))
        elif arg.startswith('-usercontribs'):
            gen = UserContributionsGenerator(arg[14:])
        elif arg.startswith('-withoutinterwiki'):
            if len(arg) == 17:
                gen = WithoutInterwikiPageGenerator()
            else:
                gen = WithoutInterwikiPageGenerator(number = int(arg[18:]))
        elif arg.startswith('-interwiki'):
            title = arg[11:]
            if not title:
                title = i18n.input('pywikibot-enter-page-processing')
            page = pywikibot.Page(pywikibot.Link(title,
                                                 pywikibot.Site()))
            gen = InterwikiPageGenerator(page)
        elif arg.startswith('-recentchanges'):
            if len(arg) >= 15:
                gen = RecentChangesPageGenerator(total=int(arg[15:]))
            else:
                gen = RecentChangesPageGenerator(total=60)
            gen = DuplicateFilterPageGenerator(gen)
        elif arg.startswith('-file'):
            textfilename = arg[6:]
            if not textfilename:
                textfilename = pywikibot.input(
                    u'Please enter the local file name:')
            gen = TextfilePageGenerator(textfilename)
        elif arg.startswith('-namespace'):
            if len(arg) == len('-namespace'):
                self.namespaces.append(
                    pywikibot.input(u'What namespace are you filtering on?'))
            else:
                self.namespaces.extend(arg[len('-namespace:'):].split(","))
            return True
        elif arg.startswith('-ns'):
            if len(arg) == len('-ns'):
                self.namespaces.append(
                    pywikibot.input(u'What namespace are you filtering on?'))
            else:
                self.namespaces.extend(arg[len('-ns:'):].split(","))
            return True
        elif arg.startswith('-step'):
            if len(arg) == len('-step'):
                self.step = int(pywikibot.input("What is the step value?"))
            else:
                self.step = int(arg[len('-step:'):])
            return True
        elif arg.startswith('-limit'):
            if len(arg) == len('-limit'):
                self.limit = int(pywikibot.input("What is the limit value?"))
            else:
                self.limit = int(arg[len('-limit:'):])
            return True
        elif arg.startswith('-catr'):
            gen = self.getCategoryGen(arg, len('-catr'), recurse = True)
        elif arg.startswith('-category'):
            gen = self.getCategoryGen(arg, len('-category'))
        elif arg.startswith('-cat'):
            gen = self.getCategoryGen(arg, len('-cat'))
        elif arg.startswith('-subcatsr'):
            gen = self.setSubCategoriesGen(arg, 9, recurse = True)
        elif arg.startswith('-subcats'):
            gen = self.setSubCategoriesGen(arg, 8)
        elif arg.startswith('-page'):
            if len(arg) == len('-page'):
                gen = [pywikibot.Page(
                           pywikibot.Link(
                               pywikibot.input(
                                   u'What page do you want to use?'),
                               pywikibot.getSite())
                           )]
            else:
                gen = [pywikibot.Page(pywikibot.Link(arg[len('-page:'):],
                                                     pywikibot.getSite())
                                      )]
        elif arg.startswith('-uncatfiles'):
            gen = UnCategorizedImageGenerator()
        elif arg.startswith('-uncatcat'):
            gen = UnCategorizedCategoryGenerator()
        elif arg.startswith('-uncat'):
            gen = UnCategorizedPageGenerator()
        elif arg.startswith('-ref'):
            referredPageTitle = arg[5:]
            if not referredPageTitle:
                referredPageTitle = pywikibot.input(
                    u'Links to which page should be processed?')
            referredPage = pywikibot.Page(pywikibot.Link(referredPageTitle,
                                                         pywikibot.Site()))
            gen = ReferringPageGenerator(referredPage)
        elif arg.startswith('-links'):
            linkingPageTitle = arg[7:]
            if not linkingPageTitle:
                linkingPageTitle = pywikibot.input(
                    u'Links from which page should be processed?')
            linkingPage = pywikibot.Page(pywikibot.Link(linkingPageTitle,
                                                        pywikibot.Site()))
            gen = LinkedPageGenerator(linkingPage)
        elif arg.startswith('-weblink'):
            url = arg[9:]
            if not url:
                url = pywikibot.input(
                    u'Pages with which weblink should be processed?')
            gen = LinksearchPageGenerator(url)
        elif arg.startswith('-transcludes'):
            transclusionPageTitle = arg[len('-transcludes:'):]
            if not transclusionPageTitle:
                transclusionPageTitle = pywikibot.input(
                    u'Pages that transclude which page should be processed?')
            transclusionPage = pywikibot.Page(
                                   pywikibot.Link(transclusionPageTitle,
                                                  defaultNamespace=10,
                                                  source=pywikibot.Site()))
            gen = ReferringPageGenerator(transclusionPage,
                                         onlyTemplateInclusion=True)
        elif arg.startswith('-start'):
            firstPageTitle = arg[7:]
            if not firstPageTitle:
                firstPageTitle = pywikibot.input(
                    u'At which page do you want to start?')
            firstpagelink = pywikibot.Link(firstPageTitle,
                                           pywikibot.Site())
            namespace = firstpagelink.namespace
            firstPageTitle = firstpagelink.title
            gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                        includeredirects=False)
        elif arg.startswith('-prefixindex'):
            prefix = arg[13:]
            namespace = None
            if not prefix:
                prefix = pywikibot.input(
                    u'What page names are you looking for?')
            gen = PrefixingPageGenerator(prefix=prefix)
        elif arg.startswith('-newimages'):
            limit = arg[11:] or pywikibot.input(
                u'How many images do you want to load?')
            gen = NewimagesPageGenerator(total=int(limit))
        elif arg.startswith('-newpages'):
            if len(arg) >= 10:
              gen = NewpagesPageGenerator(total=int(arg[10:]))
            else:
              gen = NewpagesPageGenerator(total=60)
        elif arg.startswith('-imagesused'):
            imagelinkstitle = arg[len('-imagesused:'):]
            if not imagelinkstitle:
                imagelinkstitle = pywikibot.input(
                    u'Images on which page should be processed?')
            imagelinksPage = pywikibot.Page(pywikibot.Link(imagelinkstitle,
                                                           pywikibot.Site()))
            gen = ImagesPageGenerator(imagelinksPage)
        elif arg.startswith('-search'):
            mediawikiQuery = arg[8:]
            if not mediawikiQuery:
                mediawikiQuery = pywikibot.input(
                    u'What do you want to search for?')
            # In order to be useful, all namespaces are required
            gen = SearchPageGenerator(mediawikiQuery, namespaces = [])
        elif arg.startswith('-google'):
            gen = GoogleSearchPageGenerator(arg[8:])
        elif arg.startswith('-titleregex'):
            if len(arg) == 6:
                regex = pywikibot.input(
                    u'What page names are you looking for?')
            else:
                regex = arg[7:]
            gen = RegexFilterPageGenerator(pywikibot.Site().allpages(), regex)
        elif arg.startswith('-yahoo'):
            gen = YahooSearchPageGenerator(arg[7:])
        else:
            pass
        if gen:
            self.gens.append(gen)
            return True
        else:
            return False
Exemplo n.º 17
0
def main(*args):
    """
    Process command line arguments and run the replace bot.

    The replacements come from one of three places: pairs given on the
    command line (optionally extended by -replacementfile), a predefined
    fix selected with -fix:NAME, or an interactive prompt when neither of
    the two was given. The pages to work on come from an XML dump (-xml),
    a MySQL query (-sql), explicit titles (-page) and/or the shared
    generator factory arguments.

    @param args: command line arguments, handed to pywikibot.handleArgs
    @raise pywikibot.Error: on an odd number of replacement strings, when
        -fix is combined with command line replacements, or when the
        replacement file cannot be read
    """
    add_cat = None
    gen = None
    # summary message
    summary_commandline = False
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title':         [],
        'text-contains': [],
        'inside':        [],
        'inside-tags':   [],
        'require-title': [], # using a separate requirements dict needs some
    }                        # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fix = None
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    # Title of the dumped article to start with (-xmlstart); stays None
    # when the option is not given.
    xmlStart = None
    useSql = False
    PageTitles = []
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # This is the maximum number of pages to load per query
    maxquerysize = 60
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Load default summary message.
    # BUG WARNING: This is probably incompatible with the -lang parameter.
    editSummary = i18n.twtranslate(pywikibot.getSite(), 'replace-replacing',
                                   {'description': u''})
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Do not save the page titles, rather work on wiki
    filename = None # The name of the file to save titles
    titlefile = None # The file object itself
    # If we save, primary behaviour is append rather than new file
    append = True
    # Default: don't write titles to exception file and don't read them.
    excoutfilename = None # The name of the file to save exceptions
    excoutfile = None # The file object itself
    # excinfilename: reserved for later use (reading back exceptions)
    # If we save exceptions, primary behaviour is append
    excappend = True

    # Read commandline parameters.
    # NOTE: longer option names must be tested before their prefixes
    # (-xmlstart before -xml, -saveexcnew before -saveexc before -save, ...).
    for arg in pywikibot.handleArgs(*args):
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(pywikibot.input(
                    u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-saveexcnew'):
            excappend = False
            if len(arg) == 11:
                excoutfilename = pywikibot.input(
                    u'Please enter the filename to save the excepted titles' +
                    u'\n(will be deleted if exists):')
            else:
                excoutfilename = arg[12:]
        elif arg.startswith('-saveexc'):
            if len(arg) == 8:
                excoutfilename = pywikibot.input(
                    u'Please enter the filename to save the excepted titles:')
            else:
                excoutfilename = arg[9:]
        elif arg.startswith('-savenew'):
            append = False
            if len(arg) == 8:
                filename = pywikibot.input(
                    u'Please enter the filename to save the titles' +
                    u'\n(will be deleted if exists):')
            else:
                filename = arg[9:]
        elif arg.startswith('-save'):
            if len(arg) == 5:
                filename = pywikibot.input(
                    u'Please enter the filename to save the titles:')
            else:
                filename = arg[6:]
        elif arg.startswith('-replacementfile'):
            if len(arg) == len('-replacementfile'):
                replacefile = pywikibot.input(
                    u'Please enter the filename to read replacements from:')
            else:
                replacefile = arg[len('-replacementfile') + 1:]
            try:
                # The context manager closes the file once it has been read.
                with codecs.open(replacefile, 'r', 'utf-8') as f:
                    # One replacement string per line; drop a leading BOM
                    # and the line terminator, keep other whitespace.
                    commandline_replacements.extend(
                        [x.lstrip(u'\uFEFF').rstrip('\r\n') for x in f])
            except IOError:
                raise pywikibot.Error(
                    '\n%s cannot be opened. Try again :-)' % replacefile)
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fix = arg[5:]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            editSummary = arg[9:]
            summary_commandline = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-query:'):
            maxquerysize = int(arg[7:])
        else:
            # Anything the generator factory does not recognise is taken
            # as one half of a replacement pair.
            if not genFactory.handleArg(arg):
                commandline_replacements.append(arg)

    if pywikibot.verbose:
        pywikibot.output(u"commandline_replacements: " +
                         ', '.join(commandline_replacements))

    if len(commandline_replacements) % 2:
        raise pywikibot.Error('require even number of replacements.')
    elif len(commandline_replacements) == 2 and fix is None:
        replacements.append((commandline_replacements[0],
                             commandline_replacements[1]))
        if not summary_commandline:
            editSummary = i18n.twtranslate(pywikibot.getSite(),
                                           'replace-replacing',
                                           {'description': ' (-%s +%s)'
                                            % (commandline_replacements[0],
                                               commandline_replacements[1])})
    elif len(commandline_replacements) > 1:
        if fix is not None:
            raise pywikibot.Error(
                'Specifying -fix with replacements is undefined')
        # Group the flat argument list into (old, new) pairs.
        pairs = [(commandline_replacements[i],
                  commandline_replacements[i + 1])
                 for i in range(0, len(commandline_replacements), 2)]
        replacements.extend(pairs)
        if not summary_commandline:
            replacementsDescription = '(%s)' % ', '.join(
                [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
            editSummary = i18n.twtranslate(pywikibot.getSite(),
                                           'replace-replacing',
                                           {'description':
                                            replacementsDescription})
    elif fix is None:
        # Neither replacements nor a fix were given: ask interactively.
        old = pywikibot.input(u'Please enter the text that should be replaced:')
        new = pywikibot.input(u'Please enter the new text:')
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = pywikibot.input(
                    u'Please enter another text that should be replaced,' +
                    u'\nor press Enter to start:')
            if old == '':
                change += ')'
                break
            new = i18n.input('pywikibot-enter-new-text')
            change += ' & -' + old + ' +' + new
            replacements.append((old, new))
        if not summary_commandline:
            default_summary_message = i18n.twtranslate(pywikibot.getSite(),
                                                       'replace-replacing',
                                                       {'description': change})
            pywikibot.output(u'The summary message will default to: %s'
                             % default_summary_message)
            summary_message = pywikibot.input(
                u'Press Enter to use this default message, or enter a ' +
                u'description of the\nchanges your bot will make:')
            if summary_message == '':
                summary_message = default_summary_message
            editSummary = summary_message

    else:
        # Perform one of the predefined actions.
        fixname = fix # Save the name for passing to exceptions function.
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s'
                             % fixes.fixes.keys())
            return
        if "regex" in fix:
            regex = fix['regex']
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                editSummary = i18n.twtranslate(pywikibot.getSite(),
                                               str(fix['msg']))
            else:
                editSummary = pywikibot.translate(pywikibot.getSite(),
                                                  fix['msg'])
        if "exceptions" in fix:
            exceptions = fix['exceptions']
            # Try to append common extensions for multiple fixes.
            # It must be either a dictionary or a function that returns a dict.
            if 'include' in exceptions:
                incl = exceptions['include']
                if callable(incl):
                    baseExcDict = incl(fixname)
                else:
                    baseExcDict = incl
                if baseExcDict:
                    for l in baseExcDict:
                        try:
                            exceptions[l].extend(baseExcDict[l])
                        except KeyError:
                            exceptions[l] = baseExcDict[l]
        if "recursive" in fix:
            recursive = fix['recursive']
        if "nocase" in fix:
            caseInsensitive = fix['nocase']
        try:
            replacements = fix['replacements']
            # enable regex/replacements as a dictionary for different langs
            if isinstance(replacements, dict):
                replacements = replacements[pywikibot.getSite().lang]
        except KeyError:
            pywikibot.output(
                u"No replacements given in fix.")
            return

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later.
    # The list is updated in place, entry by entry.
    for i, (old, new) in enumerate(replacements):
        if not regex:
            old = re.escape(old)
        replacements[i] = re.compile(old, flags), new

    for exceptionCategory in [
                        'title', 'require-title', 'text-contains', 'inside']:
        if exceptionCategory in exceptions:
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns

    if xmlFilename:
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join(
            ["old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
             for (old, new) in replacements])
        # The query filters on page text, so only the compiled
        # 'text-contains' patterns are applied here. Iterating the
        # exceptions dict itself would yield its string keys, which have
        # no .pattern attribute.
        textExceptions = exceptions.get('text-contains', [])
        if textExceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                 for exc in textExceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [pywikibot.Page(pywikibot.getSite(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)

    gen = genFactory.getCombinedGenerator(gen)
    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return

    preloadingGen = pagegenerators.PreloadingGenerator(gen,
                                                       pageNumber=maxquerysize)

    # Everything from the first open() on runs inside try/finally so that
    # no early return or exception leaves a file handle open.
    try:
        # Finally we open the file for page titles or leave it as None
        if filename:
            try:
                # This opens in strict error mode, that means bot will stop
                # on encoding errors with ValueError.
                # See http://docs.python.org/library/codecs.html#codecs.open
                titlefile = codecs.open(filename, encoding='utf-8',
                                        mode='a' if append else 'w')
            except IOError:
                pywikibot.output("%s cannot be opened for writing." %
                                 filename)
                return
        # The same process with exceptions file:
        if excoutfilename:
            try:
                excoutfile = codecs.open(
                                excoutfilename, encoding='utf-8',
                                mode='a' if excappend else 'w')
            except IOError:
                pywikibot.output("%s cannot be opened for writing." %
                                 excoutfilename)
                return
        bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                           allowoverlap, recursive, add_cat, sleep,
                           editSummary, titlefile, excoutfile)
        bot.run()
    finally:
        if titlefile is not None:
            titlefile.close()
        if excoutfile is not None:
            excoutfile.close()
Exemplo n.º 18
0
def main(*args):
    """
    Parse the command line and start the redirect bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Keyword options handed to the bot ('always', 'delete').
    bot_options = {}
    # What the bot should do: 'double', 'broken' or 'both'. Short forms
    # of the first two are accepted on the command line.
    action_aliases = {
        'double': 'double',
        'do': 'double',
        'broken': 'broken',
        'br': 'broken',
        'both': 'both',
    }
    chosen_action = None
    # Source of the data: None to use the live wiki, otherwise the file
    # name of a local XML dump.
    dump_file = None
    # Namespaces collected from -namespace: arguments (numbers where the
    # value parses as an integer, the raw string otherwise).
    ns_list = []
    # Redirect to restart from when reading a dump (-offset:).
    dump_offset = -1
    use_moves = False
    full_scan = False
    start_title = ''
    until_title = ''
    total = None
    step_size = None
    page_name = None

    for arg in pywikibot.handle_args(args):
        if arg in action_aliases:
            chosen_action = action_aliases[arg]
            continue

        if arg == '-fullscan':
            full_scan = True
        elif arg.startswith('-xml'):
            if arg == '-xml':
                # No file name attached to the option: ask for it.
                dump_file = i18n.input('pywikibot-enter-xml-filename')
            else:
                dump_file = arg[len('-xml:'):]
        elif arg.startswith('-moves'):
            use_moves = True
        elif arg.startswith('-namespace:'):
            ns_value = arg[len('-namespace:'):]
            if ns_value == '':
                # "-namespace:" does NOT yield -namespace:0 further down
                # the road, so prompt for a value instead.
                ns_value = i18n.input('pywikibot-enter-namespace-number')
            # TODO: at least for some generators enter a namespace by its name
            # or number
            if ns_value == '':
                ns_value = '0'
            try:
                ns_value = int(ns_value)
            except ValueError:
                # -namespace:all Process all namespaces.
                # Only works with the API read interface.
                pass
            if ns_value not in ns_list:
                ns_list.append(ns_value)
        elif arg.startswith('-offset:'):
            dump_offset = int(arg[len('-offset:'):])
        elif arg.startswith('-start:'):
            start_title = arg[len('-start:'):]
        elif arg.startswith('-until:'):
            until_title = arg[len('-until:'):]
        elif arg.startswith('-total:'):
            total = int(arg[len('-total:'):])
        elif arg.startswith('-step:'):
            step_size = int(arg[len('-step:'):])
        elif arg.startswith('-page:'):
            page_name = arg[len('-page:'):]
        elif arg == '-always':
            bot_options['always'] = True
        elif arg == '-delete':
            bot_options['delete'] = True
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)

    # An XML dump cannot be combined with either API-based source.
    dump_with_moves = bool(dump_file and use_moves)
    dump_with_fullscan = bool(dump_file and full_scan)

    if chosen_action and not (dump_with_moves or dump_with_fullscan):
        pywikibot.Site().login()
        redirect_gen = RedirectGenerator(dump_file, ns_list, dump_offset,
                                         use_moves, full_scan, start_title,
                                         until_title, total, step_size,
                                         page_name)
        RedirectRobot(chosen_action, redirect_gen, number=total,
                      **bot_options).run()
        return

    # Invalid invocation: explain what was wrong and point to the help.
    problems = []
    if dump_with_moves:
        problems.append(
            'Either use a XML file or the moved pages from the API')
    if dump_with_fullscan:
        problems.append(
            'Either use a XML file or do a full scan using the API')
    pywikibot.bot.suggest_help(additional_text='\n'.join(problems),
                               missing_action=not chosen_action)
Exemplo n.º 19
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: unicode
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = ''
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }  # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    sql_query = None
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.

    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg.startswith(('-sql', '-mysqlquery')):
            if arg.startswith('-sql'):
                issue_deprecation_warning('The usage of "-sql"',
                                          '-mysqlquery',
                                          1,
                                          ArgumentDeprecationWarning,
                                          since='20180617')
            useSql = True
            sql_query = arg.partition(':')[2]
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-automaticsummary'):
            edit_summary = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-manualinput'):
            manual_input = True
        elif arg.startswith('-replacementfile'):
            issue_deprecation_warning('-replacementfile',
                                      '-pairsfile',
                                      2,
                                      ArgumentDeprecationWarning,
                                      since='20160304')
        elif arg.startswith('-pairsfile'):
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True

            if arg == '-pairsfile':
                replacement_file = pywikibot.input(
                    'Please enter the filename to read replacements from:')
            else:
                replacement_file = arg[len('-pairsfile:'):]
        else:
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error('-pairsfile used between a pattern replacement pair.')
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, 'r', 'utf-8') as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error('Error loading {0}: {1}'.format(
                replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error(
                '{0} contains an incomplete pattern replacement pair.'.format(
                    replacement_file))
            return False

        # Strip BOM from first line
        file_replacements[0].lstrip('\uFEFF')
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input('Please enter the text that should be replaced:')
        while old:
            new = pywikibot.input('Please enter the new text:')
            commandline_replacements += [old, new]
            old = pywikibot.input(
                'Please enter another text that should be replaced,'
                '\nor press Enter to start:')

    # The summary stored here won't be actually used but is only an example
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing', {
                    'description':
                    ' (-{0} +{1})'.format(replacement.old, replacement.new)
                })
        replacements.append(replacement)

    # Perform one of the predefined actions.
    missing_fixes_summaries = []  # which a fixes/replacements miss a summary
    generators_given = bool(genFactory.gens)
    for fix_name in fixes_set:
        try:
            fix = fixes.fixes[fix_name]
        except KeyError:
            pywikibot.output('Available predefined fixes are: {0}'.format(
                ', '.join(fixes.fixes.keys())))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: '
                                 '{0}'.format(fixes.filename))
            return
        if not fix['replacements']:
            pywikibot.warning('No replacements defined for fix '
                              '"{0}"'.format(fix_name))
            continue
        if 'msg' in fix:
            if isinstance(fix['msg'], basestring):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        if not generators_given and 'generator' in fix:
            gen_args = fix['generator']
            if isinstance(gen_args, basestring):
                gen_args = [gen_args]
            for gen_arg in gen_args:
                genFactory.handleArg(gen_arg)
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'),
                                          set_summary,
                                          name=fix_name)
        # Whether some replacements have a summary, if so only show which
        # have none, otherwise just mention the complete fix
        missing_fix_summaries = []
        for index, replacement in enumerate(fix['replacements'], start=1):
            summary = None if len(replacement) < 3 else replacement[2]
            if not set_summary and not summary:
                missing_fix_summaries.append('"{0}" (replacement #{1})'.format(
                    fix_name, index))
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning('The old string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[0])))
            if (not callable(replacement[1])
                    and chars.contains_invisible(replacement[1])):
                pywikibot.warning('The new string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[1])))
            replacement_set.append(
                ReplacementListEntry(
                    old=replacement[0],
                    new=replacement[1],
                    fix_set=replacement_set,
                    edit_summary=summary,
                ))

        # Exceptions specified via 'fix' shall be merged to those via CLI.
        if replacement_set:
            replacements.extend(replacement_set)
            if replacement_set._exceptions is not None:
                for k, v in replacement_set._exceptions.items():
                    if k in exceptions:
                        exceptions[k] = list(set(exceptions[k]) | set(v))
                    else:
                        exceptions[k] = v

        if len(fix['replacements']) == len(missing_fix_summaries):
            missing_fixes_summaries.append(
                '"{0}" (all replacements)'.format(fix_name))
        else:
            missing_fixes_summaries += missing_fix_summaries

    if ((not edit_summary or edit_summary is True)
            and (missing_fixes_summaries or single_summary)):
        if single_summary:
            pywikibot.output('The summary message for the command line '
                             'replacements will be something like: ' +
                             single_summary)
        if missing_fixes_summaries:
            pywikibot.output('The summary will not be used when the fix has '
                             'one defined but the following fix(es) do(es) '
                             'not have a summary defined: '
                             '{0}'.format(', '.join(missing_fixes_summaries)))
        if edit_summary is not True:
            edit_summary = pywikibot.input(
                'Press Enter to use this automatic message, or enter a '
                'description of the\nchanges your bot will make:')
        else:
            edit_summary = ''

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions, site)
    elif useSql:
        if not sql_query:
            whereClause = 'WHERE (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" %
                prepareRegexForMySQL(old_regexp.pattern)
                for (old_regexp, new_text) in replacements
            ])
            if exceptions:
                exceptClause = 'AND NOT (%s)' % ' OR '.join([
                    "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                    for exc in exceptions
                ])
            else:
                exceptClause = ''
        query = sql_query or """
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen, preload=True)

    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    bot = ReplaceRobot(gen,
                       replacements,
                       exceptions,
                       allowoverlap,
                       recursive,
                       add_cat,
                       sleep,
                       edit_summary,
                       always=acceptall,
                       site=site)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme(). It will make sure the callback is
    # triggered before replace.py is unloaded.
    pywikibot.stopme()
    pywikibot.output('\n{0} pages changed.'.format(bot.changed_pages))
Exemplo n.º 20
0
def main(*args):
    """
    Parse the command line and start the redirect bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Positional words selecting what the bot does, short forms included.
    action_names = {
        'do': 'double',
        'br': 'broken',
        'both': 'both',
        'broken': 'broken',
        'double': 'double',
    }
    action = None
    bot_options = {}
    generator_options = {}
    ns_list = []
    # Names of the mutually exclusive page sources seen on the command line.
    sources = set()

    for raw in pywikibot.handle_args(args):
        name, _, value = raw.partition(':')
        opt = name.partition('-')[2]

        if name in action_names:
            action = action_names[name]
        # options consumed by the bot itself
        elif opt == 'always' or opt == 'delete':
            bot_options[opt] = True
        elif opt == 'total':
            total = int(value)
            generator_options[opt] = total
            bot_options[opt] = total
        elif opt == 'sdtemplate':
            if not value:
                value = pywikibot.input(
                    'Which speedy deletion template to use?')
            bot_options['sdtemplate'] = value
        # options consumed by the redirect generator
        elif opt == 'fullscan' or opt == 'moves':
            generator_options[opt] = True
            sources.add(name)
        elif opt == 'xml':
            if not value:
                value = i18n.input('pywikibot-enter-xml-filename')
            generator_options[opt] = value
            sources.add(name)
        elif opt == 'namespace':
            # A bare "-namespace:" prompts for a number instead of silently
            # meaning namespace 0.
            ns = value or i18n.input('pywikibot-enter-namespace-number')
            ns = '0' if ns == '' else ns
            try:
                ns = int(ns)
            except ValueError:
                # Non-numeric values (e.g. "all") are kept as strings.
                pass
            if ns not in ns_list:
                ns_list.append(ns)
        elif opt == 'offset':
            generator_options[opt] = int(value)
        elif opt in ('page', 'start', 'until'):
            generator_options[opt] = value
        # deprecated and unrecognised options
        elif opt == 'step':
            issue_deprecation_warning('The usage of "{0}"'.format(name), 2,
                                      ArgumentDeprecationWarning)
        else:
            pywikibot.output(u'Unknown argument: %s' % name)

    generator_options['namespaces'] = ns_list

    # Only one page source may be requested at a time.
    if len(sources) > 1:
        problem = 'You can only use one of {0} options.'.format(
            ' or '.join(sources))
        pywikibot.bot.suggest_help(additional_text=problem,
                                   missing_action=not action)
        return

    if not action:
        pywikibot.bot.suggest_help(missing_action=True)
        return

    pywikibot.Site().login()
    bot_options['generator'] = RedirectGenerator(action, **generator_options)
    RedirectRobot(action, **bot_options).run()
Exemplo n.º 21
0
def main():
    """
    Process command line arguments and run the weblink checker bot.

    Recognised local arguments:

    -talk / -notalk  set config.report_dead_links_on_talk to True / False
    -namespace:x     restrict processing to namespace x (may be repeated)
    -repeat          use RepeatPageGenerator as the page source
    -ignore:n        append the integer n to the HTTPignore list given to
                     the bot
    -day:n           integer passed to the bot as ``day`` (default 7)
    -xmlstart[:t]    dumped article to start with when reading an XML dump
    -xml[:file]      read pages from an XML dump

    Every other argument is offered to the generator factory; whatever it
    does not handle is joined with spaces and used as a single page title.
    If no page generator results, the help text is shown.
    """
    gen = None
    singlePageTitle = []
    xmlFilename = None
    # Article to start with inside the XML dump; stays None unless
    # -xmlstart is given. Initialised here so it is always bound.
    xmlStart = None
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    HTTPignore = []
    day = 7

    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg == '-talk':
            config.report_dead_links_on_talk = True
        elif arg == '-notalk':
            config.report_dead_links_on_talk = False
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # Not a number: keep the namespace as given (by name).
                namespaces.append(arg[11:])
        elif arg == '-repeat':
            gen = RepeatPageGenerator()
        elif arg.startswith('-ignore:'):
            HTTPignore.append(int(arg[8:]))
        elif arg.startswith('-day:'):
            day = int(arg[5:])
        # -xmlstart must be tested before -xml because it shares the prefix.
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        else:
            if not genFactory.handleArg(arg):
                singlePageTitle.append(arg)

    if singlePageTitle:
        singlePageTitle = ' '.join(singlePageTitle)
        page = pywikibot.Page(pywikibot.Site(), singlePageTitle)
        gen = iter([page])

    # An XML dump, when given, replaces any generator chosen so far.
    if xmlFilename:
        gen = XmlDumpPageGenerator(xmlFilename, xmlStart, namespaces)

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        # fetch at least 240 pages simultaneously from the wiki, but more if
        # a high thread number is set.
        pageNumber = max(240, config.max_external_links * 2)
        gen = pagegenerators.PreloadingGenerator(gen, step=pageNumber)
        gen = pagegenerators.RedirectFilterPageGenerator(gen)
        bot = WeblinkCheckerRobot(gen, HTTPignore, day)
        try:
            bot.run()
        finally:
            # Runs even if bot.run() raised, so that the history is saved.
            waitTime = 0
            # Don't wait longer than 30 seconds for threads to finish.
            while countLinkCheckThreads() > 0 and waitTime < 30:
                try:
                    pywikibot.output(u"Waiting for remaining %i threads to "
                                     u"finish, please wait..." %
                                     countLinkCheckThreads())
                    # wait 1 second
                    time.sleep(1)
                    waitTime += 1
                except KeyboardInterrupt:
                    pywikibot.output(u'Interrupted.')
                    break
            if countLinkCheckThreads() > 0:
                pywikibot.output(u'Remaining %i threads will be killed.' %
                                 countLinkCheckThreads())
                # Threads will die automatically because they are daemonic.
            if bot.history.reportThread:
                bot.history.reportThread.shutdown()
                # wait until the report thread is shut down; the user can
                # interrupt it by pressing CTRL-C.
                # NOTE(review): if reportThread is a threading.Thread,
                # isAlive() no longer exists on Python 3.9+ (is_alive()
                # replaces it) - confirm against the thread class.
                try:
                    while bot.history.reportThread.isAlive():
                        time.sleep(0.1)
                except KeyboardInterrupt:
                    pywikibot.output(u'Report thread interrupted.')
                    bot.history.reportThread.kill()
            pywikibot.output(u'Saving history...')
            bot.history.save()
    else:
        pywikibot.showHelp()
Exemplo n.º 22
0
def main(*args):
    """
    Parse the command line and start the redirect bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Positional words selecting what the bot does, short forms included.
    action_names = {
        'do': 'double',
        'br': 'broken',
        'both': 'both',
        'broken': 'broken',
        'double': 'double',
    }
    action = None
    bot_options = {}

    # Settings handed to RedirectGenerator positionally further down.
    xml_file = None     # local XML dump to read; None means none was given
    ns_list = []        # namespaces collected from -namespace (default: [])
    first_offset = -1   # value of -offset (default: -1)
    use_moves = False
    full_scan = False
    start_title = ''
    until_title = ''
    total = None
    page_title = None

    for raw in pywikibot.handle_args(args):
        name, _, value = raw.partition(':')
        # Option name without its leading character.
        opt = name[1:]

        if name in action_names:
            action = action_names[name]
        # options consumed by the bot itself
        elif opt == 'always' or opt == 'delete':
            bot_options[opt] = True
        elif opt == 'total':
            total = int(value)
            bot_options['number'] = total
        # options consumed by the redirect generator
        elif opt == 'fullscan':
            full_scan = True
        elif opt == 'xml':
            xml_file = value or i18n.input('pywikibot-enter-xml-filename')
        elif opt == 'moves':
            use_moves = True
        elif opt == 'namespace':
            # A bare "-namespace:" prompts for a number instead of silently
            # meaning namespace 0.
            ns = value or i18n.input('pywikibot-enter-namespace-number')
            ns = '0' if ns == '' else ns
            try:
                ns = int(ns)
            except ValueError:
                # Non-numeric values (e.g. "all") are kept as strings.
                pass
            if ns not in ns_list:
                ns_list.append(ns)
        elif opt == 'offset':
            first_offset = int(value)
        elif opt == 'start':
            start_title = value
        elif opt == 'until':
            until_title = value
        elif opt == 'page':
            page_title = value
        # deprecated and unrecognised options
        elif opt == 'step':
            issue_deprecation_warning('The usage of "{0}"'.format(name), 2,
                                      ArgumentDeprecationWarning)
        else:
            pywikibot.output(u'Unknown argument: %s' % name)

    # An XML dump cannot be combined with the API based page sources.
    conflicts = []
    if xml_file and use_moves:
        conflicts.append(
            'Either use a XML file or the moved pages from the API')
    if xml_file and full_scan:
        conflicts.append(
            'Either use a XML file or do a full scan using the API')

    if conflicts or not action:
        pywikibot.bot.suggest_help(additional_text='\n'.join(conflicts),
                                   missing_action=not action)
        return

    pywikibot.Site().login()
    generator = RedirectGenerator(xml_file, ns_list, first_offset,
                                  use_moves, full_scan, start_title,
                                  until_title, total, page_title)
    RedirectRobot(action, generator, **bot_options).run()
Exemplo n.º 23
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Recognised local arguments are -talk, -notalk, -repeat, -ignore:n,
    -day:n, -xmlstart[:title] and -xml[:filename]; everything else is
    passed on to the generator factory.

    @param args: command line arguments
    @type args: unicode
    @return: True if a page generator was available and the bot was run,
        False otherwise
    @rtype: bool
    """
    gen = None
    xmlFilename = None
    # Article to start with inside the XML dump; stays None unless
    # -xmlstart is given. Initialised here so it is always bound.
    xmlStart = None
    HTTPignore = []

    # memento_client holds the ImportError when its import failed.
    if isinstance(memento_client, ImportError):
        warn('memento_client not imported: {0}'.format(memento_client),
             ImportWarning)

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg == '-talk':
            config.report_dead_links_on_talk = True
        elif arg == '-notalk':
            config.report_dead_links_on_talk = False
        elif arg == '-repeat':
            gen = RepeatPageGenerator()
        elif arg.startswith('-ignore:'):
            HTTPignore.append(int(arg[8:]))
        elif arg.startswith('-day:'):
            config.weblink_dead_days = int(arg[5:])
        # -xmlstart must be tested before -xml because it shares the prefix.
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        else:
            genFactory.handleArg(arg)

    # An XML dump, when given, replaces any generator chosen so far.
    if xmlFilename:
        gen = XmlDumpPageGenerator(xmlFilename, xmlStart,
                                   genFactory.namespaces)

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        if not genFactory.nopreload:
            # fetch at least 240 pages simultaneously from the wiki, but more
            # if a high thread number is set.
            pageNumber = max(240, config.max_external_links * 2)
            gen = pagegenerators.PreloadingGenerator(gen, groupsize=pageNumber)
        gen = pagegenerators.RedirectFilterPageGenerator(gen)
        bot = WeblinkCheckerRobot(gen, HTTPignore, config.weblink_dead_days)
        try:
            bot.run()
        finally:
            # Runs even if bot.run() raised, so that the history is saved.
            waitTime = 0
            # Don't wait longer than 30 seconds for threads to finish.
            while countLinkCheckThreads() > 0 and waitTime < 30:
                try:
                    pywikibot.output('Waiting for remaining {0} threads to '
                                     'finish, please wait...'.format(
                                         countLinkCheckThreads()))
                    # wait 1 second
                    time.sleep(1)
                    waitTime += 1
                except KeyboardInterrupt:
                    pywikibot.output('Interrupted.')
                    break
            if countLinkCheckThreads() > 0:
                pywikibot.output(
                    'Remaining {0} threads will be killed.'.format(
                        countLinkCheckThreads()))
                # Threads will die automatically because they are daemonic.
            if bot.history.reportThread:
                bot.history.reportThread.shutdown()
                # wait until the report thread is shut down; the user can
                # interrupt it by pressing CTRL-C.
                # NOTE(review): if reportThread is a threading.Thread,
                # isAlive() no longer exists on Python 3.9+ (is_alive()
                # replaces it) - confirm against the thread class.
                try:
                    while bot.history.reportThread.isAlive():
                        time.sleep(0.1)
                except KeyboardInterrupt:
                    pywikibot.output('Report thread interrupted.')
                    bot.history.reportThread.kill()
            pywikibot.output('Saving history...')
            bot.history.save()
        return True
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
Exemplo n.º 24
0
 def run(self):
     """
     Start the robot and process every page yielded by self.generator.

     For each page the replacements are applied and the resulting diff is
     shown. Unless self.acceptall is set, the user is asked what to do
     with the change: accept it, reject it, reject it and record the
     title in the exceptions file, edit the text by hand, open the page
     in a browser, accept this and all further changes, or quit.

     Depending on whether self.articles is set, an accepted change is
     either saved to the wiki or only the page title is written to the
     self.articles file.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         if self.isTitleExcepted(page.title()):
             pywikibot.output(
                 u'Skipping %s because the title is on the exceptions list.'
                 % page.title(asLink=True))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             # Editability only matters when pages are actually saved,
             # i.e. when titles are not merely collected in self.articles.
             if not (self.articles or page.canBeEdited()):
                 pywikibot.output(u"You can't edit page %s" %
                                  page.title(asLink=True))
                 continue
         except pywikibot.NoPage:
             pywikibot.output(u'Page %s not found' %
                              page.title(asLink=True))
             continue
         new_text = original_text
         # Repeated until the user makes a final decision for this page;
         # 'edit' and 'open in browser' restart the loop via continue.
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output(u'Skipping %s because it contains text '
                                  u'that is on the exceptions list.' %
                                  page.title(asLink=True))
                 break
             new_text = self.doReplacements(new_text)
             if new_text == original_text:
                 pywikibot.output(u'No changes were necessary in %s' %
                                  page.title(asLink=True))
                 break
             if self.recursive:
                 # Re-apply the replacements until the text stops changing.
                 newest_text = self.doReplacements(new_text)
                 while newest_text != new_text:
                     new_text = newest_text
                     newest_text = self.doReplacements(new_text)
             if hasattr(self, "addedCat"):
                 cats = page.categories()
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = pywikibot.replaceCategoryLinks(
                         new_text, cats)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(
                 u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                 page.title())
             pywikibot.showDiff(original_text, new_text)
             if self.acceptall:
                 break
             # The 'no+eXcept' choice is only offered when an exceptions
             # file (self.exctitles) is available to record the title in.
             if self.exctitles:
                 choice = pywikibot.inputChoice(
                     u'Do you want to accept these changes?', [
                         'Yes', 'No', 'no+eXcept', 'Edit',
                         'open in Browser', 'All', 'Quit'
                     ], ['y', 'N', 'x', 'e', 'b', 'a', 'q'], 'N')
             else:
                 choice = pywikibot.inputChoice(
                     u'Do you want to accept these changes?', [
                         'Yes', 'No', 'Edit', 'open in Browser', 'All',
                         'Quit'
                     ], ['y', 'N', 'e', 'b', 'a', 'q'], 'N')
             if choice == 'e':
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == 'b':
                 webbrowser.open("http://%s%s" %
                                 (page.site.hostname(),
                                  page.site.nice_get_address(page.title())))
                 i18n.input('pywikibot-enter-finished-browser')
                 # Reload the page after the browser session and start over
                 # from the fresh text.
                 try:
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output(u'Page %s has been deleted.' %
                                      page.title())
                     break
                 new_text = original_text
                 continue
             if choice == 'q':
                 self.writeEditCounter()
                 self.writeExceptCounter()
                 return
             if choice == 'a':
                 # Leaves the loop below; the page is then handled by the
                 # accept-all branch after the loop.
                 self.acceptall = True
             if choice == 'x':  # May happen only if self.exctitles isn't None
                 self.exctitles.write(u"ur'^%s$',\n" %
                                      re.escape(page.title()))
                 self.exctitles.flush()
                 self.exceptcounter += 1
             if choice == 'y':
                 if not self.articles:
                     # Primary behaviour: working on wiki
                     page.put_async(new_text, self.editSummary)
                     self.editcounter += 1
                     # Bug: this increments even if put_async fails
                     # This is separately in two clauses of if for
                     # future purposes to get feedback form put_async
                 else:
                     # Save the title for later processing instead of editing
                     self.editcounter += 1
                     self.articles.write(u'#%s\n%s' % (page.title(
                         asLink=True, textlink=True), self.splitLine()))
                     self.articles.flush()  # For the peace of our soul :-)
             # choice must be 'N'
             break
         if self.acceptall and new_text != original_text:
             if not self.articles:
                 #Primary behaviour: working on wiki
                 try:
                     page.put(new_text, self.editSummary)
                     self.editcounter += 1  # increment only on success
                 except pywikibot.EditConflict:
                     pywikibot.output(
                         u'Skipping %s because of edit conflict' %
                         (page.title(), ))
                 except pywikibot.SpamfilterError as e:
                     pywikibot.output(
                         u'Cannot change %s because of blacklist entry %s' %
                         (page.title(), e.url))
                 except pywikibot.PageNotSaved as error:
                     pywikibot.error(u'putting page: %s' % (error.args, ))
                 except pywikibot.LockedPage:
                     pywikibot.output(u'Skipping %s (locked page)' %
                                      (page.title(), ))
Exemplo n.º 25
0
def main(*args):
    """
    Parse the command line and start the redirect bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    bot_options = {}
    # Task to perform: 'double', 'broken' or 'both'; None until one is given.
    action = None
    # Name of a local XML dump to read from (None when -xml is not given).
    dump_file = None
    # Namespaces collected from -namespace: options; empty when none given.
    namespaces = []
    # Value of -offset:, -1 when the option is absent.
    offset = -1
    moved_pages = False
    fullscan = False
    start = ''
    until = ''
    number = None
    step = None
    pagename = None

    for arg in pywikibot.handle_args(args):
        if arg in ('double', 'do'):
            action = 'double'
        elif arg in ('broken', 'br'):
            action = 'broken'
        elif arg == 'both':
            action = 'both'
        elif arg == '-fullscan':
            fullscan = True
        elif arg.startswith('-xml'):
            if arg == '-xml':
                # Bare option: ask the user for the dump file name.
                dump_file = i18n.input('pywikibot-enter-xml-filename')
            else:
                dump_file = arg[len('-xml:'):]
        elif arg.startswith('-moves'):
            moved_pages = True
        elif arg.startswith('-namespace:'):
            ns = arg[len('-namespace:'):]
            if ns == '':
                # "-namespace:" does NOT yield -namespace:0 further down
                # the road, so ask; an empty answer falls back to '0'.
                ns = i18n.input('pywikibot-enter-namespace-number')
                if ns == '':
                    ns = '0'
            # TODO: at least for some generators enter a namespace by its
            # name or number
            try:
                ns = int(ns)
            except ValueError:
                # Non-numeric values (e.g. -namespace:all) are kept as text.
                pass
            if ns not in namespaces:
                namespaces.append(ns)
        elif arg.startswith('-offset:'):
            offset = int(arg[len('-offset:'):])
        elif arg.startswith('-start:'):
            start = arg[len('-start:'):]
        elif arg.startswith('-until:'):
            until = arg[len('-until:'):]
        elif arg.startswith('-total:'):
            number = int(arg[len('-total:'):])
        elif arg.startswith('-step:'):
            step = int(arg[len('-step:'):])
        elif arg.startswith('-page:'):
            pagename = arg[len('-page:'):]
        elif arg == '-always':
            bot_options['always'] = True
        elif arg == '-delete':
            bot_options['delete'] = True
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)

    # An XML dump cannot be combined with the API based sources.
    xml_with_moves = dump_file and moved_pages
    xml_with_fullscan = dump_file and fullscan

    if not action or xml_with_moves or xml_with_fullscan:
        problems = []
        if xml_with_moves:
            problems.append(
                'Either use a XML file or the moved pages from the API')
        if xml_with_fullscan:
            problems.append(
                'Either use a XML file or do a full scan using the API')
        pywikibot.bot.suggest_help(additional_text='\n'.join(problems),
                                   missing_action=not action)
        return

    pywikibot.Site().login()
    gen = RedirectGenerator(dump_file, namespaces, offset, moved_pages,
                            fullscan, start, until, number, step, pagename)
    bot = RedirectRobot(action, gen, number=number, **bot_options)
    bot.run()
Exemplo n.º 26
0
    def handleArg(self, arg):
        """Parse one argument at a time.

        If it is recognized as an argument that specifies a generator, a
        generator is created and added to the accumulation list
        (self.gens), and the result of getCombinedGenerator() is returned.
        Arguments that only configure filtering (-namespace, -ns, -limit)
        are stored on the factory and True is returned.  Otherwise False is
        returned, so that the caller can try parsing the argument itself.
        Call getCombinedGenerator() after all arguments have been parsed to
        get the final output generator.

        The options are matched by prefix, so an option whose name is the
        prefix of another one (e.g. '-random' and '-randomredirect') has to
        be tested after the longer one.

        @param arg: a single command line argument
        @return: a value that is true if the argument was consumed,
            False otherwise
        """
        site = pywikibot.getSite()
        gen = None
        if arg.startswith('-filelinks'):
            fileLinksPageTitle = arg[11:]
            if not fileLinksPageTitle:
                fileLinksPageTitle = i18n.input(
                    'pywikibot-enter-file-links-processing')
            if fileLinksPageTitle.startswith(site.namespace(6) + ":"):
                fileLinksPage = pywikibot.ImagePage(site, fileLinksPageTitle)
            else:
                fileLinksPage = pywikibot.ImagePage(
                    site, 'Image:' + fileLinksPageTitle)
            gen = FileLinksGenerator(fileLinksPage)
        elif arg.startswith('-unusedfiles'):
            if len(arg) == 12:
                gen = UnusedFilesGenerator()
            else:
                gen = UnusedFilesGenerator(number=int(arg[13:]))
        elif arg.startswith('-unwatched'):
            if len(arg) == 10:
                gen = UnwatchedPagesPageGenerator()
            else:
                gen = UnwatchedPagesPageGenerator(number=int(arg[11:]))
        elif arg.startswith('-usercontribs'):
            # Format: -usercontribs:<user>[;<number>]
            args = arg[14:].split(';')
            try:
                number = int(args[1])
            except (IndexError, ValueError):
                # No count, or a non-numeric one, after the ';' separator.
                number = 250
            gen = UserContributionsGenerator(args[0],
                                             number,
                                             namespaces=self.getNamespaces)
        elif arg.startswith('-withoutinterwiki'):
            if len(arg) == 17:
                gen = WithoutInterwikiPageGenerator()
            else:
                gen = WithoutInterwikiPageGenerator(number=int(arg[18:]))
        elif arg.startswith('-interwiki'):
            title = arg[11:]
            if not title:
                title = i18n.input('pywikibot-enter-page-processing')
            page = pywikibot.Page(site, title)
            gen = InterwikiPageGenerator(page)
        elif arg.startswith('-randomredirect'):
            if len(arg) == 15:
                gen = RandomRedirectPageGenerator()
            else:
                gen = RandomRedirectPageGenerator(number=int(arg[16:]))
        elif arg.startswith('-random'):
            if len(arg) == 7:
                gen = RandomPageGenerator()
            else:
                gen = RandomPageGenerator(number=int(arg[8:]))
        elif arg.startswith('-recentchanges'):
            if len(arg) >= 15:
                gen = RecentchangesPageGenerator(number=int(arg[15:]),
                                                 nobots=False)
            else:
                gen = RecentchangesPageGenerator(nobots=False)
            gen = DuplicateFilterPageGenerator(gen)
        elif arg.startswith('-rc-nobots'):
            if len(arg) >= 11:
                gen = RecentchangesPageGenerator(number=int(arg[11:]),
                                                 nobots=True)
            else:
                gen = RecentchangesPageGenerator(nobots=True)
            gen = DuplicateFilterPageGenerator(gen)
        elif arg.startswith('-file'):
            textfilename = arg[6:]
            if not textfilename:
                textfilename = pywikibot.input(
                    u'Please enter the local file name:')
            gen = TextfilePageGenerator(textfilename)
        elif arg.startswith('-namespace'):
            # Filter setting only: nothing is added to self.gens.
            if len(arg) == len('-namespace'):
                self.namespaces.append(
                    pywikibot.input(u'What namespace are you filtering on?'))
            else:
                self.namespaces.extend(arg[len('-namespace:'):].split(","))
            return True
        elif arg.startswith('-ns'):
            # Short form of -namespace.
            if len(arg) == len('-ns'):
                self.namespaces.append(
                    pywikibot.input(u'What namespace are you filtering on?'))
            else:
                self.namespaces.extend(arg[len('-ns:'):].split(","))
            return True
        elif arg.startswith('-limit'):
            if len(arg) == len('-limit'):
                self.limit = int(pywikibot.input("What is the limit value?"))
            else:
                self.limit = int(arg[len('-limit:'):])
            return True
        elif arg.startswith('-catr'):
            gen = self.getCategoryGen(arg, len('-catr'), recurse=True)
        elif arg.startswith('-category'):
            gen = self.getCategoryGen(arg, len('-category'))
        elif arg.startswith('-cat'):
            gen = self.getCategoryGen(arg, len('-cat'))
        elif arg.startswith('-subcatsr'):
            gen = self.setSubCategoriesGen(arg, 9, recurse=True)
        elif arg.startswith('-subcats'):
            gen = self.setSubCategoriesGen(arg, 8)
        elif arg.startswith('-page'):
            if len(arg) == len('-page'):
                gen = [
                    pywikibot.Page(
                        site,
                        pywikibot.input(u'What page do you want to use?'))
                ]
            else:
                gen = [pywikibot.Page(site, arg[len('-page:'):])]
        elif arg.startswith('-uncatfiles'):
            gen = UnCategorizedImageGenerator()
        elif arg.startswith('-uncatcat'):
            gen = UnCategorizedCategoryGenerator()
        elif arg.startswith('-uncattemplates'):
            gen = UnCategorizedTemplatesGenerator()
        elif arg.startswith('-uncat'):
            gen = UnCategorizedPageGenerator()
        elif arg.startswith('-ref'):
            referredPageTitle = arg[5:]
            if not referredPageTitle:
                referredPageTitle = pywikibot.input(
                    u'Links to which page should be processed?')
            referredPage = pywikibot.Page(site, referredPageTitle)
            gen = ReferringPageGenerator(referredPage)
        elif arg.startswith('-links'):
            linkingPageTitle = arg[7:]
            if not linkingPageTitle:
                linkingPageTitle = pywikibot.input(
                    u'Links from which page should be processed?')
            linkingPage = pywikibot.Page(site, linkingPageTitle)
            gen = LinkedPageGenerator(linkingPage)
        elif arg.startswith('-weblink'):
            url = arg[9:]
            if not url:
                url = pywikibot.input(
                    u'Pages with which weblink should be processed?')
            gen = LinksearchPageGenerator(url)
        elif arg.startswith('-transcludes'):
            transclusionPageTitle = arg[len('-transcludes:'):]
            if not transclusionPageTitle:
                transclusionPageTitle = pywikibot.input(
                    u'Pages that transclude which page should be processed?')
            # The title is looked up in namespace 10 of the site.
            transclusionPage = pywikibot.Page(
                site, "%s:%s" % (site.namespace(10), transclusionPageTitle))
            gen = ReferringPageGenerator(transclusionPage,
                                         onlyTemplateInclusion=True)
        elif arg.startswith('-gorandom'):
            # Take one random page and start the all-pages listing there.
            for firstPage in RandomPageGenerator(number=1):
                firstPageTitle = firstPage.title()
            namespace = pywikibot.Page(site, firstPageTitle).namespace()
            firstPageTitle = pywikibot.Page(
                site, firstPageTitle).title(withNamespace=False)
            gen = AllpagesPageGenerator(firstPageTitle,
                                        namespace,
                                        includeredirects=False)
        elif arg.startswith('-start'):
            firstPageTitle = arg[7:]
            if not firstPageTitle:
                firstPageTitle = pywikibot.input(
                    u'At which page do you want to start?')
            # A namespace given earlier on the command line wins over the
            # one encoded in the start title.
            if self.namespaces != []:
                namespace = self.namespaces[0]
            else:
                namespace = pywikibot.Page(site, firstPageTitle).namespace()

            firstPageTitle = pywikibot.Page(
                site, firstPageTitle).title(withNamespace=False)
            gen = AllpagesPageGenerator(firstPageTitle,
                                        namespace,
                                        includeredirects=False)
        elif arg.startswith('-redirectonly'):
            firstPageTitle = arg[14:]
            if not firstPageTitle:
                firstPageTitle = pywikibot.input(
                    u'At which page do you want to start?')
            namespace = pywikibot.Page(site, firstPageTitle).namespace()
            firstPageTitle = pywikibot.Page(
                site, firstPageTitle).title(withNamespace=False)
            gen = AllpagesPageGenerator(firstPageTitle,
                                        namespace,
                                        includeredirects='only')
        elif arg.startswith('-prefixindex'):
            prefix = arg[13:]
            if not prefix:
                prefix = pywikibot.input(
                    u'What page names are you looking for?')
            gen = PrefixingPageGenerator(prefix=prefix)
        elif arg.startswith('-newimages'):
            limit = arg[11:] or pywikibot.input(
                u'How many images do you want to load?')
            gen = NewimagesPageGenerator(number=int(limit))
        elif arg == ('-new') or arg.startswith('-new:'):
            if len(arg) >= 5:
                gen = NewpagesPageGenerator(number=int(arg[5:]))
            else:
                gen = NewpagesPageGenerator(number=60)
        elif arg.startswith('-imagelinks'):
            imagelinkstitle = arg[len('-imagelinks:'):]
            if not imagelinkstitle:
                imagelinkstitle = pywikibot.input(
                    u'Images on which page should be processed?')
            imagelinksPage = pywikibot.Page(site, imagelinkstitle)
            gen = ImagesPageGenerator(imagelinksPage)
        elif arg.startswith('-search'):
            mediawikiQuery = arg[8:]
            if not mediawikiQuery:
                mediawikiQuery = pywikibot.input(
                    u'What do you want to search for?')
            gen = SearchPageGenerator(mediawikiQuery,
                                      number=None,
                                      namespaces=self.getNamespaces)
        elif arg.startswith('-titleregex'):
            if len(arg) == 11:
                regex = pywikibot.input(
                    u'What page names are you looking for?')
            else:
                regex = arg[12:]
            gen = RegexFilterPageGenerator(site.allpages(), [regex])
        elif arg.startswith('-yahoo'):
            gen = YahooSearchPageGenerator(arg[7:])
        elif arg.startswith('-'):
            # Generic "-<mode>log[:<user or number>[;<number>]]" options.
            mode, log, user = arg.partition('log')
            # exclude -log, -nolog
            if log == 'log' and mode not in ['-', '-no']:
                number = 500
                if not user:
                    user = None
                else:
                    # What follows the separator is either a number of
                    # entries or a user name.
                    try:
                        number = int(user[1:])
                        user = None
                    except ValueError:
                        user = user[1:]
                if user:
                    result = user.split(';')
                    user = result[0]
                    try:
                        number = int(result[1])
                    except (IndexError, ValueError):
                        # No usable count after the user name: keep 500.
                        pass
                gen = LogpagesPageGenerator(number, mode[1:], user)
        if gen:
            self.gens.append(gen)
            return self.getCombinedGenerator()
        else:
            return False
Exemplo n.º 27
0
def main(*args):
    """Parse the command line and run the replace bot.

    The arguments are handed to pywikibot.handleArgs(); whatever is not
    recognized here or by the generator factory is collected as
    alternating "original text" / "replacement text" values.  Depending on
    the options the replacements come from the command line, from a
    replacement file, from interactive input or from a predefined fix.
    A page generator is then built (XML dump, SQL query, explicit page
    titles and/or generator factory options) and passed to ReplaceRobot.

    @param args: command line arguments
    """
    add_cat = None
    gen = None
    # summary message
    summary_commandline = False
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }  # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fix = None
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    # Article of the dump to start with (-xmlstart); None if not given.
    xmlStart = None
    useSql = False
    PageTitles = []
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # This is the maximum number of pages to load per query
    maxquerysize = 60
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Load default summary message.
    # BUG WARNING: This is probably incompatible with the -lang parameter.
    editSummary = i18n.twtranslate(pywikibot.getSite(), 'replace-replacing',
                                   {'description': u''})
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Do not save the page titles, rather work on wiki
    filename = None  # The name of the file to save titles
    titlefile = None  # The file object itself
    # If we save, primary behaviour is append rather then new file
    append = True
    # Default: don't write titles to exception file and don't read them.
    excoutfilename = None  # The name of the file to save exceptions
    excoutfile = None  # The file object itself
    # excinfilename: reserved for later use (reading back exceptions)
    # If we save exceptions, primary behaviour is append
    excappend = True

    # Read commandline parameters.
    # Options are matched by prefix, so the longer of two options sharing a
    # prefix (-xmlstart/-xml, -saveexcnew/-saveexc, -savenew/-save) has to
    # be tested first.
    for arg in pywikibot.handleArgs(*args):
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    pywikibot.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-saveexcnew'):
            excappend = False
            if len(arg) == 11:
                excoutfilename = pywikibot.input(
                    u'Please enter the filename to save the excepted titles' +
                    u'\n(will be deleted if exists):')
            else:
                excoutfilename = arg[12:]
        elif arg.startswith('-saveexc'):
            if len(arg) == 8:
                excoutfilename = pywikibot.input(
                    u'Please enter the filename to save the excepted titles:')
            else:
                excoutfilename = arg[9:]
        elif arg.startswith('-savenew'):
            append = False
            if len(arg) == 8:
                filename = pywikibot.input(
                    u'Please enter the filename to save the titles' +
                    u'\n(will be deleted if exists):')
            else:
                filename = arg[9:]
        elif arg.startswith('-save'):
            if len(arg) == 5:
                filename = pywikibot.input(
                    u'Please enter the filename to save the titles:')
            else:
                filename = arg[6:]
        elif arg.startswith('-replacementfile'):
            if len(arg) == len('-replacementfile'):
                replacefile = pywikibot.input(
                    u'Please enter the filename to read replacements from:')
            else:
                replacefile = arg[len('-replacementfile') + 1:]
            try:
                replacement_source = codecs.open(replacefile, 'r', 'utf-8')
                try:
                    # One value per line; strip a BOM and the line ending.
                    commandline_replacements.extend([
                        x.lstrip(u'\uFEFF').rstrip('\r\n')
                        for x in replacement_source
                    ])
                finally:
                    # Do not leave the file open for the rest of the run.
                    replacement_source.close()
            except IOError:
                raise pywikibot.Error('\n%s cannot be opened. Try again :-)' %
                                      replacefile)
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fix = arg[5:]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            editSummary = arg[9:]
            summary_commandline = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-query:'):
            maxquerysize = int(arg[7:])
        else:
            if not genFactory.handleArg(arg):
                commandline_replacements.append(arg)

    if pywikibot.verbose:
        pywikibot.output(u"commandline_replacements: " +
                         ', '.join(commandline_replacements))

    if (len(commandline_replacements) % 2):
        raise pywikibot.Error('require even number of replacements.')
    elif (len(commandline_replacements) == 2 and fix is None):
        replacements.append(
            (commandline_replacements[0], commandline_replacements[1]))
        if not summary_commandline:
            editSummary = i18n.twtranslate(
                pywikibot.getSite(), 'replace-replacing', {
                    'description':
                    ' (-%s +%s)' %
                    (commandline_replacements[0], commandline_replacements[1])
                })
    elif (len(commandline_replacements) > 1):
        if (fix is None):
            # Group the flat argument list into (old, new) pairs.
            pairs = [(commandline_replacements[i],
                      commandline_replacements[i + 1])
                     for i in range(0, len(commandline_replacements), 2)]
            replacements.extend(pairs)
            if not summary_commandline:
                replacementsDescription = '(%s)' % ', '.join(
                    [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
                editSummary = i18n.twtranslate(
                    pywikibot.getSite(), 'replace-replacing',
                    {'description': replacementsDescription})
        else:
            raise pywikibot.Error(
                'Specifying -fix with replacements is undefined')
    elif fix is None:
        # Nothing on the command line: ask for the replacements.
        old = pywikibot.input(
            u'Please enter the text that should be replaced:')
        new = pywikibot.input(u'Please enter the new text:')
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = pywikibot.input(
                u'Please enter another text that should be replaced,\n'
                u'or press Enter to start:')
            if old == '':
                change += ')'
                break
            new = i18n.input('pywikibot-enter-new-text')
            change += ' & -' + old + ' +' + new
            replacements.append((old, new))
        if not summary_commandline:
            default_summary_message = i18n.twtranslate(pywikibot.getSite(),
                                                       'replace-replacing',
                                                       {'description': change})
            pywikibot.output(u'The summary message will default to: %s' %
                             default_summary_message)
            summary_message = pywikibot.input(
                u'Press Enter to use this default message, or enter a ' +
                u'description of the\nchanges your bot will make:')
            if summary_message == '':
                summary_message = default_summary_message
            editSummary = summary_message

    else:
        # Perform one of the predefined actions.
        fixname = fix  # Save the name for passing to exceptions function.
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s' %
                             fixes.fixes.keys())
            return
        if "regex" in fix:
            regex = fix['regex']
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                editSummary = i18n.twtranslate(pywikibot.getSite(),
                                               str(fix['msg']))
            else:
                editSummary = pywikibot.translate(pywikibot.getSite(),
                                                  fix['msg'])
        if "exceptions" in fix:
            exceptions = fix['exceptions']
            # Try to append common extensions for multiple fixes.
            # It must be either a dictionary or a function that returns a dict.
            if 'include' in exceptions:
                incl = exceptions['include']
                if callable(incl):
                    baseExcDict = incl(fixname)
                else:
                    baseExcDict = incl
                if baseExcDict:
                    for l in baseExcDict:
                        try:
                            exceptions[l].extend(baseExcDict[l])
                        except KeyError:
                            exceptions[l] = baseExcDict[l]
        if "recursive" in fix:
            recursive = fix['recursive']
        if "nocase" in fix:
            caseInsensitive = fix['nocase']
        try:
            replacements = fix['replacements']
            # enable regex/replacements as a dictionary for different langs
            if isinstance(replacements, dict):
                replacements = replacements[pywikibot.getSite().lang]
        except KeyError:
            pywikibot.output(u"No replacements given in fix.")
            return

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    # (the list is updated in place; plain text is escaped first).
    for i, (old, new) in enumerate(replacements):
        if not regex:
            old = re.escape(old)
        oldR = re.compile(old, flags)
        replacements[i] = oldR, new

    for exceptionCategory in [
            'title', 'require-title', 'text-contains', 'inside'
    ]:
        if exceptionCategory in exceptions:
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns

    if xmlFilename:
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join([
            "old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
            for (old, new) in replacements
        ])
        # 'exceptions' is a dict keyed by category, so iterating it directly
        # yields key strings which have no .pattern attribute.  Use the
        # compiled 'text-contains' patterns instead.
        # NOTE(review): presumably this is the only category that applies
        # to the whole page text - confirm against ReplaceRobot.
        textExceptions = exceptions.get('text-contains', [])
        if textExceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                for exc in textExceptions
            ])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [
            pywikibot.Page(pywikibot.getSite(), PageTitle)
            for PageTitle in PageTitles
        ]
        gen = iter(pages)

    gen = genFactory.getCombinedGenerator(gen)
    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return

    preloadingGen = pagegenerators.PreloadingGenerator(gen,
                                                       pageNumber=maxquerysize)

    # Finally we open the file for page titles or set parameter article to None
    if filename:
        try:
            # This opens in strict error mode, that means bot will stop
            # on encoding errors with ValueError.
            # See http://docs.python.org/library/codecs.html#codecs.open
            titlefile = codecs.open(filename,
                                    encoding='utf-8',
                                    mode='a' if append else 'w')
        except IOError:
            pywikibot.output("%s cannot be opened for writing." % filename)
            return
    # The same process with exceptions file:
    if excoutfilename:
        try:
            excoutfile = codecs.open(
                excoutfilename,
                encoding='utf-8',
                mode='a' if excappend else 'w')
        except IOError:
            pywikibot.output("%s cannot be opened for writing." %
                             excoutfilename)
            # Do not leak the titles file that was opened just before.
            if titlefile:
                titlefile.close()
            return
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, editSummary,
                       titlefile, excoutfile)
    try:
        bot.run()
    finally:
        # Just for the spirit of programming (they were flushed)
        if titlefile:
            titlefile.close()
        if excoutfile:
            excoutfile.close()
Exemplo n.º 28
0
def main(*args):
    """Parse the command line and run the hewiki replace bot on an XML dump.

    Arguments handled here (anything else is silently ignored):

    -summary:TEXT     edit summary (default: replaceConfig.defaultSummary)
    -xml[:FILE]       XML dump to scan; asked for interactively if omitted
    -xmlstart[:PAGE]  dumped article to start with; asked for if omitted

    If no XML dump is given a message is printed and the function returns.

    @param args: command line arguments, handed to pywikibot.handleArgs
    """
    pywikibot.output('Starting hewiki-replacebot')
    editSummary = replaceConfig.defaultSummary
    xmlFilename = None
    # Defined up front so that a missing -xmlstart simply leaves it at None.
    xmlStart = None
    for arg in pywikibot.handleArgs(*args):
        if arg.startswith('-summary:'):
            editSummary = arg[9:]
        elif arg.startswith('-xmlstart'):
            # Must be tested before '-xml', which is a prefix of '-xmlstart'.
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]

    if xmlFilename is None:
        pywikibot.output('no xml dump specified. '
                         'please fill -xml and the xml file to be used')
        return
    replaceDict, exceptReplace = fillReplementsDict()

    # Copy the configured list: extending it in place would modify
    # replaceConfig.safeTemplates itself and pile up entries on each call.
    safeTemplates = list(replaceConfig.safeTemplates)
    # add external links templates
    genFactory = pagegenerators.GeneratorFactory()
    for safeCategory in replaceConfig.safeTemplatesCategories:
        citeTemplates = genFactory.getCategoryGen(safeCategory, -1, True)
        safeTemplates.extend(
            [page.title(withNamespace=False) for page in citeTemplates])

    fileUsageRgx = re.compile(replaceConfig.fileUsageRgx, re.I)
    yiRgx = re.compile(r'\[\[yi:.*?\]\]')
    # Template names are joined verbatim (not escaped) into one alternation.
    safeTemplatesRgx = re.compile(
        u'\\{\\{(' + u'|'.join(safeTemplates) + u').*?\\}\\}', re.I)
    exceptReplaceRgx = re.compile(u'(' + u'|'.join(exceptReplace) + u')')
    exceptions = {
        'title':         [],
        'text-contains': [re.compile(replaceConfig.redirectRgx, re.I)],
        'inside':        [fileUsageRgx, safeTemplatesRgx, exceptReplaceRgx,
                          yiRgx],
        'inside-tags':   ['nowiki', 'math', 'comment', 'pre', 'source',
                          'hyperlink', 'gallery'],
        'require-title': [],
    }
    gen = XmlDumpReplacePageGeneratorHe(replaceDict, xmlFilename, xmlStart,
                                        exceptions)
    genFactory.namespaces = replaceConfig.namespaces
    # For debugging purposes, uncomment this to work on a specific page:
    #pages = [pywikibot.Page(pywikibot.getSite(), PageTitle)
    #                 for PageTitle in [u'PAGENAME']]
    #gen = iter(pages)
    # end of specific page
    maxquerysize = 60
    gen = genFactory.getCombinedGenerator(gen)
    preloadingGen = pagegenerators.PreloadingGenerator(
        gen, pageNumber=maxquerysize)
    # Only keep pages whose last edit is at least one day old.
    gen = pagegenerators.EdittimeFilterPageGenerator(
        preloadingGen,
        endtime=datetime.datetime.utcnow() - datetime.timedelta(days=1))
    pywikibot.output('starting replace')
    bot = ReplaceRobotHe(gen, replaceDict, exceptions, editSummary)
    bot.run()
    pywikibot.output('finished all replacements')
Exemplo n.º 29
0
def main(*args):
    """
    Parse the command line and start the redirect bot.

    If args is an empty list, sys.argv is used.

    Bare words select the action ('double'/'do', 'broken'/'br' or 'both');
    dashed options configure either the bot or the page source. Every
    argument is split at its first colon into a name and a value. Help is
    suggested instead of running the bot when no action was given or when
    -xml is combined with -moves or -fullscan.

    @param args: command line arguments
    @type args: list of unicode
    """
    # keyword options handed to RedirectRobot
    bot_options = {}
    # either resolve double redirects, delete broken ones, or both
    action = None
    # None loads the maintenance special page from the live wiki;
    # otherwise the filename of a local XML dump
    xml_filename = None
    # namespaces to process; the empty list is the default
    namespaces = []
    # redirect at which the search for double redirects restarts
    # (dump only); the default is -1
    offset = -1
    moved_pages = False
    fullscan = False
    start = ''
    until = ''
    total = None
    page_name = None

    for raw_arg in pywikibot.handle_args(args):
        name, _, value = raw_arg.partition(':')
        option = name[1:]
        # --- bot options ---
        if name in ('do', 'double'):
            action = 'double'
        elif name in ('br', 'broken'):
            action = 'broken'
        elif name == 'both':
            action = 'both'
        elif option in ('always', 'delete'):
            bot_options[option] = True
        elif option == 'total':
            total = int(value)
            bot_options['number'] = total
        # --- generator options ---
        elif option == 'fullscan':
            fullscan = True
        elif option == 'xml':
            if value:
                xml_filename = value
            else:
                xml_filename = i18n.input('pywikibot-enter-xml-filename')
        elif option == 'moves':
            moved_pages = True
        elif option == 'namespace':
            # "-namespace:" does NOT yield -namespace:0 further down the road!
            if value:
                ns = value
            else:
                ns = i18n.input('pywikibot-enter-namespace-number')
            # TODO: at least for some generators enter a namespace by its name
            # or number
            if ns == '':
                ns = '0'
            try:
                ns = int(ns)
            except ValueError:
                # -namespace:all Process all namespaces.
                # Only works with the API read interface.
                pass
            if ns not in namespaces:
                namespaces.append(ns)
        elif option == 'offset':
            offset = int(value)
        elif option == 'start':
            start = value
        elif option == 'until':
            until = value
        elif option == 'page':
            page_name = value
        # --- deprecated or unknown options ---
        elif option == 'step':
            issue_deprecation_warning('The usage of "{0}"'.format(name),
                                      2, ArgumentDeprecationWarning)
        else:
            pywikibot.output(u'Unknown argument: {0!s}'.format(name))

    if not action or (xml_filename and (moved_pages or fullscan)):
        # Collect one message per conflicting option pair.
        problems = []
        if xml_filename and moved_pages:
            problems.append(
                'Either use a XML file or the moved pages from the API')
        if xml_filename and fullscan:
            problems.append(
                'Either use a XML file or do a full scan using the API')
        pywikibot.bot.suggest_help(additional_text='\n'.join(problems),
                                   missing_action=not action)
        return

    pywikibot.Site().login()
    gen = RedirectGenerator(xml_filename, namespaces, offset, moved_pages,
                            fullscan, start, until, total, page_name)
    RedirectRobot(action, gen, **bot_options).run()
Exemplo n.º 30
0
 def run(self):
     """
     Start the robot.

     Every page yielded by self.generator is loaded, the replacements are
     applied and the resulting diff is shown. Pages are skipped when their
     title or text is on the exceptions list, when they cannot be edited or
     when they do not exist. Unless self.acceptall is set, the user is asked
     what to do with each change: 'y' saves asynchronously, 'e' opens an
     editor, 'b' opens the page in a browser and reloads it afterwards,
     'a' accepts this and all following changes, 'q' ends the run.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         if self.isTitleExcepted(page.title()):
             pywikibot.output(
                 u'Skipping %s because the title is on the exceptions list.'
                 % page.title(asLink=True))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             if not page.canBeEdited():
                 pywikibot.output(u"You can't edit page %s"
                                  % page.title(asLink=True))
                 continue
         except pywikibot.NoPage:
             pywikibot.output(u'Page %s not found' % page.title(asLink=True))
             continue
         new_text = original_text
         # Loops until the change is accepted, rejected or not needed;
         # 'e' and 'b' restart the loop with updated text.
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output(
                     u'Skipping %s because it contains text '
                     u'that is on the exceptions list.'
                     % page.title(asLink=True))
                 break
             new_text = self.doReplacements(new_text)
             if new_text == original_text:
                 pywikibot.output(u'No changes were necessary in %s'
                                  % page.title(asLink=True))
                 break
             if self.recursive:
                 # Re-apply the replacements until the text stops changing.
                 newest_text = self.doReplacements(new_text)
                 while (newest_text!=new_text):
                     new_text = newest_text
                     newest_text = self.doReplacements(new_text)
             if hasattr(self, "addedCat"):
                 cats = page.categories(nofollow_redirects=True)
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = pywikibot.replaceCategoryLinks(new_text,
                                                               cats)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                              % page.title())
             pywikibot.showDiff(original_text, new_text)
             if self.acceptall:
                 break
             choice = pywikibot.inputChoice(
                         u'Do you want to accept these changes?',
                         ['Yes', 'No', 'Edit', 'open in Browser', 'All',
                          'Quit'],
                         ['y', 'N', 'e', 'b', 'a', 'q'], 'N')
             if choice == 'e':
                 editor = editarticle.TextEditor()
                 # Note: the editor is opened on the original text, not on
                 # the text with the replacements applied.
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == 'b':
                 webbrowser.open("http://%s%s" % (
                     page.site.hostname(),
                     page.site.nice_get_address(page.title())
                 ))
                 i18n.input('pywikibot-enter-finished-browser')
                 # The page may have been changed in the browser: reload it.
                 try:
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output(u'Page %s has been deleted.'
                                      % page.title())
                     break
                 new_text = original_text
                 continue
             if choice == 'q':
                 return
             if choice == 'a':
                 self.acceptall = True
             if choice == 'y':
                 page.put_async(new_text, self.summary)
             # Reached for 'y', 'a' and 'N': done with asking for this page.
             break
         # In accept-all mode the page is saved synchronously here.
         if self.acceptall and new_text != original_text:
             try:
                 page.put(new_text, self.summary)
             except pywikibot.EditConflict:
                 pywikibot.output(u'Skipping %s because of edit conflict'
                                  % (page.title(),))
             except pywikibot.SpamfilterError as e:
                 pywikibot.output(
                     u'Cannot change %s because of blacklist entry %s'
                     % (page.title(), e.url))
             except pywikibot.PageNotSaved as error:
                 pywikibot.output(u'Error putting page: %s'
                                  % (error.args,))
             except pywikibot.LockedPage:
                 pywikibot.output(u'Skipping %s (locked page)'
                                  % (page.title(),))
Exemplo n.º 31
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Arguments handled here: -talk, -notalk, -repeat, -ignore:CODE,
    -day:DAYS, -xml[:FILE] and -xmlstart[:PAGE]; everything else is handed
    to the generator factory. After the bot has run (or was interrupted)
    the remaining link check threads get up to 30 seconds to finish, the
    report thread is shut down and the history is saved.

    @param args: command line arguments
    @type args: list of unicode
    @return: True if a page generator was available and the bot was run,
        False if help was suggested instead
    @rtype: bool
    """
    gen = None
    xmlFilename = None
    # Defined up front so that a missing -xmlstart simply leaves it at None.
    xmlStart = None
    HTTPignore = []
    day = 7

    if isinstance(memento_client, ImportError):
        warn('memento_client not imported: %s' % memento_client, ImportWarning)

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg == '-talk':
            config.report_dead_links_on_talk = True
        elif arg == '-notalk':
            config.report_dead_links_on_talk = False
        elif arg == '-repeat':
            gen = RepeatPageGenerator()
        elif arg.startswith('-ignore:'):
            HTTPignore.append(int(arg[8:]))
        elif arg.startswith('-day:'):
            day = int(arg[5:])
        elif arg.startswith('-xmlstart'):
            # Must be tested before '-xml', which is a prefix of '-xmlstart'.
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        else:
            genFactory.handleArg(arg)

    # An XML dump takes precedence over a generator chosen by -repeat.
    if xmlFilename:
        gen = XmlDumpPageGenerator(xmlFilename, xmlStart, genFactory.namespaces)

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    # fetch at least 240 pages simultaneously from the wiki, but more if
    # a high thread number is set.
    pageNumber = max(240, config.max_external_links * 2)
    gen = pagegenerators.PreloadingGenerator(gen, step=pageNumber)
    gen = pagegenerators.RedirectFilterPageGenerator(gen)
    bot = WeblinkCheckerRobot(gen, HTTPignore, day)
    try:
        bot.run()
    finally:
        waitTime = 0
        # Don't wait longer than 30 seconds for threads to finish.
        while countLinkCheckThreads() > 0 and waitTime < 30:
            try:
                pywikibot.output(u"Waiting for remaining %i threads to "
                                 u"finish, please wait..."
                                 % countLinkCheckThreads())
                # wait 1 second
                time.sleep(1)
                waitTime += 1
            except KeyboardInterrupt:
                pywikibot.output(u'Interrupted.')
                break
        if countLinkCheckThreads() > 0:
            pywikibot.output(u'Remaining %i threads will be killed.'
                             % countLinkCheckThreads())
            # Threads will die automatically because they are daemonic.
        if bot.history.reportThread:
            bot.history.reportThread.shutdown()
            # wait until the report thread is shut down; the user can
            # interrupt it by pressing CTRL-C.
            try:
                # NOTE(review): isAlive() is the legacy spelling of
                # threading.Thread.is_alive(); confirm what reportThread
                # provides before switching.
                while bot.history.reportThread.isAlive():
                    time.sleep(0.1)
            except KeyboardInterrupt:
                pywikibot.output(u'Report thread interrupted.')
                bot.history.reportThread.kill()
        pywikibot.output(u'Saving history...')
        bot.history.save()
    return True
Exemplo n.º 32
0
def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    The function works in these steps:

    1. Parse the arguments; anything that is not a known option is taken
       as part of an "old text"/"new text" pair.
    2. Build the list of replacements from the command line pairs, the
       -pairsfile content, manual input and the predefined fixes.
    3. Determine the edit summary, asking the user if necessary.
    4. Pre-compile replacements and exceptions, build the page generator
       and run ReplaceRobot.

    The function returns early, without running the bot, when the pairs
    file could not be handled, when the command line pairs are incomplete,
    when an unknown fix is requested or when no generator is available.

    :param args: command line arguments
    """
    options = {}
    gen = None
    # summary message
    edit_summary = ''
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    file_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    xmlStart = None
    sql_query = None  # type: Optional[str]
    # Set the default regular expression flags
    flags = 0
    # Request manual replacements even if replacements are already defined
    manual_input = False

    # Read commandline parameters.
    # -mysqlquery is disabled in the factory so that it reaches the loop
    # below and is handled by this script itself.
    genFactory = pagegenerators.GeneratorFactory(
        disabled_options=['mysqlquery'])
    local_args = pywikibot.handle_args(args)
    local_args = genFactory.handle_args(local_args)
    # Split off the exception options; the remaining arguments are parsed
    # in the loop below.
    local_args, exceptions = handle_exceptions(*local_args)

    for arg in local_args:
        opt, _, value = arg.partition(':')
        if opt == '-regex':
            regex = True
        elif opt == '-xmlstart':
            xmlStart = value or pywikibot.input(
                'Please enter the dumped article to start with:')
        elif opt == '-xml':
            xmlFilename = value or i18n.input('pywikibot-enter-xml-filename')
        elif opt == '-mysqlquery':
            sql_query = value
        elif opt == '-fix':
            fixes_set.append(value)
        elif opt == '-sleep':
            options['sleep'] = float(value)
        elif opt in ('-allowoverlap', '-always', '-recursive'):
            # The option name without its leading dash is the bot option key.
            options[opt[1:]] = True
        elif opt == '-nocase':
            flags |= re.IGNORECASE
        elif opt == '-dotall':
            flags |= re.DOTALL
        elif opt == '-multiline':
            flags |= re.MULTILINE
        elif opt == '-addcat':
            options['addcat'] = value
        elif opt == '-summary':
            edit_summary = value
        elif opt == '-automaticsummary':
            # True (instead of a string) marks that no summary shall be
            # asked for; it is tested with "is True" further down.
            edit_summary = True
        elif opt == '-manualinput':
            manual_input = True
        elif opt == '-pairsfile':
            file_replacements = handle_pairsfile(value)
        else:
            # Not an option: part of an "old text"/"new text" pair.
            commandline_replacements.append(arg)

    # presumably handle_pairsfile returns None when the file could not be
    # used - verify against its definition
    if file_replacements is None:
        return

    # Old and new texts must come in pairs.
    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return

    commandline_replacements += file_replacements
    # Ask for replacements if none were given at all or if -manualinput
    # was used.
    if not(commandline_replacements or fixes_set) or manual_input:
        commandline_replacements += handle_manual()

    # The summary stored here won't be actually used but is only an example
    site = pywikibot.Site()
    single_summary = None
    # Walk through the flat list two items at a time: (old, new).
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            # Only the first pair is used for the example summary.
            single_summary = i18n.twtranslate(
                site, 'replace-replacing',
                {'description':
                 ' (-{} +{})'.format(replacement.old, replacement.new)}
            )
        replacements.append(replacement)

    # Perform one of the predefined actions.
    # Fixes (or single replacements of a fix) which have no summary
    missing_fixes_summaries = []
    # Generators defined by a fix are only used if the command line did not
    # provide any generator.
    generators_given = bool(genFactory.gens)
    for fix_name in fixes_set:
        try:
            fix = fixes.fixes[fix_name]
        except KeyError:
            pywikibot.output('Available predefined fixes are: {}'
                             .format(', '.join(fixes.fixes.keys())))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: {}'
                                 .format(fixes.filename))
            return
        if not fix['replacements']:
            pywikibot.warning('No replacements defined for fix "{}"'
                              .format(fix_name))
            continue
        # A string 'msg' is passed to i18n.twtranslate, any other value to
        # i18n.translate.
        if 'msg' in fix:
            if isinstance(fix['msg'], str):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        if not generators_given and 'generator' in fix:
            gen_args = fix['generator']
            # A single generator argument may be given as a plain string.
            if isinstance(gen_args, str):
                gen_args = [gen_args]
            for gen_arg in gen_args:
                genFactory.handle_arg(gen_arg)
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'),
                                          set_summary,
                                          name=fix_name)
        # Whether some replacements have a summary, if so only show which
        # have none, otherwise just mention the complete fix
        missing_fix_summaries = []
        for index, replacement in enumerate(fix['replacements'], start=1):
            # An optional third element is the replacement's own summary.
            summary = None if len(replacement) < 3 else replacement[2]
            if not set_summary and not summary:
                missing_fix_summaries.append(
                    '"{}" (replacement #{})'.format(fix_name, index))
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning('The old string "{}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[0])))
            # The new value may be a callable, which cannot be checked.
            if (not callable(replacement[1])
                    and chars.contains_invisible(replacement[1])):
                pywikibot.warning('The new string "{}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[1])))
            replacement_set.append(ReplacementListEntry(
                old=replacement[0],
                new=replacement[1],
                fix_set=replacement_set,
                edit_summary=summary,
            ))

        # Exceptions specified via 'fix' shall be merged to those via CLI.
        if replacement_set:
            replacements.extend(replacement_set)
            if replacement_set._exceptions is not None:
                for k, v in replacement_set._exceptions.items():
                    if k in exceptions:
                        # Union of both lists without duplicates; the order
                        # of the entries is not preserved.
                        exceptions[k] = list(set(exceptions[k]) | set(v))
                    else:
                        exceptions[k] = v

        # If no replacement of the fix has a summary, mention the fix once
        # instead of listing every replacement.
        if len(fix['replacements']) == len(missing_fix_summaries):
            missing_fixes_summaries.append(
                '"{}" (all replacements)'.format(fix_name))
        else:
            missing_fixes_summaries += missing_fix_summaries

    # NOTE(review): with -automaticsummary and neither command line pairs
    # nor fixes lacking a summary this condition is false, so edit_summary
    # stays True and is passed to ReplaceRobot as summary - confirm that
    # ReplaceRobot copes with that.
    if ((not edit_summary or edit_summary is True)
            and (missing_fixes_summaries or single_summary)):
        if single_summary:
            pywikibot.output('The summary message for the command line '
                             'replacements will be something like: '
                             + single_summary)
        if missing_fixes_summaries:
            pywikibot.output('The summary will not be used when the fix has '
                             'one defined but the following fix(es) do(es) '
                             'not have a summary defined: {}'
                             .format(', '.join(missing_fixes_summaries)))
        if edit_summary is not True:
            edit_summary = pywikibot.input(
                'Press Enter to use this automatic message, or enter a '
                'description of the\nchanges your bot will make:')
        else:
            edit_summary = ''

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    # An XML dump takes precedence over an SQL query as page source.
    if xmlFilename:
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions, site)
    elif sql_query is not None:
        # Only -excepttext option is considered by the query. Other
        # exceptions are taken into account by the ReplaceRobot
        gen = handle_sql(sql_query, replacements, exceptions['text-contains'])

    gen = genFactory.getCombinedGenerator(gen, preload=True)
    # A true result of suggest_help is taken as "do not run the bot".
    if pywikibot.bot.suggest_help(missing_generator=not gen):
        return

    bot = ReplaceRobot(gen, replacements, exceptions, site=site,
                       summary=edit_summary, **options)
    site.login()
    bot.run()
Exemplo n.º 33
0
def main(*args):
    """Read the command line and start the redirect bot.

    Bare words select the action ('double'/'do', 'broken'/'br' or 'both');
    dashed options configure the page source and the bot. The help text is
    shown instead of running the bot when no action was given or when -xml
    is combined with -moves or -fullscan.

    @param args: command line arguments, handed to pywikibot.handleArgs
    """
    # either resolve double redirects, delete broken ones, or both
    action = None
    # None loads the maintenance special page from the live wiki;
    # otherwise the filename of a local XML dump
    xml_file = None
    # namespaces to process; the empty list is the default
    namespaces = []
    # redirect at which the search for double redirects restarts
    # (dump only); the default is -1
    offset = -1
    moved_pages = False
    full_scan = False
    start = ''
    until = ''
    total = None
    step = None
    always = False
    delete = False

    for arg in pywikibot.handleArgs(*args):
        # actions
        if arg in ('double', 'do'):
            action = 'double'
        elif arg in ('broken', 'br'):
            action = 'broken'
        elif arg == 'both':
            action = 'both'
        # plain switches
        elif arg == '-fullscan':
            full_scan = True
        elif arg == '-always':
            always = True
        elif arg == '-delete':
            delete = True
        # options which may carry a value
        elif arg.startswith('-xml'):
            xml_file = (arg[5:] if len(arg) != 4
                        else i18n.input('pywikibot-enter-xml-filename'))
        elif arg.startswith('-moves'):
            moved_pages = True
        elif arg.startswith('-namespace:'):
            ns = arg[11:]
            if not ns:
                # "-namespace:" does NOT yield -namespace:0 further down the
                # road!
                ns = i18n.input('pywikibot-enter-namespace-number')
            # TODO! at least for some generators enter a namespace by its name
            # or number
            if ns == '':
                ns = '0'
            try:
                ns = int(ns)
            except ValueError:
                # -namespace:all Process all namespaces.
                # Only works with the API read interface.
                pass
            if ns not in namespaces:
                namespaces.append(ns)
        elif arg.startswith('-offset:'):
            offset = int(arg[len('-offset:'):])
        elif arg.startswith('-start:'):
            start = arg[len('-start:'):]
        elif arg.startswith('-until:'):
            until = arg[len('-until:'):]
        elif arg.startswith('-total:'):
            total = int(arg[len('-total:'):])
        elif arg.startswith('-step:'):
            step = int(arg[len('-step:'):])
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)

    # An action is mandatory and a dump cannot be combined with the
    # API-based sources.
    if not action or (xml_file and (moved_pages or full_scan)):
        pywikibot.showHelp()
        return

    gen = RedirectGenerator(xml_file, namespaces, offset, moved_pages,
                            full_scan, start, until, total, step)
    RedirectRobot(action, gen, always, total, delete).run()
Exemplo n.º 34
0
    def handleArg(self, arg):
        """Parse one argument at a time.

        If it is recognized as an argument that specifies a generator, a
        generator is created and added to the accumulation list, and the
        function returns true.  Otherwise, it returns false, so that caller
        can try parsing the argument. Call getCombinedGenerator() after all
        arguments have been parsed to get the final output generator.

        """
        site = pywikibot.getSite()
        gen = None
        if arg.startswith('-filelinks'):
            fileLinksPageTitle = arg[11:]
            if not fileLinksPageTitle:
                fileLinksPageTitle = i18n.input(
                    'pywikibot-enter-file-links-processing')
            if fileLinksPageTitle.startswith(site.namespace(6)
                                             + ":"):
                fileLinksPage = pywikibot.ImagePage(site,
                                                    fileLinksPageTitle)
            else:
                fileLinksPage = pywikibot.ImagePage(site,
                                                    'Image:' +
                                                    fileLinksPageTitle)
            gen = FileLinksGenerator(fileLinksPage)
        elif arg.startswith('-unusedfiles'):
            if len(arg) == 12:
                gen = UnusedFilesGenerator()
            else:
                gen = UnusedFilesGenerator(number = int(arg[13:]))
        elif arg.startswith('-unwatched'):
            if len(arg) == 10:
                gen = UnwatchedPagesPageGenerator()
            else:
                gen = UnwatchedPagesPageGenerator(number = int(arg[11:]))
        elif arg.startswith('-usercontribs'):
            gen = UserContributionsGenerator(arg[14:])
        elif arg.startswith('-withoutinterwiki'):
            if len(arg) == 17:
                gen = WithoutInterwikiPageGenerator()
            else:
                gen = WithoutInterwikiPageGenerator(number = int(arg[18:]))
        elif arg.startswith('-interwiki'):
            title = arg[11:]
            if not title:
                title = i18n.input('pywikibot-enter-page-processing')
            page = pywikibot.Page(pywikibot.Link(title,
                                                 pywikibot.Site()))
            gen = InterwikiPageGenerator(page)
        elif arg.startswith('-recentchanges'):
            if len(arg) >= 15:
                gen = RecentChangesPageGenerator(total=int(arg[15:]))
            else:
                gen = RecentChangesPageGenerator(total=60)
            gen = DuplicateFilterPageGenerator(gen)
        elif arg.startswith('-file'):
            textfilename = arg[6:]
            if not textfilename:
                textfilename = pywikibot.input(
                    u'Please enter the local file name:')
            gen = TextfilePageGenerator(textfilename)
        elif arg.startswith('-namespace'):
            if len(arg) == len('-namespace'):
                self.namespaces.append(
                    pywikibot.input(u'What namespace are you filtering on?'))
            else:
                self.namespaces.extend(arg[len('-namespace:'):].split(","))
            return True
        elif arg.startswith('-ns'):
            if len(arg) == len('-ns'):
                self.namespaces.append(
                    pywikibot.input(u'What namespace are you filtering on?'))
            else:
                self.namespaces.extend(arg[len('-ns:'):].split(","))
            return True
        elif arg.startswith('-step'):
            if len(arg) == len('-step'):
                self.step = int(pywikibot.input("What is the step value?"))
            else:
                self.step = int(arg[len('-step:'):])
            return True
        elif arg.startswith('-limit'):
            if len(arg) == len('-limit'):
                self.limit = int(pywikibot.input("What is the limit value?"))
            else:
                self.limit = int(arg[len('-limit:'):])
            return True
        elif arg.startswith('-catr'):
            gen = self.getCategoryGen(arg, len('-catr'), recurse = True)
        elif arg.startswith('-category'):
            gen = self.getCategoryGen(arg, len('-category'))
        elif arg.startswith('-cat'):
            gen = self.getCategoryGen(arg, len('-cat'))
        elif arg.startswith('-subcatsr'):
            gen = self.setSubCategoriesGen(arg, 9, recurse = True)
        elif arg.startswith('-subcats'):
            gen = self.setSubCategoriesGen(arg, 8)
        elif arg.startswith('-page'):
            if len(arg) == len('-page'):
                gen = [pywikibot.Page(
                           pywikibot.Link(
                               pywikibot.input(
                                   u'What page do you want to use?'),
                               pywikibot.getSite())
                           )]
            else:
                gen = [pywikibot.Page(pywikibot.Link(arg[len('-page:'):],
                                                     pywikibot.getSite())
                                      )]
        elif arg.startswith('-uncatfiles'):
            gen = UnCategorizedImageGenerator()
        elif arg.startswith('-uncatcat'):
            gen = UnCategorizedCategoryGenerator()
        elif arg.startswith('-uncat'):
            gen = UnCategorizedPageGenerator()
        elif arg.startswith('-ref'):
            referredPageTitle = arg[5:]
            if not referredPageTitle:
                referredPageTitle = pywikibot.input(
                    u'Links to which page should be processed?')
            referredPage = pywikibot.Page(pywikibot.Link(referredPageTitle,
                                                         pywikibot.Site()))
            gen = ReferringPageGenerator(referredPage)
        elif arg.startswith('-links'):
            linkingPageTitle = arg[7:]
            if not linkingPageTitle:
                linkingPageTitle = pywikibot.input(
                    u'Links from which page should be processed?')
            linkingPage = pywikibot.Page(pywikibot.Link(linkingPageTitle,
                                                        pywikibot.Site()))
            gen = LinkedPageGenerator(linkingPage)
        elif arg.startswith('-weblink'):
            url = arg[9:]
            if not url:
                url = pywikibot.input(
                    u'Pages with which weblink should be processed?')
            gen = LinksearchPageGenerator(url)
        elif arg.startswith('-transcludes'):
            transclusionPageTitle = arg[len('-transcludes:'):]
            if not transclusionPageTitle:
                transclusionPageTitle = pywikibot.input(
                    u'Pages that transclude which page should be processed?')
            transclusionPage = pywikibot.Page(
                                   pywikibot.Link(transclusionPageTitle,
                                                  defaultNamespace=10,
                                                  source=pywikibot.Site()))
            gen = ReferringPageGenerator(transclusionPage,
                                         onlyTemplateInclusion=True)
        elif arg.startswith('-start'):
            firstPageTitle = arg[7:]
            if not firstPageTitle:
                firstPageTitle = pywikibot.input(
                    u'At which page do you want to start?')
            firstpagelink = pywikibot.Link(firstPageTitle,
                                           pywikibot.Site())
            namespace = firstpagelink.namespace
            firstPageTitle = firstpagelink.title
            gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                        includeredirects=False)
        elif arg.startswith('-prefixindex'):
            prefix = arg[13:]
            namespace = None
            if not prefix:
                prefix = pywikibot.input(
                    u'What page names are you looking for?')
            gen = PrefixingPageGenerator(prefix=prefix)
        elif arg.startswith('-newimages'):
            limit = arg[11:] or pywikibot.input(
                u'How many images do you want to load?')
            gen = NewimagesPageGenerator(total=int(limit))
        elif arg.startswith('-newpages'):
            if len(arg) >= 10:
              gen = NewpagesPageGenerator(total=int(arg[10:]))
            else:
              gen = NewpagesPageGenerator(total=60)
        elif arg.startswith('-imagesused'):
            imagelinkstitle = arg[len('-imagesused:'):]
            if not imagelinkstitle:
                imagelinkstitle = pywikibot.input(
                    u'Images on which page should be processed?')
            imagelinksPage = pywikibot.Page(pywikibot.Link(imagelinkstitle,
                                                           pywikibot.Site()))
            gen = ImagesPageGenerator(imagelinksPage)
        elif arg.startswith('-search'):
            mediawikiQuery = arg[8:]
            if not mediawikiQuery:
                mediawikiQuery = pywikibot.input(
                    u'What do you want to search for?')
            # In order to be useful, all namespaces are required
            gen = SearchPageGenerator(mediawikiQuery, namespaces = [])
        elif arg.startswith('-google'):
            gen = GoogleSearchPageGenerator(arg[8:])
        elif arg.startswith('-titleregex'):
            if len(arg) == 6:
                regex = pywikibot.input(
                    u'What page names are you looking for?')
            else:
                regex = arg[7:]
            gen = RegexFilterPageGenerator(pywikibot.Site().allpages(), regex)
        elif arg.startswith('-yahoo'):
            gen = YahooSearchPageGenerator(arg[7:])
        else:
            pass
        if gen:
            self.gens.append(gen)
            return True
        else:
            return False
Exemplo n.º 35
0
def main(*args) -> None:
    """
    Parse the command line, build the page source and run the redirect bot.

    Arguments are first passed through pywikibot.handle_args(); each
    remaining argument is split at its first colon into a name and a value.
    Bare words select the action, dash-prefixed names set bot or generator
    options, and anything else is offered to the generator factory.

    @param args: command line arguments
    @type args: str
    """
    bot_kwargs = {}  # type: Dict[str, Any]
    generator_kwargs = {}  # type: Dict[str, Any]
    # Abbreviated spellings of the action keywords.
    short_actions = {'do': 'double', 'br': 'broken'}
    # What the bot should do: 'double', 'broken' or 'both'.
    action = None
    # Names of the -fullscan / -moves / -xml arguments that were given;
    # more than one distinct entry is rejected after parsing.
    exclusive_sources = set()
    factory = pagegenerators.GeneratorFactory()

    for raw_argument in pywikibot.handle_args(args):
        name, _, value = raw_argument.partition(':')
        # Text following the first dash of the name ('' when there is none).
        option = name.partition('-')[2]

        # --- bot options ---
        if name in short_actions:
            action = short_actions[name]
        elif name in ('both', 'broken', 'double'):
            action = name
        elif option in ('always', 'delete'):
            bot_kwargs[option] = True
        elif option == 'sdtemplate':
            if not value:
                value = pywikibot.input(
                    'Which speedy deletion template to use?')
            bot_kwargs['sdtemplate'] = value
        # --- generator options ---
        elif option in ('fullscan', 'moves'):
            generator_kwargs[option] = True
            exclusive_sources.add(name)
        elif option == 'xml':
            if not value:
                value = i18n.input('pywikibot-enter-xml-filename')
            generator_kwargs[option] = value
            exclusive_sources.add(name)
        elif option == 'offset':
            generator_kwargs[option] = int(value)
        elif option in ('start', 'until'):
            generator_kwargs[option] = value
        elif option == 'limit':
            # The limit is handed to both the bot and the generator.
            limit = int(value)
            bot_kwargs['limit'] = limit
            generator_kwargs['limit'] = limit
        elif not factory.handle_arg(raw_argument):
            pywikibot.output('Unknown argument: ' + name)

    if len(exclusive_sources) > 1:
        problem = 'You can only use one of {0} options.'.format(
            ' or '.join(exclusive_sources))
        pywikibot.bot.suggest_help(additional_text=problem,
                                   missing_action=not action)
        return

    if not action:
        pywikibot.bot.suggest_help(missing_action=True)
        return

    # Only build a RedirectGenerator when no generator argument was
    # accepted by the factory.
    fallback = None
    if not factory.gens:
        if factory.namespaces:
            generator_kwargs['namespaces'] = factory.namespaces
        fallback = RedirectGenerator(action, **generator_kwargs)
    bot_kwargs['generator'] = factory.getCombinedGenerator(gen=fallback)
    RedirectRobot(action, **bot_kwargs).run()
Exemplo n.º 36
0
    def handleArg(self, arg):
        """Parse one command line argument at a time.

        Arguments are matched by prefix, so the order of the branches below
        matters: a longer option must be tested before any shorter option
        that is a prefix of it (e.g. '-filelinks' before '-file', '-catr'
        and '-category' before '-cat', '-uncatfiles' before '-uncat').

        @param arg: a single command line argument, e.g. '-cat:Foo'
        @return: True for the filter arguments -namespace, -ns and -limit
            (they only update self.namespaces / self.limit); the result of
            self.getCombinedGenerator() when a generator was created and
            appended to self.gens; False when the argument was not
            recognized, so that the caller can try parsing it itself.
        @raise ValueError: when a numeric value cannot be converted with
            int() (except for the -usercontribs and -...log counts, which
            fall back to a default).
        """
        site = pywikibot.getSite()
        gen = None
        if arg.startswith('-filelinks'):
            fileLinksPageTitle = arg[11:]
            if not fileLinksPageTitle:
                fileLinksPageTitle = i18n.input(
                    'pywikibot-enter-file-links-processing')
            # Prepend a file namespace unless the local one is already there.
            if not fileLinksPageTitle.startswith(site.namespace(6) + ":"):
                fileLinksPageTitle = 'Image:' + fileLinksPageTitle
            fileLinksPage = pywikibot.ImagePage(site, fileLinksPageTitle)
            gen = FileLinksGenerator(fileLinksPage)
        elif arg.startswith('-unusedfiles'):
            if len(arg) == 12:
                gen = UnusedFilesGenerator()
            else:
                gen = UnusedFilesGenerator(number=int(arg[13:]))
        elif arg.startswith('-unwatched'):
            if len(arg) == 10:
                gen = UnwatchedPagesPageGenerator()
            else:
                gen = UnwatchedPagesPageGenerator(number=int(arg[11:]))
        elif arg.startswith('-usercontribs'):
            # Value has the form 'username[;number]'.
            args = arg[14:].split(';')
            try:
                number = int(args[1])
            except (IndexError, ValueError):
                # No count given, or it is not an integer: use the default.
                number = 250
            gen = UserContributionsGenerator(args[0], number,
                                             namespaces=self.getNamespaces)
        elif arg.startswith('-withoutinterwiki'):
            if len(arg) == 17:
                gen = WithoutInterwikiPageGenerator()
            else:
                gen = WithoutInterwikiPageGenerator(number=int(arg[18:]))
        elif arg.startswith('-interwiki'):
            title = arg[11:]
            if not title:
                title = i18n.input('pywikibot-enter-page-processing')
            page = pywikibot.Page(site, title)
            gen = InterwikiPageGenerator(page)
        elif arg.startswith('-randomredirect'):
            if len(arg) == 15:
                gen = RandomRedirectPageGenerator()
            else:
                gen = RandomRedirectPageGenerator(number=int(arg[16:]))
        elif arg.startswith('-random'):
            if len(arg) == 7:
                gen = RandomPageGenerator()
            else:
                gen = RandomPageGenerator(number=int(arg[8:]))
        elif arg.startswith('-recentchanges'):
            if len(arg) >= 15:
                gen = RecentchangesPageGenerator(number=int(arg[15:]),
                                                 nobots=False)
            else:
                gen = RecentchangesPageGenerator(nobots=False)
            gen = DuplicateFilterPageGenerator(gen)
        elif arg.startswith('-rc-nobots'):
            if len(arg) >= 11:
                gen = RecentchangesPageGenerator(number=int(arg[11:]),
                                                 nobots=True)
            else:
                gen = RecentchangesPageGenerator(nobots=True)
            gen = DuplicateFilterPageGenerator(gen)
        elif arg.startswith('-file'):
            textfilename = arg[6:]
            if not textfilename:
                textfilename = pywikibot.input(
                    u'Please enter the local file name:')
            gen = TextfilePageGenerator(textfilename)
        elif arg.startswith('-namespace'):
            # Filter argument: recorded on the factory, no generator created.
            if len(arg) == len('-namespace'):
                self.namespaces.append(
                    pywikibot.input(u'What namespace are you filtering on?'))
            else:
                self.namespaces.extend(arg[len('-namespace:'):].split(","))
            return True
        elif arg.startswith('-ns'):
            # Short spelling of -namespace.
            if len(arg) == len('-ns'):
                self.namespaces.append(
                    pywikibot.input(u'What namespace are you filtering on?'))
            else:
                self.namespaces.extend(arg[len('-ns:'):].split(","))
            return True
        elif arg.startswith('-limit'):
            if len(arg) == len('-limit'):
                self.limit = int(pywikibot.input("What is the limit value?"))
            else:
                self.limit = int(arg[len('-limit:'):])
            return True
        elif arg.startswith('-catr'):
            gen = self.getCategoryGen(arg, len('-catr'), recurse=True)
        elif arg.startswith('-category'):
            gen = self.getCategoryGen(arg, len('-category'))
        elif arg.startswith('-cat'):
            gen = self.getCategoryGen(arg, len('-cat'))
        elif arg.startswith('-subcatsr'):
            gen = self.setSubCategoriesGen(arg, 9, recurse=True)
        elif arg.startswith('-subcats'):
            gen = self.setSubCategoriesGen(arg, 8)
        elif arg.startswith('-page'):
            if len(arg) == len('-page'):
                gen = [pywikibot.Page(site,
                                      pywikibot.input(
                                          u'What page do you want to use?'))]
            else:
                gen = [pywikibot.Page(site, arg[len('-page:'):])]
        elif arg.startswith('-uncatfiles'):
            gen = UnCategorizedImageGenerator()
        elif arg.startswith('-uncatcat'):
            gen = UnCategorizedCategoryGenerator()
        elif arg.startswith('-uncattemplates'):
            gen = UnCategorizedTemplatesGenerator()
        elif arg.startswith('-uncat'):
            gen = UnCategorizedPageGenerator()
        elif arg.startswith('-ref'):
            referredPageTitle = arg[5:]
            if not referredPageTitle:
                referredPageTitle = pywikibot.input(
                    u'Links to which page should be processed?')
            referredPage = pywikibot.Page(site, referredPageTitle)
            gen = ReferringPageGenerator(referredPage)
        elif arg.startswith('-links'):
            linkingPageTitle = arg[7:]
            if not linkingPageTitle:
                linkingPageTitle = pywikibot.input(
                    u'Links from which page should be processed?')
            linkingPage = pywikibot.Page(site, linkingPageTitle)
            gen = LinkedPageGenerator(linkingPage)
        elif arg.startswith('-weblink'):
            url = arg[9:]
            if not url:
                url = pywikibot.input(
                    u'Pages with which weblink should be processed?')
            gen = LinksearchPageGenerator(url)
        elif arg.startswith('-transcludes'):
            transclusionPageTitle = arg[len('-transcludes:'):]
            if not transclusionPageTitle:
                transclusionPageTitle = pywikibot.input(
                    u'Pages that transclude which page should be processed?')
            # The title is looked up in namespace 10 of the current site.
            transclusionPage = pywikibot.Page(site,
                                              "%s:%s" % (site.namespace(10),
                                                         transclusionPageTitle))
            gen = ReferringPageGenerator(transclusionPage,
                                         onlyTemplateInclusion=True)
        elif arg.startswith('-gorandom'):
            # Start an all-pages listing at one randomly chosen page.
            # NOTE(review): firstPageTitle stays unbound if the random
            # generator yields nothing.
            for firstPage in RandomPageGenerator(number=1):
                firstPageTitle = firstPage.title()
            namespace = pywikibot.Page(site, firstPageTitle).namespace()
            firstPageTitle = pywikibot.Page(site, firstPageTitle
                                            ).title(withNamespace=False)
            gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                        includeredirects=False)
        elif arg.startswith('-start'):
            firstPageTitle = arg[7:]
            if not firstPageTitle:
                firstPageTitle = pywikibot.input(
                    u'At which page do you want to start?')
            # A namespace given earlier on the command line takes precedence
            # over the namespace contained in the start title.
            if self.namespaces:
                namespace = self.namespaces[0]
            else:
                namespace = pywikibot.Page(site, firstPageTitle).namespace()

            firstPageTitle = pywikibot.Page(site, firstPageTitle
                                            ).title(withNamespace=False)
            gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                        includeredirects=False)
        elif arg.startswith('-redirectonly'):
            firstPageTitle = arg[14:]
            if not firstPageTitle:
                firstPageTitle = pywikibot.input(
                    u'At which page do you want to start?')
            namespace = pywikibot.Page(site, firstPageTitle).namespace()
            firstPageTitle = pywikibot.Page(site, firstPageTitle
                                            ).title(withNamespace=False)
            gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                        includeredirects='only')
        elif arg.startswith('-prefixindex'):
            prefix = arg[13:]
            if not prefix:
                prefix = pywikibot.input(
                    u'What page names are you looking for?')
            gen = PrefixingPageGenerator(prefix=prefix)
        elif arg.startswith('-newimages'):
            limit = arg[11:] or pywikibot.input(
                u'How many images do you want to load?')
            gen = NewimagesPageGenerator(number=int(limit))
        elif arg == '-new' or arg.startswith('-new:'):
            if len(arg) >= 5:
                gen = NewpagesPageGenerator(number=int(arg[5:]))
            else:
                gen = NewpagesPageGenerator(number=60)
        elif arg.startswith('-imagelinks'):
            imagelinkstitle = arg[len('-imagelinks:'):]
            if not imagelinkstitle:
                imagelinkstitle = pywikibot.input(
                    u'Images on which page should be processed?')
            imagelinksPage = pywikibot.Page(site, imagelinkstitle)
            gen = ImagesPageGenerator(imagelinksPage)
        elif arg.startswith('-search'):
            mediawikiQuery = arg[8:]
            if not mediawikiQuery:
                mediawikiQuery = pywikibot.input(
                    u'What do you want to search for?')
            gen = SearchPageGenerator(mediawikiQuery, number=None,
                                      namespaces=self.getNamespaces)
        elif arg.startswith('-titleregex'):
            if len(arg) == 11:
                regex = pywikibot.input(u'What page names are you looking for?')
            else:
                regex = arg[12:]
            gen = RegexFilterPageGenerator(site.allpages(), [regex])
        elif arg.startswith('-yahoo'):
            gen = YahooSearchPageGenerator(arg[7:])
        elif arg.startswith('-'):
            # Catch-all for arguments of the form
            #   -<mode>log[:<number>]  or  -<mode>log:<user>[;<number>]
            mode, log, user = arg.partition('log')
            # exclude -log, -nolog
            if log == 'log' and mode not in ['-', '-no']:
                number = 500
                if not user:
                    user = None
                else:
                    # user still carries its leading separator character.
                    try:
                        number = int(user[1:])
                        user = None
                    except ValueError:
                        user = user[1:]
                if user:
                    result = user.split(';')
                    user = result[0]
                    try:
                        number = int(result[1])
                    except (IndexError, ValueError):
                        # No usable count after ';': keep the default.
                        pass
                gen = LogpagesPageGenerator(number, mode[1:], user)
        if gen:
            self.gens.append(gen)
            return self.getCombinedGenerator()
        else:
            return False
Exemplo n.º 37
0
def main(*args):
    """Read the command line parameters and start the redirect bot.

    Help is shown instead of running when no action was given, or when
    -xml is combined with -moves or -fullscan.

    @param args: command line arguments, forwarded to pywikibot.handleArgs()
    """
    # What the bot should do: 'double', 'broken' or 'both'.
    action = None
    # File name of a local XML dump, or None when no -xml argument is given.
    xmlFilename = None
    # Namespaces collected from -namespace: arguments (integers where the
    # value is numeric, the raw string otherwise); empty by default.
    namespaces = []
    # Value of -offset: (used with dumps); defaults to -1.
    offset = -1
    moved_pages = False
    fullscan = False
    start = ''
    until = ''
    number = None
    step = None
    always = False
    delete = False

    for arg in pywikibot.handleArgs(*args):
        if arg in ('double', 'do'):
            action = 'double'
        elif arg in ('broken', 'br'):
            action = 'broken'
        elif arg == 'both':
            action = 'both'
        elif arg == '-fullscan':
            fullscan = True
        elif arg.startswith('-xml'):
            if len(arg) == len('-xml'):
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[len('-xml:'):]
        elif arg.startswith('-moves'):
            moved_pages = True
        elif arg.startswith('-namespace:'):
            ns = arg[len('-namespace:'):]
            if ns == '':
                # A bare "-namespace:" asks the user; an empty answer
                # selects namespace 0.
                ns = i18n.input('pywikibot-enter-namespace-number')
                if ns == '':
                    ns = '0'
            # TODO! at least for some generators enter a namespace by its
            # name or number
            try:
                ns = int(ns)
            except ValueError:
                # Non-numeric values (e.g. -namespace:all) are kept as
                # strings.
                pass
            if ns not in namespaces:
                namespaces.append(ns)
        elif arg.startswith('-offset:'):
            offset = int(arg[len('-offset:'):])
        elif arg.startswith('-start:'):
            start = arg[len('-start:'):]
        elif arg.startswith('-until:'):
            until = arg[len('-until:'):]
        elif arg.startswith('-total:'):
            number = int(arg[len('-total:'):])
        elif arg.startswith('-step:'):
            step = int(arg[len('-step:'):])
        elif arg == '-always':
            always = True
        elif arg == '-delete':
            delete = True
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)

    # An XML dump cannot be combined with -moves or -fullscan.
    if not action or (xmlFilename and (moved_pages or fullscan)):
        pywikibot.showHelp()
        return

    gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages,
                            fullscan, start, until, number, step)
    RedirectRobot(action, gen, always, number, delete).run()
Exemplo n.º 38
0
 def run(self):
     """
     Start the robot.

     For every page yielded by self.generator the replacements are applied
     and, unless self.acceptall is set, the user is asked what to do with
     the resulting diff:

       y - save asynchronously (or, when self.articles is set, write the
           title to that file instead of editing)
       N - skip the page (default)
       x - write the title to self.exctitles (only offered when set)
       e - open the text in an editor, then show the diff again
       b - open the page in a web browser, reload it and start over
       a - accept this and all following changes
       q - write the counters and return

     In accept-all mode pages are saved synchronously with page.put() and
     save errors are reported and skipped.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         if self.isTitleExcepted(page.title()):
             pywikibot.output(
                 u'Skipping %s because the title is on the exceptions list.'
                 % page.title(asLink=True))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             # Editability only matters when pages are really saved, i.e.
             # when titles are not merely collected in self.articles.
             if not (self.articles or page.canBeEdited()):
                 pywikibot.output(u"You can't edit page %s"
                                  % page.title(asLink=True))
                 continue
         except pywikibot.NoPage:
             pywikibot.output(u'Page %s not found' % page.title(asLink=True))
             continue
         new_text = original_text
         # Each pass shows one diff; 'e' and 'b' restart the pass with
         # 'continue', every other outcome leaves the loop with 'break'.
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output(
                     u'Skipping %s because it contains text that is on the exceptions list.'
                     % page.title(asLink=True))
                 break
             new_text = self.doReplacements(new_text)
             if new_text == original_text:
                 pywikibot.output(u'No changes were necessary in %s'
                                  % page.title(asLink=True))
                 break
             if self.recursive:
                 # Re-apply the replacements until the text stops changing.
                 newest_text = self.doReplacements(new_text)
                 while newest_text != new_text:
                     new_text = newest_text
                     newest_text = self.doReplacements(new_text)
             if hasattr(self, "addedCat"):
                 cats = page.categories()
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = pywikibot.replaceCategoryLinks(new_text,
                                                               cats)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                              % page.title())
             pywikibot.showDiff(original_text, new_text)
             if self.acceptall:
                 break
             if self.exctitles:
                 choice = pywikibot.inputChoice(
                         u'Do you want to accept these changes?',
                         ['Yes', 'No', 'no+eXcept', 'Edit',
                          'open in Browser', 'All', 'Quit'],
                         ['y', 'N', 'x', 'e', 'b', 'a', 'q'], 'N')
             else:
                 choice = pywikibot.inputChoice(
                         u'Do you want to accept these changes?',
                         ['Yes', 'No', 'Edit', 'open in Browser', 'All',
                          'Quit'],
                         ['y', 'N', 'e', 'b', 'a', 'q'], 'N')
             if choice == 'e':
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == 'b':
                 webbrowser.open("http://%s%s" % (
                     page.site.hostname(),
                     page.site.nice_get_address(page.title())
                 ))
                 i18n.input('pywikibot-enter-finished-browser')
                 # The page may have been changed in the browser: reload it.
                 try:
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output(u'Page %s has been deleted.'
                                      % page.title())
                     break
                 new_text = original_text
                 continue
             if choice == 'q':
                 self.writeEditCounter()
                 self.writeExceptCounter()
                 return
             if choice == 'a':
                 # The page itself is saved by the accept-all code below.
                 self.acceptall = True
             if choice == 'x':  # May happen only if self.exctitles isn't None
                 self.exctitles.write(
                     u"ur'^%s$',\n" % re.escape(page.title()))
                 self.exctitles.flush()
                 self.exceptcounter += 1
             if choice == 'y':
                 if not self.articles:
                     # Primary behaviour: working on wiki
                     page.put_async(new_text, self.editSummary)
                     self.editcounter += 1
                     # Bug: this increments even if put_async fails
                     # This is separately in two clauses of if for
                     # future purposes to get feedback form put_async
                 else:
                     # Save the title for later processing instead of editing
                     self.editcounter += 1
                     self.articles.write(u'#%s\n%s'
                                 % (page.title(asLink=True, textlink=True),
                                    self.splitLine()))
                     self.articles.flush()  # For the peace of our soul :-)
             # choice must be 'N'
             break
         if self.acceptall and new_text != original_text:
             if not self.articles:
                 # Primary behaviour: working on wiki
                 try:
                     page.put(new_text, self.editSummary)
                     self.editcounter += 1  # increment only on success
                 except pywikibot.EditConflict:
                     pywikibot.output(u'Skipping %s because of edit conflict'
                                      % (page.title(),))
                 except pywikibot.SpamfilterError as e:
                     pywikibot.output(
                         u'Cannot change %s because of blacklist entry %s'
                         % (page.title(), e.url))
                 except pywikibot.PageNotSaved as error:
                     pywikibot.output(u'Error putting page: %s'
                                      % (error.args,))
                 except pywikibot.LockedPage:
                     pywikibot.output(u'Skipping %s (locked page)'
                                      % (page.title(),))
Exemplo n.º 39
0
def main():
    """Parse the command line and run the weblink checker bot.

    The page source is chosen in this order: an XML dump (-xml), a single
    page named by the leftover arguments, -repeat, and finally whatever the
    generator factory collected.  Help is shown when none of them applies.
    After the bot has run (or failed), remaining link-check threads get up
    to 30 seconds to finish, the report thread is shut down and the history
    is saved.
    """
    gen = None
    title_words = []
    xmlFilename = None
    # Dump article to start with; only set by -xmlstart.
    xmlStart = None
    # Namespaces collected from -namespace: arguments; empty means that no
    # namespace filter is applied below.
    namespaces = []
    ignored_http_codes = []
    day = 7

    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg == '-talk':
            config.report_dead_links_on_talk = True
        elif arg == '-notalk':
            config.report_dead_links_on_talk = False
        elif arg.startswith('-namespace:'):
            ns = arg[len('-namespace:'):]
            try:
                ns = int(ns)
            except ValueError:
                # Keep non-numeric namespace values as given.
                pass
            namespaces.append(ns)
        elif arg == '-repeat':
            gen = RepeatPageGenerator()
        elif arg.startswith('-ignore:'):
            ignored_http_codes.append(int(arg[len('-ignore:'):]))
        elif arg.startswith('-day:'):
            day = int(arg[len('-day:'):])
        # -xmlstart must be tested before its prefix -xml.
        elif arg.startswith('-xmlstart'):
            if len(arg) == len('-xmlstart'):
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[len('-xmlstart:'):]
        elif arg.startswith('-xml'):
            if len(arg) == len('-xml'):
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[len('-xml:'):]
        elif not genFactory.handleArg(arg):
            # Anything nobody recognizes is treated as part of a page title.
            title_words.append(arg)

    if title_words:
        page = pywikibot.Page(pywikibot.Site(), ' '.join(title_words))
        gen = iter([page])

    if xmlFilename:
        gen = XmlDumpPageGenerator(xmlFilename, xmlStart, namespaces)

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp()
        return

    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    # fetch at least 240 pages simultaneously from the wiki, but more if
    # a high thread number is set.
    pageNumber = max(240, config.max_external_links * 2)
    gen = pagegenerators.PreloadingGenerator(gen, step=pageNumber)
    gen = pagegenerators.RedirectFilterPageGenerator(gen)
    bot = WeblinkCheckerRobot(gen, ignored_http_codes, day)
    try:
        bot.run()
    finally:
        seconds_waited = 0
        # Don't wait longer than 30 seconds for threads to finish.
        while countLinkCheckThreads() > 0 and seconds_waited < 30:
            try:
                pywikibot.output(u"Waiting for remaining %i threads to "
                                 u"finish, please wait..."
                                 % countLinkCheckThreads())
                # wait 1 second
                time.sleep(1)
                seconds_waited += 1
            except KeyboardInterrupt:
                pywikibot.output(u'Interrupted.')
                break
        if countLinkCheckThreads() > 0:
            pywikibot.output(u'Remaining %i threads will be killed.'
                             % countLinkCheckThreads())
            # Threads will die automatically because they are daemonic.
        if bot.history.reportThread:
            bot.history.reportThread.shutdown()
            # wait until the report thread is shut down; the user can
            # interrupt it by pressing CTRL-C.
            try:
                while bot.history.reportThread.isAlive():
                    time.sleep(0.1)
            except KeyboardInterrupt:
                pywikibot.output(u'Report thread interrupted.')
                bot.history.reportThread.kill()
        pywikibot.output(u'Saving history...')
        bot.history.save()
Exemplo n.º 40
0
def main(*args):
    """
    Parse the command line arguments and start the redirect bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Action arguments as typed by the user, mapped to the action name that
    # is handed to the bot (double redirects, broken redirects or both).
    action_aliases = {
        "do": "double",
        "br": "broken",
        "both": "both",
        "broken": "broken",
        "double": "double",
    }
    action = None
    # Keyword options forwarded to RedirectRobot.
    options = {}

    # Settings forwarded to RedirectGenerator.
    # Filename of a local XML dump; stays None unless -xml is given.
    xmlFilename = None
    # Namespaces collected from -namespace arguments (empty by default).
    namespaces = []
    # Value of -offset; -1 unless given on the command line.
    offset = -1
    moved_pages = False
    fullscan = False
    start = ""
    until = ""
    number = None
    pagename = None

    for raw_arg in pywikibot.handle_args(args):
        # Split "-name:value" into its name and value parts.
        name, _, value = raw_arg.partition(":")
        # The name without its first character (the leading dash).
        option = name[1:]

        # bot options
        if name in action_aliases:
            action = action_aliases[name]
        elif option in ("always", "delete"):
            options[option] = True
        elif option == "total":
            number = int(value)
            options["number"] = number
        # generator options
        elif option == "fullscan":
            fullscan = True
        elif option == "xml":
            # Ask for the filename only when none was given with the option.
            if value:
                xmlFilename = value
            else:
                xmlFilename = i18n.input("pywikibot-enter-xml-filename")
        elif option == "moves":
            moved_pages = True
        elif option == "namespace":
            # "-namespace:" does NOT yield -namespace:0 further down the road!
            if value:
                ns = value
            else:
                ns = i18n.input("pywikibot-enter-namespace-number")
            # TODO: at least for some generators enter a namespace by its name
            # or number
            if ns == "":
                ns = "0"
            try:
                ns = int(ns)
            except ValueError:
                # Non-numeric values (e.g. -namespace:all) are kept as text.
                pass
            if ns not in namespaces:
                namespaces.append(ns)
        elif option == "offset":
            offset = int(value)
        elif option == "start":
            start = value
        elif option == "until":
            until = value
        elif option == "page":
            pagename = value
        # deprecated or unknown options
        elif option == "step":
            issue_deprecation_warning('The usage of "{0}"'.format(name), 2, ArgumentDeprecationWarning)
        else:
            pywikibot.output("Unknown argument: %s" % name)

    # An XML dump cannot be combined with the moved-pages log or a full scan.
    xml_conflict = xmlFilename and (moved_pages or fullscan)
    if not action or xml_conflict:
        problems = []
        if xmlFilename and moved_pages:
            problems.append("Either use a XML file or the moved pages from the API")
        if xmlFilename and fullscan:
            problems.append("Either use a XML file or do a full scan using the API")
        pywikibot.bot.suggest_help(additional_text="\n".join(problems), missing_action=not action)
        return

    pywikibot.Site().login()
    gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages, fullscan, start, until, number, pagename)
    bot = RedirectRobot(action, gen, **options)
    bot.run()
Exemplo n.º 41
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Replacement pairs are collected from the command line, from a pairs
    file (-pairsfile), from interactive input and from the predefined fixes
    named with -fix. They are compiled and handed to ReplaceRobot together
    with the page generator.

    @param args: command line arguments
    @type args: list of unicode
    @return: False when the arguments or the pairs file are unusable or when
        no generator is available; None when an unknown fix was requested
        and after the bot has run
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = ""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # The replacement objects built from the command line and from fixes.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        "title": [],
        "text-contains": [],
        "inside": [],
        "inside-tags": [],
        "require-title": [],  # using a separate requirements dict needs some
    }  # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    # Title of the dumped article to start with (-xmlstart); None when the
    # option is not given.
    xmlStart = None
    useSql = False
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.

    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == "-regex":
            regex = True
        # -xmlstart must be tested before -xml because it shares the prefix.
        elif arg.startswith("-xmlstart"):
            if len(arg) == 9:
                xmlStart = pywikibot.input("Please enter the dumped article to start with:")
            else:
                xmlStart = arg[10:]
        elif arg.startswith("-xml"):
            if len(arg) == 4:
                xmlFilename = i18n.input("pywikibot-enter-xml-filename")
            else:
                xmlFilename = arg[5:]
        elif arg == "-sql":
            useSql = True
        elif arg.startswith("-excepttitle:"):
            exceptions["title"].append(arg[13:])
        elif arg.startswith("-requiretitle:"):
            exceptions["require-title"].append(arg[14:])
        elif arg.startswith("-excepttext:"):
            exceptions["text-contains"].append(arg[12:])
        elif arg.startswith("-exceptinside:"):
            exceptions["inside"].append(arg[14:])
        elif arg.startswith("-exceptinsidetag:"):
            exceptions["inside-tags"].append(arg[17:])
        elif arg.startswith("-fix:"):
            fixes_set += [arg[5:]]
        elif arg.startswith("-sleep:"):
            sleep = float(arg[7:])
        elif arg == "-always":
            acceptall = True
        elif arg == "-recursive":
            recursive = True
        elif arg == "-nocase":
            caseInsensitive = True
        elif arg == "-dotall":
            dotall = True
        elif arg == "-multiline":
            multiline = True
        elif arg.startswith("-addcat:"):
            add_cat = arg[8:]
        elif arg.startswith("-summary:"):
            edit_summary = arg[9:]
        elif arg.startswith("-automaticsummary"):
            # True (instead of a string) marks "use the automatic summary".
            edit_summary = True
        elif arg.startswith("-allowoverlap"):
            allowoverlap = True
        elif arg.startswith("-manualinput"):
            manual_input = True
        elif arg.startswith("-replacementfile"):
            # Deprecated spelling: only the warning is issued here, no file
            # name is taken from this argument.
            issue_deprecation_warning("-replacementfile", "-pairsfile", 2, ArgumentDeprecationWarning)
        elif arg.startswith("-pairsfile"):
            # An odd count means -pairsfile was placed between the two halves
            # of a command line pair; this is reported after parsing.
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True

            if arg == "-pairsfile":
                replacement_file = pywikibot.input("Please enter the filename to read replacements from:")
            else:
                replacement_file = arg[len("-pairsfile:") :]
        else:
            # Everything else is taken as one half of a replacement pair.
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error("Incomplete command line pattern replacement pair.")
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error("-pairsfile used between a pattern replacement pair.")
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, "r", "utf-8") as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error("Error loading {0}: {1}".format(replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error("{0} contains an incomplete pattern replacement pair.".format(replacement_file))
            return False

        # Strip BOM from first line. lstrip() returns a new string, so the
        # result has to be stored back; an empty file has no first line.
        if file_replacements:
            file_replacements[0] = file_replacements[0].lstrip("\uFEFF")
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input("Please enter the text that should be replaced:")
        while old:
            new = pywikibot.input("Please enter the new text:")
            commandline_replacements += [old, new]
            old = pywikibot.input("Please enter another text that should be replaced," "\nor press Enter to start:")

    # The summary stored here won't be actually used but is only an example
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i], commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, "replace-replacing", {"description": " (-%s +%s)" % (replacement.old, replacement.new)}
            )
        replacements.append(replacement)

    # Perform one of the predefined actions.
    # Descriptions of the fixes/replacements which lack a summary.
    missing_fixes_summaries = []
    for fix_name in fixes_set:
        try:
            fix = fixes.fixes[fix_name]
        except KeyError:
            pywikibot.output("Available predefined fixes are: %s" % ", ".join(fixes.fixes.keys()))
            if not fixes.user_fixes_loaded:
                pywikibot.output("The user fixes file could not be found: " "{0}".format(fixes.filename))
            return
        if not fix["replacements"]:
            pywikibot.warning("No replacements defined for fix " '"{0}"'.format(fix_name))
            continue
        if "msg" in fix:
            # A plain string is passed to twtranslate; any other value is
            # passed to translate.
            if isinstance(fix["msg"], basestring):
                set_summary = i18n.twtranslate(site, str(fix["msg"]))
            else:
                set_summary = i18n.translate(site, fix["msg"], fallback=True)
        else:
            set_summary = None
        replacement_set = ReplacementList(
            fix.get("regex"), fix.get("exceptions"), fix.get("nocase"), set_summary, name=fix_name
        )
        # Whether some replacements have a summary, if so only show which
        # have none, otherwise just mention the complete fix
        missing_fix_summaries = []
        for index, replacement in enumerate(fix["replacements"], start=1):
            # An optional third element carries the per-replacement summary.
            summary = None if len(replacement) < 3 else replacement[2]
            if not set_summary and not summary:
                missing_fix_summaries.append('"{0}" (replacement #{1})'.format(fix_name, index))
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning(
                    'The old string "{0}" contains formatting '
                    "characters like U+200E".format(chars.replace_invisible(replacement[0]))
                )
            # The new value may be a callable; only strings are checked.
            if not callable(replacement[1]) and chars.contains_invisible(replacement[1]):
                pywikibot.warning(
                    'The new string "{0}" contains formatting '
                    "characters like U+200E".format(chars.replace_invisible(replacement[1]))
                )
            replacement_set.append(
                ReplacementListEntry(
                    old=replacement[0], new=replacement[1], fix_set=replacement_set, edit_summary=summary
                )
            )

        if replacement_set:
            replacements.extend(replacement_set)

        if len(fix["replacements"]) == len(missing_fix_summaries):
            missing_fixes_summaries.append('"{0}" (all replacements)'.format(fix_name))
        else:
            missing_fixes_summaries += missing_fix_summaries

    if (not edit_summary or edit_summary is True) and (missing_fixes_summaries or single_summary):
        if single_summary:
            pywikibot.output(
                "The summary message for the command line " "replacements will be something like: %s" % single_summary
            )
        if missing_fixes_summaries:
            pywikibot.output(
                "The summary will not be used when the fix has "
                "one defined but the following fix(es) do(es) not "
                "have a summary defined: "
                "{0}".format(", ".join(missing_fixes_summaries))
            )
        if edit_summary is not True:
            edit_summary = pywikibot.input(
                "Press Enter to use this automatic message, or enter a "
                "description of the\nchanges your bot will make:"
            )
        else:
            # -automaticsummary: do not ask, continue with an empty summary.
            edit_summary = ""

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements, exceptions, site)
    elif useSql:
        # NOTE(review): this unpacks every entry of 'replacements' as a
        # 2-tuple, while the list holds Replacement/ReplacementListEntry
        # objects - confirm that these support unpacking.
        whereClause = "WHERE (%s)" % " OR ".join(
            [
                "old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern)
                for (old_regexp, new_text) in replacements
            ]
        )
        # NOTE(review): 'exceptions' is a dict, so this test is always true
        # and the loop below iterates over its string keys, which have no
        # 'pattern' attribute - confirm the intended exception patterns.
        if exceptions:
            exceptClause = "AND NOT (%s)" % " OR ".join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern) for exc in exceptions]
            )
        else:
            exceptClause = ""
        query = """
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (
            whereClause,
            exceptClause,
        )
        gen = pagegenerators.MySQLPageGenerator(query)

    # Combine the XML/SQL generator (if any) with the generators requested
    # through the generator factory arguments.
    gen = genFactory.getCombinedGenerator(gen)

    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(
        preloadingGen,
        replacements,
        exceptions,
        allowoverlap,
        recursive,
        add_cat,
        sleep,
        edit_summary,
        always=acceptall,
        site=site,
    )
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme().
    # It will make sure the callback is triggered before replace.py is unloaded.
    pywikibot.stopme()
    pywikibot.output("\n%s pages changed." % bot.changed_pages)
Exemplo n.º 42
0
 def run(self):
     """
     Start the bot.

     For every page of self.generator the replacements are applied to the
     page text and the diff is shown. Unless self.acceptall is set, the user
     is asked whether to save, edit, open the page in a browser, accept all
     further changes or quit. Returns early (None) when the user quits.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         if self.isTitleExcepted(page.title()):
             pywikibot.output(
                 u'Skipping %s because the title is on the exceptions list.'
                 % page.title(asLink=True))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             if not page.canBeEdited():
                 pywikibot.output(u"You can't edit page %s" %
                                  page.title(asLink=True))
                 continue
         except pywikibot.NoPage:
             pywikibot.output(u'Page %s not found' %
                              page.title(asLink=True))
             continue
         new_text = original_text
         # This loop is repeated after the user edited the text or reloaded
         # the page; every other outcome leaves it with 'break'.
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output(u'Skipping %s because it contains text '
                                  u'that is on the exceptions list.' %
                                  page.title(asLink=True))
                 break
             new_text = self.doReplacements(new_text)
             if new_text == original_text:
                 pywikibot.output(u'No changes were necessary in %s' %
                                  page.title(asLink=True))
                 break
             if self.recursive:
                 # Re-apply the replacements until the text stops changing.
                 newest_text = self.doReplacements(new_text)
                 while (newest_text != new_text):
                     new_text = newest_text
                     newest_text = self.doReplacements(new_text)
             if hasattr(self, "addedCat"):
                 cats = page.categories(nofollow_redirects=True)
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = pywikibot.replaceCategoryLinks(
                         new_text, cats)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(
                 u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                 page.title())
             pywikibot.showDiff(original_text, new_text)
             if self.acceptall:
                 # Saving is done after the loop.
                 break
             choice = pywikibot.inputChoice(
                 u'Do you want to accept these changes?',
                 ['Yes', 'No', 'Edit', 'open in Browser', 'All', 'Quit'],
                 ['y', 'N', 'e', 'b', 'a', 'q'], 'N')
             if choice == 'e':
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == 'b':
                 webbrowser.open(
                     "http://%s%s" %
                     (page.site.hostname(),
                      page.site.nice_get_address(page.title(asUrl=True))))
                 i18n.input('pywikibot-enter-finished-browser')
                 try:
                     # Reload the text; it may have been changed in the
                     # browser meanwhile.
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output(u'Page %s has been deleted.' %
                                      page.title())
                     break
                 new_text = original_text
                 continue
             if choice == 'q':
                 return
             if choice == 'a':
                 # This page and all following ones are saved without
                 # asking; the save itself happens after the loop.
                 self.acceptall = True
             if choice == 'y':
                 page.put_async(new_text, self.summary)
             # 'y', 'a' and 'N' all leave the loop here.
             break
         if self.acceptall and new_text != original_text:
             try:
                 page.put(new_text, self.summary)
             except pywikibot.EditConflict:
                 pywikibot.output(u'Skipping %s because of edit conflict' %
                                  (page.title(), ))
             except pywikibot.SpamfilterError as e:
                 pywikibot.output(
                     u'Cannot change %s because of blacklist entry %s' %
                     (page.title(), e.url))
             # LockedPage is handled before the more general PageNotSaved so
             # that its dedicated message stays reachable if LockedPage is a
             # subclass of PageNotSaved (presumably it is in pywikibot's
             # exception hierarchy - confirm for the version in use).
             except pywikibot.LockedPage:
                 pywikibot.output(u'Skipping %s (locked page)' %
                                  (page.title(), ))
             except pywikibot.PageNotSaved as error:
                 pywikibot.output(u'Error putting page: %s' %
                                  (error.args, ))
Exemplo n.º 43
0
def main(*args):
    """
    Process command line arguments and run the hewiki replace bot.

    Builds the replacement dictionary and the exceptions (safe templates,
    file usages, yi interwiki links and a list of tags), creates the page
    generator from an XML dump and/or the generator factory arguments and
    runs ReplaceRobotHe on it.

    @param args: command line arguments
    @type args: list of unicode
    @return: False when no page generator could be created, otherwise None
    """
    pywikibot.output('Starting hewiki-replacebot')
    edit_summary = replaceConfig.defaultSummary
    xml_filename = None
    xml_start = None
    title_check_page = None
    gen = None
    gen_factory = pywikibot.pagegenerators.GeneratorFactory()
    local_args = pywikibot.handle_args(args)
    for arg in local_args:
        if gen_factory.handle_arg(arg):
            continue
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        # -xmlstart must be tested before -xml because it shares the prefix.
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xml_start = pywikibot.input(
                    'Please enter the dumped article to start with:')
            else:
                xml_start = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xml_filename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xml_filename = arg[5:]
        elif arg.startswith('-titlecheck'):
            title_check_page = arg[12:]

    replace_dict = fill_replacements_dict()

    # Work on a copy so that extending the list below does not modify the
    # list stored in replaceConfig.
    safe_templates = list(replaceConfig.safeTemplates)
    # add external links templates
    site = pywikibot.Site()
    site.login()
    for safeCategory in replaceConfig.safeTemplatesCategories:
        cite_templates = pywikibot.Category(site, safeCategory).articles(
            namespaces=10, recurse=True)
        cite_templates = [page.title(with_ns=False) for page in cite_templates]
        safe_templates += cite_templates
    # Drop duplicates and every title that contains a slash.
    safe_templates = list(set(a for a in safe_templates if '/' not in a))
    file_usage_rgx = re.compile(replaceConfig.fileUsageRgx, re.I)
    # Raw strings keep the backslashes as regex escapes.
    yi_rgx = re.compile(r'\[\[yi:.*?\]\]')
    safe_templates_rgx = re.compile(
        r'\{\{(' + '|'.join(set(safe_templates)) + r').*?\}\}', re.I)
    exceptions = {
        'title': [],
        'text-contains': [re.compile(replaceConfig.redirectRgx, re.I)],
        'inside': [file_usage_rgx, safe_templates_rgx, yi_rgx],
        'inside-tags': [
            'nowiki', 'math', 'comment', 'pre', 'source', 'hyperlink',
            'gallery', 'interwiki', 'templatedata', 'syntaxhighlight'
        ],
        'require-title': [],
    }

    # avoid searching in other namespaces in the xml
    # (shallow copy: only the 'title' entry is replaced, the other lists are
    # shared with 'exceptions')
    exceptions_with_title_ns = dict(exceptions)
    exceptions_with_title_ns['title'] = [
        re.compile('^' + re.escape(ns_name) + ':')
        for ns_index, ns in site.namespaces.items()
        if ns_index not in replaceConfig.namespaces for ns_name in ns
    ]
    if title_check_page:
        check_titles(site, title_check_page, replace_dict)
    if xml_filename:
        gen = XmlDumpReplacePageGeneratorHe(replace_dict, xml_filename,
                                            xml_start,
                                            exceptions_with_title_ns, site)

    # Combine the XML generator (if any) with the generators requested
    # through the generator factory arguments.
    gen = gen_factory.getCombinedGenerator(gen)
    if not gen:
        pywikibot.output(
            'no xml dump specified. please fill -xml and the xml file to be used, or other generator'
        )
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    gen = pywikibot.pagegenerators.NamespaceFilterPageGenerator(
        gen, replaceConfig.namespaces, site)
    gen = pywikibot.pagegenerators.PreloadingGenerator(gen)
    pywikibot.output('starting replace')
    bot = ReplaceRobotHe(gen, replace_dict, exceptions, edit_summary)
    bot.run()
    pywikibot.output('finished all replacements')
Exemplo n.º 44
0
def main(*args):
    """
    Process command line arguments and run the replace bot.

    The replacements come from the command line, from interactive input or
    from one predefined fix (-fix). They are compiled to regular
    expressions and handed to ReplaceRobot together with a page generator
    built from an XML dump, an SQL query, -page titles or the generator
    factory arguments.

    @param args: command line arguments
    @raise pywikibot.Error: if an odd number of replacement texts is given
        or if -fix is combined with more than two command line replacement
        texts
    """
    add_cat = None
    gen = None
    # summary message
    summary_commandline = False
    edit_summary = u""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }  # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fix = None
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    # Title of the dumped article to start with (-xmlstart); None when the
    # option is not given.
    xmlStart = None
    useSql = False
    PageTitles = []
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None

    # Read commandline parameters.

    local_args = pywikibot.handleArgs(*args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        # -xmlstart must be tested before -xml because it shares the prefix.
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    pywikibot.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fix = arg[5:]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
            summary_commandline = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        else:
            # Everything else is taken as one half of a replacement pair.
            commandline_replacements.append(arg)
    pywikibot.Site().login()
    gen = genFactory.getCombinedGenerator()
    if (len(commandline_replacements) % 2):
        raise pywikibot.Error('require even number of replacements.')
    elif (len(commandline_replacements) == 2 and fix is None):
        replacements.append(
            (commandline_replacements[0], commandline_replacements[1]))
        if not summary_commandline:
            edit_summary = i18n.twtranslate(
                pywikibot.Site(), 'replace-replacing', {
                    'description':
                    ' (-%s +%s)' %
                    (commandline_replacements[0], commandline_replacements[1])
                })
    elif (len(commandline_replacements) > 1):
        if (fix is None):
            for i in range(0, len(commandline_replacements), 2):
                replacements.append((commandline_replacements[i],
                                     commandline_replacements[i + 1]))
            if not summary_commandline:
                pairs = [(commandline_replacements[i],
                          commandline_replacements[i + 1])
                         for i in range(0, len(commandline_replacements), 2)]
                replacementsDescription = '(%s)' % ', '.join(
                    [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
                edit_summary = i18n.twtranslate(
                    pywikibot.Site(), 'replace-replacing',
                    {'description': replacementsDescription})
        else:
            raise pywikibot.Error(
                'Specifying -fix with replacements is undefined')
    elif fix is None:
        # Neither replacements nor a fix were given: ask interactively.
        old = pywikibot.input(
            u'Please enter the text that should be replaced:')
        new = pywikibot.input(u'Please enter the new text:')
        # 'change' accumulates the description used for the default summary.
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = pywikibot.input(
                u'Please enter another text that should be replaced,' +
                u'\nor press Enter to start:')
            if old == '':
                change += ')'
                break
            new = i18n.input('pywikibot-enter-new-text')
            change += ' & -' + old + ' +' + new
            replacements.append((old, new))
        if not summary_commandline:
            default_summary_message = i18n.twtranslate(pywikibot.Site(),
                                                       'replace-replacing',
                                                       {'description': change})
            pywikibot.output(u'The summary message will default to: %s' %
                             default_summary_message)
            summary_message = pywikibot.input(
                u'Press Enter to use this default message, or enter a ' +
                u'description of the\nchanges your bot will make:')
            if summary_message == '':
                summary_message = default_summary_message
            edit_summary = summary_message

    else:
        # Perform one of the predefined actions.
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s' %
                             fixes.fixes.keys())
            return
        if "regex" in fix:
            regex = fix['regex']
        if "msg" in fix:
            # A plain string is passed to twtranslate; any other value is
            # passed to translate.
            if isinstance(fix['msg'], basestring):
                edit_summary = i18n.twtranslate(pywikibot.Site(),
                                                str(fix['msg']))
            else:
                edit_summary = i18n.translate(pywikibot.Site(),
                                              fix['msg'],
                                              fallback=True)
        if "exceptions" in fix:
            exceptions = fix['exceptions']
        if "nocase" in fix:
            caseInsensitive = fix['nocase']
        replacements = fix['replacements']

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later.
    # A new list is built instead of overwriting the entries in place:
    # 'replacements' may be the list stored in fixes.fixes, which must keep
    # its uncompiled patterns.
    replacements = [
        (re.compile(old_text if regex else re.escape(old_text), flags),
         new_text)
        for old_text, new_text in replacements
    ]

    # For the same reason the compiled exceptions are stored in a shallow
    # copy, so that a dict taken from fixes.fixes stays untouched.
    exceptions = dict(exceptions)
    for exceptionCategory in [
            'title', 'require-title', 'text-contains', 'inside'
    ]:
        if exceptionCategory in exceptions:
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns

    if xmlFilename:
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join([
            "old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
            for (old, new) in replacements
        ])
        # NOTE(review): 'exceptions' is a dict, so the loop below iterates
        # over its string keys, which have no 'pattern' attribute - confirm
        # which exception patterns are meant to be used here.
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                for exc in exceptions
            ])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [
            pywikibot.Page(pywikibot.Site(), PageTitle)
            for PageTitle in PageTitles
        ]
        gen = iter(pages)

    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, edit_summary)
    bot.run()
Exemplo n.º 45
0
 def test_pagegen_i18n_input(self):
     """Check the i18n.input fallback prompt with missing message package."""
     answer = i18n.input(
         'pywikibot-enter-category-name', fallback_prompt='dummy output')
     # The returned text is the expected input ...
     self.assertEqual(answer, 'dummy input')
     # ... and the fallback prompt must show up in the captured output.
     self.assertIn('dummy output: ', self.output_text)
Exemplo n.º 46
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    The function collects replacement pairs from the command line, from an
    optional replacement file, from interactive input and from predefined
    fixes, compiles them, builds a page generator and runs ReplaceRobot.

    @param args: command line arguments
    @type args: list of unicode
    @return: False if the command line or the replacement file contains an
        incomplete pattern/replacement pair, if -replacementfile is given
        between a pair, or if the replacement file cannot be read; None
        otherwise.
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = u""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }  # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    # Title of the dumped article to start with; stays None unless
    # -xmlstart is given.
    xmlStart = None
    useSql = False
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.

    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            # Must be tested before '-xml', which is a prefix of it.
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set.append(arg[5:])
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-manualinput'):
            manual_input = True
        elif arg.startswith('-replacementfile'):
            # An odd count means the option was placed between the old and
            # the new text of a pair; reported after parsing has finished.
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True

            if arg == '-replacementfile':
                replacement_file = pywikibot.input(
                    u'Please enter the filename to read replacements from:')
            else:
                replacement_file = arg[len('-replacementfile:'):]
        else:
            # Anything unrecognised is part of an old/new replacement pair.
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error(
            '-replacementfile used between a pattern replacement pair.')
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, 'r', 'utf-8') as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error(u'Error loading {0}: {1}'.format(
                replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error(
                '{0} contains an incomplete pattern replacement pair.'.format(
                    replacement_file))
            return False

        # Strip BOM from first line. lstrip() returns a new string, so the
        # result has to be stored back; an empty file has no first line.
        if file_replacements:
            file_replacements[0] = file_replacements[0].lstrip(u'\uFEFF')
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input(
            u'Please enter the text that should be replaced:')
        while old:
            new = pywikibot.input(u'Please enter the new text:')
            commandline_replacements += [old, new]
            old = pywikibot.input(
                'Please enter another text that should be replaced,'
                '\nor press Enter to start:')

    # The automatic summary is derived from the first pair only.
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(site, 'replace-replacing', {
                'description':
                ' (-%s +%s)' % (replacement.old, replacement.new)
            })
        replacements.append(replacement)

    if not edit_summary:
        if single_summary:
            pywikibot.output(u'The summary message for the command line '
                             'replacements will be something like: %s' %
                             single_summary)
        if fixes_set:
            pywikibot.output('If a summary is defined for the fix, this '
                             'default summary won\'t be applied.')
        edit_summary = pywikibot.input(
            'Press Enter to use this automatic message, or enter a '
            'description of the\nchanges your bot will make:')

    # Perform one of the predefined actions.
    for fix in fixes_set:
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s' %
                             ', '.join(fixes.fixes.keys()))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: '
                                 '{0}'.format(fixes.filename))
            return
        if "msg" in fix:
            # A plain string is treated as a translation key, anything else
            # is handed to i18n.translate as-is.
            if isinstance(fix['msg'], basestring):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'), set_summary)
        for replacement in fix['replacements']:
            # An optional third element carries a per-replacement summary.
            summary = None if len(replacement) < 3 else replacement[2]
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning('The old string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[0])))
            if chars.contains_invisible(replacement[1]):
                pywikibot.warning('The new string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[1])))
            replacements.append(
                ReplacementListEntry(
                    old=replacement[0],
                    new=replacement[1],
                    fix_set=replacement_set,
                    edit_summary=summary,
                ))

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions, site)
    elif useSql:
        # NOTE(review): this unpacks every entry of 'replacements' as an
        # (old, new) pair although the entries are Replacement /
        # ReplacementListEntry objects here - confirm they support that.
        whereClause = 'WHERE (%s)' % ' OR '.join([
            "old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern)
            for (old_regexp, new_text) in replacements
        ])
        # NOTE(review): 'exceptions' is the dict built above, so this test is
        # always true and the loop walks its keys instead of compiled
        # patterns - verify the -sql path before relying on it.
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                for exc in exceptions
            ])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen)

    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, edit_summary,
                       site)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme().
    # It will make sure the callback is triggered before replace.py is unloaded.
    pywikibot.stopme()
    pywikibot.output(u'\n%s pages changed.' % bot.changed_pages)
Exemplo n.º 47
0
def main(*args):
    """
    Process command line arguments and invoke the replace bot.

    Replacement pairs come from the command line, from interactive input or
    from one predefined fix selected with -fix. They are compiled to regular
    expressions, a page generator is chosen and ReplaceRobot is run on it.

    @param args: command line arguments, passed on to pywikibot.handleArgs
    @raise pywikibot.Error: if an odd number of replacement strings is given,
        or if -fix is combined with command line replacements
    """
    add_cat = None
    gen = None
    # summary message
    summary_commandline = False
    edit_summary = u""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title':         [],
        'text-contains': [],
        'inside':        [],
        'inside-tags':   [],
        'require-title': [],  # using a separate requirements dict needs some
    }                         # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fix = None
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    # Title of the dumped article to start with; stays None unless
    # -xmlstart is given.
    xmlStart = None
    useSql = False
    PageTitles = []
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None

    # Read commandline parameters.
    for arg in pywikibot.handleArgs(*args):
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            # Must be tested before '-xml', which is a prefix of it.
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(pywikibot.input(
                    u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fix = arg[5:]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
            summary_commandline = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        else:
            # Anything unrecognised is part of an old/new replacement pair.
            commandline_replacements.append(arg)
    pywikibot.Site().login()
    gen = genFactory.getCombinedGenerator()
    if len(commandline_replacements) % 2:
        raise pywikibot.Error('require even number of replacements.')
    elif len(commandline_replacements) == 2 and fix is None:
        # Exactly one pair: the summary names it directly.
        replacements.append((commandline_replacements[0],
                             commandline_replacements[1]))
        if not summary_commandline:
            edit_summary = i18n.twtranslate(pywikibot.getSite(),
                                            'replace-replacing',
                                            {'description': ' (-%s +%s)'
                                             % (commandline_replacements[0],
                                                commandline_replacements[1])})
    elif len(commandline_replacements) > 1:
        if fix is None:
            # Several pairs: the summary lists all of them.
            pairs = [(commandline_replacements[i],
                      commandline_replacements[i + 1])
                     for i in range(0, len(commandline_replacements), 2)]
            replacements.extend(pairs)
            if not summary_commandline:
                replacementsDescription = '(%s)' % ', '.join(
                    [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
                edit_summary = i18n.twtranslate(pywikibot.getSite(),
                                                'replace-replacing',
                                                {'description':
                                                 replacementsDescription})
        else:
            raise pywikibot.Error(
                'Specifying -fix with replacements is undefined')
    elif fix is None:
        # Nothing on the command line: ask for the pairs interactively and
        # build the summary description alongside.
        old = pywikibot.input(u'Please enter the text that should be replaced:')
        new = pywikibot.input(u'Please enter the new text:')
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = pywikibot.input(
                    u'Please enter another text that should be replaced,' +
                    u'\nor press Enter to start:')
            if old == '':
                change += ')'
                break
            new = i18n.input('pywikibot-enter-new-text')
            change += ' & -' + old + ' +' + new
            replacements.append((old, new))
        if not summary_commandline:
            default_summary_message = i18n.twtranslate(pywikibot.getSite(),
                                                       'replace-replacing',
                                                       {'description': change})
            pywikibot.output(u'The summary message will default to: %s'
                             % default_summary_message)
            summary_message = pywikibot.input(
                u'Press Enter to use this default message, or enter a ' +
                u'description of the\nchanges your bot will make:')
            if summary_message == '':
                summary_message = default_summary_message
            edit_summary = summary_message

    else:
        # Perform one of the predefined actions.
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s'
                             % fixes.fixes.keys())
            return
        if "regex" in fix:
            regex = fix['regex']
        if "msg" in fix:
            # A plain string is treated as a translation key, anything else
            # is handed to pywikibot.translate as-is.
            if isinstance(fix['msg'], basestring):
                edit_summary = i18n.twtranslate(pywikibot.getSite(),
                                                str(fix['msg']))
            else:
                edit_summary = pywikibot.translate(pywikibot.getSite(),
                                                   fix['msg'])
        if "exceptions" in fix:
            exceptions = fix['exceptions']
        if "nocase" in fix:
            caseInsensitive = fix['nocase']
        replacements = fix['replacements']

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later.
    # The list is updated in place, entry by entry.
    for i, (old, new) in enumerate(replacements):
        if not regex:
            old = re.escape(old)
        replacements[i] = re.compile(old, flags), new

    for exceptionCategory in [
            'title', 'require-title', 'text-contains', 'inside']:
        if exceptionCategory in exceptions:
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns

    if xmlFilename:
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join(
            ["old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
             for (old, new) in replacements])
        # NOTE(review): 'exceptions' is a dict keyed by category name, so
        # this loop walks its keys instead of compiled patterns - verify the
        # -sql path before relying on it.
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                 for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [pywikibot.Page(pywikibot.getSite(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)

    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, edit_summary)
    bot.run()
Exemplo n.º 48
0
def main(*args):
    """
    Parse the command line and start the redirect bot.

    With an empty argument list, sys.argv is used instead.

    @param args: command line arguments
    @type args: list of unicode
    """
    bot_options = {}
    generator_options = {}
    # Requested task: 'double', 'broken' or 'both'; None until one is given.
    action = None
    namespaces = []
    # Names of the mutually exclusive source options seen so far.
    sources = set()

    for raw_arg in pywikibot.handle_args(args):
        # Split "-name:value"; 'option' is whatever follows the first dash.
        name, _, value = raw_arg.partition(':')
        option = name.partition('-')[2]

        # bot options
        if name == 'do':
            action = 'double'
        elif name == 'br':
            action = 'broken'
        elif name in ('both', 'broken', 'double'):
            action = name
        elif option in ('always', 'delete'):
            bot_options[option] = True
        elif option == 'total':
            total = int(value)
            bot_options[option] = total
            generator_options[option] = total
        elif option == 'sdtemplate':
            template = value
            if not template:
                template = pywikibot.input(
                    'Which speedy deletion template to use?')
            bot_options['sdtemplate'] = template
        # generator options
        elif option in ('fullscan', 'moves'):
            generator_options[option] = True
            sources.add(name)
        elif option == 'xml':
            xml_file = value
            if not xml_file:
                xml_file = i18n.input('pywikibot-enter-xml-filename')
            generator_options[option] = xml_file
            sources.add(name)
        elif option == 'namespace':
            # "-namespace:" does NOT yield -namespace:0 further down the road!
            namespace = value
            if not namespace:
                namespace = i18n.input('pywikibot-enter-namespace-number')
            # TODO: at least for some generators enter a namespace by its name
            # or number
            if namespace == '':
                namespace = '0'
            try:
                namespace = int(namespace)
            except ValueError:
                # -namespace:all Process all namespaces.
                # Only works with the API read interface.
                pass
            if namespace not in namespaces:
                namespaces.append(namespace)
        elif option == 'offset':
            generator_options[option] = int(value)
        elif option in ('page', 'start', 'until'):
            generator_options[option] = value
        # deprecated or unknown options
        elif option == 'step':
            issue_deprecation_warning('The usage of "{0}"'.format(name),
                                      2, ArgumentDeprecationWarning)
        else:
            pywikibot.output(u'Unknown argument: %s' % name)

    generator_options['namespaces'] = namespaces

    # More than one source option is a usage error.
    if len(sources) > 1:
        problem = 'You can only use one of {0} options.'.format(
            ' or '.join(sources))
        pywikibot.bot.suggest_help(additional_text=problem,
                                   missing_action=not action)
        return

    if not action:
        pywikibot.bot.suggest_help(missing_action=True)
        return

    pywikibot.Site().login()
    bot_options['generator'] = RedirectGenerator(action, **generator_options)
    RedirectRobot(action, **bot_options).run()