Example #1
def main():
    # The generator gives the pages that should be worked upon.
    gen = None
    # If debug is True, don't make any real changes; only show
    # what would have been changed.
    debug = False
    wantHelp = False


    # Parse command line arguments
    for arg in wikipedia.handleArgs():
        if arg.startswith("-debug"):
            debug = True
        else:
            wantHelp = True

    if not wantHelp:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.

        cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % 'IP-Host')
        hosts_gen = pagegenerators.CategorizedPageGenerator(cat, start = None, recurse = False)
        hosts_gen = pagegenerators.PreloadingGenerator(hosts_gen)

        cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % 'IP-Network')
        nets_gen = pagegenerators.CategorizedPageGenerator(cat, start = None, recurse = False)
        nets_gen = pagegenerators.PreloadingGenerator(nets_gen)

        bot = IpNetworkBot(nets_gen, hosts_gen, debug)
        bot.run()
    else:
        wikipedia.showHelp()
Example #2
 def refreshGenerator(self):
     generator = pagegenerators.CategorizedPageGenerator(
         self.csdCat, start=self.savedProgress)
     # wrap another generator around it so that we won't produce orphaned talk pages.
     generator2 = pagegenerators.PageWithTalkPageGenerator(generator)
     self.preloadingGen = pagegenerators.PreloadingGenerator(generator2,
                                                             pageNumber=20)
Example #3
def main(args):
    '''
    Main loop. Get a generator and options. Work on all images in the generator.
    '''
    generator = None
    onlyFilter = False
    onlyUncat = False
    genFactory = pagegenerators.GeneratorFactory()

    global search_wikis
    global hint_wiki

    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    for arg in pywikibot.handleArgs():
        if arg == '-onlyfilter':
            onlyFilter = True
        elif arg == '-onlyuncat':
            onlyUncat = True
        elif arg.startswith('-hint:'):
            hint_wiki = arg [len('-hint:'):]
        elif arg.startswith('-onlyhint'):
            search_wikis = arg [len('-onlyhint:'):]
        else:
            genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if not generator:
        generator = pagegenerators.CategorizedPageGenerator(
            catlib.Category(site, u'Category:Media needing categories'),
            recurse=True)
    initLists()
    categorizeImages(generator, onlyFilter, onlyUncat)
    pywikibot.output(u'All done')
Example #4
def main():
    global mysite, linktrail, page
    start = []
    for arg in wikipedia.handleArgs():
        start.append(arg)
    if start:
        start = " ".join(start)
    else:
        start = "!"
    mysite = wikipedia.getSite()
    linktrail = mysite.linktrail()
    try:
        generator = pagegenerators.CategorizedPageGenerator(
            mysite.disambcategory(), start=start)
    except wikipedia.NoPage:
        print "The bot does not know the disambiguation category for your wiki."
        raise
    # only work on articles
    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
    generator = pagegenerators.PreloadingGenerator(generator)
    pagestodo = []
    pagestoload = []
    for page in generator:
        if page.isRedirectPage():
            continue
        linked = page.linkedPages()
        pagestodo.append((page, linked))
        pagestoload += linked
        if len(pagestoload) > 49:
            wikipedia.getall(mysite, pagestoload)
            for page, links in pagestodo:
                workon(page, links)
            pagestoload = []
            pagestodo = []
Example #5
def createlist(cat, wpproj, raw=False, cats=True):
    category = catlib.Category(site, cat)
    gen = pagegenerators.CategorizedPageGenerator(category, recurse=True)
    wikitext = ''
    wikitext2 = ''
    wikitext3 = ''

    if not cats:
        for page in gen:
            wikitext = wikitext + '\n*' + str(page)
            link = delink(str(page))
            print link
            wikitext2 = wikitext2 + '\n' + link
        wikitext = unicodify(wikitext)
    if cats:
        subcats = category.subcategories(recurse=True)
        for subcat in subcats:
            newtext = retpages(subcat)
            wikitext3 += newtext
        wikitext3 = unicodify(wikitext3)

    page = wikipedia.Page(site, wpproj + '/Articles')
    if not cats:
        page.put(wikitext, 'Updating watchlist')
    if cats:
        page.put(wikitext3, 'Updating watchlist')
    wikitext2 = '<pre>\n' + wikitext2 + '\n</pre>'
    wikitext2 = unicodify(wikitext2)
    if raw == True:
        page = wikipedia.Page(site, wpproj + '/Articles/raw')
        page.put(wikitext2, 'Updating raw watchlist')
Example #6
def main():
    # The generator gives the pages that should be worked upon.
    gen = None
    # If debug is True, don't make any real changes; only show
    # what would have been changed.
    debug = False
    wantHelp = False

    cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % 'Instance')
    gen = pagegenerators.CategorizedPageGenerator(cat,
                                                  start=None,
                                                  recurse=False)

    # Parse command line arguments
    for arg in wikipedia.handleArgs():
        if arg.startswith("-debug"):
            debug = True
        else:
            print arg, "yielding wanthelp"
            wantHelp = True

    if not wantHelp:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.
        gen = pagegenerators.PreloadingGenerator(gen)
        bot = PuppetUnmanagedListBot(gen, debug)
        bot.run()
    else:
        wikipedia.showHelp()
Example #7
def facatlist(facat):
    wikipedia.config.put_throttle = 0
    wikipedia.put_throttle.setDelay()
    count = 0
    listenpageTitle = []
    PageTitle = facat.replace(u'[[', u'').replace(u']]', u'').strip()
    language = 'fa'
    PageTitles = [PageTitle]
    for PageTitle in PageTitles:
        cat = catlib.Category(wikipedia.getSite(language), PageTitle)
        listacategory = [cat]
        listacategory = categorydown(listacategory)
        for enpageTitle in listacategory:
            enpageTitle = str(enpageTitle).split(u'|')[0].split(
                u']]')[0].replace(u'[[', u'').strip()
            cat = catlib.Category(wikipedia.getSite(language), enpageTitle)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            for pagework in gent:
                count += 1
                try:
                    link = str(pagework).split(u'|')[0].split(
                        u']]')[0].replace(u'[[', u'').strip()
                except:
                    pagework = unicode(str(pagework), 'UTF-8')
                    link = pagework.split(u'|')[0].split(u']]')[0].replace(
                        u'[[', u'').strip()
                wikipedia.output(link)
                fapagetitle = link
                wikipedia.output(u'adding ' + fapagetitle +
                                 u' to fapage lists')
                listenpageTitle.append(fapagetitle)
    if listenpageTitle == []:
        return False
    return listenpageTitle
Example #8
def main():
    #page generator
    gen = None
    # This temporary list is used to read the page title if a single
    # page to work on is specified in the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        except:
            pass
        else:
            import catlib
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site,
                                  "%s:%s" % (site.category_namespace(), cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
Example #9
def docat(tag, cat2):
    site = wikipedia.getSite()
    cat = catlib.Category(site, cat2)
    gen = pagegenerators.CategorizedPageGenerator(cat)
    dogen(gen, tag)
    wikipedia.output(u'\nFinished with Category:' + cat2 + '.\n')
    print 'Waiting 10 seconds'
    time.sleep(10)
Example #10
def main():
    gen = None

    cat = catlib.Category(wikipedia.getSite(), 'Category:Debian')
    hosts_gen = pagegenerators.CategorizedPageGenerator(cat,
                                                        start=None,
                                                        recurse=False)
    hosts_gen = pagegenerators.PreloadingGenerator(hosts_gen)

    bot = IpNetworkBot(hosts_gen)
    bot.run()
Example #11
    def generator(self):
        # Choose which generator to use according to options.
 
        pagegen = None
 
        if self.__workonnew:
            if not self.__number:
                self.__number = config.special_page_limit
            pagegen = pagegenerators.NewpagesPageGenerator(number = self.__number)
 
        elif self.__refpagetitle:
            refpage = wikipedia.Page(wikipedia.getSite(), self.__refpagetitle)
            pagegen = pagegenerators.ReferringPageGenerator(refpage)
 
        elif self.__linkpagetitle:
            linkpage = wikipedia.Page(wikipedia.getSite(), self.__linkpagetitle)
            pagegen = pagegenerators.LinkedPageGenerator(linkpage)
 
        elif self.__catname:
            cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % self.__catname)
 
            if self.__start:
                pagegen = pagegenerators.CategorizedPageGenerator(cat, recurse = self.__catrecurse, start = self.__start)
            else:
                pagegen = pagegenerators.CategorizedPageGenerator(cat, recurse = self.__catrecurse)
 
        elif self.__textfile:
            pagegen = pagegenerators.TextfilePageGenerator(self.__textfile)
 
        else:
            if not self.__start:
                self.__start = '!'
            namespace = wikipedia.Page(wikipedia.getSite(), self.__start).namespace()
            start = wikipedia.Page(wikipedia.getSite(), self.__start).titleWithoutNamespace()
 
            pagegen = pagegenerators.AllpagesPageGenerator(start, namespace)
 
        return pagegen
Example #12
def get_stats():
    import catlib, pagegenerators

    msg = pywikibot.translate(pywikibot.getSite(), stat_msg)
    cat = catlib.Category(
        pywikibot.getSite(),
        'Category:%s' % pywikibot.translate(pywikibot.getSite(), reports_cat))
    gen = pagegenerators.CategorizedPageGenerator(cat, recurse=True)
    output = u"""{| {{prettytable|width=|align=|text-align=left}}
! %s
! %s
! %s
! %s
! %s
! %s
|-
""" % (msg[1], msg[2], msg[3], 'Google', 'Yahoo', 'Live Search')
    gnt = 0
    ynt = 0
    mnt = 0
    ent = 0
    sn = 0
    snt = 0
    for page in gen:
        data = page.get()
        gn = stat_sum('google', data)
        yn = stat_sum('yahoo', data)
        mn = stat_sum('(msn|live)', data)
        en = len(re.findall('=== \[\[', data))
        sn = len(data)
        gnt += gn
        ynt += yn
        mnt += mn
        ent += en
        snt += sn
        if en > 0:
            output += u"|%s||%s||%s KB||%s||%s||%s\n|-\n" \
                      % (page.title(asLink=True), en, sn / 1024, gn, yn, mn)
    output += u"""|&nbsp;||||||||
|-
|'''%s'''||%s||%s KB||%s||%s||%s
|-
|colspan="6" align=right style="background-color:#eeeeee;"|<small>''%s: %s''</small>
|}
""" % (msg[4], ent, snt / 1024, gnt, ynt, mnt, msg[5],
       time.strftime("%d " + "%s" %
                     (date.monthName(pywikibot.getSite().language(),
                                     time.localtime()[1])) + " %Y"))
    return output
Example #13
def docat(cat2):
    category = catlib.Category(site, cat2)
    gen = pagegen.CategorizedPageGenerator(category)
    for page in gen:
        if page.namespace() == 0:
            try:
                process_article(page)
            except UnicodeEncodeError:
                log_error(page)
                pass
            checktalk()
        else:
            print 'Skipping %s because it is not in the mainspace' % (
                page.title())
            log_error(page)
    print 'Done with Category:%s' % (cat2)
Example #14
def filterSubCategory(subcat, category):
    """ Filter category from all articles and files in subcat """
    articleGen = pagegenerators.PreloadingGenerator(
        pagegenerators.CategorizedPageGenerator(subcat))

    for article in articleGen:
        pywikibot.output(u'Working on %s' % (article.title(), ))
        articleCategories = article.categories()
        if category in articleCategories:
            articleCategories.remove(category)
            text = article.get()
            newtext = pywikibot.replaceCategoryLinks(text, articleCategories)
            pywikibot.showDiff(text, newtext)
            comment = (
                u'Removing [[%s]]: Is already in the subcategory [[%s]]' %
                (category.title(), subcat.title()))
            article.put(newtext, comment)
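
The helper above only handles a single subcategory. A minimal driver sketch (not part of the original snippet; the function name and parameter are assumptions) could walk every subcategory of a parent category, reusing the catlib and pagegenerators calls shown in the other examples:

def filterAllSubCategories(parentCatTitle):
    # Hypothetical driver: strip the parent category from every page that is
    # already contained in one of its subcategories.
    site = pywikibot.getSite()
    parent = catlib.Category(site, parentCatTitle)
    for subcat in parent.subcategories(recurse=False):
        filterSubCategory(subcat, parent)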
Example #15
 def createPageGenerator(self, firstPageTitle):
     if pywikibot.getSite().lang in self.misspellingCategory:
         misspellingCategoryTitle = self.misspellingCategory[pywikibot.getSite().lang]
         misspellingCategory = catlib.Category(pywikibot.getSite(),
                                               misspellingCategoryTitle)
         generator = pagegenerators.CategorizedPageGenerator(
             misspellingCategory, recurse = True, start=firstPageTitle)
     else:
         misspellingTemplateName = 'Template:%s' \
                                   % self.misspellingTemplate[pywikibot.getSite().lang]
         misspellingTemplate = pywikibot.Page(pywikibot.getSite(),
                                              misspellingTemplateName)
         generator = pagegenerators.ReferringPageGenerator(
             misspellingTemplate, onlyTemplateInclusion=True)
         if firstPageTitle:
             pywikibot.output(
                 u'-start parameter unsupported on this wiki because there is no category for misspellings.')
     preloadingGen = pagegenerators.PreloadingGenerator(generator)
     return preloadingGen
Example #16
def main():
    site = wikipedia.Site("pt", "wikipedia")
    cat = catlib.Category(site, u"!Robótica")
    ''' As explained before, we have defined the site and the category,
        so we can now obtain the list of pages, this time using
        pagegenerators. A faster option is to use the PreloadingGenerator
        as well; for that it is enough to do something like
        pages = pagegenerators.PreloadingGenerator(pagegenerators.CategorizedPageGenerator(cat))
        This makes the pages be loaded up front, unlike the current script,
        which loads them as they are needed.
    '''
    pages = pagegenerators.CategorizedPageGenerator(cat)
    for page in pages:
        '''Now that we are iterating, first get the page title.
        '''
        print page.title()
        ''' With page.namespace() we get the page's namespace, although in
            canonical form, i.e. as a number. To get the namespace name,
            use site.namespace(). To do both at once, just replace the two
            lines below with
            namespace = site.namespace(page.namespace())
        '''

        pageNamespaceNumber = page.namespace()
        namespace = site.namespace(pageNamespaceNumber)
        if namespace == u"Ajuda":
            ''' Here we filter the pages that belong to the Ajuda (Help)
                namespace and print the namespace name as well as the
                templates contained in the pages. '''
            print len(page.get())
            print u"namespace: ", site.namespace(page.namespace())
            print u"templates: ", page.templates()
        elif namespace == u"Wikipédia":
            ''' In this block only pages of the Wikipédia namespace are
                handled, and we print the namespace and the article title
                without namespace or subpages (in short, the title of the
                main article).
            '''
            print u"namespace: ", site.namespace(page.namespace())
            print u"Página principal (título sem subpágina): ", page.sectionFreeTitle(
            )
            print u"Página principal sem título nem namespace: ", page.title(
                withNamespace=False)
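
The commentary in this example mentions a faster variant that preloads the category members up front instead of fetching them one page at a time. A minimal sketch of that variant, assuming the same site and category as above:

import catlib
import pagegenerators
import wikipedia

site = wikipedia.Site("pt", "wikipedia")
cat = catlib.Category(site, u"!Robótica")
# PreloadingGenerator fetches the category members in batches up front,
# instead of loading each page lazily when it is first accessed.
pages = pagegenerators.PreloadingGenerator(
    pagegenerators.CategorizedPageGenerator(cat))
for page in pages:
    print page.title()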
Example #17
def checkRecentEdits_API(cat, end):
#	cat = catlib.Category(site, "%s:%s" % (site.namespace(14), u'Baden-Württemberg'))
	cat = catlib.Category(site, "%s:%s" % (site.namespace(14), u'Portal:Hund'))

	res = []
	for page in pagegenerators.CategorizedPageGenerator(cat, recurse=True):
		if not page.isTalkPage():
			page = page.toggleTalkPage()
		title  = '?'
		change = '?'
		try:
			title  = page.title()
			change = page.getVersionHistory(revCount=1)[0][1]
		except pywikibot.exceptions.NoPage:
			continue
		except:
			pass
		res.append( (title, change) )

	return res
Example #18
def main():
    site = wikipedia.Site("pt", "wikipedia")
    cat = catlib.Category(site, u"!Robótica")

    pages = pagegenerators.CategorizedPageGenerator(cat)
    for page in pages:

        print page.title()

        pageNamespaceNumber = page.namespace()
        namespace = site.namespace(pageNamespaceNumber)
        if namespace == u"Ajuda":

            print len(page.get())
            print u"namespace: ", site.namespace(page.namespace())
            print u"templates: ", page.templates()

        elif namespace == u"Wikipédia":

            print u"namespace: ", site.namespace(page.namespace())
            print u"Página principal (título sem subpágina): ", page.sectionFreeTitle(
            )
            print u"Página principal sem título nem namespace: ", page.title(
                withNamespace=False)
Example #19
def main():
    pageName = ''
    singlePage = ''
    summary = ''
    always = False
    doSinglePage = False
    doCategory = False
    deleteSubcategories = True
    doRef = False
    doLinks = False
    doImages = False
    undelete = False
    fileName = ''
    gen = None

    # read command line parameters
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
        elif arg.startswith('-file'):
            if len(arg) == len('-file'):
                fileName = pywikibot.input(
                    u'Enter name of file to delete pages from:')
            else:
                fileName = arg[len('-file:'):]
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                summary = pywikibot.input(u'Enter a reason for the deletion:')
            else:
                summary = arg[len('-summary:'):]
        elif arg.startswith('-cat'):
            doCategory = True
            if len(arg) == len('-cat'):
                pageName = pywikibot.input(
                    u'Enter the category to delete from:')
            else:
                pageName = arg[len('-cat:'):]
        elif arg.startswith('-nosubcats'):
            deleteSubcategories = False
        elif arg.startswith('-links'):
            doLinks = True
            if len(arg) == len('-links'):
                pageName = pywikibot.input(u'Enter the page to delete from:')
            else:
                pageName = arg[len('-links:'):]
        elif arg.startswith('-ref'):
            doRef = True
            if len(arg) == len('-ref'):
                pageName = pywikibot.input(u'Enter the page to delete from:')
            else:
                pageName = arg[len('-ref:'):]
        elif arg.startswith('-page'):
            doSinglePage = True
            if len(arg) == len('-page'):
                pageName = pywikibot.input(u'Enter the page to delete:')
            else:
                pageName = arg[len('-page:'):]
        elif arg.startswith('-images'):
            doImages = True
            if len(arg) == len('-images'):
                pageName = pywikibot.input(
                    u'Enter the page with the images to delete:')
            else:
                pageName = arg[len('-images:'):]
        elif arg.startswith('-undelete'):
            undelete = True

    mysite = pywikibot.getSite()
    if doSinglePage:
        if not summary:
            summary = pywikibot.input(u'Enter a reason for the deletion:')
        page = pywikibot.Page(mysite, pageName)
        gen = iter([page])
    elif doCategory:
        if not summary:
            summary = pywikibot.translate(mysite, msg_delete_category) \
                      % pageName
        ns = mysite.category_namespace()
        categoryPage = catlib.Category(mysite, ns + ':' + pageName)
        gen = pagegenerators.CategorizedPageGenerator(
            categoryPage, recurse=deleteSubcategories)
    elif doLinks:
        if not summary:
            summary = pywikibot.translate(mysite, msg_delete_links) % pageName
        pywikibot.setAction(summary)
        linksPage = pywikibot.Page(mysite, pageName)
        gen = pagegenerators.LinkedPageGenerator(linksPage)
    elif doRef:
        if not summary:
            summary = pywikibot.translate(mysite, msg_delete_ref) % pageName
        refPage = pywikibot.Page(mysite, pageName)
        gen = pagegenerators.ReferringPageGenerator(refPage)
    elif fileName:
        if not summary:
            summary = pywikibot.translate(mysite, msg_simple_delete)
        gen = pagegenerators.TextfilePageGenerator(fileName)
    elif doImages:
        if not summary:
            summary = pywikibot.translate(mysite, msg_delete_images)
        gen = pagegenerators.ImagesPageGenerator(
            pywikibot.Page(mysite, pageName))

    if gen:
        pywikibot.setAction(summary)
        # We are just deleting pages, so there is no need to use a preloading
        # page generator to fetch the text of those pages.
        bot = DeletionRobot(gen, summary, always, undelete)
        bot.run()
    else:
        pywikibot.showHelp(u'delete')
Example #20
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import catlib
import pagegenerators
import sys
import wikipedia

start = sys.argv[1]
lfsite = wikipedia.Site('librefind', 'librefind')
ensite = wikipedia.Site('en', 'wikipedia')

cat = catlib.Category(lfsite, u"Category:All the searches")
gen = pagegenerators.CategorizedPageGenerator(cat, start=start)
pre = pagegenerators.PreloadingGenerator(gen)

for page in pre:
    title = page.title()
    enpage = wikipedia.Page(ensite, title)
    if enpage.exists(
    ) and not enpage.isRedirectPage() and not enpage.isDisambig():
        redirects = enpage.getReferences(redirectsOnly=True)
        for redirect in redirects:
            if redirect.namespace(
            ) != 0:  # skipping redirects from user pages etc.
                continue
            rtitle = redirect.title()
            print rtitle, title
            rpage = wikipedia.Page(lfsite, rtitle)
Example #21
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import re

import catlib
import pagegenerators
import wikipedia

sitees = wikipedia.Site('es', 'wikipedia')
sitetg = wikipedia.Site('todogratix', 'todogratix')
#planti = wikipedia.Page(sitees, u'Plantilla:Ficha de país')
#gen = pagegenerators.ReferringPageGenerator(planti, followRedirects=True, onlyTemplateInclusion=True)
category = catlib.Category(sitetg, u'Category:Personas')
gen = pagegenerators.CategorizedPageGenerator(category,
                                              recurse=False,
                                              start='!')
pre = pagegenerators.PreloadingGenerator(gen)

skip = u'Juan Manuel Infante of Castile'
for page in pre:
    if not page.exists() or page.isRedirectPage() or page.namespace() != 0:
        continue
    if skip:
        if page.title() == skip:
            skip = ''
        continue

    #filter
    if re.search(ur"[\(\),\.\:\-]", page.title()) or \
        not re.search(ur"(?im)fechafallecimiento\s*=\s*\d{4}", page.get()):
Example #22
def main():
    wikipedia.config.put_throttle = 0
    wikipedia.put_throttle.setDelay()
    summary_commandline, gen, template = None, None, None
    namespaces, PageTitles, exceptions = [], [], []
    encat, newcatfile = '', ''
    autoText, autoTitle = False, False
    recentcat, newcat = False, False
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg == '-autotitle':
            autoTitle = True
        elif arg == '-autotext':
            autoText = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    wikipedia.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
            break
        elif arg.startswith('-except:'):
            exceptions.append(arg[8:])
        elif arg.startswith('-template:'):
            template = arg[10:]
        elif arg.startswith('-encat:'):
            encat = arg[7:].replace(u'Category:',
                                    u'').replace(u'category:',
                                                 u'').replace(u'رده:', u'')
            break
        elif arg.startswith('-newcatfile:'):
            newcatfile = arg[12:]
            break
        elif arg.startswith('-recentcat'):
            arg = arg.replace(':', '')
            if len(arg) == 10:
                genfa = pagegenerators.RecentchangesPageGenerator()
            else:
                genfa = pagegenerators.RecentchangesPageGenerator(
                    number=int(arg[10:]))
            genfa = pagegenerators.DuplicateFilterPageGenerator(genfa)
            genfa = pagegenerators.NamespaceFilterPageGenerator(genfa, [14])
            preloadingGen = pagegenerators.PreloadingGenerator(genfa, 60)
            recentcat = True
            break
        elif arg.startswith('-newcat'):
            arg = arg.replace(':', '')
            if len(arg) == 7:
                genfa = pagegenerators.NewpagesPageGenerator(
                    100, False, None, 14)
            else:
                genfa = pagegenerators.NewpagesPageGenerator(
                    int(arg[7:]), False, None, 14)
            preloadingGen = pagegenerators.PreloadingGenerator(genfa, 60)
            newcat = True
            break
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        elif arg.startswith('-summary:'):
            wikipedia.setAction(arg[9:])
            summary_commandline = True
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator

    if PageTitles:
        pages = [
            wikipedia.Page(wikipedia.getSite(), PageTitle)
            for PageTitle in PageTitles
        ]
        gen = iter(pages)
    if recentcat:
        for workpage in preloadingGen:
            workpage = workpage.title()
            cat = catlib.Category(wikipedia.getSite('fa'), workpage)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            run(gent)
        wikipedia.stopme()
        sys.exit()
    if newcat:
        run(preloadingGen)
    if newcatfile:
        text2 = codecs.open(newcatfile, 'r', 'utf8')
        text = text2.read()
        linken = re.findall(ur'\[\[.*?\]\]', text, re.S)
        run(linken)
        wikipedia.stopme()
        sys.exit()
    if not gen:
        wikipedia.stopme()
        sys.exit()
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    run(preloadingGen)
Example #23
                    wikipedia.output('* ' + title)
                    return False
    return True

def revid_exist(revid):
    for pageobjs in query_results_revids:
        if 'badrevids' in pageobjs['query']:
            for id in pageobjs['query']['badrevids']:
                if id == int(revid):
                    # print rv
                    wikipedia.output('* ' + revid)
                    return False
    return True

cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % wikipedia.translate(wikipedia.getSite(), reports_cat))
gen = pagegenerators.CategorizedPageGenerator(cat, recurse = True)

for page in gen:
    data = page.get()
    wikipedia.output(page.aslink())
    output = ''

    #
    # Preserve text before of the sections
    #

    m = re.search("(?m)^==\s*[^=]*?\s*==", data)
    if m:
        output = data[:m.end() + 1]
    else:
        m = re.search("(?m)^===\s*[^=]*?", data)
Example #24
def main():
    s = wikipedia.Site('wikiindex', 'wikiindex')
    cat = catlib.Category(s, 'Category:MediaWiki')

    start = '!'
    if len(sys.argv) == 2:
        start = sys.argv[1]

    gen = pagegenerators.CategorizedPageGenerator(cat, start=start)
    pre = pagegenerators.PreloadingGenerator(gen, pageNumber=50)

    """{{Size
|pages = <!--Necessary. Type the plain number of pages here - no thousands separators.-->
wiki pages, wiki_pages

|statistics URL = <!--Preferred, source of page count (mostly a statistics page). If unknown leave void.-->
wiki statistics URL, wiki_statistics_URL

|wikiFactor = <!--Optional. If unknown leave void. (See Proposal:wikiFactor)-->
|wikiFactor URL = <!--Optional, source of wiki factor. If unknown leave void.-->
}}"""

    size_r = re.compile(ur"""(?im)(?P<all>\{\{\s*Size\s*((\s*\|\s*(?P<pages>pages|wiki[ _]pages)\s*=\s*(?P<pages_value>\d*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<pagesurl>statistics[ _]URL|wiki[ _]statistics[ _]URL)\s*=\s*(?P<pagesurl_value>https?://[^ \|\}\<]*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<wikifactor>wikiFactor)\s*=\s*(?P<wikifactor_value>\d*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<wikifactorurl>wikiFactor[ _]URL)\s*=\s*(?P<wikifactorurl_value>http://[^ \|\}\<]*)\s*[^\|\}]*\s*))+\s*\|?\s*\}\})""")

    for page in pre:
        if not page.exists() or page.isRedirectPage():
            continue
        
        wikipedia.output('--> %s <--' % (page.title()))
        wtext = page.get()
        newtext = wtext
        
        m = size_r.finditer(wtext)
        for i in m:
            all = i.group('all') and i.group('all').strip() or ''
            pages = i.group('pages') and i.group('pages').strip() or ''
            pagesurl = i.group('pagesurl') and i.group('pagesurl').strip() or ''
            wikifactor = i.group('wikifactor') and i.group('wikifactor').strip() or ''
            wikifactorurl = i.group('wikifactorurl') and i.group('wikifactorurl').strip() or ''
            
            pages_value = i.group('pages_value') and i.group('pages_value').strip() or '0'
            pagesurl_value = i.group('pagesurl_value') and i.group('pagesurl_value').strip() or ''
            wikifactor_value = i.group('wikifactor_value') and i.group('wikifactor_value').strip() or ''
            wikifactorurl_value = i.group('wikifactorurl_value') and i.group('wikifactorurl_value').strip() or ''
            
            #get new values
            n = re.findall(ur"(https?://[^\|\}\]]+\?action=raw|https?://[^\|\}\]]+:Statistics)", pagesurl_value)
            if n:
                raw = ''
                try:
                    url = n[0]
                    if url.endswith(":Statistics"):
                        url += '?action=raw'
                    f = urllib.urlopen(url)
                    raw = unicode(f.read(), 'utf-8')
                    f.close()
                except:
                    break
                o = re.findall(ur"total=\d+;good=(\d+);", raw)
                if o:
                    if o[0] and int(pages_value) != int(o[0]):
                        summary = u"BOT - Updating size: %s -> %s" % (pages_value, o[0])
                        pages_value = o[0]
                    else:
                        break
                else:
                    break
            else:
                break
            #end get
            
            #recalculate wikifactor
            pass #todo, leave AS IS meanwhile
            #end recalculate
            
            """print pages, pages_value
            print pagesurl, pagesurl_value
            print wikifactor, wikifactor_value
            print wikifactorurl, wikifactorurl_value"""
            
            newvalues = u"""{{Size
| %s = %s <!--Necessary. Type the plain number of pages here - no thousands separators.-->
| %s = %s <!--Preferred, source of page count (mostly a statistics page). If unknown leave void.-->
| %s = %s <!--Optional. If unknown leave void. (See Proposal:wikiFactor)-->
| %s = %s <!--Optional, source of wiki factor. If unknown leave void.-->
}}""" % (pages and pages or 'pages', pages_value and pages_value or '', pagesurl and pagesurl or 'statistics URL', pagesurl_value and pagesurl_value or '', wikifactor and wikifactor or 'wikiFactor', wikifactor_value and wikifactor_value or '', wikifactorurl and wikifactorurl or 'wikiFactor URL', wikifactorurl_value and wikifactorurl_value or '')
            newtext = wtext.replace(all, newvalues)
            if wtext != newtext:
                wikipedia.showDiff(wtext, newtext)
                page.put(newtext, summary)
                
            break
Example #25
class CategoryMoveRobot:
    """Robot to move pages from one category to another."""
    def __init__(self,
                 oldCatTitle,
                 newCatTitle,
                 batchMode=False,
                 editSummary='',
                 inPlace=False,
                 moveCatPage=True,
                 deleteEmptySourceCat=True,
                 titleRegex=None,
                 useSummaryForDeletion=True):
        site = pywikibot.getSite()
        self.editSummary = editSummary
        self.oldCat = catlib.Category(site, oldCatTitle)
        self.newCatTitle = newCatTitle
        self.inPlace = inPlace
        self.moveCatPage = moveCatPage
        self.batchMode = batchMode
        self.deleteEmptySourceCat = deleteEmptySourceCat
        self.titleRegex = titleRegex
        self.useSummaryForDeletion = useSummaryForDeletion

    def run(self):
        site = pywikibot.getSite()
        newCat = catlib.Category(site, self.newCatTitle)
        # set edit summary message
        if not self.editSummary:
            self.editSummary = i18n.twtranslate(site, 'category-changing') \
                               % {'oldcat':self.oldCat.title(),
                                  'newcat':newCat.title()}

        if self.useSummaryForDeletion and self.editSummary:
            reason = self.editSummary
        else:
            reason = i18n.twtranslate(site, deletion_reason_move) \
                     % {'newcat': self.newCatTitle, 'title': self.newCatTitle}

        # Copy the category contents to the new category page
        copied = False
        oldMovedTalk = None
        if self.oldCat.exists() and self.moveCatPage:
            copied = self.oldCat.copyAndKeep(
                self.newCatTitle, pywikibot.translate(site, cfd_templates))
            # Also move the talk page
            if copied:
                oldTalk = self.oldCat.toggleTalkPage()
                if oldTalk.exists():
                    newTalkTitle = newCat.toggleTalkPage().title()
                    try:
                        talkMoved = oldTalk.move(newTalkTitle, reason)
                    except (pywikibot.NoPage, pywikibot.PageNotSaved), e:
                        #in order :
                        #Source talk does not exist, or
                        #Target talk already exists
                        pywikibot.output(e.message)
                    else:
                        if talkMoved:
                            oldMovedTalk = oldTalk

        # Move articles
        gen = pagegenerators.CategorizedPageGenerator(self.oldCat,
                                                      recurse=False)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        for article in preloadingGen:
            if not self.titleRegex or re.search(self.titleRegex,
                                                article.title()):
                catlib.change_category(article,
                                       self.oldCat,
                                       newCat,
                                       comment=self.editSummary,
                                       inPlace=self.inPlace)

        # Move subcategories
        gen = pagegenerators.SubCategoriesPageGenerator(self.oldCat,
                                                        recurse=False)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        for subcategory in preloadingGen:
            if not self.titleRegex or re.search(self.titleRegex,
                                                subcategory.title()):
                catlib.change_category(subcategory,
                                       self.oldCat,
                                       newCat,
                                       comment=self.editSummary,
                                       inPlace=self.inPlace)

        # Delete the old category and its moved talk page
        if copied and self.deleteEmptySourceCat == True:
            if self.oldCat.isEmptyCategory():
                confirm = not self.batchMode
                self.oldCat.delete(reason, confirm, mark=True)
                if oldMovedTalk is not None:
                    oldMovedTalk.delete(reason, confirm, mark=True)
            else:
                pywikibot.output('Couldn\'t delete %s - not empty.' %
                                 self.oldCat.title())
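
A minimal usage sketch for the robot above; the category titles and the edit summary are made-up placeholders:

# Hypothetical invocation of the CategoryMoveRobot defined above.
bot = CategoryMoveRobot(u'Old category name', u'New category name',
                        batchMode=True,
                        editSummary=u'Moving category per discussion',
                        inPlace=False)
bot.run()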
Example #26
class CategoryMoveRobot(object):
    """Bot to move pages from one category to another."""
    def __init__(self,
                 oldCatTitle,
                 newCatTitle,
                 batchMode=False,
                 editSummary='',
                 inPlace=False,
                 moveCatPage=True,
                 deleteEmptySourceCat=True,
                 titleRegex=None,
                 useSummaryForDeletion=True,
                 withHistory=False):
        site = pywikibot.getSite()
        self.editSummary = editSummary
        self.oldCat = catlib.Category(site, oldCatTitle)
        self.newCatTitle = newCatTitle
        self.inPlace = inPlace
        self.moveCatPage = moveCatPage
        self.batchMode = batchMode
        self.deleteEmptySourceCat = deleteEmptySourceCat
        self.titleRegex = titleRegex
        self.useSummaryForDeletion = useSummaryForDeletion
        self.withHistory = withHistory

    def run(self):
        """The main bot function that does all the work."""
        site = pywikibot.getSite()
        newCat = catlib.Category(site, self.newCatTitle)
        # set edit summary message
        if self.useSummaryForDeletion and self.editSummary:
            reason = self.editSummary
        else:
            reason = i18n.twtranslate(site, 'category-was-moved', {
                'newcat': self.newCatTitle,
                'title': self.newCatTitle
            })
        if not self.editSummary:
            self.editSummary = i18n.twtranslate(site, 'category-changing', {
                'oldcat': self.oldCat.title(),
                'newcat': newCat.title()
            })

        # Copy the category contents to the new category page
        copied = False
        oldMovedTalk = None
        if (site.isAllowed('move-categorypages') and self.oldCat.exists()
                and self.moveCatPage):
            self.oldCat.move(newCat.title(),
                             reason=self.editSummary,
                             movetalkpage=True,
                             leaveRedirect=not self.deleteEmptySourceCat)
            copied = True
        elif self.oldCat.exists() and self.moveCatPage:
            copied = self.oldCat.copyAndKeep(
                newCat.title(), pywikibot.translate(site, cfd_templates))
            # Also move the talk page
            if copied:
                oldTalk = self.oldCat.toggleTalkPage()
                if oldTalk.exists():
                    newTalkTitle = newCat.toggleTalkPage().title()
                    try:
                        talkMoved = oldTalk.move(newTalkTitle, reason)
                    except (pywikibot.NoPage, pywikibot.PageNotSaved), e:
                        #in order :
                        #Source talk does not exist, or
                        #Target talk already exists
                        pywikibot.output(e.message)
                    else:
                        if talkMoved:
                            oldMovedTalk = oldTalk

                if self.withHistory:
                    # Whether or not there was an old talk page, we write
                    # the page history to the new talk page
                    history = self.oldCat.getVersionHistoryTable()
                    # Set the section title for the old cat's history on the new
                    # cat's talk page.
                    sectionTitle = i18n.twtranslate(
                        site, 'category-section-title',
                        {'oldcat': self.oldCat.title()})
                    #Should be OK, we are within if self.oldCat.exists()
                    historySection = u'\n== %s ==\n%s' % (sectionTitle,
                                                          history)
                    try:
                        text = newCat.toggleTalkPage().get() + historySection
                    except pywikibot.NoPage:
                        text = historySection
                    try:
                        newCat.toggleTalkPage().put(
                            text,
                            i18n.twtranslate(site, 'category-version-history')
                            % {'oldcat': self.oldCat.title()})
                    except:
                        pywikibot.output(
                            'History of the category has not been saved to new '
                            'talk page')
                        #TODO: some nicer exception handling (not too important)
                        #      first move the page, then tag the version history

        # Move articles
        gen = pagegenerators.CategorizedPageGenerator(self.oldCat,
                                                      recurse=False)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        for article in preloadingGen:
            if not self.titleRegex or re.search(self.titleRegex,
                                                article.title()):
                catlib.change_category(article,
                                       self.oldCat,
                                       newCat,
                                       comment=self.editSummary,
                                       inPlace=self.inPlace)

        # Move subcategories
        gen = pagegenerators.SubCategoriesPageGenerator(self.oldCat,
                                                        recurse=False)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        for subcategory in preloadingGen:
            if not self.titleRegex or re.search(self.titleRegex,
                                                subcategory.title()):
                catlib.change_category(subcategory,
                                       self.oldCat,
                                       newCat,
                                       comment=self.editSummary,
                                       inPlace=self.inPlace)

        # Delete the old category and its moved talk page
        if copied and self.deleteEmptySourceCat:
            if self.oldCat.isEmptyCategory():
                confirm = not self.batchMode
                self.oldCat.delete(reason, confirm, mark=True)
                if oldMovedTalk is not None:
                    oldMovedTalk.delete(reason, confirm, mark=True)
            else:
                pywikibot.output('Couldn\'t delete %s - not empty.' %
                                 self.oldCat.title())
Example #27
def main(*args):
    # the option that's always selected when the bot wonders what to do with
    # a link. If it's None, the user is prompted (default behaviour).
    always = None
    alternatives = []
    getAlternatives = True
    dnSkip = False
    # if the -file argument is used, page titles are dumped in this array.
    # otherwise it will only contain one page.
    generator = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    primary = False
    main_only = False
    #Shall we use only the first link from each asterisked line?
    first_only = False

    # For sorting the linked pages, case can be ignored
    minimum = 0

    for arg in pywikibot.handleArgs(*args):
        if arg.startswith('-primary:'):
            primary = True
            getAlternatives = False
            alternatives.append(arg[9:])
        elif arg == '-primary':
            primary = True
        elif arg.startswith('-always:'):
            always = arg[8:]
        elif arg.startswith('-file'):
            if len(arg) == 5:
                generator = pagegenerators.TextfilePageGenerator(filename=None)
            else:
                generator = pagegenerators.TextfilePageGenerator(
                    filename=arg[6:])
        elif arg.startswith('-pos:'):
            if arg[5] != ':':
                mysite = pywikibot.getSite()
                page = pywikibot.Page(mysite, arg[5:])
                if page.exists():
                    alternatives.append(page.title())
                else:
                    answer = pywikibot.inputChoice(
                        u'Possibility %s does not actually exist. Use it anyway?'
                        % page.title(), ['yes', 'no'], ['y', 'N'], 'N')
                    if answer == 'y':
                        alternatives.append(page.title())
            else:
                alternatives.append(arg[5:])
        elif arg == '-just':
            getAlternatives = False
        elif arg == '-dnskip':
            dnSkip = True
        elif arg == '-main':
            main_only = True
        elif arg == '-first':
            first_only = True
        elif arg.startswith('-min:'):
            minimum = int(arg[5:])
        elif arg.startswith('-start'):
            try:
                if len(arg) <= len('-start:'):
                    generator = pagegenerators.CategorizedPageGenerator(
                        pywikibot.getSite().disambcategory())
                else:
                    generator = pagegenerators.CategorizedPageGenerator(
                        pywikibot.getSite().disambcategory(), start=arg[7:])
                generator = pagegenerators.NamespaceFilterPageGenerator(
                    generator, [0])
            except pywikibot.NoPage:
                pywikibot.output(
                    "Disambiguation category for your wiki is not known.")
                raise
        elif arg.startswith("-"):
            pywikibot.output("Unrecognized command line argument: %s" % arg)
            # show help text and exit
            pywikibot.showHelp()
        else:
            pageTitle.append(arg)

    # if the disambiguation page is given as a command line argument,
    # connect the title's parts with spaces
    if pageTitle != []:
        pageTitle = ' '.join(pageTitle)
        page = pywikibot.Page(pywikibot.getSite(), pageTitle)
        generator = iter([page])

    # if no disambiguation page was given as an argument, and none was
    # read from a file, query the user
    if not generator:
        pageTitle = pywikibot.input(
            u'On which disambiguation page do you want to work?')
        page = pywikibot.Page(pywikibot.getSite(), pageTitle)
        generator = iter([page])

    bot = DisambiguationRobot(always,
                              alternatives,
                              getAlternatives,
                              dnSkip,
                              generator,
                              primary,
                              main_only,
                              first_only,
                              minimum=minimum)
    bot.run()
Example #28
def main():
    global protectionLevels
    protectionLevels = ['sysop', 'autoconfirmed', 'none']

    pageName = ''
    summary = ''
    always = False
    doSinglePage = False
    doCategory = False
    protectSubcategories = True
    doRef = False
    doLinks = False
    doImages = False
    fileName = ''
    gen = None
    edit = ''
    move = ''
    defaultProtection = 'sysop'

    # read command line parameters
    for arg in wikipedia.handleArgs():
        if arg == '-always':
            always = True
        elif arg.startswith('-file'):
            if len(arg) == len('-file'):
                fileName = wikipedia.input(
                    u'Enter name of file to protect pages from:')
            else:
                fileName = arg[len('-file:'):]
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                summary = wikipedia.input(
                    u'Enter a reason for the protection:')
            else:
                summary = arg[len('-summary:'):]
        elif arg.startswith('-cat'):
            doCategory = True
            if len(arg) == len('-cat'):
                pageName = wikipedia.input(
                    u'Enter the category to protect from:')
            else:
                pageName = arg[len('-cat:'):]
        elif arg.startswith('-nosubcats'):
            protectSubcategories = False
        elif arg.startswith('-links'):
            doLinks = True
            if len(arg) == len('-links'):
                pageName = wikipedia.input(u'Enter the page to protect from:')
            else:
                pageName = arg[len('-links:'):]
        elif arg.startswith('-ref'):
            doRef = True
            if len(arg) == len('-ref'):
                pageName = wikipedia.input(u'Enter the page to protect from:')
            else:
                pageName = arg[len('-ref:'):]
        elif arg.startswith('-page'):
            doSinglePage = True
            if len(arg) == len('-page'):
                pageName = wikipedia.input(u'Enter the page to protect:')
            else:
                pageName = arg[len('-page:'):]
        elif arg.startswith('-images'):
            doImages = True
            if len(arg) == len('-images'):
                pageName = wikipedia.input(
                    u'Enter the page with the images to protect:')
            else:
                pageName = arg[len('-images:'):]
        elif arg.startswith('-unprotect'):
            defaultProtection = 'none'
        elif arg.startswith('-edit'):
            edit = arg[len('-edit:'):]
            if edit not in protectionLevels:
                edit = choiceProtectionLevel('edit', defaultProtection)
        elif arg.startswith('-move'):
            move = arg[len('-move:'):]
            if move not in protectionLevels:
                move = choiceProtectionLevel('move', defaultProtection)
        elif arg.startswith('-create'):
            create = arg[len('-create:'):]
            if create not in protectionLevels:
                create = choiceProtectionLevel('create', defaultProtection)

    mysite = wikipedia.getSite()

    if doSinglePage:
        if not summary:
            summary = wikipedia.input(u'Enter a reason for the protection:')
        page = wikipedia.Page(mysite, pageName)
        gen = iter([page])
    elif doCategory:
        if not summary:
            summary = wikipedia.translate(mysite,
                                          msg_protect_category) % pageName
        ns = mysite.category_namespace()
        categoryPage = catlib.Category(mysite, ns + ':' + pageName)
        gen = pagegenerators.CategorizedPageGenerator(
            categoryPage, recurse=protectSubcategories)
    elif doLinks:
        if not summary:
            summary = wikipedia.translate(mysite, msg_protect_links) % pageName
        linksPage = wikipedia.Page(mysite, pageName)
        gen = pagegenerators.LinkedPageGenerator(linksPage)
    elif doRef:
        if not summary:
            summary = wikipedia.translate(mysite, msg_protect_ref) % pageName
        refPage = wikipedia.Page(mysite, pageName)
        gen = pagegenerators.ReferringPageGenerator(refPage)
    elif fileName:
        if not summary:
            summary = wikipedia.translate(mysite, msg_simple_protect)
        gen = pagegenerators.TextfilePageGenerator(fileName)
    elif doImages:
        if not summary:
            summary = wikipedia.translate(mysite,
                                          msg_protect_images) % pageName
        gen = pagegenerators.ImagesPageGenerator(
            wikipedia.Page(mysite, pageName))

    if gen:
        wikipedia.setAction(summary)
        # We are just protecting pages, so there is no need to use a preloading
        # page generator to fetch the text of those pages.
        if not edit: edit = defaultProtection
        if not move: move = defaultProtection
        bot = ProtectionRobot(gen, summary, always, edit=edit, move=move)
        bot.run()
    else:
        wikipedia.showHelp(u'protect')
Example #29
        action = None
        for arg in wikipedia.handleArgs():
            if arg == ('pages'):
                action = 'pages'
            elif arg == ('categories'):
                action = 'categories'
            elif arg.startswith('-start:'):
                start = wikipedia.Page(wikipedia.getSite(), arg[7:])
                gen = pagegenerators.AllpagesPageGenerator(
                    start.titleWithoutNamespace(),
                    namespace=start.namespace(),
                    includeredirects=False)
            elif arg.startswith('-cat:'):
                cat = catlib.Category(wikipedia.getSite(),
                                      'Category:%s' % arg[5:])
                gen = pagegenerators.CategorizedPageGenerator(cat)
            elif arg.startswith('-ref:'):
                ref = wikipedia.Page(wikipedia.getSite(), arg[5:])
                gen = pagegenerators.ReferringPageGenerator(ref)
            elif arg.startswith('-link:'):
                link = wikipedia.Page(wikipedia.getSite(), arg[6:])
                gen = pagegenerators.LinkedPageGenerator(link)
            elif arg.startswith('-page:'):
                singlepage = wikipedia.Page(wikipedia.getSite(), arg[6:])
                gen = iter([singlepage])
            #else:
            #bug

        if action == 'pages':
            preloadingGen = pagegenerators.PreloadingGenerator(gen)
            bot = CommonsLinkBot(preloadingGen, acceptall=False)
def main():
    """ Main Function """
    # Loading the comments
    global categoryToCheck, comment, project_inserted
    # always, define a generator to understand if the user sets one,
    # defining what's genFactory
    always = False
    generator = False
    show = False
    moveBlockCheck = False
    protectedpages = False
    protectType = 'edit'
    namespace = 0
    genFactory = pagegenerators.GeneratorFactory()
    # To prevent Infinite loops
    errorCount = 0
    # Parse the command line arguments.
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
        elif arg == '-move':
            moveBlockCheck = True
        elif arg == '-show':
            show = True
        elif arg.startswith('-protectedpages'):
            protectedpages = True
            if len(arg) > 15:
                namespace = int(arg[16:])
        elif arg.startswith('-moveprotected'):
            protectedpages = True
            protectType = 'move'
            if len(arg) > 14:
                namespace = int(arg[15:])
        else:
            genFactory.handleArg(arg)

    if config.mylang not in project_inserted:
        pywikibot.output(
            u"Your project is not supported by this script.\nYou have to edit the script and add it!"
        )
        return
    site = pywikibot.getSite()
    if protectedpages:
        generator = site.protectedpages(namespace=namespace, type=protectType)
    # Take the right templates to use, the category and the comment
    TSP = pywikibot.translate(site, templateSemiProtection)
    TTP = pywikibot.translate(site, templateTotalProtection)
    TSMP = pywikibot.translate(site, templateSemiMoveProtection)
    TTMP = pywikibot.translate(site, templateTotalMoveProtection)
    TNR = pywikibot.translate(site, templateNoRegex)
    TU = pywikibot.translate(site, templateUnique)

    category = pywikibot.translate(site, categoryToCheck)
    commentUsed = pywikibot.translate(site, comment)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        generator = list()
        pywikibot.output(u'Loading categories...')
        # Define the category if no other generator has been set
        for CAT in category:
            cat = catlib.Category(site, CAT)
            # Define the generator
            gen = pagegenerators.CategorizedPageGenerator(cat)
            for pageCat in gen:
                generator.append(pageCat)
        pywikibot.output(u'Categories loaded, start!')
    # Main Loop
    preloadingGen = pagegenerators.PreloadingGenerator(generator,
                                                       pageNumber=60)
    for page in preloadingGen:
        pagename = page.title(asLink=True)
        pywikibot.output('Loading %s...' % pagename)
        try:
            text = page.get()
            restrictions = page.getRestrictions()
        except pywikibot.NoPage:
            pywikibot.output("%s doesn't exist! Skipping..." % pagename)
            continue
        except pywikibot.IsRedirectPage:
            pywikibot.output("%s is a redirect! Skipping..." % pagename)
            if show:
                showQuest(site, page)
            continue
        """
        # This check does not work:
        # PreloadingGenerator cannot set page.editRestriction correctly
        # (see bug #1949476)
        if not page.canBeEdited():
            pywikibot.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename)
            continue
        """
        if 'edit' in restrictions.keys():
            editRestr = restrictions['edit']
        else:
            editRestr = None
        if editRestr and editRestr[0] == 'sysop':
            try:
                config.sysopnames[site.family.name][site.lang]
            except:
                pywikibot.output(
                    "%s is sysop-protected : this account can't edit it! Skipping..."
                    % pagename)
                continue

        # Determine, from the template on the page, what the protection should be
        # and compare it with what is actually applied.
        TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP, TU)
        # Keep a copy only to check later whether the text changed
        oldtext = text
        # keep track of the changes for each step (edit then move)
        changes = -1

        if not editRestr:
            # page is not edit-protected
            # Deleting the template because the page doesn't need it.
            if TU != None:
                replaceToPerform = u'|'.join(TTP + TSP + TU)
            else:
                replaceToPerform = u'|'.join(TTP + TSP)
            text, changes = re.subn(
                '<noinclude>(%s)</noinclude>' % replaceToPerform, '', text)
            if changes == 0:
                text, changes = re.subn('(%s)' % replaceToPerform, '', text)
            msg = u'The page is editable for all'
            if not moveBlockCheck:
                msg += u', deleting the template..'
            pywikibot.output(u'%s.' % msg)

        elif editRestr[0] == 'sysop':
            # total edit protection
            if (TemplateInThePage[0] == 'sysop-total'
                    and TTP != None) or (TemplateInThePage[0] == 'unique'
                                         and TU != None):
                msg = 'The page is protected to the sysop'
                if not moveBlockCheck:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                pywikibot.output(
                    u'The page is protected to the sysop, but the template seems not correct. Fixing...'
                )
                if TU != None:
                    text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
                else:
                    text, changes = re.subn(TemplateInThePage[1], TNR[1], text)

        elif TSP != None or TU != None:
            # implicitly editRestr[0] == 'autoconfirmed', i.e. edit semi-protection
            if TemplateInThePage[0] in ('autoconfirmed-total', 'unique'):
                msg = 'The page is editable only for the autoconfirmed users'
                if not moveBlockCheck:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                pywikibot.output(
                    u'The page is editable only for the autoconfirmed users, but the template seems not correct. Fixing...'
                )
                if TU != None:
                    text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
                else:
                    text, changes = re.subn(TemplateInThePage[1], TNR[0], text)

        if changes == 0:
            # We tried to fix edit-protection templates, but it did not work.
            pywikibot.output(
                'Warning : No edit-protection template could be found')

        if moveBlockCheck and changes > -1:
            # checking move protection now
            try:
                moveRestr = restrictions['move']
            except KeyError:
                moveRestr = False
            changes = -1

            if not moveRestr:
                pywikibot.output(
                    u'The page is movable for all, deleting the template...')
                # Deleting the template because the page doesn't need it.
                if TU != None:
                    replaceToPerform = u'|'.join(TSMP + TTMP + TU)
                else:
                    replaceToPerform = u'|'.join(TSMP + TTMP)
                text, changes = re.subn(
                    '<noinclude>(%s)</noinclude>' % replaceToPerform, '', text)
                if changes == 0:
                    text, changes = re.subn('(%s)' % replaceToPerform, '',
                                            text)
            elif moveRestr[0] == 'sysop':
                # move-total-protection
                if (TemplateInThePage[0] == 'sysop-move'
                        and TTMP != None) or (TemplateInThePage[0] == 'unique'
                                              and TU != None):
                    pywikibot.output(
                        u'The page is protected from moving to the sysop, skipping...'
                    )
                    if TU != None:
                        text = oldtext  # no changes needed, better to revert the old text.
                else:
                    pywikibot.output(
                        u'The page is protected from moving to the sysop, but the template seems not correct. Fixing...'
                    )
                    if TU != None:
                        text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                                text)
                    else:
                        text, changes = re.subn(TemplateInThePage[1], TNR[3],
                                                text)

            elif TSMP != None or TU != None:
                # implicitly moveRestr[0] == 'autoconfirmed', i.e. move semi-protection
                if TemplateInThePage[0] in ('autoconfirmed-move', 'unique'):
                    pywikibot.output(
                        u'The page is movable only for the autoconfirmed users, skipping...'
                    )
                    if TU != None:
                        text = oldtext  # no changes needed, better to revert the old text.
                else:
                    pywikibot.output(
                        u'The page is movable only for the autoconfirmed users, but the template seems not correct. Fixing...'
                    )
                    if TU != None:
                        text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                                text)
                    else:
                        text, changes = re.subn(TemplateInThePage[1], TNR[2],
                                                text)

            if changes == 0:
                # We tried to fix move-protection templates, but it did not work.
                pywikibot.output(
                    'Warning : No move-protection template could be found')

        if oldtext != text:
            # Ok, asking if the change has to be performed and do it if yes.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                             page.title())
            pywikibot.showDiff(oldtext, text)
            if not always:
                choice = pywikibot.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice == 'a':
                    always = True
            if always or choice == 'y':
                while 1:
                    try:
                        page.put(text, commentUsed, force=True)
                    except pywikibot.EditConflict:
                        pywikibot.output(u'Edit conflict! skip!')
                        break
                    except pywikibot.ServerError:
                        # This error occurs from time to time and is annoying because
                        # it can block the whole process for nothing.
                        errorCount += 1
                        if errorCount < 5:
                            pywikibot.output(u'Server Error! Wait..')
                            time.sleep(3)
                            continue
                        else:
                            # Prevent Infinite Loops
                            raise pywikibot.ServerError(u'Fifth Server Error!')
                    except pywikibot.SpamfilterError, e:
                        pywikibot.output(
                            u'Cannot change %s because of blacklist entry %s' %
                            (page.title(), e.url))
                        break
                    except pywikibot.PageNotSaved, error:
                        pywikibot.output(u'Error putting page: %s' %
                                         (error.args, ))
                        break
                    except pywikibot.LockedPage:
                        pywikibot.output(
                            u'The page is still protected. Skipping...')
                        break
                    else:
                        # Successful save: reset the error counter (only consecutive
                        # server errors abort the script) and leave the retry loop.
                        errorCount = 0
                        break