Example #1
def main():
    # The generator gives the pages that should be worked upon.
    gen = None
    # If debug is True, don't make any real changes, but only show
    # what would have been changed.
    debug = False
    wantHelp = False


    # Parse command line arguments
    for arg in wikipedia.handleArgs():
        if arg.startswith("-debug"):
            debug = True
        else:
            wantHelp = True

    if not wantHelp:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.

        cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % 'IP-Host')
        hosts_gen = pagegenerators.CategorizedPageGenerator(cat, start = None, recurse = False)
        hosts_gen = pagegenerators.PreloadingGenerator(hosts_gen)

        cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % 'IP-Network')
        nets_gen = pagegenerators.CategorizedPageGenerator(cat, start = None, recurse = False)
        nets_gen = pagegenerators.PreloadingGenerator(nets_gen)

        bot = IpNetworkBot(nets_gen, hosts_gen, debug)
        bot.run()
    else:
        wikipedia.showHelp()
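The IpNetworkBot class itself is not part of this snippet. A minimal sketch of the interface the code above relies on, with purely hypothetical processing logic, might look like:

class IpNetworkBot:
    # Hypothetical skeleton; only the constructor signature and run()
    # are implied by the example above, the rest is an assumption.
    def __init__(self, nets_gen, hosts_gen, debug):
        self.nets_gen = nets_gen
        self.hosts_gen = hosts_gen
        self.debug = debug

    def run(self):
        for page in self.nets_gen:
            wikipedia.output(u'Processing %s' % page.title())
            # real edits would go here, guarded by self.debug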
Example #2
def facatlist(facat):
    # Disable the write throttle so saves are not rate-limited.
    wikipedia.config.put_throttle = 0
    wikipedia.put_throttle.setDelay()
    count = 0
    listenpageTitle = []
    PageTitle = facat.replace(u'[[', u'').replace(u']]', u'').strip()
    language = 'fa'
    PageTitles = [PageTitle]
    for PageTitle in PageTitles:
        cat = catlib.Category(wikipedia.getSite(language), PageTitle)
        listacategory = [cat]
        listacategory = categorydown(listacategory)
        for enpageTitle in listacategory:
            enpageTitle = str(enpageTitle).split(u'|')[0].split(
                u']]')[0].replace(u'[[', u'').strip()
            cat = catlib.Category(wikipedia.getSite(language), enpageTitle)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            for pagework in gent:
                count += 1
                try:
                    link = str(pagework).split(u'|')[0].split(
                        u']]')[0].replace(u'[[', u'').strip()
                except UnicodeError:
                    # Splitting the byte string on unicode separators can
                    # fail for non-ASCII titles; decode explicitly instead.
                    pagework = unicode(str(pagework), 'UTF-8')
                    link = pagework.split(u'|')[0].split(u']]')[0].replace(
                        u'[[', u'').strip()
                wikipedia.output(link)
                fapagetitle = link
                wikipedia.output(u'adding ' + fapagetitle +
                                 u' to fapage lists')
                listenpageTitle.append(fapagetitle)
    if listenpageTitle == []:
        return False
    return listenpageTitle
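A usage sketch; the Persian category name is only an illustration, not taken from the original script, and a working fa.wikipedia configuration is assumed:

# Hypothetical call collecting all page titles under a category tree.
result = facatlist(u'[[رده:فیزیک]]')
if result:
    for title in result:
        wikipedia.output(title)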
Example #3
    def setUp(self):
        self.site = pywikibot.getSite('en', 'wikipedia')
        self.data = [catlib.Category(self.site, 'Category:Cat1'),
                     catlib.Category(self.site, 'Category:Cat2')]

        self.site_de = pywikibot.getSite('de', 'wikipedia')
        self.site_fr = pywikibot.getSite('fr', 'wikipedia')
Example #4
    def move_contents(self, oldCatTitle, newCatTitle, editSummary):
        """The worker function that moves pages out of oldCat into newCat"""
        while True:
            try:
                oldCat = catlib.Category(self.site,
                                         self.catprefix + oldCatTitle)
                newCat = catlib.Category(self.site,
                                         self.catprefix + newCatTitle)

                oldCatLink = oldCat.title()
                newCatLink = newCat.title()
                # editSummary may reference %(oldCatLink)s / %(newCatLink)s
                comment = editSummary % locals()
                # Move articles
                found, moved = 0, 0
                for result in self.query_results(list="categorymembers",
                                                 cmtitle=oldCat.title(),
                                                 cmprop="title|sortkey",
                                                 cmlimit="max"):
                    found += len(result['categorymembers'])
                    for item in result['categorymembers']:
                        article = pywikibot.Page(self.site, item['title'])
                        changed = self.change_category(article,
                                                       oldCat,
                                                       newCat,
                                                       comment=comment)
                        if changed:
                            moved += 1

                # pass 2: look for template doc pages
                for result in self.query_results(list="categorymembers",
                                                 cmtitle=oldCat.title(),
                                                 cmprop="title|sortkey",
                                                 cmnamespace="10",
                                                 cmlimit="max"):
                    for item in result['categorymembers']:
                        doc = pywikibot.Page(self.site, item['title'] + "/doc")
                        try:
                            # Fetch the /doc page just to check it exists.
                            old_text = doc.get()
                        except pywikibot.Error:
                            continue
                        changed = self.change_category(doc,
                                                       oldCat,
                                                       newCat,
                                                       comment=comment)
                        if changed:
                            moved += 1

                if found:
                    pywikibot.output(u"%s: %s found, %s moved" %
                                     (oldCat.title(), found, moved))
                return (found, moved)
            except pywikibot.ServerError:
                pywikibot.output(u"Server error: retrying in 5 seconds...")
                time.sleep(5)
                continue
            except KeyboardInterrupt:
                raise
            except:
                # Any other failure: give up and report it to the caller.
                return (None, None)
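A hedged sketch of how the surrounding bot might call this method; query_results(), catprefix and site are supplied by the enclosing class, and the summary placeholders are expanded through locals() inside move_contents():

# Hypothetical call site for the worker above.
summary = u"Robot: moving [[%(oldCatLink)s]] to [[%(newCatLink)s]]"
found, moved = bot.move_contents(u'Old title', u'New title', summary)
if found is None:
    pywikibot.output(u'move_contents gave up after an error')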
Example #5
    def run(self):
        self.count = {"target": [], "done": []}
        pywikibot.setAction(self.summary)
        musecat = catlib.Category(pywikibot.getSite(), u'Category:博物館')
        ignore_list = [
            catlib.Category(pywikibot.getSite(), u'Category:登録博物館'),
            catlib.Category(pywikibot.getSite(), u'Category:博物館相当施設'),
            catlib.Category(pywikibot.getSite(), u'Category:全国博物館園職員録'),
            catlib.Category(pywikibot.getSite(), u'Category:全国博物館総覧'),
        ]
        catlist = musecat.subcategoriesList()
        catlist = set(catlist) - set(ignore_list)
        for page in self.generator:
            self.treat(page, catlist)
Example #6
def Main():
    site = pywikibot.getSite()
    d = datetime.today()
    datestring = d.isoformat()
    zipfilename = "archive%s.zip" % datestring
    z = zipfile.ZipFile(zipfilename, "w")
    for x in (
            'Candidates_for_speedy_deletion_as_hoaxes',
            'Candidates_for_speedy_deletion_as_importance_or_significance_not_asserted',
            'Candidates_for_speedy_deletion_for_unspecified_reason'):
        cat = catlib.Category(site, x)
        pages = cat.articlesList(False)
        gen = pagegenerators.PreloadingGenerator(pages, 100)
        for Page in gen:
            outfile = "PAGES/%s.txt" % Page.urlname()
            text = Page.get()
            sutf8 = text.encode('UTF-8')
            print outfile
            z.writestr(outfile, sutf8)

        count = 0

    for strings in gen.data:
        for string in strings:
            for string2 in string:
                count = count + 1
                # sutf8 = string2.encode('UTF-8')
                z.writestr("RawFiles/%s%d.xml" % (x, count), string2)

    z.close()
    push_zip(zipfilename)
Example #7
def main():
    #page generator
    gen = None
    # This temporary array is used to read the page title if a single
    # page to work on is specified in the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        except KeyError:
            pass
        else:
            import catlib
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site,
                                  "%s:%s" % (site.category_namespace(), cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
Example #8
def getCategoryLinks(text, site=None):
    """Return a list of category links found in text.

    List contains Category objects.
    Do not call this routine directly, use Page.categories() instead.

    """
    import catlib
    result = []
    if site is None:
        site = pywikibot.getSite()
    # Ignore category links within nowiki tags, pre tags, includeonly tags,
    # and HTML comments
    text = removeDisabledParts(text)
    catNamespace = '|'.join(site.category_namespaces())
    R = re.compile(
        r'\[\[\s*(?P<namespace>%s)\s*:\s*(?P<catName>.+?)'
        r'(?:\|(?P<sortKey>.+?))?\s*\]\]' % catNamespace, re.I)
    for match in R.finditer(text):
        cat = catlib.Category(
            site,
            '%s:%s' % (match.group('namespace'), match.group('catName')),
            sortKey=match.group('sortKey'))
        result.append(cat)
    return result
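For illustration, a sketch that runs the parser over a literal piece of wikitext (assumes a configured English Wikipedia site; the sample text is hypothetical):

# Minimal sketch: extract Category objects from raw wikitext.
sample = u'Text [[Category:Physics]] more [[Category:History|sort]]'
for cat in getCategoryLinks(sample, pywikibot.getSite('en', 'wikipedia')):
    print cat.title()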
Example #9
def main(args):
    '''
    Main loop. Get a generator and options. Work on all images in the generator.
    '''
    generator = None
    onlyFilter = False
    onlyUncat = False
    genFactory = pagegenerators.GeneratorFactory()

    global search_wikis
    global hint_wiki

    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    for arg in pywikibot.handleArgs():
        if arg == '-onlyfilter':
            onlyFilter = True
        elif arg == '-onlyuncat':
            onlyUncat = True
        elif arg.startswith('-hint:'):
            hint_wiki = arg[len('-hint:'):]
        elif arg.startswith('-onlyhint'):
            search_wikis = arg[len('-onlyhint:'):]
        else:
            genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if not generator:
        generator = pagegenerators.CategorizedPageGenerator(
            catlib.Category(site, u'Category:Media needing categories'),
            recurse=True)
    initLists()
    categorizeImages(generator, onlyFilter, onlyUncat)
    pywikibot.output(u'All done')
Example #10
def main():
    ''' This is the def where our script will live '''

    site = wikipedia.Site("pt", "wikipedia")  # define the site as pt.wp
    '''Next we define the category Ambiente
     and get the listing of article titles.
     In this demonstration the code is spelled out in full
     for easier understanding; in practice,
     pages = catlib.Category(site, u"Ambiente").articles()
     would be enough to get the listing.
    '''
    cat = catlib.Category(site,
                          u"Ambiente")  # Here we define the category Ambiente.
    catList = cat.articlesList()
    '''Now that we have a listing,
     and before counting its elements,
     let us look at the titles contained in catList.

     This approach works well for illustrating this example;
     to interact directly with the articles,
     as we will see in another post, there are more efficient approaches.

     The first print, i.e. the page object,
     is a Python object, while the second print,
     page.title(), is already in unicode format.
    '''

    for page in catList:
        print u"page (object):", page
        print u"Page title: ", page.title()  # show the article title
    ''' Finally, we count the articles '''

    print u"\n No. of articles in the category: ", len(catList)
Example #11
def createlist(cat, wpproj, raw=False, cats=True):
    category = catlib.Category(site, cat)
    gen = pagegenerators.CategorizedPageGenerator(category, recurse=True)
    wikitext = ''
    wikitext2 = ''
    wikitext3 = ''

    if not cats:
        for page in gen:
            wikitext = wikitext + '\n*' + str(page)
            link = delink(str(page))
            print link
            wikitext2 = wikitext2 + '\n' + link
        wikitext = unicodify(wikitext)
    if cats:
        subcats = category.subcategories(recurse=True)
        for subcat in subcats:
            newtext = retpages(subcat)
            wikitext3 += newtext
        wikitext3 = unicodify(wikitext3)

    page = wikipedia.Page(site, wpproj + '/Articles')
    if not cats:
        page.put(wikitext, 'Updating watchlist')
    if cats:
        page.put(wikitext3, 'Updating watchlist')
    wikitext2 = '<pre>\n' + wikitext2 + '\n</pre>'
    wikitext2 = unicodify(wikitext2)
    if raw:
        page = wikipedia.Page(site, wpproj + '/Articles/raw')
        page.put(wikitext2, 'Updating raw watchlist')
Example #12
def main():
    # The generator gives the pages that should be worked upon.
    gen = None
    # If debug is True, don't make any real changes, but only show
    # what would have been changed.
    debug = False
    wantHelp = False

    cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % 'Instance')
    gen = pagegenerators.CategorizedPageGenerator(cat,
                                                  start=None,
                                                  recurse=False)

    # Parse command line arguments
    for arg in wikipedia.handleArgs():
        if arg.startswith("-debug"):
            debug = True
        else:
            print arg, "yielding wanthelp"
            wantHelp = True

    if not wantHelp:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.
        gen = pagegenerators.PreloadingGenerator(gen)
        bot = PuppetUnmanagedListBot(gen, debug)
        bot.run()
    else:
        wikipedia.showHelp()
Example #13
def filterCountries(categories):
    '''
    Try to filter out ...by country categories.
    First make a list of any ...by country categories and try to find some countries.
    If a by country category has a subcategory containing one of the countries found, add it.
    The ...by country categories remain in the set and should be filtered out by filterParents.
    '''
    result = categories
    listByCountry = []
    listCountries = []
    for cat in categories:
        # If cat ends with 'by country', collect it; otherwise remember
        # any country name that appears in cat.
        if cat.endswith(u'by country'):
            listByCountry.append(cat)
        else:
            for country in countries:
                if country in cat:
                    listCountries.append(country)

    if listByCountry:
        for bc in listByCountry:
            category = catlib.Category(wikipedia.getSite(), u'Category:' + bc)
            for subcategory in category.subcategories():
                for country in listCountries:
                    if subcategory.titleWithoutNamespace().endswith(country):
                        result.append(subcategory.titleWithoutNamespace())

    return list(set(result))
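A usage sketch with plain category-name strings; the module-level countries list is assumed to exist, as in the original script:

# Hypothetical input; the subcategory lookups hit the live wiki.
cats = [u'Churches by country', u'Churches in Germany']
print filterCountries(cats)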
Example #14
def UpdateRepoCats(*args):
    # Get List of all articles in Category:All add-ons
    site = pywikibot.getSite()

    # Download all repos as soup element
    soups = importAllAddonXML()

    # Get all pages in Category All add-ons
    cat = catlib.Category(site, u'Category:All add-ons')
    pages = cat.articlesList(False)
    allRepoCats = repoCatList(site)

    for Page in pagegenerators.PreloadingGenerator(pages, 100):
        # Get addon_id via regexp
        addon_id = None
        addon_id = re.search(r"\|ID=([a-zA-Z0-9_\.\-]+)", Page.get())
        if not addon_id:
            pywikibot.output("Can't find addon_id for %s, skipping it..." %
                             Page.title())
            continue
        else:
            addon_id = addon_id.group(1)
            pywikibot.output("Identifying Repos for %s." % addon_id)
        # See if addon_id can be found in repos
        repos = checkInRepo(addon_id, soups)
        addRemoveRepoCats(Page, repos, allRepoCats)
Example #15
def getSDTitles(site):
    titles = []
    for x in (
            'Declined_AfC_submissions', "Proposed_deletion",
            "Expired_proposed_deletions",
            'Candidates_for_speedy_deletion_as_hoaxes',
            'Candidates_for_speedy_deletion_as_importance_or_significance_not_asserted',
            'Candidates_for_speedy_deletion_as_lacking_context',
            'Candidates for speedy deletion as pages previously deleted via deletion discussion',
            'Contested candidates for speedy deletion',
            'Speedy_deletion_candidates_with_talk_pages',
            'Candidates_for_speedy_deletion_as_duplicating_an_existing_topic',
            'Candidates_for_speedy_deletion_for_unspecified_reason'):
        cat = catlib.Category(site, x)
        pages = cat.articlesList(recurse=True)
        for page in pages:
            n = page.urlname()
            an = n.encode("ascii", "ignore")
            if isNewTitle(an):
                titles.append(n)
    return titles
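Usage sketch; isNewTitle() is defined elsewhere in the original script, and the usual pywikibot.getSite() entry point is assumed:

# Hypothetical call collecting new deletion-related titles.
site = pywikibot.getSite()
for title in getSDTitles(site):
    print title.encode('utf-8')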
Example #16
    def test_titles_Category(self):
        params = {
            'action': 'query',
            'prop': 'revisions',
            'rvprop': ['ids', 'timestamp', 'user'],
            'rvdir': 'newer',
            'rvlimit': 1,
            'titles': [catlib.Category(self.site, u'Category:Categories')],
        }
        expectedresult = {
            u'pages': {
                u'794823': {
                    u'ns': 14,
                    u'pageid': 794823,
                    u'revisions': [{
                        u'revid': 4494485,
                        u'user': u'SEWilco',
                        u'timestamp': u'2004-07-07T18:45:50Z',
                    }],
                    u'title': u'Category:Categories',
                },
            }
        }
        self.assertEqualQueryResult(params, expectedresult)
Example #17
    def run(self):
        site = pywikibot.getSite()
        newCat = catlib.Category(site, self.newCatTitle)
        # set edit summary message
        if not self.editSummary:
            try:
                self.editSummary = pywikibot.translate(
                    site, msg_change) % (self.oldCat.title(), newCat.title())
            except TypeError:
                self.editSummary = pywikibot.translate(
                    site, msg_change) % self.oldCat.title()

        # Copy the category contents to the new category page
        copied = False
        oldMovedTalk = None
        if self.oldCat.exists() and self.moveCatPage:
            copied = self.oldCat.copyAndKeep(
                self.newCatTitle, pywikibot.translate(site, cfd_templates))
            # Also move the talk page
            if copied:
                reason = pywikibot.translate(site, deletion_reason_move) \
                         % (self.newCatTitle, self.newCatTitle)
                oldTalk = self.oldCat.toggleTalkPage()
                if oldTalk.exists():
                    newTalkTitle = newCat.toggleTalkPage().title()
                    try:
                        talkMoved = oldTalk.move(newTalkTitle, reason)
                    except (pywikibot.NoPage, pywikibot.PageNotSaved), e:
                        # In order: the source talk page does not exist,
                        # or the target talk page already exists.
                        pywikibot.output(e.message)
                    else:
                        if talkMoved:
                            oldMovedTalk = oldTalk
Example #18
def main():
    wikipedia.output(u'Testing 1 2 3')
    generator = None
    genFactory = pagegenerators.GeneratorFactory()

    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                generator = [
                    wikipedia.Page(
                        site,
                        wikipedia.input(u'What page do you want to use?'))
                ]
            else:
                generator = [wikipedia.Page(site, arg[6:])]
        else:
            # handleArg() returns a bool, not a generator; the combined
            # generator must be fetched from the factory afterwards.
            genFactory.handleArg(arg)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if generator:
        for page in generator:
            if (page.namespace() == 14):
                sort_TOL_Category(catlib.Category(site, page.title()))
    else:
        wikipedia.output(u'No categories to work on!')
Example #19
def CategoryGenerator(generator):
    """
    Wraps around another generator. Yields the same pages, but as Category
    objects instead of Page objects. Makes sense only if it is ascertained
    that only categories are being retrieved.
    """
    for page in generator:
        yield catlib.Category(page.site(), page.title())
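Usage sketch: wrap a generator that is known to yield only category pages, e.g. pages built from explicit titles (PagesFromTitlesGenerator is assumed to be available in compat pagegenerators; the titles are illustrative):

# Sketch: titles fed in by hand, then re-yielded as Category objects.
titles_gen = pagegenerators.PagesFromTitlesGenerator(
    [u'Category:Physics', u'Category:History'])
for cat in CategoryGenerator(titles_gen):
    print cat.title()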
Example #20
    def __init__(self):
        self.site = pywikibot.getSite()
        self.cat = catlib.Category(
            self.site,
            'Category:Wikipedia usernames with possible policy issues')
        self.recurse = False
        self.run_time = datetime.datetime.now() + datetime.timedelta(days=-7)
        self.sentinel_text = '[[Category:Wikipedia usernames with possible policy issues|{{PAGENAME}}]]'
Example #21
def docat(tag, cat2):
    site = wikipedia.getSite()
    cat = catlib.Category(site, cat2)
    gen = pagegenerators.CategorizedPageGenerator(cat)
    dogen(gen, tag)
    wikipedia.output(u'\nFinished with Category:' + cat2 + '.\n')
    print 'Waiting 10 seconds'
    time.sleep(10)
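A hypothetical call; dogen() and the actual tagging logic live elsewhere in the original script, and both arguments here are illustrative:

docat(u'cleanup', u'Category:Articles needing cleanup')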
Example #22
def CAT(site, name, hide):
    name = site.namespace(14) + ':' + name
    cat = catlib.Category(site, name)
    for article in cat.articles(endsort=hide):
        yield article
    if hide:
        for article in cat.articles(startFrom=unichr(ord(hide) + 1)):
            yield article
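The two loops split the listing at the hide sortkey: the first yields articles up to it, and the second resumes one character after it, apparently so that entries filed under that sortkey character are left out. A usage sketch with an illustrative category name:

# Sketch: list the category while skipping entries sorted under '*'.
site = wikipedia.getSite()
for article in CAT(site, u'Maintenance', u'*'):
    wikipedia.output(article.title())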
Example #23
def addRemoveRepoCats(article, repos, allRepoCats, comment=None):
    # Create list of repos to be removed
    notRepos = []

    if not article.canBeEdited():
        pywikibot.output("Can't edit %s, skipping it..." % article.aslink())
        return False

    cats = article.categories(get_redirect=True)
    site = article.site()
    changesMade = False
    newCatList = []
    newCatSet = set()

    repoCatList = []

    # Remove all repo categories, keeping the rest deduplicated.
    for cat in cats:
        if cat in allRepoCats:
            changesMade = True
            continue
        if cat.title() not in newCatSet:
            newCatSet.add(cat.title())
            newCatList.append(cat)

    # Add the relevant repo categories.
    for repo in repos:
        newCatList.append(catlib.Category(site, 'Category:' + repoCats[repo]))
        changesMade = True

    if not changesMade:
        pywikibot.output(u'No changes necessary to %s!' % article.title())
    else:
        text = article.get(get_redirect=True)
        try:
            text = pywikibot.replaceCategoryLinks(text, newCatList)
        except ValueError:
            # replaceCategoryLinks() raises ValueError only for interwiki
            # links to self; skip such pages instead of saving them.
            pywikibot.output(u'Skipping %s because of interwiki link to self' %
                             article)
            return False
        try:
            article.put(text,
                        comment='Addon-Bot repo category update',
                        watchArticle=None,
                        minorEdit=True)
        except pywikibot.EditConflict:
            pywikibot.output(u'Skipping %s because of edit conflict' %
                             article.title())
        except pywikibot.SpamfilterError, e:
            pywikibot.output(u'Skipping %s because of blacklist entry %s' %
                             (article.title(), e.url))
        except pywikibot.LockedPage:
            pywikibot.output(u'Skipping %s because page is locked' %
                             article.title())
Example #24
def category2testset(site, categoryName="Automated tests"):
    """Scan a category on the given pywikipedia site for valid
    extension tests, and generate a testset (a list of Test objects)"""
    category = catlib.Category(site, categoryName)
    testset = []
    for article in category.articles():
        testset += article2testset(article)

    return testset
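Usage sketch; article2testset() belongs to the original test framework and is not shown, and the getSite() entry point is assumed:

# Hypothetical call building a testset from the default category.
site = pywikibot.getSite()
tests = category2testset(site)
print '%d tests found' % len(tests)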
Example #25
    def categories(self):
        for page in self.generator:
            try:
                wikipedia.output(u'\n>>>> %s <<<<' % page.title())
                getCommons = wikipedia.getSite('commons', 'commons')
                commonsCategory = catlib.Category(getCommons,
                                                  'Category:%s' % page.title())
                try:
                    # Fetch the category to verify it exists on Commons.
                    getcommonscat = commonsCategory.get(get_redirect=True)
                    commonsCategoryTitle = commonsCategory.title()
                    categoryname = commonsCategoryTitle.split('Category:',
                                                              1)[1]
                    if page.title() == categoryname:
                        oldText = page.get()
                        text = oldText

                        # for commonscat template
                        findTemplate = re.compile(ur'\{\{[Cc]ommons')
                        s = findTemplate.search(text)
                        findTemplate2 = re.compile(ur'\{\{[Ss]isterlinks')
                        s2 = findTemplate2.search(text)
                        if s or s2:
                            wikipedia.output(u'** Already done.')
                        else:
                            text = wikipedia.replaceCategoryLinks(
                                text + u'{{commonscat|%s}}' % categoryname,
                                page.categories())
                            if oldText != text:
                                wikipedia.showDiff(oldText, text)
                                if not self.acceptall:
                                    choice = wikipedia.inputChoice(
                                        u'Do you want to accept these changes?',
                                        ['Yes', 'No', 'All'], ['y', 'N', 'a'],
                                        'N')
                                    if choice == 'a':
                                        self.acceptall = True
                                if self.acceptall or choice == 'y':
                                    try:
                                        msg = wikipedia.translate(
                                            wikipedia.getSite(), comment2)
                                        page.put(text, msg)
                                    except wikipedia.EditConflict:
                                        wikipedia.output(
                                            u'Skipping %s because of edit conflict'
                                            % (page.title()))

                except wikipedia.NoPage:
                    wikipedia.output(u'Category does not exist in Commons!')

            except wikipedia.NoPage:
                wikipedia.output(u'Page %s does not exist?!' % page.title())
            except wikipedia.IsRedirectPage:
                wikipedia.output(u'Page %s is a redirect; skipping.' %
                                 page.title())
            except wikipedia.LockedPage:
                wikipedia.output(u'Page %s is locked?!' % page.title())
Example #26
def pagesInCat(site, categoryname, recurse):
    """Return pages in this category as a list, or die."""
    try:
        category = catlib.Category(site, categoryname)
        pages = category.articlesList(recurse=recurse)
    except Exception:
        traceback.print_exc()
        print "Error, stopping."
        exit()
    return pages
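Usage sketch (the category name is illustrative):

# Hypothetical call; prints all titles, recursing into subcategories.
site = wikipedia.getSite()
for page in pagesInCat(site, u'Category:Stub articles', recurse=True):
    print page.title()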
Example #27
def filterCategory(page):
    """ Loop over all subcategories of page and filter them """

    # FIXME: category = catlib.Category(page) doesn't work
    site = page.site()
    title = page.title()
    category = catlib.Category(site, title)

    for subcat in category.subcategories():
        filterSubCategory(subcat, category)
Example #28
def crawlerCat(category):
    cat = catlib.Category(site, category)
    pages = cat.articlesList(False)
    for Page in pagegenerators.PreloadingGenerator(pages, 100):
        modification(Page.title())
    subcat = cat.subcategories(recurse=True)
    for subcategory in subcat:
        pages = subcategory.articlesList(False)
        for Page in pagegenerators.PreloadingGenerator(pages, 100):
            modification(Page.title())
Example #29
    def __init__(self, catTitle, listTitle, editSummary, overwrite=False,
                 showImages=False, subCats=False, talkPages=False,
                 recurse=False):
        self.editSummary = editSummary
        self.overwrite = overwrite
        self.showImages = showImages
        self.site = pywikibot.getSite()
        self.cat = catlib.Category(self.site, 'Category:' + catTitle)
        self.list = pywikibot.Page(self.site, listTitle)
        self.subCats = subCats
        self.talkPages = talkPages
        self.recurse = recurse
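An instantiation sketch; the class name is not shown in the snippet, so CategoryListifyRobot here is an assumption based on the standard category.py script, and the titles are illustrative:

# Hypothetical instantiation of the listify robot above.
bot = CategoryListifyRobot(u'Stub articles',
                           u'User:Example/Stub list',
                           u'Robot: listing category members',
                           overwrite=True)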
Example #30
    def __init__(self):
        """
        Arguments:
            none yet
        """
        self.mySite = wikipedia.getSite()
        self.csdCat = catlib.Category(
            self.mySite, wikipedia.translate(self.mySite, self.csd_cat))
        self.savedProgress = None
        self.preloadingGen = None