Example #1
def processFile(row, imageDir):
    metadata = getMetadata(row)
    title = getTitle(metadata)
    description = getDescription(metadata)

    # Check if the title already exists
    site = wikipedia.getSite('commons', 'commons')
    page = wikipedia.ImagePage(site, title)

    if page.exists():
        wikipedia.output(u'The file %s already exists. Probably already uploaded by me. Skipping' % title)
        return False

    wikipedia.output(u'Preparing upload for %s.' % title)    
    wikipedia.output(description)    
                        
    # Download and dezoomify the image
    tempfile = imageDir + metadata.get('id') + u'.jpg'
    try:
        dezoomify.Dezoomify(url=metadata.get('link'), debug=True, out=tempfile)
    except IOError as e:
        #wikipedia.output(e)
        wikipedia.output(u'Dezoomify failed')
        return False
        

    # Check for dupe. This probably doesn't work, but it doesn't hurt either.
    duplicates = findDuplicateImages(tempfile)
    if duplicates:
        wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
        return False
    
    bot = upload.UploadRobot(url=tempfile, description=description, useFilename=title, keepFilename=True, verifyDescription=False)
    bot.run()
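
The function above returns False on every skip path and falls through after a successful upload, so a driver only needs to loop over its input. A minimal sketch, assuming a `rows` iterable of metadata records and an `imageDir` path (both hypothetical names here):

# Hypothetical driver; `rows` and `imageDir` come from the surrounding script.
uploaded = 0
for row in rows:
    if processFile(row, imageDir) is not False:
        uploaded += 1
wikipedia.output(u'Processed %d files.' % uploaded)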
Example #2
File: image.py Project: moleculea/ess
def main():
    oldImage = None
    newImage = None
    summary = ''
    always = False
    loose = False
    # read command line parameters
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
        elif arg == '-loose':
            loose = True
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                summary = pywikibot.input(u'Choose an edit summary: ')
            else:
                summary = arg[len('-summary:'):]
        else:
            if oldImage:
                newImage = arg
            else:
                oldImage = arg
    if not oldImage:
        pywikibot.showHelp('image')
    else:
        mysite = pywikibot.getSite()
        ns = mysite.image_namespace()
        oldImagePage = pywikibot.ImagePage(mysite, ns + ':' + oldImage)
        gen = pagegenerators.FileLinksGenerator(oldImagePage)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = ImageRobot(preloadingGen, oldImage, newImage, summary, always,
                         loose)
        bot.run()
Example #3
    def run(self):
        for page in self.generator:
            if self.interwiki:
                imagelist = []
                for linkedPage in page.interwiki():
                    imagelist += linkedPage.imagelinks(followRedirects=True)
            elif page.isImage():
                imagePage = pywikibot.ImagePage(page.site(), page.title())
                imagelist = [imagePage]
            else:
                imagelist = page.imagelinks(followRedirects=True)

            while len(imagelist) > 0:
                self.showImageList(imagelist)
                if len(imagelist) == 1:
                    # no need to query the user, only one possibility
                    todo = 0
                else:
                    pywikibot.output(u"Give the number of the image to transfer.")
                    todo = pywikibot.input(u"To end uploading, press enter:")
                    if not todo:
                        break
                    todo = int(todo)
                if todo in range(len(imagelist)):
                    if imagelist[todo].fileIsOnCommons():
                        pywikibot.output(u'The image is already on Wikimedia Commons.')
                    else:
                        self.transferImage(imagelist[todo], debug = False)
                    # remove the selected image from the list
                    imagelist = imagelist[:todo] + imagelist[todo + 1:]
                else:
                    pywikibot.output(u'No such image number.')
Example #4
def UnusedFilesGenerator(number=100, repeat=False, site=None, extension=None):
    if site is None:
        site = wikipedia.getSite()
    for page in site.unusedfiles(number=number,
                                 repeat=repeat,
                                 extension=extension):
        yield wikipedia.ImagePage(page.site(), page.title())
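
Since the generator yields wikipedia.ImagePage objects, image-specific methods are available directly on the results. A short usage sketch, assuming a configured compat-era setup where the `wikipedia` module imports:

# Print the titles of up to 25 unused files.
for image in UnusedFilesGenerator(number=25):
    wikipedia.output(image.title())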
Example #5
File: userlib.py Project: hasteur/UAABOT
    def _uploadedImagesOld(self, number=10):
        """Yield ImagePages from Special:Log&type=upload"""

        regexp = re.compile(
            '<li[^>]*>(?P<date>.+?)\s+<a href=.*?>(?P<user>.+?)</a> '
            '.* uploaded "<a href=".*?"(?P<new> class="new")? '
            'title="(Image|File):(?P<image>.+?)"\s*>'
            '(?:.*?<span class="comment">(?P<comment>.*?)</span>)?',
            re.UNICODE)
        path = self.site().log_address(number, mode='upload', user=self.name())
        html = self.site().getUrl(path)
        redlink_key = self.site().mediawiki_message('red-link-title')
        redlink_tail_len = None
        if redlink_key.startswith('$1 '):
            redlink_tail_len = len(redlink_key[3:])
        for m in regexp.finditer(html):
            image = m.group('image')
            deleted = False
            if m.group('new'):
                deleted = True
                if redlink_tail_len:
                    image = image[0:0 - redlink_tail_len]

            date = m.group('date')
            comment = m.group('comment') or ''
            yield pywikibot.ImagePage(self.site(),
                                      image), date, comment, deleted
Example #6
def tagNowCommons(wImage, cImage, timestamp):
    site = wikipedia.getSite()
    language = site.language()
    family = site.family.name

    imagepage = wikipedia.ImagePage(wikipedia.getSite(), wImage)
    if not imagepage.exists() or imagepage.isRedirectPage():
        return

    if skips.get(family) and skips.get(family).get(language):
        localskips = skips.get(family).get(language)
    else:
        localskips = skips.get('_default')

    for template in imagepage.templates():
        title = template.replace(u'_', u' ').strip()
        if title in localskips:
            return
    text = imagepage.get()
    oldtext = text

    text = u'{{NowCommons|File:%s|date=%s|bot=~~~}}\n' % (cImage.replace(
        u'_', u' '), timestamp) + text
    comment = u'File is available on Wikimedia Commons.'
    wikipedia.showDiff(oldtext, text)
    try:
        imagepage.put(text, comment)
        #print u'put'
    except wikipedia.LockedPage:
        return
Example #7
    def processImage(self, page):
        """
        Work on a single image
        """
        if page.exists() and (page.namespace() == 6) and \
           (not page.isRedirectPage()):
            imagepage = pywikibot.ImagePage(page.site(), page.title())

            #First do autoskip.
            if self.doiskip(imagepage):
                pywikibot.output(
                    u'Skipping %s : Got a template on the skip list.'
                    % page.title())
                return False

            text = imagepage.get()
            foundMatch = False
            for (regex, replacement) in licenseTemplates[page.site().language()]:
                match = re.search(regex, text, flags=re.IGNORECASE)
                if match:
                    foundMatch = True
            if not foundMatch:
                pywikibot.output(
                    u'Skipping %s : No suitable license template was found.'
                    % page.title())
                return False
            self.prefetchQueue.put(self.getNewFields(imagepage))
Example #8
def categorizeImages(generator, onlyFilter, onlyUncat):
    '''
    Loop over all images in generator and try to categorize them. Get category suggestions from CommonSense.
    '''
    for page in generator:
        if page.exists() and (page.namespace()
                              == 6) and (not page.isRedirectPage()):
            imagepage = wikipedia.ImagePage(page.site(), page.title())
            wikipedia.output(u'Working on ' + imagepage.title())

            if onlyUncat and u'Uncategorized' not in imagepage.templates():
                wikipedia.output(u'No Uncategorized template found')
            else:
                currentCats = getCurrentCats(imagepage)
                if onlyFilter:
                    commonshelperCats = []
                    usage = []
                    galleries = []
                else:
                    (commonshelperCats, usage,
                     galleries) = getCommonshelperCats(imagepage)
                newcats = applyAllFilters(commonshelperCats + currentCats)

                if len(newcats) > 0 and set(currentCats) != set(newcats):
                    for cat in newcats:
                        wikipedia.output(u' Found new cat: ' + cat)
                    saveImagePage(imagepage, newcats, usage, galleries,
                                  onlyFilter)
Example #9
def ImageGenerator(generator):
    """
    Wraps around another generator. Yields the same pages, but as Image
    objects instead of Page objects. Makes sense only if it is ascertained
    that only categories are being retrieved.
    """
    for page in generator:
        yield pywikibot.ImagePage(page.site(), page.title())
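
A sketch of the intended use: wrap a generator that is known to yield only file pages, so downstream code receives ImagePage objects. The category name and the CategorizedPageGenerator wiring are assumptions for illustration:

# Hypothetical wiring: feed a category of files through the wrapper.
cat = catlib.Category(pywikibot.getSite(), u'Category:Some files')
for imagepage in ImageGenerator(pagegenerators.CategorizedPageGenerator(cat)):
    pywikibot.output(imagepage.title())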
Example #10
def listfiles_generator():
    site = wikipedia.getSite()
    path = filelist_address(site.family, site.lang)
    ns = site.image_namespace()
    html = site.getUrl(path)
    entryR = re.compile('<a href=".+?" title="(?P<title>%s:.+?)">.+?</a>' % ns)
    for m in entryR.finditer(html):
        title = m.group('title')
        image = wikipedia.ImagePage(site, title)
        yield image
Example #11
File: mtc.py Project: legoktm/legobot-old
def moveimage(name):
    #HACK
    name = str(name)
    name = name.title()
    name = wikipedia.ImagePage(wikien, name)
    if wikipedia.Page(commons, name.title()).exists():
        print '%s is already on the commons.' % (name.title())
        ncd(name)
        return
    uploadres = upload(name)
    if uploadres == False:
        return False
    ncd(name)
Example #12
    def uploadedImages(self, number=10):
        if not self.site().has_api() or self.site().versionnumber() < 11:
            for c in self._uploadedImagesOld(number):
                yield c
            return

        for s in self.site().logpages(number,
                                      mode='upload',
                                      user=self.name(),
                                      dump=True):
            yield wikipedia.ImagePage(
                self.site(),
                s['title']), s['timestamp'], s['comment'], s['pageid'] > 0
        return
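
Each yielded item is a four-tuple, so callers typically unpack it in the for statement. A usage sketch, assuming `user` is a userlib User instance for the target account:

# List a user's five most recent uploads; the last tuple element is
# True when the file page still exists (pageid > 0).
for imagepage, timestamp, comment, exists in user.uploadedImages(number=5):
    wikipedia.output(u'%s: %s (%s)' % (timestamp, imagepage.title(), comment))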
Example #13
def main(args):
    generator = None
    always = False

    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(
            'You have to specify the generator you want to use for the script!'
        )

    pregenerator = pagegenerators.PreloadingGenerator(generator)

    for page in pregenerator:
        if page.exists() and (page.namespace() == 6) and \
            (not page.isRedirectPage()):
            imagepage = pywikibot.ImagePage(page.site(), page.title())
            foundNowCommons = False
            for template in imagepage.templates():
                #FIXME: Move the templates list to a lib.
                if template in pywikibot.translate(imagepage.site(),
                                                   nowCommons):
                    foundNowCommons = True
            if foundNowCommons:
                pywikibot.output(
                    u'The file %s is already tagged with NowCommons' %
                    imagepage.title())
            else:
                imagehash = imagepage.getHash()
                commons = pywikibot.getSite(u'commons', u'commons')
                duplicates = commons.getFilesFromAnHash(imagehash)
                if duplicates:
                    duplicate = duplicates.pop()
                    pywikibot.output(u'Found duplicate image at %s' %
                                     duplicate)
                    comment = i18n.twtranslate(
                        imagepage.site(), 'commons-file-now-available', {
                            'localfile': imagepage.titleWithoutNamespace(),
                            'commonsfile': duplicate
                        })
                    template = pywikibot.translate(imagepage.site(),
                                                   nowCommonsTemplate)
                    newtext = imagepage.get() + template % (duplicate, )
                    pywikibot.showDiff(imagepage.get(), newtext)
                    imagepage.put(newtext, comment)
Example #14
def main(args):
    '''
    Main loop.
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    conn = None
    cursor = None
    (conn, cursor) = geograph_lib.connectDatabase()

    conn2 = None
    cursor2 = None
    (conn2, cursor2) = geograph_lib.connectDatabase2('sql-s2.toolserver.org', u'u_multichill_commons_categories_p')

    conn3 = None
    cursor3 = None
    (conn3, cursor3) = geograph_lib.connectDatabase2('commonswiki-p.db.toolserver.org', u'commonswiki_p')
    
    generator = None
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if generator:
        for page in generator:
            if page.exists() and page.namespace() == 6 and not page.isRedirectPage():
                wikipedia.output(page.title())
                id = getGeographId(page)
                if id:
                    geograph_lib.categorizeImage(page, id, cursor, cursor2)
    else:
        topics = getTopics(cursor)
        for (topic,) in topics:
            images = getImagesWithTopic(cursor3, topic)
            for (imageName, id) in images:
                try:
                    page = wikipedia.ImagePage(wikipedia.getSite(), u'File:' + imageName)
                    if page.exists() and page.namespace() == 6 and not page.isRedirectPage():
                        wikipedia.output(page.title())
                        geograph_lib.categorizeImage(page, id, cursor, cursor2)
                except UnicodeDecodeError:
                    print "UnicodeDecodeError, can't find the source. yah! :-("
Example #15
    def getImagelinks(self, page, min=0, step=50, sort=""):
        q = """ SELECT il_to
                FROM %s.imagelinks
                WHERE il_from=(
                    SELECT page_id
                    FROM %s.page
                    WHERE page_title=%%s AND page_namespace=%%s) """ % (
            (page.site().dbName(), ) * 2)
        q += sort
        for row in self._generate(
                q, min,
                step, (page.titleWithoutNamespace(True).encode('utf-8'),
                       page.namespace())):
            yield wikipedia.ImagePage(
                page.site(),
                page.site().image_namespace() + ":" +
                row['il_to'].decode('utf-8'), page.site())
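
A usage sketch for the SQL-backed generator above, assuming `db` is an instance of the class that defines getImagelinks and `page` is any wikipedia.Page:

# Iterate the images embedded in a page, read from the database replica.
for imagepage in db.getImagelinks(page, step=100):
    wikipedia.output(imagepage.title())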
Example #16
def tagNowCommons(page):

    imagepage = pywikibot.ImagePage(page.site(), page.title())
    site = page.site()
    language = site.language()
    family = site.family.name

    if not imagepage.fileIsOnCommons():

        if family in skips and language in skips[family]:
            localskips = skips[family][language]
        else:
            localskips = skips['_default']

        for template in imagepage.templates():
            #FIXME: Move the templates list to a lib.
            if template in localskips:
                pywikibot.output(
                    u'The file %s is already tagged with NowCommons' %
                    imagepage.title())
                return

        imagehash = imagepage.getHash()
        commons = pywikibot.getSite(u'commons', u'commons')
        duplicates = commons.getFilesFromAnHash(imagehash)
        if duplicates:
            duplicate = duplicates.pop()
            pywikibot.output(u'Found duplicate image at %s' % duplicate)
            comment = i18n.twtranslate(
                imagepage.site(), 'commons-file-now-available', {
                    'localfile': imagepage.title(withNamespace=False),
                    'commonsfile': duplicate
                })
            template = pywikibot.translate(imagepage.site(),
                                           nowCommonsTemplate)
            newtext = imagepage.get() + template % (duplicate, )
            pywikibot.showDiff(imagepage.get(), newtext)
            try:
                imagepage.put(newtext, comment)
            except pywikibot.LockedPage:
                return
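
A driver for this function only needs a page generator over the file namespace (6). A minimal sketch; `someGen` is a hypothetical source generator, not part of the original:

# Tag every file yielded by a preloading generator.
gen = pagegenerators.PreloadingGenerator(
    pagegenerators.NamespaceFilterPageGenerator(someGen, [6]))
for page in gen:
    tagNowCommons(page)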
Example #17
    def uploadedImages(self, number=10):
        """ Yield tuples describing files uploaded by this user.
        Each tuple is composed of a pywikibot.Page, the timestamp
        comment (unicode) and a bool (always False...).
        Pages returned are not guaranteed to be unique.

        @param total: limit result to this number of pages
        @type total: int
        """
        if self.isAnonymous():
            raise StopIteration
        if not self.site().has_api() or self.site().versionnumber() < 11:
            for c in self._uploadedImagesOld(number):
                yield c
            return

        for item in self.site().logpages(number, mode='upload',
                                         user=self.username, dump=True):
            yield pywikibot.ImagePage(self.site(), item['title']), \
                  item['timestamp'], item['comment'], item['pageid'] > 0
        return
Example #18
def main():
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    generator = None
    genFactory = pagegenerators.GeneratorFactory()
    target = u'/Users/hay/tmp/wlm/'

    for arg in wikipedia.handleArgs():
        if arg.startswith('-target:'):
            target = arg[len('-target:'):]
        else:
            genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()

    if generator:
        # Get a preloading generator with only images
        pgenerator = pagegenerators.PreloadingGenerator(
            pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
        for page in pgenerator:
            imagepage = wikipedia.ImagePage(page.site(), page.title())
            downloadFile(imagepage, target)
Example #19
    def handleArg(self, arg):
        """Parse one argument at a time.

        If it is recognized as an argument that specifies a generator, a
        generator is created and added to the accumulation list, and the
        function returns true.  Otherwise, it returns false, so that caller
        can try parsing the argument. Call getCombinedGenerator() after all
        arguments have been parsed to get the final output generator.

        """
        site = wikipedia.getSite()
        gen = None
        if arg.startswith('-filelinks'):
            fileLinksPageTitle = arg[11:]
            if not fileLinksPageTitle:
                fileLinksPageTitle = wikipedia.input(
                    u'Links to which image page should be processed?')
            if fileLinksPageTitle.startswith(site.namespace(6) + ":"):
                fileLinksPage = wikipedia.ImagePage(site, fileLinksPageTitle)
            else:
                fileLinksPage = wikipedia.ImagePage(
                    site, 'Image:' + fileLinksPageTitle)
            gen = FileLinksGenerator(fileLinksPage)
        elif arg.startswith('-unusedfiles'):
            if len(arg) == 12:
                gen = UnusedFilesGenerator()
            else:
                gen = UnusedFilesGenerator(number=int(arg[13:]))
        elif arg.startswith('-unwatched'):
            if len(arg) == 10:
                gen = UnwatchedPagesPageGenerator()
            else:
                gen = UnwatchedPagesPageGenerator(number=int(arg[11:]))
        elif arg.startswith('-usercontribs'):
            gen = UserContributionsGenerator(arg[14:])
        elif arg.startswith('-withoutinterwiki'):
            if len(arg) == 17:
                gen = WithoutInterwikiPageGenerator()
            else:
                gen = WithoutInterwikiPageGenerator(number=int(arg[18:]))
        elif arg.startswith('-interwiki'):
            title = arg[11:]
            if not title:
                title = wikipedia.input(u'Which page should be processed?')
            page = wikipedia.Page(site, title)
            gen = InterwikiPageGenerator(page)
        elif arg.startswith('-randomredirect'):
            if len(arg) == 15:
                gen = RandomRedirectPageGenerator()
            else:
                gen = RandomRedirectPageGenerator(number=int(arg[16:]))
        elif arg.startswith('-random'):
            if len(arg) == 7:
                gen = RandomPageGenerator()
            else:
                gen = RandomPageGenerator(number=int(arg[8:]))
        elif arg.startswith('-recentchanges'):
            if len(arg) == 14:
                gen = RecentchangesPageGenerator()
            else:
                gen = RecentchangesPageGenerator(number=int(arg[15:]))
        elif arg.startswith('-file'):
            textfilename = arg[6:]
            if not textfilename:
                textfilename = wikipedia.input(
                    u'Please enter the local file name:')
            gen = TextfilePageGenerator(textfilename)
        elif arg.startswith('-namespace'):
            if len(arg) == len('-namespace'):
                self.namespaces.append(
                    wikipedia.input(u'What namespace are you filtering on?'))
            else:
                self.namespaces.extend(arg[len('-namespace:'):].split(","))
            return True
        elif arg.startswith('-catr'):
            gen = self.getCategoryGen(arg, len('-catr'), recurse=True)
        elif arg.startswith('-category'):
            gen = self.getCategoryGen(arg, len('-category'))
        elif arg.startswith('-cat'):
            gen = self.getCategoryGen(arg, len('-cat'))
        elif arg.startswith('-subcatsr'):
            gen = self.setSubCategoriesGen(arg, 9, recurse=True)
        elif arg.startswith('-subcats'):
            gen = self.setSubCategoriesGen(arg, 8)
        # This parameter is deprecated, catr should be used instead.
        elif arg.startswith('-subcat'):
            gen = self.getCategoryGen(arg, 7, recurse=True)
        elif arg.startswith('-page'):
            if len(arg) == len('-page'):
                gen = [
                    wikipedia.Page(
                        site,
                        wikipedia.input(u'What page do you want to use?'))
                ]
            else:
                gen = [wikipedia.Page(site, arg[len('-page:'):])]
        elif arg.startswith('-uncatfiles'):
            gen = UnCategorizedImageGenerator()
        elif arg.startswith('-uncatcat'):
            gen = UnCategorizedCategoryGenerator()
        elif arg.startswith('-uncat'):
            gen = UnCategorizedPageGenerator()
        elif arg.startswith('-ref'):
            referredPageTitle = arg[5:]
            if not referredPageTitle:
                referredPageTitle = wikipedia.input(
                    u'Links to which page should be processed?')
            referredPage = wikipedia.Page(site, referredPageTitle)
            gen = ReferringPageGenerator(referredPage)
        elif arg.startswith('-links'):
            linkingPageTitle = arg[7:]
            if not linkingPageTitle:
                linkingPageTitle = wikipedia.input(
                    u'Links from which page should be processed?')
            linkingPage = wikipedia.Page(site, linkingPageTitle)
            gen = LinkedPageGenerator(linkingPage)
        elif arg.startswith('-weblink'):
            url = arg[9:]
            if not url:
                url = wikipedia.input(
                    u'Pages with which weblink should be processed?')
            gen = LinksearchPageGenerator(url)
        elif arg.startswith('-transcludes'):
            transclusionPageTitle = arg[len('-transcludes:'):]
            if not transclusionPageTitle:
                transclusionPageTitle = wikipedia.input(
                    u'Pages that transclude which page should be processed?')
            transclusionPage = wikipedia.Page(
                site, "%s:%s" % (site.namespace(10), transclusionPageTitle))
            gen = ReferringPageGenerator(transclusionPage,
                                         onlyTemplateInclusion=True)
        elif arg.startswith('-gorandom'):
            for firstPage in RandomPageGenerator(number=1):
                firstPageTitle = firstPage.title()
            namespace = wikipedia.Page(site, firstPageTitle).namespace()
            firstPageTitle = wikipedia.Page(
                site, firstPageTitle).titleWithoutNamespace()
            gen = AllpagesPageGenerator(firstPageTitle,
                                        namespace,
                                        includeredirects=False)
        elif arg.startswith('-start'):
            if arg.startswith('-startxml'):
                wikipedia.output(u'-startxml : wrong parameter')
                sys.exit()
            firstPageTitle = arg[7:]
            if not firstPageTitle:
                firstPageTitle = wikipedia.input(
                    u'At which page do you want to start?')
            namespace = wikipedia.Page(site, firstPageTitle).namespace()
            firstPageTitle = wikipedia.Page(
                site, firstPageTitle).titleWithoutNamespace()
            gen = AllpagesPageGenerator(firstPageTitle,
                                        namespace,
                                        includeredirects=False)
        elif arg.startswith('-prefixindex'):
            prefix = arg[13:]
            namespace = None
            if not prefix:
                prefix = wikipedia.input(
                    u'What page names are you looking for?')
            gen = PrefixingPageGenerator(prefix=prefix)
        elif arg.startswith('-newimages'):
            limit = arg[11:] or wikipedia.input(
                u'How many images do you want to load?')
            gen = NewimagesPageGenerator(number=int(limit))
        elif arg.startswith('-new'):
            if len(arg) >= 5:
                gen = NewpagesPageGenerator(number=int(arg[5:]))
            else:
                gen = NewpagesPageGenerator(number=60)
        elif arg.startswith('-imagelinks'):
            imagelinkstitle = arg[len('-imagelinks:'):]
            if not imagelinkstitle:
                imagelinkstitle = wikipedia.input(
                    u'Images on which page should be processed?')
            imagelinksPage = wikipedia.Page(site, imagelinkstitle)
            gen = ImagesPageGenerator(imagelinksPage)
        elif arg.startswith('-search'):
            mediawikiQuery = arg[8:]
            if not mediawikiQuery:
                mediawikiQuery = wikipedia.input(
                    u'What do you want to search for?')
            # In order to be useful, all namespaces are required
            gen = SearchPageGenerator(mediawikiQuery, namespaces=[])
        elif arg.startswith('-google'):
            gen = GoogleSearchPageGenerator(arg[8:])
        elif arg.startswith('-titleregex'):
            if len(arg) == len('-titleregex'):
                regex = wikipedia.input(
                    u'What page names are you looking for?')
            else:
                regex = arg[len('-titleregex:'):]
            gen = RegexFilterPageGenerator(site.allpages(), regex)
        elif arg.startswith('-yahoo'):
            gen = YahooSearchPageGenerator(arg[7:])
        else:
            pass
        if gen:
            self.gens.append(gen)
            return self.getCombinedGenerator()
        else:
            return False
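
The docstring above spells out the calling protocol; a condensed sketch of a typical caller, following the pattern used elsewhere in these examples:

# Feed every command-line argument to the factory, then ask for the
# combined generator and iterate it.
genFactory = GeneratorFactory()
for arg in wikipedia.handleArgs():
    if not genFactory.handleArg(arg):
        wikipedia.output(u'Ignoring unknown argument: %s' % arg)
gen = genFactory.getCombinedGenerator()
if gen:
    for page in gen:
        wikipedia.output(page.title())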
Example #20
    def _parseCategory(self, purge=False, startFrom=None):
        """
        Yields all articles and subcategories that are in this category by API.

        Set startFrom to a string which is the title of the page to start from.

        Yielded results are tuples in the form (tag, page) where tag is one
        of the constants ARTICLE and SUBCATEGORY, and page is the Page or Category
        object.

        Note that results of this method need not be unique.

        This should not be used outside of this module.
        """
        if not self.site().has_api() or self.site().versionnumber() < 11:
            for tag, page in self._oldParseCategory(purge, startFrom):
                yield tag, page
            return

        currentPageOffset = None
        params = {
            'action': 'query',
            'list': 'categorymembers',
            'cmtitle': self.title(),
            'cmprop': ['title', 'ids', 'sortkey', 'timestamp'],
            #'': '',
        }
        while True:
            if wikipedia.config.special_page_limit > 500:
                params['cmlimit'] = 500
            else:
                params['cmlimit'] = wikipedia.config.special_page_limit

            if currentPageOffset:
                params['cmcontinue'] = currentPageOffset
                wikipedia.output(
                    'Getting [[%s]] list from %s...' %
                    (self.title(),
                     currentPageOffset[:-1]))  # cmcontinue last key is '|'
            elif startFrom:
                params['cmstartsortkey'] = startFrom
                wikipedia.output('Getting [[%s]] list starting at %s...' %
                                 (self.title(), startFrom))
            else:
                wikipedia.output('Getting [[%s]]...' % self.title())

            wikipedia.get_throttle()
            data = query.GetData(params, self.site())
            if 'error' in data:
                raise RuntimeError("%s" % data['error'])
            count = 0

            for memb in data['query']['categorymembers']:
                count += 1
                # For MediaWiki versions where subcats look like articles
                if memb['ns'] == 14:
                    yield SUBCATEGORY, Category(self.site(),
                                                memb['title'],
                                                sortKey=memb['sortkey'])
                elif memb['ns'] == 6:
                    yield ARTICLE, wikipedia.ImagePage(self.site(),
                                                       memb['title'])
                else:
                    yield ARTICLE, wikipedia.Page(self.site(),
                                                  memb['title'],
                                                  defaultNamespace=memb['ns'])
                if count >= params['cmlimit']:
                    break
            # try to find a link to the next list page
            if 'query-continue' in data and count < params['cmlimit']:
                currentPageOffset = data['query-continue']['categorymembers'][
                    'cmcontinue']
            else:
                break
Example #21
File: imagecopy.py Project: moleculea/ess
def main(args):
    generator = None
    #newname = "";
    imagepage = None
    always = False
    category = u''
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
        elif arg.startswith('-cc:'):
            category = arg[len('-cc:'):]
        else:
            genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(
            'You have to specify the generator you want to use for the script!'
        )

    pregenerator = pagegenerators.PreloadingGenerator(generator)

    for page in pregenerator:
        skip = False
        if page.exists() and (page.namespace()
                              == 6) and (not page.isRedirectPage()):
            imagepage = pywikibot.ImagePage(page.site(), page.title())

            #First do autoskip.
            if doiskip(imagepage.get()):
                pywikibot.output("Skipping " + page.title())
                skip = True
            else:
                # The first upload is last in the list.
                try:
                    username = imagepage.getLatestUploader()[0]
                except NotImplementedError:
                    #No API, using the page file instead
                    (datetime, username, resolution, size,
                     comment) = imagepage.getFileVersionHistory().pop()
                if always:
                    newname = imagepage.titleWithoutNamespace()
                    CommonsPage = pywikibot.Page(
                        pywikibot.getSite('commons', 'commons'),
                        u'File:' + newname)
                    if CommonsPage.exists():
                        skip = True
                else:
                    while True:

                        # Do the Tkdialog to accept/reject and change the name
                        (newname,
                         skip) = Tkdialog(imagepage.titleWithoutNamespace(),
                                          imagepage.get(), username,
                                          imagepage.permalink(),
                                          imagepage.templates()).getnewname()

                        if skip:
                            pywikibot.output('Skipping this image')
                            break

                        # Did we enter a new name?
                        if len(newname) == 0:
                            #Take the old name
                            newname = imagepage.titleWithoutNamespace()
                        else:
                            newname = newname.decode('utf-8')

                        # Check if the image already exists
                        CommonsPage = pywikibot.Page(
                            pywikibot.getSite('commons', 'commons'),
                            u'File:' + newname)
                        if not CommonsPage.exists():
                            break
                        else:
                            pywikibot.output(
                                'Image already exists, pick another name or skip this image'
                            )
                        # We don't overwrite images; pick another name and go back to the start of the loop

            if not skip:
                imageTransfer(imagepage, newname, category).start()

    pywikibot.output(u'Still ' + str(threading.activeCount()) +
                     u" active threads, let's wait")
    for openthread in threading.enumerate():
        if openthread != threading.currentThread():
            openthread.join()
    pywikibot.output(u'All threads are done')
Example #22
File: catlib.py Project: hasteur/UAABOT
    def _parseCategory(self,
                       purge=False,
                       startFrom=None,
                       sortby=None,
                       sortdir=None,
                       endsort=None):
        """
        Yields all articles and subcategories that are in this category by API.

        Set startFrom to a string which is the title of the page to start from.

        Yielded results are tuples in the form (tag, page) where tag is one
        of the constants ARTICLE and SUBCATEGORY, and page is the Page or
        Category object.

        Note that results of this method need not be unique.

        This should not be used outside of this module.

        """
        if not self.site().has_api() or self.site().versionnumber() < 11:
            for tag, page in self._oldParseCategory(purge, startFrom):
                yield tag, page
            return

        currentPageOffset = None
        params = {
            'action': 'query',
            'list': 'categorymembers',
            'cmtitle': self.title(),
            'cmprop': ['title', 'ids', 'sortkey', 'timestamp'],
            #'': '',
        }
        if self.site().versionnumber() > 16:
            params['cmprop'].append('sortkeyprefix')
        if sortby:
            params['cmsort'] = sortby
        if sortdir:
            params['cmdir'] = sortdir
        while True:
            if pywikibot.config.special_page_limit > 500:
                params['cmlimit'] = 500
            else:
                params['cmlimit'] = pywikibot.config.special_page_limit

            if currentPageOffset:
                params.update(currentPageOffset)
                pywikibot.output(
                    'Getting [[%s]] list from %s...' %
                    (self.title(), "%s=%s" % currentPageOffset.popitem()))
            else:
                msg = 'Getting [[%s]] list' % self.title()
                # category sort keys are uppercase
                if startFrom:
                    startFrom = startFrom.upper()
                    params['cmstartsortkey'] = startFrom
                    msg += ' starting at %s' % startFrom
                if endsort:
                    endsort = endsort.upper()
                    params['cmendsortkey'] = endsort
                    msg += ' ending at %s' % endsort
                pywikibot.output(msg + u'...')

            pywikibot.get_throttle()
            data = query.GetData(params, self.site())
            if 'error' in data:
                raise RuntimeError("%s" % data['error'])
            count = 0

            for memb in data['query']['categorymembers']:
                count += 1
                # For MediaWiki versions where subcats look like articles
                if memb['ns'] == 14:
                    if 'sortkeyprefix' in memb:
                        sortKeyPrefix = memb['sortkeyprefix']
                    else:
                        sortKeyPrefix = None
                    yield SUBCATEGORY, Category(self.site(),
                                                memb['title'],
                                                sortKey=memb['sortkey'],
                                                sortKeyPrefix=sortKeyPrefix)
                elif memb['ns'] == 6:
                    yield ARTICLE, pywikibot.ImagePage(self.site(),
                                                       memb['title'])
                else:
                    page = pywikibot.Page(self.site(),
                                          memb['title'],
                                          defaultNamespace=memb['ns'])
                    if 'sortkeyprefix' in memb:
                        page.sortkeyprefix = memb['sortkeyprefix']
                    else:
                        page.sortkeyprefix = None
                    yield ARTICLE, page
                if count >= params['cmlimit']:
                    break
            # try to find a link to the next list page
            if 'query-continue' in data and count < params['cmlimit']:
                currentPageOffset = data['query-continue']['categorymembers']
            else:
                break
Example #23
File: catlib.py Project: hasteur/UAABOT
    def _oldParseCategory(self, purge=False, startFrom=None):
        """Yields all articles and subcategories that are in this category.

        Set purge to True to instruct MediaWiki not to serve a cached version.

        Set startFrom to a string which is the title of the page to start from.

        Yielded results are tuples in the form (tag, page) where tag is one
        of the constants ARTICLE and SUBCATEGORY, and page is the Page or
        Category object.

        Note that results of this method need not be unique.

        This should not be used outside of this module.

        """
        if self.site().versionnumber() < 4:
            Rtitle = re.compile('title\s?=\s?\"([^\"]*)\"')
        elif self.site().versionnumber() < 8:
            # FIXME seems to parse all links
            Rtitle = re.compile('/\S*(?: title\s?=\s?)?\"([^\"]*)\"')
        else:
            Rtitle = re.compile(
                '<li>(?:<span.*?>)?<a href=\".*?\"\s?title\s?=\s?\"'
                '([^\"]*)\"\>\+?[^\<\+]')
        if self.site().versionnumber() < 8:
            Rsubcat = None
            Rimage = None
        else:
            Rsubcat = re.compile(
                'CategoryTreeLabelCategory\"\s?href=\".+?\">(.+?)</a>')
            Rimage = re.compile(
                '<div class\s?=\s?\"thumb\"\sstyle=\"[^\"]*\">'
                '(?:<div style=\"[^\"]*\">)?<a href=\".*?\"'
                '(?:\sclass="image")?\stitle\s?=\s?\"([^\"]*)\"')
        # regular expression matching the "(next 200)" link
        RLinkToNextPage = re.compile('&amp;from=(.*?)" title="')

        if startFrom:
            currentPageOffset = urllib.quote(
                startFrom.encode(self.site().encoding()))
        else:
            currentPageOffset = None
        while True:
            path = self.site().get_address(self.urlname())
            if purge:
                path += '&action=purge'
            if currentPageOffset:
                path += '&from=' + currentPageOffset
                pywikibot.output(
                    'Getting [[%s]] starting at %s...' %
                    (self.title(),
                     pywikibot.url2link(currentPageOffset, self.site(),
                                        self.site())))
            else:
                pywikibot.output('Getting [[%s]]...' % self.title())
            pywikibot.get_throttle()
            txt = self.site().getUrl(path)
            # index where subcategory listing begins
            if self.site().versionnumber() >= 9:
                # These IDs were introduced in 1.9
                if '<div id="mw-subcategories">' in txt:
                    ibegin = txt.index('<div id="mw-subcategories">')
                elif '<div id="mw-pages">' in txt:
                    ibegin = txt.index('<div id="mw-pages">')
                elif '<div id="mw-category-media">' in txt:
                    ibegin = txt.index('<div id="mw-category-media">')
                else:
                    # No pages
                    return
            else:
                # does not work for cats without text
                ibegin = txt.index('<!-- start content -->')
                # TODO: This parses category text and may think they are
                # pages in category! Check for versions before 1.9

            # index where article listing ends
            if '<div class="printfooter">' in txt:
                iend = txt.index('<div class="printfooter">')
            elif '<div class="catlinks">' in txt:
                iend = txt.index('<div class="catlinks">')
            else:
                iend = txt.index('<!-- end content -->')
            txt = txt[ibegin:iend]
            for title in Rtitle.findall(txt):
                if title == self.title():
                    # This is only a link to "previous 200" or "next 200".
                    # Ignore it.
                    pass
                # For MediaWiki versions where subcats look like articles
                elif isCatTitle(title, self.site()):
                    yield SUBCATEGORY, Category(self.site(), title)
                else:
                    yield ARTICLE, pywikibot.Page(self.site(), title)
            if Rsubcat:
                # For MediaWiki versions where subcats look differently
                for titleWithoutNamespace in Rsubcat.findall(txt):
                    title = 'Category:%s' % titleWithoutNamespace
                    yield SUBCATEGORY, Category(self.site(), title)
            if Rimage:
                # For MediaWiki versions where images work through galleries
                for title in Rimage.findall(txt):
                    # In some MediaWiki versions, the titles contain the
                    # namespace, but they don't in other (newer) versions. Use
                    # the ImagePage's defaultNamespace feature to get everything
                    # correctly.
                    yield ARTICLE, pywikibot.ImagePage(self.site(), title)
            # try to find a link to the next list page
            matchObj = RLinkToNextPage.search(txt)
            if matchObj:
                currentPageOffset = matchObj.group(1)
            else:
                break
Example #24
lcontent = pywikibot.translate(site, content)
category = pywikibot.translate(site, cat)
putmsg = pywikibot.translate(site, msg)

# From the non-free copyright tag category, get all EDP templates
templatecat = catlib.Category(site, category)
templatelist = templatecat.articlesList()

# From the references of each EDP template, get all non-free images
for template in templatelist:
    images = [page for page in template.getReferences() if page.isImage()]

    for image in images:
        imagetitle = image.title()
        imagepage = pywikibot.ImagePage(site, imagetitle)

        # From the image page, get all using pages outside the article namespace
        pimages = [
            puseimage for puseimage in imagepage.usingPages()
            if puseimage.namespace() != 0
        ]
        for pimage in pimages:
            ns = pimage.namespace()
            pimagetitle = pimage.title()
            c = u'\nfound a use of the image [[%s]] in [[%s]]: ' \
                % (imagetitle, pimagetitle)
            text = pimage.get()
            try:
                re.search('<!--(.*?)' + imagetitle + '(.*?)-->', text,
                          re.I).group(0)
Example #25
def main():
    site = wikipedia.getSite(u'commons', u'commons')

    #Array of images to work on
    images = []
    imageTitleA = u''
    imageTitleB = u''
    familyA = u''
    familyB = u''
    langA = u''
    langB = u''
    imagePageA = None
    imagePageB = None

    for arg in wikipedia.handleArgs():
        if arg.startswith('-familyA:'):
            if len(arg) == len('-familyA:'):
                familyA = wikipedia.input(u'What family do you want to use?')
            else:
                familyA = arg[len('-familyA:'):]
        elif arg.startswith('-familyB:'):
            if len(arg) == len('-familyB:'):
                familyB = wikipedia.input(u'What family do you want to use?')
            else:
                familyB = arg[len('-familyB:'):]
        elif arg.startswith('-langA:'):
            if len(arg) == len('-langA:'):
                langA = wikipedia.input(u'What language do you want to use?')
            else:
                langA = arg[len('-langA:'):]
        elif arg.startswith('-langB:'):
            if len(arg) == len('-langB:'):
                langB = wikipedia.input(u'What language do you want to use?')
            else:
                langB = arg[len('-langB:'):]
        else:
            images.append(arg)

    if not (len(images) == 2):
        raise wikipedia.Error, 'This script requires two images to work on.'
    else:
        imageTitleA = images[0]
        imageTitleB = images[1]

    if not (imageTitleA == u''):
        if not (langA == u''):
            if not (familyA == u''):
                imagePageA = wikipedia.ImagePage(
                    wikipedia.getSite(langA, familyA), imageTitleA)
            else:
                imagePageA = wikipedia.ImagePage(
                    wikipedia.getSite(langA, u'wikipedia'), imageTitleA)
        else:
            imagePageA = wikipedia.ImagePage(
                wikipedia.getSite(u'commons', u'commons'), imageTitleA)

    if not (imageTitleB == u''):
        if not (langB == u''):
            if not (familyB == u''):
                imagePageB = wikipedia.ImagePage(
                    wikipedia.getSite(langB, familyB), imageTitleB)
            else:
                imagePageB = wikipedia.ImagePage(
                    wikipedia.getSite(langB, u'wikipedia'), imageTitleB)
        else:
            imagePageB = wikipedia.ImagePage(
                wikipedia.getSite(u'commons', u'commons'), imageTitleB)

    if imagePageA and imagePageB:
        matchImagePages(imagePageA, imagePageB)
Example #26
    def run(self):
        commons = pywikibot.getSite('commons', 'commons')
        comment = pywikibot.translate(self.site, nowCommonsMessage)

        for page in self.getPageGenerator():
            if use_hash:
                # The local title (index 0) includes the namespace;
                # the commons title (index 1) does not.
                images_list = page  # 0 -> local image, 1 -> commons image
                page = pywikibot.Page(self.site, images_list[0])
            else:
                # If use_hash is true, we have already printed this before; no need to repeat it.
                # Show the title of the page we're working on.
                # Highlight the title in purple.
                pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                                 % page.title())
            try:
                localImagePage = pywikibot.ImagePage(self.site, page.title())
                if localImagePage.fileIsOnCommons():
                    pywikibot.output(u'File is already on Commons.')
                    continue
                md5 = localImagePage.getFileMd5Sum()
                if use_hash:
                    filenameOnCommons = images_list[1]
                else:
                    filenameOnCommons = self.findFilenameOnCommons(
                        localImagePage)
                if not filenameOnCommons and not use_hash:
                    pywikibot.output(u'NowCommons template not found.')
                    continue
                commonsImagePage = pywikibot.ImagePage(commons, 'Image:%s'
                                                       % filenameOnCommons)
                if localImagePage.title(withNamespace=False) == \
                   commonsImagePage.title(withNamespace=False) and use_hash:
                    pywikibot.output(
                        u'The local and the commons images have the same name')
                if localImagePage.title(withNamespace=False) != \
                   commonsImagePage.title(withNamespace=False):
                    usingPages = list(localImagePage.usingPages())
                    if usingPages and usingPages != [localImagePage]:
                        pywikibot.output(
                            u'\"\03{lightred}%s\03{default}\" is still used in %i pages.'
                            % (localImagePage.title(withNamespace=False),
                               len(usingPages)))
                        if replace:
                                pywikibot.output(
                                    u'Replacing \"\03{lightred}%s\03{default}\" by \"\03{lightgreen}%s\03{default}\".'
                                    % (localImagePage.title(withNamespace=False),
                                       commonsImagePage.title(withNamespace=False)))
                                oImageRobot = image.ImageRobot(
                                    pg.FileLinksGenerator(localImagePage),
                                    localImagePage.title(withNamespace=False),
                                    commonsImagePage.title(withNamespace=False),
                                    '', replacealways, replaceloose)
                                oImageRobot.run()
                                # If the image is used with the urlname the
                                # previous function won't work
                                if len(list(pywikibot.ImagePage(self.site,
                                                                page.title()).usingPages())) > 0 and \
                                                                replaceloose:
                                    oImageRobot = image.ImageRobot(
                                        pg.FileLinksGenerator(
                                            localImagePage),
                                        self.urlname(
                                            localImagePage.title(
                                                withNamespace=False)),
                                        commonsImagePage.title(
                                            withNamespace=False),
                                        '', replacealways, replaceloose)
                                    oImageRobot.run()
                                # refresh because we want the updated list
                                usingPages = len(list(pywikibot.ImagePage(
                                    self.site, page.title()).usingPages()))
                                if usingPages > 0 and use_hash:
                                    # just an enter
                                    pywikibot.input(
                                        u'There are still %s pages with this image, confirm the manual removal from them please.'
                                        % usingPages)

                        else:
                            pywikibot.output(u'Please change them manually.')
                        continue
                    else:
                        pywikibot.output(
                            u'No page is using \"\03{lightgreen}%s\03{default}\" anymore.'
                            % localImagePage.title(withNamespace=False))
                commonsText = commonsImagePage.get()
                if not replaceonly:
                    if md5 == commonsImagePage.getFileMd5Sum():
                        pywikibot.output(
                            u'The image is identical to the one on Commons.')
                        if len(localImagePage.getFileVersionHistory()) > 1 and not use_hash:
                            pywikibot.output(
                                u"This image has a version history. Please delete it manually after making sure that the old versions are not worth keeping.""")
                            continue
                        if not autonomous:
                            pywikibot.output(
                                u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n'
                                % page.title())
                            pywikibot.output(localImagePage.get())
                            pywikibot.output(
                                u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n'
                                % commonsImagePage.title())
                            pywikibot.output(commonsText)
                            choice = pywikibot.inputChoice(
                                u'Does the description on Commons contain all required source and license\n'
                                u'information?',
                                ['yes', 'no'], ['y', 'N'], 'N')
                            if choice.lower() in ['y', 'yes']:
                                localImagePage.delete(
                                    comment + ' [[:commons:Image:%s]]'
                                    % filenameOnCommons, prompt = False)
                        else:
                            localImagePage.delete(
                                comment + ' [[:commons:Image:%s]]'
                                % filenameOnCommons, prompt = False)
                    else:
                        pywikibot.output(
                            u'The image is not identical to the one on Commons.')
            except (pywikibot.NoPage, pywikibot.IsRedirectPage), e:
                pywikibot.output(u'%s' % e[0])
                continue
Example #27
    def handleArg(self, arg):
        """Parse one argument at a time.

        If it is recognized as an argument that specifies a generator, a
        generator is created and added to the accumulation list, and the
        function returns true.  Otherwise, it returns false, so that caller
        can try parsing the argument. Call getCombinedGenerator() after all
        arguments have been parsed to get the final output generator.

        """
        site = pywikibot.getSite()
        gen = None
        if arg.startswith('-filelinks'):
            fileLinksPageTitle = arg[11:]
            if not fileLinksPageTitle:
                fileLinksPageTitle = i18n.input(
                    'pywikibot-enter-file-links-processing')
            if fileLinksPageTitle.startswith(site.namespace(6) + ":"):
                fileLinksPage = pywikibot.ImagePage(site, fileLinksPageTitle)
            else:
                fileLinksPage = pywikibot.ImagePage(
                    site, 'Image:' + fileLinksPageTitle)
            gen = FileLinksGenerator(fileLinksPage)
        elif arg.startswith('-unusedfiles'):
            if len(arg) == 12:
                gen = UnusedFilesGenerator()
            else:
                gen = UnusedFilesGenerator(number=int(arg[13:]))
        elif arg.startswith('-unwatched'):
            if len(arg) == 10:
                gen = UnwatchedPagesPageGenerator()
            else:
                gen = UnwatchedPagesPageGenerator(number=int(arg[11:]))
        elif arg.startswith('-usercontribs'):
            args = arg[14:].split(';')
            try:
                number = int(args[1])
            except (IndexError, ValueError):
                number = 250
            gen = UserContributionsGenerator(args[0],
                                             number,
                                             namespaces=self.getNamespaces)
        elif arg.startswith('-withoutinterwiki'):
            if len(arg) == 17:
                gen = WithoutInterwikiPageGenerator()
            else:
                gen = WithoutInterwikiPageGenerator(number=int(arg[18:]))
        elif arg.startswith('-interwiki'):
            title = arg[11:]
            if not title:
                title = i18n.input('pywikibot-enter-page-processing')
            page = pywikibot.Page(site, title)
            gen = InterwikiPageGenerator(page)
        elif arg.startswith('-randomredirect'):
            if len(arg) == 15:
                gen = RandomRedirectPageGenerator()
            else:
                gen = RandomRedirectPageGenerator(number=int(arg[16:]))
        elif arg.startswith('-random'):
            if len(arg) == 7:
                gen = RandomPageGenerator()
            else:
                gen = RandomPageGenerator(number=int(arg[8:]))
        elif arg.startswith('-recentchanges'):
            if len(arg) >= 15:
                gen = RecentchangesPageGenerator(number=int(arg[15:]),
                                                 nobots=False)
            else:
                gen = RecentchangesPageGenerator(nobots=False)
            gen = DuplicateFilterPageGenerator(gen)
        elif arg.startswith('-rc-nobots'):
            if len(arg) >= 11:
                gen = RecentchangesPageGenerator(number=int(arg[11:]),
                                                 nobots=True)
            else:
                gen = RecentchangesPageGenerator(nobots=True)
            gen = DuplicateFilterPageGenerator(gen)
        elif arg.startswith('-file'):
            textfilename = arg[6:]
            if not textfilename:
                textfilename = pywikibot.input(
                    u'Please enter the local file name:')
            gen = TextfilePageGenerator(textfilename)
        elif arg.startswith('-namespace'):
            if len(arg) == len('-namespace'):
                self.namespaces.append(
                    pywikibot.input(u'What namespace are you filtering on?'))
            else:
                self.namespaces.extend(arg[len('-namespace:'):].split(","))
            return True
        elif arg.startswith('-ns'):
            if len(arg) == len('-ns'):
                self.namespaces.append(
                    pywikibot.input(u'What namespace are you filtering on?'))
            else:
                self.namespaces.extend(arg[len('-ns:'):].split(","))
            return True
        elif arg.startswith('-limit'):
            if len(arg) == len('-limit'):
                self.limit = int(pywikibot.input("What is the limit value?"))
            else:
                self.limit = int(arg[len('-limit:'):])
            return True
        elif arg.startswith('-catr'):
            gen = self.getCategoryGen(arg, len('-catr'), recurse=True)
        elif arg.startswith('-category'):
            gen = self.getCategoryGen(arg, len('-category'))
        elif arg.startswith('-cat'):
            gen = self.getCategoryGen(arg, len('-cat'))
        elif arg.startswith('-subcatsr'):
            gen = self.setSubCategoriesGen(arg, 9, recurse=True)
        elif arg.startswith('-subcats'):
            gen = self.setSubCategoriesGen(arg, 8)
        elif arg.startswith('-page'):
            if len(arg) == len('-page'):
                gen = [
                    pywikibot.Page(
                        site,
                        pywikibot.input(u'What page do you want to use?'))
                ]
            else:
                gen = [pywikibot.Page(site, arg[len('-page:'):])]
        elif arg.startswith('-uncatfiles'):
            gen = UnCategorizedImageGenerator()
        elif arg.startswith('-uncatcat'):
            gen = UnCategorizedCategoryGenerator()
        elif arg.startswith('-uncattemplates'):
            gen = UnCategorizedTemplatesGenerator()
        elif arg.startswith('-uncat'):
            gen = UnCategorizedPageGenerator()
        elif arg.startswith('-ref'):
            referredPageTitle = arg[5:]
            if not referredPageTitle:
                referredPageTitle = pywikibot.input(
                    u'Links to which page should be processed?')
            referredPage = pywikibot.Page(site, referredPageTitle)
            gen = ReferringPageGenerator(referredPage)
        elif arg.startswith('-links'):
            linkingPageTitle = arg[7:]
            if not linkingPageTitle:
                linkingPageTitle = pywikibot.input(
                    u'Links from which page should be processed?')
            linkingPage = pywikibot.Page(site, linkingPageTitle)
            gen = LinkedPageGenerator(linkingPage)
        elif arg.startswith('-weblink'):
            url = arg[9:]
            if not url:
                url = pywikibot.input(
                    u'Pages with which weblink should be processed?')
            gen = LinksearchPageGenerator(url)
        elif arg.startswith('-transcludes'):
            transclusionPageTitle = arg[len('-transcludes:'):]
            if not transclusionPageTitle:
                transclusionPageTitle = pywikibot.input(
                    u'Pages that transclude which page should be processed?')
            transclusionPage = pywikibot.Page(
                site, "%s:%s" % (site.namespace(10), transclusionPageTitle))
            gen = ReferringPageGenerator(transclusionPage,
                                         onlyTemplateInclusion=True)
        elif arg.startswith('-gorandom'):
            for firstPage in RandomPageGenerator(number=1):
                firstPageTitle = firstPage.title()
            namespace = pywikibot.Page(site, firstPageTitle).namespace()
            firstPageTitle = pywikibot.Page(
                site, firstPageTitle).title(withNamespace=False)
            gen = AllpagesPageGenerator(firstPageTitle,
                                        namespace,
                                        includeredirects=False)
        elif arg.startswith('-start'):
            firstPageTitle = arg[7:]
            if not firstPageTitle:
                firstPageTitle = pywikibot.input(
                    u'At which page do you want to start?')
            if self.namespaces:
                namespace = self.namespaces[0]
            else:
                namespace = pywikibot.Page(site, firstPageTitle).namespace()

            firstPageTitle = pywikibot.Page(
                site, firstPageTitle).title(withNamespace=False)
            gen = AllpagesPageGenerator(firstPageTitle,
                                        namespace,
                                        includeredirects=False)
        elif arg.startswith('-redirectonly'):
            firstPageTitle = arg[14:]
            if not firstPageTitle:
                firstPageTitle = pywikibot.input(
                    u'At which page do you want to start?')
            namespace = pywikibot.Page(site, firstPageTitle).namespace()
            firstPageTitle = pywikibot.Page(
                site, firstPageTitle).title(withNamespace=False)
            gen = AllpagesPageGenerator(firstPageTitle,
                                        namespace,
                                        includeredirects='only')
        elif arg.startswith('-prefixindex'):
            prefix = arg[13:]
            if not prefix:
                prefix = pywikibot.input(
                    u'What page names are you looking for?')
            gen = PrefixingPageGenerator(prefix=prefix)
        elif arg.startswith('-newimages'):
            limit = arg[11:] or pywikibot.input(
                u'How many images do you want to load?')
            gen = NewimagesPageGenerator(number=int(limit))
        elif arg == '-new' or arg.startswith('-new:'):
            if len(arg) > len('-new:'):
                gen = NewpagesPageGenerator(number=int(arg[5:]))
            else:
                gen = NewpagesPageGenerator(number=60)
        elif arg.startswith('-imagelinks'):
            imagelinkstitle = arg[len('-imagelinks:'):]
            if not imagelinkstitle:
                imagelinkstitle = pywikibot.input(
                    u'Images on which page should be processed?')
            imagelinksPage = pywikibot.Page(site, imagelinkstitle)
            gen = ImagesPageGenerator(imagelinksPage)
        elif arg.startswith('-search'):
            mediawikiQuery = arg[8:]
            if not mediawikiQuery:
                mediawikiQuery = pywikibot.input(
                    u'What do you want to search for?')
            gen = SearchPageGenerator(mediawikiQuery,
                                      number=None,
                                      namespaces=self.getNamespaces)
        elif arg.startswith('-titleregex'):
            if len(arg) == 11:
                regex = pywikibot.input(
                    u'What page names are you looking for?')
            else:
                regex = arg[12:]
            gen = RegexFilterPageGenerator(site.allpages(), [regex])
        elif arg.startswith('-yahoo'):
            gen = YahooSearchPageGenerator(arg[7:])
        elif arg.startswith('-'):
            mode, log, user = arg.partition('log')
            # exclude -log, -nolog
            if log == 'log' and mode not in ['-', '-no']:
                number = 500
                if not user:
                    user = None
                else:
                    try:
                        number = int(user[1:])
                        user = None
                    except ValueError:
                        user = user[1:]
                if user:
                    result = user.split(';')
                    user = result[0]
                    try:
                        number = int(result[1])
                    except (IndexError, ValueError):
                        pass
                gen = LogpagesPageGenerator(number, mode[1:], user)
        if gen:
            self.gens.append(gen)
            return True
        else:
            return False
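
The docstring spells out the calling contract: handleArg() consumes one
argument at a time and reports whether it recognized it, and
getCombinedGenerator() is called exactly once, after all arguments have been
seen. The code below is a minimal usage sketch of that driving loop, assuming
the compat-era GeneratorFactory class that hosts this method; the
"unrecognized argument" message is illustrative only.

import pywikibot
from pywikibot import pagegenerators

def main(*args):
    # Feed every command-line argument to the factory; anything it does
    # not recognize is left for the script to parse itself.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs(*args):
        if not genFactory.handleArg(arg):
            pywikibot.output(u'Ignoring unrecognized argument: %s' % arg)
    # Combine the accumulated generators only once, at the end.
    gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp()
        return
    for page in pagegenerators.PreloadingGenerator(gen):
        pywikibot.output(page.title())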
Example #28
    def doImage(self, image):
        # Field layout: name|newName|sourceWiki|exclusionMode|exclusionInfo
        data = image.split(u'|')
        imageName = data[0]
        newImageName = data[0]
        if len(data) >= 2 and data[1].strip():
            newImageName = data[1]
        sourceWiki = u'anime'
        if len(data) >= 3:
            sourceWiki = data[2]
        exclusionMode = u'normal'
        if len(data) >= 4:
            exclusionMode = data[3]
        exclusionInfo = u''
        if len(data) >= 5:
            exclusionInfo = data[4]
        sourceSite = None
        outputSites = []
        sourceImage = None
        sourcePage = None

        wikipedia.output(u'Doing Image %s' % imageName)
        for site in self.siteList:
            if site.family.name == sourceWiki:
                sourceSite = site
            if exclusionMode == u'normal':
                outputSites.append(site)
            elif exclusionMode == u'include':
                includes = exclusionInfo.split(u',')
                if site.family.name in includes:
                    outputSites.append(site)
            elif exclusionMode == u'exclude':
                excludes = exclusionInfo.split(u',')
                if site.family.name not in excludes:
                    outputSites.append(site)
            else:
                wikipedia.output(u'Unknown exclusion mode. Skipping %s.' %
                                 imageName)
                return False
        if sourceSite is None:
            wikipedia.output(u'No source site found. Skipping %s.' % imageName)
            return False

        try:
            sourceDescriptionPage = wikipedia.Page(sourceSite, imageName, None,
                                                   6)  #6=Image Namespace
            sourceImagePage = wikipedia.ImagePage(
                sourceSite, sourceDescriptionPage.title())
        except wikipedia.NoPage:
            wikipedia.output(u'No source page found. Skipping %s.' % imageName)
            return False

        sourceURL = sourceImagePage.fileUrl()
        if '://' not in sourceURL:
            sourceURL = u'http://%s%s' % (sourceSite.hostname(), sourceURL)

        # Get file contents
        uo = wikipedia.MyURLopener()
        sourceFile = uo.open(sourceURL, "rb")
        wikipedia.output(u'Reading file %s' % sourceURL)
        sourceContents = sourceFile.read()
        sourceFile.close()
        if sourceContents.find(
                "The requested URL was not found on this server.") != -1:
            wikipedia.output("Couldn't download the image. Skipping.")
            return False

        #Setup Description Page
        pageDescription = sourceDescriptionPage.get()
        pageDescription = re.sub(u'== Summary ==\n?', u'', pageDescription)

        mirrorText = u'{{networkMirror|%s|%s}}' % (imageName,
                                                   sourceSite.family.name)
        comm = re.compile(u'({{commons(\|[^{}]*)?}})', re.IGNORECASE)
        if re.search(comm, pageDescription):
            pageDescription = re.sub(comm, u'\\1\n%s' % mirrorText,
                                     pageDescription)
        else:
            pageDescription = u'%s%s' % (mirrorText, pageDescription)
        pageDescription = u'== Summary ==\n%s' % pageDescription

        for site in outputSites:
            if sourceSite.family.name != site.family.name or imageName != newImageName:
                doUpload = False
                doDescription = False

                try:
                    siteDescriptionPage = wikipedia.Page(
                        site, newImageName, None, 6)  #6=Image Namespace
                    siteImagePage = wikipedia.ImagePage(
                        site, siteDescriptionPage.title())

                    siteURL = siteImagePage.fileUrl()
                    if '://' not in siteURL:
                        siteURL = u'http://%s%s' % (site.hostname(), siteURL)

                    uo2 = wikipedia.MyURLopener()
                    siteFile = uo2.open(siteURL, "rb")
                    wikipedia.output(u'Reading file %s' % siteURL)
                    siteContents = siteFile.read()
                    siteFile.close()
                    if siteContents.find(
                            "The requested URL was not found on this server."
                    ) != -1:
                        wikipedia.output(
                            "Couldn't download the image at new location.")
                        doUpload = True
                    elif siteContents != sourceContents:
                        doUpload = True

                    if siteDescriptionPage.get() != pageDescription:
                        doDescription = True

                except wikipedia.NoPage:
                    doUpload = True
                    doDescription = True

                if doUpload:
                    bot = upload.UploadRobot(url=sourceURL,
                                             useFilename=newImageName,
                                             keepFilename=True,
                                             verifyDescription=False,
                                             description=msg['en'],
                                             targetSite=site,
                                             urlEncoding=sourceSite.encoding())
                    bot.run()
                if doDescription:
                    siteDescriptionPage.put(pageDescription)
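
Nothing in this example documents the pipe-delimited specification that
doImage() expects, but the parsing at the top of the method implies the
layout imageName|newImageName|sourceWiki|exclusionMode|exclusionInfo, where
exclusionMode is one of normal, include, or exclude, and exclusionInfo is a
comma-separated list of wiki family names. Below is a hedged usage sketch;
the MirrorBot class name and the 'fanwiki' family are assumptions for
illustration, not part of the source.

# Hypothetical driver; only the field layout is taken from doImage() itself.
bot = MirrorBot()  # assumed: a bot object exposing siteList and doImage()
# Mirror Example.png from the 'anime' family to every configured site
# except the assumed 'fanwiki' family:
bot.doImage(u'Example.png|Example.png|anime|exclude|fanwiki')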