Example #1
def processFile(row, imageDir):
    metadata = getMetadata(row)
    title = getTitle(metadata)
    description = getDescription(metadata)

    # Check if the title already exists
    site = wikipedia.getSite('commons', 'commons')
    page = wikipedia.ImagePage(site, title)

    if page.exists():
        wikipedia.output(u'The file %s already exists. Probably already uploaded by me. Skipping' % title)
        return False

    wikipedia.output(u'Preparing upload for %s.' % title)    
    wikipedia.output(description)    
                        
    # Download and dezoomify the image
    tempfile = imageDir + metadata.get('id') + u'.jpg'
    try:
        dezoomify.Dezoomify(url=metadata.get('link'), debug=True, out=tempfile)
    except IOError as e:
        #wikipedia.output(e)
        wikipedia.output(u'Dezoomify failed')
        return False
        

    # Check for dupe. This probably doesn't work, but it doesn't hurt either.
    duplicates = findDuplicateImages(tempfile)
    if duplicates:
        wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
        return False
    
    bot = upload.UploadRobot(url=tempfile, description=description, useFilename=title, keepFilename=True, verifyDescription=False)
    bot.run()
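
The pattern above repeats throughout this page: fetch the metadata, check for an existing file or duplicate, then hand everything to upload.UploadRobot. A minimal, hedged sketch of just that shared core, using only the keyword arguments that appear in these examples (the path, description text and destination filename are placeholders):

import wikipedia   # pywikibot compat-era entry point, as used on this page
import upload

site = wikipedia.getSite('commons', 'commons')
bot = upload.UploadRobot(url=u'/tmp/example.jpg',            # local path or http(s) URL
                         description=u'{{Information|description=Example}}',
                         useFilename=u'Example image.jpg',   # destination filename on the wiki
                         keepFilename=True,                  # skip the rename prompt
                         verifyDescription=False,            # skip the description prompt
                         targetSite=site)
bot.run()
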
Example #2
 def run(self):
     for page in self.generator:
         try:
             # get the page, and save it using the unmodified text.
             # whether or not getting a redirect throws an exception
             # depends on the variable self.touch_redirects.
             text = page.get()
             originalText = text
             for url in weblinkchecker.weblinksIn(text,
                                                  withoutBracketed=True):
                 filename = url.split('/')[-1]
                 description = pywikibot.translate(pywikibot.getSite(),
                                                   msg) % url
                 bot = upload.UploadRobot(url, description=description)
                 # TODO: check duplicates
                 #filename = bot.uploadImage()
                 #if filename:
                 #    text = text.replace(url, u'[[Image:%s]]' % filename) #
             # only save if there were changes
             #if text != originalText:
             #    page.put(text)
         except pywikibot.NoPage:
             print "Page %s does not exist?!" % page.title(asLink=True)
         except pywikibot.IsRedirectPage:
             print "Page %s is a redirect; skipping." \
                   % page.title(asLink=True)
         except pywikibot.LockedPage:
             print "Page %s is locked?!" % page.title(asLink=True)
Example #3
def processPhoto(photo_id):

    # Get all the metadata
    metadata = getMetadata(photo_id)
    if not metadata:
        #Incorrect photo_id
        return

    photoUrl = u'http://www.fema.gov/photodata/original/' + str(
        photo_id) + '.jpg'
    photo = downloadPhoto(photoUrl)

    duplicates = findDuplicateImages(photo)
    # We don't want to upload dupes
    if duplicates:
        wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
        return

    title = buildTitle(photo_id, metadata)
    description = buildDescription(photo_id, metadata)

    bot = upload.UploadRobot(photoUrl,
                             description=description,
                             useFilename=title,
                             keepFilename=True,
                             verifyDescription=False,
                             targetSite=wikipedia.getSite(
                                 'commons', 'commons'))
    bot.upload_image(debug=False)
Example #4
def up(filename, pagetitle, desc):
    url = filename
    keepFilename=True        #set to True to skip double-checking/editing destination filename
    verifyDescription=False    #set to False to skip double-checking/editing description => change to bot-mode
    targetSite = wikipedia.getSite('commons', 'commons')
    bot = upload.UploadRobot(url, description=desc, useFilename=pagetitle, keepFilename=keepFilename, verifyDescription=verifyDescription, targetSite = targetSite)
    bot.upload_image(debug=True)
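
up() above just pins the bot-mode flags (keepFilename, verifyDescription) and the Commons target. A hedged sketch of driving it over a couple of local files (the paths, titles and description are placeholders, and up() is assumed to be defined as in Example #4):

# Sketch only: calls the up() helper from Example #4 for each prepared file.
batch = [
    (u'/tmp/map_1900.png', u'Map of the area (1900).png'),
    (u'/tmp/map_1950.png', u'Map of the area (1950).png'),
]
for filename, pagetitle in batch:
    desc = u'{{Information|description=Scanned map|source={{own}}}}'
    up(filename, pagetitle, desc)
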
Example #5
def main(args):
    conn = None
    cursor = None
    (conn, cursor) = connectDatabase()

    if (args[0]):
        subject = args[0]
        if os.path.isdir(subject):
            for filename in glob.glob(subject + "/*.jpg"):
                duplicates = findDuplicateImages(filename)
                if duplicates:
                    wikipedia.output(u'Found duplicate image at %s' %
                                     duplicates.pop())
                else:
                    #print f
                    description = generateDescriptionFromFile(filename, cursor)
                    #wikipedia.output(description)
                    #wikipedia.output(description)
                    #wikipedia.output(u'Reading file %s' % filename.decode(sys.getfilesystemencoding()))
                    bot = upload.UploadRobot(url=filename.decode(
                        sys.getfilesystemencoding()),
                                             description=description,
                                             keepFilename=True,
                                             verifyDescription=False)
                    bot.run()
        #else:
        #    generateDescriptionFromFile(f, cursor)
    else:
        print u'Use kit_description_generator.py <folder> '
Example #6
    def procesFile(self, metadata):
        # FIXME: Do some metadata enrichment
        #metadata = getEuropeanaMetadata(metadata)

        fileLocation = metadata.get(self.configuration.get('sourceFileField'))

        photo = self.downloadPhoto(fileLocation)
        duplicates = self.findDuplicateImages(photo)

        # We don't want to upload dupes
        if duplicates:
            pywikibot.output(u'Found duplicate image at %s' % duplicates.pop())
            # The file is at Commons so return True
            return True

        # FIXME: Do some checking to see if the title already exists

        title = self.getTitle(metadata)
        description = self.getDescription(metadata)

        pywikibot.output(u'Preparing upload for %s.' % title)
        pywikibot.output(description)

        bot = upload.UploadRobot(url=fileLocation,
                                 description=description,
                                 useFilename=title,
                                 keepFilename=True,
                                 verifyDescription=False,
                                 targetSite=self.site)
        bot.run()
Example #7
def main(args):


    directory = u'D:/Wikipedia/nationaal archief/WeTransfer-VjTrJQOD/'
    csvFile = u'D:/Wikipedia/nationaal archief/WeTransfer-VjTrJQOD/Complete lijst Spaarnestad.csv'

    database = {}

    reader = csv.reader(open(csvFile, "rb"))
    for row in reader:
        database[row[0]] = row
        #print row
        #        wikipedia.output(row)

    if os.path.isdir(directory):
        for filename in glob.glob(directory + "/*.jpg"):
            #print filename
            duplicates = findDuplicateImages(filename)
            if duplicates:
                wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
            else:
                print 'bla'
                dirname = os.path.dirname(filename)
                basename = os.path.basename(filename)
                baseFilename, extension = os.path.splitext(basename)

                description = generateDescription(baseFilename, database)
                title = getTitle(baseFilename, database)

                wikipedia.output(description)
                wikipedia.output(title)
                bot = upload.UploadRobot(url=filename.decode(sys.getfilesystemencoding()), description=description, useFilename=title, keepFilename=True, verifyDescription=False)
                bot.run()
Example #8
def up(filename, pagetitle, desc, comment):
	if filename[:4] == 'http':
		source_url=filename; source_filename=None
	else:
		source_url=None; source_filename=filename
		site.upload(pywikibot.ImagePage(site, 'File:' + pagetitle),
		source_filename=source_filename, 
		source_url=source_url,
		comment=comment, 
		text=desc, 
		watch=False, 
		ignore_warnings=True, # True if ignoring duplicates
		chunk_size=1048576)
		return
	url = source_url
	keepFilename=True        #set to True to skip double-checking/editing destination filename
	verifyDescription=False    #set to False to skip double-checking/editing description => change to bot-mode
	targetSite = pywikibot.getSite('commons', 'commons')
	bot = upload.UploadRobot(
		[url], # string gives deprecation msg
		description=desc, # only one description if multiple images
		useFilename=pagetitle,
		keepFilename=keepFilename, 
		verifyDescription=verifyDescription, 
		targetSite = targetSite,
		ignoreWarning = True,
		chunk_size=2000000 # 2MB
		)
	bot.upload_file(file_url=url, debug=True)
Example #9
def main(give_url, image_url, desc):
    url = give_url

    if url == '':
        if image_url:
            url = pywikibot.input(
                u"What URL range should I check (use $ for the part that is changeable)")
        else:
            url = pywikibot.input(
                u"From what URL should I get the images?")

    if image_url:
        minimum=1
        maximum=99
        answer= pywikibot.input(
            u"What is the first number to check (default: 1)")
        if answer:
            minimum=int(answer)
        answer= pywikibot.input(
            u"What is the last number to check (default: 99)")
        if answer:
            maximum=int(answer)

    if not desc:
        basicdesc = pywikibot.input(
            u"What text should be added at the end of the description of each image from this url?")
    else:
        basicdesc = desc

    if image_url:
        ilinks = []
        i = minimum
        while i <= maximum:
            ilinks += [url.replace("$",str(i))]
            i += 1
    else:
        ilinks = get_imagelinks(url)

    for image in ilinks:
        answer = pywikibot.inputChoice(u'Include image %s?'
                                       % image, ['yes', 'no', 'stop'],
                                       ['y', 'N', 's'], 'N')
        if answer == 'y':
            desc = pywikibot.input(u"Give the description of this image:")
            categories = []
            while True:
                cat = pywikibot.input(
                    u"Specify a category (or press enter to end adding categories)")
                if not cat.strip(): break
                if ":" in cat:
                    categories.append("[["+cat+"]]")
                else:
                    categories.append("[["+mysite.namespace(14)+":"+cat+"]]")
            desc = desc + "\r\n\r\n" + basicdesc + "\r\n\r\n" + \
                   "\r\n".join(categories)
            uploadBot = upload.UploadRobot(image, description = desc)
            uploadBot.run()
        elif answer == 's':
            break
Example #10
def main(args):

    directory = u'D:/Wikipedia/nationaal archief/WeTransfer-M18YYg8e/Nationaal Archief fotoselectie WM batch 20100906/Nationaal Archief fotoselectie WM batch 20100906/'
    batchCsvFile = u'D:/Wikipedia/nationaal archief/WeTransfer-M18YYg8e/Nationaal Archief_fotoselectie WM_batch20100906.csv'
    maincCsvFile = u'D:/Wikipedia/nationaal archief/Nationaal archief_memorixexp_metadata tbv Wikimediaselectie.csv'

    batchinfo = {}

    reader1 = csv.reader(open(batchCsvFile, "rb"))
    for row in reader1:
        batchinfo[row[0]] = row
        #print row
        #        wikipedia.output(row)

    maininfo = {}

    reader2 = csv.reader(open(maincCsvFile, "rb"))
    for row in reader2:
        maininfo[row[4]] = row
        #print row
        #        wikipedia.output(row)

    if os.path.isdir(directory):
        for filename in glob.glob(directory + "/*.jpg"):
            #print filename
            #duplicates = findDuplicateImages(filename)
            duplicates = False
            if duplicates:
                wikipedia.output(u'Found duplicate image at %s' %
                                 duplicates.pop())
            else:
                dirname = os.path.dirname(filename)
                basename = os.path.basename(filename)

                description = generateDescription(basename, batchinfo,
                                                  maininfo)

                batchrecord = batchinfo.get(basename)
                bestanddeelnummer_negatief = unicode(batchrecord[1], 'utf-8')
                mainrecord = maininfo.get(bestanddeelnummer_negatief)
                beschrijving = unicode(mainrecord[7], 'utf-8')

                baseFilename, extension = os.path.splitext(basename)

                title = getTitle(baseFilename, beschrijving)

                wikipedia.output(title)
                wikipedia.output(description)

                bot = upload.UploadRobot(url=filename.decode(
                    sys.getfilesystemencoding()),
                                         description=description,
                                         useFilename=title,
                                         keepFilename=True,
                                         verifyDescription=False)
                bot.run()
Example #11
    def run(self):
        for page in self.generator:
            try:
                # get the page, and save it using the unmodified text.
                # whether or not getting a redirect throws an exception
                # depends on the variable self.touch_redirects.
                text = page.get()
                originalText = text
                key = u" FIGURE DELETED"
                dstIdx = text.find(key)
                blkStart = 0
                while dstIdx >= 0:
                    blkStart = text.find(u"begin{figure}", blkStart)
                    blkEnd = text.find(u"end{figure}", blkStart + 1)
                    filename = getImgName(text, blkStart, blkEnd)
                    caption = getImgCaption(text, blkStart, blkEnd)
                    caption = convertLatex(caption)
                    if len(filename) > 0:
                        filename = os.path.abspath(
                            os.path.join(self.basepath, filename))
                        if not os.path.exists(filename):
                            filename = filename[:-3] + filename[-3:].upper()

    #                    print(page.title(), dstIdx, filename, caption)
                        print(caption, page.title())
                        if not os.path.exists(filename):
                            raise FileNotFound(filename)

                        bot = upload.UploadRobot(filename,
                                                 description=self.descrGeneric,
                                                 keepFilename=True,
                                                 verifyDescription=False,
                                                 ignoreWarning=True)
                        filename = bot.upload_image()
                        if filename is not None and len(filename) > 0:
                            filename = u'[[File:%(a)s|400px|thumb|center|%(b)s]]' % {
                                "a": filename,
                                "b": caption
                            }

                        text = text[:dstIdx] + filename + text[dstIdx +
                                                               len(key):]

                    # done in this round, prepare the next
                    dstIdx = text.find(key, dstIdx + 1)
                    blkStart = blkEnd + 1


#                print(text)
                if text != originalText:
                    page.put(text)

            except FileNotFound, e:
                print "File not found: '%s'" % str(e)
            except pywikibot.NoPage:
                print "Page %s does not exist?!" % page.aslink()
Example #12
def processPhoto(flickr=None,
                 photo_id=u'',
                 flickrreview=False,
                 reviewer=u'',
                 override=u'',
                 addCategory=u'',
                 removeCategories=False,
                 autonomous=False):
    """ Process a single Flickr photo """
    if photo_id:
        pywikibot.output(str(photo_id))
        (photoInfo, photoSizes) = getPhoto(flickr, photo_id)
    if isAllowedLicense(photoInfo) or override:
        #Get the url of the largest photo
        photoUrl = getPhotoUrl(photoSizes)
        #Should download the photo only once
        photo = downloadPhoto(photoUrl)

        #Don't upload duplicate images, should add override option
        duplicates = findDuplicateImages(photo)
        if duplicates:
            pywikibot.output(u'Found duplicate image at %s' % duplicates.pop())
        else:
            filename = getFilename(photoInfo)
            flinfoDescription = getFlinfoDescription(photo_id)
            photoDescription = buildDescription(flinfoDescription,
                                                flickrreview, reviewer,
                                                override, addCategory,
                                                removeCategories)
            #pywikibot.output(photoDescription)
            if not autonomous:
                (newPhotoDescription, newFilename,
                 skip) = Tkdialog(photoDescription, photo, filename).run()
            else:
                newPhotoDescription = photoDescription
                newFilename = filename
                skip = False
        #pywikibot.output(newPhotoDescription)
        #if (pywikibot.Page(title=u'File:'+ filename, site=pywikibot.Site()).exists()):
        # I should probably check if the hash is the same and if not upload it under a different name
        #pywikibot.output(u'File:' + filename + u' already exists!')
        #else:
        #Do the actual upload
        #Would be nice to check before I upload if the file is already at Commons
        #Not that important for this program, but maybe for derived programs
            if not skip:
                bot = upload.UploadRobot(photoUrl,
                                         description=newPhotoDescription,
                                         useFilename=newFilename,
                                         keepFilename=True,
                                         verifyDescription=False)
                bot.upload_image(debug=False)
                return 1
    else:
        pywikibot.output(u'Invalid license')
    return 0
Example #13
def processPhoto(photoInfo=None,
                 panoramioreview=False,
                 reviewer=u'',
                 override=u'',
                 addCategory=u'',
                 autonomous=False):
    ''' Process a single Panoramio photo '''

    if isAllowedLicense(photoInfo) or override:
        #Should download the photo only once
        photo = downloadPhoto(photoInfo.get(u'photo_file_url'))

        #Don't upload duplicate images, should add override option
        duplicates = findDuplicateImages(photo)
        if duplicates:
            pywikibot.output(u'Found duplicate image at %s' % duplicates.pop())
        else:
            filename = getFilename(photoInfo)
            pywikibot.output(filename)
            description = getDescription(photoInfo, panoramioreview, reviewer,
                                         override, addCategory)

            pywikibot.output(description)
            if not autonomous:
                (newDescription, newFilename,
                 skip) = Tkdialog(description, photo, filename).run()
            else:
                newDescription = description
                newFilename = filename
                skip = False


##        pywikibot.output(newPhotoDescription)
##        if (pywikibot.Page(title=u'File:'+ filename,
##                           site=pywikibot.getSite()).exists()):
##            # I should probably check if the hash is the same and if not upload
##            # it under a different name
##            pywikibot.output(u'File:' + filename + u' already exists!')
##        else:
# Do the actual upload
# Would be nice to check before I upload if the file is already at
# Commons
# Not that important for this program, but maybe for derived
# programs
            if not skip:
                bot = upload.UploadRobot(photoInfo.get(u'photo_file_url'),
                                         description=newDescription,
                                         useFilename=newFilename,
                                         keepFilename=True,
                                         verifyDescription=False)
                bot.upload_image(debug=False)
                return 1
    return 0
Example #14
def processFile(row):
    metadata = getMetadata(row)

    if not metadata['FORM'] == u'painting':
        wikipedia.output(u'Not a painting, skipping')
        return False

    photo = downloadPhoto(metadata['IMAGEURL'])
    duplicates = findDuplicateImages(photo)

    # We don't want to upload dupes
    if duplicates:
        wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
        # The file is at Commons so return True
        return True

    title = getTitle(metadata)
    description = getDescription(metadata)

    # Check if the title already exists
    #site = wikipedia.getSite('commons', 'commons')
    #page = wikipedia.ImagePage(site, title)

    #if page.exists():
    #    wikipedia.output(u'The file %s already exists. Probably already uploaded by me. Skipping' % title)
    #    return False

    wikipedia.output(u'Preparing upload for %s.' % title)
    wikipedia.output(description)

    # Download and dezoomify the image
    #tempfile = imageDir + metadata.get('id') + u'.jpg'
    #try:
    #    dezoomify.Dezoomify(url=metadata.get('link'), debug=True, out=tempfile)
    #except IOError as e:
    #    #wikipedia.output(e)
    #    wikipedia.output(u'Dezoomify failed')
    #    return False

    # Check for dupe. This probably doesn't work, but it doesn't hurt either.
    #duplicates = findDuplicateImages(tempfile)
    #if duplicates:
    #    wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
    #    return False
    bot = upload.UploadRobot(metadata['IMAGEURL'],
                             description=description,
                             useFilename=title,
                             keepFilename=True,
                             verifyDescription=False,
                             targetSite=wikipedia.getSite(
                                 'commons', 'commons'))
    #bot = upload.UploadRobot(url=tempfile, description=description, useFilename=title, keepFilename=True, verifyDescription=False)
    bot.run()
Example #15
def processPhoto(flickr=None,
                 photo_id=u'',
                 flickrreview=False,
                 reviewer=u'',
                 addCategory=u'',
                 removeCategories=False,
                 autonomous=False):
    '''
    Process a single Flickr photo
    '''
    if photo_id:
        print photo_id
        (photoInfo, photoSizes) = flickrripper.getPhoto(flickr, photo_id)
    if flickrripper.isAllowedLicense(photoInfo):
        #Get the url of the largest photo
        photoUrl = flickrripper.getPhotoUrl(photoSizes)
        #Should download the photo only once
        photo = flickrripper.downloadPhoto(photoUrl)

        #Don't upload duplicate images, should add override option
        duplicates = flickrripper.findDuplicateImages(photo)
        if duplicates:
            wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
        else:
            filename = flickrripper.getFilename(photoInfo, project=u'WLM_2011')
            flinfoDescription = flickrripper.getFlinfoDescription(photo_id)

            rijksmonumentid = getRijksmonumentid(photoInfo)

            photoDescription = buildDescription(flinfoDescription,
                                                flickrreview, reviewer,
                                                addCategory, removeCategories,
                                                rijksmonumentid)
            #wikipedia.output(photoDescription)
            if not autonomous:
                (newPhotoDescription, newFilename,
                 skip) = Tkdialog(photoDescription, photo, filename).run()
            else:
                newPhotoDescription = photoDescription
                newFilename = filename
                skip = False

            wikipedia.output(newPhotoDescription)
            if not skip:
                bot = upload.UploadRobot(photoUrl,
                                         description=newPhotoDescription,
                                         useFilename=newFilename,
                                         keepFilename=True,
                                         verifyDescription=False)
                bot.upload_image(debug=False)
                return 1
    return 0
Example #16
def main(args):
    '''
    Main loop.
    '''
    workdir = u''
    textfile = u''
    records = {}
    
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    if not (len(args)==2):
        wikipedia.output(u'Too few arguments. Usage: NARA_uploader.py <directory> <textfile>')
        sys.exit()
    
    if os.path.isdir(args[0]):
        workdir = args[0]
    else:
        wikipedia.output(u'%s doesn\'t appear to be a directory. Exiting' % (args[0],))
        sys.exit()
        
    textfile = args[1]
    records = getRecords(textfile)
    #print records

    sourcefilenames = glob.glob(workdir + u"/*.TIF")

    for sourcefilename in sourcefilenames:
        filename = os.path.basename(sourcefilename)
        # This will give an ugly error if the id is unknown
        if not records.get(filename):
             wikipedia.output(u'Can\'t find %s in %s. Skipping this file.' % (filename, textfile))

        else:
            fileId = records.get(filename)
        
            duplicates = findDuplicateImages(sourcefilename)
            if duplicates:
                wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
            else:
                # No metadata handling. We use a webtool
                description = getDescription(fileId)
                categories = u'{{Uncategorized-NARA|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n'
                description = description + categories

                title = getTitle(fileId, description)
                
                wikipedia.output(title)
                wikipedia.output(description)
                    
                bot = upload.UploadRobot(url=sourcefilename.decode(sys.getfilesystemencoding()), description=description, useFilename=title, keepFilename=True, verifyDescription=False)
                bot.run()
Example #17
    def transferImage(self, sourceImagePage, debug=False):
        """Gets a wikilink to an image, downloads it and its description,
           and uploads it to another wikipedia.
           Returns the filename which was used to upload the image
           This function is used by imagetransfer.py and by copy_table.py
        """
        sourceSite = sourceImagePage.site()
        if debug: print "-" * 50
        if debug: print "Found image: %s"% imageTitle
        url = sourceImagePage.fileUrl().encode('utf-8')
        pywikibot.output(u"URL should be: %s" % url)
        # localize the text that should be printed on the image description page
        try:
            description = sourceImagePage.get()
            # try to translate license templates
            if (sourceSite.sitename(), self.targetSite.sitename()) in licenseTemplates:
                for old, new in licenseTemplates[(sourceSite.sitename(), self.targetSite.sitename())].iteritems():
                    new = '{{%s}}' % new
                    old = re.compile('{{%s}}' % old)
                    description = pywikibot.replaceExcept(description, old, new,
                                                          ['comment', 'math',
                                                           'nowiki', 'pre'])

            description = pywikibot.translate(self.targetSite, copy_message) \
                          % (sourceSite, description)
            description += '\n\n' + sourceImagePage.getFileVersionHistoryTable()
            # add interwiki link
            if sourceSite.family == self.targetSite.family:
                description += "\r\n\r\n" + sourceImagePage.aslink(forceInterwiki = True)
        except pywikibot.NoPage:
            description=''
            print "Image does not exist or description page is empty."
        except pywikibot.IsRedirectPage:
            description=''
            print "Image description page is redirect."
        else:
            bot = upload.UploadRobot(url = url, description = description, targetSite = self.targetSite, urlEncoding = sourceSite.encoding())
            # try to upload
            targetFilename = bot.run()
            if targetFilename and self.targetSite.family.name == 'commons' and self.targetSite.lang == 'commons':
                # upload to Commons was successful
                reason = pywikibot.translate(sourceSite, nowCommonsMessage)
                # try to delete the original image if we have a sysop account
                if sourceSite.family.name in config.sysopnames and sourceSite.lang in config.sysopnames[sourceSite.family.name]:
                    if sourceImagePage.delete(reason):
                        return
                if sourceSite.lang in nowCommonsTemplate and sourceSite.family.name in config.usernames and sourceSite.lang in config.usernames[sourceSite.family.name]:
                    # add the nowCommons template.
                    pywikibot.output(u'Adding nowCommons template to %s' % sourceImagePage.title())
                    sourceImagePage.put(sourceImagePage.get() + '\n\n' + nowCommonsTemplate[sourceSite.lang] % targetFilename, comment = nowCommonsMessage[sourceSite.lang])
Example #18
def processPhoto(photo_id, category=u''):
    '''
    Work on a single photo at 
    http://www.af.mil/photos/media_view.asp?id=<photo_id>    
    get the metadata, check for dupes, build description, upload the image
    '''

    # Get all the metadata
    metadata = getMetadata(photo_id)

    if not metadata:
        #Incorrect photo_id
        return False

    if metadata.get('author') and re.search(u'Courtesy',
                                            metadata.get('author'), re.I):
        #Courtesy photos are probably copyvios
        return False

    photo = downloadPhoto(metadata['url'])

    duplicates = findDuplicateImages(photo)
    #duplicates = False
    # We don't want to upload dupes
    if duplicates:
        wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
        # The file is at Commons so return True
        return True

    title = buildTitle(photo_id, metadata)

    description = buildDescription(photo_id, metadata, category)

    #wikipedia.output(title)
    #wikipedia.output(description)

    try:
        bot = upload.UploadRobot(metadata['url'],
                                 description=description,
                                 useFilename=title,
                                 keepFilename=True,
                                 verifyDescription=False,
                                 targetSite=wikipedia.getSite(
                                     'commons', 'commons'))
        bot.upload_image(debug=False)
        return True
    except wikipedia.PageNotFound:
        #High res is missing, just skip it
        pass
    return False
Example #19
def update():

    targetFilename = 'Wzrost_Wikislownika.svg'
    fname = 'Wzrost_Wikislownika.svg'
    desc = 'update (2015/08)'

    bot = upload.UploadRobot([targetFilename],
                             description=desc,
                             keepFilename=True,
                             verifyDescription=False,
                             ignoreWarning=True,
                             targetSite=pywikibot.getSite(
                                 'commons', 'commons'))
    bot.run()
Example #20
def main():
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    imagerecat.initLists()

    flickr = flickrapi.FlickrAPI(api_key)
    groupId = '1044478@N20'
    #photos = flickr.flickr.groups_search(text='73509078@N00', per_page='10') = 1044478@N20
    for photoId in getPhotosInGroup(flickr=flickr, group_id=groupId):
        (photoInfo, photoSizes) = getPhoto(flickr=flickr, photo_id=photoId)
        if isAllowedLicense(photoInfo=photoInfo):
            tags = getTags(photoInfo=photoInfo)
            if photoCanUpload(tags=tags):
                # Get the url of the largest photo
                photoUrl = getPhotoUrl(photoSizes=photoSizes)
                # Download this photo
                photo = downloadPhoto(photoUrl=photoUrl)
                # Check if it exists at Commons
                duplicates = findDuplicateImages(photo=photo)
                if duplicates:
                    wikipedia.output(u'Found duplicate image at %s' %
                                     duplicates.pop())
                else:
                    flinfoDescription = getFlinfoDescription(photoId=photoId)
                    tagDescription = getTagDescription(tags=tags)
                    tagCategories = getTagCategories(tags)
                    filename = getFilename(photoInfo=photoInfo)
                    #print filename
                    photoDescription = buildDescription(
                        flinfoDescription, tagDescription, tagCategories)
                    if (wikipedia.Page(title=u'File:' + filename,
                                       site=wikipedia.getSite()).exists()):
                        # I should probably check if the hash is the same and if not upload it under a different name
                        wikipedia.output(u'File:' + filename +
                                         u' already exists!')
                    else:
                        #Do the actual upload
                        #Would be nice to check before I upload if the file is already at Commons
                        #Not that important for this program, but maybe for derived programs
                        bot = upload.UploadRobot(url=photoUrl,
                                                 description=photoDescription,
                                                 useFilename=filename,
                                                 keepFilename=True,
                                                 verifyDescription=False)
                        bot.run()

    wikipedia.output('All done')
Example #21
def processItem(record):
    (header, metadata, about) = record

    identifier = header.identifier().replace(u'oai:openimages.eu:', u'')

    if not getLicenseTemplate(metadata):
        wikipedia.output(u'File doesn\'t contain a valid license')
        return False

    movieurl = getRightMovie(metadata)

    if not movieurl:
        wikipedia.output(u'No .ogv file found')
        return False

    photo = downloadPhoto(movieurl)

    duplicates = findDuplicateImages(photo)
    # Clean the buffer
    photo.close()

    # We don't want to upload dupes
    if duplicates:
        wikipedia.output(u'Found duplicate file at %s' % duplicates.pop())
        # The file is at Commons so return True
        return True

    title = getTitle(metadata, identifier)
    description = getDescription(metadata, identifier)

    wikipedia.output(title)
    #wikipedia.output(description)

    bot = upload.UploadRobot(movieurl,
                             description=description,
                             useFilename=title,
                             keepFilename=True,
                             verifyDescription=False,
                             ignoreWarning=True,
                             targetSite=wikipedia.getSite(
                                 'commons', 'commons'))
    bot.upload_image(debug=False)

    return True
Example #22
    def _doUpload(self, photo):
        duplicates = photo.findDuplicateImages(self.site)
        if duplicates:
            return duplicates[0]

        title = photo.getTitle(self.titlefmt)
        description = photo.getDescription(self.pagefmt)

        bot = upload.UploadRobot(url=photo.URL,
                                 description=description,
                                 useFilename=title,
                                 keepFilename=True,
                                 verifyDescription=False,
                                 ignoreWarning=True,
                                 targetSite=self.site)
        bot._contents = photo.downloadPhoto().getvalue()
        bot._retrieved = True
        bot.run()
        return title
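
The two private attributes set just before run() are what make this variant efficient: the bytes already fetched for the duplicate check are handed straight to the robot, so the file is not downloaded a second time. A hedged sketch of the same trick outside the class (the URL, description and the reliance on the private _contents/_retrieved attributes are assumptions carried over from the example, not a public API):

import urllib
import upload

# Sketch: download once, reuse the same bytes for the upload.
url = u'http://www.example.org/photo.jpg'
data = urllib.urlopen(url).read()

bot = upload.UploadRobot(url=url,
                         description=u'{{Information|description=Example}}',
                         useFilename=u'Example photo.jpg',
                         keepFilename=True,
                         verifyDescription=False)
bot._contents = data     # raw file bytes already in memory
bot._retrieved = True    # tell the robot not to fetch the URL again
bot.run()
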
Example #23
def update():

    oldMainURL = 'https://pl.wiktionary.org/w/index.php?title=Wikis%C5%82ownik:Strona_g%C5%82%C3%B3wna&oldid=2639658'

    toParse = urllib.request.urlopen(oldMainURL)
    doc = etree.parse(toParse)
    toParse.close()

    entryNum = int(
        doc.xpath('//td[@class="sg_ramka"]/div/div[1]/p/a/font/b/text()')
        [0].replace('\xa0', ''))
    pageNum = int(
        doc.xpath('//td[@class="sg_ramka"]/div/div[1]/p/b/text()')[0].replace(
            '\xa0', ''))

    entry = '%.1f' % (entryNum / 1000.0)
    page = '%.1f' % (pageNum / 1000.0)

    dateToday = date.today()
    dateEarlier = dateToday - timedelta(days=5)

    with open("stat-data.csv", "a+") as myfile:
        myfile.seek(0)
        last_line = myfile.readlines()[-1].split(',')
        if last_line[0] != dateEarlier.strftime('%m-%Y'):
            myfile.write("%s,%s,%s\n" %
                         (dateEarlier.strftime("%m-%Y"), page, entry))

    monthly_stat_plot()

    targetFilename = 'Wzrost_Wikislownika.svg'
    fname = 'Wzrost_Wikislownika.svg'
    desc = 'update (%s)' % dateEarlier.strftime("%Y/%m")

    bot = upload.UploadRobot([targetFilename],
                             description=desc,
                             keepFilename=True,
                             verifyDescription=False,
                             ignoreWarning=True,
                             targetSite=pywikibot.getSite(
                                 'commons', 'commons'))
    bot.run()
Example #24
    def _doUpload(self, photo):
        duplicates = photo.findDuplicateImages(self.site)
        if duplicates:
            pywikibot.output(u"Skipping duplicate of %r" % (duplicates, ))
            return duplicates[0]

        title = make_title(photo.metadata, self.front_titlefmt,
                           self.rear_titlefmt, self.variable_titlefmt)

        description = textlib.glue_template_and_params((self.pagefmt,
                                                        photo.metadata))
        print title

        bot = upload.UploadRobot(url = photo.URL,
                                 description = description,
                                 useFilename = title,
                                 keepFilename = True,
                                 verifyDescription = False,
                                 targetSite = self.site)
        bot._contents = photo.downloadPhoto().getvalue()
        bot._retrieved = True
        bot.run()
        return title
Example #25
def processPhoto(photo):
    '''
    Work on a single photo at 
    http://www.army.mil/-images/<year>/<month>/<day>/<id>/
    get the metadata, check for dupes, build description, upload the image
    '''
    #print photo.get('url')

    # Get all the metadata
    metadata = getMetadata(photo)

    if not metadata:
        #Incorrect photo_id
        return

    photo = downloadPhoto(metadata['orgimage'])

    duplicates = findDuplicateImages(photo)
    #duplicates = False
    # We don't want to upload dupes
    if duplicates:
        wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
        return
    
    title = buildTitle(metadata)
    description = buildDescription(metadata)

    #wikipedia.output(title)
    #wikipedia.output(description)

    try:
        bot = upload.UploadRobot(metadata['orgimage'],
                                 description=description,
                                 useFilename=title,
                                 keepFilename=True,
                                 verifyDescription=False,
                                 targetSite=wikipedia.getSite('commons', 'commons'))
        bot.upload_image(debug=False)
    except wikipedia.PageNotFound:
        #Image missing? Just skip it
        pass
Example #26
def processPhoto(photo_id):
    '''
    Work on a single photo at 
    http://www.photolibrary.fema.gov/photolibrary/photo_details.do?id=<photo_id>    
    get the metadata, check for dupes, build description, upload the image
    '''
    print "Working on: " + str(photo_id)
    # Get all the metadata
    metadata = getMetadata(photo_id)
    if not metadata:
        print "Didn't find metadata at http://www.photolibrary.fema.gov/photolibrary/photo_details.do?id=" + str(
            photo_id)
        #Incorrect photo_id
        return

    photoUrl = u'http://www.fema.gov/photodata/original/' + str(
        photo_id) + '.jpg'
    photo = downloadPhoto(photoUrl)

    duplicates = findDuplicateImages(photo)
    # We don't want to upload dupes
    if duplicates:
        wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
        return

    title = buildTitle(photo_id, metadata)
    description = buildDescription(photo_id, metadata)

    bot = upload.UploadRobot(photoUrl,
                             description=description,
                             useFilename=title,
                             keepFilename=True,
                             verifyDescription=False,
                             targetSite=wikipedia.getSite(
                                 'commons', 'commons'))
    bot.upload_image(debug=False)
Example #27
    def run(self):
        """
        Starts the robot.
        """
        for painting in self.generator:
            # Buh, for this one I know for sure it's in there

            #print painting[u'id']
            print painting[u'url']

            paintingItem = None
            newclaims = []
            if painting[u'id'] in self.paintingIds:
                paintingItemTitle = u'Q%s' % (self.paintingIds.get(
                    painting[u'id']), )
                print paintingItemTitle
                paintingItem = pywikibot.ItemPage(self.repo,
                                                  title=paintingItemTitle)

            else:
                #Break for now
                print u'Let us create stuff'
                #continue
                #print u'WTFTFTFTFT???'

                #print 'bla'

                data = {
                    'labels': {},
                    'descriptions': {},
                }

                data['labels']['en'] = {
                    'language': 'en',
                    'value': painting[u'title']
                }

                data['descriptions']['en'] = {
                    'language': u'en',
                    'value': u'painting by %s' % (painting[u'creator'], )
                }
                data['descriptions']['nl'] = {
                    'language': u'nl',
                    'value': u'schilderij van %s' % (painting[u'creator'], )
                }

                print data

                identification = {}
                summary = u'Creating new item with data from %s ' % (
                    painting[u'url'], )
                pywikibot.output(summary)
                #monumentItem.editEntity(data, summary=summary)
                try:
                    result = self.repo.editEntity(identification,
                                                  data,
                                                  summary=summary)
                except pywikibot.exceptions.APIError:
                    # We got ourselves a duplicate label and description, let's correct that
                    pywikibot.output(
                        u'Oops, already had that one. Trying again')
                    data['descriptions']['en'] = {
                        'language':
                        u'en',
                        'value':
                        u'painting by %s (%s, %s)' %
                        (painting[u'creator'], painting[u'collectionshort'],
                         painting[u'id'])
                    }
                    result = self.repo.editEntity(identification,
                                                  data,
                                                  summary=summary)
                    pass

                #print result
                paintingItemTitle = result.get(u'entity').get('id')
                paintingItem = pywikibot.ItemPage(self.repo,
                                                  title=paintingItemTitle)

                # Add to self.paintingIds so that we don't create dupes
                self.paintingIds[painting[u'id']] = paintingItemTitle.replace(
                    u'Q', u'')

                newclaim = pywikibot.Claim(
                    self.repo, u'P%s' % (self.paintingIdProperty, ))
                newclaim.setTarget(painting[u'id'])
                pywikibot.output('Adding new id claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)

                self.addReference(paintingItem, newclaim, painting[u'url'])

                newqualifier = pywikibot.Claim(
                    self.repo, u'P195')  #Add collection, isQualifier=True
                newqualifier.setTarget(self.collectionitem)
                pywikibot.output('Adding new qualifier claim to %s' %
                                 paintingItem)
                newclaim.addQualifier(newqualifier)

                collectionclaim = pywikibot.Claim(self.repo, u'P195')
                collectionclaim.setTarget(self.collectionitem)
                pywikibot.output('Adding collection claim to %s' %
                                 paintingItem)
                paintingItem.addClaim(collectionclaim)

                # Add the date they got it as a qualifier to the collection
                if painting.get(u'acquisitiondate'):
                    colqualifier = pywikibot.Claim(self.repo, u'P580')
                    acdate = None
                    if len(painting[u'acquisitiondate']) == 4 and painting[
                            u'acquisitiondate'].isnumeric():  # It's a year
                        acdate = pywikibot.WbTime(
                            year=int(painting[u'acquisitiondate']))
                    elif len(painting[u'acquisitiondate'].split(u'-', 2)) == 3:
                        (acday, acmonth,
                         acyear) = painting[u'acquisitiondate'].split(u'-', 2)
                        acdate = pywikibot.WbTime(year=int(acyear),
                                                  month=int(acmonth),
                                                  day=int(acday))
                    if acdate:
                        colqualifier.setTarget(acdate)
                        pywikibot.output(
                            'Adding new acquisition date qualifier claim to collection on %s'
                            % paintingItem)
                        collectionclaim.addQualifier(colqualifier)

                self.addReference(paintingItem, collectionclaim,
                                  painting[u'url'])

            if paintingItem and paintingItem.exists():
                painting['wikidata'] = paintingItem.title()

                data = paintingItem.get()
                claims = data.get('claims')
                #print claims

                if painting.get(u'creator'):
                    self.fixDescription(paintingItem, painting.get(u'creator'))

                # located in
                if u'P276' not in claims and painting.get(u'location'):
                    newclaim = pywikibot.Claim(self.repo, u'P276')
                    location = pywikibot.ItemPage(self.repo,
                                                  painting.get(u'location'))
                    newclaim.setTarget(location)
                    pywikibot.output('Adding located in claim to %s' %
                                     paintingItem)
                    paintingItem.addClaim(newclaim)

                    self.addReference(paintingItem, newclaim, painting['url'])

                # instance of always painting while working on the painting collection
                if u'P31' not in claims:

                    dcformatItem = pywikibot.ItemPage(self.repo,
                                                      title='Q3305213')

                    newclaim = pywikibot.Claim(self.repo, u'P31')
                    newclaim.setTarget(dcformatItem)
                    pywikibot.output('Adding instance claim to %s' %
                                     paintingItem)
                    paintingItem.addClaim(newclaim)

                    self.addReference(paintingItem, newclaim, painting['url'])

                # creator
                if u'P170' not in claims and painting.get(u'creator'):
                    #print painting[u'creator']
                    creategen = pagegenerators.PreloadingEntityGenerator(
                        pagegenerators.WikidataItemGenerator(
                            pagegenerators.SearchPageGenerator(
                                painting[u'creator'],
                                step=None,
                                total=10,
                                namespaces=[0],
                                site=self.repo)))

                    newcreator = None

                    try:
                        for creatoritem in creategen:
                            print creatoritem.title()
                            if creatoritem.get().get('labels').get(
                                    'en'
                            ) == painting[u'creator'] or creatoritem.get(
                            ).get('labels').get('nl') == painting[u'creator']:
                                #print creatoritem.get().get('labels').get('en')
                                #print creatoritem.get().get('labels').get('nl')
                                # Check occupation and country of citizinship
                                if u'P106' in creatoritem.get().get('claims'):
                                    existing_claims = creatoritem.get().get(
                                        'claims').get('P106')
                                    for existing_claim in existing_claims:
                                        if existing_claim.target_equals(
                                                u'Q1028181'):
                                            newcreator = creatoritem
                                    continue
                            elif (
                                    creatoritem.get().get('aliases').get('en')
                                    and painting[u'creator'] in
                                    creatoritem.get().get('aliases').get('en')
                            ) or (creatoritem.get().get('aliases').get('nl')
                                  and painting[u'creator'] in
                                  creatoritem.get().get('aliases').get('nl')):
                                if u'P106' in creatoritem.get().get('claims'):
                                    existing_claims = creatoritem.get().get(
                                        'claims').get('P106')
                                    for existing_claim in existing_claims:
                                        if existing_claim.target_equals(
                                                u'Q1028181'):
                                            newcreator = creatoritem
                                    continue
                    except pywikibot.exceptions.APIError:
                        print u'Search API is acting up, just let it be'
                        pass

                    if newcreator:
                        pywikibot.output(newcreator.title())

                        newclaim = pywikibot.Claim(self.repo, u'P170')
                        newclaim.setTarget(newcreator)
                        pywikibot.output('Adding creator claim to %s' %
                                         paintingItem)
                        paintingItem.addClaim(newclaim)

                        self.addReference(paintingItem, newclaim,
                                          painting[u'url'])

                        #print creatoritem.title()
                        #print creatoritem.get()

                    else:
                        pywikibot.output('No item found for %s' %
                                         (painting[u'creator'], ))

                else:
                    print u'Already has a creator'

                # date of creation
                if u'P571' not in claims and painting.get(u'date'):
                    if len(
                            painting[u'date']
                    ) == 4 and painting[u'date'].isnumeric():  # It's a year
                        newdate = pywikibot.WbTime(year=int(painting[u'date']))
                        newclaim = pywikibot.Claim(self.repo, u'P571')
                        newclaim.setTarget(newdate)
                        pywikibot.output(
                            'Adding date of creation claim to %s' %
                            paintingItem)
                        paintingItem.addClaim(newclaim)

                        self.addReference(paintingItem, newclaim,
                                          painting[u'url'])

                # material used
                if u'P186' not in claims and painting.get(u'medium'):
                    if painting.get(u'medium') == u'Oil on canvas':
                        olieverf = pywikibot.ItemPage(self.repo, u'Q296955')
                        doek = pywikibot.ItemPage(self.repo, u'Q4259259')
                        oppervlak = pywikibot.ItemPage(self.repo, u'Q861259')

                        newclaim = pywikibot.Claim(self.repo, u'P186')
                        newclaim.setTarget(olieverf)
                        pywikibot.output('Adding new oil paint claim to %s' %
                                         paintingItem)
                        paintingItem.addClaim(newclaim)

                        self.addReference(paintingItem, newclaim,
                                          painting[u'url'])

                        newclaim = pywikibot.Claim(self.repo, u'P186')
                        newclaim.setTarget(doek)
                        pywikibot.output('Adding new canvas claim to %s' %
                                         paintingItem)
                        paintingItem.addClaim(newclaim)

                        self.addReference(paintingItem, newclaim,
                                          painting[u'url'])

                        newqualifier = pywikibot.Claim(
                            self.repo, u'P518')  #Applies to part
                        newqualifier.setTarget(oppervlak)
                        pywikibot.output('Adding new qualifier claim to %s' %
                                         paintingItem)
                        newclaim.addQualifier(newqualifier)

                # Described at url
                if u'P973' not in claims:
                    newclaim = pywikibot.Claim(self.repo, u'P973')
                    newclaim.setTarget(painting[u'url'])
                    pywikibot.output('Adding described at claim to %s' %
                                     paintingItem)
                    paintingItem.addClaim(newclaim)
                #    self.addReference(paintingItem, newclaim, uri)

                # Upload an image baby! BUT NOT NOW

                imagetitle = u''
                if painting.get(u'imageurl') and u'P18' not in claims:
                    commonssite = pywikibot.Site("commons", "commons")
                    photo = Photo(painting[u'imageurl'], painting)
                    titlefmt = u'%(creator)s - %(title)s - %(id)s - Minneapolis Institute of Arts.%(_ext)s'
                    pagefmt = u'User:Multichill/Minneapolis Institute of Arts'

                    duplicates = photo.findDuplicateImages()
                    if duplicates:
                        pywikibot.output(u"Skipping duplicate of %r" %
                                         duplicates)
                        imagetitle = duplicates[0]
                        #return duplicates[0]
                    else:

                        imagetitle = self.cleanUpTitle(
                            photo.getTitle(titlefmt))
                        pywikibot.output(imagetitle)
                        description = photo.getDescription(pagefmt)
                        pywikibot.output(description)

                        handle, tempname = tempfile.mkstemp()
                        with os.fdopen(handle, "wb") as t:
                            t.write(photo.downloadPhoto().getvalue())
                        #tempname

                        bot = upload.UploadRobot(url=tempname,
                                                 description=description,
                                                 useFilename=imagetitle,
                                                 keepFilename=True,
                                                 verifyDescription=False,
                                                 uploadByUrl=False,
                                                 targetSite=commonssite)
                        #bot._contents = photo.downloadPhoto().getvalue()

                        #bot._retrieved = True
                        bot.run()

                if u'P18' not in claims and imagetitle:
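                    # Link the item to the Commons file (either the duplicate we found or the file we just uploaded)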
                    newclaim = pywikibot.Claim(self.repo, u'P18')
                    imagelink = pywikibot.Link(imagetitle,
                                               source=commonssite,
                                               defaultNamespace=6)
                    image = pywikibot.ImagePage(imagelink)
                    if image.isRedirectPage():
                        image = pywikibot.ImagePage(image.getRedirectTarget())
                    newclaim.setTarget(image)
                    pywikibot.output('Adding %s --> %s' %
                                     (newclaim.getID(), newclaim.getTarget()))
                    paintingItem.addClaim(newclaim)
Exemplo n.º 28
0
    def doImage(self, image):
        r = re.compile(u'\|', re.UNICODE | re.DOTALL)
        data = re.split(r, image)
        imageName = data[0]
        newImageName = data[0]
        r = re.compile(u'^\s*$', re.UNICODE | re.DOTALL)
        if len(data) >= 2 and not re.match(r, data[1]):
            newImageName = data[1]
        sourceWiki = u'anime'
        if len(data) >= 3:
            sourceWiki = data[2]
        exclusionMode = u'normal'
        if len(data) >= 4:
            exclusionMode = data[3]
        exclusionInfo = u''
        if len(data) >= 5:
            exclusionInfo = data[4]
        sourceSite = None
        outputSites = []
        sourceImage = None
        sourcePage = None

        wikipedia.output(u'Doing Image %s' % imageName)
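        # Determine the source site and the target sites according to the exclusion mode (normal/include/exclude)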
        for site in self.siteList:
            if site.family.name == sourceWiki:
                sourceSite = site
            if exclusionMode == u'normal':
                outputSites.append(site)
            elif exclusionMode == u'include':
                r = re.compile(u',', re.UNICODE | re.DOTALL)
                includes = re.split(r, exclusionInfo)
                if site.family.name in includes:
                    outputSites.append(site)
            elif exclusionMode == u'exclude':
                r = re.compile(u',', re.UNICODE | re.DOTALL)
                excludes = re.split(r, exclusionInfo)
                if site.family.name not in excludes:
                    outputSites.append(site)
            else:
                wikipedia.output(u'Unknown exclusion mode. Skipping %s.' %
                                 imageName)
                return False
        if sourceSite is None:
            wikipedia.output(u'No source site found. Skipping %s.' % imageName)
            return False

        try:
            sourceDescriptionPage = wikipedia.Page(sourceSite, imageName, None,
                                                   6)  #6=Image Namespace
            sourceImagePage = wikipedia.ImagePage(
                sourceSite, sourceDescriptionPage.title())
        except wikipedia.NoPage:
            wikipedia.output(u'No source page found. Skipping %s.' % imageName)
            return False

        sourceURL = sourceImagePage.fileUrl()
        if '://' not in sourceURL:
            sourceURL = u'http://%s%s' % (sourceSite.hostname(), sourceURL)

        # Get file contents
        uo = wikipedia.MyURLopener()
        sourceFile = uo.open(sourceURL, "rb")
        wikipedia.output(u'Reading file %s' % sourceURL)
        sourceContents = sourceFile.read()
        if sourceContents.find(
                "The requested URL was not found on this server.") != -1:
            wikipedia.output("Couldn't download the image. Skipping.")
            return False
        sourceFile.close()

        #Setup Description Page
        pageDescription = sourceDescriptionPage.get()
        r = re.compile(u'== Summary ==\n?')
        if re.search(r, pageDescription):
            pageDescription = re.sub(r, u'', pageDescription)

        mirrorText = u'{{networkMirror|%s|%s}}' % (imageName,
                                                   sourceSite.family.name)
        comm = re.compile(u'({{commons(\|[^{}]*)?}})', re.IGNORECASE)
        if re.search(comm, pageDescription):
            pageDescription = re.sub(comm, u'\\1\n%s' % mirrorText,
                                     pageDescription)
        else:
            pageDescription = u'%s%s' % (mirrorText, pageDescription)
        pageDescription = u'== Summary ==\n%s' % pageDescription

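        # Mirror the file and its description page to every target site where they differ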
        for site in outputSites:
            if sourceSite.family.name != site.family.name or imageName != newImageName:
                doUpload = False
                doDescription = False

                try:
                    siteDescriptionPage = wikipedia.Page(
                        site, newImageName, None, 6)  #6=Image Namespace
                    siteImagePage = wikipedia.ImagePage(
                        site, siteDescriptionPage.title())

                    siteURL = siteImagePage.fileUrl()
                    if '://' not in siteURL:
                        siteURL = u'http://%s%s' % (site.hostname(), siteURL)

                    uo2 = wikipedia.MyURLopener()
                    siteFile = uo2.open(siteURL, "rb")
                    wikipedia.output(u'Reading file %s' % siteURL)
                    siteContents = siteFile.read()
                    siteFile.close()
                    # Compare the copy on the target site, not the source contents again
                    if siteContents.find(
                            "The requested URL was not found on this server."
                    ) != -1:
                        wikipedia.output(
                            "Couldn't download the image at new location.")
                        doUpload = True
                    elif siteContents != sourceContents:
                        doUpload = True

                    if siteDescriptionPage.get() != pageDescription:
                        doDescription = True

                except wikipedia.NoPage:
                    doUpload = True
                    doDescription = True

                if doUpload:
                    bot = upload.UploadRobot(url=sourceURL,
                                             useFilename=newImageName,
                                             keepFilename=True,
                                             verifyDescription=False,
                                             description=msg['en'],
                                             targetSite=site,
                                             urlEncoding=sourceSite.encoding())
                    bot.run()
                if doDescription:
                    siteDescriptionPage.put(pageDescription)
Exemplo n.º 29
0
def main(args):
    '''
    Main loop.
    '''
    workdir = u''
    textfile = u''
    records = {}

    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    if len(args) < 3:
        wikipedia.output(
            u'Too few arguments. Usage: NARA_uploader.py <original dir> <textfile> <derivative dir> [start filename]'
        )
        sys.exit()

    if os.path.isdir(args[0]):
        workdir = args[0]
    else:
        wikipedia.output(u'%s doesn\'t appear to be a directory. Exiting.' %
                         (args[0], ))
        sys.exit()

    derivativeDirectory = args[2]
    if os.path.exists(
            derivativeDirectory) and not os.path.isdir(derivativeDirectory):
        wikipedia.output(u"%s exists, but isn't a directory. Exiting." %
                         derivativeDirectory)
        sys.exit()
    elif not os.path.exists(derivativeDirectory):
        wikipedia.output(u'%s doesn\'t appear to exist. Creating.' %
                         derivativeDirectory)
        os.mkdir(derivativeDirectory)

    try:
        startFile = args[3]
        startFileFound = False

        startPath = os.path.join(workdir, startFile)

        if not os.path.exists(startPath) or os.path.isdir(startPath):
            wikipedia.output(
                u"%s doesn't exist, or it is a directory. Exiting." % startPath)
            sys.exit()

    except IndexError:
        startFile = None

    textfile = args[1]
    records = getRecords(textfile)
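    # records maps each source filename to the file ID used to build titles and fetch descriptions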
    #print records

    sourcefilenames = glob.glob(workdir + u"/*.TIF")
    sourcefilenames.sort()

    for sourcefilename in sourcefilenames:

        wikipedia.output(u'\nProcessing %s' % sourcefilename)

        if startFile:  #if we want to skip to a file
            fileHead, fileTail = os.path.split(sourcefilename)

            if not startFileFound:
                if fileTail != startFile:
                    wikipedia.output('Skipping %s' % sourcefilename)
                    continue
                else:  # we have found the start point
                    startFileFound = True

        filename = os.path.basename(sourcefilename)
        # This will give an ugly error if the id is unknown
        if not records.get(filename):
            wikipedia.output(u'Can\'t find %s in %s. Skipping this file.' %
                             (filename, textfile))
        elif os.path.getsize(sourcefilename) >= 1024 * 1024 * 100:
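            # Skip files of 100 MB or more instead of trying to upload them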
            wikipedia.output(u'%s too big. Skipping this file.' %
                             (sourcefilename, ))
        else:
            fileId = records.get(filename)

            wikipedia.output(u'Found file ID: %d' % fileId)

            #generate all the files we might need to upload
            filesToUpload = createDerivatives(sourcefilename,
                                              derivativeDirectory)

            duplicateFiletypes = {}
            #check for duplicates of the original on wiki
            for fileInfo in filesToUpload:

                if fileInfo['ext'] != '.tif':
                    continue

                foundDuplicates = findDuplicateImagesByHash(fileInfo['name'])

                duplicateFiletypes = addDuplicatesToList(
                    fileInfo, foundDuplicates, duplicateFiletypes)

            # follow the naming + description from the tif if it exists, or make it up from the description
            if '.tif' in duplicateFiletypes:
                title = duplicateFiletypes['.tif']

                wikipedia.output(
                    u'Fetching description from TIF file page: %s' % title)
                description = wikipedia.Page(site, 'File:' + title).get()

            else:
                description = fetchDescriptionFromWebtool(fileId)

                if not description:
                    wikipedia.output(u'No description! Skipping this file.')
                    continue
                else:
                    categories = u'{{Uncategorized-NARA|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n'
                    description = description + categories

                    title = getTitle(fileId, description)

                    if not title:
                        continue

            #check for duplicates of the derivatives (using the filename we just made)
            for fileInfo in filesToUpload:

                if fileInfo['ext'] == '.tif':
                    continue

                titleRoot, ext = os.path.splitext(title)
                fileTitle = titleRoot + fileInfo['ext']

                foundDuplicates = findDuplicateImagesByName(fileTitle)

                duplicateFiletypes = addDuplicatesToList(
                    fileInfo, foundDuplicates, duplicateFiletypes)

            #construct the gallery
            filesToUpload = setDestinations(filesToUpload, title)
            gallery = createDerivativeGallery(filesToUpload, title)

            #for every file, including original and derivatives
            for fileInfo in filesToUpload:

                titleRoot, ext = os.path.splitext(title)
                fileTitle = titleRoot + fileInfo['ext']

                # we have a duplicate: add derivatives if needed
                if fileInfo['ext'] in duplicateFiletypes:

                    currentFilename = duplicateFiletypes[fileInfo['ext']]

                    currentFilePage = wikipedia.Page(site,
                                                     'File:' + currentFilename)

                    currentDescription = currentFilePage.get()

                    currentDescription = addDerivativesToDescription(
                        currentDescription, gallery, title)

                    if currentDescription:
                        wikipedia.output(
                            'Updating the description for %s:\n\n%s' %
                            (currentFilename, currentDescription))
                        currentFilePage.put(
                            currentDescription,
                            comment="Adding other versions to the description."
                        )
                    else:
                        wikipedia.output('Gallery exists on page %s' %
                                         currentFilename)

                else:  #upload the file with generated info

                    wikipedia.output(fileInfo['name'] + ' --> ' +
                                     fileInfo['dest'])

                    newDescription = addDerivativesToDescription(
                        description, gallery, title)

                    if newDescription:  #if the gallery add failed due to existing gallery, just carry on with the original
                        description = newDescription

                    fileDescription = removeTIFFParameter(
                        description, fileInfo['ext'])
                    wikipedia.output(fileDescription)
                    bot = upload.UploadRobot(url=fileInfo['name'].decode(
                        sys.getfilesystemencoding()),
                                             description=fileDescription,
                                             useFilename=fileInfo['dest'],
                                             keepFilename=True,
                                             verifyDescription=False)
                    bot.run()
Exemplo n.º 30
0
    def transferImage(self, sourceImagePage):
        """
        Download image and its description, and upload it to another site.

        @return: the filename which was used to upload the image
        """
        sourceSite = sourceImagePage.site
        url = sourceImagePage.fileUrl().encode('utf-8')
        pywikibot.output(u"URL should be: %s" % url)
        # localize the text that should be printed on the image description page
        try:
            description = sourceImagePage.get()
            # try to translate license templates
            if (sourceSite.sitename,
                    self.targetSite.sitename) in licenseTemplates:
                for old, new in licenseTemplates[(
                        sourceSite.sitename,
                        self.targetSite.sitename)].items():
                    new = '{{%s}}' % new
                    old = re.compile('{{%s}}' % old)
                    description = textlib.replaceExcept(
                        description, old, new,
                        ['comment', 'math', 'nowiki', 'pre'])

            description = i18n.translate(
                self.targetSite, copy_message,
                fallback=True) % (sourceSite, description)
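            # Append the file's upload history from the source wiki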
            description += '\n\n'
            description += sourceImagePage.getFileVersionHistoryTable()
            # add interwiki link
            if sourceSite.family == self.targetSite.family:
                description += u'\r\n\r\n{0}'.format(sourceImagePage)
        except pywikibot.NoPage:
            description = ''
            print("Image does not exist or description page is empty.")
        except pywikibot.IsRedirectPage:
            description = ''
            print("Image description page is a redirect.")
        else:
            bot = upload.UploadRobot(url=url,
                                     description=description,
                                     targetSite=self.targetSite,
                                     urlEncoding=sourceSite.encoding(),
                                     keepFilename=self.keep_name,
                                     verifyDescription=not self.keep_name,
                                     ignoreWarning=self.ignore_warning)
            # try to upload
            targetFilename = bot.run()
            if targetFilename and self.targetSite.family.name == 'commons' and \
               self.targetSite.code == 'commons':
                # upload to Commons was successful
                reason = i18n.translate(sourceSite,
                                        nowCommonsMessage,
                                        fallback=True)
                # try to delete the original image if we have a sysop account
                if sourceSite.family.name in config.sysopnames and \
                   sourceSite.lang in config.sysopnames[sourceSite.family.name]:
                    if sourceImagePage.delete(reason):
                        return
                if sourceSite.lang in nowCommonsTemplate and \
                   sourceSite.family.name in config.usernames and \
                   sourceSite.lang in config.usernames[sourceSite.family.name]:
                    # add the nowCommons template.
                    pywikibot.output(u'Adding nowCommons template to %s' %
                                     sourceImagePage.title())
                    sourceImagePage.put(
                        sourceImagePage.get() + '\n\n' +
                        nowCommonsTemplate[sourceSite.lang] % targetFilename,
                        summary=nowCommonsMessage[sourceSite.lang])