def addWikidataSuggestions(self): """ Not used at the moment Some suggestions are too good to add by hand. Have the bot add them :return: """ # Make a list of creator/institution/inventory number (ascession number) matchesKeys = set(self.commonsWithoutCIA.keys()) & set( self.wikidataWithoutCIA.keys()) for key in matchesKeys: (creator, institution, inv) = key # Just get one image and one item image = self.commonsWithoutCIA.get(key)[0] paintingdict = self.wikidataWithoutCIA.get(key)[0] itemTitle = paintingdict.get('item') item = pywikibot.ItemPage(self.repo, title=itemTitle) data = item.get() claims = data.get('claims') if u'P18' not in claims: #url = paintingdict.get('url') summary = u'based on [[%s]] / [[%s]] / %s match with Commons' % ( creator, institution, inv) newclaim = pywikibot.Claim(self.repo, u'P18') imagelink = pywikibot.Link(image, source=self.commons, defaultNamespace=6) imagePage = pywikibot.ImagePage(imagelink) if imagePage.isRedirectPage(): imagePage = pywikibot.ImagePage( imagePage.getRedirectTarget()) newclaim.setTarget(imagePage) item.addClaim(newclaim, summary=summary)
def testNonFileLocal(self):
    """Test file page, without local file, existing on the local wiki."""
    title = 'File:Sepp Maier 1.JPG'
    local_site = self.get_site('testwiki')
    local_file = pywikibot.ImagePage(local_site, title)
    # The description page exists locally, but the file itself is shared.
    self.assertTrue(local_file.fileUrl())
    self.assertTrue(local_file.exists())
    self.assertTrue(local_file.fileIsShared())
    shared_file = pywikibot.ImagePage(self.get_site('commons'), title)
    # Both pages must resolve to the same (Commons) file URL.
    self.assertEqual(local_file.fileUrl(), shared_file.fileUrl())
def parsePhoto(self, photoID):
    """
    Fetch a photo page, parse its embedded JSON metadata and upload the file.

    :param photoID: identifier appended to self.baseURL to build the page URL
    :return: None

    NOTE(review): relies on a module-level ``site`` object (not defined in
    this block) -- confirm it is the target pywikibot site.
    """
    pageURL = self.baseURL + photoID
    # Read the whole response at once instead of concatenating strings
    # line by line, which is quadratic in the worst case.
    pageText = urllib2.urlopen(pageURL).read()
    # The page embeds its metadata as the JSON argument of Y.photo.init(...).
    fileData = json.loads(
        re.findall("Y\.photo\.init\((.*?)\);", pageText, re.S)[0]
    )
    fileTitle = fileData["title"]
    fileDescription = fileData["description"]
    fileDate = fileData["date_taken"]
    fileOwner = fileData["ownername"]
    # "o" is the original-size rendition.
    fileURL = fileData["sizes"]["o"]["url"]
    fileExtension = fileURL.split(".")[-1]
    name = "%s (%s).%s" % (fileTitle, photoID, fileExtension)
    imagePage = pywikibot.ImagePage(site, name)
    imagePage.text = self.imageDescription % {
        "date": fileDate,
        "description": fileDescription,
        "owner": fileOwner,
        "source": pageURL
    }
    if self.uploadByURL:
        # Let the wiki fetch the file itself.
        site.upload(imagePage, source_url=fileURL, comment=self.summary)
    else:
        # Download locally first, upload, then clean up the temp copy.
        path = os.path.abspath(os.path.join(self.subDirName, name))
        urllib.urlretrieve(fileURL, path)
        site.upload(imagePage, source_filename=path, comment=self.summary)
        os.remove(path)
def main():
    """Parse the command line and run the ImageRobot on one image."""
    old_image = None
    new_image = None
    summary = ''
    always = False
    loose = False
    # read command line parameters
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
        elif arg == '-loose':
            loose = True
        elif arg.startswith('-summary'):
            # A bare '-summary' (no value) prompts interactively.
            if len(arg) == len('-summary'):
                summary = pywikibot.input(u'Choose an edit summary: ')
            else:
                summary = arg[len('-summary:'):]
        elif not old_image:
            # First positional argument is the old image name.
            old_image = arg
        else:
            # Second positional argument is the replacement image.
            new_image = arg
    if not old_image:
        pywikibot.showHelp('image')
        return
    mysite = pywikibot.Site()
    ns = mysite.image_namespace()
    old_page = pywikibot.ImagePage(mysite, ns + ':' + old_image)
    links_gen = pagegenerators.FileLinksGenerator(old_page)
    preloading_gen = pagegenerators.PreloadingGenerator(links_gen)
    bot = ImageRobot(preloading_gen, old_image, new_image, summary,
                     always, loose)
    bot.run()
def upload_image(self, html, data, imgfile):
    """
    Upload one image to the target site with *html* as its description page.

    :param html: description page content for the new file page
    :param data: dict with at least 'amh_id', 'institution_shortcode' and
        'title_en' keys, used to build the destination filename
    :param imgfile: path of the local image file to upload
    :return: None; returns early if the destination page already exists
    """
    site = self.targetSite
    # Construct the name; the title part is truncated to keep the
    # filename within MediaWiki's length limits.
    commons_filename = "AMH-%s-%s_%s.jpg" % (
        data["amh_id"], data["institution_shortcode"].upper(),
        data["title_en"][:150])
    if self.page_exists(commons_filename):
        pywikibot.output("%s already exists, skipping" % commons_filename)
        return
    imagepage = pywikibot.ImagePage(
        site, commons_filename)  # normalizes filename
    imagepage.text = html
    pywikibot.output(u'Uploading file %s to %s via API....' %
                     (commons_filename, site))
    try:
        site.upload(imagepage, source_filename=imgfile)
    except pywikibot.UploadWarning as warn:
        pywikibot.output(u"We got a warning message: ", newline=False)
        pywikibot.output(str(warn))
    except Exception:
        # exc_info=True already logs the traceback; the previously bound
        # exception name was unused, so it has been removed.
        pywikibot.error("Upload error: ", exc_info=True)
    else:
        # No warning, upload complete.
        pywikibot.output(u"Upload successful.")
def categorizeImages(generator, onlyFilter, onlyUncat):
    """
    Loop over all images in generator and try to categorize them.

    Get category suggestions from CommonSense.
    """
    for page in generator:
        # Skip anything that is not an existing, non-redirect file page.
        if not page.exists() or page.namespace() != 6 \
                or page.isRedirectPage():
            continue
        imagepage = pywikibot.ImagePage(page.site(), page.title())
        pywikibot.output(u'Working on ' + imagepage.title())
        if onlyUncat and u'Uncategorized' not in imagepage.templates():
            pywikibot.output(u'No Uncategorized template found')
            continue
        currentCats = getCurrentCats(imagepage)
        if onlyFilter:
            commonshelperCats = []
            usage = []
            galleries = []
        else:
            (commonshelperCats, usage,
             galleries) = getCommonshelperCats(imagepage)
        newcats = applyAllFilters(commonshelperCats + currentCats)
        # Only save when the filtered set actually differs.
        if newcats and set(currentCats) != set(newcats):
            for cat in newcats:
                pywikibot.output(u' Found new cat: ' + cat)
            saveImagePage(imagepage, newcats, usage, galleries,
                          onlyFilter)
def run(self):
    """Interactively pick images from each page and transfer them."""
    for page in self.generator:
        # Collect candidate images for this page.
        if self.interwiki:
            images = []
            for linked_page in page.interwiki():
                images.extend(
                    linked_page.imagelinks(followRedirects=True))
        elif page.isImage():
            images = [pywikibot.ImagePage(page.site(), page.title())]
        else:
            images = page.imagelinks(followRedirects=True)
        while images:
            self.showImageList(images)
            if len(images) == 1:
                # no need to query the user, only one possibility
                choice = 0
            else:
                pywikibot.output(
                    u"Give the number of the image to transfer.")
                choice = pywikibot.input(u"To end uploading, press enter:")
                if not choice:
                    break
                choice = int(choice)
            if 0 <= choice < len(images):
                if images[choice].fileIsOnCommons():
                    pywikibot.output(
                        u'The image is already on Wikimedia Commons.')
                else:
                    self.transferImage(images[choice])
                # remove the selected image from the list
                images = images[:choice] + images[choice + 1:]
            else:
                pywikibot.output(u'No such image number.')
def up(filename, pagetitle, desc, comment):
    """
    Upload a file (local path or URL) to *site* as 'File:<pagetitle>'.

    :param filename: local path, or a source URL if it starts with 'http'
    :param pagetitle: destination title without the 'File:' prefix
    :param desc: wikitext of the file description page
    :param comment: upload summary

    NOTE(review): relies on a module-level ``site`` object not defined in
    this block -- verify it exists at call time.
    """
    if filename[:4] == 'http':
        source_url = filename
        source_filename = None
    else:
        source_url = None
        source_filename = filename
    site.upload(pywikibot.ImagePage(site, 'File:' + pagetitle),
                source_filename=source_filename,
                source_url=source_url,
                comment=comment,
                text=desc,
                watch=False,
                ignore_warnings=True,  # True if ignoring duplicates
                chunk_size=1048576)
    return


# ---- module-level script code below ----
# NOTE(review): 'source_url', 'desc' and 'pagetitle' are locals of up()
# and are NOT defined at module scope -- as written the next line raises
# NameError. Presumably the upload source/description were meant to be
# assigned here directly; confirm the intended values before running.
url = source_url
keepFilename = True  # set to True to skip double-checking/editing destination filename
verifyDescription = False  # set to False to skip double-checking/editing description => change to bot-mode
targetSite = pywikibot.getSite('commons', 'commons')
bot = upload.UploadRobot(
    [url],  # string gives depreciation msg
    description=desc,  # only one description if multiple images
    useFilename=pagetitle,
    keepFilename=keepFilename,
    verifyDescription=verifyDescription,
    targetSite=targetSite,
    ignoreWarning=True,
    chunk_size=2000000  # 2MB
)
bot.upload_file(file_url=url, debug=True)
def testOnBoth(self):
    """File that exists both locally on itwiki and on Commons."""
    title = 'File:Pulsante spam.png'
    local_file = pywikibot.ImagePage(self.get_site('itwiki'), title)
    # Every page using the file must be a regular Page instance.
    for user_page in local_file.usingPages():
        self.assertIsInstance(user_page, pywikibot.Page)
    shared_file = pywikibot.ImagePage(self.get_site('commons'), title)
    self.assertTrue(local_file.fileUrl())
    self.assertTrue(local_file.exists())
    self.assertTrue(shared_file.exists())
    # The local copy shadows Commons, so it is not "shared".
    self.assertFalse(local_file.fileIsShared())
    self.assertTrue(shared_file.fileIsShared())
def run(self):
    """
    Start the bot.

    For every generated page, look up its Wikidata item and, when the
    page has a PageImages 'page_image', add that file as a claim for
    self.wdproperty (unless the item already has one).
    """
    for page in self.generator:
        pywikibot.output(u'Working on %s' % page.title())
        item = pywikibot.ItemPage.fromPage(page)
        if item.exists():
            pywikibot.output(u'Found %s' % item.title())
            # 'page_image' is populated by the MediaWiki PageImages
            # extension -- assumes the wiki has it enabled.
            imagename = page.properties().get('page_image')
            if imagename:
                claims = item.get().get('claims')
                if self.wdproperty in claims:
                    pywikibot.output(
                        u'Item %s already contains image (%s)' %
                        (item.title(), self.wdproperty))
                else:
                    newclaim = pywikibot.Claim(self.repo, self.wdproperty)
                    commonssite = pywikibot.Site("commons", "commons")
                    imagelink = pywikibot.Link(imagename,
                                               source=commonssite,
                                               defaultNamespace=6)
                    image = pywikibot.ImagePage(imagelink)
                    # Resolve file redirects so the claim targets the
                    # actual file page.
                    if image.isRedirectPage():
                        image = pywikibot.ImagePage(
                            image.getRedirectTarget())
                    if not image.exists():
                        pywikibot.output(
                            '[[%s]] doesn\'t exist so I can\'t link to it'
                            % (image.title(), ))
                        continue
                    newclaim.setTarget(image)
                    pywikibot.output(
                        'Adding %s --> %s' % (newclaim.getID(),
                                              newclaim.getTarget()))
                    item.addClaim(newclaim)
                    # A generator might yield pages from multiple sites
                    source = self.getSource(page.site)
                    if source:
                        newclaim.addSource(source, bot=True)
def main(local, total):
    """
    Compare usage of Commons featured pictures between enwiki and *local*.

    For each featured picture on Commons, find the English Wikipedia pages
    using it, follow their language links to *local*, and report whether
    the corresponding local page also uses the image.

    :param local: pywikibot.Site of the local wiki to check
    :param total: maximum number of featured pictures to process
    """
    en = pywikibot.Site('en', local.family.name)
    if not local.logged_in():
        local.login()
    commons = pywikibot.Site('commons', 'commons')
    for image in pywikibot.Category(
            commons,
            'Featured pictures on Wikimedia Commons').articles(namespaces=6,
                                                               total=total):
        image_en = pywikibot.ImagePage(en, image.title())
        # Materialize once: usingPages() may be a generator, and the old
        # len(list(...)) check exhausted it before the loop below ran.
        using_en = list(usingPages(image_en))
        if not using_en:
            pywikibot.output(u'no pages using "{image}" on {site}'.format(
                image=image.title(), site=en))
            continue
        image_local = pywikibot.ImagePage(local, image.title())
        # Same: materialize once instead of exhausting it on first use.
        using_local = list(usingPages(image_local))
        for enpage in using_en:
            try:
                langlinks = [
                    linked for linked in enpage.langlinks()
                    if linked.site == local
                ]
                if len(langlinks) == 1:
                    localpage = pywikibot.Page(langlinks[0])
                    kwargs = dict(enpage=enpage,
                                  localpage=localpage,
                                  image=image)
                    # Fixed: previously tested the undefined name
                    # 'langlink'; the intended check is whether the local
                    # page also uses the image.
                    if localpage in using_local:
                        pywikibot.output(
                            u'\03{{lightgreen}}{enpage} and {localpage} have {image}'
                            .format(**kwargs))
                    else:
                        pywikibot.output(
                            u'\03{{lightred}}{enpage} has {image} but {localpage} has not'
                            .format(**kwargs))
                        log(**kwargs)
                elif len(langlinks) == 0:  # fixed: was undefined 'langlink'
                    # Fixed: the format call previously passed en=page
                    # (undefined) instead of the placeholders it uses.
                    pywikibot.output(
                        u'\03{{lightyellow}}{enpage} has {image} but no langlinks to {local} have been found'
                        .format(enpage=enpage, image=image, local=local))
            except Exception:
                # Was a bare 'except: pass'; keep the best-effort loop but
                # at least record what failed.
                pywikibot.error('Error while processing %s' % enpage,
                                exc_info=True)
def testSharedOnly(self):
    """File that exists only on Commons, with no local itwiki copy."""
    title = 'File:Sepp Maier 1.JPG'
    local_file = pywikibot.ImagePage(self.get_site('itwiki'), title)
    # Pages using the file must still be regular Page instances.
    for user_page in local_file.usingPages():
        self.assertIsInstance(user_page, pywikibot.Page)
    shared_file = pywikibot.ImagePage(self.get_site('commons'), title)
    self.assertFalse(local_file.exists())
    self.assertTrue(shared_file.exists())
    # Both views report the file as shared from Commons.
    self.assertTrue(local_file.fileIsShared())
    self.assertTrue(shared_file.fileIsShared())
    self.assertTrue(shared_file.fileUrl())
    self.assertIn('/wikipedia/commons/', local_file.fileUrl())
    # Fetching the non-existent local description page must fail.
    self.assertRaises(pywikibot.NoPage, local_file.get)
def testLocalOnly(self):
    """File that exists only on enwiki, with no Commons counterpart."""
    title = 'File:April Fools Day Adminship discussion (2005).png'
    local_file = pywikibot.ImagePage(self.get_site('enwiki'), title)
    for user_page in local_file.usingPages():
        self.assertIsInstance(user_page, pywikibot.Page)
    shared_file = pywikibot.ImagePage(self.get_site('commons'), title)
    self.assertTrue(local_file.fileUrl())
    self.assertTrue(local_file.exists())
    self.assertFalse(shared_file.exists())
    # A purely local file is never reported as shared.
    self.assertFalse(local_file.fileIsShared())
    # All Commons-side accessors must fail on the missing page.
    self.assertRaises(pywikibot.NoPage, shared_file.fileIsShared)
    self.assertRaises(pywikibot.NoPage, shared_file.fileUrl)
    self.assertRaises(pywikibot.NoPage, shared_file.get)
def upload_image(self, debug=False):
    """Upload the image at self.url to the target wiki.

    Return the filename that was used to upload the image. If the
    upload fails, ask the user whether to try again or not. If the
    user chooses not to retry, return null.
    """
    filename = self.process_filename()
    site = self.targetSite
    imagepage = pywikibot.ImagePage(site, filename)  # normalizes filename
    imagepage.text = self.description
    pywikibot.output(u'Uploading file to %s via API....' % site)
    try:
        if self.uploadByUrl:
            site.upload(imagepage, source_url=self.url,
                        ignore_warnings=self.ignoreWarning)
        else:
            # A URL-shaped source must be fetched locally first.
            source = (self.read_file_content()
                      if "://" in self.url else self.url)
            site.upload(imagepage, source_filename=source,
                        ignore_warnings=self.ignoreWarning)
    except pywikibot.UploadWarning as warn:
        pywikibot.output(u"We got a warning message: ", newline=False)
        pywikibot.output(str(warn))
        answer = pywikibot.inputChoice(u"Do you want to ignore?",
                                       ['Yes', 'No'], ['y', 'N'], 'N')
        if answer != "y":
            pywikibot.output(u"Upload aborted.")
            return
        # Retry with warnings suppressed and the chosen filename kept.
        self.ignoreWarning = 1
        self.keepFilename = True
        return self.upload_image(debug)
    except Exception:
        pywikibot.error("Upload error: ", exc_info=True)
    else:
        #No warning, upload complete.
        pywikibot.output(u"Upload successful.")
        return filename  # data['filename']
def run(self):
    """
    Start the robot.

    For every generated item that carries self.wdproperty (a heritage
    id), add an image (P18) claim when self.monumentImages has a match
    and the item has no image yet.
    """
    for item in self.generator:
        data = item.get()
        claims = data.get('claims')
        if self.wdproperty in claims:
            heritageid = claims.get(self.wdproperty)[0].getTarget()
            # Hardcoded to Rijksmonument. Could do something with SPARQL query
            if u'P18' not in claims and heritageid in self.monumentImages:
                imagename = self.monumentImages.get(heritageid).get('image')
                sourceurl = self.monumentImages.get(heritageid).get(
                    'source')
                # NOTE(review): this debug print fires exactly when an
                # image WAS found for the heritage id -- the message
                # looks inverted; confirm intent before trusting logs.
                print u'no image found'
                # Construct
                newclaim = pywikibot.Claim(self.repo, u'P18')
                commonssite = pywikibot.Site("commons", "commons")
                imagelink = pywikibot.Link(imagename, source=commonssite,
                                           defaultNamespace=6)
                image = pywikibot.ImagePage(imagelink)
                # Follow file redirects to the real file page.
                if image.isRedirectPage():
                    image = pywikibot.ImagePage(image.getRedirectTarget())
                if not image.exists():
                    pywikibot.output(
                        '[[%s]] doesn\'t exist so I can\'t link to it'
                        % (image.title(), ))
                else:
                    newclaim.setTarget(image)
                    pywikibot.output(
                        'Adding %s --> %s based on %s' %
                        (newclaim.getID(), newclaim.getTarget(),
                         sourceurl))
                    summary = 'based on usage in list https%s' % (
                        sourceurl, )
                    item.addClaim(newclaim, summary=summary)
def upload(site, metadata, image_dict):
    """
    Upload every image referenced by metadata[image_dict] to *site*,
    recording the resulting Commons name back into the metadata dict.

    NOTE(review): the body uses ``self`` (self.qualified_article_dir,
    self.doi) although the signature has no ``self`` parameter -- as
    written this raises NameError. Presumably this was a method that
    lost its first parameter; confirm before use.
    """
    for image in metadata[image_dict]:
        image_file, qualified_image_location = helpers.find_right_extension(
            image, self.qualified_article_dir)
        logging.info(image_file)
        if image_file:  #we found a valid image file
            harmonized_name = helpers.harmonizing_name(
                image_file, metadata['article-title'])
            #print harmonized_name
            image_page = pywikibot.ImagePage(site, harmonized_name)
            page_text = commons_template.page(
                metadata, metadata[image_dict][image]['caption'])
            image_page._text = page_text
            try:
                site.upload(
                    imagepage=image_page,
                    source_filename=qualified_image_location,
                    comment='Automatic upload of media from: [[doi:' +
                    self.doi + ']]',
                    ignore_warnings=False)
                # "ignore_warnings" means "overwrite" if True
                logging.info('Uploaded image %s' % image_file)
                metadata[image_dict][image][
                    'uploaded_name'] = harmonized_name
            except pywikibot.exceptions.UploadWarning as warning:
                warning_string = unicode(warning)
                # Duplicate file on Commons: reuse the existing name
                # instead of uploading a second copy.
                if warning_string.startswith(
                        'Uploaded file is a duplicate of '):
                    liststring = warning_string.split(
                        'Uploaded file is a duplicate of ')[1][:-1]
                    duplicate_list = ast.literal_eval(liststring)
                    duplicate_name = duplicate_list[0]
                    print 'duplicate found: ', duplicate_name
                    logging.info('Duplicate image %s' % image_file)
                    metadata[image_dict][image][
                        'uploaded_name'] = duplicate_name
                elif warning_string.endswith('already exists.'):
                    logging.info('Already exists image %s' % image_file)
                    metadata[image_dict][image][
                        'uploaded_name'] = harmonized_name
                else:
                    # Unknown warning: surface it to the caller.
                    raise
def up(source, pagetitle, desc, comment):
    """Upload *source* (local path or http URL) as 'File:<pagetitle>'."""
    # An http-prefixed source is uploaded by URL, otherwise by file path.
    is_url = source[:4] == 'http'
    site.upload(pywikibot.ImagePage(site, 'File:' + pagetitle),
                source_filename=None if is_url else source,
                source_url=source if is_url else None,
                comment=comment,
                text=desc,
                watch=False,
                ignore_warnings=False,
                chunk_size=1048576,
                _file_key=None,
                _offset=0,
                _verify_stash=None,
                report_success=None)
def upload_image(self, img):
    """
    Upload a single image described by *img* to the target site.

    :param img: dict with at least a 'filename' key; the full dict is
        passed to self.format() to build the description page
    :return: None
    """
    site = self.targetSite
    filename = self.PATH + "/stedenboek/" + img["filename"]
    imagepage = pywikibot.ImagePage(
        site, img["filename"])  # normalizes filename
    imagepage.text = self.format(img)
    pywikibot.output(u'Uploading file %s to %s via API....' %
                     (img["filename"], site))
    try:
        site.upload(imagepage, source_filename=filename)
    except pywikibot.UploadWarning as warn:
        pywikibot.output(u"We got a warning message: ", newline=False)
        pywikibot.output(str(warn))
    except Exception:
        # exc_info=True already logs the traceback; the previously bound
        # exception name was unused, so it has been removed.
        pywikibot.error("Upload error: ", exc_info=True)
    else:
        # No warning, upload complete.
        pywikibot.output(u"Upload successful.")
def run(self):
    """
    Starts the robot.

    For each painting dict from the generator: find or create its
    Wikidata item, then fill in missing claims (collection, location,
    instance-of, creator, date, material, described-at, image) and
    optionally upload the image to Commons first.
    """
    for painting in self.generator:
        # Buh, for this one I know for sure it's in there
        #print painting[u'id']
        print painting[u'url']
        paintingItem = None
        newclaims = []
        # --- Step 1: locate an existing item, or create a new one. ---
        if painting[u'id'] in self.paintingIds:
            paintingItemTitle = u'Q%s' % (self.paintingIds.get(
                painting[u'id']), )
            print paintingItemTitle
            paintingItem = pywikibot.ItemPage(self.repo,
                                              title=paintingItemTitle)
        else:
            #Break for now
            print u'Let us create stuff'
            #continue
            #print u'WTFTFTFTFT???'
            #print 'bla'
            data = {
                'labels': {},
                'descriptions': {},
            }
            data['labels']['en'] = {
                'language': 'en',
                'value': painting[u'title']
            }
            data['descriptions']['en'] = {
                'language': u'en',
                'value': u'painting by %s' % (painting[u'creator'], )
            }
            data['descriptions']['nl'] = {
                'language': u'nl',
                'value': u'schilderij van %s' % (painting[u'creator'], )
            }
            print data
            identification = {}
            summary = u'Creating new item with data from %s ' % (
                painting[u'url'], )
            pywikibot.output(summary)
            #monumentItem.editEntity(data, summary=summary)
            try:
                result = self.repo.editEntity(identification, data,
                                              summary=summary)
            except pywikibot.exceptions.APIError:
                # We got ourselves a duplicate label and description, let's correct that
                # Retry with a more specific English description.
                pywikibot.output(
                    u'Oops, already had that one. Trying again')
                data['descriptions']['en'] = {
                    'language': u'en',
                    'value': u'painting by %s (%s, %s)' %
                    (painting[u'creator'], painting[u'collectionshort'],
                     painting[u'id'])
                }
                result = self.repo.editEntity(identification, data,
                                              summary=summary)
                pass
            #print result
            paintingItemTitle = result.get(u'entity').get('id')
            paintingItem = pywikibot.ItemPage(self.repo,
                                              title=paintingItemTitle)
            # Add to self.paintingIds so that we don't create dupes
            self.paintingIds[painting[u'id']] = paintingItemTitle.replace(
                u'Q', u'')
            # Inventory-number claim on the freshly created item.
            newclaim = pywikibot.Claim(
                self.repo, u'P%s' % (self.paintingIdProperty, ))
            newclaim.setTarget(painting[u'id'])
            pywikibot.output('Adding new id claim to %s' % paintingItem)
            paintingItem.addClaim(newclaim)
            self.addReference(paintingItem, newclaim, painting[u'url'])
            newqualifier = pywikibot.Claim(
                self.repo, u'P195')  #Add collection, isQualifier=True
            newqualifier.setTarget(self.collectionitem)
            pywikibot.output('Adding new qualifier claim to %s' %
                             paintingItem)
            newclaim.addQualifier(newqualifier)
            # Collection claim (P195) on the item itself.
            collectionclaim = pywikibot.Claim(self.repo, u'P195')
            collectionclaim.setTarget(self.collectionitem)
            pywikibot.output('Adding collection claim to %s' %
                             paintingItem)
            paintingItem.addClaim(collectionclaim)
            # Add the date they got it as a qualifier to the collection
            if painting.get(u'acquisitiondate'):
                colqualifier = pywikibot.Claim(self.repo, u'P580')
                acdate = None
                if len(painting[u'acquisitiondate']) == 4 and painting[
                        u'acquisitiondate'].isnumeric():  # It's a year
                    acdate = pywikibot.WbTime(
                        year=painting[u'acquisitiondate'])
                elif len(painting[u'acquisitiondate'].split(u'-',
                                                            2)) == 3:
                    # Assumed day-month-year format -- TODO confirm.
                    (acday, acmonth, acyear
                     ) = painting[u'acquisitiondate'].split(u'-', 2)
                    acdate = pywikibot.WbTime(year=int(acyear),
                                              month=int(acmonth),
                                              day=int(acday))
                if acdate:
                    colqualifier.setTarget(acdate)
                    pywikibot.output(
                        'Adding new acquisition date qualifier claim to collection on %s'
                        % paintingItem)
                    collectionclaim.addQualifier(colqualifier)
            self.addReference(paintingItem, collectionclaim,
                              painting[u'url'])
        # --- Step 2: fill in missing claims on the (existing) item. ---
        if paintingItem and paintingItem.exists():
            painting['wikidata'] = paintingItem.title()
            data = paintingItem.get()
            claims = data.get('claims')
            #print claims
            if painting.get(u'creator'):
                self.fixDescription(paintingItem, painting.get(u'creator'))
            # located in
            if u'P276' not in claims and painting.get(u'location'):
                newclaim = pywikibot.Claim(self.repo, u'P276')
                location = pywikibot.ItemPage(self.repo,
                                              painting.get(u'location'))
                newclaim.setTarget(location)
                pywikibot.output('Adding located in claim to %s' %
                                 paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting['url'])
            # instance of always painting while working on the painting collection
            if u'P31' not in claims:
                dcformatItem = pywikibot.ItemPage(self.repo,
                                                  title='Q3305213')
                newclaim = pywikibot.Claim(self.repo, u'P31')
                newclaim.setTarget(dcformatItem)
                pywikibot.output('Adding instance claim to %s' %
                                 paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim, painting['url'])
            # creator
            if u'P170' not in claims and painting.get(u'creator'):
                #print painting[u'creator']
                # Search Wikidata for a matching painter item.
                creategen = pagegenerators.PreloadingEntityGenerator(
                    pagegenerators.WikidataItemGenerator(
                        pagegenerators.SearchPageGenerator(
                            painting[u'creator'], step=None, total=10,
                            namespaces=[0], site=self.repo)))
                newcreator = None
                try:
                    for creatoritem in creategen:
                        print creatoritem.title()
                        if creatoritem.get().get('labels').get(
                                'en'
                        ) == painting[u'creator'] or creatoritem.get(
                        ).get('labels').get('nl') == painting[u'creator']:
                            #print creatoritem.get().get('labels').get('en')
                            #print creatoritem.get().get('labels').get('nl')
                            # Check occupation and country of citizinship
                            # (Q1028181 = painter)
                            if u'P106' in creatoritem.get().get('claims'):
                                existing_claims = creatoritem.get().get(
                                    'claims').get('P106')
                                for existing_claim in existing_claims:
                                    if existing_claim.target_equals(
                                            u'Q1028181'):
                                        newcreator = creatoritem
                            continue
                        elif (
                                creatoritem.get().get('aliases').get('en')
                                and painting[u'creator'] in
                                creatoritem.get().get('aliases').get('en')
                        ) or (creatoritem.get().get('aliases').get('nl')
                              and painting[u'creator'] in
                              creatoritem.get().get('aliases').get('nl')):
                            if u'P106' in creatoritem.get().get('claims'):
                                existing_claims = creatoritem.get().get(
                                    'claims').get('P106')
                                for existing_claim in existing_claims:
                                    if existing_claim.target_equals(
                                            u'Q1028181'):
                                        newcreator = creatoritem
                            continue
                except pywikibot.exceptions.APIError:
                    print u'Search API is acting up, just let it be'
                    pass
                if newcreator:
                    pywikibot.output(newcreator.title())
                    newclaim = pywikibot.Claim(self.repo, u'P170')
                    newclaim.setTarget(newcreator)
                    pywikibot.output('Adding creator claim to %s' %
                                     paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim,
                                      painting[u'url'])
                    #print creatoritem.title()
                    #print creatoritem.get()
                else:
                    pywikibot.output('No item found for %s' %
                                     (painting[u'creator'], ))
            else:
                print u'Already has a creator'
            # date of creation
            if u'P571' not in claims and painting.get(u'date'):
                if len(painting[u'date']
                       ) == 4 and painting[u'date'].isnumeric():
                    # It's a year
                    newdate = pywikibot.WbTime(year=painting[u'date'])
                    newclaim = pywikibot.Claim(self.repo, u'P571')
                    newclaim.setTarget(newdate)
                    pywikibot.output(
                        'Adding date of creation claim to %s' %
                        paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim,
                                      painting[u'url'])
            # material used
            if u'P186' not in claims and painting.get(u'medium'):
                if painting.get(u'medium') == u'Oil on canvas':
                    # Q296955 = oil paint, Q4259259 = canvas,
                    # Q861259 = painting surface
                    olieverf = pywikibot.ItemPage(self.repo, u'Q296955')
                    doek = pywikibot.ItemPage(self.repo, u'Q4259259')
                    oppervlak = pywikibot.ItemPage(self.repo, u'Q861259')
                    newclaim = pywikibot.Claim(self.repo, u'P186')
                    newclaim.setTarget(olieverf)
                    pywikibot.output('Adding new oil paint claim to %s' %
                                     paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim,
                                      painting[u'url'])
                    newclaim = pywikibot.Claim(self.repo, u'P186')
                    newclaim.setTarget(doek)
                    pywikibot.output('Adding new canvas claim to %s' %
                                     paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim,
                                      painting[u'url'])
                    newqualifier = pywikibot.Claim(
                        self.repo, u'P518')  #Applies to part
                    newqualifier.setTarget(oppervlak)
                    pywikibot.output('Adding new qualifier claim to %s' %
                                     paintingItem)
                    newclaim.addQualifier(newqualifier)
            # Described at url
            if u'P973' not in claims:
                newclaim = pywikibot.Claim(self.repo, u'P973')
                newclaim.setTarget(painting[u'url'])
                pywikibot.output('Adding described at claim to %s' %
                                 paintingItem)
                paintingItem.addClaim(newclaim)
                # self.addReference(paintingItem, newclaim, uri)
            # Upload an image baby! BUT NOT NOW
            imagetitle = u''
            if painting.get(u'imageurl') and u'P18' not in claims:
                commonssite = pywikibot.Site("commons", "commons")
                photo = Photo(painting[u'imageurl'], painting)
                titlefmt = u'%(creator)s - %(title)s - %(id)s - Minneapolis Institute of Arts.%(_ext)s'
                pagefmt = u'User:Multichill/Minneapolis Institute of Arts'
                duplicates = photo.findDuplicateImages()
                if duplicates:
                    # Reuse the already-uploaded file instead of a dupe.
                    pywikibot.output(u"Skipping duplicate of %r" %
                                     duplicates)
                    imagetitle = duplicates[0]
                    #return duplicates[0]
                else:
                    imagetitle = self.cleanUpTitle(
                        photo.getTitle(titlefmt))
                    pywikibot.output(imagetitle)
                    description = photo.getDescription(pagefmt)
                    pywikibot.output(description)
                    # Download to a temp file, then hand it to the
                    # upload robot.
                    handle, tempname = tempfile.mkstemp()
                    with os.fdopen(handle, "wb") as t:
                        t.write(photo.downloadPhoto().getvalue())
                        t.close()
                    #tempname
                    bot = upload.UploadRobot(url=tempname,
                                             description=description,
                                             useFilename=imagetitle,
                                             keepFilename=True,
                                             verifyDescription=False,
                                             uploadByUrl=False,
                                             targetSite=commonssite)
                    #bot._contents = photo.downloadPhoto().getvalue()
                    #bot._retrieved = True
                    bot.run()
            # Finally link the (found or uploaded) image on the item.
            if u'P18' not in claims and imagetitle:
                newclaim = pywikibot.Claim(self.repo, u'P18')
                imagelink = pywikibot.Link(imagetitle,
                                           source=commonssite,
                                           defaultNamespace=6)
                image = pywikibot.ImagePage(imagelink)
                if image.isRedirectPage():
                    image = pywikibot.ImagePage(
                        image.getRedirectTarget())
                newclaim.setTarget(image)
                pywikibot.output('Adding %s --> %s' %
                                 (newclaim.getID(), newclaim.getTarget()))
                paintingItem.addClaim(newclaim)
def main():
    """Create the {{znak chiński}} (Chinese character) section on pl.wiktionary.

    Walks every single-character page in the Polish index category, scrapes
    the matching en.wiktionary page ({{Han char}} / {{Han ref}} / {{zh-forms}}
    / {{ja-forms}} templates plus stroke-order images), builds the Polish
    section wikitext and saves it, appending a per-page report to the bot's
    log page.  With test_mode == 1 nothing is written; output goes to stdout.
    """
    # 0 = live run (edits the wiki); 1 = dry run (print only).
    test_mode = 0
    site = pywikibot.getSite()
    site_en = pywikibot.getSite('en', 'wiktionary')
    site_com = pywikibot.getSite('commons', 'commons')  # NOTE(review): unused below
    cat = Category(site, 'Kategoria:chiński standardowy (indeks)')
    cat_com = Category(site, 'Chinese kanji stroke order')
    lista_stron = pagegenerators.CategorizedPageGenerator(cat)
    lista_com = pagegenerators.CategorizedPageGenerator(cat_com)  # NOTE(review): unused below
    log_site = pywikibot.Page(site, 'Wikipedysta:AlkamidBot/zch/log')
    lista = []     # single-character pages to process
    istnieje = []  # pages that already have the section
    # Regexes for pulling parameters out of the en.wiktionary templates.
    han_char = re.compile('{{Han(_| )char\|(.*?)}')
    han_ref = re.compile('{{Han(_| )ref\|(.*})')
    zh_f = re.compile('{{zh-forms\|(.*)}')
    jap_f = re.compile('{{ja-forms\|(.*)}')
    kx = re.compile('kx=(.*?)(\||})')
    dkj = re.compile('\|dkj=(.*?)(\||})')
    dj = re.compile('\|dj=(.*?)(\||})')
    hdz = re.compile('\|hdz=(.*?)(\||})')
    rn = re.compile('rn=([0-9]*?)\|')
    rad = re.compile('rad=(.)')
    han_as = re.compile('as=([0-9]*?)\|')
    sn = re.compile('sn=([0-9]*?)\|')
    canj = re.compile('canj=([^\|]*)')
    cr = re.compile('four=(.*?)\|')
    alt = re.compile('alt=(.*?)\|')
    asj = re.compile('asj=(.*?)\|')
    # Split an existing page into "before first heading" and "rest".
    tekst_przed = re.compile('(.*?)=', re.DOTALL)
    tekst_po = re.compile('.*?(=.*)', re.DOTALL)
    # Detects stroke-order media / templates already present on the pl page.
    grafika = re.compile(
        '(\-bw\.|\-red\.|\-order\.|{{zch\-cienie}}|{{zch\-animacja}}|{{zch\-komiks}})'
    )
    # Only single-character titles are Chinese-character entries.
    for page in lista_stron:
        if len(page.title()) == 1:
            lista.append(page)
    for a in lista:
        tekst = ''
        # abort flags: 1 means the corresponding en.wikt parameter is missing.
        rn_abort = 0
        rad_abort = 0
        han_as_abort = 0
        sn_abort = 0
        canj_abort = 0
        cr_abort = 0
        try:
            strona = a.get()
        except pywikibot.IsRedirectPage:
            print('[[%s]] - przekierowanie' % a.title())
            # NOTE(review): `log` is only initialised in the `else` branch, so
            # `log = log + ...` here raises UnboundLocalError — confirm intent.
            log = log + '\n*[[%s]] - przekierowanie' % a.title()
        except pywikibot.Error:
            print('[[%s]] - błąd' % a.title())
            log = log + '\n*[[%s]] - błąd' % a.title()
        else:
            tekst_przed_s = re.search(tekst_przed, a.get())
            tekst_po_s = re.search(tekst_po, a.get())
            log = ''
            if test_mode == 1:
                # Nonsense marker so the "section exists" check never matches.
                sekcja_znak = 'fdssagrefadf'
            else:
                sekcja_znak = '{{znak chiński}}'
            if sekcja_znak in a.get():
                # Section already present — just log and remember the page.
                print('[[%s]] - istnieje już sekcja {{znak chiński}}' % a.title())
                log = log + '\n*[[%s]] - istnieje już sekcja {{s|znak chiński}}' % a.title()
                istnieje.append(a)
            else:
                # Fetch the English entry for the same character.
                ang = pywikibot.Page(site_en, a.title())
                han_char_s = re.search(han_char, ang.get())
                grafika_s = re.search(grafika, a.get())
                if grafika_s != None:
                    print('[[%s]] - znaleziono grafikę z CJK stroke order' % a.title())
                    log = log + '\n*[[%s]] - znaleziono grafikę z CJK stroke order' % a.title()
                if han_char_s != None:
                    szablon_han = han_char_s.group(2)
                    # Pull individual parameters from {{Han char}}.
                    rn_s = re.search(rn, szablon_han)
                    rad_s = re.search(rad, szablon_han)
                    han_as_s = re.search(han_as, szablon_han)
                    sn_s = re.search(sn, szablon_han)
                    canj_s = re.search(canj, szablon_han)
                    cr_s = re.search(cr, szablon_han)
                    alt_s = re.search(alt, szablon_han)
                    asj_s = re.search(asj, szablon_han)
                    # alter/alter1: character has alternative forms (alt=/asj=),
                    # in which case the page is skipped further below.
                    if alt_s == None:
                        alter = 0
                    else:
                        if alt_s.group(1) == '':
                            alter = 0
                        else:
                            alter = 1
                    if asj_s == None:
                        alter1 = 0
                    else:
                        if asj_s.group(1) == '':
                            alter1 = 0
                        else:
                            alter1 = 1
                    if alter == 0 and alter1 == 0:
                        #print a.title()
                        if rn_s == None:
                            print('[[%s]] - Nie istnieje argument \'rn\'' % a.title())
                            log = log + '\n*[[%s]] - Nie istnieje argument \'rn\'' % a.title()
                            rn_abort = 1
                        if rad_s == None:
                            print('[[%s]] - Nie istnieje argument \'rad\'' % a.title())
                            log = log + '\n*[[%s]] - Nie istnieje argument \'rad\'' % a.title()
                            rad_abort = 1
                        if han_as_s != None:
                            #print han_as_s.group(1)
                            # Normalise the "additional strokes" count: strip a
                            # leading zero and prefix "+ " where appropriate.
                            if han_as_s.group(1) == '0' or han_as_s.group(1) == '00':
                                as_output = '+ 0'
                            else:
                                if han_as_s.group(1)[0] == '0':
                                    as_output = '+ %s' % han_as_s.group(1)[1]
                                else:
                                    as_output = han_as_s.group(1)[1]
                            #print as_output
                        else:
                            han_as_abort = 1
                        if sn_s == None:
                            sn_abort = 1
                        if canj_s == None:
                            canj_abort = 1
                        if cr_s != None:
                            if cr_s.group(1).isspace() or cr_s.group(1) == '':
                                print('[[%s]] - argument \'four\' na en.wikt jest pusty - dodać ręcznie' % a.title())
                                log = log + '\n*[[%s]] - argument \'four\' na en.wikt jest pusty - dodać ręcznie' % a.title()
                        else:
                            cr_abort = 1
                        # Pick the best stroke-order template available on
                        # Commons for each variant: comic strip (-bw) >
                        # shadowed (-red) > animation (-order).
                        kolejnosc_koncowa_c = ''
                        if pywikibot.ImagePage(site_en, '%s-bw.png' % a.title()).fileIsShared():
                            kolejnosc_koncowa_c = '{{zch-komiks}}'
                        else:
                            if pywikibot.ImagePage(site_en, '%s-red.png' % a.title()).fileIsShared():
                                kolejnosc_koncowa_c = '{{zch-cienie}}'
                            else:
                                if pywikibot.ImagePage(site_en, '%s-order.gif' % a.title()).fileIsShared():
                                    kolejnosc_koncowa_c = '{{zch-animacja}}'
                        # Same lookup for the Japanese variant (-j...).
                        kolejnosc_koncowa_j = ''
                        if pywikibot.ImagePage(site_en, '%s-jbw.png' % a.title()).fileIsShared():
                            kolejnosc_koncowa_j = '{{zch-komiks|j}}'
                        else:
                            if pywikibot.ImagePage(site_en, '%s-jred.png' % a.title()).fileIsShared():
                                kolejnosc_koncowa_j = '{{zch-cienie|j}}'
                            else:
                                if pywikibot.ImagePage(site_en, '%s-jorder.gif' % a.title()).fileIsShared():
                                    kolejnosc_koncowa_j = '{{zch-animacja|j}}'
                        # Traditional variant (-t...).
                        kolejnosc_koncowa_t = ''
                        if pywikibot.ImagePage(site_en, '%s-tbw.png' % a.title()).fileIsShared():
                            kolejnosc_koncowa_t = '{{zch-komiks|t}}'
                        else:
                            if pywikibot.ImagePage(site_en, '%s-tred.png' % a.title()).fileIsShared():
                                kolejnosc_koncowa_t = '{{zch-cienie|t}}'
                            else:
                                if pywikibot.ImagePage(site_en, '%s-torder.gif' % a.title()).fileIsShared():
                                    kolejnosc_koncowa_t = '{{zch-animacja|t}}'
                        # Alternative variant (-a...).
                        kolejnosc_koncowa_a = ''
                        if pywikibot.ImagePage(site_en, '%s-abw.png' % a.title()).fileIsShared():
                            kolejnosc_koncowa_a = '{{zch-komiks|a}}'
                        else:
                            if pywikibot.ImagePage(site_en, '%s-ared.png' % a.title()).fileIsShared():
                                kolejnosc_koncowa_a = '{{zch-cienie|a}}'
                            else:
                                if pywikibot.ImagePage(site_en, '%s-aorder.gif' % a.title()).fileIsShared():
                                    kolejnosc_koncowa_a = '{{zch-animacja|a}}'
                        # Start building the Polish section wikitext.
                        tekst = '== {{zh|%s}} ({{znak chiński}}) ==\n{{klucz}}' % a.title()
                        if rn_abort or rad_abort or han_as_abort:
                            print('[[%s]] - w en.wikt nie istnieje któryś z argumentów do {{klucz}} - dodać ręcznie' % a.title())
                            log = log + '\n*[[%s]] - w en.wikt nie istnieje któryś z argumentów do {{s|klucz}} - dodać ręcznie' % a.title()
                        else:
                            tekst = tekst + ' %s %s %s' % (rn_s.group(1), rad_s.group(1), as_output)
                        tekst = tekst + '\n{{kreski}}'
                        if sn_abort:
                            # NOTE(review): both strings below contain %s but no
                            # % argument is applied — likely missing % a.title().
                            print('[[%s]] - w en.wikt nie istnieje argument do {{kreski}} - dodać ręcznie')
                            log = log + '\n*[[%s]] - w en.wikt nie istnieje argument do {{s|kreski}} - dodać ręcznie'
                        else:
                            tekst = tekst + ' %s\n' % sn_s.group(1)
                        zh_f_s = re.search(zh_f, ang.get())
                        ja_f_s = re.search(jap_f, ang.get())
                        warianty = '{{warianty'
                        warianty_obr = '{{warianty-obrazek'
                        ku = ''
                        xu = ''
                        sou = ''
                        sot = ''
                        ming = ''
                        upr = ''
                        trad = ''
                        shin = ''
                        if zh_f_s != None:
                            # {{zh-forms|simplified|traditional}} — strip wiki
                            # markup, then split into the two forms.
                            zh_f_str = zh_f_s.group(1).replace(
                                "[", "").replace("]", "").replace("{{zh-lookup|", "").replace("}", "")
                            zh_osobno = zh_f_str.split('|')
                            warianty = warianty + ' | {{zch-w|ct|%s}} | {{zch-w|cu|%s}}' % (
                                zh_osobno[1], zh_osobno[0])
                            # Disabled legacy code kept as an inert string.
                            '''
                            if pywikibot.ImagePage(site_en, u'%s-kaishu.svg' % zh_osobno[0]).fileIsShared():
                                ku = u' | {{zch-obrazek|ku|%s}}' % zh_osobno[0]
                            else:
                                if pywikibot.ImagePage(site_en, u'%s-kaishu.png' % zh_osobno[0]).fileIsShared():
                                    ku = u' | {{zch-obrazek|ku|%s|p}}' % zh_osobno[0]
                                else:
                                    if pywikibot.ImagePage(site_en, u'%s-kaishu.gif' % zh_osobno[0]).fileIsShared():
                                        ku = u' | {{zch-obrazek|ku|%s|g}}' % zh_osobno[0]
                            if pywikibot.ImagePage(site_en, u'%s-xinshu.svg' % zh_osobno[0]).fileIsShared():
                                xu = u' | {{zch-obrazek|xu|%s}}' % zh_osobno[0]
                            else:
                                if pywikibot.ImagePage(site_en, u'%s-xinshu.png' % zh_osobno[0]).fileIsShared():
                                    xu = u' | {{zch-obrazek|xu|%s|p}}' % zh_osobno[0]
                                else:
                                    if pywikibot.ImagePage(site_en, u'%s-xinshu.gif' % zh_osobno[0]).fileIsShared():
                                        xu = u' | {{zch-obrazek|xu|%s|g}}' % zh_osobno[0]
                            if pywikibot.ImagePage(site_en, u'%s-songti.svg' % zh_osobno[0]).fileIsShared():
                                sou = u' | {{zch-obrazek|sou|%s}}' % zh_osobno[0]
                            else:
                                if pywikibot.ImagePage(site_en, u'%s-songti.png' % zh_osobno[0]).fileIsShared():
                                    sou = u' | {{zch-obrazek|sou|%s|p}}' % zh_osobno[0]
                                else:
                                    if pywikibot.ImagePage(site_en, u'%s-songti.gif' % zh_osobno[0]).fileIsShared():
                                        sou = u' | {{zch-obrazek|sou|%s|g}}' % zh_osobno[0]
                            if ku != u'' or xu !=u'' or sou !=u'':
                                warianty = warianty + u'{{warianty-obrazek'
                                if ku != u'':
                                    warianty = warianty + ku
                                if xu !=u'':
                                    warianty = warianty + xu
                                if sou !=u'':
                                    warianty = warianty + sou
                                warianty = warianty + u'}}'
                            '''
                        if ja_f_s != None:
                            # {{ja-forms|shinjitai|simplified|traditional}}.
                            ja_f_str = ja_f_s.group(1).replace(
                                "[", "").replace("]", "").replace("{{zh-lookup|", "").replace("}", "")
                            ja_osobno = ja_f_str.split('|')
                            warianty = warianty + ' | {{zch-w|js|%s}} | {{zch-w|ct|%s}} | {{zch-w|cu|%s}}' % (
                                ja_osobno[0], ja_osobno[2], ja_osobno[1])
                            trad = ja_osobno[2]
                            upr = ja_osobno[1]
                            shin = ja_osobno[0]
                            # Disabled legacy code kept as an inert string.
                            '''if pywikibot.ImagePage(site_en, u'%s-kaishu.svg' % ja_osobno[1]).fileIsShared():
                                ku = u' | {{zch-obrazek|ku|%s}}' % ja_osobno[1]
                            else:
                                if pywikibot.ImagePage(site_en, u'%s-kaishu.png' % ja_osobno[1]).fileIsShared():
                                    ku = u' | {{zch-obrazek|ku|%s|p}}' % ja_osobno[1]
                                else:
                                    if pywikibot.ImagePage(site_en, u'%s-kaishu.gif' % ja_osobno[1]).fileIsShared():
                                        ku = u' | {{zch-obrazek|ku|%s|g}}' % ja_osobno[1]
                            if pywikibot.ImagePage(site_en, u'%s-xinshu.svg' % ja_osobno[1]).fileIsShared():
                                xu = u' | {{zch-obrazek|xu|%s}}' % ja_osobno[1]
                            else:
                                if pywikibot.ImagePage(site_en, u'%s-xinshu.png' % ja_osobno[1]).fileIsShared():
                                    xu = u' | {{zch-obrazek|xu|%s|p}}' % ja_osobno[1]
                                else:
                                    if pywikibot.ImagePage(site_en, u'%s-xinshu.gif' % ja_osobno[1]).fileIsShared():
                                        xu = u' | {{zch-obrazek|xu|%s|g}}' % ja_osobno[1]
                            if pywikibot.ImagePage(site_en, u'%s-songti.svg' % ja_osobno[1]).fileIsShared():
                                sou = u' | {{zch-obrazek|sou|%s}}' % ja_osobno[1]
                            else:
                                if pywikibot.ImagePage(site_en, u'%s-songti.png' % ja_osobno[1]).fileIsShared():
                                    sou = u' | {{zch-obrazek|sou|%s|p}}' % ja_osobno[1]
                                else:
                                    if pywikibot.ImagePage(site_en, u'%s-songti.gif' % ja_osobno[1]).fileIsShared():
                                        sou = u' | {{zch-obrazek|sou|%s|g}}' % ja_osobno[1]
                            if ku != u'' or xu !=u'' or sou !=u'':
                                warianty = warianty + u'{{warianty-obrazek'
                                if ku != u'':
                                    warianty = warianty + ku
                                if xu !=u'':
                                    warianty = warianty + xu
                                if sou !=u'':
                                    warianty = warianty + sou
                                warianty = warianty + u'}}'''
                        # Calligraphy-style images: try svg, then png, then gif.
                        if pywikibot.ImagePage(site_en, '%s-clerical.svg' % a.title()).fileIsShared():
                            warianty_obr = warianty_obr + ' | {{zch-obrazek|c|%s}}' % a.title()
                        else:
                            if pywikibot.ImagePage(site_en, '%s-clerical.png' % a.title()).fileIsShared():
                                warianty_obr = warianty_obr + ' | {{zch-obrazek|c|%s|p}}' % a.title()
                            else:
                                if pywikibot.ImagePage(site_en, '%s-clerical.gif' % a.title()).fileIsShared():
                                    warianty_obr = warianty_obr + ' | {{zch-obrazek|c|%s|g}}' % a.title()
                        if pywikibot.ImagePage(site_en, '%s-xinshu.svg' % a.title()).fileIsShared():
                            warianty_obr = warianty_obr + ' | {{zch-obrazek|xt|%s}}' % a.title()
                        else:
                            if pywikibot.ImagePage(site_en, '%s-xinshu.png' % a.title()).fileIsShared():
                                warianty_obr = warianty_obr + ' | {{zch-obrazek|xt|%s|p}}' % a.title()
                            else:
                                if pywikibot.ImagePage(site_en, '%s-xinshu.gif' % a.title()).fileIsShared():
                                    warianty_obr = warianty_obr + ' | {{zch-obrazek|xt|%s|g}}' % a.title()
                        # "still" and "caoshu" are alternatives for the same slot.
                        if pywikibot.ImagePage(site_en, '%s-still.svg' % a.title()).fileIsShared():
                            warianty_obr = warianty_obr + ' | {{zch-obrazek|st|%s}}' % a.title()
                        else:
                            if pywikibot.ImagePage(site_en, '%s-caoshu.svg' % a.title()).fileIsShared():
                                warianty_obr = warianty_obr + ' | {{zch-obrazek|ca|%s}}' % a.title()
                            else:
                                if pywikibot.ImagePage(site_en, '%s-still.png' % a.title()).fileIsShared():
                                    warianty_obr = warianty_obr + ' | {{zch-obrazek|st|%s|p}}' % a.title()
                                else:
                                    if pywikibot.ImagePage(site_en, '%s-caoshu.png' % a.title()).fileIsShared():
                                        warianty_obr = warianty_obr + ' | {{zch-obrazek|ca|%s|p}}' % a.title()
                                    else:
                                        if pywikibot.ImagePage(site_en, '%s-still.gif' % a.title()).fileIsShared():
                                            warianty_obr = warianty_obr + ' | {{zch-obrazek|st|%s|g}}' % a.title()
                                        else:
                                            if pywikibot.ImagePage(site_en, '%s-caoshu.gif' % a.title()).fileIsShared():
                                                warianty_obr = warianty_obr + ' | {{zch-obrazek|ca|%s|g}}' % a.title()
                        if pywikibot.ImagePage(site_en, '%s-kaishu.svg' % a.title()).fileIsShared():
                            warianty_obr = warianty_obr + ' | {{zch-obrazek|kt|%s}}' % a.title()
                        else:
                            if pywikibot.ImagePage(site_en, '%s-kaishu.png' % a.title()).fileIsShared():
                                warianty_obr = warianty_obr + ' | {{zch-obrazek|kt|%s|p}}' % a.title()
                            else:
                                if pywikibot.ImagePage(site_en, '%s-kaishu.gif' % a.title()).fileIsShared():
                                    warianty_obr = warianty_obr + ' | {{zch-obrazek|kt|%s|g}}' % a.title()
                        if pywikibot.ImagePage(site_en, '%s-songti.svg' % a.title()).fileIsShared():
                            warianty_obr = warianty_obr + ' | {{zch-obrazek|sot|%s}}' % a.title()
                        else:
                            if pywikibot.ImagePage(site_en, '%s-songti.png' % a.title()).fileIsShared():
                                warianty_obr = warianty_obr + ' | {{zch-obrazek|sot|%s|p}}' % a.title()
                            else:
                                if pywikibot.ImagePage(site_en, '%s-songti.gif' % a.title()).fileIsShared():
                                    warianty_obr = warianty_obr + ' | {{zch-obrazek|sot|%s|g}}' % a.title()
                        # Disabled legacy code kept as an inert string.
                        '''if sot != u'':
                            ming = ming + sot
                        else:
                            if zh_f_s != None:
                                ming = ming + u' | {{zch-w|ct|%s}}' % zh_osobno[1]
                            if ja_f_s != None:
                                ming = ming + u' | {{zch-w|ct|%s}}' % ja_osobno[2]
                        if sou != u'':
                            ming = ming + sou
                        else:
                            if zh_f_s != None:
                                ming = ming + u' | {{zch-w|cu|%s}}' % zh_osobno[0]
                            if ja_f_s != None:
                                ming = ming + u' | {{zch-w|cu|%s}}' % ja_osobno[1]'''
                        # Close the {{warianty}} / {{warianty-obrazek}} templates.
                        if warianty == '{{warianty':
                            tekst = tekst + '{{warianty|{{zch-w}}'
                        else:
                            tekst = tekst + warianty
                        tekst = tekst + '}}'
                        if warianty_obr != '{{warianty-obrazek':
                            tekst = tekst + ' ' + warianty_obr + '}}'
                        tekst = tekst + '\n{{kolejność}}'
                        if kolejnosc_koncowa_c == '' and kolejnosc_koncowa_j == '' and kolejnosc_koncowa_t == '' and kolejnosc_koncowa_a == '':
                            print('[[%s]] - na commons nie znaleziono żadnej kolejności pisania' % a.title())
                            log = log + '\n*[[%s]] - na commons nie znaleziono żadnej kolejności pisania' % a.title()
                        else:
                            tekst = tekst + '\n'
                            if kolejnosc_koncowa_c != '':
                                tekst = tekst + '%s ' % kolejnosc_koncowa_c
                            if kolejnosc_koncowa_j != '':
                                tekst = tekst + '%s ' % kolejnosc_koncowa_j
                            if kolejnosc_koncowa_t != '':
                                tekst = tekst + '%s ' % kolejnosc_koncowa_t
                            if kolejnosc_koncowa_a != '':
                                tekst = tekst + '%s ' % kolejnosc_koncowa_a
                        tekst = tekst + '\n{{znaczenia}}\n{{etymologia}}'
                        # Historical script images for the etymology section.
                        etym = ' {{warianty-obrazek'
                        if pywikibot.ImagePage(site_en, '%s-oracle.svg' % a.title()).fileIsShared():
                            etym = etym + ' | {{zch-obrazek|o|%s}}' % a.title()
                        else:
                            if pywikibot.ImagePage(site_en, '%s-oracle.png' % a.title()).fileIsShared():
                                etym = etym + ' | {{zch-obrazek|o|%s|p}}' % a.title()
                        if pywikibot.ImagePage(site_en, '%s-bronze.svg' % a.title()).fileIsShared():
                            etym = etym + ' | {{zch-obrazek|br|%s}}' % a.title()
                        else:
                            if pywikibot.ImagePage(site_en, '%s-bronze.png' % a.title()).fileIsShared():
                                etym = etym + ' | {{zch-obrazek|br|%s|p}}' % a.title()
                        if pywikibot.ImagePage(site_en, '%s-bigseal.svg' % a.title()).fileIsShared():
                            etym = etym + ' | {{zch-obrazek|bs|%s}}' % a.title()
                        else:
                            if pywikibot.ImagePage(site_en, '%s-bigseal.png' % a.title()).fileIsShared():
                                etym = etym + ' | {{zch-obrazek|bs|%s|p}}' % a.title()
                        if pywikibot.ImagePage(site_en, '%s-seal.svg' % a.title()).fileIsShared():
                            etym = etym + ' | {{zch-obrazek|ss|%s}}' % a.title()
                        else:
                            if pywikibot.ImagePage(site_en, '%s-seal.png' % a.title()).fileIsShared():
                                etym = etym + ' | {{zch-obrazek|ss|%s|p}}' % a.title()
                        etym = etym + '}}'
                        if etym != ' {{warianty-obrazek}}':
                            tekst = tekst + etym
                        # Input-method / encoding codes: cangjie, four-corner,
                        # and the Unicode code point (hex).
                        tekst = tekst + '\n{{kody|cjz='
                        if canj_abort:
                            # NOTE(review): no % args applied to these strings.
                            print('[[%s]] - w en.wikt nie istnieje argument cjz - dodać ręcznie')
                            log = log + '\n*[[%s]] - w en.wikt nie istnieje argument cjz - dodać ręcznie'
                        else:
                            tekst = tekst + '%s' % canj_s.group(1)
                        tekst = tekst + '|cr='
                        if cr_abort == 1:
                            print('[[%s]] - w en.wikt nie istnieje argument \'\'four\'\' - dodać ręcznie' % a.title())
                            log = log + '\n*[[%s]] - w en.wikt nie istnieje argument \'\'four\'\' - dodać ręcznie' % a.title()
                        else:
                            tekst = tekst + '%s' % cr_s.group(1)
                        tekst = tekst + '|u=%x}}' % ord(a.title())
                        # Dictionary references from {{Han ref}}.
                        han_ref_s = re.search(han_ref, ang.get())
                        if han_ref_s != None:
                            tekst = tekst + '\n{{słowniki'
                            kx_s = re.search(kx, han_ref_s.group(2))
                            if kx_s != None:
                                tekst = tekst + '|kx=%s' % kx_s.group(1)
                            dkj_s = re.search(dkj, han_ref_s.group(2))
                            if dkj_s != None:
                                tekst = tekst + '|dkj=%s' % dkj_s.group(1)
                            dj_s = re.search(dj, han_ref_s.group(2))
                            if dj_s != None:
                                tekst = tekst + '|dj=%s' % dj_s.group(1)
                            hdz_s = re.search(hdz, han_ref_s.group(2))
                            if hdz_s != None:
                                tekst = tekst + '|hdz=%s' % hdz_s.group(1)
                            tekst = tekst + '}}'
                        tekst = tekst + '\n{{uwagi}}\n{{źródła}}\n\n'
                    else:
                        # Character has alternative forms — skip it.
                        print('[[%s]] - znaleziono alternatywne zapisy, pomijam' % a.title())
                        log = log + '\n*[[%s]] - znaleziono alternatywne zapisy, pomijam' % a.title()
                    # Splice the new section between the original lead and body.
                    final = tekst_przed_s.group(1) + tekst + tekst_po_s.group(1)
                    if test_mode == 1:
                        print(final + '\n\n')
                    else:
                        a.put(final, comment='bot dodaje sekcję {{znak chiński}}')
                else:
                    print('[[%s]] - Nie znaleziono szablonu {{Han char}}' % a.title())
                    log = log + '\n*[[%s]] - Nie znaleziono szablonu {{s|Han char}}, pomijam' % a.title()
        # Append this page's report to the on-wiki log (re-fetched each pass).
        log_site = pywikibot.Page(site, 'Wikipedysta:AlkamidBot/zch/log')
        log_stary = log_site.get()
        if test_mode == 1:
            print(log)
        else:
            log = log_stary + log
            log_site.put(log, comment='%s' % a.title())
    def run(self):
        """
        Starts the robot.

        For each metadata dict from self.generator: look up (or, when
        self.create is set, create) the matching Wikidata item, then add any
        missing labels and statements (P31, P17, P131, P276, P170, P136,
        P571, P18, P373, P625) backed by the metadata's reference URL.
        """
        for metadata in self.generator:
            # Buh, for this one I know for sure it's in there
            #print metadata[u'id']
            #print metadata[u'url']
            # Do some url magic so that all url fields are always filled
            if not metadata.get('refurl'):
                metadata['refurl']=metadata['url']
            if not metadata.get('idrefurl'):
                metadata['idrefurl']=metadata['refurl']
            if not metadata.get('describedbyurl'):
                metadata['describedbyurl']=metadata['url']
            artworkItem = None
            newclaims = []
            if metadata[u'id'] in self.artworkIds:
                # Known id -> reuse the existing item.
                artworkItemTitle = self.artworkIds.get(metadata[u'id'])
                print (artworkItemTitle)
                artworkItem = pywikibot.ItemPage(self.repo, title=artworkItemTitle)
            elif self.create:
                #Break for now
                #print u'Let us create stuff'
                #continue
                #print u'WTFTFTFTFT???'
                #print 'bla'
                # Build the label/description payload for a brand-new item.
                data = {'labels': {},
                        'descriptions': {},
                        }
                # loop over stuff
                if metadata.get('title'):
                    for lang, label in metadata['title'].items():
                        data['labels'][lang] = {'language': lang, 'value': label}
                if metadata.get('description'):
                    for lang, description in metadata['description'].items():
                        data['descriptions'][lang] = {'language': lang, 'value': description}
                identification = {}
                summary = u'Creating new item with data from %s ' % (metadata[u'url'],)
                pywikibot.output(summary)
                try:
                    result = self.repo.editEntity(identification, data, summary=summary)
                except pywikibot.exceptions.APIError:
                    ## TODO: Check if this is pywikibot.OtherPageSaveError too
                    ## We got ourselves a duplicate label and description, let's correct that by adding collection and the id
                    pywikibot.output(u'Oops, already had that one. Trying again')
                    # Disambiguate the description with inception year and id,
                    # then retry the creation once.
                    for lang, description in metadata['description'].items():
                        data['descriptions'][lang] = {'language': lang, 'value': u'%s (%s %s)' % (description, metadata['inception'], metadata['id'],) }
                    result = self.repo.editEntity(identification, data, summary=summary)
                    pass
                # Crash here
                artworkItemTitle = result.get(u'entity').get('id')
                # Wikidata is sometimes lagging. Wait for 10 seconds before trying to actually use the item
                time.sleep(10)
                artworkItem = pywikibot.ItemPage(self.repo, title=artworkItemTitle)
                # Add to self.artworkIds so that we don't create dupes
                self.artworkIds[metadata[u'id']]=artworkItemTitle
                # Add the id to the item so we can get back to it later
                newclaim = pywikibot.Claim(self.repo, self.idProperty)
                newclaim.setTarget(metadata[u'id'])
                pywikibot.output('Adding new id claim to %s' % artworkItem)
                artworkItem.addClaim(newclaim)
                #self.addReference(artworkItem, newclaim, metadata[u'idrefurl'])
                #newqualifier = pywikibot.Claim(self.repo, u'P195') #Add collection, isQualifier=True
                #newqualifier.setTarget(self.collectionitem)
                #pywikibot.output('Adding new qualifier claim to %s' % artworkItem)
                #newclaim.addQualifier(newqualifier)
                #collectionclaim = pywikibot.Claim(self.repo, u'P195')
                #collectionclaim.setTarget(self.collectionitem)
                #pywikibot.output('Adding collection claim to %s' % artworkItem)
                #artworkItem.addClaim(collectionclaim)
                ## Add the date they got it as a qualifier to the collection
                #if metadata.get(u'acquisitiondate'):
                #    if type(metadata[u'acquisitiondate']) is int or (len(metadata[u'acquisitiondate'])==4 and \
                #       metadata[u'acquisitiondate'].isnumeric()): # It's a year
                #        acdate = pywikibot.WbTime(year=metadata[u'acquisitiondate'])
                #        colqualifier = pywikibot.Claim(self.repo, u'P580')
                #        colqualifier.setTarget(acdate)
                #        pywikibot.output('Adding new acquisition date qualifier claim to collection on %s' % artworkItem)
                #        collectionclaim.addQualifier(colqualifier)
                ## FIXME: Still have to rewrite this part
                # Disabled legacy code kept as an inert string.
                '''
                if metadata.get(u'acquisitiondate'):
                    colqualifier = pywikibot.Claim(self.repo, u'P580')
                    acdate = None
                    if len(painting[u'acquisitiondate'])==4 and painting[u'acquisitiondate'].isnumeric():
                        # It's a year
                        acdate = pywikibot.WbTime(year=painting[u'acquisitiondate'])
                    elif len(painting[u'acquisitiondate'].split(u'-', 2))==3:
                        (acday, acmonth, acyear) = painting[u'acquisitiondate'].split(u'-', 2)
                        acdate = pywikibot.WbTime(year=int(acyear), month=int(acmonth), day=int(acday))
                    if acdate:
                        colqualifier.setTarget(acdate)
                '''
                #self.addReference(artworkItem, collectionclaim, metadata[u'refurl'])
            if artworkItem and artworkItem.exists():
                # Record the item title back into the metadata for callers.
                metadata['wikidata'] = artworkItem.title()
                data = artworkItem.get()
                claims = data.get('claims')
                # Add missing labels
                # FIXME: Move to a function
                # FIXME Do something with aliases too
                labels = data.get('labels')
                if metadata.get('title'):
                    labelschanged = False
                    for lang, label in metadata['title'].items():
                        if lang not in labels:
                            labels[lang] = label
                            labelschanged = True
                    if labelschanged:
                        summary = u'Adding missing label(s) from %s' % (metadata.get(u'refurl'),)
                        try:
                            artworkItem.editLabels(labels, summary=summary)
                        except pywikibot.OtherPageSaveError:
                            # Just skip it for now
                            pywikibot.output(u'Oops, already had that label/description combination. Skipping')
                            pass
                # Disabled description-sync code kept as an inert string.
                """
                # Add missing descriptions
                # FIXME Move to a function
                descriptions = copy.deepcopy(data.get('descriptions'))
                if metadata.get('description'):
                    descriptionschanged = False
                    for lang, description in metadata['description'].items():
                        if lang not in descriptions:
                            descriptions[lang] = description
                            descriptionschanged = True
                    if descriptionschanged:
                        summary = u'Adding missing description(s) from %s' % (metadata.get(u'refurl'),)
                        try:
                            artworkItem.editDescriptions(descriptions, summary=summary)
                        except pywikibot.exceptions.OtherPageSaveError: # pywikibot.exceptions.APIError:
                            # We got ourselves a duplicate label and description, let's correct that by adding collection and the id
                            descriptions = copy.deepcopy(data.get('descriptions'))
                            pywikibot.output(u'Oops, already had that label/description combination. Trying again')
                            for lang, description in metadata['description'].items():
                                if lang not in descriptions:
                                    descriptions[lang] = u'%s (%s %s)' % (description, metadata['collectionshort'], metadata['id'],)
                            artworkItem.editDescriptions(descriptions, summary=summary)
                            pass
                #print claims
                """
                # instance of
                self.addItemStatement(artworkItem, u'P31', metadata.get(u'instanceofqid'), metadata.get(u'refurl'))
                # country
                self.addItemStatement(artworkItem, u'P17', metadata.get(u'countryqid'), metadata.get(u'refurl'))
                # adminlocation
                self.addItemStatement(artworkItem, u'P131', metadata.get(u'adminlocationqid'), metadata.get(u'refurl'))
                # location
                self.addItemStatement(artworkItem, u'P276', metadata.get(u'locationqid'), metadata.get(u'refurl'))
                # creator
                self.addItemStatement(artworkItem, u'P170', metadata.get(u'creatorqid'), metadata.get(u'refurl'))
                # genre
                self.addItemStatement(artworkItem, u'P136', metadata.get(u'genreqid'), metadata.get(u'refurl'))
                # Inception
                if u'P571' not in claims and metadata.get(u'inception'):
                    # Only plain years (int, or a 4-digit numeric string) are
                    # handled; anything else is silently skipped.
                    if type(metadata[u'inception']) is int or (len(metadata[u'inception'])==4 and \
                       metadata[u'inception'].isnumeric()): # It's a year
                        newdate = pywikibot.WbTime(year=metadata[u'inception'])
                        newclaim = pywikibot.Claim(self.repo, u'P571')
                        newclaim.setTarget(newdate)
                        pywikibot.output('Adding date of creation claim to %s' % artworkItem)
                        artworkItem.addClaim(newclaim)
                        self.addReference(artworkItem, newclaim, metadata[u'refurl'])
                # TODO: Implement circa
                if metadata.get('image') and u'P18' not in claims:
                    # NOTE(review): message is misleading — this branch runs
                    # when an image WAS found in the metadata (Py2 print).
                    print u'no image found'
                    # Construct
                    newclaim = pywikibot.Claim(self.repo, u'P18')
                    commonssite = pywikibot.Site("commons", "commons")
                    imagelink = pywikibot.Link(metadata.get('image'), source=commonssite, defaultNamespace=6)
                    image = pywikibot.ImagePage(imagelink)
                    # Resolve file redirects before targeting the claim.
                    if image.isRedirectPage():
                        image = pywikibot.ImagePage(image.getRedirectTarget())
                    if not image.exists():
                        pywikibot.output('[[%s]] doesn\'t exist so I can\'t link to it' % (image.title(),))
                    else:
                        newclaim.setTarget(image)
                        pywikibot.output('Adding %s --> %s' % (newclaim.getID(), newclaim.getTarget()))
                        artworkItem.addClaim(newclaim)
                if metadata.get('commonscat') and u'P373' not in claims:
                    # NOTE(review): copy-pasted message; this is the Commons
                    # category branch, not an image branch.
                    print u'no image found'
                    # Construct
                    newclaim = pywikibot.Claim(self.repo, u'P373')
                    commonssite = pywikibot.Site("commons", "commons")
                    commonslink = pywikibot.Link(metadata.get('commonscat'), source=commonssite, defaultNamespace=14)
                    commonscat = pywikibot.Page(commonslink)
                    if commonscat.isRedirectPage():
                        commonscat = pywikibot.Page(commonscat.getRedirectTarget())
                    if not commonscat.exists():
                        pywikibot.output('[[%s]] doesn\'t exist so I can\'t link to it' % (commonscat.title(),))
                    else:
                        # P373 stores the bare category name (no namespace).
                        newclaim.setTarget(commonscat.title(withNamespace=False))
                        pywikibot.output('Adding %s --> %s' % (newclaim.getID(), newclaim.getTarget()))
                        artworkItem.addClaim(newclaim)
                if metadata.get('lat') and metadata.get('lon') and u'P625' not in claims:
                    # NOTE(review): misleading message — coordinates WERE found.
                    print u'no coordinates found'
                    # Build coordinates and add them
                    coordinate = pywikibot.Coordinate(metadata.get('lat'), metadata.get('lon'), dim=100)
                    newclaim = pywikibot.Claim(self.repo, u'P625')
                    newclaim.setTarget(coordinate)
                    pywikibot.output(u'Adding %s, %s to %s' % (coordinate.lat, coordinate.lon, artworkItem.title()))
                    artworkItem.addClaim(newclaim)
def main():
    """Import UBB EpiDoc TEI records into the EAGLE Wikibase repository.

    Parses the TEI XML file at FILE_PATH with BeautifulSoup, extracts one
    record per <tei> element (identifier, description, translation, author,
    IPR, publisher, bibliography, EDH id, images) and — unless -dry was
    given — creates an item per record with the corresponding claims.
    Flags: -dry (no edits), -always (no confirmation prompt),
    -start:<label> (skip records until <label> is reached).
    """
    always = dryrun = startsWith = False
    # Matches an EDH identifier such as "HD012345" in a bibliography entry.
    edhRegex = re.compile('\s*(HD\d+)[\s.]*')
    # Handles command-line arguments for pywikibot.
    for arg in pywikibot.handleArgs():
        if arg == '-dry': # Performs a dry run (does not edit site)
            dryrun = True
        if arg == '-always': # Does not ask for confirmation
            always = True
        if arg.startswith('-start:'): # Label, for example: -start:UBBVarga000001
            startsWith = arg.replace('-start:', '')
    if not dryrun:
        # pywikibot/families/eagle_family.py
        site = pywikibot.Site('en', 'eagle').data_repository()
        commons = pywikibot.Site('commons', 'commons')
    soup = BeautifulSoup(open(FILE_PATH))
    for xml_item in soup.ubb.find_all('tei'):
        data = {} # Resets element info
        # Label and description
        url = xml_item.teiheader.filedesc.publicationstmt.find('idno', {'type': 'URI'}).get_text()
        # Label is built from the last three path components of the URI.
        match = re.search('/([^/]*?)/([^/]*?)/(\d+)\.xml$', url)
        data['label'] = match.group(1) + match.group(2) + match.group(3)
        if startsWith:
            if data['label'] != startsWith:
                continue # Skips files until start
            elif data['label'] == startsWith:
                startsWith = False # Resets
        pywikibot.output("\n>>>>> " + data['label'] + " <<<<<\n")
        # UBB Identifier
        data['ubb'] = url
        pywikibot.output("UBB identifier: " + data['ubb'])
        # Description
        data['description'] = elementText(xml_item.teiheader.profiledesc.textclass.keywords.term)
        pywikibot.output("Description: " + data['description'])
        # Translation
        try:
            data['translation'] = elementText(xml_item.find('text').body.find('div', {'type': 'translation'}).p)
        except AttributeError:
            # Records without a translation are skipped entirely.
            pywikibot.output('ERROR: translation not found!')
            continue
        # Author
        data['author'] = elementText(xml_item.teiheader.revisiondesc.change)
        pywikibot.output("Author: " + data['author'])
        # Translation language is inferred from the (hard-coded) author name.
        if data['author'] == 'Rada Varga':
            data['trans_lang'] = 'en'
        elif data['author'] == 'Ioan Piso':
            data['trans_lang'] = 'fr'
        else:
            pywikibot.output('ERROR! Author not recognized!')
            exit()
        pywikibot.output("Translation (" + data['trans_lang'].upper() + "): " + data['translation'])
        # IPR
        data['ipr'] = xml_item.find('text').body.find('div', {'type': 'translation'}).desc.ref['target']
        pywikibot.output("IPR: " + data['ipr'])
        # Publisher
        data['publisher'] = AUTHORITY
        # data['publisher'] = elementText(xml_item.teiheader.filedesc.publicationstmt.authority).title()
        pywikibot.output("Publisher: " + data['publisher'])
        # Process bibliography
        data['bibliography'] = []
        bibList = xml_item.find('text').body.find('div', {'type': 'bibliography'}).find_all('bibl')
        for b in bibList:
            bibText = elementText(b)
            if not bibText:
                continue
            # An entry matching the EDH pattern becomes the EDH id; all
            # others are kept as plain bibliography notes.
            match = edhRegex.match(bibText)
            if match:
                data['edh'] = match.group(1)
            else:
                data['bibliography'].append(bibText)
                pywikibot.output('Bib note #' + str(len(data['bibliography'])) + ': ' + bibText)
        if 'edh' in data:
            pywikibot.output("EDH: " + data['edh'])
        # Images
        data['images'] = []
        try:
            imgs = xml_item.facsimile.find_all('graphic')
            for i in imgs:
                # Wikidata only wants the image title
                img_title = i['url'].replace('https://commons.wikimedia.org/wiki/File:', '')
                data['images'].append(img_title)
                pywikibot.output('Image #' + str(len(data['images'])) + ': ' + i['url'])
        except AttributeError:
            # No <facsimile>
            pass
        pywikibot.output('') # newline
        # Interactive confirmation; 'a' switches to always-yes for the rest.
        choice = None
        while choice is None:
            if not always:
                choice = pywikibot.inputChoice(u"Proceed?", ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
            else:
                choice = 'y'
            if choice in ['A', 'a']:
                always = True
                choice = 'y'
            if not dryrun and choice in ['Y', 'y']:
                # Create the item and attach all claims.
                page = pywikibot.ItemPage(site)
                page.editEntity({'labels': {'en': data['label']}, 'descriptions': {data['trans_lang']: data['description']}})
                page.get()
                addClaimToItem(site, page, 'P59', data['ubb'])
                addClaimToItem(site, page, 'P25', data['ipr'])
                if 'edh' in data:
                    addClaimToItem(site, page, 'P24', data['edh'])
                for img_title in data['images']:
                    img_page = pywikibot.ImagePage(commons, img_title)
                    addClaimToItem(site, page, 'P10', img_page)
                # Translation property depends on the translation language.
                if data['trans_lang'] == 'en':
                    trans_property = 'P11'
                elif data['trans_lang'] == 'fr':
                    trans_property = 'P15'
                transClaim = pywikibot.Claim(site, trans_property)
                transClaim.setTarget(data['translation'])
                page.addClaim(transClaim)
                # Author, publisher and bibliography notes are attached as
                # sources on the translation claim.
                sources = []
                authorClaim = pywikibot.Claim(site, 'P21')
                authorClaim.setTarget(data['author'])
                sources.append(authorClaim)
                publisherClaim = pywikibot.Claim(site, 'P41')
                publisherClaim.setTarget(data['publisher'])
                sources.append(publisherClaim)
                for b in data['bibliography']:
                    bibClaim = pywikibot.Claim(site, 'P54')
                    bibClaim.setTarget(b)
                    sources.append(bibClaim)
                transClaim.addSources(sources)
def run(self):
    """
    Starts the robot.

    For every monument yielded by self.generator: resolve (or create) the
    matching Wikidata item, then add any statements that are still missing
    (instance-of, country, administrative unit, street address, coordinates,
    image, Europeana ID, Commons category).

    NOTE(review): the whole per-monument body is wrapped in a bare
    ``except`` that swallows every error and continues with the next
    monument — failures are only visible in stdout.
    """
    for monument in self.generator:
        try:
            monumentItem = None
            newclaims = []
            if monument.get('id') in self.monumentIds:
                # Known monument id -> item already exists, just load it.
                monumentItemTitle = u'Q%s' % (self.monumentIds.get(
                    monument.get('id')), )
                print monument
                print monumentItemTitle
                monumentItem = pywikibot.ItemPage(self.repo,
                                                  title=monumentItemTitle)
            else:
                print 'bla'
                #monumentItem = pywikibot.ItemPage(self.repo, title=u'')
                # Unknown id: create a new item. First strip wiki markup
                # from the name so it can be used as a plain-text label.
                monumentName = monument.get('name')
                #monumentName = re.sub('^\[\[([^\|]+)\|([^\]]+)\]\](.+)$', u'\\2\\3', monumentName)
                monumentName = re.sub('\[\[([^\|]+)\|([^\]]+)\]\]', u'\\2',
                                      monumentName)
                #monumentName = re.sub('^\[\[([^\]]+)\]\](.+)$', u'\\1\\2', monumentName)
                monumentName = re.sub('\[\[([^\]]+)\]\]', u'\\1',
                                      monumentName)
                # Labels over 200 chars: cut at a sentence boundary first,
                # then at a comma if still too long.
                if len(monumentName) > 200:
                    monumentName = re.sub('^(.{20,200})\.(.+)$', u'\\1.',
                                          monumentName)
                if len(monumentName) > 200:
                    monumentName = re.sub('^(.{20,200}),(.+)$', u'\\1.',
                                          monumentName)
                # Minimal entity payload: a single label in the source
                # language. Descriptions/claims are added afterwards.
                data = {
                    'labels': {
                        monument.get('lang'): {
                            'language': monument.get('lang'),
                            'value': monumentName
                        }
                    }
                }
                identification = {}
                summary = u'Creating new item with data from %s' % (
                    monument.get('source'), )
                pywikibot.output(summary)
                #monumentItem.editEntity(data, summary=summary)
                # Create the item directly through the repo API; the reply
                # carries the new Q-id (see the sample responses below).
                result = self.repo.editEntity(identification, data,
                                              summary=summary)
                #print result
                monumentItemTitle = result.get(u'entity').get('id')
                monumentItem = pywikibot.ItemPage(self.repo,
                                                  title=monumentItemTitle)
                '''
                {u'success': 1, u'entity': {u'lastrevid': 134951692, u'labels': {u'nl': {u'value ': u'[[Elswout]]: hoofdgebouw', u'language': u'nl'}}, u'descriptions': [], u'cla ims': [], u'type': u'item', u'id': u'Q17000292', u'aliases': []}}
                {u'success': 1, u'entity': {u'lastrevid': 134951703, u'labels': {u'nl': {u'value ': u'Elswout: landgoed', u'language': u'nl'}}, u'descriptions': [], u'claims': [ ], u'type': u'item', u'id': u'Q17000293', u'aliases': []}}
                {u'success': 1, u'entity': {u'lastrevid': 134951710, u'labels': {u'nl': {u'value ': u'Elswout: keermuren van het voorplein', u'language': u'nl'}}, u'descriptions ': [], u'claims': [], u'type': u'item', u'id': u'Q17000294', u'aliases': []}}
                '''
                #print monumentItem.title()
                # Record the source monument id on the freshly created item.
                newclaim = pywikibot.Claim(
                    self.repo, u'P%s' % (self.monumentIdProperty, ))
                newclaim.setTarget(monument.get('id'))
                pywikibot.output('Adding new id claim to %s' % monumentItem)
                monumentItem.addClaim(newclaim)
            if monumentItem and monumentItem.exists():
                data = monumentItem.get()
                descriptions = data.get('descriptions')
                claims = data.get('claims')
                print claims
                # Derive a description from the street address when the
                # item has none in the source language yet.
                if monument.get('address') and not descriptions.get(
                        monument.get('lang')):
                    #FIXME: If it contains links like '[[]]' it will break
                    if not u'(' in monument.get('address'):
                        monumentDescription = u'Rijksmonument op %s' % (
                            monument.get('address'), )
                        summary = u'Setting %s description to "%s"' % (
                            monument.get('lang'),
                            monumentDescription,
                        )
                        try:
                            monumentItem.editDescriptions(
                                {
                                    monument.get('lang'):
                                    monumentDescription
                                },
                                summary=summary)
                        except pywikibot.data.api.APIError:
                            # Label+description pairs must be unique on
                            # Wikidata; a clash is reported and skipped.
                            pywikibot.output(
                                u'Ooops, that didn\'t work. Another item already has the same description'
                            )
                # P31: instance of self.monumentType.
                if u'P31' not in claims:
                    newclaim = pywikibot.Claim(self.repo, u'P31')
                    monumentTypeItem = pywikibot.ItemPage(
                        self.repo, title=self.monumentType)
                    newclaim.setTarget(monumentTypeItem)
                    pywikibot.output('Adding instance claim to %s' %
                                     monumentItem)
                    monumentItem.addClaim(newclaim)
                # P17: country, resolved through the ISO 3166-1 table.
                if monument.get('adm0') and u'P17' not in claims:
                    print u'no country found'
                    if monument.get('adm0').upper() in self.iso3166_1Codes:
                        #print u'Found an item for the ISO code'
                        adm0ItemTitle = u'Q%s' % (self.iso3166_1Codes.get(
                            monument.get('adm0').upper()), )
                        adm0Item = pywikibot.ItemPage(self.repo,
                                                      title=adm0ItemTitle)
                        newclaim = pywikibot.Claim(self.repo, u'P17')
                        newclaim.setTarget(adm0Item)
                        pywikibot.output('Adding country claim to %s' %
                                         monumentItem)
                        monumentItem.addClaim(newclaim)
                else:
                    print u'country found'
                # P131: administrative unit. foundProv marks the case where
                # the only existing P131 is the province itself, so a more
                # specific unit may still be added.
                foundProv = False
                if u'P131' in claims and len(claims.get('P131')) == 1:
                    if monument.get('adm1').upper() in self.iso3166_2Codes:
                        if claims.get('P131')[0].getTarget().title(
                        ) == u'Q%s' % (self.iso3166_2Codes.get(
                                monument.get('adm1').upper()), ):
                            print u'This item only contains a province claim'
                            foundProv = True
                if u'P131' not in claims or foundProv:
                    print u'no administrative thingie found'
                    for adm in [
                            monument.get('adm1'),
                            monument.get('adm2'),
                            monument.get('adm3'),
                            monument.get('adm4')
                    ]:
                        if adm:
                            if adm.upper() in self.iso3166_2Codes:
                                # ISO 3166-2 code -> province item directly.
                                if not foundProv:
                                    print u'Found an item for the ISO code'
                                    admItemTitle = u'Q%s' % (
                                        self.iso3166_2Codes.get(
                                            adm.upper()), )
                                    admItem = pywikibot.ItemPage(
                                        self.repo, title=admItemTitle)
                                    newclaim = pywikibot.Claim(
                                        self.repo, u'P131')
                                    newclaim.setTarget(admItem)
                                    pywikibot.output(
                                        u'Adding %s to %s' %
                                        (admItem.title(),
                                         monumentItem.title()))
                                    monumentItem.addClaim(newclaim)
                                    #print adm1Item.get()
                            else:
                                # Otherwise treat the field as a wikilink to
                                # a local Wikipedia article and resolve it
                                # to an item.
                                adm = adm.replace(u'[', u'').replace(
                                    u']', u'')
                                site = pywikibot.Site(
                                    monument.get('lang'), u'wikipedia')
                                admLink = pywikibot.Link(
                                    adm, source=site, defaultNamespace=0)
                                admPage = pywikibot.Page(admLink)
                                if admPage.isRedirectPage():
                                    admPage = pywikibot.Page(
                                        admPage.getRedirectTarget())
                                if not admPage.exists():
                                    pywikibot.output(
                                        '[[%s]] doesn\'t exist so I can\'t link to it'
                                        % (admPage.title(), ))
                                elif admPage.isDisambig():
                                    pywikibot.output(
                                        '[[%s]] is a disambiguation page so I can\'t link to it'
                                        % (admPage.title(), ))
                                else:
                                    admItem = pywikibot.ItemPage.fromPage(
                                        admPage)
                                    if admItem.exists():
                                        # Only accept items that are (or
                                        # are located in) a Dutch
                                        # municipality (Q2039348).
                                        munFound = False
                                        if 'P31' in admItem.claims:
                                            for instClaim in admItem.claims.get(
                                                    'P31'):
                                                if instClaim.getTarget(
                                                ).title() == 'Q2039348':
                                                    munFound = True
                                        if not munFound:
                                            # It's not an administrative division, but it might be in one
                                            if 'P131' in admItem.claims:
                                                for possAdmClaim in admItem.claims.get(
                                                        'P131'):
                                                    possAdmItem = possAdmClaim.getTarget(
                                                    )
                                                    possAdmItem.get()
                                                    if 'P31' in possAdmItem.claims:
                                                        for instClaim in possAdmItem.claims.get(
                                                                'P31'):
                                                            if instClaim.getTarget(
                                                            ).title(
                                                            ) == 'Q2039348':
                                                                admItem = possAdmItem
                                                                munFound = True
                                                                continue
                                        if munFound:
                                            newclaim = pywikibot.Claim(
                                                self.repo, u'P131')
                                            newclaim.setTarget(admItem)
                                            pywikibot.output(
                                                u'Adding %s to %s' %
                                                (admItem.title(),
                                                 monumentItem.title()))
                                            monumentItem.addClaim(
                                                newclaim)
                else:
                    print u'administrative thingie found'
                # P969: street address, only when it is free of wiki markup.
                if monument.get('address') and u'P969' not in claims:
                    if u'[' not in monument.get(
                            'address') and u']' not in monument.get(
                                'address') and u'|' not in monument.get(
                                    'address'):
                        newclaim = pywikibot.Claim(self.repo, u'P969')
                        newclaim.setTarget(monument.get('address'))
                        pywikibot.output(u'Adding %s to %s' %
                                         (monument.get('address'),
                                          monumentItem.title()))
                        monumentItem.addClaim(newclaim)
                    else:
                        print u'Contains funky chars, skipping'
                    print u'no address found'
                    # Clean up the address and add it
                else:
                    print u'address found'
                # P625: coordinates (dim=100 is the stated precision radius
                # in metres — TODO confirm against the source data quality).
                if monument.get('lat') and monument.get(
                        'lon') and u'P625' not in claims:
                    print u'no coordinates found'
                    # Build coordinates and add them
                    coordinate = pywikibot.Coordinate(monument.get('lat'),
                                                      monument.get('lon'),
                                                      dim=100)
                    newclaim = pywikibot.Claim(self.repo, u'P625')
                    newclaim.setTarget(coordinate)
                    pywikibot.output(u'Adding %s, %s to %s' %
                                     (coordinate.lat, coordinate.lon,
                                      monumentItem.title()))
                    monumentItem.addClaim(newclaim)
                else:
                    print u'coordinates found'
                # P18: image, resolving Commons redirects first.
                if monument.get('image') and u'P18' not in claims:
                    print u'no image found'
                    # Construct
                    newclaim = pywikibot.Claim(self.repo, u'P18')
                    commonssite = pywikibot.Site("commons", "commons")
                    imagelink = pywikibot.Link(monument.get('image'),
                                               source=commonssite,
                                               defaultNamespace=6)
                    image = pywikibot.ImagePage(imagelink)
                    if image.isRedirectPage():
                        image = pywikibot.ImagePage(
                            image.getRedirectTarget())
                    if not image.exists():
                        pywikibot.output(
                            '[[%s]] doesn\'t exist so I can\'t link to it'
                            % (image.title(), ))
                    else:
                        newclaim.setTarget(image)
                        pywikibot.output(
                            'Adding %s --> %s' %
                            (newclaim.getID(), newclaim.getTarget()))
                        monumentItem.addClaim(newclaim)
                else:
                    print u'image found'
                # Europeana ID
                if u'P727' not in claims:
                    europeanaID = u'2020718/DR_%s' % (monument.get('id'), )
                    newclaim = pywikibot.Claim(self.repo, u'P727')
                    newclaim.setTarget(europeanaID)
                    pywikibot.output('Adding Europeana ID claim to %s' %
                                     monumentItem)
                    monumentItem.addClaim(newclaim)
                # P373: Commons category, resolving redirects first.
                if monument.get('commonscat') and u'P373' not in claims:
                    print u'no image found'
                    # Construct
                    newclaim = pywikibot.Claim(self.repo, u'P373')
                    commonssite = pywikibot.Site("commons", "commons")
                    commonslink = pywikibot.Link(
                        monument.get('commonscat'),
                        source=commonssite,
                        defaultNamespace=14)
                    commonscat = pywikibot.Page(commonslink)
                    if commonscat.isRedirectPage():
                        commonscat = pywikibot.Page(
                            commonscat.getRedirectTarget())
                    if not commonscat.exists():
                        pywikibot.output(
                            '[[%s]] doesn\'t exist so I can\'t link to it'
                            % (commonscat.title(), ))
                    else:
                        newclaim.setTarget(
                            commonscat.title(withNamespace=False))
                        pywikibot.output(
                            'Adding %s --> %s' %
                            (newclaim.getID(), newclaim.getTarget()))
                        monumentItem.addClaim(newclaim)
        except:
            # NOTE(review): bare except — hides all failures, including
            # programming errors, and moves on to the next monument.
            print u'F**k this shit, I am just going to contiue anyway'
            pass
def result(self, pagedata):
    """Convert a generator page result into an ImagePage.

    Delegates to the parent ``PageGenerator.result`` for the base page,
    wraps it in a ``pywikibot.ImagePage``, and, when the API payload
    carries an ``imageinfo`` section, caches its first record on the
    image object so later lookups need no extra request.
    """
    base_page = PageGenerator.result(self, pagedata)
    file_page = pywikibot.ImagePage(base_page)
    if 'imageinfo' in pagedata:
        # Cache the first imageinfo record from the API payload.
        file_page._imageinfo = pagedata['imageinfo'][0]
    return file_page
def createClaimImage(s, l):
    """Attach an image claim to the item named by *s* from image URL *l*.

    *s* is presumably an item identifier passed to ``dataLoad`` and *l* a
    Commons upload URL — TODO confirm against the callers. On success the
    result of ``editclaim`` is returned; on a pre-existing property a
    French diagnostic string is returned instead; every failure path only
    prints a message and returns None.

    NOTE(review): ``property`` here is a free name (presumably a global
    property id, shadowing the builtin) — verify it is defined at module
    level. ``pagename`` in the IsRedirectPage handler is also undefined in
    this scope and would raise NameError if that branch runs.
    """
    try:
        dataS = dataLoad(s)
        dicoS = dataS.get()
        try:
            # Bail out early if the property already exists on the item.
            retour = propExists(property, dicoS['claims'])
            if retour == True:
                retour = "La propriété P" + str(property).encode(
                    'utf-8') + " existe pour " + s.encode(
                        'utf-8') + " = " + idClean(dataS).encode(
                            'utf-8') + " avec pour valeur " + commonsId(
                                l).encode('utf-8')
                return retour
            else:
                try:
                    # Extract the file name from the upload.wikimedia.org URL.
                    # NOTE(review): the pattern contains '\/\commons' — the
                    # backslash before 'commons' looks like a typo ('\c' is
                    # not a valid escape in modern re) — confirm and fix.
                    image = re.match(
                        u'http:\/\/upload\.wikimedia\.org\/wikipedia\/\commons\/[0-9a-z]{1}\/[0-9a-z]{2}\/(.*\.[a-zA-Z]{0,4})',
                        l)
                    try:
                        pathImage = image.group(1)
                    except Exception as er:
                        # Match failed (image is None); pathImage stays unset.
                        pywikibot.output(u'Exception inconnue : %s' %
                                         str(er))
                    commons = pywikibot.getSite(u'commons', u'commons')
                    dataMedia = pywikibot.ImagePage(commons, pathImage)
                    mediadicoS = dataMedia.get()
                    try:
                        # Skip the placeholder image "Defaut.svg".
                        if pathImage and "Defaut.svg" not in pathImage:
                            try:
                                # Sourced with "imported from" (P143) enwiki (Q8447).
                                edit = dataS.editclaim(
                                    "p" + str(property),
                                    pathImage,
                                    refs={("p143", "Q8447")})
                                return edit
                            except pywikibot.EditConflict:
                                pywikibot.output(
                                    u'Skipping because of edit conflict')
                            except pywikibot.Error:
                                pywikibot.output(
                                    u'Exception inconnue : on passe au suivant'
                                )
                        else:
                            retour = "Le chemin vers l'image n'est pas valide"
                            return retour
                    except pywikibot.NoPage:
                        pywikibot.output(u'Page does not exist?!')
                    except pywikibot.IsRedirectPage:
                        # NOTE(review): 'pagename' is not defined here.
                        pywikibot.output(
                            u'%s est une page de redirection' % pagename)
                    except pywikibot.Error:
                        pywikibot.output(
                            u'Exception inconnue : on passe au suivant')
                    except Exception as er:
                        pywikibot.output(u'Exception inconnue : %s' %
                                         str(er))
                except pywikibot.EditConflict:
                    pywikibot.output(u'Skipping because of edit conflict')
                except pywikibot.Error:
                    pywikibot.output(
                        u'Exception inconnue : on passe au suivant')
                except Exception as er:
                    pywikibot.output(u'Exception inconnue : %s' % str(er))
        except pywikibot.NoPage:
            pywikibot.output(u'Page does not exist?!')
        except pywikibot.Error:
            pywikibot.output(u'Exception inconnue : on passe au suivant')
        except Exception as er:
            pywikibot.output(u'Exception inconnue : %s' % str(er))
    except pywikibot.NoPage:
        pywikibot.output(u'Page does not exist?!')
    except pywikibot.Error:
        pywikibot.output(u'Exception inconnue : on passe au suivant')
    except Exception as er:
        pywikibot.output(u'Exception inconnue : %s' % str(er))
def run(self):
    """
    Starts the robot.

    For each painting from self.generator: find or create the Wikidata
    item, then add missing statements (collection, located-in, instance
    of painting, creator, inception date, material, described-at URL) and
    optionally upload a better image to Commons and link it via P18.
    """
    # Q731126 = J. Paul Getty Museum, used as the collection target.
    getty = pywikibot.ItemPage(self.repo, u'Q731126')
    for painting in self.generator:
        # Buh, for this one I know for sure it's in there
        #print painting[u'id']
        print painting[u'url']
        paintingItem = None
        newclaims = []
        if painting[u'id'] in self.paintingIds:
            # Known accession id -> existing item.
            paintingItemTitle = u'Q%s' % (self.paintingIds.get(
                painting[u'id']), )
            print paintingItemTitle
            paintingItem = pywikibot.ItemPage(self.repo,
                                              title=paintingItemTitle)
        else:
            #Break for now
            print u'Let us create stuff'
            #continue
            #print u'WTFTFTFTFT???'
            #print 'bla'
            # New item: English label plus en/nl descriptions.
            data = {
                'labels': {},
                'descriptions': {},
            }
            data['labels']['en'] = {
                'language': 'en',
                'value': painting[u'title']
            }
            data['descriptions']['en'] = {
                'language': u'en',
                'value': u'painting by %s' % (painting[u'creator'], )
            }
            data['descriptions']['nl'] = {
                'language': u'nl',
                'value': u'schilderij van %s' % (painting[u'creator'], )
            }
            print data
            identification = {}
            summary = u'Creating new item with data from %s ' % (
                painting[u'url'], )
            pywikibot.output(summary)
            #monumentItem.editEntity(data, summary=summary)
            try:
                result = self.repo.editEntity(identification,
                                              data,
                                              summary=summary)
            except pywikibot.data.api.APIError:
                # We got ourselves a duplicate label and description, let's correct that
                pywikibot.output(
                    u'Oops, already had that one. Trying again')
                # Disambiguate the description with the accession id and retry.
                data['descriptions']['en'] = {
                    'language': u'en',
                    'value': u'painting by %s (Getty, %s)' %
                    (painting[u'creator'], painting[u'id'])
                }
                result = self.repo.editEntity(identification,
                                              data,
                                              summary=summary)
                pass
            #print result
            paintingItemTitle = result.get(u'entity').get('id')
            paintingItem = pywikibot.ItemPage(self.repo,
                                              title=paintingItemTitle)
            # Add to self.paintingIds so that we don't create dupes
            self.paintingIds[painting[u'id']] = paintingItemTitle.replace(
                u'Q', u'')
            # Accession-number claim, qualified and sourced.
            newclaim = pywikibot.Claim(
                self.repo, u'P%s' % (self.paintingIdProperty, ))
            newclaim.setTarget(painting[u'id'])
            pywikibot.output('Adding new id claim to %s' % paintingItem)
            paintingItem.addClaim(newclaim)
            self.addReference(paintingItem, newclaim, painting[u'url'])
            newqualifier = pywikibot.Claim(
                self.repo, u'P195')  #Add collection, isQualifier=True
            newqualifier.setTarget(getty)
            pywikibot.output('Adding new qualifier claim to %s' %
                             paintingItem)
            newclaim.addQualifier(newqualifier)
            # Stand-alone collection claim (P195 -> Getty) as well.
            collectionclaim = pywikibot.Claim(self.repo, u'P195')
            collectionclaim.setTarget(getty)
            pywikibot.output('Adding collection claim to %s' %
                             paintingItem)
            paintingItem.addClaim(collectionclaim)
            self.addReference(paintingItem, collectionclaim,
                              painting[u'url'])
        if paintingItem and paintingItem.exists():
            painting['wikidata'] = paintingItem.title()
            data = paintingItem.get()
            claims = data.get('claims')
            #print claims
            # located in
            if u'P276' not in claims and painting.get(u'location'):
                newclaim = pywikibot.Claim(self.repo, u'P276')
                location = pywikibot.ItemPage(self.repo,
                                              painting.get(u'location'))
                newclaim.setTarget(location)
                pywikibot.output('Adding located in claim to %s' %
                                 paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim,
                                  painting['url'])
            # instance of always painting while working on the painting collection
            if u'P31' not in claims:
                # Q3305213 = painting.
                dcformatItem = pywikibot.ItemPage(self.repo,
                                                  title='Q3305213')
                newclaim = pywikibot.Claim(self.repo, u'P31')
                newclaim.setTarget(dcformatItem)
                pywikibot.output('Adding instance claim to %s' %
                                 paintingItem)
                paintingItem.addClaim(newclaim)
                self.addReference(paintingItem, newclaim,
                                  painting['url'])
            # creator
            if u'P170' not in claims and painting.get(u'creator'):
                #print painting[u'creator']
                # Search Wikidata for an item whose en/nl label or alias
                # matches the creator name exactly.
                creategen = pagegenerators.PreloadingItemGenerator(
                    pagegenerators.WikidataItemGenerator(
                        pagegenerators.SearchPageGenerator(
                            painting[u'creator'],
                            step=None,
                            total=10,
                            namespaces=[0],
                            site=self.repo)))
                newcreator = None
                for creatoritem in creategen:
                    print creatoritem.title()
                    if creatoritem.get().get('labels').get(
                            'en'
                    ) == painting[u'creator'] or creatoritem.get().get(
                            'labels').get('nl') == painting[u'creator']:
                        #print creatoritem.get().get('labels').get('en')
                        #print creatoritem.get().get('labels').get('nl')
                        # Check occupation and country of citizinship
                        if u'P106' in creatoritem.get().get(
                                'claims') and (
                                    u'P21' in creatoritem.get().get(
                                        'claims') or
                                    u'P800' in creatoritem.get().get(
                                        'claims')):
                            newcreator = creatoritem
                            continue
                    elif (
                            creatoritem.get().get('aliases').get('en')
                            and painting[u'creator'] in
                            creatoritem.get().get('aliases').get('en')
                    ) or (creatoritem.get().get('aliases').get('nl')
                          and painting[u'creator'] in
                          creatoritem.get().get('aliases').get('nl')):
                        if u'P106' in creatoritem.get().get(
                                'claims') and (
                                    u'P21' in creatoritem.get().get(
                                        'claims') or
                                    u'P800' in creatoritem.get().get(
                                        'claims')):
                            newcreator = creatoritem
                            continue
                if newcreator:
                    pywikibot.output(newcreator.title())
                    newclaim = pywikibot.Claim(self.repo, u'P170')
                    newclaim.setTarget(newcreator)
                    pywikibot.output('Adding creator claim to %s' %
                                     paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim,
                                      painting[u'url'])
                    #print creatoritem.title()
                    #print creatoritem.get()
                else:
                    pywikibot.output('No item found for %s' %
                                     (painting[u'creator'], ))
            else:
                print u'Already has a creator'
            # date of creation
            if u'P571' not in claims and painting.get(u'date'):
                if len(painting[u'date']
                       ) == 4 and painting[u'date'].isnumeric():
                    # It's a year
                    newdate = pywikibot.WbTime(year=painting[u'date'])
                    newclaim = pywikibot.Claim(self.repo, u'P571')
                    newclaim.setTarget(newdate)
                    pywikibot.output(
                        'Adding date of creation claim to %s' %
                        paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim,
                                      painting[u'url'])
            # material used
            if u'P186' not in claims and painting.get(u'medium'):
                if painting.get(u'medium') == u'Oil on canvas':
                    # Q296955 = oil paint, Q4259259 = canvas,
                    # Q861259 = painting surface.
                    olieverf = pywikibot.ItemPage(self.repo, u'Q296955')
                    doek = pywikibot.ItemPage(self.repo, u'Q4259259')
                    oppervlak = pywikibot.ItemPage(self.repo,
                                                   u'Q861259')
                    newclaim = pywikibot.Claim(self.repo, u'P186')
                    newclaim.setTarget(olieverf)
                    pywikibot.output(
                        'Adding new oil paint claim to %s' %
                        paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim,
                                      painting[u'url'])
                    newclaim = pywikibot.Claim(self.repo, u'P186')
                    newclaim.setTarget(doek)
                    pywikibot.output('Adding new canvas claim to %s' %
                                     paintingItem)
                    paintingItem.addClaim(newclaim)
                    self.addReference(paintingItem, newclaim,
                                      painting[u'url'])
                    newqualifier = pywikibot.Claim(
                        self.repo, u'P518')  #Applies to part
                    newqualifier.setTarget(oppervlak)
                    pywikibot.output(
                        'Adding new qualifier claim to %s' %
                        paintingItem)
                    newclaim.addQualifier(newqualifier)
            # Described at url
            if u'P973' not in claims:
                newclaim = pywikibot.Claim(self.repo, u'P973')
                newclaim.setTarget(painting[u'url'])
                pywikibot.output('Adding described at claim to %s' %
                                 paintingItem)
                paintingItem.addClaim(newclaim)
                # self.addReference(paintingItem, newclaim, uri)
            # Upload an image baby! BUT NOT NOW
            imagetitle = u''
            if painting.get(u'imageurl'):
                #A free file is available, let's see how big the current file is
                if u'P18' in claims:
                    imagefile = claims.get('P18')[0].getTarget()
                    size = imagefile.latest_file_info.size
                # Upload when no image yet, or the current one is < ~1 MB.
                if u'P18' not in claims or size < 1000000:
                    commonssite = pywikibot.Site("commons", "commons")
                    photo = Photo(painting[u'imageurl'], painting)
                    titlefmt = u'%(creator)s - %(title)s - %(id)s - J. Paul Getty Museum.%(_ext)s'
                    pagefmt = u'User:Multichill/J. Paul Getty Museum'
                    duplicates = photo.findDuplicateImages()
                    if duplicates:
                        pywikibot.output(u"Skipping duplicate of %r" %
                                         duplicates)
                        imagetitle = duplicates[0]
                        #return duplicates[0]
                    else:
                        imagetitle = self.cleanUpTitle(
                            photo.getTitle(titlefmt))
                        imagefile = pywikibot.FilePage(commonssite,
                                                       title=imagetitle)
                        imagetitle = imagefile.title()
                        pywikibot.output(imagetitle)
                        description = photo.getDescription(pagefmt)
                        pywikibot.output(description)
                        imagefile.text = description
                        # Download to a temp file, then chunk-upload it.
                        handle, tempname = tempfile.mkstemp()
                        with os.fdopen(handle, "wb") as t:
                            t.write(photo.downloadPhoto().getvalue())
                            t.close()
                        #tempname
                        commonssite.upload(imagefile,
                                           source_filename=tempname,
                                           ignore_warnings=True,
                                           chunk_size=1000000)
                        #bot = upload.UploadRobot(url=tempname,
                        #                         description=description,
                        #                         useFilename=imagetitle,
                        #                         keepFilename=True,
                        #                         verifyDescription=False,
                        #                         uploadByUrl=False,
                        #                         targetSite=commonssite)
                        #bot._contents = photo.downloadPhoto().getvalue()
                        pywikibot.output(
                            'Uploaded a file, sleeping a bit so I don\it run into lagging databases'
                        )
                        time.sleep(15)
                        #bot._retrieved = True
                        #bot.run()
            # P18: link the uploaded/duplicate image, following redirects.
            if u'P18' not in claims and imagetitle:
                newclaim = pywikibot.Claim(self.repo, u'P18')
                imagelink = pywikibot.Link(imagetitle,
                                           source=commonssite,
                                           defaultNamespace=6)
                image = pywikibot.ImagePage(imagelink)
                if image.isRedirectPage():
                    image = pywikibot.ImagePage(
                        image.getRedirectTarget())
                newclaim.setTarget(image)
                pywikibot.output(
                    'Adding %s --> %s' %
                    (newclaim.getID(), newclaim.getTarget()))
                paintingItem.addClaim(newclaim)
def zch(a):
    """Build a Polish Wiktionary entry for the Han character page *a*.

    Scrapes the matching English Wiktionary page ({{Han char}}, {{Han ref}},
    {{zh-forms}}, {{ja-forms}} templates) and probes Commons (via shared-file
    checks on en.wikt) for stroke-order and script-style images. Returns the
    assembled wikitext, or 0 when no {{Han char}} template is found. Any
    problems are reported to stdout and appended to the on-wiki log via
    ``log_write`` (defined elsewhere; ``site_en``/``log_site`` are globals
    — presumably set up at module level, verify).
    """
    # Regexes for the en.wikt templates and their named arguments.
    han_char = re.compile('{{Han(_| )char\|(.*?)}', re.DOTALL)
    han_ref = re.compile('{{Han(_| )ref\|(.*})')
    zh_f = re.compile('{{zh-forms\|(.*)}')
    jap_f = re.compile('{{ja-forms\|(.*)}')
    kx = re.compile('kx=(.*?)(\||})')
    dkj = re.compile('\|dkj=(.*?)(\||})')
    dj = re.compile('\|dj=(.*?)(\||})')
    hdz = re.compile('\|hdz=(.*?)(\||})')
    rn = re.compile('rn=([0-9]*?)\|')
    rad = re.compile('rad=(.)')
    han_as = re.compile('as=([0-9]*?)\|')
    sn = re.compile('sn=([0-9]*?)\|')
    canj = re.compile('canj=([^\|]*)')
    cr = re.compile('four=(.*?)\|')
    alt = re.compile('alt=(.*?)\|')
    asj = re.compile('asj=(.*?)\|')
    tekst = ''
    # Abort flags: 1 means the corresponding en.wikt argument is missing.
    rn_abort = 0
    rad_abort = 0
    han_as_abort = 0
    sn_abort = 0
    canj_abort = 0
    cr_abort = 0
    ang = pywikibot.Page(site_en, a.title())
    han_char_s = re.search(han_char, ang.get())
    log = ''
    if han_char_s != None:
        szablon_han = han_char_s.group(2)
        rn_s = re.search(rn, szablon_han)
        rad_s = re.search(rad, szablon_han)
        han_as_s = re.search(han_as, szablon_han)
        sn_s = re.search(sn, szablon_han)
        canj_s = re.search(canj, szablon_han)
        cr_s = re.search(cr, szablon_han)
        alt_s = re.search(alt, szablon_han)
        asj_s = re.search(asj, szablon_han)
        # alter/alter1 flag non-empty alt=/asj= arguments (alternative
        # forms) so the entry can be flagged for manual review.
        if alt_s == None:
            alter = 0
        else:
            if alt_s.group(1) == '':
                alter = 0
            else:
                alter = 1
        if asj_s == None:
            alter1 = 0
        else:
            if asj_s.group(1) == '':
                alter1 = 0
            else:
                alter1 = 1
        #print a.title()
        if rn_s == None:
            print('[[%s]] - Nie istnieje argument \'rn\'' % a.title())
            log = log + '\n*[[%s]] - Nie istnieje argument \'rn\'' % a.title()
            rn_abort = 1
        if rad_s == None:
            print('[[%s]] - Nie istnieje argument \'rad\'' % a.title())
            log = log + '\n*[[%s]] - Nie istnieje argument \'rad\'' % a.title()
            rad_abort = 1
        if han_as_s != None:
            #print han_as_s.group(1)
            # Additional-strokes count: normalize '0'/'00' and strip a
            # single leading zero.
            if han_as_s.group(1) == '0' or han_as_s.group(1) == '00':
                as_output = '+ 0'
            else:
                if han_as_s.group(1)[0] == '0':
                    as_output = '+ %s' % han_as_s.group(1)[1]
                else:
                    as_output = '+ %s' % han_as_s.group(1)
            #print as_output
        else:
            han_as_abort = 1
        if sn_s == None:
            sn_abort = 1
        if canj_s == None:
            canj_abort = 1
        if cr_s != None:
            if cr_s.group(1).isspace() or cr_s.group(1) == '':
                print('[[%s]] - argument \'four\' na en.wikt jest pusty' %
                      a.title())
                log = log + '\n*[[%s]] - argument \'four\' na en.wikt jest pusty' % a.title(
                )
        else:
            cr_abort = 1
        # Stroke-order images on Commons, probed per style suffix:
        # -bw/-red/-order (plain), -j* (Japanese), -t* (traditional),
        # -a* (alternative). First hit wins: comic strip > shaded > animated.
        kolejnosc_koncowa_c = ''
        if pywikibot.ImagePage(site_en,
                               'File:%s-bw.png' % a.title()).fileIsShared():
            kolejnosc_koncowa_c = '{{zch-komiks}}'
        else:
            if pywikibot.ImagePage(site_en, 'File:%s-red.png' %
                                   a.title()).fileIsShared():
                kolejnosc_koncowa_c = '{{zch-cienie}}'
            else:
                if pywikibot.ImagePage(site_en, 'File:%s-order.gif' %
                                       a.title()).fileIsShared():
                    kolejnosc_koncowa_c = '{{zch-animacja}}'
        kolejnosc_koncowa_j = ''
        if pywikibot.ImagePage(site_en,
                               'File:%s-jbw.png' % a.title()).fileIsShared():
            kolejnosc_koncowa_j = '{{zch-komiks|j}}'
        else:
            if pywikibot.ImagePage(site_en, 'File:%s-jred.png' %
                                   a.title()).fileIsShared():
                kolejnosc_koncowa_j = '{{zch-cienie|j}}'
            else:
                if pywikibot.ImagePage(site_en, 'File:%s-jorder.gif' %
                                       a.title()).fileIsShared():
                    kolejnosc_koncowa_j = '{{zch-animacja|j}}'
        kolejnosc_koncowa_t = ''
        if pywikibot.ImagePage(site_en,
                               'File:%s-tbw.png' % a.title()).fileIsShared():
            kolejnosc_koncowa_t = '{{zch-komiks|t}}'
        else:
            if pywikibot.ImagePage(site_en, 'File:%s-tred.png' %
                                   a.title()).fileIsShared():
                kolejnosc_koncowa_t = '{{zch-cienie|t}}'
            else:
                if pywikibot.ImagePage(site_en, 'File:%s-torder.gif' %
                                       a.title()).fileIsShared():
                    kolejnosc_koncowa_t = '{{zch-animacja|t}}'
        kolejnosc_koncowa_a = ''
        if pywikibot.ImagePage(site_en,
                               'File:%s-abw.png' % a.title()).fileIsShared():
            kolejnosc_koncowa_a = '{{zch-komiks|a}}'
        else:
            if pywikibot.ImagePage(site_en, 'File:%s-ared.png' %
                                   a.title()).fileIsShared():
                kolejnosc_koncowa_a = '{{zch-cienie|a}}'
            else:
                if pywikibot.ImagePage(site_en, 'File:%s-aorder.gif' %
                                       a.title()).fileIsShared():
                    kolejnosc_koncowa_a = '{{zch-animacja|a}}'
        # Entry header and {{klucz}} (radical) section.
        tekst = '== {{zh|%s}} ({{znak chiński}}) ==\n{{klucz}}' % a.title()
        if rn_abort or rad_abort or han_as_abort:
            print(
                '[[%s]] - w en.wikt nie istnieje któryś z argumentów do {{klucz}}'
                % a.title())
            log = log + '\n*[[%s]] - w en.wikt nie istnieje któryś z argumentów do {{s|klucz}}' % a.title(
            )
        else:
            tekst = tekst + ' %s %s %s' % (rn_s.group(1), rad_s.group(1),
                                           as_output)
        tekst = tekst + '\n{{kreski}}'
        if sn_abort:
            print('[[%s]] - w en.wikt nie istnieje argument do {{kreski}}' %
                  a.title())
            log = log + '\n*[[%s]] - w en.wikt nie istnieje argument do {{s|kreski}}' % a.title(
            )
        else:
            tekst = tekst + ' %s\n' % sn_s.group(1)
        # Variant forms from {{zh-forms}} / {{ja-forms}}.
        zh_f_s = re.search(zh_f, ang.get())
        ja_f_s = re.search(jap_f, ang.get())
        warianty = '{{warianty'
        warianty_obr = '{{warianty-obrazek'
        ku = ''
        xu = ''
        sou = ''
        sot = ''
        ming = ''
        upr = ''
        trad = ''
        shin = ''
        if zh_f_s != None:
            zh_f_str = zh_f_s.group(1).replace("[", "").replace(
                "]", "").replace("{{zh-lookup|", "").replace("}", "")
            zh_osobno = zh_f_str.split('|')
            # [0] = simplified, [1] = traditional.
            warianty = warianty + ' | {{zch-w|ct|%s}} | {{zch-w|cu|%s}}' % (
                zh_osobno[1], zh_osobno[0])
            #TODO: handle PREFIXINDEX, as in 弦
        if ja_f_s != None:
            ja_f_str = ja_f_s.group(1).replace("[", "").replace(
                "]", "").replace("{{zh-lookup|", "").replace("}", "")
            ja_osobno = ja_f_str.split('|')
            # [0] = shinjitai, [1] = simplified, [2] = traditional.
            warianty = warianty + ' | {{zch-w|ct|%s}} | {{zch-w|cu|%s}} | {{zch-w|js|%s}}' % (
                ja_osobno[2], ja_osobno[1], ja_osobno[0])
            trad = ja_osobno[2]
            upr = ja_osobno[1]
            shin = ja_osobno[0]
        # Script-style images: clerical (c), semi-cursive/xinshu (xt),
        # cursive/caoshu (ca) or "still" (st), regular/kaishu (kt),
        # songti (sot); svg preferred, then png (|p), then gif (|g).
        if pywikibot.ImagePage(site_en, 'File:%s-clerical.svg' %
                               a.title()).fileIsShared():
            warianty_obr = warianty_obr + ' | {{zch-obrazek|c|%s}}' % a.title()
        else:
            if pywikibot.ImagePage(site_en, 'File:%s-clerical.png' %
                                   a.title()).fileIsShared():
                warianty_obr = warianty_obr + ' | {{zch-obrazek|c|%s|p}}' % a.title(
                )
            else:
                if pywikibot.ImagePage(site_en, 'File:%s-clerical.gif' %
                                       a.title()).fileIsShared():
                    warianty_obr = warianty_obr + ' | {{zch-obrazek|c|%s|g}}' % a.title(
                    )
        if pywikibot.ImagePage(site_en, 'File:%s-xinshu.svg' %
                               a.title()).fileIsShared():
            warianty_obr = warianty_obr + ' | {{zch-obrazek|xt|%s}}' % a.title(
            )
        else:
            if pywikibot.ImagePage(site_en, 'File:%s-xinshu.png' %
                                   a.title()).fileIsShared():
                warianty_obr = warianty_obr + ' | {{zch-obrazek|xt|%s|p}}' % a.title(
                )
            else:
                if pywikibot.ImagePage(site_en, 'File:%s-xinshu.gif' %
                                       a.title()).fileIsShared():
                    warianty_obr = warianty_obr + ' | {{zch-obrazek|xt|%s|g}}' % a.title(
                    )
        if pywikibot.ImagePage(site_en, 'File:%s-still.svg' %
                               a.title()).fileIsShared():
            warianty_obr = warianty_obr + ' | {{zch-obrazek|st|%s}}' % a.title(
            )
        else:
            if pywikibot.ImagePage(site_en, 'File:%s-caoshu.svg' %
                                   a.title()).fileIsShared():
                warianty_obr = warianty_obr + ' | {{zch-obrazek|ca|%s}}' % a.title(
                )
            else:
                if pywikibot.ImagePage(site_en, 'File:%s-still.png' %
                                       a.title()).fileIsShared():
                    warianty_obr = warianty_obr + ' | {{zch-obrazek|st|%s|p}}' % a.title(
                    )
                else:
                    if pywikibot.ImagePage(site_en, 'File:%s-caoshu.png' %
                                           a.title()).fileIsShared():
                        warianty_obr = warianty_obr + ' | {{zch-obrazek|ca|%s|p}}' % a.title(
                        )
                    else:
                        if pywikibot.ImagePage(site_en, 'File:%s-still.gif' %
                                               a.title()).fileIsShared():
                            warianty_obr = warianty_obr + ' | {{zch-obrazek|st|%s|g}}' % a.title(
                            )
                        else:
                            if pywikibot.ImagePage(
                                    site_en, 'File:%s-caoshu.gif' %
                                    a.title()).fileIsShared():
                                warianty_obr = warianty_obr + ' | {{zch-obrazek|ca|%s|g}}' % a.title(
                                )
        if pywikibot.ImagePage(site_en, 'File:%s-kaishu.svg' %
                               a.title()).fileIsShared():
            warianty_obr = warianty_obr + ' | {{zch-obrazek|kt|%s}}' % a.title(
            )
        else:
            if pywikibot.ImagePage(site_en, 'File:%s-kaishu.png' %
                                   a.title()).fileIsShared():
                warianty_obr = warianty_obr + ' | {{zch-obrazek|kt|%s|p}}' % a.title(
                )
            else:
                if pywikibot.ImagePage(site_en, 'File:%s-kaishu.gif' %
                                       a.title()).fileIsShared():
                    warianty_obr = warianty_obr + ' | {{zch-obrazek|kt|%s|g}}' % a.title(
                    )
        if pywikibot.ImagePage(site_en, 'File:%s-songti.svg' %
                               a.title()).fileIsShared():
            warianty_obr = warianty_obr + ' | {{zch-obrazek|sot|%s}}' % a.title(
            )
        else:
            if pywikibot.ImagePage(site_en, 'File:%s-songti.png' %
                                   a.title()).fileIsShared():
                warianty_obr = warianty_obr + ' | {{zch-obrazek|sot|%s|p}}' % a.title(
                )
            else:
                if pywikibot.ImagePage(site_en, 'File:%s-songti.gif' %
                                       a.title()).fileIsShared():
                    warianty_obr = warianty_obr + ' | {{zch-obrazek|sot|%s|g}}' % a.title(
                    )
        # Append the collected variant templates to the entry.
        if warianty == '{{warianty':
            tekst = tekst + '{{warianty|{{zch-w}}'
        else:
            tekst = tekst + warianty
        tekst = tekst + '}}'
        if warianty_obr != '{{warianty-obrazek':
            tekst = tekst + ' ' + warianty_obr + '}}'
        tekst = tekst + '\n{{kolejność}}'
        if kolejnosc_koncowa_c == '' and kolejnosc_koncowa_j == '' and kolejnosc_koncowa_t == '' and kolejnosc_koncowa_a == '':
            print(
                '[[%s]] - na commons nie znaleziono żadnej kolejności pisania'
                % a.title())
            log = log + '\n*[[%s]] - na commons nie znaleziono żadnej kolejności pisania' % a.title(
            )
        else:
            tekst = tekst + '\n'
            if kolejnosc_koncowa_c != '':
                tekst = tekst + '%s ' % kolejnosc_koncowa_c
            if kolejnosc_koncowa_j != '':
                tekst = tekst + '%s ' % kolejnosc_koncowa_j
            if kolejnosc_koncowa_t != '':
                tekst = tekst + '%s ' % kolejnosc_koncowa_t
            if kolejnosc_koncowa_a != '':
                tekst = tekst + '%s ' % kolejnosc_koncowa_a
        tekst = tekst + '\n{{znaczenia}}\n{{etymologia}}'
        # Etymology images: oracle bone (o), bronze (br), big seal (bs),
        # seal (ss); svg preferred over png (|p).
        etym = ' {{warianty-obrazek'
        if pywikibot.ImagePage(site_en, 'File:%s-oracle.svg' %
                               a.title()).fileIsShared():
            etym = etym + ' | {{zch-obrazek|o|%s}}' % a.title()
        else:
            if pywikibot.ImagePage(site_en, 'File:%s-oracle.png' %
                                   a.title()).fileIsShared():
                etym = etym + ' | {{zch-obrazek|o|%s|p}}' % a.title()
        if pywikibot.ImagePage(site_en, 'File:%s-bronze.svg' %
                               a.title()).fileIsShared():
            etym = etym + ' | {{zch-obrazek|br|%s}}' % a.title()
        else:
            if pywikibot.ImagePage(site_en, 'File:%s-bronze.png' %
                                   a.title()).fileIsShared():
                etym = etym + ' | {{zch-obrazek|br|%s|p}}' % a.title()
        if pywikibot.ImagePage(site_en, 'File:%s-bigseal.svg' %
                               a.title()).fileIsShared():
            etym = etym + ' | {{zch-obrazek|bs|%s}}' % a.title()
        else:
            if pywikibot.ImagePage(site_en, 'File:%s-bigseal.png' %
                                   a.title()).fileIsShared():
                etym = etym + ' | {{zch-obrazek|bs|%s|p}}' % a.title()
        if pywikibot.ImagePage(site_en, 'File:%s-seal.svg' %
                               a.title()).fileIsShared():
            etym = etym + ' | {{zch-obrazek|ss|%s}}' % a.title()
        else:
            if pywikibot.ImagePage(site_en, 'File:%s-seal.png' %
                                   a.title()).fileIsShared():
                etym = etym + ' | {{zch-obrazek|ss|%s|p}}' % a.title()
        etym = etym + '}}'
        if etym != ' {{warianty-obrazek}}':
            tekst = tekst + etym
        # {{kody}}: Cangjie input code, four-corner code, Unicode (hex).
        tekst = tekst + '\n{{kody|cjz='
        if canj_abort:
            print('[[%s]] - w en.wikt nie istnieje argument cjz' % a.title())
            log = log + '\n*[[%s]] - w en.wikt nie istnieje argument cjz' % a.title(
            )
        else:
            tekst = tekst + '%s' % canj_s.group(1)
        tekst = tekst + '|cr='
        if cr_abort == 1:
            print('[[%s]] - w en.wikt nie istnieje argument \'\'four\'\'' %
                  a.title())
            log = log + '\n*[[%s]] - w en.wikt nie istnieje argument \'\'four\'\'' % a.title(
            )
        else:
            tekst = tekst + '%s' % cr_s.group(1)
        tekst = tekst + '|u=%x}}' % ord(a.title())
        # Dictionary references from {{Han ref}} (KangXi, Daikanwa,
        # Dae Jaweon, Hanyu Da Zidian).
        han_ref_s = re.search(han_ref, ang.get())
        if han_ref_s != None:
            tekst = tekst + '\n{{słowniki'
            kx_s = re.search(kx, han_ref_s.group(2))
            if kx_s != None:
                tekst = tekst + '|kx=%s' % kx_s.group(1)
            dkj_s = re.search(dkj, han_ref_s.group(2))
            if dkj_s != None:
                tekst = tekst + '|dkj=%s' % dkj_s.group(1)
            dj_s = re.search(dj, han_ref_s.group(2))
            if dj_s != None:
                tekst = tekst + '|dj=%s' % dj_s.group(1)
            hdz_s = re.search(hdz, han_ref_s.group(2))
            if hdz_s != None:
                tekst = tekst + '|hdz=%s' % hdz_s.group(1)
            tekst = tekst + '}}'
        tekst = tekst + '\n{{uwagi}}\n{{źródła}}'
        if alter != 0 or alter1 != 0:
            print('[[%s]] - do sprawdzenia, znaleziono alternatywne zapisy' %
                  a.title())
            log = log + '\n*[[%s]] - do sprawdzenia, znaleziono alternatywne zapisy' % a.title(
            )
        log_write(log_site, log, a.title())
        return tekst
    else:
        print('[[%s]] - Nie znaleziono szablonu {{Han char}}' % a.title())
        log = log + '\n*[[%s]] - Nie znaleziono szablonu {{s|Han char}}, pomijam' % a.title(
        )
        log_write(log_site, log, a.title())
        return 0
# -*- coding: utf-8 -*- import re import urllib2 import pywikibot pywikibot.config.put_throttle = 12 site = pywikibot.Site('commons', 'commons') site.login() # __ __ _ _____ _____ # \ \ / / | | |_ _| | __ \ # \ \ /\ / ___ _ __| | __ | | _ __ | |__) _ __ ___ __ _ _ __ ___ ___ ___ # \ \/ \/ / _ \| '__| |/ / | | | '_ \ | ___| '__/ _ \ / _` | '__/ _ / __/ __| # \ /\ | (_) | | | < _| |_| | | | | | | | | (_) | (_| | | | __\__ \__ \ # \/ \/ \___/|_| |_|\_\ |_____|_| |_| |_| |_| \___/ \__, |_| \___|___|___/ # __/ | # |___/ def fix_image(svg): print ''.join(list(urllib2.urlopen(svg.fileUrl()))) fix_image(pywikibot.ImagePage(site, '2NOGCMOS.svg'))
def _en_file_exists(filename):
    """Return True if *filename* exists on en.wiktionary.

    Missing pages and redirects count as absent.  Only the existence
    check matters here; the boolean returned by fileIsShared() itself
    is deliberately ignored (as in the original code).

    BUG FIX: the original wrote ``except pywikibot.NoPage or
    pywikibot.IsRedirectPage:`` which evaluates to ``except
    pywikibot.NoPage:`` only -- redirects escaped the handler.
    """
    try:
        pywikibot.ImagePage(site_en, filename).fileIsShared()
    except (pywikibot.NoPage, pywikibot.IsRedirectPage):
        return False
    return True


def _add_first_wariant(new, candidates):
    """Add the first existing image from *candidates* as a script variant.

    :param candidates: (wariant key, file title, marker) triples in
        priority order; only the first hit is registered.
    """
    for key, filename, marker in candidates:
        if _en_file_exists(filename):
            new.addWariant(key, marker)
            return


def _ext_candidates(key, a, stem):
    """svg/png/gif fallback chain for one script variant of character *a*."""
    return [(key, 'File:%s-%s.svg' % (a, stem), ''),
            (key, 'File:%s-%s.png' % (a, stem), '|p'),
            (key, 'File:%s-%s.gif' % (a, stem), '|g')]


def _add_stroke_order(new, a, prefix):
    """Register the best stroke-order image for *a*.

    Priority: -bw.png (comic strip) > -red.png (shaded) > -order.gif
    (animation, only when its Commons description carries no
    {{ARlicense}} tag).  *prefix* is '' / 'j' / 't' / 'a'.
    """
    if _en_file_exists('File:%s-%sbw.png' % (a, prefix)):
        new.addKolejnosc(prefix, '{{zch-komiks')
    elif _en_file_exists('File:%s-%sred.png' % (a, prefix)):
        new.addKolejnosc(prefix, '{{zch-cienie')
    elif _en_file_exists('File:%s-%sorder.gif' % (a, prefix)):
        try:
            tmpget = pywikibot.ImagePage(
                commons, 'File:%s-%sorder.gif' % (a, prefix)).get()
        except (pywikibot.NoPage, pywikibot.IsRedirectPage):
            return
        if '{{ARlicense' not in tmpget:
            new.addKolejnosc(prefix, '{{zch-animacja')


def _add_etym_image(new, a, key, stem):
    """Register a historical-script image (svg preferred over png)."""
    if _en_file_exists('File:%s-%s.svg' % (a, stem)):
        new.addEtym(key, '%s' % a)
    elif _en_file_exists('File:%s-%s.png' % (a, stem)):
        new.addEtym(key, '%s|p' % a)


def _clean_forms(text):
    """Strip wiki markup from a *-forms template argument string."""
    return (text.replace("[", "").replace("]", "")
                .replace("{{zh-lookup|", "").replace("}", ""))


def retrieveEnPlusCommons(a):
    """Build a NewChar entry for Han character *a* from en.wiktionary.

    Parses the {{Han char}} / {{Han ref}} templates on the English
    Wiktionary page (radical, stroke count, Cangjie, four-corner,
    dictionary references), records which script-variant, stroke-order
    and historical-script images exist, and extracts traditional /
    simplified / shinjitai forms from the {{zh-forms}} / {{ja-forms}} /
    {{Hani-forms}} / {{zh-hanzi}} templates.

    :param a: the character (page title) to look up
    :return: the populated NewChar object (empty apart from the
        character itself when no {{Han char}} template is found)

    NOTE(review): the source arrived with all newlines collapsed; the
    block structure (everything template-dependent nested under the
    ``if han_char_s:`` test) was reconstructed -- confirm against the
    original file.
    """
    han_char = re.compile(r'{{Han(_| )char\|(.*?)}', re.DOTALL)
    han_ref = re.compile(r'{{Han(_| )ref\|(.*})')
    zh_f = re.compile(r'{{zh-forms\|(.*?)}')
    hani_f = re.compile(r'{{Hani-forms\|(.*?)}')
    jap_f = re.compile(r'{{ja-forms\|(.*?)}')
    zh_hanzi_r = re.compile(r'{{zh-hanzi\|(.*?)}')
    kx = re.compile(r'kx=(.*?)(\||})')
    dkj = re.compile(r'\|dkj=(.*?)(\||})')
    dj = re.compile(r'\|dj=(.*?)(\||})')
    hdz = re.compile(r'\|hdz=(.*?)(\||})')
    rn = re.compile(r'rn=([0-9]*?)\|')
    rad = re.compile(r'rad=(.)')
    han_as = re.compile(r'as=([0-9]*?)\|')
    sn = re.compile(r'sn=([0-9]*?)\|')
    canj = re.compile(r'canj=([^\|]*)')
    canjPars1 = re.compile(r'(.*?)\([A-Z]*?\)')
    canjPars2 = re.compile(r'.*?(\([A-Z]*?\))')
    cr = re.compile(r'four=(.*?)\|')
    alt = re.compile(r'alt=(.*?)\|')
    asj = re.compile(r'asj=(.*?)\|')

    ang = pywikibot.Page(site_en, a)
    ang_text = ang.get()
    new = NewChar(a)
    han_char_s = re.search(han_char, ang_text)

    if han_char_s:
        szablon_han = han_char_s.group(2)
        rn_s = re.search(rn, szablon_han)
        rad_s = re.search(rad, szablon_han)
        han_as_s = re.search(han_as, szablon_han)
        sn_s = re.search(sn, szablon_han)
        canj_s = re.search(canj, szablon_han)
        cr_s = re.search(cr, szablon_han)

        rn_abort = rn_s is None or not rn_s.group(1).strip()
        rad_abort = rad_s is None or not rad_s.group(1).strip()

        # BUG FIX: original tested ``han_as_s != None or not
        # han_as_s.group(1).strip()`` -- the ``or`` should be ``and``;
        # as written it raised AttributeError when han_as_s was None
        # and IndexError when the captured group was empty.
        as_output = None
        if han_as_s is not None and han_as_s.group(1).strip():
            as_value = han_as_s.group(1)
            if as_value in ('0', '00'):
                as_output = '+ 0'
            elif as_value[0] == '0':
                # Strip a single leading zero ('07' -> '+ 7').
                as_output = '+ %s' % as_value[1]
            else:
                as_output = '+ %s' % as_value

        # Radical / additional-strokes line needs all three pieces.
        if not rn_abort and not rad_abort and as_output is not None:
            new.addKlucz(' %s %s %s'
                         % (rn_s.group(1), rad_s.group(1), as_output))

        if sn_s and sn_s.group(1).strip():
            new.addKreski(' %s' % sn_s.group(1))

        if canj_s and canj_s.group(1).strip():
            # Split the Cangjie code into letter runs and their
            # parenthesised suffixes; 'X' is rendered as 難.
            separate1 = re.findall(canjPars1, canj_s.group(1))
            separate2 = re.findall(canjPars2, canj_s.group(1))
            if len(separate1) == len(separate2):
                cjzText = ''.join(c.replace('X', '難') + d
                                  for c, d in zip(separate1, separate2))
                new.addCJZ(cjzText)

        if cr_s and cr_s.group(1).strip():
            new.addCR(cr_s.group(1))

        # Dictionary references from {{Han ref}}.
        han_ref_s = re.search(han_ref, ang_text)
        if han_ref_s:
            ref_body = han_ref_s.group(2)
            for label, pattern in (('kx', kx), ('dkj', dkj),
                                   ('dj', dj), ('hdz', hdz)):
                ref_match = re.search(pattern, ref_body)
                if ref_match:
                    new.addSlownik(label, ref_match.group(1))

        # Arguments the bot cannot handle automatically -- log only.
        alt_s = re.search(alt, szablon_han)
        asj_s = re.search(asj, szablon_han)
        if alt_s and alt_s.group(1).strip():
            log('*[[%s]] - na en.wikt istnieje argument alt' % ang.title())
        if asj_s and asj_s.group(1).strip():
            log('*[[%s]] - na en.wikt istnieje argument asj' % ang.title())

        # Script-variant images: svg preferred, then png ('|p'),
        # then gif ('|g').  Order of the groups matters for output.
        _add_first_wariant(new, _ext_candidates('c', a, 'clerical'))
        _add_first_wariant(new, _ext_candidates('xt', a, 'xinshu'))
        # 'still' and 'caoshu' compete for one slot; each extension is
        # tried for both names before falling back to the next format.
        _add_first_wariant(new, [
            ('st', 'File:%s-still.svg' % a, ''),
            ('ca', 'File:%s-caoshu.svg' % a, ''),
            ('st', 'File:%s-still.png' % a, '|p'),
            ('ca', 'File:%s-caoshu.png' % a, '|p'),
            ('st', 'File:%s-still.gif' % a, '|g'),
            ('ca', 'File:%s-caoshu.gif' % a, '|g'),
        ])
        _add_first_wariant(new, _ext_candidates('kt', a, 'kaishu'))
        _add_first_wariant(new, _ext_candidates('sot', a, 'songti'))

        # Stroke-order images: plain, Japanese, traditional, alternative.
        for prefix in ('', 'j', 't', 'a'):
            _add_stroke_order(new, a, prefix)

        # Historical scripts: oracle bone, bronze, large seal, seal.
        _add_etym_image(new, a, 'o', 'oracle')
        _add_etym_image(new, a, 'br', 'bronze')
        _add_etym_image(new, a, 'bs', 'bigseal')
        _add_etym_image(new, a, 'ss', 'seal')

        # Traditional / simplified / shinjitai forms.
        zh_f_s = re.search(zh_f, ang_text)
        ja_f_s = re.search(jap_f, ang_text)
        hani_f_s = re.search(hani_f, ang_text)
        zh_hanzi_s = re.search(zh_hanzi_r, ang_text)
        odd_forms = ('*[[%s]] - nietypowy zapis wariantów '
                     'tradycyjnych/uproszczonych na en.wikt' % a)
        if ja_f_s:
            ja_osobno = _clean_forms(ja_f_s.group(1)).split('|')
            for key, index in (('ct', 2), ('cu', 1), ('js', 0)):
                try:
                    new.addWariant(key, ja_osobno[index])
                except IndexError:
                    log(odd_forms)
        elif zh_f_s:
            zh_osobno = _clean_forms(zh_f_s.group(1)).split('|')
            new.addWariant('ct', zh_osobno[1])
            new.addWariant('cu', zh_osobno[0])
        elif hani_f_s:
            hani_osobno = _clean_forms(hani_f_s.group(1)).split('|')
            for key, index in (('ct', 1), ('cu', 0)):
                try:
                    new.addWariant(key, hani_osobno[index])
                except IndexError:
                    log(odd_forms)
        elif zh_hanzi_s:
            zh_hanzi_str = _clean_forms(zh_hanzi_s.group(1))
            new.addWariant('ct', zh_hanzi_str)
            new.addWariant('cu', zh_hanzi_str)

    return new
def run(self):
    """Delete local files that duplicate a file on Wikimedia Commons.

    For each page from the generator the method:
      * skips files that are already shared from Commons;
      * finds the Commons filename -- from the precomputed hash-match
        pair when ``use_hash`` is set, otherwise from the NowCommons
        template on the local page;
      * when the local and Commons names differ and the file is still
        used, optionally rewrites usages via ``image.ImageRobot``
        (``replace`` / ``replacealways`` / ``replaceloose`` options);
      * unless ``replaceonly`` is set, compares MD5 checksums and
        deletes the local file after showing both descriptions and
        asking for confirmation (or immediately with ``always``).

    NOTE(review): depends on module-level names (``use_hash``,
    ``i18n``, ``nowCommonsMessage``, ``image``, ``pg``) defined
    elsewhere in this file -- confirm against the full module.
    """
    commons = pywikibot.Site('commons', 'commons')
    # Deletion summary, translated for the wiki we are working on.
    comment = i18n.translate(self.site, nowCommonsMessage, fallback=True)

    for page in self.getPageGenerator():
        if use_hash:
            # Page -> Has the namespace | commons image -> Not
            # 0 -> local image, 1 -> commons image
            images_list = page
            page = pywikibot.Page(self.site, images_list[0])
        else:
            # If use_hash is true, we have already print this before, no need
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
        try:
            localImagePage = pywikibot.ImagePage(self.site, page.title())
            if localImagePage.fileIsShared():
                pywikibot.output(u'File is already on Commons.')
                continue
            md5 = localImagePage.getFileMd5Sum()
            if use_hash:
                filenameOnCommons = images_list[1]
            else:
                filenameOnCommons = self.findFilenameOnCommons(
                    localImagePage)
            if not filenameOnCommons and not use_hash:
                pywikibot.output(u'NowCommons template not found.')
                continue
            commonsImagePage = pywikibot.ImagePage(
                commons, 'Image:%s' % filenameOnCommons)
            if localImagePage.title(withNamespace=False) == \
                    commonsImagePage.title(withNamespace=False) and use_hash:
                pywikibot.output(
                    u'The local and the commons images have the same name')
            if localImagePage.title(withNamespace=False) != \
                    commonsImagePage.title(withNamespace=False):
                usingPages = list(localImagePage.usingPages())
                # A file used only by its own description page counts
                # as unused for deletion purposes.
                if usingPages and usingPages != [localImagePage]:
                    pywikibot.output(
                        u'\"\03{lightred}%s\03{default}\" is still used in %i pages.'
                        % (localImagePage.title(withNamespace=False),
                           len(usingPages)))
                    if self.getOption('replace') is True:
                        pywikibot.output(
                            u'Replacing \"\03{lightred}%s\03{default}\" by \
\"\03{lightgreen}%s\03{default}\".'
                            % (localImagePage.title(withNamespace=False),
                               commonsImagePage.title(withNamespace=False)))
                        oImageRobot = image.ImageRobot(
                            pg.FileLinksGenerator(localImagePage),
                            localImagePage.title(withNamespace=False),
                            commonsImagePage.title(withNamespace=False),
                            '', self.getOption('replacealways'),
                            self.getOption('replaceloose'))
                        oImageRobot.run()
                        # If the image is used with the urlname the
                        # previous function won't work
                        if len(list(pywikibot.ImagePage(
                                self.site,
                                page.title()).usingPages())) > 0 and \
                                self.getOption('replaceloose'):
                            # Second pass with the URL-encoded title.
                            oImageRobot = image.ImageRobot(
                                pg.FileLinksGenerator(localImagePage),
                                localImagePage.title(withNamespace=False,
                                                     asUrl=True),
                                commonsImagePage.title(
                                    withNamespace=False),
                                '', self.getOption('replacealways'),
                                self.getOption('replaceloose'))
                            oImageRobot.run()
                        # refresh because we want the updated list
                        usingPages = len(
                            list(
                                pywikibot.ImagePage(
                                    self.site,
                                    page.title()).usingPages()))
                        if usingPages > 0 and use_hash:
                            # just an enter
                            pywikibot.input(
                                u'There are still %s pages with this \
image, confirm the manual removal from them please.' % usingPages)
                    else:
                        pywikibot.output(u'Please change them manually.')
                    continue
                else:
                    pywikibot.output(
                        u'No page is using \"\03{lightgreen}%s\03{default}\" anymore.'
                        % localImagePage.title(withNamespace=False))
            commonsText = commonsImagePage.get()
            if self.getOption('replaceonly') is False:
                if md5 == commonsImagePage.getFileMd5Sum():
                    pywikibot.output(
                        u'The image is identical to the one on Commons.')
                    if len(localImagePage.getFileVersionHistory()
                           ) > 1 and not use_hash:
                        pywikibot.output(
                            u"This image has a version history. Please \
delete it manually after making sure that the \
old versions are not worth keeping."
                            "")
                        continue
                    if self.getOption('always') is False:
                        # Show both descriptions so the operator can
                        # verify source/license info before deletion.
                        pywikibot.output(
                            u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n'
                            % page.title())
                        pywikibot.output(localImagePage.get())
                        pywikibot.output(
                            u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n'
                            % commonsImagePage.title())
                        pywikibot.output(commonsText)
                        choice = pywikibot.inputChoice(
                            u'Does the description \
on Commons contain all required source and license\n'
                            u'information?', ['yes', 'no'], ['y', 'N'], 'N')
                        if choice == 'y':
                            localImagePage.delete(
                                '%s [[:commons:Image:%s]]'
                                % (comment, filenameOnCommons),
                                prompt=False)
                    else:
                        # 'always' option: delete without asking.
                        localImagePage.delete(
                            comment + ' [[:commons:Image:%s]]'
                            % filenameOnCommons, prompt=False)
                else:
                    pywikibot.output(
                        u'The image is not identical to the one on Commons.'
                    )
        except (pywikibot.NoPage, pywikibot.IsRedirectPage) as e:
            # NOTE(review): e[0] indexes the exception -- this works
            # only under Python 2; confirm the intended runtime.
            pywikibot.output(u'%s' % e[0])
            continue