def main(args):
    """Categorize Geograph images on Wikimedia Commons.

    Two modes:
      * If page-generator arguments were given on the command line,
        categorize exactly those file pages.
      * Otherwise, walk every topic returned by the database and
        categorize each image found for that topic.

    :param args: unused here; command-line args are consumed via
        wikipedia.handleArgs().
    """
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    # Three database handles: Geograph data, the category-mapping
    # database, and the commonswiki replica.
    (conn, cursor) = geograph_lib.connectDatabase()
    (conn2, cursor2) = geograph_lib.connectDatabase2(
        'sql-s2.toolserver.org', u'u_multichill_commons_categories_p')
    (conn3, cursor3) = geograph_lib.connectDatabase2(
        'commonswiki-p.db.toolserver.org', u'commonswiki_p')

    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()

    if generator:
        # Explicit page list supplied: only touch existing, non-redirect
        # pages in the File: namespace (6).
        for page in generator:
            if (page.exists() and page.namespace() == 6
                    and not page.isRedirectPage()):
                wikipedia.output(page.title())
                # Renamed from 'id' to avoid shadowing the builtin.
                photoId = getGeographId(page)
                if photoId:
                    geograph_lib.categorizeImage(page, photoId,
                                                 cursor, cursor2)
    else:
        # Database-driven mode: process every image for every topic.
        for (topic,) in getTopics(cursor):
            for (imageName, photoId) in getImagesWithTopic(cursor3, topic):
                try:
                    page = wikipedia.ImagePage(wikipedia.getSite(),
                                               u'File:' + imageName)
                    if (page.exists() and page.namespace() == 6
                            and not page.isRedirectPage()):
                        wikipedia.output(page.title())
                        geograph_lib.categorizeImage(page, photoId,
                                                     cursor, cursor2)
                except UnicodeDecodeError:
                    # Best effort: skip filenames we cannot decode.
                    print("UnicodeDecodeError, can't find the source. yah! :-(")
def main(args):
    """Stage Geograph source images for upload to Commons.

    Expected arguments:
      args[0] -- source directory holding per-subdirectory .jpg files
      args[1] -- destination directory for renamed images + descriptions
      args[2] -- optional Geograph file id to resume from (skip lower ids)

    For each source image that is not already on Commons, copies it to
    the destination directory under its Commons title and writes a
    matching .txt description file next to it.
    """
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    start_id = 0
    (conn, cursor) = geograph_lib.connectDatabase()
    (conn2, cursor2) = geograph_lib.connectDatabase2(
        'sql-s2.toolserver.org', u'u_multichill_commons_categories_p')

    if len(args) > 1:
        if len(args) > 2:
            start_id = int(args[2])
        sourcedir = args[0]
        destinationdir = args[1]
        if os.path.isdir(sourcedir) and os.path.isdir(destinationdir):
            for subdir in os.listdir(sourcedir):
                if not os.path.isdir(sourcedir + subdir):
                    continue
                sourcefilenames = filterSourceFilenames(
                    glob.glob(sourcedir + subdir + u"/*.jpg"))
                for sourcefilename in sourcefilenames:
                    # The file id is encoded in the filename.
                    fileId = getFileId(sourcefilename)
                    if fileId < start_id:
                        continue  # resuming: already processed earlier
                    wikipedia.output(str(fileId))
                    duplicates = findDuplicateImages(sourcefilename)
                    if duplicates:
                        wikipedia.output(u'Found duplicate image at %s'
                                         % duplicates.pop())
                        continue
                    metadata = geograph_lib.getMetadata(fileId, cursor)
                    if not metadata:
                        continue
                    description = geograph_lib.getDescription(metadata)
                    # Automatic category lookup is disabled; tag the image
                    # as uncategorized so humans can sort it later.
                    categories = (
                        '{{Uncategorized-Geograph|gridref=%s'
                        '|year={{subst:CURRENTYEAR}}'
                        '|month={{subst:CURRENTMONTHNAME}}'
                        '|day={{subst:CURRENTDAY}}}}\n'
                        % (metadata.get('grid_reference'),))
                    description = description + categories
                    wikipedia.output(description)
                    destinationFilename = geograph_lib.getTitle(metadata)
                    # Copy the image and save its description alongside.
                    shutil.copy(
                        unicode(sourcefilename),
                        unicode(destinationdir + destinationFilename
                                + u'.jpg'))
                    outputDescriptionFile(
                        destinationdir + destinationFilename + u'.txt',
                        description)
def main(args):
    """Copy Geograph images into an upload staging directory.

    args holds [sourcedir, destinationdir] and optionally a third value,
    the file id at which to resume. Every non-duplicate .jpg found one
    level below sourcedir is copied to destinationdir under its Commons
    title, together with a .txt description file.
    """
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    resume_from = 0
    (conn, cursor) = geograph_lib.connectDatabase()
    (conn2, cursor2) = geograph_lib.connectDatabase2(
        'sql-s2.toolserver.org', u'u_multichill_commons_categories_p')

    # Need at least source and destination directories to do anything.
    if len(args) <= 1:
        return
    if len(args) > 2:
        resume_from = int(args[2])
    src_root = args[0]
    dst_root = args[1]
    if not (os.path.isdir(src_root) and os.path.isdir(dst_root)):
        return

    for entry in os.listdir(src_root):
        if not os.path.isdir(src_root + entry):
            continue
        candidates = filterSourceFilenames(
            glob.glob(src_root + entry + u"/*.jpg"))
        for candidate in candidates:
            file_id = getFileId(candidate)
            if file_id < resume_from:
                continue
            wikipedia.output(str(file_id))
            dupes = findDuplicateImages(candidate)
            if dupes:
                wikipedia.output(
                    u'Found duplicate image at %s' % dupes.pop())
                continue
            record = geograph_lib.getMetadata(file_id, cursor)
            if not record:
                continue
            # Description plus an "uncategorized" maintenance template.
            text = geograph_lib.getDescription(record)
            text = text + (
                '{{Uncategorized-Geograph|gridref=%s|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n'
                % (record.get('grid_reference'),))
            wikipedia.output(text)
            new_title = geograph_lib.getTitle(record)
            shutil.copy(unicode(candidate),
                        unicode(dst_root + new_title + u'.jpg'))
            outputDescriptionFile(dst_root + new_title + u'.txt', text)