def main(args):
    '''
    Main loop: categorize Geograph images on Wikimedia Commons.

    If the command line arguments yield a page generator, every page from
    that generator that exists, is in namespace 6 and is not a redirect is
    categorized using the id returned by getGeographId(); pages for which no
    id is returned are skipped.

    Without a generator, the topics returned by getTopics() are walked and
    every image returned by getImagesWithTopic() for a topic is categorized
    using the id that comes with it.

    args is not read by this function; the command line arguments are
    obtained from wikipedia.handleArgs().
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    # The connection objects are not used directly, but stay bound for the
    # whole run -- presumably the cursors depend on them (TODO confirm).
    (conn, cursor) = geograph_lib.connectDatabase()
    (conn2, cursor2) = geograph_lib.connectDatabase2(
        'sql-s2.toolserver.org', u'u_multichill_commons_categories_p')
    (conn3, cursor3) = geograph_lib.connectDatabase2(
        'commonswiki-p.db.toolserver.org', u'commonswiki_p')

    def isCategorizable(page):
        # Same test, in the same evaluation order, for both branches below.
        return (page.exists() and page.namespace() == 6
                and not page.isRedirectPage())

    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if generator:
        for page in generator:
            if not isCategorizable(page):
                continue
            wikipedia.output(page.title())
            geographId = getGeographId(page)
            if geographId:
                geograph_lib.categorizeImage(page, geographId,
                                             cursor, cursor2)
    else:
        topics = getTopics(cursor)
        for (topic,) in topics:
            images = getImagesWithTopic(cursor3, topic)
            for (imageName, geographId) in images:
                try:
                    page = wikipedia.ImagePage(wikipedia.getSite(),
                                               u'File:' + imageName)
                    if isCategorizable(page):
                        wikipedia.output(page.title())
                        geograph_lib.categorizeImage(page, geographId,
                                                     cursor, cursor2)
                except UnicodeDecodeError:
                    # Best effort: report and carry on with the next image.
                    # The parenthesized single-string form prints the same
                    # text under both Python 2 and Python 3.
                    print("UnicodeDecodeError, can't find the source. yah! :-(")
# Example #2
# 0
def main(args):
    '''
    Main loop: categorize Geograph images on Wikimedia Commons.

    If the command line arguments yield a page generator, every page from
    that generator that exists, is in namespace 6 and is not a redirect is
    categorized using the id returned by getGeographId(); pages for which no
    id is returned are skipped.

    Without a generator, the topics returned by getTopics() are walked and
    every image returned by getImagesWithTopic() for a topic is categorized
    using the id that comes with it.

    args is not read by this function; the command line arguments are
    obtained from wikipedia.handleArgs().
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    # The connection objects are not used directly, but stay bound for the
    # whole run -- presumably the cursors depend on them (TODO confirm).
    (conn, cursor) = geograph_lib.connectDatabase()
    (conn2, cursor2) = geograph_lib.connectDatabase2(
        'sql-s2.toolserver.org', u'u_multichill_commons_categories_p')
    (conn3, cursor3) = geograph_lib.connectDatabase2(
        'commonswiki-p.db.toolserver.org', u'commonswiki_p')

    def isCategorizable(page):
        # Same test, in the same evaluation order, for both branches below.
        return (page.exists() and page.namespace() == 6
                and not page.isRedirectPage())

    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if generator:
        for page in generator:
            if not isCategorizable(page):
                continue
            wikipedia.output(page.title())
            geographId = getGeographId(page)
            if geographId:
                geograph_lib.categorizeImage(page, geographId,
                                             cursor, cursor2)
    else:
        topics = getTopics(cursor)
        for (topic,) in topics:
            images = getImagesWithTopic(cursor3, topic)
            for (imageName, geographId) in images:
                try:
                    page = wikipedia.ImagePage(wikipedia.getSite(),
                                               u'File:' + imageName)
                    if isCategorizable(page):
                        wikipedia.output(page.title())
                        geograph_lib.categorizeImage(page, geographId,
                                                     cursor, cursor2)
                except UnicodeDecodeError:
                    # Best effort: report and carry on with the next image.
                    # The parenthesized single-string form prints the same
                    # text under both Python 2 and Python 3.
                    print("UnicodeDecodeError, can't find the source. yah! :-(")
# Example #3
# 0
def main(args):
    '''
    Main loop: prepare local Geograph .jpg files with a description each.

    args[0] is the source directory, args[1] the destination directory and
    the optional args[2] the lowest file id to handle (default 0). Nothing
    is processed when fewer than two arguments are given or when either
    directory does not exist.

    For every *.jpg in each subdirectory of the source directory that
    passes filterSourceFilenames(), has a file id >= the start id and has
    no duplicate according to findDuplicateImages(), the metadata is looked
    up; when metadata is found, the image is copied to the destination
    directory under the title from geograph_lib.getTitle() and its
    description is written next to it as a .txt file.
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    start_id = 0

    (conn, cursor) = geograph_lib.connectDatabase()

    # cursor2 is only referenced by the disabled getCategories() call below;
    # the connection is still opened exactly as before.
    (conn2, cursor2) = geograph_lib.connectDatabase2(
        'sql-s2.toolserver.org', u'u_multichill_commons_categories_p')

    if not len(args) > 1:
        return
    if len(args) > 2:
        start_id = int(args[2])
    sourcedir = args[0]
    destinationdir = args[1]
    if not (os.path.isdir(sourcedir) and os.path.isdir(destinationdir)):
        return

    for subdir in os.listdir(sourcedir):
        # os.path.join rather than string concatenation, so a source
        # directory given without a trailing separator works as well.
        subdirPath = os.path.join(sourcedir, subdir)
        if not os.path.isdir(subdirPath):
            continue

        sourcefilenames = glob.glob(os.path.join(subdirPath, u"*.jpg"))
        sourcefilenames = filterSourceFilenames(sourcefilenames)
        for sourcefilename in sourcefilenames:
            # First get the file id
            fileId = getFileId(sourcefilename)
            if not fileId >= start_id:
                continue
            wikipedia.output(str(fileId))

            duplicates = findDuplicateImages(sourcefilename)
            if duplicates:
                wikipedia.output(
                    u'Found duplicate image at %s' % duplicates.pop())
                continue

            metadata = geograph_lib.getMetadata(fileId, cursor)
            if not metadata:
                # No metadata: skip this file.
                continue

            description = geograph_lib.getDescription(metadata)

            # Real categories are not looked up here (that would be
            # geograph_lib.getCategories(metadata, cursor, cursor2));
            # the file is tagged with this template instead.
            categories = '{{Uncategorized-Geograph|gridref=%s|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n' % (metadata.get('grid_reference'),)
            description = description + categories

            wikipedia.output(description)

            destinationFilename = geograph_lib.getTitle(metadata)
            # Joined for the same reason as the source path: the destination
            # directory may come without a trailing separator.
            destinationBase = os.path.join(destinationdir,
                                           destinationFilename)

            # Copy file to destination dir
            shutil.copy(unicode(sourcefilename),
                        unicode(destinationBase + u'.jpg'))
            # And save the description as well
            outputDescriptionFile(destinationBase + u'.txt', description)
# Example #4
# 0
def main(args):
    '''
    Main loop: prepare local Geograph .jpg files with a description each.

    args[0] is the source directory, args[1] the destination directory and
    the optional args[2] the lowest file id to handle (default 0). Nothing
    is processed when fewer than two arguments are given or when either
    directory does not exist.

    For every *.jpg in each subdirectory of the source directory that
    passes filterSourceFilenames(), has a file id >= the start id and has
    no duplicate according to findDuplicateImages(), the metadata is looked
    up; when metadata is found, the image is copied to the destination
    directory under the title from geograph_lib.getTitle() and its
    description is written next to it as a .txt file.
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    start_id = 0

    (conn, cursor) = geograph_lib.connectDatabase()

    # cursor2 is only referenced by the disabled getCategories() call below;
    # the connection is still opened exactly as before.
    (conn2, cursor2) = geograph_lib.connectDatabase2(
        'sql-s2.toolserver.org', u'u_multichill_commons_categories_p')

    if not len(args) > 1:
        return
    if len(args) > 2:
        start_id = int(args[2])
    sourcedir = args[0]
    destinationdir = args[1]
    if not (os.path.isdir(sourcedir) and os.path.isdir(destinationdir)):
        return

    for subdir in os.listdir(sourcedir):
        # os.path.join rather than string concatenation, so a source
        # directory given without a trailing separator works as well.
        subdirPath = os.path.join(sourcedir, subdir)
        if not os.path.isdir(subdirPath):
            continue

        sourcefilenames = glob.glob(os.path.join(subdirPath, u"*.jpg"))
        sourcefilenames = filterSourceFilenames(sourcefilenames)
        for sourcefilename in sourcefilenames:
            # First get the file id
            fileId = getFileId(sourcefilename)
            if not fileId >= start_id:
                continue
            wikipedia.output(str(fileId))

            duplicates = findDuplicateImages(sourcefilename)
            if duplicates:
                wikipedia.output(
                    u'Found duplicate image at %s' % duplicates.pop())
                continue

            metadata = geograph_lib.getMetadata(fileId, cursor)
            if not metadata:
                # No metadata: skip this file.
                continue

            description = geograph_lib.getDescription(metadata)

            # Real categories are not looked up here (that would be
            # geograph_lib.getCategories(metadata, cursor, cursor2));
            # the file is tagged with this template instead.
            categories = '{{Uncategorized-Geograph|gridref=%s|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n' % (
                metadata.get('grid_reference'), )
            description = description + categories

            wikipedia.output(description)

            destinationFilename = geograph_lib.getTitle(metadata)
            # Joined for the same reason as the source path: the destination
            # directory may come without a trailing separator.
            destinationBase = os.path.join(destinationdir,
                                           destinationFilename)

            # Copy file to destination dir
            shutil.copy(
                unicode(sourcefilename),
                unicode(destinationBase + u'.jpg'))
            # And save the description as well
            outputDescriptionFile(destinationBase + u'.txt', description)