예제 #1
0
def main(args):
    '''
    Entry point: gather the per-topic image counts and publish statistics.
    '''
    # All work happens on Wikimedia Commons.
    commons = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(commons)

    # One connection for topics, two toolserver connections for the counts.
    (conn, cursor) = connectDatabase()
    (conn2, cursor2) = connectDatabase2('sql-s2.toolserver.org',
                                        u'u_multichill_commons_categories_p')
    (conn3, cursor3) = connectDatabase2('commonswiki-p.db.toolserver.org',
                                        u'commonswiki_p')

    images = {}
    topics = getTopics(cursor)
    for (topic, ) in topics:
        count = getImagesWithTopicCount(cursor3, topic)
        images[topic] = count
        print(count)

    outputStats(topics, images)
    '''
def main():
    '''
    The main loop.

    Reads item and image counts from the database, fetches every page
    that has either, counts addresses and names in its text, and writes
    the combined statistics.
    '''
    wikipedia.setSite(wikipedia.getSite(u'nl', u'wikipedia'))
    (conn, cursor) = connectDatabase()

    items = getNumberOfItems(cursor)
    images = getNumberOfImages(cursor)
    addresses = {}
    names = {}
    # Union of all page titles that appear in either mapping.
    pages = list(set(items.keys() + images.keys()))
    pages.sort()

    for key in pages:
        print(key)
        page = wikipedia.Page(wikipedia.getSite(), key)
        text = page.get()
        addresses[key] = getNumberOfAddresses(text)
        names[key] = getNumberOfNames(text)

    updateStats(pages, items, addresses, names, images)
def main():
    '''
    Add location templates to Rijksmonument images.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    # Database connection is required for locating the images.
    (conn, cursor) = connectDatabase()

    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()

    if not generator:
        # Default: every Rijksmonument image that has no location yet.
        generator = getRijksmonumentWithoutLocation()

    # Preload pages, restricted to the File: namespace (6).
    imageGen = pagegenerators.PreloadingGenerator(
        pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
    for page in imageGen:
        template = locateImage(page, conn, cursor)
        if template:
            addLocation(page, template)
def main():
    '''
    The main loop.

    Walks "Media needing categories as of" categories and categorizes
    the images they contain; optionally marks each category as checked.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()

    imagerecat.initLists()
    generator = None
    genFactory = pagegenerators.GeneratorFactory()

    # Mark categories as checked once processed, unless -dontmark is given.
    mark = True

    for arg in wikipedia.handleArgs():
        if arg.startswith('-dontmark'):
            mark = False
        elif arg.startswith('-page'):
            if len(arg) == 5:
                generator = [wikipedia.Page(
                    wikipedia.getSite(),
                    wikipedia.input(u'What page do you want to use?'))]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
        elif arg.startswith('-yesterday'):
            generator = [wikipedia.Page(
                wikipedia.getSite(),
                u'Category:Media_needing_categories_as_of_' + getYesterday())]
        else:
            generator = genFactory.handleArg(arg)
    if generator:
        for page in generator:
            # Only dated backlog categories (namespace 14) are processed.
            if ((page.namespace() == 14) and page.title().startswith(
                    u'Category:Media needing categories as of')):
                wikipedia.output(u'Working on ' + page.title())
                for (image, gals, cats) in getImagesToCategorize(
                        cursor, page.titleWithoutNamespace()):
                    categorizeImage(image, gals,
                                    imagerecat.applyAllFilters(cats))
                if mark:
                    categoriesChecked(page.title())
예제 #5
0
def main(args):
    '''
    Main loop: parse options, build a page generator, categorize images.
    '''
    global search_wikis
    global hint_wiki

    onlyFilter = False
    onlyUncat = False
    genFactory = pagegenerators.GeneratorFactory()

    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    for arg in pywikibot.handleArgs():
        if arg == '-onlyfilter':
            onlyFilter = True
        elif arg == '-onlyuncat':
            onlyUncat = True
        elif arg.startswith('-hint:'):
            hint_wiki = arg [len('-hint:'):]
        elif arg.startswith('-onlyhint'):
            search_wikis = arg [len('-onlyhint:'):]
        else:
            genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if not generator:
        # Default: everything under "Media needing categories", recursively.
        generator = pagegenerators.CategorizedPageGenerator(
            catlib.Category(site, u'Category:Media needing categories'),
            recurse=True)
    initLists()
    categorizeImages(generator, onlyFilter, onlyUncat)
    pywikibot.output(u'All done')
예제 #6
0
def main(args):
    '''
    Convert every Ordnance Survey 1:250 000 raster square to jpg and tif.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    sourcedir = u'/mnt/user-store/OS_OpenData/1_250_000_Scale_Raster/data/'
    destinationdirjpg = u'/mnt/user-store/OS_OpenData/1_250_000_Scale_Raster/outputjpg/'
    destinationdirtif = u'/mnt/user-store/OS_OpenData/1_250_000_Scale_Raster/outputtif/'
    basefilename = u'Ordnance_Survey_1-250000_-_'
    sourcename = u'1:250 000 Scale Colour Raster'
    scale = u'250.000'

    # Grid square names are the tif filenames minus directory and suffix.
    squares = [fn.replace(sourcedir, u'').replace(u'.tif', u'')
               for fn in glob.glob(sourcedir + u"*.tif")]

    for square in squares:
        print(square)
        for (extension, destination) in ((u'jpg', destinationdirjpg),
                                         (u'tif', destinationdirtif)):
            OSlib.processSquare(square, squares, scale, sourcedir,
                                sourcename, basefilename, extension,
                                destination)
    '''
예제 #7
0
def main():
    '''
    Gather address/name statistics for all pages with items or images.
    '''
    wikipedia.setSite(wikipedia.getSite(u'nl', u'wikipedia'))
    (conn, cursor) = connectDatabase()

    items = getNumberOfItems(cursor)
    images = getNumberOfImages(cursor)
    addresses = {}
    names = {}
    # Every title that has an item count or an image count, sorted.
    pages = sorted(set(items.keys() + images.keys()))

    for title in pages:
        print(title)
        text = wikipedia.Page(wikipedia.getSite(), title).get()
        addresses[title] = getNumberOfAddresses(text)
        names[title] = getNumberOfNames(text)

    updateStats(pages, items, addresses, names, images)
예제 #8
0
def main():
    '''
    Notify uploaders about images still needing categories on a given day.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()

    prefix = u'Media_needing_categories_as_of_'
    uncat = u''
    for arg in wikipedia.handleArgs():
        if arg.startswith('-date'):
            if len(arg) == 5:
                uncat = prefix + wikipedia.input(
                    u'What page do you want to use?')
            else:
                uncat = prefix + arg[6:]
        elif arg.startswith('-yesterday'):
            uncat = prefix + getYesterday()

    if not uncat:
        wikipedia.output(u'Please specify date to work with "-date:' +
                         getYesterday() + u'" or "-yesterday"')
        return

    # Category titles use underscores, never spaces.
    uncat = uncat.replace(' ', '_')
    for (user, images) in getUsersToNotify(cursor, uncat):
        notifyUser(user, images, uncat)
예제 #9
0
def main():
    '''
    Record the current backlog of uncategorized and to-be-checked files.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()

    # Timestamp identifying this measurement (UTC, minute resolution).
    date = datetime.utcnow().strftime('%Y%m%d%H%M')

    # Files waiting for categories and files awaiting category review.
    uncatQuery = u"SELECT COUNT(DISTINCT(page_title)) FROM page JOIN categorylinks ON page_id=cl_from WHERE page_namespace=6 AND page_is_redirect=0 AND cl_to LIKE 'Media\_needing\_categories\_as\_of\_%'"
    checkQuery = u"SELECT COUNT(DISTINCT(page_title)) FROM page JOIN categorylinks ON page_id=cl_from WHERE page_namespace=6 AND page_is_redirect=0 AND cl_to LIKE 'Media\_needing\_category\_review\_as\_of\_%'"
    uncatCount = getCount(cursor, uncatQuery)
    checkCount = getCount(cursor, checkQuery)

    totalCount = int(uncatCount) + int(checkCount)

    # Publish the three figures for this timestamp.
    updateStats(date, uncatCount, checkCount, totalCount)
예제 #10
0
def main(args):
    '''
    Main loop.

    Collects, per topic, the categories that should be split and
    reports the combined list.
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    (conn, cursor) = connectDatabase()
    # Second toolserver connection (sql-s2) was disabled in the original.
    (conn3, cursor3) = connectDatabase2('commonswiki-p.db.toolserver.org',
                                        u'commonswiki_p')

    topics = getTopics(cursor)
    cats = []
    for (topic,) in topics:
        cats.extend(getCategoriesToSplit(cursor3, topic))

    outputCategoriesToSplit(cats)
    '''
예제 #11
0
def main(args):
    '''
    Grab a bunch of images and tag them if they are not categorized.
    '''
    genFactory = pagegenerators.GeneratorFactory()
    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)

    generator = None
    for arg in pywikibot.handleArgs():
        if arg.startswith('-yesterday'):
            generator = uploadedYesterday(site)
        elif arg.startswith('-recentchanges'):
            generator = recentChanges(site=site, delay=120)
        else:
            genFactory.handleArg(arg)

    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        pywikibot.output(
          u'You have to specify the generator you want to use for the program!')
        return

    # Only existing, non-redirect files (namespace 6) are considered.
    for page in pagegenerators.PreloadingGenerator(generator):
        if page.exists() and page.namespace() == 6 \
                and not page.isRedirectPage():
            if isUncat(page):
                addUncat(page)
예제 #12
0
def main(args):
    '''
    Main loop.

    Counts the images per topic and outputs the statistics.
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    (conn, cursor) = connectDatabase()
    (conn2, cursor2) = connectDatabase2('sql-s2.toolserver.org',
                                        u'u_multichill_commons_categories_p')
    (conn3, cursor3) = connectDatabase2('commonswiki-p.db.toolserver.org',
                                        u'commonswiki_p')

    topics = getTopics(cursor)
    images = {}
    for (topic,) in topics:
        images[topic] = getImagesWithTopicCount(cursor3, topic)
        print(images[topic])

    outputStats(topics, images)
    '''
예제 #13
0
def main(args):
    '''
    Main loop.

    Converts Ordnance Survey 1:250 000 raster squares to jpg and tif
    output directories.
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    sourcedir = u'/mnt/user-store/OS_OpenData/1_250_000_Scale_Raster/data/'
    destinationdirjpg = u'/mnt/user-store/OS_OpenData/1_250_000_Scale_Raster/outputjpg/'
    destinationdirtif = u'/mnt/user-store/OS_OpenData/1_250_000_Scale_Raster/outputtif/'
    basefilename = u'Ordnance_Survey_1-250000_-_'
    sourcename = u'1:250 000 Scale Colour Raster'
    scale = u'250.000'
    squares = []

    # Grid square names are the tif filenames minus directory and suffix.
    for sourcefilename in glob.glob(sourcedir + u"*.tif"):
        square = sourcefilename.replace(sourcedir, u'').replace(u'.tif', u'')
        squares.append(square)

    for square in squares:
        print(square)
        OSlib.processSquare(square, squares, scale, sourcedir, sourcename,
                            basefilename, u'jpg', destinationdirjpg)
        OSlib.processSquare(square, squares, scale, sourcedir, sourcename,
                            basefilename, u'tif', destinationdirtif)
    '''
예제 #14
0
def main():
    '''
    Split out a big category, or intersect categories from a generator.

    With -bigcat the named category is split out (optionally into
    -target); otherwise every category transcluding
    Template:Intersect categories (or the -page argument) is intersected.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    bigcategory = u''
    target = u''
    generator = None

    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                generator = [wikipedia.Page(
                    wikipedia.getSite(),
                    wikipedia.input(u'What page do you want to use?'))]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
        elif arg.startswith('-bigcat'):
            if len(arg) == 7:
                bigcategory = wikipedia.input(
                    u'What category do you want to split out?')
            else:
                bigcategory = arg[8:]
        elif arg.startswith('-target'):
            if len(arg) == 7:
                target = wikipedia.input(
                    u'What category is the target category?')
            else:
                target = arg[8:]

    if bigcategory:
        splitOutCategory(bigcategory, target)
    else:
        if not generator:
            # Default: all categories (namespace 14) transcluding the template.
            generator = pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(
                    wikipedia.Page(wikipedia.getSite(),
                                   u'Template:Intersect categories'),
                    onlyTemplateInclusion=True), [14])
        for cat in generator:
            intersectCategories(cat)
예제 #15
0
def main(args):
    '''
    Collect the categories that need splitting for every topic and
    output the resulting list.
    '''
    commons = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(commons)

    (conn, cursor) = connectDatabase()
    # Replica connection used for the per-topic category queries.
    (conn3, cursor3) = connectDatabase2('commonswiki-p.db.toolserver.org',
                                        u'commonswiki_p')

    cats = []
    for (topic, ) in getTopics(cursor):
        cats.extend(getCategoriesToSplit(cursor3, topic))

    outputCategoriesToSplit(cats)
    '''
def main():
    '''
    Geocode monument images.

    With -countrycode: works on that country for the starting site's
    language; otherwise iterates over every configured
    (countrycode, lang) pair that supports auto geocoding.
    '''
    countrycode = u''

    # Connect database, we need that
    (conn, cursor) = connectDatabase()
    (conn2, cursor2) = connectDatabase2()

    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        if arg.startswith('-countrycode:'):
            countrycode = arg[len('-countrycode:'):]

    # Language of the site the user started on; work happens on Commons.
    lang = wikipedia.getSite().language()
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    if countrycode:
        countryconfig = mconfig.countries.get((countrycode, lang))
        if not countryconfig:
            wikipedia.output(u'I have no config for countrycode "%s" in language "%s"' % (countrycode, lang))
            return False
        wikipedia.output(u'Working on countrycode "%s" in language "%s"' % (countrycode, lang))
        locateCountry(countrycode, lang, countryconfig, conn, cursor,
                      conn2, cursor2)
    else:
        for (countrycode, lang), countryconfig in mconfig.countries.iteritems():
            if not countryconfig.get('autoGeocode'):
                wikipedia.output(u'"%s" in language "%s" is not supported in auto geocode mode (yet).' % (countrycode, lang))
            else:
                wikipedia.output(u'Working on countrycode "%s" in language "%s"' % (countrycode, lang))
                locateCountry(countrycode, lang, countryconfig, conn, cursor,
                              conn2, cursor2)
예제 #17
0
def main(args):
    '''
    Tag uncategorized files.

    Builds a generator from the command line (or -yesterday /
    -recentchanges) and adds the uncat template to every existing,
    non-redirect file that lacks categories.
    '''
    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)

    genFactory = pagegenerators.GeneratorFactory()
    generator = None
    for arg in pywikibot.handleArgs():
        if arg.startswith('-yesterday'):
            generator = uploadedYesterday(site)
        elif arg.startswith('-recentchanges'):
            generator = recentChanges(site=site, delay=120)
        else:
            genFactory.handleArg(arg)

    if not generator:
        generator = genFactory.getCombinedGenerator()

    if not generator:
        pywikibot.output(
          u'You have to specify the generator you want to use for the program!')
    else:
        for page in pagegenerators.PreloadingGenerator(generator):
            isFile = page.exists() and page.namespace() == 6
            if isFile and not page.isRedirectPage() and isUncat(page):
                addUncat(page)
예제 #18
0
def main():
    '''
    Snapshot the categorization backlog on Commons.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()

    # UTC timestamp identifying this measurement.
    date = datetime.utcnow().strftime('%Y%m%d%H%M')

    uncatQuery = u"SELECT COUNT(DISTINCT(page_title)) FROM page JOIN categorylinks ON page_id=cl_from WHERE page_namespace=6 AND page_is_redirect=0 AND cl_to LIKE 'Media\_needing\_categories\_as\_of\_%'"
    checkQuery = u"SELECT COUNT(DISTINCT(page_title)) FROM page JOIN categorylinks ON page_id=cl_from WHERE page_namespace=6 AND page_is_redirect=0 AND cl_to LIKE 'Media\_needing\_category\_review\_as\_of\_%'"

    uncatCount = getCount(cursor, uncatQuery)
    checkCount = getCount(cursor, checkQuery)
    totalCount = int(uncatCount) + int(checkCount)

    updateStats(date, uncatCount, checkCount, totalCount)
예제 #19
0
def main(args):
    """
    Upload NARA TIF scans.

    args[0] is the source directory, args[1] the text file mapping
    filenames to NARA record ids.
    """
    site = wikipedia.getSite(u"commons", u"commons")
    wikipedia.setSite(site)

    if len(args) != 2:
        wikipedia.output(u"Too few arguments. Usage: NARA_uploader.py <directory> <textfile>")
        sys.exit()

    workdir = args[0]
    if not os.path.isdir(workdir):
        wikipedia.output(u"%s doesn't appear to be a directory. Exiting" % (workdir,))
        sys.exit()

    textfile = args[1]
    records = getRecords(textfile)

    for sourcefilename in glob.glob(workdir + u"/*.TIF"):
        filename = os.path.basename(sourcefilename)
        # This will give an ugly error if the id is unknown
        if not records.get(filename):
            wikipedia.output(u"Can't find %s in %s. Skipping this file." % (filename, textfile))
            continue
        if os.path.getsize(sourcefilename) >= 1024 * 1024 * 100:
            wikipedia.output(u"%s too big. Skipping this file." % (sourcefilename,))
            continue

        fileId = records.get(filename)

        duplicates = findDuplicateImages(sourcefilename)
        if duplicates:
            wikipedia.output(u"Found duplicate image at %s" % duplicates.pop())
            continue

        # No metadata handling. We use a webtool
        categories = u"{{Uncategorized-NARA|year=2011|month=September|day=21}}\n"
        description = getDescription(fileId) + categories

        print(fileId)
        title = getTitle(fileId, description)

        wikipedia.output(title)
        wikipedia.output(description)

        bot = upload.UploadRobot(
            url=sourcefilename.decode(sys.getfilesystemencoding()),
            description=description,
            useFilename=title,
            keepFilename=True,
            verifyDescription=False,
        )
        bot.run()
예제 #20
0
def main():
    '''
    Sort Tree-of-Life categories supplied on the command line.
    '''
    wikipedia.output(u'Testing 1 2 3')
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    generator = None
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if not arg.startswith('-page'):
            generator = genFactory.handleArg(arg)
        elif len(arg) == 5:
            generator = [wikipedia.Page(
                site, wikipedia.input(u'What page do you want to use?'))]
        else:
            generator = [wikipedia.Page(site, arg[6:])]

    if not generator:
        wikipedia.output(u'No categories to work on!')
        return
    for page in generator:
        # Only categories (namespace 14) can be sorted.
        if page.namespace() == 14:
            sort_TOL_Category(catlib.Category(site, page.title()))
def main():
    '''
    Add location templates to Rijksmonument images.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    # Connect database, we need that
    (conn, cursor) = connectDatabase()

    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()

    if not generator:
        generator = getRijksmonumentWithoutLocation()

    # Get a preloading generator with only images
    pgenerator = pagegenerators.PreloadingGenerator(
        pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
    for page in pgenerator:
        locationTemplate = locateImage(page, conn, cursor)
        if locationTemplate:
            addLocation(page, locationTemplate)
예제 #22
0
def main(args):
    '''
    Process Air Force photos.

    Works from a page generator, an update run against the stored
    last-id page, or an explicit -start_id/-end_id range.
    '''
    genFactory = pagegenerators.GeneratorFactory()

    start_id = 0
    end_id = 0
    updaterun = False
    site = wikipedia.getSite('commons', 'commons')
    wikipedia.setSite(site)
    updatePage = wikipedia.Page(site, u'User:BotMultichillT/Air_Force_latest')
    interval = 100

    for arg in wikipedia.handleArgs():
        if arg.startswith('-start_id'):
            start_id = (wikipedia.input(
                u'What is the id of the photo you want to start at?')
                if len(arg) == 9 else arg[10:])
        elif arg.startswith('-end_id'):
            end_id = (wikipedia.input(
                u'What is the id of the photo you want to end at?')
                if len(arg) == 7 else arg[8:])
        elif arg == u'-updaterun':
            updaterun = True
        elif arg.startswith('-interval'):
            interval = (wikipedia.input(
                u'What interval do you want to use?')
                if len(arg) == 9 else arg[10:])
        else:
            genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if generator:
        # A generator was supplied: work on its categories.
        for page in generator:
            if page.namespace() == 14:
                processCategory(page)
    elif updaterun:
        # Continue from the last processed id stored on the update page.
        start_id = int(updatePage.get())
        end_id = start_id + int(interval)
        last_id = processPhotos(int(start_id), int(end_id))
        comment = u'Worked from ' + str(start_id) + u' to ' + str(last_id)
        updatePage.put(str(last_id), comment)
    elif int(start_id) > 0 and int(end_id) > 0:
        # Explicit id range.
        last_id = processPhotos(int(start_id), int(end_id))
    else:
        print("Screw this, will implement later")
예제 #23
0
def main(args):
    '''
    Main loop.

    Works from a page generator, an update run against the stored
    last-id page, or an explicit -start_id/-end_id photo range.
    '''
    genFactory = pagegenerators.GeneratorFactory()

    start_id = 0
    end_id = 0
    updaterun = False
    site = wikipedia.getSite('commons', 'commons')
    wikipedia.setSite(site)
    updatePage = wikipedia.Page(site, u'User:BotMultichillT/Air_Force_latest')
    interval = 100

    for arg in wikipedia.handleArgs():
        if arg.startswith('-start_id'):
            if len(arg) == 9:
                start_id = wikipedia.input(u'What is the id of the photo you want to start at?')
            else:
                start_id = arg[10:]
        elif arg.startswith('-end_id'):
            if len(arg) == 7:
                end_id = wikipedia.input(u'What is the id of the photo you want to end at?')
            else:
                end_id = arg[8:]
        elif arg == u'-updaterun':
            updaterun = True
        elif arg.startswith('-interval'):
            if len(arg) == 9:
                interval = wikipedia.input(u'What interval do you want to use?')
            else:
                interval = arg[10:]
        else:
            genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    # Do we have a pagenerator?
    if generator:
        for page in generator:
            if page.namespace() == 14:
                processCategory(page)
    # Is updaterun set?
    elif updaterun:
        start_id = int(updatePage.get())
        end_id = start_id + int(interval)
        last_id = processPhotos(int(start_id), int(end_id))
        comment = u'Worked from ' + str(start_id) + u' to ' + str(last_id)
        updatePage.put(str(last_id), comment)
    # Do we have a start_id and a end_id
    elif int(start_id) > 0 and int(end_id) > 0:
        last_id = processPhotos(int(start_id), int(end_id))
    # Use the default generator
    else:
        print("Screw this, will implement later")
def main():
    '''
    Tag every uncategorized template reported by the database.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()
    for title in getUncategorizedTemplates(cursor):
        tagUncategorized(title)
예제 #25
0
def main():
    '''
    Walk the uncategorized templates from the database, tagging each one.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()
    templates = getUncategorizedTemplates(cursor)
    for templateTitle in templates:
        tagUncategorized(templateTitle)
def main():
    '''
    The main loop.

    Fetches (image, category) pairs from the database and categorizes
    each image accordingly.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()

    for (image, category) in getImagesToCategorize(cursor):
        categorizeImage(image, category)
예제 #27
0
def main():
    '''
    Categorize every image the database reports as needing a category.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()

    for (img, cat) in getImagesToCategorize(cursor):
        categorizeImage(img, cat)
예제 #28
0
def main(args):
    '''
    Upload NARA TIF scans.

    Expects two arguments: the source directory and the text file that
    maps filenames to NARA record ids.
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    if len(args) != 2:
        wikipedia.output(u'Too few arguments. Usage: NARA_uploader.py <directory> <textfile>')
        sys.exit()

    if not os.path.isdir(args[0]):
        wikipedia.output(u'%s doesn\'t appear to be a directory. Exiting' % (args[0],))
        sys.exit()
    workdir = args[0]

    textfile = args[1]
    records = getRecords(textfile)

    for sourcefilename in glob.glob(workdir + u"/*.TIF"):
        filename = os.path.basename(sourcefilename)
        # This will give an ugly error if the id is unknown
        fileId = records.get(filename)
        if not fileId:
            wikipedia.output(u'Can\'t find %s in %s. Skipping this file.' % (filename, textfile))
            continue

        duplicates = findDuplicateImages(sourcefilename)
        if duplicates:
            wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
            continue

        # No metadata handling. We use a webtool
        categories = u'{{Uncategorized-NARA|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n'
        description = getDescription(fileId) + categories

        title = getTitle(fileId, description)

        wikipedia.output(title)
        wikipedia.output(description)

        bot = upload.UploadRobot(url=sourcefilename.decode(sys.getfilesystemencoding()),
                                 description=description,
                                 useFilename=title,
                                 keepFilename=True,
                                 verifyDescription=False)
        bot.run()
def main():
    '''
    Sort each subject's category by country.
    '''
    site = wikipedia.getSite(u"commons", u"commons")
    wikipedia.setSite(site)

    (conn, cursor) = connectDatabase()

    for subject in getSubjects(cursor):
        sort_by_country_category(cursor, subject)
예제 #30
0
def main():
    '''
    Run the by-country sort over every subject from the database.
    '''
    commons = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(commons)

    (conn, cursor) = connectDatabase()

    subjects = getSubjects(cursor)
    for subject in subjects:
        sort_by_country_category(cursor, subject)
예제 #31
0
def main():
    '''
    Populate categories.

    Works on the -page argument if given, otherwise on every category
    (namespace 14) transcluding Template:Populate category.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    generator = None
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                generator = [wikipedia.Page(
                    wikipedia.getSite(),
                    wikipedia.input(u'What page do you want to use?'))]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
    if not generator:
        generator = pagegenerators.NamespaceFilterPageGenerator(
            pagegenerators.ReferringPageGenerator(
                wikipedia.Page(wikipedia.getSite(),
                               u'Template:Populate category'),
                onlyTemplateInclusion=True), [14])
    for cat in generator:
        populateCategory(cat)
예제 #32
0
def main():
    '''
    Check every base template against all known languages.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()

    langs = getLangs(cursor)
    for baseTemplate in getBaseTemplates(cursor):
        checkTemplate(cursor, baseTemplate, langs)
예제 #33
0
def main():
    '''
    The main loop.

    Verifies each base template for every language found in the
    database.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()

    langs = getLangs(cursor)

    for baseTemplate in getBaseTemplates(cursor):
        checkTemplate(cursor, baseTemplate, langs)
예제 #34
0
def main():
    '''
    Work through the "Media needing categories as of" backlog and
    categorize the images each dated category contains.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()

    imagerecat.initLists()
    genFactory = pagegenerators.GeneratorFactory()
    generator = None
    mark = True

    for arg in wikipedia.handleArgs():
        if arg.startswith('-dontmark'):
            mark = False
        elif arg.startswith('-page'):
            title = (wikipedia.input(u'What page do you want to use?')
                     if len(arg) == 5 else arg[6:])
            generator = [wikipedia.Page(wikipedia.getSite(), title)]
        elif arg.startswith('-yesterday'):
            generator = [wikipedia.Page(
                wikipedia.getSite(),
                u'Category:Media_needing_categories_as_of_' + getYesterday())]
        else:
            generator = genFactory.handleArg(arg)

    if not generator:
        return
    for page in generator:
        # Only dated backlog categories are processed.
        if page.namespace() != 14:
            continue
        if not page.title().startswith(
                u'Category:Media needing categories as of'):
            continue
        wikipedia.output(u'Working on ' + page.title())
        for (image, gals, cats) in getImagesToCategorize(
                cursor, page.titleWithoutNamespace()):
            categorizeImage(image, gals, imagerecat.applyAllFilters(cats))
        if mark:
            categoriesChecked(page.title())
예제 #35
0
def main():
    """Upload suitably-licensed photos from a Flickr group to Commons."""
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    imagerecat.initLists()

    flickr = flickrapi.FlickrAPI(api_key)
    groupId = '1044478@N20'
    #photos = flickr.flickr.groups_search(text='73509078@N00', per_page='10') = 1044478@N20
    for photoId in getPhotosInGroup(flickr=flickr, group_id=groupId):
        (photoInfo, photoSizes) = getPhoto(flickr=flickr, photo_id=photoId)

        # Skip photos whose license is not acceptable on Commons.
        if not isAllowedLicense(photoInfo=photoInfo):
            continue
        tags = getTags(photoInfo=photoInfo)
        if not photoCanUpload(tags=tags):
            continue

        # Fetch the largest available size and look for duplicates first.
        photoUrl = getPhotoUrl(photoSizes=photoSizes)
        photo = downloadPhoto(photoUrl=photoUrl)
        duplicates = findDuplicateImages(photo=photo)
        if duplicates:
            wikipedia.output(u'Found duplicate image at %s' %
                             duplicates.pop())
            continue

        # Build the file description out of flinfo output plus tag data.
        flinfoDescription = getFlinfoDescription(photoId=photoId)
        tagDescription = getTagDescription(tags=tags)
        tagCategories = getTagCategories(tags)
        filename = getFilename(photoInfo=photoInfo)
        photoDescription = buildDescription(flinfoDescription,
                                            tagDescription, tagCategories)
        targetPage = wikipedia.Page(title=u'File:' + filename,
                                    site=wikipedia.getSite())
        if targetPage.exists():
            # I should probably check if the hash is the same and if not
            # upload it under a different name
            wikipedia.output(u'File:' + filename + u' already exists!')
        else:
            #Do the actual upload
            #Would be nice to check before I upload if the file is already at Commons
            #Not that important for this program, but maybe for derived programs
            bot = upload.UploadRobot(url=photoUrl,
                                     description=photoDescription,
                                     useFilename=filename,
                                     keepFilename=True,
                                     verifyDescription=False)
            bot.run()

    wikipedia.output('All done')
예제 #36
0
def main():
    """Transfer photos of a Rijksmonumenten Flickr group to Commons."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    # A Flickr api key is mandatory; bail out when it is not configured.
    if not config.flickr['api_key']:
        wikipedia.output('Flickr api key not found! Get yourself an api key')
        wikipedia.output(
            'Any flickr user can get a key at http://www.flickr.com/services/api/keys/apply/'
        )
        return
    flickr = flickrapi.FlickrAPI(config.flickr['api_key'])

    group_id = u'1710854@N24'
    addCategory = u'Rijksmonumenten'
    removeCategories = True
    autonomous = True
    totalPhotos = 0
    uploadedPhotos = 0

    # Mark the images as reviewed right away when configured to do so.
    flickrreview = config.flickr['review'] or False

    # Pick the Flickr reviewer from the configuration.
    if config.flickr['reviewer']:
        reviewer = config.flickr['reviewer']
    elif 'commons' in config.sysopnames['commons']:
        print(config.sysopnames['commons'])
        reviewer = config.sysopnames['commons']['commons']
    elif 'commons' in config.usernames['commons']:
        reviewer = config.usernames['commons']['commons']
    else:
        reviewer = u''

    for photo_id in flickrripper.getPhotos(flickr=flickr, group_id=group_id):
        uploadedPhotos += processPhoto(flickr, photo_id, flickrreview,
                                       reviewer, addCategory,
                                       removeCategories, autonomous)
        totalPhotos += 1

    wikipedia.output(u'Finished running')
    wikipedia.output(u'Total photos: ' + str(totalPhotos))
    wikipedia.output(u'Uploaded photos: ' + str(uploadedPhotos))
예제 #37
0
def main(args):
    """Categorize Geograph images on Commons.

    With page-generator arguments, work on the generated file pages;
    otherwise walk every Geograph topic found in the database.
    """
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    # Three connections: tools db, category cache, and the commons replica.
    (conn, cursor) = geograph_lib.connectDatabase()
    (conn2, cursor2) = geograph_lib.connectDatabase2(
        'sql-s2.toolserver.org', u'u_multichill_commons_categories_p')
    (conn3, cursor3) = geograph_lib.connectDatabase2(
        'commonswiki-p.db.toolserver.org', u'commonswiki_p')

    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()

    if generator:
        for page in generator:
            if (page.exists() and page.namespace() == 6
                    and not page.isRedirectPage()):
                wikipedia.output(page.title())
                id = getGeographId(page)
                if id:
                    geograph_lib.categorizeImage(page, id, cursor, cursor2)
    else:
        # No generator given: process every image of every topic.
        for (topic,) in getTopics(cursor):
            for (imageName, id) in getImagesWithTopic(cursor3, topic):
                try:
                    page = wikipedia.ImagePage(wikipedia.getSite(),
                                               u'File:' + imageName)
                    if (page.exists() and page.namespace() == 6
                            and not page.isRedirectPage()):
                        wikipedia.output(page.title())
                        geograph_lib.categorizeImage(page, id, cursor,
                                                     cursor2)
                except UnicodeDecodeError:
                    print("UnicodeDecodeError, can't find the source. yah! :-(")
예제 #38
0
def main(args):
    """Main loop: add Geograph topic categories to Commons images."""
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    # Database handles: tools db, category cache, commons replica.
    (conn, cursor) = geograph_lib.connectDatabase()
    (conn2, cursor2) = geograph_lib.connectDatabase2(
        'sql-s2.toolserver.org', u'u_multichill_commons_categories_p')
    (conn3, cursor3) = geograph_lib.connectDatabase2(
        'commonswiki-p.db.toolserver.org', u'commonswiki_p')

    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if generator:
        for page in generator:
            # Only existing, non-redirect file pages are workable.
            if (not page.exists() or page.namespace() != 6
                    or page.isRedirectPage()):
                continue
            wikipedia.output(page.title())
            id = getGeographId(page)
            if id:
                geograph_lib.categorizeImage(page, id, cursor, cursor2)
    else:
        topics = getTopics(cursor)
        for (topic,) in topics:
            images = getImagesWithTopic(cursor3, topic)
            for (imageName, id) in images:
                try:
                    page = wikipedia.ImagePage(wikipedia.getSite(),
                                               u'File:' + imageName)
                    if (page.exists() and page.namespace() == 6
                            and not page.isRedirectPage()):
                        wikipedia.output(page.title())
                        geograph_lib.categorizeImage(page, id, cursor,
                                                     cursor2)
                except UnicodeDecodeError:
                    print("UnicodeDecodeError, can't find the source. yah! :-(")
예제 #39
0
def main():
    """Print daily CSV statistics about the image categorization backlog.

    Walks day by day from 2008-12-01 up to the current UTC time and
    prints, per day, the number of deleted, ok, uncategorized and
    to-be-checked images as reported by getDayStats().
    """
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()

    # Start of the covered period. BUG FIX: the original literal was `01`,
    # an octal-style literal (value 1 in Python 2) that is a syntax error
    # on Python 3; `1` is the same value and valid everywhere.
    date = datetime(2008, 12, 1)

    # CSV header line.
    print('Date, deleted, ok, uncategorized, to be checked')
    while date < datetime.utcnow():
        (deletedImages, okImages, uncategorizedImages,
         toBeCheckedImages) = getDayStats(cursor, date.strftime('%Y%m%d'))
        print(date.strftime('%Y%m%d') + u', ' + str(deletedImages) + u', ' +
              str(okImages) + u', ' + str(uncategorizedImages) + u', ' +
              str(toBeCheckedImages))
        date = date + timedelta(days=1)
예제 #40
0
파일: aaa.py 프로젝트: filbertkm/toolserver
	def upload(self, logfilename, donedir):
		print self.template
#		p = wikipedia.Page(wikipedia.getSite(u'commons', u'commons'), 'User:Aude/sandbox2')
#		p.put(self.template, 'AAA-image template, tests')

		do_upload = wikipedia.input( u'\nContinue uploading %s ?' % (self.uploadname) )
		if (do_upload == 'y'):
			print 'Uploading %s' % (self.uploadname)
			site = wikipedia.getSite(u'commons', u'commons')
			wikipedia.setSite(site)
			bot = upload.UploadRobot(url=self.filelocation, description=self.template, useFilename = self.uploadname, keepFilename = True, verifyDescription=False)
			bot.run()
			logfile = open(logfilename, 'a')
			logfile.write('%s,uploaded,%s\n' % (self.filename, datetime.today()))
			logfile.close()
                        dst = '%s%s' % (donedir, self.filename)
			shutil.move(self.filelocation, dst)
		else:   		
			print u'\nUpload of image %s cancelled.\n\n' % (self.filename)
예제 #41
0
def main():
    """Upload Rijksmonumenten photos from a Flickr group to Commons."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    # The Flickr api key is required for any API call.
    if config.flickr['api_key']:
        flickr = flickrapi.FlickrAPI(config.flickr['api_key'])
    else:
        wikipedia.output('Flickr api key not found! Get yourself an api key')
        wikipedia.output('Any flickr user can get a key at http://www.flickr.com/services/api/keys/apply/')
        return

    group_id = u'1516413@N22'
    addCategory = u'Rijksmonumenten'
    removeCategories = True
    autonomous = True

    # Mark the images as reviewed right away when configured to do so.
    flickrreview = config.flickr['review'] or False

    # Pick the Flickr reviewer from the configuration.
    if config.flickr['reviewer']:
        reviewer = config.flickr['reviewer']
    elif 'commons' in config.sysopnames['commons']:
        print(config.sysopnames['commons'])
        reviewer = config.sysopnames['commons']['commons']
    elif 'commons' in config.usernames['commons']:
        reviewer = config.usernames['commons']['commons']
    else:
        reviewer = u''

    totalPhotos = 0
    uploadedPhotos = 0
    for photo_id in flickrripper.getPhotos(flickr=flickr, group_id=group_id):
        uploadedPhotos += processPhoto(flickr, photo_id, flickrreview,
                                       reviewer, addCategory,
                                       removeCategories, autonomous)
        totalPhotos += 1

    wikipedia.output(u'Finished running')
    wikipedia.output(u'Total photos: ' + str(totalPhotos))
    wikipedia.output(u'Uploaded photos: ' + str(uploadedPhotos))
예제 #42
0
def main():
    """Split out an overcrowded category, or intersect tagged categories.

    With -bigcat the named category is split out (optionally into
    -target); otherwise the generated (or default) categories are
    intersected.
    """
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    bigcategory = u''
    target = u''
    generator = None

    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                title = wikipedia.input(u'What page do you want to use?')
            else:
                title = arg[6:]
            generator = [wikipedia.Page(wikipedia.getSite(), title)]
        elif arg.startswith('-bigcat'):
            # "-bigcat" alone prompts; "-bigcat:Foo" takes Foo.
            bigcategory = (wikipedia.input(
                u'What category do you want to split out?')
                           if len(arg) == 7 else arg[8:])
        elif arg.startswith('-target'):
            target = (wikipedia.input(
                u'What category is the target category?')
                      if len(arg) == 7 else arg[8:])

    if bigcategory != u'':
        splitOutCategory(bigcategory, target)
        return

    if not generator:
        # Default: every category transcluding {{Intersect categories}}.
        generator = pagegenerators.NamespaceFilterPageGenerator(
            pagegenerators.ReferringPageGenerator(
                wikipedia.Page(wikipedia.getSite(),
                               u'Template:Intersect categories'),
                onlyTemplateInclusion=True), [14])
    for cat in generator:
        intersectCategories(cat)
예제 #43
0
파일: wlmdownload.py 프로젝트: hay/hay
def main():
    """Download Wiki Loves Monuments files to a local target directory."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    genFactory = pagegenerators.GeneratorFactory()
    # Default download location; override with -target:<path>.
    target = u'/Users/hay/tmp/wlm/'

    for arg in wikipedia.handleArgs():
        if arg.startswith('-target:'):
            target = arg[len('-target:'):]
        else:
            genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if not generator:
        return

    # Preload only file pages (namespace 6) and download each of them.
    pgenerator = pagegenerators.PreloadingGenerator(
        pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
    for page in pgenerator:
        imagepage = wikipedia.ImagePage(page.site(), page.title())
        downloadFile(imagepage, target)
def main():
    """Add topic categories to file pages produced by the generators."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    # A database connection is required for the topic lookups.
    (conn, cursor) = connectDatabase()

    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if generator:
        # Work on file pages (namespace 6) only, preloaded in batches.
        filegen = pagegenerators.NamespaceFilterPageGenerator(generator, [6])
        for page in pagegenerators.PreloadingGenerator(filegen):
            addTopicCategory(page, conn, cursor)
예제 #45
0
def main():
    """Sort Tree-of-Life categories supplied on the command line."""
    wikipedia.output(u'Testing 1 2 3')

    generator = None
    genFactory = pagegenerators.GeneratorFactory()

    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    for arg in wikipedia.handleArgs():
        if not arg.startswith('-page'):
            generator = genFactory.handleArg(arg)
        elif len(arg) == 5:
            # Bare "-page": ask which page to work on.
            generator = [wikipedia.Page(
                site, wikipedia.input(u'What page do you want to use?'))]
        else:
            generator = [wikipedia.Page(site, arg[6:])]

    if not generator:
        wikipedia.output(u'No categories to work on!')
        return

    for page in generator:
        if page.namespace() == 14:
            sort_TOL_Category(catlib.Category(site, page.title()))
예제 #46
0
def main():
    """Categorize images yielded by the command line page generators."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    # Connect database, we need that.
    (conn, cursor) = connectDatabase()

    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if not generator:
        return

    # Restrict to file pages (namespace 6) and preload them in batches.
    for page in pagegenerators.PreloadingGenerator(
            pagegenerators.NamespaceFilterPageGenerator(generator, [6])):
        categorizeImage(page, conn, cursor)
예제 #47
0
def main():
    """Copy allowed photos from a Flickr group to Wikimedia Commons."""
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    imagerecat.initLists()

    flickr = flickrapi.FlickrAPI(api_key)
    groupId = '1044478@N20'
    #photos = flickr.flickr.groups_search(text='73509078@N00', per_page='10') = 1044478@N20
    for photoId in getPhotosInGroup(flickr=flickr, group_id=groupId):
        (photoInfo, photoSizes) = getPhoto(flickr=flickr, photo_id=photoId)
        if not isAllowedLicense(photoInfo=photoInfo):
            continue
        tags = getTags(photoInfo=photoInfo)
        if not photoCanUpload(tags=tags):
            continue

        # Grab the largest size and check for duplicates on Commons.
        photoUrl = getPhotoUrl(photoSizes=photoSizes)
        photo = downloadPhoto(photoUrl=photoUrl)
        duplicates = findDuplicateImages(photo=photo)
        if duplicates:
            wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
            continue

        flinfoDescription = getFlinfoDescription(photoId=photoId)
        tagDescription = getTagDescription(tags=tags)
        tagCategories = getTagCategories(tags)
        filename = getFilename(photoInfo=photoInfo)
        photoDescription = buildDescription(flinfoDescription, tagDescription,
                                            tagCategories)
        if wikipedia.Page(title=u'File:' + filename,
                          site=wikipedia.getSite()).exists():
            # I should probably check if the hash is the same and if not
            # upload it under a different name
            wikipedia.output(u'File:' + filename + u' already exists!')
        else:
            #Do the actual upload
            #Would be nice to check before I upload if the file is already at Commons
            #Not that important for this program, but maybe for derived programs
            bot = upload.UploadRobot(url=photoUrl,
                                     description=photoDescription,
                                     useFilename=filename, keepFilename=True,
                                     verifyDescription=False)
            bot.run()

    wikipedia.output('All done')
예제 #48
0
def main(args):
    """Repair descriptions of Geograph images with a broken template."""
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    # Tools database plus the commons replica.
    (conn, cursor) = connectDatabase()
    (conn2, cursor2) = connectDatabase2('commonswiki-p.db.toolserver.org',
                                        u'commonswiki_p')

    for (pageName, fileId) in getImagesToCorrect(cursor2):
        wikipedia.output(pageName)
        # Both the page name and the Geograph file id must be present.
        if pageName == u'' or fileId == u'':
            continue
        page = wikipedia.Page(site, pageName)
        if not page.exists():
            continue
        categories = page.categories()

        # Rebuild the description from the Geograph metadata, if any.
        metadata = getMetadata(fileId, cursor)
        if not metadata:
            continue
        description = getDescription(metadata)
        description = wikipedia.replaceCategoryLinks(description, categories,
                                                     site)
        wikipedia.output(description)
        page.put(description,
                 u'Fixing description of Geograph image with broken template')
예제 #49
0
def main():
    """Download generated file pages into a local target directory."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    genFactory = pagegenerators.GeneratorFactory()
    # Default download location; override with -target:<path>.
    target = u'/Users/hay/tmp/wlm/'

    for arg in wikipedia.handleArgs():
        if arg.startswith('-target:'):
            target = arg[len('-target:'):]
        else:
            genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if generator:
        imagegen = pagegenerators.NamespaceFilterPageGenerator(generator, [6])
        # Preload the file pages in batches before downloading.
        for page in pagegenerators.PreloadingGenerator(imagegen):
            downloadFile(wikipedia.ImagePage(page.site(), page.title()),
                         target)
예제 #50
0
def main(args):
    """Fix the description of Geograph images with a broken template."""
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    (conn, cursor) = connectDatabase()
    (conn2, cursor2) = connectDatabase2('commonswiki-p.db.toolserver.org',
                                        u'commonswiki_p')

    imageSet = getImagesToCorrect(cursor2)
    for (pageName, fileId) in imageSet:
        wikipedia.output(pageName)
        # Skip incomplete rows.
        if pageName == u'' or fileId == u'':
            continue
        # Get page contents.
        page = wikipedia.Page(site, pageName)
        if page.exists():
            categories = page.categories()

            # Fetch the Geograph metadata for this file id.
            metadata = getMetadata(fileId, cursor)
            if metadata:
                # Regenerate the description, keeping the page's categories.
                description = getDescription(metadata)
                description = wikipedia.replaceCategoryLinks(
                    description, categories, site)
                comment = u'Fixing description of Geograph image with broken template'
                wikipedia.output(description)
                page.put(description, comment)
예제 #51
0
def main():
    """Notify uploaders about their uncategorized media for a given day.

    Requires either -date:<date> or -yesterday on the command line.
    """
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    (conn, cursor) = connectDatabase()

    prefix = u'Media_needing_categories_as_of_'
    uncat = u''
    for arg in wikipedia.handleArgs():
        if arg.startswith('-date'):
            if len(arg) == 5:
                uncat = prefix + wikipedia.input(u'What page do you want to use?')
            else:
                uncat = prefix + arg[6:]
        elif arg.startswith('-yesterday'):
            uncat = prefix + getYesterday()

    if not uncat:
        wikipedia.output(u'Please specify date to work with "-date:' +
                         getYesterday() + u'" or "-yesterday"')
        return

    uncat = uncat.replace(' ', '_')
    for (user, images) in getUsersToNotify(cursor, uncat):
        notifyUser(user, images, uncat)
예제 #52
0
def main(args):
    '''
    Main loop.

    Upload NARA (National Archives) TIF scans plus generated derivative
    versions of them to Wikimedia Commons.

    args: <original dir> <textfile> <derivative dir> [start filename]
    '''
    workdir = u''
    textfile = u''
    records = {}

    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    # Three positional arguments are mandatory.
    if (len(args) < 3):
        wikipedia.output(
            u'Too few arguments. Usage: NARA_uploader.py <original dir> <textfile> <derivative dir> [start filename]'
        )
        sys.exit()

    # args[0]: directory holding the original TIF files.
    if os.path.isdir(args[0]):
        workdir = args[0]
    else:
        wikipedia.output(u'%s doesn\'t appear to be a directory. Exiting.' %
                         (args[0], ))
        sys.exit()

    # args[2]: directory for generated derivatives; created when missing.
    derivativeDirectory = args[2]
    if os.path.exists(
            derivativeDirectory) and not os.path.isdir(derivativeDirectory):
        wikipedia.output(u"%s exists, but isn't a directory. Exiting." %
                         derivativeDirectory)
        sys.exit()
    elif not os.path.exists(derivativeDirectory):
        wikipedia.output(u'%s doesn\'t appear to exist. Creating.' %
                         derivativeDirectory)
        os.mkdir(derivativeDirectory)

    # Optional args[3]: filename to resume from; it must exist in workdir.
    try:
        startFile = args[3]
        startFileFound = False

        startPath = os.path.join(workdir, startFile)

        if not os.path.exists(startPath) or os.path.isdir(startPath):
            wikipedia.output(
                u"%s doesn't exist, or it is directory. Exiting." % startPath)
            sys.exit()

    except IndexError:
        startFile = None

    # args[1]: text file mapping source filenames to NARA file ids.
    textfile = args[1]
    records = getRecords(textfile)
    #print records

    sourcefilenames = glob.glob(workdir + u"/*.TIF")
    sourcefilenames.sort()

    for sourcefilename in sourcefilenames:

        wikipedia.output(u'\nProcessing %s' % sourcefilename)

        if startFile:  #if we want to skip to a file
            fileHead, fileTail = os.path.split(sourcefilename)

            if not startFileFound:
                if fileTail != startFile:
                    wikipedia.output('Skipping %s' % sourcefilename)
                    continue
                else:  #we have found the start point
                    startFileFound = True

        filename = os.path.basename(sourcefilename)
        # This will give an ugly error if the id is unknown
        if not records.get(filename):
            wikipedia.output(u'Can\'t find %s in %s. Skipping this file.' %
                             (filename, textfile))
        elif os.path.getsize(sourcefilename) >= 1024 * 1024 * 100:
            # Files of 100 MiB or more are skipped (upload size limit).
            wikipedia.output(u'%s too big. Skipping this file.' %
                             (sourcefilename, ))
        else:
            fileId = records.get(filename)

            wikipedia.output(u'Found file ID: %d' % fileId)

            #generate all the files we might need to upload
            filesToUpload = createDerivatives(sourcefilename,
                                              derivativeDirectory)

            # Maps extension -> on-wiki filename for already-present copies.
            duplicateFiletypes = {}
            #check for duplicates of the original on wiki
            for fileInfo in filesToUpload:

                if fileInfo['ext'] != '.tif':
                    continue

                foundDuplicates = findDuplicateImagesByHash(fileInfo['name'])

                duplicateFiletypes = addDuplicatesToList(
                    fileInfo, foundDuplicates, duplicateFiletypes)

            # follow the naming + description from the tif if it exists, or make it up from the description
            if '.tif' in duplicateFiletypes:
                title = duplicateFiletypes['.tif']

                wikipedia.output(
                    u'Fetching description from TIF file page: %s' % title)
                description = wikipedia.Page(site, 'File:' + title).get()

            else:
                description = fetchDescriptionFromWebtool(fileId)

                if not description:
                    wikipedia.output(u'No description! Skipping this file.')
                    continue
                else:
                    # Tag freshly built descriptions for later categorization.
                    categories = u'{{Uncategorized-NARA|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n'
                    description = description + categories

                    title = getTitle(fileId, description)

                    if not title:
                        continue

            #check for duplicates of the derivatives (using the filename we just made)
            for fileInfo in filesToUpload:

                if fileInfo['ext'] == '.tif':
                    continue

                titleRoot, ext = os.path.splitext(title)
                fileTitle = titleRoot + fileInfo['ext']

                foundDuplicates = findDuplicateImagesByName(fileTitle)

                duplicateFiletypes = addDuplicatesToList(
                    fileInfo, foundDuplicates, duplicateFiletypes)

            #construct the gallery
            filesToUpload = setDestinations(filesToUpload, title)
            gallery = createDerivativeGallery(filesToUpload, title)

            #for every file, including original and derivatives
            for fileInfo in filesToUpload:

                titleRoot, ext = os.path.splitext(title)
                fileTitle = titleRoot + fileInfo['ext']

                if fileInfo[
                        'ext'] in duplicateFiletypes:  #we have a duplicate: add derivs if needed

                    currentFilename = duplicateFiletypes[fileInfo['ext']]

                    currentFilePage = wikipedia.Page(site,
                                                     'File:' + currentFilename)

                    currentDescription = currentFilePage.get()

                    # Returns a falsy value when the gallery already exists.
                    currentDescription = addDerivativesToDescription(
                        currentDescription, gallery, title)

                    if currentDescription:
                        wikipedia.output(
                            'Updating the description for %s:\n\n%s' %
                            (currentFilename, currentDescription))
                        currentFilePage.put(
                            currentDescription,
                            comment="Adding other versions to the description."
                        )
                    else:
                        wikipedia.output('Gallery exists on page %s' %
                                         currentFilename)

                else:  #upload the file with generated info

                    wikipedia.output(fileInfo['name'] + ' --> ' +
                                     fileInfo['dest'])

                    newDescription = addDerivativesToDescription(
                        description, gallery, title)

                    if newDescription:  #if the gallery add failed due to existing gallery, just carry on with the original
                        description = newDescription

                    fileDescription = removeTIFFParameter(
                        description, fileInfo['ext'])
                    wikipedia.output(fileDescription)
                    bot = upload.UploadRobot(url=fileInfo['name'].decode(
                        sys.getfilesystemencoding()),
                                             description=fileDescription,
                                             useFilename=fileInfo['dest'],
                                             keepFilename=True,
                                             verifyDescription=False)
                    bot.run()
예제 #53
0
def main():
    """Transfer photos from Panoramio to Wikimedia Commons.

    The set to work on is selected with -set; see usage() for the other
    supported options.
    """
    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    #imagerecat.initLists()

    photoset = u''  #public (popular photos), full (all photos), user ID number
    size = u'original'
    minx = u''
    miny = u''
    maxx = u''
    maxy = u''
    start_id = u''
    end_id = u''
    addCategory = u''
    autonomous = False
    totalPhotos = 0
    uploadedPhotos = 0

    # Mark the images as reviewed right away when configured to do so.
    panoramioreview = config.panoramio['review'] or False

    # Pick the Panoramio reviewer from the configuration.
    if config.panoramio['reviewer']:
        reviewer = config.panoramio['reviewer']
    elif 'commons' in config.sysopnames['commons']:
        print(config.sysopnames['commons'])
        reviewer = config.sysopnames['commons']['commons']
    elif 'commons' in config.usernames['commons']:
        reviewer = config.usernames['commons']['commons']
    else:
        reviewer = u''

    # Should be renamed to overrideLicense or something like that
    override = u''
    for arg in pywikibot.handleArgs():
        if arg.startswith('-set'):
            photoset = (pywikibot.input(u'What is the set?')
                        if len(arg) == 4 else arg[5:])
        elif arg.startswith('-start_id'):
            start_id = (pywikibot.input(
                u'What is the id of the photo you want to start at?')
                        if len(arg) == 9 else arg[10:])
        elif arg.startswith('-end_id'):
            end_id = (pywikibot.input(
                u'What is the id of the photo you want to end at?')
                      if len(arg) == 7 else arg[8:])
        elif arg.startswith('-tags'):
            tags = (pywikibot.input(
                u'What is the tag you want to filter out (currently only one supported)?'
            ) if len(arg) == 5 else arg[6:])
        elif arg == '-panoramioreview':
            panoramioreview = True
        elif arg.startswith('-reviewer'):
            reviewer = (pywikibot.input(u'Who is the reviewer?')
                        if len(arg) == 9 else arg[10:])
        elif arg.startswith('-override'):
            override = (pywikibot.input(u'What is the override text?')
                        if len(arg) == 9 else arg[10:])
        elif arg.startswith('-addcategory'):
            addCategory = (pywikibot.input(
                u'What category do you want to add?')
                           if len(arg) == 12 else arg[13:])
        elif arg == '-autonomous':
            autonomous = True

    if not photoset:
        usage()
    else:
        for photoInfo in getPhotos(photoset, start_id, end_id):
            photoInfo = getLicense(photoInfo)
            #time.sleep(10)
            uploadedPhotos += processPhoto(photoInfo, panoramioreview,
                                           reviewer, override, addCategory,
                                           autonomous)
            totalPhotos += 1
    pywikibot.output(u'Finished running')
    pywikibot.output(u'Total photos: ' + str(totalPhotos))
    pywikibot.output(u'Uploaded photos: ' + str(uploadedPhotos))
예제 #54
0
def main():
    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
##    imagerecat.initLists()

    photoset = u''  # public (popular photos), full (all photos), user ID number
    size = u'original'
    minx = u''
    miny = u''
    maxx = u''
    maxy = u''
    start_id = u''
    end_id = u''
    addCategory = u''
    autonomous = False
    totalPhotos = 0
    uploadedPhotos = 0

    # Do we mark the images as reviewed right away?
    if config.panoramio['review']:
        panoramioreview = config.panoramio['review']
    else:
        panoramioreview = False

    # Set the Panoramio reviewer
    if config.panoramio['reviewer']:
        reviewer = config.panoramio['reviewer']
    elif 'commons' in config.sysopnames['commons']:
        print config.sysopnames['commons']
        reviewer = config.sysopnames['commons']['commons']
    elif 'commons' in config.usernames['commons']:
        reviewer = config.usernames['commons']['commons']
    else:
        reviewer = u''

    # Should be renamed to overrideLicense or something like that
    override = u''
    for arg in pywikibot.handleArgs():
        if arg.startswith('-set'):
            if len(arg) == 4:
                photoset = pywikibot.input(u'What is the set?')
            else:
                photoset = arg[5:]
        elif arg.startswith('-start_id'):
            if len(arg) == 9:
                start_id = pywikibot.input(
                    u'What is the id of the photo you want to start at?')
            else:
                start_id = arg[10:]
        elif arg.startswith('-end_id'):
            if len(arg) == 7:
                end_id = pywikibot.input(
                    u'What is the id of the photo you want to end at?')
            else:
                end_id = arg[8:]
        elif arg.startswith('-tags'):
            if len(arg) == 5:
                tags = pywikibot.input(
                    u'What is the tag you want to filter out (currently only '
                    u'one supported)?')
            else:
                tags = arg[6:]
        elif arg == '-panoramioreview':
            panoramioreview = True
        elif arg.startswith('-reviewer'):
            if len(arg) == 9:
                reviewer = pywikibot.input(u'Who is the reviewer?')
            else:
                reviewer = arg[10:]
        elif arg.startswith('-override'):
            if len(arg) == 9:
                override = pywikibot.input(u'What is the override text?')
            else:
                override = arg[10:]
        elif arg.startswith('-addcategory'):
            if len(arg) == 12:
                addCategory = pywikibot.input(
                    u'What category do you want to add?')
            else:
                addCategory = arg[13:]
        elif arg == '-autonomous':
            autonomous = True

    if photoset:
        for photoInfo in getPhotos(photoset, start_id, end_id):
            photoInfo = getLicense(photoInfo)
            #time.sleep(10)
            uploadedPhotos += processPhoto(photoInfo, panoramioreview,
                                           reviewer, override, addCategory,
                                           autonomous)
            totalPhotos += 1
    else:
        usage()
    pywikibot.output(u'Finished running')
    pywikibot.output(u'Total photos: ' + str(totalPhotos))
    pywikibot.output(u'Uploaded photos: ' + str(uploadedPhotos))
예제 #55
0
def main():
    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    #imagerecat.initLists()

    #Get the api key
    if config.flickr['api_key']:
        flickr = flickrapi.FlickrAPI(config.flickr['api_key'])
    else:
        pywikibot.output('Flickr api key not found! Get yourself an api key')
        pywikibot.output(
            'Any flickr user can get a key at http://www.flickr.com/services/api/keys/apply/'
        )
        return

    group_id = u''
    photoset_id = u''
    user_id = u''
    start_id = u''
    end_id = u''
    tags = u''
    addCategory = u''
    removeCategories = False
    autonomous = False
    totalPhotos = 0
    uploadedPhotos = 0

    # Do we mark the images as reviewed right away?
    if config.flickr['review']:
        flickrreview = config.flickr['review']
    else:
        flickrreview = False

    # Set the Flickr reviewer
    if config.flickr['reviewer']:
        reviewer = config.flickr['reviewer']
    elif 'commons' in config.sysopnames['commons']:
        print config.sysopnames['commons']
        reviewer = config.sysopnames['commons']['commons']
    elif 'commons' in config.usernames['commons']:
        reviewer = config.usernames['commons']['commons']
    else:
        reviewer = u''

    # Should be renamed to overrideLicense or something like that
    override = u''
    for arg in pywikibot.handleArgs():
        if arg.startswith('-group_id'):
            if len(arg) == 9:
                group_id = pywikibot.input(
                    u'What is the group_id of the pool?')
            else:
                group_id = arg[10:]
        elif arg.startswith('-photoset_id'):
            if len(arg) == 12:
                photoset_id = pywikibot.input(u'What is the photoset_id?')
            else:
                photoset_id = arg[13:]
        elif arg.startswith('-user_id'):
            if len(arg) == 8:
                user_id = pywikibot.input(
                    u'What is the user_id of the flickr user?')
            else:
                user_id = arg[9:]
        elif arg.startswith('-start_id'):
            if len(arg) == 9:
                start_id = pywikibot.input(
                    u'What is the id of the photo you want to start at?')
            else:
                start_id = arg[10:]
        elif arg.startswith('-end_id'):
            if len(arg) == 7:
                end_id = pywikibot.input(
                    u'What is the id of the photo you want to end at?')
            else:
                end_id = arg[8:]
        elif arg.startswith('-tags'):
            if len(arg) == 5:
                tags = pywikibot.input(
                    u'What is the tag you want to filter out (currently only one supported)?'
                )
            else:
                tags = arg[6:]
        elif arg == '-flickrreview':
            flickrreview = True
        elif arg.startswith('-reviewer'):
            if len(arg) == 9:
                reviewer = pywikibot.input(u'Who is the reviewer?')
            else:
                reviewer = arg[10:]
        elif arg.startswith('-override'):
            if len(arg) == 9:
                override = pywikibot.input(u'What is the override text?')
            else:
                override = arg[10:]
        elif arg.startswith('-addcategory'):
            if len(arg) == 12:
                addCategory = pywikibot.input(
                    u'What category do you want to add?')
            else:
                addCategory = arg[13:]
        elif arg == '-removecategories':
            removeCategories = True
        elif arg == '-autonomous':
            autonomous = True

    if user_id or group_id or photoset_id:
        for photo_id in getPhotos(flickr, user_id, group_id, photoset_id,
                                  start_id, end_id, tags):
            uploadedPhotos += processPhoto(flickr, photo_id, flickrreview,
                                           reviewer, override, addCategory,
                                           removeCategories, autonomous)
            totalPhotos += 1
    else:
        usage()
    pywikibot.output(u'Finished running')
    pywikibot.output(u'Total photos: ' + str(totalPhotos))
    pywikibot.output(u'Uploaded photos: ' + str(uploadedPhotos))
예제 #56
0
def main(args):
    '''
    Main loop.
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)

    start_id = 0

    conn = None
    cursor = None
    (conn, cursor) = geograph_lib.connectDatabase()

    conn2 = None
    cursor2 = None
    (conn2, cursor2) = geograph_lib.connectDatabase2(
        'sql-s2.toolserver.org', u'u_multichill_commons_categories_p')

    if (len(args) > 1):
        if len(args) > 2:
            start_id = int(args[2])
        sourcedir = args[0]
        destinationdir = args[1]
        if os.path.isdir(sourcedir) and os.path.isdir(destinationdir):
            #print sourcedir
            for subdir in os.listdir(sourcedir):
                #print subdir
                if os.path.isdir(sourcedir + subdir):
                    #print subdir
                    sourcefilenames = glob.glob(sourcedir + subdir + u"/*.jpg")
                    sourcefilenames = filterSourceFilenames(sourcefilenames)
                    for sourcefilename in sourcefilenames:
                        # First get the file id
                        fileId = getFileId(sourcefilename)
                        if fileId >= start_id:
                            wikipedia.output(str(fileId))

                            duplicates = findDuplicateImages(sourcefilename)
                            if duplicates:
                                wikipedia.output(
                                    u'Found duplicate image at %s' %
                                    duplicates.pop())
                            else:
                                #Get metadata
                                metadata = geograph_lib.getMetadata(
                                    fileId, cursor)

                                #Check if we got metadata
                                if metadata:

                                    #Get description
                                    description = geograph_lib.getDescription(
                                        metadata)

                                    # The hard part, find suitable categories
                                    # categories =  geograph_lib.getCategories(metadata, cursor, cursor2)
                                    categories = '{{Uncategorized-Geograph|gridref=%s|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n' % (
                                        metadata.get('grid_reference'), )
                                    #print categories
                                    description = description + categories

                                    wikipedia.output(description)

                                    #Get destinationfilename
                                    destinationFilename = geograph_lib.getTitle(
                                        metadata)

                                    #Copy file to destination dir
                                    shutil.copy(
                                        unicode(sourcefilename),
                                        unicode(destinationdir +
                                                destinationFilename + u'.jpg'))
                                    #And save the description as well
                                    outputDescriptionFile(
                                        destinationdir + destinationFilename +
                                        u'.txt', description)