def main():
    """Parse command-line options, build a page generator and run SelflinkBot."""
    gen = None
    # Free-standing arguments are collected here so that a single title
    # containing spaces can be glued back together afterwards.
    title_parts = []
    # Namespaces the run is restricted to; an empty list means all of them.
    namespaces = []
    # Handles the page-selection options shared by all pywikibot scripts.
    genFactory = pagegenerators.GeneratorFactory()
    always = False
    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            # Bare '-xml' prompts for the dump filename, '-xml:<file>' inlines it.
            if len(arg) == 4:
                dump_name = pywikibot.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                dump_name = arg[5:]
            gen = XmlDumpSelflinkPageGenerator(dump_name)
        elif arg == '-sql':
            # NOT WORKING YET
            query = """
SELECT page_namespace, page_title
FROM page
JOIN pagelinks
JOIN text
ON (page_id = pl_from
AND page_id = old_id)
WHERE pl_title = page_title
AND pl_namespace = page_namespace
AND page_namespace = 0
AND (old_text LIKE concat('%[[', page_title, ']]%')
     OR old_text LIKE concat('%[[', page_title, '|%'))
LIMIT 100"""
            gen = pagegenerators.MySQLPageGenerator(query)
        elif arg.startswith('-namespace:'):
            # Numeric namespace ids become ints; anything else stays a name.
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        elif not genFactory.handleArg(arg):
            title_parts.append(arg)
    if title_parts:
        # A title given directly on the command line wins over any generator.
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(title_parts))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp('selflink')
        return
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloading = pagegenerators.PreloadingGenerator(gen)
    SelflinkBot(preloading, always).run()
def getRijksmonumentWithoutLocation():
    """Return a generator of File pages tagged {{Rijksmonument}} without a location.

    Selects non-redirect pages in the File namespace (6) that transclude the
    Rijksmonument template (template namespace 10) and are not yet members of
    the 'Media_with_locations' category.
    """
    query = """SELECT page_namespace, page_title
FROM page
JOIN templatelinks ON page_id=tl_from
WHERE page_namespace=6 AND page_is_redirect=0
AND tl_namespace=10 AND tl_title='Rijksmonument'
AND NOT EXISTS(SELECT * FROM categorylinks
WHERE page_id=cl_from AND cl_to='Media_with_locations')"""
    return pagegenerators.MySQLPageGenerator(query)
def main():
    """Process every main-namespace page transcluding a 'Co%rdinaten' template.

    Pages are fetched from the database in windows of ``amount`` rows via
    MySQLPageGenerator and each page is handed to workOnPage().

    Bug fix: the original ``while True`` loop never terminated — once the
    LIMIT/OFFSET window moved past the end of the result set it kept issuing
    empty queries forever.  We now stop as soon as a batch comes back empty.
    (The unused locals ``summary`` and ``always`` were dropped.)
    """
    offset = 0
    amount = 50  # rows fetched per SQL query
    query = ("SELECT page_namespace, page_title FROM page "
             "JOIN templatelinks ON page_id=tl_from "
             "WHERE page_namespace=0 AND page_is_redirect=0 "
             "AND tl_title LIKE 'Co%rdinaten'")
    while True:
        thisquery = query + " LIMIT " + str(offset) + " , " + str(amount)
        generator = pagegenerators.MySQLPageGenerator(thisquery)
        offset += amount
        got_any = False
        for page in generator:
            got_any = True
            workOnPage(page)
        if not got_any:
            # Empty batch: we are past the last row, so we are done.
            break
def getImages(categoryA, categoryB): ''' Get images both in categoryA and in categoryB ''' result = None query = u"""SELECT DISTINCT file.page_namespace, file.page_title FROM page AS file JOIN categorylinks AS cat0A ON file.page_id=cat0A.cl_from JOIN categorylinks AS cat0B ON file.page_id=cat0B.cl_from JOIN page AS pcat0B ON cat0B.cl_to=pcat0B.page_title JOIN categorylinks AS cat1B ON pcat0B.page_id=cat1B.cl_from JOIN page AS pcat1B ON cat1B.cl_to=pcat1B.page_title JOIN categorylinks AS cat2B ON pcat1B.page_id=cat2B.cl_from JOIN page AS pcat2B ON cat2B.cl_to=pcat2B.page_title JOIN categorylinks AS cat3B ON pcat2B.page_id=cat3B.cl_from JOIN page AS pcat3B ON cat3B.cl_to=pcat3B.page_title JOIN categorylinks AS cat4B ON pcat3B.page_id=cat4B.cl_from WHERE file.page_namespace=6 AND file.page_is_redirect=0 AND pcat0B.page_namespace=14 AND pcat0B.page_is_redirect=0 AND pcat1B.page_namespace=14 AND pcat1B.page_is_redirect=0 AND pcat2B.page_namespace=14 AND pcat2B.page_is_redirect=0 AND pcat3B.page_namespace=14 AND pcat3B.page_is_redirect=0 AND cat0A.cl_to='%s' AND ( cat0B.cl_to='%s' OR cat1B.cl_to='%s' OR cat2B.cl_to='%s' OR cat3B.cl_to='%s' OR cat4B.cl_to='%s')""" catA = categoryA.replace(u' ', u'_').replace(u'\'', u'\\\'') catB = categoryB.replace(u' ', u'_').replace(u'\'', u'\\\'') print query % (catA, catB, catB, catB, catB, catB) result = pagegenerators.MySQLPageGenerator( query % (catA, catB, catB, catB, catB, catB)) return result
def main(): quietMode = False # use -quiet to get less output # if the -file argument is used, page titles are stored in this array. # otherwise it will only contain one page. articles = [] # if -file is not used, this temporary array is used to read the page title. page_title = [] # Which namespaces should be processed? # default to [] which means all namespaces will be processed namespaces = [] xmlfilename = None gen = None # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. genFactory = pagegenerators.GeneratorFactory() for arg in pywikibot.handleArgs(): if arg.startswith('-xml'): if len(arg) == 4: xmlfilename = pywikibot.input( u'Please enter the XML dump\'s filename:') else: xmlfilename = arg[5:] gen = TableXmlDumpPageGenerator(xmlfilename) elif arg == '-sql': query = u""" SELECT page_namespace, page_title FROM page JOIN text ON (page_id = old_id) WHERE old_text LIKE '%<table%' LIMIT 200""" gen = pagegenerators.MySQLPageGenerator(query) elif arg.startswith('-namespace:'): try: namespaces.append(int(arg[11:])) except ValueError: namespaces.append(arg[11:]) elif arg.startswith('-skip:'): articles = articles[articles.index(arg[6:]):] elif arg.startswith('-auto'): config.table2wikiAskOnlyWarnings = True config.table2wikiSkipWarnings = True print "Automatic mode!\n" elif arg.startswith('-quiet'): quietMode = True else: if not genFactory.handleArg(arg): page_title.append(arg) # if the page is given as a command line argument, # connect the title's parts with spaces if page_title != []: page_title = ' '.join(page_title) page = pywikibot.Page(pywikibot.getSite(), page_title) gen = iter([page]) if not gen: gen = genFactory.getCombinedGenerator() if gen: if namespaces != []: gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = Table2WikiRobot(preloadingGen, quietMode) bot.run() else: 
pywikibot.showHelp('table2wiki')
def main(*args):
    """Command-line entry point for the replace bot.

    Collects replacement pairs and exceptions from the command line (or from a
    predefined fix in the 'fixes' dictionary), compiles them to regexes, builds
    a page generator (XML dump, SQL, explicit -page titles, or the shared
    GeneratorFactory options) and runs ReplaceRobot over the preloaded pages.
    Optionally logs handled page titles to a file.
    """
    add_cat = None
    gen = None
    # summary message
    summary_commandline = False
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a seperate requirements dict needs some
    }                         # major refactoring of code.
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fix = None
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    PageTitles = []
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # This is the maximum number of pages to load per query
    maxquerysize = 60
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Load default summary message.
    # BUG WARNING: This is probably incompatible with the -lang parameter.
    editSummary = pywikibot.translate(pywikibot.getSite(), msg)
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Do not save the page titles, rather work on wiki
    titlefile = None
    filename = None
    # If we save, primary behaviour is append rather then new file
    append = True
    # Read commandline parameters.
    for arg in pywikibot.handleArgs(*args):
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            # NOTE: xmlStart is only ever bound here; the try/NameError dance
            # below covers the case where -xmlstart was not given.
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = pywikibot.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    pywikibot.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-savenew'):
            # -savenew truncates the titles file instead of appending.
            append = False
            if len(arg) == 8:
                filename = pywikibot.input(
                    u'Please enter the filename to save the titles \n(will be deleted if exists):')
            else:
                filename = arg[9:]
        elif arg.startswith('-save'):
            if len(arg) == 5:
                filename = pywikibot.input(
                    u'Please enter the filename to save the titles:')
            else:
                filename = arg[6:]
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fix = arg[5:]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            editSummary = arg[9:]
            summary_commandline = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-query:'):
            maxquerysize = int(arg[7:])
        else:
            # Anything the shared factory does not consume is a replacement
            # operand (old text / new text alternating).
            if not genFactory.handleArg(arg):
                commandline_replacements.append(arg)
    # Replacements must come in (old, new) pairs.
    if (len(commandline_replacements) % 2):
        raise pywikibot.Error, 'require even number of replacements.'
    elif (len(commandline_replacements) == 2 and fix is None):
        # Exactly one pair: use it and derive a summary from it.
        replacements.append(
            (commandline_replacements[0], commandline_replacements[1]))
        if not summary_commandline:
            editSummary = pywikibot.translate(pywikibot.getSite(), msg) % (
                ' (-%s +%s)' % (commandline_replacements[0],
                                commandline_replacements[1]))
    elif (len(commandline_replacements) > 1):
        # Several pairs were given on the command line.
        if (fix is None):
            for i in xrange(0, len(commandline_replacements), 2):
                replacements.append((commandline_replacements[i],
                                     commandline_replacements[i + 1]))
            if not summary_commandline:
                pairs = [(commandline_replacements[i],
                          commandline_replacements[i + 1])
                         for i in range(0, len(commandline_replacements), 2)]
                replacementsDescription = '(%s)' % ', '.join(
                    [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
                editSummary = pywikibot.translate(pywikibot.getSite(), msg) \
                    % replacementsDescription
        else:
            raise pywikibot.Error(
                'Specifying -fix with replacements is undefined')
    elif fix is None:
        # Neither pairs nor a fix: ask the user interactively.
        old = pywikibot.input(
            u'Please enter the text that should be replaced:')
        new = pywikibot.input(u'Please enter the new text:')
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = pywikibot.input(
                u'Please enter another text that should be replaced, or press Enter to start:')
            if old == '':
                change = change + ')'
                break
            new = pywikibot.input(u'Please enter the new text:')
            change = change + ' & -' + old + ' +' + new
            replacements.append((old, new))
        if not summary_commandline:
            default_summary_message = pywikibot.translate(
                pywikibot.getSite(), msg) % change
            pywikibot.output(u'The summary message will default to: %s'
                             % default_summary_message)
            summary_message = pywikibot.input(
                u'Press Enter to use this default message, or enter a description of the\nchanges your bot will make:')
            if summary_message == '':
                summary_message = default_summary_message
            editSummary = summary_message
    else:
        # Perform one of the predefined actions.
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s'
                             % fixes.fixes.keys())
            return
        if "regex" in fix:
            regex = fix['regex']
        if "msg" in fix:
            editSummary = pywikibot.translate(pywikibot.getSite(), fix['msg'])
        if "exceptions" in fix:
            # NOTE(review): this REPLACES the whole exceptions dict, dropping
            # any -except... options given on the command line — presumably
            # intentional, but worth confirming.
            exceptions = fix['exceptions']
        if "nocase" in fix:
            caseInsensitive = fix['nocase']
        replacements = fix['replacements']
    #Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE
    # Pre-compile all regular expressions here to save time later
    for i in range(len(replacements)):
        old, new = replacements[i]
        if not regex:
            # Plain-text mode: treat the old text literally.
            old = re.escape(old)
        oldR = re.compile(old, flags)
        replacements[i] = oldR, new
    # Compile the textual exception patterns in place ('inside-tags' is left
    # as plain strings on purpose).
    for exceptionCategory in ['title', 'require-title', 'text-contains',
                              'inside']:
        if exceptionCategory in exceptions:
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns
    if xmlFilename:
        # xmlStart exists only if -xmlstart was parsed above; default to None.
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join([
            "old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
            for (old, new) in replacements])
        # NOTE(review): 'exceptions' is a dict, so this truth test is always
        # True (the dict is created with five keys) and the comprehension
        # below iterates over the KEY STRINGS ('title', ...), on which
        # '.pattern' would raise AttributeError.  The -sql path therefore
        # looks broken whenever it is reached — confirm against upstream.
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [pywikibot.Page(pywikibot.getSite(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)
    gen = genFactory.getCombinedGenerator(gen)
    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return
    if xmlFilename:
        # XML parsing can be quite slow, so use smaller batches and
        # longer lookahead.
        preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=20,
                                                           lookahead=100)
    else:
        preloadingGen = pagegenerators.PreloadingGenerator(
            gen, pageNumber=maxquerysize)
    #Finally we open the file for page titles or set article to None
    if filename:
        try:
            #This opens in strict error mode, that means bot will stop
            #on encoding errors with ValueError.
            #See http://docs.python.org/library/codecs.html#codecs.open
            titlefile = codecs.open(filename, encoding='utf-8',
                                    mode=(lambda x: x and 'a' or 'w')(append))
        except IOError:
            pywikibot.output("%s cannot be opened for writing." % filename)
            return
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, editSummary,
                       titlefile)
    try:
        bot.run()
    finally:
        if titlefile:
            #Just for the spirit of programming (it was flushed)
            titlefile.close()