def fixSyntaxSave(self, text):
    exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                  'startspace']
    # link to the wiki working on
    ## TODO: disable this for difflinks and titled links
    ## http://de.wikipedia.org/w/index.php?title=Wikipedia%3aVandalismusmeldung&diff=103109563&oldid=103109271
    ## text = pywikibot.replaceExcept(text,
    ##        r'\[https?://%s\.%s\.org/wiki/(?P<link>\S+)\s+(?P<title>.+?)\s?\]'
    ##        % (self.site.lang, self.site.family.name),
    ##        r'[[\g<link>|\g<title>]]', exceptions)
    # external link in double brackets
    text = pywikibot.replaceExcept(text,
                                   r'\[\[(?P<url>https?://[^\]]+?)\]\]',
                                   r'[\g<url>]', exceptions)
    # external link starting with double bracket
    text = pywikibot.replaceExcept(text, r'\[\[(?P<url>https?://.+?)\]',
                                   r'[\g<url>]', exceptions)
    # external link and description separated by a pipe, with
    # whitespace in front of the pipe, so that it is clear that
    # the pipe is not a legitimate part of the URL.
    text = pywikibot.replaceExcept(
        text,
        r'\[(?P<url>https?://[^\|\] \r\n]+?) +\| *(?P<label>[^\|\]]+?)\]',
        r'[\g<url> \g<label>]', exceptions)
    # pipe in external link, where the correct end of the URL can
    # be detected from the file extension. It is very unlikely that
    # this will cause mistakes.
    text = pywikibot.replaceExcept(
        text,
        r'\[(?P<url>https?://[^\|\] ]+?'
        r'(\.pdf|\.html|\.htm|\.php|\.asp|\.aspx|\.jsp)) *\| *'
        r'(?P<label>[^\|\]]+?)\]',
        r'[\g<url> \g<label>]', exceptions)
    return text
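# A minimal standalone sketch (not part of the original toolkit) of the first
# bracket fix above, using plain re.sub instead of pywikibot.replaceExcept;
# replaceExcept would additionally skip the regions named in `exceptions`.
def _demo_fixSyntaxSave():
    import re
    s = u'See [[http://example.org]] for details.'
    s = re.sub(r'\[\[(?P<url>https?://[^\]]+?)\]\]', r'[\g<url>]', s)
    assert s == u'See [http://example.org] for details.'
    return s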
def removeUselessSpaces(self, text):
    multipleSpacesR = re.compile(' +')
    spaceAtLineEndR = re.compile(' $')
    exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace',
                  'table', 'template']
    text = pywikibot.replaceExcept(text, multipleSpacesR, ' ', exceptions)
    text = pywikibot.replaceExcept(text, spaceAtLineEndR, '', exceptions)
    return text
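# A hypothetical standalone demo (not in the original module) of the two
# regexes above, with plain re.sub standing in for pywikibot.replaceExcept,
# which would additionally skip tables, templates etc.
def _demo_removeUselessSpaces():
    import re
    s = u'some  text   here '
    s = re.sub(' +', ' ', s)
    s = re.sub(' $', '', s)
    assert s == u'some text here'
    return s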
def fixArabicLetters(self, text):
    if self.site.lang == 'ckb' or self.site.lang == 'fa':
        exceptions = [
            'gallery',
            'hyperlink',
            'interwiki',
            # but changes letters inside wikilinks
            #'link',
            'math',
            'pre',
            'template',
            'timeline',
            'ref',
            'source',
            'startspace',
            'inputbox',
        ]
        # do not change inside file links
        namespaces = list(self.site.namespace(6, all=True))
        pattern = re.compile(u'\[\[(' + '|'.join(namespaces) +
                             '):.+?\..+?\]\]', re.UNICODE)
        exceptions.append(pattern)
        text = pywikibot.replaceExcept(text, u',', u'،', exceptions)
        if self.site.lang == 'ckb':
            text = pywikibot.replaceExcept(text, ur'ه([.،_<\]\s])',
                                           ur'ە\1', exceptions)
            text = pywikibot.replaceExcept(text, u'ه', u'ە', exceptions)
            text = pywikibot.replaceExcept(text, u'ه', u'ھ', exceptions)
            text = pywikibot.replaceExcept(text, u'ك', u'ک', exceptions)
            text = pywikibot.replaceExcept(text, ur'[ىي]', u'ی',
                                           exceptions)
        # replace persian digits
        for i in range(0, 10):
            if self.site.lang == 'ckb':
                text = pywikibot.replaceExcept(text, u'۰۱۲۳۴۵۶۷۸۹'[i],
                                               u'٠١٢٣٤٥٦٧٨٩'[i],
                                               exceptions)
            else:
                text = pywikibot.replaceExcept(text, u'٠١٢٣٤٥٦٧٨٩'[i],
                                               u'۰۱۲۳۴۵۶۷۸۹'[i],
                                               exceptions)
        # do not change digits in class, style and table params
        pattern = re.compile(u'=".*?"', re.UNICODE)
        exceptions.append(pattern)
        # do not change digits inside html-tags
        pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE)
        exceptions.append(pattern)
        exceptions.append('table')  # exclude tables for now
        for i in range(0, 10):
            if self.site.lang == 'ckb':
                text = pywikibot.replaceExcept(text, str(i),
                                               u'٠١٢٣٤٥٦٧٨٩'[i],
                                               exceptions)
            else:
                text = pywikibot.replaceExcept(text, str(i),
                                               u'۰۱۲۳۴۵۶۷۸۹'[i],
                                               exceptions)
    return text
def commonsfiledesc(self, text):
    # section headers to {{int:}} versions
    exceptions = ['comment', 'includeonly', 'math', 'noinclude', 'nowiki',
                  'pre', 'source', 'ref', 'timeline']
    text = pywikibot.replaceExcept(text,
                                   r"([\r\n]|^)\=\= *Summary *\=\=",
                                   r"\1== {{int:filedesc}} ==",
                                   exceptions, True)
    text = pywikibot.replaceExcept(
        text,
        r"([\r\n])\=\= *\[\[Commons:Copyright tags\|Licensing\]\]: *\=\=",
        r"\1== {{int:license}} ==", exceptions, True)
    text = pywikibot.replaceExcept(
        text,
        r"([\r\n])\=\= *(Licensing|License information|{{int:license-header}}) *\=\=",
        r"\1== {{int:license}} ==", exceptions, True)
    # frequent field values to {{int:}} versions
    text = pywikibot.replaceExcept(
        text,
        r"([\r\n]\|[Ss]ource *\= *)(?:[Oo]wn work by uploader|[Oo]wn work|[Ee]igene [Aa]rbeit) *([\r\n])",
        r"\1{{own}}\2", exceptions, True)
    text = pywikibot.replaceExcept(
        text,
        r"(\| *Permission *\=) *(?:[Ss]ee below|[Ss]iehe unten) *([\r\n])",
        r"\1\2", exceptions, True)
    # added to transwikied pages
    text = pywikibot.replaceExcept(text, r"__NOTOC__", "", exceptions,
                                   True)
    # tracker element for js upload form
    text = pywikibot.replaceExcept(
        text, r"<!-- *{{ImageUpload\|(?:full|basic)}} *-->", "",
        exceptions[1:], True)
    text = pywikibot.replaceExcept(text,
                                   r"{{ImageUpload\|(?:basic|full)}}", "",
                                   exceptions, True)
    # duplicated section headers
    text = pywikibot.replaceExcept(
        text,
        r"([\r\n]|^)\=\= *{{int:filedesc}} *\=\=(?:[\r\n ]*)\=\= *{{int:filedesc}} *\=\=",
        r"\1== {{int:filedesc}} ==", exceptions, True)
    text = pywikibot.replaceExcept(
        text,
        r"([\r\n]|^)\=\= *{{int:license}} *\=\=(?:[\r\n ]*)\=\= *{{int:license}} *\=\=",
        r"\1== {{int:license}} ==", exceptions, True)
    return text
def fixReferences(self, text):
    # http://en.wikipedia.org/wiki/User:AnomieBOT/source/tasks/OrphanReferenceFixer.pm
    exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                  'startspace']
    # it should be name = " or name=" NOT name ="
    text = re.sub(r'(?i)<ref +name(= *| *=)"', r'<ref name="', text)
    # remove empty <ref/>-tag
    text = pywikibot.replaceExcept(text,
                                   r'(?i)(<ref\s*/>|<ref *>\s*</ref>)',
                                   r'', exceptions)
    text = pywikibot.replaceExcept(text,
                                   r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>',
                                   r'<ref \1/>', exceptions)
    return text
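# A hypothetical demo (not part of the original class) of the three <ref>
# fixes above on a sample string, with plain re standing in for replaceExcept.
def _demo_fixReferences():
    import re
    s = u'a<ref name ="n">x</ref>b<ref></ref>c<ref name="n"></ref>d'
    s = re.sub(r'(?i)<ref +name(= *| *=)"', r'<ref name="', s)
    s = re.sub(r'(?i)(<ref\s*/>|<ref *>\s*</ref>)', r'', s)
    s = re.sub(r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>', r'<ref \1/>', s)
    assert s == u'a<ref name="n">x</ref>bc<ref name="n"/>d'
    return s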
def fixTypo(self, text):
    exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                  'startspace', 'gallery', 'hyperlink', 'interwiki',
                  'link']
    # change <number> ccm -> <number> cm³
    text = pywikibot.replaceExcept(text, ur'(\d)\s*&nbsp;ccm',
                                   ur'\1&nbsp;cm³', exceptions) if False \
        else text
    text = pywikibot.replaceExcept(text, ur'(\d)\s* ccm', ur'\1 cm³',
                                   exceptions)
    text = pywikibot.replaceExcept(text, ur'(\d)\s*ccm', ur'\1 cm³',
                                   exceptions)
    # Solve wrong Nº sign with °C or °F
    # additional exception requested on fr-wiki for this stuff
    pattern = re.compile(u'«.*?»', re.UNICODE)
    exceptions.append(pattern)
    text = pywikibot.replaceExcept(text, ur'(\d)\s* [º°]([CF])',
                                   ur'\1 °\2', exceptions)
    text = pywikibot.replaceExcept(text, ur'(\d)\s*[º°]([CF])',
                                   ur'\1 °\2', exceptions)
    text = pywikibot.replaceExcept(text, ur'º([CF])', ur'°\1', exceptions)
    return text
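# A hypothetical demo (not in the original class) of the unit fixes above,
# with plain re.sub instead of replaceExcept.
def _demo_fixTypo():
    import re
    s = u'50ccm and 20ºC'
    s = re.sub(ur'(\d)\s*ccm', ur'\1 cm³', s)
    s = re.sub(ur'(\d)\s*[º°]([CF])', ur'\1 °\2', s)
    assert s == u'50 cm³ and 20 °C'
    return s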
def markActiveTables(self, text):
    """
    Mark all hidden table start and end tags.

    Mark all table start and end tags that are not disabled by nowiki
    tags, comments etc. We will then later only work on these marked tags.
    """
    text = pywikibot.replaceExcept(text, _table_start_regex, '<##table##',
                                   exceptions=['comment', 'math', 'nowiki',
                                               'pre', 'source'])
    text = pywikibot.replaceExcept(text, _table_end_regex, '</##table##>',
                                   exceptions=['comment', 'math', 'nowiki',
                                               'pre', 'source'])
    return text
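# A hedged sketch (not part of the original module) of what markActiveTables
# does, assuming _table_start_regex and _table_end_regex match '<table' and
# '</table>' case-insensitively, as the inline variant further below does.
def _demo_markActiveTables():
    import re
    s = u'<TABLE class="x">cell</table>'
    s = re.sub(u'<table', u'<##table##', s, flags=re.IGNORECASE)
    s = re.sub(u'</table>', u'</##table##>', s, flags=re.IGNORECASE)
    assert s == u'<##table## class="x">cell</##table##>'
    return s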
def commonsfiledesc(self, text):
    # section headers to {{int:}} versions
    exceptions = ['comment', 'includeonly', 'math', 'noinclude', 'nowiki',
                  'pre', 'source', 'ref', 'timeline']
    text = pywikibot.replaceExcept(text,
                                   r"([\r\n]|^)\=\= *Summary *\=\=",
                                   r"\1== {{int:filedesc}} ==",
                                   exceptions, True)
    text = pywikibot.replaceExcept(
        text,
        r"([\r\n])\=\= *\[\[Commons:Copyright tags\|Licensing\]\]: *\=\=",
        r"\1== {{int:license-header}} ==", exceptions, True)
    text = pywikibot.replaceExcept(
        text,
        r"([\r\n])\=\= *(Licensing|License information|{{int:license}}) *\=\=",
        r"\1== {{int:license-header}} ==", exceptions, True)
    # frequent field values to {{int:}} versions
    text = pywikibot.replaceExcept(
        text,
        r'([\r\n]\|[Ss]ource *\= *)(?:[Oo]wn work by uploader|[Oo]wn work|[Ee]igene [Aa]rbeit) *([\r\n])',
        r'\1{{own}}\2', exceptions, True)
    text = pywikibot.replaceExcept(
        text,
        r'(\| *Permission *\=) *(?:[Ss]ee below|[Ss]iehe unten) *([\r\n])',
        r'\1\2', exceptions, True)
    # added to transwikied pages
    text = pywikibot.replaceExcept(text, r'__NOTOC__', '', exceptions,
                                   True)
    # tracker element for js upload form
    text = pywikibot.replaceExcept(
        text, r'<!-- *{{ImageUpload\|(?:full|basic)}} *-->', '',
        exceptions[1:], True)
    text = pywikibot.replaceExcept(text,
                                   r'{{ImageUpload\|(?:basic|full)}}', '',
                                   exceptions, True)
    # duplicated section headers
    text = pywikibot.replaceExcept(
        text,
        r'([\r\n]|^)\=\= *{{int:filedesc}} *\=\=(?:[\r\n ]*)\=\= *{{int:filedesc}} *\=\=',
        r'\1== {{int:filedesc}} ==', exceptions, True)
    text = pywikibot.replaceExcept(
        text,
        r'([\r\n]|^)\=\= *{{int:license-header}} *\=\=(?:[\r\n ]*)\=\= *{{int:license-header}} *\=\=',
        r'\1== {{int:license-header}} ==', exceptions, True)
    return text
def fixArabicLetters(self, text):
    exceptions = [
        'gallery',
        'hyperlink',
        'interwiki',
        # but changes letters inside wikilinks
        #'link',
        'math',
        'pre',
        'template',
        'timeline',
        'ref',
        'source',
        'startspace',
        'inputbox',
    ]
    # valid digits
    digits = {
        'ckb': u'٠١٢٣٤٥٦٧٨٩',
        'fa': u'۰۱۲۳۴۵۶۷۸۹',
    }
    faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits['fa']
    new = digits.pop(self.site.lang)
    # This only works if there are only two items in digits dict
    old = digits[digits.keys()[0]]
    # do not change inside file links
    namespaces = list(self.site.namespace(6, all=True))
    pattern = re.compile(
        u'\[\[(' + '|'.join(namespaces) +
        '):.+?\.\w+? *(\|((\[\[.*?\]\])|.)*)?\]\]',
        re.UNICODE)
    # not to let bot edits in latin content
    exceptions.append(re.compile(u"[^%(fa)s] *?\"*? *?, *?[^%(fa)s]"
                                 % {'fa': faChrs}))
    exceptions.append(pattern)
    text = pywikibot.replaceExcept(text, u',', u'،', exceptions)
    if self.site.lang == 'ckb':
        text = pywikibot.replaceExcept(text,
                                       u'\u0647([.\u060c_<\\]\\s])',
                                       u'\u06d5\\1', exceptions)
        text = pywikibot.replaceExcept(text, u'ه', u'ە', exceptions)
        text = pywikibot.replaceExcept(text, u'ه', u'ھ', exceptions)
        text = pywikibot.replaceExcept(text, u'ك', u'ک', exceptions)
        text = pywikibot.replaceExcept(text, u'[ىي]', u'ی', exceptions)

    return text

    # replace persian/arabic digits
    ## deactivated due to bug #3539407
    for i in range(0, 10):
        text = pywikibot.replaceExcept(text, old[i], new[i], exceptions)
    # do not change digits in class, style and table params
    pattern = re.compile(u'\w+=(".+?"|\d+)', re.UNICODE)
    exceptions.append(pattern)
    # do not change digits inside html-tags
    pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE)
    exceptions.append(pattern)
    exceptions.append('table')  # exclude tables for now
    # replace digits
    for i in range(0, 10):
        text = pywikibot.replaceExcept(text, str(i), new[i], exceptions)
    return text
def markActiveTables(self, text):
    """
    Mark all table start and end tags that are not disabled by nowiki
    tags, comments etc. We will then later only work on these marked tags.
    """
    tableStartTagR = re.compile("<table", re.IGNORECASE)
    tableEndTagR = re.compile("</table>", re.IGNORECASE)
    text = pywikibot.replaceExcept(text, tableStartTagR, "<##table##",
                                   exceptions=['comment', 'math', 'nowiki',
                                               'pre', 'source'])
    text = pywikibot.replaceExcept(text, tableEndTagR, "</##table##>",
                                   exceptions=['comment', 'math', 'nowiki',
                                               'pre', 'source'])
    return text
def translateMagicWords(self, text):
    """
    Makes sure that localized versions of magic words are used.
    """
    # not wanted at ru
    # arz uses english stylish codes
    if self.site.lang not in ['arz', 'ru']:
        exceptions = ['nowiki', 'comment', 'math', 'pre']
        for magicWord in ['img_thumbnail', 'img_left', 'img_center',
                          'img_right', 'img_none', 'img_framed',
                          'img_frameless', 'img_border', 'img_upright', ]:
            aliases = self.site.getmagicwords(magicWord)
            if not aliases:
                continue
            text = pywikibot.replaceExcept(
                text,
                r'\[\[(?P<left>.+?:.+?\..+?\|) *(' + '|'.join(aliases) +
                ') *(?P<right>(\|.*?)?\]\])',
                r'[[\g<left>' + aliases[0] + '\g<right>', exceptions)
    return text
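# A hypothetical demo (not in the original class) of the magic-word regex
# above, with made-up aliases standing in for
# self.site.getmagicwords('img_thumbnail'); the first alias wins.
def _demo_translateMagicWords():
    import re
    aliases = [u'thumb', u'miniatur']
    s = u'[[File:Pic.jpg|miniatur|caption]]'
    s = re.sub(r'\[\[(?P<left>.+?:.+?\..+?\|) *(' + '|'.join(aliases) +
               r') *(?P<right>(\|.*?)?\]\])',
               r'[[\g<left>' + aliases[0] + r'\g<right>', s)
    assert s == u'[[File:Pic.jpg|thumb|caption]]'
    return s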
def majUneSeuleListe(self, listePages, titrePageRandomArticles):
    pageAsLinkBoolean = True
    if titrePageRandomArticles in blackList:
        pageAsLinkBoolean = False
    exceptions = []
    texteARajouter = u""
    x = 0
    # Build a text containing the list of the pages
    for page in listePages:
        x = x + 1
        texteARajouter = texteARajouter + u"\n|" + str(x) + u"=" + \
                         page.title(asLink=pageAsLinkBoolean)
    nbPages = x
    pageRandomArticles = pywikibot.Page(self.site, titrePageRandomArticles)
    text = pageRandomArticles.get()
    pywikibot.output(u"\n> \03{lightblue}Traitement de %s\03{default} <"
                     % pageRandomArticles.title())
    text = self.suppressionAncienneListe(text)
    old = re.compile(u"\{\{#switch:\{\{rand\|(1\||2=)[0-9]*\}\}")
    new = (u"{{#switch:{{rand|1|%i}}%s" % (nbPages, texteARajouter))
    text = pywikibot.replaceExcept(text, old, new, exceptions)
    if not self.save(text, pageRandomArticles, self.summary):
        pywikibot.output(u'Aucun changement nécessaire')
def translateAndCapitalizeNamespaces(self, text):
    """
    Makes sure that localized namespace names are used.
    """
    # arz uses english stylish codes
    if self.site.sitename() == 'wikipedia:arz':
        return text
    family = self.site.family
    # wiki links aren't parsed here.
    exceptions = ['nowiki', 'comment', 'math', 'pre']
    for nsNumber in family.namespaces:
        if not family.isDefinedNSLanguage(nsNumber, self.site.lang):
            # Skip undefined namespaces
            continue
        namespaces = list(family.namespace(self.site.lang, nsNumber,
                                           all=True))
        thisNs = namespaces.pop(0)
        if nsNumber == 6 and family.name == 'wikipedia' and \
           self.site.lang in ('en', 'fr'):
            # do not change "Image" on en-wiki and fr-wiki
            for image in [u'Image', u'image']:
                if image in namespaces:
                    namespaces.remove(image)
        # skip main (article) namespace
        if thisNs and namespaces:
            text = pywikibot.replaceExcept(
                text,
                r'\[\[\s*(' + '|'.join(namespaces) +
                ') *:(?P<nameAndLabel>.*?)\]\]',
                r'[[' + thisNs + ':\g<nameAndLabel>]]',
                exceptions)
    return text
def suppressionAncienneListe(self, text, nombreCrochets=2):
    exceptions = []
    #old = re.compile(u"(\n\|[0-9]{1,} *= *[\[\{]{2}.*[\]\}]{2}|\n[0-9]{1,} *= *[\[\{]{2}.*[\]\}]{2}\|)")
    old = re.compile(u"(\n\|[0-9]{1,} *= *[\[\{]{0,2}.*[\]\}]{0,2}"
                     u"|\n[0-9]{1,} *= *[\[\{]{0,2}.*[\]\}]{0,2}\|)")
    new = u""
    text = pywikibot.replaceExcept(text, old, new, exceptions)
    return text
def removeNonBreakingSpaceBeforePercent(self, text):
    """
    Newer MediaWiki versions automatically place a non-breaking space in
    front of a percent sign, so it is no longer required to place it
    manually.
    """
    text = pywikibot.replaceExcept(text, r'(\d)&nbsp;%', r'\1 %',
                                   ['timeline'])
    return text
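# A hypothetical demo (not in the original module), assuming the non-breaking
# space is written as a literal '&nbsp;' entity between the digit and the
# percent sign, as the pattern above expects.
def _demo_removeNonBreakingSpaceBeforePercent():
    import re
    assert re.sub(r'(\d)&nbsp;%', r'\1 %',
                  u'about 50&nbsp;%') == u'about 50 %'
    return True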
def fixHtml(self, text):
    # Everything case-insensitive (?i)
    # Keep in mind that MediaWiki automatically converts <br> to <br />
    exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                  'startspace']
    text = pywikibot.replaceExcept(text, r'(?i)<b>(.*?)</b>',
                                   r"'''\1'''", exceptions)
    text = pywikibot.replaceExcept(text, r'(?i)<strong>(.*?)</strong>',
                                   r"'''\1'''", exceptions)
    text = pywikibot.replaceExcept(text, r'(?i)<i>(.*?)</i>',
                                   r"''\1''", exceptions)
    text = pywikibot.replaceExcept(text, r'(?i)<em>(.*?)</em>',
                                   r"''\1''", exceptions)
    # horizontal line without attributes in a single line
    text = pywikibot.replaceExcept(text,
                                   r'(?i)([\r\n])<hr[ /]*>([\r\n])',
                                   r'\1----\2', exceptions)
    # horizontal line with attributes; can't be done with wiki syntax
    # so we only make it XHTML compliant
    text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>',
                                   r'<hr \1 />', exceptions)
    # a header where only spaces are in the same line
    for level in range(1, 7):
        equals = '\\1%s \\2 %s\\3' % ("=" * level, "=" * level)
        text = pywikibot.replaceExcept(
            text,
            r'(?i)([\r\n]) *<h%d> *([^<]+?) *</h%d> *([\r\n])'
            % (level, level),
            r'%s' % equals,
            exceptions)
    # TODO: maybe we can make the bot replace <p> tags with \r\n's.
    return text
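# A hypothetical demo (not part of the original class) of the <b>/<i>
# conversions above, with plain re.sub instead of replaceExcept.
def _demo_fixHtml():
    import re
    s = u'<B>bold</B> and <i>italic</i>'
    s = re.sub(r'(?i)<b>(.*?)</b>', r"'''\1'''", s)
    s = re.sub(r'(?i)<i>(.*?)</i>', r"''\1''", s)
    assert s == u"'''bold''' and ''italic''"
    return s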
def fixStyle(self, text):
    exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                  'startspace']
    # convert prettytable to wikitable class
    if self.site.lang in ('de', 'en'):
        text = pywikibot.replaceExcept(
            text,
            r'(class="[^"]*)prettytable([^"]*")',
            r'\1wikitable\2', exceptions)
    return text
def cleanUpSectionHeaders(self, text):
    """
    For better readability of section header source code, puts a space
    between the equal signs and the title.
    Example: ==Section title== becomes == Section title ==

    NOTE: This space is recommended in the syntax help on the English and
    German Wikipedia. It might be that it is not wanted on other wikis.
    If there are any complaints, please file a bug report.
    """
    for level in range(1, 7):
        equals = '=' * level
        text = pywikibot.replaceExcept(
            text,
            r'\n' + equals + ' *(?P<title>[^=]+?) *' + equals + ' *\r\n',
            '\n' + equals + ' \g<title> ' + equals + '\r\n',
            ['comment', 'math', 'nowiki', 'pre'])
    return text
def replaceDeprecatedTemplates(self, text):
    exceptions = ['comment', 'math', 'nowiki', 'pre']
    if self.site.family.name in deprecatedTemplates and \
       self.site.lang in deprecatedTemplates[self.site.family.name]:
        for template in deprecatedTemplates[self.site.family.name][self.site.lang]:
            old = template[0]
            new = template[1]
            if new is None:
                new = ''
            else:
                new = '{{' + new + '}}'
            if not self.site.nocapitalize:
                old = '[' + old[0].upper() + old[0].lower() + ']' + old[1:]
            text = pywikibot.replaceExcept(
                text,
                r'\{\{([mM][sS][gG]:)?' + old +
                '(?P<parameters>\|[^}]+|)}}',
                new, exceptions)
    return text
def cleanUpSectionHeaders(self, text):
    """
    For better readability of section header source code, puts a space
    between the equal signs and the title.
    Example: ==Section title== becomes == Section title ==

    NOTE: This space is recommended in the syntax help on the English and
    German Wikipedia. It might be that it is not wanted on other wikis.
    If there are any complaints, please file a bug report.
    """
    return pywikibot.replaceExcept(
        text,
        r'(?m)^(={1,7}) *(?P<title>[^=]+?) *\1 *\r?\n',
        r'\1 \g<title> \1%s' % config.LS,
        ['comment', 'math', 'nowiki', 'pre'])
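# A hypothetical demo of the single-regex variant above; '\n' stands in for
# config.LS, which is assumed to be the configured line separator.
def _demo_cleanUpSectionHeaders():
    import re
    s = u'==Section title==\n'
    s = re.sub(r'(?m)^(={1,7}) *(?P<title>[^=]+?) *\1 *\r?\n',
               r'\1 \g<title> \1' + '\n', s)
    assert s == u'== Section title ==\n'
    return s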
def doReplacements(self, original_text):
    """
    Returns the text which is generated by applying all replacements to
    the given text.
    """
    new_text = original_text
    exceptions = []
    if "inside-tags" in self.exceptions:
        exceptions += self.exceptions["inside-tags"]
    if "inside" in self.exceptions:
        exceptions += self.exceptions["inside"]
    for old, new in self.replacements:
        if self.sleep is not None:
            time.sleep(self.sleep)
        new_text = pywikibot.replaceExcept(new_text, old, new, exceptions,
                                           allowoverlap=self.allowoverlap)
    return new_text
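# A hypothetical sketch of the replacement loop above, with plain re.sub and
# without the exception handling or throttling; `replacements` mimics
# self.replacements as (old, new) pairs.
def _demo_doReplacements():
    import re
    replacements = [(u'colour', u'color'), (u' +', u' ')]
    s = u'the  colour'
    for old, new in replacements:
        s = re.sub(old, new, s)
    assert s == u'the color'
    return s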
def translateAndCapitalizeNamespaces(self, text):
    """
    Makes sure that localized namespace names are used.
    """
    # arz uses english stylish codes
    if self.site.sitename() == 'wikipedia:arz':
        return text
    family = self.site.family
    # wiki links aren't parsed here.
    exceptions = ['nowiki', 'comment', 'math', 'pre']
    for nsNumber in self.site.namespaces():
        if nsNumber in (0, 2, 3):
            # skip main (article) namespace
            # skip user namespace, maybe gender is used
            continue
        # a clone is needed. Won't change the namespace dict
        namespaces = list(self.site.namespace(nsNumber, all=True))
        thisNs = namespaces.pop(0)
        if nsNumber == 6 and family.name == 'wikipedia':
            if self.site.lang in ('en', 'fr') and \
               family.versionnumber(self.site.lang) >= 14:
                # do not change "Image" on en-wiki and fr-wiki
                assert u'Image' in namespaces
                namespaces.remove(u'Image')
            if self.site.lang == 'hu':
                # do not change "Kép" on hu-wiki
                assert u'Kép' in namespaces
                namespaces.remove(u'Kép')
            elif self.site.lang == 'pt':
                # bug #3346901 should be implemented
                continue
        # lowerspaced and underscored namespaces
        for i in xrange(len(namespaces)):
            item = namespaces[i].replace(' ', '[ _]')
            item = u'[%s%s]' % (item[0], item[0].lower()) + item[1:]
            namespaces[i] = item
        namespaces.append(thisNs[0].lower() + thisNs[1:])
        if thisNs and namespaces:
            text = pywikibot.replaceExcept(
                text,
                r'\[\[\s*(%s) *:(?P<nameAndLabel>.*?)\]\]'
                % '|'.join(namespaces),
                r'[[%s:\g<nameAndLabel>]]' % thisNs,
                exceptions)
    return text
def translateAndCapitalizeNamespaces(self, text):
    """
    Makes sure that localized namespace names are used.
    """
    # arz uses english stylish codes
    if self.site.sitename() == 'wikipedia:arz':
        return text
    family = self.site.family
    # wiki links aren't parsed here.
    exceptions = ['nowiki', 'comment', 'math', 'pre']
    for nsNumber in self.site.namespaces():
        if nsNumber in (0, 2, 3):
            # skip main (article) namespace
            # skip user namespace, maybe gender is used
            continue
        # a clone is needed. Won't change the namespace dict
        namespaces = list(self.site.namespace(nsNumber, all=True))
        thisNs = namespaces.pop(0)
        if nsNumber == 6 and family.name == 'wikipedia':
            if self.site.lang in ('en', 'fr') and \
               LV(self.site.version()) >= LV('1.14'):
                # do not change "Image" on en-wiki and fr-wiki
                assert u'Image' in namespaces
                namespaces.remove(u'Image')
            if self.site.lang == 'hu':
                # do not change "Kép" on hu-wiki
                assert u'Kép' in namespaces
                namespaces.remove(u'Kép')
            elif self.site.lang == 'pt':
                # bug #3346901 should be implemented
                continue
        # lowerspaced and underscored namespaces
        for i in range(len(namespaces)):
            item = namespaces[i].replace(' ', '[ _]')
            item = u'[%s%s]' % (item[0], item[0].lower()) + item[1:]
            namespaces[i] = item
        namespaces.append(thisNs[0].lower() + thisNs[1:])
        if thisNs and namespaces:
            text = pywikibot.replaceExcept(
                text,
                r'\[\[\s*(%s) *:(?P<nameAndLabel>.*?)\]\]'
                % '|'.join(namespaces),
                r'[[%s:\g<nameAndLabel>]]' % thisNs,
                exceptions)
    return text
def putSpacesInLists(self, text):
    """
    For better readability of bullet list and enumeration wiki source
    code, puts a space between the * or # and the text.

    NOTE: This space is recommended in the syntax help on the English,
    German, and French Wikipedia. It might be that it is not wanted on
    other wikis. If there are any complaints, please file a bug report.
    """
    exceptions = ['comment', 'math', 'nowiki', 'pre', 'source',
                  'timeline']
    if not (self.redirect or self.template) and \
       pywikibot.calledModuleName() != 'capitalize_redirects':
        text = pywikibot.replaceExcept(
            text,
            r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)',
            '\g<bullet> \g<char>',
            exceptions)
    return text
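# A hypothetical demo (not in the original class) of the bullet-list regex
# above; the same regex appears in the variant further below.
def _demo_putSpacesInLists():
    import re
    s = u'*item one\n#*item two\n'
    s = re.sub(r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)'
               r'(?P<char>[^\s\*#:;].+?)',
               r'\g<bullet> \g<char>', s)
    assert s == u'* item one\n#* item two\n'
    return s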
def replaceDeprecatedTemplates(self, text):
    exceptions = ['comment', 'math', 'nowiki', 'pre']
    if self.site.family.name in deprecatedTemplates and \
       self.site.lang in deprecatedTemplates[self.site.family.name]:
        for template in deprecatedTemplates[self.site.family.name][self.site.lang]:
            old = template[0]
            new = template[1]
            if new is None:
                new = ''
            else:
                new = '{{%s}}' % new
            if not self.site.nocapitalize:
                old = '[' + old[0].upper() + old[0].lower() + ']' + old[1:]
            text = pywikibot.replaceExcept(
                text,
                r'\{\{([mM][sS][gG]:)?%s(?P<parameters>\|[^}]+|)}}' % old,
                new, exceptions)
    return text
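# A hypothetical demo (not in the original class): mapping one made-up
# deprecated template name instead of reading the deprecatedTemplates table.
def _demo_replaceDeprecatedTemplates():
    import re
    old, new = u'[Oo]ldbox', u'{{Newbox}}'
    s = u'{{msg:Oldbox|param=1}} and {{oldbox}}'
    s = re.sub(r'\{\{([mM][sS][gG]:)?%s(?P<parameters>\|[^}]+|)}}' % old,
               new, s)
    assert s == u'{{Newbox}} and {{Newbox}}'
    return s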
def putSpacesInLists(self, text):
    """
    For better readability of bullet list and enumeration wiki source
    code, puts a space between the * or # and the text.

    NOTE: This space is recommended in the syntax help on the English,
    German, and French Wikipedia. It might be that it is not wanted on
    other wikis. If there are any complaints, please file a bug report.
    """
    if not self.template:
        exceptions = ['comment', 'math', 'nowiki', 'pre', 'source',
                      'template', 'timeline', self.site.redirectRegex()]
        text = pywikibot.replaceExcept(
            text,
            r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)',
            '\g<bullet> \g<char>',
            exceptions)
    return text
def __iter__(self):
    try:
        for entry in self.parser:
            if self.skipping:
                if entry.title != self.xmlStart:
                    continue
                self.skipping = False
            if not self.isTitleExcepted(entry.title) \
                    and not self.isTextExcepted(entry.text):
                new_text = entry.text
                for old, new in self.replacements:
                    new_text = pywikibot.replaceExcept(
                        new_text, old, new, self.excsInside,
                        site=self.site)
                if new_text != entry.text:
                    yield pywikibot.Page(self.site, entry.title)
    except KeyboardInterrupt:
        try:
            if not self.skipping:
                pywikibot.output(
                    u'To resume, use "-xmlstart:%s" on the command line.'
                    % entry.title)
        except NameError:
            pass
def cleanUpLinks(self, text):
    # helper function which works on one link and either returns it
    # unmodified, or returns a replacement.
    def handleOneLink(match):
        titleWithSection = match.group('titleWithSection')
        label = match.group('label')
        trailingChars = match.group('linktrail')
        newline = match.group('newline')
        if not self.site.isInterwikiLink(titleWithSection):
            # The link looks like this:
            # [[page_title|link_text]]trailing_chars
            # We only work on namespace 0 because pipes and linktrails
            # work differently for images and categories.
            page = pywikibot.Page(pywikibot.Link(titleWithSection,
                                                 self.site))
            try:
                namespace = page.namespace()
            except pywikibot.InvalidTitle:
                return match.group()
            if namespace == 0:
                # Replace underlines by spaces, also multiple underlines
                titleWithSection = re.sub('_+', ' ', titleWithSection)
                # Remove double spaces
                titleWithSection = re.sub(' +', ' ', titleWithSection)
                # Remove unnecessary leading spaces from title,
                # but remember if we did this because we eventually want
                # to re-add it outside of the link later.
                titleLength = len(titleWithSection)
                titleWithSection = titleWithSection.lstrip()
                hadLeadingSpaces = (len(titleWithSection) != titleLength)
                hadTrailingSpaces = False
                # Remove unnecessary trailing spaces from title,
                # but remember if we did this because it may affect
                # the linktrail and because we eventually want to
                # re-add it outside of the link later.
                if not trailingChars:
                    titleLength = len(titleWithSection)
                    titleWithSection = titleWithSection.rstrip()
                    hadTrailingSpaces = (len(titleWithSection) !=
                                         titleLength)
                # Convert URL-encoded characters to unicode
                titleWithSection = pywikibot.url2unicode(titleWithSection,
                                                         site=self.site)
                if titleWithSection == '':
                    # just skip empty links.
                    return match.group()
                # Remove unnecessary initial and final spaces from label.
                # Please note that some editors prefer spaces around pipes.
                # (See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
                if label is not None:
                    # Remove unnecessary leading spaces from label,
                    # but remember if we did this because we want
                    # to re-add it outside of the link later.
                    labelLength = len(label)
                    label = label.lstrip()
                    hadLeadingSpaces = (len(label) != labelLength)
                    # Remove unnecessary trailing spaces from label,
                    # but remember if we did this because it affects
                    # the linktrail.
                    if not trailingChars:
                        labelLength = len(label)
                        label = label.rstrip()
                        hadTrailingSpaces = (len(label) != labelLength)
                else:
                    label = titleWithSection
                if trailingChars:
                    label += trailingChars
                if titleWithSection == label or \
                   titleWithSection[0].lower() + \
                   titleWithSection[1:] == label:
                    newLink = "[[%s]]" % label
                # Check if we can create a link with trailing characters
                # instead of a pipelink
                elif (len(titleWithSection) <= len(label) and
                      label[:len(titleWithSection)] == titleWithSection and
                      re.sub(trailR, '',
                             label[len(titleWithSection):]) == ''):
                    newLink = "[[%s]]%s" % (label[:len(titleWithSection)],
                                            label[len(titleWithSection):])
                else:
                    # Try to capitalize the first letter of the title.
                    # Maybe this feature is not useful for languages that
                    # don't capitalize nouns...
                    #if not self.site.nocapitalize:
                    if self.site.sitename() == 'wikipedia:de':
                        titleWithSection = (titleWithSection[0].upper() +
                                            titleWithSection[1:])
                    newLink = "[[%s|%s]]" % (titleWithSection, label)
                # re-add spaces that were pulled out of the link.
                # Examples:
                #     text[[ title ]]text        -> text [[title]] text
                #     text[[ title | name ]]text -> text [[title|name]] text
                #     text[[ title |name]]text   -> text[[title|name]]text
                #     text[[title| name]]text    -> text [[title|name]]text
                if hadLeadingSpaces and not newline:
                    newLink = ' ' + newLink
                if hadTrailingSpaces:
                    newLink = newLink + ' '
                if newline:
                    newLink = newline + newLink
                return newLink
        # don't change anything
        return match.group()

    trailR = re.compile(self.site.linktrail())
    # The regular expression which finds links. Results consist of four
    # groups:
    # group <newline> depends whether the link starts with a new line.
    # group <titleWithSection> is the page title and section, that is,
    # everything before | or ]. It'll include the # to make life easier
    # for us.
    # group <label> is the alternative link title between | and ].
    # group <linktrail> is the link trail after ]] which is part of the
    # word.
    # note that the definition of 'letter' varies from language to
    # language.
    linkR = re.compile(
        r'(?P<newline>[\n]*)\[\[(?P<titleWithSection>[^\]\|]+)'
        r'(\|(?P<label>[^\]\|]*))?\]\]'
        r'(?P<linktrail>' + self.site.linktrail() + ')')
    text = pywikibot.replaceExcept(text, linkR, handleOneLink,
                                   ['comment', 'math', 'nowiki', 'pre',
                                    'startspace'])
    return text
def majAvecSousPages(self, listePages, titrePageRandomArticles):
    ########## Adjustable variables ##########
    nbMaxPagesParPaquet = 1200
    ##########################################
    exceptions = []
    texte = u""
    nombrePagesDansChaqueListe = []
    paquetsListesPages = []
    x = 0
    pageAsLinkBoolean = True
    if titrePageRandomArticles in blackList:
        pageAsLinkBoolean = False
    # Build the batches of page lists
    for page in listePages:
        x = x + 1
        if page != u"{[COUPURE NOUVELLE PAGE]}":
            texte = texte + u"\n|" + str(x) + u"=" + \
                    page.title(asLink=pageAsLinkBoolean)
            if x == nbMaxPagesParPaquet:
                paquetsListesPages.append(texte)
                nombrePagesDansChaqueListe.append(nbMaxPagesParPaquet)
                texte = u""
                x = 0
        elif page == u"{[COUPURE NOUVELLE PAGE]}" and x > 1:
            #pywikibot.output(u"{[COUPURE NOUVELLE PAGE]}")
            paquetsListesPages.append(texte)
            nombrePagesDansChaqueListe.append(x)
            texte = u""
            x = 0
    if x != 0:
        nombrePagesDansChaqueListe.append(x)
        paquetsListesPages.append(texte)
    pywikibot.output("nombrePagesDansChaqueListe = %s"
                     % nombrePagesDansChaqueListe)

    num = 0
    listeSousPages = []
    for texte in paquetsListesPages:
        nbPages = nombrePagesDansChaqueListe[num]
        num = num + 1
        texteARajouter = texte
        page = pywikibot.Page(self.site,
                              (u"%s/%i" % (titrePageRandomArticles, num)))
        listeSousPages.append(u"%s/%i"
                              % (titrePageRandomArticles.replace(
                                  u"Modèle:", u""), num))
        try:
            text = page.get()
            pywikibot.output(u"\n> \03{lightblue}Traitement de %s\03{default} <"
                             % page.title())
            text = self.suppressionAncienneListe(text)
            old = re.compile(u"\{\{#switch:\{\{rand\|(0\||1\||2=)[0-9]*\}\}\|?")
            new = (u"{{#switch:{{rand|1|%i}}%s"
                   % (nbPages, texteARajouter))
            text = pywikibot.replaceExcept(text, old, new, exceptions)
            if not self.save(text, page, self.summary):
                pywikibot.output(u'Aucun changement nécessaire')
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s inexistante' % page.title())
            text = (u"{{#switch:{{rand|1|%i}}%s\n}}"
                    % (nbPages, texteARajouter))
            ###### To be uncommented for correct operation ######
            if not self.creerPage(text, page, self.summary):
                pywikibot.output(u'La page n\'a pas été créée…')
            ###

    # Now process the random page itself to update the number of
    # existing subpages
    page = pywikibot.Page(self.site, titrePageRandomArticles)
    text = page.get()
    pywikibot.output(u"\n> \03{lightblue}Traitement de %s\03{default} <"
                     % page.title())
    texteARajouter = u""
    z = 0
    for titreSousPage in listeSousPages:
        z = z + 1
        texteARajouter = texteARajouter + u"\n|%i={{%s}}" \
                         % (z, titreSousPage)
    text = self.suppressionAncienneListe(text)
    #pywikibot.output(texteARajouter)
    old = re.compile(u"\{\{#switch:\{\{rand\|(1\||2=)[0-9]*\}\}")
    new = (u"{{#switch:{{rand|1|%i}}%s"
           % (len(listeSousPages), texteARajouter))
    text = pywikibot.replaceExcept(text, old, new, exceptions)
    if not self.save(text, page, self.summary):
        pywikibot.output(u'Aucun changement nécessaire')
def transferImage(self, sourceImagePage):
    """Gets a wikilink to an image, downloads it and its description,
    and uploads it to another wikipedia.

    Returns the filename which was used to upload the image.

    This function is used by imagetransfer.py and by copy_table.py

    """
    sourceSite = sourceImagePage.site()
    url = sourceImagePage.fileUrl().encode('utf-8')
    pywikibot.output(u"URL should be: %s" % url)
    # localize the text that should be printed on the image description
    # page
    try:
        description = sourceImagePage.get()
        # try to translate license templates
        if (sourceSite.sitename(),
                self.targetSite.sitename()) in licenseTemplates:
            for old, new in licenseTemplates[
                    (sourceSite.sitename(),
                     self.targetSite.sitename())].iteritems():
                new = '{{%s}}' % new
                old = re.compile('{{%s}}' % old)
                description = pywikibot.replaceExcept(
                    description, old, new,
                    ['comment', 'math', 'nowiki', 'pre'])
        description = pywikibot.translate(self.targetSite,
                                          copy_message) \
            % (sourceSite, description)
        description += '\n\n' + \
                       sourceImagePage.getFileVersionHistoryTable()
        # add interwiki link
        if sourceSite.family == self.targetSite.family:
            description += "\r\n\r\n" + unicode(sourceImagePage)
    except pywikibot.NoPage:
        description = ''
        print "Image does not exist or description page is empty."
    except pywikibot.IsRedirectPage:
        description = ''
        print "Image description page is redirect."
    else:
        bot = upload.UploadRobot(url=url, description=description,
                                 targetSite=self.targetSite,
                                 urlEncoding=sourceSite.encoding(),
                                 keepFilename=self.keep_name,
                                 verifyDescription=not self.keep_name)
        # try to upload
        targetFilename = bot.run()
        if targetFilename and self.targetSite.family.name == 'commons' \
           and self.targetSite.lang == 'commons':
            # upload to Commons was successful
            reason = pywikibot.translate(sourceSite, nowCommonsMessage)
            # try to delete the original image if we have a sysop account
            if sourceSite.family.name in config.sysopnames and \
               sourceSite.lang in config.sysopnames[sourceSite.family.name]:
                if sourceImagePage.delete(reason):
                    return
            if sourceSite.lang in nowCommonsTemplate and \
               sourceSite.family.name in config.usernames and \
               sourceSite.lang in config.usernames[sourceSite.family.name]:
                # add the nowCommons template.
                pywikibot.output(u'Adding nowCommons template to %s'
                                 % sourceImagePage.title())
                sourceImagePage.put(
                    sourceImagePage.get() + '\n\n' +
                    nowCommonsTemplate[sourceSite.lang] % targetFilename,
                    comment=nowCommonsMessage[sourceSite.lang])
def run(self): ############################################# ##### (fr) : Paramètres modifiables ##### (en) : Modifiable parameters ############################################# exceptions = [] titreModeleOld = u'Infobox Stade' ## Obsolète : cf. ƒ2 plus bas # ‹!› Ne pas mettre de parenthèses ! #titreModeleOldRe = u'[iI]nfobox Émission de télévision|[iI]nfobox [Tt]élévision|[Ii]nfobox Télévision nouveau|[Ii]nfobox TV émission' modeleOld = pywikibot.Page(self.site, u'Modèle:%s' % titreModeleOld) modeleNew = u'\\1Infobox Stade' checkNamespace = True checkNumberNamespace = 0 onlySaveIfMajorChange = True ajoutParametresAbsents = False alignementSignesEgal = True useCategories = False listeTitresCategories = [ u"Détection temporaire paramètre float Infobox ville" ] #### Expérimental : ƒ2 #### """ Recherche automatique des redirections afin de remplir la variable titreModeleOldRe avec une expression régulière. ‹!› La variable titreModeleOldRe ne doit pas contenir de parenthèses codantes, auquel cas les expressions régulières utilisant la variable seraient faussées. """ premiereLettreMinuscule = titreModeleOld[0:1].lower() premiereLettreMajuscule = titreModeleOld[0:1].upper() resteTitre = titreModeleOld[1:] titreModeleOldRe = u'[%s%s]%s' % (premiereLettreMinuscule, premiereLettreMajuscule, resteTitre) for page in modeleOld.getReferences(redirectsOnly=True): premiereLettreMinuscule = page.title(asLink=False)[0:1].lower() premiereLettreMajuscule = page.title(asLink=False)[0:1].upper() resteTitre = titreModeleOld[1:] titreModeleOldRe += u'|[%s%s]%s' % ( premiereLettreMinuscule, premiereLettreMajuscule, resteTitre) ############################ ## Obsolète : Remplacé par la détection de l'argument ## '-reprise:' (voir fonction main()). # (fr) : Activer ce paramètre permet de reprendre le # traitement à partir de la page donnée # (en) : Enable this parameter allows restarting the treatment # form the page given #reprise = True #titreReprise = u"La Folle Route" ##### (fr) : Modifications majeures ##### ##### (en) : Major changes ##### ## (fr) : Liste de recherches ## ## (en) : Find list ## listeRechercheElements = { #1 : u'\n? *\| *surnom *= *.*' #1 : u'(\n? *\| *département *= [^\n]*\[ *\[ *[hH]autes[ -][pP]yrénées *\] *\](\n.*)*géoloc-département) *= *(\n|\})', #5 : u'(\n? *\| *département *= [^\n]*\[ *\[ *[hH]auts[ -][dD]e[ -][sS]eine]**\] *\](\n.*)*géoloc-département) *= *(\n|\})', #6 : u'(\n? *\| *département *= [^\n]*\[ *\[ *[sS]eine[ -][sS]aint[ -][dD]enis *\] *\](\n.*)*géoloc-département) *= *(\n|\})', #7 : u'(\n? *\| *département *= [^\n]*\[ *\[ *[Vv]al[ -][Dd]e[ -][mM]arne *\] *\](\n.*)*géoloc-département) *= *(\n|\})' #8 : u'(\n? *\| *département *= [^\n]*\[ *\[ *[sS]eine *\] *\](\n.*)*géoloc-département) *= *(\n|\})', #9 : u'(\n? *\| *département *= [^\n]*\[ *\[ *[sS]eine *\] *\](\n.*)*géoloc-département) *= *(\n|\})' #1 : u'(\n? *\| *)site production( *= *.*)', #2 : u"(\n? *\| *(précédé|suivi) *par *= *)''(.*)''" #3 : u'(\n? *\| *)carte2( *= *.*)', #4 : u'(\n? *\| *taille-logo *= *.*) *(px|cm|mm) *' #1 : re.compile(u'(alt moy *= *[0-9]*) *m *'), #2 : re.compile(u'(\| *)arrondissement( *= *)(.*)') #6 : u'(\n *\|.*) *= *\{ *\{ *[Ff][Oo][Rr][Mm][Aa][Tt][Nn][Uu][Mm] *: *([0-9]*) *\} *\}', #7 : u'(\n? *\| *région) *= *\[ *\[ *Région (.*)', #8 : u'(\n? *\| *région) *= *\[ *\[.*\| *([bB]retagne|[cC]entre).*\] *\]', #9 : u'(\n? *\| *région) *= *\[ *\[.*\| *[Rr]éunion.*\] *\]', #10: u'(.*) *= *[Nn]\.? *[Cc]\.? *', #1 : u'(\n? 
*\| *(longitude|latitude)) *= *([0-9]*)[°\'‘’" ]*([0-9]*)[°\'‘’" ]*([0-9]*)[°\'‘’" ]*[Ee](st)?', #2 : u'(\n? *\| *(longitude|latitude)) *= *([0-9]*)[°\'‘’" ]*([0-9]*)[°\'‘’" ]*([0-9]*)[°\'‘’" ]*([Oo](uest)?|[Ww](est)?)', #3 : u'(\n? *\| *(longitude|latitude)) *= *([0-9]*)[°\'‘’" ]*([0-9]*)[°\'‘’" ]*([0-9]*)[°\'‘’" ]*[Nn](ord)?', #4 : u'(\n? *\| *(longitude|latitude)) *= *([0-9]*)[°\'‘’" ]*([0-9]*)[°\'‘’" ]*([0-9]*)[°\'‘’" ]*[Ss](ud)?' #6 : u'(\n? *\| *(longitude|latitude) *= .*)//', #1 : u'\n? *\| *(float) *= *.*', ## Paramètres à supprimer #8 : re.compile(u'(\n?) *\| *Région *= *(.*)'), #1 : re.compile(ur'\| *coordonnées *=.*(.*\| *latitude *= *[^\n]+.*\| *longitude *= *[^\n]+\n|.*\| *longitude *= *[^\n]+.*\| *latitude *= *[^\n]+\n)', re.DOTALL), #2 : re.compile(ur'(\| *latitude *= *[^\n]+.*\| *longitude *= *[^\n]+.*|\| *longitude *= *[^\n]+.*\| *latitude *= *[^\n]+.*)\| *coordonnées *=[^\n]*\n?', re.DOTALL), #3 : u'(\n?) *\| *coordonnées *= *\{ *\{ *[Cc]oord *\| *([0-9\.-]+) *\| *([0-9\.]+) *\| *([0-9\.]+) *\| *([NS]) *\| *([0-9\.-]+) *\| *([0-9\.]+) *\| *([0-9\.]+) *\| *([EW])[^.\}]*\} *\}', #4 : u'(\n?) *\| *coordonnées *= *\{ *\{ *[Cc]oord *\| *([0-9\.-]+) *\| *([NS]) *\| *([0-9\.-]+) *\| *([EW])[^.\}]*\} *\}', #5 : u'(\n?) *\| *coordonnées *= *\{ *\{ *[Cc]oord *\| *([0-9\.-]+) *\| *([0-9\.-]+)[^.\}]*\} *\}', #6 : u'(\n?) *\| *coordonnées *= *[^0-9\n]*' #5 : re.compile(ur'\{ *\{ *[Cc]oord[\n]*\} *\}(.*\| *latitude *= *[^\n]+.*\| *longitude *= *[^\n]+.*|.*\| *longitude *= *[^\n]+.*\| *latitude *= *[^\n]+)', re.DOTALL) #13 : u'(\n? *\| *(longitude|latitude)) *= *([0-9\.]*) *[Ee](st)?' #1 : u"((\n? *\| *)image *=.*) ((\$®\$|\|)[0-9]+px *)", #2 : u"(\n? *\| *)image *= *\[ *\[ *([iI]mage|[Ff]ichier|[Ff]ile) *: *([^\|]+) *(((\||\$®\$)[^\|]*)?) *\] *\]" 1: u"→" } ## (fr) : Liste de remplacements ## ## (en) : Replace list ## listeRemplacementsElements = { 1: u" {{info|→|prêt}}" #2 : u"\\1image = \\3\\1légende = \\4" #2 : u"\\1\\3" #1 : u'\\1 = Hautes-Pyrénées\\3', #3 : u'\\1 | latitude = \\2/\\3/\\4/\\5\n | longitude = \\6/\\7/\\8/\\9', #4 : u'\\1 | latitude = \\2/\\3\n | longitude = \\4/\\5', #5 : u'\\1 | latitude = \\2\n | longitude = \\3', #6 : u'\\1 | latitude = \n | longitude = ' #1 : u'|nom de division = [[Départements d\'Haïti|Département]]\n|division =\\3', #2 : u'|nom de division2 = Arrondissement\n|division2 = \\3' #7 : u'\\1 = [[\\2', #8 : u'\\1 = [[Région \\2|\\2]]', #9 : u'\\1 = [[La Réunion|Réunion]]', #10 : u'\\1 = ', #1 : u'\\1 = \\3/\\4/\\5/E', #2 : u'\\1 = \\3/\\4/\\5/W', #3 : u'\\1 = \\3/\\4/\\5/N', #4 : u'\\1 = \\3/\\4/\\5/S' } ## (fr) : Liste d'ajouts ## ## (en) : Adds list ## ##### (fr) : Modifications mineures ##### ##### (en) : Minor changes ##### listeConditionsPositivesAjouts = { #1 : u"" } listeConditionsNegativesAjouts = { #1 : u"\| *carte *= *[EÉée]tats-Unis/[fF]loride" } listeElementsAAjouter = { #1 : u"| carte=États-Unis/Floride" } ## (fr) : Liste de recherches ## ## (en) : Find list ## listeRechercheElementsMineure = { #1 : u'(\| *image *=.*\n) *\| *([^=]*\n)', #2 : u'(\n? *\| *(longueur|largeur) *= *[0-9]*),' } ## (fr) : Liste de remplacements ## ## (en) : Replace list ## listeRemplacementsElementsMineure = { #1 : u'\\1 | légende = \\2', #2 : u'\\1.' 
} ############################################# #### (fr) : Début du traitement #### (en) : Beginning of the treatment ############################################# if not self.debug: if not useCategories: listePages = [ page for page in modeleOld.getReferences( follow_redirects=False, withTemplateInclusion=True, onlyTemplateInclusion=True, redirectsOnly=False) ] else: listePages = [] for titreCategorie in listeTitresCategories: cat = pywikibot.Category(self.site, titreCategorie) listePages.extend(list(cat.articles())) pywikibot.output(u'Taille de la liste = %i' % len(listePages)) listePagesARetirer = [] if self.reprise: for page in listePages: if page.title() == self.titreReprise: break listePagesARetirer.append(page) if checkNamespace: for page in listePages: if page.namespace( ) != checkNumberNamespace and not page in listePagesARetirer: listePagesARetirer.append(page) for page in listePagesARetirer: listePages.remove(page) elif self.debug: listePages = [ pywikibot.Page(self.site, u'User:Toto Azéro/Bac à sable') ] pywikibot.output(u"Nombre de pages à traiter : %i" % len(listePages)) for page in listePages: if onlySaveIfMajorChange: possibiliteSauvegarder = False else: possibiliteSauvegarder = True text = self.load(page) pywikibot.output( u"\n> \03{lightblue}Traitement de %s\03{default} <" % page.title()) ############ ### À utiliser uniquement dans les cas exceptionnels : ### permet d'autoriser la sauvegarde dans un cas précis global exception_possibiliteSauvegarder exception_possibiliteSauvegarder = False #textOld = text #text = re.sub(u'(\n? *\| *date-sans *=[^\n]*) *\} *\} *(([\n]+.*)*géoloc-département *= *)', u'\\1\\2}}', text) #if text != textOld: # exception_possibiliteSauvegarder = True ############# ################################################################# ############ TRAVAIL SUR L'INFOBOX EXTRAITE DU TEXTE ############ ################################################################# ##### Délimiter le début de l'infobox try: matchDebut = re.search(u'\{ *\{ *(%s)' % titreModeleOldRe, text).group(0) except: pywikibot.output( u"\03{lightred}Absence du modèle %s sur la page %s\03{default}" % (titreModeleOld, page.title())) continue positionMatchDebut = text.index(matchDebut) extraitText = text[positionMatchDebut:] #### Délimiter la fin possible de l'infobox #### (i.e. les premiers u'}}' trouvés) # ‹!› Le résultat n'est pas forcément la fin réelle # de l'infobox : en effet, cet ordre ne tient pas # compte de probables modèles présents dans l'infobox # et s'arrêtera aux premiers u'}}' trouvés, quels # qu'ils soient ! matchFin = re.search(u' *\} *\}', extraitText).group(0) positionMatchFin = extraitText.index(matchFin) extraitText = extraitText[0:(positionMatchFin + len(matchFin))] # NB : dans extraitText[0:(positionMatchFin + len(matchFin))], # on ajoute la longueur des u'}}' pour qu'ils se # trouvent bien dans l'extrait traité # Principe de la boucle : tant que le nombre de u'{' # et celui de u'}' ne sont pas équibilibrés, la variable # extraitText est agrandie jusqu'au u'}}' suivants trouvés # dans le texte. 
            positionMatchFin = text.index(extraitText)
            resteText = text[positionMatchFin + len(extraitText):]
            while extraitText.count('{') != extraitText.count('}'):
                matchFin = re.search(u'([^\{\}]* *\} *\}|(\{ *\{[^\{\}]*\} *\})+[^\{\}]* *\} *\})',
                                     resteText).group(0)
                positionMatchFin = resteText.index(matchFin)
                extraitTextOld = extraitText
                extraitText = extraitText + resteText[0:(positionMatchFin
                                                         + len(matchFin))]
                resteText = resteText[positionMatchFin + len(matchFin):]

            ### Work is done on this extract; the replacements are applied
            ### to the full text at the end.
            extraitTextNew = extraitText

            ##### Normalisation of the infobox to avoid problems during its processing #####

            ## Remove the useless u'|' and move them to the beginning of the next parameter ##
            # ex : |cp=66360| \n maire=Gérard Rabat
            # → |cp=66360\n | maire=Gérard Rabat
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew, re.compile(u'(\|[^\[\]\{\}]*=.*)\|\n'),
                u'\\1\n |', exceptions)
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew,
                re.compile(u'\|? *([^\[\]\{\}]*=.*)\| *\n *\|'),
                u'| \\1\n |', exceptions)
            ####

            ### What follows is specific
            ### to the {{Infobox Commune de France}} infobox

            ## Move an image placed beside the commune name (the coat of arms) into the 'armoiries' parameter ##
            # ex : |nomcommune = Saint-Didier-en-Velay [[Image:Blason_Saint-Didier-en-Velay_43.svg|80 px]]
            # → |nomcommune = Saint-Didier-en-Velay […] |armoiries = Blason_Saint-Didier-en-Velay_43.svg
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew,
                re.compile(u'(\| *nomcommune *= *[^\|]*\n?)\[ *\[ *([fF]ichier|[Ii]mage|[Ff]ile) *: *([^\|]*)\|? *([Cc]enter|[Cc]entre|[Ll]eft|[Rr]ight)?[^\]]*\] *\]'),
                u'\\1|armoiries = \\3', exceptions)

            ## Move u'}}' to a new line when they end a parameter line ##
            # ex : u'| géoloc-département = | }}'
            # → u'| géoloc-département = \n}}'
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew,
                re.compile(u'(\n *\|? *.*=.*) *\|+ *\} *\}$'),
                u'\\1\n}}', exceptions)
            # ex : u'| géoloc-département = }}'
            # → u'| géoloc-département = \n}}'
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew,
                re.compile(u'(\n *\|? *.*=.*) *\} *\}$'),
                u'\\1\n}}', exceptions)
            ####

            ## Move a possible u'|' at the end of the infobox's opening line ##
            # ex : u'{{Infobox Commune de France|\n\|'
            # → u'{{Infobox Commune de France\n|'
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew,
                re.compile(u'\{ *\{ *(%s[^\n]*) *\| *\n+ *\|' % titreModeleOldRe),
                u'{{\\1\n | ', exceptions)
            # ex : u'{{Infobox Commune de France|'
            # → u'{{Infobox Commune de France\n|'
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew,
                re.compile(u'\{ *\{ *(%s[^\n]*) *\| *\n' % titreModeleOldRe),
                u'{{\\1\n | ', exceptions)

            ## Remove several successive u'|' ##
            # ex : {{Infobox Commune de France||nomcommune=Baudrecourt
            # → {{Infobox Commune de France|nomcommune=Baudrecourt
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew,
                re.compile(u'(\{ *\{ *(%s.*)) *(\| *){2,}' % titreModeleOldRe),
                u'\\1|', exceptions)
            #print u'0-5\n' + extraitTextNew

            ## Move the u'|' found at the end of a line to the beginning of the next line ##
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew,
                re.compile(u'(\n?[^\[\]\{\}\n]*=[^=\n]*) *\| *\n'),
                u'\\1\n | ', exceptions)
            #extraitTextNew = pywikibot.replaceExcept(extraitTextNew, re.compile(u'(\n?[^=]*=[^=]*) *\| *\n'), u'\\1\n | ', exceptions)
            #print u'1\n' + extraitTextNew
            extraitTextNewOld = extraitTextNew

            ###################################################
            ###### Changes made to avoid errors ######
            ###################################################
            ## TODO (ƒ4) : comment the various replacements
            # The regexes below neutralise the u'=' and u'|' found inside
            # links or templates within a parameter value: re1 to re3
            # replace such u'=' with the sentinel u'$±$', re4 replaces the
            # u'|' inside templates with u'$®$', and re5 tags the genuine
            # parameter separators (u'|name =') with u'$¡$'. Every u'|'
            # still untagged afterwards is neutralised as u'$®$', then the
            # genuine separators are restored. The sentinels are turned
            # back into u'=' and u'|' further below ("End of the changes
            # made to avoid errors", 1/2 and 2/2).
            re1 = re.compile(u'(\| *[^\[\]\{\}]*= *\[ *\[ *([fF]ichier|[Ff]ile|[Ii]mage) *:[^=\]]*\|[^=\]]*)=')
            re2 = re.compile(u'(\| *[^\[\]\{\}]*= *[^\[\]\{\}]*\[ *\[[^\[\]\{\}]*)=([^\[\]\{\}]*\] *\])')
            re3 = re.compile(u'(\| *[^\[\]\{\}]*= *[^\[\]\{\}]*\{ *\{[^\[\]\{\}]*)=([^\[\]\{\}]*\} *\})')
            re4 = re.compile(u'(\| *[^\[\]\{\}]*=.*\{ *\{.*)\|(.*\} *\})')
            re5 = re.compile(u'\|([^\[\]\{\}\|\n]*=) *')
            #re5 = re.compile(u'(\| *[^\[\]\{\}]*=.*\{ *\{[^\}\|]*(\n+[^\}]*)+)\|([^\}]*(\n*[^\}]*)+\} *\})')
            while re.search(re1, extraitTextNew):
                extraitTextNew = pywikibot.replaceExcept(
                    extraitTextNew, re1, u'\\1$±$', exceptions)
            while re.search(re2, extraitTextNew):
                extraitTextNew = pywikibot.replaceExcept(
                    extraitTextNew, re2, u'\\1$±$\\2', exceptions)
            while re.search(re3, extraitTextNew):
                extraitTextNew = pywikibot.replaceExcept(
                    extraitTextNew, re3, u'\\1$±$\\2', exceptions)
            while re.search(re4, extraitTextNew):
                extraitTextNew = pywikibot.replaceExcept(
                    extraitTextNew, re4, u'\\1$®$\\2', exceptions)
            while re.search(re5, extraitTextNew):
                extraitTextNew = pywikibot.replaceExcept(
                    extraitTextNew, re5, u'$¡$\\1 ', exceptions)
            extraitTextNew = extraitTextNew.replace(u'|', u'$®$')
            extraitTextNew = extraitTextNew.replace(u'$¡$', u'|')
            ###################################################
            #print u'2\n' + extraitTextNew
            #pywikibot.showDiff(extraitTextNewOld, extraitTextNew)
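            # ex (hypothetical value) :
            #   u'| image-desc = [[Fichier:Mairie.jpg|La mairie]]'
            # → u'| image-desc = [[Fichier:Mairie.jpg$®$La mairie]]'
            # The u'|' inside the link can no longer be mistaken for a
            # parameter separator by the splitting regexes below; u'$®$'
            # is turned back into u'|' at the end (2/2).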
            ## Separate all the parameters present on the same line ##
            # ex : u'|nomcommune = Saint-Félix-de-Sorgues|région = [[Midi-Pyrénées]]'
            # → u'|nomcommune = Saint-Félix-de-Sorgues
            #    |région = [[Midi-Pyrénées]]'
            verificationStop = re.compile(u'(\| *[^\[\]\{\}]*=[^\n]*)(\| *[^\[\]\{\}]*=[^\n]*)')
            #print extraitTextNew
            while re.search(verificationStop, extraitTextNew):
                extraitTextNew = pywikibot.replaceExcept(
                    extraitTextNew,
                    re.compile(u'(\| *[^\[\]\{\}]*=[^\n]*)(\| *[^\[\]\{\}]*=[^\n]*)'),
                    u'\\1\n\\2', exceptions)
            #print u'3\n' + extraitTextNew
            verificationStop = re.compile(u'(\| *[^\[\{]+=.*\[ *\[(.*\|)+.*\] *\][^\[\n]*)(\| *[^\[\{]+=.*)')
            while re.search(verificationStop, extraitTextNew):
                ## PROBLEM HERE (SOLVED?)
                extraitTextNew = pywikibot.replaceExcept(
                    extraitTextNew,
                    re.compile(u'(\| *[^\[\{]+=.*\[ *\[(.*\|)+.*\] *\][^\[\n]*)(\| *[^\[\{]+=.*)'),
                    u'\\1\n\\2', exceptions)
            #print u'4\n' + extraitTextNew
            #print extraitTextNew

            ## Move the parameter at the end of the template's opening line onto its own line
            # ex : {{Infobox Commune de France|nomcommune=Baudrecourt
            # → {{Infobox Commune de France
            #    | nomcommune=Baudrecourt
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew,
                re.compile(u'\{ *\{ *(%s) *\| *(.*)\n' % titreModeleOldRe),
                u'{{\\1\n | \\2\n', exceptions)
            #print extraitTextNew

            ## Remove a useless '|' (ex : '| }}')
            # ex : | }}
            # → }}
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew, re.compile(u'\n *\|+ *\} *\}'), u'\n}}',
                exceptions)
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew, re.compile(u'\n *(\|?.*=.*) *\| *\} *\}'),
                u'\n\\1}}', exceptions)
            #print u'5\n' + extraitTextNew
            extraitTextNewOld = extraitTextNew

            ## Major changes ##
            for x in listeRechercheElements:
                elementAChercher = listeRechercheElements[x]
                #print 'x = %i' % x
                #print elementAChercher
                #print re.search(elementAChercher, extraitTextNew)
                #print listeRemplacementsElements[x]
                #print re.sub(elementAChercher, listeRemplacementsElements[x], extraitTextNew)
                extraitTextNew = pywikibot.replaceExcept(
                    extraitTextNew, elementAChercher,
                    listeRemplacementsElements[x], exceptions)
                #extraitTextNew = re.sub(elementAChercher, listeRemplacementsElements[x], extraitTextNew)
                #print extraitTextNew
            #print extraitTextNew

            ## Major additions ##
            for x in listeElementsAAjouter:
                elementAAjouter = listeElementsAAjouter[x]
                conditionNegative = listeConditionsNegativesAjouts[x]
                conditionPositive = listeConditionsPositivesAjouts[x]
                #print elementAAjouter
                if not re.search(conditionNegative, extraitTextNew) \
                   and re.search(conditionPositive, extraitTextNew):
                    positionFin = extraitTextNew.rindex('\n}}')
                    #print positionFin
                    #print extraitTextNew[0:positionFin]
                    extraitTextNew = extraitTextNew[0:positionFin] + u"\n" \
                        + elementAAjouter + extraitTextNew[positionFin:]
            #print u'5-5\n' + extraitTextNew
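            # ex : with the (currently disabled) entries of the addition
            # lists above, u'| carte=États-Unis/Floride' would be inserted
            # just before the closing u'\n}}' of any infobox that does not
            # already match u'\| *carte *= *[EÉée]tats-Unis/[fF]loride'.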
            ### Remove the thousands separators in certain given parameters
            listeElements = []
            for element in listeElements:
                m = re.search(u'(%s *= *)([0-9]* [0-9]* *)' % element,
                              extraitTextNew)
                if m is not None:
                    new = m.group(1) + m.group(2).replace(u' ', u'')
                    #print u'1-1 : %s — %s' % (m.group(0), new)
                    extraitTextNew = extraitTextNew.replace(m.group(0), new)
            #print extraitTextNew

            ## Check whether a major change has taken place
            if (extraitTextNew != extraitTextNewOld and onlySaveIfMajorChange) \
               or exception_possibiliteSauvegarder:
                possibiliteSauvegarder = True
                #pywikibot.showDiff(extraitTextNewOld, extraitTextNew)
                # Minor changes
                for x in listeRechercheElementsMineure:
                    elementAChercher = listeRechercheElementsMineure[x]
                    #print 'x = %i' % x
                    #print elementAChercher
                    extraitTextNew = pywikibot.replaceExcept(
                        extraitTextNew, elementAChercher,
                        listeRemplacementsElementsMineure[x], exceptions)
                    #print extraitTextNew
            else:
                continue

            #listeElements = [u'longitude', u'latitude']
            #for element in listeElements:
            #    m = re.search(u'\n? *\| *%s *= *([-—–]?[0-9\.]+)' % element, extraitTextNew)
            #    m2 = re.search(u'\n? *\| *%s *= *[0-9]+\.[0-9]{4,}' % element, extraitTextNew)
            #    if m != None and m2 != None:
            #        extraitTextNew = extraitTextNew.replace(m.group(1), (u'%.4f' % float(m.group(1))))

            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew, re.compile(u'\n *\| *'), u'\n | ',
                exceptions)
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew,
                re.compile(u'(\| *[a-zé²\- _]{2,17}) *= *'),
                u'\\1 = ', exceptions)
            extraitTextNew = pywikibot.replaceExcept(
                extraitTextNew, re.compile(u'(\|.*=.*)\| *$'), u'\\1',
                exceptions)

            ### End of the changes made to avoid errors (1/2)
            extraitTextNew = extraitTextNew.replace(u'$±$', u'=')

            ##### Addition of all the missing parameters #####
            listeParametresActuelsAvecValeurs = extraitTextNew.split(u'\n | ')[1:]
            listeParametres = [u'nomcommune', u'image', u'image-desc',
                               u'armoiries', u'armoiries-desc',
                               u'armoiries-taille', u'logo', u'logo-desc',
                               u'logo-taille',
                               #u'collectivité',
                               u'région', u'canton', u'arrondissement',
                               u'insee', u'cp', u'maire', u'mandat',
                               u'intercomm', u'latitude', u'longitude',
                               u'alt mini', u'alt maxi', u'km²', u'sans',
                               u'date-sans', u'aire-urbaine',
                               u'date-aire-urbaine', u'nomhab', u'siteweb',
                               u'géoloc-département']
            if ajoutParametresAbsents:
                for parametre in listeParametres:
                    if parametre not in extraitTextNew:
                        # Insert the missing parameter right after the one
                        # that precedes it in the reference list.
                        parametrePrecedent = listeParametres[
                            listeParametres.index(parametre) - 1]
                        old = re.compile(u'(\n? *\| *%s *= *.*)'
                                         % parametrePrecedent)
                        new = u'\\1\n | %s = ' % parametre
                        extraitTextNew = pywikibot.replaceExcept(
                            extraitTextNew, old, new, exceptions)
            #print '6\n' + extraitTextNew

            ##### Alignment of the u'=' signs #####
            listeParametresAvecValeurs = extraitTextNew.split(u'\n | ')[1:]
            tailleMaxParametre = 0
            for parametreAvecValeur in listeParametresAvecValeurs:
                #print parametreAvecValeur
                match = re.search(u' *=', parametreAvecValeur).group(0)
                positionSigneEgal = parametreAvecValeur.index(match)
                partieParametre = parametreAvecValeur[0:positionSigneEgal]
                #print 'partieParametre = %s ; taille = %s' % (partieParametre, len(partieParametre))
                if len(partieParametre) > tailleMaxParametre:
                    tailleMaxParametre = len(partieParametre)
            tailleMaxParametre = tailleMaxParametre + 1  # allows one space to be left before the longest parameter…
            #print '\ntailleMaxParametre = %i' % tailleMaxParametre
            if not alignementSignesEgal:
                listeParametresAvecValeurs = []
            #print listeParametresAvecValeurs
            for parametreAvecValeur in listeParametresAvecValeurs:
                #print parametreAvecValeur
                positionSigneEgal = parametreAvecValeur.index(u'=')
                partieParametre = parametreAvecValeur[0:positionSigneEgal]
                partieParametreNew = partieParametre
                while len(partieParametreNew) < tailleMaxParametre:
                    partieParametreNew = partieParametreNew + u' '
                while len(partieParametreNew) > tailleMaxParametre:
                    partieParametreNew = partieParametreNew[0:-1]
                #print str(len(partieParametreNew)) + partieParametreNew
                #print 'partieParametre = ' + partieParametre
                parametreAvecValeurNew = pywikibot.replaceExcept(
                    parametreAvecValeur, u'^%s' % partieParametre,
                    partieParametreNew, exceptions)
                #parametreAvecValeurNew = parametreAvecValeur.replace(u' | ' + partieParametre, u' | ' + partieParametreNew)
                #print 'partieParametreNew = ' + partieParametreNew
                extraitTextNew = extraitTextNew.replace(
                    u'\n | ' + parametreAvecValeur,
                    u'\n | ' + parametreAvecValeurNew)
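            # ex (illustration) :
            #   u'\n | cp = 66360\n | intercomm = …'
            # → u'\n | cp         = 66360\n | intercomm  = …'
            # (each parameter name is padded to the longest one plus one
            # space, here len(u'intercomm') + 1 = 10 characters)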
            ### End of the changes made to avoid errors (2/2)
            extraitTextNew = extraitTextNew.replace(u'$®$', u'|')
            #print extraitTextNew

            ###### Update the text with the modified extract and publish it ######
            #print extraitTextNew
            text = text.replace(extraitText, extraitTextNew)

            resume = self.summary
            #if re.search(u'longitude *= *[^\n]+.*', text) and re.search(u'latitude *= *[^\n]+.*', text):
            #    resume = u'[[WP:RBOT]] : Suppression du paramètre \'coordonnées\' dans le [[modèle:Infobox Pont]] au profit des paramètres \'latitude\' et \'longitude\''
            #if re.search(u'\{ *\{ *[Cc]oord *\|.*\} *\}', text) and re.search(u'\| *latitude *= *.+', text) and re.search(u'\| *longitude *= *.+', text):
            #    text = pywikibot.replaceExcept(text, re.compile(u'\{ *\{ *[Cc]oord *\|.*\} *\}\n?'), u'', exceptions)
            #    resume = resume + u' ; suppression du modèle {{coord}} faisant doublon avec ces paramètres'
            #print extraitTextNew

            print u"‹!› Modifications ‹!›"
            pywikibot.showDiff(page.get(), text)
            #if checkNamespace and page.namespace() == checkNumberNamespace:
            #print possibiliteSauvegarder
            if possibiliteSauvegarder:
                if not self.save(text, page, resume):
                    pywikibot.output(u'La page %s n\'a pas été sauvegardée'
                                     % page.title(asLink=True))
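    # Illustrative sketch (hypothetical helper, not called by the bot):
    # it isolates the brace-balancing idea used above to delimit the
    # infobox. The extract grows to the next u'}}' until the numbers of
    # u'{' and u'}' are balanced, so nested templates are kept whole.
    # ex : extractBalancedTemplate(u'{{Infobox X | a = {{unité|1|km}}\n}} reste', 0)
    # → u'{{Infobox X | a = {{unité|1|km}}\n}}'
    def extractBalancedTemplate(self, text, start):
        # First candidate: everything up to the first u'}}' found.
        fin = text.index(u'}}', start) + 2
        extrait = text[start:fin]
        # Extend the extract while nested templates leave the braces
        # unbalanced; index() raises ValueError if the template is never
        # closed.
        while extrait.count(u'{') != extrait.count(u'}'):
            fin = text.index(u'}}', fin) + 2
            extrait = text[start:fin]
        return extrait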
    def validXhtml(self, text):
        # Normalise the <br> variants to the XHTML-valid form <br />.
        text = pywikibot.replaceExcept(text, r'(?i)<br[ /]*>', r'<br />',
                                       ['comment', 'math', 'nowiki', 'pre'])
        return text
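    # ex (illustration) : validXhtml(u'a<BR>b<br/>c')
    # → u'a<br />b<br />c'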