def clean(self): self.text = lre.sub(self.pat, '', self.text).replace('\r', '') # first order self.text = lre.sub( r'(?is)\{\{(|' + self.begin + ur')?(?:{}):'.format('|'.join(self.siteDest.namespaces()[10])), r'{{\1', self.text) pat = r'(?is)\{\{(|' + self.begin + ')?((?:' + msg[ 'wikitranslator-exempt-tag'] + r').*?\}\})' self.text = lre.sub( pat, r'<!-- {{\1\3 ' + msg['wikitranslator-exempt-notice'] + ' -->', self.text) # use \3 because self.begin has a hidden parenthesis. self.text = lre.sub(r'(?is)\[\[(|' + self.begin + ur')?Category:', ur'[[\1หมวดหมู่:', self.text) self.text = lre.sub( r'(?is)\[\[(|' + self.begin + ur')?(?:Image|File):', ur'[[\1ไฟล์:', self.text) self.text = lre.sub(r'(?mi)^== *See also *== *$', u'== ดูเพิ่ม ==', self.text) self.text = lre.sub(r'(?mi)^== *External links *== *$', u'== แหล่งข้อมูลอื่น ==', self.text) self.text = lre.sub(r'(?mi)^== *References *== *$', u'== อ้างอิง ==', self.text) return self.text
def clean(self): self.text = lre.sub(self.pat, '', self.text).replace('\r', '') # first order self.text = lre.sub(r'(?is)\{\{(|' + self.begin + ur')?(?:{}):'.format('|'.join(self.siteDest.namespaces()[10])), r'{{\1', self.text) pat = r'(?is)\{\{(|' + self.begin + ')?((?:' + msg['wikitranslator-exempt-tag'] + r').*?\}\})' self.text = lre.sub(pat, r'<!-- {{\1\3 ' + msg['wikitranslator-exempt-notice'] + ' -->', self.text) # use \3 because self.begin has a hidden parenthesis. self.text = lre.sub(r'(?is)\[\[(|' + self.begin + ur')?Category:', ur'[[\1หมวดหมู่:', self.text) self.text = lre.sub(r'(?is)\[\[(|' + self.begin + ur')?(?:Image|File):', ur'[[\1ไฟล์:', self.text) self.text = lre.sub(r'(?mi)^== *See also *== *$', u'== ดูเพิ่ม ==', self.text) self.text = lre.sub(r'(?mi)^== *External links *== *$', u'== แหล่งข้อมูลอื่น ==', self.text) self.text = lre.sub(r'(?mi)^== *References *== *$', u'== อ้างอิง ==', self.text) return self.text
def main():
    """Repoint wiki links from an old page title to a new one.

    Reads the module-level ``args``: ``args[0]`` is the old page title and
    ``args[1]`` the new link target. With any other argument count the
    function returns without doing anything. Every page returned by the old
    page's ``backlinks`` is saved with its ``[[old title`` links rewritten
    to ``[[new title`` (underscores in the new title become spaces).
    """
    if len(args) != 2:
        return

    pagemain = wp.Page(wp.toutf(args[0]))
    newlink = wp.toutf(args[1])
    pywikibot.output("old: " + pagemain.title())
    pywikibot.output("new: " + newlink)

    # Require the title to be followed by "]", "|" or "#" (optionally after
    # whitespace). Without this, moving "Foo" would also corrupt links such
    # as "[[Foobar]]" that merely start with the old title.
    pattern = r"\[\[" + lre.escape(pagemain.title()) + r"(?=\s*[\]|#])"
    # The replacement is a regex template; double any backslash so the new
    # title is inserted literally instead of being read as an escape.
    replacement = ("[[" + newlink.replace("_", " ")).replace("\\", "\\\\")

    for page in pagemain.backlinks(content=True):
        pywikibot.output("processing " + page.title())
        txt = page.get()
        page.put(lre.sub(pattern, replacement, txt), u"ย้ายลิงก์ไปหน้าใหม่")
def dorender(self):
    """Tag link/template openers, translate what follows them, then finalize.

    When the active tab is ``'page'`` the content is first loaded from
    ``self.page``. A numbered marker is inserted between the two characters
    of every ``{{`` and ``[[``. The text following each marker (up to the
    next ``|``, ``}``, ``]`` or newline) is collected from a copy with
    ``pre``/``nowiki``/``source`` blocks and HTML comments removed, passed
    to ``self.translate``, and each marker plus its text is replaced in
    ``self.text`` by the corresponding translation. Ends by calling
    ``self.finalize()``.
    """
    if self.tabactive == 'page':
        self.content = self.page.get()

    # Marker vocabulary; closing halves are the opening halves reversed.
    self.pat_before = '~~~#m!'
    self.pat_after = self.pat_before[::-1]
    self.pat = self.pat_before + r'\d+' + self.pat_after
    self.begin_assert = '((?:(?!~~~).)*?)'
    begin_head = '~~~#h!'
    self.begin = begin_head + self.begin_assert + begin_head[::-1]
    self.end = '~~~#e!'
    self.leadlink = lre.lre(r'^[\[\{]+')
    self.traillink = lre.lre(r'[#{].*$')

    # Split every "{{" / "[[" with a numbered marker.
    self.cnt = 0
    pieces = self.text = []
    raw = self.content
    pos = 0
    while pos < len(raw):
        pair = raw[pos:pos + 2]
        if pair in ('{{', '[['):
            pieces.extend((raw[pos], self.pat_before, str(self.cnt),
                           self.pat_after, raw[pos + 1]))
            self.cnt += 1
            pos += 2
            continue
        pieces.append(raw[pos])
        pos += 1
    self.text = ''.join(pieces)

    # Search copy: markers inside these regions must not be collected.
    self.content = self.text
    for tag in ('pre', 'nowiki', 'source'):
        self.rmtag(tag)
    self.content = lre.sub('(?s)<!--.*?-->', '', self.content)

    finder = '(?s)(' + self.pat + r')(.*?)(?=[|}\]\n])'
    matches = list(lre.finditer(finder, self.content))
    links = [found.group(2) for found in matches]
    translatedLinks = self.translate(links)
    for index, found in enumerate(matches):
        self.text = self.text.replace(found.group(), translatedLinks[index], 1)

    self.finalize()
def dorender(self):
    """Tag link/template openers, translate what follows them, then finalize.

    When the active tab is ``'page'`` the content is first loaded from
    ``self.page``. A numbered marker is inserted between the two characters
    of every ``{{`` and ``[[``. The text following each marker (up to the
    next ``|``, ``}``, ``]`` or newline) is collected from a copy with
    ``pre``/``nowiki``/``source`` blocks and HTML comments removed, passed
    to ``self.translate``, and each marker plus its text is replaced in
    ``self.text`` by the corresponding translation. Ends by calling
    ``self.finalize()``.
    """
    if self.tabactive == 'page':
        self.content = self.page.get()

    # Marker vocabulary; closing halves are the opening halves reversed.
    self.pat_before = '~~~#m!'
    self.pat_after = self.pat_before[::-1]
    self.pat = self.pat_before + r'\d+' + self.pat_after
    self.begin_assert = '((?:(?!~~~).)*?)'
    begin_head = '~~~#h!'
    self.begin = begin_head + self.begin_assert + begin_head[::-1]
    self.end = '~~~#e!'
    self.leadlink = lre.lre(r'^[\[\{]+')
    self.traillink = lre.lre(r'[#{].*$')

    # Split every "{{" / "[[" with a numbered marker.
    self.cnt = 0
    pieces = self.text = []
    raw = self.content
    pos = 0
    while pos < len(raw):
        pair = raw[pos:pos + 2]
        if pair in ('{{', '[['):
            pieces.extend((raw[pos], self.pat_before, str(self.cnt),
                           self.pat_after, raw[pos + 1]))
            self.cnt += 1
            pos += 2
            continue
        pieces.append(raw[pos])
        pos += 1
    self.text = ''.join(pieces)

    # Search copy: markers inside these regions must not be collected.
    self.content = self.text
    for tag in ('pre', 'nowiki', 'source'):
        self.rmtag(tag)
    self.content = lre.sub('(?s)<!--.*?-->', '', self.content)

    finder = '(?s)(' + self.pat + r')(.*?)(?=[|}\]\n])'
    matches = list(lre.finditer(finder, self.content))
    links = [found.group(2) for found in matches]
    translatedLinks = self.translate(links)
    for index, found in enumerate(matches):
        self.text = self.text.replace(found.group(), translatedLinks[index], 1)

    self.finalize()
def fixRepetedVowelTitle(page):
    """
    If found impossible vowel arrangement in title, correct by moving
    that page.

    Each entry of the module-level ``checkVowel`` is used as a regex: one
    or more consecutive matches in the title are replaced by the entry
    itself. When that changes the title, the page is moved to the new
    title; after a successful move the page at the old title is re-fetched
    and ``delete`` is called on it with ``mark=True``. A failed move is
    reported through ``wp.error()``.
    """
    opagetitle = page.title()
    pagetitle = opagetitle
    for i in checkVowel:
        pagetitle = lre.sub(i + u"+", i, pagetitle)

    if pagetitle != opagetitle:
        pywikibot.output("ย้ายบทความชื่อมีสระซ้อน")
        reason = u"โรบอต: เปลี่ยนชื่อบทความมีสระซ้อน"
        try:
            page.move(pagetitle, reason=reason)
        except Exception:
            # Was a bare "except:", which also swallowed KeyboardInterrupt
            # and SystemExit and made the bot impossible to stop here.
            wp.error()
        else:
            # Only reached when the move succeeded: clean up the old title.
            page = pywikibot.Page(pywikibot.getSite(), page.title())
            page.delete(reason=reason, prompt=False, mark=True)
def finalize(self):
    """Turn the cleaned text into HTML with links to the source wiki.

    ``self.clean()`` is HTML-escaped with ``cgi.escape``. Every match of
    ``self.begin`` then becomes an opening ``<a>`` tag pointing at the
    captured title on ``<self.siteSource.code>.wikipedia.org``, and every
    ``self.end`` marker becomes ``</a>``. The result is stored in
    ``self.text``.
    """
    self.text = cgi.escape(self.clean())
    href_head = "<a href='//" + self.siteSource.code + ".wikipedia.org/wiki/"

    def open_anchor(match):
        # cgi.escape leaves quotes untouched, but the title is placed inside
        # single-quoted attributes: an apostrophe in the title would end the
        # attribute early and let page text inject markup. Escape it here.
        title = match.group(1).replace("'", "&#39;")
        # NOTE(review): the space before the closing quote of href is kept
        # from the original template; confirm whether it is intentional.
        return href_head + title + " ' title='" + title + "'>"

    self.text = lre.sub(r'(?is)' + self.begin, open_anchor, self.text)
    self.text = self.text.replace(self.end, '</a>')
def rmtag(self, tag):
    """Remove every ``<tag ...>...</tag>`` element from ``self.content``.

    Args:
        tag: element name without angle brackets, e.g. ``'nowiki'``.

    The opening tag may carry attributes (``<source lang="python">``) and
    the tag name is matched case-insensitively. The original pattern only
    recognised the bare lower-case ``<tag>`` form and so left such blocks
    in place. Self-closing forms such as ``<nowiki />`` are deliberately not
    treated as an opening tag, otherwise everything up to a later closing
    tag would be deleted.
    """
    pattern = r"(?is)<{tag}(?:\s[^>]*)?(?<!/)>.*?</{tag}\s*>".format(tag=tag)
    self.content = lre.sub(pattern, "", self.content)
def process(page, config, sources): if config.get('disable', False): return source = wp.Page(config["source"]) today = site.getcurrenttime() originalText = page.get() if page.exists() else None if ("stable" in config and (today - pywikibot.Timestamp.fromISOformat( source.getVersionHistory(total=1)[0][1])).days < int(config["stable"])): return page.u_elist = [] deprecated = [] checkcat = [] if sources: source = sources[source] text = source.text if page.namespace() == 828: text = lre.sub('(?<!:)[Tt]emplate:', u'แม่แบบ:', text) for item in config["findText"]: if len(item) == 3: num, find, replace = item regex = False elif len(item) == 4: num, find, replace, regex = item else: page.u_elist.append(u"คำเตือน: ข้อความค้นหาและแทนที่อันดับที่ {} มีจำนวนพารามิเตอร์ไม่ถูกต้อง".format(num)) continue if regex: newtext = lre.sub(find, replace, text) else: newtext = text.replace(find, replace) if newtext == text and find != replace: page.u_elist.append(u"คำเตือน: ไม่เกิดการแทนที่ข้อความที่ {}".format(num)) text = newtext def matchbrace(s, i): lv = 0 for i in xrange(i, len(s)): if s[i] == "{": lv += 1 elif s[i] == "}": lv -= 1 if lv == 0: return i # not return i + 1 to avoid index out of range for item in config["addParam"]: if len(item) == 3: num, param, translate = item else: page.u_elist.append(u"คำเตือน: ข้อความแปลที่ {} มีจำนวนพารามิเตอร์ไม่ถูกต้อง".format(num)) continue lst = [] for i in lre.finditer(r"\{\{\{\s*" + param + "\s*[\|\}]", text): begin, end = i.span() end = matchbrace(text, begin) lst.append((begin, "begin")) lst.append((end, "end")) lst = sorted(lst) lst.append((sys.maxint, sys.maxint)) ilst = 0 out = [] for i in xrange(len(text)): if i == lst[ilst][0]: if lst[ilst][1] == "begin": out.append("{{{" + translate + "|") else: out.append("}}}") # we should put text[i] before "}}}", # but since text[i] == "}", there is no problem :) ilst += 1 out.append(text[i]) newtext = "".join(out) if newtext == text: page.u_elist.append(u"คำเตือน: ไม่เกิดการแปลพารามิเตอร์ที่ 
{}".format(num)) text = newtext """ for item in config["obsolete"]: if len(item) == 3: num, oldParam, newParam = item showError = False elif len(item) == 4: num, oldParam, newParam, showError = item else: page.u_elist.append(u"คำเตือน: การตรวจสอบพารามิเตอร์ล้าสมัยที่ {} มีจำนวนพารามิเตอร์ไม่ถูกต้อง".format(num)) continue category = wp.Category("Category:" + page.title().replace(":", "") + u" ที่ใช้พารามิเตอร์ " + oldParam) checkcat.append(category) deprecated.append(u'<includeonly>{{{{#if:{{{{{{{}|}}}}}}|[[{}]]' .format(oldParam, category.title()) + ((u'<span class="error">พารามิเตอร์ {} ' u'ล้าสมัยแล้ว โปรดใช้ {} แทนที่</span><br />') .format(oldParam, newParam) if showError else u'') + u'}}</includeonly>') text = "".join(deprecated) + text """ #======= if page.userName() not in ['^Nullzerobot', 'Nullzerobot', 'Nullzero']: page.u_elist.append(u"คำเตือน: ผู้แก้ไขหน้านี้ครั้งสุดท้ายคือ " + page.userName()) if (not page.u_elist) and (text == originalText): pywikibot.output((u"ไม่มีการเปลี่ยนแปลงในหน้า {}; " u"ยกเลิกการปรับปรุงและแจ้งเตือน").format(source.title())) return if page.namespace() == 828 and 'wrappers' in text: page.u_elist.append(u"คำเตือน: มอดูลนี้มี wrapper") if debug: pywikibot.showDiff(originalText or "", text) return if config.get("sandbox", False): page = wp.Page(page.title() + "/sandbox") page.u_text = text page.put(text, u"ปรับปรุงหน้าอัตโนมัติโดยบอต", async=True, callback=callback)
def finalize(self):
    """Turn the cleaned text into HTML with links to the source wiki.

    ``self.clean()`` is HTML-escaped with ``cgi.escape``. Every match of
    ``self.begin`` then becomes an opening ``<a>`` tag pointing at the
    captured title on ``<self.siteSource.code>.wikipedia.org``, and every
    ``self.end`` marker becomes ``</a>``. The result is stored in
    ``self.text``.
    """
    self.text = cgi.escape(self.clean())
    href_head = "<a href='//" + self.siteSource.code + ".wikipedia.org/wiki/"

    def open_anchor(match):
        # cgi.escape leaves quotes untouched, but the title is placed inside
        # single-quoted attributes: an apostrophe in the title would end the
        # attribute early and let page text inject markup. Escape it here.
        title = match.group(1).replace("'", "&#39;")
        # NOTE(review): the space before the closing quote of href is kept
        # from the original template; confirm whether it is intentional.
        return href_head + title + " ' title='" + title + "'>"

    self.text = lre.sub(r'(?is)' + self.begin, open_anchor, self.text)
    self.text = self.text.replace(self.end, '</a>')
def fixRepetedVowel(content):
    """Return ``content`` with impossible repeated-vowel arrangements fixed.

    Each entry of the module-level ``checkVowel`` is used as a regex with
    ``+`` appended; every match is replaced by the entry itself.
    """
    for vowel in checkVowel:
        repeated = vowel + u"+"
        content = lre.sub(repeated, vowel, content)
    return content
def main(): if "-html" not in args: return page = wp.Page(wp.toutf(raw_input())) dic = {} while True: if not page.exists(): dic["error"] = u"ไม่มีหน้าดังกล่าว" break elif page.isRedirectPage(): page = page.getRedirectTarget() else: try: oldtext = page.get() except: dic["error"] = u"เกิดข้อผิดพลาดไม่ทราบสาเหตุ" break if "error" not in dic: actuallen = lambda text: sum([int(32 < ord(i) < anchorBegin) for i in text]) resgen = lambda x: "passed" if x else "failed" oldtext = lre.sub(r"[\t\r\f\v]", " ", oldtext) def placemarker(s): placemarker.i += 1 return unichr((placemarker.i % (anchorEnd - anchorBegin + 1)) + anchorBegin) placemarker.i = 0 oldtext = lre.sub("(?m)^|$", placemarker, oldtext) oldtext = lre.sub(r"(?m)(?<=\|)(?=[^\[\]]*\]\])", placemarker, oldtext) oldtext = lre.sub(r"(?m)(?<=\])(?!\])", placemarker, oldtext) oldtext = lre.sub(r"(?m)(?<!\{)(?=\{)", placemarker, oldtext) oldtext = lre.sub(r"(?m)(?<=\})(?!\})", placemarker, oldtext) text = oldtext text, numinline0 = lre.subn(r"(?s)<ref[^>]*?/ *>", "", text) text, numinline = lre.subn(r"(?s)<ref.*?</ref>", "", text) numinline += numinline0 dic["inline"] = {} dic["inline"]["value"] = (u"มีอ้างอิงในบรรทัดทั้งหมดจำนวน %d แห่ง" % numinline) dic["inline"]["result"] = "normal" dic["inline"]["text"] = u"อ้างอิง" text = rem(text) lentext = actuallen(text) dic["newtext"] = showdiff(oldtext, text) dic["len"] = {} dic["len"]["text"] = u"ความยาว" dic["len"]["result"] = resgen(lentext >= 2000) dic["len"]["value"] = u"%d อักขระ..." 
% lentext now = site.getcurrenttime() revid = None revtimestamp = None for rev in page.getVersionHistory(total=5000): ts = pywikibot.Timestamp.fromISOformat(rev[1]) revid = rev[0] if (now - ts).days <= 14: revtimestamp = ts else: break dic["oldlen"] = {} dic["oldlen"]["text"] = u"รุ่นเก่า" if revtimestamp is None: dic["oldlen"]["result"] = resgen(False) dic["oldlen"]["value"] = u"ไม่พบรุ่นเก่าภายในเวลา 14 วัน" else: lenold = actuallen(rem(page.getOldVersion(revid))) dic["oldlen"]["result"] = resgen( (float(lentext)/float(lenold)) >= 3.0) dic["oldlen"]["value"] = (u"รุ่นเก่าก่อนการแก้ไขเมื่อ %s " u"(%d วันที่แล้ว) " u"มีความยาว %d อักขระ " u"จะได้ว่าขณะนี้มีเนื้อหาเป็น %.3f " u"เท่าเมื่อเทียบกับขณะนั้น..." % (revtimestamp.strftime("%Y-%m-%d %H:%M:%S"), (now - revtimestamp).days, lenold, float(lentext)/float(lenold))) creator = page.getVersionHistory(reverseOrder=True, total=1)[0] tscreate = pywikibot.Timestamp.fromISOformat(creator[1]) dic["create"] = {} dic["create"]["text"] = u"สร้างบทความ" dic["create"]["result"] = resgen((now - tscreate).days <= 14) dic["create"]["value"] = (u"บทความนี้สร้างโดย %s " u"เมื่อ %s (%d วันที่แล้ว)" % (creator[2], tscreate.strftime("%Y-%m-%d %H:%M:%S"), (now - tscreate).days)) if ((dic["create"]["result"] == resgen(True)) or (dic["oldlen"]["result"] == resgen(True))): if dic["create"]["result"] == resgen(False): dic["create"]["result"] = "normal" if dic["oldlen"]["result"] == resgen(False): dic["oldlen"]["result"] = "normal" print json.dumps(dic)