def process_page(page, index, parsed):
    """Rewrite class-8b ``ru-conj``/``ru-conj-old`` templates on ``page``.

    For every such template whose parameter 2 starts with "8b", parameter 4
    is passed through ``rulib.make_unstressed_ru`` and the template's
    parameters are rebuilt: numbered params 1-4 (and 5 when non-empty) first,
    then the named params that are kept.  Named params ``lang``, ``nocat``
    and ``tr`` are dropped, as is any other numeric-named param.

    Args:
        page: page object; ``page.title()`` is used for log messages and the
            page is parsed with ``blib.parse``.
        index: page index, used only in log messages.
        parsed: ignored -- the page is re-parsed inside this function.

    Returns:
        A ``(text, notes)`` tuple: the unicode form of the modified parse
        tree and the list of change notes (one per rewritten template).

    Raises:
        AssertionError: if a matching template has a non-empty parameter 6.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    # The ``parsed`` argument is overwritten here.
    parsed = blib.parse(page)
    notes = []

    for t in parsed.filter_templates():
        if unicode(t.name) not in ["ru-conj", "ru-conj-old"]:
            continue
        before = unicode(t)

        arg1 = getparam(t, "1")
        arg2 = getparam(t, "2")
        # Only class 8b verbs are rewritten.
        if not arg2.startswith("8b"):
            continue
        arg3 = getparam(t, "3")
        arg4 = getparam(t, "4")
        arg5 = getparam(t, "5")
        assert not getparam(t, "6")

        # Flag overrides that are present on the template; they are kept
        # as-is but reported through errmsg.
        past_m = getparam(t, "past_m")
        if past_m:
            errmsg("WARNING: Has past_m=%s" % past_m)
        pap = getparam(t, "pap") or getparam(t, "past_adv_part")
        if pap:
            errmsg("WARNING: Has pap=%s" % pap)
        pap2 = getparam(t, "pap2") or getparam(t, "past_adv_part2")
        if pap2:
            errmsg("WARNING: Has pap2=%s" % pap2)

        arg4 = rulib.make_unstressed_ru(arg4)

        # Fetch non-numbered params.
        kept_named = []
        for param in t.params:
            pname = unicode(param.name)
            if re.search(r"^[0-9]+$", pname) or pname in ["lang", "nocat", "tr"]:
                continue
            kept_named.append((pname, param.value))

        # Erase all params.
        del t.params[:]

        # Put back numbered params.
        for number, value in (("1", arg1), ("2", arg2), ("3", arg3), ("4", arg4)):
            t.add(number, value)
        if arg5:
            t.add("5", arg5)

        # Put back non-numbered params.
        for pname, pvalue in kept_named:
            t.add(pname, pvalue)

        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))
            notes.append("rewrite class 8b verb to correspond to module changes")

    return unicode(parsed), notes
def errandpagemsg(txt):
    """Report ``txt`` for the current page through both ``msg`` and ``errmsg``.

    The page index and title are read from ``index`` and ``pagetitle`` in the
    surrounding scope.
    """
    report = "Page %s %s: %s" % (index, pagetitle, txt)
    msg(report)
    errmsg(report)
def yield_lemma_non_lemma_page_titles():
    """Yield the title of each page in the "lemma,non-lemma" categories.

    The categories come from ``yield_cats("lemma,non-lemma")``; a progress
    line is sent to both ``msg`` and ``errmsg`` before each category is read.
    """
    for cat in yield_cats("lemma,non-lemma"):
        progress = "Retrieving pages from %s ..." % cat
        msg(progress)
        errmsg(progress)
        # No start/end limits: every article in the category is visited.
        for _, article in blib.cat_articles(cat, None, None):
            yield article.title()
def yield_lemma_non_lemma_page_titles():
    """Yield the title of each page in the "lemma,non-lemma" categories.

    The categories come from ``yield_cats("lemma,non-lemma")``; a progress
    line is sent to both ``msg`` and ``errmsg`` before each category is read.
    """
    for cat in yield_cats("lemma,non-lemma"):
        progress = "Retrieving pages from %s ..." % cat
        msg(progress)
        errmsg(progress)
        for _, article in blib.cat_articles(cat, None, None):
            yield article.title()


# NOTE(review): ``params``, ``startFrom`` and ``upTo`` are not defined in the
# visible code -- presumably the statements down to the category loop belong
# to an enclosing function; confirm against the full file.

# Optionally collect the titles that must be skipped below.
pages_to_ignore = (
    set(yield_lemma_non_lemma_page_titles())
    if params.ignore_lemma_non_lemma
    else set()
)

for category in yield_cats():
    progress = "Processing category %s ..." % category
    msg(progress)
    errmsg(progress)
    for index, page in blib.cat_articles(category, startFrom, upTo):
        if page.title() in pages_to_ignore:
            continue
        blib.do_edit(page, index, remove_translit_one_page,
                     save=params.save, verbose=params.verbose)

# Command-line options.
pa = blib.init_argparser("Remove translit, sc= from hy, xcl, ka, el, grc templates")
pa.add_argument(
    "--langs", default="all",
    help="Languages to do, a comma-separated list or 'all'")
pa.add_argument(
    "--cattype", default="all",
    help="""Categories to examine ('all' or comma-separated list of 'translit', 'lemma', 'non-lemma'; default 'all')""")
pa.add_argument(
    "--ignore-lemma-non-lemma", action="store_true",
    help="""Ignore lemma and non-lemma pages (useful with '--cattype translit').""")
pa.add_argument(
    "--do-head", action="store_true",
    help="""Remove tr= in {{head|..}}""")
def error(text):
    """Report a fatal problem with the current input line and abort.

    Sends the current ``line`` (read from the surrounding scope) and ``text``
    to ``errmsg``, then raises.

    Args:
        text: description of the problem.

    Raises:
        AssertionError: always.
    """
    errmsg("ERROR: Processing line: %s" % line)
    errmsg("ERROR: %s" % text)
    # Raise explicitly instead of ``assert False``: assert statements are
    # removed under ``python -O``, which would let processing carry on after
    # the error had been reported.  The exception type is unchanged.
    raise AssertionError(text)
def fatal(line, text):
    """Report an unrecoverable problem with ``line`` and raise ``ValueError``.

    Args:
        line: the input line being processed; logged together with
            ``peeker.lineno`` from the surrounding scope.
        text: description of the problem.

    Raises:
        ValueError: always (without a message).
    """
    location = (peeker.lineno, line)
    errmsg("ERROR: Processing line %s: %s" % location)
    errmsg("ERROR: %s" % text)
    raise ValueError()
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import re, sys, codecs, argparse

from blib import msg, errmsg
import rulib

parser = argparse.ArgumentParser(
    description="Find lemmas which would have forms saved.")
parser.add_argument('--direcfile', help="File containing directives.")
args = parser.parse_args()

# Accent-stripped lemmas found in "Would save with comment" lines.
lemmas = set()

# Open the directive file in a ``with`` block so the handle is closed once
# the scan is finished (it was previously left open).
with codecs.open(args.direcfile, "r", "utf-8") as direcfile:
    for line in direcfile:
        line = line.strip()
        if "Would save with comment" not in line:
            continue
        # Group 1 is the lemma: the text after "of" / "dictionary form", up
        # to the first of the listed delimiters or the end of the line.
        m = re.search(
            r"Would save with comment.* (?:of|dictionary form) (.*?)(,| after| before| \(add| \(modify| \(update|$)",
            line)
        if not m:
            errmsg("WARNING: Unable to parse line: %s" % line)
        else:
            lemmas.add(rulib.remove_accents(m.group(1)))

# Output one UTF-8 encoded lemma per line, in sorted order.  With a single
# parenthesised argument this is the same Python 2 print statement as before.
for lemma in sorted(lemmas):
    print(lemma.encode('utf-8'))
def err(text):
    """Send ``text`` to ``errmsg``, prefixed with ``lineno`` from the surrounding scope."""
    prefixed = "Line %s: %s" % (lineno, text)
    errmsg(prefixed)
def errandpagemsg_with_contents(txt):
    """Report ``txt`` via ``pagemsg_with_contents`` and also via ``errmsg``.

    The ``errmsg`` line is prefixed with ``index``, ``pagetitle`` and
    ``contents_title``, all read from the surrounding scope.
    """
    pagemsg_with_contents(txt)
    fields = (index, pagetitle, contents_title, txt)
    errmsg("Page %s %s: %s: %s" % fields)
def errandpagemsg_with_spelling(txt):
    """Report ``txt`` via ``pagemsg_with_spelling`` and also via ``errmsg``.

    The ``errmsg`` line is prefixed with ``index``, ``pagetitle`` and
    ``spelling``, all read from the surrounding scope.
    """
    pagemsg_with_spelling(txt)
    fields = (index, pagetitle, spelling, txt)
    errmsg("Page %s %s: %s: %s" % fields)
def yield_lemma_non_lemma_page_titles(): for cat in yield_cats("lemma,non-lemma"): msg("Retrieving pages from %s ..." % cat) errmsg("Retrieving pages from %s ..." % cat) for index, page in blib.cat_articles(cat, None, None): yield page.title() if params.ignore_lemma_non_lemma: pages_to_ignore = set(yield_lemma_non_lemma_page_titles()) else: pages_to_ignore = set() for category in yield_cats(): msg("Processing category %s ..." % category) errmsg("Processing category %s ..." % category) for index, page in blib.cat_articles(category, startFrom, upTo): if page.title() not in pages_to_ignore: blib.do_edit(page, index, remove_translit_one_page, save=params.save, verbose=params.verbose) pa = blib.init_argparser( "Remove translit, sc= from hy, xcl, ka, el, grc templates") pa.add_argument("--langs", default="all", help="Languages to do, a comma-separated list or 'all'") pa.add_argument("--cattype",
newval = re.sub("^#\* #\* ", "#* ", subsections[j], 0, re.M) if newval != subsections[j]: notes.append("remove double #* prefix") pagemsg("Removed double #* prefix") subsections[j] = newval newtext = "".join(subsections) if text != newtext: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, newtext)) assert notes comment = "; ".join(blib.group_notes(notes)) if save: pagemsg("Saving with comment = %s" % comment) page.text = newtext page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) if __name__ == "__main__": parser = blib.create_argparser("Fix old cite/quote/reference templates") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for template in replace_templates: msg("Processing references to Template:%s" % template) errmsg("Processing references to Template:%s" % template) for i, page in blib.references("Template:%s" % template, start, end, includelinks=True): process_page(i, page, args.save, args.verbose)
changed = origt != unicode(t) if changed: notes.append("quote-poem -> quote-book with fixed params") if changed: pagemsg("Replacing %s with %s" % (origt, unicode(t))) return parsed, notes parser = blib.create_argparser( "quote-poem -> quote-book with changed params; quote-magazine/quote-news -> quote-journal; quote-Don Quixote -> RQ:Don Quixote" ) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for template in [ "quote-poem", "quote-magazine", "quote-news", "quote-Don Quixote" ]: msg("Processing references to Template:%s" % template) errmsg("Processing references to Template:%s" % template) for i, page in blib.references("Template:%s" % template, start, end, includelinks=True): blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose)
def errpagemsg(txt):
    """Send ``txt`` to ``errmsg``, prefixed with the current page index and title.

    ``index`` and ``pagetitle`` are read from the surrounding scope.
    """
    report = "Page %s %s: %s" % (index, pagetitle, txt)
    errmsg(report)
# Namespace filter passed to blib.references(); None when the option is unset.
# (The value is decoded from UTF-8 -- presumably a Python 2 byte string from
# the command line.)
ref_namespaces = (
    args.ref_namespaces.decode("utf-8") if args.ref_namespaces else None)

# Read the template list inside a ``with`` block so the file handle is closed
# after reading (it was previously left open).  Each line has the form
# "canonical,alias1,alias2,...".
with codecs.open(args.tempfile, "r", "utf-8") as template_file:
    lines = [x.strip() for x in template_file]

# Emit a wikitable: one row per template (canonical or alias) with its
# number of references and, optionally, the referring pages and a
# disposition placeholder.
msg('{|class="wikitable"')
msg("! Aliased template !! Canonical template !! #Uses%s%s" % (
    " !! Refs" if args.include_refs else "",
    " !! Suggested disposition" if args.include_disposition else ""))
for ref_and_aliases in lines:
    # Skip blank lines; they would otherwise yield a row for the empty
    # title "Template:".
    if not ref_and_aliases:
        continue
    split_refs = ref_and_aliases.split(",")
    mainref = "Template:%s" % split_refs[0]
    aliases = split_refs[1:]
    # (template, canonical) pairs; the canonical template itself is paired
    # with None so it can be told apart from its aliases below.
    refs = [(mainref, None)]
    for alias in aliases:
        refs.append(("Template:%s" % alias, mainref))
    # NOTE: this loop rebinds ``alias`` and ``mainref``; ``refs`` was fully
    # built above, so the rebinding does not affect its contents.
    for alias, mainref in refs:
        errmsg("Processing references to: %s" % alias)
        template_refs = list(
            blib.references(alias, start, end, namespaces=ref_namespaces))
        num_refs = len(template_refs)
        # The canonical template is shown in bold in both of the first two
        # columns; an alias is shown plain, next to its canonical template.
        if mainref:
            alias_cell = "[[%s]]" % alias
            canonical_cell = "[[%s]]" % mainref
        else:
            alias_cell = "'''[[%s]]'''" % alias
            canonical_cell = "'''[[%s]]'''" % alias
        if args.include_refs:
            refs_cell = " || %s" % ", ".join(
                "[[%s]]" % unicode(ref.title()) for i, ref in template_refs)
        else:
            refs_cell = ""
        disposition_cell = " || ?" if args.include_disposition else ""
        msg("|-")
        msg("| %s || %s || %s%s%s" % (
            alias_cell, canonical_cell, num_refs, refs_cell, disposition_cell))
msg("|}")