def process_page(page, index, verbose):
    """Disabled rewrite of ``4=`` into ``past_f=`` for ru-conj-5c/ru-conj-6b.

    The function logs a warning and returns ``None`` straight away.  The
    statements after the bare ``return`` are unreachable; they are retained
    as a record of what the retired script used to do.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ---- unreachable below this point ----
    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj-5c", "ru-conj-6b"]:
            past_f = getparam(t, "4")
            if past_f:
                # Insert the named param where 4= used to be, then drop 4=.
                t.add("past_f", past_f, before="4")
                rmparam(t, "4")
                notes.append("Replace 4= with past_f=")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
def snarf_adj_accents():
    """Collect accented headwords of Bulgarian adjectives.

    Iterates over the pages returned by
    ``blib.cat_articles("Bulgarian adjectives")`` and, for every {{bg-adj}}
    template whose 1= value passes ``bglib.needs_accents``, stores that
    value in the externally defined ``adjs_to_accents`` mapping under its
    accent-stripped form.  Missing headwords, headwords lacking an accent
    and conflicting accentuations are reported with a warning.
    """
    for index, page in blib.cat_articles("Bulgarian adjectives"):
        pagetitle = unicode(page.title())

        def pagemsg(txt):
            msg("Page %s %s: %s" % (index, pagetitle, txt))

        for t in blib.parse(page).filter_templates():
            if tname(t) != "bg-adj":
                continue
            adj = getparam(t, "1")
            if not adj:
                pagemsg("WARNING: Missing headword in adj: %s" % unicode(t))
                continue
            if bglib.needs_accents(adj):
                pagemsg("WARNING: Adjective %s missing an accent: %s"
                        % (adj, unicode(t)))
                continue
            plain = bglib.remove_accents(adj)
            if plain in adjs_to_accents:
                known = adjs_to_accents[plain]
                if known != adj:
                    pagemsg(
                        "WARNING: Two different accents possible for %s: %s and %s: %s"
                        % (plain, known, adj, unicode(t)))
            # The latest accented form always wins.
            adjs_to_accents[plain] = adj
def process_page(page, index, parsed):
    """Strip ``adj=`` and ``shto=`` from every {{ru-ux}} template on a page.

    The incoming ``parsed`` argument is ignored; the page is re-parsed with
    ``blib.parse``.

    Returns a ``(new_page_text, notes)`` tuple, where ``notes`` holds one
    entry per removed parameter.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)  # value is not used below
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-ux":
            continue
        before = unicode(t)
        if t.has("adj"):
            pagemsg("Removing adj=")
            notes.append("remove adj= from ru-ux")
            rmparam(t, "adj")
        if t.has("shto"):
            pagemsg("Removing shto=")
            notes.append("remove shto= from ru-ux")
            rmparam(t, "shto")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Disabled rewrite that folded ru-conj-4a param 4 into param 3.

    Logs a warning and returns ``None`` immediately.  Everything after the
    bare ``return`` is unreachable and kept only for reference.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ---- unreachable below this point ----
    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj-4a"]:
            shch = getparam(t, "4")
            if shch == u"щ":
                # Append the letter to param 3 and drop param 4.
                t.add("3", getparam(t, "3") + shch)
                rmparam(t, "4")
                notes.append(u"move param 4 (щ) to param 3")
            elif shch:
                pagemsg("WARNING: Strange value %s for param 4" % shch)
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Unwrap ``{{lang|ru|...}}`` from passage= in Russian quote-* templates.

    Only quote templates with ``lang=ru`` whose whole passage= value is a
    single ``{{lang|ru|...}}`` wrapper are touched.  The incoming ``parsed``
    argument is ignored; the page is re-parsed.

    Returns ``(new_page_text, notes)``.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)  # value is not used below
    parsed = blib.parse(page)
    notes = []
    quote_templates = [
        "quote-book", "quote-hansard", "quote-journal", "quote-newsgroup",
        "quote-song", "quote-us-patent", "quote-video", "quote-web",
        "quote-wikipedia",
    ]
    for t in parsed.filter_templates():
        before = unicode(t)
        is_ru_quote = (unicode(t.name) in quote_templates
                       and getparam(t, "lang") == "ru")
        if is_ru_quote:
            passage = getparam(t, "passage")
            wrapped = re.search(r"^\{\{lang\|ru\|(.*)\}\}$", passage)
            if wrapped:
                t.add("passage", wrapped.group(1))
                notes.append("remove {{lang|ru|...}} from passage= in quote-*")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Swap args 1 and 2 of ru-conj/ru-conj-old when arg 2 is a verb type.

    When 2= holds one of the known aspect/transitivity codes, the old 1=
    value is written to 2= and the verb type is written to 1=.  The incoming
    ``parsed`` argument is ignored; the page is re-parsed.

    Returns ``(new_page_text, notes)``.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)  # value is not used below
    notes = []
    verb_types = [
        "pf", "pf-intr", "pf-refl", "pf-impers", "pf-intr-impers",
        "pf-refl-impers", "impf", "impf-intr", "impf-refl", "impf-impers",
        "impf-intr-impers", "impf-refl-impers",
    ]

    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj", "ru-conj-old"]:
            verbtype = getparam(t, "2")
            if verbtype in verb_types:
                conjtype = getparam(t, "1")
                t.add("2", conjtype)
                t.add("1", verbtype)
                notes.append("move verb type from arg 2 to arg 1")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
def process_page(index, page, save, verbose):
    """Warn about verb-form pages lacking the expected templates.

    A page is expected to contain both an {{inflection of}} template and a
    ``{{head|ru|verb form}}`` template.  When either is missing, the
    definition lines (``# ...``) of the Russian section are reported along
    with the warning.  ``save`` and ``verbose`` are accepted but not used.
    Nothing is modified and ``None`` is returned.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)  # value is not used below
    parsed = blib.parse(page)
    found_inflection_of = False
    found_head_verb_form = False
    for t in parsed.filter_templates():
        if unicode(t.name) in ["inflection of"]:
            found_inflection_of = True
        if (unicode(t.name) == "head" and getparam(t, "1") == "ru"
                and getparam(t, "2") == "verb form"):
            found_head_verb_form = True

    if found_head_verb_form and found_inflection_of:
        return

    # Find definition line
    # NOTE(review): the original indentation was lost; the per-section
    # reporting is assumed to live inside the Russian-section branch.
    seen_russian = False
    sections = re.split("(^==[^=]*==\n)", unicode(page.text), 0, re.M)
    for j in xrange(2, len(sections), 2):
        if sections[j - 1] != "==Russian==\n":
            continue
        if seen_russian:
            pagemsg("WARNING: Found multiple Russian sections, skipping page")
            return
        seen_russian = True

        # Joined with a literal backslash-n so the log entry stays on one line.
        deflines = r"\n".join(re.findall(r"^(# .*)$", sections[j], re.M))
        if not found_head_verb_form:
            pagemsg("WARNING: No {{head|ru|verb form}}: %s" % deflines)
        if not found_inflection_of:
            pagemsg("WARNING: No 'inflection of': %s" % deflines)
def process_page(index, page, save, verbose):
    """Drop the explicit audio-links category when {{audio|lang=ru}} exists.

    If the page has an {{audio}} template with ``lang=ru``, every
    ``[[Category:Russian terms with audio links]]`` (with its surrounding
    newlines) is replaced by a blank line.  The page is saved when ``save``
    is true; otherwise the would-be edit is only logged.  ``verbose`` is
    accepted but not used.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)

    has_ru_audio = any(
        unicode(t.name) == "audio" and getparam(t, "lang") == "ru"
        for t in parsed.filter_templates())
    if not has_ru_audio:
        return

    new_text = re.sub(
        r"\n*\[\[Category:Russian terms with audio links]]\n*", "\n\n", text)
    if new_text == text:
        return

    comment = "Remove redundant [[:Category:Russian terms with audio links]]"
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def process_page(index, page, save, verbose):
    """Move the 1= value of {{ru-IPA}} templates into ``phon=``.

    Templates that already carry a non-empty ``phon=`` are left alone.  If
    the page text changes it is saved (when ``save`` is true) or the
    would-be edit is logged; otherwise "Skipping" is logged.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    text = unicode(page.text)
    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-IPA":
            continue
        before = unicode(t)
        if getparam(t, "phon"):
            pagemsg("phon= already present: %s" % unicode(t))
            continue
        phon = getparam(t, "1")
        pagemsg("Adding phon=: %s" % unicode(t))
        rmparam(t, "1")
        t.add("phon", phon)
        pagemsg("Replaced %s with %s" % (before, unicode(t)))

    newtext = unicode(parsed)
    if newtext == text:
        pagemsg("Skipping")
        return

    if verbose:
        pagemsg("Replacing <<%s>> with <<%s>>" % (text, newtext))
    comment = "Add phon= to ru-IPA templates"
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = newtext
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def process_page(index, page, save, verbose):
    """Tally the Russian headword templates used on a page.

    Each template named in ``ru_head_templates`` and each ``{{head|ru|...}}``
    template bumps its entry in the externally defined ``cat_head_count``
    and ``overall_head_count`` mappings.  Pages with no such template get a
    warning.  ``output_heads_seen()`` is called whenever ``index`` is a
    multiple of 100.  ``save`` and ``verbose`` are accepted but not used.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    parsed = blib.parse(page)
    found_page_head = False
    for t in parsed.filter_templates():
        name = unicode(t.name)
        headname = None
        if name in ru_head_templates:
            headname = name
        elif name == "head" and getparam(t, "1") == "ru":
            headtype = getparam(t, "2")
            headname = "head|ru|%s" % headtype
            if headtype in ru_heads_to_warn_about:
                pagemsg("WARNING: Found %s" % headname)
        if headname is None:
            continue
        cat_head_count[headname] = cat_head_count.get(headname, 0) + 1
        overall_head_count[headname] = overall_head_count.get(headname, 0) + 1
        found_page_head = True

    if not found_page_head:
        pagemsg("WARNING: No head")
    if index % 100 == 0:
        output_heads_seen()
def process_page(page, index, parsed):
    """Rename conjugation type ``3a`` to ``3olda`` in ru-conj/ru-conj-old.

    Templates having any parameter whose value is exactly ``or`` are skipped
    with a warning.  The incoming ``parsed`` argument is ignored; the page
    is re-parsed.

    Returns ``(new_page_text, notes)``.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)  # value is not used below
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj", "ru-conj-old"]:
            conjtype = getparam(t, "2")
            if conjtype.startswith("3a"):
                or_params = [x for x in t.params if unicode(x.value) == "or"]
                if or_params:
                    pagemsg("WARNING: Skipping multi-arg conjugation: %s"
                            % unicode(t))
                    continue
                t.add("2", conjtype.replace("3a", "3olda"))
                notes.append("rename conj type 3a -> 3olda")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
def process_page(page, index):
    """Compare old and new fr-pron output for every {{fr-IPA}} argument.

    For each positional argument slot from 1 up to the highest non-empty
    slot in 2..29, the pronunciation is expanded through
    ``{{#invoke:fr-pron|show|...|check_new_module=1}}``.  An expansion
    containing ``" || "`` is split into an old and a new value and reported
    as a mismatch.  An empty slot falls back to the page title.  Reads the
    global ``args`` for the verbosity flag.
    """
    global args
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)

    for t in blib.parse(page).filter_templates():
        if tname(t) != "fr-IPA":
            continue
        posval = getparam(t, "pos")
        pos_arg = ("|pos=%s" % posval) if posval else ""

        filled = [n for n in xrange(2, 30) if getparam(t, str(n))]
        max_arg = filled[-1] if filled else 1

        for pronarg in xrange(1, max_arg + 1):
            pronval = getparam(t, str(pronarg)) or pagetitle
            pron = expand_text(
                "{{#invoke:fr-pron|show|%s%s|check_new_module=1}}"
                % (pronval, pos_arg))
            if " || " in pron:
                pronold, pronnew = pron.split(" || ")
                pagemsg("WARNING: {{fr-IPA|%s%s}} == %s in old but %s in new"
                        % (pronval, pos_arg, pronold, pronnew))
            else:
                pagemsg("{{fr-IPA|%s%s}} == %s in both old and new"
                        % (pronval, pos_arg, pron))
def process_page(index, page):
    """Report pages whose ru-noun / ru-proper noun headwords are old-style.

    A headword template with ``3=-`` counts as invariant.  If the page has
    headword templates and none of them is invariant, the non-invariant
    templates are listed together with whether a declension template
    (ru-noun-table / ru-decl-noun-see) is present on the page.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    parsed = blib.parse(page)

    saw_headword = False
    saw_invariant = False
    saw_decl = False
    headword_templates = []
    for t in parsed.filter_templates():
        name = unicode(t.name)
        if name in ["ru-noun", "ru-proper noun"]:
            saw_headword = True
            if getparam(t, "3") == "-":
                saw_invariant = True
            else:
                headword_templates.append(unicode(t))
        if name in ["ru-noun-table", "ru-decl-noun-see"]:
            saw_decl = True

    if not saw_headword or saw_invariant:
        return

    listing = ", ".join(headword_templates)
    if saw_decl:
        pagemsg("Found old-style headword template(s) %s with decl" % listing)
    else:
        pagemsg("Found old-style headword template(s) %s without decl" % listing)
def process_page(index, page, save, verbose):
    """Report perfective past passive participles given in ru-conj templates.

    For every ru-conj / ru-conj-old template whose 1= starts with ``pf``,
    the template is converted into a {{ru-generate-verb-forms}} call and
    expanded; if expansion fails the template is skipped.  Then each
    non-empty, non-``-`` value of ``past_pasv_part``/``ppp`` (and their
    numbered variants 2..9) on the template is logged, with anything from
    ``//`` onwards stripped.  ``save`` is accepted but not used.

    Fix: the template name is now taken from ``t.name``.  The previous code
    compared the unrelated name ``tname`` against ``"ru-conj"``, so the
    ru-conj branch was never taken and {{ru-conj}} calls were expanded
    unconverted.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        name = unicode(t.name)
        if name not in ["ru-conj", "ru-conj-old"]:
            continue
        if not getparam(t, "1").startswith("pf"):
            continue

        if name == "ru-conj":
            tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms",
                              unicode(t))
        else:
            tempcall = re.sub(r"\{\{ru-conj-old",
                              "{{ru-generate-verb-forms|old=y", unicode(t))
        result = expand_text(tempcall)
        if not result:
            pagemsg("WARNING: Error generating forms, skipping")
            continue
        # The split result is not consulted below; the call is kept so the
        # generated output is still parsed as before.
        args = blib.split_generate_args(result)

        for base in ["past_pasv_part", "ppp"]:
            for i in ["", "2", "3", "4", "5", "6", "7", "8", "9"]:
                val = getparam(t, base + i)
                if val and val != "-":
                    val = re.sub("//.*", "", val)
                    pagemsg("Found perfective past passive participle: %s"
                            % val)
def process_page(page, index, parsed):
    """Fold ``suffix=ся`` into the lemma of ru-decl-adj / ru-adj-old.

    Pages whose title does not end in the reflexive suffix are ignored
    (``None`` is returned).  For matching templates the suffix is appended
    before every comma and at the end of 1=, and ``suffix=`` is removed.
    The incoming ``parsed`` argument is ignored; the page is re-parsed.

    Returns ``(new_page_text, notes)`` for processed pages.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    if not pagetitle.endswith(u"ся"):
        return

    text = unicode(page.text)  # value is not used below
    notes = []

    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        before = unicode(t)
        wanted = (unicode(t.name) in ["ru-decl-adj", "ru-adj-old"]
                  and getparam(t, "suffix") == u"ся")
        if wanted:
            lemma = getparam(t, "1")
            lemma = re.sub(",", u"ся,", lemma)
            lemma = re.sub("$", u"ся", lemma)
            t.add("1", lemma)
            rmparam(t, "suffix")
            notes.append(u"move suffix=ся to lemma")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
def process_page(index, page, save, verbose):
    """Report perfective past passive participles given in ru-conj templates.

    For every ru-conj / ru-conj-old template whose 1= starts with ``pf``,
    the template is converted into a {{ru-generate-verb-forms}} call and
    expanded; if expansion fails the template is skipped.  Then each
    non-empty, non-``-`` value of ``past_pasv_part``/``ppp`` (and their
    numbered variants 2..9) on the template is logged, with anything from
    ``//`` onwards stripped.  ``save`` is accepted but not used.

    Fix: the template name is now taken from ``t.name``.  The previous code
    compared the unrelated name ``tname`` against ``"ru-conj"``, so the
    ru-conj branch was never taken and {{ru-conj}} calls were expanded
    unconverted.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        name = unicode(t.name)
        if name not in ["ru-conj", "ru-conj-old"]:
            continue
        if not getparam(t, "1").startswith("pf"):
            continue

        if name == "ru-conj":
            tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms",
                              unicode(t))
        else:
            tempcall = re.sub(r"\{\{ru-conj-old",
                              "{{ru-generate-verb-forms|old=y", unicode(t))
        result = expand_text(tempcall)
        if not result:
            pagemsg("WARNING: Error generating forms, skipping")
            continue
        # The split result is not consulted below; the call is kept so the
        # generated output is still parsed as before.
        args = rulib.split_generate_args(result)

        for base in ["past_pasv_part", "ppp"]:
            for i in ["", "2", "3", "4", "5", "6", "7", "8", "9"]:
                val = getparam(t, base + i)
                if val and val != "-":
                    val = re.sub("//.*", "", val)
                    pagemsg("Found perfective past passive participle: %s"
                            % val)
def process_page(page, index, parsed):
    """Turn blank participle overrides into ``-`` for class 7a/7b verbs.

    In ru-conj / ru-conj-old templates whose 2= is exactly ``7a`` or ``7b``,
    a present-but-empty ``past_adv_part_short=`` or ``past_actv_part=`` is
    set to ``-``.  Templates having any parameter whose value is exactly
    ``or`` are skipped with a warning.  The incoming ``parsed`` argument is
    ignored; the page is re-parsed.

    Returns ``(new_page_text, notes)`` when the text changed.  Otherwise
    returns ``None``, after logging a warning if no notes were collected.

    Fix: ``new_text`` was read without ever being assigned, which raised
    NameError once the template loop finished; it is now computed from the
    modified parse tree.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        if not (unicode(t.name) in ["ru-conj", "ru-conj-old"]
                and getparam(t, "2") in ["7a", "7b"]):
            continue
        if [x for x in t.params if unicode(x.value) == "or"]:
            pagemsg("WARNING: Skipping multi-arg conjugation: %s" % unicode(t))
            continue
        if t.has("past_adv_part_short") and getparam(
                t, "past_adv_part_short") == "":
            notes.append("set past_adv_part_short=-")
            origt = unicode(t)
            t.add("past_adv_part_short", "-")
            pagemsg("Replacing %s with %s" % (origt, unicode(t)))
        if t.has("past_actv_part") and getparam(t, "past_actv_part") == "":
            notes.append("set past_actv_part=-")
            origt = unicode(t)
            t.add("past_actv_part", "-")
            pagemsg("Replacing %s with %s" % (origt, unicode(t)))

    new_text = unicode(parsed)
    if new_text != text:
        return new_text, notes

    if not notes:
        pagemsg("WARNING: No changes")
def process_page(page, index, parsed):
    """Disabled rewrite of ru-conj class 6 types to the degree-sign notation.

    Logs a warning and returns ``None`` immediately.  Everything after the
    bare ``return`` is unreachable and kept only for reference.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ---- unreachable below this point ----
    # NOTE(review): the original indentation was lost; the nesting of the
    # 6a/6c branch below is a reconstruction.
    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        param1 = getparam(t, "1")
        if unicode(t.name) in ["ru-conj"]:
            if re.search(r"^6[ac]", param1):
                if getparam(t, "no_iotation"):
                    rmparam(t, "no_iotation")
                    if param1.startswith("6a"):
                        notes.append(u"6a + no_iotation -> 6°a")
                    else:
                        notes.append(u"6c + no_iotation -> 6°c")
                    t.add("1", re.sub("^6", u"6°", param1))
            elif re.search(r"^6b", param1):
                notes.append(u"6b -> 6°b")
                t.add("1", re.sub("^6", u"6°", param1))
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
def look_up_tonal_form(pagename, pagemsg, verbose):
    """Return the forms listed in the {{sl-tonal}} template of a page.

    The result is a list holding the 1= value followed by the non-empty
    values of 2= through 6=.  It is empty when the page has no {{sl-tonal}}
    template.  ``None`` is returned when the page cannot be looked up, does
    not exist, or contains more than one {{sl-tonal}} call.

    ``pagemsg`` is the logging callback; ``verbose`` enables extra tracing.
    """
    try:
        page = pywikibot.Page(site, pagename)
    except Exception as e:
        pagemsg("WARNING: Error looking up page %s: %s"
                % (pagename, unicode(e)))
        return None

    try:
        exists = page.exists()
    except Exception as e:
        pagemsg("WARNING: Error checking page existence for %s: %s"
                % (pagename, unicode(e)))
        return None
    if not exists:
        if verbose:
            pagemsg("look_up_tonal_form: Page %s doesn't exist" % pagename)
        return None

    tonal_forms = []
    for t in blib.parse(page).filter_templates():
        if unicode(t.name) != "sl-tonal":
            continue
        if verbose:
            pagemsg("look_up_tonal_form: For page %s, found tonal template %s"
                    % (pagename, unicode(t)))
        if tonal_forms:
            pagemsg(
                "WARNING: Found multiple {{sl-tonal}} calls for page %s: new one is %s; can't handle"
                % (pagename, unicode(t)))
            return None
        tonal_forms.append(getparam(t, "1"))
        for param in ["2", "3", "4", "5", "6"]:
            value = getparam(t, param)
            if value:
                tonal_forms.append(value)
    return tonal_forms
def process_page(page, index, parsed):
    """Convert ``head=`` to ``1=`` in {{ru-phrase}} templates.

    A warning is logged for templates carrying ``tr=`` and for templates
    that have both ``head=`` and ``1=`` (the latter are left untouched).
    During conversion ``tr=`` is removed and, when non-empty, re-added
    after ``1=``.  The incoming ``parsed`` argument is ignored; the page is
    re-parsed.

    Returns ``(new_page_text, notes)``.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)  # value is not used below
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-phrase":
            continue
        if t.has("tr"):
            pagemsg("WARNING: Has tr=: %s" % unicode(t))
        if not t.has("head"):
            continue
        if t.has("1"):
            pagemsg("WARNING: Has both head= and 1=: %s" % unicode(t))
            continue

        notes.append("ru-phrase: convert head= to 1=")
        before = unicode(t)
        head = getparam(t, "head")
        rmparam(t, "head")
        tr = getparam(t, "tr")
        rmparam(t, "tr")
        t.add("1", head)
        if tr:
            t.add("tr", tr)
        pagemsg("Replacing %s with %s" % (before, unicode(t)))

    return unicode(parsed), notes
def process_page(index, page):
    """Report pages that lack a {{ru-adj}} headword template.

    When no {{ru-adj}} is found, the message also lists any noun headword
    templates and {{head}} templates seen on the page.

    Fix: the suffix was built with ``notes and ...``, which evaluates to the
    empty list when nothing was found and printed a stray ``[]`` after the
    message.  A conditional expression now yields an empty string instead.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    parsed = blib.parse(page)

    found_headword_template = False
    for t in parsed.filter_templates():
        if unicode(t.name) in ["ru-adj"]:
            found_headword_template = True
    if found_headword_template:
        return

    notes = []
    noun_headers = ["ru-noun", "ru-noun+", "ru-proper noun", "ru-proper noun+"]
    for t in parsed.filter_templates():
        name = unicode(t.name)
        if name in noun_headers:
            notes.append("found noun header (%s)" % name)
        if name == "head":
            notes.append("found head header (%s)" % getparam(t, "2"))

    suffix = ("; " + ",".join(notes)) if notes else ""
    pagemsg("Missing adj headword template%s" % suffix)
def process_page(page, index):
    """Report pages whose descendants include no East/North Germanic ones.

    Every {{desc}} / {{desctree}} template without ``bor=`` is classified by
    its 1= language code.  Codes in the list below are logged as
    non-West-Germanic; all other codes are collected.  If no listed code was
    seen, the collected codes are reported.

    Fix: the Old Danish code was misspelled ``gwq-oda``; it is ``gmq-oda``,
    in line with the other ``gmq-`` codes in the list.  With the typo, Old
    Danish descendants were counted among the remaining codes.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    non_wgem_codes = [
        "got", "gme-cgo", "non", "non-ogt", "non-own", "non-oen", "is", "fo",
        "nrn", "no", "nb", "nn", "sv", "da", "gmq-osw", "gmq-oda", "gmq-bot",
        "gmq-jmk", "gmq-scy", "gmq-gut", "ovd",
    ]

    parsed = blib.parse(page)
    non_wgem = False
    wgem = []
    for t in parsed.filter_templates():
        if tname(t) not in ["desc", "desctree"]:
            continue
        if getparam(t, "bor"):
            # Templates with bor= set are not classified.
            continue
        desc = getparam(t, "1")
        if desc in non_wgem_codes:
            pagemsg("Saw non-West-Germanic descendant %s" % unicode(t))
            non_wgem = True
        else:
            wgem.append(desc)

    if not non_wgem:
        pagemsg("Saw no non-West-Germanic descendants but saw West-Germanic or non-Germanic descendants %s" % ",".join(wgem))
def process_page(index, page, save, verbose, direc):
    """Unfinished pass intended to add inanimacy to neuter noun templates.

    Visits ru-noun+ / ru-noun-table templates, and saves the page (or logs
    the would-be save) if the page text changed.

    NOTE(review): this function looks incomplete.  It references ``FIXME``
    and ``fix_indeclinable``, neither of which is defined in the visible
    code, and the ``frob_gender_param`` helper, ``subpagetitle`` and
    ``direc`` are never used.  The original indentation was lost, so the
    nesting shown here is a reconstruction -- confirm before relying on it.
    """
    pagetitle = unicode(page.title())
    subpagetitle = re.sub(".*:", "", pagetitle)

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    notes = []

    text = unicode(page.text)
    parsed = blib.parse(page)

    # Maps a gender value "n" to "n-in" and "n-p" to "n-in-p" in `param`.
    def frob_gender_param(t, param):
        val = getparam(t, param)
        if val == "n":
            t.add(param, "n-in")
        elif val == "n-p":
            t.add(param, "n-in-p")

    for t in parsed.filter_templates():
        if unicode(t.name) in ["ru-noun+", "ru-noun-table"]:
            origt = unicode(t)
            for param in t.params:
                if unicode(param.name) != "1":
                    pagemsg("WARNING: Found other than a single param in template, skipping: %s" % unicode(t))
                    # NOTE(review): `FIXME` is not defined in the visible
                    # code; unless it exists at module level this line
                    # raises NameError.  It may originally have been a
                    # separate placeholder statement after a bare `return`.
                    return FIXME

            # Nothing above modifies `t`, so as written this condition is
            # only true if the template was changed elsewhere.
            if origt != unicode(t):
                param3 = getparam(t, "3")
                if param3 != "-":
                    if fix_indeclinable:
                        if param3:
                            pagemsg("WARNING: Can't make indeclinable, has genitive singular given: %s" % origt)
                            return
                        else:
                            t.add("3", "-")
                            notes.append("make indeclinable")
                            pagemsg("Making indeclinable: %s" % unicode(t))
                    else:
                        pagemsg("WARNING: Would add inanimacy to neuter, but isn't marked as indeclinable: %s" % origt)
                        return
                pagemsg("Replacing %s with %s" % (origt, unicode(t)))

    new_text = unicode(parsed)

    if new_text != text:
        if verbose:
            pagemsg("Replacing <%s> with <%s>" % (text, new_text))
        if notes:
            comment = "Add inanimacy to neuters (%s)" % "; ".join(notes)
        else:
            comment = "Add inanimacy to neuters"
        if save:
            pagemsg("Saving with comment = %s" % comment)
            page.text = new_text
            page.save(comment=comment)
        else:
            pagemsg("Would save with comment = %s" % comment)
def process_page(index, page, template, new_name, params_to_add,
                 params_to_remove, params_to_rename, filters, comment):
    """Apply a generic rename/remove/add rewrite to one template on a page.

    Parameters:
      template          name of the template to rewrite
      new_name          new template name, or a false value to keep the name
      params_to_add     iterable of ``(param, value)`` pairs to set
      params_to_remove  iterable of parameter names to delete
      params_to_rename  iterable of ``(old_param, new_param)`` pairs
      filters           iterable of ``PARAM=VALUE`` (exact match) or
                        ``PARAM~REGEX`` (``re.search``) specs; a template is
                        rewritten only if it satisfies every filter
      comment           explicit change comment; when false, the collected
                        notes are returned instead

    Returns ``(new_page_text, comment or notes)``.

    Raises ValueError for a filter spec containing neither ``=`` nor ``~``.

    Fixes:
      * The "Skipping ..." message used ``% origt, filt``, which raised
        TypeError; the arguments are now passed as a tuple.
      * ``continue`` inside the filter loop only advanced to the next
        filter, so a failing filter never stopped the rewrite.  A failing
        filter now skips the template.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def passes_filters(t, origt):
        """Return True if template ``t`` satisfies every filter spec."""
        for filt in filters:
            m = re.search("^(.*)=(.*)$", filt)
            if m:
                matched = getparam(t, m.group(1)) == m.group(2)
            else:
                m = re.search("^(.*)~(.*)$", filt)
                if not m:
                    raise ValueError("Unrecognized filter %s" % filt)
                matched = bool(
                    re.search(m.group(2), getparam(t, m.group(1))))
            if not matched:
                pagemsg("Skipping %s because filter %s doesn't match"
                        % (origt, filt))
                return False
        return True

    pagemsg("Processing")

    notes = []

    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        if tn != template or not passes_filters(t, origt):
            continue

        for old_param, new_param in params_to_rename:
            if t.has(old_param):
                # Insert the new name in the old param's position first.
                t.add(new_param, getparam(t, old_param), before=old_param,
                      preserve_spacing=False)
                rmparam(t, old_param)
                notes.append("rename %s= to %s= in {{%s}}"
                             % (old_param, new_param, tn))
        for param in params_to_remove:
            if t.has(param):
                rmparam(t, param)
                notes.append("remove %s= from {{%s}}" % (param, tn))
        for param, value in params_to_add:
            if getparam(t, param) != value:
                t.add(param, value)
                notes.append("add %s=%s to {{%s}}" % (param, value, tn))
        if new_name:
            blib.set_template_name(t, new_name)
            notes.append("rename {{%s}} to {{%s}}" % (template, new_name))

        if unicode(t) != origt:
            pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))

    return unicode(parsed), comment or notes
def search_noconj(startFrom, upTo):
    """List Arabic verb pages lacking a headword or conjugation template.

    Walks the "Arabic verbs" category between ``startFrom`` and ``upTo`` and
    emits one bullet line for each page whose text does not contain
    ``{{ar-verb``, and one for each page whose text does not contain
    ``{{ar-conj``.
    """
    for index, page in blib.cat_articles(u"Arabic verbs", startFrom, upTo):
        pagetitle = page.title()
        text = unicode(blib.parse(page))
        has_verb = "{{ar-verb" in text
        has_conj = "{{ar-conj" in text
        if not has_verb:
            msg("* ar-verb not in {{l|ar|%s}}" % pagetitle)
        if not has_conj:
            msg("* ar-conj not in {{l|ar|%s}}" % pagetitle)
def process_page(templates, index, page, save=False, verbose=False):
    """Shift the plural-stem argument of noun templates one slot to the left.

    For each template named in ``templates``, the plural stem is moved from
    arg 5 to arg 4 when ``arg1_is_stress`` accepts arg 1, otherwise from
    arg 4 to arg 3.  Templates with an unnamed argument equal to ``or``,
    ``-`` or ``_``, or starting with ``join:``, are skipped with a warning,
    as are templates that already have a value in the destination slot.
    The page is saved when ``save`` is true; otherwise the would-be edit is
    logged.  ``verbose`` is accepted but not used.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def has_unsupported_args(t):
        """Warn and return True if ``t`` is a multi-decl/multi-word call."""
        for param in t.params:
            if param.showkey:
                continue
            val = unicode(param.value)
            if val == "or":
                pagemsg("WARNING: Can't handle multi-decl templates: %s"
                        % unicode(t))
                return True
            if val == "-" or val == "_" or val.startswith("join:"):
                pagemsg("WARNING: Can't handle multi-word templates: %s"
                        % unicode(t))
                return True
        return False

    if not page.exists():
        pagemsg("WARNING: Page doesn't exist")
        return

    parsed = blib.parse(page)

    should_save = False

    for t in parsed.filter_templates():
        if unicode(t.name) not in templates:
            continue
        before = unicode(t)
        # Punt if multi-arg-set, can't handle yet
        if has_unsupported_args(t):
            continue

        if arg1_is_stress(getparam(t, "1")):
            oldplarg, newplarg = "5", "4"
        else:
            oldplarg, newplarg = "4", "3"

        plstem = getparam(t, oldplarg)
        if not plstem:
            continue
        if getparam(t, newplarg):
            pagemsg("WARNING: Something wrong, found args in both positions %s and %s: %s" %
                    (newplarg, oldplarg, unicode(t)))
            continue
        rmparam(t, oldplarg)
        t.add(newplarg, plstem)
        should_save = True
        pagemsg("Replacing %s with %s" % (before, unicode(t)))

    if not should_save:
        return

    comment = "Move plstem from 5th/4th argument to 4th/3rd"
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = unicode(parsed)
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def process_page(page, index, parsed):
    """Rebuild class 7 ru-conj / ru-conj-old templates with adjusted args.

    For templates whose 2= starts with ``7``, numbered params 1-5 are read,
    params 4 and 5 are adjusted, all params are erased and the template is
    rebuilt from the numbered params followed by the non-numbered ones
    (except ``lang``, ``nocat`` and ``tr``, which are dropped).  The
    incoming ``parsed`` argument is ignored; the page is re-parsed.

    Returns ``(new_page_text, notes)``.

    Raises AssertionError if a matching template has a non-empty 6=, or if
    5= is already set where the function wants to set it.

    NOTE(review): the original indentation was lost.  The nesting of the
    param4/param5 adjustments relative to the ``7b`` test is a
    reconstruction -- confirm against the intended behavior.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        if unicode(t.name) in ["ru-conj", "ru-conj-old"]:
            param1 = getparam(t, "1")
            param2 = getparam(t, "2")
            if not param2.startswith("7"):
                continue
            param3 = getparam(t, "3")
            param4 = getparam(t, "4")
            param5 = getparam(t, "5")
            assert not getparam(t, "6")
            if param2.startswith("7b"):
                # NOTE(review): presumably marks a vowel alternation for
                # verbs whose param 3 ends in the matched suffix -- verify.
                if re.search(
                        u"[еѣ]сти́(сь)?$",
                        param3) and u"ё" not in param4 and u"ѣ̈" not in param4:
                    assert not param5
                    param5 = u"ёе"
                # NOTE(review): assumed to apply to 7b only -- confirm.
                param4 = rulib.make_unstressed_ru(param4)
            # NOTE(review): assumed to apply to every class 7 verb -- confirm.
            if re.search(u"(л[еѣ]́?зть|с[еѣ]́?сть|обокра́сть)(ся)?$", param3):
                param5 = ""
            # Fetch non-numbered params.
            non_numbered_params = []
            for param in t.params:
                pname = unicode(param.name)
                if not re.search(r"^[0-9]+$", pname) and pname not in [
                        "lang", "nocat", "tr"
                ]:
                    non_numbered_params.append((pname, param.value))
            # Erase all params.
            del t.params[:]
            # Put back numbered params.
            t.add("1", param1)
            t.add("2", param2)
            t.add("3", param3)
            t.add("4", param4)
            if param5:
                t.add("5", param5)
            # Put back non-numbered params.
            for name, value in non_numbered_params:
                t.add(name, value)
        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))
            notes.append(
                "rewrite class 7 verb to correspond to module changes")

    return unicode(parsed), notes
def process_page(index, page, direc):
    """Unfinished pass intended to add inanimacy to neuter noun templates.

    Visits ru-noun+ / ru-noun-table templates and returns
    ``(new_page_text, notes)``.

    NOTE(review): this function looks incomplete.  It references ``FIXME``
    and ``fix_indeclinable``, neither of which is defined in the visible
    code; the ``frob_gender_param`` helper, ``subpagetitle``, ``text``,
    ``direc`` and the computed ``comment`` are never used.  The original
    indentation was lost, so the nesting shown here is a reconstruction --
    confirm before relying on it.
    """
    pagetitle = unicode(page.title())
    subpagetitle = re.sub(".*:", "", pagetitle)

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    notes = []

    text = unicode(page.text)
    parsed = blib.parse(page)

    # Maps a gender value "n" to "n-in" and "n-p" to "n-in-p" in `param`.
    def frob_gender_param(t, param):
        val = getparam(t, param)
        if val == "n":
            t.add(param, "n-in")
        elif val == "n-p":
            t.add(param, "n-in-p")

    for t in parsed.filter_templates():
        if unicode(t.name) in ["ru-noun+", "ru-noun-table"]:
            origt = unicode(t)
            for param in t.params:
                if unicode(param.name) != "1":
                    pagemsg(
                        "WARNING: Found other than a single param in template, skipping: %s"
                        % unicode(t))
                    # NOTE(review): `FIXME` is not defined in the visible
                    # code; unless it exists at module level this line
                    # raises NameError.  It may originally have been a
                    # separate placeholder statement after a bare `return`.
                    return FIXME

            # Nothing above modifies `t`, so as written this condition is
            # only true if the template was changed elsewhere.
            if origt != unicode(t):
                param3 = getparam(t, "3")
                if param3 != "-":
                    if fix_indeclinable:
                        if param3:
                            pagemsg(
                                "WARNING: Can't make indeclinable, has genitive singular given: %s"
                                % origt)
                            return
                        else:
                            t.add("3", "-")
                            notes.append("make indeclinable")
                            pagemsg("Making indeclinable: %s" % unicode(t))
                    else:
                        pagemsg(
                            "WARNING: Would add inanimacy to neuter, but isn't marked as indeclinable: %s"
                            % origt)
                        return
                pagemsg("Replacing %s with %s" % (origt, unicode(t)))

    # `comment` is built but not returned; only `notes` is passed back.
    if notes:
        comment = "Add inanimacy to neuters (%s)" % "; ".join(notes)
    else:
        comment = "Add inanimacy to neuters"

    return unicode(parsed), notes
def process_page(index, page, save, verbose):
    """Disabled rewrite moving ru-conj 6a arg 6 to arg 4 and 7b arg 7 to 6.

    Logs a warning and returns ``None`` immediately.  Everything after the
    bare ``return`` is unreachable and kept only for reference.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ---- unreachable below this point ----
    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj"]:
            conjtype = getparam(t, "1")
            if conjtype.startswith("6a"):
                param6 = getparam(t, "6")
                if param6:
                    rmparam(t, "6")
                    if not getparam(t, "5"):
                        rmparam(t, "5")
                    # Make sure args 1-3 exist so that arg 4 is positional.
                    for i in xrange(1, 4):
                        if not t.has(str(i)):
                            t.add(str(i), "")
                    t.add("4", param6)
                    notes.append("move type 6a arg6 -> arg4")
            if conjtype.startswith("7b"):
                param7 = getparam(t, "7")
                if param7:
                    rmparam(t, "7")
                    # Make sure args 1-5 exist so that arg 6 is positional.
                    for i in xrange(1, 6):
                        if not t.has(str(i)):
                            t.add(str(i), "")
                    t.add("6", param7)
                    notes.append("move type 7b arg7 -> arg6")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    new_text = unicode(parsed)
    if new_text == text:
        return

    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def process_page(index, page, save, verbose, nouns):
    """Emit an etymology directive for the abstract noun of an adjective.

    The noun title is built by replacing the adjective's final two-letter
    ending with the noun suffix; pages without that ending, or whose noun
    is not in ``nouns``, are ignored.  Pages containing the alt-yo
    adjective template or a {{ru-adj}} with several heads are skipped.
    For each remaining {{ru-adj}}, one directive line is written via
    ``msg`` unless the noun's Russian section is missing or already
    contains an etymology header.  ``save`` is accepted but not used.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    if not re.search(u"[иы]й$", pagetitle):
        pagemsg(u"Skipping adjective not in -ый or -ий")
        return

    noun = re.sub(u"[иы]й$", u"ость", pagetitle)
    if noun not in nouns:
        return

    text = unicode(page.text)  # value is not used below
    parsed = blib.parse(page)

    if any(unicode(t.name) == u"ru-adj-alt-ё"
           for t in parsed.filter_templates()):
        pagemsg(u"Skipping alt-ё adjective")
        return

    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-adj":
            continue
        heads = blib.fetch_param_chain(t, "1", "head", pagetitle)
        if len(heads) > 1:
            pagemsg("Skipping adjective with multiple heads: %s"
                    % ",".join(heads))
            return
        tr = getparam(t, "tr")

        nounsection = blib.find_lang_section(noun, "Russian", pagemsg,
                                             errandpagemsg)
        if not nounsection:
            pagemsg("Couldn't find Russian section for %s" % noun)
            continue
        if "==Etymology" in nounsection:
            pagemsg("Noun %s already has etymology" % noun)
            continue
        if tr:
            msg(u"%s %s+tr1=%s+-ость no-etym" % (noun, heads[0], tr))
        else:
            msg(u"%s %s+-ость no-etym" % (noun, heads[0]))
def process_page(page, index, parsed):
    """Rebuild class 8b ru-conj / ru-conj-old templates.

    For templates whose 2= starts with ``8b``, param 4 is passed through
    ``rulib.make_unstressed_ru``, all params are erased, and the template
    is rebuilt from numbered params 1-5 followed by the non-numbered ones
    (except ``lang``, ``nocat`` and ``tr``, which are dropped).  Warnings
    are sent to ``errmsg`` for past_m and past adverbial participle
    overrides.  The incoming ``parsed`` argument is ignored; the page is
    re-parsed.

    Returns ``(new_page_text, notes)``.

    Raises AssertionError if a matching template has a non-empty 6=.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj", "ru-conj-old"]:
            param1 = getparam(t, "1")
            param2 = getparam(t, "2")
            if not param2.startswith("8b"):
                continue
            param3 = getparam(t, "3")
            param4 = getparam(t, "4")
            param5 = getparam(t, "5")
            assert not getparam(t, "6")

            if getparam(t, "past_m"):
                errmsg("WARNING: Has past_m=%s" % getparam(t, "past_m"))
            pap = getparam(t, "pap") or getparam(t, "past_adv_part")
            if pap:
                errmsg("WARNING: Has pap=%s" % pap)
            pap2 = getparam(t, "pap2") or getparam(t, "past_adv_part2")
            if pap2:
                errmsg("WARNING: Has pap2=%s" % pap2)

            param4 = rulib.make_unstressed_ru(param4)

            # Remember the named params that survive the rebuild.
            kept_named = []
            for param in t.params:
                pname = unicode(param.name)
                if re.search(r"^[0-9]+$", pname):
                    continue
                if pname in ["lang", "nocat", "tr"]:
                    continue
                kept_named.append((pname, param.value))

            # Wipe the template and restore numbered, then named, params.
            del t.params[:]
            t.add("1", param1)
            t.add("2", param2)
            t.add("3", param3)
            t.add("4", param4)
            if param5:
                t.add("5", param5)
            for name, value in kept_named:
                t.add(name, value)
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))
            notes.append(
                "rewrite class 8b verb to correspond to module changes")

    return unicode(parsed), notes
def process_page(index, page, save, verbose):
    """Disabled check of past-tense overrides in ru-conj-7a / ru-conj-7b.

    Logs a warning and returns ``None`` immediately.  Everything after the
    bare ``return`` is unreachable and kept only for reference.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ---- unreachable below this point ----
    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj-7a", "ru-conj-7b"]:
            past_stem = getparam(t, "4")
            vowel_end = re.search(u"[аэыоуяеиёю́]$", past_stem)
            past_m = getparam(t, "past_m")
            past_f = getparam(t, "past_f")
            past_n = getparam(t, "past_n")
            past_pl = getparam(t, "past_pl")
            if past_m or past_f or past_n or past_pl:
                upast_stem = ru.make_unstressed(past_stem)
                expected_past_m = past_stem + (u"л" if vowel_end else "")
                expected_past_f = upast_stem + u"ла́"
                expected_past_n = upast_stem + u"ло́"
                expected_past_pl = upast_stem + u"ли́"
                masc_ok = not past_m or expected_past_m == past_m
                rest_ok = (expected_past_f == past_f
                           and expected_past_n == past_n
                           and expected_past_pl == past_pl)
                if masc_ok and rest_ok:
                    msg("Would remove past overrides and add arg5=b")
                else:
                    msg("WARNING: Remaining past overrides: past_m=%s, past_f=%s, past_n=%s, past_pl=%s, expected_past_m=%s, expected_past_f=%s, expected_past_n=%s, expected_past_pl=%s" %
                        (past_m, past_f, past_n, past_pl, expected_past_m,
                         expected_past_f, expected_past_n, expected_past_pl))
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    new_text = unicode(parsed)
    if new_text == text:
        return

    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def process_page(index, page, save, verbose):
    """Fold ``past_m=`` of class 8a/8b ru-conj templates into numbered args.

    For each {{ru-conj}} whose 2= starts with ``8a`` or ``8b`` and that has
    a non-empty ``past_m=``, the override is removed and then:

    * dropped, if it equals the stem in arg 3;
    * written to arg 3, if 2= starts with ``8b`` but not ``8b/`` and the
      stem is the unstressed form of ``past_m``;
    * written to arg 5 otherwise.

    Templates having any parameter whose value is exactly ``or`` are
    skipped.  If the page text changes it is saved (when ``save`` is true)
    or the would-be edit is logged.

    Raises AssertionError if the text changed but no note was recorded.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        param2 = getparam(t, "2")
        if unicode(t.name) in ["ru-conj"] and re.search(r"^8[ab]", param2):
            if [x for x in t.params if unicode(x.value) == "or"]:
                pagemsg("WARNING: Skipping multi-arg conjugation: %s"
                        % unicode(t))
                continue
            past_m = getparam(t, "past_m")
            if past_m:
                rmparam(t, "past_m")
                stem = getparam(t, "3")
                plain_8b = (param2.startswith("8b")
                            and not param2.startswith("8b/"))
                if stem == past_m:
                    pagemsg("Stem %s and past_m same" % stem)
                    notes.append("remove redundant past_m %s" % past_m)
                elif plain_8b and ru.make_unstressed(past_m) == stem:
                    pagemsg("Class 8b/b and stem %s is unstressed version of past_m %s, replacing stem with past_m" % (
                        stem, past_m))
                    t.add("3", past_m)
                    notes.append("moving past_m %s to arg 3" % past_m)
                else:
                    pagemsg("Stem %s and past_m %s are different, putting past_m in param 5" % (
                        stem, past_m))
                    t.add("5", past_m)
                    notes.append("moving past_m %s to arg 5" % past_m)
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    new_text = unicode(parsed)
    if new_text == text:
        return

    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def process_page(page, index):
    """Log each page named in a ``{{R:vep:UVVV}}`` call that does not exist.

    The candidate page names are whatever
    ``blib.fetch_param_chain(t, "1", "")`` returns for the template.

    Args:
        page: Page object to scan.
        index: Page index, used as a prefix in log messages.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    for t in blib.parse(page).filter_templates():
        if unicode(t.name) != "R:vep:UVVV":
            continue
        for refpage in blib.fetch_param_chain(t, "1", ""):
            if pywikibot.Page(site, refpage).exists():
                continue
            pagemsg("Page [[%s]] does not exist" % refpage)
def process_page(index, page, save, verbose):
    """Log each page named in a ``{{R:vep:UVVV}}`` call that does not exist.

    The candidate page names are whatever
    ``blib.fetch_param_chain(t, "1", "")`` returns for the template. The
    page is never modified.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object to scan.
        save: Unused here.
        verbose: Unused here.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    for t in blib.parse(page).filter_templates():
        if unicode(t.name) != "R:vep:UVVV":
            continue
        for refpage in blib.fetch_param_chain(t, "1", ""):
            if pywikibot.Page(site, refpage).exists():
                continue
            pagemsg("Page [[%s]] does not exist" % refpage)
def process_lemma(index, pagetitle, slots, program_args):
    """Run ``process_page`` on the form pages of a Latin lemma for selected slots.

    The lemma page is parsed for ``la-conj``, ``la-ndecl`` and ``la-adecl``
    templates; for each, ``lalib.generate_infl_forms`` supplies a mapping of
    slot to comma-separated forms. Every slot that equals, or matches via
    ``lalib.slot_matches_spec``, one of ``slots`` has its forms visited.

    Args:
        index: Page index, used in log messages and passed to ``blib.do_edit``.
        pagetitle: Title of the lemma page.
        slots: Slot specs selecting which generated forms to process.
        program_args: Object with ``verbose``, ``save`` and ``diff``
            attributes; also handed to ``process_page``.
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg,
                                program_args.verbose)

    # Adapter from the (page, index, parsed) callback signature expected by
    # blib.do_edit to the process_page signature used in this script.
    def do_process_page(page, index, parsed):
        return process_page(index, page, program_args)

    lemma_page = pywikibot.Page(site, pagetitle)
    for t in blib.parse(lemma_page).filter_templates():
        tn = tname(t)
        if tn == "la-conj":
            pos = "verb"
        elif tn == "la-ndecl":
            pos = "noun"
        elif tn == "la-adecl":
            pos = "adj"
        else:
            pos = None
        if not pos:
            continue

        args = lalib.generate_infl_forms(pos, unicode(t), errandpagemsg,
                                         expand_text)
        for slot in args:
            wanted = any(
                spec == slot or lalib.slot_matches_spec(slot, spec)
                for spec in slots)
            if not wanted:
                continue
            for formpagename in args[slot].split(","):
                if "[" in formpagename or "|" in formpagename:
                    pagemsg("WARNING: Skipping page %s with links in it" % formpagename)
                    continue
                formpagename = lalib.remove_macrons(formpagename)
                formpage = pywikibot.Page(site, formpagename)
                if not formpage.exists():
                    pagemsg("WARNING: Form page %s doesn't exist, skipping" % formpagename)
                elif formpagename == pagetitle:
                    pagemsg("WARNING: Skipping dictionary form")
                else:
                    blib.do_edit(formpage, index, do_process_page,
                                 save=program_args.save,
                                 verbose=program_args.verbose,
                                 diff=program_args.diff)
def process_page(index, page, save, verbose):
    """Warn when the page contains no ``{{inflection of}}`` template.

    The page is only inspected, never modified.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object to scan.
        save: Unused here.
        verbose: Unused here.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)

    has_inflection_of = any(
        unicode(t.name) in ["inflection of"]
        for t in parsed.filter_templates())
    if not has_inflection_of:
        pagemsg("WARNING: No 'inflection of'")
def process_page(page, index, parsed):
    """Move a ``{{ja-usex|...}}`` call out of the text param of a quote template.

    For every template whose name is in ``quote_templates``, the quoted text
    is looked up in the template-specific numbered param (6, 7 or 8,
    according to the ``quote_templates_text_param_*`` collections), then in
    ``text=``, then in ``passage=``. If the stripped value consists entirely
    of a ``{{ja-usex|...}}`` call, the param is removed from the template and
    the page text is rewritten so the usex call follows the quote template on
    a new line that reuses the quote's ``#...*`` list prefix plus ``:``.

    Args:
        page: Page object providing ``title()`` and ``text``.
        index: Page index, used as a prefix in log messages.
        parsed: Ignored; the page is re-parsed with ``blib.parse``.

    Returns:
        A ``(new_text, notes)`` tuple: the possibly modified page text and
        the list of change notes.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    newtext = text
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        if tn not in quote_templates:
            continue

        text_param = None
        if tn in quote_templates_text_param_6:
            text_param = "6"
        elif tn in quote_templates_text_param_7:
            text_param = "7"
        elif tn in quote_templates_text_param_8:
            text_param = "8"
        textval = ""
        if text_param:
            textval = getparam(t, text_param)
        # Fall back to the named params when the numbered one is empty.
        if not textval:
            text_param = "text"
            textval = getparam(t, text_param)
        if not textval:
            text_param = "passage"
            textval = getparam(t, text_param)
        textval = textval.strip()

        if re.search(r"^\{\{ja-usex\|.*\}\}$", textval, re.S):
            rmparam(t, text_param)
            stripped_template = unicode(t)

            # Use a replacement *function* rather than a replacement string:
            # the template text and the quote text are arbitrary wikitext, and
            # in a replacement string any backslash sequence they contain
            # (\n, \1, \g<...>) would be interpreted by re.sub.
            def move_usex_out(m, stripped_template=stripped_template,
                              textval=textval):
                prefix = m.group(1)
                return "%s %s%s: %s" % (prefix, stripped_template, prefix,
                                        textval)

            newnewtext = re.sub(r"(\n#+\*) *%s" % re.escape(origt),
                                move_usex_out, newtext)
            if newtext == newnewtext:
                pagemsg("WARNING: Can't find quote template in text: %s" % origt)
            else:
                newtext = newnewtext
                notes.append("move ja-usex call outside of %s call" % tn)
        elif "{{ja-usex|" in textval:
            pagemsg("WARNING: Found {{ja-usex| embedded in quote text but not whole param: %s" % origt)

    return newtext, notes
def process_page(page, index, do_noun):
    """Strip the explicit ``[[Category:Russian nouns]]`` (or proper nouns) link.

    A category link surrounded by blank lines collapses to a single blank
    line; any remaining category link followed by a newline is deleted.

    Args:
        page: Page object providing ``title()`` and ``text``.
        index: Page index, used as a prefix in log messages.
        do_noun: If true, target "nouns"; otherwise "proper nouns".

    Returns:
        A ``(new_text, comment)`` tuple: the rewritten page text and the
        change comment naming the removed category.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    cat = "nouns" if do_noun else "proper nouns"
    new_text = re.sub(r"\n\n\n*\[\[Category:Russian %s]]\n\n\n*" % cat,
                      "\n\n", text)
    new_text = re.sub(r"\[\[Category:Russian %s]]\n" % cat, "", new_text)
    # The category name must be interpolated here; otherwise the comment
    # carries a literal "%s".
    return new_text, "Remove redundant [[:Category:Russian %s]]" % cat
def search_iyya_noetym(startFrom, upTo):
    """Report Arabic nouns ending in -iyya that lack an -iyya/nisba etym template.

    Iterates over ``blib.cat_articles(u"Arabic nouns", startFrom, upTo)`` and,
    for every page whose title ends with the -iyya suffix, logs a message if
    none of the recognized etymology templates is present. The message also
    says whether a ``suffix`` template was seen.

    Args:
        startFrom: Start bound passed through to ``blib.cat_articles``.
        upTo: End bound passed through to ``blib.cat_articles``.
    """
    etym_template_names = ["ar-etym-iyya", "ar-etym-nisba-a",
                           "ar-etym-noun-nisba", "ar-etym-noun-nisba-linking"]
    for page, index in blib.cat_articles(u"Arabic nouns", startFrom, upTo):
        text = blib.parse(page)
        pagetitle = page.title()
        if not pagetitle.endswith(u"ية"):
            continue

        etym = False
        suffix = False
        for t in text.filter_templates():
            if t.name in etym_template_names:
                etym = True
            if t.name == "suffix":
                suffix = True
        if etym:
            continue

        suffix_note = " (has suffix template)" if suffix else ""
        msg("Page %s %s: Ends with -iyya, no appropriate etym template%s" % (
            index, pagetitle, suffix_note))
def snarf_noun_accents_and_forms(noun, orig_pagemsg):
    """Fetch the accented lemma and generated forms for a Bulgarian noun.

    Results are memoized in the module-level ``nouns_to_accents_and_forms``
    dict, keyed by the accent-stripped noun (which is also used as the page
    title). Failures are cached as ``(None, None)``.

    Args:
        noun: The noun to look up; accents are removed to get the page title.
        orig_pagemsg: Logging callback; messages are prefixed with the noun.

    Returns:
        ``(lemma, forms)`` where ``forms`` is the result of
        ``blib.split_generate_args`` on the expanded
        ``{{bg-generate-noun-forms|...}}`` call, or ``(None, None)`` when the
        page has two ``{{bg-ndecl}}`` calls, expansion fails, or no usable
        headword/declension pair is found.
    """
    global args
    pagetitle = bglib.remove_accents(noun)
    # Serve from the cache if this noun has been looked up before.
    if pagetitle in nouns_to_accents_and_forms:
        return nouns_to_accents_and_forms[pagetitle]

    def pagemsg(txt):
        orig_pagemsg("Noun %s: %s" % (noun, txt))

    page = pywikibot.Page(site, pagetitle)
    parsed = blib.parse(page)
    # lemma is tri-state: None = no headword seen yet, False = headword seen
    # but lacking an accent, otherwise the value of the headword's 1= param.
    lemma = None
    for t in parsed.filter_templates():
        if tname(t) in ["bg-noun", "bg-proper noun"]:
            if lemma:
                pagemsg("WARNING: Saw two {{bg-noun}} invocations without intervening {{bg-ndecl}}: %s" % unicode(t))
            lemma = getparam(t, "1")
            if not lemma:
                # NOTE(review): lemma is left as the empty value here, which is
                # neither None nor False, so a following {{bg-ndecl}} passes both
                # checks below -- confirm this is intended.
                pagemsg("WARNING: Missing headword in noun: %s" % unicode(t))
                continue
            if bglib.needs_accents(lemma):
                pagemsg("WARNING: Noun %s missing an accent: %s" % (lemma, unicode(t)))
                lemma = False
                continue
        if tname(t) == "bg-ndecl":
            if lemma is False:
                pagemsg("WARNING: Skipping %s because noun missing an accent" % unicode(t))
                continue
            if lemma is None:
                pagemsg("WARNING: Skipping %s because no preceding {{bg-noun}}" % unicode(t))
                continue
            # An entry for this page at this point can only have been stored by
            # an earlier {{bg-ndecl}} in this same loop (the cache was checked
            # on entry), so the page is ambiguous; cache and return a failure.
            if pagetitle in nouns_to_accents_and_forms:
                pagemsg("WARNING: Saw two {{bg-ndecl}} on the same page: %s" % unicode(t))
                nouns_to_accents_and_forms[pagetitle] = (None, None)
                return (None, None)
            # Turn the declension call into the corresponding form-generation
            # call and expand it.
            generate_template = re.sub(r"^\{\{bg-ndecl\|", "{{bg-generate-noun-forms|",
                unicode(t))

            def expand_text(tempcall):
                return blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)

            generate_result = expand_text(generate_template)
            if not generate_result:
                nouns_to_accents_and_forms[pagetitle] = (None, None)
                return (None, None)
            nouns_to_accents_and_forms[pagetitle] = (lemma, blib.split_generate_args(generate_result))
    # Exactly one declension was processed successfully.
    if pagetitle in nouns_to_accents_and_forms:
        return nouns_to_accents_and_forms[pagetitle]
    pagemsg("WARNING: Couldn't find both lemma and declension")
    nouns_to_accents_and_forms[pagetitle] = (None, None)
    return (None, None)
def process_page(index, page, save, verbose):
    """Rewrite class-6 ``ru-conj`` types to the ``6°`` notation.

    NOTE: this function is currently disabled. It logs a warning and returns
    immediately, so everything after the early ``return`` is unreachable and
    is kept only as a starting point should the script be revived.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: If true, write the changed text back; otherwise only log.
        verbose: If true, log the complete before/after page text.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ---- Unreachable from here on (script disabled above) ----
    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        param1 = getparam(t, "1")
        if unicode(t.name) in ["ru-conj"]:
            # 6a/6c convert only when no_iotation is set; 6b always converts.
            # The two prefixes are mutually exclusive, so a 6a/6c template
            # without no_iotation falls through the elif untouched.
            if re.search(r"^6[ac]", param1) and getparam(t, "no_iotation"):
                rmparam(t, "no_iotation")
                if param1.startswith("6a"):
                    notes.append(u"6a + no_iotation -> 6°a")
                else:
                    notes.append(u"6c + no_iotation -> 6°c")
                t.add("1", re.sub("^6", u"6°", param1))
            elif re.search(r"^6b", param1):
                notes.append(u"6b -> 6°b")
                t.add("1", re.sub("^6", u"6°", param1))
        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if not save:
        pagemsg("Would save with comment = %s" % comment)
        return
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
def process_page(index, page, save, verbose):
    """Drop informal comparatives from ``ru-adj`` when the regular form precedes them.

    The comparative chain (params ``2``, ``comp2``, ... as handled by
    ``blib.fetch_param_chain`` / ``blib.set_param_chain``) is scanned in
    order. A comparative ending in ``-ей`` is removed if the same form with
    final ``-ее`` has already been kept; otherwise it is kept and a warning
    is logged.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: If true, write the changed text back; otherwise only log.
        verbose: If true, log the complete before/after page text.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        if unicode(t.name) == "ru-adj":
            comps = blib.fetch_param_chain(t, "2", "comp")
            newcomps = []
            for comp in comps:
                if re.search(u"е́?й$", comp):
                    # Regular counterpart: same form with the final й
                    # replaced by е, keeping any stress mark.
                    regcomp = re.sub(u"(е́?)й$", u"\\1е", comp)
                    if regcomp in newcomps:
                        pagemsg("Skipping informal form %s" % comp)
                        notes.append("remove informal comparative %s" % comp)
                    else:
                        # The form must be interpolated; otherwise the log
                        # shows a literal "%s".
                        pagemsg("WARNING: Found informal form %s without corresponding regular form" % comp)
                        newcomps.append(comp)
                else:
                    newcomps.append(comp)
            if comps != newcomps:
                blib.set_param_chain(t, newcomps, "2", "comp")
        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    new_text = unicode(parsed)
    if new_text != text:
        if verbose:
            pagemsg("Replacing <%s> with <%s>" % (text, new_text))
        assert notes
        comment = "; ".join(notes)
        if save:
            pagemsg("Saving with comment = %s" % comment)
            page.text = new_text
            page.save(comment=comment)
        else:
            pagemsg("Would save with comment = %s" % comment)
def process_page(index, page):
    """Log pages that have a Russian noun headword but no declension template.

    Headword templates are ``ru-noun`` and ``ru-proper noun``; declension
    templates are ``ru-noun-table`` and ``ru-decl-noun-see``.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object to scan.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    template_names = [unicode(t.name)
                      for t in blib.parse(page).filter_templates()]
    has_headword = any(
        name in ["ru-noun", "ru-proper noun"] for name in template_names)
    has_decl = any(
        name in ["ru-noun-table", "ru-decl-noun-see"]
        for name in template_names)

    if has_headword and not has_decl:
        pagemsg("Found headword template without decl")
def process_page(index, page, save, verbose):
    """Replace empty participle overrides with ``-`` on class-7 verb conjugations.

    Applies to ``ru-conj`` / ``ru-conj-old`` templates whose arg 2 is ``7a``
    or ``7b``. If ``past_adv_part_short=`` or ``past_actv_part=`` is present
    but empty, it is set to ``-``. Templates containing a param whose value
    is ``or`` are skipped with a warning. A warning is logged when nothing
    was changed.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: If true, write the changed text back; otherwise only log.
        verbose: If true, log the complete before/after page text.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        if unicode(t.name) not in ["ru-conj", "ru-conj-old"]:
            continue
        if getparam(t, "2") not in ["7a", "7b"]:
            continue
        if any(unicode(x.value) == "or" for x in t.params):
            pagemsg("WARNING: Skipping multi-arg conjugation: %s" % unicode(t))
            continue

        if (t.has("past_adv_part_short") and
                getparam(t, "past_adv_part_short") == ""):
            notes.append("set past_adv_part_short=-")
            before = unicode(t)
            t.add("past_adv_part_short", "-")
            pagemsg("Replacing %s with %s" % (before, unicode(t)))
        if t.has("past_actv_part") and getparam(t, "past_actv_part") == "":
            notes.append("set past_actv_part=-")
            before = unicode(t)
            t.add("past_actv_part", "-")
            pagemsg("Replacing %s with %s" % (before, unicode(t)))

    new_text = unicode(parsed)
    if new_text != text:
        if verbose:
            pagemsg("Replacing <%s> with <%s>" % (text, new_text))
        assert notes
        comment = "; ".join(notes)
        if not save:
            pagemsg("Would save with comment = %s" % comment)
        else:
            pagemsg("Saving with comment = %s" % comment)
            page.text = new_text
            page.save(comment=comment)

    if not notes:
        pagemsg("WARNING: No changes")
def process_page(index, page, save, verbose):
    """Convert ``head=`` to ``1=`` in ``ru-phrase`` templates.

    A warning is logged when ``tr=`` is present, and when both ``head=`` and
    ``1=`` are present (in which case the template is left alone). During
    conversion ``tr=`` is removed and re-added after ``1=`` only if non-empty.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: If true, write the changed text back; otherwise only log.
        verbose: If true, log the complete before/after page text.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-phrase":
            continue
        if t.has("tr"):
            pagemsg("WARNING: Has tr=: %s" % unicode(t))
        if not t.has("head"):
            continue
        if t.has("1"):
            pagemsg("WARNING: Has both head= and 1=: %s" % unicode(t))
            continue

        notes.append("ru-phrase: convert head= to 1=")
        before = unicode(t)
        head = getparam(t, "head")
        rmparam(t, "head")
        tr = getparam(t, "tr")
        rmparam(t, "tr")
        t.add("1", head)
        if tr:
            t.add("tr", tr)
        pagemsg("Replacing %s with %s" % (before, unicode(t)))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if not save:
        pagemsg("Would save with comment = %s" % comment)
        return
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
def process_page(index, page, save, verbose, do_noun):
    """Strip the explicit ``[[Category:Russian nouns]]`` (or proper nouns) link and save.

    A category link surrounded by blank lines collapses to a single blank
    line; any remaining category link followed by a newline is deleted.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: If true, write the changed text back; otherwise only log.
        verbose: Unused here.
        do_noun: If true, target "nouns"; otherwise "proper nouns".
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)

    cat = "nouns" if do_noun else "proper nouns"
    collapsed = re.sub(r"\n\n\n*\[\[Category:Russian %s]]\n\n\n*" % cat,
                       "\n\n", text)
    new_text = re.sub(r"\[\[Category:Russian %s]]\n" % cat, "", collapsed)

    if new_text == text:
        return
    comment = "Remove redundant [[:Category:Russian %s]]" % cat
    if not save:
        pagemsg("Would save with comment = %s" % comment)
        return
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
def process_page(index, page):
    """Log pages that lack an ``ru-adj`` headword template.

    When no ``ru-adj`` template is found, the log message also lists any
    noun headword templates (``ru-noun``, ``ru-noun+``, ``ru-proper noun``,
    ``ru-proper noun+``) and any ``head`` templates (with their arg 2) that
    are present.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object to scan.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    templates = blib.parse(page).filter_templates()
    if any(unicode(t.name) in ["ru-adj"] for t in templates):
        return

    notes = []
    for t in templates:
        name = unicode(t.name)
        if name in ["ru-noun", "ru-noun+", "ru-proper noun",
                    "ru-proper noun+"]:
            notes.append("found noun header (%s)" % name)
        if name == "head":
            notes.append("found head header (%s)" % getparam(t, "2"))

    # "notes and ..." would yield the empty list itself when there are no
    # notes, putting "[]" in the message; use an explicit empty suffix.
    suffix = "; " + ",".join(notes) if notes else ""
    pagemsg("Missing adj headword template%s" % suffix)
def process_page(index, page, save, verbose):
    """Merge arg 4 into arg 3 of ``ru-conj-4a`` templates.

    NOTE: this function is currently disabled. It logs a warning and returns
    immediately, so everything after the early ``return`` is unreachable and
    is kept only as a starting point should the script be revived.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: If true, write the changed text back; otherwise only log.
        verbose: If true, log the complete before/after page text.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ---- Unreachable from here on (script disabled above) ----
    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj-4a"]:
            shch = getparam(t, "4")
            if shch == u"щ":
                t.add("3", getparam(t, "3") + shch)
                rmparam(t, "4")
                notes.append(u"move param 4 (щ) to param 3")
            elif shch:
                pagemsg("WARNING: Strange value %s for param 4" % shch)
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if not save:
        pagemsg("Would save with comment = %s" % comment)
        return
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
def process_page(index, page, save, verbose):
    """Swap args 1 and 2 of verb conjugations whose arg 2 holds the verb type.

    Applies to ``ru-conj`` / ``ru-conj-old`` templates whose arg 2 is one of
    the recognized ``pf*`` / ``impf*`` verb-type strings: the old arg 1 goes
    to arg 2 and the verb type goes to arg 1.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: If true, write the changed text back; otherwise only log.
        verbose: If true, log the complete before/after page text.
    """
    verb_types = ("pf", "pf-intr", "pf-refl", "pf-impers",
                  "pf-intr-impers", "pf-refl-impers",
                  "impf", "impf-intr", "impf-refl", "impf-impers",
                  "impf-intr-impers", "impf-refl-impers")

    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    notes = []
    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj", "ru-conj-old"]:
            verbtype = getparam(t, "2")
            if verbtype in verb_types:
                conjtype = getparam(t, "1")
                t.add("2", conjtype)
                t.add("1", verbtype)
                notes.append("move verb type from arg 2 to arg 1")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if not save:
        pagemsg("Would save with comment = %s" % comment)
        return
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
def process_page(index, page, save, verbose):
    """Move ``suffix=ся`` of adjective declension templates into the lemma.

    Only pages whose title ends in ``ся`` are examined. For ``ru-decl-adj`` /
    ``ru-adj-old`` templates with ``suffix=ся``, ``ся`` is inserted before
    every comma in arg 1 and appended at its end, and ``suffix=`` is removed.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: If true, write the changed text back; otherwise only log.
        verbose: If true, log the complete before/after page text.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    if not pagetitle.endswith(u"ся"):
        return

    text = unicode(page.text)
    notes = []
    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        before = unicode(t)
        is_adj_decl = unicode(t.name) in ["ru-decl-adj", "ru-adj-old"]
        if is_adj_decl and getparam(t, "suffix") == u"ся":
            with_commas = getparam(t, "1").replace(",", u"ся,")
            t.add("1", re.sub("$", u"ся", with_commas))
            rmparam(t, "suffix")
            notes.append(u"move suffix=ся to lemma")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if not save:
        pagemsg("Would save with comment = %s" % comment)
        return
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
def process_page(index, page, save, verbose, genders):
    """Replace the gender params of the page's single noun headword template.

    The page must contain exactly one ``ru-noun+`` / ``ru-proper noun+``
    template; otherwise a warning is logged and nothing is done. Params
    ``g``, ``g2`` ... ``g5`` are removed and then ``g``, ``g2``, ... are set
    from ``genders`` in order. The page is saved (when ``save`` is true)
    without checking whether the text actually changed.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: If true, write the text back; otherwise only log.
        verbose: Unused here.
        genders: Sequence of gender strings to install.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    parsed = blib.parse(page)

    headword_template = None
    for t in parsed.filter_templates():
        if unicode(t.name) not in ["ru-noun+", "ru-proper noun+"]:
            continue
        if headword_template:
            pagemsg("WARNING: Multiple headword templates, skipping")
            return
        headword_template = t
    if not headword_template:
        pagemsg("WARNING: No headword templates, skipping")
        return

    orig_template = unicode(headword_template)
    for old_param in ("g", "g2", "g3", "g4", "g5"):
        rmparam(headword_template, old_param)
    for gnum, g in enumerate(genders):
        if gnum == 0:
            param = "g"
        else:
            param = "g" + str(gnum + 1)
        headword_template.add(param, g)
    pagemsg("Replacing %s with %s" % (orig_template, unicode(headword_template)))

    comment = "Fix headword gender, substituting new value %s" % ",".join(genders)
    if not save:
        pagemsg("Would save with comment = %s" % comment)
        return
    pagemsg("Saving with comment = %s" % comment)
    page.text = unicode(parsed)
    page.save(comment=comment)
def process_page(index, page, save, verbose):
    """Rename arg 4 to ``past_f=`` in ``ru-conj-5c`` / ``ru-conj-6b`` templates.

    NOTE: this function is currently disabled. It logs a warning and returns
    immediately, so everything after the early ``return`` is unreachable and
    is kept only as a starting point should the script be revived.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: If true, write the changed text back; otherwise only log.
        verbose: If true, log the complete before/after page text.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ---- Unreachable from here on (script disabled above) ----
    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj-5c", "ru-conj-6b"]:
            past_f = getparam(t, "4")
            if past_f:
                # Insert past_f= at the position of 4= before removing 4=.
                t.add("past_f", past_f, before="4")
                rmparam(t, "4")
                notes.append("Replace 4= with past_f=")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if not save:
        pagemsg("Would save with comment = %s" % comment)
        return
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
def process_page(index, page, save, verbose):
    """Remove the ``adj=`` and ``shto=`` params from ``ru-ux`` templates.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: If true, write the changed text back; otherwise only log.
        verbose: If true, log the complete before/after page text.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-ux":
            continue
        before = unicode(t)
        if t.has("adj"):
            pagemsg("Removing adj=")
            notes.append("remove adj= from ru-ux")
            rmparam(t, "adj")
        if t.has("shto"):
            pagemsg("Removing shto=")
            notes.append("remove shto= from ru-ux")
            rmparam(t, "shto")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if not save:
        pagemsg("Would save with comment = %s" % comment)
        return
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
def process_page(index, page, save, verbose):
    """Warn about ``ru-noun-table`` calls that may be missing the ``*`` marker.

    A page is examined only if its title does not end in ``ство`` and matches
    at least one of the title patterns below. On such pages every
    ``ru-noun-table`` template whose text contains no ``*`` is reported as a
    likely incorrectly-declined reducible. The page is never modified.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object to scan.
        save: Unused here.
        verbose: Unused here.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    cons = u"[бцдфгчйклмнпрствшхзжщ]"
    # A further pattern, u"[оеё]" + cons + "$", was tried but gave too many
    # false positives, so it is deliberately left out.
    candidate_patterns = [
        cons + u"[кц][оаяеёыи]$",
        cons + cons + u"[оаяеёыи]$",
        u"[оеё][кц]$",
    ]
    if pagetitle.endswith(u"ство") or not any(
            re.search(pattern, pagetitle) for pattern in candidate_patterns):
        return

    text = unicode(page.text)
    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        template_text = unicode(t)
        if unicode(t.name) == "ru-noun-table" and "*" not in template_text:
            pagemsg("WARNING: Likely incorrectly-declined reducible: %s" % unicode(t))
def process_page(index, page, save, verbose):
    """Strip accents from arg 1 of ``{{wikipedia|...}}`` templates.

    Arg 1 is passed through ``ru.remove_accents``; when that changes the
    value, the param is rewritten.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: If true, write the changed text back; otherwise only log.
        verbose: If true, log the complete before/after page text.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) == "wikipedia":
            accented = getparam(t, "1")
            plain = ru.remove_accents(accented)
            if accented != plain:
                pagemsg("Removing accents from 1= in {{wikipedia|...}}")
                notes.append("remove accents from 1= in {{wikipedia|...}}")
                t.add("1", plain)
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if not save:
        pagemsg("Would save with comment = %s" % comment)
        return
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
def process_page(index, page, save, verbose):
    """Replace manual past passive participles in ``ru-conj`` with a ``+p`` variant.

    For each ``ru-conj`` template that has manual PPP overrides, candidate
    variant suffixes are appended to arg 2 of a scratch copy (with the
    overrides removed), the copy is expanded as
    ``{{ru-generate-verb-forms|...}}``, and the generated PPPs are compared
    with the manual ones. On the first exact match the overrides are removed
    from the real template and arg 2 gets the variant suffix.

    Args:
        index: Page index, used as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: If true, write the changed text back; otherwise only log.
        verbose: If true, log the complete before/after page text; also
            passed to ``blib.expand_text``.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    # Param names that may carry a manual PPP override.
    manual_ppp_forms = ["past_pasv_part", "past_pasv_part2", "past_pasv_part3",
        "past_pasv_part4", "ppp", "ppp2", "ppp3", "ppp4"]
    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        tname = unicode(t.name)
        if tname == "ru-conj":
            # Collect the non-empty manual PPPs, ignoring the "-" placeholder.
            manual_ppps = []
            for form in manual_ppp_forms:
                ppp = getparam(t, form)
                if ppp and ppp != "-":
                    manual_ppps.append(ppp)
            if not manual_ppps:
                continue
            if [x for x in t.params if unicode(x.value) == "or"]:
                pagemsg("WARNING: Skipping multi-arg conjugation: %s" % unicode(t))
                continue
            curvariant = getparam(t, "2")
            if "+p" in curvariant or "(7)" in curvariant or "(8)" in curvariant:
                pagemsg("WARNING: Found both manual PPP and PPP variant, something wrong: %s" %
                    unicode(t))
                continue
            # Scratch copy of the template with the manual overrides removed,
            # used only for form generation.
            t2 = blib.parse_text(unicode(t)).filter_templates()[0]
            for form in manual_ppp_forms:
                rmparam(t2, form)
            # "+p" is always tried; the other variants are tried only when the
            # first manual PPP contains the corresponding letters.
            variants_to_try = ["+p"]
            if u"ё" in re.sub(u"ённый$", "", manual_ppps[0]):
                variants_to_try.append(u"+pё")
            if u"жденный" in manual_ppps[0] or u"ждённый" in manual_ppps[0]:
                variants_to_try.append(u"+pжд")
            notsamemsgs = []
            for variant in variants_to_try:
                t2.add("2", curvariant + variant)
                tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms", unicode(t2))
                result = expand_text(tempcall)
                if not result:
                    pagemsg("WARNING: Error generating forms, skipping")
                    continue
                args = rulib.split_generate_args(result)
                if "past_pasv_part" not in args:
                    pagemsg("WARNING: Something wrong, no past passive participle generated: %s" % unicode(t))
                    continue
                # Flatten the generated PPPs in the same order as manual_ppps.
                auto_ppps = []
                for form in manual_ppp_forms:
                    if form in args:
                        for ppp in re.split(",", args[form]):
                            if ppp and ppp != "-":
                                auto_ppps.append(ppp)
                if manual_ppps == auto_ppps:
                    pagemsg("Manual PPP's %s same as auto-generated PPP's, switching to auto"
                        % ",".join(manual_ppps))
                    for form in manual_ppp_forms:
                        rmparam(t, form)
                    t.add("2", curvariant + variant)
                    notes.append("replaced manual PPP's with variant %s" % variant)
                    break
                else:
                    # Hold the mismatch warnings back; they are only emitted
                    # if no variant at all matches.
                    notsamemsgs.append("WARNING: Manual PPP's %s not same as auto-generated PPP's %s: %s" %
                        (",".join(manual_ppps), ",".join(auto_ppps), unicode(t)))
            else: # no break in for loop
                for m in notsamemsgs:
                    pagemsg(m)

        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    new_text = unicode(parsed)

    if new_text != text:
        if verbose:
            pagemsg("Replacing <%s> with <%s>" % (text, new_text))
        assert notes
        comment = "; ".join(notes)
        if save:
            pagemsg("Saving with comment = %s" % comment)
            page.text = new_text
            page.save(comment=comment)
        else:
            pagemsg("Would save with comment = %s" % comment)