(index, pagetitle, form, II)) remove_param("II") III = getparam(template, "III") if III == u"ء": msg("Page %s %s: form=%s, removing III=%s" % (index, pagetitle, form, III)) remove_param("III") newtemp = unicode(template) if origtemp != newtemp: msg("Replacing %s with %s" % (origtemp, newtemp)) if len(paramschanged) > 0: actions_taken.append("form=%s (%s)" % (form, ', '.join(paramschanged))) changelog = "ar-verb: remove params: %s" % '; '.join(actions_taken) #if len(actions_taken) > 0: msg("Change log = %s" % changelog) return text, changelog def clean_verb_headword(save, startFrom, upTo): for cat in [u"Arabic verbs"]: for index, page in blib.cat_articles(cat, startFrom, upTo): blib.do_edit(page, index, clean_one_page_verb_headword, save=save) pa = blib.init_argparser("Clean up verb headword templates") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) clean_verb_headword(params.save, startFrom, upTo)
if len(newtext) - len(text) != len(to_template) - len( from_template): pagemsg( "WARNING: Length mismatch when undoing Greek param removal, may have matched multiple templates: from=%s, to=%s" % (from_template, to_template)) changelog = "Undid removal of %s=%s in %s" % ( removed_param, param_value, to_template) pagemsg("Change log = %s" % changelog) return newtext, changelog page = pywikibot.Page(site, pagename) if not page.exists(): msg("Page %s %s: WARNING, something wrong, does not exist" % (index, pagename)) else: blib.do_edit(page, index, undo_one_page_greek_removal, save=save, verbose=verbose) pa = blib.init_argparser("Undo Greek transliteration removal") pa.add_argument( "--file", help="File containing templates and removal directives to undo") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) undo_greek_removal(params.save, params.verbose, params.file, startFrom, upTo)
pagemsg( "WARNING: Length mismatch when undoing Russian auto-accenting, may have matched multiple templates: orig=%s, repl=%s" % (orig_template, repl_template)) changelog = "Undid auto-accenting (per Wikitiki89) of %s" % ( orig_template) pagemsg("Change log = %s" % changelog) return newtext, changelog page = pywikibot.Page(site, pagename) if not page.exists(): msg("Page %s %s: WARNING, something wrong, does not exist" % (index, pagename)) else: blib.do_edit(page, index, undo_one_page_ru_auto_accent, save=save, verbose=verbose) pa = blib.init_argparser( "Undo auto-accent changes involving ux, usex and lang templates that look like direct quotes" ) pa.add_argument("--file", help="File containing log file from original auto-accent run") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) undo_ru_auto_accent(params.save, params.verbose, params.file, startFrom, upTo)
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import re, codecs import blib import ru_translit from canon_foreign import canon_links pa = blib.init_argparser("Canonicalize Russian and translit") pa.add_argument( "--cattype", default="borrowed", help="""Categories to examine ('vocab', 'borrowed', 'translation', 'links', 'pagetext', 'pages', an arbitrary category or comma-separated list)""" ) pa.add_argument( "--page-file", help="""File containing "pages" to process when --cattype pagetext, or list of pages when --cattype pages""") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) pages_to_do = [] if params.page_file:
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import re, codecs import blib import ru_translit from canon_foreign import canon_links pa = blib.init_argparser("Canonicalize Russian and translit") pa.add_argument("--cattype", default="borrowed", help="""Categories to examine ('vocab', 'borrowed', 'translation', 'links', 'pagetext', 'pages' or comma-separated list)""") pa.add_argument("--page-file", help="""File containing "pages" to process when --cattype pagetext, or list of pages when --cattype pages""") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) pages_to_do = [] if params.page_file: for line in codecs.open(params.page_file, "r", encoding="utf-8"): line = line.strip() if params.cattype == "pages": pages_to_do.append(line)
rmparam(t, "f") ftr = getparam(t, "ftr") rmparam(t, "ftr") addparam(t, "1", head) if tr: addparam(t, "tr", tr) if f: addparam(t, "f", f) if ftr: addparam(t, "ftr", ftr) if pl: addparam(t, "pl", pl) if pltr: addparam(t, "pltr", pltr) if sort: addparam(t, "sort", sort) temps_changed.append("arz-adj") return text, "rewrite %s to new style" % ", ".join(temps_changed) def rewrite_arz_headword(save, verbose, startFrom, upTo): for cat in [u"Egyptian Arabic adjectives", "Egyptian Arabic nouns"]: for index, page in blib.cat_articles(cat, startFrom, upTo): blib.do_edit(page, index, rewrite_one_page_arz_headword, save=save, verbose=verbose) pa = blib.init_argparser("Rewrite Egyptian Arabic headword templates") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) rewrite_arz_headword(params.save, params.verbose, startFrom, upTo)
def rewrite_one_page_ar_nisba(page, index, text):
    """Turn head= into positional parameter 1 on {{ar-nisba}} templates.

    For every template in `text` named "ar-nisba" that has head= but no
    parameter 1, the head= value is removed and re-added as "1", placed
    before whatever parameter is first once head= is gone. A message is
    emitted through blib.msg() when such a template has plhead=.

    Returns the (mutated) `text` and a fixed changelog string.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source -- confirm that the plhead= check sits at this level.
    for tmpl in text.filter_templates():
        if not tmpl.name == "ar-nisba":
            continue
        if tmpl.has("head") and not tmpl.has(1):
            head = unicode(tmpl.get("head").value)
            tmpl.remove("head")
            # Look up the insertion point only after head= has been removed.
            if len(tmpl.params) > 0:
                first_param = tmpl.params[0].name
            else:
                first_param = None
            addparam(tmpl, "1", head, before=first_param)
        if tmpl.has("plhead"):
            blib.msg("%s has plhead=" % page.title())
    return text, "ar-nisba: head= -> 1="


def rewrite_ar_nisba(save, verbose, startFrom, upTo):
    """Run rewrite_one_page_ar_nisba on pages referencing Template:ar-nisba.

    startFrom and upTo are passed through to blib.references(); every
    (index, page) pair it yields is handed to blib.do_edit() with the
    `save` and `verbose` flags.
    """
    referencing_pages = blib.references("Template:ar-nisba", startFrom, upTo)
    for index, page in referencing_pages:
        blib.do_edit(page, index, rewrite_one_page_ar_nisba,
                     save=save, verbose=verbose)


# Script entry: parse the command line, then process the referencing pages.
pa = blib.init_argparser("Rewrite ar-nisba, changing head= to 1=")
params = pa.parse_args()
startFrom, upTo = blib.parse_start_end(params.start, params.end)
rewrite_ar_nisba(params.save, params.verbose, startFrom, upTo)
remove_param("I") II = getparam(template, "II") if (II == u"ء" or II in [u"و", u"ي"] and form in ["2", "II", "3", "III", "5", "V", "6", "VI"]): msg("Page %s %s: form=%s, removing II=%s" % (index, pagetitle, form, II)) remove_param("II") III = getparam(template, "III") if III == u"ء": msg("Page %s %s: form=%s, removing III=%s" % (index, pagetitle, form, III)) remove_param("III") newtemp = unicode(template) if origtemp != newtemp: msg("Replacing %s with %s" % (origtemp, newtemp)) if len(paramschanged) > 0: actions_taken.append("form=%s (%s)" % (form, ', '.join(paramschanged))) changelog = "ar-verb: remove params: %s" % '; '.join(actions_taken) #if len(actions_taken) > 0: msg("Change log = %s" % changelog) return text, changelog def clean_verb_headword(save, startFrom, upTo): for cat in [u"Arabic verbs"]: for page, index in blib.cat_articles(cat, startFrom, upTo): blib.do_edit(page, index, clean_one_page_verb_headword, save=save) pa = blib.init_argparser("Clean up verb headword templates") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) clean_verb_headword(params.save, startFrom, upTo)
else: pages_to_ignore = set() for category in yield_cats(): msg("Processing category %s ..." % category) errmsg("Processing category %s ..." % category) for index, page in blib.cat_articles(category, startFrom, upTo): if page.title() not in pages_to_ignore: blib.do_edit(page, index, remove_translit_one_page, save=params.save, verbose=params.verbose) pa = blib.init_argparser( "Remove translit, sc= from hy, xcl, ka, el, grc templates") pa.add_argument("--langs", default="all", help="Languages to do, a comma-separated list or 'all'") pa.add_argument("--cattype", default="all", help="""Categories to examine ('all' or comma-separated list of 'translit', 'lemma', 'non-lemma'; default 'all')""") pa.add_argument( "--ignore-lemma-non-lemma", action="store_true", help= """Ignore lemma and non-lemma pages (useful with '--cattype translit').""") pa.add_argument("--do-head", action="store_true", help="""Remove tr= in {{head|..}}""")
tname, tlang, "sc", scvalue)) oldtempl = "%s" % unicode(template) template.remove("sc") pagemsg("Replaced %s with %s" % (oldtempl, unicode(template))) newresult = ["remove %s.%s.sc=%s" % (tname, tlang, scvalue)] if result != False: result = result + newresult else: result = newresult return result return blib.process_links(save, verbose, lang, longlang, cattype, startFrom, upTo, process_param, sort_group_changelogs, pages_to_do=pages_to_do) pa = blib.init_argparser("Remove redundant foreign translit and script") pa.add_argument("--lang", help="""Language to use when --cattype is 'vocab' or 'borrowed'.""") pa.add_argument("--cattype", default="borrowed", help="""Categories to examine ('vocab', 'borrowed', 'translation', 'links', 'pagetext', 'pages', an arbitrary category or comma-separated list)""") pa.add_argument("--page-file", help="""File containing "pages" to process when --cattype pagetext, or list of pages when --cattype pages""") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) pages_to_do = [] if params.page_file: for line in codecs.open(params.page_file, "r", encoding="utf-8"): line = line.strip()
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import re, codecs import blib import grc_translit from canon_foreign import canon_links pa = blib.init_argparser("Canonicalize Greek and translit") pa.add_argument("--cattype", default="borrowed", help="""Categories to examine ('vocab', 'borrowed', 'translation', 'links', 'pagetext', 'pages' or comma-separated list)""") pa.add_argument("--page-file", help="""File containing "pages" to process when --cattype pagetext, or list of pages when --cattype pages""") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) pages_to_do = [] if params.page_file: for line in codecs.open(params.page_file, "r", encoding="utf-8"): line = line.strip() if params.cattype == "pages": pages_to_do.append(line)
for vn in vns: no_i3rab_vns.append(remove_i3rab(pagename, index, verbid, vn)) newvn = ",".join(no_i3rab_vns) if uncertain: newvn += "?" if newvn != vnvalue: msg("Page %s %s: Verb %s, replacing %s with %s" % ( index, pagename, verbid, vnvalue, newvn)) addparam(template, "vn", newvn) verbids.append(verbid) return text, "Remove i3rab from verbal nouns for verb(s) %s" % ( ', '.join(verbids)) for index, page in blib.cat_articles("Arabic verbs", startFrom, upTo): blib.do_edit(page, index, do_one_page_verb, save=save, verbose=verbose) pa = blib.init_argparser("Remove i3rab") pa.add_argument("--verb", action='store_true', help="Do verbal nouns in verbs") pa.add_argument("--noun", action='store_true', help="Do arguments in nouns") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) if params.noun: do_nouns(["noun", "adjective"], ["ar-noun", "ar-coll-noun", "ar-sing-noun", "ar-nisba", "ar-noun-nisba", "ar-adj", "ar-numeral"], params.save, startFrom, upTo) if params.verb: do_verbs(params.save, startFrom, upTo)
removeparams, is_proper=False): for index, page in blib.references("Template:%s" % tempname, startFrom, upTo): create_declension(page, index, save, pos, tempname, decltempname, sgnum, removeparams, is_proper=is_proper) pa = blib.init_argparser("Create Arabic declensions") pa.add_argument("--proper", action='store_true', help="Do proper nouns only") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) params_to_remove = [ "2", # gender; not included in declension tables "g2", # second gender; not included in declension tables "singg", # singulative gender; not included in declension tables "collg", # collective gender; not included in declension tables "tr", # transliterations; we check for them in the declension code and # handle them specially "cons", # construct state; always predictable and we do it "dcons", # dual construct state; always predictable and we do it "pauccons", # paucal construct state; always predictable and we do it
assert((not comment) == (newtext == page.text)) if newtext != page.text: if verbose: msg("Replacing [[%s]] with [[%s]]" % (page.text, newtext)) page.text = newtext msg("For page %s, comment = %s" % (pagename, comment)) if save: page.save(comment = comment) def create_declensions(save, pos, tempname, decltempname, sgnum, startFrom, upTo, removeparams, is_proper=False): for index, page in blib.references("Template:%s" % tempname, startFrom, upTo): create_declension(page, index, save, pos, tempname, decltempname, sgnum, removeparams, is_proper=is_proper) pa = blib.init_argparser("Create Arabic declensions") pa.add_argument("--proper", action='store_true', help="Do proper nouns only") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) params_to_remove = [ "2", # gender; not included in declension tables "g2", # second gender; not included in declension tables "singg", # singulative gender; not included in declension tables "collg", # collective gender; not included in declension tables "tr", # transliterations; we check for them in the declension code and # handle them specially "cons", # construct state; always predictable and we do it "dcons", # dual construct state; always predictable and we do it
startFrom, upTo): def rewrite_one_page_template_names(page, index, text): actions = [] for template in text.filter_templates(): if template.name == old: actions.append("rename {{temp|%s}} to {{temp|%s}}" % (old, new)) template.name = new for remove in removelist: if template.has(remove): template.remove(remove) actions.append("remove %s=" % remove) return text, '; '.join(actions) for index, page in blib.references("Template:%s" % old, startFrom, upTo): blib.do_edit(page, index, rewrite_one_page_template_names, save=save, verbose=verbose) pa = blib.init_argparser("Rewrite old to new template names") pa.add_argument("-o", "--old", help="Old name of template") pa.add_argument("-n", "--new", help="New name of template") pa.add_argument("-r", "--remove", help="Comma-separated template params to remove") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) removelist = [] if params.remove: removelist = re.split(",", params.remove) rewrite_template_names(params.old, params.new, removelist, params.save, params.verbose, startFrom, upTo)
else: pagemsg("Original template found, taking no action") else: pagemsg("Replaced %s with %s" % (repl_template, orig_template)) if found_orig_template: pagemsg("WARNING: Undid replacement, but original template %s already present!" % orig_template) if len(newtext) - len(text) != len(orig_template) - len(repl_template): pagemsg("WARNING: Length mismatch when undoing Russian auto-accenting, may have matched multiple templates: orig=%s, repl=%s" % ( orig_template, repl_template)) changelog = "Undid auto-accenting (per Wikitiki89) of %s" % (orig_template) pagemsg("Change log = %s" % changelog) return newtext, changelog page = pywikibot.Page(site, pagename) if not page.exists(): msg("Page %s %s: WARNING, something wrong, does not exist" % ( index, pagename)) else: blib.do_edit(page, index, undo_one_page_ru_auto_accent, save=save, verbose=verbose) pa = blib.init_argparser("Undo auto-accent changes involving ux, usex and lang templates that look like direct quotes") pa.add_argument("--file", help="File containing log file from original auto-accent run") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) undo_ru_auto_accent(params.save, params.verbose, params.file, startFrom, upTo)
(form, getparam(template, str(1 + int(formarg))), getparam(template, str(2 + int(formarg))))) else: actions_taken.append("form=%s" % form) changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % ( tempname, formarg, '; '.join(actions_taken)) if len(actions_taken) > 0: msg("Change log = %s" % changelog) return text, changelog for index, page in blib.references("Template:%s" % tempname, startFrom, upTo): blib.do_edit(page, index, canonicalize_one_page_verb_form, save=save) pa = blib.init_argparser("Rewrite form= to 1= in verb headword templates") pa.add_argument("--headword", action='store_true', help="Rewrite form= to 1= in ar-verb and canonicalize") pa.add_argument( "--canonicalize", action='store_true', help="Canonicalize form in Arabic verb templates other than ar-verb") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) if params.headword: rewrite_verb_headword(params.save, startFrom, upTo) if params.canonicalize: canonicalize_verb_form(params.save, startFrom, upTo, "ar-conj", "1") canonicalize_verb_form(params.save, startFrom, upTo, "ar-past3sm", "1")
actions = [] if idafa_added: actions.append(u"Replaced ʾidāfa params with idafa= param: %s" % ( ", ".join(idafa_added))) if num_new_style: actions.append(u"Corrected %s old-style ʾidāfa param(s) to new-style" % num_new_style) if num_modhead_changed: actions.append(u"Changed modN to modheadN") if num_state_ind_to_ind_def: actions.append(u"Converted state=ind to state=ind-def for proper noun") if num_basestate_ind_def: actions.append(u"Converted state=def|basestate=ind to state=ind-def") if actions: changelog = "; ".join(actions) pagemsg("Changelog = %s" % changelog) return text, changelog return text, "" def rewrite_idafa(save, verbose, startFrom, upTo): for template in arabic_decl_templates: for page, index in blib.references("Template:" + template, startFrom, upTo): blib.do_edit(page, index, rewrite_one_page_idafa, save=save, verbose=verbose) pa = blib.init_argparser(u"Rewrite ʾidāfa params with idafa= param, and related changes") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) rewrite_idafa(params.save, params.verbose, startFrom, upTo)
for digval, dig in sorted(digits.iteritems(), key=lambda x: x[0]): msg( u""" export.numbers[%s] = { numeral = "%s٠٠٠", cardinal = {{"%s", "%s"}}, }""" % (digval * 1000, dig.eastarabnum, dig.thousand, dig.thousandtr) ) msg( u""" return export""" ) pa = blib.init_argparser("Save numbers to Wiktionary") pa.add_argument("--lemmas", action="store_true", help="Do lemmas from 21-99.") pa.add_argument("--non-lemmas", action="store_true", help="Do non-lemmas from 21-99.") pa.add_argument("--ordinal-lemmas", action="store_true", help="Do ordinal lemmas from 11-19.") pa.add_argument("--ordinal-non-lemmas", action="store_true", help="Do ordinal non-lemmas from 11-19.") pa.add_argument("--number-list-data", action="store_true", help="Output number list data.") pa.add_argument("--offline", action="store_true", help="Run offline, checking output only.") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) def iter_pages(createfn): for tenval, ten, digval, dig in iter_numerals(): yield createfn(tenval, ten, digval, dig)
def fix_tool_place_noun(save, verbose, startFrom, upTo):
    """Swap cap= for lc= on tool/place/instance noun etymology templates.

    For each of the three template names, a per-page callback is built and
    run through blib.do_edit() on every (index, page) pair yielded by
    blib.references("Template:" + name, startFrom, upTo).
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source -- confirm against the properly indented file.
    template_names = ["ar-tool noun", "ar-noun of place", "ar-instance noun"]
    for template in template_names:

        # Fix the template refs. If cap= is present, remove it; else, add lc=.
        def fix_one_page_tool_place_noun(page, index, text):
            pagetitle = page.title()
            for tmpl in text.filter_templates():
                if not tmpl.name == template:
                    continue
                if getparam(tmpl, "cap"):
                    msg("Page %s %s: Template %s: Remove cap=" %
                        (index, pagetitle, template))
                    tmpl.remove("cap")
                else:
                    msg("Page %s %s: Template %s: Add lc=1" %
                        (index, pagetitle, template))
                    addparam(tmpl, "lc", "1")
            changelog = ("%s: If cap= is present, remove it, else add lc="
                         % template)
            msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
            return text, changelog

        # The callback is used within the same iteration that defines it, so
        # it always sees the current value of `template`.
        referencing_pages = blib.references("Template:" + template,
                                            startFrom, upTo)
        for index, page in referencing_pages:
            blib.do_edit(page, index, fix_one_page_tool_place_noun,
                         save=save, verbose=verbose)


# Script entry: parse the command line, then process all three templates.
pa = blib.init_argparser("Fix lc vs. cap in tool/place noun etym templates")
params = pa.parse_args()
startFrom, upTo = blib.parse_start_end(params.start, params.end)
fix_tool_place_noun(params.save, params.verbose, startFrom, upTo)
if m.group(2): tr = "|tr=%s" % m.group(2) else: tr = "" repl = "{{l|ar|%s%s%s}}" % (m.group(1), tr, gender) msg("Replacing\n%s\nwith\n%s" % (m.group(0), repl)) newtext = text.replace(m.group(0), repl, 1) if newtext == text: msg("WARNING: Unable to do replacement") else: text = newtext linkschanged.append(m.group(1)) return text, "incorporated translit/gender into links: %s" % ', '.join(linkschanged) def correct_link_formatting(save, startFrom, upTo): for cat in [u"Arabic lemmas", u"Arabic non-lemma forms"]: for index, page in blib.cat_articles(cat, startFrom, upTo): blib.do_edit(page, index, correct_one_page_link_formatting, save=save) pa = blib.init_argparser("Correct formatting of headword templates") pa.add_argument("-l", "--links", action='store_true', help="Vocalize links") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) if params.links: correct_link_formatting(params.save, startFrom, upTo) else: correct_headword_formatting(params.save, startFrom, upTo)
oldtemps.append(origname) if nochange: return None, "" if oldtemps: comment = "convert %s -> ru-noun-table" % ", ".join(oldtemps) else: comment = None return text, comment def rewrite_ru_decl_noun(save, verbose, startFrom, upTo): for cat in [u"Russian nouns"]: for page, index in blib.cat_articles(cat, startFrom, upTo): blib.do_edit(page, index, rewrite_one_page_ru_decl_noun, save=save, verbose=verbose) def rewrite_ru_decl_adj(save, verbose, startFrom, upTo): for cat in [u"Russian adjectives"]: for page, index in blib.cat_articles(cat, startFrom, upTo): blib.do_edit(page, index, rewrite_one_page_ru_decl_adj, save=save, verbose=verbose) pa = blib.init_argparser("Rewrite Russian old declension templates") pa.add_argument("--adjectives", action='store_true', help="Rewrite old adjective templates") pa.add_argument("--nouns", action='store_true', help="Rewrite old noun templates") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) if params.adjectives: rewrite_ru_decl_adj(params.save, params.verbose, startFrom, upTo) if params.nouns: rewrite_ru_decl_noun(params.save, params.verbose, startFrom, upTo)
return newtext, changelog page = pywikibot.Page(site, pagename) if not page.exists(): msg("Page %s %s: WARNING, something wrong, does not exist" % (index, pagename)) else: blib.do_edit(page, index, push_one_manual_change, save=save, verbose=verbose, diff=diff) pa = blib.init_argparser("Push manual changes to Wiktionary") pa.add_argument( "--file", help="File containing templates to change, as output by parse_log_file.py") pa.add_argument( "--annotation", default="manually", help= "Annotation in change log message used to indicate source of changes (default 'manually')" ) params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) push_manual_changes(params.save, params.verbose, params.diff, params.file, params.annotation.decode('utf-8'), startFrom, upTo)
else: pagemsg("Original template found, taking no action") else: if found_orig_template: pagemsg("WARNING: Undid removal, but original template %s already present!" % orig_template) if len(newtext) - len(text) != len(to_template) - len(from_template): pagemsg("WARNING: Length mismatch when undoing Greek param removal, may have matched multiple templates: from=%s, to=%s" % ( from_template, to_template)) changelog = "Undid removal of %s=%s in %s" % (removed_param, param_value, to_template) pagemsg("Change log = %s" % changelog) return newtext, changelog page = pywikibot.Page(site, pagename) if not page.exists(): msg("Page %s %s: WARNING, something wrong, does not exist" % ( index, pagename)) else: blib.do_edit(page, index, undo_one_page_greek_removal, save=save, verbose=verbose) pa = blib.init_argparser("Undo Greek transliteration removal") pa.add_argument("--file", help="File containing templates and removal directives to undo") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) undo_greek_removal(params.save, params.verbose, params.file, startFrom, upTo)
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import blib
from blib import getparam, addparam


def rewrite_one_page_ar_plural(page, index, text):
    """Rename every {{ar-plural}} template in `text` to {{ar-noun-pl}}.

    `page` and `index` are accepted but not used here. Returns the (mutated)
    `text` and a fixed changelog string.
    """
    for template in text.filter_templates():
        if template.name == "ar-plural":
            template.name = "ar-noun-pl"
    return text, "rename {{temp|ar-plural}} to {{temp|ar-noun-pl}}"


def rewrite_ar_plural(save, verbose, startFrom, upTo):
    """Run rewrite_one_page_ar_plural over the "Arabic plurals" category.

    startFrom and upTo are passed through to blib.cat_articles(); each pair
    it yields is handed to blib.do_edit() with the `save` and `verbose` flags.
    """
    for cat in [u"Arabic plurals"]:
        # The pair is unpacked as (index, page), the order used by the other
        # blib.cat_articles() loops in these scripts; unpacking it as
        # (page, index) would hand the index to blib.do_edit() as the page.
        for index, page in blib.cat_articles(cat, startFrom, upTo):
            blib.do_edit(page, index, rewrite_one_page_ar_plural,
                         save=save, verbose=verbose)


# Script entry: parse the command line, then process the category.
pa = blib.init_argparser("Rewrite ar-plural to ar-noun-pl templates")
params = pa.parse_args()
startFrom, upTo = blib.parse_start_end(params.start, params.end)
rewrite_ar_plural(params.save, params.verbose, startFrom, upTo)
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import blib
from blib import getparam, addparam


def rewrite_one_page_ar_plural(page, index, text):
    """Rename each {{ar-plural}} template found in `text` to {{ar-noun-pl}}.

    Neither `page` nor `index` is consulted. The same `text` object is
    returned, paired with a constant changelog message.
    """
    for tmpl in text.filter_templates():
        if not tmpl.name == "ar-plural":
            continue
        tmpl.name = "ar-noun-pl"
    changelog = "rename {{temp|ar-plural}} to {{temp|ar-noun-pl}}"
    return text, changelog


def rewrite_ar_plural(save, verbose, startFrom, upTo):
    """Apply rewrite_one_page_ar_plural to the "Arabic plurals" category.

    blib.cat_articles() receives startFrom and upTo unchanged; every
    (index, page) pair it produces goes to blib.do_edit() along with the
    `save` and `verbose` flags.
    """
    categories = [u"Arabic plurals"]
    for category in categories:
        articles = blib.cat_articles(category, startFrom, upTo)
        for index, page in articles:
            blib.do_edit(page, index, rewrite_one_page_ar_plural,
                         save=save, verbose=verbose)


# Script entry: parse the command line, then process the category.
pa = blib.init_argparser("Rewrite ar-plural to ar-noun-pl templates")
params = pa.parse_args()
startFrom, upTo = blib.parse_start_end(params.start, params.end)
rewrite_ar_plural(params.save, params.verbose, startFrom, upTo)
if num_modhead_changed: actions.append(u"Changed modN to modheadN") if num_state_ind_to_ind_def: actions.append(u"Converted state=ind to state=ind-def for proper noun") if num_basestate_ind_def: actions.append(u"Converted state=def|basestate=ind to state=ind-def") if actions: changelog = "; ".join(actions) pagemsg("Changelog = %s" % changelog) return text, changelog return text, "" def rewrite_idafa(save, verbose, startFrom, upTo): for template in arabic_decl_templates: for index, page in blib.references("Template:" + template, startFrom, upTo): blib.do_edit(page, index, rewrite_one_page_idafa, save=save, verbose=verbose) pa = blib.init_argparser( u"Rewrite ʾidāfa params with idafa= param, and related changes") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) rewrite_idafa(params.save, params.verbose, startFrom, upTo)
yield index, page, None for index, page, totitle in yield_pages(): pagetitle = unicode(page.title()) if filter_pages and not re.search(filter_pages, pagetitle): msg("Skipping %s because doesn't match --filter-pages regex %s" % (pagetitle, filter_pages)) elif not page.exists(): msg("Skipping %s because page doesn't exist" % pagetitle) else: if verbose: msg("Processing %s" % pagetitle) rename_one_page(page, totitle, index) pa = blib.init_argparser("Rename pages") pa.add_argument("-f", "--from", help="From regex, can be specified multiple times", metavar="FROM", dest="from_", action="append") pa.add_argument("-t", "--to", help="To regex, can be specified multiple times", action="append") pa.add_argument( "-r", "--references", "--refs", help="Do pages with references to these pages (comma-separated)")
def fix_one_page_tool_place_noun(page, index, text): pagetitle = page.title() for t in text.filter_templates(): if t.name == template: if getparam(t, "cap"): msg("Page %s %s: Template %s: Remove cap=" % (index, pagetitle, template)) t.remove("cap") else: msg("Page %s %s: Template %s: Add lc=1" % (index, pagetitle, template)) addparam(t, "lc", "1") changelog = "%s: If cap= is present, remove it, else add lc=" % template msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog)) return text, changelog for index, page in blib.references("Template:" + template, startFrom, upTo): blib.do_edit(page, index, fix_one_page_tool_place_noun, save=save, verbose=verbose) pa = blib.init_argparser("Fix lc vs. cap in tool/place noun etym templates") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) fix_tool_place_noun(params.save, params.verbose, startFrom, upTo)
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import blib
from blib import getparam, addparam

def rewrite_one_page_ar_nisba(page, index, text):
  """Move head= to 1= in each {{ar-nisba}} template found in TEXT.

  The move happens only when the template has head= and no 1= parameter.
  A template carrying plhead= is reported through blib.msg() together with
  the page title. INDEX is accepted but not used here. Returns a tuple of
  TEXT and the change comment.
  """
  for tmpl in text.filter_templates():
    if tmpl.name != "ar-nisba":
      continue
    if tmpl.has("head") and not tmpl.has(1):
      head = unicode(tmpl.get("head").value)
      tmpl.remove("head")
      # Insert 1= ahead of whatever parameter is now first, if any remain.
      if len(tmpl.params) > 0:
        anchor = tmpl.params[0].name
      else:
        anchor = None
      addparam(tmpl, "1", head, before=anchor)
    # NOTE(review): nesting reconstructed from whitespace-collapsed source;
    # confirm this check applies to every ar-nisba template rather than only
    # to those whose head= was just moved.
    if tmpl.has("plhead"):
      blib.msg("%s has plhead=" % page.title())
  return text, "ar-nisba: head= -> 1="

def rewrite_ar_nisba(save, verbose, startFrom, upTo):
  """Run the head= -> 1= rewrite over pages referencing Template:ar-nisba.

  STARTFROM and UPTO are handed to blib.references(); SAVE and VERBOSE are
  handed to blib.do_edit().
  """
  referencing = blib.references("Template:ar-nisba", startFrom, upTo)
  for index, page in referencing:
    blib.do_edit(page, index, rewrite_one_page_ar_nisba,
        save=save, verbose=verbose)

# Script driver: parse the command line, then process the requested range.
pa = blib.init_argparser("Rewrite ar-nisba, changing head= to 1=")
params = pa.parse_args()
startFrom, upTo = blib.parse_start_end(params.start, params.end)

rewrite_ar_nisba(params.save, params.verbose, startFrom, upTo)
ruparam, trparam): def pagemsg(text): msg("Page %s %s: %s" % (index, pagetitle, text)) def output_line(directive): pagemsg("%s: %s" % (directive, unicode(template))) result = process_template(pagetitle, index, template, ruparam, trparam, output_line, find_accents, verbose) if index % 100 == 0: output_stats(pagemsg) return result blib.process_links(save, verbose, "ru", "Russian", cattype, startFrom, upTo, check_template_for_missing_accent, join_actions=join_changelog_notes, split_templates=None) pa = blib.init_argparser("Find Russian terms needing accents") pa.add_argument("--cattype", default="vocab", help="Categories to examine ('vocab', 'borrowed', 'translation')") pa.add_argument("--file", help="File containing output from parse_log_file.py") pa.add_argument("--semi-verbose", action="store_true", help="More info but not as much as --verbose") pa.add_argument("--find-accents", action="store_true", help="Look up the accents in existing pages") pa.add_argument("--no-cache", action="store_true", help="Disable caching head lookup results") params = pa.parse_args() semi_verbose = params.semi_verbose or params.verbose global_disable_cache = params.no_cache startFrom, upTo = blib.parse_start_end(params.start, params.end)
# Vocalize link-like templates on pages from STARTFROM to (but not including)
# UPTO, either page names or 0-based integers. Save changes if SAVE is true.
# Show exact changes if VERBOSE is true. CATTYPE should be 'vocab', 'borrowed'
# or 'translation', indicating which categories to examine.
def vocalize_links(save, verbose, cattype, startFrom, upTo):
  # Per-parameter callback handed to blib.process_links(). A string result
  # from vocalize_param() is wrapped in a one-element list tagged with the
  # template name; any other result is passed through unchanged.
  def process_param(pagetitle, index, template, param, paramtr):
    outcome = vocalize_param(pagetitle, index, template, param, paramtr)
    if not isinstance(outcome, basestring):
      return outcome
    return ["%s (%s)" % (outcome, template.name)]

  # Build the change comment from the individual action strings.
  def join_actions(actions):
    joined = '; '.join(actions)
    return "vocalize links: %s" % joined

  return blib.process_links(save, verbose, "ar", "Arabic", cattype,
      startFrom, upTo, process_param, join_actions)

# Script driver: parse the command line and dispatch on --links.
pa = blib.init_argparser("Correct vocalization and translit")
pa.add_argument("-l", "--links", action='store_true',
    help="Vocalize links")
pa.add_argument("--cattype", default="borrowed",
    help="Categories to examine ('vocab', 'borrowed', 'translation')")

params = pa.parse_args()
startFrom, upTo = blib.parse_start_end(params.start, params.end)

if not params.links:
  vocalize_headwords(params.save, params.verbose, startFrom, upTo)
else:
  vocalize_links(params.save, params.verbose, params.cattype, startFrom,
      upTo)
for vn in vns: no_i3rab_vns.append(remove_i3rab(pagename, index, verbid, vn)) newvn = ",".join(no_i3rab_vns) if uncertain: newvn += "?" if newvn != vnvalue: msg("Page %s %s: Verb %s, replacing %s with %s" % ( index, pagename, verbid, vnvalue, newvn)) addparam(template, "vn", newvn) verbids.append(verbid) return text, "Remove i3rab from verbal nouns for verb(s) %s" % ( ', '.join(verbids)) for page, index in blib.cat_articles("Arabic verbs", startFrom, upTo): blib.do_edit(page, index, do_one_page_verb, save=save, verbose=verbose) pa = blib.init_argparser("Remove i3rab") pa.add_argument("--verb", action='store_true', help="Do verbal nouns in verbs") pa.add_argument("--noun", action='store_true', help="Do arguments in nouns") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) if params.noun: do_nouns(["noun", "adjective"], ["ar-noun", "ar-coll-noun", "ar-sing-noun", "ar-nisba", "ar-noun-nisba", "ar-adj", "ar-numeral"], params.save, startFrom, upTo) if params.verb: do_verbs(params.save, startFrom, upTo)
include_tempname_in_changelog=True) if getparam(template, "sc") == "Arab": msg("Page %s %s: %s.%s: Removing sc=Arab" % (index, pagetitle, template.name, "sc")) oldtempl = "%s" % unicode(template) template.remove("sc") msg("Page %s %s: Replaced %s with %s" % (index, pagetitle, oldtempl, unicode(template))) newresult = ["remove %s.sc=Arab" % template.name] if isinstance(result, list): result = result + newresult else: result = newresult return result return blib.process_links(save, verbose, "ar", "Arabic", cattype, startFrom, upTo, do_process_param, sort_group_changelogs) pa = blib.init_argparser("Remove redundant translit") pa.add_argument("-l", "--links", action='store_true', help="Vocalize links") pa.add_argument("--cattype", default="borrowed", help="Categories to examine ('vocab', 'borrowed', 'translation')") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) if params.links: process_links(params.save, params.verbose, params.cattype, startFrom, upTo) else: process_headwords(params.save, params.verbose, startFrom, upTo)
pages = ((pywikibot.Page(blib.site, page), index) for page, index in blib.iter_pages(lines, startFrom, upTo)) elif refs: pages = blib.references(refs, startFrom, upTo, includelinks=True) else: pages = blib.cat_articles(cat, startFrom, upTo) for page, index in pages: pagetitle = unicode(page.title()) if filter_pages and not re.search(filter_pages, pagetitle): blib.msg("Skipping %s because doesn't match --filter-pages regex %s" % (pagetitle, filter_pages)) else: if verbose: blib.msg("Processing %s" % pagetitle) blib.do_edit(page, index, rewrite_one_page, save=save, verbose=verbose) pa = blib.init_argparser("Search and replace on pages") pa.add_argument("-f", "--from", help="From regex, can be specified multiple times", metavar="FROM", dest="from_", required=True, action="append") pa.add_argument("-t", "--to", help="To regex, can be specified multiple times", required=True, action="append") pa.add_argument("-r", "--references", "--refs", help="Do pages with references to this page") pa.add_argument("-c", "--category", "--cat", help="Do pages in this category") pa.add_argument("--comment", help="Specify the change comment to use") pa.add_argument('--filter-pages', help="Regex to use to filter page names.") pa.add_argument('--pages', help="List of pages to fix, comma-separated.") pa.add_argument('--pagefile', help="File containing pages to fix.") pa.add_argument('--pagetitle', help="Value to substitute page title with.") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end)
# Check for changed text and save if so. notestext = '; '.join(notes) if notestext: if comment: comment += " (%s)" % notestext else: comment = notestext if page.text != existing_text: if save: pagemsg("Saving with comment = %s" % comment) blib.safe_page_save(page, comment, errandpagemsg) else: pagemsg("Would save with comment = %s" % comment) pa = blib.init_argparser("Save Russian numbers to Wiktionary") pa.add_argument("--offline", help="Operate offline, outputting text of new pages", action="store_true") pa.add_argument( "--overwrite-page", action="store_true", help=u"""If specified, overwrite the entire existing page of inflections. Won't do this if it finds "Etymology N", unless --overwrite-etymologies is given. WARNING: Be careful!""") pa.add_argument( "--overwrite-etymologies", action="store_true", help=u"""If specified and --overwrite-page, overwrite the entire existing page of inflections even if "Etymology N". WARNING: Be careful!""") pa.add_argument(
addparam(t, "tr", tr) if f: addparam(t, "f", f) if ftr: addparam(t, "ftr", ftr) if pl: addparam(t, "pl", pl) if pltr: addparam(t, "pltr", pltr) if sort: addparam(t, "sort", sort) temps_changed.append("arz-adj") return text, "rewrite %s to new style" % ", ".join(temps_changed) def rewrite_arz_headword(save, verbose, startFrom, upTo): for cat in [u"Egyptian Arabic adjectives", "Egyptian Arabic nouns"]: for index, page in blib.cat_articles(cat, startFrom, upTo): blib.do_edit(page, index, rewrite_one_page_arz_headword, save=save, verbose=verbose) pa = blib.init_argparser("Rewrite Egyptian Arabic headword templates") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) rewrite_arz_headword(params.save, params.verbose, startFrom, upTo)
def vocalize_links(save, verbose, cattype, startFrom, upTo):
  """Vocalize link-like templates by way of blib.process_links().

  SAVE, VERBOSE, CATTYPE, STARTFROM and UPTO are forwarded unchanged to
  blib.process_links(), along with the language code "ar", the language
  name "Arabic" and the two callbacks defined below. Returns whatever
  blib.process_links() returns.
  """
  # Per-parameter callback. PAGETEXT and TLANG are part of the callback
  # signature but are not used. A string result from vocalize_param() is
  # wrapped in a one-element list tagged with the template name; any other
  # result is passed through unchanged.
  def process_param(pagetitle, index, pagetext, template, tlang, param,
      paramtr):
    outcome = vocalize_param(pagetitle, index, template, param, paramtr)
    if not isinstance(outcome, basestring):
      return outcome
    return ["%s (%s)" % (outcome, template.name)]

  # Build the change comment from the individual action strings.
  def join_actions(actions):
    joined = '; '.join(actions)
    return "vocalize links: %s" % joined

  return blib.process_links(save, verbose, "ar", "Arabic", cattype,
      startFrom, upTo, process_param, join_actions)

# Script driver: parse the command line and dispatch on --links.
pa = blib.init_argparser("Correct vocalization and translit")
pa.add_argument("-l", "--links", action='store_true',
    help="Vocalize links")
pa.add_argument("--cattype", default="borrowed",
    help="Categories to examine ('vocab', 'borrowed', 'translation')")

params = pa.parse_args()
startFrom, upTo = blib.parse_start_end(params.start, params.end)

if not params.links:
  vocalize_headwords(params.save, params.verbose, startFrom, upTo)
else:
  vocalize_links(params.save, params.verbose, params.cattype, startFrom,
      upTo)
yield page.title() if params.ignore_lemma_non_lemma: pages_to_ignore = set(yield_lemma_non_lemma_page_titles()) else: pages_to_ignore = set() for category in yield_cats(): msg("Processing category %s ..." % category) errmsg("Processing category %s ..." % category) for index, page in blib.cat_articles(category, startFrom, upTo): if page.title() not in pages_to_ignore: blib.do_edit(page, index, remove_translit_one_page, save=params.save, verbose=params.verbose) pa = blib.init_argparser("Remove translit, sc= from hy, xcl, ka, el, grc templates") pa.add_argument("--langs", default="all", help="Languages to do, a comma-separated list or 'all'") pa.add_argument("--cattype", default="all", help="""Categories to examine ('all' or comma-separated list of 'translit', 'lemma', 'non-lemma'; default 'all')""") pa.add_argument("--ignore-lemma-non-lemma", action="store_true", help="""Ignore lemma and non-lemma pages (useful with '--cattype translit').""") pa.add_argument("--do-head", action="store_true", help="""Remove tr= in {{head|..}}""") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) remove_translit(params, startFrom, upTo) msg("Templates processed:")
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import re, codecs import blib import grc_translit from canon_foreign import canon_links pa = blib.init_argparser("Canonicalize Greek and translit") pa.add_argument( "--cattype", default="borrowed", help="""Categories to examine ('vocab', 'borrowed', 'translation', 'links', 'pagetext', 'pages' or comma-separated list)""") pa.add_argument( "--page-file", help="""File containing "pages" to process when --cattype pagetext, or list of pages when --cattype pages""") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) pages_to_do = [] if params.page_file: for line in codecs.open(params.page_file, "r", encoding="utf-8"):
head = reorder_shadda(getparam(t, "1")) if t.name.startswith("ar-decl-"): param = "pl" pl = getparam(t, param) i = 2 while pl: if pl == "smp": if head.endswith(TAM): msg("Page %s %s: WARNING: Found %s=smp with feminine ending head %s in %s: not changing" % ( index, pagetitle, param, head, t.name)) else: msg("Page %s %s: Changing %s=smp to %s=sp in %s" % ( index, pagetitle, param, param, t.name)) addparam(t, param, "sp") param = "pl%s" % i pl = getparam(t, param) i += 1 changelog = "Change pl=smp to pl=sp" msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog)) return text, changelog for page, index in blib.references("Template:" + template, startFrom, upTo): blib.do_edit(page, index, fix_one_page_smp, save=save, verbose=verbose) pa = blib.init_argparser("Change |pl=smp to |pl=sp in declension templates") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) fix_smp(params.save, params.verbose, startFrom, upTo)
while pl: if pl == "smp": if head.endswith(TAM): msg("Page %s %s: WARNING: Found %s=smp with feminine ending head %s in %s: not changing" % (index, pagetitle, param, head, t.name)) else: msg("Page %s %s: Changing %s=smp to %s=sp in %s" % (index, pagetitle, param, param, t.name)) addparam(t, param, "sp") param = "pl%s" % i pl = getparam(t, param) i += 1 changelog = "Change pl=smp to pl=sp" msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog)) return text, changelog for index, page in blib.references("Template:" + template, startFrom, upTo): blib.do_edit(page, index, fix_one_page_smp, save=save, verbose=verbose) pa = blib.init_argparser("Change |pl=smp to |pl=sp in declension templates") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) fix_smp(params.save, params.verbose, startFrom, upTo)
index, rewrite_one_page_ru_decl_noun, save=save, verbose=verbose) def rewrite_ru_decl_adj(save, verbose, startFrom, upTo): for cat in [u"Russian adjectives"]: for index, page in blib.cat_articles(cat, startFrom, upTo): blib.do_edit(page, index, rewrite_one_page_ru_decl_adj, save=save, verbose=verbose) pa = blib.init_argparser("Rewrite Russian old declension templates") pa.add_argument("--adjectives", action='store_true', help="Rewrite old adjective templates") pa.add_argument("--nouns", action='store_true', help="Rewrite old noun templates") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) if params.adjectives: rewrite_ru_decl_adj(params.save, params.verbose, startFrom, upTo) if params.nouns: rewrite_ru_decl_noun(params.save, params.verbose, startFrom, upTo)
pagemsg("Replacing [[%s]] with [[%s]]" % (pagetext, newtext)) else: pagemsg("Text has changed") pagetext = newtext # Construct and output comment. notestext = '; '.join(notes) if notestext: if comment: comment += " (%s)" % notestext else: comment = notestext assert(comment) pagemsg("comment = %s" % comment, simple = True) return pagetext, comment def split_etymologies(save, verbose, startFrom, upTo): def split_page_etymologies(page, index, pagetext): return split_one_page_etymologies(page, index, pagetext, verbose) for index, page in blib.cat_articles("Arabic lemmas", startFrom, upTo): blib.do_edit(page, index, split_page_etymologies, save=save, verbose=verbose) pa = blib.init_argparser("Split etymology sections") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) split_etymologies(params.save, True, # params.verbose startFrom, upTo)
msg("Replacing %s with %s" % (origtemp, newtemp)) if re.match("^[1I](-|$)", form): actions_taken.append("form=%s (%s/%s)" % (form, getparam(template, str(1+int(formarg))), getparam(template, str(2+int(formarg))))) else: actions_taken.append("form=%s" % form) changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % ( tempname, formarg, '; '.join(actions_taken)) if len(actions_taken) > 0: msg("Change log = %s" % changelog) return text, changelog for page, index in blib.references("Template:%s" % tempname, startFrom, upTo): blib.do_edit(page, index, canonicalize_one_page_verb_form, save=save) pa = blib.init_argparser("Rewrite form= to 1= in verb headword templates") pa.add_argument("--headword", action='store_true', help="Rewrite form= to 1= in ar-verb and canonicalize") pa.add_argument("--canonicalize", action='store_true', help="Canonicalize form in Arabic verb templates other than ar-verb") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) if params.headword: rewrite_verb_headword(params.save, startFrom, upTo) if params.canonicalize: canonicalize_verb_form(params.save, startFrom, upTo, "ar-conj", "1") canonicalize_verb_form(params.save, startFrom, upTo, "ar-past3sm", "1") canonicalize_verb_form(params.save, startFrom, upTo, "ar-verb-part", "2")
if ratio == int(ratio): pagemsg("WARNING: Replaced %s occurrences of curr=%s with repl=%s" % (int(ratio), curr_template, repl_template)) else: pagemsg("WARNING: Something wrong, length mismatch during replacement: Expected length change=%s, actual=%s, ratio=%.2f, curr=%s, repl=%s" % (repl_curr_diff, newtext_text_diff, ratio, curr_template, repl_template)) changelog = "Replaced %s with %s (%s)" % (curr_template, repl_template, annotation) pagemsg("Change log = %s" % changelog) return newtext, changelog page = pywikibot.Page(site, pagename) if not page.exists(): msg("Page %s %s: WARNING, something wrong, does not exist" % ( index, pagename)) else: blib.do_edit(page, index, push_one_manual_change, save=save, verbose=verbose) pa = blib.init_argparser("Push manual changes to Wiktionary") pa.add_argument("--file", help="File containing templates to change, as output by parse_log_file.py") pa.add_argument("--annotation", default="manually", help="Annotation in change log message used to indicate source of changes (default 'manually')") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) push_manual_changes(params.save, params.verbose, params.file, params.annotation, startFrom, upTo)