Exemplo n.º 1
0
def fix_smp(save, verbose, startFrom, upTo):
    """Change pl=smp to pl=sp (also pl2=, pl3=, ...) in ar-decl-* templates.

    Walks every page that references one of ``arabic_decl_templates`` and
    runs ``fix_one_page_smp`` on it through ``blib.do_edit``.

    save, verbose -- forwarded to blib.do_edit().
    startFrom, upTo -- forwarded to blib.references().
    """

    # The per-page callback does not depend on which template is being
    # enumerated, so it is defined once instead of once per loop iteration.
    def fix_one_page_smp(page, index, text):
        """Rewrite smp plural params in ``text``; return (text, changelog)."""
        pagetitle = page.title()
        for t in text.filter_templates():
            if not t.name.startswith("ar-decl-"):
                continue
            # The head is only needed for templates we actually inspect.
            head = reorder_shadda(getparam(t, "1"))
            # Scan pl, pl2, pl3, ... and stop at the first empty value.
            param = "pl"
            pl = getparam(t, param)
            i = 2
            while pl:
                if pl == "smp":
                    if head.endswith(TAM):
                        # Heads ending in TAM are left alone and only reported.
                        msg("Page %s %s: WARNING: Found %s=smp with feminine ending head %s in %s: not changing"
                            % (index, pagetitle, param, head, t.name))
                    else:
                        msg("Page %s %s: Changing %s=smp to %s=sp in %s"
                            % (index, pagetitle, param, param, t.name))
                        addparam(t, param, "sp")
                param = "pl%s" % i
                pl = getparam(t, param)
                i += 1
        changelog = "Change pl=smp to pl=sp"
        msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
        return text, changelog

    for template in arabic_decl_templates:
        for index, page in blib.references("Template:" + template, startFrom,
                                           upTo):
            blib.do_edit(page,
                         index,
                         fix_one_page_smp,
                         save=save,
                         verbose=verbose)
Exemplo n.º 2
0
def canonicalize_verb_form(save, startFrom, upTo, tempname, formarg):
  """Canonicalize the verb-form param of every `tempname` template.

  tempname -- template name (without "Template:") whose references are edited.
  formarg -- name of the param holding the form; it is also passed to int()
    to locate the two following positional params for the log message.
  save -- forwarded to blib.do_edit().
  startFrom, upTo -- forwarded to blib.references().
  """
  # Returns the changed text along with a changelog message.
  def canonicalize_one_page_verb_form(page, index, text):
    pagetitle = page.title()
    msg("Processing page %s" % pagetitle)
    actions_taken = []

    for template in text.filter_templates():
      if template.name == tempname:
        form = getparam(template, formarg)
        if not form:
          # Nothing to canonicalize. Previously an empty "form=" entry was
          # still appended to the changelog for such templates.
          continue
        origtemp = unicode(template)
        addparam(template, formarg, canonicalize_form(form))
        newtemp = unicode(template)
        if origtemp != newtemp:
          msg("Replacing %s with %s" % (origtemp, newtemp))
        if re.match("^[1I](-|$)", form):
          # For form 1/I also log the two params that follow formarg.
          actions_taken.append("form=%s (%s/%s)" % (form,
            getparam(template, str(1+int(formarg))),
            getparam(template, str(2+int(formarg)))))
        else:
          actions_taken.append("form=%s" % form)
    changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
        tempname, formarg, '; '.join(actions_taken))
    if actions_taken:
      msg("Change log = %s" % changelog)
    return text, changelog

  for page, index in blib.references("Template:%s" % tempname, startFrom, upTo):
    blib.do_edit(page, index, canonicalize_one_page_verb_form, save=save)
Exemplo n.º 3
0
def rewrite_pages(refrom, reto, refs, cat, pages, pagefile, pagetitle_sub,
    comment, filter_pages, save, verbose, startFrom, upTo):
  """Apply regex substitutions to a set of pages.

  refrom, reto -- parallel sequences of regex patterns and replacements,
    applied in order with re.sub().
  pages, pagefile, refs, cat -- page sources, tried in that order: explicit
    titles, a UTF-8 file of titles (one per line), references to `refs`,
    or the articles of category `cat`.
  pagetitle_sub -- if set, each occurrence of this string in a pattern is
    replaced by the regex-escaped page title, and in a replacement by the
    page title itself.
  comment -- edit summary; if empty, one is built from the from/to pairs.
  filter_pages -- if set, only titles matching this regex are processed.
  save, verbose -- forwarded to blib.do_edit().
  startFrom, upTo -- forwarded to the page-enumeration helpers.
  """
  def rewrite_one_page(page, index, text):
    text = unicode(text)
    text = reorder_shadda(text)
    # Materialize the pairs: they are walked here and again when building the
    # default comment, which a one-shot iterator would not survive.
    fromto_pairs = list(zip(refrom, reto))
    # The title does not change between pairs, so fetch it once.
    pagetitle = unicode(page.title()) if pagetitle_sub else None
    for fromval, toval in fromto_pairs:
      if pagetitle_sub:
        fromval = fromval.replace(pagetitle_sub, re.escape(pagetitle))
        toval = toval.replace(pagetitle_sub, pagetitle)
      text = re.sub(fromval, toval, text)
    return text, comment or "replace %s" % (", ".join("%s -> %s" % (f, t) for f, t in fromto_pairs))

  if pages:
    pages = ((pywikibot.Page(blib.site, page), index) for page, index in blib.iter_pages(pages, startFrom, upTo))
  elif pagefile:
    # Read the whole file up front and close it, rather than leaking the handle.
    with codecs.open(pagefile, "r", "utf-8") as fp:
      lines = [x.strip() for x in fp]
    pages = ((pywikibot.Page(blib.site, page), index) for page, index in blib.iter_pages(lines, startFrom, upTo))
  elif refs:
    pages = blib.references(refs, startFrom, upTo, includelinks=True)
  else:
    pages = blib.cat_articles(cat, startFrom, upTo)
  for page, index in pages:
    pagetitle = unicode(page.title())
    if filter_pages and not re.search(filter_pages, pagetitle):
      blib.msg("Skipping %s because doesn't match --filter-pages regex %s" %
          (pagetitle, filter_pages))
    else:
      if verbose:
        blib.msg("Processing %s" % pagetitle)
      blib.do_edit(page, index, rewrite_one_page, save=save, verbose=verbose)
Exemplo n.º 4
0
def fix_smp(save, verbose, startFrom, upTo):
  """Change pl=smp to pl=sp (also pl2=, pl3=, ...) in ar-decl-* templates.

  Walks every page that references one of `arabic_decl_templates` and runs
  `fix_one_page_smp` on it through blib.do_edit().

  save, verbose -- forwarded to blib.do_edit().
  startFrom, upTo -- forwarded to blib.references().
  """
  # The per-page callback does not depend on which template is being
  # enumerated, so it is defined once instead of once per loop iteration.
  def fix_one_page_smp(page, index, text):
    pagetitle = page.title()
    for t in text.filter_templates():
      if not t.name.startswith("ar-decl-"):
        continue
      # The head is only needed for templates we actually inspect.
      head = reorder_shadda(getparam(t, "1"))
      # Scan pl, pl2, pl3, ... and stop at the first empty value.
      param = "pl"
      pl = getparam(t, param)
      i = 2
      while pl:
        if pl == "smp":
          if head.endswith(TAM):
            # Heads ending in TAM are left alone and only reported.
            msg("Page %s %s: WARNING: Found %s=smp with feminine ending head %s in %s: not changing" % (
              index, pagetitle, param, head, t.name))
          else:
            msg("Page %s %s: Changing %s=smp to %s=sp in %s" % (
              index, pagetitle, param, param, t.name))
            addparam(t, param, "sp")
        param = "pl%s" % i
        pl = getparam(t, param)
        i += 1
    changelog = "Change pl=smp to pl=sp"
    msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
    return text, changelog

  for template in arabic_decl_templates:
    for page, index in blib.references("Template:" + template, startFrom, upTo):
      blib.do_edit(page, index, fix_one_page_smp, save=save,
          verbose=verbose)
Exemplo n.º 5
0
def fix_tool_place_noun(save, verbose, startFrom, upTo):
    """Normalize cap=/lc= on three Arabic noun headword templates.

    For each page referencing "ar-tool noun", "ar-noun of place" or
    "ar-instance noun": a template with a non-empty cap= loses that param,
    any other gets lc=1 added.

    save, verbose -- forwarded to blib.do_edit().
    startFrom, upTo -- forwarded to blib.references().
    """
    template_names = ["ar-tool noun", "ar-noun of place", "ar-instance noun"]
    for template in template_names:

        # Per-page callback; it closes over the current `template`.
        def fix_one_page_tool_place_noun(page, index, text):
            title = page.title()
            for tmpl in text.filter_templates():
                if tmpl.name == template:
                    has_cap = getparam(tmpl, "cap")
                    if not has_cap:
                        msg("Page %s %s: Template %s: Add lc=1" %
                            (index, title, template))
                        addparam(tmpl, "lc", "1")
                    else:
                        msg("Page %s %s: Template %s: Remove cap=" %
                            (index, title, template))
                        tmpl.remove("cap")
            summary = "%s: If cap= is present, remove it, else add lc=" % template
            msg("Page %s %s: Change log = %s" % (index, title, summary))
            return text, summary

        referencing = blib.references("Template:" + template, startFrom, upTo)
        for index, page in referencing:
            blib.do_edit(page, index, fix_one_page_tool_place_noun,
                         save=save, verbose=verbose)
Exemplo n.º 6
0
def rewrite_ar_nisba(save, verbose, startFrom, upTo):
    """Run rewrite_one_page_ar_nisba on every page referencing Template:ar-nisba.

    save, verbose -- forwarded to blib.do_edit().
    startFrom, upTo -- forwarded to blib.references().
    """
    referencing = blib.references("Template:ar-nisba", startFrom, upTo)
    for index, page in referencing:
        blib.do_edit(page, index, rewrite_one_page_ar_nisba,
                     save=save, verbose=verbose)
Exemplo n.º 7
0
def canonicalize_verb_form(save, startFrom, upTo, tempname, formarg):
    """Canonicalize the verb-form param of every `tempname` template.

    tempname -- template name (without "Template:") whose references are
        edited.
    formarg -- name of the param holding the form; it is also passed to
        int() to locate the two following positional params for the log.
    save -- forwarded to blib.do_edit().
    startFrom, upTo -- forwarded to blib.references().
    """
    # Returns the changed text along with a changelog message.
    def canonicalize_one_page_verb_form(page, index, text):
        pagetitle = page.title()
        msg("Processing page %s" % pagetitle)
        actions_taken = []

        for template in text.filter_templates():
            if template.name == tempname:
                form = getparam(template, formarg)
                if not form:
                    # Nothing to canonicalize. Previously an empty "form="
                    # entry was still appended to the changelog.
                    continue
                origtemp = unicode(template)
                addparam(template, formarg, canonicalize_form(form))
                newtemp = unicode(template)
                if origtemp != newtemp:
                    msg("Replacing %s with %s" % (origtemp, newtemp))
                if re.match("^[1I](-|$)", form):
                    # For form 1/I also log the two params after formarg.
                    actions_taken.append(
                        "form=%s (%s/%s)" %
                        (form, getparam(template, str(1 + int(formarg))),
                         getparam(template, str(2 + int(formarg)))))
                else:
                    actions_taken.append("form=%s" % form)
        changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
            tempname, formarg, '; '.join(actions_taken))
        if actions_taken:
            msg("Change log = %s" % changelog)
        return text, changelog

    for index, page in blib.references("Template:%s" % tempname, startFrom,
                                       upTo):
        blib.do_edit(page, index, canonicalize_one_page_verb_form, save=save)
Exemplo n.º 8
0
def rewrite_idafa(save, verbose, startFrom, upTo):
    """Run rewrite_one_page_idafa on pages referencing any Arabic decl template.

    The templates are taken from `arabic_decl_templates`, one pass each.

    save, verbose -- forwarded to blib.do_edit().
    startFrom, upTo -- forwarded to blib.references().
    """
    for template in arabic_decl_templates:
        target = "Template:" + template
        for index, page in blib.references(target, startFrom, upTo):
            blib.do_edit(page, index, rewrite_one_page_idafa,
                         save=save, verbose=verbose)
Exemplo n.º 9
0
 def yield_pages():
     """Yield (index, page, topage) for every configured page source.

     Sources are read from the enclosing scope and processed in this order:
     pages, pagefile, from_to_pagefile, refs, pages_and_refs, cats.
     `topage` is the text after " ||| " for --from-to-pagefile lines and
     None for every other source.
     """
     def read_lines(filename):
         # Read everything and close the file instead of leaking the handle.
         with codecs.open(filename, "r", "utf-8") as fp:
             return [x.strip() for x in fp]

     if pages:
         for index, page in blib.iter_items(pages, startFrom, upTo):
             yield index, pywikibot.Page(blib.site, page), None
     if pagefile:
         lines = read_lines(pagefile)
         for index, page in blib.iter_items(lines, startFrom, upTo):
             yield index, pywikibot.Page(blib.site, page), None
     if from_to_pagefile:
         lines = read_lines(from_to_pagefile)
         for index, line in blib.iter_items(lines, startFrom, upTo):
             parts = line.split(" ||| ")
             # Exactly one separator is expected. A line with none, or with
             # several (which used to raise ValueError on unpacking), is
             # reported and skipped.
             if len(parts) != 2:
                 msg("WARNING: Saw bad line in --from-to-pagefile: %s" %
                     line)
                 continue
             frompage, topage = parts
             yield index, pywikibot.Page(blib.site, frompage), topage
     if refs:
         for ref in refs:
             for index, page in blib.references(
                     ref, startFrom, upTo, only_template_inclusion=False):
                 yield index, page, None
     if pages_and_refs:
         for page_and_refs in pages_and_refs:
             for index, page in blib.references(
                     page_and_refs,
                     startFrom,
                     upTo,
                     only_template_inclusion=False,
                     include_page=True):
                 yield index, page, None
     if cats:
         for cat in cats:
             for index, page in blib.cat_articles(cat, startFrom, upTo):
                 yield index, page, None
Exemplo n.º 10
0
def process_page(page, index):
  """Walk the template-namespace references to `page`.

  Every referencing page's title is collected as an alias. Unless
  args.table_of_uses is set, each one is also handed to process_subpage();
  when it is set, a single comma-separated line of the page name followed by
  its aliases (all with "Template:" removed) is printed instead.
  """
  global args
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))
  def errpagemsg(txt):
    errmsg("Page %s %s: %s" % (index, pagetitle, txt))
  errpagemsg("Processing references")
  if not args.table_of_uses:
    pagemsg("Processing references")
  aliases = []
  referencing = blib.references(pagetitle, namespaces=[10],
    only_template_inclusion=False, filter_redirects=args.redirects_only)
  for i, subpage in referencing:
    aliases.append(unicode(subpage.title()))
    if not args.table_of_uses:
      process_subpage(page, index, subpage, i)
  if args.table_of_uses:
    # Main template first, then its aliases, all without the namespace prefix.
    names = [pagetitle.replace("Template:", "")]
    names.extend(x.replace("Template:", "") for x in aliases)
    msg(",".join(names))
Exemplo n.º 11
0
def rewrite_template_names(old, new, removelist, save, verbose,
    startFrom, upTo):
  """Rename template `old` to `new` on every page that references it.

  old, new -- template names without the "Template:" prefix.
  removelist -- params to drop from each renamed template (may be empty or
    None).
  save, verbose -- forwarded to blib.do_edit().
  startFrom, upTo -- forwarded to blib.references().
  """
  def rewrite_one_page_template_names(page, index, text):
    actions = []
    for template in text.filter_templates():
      if template.name == old:
        actions.append("rename {{temp|%s}} to {{temp|%s}}" % (old, new))
        template.name = new
        # Strip params only from the template being renamed. The removal loop
        # used to run for every template on the page, deleting same-named
        # params from unrelated templates.
        for remove in removelist or ():
          if template.has(remove):
            template.remove(remove)
            actions.append("remove %s=" % remove)

    return text, '; '.join(actions)

  for index, page in blib.references("Template:%s" % old, startFrom, upTo):
    blib.do_edit(page, index, rewrite_one_page_template_names, save=save,
        verbose=verbose)
Exemplo n.º 12
0
def create_declensions(save,
                       pos,
                       tempname,
                       decltempname,
                       sgnum,
                       startFrom,
                       upTo,
                       removeparams,
                       is_proper=False):
    """Call create_declension() for every page referencing Template:`tempname`.

    startFrom and upTo are forwarded to blib.references(); every other
    argument is handed unchanged to create_declension().
    """
    target = "Template:%s" % tempname
    for index, page in blib.references(target, startFrom, upTo):
        create_declension(page, index, save, pos, tempname, decltempname,
                          sgnum, removeparams, is_proper=is_proper)
Exemplo n.º 13
0
def fix_tool_place_noun(save, verbose, startFrom, upTo):
  """Normalize cap=/lc= on three Arabic noun headword templates.

  For each page referencing "ar-tool noun", "ar-noun of place" or
  "ar-instance noun": a template with a non-empty cap= loses that param, any
  other gets lc=1 added.

  save, verbose -- forwarded to blib.do_edit().
  startFrom, upTo -- forwarded to blib.references().
  """
  template_names = ["ar-tool noun", "ar-noun of place", "ar-instance noun"]
  for template in template_names:

    # Per-page callback; it closes over the current `template`.
    def fix_one_page_tool_place_noun(page, index, text):
      title = page.title()
      for tmpl in text.filter_templates():
        if tmpl.name == template:
          has_cap = getparam(tmpl, "cap")
          if not has_cap:
            msg("Page %s %s: Template %s: Add lc=1" %
                (index, title, template))
            addparam(tmpl, "lc", "1")
          else:
            msg("Page %s %s: Template %s: Remove cap=" %
                (index, title, template))
            tmpl.remove("cap")
      summary = "%s: If cap= is present, remove it, else add lc=" % template
      msg("Page %s %s: Change log = %s" % (index, title, summary))
      return text, summary

    referencing = blib.references("Template:" + template, startFrom, upTo)
    for index, page in referencing:
      blib.do_edit(page, index, fix_one_page_tool_place_noun, save=save,
          verbose=verbose)
Exemplo n.º 14
0
def create_declensions(save, pos, tempname, decltempname, sgnum,
    startFrom, upTo, removeparams, is_proper=False):
  """Call create_declension() for every page referencing Template:`tempname`.

  startFrom and upTo are forwarded to blib.references(); every other argument
  is handed unchanged to create_declension().
  """
  target = "Template:%s" % tempname
  referencing = blib.references(target, startFrom, upTo)
  for index, page in referencing:
    create_declension(page, index, save, pos, tempname, decltempname, sgnum,
        removeparams, is_proper=is_proper)
Exemplo n.º 15
0
import rulib as ru

def process_page(index, page):
  """Report a page that has no {{ru-adj}} headword template.

  If none of the page's templates is named "ru-adj", a "Missing adj headword
  template" message is logged, followed by notes on any noun headword
  templates or {{head}} templates found on the page.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  parsed = blib.parse(page)
  templates = parsed.filter_templates()

  if any(unicode(t.name) in ["ru-adj"] for t in templates):
    return

  notes = []
  for t in templates:
    name = unicode(t.name)
    if name in ["ru-noun", "ru-noun+", "ru-proper noun", "ru-proper noun+"]:
      notes.append("found noun header (%s)" % name)
    if name == "head":
      notes.append("found head header (%s)" % getparam(t, "2"))
  # `notes and ...` evaluated to the empty list when there were no notes, so
  # the message used to end in a literal "[]". Use an explicit empty suffix.
  suffix = "; " + ",".join(notes) if notes else ""
  pagemsg("Missing adj headword template%s" % suffix)

# Script entry: parse the command line, then check every page that references
# Template:ru-decl-adj within the requested start/end range.
parser = blib.create_argparser("Find missing adjective headwords")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for index, page in blib.references("Template:ru-decl-adj", start, end):
  process_page(index, page)
Exemplo n.º 16
0
def rewrite_ar_nisba(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_ar_nisba on every page referencing Template:ar-nisba.

  save, verbose -- forwarded to blib.do_edit().
  startFrom, upTo -- forwarded to blib.references().
  """
  referencing = blib.references("Template:ar-nisba", startFrom, upTo)
  for index, page in referencing:
    blib.do_edit(page, index, rewrite_one_page_ar_nisba,
        save=save, verbose=verbose)
Exemplo n.º 17
0
def process_page(index, page, save, verbose):
  """Move a {{was wotd|...}} line from just before ==English== to just after.

  index -- running page number, used only in log messages.
  page -- page object whose .text is read and, when `save` is set, rewritten
    and saved.
  save -- save the change if true; otherwise only log what would be saved.
  verbose -- additionally log the full old and new page text.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  origtext = unicode(page.text)
  # Swap the two captured lines: header first, then the template line.
  text = re.sub(r"(\{\{was wotd\|.*?\}\}\n)(==English==\n)", r"\2\1", origtext)
  notes = ["put {{was wotd}} after ==English== per [[User:Smuconlaw]]"]

  if text == origtext:
    return

  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (origtext, text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = text
  page.save(comment=comment)

# Script entry: process every page that references Template:was wotd.
# The description previously read "Remove adj= and shto= from ru-ux", which
# was left over from an unrelated script and mislabelled the --help output.
parser = blib.create_argparser(u"Put {{was wotd}} after ==English==")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.references("Template:was wotd", start, end):
  process_page(i, page, args.save, args.verbose)
Exemplo n.º 18
0
      if shch == u"щ":
        t.add("3", getparam(t, "3") + shch)
        rmparam(t, "4")
        notes.append(u"move param 4 (щ) to param 3")
      elif shch:
        pagemsg("WARNING: Strange value %s for param 4" % shch)
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

# Script entry: parse the command line, then process every page that
# references the conj-4a tracking template within the start/end range.
parser = blib.create_argparser(u"Convert class-4a 4th param щ to 3rd param")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.references("Template:tracking/ru-verb/conj-4a", start, end):
  process_page(i, page, args.save, args.verbose)
    else:
      ru_proper_noun_changed = 1

  return unicode(parsed), ru_noun_table_cleaned, ru_noun_table_link_copied, ru_noun_changed, ru_proper_noun_changed

# Script entry: parse the command line, optionally load a lemma list, then
# process pages for each requested category.
parser = blib.create_argparser("Copy the declension in ru-noun-table to ru-noun+, preserving any m=, f=, g=, etc. in the latter.")
parser.add_argument('--cats', default="nouns,proper nouns", help="Categories to do ('nouns', 'proper nouns' or 'nouns,proper nouns')")
parser.add_argument('--lemma-file', help="File containing lemmas to copy declension of. Will remove extraneous params from ru-noun-table and copy links to ru-noun-table regardless of this.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

if args.lemma_file:
  # Read the lemmas and close the file instead of leaking the handle.
  with codecs.open(args.lemma_file, "r", "utf-8") as lemma_fp:
    lemmas = set(x.strip() for x in lemma_fp)
else:
  lemmas = None

for cat in re.split(",", args.cats):
  if cat == "nouns":
    template = "Template:ru-noun+"
  elif cat == "proper nouns":
    template = "Template:ru-proper noun+"
  else:
    raise ValueError("Invalid value to --cats: %s" % cat)
  msg("Processing references to %s" % template)
  if lemmas:
    # NOTE(review): the lemma list is walked once per category and `template`
    # is not used on this path -- confirm that is intended.
    for i, page in blib.iter_items(lemmas, start, end):
      process_page(i, pywikibot.Page(site, page), args.save, args.verbose, lemmas)
  else:
    for i, page in blib.references(template, start, end):
      process_page(i, page, args.save, args.verbose, lemmas)
Exemplo n.º 20
0
    for t in parsed.filter_templates():
        if tname(t) == "R:Lexico":
            origt = unicode(t)
            rmparam(t, "lang")
            entry_uk = getparam(t, "entry_uk")
            if entry_uk:
                t.add("entry", entry_uk, before="entry_uk")
            rmparam(t, "entry_uk")
            url_uk = getparam(t, "url_uk")
            if url_uk:
                t.add("url", url_uk, before="url_uk")
            rmparam(t, "url_uk")
            p4 = getparam(t, "4")
            if p4:
                t.add("text", p4, before="4")
            rmparam(t, "4")
            newt = unicode(t)
            if origt != newt:
                notes.append("Remove/rearrange params in {{R:Lexico}}")
                pagemsg("Replaced %s with %s" % (origt, newt))

    return parsed, notes


# Script entry: parse the command line, then run process_page through
# blib.do_edit on every page that references Template:R:Lexico.
parser = blib.create_argparser(u"Remove/rearrange params in {{R:Lexico}}")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.references("Template:R:Lexico", start, end):
    blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose)
        arg_set.append(val)

  for t in parsed.filter_templates():
    tname = unicode(t.name)
    if tname == "ru-decl-noun-see":
      pagemsg("WARNING: Skipping ru-decl-noun-see, can't handle yet: %s" % unicode(t))
    elif tname in ["ru-noun+", "ru-proper noun+"]:
      pagemsg("Found %s" % unicode(t))
      process_new_style_headword(t)
    elif tname in ["ru-noun", "ru-proper noun"]:
      pagemsg("WARNING: Skipping ru-noun or ru-proper noun, can't handle yet: %s" % unicode(t))

# Script entry. Three phases: collect all Russian lemma titles, process the
# pages on the two space-in-headword tracking templates, then print a report
# of the nonexistent lemmas that were recorded.
parser = blib.create_argparser(u"Find red links in multiword lemmas")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

msg("Reading Russian lemmas")
for i, page in blib.cat_articles("Russian lemmas", start, end):
  lemmas.add(unicode(page.title()))

for pos in ["nouns", "proper nouns"]:
  tracking_page = "Template:tracking/ru-headword/space-in-headword/" + pos
  msg("PROCESSING REFERENCES TO: %s" % tracking_page)
  for index, page in blib.references(tracking_page, start, end):
    process_page(index, page, args.verbose)

# Report order: highest occurrence count first, ties broken by lemma.
for lemma, nonexistent_msg in sorted(nonexistent_lemmas.items(), key=lambda pair:(-lemma_count[pair[0]], pair[0])):
  msg("* [[%s]] (%s occurrence%s): %s (refs: %s)" % (lemma, lemma_count[lemma],
    "" if lemma_count[lemma] == 1 else "s", nonexistent_msg,
    ", ".join("[[%s]]" % x for x in nonexistent_lemmas_refs[lemma])))
Exemplo n.º 22
0
      newval = re.sub("^#\* #\* ", "#* ", subsections[j], 0, re.M)
      if newval != subsections[j]:
        notes.append("remove double #* prefix")
        pagemsg("Removed double #* prefix")
      subsections[j] = newval
  newtext = "".join(subsections)

  if text != newtext:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, newtext))
    assert notes
    comment = "; ".join(blib.group_notes(notes))
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = newtext
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

# Script entry: for each template in replace_templates, process every page
# that references it (blib.references is called with includelinks=True).
if __name__ == "__main__":
  parser = blib.create_argparser("Fix old cite/quote/reference templates")
  args = parser.parse_args()
  start, end = blib.parse_start_end(args.start, args.end)

  for template in replace_templates:
    # The progress line goes through both msg() and errmsg().
    msg("Processing references to Template:%s" % template)
    errmsg("Processing references to Template:%s" % template)
    for i, page in blib.references("Template:%s" % template, start, end,
        includelinks=True):
      process_page(i, page, args.save, args.verbose)
Exemplo n.º 23
0
def rewrite_idafa(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_idafa on pages referencing any Arabic decl template.

  The templates are taken from `arabic_decl_templates`, one pass each.

  save, verbose -- forwarded to blib.do_edit().
  startFrom, upTo -- forwarded to blib.references().
  """
  for template in arabic_decl_templates:
    target = "Template:" + template
    for page, index in blib.references(target, startFrom, upTo):
      blib.do_edit(page, index, rewrite_one_page_idafa,
          save=save, verbose=verbose)
        pagemsg("Replacing %s with %s" % (origt, unicode(t)))
      if t.has("past_actv_part") and getparam(t, "past_actv_part") == "":
        notes.append("set past_actv_part=-")
        origt = unicode(t)
        t.add("past_actv_part", "-")
        pagemsg("Replacing %s with %s" % (origt, unicode(t)))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

  if not notes:
    pagemsg("WARNING: No changes")

# Script entry: parse the command line, then process every page that
# references the different-conj tracking template within the start/end range.
parser = blib.create_argparser(u"Fix past_adv_part_short to use dash instead of blank")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.references("Template:tracking/ru-verb/different-conj", start, end):
  process_page(i, page, args.save, args.verbose)
Exemplo n.º 25
0
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    notes = []
    for t in parsed.filter_templates():
        if tname(t) == "quote-Fanny Hill":
            origt = unicode(t)
            t.name = "RQ:Cleland Fanny Hill"
            rmparam(t, "part")
            if getparam(t, "1"):
                t.add("passage", getparam(t, "1"))
                rmparam(t, "1")
            notes.append(
                "Replace {{quote-Fanny Hill}} with {{RQ:Cleland Fanny Hill}}")
            newt = unicode(t)
            if origt != newt:
                pagemsg("Replaced %s with %s" % (origt, newt))

    return parsed, notes


# Script entry: parse the command line, then run process_page through
# blib.do_edit on every page that references Template:quote-Fanny Hill.
parser = blib.create_argparser(
    u"Convert {{quote-Fanny Hill}} to {{RQ:Cleland Fanny Hill}}")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.references("Template:quote-Fanny Hill", start, end):
    blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose)
Exemplo n.º 26
0
          origt = unicode(t)
          head = getparam(t, "head")
          rmparam(t, "head")
          tr = getparam(t, "tr")
          rmparam(t, "tr")
          t.add("1", head)
          if tr:
            t.add("tr", tr)
          pagemsg("Replacing %s with %s" % (origt, unicode(t)))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

# Script entry: parse the command line, then process every page that
# references Template:ru-phrase within the start/end range.
parser = blib.create_argparser(u"Fix ru-phrase templates to use 1= instead of head=")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.references("Template:ru-phrase", start, end):
  process_page(i, page, args.save, args.verbose)
Exemplo n.º 27
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

# Script entry: list the titles of a category's articles (--cat) or of the
# pages referencing --ref, sorted by their reversed spelling.
parser = blib.create_argparser(u"List pages in category or references in Zaliznyak order")
parser.add_argument('--cat', help="Category to list")
parser.add_argument('--ref', help="References to list")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

if args.cat:
  pages_to_list = blib.cat_articles(args.cat, start, end)
else:
  pages_to_list = blib.references(args.ref, start, end)
# Collect all titles first; the sort below needs the complete list.
pages = [unicode(page.title()) for i, page in pages_to_list]
# Sorting on the reversed string orders titles by their endings.
for page in sorted(pages, key=lambda x:x[::-1]):
  msg(page)
Exemplo n.º 28
0
                    help="Categories to do (can be comma-separated list)")
parser.add_argument('--refs',
                    help="References to do (can be comma-separated list)")
parser.add_argument('--lemmafile',
                    help="File of lemmas to process. May have accents.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Page source precedence: --lemmafile, then --refs, then --cats.
if args.lemmafile:
    lemmas = []
    # Accents are removed from each line before it is used as a page title.
    # NOTE(review): start/end are not passed to blib.iter_items here, and the
    # codecs.open handle is never closed explicitly -- confirm both.
    for i, pagename in blib.iter_items([
            ru.remove_accents(x.strip())
            for x in codecs.open(args.lemmafile, "r", "utf-8")
    ]):
        page = pywikibot.Page(site, pagename)
        process_page(i, page, args.verbose)
elif args.refs:
    for ref in re.split(",", args.refs):
        msg("Processing references to: %s" % ref)
        for i, page in blib.references(ref, start, end):
            process_page(i, page, args.verbose)
else:
    for cat in re.split(",", args.cats):
        msg("Processing category: %s" % cat)
        lemmas = []
        # For "Russian verbs" the whole category (no start/end limit) is
        # first collected into the module-level `lemmas` list.
        if cat == "Russian verbs":
            for i, page in blib.cat_articles(cat):
                lemmas.append(page.title())
        for i, page in blib.cat_articles(cat, start, end):
            process_page(i, page, args.verbose)
Exemplo n.º 29
0
# Decode --ref-namespaces from UTF-8 when given; otherwise use None.
ref_namespaces = args.ref_namespaces and args.ref_namespaces.decode(
    "utf-8") or None

# Each line of the input file is "main,alias1,alias2,..." (split on commas
# below), with template names given without the "Template:" prefix.
lines = [x.strip() for x in codecs.open(args.tempfile, "r", "utf-8")]

# Emit a wikitable with one row per template (the main template and each of
# its aliases), optionally with reference and disposition columns.
msg('{|class="wikitable"')
msg("! Aliased template !! Canonical template !! #Uses%s%s" %
    (" !! Refs" if args.include_refs else "",
     " !! Suggested disposition" if args.include_disposition else ""))
for ref_and_aliases in lines:
    split_refs = re.split(",", ref_and_aliases)
    mainref = "Template:%s" % split_refs[0]
    aliases = split_refs[1:]
    # (template, canonical) pairs; the main template has canonical None.
    refs = [(mainref, None)]
    for alias in aliases:
        refs.append(("Template:%s" % alias, mainref))
    for alias, mainref in refs:
        errmsg("Processing references to: %s" % alias)
        # Materialized so the references can be both counted and listed.
        template_refs = list(
            blib.references(alias, start, end, namespaces=ref_namespaces))
        num_refs = len(template_refs)
        msg("|-")
        # The main template's own row shows it in bold in both columns.
        msg("| %s || %s || %s%s%s" %
            ("[[%s]]" % alias if mainref else "'''[[%s]]'''" % alias,
             "[[%s]]" % mainref if mainref else "'''[[%s]]'''" % alias,
             num_refs, " || %s" % ", ".join("[[%s]]" % unicode(ref.title())
                                            for i, ref in template_refs)
             if args.include_refs else "",
             " || ?" if args.include_disposition else ""))
msg("|}")
Exemplo n.º 30
0

# Command-line setup; --mockup selects the test path instead of live editing.
parser = blib.create_argparser(
    "Add pronunciation sections to Russian Wiktionary entries")
parser.add_argument('--mockup',
                    action="store_true",
                    help="Use mocked-up test code")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)
mockup = args.mockup


def ignore_page(page):
    """Return True if the page title is in a namespace that should be skipped.

    `page` is either a title string or an object with a title() method. The
    skipped prefixes are Appendix, Appendix talk, User, User talk and Talk.
    """
    title = page if isinstance(page, basestring) else unicode(page.title())
    skipped_prefix = r"^(Appendix|Appendix talk|User|User talk|Talk):"
    return re.search(skipped_prefix, title) is not None


# Script entry: run the mock-up test, or edit every page referencing one of
# decl_templates, skipping pages rejected by ignore_page().
if mockup:
    test_infer()
else:
    for tempname in decl_templates:
        for index, page in blib.references("Template:" + tempname, start, end):
            if ignore_page(page):
                msg("Page %s %s: Skipping due to namespace" %
                    (index, unicode(page.title())))
            else:
                blib.do_edit(page, index, infer_one_page_decls, save=args.save)
Exemplo n.º 31
0
    proper_noun_headword.params.extend(remaining_params)
    pagemsg("Replacing %s with %s" % (orig_proper_noun_headword, unicode(proper_noun_headword)))

  newtext = unicode(parsed)

  newtext = re.sub(r"\n\n\n*\[\[Category:ru:Names]]\n\n\n*", "\n\n", newtext)
  newtext = re.sub(r"\[\[Category:ru:Names]]\n", "", newtext)
  newtext = re.sub(r"(\{\{surname\|.*)\.\n", r"\1\n", newtext)

  if newtext != text:
    if verbose:
      pagemsg("Replacing <<%s>> with <<%s>>" % (text, newtext))
    comment = "Convert ru-adj11 to ru-decl-adj and fix up associated templates"
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = newtext
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)
  else:
    pagemsg("Skipping")

# Script entry: parse the command line, then process every page that
# references Template:ru-adj11 within the start/end range.
parser = blib.create_argparser("Fix uses of ru-adj11")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for temp in ["ru-adj11"]:
  msg("Processing references to Template:%s" % temp)
  for i, page in blib.references("Template:" + temp, start, end):
    process_page(i, page, args.save, args.verbose)
Exemplo n.º 32
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

# Command-line setup. --refs and --cats each take a comma-separated list;
# when --refs is given it takes precedence and --cats is not consulted.
parser = blib.create_argparser(u"List pages, lemmas and/or non-lemmas")
parser.add_argument('--cats', default="Russian lemmas", help="Categories to do (can be comma-separated list)")
parser.add_argument('--refs', help="References to do (can be comma-separated list)")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

if not args.refs:
  # No --refs given: list the articles of each requested category.
  for cat in args.cats.split(","):
    msg("Processing category: %s" % cat)
    for i, page in blib.cat_articles(cat, start, end):
      msg("Page %s %s: Processing" % (i, unicode(page.title())))
else:
  # List the pages referring to each requested page.
  for ref in args.refs.split(","):
    msg("Processing references to: %s" % ref)
    for i, page in blib.references(ref, start, end):
      msg("Page %s %s: Processing" % (i, unicode(page.title())))
Exemplo n.º 33
0
          for i in xrange(1, 6):
            if not t.has(str(i)):
              t.add(str(i), "")
          t.add("6", param7)
          notes.append("move type 7b arg7 -> arg6")
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

# Command-line setup followed by the main loop: for each verb class, visit
# the pages referencing its conjugation tracking template and pass them to
# process_page().
parser = blib.create_argparser(u"Fix up class 6a arg 6 -> 4, class 7b arg 7 -> 6")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for vclass in ("6a", "7b"):
  tracking_page = "Template:tracking/ru-verb/conj-%s" % vclass
  for i, page in blib.references(tracking_page, start, end):
    process_page(i, page, args.save, args.verbose)
Exemplo n.º 34
0
                    pval = unicode(param.value)
                    if pname == "inline":
                        if pval and pval not in ["0", "n", "no", "false"]:
                            tname = "uxi"
                    elif re.search(r"^[0-9]+$", pname):
                        # move numbered params up by one
                        new_params.append((str(1 + int(pname)), param.value))
                    elif pname == "sub":
                        new_params.append(("subst", param.value))
                    else:
                        new_params.append((pname, param.value))
                del t.params[:]
                t.name = tname
                t.add("1", "ru")
                for pname, pval in new_params:
                    t.add(pname, pval)
                notes.append("Replace {{ru-ux}} with {{%s|ru}}" % tname)
            newt = unicode(t)
            if origt != newt:
                pagemsg("Replaced %s with %s" % (origt, newt))

    return parsed, notes


# Command-line setup followed by the main loop: run process_page through
# blib.do_edit() on every page that references Template:ru-ux.
parser = blib.create_argparser(u"Convert {{ru-ux}} to {{ux|ru}} or {{uxi|ru}}")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

ru_ux_refs = blib.references("Template:ru-ux", start, end)
for i, page in ru_ux_refs:
    blib.do_edit(page,
                 i,
                 process_page,
                 save=args.save,
                 verbose=args.verbose)
Exemplo n.º 35
0
      return "test_infer"
  for pagetext in test_templates:
    text = blib.parse_text(pagetext)
    page = Page()
    newtext, comment = infer_one_page_decls(page, 1, text)
    msg("newtext = %s" % unicode(newtext))
    msg("comment = %s" % comment)

# Build the command-line parser and add the --mockup switch.
# NOTE(review): the description talks about pronunciation sections, while the
# driver below runs infer_one_page_decls on declension templates -- confirm
# whether this text is a copy-paste leftover.
parser = blib.create_argparser("Add pronunciation sections to Russian Wiktionary entries")
parser.add_argument('--mockup', action="store_true", help="Use mocked-up test code")
args = parser.parse_args()
# (start, end) is later passed to blib.references() by the driver loop.
start, end = blib.parse_start_end(args.start, args.end)
# True when --mockup was given: run test_infer() instead of editing pages.
mockup = args.mockup

def ignore_page(page):
  """Tell whether a page belongs to a namespace this script skips.

  `page` is either a title string or an object exposing title(); in the
  latter case the title is converted to unicode first. Returns True when
  the title starts with one of the Appendix, Appendix talk, User,
  User talk or Talk namespace prefixes, otherwise False.
  """
  title = page if isinstance(page, basestring) else unicode(page.title())
  return bool(
    re.search(r"^(Appendix|Appendix talk|User|User talk|Talk):", title))

# Script entry point: either run the mocked-up test code, or walk every page
# referencing one of the declension templates and run infer_one_page_decls
# on it via blib.do_edit(), skipping pages rejected by ignore_page().
if not mockup:
  for tempname in decl_templates:
    template_page = "Template:" + tempname
    for index, page in blib.references(template_page, start, end):
      if not ignore_page(page):
        blib.do_edit(page, index, infer_one_page_decls, save=args.save)
        continue
      msg("Page %s %s: Skipping due to namespace" % (index, unicode(page.title())))
else:
  test_infer()
Exemplo n.º 36
0
            # Put numbered params in order.
            for name, value, showkey in numbered_params:
                t.add(name, value, showkey=showkey, preserve_spacing=False)
            t.add("volume", volume)
            if chapter:
                t.add("chapter", chapter)
            if text:
                t.add("text", text)
            if translation:
                t.add("t", translation)
            # Put named params in order.
            for name, value, showkey in named_params:
                t.add(name, value, showkey=showkey, preserve_spacing=False)
            notes.append(
                "Replace {{RQ:Don Quixote}} with {{RQ:Cervantes Viardot Don Quichotte}}"
            )
            newt = unicode(t)
            if origt != newt:
                pagemsg("Replaced %s with %s" % (origt, newt))

    return parsed, notes


# Command-line setup followed by the main loop: run process_page through
# blib.do_edit() on every page that references Template:RQ:Don Quixote.
parser = blib.create_argparser(
    u"Convert {{RQ:Don Quixote}} to {{RQ:Cervantes Viardot Don Quichotte}}")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

don_quixote_refs = blib.references("Template:RQ:Don Quixote", start, end)
for i, page in don_quixote_refs:
    blib.do_edit(page,
                 i,
                 process_page,
                 save=args.save,
                 verbose=args.verbose)
        elif tname in ["ru-verb"]:
            pagemsg("Found %s" % unicode(t))
            process_verb_headword(t)
        elif tname in ["ru-noun", "ru-proper noun"]:
            pagemsg(
                "WARNING: Skipping ru-noun or ru-proper noun, can't handle yet: %s"
                % unicode(t))


# Command-line setup.
parser = blib.create_argparser(u"Find red links in multiword lemmas")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Step 1: record the title of every article in the "Russian lemmas" category.
msg("Reading Russian lemmas")
for i, page in blib.cat_articles("Russian lemmas", start, end):
    lemmas.add(unicode(page.title()))

# Step 2: process the pages listed on each space-in-headword tracking page.
for pos in ("nouns", "proper nouns", "verbs"):
    tracking_page = "Template:tracking/ru-headword/space-in-headword/" + pos
    msg("PROCESSING REFERENCES TO: %s" % tracking_page)
    for index, page in blib.references(tracking_page, start, end):
        process_page(index, page, args.verbose)


# Step 3: report the nonexistent lemmas, most frequent first and ties broken
# by the lemma itself.
def _report_order(pair):
    return (-lemma_count[pair[0]], pair[0])


for lemma, nonexistent_msg in sorted(nonexistent_lemmas.items(),
                                     key=_report_order):
    occurrences = lemma_count[lemma]
    plural_suffix = "" if occurrences == 1 else "s"
    ref_links = ", ".join("[[%s]]" % x
                          for x in nonexistent_lemmas_refs[lemma])
    msg("* [[%s]] (%s occurrence%s): %s (refs: %s)" %
        (lemma, occurrences, plural_suffix, nonexistent_msg, ref_links))
            changed = origt != unicode(t)
            if changed:
                notes.append("quote-poem -> quote-book with fixed params")

        if changed:
            pagemsg("Replacing %s with %s" % (origt, unicode(t)))

    return parsed, notes


# Command-line setup.
parser = blib.create_argparser(
    "quote-poem -> quote-book with changed params; quote-magazine/quote-news -> quote-journal; quote-Don Quixote -> RQ:Don Quixote"
)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# For each template, announce it through both msg() and errmsg(), then run
# process_page via blib.do_edit() on every page returned by blib.references()
# (called with includelinks=True).
for template in ("quote-poem", "quote-magazine", "quote-news",
                 "quote-Don Quixote"):
    progress = "Processing references to Template:%s" % template
    msg(progress)
    errmsg(progress)
    template_refs = blib.references("Template:%s" % template,
                                    start,
                                    end,
                                    includelinks=True)
    for i, page in template_refs:
        blib.do_edit(page, i, process_page, save=args.save,
                     verbose=args.verbose)
Exemplo n.º 39
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

# Command-line setup: pages come either from a category (--cat) or from the
# references to a page (--ref). When both are given, --cat wins.
parser = blib.create_argparser(u"Purge (null-save) pages in category or references")
parser.add_argument('--cat', help="Category to purge")
parser.add_argument('--ref', help="References to purge")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Fail early with a usage message instead of passing an empty --ref on to
# blib.references() when neither source was specified.
# NOTE(review): assumes blib.create_argparser() returns an
# argparse.ArgumentParser (it is used via add_argument/parse_args) -- confirm.
if not args.cat and not args.ref:
  parser.error("Must specify either --cat or --ref")

if args.cat:
  pages_to_list = blib.cat_articles(args.cat, start, end)
else:
  pages_to_list = blib.references(args.ref, start, end)
for i, page in pages_to_list:
  # msg("Page %s %s: Null-saving" % (i, unicode(page.title())))
  # Save each page with its text untouched.
  page.save(comment="null save")
Exemplo n.º 40
0
    #    but it's the default in ru-noun-table unless the lemma is plural.
    #    So remove n=both, generate the arguments, and see if the actual
    #    value of args.n is b (for "both"); if not, set n=both.
    else:
      assert headword_n == "b"
      rmparam(see_template, "n")
      see_generate_template = re.sub(r"^\{\{ru-noun-table", "{{ru-generate-noun-args",
          unicode(see_template))
      see_generate_result = expand_text(see_generate_template)
      if not see_generate_result:
        pagemsg("WARNING: Error generating ru-noun-table args")
        return None
      see_args = ru.split_generate_args(see_generate_result)
      if see_args["n"] != "b":
        see_template.add("n", "both")

  comment = "Replace ru-decl-noun-see with ru-noun-table, taken from headword template (%s)" % unicode(headword_template.name)
  if save:
    pagemsg("Saving with comment = %s" % comment)
    page.text = unicode(parsed)
    page.save(comment=comment)
  else:
    pagemsg("Would save with comment = %s" % comment)

# Command-line setup followed by the main loop: every page referencing
# Template:ru-decl-noun-see is handed to process_page() together with the
# save and verbose settings from the command line.
parser = blib.create_argparser("Convert ru-decl-noun-see into ru-noun-table decl template, taken from headword ru-(proper )noun+ template")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

noun_see_refs = blib.references("Template:ru-decl-noun-see", start, end)
for index, page in noun_see_refs:
  process_page(index, page, args.save, args.verbose)
Exemplo n.º 41
0
                        pagemsg("WARNING: Would add inanimacy to neuter, but isn't marked as indeclinable: %s" % origt)
                        return
                pagemsg("Replacing %s with %s" % (origt, unicode(t)))

    new_text = unicode(parsed)

    if new_text != text:
        if verbose:
            pagemsg("Replacing <%s> with <%s>" % (text, new_text))
        if notes:
            comment = "Add inanimacy to neuters (%s)" % "; ".join(notes)
        else:
            comment = "Add inanimacy to neuters"
        if save:
            pagemsg("Saving with comment = %s" % comment)
            page.text = new_text
            page.save(comment=comment)
        else:
            pagemsg("Would save with comment = %s" % comment)


# Command-line setup; --fix-indeclinable is forwarded to process_page().
parser = blib.create_argparser("Make neuter nouns be inanimate")
parser.add_argument("--fix-indeclinable", action="store_true", help="Make non-indeclinables be indeclinable")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Visit the references to each headword template in turn: all of ru-noun
# first, then all of ru-proper noun.
for headword_template in ("Template:ru-noun", "Template:ru-proper noun"):
    for i, page in blib.references(headword_template, start, end):
        process_page(i, page, args.save, args.verbose, args.fix_indeclinable)
Exemplo n.º 42
0
            t.add("1", "hu")
            # Put remaining parameters in order.
            for name, value, showkey in params:
                if re.search("^[0-9]+$", name):
                    t.add(str(int(name) + 1),
                          value,
                          showkey=showkey,
                          preserve_spacing=False)
                else:
                    t.add(name, value, showkey=showkey, preserve_spacing=False)
            blib.set_template_name(t, "affix")
            notes.append("convert {{hu-suffix}} to {{affix}}")
        if unicode(t) != origt:
            pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))

    return unicode(parsed), notes


# Command-line setup followed by the main loop: run process_page through
# blib.do_edit() on every page that references Template:hu-suffix.
parser = blib.create_argparser("Clean up {{hu-suffix}}")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for template in ("hu-suffix",):
    msg("Processing references to Template:%s" % template)
    template_refs = blib.references("Template:%s" % template, start, end)
    for i, page in template_refs:
        blib.do_edit(page, i, process_page, save=args.save,
                     verbose=args.verbose)
Exemplo n.º 43
0
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    parsed = blib.parse(page)

    found_headword_template = False
    for t in parsed.filter_templates():
        if unicode(t.name) in ["ru-adj"]:
            found_headword_template = True
    if not found_headword_template:
        notes = []
        for t in parsed.filter_templates():
            if unicode(t.name) in [
                    "ru-noun", "ru-noun+", "ru-proper noun", "ru-proper noun+"
            ]:
                notes.append("found noun header (%s)" % unicode(t.name))
            if unicode(t.name) == "head":
                notes.append("found head header (%s)" % getparam(t, "2"))
        pagemsg("Missing adj headword template%s" %
                (notes and "; " + ",".join(notes)))


# Command-line setup followed by the main loop: every page referencing
# Template:ru-decl-adj is passed to process_page().
parser = blib.create_argparser("Find missing adjective headwords")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

decl_adj_refs = blib.references("Template:ru-decl-adj", start, end)
for index, page in decl_adj_refs:
    process_page(index, page)
Exemplo n.º 44
0
      if origt != newt:
        pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(blib.group_notes(notes))
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

# Command-line setup.
parser = blib.create_argparser(u"Convert Japanese headwords from old-style to new-style")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Titles of all pages in the romaji-needing-attention category. The category
# is read without the start/end bounds; the set is passed to process_page().
romaji_to_keep = set(
  unicode(page.title())
  for i, page in blib.cat_articles("Japanese terms with romaji needing attention"))

for ref in ("ja-noun", "ja-adj", "ja-verb", "ja-pos"):
  msg("Processing references to Template:%s" % ref)
  template_refs = blib.references("Template:%s" % ref, start, end)
  for i, page in template_refs:
    process_page(i, page, args.save, args.verbose, romaji_to_keep)
Exemplo n.º 45
0
#!/usr/bin/env python
#coding: utf-8
 
import blib, pywikibot, re, string, sys, codecs
from blib import addparam
import arabiclib
 
def fix(page, index, text):
  """Move head= to the front of Arabic headword templates and hide its key.

  Only templates whose name is in arabiclib.arabic_all_headword_templates,
  that have a head= parameter and none of the parameters 1 through 8, are
  touched. For those, head= is removed and re-added via addparam() before
  the first remaining parameter; if it then is the first parameter, its
  showkey flag is cleared.

  Returns the (mutated) `text` and a fixed changelog string; `page` and
  `index` are unused here.
  """
  for tmpl in text.filter_templates():
    if tmpl.name not in arabiclib.arabic_all_headword_templates:
      continue
    if not tmpl.has("head"):
      continue
    # Skip templates that already carry any numbered parameter 1..8.
    if any(tmpl.has(num) for num in range(1, 9)):
      continue

    head_value = unicode(tmpl.get("head").value)
    tmpl.remove("head")
    if len(tmpl.params) > 0:
      first_name = tmpl.params[0].name
    else:
      first_name = None
    addparam(tmpl, "head", head_value, before=first_name)

    if tmpl.params[0].name == "head":
      tmpl.get("head").showkey = False

  return text, "ar headword: head= > 1="
 
# Main loop: apply fix() through blib.do_edit() to every page that
# references the ar-head "head" tracking template.
startFrom, upTo = blib.parse_args()

tracking_refs = blib.references(u"Template:tracking/ar-head/head", startFrom, upTo)
for index, page in tracking_refs:
  blib.do_edit(page, index, fix)
Exemplo n.º 46
0
def yield_ref_pages():
  """Yield (index, page) pairs for the references to each template to do.

  Templates are taken from templates_to_do in order; for each one, the
  pairs produced by blib.references() are passed through unchanged. Falsy
  pargs.start / pargs.end values are replaced by None.
  """
  for template in templates_to_do:
    template_refs = blib.references("Template:" + template,
        pargs.start or None, pargs.end or None)
    for ref_pair in template_refs:
      i, page = ref_pair
      yield i, page
Exemplo n.º 47
0
import arabiclib


def fix(page, index, text):
    """Move head= to the front of Arabic headword templates and hide its key.

    Only templates whose name is in arabiclib.arabic_all_headword_templates,
    that have a head= parameter and none of the parameters 1 through 8, are
    touched. For those, head= is removed and re-added via addparam() before
    the first remaining parameter; if it then is the first parameter, its
    showkey flag is cleared.

    Returns the (mutated) `text` and a fixed changelog string; `page` and
    `index` are unused here.
    """
    for tmpl in text.filter_templates():
        if tmpl.name not in arabiclib.arabic_all_headword_templates:
            continue
        if not tmpl.has("head"):
            continue
        # Skip templates that already carry any numbered parameter 1..8.
        if any(tmpl.has(num) for num in range(1, 9)):
            continue

        head_value = unicode(tmpl.get("head").value)
        tmpl.remove("head")
        if len(tmpl.params) > 0:
            first_name = tmpl.params[0].name
        else:
            first_name = None
        addparam(tmpl, "head", head_value, before=first_name)

        if tmpl.params[0].name == "head":
            tmpl.get("head").showkey = False

    return text, "ar headword: head= > 1="


# Main loop: apply fix() through blib.do_edit() to every page that
# references the ar-head "head" tracking template.
startFrom, upTo = blib.parse_args()

tracking_refs = blib.references(u"Template:tracking/ar-head/head",
                                startFrom, upTo)
for index, page in tracking_refs:
    blib.do_edit(page, index, fix)