Пример #1
0
def process_page(page, index, verbose):
    """Disabled migration of ``4=`` to ``past_f=`` in ru-conj-5c / ru-conj-6b.

    The function logs a warning that the script no longer applies and then
    returns ``None`` immediately.  Everything after the early ``return`` is
    unreachable; it is retained to show what the migration used to do.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ---- unreachable legacy implementation, kept for reference ----
    pagemsg("Processing")

    text = unicode(page.text)
    tree = blib.parse(page)
    notes = []
    for tmpl in tree.filter_templates():
        before = unicode(tmpl)
        if unicode(tmpl.name) in ["ru-conj-5c", "ru-conj-6b"]:
            feminine_past = getparam(tmpl, "4")
            if feminine_past:
                # Insert the named parameter where 4= was, then drop 4=.
                tmpl.add("past_f", feminine_past, before="4")
                rmparam(tmpl, "4")
                notes.append("Replace 4= with past_f=")
        after = unicode(tmpl)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(tree), notes
Пример #2
0
def snarf_adj_accents():
    """Record the accented spelling of every Bulgarian adjective headword.

    Iterates over the pages returned by
    ``blib.cat_articles("Bulgarian adjectives")`` and, for each {{bg-adj}}
    template, stores its first parameter in the externally defined
    ``adjs_to_accents`` mapping under the accent-stripped spelling.
    Templates without a headword, or whose headword still needs accents,
    are reported and skipped.  A conflicting earlier entry is reported and
    then overwritten.
    """
    for index, page in blib.cat_articles("Bulgarian adjectives"):
        pagetitle = unicode(page.title())

        def pagemsg(txt):
            msg("Page %s %s: %s" % (index, pagetitle, txt))

        for t in blib.parse(page).filter_templates():
            if tname(t) != "bg-adj":
                continue
            adj = getparam(t, "1")
            if not adj:
                pagemsg("WARNING: Missing headword in adj: %s" %
                        unicode(t))
                continue
            if bglib.needs_accents(adj):
                pagemsg("WARNING: Adjective %s missing an accent: %s" %
                        (adj, unicode(t)))
                continue
            bare = bglib.remove_accents(adj)
            if bare in adjs_to_accents:
                known = adjs_to_accents[bare]
                if known != adj:
                    pagemsg(
                        "WARNING: Two different accents possible for %s: %s and %s: %s"
                        % (bare, known, adj, unicode(t)))
            # The most recently seen spelling always wins.
            adjs_to_accents[bare] = adj
Пример #3
0
def process_page(page, index, parsed):
  """Remove the adj= and shto= parameters from every {{ru-ux}} call.

  The incoming ``parsed`` argument is ignored; the page is re-parsed with
  ``blib.parse``.  Returns a ``(new_text, notes)`` pair where ``notes`` has
  one entry per removed parameter.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  # (parameter, log message, change note) for each parameter to drop.
  removals = (
    ("adj", "Removing adj=", "remove adj= from ru-ux"),
    ("shto", "Removing shto=", "remove shto= from ru-ux"),
  )
  for t in parsed.filter_templates():
    if unicode(t.name) != "ru-ux":
      continue
    before = unicode(t)
    for param, logline, note in removals:
      if t.has(param):
        pagemsg(logline)
        notes.append(note)
        rmparam(t, param)
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  return unicode(parsed), notes
Пример #4
0
def process_page(page, index, parsed):
    """Disabled migration of parameter 4 of {{ru-conj-4a}} into parameter 3.

    Logs a warning that the script no longer applies and returns ``None``
    straight away.  The statements after the early ``return`` never execute
    and are kept only as a record of the former behaviour.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ---- unreachable legacy implementation, kept for reference ----
    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) == "ru-conj-4a":
            fourth = getparam(t, "4")
            if fourth == u"щ":
                # Append the consonant to param 3 and drop param 4.
                t.add("3", getparam(t, "3") + fourth)
                rmparam(t, "4")
                notes.append(u"move param 4 (щ) to param 3")
            elif fourth:
                pagemsg("WARNING: Strange value %s for param 4" % fourth)
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Unwrap ``{{lang|ru|...}}`` from passage= in Russian quote-* templates.

    Only quote templates with ``lang=ru`` whose whole passage= value is a
    single ``{{lang|ru|...}}`` wrapper are changed.  The incoming ``parsed``
    argument is ignored; the page is re-parsed.  Returns
    ``(new_text, notes)``.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    quote_templates = [
        "quote-book", "quote-hansard", "quote-journal",
        "quote-newsgroup", "quote-song", "quote-us-patent",
        "quote-video", "quote-web", "quote-wikipedia"
    ]
    for t in parsed.filter_templates():
        before = unicode(t)
        is_quote = unicode(t.name) in quote_templates
        if is_quote and getparam(t, "lang") == "ru":
            wrapped = re.search(r"^\{\{lang\|ru\|(.*)\}\}$",
                                getparam(t, "passage"))
            if wrapped:
                t.add("passage", wrapped.group(1))
                notes.append("remove {{lang|ru|...}} from passage= in quote-*")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
Пример #6
0
def process_page(page, index, parsed):
    """Swap arguments 1 and 2 of {{ru-conj}} / {{ru-conj-old}}.

    The swap happens only when argument 2 currently holds one of the known
    verb-type codes (pf, impf and their -intr/-refl/-impers variants).  The
    incoming ``parsed`` argument is ignored; the page is re-parsed.
    Returns ``(new_text, notes)``.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    notes = []
    verb_types = [
        "pf", "pf-intr", "pf-refl", "pf-impers", "pf-intr-impers",
        "pf-refl-impers", "impf", "impf-intr", "impf-refl",
        "impf-impers", "impf-intr-impers", "impf-refl-impers"
    ]

    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj", "ru-conj-old"]:
            second = getparam(t, "2")
            if second in verb_types:
                first = getparam(t, "1")
                t.add("2", first)
                t.add("1", second)
                notes.append("move verb type from arg 2 to arg 1")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
def process_page(index, page, save, verbose):
  """Warn when a Russian verb-form page lacks its expected templates.

  Looks for an {{inflection of}} template and for {{head|ru|verb form}}.
  For each one that is missing, a warning is logged together with the
  definition lines ("# ..." lines) found in the page's ==Russian== section.
  Nothing is saved; ``save`` and ``verbose`` are accepted but not used.

  Returns None.  A page with more than one ==Russian== section is skipped
  with a warning.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  found_inflection_of = False
  found_head_verb_form = False
  for t in parsed.filter_templates():
    tn = unicode(t.name)
    if tn == "inflection of":
      found_inflection_of = True
    if tn == "head" and getparam(t, "1") == "ru" and getparam(t, "2") == "verb form":
      found_head_verb_form = True

  if found_head_verb_form and found_inflection_of:
    return

  # Start from an empty string so the warnings below still work when the
  # page has no ==Russian== section (previously this raised NameError).
  deflines = ""
  foundrussian = False
  # The capturing group keeps the headers: odd indices are level-2 headers,
  # even indices are the section bodies that follow them.
  sections = re.split("(^==[^=]*==\n)", text, flags=re.M)

  for j in xrange(2, len(sections), 2):
    if sections[j-1] == "==Russian==\n":
      if foundrussian:
        pagemsg("WARNING: Found multiple Russian sections, skipping page")
        return
      foundrussian = True

      # Joined with a literal backslash-n (raw string), presumably so the
      # whole warning stays on a single log line.
      deflines = r"\n".join(re.findall(r"^(# .*)$", sections[j], re.M))

  if not found_head_verb_form:
    pagemsg("WARNING: No {{head|ru|verb form}}: %s" % deflines)
  if not found_inflection_of:
    pagemsg("WARNING: No 'inflection of': %s" % deflines)
def process_page(index, page, save, verbose):
  """Drop the explicit "Russian terms with audio links" category.

  The category link is removed only when the page contains an {{audio}}
  template with ``lang=ru``.  When the text changes, the page is saved if
  ``save`` is true; otherwise the intended save is only logged.
  ``verbose`` is not used.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)

  has_ru_audio = any(
    unicode(t.name) == "audio" and getparam(t, "lang") == "ru"
    for t in parsed.filter_templates())
  if not has_ru_audio:
    return

  # Collapse the category link and its surrounding newlines to one blank line.
  new_text = re.sub(r"\n*\[\[Category:Russian terms with audio links]]\n*", "\n\n", text)
  if new_text == text:
    return

  comment = "Remove redundant [[:Category:Russian terms with audio links]]"
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
Пример #9
0
def process_page(index, page, save, verbose):
  """Move the first positional argument of {{ru-IPA}} into phon=.

  Templates that already carry a non-empty phon= are reported and left
  alone.  If the page text changes it is saved when ``save`` is true and
  otherwise only the intended save is logged; with ``verbose`` the full
  before/after text is logged as well.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  text = unicode(page.text)
  parsed = blib.parse(page)

  for t in parsed.filter_templates():
    if unicode(t.name) != "ru-IPA":
      continue
    before = unicode(t)
    if getparam(t, "phon"):
      pagemsg("phon= already present: %s" % unicode(t))
      continue
    respelling = getparam(t, "1")
    pagemsg("Adding phon=: %s" % unicode(t))
    rmparam(t, "1")
    t.add("phon", respelling)
    pagemsg("Replaced %s with %s" % (before, unicode(t)))

  newtext = unicode(parsed)

  if newtext == text:
    pagemsg("Skipping")
    return

  if verbose:
    pagemsg("Replacing <<%s>> with <<%s>>" % (text, newtext))
  comment = "Add phon= to ru-IPA templates"
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = newtext
  page.save(comment=comment)
Пример #10
0
def process_page(index, page, save, verbose):
  """Tally the Russian headword templates used on a page.

  Every template whose name is in ``ru_head_templates``, and every
  {{head|ru|...}} call, is counted in both ``cat_head_count`` and
  ``overall_head_count``.  Head types listed in ``ru_heads_to_warn_about``
  are reported, as is a page without any head.  Every 100th index
  triggers ``output_heads_seen()``.  ``save`` and ``verbose`` are not used.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  parsed = blib.parse(page)
  found_page_head = False
  for t in parsed.filter_templates():
    name = unicode(t.name)
    # Stays None unless this template is a Russian headword.
    headname = None
    if name in ru_head_templates:
      headname = name
    elif name == "head" and getparam(t, "1") == "ru":
      headtype = getparam(t, "2")
      headname = "head|ru|%s" % headtype
      if headtype in ru_heads_to_warn_about:
        pagemsg("WARNING: Found %s" % headname)
    if headname is None:
      continue
    for counter in (cat_head_count, overall_head_count):
      counter[headname] = counter.get(headname, 0) + 1
    found_page_head = True
  if not found_page_head:
    pagemsg("WARNING: No head")
  if index % 100 == 0:
    output_heads_seen()
Пример #11
0
def process_page(page, index, parsed):
    """Rename conjugation type ``3a`` to ``3olda`` in {{ru-conj}} / {{ru-conj-old}}.

    Applies to templates whose argument 2 starts with "3a".  Templates that
    contain a parameter whose value is exactly "or" (multi-arg
    conjugations) are reported and skipped.  The incoming ``parsed``
    argument is ignored; the page is re-parsed.  Returns
    ``(new_text, notes)``.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj", "ru-conj-old"]:
            conjtype = getparam(t, "2")
            if conjtype.startswith("3a"):
                has_or = any(unicode(p.value) == "or" for p in t.params)
                if has_or:
                    pagemsg("WARNING: Skipping multi-arg conjugation: %s" %
                            unicode(t))
                    continue
                t.add("2", conjtype.replace("3a", "3olda"))
                notes.append("rename conj type 3a -> 3olda")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
def process_page(page, index):
    """Compare old and new fr-pron module output for each {{fr-IPA}} call.

    For every pronunciation argument of every {{fr-IPA}} template (falling
    back to the page title when an argument is empty), the template call
    ``{{#invoke:fr-pron|show|...|check_new_module=1}}`` is expanded.  A
    result containing " || " is logged as a mismatch between the old and
    the new output; anything else is logged as agreeing.

    Reads the module-level ``args`` for its ``verbose`` flag.  Returns None.
    """
    global args
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)

    parsed = blib.parse(page)

    for t in parsed.filter_templates():
        if tname(t) != "fr-IPA":
            continue
        posval = getparam(t, "pos")
        pos_arg = "|pos=%s" % posval if posval else ""
        # Highest numbered argument (below 30) that is non-empty; at least 1.
        max_arg = 1
        for pronarg in xrange(2, 30):
            if getparam(t, str(pronarg)):
                max_arg = pronarg
        for pronarg in xrange(1, max_arg + 1):
            pronval = getparam(t, str(pronarg)) or pagetitle
            pron = expand_text(
                "{{#invoke:fr-pron|show|%s%s|check_new_module=1}}" %
                (pronval, pos_arg))
            if not pron:
                # A failed expansion yields a falsy value; testing it with
                # ``in`` would raise TypeError, so report and move on.
                pagemsg("WARNING: Error expanding {{fr-IPA|%s%s}}, skipping" %
                        (pronval, pos_arg))
                continue
            if " || " in pron:
                # Split once so an extra separator cannot break unpacking.
                pronold, pronnew = pron.split(" || ", 1)
                pagemsg(
                    "WARNING: {{fr-IPA|%s%s}} == %s in old but %s in new" %
                    (pronval, pos_arg, pronold, pronnew))
            else:
                pagemsg("{{fr-IPA|%s%s}} == %s in both old and new" %
                        (pronval, pos_arg, pron))
Пример #13
0
def process_page(index, page):
    """Report pages that still use old-style Russian noun headword templates.

    A page is reported when it has a {{ru-noun}} / {{ru-proper noun}}
    template and none of those templates has "-" as argument 3.  The
    message lists the templates and says whether a {{ru-noun-table}} or
    {{ru-decl-noun-see}} declension template is also present.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    parsed = blib.parse(page)

    saw_headword = False
    saw_invariant = False
    saw_decl = False
    old_style = []
    for t in parsed.filter_templates():
        name = unicode(t.name)
        if name in ["ru-noun", "ru-proper noun"]:
            saw_headword = True
            if getparam(t, "3") == "-":
                saw_invariant = True
            else:
                old_style.append(unicode(t))
        if name in ["ru-noun-table", "ru-decl-noun-see"]:
            saw_decl = True

    if not saw_headword or saw_invariant:
        return

    listing = ", ".join(old_style)
    if saw_decl:
        pagemsg("Found old-style headword template(s) %s with decl" %
                listing)
    else:
        pagemsg("Found old-style headword template(s) %s without decl" %
                listing)
Пример #14
0
def process_page(index, page, save, verbose):
    """Report past passive participles specified on perfective Russian verbs.

    For each {{ru-conj}} / {{ru-conj-old}} template whose argument 1 starts
    with "pf", the call is rewritten into a {{ru-generate-verb-forms}} call
    and expanded; if expansion fails the template is skipped with a
    warning.  Then every non-empty, non-"-" value of past_pasv_part[2-9]
    and ppp[2-9] on the template is logged, with anything from "//" onwards
    removed.

    ``save`` is not used; ``verbose`` is passed to ``blib.expand_text``.
    Returns None.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        # Template name as text; it selects the matching rewrite below.
        tn = unicode(t.name)
        if tn not in ["ru-conj", "ru-conj-old"]:
            continue
        if not getparam(t, "1").startswith("pf"):
            continue
        if tn == "ru-conj":
            tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms",
                              unicode(t))
        else:
            tempcall = re.sub(r"\{\{ru-conj-old",
                              "{{ru-generate-verb-forms|old=y", unicode(t))
        result = expand_text(tempcall)
        if not result:
            pagemsg("WARNING: Error generating forms, skipping")
            continue
        # The split result is not consulted below; the call is kept as in
        # the original so any error it raises still surfaces.
        args = blib.split_generate_args(result)
        for base in ["past_pasv_part", "ppp"]:
            for i in ["", "2", "3", "4", "5", "6", "7", "8", "9"]:
                val = getparam(t, base + i)
                if val and val != "-":
                    val = re.sub("//.*", "", val)
                    pagemsg(
                        "Found perfective past passive participle: %s" %
                        val)
Пример #15
0
def process_page(page, index, parsed):
  """Fold ``suffix=`` into the lemma of Russian adjective declension templates.

  Only pages whose title ends in the reflexive suffix are processed; other
  pages return None after the "Processing" message.  For {{ru-decl-adj}} /
  {{ru-adj-old}} templates carrying that suffix in suffix=, the suffix is
  appended to every comma-separated part of argument 1 and suffix= is
  removed.  The incoming ``parsed`` argument is ignored.  Returns
  ``(new_text, notes)``.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  if not pagetitle.endswith(u"ся"):
    return

  text = unicode(page.text)
  notes = []

  parsed = blib.parse(page)
  for t in parsed.filter_templates():
    before = unicode(t)
    is_adj_decl = unicode(t.name) in ["ru-decl-adj", "ru-adj-old"]
    if is_adj_decl and getparam(t, "suffix") == u"ся":
      # Suffix each alternative before a comma, then the end of the value.
      with_inner = re.sub(",", u"ся,", getparam(t, "1"))
      t.add("1", re.sub("$", u"ся", with_inner))
      rmparam(t, "suffix")
      notes.append(u"move suffix=ся to lemma")
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  return unicode(parsed), notes
Пример #16
0
def process_page(index, page, save, verbose):
  """Report past passive participles specified on perfective Russian verbs.

  For each {{ru-conj}} / {{ru-conj-old}} template whose argument 1 starts
  with "pf", the call is rewritten into a {{ru-generate-verb-forms}} call
  and expanded; if expansion fails the template is skipped with a warning.
  Then every non-empty, non-"-" value of past_pasv_part[2-9] and ppp[2-9]
  on the template is logged, with anything from "//" onwards removed.

  ``save`` is not used; ``verbose`` is passed to ``blib.expand_text``.
  Returns None.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  def expand_text(tempcall):
    return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

  pagemsg("Processing")

  parsed = blib.parse(page)
  for t in parsed.filter_templates():
    # Template name as text; it selects the matching rewrite below.
    tn = unicode(t.name)
    if tn not in ["ru-conj", "ru-conj-old"]:
      continue
    if not getparam(t, "1").startswith("pf"):
      continue
    if tn == "ru-conj":
      tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms", unicode(t))
    else:
      tempcall = re.sub(r"\{\{ru-conj-old", "{{ru-generate-verb-forms|old=y", unicode(t))
    result = expand_text(tempcall)
    if not result:
      pagemsg("WARNING: Error generating forms, skipping")
      continue
    # The split result is not consulted below; the call is kept as in the
    # original so any error it raises still surfaces.
    args = rulib.split_generate_args(result)
    for base in ["past_pasv_part", "ppp"]:
      for i in ["", "2", "3", "4", "5", "6", "7", "8", "9"]:
        val = getparam(t, base + i)
        if val and val != "-":
          val = re.sub("//.*", "", val)
          pagemsg("Found perfective past passive participle: %s" % val)
def process_page(page, index, parsed):
    """Replace empty participle overrides with "-" on class 7a/7b verbs.

    For {{ru-conj}} / {{ru-conj-old}} templates whose argument 2 is "7a" or
    "7b", an explicitly present but empty past_adv_part_short= or
    past_actv_part= is set to "-".  Templates containing a parameter whose
    value is exactly "or" (multi-arg conjugations) are reported and
    skipped.  The incoming ``parsed`` argument is ignored; the page is
    re-parsed.

    Returns ``(new_text, notes)`` when the page text changed.  Otherwise
    returns None, after logging a warning if no change notes were recorded.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        if (unicode(t.name) in ["ru-conj", "ru-conj-old"]
                and getparam(t, "2") in ["7a", "7b"]):
            if any(unicode(x.value) == "or" for x in t.params):
                pagemsg("WARNING: Skipping multi-arg conjugation: %s" %
                        unicode(t))
                continue
            for param in ["past_adv_part_short", "past_actv_part"]:
                # Only touch parameters that are present but left blank.
                if t.has(param) and getparam(t, param) == "":
                    notes.append("set %s=-" % param)
                    origt = unicode(t)
                    t.add(param, "-")
                    pagemsg("Replacing %s with %s" % (origt, unicode(t)))

    # Serialise the modified parse tree; the original referenced an
    # undefined ``new_text`` here and raised NameError.
    new_text = unicode(parsed)
    if new_text != text:
        return new_text, notes

    if not notes:
        pagemsg("WARNING: No changes")
Пример #18
0
def process_page(page, index, parsed):
    """Disabled migration of class 6 {{ru-conj}} types to the 6-degree notation.

    Logs a warning that the script no longer applies and returns ``None``
    at once.  The statements after the early ``return`` never execute and
    are kept only as a record of the former behaviour.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ---- unreachable legacy implementation, kept for reference ----
    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        conj = getparam(t, "1")
        if unicode(t.name) == "ru-conj":
            if re.search(r"^6[ac]", conj):
                if getparam(t, "no_iotation"):
                    rmparam(t, "no_iotation")
                    if conj.startswith("6a"):
                        notes.append(u"6a + no_iotation -> 6°a")
                    else:
                        notes.append(u"6c + no_iotation -> 6°c")
                    t.add("1", re.sub("^6", u"6°", conj))
            elif re.search(r"^6b", conj):
                notes.append(u"6b -> 6°b")
                t.add("1", re.sub("^6", u"6°", conj))
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
def look_up_tonal_form(pagename, pagemsg, verbose):
    """Return the tonal forms given by {{sl-tonal}} on page ``pagename``.

    Args:
        pagename: title of the page to look up on ``site``.
        pagemsg: callable used for log messages.
        verbose: when true, log the lookup details.

    Returns:
        A list holding argument 1 of the page's {{sl-tonal}} template
        followed by any non-empty arguments 2-6; the list is empty when the
        page has no such template.  ``None`` is returned when the page
        cannot be created or checked, does not exist, or has more than one
        {{sl-tonal}} call.
    """
    try:
        page = pywikibot.Page(site, pagename)
    except Exception as e:
        pagemsg("WARNING: Error looking up page %s: %s" %
                (pagename, unicode(e)))
        return None
    try:
        if not page.exists():
            if verbose:
                pagemsg("look_up_tonal_form: Page %s doesn't exist" % pagename)
            return None
    except Exception as e:
        pagemsg("WARNING: Error checking page existence for %s: %s" %
                (pagename, unicode(e)))
        return None

    tonal_forms = []
    for t in blib.parse(page).filter_templates():
        if unicode(t.name) != "sl-tonal":
            continue
        if verbose:
            pagemsg(
                "look_up_tonal_form: For page %s, found tonal template %s"
                % (pagename, unicode(t)))
        if tonal_forms:
            # A second template would be ambiguous, so give up entirely.
            pagemsg(
                "WARNING: Found multiple {{sl-tonal}} calls for page %s: new one is %s; can't handle"
                % (pagename, unicode(t)))
            return None
        tonal_forms.append(getparam(t, "1"))
        extras = [getparam(t, num) for num in ["2", "3", "4", "5", "6"]]
        tonal_forms.extend(form for form in extras if form)
    return tonal_forms
Пример #20
0
def process_page(index, page, save, verbose):
  """Count which Russian headword templates appear on a page.

  Templates named in ``ru_head_templates`` and {{head|ru|...}} calls are
  tallied in ``cat_head_count`` and ``overall_head_count``.  A warning is
  logged for head types in ``ru_heads_to_warn_about`` and for pages with
  no head at all.  ``output_heads_seen()`` runs whenever ``index`` is a
  multiple of 100.  ``save`` and ``verbose`` are not used.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  parsed = blib.parse(page)
  heads_on_page = 0
  for t in parsed.filter_templates():
    name = unicode(t.name)
    if name in ru_head_templates:
      headname = name
    elif name == "head" and getparam(t, "1") == "ru":
      headtype = getparam(t, "2")
      headname = "head|ru|%s" % headtype
      if headtype in ru_heads_to_warn_about:
        pagemsg("WARNING: Found %s" % headname)
    else:
      # Not a Russian headword template.
      continue
    cat_head_count[headname] = 1 + cat_head_count.get(headname, 0)
    overall_head_count[headname] = 1 + overall_head_count.get(headname, 0)
    heads_on_page += 1
  if heads_on_page == 0:
    pagemsg("WARNING: No head")
  if index % 100 == 0:
    output_heads_seen()
Пример #21
0
def process_page(page, index, parsed):
  """Convert head= to 1= in {{ru-phrase}} templates.

  Templates having tr= are reported.  A template with head= but no 1= has
  head= removed and its value stored as 1=; any tr= is removed and then
  re-added after 1=.  A template having both head= and 1= is reported and
  left alone.  The incoming ``parsed`` argument is ignored; the page is
  re-parsed.  Returns ``(new_text, notes)``.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    if unicode(t.name) != "ru-phrase":
      continue
    if t.has("tr"):
      pagemsg("WARNING: Has tr=: %s" % unicode(t))
    if not t.has("head"):
      continue
    if t.has("1"):
      pagemsg("WARNING: Has both head= and 1=: %s" % unicode(t))
      continue

    notes.append("ru-phrase: convert head= to 1=")
    before = unicode(t)
    headword = getparam(t, "head")
    rmparam(t, "head")
    translit = getparam(t, "tr")
    rmparam(t, "tr")
    t.add("1", headword)
    if translit:
      t.add("tr", translit)
    pagemsg("Replacing %s with %s" % (before, unicode(t)))

  return unicode(parsed), notes
Пример #22
0
def process_page(index, page):
    """Report pages that lack a {{ru-adj}} headword template.

    When no {{ru-adj}} template is present, a message is logged listing
    any Russian noun headword templates and the second argument of any
    {{head}} templates found on the page.  Returns None.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    parsed = blib.parse(page)

    found_headword_template = False
    notes = []
    for t in parsed.filter_templates():
        tn = unicode(t.name)
        if tn == "ru-adj":
            found_headword_template = True
        if tn in ["ru-noun", "ru-noun+", "ru-proper noun", "ru-proper noun+"]:
            notes.append("found noun header (%s)" % tn)
        if tn == "head":
            notes.append("found head header (%s)" % getparam(t, "2"))

    if not found_headword_template:
        # A conditional expression is needed here: ``notes and ...`` would
        # yield the empty list itself and print a stray "[]".
        suffix = "; " + ",".join(notes) if notes else ""
        pagemsg("Missing adj headword template%s" % suffix)
def process_page(page, index):
  """Report pages whose descendants include no non-West-Germanic language.

  Looks at argument 1 of every {{desc}} / {{desctree}} template that has no
  bor= value.  Codes in the list below (Gothic, Crimean Gothic and the
  North Germanic languages) are logged as non-West-Germanic descendants;
  all other codes are collected.  If no listed code was seen, the
  collected codes are logged.  Returns None.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  parsed = blib.parse(page)

  # "gmq-oda" is Old Danish; the list previously had the misspelt
  # "gwq-oda", which can never match a real descendant code.
  non_wgem_codes = (
    "got", "gme-cgo", "non", "non-ogt", "non-own", "non-oen",
    "is", "fo", "nrn", "no", "nb", "nn", "sv", "da",
    "gmq-osw", "gmq-oda", "gmq-bot", "gmq-jmk", "gmq-scy", "gmq-gut", "ovd",
  )

  non_wgem = False
  wgem = []
  for t in parsed.filter_templates():
    if tname(t) not in ["desc", "desctree"]:
      continue
    if getparam(t, "bor"):
      # Borrowed descendants are ignored.
      continue
    desc = getparam(t, "1")
    if desc in non_wgem_codes:
      pagemsg("Saw non-West-Germanic descendant %s" % unicode(t))
      non_wgem = True
    else:
      wgem.append(desc)
  if not non_wgem:
    pagemsg("Saw no non-West-Germanic descendants but saw West-Germanic or non-Germanic descendants %s" %
        ",".join(wgem))
Пример #24
0
def process_page(index, page, save, verbose, direc):
    """Edit {{ru-noun+}} / {{ru-noun-table}} calls to add inanimacy to neuters.

    NOTE(review): this function looks unfinished.  It contains a bare
    ``FIXME`` name used as a statement, reads ``fix_indeclinable`` which is
    not defined in this function, and never calls the local helper
    ``frob_gender_param``.  ``direc`` is not used.

    If the page text ends up changed it is saved with an "Add inanimacy to
    neuters" comment when ``save`` is true; otherwise the intended save is
    only logged.  Returns None.
    """
    pagetitle = unicode(page.title())
    # Page title with everything up to the last colon removed (not used below).
    subpagetitle = re.sub(".*:", "", pagetitle)

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    notes = []
    text = unicode(page.text)
    parsed = blib.parse(page)

    # Rewrites gender value "n" to "n-in" and "n-p" to "n-in-p" in the given
    # parameter.  Not called anywhere in the visible code.
    def frob_gender_param(t, param):
        val = getparam(t, param)
        if val == "n":
            t.add(param, "n-in")
        elif val == "n-p":
            t.add(param, "n-in-p")

    for t in parsed.filter_templates():
        if unicode(t.name) in ["ru-noun+", "ru-noun-table"]:
            origt = unicode(t)
            # Abandon the whole page if the template has any parameter not
            # named "1".
            for param in t.params:
                if unicode(param.name) != "1":
                    pagemsg("WARNING: Found other than a single param in template, skipping: %s" % unicode(t))
                    return
            # Placeholder for the missing edit step.  As a bare name this
            # raises NameError when reached, unless FIXME is defined elsewhere.
            FIXME
            # Nothing visible above modifies ``t``, so as written this
            # condition is false.
            if origt != unicode(t):
                param3 = getparam(t, "3")
                if param3 != "-":
                    # ``fix_indeclinable`` is not defined in this function;
                    # presumably a module-level flag -- TODO confirm.
                    if fix_indeclinable:
                        if param3:
                            pagemsg("WARNING: Can't make indeclinable, has genitive singular given: %s" % origt)
                            return
                        else:
                            t.add("3", "-")
                            notes.append("make indeclinable")
                            pagemsg("Making indeclinable: %s" % unicode(t))
                    else:
                        pagemsg("WARNING: Would add inanimacy to neuter, but isn't marked as indeclinable: %s" % origt)
                        return
                pagemsg("Replacing %s with %s" % (origt, unicode(t)))

    new_text = unicode(parsed)

    if new_text != text:
        if verbose:
            pagemsg("Replacing <%s> with <%s>" % (text, new_text))
        # Append the collected notes to the save comment when there are any.
        if notes:
            comment = "Add inanimacy to neuters (%s)" % "; ".join(notes)
        else:
            comment = "Add inanimacy to neuters"
        if save:
            pagemsg("Saving with comment = %s" % comment)
            page.text = new_text
            page.save(comment=comment)
        else:
            pagemsg("Would save with comment = %s" % comment)
Пример #25
0
def process_page(index, page, template, new_name, params_to_add,
                 params_to_remove, params_to_rename, filters, comment):
    """Apply a configurable set of edits to every call of one template.

    Args:
        index: page index, used only in log messages.
        page: page object; it is parsed with ``blib.parse``.
        template: name of the template whose calls are edited.
        new_name: when truthy, edited templates are renamed to this name.
        params_to_add: iterable of ``(param, value)`` pairs to set.
        params_to_remove: iterable of parameter names to delete.
        params_to_rename: iterable of ``(old_param, new_param)`` pairs.
        filters: iterable of strings of the form ``PARAM=VALUE`` (exact
            match) or ``PARAM~REGEX`` (regex search).  A template call is
            edited only if every filter matches; otherwise it is logged
            and left unchanged.
        comment: when truthy, returned in place of the generated notes.

    Returns:
        ``(new_text, comment or notes)``.

    Raises:
        ValueError: if a filter has neither of the two recognised forms.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def filter_matches(t, filt):
        # Decide whether a single filter accepts template call ``t``.
        m = re.search("^(.*)=(.*)$", filt)
        if m:
            return getparam(t, m.group(1)) == m.group(2)
        m = re.search("^(.*)~(.*)$", filt)
        if m:
            return bool(re.search(m.group(2), getparam(t, m.group(1))))
        raise ValueError("Unrecognized filter %s" % filt)

    pagemsg("Processing")
    notes = []

    parsed = blib.parse(page)

    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        if tn != template:
            continue

        failed_filter = None
        for filt in filters:
            if not filter_matches(t, filt):
                failed_filter = filt
                break
        if failed_filter is not None:
            # Both values must be passed as one tuple to the format string.
            pagemsg("Skipping %s because filter %s doesn't match" %
                    (origt, failed_filter))
            continue

        for old_param, new_param in params_to_rename:
            if t.has(old_param):
                # Insert the new name where the old one was, then drop the old.
                t.add(new_param,
                      getparam(t, old_param),
                      before=old_param,
                      preserve_spacing=False)
                rmparam(t, old_param)
                notes.append("rename %s= to %s= in {{%s}}" %
                             (old_param, new_param, tn))
        for param in params_to_remove:
            if t.has(param):
                rmparam(t, param)
                notes.append("remove %s= from {{%s}}" % (param, tn))
        for param, value in params_to_add:
            if getparam(t, param) != value:
                t.add(param, value)
                notes.append("add %s=%s to {{%s}}" % (param, value, tn))
        if new_name:
            blib.set_template_name(t, new_name)
            notes.append("rename {{%s}} to {{%s}}" % (template, new_name))

        if unicode(t) != origt:
            pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))

    return unicode(parsed), comment or notes
Пример #26
0
def search_noconj(startFrom, upTo):
  """List Arabic verb pages lacking an ar-verb or ar-conj template call.

  Goes through the pages of the "Arabic verbs" category between
  ``startFrom`` and ``upTo`` and emits one message for each of the
  substrings "{{ar-verb" and "{{ar-conj" that is absent from the page's
  parsed text.
  """
  # (substring to look for, message format used when it is missing)
  checks = (
    ("{{ar-verb", "* ar-verb not in {{l|ar|%s}}"),
    ("{{ar-conj", "* ar-conj not in {{l|ar|%s}}"),
  )
  for index, page in blib.cat_articles(u"Arabic verbs", startFrom, upTo):
    text = unicode(blib.parse(page))
    pagetitle = page.title()
    for needle, complaint in checks:
      if needle not in text:
        msg(complaint % pagetitle)
Пример #27
0
def process_page(templates, index, page, save=False, verbose=False):
  """Move the plural stem one numbered argument to the left.

  Applies to templates whose name is in ``templates``.  The plural stem is
  read from argument 5 (moved to 4) when ``arg1_is_stress`` accepts
  argument 1, and otherwise from argument 4 (moved to 3).  Templates with
  an unnamed argument equal to "or", "-" or "_", or starting with "join:",
  are reported and skipped, as are templates where the target argument is
  already filled.  A missing page is reported and skipped.

  If anything changed the page is saved when ``save`` is true; otherwise
  the intended save is only logged.  ``verbose`` is not used.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  if not page.exists():
    pagemsg("WARNING: Page doesn't exist")
    return

  parsed = blib.parse(page)

  should_save = False

  for t in parsed.filter_templates():
    if unicode(t.name) not in templates:
      continue
    origt = unicode(t)

    # Punt on multi-arg-set and multi-word templates; not handled yet.
    blocker = None
    for param in t.params:
      if param.showkey:
        continue
      val = unicode(param.value)
      if val == "or":
        blocker = "WARNING: Can't handle multi-decl templates: %s"
      elif val == "-" or val == "_" or val.startswith("join:"):
        blocker = "WARNING: Can't handle multi-word templates: %s"
      if blocker:
        break
    if blocker:
      pagemsg(blocker % unicode(t))
      continue

    if arg1_is_stress(getparam(t, "1")):
      oldplarg, newplarg = "5", "4"
    else:
      oldplarg, newplarg = "4", "3"
    plstem = getparam(t, oldplarg)
    if not plstem:
      continue
    if getparam(t, newplarg):
      pagemsg("WARNING: Something wrong, found args in both positions %s and %s: %s" %
          (newplarg, oldplarg, unicode(t)))
      continue
    rmparam(t, oldplarg)
    t.add(newplarg, plstem)
    should_save = True
    pagemsg("Replacing %s with %s" % (origt, unicode(t)))

  if not should_save:
    return

  comment = "Move plstem from 5th/4th argument to 4th/3rd"
  if save:
    pagemsg("Saving with comment = %s" % comment)
    page.text = unicode(parsed)
    page.save(comment=comment)
  else:
    pagemsg("Would save with comment = %s" % comment)
Пример #28
0
def process_page(page, index, parsed):
    """Rewrite class 7 ``ru-conj``/``ru-conj-old`` calls to the new layout.

    The incoming `parsed` value is ignored; the page is re-parsed here.
    For templates whose argument 2 starts with "7", the numbered arguments
    1-4 (and 5 when non-empty) are rebuilt, followed by all non-numbered
    parameters except ``lang``, ``nocat`` and ``tr``.

    Returns a ``(new_text, notes)`` pair; `notes` gets one entry per
    template whose text changed.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj", "ru-conj-old"]:
            arg1 = getparam(t, "1")
            verbclass = getparam(t, "2")
            if not verbclass.startswith("7"):
                continue
            arg3 = getparam(t, "3")
            arg4 = getparam(t, "4")
            arg5 = getparam(t, "5")
            assert not getparam(t, "6")
            if verbclass.startswith("7b"):
                lacks_yo = u"ё" not in arg4 and u"ѣ̈" not in arg4
                if re.search(u"[еѣ]сти́(сь)?$", arg3) and lacks_yo:
                    assert not arg5
                    arg5 = u"ёе"
                arg4 = rulib.make_unstressed_ru(arg4)
            if re.search(u"(л[еѣ]́?зть|с[еѣ]́?сть|обокра́сть)(ся)?$", arg3):
                arg5 = ""
            # Remember the named params we want to keep before wiping.
            named = [
                (unicode(p.name), p.value) for p in t.params
                if not re.search(r"^[0-9]+$", unicode(p.name))
                and unicode(p.name) not in ["lang", "nocat", "tr"]
            ]
            del t.params[:]
            # Numbered params first, then the preserved named ones.
            for num, val in (("1", arg1), ("2", verbclass), ("3", arg3),
                             ("4", arg4)):
                t.add(num, val)
            if arg5:
                t.add("5", arg5)
            for name, value in named:
                t.add(name, value)
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))
            notes.append(
                "rewrite class 7 verb to correspond to module changes")

    return unicode(parsed), notes
Пример #29
0
def process_page(index, page, direc):
    """Unfinished pass over ``ru-noun+`` / ``ru-noun-table`` templates.

    NOTE(review): this function is incomplete as written. The bare name
    ``FIXME`` below is a placeholder statement and will raise NameError the
    first time a matching template with only a ``1`` param is reached, and
    ``fix_indeclinable`` is not defined anywhere in this function. `direc`,
    `subpagetitle` and `text` are never used here, and `comment` is computed
    but not returned.

    Returns ``(new_text, notes)`` when the loop completes; returns None on
    the early-exit warning paths.
    """
    pagetitle = unicode(page.title())
    subpagetitle = re.sub(".*:", "", pagetitle)

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    notes = []
    text = unicode(page.text)
    parsed = blib.parse(page)

    # Helper that rewrites a gender value "n" -> "n-in" and "n-p" -> "n-in-p".
    # It is never called in this function; presumably the FIXME placeholder
    # below was meant to invoke it -- TODO confirm.
    def frob_gender_param(t, param):
        val = getparam(t, param)
        if val == "n":
            t.add(param, "n-in")
        elif val == "n-p":
            t.add(param, "n-in-p")

    for t in parsed.filter_templates():
        if unicode(t.name) in ["ru-noun+", "ru-noun-table"]:
            origt = unicode(t)
            # Bail out of the whole page if the template has any param
            # other than "1".
            for param in t.params:
                if unicode(param.name) != "1":
                    pagemsg(
                        "WARNING: Found other than a single param in template, skipping: %s"
                        % unicode(t))
                    return
            # Placeholder for the missing modification step; evaluating this
            # undefined name raises NameError at runtime.
            FIXME
            if origt != unicode(t):
                param3 = getparam(t, "3")
                if param3 != "-":
                    # NOTE(review): fix_indeclinable is not defined in this
                    # block; it must come from module scope -- verify.
                    if fix_indeclinable:
                        if param3:
                            pagemsg(
                                "WARNING: Can't make indeclinable, has genitive singular given: %s"
                                % origt)
                            return
                        else:
                            t.add("3", "-")
                            notes.append("make indeclinable")
                            pagemsg("Making indeclinable: %s" % unicode(t))
                    else:
                        pagemsg(
                            "WARNING: Would add inanimacy to neuter, but isn't marked as indeclinable: %s"
                            % origt)
                        return
                pagemsg("Replacing %s with %s" % (origt, unicode(t)))

    # The comment string is built but not used or returned below.
    if notes:
        comment = "Add inanimacy to neuters (%s)" % "; ".join(notes)
    else:
        comment = "Add inanimacy to neuters"

    return unicode(parsed), notes
def process_page(index, page, save, verbose):
  """Disabled migration of ``ru-conj`` type 6a / 7b arguments.

  The function currently only logs a warning and returns None. Everything
  after the early `return` is unreachable and retained for reference.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  # ---- Unreachable legacy logic below ----
  pagemsg("Processing")

  def pad_positional(t, last):
    # Add empty values for any missing positional args 1..last.
    for num in xrange(1, last + 1):
      key = str(num)
      if not t.has(key):
        t.add(key, "")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    before = unicode(t)
    if unicode(t.name) in ["ru-conj"]:
      conjtype = getparam(t, "1")
      if conjtype.startswith("6a"):
        arg6 = getparam(t, "6")
        if arg6:
          rmparam(t, "6")
          if not getparam(t, "5"):
            rmparam(t, "5")
          pad_positional(t, 3)
          t.add("4", arg6)
          notes.append("move type 6a arg6 -> arg4")
      if conjtype.startswith("7b"):
        arg7 = getparam(t, "7")
        if arg7:
          rmparam(t, "7")
          pad_positional(t, 5)
          t.add("6", arg7)
          notes.append("move type 7b arg7 -> arg6")
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)
  if new_text == text:
    return
  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
Пример #31
0
def process_page(index, page, save, verbose, nouns):
    """Emit an etymology line for the noun derived from this adjective.

    The page title must end in one of the two adjective endings matched
    below; the candidate noun is the title with that ending replaced by the
    noun suffix, and it must be in `nouns`. For each ``ru-adj`` template
    with a single head, a line is printed via `msg` unless the noun's
    Russian section is missing or already contains "==Etymology".
    Nothing is saved; `save` is unused. Returns None.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    if re.search(u"[иы]й$", pagetitle) is None:
        pagemsg(u"Skipping adjective not in -ый or -ий")
        return

    noun = re.sub(u"[иы]й$", u"ость", pagetitle)
    if noun not in nouns:
        return

    text = unicode(page.text)
    parsed = blib.parse(page)

    # Any alt-spelling template disqualifies the whole page.
    if any(unicode(t.name) == u"ru-adj-alt-ё"
           for t in parsed.filter_templates()):
        pagemsg(u"Skipping alt-ё adjective")
        return

    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-adj":
            continue
        heads = blib.fetch_param_chain(t, "1", "head", pagetitle)
        if len(heads) > 1:
            pagemsg("Skipping adjective with multiple heads: %s" %
                    ",".join(heads))
            return
        tr = getparam(t, "tr")

        nounsection = blib.find_lang_section(noun, "Russian", pagemsg,
                                             errandpagemsg)
        if not nounsection:
            pagemsg("Couldn't find Russian section for %s" % noun)
            continue
        if "==Etymology" in nounsection:
            pagemsg("Noun %s already has etymology" % noun)
            continue
        if tr:
            line = u"%s %s+tr1=%s+-ость no-etym" % (noun, heads[0], tr)
        else:
            line = u"%s %s+-ость no-etym" % (noun, heads[0])
        msg(line)
Пример #32
0
def process_page(page, index, parsed):
  """Rewrite class 8b ``ru-conj``/``ru-conj-old`` calls to the new layout.

  The incoming `parsed` value is ignored; the page is re-parsed here.
  For templates whose argument 2 starts with "8b", argument 4 is passed
  through `rulib.make_unstressed_ru` and the parameter list is rebuilt as
  numbered args 1-4 (plus 5 when non-empty) followed by all non-numbered
  params except ``lang``, ``nocat`` and ``tr``. Past-tense and participle
  overrides are only reported through `errmsg`.

  Returns a ``(new_text, notes)`` pair.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    before = unicode(t)
    if unicode(t.name) in ["ru-conj", "ru-conj-old"]:
      arg1 = getparam(t, "1")
      verbclass = getparam(t, "2")
      if not verbclass.startswith("8b"):
        continue
      arg3 = getparam(t, "3")
      arg4 = getparam(t, "4")
      arg5 = getparam(t, "5")
      assert not getparam(t, "6")
      past_m = getparam(t, "past_m")
      if past_m:
        errmsg("WARNING: Has past_m=%s" % past_m)
      pap = getparam(t, "pap") or getparam(t, "past_adv_part")
      if pap:
        errmsg("WARNING: Has pap=%s" % pap)
      pap2 = getparam(t, "pap2") or getparam(t, "past_adv_part2")
      if pap2:
        errmsg("WARNING: Has pap2=%s" % pap2)
      arg4 = rulib.make_unstressed_ru(arg4)
      # Remember the named params we want to keep before wiping.
      named = [
        (unicode(p.name), p.value) for p in t.params
        if not re.search(r"^[0-9]+$", unicode(p.name))
        and unicode(p.name) not in ["lang", "nocat", "tr"]
      ]
      del t.params[:]
      # Numbered params first, then the preserved named ones.
      for num, val in (("1", arg1), ("2", verbclass), ("3", arg3), ("4", arg4)):
        t.add(num, val)
      if arg5:
        t.add("5", arg5)
      for name, value in named:
        t.add(name, value)
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))
      notes.append("rewrite class 8b verb to correspond to module changes")

  return unicode(parsed), notes
Пример #33
0
def process_page(index, page, save, verbose):
  """Disabled check of past-tense overrides on ``ru-conj-7a``/``ru-conj-7b``.

  The function currently only logs a warning and returns None. Everything
  after the early `return` is unreachable and retained for reference.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  # ---- Unreachable legacy logic below ----
  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    before = unicode(t)
    if unicode(t.name) in ["ru-conj-7a", "ru-conj-7b"]:
      past_stem = getparam(t, "4")
      vowel_end = re.search(u"[аэыоуяеиёю́]$", past_stem)
      past_m = getparam(t, "past_m")
      past_f = getparam(t, "past_f")
      past_n = getparam(t, "past_n")
      past_pl = getparam(t, "past_pl")
      if any([past_m, past_f, past_n, past_pl]):
        upast_stem = ru.make_unstressed(past_stem)
        expected_past_m = past_stem + (u"л" if vowel_end else "")
        expected_past_f = upast_stem + u"ла́"
        expected_past_n = upast_stem + u"ло́"
        expected_past_pl = upast_stem + u"ли́"
        # past_m may be absent; the other three must match exactly.
        masc_ok = not past_m or expected_past_m == past_m
        rest_ok = (expected_past_f == past_f and
                   expected_past_n == past_n and
                   expected_past_pl == past_pl)
        if masc_ok and rest_ok:
          msg("Would remove past overrides and add arg5=b")
        else:
          msg("WARNING: Remaining past overrides: past_m=%s, past_f=%s, past_n=%s, past_pl=%s, expected_past_m=%s, expected_past_f=%s, expected_past_n=%s, expected_past_pl=%s" %
              (past_m, past_f, past_n, past_pl, expected_past_m, expected_past_f, expected_past_n, expected_past_pl))
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)
  if new_text == text:
    return
  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
Пример #34
0
def process_page(index, page, save, verbose):
  """Fold ``past_m=`` into positional args for class 8a/8b ``ru-conj`` calls.

  For each ``ru-conj`` template whose argument 2 starts with "8a" or "8b"
  and that has a non-empty ``past_m``, the override is removed and then:
  dropped when equal to the stem (arg 3); written into arg 3 when the class
  starts with "8b" but not "8b/" and the stem equals
  `ru.make_unstressed(past_m)`; otherwise written into arg 5. Templates
  with any param value equal to "or" are skipped with a warning. The page
  is saved when `save` is true, otherwise the would-be edit is logged.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    before = unicode(t)
    conjclass = getparam(t, "2")
    if unicode(t.name) in ["ru-conj"] and re.search(r"^8[ab]", conjclass):
      if any(unicode(p.value) == "or" for p in t.params):
        pagemsg("WARNING: Skipping multi-arg conjugation: %s" % unicode(t))
        continue
      past_m = getparam(t, "past_m")
      if past_m:
        rmparam(t, "past_m")
        stem = getparam(t, "3")
        plain_8b = conjclass.startswith("8b") and not conjclass.startswith("8b/")
        if stem == past_m:
          pagemsg("Stem %s and past_m same" % stem)
          notes.append("remove redundant past_m %s" % past_m)
        elif plain_8b and ru.make_unstressed(past_m) == stem:
          pagemsg("Class 8b/b and stem %s is unstressed version of past_m %s, replacing stem with past_m" % (
            stem, past_m))
          t.add("3", past_m)
          notes.append("moving past_m %s to arg 3" % past_m)
        else:
          pagemsg("Stem %s and past_m %s are different, putting past_m in param 5" % (
            stem, past_m))
          t.add("5", past_m)
          notes.append("moving past_m %s to arg 5" % past_m)
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)
  if new_text == text:
    return
  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
def process_page(page, index):
  """Log every page referenced by ``{{R:vep:UVVV}}`` that does not exist.

  Read-only: each value in the template's "1" param chain is looked up on
  `site` and reported when missing. Returns None.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  for t in blib.parse(page).filter_templates():
    if unicode(t.name) != "R:vep:UVVV":
      continue
    for refpage in blib.fetch_param_chain(t, "1", ""):
      if pywikibot.Page(site, refpage).exists():
        continue
      pagemsg("Page [[%s]] does not exist" % refpage)
def process_page(index, page, save, verbose):
  """Log every page referenced by ``{{R:vep:UVVV}}`` that does not exist.

  Read-only; `save` and `verbose` are accepted but unused. Returns None.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  parsed = blib.parse(page)
  ref_templates = [t for t in parsed.filter_templates()
                   if unicode(t.name) == "R:vep:UVVV"]
  for t in ref_templates:
    for refpage in blib.fetch_param_chain(t, "1", ""):
      target = pywikibot.Page(site, refpage)
      if not target.exists():
        pagemsg("Page [[%s]] does not exist" % refpage)
Пример #37
0
def process_lemma(index, pagetitle, slots, program_args):
  """Run `process_page` on the form pages of a Latin lemma.

  Each ``la-conj`` / ``la-ndecl`` / ``la-adecl`` template on the lemma page
  is expanded with `lalib.generate_infl_forms`. For every generated slot
  that equals, or matches per `lalib.slot_matches_spec`, one of `slots`,
  each comma-separated form is macron-stripped and edited through
  `blib.do_edit`. Forms containing "[" or "|", forms with no existing page
  and the lemma page itself are skipped with a warning.
  """
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))
  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  def expand_text(tempcall):
    return blib.expand_text(tempcall, pagetitle, pagemsg, program_args.verbose)

  # Adapter from blib.do_edit's callback signature to process_page's.
  def do_process_page(page, index, parsed):
    return process_page(index, page, program_args)

  pos_by_template = (("la-conj", "verb"), ("la-ndecl", "noun"), ("la-adecl", "adj"))

  page = pywikibot.Page(site, pagetitle)
  parsed = blib.parse(page)
  for t in parsed.filter_templates():
    tn = tname(t)
    pos = next((p for name, p in pos_by_template if tn == name), None)
    if not pos:
      continue
    args = lalib.generate_infl_forms(pos, unicode(t), errandpagemsg, expand_text)
    for slot in args:
      wanted = any(spec == slot or lalib.slot_matches_spec(slot, spec)
                   for spec in slots)
      if not wanted:
        continue
      for formpagename in re.split(",", args[slot]):
        if "[" in formpagename or "|" in formpagename:
          pagemsg("WARNING: Skipping page %s with links in it" % formpagename)
          continue
        formpagename = lalib.remove_macrons(formpagename)
        formpage = pywikibot.Page(site, formpagename)
        if not formpage.exists():
          pagemsg("WARNING: Form page %s doesn't exist, skipping" % formpagename)
        elif formpagename == pagetitle:
          pagemsg("WARNING: Skipping dictionary form")
        else:
          blib.do_edit(formpage, index, do_process_page,
              save=program_args.save, verbose=program_args.verbose,
              diff=program_args.diff)
def process_page(index, page, save, verbose):
  """Warn when the page has no ``{{inflection of}}`` template.

  Read-only; `save` and `verbose` are accepted but unused. Returns None.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  has_inflection_of = any(
    unicode(t.name) in ["inflection of"] for t in parsed.filter_templates())
  if not has_inflection_of:
    pagemsg("WARNING: No 'inflection of'")
def process_page(index, page, save, verbose):
  """Log a warning if no template named ``inflection of`` is on the page.

  Nothing is modified or saved; `save` and `verbose` are unused.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  template_names = [unicode(t.name) for t in parsed.filter_templates()]
  matching = [name for name in template_names if name in ["inflection of"]]
  if matching:
    return
  pagemsg("WARNING: No 'inflection of'")
def process_page(page, index, parsed):
  """Move a ``{{ja-usex}}`` call out of a quote template's text parameter.

  The incoming `parsed` value is ignored; the page is re-parsed here.
  For each template named in `quote_templates`, the quoted text is looked
  up in the template-specific positional param (6, 7 or 8), then ``text=``,
  then ``passage=``. When the whole value is a single ``{{ja-usex|...}}``
  call, that param is removed and the call is re-emitted on its own
  ``#*:`` line directly after the quote line in the raw page text.

  Returns ``(new_text, notes)``.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  newtext = text
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    origt = unicode(t)
    tn = tname(t)
    # pagemsg("tn=%s" % unicode(tn))
    if tn in quote_templates:
      text_param = None
      if tn in quote_templates_text_param_6:
        text_param = "6"
      elif tn in quote_templates_text_param_7:
        text_param = "7"
      elif tn in quote_templates_text_param_8:
        text_param = "8"
      textval = ""
      if text_param:
        textval = getparam(t, text_param)
      if not textval:
        text_param = "text"
        textval = getparam(t, text_param)
      if not textval:
        text_param = "passage"
        textval = getparam(t, text_param)
      # pagemsg("%s=%s" % (text_param, textval))
      textval = textval.strip()
      if re.search(r"^\{\{ja-usex\|.*\}\}$", textval, re.S):
        rmparam(t, text_param)
        newt = unicode(t)

        # Use a function replacement so that backslashes inside the template
        # text or the usex call are inserted literally; in a replacement
        # *string* re.sub would interpret them as escapes/group references.
        def relocate(m, newt=newt, textval=textval):
          bullet = m.group(1)
          return "%s %s%s: %s" % (bullet, newt, bullet, textval)

        newnewtext = re.sub(r"(\n#+\*) *%s" % re.escape(origt), relocate,
          newtext)
        if newtext == newnewtext:
          pagemsg("WARNING: Can't find quote template in text: %s" % origt)
        else:
          newtext = newnewtext
          notes.append("move ja-usex call outside of %s call" % tn)
      elif "{{ja-usex|" in textval:
        pagemsg("WARNING: Found {{ja-usex| embedded in quote text but not whole param: %s" %
            origt)

  return newtext, notes
def process_page(page, index, do_noun):
    """Strip the explicit ``[[Category:Russian nouns]]`` link from a page.

    When `do_noun` is truthy the "nouns" category is removed, otherwise the
    "proper nouns" category. A category line surrounded by blank lines is
    collapsed to a single blank line; any remaining occurrence followed by
    a newline is deleted.

    Returns ``(new_text, comment)`` where `comment` names the category that
    was targeted.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)

    cat = "nouns" if do_noun else "proper nouns"
    new_text = re.sub(r"\n\n\n*\[\[Category:Russian %s]]\n\n\n*" % cat, "\n\n",
                      text)
    new_text = re.sub(r"\[\[Category:Russian %s]]\n" % cat, "", new_text)
    # The category name must be interpolated; previously the raw "%s"
    # placeholder was returned as the edit comment.
    return new_text, "Remove redundant [[:Category:Russian %s]]" % cat
Пример #42
0
def search_iyya_noetym(startFrom, upTo):
  """Report Arabic nouns with the target ending that lack an etymology template.

  Iterates the "Arabic nouns" category between `startFrom` and `upTo`.
  For titles with the ending checked below, logs a line when none of the
  ``ar-etym-*`` templates listed here is present, noting whether a
  ``suffix`` template was seen.
  """
  etym_templates = ["ar-etym-iyya", "ar-etym-nisba-a",
      "ar-etym-noun-nisba", "ar-etym-noun-nisba-linking"]
  for page, index in blib.cat_articles(u"Arabic nouns", startFrom, upTo):
    text = blib.parse(page)
    pagetitle = page.title()
    if not pagetitle.endswith(u"ية"):
      continue
    etym = False
    suffix = False
    for t in text.filter_templates():
      if t.name in etym_templates:
        etym = True
      if t.name == "suffix":
        suffix = True
    if etym:
      continue
    suffix_note = " (has suffix template)" if suffix else ""
    msg("Page %s %s: Ends with -iyya, no appropriate etym template%s" % (
      index, pagetitle, suffix_note))
Пример #43
0
def snarf_noun_accents_and_forms(noun, orig_pagemsg):
  """Look up the accented lemma and generated forms for a Bulgarian noun.

  The page title is `noun` with accents removed. Results are memoized in
  the module-level `nouns_to_accents_and_forms` dict keyed by that title.

  Returns a ``(lemma, forms)`` tuple, where `forms` is the result of
  `blib.split_generate_args` on the expanded
  ``{{bg-generate-noun-forms|...}}`` call, or ``(None, None)`` when the
  lemma/declension pair cannot be determined (also cached).

  Messages are routed through `orig_pagemsg`, prefixed with the noun.
  """
  global args
  pagetitle = bglib.remove_accents(noun)
  # Serve from the cache when this title has been looked up before.
  if pagetitle in nouns_to_accents_and_forms:
    return nouns_to_accents_and_forms[pagetitle]
  def pagemsg(txt):
    orig_pagemsg("Noun %s: %s" % (noun, txt))
  page = pywikibot.Page(site, pagetitle)
  parsed = blib.parse(page)
  # Tri-state: None = no headword template seen yet, False = headword seen
  # but missing an accent, otherwise the headword string from param 1.
  lemma = None
  for t in parsed.filter_templates():
    if tname(t) in ["bg-noun", "bg-proper noun"]:
      # NOTE(review): lemma is not reset after a {{bg-ndecl}} is consumed, so
      # this warning also fires for headword -> decl -> headword sequences.
      if lemma:
        pagemsg("WARNING: Saw two {{bg-noun}} invocations without intervening {{bg-ndecl}}: %s" % unicode(t))
      lemma = getparam(t, "1")
      if not lemma:
        # lemma stays as the empty value here (neither None nor False).
        pagemsg("WARNING: Missing headword in noun: %s" % unicode(t))
        continue
      if bglib.needs_accents(lemma):
        pagemsg("WARNING: Noun %s missing an accent: %s" % (lemma, unicode(t)))
        lemma = False
        continue
    if tname(t) == "bg-ndecl":
      if lemma is False:
        pagemsg("WARNING: Skipping %s because noun missing an accent" % unicode(t))
        continue
      if lemma is None:
        pagemsg("WARNING: Skipping %s because no preceding {{bg-noun}}" % unicode(t))
        continue
      # A cache entry at this point was written by an earlier {{bg-ndecl}}
      # in this same loop (the entry check at the top already returned
      # otherwise), so treat the page as ambiguous.
      if pagetitle in nouns_to_accents_and_forms:
        pagemsg("WARNING: Saw two {{bg-ndecl}} on the same page: %s" % unicode(t))
        nouns_to_accents_and_forms[pagetitle] = (None, None)
        return (None, None)
      # Turn the declension call into the form-generating call with the
      # same arguments.
      generate_template = re.sub(r"^\{\{bg-ndecl\|", "{{bg-generate-noun-forms|", unicode(t))
      def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)
      generate_result = expand_text(generate_template)
      if not generate_result:
        nouns_to_accents_and_forms[pagetitle] = (None, None)
        return (None, None)
      # Store but keep looping so a second {{bg-ndecl}} can be detected.
      nouns_to_accents_and_forms[pagetitle] = (lemma, blib.split_generate_args(generate_result))
  if pagetitle in nouns_to_accents_and_forms:
    return nouns_to_accents_and_forms[pagetitle]
  pagemsg("WARNING: Couldn't find both lemma and declension")
  nouns_to_accents_and_forms[pagetitle] = (None, None)
  return (None, None)
Пример #44
0
def process_page(index, page, save, verbose):
  """Disabled conversion of ``ru-conj`` class 6 arguments to the "6°" form.

  The function currently only logs a warning and returns None. Everything
  after the early `return` is unreachable and retained for reference.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  # ---- Unreachable legacy logic below ----
  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    before = unicode(t)
    conjclass = getparam(t, "1")
    if unicode(t.name) in ["ru-conj"]:
      if re.search(r"^6[ac]", conjclass):
        if getparam(t, "no_iotation"):
          rmparam(t, "no_iotation")
          note = (u"6a + no_iotation -> 6°a" if conjclass.startswith("6a")
                  else u"6c + no_iotation -> 6°c")
          notes.append(note)
          t.add("1", re.sub("^6", u"6°", conjclass))
      elif re.search(r"^6b", conjclass):
        notes.append(u"6b -> 6°b")
        t.add("1", re.sub("^6", u"6°", conjclass))
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)
  if new_text == text:
    return
  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
Пример #45
0
def process_page(index, page, save, verbose):
  """Drop informal comparatives from ``ru-adj`` when the regular form is listed.

  For each ``ru-adj`` template, the comparative chain (params "2"/"comp")
  is scanned in order. A comparative matching the informal-ending pattern
  below is dropped when its regular counterpart (final letter replaced)
  was already kept earlier in the chain; otherwise it is kept and a warning
  naming the form is logged. The page is saved when `save` is true,
  otherwise the would-be edit is logged.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    origt = unicode(t)
    if unicode(t.name) == "ru-adj":
      comps = blib.fetch_param_chain(t, "2", "comp")
      newcomps = []
      for comp in comps:
        if re.search(u"е́?й$", comp):
          # u"\\1..." is the same replacement as the former ur"\1..." literal.
          regcomp = re.sub(u"(е́?)й$", u"\\1е", comp)
          if regcomp in newcomps:
            pagemsg("Skipping informal form %s" % comp)
            notes.append("remove informal comparative %s" % comp)
          else:
            # Interpolate the form; previously a raw "%s" was logged.
            pagemsg("WARNING: Found informal form %s without corresponding regular form" % comp)
            newcomps.append(comp)
        else:
          newcomps.append(comp)
      if comps != newcomps:
        blib.set_param_chain(t, newcomps, "2", "comp")
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)
Пример #46
0
def process_page(index, page):
    """Report pages with a noun headword template but no declension template.

    Headword templates are ``ru-noun`` / ``ru-proper noun``; declension
    templates are ``ru-noun-table`` / ``ru-decl-noun-see``. Read-only.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    parsed = blib.parse(page)

    names = [unicode(t.name) for t in parsed.filter_templates()]
    has_headword = any(n in ["ru-noun", "ru-proper noun"] for n in names)
    has_decl = any(n in ["ru-noun-table", "ru-decl-noun-see"] for n in names)
    if has_headword and not has_decl:
        pagemsg("Found headword template without decl")
def process_page(index, page, save, verbose):
  """Replace blank participle overrides with "-" on class 7a/7b verbs.

  Applies to ``ru-conj`` / ``ru-conj-old`` templates whose argument 2 is
  exactly "7a" or "7b". A present-but-empty ``past_adv_part_short`` or
  ``past_actv_part`` is set to "-". Templates with any param value equal
  to "or" are skipped with a warning. The page is saved when `save` is
  true, otherwise the would-be edit is logged; a warning is logged when
  nothing was changed.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  # (param name, change note) pairs, handled in this order.
  blank_fixes = [
    ("past_adv_part_short", "set past_adv_part_short=-"),
    ("past_actv_part", "set past_actv_part=-"),
  ]

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    is_target = (unicode(t.name) in ["ru-conj", "ru-conj-old"] and
        getparam(t, "2") in ["7a", "7b"])
    if not is_target:
      continue
    if any(unicode(p.value) == "or" for p in t.params):
      pagemsg("WARNING: Skipping multi-arg conjugation: %s" % unicode(t))
      continue
    for pname, note in blank_fixes:
      if t.has(pname) and getparam(t, pname) == "":
        notes.append(note)
        before = unicode(t)
        t.add(pname, "-")
        pagemsg("Replacing %s with %s" % (before, unicode(t)))

  new_text = unicode(parsed)
  changed = new_text != text

  if changed and verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  if changed:
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

  if not notes:
    pagemsg("WARNING: No changes")
Пример #48
0
def process_page(index, page, save, verbose):
  """Convert ``head=`` to positional ``1=`` in ``ru-phrase`` templates.

  A warning is logged for any ``ru-phrase`` with ``tr=``, and for one that
  has both ``head=`` and ``1=`` (which is left unchanged). Otherwise
  ``head`` and ``tr`` are removed, ``1`` is added with the head value, and
  a non-empty ``tr`` is re-added after it. The page is saved when `save`
  is true, otherwise the would-be edit is logged.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    if unicode(t.name) != "ru-phrase":
      continue
    if t.has("tr"):
      pagemsg("WARNING: Has tr=: %s" % unicode(t))
    if not t.has("head"):
      continue
    if t.has("1"):
      pagemsg("WARNING: Has both head= and 1=: %s" % unicode(t))
      continue
    notes.append("ru-phrase: convert head= to 1=")
    before = unicode(t)
    head = getparam(t, "head")
    rmparam(t, "head")
    # Pull tr out and re-add it afterwards so that it follows the new 1=.
    tr = getparam(t, "tr")
    rmparam(t, "tr")
    t.add("1", head)
    if tr:
      t.add("tr", tr)
    pagemsg("Replacing %s with %s" % (before, unicode(t)))

  new_text = unicode(parsed)
  if new_text == text:
    return
  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
def process_page(index, page, save, verbose, do_noun):
  """Remove the explicit Russian nouns / proper nouns category link.

  `do_noun` selects "nouns" (truthy) or "proper nouns" (falsy). A category
  line surrounded by blank lines collapses to one blank line; any other
  occurrence followed by a newline is deleted. The page is saved when
  `save` is true, otherwise the would-be edit is logged. `verbose` is
  unused.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)

  cat = "nouns" if do_noun else "proper nouns"
  padded_link = r"\n\n\n*\[\[Category:Russian %s]]\n\n\n*" % cat
  bare_link = r"\[\[Category:Russian %s]]\n" % cat
  new_text = re.sub(bare_link, "", re.sub(padded_link, "\n\n", text))
  if new_text == text:
    return

  comment = "Remove redundant [[:Category:Russian %s]]" % cat
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
Пример #50
0
def process_page(index, page):
  """Report pages that have no ``ru-adj`` headword template.

  When no ``ru-adj`` is found, a message is logged that also lists any
  noun headword templates and ``head`` templates (with their param 2)
  seen on the page. Read-only; returns None.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  parsed = blib.parse(page)

  found_headword_template = False
  for t in parsed.filter_templates():
    if unicode(t.name) in ["ru-adj"]:
      found_headword_template = True
  if not found_headword_template:
    notes = []
    for t in parsed.filter_templates():
      if unicode(t.name) in ["ru-noun", "ru-noun+", "ru-proper noun", "ru-proper noun+"]:
        notes.append("found noun header (%s)" % unicode(t.name))
      if unicode(t.name) == "head":
        notes.append("found head header (%s)" % getparam(t, "2"))
    # With no notes the suffix must be an empty string; the previous
    # `notes and ...` expression evaluated to [] and was formatted as "[]".
    suffix = ("; " + ",".join(notes)) if notes else ""
    pagemsg("Missing adj headword template%s" % suffix)
Пример #51
0
def process_page(index, page, save, verbose):
  """Disabled merge of ``ru-conj-4a`` param 4 into param 3.

  The function currently only logs a warning and returns None. Everything
  after the early `return` is unreachable and retained for reference.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  # ---- Unreachable legacy logic below ----
  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    before = unicode(t)
    if unicode(t.name) in ["ru-conj-4a"]:
      arg4 = getparam(t, "4")
      if arg4 == u"щ":
        t.add("3", getparam(t, "3") + arg4)
        rmparam(t, "4")
        notes.append(u"move param 4 (щ) to param 3")
      elif arg4:
        pagemsg("WARNING: Strange value %s for param 4" % arg4)
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)
  if new_text == text:
    return
  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
Пример #52
0
def process_page(index, page, save, verbose):
  """Swap args 1 and 2 of ``ru-conj``/``ru-conj-old`` when arg 2 is a verb type.

  If argument 2 is one of the ``pf*`` / ``impf*`` verb-type keywords listed
  below, the values of arguments 1 and 2 are exchanged so that the verb
  type comes first. The page is saved when `save` is true, otherwise the
  would-be edit is logged.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  verb_types = ("pf", "pf-intr", "pf-refl",
      "pf-impers", "pf-intr-impers", "pf-refl-impers",
      "impf", "impf-intr", "impf-refl",
      "impf-impers", "impf-intr-impers", "impf-refl-impers")

  text = unicode(page.text)
  notes = []

  parsed = blib.parse(page)
  for t in parsed.filter_templates():
    before = unicode(t)
    if unicode(t.name) in ["ru-conj", "ru-conj-old"]:
      verbtype = getparam(t, "2")
      if verbtype in verb_types:
        conjtype = getparam(t, "1")
        t.add("2", conjtype)
        t.add("1", verbtype)
        notes.append("move verb type from arg 2 to arg 1")
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)
  if new_text == text:
    return
  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
Пример #53
0
def process_page(index, page, save, verbose):
  """Fold suffix=ся of {{ru-decl-adj}}/{{ru-adj-old}} into the lemma in arg 1.

  Only pages whose title ends in ся are examined. For each matching
  template, ся is inserted before every comma in arg 1 and appended at the
  end, and the suffix= parameter is removed.

  index -- running page number, used only as a prefix in log messages.
  page -- page object providing title(), text and save().
  save -- if true, write the modified text back; otherwise only log what
    would be saved.
  verbose -- if true, also log the full old and new page text.

  Returns None.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  if not pagetitle.endswith(u"ся"):
    return

  text = unicode(page.text)
  notes = []

  parsed = blib.parse(page)
  for t in parsed.filter_templates():
    before = unicode(t)
    is_adj_decl = unicode(t.name) in ("ru-decl-adj", "ru-adj-old")
    if is_adj_decl and getparam(t, "suffix") == u"ся":
      # First add the suffix before each comma, then at the end of the value.
      with_inner_suffix = re.sub(",", u"ся,", getparam(t, "1"))
      lemma = re.sub("$", u"ся", with_inner_suffix)
      t.add("1", lemma)
      rmparam(t, "suffix")
      notes.append(u"move suffix=ся to lemma")
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)
  if new_text == text:
    return

  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  # Any textual change must have been recorded in the edit summary.
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
Пример #54
0
def process_page(index, page, save, verbose, genders):
  """Replace the gender params of the page's single headword template.

  The page must contain exactly one {{ru-noun+}} or {{ru-proper noun+}}
  template; otherwise a warning is logged and nothing is changed. Params
  g, g2 ... g5 are removed and then g, g2, ... are set from `genders`.

  index -- running page number, used only as a prefix in log messages.
  page -- page object providing title(), text and save().
  save -- if true, write the modified text back; otherwise only log what
    would be saved.
  verbose -- accepted for signature compatibility; not used here.
  genders -- sequence of gender strings, in the order g, g2, g3, ...

  Returns None.
  """
  pagetitle = unicode(page.title())
  subpagetitle = re.sub(".*:", "", pagetitle)
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  parsed = blib.parse(page)

  headwords = [t for t in parsed.filter_templates()
      if unicode(t.name) in ("ru-noun+", "ru-proper noun+")]
  if len(headwords) > 1:
    pagemsg("WARNING: Multiple headword templates, skipping")
    return
  if not headwords:
    pagemsg("WARNING: No headword templates, skipping")
    return
  headword_template = headwords[0]

  orig_template = unicode(headword_template)
  # Clear every existing gender slot before writing the new values.
  for old_param in ("g", "g2", "g3", "g4", "g5"):
    rmparam(headword_template, old_param)
  for position, gender in enumerate(genders, 1):
    # The first gender goes in plain "g"; later ones in "g2", "g3", ...
    if position == 1:
      param = "g"
    else:
      param = "g" + str(position)
    headword_template.add(param, gender)
  pagemsg("Replacing %s with %s" % (orig_template, unicode(headword_template)))

  comment = "Fix headword gender, substituting new value %s" % ",".join(genders)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = unicode(parsed)
  page.save(comment=comment)
Пример #55
0
def process_page(index, page, save, verbose):
  """Disabled script: log a warning and return without touching the page.

  The code after the unconditional return is unreachable. It is the old
  logic that moved arg 4 of {{ru-conj-5c}}/{{ru-conj-6b}} to past_f=, kept
  for reference.

  index -- running page number, used only as a prefix in log messages.
  page -- page object providing title() (and, in the dead code, text/save()).
  save, verbose -- only referenced by the unreachable code.

  Returns None.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  # ---- Unreachable from here on; retained as reference. ----
  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    before = unicode(t)
    if unicode(t.name) in ("ru-conj-5c", "ru-conj-6b"):
      past_f = getparam(t, "4")
      if past_f:
        # Insert past_f= where 4= currently sits, then drop 4=.
        t.add("past_f", past_f, before="4")
        rmparam(t, "4")
        notes.append("Replace 4= with past_f=")
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)
  if new_text == text:
    return

  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
Пример #56
0
def process_page(index, page, save, verbose):
  """Remove the adj= and shto= parameters from every {{ru-ux}} template.

  index -- running page number, used only as a prefix in log messages.
  page -- page object providing title(), text and save().
  save -- if true, write the modified text back; otherwise only log what
    would be saved.
  verbose -- if true, also log the full old and new page text.

  Returns None.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  # (param to drop, log message, edit-summary note), in processing order.
  removals = (
    ("adj", "Removing adj=", "remove adj= from ru-ux"),
    ("shto", "Removing shto=", "remove shto= from ru-ux"),
  )

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    if unicode(t.name) != "ru-ux":
      continue
    before = unicode(t)
    for param, logline, note in removals:
      if t.has(param):
        pagemsg(logline)
        notes.append(note)
        rmparam(t, param)
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)
  if new_text == text:
    return

  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  # Any textual change must have been recorded in the edit summary.
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
def process_page(index, page, save, verbose):
    """Warn about {{ru-noun-table}} calls lacking "*" on suspicious titles.

    Titles ending in "ство" are skipped outright. Otherwise the title must
    match one of three ending patterns (see below) for its templates to be
    inspected. The page is never modified.

    index -- running page number, used only as a prefix in log messages.
    page -- page object providing title() and text.
    save, verbose -- accepted for signature compatibility; not used here.

    Returns None.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    if pagetitle.endswith(u"ство"):
        return

    cons = u"[бцдфгчйклмнпрствшхзжщ]"
    # A further pattern, u"[оеё]" + cons + "$", was tried by the original
    # author but dropped because it produced too many false positives.
    title_is_candidate = (
        re.search(cons + u"[кц][оаяеёыи]$", pagetitle)
        or re.search(cons + cons + u"[оаяеёыи]$", pagetitle)
        or re.search(u"[оеё][кц]$", pagetitle)
    )
    if not title_is_candidate:
        return

    text = unicode(page.text)
    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-noun-table":
            continue
        rendered = unicode(t)
        if "*" not in rendered:
            pagemsg("WARNING: Likely incorrectly-declined reducible: %s" % rendered)
Пример #58
0
def process_page(index, page, save, verbose):
  """Strip accents from arg 1 of every {{wikipedia}} template on the page.

  index -- running page number, used only as a prefix in log messages.
  page -- page object providing title(), text and save().
  save -- if true, write the modified text back; otherwise only log what
    would be saved.
  verbose -- if true, also log the full old and new page text.

  Returns None.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    before = unicode(t)
    if unicode(t.name) == "wikipedia":
      accented = getparam(t, "1")
      unaccented = ru.remove_accents(accented)
      if accented != unaccented:
        pagemsg("Removing accents from 1= in {{wikipedia|...}}")
        notes.append("remove accents from 1= in {{wikipedia|...}}")
        t.add("1", unaccented)
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)
  if new_text == text:
    return

  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  # Any textual change must have been recorded in the edit summary.
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
Пример #59
0
def process_page(index, page, save, verbose):
  """Replace manually given past passive participles in {{ru-conj}} with a
  variant code when the auto-generated forms are identical.

  For each {{ru-conj}} template that carries manual PPP parameters, candidate
  variant codes are appended to arg 2 of a scratch copy, the copy is expanded
  as {{ru-generate-verb-forms}}, and the generated PPPs are compared with the
  manual ones. On the first exact match the manual parameters are removed
  from the real template and arg 2 gets the variant appended.

  index -- running page number, used only as a prefix in log messages.
  page -- page object providing title(), text and save().
  save -- if true, write the modified text back; otherwise only log what
    would be saved.
  verbose -- if true, also log the full old and new page text; also passed
    through to blib.expand_text().

  Returns None.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  def expand_text(tempcall):
    return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

  pagemsg("Processing")

  # Parameter names under which a PPP may be specified by hand. The same
  # names are looked up in the generated-forms output below.
  manual_ppp_forms = ["past_pasv_part", "past_pasv_part2", "past_pasv_part3",
    "past_pasv_part4", "ppp", "ppp2", "ppp3", "ppp4"]
  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    origt = unicode(t)
    tname = unicode(t.name)
    if tname == "ru-conj":
      # Collect the manual PPPs in parameter order, ignoring empty values
      # and the placeholder "-".
      manual_ppps = []
      for form in manual_ppp_forms:
        ppp = getparam(t, form)
        if ppp and ppp != "-":
          manual_ppps.append(ppp)
      if not manual_ppps:
        # Nothing to replace; template is untouched so no "Replaced" log
        # is needed either.
        continue
      # A parameter whose value is exactly "or" is treated as marking a
      # multi-arg conjugation, which this script does not handle.
      if [x for x in t.params if unicode(x.value) == "or"]:
        pagemsg("WARNING: Skipping multi-arg conjugation: %s" % unicode(t))
        continue
      curvariant = getparam(t, "2")
      # Per the warning text, "+p", "(7)" and "(8)" in arg 2 already denote
      # a PPP variant, so a manual PPP alongside them is flagged, not fixed.
      if "+p" in curvariant or "(7)" in curvariant or "(8)" in curvariant:
        pagemsg("WARNING: Found both manual PPP and PPP variant, something wrong: %s" %
            unicode(t))
        continue
      # Work on a re-parsed copy so that trial edits do not touch `t`.
      t2 = blib.parse_text(unicode(t)).filter_templates()[0]
      for form in manual_ppp_forms:
        rmparam(t2, form)
      # Candidate variant codes, tried in order; the extra ones are chosen
      # from the spelling of the first manual PPP only.
      variants_to_try = ["+p"]
      if u"ё" in re.sub(u"ённый$", "", manual_ppps[0]):
        variants_to_try.append(u"+pё")
      if u"жденный" in manual_ppps[0] or u"ждённый" in manual_ppps[0]:
        variants_to_try.append(u"+pжд")
      # Mismatch warnings are buffered and only emitted if no variant matches.
      notsamemsgs = []
      for variant in variants_to_try:
        # Overwrites arg 2 of the scratch copy on each iteration.
        t2.add("2", curvariant + variant)
        tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms", unicode(t2))
        result = expand_text(tempcall)
        if not result:
          pagemsg("WARNING: Error generating forms, skipping")
          # Moves on to the next variant, not the next template.
          continue
        args = rulib.split_generate_args(result)
        if "past_pasv_part" not in args:
          pagemsg("WARNING: Something wrong, no past passive participle generated: %s" % unicode(t))
          continue
        # Flatten the generated forms (comma-separated per key) into one
        # list, filtered the same way as the manual ones.
        auto_ppps = []
        for form in manual_ppp_forms:
          if form in args:
            for ppp in re.split(",", args[form]):
              if ppp and ppp != "-":
                auto_ppps.append(ppp)
        # Exact list equality: same forms in the same order.
        if manual_ppps == auto_ppps:
          pagemsg("Manual PPP's %s same as auto-generated PPP's, switching to auto"
              % ",".join(manual_ppps))
          for form in manual_ppp_forms:
            rmparam(t, form)
          t.add("2", curvariant + variant)
          notes.append("replaced manual PPP's with variant %s" % variant)
          break
        else:
          notsamemsgs.append("WARNING: Manual PPP's %s not same as auto-generated PPP's %s: %s" %
            (",".join(manual_ppps), ",".join(auto_ppps), unicode(t)))
      else: # no break in for loop
        # No variant reproduced the manual forms: report every mismatch.
        for m in notsamemsgs:
          pagemsg(m)

    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    # Any textual change must have been recorded in the edit summary.
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)