示例#1
0
def do_process_text_on_page(index, pagename, text, adj):
  """Convert old Bulgarian declension templates on a page to the new ones.

  Walks the templates in ``text``, remembering param 1 of the most recent
  headword template ({{bg-adj}} when ``adj`` is true, otherwise {{bg-noun}} or
  {{bg-proper noun}}). Each old declension template ({{bg-decl-adj}} when
  ``adj`` is true, otherwise any template whose name starts with "bg-noun-")
  has its params erased and is rewritten as {{bg-adecl|HEADWORD<>}} or
  {{bg-ndecl|HEADWORD<>}}.

  Args:
    index: page index, used only in log messages.
    pagename: page title, used only in log messages.
    text: wikitext of the page.
    adj: true to handle adjectives, false to handle nouns.

  Returns:
    ``(new_text, notes)``, or None (bare return) when the page contains
    "==Etymology 1==" or "==Pronunciation 1==".
  """
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagename, txt))

  pagemsg("Processing")

  notes = []

  if "==Etymology 1==" in text or "==Pronunciation 1==" in text:
    pagemsg("WARNING: Saw Etymology/Pronunciation 1, can't handle yet")
    return

  headword_templates = ["bg-adj"] if adj else ["bg-noun", "bg-proper noun"]

  parsed = blib.parse_text(text)
  headword = None
  for t in parsed.filter_templates():
    tn = tname(t)
    if tn in headword_templates:
      headword = getparam(t, "1")
    is_old_decl = tn == "bg-decl-adj" if adj else tn.startswith("bg-noun-")
    if is_old_decl:
      origt = unicode(t)
      if not headword:
        pagemsg("WARNING: Saw %s without {{%s}} headword" % (origt, "bg-adj" if adj else "bg-noun"))
        continue
      del t.params[:]
      t.add("1", "%s<>" % headword)
      blib.set_template_name(t, "bg-adecl" if adj else "bg-ndecl")
      pagemsg("Replaced %s with %s" % (origt, unicode(t)))
      notes.append("convert {{%s}} to {{%s}}" % (tn, tname(t)))

  # Serialize the modified parse tree; returning the original ``text`` would
  # silently throw away every replacement made above.
  return unicode(parsed), notes
def process_text_on_page(index, pagename, text):
    """Replace Latin {{head|la|POS}} with the POS-specific headword template.

    For each {{head}} whose param 1 is "la", the part of speech in param 2 is
    looked up in ``pos_to_template``. Templates with an unknown part of speech,
    or with 3= or head= set, are skipped with a warning. Otherwise param 1 is
    set to the page name, the template is renamed, param 2 is removed and
    FIXME=1 is added.

    Returns ``(new_text, notes)``.
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagename, txt))

    pagemsg("Processing")

    changes = []
    tree = blib.parse_text(text)

    for template in tree.filter_templates():
        if tname(template) != "head" or getparam(template, "1") != "la":
            continue

        part_of_speech = getparam(template, "2")
        if part_of_speech not in pos_to_template:
            pagemsg("WARNING: Saw unrecognized part of speech %s: %s" %
                    (part_of_speech, unicode(template)))
            continue
        if getparam(template, "3") or getparam(template, "head"):
            pagemsg("WARNING: Saw 3= or head=: %s" % unicode(template))
            continue

        before = unicode(template)
        # Order matters: overwrite 1= first, then rename, then drop 2=.
        template.add("1", pagename)
        blib.set_template_name(template, pos_to_template[part_of_speech])
        rmparam(template, "2")
        template.add("FIXME", "1")
        pagemsg("Replaced %s with %s" % (before, unicode(template)))
        changes.append("replace {{head|la|%s}} with {{%s}}" %
                       (part_of_speech, tname(template)))

    return unicode(tree), changes
示例#3
0
def process_page(page, index, parsed):
    """Strip "-refl" from the verb-type param of Russian conjugation templates.

    Param 1 is rewritten for the {{ru-conj}} family (including the
    User:Benwing2 variants); param 2 is rewritten for {{temp|ru-conj|...}}.
    A note is recorded for every template whose text changed.

    Returns ``(parsed, notes)``; ``parsed`` is modified in place.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errpagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))
        errmsg("Page %s %s: %s" % (index, pagetitle, txt))

    def strip_refl(template, param):
        # Unconditionally re-adds the param, exactly as the per-branch code did.
        template.add(param, getparam(template, param).replace("-refl", ""))

    conj_template_names = [
        "ru-conj", "ru-conj-old", "User:Benwing2/ru-conj",
        "User:Benwing2/ru-conj-old"
    ]

    pagemsg("Processing")

    changes = []
    for template in parsed.filter_templates():
        before = unicode(template)
        name = tname(template)
        if name in conj_template_names:
            strip_refl(template, "1")
        elif name == "temp" and getparam(template, "1") == "ru-conj":
            strip_refl(template, "2")
        after = unicode(template)
        if before != after:
            changes.append("remove -refl from verb type")
            pagemsg("Replaced %s with %s" % (before, after))

    return parsed, changes
示例#4
0
def process_page(page, index, parsed):
  """Convert old Latin noun declension templates to the new format.

  {{la-decl-multi}} is handed to ``convert_la_decl_multi_to_new`` and any
  template named in ``old_la_noun_decl_templates`` to
  ``convert_template_to_new``. If either converter returns a false value the
  whole page is abandoned.

  Returns ``(new_text, notes)``, or ``(None, None)`` if a conversion failed.
  """
  pagetitle = unicode(page.title())

  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  changes = []

  for template in parsed.filter_templates():
    name = tname(template)
    if name == "la-decl-multi":
      converted = convert_la_decl_multi_to_new(template, pagetitle, pagemsg, errandpagemsg)
      if not converted:
        return None, None
      changes.append("converted {{la-decl-multi}} to {{%s}}" % tname(converted))
    elif name in old_la_noun_decl_templates:
      if not convert_template_to_new(template, pagetitle, pagemsg, errandpagemsg):
        return None, None
      changes.append("converted {{%s}} to {{la-ndecl}}" % name)

  return unicode(parsed), changes
def process_page(page, index, parsed):
    """Replace U+02C1 with U+02E4 inside {{IPAchar}} and {{IPA}} params.

    {{IPAchar}}: only param 1 is examined. {{IPA}}: numbered params up to 19
    are examined, starting at 1 when lang= is present and at 2 otherwise.

    Returns ``(parsed, notes)``; ``parsed`` is modified in place.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def fix_param(template, param):
        # Only touch the param when it is non-empty and actually changes.
        current = getparam(template, param)
        if not current:
            return
        fixed = current.replace(u"\u02C1", u"\u02E4")
        if fixed != current:
            template.add(param, fixed)

    pagemsg("Processing")

    changes = []

    for template in parsed.filter_templates():
        before = unicode(template)
        if tname(template) == "IPAchar":
            fix_param(template, "1")
        elif tname(template) == "IPA":
            start = 1 if getparam(template, "lang") else 2
            for num in range(start, 20):
                fix_param(template, str(num))
        after = unicode(template)
        if before != after:
            changes.append(
                "Correct use of U+02C1 pharyngealization mark to U+02E4")
            pagemsg("Replaced %s with %s" % (before, after))

    return parsed, changes
def process_lemma_page(page, index, form):
    """Add ``sup=FORM`` to the Italian adjective/participle headword of a page.

    The page must contain at most one {{it-adj}} and at most one {{it-pp}}.
    When both are present the adjective template is chosen (with a warning).
    If the chosen template already has sup=, nothing is changed.

    Args:
        page: page object; ``page.title()`` and ``page.text`` are read.
        index: page index, used only in log messages.
        form: value to store in sup=.

    Returns:
        ``(new_text, notes)``. Every skip path returns ``(None, None)`` so the
        result can always be unpacked into two values.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)

    notes = []

    parsed = blib.parse_text(text)
    it_adj_template = None
    it_part_template = None
    for t in parsed.filter_templates():
        tn = tname(t)
        if tn == "it-adj":
            if it_adj_template is not None:
                pagemsg(
                    "WARNING: Saw multiple adjective headword templates in subsection, %s and %s, skipping"
                    % (unicode(it_adj_template), unicode(t)))
                return None, None
            it_adj_template = t
        elif tn == "it-pp":
            if it_part_template is not None:
                pagemsg(
                    "WARNING: Saw multiple participle headword templates in subsection, %s and %s, skipping"
                    % (unicode(it_part_template), unicode(t)))
                return None, None
            it_part_template = t

    if it_adj_template is None and it_part_template is None:
        pagemsg("WARNING: Didn't see adjective or participle lemma template")
        return None, None

    if it_adj_template is not None and it_part_template is not None:
        pagemsg(
            "WARNING: Saw both %s and %s, choosing adjective template" %
            (unicode(it_adj_template), unicode(it_part_template)))
    # The adjective template wins whenever it exists.
    if it_adj_template is not None:
        template = it_adj_template
    else:
        template = it_part_template

    if getparam(template, "sup"):
        pagemsg("Already saw sup=: %s" % unicode(template))
    else:
        origt = unicode(template)
        template.add("sup", form)
        pagemsg("Replaced %s with %s" % (origt, unicode(template)))
        notes.append("add sup=%s to {{%s}}" % (form, tname(template)))

    return unicode(parsed), notes
def process_text_on_page(index, pagetitle, text):
    """Fix 2= of {{alternative spelling of}} in Old English wynn entries.

    The single Old English headword on the page is found (head= of an Old
    English headword template, else the page title). Param 2 of the page's
    {{alternative spelling of|ang|...}} is then set to that headword with
    links removed and every wynn replaced by "w". Finally, runs of blank
    lines are condensed to one blank line.

    Returns:
        ``(new_text, notes)``, or None (bare return) when the page is skipped:
        no headword, conflicting headwords, multiple
        {{alternative spelling of}}, or a non-"ang" language code.
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    notes = []

    origtext = text
    parsed = blib.parse_text(text)
    head = None
    for t in parsed.filter_templates():
        tn = tname(t)
        newhead = None
        if (tn == "head" and getparam(t, "1") == "ang") or tn in [
                "ang-noun", "ang-noun-form", "ang-verb", "ang-verb-form",
                "ang-adj", "ang-adj-form", "ang-adv", "ang-con", "ang-prep",
                "ang-prefix", "ang-proper noun", "ang-suffix"
        ]:
            newhead = getparam(t, "head") or pagetitle
        if newhead:
            if head:
                pagemsg("WARNING: Saw head=%s and newhead=%s, skipping" %
                        (head, newhead))
                return
            head = newhead
    if head is None:
        # Without this guard the membership test below raises TypeError on
        # pages that have no Old English headword template at all.
        pagemsg("WARNING: Didn't see any Old English headword template, skipping")
        return
    if u"ƿ" not in head:
        pagemsg("WARNING: Something wrong, didn't see wynn in head: %s" % head)
    saw_altspell = None
    for t in parsed.filter_templates():
        tn = tname(t)
        if tn == "alternative spelling of":
            if saw_altspell:
                pagemsg(
                    "WARNING: Saw multiple {{alternative spelling of}}, skipping: %s and %s"
                    % (unicode(saw_altspell), unicode(t)))
                return
            saw_altspell = unicode(t)
            if getparam(t, "1") != "ang":
                pagemsg(
                    "WARNING: {{alternative spelling of}} without language 'ang', skipping: %s"
                    % unicode(t))
                return
            param2 = getparam(t, "2")
            should_param2 = blib.remove_links(head).replace(u"ƿ", "w")
            if param2 != should_param2:
                origt = unicode(t)
                t.add("2", should_param2)
                pagemsg("Replaced %s with %s" % (origt, unicode(t)))
                notes.append(
                    "fix 2= in {{alternative spelling of}} in wynn Old English entries"
                )
    text = re.sub("\n\n+", "\n\n", unicode(parsed))
    # Only mention the newline cleanup when it is the sole change.
    if origtext != text and not notes:
        notes.append("condense 3+ newlines to 2")
    return text, notes
示例#8
0
 def insert_into_existing_pron_section(k):
   """Insert a new pronunciation template at the top of ``subsections[k]``.

   If the subsection already contains a template named in
   ``pronun_templates``, nothing is changed. Otherwise existing Polish
   rhyme/hyphenation/pl-IPA lines are removed, a single {{audio|pl}} line (if
   any) is folded into the new template as ``|a=`` / ``|ac=``, and the new
   template is prepended to the subsection.

   ``subsections``, ``notes``, ``pagemsg``, ``pronun_templates`` and
   ``construct_new_pron_template`` come from the enclosing scope, which is
   not visible here.

   Returns True on success (including the "already present" case) and None
   when the {{audio}} line cannot be handled.
   """
   parsed = blib.parse_text(subsections[k])
   for t in parsed.filter_templates():
     tn = tname(t)
     if tn in pronun_templates:
       pagemsg("Already saw pronunciation template: %s" % unicode(t))
       break
   else: # no break
     new_pron_template, pron_prefix = construct_new_pron_template()
     # Remove existing rhymes/hyphenation/pl-IPA lines
     for template in ["rhyme|pl", "rhymes|pl", "pl-IPA", "hyph|pl", "hyphenation|pl"]:
       re_template = template.replace("|", r"\|")
       regex = r"^([* ]*\{\{%s(?:\|[^{}]*)*\}\}\n)" % re_template
       m = re.search(regex, subsections[k], re.M)
       if m:
         pagemsg("Removed existing %s" % m.group(1).strip())
         notes.append("remove existing {{%s}}" % template)
         subsections[k] = re.sub(regex, "", subsections[k], flags=re.M)
     for template in ["audio|pl"]:
       re_template = template.replace("|", r"\|")
       regex = r"^([* ]*\{\{%s(?:\|[^{}]*)*\}\}\n)" % re_template
       all_audios = re.findall(regex, subsections[k], re.M)
       if len(all_audios) > 1:
         # all_audios is a list of matched lines; it must not be called.
         pagemsg("WARNING: Saw multiple {{audio}} templates, skipping: %s" % ",".join(x.strip() for x in all_audios))
         return
       if len(all_audios) == 1:
         # The one matched line, used both for parsing and in warnings.
         audio_line = all_audios[0].strip()
         audiot = list(blib.parse_text(audio_line).filter_templates())[0]
         assert(tname(audiot) == "audio")
         if getparam(audiot, "1") != "pl":
           pagemsg("WARNING: Wrong language in {{audio}}, skipping: %s" % audio_line)
           return
         audiofile = getparam(audiot, "2")
         audiogloss = getparam(audiot, "3")
         for param in audiot.params:
           pn = pname(param)
           pv = unicode(param.value)
           if pn not in ["1", "2", "3"]:
             pagemsg("WARNING: Unrecognized param %s=%s in {{audio}}, skipping: %s" % (
               pn, pv, audio_line))
             return
         # A gloss that merely says "Audio" carries no information.
         if audiogloss in ["Audio", "audio"]:
           audiogloss = ""
         params = "|a=%s" % audiofile
         if audiogloss:
           params += "|ac=%s" % audiogloss
         # Splice the audio params in just before the closing "}}".
         new_pron_template = new_pron_template[:-2] + params + new_pron_template[-2:]
         pagemsg("Removed existing %s in order to incorporate into {{pl-p}}" % audio_line)
         notes.append("incorporate existing {{%s}} into {{pl-p}}" % template)
         subsections[k] = re.sub(regex, "", subsections[k], flags=re.M)
     subsections[k] = pron_prefix + new_pron_template + "\n" + subsections[k]
     notes.append("insert %s into existing Pronunciation section" % new_pron_template)
   return True
示例#9
0
def hi_lemma_is_indeclinable(t, pagetitle, pagemsg):
    """Return whether the Hindi headword template ``t`` is indeclinable.

    Nouns and proper nouns are indeclinable exactly when ind= is set.
    Adjectives are indeclinable when ind= is set, or when the lemma (head= or
    the page title, with links removed) ends in none of the suffixes ``AA``,
    ``IND_AA`` or ``AA + M``. Any other template yields False.
    """
    name = tname(t)
    if name in ["hi-noun", "hi-proper noun"]:
        return bool(getparam(t, "ind"))
    if name != "hi-adj":
        return False
    if getparam(t, "ind"):
        return True
    lemma = blib.remove_links(getparam(t, "head") or pagetitle)
    # A lemma lacking every declinable suffix is definitely indeclinable.
    # Some indeclinable adjectives do end in these suffixes, but they cannot
    # be told apart here, so such lemmas are assumed to be declinable.
    declinable_suffixes = (AA, IND_AA, AA + M)
    return not lemma.endswith(declinable_suffixes)
示例#10
0
def uk_lemma_is_indeclinable(t, pagetitle, pagemsg):
    """Return whether the Ukrainian headword template ``t`` is indeclinable.

    Nouns and proper nouns: True when 3= is "-". Also True, with a warning,
    when 1= is non-empty and equal to 3=, unless the headword matches the
    ``я́?$`` pattern and 2= starts with "n". Adjectives: True when indecl= is
    set. Everything else: False.
    """
    name = tname(t)
    if name in ["uk-noun", "uk-proper noun"]:
        third = getparam(t, "3")
        if third == "-":
            return True
        lemma = getparam(t, "1")
        if lemma and lemma == third:
            # De Morgan form of the original "not A or not B" test.
            ya_lemma_with_n_gender = (re.search(u"я́?$", lemma)
                                      and getparam(t, "2").startswith("n"))
            if not ya_lemma_with_n_gender:
                pagemsg("WARNING: Indeclinable noun not marked as such: %s" %
                        unicode(t))
                return True
    if name == "uk-adj" and getparam(t, "indecl"):
        return True
    return False
def add_category(secbody, sectail, pagemsg, notes, cat):
    """Make sure category ``cat`` is present in a Hungarian section.

    Nothing changes if the category already appears as a raw
    ``[[Category:...]]`` link or as a param of {{cln|hu}} / {{catlangname|hu}}
    anywhere in ``secbody + sectail``. Otherwise it is put into the first free
    numbered slot (2..29) of the first such template in ``sectail``, or a new
    {{cln|hu|CAT}} is appended to ``sectail``. A trailing ``--`` separator on
    ``sectail`` is split off first and re-attached to the result.

    Returns ``(secbody, sectail)`` with any modification applied; ``notes`` is
    appended to in place.
    """
    def is_hungarian_cln(template):
        return (tname(template) in ["cln", "catlangname"]
                and getparam(template, "1") == "hu")

    separator = ""
    tail_match = re.match(r"^(.*?\n)(\n*--+\n*)$", sectail, re.S)
    if tail_match:
        sectail = tail_match.group(1)
        separator = tail_match.group(2)

    whole_section = secbody + sectail
    if re.search(r"\[\[Category:%s(\||\])" % re.escape(cat), whole_section):
        # Category already present
        pagemsg("Category 'Hungarian %s' already present" % cat)
        return secbody, sectail + separator
    for template in blib.parse_text(whole_section).filter_templates():
        if not is_hungarian_cln(template):
            continue
        for num in range(2, 30):
            if getparam(template, str(num)) == cat:
                # Category already present in templatized form
                pagemsg("Category 'Hungarian %s' already present" % cat)
                return secbody, sectail + separator

    # Now add the category to existing {{cln}}, or create one.
    tail_tree = blib.parse_text(sectail)
    for template in tail_tree.filter_templates():
        if not is_hungarian_cln(template):
            continue
        free_slot = None
        for num in range(2, 30):
            if not getparam(template, str(num)):
                free_slot = num
                break
        if free_slot is None:
            pagemsg(
                "WARNING: Something strange, reached 30= in %s and didn't see place to insert"
                % unicode(template))
            return secbody, sectail + separator
        # Insert ahead of the next numbered param, else ahead of sort=.
        following = str(free_slot + 1)
        if getparam(template, following):
            before = following
        elif getparam(template, "sort"):
            before = "sort"
        else:
            before = None
        original = unicode(template)
        template.add(str(free_slot), cat, before=before)
        notes.append("insert '%s' into existing {{%s|hu}}" %
                     (cat, tname(template)))
        pagemsg("Replaced %s with %s" % (original, unicode(template)))
        return secbody, unicode(tail_tree) + separator

    # Need to create {{cln}}.
    newtext = "{{cln|hu|%s}}" % cat
    stripped_tail = sectail.strip()
    if stripped_tail:
        stripped_tail = stripped_tail + "\n" + newtext
    else:
        stripped_tail = newtext
    notes.append("add %s" % newtext)
    pagemsg("Added %s" % newtext)
    new_body = secbody.rstrip("\n") + "\n"
    new_tail = "\n" + stripped_tail + "\n\n" + separator.lstrip("\n")
    return new_body, new_tail
示例#12
0
def put_back_new_inflection_of_params(t,
                                      notes,
                                      tags,
                                      params,
                                      lang,
                                      term,
                                      tr,
                                      alt,
                                      convert_to_more_specific_template=False):
    """Rebuild an inflection-of style template from its canonical pieces.

    All existing params are erased. Then 1=lang, 2=term and (if non-empty)
    tr= are written, each passed through ``remove_comment_continuations``.

    If ``convert_to_more_specific_template`` is true, the template is one of
    ``generic_inflection_of_templates`` and ``tuple(tags)`` is a key of
    ``tags_to_templates``, the template is renamed to the mapped name, 3= is
    written only when the alt text is non-empty, and a note is appended.
    Otherwise 3=alt is always written and the tags follow as 4=, 5=, ...

    Finally each ``(name, value, showkey)`` triple in ``params`` is added.
    """
    # Erase all params.
    del t.params[:]

    # Put back new params.

    # Strip comment continuations and line breaks. Such cases generally
    # have linebreaks after semicolons as well, but we remove those.
    # (FIXME, consider preserving them.)
    t.add("1", remove_comment_continuations(lang))
    t.add("2", remove_comment_continuations(term))
    translit = remove_comment_continuations(tr)
    if translit:
        t.add("tr", translit)

    use_specific_template = (convert_to_more_specific_template
                             and tname(t) in generic_inflection_of_templates
                             and tuple(tags) in tags_to_templates)
    if use_specific_template:
        specific_name = tags_to_templates[tuple(tags)]
        generic_name = tname(t)
        # Convert to more specific template, e.g. {{plural of}}.
        blib.set_template_name(t, specific_name)
        alt_text = remove_comment_continuations(alt)
        if alt_text:
            t.add("3", alt_text)
        notes.append(
            "replace {{%s|%s|%s|...|%s}} with {{%s|%s|%s}}" %
            (generic_name, lang, term, "|".join(tags), specific_name, lang,
             term))
    else:
        t.add("3", remove_comment_continuations(alt))
        # Put back the tags into the template, starting at param 4.
        for position, tag in enumerate(tags, 4):
            t.add(str(position), tag)

    # Finally, put back misc. tags.
    for misc_name, misc_value, misc_showkey in params:
        t.add(misc_name, misc_value, showkey=misc_showkey,
              preserve_spacing=False)
示例#13
0
  def etym_section_is_movable(sectext, header):
    """Return whether an Arabic etymology section can safely be moved.

    A section is movable when all of the following hold:
      * it has at least one {{inflection of}} whose tag list is in
        ``split_recognized_tag_sets``, and none whose tag list is not;
      * every {{inflection of}} is for language "ar";
      * every {{ar-verb-form}} has a form ending in waw, or waw plus sukun;
      * it contains no templates other than {{inflection of}},
        {{ar-verb-form}}, {{also}}, {{ar-root}}, {{nonlemma}} and {{ar-IPA}}.

    ``pagemsg`` and ``split_recognized_tag_sets`` come from the enclosing
    scope, which is not visible here. ``header`` is used only in warnings.
    """
    parsed = blib.parse_text(sectext)
    inflection_of_templates_with_unrecognized_tags = []
    saw_inflection_of_with_recognized_tag = False
    for t in parsed.filter_templates():
      tn = tname(t)
      if tn == "inflection of":
        # With lang= the tags start at param 3; otherwise the language is
        # param 1 and the tags start at param 4.
        if getparam(t, "lang"):
          lang = getparam(t, "lang")
          first_tag_param = 3
        else:
          lang = getparam(t, "1")
          first_tag_param = 4
        if lang != "ar":
          pagemsg("WARNING: Non-Arabic language in Arabic {{inflection of}} in %s, skipping: %s" % (header, unicode(t)))
          return False
        tags = []
        for param in t.params:
          pn = pname(param)
          pv = unicode(param.value).strip()
          if re.search("^[0-9]+$", pn) and int(pn) >= first_tag_param:
            tags.append(pv)
        if tags not in split_recognized_tag_sets:
          inflection_of_templates_with_unrecognized_tags.append(unicode(t))
        else:
          saw_inflection_of_with_recognized_tag = True

    if not saw_inflection_of_with_recognized_tag:
      return False

    if inflection_of_templates_with_unrecognized_tags:
      pagemsg("WARNING: Unrecognized {{inflection of}} tag set mixed with recognized ones in %s, skipping: %s" %
        (header, " / ".join(inflection_of_templates_with_unrecognized_tags)))
      return False

    for t in parsed.filter_templates():
      tn = tname(t)
      if tn in ["also", "ar-root", "nonlemma", "ar-IPA"]:
        continue
      if tn == "ar-verb-form":
        form = getparam(t, "1")
        # Reject only forms ending in neither bare waw nor waw plus sukun.
        # The old test (not A and B) rejected exactly the valid waw+sukun
        # forms and let forms without any final waw through.
        if not form.endswith((u"و", u"وْ")):
          pagemsg("WARNING: ar-verb-form form doesn't end with waw in %s with recognized {{inflection of}} tags, skipping: %s" % (header, unicode(t)))
          return False
        continue
      if tn != "inflection of":
        pagemsg("WARNING: Unrecognized template in %s with recognized {{inflection of}} tags, skipping: %s" % (header, unicode(t)))
        return False
    return True
示例#14
0
  def fix_up_section(sectext, warn_on_multiple_heads):
    """Replace manual Latin IPA in a section with {{la-IPA}}.

    The section must have exactly one distinct headword (links removed);
    otherwise it is returned unchanged. A manual Classical pronunciation
    ({{a|Classical}} {{IPA...}}, or a bulleted {{IPA|...|lang=la}} line) is
    replaced by {{la-IPA|HEAD}}. If a manual Ecclesiastical pronunciation line
    is present and the section then has exactly one {{la-IPA}}, that template
    gets |eccl=yes and the manual line is removed.

    ``pagetitle``, ``pagemsg`` and ``notes`` come from the enclosing scope,
    which is not visible here.

    Args:
      sectext: wikitext of the section.
      warn_on_multiple_heads: whether to log a warning when more than one
        headword is found.

    Returns:
      The possibly modified section text.
    """
    parsed = blib.parse_text(sectext)

    heads = set()
    for t in parsed.filter_templates():
      if lalib.la_template_is_head(t):
        heads |= set(blib.remove_links(x) for x in lalib.la_get_headword_from_template(t, pagetitle, pagemsg))
    if len(heads) > 1:
      if warn_on_multiple_heads:
        pagemsg("WARNING: Found multiple possible heads, not modifying: %s" % ",".join(heads))
      return sectext
    if len(heads) == 0:
      pagemsg("WARNING: Found no possible heads, not modifying: %s" % ",".join(heads))
      return sectext

    la_ipa = "{{la-IPA|%s}}" % list(heads)[0]
    # Callable replacements keep any backslash in the headword from being
    # interpreted as a regex escape or group reference.
    newsectext = re.sub(r"\{\{a\|Classical\}\} \{\{IPA(char)?\|.*?\}\}", lambda m: la_ipa, sectext)
    # This pattern consumes the leading "* ", so put the bullet back; the
    # substitution above leaves its bullet in place.
    newsectext = re.sub(r"^\* \{\{IPA(char)?\|.*?\|lang=la\}\}", lambda m: "* " + la_ipa, newsectext, flags=re.M)
    if newsectext != sectext:
      notes.append("replaced manual Latin pronun with {{la-IPA|%s}}" % list(heads)[0])
      sectext = newsectext

    # Collect {{la-IPA}} templates only now, as one may just have been added.
    parsed = blib.parse_text(sectext)
    pronun_templates = [t for t in parsed.filter_templates() if tname(t) == "la-IPA"]
    if "{{a|Ecclesiastical}} {{IPA" in sectext:
      if len(pronun_templates) == 0:
        pagemsg("WARNING: Found manual Ecclesiastical pronunciation but not {{la-IPA}} template")
      elif len(pronun_templates) > 1:
        pagemsg("WARNING: Found manual Ecclesiastical pronunciation and multiple {{la-IPA}} templates: %s" %
          ",".join(unicode(tt) for tt in pronun_templates))
      else:
        origt = unicode(pronun_templates[0])
        pronun_templates[0].add("eccl", "yes")
        # Re-serialize the tree so |eccl=yes reaches the returned text;
        # substituting on the old ``sectext`` would silently drop it.
        with_eccl = unicode(parsed)
        newsectext = re.sub(r"^\* \{\{a\|Ecclesiastical\}\} \{\{IPA(char)?\|.*?\}\}\n", "",
            with_eccl, flags=re.M)
        if newsectext == with_eccl:
          # Leave the section untouched rather than keep both pronunciations.
          pagemsg("WARNING: Unable to remove manual Ecclesiastical prounciation")
        else:
          pagemsg("Replaced %s with %s" % (origt, unicode(pronun_templates[0])))
          notes.append("removed manual Ecclesiastical pronunciation and added |eccl=yes to {{la-IPA}}")
          sectext = newsectext
    return sectext
def process_text_on_page(index, pagetitle, text):
  """Log the verb class computed for every {{de-conj}} on the page.

  Each {{de-conj}} call is rewritten as a call to
  {{User:Benwing2/de-generate-verb-props}} and expanded; the "class" entry of
  the split result is logged. No template is modified here and no notes are
  produced.

  Returns ``(unicode(parsed), notes)`` with ``notes`` always empty.
  """
  global args

  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

  def expand_text(tempcall):
    return blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)

  notes = []

  pagemsg("Processing")

  tree = blib.parse_text(text)
  for template in tree.filter_templates():
    before = unicode(template)
    if tname(template) == "de-conj":
      generator_call = re.sub(r"^\{\{de-conj(?=[|}])", "{{User:Benwing2/de-generate-verb-props", unicode(template))
      expansion = expand_text(generator_call)
      if not expansion:
        continue
      props = blib.split_generate_args(expansion)
      pagemsg("For %s, class=%s" % (unicode(template), props["class"]))

    after = unicode(template)
    if after != before:
      pagemsg("Replaced <%s> with <%s>" % (before, after))

  return unicode(tree), notes
示例#16
0
def process_text_on_page(index, pagetitle, text):
    """Warn about Latin headwords that do not match the page title.

    For every template named in ``lalib.la_headword_templates`` in the Latin
    section, each headword (links and macrons removed) is compared with the
    page title. For titles starting with "Reconstruction" the comparison uses
    "*" plus the part after the last slash. Mismatches are logged.

    This is a read-only check: it always returns ``(None, None)``.
    """
    global args

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    if not args.stdin:
        pagemsg("Processing")

    found = lalib.find_latin_section(text, pagemsg)
    if found is None:
        return None, None
    sections, j, secbody, sectail, has_non_latin = found

    # The expected headword depends only on the title, so compute it once.
    if pagetitle.startswith("Reconstruction"):
        expected_head = "*" + re.sub(".*/", "", pagetitle)
    else:
        expected_head = pagetitle

    for template in blib.parse_text(secbody).filter_templates():
        if tname(template) not in lalib.la_headword_templates:
            continue
        for head in lalib.la_get_headword_from_template(
                template, pagetitle, pagemsg):
            plain_head = remove_macrons(blib.remove_links(head))
            if plain_head != expected_head:
                pagemsg("WARNING: Bad Latin head: %s" % unicode(template))
    return None, None
def process_page(page, index, parsed):
    """Drop the period after ab/ad/ob/sub before l or r in {{la-IPA}} param 1.

    Pages whose title contains a colon are skipped.

    Returns ``(new_text, notes)``, or ``(None, None)`` for skipped pages.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    if ":" in pagetitle:
        pagemsg("Skipping page with colon in pagetitle")
        return None, None

    changes = []

    for template in parsed.filter_templates():
        if tname(template) != "la-IPA":
            continue
        old_value = getparam(template, "1")
        new_value = re.sub(r"^(a[bd]|ob|sub)\.([lr])", r"\1\2", old_value)
        if new_value == old_value:
            continue
        before = unicode(template)
        template.add("1", new_value)
        pagemsg("Replaced %s with %s" % (before, unicode(template)))
        changes.append("remove unnecessary period in %s in {{la-IPA}}" %
                       old_value)

    return unicode(parsed), changes
示例#18
0
def process_text_on_page_for_full_conj(index, pagename, text, verbs):
  """Add the conjugation spec from ``verbs`` to a Spanish multiword verb page.

  ``verbs[pagename]`` is the conjugation entry. It is blanked when equal to
  the default "FIRST<> [[w2]] [[w3]]..." form, and stripped of links when it
  differs from the default only inside the angle brackets.

  {{es-verb}} templates carrying attn= lose attn= and gain 1=ENTRY (when the
  entry is non-blank). {{head|es|verb}} templates are converted to
  {{es-verb}} in the same way, dropping any head=.

  Returns ``(new_text, notes)``, or None (bare return) when ``pagename`` is
  not in ``verbs``. The page name is split on its first space, so a name
  without a space raises ValueError.
  """
  global args

  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagename, txt))

  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, pagename, txt))

  pagemsg("Processing")

  changes = []

  if pagename not in verbs:
    pagemsg("WARNING: Can't find entry, skipping")
    return

  entry = verbs[pagename]
  original_entry = entry
  first_word, remainder = pagename.split(" ", 1)
  linked_rest = " ".join("[[%s]]" % word for word in remainder.split(" "))
  default_entry = "%s<> %s" % (first_word, linked_rest)
  if default_entry == entry:
    pagemsg("Replacing entry '%s' with a blank entry because it's the default" % entry)
    entry = ""
  elif re.sub("<.*?>", "<>", entry) == default_entry:
    unlinked_entry = blib.remove_links(entry)
    pagemsg("Replacing entry '%s' with entry without links '%s'" % (entry, unlinked_entry))
    entry = unlinked_entry

  tree = blib.parse_text(text)
  for template in tree.filter_templates():
    name = tname(template)
    before = unicode(template)
    if name == "es-verb":
      if not getparam(template, "attn"):
        pagemsg("Didn't see attn=1: %s" % unicode(template))
        continue
      rmparam(template, "attn")
      if entry:
        template.add("1", entry)
        changes.append("add conjugation '%s' to Spanish verb" % entry)
      else:
        changes.append("add conjugation (default) to Spanish verb")
    elif name == "head" and getparam(template, "1") == "es" and getparam(template, "2") == "verb":
      explicit_head = getparam(template, "head")
      if explicit_head:
        pagemsg("WARNING: Removing head=%s compared with entry '%s', original entry '%s': %s" %
            (explicit_head, entry, original_entry, unicode(template)))
        rmparam(template, "head")
      rmparam(template, "2")
      rmparam(template, "1")
      blib.set_template_name(template, "es-verb")
      if entry:
        template.add("1", entry)
        changes.append("convert {{head|es|verb}} to {{es-verb|%s}}" % entry)
      else:
        changes.append("convert {{head|es|verb}} to {{es-verb}}")
    after = unicode(template)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  return unicode(tree), changes
示例#19
0
def process_text_on_page_for_single_word(index, pagename, text, spec):
  """Add a conjugation spec to {{es-verb}} templates flagged with attn=.

  For each {{es-verb}} with attn= set, attn= is removed and then:
    * if ``spec`` contains "<", 1= is set to ``pagename + spec``;
    * if ``spec`` is "*", nothing is added (default conjugation);
    * otherwise pres= is set to ``spec``.

  Returns ``(new_text, notes)``.
  """
  global args

  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagename, txt))

  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, pagename, txt))

  pagemsg("Processing")

  changes = []

  tree = blib.parse_text(text)
  for template in tree.filter_templates():
    name = tname(template)
    before = unicode(template)
    if name == "es-verb":
      if not getparam(template, "attn"):
        pagemsg("Didn't see attn=1: %s" % unicode(template))
        continue
      rmparam(template, "attn")
      if "<" in spec:
        template.add("1", "%s%s" % (pagename, spec))
        changes.append("add conjugation %s%s to Spanish verb" % (pagename, spec))
      elif spec == "*":
        changes.append("add conjugation (default) to Spanish verb")
      else:
        template.add("pres", spec)
        changes.append("add conjugation pres=%s to Spanish verb" % spec)
    after = unicode(template)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  return unicode(tree), changes
示例#20
0
def process_page(page, index, parsed):
    """Convert old Latin adjective declension templates to {{la-adecl}}.

    {{la-decl-multi}} and {{la-decl-irreg}} with noun= set are only logged
    and left alone. Any template named in ``old_la_adj_decl_templates`` is
    passed to ``convert_template_to_new``; if that returns a false value the
    whole page is abandoned.

    Returns ``(new_text, notes)``, or ``(None, None)`` if a conversion failed.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    changes = []

    for template in parsed.filter_templates():
        name = tname(template)
        if name == "la-decl-multi":
            pagemsg("Skipping la-decl-multi for now: %s" % unicode(template))
            continue
        if name == "la-decl-irreg" and getparam(template, "noun"):
            pagemsg("Skipping noun la-decl-irreg: %s" % unicode(template))
            continue
        if name not in old_la_adj_decl_templates:
            continue
        if not convert_template_to_new(template, pagetitle, pagemsg,
                                       errandpagemsg):
            return None, None
        changes.append("converted {{%s}} to {{la-adecl}}" % name)

    return unicode(parsed), changes
def process_page(page, index, parsed):
    """Normalize the language code and sort= of {{rfdef}} templates.

    Templates with lang= are skipped with a warning. Otherwise a language
    code in param 1 that is found in ``langs_to_convert`` is replaced by its
    mapped value, and sort= is removed when the resulting language is in
    ``langs_to_remove_sort``.

    Returns ``(new_text, notes)``.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    changes = []

    for template in parsed.filter_templates():
        before = unicode(template)
        if tname(template) == "rfdef":
            if getparam(template, "lang"):
                pagemsg("WARNING: has lang=, skipping: %s" %
                        unicode(template))
                continue
            code = getparam(template, "1")
            if code in langs_to_convert:
                converted_code = langs_to_convert[code]
                template.add("1", converted_code)
                changes.append("convert {{rfdef|%s}} to {{rfdef|%s}}" %
                               (code, converted_code))
                code = converted_code
            if code in langs_to_remove_sort and template.has("sort"):
                rmparam(template, "sort")
                changes.append(
                    "remove sort= from {{rfdef|%s}}, now auto-computed" %
                    code)
        after = unicode(template)
        if after != before:
            pagemsg("Replaced <%s> with <%s>" % (before, after))

    return unicode(parsed), changes
示例#22
0
def la_template_is_head(t):
    """Return True if ``t`` is a Latin headword template.

    That is, its name is in ``la_headword_templates``, or it is {{head}} with
    param 1 equal to "la".
    """
    name = tname(t)
    is_generic_latin_head = name == "head" and getparam(t, "1") == "la"
    return name in la_headword_templates or is_generic_latin_head
示例#23
0
def process_text_on_page(index, pagetitle, text):
    """Remove a redundant 1=PAGETITLE from es-IPA / fr-IPA / it-IPA templates.

    A template with any non-empty param among 2..10 is left untouched.

    Returns ``(new_text, notes)``, or None (bare return) when none of the
    three template names occurs in ``text``.
    """
    ipa_template_names = ["es-IPA", "fr-IPA", "it-IPA"]

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    changes = []

    if not any(name in text for name in ipa_template_names):
        return

    tree = blib.parse_text(text)

    for template in tree.filter_templates():
        name = tname(template)
        before = unicode(template)
        if name not in ipa_template_names:
            continue
        # First extra numbered param that is set, if any.
        extra_param = next(
            (num for num in xrange(2, 11) if getparam(template, str(num))),
            None)
        if extra_param is not None:
            pagemsg("Template has %s=, not touching: %s" %
                    (extra_param, before))
            continue
        respelling = getparam(template, "1")
        if respelling == pagetitle:
            rmparam(template, "1")
            changes.append("remove redundant 1=%s from {{%s}}" %
                           (respelling, name))
        after = unicode(template)
        if after != before:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(tree), changes
def find_head_comp_sup(pagetitle, pagemsg):
    """Look up head, comparative and superlative from a page's {{la-adv}}.

    The page `pagetitle` is fetched and the first {{la-adv}} template found
    decides the result. The head is 1=; the comparative is comp= or 2=; the
    superlative is sup= or 3=. When either degree is missing, a default is
    built by stripping a recognized ending from the head and appending
    "ius" (comparative) or "issime" with a macron on the final e
    (superlative); for the "nter" ending the "nt" is kept on the stem.

    Returns a (head, comp, sup) tuple. If the head's ending is not
    recognized, a warning is logged through `pagemsg` and the values are
    returned as found (missing ones stay empty). If the page has no
    {{la-adv}} template, (None, None, None) is returned.
    """
    # Order matters: longer endings are tried before their shorter tails.
    endings = ["iter", "nter", "ter", "er", u"iē", u"ē", "im", u"ō"]

    page = pywikibot.Page(site, pagetitle)
    parsed = blib.parse_text(unicode(page.text))
    for t in parsed.filter_templates():
        if tname(t) != "la-adv":
            continue
        head = getparam(t, "1")
        comp = getparam(t, "comp") or getparam(t, "2")
        sup = getparam(t, "sup") or getparam(t, "3")
        if comp and sup:
            return head, comp, sup

        stem = None
        for ending in endings:
            m = re.search("^(.*?)%s$" % ending, head)
            if m:
                stem = m.group(1)
                if ending == "nter":
                    stem += "nt"
                break
        if stem is None:
            pagemsg(
                "WARNING: Didn't recognize ending of adverb headword %s"
                % head)
            return head, comp, sup

        return head, comp or stem + "ius", sup or stem + u"issimē"
    return None, None, None
 def replace_trans(m, newlangcode, newlangname):
     """Rewrite "ku" translation templates inside one matched entry.

     `m` is a regex match with exactly two groups: a prefix, returned
     unchanged, and the translation text, which is parsed for templates.

     - Templates named in `trans_templates` whose 1= is "ku" get 1= set to
       `newlangcode` and their sc= parameter removed.
     - {{t-simple}} templates whose 1= is "ku" get 1= set to `newlangcode`
       and langname= set to `newlangname`, but only when langname= is
       currently "Kurdish"; otherwise a warning is logged and the template
       is left untouched.

     Uses `pagemsg` and `notes` from the enclosing scope for logging and
     for recording change notes. Returns the prefix followed by the
     rewritten translation text.
     """
     prefix, transtext = m.groups()
     parsed = blib.parse_text(transtext)
     for t in parsed.filter_templates():
         origt = unicode(t)
         tn = tname(t)
         if tn in trans_templates:
             if getparam(t, "1") == "ku":
                 t.add("1", newlangcode)
                 rmparam(t, "sc")
                 pagemsg(
                     "Replaced %s with %s based on language prefix of translation entry"
                     % (origt, unicode(t)))
                 notes.append(
                     "{{%s|ku}} -> {{%s|%s}} based on language prefix of translation entry"
                     % (tn, tn, newlangcode))
         elif tn == "t-simple":
             if getparam(t, "1") == "ku":
                 # The comparison must be applied to the parameter's value.
                 # Previously the closing parenthesis was misplaced so that
                 # the boolean `"langname" != "Kurdish"` was passed as the
                 # parameter name and this sanity check never fired.
                 if getparam(t, "langname") != "Kurdish":
                     pagemsg(
                         "WARNING: Something wrong, t-simple|ku without langname=Kurdish: %s"
                         % unicode(t))
                 else:
                     t.add("1", newlangcode)
                     t.add("langname", newlangname)
                     pagemsg("Replaced %s with %s based on prefix" %
                             (origt, unicode(t)))
                     notes.append(
                         "{{t-simple|ku|langname=Kurdish}} -> {{t-simple|%s|langname=%s}} based on language prefix"
                         % (newlangcode, newlangname))
     transtext = unicode(parsed)
     return prefix + transtext
示例#26
0
def process_page_for_fix(page, index, parsed):
  """Convert raw links and "ku" templates on a page to "kmr".

  Every raw [[...]] link in the page text becomes {{l|kmr|...}}. Then, in
  the re-parsed text, {{l}} and {{rhymes nav}} templates whose 1= is "ku"
  get 1= changed to "kmr"; any other template whose 1= is "ku" only
  triggers a warning.

  The incoming `parsed` argument is not used; the text is taken from
  `page.text` and parsed afresh. Returns a (new_text, notes) tuple.
  """
  pagename = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagename, txt))
  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, pagename, txt))

  pagemsg("Processing")

  notes = []

  original_text = unicode(page.text)
  text = re.sub(r"\[\[(.*?)\]\]", r"{{l|kmr|\1}}", original_text)
  if text != original_text:
    notes.append("convert raw links to {{l|kmr|...}}")

  parsed = blib.parse_text(text)
  for t in parsed.filter_templates():
    before = unicode(t)
    name = tname(t)
    if getparam(t, "1") == "ku":
      if name in ["l", "rhymes nav"]:
        t.add("1", "kmr")
        notes.append("convert {{%s|ku}} to {{%s|kmr}}" % (name, name))
      else:
        pagemsg("WARNING: Kurdish-language template of unrecognized name: %s" % unicode(t))
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  return unicode(parsed), notes
def process_text_on_page(index, pagename, text):
    """Normalize parameters of {{RQ:Buk Baibel}} templates in `text`.

    For each such template, 1= is replaced by its `book_map` entry when it
    has one, and a non-empty 4= is moved to passage= (inserted where 4=
    was). Returns a (new_text, notes) tuple.
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagename, txt))

    pagemsg("Processing")

    notes = []

    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        tn = tname(t)
        if tn != "RQ:Buk Baibel":
            continue
        origt = unicode(t)

        book = getparam(t, "1")
        if book in book_map:
            mapped_book = book_map[book]
            t.add("1", mapped_book)
            notes.append("convert '%s' to '%s' in 1= in {{%s}}" %
                         (book, mapped_book, tn))

        passage = getparam(t, "4")
        if passage:
            # Add passage= in front of 4= first so it takes over 4='s
            # position, then drop 4=.
            t.add("passage", passage, before="4")
            rmparam(t, "4")
            notes.append("4= -> passage= in {{%s}}" % tn)

        if unicode(t) != origt:
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))

    return unicode(parsed), notes
示例#28
0
def process_page(page, index, parsed):
    """Remove and rename parameters of {{R:Lexico}} templates.

    In each {{R:Lexico}} template of `parsed`, lang= is removed and
    entry_uk=, url_uk= and 4= are renamed to entry=, url= and text=
    respectively. An old parameter with an empty value is simply removed.

    Returns a (parsed, notes) tuple, where `parsed` is the same object that
    was passed in, modified in place.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    # (old parameter, replacement parameter), handled in this order.
    renames = [("entry_uk", "entry"), ("url_uk", "url"), ("4", "text")]

    notes = []
    for t in parsed.filter_templates():
        if tname(t) != "R:Lexico":
            continue
        origt = unicode(t)
        rmparam(t, "lang")
        for old_name, new_name in renames:
            value = getparam(t, old_name)
            if value:
                # Insert before the old parameter so the position is kept.
                t.add(new_name, value, before=old_name)
            rmparam(t, old_name)
        newt = unicode(t)
        if origt != newt:
            notes.append("Remove/rearrange params in {{R:Lexico}}")
            pagemsg("Replaced %s with %s" % (origt, newt))

    return parsed, notes
示例#29
0
def snarf_adj_accents():
    """Fill `adjs_to_accents` from the "Bulgarian adjectives" category.

    For every {{bg-adj}} template on every page in the category, the 1=
    headword is stored in `adjs_to_accents` under its accent-stripped form
    (per `bglib.remove_accents`). Templates with an empty 1=, or whose
    headword `bglib.needs_accents` reports as needing accents, are skipped
    with a warning. When a different accented form is already stored for
    the same key, a warning is logged and the newer form overwrites it.
    """
    for index, page in blib.cat_articles("Bulgarian adjectives"):
        pagetitle = unicode(page.title())

        def pagemsg(txt):
            msg("Page %s %s: %s" % (index, pagetitle, txt))

        for t in blib.parse(page).filter_templates():
            if tname(t) != "bg-adj":
                continue
            adj = getparam(t, "1")
            if not adj:
                pagemsg("WARNING: Missing headword in adj: %s" % unicode(t))
                continue
            if bglib.needs_accents(adj):
                pagemsg("WARNING: Adjective %s missing an accent: %s" %
                        (adj, unicode(t)))
                continue
            key = bglib.remove_accents(adj)
            if key in adjs_to_accents:
                known = adjs_to_accents[key]
                if known != adj:
                    pagemsg(
                        "WARNING: Two different accents possible for %s: %s and %s: %s"
                        % (key, known, adj, unicode(t)))
            adjs_to_accents[key] = adj
def process_page(page, index, parsed):
    """Move head= to 1= in {{ang-adj}} templates on a page.

    For each {{ang-adj}} template without a 1= value, head= is removed and,
    when it had a non-empty value, that value is stored as 1=. Two
    situations only produce a warning and no change: a {{head}} template
    with 1= "ang" and 2= "adjective"/"adjectives", and an {{ang-adj}} that
    already has a 1= value.

    The incoming `parsed` argument is not used; the text is taken from
    `page.text` and parsed afresh. Returns a (parsed, notes) tuple with the
    freshly parsed (and possibly modified) object.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    notes = []

    text = unicode(page.text)
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        tn = tname(t)
        origt = unicode(t)
        if tn == "head" and getparam(t, "1") == "ang" and getparam(
                t, "2") in ["adjective", "adjectives"]:
            pagemsg("WARNING: {{head}} for adjectives, should not occur: %s" %
                    unicode(t))
        elif tn == "ang-adj":
            if getparam(t, "1"):
                pagemsg("WARNING: 1= in ang-adj, should not occur: %s" %
                        unicode(t))
            else:
                head = getparam(t, "head")
                rmparam(t, "head")
                if head:
                    t.add("1", head)
                # Record the note only when the template really changed;
                # an {{ang-adj}} with no head= at all is left as it was and
                # must not contribute a change note.
                if unicode(t) != origt:
                    notes.append("move head= to 1= in {{ang-adj}}")
        if unicode(t) != origt:
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))
    return parsed, notes