Exemplo n.º 1
0
def process_page(index, page, save, verbose):
  """Rewrite legacy ``ru-conj-*`` templates on a page as ``ru-conj``.

  NOTE: the script is currently disabled. The function logs a warning and
  returns right away; all code after that early ``return`` is unreachable
  and is documented here only as it reads.

  Args:
    index: Value shown as the page index in every log message.
    page: Page object; the code uses ``page.title()``, ``page.text`` and
      ``page.save(comment=...)``.
    save: If true, the (unreachable) tail stores the new text and saves the
      page; otherwise it only logs what would be saved.
    verbose: If true, the (unreachable) tail logs the full old and new text.

  Returns:
    None.
  """
  pagetitle = unicode(page.title())
  # Logging helper that prefixes every message with the page index and title.
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  # Script is disabled: warn and bail out before the page is touched.
  # Everything below this return never runs.
  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  # One entry per converted template; joined into the save comment below.
  notes = []
  for t in parsed.filter_templates():
    origt = unicode(t)
    tname = unicode(t.name)
    # Convert every ru-conj-<type> template except ru-conj-verb-see.
    if tname.startswith("ru-conj-") and tname != "ru-conj-verb-see":
      m = re.search("^ru-conj-(.*)$", tname)
      t.name = "ru-conj"
      # The part after "ru-conj-" becomes the conjugation type.
      conjtype = m.group(1)
      # Number of the positional param read as the variant for this type;
      # stays None for types not listed below.
      varargno = None
      variant = None
      if conjtype in ["3oa", "4a", "4b", "4c", "6a", "6c", "11a", "16a", "16b", u"irreg-дать", u"irreg-клясть", u"irreg-быть"]:
        varargno = 3
      elif conjtype in ["5a", "5b", "5c", "6b", "9a", "9b", "11b", "14a", "14b", "14c"]:
        varargno = 4
      elif conjtype in ["7b"]:
        varargno = 5
      elif conjtype in ["7a"]:
        varargno = 6
      if varargno:
        variant = getparam(t, str(varargno))
        # A variant beginning with a, b or c is joined with a slash.
        if re.search("^[abc]", variant):
          variant = "/" + variant
        # If any of the next three params is set, blank this slot instead of
        # removing it (presumably so the later params keep their positions --
        # TODO confirm against rmparam's behaviour).
        if getparam(t, str(varargno + 1)) or getparam(t, str(varargno + 2)) or getparam(t, str(varargno + 3)):
          t.add(str(varargno), "")
        else:
          rmparam(t, str(varargno))
        # Fold the variant into the conjugation type string.
        conjtype = conjtype + variant
      # and/or idiom: the suffix is added only when variant is non-empty.
      notes.append("ru-conj-* -> ru-conj, moving params up by one%s" %
          (variant and " (and move variant spec)" or ""))
      # Walk params 20 down to 1; starting at the highest non-empty one, copy
      # each value to the next-higher number so that slot 1 can be reused.
      seenval = False
      for i in xrange(20, 0, -1):
        val = getparam(t, str(i))
        if val:
          seenval = True
        if seenval:
          t.add(str(i + 1), val)
      # Param 1 now holds the conjugation type (plus any variant).
      t.add("1", conjtype)
      blib.sort_params(t)
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    # A changed page is expected to carry at least one note.
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)
Exemplo n.º 2
0
def process_page(page, index, parsed):
    """Rewrite legacy ``ru-conj-*`` templates on a page as ``ru-conj``.

    NOTE: the script is currently disabled. The function logs a warning and
    returns right away; all code after that early ``return`` is unreachable
    and is documented here only as it reads.

    Args:
        page: Page object; the code uses ``page.title()`` and ``page.text``.
        index: Value shown as the page index in every log message.
        parsed: Not read before being overwritten; the unreachable body
            rebinds it to ``blib.parse(page)``.

    Returns:
        None, because of the early return. The unreachable tail would return
        a ``(new_text, notes)`` tuple instead.
    """
    pagetitle = unicode(page.title())

    # Logging helper that prefixes every message with the page index and title.
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    # Script is disabled: warn and bail out before the page is touched.
    # Everything below this return never runs.
    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    # One entry per converted template; returned alongside the new text.
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        tname = unicode(t.name)
        # Convert every ru-conj-<type> template except ru-conj-verb-see.
        if tname.startswith("ru-conj-") and tname != "ru-conj-verb-see":
            m = re.search("^ru-conj-(.*)$", tname)
            t.name = "ru-conj"
            # The part after "ru-conj-" becomes the conjugation type.
            conjtype = m.group(1)
            # Number of the positional param read as the variant for this
            # type; stays None for types not listed below.
            varargno = None
            variant = None
            if conjtype in [
                    "3oa", "4a", "4b", "4c", "6a", "6c", "11a", "16a", "16b",
                    u"irreg-дать", u"irreg-клясть", u"irreg-быть"
            ]:
                varargno = 3
            elif conjtype in [
                    "5a", "5b", "5c", "6b", "9a", "9b", "11b", "14a", "14b",
                    "14c"
            ]:
                varargno = 4
            elif conjtype in ["7b"]:
                varargno = 5
            elif conjtype in ["7a"]:
                varargno = 6
            if varargno:
                variant = getparam(t, str(varargno))
                # A variant beginning with a, b or c is joined with a slash.
                if re.search("^[abc]", variant):
                    variant = "/" + variant
                # If any of the next three params is set, blank this slot
                # instead of removing it (presumably so the later params keep
                # their positions -- TODO confirm against rmparam).
                if getparam(t, str(varargno + 1)) or getparam(
                        t, str(varargno + 2)) or getparam(
                            t, str(varargno + 3)):
                    t.add(str(varargno), "")
                else:
                    rmparam(t, str(varargno))
                # Fold the variant into the conjugation type string.
                conjtype = conjtype + variant
            # and/or idiom: the suffix is added only when variant is non-empty.
            notes.append("ru-conj-* -> ru-conj, moving params up by one%s" %
                         (variant and " (and move variant spec)" or ""))
            # Walk params 20 down to 1; starting at the highest non-empty one,
            # copy each value to the next-higher number so slot 1 can be reused.
            seenval = False
            for i in xrange(20, 0, -1):
                val = getparam(t, str(i))
                if val:
                    seenval = True
                if seenval:
                    t.add(str(i + 1), val)
            # Param 1 now holds the conjugation type (plus any variant).
            t.add("1", conjtype)
            blib.sort_params(t)
        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    return unicode(parsed), notes
Exemplo n.º 3
0
def process_page(index, page, direc, save, verbose):
  """Rewrite ``ru-conj-7b`` templates on a page according to a directive.

  NOTE: the script is currently disabled. The function logs a warning and
  returns right away; all code after that early ``return`` is unreachable
  and is documented here only as it reads.

  Args:
    index: Value shown as the page index in every log message.
    page: Page object; the code uses ``page.title()``, ``page.text`` and
      ``page.save(comment=...)``.
    direc: Directive string. The body tests it for the substrings ``npp``
      and ``ё``, strips those plus any ``7b`` / ``7b/``, and uses what is
      left as the value of template param 5.
    save: If true, the (unreachable) tail stores the new text and saves the
      page; otherwise it only logs what would be saved.
    verbose: If true, the (unreachable) tail logs the full old and new text.

  Returns:
    None.
  """
  pagetitle = unicode(page.title())
  # Logging helper that prefixes every message with the page index and title.
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  # Script is disabled: warn and bail out before the page is touched.
  # Everything below this return never runs.
  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  # One entry per rewritten template; joined into the save comment below.
  notes = []
  # direc is trimmed per template below, so keep the caller's value around.
  origdirec = direc
  for t in parsed.filter_templates():
    origt = unicode(t)
    direc = origdirec
    if unicode(t.name) in ["ru-conj-7b"]:
      # Drop these named overrides unconditionally.
      rmparam(t, "past_m")
      rmparam(t, "past_f")
      rmparam(t, "past_n")
      rmparam(t, "past_pl")
      rmparam(t, "notes")
      rmparam(t, "past_adv_part")
      rmparam(t, "past_adv_part2")
      rmparam(t, "past_adv_part_short")
      #ppps = blib.fetch_param_chain(t, "past_pasv_part", "past_pasv_part")
      #blib.remove_param_chain(t, "past_pasv_part", "past_pasv_part")
      # Param 3 is read as the present stem; params 3-5 are removed and
      # re-added further down.
      presstem = getparam(t, "3")
      rmparam(t, "5")
      rmparam(t, "4")
      rmparam(t, "3")
      # Pull the "npp" and "ё" flags out of the directive, then strip any
      # "7b" / "7b/" (re.sub is unanchored, so every occurrence goes).
      npp = "npp" in direc
      direc = direc.replace("npp", "")
      yo = u"ё" in direc
      direc = direc.replace(u"ё", "")
      direc = re.sub("7b/?", "", direc)
      # Matches a stem where an е (optionally followed by a combining acute)
      # is followed only by characters outside the listed vowel set.
      if re.search(u"е́?[^аэыоуяеиёю]*$", presstem):
        if not yo:
          # Only logged; processing continues regardless.
          pagemsg(u"Something wrong, е-stem present and no ё directive")
        if npp:
          # Helper is not visible here; presumably it stresses the final
          # syllable of the stem -- TODO confirm.
          presstem = ru.make_ending_stressed(presstem)
        else:
          # Replace that е (and its accent, if any) with ё.
          presstem = re.sub(u"е́?([^аэыоуяеиёю]*)$", ur"ё\1", presstem)
      else:
        presstem = ru.make_ending_stressed(presstem)
      pap = getparam(t, "past_actv_part")
      # Predicted past active participle: stem + "ший", or, for a stem ending
      # in д/т when the directive is neither "b" nor "b(9)", the stem without
      # that consonant + "вший".
      pred_pap = presstem + u"ший"
      if direc not in ["b", "b(9)"] and re.search(u"[дт]$", presstem):
        pred_pap = re.sub(u"[дт]$", "", presstem) + u"вший"
      if pap:
        if pap == pred_pap:
          pagemsg("Removing past_actv_part=%s because same as predicted" % pap)
          rmparam(t, "past_actv_part")
        else:
          pagemsg("Not removing unpredictable past_actv_part=%s (predicted %s)" %
              (pap, pred_pap))
      # Report any remaining param whose name is neither purely numeric nor
      # starts with "past_pasv_part".
      for param in t.params:
        if not re.search("^([0-9]+$|past_pasv_part)", unicode(param.name)):
          pagemsg("Found additional named param %s" % unicode(param))
      # Re-add the stem; when a directive remains, param 4 is set empty and
      # param 5 carries the directive.
      t.add("3", presstem)
      if direc:
        t.add("4", "")
        t.add("5", direc)
      blib.sort_params(t)
      #blib.set_param_chain(t, ppps, "past_pasv_part", "past_pasv_part")
      # and/or idiom: the suffix is added only when npp is true.
      notes.append("set class-7b verb to directive %s%s" %
          (direc, npp and u" (no ё in present stem)" or ""))
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    # A changed page is expected to carry at least one note.
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)
Exemplo n.º 4
0
def process_page(index, page, direc):
  """Rewrite ``ru-conj-7b`` templates on a page according to a directive.

  NOTE: the script is currently disabled. The function logs a warning and
  returns right away; all code after that early ``return`` is unreachable
  and is documented here only as it reads.

  Args:
    index: Value shown as the page index in every log message.
    page: Page object; the code uses ``page.title()`` and ``page.text``.
    direc: Directive string. The body tests it for the substrings ``npp``
      and ``ё``, strips those plus any ``7b`` / ``7b/``, and uses what is
      left as the value of template param 5.

  Returns:
    None, because of the early return. The unreachable tail would return a
    ``(new_text, notes)`` tuple instead.
  """
  pagetitle = unicode(page.title())
  # Logging helper that prefixes every message with the page index and title.
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  # Script is disabled: warn and bail out before the page is touched.
  # Everything below this return never runs.
  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  # One entry per rewritten template; returned alongside the new text.
  notes = []
  # direc is trimmed per template below, so keep the caller's value around.
  origdirec = direc
  for t in parsed.filter_templates():
    origt = unicode(t)
    direc = origdirec
    if unicode(t.name) in ["ru-conj-7b"]:
      # Drop these named overrides unconditionally.
      rmparam(t, "past_m")
      rmparam(t, "past_f")
      rmparam(t, "past_n")
      rmparam(t, "past_pl")
      rmparam(t, "notes")
      rmparam(t, "past_adv_part")
      rmparam(t, "past_adv_part2")
      rmparam(t, "past_adv_part_short")
      #ppps = blib.fetch_param_chain(t, "past_pasv_part", "past_pasv_part")
      #blib.remove_param_chain(t, "past_pasv_part", "past_pasv_part")
      # Param 3 is read as the present stem; params 3-5 are removed and
      # re-added further down.
      presstem = getparam(t, "3")
      rmparam(t, "5")
      rmparam(t, "4")
      rmparam(t, "3")
      # Pull the "npp" and "ё" flags out of the directive, then strip any
      # "7b" / "7b/" (re.sub is unanchored, so every occurrence goes).
      npp = "npp" in direc
      direc = direc.replace("npp", "")
      yo = u"ё" in direc
      direc = direc.replace(u"ё", "")
      direc = re.sub("7b/?", "", direc)
      # Matches a stem where an е (optionally followed by a combining acute)
      # is followed only by characters outside the listed vowel set.
      if re.search(u"е́?[^аэыоуяеиёю]*$", presstem):
        if not yo:
          # Only logged; processing continues regardless.
          pagemsg(u"Something wrong, е-stem present and no ё directive")
        if npp:
          # Helper is not visible here; presumably it stresses the final
          # syllable of the stem -- TODO confirm.
          presstem = rulib.make_ending_stressed_ru(presstem)
        else:
          # Replace that е (and its accent, if any) with ё.
          presstem = re.sub(u"е́?([^аэыоуяеиёю]*)$", ur"ё\1", presstem)
      else:
        presstem = rulib.make_ending_stressed_ru(presstem)
      pap = getparam(t, "past_actv_part")
      # Predicted past active participle: stem + "ший", or, for a stem ending
      # in д/т when the directive is neither "b" nor "b(9)", the stem without
      # that consonant + "вший".
      pred_pap = presstem + u"ший"
      if direc not in ["b", "b(9)"] and re.search(u"[дт]$", presstem):
        pred_pap = re.sub(u"[дт]$", "", presstem) + u"вший"
      if pap:
        if pap == pred_pap:
          pagemsg("Removing past_actv_part=%s because same as predicted" % pap)
          rmparam(t, "past_actv_part")
        else:
          pagemsg("Not removing unpredictable past_actv_part=%s (predicted %s)" %
              (pap, pred_pap))
      # Report any remaining param whose name is neither purely numeric nor
      # starts with "past_pasv_part".
      for param in t.params:
        if not re.search("^([0-9]+$|past_pasv_part)", unicode(param.name)):
          pagemsg("Found additional named param %s" % unicode(param))
      # Re-add the stem; when a directive remains, param 4 is set empty and
      # param 5 carries the directive.
      t.add("3", presstem)
      if direc:
        t.add("4", "")
        t.add("5", direc)
      blib.sort_params(t)
      #blib.set_param_chain(t, ppps, "past_pasv_part", "past_pasv_part")
      # and/or idiom: the suffix is added only when npp is true.
      notes.append("set class-7b verb to directive %s%s" %
          (direc, npp and u" (no ё in present stem)" or ""))
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  return unicode(parsed), notes