Exemplo n.º 1
0
def rewrite_pages(refrom, reto, refs, cat, pages, pagefile, pagetitle_sub,
    comment, filter_pages, save, verbose, startFrom, upTo):
  """Run a list of regex substitutions over a selection of pages.

  The pages come from the first of these that is truthy: `pages` (titles fed
  through blib.iter_pages), `pagefile` (UTF-8 file, one stripped line per
  title, also fed through blib.iter_pages), `refs` (blib.references);
  otherwise `cat` is passed to blib.cat_articles. Every selected page is
  handed to blib.do_edit with the substitution callback.

  refrom, reto -- parallel sequences of re.sub patterns and replacements,
      applied in order.
  pagetitle_sub -- if truthy, each occurrence of this string in a pattern is
      replaced by the regex-escaped page title, and each occurrence in a
      replacement by the page title.
  comment -- edit comment; when falsy, a "replace A -> B, ..." comment is
      built from refrom/reto.
  filter_pages -- if truthy, a regex; pages whose title it does not match
      (re.search) are skipped with a message.
  save, verbose -- forwarded to blib.do_edit.
  startFrom, upTo -- forwarded to the blib page iterators.
  """
  # Materialize the pairs once. They are iterated for the substitutions and
  # again for the default comment, which would silently yield nothing on the
  # second pass if zip() returned a one-shot iterator.
  fromto_pairs = list(zip(refrom, reto))

  def rewrite_one_page(page, index, text):
    text = unicode(text)
    text = reorder_shadda(text)
    for fromval, toval in fromto_pairs:
      if pagetitle_sub:
        pagetitle = unicode(page.title())
        # The title is escaped only in the pattern; the replacement side is
        # given the title as-is.
        fromval = fromval.replace(pagetitle_sub, re.escape(pagetitle))
        toval = toval.replace(pagetitle_sub, pagetitle)
      text = re.sub(fromval, toval, text)
    return text, comment or "replace %s" % (", ".join("%s -> %s" % (f, t) for f, t in fromto_pairs))

  if pages:
    pages = ((pywikibot.Page(blib.site, page), index) for page, index in blib.iter_pages(pages, startFrom, upTo))
  elif pagefile:
    # Read all titles up front so the file handle is closed before any page
    # is processed.
    with codecs.open(pagefile, "r", "utf-8") as titlefile:
      lines = [x.strip() for x in titlefile]
    pages = ((pywikibot.Page(blib.site, page), index) for page, index in blib.iter_pages(lines, startFrom, upTo))
  elif refs:
    pages = blib.references(refs, startFrom, upTo, includelinks=True)
  else:
    pages = blib.cat_articles(cat, startFrom, upTo)
  for page, index in pages:
    pagetitle = unicode(page.title())
    if filter_pages and not re.search(filter_pages, pagetitle):
      blib.msg("Skipping %s because doesn't match --filter-pages regex %s" %
          (pagetitle, filter_pages))
    else:
      if verbose:
        blib.msg("Processing %s" % pagetitle)
      blib.do_edit(page, index, rewrite_one_page, save=save, verbose=verbose)
Exemplo n.º 2
0
def do_pages(createfn, iterfn=iter_pages):
    """Create the pages produced by iterfn(createfn).

    Each produced item is a (pagename, text, changelog) triple, selected via
    blib.iter_pages keyed on the page name. In offline mode the text and
    changelog are only printed through msg(); otherwise the page (titled with
    the diacritics-stripped name) is created through blib.do_edit unless it
    already exists.
    """
    entries = blib.iter_pages(iterfn(createfn),
                              startFrom,
                              upTo,
                              key=lambda entry: entry[0])
    for entry, index in entries:
        pagename, text, changelog = entry
        pagetitle = remove_diacritics(pagename)

        # Offline mode: show what would be written and move on.
        if params.offline:
            msg("Text for %s: [[%s]]" % (pagename, text))
            msg("Changelog = %s" % changelog)
            continue

        page = pywikibot.Page(site, pagetitle)
        # Never overwrite an existing page.
        if page.exists():
            msg("Page %s %s: WARNING, page already exists, skipping" %
                (index, pagename))
            continue

        def save_text(page, index, parsed):
            # Ignore whatever is on the wiki; the new text is precomputed.
            return text, changelog

        blib.do_edit(page,
                     index,
                     save_text,
                     save=params.save,
                     verbose=params.verbose)
Exemplo n.º 3
0
def undo_greek_removal(save, verbose, direcfile, startFrom, upTo):
  """Restore template parameters listed in a directive file.

  Each line of `direcfile` (UTF-8) is expected to look like
  "* [[PAGE]]: Removed PARAM=...: <nowiki>TEMPLATE</nowiki>"; lines that do
  not match are reported with a warning and skipped. For each parsed line,
  the existing page is edited through blib.do_edit so that TEMPLATE without
  PARAM is replaced by TEMPLATE with PARAM present (in both forms an
  sc=polytonic parameter is dropped).

  save, verbose -- forwarded to blib.do_edit.
  direcfile -- path of the directive file.
  startFrom, upTo -- forwarded to blib.iter_pages.
  """
  template_removals = []
  # Use a context manager so the directive file is closed as soon as it has
  # been read, rather than whenever the iterator is garbage-collected.
  with codecs.open(direcfile, "r", encoding="utf-8") as direcs:
    for line in direcs:
      line = line.strip()
      m = re.match(r"\* \[\[(.*?)]]: Removed (.*?)=.*?: <nowiki>(.*?)</nowiki>$",
          line)
      if not m:
        msg("WARNING: Unable to parse line: [%s]" % line)
      else:
        template_removals.append(m.groups())

  for current, index in blib.iter_pages(template_removals, startFrom, upTo,
      # key is the page name
      key = lambda x: x[0]):
    pagename, removed_param, template_text = current

    def undo_one_page_greek_removal(page, index, text):
      def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
      # Build three renderings of the logged template:
      #   orig_template: exactly as logged
      #   to_template:   as logged, minus sc=polytonic (what we restore to)
      #   from_template: to_template minus the removed param (what is
      #                  searched for on the page)
      template = blib.parse_text(template_text).filter_templates()[0]
      orig_template = unicode(template)
      if getparam(template, "sc") == "polytonic":
        template.remove("sc")
      to_template = unicode(template)
      param_value = getparam(template, removed_param)
      template.remove(removed_param)
      from_template = unicode(template)
      text = unicode(text)
      found_orig_template = orig_template in text
      newtext = text.replace(from_template, to_template)
      changelog = ""
      if newtext == text:
        if not found_orig_template:
          pagemsg("WARNING: Unable to locate 'from' template when undoing Greek param removal: %s"
              % from_template)
        else:
          pagemsg("Original template found, taking no action")
      else:
        if found_orig_template:
          pagemsg("WARNING: Undid removal, but original template %s already present!" %
              orig_template)
        # A single replacement changes the length by exactly the difference
        # between the two templates; anything else means several matches.
        if len(newtext) - len(text) != len(to_template) - len(from_template):
          pagemsg("WARNING: Length mismatch when undoing Greek param removal, may have matched multiple templates: from=%s, to=%s" % (
            from_template, to_template))
        changelog = "Undid removal of %s=%s in %s" % (removed_param,
            param_value, to_template)
        pagemsg("Change log = %s" % changelog)
      return newtext, changelog

    page = pywikibot.Page(site, pagename)
    if not page.exists():
      msg("Page %s %s: WARNING, something wrong, does not exist" % (
        index, pagename))
    else:
      blib.do_edit(page, index, undo_one_page_greek_removal, save=save,
          verbose=verbose)
Exemplo n.º 4
0
def parse_log_file(fn, startFrom, upTo):
  """Re-emit the lines of a log file, prefixing page numbers.

  yield_page_lines(fn) supplies (pageindex, pagename, lines) triples, which
  are selected via blib.iter_pages keyed on the page name. A line of the
  form "Page N rest" is printed as "Page <pageindex>/N rest"; any other line
  is printed unchanged.
  """
  page_line_re = r"^Page ([0-9/.-]+) (.*)$"
  entries = blib.iter_pages(yield_page_lines(fn), startFrom, upTo,
      key=lambda entry: entry[1])
  for (pageindex, pagename, lines), index in entries:
    for line in lines:
      header = re.match(page_line_re, line)
      if not header:
        msg(line)
        continue
      subindex, remainder = header.group(1), header.group(2)
      msg("Page %s/%s %s" % (pageindex, subindex, remainder))
Exemplo n.º 5
0
def undo_ru_auto_accent(save, verbose, direcfile, startFrom, upTo):
  """Revert logged template replacements in usage-example lines.

  Each line of `direcfile` (UTF-8) is expected to look like
  "Page N PAGE: Replaced {{ORIG}} with {{REPL}}"; other lines are reported
  with a warning and skipped. Only entries whose original template starts
  with {{ux|, {{usex|, {{ru-ux| or {{lang| are processed. For those, REPL is
  replaced by ORIG on the existing page through blib.do_edit, provided REPL
  occurs right after a "#*" line prefix (optionally followed by colons and
  spaces).

  save, verbose -- forwarded to blib.do_edit.
  direcfile -- path of the directive file.
  startFrom, upTo -- forwarded to blib.iter_pages.
  """
  template_removals = []
  # Use a context manager so the directive file is closed as soon as it has
  # been read.
  with codecs.open(direcfile, "r", encoding="utf-8") as direcs:
    for line in direcs:
      line = line.strip()
      m = re.search(r"^Page [0-9]+ (.*?): Replaced (\{\{.*?\}\}) with (\{\{.*?\}\})$",
          line)
      if not m:
        msg("WARNING: Unable to parse line: [%s]" % line)
      else:
        template_removals.append(m.groups())

  for current, index in blib.iter_pages(template_removals, startFrom, upTo,
      # key is the page name
      key = lambda x: x[0]):
    pagename, orig_template, repl_template = current
    if not re.search(r"^\{\{(ux|usex|ru-ux|lang)\|", orig_template):
      continue
    def undo_one_page_ru_auto_accent(page, index, text):
      def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
      text = unicode(text)
      # Raw string: "\*" is not a valid string escape, so the non-raw form
      # only worked by accident (and warns on newer interpreters).
      if not re.search(r"^#\*:* *%s" % re.escape(repl_template), text, re.M):
        # NOTE(review): presumably a None text tells blib.do_edit to leave
        # the page alone -- confirm against blib.
        return None, ""
      found_orig_template = orig_template in text
      newtext = text.replace(repl_template, orig_template)
      changelog = ""
      if newtext == text:
        if not found_orig_template:
          pagemsg("WARNING: Unable to locate 'repl' template when undoing Russian auto-accenting: %s"
              % repl_template)
        else:
          pagemsg("Original template found, taking no action")
      else:
        pagemsg("Replaced %s with %s" % (repl_template, orig_template))
        if found_orig_template:
          pagemsg("WARNING: Undid replacement, but original template %s already present!" %
              orig_template)
        # A single replacement changes the length by exactly the difference
        # between the two templates; anything else means several matches.
        if len(newtext) - len(text) != len(orig_template) - len(repl_template):
          pagemsg("WARNING: Length mismatch when undoing Russian auto-accenting, may have matched multiple templates: orig=%s, repl=%s" % (
            orig_template, repl_template))
        changelog = "Undid auto-accenting (per Wikitiki89) of %s" % (orig_template)
        pagemsg("Change log = %s" % changelog)
      return newtext, changelog

    page = pywikibot.Page(site, pagename)
    if not page.exists():
      msg("Page %s %s: WARNING, something wrong, does not exist" % (
        index, pagename))
    else:
      blib.do_edit(page, index, undo_one_page_ru_auto_accent, save=save,
          verbose=verbose)
def find_russian_need_vowels(find_accents, cattype, direcfile, save,
    verbose, startFrom, upTo):
  """Run process_template over Russian templates, from a log or a category.

  With `direcfile`: every line of that UTF-8 file matching
  "Page N PAGE: ...: Processing: {{TEMPLATE}} <- {{...}} ({{...}})" is
  re-processed by calling blib.process_links in "pagetext" mode on the
  logged template text; lines that do not match are ignored. Results are
  printed as "* ...[[PAGE]]... DIRECTIVE: <nowiki>...</nowiki>" lines.

  Without `direcfile`: blib.process_links is run over `cattype` between
  startFrom and upTo, and results are printed as "Page N PAGE: ..." lines.

  In both modes output_stats is called whenever the index is a multiple of
  100.

  find_accents, verbose -- forwarded to process_template.
  save, verbose -- forwarded to blib.process_links.
  """
  if direcfile:
    processing_lines = []
    # Use a context manager so the directive file is closed as soon as it
    # has been read.
    with codecs.open(direcfile, "r", encoding="utf-8") as direcs:
      for line in direcs:
        line = line.strip()
        m = re.match(r"^(Page [^ ]+ )(.*?)(: .*?:) Processing: (\{\{.*?\}\})( <- \{\{.*?\}\} \(\{\{.*?\}\}\))$",
            line)
        if m:
          processing_lines.append(m.groups())

    for current, index in blib.iter_pages(processing_lines, startFrom, upTo,
        # key is the page name
        key = lambda x:x[1]):

      pagenum, pagename, tempname, repltext, rest = current

      def pagemsg(text):
        # Use pagename: no `pagetitle` is defined in this scope, so the
        # previous reference failed as soon as output_stats called us.
        msg("Page %s(%s) %s: %s" % (pagenum, index, pagename, text))
      def check_template_for_missing_accent(pagetitle, index, template,
          ruparam, trparam):
        def output_line(directive):
          msg("* %s[[%s]]%s %s: <nowiki>%s%s</nowiki>" % (pagenum, pagename,
              tempname, directive, unicode(template), rest))
        return process_template(pagetitle, index, template, ruparam, trparam,
            output_line, find_accents, verbose)

      blib.process_links(save, verbose, "ru", "Russian", "pagetext", None,
          None, check_template_for_missing_accent,
          join_actions=join_changelog_notes, split_templates=None,
          pages_to_do=[(pagename, repltext)], quiet=True)
      if index % 100 == 0:
        output_stats(pagemsg)
  else:
    def check_template_for_missing_accent(pagetitle, index, template,
        ruparam, trparam):
      def pagemsg(text):
        msg("Page %s %s: %s" % (index, pagetitle, text))
      def output_line(directive):
        pagemsg("%s: %s" % (directive, unicode(template)))
      result = process_template(pagetitle, index, template, ruparam, trparam,
          output_line, find_accents, verbose)
      if index % 100 == 0:
        output_stats(pagemsg)
      return result

    blib.process_links(save, verbose, "ru", "Russian", cattype, startFrom,
        upTo, check_template_for_missing_accent,
        join_actions=join_changelog_notes, split_templates=None)
Exemplo n.º 7
0
def do_pages(createfn, iterfn=iter_pages):
    """Create every page yielded by iterfn(createfn).

    Items are (pagename, text, changelog) triples, selected via
    blib.iter_pages keyed on the page name. Offline runs only print the text
    and changelog; online runs save the text through blib.do_edit to the page
    titled with the diacritics-stripped name, skipping pages that already
    exist.
    """
    generated = iterfn(createfn)
    selected = blib.iter_pages(generated, startFrom, upTo, key=lambda item: item[0])
    for item, index in selected:
        pagename, text, changelog = item
        pagetitle = remove_diacritics(pagename)

        if params.offline:
            # Dry run: report what would have been written.
            msg("Text for %s: [[%s]]" % (pagename, text))
            msg("Changelog = %s" % changelog)
            continue

        page = pywikibot.Page(site, pagetitle)
        if page.exists():
            # Existing pages are never overwritten.
            msg("Page %s %s: WARNING, page already exists, skipping" % (index, pagename))
            continue

        def save_text(page, index, parsed):
            # The new content does not depend on the current page text.
            return text, changelog

        blib.do_edit(page, index, save_text, save=params.save, verbose=params.verbose)
Exemplo n.º 8
0
    for ten in sorted(cardinal_tens.keys())[:-1]:  # Skip 100
        for one in sorted(cardinal_ones.keys())[1:]:  # Skip 0
            yield ten + one


def iter_specified_numerals(spec):
    """Yield the integers named by a comma-separated numeral spec.

    Each comma-separated item is either a single integer ("7") or an
    inclusive range written "low-high" ("3-5" yields 3, 4, 5).
    """
    for item in spec.split(","):
        if "-" not in item:
            yield int(item)
            continue
        low, high = item.split("-")
        for value in range(int(low), int(high) + 1):
            yield value


# Process either the numerals named on the command line or, failing that,
# everything iter_numerals() produces.
pages = (iter_specified_numerals(params.numerals) if params.numerals
         else iter_numerals())
for current, index in blib.iter_pages(pages, startFrom, upTo, key=str):
    if not params.offline:
        process_page(index, current, params.save, params.verbose, params)
        continue
    # Offline: dump the generated page text instead of editing the wiki.
    # Each print takes a single parenthesized expression, so the output is
    # the same whether print is a statement or a function.
    print("========== Text for #%s: ==========" % current)
    print("")
    print(generate_page(current).encode('utf-8'))
    print("")
Exemplo n.º 9
0
def undo_greek_removal(save, verbose, direcfile, startFrom, upTo):
    """Restore template parameters listed in a directive file.

    Each line of `direcfile` (UTF-8) is expected to look like
    "* [[PAGE]]: Removed PARAM=...: <nowiki>TEMPLATE</nowiki>"; lines that
    do not match are reported with a warning and skipped. For each parsed
    line, the existing page is edited through blib.do_edit so that TEMPLATE
    without PARAM is replaced by TEMPLATE with PARAM present (in both forms
    an sc=polytonic parameter is dropped).

    save, verbose -- forwarded to blib.do_edit.
    direcfile -- path of the directive file.
    startFrom, upTo -- forwarded to blib.iter_pages.
    """
    template_removals = []
    # Use a context manager so the directive file is closed as soon as it
    # has been read, rather than whenever the iterator is garbage-collected.
    with codecs.open(direcfile, "r", encoding="utf-8") as direcs:
        for line in direcs:
            line = line.strip()
            m = re.match(
                r"\* \[\[(.*?)]]: Removed (.*?)=.*?: <nowiki>(.*?)</nowiki>$",
                line)
            if not m:
                msg("WARNING: Unable to parse line: [%s]" % line)
            else:
                template_removals.append(m.groups())

    for current, index in blib.iter_pages(
            template_removals,
            startFrom,
            upTo,
            # key is the page name
            key=lambda x: x[0]):
        pagename, removed_param, template_text = current

        def undo_one_page_greek_removal(page, index, text):
            def pagemsg(txt):
                msg("Page %s %s: %s" % (index, unicode(page.title()), txt))

            # Build three renderings of the logged template:
            #   orig_template: exactly as logged
            #   to_template:   as logged, minus sc=polytonic (restore target)
            #   from_template: to_template minus the removed param (what is
            #                  searched for on the page)
            template = blib.parse_text(template_text).filter_templates()[0]
            orig_template = unicode(template)
            if getparam(template, "sc") == "polytonic":
                template.remove("sc")
            to_template = unicode(template)
            param_value = getparam(template, removed_param)
            template.remove(removed_param)
            from_template = unicode(template)
            text = unicode(text)
            found_orig_template = orig_template in text
            newtext = text.replace(from_template, to_template)
            changelog = ""
            if newtext == text:
                if not found_orig_template:
                    pagemsg(
                        "WARNING: Unable to locate 'from' template when undoing Greek param removal: %s"
                        % from_template)
                else:
                    pagemsg("Original template found, taking no action")
            else:
                if found_orig_template:
                    pagemsg(
                        "WARNING: Undid removal, but original template %s already present!"
                        % orig_template)
                # A single replacement changes the length by exactly the
                # difference between the two templates; anything else means
                # several matches.
                if len(newtext) - len(text) != len(to_template) - len(
                        from_template):
                    pagemsg(
                        "WARNING: Length mismatch when undoing Greek param removal, may have matched multiple templates: from=%s, to=%s"
                        % (from_template, to_template))
                changelog = "Undid removal of %s=%s in %s" % (
                    removed_param, param_value, to_template)
                pagemsg("Change log = %s" % changelog)
            return newtext, changelog

        page = pywikibot.Page(site, pagename)
        if not page.exists():
            msg("Page %s %s: WARNING, something wrong, does not exist" %
                (index, pagename))
        else:
            blib.do_edit(page,
                         index,
                         undo_one_page_greek_removal,
                         save=save,
                         verbose=verbose)
Exemplo n.º 10
0
def push_manual_changes(save, verbose, diff, direcfile, annotation, startFrom,
                        upTo):
    """Apply manually edited template replacements listed in a directive file.

    Each line of `direcfile` (UTF-8) is tried against these formats, in
    order:

    1. "Page N PAGE: ...: {{REPL}} <- {{...}} ({{CURR}})"
    2. "* [Page N ][[PAGE]]: ...: <nowiki>{{REPL}} <- {{...}} ({{CURR}})</nowiki>..."
    3. "Page N PAGE: ... /// CURR /// REPL"
    4. "Page N PAGE: ..." containing any number of
       "<from> CURR <to> REPL <end>" directives.

    Lines matching none of them are reported with a warning. Entries where
    CURR equals REPL are reported and ignored. For every remaining entry,
    CURR is replaced by REPL on the existing page through blib.do_edit.

    save, verbose, diff -- forwarded to blib.do_edit.
    direcfile -- path of the directive file.
    annotation -- text appended in parentheses to the change log message.
    startFrom, upTo -- forwarded to blib.iter_pages.
    """
    # Entries are stored as (pagename, replacement, current).
    template_changes = []
    # Use a context manager so the directive file is closed as soon as it
    # has been read.
    with codecs.open(direcfile, "r", encoding="utf-8") as direcs:
        for line in direcs:
            line = line.strip()
            repl_on_right = False
            m = re.match(
                r"^Page [^ ]+ (.*?): .*?: (\{\{.*?\}\}) <- \{\{.*?\}\} \((\{\{.*?\}\})\)$",
                line)
            if not m:
                m = re.match(
                    r"^\* (?:Page [^ ]+ )?\[\[(.*?)\]\]: .*?: <nowiki>(\{\{.*?\}\}) <- \{\{.*?\}\} \((\{\{.*?\}\})\)</nowiki>.*$",
                    line)
            if not m:
                m = re.match(
                    r"^(?:Page [^ ]+ )(.*?): .* /// (.*?) /// (.*?)$", line)
                repl_on_right = True
            if m:
                # Normalize the group order first so that the comparisons
                # below always look at the *current* template, whichever
                # side of the line it was written on.
                if repl_on_right:
                    pagename, curr, repl = m.groups()
                else:
                    pagename, repl, curr = m.groups()
                if curr != repl:
                    # If the current template is the same as the current
                    # template of the previous entry, ignore the previous
                    # entry; otherwise we won't be able to locate the current
                    # template the second time around. This happens e.g. in
                    # the output of find_russian_need_vowels.py when
                    # processing a template such as cardinalbox or compound
                    # that has more than one foreign-language parameter in it.
                    if template_changes and template_changes[-1][2] == curr:
                        msg("Ignoring change for pagename %s, %s -> %s" %
                            template_changes[-1])
                        template_changes.pop()
                    template_changes.append((pagename, repl, curr))
                else:
                    msg("WARNING: Ignoring line with from=to: %s" % line)
            else:
                mpage = re.search(r"^(?:Page [^ ]+ )(.*?): (.*)$", line)
                if not mpage:
                    msg("WARNING: Unable to parse line: [%s]" % line)
                    continue
                pagename, directives = mpage.groups()
                for m in re.finditer("<from> (.*?) <to> (.*?) <end>",
                                     directives):
                    curr, repl = m.groups()
                    if curr != repl:
                        template_changes.append((pagename, repl, curr))
                    else:
                        msg("WARNING: Ignoring line with from=to: %s" % line)

    for current, index in blib.iter_pages(
            template_changes,
            startFrom,
            upTo,
            # key is the page name
            key=lambda x: x[0]):
        pagename, repl_template, curr_template = current

        def push_one_manual_change(page, index, text):
            def pagemsg(txt):
                msg("Page %s %s: %s" % (index, unicode(page.title()), txt))

            text = unicode(text)
            found_repl_template = repl_template in text
            newtext = text.replace(curr_template, repl_template)
            changelog = ""
            if newtext == text:
                if not found_repl_template:
                    pagemsg("WARNING: Unable to locate current template: %s" %
                            curr_template)
                else:
                    pagemsg(
                        "Replacement template already found, taking no action")
            else:
                if found_repl_template:
                    pagemsg(
                        "WARNING: Made change, but replacement template %s already present!"
                        % repl_template)
                # Compare the actual length change with the change expected
                # from a single replacement to detect multiple matches.
                repl_curr_diff = len(repl_template) - len(curr_template)
                newtext_text_diff = len(newtext) - len(text)
                if newtext_text_diff == repl_curr_diff:
                    pass
                elif repl_curr_diff == 0:
                    # Guards the division below against a zero divisor.
                    if newtext_text_diff != 0:
                        # Report the templates of *this* entry; the old code
                        # used leftover parsing-loop variables here.
                        pagemsg(
                            "WARNING: Something wrong, no change in text length during replacement but expected change: Expected length change=%s, actual=%s, curr=%s, repl=%s"
                            % (repl_curr_diff, newtext_text_diff,
                               curr_template, repl_template))
                else:
                    ratio = float(newtext_text_diff) / repl_curr_diff
                    if ratio == int(ratio):
                        pagemsg(
                            "WARNING: Replaced %s occurrences of curr=%s with repl=%s"
                            % (int(ratio), curr_template, repl_template))
                    else:
                        pagemsg(
                            "WARNING: Something wrong, length mismatch during replacement: Expected length change=%s, actual=%s, ratio=%.2f, curr=%s, repl=%s"
                            % (repl_curr_diff, newtext_text_diff, ratio,
                               curr_template, repl_template))
                changelog = "replace <%s> with <%s> (%s)" % (truncate(
                    curr_template), truncate(repl_template), annotation)
                pagemsg("Change log = %s" % changelog)
            return newtext, changelog

        page = pywikibot.Page(site, pagename)
        if not page.exists():
            msg("Page %s %s: WARNING, something wrong, does not exist" %
                (index, pagename))
        else:
            blib.do_edit(page,
                         index,
                         push_one_manual_change,
                         save=save,
                         verbose=verbose,
                         diff=diff)
Exemplo n.º 11
0
def undo_ru_auto_accent(save, verbose, direcfile, startFrom, upTo):
    """Revert logged template replacements in usage-example lines.

    Each line of `direcfile` (UTF-8) is expected to look like
    "Page N PAGE: Replaced {{ORIG}} with {{REPL}}"; other lines are reported
    with a warning and skipped. Only entries whose original template starts
    with {{ux|, {{usex|, {{ru-ux| or {{lang| are processed. For those, REPL
    is replaced by ORIG on the existing page through blib.do_edit, provided
    REPL occurs right after a "#*" line prefix (optionally followed by
    colons and spaces).

    save, verbose -- forwarded to blib.do_edit.
    direcfile -- path of the directive file.
    startFrom, upTo -- forwarded to blib.iter_pages.
    """
    template_removals = []
    # Use a context manager so the directive file is closed as soon as it
    # has been read.
    with codecs.open(direcfile, "r", encoding="utf-8") as direcs:
        for line in direcs:
            line = line.strip()
            m = re.search(
                r"^Page [0-9]+ (.*?): Replaced (\{\{.*?\}\}) with (\{\{.*?\}\})$",
                line)
            if not m:
                msg("WARNING: Unable to parse line: [%s]" % line)
            else:
                template_removals.append(m.groups())

    for current, index in blib.iter_pages(
            template_removals,
            startFrom,
            upTo,
            # key is the page name
            key=lambda x: x[0]):
        pagename, orig_template, repl_template = current
        if not re.search(r"^\{\{(ux|usex|ru-ux|lang)\|", orig_template):
            continue

        def undo_one_page_ru_auto_accent(page, index, text):
            def pagemsg(txt):
                msg("Page %s %s: %s" % (index, unicode(page.title()), txt))

            text = unicode(text)
            # Raw string: "\*" is not a valid string escape, so the non-raw
            # form only worked by accident (and warns on newer interpreters).
            if not re.search(r"^#\*:* *%s" % re.escape(repl_template), text,
                             re.M):
                # NOTE(review): presumably a None text tells blib.do_edit to
                # leave the page alone -- confirm against blib.
                return None, ""
            found_orig_template = orig_template in text
            newtext = text.replace(repl_template, orig_template)
            changelog = ""
            if newtext == text:
                if not found_orig_template:
                    pagemsg(
                        "WARNING: Unable to locate 'repl' template when undoing Russian auto-accenting: %s"
                        % repl_template)
                else:
                    pagemsg("Original template found, taking no action")
            else:
                pagemsg("Replaced %s with %s" % (repl_template, orig_template))
                if found_orig_template:
                    pagemsg(
                        "WARNING: Undid replacement, but original template %s already present!"
                        % orig_template)
                # A single replacement changes the length by exactly the
                # difference between the two templates; anything else means
                # several matches.
                if len(newtext) - len(text) != len(orig_template) - len(
                        repl_template):
                    pagemsg(
                        "WARNING: Length mismatch when undoing Russian auto-accenting, may have matched multiple templates: orig=%s, repl=%s"
                        % (orig_template, repl_template))
                changelog = "Undid auto-accenting (per Wikitiki89) of %s" % (
                    orig_template)
                pagemsg("Change log = %s" % changelog)
            return newtext, changelog

        page = pywikibot.Page(site, pagename)
        if not page.exists():
            msg("Page %s %s: WARNING, something wrong, does not exist" %
                (index, pagename))
        else:
            blib.do_edit(page,
                         index,
                         undo_one_page_ru_auto_accent,
                         save=save,
                         verbose=verbose)
Exemplo n.º 12
0
def push_manual_changes(save, verbose, direcfile, annotation, startFrom, upTo):
  """Apply manually edited template replacements listed in a directive file.

  Each line of `direcfile` (UTF-8) must look like either
  "Page N PAGE: ...: {{REPL}} <- {{...}} ({{CURR}})" or
  "* [Page N ][[PAGE]]: ...: <nowiki>{{REPL}} <- {{...}} ({{CURR}})</nowiki>...";
  other lines are reported with a warning and skipped, and lines where REPL
  equals CURR are silently ignored. For every remaining entry, CURR is
  replaced by REPL on the existing page through blib.do_edit.

  save, verbose -- forwarded to blib.do_edit.
  direcfile -- path of the directive file.
  annotation -- text appended in parentheses to the change log message.
  startFrom, upTo -- forwarded to blib.iter_pages.
  """
  # Entries are stored as (pagename, replacement, current).
  template_changes = []
  # Use a context manager so the directive file is closed as soon as it has
  # been read.
  with codecs.open(direcfile, "r", encoding="utf-8") as direcs:
    for line in direcs:
      line = line.strip()
      m = re.match(r"^Page [^ ]+ (.*?): .*?: (\{\{.*?\}\}) <- \{\{.*?\}\} \((\{\{.*?\}\})\)$",
          line)
      if not m:
        m = re.match(r"^\* (?:Page [^ ]+ )?\[\[(.*?)\]\]: .*?: <nowiki>(\{\{.*?\}\}) <- \{\{.*?\}\} \((\{\{.*?\}\})\)</nowiki>.*$",
            line)
        if not m:
          msg("WARNING: Unable to parse line: [%s]" % line)
          continue
      if m.group(2) != m.group(3):
        # If the current template is the same as the current template of the
        # previous entry, ignore the previous entry; otherwise we won't be
        # able to locate the current template the second time around. This
        # happens e.g. in the output of find_russian_need_vowels.py when
        # processing a template such as cardinalbox or compound that has
        # more than one foreign-language parameter in it.
        if len(template_changes) > 0 and template_changes[-1][2] == m.group(3):
          msg("Ignoring change for pagename %s, %s -> %s" % template_changes[-1])
          template_changes.pop()
        template_changes.append(m.groups())

  for current, index in blib.iter_pages(template_changes, startFrom, upTo,
      # key is the page name
      key = lambda x: x[0]):
    pagename, repl_template, curr_template = current

    def push_one_manual_change(page, index, text):
      def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
      text = unicode(text)
      found_repl_template = repl_template in text
      newtext = text.replace(curr_template, repl_template)
      changelog = ""
      if newtext == text:
        if not found_repl_template:
          pagemsg("WARNING: Unable to locate current template: %s"
              % curr_template)
        else:
          pagemsg("Replacement template already found, taking no action")
      else:
        if found_repl_template:
          pagemsg("WARNING: Made change, but replacement template %s already present!" %
              repl_template)
        # Compare the actual length change with the change expected from a
        # single replacement to detect multiple matches.
        repl_curr_diff = len(repl_template) - len(curr_template)
        newtext_text_diff = len(newtext) - len(text)
        if newtext_text_diff == repl_curr_diff:
          pass
        else:
          # The divisor cannot be zero here: replacing a string by one of
          # equal length never changes the text length, so that case always
          # takes the branch above.
          ratio = float(newtext_text_diff) / repl_curr_diff
          if ratio == int(ratio):
            pagemsg("WARNING: Replaced %s occurrences of curr=%s with repl=%s"
                % (int(ratio), curr_template, repl_template))
          else:
            pagemsg("WARNING: Something wrong, length mismatch during replacement: Expected length change=%s, actual=%s, ratio=%.2f, curr=%s, repl=%s"
                % (repl_curr_diff, newtext_text_diff, ratio, curr_template,
                  repl_template))
        changelog = "Replaced %s with %s (%s)" % (curr_template, repl_template,
            annotation)
        pagemsg("Change log = %s" % changelog)
      return newtext, changelog

    page = pywikibot.Page(site, pagename)
    if not page.exists():
      msg("Page %s %s: WARNING, something wrong, does not exist" % (
        index, pagename))
    else:
      blib.do_edit(page, index, push_one_manual_change, save=save,
          verbose=verbose)