Exemplo n.º 1
0
    def replace_links(self, text, linkedPage, targetPage):
        """Replace all source links by target."""
        mysite = pywikibot.Site()
        linktrail = mysite.linktrail()

        # make a backup of the original text so we can show the changes later
        linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
                           r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' +
                           linktrail + ')')
        curpos = 0
        # This loop will run until we have finished the current page
        while True:
            m = linkR.search(text, pos=curpos)
            if not m:
                break
            # Make sure that next time around we will not find this same hit.
            curpos = m.start() + 1
            # ignore interwiki links and links to sections of the same page
            if m.group('title').strip() == '' or \
               mysite.isInterwikiLink(m.group('title')):
                continue
            else:
                actualLinkPage = pywikibot.Page(targetPage.site,
                                                m.group('title'))
                # Check whether the link found is to page.
                if actualLinkPage != linkedPage:
                    continue

            # The link looks like this:
            # [[page_title|link_text]]trailing_chars
            page_title = m.group('title')
            link_text = m.group('label')

            if not link_text:
                # or like this: [[page_title]]trailing_chars
                link_text = page_title
            if m.group('section') is None:
                section = ''
            else:
                section = m.group('section')
            if section and targetPage.section():
                pywikibot.warning(
                    'Source section {0} and target section {1} found. '
                    'Skipping.'.format(section, targetPage))
                continue
            trailing_chars = m.group('linktrail')
            if trailing_chars:
                link_text += trailing_chars

            # remove preleading ":"
            if link_text[0] == ':':
                link_text = link_text[1:]
            if link_text[0].isupper() or link_text[0].isdigit():
                new_page_title = targetPage.title()
            else:
                new_page_title = first_lower(targetPage.title())

            # remove preleading ":"
            if new_page_title[0] == ':':
                new_page_title = new_page_title[1:]

            if (new_page_title == link_text and not section):
                newlink = "[[%s]]" % new_page_title
            # check if we can create a link with trailing characters instead of a
            # pipelink
            elif (len(new_page_title) <= len(link_text)
                  and firstcap(link_text[:len(new_page_title)])
                  == firstcap(new_page_title)
                  and re.sub(re.compile(linktrail), '',
                             link_text[len(new_page_title):]) == ''
                  and not section):
                newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                        link_text[len(new_page_title):])
            else:
                newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
            text = text[:m.start()] + newlink + text[m.end():]
            continue
        return text
    def treat(self, refPage, disambPage):
        """Treat a page.

        @param disambPage: the disambiguation page or redirect we don't want
            anything to link to
        @type disambPage: pywikibot.Page
        @param refPage: a page linking to disambPage
        @type refPage: pywikibot.Page
        @return: False if the user pressed q to completely quit the program,
            True otherwise
        @rtype: bool

        """
        # TODO: break this function up into subroutines!

        self.current_page = refPage
        include = False
        unlink_counter = 0
        new_targets = []
        try:
            text = refPage.get()
            ignoreReason = self.checkContents(text)
            if ignoreReason:
                pywikibot.output('\n\nSkipping %s because it contains %s.\n\n'
                                 % (refPage.title(), ignoreReason))
            else:
                include = True
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'%s is a redirect to %s'
                             % (refPage.title(), disambPage.title()))
            if disambPage.isRedirectPage():
                target = self.alternatives[0]
                if pywikibot.input_yn(u'Do you want to make redirect %s point '
                                      'to %s?' % (refPage.title(), target),
                                      default=False, automatic_quit=False):
                    redir_text = '#%s [[%s]]' \
                                 % (self.mysite.redirect(), target)
                    try:
                        refPage.put_async(redir_text, summary=self.comment)
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Page not saved: %s' % error.args)
            else:
                choice = pywikibot.input_choice(
                    u'Do you want to work on pages linking to %s?'
                    % refPage.title(),
                    [('yes', 'y'), ('no', 'n'), ('change redirect', 'c')], 'n',
                    automatic_quit=False)
                if choice == 'y':
                    gen = ReferringPageGeneratorWithIgnore(
                        refPage, self.primary, main_only=self.main_only
                    )
                    preloadingGen = pagegenerators.PreloadingGenerator(gen)
                    for refPage2 in preloadingGen:
                        # run until the user selected 'quit'
                        if not self.treat(refPage2, refPage):
                            break
                elif choice == 'c':
                    text = refPage.get(get_redirect=True)
                    include = "redirect"
        except pywikibot.NoPage:
            pywikibot.output(
                u'Page [[%s]] does not seem to exist?! Skipping.'
                % refPage.title())
            include = False
        if include in (True, "redirect"):
            # make a backup of the original text so we can show the changes later
            original_text = text
            n = 0
            curpos = 0
            dn = False
            edited = False
            # This loop will run until we have finished the current page
            while True:
                m = self.linkR.search(text, pos=curpos)
                if not m:
                    if n == 0:
                        pywikibot.output(u"No changes necessary in %s"
                                         % refPage.title())
                        return True
                    else:
                        # stop loop and save page
                        break
                # Make sure that next time around we will not find this same hit.
                curpos = m.start() + 1
                try:
                    foundlink = pywikibot.Link(m.group('title'),
                                               disambPage.site)
                    foundlink.parse()
                except pywikibot.Error:
                    continue
                # ignore interwiki links
                if foundlink.site != disambPage.site:
                    continue
                # Check whether the link found is to disambPage.
                try:
                    if foundlink.canonical_title() != disambPage.title():
                        continue
                except pywikibot.Error:
                    # must be a broken link
                    pywikibot.log(u"Invalid link [[%s]] in page [[%s]]"
                                  % (m.group('title'), refPage.title()))
                    continue
                n += 1
                # how many bytes should be displayed around the current link
                context = 60
                # check if there's a dn-template here already
                if (self.dnSkip and self.dn_template_str and
                        self.dn_template_str[:-2] in text[m.end():m.end() +
                                                          len(self.dn_template_str) + 8]):
                    continue

                edit = EditOption('edit page', 'e', text, m.start(), disambPage.title())
                context_option = HighlightContextOption(
                    'more context', 'm', text, 60, start=m.start(), end=m.end())
                context_option.before_question = True

                options = [ListOption(self.alternatives, ''),
                           ListOption(self.alternatives, 'r'),
                           StandardOption('skip link', 's'),
                           edit,
                           StandardOption('next page', 'n'),
                           StandardOption('unlink', 'u')]
                if self.dn_template_str:
                    # '?', '/' for old choice
                    options += [AliasOption('tag template %s' % self.dn_template_str,
                                            ['t', '?', '/'])]
                options += [context_option]
                if not edited:
                    options += [ShowPageOption('show disambiguation page', 'd',
                                               m.start(), disambPage)]
                options += [
                    OutputProxyOption('list', 'l',
                                      SequenceOutputter(self.alternatives)),
                    AddAlternativeOption('add new', 'a',
                                         SequenceOutputter(self.alternatives))]
                if edited:
                    options += [StandardOption('save in this form', 'x')]

                # TODO: Output context on each question
                answer = pywikibot.input_choice('Option', options,
                                                default=self.always,
                                                force=bool(self.always))
                if answer == 'x':
                    assert edited, 'invalid option before editing'
                    break
                elif answer == 's':
                    n -= 1  # TODO what's this for?
                    continue
                elif answer == 'e':
                    text = edit.new_text
                    edited = True
                    curpos = 0
                    continue
                elif answer == 'n':
                    # skip this page
                    if self.primary:
                        # If run with the -primary argument, skip this
                        # occurrence next time.
                        self.primaryIgnoreManager.ignore(refPage)
                    return True

                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                page_title = m.group('title')
                link_text = m.group('label')

                if not link_text:
                    # or like this: [[page_title]]trailing_chars
                    link_text = page_title
                if m.group('section') is None:
                    section = ''
                else:
                    section = m.group('section')
                trailing_chars = m.group('linktrail')
                if trailing_chars:
                    link_text += trailing_chars
                if answer == 't':
                    assert self.dn_template_str
                    # small chunk of text to search
                    search_text = text[m.end():m.end() + context]
                    # figure out where the link (and sentance) ends, put note
                    # there
                    end_of_word_match = re.search(r'\s', search_text)
                    if end_of_word_match:
                        position_split = end_of_word_match.start(0)
                    else:
                        position_split = 0
                    # insert dab needed template
                    text = (text[:m.end() + position_split] +
                            self.dn_template_str +
                            text[m.end() + position_split:])
                    dn = True
                    continue
                elif answer == 'u':
                    # unlink - we remove the section if there's any
                    text = text[:m.start()] + link_text + text[m.end():]
                    unlink_counter += 1
                    continue
                else:
                    # Check that no option from above was missed
                    assert isinstance(answer, tuple), 'only tuple answer left.'
                    assert answer[0] in ['r', ''], 'only valid tuple answers.'
                    if answer[0] == 'r':
                        # we want to throw away the original link text
                        replaceit = link_text == page_title
                    elif include == "redirect":
                        replaceit = True
                    else:
                        replaceit = False

                    new_page_title = answer[1]
                    repPl = pywikibot.Page(pywikibot.Link(new_page_title,
                                                          disambPage.site))
                    if (new_page_title[0].isupper() or
                            link_text[0].isupper()):
                        new_page_title = repPl.title()
                    else:
                        new_page_title = repPl.title()
                        new_page_title = first_lower(new_page_title)
                    if new_page_title not in new_targets:
                        new_targets.append(new_page_title)
                    if replaceit and trailing_chars:
                        newlink = "[[%s%s]]%s" % (new_page_title,
                                                  section,
                                                  trailing_chars)
                    elif replaceit or (new_page_title == link_text and
                                       not section):
                        newlink = "[[%s]]" % new_page_title
                    # check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif (
                        (len(new_page_title) <= len(link_text)) and
                        (firstcap(link_text[:len(new_page_title)]) == firstcap(new_page_title)) and
                        (re.sub(self.trailR, '', link_text[len(new_page_title):]) == '') and
                        (not section)
                    ):
                        newlink = "[[%s]]%s" \
                                  % (link_text[:len(new_page_title)],
                                     link_text[len(new_page_title):])
                    else:
                        newlink = "[[%s%s|%s]]" \
                                  % (new_page_title, section, link_text)
                    text = text[:m.start()] + newlink + text[m.end():]
                    continue

                pywikibot.output(text[max(0, m.start() - 30):m.end() + 30])
            if text == original_text:
                pywikibot.output(u'\nNo changes have been made:\n')
            else:
                pywikibot.output(u'\nThe following changes have been made:\n')
                pywikibot.showDiff(original_text, text)
                pywikibot.output(u'')
                # save the page
                self.setSummaryMessage(disambPage, new_targets, unlink_counter,
                                       dn)
                try:
                    refPage.put_async(text, summary=self.comment)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Page not saved: page is locked')
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        return True
Exemplo n.º 3
0
    def treat_disamb_only(self, refPage, disambPage):
        """Resolve the links to disambPage but don't look for its redirects.

        @param disambPage: the disambiguation page or redirect we don't want
            anything to link to
        @type disambPage: pywikibot.Page
        @param refPage: a page linking to disambPage
        @type refPage: pywikibot.Page
        @return: "nextpage" if the user enters "n" to skip this page,
            "nochange" if the page needs no change, and
            "done" if the page is processed successfully
        @rtype: str

        """
        # TODO: break this function up into subroutines!

        self.current_page = refPage
        include = False
        unlink_counter = 0
        new_targets = []
        try:
            text = refPage.get()
            ignoreReason = self.checkContents(text)
            if ignoreReason:
                pywikibot.output(
                    '\n\nSkipping %s because it contains %s.\n\n' %
                    (refPage.title(), ignoreReason))
            else:
                include = True
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'%s is a redirect to %s' %
                             (refPage.title(), disambPage.title()))
            if disambPage.isRedirectPage():
                target = self.alternatives[0]
                if pywikibot.input_yn(u'Do you want to make redirect %s point '
                                      'to %s?' % (refPage.title(), target),
                                      default=False,
                                      automatic_quit=False):
                    redir_text = '#%s [[%s]]' \
                                 % (self.mysite.redirect(), target)
                    try:
                        refPage.put(redir_text,
                                    summary=self.comment,
                                    asynchronous=True)
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Page not saved: %s' % error.args)
            else:
                choice = pywikibot.input_choice(
                    u'Do you want to work on pages linking to %s?' %
                    refPage.title(), [('yes', 'y'), ('no', 'n'),
                                      ('change redirect', 'c')],
                    'n',
                    automatic_quit=False)
                if choice == 'y':
                    gen = ReferringPageGeneratorWithIgnore(
                        refPage, self.primary, main_only=self.main_only)
                    preloadingGen = pagegenerators.PreloadingGenerator(gen)
                    for refPage2 in preloadingGen:
                        # run until the user selected 'quit'
                        self.treat(refPage2, refPage)
                elif choice == 'c':
                    text = refPage.get(get_redirect=True)
                    include = "redirect"
        except pywikibot.NoPage:
            pywikibot.output(
                u'Page [[%s]] does not seem to exist?! Skipping.' %
                refPage.title())
            include = False
        if include in (True, "redirect"):
            # save the original text so we can show the changes later
            original_text = text
            n = 0
            curpos = 0
            dn = False
            edited = False
            # This loop will run until we have finished the current page
            while True:
                m = self.linkR.search(text, pos=curpos)
                if not m:
                    if n == 0:
                        # No changes necessary for this disambiguation title.
                        return 'nochange'
                    else:
                        # stop loop and save page
                        break
                # Ensure that next time around we will not find this same hit.
                curpos = m.start() + 1
                try:
                    foundlink = pywikibot.Link(m.group('title'),
                                               disambPage.site)
                    foundlink.parse()
                except pywikibot.Error:
                    continue
                # ignore interwiki links
                if foundlink.site != disambPage.site:
                    continue
                # Check whether the link found is to disambPage.
                try:
                    if foundlink.canonical_title() != disambPage.title():
                        continue
                except pywikibot.Error:
                    # must be a broken link
                    pywikibot.log(u"Invalid link [[%s]] in page [[%s]]" %
                                  (m.group('title'), refPage.title()))
                    continue
                n += 1
                # how many bytes should be displayed around the current link
                context = 60
                # check if there's a dn-template here already
                if (self.dnSkip and self.dn_template_str
                        and self.dn_template_str[:-2]
                        in text[m.end():m.end() + len(self.dn_template_str) +
                                8]):
                    continue

                edit = EditOption('edit page', 'e', text, m.start(),
                                  disambPage.title())
                context_option = HighlightContextOption('more context',
                                                        'm',
                                                        text,
                                                        60,
                                                        start=m.start(),
                                                        end=m.end())
                context_option.before_question = True

                options = [
                    ListOption(self.alternatives, ''),
                    ListOption(self.alternatives, 'r'),
                    StandardOption('skip link', 's'), edit,
                    StandardOption('next page', 'n'),
                    StandardOption('unlink', 'u')
                ]
                if self.dn_template_str:
                    # '?', '/' for old choice
                    options += [
                        AliasOption('tag template %s' % self.dn_template_str,
                                    ['t', '?', '/'])
                    ]
                options += [context_option]
                if not edited:
                    options += [
                        ShowPageOption('show disambiguation page', 'd',
                                       m.start(), disambPage)
                    ]
                options += [
                    OutputProxyOption('list', 'l',
                                      SequenceOutputter(self.alternatives)),
                    AddAlternativeOption('add new', 'a',
                                         SequenceOutputter(self.alternatives))
                ]
                if edited:
                    options += [StandardOption('save in this form', 'x')]

                # TODO: Output context on each question
                answer = pywikibot.input_choice('Option',
                                                options,
                                                default=self.always,
                                                force=bool(self.always))
                if answer == 'x':
                    assert edited, 'invalid option before editing'
                    break
                elif answer == 's':
                    n -= 1  # TODO what's this for?
                    continue
                elif answer == 'e':
                    text = edit.new_text
                    edited = True
                    curpos = 0
                    continue
                elif answer == 'n':
                    # skip this page
                    if self.primary:
                        # If run with the -primary argument, skip this
                        # occurrence next time.
                        self.primaryIgnoreManager.ignore(refPage)
                    return 'nextpage'

                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                page_title = m.group('title')
                link_text = m.group('label')

                if not link_text:
                    # or like this: [[page_title]]trailing_chars
                    link_text = page_title
                if m.group('section') is None:
                    section = ''
                else:
                    section = m.group('section')
                trailing_chars = m.group('linktrail')
                if trailing_chars:
                    link_text += trailing_chars
                if answer == 't':
                    assert self.dn_template_str
                    # small chunk of text to search
                    search_text = text[m.end():m.end() + context]
                    # figure out where the link (and sentance) ends, put note
                    # there
                    end_of_word_match = re.search(r'\s', search_text)
                    if end_of_word_match:
                        position_split = end_of_word_match.start(0)
                    else:
                        position_split = 0
                    # insert dab needed template
                    text = (text[:m.end() + position_split] +
                            self.dn_template_str +
                            text[m.end() + position_split:])
                    dn = True
                    continue
                elif answer == 'u':
                    # unlink - we remove the section if there's any
                    text = text[:m.start()] + link_text + text[m.end():]
                    unlink_counter += 1
                    continue
                else:
                    # Check that no option from above was missed
                    assert isinstance(answer, tuple), 'only tuple answer left.'
                    assert answer[0] in ['r', ''], 'only valid tuple answers.'
                    if answer[0] == 'r':
                        # we want to throw away the original link text
                        replaceit = link_text == page_title
                    elif include == "redirect":
                        replaceit = True
                    else:
                        replaceit = False

                    new_page_title = answer[1]
                    repPl = pywikibot.Page(
                        pywikibot.Link(new_page_title, disambPage.site))
                    if (new_page_title[0].isupper() or link_text[0].isupper()):
                        new_page_title = repPl.title()
                    else:
                        new_page_title = repPl.title()
                        new_page_title = first_lower(new_page_title)
                    if new_page_title not in new_targets:
                        new_targets.append(new_page_title)
                    if replaceit and trailing_chars:
                        newlink = "[[%s%s]]%s" % (new_page_title, section,
                                                  trailing_chars)
                    elif replaceit or (new_page_title == link_text
                                       and not section):
                        newlink = "[[%s]]" % new_page_title
                    # check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif ((len(new_page_title) <= len(link_text))
                          and (firstcap(link_text[:len(new_page_title)])
                               == firstcap(new_page_title))
                          and (re.sub(self.trailR, '',
                                      link_text[len(new_page_title):]) == '')
                          and (not section)):
                        newlink = "[[%s]]%s" \
                                  % (link_text[:len(new_page_title)],
                                     link_text[len(new_page_title):])
                    else:
                        newlink = "[[%s%s|%s]]" \
                                  % (new_page_title, section, link_text)
                    text = text[:m.start()] + newlink + text[m.end():]
                    continue
                # Todo: This line is unreachable (T155337)
                pywikibot.output(text[max(0, m.start() - 30):m.end() + 30])
            if text == original_text:
                pywikibot.output(u'\nNo changes have been made:\n')
            else:
                pywikibot.output(u'\nThe following changes have been made:\n')
                pywikibot.showDiff(original_text, text)
                pywikibot.output(u'')
                # save the page
                self.setSummaryMessage(disambPage, new_targets, unlink_counter,
                                       dn)
                try:
                    refPage.put(text, summary=self.comment, asynchronous=True)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Page not saved: page is locked')
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        return 'done'
Exemplo n.º 4
0
    def replace_links(self, text, linkedPage, targetPage):
        """Replace all source links by target."""
        mysite = pywikibot.Site()
        linktrail = mysite.linktrail()

        # make a backup of the original text so we can show the changes later
        linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
                           r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
        curpos = 0
        # This loop will run until we have finished the current page
        while True:
            m = linkR.search(text, pos=curpos)
            if not m:
                break
            # Make sure that next time around we will not find this same hit.
            curpos = m.start() + 1
            # ignore interwiki links and links to sections of the same page
            if m.group('title').strip() == '' or \
               mysite.isInterwikiLink(m.group('title')):
                continue
            else:
                actualLinkPage = pywikibot.Page(targetPage.site, m.group('title'))
                # Check whether the link found is to page.
                if actualLinkPage != linkedPage:
                    continue

            # The link looks like this:
            # [[page_title|link_text]]trailing_chars
            page_title = m.group('title')
            link_text = m.group('label')

            if not link_text:
                # or like this: [[page_title]]trailing_chars
                link_text = page_title
            if m.group('section') is None:
                section = ''
            else:
                section = m.group('section')
            if section and targetPage.section():
                pywikibot.warning(
                    'Source section {0} and target section {1} found. '
                    'Skipping.'.format(section, targetPage))
                continue
            trailing_chars = m.group('linktrail')
            if trailing_chars:
                link_text += trailing_chars

            # remove preleading ":"
            if link_text[0] == ':':
                link_text = link_text[1:]
            if link_text[0].isupper() or link_text[0].isdigit():
                new_page_title = targetPage.title()
            else:
                new_page_title = first_lower(targetPage.title())

            # remove preleading ":"
            if new_page_title[0] == ':':
                new_page_title = new_page_title[1:]

            if (new_page_title == link_text and not section):
                newlink = "[[%s]]" % new_page_title
            # check if we can create a link with trailing characters instead of a
            # pipelink
            elif (len(new_page_title) <= len(link_text) and
                  firstcap(link_text[:len(new_page_title)]) ==
                  firstcap(new_page_title) and
                  re.sub(re.compile(linktrail), '',
                         link_text[len(new_page_title):]) == '' and
                  not section):
                newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                        link_text[len(new_page_title):])
            else:
                newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
            text = text[:m.start()] + newlink + text[m.end():]
            continue
        return text
Exemplo n.º 5
0
    def treat(self, refPage, disambPage):
        """
        Treat a page.

        Parameters:
            disambPage - The disambiguation page or redirect we don't want
                anything to link to
            refPage - A page linking to disambPage
        Returns False if the user pressed q to completely quit the program.
        Otherwise, returns True.

        """
        # TODO: break this function up into subroutines!

        include = False
        unlink_counter = 0
        new_targets = []
        try:
            text = refPage.get()
            ignoreReason = self.checkContents(text)
            if ignoreReason:
                pywikibot.output('\n\nSkipping %s because it contains %s.\n\n'
                                 % (refPage.title(), ignoreReason))
            else:
                include = True
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'%s is a redirect to %s'
                             % (refPage.title(), disambPage.title()))
            if disambPage.isRedirectPage():
                target = self.alternatives[0]
                if pywikibot.input_yn(u'Do you want to make redirect %s point '
                                      'to %s?' % (refPage.title(), target),
                                      default=False, automatic_quit=False):
                    redir_text = '#%s [[%s]]' \
                                 % (self.mysite.redirect(), target)
                    try:
                        refPage.put_async(redir_text, summary=self.comment)
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Page not saved: %s' % error.args)
            else:
                choice = pywikibot.input_choice(
                    u'Do you want to work on pages linking to %s?'
                    % refPage.title(),
                    [('yes', 'y'), ('no', 'n'), ('change redirect', 'c')], 'n',
                    automatic_quit=False)
                if choice == 'y':
                    gen = ReferringPageGeneratorWithIgnore(refPage,
                                                           self.primary)
                    preloadingGen = pagegenerators.PreloadingGenerator(gen)
                    for refPage2 in preloadingGen:
                        # run until the user selected 'quit'
                        if not self.treat(refPage2, refPage):
                            break
                elif choice == 'c':
                    text = refPage.get(get_redirect=True)
                    include = "redirect"
        except pywikibot.NoPage:
            pywikibot.output(
                u'Page [[%s]] does not seem to exist?! Skipping.'
                % refPage.title())
            include = False
        if include in (True, "redirect"):
            # make a backup of the original text so we can show the changes later
            original_text = text
            n = 0
            curpos = 0
            dn = False
            edited = False
            # This loop will run until we have finished the current page
            while True:
                m = self.linkR.search(text, pos=curpos)
                if not m:
                    if n == 0:
                        pywikibot.output(u"No changes necessary in %s"
                                         % refPage.title())
                        return True
                    else:
                        # stop loop and save page
                        break
                # Make sure that next time around we will not find this same hit.
                curpos = m.start() + 1
                try:
                    foundlink = pywikibot.Link(m.group('title'),
                                               disambPage.site)
                    foundlink.parse()
                except pywikibot.Error:
                    continue
                # ignore interwiki links
                if foundlink.site != disambPage.site:
                    continue
                # Check whether the link found is to disambPage.
                try:
                    if foundlink.canonical_title() != disambPage.title():
                        continue
                except pywikibot.Error:
                    # must be a broken link
                    pywikibot.log(u"Invalid link [[%s]] in page [[%s]]"
                                  % (m.group('title'), refPage.title()))
                    continue
                n += 1
                # how many bytes should be displayed around the current link
                context = 60
                # check if there's a dn-template here already
                if (self.dnSkip and self.dn_template_str and
                        self.dn_template_str[:-2] in text[m.end():m.end() +
                                                          len(self.dn_template_str) + 8]):
                    continue

                # This loop will run while the user doesn't choose an option
                # that will actually change the page
                while True:
                    self.current_page = refPage

                    if not self.always:
                        # at the beginning of the link, start red color.
                        # at the end of the link, reset the color to default
                        pywikibot.output(
                            text[max(0, m.start() - context):m.start()] +
                            '\03{lightred}' + text[m.start():m.end()] +
                            '\03{default}' + text[m.end():m.end() + context])
                        options = ['#', 'r#', '[s]kip link', '[e]dit page',
                                   '[n]ext page', '[u]nlink', '[q]uit']
                        if self.dn_template_str:
                            options.append(u'[t]ag template %s' % self.dn_template_str)
                        options.append('[m]ore context')
                        if not edited:
                            options.append('show [d]isambiguation page')
                        options += ['[l]ist', '[a]dd new']
                        if edited:
                            options += ['save in this form [x]']
                        options = concat_options('Option', 72, options)
                        choice = pywikibot.input(options)
                    else:
                        choice = self.always
                    if choice in ['a', 'A']:
                        newAlternative = pywikibot.input(u'New alternative:')
                        self.alternatives.append(newAlternative)
                        self.listAlternatives()
                    elif choice in ['e', 'E']:
                        editor = editarticle.TextEditor()
                        newText = editor.edit(text, jumpIndex=m.start(),
                                              highlight=disambPage.title())
                        # if user didn't press Cancel
                        if newText and newText != text:
                            text = newText
                            break
                    elif choice in ['d', 'D']:
                        editor = editarticle.TextEditor()
                        if disambPage.isRedirectPage():
                            disambredir = disambPage.getRedirectTarget()
                            editor.edit(
                                disambredir.get(),
                                jumpIndex=m.start(),
                                highlight=disambredir.title())
                        else:
                            editor.edit(
                                disambPage.get(),
                                jumpIndex=m.start(),
                                highlight=disambPage.title())
                    elif choice in ['l', 'L']:
                        self.listAlternatives()
                    elif choice in ['m', 'M']:
                        # show more text around the link we're working on
                        context *= 2
                    else:
                        break

                if choice in ['e', 'E']:
                    # user has edited the page and then pressed 'OK'
                    edited = True
                    curpos = 0
                    continue
                elif choice in ['n', 'N']:
                    # skip this page
                    if self.primary:
                        # If run with the -primary argument, skip this
                        # occurrence next time.
                        self.primaryIgnoreManager.ignore(refPage)
                    return True
                elif choice in ['q', 'Q']:
                    # quit the program
                    self.quit()
                elif choice in ['s', 'S']:
                    # Next link on this page
                    n -= 1
                    continue
                elif choice in ['x', 'X'] and edited:
                    # Save the page as is
                    break

                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                page_title = m.group('title')
                link_text = m.group('label')

                if not link_text:
                    # or like this: [[page_title]]trailing_chars
                    link_text = page_title
                if m.group('section') is None:
                    section = ''
                else:
                    section = m.group('section')
                trailing_chars = m.group('linktrail')
                if trailing_chars:
                    link_text += trailing_chars
                # '?', '/' for old choice
                if choice in ['t', 'T', '?', '/'] and self.dn_template_str:
                    # small chunk of text to search
                    search_text = text[m.end():m.end() + context]
                    # figure out where the link (and sentance) ends, put note
                    # there
                    end_of_word_match = re.search(r'\s', search_text)
                    if end_of_word_match:
                        position_split = end_of_word_match.start(0)
                    else:
                        position_split = 0
                    # insert dab needed template
                    text = (text[:m.end() + position_split] +
                            self.dn_template_str +
                            text[m.end() + position_split:])
                    dn = True
                    continue
                elif choice in ['u', 'U']:
                    # unlink - we remove the section if there's any
                    text = text[:m.start()] + link_text + text[m.end():]
                    unlink_counter += 1
                    continue
                else:
                    if len(choice) > 0 and choice[0] == 'r':
                        # we want to throw away the original link text
                        replaceit = link_text == page_title
                        choice = choice[1:]
                    elif include == "redirect":
                        replaceit = True
                    else:
                        replaceit = False

                    try:
                        choice = int(choice)
                    except ValueError:
                        pywikibot.output(u"Unknown option")
                        # step back to ask the user again what to do with the
                        # current link
                        curpos -= 1
                        continue
                    if choice >= len(self.alternatives) or choice < 0:
                        pywikibot.output(
                            u"Choice out of range. Please select a number "
                            u"between 0 and %i." % (len(self.alternatives) - 1))
                        # show list of possible choices
                        self.listAlternatives()
                        # step back to ask the user again what to do with the
                        # current link
                        curpos -= 1
                        continue
                    new_page_title = self.alternatives[choice]
                    repPl = pywikibot.Page(pywikibot.Link(new_page_title,
                                                          disambPage.site))
                    if (new_page_title[0].isupper() or
                            link_text[0].isupper()):
                        new_page_title = repPl.title()
                    else:
                        new_page_title = repPl.title()
                        new_page_title = first_lower(new_page_title)
                    if new_page_title not in new_targets:
                        new_targets.append(new_page_title)
                    if replaceit and trailing_chars:
                        newlink = "[[%s%s]]%s" % (new_page_title,
                                                  section,
                                                  trailing_chars)
                    elif replaceit or (new_page_title == link_text and
                                       not section):
                        newlink = "[[%s]]" % new_page_title
                    # check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif (
                        (len(new_page_title) <= len(link_text)) and
                        (firstcap(link_text[:len(new_page_title)]) == firstcap(new_page_title)) and
                        (re.sub(self.trailR, '', link_text[len(new_page_title):]) == '') and
                        (not section)
                    ):
                        newlink = "[[%s]]%s" \
                                  % (link_text[:len(new_page_title)],
                                     link_text[len(new_page_title):])
                    else:
                        newlink = "[[%s%s|%s]]" \
                                  % (new_page_title, section, link_text)
                    text = text[:m.start()] + newlink + text[m.end():]
                    continue

                pywikibot.output(text[max(0, m.start() - 30):m.end() + 30])
            if text == original_text:
                pywikibot.output(u'\nNo changes have been made:\n')
            else:
                pywikibot.output(u'\nThe following changes have been made:\n')
                pywikibot.showDiff(original_text, text)
                pywikibot.output(u'')
                # save the page
                self.setSummaryMessage(disambPage, new_targets, unlink_counter,
                                       dn)
                try:
                    refPage.put_async(text, summary=self.comment)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Page not saved: page is locked')
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        return True
Exemplo n.º 6
0
def treat(text, linkedPage, targetPage):
    """Based on the method of the same name in solve_disambiguation.py."""
    # make a backup of the original text so we can show the changes later
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()
    linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
        % linktrail)
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        # ignore interwiki links and links to sections of the same page
        if m.group('title') == '' or mysite.isInterwikiLink(m.group('title')):
            continue
        else:
            actualLinkPage = pywikibot.Page(mysite, m.group('title'))
            # Check whether the link found is to page.
            if actualLinkPage != linkedPage:
                continue

        # how many bytes should be displayed around the current link
        context = 30
        # at the beginning of the link, start red color.
        # at the end of the link, reset the color to default
        pywikibot.output(text[max(0, m.start() - context): m.start()] +
                         '\03{lightred}' + text[m.start(): m.end()] +
                         '\03{default}' + text[m.end(): m.end() + context])
        choice = pywikibot.input_choice(
            'What should be done with the link?',
            (('Do not change', 'n'),
             ('Change link to \03{lightpurple}%s\03{default}'
              % targetPage.title(), 'y'),
             ('Change and replace text', 'r'), ('Unlink', 'u')),
            default='n', automatic_quit=False)

        if choice == 'n':
            continue

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        page_title = m.group('title')
        link_text = m.group('label')
        if not link_text:
            # or like this: [[page_title]]trailing_chars
            link_text = page_title
        if m.group('section') is None:
            section = ''
        else:
            section = m.group('section')
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars

        if choice == 'u':
            # unlink - we remove the section if there's any
            text = text[:m.start()] + link_text + text[m.end():]
            continue

        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = first_lower(targetPage.title())
        if choice == 'r' and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
        elif choice == 'r' or (new_page_title == link_text and not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of a
        # pipelink
        elif len(new_page_title) <= len(link_text) and \
             firstcap(link_text[:len(new_page_title)]) == \
             firstcap(new_page_title) and \
             re.sub(re.compile(linktrail), '', link_text[len(new_page_title):]) == '' and not section:
            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                    link_text[len(new_page_title):])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]
        continue
    return text
Exemplo n.º 7
0
    def treat(self, refPage, disambPage):
        """
        Treat a page.

        Parameters:
            disambPage - The disambiguation page or redirect we don't want
                anything to link to
            refPage - A page linking to disambPage
        Returns False if the user pressed q to completely quit the program.
        Otherwise, returns True.

        """
        # TODO: break this function up into subroutines!

        include = False
        unlink_counter = 0
        new_targets = []
        try:
            text = refPage.get()
            ignoreReason = self.checkContents(text)
            if ignoreReason:
                pywikibot.output(
                    '\n\nSkipping %s because it contains %s.\n\n' %
                    (refPage.title(), ignoreReason))
            else:
                include = True
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'%s is a redirect to %s' %
                             (refPage.title(), disambPage.title()))
            if disambPage.isRedirectPage():
                target = self.alternatives[0]
                if pywikibot.input_yn(u'Do you want to make redirect %s point '
                                      'to %s?' % (refPage.title(), target),
                                      default=False,
                                      automatic_quit=False):
                    redir_text = '#%s [[%s]]' \
                                 % (self.mysite.redirect(), target)
                    try:
                        refPage.put_async(redir_text, summary=self.comment)
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Page not saved: %s' % error.args)
            else:
                choice = pywikibot.input_choice(
                    u'Do you want to work on pages linking to %s?' %
                    refPage.title(), [('yes', 'y'), ('no', 'n'),
                                      ('change redirect', 'c')],
                    'n',
                    automatic_quit=False)
                if choice == 'y':
                    gen = ReferringPageGeneratorWithIgnore(
                        refPage, self.primary)
                    preloadingGen = pagegenerators.PreloadingGenerator(gen)
                    for refPage2 in preloadingGen:
                        # run until the user selected 'quit'
                        if not self.treat(refPage2, refPage):
                            break
                elif choice == 'c':
                    text = refPage.get(get_redirect=True)
                    include = "redirect"
        except pywikibot.NoPage:
            pywikibot.output(
                u'Page [[%s]] does not seem to exist?! Skipping.' %
                refPage.title())
            include = False
        if include in (True, "redirect"):
            # make a backup of the original text so we can show the changes later
            original_text = text
            n = 0
            curpos = 0
            dn = False
            edited = False
            # This loop will run until we have finished the current page
            while True:
                m = self.linkR.search(text, pos=curpos)
                if not m:
                    if n == 0:
                        pywikibot.output(u"No changes necessary in %s" %
                                         refPage.title())
                        return True
                    else:
                        # stop loop and save page
                        break
                # Make sure that next time around we will not find this same hit.
                curpos = m.start() + 1
                try:
                    foundlink = pywikibot.Link(m.group('title'),
                                               disambPage.site)
                    foundlink.parse()
                except pywikibot.Error:
                    continue
                # ignore interwiki links
                if foundlink.site != disambPage.site:
                    continue
                # Check whether the link found is to disambPage.
                try:
                    if foundlink.canonical_title() != disambPage.title():
                        continue
                except pywikibot.Error:
                    # must be a broken link
                    pywikibot.log(u"Invalid link [[%s]] in page [[%s]]" %
                                  (m.group('title'), refPage.title()))
                    continue
                n += 1
                # how many bytes should be displayed around the current link
                context = 60
                # check if there's a dn-template here already
                if (self.dnSkip and self.dn_template_str
                        and self.dn_template_str[:-2]
                        in text[m.end():m.end() + len(self.dn_template_str) +
                                8]):
                    continue

                # This loop will run while the user doesn't choose an option
                # that will actually change the page
                while True:
                    self.current_page = refPage

                    if not self.always:
                        # at the beginning of the link, start red color.
                        # at the end of the link, reset the color to default
                        pywikibot.output(text[max(0,
                                                  m.start() -
                                                  context):m.start()] +
                                         '\03{lightred}' +
                                         text[m.start():m.end()] +
                                         '\03{default}' +
                                         text[m.end():m.end() + context])
                        options = [
                            '#', 'r#', '[s]kip link', '[e]dit page',
                            '[n]ext page', '[u]nlink', '[q]uit'
                        ]
                        if self.dn_template_str:
                            options.append(u'[t]ag template %s' %
                                           self.dn_template_str)
                        options.append('[m]ore context')
                        if not edited:
                            options.append('show [d]isambiguation page')
                        options += ['[l]ist', '[a]dd new']
                        if edited:
                            options += ['save in this form [x]']
                        options = concat_options('Option', 72, options)
                        choice = pywikibot.input(options)
                    else:
                        choice = self.always
                    if choice in ['a', 'A']:
                        newAlternative = pywikibot.input(u'New alternative:')
                        self.alternatives.append(newAlternative)
                        self.listAlternatives()
                    elif choice in ['e', 'E']:
                        editor = editarticle.TextEditor()
                        newText = editor.edit(text,
                                              jumpIndex=m.start(),
                                              highlight=disambPage.title())
                        # if user didn't press Cancel
                        if newText and newText != text:
                            text = newText
                            break
                    elif choice in ['d', 'D']:
                        editor = editarticle.TextEditor()
                        if disambPage.isRedirectPage():
                            disambredir = disambPage.getRedirectTarget()
                            editor.edit(disambredir.get(),
                                        jumpIndex=m.start(),
                                        highlight=disambredir.title())
                        else:
                            editor.edit(disambPage.get(),
                                        jumpIndex=m.start(),
                                        highlight=disambPage.title())
                    elif choice in ['l', 'L']:
                        self.listAlternatives()
                    elif choice in ['m', 'M']:
                        # show more text around the link we're working on
                        context *= 2
                    else:
                        break

                if choice in ['e', 'E']:
                    # user has edited the page and then pressed 'OK'
                    edited = True
                    curpos = 0
                    continue
                elif choice in ['n', 'N']:
                    # skip this page
                    if self.primary:
                        # If run with the -primary argument, skip this
                        # occurrence next time.
                        self.primaryIgnoreManager.ignore(refPage)
                    return True
                elif choice in ['q', 'Q']:
                    # quit the program
                    self.quit()
                elif choice in ['s', 'S']:
                    # Next link on this page
                    n -= 1
                    continue
                elif choice in ['x', 'X'] and edited:
                    # Save the page as is
                    break

                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                page_title = m.group('title')
                link_text = m.group('label')

                if not link_text:
                    # or like this: [[page_title]]trailing_chars
                    link_text = page_title
                if m.group('section') is None:
                    section = ''
                else:
                    section = m.group('section')
                trailing_chars = m.group('linktrail')
                if trailing_chars:
                    link_text += trailing_chars
                # '?', '/' for old choice
                if choice in ['t', 'T', '?', '/'] and self.dn_template_str:
                    # small chunk of text to search
                    search_text = text[m.end():m.end() + context]
                    # figure out where the link (and sentance) ends, put note
                    # there
                    end_of_word_match = re.search(r'\s', search_text)
                    if end_of_word_match:
                        position_split = end_of_word_match.start(0)
                    else:
                        position_split = 0
                    # insert dab needed template
                    text = (text[:m.end() + position_split] +
                            self.dn_template_str +
                            text[m.end() + position_split:])
                    dn = True
                    continue
                elif choice in ['u', 'U']:
                    # unlink - we remove the section if there's any
                    text = text[:m.start()] + link_text + text[m.end():]
                    unlink_counter += 1
                    continue
                else:
                    if len(choice) > 0 and choice[0] == 'r':
                        # we want to throw away the original link text
                        replaceit = link_text == page_title
                        choice = choice[1:]
                    elif include == "redirect":
                        replaceit = True
                    else:
                        replaceit = False

                    try:
                        choice = int(choice)
                    except ValueError:
                        pywikibot.output(u"Unknown option")
                        # step back to ask the user again what to do with the
                        # current link
                        curpos -= 1
                        continue
                    if choice >= len(self.alternatives) or choice < 0:
                        pywikibot.output(
                            u"Choice out of range. Please select a number "
                            u"between 0 and %i." %
                            (len(self.alternatives) - 1))
                        # show list of possible choices
                        self.listAlternatives()
                        # step back to ask the user again what to do with the
                        # current link
                        curpos -= 1
                        continue
                    new_page_title = self.alternatives[choice]
                    repPl = pywikibot.Page(
                        pywikibot.Link(new_page_title, disambPage.site))
                    if (new_page_title[0].isupper() or link_text[0].isupper()):
                        new_page_title = repPl.title()
                    else:
                        new_page_title = repPl.title()
                        new_page_title = first_lower(new_page_title)
                    if new_page_title not in new_targets:
                        new_targets.append(new_page_title)
                    if replaceit and trailing_chars:
                        newlink = "[[%s%s]]%s" % (new_page_title, section,
                                                  trailing_chars)
                    elif replaceit or (new_page_title == link_text
                                       and not section):
                        newlink = "[[%s]]" % new_page_title
                    # check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif ((len(new_page_title) <= len(link_text))
                          and (firstcap(link_text[:len(new_page_title)])
                               == firstcap(new_page_title))
                          and (re.sub(self.trailR, '',
                                      link_text[len(new_page_title):]) == '')
                          and (not section)):
                        newlink = "[[%s]]%s" \
                                  % (link_text[:len(new_page_title)],
                                     link_text[len(new_page_title):])
                    else:
                        newlink = "[[%s%s|%s]]" \
                                  % (new_page_title, section, link_text)
                    text = text[:m.start()] + newlink + text[m.end():]
                    continue

                pywikibot.output(text[max(0, m.start() - 30):m.end() + 30])
            if text == original_text:
                pywikibot.output(u'\nNo changes have been made:\n')
            else:
                pywikibot.output(u'\nThe following changes have been made:\n')
                pywikibot.showDiff(original_text, text)
                pywikibot.output(u'')
                # save the page
                self.setSummaryMessage(disambPage, new_targets, unlink_counter,
                                       dn)
                try:
                    refPage.put_async(text, summary=self.comment)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Page not saved: page is locked')
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        return True
Exemplo n.º 8
0
def treat(text, linkedPage, targetPage):
    """Based on the method of the same name in solve_disambiguation.py."""
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()

    # make a backup of the original text so we can show the changes later
    linkR = re.compile(
        r"\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?" r"(\|(?P<label>[^\]]*))?\]\](?P<linktrail>" + linktrail + ")"
    )
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        # ignore interwiki links and links to sections of the same page
        if m.group("title").strip() == "" or mysite.isInterwikiLink(m.group("title")):
            continue
        else:
            actualLinkPage = pywikibot.Page(targetPage.site, m.group("title"))
            # Check whether the link found is to page.
            if actualLinkPage != linkedPage:
                continue

        choice = "y"

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        page_title = m.group("title")
        link_text = m.group("label")

        if not link_text:
            # or like this: [[page_title]]trailing_chars
            link_text = page_title
        if m.group("section") is None:
            section = ""
        else:
            section = m.group("section")
        trailing_chars = m.group("linktrail")
        if trailing_chars:
            link_text += trailing_chars

        if choice in "uU":
            # unlink - we remove the section if there's any
            text = text[: m.start()] + link_text + text[m.end() :]
            continue
        replaceit = choice in "rR"

        # remove preleading ":"
        if link_text[0] == ":":
            link_text = link_text[1:]
        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = first_lower(targetPage.title())

        # remove preleading ":"
        if new_page_title[0] == ":":
            new_page_title = new_page_title[1:]

        if replaceit and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
        elif replaceit or (new_page_title == link_text and not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of a
        # pipelink
        elif (
            len(new_page_title) <= len(link_text)
            and firstcap(link_text[: len(new_page_title)]) == firstcap(new_page_title)
            and re.sub(re.compile(linktrail), "", link_text[len(new_page_title) :]) == ""
            and not section
        ):
            newlink = "[[%s]]%s" % (link_text[: len(new_page_title)], link_text[len(new_page_title) :])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[: m.start()] + newlink + text[m.end() :]
        continue
    return text
Exemplo n.º 9
0
def treat(text, linkedPage, targetPage):
    """Based on the method of the same name in solve_disambiguation.py."""
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()

    # make a backup of the original text so we can show the changes later
    linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
                       r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' +
                       linktrail + ')')
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        # ignore interwiki links and links to sections of the same page
        if m.group('title').strip() == '' or \
           mysite.isInterwikiLink(m.group('title')):
            continue
        else:
            actualLinkPage = pywikibot.Page(targetPage.site, m.group('title'))
            # Check whether the link found is to page.
            if actualLinkPage != linkedPage:
                continue

        choice = 'y'

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        page_title = m.group('title')
        link_text = m.group('label')

        if not link_text:
            # or like this: [[page_title]]trailing_chars
            link_text = page_title
        if m.group('section') is None:
            section = ''
        else:
            section = m.group('section')
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars

        if choice in "uU":
            # unlink - we remove the section if there's any
            text = text[:m.start()] + link_text + text[m.end():]
            continue
        replaceit = choice in "rR"

        # remove preleading ":"
        if link_text[0] == ':':
            link_text = link_text[1:]
        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = first_lower(targetPage.title())

        # remove preleading ":"
        if new_page_title[0] == ':':
            new_page_title = new_page_title[1:]

        if replaceit and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
        elif replaceit or (new_page_title == link_text and not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of a
        # pipelink
        elif len(new_page_title) <= len(link_text) and \
             firstcap(link_text[:len(new_page_title)]) == \
             firstcap(new_page_title) and \
             re.sub(re.compile(linktrail), '', link_text[len(new_page_title):]) == '' and not section:
            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                    link_text[len(new_page_title):])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]
        continue
    return text
Exemplo n.º 10
0
def treat(text, linkedPage, targetPage):
    """Based on the method of the same name in solve_disambiguation.py."""
    # make a backup of the original text so we can show the changes later
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()
    linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
        % linktrail)
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        # ignore interwiki links and links to sections of the same page
        if m.group('title') == '' or mysite.isInterwikiLink(m.group('title')):
            continue
        else:
            actualLinkPage = pywikibot.Page(mysite, m.group('title'))
            # Check whether the link found is to page.
            if actualLinkPage != linkedPage:
                continue

        # how many bytes should be displayed around the current link
        context = 30
        # at the beginning of the link, start red color.
        # at the end of the link, reset the color to default
        pywikibot.output(text[max(0,
                                  m.start() - context):m.start()] +
                         '\03{lightred}' + text[m.start():m.end()] +
                         '\03{default}' + text[m.end():m.end() + context])
        choice = pywikibot.input_choice(
            'What should be done with the link?',
            (('Do not change', 'n'),
             ('Change link to \03{lightpurple}%s\03{default}' %
              targetPage.title(), 'y'), ('Change and replace text', 'r'),
             ('Unlink', 'u')),
            default='n',
            automatic_quit=False)

        if choice == 'n':
            continue

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        page_title = m.group('title')
        link_text = m.group('label')
        if not link_text:
            # or like this: [[page_title]]trailing_chars
            link_text = page_title
        if m.group('section') is None:
            section = ''
        else:
            section = m.group('section')
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars

        if choice == 'u':
            # unlink - we remove the section if there's any
            text = text[:m.start()] + link_text + text[m.end():]
            continue

        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = first_lower(targetPage.title())
        if choice == 'r' and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
        elif choice == 'r' or (new_page_title == link_text and not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of a
        # pipelink
        elif len(new_page_title) <= len(link_text) and \
             firstcap(link_text[:len(new_page_title)]) == \
             firstcap(new_page_title) and \
             re.sub(re.compile(linktrail), '', link_text[len(new_page_title):]) == '' and not section:
            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                    link_text[len(new_page_title):])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]
        continue
    return text