Пример #1
0
def buildDescription(flinfoDescription=u'', flickrreview=False, reviewer=u'',
                     override=u'', addCategory=u'', removeCategories=False):
    """Assemble the wikitext of the file description page.

    The text produced by flinfo is placed below an ``{{int:filedesc}}``
    heading and then adjusted according to the options.

    @param flinfoDescription: description text generated by flinfo
    @param flickrreview: complete the ``{{flickrreview}}`` tag; only done
        when a reviewer is given and no override is used
    @param reviewer: user name written into the ``{{flickrreview}}`` tag
    @param override: license text put below the license heading; the
        license, review and copyvio tags from flinfo are dropped
    @param addCategory: category name (without namespace) to append; the
        ``{{subst:unc}}`` line is removed in that case
    @param removeCategories: strip category links from the text first
    @return: the description using plain newlines as line endings
    """
    text = u'== {{int:filedesc}} ==\n%s' % flinfoDescription

    if removeCategories:
        text = textlib.removeCategoryLinks(
            text, pywikibot.Site('commons', 'commons'))

    if override:
        # Everything flinfo generated about licensing is superseded
        generated_tags = (
            u'{{cc-by-sa-2.0}}\n',
            u'{{cc-by-2.0}}\n',
            u'{{flickrreview}}\n',
            '{{copyvio|Flickr, licensed as "All Rights Reserved" which is not '
            'a free license --~~~~}}\n',
        )
        for tag in generated_tags:
            text = text.replace(tag, u'')
        license_heading = u'=={{int:license}}=='
        text = text.replace(license_heading,
                            license_heading + u'\n' + override)
    elif flickrreview and reviewer:
        review_date = ('{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-'
                       '{{subst:CURRENTDAY2}}')
        text = text.replace(
            '{{flickrreview}}',
            '{{flickrreview|' + reviewer + '|' + review_date + '}}')

    if addCategory:
        text = text.replace(u'{{subst:unc}}\n', u'')
        text += u'\n[[Category:' + addCategory + ']]\n'

    return text.replace(u'\r\n', u'\n')
Пример #2
0
	def getWordCount(self, text):
		"""Count the words of a wikitext after cleaning it with textlib.

		The text is passed through textlib's removeDisabledParts,
		removeHTMLParts, removeLanguageLinks and removeCategoryLinks (in
		that order) before counting.

		@param text: wikitext to analyse
		@return: number of runs of word characters and apostrophes
		"""
		cleanups = (
			textlib.removeDisabledParts,
			textlib.removeHTMLParts,
			textlib.removeLanguageLinks,
			textlib.removeCategoryLinks,
		)
		for cleanup in cleanups:
			text = cleanup(text)
		return len(re.findall(r"[\w']+", text))
Пример #3
0
def saveImagePage(imagepage, newcats):
    """Replace the categories of an image page and save it.

    The existing category links are stripped from the page text, one
    ``[[Category:...]]`` line per entry of newcats is appended, the diff
    is shown and the page is saved with a fixed edit summary.

    @param imagepage: page whose ``text`` is rewritten and saved
    @param newcats: iterable of category names without namespace prefix
    """
    stripped = textlib.removeCategoryLinks(imagepage.text, imagepage.site)
    category_lines = ''.join(
        '[[Category:' + category + ']]\n' for category in newcats)
    newtext = stripped + '\n' + category_lines

    pywikibot.showDiff(imagepage.text, newtext)
    imagepage.text = newtext
    imagepage.save('Filtering categories')
Пример #4
0
    def getNewFieldsFromFreetext(self, imagepage):
        """Extract fields from free text for the new information template.

        The page text is cleaned of the patterns listed in sourceGarbage
        and licenseTemplates for the language of the page's site and of
        its category links; what remains (stripped) becomes the
        description after being passed through convertLinks.

        @param imagepage: file page to analyse; its ``site`` is used as an
            attribute (not called)
        @return: tuple ``(description, date, source, author)``
        """
        text = imagepage.get()
        # site is an attribute here (see the .lang access below), so it must
        # not be called; the previous ``imagepage.site()`` calls would fail.
        site = imagepage.site

        for toRemove in sourceGarbage[site.lang]:
            text = re.sub(toRemove, '', text, flags=re.IGNORECASE)

        # Only the pattern of each (pattern, replacement) pair is needed
        for regex, _ in licenseTemplates[site.lang]:
            text = re.sub(regex, '', text, flags=re.IGNORECASE)

        text = removeCategoryLinks(text, site)

        description = self.convertLinks(text.strip(), site)
        date = self.getUploadDate(imagepage)
        source = self.getSource(imagepage)
        author = self.getAuthorText(imagepage)
        return (description, date, source, author)
Пример #5
0
def saveImagePage(imagepage, newcats, usage, galleries, onlyFilter):
    """Replace the categories of an image page and save it.

    The category links are stripped from the page text. Unless onlyFilter
    is set, the text is also passed through removeTemplates and the
    result of getCheckCategoriesTemplate is appended. Finally one
    ``[[Category:...]]`` line per entry of newcats is added, the diff is
    shown and the page is saved.

    @param imagepage: page whose ``text`` is rewritten and saved
    @param newcats: sequence of category names without namespace prefix
    @param usage: passed on to getCheckCategoriesTemplate
    @param galleries: passed on to getCheckCategoriesTemplate
    @param onlyFilter: only replace the categories and use the short
        edit summary
    """
    pieces = [textlib.removeCategoryLinks(imagepage.text, imagepage.site)]
    if onlyFilter:
        comment = u'Filtering categories'
    else:
        pieces[0] = removeTemplates(pieces[0])
        pieces.append(
            getCheckCategoriesTemplate(usage, galleries, len(newcats)))
        comment = u'Image is categorized by a bot using data from [[Commons:Tools#CommonSense|CommonSense]]'
    pieces.append(u'\n')
    pieces.extend(
        u'[[Category:' + category + u']]\n' for category in newcats)
    newtext = u''.join(pieces)

    pywikibot.showDiff(imagepage.text, newtext)
    imagepage.text = newtext
    imagepage.save(comment)
Пример #6
0
def saveImagePage(imagepage, newcats, usage, galleries, onlyFilter):
    """Rewrite the category section of an image page and save the page.

    Existing category links are removed first. When onlyFilter is false
    the text additionally goes through removeTemplates and gets the
    output of getCheckCategoriesTemplate appended. A ``[[Category:...]]``
    line is then added for every entry of newcats.

    @param imagepage: page whose ``text`` is rewritten and saved
    @param newcats: sequence of category names without namespace prefix
    @param usage: passed on to getCheckCategoriesTemplate
    @param galleries: passed on to getCheckCategoriesTemplate
    @param onlyFilter: skip the template handling and use the short
        edit summary
    """
    body = textlib.removeCategoryLinks(imagepage.text, imagepage.site)
    if not onlyFilter:
        body = removeTemplates(body)
        body += getCheckCategoriesTemplate(usage, galleries, len(newcats))

    category_block = u''.join(
        u'[[Category:' + name + u']]\n' for name in newcats)
    newtext = body + u'\n' + category_block

    comment = (
        u'Filtering categories' if onlyFilter else
        u'Image is categorized by a bot using data from [[Commons:Tools#CommonSense|CommonSense]]')

    pywikibot.showDiff(imagepage.text, newtext)
    imagepage.text = newtext
    imagepage.save(comment)
Пример #7
0
def buildDescription(flinfoDescription='',
                     flickrreview=False,
                     reviewer='',
                     override='',
                     addCategory='',
                     removeCategories=False):
    """Build the final description for the image.

    The description is based on the info from flickrinfo and improved.

    @param flinfoDescription: description text generated by flinfo
    @param flickrreview: complete the ``{{flickrreview}}`` tag; only done
        when a reviewer is given and no override is used
    @param reviewer: user name written into the ``{{flickrreview}}`` tag
    @param override: license text put below the license heading; the
        license, review and copyvio tags from flinfo are dropped
    @param addCategory: category name (without namespace) to append; the
        ``{{subst:unc}}`` line is removed in that case
    @param removeCategories: strip category links from the text first
    @return: the description using plain newlines as line endings
    """
    description = flinfoDescription
    # use template {{Taken on}}
    # A text without a Date line (e.g. the empty default) has no match, so
    # the match object must be checked before reading the group.
    date_match = re.search(r'\|Date=(.*)\n', description)
    if date_match and date_match.group(1):
        date_line = '|Date={{Taken on|%s}}\n' % date_match.group(1)
        # A callable replacement inserts the line literally; a replacement
        # string would interpret backslashes found in the date.
        description = re.sub(r'\|Date=.*\n', lambda _: date_line,
                             description)
    if removeCategories:
        description = textlib.removeCategoryLinks(
            description, pywikibot.Site('commons', 'commons'))
    if override:
        description = description.replace('{{cc-by-sa-2.0}}\n', '')
        description = description.replace('{{cc-by-2.0}}\n', '')
        description = description.replace('{{flickrreview}}\n', '')
        description = description.replace(
            '{{copyvio|Flickr, licensed as "All Rights Reserved" which is not '
            'a free license --~~~~}}\n', '')
        description = description.replace('=={{int:license}}==',
                                          '=={{int:license}}==\n' + override)
    elif flickrreview and reviewer:
        description = description.replace(
            '{{flickrreview}}', '{{flickrreview|%s|'
            '{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-'
            '{{subst:CURRENTDAY2}}}}' % reviewer)

    if '{{subst:unc}}' not in description:
        # Request category check
        description += '\n{{subst:chc}}\n'
    if addCategory:
        description = description.replace('{{subst:unc}}\n', '')
        description += '\n[[Category:{}]]\n'.format(addCategory)
    description = description.replace('\r\n', '\n')
    return description
    def harvestSortKey(self, match):
        """Turn a sort key shared by the categories into a DEFAULTSORT.

        The matched text is returned unchanged when it already matches
        self.defaultsortR, when none of its categories is titled
        'Muži', 'Žijící lidé' or 'Ženy', when the non-blank tidied sort
        keys of its categories are not all the same, or when fewer than
        four categories carry that key. Otherwise the common key is
        removed from the categories and written once as
        ``{{DEFAULTSORT:...}}`` in front of the re-added category links.

        @param match: regex match whose whole matched text is processed
        @return: the original or the rewritten text
        """
        text = match.group()
        if self.defaultsortR.search(text):
            return text

        categories = textlib.getCategoryLinks(text, site=self.site)
        required_titles = ('Muži', 'Žijící lidé', 'Ženy')
        if all(cat.title(with_ns=False) not in required_titles
               for cat in categories):
            return text

        # Track the single key seen so far and how many categories use it
        shared_key = None
        uses = 0
        for cat in categories:
            raw_key = cat.sortKey
            if not raw_key:
                continue
            tidy_key = self.tidy_sortkey(raw_key)
            if not tidy_key.strip():
                continue
            if shared_key is None:
                shared_key = tidy_key
            elif tidy_key != shared_key:
                # a second distinct key: nothing to harvest
                return text
            uses += 1

        if shared_key is None or uses < 4:
            return text

        for cat in categories:
            if (cat.sortKey is not None
                    and self.tidy_sortkey(cat.sortKey) == shared_key):
                cat.sortKey = None

        categories.sort(key=self.sort_category)
        stripped = textlib.removeCategoryLinks(text, self.site)
        stripped += '\n\n{{DEFAULTSORT:%s}}' % shared_key
        rebuilt = textlib.replaceCategoryLinks(stripped, categories,
                                               self.site)
        # Collapse the last blank line of the rebuilt text
        before, _, after = rebuilt.rpartition('\n\n')  # fixme: safer
        return before + '\n' + after
Пример #9
0
def buildDescription(flinfoDescription=u'',
                     flickrreview=False,
                     reviewer=u'',
                     override=u'',
                     addCategory=u'',
                     removeCategories=False):
    """Assemble the description page text for a WLM 2017 Flickr upload.

    The flinfo text is adjusted according to the options, then the
    ``{{subst:unc}}`` line is dropped and the Wiki Loves Monuments 2017
    template plus two fixed categories are appended.

    @param flinfoDescription: description text generated by flinfo
    @param flickrreview: complete the ``{{flickrreview}}`` tag; only done
        when a reviewer is given and no override is used
    @param reviewer: user name written into the ``{{flickrreview}}`` tag
    @param override: license text put below the license heading; the
        license, review and copyvio tags from flinfo are dropped
    @param addCategory: accepted but not used by this variant
    @param removeCategories: strip category links from the text first
    @return: the description using plain newlines as line endings
    """
    text = u'%s' % flinfoDescription

    if removeCategories:
        text = textlib.removeCategoryLinks(
            text, pywikibot.Site('commons', 'commons'))

    if override:
        # Everything flinfo generated about licensing is superseded
        generated_tags = (
            u'{{cc-by-sa-2.0}}\n',
            u'{{cc-by-2.0}}\n',
            u'{{flickrreview}}\n',
            '{{copyvio|Flickr, licensed as "All Rights Reserved" which is not '
            'a free license --~~~~}}\n',
        )
        for tag in generated_tags:
            text = text.replace(tag, u'')
        license_heading = u'=={{int:license}}=='
        text = text.replace(license_heading,
                            license_heading + u'\n' + override)
    elif flickrreview and reviewer:
        review_date = ('{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-'
                       '{{subst:CURRENTDAY2}}')
        text = text.replace(
            '{{flickrreview}}',
            '{{flickrreview|' + reviewer + '|' + review_date + '}}')

    # The uncategorized marker is always removed and the fixed campaign
    # template and categories are used instead of addCategory.
    text = text.replace(u'{{subst:unc}}\n', u'')
    text += u''.join((
        u'{{Wiki Loves Monuments 2017|xx}}\n',
        u'[[Category:Images from Wiki Loves Monuments 2017 Flickr photowalks]]\n',
        u'[[Category:Images from Wiki Loves Monuments 2017 Flickr photowalks to check]]\n',
    ))

    return text.replace(u'\r\n', u'\n')
Пример #10
0
def add_text(page,
             addText,
             summary=None,
             regexSkip=None,
             regexSkipUrl=None,
             always=False,
             up=False,
             putText=True,
             oldTextGiven=None,
             reorderEnabled=True,
             create=False):
    """
    Add text to a page.

    @param page: page to modify
    @param addText: text to add; when it is put at the bottom, a literal
        backslash followed by "n" is turned into config.line_separator
    @param summary: edit summary; when empty it is built from the first
        200 characters of addText
    @param regexSkip: skip the page if this regex matches the page text
    @param regexSkipUrl: skip the page if this regex matches what
        site.getUrl() returns for the full url of the page
    @param always: save without asking for confirmation
    @param up: put the text at the top of the page instead of the bottom
    @param putText: save the page; if False only compute the new text
    @param oldTextGiven: use this text instead of loading the page
    @param reorderEnabled: when adding at the bottom, take categories and
        interwiki links out before appending and put them back afterwards,
        and move the star templates behind the added text
    @param create: treat a missing page as empty instead of skipping it
    @return: (False, False, always) whenever the page is skipped or not
        saved, (True, True, always) when the save call raised no error,
        (text, newtext, always) if putText is False
    @rtype: tuple
    @raise pywikibot.ServerError: when saving hit a server error
        config.max_retries times
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!" %
                                 page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Dealing the stars' issue
            # When a page is tagged as "really well written" it has a star
            # in the interwiki links. starsList holds the templates used (in
            # regex format) to make the stars appear.
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                # Raw string: the backslashes belong to the regex
                regex = re.compile(
                    r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
                found = regex.findall(starstext)
                if found:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(),
                                         config.line_separator)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                         page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')],
                    'n',
                    automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    # Nothing is saved for this answer; the loop asks again
                    pywikibot.bot.open_webbrowser(page)
            if always or choice == 'y':
                try:
                    # Talk pages of users (namespace 3) get non-minor edits
                    if always:
                        page.put(newtext,
                                 summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext,
                                       summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s' %
                        (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)' %
                                     page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
Пример #11
0
def add_text(
    page,
    addText: str,
    summary: Optional[str] = None,
    regexSkip: Optional[str] = None,
    regexSkipUrl: Optional[str] = None,
    always: bool = False,
    up: bool = False,
    putText: bool = True,
    oldTextGiven: Optional[str] = None,
    reorderEnabled: bool = True,
    create: bool = False
) -> Union[Tuple[bool, bool, bool], Tuple[str, str, bool]]:
    """
    Add text to a page.

    @param page: The page to add text to
    @type page: pywikibot.page.BasePage
    @param addText: Text to add
    @param summary: Summary of changes. If empty, a translated summary
        built from the first 200 characters of addText is used.
    @param regexSkip: Abort if text on page matches
    @param regexSkipUrl: Abort if the regex matches what site.getUrl()
        returns for the full url of the page
    @param always: Always add text without user confirmation
    @param up: If True, add text to top of page, else add at bottom.
    @param putText: If True, save changes to the page, else return
        (text, newtext, always)
    @param oldTextGiven: If None fetch page text, else use this text
    @param reorderEnabled: If True place text above categories and
        interwiki, else place at page bottom. No effect if up = True.
    @param create: Passed on to get_text together with oldTextGiven
    @return: (False, False, always) if the page is skipped or the change
        is declined. Otherwise, if putText=True: (result, result, always)
        with the non-None result of put_text, else:
        (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})
    if putText:
        pywikibot.output('Loading {}...'.format(page.title()))

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', '\n')
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += '\n' + addText
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += '\n' + addText
    else:
        newtext = addText + '\n' + text

    if not putText:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        return (text, newtext, always)

    if text != newtext:
        pywikibot.output(
            color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                         page.title()))
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    error_count = 0
    while True:
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')

            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                # Show the page and ask again
                pywikibot.bot.open_webbrowser(page)
                continue

        # either always or choice == 'y' is selected
        result = put_text(page,
                          newtext,
                          summary,
                          error_count,
                          asynchronous=not always)
        # put_text returning None means: try again
        if result is not None:
            return (result, result, always)
        error_count += 1
Пример #12
0
 def remove_cats_and_comments(self, text):
     """Return wikitext without category links and comments, stripped.

     @param text: wikitext to clean
     @return: the cleaned text without surrounding whitespace
     """
     without_cats = textlib.removeCategoryLinks(text, site=self.site)
     cleaned = textlib.removeDisabledParts(without_cats, tags=['comments'])
     return cleaned.strip()
Пример #13
0
def add_text(page,
             addText,
             summary=None,
             regexSkip=None,
             regexSkipUrl=None,
             always=False,
             up=False,
             putText=True,
             oldTextGiven=None,
             reorderEnabled=True,
             create=False):
    """
    Add text to a page.

    The page text comes from get_text. The page is skipped when one of
    the skip regexes matches. The new text is put at the top (up) or at
    the bottom, optionally keeping categories and interwiki links behind
    it (reorderEnabled). With putText the change is confirmed by the user
    unless always is set and handed to put_text until that returns a
    non-None result.

    @return: (False, False, always) if the page is skipped or the change
        is declined, (text, newtext, always) if putText is False, else
        (result, result, always) with the result of put_text
    @rtype: tuple
    """
    site = page.site
    summary = summary or i18n.twtranslate(
        site, 'add_text-adding', {'adding': addText[:200]})
    if putText:
        pywikibot.output('Loading {}...'.format(page.title()))

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Decide whether the page has to be skipped; -except and -excepturl
    # may both be in use.
    if regexSkipUrl is not None:
        url_matches = re.findall(regexSkipUrl,
                                 site.getUrl(page.full_url()))
        if url_matches:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(url_matches))
            return (False, False, always)
    if regexSkip is not None:
        text_matches = re.findall(regexSkip, text)
        if text_matches:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(text_matches))
            return (False, False, always)

    separator = config.line_separator
    if up:
        newtext = addText + separator + text
    else:
        # Turn an escaped \\n of the given text into a real line break
        addText = addText.replace('\\n', separator)
        appended = '{}{}'.format(separator, addText)
        if reorderEnabled:
            # Take categories and interwiki links out, append the text and
            # put both back behind it.
            categories = textlib.getCategoryLinks(text, site)
            body = textlib.removeCategoryLinks(text, site)
            interwiki = textlib.getLanguageLinks(body, site)
            body = textlib.removeLanguageLinks(body, site)
            body += appended
            body = textlib.replaceCategoryLinks(body, categories, site, True)
            newtext = textlib.replaceLanguageLinks(body, interwiki, site)
        else:
            newtext = text + appended

    if not putText:
        # Used as a module: the caller only wants the computed text
        return (text, newtext, always)

    if text != newtext:
        pywikibot.output(
            color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                         page.title()))
        pywikibot.showDiff(text, newtext)

    # Save, asking for confirmation first unless always is set
    error_count = 0
    while True:
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')

            if choice == 'n':
                return (False, False, always)
            if choice == 'a':
                always = True
            elif choice == 'b':
                pywikibot.bot.open_webbrowser(page)
            if not always and choice != 'y':
                # nothing to save for this answer; ask again
                continue

        result = put_text(page,
                          newtext,
                          summary,
                          error_count,
                          asynchronous=not always)
        if result is not None:
            return (result, result, always)
        error_count += 1
Пример #14
0
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @param page: page to modify
    @param addText: text to add; when it is put at the bottom, a literal
        backslash followed by "n" is turned into config.line_separator
    @param summary: edit summary; when empty it is built from the first
        200 characters of addText
    @param regexSkip: skip the page if this regex matches the page text
    @param regexSkipUrl: skip the page if this regex matches what
        site.getUrl() returns for the full url of the page
    @param always: save without asking for confirmation
    @param up: put the text at the top of the page instead of the bottom
    @param putText: save the page; if False only compute the new text
    @param oldTextGiven: use this text instead of loading the page
    @param reorderEnabled: when adding at the bottom, take categories and
        interwiki links out before appending and put them back afterwards,
        and move the star templates behind the added text
    @param create: treat a missing page as empty instead of skipping it
    @return: (False, False, always) whenever the page is skipped or not
        saved, (True, True, always) when the save call raised no error,
        (text, newtext, always) if putText is False
    @rtype: tuple
    @raise pywikibot.ServerError: when saving hit a server error
        config.max_retries times
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result:
            pywikibot.output(
                'Exception! regex (or word) used with -exceptUrl '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result:
            pywikibot.output(
                'Exception! regex (or word) used with -except '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext,
                                                   categoriesInside, site,
                                                   True)
            # Dealing the stars' issue
            # When a page is tagged as "really well written" it has a star
            # in the interwiki links. starsList holds the templates used (in
            # regex format) to make the stars appear.
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                # Raw string: the backslashes belong to the regex
                regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(),
                                         config.line_separator)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n', automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    # Nothing is saved for this answer; the loop asks again
                    pywikibot.bot.open_webbrowser(page)
            if always or choice == 'y':
                try:
                    # Talk pages of users (namespace 3) get non-minor edits
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
Пример #15
0
def linkedImages(page):
    """Return the image pages that this page links to.

    The page's wikitext is scanned for internal ``[[...]]`` links after
    language links, category links and disabled parts have been removed.
    Empty links, section-only links (``#...``) and interwiki links are
    skipped, and only targets for which ``isImage()`` is true are kept.
    Each target is listed at most once, in order of first appearance.

    ``pywikibot.NoPage`` and ``pywikibot.IsRedirectPage`` raised while
    fetching the text are not handled here and propagate to the caller.

    @param page: the page whose wikitext is scanned
    @return: a list of Page objects; empty if fetching the text raises
        pywikibot.SectionError
    """
    Rlink = re.compile(r'\[\[(?P<title>[^\]\|\[]*)(\|[^\]]*)?\]\]')
    result = []
    try:
        thistxt = textlib.removeLanguageLinks(page.get(get_redirect=True),
                                              page.site)
    except pywikibot.SectionError:
        return []
    thistxt = textlib.removeCategoryLinks(thistxt, page.site)

    # remove HTML comments, pre, nowiki, and includeonly sections
    # from text before processing
    thistxt = textlib.removeDisabledParts(thistxt)

    for match in Rlink.finditer(thistxt):
        try:
            title = match.group('title')
            title = title.replace("_", " ").strip(" ")
            if title == "":
                # empty link - problem in the page
                continue
            # convert relative link to absolute link
            if title.startswith(".."):
                # Parent-relative link: drop the last path component of
                # this page's title and append the rest of the link.
                # (The title must come from ``page``; there is no ``self``
                # in this module-level function.)
                parts = page.title().split('/')
                parts.pop()
                title = '/'.join(parts) + title[2:]
            elif title.startswith("/"):
                title = '%s/%s' % (page.title(), title[1:])
            if title.startswith("#"):
                # this is an internal section link
                continue
            if not page.site.isInterwikiLink(title):
                page2 = pywikibot.Page(page.site, title)
                try:
                    # NOTE(review): presumably forces title parsing so that
                    # an invalid link fails here - confirm.
                    hash(str(page2))
                except Exception:
                    pywikibot.output("Page %s contains invalid link to [[%s]]."
                                     % (page.title(), title))
                    continue
                if not page2.isImage():
                    continue
                if page2.title(withSection=False) and page2 not in result:
                    result.append(page2)
        except pywikibot.NoUsername:
            # Skip this link; any other exception propagates unchanged.
            continue
    return result
Пример #16
0
 def remove_cats_and_comments(self, text):
     """Return wikitext without category links and comments.

     Category links are removed for ``self.site``, then the 'comments'
     disabled parts are removed, and finally surrounding whitespace is
     stripped from the result.
     """
     without_categories = textlib.removeCategoryLinks(text, site=self.site)
     cleaned = textlib.removeDisabledParts(without_categories,
                                           tags=['comments'])
     return cleaned.strip()