Пример #1
0
 def callback(link, text, groups, rng):
     """Check the link's site and choose a return value from its title.

     Returns a new Link to 'Homeworld' (on the same site) when the title is
     exactly 'World', False when the title is 'you' in any capitalisation,
     and None for every other title.
     """
     self.assertEqual(link.site, self.wp_site)
     title = link.title
     if title == 'World':
         return pywikibot.Link('Homeworld', link.site)
     if title.lower() == 'you':
         return False
     return None
Пример #2
0
    def get_redirects_from_dump(self, alsoGetPageTitles=False):
        """
        Extract redirects from dump.

        Parse the local XML dump named by self.xmlFilename and, for every
        entry whose text matches the site's redirect regex, record where it
        points. When self.namespaces is non-empty, entries outside those
        namespaces are skipped. Redirects whose target belongs to an unknown
        site or to a site other than self.site are reported and ignored.

        Return a dictionary where the redirect names are the keys and the
        redirect targets are the values. If alsoGetPageTitles is true,
        return a tuple of that dictionary and the set of titles of all
        processed entries instead.
        """
        redirects = {}
        # open xml dump and read page titles out of it
        dump = xmlreader.XmlDump(self.xmlFilename)
        redirect_pattern = self.site.redirectRegex()
        if alsoGetPageTitles:
            titles_seen = set()
        for pages_read, entry in enumerate(dump.parse(), start=1):
            # print a status message after every 10000 pages
            if pages_read % 10000 == 0:
                pywikibot.output('{0} pages read...'.format(pages_read))
            if (len(self.namespaces) > 0
                    and pywikibot.Page(self.site, entry.title).namespace()
                    not in self.namespaces):
                continue
            if alsoGetPageTitles:
                entry_link = pywikibot.Link(entry.title, self.site)
                titles_seen.add(space_to_underscore(entry_link))

            found = redirect_pattern.match(entry.text)
            if not found:
                continue
            target = found.group(1)
            # There might be redirects to another wiki. Ignore these.
            target_link = pywikibot.Link(target, self.site)
            try:
                target_link.parse()
            except pywikibot.SiteDefinitionError as e:
                pywikibot.log(e)
                pywikibot.output(
                    'NOTE: Ignoring {0} which is a redirect ({1}) to an '
                    'unknown site.'.format(entry.title, target))
                continue
            if target_link.site != self.site:
                pywikibot.output(
                    'NOTE: Ignoring {0} which is a redirect to '
                    'another site {1}.'
                    .format(entry.title, target_link.site))
                continue
            # only redirects with a usable local target are recorded
            if not (target_link and target_link.title):
                continue
            source = pywikibot.Link(entry.title, self.site)
            if target_link.anchor:
                pywikibot.output(
                    'HINT: {0} is a redirect with a pipelink.'
                    .format(entry.title))
            redirects[space_to_underscore(source)] = (
                space_to_underscore(target_link))
        return (redirects, titles_seen) if alsoGetPageTitles else redirects
Пример #3
0
    def treat_page_and_item(self, page, item):
        """Process a single page/item.

        Walk the templates extracted from the page. For each template whose
        name is in self.templateTitles, look at every non-empty field that
        is configured in self.fields, build a claim for the mapped property
        from the field value, and pass it to user_add_claim_unless_exists.
        Raises KeyboardInterrupt when the module-level willstop flag is set.
        """
        if willstop:
            raise KeyboardInterrupt

        for (template, fielddict) in page.raw_extracted_templates:
            # Reduce the template name to its title without namespace
            try:
                template_page = pywikibot.Page(page.site, template, ns=10)
                template = template_page.title(with_ns=False)
            except pywikibot.exceptions.InvalidTitle:
                pywikibot.error("Failed parsing template; '{}' should be "
                                'the template name.'.format(template))
                continue

            if template not in self.templateTitles:
                continue
            # We found the template we were looking for
            for field, value in fielddict.items():
                field = field.strip()
                value = value.strip()
                # skip blank fields/values and fields we are not harvesting
                if not (field and value) or field not in self.fields:
                    continue

                # This field contains something useful for us
                prop, options = self.fields[field]
                claim = pywikibot.Claim(self.repo, prop)
                datatype = claim.type
                if datatype == 'wikibase-item':
                    # Try to extract a valid page
                    found = pywikibot.link_regex.search(value)
                    if found:
                        link_text = found.group(1)
                    elif self._get_option_with_fallback(options, 'islink'):
                        # the whole value is to be treated as the link
                        link_text = value
                    else:
                        pywikibot.output(
                            '{} field {} value {} is not a wikilink. '
                            'Skipping.'.format(claim.getID(), field,
                                               value))
                        continue

                    linked_item = self._template_link_target(item, link_text)
                    if not linked_item:
                        continue

                    claim.setTarget(linked_item)
                elif datatype in ('string', 'external-id'):
                    claim.setTarget(value.strip())
                elif datatype == 'url':
                    found = self.linkR.search(value)
                    if not found:
                        continue
                    claim.setTarget(found.group('url'))
                elif datatype == 'commonsMedia':
                    commonssite = pywikibot.Site('commons', 'commons')
                    imagelink = pywikibot.Link(value,
                                               source=commonssite,
                                               default_namespace=6)
                    image = pywikibot.FilePage(imagelink)
                    # follow a file redirect before checking existence
                    if image.isRedirectPage():
                        image = pywikibot.FilePage(image.getRedirectTarget())
                    if not image.exists():
                        pywikibot.output(
                            "{0} doesn't exist. I can't link to it"
                            ''.format(image.title(as_link=True)))
                        continue
                    claim.setTarget(image)
                else:
                    pywikibot.output('{} is not a supported datatype.'.format(
                        datatype))
                    continue

                # A generator might yield pages from multiple sites
                exists_arg = self._get_option_with_fallback(options, 'exists')
                self.user_add_claim_unless_exists(
                    item, claim, exists_arg, page.site, pywikibot.output)
Пример #4
0
    def run(self):
        """Run the bot.

        Steps, in order:

        1. Look up the category redirect templates for the site and the
           redirect tracking category; return early (with a warning) when
           either is missing.
        2. Load the pickled per-category record from the data file and
           write a ``.bak`` copy of it when it is non-empty.
        3. Call check_hard_redirect().
        4. Iterate the subcategories of the redirect category, remember
           the non-empty ones, and create a record entry (plus a null
           edit) for each newly seen redirect.
        5. Drop record entries whose category is no longer listed.
        6. For every non-empty redirected category: skip false positives,
           categories in cooldown and missing targets, fix double
           redirects, otherwise move the contents to the target.
        7. Write the record back and save the log page and, if there are
           any, the edit requests page.
        """
        # validate L10N
        try:
            self.template_list = self.site.family.category_redirect_templates[
                self.site.code]
        except KeyError:
            pywikibot.warning(u"No redirect templates defined for %s" %
                              self.site)
            return
        if not self.get_cat_title():
            pywikibot.warning(u"No redirect category found for %s" % self.site)
            return

        def null_edit(page):
            """Save page unchanged; failures are deliberately ignored."""
            try:
                page.save()
            except Exception:
                # Best effort only. Catching Exception instead of using a
                # bare except keeps KeyboardInterrupt and SystemExit able
                # to stop the bot.
                pass

        user = self.site.user()  # invokes login()
        newredirs = []

        now = time.localtime()
        today = "%04d-%02d-%02d" % now[:3]
        edit_request_page = pywikibot.Page(
            self.site, u"User:%s/category edit requests" % user)
        datafile = pywikibot.config.datafilepath("%s-catmovebot-data" %
                                                 self.site.dbName())
        # NOTE(review): the data file is unpickled as-is; this presumes the
        # file is only ever written by this bot - confirm.
        try:
            with open(datafile, "rb") as inp:
                record = cPickle.load(inp)
        except IOError:
            record = {}
        if record:
            with open(datafile + ".bak", "wb") as f:
                cPickle.dump(record, f, protocol=config.pickle_protocol)
        # regex to match soft category redirects
        #  note that any templates containing optional "category:" are
        #  incorrect and will be fixed by the bot
        template_regex = re.compile(
            r"""{{\s*(?:%(prefix)s\s*:\s*)?  # optional "template:"
                     (?:%(template)s)\s*\|   # catredir template name
                     (\s*%(catns)s\s*:\s*)?  # optional "category:"
                     ([^|}]+)                # redirect target cat
                     (?:\|[^|}]*)*}}         # optional arguments 2+, ignored
             """ % {
                'prefix':
                self.site.namespace(10).lower(),
                'template':
                "|".join(
                    item.replace(" ", "[ _]+") for item in self.template_list),
                'catns':
                self.site.namespace(14)
            }, re.I | re.X)

        self.check_hard_redirect()

        comment = i18n.twtranslate(self.site, self.move_comment)
        counts = {}
        nonemptypages = []
        redircat = pywikibot.Category(pywikibot.Link(self.cat_title,
                                                     self.site))

        pywikibot.output(u"\nChecking %d category redirect pages" %
                         redircat.categoryinfo['subcats'])
        catpages = set()
        for cat in redircat.subcategories():
            catpages.add(cat)
            cat_title = cat.title(withNamespace=False)
            if "category redirect" in cat_title:
                self.log_text.append(u"* Ignoring %s" %
                                     cat.title(asLink=True, textlink=True))
                continue
            if hasattr(cat, "_catinfo"):
                # skip empty categories that don't return a "categoryinfo" key
                catdata = cat.categoryinfo
                if "size" in catdata and int(catdata['size']):
                    # save those categories that have contents
                    nonemptypages.append(cat)
            if cat_title not in record:
                # make sure every redirect has a record entry
                record[cat_title] = {today: None}
                try:
                    newredirs.append("*# %s -> %s" %
                                     (cat.title(asLink=True, textlink=True),
                                      cat.getCategoryRedirectTarget().title(
                                          asLink=True, textlink=True)))
                except pywikibot.Error:
                    pass
                # do a null edit on cat
                null_edit(cat)

        # delete record entries for non-existent categories
        # (iterate over a snapshot of the keys: deleting from a dict while
        # iterating over it raises RuntimeError on Python 3)
        for cat_name in list(record):
            if pywikibot.Category(self.site,
                                  self.catprefix + cat_name) not in catpages:
                del record[cat_name]

        pywikibot.output(u"\nMoving pages out of %s redirected categories." %
                         len(nonemptypages))

        for cat in pagegenerators.PreloadingGenerator(nonemptypages):
            try:
                if not cat.isCategoryRedirect():
                    self.log_text.append(u"* False positive: %s" %
                                         cat.title(asLink=True, textlink=True))
                    continue
            except pywikibot.Error:
                self.log_text.append(u"* Could not load %s; ignoring" %
                                     cat.title(asLink=True, textlink=True))
                continue
            cat_title = cat.title(withNamespace=False)
            if not self.readyToEdit(cat):
                counts[cat_title] = None
                self.log_text.append(u"* Skipping %s; in cooldown period." %
                                     cat.title(asLink=True, textlink=True))
                continue
            dest = cat.getCategoryRedirectTarget()
            if not dest.exists():
                self.problems.append("# %s redirects to %s" %
                                     (cat.title(asLink=True, textlink=True),
                                      dest.title(asLink=True, textlink=True)))
                # do a null edit on cat to update any special redirect
                # categories this wiki might maintain
                null_edit(cat)
                continue
            if dest.isCategoryRedirect():
                double = dest.getCategoryRedirectTarget()
                if double == dest or double == cat:
                    self.log_text.append(
                        u"* Redirect loop from %s" %
                        dest.title(asLink=True, textlink=True))
                    # do a null edit on cat
                    null_edit(cat)
                else:
                    self.log_text.append(
                        u"* Fixed double-redirect: %s -> %s -> %s" %
                        (cat.title(asLink=True, textlink=True),
                         dest.title(asLink=True, textlink=True),
                         double.title(asLink=True, textlink=True)))
                    oldtext = cat.text
                    # remove the old redirect from the old text,
                    # leaving behind any non-redirect text
                    oldtext = template_regex.sub("", oldtext)
                    newtext = (u"{{%(redirtemp)s|%(ncat)s}}" % {
                        'redirtemp': self.template_list[0],
                        'ncat': double.title(withNamespace=False)
                    })
                    newtext = newtext + oldtext.strip()
                    try:
                        cat.text = newtext
                        cat.save(
                            i18n.twtranslate(self.site,
                                             self.dbl_redir_comment))
                    except pywikibot.Error as e:
                        self.log_text.append("** Failed: %s" % e)
                continue

            found, moved = self.move_contents(cat_title,
                                              dest.title(withNamespace=False),
                                              editSummary=comment)
            if found is None:
                self.log_text.append(u"* [[:%s%s]]: error in move_contents" %
                                     (self.catprefix, cat_title))
            elif found:
                record[cat_title][today] = found
                self.log_text.append(u"* [[:%s%s]]: %d found, %d moved" %
                                     (self.catprefix, cat_title, found, moved))
            counts[cat_title] = found
            # do a null edit on cat
            null_edit(cat)

        with open(datafile, "wb") as f:
            cPickle.dump(record, f, protocol=config.pickle_protocol)

        self.log_text.sort()
        self.problems.sort()
        newredirs.sort()
        comment = i18n.twtranslate(self.site, self.maint_comment)
        self.log_page.text = (
            u"\n== %i-%02i-%02iT%02i:%02i:%02iZ ==\n" % time.gmtime()[:6] +
            u'\n'.join(self.log_text) +
            u'\n* New redirects since last report:\n' + u'\n'.join(newredirs) +
            u'\n' + u'\n'.join(self.problems) + u'\n' + self.get_log_text())
        self.log_page.save(comment)
        if self.edit_requests:
            edit_request_page.text = (self.edit_request_text % {
                'itemlist':
                u"\n" + u"\n".join((self.edit_request_item % item)
                                   for item in self.edit_requests)
            })
            edit_request_page.save(comment)
Пример #5
0
        def handleOneLink(match):
            """Return the replacement text for one matched wikilink.

            Reads the named groups 'titleWithSection', 'label', 'linktrail'
            and 'newline' from the match. Links that are interwiki links,
            have an invalid title, are outside namespace 0 or have an empty
            title are returned unchanged (``match.group()``). Otherwise the
            title and label are whitespace-normalised and the link is
            rebuilt in the shortest equivalent form: ``[[label]]``,
            ``[[title]]trail`` or ``[[title|label]]``.
            """
            titleWithSection = match.group('titleWithSection')
            label = match.group('label')
            trailingChars = match.group('linktrail')
            newline = match.group('newline')

            try:
                is_interwiki = self.site.isInterwikiLink(titleWithSection)
            except ValueError:  # T111513
                # treat the link as interwiki so that it is left untouched
                is_interwiki = True

            if not is_interwiki:
                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                # We only work on namespace 0 because pipes and linktrails work
                # differently for images and categories.
                page = pywikibot.Page(
                    pywikibot.Link(titleWithSection, self.site))
                try:
                    namespace = page.namespace()
                except pywikibot.InvalidTitle:
                    # unparsable title: keep the original text
                    return match.group()
                if namespace == 0:
                    # Replace underlines by spaces, also multiple underlines
                    titleWithSection = re.sub('_+', ' ', titleWithSection)
                    # Remove double spaces
                    titleWithSection = re.sub('  +', ' ', titleWithSection)
                    # Remove unnecessary leading spaces from title,
                    # but remember if we did this because we eventually want
                    # to re-add it outside of the link later.
                    titleLength = len(titleWithSection)
                    titleWithSection = titleWithSection.lstrip()
                    hadLeadingSpaces = (len(titleWithSection) != titleLength)
                    hadTrailingSpaces = False
                    # Remove unnecessary trailing spaces from title,
                    # but remember if we did this because it may affect
                    # the linktrail and because we eventually want to
                    # re-add it outside of the link later.
                    # (Only done when there is no linktrail.)
                    if not trailingChars:
                        titleLength = len(titleWithSection)
                        titleWithSection = titleWithSection.rstrip()
                        hadTrailingSpaces = (len(titleWithSection) !=
                                             titleLength)

                    # Convert URL-encoded characters to unicode
                    from pywikibot.page import url2unicode
                    titleWithSection = url2unicode(titleWithSection,
                                                   encodings=self.site)

                    if titleWithSection == '':
                        # just skip empty links.
                        return match.group()

                    # Remove unnecessary initial and final spaces from label.
                    # Please note that some editors prefer spaces around pipes.
                    # (See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
                    if label is not None:
                        # Remove unnecessary leading spaces from label,
                        # but remember if we did this because we want
                        # to re-add it outside of the link later.
                        # Note: this overwrites the flag computed from the
                        # title above, so with a label present only the
                        # label's leading spaces count.
                        labelLength = len(label)
                        label = label.lstrip()
                        hadLeadingSpaces = (len(label) != labelLength)
                        # Remove unnecessary trailing spaces from label,
                        # but remember if we did this because it affects
                        # the linktrail.
                        # This likewise overwrites the title-based flag.
                        if not trailingChars:
                            labelLength = len(label)
                            label = label.rstrip()
                            hadTrailingSpaces = (len(label) != labelLength)
                    else:
                        # no pipe in the link: the title doubles as label
                        label = titleWithSection
                    if trailingChars:
                        # fold the linktrail into the label; it is split off
                        # again below if a trail form turns out to be possible
                        label += trailingChars

                    # On first-letter case sites compare title and label with
                    # their first character lowercased.
                    if self.site.siteinfo['case'] == 'first-letter':
                        firstcase_title = first_lower(titleWithSection)
                        firstcase_label = first_lower(label)
                    else:
                        firstcase_title = titleWithSection
                        firstcase_label = label

                    if firstcase_label == firstcase_title:
                        # label and title agree: no pipe needed
                        newLink = '[[%s]]' % label
                    # Check if we can create a link with trailing characters
                    # instead of a pipelink
                    # (the label must start with the title and the remainder
                    # must be completely consumed by the trailR pattern)
                    elif (firstcase_label.startswith(firstcase_title) and
                          trailR.sub('', label[len(titleWithSection):]) == ''):
                        newLink = '[[%s]]%s' % (label[:len(titleWithSection)],
                                                label[len(titleWithSection):])

                    else:
                        # Try to capitalize the first letter of the title.
                        # Not useful for languages that don't capitalize nouns.
                        # TODO: Add a configuration variable for each site,
                        # which determines if the link target is written in
                        # uppercase
                        # NOTE(review): this compares the sitename attribute
                        # itself with a string; if sitename is a method in
                        # the pywikibot version in use, the comparison can
                        # never be true - confirm.
                        if self.site.sitename == 'wikipedia:de':
                            titleWithSection = first_upper(titleWithSection)
                        newLink = "[[%s|%s]]" % (titleWithSection, label)
                    # re-add spaces that were pulled out of the link.
                    # Examples:
                    #   text[[ title ]]text        -> text [[title]] text
                    #   text[[ title | name ]]text -> text [[title|name]] text
                    #   text[[ title |name]]text   -> text[[title|name]]text
                    #   text[[title| name]]text    -> text [[title|name]]text
                    if hadLeadingSpaces and not newline:
                        newLink = ' ' + newLink
                    if hadTrailingSpaces:
                        newLink = newLink + ' '
                    if newline:
                        # put the captured 'newline' group back in front
                        newLink = newline + newLink
                    return newLink
            # don't change anything
            return match.group()
Пример #6
0
    def findAlternatives(self, disambPage):
        if disambPage.isRedirectPage() and not self.primary:
            if (disambPage.site.lang in self.primary_redir_template
                    and self.primary_redir_template[disambPage.site.lang]
                    in disambPage.templates(get_redirect=True)):
                baseTerm = disambPage.title()
                for template in disambPage.templatesWithParams(
                        get_redirect=True):
                    if template[0] == self.primary_redir_template[
                        disambPage.site.lang] \
                            and len(template[1]) > 0:
                        baseTerm = template[1][1]
                disambTitle = primary_topic_format[self.mylang] % baseTerm
                try:
                    disambPage2 = pywikibot.Page(
                        pywikibot.Link(disambTitle, self.mysite))
                    links = disambPage2.linkedPages()
                    links = [correctcap(l, disambPage2.get()) for l in links]
                except pywikibot.NoPage:
                    pywikibot.output(u"No page at %s, using redirect target." %
                                     disambTitle)
                    links = disambPage.linkedPages()[:1]
                    links = [
                        correctcap(l, disambPage.get(get_redirect=True))
                        for l in links
                    ]
                self.alternatives += links
            else:
                try:
                    target = disambPage.getRedirectTarget().title()
                    self.alternatives.append(target)
                except pywikibot.NoPage:
                    pywikibot.output(u"The specified page was not found.")
                    user_input = pywikibot.input(u"""\
Please enter the name of the page where the redirect should have pointed at,
or press enter to quit:""")
                    if user_input == "":
                        sys.exit(1)
                    else:
                        self.alternatives.append(user_input)
                except pywikibot.IsNotRedirectPage:
                    pywikibot.output(
                        u"The specified page is not a redirect. Skipping.")
                    return False
        elif self.getAlternatives:
            try:
                if self.primary:
                    try:
                        disambPage2 = pywikibot.Page(
                            pywikibot.Link(
                                primary_topic_format[self.mylang] %
                                disambPage.title(), self.mysite))
                        links = disambPage2.linkedPages()
                        links = [
                            correctcap(l, disambPage2.get()) for l in links
                        ]
                    except pywikibot.NoPage:
                        pywikibot.output(
                            u"Page does not exist, using the first link in page %s."
                            % disambPage.title())
                        links = disambPage.linkedPages()[:1]
                        links = [
                            correctcap(l, disambPage.get()) for l in links
                        ]
                else:
                    try:
                        links = disambPage.linkedPages()
                        links = [
                            correctcap(l, disambPage.get()) for l in links
                        ]
                    except pywikibot.NoPage:
                        pywikibot.output(u"Page does not exist, skipping.")
                        return False
            except pywikibot.IsRedirectPage:
                pywikibot.output(u"Page is a redirect, skipping.")
                return False
            self.alternatives += links
        return True
Пример #7
0
 def test_invalid_link_as_source(self):
     """IndexPage must raise ValueError for an invalid Link source."""
     bad_link = pywikibot.Link(self.not_existing_invalid_title,
                               source=self.site)
     with self.assertRaises(ValueError):
         IndexPage(bad_link)
Пример #8
0
    def move_to_category(self, article, original_cat, current_cat):
        """
        Interactively pick the category an article should be moved to.

        Given an article which is in category original_cat, ask the user if
        it should be moved to one of original_cat's subcategories or
        supercategories, to an arbitrary category, or be removed from the
        category. Recursively run through the chosen categories.

        NOTE: current_cat is only used for internal recursion. You should
        always use current_cat = original_cat.

        A number that does not refer to a listed category (not an integer,
        negative, or too large) is treated like any other unknown command:
        the user is prompted again.
        """
        def pick(catlist, number):
            """Return catlist[int(number)], or None if number is invalid."""
            try:
                index = int(number)
            except ValueError:
                return None
            # Reject negative numbers explicitly: they would otherwise
            # silently index from the end of the list.
            if 0 <= index < len(catlist):
                return catlist[index]
            return None

        pywikibot.output(u'')
        # Show the title of the page where the link was found.
        # Highlight the title in purple.
        pywikibot.output(u'Treating page \03{lightpurple}%s\03{default}, '
                         u'currently in \03{lightpurple}%s\03{default}' %
                         (article.title(), current_cat.title()))

        # Determine a reasonable amount of context to print
        try:
            full_text = article.get(get_redirect=True)
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s not found.' % article.title())
            return
        try:
            contextLength = full_text.index('\n\n')
        except ValueError:  # substring not found
            contextLength = 500
        if full_text.startswith(u'[['):  # probably an image
            # Add extra paragraph.
            contextLength = full_text.find('\n\n', contextLength + 2)
        # find() yields -1 when there is no further paragraph break
        if contextLength > 1000 or contextLength < 0:
            contextLength = 500

        pywikibot.output('\n' + full_text[:contextLength] + '\n')

        # we need list to index the choice
        subcatlist = list(self.catDB.getSubcats(current_cat))
        supercatlist = list(self.catDB.getSupercats(current_cat))

        if not subcatlist:
            pywikibot.output('This category has no subcategories.\n')
        if not supercatlist:
            pywikibot.output('This category has no supercategories.\n')
        # show super- and subcategories as possible choices (with numbers)
        for i, supercat in enumerate(supercatlist):
            # layout: we don't expect a cat to have more than 10 supercats
            pywikibot.output(u'u%d - Move up to %s' % (i, supercat.title()))
        for i, subcat in enumerate(subcatlist):
            # layout: we don't expect a cat to have more than 100 subcats
            pywikibot.output(u'%2d - Move down to %s' % (i, subcat.title()))
        pywikibot.output(
            ' j - Jump to another category\n'
            ' s - Skip this article\n'
            ' r - Remove this category tag\n'
            ' ? - Print first part of the page (longer and longer)\n'
            u'Enter - Save category as %s' % current_cat.title())

        flag = False
        while not flag:
            pywikibot.output('')
            choice = pywikibot.input(u'Choice:')
            if choice in ['s', 'S']:
                flag = True
            elif choice == '':
                pywikibot.output(u'Saving category as %s' %
                                 current_cat.title())
                if current_cat == original_cat:
                    pywikibot.output('No changes necessary.')
                else:
                    article.change_category(original_cat,
                                            current_cat,
                                            comment=self.editSummary)
                flag = True
            elif choice in ['j', 'J']:
                newCatTitle = pywikibot.input(u'Please enter the category the '
                                              u'article should be moved to:')
                newCat = pywikibot.Category(
                    pywikibot.Link('Category:' + newCatTitle))
                # recurse into chosen category
                self.move_to_category(article, original_cat, newCat)
                flag = True
            elif choice in ['r', 'R']:
                # remove the category tag
                article.change_category(original_cat,
                                        None,
                                        comment=self.editSummary)
                flag = True
            elif choice == '?':
                contextLength += 500
                pywikibot.output('\n' + full_text[:contextLength] + '\n')

                # if categories possibly weren't visible, show them additionally
                # (maybe this should always be shown?)
                if len(full_text) > contextLength:
                    pywikibot.output('')
                    pywikibot.output('Original categories: ')
                    for cat in article.categories():
                        pywikibot.output(u'* %s' % cat.title())
            elif choice[0] == 'u':
                target = pick(supercatlist, choice[1:])
                if target is None:
                    # unknown command or number out of range: prompt again
                    continue
                # recurse into supercategory
                self.move_to_category(article, original_cat, target)
                flag = True
            else:
                target = pick(subcatlist, choice)
                if target is None:
                    # unknown command or number out of range: prompt again
                    continue
                # recurse into subcategory
                self.move_to_category(article, original_cat, target)
                flag = True
Пример #9
0
    def treat_page_and_item(self, page, item) -> None:
        """Harvest configured template fields of ``page`` into ``item``.

        Each template found in ``page.raw_extracted_templates`` whose
        normalised title is listed in ``self.templateTitles`` is scanned.
        For every parameter that is configured in ``self.fields`` a claim
        for the mapped property is built from the parameter value and
        passed to ``self.user_add_claim_unless_exists``.

        :param page: page whose templates are harvested
        :param item: item the resulting claims are added to
        :raises KeyboardInterrupt: if the module-level ``willstop`` flag
            is set
        """
        if willstop:
            raise KeyboardInterrupt

        for raw_name, params in page.raw_extracted_templates:
            # Normalise the template name by resolving it as a page title
            # (ns=10) and dropping the namespace prefix again.
            try:
                template_page = pywikibot.Page(page.site, raw_name, ns=10)
                template_title = template_page.title(with_ns=False)
            except InvalidTitleError:
                pywikibot.error("Failed parsing template; '{}' should be "
                                'the template name.'.format(raw_name))
                continue

            if template_title not in self.templateTitles:
                continue

            # This is one of the templates we are looking for.
            for field, value in params.items():
                field = field.strip()
                # todo: extend the list of tags to ignore
                # todo: eventually we may want to import the references
                value = textlib.removeDisabledParts(
                    value, tags=['ref'], site=page.site).strip()
                if not (field and value) or field not in self.fields:
                    continue

                # The field is configured, so build a claim from its value.
                prop, options = self.fields[field]
                claim = pywikibot.Claim(self.repo, prop)
                exists_arg = self._get_option_with_fallback(options, 'exists')
                datatype = claim.type

                if datatype == 'wikibase-item':
                    allow_multiple = self._get_option_with_fallback(
                        options, 'multi')
                    found_link = False
                    # Every wikilink in the value is a candidate target.
                    for link_match in pywikibot.link_regex.finditer(value):
                        found_link = True
                        target_item = self._template_link_target(
                            item, link_match.group(1))
                        was_added = False
                        if target_item:
                            claim.setTarget(target_item)
                            was_added = self.user_add_claim_unless_exists(
                                item, claim, exists_arg, page.site,
                                pywikibot.output)
                            # A fresh claim is needed for the next link.
                            claim = pywikibot.Claim(self.repo, prop)
                        if not allow_multiple:
                            # Only the first link counts unless multiple
                            # values were requested.
                            break
                        # update exists_arg, so we can add more values
                        if 'p' not in exists_arg and was_added:
                            exists_arg += 'p'

                    if found_link:
                        continue

                    if not self._get_option_with_fallback(options, 'islink'):
                        pywikibot.output(
                            '{} field {} value {} is not a wikilink. Skipping.'
                            .format(claim.getID(), field, value))
                        continue

                    # The bare value itself is treated as a link target.
                    target_item = self._template_link_target(item, value)
                    if not target_item:
                        continue
                    claim.setTarget(target_item)

                elif datatype in ('string', 'external-id'):
                    claim.setTarget(value.strip())

                elif datatype == 'url':
                    url_match = self.linkR.search(value)
                    if not url_match:
                        continue
                    claim.setTarget(url_match.group('url'))

                elif datatype == 'commonsMedia':
                    commons = pywikibot.Site('commons', 'commons')
                    file_link = pywikibot.Link(value,
                                               source=commons,
                                               default_namespace=6)
                    media = pywikibot.FilePage(file_link)
                    if media.isRedirectPage():
                        media = pywikibot.FilePage(media.getRedirectTarget())
                    if not media.exists():
                        pywikibot.output(
                            "{} doesn't exist. I can't link to it".format(
                                media.title(as_link=True)))
                        continue
                    claim.setTarget(media)

                else:
                    pywikibot.output('{} is not a supported datatype.'.format(
                        datatype))
                    continue

                # A generator might yield pages from multiple sites
                self.user_add_claim_unless_exists(item, claim, exists_arg,
                                                  page.site, pywikibot.output)
Пример #10
0
    def example(self, textvalue):
        """Turn textual usage examples into P1855 claims on the property.

        ``textvalue`` is split into single examples, first with the
        ``split-break`` regex and, if that yields only one chunk, with
        ``split-comma``.  Each example is split on the ``arrow`` regex into
        an item part and a value part.  For every recognised pair a P1855
        claim targeting the item is built, qualified with the parsed value
        of the current property, sourced with ``self.get_source()`` and
        saved through ``self.user_add_claim``.

        :param textvalue: raw text holding the examples
        :return: a truthy value only if every example was either already
            present or successfully saved; ``False`` if the property is
            for qualifier use, no usable regex could be built, the datatype
            is unsupported, or any example could not be processed
        """
        prop = self.current_page
        # todo: scope constraint
        if any(
                map(methodcaller('target_equals', 'Q15720608'),
                    prop.claims.get('P31', []))):
            pywikibot.output('{} is for qualifier use'.format(prop.title()))
            return False

        # --- Build the regex used to recognise the example value ---
        if prop.type in ('external-id', 'string'):
            regex = self.get_regex_from_prop(prop)
            if regex is None:
                pywikibot.output('Regex for "{}" not found'.format(
                    prop.title()))
                return False

            # Use the first well-formed formatter URL (P1630), if any.
            formatter = None
            for claim in prop.claims.get('P1630', []):
                if claim.snaktype != 'value':
                    continue
                searchObj = self.get_formatter_regex().search(
                    claim.getTarget())
                if searchObj is None:
                    pywikibot.output('Found wrongly formatted formatter URL '
                                     'for "{}"'.format(prop.title()))
                    continue

                formatter = searchObj.group()
                break

            if formatter is None:
                if prop.type == 'external-id':
                    pywikibot.output('Info: No formatter found for "{}"'
                                     ''.format(prop.title()))
                full_regex = '^(?P<value>{})$'.format(regex)
            else:
                # Accept either the full formatter URL with the value in
                # place of "$1" (further "$1" must repeat the same value)
                # or the bare value, optionally wrapped in quotes/brackets.
                split = formatter.split('$1')
                full_regex = '(?P<value>{})'.format(regex).join(
                    map(re.escape, split[:2]))
                full_regex += '(?P=value)'.join(map(re.escape, split[2:]))
                # The trailing slash of the URL is optional.
                if full_regex.endswith(re.escape('/')):
                    full_regex += '?'
                else:
                    full_regex += re.escape('/') + '?'
                full_regex = ('(?:' + full_regex +
                              r'|(?:^["\'<]?|\s)(?P<value2>' + regex +
                              r')(?:["\'>]?$|\]))')
            try:
                regex = re.compile(full_regex)
            except re.error as e:
                pywikibot.output("Couldn't create a regex")
                pywikibot.exception(e)
                return False

        elif prop.type == 'commonsMedia':
            regex = self.get_regex_from_prop(prop)
            if regex is None:
                regex = self.regexes[prop.type]
            else:
                # An inline "(?i)" must be turned into a flag because the
                # pattern gets a prefix below.
                flags = 0
                if regex.startswith('(?i)'):
                    regex = regex[4:]
                    flags |= re.I
                regex = re.compile(
                    r'\b(?:[Ff]il|[Ii]mag)e:(?P<value>{})'
                    r''.format(regex), flags)
        else:
            if prop.type in self.regexes:
                regex = self.regexes[prop.type]
            else:
                pywikibot.output(
                    '"{}" is not supported datatype for matching examples'.
                    format(prop.type))
                return False

        # --- Process the individual examples ---
        remove = True
        split = self.regexes['split-break'].split(textvalue)
        if len(split) == 1:
            split = self.regexes['split-comma'].split(textvalue)
        for match in split:
            if match.strip() == '':
                continue
            pair = re.split(self.regexes['arrow'], match)
            if len(pair) == 1:
                pywikibot.output(
                    'Example pair not recognized in "{}"'.format(match))
                remove = False
                continue

            # Only the first and the last part matter.
            pair = [pair[0], pair[-1]]
            searchObj = self.regexes['wikibase-item'].search(pair[0])
            if searchObj is None:
                pywikibot.output('No item id found in "{}"'.format(pair[0]))
                remove = False
                continue

            item_match = 'Q' + searchObj.group('value')
            target = pywikibot.ItemPage(self.repo, item_match)
            while target.isRedirectPage():
                target = target.getRedirectTarget()
            if any(
                    map(methodcaller('target_equals', target),
                        prop.claims.get('P1855', []))):
                pywikibot.output(
                    'There is already one example with "{}"'.format(
                        item_match))
                continue

            qual_match = regex.search(pair[1])
            if not qual_match:
                pywikibot.output(
                    'Couldn\'t match example value in "{}"'.format(pair[1]))
                remove = False
                continue

            # Reset for every example so that a value from a previous pair
            # can never leak into this one.
            qual_target = None
            groups = qual_match.groupdict()
            for g in ('value', 'value2', 'url'):
                if groups.get(g):
                    qual_target = groups[g]
                    break

            # Quantities are assembled from their own groups below; every
            # other datatype needs one of the value groups.
            if qual_target is None and prop.type != 'quantity':
                pywikibot.output(
                    'Couldn\'t match example value in "{}"'.format(pair[1]))
                remove = False
                continue

            if prop.type == 'wikibase-item':
                qual_target = pywikibot.ItemPage(self.repo, 'Q' + qual_target)
                if not qual_target.exists():
                    pywikibot.output('"{}" doesn\'t exist'.format(
                        qual_target.title()))
                    remove = False
                    continue
                while qual_target.isRedirectPage():
                    qual_target = qual_target.getRedirectTarget()
            elif prop.type == 'wikibase-property':
                qual_target = pywikibot.PropertyPage(self.repo,
                                                     'P' + qual_target)
            elif prop.type == 'commonsMedia':
                commons = pywikibot.Site('commons', 'commons')
                imagelink = pywikibot.Link(qual_target,
                                           source=commons,
                                           default_namespace=6)
                qual_target = pywikibot.FilePage(imagelink)
                if not qual_target.exists():
                    pywikibot.output('"{}" doesn\'t exist'.format(
                        qual_target.title()))
                    remove = False
                    continue
                while qual_target.isRedirectPage():
                    qual_target = pywikibot.FilePage(
                        qual_target.getRedirectTarget())
            elif prop.type == 'quantity':
                # Text reported when a part of the quantity can't be parsed.
                raw_quantity = qual_match.group()
                try:
                    amount = parse_float(qual_match.group('amount'))
                except ValueError:
                    pywikibot.output(
                        'Couldn\'t parse "{}"'.format(raw_quantity))
                    remove = False
                    continue
                error = qual_match.group('error')
                unit = qual_match.group('unit')
                if error:
                    try:
                        error = parse_float(error)
                    except ValueError:
                        pywikibot.output(
                            'Couldn\'t parse "{}"'.format(raw_quantity))
                        remove = False
                        continue
                if unit:
                    search = self.regexes['wikibase-item'].search(unit)
                    if search is None:
                        pywikibot.output(
                            'No item id found in "{}"'.format(unit))
                        remove = False
                        continue
                    unit = pywikibot.ItemPage(self.repo,
                                              'Q' + search.group('value'))
                    if unit.isRedirectPage():
                        unit = unit.getRedirectTarget()
                else:
                    unit = None
                qual_target = pywikibot.WbQuantity(amount,
                                                   unit,
                                                   error,
                                                   site=self.repo)

            claim = pywikibot.Claim(self.repo, 'P1855')
            claim.setTarget(target)
            qualifier = prop.newClaim(is_qualifier=True)
            qualifier.setTarget(qual_target)
            claim.addQualifier(qualifier)
            claim.addSource(self.get_source())
            ok = self.user_add_claim(prop, claim, summary=self.make_summary())
            # A single failure makes the overall result falsy.
            remove = ok and remove
        return remove
Пример #11
0
    def find_add(self, page):
        """
        Find the revision in which the {{Mort récente}} template was added.

        The history returned by ``page.getVersionHistory()`` is walked in
        the order given (presumably newest first - TODO confirm) until a
        revision without the template is met; the revision examined just
        before that one is reported as the addition.

        Returns (user, oldid, timestamp) where
          * user is the user who added the template (pywikibot.User)
          * oldid is the oldid of the revision of this add (int)
          * timestamp is the timestamp of that revision
        or None if the first revision examined does not contain the
        template or the history is empty.
        """
        history = page.getVersionHistory()

        if len(history) == 1:
            # A single revision: its author necessarily added the template.
            [(rev_id, timestamp, user, comment)] = history
            return (pywikibot.User(self.site, user), rev_id, timestamp)

        # Data of the most recently examined revision holding the template.
        oldid = None
        requester = None
        previous_timestamp = None

        for (rev_id, timestamp, user, comment) in history:
            pywikibot.output(
                "Analyzing id %i: timestamp is %s and user is %s" %
                (rev_id, timestamp, user))

            text = page.getOldVersion(rev_id)
            templates_params_list = textlib.extract_templates_and_params(text)
            death_found = False
            for (template_name, dict_param) in templates_params_list:
                try:
                    template_page = pywikibot.Page(
                        pywikibot.Link(template_name,
                                       self.site,
                                       defaultNamespace=10), self.site)

                    # TODO : auto-finding redirections
                    if template_page.title(withNamespace=False) in [
                            u"Mort récente", u"Décès récent"
                    ]:
                        death_found = True
                        break
                except Exception as myexception:
                    # Best effort: an unparsable template name must not
                    # stop the analysis of the remaining templates.
                    pywikibot.output(
                        u'An error occurred while analyzing template %s' %
                        template_name)
                    pywikibot.output(u'%s %s' %
                                     (type(myexception), myexception.args))

            if oldid:
                print("id is %i ; oldid is %i" % (rev_id, oldid))
            else:
                print("id is %i ; no oldid" % rev_id)

            if death_found:
                # Remember this revision and keep walking the history.
                requester = pywikibot.User(self.site, user)
                oldid = rev_id
                previous_timestamp = timestamp
                continue

            if oldid is None:
                # The very first revision examined lacks the template.
                pywikibot.output(
                    "Last revision does not contain any {{Mort récente}} template!"
                )
                return None

            pywikibot.output(u"-------------------------------------")
            triplet = (requester, oldid, previous_timestamp)
            pywikibot.output(
                u"Found it: user is %s; oldid is %i and timestamp is %s"
                % triplet)
            return triplet

        if oldid is None:
            # Empty history: nothing was examined.
            return None
        # Every revision holds the template, so the last one examined is
        # where it was added (same reasoning as the single-revision case).
        return (requester, oldid, previous_timestamp)
Пример #12
0
    def run(self):
        """
        Start the robot.

        For every monument yielded by ``self.generator`` the matching item
        is looked up through ``self.monumentIds``; if the monument id is
        unknown a new item labelled with the cleaned-up monument name is
        created and given the id claim.  Afterwards a description and the
        claims still missing on the item are added from the monument data:
        instance (P31), country (P17), administrative unit (P131), address
        (P969), coordinates (P625), image (P18), Europeana ID (P727) and
        Commons category (P373).

        Processing is best effort: an error in one monument is reported
        and the robot continues with the next monument.
        """
        for monument in self.generator:
            try:
                monumentItem = None
                if monument.get('id') in self.monumentIds:
                    monumentItemTitle = u'Q%s' % (self.monumentIds.get(
                        monument.get('id')), )
                    print(monument)
                    print(monumentItemTitle)
                    monumentItem = pywikibot.ItemPage(self.repo,
                                                      title=monumentItemTitle)
                else:
                    print('bla')

                    # Strip wiki markup from the name: piped links keep
                    # their label, plain links keep their target.
                    monumentName = monument.get('name')
                    monumentName = re.sub(r'\[\[([^\|]+)\|([^\]]+)\]\]',
                                          u'\\2', monumentName)
                    monumentName = re.sub(r'\[\[([^\]]+)\]\]', u'\\1',
                                          monumentName)

                    # Shorten over-long names at a full stop or, failing
                    # that, at a comma within the first 200 characters.
                    if len(monumentName) > 200:
                        monumentName = re.sub(r'^(.{20,200})\.(.+)$',
                                              u'\\1.', monumentName)

                    if len(monumentName) > 200:
                        monumentName = re.sub(r'^(.{20,200}),(.+)$',
                                              u'\\1.', monumentName)

                    data = {
                        'labels': {
                            monument.get('lang'): {
                                'language': monument.get('lang'),
                                'value': monumentName
                            }
                        }
                    }
                    identification = {}
                    summary = u'Creating new item with data from %s' % (
                        monument.get('source'), )
                    pywikibot.output(summary)
                    # Sample result:
                    # {u'success': 1, u'entity': {u'lastrevid': 134951692,
                    #  u'labels': {...}, u'descriptions': [], u'claims': [],
                    #  u'type': u'item', u'id': u'Q17000292', u'aliases': []}}
                    result = self.repo.editEntity(identification,
                                                  data,
                                                  summary=summary)
                    monumentItemTitle = result.get(u'entity').get('id')
                    monumentItem = pywikibot.ItemPage(self.repo,
                                                      title=monumentItemTitle)

                    newclaim = pywikibot.Claim(
                        self.repo, u'P%s' % (self.monumentIdProperty, ))
                    newclaim.setTarget(monument.get('id'))
                    pywikibot.output('Adding new id claim to %s' %
                                     monumentItem)
                    monumentItem.addClaim(newclaim)

                if not (monumentItem and monumentItem.exists()):
                    continue

                data = monumentItem.get()
                descriptions = data.get('descriptions')
                claims = data.get('claims')
                print(claims)

                lang = monument.get('lang')
                address = monument.get('address')

                # --- description ---
                if address and not descriptions.get(lang):
                    # FIXME: If it contains links like '[[]]' it will break
                    if u'(' not in address:
                        monumentDescription = u'Rijksmonument op %s' % (
                            address, )
                        summary = u'Setting %s description to "%s"' % (
                            lang,
                            monumentDescription,
                        )
                        try:
                            monumentItem.editDescriptions(
                                {lang: monumentDescription},
                                summary=summary)
                        except pywikibot.exceptions.APIError:
                            pywikibot.output(
                                u'Ooops, that didn\'t work. Another item already has the same description'
                            )

                # --- instance (P31) ---
                if u'P31' not in claims:
                    newclaim = pywikibot.Claim(self.repo, u'P31')
                    monumentTypeItem = pywikibot.ItemPage(
                        self.repo, title=self.monumentType)
                    newclaim.setTarget(monumentTypeItem)
                    pywikibot.output('Adding instance claim to %s' %
                                     monumentItem)
                    monumentItem.addClaim(newclaim)

                # --- country (P17) ---
                adm0 = monument.get('adm0')
                if adm0 and u'P17' not in claims:
                    print(u'no country found')
                    if adm0.upper() in self.iso3166_1Codes:
                        adm0ItemTitle = u'Q%s' % (self.iso3166_1Codes.get(
                            adm0.upper()), )
                        adm0Item = pywikibot.ItemPage(self.repo,
                                                      title=adm0ItemTitle)

                        newclaim = pywikibot.Claim(self.repo, u'P17')
                        newclaim.setTarget(adm0Item)
                        pywikibot.output('Adding country claim to %s' %
                                         monumentItem)
                        monumentItem.addClaim(newclaim)
                else:
                    print(u'country found')

                # --- administrative unit (P131) ---
                # An item whose only P131 claim is the province still
                # needs a more specific unit.
                foundProv = False
                adm1 = monument.get('adm1')
                if u'P131' in claims and len(claims.get('P131')) == 1:
                    # adm1 may be missing; calling upper() on None used to
                    # abort the whole monument.
                    if adm1 and adm1.upper() in self.iso3166_2Codes:
                        provinceTitle = u'Q%s' % (self.iso3166_2Codes.get(
                            adm1.upper()), )
                        if claims.get('P131')[0].getTarget().title(
                        ) == provinceTitle:
                            print(u'This item only contains a province claim')
                            foundProv = True

                if u'P131' not in claims or foundProv:
                    print(u'no administrative thingie found')
                    for adm in [
                            adm1,
                            monument.get('adm2'),
                            monument.get('adm3'),
                            monument.get('adm4')
                    ]:
                        if not adm:
                            continue

                        if adm.upper() in self.iso3166_2Codes:
                            if not foundProv:
                                print(u'Found an item for the ISO code')
                                admItemTitle = u'Q%s' % (
                                    self.iso3166_2Codes.get(adm.upper()), )
                                admItem = pywikibot.ItemPage(
                                    self.repo, title=admItemTitle)

                                newclaim = pywikibot.Claim(self.repo, u'P131')
                                newclaim.setTarget(admItem)
                                pywikibot.output(
                                    u'Adding %s to %s' %
                                    (admItem.title(), monumentItem.title()))
                                monumentItem.addClaim(newclaim)
                            continue

                        # Not an ISO code: treat it as a (possibly linked)
                        # article title on the monument's Wikipedia.
                        adm = adm.replace(u'[', u'').replace(u']', u'')
                        site = pywikibot.Site(lang, u'wikipedia')
                        admLink = pywikibot.Link(adm,
                                                 source=site,
                                                 defaultNamespace=0)
                        admPage = pywikibot.Page(admLink)
                        if admPage.isRedirectPage():
                            admPage = pywikibot.Page(
                                admPage.getRedirectTarget())
                        if not admPage.exists():
                            pywikibot.output(
                                '[[%s]] doesn\'t exist so I can\'t link to it'
                                % (admPage.title(), ))
                            continue
                        if admPage.isDisambig():
                            pywikibot.output(
                                '[[%s]] is a disambiguation page so I can\'t link to it'
                                % (admPage.title(), ))
                            continue

                        admItem = pywikibot.ItemPage.fromPage(admPage)
                        if not admItem.exists():
                            continue

                        # Only items that are an instance of Q2039348 are
                        # accepted as P131 target.
                        munFound = any(
                            instClaim.getTarget().title() == 'Q2039348'
                            for instClaim in admItem.claims.get('P31', []))
                        if not munFound:
                            # It's not an administrative division, but it
                            # might be in one
                            for possAdmClaim in admItem.claims.get(
                                    'P131', []):
                                possAdmItem = possAdmClaim.getTarget()
                                possAdmItem.get()
                                if any(instClaim.getTarget().title() ==
                                       'Q2039348'
                                       for instClaim in
                                       possAdmItem.claims.get('P31', [])):
                                    admItem = possAdmItem
                                    munFound = True

                        if munFound:
                            newclaim = pywikibot.Claim(self.repo, u'P131')
                            newclaim.setTarget(admItem)
                            pywikibot.output(
                                u'Adding %s to %s' %
                                (admItem.title(), monumentItem.title()))
                            monumentItem.addClaim(newclaim)
                else:
                    print(u'administrative thingie found')

                # --- address (P969) ---
                if address and u'P969' not in claims:
                    # Addresses still containing wiki markup are skipped.
                    if not any(char in address for char in u'[]|'):
                        newclaim = pywikibot.Claim(self.repo, u'P969')
                        newclaim.setTarget(address)
                        pywikibot.output(u'Adding %s to %s' %
                                         (address, monumentItem.title()))
                        monumentItem.addClaim(newclaim)
                    else:
                        print(u'Contains funky chars, skipping')

                    print(u'no address found')
                else:
                    print(u'address found')

                # --- coordinates (P625) ---
                if monument.get('lat') and monument.get(
                        'lon') and u'P625' not in claims:
                    print(u'no coordinates found')
                    coordinate = pywikibot.Coordinate(monument.get('lat'),
                                                      monument.get('lon'),
                                                      dim=100)
                    newclaim = pywikibot.Claim(self.repo, u'P625')
                    newclaim.setTarget(coordinate)
                    pywikibot.output(u'Adding %s, %s to %s' %
                                     (coordinate.lat, coordinate.lon,
                                      monumentItem.title()))
                    monumentItem.addClaim(newclaim)
                else:
                    print(u'coordinates found')

                # --- image (P18) ---
                if monument.get('image') and u'P18' not in claims:
                    print(u'no image found')
                    newclaim = pywikibot.Claim(self.repo, u'P18')
                    commonssite = pywikibot.Site("commons", "commons")
                    imagelink = pywikibot.Link(monument.get('image'),
                                               source=commonssite,
                                               defaultNamespace=6)
                    image = pywikibot.ImagePage(imagelink)
                    if image.isRedirectPage():
                        image = pywikibot.ImagePage(
                            image.getRedirectTarget())
                    if not image.exists():
                        pywikibot.output(
                            '[[%s]] doesn\'t exist so I can\'t link to it'
                            % (image.title(), ))
                    else:
                        newclaim.setTarget(image)
                        pywikibot.output(
                            'Adding %s --> %s' %
                            (newclaim.getID(), newclaim.getTarget()))
                        monumentItem.addClaim(newclaim)
                else:
                    print(u'image found')

                # --- Europeana ID (P727) ---
                if u'P727' not in claims:
                    europeanaID = u'2020718/DR_%s' % (monument.get('id'), )

                    newclaim = pywikibot.Claim(self.repo, u'P727')
                    newclaim.setTarget(europeanaID)
                    pywikibot.output('Adding Europeana ID claim to %s' %
                                     monumentItem)
                    monumentItem.addClaim(newclaim)

                # --- Commons category (P373) ---
                if monument.get('commonscat') and u'P373' not in claims:
                    print(u'no commonscat found')
                    newclaim = pywikibot.Claim(self.repo, u'P373')
                    commonssite = pywikibot.Site("commons", "commons")
                    commonslink = pywikibot.Link(
                        monument.get('commonscat'),
                        source=commonssite,
                        defaultNamespace=14)
                    commonscat = pywikibot.Page(commonslink)
                    if commonscat.isRedirectPage():
                        commonscat = pywikibot.Page(
                            commonscat.getRedirectTarget())
                    if not commonscat.exists():
                        pywikibot.output(
                            '[[%s]] doesn\'t exist so I can\'t link to it'
                            % (commonscat.title(), ))
                    else:
                        newclaim.setTarget(
                            commonscat.title(withNamespace=False))
                        pywikibot.output(
                            'Adding %s --> %s' %
                            (newclaim.getID(), newclaim.getTarget()))
                        monumentItem.addClaim(newclaim)
            except Exception:
                # Deliberately best effort: report the problem and go on
                # with the next monument.  KeyboardInterrupt and SystemExit
                # are no longer swallowed, so the robot can be stopped.
                print(u'F**k this shit, I am just going to contiue anyway')
                pywikibot.exception()
Пример #13
0
def find_add(page):
    """
    Find who added the active {{Déblocage}} request to a page.

    The revisions returned by ``page.getVersionHistory()`` are examined in
    order (the code treats the first one as the "last revision", i.e. the
    history is expected newest first).  The walk stops at the first
    revision that does not contain an active unblock request; the
    revision examined just before it is the one that added the request.

    Returns (user, oldid) where
      * user is the user who added the {{Déblocage}} template
         (pywikibot.User)
      * oldid is the oldid of the revision of this add
         (int)
    If the very first revision examined holds no active request,
    (None, None) is returned.
    """
    site = pywikibot.Site()
    history = page.getVersionHistory()

    if len(history) == 1:
        # Only one revision: its author is taken to be the requester
        # without inspecting the text.
        [(revid, timestamp, user, comment)] = history
        return (pywikibot.User(site, user), revid)

    # Values of the first positional parameter that mark a request as
    # already handled.  The original code listed every accented value
    # twice (presumably precomposed and decomposed spellings); both forms
    # are spelled out explicitly here so that they cannot be lost.
    handled_markers = ('nocat', 'oui', 'non',
                       u'trait\xe9', u'traite\u0301',
                       u'trait\xe9e', u'traite\u0301e')

    oldid = None
    requester = None

    for (revid, timestamp, user, comment) in history:
        pywikibot.output("Analyzing id %i: timestamp is %s and user is %s"
                         % (revid, timestamp, user))

        text = page.getOldVersion(revid)
        templates_params_list = textlib.extract_templates_and_params(text)
        unblock_found = False
        for (template_name, dict_param) in templates_params_list:
            try:
                template_page = pywikibot.Page(
                    pywikibot.Link(template_name, site, defaultNamespace=10),
                    site)
                pywikibot.output(template_page)
                pywikibot.output(template_page.title(withNamespace=False))
                # TODO : auto-finding redirections
                if template_page.title(withNamespace=False) in [u"Déblocage", u"Unblock"]:
                    # The {{Déblocage}} template may no longer be active:
                    # it is active only when 'nocat' is absent, empty or
                    # "non", and the first parameter is not a handled
                    # marker.
                    nocat_allows = dict_param.get('nocat', '') in ("non", '')
                    already_handled = dict_param.get('1') in handled_markers
                    if nocat_allows and not already_handled:
                        pywikibot.output('Found unblock request')
                        pywikibot.output((template_name, dict_param))
                        unblock_found = True
                        break
            except Exception as myexception:
                # Best effort: a template that cannot be analyzed is
                # reported and skipped, the others are still examined.
                pywikibot.output(u'An error occurred while analyzing template %s' % template_name)
                pywikibot.output(u'%s %s' % (type(myexception), myexception.args))

        pywikibot.output("id is %i" % revid)
        if oldid:
            pywikibot.output("oldid is %i" % oldid)
        else:
            pywikibot.output("no oldid")

        if not unblock_found:
            if oldid is None:
                # Nothing was found in the first revision examined.  The
                # original test (id == oldid) could never be true, so this
                # message was unreachable; the return value stays
                # (None, None) as before.
                pywikibot.output("Last revision does not contain any {{Déblocage}} template!")
            return (requester, oldid)

        requester = pywikibot.User(site, user)
        oldid = revid

    # Every revision contains an active request: the last revision
    # examined is the one that introduced it.  (The original fell off the
    # end of the function here and returned None.)
    return (requester, oldid)
Пример #14
0
def _add_belvedere_claim(item, artist, repo, summary):
    """
    Add the Belvedere identifier (P3421) to item unless it already has one.

    A redirected item is resolved to its target first.  Always returns
    True: either the claim was already present or it has just been added
    with the given edit summary.
    """
    if item.isRedirectPage():
        item = item.getRedirectTarget()

    claims = item.get().get('claims')
    if u'P3421' in claims:
        # Already has Belvedere, great!
        return True

    newclaim = pywikibot.Claim(repo, u'P3421')
    newclaim.setTarget(artist.get('id'))
    pywikibot.output('Adding Belvedere %s claim to %s' % (
        artist.get('id'),
        item.title(),
    ))

    # Default text is "Created claim: Belvedere identifier (P3421): 123, "
    item.addClaim(newclaim, summary=summary)
    return True


def _link_via_authority_id(source, identifier, lookup, artist, repo):
    """
    Try to link the artist through an authority identifier (ULAN or GND).

    source is the label used in messages ('ULAN' or 'GND'), identifier the
    id found on the artist page and lookup the dict mapping such ids to
    entries holding a 'qid'.

    Returns None when the identifier is not in lookup (the caller should
    try the next source), False when the Wikidata item does not exist,
    and True when the item has the claim afterwards.
    """
    pywikibot.output(u'Found an %s match on %s to %s' %
                     (source, artist.get('url'), identifier))
    if identifier not in lookup:
        return None

    itemTitle = lookup.get(identifier).get('qid')
    pywikibot.output(u'Found %s as the Wikidata item to link to' %
                     (itemTitle, ))
    item = pywikibot.ItemPage(repo, title=itemTitle)
    if not item.exists():
        return False

    summary = u'based on link to %s %s on entry "%s" on Belvedere website' % (
        source,
        identifier,
        artist.get(u'name'),
    )
    return _add_belvedere_claim(item, artist, repo, summary)


def processArtist(artist, ulanwd, gndwd, repo):
    """
    Link a Belvedere artist to Wikidata by adding a P3421 claim.

    The artist page (artist['url']) is fetched and searched for, in this
    order, a ULAN link, a GND link and a German Wikipedia link.  The first
    source that leads to a Wikidata item decides the result; a ULAN or
    GND id that is missing from its lookup table falls through to the
    next source.

    Parameters:
        artist - dict-like object read with the keys 'url', 'id' and 'name'
        ulanwd - dict mapping ULAN ids to entries holding a 'qid'
        gndwd - dict mapping GND ids to entries holding a 'qid'
        repo - data repository used to build the item and the claim

    Returns True when the item has the Belvedere claim afterwards, False
    when the page or item found does not exist, and None when no usable
    link was found at all.
    """
    itemPage = requests.get(artist.get('url'))
    # Raw strings: the patterns match the same text as before, without
    # relying on invalid string escapes such as '\<' or '\/'.
    ulanregex = r'<a href="http://vocab\.getty\.edu/page/ulan/(\d+)">ULAN</a>'
    gndregex = r'<a href="http://d-nb\.info/gnd/([^"]+)">GND</a>'
    wikiregex = r'<a href="https://de\.wikipedia\.org/wiki/([^"]+)">Wikipedia</a>'

    ulanmatch = re.search(ulanregex, itemPage.text)
    gndmatch = re.search(gndregex, itemPage.text)
    wikimatch = re.search(wikiregex, itemPage.text)

    # The matched ids are kept as text.  Encoding them to UTF-8 produced
    # bytes on Python 3, which never equal the str keys of the lookup
    # tables and render as b'...' in the messages.
    if ulanmatch:
        result = _link_via_authority_id(u'ULAN', ulanmatch.group(1),
                                        ulanwd, artist, repo)
        if result is not None:
            return result

    if gndmatch:
        result = _link_via_authority_id(u'GND', gndmatch.group(1),
                                        gndwd, artist, repo)
        if result is not None:
            return result

    if wikimatch:
        # NOTE(review): the captured title is used exactly as it appears
        # in the href; confirm whether it can be percent-encoded.
        articleTitle = u':de:%s' % (wikimatch.group(1), )
        page = pywikibot.Page(pywikibot.Link(articleTitle))
        if not page.exists():
            return False
        if page.isRedirectPage():
            page = page.getRedirectTarget()
        item = page.data_item()

        if not item or not item.exists():
            return False

        summary = u'based on link to [[%s]] on entry "%s" on Belvedere website' % (
            articleTitle,
            artist.get(u'name'),
        )
        return _add_belvedere_claim(item, artist, repo, summary)

    # No usable link on the artist page.
    return None
Пример #15
0
def main(*args):
    """
    Parse the command line and run the disambiguation robot.

    The arguments are passed through pywikibot.handleArgs(); every
    remaining argument that does not start with '-' is collected as a
    part of the title of the disambiguation page to work on.  When
    neither a page title nor a generator option (-file, -start) is
    given, the user is asked for the page title.
    """
    # the option that's always selected when the bot wonders what to do with
    # a link. If it's None, the user is prompted (default behaviour).
    always = None
    alternatives = []
    getAlternatives = True
    dnSkip = False
    # if the -file argument is used, page titles are dumped in this array.
    # otherwise it will only contain one page.
    generator = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    primary = False
    main_only = False

    # value of the -min: option, handed to DisambiguationRobot as 'minimum'
    minimum = 0

    for arg in pywikibot.handleArgs(*args):
        if arg.startswith('-primary:'):
            primary = True
            getAlternatives = False
            alternatives.append(arg[9:])
        elif arg == '-primary':
            primary = True
        elif arg.startswith('-always:'):
            always = arg[8:]
        elif arg.startswith('-file'):
            if len(arg) == 5:
                generator = pagegenerators.TextfilePageGenerator(filename=None)
            else:
                generator = pagegenerators.TextfilePageGenerator(
                    filename=arg[6:])
        elif arg.startswith('-pos:'):
            posTitle = arg[5:]
            if not posTitle:
                # A bare "-pos:" used to raise IndexError on arg[5].
                pywikibot.output(
                    "Option -pos: requires a page title; ignoring it.")
            elif posTitle[0] != ':':
                mysite = pywikibot.Site()
                page = pywikibot.Page(pywikibot.Link(posTitle, mysite))
                if page.exists():
                    alternatives.append(page.title())
                else:
                    answer = pywikibot.inputChoice(
                        u'Possibility %s does not actually exist. Use it anyway?'
                        % page.title(), ['yes', 'no'], ['y', 'N'], 'N')
                    if answer == 'y':
                        alternatives.append(page.title())
            else:
                # a title with a leading colon is taken as given, without
                # checking that the page exists
                alternatives.append(posTitle)
        elif arg == '-just':
            getAlternatives = False
        elif arg == '-dnskip':
            dnSkip = True
        elif arg == '-main':
            main_only = True
        elif arg.startswith('-min:'):
            minimum = int(arg[5:])
        elif arg.startswith('-start'):
            try:
                if len(arg) <= len('-start:'):
                    generator = pagegenerators.CategorizedPageGenerator(
                        pywikibot.Site().disambcategory())
                else:
                    generator = pagegenerators.CategorizedPageGenerator(
                        pywikibot.Site().disambcategory(), start=arg[7:])
                generator = pagegenerators.NamespaceFilterPageGenerator(
                    generator, [0])
            except pywikibot.NoPage:
                pywikibot.output(
                    "Disambiguation category for your wiki is not known.")
                raise
        elif arg.startswith("-"):
            pywikibot.output("Unrecognized command line argument: %s" % arg)
            # show the help text
            # NOTE(review): parsing continues afterwards unless showHelp()
            # itself terminates the program - confirm.
            pywikibot.showHelp()
        else:
            pageTitle.append(arg)
    site = pywikibot.Site()
    site.login()

    # if the disambiguation page is given as a command line argument,
    # connect the title's parts with spaces
    if pageTitle:
        pageTitle = ' '.join(pageTitle)
        page = pywikibot.Page(pywikibot.Link(pageTitle, site))
        generator = iter([page])

    # if no disambiguation page was given as an argument, and none was
    # read from a file, query the user
    if not generator:
        pageTitle = pywikibot.input(
            u'On which disambiguation page do you want to work?')
        page = pywikibot.Page(pywikibot.Link(pageTitle, site))
        generator = iter([page])

    bot = DisambiguationRobot(always,
                              alternatives,
                              getAlternatives,
                              dnSkip,
                              generator,
                              primary,
                              main_only,
                              minimum=minimum)
    bot.run()
Пример #16
0
    def handleArg(self, arg):
        """Parse one argument at a time.

        If it is recognized as an argument that specifies a generator, a
        generator is created and added to the accumulation list
        (self.gens), and the function returns True.  The filter options
        -namespace, -ns, -step and -limit only update the corresponding
        attribute and also return True.  Otherwise, it returns False, so
        that the caller can try parsing the argument.  Call
        getCombinedGenerator() after all arguments have been parsed to
        get the final output generator.

        The order of the startswith() tests matters: an option whose name
        begins with another option's name is tested first (-filelinks
        before -file, -catr and -category before -cat, -subcatsr before
        -subcats, -uncatfiles and -uncatcat before -uncat).

        """
        site = pywikibot.getSite()
        gen = None
        if arg.startswith('-filelinks'):
            fileLinksPageTitle = arg[11:]
            if not fileLinksPageTitle:
                fileLinksPageTitle = i18n.input(
                    'pywikibot-enter-file-links-processing')
            if fileLinksPageTitle.startswith(site.namespace(6)
                                             + ":"):
                fileLinksPage = pywikibot.ImagePage(site,
                                                    fileLinksPageTitle)
            else:
                fileLinksPage = pywikibot.ImagePage(site,
                                                    'Image:' +
                                                    fileLinksPageTitle)
            gen = FileLinksGenerator(fileLinksPage)
        elif arg.startswith('-unusedfiles'):
            if len(arg) == 12:
                gen = UnusedFilesGenerator()
            else:
                gen = UnusedFilesGenerator(number=int(arg[13:]))
        elif arg.startswith('-unwatched'):
            if len(arg) == 10:
                gen = UnwatchedPagesPageGenerator()
            else:
                gen = UnwatchedPagesPageGenerator(number=int(arg[11:]))
        elif arg.startswith('-usercontribs'):
            gen = UserContributionsGenerator(arg[14:])
        elif arg.startswith('-withoutinterwiki'):
            if len(arg) == 17:
                gen = WithoutInterwikiPageGenerator()
            else:
                gen = WithoutInterwikiPageGenerator(number=int(arg[18:]))
        elif arg.startswith('-interwiki'):
            title = arg[11:]
            if not title:
                title = i18n.input('pywikibot-enter-page-processing')
            page = pywikibot.Page(pywikibot.Link(title,
                                                 pywikibot.Site()))
            gen = InterwikiPageGenerator(page)
        elif arg.startswith('-recentchanges'):
            # A bare "-recentchanges:" used to reach int('') and raise
            # ValueError; it now gets the default total like
            # "-recentchanges".
            if len(arg) > len('-recentchanges:'):
                gen = RecentChangesPageGenerator(
                    total=int(arg[len('-recentchanges:'):]))
            else:
                gen = RecentChangesPageGenerator(total=60)
            gen = DuplicateFilterPageGenerator(gen)
        elif arg.startswith('-file'):
            textfilename = arg[6:]
            if not textfilename:
                textfilename = pywikibot.input(
                    u'Please enter the local file name:')
            gen = TextfilePageGenerator(textfilename)
        elif arg.startswith('-namespace'):
            if len(arg) == len('-namespace'):
                self.namespaces.append(
                    pywikibot.input(u'What namespace are you filtering on?'))
            else:
                self.namespaces.extend(arg[len('-namespace:'):].split(","))
            return True
        elif arg.startswith('-ns'):
            if len(arg) == len('-ns'):
                self.namespaces.append(
                    pywikibot.input(u'What namespace are you filtering on?'))
            else:
                self.namespaces.extend(arg[len('-ns:'):].split(","))
            return True
        elif arg.startswith('-step'):
            if len(arg) == len('-step'):
                self.step = int(pywikibot.input("What is the step value?"))
            else:
                self.step = int(arg[len('-step:'):])
            return True
        elif arg.startswith('-limit'):
            if len(arg) == len('-limit'):
                self.limit = int(pywikibot.input("What is the limit value?"))
            else:
                self.limit = int(arg[len('-limit:'):])
            return True
        elif arg.startswith('-catr'):
            gen = self.getCategoryGen(arg, len('-catr'), recurse=True)
        elif arg.startswith('-category'):
            gen = self.getCategoryGen(arg, len('-category'))
        elif arg.startswith('-cat'):
            gen = self.getCategoryGen(arg, len('-cat'))
        elif arg.startswith('-subcatsr'):
            gen = self.setSubCategoriesGen(arg, 9, recurse=True)
        elif arg.startswith('-subcats'):
            gen = self.setSubCategoriesGen(arg, 8)
        elif arg.startswith('-page'):
            if len(arg) == len('-page'):
                gen = [pywikibot.Page(
                           pywikibot.Link(
                               pywikibot.input(
                                   u'What page do you want to use?'),
                               pywikibot.getSite())
                           )]
            else:
                gen = [pywikibot.Page(pywikibot.Link(arg[len('-page:'):],
                                                     pywikibot.getSite())
                                      )]
        elif arg.startswith('-uncatfiles'):
            gen = UnCategorizedImageGenerator()
        elif arg.startswith('-uncatcat'):
            gen = UnCategorizedCategoryGenerator()
        elif arg.startswith('-uncat'):
            gen = UnCategorizedPageGenerator()
        elif arg.startswith('-ref'):
            referredPageTitle = arg[5:]
            if not referredPageTitle:
                referredPageTitle = pywikibot.input(
                    u'Links to which page should be processed?')
            referredPage = pywikibot.Page(pywikibot.Link(referredPageTitle,
                                                         pywikibot.Site()))
            gen = ReferringPageGenerator(referredPage)
        elif arg.startswith('-links'):
            linkingPageTitle = arg[7:]
            if not linkingPageTitle:
                linkingPageTitle = pywikibot.input(
                    u'Links from which page should be processed?')
            linkingPage = pywikibot.Page(pywikibot.Link(linkingPageTitle,
                                                        pywikibot.Site()))
            gen = LinkedPageGenerator(linkingPage)
        elif arg.startswith('-weblink'):
            url = arg[9:]
            if not url:
                url = pywikibot.input(
                    u'Pages with which weblink should be processed?')
            gen = LinksearchPageGenerator(url)
        elif arg.startswith('-transcludes'):
            transclusionPageTitle = arg[len('-transcludes:'):]
            if not transclusionPageTitle:
                transclusionPageTitle = pywikibot.input(
                    u'Pages that transclude which page should be processed?')
            transclusionPage = pywikibot.Page(
                                   pywikibot.Link(transclusionPageTitle,
                                                  defaultNamespace=10,
                                                  source=pywikibot.Site()))
            gen = ReferringPageGenerator(transclusionPage,
                                         onlyTemplateInclusion=True)
        elif arg.startswith('-start'):
            firstPageTitle = arg[7:]
            if not firstPageTitle:
                firstPageTitle = pywikibot.input(
                    u'At which page do you want to start?')
            firstpagelink = pywikibot.Link(firstPageTitle,
                                           pywikibot.Site())
            namespace = firstpagelink.namespace
            firstPageTitle = firstpagelink.title
            gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                        includeredirects=False)
        elif arg.startswith('-prefixindex'):
            prefix = arg[13:]
            if not prefix:
                prefix = pywikibot.input(
                    u'What page names are you looking for?')
            gen = PrefixingPageGenerator(prefix=prefix)
        elif arg.startswith('-newimages'):
            limit = arg[11:] or pywikibot.input(
                u'How many images do you want to load?')
            gen = NewimagesPageGenerator(total=int(limit))
        elif arg.startswith('-newpages'):
            # Same boundary as -recentchanges: a bare "-newpages:" falls
            # back to the default total instead of failing on int('').
            if len(arg) > len('-newpages:'):
                gen = NewpagesPageGenerator(
                    total=int(arg[len('-newpages:'):]))
            else:
                gen = NewpagesPageGenerator(total=60)
        elif arg.startswith('-imagesused'):
            imagelinkstitle = arg[len('-imagesused:'):]
            if not imagelinkstitle:
                imagelinkstitle = pywikibot.input(
                    u'Images on which page should be processed?')
            imagelinksPage = pywikibot.Page(pywikibot.Link(imagelinkstitle,
                                                           pywikibot.Site()))
            gen = ImagesPageGenerator(imagelinksPage)
        elif arg.startswith('-search'):
            mediawikiQuery = arg[8:]
            if not mediawikiQuery:
                mediawikiQuery = pywikibot.input(
                    u'What do you want to search for?')
            # In order to be useful, all namespaces are required
            gen = SearchPageGenerator(mediawikiQuery, namespaces=[])
        elif arg.startswith('-google'):
            gen = GoogleSearchPageGenerator(arg[8:])
        elif arg.startswith('-titleregex'):
            # The offsets used to be 6 and 7 (the lengths of a shorter
            # option name), so "-titleregex:foo" produced the regex
            # "egex:foo" and the prompt could never be reached.
            if len(arg) == len('-titleregex'):
                regex = pywikibot.input(
                    u'What page names are you looking for?')
            else:
                regex = arg[len('-titleregex:'):]
            gen = RegexFilterPageGenerator(pywikibot.Site().allpages(), regex)
        elif arg.startswith('-yahoo'):
            gen = YahooSearchPageGenerator(arg[7:])

        if gen:
            self.gens.append(gen)
            return True
        else:
            return False
Пример #17
0
    def treat(self, refPage, disambPage):
        """
        Parameters:
            disambPage - The disambiguation page or redirect we don't want
                anything to link to
            refPage - A page linking to disambPage
        Returns False if the user pressed q to completely quit the program.
        Otherwise, returns True.

        """
        # TODO: break this function up into subroutines!

        dn_template_str = i18n.translate(self.mysite, dn_template)
        include = False
        unlink = False
        new_targets = []
        try:
            text = refPage.get(throttle=False)
            ignoreReason = self.checkContents(text)
            if ignoreReason:
                pywikibot.output(
                    '\n\nSkipping %s because it contains %s.\n\n' %
                    (refPage.title(), ignoreReason))
            else:
                include = True
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'%s is a redirect to %s' %
                             (refPage.title(), disambPage.title()))
            if disambPage.isRedirectPage():
                target = self.alternatives[0]
                choice = pywikibot.inputChoice(
                    u'Do you want to make redirect %s point to %s?' %
                    (refPage.title(), target), ['yes', 'no'], ['y', 'N'], 'N')
                if choice == 'y':
                    redir_text = '#%s [[%s]]' \
                                 % (self.mysite.redirect(default=True), target)
                    try:
                        refPage.put_async(redir_text, comment=self.comment)
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Page not saved: %s' % error.args)
            else:
                choice = pywikibot.inputChoice(
                    u'Do you want to work on pages linking to %s?' %
                    refPage.title(), ['yes', 'no', 'change redirect'],
                    ['y', 'N', 'c'], 'N')
                if choice == 'y':
                    gen = ReferringPageGeneratorWithIgnore(
                        refPage, self.primary)
                    preloadingGen = pagegenerators.PreloadingGenerator(gen)
                    for refPage2 in preloadingGen:
                        # run until the user selected 'quit'
                        if not self.treat(refPage2, refPage):
                            break
                elif choice == 'c':
                    text = refPage.get(throttle=False, get_redirect=True)
                    include = "redirect"
        except pywikibot.NoPage:
            pywikibot.output(
                u'Page [[%s]] does not seem to exist?! Skipping.' %
                refPage.title())
            include = False
        if include in (True, "redirect"):
            # make a backup of the original text so we can show the changes later
            original_text = text
            n = 0
            curpos = 0
            dn = False
            edited = False
            # This loop will run until we have finished the current page
            while True:
                m = self.linkR.search(text, pos=curpos)
                if not m:
                    if n == 0:
                        pywikibot.output(u"No changes necessary in %s" %
                                         refPage.title())
                        return True
                    else:
                        # stop loop and save page
                        break
                # Make sure that next time around we will not find this same hit.
                curpos = m.start() + 1
                try:
                    foundlink = pywikibot.Link(m.group('title'),
                                               disambPage.site)
                except pywikibot.Error:
                    continue
                # ignore interwiki links
                if foundlink.site != disambPage.site:
                    continue
                # Check whether the link found is to disambPage.
                try:
                    if foundlink.canonical_title() != disambPage.title():
                        continue
                except pywikibot.Error:
                    # must be a broken link
                    pywikibot.log(u"Invalid link [[%s]] in page [[%s]]" %
                                  (m.group('title'), refPage.title()))
                    continue
                n += 1
                # how many bytes should be displayed around the current link
                context = 60
                #there's a {{dn}} here already
                already_dn = text[m.end():m.end() + 8].find(
                    dn_template_str[:4]) > -1
                if already_dn and self.dnSkip:
                    continue

                # This loop will run while the user doesn't choose an option
                # that will actually change the page
                while True:
                    # Show the title of the page where the link was found.
                    # Highlight the title in purple.
                    pywikibot.output(
                        u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                        refPage.title())

                    if not self.always:
                        # at the beginning of the link, start red color.
                        # at the end of the link, reset the color to default
                        pywikibot.output(text[max(0,
                                                  m.start() -
                                                  context):m.start()] +
                                         '\03{lightred}' +
                                         text[m.start():m.end()] +
                                         '\03{default}' +
                                         text[m.end():m.end() + context])
                        if edited:
                            choice = pywikibot.input(
                                u"Option (#, r#, [s]kip link, [e]dit page, [n]ext page, [u]nlink, [q]uit,\n"
                                u"        [t]ag template " + dn_template_str +
                                ",\n"
                                u"        [m]ore context, [l]ist, [a]dd new, x=save in this form):"
                            )
                        else:
                            choice = pywikibot.input(
                                u"Option (#, r#, [s]kip link, [e]dit page, [n]ext page, [u]nlink, [q]uit,\n"
                                u"        [t]ag template " + dn_template_str +
                                ",\n"
                                u"        [m]ore context, show [d]isambiguation page, [l]ist, [a]dd new):"
                            )
                    else:
                        choice = self.always
                    if choice in ['a', 'A']:
                        newAlternative = pywikibot.input(u'New alternative:')
                        self.alternatives.append(newAlternative)
                        self.listAlternatives()
                    elif choice in ['e', 'E']:
                        editor = editarticle.TextEditor()
                        newText = editor.edit(text,
                                              jumpIndex=m.start(),
                                              highlight=disambPage.title())
                        # if user didn't press Cancel
                        if newText and newText != text:
                            text = newText
                            break
                    elif choice in ['d', 'D']:
                        editor = editarticle.TextEditor()
                        if disambPage.isRedirectPage():
                            disambredir = disambPage.getRedirectTarget()
                            editor.edit(disambredir.get(),
                                        jumpIndex=m.start(),
                                        highlight=disambredir.title())
                        else:
                            editor.edit(disambPage.get(),
                                        jumpIndex=m.start(),
                                        highlight=disambPage.title())
                    elif choice in ['l', 'L']:
                        self.listAlternatives()
                    elif choice in ['m', 'M']:
                        # show more text around the link we're working on
                        context *= 2
                    else:
                        break

                if choice in ['e', 'E']:
                    # user has edited the page and then pressed 'OK'
                    edited = True
                    curpos = 0
                    continue
                elif choice in ['n', 'N']:
                    # skip this page
                    if self.primary:
                        # If run with the -primary argument, skip this
                        # occurence next time.
                        self.primaryIgnoreManager.ignore(refPage)
                    return True
                elif choice in ['q', 'Q']:
                    # quit the program
                    return False
                elif choice in ['s', 'S']:
                    # Next link on this page
                    n -= 1
                    continue
                elif choice in ['x', 'X'] and edited:
                    # Save the page as is
                    break

                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                page_title = m.group('title')
                link_text = m.group('label')

                if not link_text:
                    # or like this: [[page_title]]trailing_chars
                    link_text = page_title
                if m.group('section') is None:
                    section = ''
                else:
                    section = m.group('section')
                trailing_chars = m.group('linktrail')
                if trailing_chars:
                    link_text += trailing_chars
                # '?', '/' for old choice
                if choice in ['t', 'T', '?', '/']:
                    # small chunk of text to search
                    search_text = text[m.end():m.end() + context]
                    # figure out where the link (and sentance) ends, put note
                    # there
                    end_of_word_match = re.search("\s", search_text)
                    if end_of_word_match:
                        position_split = end_of_word_match.start(0)
                    else:
                        position_split = 0
                    #insert dab needed template
                    text = (text[:m.end() + position_split] + dn_template_str +
                            text[m.end() + position_split:])
                    dn = True
                    continue
                elif choice in ['u', 'U']:
                    # unlink - we remove the section if there's any
                    text = text[:m.start()] + link_text + text[m.end():]
                    unlink = True
                    continue
                else:
                    if len(choice) > 0 and choice[0] == 'r':
                        # we want to throw away the original link text
                        replaceit = True
                        choice = choice[1:]
                    elif include == "redirect":
                        replaceit = True
                    else:
                        replaceit = False

                    try:
                        choice = int(choice)
                    except ValueError:
                        pywikibot.output(u"Unknown option")
                        # step back to ask the user again what to do with the
                        # current link
                        curpos -= 1
                        continue
                    if choice >= len(self.alternatives) or choice < 0:
                        pywikibot.output(
                            u"Choice out of range. Please select a number "
                            u"between 0 and %i." %
                            (len(self.alternatives) - 1))
                        # show list of possible choices
                        self.listAlternatives()
                        # step back to ask the user again what to do with the
                        # current link
                        curpos -= 1
                        continue
                    new_page_title = self.alternatives[choice]
                    repPl = pywikibot.Page(
                        pywikibot.Link(new_page_title, disambPage.site))
                    if (new_page_title[0].isupper() or link_text[0].isupper()):
                        new_page_title = repPl.title()
                    else:
                        new_page_title = repPl.title()
                        new_page_title = (new_page_title[0].lower() +
                                          new_page_title[1:])
                    if new_page_title not in new_targets:
                        new_targets.append(new_page_title)
                    if replaceit and trailing_chars:
                        newlink = "[[%s%s]]%s" % (new_page_title, section,
                                                  trailing_chars)
                    elif replaceit or (new_page_title == link_text
                                       and not section):
                        newlink = "[[%s]]" % new_page_title
                    # check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif ((len(new_page_title) <= len(link_text))
                          and (firstcap(link_text[:len(new_page_title)])
                               == firstcap(new_page_title))
                          and (re.sub(self.trailR, '',
                                      link_text[len(new_page_title):]) == '')
                          and (not section)):
                        newlink = "[[%s]]%s" \
                                  % (link_text[:len(new_page_title)],
                                     link_text[len(new_page_title):])
                    else:
                        newlink = "[[%s%s|%s]]" \
                                  % (new_page_title, section, link_text)
                    text = text[:m.start()] + newlink + text[m.end():]
                    continue

                pywikibot.output(text[max(0, m.start() - 30):m.end() + 30])
            if text == original_text:
                pywikibot.output(u'\nNo changes have been made:\n')
            else:
                pywikibot.output(u'\nThe following changes have been made:\n')
                pywikibot.showDiff(original_text, text)
                pywikibot.output(u'')
                # save the page
                self.setSummaryMessage(disambPage, new_targets, unlink, dn)
                try:
                    refPage.put_async(text, comment=self.comment)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Page not saved: page is locked')
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        return True
Пример #18
0
 def setpage(self):
     """Resolve the page to work on and store it in ``self.page``.

     The title is taken from ``self.options.page``; when that is empty the
     user is prompted for one. Unless ``self.options.edit_redirect`` is set,
     a redirect page is replaced by its redirect target.
     """
     title = self.options.page
     if not title:
         title = pywikibot.input('Page to edit:')
     link = pywikibot.Link(title, self.site)
     self.page = pywikibot.Page(link)
     if self.options.edit_redirect:
         # The caller explicitly wants to edit the redirect itself.
         return
     if self.page.isRedirectPage():
         self.page = self.page.getRedirectTarget()
Пример #19
0
 def test_invalid_link_source(self):
     """ProofreadPage must raise ValueError for an invalid Link source."""
     bad_title = self.not_existing_invalid['title']
     bad_link = pywikibot.Link(bad_title, source=self.site)
     with self.assertRaises(ValueError):
         ProofreadPage(bad_link)
Пример #20
0
    def run(self):
        """
        Start the robot: create or complete one Wikidata item per painting.

        Every element yielded by ``self.generator`` is used like a dict. The
        keys read here are ``id``, ``url``, ``title`` and ``creator`` plus the
        optional ``collectionshort``, ``acquisitiondate``, ``location``,
        ``date``, ``medium`` and ``imageurl``.

        For each painting the method

        1. looks the painting id up in ``self.paintingIds``; when it is
           unknown a new item is created through ``self.repo.editEntity``
           and seeded with the id claim and the collection (P195) claim;
        2. if the item exists, adds whichever of the following claims are
           still missing: P276 (located in), P31 (instance of), P170
           (creator), P571 (date of creation), P186 (material used), P973
           (described at url) and P18 (image, uploading the file first when
           no duplicate is found).

        NOTE(review): this block uses Python 2 ``print`` statements, so it
        only runs on Python 2 as written.
        """
        for painting in self.generator:
            # Buh, for this one I know for sure it's in there

            #print painting[u'id']
            print painting[u'url']

            paintingItem = None
            # NOTE(review): ``newclaims`` is never read in this method.
            newclaims = []
            if painting[u'id'] in self.paintingIds:
                # Known painting: self.paintingIds maps the painting id to
                # the numeric part of the item id, so prefix it with 'Q'.
                paintingItemTitle = u'Q%s' % (self.paintingIds.get(
                    painting[u'id']), )
                print paintingItemTitle
                paintingItem = pywikibot.ItemPage(self.repo,
                                                  title=paintingItemTitle)

            else:
                # Unknown painting: create a new item with an English label
                # and English/Dutch descriptions.
                #Break for now
                print u'Let us create stuff'
                #continue
                #print u'WTFTFTFTFT???'

                #print 'bla'

                data = {
                    'labels': {},
                    'descriptions': {},
                }

                data['labels']['en'] = {
                    'language': 'en',
                    'value': painting[u'title']
                }

                data['descriptions']['en'] = {
                    'language': u'en',
                    'value': u'painting by %s' % (painting[u'creator'], )
                }
                data['descriptions']['nl'] = {
                    'language': u'nl',
                    'value': u'schilderij van %s' % (painting[u'creator'], )
                }

                print data

                # An empty identification dict is passed to editEntity.
                # NOTE(review): presumably this is what makes the API create
                # a new entity instead of editing one; confirm.
                identification = {}
                summary = u'Creating new item with data from %s ' % (
                    painting[u'url'], )
                pywikibot.output(summary)
                #monumentItem.editEntity(data, summary=summary)
                try:
                    result = self.repo.editEntity(identification,
                                                  data,
                                                  summary=summary)
                except pywikibot.exceptions.APIError:
                    # We got ourselves a duplicate label and description, let's correct that
                    # by retrying once with a more specific English
                    # description. An APIError raised by the retry is not
                    # caught here and propagates to the caller.
                    pywikibot.output(
                        u'Oops, already had that one. Trying again')
                    data['descriptions']['en'] = {
                        'language':
                        u'en',
                        'value':
                        u'painting by %s (%s, %s)' %
                        (painting[u'creator'], painting[u'collectionshort'],
                         painting[u'id'])
                    }
                    result = self.repo.editEntity(identification,
                                                  data,
                                                  summary=summary)
                    pass

                #print result
                paintingItemTitle = result.get(u'entity').get('id')
                paintingItem = pywikibot.ItemPage(self.repo,
                                                  title=paintingItemTitle)

                # Add to self.paintingIds so that we don't create dupes
                # (stored without the leading 'Q', matching the lookup above)
                self.paintingIds[painting[u'id']] = paintingItemTitle.replace(
                    u'Q', u'')

                # Id claim on the property configured in
                # self.paintingIdProperty, referenced with the painting url.
                newclaim = pywikibot.Claim(
                    self.repo, u'P%s' % (self.paintingIdProperty, ))
                newclaim.setTarget(painting[u'id'])
                pywikibot.output('Adding new id claim to %s' % paintingItem)
                paintingItem.addClaim(newclaim)

                self.addReference(paintingItem, newclaim, painting[u'url'])

                # Qualify the id claim with the collection (P195).
                newqualifier = pywikibot.Claim(
                    self.repo, u'P195')  #Add collection, isQualifier=True
                newqualifier.setTarget(self.collectionitem)
                pywikibot.output('Adding new qualifier claim to %s' %
                                 paintingItem)
                newclaim.addQualifier(newqualifier)

                # Separate collection (P195) claim on the item itself.
                collectionclaim = pywikibot.Claim(self.repo, u'P195')
                collectionclaim.setTarget(self.collectionitem)
                pywikibot.output('Adding collection claim to %s' %
                                 paintingItem)
                paintingItem.addClaim(collectionclaim)

                # Add the date they got it as a qualifier to the collection
                if painting.get(u'acquisitiondate'):
                    colqualifier = pywikibot.Claim(self.repo, u'P580')
                    acdate = None
                    # Two formats are recognised: a four-digit year, or three
                    # '-'-separated parts unpacked as day, month, year.
                    # NOTE(review): the year-only branch passes the year as a
                    # string while the branch below converts with int();
                    # confirm that WbTime accepts both.
                    if len(painting[u'acquisitiondate']) == 4 and painting[
                            u'acquisitiondate'].isnumeric():  # It's a year
                        acdate = pywikibot.WbTime(
                            year=painting[u'acquisitiondate'])
                    elif len(painting[u'acquisitiondate'].split(u'-', 2)) == 3:
                        (acday, acmonth,
                         acyear) = painting[u'acquisitiondate'].split(u'-', 2)
                        acdate = pywikibot.WbTime(year=int(acyear),
                                                  month=int(acmonth),
                                                  day=int(acday))
                    if acdate:
                        colqualifier.setTarget(acdate)
                        pywikibot.output(
                            'Adding new acquisition date qualifier claim to collection on %s'
                            % paintingItem)
                        collectionclaim.addQualifier(colqualifier)

                self.addReference(paintingItem, collectionclaim,
                                  painting[u'url'])

            # From here on both branches are handled the same way: fill in
            # every supported claim that the item does not have yet.
            if paintingItem and paintingItem.exists():
                painting['wikidata'] = paintingItem.title()

                # ``claims`` is fetched once here and not refreshed after the
                # addClaim calls below.
                data = paintingItem.get()
                claims = data.get('claims')
                #print claims

                if painting.get(u'creator'):
                    self.fixDescription(paintingItem, painting.get(u'creator'))

                # located in
                if u'P276' not in claims and painting.get(u'location'):
                    newclaim = pywikibot.Claim(self.repo, u'P276')
                    location = pywikibot.ItemPage(self.repo,
                                                  painting.get(u'location'))
                    newclaim.setTarget(location)
                    pywikibot.output('Adding located in claim to %s' %
                                     paintingItem)
                    paintingItem.addClaim(newclaim)

                    self.addReference(paintingItem, newclaim, painting['url'])

                # instance of always painting while working on the painting collection
                if u'P31' not in claims:

                    dcformatItem = pywikibot.ItemPage(self.repo,
                                                      title='Q3305213')

                    newclaim = pywikibot.Claim(self.repo, u'P31')
                    newclaim.setTarget(dcformatItem)
                    pywikibot.output('Adding instance claim to %s' %
                                     paintingItem)
                    paintingItem.addClaim(newclaim)

                    self.addReference(paintingItem, newclaim, painting['url'])

                # creator
                if u'P170' not in claims and painting.get(u'creator'):
                    # Search the repository (namespace 0, at most 10 hits)
                    # for items matching the creator name.
                    #print painting[u'creator']
                    creategen = pagegenerators.PreloadingEntityGenerator(
                        pagegenerators.WikidataItemGenerator(
                            pagegenerators.SearchPageGenerator(
                                painting[u'creator'],
                                step=None,
                                total=10,
                                namespaces=[0],
                                site=self.repo)))

                    newcreator = None

                    try:
                        # A candidate is accepted when its English or Dutch
                        # label (first branch) or one of its English or Dutch
                        # aliases (second branch) equals the creator name and
                        # one of its P106 claims targets Q1028181. The loop
                        # does not stop at the first hit, so the last
                        # accepted candidate wins.
                        # NOTE(review): Q1028181 is presumably "painter";
                        # confirm.
                        for creatoritem in creategen:
                            print creatoritem.title()
                            if creatoritem.get().get('labels').get(
                                    'en'
                            ) == painting[u'creator'] or creatoritem.get(
                            ).get('labels').get('nl') == painting[u'creator']:
                                #print creatoritem.get().get('labels').get('en')
                                #print creatoritem.get().get('labels').get('nl')
                                # Check occupation and country of citizenship
                                # (only the occupation, P106, is actually
                                # checked below)
                                if u'P106' in creatoritem.get().get('claims'):
                                    existing_claims = creatoritem.get().get(
                                        'claims').get('P106')
                                    for existing_claim in existing_claims:
                                        if existing_claim.target_equals(
                                                u'Q1028181'):
                                            newcreator = creatoritem
                                    # Last statement of this branch, so the
                                    # ``continue`` has no effect.
                                    continue
                            elif (
                                    creatoritem.get().get('aliases').get('en')
                                    and painting[u'creator'] in
                                    creatoritem.get().get('aliases').get('en')
                            ) or (creatoritem.get().get('aliases').get('nl')
                                  and painting[u'creator'] in
                                  creatoritem.get().get('aliases').get('nl')):
                                if u'P106' in creatoritem.get().get('claims'):
                                    existing_claims = creatoritem.get().get(
                                        'claims').get('P106')
                                    for existing_claim in existing_claims:
                                        if existing_claim.target_equals(
                                                u'Q1028181'):
                                            newcreator = creatoritem
                                    continue
                    except pywikibot.exceptions.APIError:
                        # Best effort: a failing search only means that no
                        # creator claim is added for this painting.
                        print u'Search API is acting up, just let it be'
                        pass

                    if newcreator:
                        pywikibot.output(newcreator.title())

                        newclaim = pywikibot.Claim(self.repo, u'P170')
                        newclaim.setTarget(newcreator)
                        pywikibot.output('Adding creator claim to %s' %
                                         paintingItem)
                        paintingItem.addClaim(newclaim)

                        self.addReference(paintingItem, newclaim,
                                          painting[u'url'])

                        #print creatoritem.title()
                        #print creatoritem.get()

                    else:
                        pywikibot.output('No item found for %s' %
                                         (painting[u'creator'], ))

                else:
                    # NOTE(review): this branch is also taken when the item
                    # has no P170 claim but the painting has no creator
                    # value, in which case the message is misleading.
                    print u'Already has a creator'

                # date of creation
                # Only a plain four-digit year is supported; any other date
                # format is silently skipped.
                if u'P571' not in claims and painting.get(u'date'):
                    if len(
                            painting[u'date']
                    ) == 4 and painting[u'date'].isnumeric():  # It's a year
                        newdate = pywikibot.WbTime(year=painting[u'date'])
                        newclaim = pywikibot.Claim(self.repo, u'P571')
                        newclaim.setTarget(newdate)
                        pywikibot.output(
                            'Adding date of creation claim to %s' %
                            paintingItem)
                        paintingItem.addClaim(newclaim)

                        self.addReference(paintingItem, newclaim,
                                          painting[u'url'])

                # material used
                # Only the exact medium string 'Oil on canvas' is handled.
                if u'P186' not in claims and painting.get(u'medium'):
                    if painting.get(u'medium') == u'Oil on canvas':
                        # Dutch names: olieverf = oil paint, doek = canvas,
                        # oppervlak = surface.
                        olieverf = pywikibot.ItemPage(self.repo, u'Q296955')
                        doek = pywikibot.ItemPage(self.repo, u'Q4259259')
                        oppervlak = pywikibot.ItemPage(self.repo, u'Q861259')

                        newclaim = pywikibot.Claim(self.repo, u'P186')
                        newclaim.setTarget(olieverf)
                        pywikibot.output('Adding new oil paint claim to %s' %
                                         paintingItem)
                        paintingItem.addClaim(newclaim)

                        self.addReference(paintingItem, newclaim,
                                          painting[u'url'])

                        newclaim = pywikibot.Claim(self.repo, u'P186')
                        newclaim.setTarget(doek)
                        pywikibot.output('Adding new canvas claim to %s' %
                                         paintingItem)
                        paintingItem.addClaim(newclaim)

                        self.addReference(paintingItem, newclaim,
                                          painting[u'url'])

                        # The canvas claim is qualified with P518 pointing at
                        # the "surface" item.
                        newqualifier = pywikibot.Claim(
                            self.repo, u'P518')  #Applies to part
                        newqualifier.setTarget(oppervlak)
                        pywikibot.output('Adding new qualifier claim to %s' %
                                         paintingItem)
                        newclaim.addQualifier(newqualifier)

                # Described at url
                # (added without a reference, unlike the claims above)
                if u'P973' not in claims:
                    newclaim = pywikibot.Claim(self.repo, u'P973')
                    newclaim.setTarget(painting[u'url'])
                    pywikibot.output('Adding described at claim to %s' %
                                     paintingItem)
                    paintingItem.addClaim(newclaim)
                #    self.addReference(paintingItem, newclaim, uri)

                # Upload an image baby! BUT NOT NOW

                # ``imagetitle`` stays empty unless an image url is available
                # and the item has no P18 claim yet; the P18 step below
                # depends on it being non-empty.
                imagetitle = u''
                if painting.get(u'imageurl') and u'P18' not in claims:
                    commonssite = pywikibot.Site("commons", "commons")
                    photo = Photo(painting[u'imageurl'], painting)
                    titlefmt = u'%(creator)s - %(title)s - %(id)s - Minneapolis Institute of Arts.%(_ext)s'
                    pagefmt = u'User:Multichill/Minneapolis Institute of Arts'

                    duplicates = photo.findDuplicateImages()
                    if duplicates:
                        # Reuse the first duplicate instead of uploading.
                        pywikibot.output(u"Skipping duplicate of %r" %
                                         duplicates)
                        imagetitle = duplicates[0]
                        #return duplicates[0]
                    else:

                        imagetitle = self.cleanUpTitle(
                            photo.getTitle(titlefmt))
                        pywikibot.output(imagetitle)
                        description = photo.getDescription(pagefmt)
                        pywikibot.output(description)

                        # Write the downloaded photo to a temporary file and
                        # upload from that local path.
                        # NOTE(review): the temporary file is not removed in
                        # this block, and the explicit close() is redundant
                        # inside the ``with`` statement.
                        handle, tempname = tempfile.mkstemp()
                        with os.fdopen(handle, "wb") as t:
                            t.write(photo.downloadPhoto().getvalue())
                            t.close()
                        #tempname

                        bot = upload.UploadRobot(url=tempname,
                                                 description=description,
                                                 useFilename=imagetitle,
                                                 keepFilename=True,
                                                 verifyDescription=False,
                                                 uploadByUrl=False,
                                                 targetSite=commonssite)
                        #bot._contents = photo.downloadPhoto().getvalue()

                        #bot._retrieved = True
                        bot.run()

                # ``commonssite`` is only bound inside the block above, which
                # is also the only place that makes ``imagetitle`` non-empty.
                if u'P18' not in claims and imagetitle:
                    newclaim = pywikibot.Claim(self.repo, u'P18')
                    imagelink = pywikibot.Link(imagetitle,
                                               source=commonssite,
                                               defaultNamespace=6)
                    image = pywikibot.ImagePage(imagelink)
                    if image.isRedirectPage():
                        image = pywikibot.ImagePage(image.getRedirectTarget())
                    newclaim.setTarget(image)
                    pywikibot.output('Adding %s --> %s' %
                                     (newclaim.getID(), newclaim.getTarget()))
                    paintingItem.addClaim(newclaim)
Пример #21
0
    def procesPage(self, page):
        """
        Harvest template parameters of a single page into Wikidata claims.

        The item connected to ``page`` is looked up; when it does not exist
        nothing is done. Otherwise every use of the template named
        ``self.templateTitle`` is scanned and, for each parameter listed in
        ``self.fields`` (parameter name -> property id), a claim is added
        unless the item already has a claim for that property.

        Supported property datatypes are ``wikibase-item`` (the value must
        contain a wikilink, redirects are followed) and ``string``. Values
        that cannot be converted are reported and skipped.

        @param page: the page whose templates are harvested
        @type page: pywikibot.Page
        """
        item = pywikibot.ItemPage.fromPage(page)
        pywikibot.output('Processing %s' % page)
        if not item.exists():
            pywikibot.output('%s doesn\'t have a wikidata item :(' % page)
            #TODO FIXME: We should provide an option to create the page
            return

        pagetext = page.get()
        templates = pywikibot.extract_templates_and_params(pagetext)
        for (template, fielddict) in templates:
            # Only the template we were asked to harvest is of interest
            if template.replace(u'_', u' ') != self.templateTitle:
                continue
            for field, value in fielddict.items():
                # Skip parameters that are not mapped to a property
                if field not in self.fields:
                    continue

                # Check if the property isn't already set
                claim = pywikibot.Claim(self.repo, self.fields[field])
                if claim.getID() in item.get().get('claims'):
                    pywikibot.output(
                        u'A claim for %s already exists. Skipping'
                        % claim.getID())
                    # TODO FIXME: This is a very crude way of dupe
                    # checking
                    continue

                if claim.getType() == 'wikibase-item':
                    # Try to extract a valid page
                    match = re.search(pywikibot.link_regex, value)
                    if not match:
                        # Without a wikilink there is no target; adding the
                        # claim anyway would submit a claim without a value.
                        pywikibot.output(
                            '%s field %s value %s is not a wikilink. '
                            'Skipping.' % (claim.getID(), field, value))
                        continue
                    # Remember the link text up front so that the error
                    # message below never depends on a name that is only
                    # bound further down inside the try block.
                    link_title = match.group(1)
                    try:
                        link = pywikibot.Link(link_title)
                        linkedPage = pywikibot.Page(link)
                        if linkedPage.isRedirectPage():
                            linkedPage = linkedPage.getRedirectTarget()
                        linkedItem = pywikibot.ItemPage.fromPage(linkedPage)
                        claim.setTarget(linkedItem)
                    except pywikibot.exceptions.NoPage:
                        pywikibot.output(
                            '[[%s]] doesn\'t exist so I can\'t link to it'
                            % (link_title, ))
                        continue
                elif claim.getType() == 'string':
                    claim.setTarget(value.strip())
                else:
                    pywikibot.output("%s is not a supported datatype."
                                     % claim.getType())
                    continue

                pywikibot.output(
                    'Adding %s --> %s' %
                    (claim.getID(), claim.getTarget()))
                item.addClaim(claim)
                if self.source:
                    claim.addSource(self.source, bot=True)
Пример #22
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Options handled here are -primary[:title], -always:choice, -pos:title,
    -just, -dnskip, -main, -min:number and -start[:title]; every other
    argument is passed on to the page generator factory.

    @param args: command line arguments
    @type args: list of unicode
    @return: False if no page generator could be built from the arguments,
        otherwise None after the bot has run
    """
    # the option that's always selected when the bot wonders what to do with
    # a link. If it's None, the user is prompted (default behaviour).
    always = None
    alternatives = []
    getAlternatives = True
    dnSkip = False
    generator = None
    primary = False
    main_only = False

    # For sorting the linked pages, case can be ignored
    minimum = 0

    local_args = pywikibot.handle_args(args)
    generator_factory = pagegenerators.GeneratorFactory(
        positional_arg_name='page')

    for arg in local_args:
        if arg.startswith('-primary:'):
            primary = True
            getAlternatives = False
            alternatives.append(arg[9:])
        elif arg == '-primary':
            primary = True
        elif arg.startswith('-always:'):
            always = arg[8:]
        elif arg.startswith('-pos:'):
            possibility = arg[5:]
            if not possibility:
                # A bare "-pos:" used to raise IndexError on arg[5]; there
                # is nothing to add, so report it and carry on.
                pywikibot.output(
                    u'Option -pos: needs a page title. Ignoring it.')
            elif possibility[0] != ':':
                mysite = pywikibot.Site()
                page = pywikibot.Page(pywikibot.Link(possibility, mysite))
                if page.exists():
                    alternatives.append(page.title())
                else:
                    if pywikibot.input_yn(
                            u'Possibility %s does not actually exist. Use it '
                            'anyway?' % page.title(),
                            default=False,
                            automatic_quit=False):
                        alternatives.append(page.title())
            else:
                # A leading colon adds the possibility as given (colon
                # included) without checking that the page exists.
                alternatives.append(possibility)
        elif arg == '-just':
            getAlternatives = False
        elif arg == '-dnskip':
            dnSkip = True
        elif arg == '-main':
            main_only = True
        elif arg.startswith('-min:'):
            minimum = int(arg[5:])
        elif arg.startswith('-start'):
            try:
                # arg[7:] skips "-start:"; a bare "-start" yields ''.
                generator = pagegenerators.CategorizedPageGenerator(
                    pywikibot.Site().disambcategory(),
                    start=arg[7:],
                    namespaces=[0])
            except pywikibot.NoPage:
                pywikibot.output(
                    'Disambiguation category for your wiki is not known.')
                raise
        else:
            generator_factory.handleArg(arg)

    site = pywikibot.Site()

    # A generator built by -start (if any) is combined with the generators
    # collected by the factory.
    generator = generator_factory.getCombinedGenerator(generator)

    if not generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    site.login()

    bot = DisambiguationRobot(always,
                              alternatives,
                              getAlternatives,
                              dnSkip,
                              generator,
                              primary,
                              main_only,
                              minimum=minimum)
    bot.run()
    def treat(self, page, item):
        """Harvest template parameters of ``page`` into claims on ``item``.

        Raises KeyboardInterrupt when the module-level ``willstop`` flag is
        set. The page is skipped when the item already has claims for every
        property configured in ``self.fields``. Otherwise each template on
        the page whose normalised name is in ``self.templateTitles`` is
        scanned; for every non-empty parameter mapped in ``self.fields`` a
        claim is built according to the property datatype and handed to
        ``self.user_add_claim``. Properties the item already has a claim
        for, and values that cannot be converted, are skipped.
        """
        if willstop:
            raise KeyboardInterrupt
        self.current_page = page
        item.get()

        wanted_props = set(spec[0] for spec in self.fields.values())
        if wanted_props.issubset(item.claims.keys()):
            pywikibot.output('%s item %s has claims for all properties. '
                             'Skipping.' % (page, item.title()))
            return

        for raw_name, params in page.raw_extracted_templates:
            # Normalise the template name through a Page in namespace 10
            try:
                name = pywikibot.Page(page.site, raw_name,
                                      ns=10).title(withNamespace=False)
            except pywikibot.exceptions.InvalidTitle:
                pywikibot.error(
                    "Failed parsing template; '%s' should be the template name."
                    % raw_name)
                continue

            # Only the configured templates are harvested
            if name not in self.templateTitles:
                continue

            for key, raw_value in params.items():
                key = key.strip()
                text = raw_value.strip()
                if not (key and text):
                    continue
                # Parameters without a property mapping carry nothing for us
                if key not in self.fields:
                    continue

                prop, options = self.fields[key]
                claim = pywikibot.Claim(self.repo, prop)
                # Never add a second claim for a property that is already set
                if claim.getID() in item.get().get('claims'):
                    pywikibot.output(
                        'A claim for %s already exists. Skipping.' %
                        claim.getID())
                    # TODO: Implement smarter approach to merging
                    # harvested values with existing claims esp.
                    # without overwriting humans unintentionally.
                    continue

                datatype = claim.type
                if datatype == 'wikibase-item':
                    # Prefer an explicit wikilink; fall back to the plain
                    # value only when the 'islink' option allows it.
                    wikilink = pywikibot.link_regex.search(text)
                    if wikilink:
                        target_title = wikilink.group(1)
                    elif self._get_option_with_fallback(options, 'islink'):
                        target_title = text
                    else:
                        pywikibot.output(
                            '%s field %s value %s is not a '
                            'wikilink. Skipping.' %
                            (claim.getID(), key, text))
                        continue

                    target_item = self._template_link_target(
                        item, target_title)
                    if not target_item:
                        continue
                    claim.setTarget(target_item)
                elif datatype in ('string', 'external-id'):
                    claim.setTarget(text.strip())
                elif datatype == 'url':
                    url_match = self.linkR.search(text)
                    if not url_match:
                        continue
                    claim.setTarget(url_match.group('url'))
                elif datatype == 'commonsMedia':
                    commons = pywikibot.Site('commons', 'commons')
                    file_link = pywikibot.Link(text,
                                               source=commons,
                                               defaultNamespace=6)
                    media = pywikibot.FilePage(file_link)
                    if media.isRedirectPage():
                        media = pywikibot.FilePage(
                            media.getRedirectTarget())
                    if not media.exists():
                        pywikibot.output(
                            "{0} doesn't exist. I can't link to it"
                            ''.format(media.title(asLink=True)))
                        continue
                    claim.setTarget(media)
                else:
                    pywikibot.output(
                        '%s is not a supported datatype.' %
                        datatype)
                    continue

                # A generator might yield pages from multiple sites
                self.user_add_claim(item, claim, page.site)
Пример #24
0
    def treat_disamb_only(self, refPage, disambPage):
        """Resolve the links to disambPage but don't look for its redirects.

        The text of refPage is scanned with self.linkR. For every link whose
        canonical title equals the title of disambPage the user is asked what
        to do (replace the target, unlink, tag with the dn template, edit the
        page, skip the link, go to the next page, ...). When the loop ends and
        the text differs from the original, a diff is shown and the page is
        saved with the summary prepared by setSummaryMessage().

        If refPage turns out to be a redirect, the user may instead retarget
        it, work on the pages linking to it, or treat the redirect text
        itself (include == "redirect").

        @param disambPage: the disambiguation page or redirect we don't want
            anything to link to
        @type disambPage: pywikibot.Page
        @param refPage: a page linking to disambPage
        @type refPage: pywikibot.Page
        @return: "nextpage" if the user enters "n" to skip this page,
            "nochange" if the page needs no change, and
            "done" if the page is processed successfully
        @rtype: str

        """
        # TODO: break this function up into subroutines!

        self.current_page = refPage
        # include is tri-state: False (do not process the text), True
        # (process a normal page) or "redirect" (process the redirect text).
        include = False
        unlink_counter = 0
        # titles the user selected as replacement targets, without duplicates
        new_targets = []
        try:
            text = refPage.get()
            ignoreReason = self.checkContents(text)
            if ignoreReason:
                pywikibot.output(
                    '\n\nSkipping %s because it contains %s.\n\n' %
                    (refPage.title(), ignoreReason))
            else:
                include = True
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'%s is a redirect to %s' %
                             (refPage.title(), disambPage.title()))
            if disambPage.isRedirectPage():
                # Offer to point refPage at the first alternative instead.
                target = self.alternatives[0]
                if pywikibot.input_yn(u'Do you want to make redirect %s point '
                                      'to %s?' % (refPage.title(), target),
                                      default=False,
                                      automatic_quit=False):
                    redir_text = '#%s [[%s]]' \
                                 % (self.mysite.redirect(), target)
                    try:
                        refPage.put(redir_text,
                                    summary=self.comment,
                                    asynchronous=True)
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Page not saved: %s' % error.args)
            else:
                choice = pywikibot.input_choice(
                    u'Do you want to work on pages linking to %s?' %
                    refPage.title(), [('yes', 'y'), ('no', 'n'),
                                      ('change redirect', 'c')],
                    'n',
                    automatic_quit=False)
                if choice == 'y':
                    # Treat every page that links to the redirect, using the
                    # redirect itself as the page not to be linked to.
                    gen = ReferringPageGeneratorWithIgnore(
                        refPage, self.primary, main_only=self.main_only)
                    preloadingGen = pagegenerators.PreloadingGenerator(gen)
                    for refPage2 in preloadingGen:
                        # run until the user selected 'quit'
                        self.treat(refPage2, refPage)
                elif choice == 'c':
                    # Work on the wikitext of the redirect page itself.
                    text = refPage.get(get_redirect=True)
                    include = "redirect"
        except pywikibot.NoPage:
            pywikibot.output(
                u'Page [[%s]] does not seem to exist?! Skipping.' %
                refPage.title())
            include = False
        if include in (True, "redirect"):
            # save the original text so we can show the changes later
            original_text = text
            # n counts matching links that were not skipped with 's'
            n = 0
            # position in text from which the next link search starts
            curpos = 0
            # dn becomes True once the dn template has been inserted
            dn = False
            # edited becomes True once the user edited the text manually
            edited = False
            # This loop will run until we have finished the current page
            while True:
                m = self.linkR.search(text, pos=curpos)
                if not m:
                    if n == 0:
                        # No changes necessary for this disambiguation title.
                        return 'nochange'
                    else:
                        # stop loop and save page
                        break
                # Ensure that next time around we will not find this same hit.
                curpos = m.start() + 1
                try:
                    foundlink = pywikibot.Link(m.group('title'),
                                               disambPage.site)
                    foundlink.parse()
                except pywikibot.Error:
                    # the link cannot be parsed; ignore it
                    continue
                # ignore interwiki links
                if foundlink.site != disambPage.site:
                    continue
                # Check whether the link found is to disambPage.
                try:
                    if foundlink.canonical_title() != disambPage.title():
                        continue
                except pywikibot.Error:
                    # must be a broken link
                    pywikibot.log(u"Invalid link [[%s]] in page [[%s]]" %
                                  (m.group('title'), refPage.title()))
                    continue
                n += 1
                # how many bytes should be displayed around the current link
                context = 60
                # check if there's a dn-template here already: the template
                # string without its last two characters is looked up in the
                # text directly following the link
                if (self.dnSkip and self.dn_template_str
                        and self.dn_template_str[:-2]
                        in text[m.end():m.end() + len(self.dn_template_str) +
                                8]):
                    continue

                # Build the list of choices offered for this link.
                edit = EditOption('edit page', 'e', text, m.start(),
                                  disambPage.title())
                context_option = HighlightContextOption('more context',
                                                        'm',
                                                        text,
                                                        60,
                                                        start=m.start(),
                                                        end=m.end())
                context_option.before_question = True

                # The two ListOptions yield tuple answers ('', title) and
                # ('r', title); they are handled in the final else branch.
                options = [
                    ListOption(self.alternatives, ''),
                    ListOption(self.alternatives, 'r'),
                    StandardOption('skip link', 's'), edit,
                    StandardOption('next page', 'n'),
                    StandardOption('unlink', 'u')
                ]
                if self.dn_template_str:
                    # '?', '/' for old choice
                    options += [
                        AliasOption('tag template %s' % self.dn_template_str,
                                    ['t', '?', '/'])
                    ]
                options += [context_option]
                if not edited:
                    options += [
                        ShowPageOption('show disambiguation page', 'd',
                                       m.start(), disambPage)
                    ]
                options += [
                    OutputProxyOption('list', 'l',
                                      SequenceOutputter(self.alternatives)),
                    AddAlternativeOption('add new', 'a',
                                         SequenceOutputter(self.alternatives))
                ]
                if edited:
                    # saving early is only offered after a manual edit
                    options += [StandardOption('save in this form', 'x')]

                # TODO: Output context on each question
                answer = pywikibot.input_choice('Option',
                                                options,
                                                default=self.always,
                                                force=bool(self.always))
                if answer == 'x':
                    assert edited, 'invalid option before editing'
                    break
                elif answer == 's':
                    # Undo the increment above: a skipped link is not counted,
                    # so a page where every hit is skipped ends with n == 0
                    # and 'nochange' is returned.
                    n -= 1
                    continue
                elif answer == 'e':
                    text = edit.new_text
                    edited = True
                    # restart the search from the beginning of the new text
                    curpos = 0
                    continue
                elif answer == 'n':
                    # skip this page
                    if self.primary:
                        # If run with the -primary argument, skip this
                        # occurrence next time.
                        self.primaryIgnoreManager.ignore(refPage)
                    return 'nextpage'

                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                page_title = m.group('title')
                link_text = m.group('label')

                if not link_text:
                    # or like this: [[page_title]]trailing_chars
                    link_text = page_title
                if m.group('section') is None:
                    section = ''
                else:
                    section = m.group('section')
                trailing_chars = m.group('linktrail')
                if trailing_chars:
                    link_text += trailing_chars
                if answer == 't':
                    assert self.dn_template_str
                    # small chunk of text to search
                    search_text = text[m.end():m.end() + context]
                    # figure out where the link (and sentence) ends, put note
                    # there
                    end_of_word_match = re.search(r'\s', search_text)
                    if end_of_word_match:
                        position_split = end_of_word_match.start(0)
                    else:
                        # no whitespace nearby: insert right after the link
                        position_split = 0
                    # insert dab needed template
                    text = (text[:m.end() + position_split] +
                            self.dn_template_str +
                            text[m.end() + position_split:])
                    dn = True
                    continue
                elif answer == 'u':
                    # unlink - we remove the section if there's any
                    text = text[:m.start()] + link_text + text[m.end():]
                    unlink_counter += 1
                    continue
                else:
                    # Check that no option from above was missed
                    assert isinstance(answer, tuple), 'only tuple answer left.'
                    assert answer[0] in ['r', ''], 'only valid tuple answers.'
                    # replaceit decides whether the visible link text is
                    # replaced by the new title as well.
                    if answer[0] == 'r':
                        # we want to throw away the original link text
                        replaceit = link_text == page_title
                    elif include == "redirect":
                        replaceit = True
                    else:
                        replaceit = False

                    new_page_title = answer[1]
                    repPl = pywikibot.Page(
                        pywikibot.Link(new_page_title, disambPage.site))
                    # Keep the title as returned by the page if either the
                    # chosen title or the link text starts with an uppercase
                    # letter; otherwise lower-case its first letter.
                    if (new_page_title[0].isupper() or link_text[0].isupper()):
                        new_page_title = repPl.title()
                    else:
                        new_page_title = repPl.title()
                        new_page_title = first_lower(new_page_title)
                    if new_page_title not in new_targets:
                        new_targets.append(new_page_title)
                    if replaceit and trailing_chars:
                        newlink = "[[%s%s]]%s" % (new_page_title, section,
                                                  trailing_chars)
                    elif replaceit or (new_page_title == link_text
                                       and not section):
                        newlink = "[[%s]]" % new_page_title
                    # check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif ((len(new_page_title) <= len(link_text))
                          and (firstcap(link_text[:len(new_page_title)])
                               == firstcap(new_page_title))
                          and (re.sub(self.trailR, '',
                                      link_text[len(new_page_title):]) == '')
                          and (not section)):
                        newlink = "[[%s]]%s" \
                                  % (link_text[:len(new_page_title)],
                                     link_text[len(new_page_title):])
                    else:
                        # fall back to a piped link
                        newlink = "[[%s%s|%s]]" \
                                  % (new_page_title, section, link_text)
                    text = text[:m.start()] + newlink + text[m.end():]
                    continue
                # Todo: This line is unreachable (T155337)
                pywikibot.output(text[max(0, m.start() - 30):m.end() + 30])
            if text == original_text:
                pywikibot.output(u'\nNo changes have been made:\n')
            else:
                pywikibot.output(u'\nThe following changes have been made:\n')
                pywikibot.showDiff(original_text, text)
                pywikibot.output(u'')
                # save the page
                self.setSummaryMessage(disambPage, new_targets, unlink_counter,
                                       dn)
                try:
                    refPage.put(text, summary=self.comment, asynchronous=True)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Page not saved: page is locked')
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        return 'done'
Пример #25
0
def translate(page=None,
              hints=None,
              auto=True,
              removebrackets=False,
              site=None):
    """
    Return a list of links to pages on other sites based on hints.

    Each hint has the form "codes" or "codes:title". "codes" may be a
    number (that many entries of the family's languages_by_size), "all",
    the name of a language group of the family, or a comma separated list
    of language codes. When the title part is empty, the title of 'page'
    (without namespace) is used; such hints are ignored if no page is given.
    When 'removebrackets' is True, bracketed text is replaced by a space in
    that derived title.

    If 'auto' is true and a page is given, titles recognized by
    date.getAutoFormat are additionally translated into every language the
    matching date format dictionary knows and the site supports.

    At least one of 'page' and 'site' must be given; 'site' defaults to the
    site of 'page'.
    """
    links = set()

    assert page or site

    if site is None and page:
        site = page.site

    for hint in hints or ():
        # "xy" and "xy:" are equivalent: both leave the title part empty.
        codes, _, newname = hint.partition(':')
        if newname == '':
            # No explicit title: assume the page has the same title in the
            # target language, which requires a page to take it from.
            if page is None:
                continue
            newname = page.title(withNamespace=False)
            if removebrackets:
                bracket_pattern = re.compile(r"\W*?\(.*?\)\W*?", re.UNICODE)
                newname = bracket_pattern.sub(u" ", newname)
        try:
            number = int(codes)
            codes = site.family.languages_by_size[:number]
        except ValueError:
            # Not a number: a keyword, a group name or a list of codes.
            if codes == 'all':
                codes = site.family.languages_by_size
            elif codes in site.family.language_groups:
                codes = site.family.language_groups[codes]
            else:
                codes = codes.split(',')

        for newcode in codes:
            if newcode not in site.languages():
                if config.verbose_output:
                    pywikibot.output(u"Ignoring unknown language code %s" %
                                     newcode)
                continue
            if newcode == site.code:
                # never link a page to its own site
                continue
            ns = page.namespace() if page else 0
            links.add(pywikibot.Link(newname,
                                     site.getSite(code=newcode),
                                     defaultNamespace=ns))

    # Autotranslate dates into all other languages, the rest will come from
    # existing interwiki links.
    if not (auto and page):
        return list(links)

    # search inside all dictionaries for this link
    sitelang = page.site.code
    dictName, value = date.getAutoFormat(sitelang, page.title())
    if dictName:
        pywikibot.output(
            u'TitleTranslate: %s was recognized as %s with value %d' %
            (page.title(), dictName, value))
        for entryLang, entry in date.formats[dictName].items():
            if entryLang not in site.languages() or entryLang == sitelang:
                continue
            newname = entry(value)
            target_site = pywikibot.Site(code=entryLang, fam=site.family)
            links.add(pywikibot.Link(newname, target_site))
    return list(links)
Пример #26
0
    def run(self):
        """Run the bot: maintain the category redirects of self.site.

        The run consists of these stages:

        1. Load the pickled per-category record from the data file (an
           unreadable file yields an empty record) and write a ".bak" copy
           of a non-empty record.
        2. Replace hard redirects in the category namespace (14) that point
           to another category by the first configured redirect template;
           other hard redirects are reported as problems.
        3. Collect the members of the category-redirect category, remember
           the non-empty ones and create a record entry (plus a null edit)
           for every redirect not yet recorded.
        4. For every non-empty redirected category that is ready to edit,
           fix double redirects or move its contents to the target.
        5. Save the record and write the log page and, if there are any,
           the edit requests page.

        Returns None. If no redirect templates are defined for the site
        code, a message is printed and the method returns early.
        """
        global destmap, catlist, catmap

        def null_edit(category):
            """Re-save category with unchanged text, ignoring failures."""
            try:
                category.put(category.get(get_redirect=True))
            except Exception:
                # Deliberately best effort. Unlike a bare ``except`` this
                # still lets KeyboardInterrupt and SystemExit propagate.
                pass

        user = self.site.user()
        problems = []
        newredirs = []

        today = "%04d-%02d-%02d" % time.localtime()[:3]
        edit_request_page = pywikibot.Page(
            self.site,
            u"User:%(user)s/category edit requests" % {'user': user})
        datafile = pywikibot.config.datafilepath("%s-catmovebot-data" %
                                                 self.site.dbName())
        # NOTE(review): the record is unpickled from a local data file;
        # pickle must never be used on data from an untrusted source.
        try:
            with open(datafile, "rb") as inp:
                record = cPickle.load(inp)
        except IOError:
            record = {}
        if record:
            # keep a backup of the last known good record
            with open(datafile + ".bak", "wb") as backup:
                cPickle.dump(record, backup, -1)

        try:
            template_list = self.site.family.category_redirect_templates[
                self.site.code]
        except KeyError:
            pywikibot.output(u"No redirect templates defined for %s" %
                             self.site.sitename())
            return
        # regex to match soft category redirects
        #  note that any templates containing optional "category:" are
        #  incorrect and will be fixed by the bot
        template_regex = re.compile(
            r"""{{\s*(?:%(prefix)s\s*:\s*)?  # optional "template:"
                     (?:%(template)s)\s*\|   # catredir template name
                     (\s*%(catns)s\s*:\s*)?  # optional "category:"
                     ([^|}]+)                # redirect target cat
                     (?:\|[^|}]*)*}}         # optional arguments 2+, ignored
             """ % {
                'prefix':
                self.site.namespace(10).lower(),
                'template':
                "|".join(item.replace(" ", "[ _]+") for item in template_list),
                'catns':
                self.site.namespace(14)
            }, re.I | re.X)

        # check for hard-redirected categories that are not already marked
        # with an appropriate template
        comment = i18n.twtranslate(self.site.lang, self.redir_comment)
        for page in pagegenerators.PreloadingGenerator(self.site.allpages(
                namespace=14, filterredir=True),
                                                       step=250):
            # generator yields all hard redirect pages in namespace 14
            if page.isCategoryRedirect():
                # this is already a soft-redirect, so skip it (for now)
                continue
            try:
                target = page.getRedirectTarget()
            except pywikibot.CircularRedirect:
                target = page
                problems.append(u"# %s is a self-linked redirect" %
                                page.title(asLink=True, textlink=True))
            except RuntimeError:
                # race condition: someone else removed the redirect while we
                # were checking for it
                continue
            if target.namespace() == 14:
                # this is a hard-redirect to a category page
                newtext = (u"{{%(template)s|%(cat)s}}" % {
                    'cat': target.title(withNamespace=False),
                    'template': template_list[0]
                })
                try:
                    page.put(newtext, comment, minorEdit=True)
                    self.log_text.append(
                        u"* Added {{tl|%s}} to %s" %
                        (template_list[0],
                         page.title(asLink=True, textlink=True)))
                except pywikibot.Error:
                    self.log_text.append(
                        u"* Failed to add {{tl|%s}} to %s" %
                        (template_list[0],
                         page.title(asLink=True, textlink=True)))
            else:
                problems.append(u"# %s is a hard redirect to %s" %
                                (page.title(asLink=True, textlink=True),
                                 target.title(asLink=True, textlink=True)))

        pywikibot.output("Done checking hard-redirect category pages.")

        comment = i18n.twtranslate(self.site.lang, self.move_comment)
        counts, destmap, catmap = {}, {}, {}
        catlist, nonemptypages = [], []
        redircat = pywikibot.Category(
            pywikibot.Link(
                self.cat_redirect_cat[self.site.family.name][self.site.code],
                self.site))

        # get a list of all members of the category-redirect category
        catpages = dict.fromkeys(redircat.subcategories())

        # check the category pages for redirected categories
        pywikibot.output(u"")
        pywikibot.output(u"Checking %s category redirect pages" %
                         len(catpages))
        for cat in catpages:
            cat_title = cat.title(withNamespace=False)
            if "category redirect" in cat_title:
                self.log_text.append(u"* Ignoring %s" %
                                     cat.title(asLink=True, textlink=True))
                continue
            if hasattr(cat, "_catinfo"):
                # skip empty categories that don't return a "categoryinfo" key
                catdata = cat.categoryinfo
                if "size" in catdata and int(catdata['size']):
                    # save those categories that have contents
                    nonemptypages.append(cat)
            if cat_title not in record:
                # make sure every redirect has a record entry
                record[cat_title] = {today: None}
                try:
                    newredirs.append("*# %s -> %s" %
                                     (cat.title(asLink=True, textlink=True),
                                      cat.getCategoryRedirectTarget().title(
                                          asLink=True, textlink=True)))
                except pywikibot.Error:
                    pass
                # do a null edit on cat
                null_edit(cat)

        # delete record entries for non-existent categories; iterate over a
        # snapshot of the keys because entries are deleted inside the loop
        for cat_name in list(record):
            if pywikibot.Category(self.site,
                                  self.catprefix + cat_name) not in catpages:
                del record[cat_name]

        pywikibot.output(u"")
        pywikibot.output(u"Moving pages out of %s redirected categories." %
                         len(nonemptypages))

        for cat in pagegenerators.PreloadingGenerator(nonemptypages):
            try:
                if not cat.isCategoryRedirect():
                    self.log_text.append(u"* False positive: %s" %
                                         cat.title(asLink=True, textlink=True))
                    continue
            except pywikibot.Error:
                self.log_text.append(u"* Could not load %s; ignoring" %
                                     cat.title(asLink=True, textlink=True))
                continue
            cat_title = cat.title(withNamespace=False)
            if not self.readyToEdit(cat):
                counts[cat_title] = None
                self.log_text.append(u"* Skipping %s; in cooldown period." %
                                     cat.title(asLink=True, textlink=True))
                continue
            dest = cat.getCategoryRedirectTarget()
            if not dest.exists():
                problems.append("# %s redirects to %s" %
                                (cat.title(asLink=True, textlink=True),
                                 dest.title(asLink=True, textlink=True)))
                # do a null edit on cat to update any special redirect
                # categories this wiki might maintain
                null_edit(cat)
                continue
            if dest.isCategoryRedirect():
                double = dest.getCategoryRedirectTarget()
                if double == dest or double == cat:
                    self.log_text.append(
                        u"* Redirect loop from %s" %
                        dest.title(asLink=True, textlink=True))
                    # do a null edit on cat
                    null_edit(cat)
                else:
                    self.log_text.append(
                        u"* Fixed double-redirect: %s -> %s -> %s" %
                        (cat.title(asLink=True, textlink=True),
                         dest.title(asLink=True, textlink=True),
                         double.title(asLink=True, textlink=True)))
                    oldtext = cat.get(get_redirect=True)
                    # remove the old redirect from the old text,
                    # leaving behind any non-redirect text
                    oldtext = template_regex.sub("", oldtext)
                    newtext = (u"{{%(redirtemp)s|%(ncat)s}}" % {
                        'redirtemp': template_list[0],
                        'ncat': double.title(withNamespace=False)
                    })
                    newtext = newtext + oldtext.strip()
                    try:
                        cat.put(newtext,
                                i18n.twtranslate(self.site.lang,
                                                 self.dbl_redir_comment),
                                minorEdit=True)
                    except pywikibot.Error as e:
                        self.log_text.append("** Failed: %s" % e)
                continue

            found, moved = self.move_contents(cat_title,
                                              dest.title(withNamespace=False),
                                              editSummary=comment)
            if found is None:
                self.log_text.append(u"* [[:%s%s]]: error in move_contents" %
                                     (self.catprefix, cat_title))
            elif found:
                record[cat_title][today] = found
                self.log_text.append(u"* [[:%s%s]]: %d found, %d moved" %
                                     (self.catprefix, cat_title, found, moved))
            counts[cat_title] = found
            # do a null edit on cat
            null_edit(cat)

        # persist the updated record; the with block guarantees the file is
        # flushed and closed before the log pages are written
        with open(datafile, "wb") as out:
            cPickle.dump(record, out, -1)

        pywikibot.setAction(
            i18n.twtranslate(self.site.lang, self.maint_comment))
        self.log_text.sort()
        problems.sort()
        newredirs.sort()
        self.log_page.put(u"\n== %i-%02i-%02iT%02i:%02i:%02iZ ==\n" %
                          time.gmtime()[:6] + u"\n".join(self.log_text) +
                          u"\n* New redirects since last report:\n" +
                          u"\n".join(newredirs) + u"\n" +
                          u"\n".join(problems) + u"\n" + self.get_log_text())
        if self.edit_requests:
            edit_request_page.put(
                self.edit_request_text % {
                    'itemlist':
                    u"\n" + u"\n".join((self.edit_request_item % item)
                                       for item in self.edit_requests)
                })
Пример #27
0
    def parse_page_tuples(self, wikitext, user=None):
        """Parse page details apart from 'user:'******'li':
                    current_user = None
            elif isinstance(node, mwparserfromhell.nodes.text.Text):
                if node.endswith('\n'):
                    current_user = False
            elif isinstance(node, mwparserfromhell.nodes.wikilink.Wikilink):
                if current_user is False:
                    pywikibot.debug(
                        'Link to "{0}" ignored as outside '
                        'list'.format(node.title), _logger)
                    continue

                obj = pywikibot.Link(node.title, self.site)
                if obj.namespace == -1:
                    # the parser accepts 'special:prefixindex/' as a wildcard
                    # this allows a prefix that doesnt match an existing page
                    # to be a blue link, and can be clicked to see what pages
                    # will be included in the whitelist
                    name, sep, prefix = obj.title.partition('/')
                    if name.lower() in self._prefixindex_aliases:
                        if not prefix:
                            if pywikibot.config.verbose_output:
                                pywikibot.output(u'Whitelist everything')
                            page = ''
                        else:
                            page = prefix
                            if pywikibot.config.verbose_output:
                                pywikibot.output(u'Whitelist prefixindex hack '
                                                 u'for: %s' % page)
                            # p = pywikibot.Page(self.site, obj.target[20:])
                            # obj.namespace = p.namespace
                            # obj.target = p.title()

                elif obj.namespace == 2 and not current_user:
                    # if a target user hasn't been found yet, and the link is
                    # 'user:'******'Whitelist user: %s' % current_user)
                    continue
                else:
                    page = obj.canonical_title()

                if current_user:
                    if not user or current_user == user:
                        if self.is_wikisource_author_page(page):
                            if pywikibot.config.verbose_output:
                                pywikibot.output('Whitelist author: %s' % page)
                            page = LinkedPagesRule(page)
                        else:
                            if pywikibot.config.verbose_output:
                                pywikibot.output(u'Whitelist page: %s' % page)
                        if pywikibot.config.verbose_output:
                            pywikibot.output('Adding {0}:{1}'.format(
                                current_user, page))
                        whitelist[current_user].append(page)
                    elif pywikibot.config.verbose_output:
                        pywikibot.output(u'Discarding whitelist page for '
                                         u'another user: %s' % page)
                else:
                    raise Exception(u'No user set for page %s' % page)

        return dict(whitelist)
Пример #28
0
    def treat(self, page, item):
        """Harvest template parameters of ``page`` into claims on ``item``.

        The page text is scanned for templates listed in
        ``self.templateTitles``. For every non-empty parameter whose name is
        a key of ``self.fields``, a claim for the mapped property is built
        and added to ``item``, unless the item already carries a claim for
        that property. Nothing is done at all when the item already has
        claims for every property in ``self.fields``.

        Supported claim types are ``wikibase-item`` (value must contain a
        wikilink), ``string`` and ``commonsMedia`` (file must exist on
        Commons); any other type is reported and skipped.

        :param page: page whose wikitext is harvested (provides ``get()``
            and ``site``)
        :param item: data item receiving the claims (provides ``get()``,
            ``claims``, ``title()`` and ``addClaim()``)
        """
        self.current_page = page
        item.get()

        wanted_properties = set(self.fields.values())
        if wanted_properties.issubset(item.claims.keys()):
            pywikibot.output(
                u'%s item %s has claims for all properties. Skipping' %
                (page, item.title()))
            return

        wikitext = page.get()
        for raw_name, params in textlib.extract_templates_and_params(wikitext):
            # Normalise the template name via a Page in the template
            # namespace (ns=10), dropping the namespace prefix.
            try:
                template_name = pywikibot.Page(
                    page.site, raw_name, ns=10).title(withNamespace=False)
            except pywikibot.exceptions.InvalidTitle:
                pywikibot.error(
                    u"Failed parsing template; '%s' should be the template name."
                    % raw_name)
                continue

            # Only templates we were asked to harvest are of interest.
            if template_name not in self.templateTitles:
                continue

            for field, value in params.items():
                field = field.strip()
                value = value.strip()
                if not (field and value):
                    continue

                # Ignore parameters that are not mapped to a property.
                if field not in self.fields:
                    continue

                claim = pywikibot.Claim(self.repo, self.fields[field])

                # The item is re-read on every pass on purpose: claims added
                # earlier in this run must be taken into account as well.
                if claim.getID() in item.get().get('claims'):
                    pywikibot.output(
                        u'A claim for %s already exists. Skipping' %
                        claim.getID())
                    # TODO: Implement smarter approach to merging
                    # harvested values with existing claims esp.
                    # without overwriting humans unintentionally.
                    continue

                datatype = claim.type
                if datatype == 'wikibase-item':
                    # Try to extract a valid page
                    link_match = re.search(pywikibot.link_regex, value)
                    if not link_match:
                        pywikibot.output(
                            u'%s field %s value %s isnt a wikilink. Skipping'
                            % (claim.getID(), field, value))
                        continue

                    target_item = self._template_link_target(
                        item, link_match.group(1))
                    if not target_item:
                        continue

                    claim.setTarget(target_item)
                elif datatype == 'string':
                    claim.setTarget(value.strip())
                elif datatype == 'commonsMedia':
                    commons = pywikibot.Site("commons", "commons")
                    file_link = pywikibot.Link(value,
                                               source=commons,
                                               defaultNamespace=6)
                    media = pywikibot.FilePage(file_link)
                    if media.isRedirectPage():
                        # Follow the redirect to the actual file page.
                        media = pywikibot.FilePage(media.getRedirectTarget())
                    if not media.exists():
                        pywikibot.output(
                            '[[%s]] doesn\'t exist so I can\'t link to it'
                            % (media.title(), ))
                        continue
                    claim.setTarget(media)
                else:
                    pywikibot.output(
                        "%s is not a supported datatype." %
                        claim.type)
                    continue

                pywikibot.output(
                    'Adding %s --> %s' %
                    (claim.getID(), claim.getTarget()))
                item.addClaim(claim)
                # A generator might yield pages from multiple sites
                reference = self.getSource(page.site)
                if reference:
                    claim.addSource(reference, bot=True)
Пример #29
0
 def new_title(self):
     """Return the page object of the move target, resolving it lazily.

     The page is built once from ``self.data['move']['new_title']`` and
     cached on the instance as ``_new_title``; later calls return the
     cached object.
     """
     if hasattr(self, '_new_title'):
         return self._new_title

     target_link = pywikibot.Link(self.data['move']['new_title'])
     self._new_title = pywikibot.Page(target_link)
     return self._new_title
Пример #30
0
 def make_image_item(self, filename):
     """Return a FilePage on the "commons" site for ``filename``.

     The link is created with default namespace 6, so a bare file name
     without a namespace prefix is accepted.
     """
     site = utils.create_site_instance("commons", "commons")
     return pywikibot.FilePage(
         pywikibot.Link(filename, source=site, defaultNamespace=6))