示例#1
0
文件: patrol.py 项目: xqt/pywikibot
    def is_wikisource_author_page(self, title):
        """Tell whether *title* carries the wikisource author prefix.

        The check only applies when the site's family is named
        ``'wikisource'``; for every other family the answer is ``False``.
        The author namespace is looked up in the family's
        ``authornamespaces`` mapping for the site language, falling back
        to namespace ``0`` when that lookup is not possible.

        :param title: page title to inspect
        :return: ``True`` if stripping the author namespace prefix
            changes *title*, otherwise ``False``
        """
        site = self.site
        if site.family.name != 'wikisource':
            return False

        # Fallback used when the family has no ``authornamespaces``
        # attribute or no entry for this language.
        namespace_id = 0
        with suppress(AttributeError, KeyError):
            namespace_id = site.family.authornamespaces[site.lang][0]

        prefix = site.namespace(namespace_id) + ':'
        # ``removeprefix`` is a helper defined outside this block; an
        # unchanged result is taken to mean the prefix was not present.
        stripped = removeprefix(title, prefix)
        if stripped == title:
            return False

        verbose_output('Found author ' + stripped)
        return True
示例#2
0
    def process(self, text):
        """Merge duplicated ``<ref>`` tags in *text* and return the result.

        References with identical content inside the same ref group are
        collapsed: the first occurrence is kept as a named reference and
        each later occurrence is replaced by a self-closing
        ``<ref name=... />``.  Content that has no usable name gets one
        built from ``self.autogen`` plus the lowest number not already
        used by such a name.  Finally, self-closing references using a
        name that was superseded by another name for the same content
        are pointed at the surviving name.

        :param text: wikitext to process
        :return: wikitext with duplicated references merged
        """
        # Local import: the module's import block is not visible here.
        import itertools

        # keys are ref group names ('' for refs without a group)
        # values are a dict where :
        #   keys are ref content
        #   values are [name, [list of full ref matches],
        #               quoted, need_to_change]
        found_refs = {}
        found_ref_names = set()
        # Replace key by [value, quoted]
        named_repl = {}

        for match in self.REFS.finditer(text):
            content = match.group('content')
            if not content.strip():
                continue

            params = match.group('params')
            group_match = self.GROUPS.match(params)
            # Key by the group's name, not by the match object: match
            # objects compare by identity, so refs of one group would
            # otherwise never share a bucket.
            # NOTE(review): assumes GROUPS exposes the group name as the
            # named capture 'group' (parallel to NAMES' 'name') - confirm.
            group = group_match.group('group') if group_match else ''

            groupdict = found_refs.setdefault(group, {})
            entry = groupdict.get(content)
            if entry is None:
                entry = groupdict[content] = [None, [], False, False]
            entry[1].append(match.group())

            found = self.NAMES.match(params)
            if not found:
                continue

            quoted = found.group('quote') in ['"', "'"]
            name = found.group('name')
            if entry[0]:
                if entry[0] != name:
                    # same content already known under another name
                    named_repl[name] = [entry[0], entry[2]]
            elif name not in found_ref_names:
                # first time ever we meet this name
                entry[0] = name
                entry[2] = quoted
            else:
                # this name is already used with another content,
                # so this content needs a different name
                entry[3] = True
            found_ref_names.add(name)

        used_numbers = set()
        for name in found_ref_names:
            number = removeprefix(name, self.autogen)
            with suppress(ValueError):
                used_numbers.add(int(number))

        # Ascending, unbounded supply of numbers that are still free.
        free_number = (str(i) for i in itertools.count(1)
                       if i not in used_numbers)

        for group_name, references in found_refs.items():
            group = 'group="{}" '.format(group_name) if group_name else ''

            for content, entry in references.items():
                name, occurrences, quoted, need_to_change = entry
                if len(occurrences) == 1 and not need_to_change:
                    continue

                if not name:
                    name = '"{}{}"'.format(self.autogen, next(free_number))
                elif quoted:
                    name = '"{}"'.format(name)

                named = '<ref {}name={}>{}</ref>'.format(group, name,
                                                         content)
                text = text.replace(occurrences[0], named, 1)

                # make sure that the first (named ref) is not
                # removed later :
                pos = text.index(named) + len(named)
                header = text[:pos]
                end = text[pos:]

                unnamed = '<ref {}name={} />'.format(group, name)
                for duplicate in occurrences[1:]:
                    # Don't replace inside templates (T266411)
                    end = replaceExcept(end, re.escape(duplicate), unnamed,
                                        exceptions=['template'])
                text = header + end

        for old_name, (new_name, quoted) in named_repl.items():
            # TODO : Support ref groups
            if quoted:
                new_name = '"{}"'.format(new_name)
            replacement = '<ref name={} />'.format(new_name)

            # Escape the old name so regex metacharacters in it match
            # literally, and pass the replacement through a callable so
            # backslashes in it are not read as group references.
            pattern = (
                r'<ref name\s*=\s*(?P<quote>["\']?)\s*{}\s*(?P=quote)\s*/>'
                .format(re.escape(old_name)))
            text = re.sub(pattern, lambda _m, repl=replacement: repl, text)
        return text
示例#3
0
    def process(self, text):
        """Merge duplicated ``<ref>`` tags in *text* and return the result.

        References with identical content inside the same ref group are
        collapsed: the first occurrence is kept as a named reference and
        each later occurrence is replaced by a self-closing
        ``<ref name=... />``.  Content that has no usable name gets one
        built from ``self.autogen`` plus the lowest number not already
        used by such a name.  Finally, self-closing references using a
        name that was superseded by another name for the same content
        are pointed at the surviving name.

        :param text: wikitext to process
        :return: wikitext with duplicated references merged
        """
        # keys are ref group names ('' for refs without a group)
        # values are a dict where :
        #   keys are ref content
        #   values are [name, [list of full ref matches],
        #               quoted, need_to_change], indexed through IX
        found_refs = {}
        found_ref_names = set()
        # Replace key by [value, quoted]
        named_repl = {}

        # Parse references
        for match in self.REFS.finditer(text):
            content = match.group('content')
            if not content.strip():
                continue

            params = match.group('params')
            group_match = self.GROUPS.search(params)
            # Key by the group's name, not by the match object: match
            # objects compare by identity, so refs of one group would
            # otherwise never share a bucket, and the object's repr
            # would end up in the generated group attribute.
            # NOTE(review): assumes GROUPS exposes the group name as the
            # named capture 'group' (parallel to NAMES' 'name') - confirm.
            group = group_match.group('group') if group_match else ''

            groupdict = found_refs.setdefault(group, {})
            v = groupdict.get(content)
            if v is None:
                v = groupdict[content] = [None, [], False, False]
            v[IX.reflist].append(match.group())

            found = self.NAMES.search(params)
            if not found:
                continue

            quoted = found.group('quote') in ['"', "'"]
            name = found.group('name')

            if not v[IX.name]:
                # First name associated with this content
                if name not in found_ref_names:
                    # first time ever we meet this name
                    v[IX.quoted] = quoted
                    v[IX.name] = name
                else:
                    # this name is already used with another content,
                    # so this content needs a different name
                    v[IX.change_needed] = True
            elif v[IX.name] != name:
                named_repl[name] = [v[IX.name], v[IX.quoted]]

            found_ref_names.add(name)

        # Find used autogenerated numbers
        used_numbers = set()
        for name in found_ref_names:
            number = removeprefix(name, self.autogen)
            with suppress(ValueError):
                used_numbers.add(int(number))

        # generator to give the next free number for autogenerating names
        free_number = (str(i) for i in itertools.count(start=1)
                       if i not in used_numbers)

        # Fix references
        for groupname, references in found_refs.items():
            group = 'group="{}" '.format(groupname) if groupname else ''

            for content, v in references.items():
                if len(v[IX.reflist]) == 1 and not v[IX.change_needed]:
                    continue

                name = v[IX.name]
                if not name:
                    name = '"{}{}"'.format(self.autogen, next(free_number))
                elif v[IX.quoted]:
                    name = '"{}"'.format(name)

                named = '<ref {}name={}>{}</ref>'.format(group, name,
                                                         content)
                text = text.replace(v[IX.reflist][0], named, 1)

                # make sure that the first (named ref) is not removed later
                pos = text.index(named) + len(named)
                header = text[:pos]
                end = text[pos:]

                # replace multiple identical references with repeated ref
                repeated_ref = '<ref {}name={} />'.format(group, name)
                for duplicate in v[IX.reflist][1:]:
                    # Don't replace inside templates (T266411)
                    end = replaceExcept(end, re.escape(duplicate),
                                        repeated_ref,
                                        exceptions=['template'])
                text = header + end

        # Fix references with different names
        # named_repl values are plain [name, quoted] pairs, so unpack
        # them instead of indexing with the four-slot IX constants.
        for old_name, (new_name, quoted) in named_repl.items():
            # TODO : Support ref groups
            if quoted:
                new_name = '"{}"'.format(new_name)
            replacement = '<ref name={} />'.format(new_name)

            # Escape the old name so regex metacharacters in it match
            # literally, and pass the replacement through a callable so
            # backslashes in it are not read as group references.
            pattern = (
                r'<ref name\s*=\s*(?P<quote>["\']?)\s*{}\s*(?P=quote)\s*/>'
                .format(re.escape(old_name)))
            text = re.sub(pattern, lambda _m, repl=replacement: repl, text)
        return text