def is_wikisource_author_page(self, title):
    """Tell whether *title* carries the Wikisource author namespace prefix.

    Always returns False when the site's family is not ``wikisource``.
    Otherwise the prefix is built from the first author namespace
    configured for the site's language, falling back to namespace 0 when
    the family has no such entry, followed by a colon.

    :param title: page title to check
    :return: True if *title* starts with the author namespace prefix
    """
    if self.site.family.name != 'wikisource':
        return False

    # Families without an author namespace entry for this language
    # fall back to namespace 0.
    try:
        namespace_id = self.site.family.authornamespaces[self.site.lang][0]
    except (AttributeError, KeyError):
        namespace_id = 0

    prefix = self.site.namespace(namespace_id) + ':'
    bare_name = removeprefix(title, prefix)
    if bare_name == title:
        # Nothing was stripped, so the title is outside that namespace.
        return False

    verbose_output('Found author ' + bare_name)
    return True
def process(self, text):
    """Merge duplicate ``<ref>`` tags in *text* and return the result.

    References are collected with ``self.REFS``; references whose content
    is blank are ignored.  For content that occurs more than once under
    the same group key, or whose name is already used by other content,
    the first occurrence is rewritten as a named reference and every
    later identical occurrence outside of templates is replaced by a
    self-closing ``<ref name=... />``.  Content without a usable name is
    given ``self.autogen`` followed by the next free number.  Finally,
    self-closing references using a second name for content that was
    already known under another name are pointed at the first name.

    :param text: wikitext to process
    :return: the wikitext with duplicate references merged
    """
    # Positions inside each per-content record:
    # [name, [list of full ref matches], quoted, need_to_change]
    NAME, REFLIST, QUOTED, CHANGE_NEEDED = range(4)

    # keys are ref groups
    # values are dicts mapping ref content to its record
    found_refs = {}
    found_ref_names = set()
    # Replace key by [value, quoted]
    named_repl = {}

    for match in self.REFS.finditer(text):
        content = match.group('content')
        if not content.strip():
            continue

        params = match.group('params')
        # NOTE(review): the key is whatever GROUPS.match() returns (None
        # when nothing matches). If that is an ordinary match object, as
        # for NAMES below, keys of separate refs never compare equal and
        # refs carrying a group are never merged with each other --
        # confirm against the GROUPS pattern before changing this.
        group = self.GROUPS.match(params)
        groupdict = found_refs.setdefault(group, {})
        record = groupdict.setdefault(content, [None, [], False, False])
        record[REFLIST].append(match.group())

        found = self.NAMES.match(params)
        if not found:
            continue

        quoted = found.group('quote') in ['"', "'"]
        name = found.group('name')
        if record[NAME]:
            if record[NAME] != name:
                # Same content under a second name: remember to redirect
                # the second name to the first one.
                named_repl[name] = [record[NAME], record[QUOTED]]
        elif name not in found_ref_names:
            # first time ever we meet this name
            record[QUOTED] = quoted
            record[NAME] = name
        else:
            # this name is already used with other content,
            # so this content will need a different name
            record[CHANGE_NEEDED] = True
        found_ref_names.add(name)

    # Numbers already taken by names of the form <autogen><number>
    used_numbers = set()
    for name in found_ref_names:
        number = removeprefix(name, self.autogen)
        with suppress(ValueError):
            used_numbers.add(int(number))

    # Lazily yields the free numbers in ascending order. Iterating a set
    # of strings would hand them out in arbitrary order and run dry
    # after a fixed amount.
    free_number = (str(i) for i in itertools.count(1)
                   if i not in used_numbers)

    for group_key, references in found_refs.items():
        # NOTE(review): the emitted attribute is always group="" because
        # the group name is never interpolated. Filling it in requires
        # extracting the name from group_key, which depends on the GROUPS
        # pattern that is not visible here. Output kept unchanged.
        group = 'group="" ' if group_key else ''

        for content, record in references.items():
            if len(record[REFLIST]) == 1 and not record[CHANGE_NEEDED]:
                continue

            name = record[NAME]
            if not name:
                name = '"{}{}"'.format(self.autogen, next(free_number))
            elif record[QUOTED]:
                name = '"{}"'.format(name)

            named = '<ref {}name={}>{}</ref>'.format(group, name, content)
            text = text.replace(record[REFLIST][0], named, 1)

            # make sure that the first (named ref) is not
            # removed later :
            pos = text.index(named) + len(named)
            header = text[:pos]
            end = text[pos:]

            unnamed = '<ref {}name={} />'.format(group, name)
            for duplicate in record[REFLIST][1:]:
                # Don't replace inside templates (T266411)
                end = replaceExcept(end, re.escape(duplicate), unnamed,
                                    exceptions=['template'])
            text = header + end

    for old_name, (new_name, quoted) in named_repl.items():
        # TODO : Support ref groups
        if quoted:
            new_name = '"{}"'.format(new_name)
        replacement = '<ref name={} />'.format(new_name)

        # The old name is page text, not a pattern: escape it. The
        # replacement goes through a callable so backslashes in the new
        # name are inserted literally.
        pattern = (
            r'<ref name\s*=\s*(?P<quote>["\']?)\s*{}\s*(?P=quote)\s*/>'
            .format(re.escape(old_name)))
        text = re.sub(pattern, lambda _m, repl=replacement: repl, text)
    return text
def process(self, text):
    """Merge duplicate ``<ref>`` tags in *text* and return the result.

    References are collected with ``self.REFS``; references whose content
    is blank are ignored.  For content that occurs more than once under
    the same group key, or whose name is already used by other content,
    the first occurrence is rewritten as a named reference and every
    later identical occurrence outside of templates is replaced by a
    self-closing ``<ref name=... />``.  Content without a usable name is
    given ``self.autogen`` followed by the next free number.  Finally,
    self-closing references using a second name for content that was
    already known under another name are pointed at the first name.

    :param text: wikitext to process
    :return: the wikitext with duplicate references merged
    """
    # keys are ref groups
    # values are a dict where :
    #   keys are ref content
    #   values are [name, [list of full ref matches],
    #               quoted, need_to_change]
    found_refs = {}
    found_ref_names = set()
    # Maps a superseded ref name to the (name, quoted) pair replacing it
    named_repl = {}

    # Parse references
    for match in self.REFS.finditer(text):
        content = match.group('content')
        if not content.strip():
            continue

        params = match.group('params')
        # NOTE(review): a successful GROUPS.search() is used as the key
        # itself. If that is an ordinary match object, as for NAMES
        # below, keys of separate refs never compare equal (so grouped
        # refs are never merged with each other) and the group attribute
        # built further down is formatted from the match object rather
        # than from the group's name. Fixing this needs the GROUPS
        # pattern, which is not visible here -- confirm and extract the
        # name.
        group = self.GROUPS.search(params) or ''
        if group not in found_refs:
            found_refs[group] = {}

        groupdict = found_refs[group]
        if content in groupdict:
            v = groupdict[content]
            v[IX.reflist].append(match.group())
        else:
            v = [None, [match.group()], False, False]

        found = self.NAMES.search(params)
        if found:
            quoted = found.group('quote') in ['"', "'"]
            name = found.group('name')

            if not v[IX.name]:
                # First name associated with this content
                if name not in found_ref_names:
                    # first time ever we meet this name
                    v[IX.quoted] = quoted
                    v[IX.name] = name
                else:
                    # this name is already used with other content,
                    # so this content will need a different name
                    v[IX.change_needed] = True
            elif v[IX.name] != name:
                # Same content under a second name: remember to redirect
                # the second name to the first one.
                named_repl[name] = (v[IX.name], v[IX.quoted])

            found_ref_names.add(name)
        groupdict[content] = v

    # Find used autogenerated numbers
    used_numbers = set()
    for name in found_ref_names:
        number = removeprefix(name, self.autogen)
        with suppress(ValueError):
            used_numbers.add(int(number))

    # generator to give the next free number for autogenerating names
    free_number = (str(i) for i in itertools.count(start=1)
                   if i not in used_numbers)

    # Fix references
    for groupname, references in found_refs.items():
        group = 'group="{}" '.format(groupname) if groupname else ''

        for content, v in references.items():
            if len(v[IX.reflist]) == 1 and not v[IX.change_needed]:
                continue

            name = v[IX.name]
            if not name:
                name = '"{}{}"'.format(self.autogen, next(free_number))
            elif v[IX.quoted]:
                name = '"{}"'.format(name)

            named = '<ref {}name={}>{}</ref>'.format(group, name, content)
            text = text.replace(v[IX.reflist][0], named, 1)

            # make sure that the first (named ref) is not removed later
            pos = text.index(named) + len(named)
            header = text[:pos]
            end = text[pos:]

            # replace multiple identical references with repeated ref
            repeated_ref = '<ref {}name={} />'.format(group, name)
            for duplicate in v[IX.reflist][1:]:
                # Don't replace inside templates (T266411)
                end = replaceExcept(end, re.escape(duplicate), repeated_ref,
                                    exceptions=['template'])
            text = header + end

    # Fix references with different names
    for old_name, (new_name, quoted) in named_repl.items():
        # TODO : Support ref groups
        if quoted:
            new_name = '"{}"'.format(new_name)
        replacement = '<ref name={} />'.format(new_name)

        # The old name is page text, not a pattern: escape it. The
        # replacement goes through a callable so backslashes in the new
        # name are inserted literally.
        pattern = (
            r'<ref name\s*=\s*(?P<quote>["\']?)\s*{}\s*(?P=quote)\s*/>'
            .format(re.escape(old_name)))
        text = re.sub(pattern, lambda _m, repl=replacement: repl, text)
    return text