def replace(self, match):
    template_name = match.group('template').replace('_', ' ').strip()
    if template_name.startswith(tuple(self.defaultsort)):
        return match.group()
    template_name_norm = first_upper(template_name).partition('<!--')[0]
    if template_name_norm not in self.cache:
        template = pywikibot.Page(self.site, template_name_norm, ns=10)
        try:
            do_replace = template.exists() and template.isRedirectPage()
        except (pywikibot.exceptions.InvalidTitle,
                pywikibot.exceptions.InconsistentTitleReceived):
            do_replace = False
        if do_replace:
            target = template.getRedirectTarget()
            self.cache[template_name_norm] = target.title(with_ns=False)
        else:
            self.cache[template_name_norm] = None
    target = self.cache[template_name_norm]
    if not target:
        return match.group()
    if template_name != first_upper(template_name):
        # Keep a lowercase first letter when the redirect target itself
        # looks lowercase (every letter after the first one is lower)
        if all(char.islower()
               for char in target.partition(' ')[0][1:] if char.isalpha()):
            target = first_lower(target)
    return match.group('before') + target + match.group('after')
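# Hedged wiring sketch (regex and attributes assumed, not from the source):
# replace() above is shaped as a re.sub() callback reading the named groups
# 'before', 'template' and 'after', so a bot would apply it roughly as:
#
#     TEMPLATE_RE = re.compile(
#         r'(?P<before>\{\{\s*)'
#         r'(?P<template>[^{|}]+?)'
#         r'(?P<after>\s*[|}])')
#     self.cache = {}  # normalized template title -> target title or None
#     new_text = TEMPLATE_RE.sub(self.replace, page.text)
#
# The cache means each distinct template title triggers at most one
# exists()/isRedirectPage() lookup per run.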
def main():
    """Print environment variables."""
    _pwb_dir = os.path.abspath(os.path.join(
        os.path.split(__file__)[0], '..', '..'))
    _pwb_dir = first_upper(_pwb_dir)
    print('os.environ:')
    for k, v in sorted(os.environ.items()):
        # Don't leak the password into logs
        if k == 'USER_PASSWORD':
            continue
        # This only appears in subprocesses
        if k == 'PYWIKIBOT_DIR_PWB':
            continue
        print('{}: {}'.format(k, v))
    print('sys.path:')
    for path in sys.path:
        if path == '' or path.startswith('.'):
            continue
        # Normalise DOS drive letter
        path = first_upper(path)
        if path.startswith(_pwb_dir):
            continue
        print(path)
def ReplaceLink(self, text, oldtxt, newtxt):
    frmParts = [s.strip(self.stripChars)
                for s in self.wordBreaker.split(oldtxt)]
    toParts = [s.strip(self.stripChars)
               for s in self.wordBreaker.split(newtxt)]
    if len(frmParts) != len(toParts):
        raise ValueError("Splitting parts do not match counts")
    for i in xrange(0, len(frmParts)):
        if len(frmParts[i]) != len(toParts[i]):
            raise ValueError("Splitting parts do not match word length")
        if len(frmParts[i]) > 0:
            text = text.replace(first_lower(frmParts[i]),
                                first_lower(toParts[i]))
            text = text.replace(first_upper(frmParts[i]),
                                first_upper(toParts[i]))
    return text
def ReplaceLink(self, text, oldtxt, newtxt):
    """Replace links."""
    frmParts = [s.strip(self.stripChars)
                for s in self.wordBreaker.split(oldtxt)]
    toParts = [s.strip(self.stripChars)
               for s in self.wordBreaker.split(newtxt)]
    if len(frmParts) != len(toParts):
        raise ValueError('Splitting parts do not match counts')
    for i, part in enumerate(frmParts):
        # Compare lengths, not a string against an int
        if len(part) != len(toParts[i]):
            raise ValueError('Splitting parts do not match word length')
        if part:
            text = text.replace(first_lower(part), first_lower(toParts[i]))
            text = text.replace(first_upper(part), first_upper(toParts[i]))
    return text
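# Hypothetical usage sketch (wordBreaker and stripChars values invented):
# with a capturing word breaker the old and new texts split into equally
# many, equally long pieces, and both case variants of each piece are
# substituted:
#
#     self.wordBreaker = re.compile(r'(\s+)')  # capture: keep separators
#     self.stripChars = ''
#     self.ReplaceLink('See old page and Old page.', 'old page', 'new page')
#     # -> 'See new page and New page.'
#
# The per-part length check makes the method refuse any replacement in
# which a word would change length.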
def replace(self, match):
    text = match.group()
    code = self.parser.parse(text, skip_style_tags=True)
    sections = []
    for header in code.ifilter_headings():
        name = header.title.strip()
        if name in self.replace_headers:
            name = self.replace_headers[name]
        if name in self.iter_all_headers():
            sections.append({
                'name': first_upper(name),
                'nodes': [header],
            })
        else:
            sections[:] = []
    if not sections:
        return text

    do_more = False
    first_index = min(
        code.nodes.index(sect['nodes'][0]) for sect in sections)
    last_index = self.add_contents(sections, code)
    do_more = self.deduplicate(sections, code) or do_more
    do_more = self.check_levels(sections, code) or do_more
    if do_more:
        sections.sort(key=self.sortkey)
        self.reorganize(sections, code)
    self.clean_empty(sections, code, do_more)
    code.nodes[first_index:last_index] = [
        node for sect in sections for node in sect['nodes']
    ]
    return str(code)
def sametitle(self, title1: str, title2: str) -> bool:
    """
    Return True if title1 and title2 identify the same wiki page.

    title1 and title2 may be unequal but still identify the same page,
    if they use different aliases for the same namespace.
    """
    def ns_split(title):
        """Separate the namespace from the name."""
        ns, delim, name = title.partition(':')
        if delim:
            ns = self.namespaces.lookup_name(ns)
        if not delim or not ns:
            return default_ns, title
        return ns, name

    # Replace alias characters like underscores with title delimiters
    # like spaces, and multiple combinations of them with only one
    # delimiter
    sep = self.family.title_delimiter_and_aliases[0]
    pattern = re.compile('[{}]+'.format(
        self.family.title_delimiter_and_aliases))
    title1 = pattern.sub(sep, title1)
    title2 = pattern.sub(sep, title2)
    if title1 == title2:
        return True

    default_ns = self.namespaces[0]
    # determine whether titles contain namespace prefixes
    ns1_obj, name1 = ns_split(title1)
    ns2_obj, name2 = ns_split(title2)
    if ns1_obj != ns2_obj:
        # pages in different namespaces
        return False

    name1 = name1.strip()
    name2 = name2.strip()
    # If the namespace has a case definition it's overriding the site's
    # case definition
    if ns1_obj.case == 'first-letter':
        name1 = first_upper(name1)
        name2 = first_upper(name2)
    return name1 == name2
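# Hedged usage sketch (titles invented): sametitle() is called on a
# pywikibot site object, and the normalization above makes these hold on
# a wiki whose main namespace is 'first-letter' cased:
#
#     site = pywikibot.Site('en', 'wikipedia')
#     site.sametitle('foo_bar', 'Foo bar')             # True: case + '_'
#     site.sametitle('Image:Map.png', 'File:Map.png')  # True: ns alias
#     site.sametitle('Foo', 'Talk:Foo')                # False: namespaces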
def ReplaceLink(self, text, oldtxt, newtxt):
    frmParts = [
        s.strip(self.stripChars) for s in self.wordBreaker.split(oldtxt)
    ]
    toParts = [
        s.strip(self.stripChars) for s in self.wordBreaker.split(newtxt)
    ]
    if len(frmParts) != len(toParts):
        raise ValueError(u'Splitting parts do not match counts')
    for i in xrange(0, len(frmParts)):
        if len(frmParts[i]) != len(toParts[i]):
            raise ValueError(u'Splitting parts do not match word length')
        if len(frmParts[i]) > 0:
            text = text.replace(first_lower(frmParts[i]),
                                first_lower(toParts[i]))
            text = text.replace(first_upper(frmParts[i]),
                                first_upper(toParts[i]))
    return text
def normalize(self, template):
    # return self.parser.normalize(template)
    return first_upper(template
                       .partition('<!--')[0]
                       .replace('_', ' ')
                       .strip())
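# Worked example of the normalization chain (input invented): text after
# an opening HTML comment is dropped, underscores become spaces, outer
# whitespace is stripped, and the first letter is upcased:
#
#     normalize(' cite_web <!-- keep this -->')  ->  'Cite web'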
def handleOneLink(match):
    titleWithSection = match.group('titleWithSection')
    label = match.group('label')
    trailingChars = match.group('linktrail')
    newline = match.group('newline')

    try:
        is_interwiki = self.site.isInterwikiLink(titleWithSection)
    except ValueError:  # T111513
        is_interwiki = True

    if not is_interwiki:
        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        # We only work on namespace 0 because pipes and linktrails work
        # differently for images and categories.
        page = pywikibot.Page(
            pywikibot.Link(titleWithSection, self.site))
        try:
            namespace = page.namespace()
        except pywikibot.InvalidTitle:
            return match.group()
        if namespace == 0:
            # Replace underlines by spaces, also multiple underlines
            titleWithSection = re.sub('_+', ' ', titleWithSection)
            # Remove double spaces
            titleWithSection = re.sub(' +', ' ', titleWithSection)
            # Remove unnecessary leading spaces from title,
            # but remember if we did this because we eventually want
            # to re-add it outside of the link later.
            titleLength = len(titleWithSection)
            titleWithSection = titleWithSection.lstrip()
            hadLeadingSpaces = len(titleWithSection) != titleLength
            hadTrailingSpaces = False
            # Remove unnecessary trailing spaces from title,
            # but remember if we did this because it may affect
            # the linktrail and because we eventually want to
            # re-add it outside of the link later.
            if not trailingChars:
                titleLength = len(titleWithSection)
                titleWithSection = titleWithSection.rstrip()
                hadTrailingSpaces = len(titleWithSection) != titleLength

            # Convert URL-encoded characters to unicode
            from pywikibot.page import url2unicode
            titleWithSection = url2unicode(titleWithSection,
                                           encodings=self.site)

            if titleWithSection == '':
                # just skip empty links.
                return match.group()

            # Remove unnecessary initial and final spaces from label.
            # Please note that some editors prefer spaces around pipes.
            # (See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
            if label is not None:
                # Remove unnecessary leading spaces from label,
                # but remember if we did this because we want
                # to re-add it outside of the link later.
                labelLength = len(label)
                label = label.lstrip()
                hadLeadingSpaces = len(label) != labelLength
                # Remove unnecessary trailing spaces from label,
                # but remember if we did this because it affects
                # the linktrail.
                if not trailingChars:
                    labelLength = len(label)
                    label = label.rstrip()
                    hadTrailingSpaces = len(label) != labelLength
            else:
                label = titleWithSection

            if trailingChars:
                label += trailingChars

            if self.site.siteinfo['case'] == 'first-letter':
                firstcase_title = first_lower(titleWithSection)
                firstcase_label = first_lower(label)
            else:
                firstcase_title = titleWithSection
                firstcase_label = label

            if firstcase_label == firstcase_title:
                newLink = '[[%s]]' % label
            # Check if we can create a link with trailing characters
            # instead of a pipelink
            elif (firstcase_label.startswith(firstcase_title)
                  and trailR.sub('', label[len(titleWithSection):]) == ''):
                newLink = '[[%s]]%s' % (label[:len(titleWithSection)],
                                        label[len(titleWithSection):])
            else:
                # Try to capitalize the first letter of the title.
                # Not useful for languages that don't capitalize nouns.
                # TODO: Add a configuration variable for each site,
                # which determines if the link target is written in
                # uppercase
                if self.site.sitename == 'wikipedia:de':
                    titleWithSection = first_upper(titleWithSection)
                newLink = '[[%s|%s]]' % (titleWithSection, label)

            # re-add spaces that were pulled out of the link.
            # Examples:
            #   text[[ title ]]text        -> text [[title]] text
            #   text[[ title | name ]]text -> text [[title|name]] text
            #   text[[ title |name]]text   -> text[[title|name]]text
            #   text[[title| name]]text    -> text [[title|name]]text
            if hadLeadingSpaces and not newline:
                newLink = ' ' + newLink
            if hadTrailingSpaces:
                newLink += ' '
            if newline:
                newLink = newline + newLink
            return newLink
    # don't change anything
    return match.group()
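# Hypothetical wiring sketch (pattern invented; pywikibot's cosmetic_changes
# module builds something similar): handleOneLink reads the named groups
# 'newline', 'titleWithSection', 'label' and 'linktrail', so it is applied
# as a re.sub() callback over the whole page text:
#
#     linkR = re.compile(
#         r'(?P<newline>\n?)\[\[(?P<titleWithSection>[^\]|]+)'
#         r'(?:\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + r')')
#     text = linkR.sub(handleOneLink, text)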
def handleOneLink(match):
    titleWithSection = match.group('titleWithSection')
    label = match.group('label')
    trailingChars = match.group('linktrail')
    newline = match.group('newline')
    if not self.site.isInterwikiLink(titleWithSection):
        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        # We only work on namespace 0 because pipes and linktrails work
        # differently for images and categories.
        page = pywikibot.Page(pywikibot.Link(titleWithSection,
                                             self.site))
        try:
            namespace = page.namespace()
        except pywikibot.InvalidTitle:
            return match.group()
        if namespace == 0:
            # Replace underlines by spaces, also multiple underlines
            titleWithSection = re.sub('_+', ' ', titleWithSection)
            # Remove double spaces
            titleWithSection = re.sub(' +', ' ', titleWithSection)
            # Remove unnecessary leading spaces from title,
            # but remember if we did this because we eventually want
            # to re-add it outside of the link later.
            titleLength = len(titleWithSection)
            titleWithSection = titleWithSection.lstrip()
            hadLeadingSpaces = len(titleWithSection) != titleLength
            hadTrailingSpaces = False
            # Remove unnecessary trailing spaces from title,
            # but remember if we did this because it may affect
            # the linktrail and because we eventually want to
            # re-add it outside of the link later.
            if not trailingChars:
                titleLength = len(titleWithSection)
                titleWithSection = titleWithSection.rstrip()
                hadTrailingSpaces = len(titleWithSection) != titleLength

            # Convert URL-encoded characters to unicode
            from pywikibot.page import url2unicode
            titleWithSection = url2unicode(titleWithSection,
                                           encodings=self.site)

            if titleWithSection == '':
                # just skip empty links.
                return match.group()

            # Remove unnecessary initial and final spaces from label.
            # Please note that some editors prefer spaces around pipes.
            # (See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
            if label is not None:
                # Remove unnecessary leading spaces from label,
                # but remember if we did this because we want
                # to re-add it outside of the link later.
                labelLength = len(label)
                label = label.lstrip()
                hadLeadingSpaces = len(label) != labelLength
                # Remove unnecessary trailing spaces from label,
                # but remember if we did this because it affects
                # the linktrail.
                if not trailingChars:
                    labelLength = len(label)
                    label = label.rstrip()
                    hadTrailingSpaces = len(label) != labelLength
            else:
                label = titleWithSection

            if trailingChars:
                label += trailingChars

            if (titleWithSection == label
                    or first_lower(titleWithSection) == label):
                newLink = "[[%s]]" % label
            # Check if we can create a link with trailing characters
            # instead of a pipelink
            elif (len(titleWithSection) <= len(label)
                  and label[:len(titleWithSection)] == titleWithSection
                  and re.sub(trailR, '',
                             label[len(titleWithSection):]) == ''):
                newLink = "[[%s]]%s" % (label[:len(titleWithSection)],
                                        label[len(titleWithSection):])
            else:
                # Try to capitalize the first letter of the title.
                # Not useful for languages that don't capitalize nouns.
                # TODO: Add a configuration variable for each site,
                # which determines if the link target is written in
                # uppercase
                if self.site.sitename == 'wikipedia:de':
                    titleWithSection = first_upper(titleWithSection)
                newLink = "[[%s|%s]]" % (titleWithSection, label)

            # re-add spaces that were pulled out of the link.
            # Examples:
            #   text[[ title ]]text        -> text [[title]] text
            #   text[[ title | name ]]text -> text [[title|name]] text
            #   text[[ title |name]]text   -> text[[title|name]]text
            #   text[[title| name]]text    -> text [[title|name]]text
            if hadLeadingSpaces and not newline:
                newLink = ' ' + newLink
            if hadTrailingSpaces:
                newLink += ' '
            if newline:
                newLink = newline + newLink
            return newLink
    # don't change anything
    return match.group()
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Script that forms part of pwb_tests."""
from __future__ import absolute_import, unicode_literals

import os
import sys

from pywikibot.tools import first_upper

_pwb_dir = os.path.abspath(os.path.join(
    os.path.split(__file__)[0], '..', '..'))
_pwb_dir = first_upper(_pwb_dir)
print('os.environ:')
for k, v in sorted(os.environ.items()):
    # Don't leak the password into logs
    if k == 'USER_PASSWORD':
        continue
    # This only appears in subprocesses
    if k in ['PYWIKIBOT2_DIR_PWB']:
        continue
    print("{0!r}: {1!r}".format(k, v))
print('sys.path:')
for path in sys.path:
    if path == '' or path.startswith('.'):
        continue
    # Normalise DOS drive letter
    path = first_upper(path)
    if path.startswith(_pwb_dir):
        continue
    print(path)
"Couldn't determine the item for values {}/{} ({} items)". format(params[0], params[1], len(items))) continue item = items.pop() if params[2] != item.getID(): # 3rd param is index 2 template.add(3, item.getID()) change = True if index['název'] is not None: title_cell = cells[index['název']] nodes = title_cell.contents.nodes # fixme: ignore #wikilinks = title_cell.contents.filter_wikilinks() #if not wikilinks: if len(nodes) == 1: match = titleR.fullmatch(str(nodes[0])) link = item.sitelinks.get(page.site) if link and match: groups = match.groups() if first_upper(groups[1]) == link.title: new = '{}[[{}]]{}'.format(*groups) else: new = '{1}[[{0}|{2}]]{3}'.format( link.title, *groups) title_cell.contents.replace(nodes[0], new) change = True if change: page.text = str(code) page.save(summary='doplnění článků a/nebo položek na Wikidatech', asynchronous=True)
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Script that forms part of pwb_tests."""
from __future__ import absolute_import, unicode_literals

import os
import sys

from pywikibot.tools import first_upper

_pwb_dir = os.path.abspath(os.path.join(
    os.path.split(__file__)[0], '..', '..'))
_pwb_dir = first_upper(_pwb_dir)
print('os.environ:')
for k, v in sorted(os.environ.items()):
    # Don't leak the password into logs
    if k == 'USER_PASSWORD':
        continue
    # This only appears in subprocesses
    if k in ['PYWIKIBOT2_DIR_PWB']:
        continue
    print("%r: %r" % (k, v))
print('sys.path:')
for path in sys.path:
    if path == '' or path.startswith('.'):
        continue
    # Normalise DOS drive letter
    path = first_upper(path)
    if path.startswith(_pwb_dir):
        continue
    print(path)