def transform(self, ispdf=False):
    """Normalize the title so it can be embedded in wikitext.

    ``self.title`` is rewritten in place: decorative runs of punctuation
    and all whitespace variants are collapsed, surrounding punctuation is
    stripped, ``self.avoid_uppercase()`` is applied, and characters that
    would be interpreted as wiki markup are replaced by numeric character
    references. The result is finally passed through
    ``pywikibot.unicode2html`` using the encoding of ``self.site``.

    :param ispdf: if true, skip the initial ``pywikibot.html2unicode``
        conversion of HTML entities.
    """
    # convert html entities
    if not ispdf:
        self.title = pywikibot.html2unicode(self.title)
    # collapse runs of hyphens into a single one
    self.title = re.sub(r'-+', '-', self.title)
    # remove formatting, i.e long useless strings
    self.title = re.sub(r'[\.+\-=]{4,}', ' ', self.title)
    # remove \n and \r and Unicode spaces from titles
    self.title = re.sub(r'(?u)\s', ' ', self.title)
    self.title = re.sub(r'[\n\r\t]', ' ', self.title)
    # remove leading and trailing ./;/,/-/_/+/ / first, then squeeze
    # the remaining runs of spaces into a single space
    self.title = re.sub(r' +', ' ', self.title.strip(r'=.;,-+_ '))

    self.avoid_uppercase()

    # The replacements below must emit character references: replacing a
    # character with itself would leave the markup-sensitive text intact.
    # avoid closing the link before the end
    self.title = self.title.replace(']', '&#93;')
    # avoid multiple } being interpreted as a template inclusion
    self.title = self.title.replace('}}', '}&#125;')
    # prevent multiple quotes being interpreted as '' or '''
    self.title = self.title.replace("''", "'&#39;")
    # avoid | being interpreted as a template parameter separator
    self.title = self.title.replace('|', '&#124;')
    self.title = pywikibot.unicode2html(self.title, self.site.encoding())
def transform(self, ispdf=False):
    """Normalize the title so it can be embedded in wikitext.

    ``self.title`` is rewritten in place: decorative runs of punctuation
    and all whitespace variants are collapsed, surrounding punctuation is
    stripped, ``self.avoid_uppercase()`` is applied, and characters that
    would be interpreted as wiki markup are replaced by numeric character
    references. The result is finally passed through
    ``pywikibot.unicode2html`` using the encoding of ``self.site``.

    :param ispdf: if true, skip the initial ``pywikibot.html2unicode``
        conversion of HTML entities.
    """
    # convert html entities
    if not ispdf:
        self.title = pywikibot.html2unicode(self.title)
    # collapse runs of hyphens into a single one
    self.title = re.sub(r'-+', '-', self.title)
    # remove formatting, i.e long useless strings
    self.title = re.sub(r'[\.+\-=]{4,}', ' ', self.title)
    # remove \n and \r and Unicode spaces from titles
    self.title = re.sub(r'(?u)\s', ' ', self.title)
    self.title = re.sub(r'[\n\r\t]', ' ', self.title)
    # remove leading and trailing ./;/,/-/_/+/ / first, then squeeze
    # the remaining runs of spaces into a single space
    self.title = re.sub(r' +', ' ', self.title.strip(r'=.;,-+_ '))

    self.avoid_uppercase()

    # The replacements below must emit character references: replacing a
    # character with itself would leave the markup-sensitive text intact.
    # avoid closing the link before the end
    self.title = self.title.replace(']', '&#93;')
    # avoid multiple } being interpreted as a template inclusion
    self.title = self.title.replace('}}', '}&#125;')
    # prevent multiple quotes being interpreted as '' or '''
    self.title = self.title.replace("''", "'&#39;")
    self.title = pywikibot.unicode2html(self.title, self.site.encoding())
def transform(self, ispdf=False):
    """Normalize the title so it can be embedded in wikitext.

    ``self.title`` is rewritten in place: decorative runs of punctuation
    and all whitespace variants are collapsed, surrounding punctuation is
    stripped, ``self.avoid_uppercase()`` is applied, and characters that
    would be interpreted as wiki markup are replaced by numeric character
    references. The result is finally passed through
    ``pywikibot.unicode2html`` using the encoding of ``self.site``.

    :param ispdf: if true, skip the initial ``pywikibot.html2unicode``
        conversion of HTML entities.
    """
    # convert html entities
    if not ispdf:
        self.title = pywikibot.html2unicode(self.title)
    # collapse runs of hyphens into a single one
    self.title = re.sub(r"-+", "-", self.title)
    # remove formatting, i.e long useless strings
    self.title = re.sub(r"[\.+\-=]{4,}", " ", self.title)
    # remove \n and \r and Unicode spaces from titles
    self.title = re.sub(r"(?u)\s", " ", self.title)
    self.title = re.sub(r"[\n\r\t]", " ", self.title)
    # remove leading and trailing ./;/,/-/_/+/ / first, then squeeze
    # the remaining runs of spaces into a single space
    self.title = re.sub(r" +", " ", self.title.strip(r"=.;,-+_ "))

    self.avoid_uppercase()

    # The replacements below must emit character references: replacing a
    # character with itself would leave the markup-sensitive text intact.
    # avoid closing the link before the end
    self.title = self.title.replace("]", "&#93;")
    # avoid multiple } being interpreted as a template inclusion
    self.title = self.title.replace("}}", "}&#125;")
    # prevent multiple quotes being interpreted as '' or '''
    self.title = self.title.replace("''", "'&#39;")
    self.title = pywikibot.unicode2html(self.title, self.site.encoding())