def check_summary(self, pkg, lang, ignored_words):
    """Check the Summary tag of ``pkg`` as returned for ``lang``.

    The summary is fetched with ``pkg.langtag``, decoded, and passed to
    the unexpanded-macro and spelling checks.  It is then checked for
    embedded newlines, capitalization, a trailing dot, length, leading
    spaces, forbidden words and a repetition of the package name.  Every
    finding is reported through ``printError`` / ``printWarning``.

    :param pkg: package object; provides ``langtag()`` and ``name``
    :param lang: language passed to ``pkg.langtag`` and used in reports
    :param ignored_words: words forwarded to ``spell_check`` as ignored
    """
    summary = pkg.langtag(rpm.RPMTAG_SUMMARY, lang)
    if use_utf8:
        if not Pkg.is_utf8_bytestr(summary):
            printError(pkg, 'tag-not-utf8', 'Summary', lang)
        summary = Pkg.to_unicode(summary)
    else:
        summary = Pkg.b2s(summary)

    self._unexpanded_macros(pkg, 'Summary(%s)' % lang, summary)
    spell_check(pkg, summary, 'Summary(%s)', lang, ignored_words)

    if '\n' in summary:
        printError(pkg, 'summary-on-multiple-lines', lang)

    # Slices instead of summary[0] / summary[-1]: identical result for a
    # non-empty summary, but an empty one no longer raises IndexError.
    first_char = summary[:1]
    if first_char != first_char.upper():
        printWarning(pkg, 'summary-not-capitalized', lang, summary)
    if summary[-1:] == '.':
        printWarning(pkg, 'summary-ended-with-dot', lang, summary)

    if len(summary) > max_line_len:
        printError(pkg, 'summary-too-long', lang, summary)
    if leading_space_regex.search(summary):
        printError(pkg, 'summary-has-leading-spaces', lang, summary)

    res = forbidden_words_regex.search(summary)
    if res and Config.getOption('ForbiddenWords'):
        printWarning(pkg, 'summary-use-invalid-word', lang, res.group(1))

    if pkg.name:
        # The name counts as repeated when it is preceded by start of
        # text or whitespace and followed by whitespace, one of the
        # ``punct`` characters, or end of text.
        sepchars = r'[\s%s]' % punct
        res = re.search(r'(?:^|\s)(%s)(?:%s|$)' %
                        (re.escape(pkg.name), sepchars),
                        summary, re.IGNORECASE | re.UNICODE)
        if res:
            printWarning(pkg, 'name-repeated-in-summary', lang,
                         res.group(1))
def check_summary(self, pkg, lang, ignored_words):
    """Check the Summary tag of ``pkg`` as returned for ``lang``.

    After decoding, the summary goes through the unexpanded-macro and
    spelling checks, followed by checks for embedded newlines,
    capitalization, a trailing dot, length, leading spaces, forbidden
    words and a repetition of the package name.  Findings are reported
    through ``printError`` / ``printWarning``.

    :param pkg: package object; provides ``langtag()`` and ``name``
    :param lang: language passed to ``pkg.langtag`` and used in reports
    :param ignored_words: words forwarded to ``spell_check`` as ignored
    """
    summary = pkg.langtag(rpm.RPMTAG_SUMMARY, lang)
    if use_utf8:
        if not Pkg.is_utf8_bytestr(summary):
            printError(pkg, 'tag-not-utf8', 'Summary', lang)
        summary = Pkg.to_unicode(summary)
    else:
        summary = Pkg.b2s(summary)

    self._unexpanded_macros(pkg, 'Summary(%s)' % lang, summary)
    spell_check(pkg, summary, 'Summary(%s)', lang, ignored_words)

    if '\n' in summary:
        printError(pkg, 'summary-on-multiple-lines', lang)

    # Slicing keeps the result for non-empty text and avoids the
    # IndexError that summary[0] / summary[-1] raise on an empty summary.
    initial = summary[:1]
    if initial != initial.upper():
        printWarning(pkg, 'summary-not-capitalized', lang, summary)
    if summary[-1:] == '.':
        printWarning(pkg, 'summary-ended-with-dot', lang, summary)

    if len(summary) > max_line_len:
        printError(pkg, 'summary-too-long', lang, summary)
    if leading_space_regex.search(summary):
        printError(pkg, 'summary-has-leading-spaces', lang, summary)

    res = forbidden_words_regex.search(summary)
    if res and Config.getOption('ForbiddenWords'):
        printWarning(pkg, 'summary-use-invalid-word', lang, res.group(1))

    if pkg.name:
        # Raw literals: "\s" inside an ordinary string literal is an
        # invalid escape sequence and triggers a warning on current
        # Python versions.  The resulting pattern text is unchanged.
        sepchars = r'[\s' + punct + ']'
        res = re.search(r'(?:^|\s)(%s)(?:%s|$)' %
                        (re.escape(pkg.name), sepchars),
                        summary, re.IGNORECASE | re.UNICODE)
        if res:
            printWarning(pkg, 'name-repeated-in-summary', lang,
                         res.group(1))
def check_description(self, pkg, lang, ignored_words):
    """Check the %description text of ``pkg`` as returned for ``lang``.

    The text is decoded, run through the unexpanded-macro and spelling
    checks, and then inspected line by line for lines longer than
    ``max_line_len``, forbidden words and ``tag_regex`` matches.

    :param pkg: package object; provides ``langtag()``
    :param lang: language passed to ``pkg.langtag`` and used in reports
    :param ignored_words: words forwarded to ``spell_check`` as ignored
    """
    text = pkg.langtag(rpm.RPMTAG_DESCRIPTION, lang)
    if not use_utf8:
        text = Pkg.b2s(text)
    else:
        if not Pkg.is_utf8_bytestr(text):
            printError(pkg, 'tag-not-utf8', '%description', lang)
        text = Pkg.to_unicode(text)

    macro_context = '%%description -l %s' % lang
    self._unexpanded_macros(pkg, macro_context, text)
    spell_check(pkg, text, '%%description -l %s', lang, ignored_words)

    for line in text.splitlines():
        if len(line) > max_line_len:
            printError(pkg, 'description-line-too-long', lang, line)

        # The option is only consulted once a forbidden word matched.
        forbidden = forbidden_words_regex.search(line)
        if forbidden and Config.getOption('ForbiddenWords'):
            printWarning(pkg, 'description-use-invalid-word', lang,
                         forbidden.group(1))

        tag_match = tag_regex.search(line)
        if tag_match:
            printWarning(pkg, 'tag-in-description', lang,
                         tag_match.group(1))
def spell_check(pkg, str, fmt, lang, ignored):
    """Report misspelled words in ``str`` as 'spelling-error' warnings.

    When the ``enchant`` module is available and a checker exists (or
    can be created) for ``lang``, the text is checked with it.
    Otherwise each lower-cased word is looked up in ``BAD_WORDS``.

    :param pkg: package the text belongs to; its name is used to skip
        words that contain it or equal one of its '-'-separated parts
    :param str: text to check
    :param fmt: format string; ``fmt % lang`` labels each warning
    :param lang: language of the text; 'C' is treated as 'en_US' when
        enchant is available
    :param ignored: words that must not be reported
    """
    reported = set()
    checker = None

    if enchant:
        if lang == 'C':
            lang = 'en_US'

        checker = _enchant_checkers.get(lang)
        if not checker and lang not in _enchant_checkers:
            try:
                checker = enchant.checker.SpellChecker(
                    lang, filters=[enchant.tokenize.EmailFilter,
                                   enchant.tokenize.URLFilter,
                                   enchant.tokenize.WikiWordFilter])
            except enchant.DictNotFoundError:
                printInfo(pkg, 'enchant-dictionary-not-found', lang)
            # Failures are cached too, so the lookup is not retried.
            _enchant_checkers[lang] = checker

    if checker:
        # squeeze whitespace to ease leading context check
        checker.set_text(re.sub(r'\s+', ' ', str))

        if use_utf8:
            name_upper = Pkg.to_unicode(pkg.header[rpm.RPMTAG_NAME]).upper()
        else:
            name_upper = pkg.name.upper()
        name_parts = name_upper.split('-')
        if lang.startswith('en'):
            # Also accept the possessive form of every name component.
            name_parts.extend([part + "'S" for part in name_parts])

        for err in checker:
            word = err.word

            # Skip already warned and ignored words
            if word in reported or word in ignored:
                continue

            # Skip all capitalized words that do not start a sentence
            if (word[0].isupper() and
                    not sentence_break_regex.search(
                        checker.leading_context(3))):
                continue

            word_upper = word.upper()

            # Skip all uppercase words
            if word == word_upper:
                continue

            # Skip errors containing package name or equal to a
            # "component" of it, case insensitively
            if name_upper in word_upper or word_upper in name_parts:
                continue

            # Work around enchant's digit tokenizing behavior:
            # http://github.com/rfk/pyenchant/issues/issue/3
            if (checker.leading_context(1).isdigit() or
                    checker.trailing_context(1).isdigit()):
                continue

            # Warn and suggest
            suggestions = ', '.join(checker.suggest()[:3])
            hint = '-> %s' % suggestions if suggestions else suggestions
            printWarning(pkg, 'spelling-error', fmt % lang, word, hint)
            reported.add(word)

        # A usable enchant checker handled the text; no fallback needed.
        return

    # Fallback: enchant is unavailable or has no dictionary for lang.
    for chunk in str.split():
        for word in re.split(r'[^a-z]+', chunk.lower()):
            if not word:
                continue
            replacement = BAD_WORDS.get(word)
            if not replacement:
                continue
            if word[0] == '\'':
                word = word[1:]
            if word[-1] == '\'':
                word = word[:-1]
            if word in reported or word in ignored:
                continue
            printWarning(pkg, 'spelling-error', fmt % lang, word, '->',
                         replacement)
            reported.add(word)
def spell_check(pkg, str, fmt, lang, ignored):
    """Emit 'spelling-error' warnings for misspelled words in ``str``.

    The enchant checker for ``lang`` is used when the ``enchant`` module
    is available and a dictionary is found.  In every other case the
    lower-cased words of the text are looked up in ``BAD_WORDS``.

    :param pkg: package the text belongs to; words containing its name
        or matching one of its '-'-separated parts are not reported
    :param str: text to check
    :param fmt: format string; ``fmt % lang`` labels each warning
    :param lang: language of the text; 'C' is mapped to 'en_US' when
        enchant is available
    :param ignored: words that must not be reported
    """
    already_warned = set()
    have_dictionary = True

    if enchant:
        if lang == 'C':
            lang = 'en_US'

        speller = _enchant_checkers.get(lang)
        if not speller and lang not in _enchant_checkers:
            try:
                speller = enchant.checker.SpellChecker(
                    lang, filters=[enchant.tokenize.EmailFilter,
                                   enchant.tokenize.URLFilter,
                                   enchant.tokenize.WikiWordFilter])
            except enchant.DictNotFoundError:
                printInfo(pkg, 'enchant-dictionary-not-found', lang)
            # Remember the outcome (even a missing dictionary) per lang.
            _enchant_checkers[lang] = speller

        if not speller:
            have_dictionary = False
        else:
            # squeeze whitespace to ease leading context check
            squeezed = re.sub(r'\s+', ' ', str)
            speller.set_text(squeezed)

            if use_utf8:
                raw_name = Pkg.to_unicode(pkg.header[rpm.RPMTAG_NAME])
                uppername = raw_name.upper()
            else:
                uppername = pkg.name.upper()

            upperparts = uppername.split('-')
            if lang.startswith('en'):
                possessives = [part + "'S" for part in upperparts]
                upperparts += possessives

            for err in speller:
                candidate = err.word

                # Skip already warned and ignored words
                if candidate in already_warned or candidate in ignored:
                    continue

                # Skip all capitalized words that do not start a sentence
                if candidate[0].isupper():
                    context = speller.leading_context(3)
                    if not sentence_break_regex.search(context):
                        continue

                uppercandidate = candidate.upper()

                # Skip all uppercase words
                if candidate == uppercandidate:
                    continue

                # Skip errors containing package name or equal to a
                # "component" of it, case insensitively
                if uppername in uppercandidate:
                    continue
                if uppercandidate in upperparts:
                    continue

                # Work around enchant's digit tokenizing behavior:
                # http://github.com/rfk/pyenchant/issues/issue/3
                if speller.leading_context(1).isdigit():
                    continue
                if speller.trailing_context(1).isdigit():
                    continue

                # Warn and suggest
                sug = ', '.join(speller.suggest()[:3])
                if sug:
                    sug = '-> %s' % sug
                printWarning(pkg, 'spelling-error', fmt % lang,
                             candidate, sug)
                already_warned.add(candidate)

    if enchant and have_dictionary:
        return

    # No enchant, or no dictionary for lang: use the BAD_WORDS table.
    for token in str.split():
        lowered = token.lower()
        for word in re.split('[^a-z]+', lowered):
            if not word:
                continue
            correct = BAD_WORDS.get(word)
            if not correct:
                continue
            if word[0] == '\'':
                word = word[1:]
            if word[-1] == '\'':
                word = word[:-1]
            if word in already_warned or word in ignored:
                continue
            printWarning(pkg, 'spelling-error', fmt % lang, word, '->',
                         correct)
            already_warned.add(word)