Пример #1
0
def cap_author_token(token):
    """Capitalize a single token of an author name.

    The particles "von", "de", "el", "van" and "le" are returned in lower
    case. A token made of two or more "character + period" groups (such as
    J.K.) is split on the periods, each part is capitalized and the parts are
    re-joined with ". ". Every other token goes through ``capitalize``; the
    character following a leading "mc"/"mac" prefix and the character
    following the first "-" and the first "'" may additionally be
    upper-cased, as described in the comments below.
    """
    lowered = lower(token)
    if lowered in ("von", "de", "el", "van", "le"):
        return lowered

    # Word characters other than digits, each one followed by a period
    initials = re.match(r"([^\d\W]\.){2,}$", lowered, re.UNICODE)
    if initials is not None:
        # Normalize tokens of the form J.K. to J. K.
        return ". ".join(capitalize(piece) for piece in token.split(".")).strip()

    prefix_len = None
    for prefix in ("mc", "mac"):
        size = len(prefix)
        if not token.lower().startswith(prefix) or len(token) <= size:
            continue
        # The prefix only counts when the character after it is already
        # upper case, or when the whole token was supplied in lower case.
        following = token[size]
        if following == upper(following) or lowered == token:
            prefix_len = size
            break

    result = capitalize(token)
    if prefix_len is not None:
        head = result[:prefix_len]
        tail = result[prefix_len + 1 :]
        result = head + upper(result[prefix_len]) + tail

    # Only the first occurrence of each mark is considered, and only when at
    # least two characters follow it.
    for mark in ("-", "'"):
        pos = result.find(mark)
        if pos == -1 or len(result) <= pos + 2:
            continue
        result = result[: pos + 1] + upper(result[pos + 1]) + result[pos + 2 :]
    return result
Пример #2
0
def cap_author_token(token):
    '''Return one author-name token with name-aware capitalization.

    * 'von', 'de', 'el', 'van' and 'le' come back in lower case.
    * Tokens of two or more "character + period" groups (J.K.) are split on
      the periods, capitalized part by part and joined with '. '.
    * Anything else is run through ``capitalize``; afterwards the character
      behind a leading 'mc'/'mac' prefix and the character behind the first
      '-' and the first "'" may be upper-cased (see the comments below).
    '''
    lower_token = lower(token)
    if lower_token in ('von', 'de', 'el', 'van', 'le'):
        return lower_token

    # Word characters other than digits, each one followed by a period
    if re.match(r'([^\d\W]\.){2,}$', lower_token, re.UNICODE) is not None:
        # Normalize tokens of the form J.K. to J. K.
        pieces = token.split('.')
        return '. '.join(capitalize(piece) for piece in pieces).strip()

    scots_prefix_len = None
    for prefix in ('mc', 'mac'):
        width = len(prefix)
        if not token.lower().startswith(prefix) or len(token) <= width:
            continue
        # Accept the prefix when the next character is already upper case
        # or when the token was given entirely in lower case.
        next_char = token[width]
        if next_char == upper(next_char) or lower_token == token:
            scots_prefix_len = width
            break

    capped = capitalize(token)
    if scots_prefix_len is not None:
        before = capped[:scots_prefix_len]
        after = capped[scots_prefix_len+1:]
        capped = before + upper(capped[scots_prefix_len]) + after

    # Look at the first occurrence of each separator only, and require at
    # least two characters after it.
    for separator in ('-', "'"):
        at = capped.find(separator)
        if at == -1 or len(capped) <= at+2:
            continue
        capped = capped[:at+1] + upper(capped[at+1]) + capped[at+2:]
    return capped
Пример #3
0
def cap_author_token(token):
    '''Apply author-name capitalization rules to a single token.

    The lower-case particles 'von', 'de', 'el', 'van' and 'le' are returned
    lower-cased. A token consisting of at least two "character + period"
    groups (J.K.) is split at the periods, each part is capitalized and the
    result is joined with '. '. All other tokens are capitalized, after which
    the character following a leading 'mc'/'mac' prefix and the character
    following the first '-' and the first "'" may be upper-cased, subject to
    the checks commented below.
    '''
    lowercase = lower(token)
    if lowercase in ('von', 'de', 'el', 'van', 'le'):
        return lowercase

    # Word characters other than digits, each one followed by a period
    looks_like_initials = re.match(r'([^\d\W]\.){2,}$', lowercase, re.UNICODE)
    if looks_like_initials is not None:
        # Normalize tokens of the form J.K. to J. K.
        return '. '.join(capitalize(part) for part in token.split('.')).strip()

    cut = None
    for prefix in ('mc', 'mac'):
        plen = len(prefix)
        if not token.lower().startswith(prefix) or len(token) <= plen:
            continue
        # The prefix is honoured if the character right after it is upper
        # case already, or if the complete token is lower case.
        after_prefix = token[plen]
        if after_prefix == upper(after_prefix) or lowercase == token:
            cut = plen
            break

    name = capitalize(token)
    if cut is not None:
        left = name[:cut]
        right = name[cut+1:]
        name = left + upper(name[cut]) + right

    # For '-' and "'" only the first occurrence is handled, and only when two
    # or more characters come after it.
    for sep in ('-', "'"):
        where = name.find(sep)
        if where == -1 or len(name) <= where+2:
            continue
        name = name[:where+1] + upper(name[where+1]) + name[where+2:]
    return name
Пример #4
0
    def test_change_case(self):
        """Compare the case-changing helpers of ``icu`` with str methods and titlecase."""
        from calibre.utils.titlecase import titlecase
        # Corner case: a bytes argument is compared against the text 'A'
        self.ae('A', icu.upper(b'a'))

        samples = ('a', 'Alice\'s code', 'macdonald\'s machIne', '02 the wars')
        for sample in samples:
            self.ae(icu.upper(sample), sample.upper())
            self.ae(icu.lower(sample), sample.lower())
            # ICU's title case algorithm is different from ours, when there are
            # capitals inside words
            icu_title = icu.title_case(sample)
            our_title = titlecase(sample).replace('machIne', 'Machine')
            self.ae(icu_title, our_title)
            # capitalize: first character upper-cased, remainder lower-cased
            icu_capitalized = icu.capitalize(sample)
            self.ae(icu_capitalized, sample[0].upper() + sample[1:].lower())
Пример #5
0
    def test_change_case(self):
        """Compare the case-changing helpers of ``icu`` with str methods and titlecase."""
        from calibre.utils.titlecase import titlecase
        # Corner case: a bytes argument is compared against the text 'A'
        self.ae('A', icu.upper(b'a'))
        # Empty and non-string values are compared with what capitalize returns
        passthrough_values = ('', None, False, 1)
        for value in passthrough_values:
            self.ae(value, icu.capitalize(value))

        samples = ('a', 'Alice\'s code', 'macdonald\'s machIne', '02 the wars')
        for sample in samples:
            self.ae(icu.upper(sample), sample.upper())
            self.ae(icu.lower(sample), sample.lower())
            # ICU's title case algorithm is different from ours, when there are
            # capitals inside words
            icu_title = icu.title_case(sample)
            our_title = titlecase(sample).replace('machIne', 'Machine')
            self.ae(icu_title, our_title)
            # capitalize: first character upper-cased, remainder lower-cased
            icu_capitalized = icu.capitalize(sample)
            self.ae(icu_capitalized, sample[0].upper() + sample[1:].lower())
Пример #6
0
def titlecase(text):
    """
    Return *text* converted to title case.

    The text is split on whitespace and each word is handled in turn: words
    matching APOS_SECOND get their first and third characters upper-cased,
    words matching INLINE_PERIOD or UC_ELSEWHERE are kept as they are, words
    matching SMALL_WORDS are lower-cased, and every other word is capitalized
    per hyphen-separated part via CAPFIRST. When the whole text equals its
    upper-cased form, words not matching UC_INITIALS are lower-cased before
    those rules are applied.

    After re-joining the words with single spaces, the SMALL_FIRST,
    SMALL_AFTER_NUM, SMALL_LAST and SUBPHRASE patterns capitalize the
    words they match.

    The list of "SMALL words" which are not capped comes from
    the New York Times Manual of Style, plus 'vs' and 'v'.

    """

    def _keep_first_cap_second(match):
        # Leave group 1 untouched and capitalize group 2.
        return '%s%s' % (match.group(1), capitalize(match.group(2)))

    def _upper_match(match):
        # Upper-case whatever CAPFIRST matched.
        return icu_upper(match.group(0))

    is_all_caps = upper(text) == text

    pieces = []
    for word in re.split('\\s+', text):
        if is_all_caps:
            if UC_INITIALS.match(word):
                pieces.append(word)
                continue
            word = icu_lower(word)

        if APOS_SECOND.match(word):
            # Upper-case the first character, then the character at index 2.
            word = word.replace(word[0], icu_upper(word[0]), 1)
            word = word[:2] + icu_upper(word[2]) + word[3:]
            pieces.append(word)
            continue

        if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word):
            pieces.append(word)
            continue

        if SMALL_WORDS.match(word):
            pieces.append(icu_lower(word))
            continue

        pieces.append("-".join(
            CAPFIRST.sub(_upper_match, part) for part in word.split('-')
        ))

    titled = " ".join(pieces)
    titled = SMALL_FIRST.sub(_keep_first_cap_second, titled)
    titled = SMALL_AFTER_NUM.sub(_keep_first_cap_second, titled)
    titled = SMALL_LAST.sub(lambda m: capitalize(m.group(0)), titled)
    titled = SUBPHRASE.sub(_keep_first_cap_second, titled)
    return titled
Пример #7
0
def titlecase(text):
    """
    Convert *text* to title case and return the result.

    Words are obtained by splitting on whitespace. A word matching
    APOS_SECOND has its first and third characters upper-cased; a word
    matching INLINE_PERIOD or UC_ELSEWHERE is left untouched; a word matching
    SMALL_WORDS is lower-cased; any other word is capitalized part by part
    (split on '-') through CAPFIRST. If the text is identical to its
    upper-cased form, each word that does not match UC_INITIALS is
    lower-cased first.

    The words are joined back with single spaces and the SMALL_FIRST,
    SMALL_AFTER_NUM, SMALL_LAST and SUBPHRASE patterns then capitalize the
    words they match.

    The list of "SMALL words" which are not capped comes from
    the New York Times Manual of Style, plus 'vs' and 'v'.

    """

    def _recap_second_group(m):
        # Group 1 is emitted unchanged, group 2 is capitalized.
        return '%s%s' % (m.group(1), capitalize(m.group(2)))

    def _recap_whole_match(m):
        # Capitalize the entire match.
        return capitalize(m.group(0))

    shouting = upper(text) == text

    out_words = []
    for token in re.split('\\s+', text):
        if shouting:
            if UC_INITIALS.match(token):
                out_words.append(token)
                continue
            token = icu_lower(token)

        if APOS_SECOND.match(token):
            # First character, then the character at index 2, are upper-cased.
            token = token.replace(token[0], icu_upper(token[0]), 1)
            token = token[:2] + icu_upper(token[2]) + token[3:]
            out_words.append(token)
        elif INLINE_PERIOD.search(token) or UC_ELSEWHERE.match(token):
            out_words.append(token)
        elif SMALL_WORDS.match(token):
            out_words.append(icu_lower(token))
        else:
            capped_parts = [
                CAPFIRST.sub(lambda m: icu_upper(m.group(0)), chunk)
                for chunk in token.split('-')
            ]
            out_words.append("-".join(capped_parts))

    result = " ".join(out_words)
    result = SMALL_FIRST.sub(_recap_second_group, result)
    result = SMALL_AFTER_NUM.sub(_recap_second_group, result)
    result = SMALL_LAST.sub(_recap_whole_match, result)
    result = SUBPHRASE.sub(_recap_second_group, result)
    return result
Пример #8
0
    def test_change_case(self):
        """Compare the case-changing helpers of ``icu`` with str methods and titlecase."""
        from calibre.utils.titlecase import titlecase

        # Corner case: a bytes argument is compared against the text "A"
        self.ae("A", icu.upper(b"a"))
        # Empty and non-string values are compared with what capitalize returns
        passthrough_values = ("", None, False, 1)
        for value in passthrough_values:
            self.ae(value, icu.capitalize(value))

        samples = ("a", "Alice's code", "macdonald's machIne", "02 the wars")
        for sample in samples:
            self.ae(icu.upper(sample), sample.upper())
            self.ae(icu.lower(sample), sample.lower())
            # ICU's title case algorithm is different from ours, when there are
            # capitals inside words
            icu_title = icu.title_case(sample)
            our_title = titlecase(sample).replace("machIne", "Machine")
            self.ae(icu_title, our_title)
            # capitalize: first character upper-cased, remainder lower-cased
            icu_capitalized = icu.capitalize(sample)
            self.ae(icu_capitalized, sample[0].upper() + sample[1:].lower())
Пример #9
0
 def upper_case(self):
     """Replace this object's text with its upper-cased form."""
     from calibre.utils.icu import upper
     current_text = unicode_type(self.text())
     self.setText(upper(current_text))
Пример #10
0
 def upper_case(self):
     """Upper-case the value returned by ``self.text()`` and store it via ``setText``."""
     from calibre.utils.icu import upper
     as_text = unicode_type(self.text())
     uppercased = upper(as_text)
     self.setText(uppercased)
Пример #11
0
 def upper_case(self):
     """Replace this object's text with its upper-cased form."""
     from calibre.utils.icu import upper
     current_text = str(self.text())
     self.setText(upper(current_text))