def word_length(word):
    """
    Calculate the length of a word on Twitter.

    A word for which ``is_url`` is truthy always counts as
    ``SHORT_URL_LENGTH``, whatever its real size. Any other word is
    normalised to NFC and weighted code point by code point: code points
    inside the single-weight ranges below count as 1, everything else
    (most emoji and CJK characters, for instance) counts as 2.

    See https://developer.twitter.com/en/docs/counting-characters

    :param word: the text to measure.
    :return: the weighted length of ``word`` as an int (0 for an empty
        non-URL word).
    """
    if is_url(word):
        return SHORT_URL_LENGTH

    # Inclusive (first, last) code point bounds of the ranges that count
    # as a single character. Both ends belong to the range, so they are
    # compared with <= instead of being fed to range(), which would drop
    # the last code point of each range (e.g. U+200D ZERO WIDTH JOINER).
    single_weight_ranges = (
        (0, 4351),
        (8192, 8205),
        (8208, 8223),
        (8242, 8247),
    )

    # Normalise the whole word, not each character on its own: only then
    # can a base letter and its combining mark compose into one code
    # point, and NFC of a lone code point may expand to several code
    # points, which ord() cannot accept.
    length = 0
    for char in normalize("NFC", word):
        code_point = ord(char)
        if any(
            first <= code_point <= last
            for first, last in single_weight_ranges
        ):
            length += 1
        else:
            length += 2
    return length
def test_no_urls(self):
    # Every sample stored under URLS['is_not_url'] must be rejected by
    # twitter_utils.is_url. The sample doubles as the assertion message
    # so a failure names the offending input.
    non_url_samples = URLS['is_not_url']
    for candidate in non_url_samples:
        detected = twitter_utils.is_url(candidate)
        self.assertFalse(detected, candidate)
def test_yes_urls(self):
    # Every sample stored under URLS['is_url'] must be accepted by
    # twitter_utils.is_url. The sample doubles as the assertion message
    # so a failure names the offending input.
    url_samples = URLS['is_url']
    for candidate in url_samples:
        detected = twitter_utils.is_url(candidate)
        self.assertTrue(detected, candidate)