예제 #1
0
    def valid_username(self):
        """
        Report whether the whole text is exactly one mentioned screen name.

        Returns False for empty text. Otherwise the text is passed through
        Extractor.extract_mentioned_screen_names() and accepted only when
        exactly one name comes back and that name equals the text with its
        first character removed (presumably the leading '@' -- confirm
        against Extractor).
        """
        candidate = self.text
        if not candidate:
            return False

        screen_names = Extractor(candidate).extract_mentioned_screen_names()
        if len(screen_names) != 1:
            return False

        # The single extracted name must account for everything after the
        # first character, i.e. no leading or trailing extras in the text.
        return screen_names[0] == candidate[1:]
예제 #2
0
    def tweet_length(self, options=None):
        """
        Return the length of the text as it would be counted after URL shortening.

        The intent is for this to be equivalent to the length of the Unicode
        NFC form (see: http://www.unicode.org/reports/tr15), so that the length
        is calculated consistently no matter which actual form was transmitted.
        For example:

             U+0065  Latin Small Letter E
         +   U+0301  Combining Acute Accent
         ----------
         =   2 bytes, 2 characters, displayed as é (1 visual glyph)
             … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single
               character and a display length of 1

        The string could also contain U+00E9 already, in which case the
        canonicalization will not change the value.

        NOTE(review): this method takes len(self.text) directly and performs no
        normalization itself; it presumably relies on the text having been
        normalized before it was stored -- confirm.

        Every URL reported by Extractor.extract_urls_with_indices() is counted
        at the configured shortened length instead of its literal length.

        Args:
            options: optional mapping overriding entries of
                DEFAULT_TCO_URL_LENGTHS. The keys read here are
                'short_url_length' and 'short_url_length_https'. The mapping
                is not modified.

        Returns:
            The computed length. When self.parent is truthy and already has a
            'tweet_length' attribute, that attribute is also set to the result.

        Raises:
            AssertionError: if self.parent reports has_been_linked as truthy.
        """

        assert (not self.parent or not getattr(self.parent, 'has_been_linked', False)), 'The validator should only be run on text before it has been modified.'

        # Merge into a private copy: writing defaults into `options` itself
        # would leak them into the caller's dict (and, with a `{}` default
        # argument, into every later call that omits `options`).
        url_lengths = dict(DEFAULT_TCO_URL_LENGTHS)
        if options is not None:
            url_lengths.update(options)

        length = len(self.text)
        # thanks force_unicode for making this so much simpler than the ruby version

        for url in Extractor(self.text).extract_urls_with_indices():
            indices = url['indices']
            # remove the length of the original URL
            length -= indices[1] - indices[0]
            # add the length of the t.co URL that will replace it
            if 'https://' in url['url'].lower():
                length += url_lengths.get('short_url_length_https')
            else:
                length += url_lengths.get('short_url_length')

        if self.parent and hasattr(self.parent, 'tweet_length'):
            self.parent.tweet_length = length
        return length
예제 #3
0
    def valid_hashtag(self):
        """
        Report whether the whole text is exactly one hashtag.

        Returns False for empty text. Otherwise the text is passed through
        Extractor.extract_hashtags() and accepted only when exactly one tag
        comes back and that tag equals the text with its first character
        removed (presumably the leading '#' -- confirm against Extractor).
        """
        candidate = self.text
        if not candidate:
            return False

        hashtags = Extractor(candidate).extract_hashtags()
        if len(hashtags) != 1:
            return False

        # The single extracted tag must account for everything after the
        # first character of the text.
        return hashtags[0] == candidate[1:]
예제 #4
0
 def __init__(self, text, **kwargs):
     """
     Store the text (passed through force_unicode) and build its Extractor.

     Keyword arguments:
         parent: optional owning object; stored as self.parent, or False
             when the keyword is not supplied.
     """
     parent = kwargs['parent'] if 'parent' in kwargs else False

     self.text = force_unicode(text)
     self.parent = parent
     # The extractor is built from the converted text, not the raw argument.
     self.extractor = Extractor(self.text)
예제 #5
0
 def extractor(self):
     """Build and return a new Extractor over this object's current text."""
     source_text = self.text
     return Extractor(source_text)