Exemplo n.º 1
0
def get_tweet_tags(tweet):
    """Tokenize a tweet and return its part-of-speech tagged words.

    Mentions (tokens starting with ``@``) are replaced by a person's name
    before tagging: ``user.NPUser(handle).fullname`` when the handle is in
    ``user.students``, otherwise the ``name`` of the account returned by
    ``api.get_user``. After tagging, an abbreviated title that was split
    from the following name (``Mr`` / ``.`` / ``Haas``) is merged back into
    a single ``('Mr. Haas', 'NNP')`` entry.

    Args:
        tweet: The text of the tweet.

    Returns:
        A list of ``(word, tag)`` tuples as produced by ``nltk.pos_tag``,
        with split-up titled names merged into one entry.
    """
    tknzr = TweetTokenizer()
    tokens = tknzr.tokenize(tweet)
    # Replace handles with real names.
    for n, tok in enumerate(tokens):
        if not tok.startswith('@'):
            continue
        handle = tok.strip("@")
        if not handle:
            # A bare "@" carries no handle to look up; leave it untouched.
            continue
        if handle in user.students:
            # If we have a database entry for the mentioned user, we can
            # easily substitute a full name.
            tokens[n] = user.NPUser(handle).fullname
        else:
            # If there is no database entry, we use the user's alias. While
            # this is the full name in many cases, it is often not reliable.
            tokens[n] = api.get_user(handle).name
    tagged = nltk.pos_tag(tokens)

    # In nltk, if a teacher's name is written with a period after an
    # abbreviated prefix, it is awkwardly broken up into 3 tags. Build a new
    # list instead of deleting from `tagged` while iterating over it, and
    # only look ahead when both following entries exist, so that a tweet
    # ending in "<name> ." (or starting with punctuation) cannot index out
    # of range or wrap around to the other end of the list.
    titles = ('Mr', 'Ms', 'Mrs', 'Mx')
    merged = []
    total = len(tagged)
    i = 0
    while i < total:
        word, pos = tagged[i]
        splits_a_name = (
            pos == 'NNP'
            and word in titles
            and i + 2 < total
            and tagged[i + 1][1] == '.'
            and tagged[i + 2][1] == 'NNP'
        )
        if splits_a_name:
            # Combine title, period and name into the actual name and skip
            # over the two extra tags that were consumed.
            merged.append(('{}. {}'.format(word, tagged[i + 2][0]), 'NNP'))
            i += 3
        else:
            merged.append(tagged[i])
            i += 1
    return merged
Exemplo n.º 2
0
 def test_basics(self):
     """Check the types and allowed values of basic NPUser attributes."""
     valid_grades = ["Freshman", "Sophomore", "Junior", "Senior"]
     student = user.NPUser("1Defenestrator")
     # Both relationship attributes are expected to be plain lists.
     for attr in ("followers", "following"):
         self.assertIsInstance(getattr(student, attr), list)
     self.assertIn(student.grade, valid_grades)
     self.assertIsInstance(student.has_graduated, bool)
Exemplo n.º 3
0
 def test_has_graduated(self):
     """ Test has_graduated by manipulating values """
     moshe = user.NPUser("G4_Y5_3X")
     try:
         # "3000" is expected to read as not graduated (presumably a
         # graduation year that is still in the future).
         moshe.user_info["grade"] = "3000"
         self.assertFalse(moshe.has_graduated)
         # "2000" is expected to read as graduated (presumably a
         # graduation year that is already in the past).
         moshe.user_info["grade"] = "2000"
         self.assertTrue(moshe.has_graduated)
     finally:
         # Reset even when an assertion above fails, so the manipulated
         # value is never left behind.
         moshe.user_info["grade"] = "2017"
Exemplo n.º 4
0
 def test_tweet_tagging(self):
     """Exercise tweet.get_tweet_tags on a status text and fixture sentences."""
     # Most basic form: tagging a user's status text yields a list.
     student = user.NPUser("G4_Y5_3X")
     status_text = student.tweepy.status.text
     self.assertIsInstance(tweet.get_tweet_tags(status_text), list)

     # Names are substituted in and recognized as proper nouns.
     name_tags = tweet.get_tweet_tags(self.sentence)
     first = name_tags[0]
     self.assertEqual(first[0], "Luke Taylor")
     self.assertEqual(first[1], "NNP")

     # Teacher names come back as one entry, with no stray period after it.
     teacher_tags = tweet.get_tweet_tags(self.sentence3)
     self.assertEqual(teacher_tags[0][0], "Mr. Haas")
     self.assertNotEqual(teacher_tags[1][0], ".")
Exemplo n.º 5
0
 def test_misc(self):
     """Constructing an NPUser from this handle string raises ValueError."""
     bad_handle = "this_is_too_long_to_be_a_twitter_handle"
     # Callable form of assertRaises: invokes user.NPUser(bad_handle).
     self.assertRaises(ValueError, user.NPUser, bad_handle)
Exemplo n.º 6
0
 def test_magicmethod(self):
     """Known attributes resolve; an unknown one raises AttributeError."""
     student = user.NPUser("bravoc9")
     expected = (("fullname", "Chris Bravo"), ("sex", "M"))
     for attr, value in expected:
         self.assertEqual(getattr(student, attr), value)
     # Looking up a name the user object does not provide must fail loudly.
     with self.assertRaises(AttributeError):
         getattr(student, "this_is_not_a_valid_attribute")