Exemplo n.º 1
0
class TestTwitterMostRecent(unittest.TestCase):
    ''' Tests for TwitterWrapper.get_recent_tweet_id '''
    def setUp(self):
        ''' Build a fresh wrapper before every test '''
        self.mytwitterwrapper = TwitterWrapper("Matt_LeBlanc")

    def test_since_one_value(self):
        ''' Confirm that since works with one item '''
        since = self.mytwitterwrapper.get_recent_tweet_id(DATA_ONE_ITEM)
        self.assertEqual(1067863966829801500, since)

    def test_since_zero_values(self):
        ''' Confirm that an empty json list returns 1 '''
        self.assertEqual(
            1, self.mytwitterwrapper.get_recent_tweet_id(DATA_EMPTYLIST))

    def test_since_null_values(self):
        ''' Confirm that passing None instead of a list returns 1 '''
        self.assertEqual(1, self.mytwitterwrapper.get_recent_tweet_id(None))

    def test_since_two_values_first(self):
        ''' Confirm the expected id is returned for the two-item fixture '''
        since = self.mytwitterwrapper.get_recent_tweet_id(DATA_TWO_ITEMS)
        self.assertEqual(1067863966829801500, since)

    def test_since_two_values_second(self):
        ''' Confirm the expected id is returned for the reversed fixture '''
        # Previously this reused DATA_TWO_ITEMS and so duplicated the test
        # above. NOTE(review): DATA_TWO_ITEMS_REVERSED is presumably the same
        # two tweets in the opposite order -- confirm against the fixture.
        since = self.mytwitterwrapper.get_recent_tweet_id(
            DATA_TWO_ITEMS_REVERSED)
        self.assertEqual(1067863966829801500, since)
Exemplo n.º 2
0
def twitter_id_topics(twitter_id, topic_count):
    ''' Return topic models for the tweets of the given twitter id.

    twitter_id is passed straight to TwitterWrapper; topic_count is converted
    with int() and forwarded to get_topic_models as n_top_words.

    The tweet text is fetched with cache_only=True unless the request carries
    force_call=true (case-insensitive).
    '''
    tw_handle = TwitterWrapper(twitter_id)

    # A missing force_call parameter comes back as None; guard it so that
    # .lower() is never called on None (same check as the sibling handlers).
    force_call = request.args.get('force_call')
    cache_only = force_call is None or force_call.lower() != 'true'

    df = tw_handle.get_tweet_text(cache_only=cache_only)
    return get_topic_models(df, n_top_words=int(topic_count))
Exemplo n.º 3
0
def random_tweet(name):
    ''' Reload the module-level index/collection and return one document id.

    The cached tweets for name are loaded into INVERTED_INDEX and
    MY_COLLECTION, then the id of the document fetched with get_document(1)
    is returned as a JSON object of the form {"id": ...}.
    '''
    global INVERTED_INDEX, MY_COLLECTION

    cached_tweets = TwitterWrapper(name).load_tweets(cache_only=True)
    INVERTED_INDEX, MY_COLLECTION = load_data(cached_tweets, [])

    # NOTE(review): despite the function name, the lookup key is always 1.
    doc = MY_COLLECTION.get_document(1)
    return json.dumps({'id': doc.document_id})
Exemplo n.º 4
0
def twitter_name_top_tweets(twitter_name, tweet_count):
    ''' Build the topic-model graph for a screen name.

    The wrapper's id/text result is fetched with cache_only=True unless the
    request carries force_call=true (case-insensitive). Only head(50) of that
    result is handed to get_topic_models_graph, together with
    int(tweet_count).
    '''
    wrapper = TwitterWrapper("")
    wrapper.set_screen_name(twitter_name)

    # Stay on the cache unless the caller explicitly asked for a fresh call.
    force_call = request.args.get('force_call')
    use_cache_only = force_call is None or force_call.lower() != 'true'

    tweets = wrapper.get_tweet_id_text(cache_only=use_cache_only)
    return get_topic_models_graph(tweets.head(50), int(tweet_count))
Exemplo n.º 5
0
def twitter_name_topics(twitter_name, topic_count):
    ''' Return topic models for the tweets of the given screen name.

    topic_count is converted with int() and forwarded to get_topic_models as
    n_top_words.

    The tweet text is fetched with cache_only=True unless the request carries
    force_call=true (case-insensitive).
    '''
    tw_handle = TwitterWrapper("")
    tw_handle.set_screen_name(twitter_name)

    force_call = request.args.get('force_call')
    cache_only = force_call is None or force_call.lower() != 'true'

    # Honour the computed flag; it used to be calculated and then ignored
    # because cache_only=False was hard-coded in this call.
    df = tw_handle.get_tweet_text(cache_only=cache_only)
    return get_topic_models(df, n_top_words=int(topic_count))
Exemplo n.º 6
0
def do_main():
    ''' Rebuild the module-level index and return a word count as a string.

    The word is read from the request's 'user' argument. Cached tweets for a
    fixed wrapper id are loaded into INVERTED_INDEX and MY_COLLECTION, and the
    result of INVERTED_INDEX.word_count(word) is returned formatted as text.
    '''
    global INVERTED_INDEX, MY_COLLECTION

    wrapper = TwitterWrapper("4348237453")
    query_word = request.args.get('user')

    cached_tweets = wrapper.load_tweets(cache_only=True)
    INVERTED_INDEX, MY_COLLECTION = load_data(cached_tweets, [])

    count = INVERTED_INDEX.word_count(query_word)
    return "{}".format(count)
Exemplo n.º 7
0
def test_topics():
    ''' Run get_topic_models on the cached tweet text of a fixed account.

    The result of get_topic_models is discarded; nothing is returned.
    '''
    wrapper = TwitterWrapper("")
    wrapper.set_screen_name("realdonaldtrump")

    cached_text = wrapper.get_tweet_text(cache_only=True)
    get_topic_models(cached_text, n_top_words=10)


#test_topics()
def evaluate(base=False):
    ''' Print ROUGE-1/ROUGE-2 scores of a summarizer for a fixed set of handles.

    For every handle the cached id/text data is converted with
    convert_real_data, summarized, and the first 50 summary documents are
    scored against the lines of ./data/<handle>_gold.txt.

    base: when exactly True, BaseSummarizer is used; otherwise
          LexRankSummarizer is used with threshold=0.1 and tolerance=0.0001.

    Nothing is returned; handle names, scores and a final 'done!' are printed.
    '''
    handles_to_evaluate = ['elonmusk', 'barackobama', 'realdonaldtrump', 'justinbieber', 'neiltyson', 'wendys',
                           'gordonramsay', 'katyperry']

    # The scorer configuration is identical for every handle, so build it once.
    scorer = rouge_scorer.RougeScorer(
        ['rouge1', 'rouge2'], use_stemmer=True)

    for handle in handles_to_evaluate:
        # print handle name
        print(handle)

        # get data
        tw_handle = TwitterWrapper("")
        tw_handle.set_test_name(handle)
        df = tw_handle.get_tweet_id_text(cache_only=True)
        test_documents = convert_real_data(df)

        # Key each document's content by its position in the sequence.
        doc_dict = {
            doc_id: doc.content
            for doc_id, doc in enumerate(test_documents)
        }

        # Run only the requested summarizer; LexRank used to run every time
        # and its result was thrown away when the baseline was selected.
        if base is True:
            res = BaseSummarizer(doc_dict).summarize()
        else:
            res = LexRankSummarizer(doc_dict).summarize(
                threshold=0.1, tolerance=0.0001)

        # test result rouge1, rouge2
        with open(f'./data/{handle}_gold.txt') as f:
            content = f.readlines()

        prediction = ' '.join(doc.raw.strip() for doc in res[:50])
        target = ' '.join(x.strip() for x in content)

        scores = scorer.score(target, prediction)

        # print scores and newline
        print(scores)
        print()

    # finish confirmation
    print('done!')
Exemplo n.º 9
0
class TestTwitterJoin(unittest.TestCase):
    ''' Tests for TwitterWrapper.join_tweets '''
    def setUp(self):
        ''' Build a fresh wrapper before every test '''
        self.mytwitterwrapper = TwitterWrapper("Matt_LeBlanc")

    def test_jointweets_two_duplicate(self):
        ''' Joining the two-item fixture with itself must yield two items '''
        wrapper = self.mytwitterwrapper
        joined = wrapper.join_tweets(DATA_TWO_ITEMS, DATA_TWO_ITEMS)
        self.assertEqual(len(joined), 2)

    def test_jointweets_two_null_values(self):
        ''' Joining None with None must yield an empty result '''
        wrapper = self.mytwitterwrapper
        joined = wrapper.join_tweets(None, None)
        self.assertEqual(len(joined), 0)
Exemplo n.º 10
0
class TestTwitterSort(unittest.TestCase):
    ''' Tests for TwitterWrapper.sort_tweets '''
    def setUp(self):
        ''' Build a fresh wrapper before every test '''
        self.mytwitterwrapper = TwitterWrapper("Matt_LeBlanc")

    def test_sort_tweets(self):
        ''' Sorting the reversed fixture must put the expected id first '''
        wrapper = self.mytwitterwrapper
        ordered = wrapper.sort_tweets(DATA_TWO_ITEMS_REVERSED)
        leading_tweet = ordered[0]
        self.assertEqual(leading_tweet["id"], 1067863966829801500)
def evaluate():
    ''' Print ROUGE-1/ROUGE-2 scores of get_top_docs for a fixed set of handles.

    For every handle the cached tweet text is passed to get_top_docs and the
    stripped, space-joined result is scored against the stripped, space-joined
    lines of ./data/<handle>_gold.txt. Nothing is returned; handle names,
    scores and a final 'done!' are printed.
    '''
    screen_names = [
        'elonmusk', 'barackobama', 'realdonaldtrump', 'justinbieber',
        'neiltyson', 'wendys', 'gordonramsay', 'katyperry'
    ]

    for screen_name in screen_names:
        print(screen_name)

        # Fetch the cached text and pick the top documents from it.
        wrapper = TwitterWrapper("")
        wrapper.set_test_name(screen_name)
        tweet_text = wrapper.get_tweet_text(cache_only=True)
        top_docs = get_top_docs(tweet_text)

        # Read the reference lines for this handle.
        with open(f'./data/{screen_name}_gold.txt') as gold_file:
            gold_lines = gold_file.readlines()

        scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2'],
                                          use_stemmer=True)

        prediction = ' '.join(doc.strip() for doc in top_docs)
        target = ' '.join(line.strip() for line in gold_lines)

        # Scores followed by an empty separator line.
        print(scorer.score(target, prediction))
        print()

    print('done!')
Exemplo n.º 12
0
 def setUp(self):
     ''' Build a fresh wrapper before every test '''
     screen_name = "Matt_LeBlanc"
     self.mytwitterwrapper = TwitterWrapper(screen_name)
Exemplo n.º 13
0
    global INVERTED_INDEX, MY_COLLECTION

    word = request.args.get('user')
    #    eprint(word)
    INVERTED_INDEX, MY_COLLECTION = load_data(
        tw_handle.load_tweets(cache_only=True), [])
    #    return(load_data(tw_handle.load_tweets(cache_only=True), []))
    return "{}".format(INVERTED_INDEX.word_count(word))


#    print("do_main complete")
#do_main()

# Script entry point: print the topic models for one hard-coded account.
if __name__ == '__main__':
    # Create the wrapper with an empty constructor argument and set the
    # screen name afterwards.
    tw_handle = TwitterWrapper("")
    tw_handle.set_screen_name("BarackObama")
    # cache_only=False here, unlike the cache_only=True calls elsewhere.
    df = tw_handle.get_tweet_id_text(cache_only=False)
    # Only head(500) of the result is modelled.
    # NOTE(review): the positional 10 is presumably n_top_words -- confirm
    # against the get_topic_models signature.
    t = get_topic_models(df.head(500), 10)
    print(t)
#def eprint(*args, **kwargs):
#    print(*args, file=sys.stderr, **kwargs)


def test_topics():
    tw_handle = TwitterWrapper("")
    tw_handle.set_screen_name("realdonaldtrump")

    #    tw_handle.more_timeline()

    #    df = tw_handle.load_tweets(cache_only=False)