Exemplo n.º 1
0
def search_data(query, lang, depth, output):
    """ Searches tweets and streams them into an output file

    Arguments:
    ----------
        query:
            type: string
            info: string with logic operations (AND, OR...)
        lang:
            type: string
            info: language abbreviation to filter the tweets
        depth:
            type: int
            info: number of tweets to retrieve
        output:
            type: string
            info: output file name including extension
    """

    tweets_source = TwitterMiner(
        token_key=U_K['token_key'],
        token_secret=U_K['token_secret'],
    ).search_tweets(query=query, lang=lang, depth=depth)

    # append_text is a consumer coroutine: prime it before sending.
    file_sink = append_text(output)
    next(file_sink)

    for tweet_text in tweets_source:
        file_sink.send(tweet_text)

    file_sink.close()
Exemplo n.º 2
0
def predict_user(user_id, filter_word, profile_path):
    """ Predicts labels for a Twitter account's tweets and prints the tally

    Arguments:
    ----------
        user_id:
            type: string
            info: Twitter user account without the '@'
        filter_word:
            type: string
            info: word applied to filter all tweets sentences
        profile_path:
            type: string
            info: relative path to the JSON profile file
    """

    classifier = HierarchicalClassif(profile_path)

    miner = TwitterMiner(token_key=U_K['token_key'],
                         token_secret=U_K['token_secret'])

    user_tweets = miner.get_user_tweets(user=user_id, word=filter_word.lower())

    # Count every non-None predicted label across the user's tweets.
    predictions = (classifier.predict(tweet) for tweet in user_tweets)
    results = Counter(label for label in predictions if label is not None)

    print(results)
Exemplo n.º 3
0
def predict_user(user_id, filter_word, profile_name):
    """ Predicts labels for a Twitter account's tweets and draws a pie chart

    Arguments:
    ----------
        user_id:
            type: string
            info: Twitter user account without the '@'

        filter_word:
            type: string
            info: word applied to filter all tweets sentences

        profile_name:
            type: string
            info: name of the JSON profile file
    """

    classifier = HierarchicalClassif(profile_name)

    miner = TwitterMiner(token_key=U_K['token_key'],
                         token_secret=U_K['token_secret'])

    label_counts = Counter()

    # Tally each tweet's predicted label, skipping tweets with no prediction.
    for tweet in miner.get_user_tweets(user=user_id, word=filter_word.lower()):
        predicted = classifier.predict(tweet)
        if predicted is not None:
            label_counts[predicted] += 1

    FiguresDrawer.draw_pie(counter=label_counts,
                           labels=classifier.get_labels(),
                           colors=classifier.get_colors(),
                           title=user_id + ' on ' + filter_word)
Exemplo n.º 4
0
def get_supervia_status(log):
    """Forwards new keyword-matching SuperVia tweets via the module sender.

    log: path to the file holding the last processed tweet id.
    """
    keywords = ['Belford Roxo', 'BelfordRoxo']
    supervia_id = ids['SUPERVIA_TWITTER']

    logged_id = TwitterMiner.get_last_tweet_id(supervia_id, log)

    # A falsy (empty/None) message list means nothing new to forward.
    for msg in tm.get_new_tweets(supervia_id, logged_id, keywords) or []:
        sender.send_message(msg)

    TwitterMiner.update_last_tweet_id(supervia_id, logged_id, tm, log)
Exemplo n.º 5
0
def main(keys='keys.ini',
         raw_tweets_file='twitter_data.txt',
         no_tweets=1000,
         tracked_words_file='tracks.csv',
         formatted_tweets_file='formatted_tweets.txt',
         dictionaries=None):
    """Perform an analysis to find sexist and rude words in tweets

    This module employs every other module to perform a full analysis on data
    retrieved from the Twitter stream. First a TwitterMiner retrieves data and
    dumps it, then a TweetFormatter parses the data into a list of tweets that
    are lists of words. Then it uses the spaghetti tagger to POStag every word,
    yielding a list of tweets that are lists with elements with the form (word,
    [tags]). A DictionaryTagger adds our custom tags to the [tags] list. Finally
    a TagCounter performs a count of every tag found in tweets. This program
    prints the number of coincidences of our custom tags.

    Arguments:
    ----------
        keys: path to the API keys .ini file
        raw_tweets_file: file where raw mined tweets are dumped
        no_tweets: number of tweets to mine
        tracked_words_file: CSV with the words to track on the stream
        formatted_tweets_file: file for the formatted tweet text
        dictionaries: list of YAML dictionary file names
            (defaults to the misogyny and curses dictionaries)
    """

    # Bug fix: the default was a mutable list shared across calls; use a
    # None sentinel and build a fresh list per call instead.
    if dictionaries is None:
        dictionaries = ['misoginy_dictionary.yml', 'curses_dictionary.yml']

    miner = TwitterMiner(keys, raw_tweets_file, no_tweets)
    miner.mine(tracked_words_file)

    formatter = TweetFormatter(raw_tweets_file)
    tweets = formatter.convert2json()
    tweets = formatter.convert2text(tweets, formatted_tweets_file)
    tweets = formatter.clean_tweets(tweets)
    tweets = [tweet.split() for tweet in tweets]

    tagger = DictionaryTagger(dictionaries)
    postagged_sents = spgt.pos_tag_sents(tweets)
    tagged_sents = tagger.tag(postagged_sents)

    counter = TagCounter(tagged_sents)
    res = counter.count()

    # NOTE(review): KeyError handling implies `res` behaves like a plain
    # dict (a Counter would return 0 instead of raising) — kept as-is.
    try:
        print("Palabras misóginas: {}".format(res['misóginia']))
    except KeyError:
        pass

    try:
        print("Palabras groseras: {}".format(res['grosería']))
    except KeyError:
        pass
Exemplo n.º 6
0
def get_time(status):
	time = datetime.now()
	print(f'{time.strftime("%Y/%m/%d_%H:%M:%S")} - {status}')


def get_supervia_status(log):
    """Forwards new keyword-matching SuperVia tweets via the module sender.

    log: path to the file holding the last processed tweet id.
    """
    keywords = ['Belford Roxo', 'BelfordRoxo']
    supervia_id = ids['SUPERVIA_TWITTER']

    logged_id = TwitterMiner.get_last_tweet_id(supervia_id, log)
    new_messages = tm.get_new_tweets(supervia_id, logged_id, keywords)

    # A falsy (empty/None) message list means nothing new to forward.
    if new_messages:
        for message in new_messages:
            sender.send_message(message)

    TwitterMiner.update_last_tweet_id(supervia_id, logged_id, tm, log)


if __name__ == '__main__':
    get_time('Starting')

    # Credentials come from the project config module (cfg).
    tm = TwitterMiner(
        key=cfg.TWITTER_API_KEY, key_secret=cfg.TWITTER_API_KEY_SECRET,
        token=cfg.TWITTER_API_TOKEN, token_secret=cfg.TWITTER_API_TOKEN_SECRET
    )
    sender = Sender(cfg.TELEGRAM_API_TOKEN, cfg.TELEGRAM_API_CHAT_ID)

    log_file = 'last-id.log'
    # Bug fix: the original concatenated the directory and file name without
    # a separator, producing '/set/your/path/herelast-id.log'.
    path = '/set/your/path/here/' + log_file
    get_supervia_status(path)

    get_time('Done')
Exemplo n.º 7
0
from __future__ import print_function, division, absolute_import

from twitter_miner import TwitterMiner

# Authenticate against the Twitter API and display the user's timeline.
miner = TwitterMiner()
miner.authenticate()
timeline = miner.get_timeline()
print(timeline)