Пример #1
0
def fetchsamples(needed_sent_val=None, max_iters=1000):
    """Collect image tweets from the Twitter sample stream.

    For every line of the stream response the tweet is parsed, filtered,
    scored with three sentiment methods (VADER, AFINN, Hu-Liu) and kept
    only when all three scores are equal.  The first attached image is
    then fetched and hashed; if the hash matches an existing one the tweet
    is recorded as a duplicate, otherwise a new record is written and the
    image is saved as ``IMAGE_SAVE_PATH + <id_str> + '.jpg'``.

    Args:
        needed_sent_val: When not ``None``, only tweets whose VADER
            sentiment equals this value are kept.
        max_iters: Maximum number of *new* records to save before the
            stream is abandoned.  Skipped tweets and duplicates do not
            count towards this limit.

    Returns:
        None.  Results are written through the database helpers and to
        the image directory.
    """
    word_list = sh.english_word_list()
    afinn_dict = cs.load_afinn_dictionary('text_sentiment/AFINN-111.txt')
    huliu_dict = \
        cs.load_huliu_dict('text_sentiment/hu_liu/opinion-lexicon-English/')
    url = "https://stream.twitter.com/1/statuses/sample.json"
    parameters = []
    response = ts.twitterreq(url, "GET", parameters)
    num_saved = 0

    for line in response:

        # ``>=`` so that at most ``max_iters`` records are saved.
        if num_saved >= max_iters:
            break

        if isinstance(line, bytes):
            line = line.decode('utf-8')

        # Lines that are not valid JSON are treated as error messages:
        # wait 1 sec to avoid rate limits.  Only parsing errors are caught
        # so that Ctrl-C / SystemExit still stop the loop.
        try:
            tweet = json.loads(line.strip())
        except (TypeError, ValueError):
            time.sleep(1)
            print('waiting....')
            continue

        # stop processing if tweet doesn't meet basic criteria
        if not prt.decide_to_include_tweet(tweet):
            continue
        if not prt.image_is_original(tweet):
            continue

        # Calculate tweet sentiment with all three methods
        tweet_txt = tweet['text']
        cleaned_text = sh.parse_sentence(tweet_txt, word_list)
        vader_sent = cs.calculate_vader(cleaned_text)
        afinn_sent = cs.calculate_simple_sentiment(cleaned_text, afinn_dict)
        huliu_sent = cs.calculate_simple_sentiment(cleaned_text, huliu_dict)

        # Keep the tweet only when every method agrees
        if not (vader_sent == afinn_sent == huliu_sent):
            continue

        # ``is not None`` so a falsy target value (e.g. 0) is still honoured
        if needed_sent_val is not None and vader_sent != needed_sent_val:
            continue

        # retrieve and hash the first attached image
        image_url = tweet['extended_entities']['media'][0]['media_url']
        img = fetch_image(image_url)
        image_hash = dedupe.calculate_image_hash(img)

        # Ensure not an exact duplicate; duplicates are logged but not saved
        match = dedupe.find_matching_hash(image_hash, tweet['id'])
        if match:
            try:
                add_dupe_to_db(tweet, match, vader_sent,
                               image_hash, cleaned_text)
            except Exception as err:
                print(err)
            continue

        # Save image and write info to db; failures are reported and the
        # tweet is not counted.
        try:
            add_new_record_to_db(tweet, vader_sent, image_hash, cleaned_text)
            img.save(IMAGE_SAVE_PATH + tweet['id_str'] + '.jpg')
        except Exception as err:
            print(err)
        else:
            num_saved += 1
Пример #2
0
"""
Combines all the sentiment dictionaries into one file to use in parsing
hashtags (because other words don't matter)
"""

from Python_code.text_sentiment import compare_sentiments as cs

# Each lexicon is a dict whose keys are the words.
afinn_dict = cs.load_afinn_dictionary("AFINN-111.txt")
hului_dict = cs.load_huliu_dict("hu_liu/opinion-lexicon-English/")
vader_dict = cs.vader.make_lex_dict("vader/vader_sentiment_lexicon.txt")

# Union of all lexicon words, de-duplicated and sorted.  Building the union
# with sets avoids the quadratic "is it already in the list?" scan.
sentiment_words = sorted(set(afinn_dict) | set(hului_dict) | set(vader_dict))

# Write one word per line.
with open("hashtag_dict.txt", "w") as out_file:
    out_file.writelines(word + "\n" for word in sentiment_words)