def main():
    """Write one tab-separated word-count file per user under ./user_info/.

    Steps:
      1. Load the ANEW dictionary with load_anew_dict() and the statuses
         with status_reader.load_statuses().
      2. Build the per-user records with user_words(statuses). Each record
         is accessed here through its "user_id" and "word_counts" keys.
      3. Read 'userid_hexColor' (tab-separated: user id, then color) and
         store the color under "color" on every record with that user id.
      4. For every record write "./user_info/<color>_words": a header row,
         then one row per word ordered by descending count. The ANEW column
         is "1" when the word is a key of the ANEW dictionary, else "0".

    Raises:
        KeyError: if a record never received a "color" (no matching line in
            'userid_hexColor'), unless user_words() already supplies one --
            TODO confirm against user_words().
        IOError/OSError: if 'userid_hexColor' or an output file cannot be
            opened.
    """
    anew_dict = load_anew_dict()
    # A set gives O(1) membership tests; .keys() is a plain list on Python 2.
    anew_words = set(anew_dict.keys())

    users = {}
    dates = []
    statuses = {}
    status_reader.load_statuses(dates, statuses, users)

    users_words = user_words(statuses)

    # Attach a color to each user record. The context manager guarantees the
    # mapping file is closed even if a line cannot be processed.
    with open('userid_hexColor', 'r') as userID_color_file:
        for line in userID_color_file:
            entry = line.split('\t')
            if len(entry) < 2:
                # Blank or malformed line (e.g. a trailing newline): skip it
                # instead of failing with IndexError.
                continue
            userid = entry[0].strip()
            color = entry[1].strip()
            for user in users_words:
                if user["user_id"] == userid:
                    # Progress output kept from the original; this call form
                    # prints the same text on Python 2 and Python 3.
                    print(userid)
                    user["color"] = color

    # Corpus-wide totals. Only the optional debug dump below reads them.
    # Start from 0 so each total is the exact sum of the per-user counts
    # (seeding with 1 made every total one too high).
    total_word_counts = {}
    for user in users_words:
        for word, count in user["word_counts"].items():
            total_word_counts[word] = total_word_counts.get(word, 0) + count

    # Debug aid: uncomment to print the total word counts.
    # for w in sorted(total_word_counts, key=total_word_counts.get, reverse=True):
    #     print(w, total_word_counts[w])

    # Create word count files, one per user, named after the user's color.
    for user in users_words:
        word_counts = user["word_counts"]
        with open("./user_info/" + user["color"] + "_words", 'w') as new_file:
            new_file.write("word" + '\t' + "count" + '\t' + "ANEW" + '\n')
            for w in sorted(word_counts, key=word_counts.get, reverse=True):
                anew_flag = "1" if w in anew_words else "0"
                new_file.write(
                    w + '\t' + str(word_counts[w]) + '\t' + anew_flag + '\n')
# Build a word cloud from the statuses of the users listed in a category file.
#
# Usage: pass the category file path as the first command-line argument.
# The first line of that file is stored in `category` (presumably the
# category name -- TODO confirm); every following line holds one user id.
# Only the first 40 characters of an id are compared.
import sys
from os import path

from wordcloud import WordCloud

import status_reader

d = path.dirname(__file__)

# Read the whole category file, then close it right away instead of leaving
# the handle open for the rest of the run.
with open(sys.argv[1]) as category_file:
    category_ids = category_file.readlines()

category = category_ids[0]
user_ids = [x.strip()[0:40] for x in category_ids[1:]]
# Set copy of the ids so the membership test below is O(1) per user.
user_id_set = set(user_ids)

# load_statuses fills these containers in place.
users = {}
dates = []
statuses = {}
status_reader.load_statuses(dates, statuses, users)

# Collect every status belonging to a user in this category.
category_text = []
for user_id in statuses:
    user_id_trimmed = user_id[0:40]
    if user_id_trimmed in user_id_set:
        category_text.extend(statuses[user_id])

text_compilation = ' '.join(category_text)

# Generate a word cloud image (the first positional argument is the font path).
wordcloud = WordCloud('../DINOT.otf').generate(text_compilation)

# Display the generated image:
# the matplotlib way: