def parse_post_per_word_face(collect, parameters, RECENT, LIMIT, SKIP):
    """Return Facebook posts whose message contains *word*, sorted by likes.

    The search word is popped out of the first ``$match`` stage of
    *parameters* before the aggregation pipeline runs, so the pipeline
    itself does not filter on it; matching is done client-side below.

    :param collect: MongoDB collection to aggregate over.
    :param parameters: aggregation pipeline; ``parameters[0]['$match']``
        must carry a ``'word'`` key (removed here).
    :param RECENT: unused; kept for signature compatibility with callers.
    :param LIMIT: page size.
    :param SKIP: number of leading results to drop (pagination offset).
    :return: list of matching post documents, ``like_count`` descending,
        sliced to the requested page.
    """
    word = parameters[0]['$match'].pop('word')
    output = []
    # MongoDB aggregation
    db_cursor = collect.aggregate(parameters)
    print('\nData acquired.\n')
    # Hoisted: filtered(word) is invariant across all documents/words.
    target = filtered(word)
    for doc in db_cursor:
        text = doc['message'] or ''  # message may be None/empty
        words = filtered(text).translate(punct_tab).split(' ')
        if any(target == filtered(w) for w in words):
            output.append(doc)
    output = sorted(output, key=itemgetter('like_count'), reverse=True)
    return output[SKIP:SKIP + LIMIT]  # pagination
def parse_post_per_word_no_rt(collect, parameters):
    """Return the list of tweets whose text contains *word* (no retweet pass).

    The search word is popped from the first ``$match`` stage of
    *parameters*; matching against each tweet's text is done client-side.

    :param collect: MongoDB collection to aggregate over.
    :param parameters: aggregation pipeline; ``parameters[0]['$match']``
        must carry a ``'word'`` key (removed here).
    :return: list of matching tweet documents, in cursor order.
    """
    word = parameters[0]['$match'].pop('word')
    output = []
    # MongoDB aggregation
    db_cursor = collect.aggregate(parameters)
    print('\nData acquired.\n')
    # Hoisted: filtered(word) is invariant across all documents/words.
    target = filtered(word)
    for doc in db_cursor:
        words = filtered(doc['status']['text']).translate(punct_tab).split(' ')
        if any(target == filtered(w) for w in words):
            output.append(doc)
    return output
def parse_word_face(collect, FILTER, SKIP, LIMIT, parameters, RECENT):
    """Return a paginated top-word list for Facebook posts.

    Aggregates the messages of the posts matched by *parameters*, counts
    word frequencies (keys normalized through ``filtered``), and returns
    the requested page sorted by count descending.

    :param collect: MongoDB collection to aggregate over.
    :param FILTER: unused; kept for signature compatibility with callers.
    :param SKIP: number of leading results to drop (pagination offset).
    :param LIMIT: page size.
    :param parameters: aggregation pipeline.
    :param RECENT: unused; kept for signature compatibility with callers.
    :return: list of ``{'word': str, 'count': int}`` dicts.
    """
    word_count = {}
    # MongoDB aggregation
    db_cursor = collect.aggregate(parameters)
    print('\nPosts Acquired.\n')
    for doc in db_cursor:
        text = doc['message'] or ''  # message may be None/empty
        for word in clear_text(text):
            # Single filtered() call per word (was duplicated in try/except).
            key = filtered(word)
            word_count[key] = word_count.get(key, 0) + 1
    # Sort (word, count) pairs by count descending, then paginate.
    top = sorted(word_count.items(), key=lambda kv: kv[1], reverse=True)
    top = top[SKIP:SKIP + LIMIT]  # pagination
    return [{'word': w, 'count': c} for w, c in top]
def parse_post_per_word(collect, FILTER, projection, SKIP, LIMIT, parameters, RECENT):
    """Return a page of tweets containing *word*: retweets first, then originals.

    First collects retweets via the aggregation pipeline; if the requested
    page cannot be filled (and RECENT is falsy), falls back to a ``find``
    over original (non-retweet) tweets, tagging them with ``count = 0`` so
    they sort after any retweet.

    :param collect: MongoDB collection.
    :param FILTER: find() filter document; mutated here (gains a
        ``'status.retweeted_status'`` exclusion) — NOTE(review): this side
        effect is visible to the caller.
    :param projection: projection for the fallback find().
    :param SKIP: pagination offset.
    :param LIMIT: page size.
    :param parameters: aggregation pipeline; ``parameters[0]['$match']``
        must carry a ``'word'`` key (removed here).
    :param RECENT: when truthy, skip the fallback pass over originals.
    :return: matching documents sorted by ``count`` descending, paginated.
    """
    word = parameters[0]['$match'].pop('word')
    output = []
    # MongoDB aggregation over retweets
    db_cursor = collect.aggregate(parameters)
    print('\nRetweets acquired.\n')
    # Hoisted: filtered(word) is invariant across all documents/words.
    target = filtered(word)
    for doc in db_cursor:
        text = doc['status']['retweeted_status']['text']
        words = filtered(text).translate(punct_tab).split(' ')
        if any(target == filtered(w) for w in words):
            output.append(doc)
    # Fallback: original tweets, only when the page isn't already filled.
    if len(output) < LIMIT + SKIP and not RECENT:
        FILTER['status.retweeted_status'] = {'$exists': False}
        db_cursor = collect.find(FILTER, projection)
        print('\nTweets acquired.\n')
        try:
            for doc in db_cursor:
                words = filtered(doc['status']['text']).translate(punct_tab).split(' ')
                if any(target == filtered(w) for w in words):
                    doc['count'] = 0  # originals carry no retweet count; sort last
                    # presumably id_str avoids precision loss on 64-bit ids
                    # downstream — TODO confirm against consumer.
                    doc['status']['id'] = doc['status']['id_str']
                    output.append(doc)
        finally:
            # Close the find() cursor even if a document raises mid-loop.
            db_cursor.close()
    output = sorted(output, key=itemgetter('count'), reverse=True)
    return output[SKIP:SKIP + LIMIT]  # pagination
def parse_word(collect, FILTER, projection, SKIP, LIMIT, parameters, RECENT):
    """Return a paginated list of top words plus count.

    Retweet texts (from the aggregation) contribute their retweet
    ``count`` per distinct word; original tweets (fallback ``find``)
    contribute 1 per distinct word. Each word is counted at most once
    per tweet.

    :param collect: MongoDB collection.
    :param FILTER: find() filter document; mutated here (gains a
        ``'status.retweeted_status'`` exclusion) — NOTE(review): this side
        effect is visible to the caller.
    :param projection: projection for the fallback find().
    :param SKIP: pagination offset.
    :param LIMIT: page size.
    :param parameters: aggregation pipeline.
    :param RECENT: when truthy, skip the pass over original tweets.
    :return: list of ``{'word': str, 'count': int}`` dicts.
    """
    word_count = {}

    db_cursor = collect.aggregate(parameters)
    print('\nRetweets\' texts acquired.\n')
    for doc in db_cursor:
        text = doc['retweeted_status']['text']
        rt_count = doc['count']
        seen = set()  # set, not list: O(1) membership (was O(n) per word)
        for word in clear_text(text):
            if word in seen:
                continue  # count each word only once per tweet
            seen.add(word)
            # NOTE(review): dedupe is on the raw word but the tally key is
            # filtered(word) — two raw variants with the same filtered form
            # are counted twice per tweet; confirm this is intended.
            key = filtered(word)
            word_count[key] = word_count.get(key, 0) + rt_count

    if not RECENT:
        # MongoDB find over original (non-retweet) tweets
        FILTER['status.retweeted_status'] = {'$exists': False}
        db_cursor = collect.find(FILTER, projection)
        print('\nTweets\' texts acquired.\n')
        try:
            for doc in db_cursor:
                seen = set()
                for word in clear_text(doc['status']['text']):
                    if word in seen:
                        continue  # count each word only once per tweet
                    seen.add(word)
                    key = filtered(word)
                    word_count[key] = word_count.get(key, 0) + 1
        finally:
            # Close the find() cursor even if a document raises mid-loop.
            db_cursor.close()

    # Sort (word, count) pairs by count descending, then paginate.
    top = sorted(word_count.items(), key=lambda kv: kv[1], reverse=True)
    top = top[SKIP:SKIP + LIMIT]  # pagination
    return [{'word': w, 'count': c} for w, c in top]