def test_simple_text(self):
    """Seven repetitions of one word are all counted under the same key."""
    text = " ".join(["buffalo"] * 7)
    freq = word_frequency(text)
    self.assertEqual(freq["buffalo"], 7)
def test_complex_text(self):
    """Spot-check a few known word counts in the Gettysburg Address sample."""
    freq = word_frequency(gettysburg)
    expected = {"dedicated": 4, "great": 3, "devotion": 2}
    for word, count in expected.items():
        self.assertEqual(freq[word], count)
def test_complex_text(self):
    """Spot-check a few known word counts in the Gettysburg Address sample."""
    freq = word_frequency(gettysburg_address)
    expected = {"dedicated": 4, "great": 3, "devotion": 2}
    for word, count in expected.items():
        self.assertEqual(freq[word], count)
# Aggregate per-post word frequencies into one Counter per date.
# NOTE(review): replaced dict.has_key() (Python-2-only, removed in
# Python 3) with the `in` operator, which behaves identically here.
posts10 = posts2
cFinal = Counter()  # NOTE(review): appears unused in this fragment — verify downstream
allDates = {}       # maps parsed date -> Counter of word frequencies for that date
counter = 0         # counts only posts that had a parseable date
for x in posts10:
    date = dateParser(x)
    if date is None:
        continue  # skip posts whose date could not be parsed
    c = word_frequency(x['body'])
    if date in allDates:
        # Counter addition merges the two frequency tables key-wise.
        allDates[date] = allDates[date] + c
    else:
        allDates[date] = c
    counter += 1
    if (counter % 100) == 0:
        # Progress indicator; print(x) with one argument is valid in
        # both Python 2 and Python 3.
        print(counter)
# Aggregate per-post word frequencies into one Counter per date, then
# persist the result as JSON.
# NOTE(review): replaced dict.has_key() (Python-2-only, removed in
# Python 3) with the `in` operator, which behaves identically here.
posts10 = posts2
cFinal = Counter()  # NOTE(review): appears unused in this fragment — verify downstream
allDates = {}       # maps parsed date -> Counter of word frequencies for that date
counter = 0         # counts only posts that had a parseable date
for x in posts10:
    date = dateParser(x)
    if date is None:
        continue  # skip posts whose date could not be parsed
    c = word_frequency(x['body'])
    if date in allDates:
        # Counter addition merges the two frequency tables key-wise.
        allDates[date] = allDates[date] + c
    else:
        allDates[date] = c
    counter += 1
    if (counter % 100) == 0:
        # Progress indicator; print(x) with one argument is valid in
        # both Python 2 and Python 3.
        print(counter)
# A Counter serializes like a plain dict, but json.dump requires the
# dict keys (the parsed dates) to be strings — presumably dateParser
# returns strings; TODO confirm.
with open('JSON dump/stuff.txt', 'w') as outfile:
    json.dump(allDates, outfile)
st.write("Number of total submissions: {}".format(len(topic_lst))) reddit_df = pd.DataFrame(reddit_dict) with st.beta_expander('Show full text'): # Hide the output st.table(reddit_dict["title"]) with st.beta_expander('Show more information & optional download' ): # Show additional information like num. of comments st.markdown(get_table_download_link(reddit_df, filename="reddit_news.csv"), unsafe_allow_html=True) # Allow/enable download st.dataframe(reddit_df) #.drop(columns="title", axis=0)) #st.write("Number of comments: ", reddit_df[reddit_df["subreddit"] == "WorldNews"].num_comments.sum(axis=0)) word_frequency( ". ".join(topic_lst)) # Function to get top keywords & create a wordcloud ####################################################################################################################################################### st.markdown("## Search posts about a specific keyword") r_search_input = st.text_input("Enter a keyword", "bitcoin") r_search_sort = st.selectbox("Select sorting option", ["relevance", "hot", "top", "new", "comments"], key="r_search_sort") # search option r_search_time = st.selectbox("Select time filter option", ["all", "day", "month", "week", "year"], key="r_search_time") # time option r_search_output = st.slider("How many results should be displayed?", min_value=5, max_value=100, value=10,