def test_simple_text(self):
    """Seven repetitions of one word must be counted as 7."""
    text = ("buffalo buffalo buffalo buffalo buffalo "
            "buffalo buffalo")
    freq = word_frequency(text)
    self.assertEqual(freq["buffalo"], 7)
 def test_complex_text(self):
     freq = word_frequency(gettysburg)
     self.assertEqual(freq["dedicated"], 4)
     self.assertEqual(freq["great"], 3)
     self.assertEqual(freq["devotion"], 2)
 def test_complex_text(self):
     freq = word_frequency(gettysburg_address)
     self.assertEqual(freq["dedicated"], 4)
     self.assertEqual(freq["great"], 3)
     self.assertEqual(freq["devotion"], 2)
 def test_simple_text(self):
     freq = word_frequency("buffalo buffalo buffalo buffalo buffalo "
                           "buffalo buffalo")
     self.assertEqual(freq["buffalo"], 7)
# Example #5  (scraped snippet separator; original score: 0)
#     print y
# c3=c+c2
# print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
# for y in c3.most_common():  ##sorts it by most common
#     print y

# Aggregate word-frequency Counters per post date.
# Expects in scope: posts2 (iterable of post dicts with a 'body' key),
# dateParser(post) -> date-or-None, word_frequency(text) -> Counter.
posts10 = posts2
cFinal = Counter()
allDates = {}  # date -> Counter of word frequencies for all posts on that date
counter = 0    # number of posts successfully processed

for x in posts10:
    date = dateParser(x)
    if date is None:  # skip posts whose date could not be parsed
        continue
    c = word_frequency(x['body'])
    # dict.has_key() was removed in Python 3 — the `in` operator works in
    # both Python 2 and 3 and avoids the extra .get() lookup.
    if date in allDates:
        allDates[date] = allDates[date] + c  # Counter addition merges counts
    else:
        allDates[date] = c
    counter += 1
    if (counter % 100) == 0:
        # Progress indicator; print() call form is valid in Python 2 and 3.
        print(counter)




# for y in allDates:
#     print allDates[y]
# Example #6  (scraped snippet separator; original score: 0)
#     print y
# c3=c+c2
# print '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@'
# for y in c3.most_common():  ##sorts it by most common
#     print y

# Aggregate word-frequency Counters per post date, then dump the result
# to a JSON file.  Expects in scope: posts2 (iterable of post dicts with a
# 'body' key), dateParser(post) -> date-or-None,
# word_frequency(text) -> Counter.
posts10 = posts2
cFinal = Counter()
allDates = {}  # date -> Counter of word frequencies for all posts on that date
counter = 0    # number of posts successfully processed

for x in posts10:
    date = dateParser(x)
    if date is None:  # skip posts whose date could not be parsed
        continue
    c = word_frequency(x['body'])
    # dict.has_key() was removed in Python 3 — the `in` operator works in
    # both Python 2 and 3 and avoids the extra .get() lookup.
    if date in allDates:
        allDates[date] = allDates[date] + c  # Counter addition merges counts
    else:
        allDates[date] = c
    counter += 1
    if (counter % 100) == 0:
        # Progress indicator; print() call form is valid in Python 2 and 3.
        print(counter)

# Counter is a dict subclass, so json.dump serializes the values directly.
# NOTE(review): assumes dateParser returns string keys — json.dump raises
# TypeError on non-string dict keys unless they are int/float/bool/None.
with open('JSON dump/stuff.txt', 'w') as outfile:
    json.dump(allDates, outfile)
# Example #7  (scraped snippet separator; original score: 0)
# Render the Reddit submission results in the Streamlit app.
# Expects in scope: st (streamlit), pd (pandas), topic_lst (list of
# submission texts), reddit_dict (dict of column -> values, incl. "title"),
# get_table_download_link (helper producing an HTML download link).
st.write("Number of total submissions: {}".format(len(topic_lst)))

reddit_df = pd.DataFrame(reddit_dict)

# NOTE(review): st.beta_expander was removed in Streamlit 1.0 — migrate
# these two calls to st.expander when the pinned Streamlit version allows.
with st.beta_expander('Show full text'):  # Hide the output
    st.table(reddit_dict["title"])

with st.beta_expander('Show more information & optional download'
                      ):  # Show additional information like num. of comments
    st.markdown(get_table_download_link(reddit_df, filename="reddit_news.csv"),
                unsafe_allow_html=True)  # Allow/enable download
    st.dataframe(reddit_df)  #.drop(columns="title", axis=0))

#st.write("Number of comments: ", reddit_df[reddit_df["subreddit"] == "WorldNews"].num_comments.sum(axis=0))

word_frequency(
    ". ".join(topic_lst))  # Function to get top keywords & create a wordcloud

#######################################################################################################################################################

st.markdown("## Search posts about a specific keyword")
r_search_input = st.text_input("Enter a keyword", "bitcoin")
# Sorting and time-filter options for the Reddit search API call.
r_search_sort = st.selectbox("Select sorting option",
                             ["relevance", "hot", "top", "new", "comments"],
                             key="r_search_sort")  # search option
r_search_time = st.selectbox("Select time filter option",
                             ["all", "day", "month", "week", "year"],
                             key="r_search_time")  # time option
r_search_output = st.slider("How many results should be displayed?",
                            min_value=5,
                            max_value=100,
                            value=10,