# Dump the "commentCount" column to comment_counts.csv and echo the first
# 100 values to stdout.
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
import pandas as pd
import numpy as np

fbDatabase = FacebookDataDatabase()

# Each returned row is indexed at 0 to get the count. Any value that is not
# strictly positive is recorded as 0.
counts = [
    row[0] if row[0] > 0 else 0
    for row in fbDatabase.selectColumnData("commentCount")
]

# Print the leading values, one per line.
for count in counts[:100]:
    print(count)

# Single-column frame, written with pandas' default CSV settings.
df = pd.DataFrame(counts, columns=["commentCount"])
df.to_csv("comment_counts.csv")
# Plot a histogram of the "shareCount" column using unit-width bins over
# 0..99, save it as a PNG, then print the standard deviation and variance of
# the raw values.
import numpy as np
from matplotlib import pyplot as plt

from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase

facebookDb = FacebookDataDatabase()
shareCountsTuples = facebookDb.selectColumnData("shareCount")
# The value of interest is the first field of every returned row.
shareCounts = [row[0] for row in shareCountsTuples]

# Fixed bin size: edges 0, 1, ..., 99.
bins = np.arange(0, 100, 1)

# Use a dedicated figure instead of pyplot's implicit "current" one, so that
# other plots produced in the same process cannot end up on these axes.
fig, ax = plt.subplots()
# The x-limits are set before drawing, as in the original call order.
ax.set_xlim(min(shareCounts), 100)
ax.hist(shareCounts, bins=bins, alpha=0.5)
fig.savefig(
    '/Users/ccrowe/Documents/Thesis/facebook_api/Notebooks/DataStats/shareCountHist.png'
)
# Release the figure so it neither leaks nor becomes the target of later
# implicit pyplot calls.
plt.close(fig)

print(np.std(shareCounts))
print(np.var(shareCounts))
# Histogram of the first 5000 "shareCount" values, zoomed to 0..200 on the
# x-axis, saved as a PNG.
import matplotlib.pyplot as plt

from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase

facebookDb = FacebookDataDatabase()

# First field of each row, limited to the first 5000 rows. The list was
# previously called `commentCounts` although it holds share counts.
shareCounts = [
    row[0] for row in facebookDb.selectColumnData("shareCount")
][:5000]

# Use a dedicated figure instead of pyplot's implicit "current" one, so that
# other plots produced in the same process cannot end up on these axes.
fig, ax = plt.subplots()
ax.hist(shareCounts, bins=5000)  # arguments are passed to np.histogram
ax.set_xlim(0, 200)
ax.set_title("Histogram of Share Counts")
ax.set_xlabel("Share Count")
ax.set_ylabel("Count")
fig.savefig(
    "/Users/ccrowe/Documents/Thesis/facebook_api/Notebooks/shareCountHistogram.png"
)
# Release the figure once it has been written to disk.
plt.close(fig)
# Histogram (100 bins) of the "postPositivity" column, restricted to values
# greater than -1, saved as a PNG.
import matplotlib.pyplot as plt

from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase

facebookDb = FacebookDataDatabase()

# Keep only values above -1.
# NOTE(review): -1 presumably marks posts without a sentiment score; confirm
# against the code that populates this column.
# The list was previously called `commentCounts` although it holds
# positivity scores.
postPositivities = [
    row[0]
    for row in facebookDb.selectColumnData("postPositivity")
    if row[0] > -1
]

# Use a dedicated figure instead of pyplot's implicit "current" one, so that
# other plots produced in the same process cannot end up on these axes.
fig, ax = plt.subplots()
ax.hist(postPositivities, bins=100)  # arguments are passed to np.histogram
ax.set_title("Histogram of Post Sentiment Positivity")
ax.set_xlabel("Post Sentiment")
ax.set_ylabel("Bin Count")
fig.savefig(
    "/Users/ccrowe/Documents/Thesis/facebook_api/Notebooks/postSentimentHistogram.png"
)
# Release the figure once it has been written to disk.
plt.close(fig)
# Histogram (2000 bins) of the "commentCount" column, zoomed to 0..150 on the
# x-axis, saved as a PNG.
import matplotlib.pyplot as plt

from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase

facebookDb = FacebookDataDatabase()

# The value of interest is the first field of every returned row.
commentCounts = [
    row[0] for row in facebookDb.selectColumnData("commentCount")
]

# Use a dedicated figure instead of pyplot's implicit "current" one, so that
# other plots produced in the same process cannot end up on these axes.
fig, ax = plt.subplots()
ax.hist(commentCounts, bins=2000)  # arguments are passed to np.histogram
ax.set_xlim(0, 150)
ax.set_title("Histogram of Comment Counts")
ax.set_xlabel("Comment Count")
ax.set_ylabel("Count in Bin")
fig.savefig(
    "/Users/ccrowe/Documents/Thesis/facebook_api/Notebooks/commentCountHistogram.png"
)
# Release the figure once it has been written to disk.
plt.close(fig)
# Plot a histogram of the "postPositivity" column using unit-width bins over
# 0..99, save it as a PNG, then print the standard deviation and variance of
# the raw values.
import numpy as np
from matplotlib import pyplot as plt

from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase

facebookDb = FacebookDataDatabase()
sentimentsTuples = facebookDb.selectColumnData("postPositivity")
# The value of interest is the first field of every returned row.
sentiments = [row[0] for row in sentimentsTuples]

# Fixed bin size: edges 0, 1, ..., 99.
# NOTE(review): another script in this file multiplies this column by 100
# before using it, which suggests the stored values may be fractions; if so,
# unit-width bins would put nearly everything into the first bin. Confirm the
# value range before relying on this plot.
bins = np.arange(0, 100, 1)

# Use a dedicated figure instead of pyplot's implicit "current" one, so that
# other plots produced in the same process cannot end up on these axes.
fig, ax = plt.subplots()
# The x-limits are set before drawing, as in the original call order.
ax.set_xlim(min(sentiments), 100)
ax.hist(sentiments, bins=bins, alpha=0.5)
fig.savefig(
    '/Users/ccrowe/Documents/Thesis/facebook_api/Notebooks/DataStats/sentimentHist.png'
)
# Release the figure once it has been written to disk.
plt.close(fig)

print(np.std(sentiments))
print(np.var(sentiments))
# Print the variance of the comment-count, share-count and (scaled by 100)
# post-positivity columns.
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
from Notebooks.LinkDatabases.PostComments import PostDataDatabase
import numpy as np

facebookDb = FacebookDataDatabase()
# Not used below; the construction is kept because it may have side effects
# that cannot be seen from this script.
commentDb = PostDataDatabase()

# Unwrap the first field of each row before computing statistics, the same
# way the sentiment column below is handled. Passing the raw rows to np.var
# only gives the intended number as long as every row has exactly one field.
commentCounts = [
    row[0] for row in facebookDb.selectColumnData("commentCount")
]
print("Comment Count Variance: {0}".format(np.var(commentCounts)))

shareCounts = [
    row[0] for row in facebookDb.selectColumnData("shareCount")
]
print("Share Count Variance: {0}".format(np.var(shareCounts)))

# Positivity values are multiplied by 100 before the variance is taken.
sentiments = [
    row[0] * 100 for row in facebookDb.selectColumnData("postPositivity")
]
print(sentiments[:20])
print("Sentiment Variance: {0}".format(np.var(sentiments)))
# Plot a histogram of log-transformed comment counts using unit-width bins
# over 0..99, save it as a PNG, then print the standard deviation and
# variance of the transformed values.
import numpy as np
from matplotlib import pyplot as plt

from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase

facebookDb = FacebookDataDatabase()
commentCountsTuples = facebookDb.selectColumnData("commentCount")
# The value of interest is the first field of every returned row.
commentCounts = [row[0] for row in commentCountsTuples]

# Natural log of positive counts; non-positive values are passed through
# unchanged because log is undefined for them.
# NOTE(review): a count of 1 maps to log(1) = 0.0, the same position as a
# count of 0. Confirm that this is intended.
commentCountsLog = [
    np.log(count) if count > 0 else count for count in commentCounts
]

# Fixed bin size: edges 0, 1, ..., 99.
bins = np.arange(0, 100, 1)

# Use a dedicated figure instead of pyplot's implicit "current" one, so that
# other plots produced in the same process cannot end up on these axes.
fig, ax = plt.subplots()
# The x-limits are set before drawing, as in the original call order.
ax.set_xlim(min(commentCountsLog), 100)
ax.hist(commentCountsLog, bins=bins, alpha=0.5)
fig.savefig(
    '/Users/ccrowe/Documents/Thesis/facebook_api/Notebooks/DataStats/commentCountHist.png'
)
# Release the figure once it has been written to disk.
plt.close(fig)

print(np.std(commentCountsLog))
print(np.var(commentCountsLog))