def run():
    """Store post data for every ad URL whose page is not yet in the database.

    For each URL returned by ``adDb.selectAdData()`` the module-level
    ``token`` is refreshed, the URL is normalised with ``modifyUrl`` and its
    page id is looked up.  Pages already known to ``FacebookDataDatabase`` are
    skipped; for the others the page metrics and posts are fetched and each
    post is handed to ``StoreInFacebookData``.
    """
    global token

    first_row = 0
    page_db = FacebookDataDatabase()
    for raw_url in adDb.selectAdData()[first_row:]:
        token = getToken()
        url = modifyUrl(raw_url)
        page_id = get_page_id(url)
        if page_db.isPageInDb(page_id):
            continue

        page = Page()
        page.metrics = getPageMetrics(url)
        page.posts = getPostData(url)
        if not page.posts:
            continue

        for post in page.posts:
            # A fresh token is requested before every post is processed.
            token = getToken()
            set_post_data(post)
            # NOTE(review): the meaning of the trailing -1 is not visible
            # here - presumably an "unset" placeholder; confirm.
            StoreInFacebookData(
                post.id, post.image_url, post.message,
                post.share_count, post.comment_count,
                page.metrics.fan_count,
                page.metrics.rating_count,
                page.metrics.talking_about_count,
                page.metrics.star_rating, -1)
            print("Stored!", post.id)
Пример #2
0
class MessageGetter:
    """Expose the rows of the Facebook data table as ``{column: value}`` dicts.

    ``facebookDb`` is a class attribute, so the database object is created
    once, when the class body is executed.
    """

    facebookDb = FacebookDataDatabase()

    @staticmethod
    def get_columns(remove_columns=True):
        """Return the table's column names as a list.

        The name is taken from index 1 of every entry returned by
        ``facebookDb.getColumnNames()``.

        Args:
            remove_columns: when true, ``"imageId"`` and ``"imageUrl"`` are
                removed from the result.

        Raises:
            ValueError: if ``remove_columns`` is true and one of the two
                columns is not present (``list.remove`` semantics).
        """
        columns = [entry[1]
                   for entry in MessageGetter.facebookDb.getColumnNames()]
        if remove_columns:
            columns.remove("imageId")
            columns.remove("imageUrl")
        return columns

    @staticmethod
    def __dict_factory(row, columns):
        """Map each name in ``columns`` to the value at the same index of ``row``."""
        return {col: row[idx] for idx, col in enumerate(columns)}

    @staticmethod
    def __get_post():
        """Yield every stored row as a dict keyed by the full column list.

        The table and the column names are each fetched once per generator
        run rather than once per length check / once per row.  Progress is
        printed every 1000 rows as a fraction in ``[0, 1)``.
        """
        postData = MessageGetter.facebookDb.selectFacebookData()
        number_of_posts_to_train_on = len(postData)
        # Loop-invariant: the column list does not change between rows.
        columns = MessageGetter.get_columns(False)
        for i, post in enumerate(postData):
            if i % 1000 == 0:
                print("gather data percent: ", i / number_of_posts_to_train_on)
            yield MessageGetter.__dict_factory(post, columns)

    @staticmethod
    def get_post_generator():
        """Return a fresh generator over all posts (see ``__get_post``)."""
        return MessageGetter.__get_post()
Пример #3
0
class Global:
    """Namespace of class-level settings and shared objects.

    Everything here is a class attribute, so ``PlotLearning(...)`` and
    ``FacebookDataDatabase()`` are constructed once, when the class body is
    executed.
    """
    # NOTE(review): the numeric settings are presumably read by training code
    # outside this view - confirm against the callers.
    batch_size = 256
    epochs = 100
    group_size = 100_000
    plot_losses = PlotLearning("nlp_nn_sentiment_count")
    facebookDb = FacebookDataDatabase()
    # No regularizer is configured.
    regularizer_function = None
class Static:
    """Namespace of class-level settings and shared objects.

    All values are class attributes evaluated once when the class body runs;
    that includes the database object, the directory listing and the
    ``PlotLearning`` instance.
    """
    facebookDb = FacebookDataDatabase()
    metric_getter = facebookDb.getShareCount  # Set this to change the model type
    # Number of entries in ../Image_CNN/images (resolved against the current
    # working directory) at the time the class body runs.
    limit = len(os.listdir("../Image_CNN/images"))
    # group_size is tied to limit.
    group_size = limit
    plot_losses = PlotLearning("combined_keras_model")
    batch_size = 1
    epochs = 20
Пример #5
0
class Static:
    """Namespace of class-level settings and shared objects.

    All values are class attributes evaluated once when the class body runs,
    including the database object and the ``PlotLearning`` instance.
    """
    facebookDb = FacebookDataDatabase()
    metric_getter = facebookDb.getCommentCount  # Set this to change the model type
    # Fixed value; the commented-out expression is the directory-size
    # alternative.
    limit = 80_000  # len(os.listdir("../Image_CNN/images"))
    # group_size is tied to limit.
    group_size = limit
    plot_losses = PlotLearning("combined_keras_model_comment_count")
    batch_size = 512
    epochs = 100
class Static:
    """Namespace of class-level settings and shared objects.

    All values are class attributes evaluated once when the class body runs,
    including the database object and the ``PlotLearning`` instance.
    """
    facebookDb = FacebookDataDatabase()
    metric_getter = facebookDb.getShareCount  # Set this to change the model type
    # NOTE(review): group_size and limit are set independently here; how they
    # relate is decided by code outside this view.
    group_size = 6000
    limit = 7000
    plot_losses = PlotLearning("combined_keras_model")
    batch_size = 1
    epochs = 20
class Global:
    """Namespace of class-level settings and shared objects.

    Everything here is a class attribute, so ``PlotLearning(...)`` and
    ``FacebookDataDatabase()`` are constructed once, when the class body is
    executed.
    """
    batch_size = 256
    epochs = 100
    group_size = 100_000
    plot_losses = PlotLearning("nlp_nn_comment_count")
    facebookDb = FacebookDataDatabase()
    # Bound method of the shared database object.
    metric_getter = facebookDb.getCommentCount
    # NOTE(review): presumably the database column matching metric_getter -
    # confirm where metric_name is consumed.
    metric_name = "commentCount"
    # No regularizer is configured.
    regularizer_function = None
Пример #8
0
def test_to_vector():
    """Print the first 100 results of ``to_vector`` for stored post messages.

    Post ids are the names in ``../Image_CNN/images`` with their last four
    characters removed.  For every id with a truthy ``getRow`` result, the
    first row is taken and its element at index 10 is passed to
    ``to_vector`` as the message.
    """
    import os
    image_names = os.listdir("../Image_CNN/images")
    ids = [name[:-4] for name in image_names]
    from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
    facebookDb = FacebookDataDatabase()
    metric_getter = facebookDb.getCommentCount  # Set this to change the model type

    # Look every id up and keep the first row of each non-empty result.
    rows = []
    for post_id in ids:
        result = facebookDb.getRow(post_id)
        if result:
            rows.append(result[0])

    data = [(row[0], row[10], row[2], row[3]) for row in rows]
    messages = [entry[1] for entry in data]
    word_vectors = to_vector(messages)
    for word_vector in word_vectors[:100]:
        print(word_vector)
class Global:
    """Namespace of class-level settings and shared objects.

    Everything here is a class attribute, so ``PlotLearning(...)`` and
    ``FacebookDataDatabase()`` are constructed once, when the class body is
    executed.
    """
    batch_size = 1
    epochs = 5
    group_size = 6000
    plot_losses = PlotLearning("nlp_nn")
    facebookDb = FacebookDataDatabase()
    # Bound method of the shared database object.
    metric_getter = facebookDb.getSentiment
    # No metric name and no regularizer are configured.
    metric_name = None
    regularizer_function = None
Пример #10
0
class Static:
    """Namespace of class-level settings and a shared database object.

    All values are class attributes; ``FacebookDataDatabase()`` is constructed
    once, when the class body is executed.
    """
    facebookDb = FacebookDataDatabase()
    metric_getter = facebookDb.getSentiment  # Set this to change the model type
    # NOTE(review): how group_size and limit are consumed is not visible here.
    group_size = 40000
    limit = 60000
class Static:
    """Namespace of class-level settings and a shared database object.

    All values are class attributes; ``FacebookDataDatabase()`` is constructed
    once, when the class body is executed.
    """
    facebookDb = FacebookDataDatabase()
    metric_getter = facebookDb.getCommentCount  # Set this to change the model type
    # NOTE(review): how limit is consumed is not visible here.
    limit = 30000
Пример #12
0
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
import matplotlib.pyplot as plt

facebookDb = FacebookDataDatabase()
# First field of every "postPositivity" row, keeping only values above -1.
# NOTE(review): -1 looks like an "unset" marker - confirm.  The list is named
# commentCounts although it holds positivity values.
commentCounts = [
    value
    for value in (row[0] for row in facebookDb.selectColumnData("postPositivity"))
    if value > -1
]
plt.hist(commentCounts, bins=100)  # arguments are passed to np.histogram
plt.title("Histogram of Post Sentiment Positivity")
plt.xlabel("Post Sentiment")
plt.ylabel("Bin Count")
# The figure is written to an absolute, machine-specific path.
plt.savefig(
    "/Users/ccrowe/Documents/Thesis/facebook_api/Notebooks/postSentimentHistogram.png"
)
Пример #13
0
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
from collections import defaultdict

# Number of stored posts per page, keyed by page id.
d = defaultdict(int)
facebookDb = FacebookDataDatabase()
ids = [row[0] for row in facebookDb.get_post_ids()]
# The loop variable is not called ``id`` so the builtin stays usable at
# module scope.
for post_id in ids:
    # The text before the first "_" of a post id is used as the page id.
    page_id = post_id.split("_")[0]
    d[page_id] += 1

import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(10, 4))
plt.subplots_adjust(wspace=0.3)

plt.figure(1)
# hist() expects an array or sequence; a dict view is materialised first.
plt.hist(list(d.values()), bins=30, color='g')
plt.ylabel('Bin Count')
plt.xlabel('Number of Posts Scraped')
plt.plot()

plt.subplots_adjust(hspace=.5)

plt.savefig('Posts_Per_Page_Histogram.png', bbox_inches='tight', dpi=300)
class Static:
    """Namespace of class-level settings and a shared database object.

    All values are class attributes; ``FacebookDataDatabase()`` is constructed
    once, when the class body is executed.
    """
    facebookDb = FacebookDataDatabase()
    metric_getter = facebookDb.getShareCount  # Set this to change the model type
    # Fixed value; the commented-out expression is the directory-size
    # alternative.
    limit = 100000  # len(os.listdir("../Image_CNN/images"))
    # group_size is tied to limit.
    group_size = limit
class Static:
    """Namespace of class-level settings and a shared database object.

    All values are class attributes; ``FacebookDataDatabase()`` is constructed
    once, when the class body is executed.
    """
    facebookDb = FacebookDataDatabase()
    metric_getter = facebookDb.getShareCount  # Set this to change the model type
    # NOTE(review): how group_size and limit are consumed is not visible here.
    group_size = 150000
    limit = 200000
Пример #16
0
            if (sentiment["neu"] == 1) and sentiment["neg"] == 0 and sentiment["pos"] == 0 and (compound == 0):
                return 0
            else:
                return compound

        messages = commentDb.getMessages(postId)
        if len(messages) == 0:
            print("No comments for: {0}".format(postId))
            return
        scores = []
        for message in messages:
            sentiment = get_sentiment_scores(message[0])
            if sentiment != -1:
                scores.append(sentiment)
        if not scores:
            return 0
        return statistics.mean(scores)


facebookDb = FacebookDataDatabase()
# Both queries return rows; the post id is the first field of each row.
post_ids = [row[0] for row in facebookDb.getImageIdWithPositiveCommentCounts()]
comment_db_post_ids = [row[0] for row in commentDb.getPostIds()]
# De-duplicated union of the ids known to either database.
post_ids_with_new_comments = set(post_ids) | set(comment_db_post_ids)

for postId in post_ids_with_new_comments:
    mean_sentiment_score = SentimentAnalyzer.GetPostSentiment(postId)
    print(mean_sentiment_score)
    # Falsy results are not written back.
    # NOTE(review): this also skips a score of exactly 0 - confirm that is
    # intended.
    if not mean_sentiment_score:
        continue
    facebookDb.insertSentimentData(mean_sentiment_score, -1, postId)
Пример #17
0
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase

facebookDb = FacebookDataDatabase()

# The value of interest is the first field of every returned row.
sentimentsTuples = facebookDb.selectColumnData("postPositivity")
sentiments = [row[0] for row in sentimentsTuples]

import numpy as np
from matplotlib import pyplot as plt

# fixed bin size: integer edges 0, 1, ..., 99
bins = np.arange(100)

# The x axis runs from the smallest observed value up to 100.
plt.xlim(min(sentiments), 100)

plt.hist(sentiments, bins=bins, alpha=0.5)

# The figure is written to an absolute, machine-specific path.
plt.savefig(
    '/Users/ccrowe/Documents/Thesis/facebook_api/Notebooks/DataStats/sentimentHist.png'
)

print(np.std(sentiments))
print(np.var(sentiments))
Пример #18
0
import os
import sys
sys.path.append(os.pardir)
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
import os
facebookDb = FacebookDataDatabase()
files = os.listdir("./images")

# Paths of image files whose post has no share count or no comment count.
delete_paths = []

# Post id of every file: the file name with ".png" / ".jpg" removed.
ids = [name.replace(".png", "").replace(".jpg", "") for name in files]

# The path is built from the real file name (extension included) so that it
# points at an existing file; the stripped id is only used for the lookup.
for file_name, x in zip(files, ids):
    shareCount = facebookDb.getShareCount(x)
    if shareCount is None:
        print(x)
        delete_paths.append(os.path.join("./images", file_name))

print()
for file_name, x in zip(files, ids):
    commentCount = facebookDb.getCommentCount(x)
    if commentCount is None:
        print(x)
        path = os.path.join("./images", file_name)
        # A file missing both metrics is listed only once.
        if path not in delete_paths:
            delete_paths.append(path)

# Deletion is intentionally disabled; enable it to remove the listed files.
#should_delete = input("Should we delete these files?")
#if should_delete == 'y' or should_delete == 'yes':
#    for path in delete_paths:
#        os.remove(path)
Пример #19
0
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
from Notebooks.LinkDatabases.PostComments import PostDataDatabase
import numpy as np

facebookDb = FacebookDataDatabase()
commentDb = PostDataDatabase()

# Variance of each column; the rows are passed to np.var as returned by the
# database.
commentCounts = facebookDb.selectColumnData("commentCount")
comment_variance = np.var(commentCounts)
print("Comment Count Variance: {0}".format(comment_variance))

shareCounts = facebookDb.selectColumnData("shareCount")
share_variance = np.var(shareCounts)
print("Share Count Variance: {0}".format(share_variance))

# The first field of each row is scaled by 100 before the variance is taken.
sentiments = [row[0] * 100 for row in facebookDb.selectColumnData("postPositivity")]
print(sentiments[:20])
sentiment_variance = np.var(sentiments)
print("Sentiment Variance: {0}".format(sentiment_variance))
Пример #20
0
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
import pandas as pd

facebookDb = FacebookDataDatabase()

# fanCount INT,numberOfRatings INT, talkingAboutCount INT, pageRating REAL
data = facebookDb.selectPageMetrics()
# Column labels applied to the selected rows, in order.
column_names = [
    "fanCount",
    "numberOfRatings",
    "talkingAboutCount",
    "shareCount",
    "commentCount",
    "sentiment",
]
df = pd.DataFrame(data, columns=column_names)
# Written relative to the current working directory.
df.to_csv("page_metrics.csv")
Пример #21
0
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase

facebookDb = FacebookDataDatabase()

# The value of interest is the first field of every returned row.
shareCountsTuples = facebookDb.selectColumnData("shareCount")
shareCounts = [row[0] for row in shareCountsTuples]

import numpy as np
from matplotlib import pyplot as plt

# fixed bin size: integer edges 0, 1, ..., 99
bins = np.arange(100)

# The x axis runs from the smallest observed value up to 100.
plt.xlim(min(shareCounts), 100)

plt.hist(shareCounts, bins=bins, alpha=0.5)

# The figure is written to an absolute, machine-specific path.
plt.savefig(
    '/Users/ccrowe/Documents/Thesis/facebook_api/Notebooks/DataStats/shareCountHist.png'
)

print(np.std(shareCounts))
print(np.var(shareCounts))
Пример #22
0
    for file in files:
        try:
            file_loaded = image.load_img(file, target_size=(200, 200))
            final.append(file_loaded)
        except:
            print("Skipping file: {0}".format(file))
            files.remove(file)
    assert len(final) > 1
    return final, files


def to_array(images):
    """Convert images to one NumPy array with every value divided by 255.

    Each element of ``images`` is passed through ``img_to_array`` and the
    result is divided by 255 before the arrays are stacked with ``np.array``.
    """
    scaled = [img_to_array(img) / 255 for img in images]
    return np.array(scaled)


facebookDb = FacebookDataDatabase()
metricGetter = facebookDb.getShareCount

# Only the first image_count directory entries are considered.
all_files = os.listdir("./images")[:image_count]
# Keep .jpg / .png names and drop anything mentioning .DS_Store.
files = [
    name for name in all_files
    if ".DS_Store" not in name and (".jpg" in name or ".png" in name)
]
imagePaths = [os.path.join("./images", name) for name in files]
# Two separate passes: first by getSize, then by existence on disk.
imagePaths = [path for path in imagePaths if getSize(path) > 1]
imagePaths = [path for path in imagePaths if os.path.exists(path)]
images, imagePaths = load_images(imagePaths)  # weed out images that fail to load, there are only a few
image_arrays = to_array(images)
shareCounts = to_share_count(get_ids(imagePaths), metricGetter)
labels = np.array(shareCounts)
# 75/25 split with a fixed seed.
trainX, testX, trainY, testY = train_test_split(
    image_arrays, labels, test_size=0.25, random_state=42)

# The saved model file is named after the metric getter in use.
model_name = '{0}.h5'.format(metricGetter.__name__)
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
import matplotlib.pyplot as plt

facebookDb = FacebookDataDatabase()
# First field of every "shareCount" row; only the first 5000 values are
# plotted.  NOTE(review): the list is named commentCounts although it holds
# share counts.
commentCounts = [row[0] for row in facebookDb.selectColumnData("shareCount")][:5000]
plt.hist(commentCounts, bins=5000)  # arguments are passed to np.histogram
# Only the 0..200 range of the x axis is shown.
plt.xlim(0, 200)
plt.title("Histogram of Share Counts")
plt.xlabel("Share Count")
plt.ylabel("Count")
# The figure is written to an absolute, machine-specific path.
plt.savefig(
    "/Users/ccrowe/Documents/Thesis/facebook_api/Notebooks/shareCountHistogram.png"
)
class Static:
    """Namespace of class-level settings and shared objects.

    All values are class attributes evaluated once when the class body runs,
    including the database object and the ``PlotLearning`` instance.
    """
    facebookDb = FacebookDataDatabase()
    metric_getter = facebookDb.getCommentCount  # Set this to change the model type
    # NOTE(review): group_size and limit carry the same value here; how they
    # are consumed is not visible in this view.
    group_size = 10000
    limit = 10000
    plot_losses = PlotLearning("combined_keras_model")
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
import pandas as pd
import numpy as np

fbDatabase = FacebookDataDatabase()
# First field of every "commentCount" row; values that are not above 0 are
# replaced by 0.
counts = [
    row[0] if row[0] > 0 else 0
    for row in fbDatabase.selectColumnData("commentCount")
]
# Print a sample of the first 100 values.
for x in counts[:100]:
    print(x)
df = pd.DataFrame(counts, columns=["commentCount"])
# Written relative to the current working directory.
df.to_csv("comment_counts.csv")
Пример #26
0
import pandas as pd

from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
from Notebooks.LinkDatabases.PostComments import PostDataDatabase

facebookDb = FacebookDataDatabase()
commentDb = PostDataDatabase()

comment_data = commentDb.selectPostData()

df = pd.DataFrame.from_records(comment_data, columns=["imageId", "commentId", "text"])

# Count the rows per imageId in a single pass instead of re-filtering the
# whole frame for every distinct id.
rows_per_image = df["imageId"].value_counts(sort=False)

# Ids are visited in order of first appearance, exactly as unique() yields
# them.
for imageId in df["imageId"].unique():
    # value_counts() leaves out missing ids; those get a count of 0.  The
    # count is converted to a plain int before it is handed to the database.
    count = int(rows_per_image.get(imageId, 0))
    facebookDb.insertCommentCountData(count, imageId)
import os
import sys

# ``os`` must be imported before os.pardir is used here.  The parent
# directory is added to the import path ahead of the project import below.
sys.path.append(os.pardir)
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
from imageio import imwrite
import cv2
import urllib.request
from PIL import Image
import re
import numpy as np
from numpy import array
from scipy.ndimage import filters
import ntpath

facebookDb = FacebookDataDatabase()
# Raw string: "\d" in a normal literal is an invalid escape sequence and
# triggers a warning; the pattern text itself is unchanged.
# NOTE(review): the "." before jpg/png is unescaped, so it matches any
# character, not only a literal dot - confirm whether that is intended.
image_regex = r"([A-Za-z_\d]+.jpg|[A-Za-z_\d]+.png)"


def getExistingImages():
    """Return the names of all entries in ``./images`` as a new list.

    The directory is resolved against the current working directory.
    """
    image_dir = "./images"
    return list(os.listdir(image_dir))


def denoise_image(full_path):
    """Open the image at ``full_path`` and delete the file if it is 1x1.

    NOTE(review): only the 1x1 check is visible in this view; the name
    suggests further processing follows - confirm against the full file.
    """
    im = Image.open(full_path)
    # NOTE(review): the image object is not closed before the file is removed
    # - verify this is safe on the target platform.
    if im.size == (1, 1,):
        os.remove(full_path)
Пример #28
0
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
import matplotlib.pyplot as plt

facebookDb = FacebookDataDatabase()
# First field of every "commentCount" row.
commentCounts = [row[0] for row in facebookDb.selectColumnData("commentCount")]
plt.hist(commentCounts, bins=2000)  # arguments are passed to np.histogram
# Only the 0..150 range of the x axis is shown.
plt.xlim(0, 150)
plt.title("Histogram of Comment Counts")
plt.xlabel("Comment Count")
plt.ylabel("Count in Bin")
# The figure is written to an absolute, machine-specific path.
plt.savefig(
    "/Users/ccrowe/Documents/Thesis/facebook_api/Notebooks/commentCountHistogram.png"
)
Пример #29
0
import os
import sys
from datetime import datetime

sys.path.append("../../")

from Notebooks.SearchFbData.GetKeyData import get_key_data
from Notebooks.Token.GenerateToken import getToken
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
from Notebooks.LinkDatabases.PostComments import PostDataDatabase
import requests

import ast
import unicodedata

# Shared database handles, created once when the module is loaded.
facebookDb = FacebookDataDatabase()
commentDb = PostDataDatabase()
# Module-level token; a name assigned at module scope is already global, so
# no ``global`` statement is needed here.
token = getToken()


# Get/Set post data.  These are person posts on the Ad in question
class Comment:
    """Plain record with ``id``, ``message`` and ``like_count`` fields."""

    def __init__(self):
        # Every field starts out unset.
        self.id = self.message = self.like_count = None

    def setId(self, id):
        """Store ``id`` on the instance."""
        self.id = id
Пример #30
0
from Notebooks.LinkDatabases.FacebookData import FacebookDataDatabase
import numpy as np

facebookDb = FacebookDataDatabase()

# The value of interest is the first field of every returned row.
commentCountsTuples = facebookDb.selectColumnData("commentCount")
commentCounts = [row[0] for row in commentCountsTuples]
# Natural log of every positive count; values that are not above 0 are kept
# unchanged.
commentCountsLog = [
    np.log(count) if count > 0 else count for count in commentCounts
]

import numpy as np
from matplotlib import pyplot as plt

# fixed bin size: integer edges 0, 1, ..., 99
bins = np.arange(100)

# The x axis runs from the smallest transformed value up to 100.
plt.xlim(min(commentCountsLog), 100)

plt.hist(commentCountsLog, bins=bins, alpha=0.5)

# The figure is written to an absolute, machine-specific path.
plt.savefig(
    '/Users/ccrowe/Documents/Thesis/facebook_api/Notebooks/DataStats/commentCountHist.png'
)

print(np.std(commentCountsLog))
print(np.var(commentCountsLog))