def get_data():
    """Yield train/test splits of image, message and target data, one per batch.

    Lists image files under ../Image_CNN/images (capped at Static.limit),
    looks up each post's DB row, vectorises the post messages, and for each
    group of Static.group_size samples yields a tuple
    (trainX_image, trainX_message, trainY, testX_image, testX_message, testY)
    produced by a 75/25 split with a fixed random seed.
    """
    all_files = os.listdir("../Image_CNN/images")[:Static.limit]
    # File names are "<post_id>.<ext>"; strip the 4-character extension.
    ids = [f[:-4] for f in all_files]
    # Fetch DB rows, dropping ids whose lookup returned a falsy result.
    rows = [r[0] for r in (Static.facebookDb.getRow(i) for i in ids) if r]
    # (id, message, share_count, comment_count) per row.
    data = [(r[0], r[10], r[2], r[3]) for r in rows]
    messages = [d[1] for d in data]
    image_data = transform_image_data(all_files)
    message_data = to_vector(messages)
    # Drop missing metrics and clamp negatives to zero.
    y_data = [max(y, 0)
              for y in (Static.metric_getter(i) for i in ids)
              if y is not None]
    # NOTE(review): image_data covers *all* files while y_data drops ids whose
    # metric is None — if any are dropped, this zip silently misaligns samples.
    # Confirm Static.metric_getter never returns None for these ids.
    combined_data = zip(image_data, message_data, y_data)

    import statistics
    print("Data Var: ", statistics.stdev(y_data) ** 2)

    for data_chunk in group(combined_data, Static.group_size):
        image_data_batch, message_data_batch, y_data_batch = zip(*data_chunk)
        (trainX_image, testX_image,
         trainY_image, testY_image) = train_test_split(
            image_data_batch, y_data_batch, test_size=0.25, random_state=42)
        (trainX_message, testX_message,
         trainY_message, testY_message) = train_test_split(
            message_data_batch, y_data_batch, test_size=0.25, random_state=42)
        # Same y input and random_state, so the label splits must line up.
        assert trainY_image == trainY_message
        assert testY_image == testY_message
        yield (trainX_image, trainX_message, trainY_image,
               testX_image, testX_message, testY_image)
def get_data():
    """Return one 75/25 train/test split of image, message and target data.

    Lists image files under ../Image_CNN/images (capped at Static.limit),
    looks up each post's DB row, vectorises the post messages and returns
    (trainX_image, trainX_message, trainY, testX_image, testX_message, testY).
    """
    all_files = os.listdir("../Image_CNN/images")[:Static.limit]
    print("Size of all files: {0}".format(len(all_files)))
    # File names are "<post_id>.<ext>"; strip the 4-character extension.
    ids = [f[:-4] for f in all_files]
    # Fetch DB rows, dropping ids whose lookup returned a falsy result.
    rows = [r[0] for r in (Static.facebookDb.getRow(i) for i in ids) if r]
    # (id, message, share_count, comment_count) per row.
    data = [(r[0], r[10], r[2], r[3]) for r in rows]
    messages = [d[1] for d in data]
    image_data = transform_image_data(all_files)
    message_data = to_vector(messages)
    # NOTE(review): unlike the other variants, metrics are not filtered for
    # None here — a None metric would propagate into the split unchanged.
    y_data = [Static.metric_getter(i) for i in ids]
    combined_data = zip(image_data, message_data, y_data)

    image_data_batch, message_data_batch, y_data_batch = zip(*combined_data)
    (trainX_image, testX_image,
     trainY_image, testY_image) = train_test_split(
        image_data_batch, y_data_batch, test_size=0.25, random_state=42)
    (trainX_message, testX_message,
     trainY_message, testY_message) = train_test_split(
        message_data_batch, y_data_batch, test_size=0.25, random_state=42)
    # Same y input and random_state, so the label splits must line up.
    assert trainY_image == trainY_message
    assert testY_image == testY_message
    return (trainX_image, trainX_message, trainY_image,
            testX_image, testX_message, testY_image)
def get_data():
    """Return one 90/10 train/test split of image, message and target data.

    Also sets Static.group_size to the full dataset size when it is unset.
    Returns (trainX_image, trainX_message, trainY, testX_image,
    testX_message, testY).
    """
    all_files = os.listdir("../Image_CNN/images")[:Static.limit]
    # File names are "<post_id>.<ext>"; strip the 4-character extension.
    ids = [f[:-4] for f in all_files]
    # Fetch DB rows, dropping ids whose lookup returned a falsy result.
    rows = [r[0] for r in (Static.facebookDb.getRow(i) for i in ids) if r]
    # (id, message, share_count, comment_count) per row.
    data = [(r[0], r[10], r[2], r[3]) for r in rows]
    messages = [d[1] for d in data]
    image_data = transform_image_data(all_files)
    message_data = to_vector(messages)
    # Drop missing metrics and clamp negatives to zero.
    y_data = [max(y, 0)
              for y in (Static.metric_getter(i) for i in ids)
              if y is not None]
    # BUG FIX: combined_data used to be a lazy zip; len(list(combined_data))
    # in the branch below exhausted it, so the later zip(*combined_data)
    # unpack raised ValueError. Materialise it once instead.
    combined_data = list(zip(image_data, message_data, y_data))
    if Static.group_size is None:
        Static.group_size = len(combined_data)

    import statistics
    print("Data Var: ", statistics.stdev(y_data) ** 2)
    print(y_data)

    image_data_batch, message_data_batch, y_data_batch = zip(*combined_data)
    (trainX_image, testX_image,
     trainY_image, testY_image) = train_test_split(
        image_data_batch, y_data_batch, test_size=0.1, random_state=42)
    (trainX_message, testX_message,
     trainY_message, testY_message) = train_test_split(
        message_data_batch, y_data_batch, test_size=0.1, random_state=42)
    # Same y input and random_state, so the label splits must line up.
    assert trainY_image == trainY_message
    assert testY_image == testY_message
    return (trainX_image, trainX_message, trainY_image,
            testX_image, testX_message, testY_image)
def get_data():
    """Return one 75/25 train/test split restricted to posts that have
    positively-scored comments.

    Also sets Static.group_size (dataset size minus 10000) when it is unset.
    Returns (trainX_image, trainX_message, trainY, testX_image,
    testX_message, testY).
    """
    # Only keep image files whose post id appears in the positive-comment set.
    sentiment_post_ids = [row[0] for row in
                          Static.facebookDb.getImageIdWithPositiveCommentCounts()]
    all_dir_files = os.listdir("../Image_CNN/images")
    # Order matters: outer loop over post ids, inner over directory entries,
    # matching the original nested-append order.
    all_files = [file_name
                 for post_id in sentiment_post_ids
                 for file_name in all_dir_files
                 if post_id in file_name]
    print("Number of files in analysis: {0}".format(len(all_files)))
    print(all_files)
    # File names are "<post_id>.<ext>"; strip the 4-character extension.
    ids = [f[:-4] for f in all_files]
    # Fetch DB rows, dropping ids whose lookup returned a falsy result.
    rows = [r[0] for r in (Static.facebookDb.getRow(i) for i in ids) if r]
    # (id, message, share_count, comment_count) per row.
    data = [(r[0], r[10], r[2], r[3]) for r in rows]
    messages = [d[1] for d in data]
    image_data = transform_image_data(all_files)
    message_data = to_vector(messages)
    # Drop missing metrics (no clamping in this variant).
    y_data = [y for y in (Static.metric_getter(i) for i in ids)
              if y is not None]
    # BUG FIX: combined_data used to be a lazy zip; len(list(combined_data))
    # in the branch below exhausted it, so the later zip(*combined_data)
    # unpack raised ValueError. Materialise it once instead.
    combined_data = list(zip(image_data, message_data, y_data))
    if Static.group_size is None:
        # NOTE(review): the -10000 offset is unexplained — confirm intent.
        Static.group_size = len(combined_data) - 10000

    import statistics
    print("Data Var: ", statistics.stdev(y_data) ** 2)

    image_data_batch, message_data_batch, y_data_batch = zip(*combined_data)
    (trainX_image, testX_image,
     trainY_image, testY_image) = train_test_split(
        image_data_batch, y_data_batch, test_size=0.25, random_state=42)
    (trainX_message, testX_message,
     trainY_message, testY_message) = train_test_split(
        message_data_batch, y_data_batch, test_size=0.25, random_state=42)
    # Same y input and random_state, so the label splits must line up.
    assert trainY_image == trainY_message
    assert testY_image == testY_message
    return (trainX_image, trainX_message, trainY_image,
            testX_image, testX_message, testY_image)
def get_data():
    """Return one 75/25 train/test split of image, message and target data.

    Lists image files under ../Image_CNN/images (capped at Static.limit),
    looks up each post's DB row, vectorises the post messages and returns
    (trainX_image, trainX_message, trainY, testX_image, testX_message, testY).
    """
    # The original issued Static.facebookDb.getImageIdWithPositiveCommentCounts()
    # here but never used the result; the dead query has been removed.
    all_files = os.listdir("../Image_CNN/images")[:Static.limit]
    print("Number of files in analysis: {0}".format(len(all_files)))
    # File names are "<post_id>.<ext>"; strip the 4-character extension.
    ids = [f[:-4] for f in all_files]
    # Fetch DB rows, dropping ids whose lookup returned a falsy result.
    rows = [r[0] for r in (Static.facebookDb.getRow(i) for i in ids) if r]
    # (id, message, share_count, comment_count) per row.
    data = [(r[0], r[10], r[2], r[3]) for r in rows]
    messages = [d[1] for d in data]
    image_data = transform_image_data(all_files)
    message_data = to_vector(messages)
    # Drop missing metrics and clamp negatives to zero.
    y_data = [max(y, 0)
              for y in (Static.metric_getter(i) for i in ids)
              if y is not None]
    combined_data = zip(image_data, message_data, y_data)

    import statistics
    print("Data Var: ", statistics.stdev(y_data) ** 2)

    image_data_batch, message_data_batch, y_data_batch = zip(*combined_data)
    (trainX_image, testX_image,
     trainY_image, testY_image) = train_test_split(
        image_data_batch, y_data_batch, test_size=0.25, random_state=42)
    (trainX_message, testX_message,
     trainY_message, testY_message) = train_test_split(
        message_data_batch, y_data_batch, test_size=0.25, random_state=42)
    # Same y input and random_state, so the label splits must line up.
    assert trainY_image == trainY_message
    assert testY_image == testY_message
    return (trainX_image, trainX_message, trainY_image,
            testX_image, testX_message, testY_image)
Global.metric_name = "commentCount" elif "Sentiment" in Global.metric_getter.__name__: Global.metric_name = "postPositivity" else: raise Exception("Did not get the right metric name") postTexts = [] for post in MessageGetter.get_post_generator(): pt = PostText(post) count = pt.getValues() postTexts.append(pt.getValues()) df = pd.DataFrame.from_records(postTexts, columns=MessageGetter.get_columns()) df = df[df[Global.metric_name] > 0] # FILTER OUT ZEROS FOR NOW x_data = to_vector(df["message"]) y_data = list(map(lambda x: x if x > 0 else 0, df[Global.metric_name])) print(len(x_data)) print(len(y_data)) model = Sequential() def create_model(): input_layer_count = 1 # y_data.shape[1] model.add(Dense(200, input_dim=x_data.shape[1])) model.add(Dropout(0.3)) model.add(Dense(100, input_dim=x_data.shape[1])) model.add(Dropout(0.3))