def compute_gender(test_data_path, df_results):
    """Predict each user's gender from face features and store it in ``df_results``.

    Loads the pickled model ``resources/RandomForest_Gender.sav`` (resolved
    against the module-level ``abs_path``), predicts one label per row of the
    image/profile join, and replaces the ``gender`` column of ``df_results``
    with ``"male"`` (label 0) or ``"female"`` (label 1).

    Args:
        test_data_path: Directory containing ``Profile/Profile.csv`` and
            ``Image/oxford.csv``.
        df_results: DataFrame holding at least ``userid`` and ``gender``
            columns; the existing ``gender`` column is discarded.

    Returns:
        A new DataFrame equal to ``df_results`` with ``gender`` replaced.
        Profile users without any image row default to label 1 ("female");
        users of ``df_results`` absent from the profile file get NaN.
    """
    model_path = os.path.join(abs_path, "resources", "RandomForest_Gender.sav")

    profile_df = Utils.read_data_to_dataframe(test_data_path + "/Profile/Profile.csv")
    # `axis` must be given by keyword: pandas 2.0 no longer accepts it positionally.
    profile_df = profile_df.drop(
        columns=profile_df.columns.difference(['userid', 'gender']))

    image_df = Utils.read_data_to_dataframe(test_data_path + "/Image/oxford.csv")
    image_df = image_df.rename(columns={'userId': 'userid'})

    merged_df = pd.merge(image_df, profile_df, on='userid')
    # Remember which user each feature row belongs to, so predictions are
    # keyed by the rows they were computed from rather than assigned
    # positionally onto image_df (which fails if the join changes the row count).
    merged_user_ids = merged_df['userid'].values
    features_df = merged_df.drop(['userid', 'faceID', 'gender'], axis=1)

    model = Utils.read_pickle_from_file(model_path)
    face_gender_df = pd.DataFrame(
        {'userid': merged_user_ids, 'gender': model.predict(features_df)},
        columns=['userid', 'gender'])

    # One row per (profile user, detected face); users without a face get NaN.
    predicted_df = pd.merge(profile_df["userid"].to_frame(), face_gender_df,
                            on="userid", how="left")
    # Default users without an image-based prediction to label 1.
    user_gender_df = predicted_df.fillna({"gender": 1})
    user_gender_df = aggregate_duplicate_ids(user_gender_df, 'gender')

    df_results = pd.merge(df_results, user_gender_df, on='userid', how="left")
    # The merge suffixes the old column as gender_x and the prediction as gender_y.
    df_results = df_results.drop(['gender_x'], axis=1)
    df_results = df_results.rename(columns={"gender_y": "gender"})

    # Compute both masks on the numeric labels, then switch the column to
    # object dtype before writing strings (avoids the deprecated silent upcast).
    is_male = df_results["gender"] == 0
    is_female = df_results["gender"] == 1
    df_results["gender"] = df_results["gender"].astype(object)
    df_results.loc[is_male, 'gender'] = "male"
    df_results.loc[is_female, 'gender'] = "female"
    return df_results
def get_image_age_training_data():
    """Build the face-feature training frame with a bucketed ``age`` label.

    Reads ``../data/Train/Profile/Profile.csv`` and
    ``../data/Train/Image/oxford.csv``, inner-joins them on ``userid`` and
    keeps the face-geometry / facial-hair / head-pose columns plus ``age``.

    Returns:
        DataFrame of the selected feature columns with ``age`` converted to a
        categorical using left-closed bins: ``[0, 25)`` -> ``"xx-24"``,
        ``[25, 35)`` -> ``"25-34"``, ``[35, 50)`` -> ``"35-49"``,
        ``[50, 200)`` -> ``"50-xx"``.
    """
    feature_columns = [
        'faceRectangle_width', 'faceRectangle_height', 'faceRectangle_left',
        'faceRectangle_top', 'pupilLeft_x', 'pupilLeft_y', 'pupilRight_x',
        'pupilRight_y', 'noseTip_x', 'noseTip_y', 'mouthLeft_x',
        'mouthLeft_y', 'mouthRight_x', 'mouthRight_y', 'eyebrowLeftOuter_x',
        'eyebrowLeftOuter_y', 'eyebrowLeftInner_x', 'eyebrowLeftInner_y',
        'eyeLeftOuter_x', 'eyeLeftOuter_y', 'eyeLeftTop_x', 'eyeLeftTop_y',
        'eyeLeftBottom_x', 'eyeLeftBottom_y', 'eyeLeftInner_x',
        'eyeLeftInner_y', 'eyebrowRightInner_x', 'eyebrowRightInner_y',
        'eyebrowRightOuter_x', 'eyebrowRightOuter_y', 'eyeRightInner_x',
        'eyeRightInner_y', 'eyeRightTop_x', 'eyeRightTop_y',
        'eyeRightBottom_x', 'eyeRightBottom_y', 'eyeRightOuter_x',
        'eyeRightOuter_y', 'noseRootLeft_x', 'noseRootLeft_y',
        'noseRootRight_x', 'noseRootRight_y', 'noseLeftAlarTop_x',
        'noseLeftAlarTop_y', 'noseRightAlarTop_x', 'noseRightAlarTop_y',
        'noseLeftAlarOutTip_x', 'noseLeftAlarOutTip_y',
        'noseRightAlarOutTip_x', 'noseRightAlarOutTip_y', 'upperLipTop_x',
        'upperLipTop_y', 'upperLipBottom_x', 'upperLipBottom_y',
        'underLipTop_x', 'underLipTop_y', 'underLipBottom_x',
        'underLipBottom_y', 'facialHair_mustache', 'facialHair_beard',
        'facialHair_sideburns', 'headPose_roll', 'headPose_yaw',
        'headPose_pitch', 'age',
    ]

    util = Utils()
    profile_df = util.read_data_to_dataframe("../data/Train/Profile/Profile.csv")
    # `axis` must be given by keyword: pandas 2.0 no longer accepts it positionally.
    profile_df = profile_df.drop(
        columns=profile_df.columns.difference(['userid', 'age']))

    image_df = util.read_data_to_dataframe("../data/Train/Image/oxford.csv")
    image_df = image_df.rename(columns={'userId': 'userid'})

    merged_df = pd.merge(image_df, profile_df, on='userid')
    merged_df = merged_df.filter(feature_columns, axis=1)
    # right=False makes every bin left-closed, so age 25 lands in "25-34".
    merged_df['age'] = pd.cut(merged_df['age'], [0, 25, 35, 50, 200],
                              labels=["xx-24", "25-34", "35-49", "50-xx"],
                              right=False)
    return merged_df
def compute_neu(test_data_path, df_results):
    """Predict the ``neu`` score per user and merge it into ``df_results``.

    Features are NRC and LIWC text columns; the join with the image frame only
    restricts (and repeats) rows to users that have image rows. Predictions
    come from the pickled model ``resources/RidgeCV_neu.sav`` (resolved against
    the module-level ``abs_path``).

    Args:
        test_data_path: Directory containing ``Profile/Profile.csv``,
            ``Text/nrc.csv``, ``Text/liwc.csv`` and ``Image/oxford.csv``.
        df_results: DataFrame with at least ``userid`` and ``neu`` columns.

    Returns:
        A new DataFrame in which ``neu`` is the predicted value where one is
        available and the value previously in ``df_results`` otherwise.
    """
    model_path = os.path.join(abs_path, "resources", "RidgeCV_neu.sav")

    profile_df = Utils.read_data_to_dataframe(test_data_path + "/Profile/Profile.csv")
    # `axis` must be given by keyword: pandas 2.0 no longer accepts it positionally.
    profile_df = profile_df.drop(
        columns=profile_df.columns.difference(['userid', 'neu']))

    nrc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/nrc.csv")
    liwc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/liwc.csv")
    personality = Personality()
    image_df = personality.read_image(
        profiles_path=test_data_path + "/Profile/Profile.csv",
        image_path=test_data_path + "/Image/oxford.csv")

    nrc_df = nrc_df.rename(columns={'userId': 'userid'})
    liwc_df = liwc_df.rename(columns={'userId': 'userid'})
    image_df = image_df.rename(columns={'userId': 'userid'})

    merged_df = pd.merge(nrc_df, liwc_df, on='userid')
    merged_df = pd.merge(merged_df, image_df, on='userid')
    merged_df = pd.merge(merged_df, profile_df, on='userid')

    # Inner merges keep the order of the LEFT keys, so these rows follow
    # nrc_df's order, not image_df's. Record the owner of each feature row so
    # predictions are attached to the right user instead of being assigned
    # positionally onto image_df.
    merged_user_ids = merged_df['userid'].values

    # NOTE(review): 'ext' is listed, but profile_df keeps only userid/neu, so
    # unless one of the text/image files has an 'ext' column, filter() simply
    # skips it — confirm against the feature set the model was trained on.
    merged_df = merged_df.filter([
        'positive', 'negative', 'anger_x', 'anticipation', 'disgust', 'fear',
        'joy', 'sadness', 'surprise', 'trust', 'pronoun', 'ppron', 'i', 'we',
        'you', 'shehe', 'they', 'ipron', 'future', 'affect', 'posemo',
        'negemo', 'anx', 'incl', 'work', 'death', 'assent', 'nonfl', 'Quote',
        'Apostro', 'ext'
    ], axis=1)
    # assumes Personality.normalize keeps one output row per input row — TODO confirm
    merged_df = Personality.normalize(merged_df)

    model = Utils.read_pickle_from_file(model_path)
    predictions = model.predict(merged_df)
    row_pred_df = pd.DataFrame(
        {'userid': merged_user_ids, 'neu': predictions[:, 0]},
        columns=['userid', 'neu'])

    predicted_df = pd.merge(profile_df["userid"].to_frame(), row_pred_df,
                            on="userid", how="left")
    user_pers_df = aggregate_duplicate_ids_average(predicted_df, "neu")

    df_results = pd.merge(df_results, user_pers_df, on='userid', how="left")
    # neu_x is the value already in df_results, neu_y the new prediction;
    # keep the old value wherever no prediction exists.
    df_results["neu"] = np.where(df_results['neu_y'].isnull(),
                                 df_results['neu_x'], df_results['neu_y'])
    return df_results.drop(['neu_x', 'neu_y'], axis=1)
def compute_ext(test_data_path, df_results):
    """Predict the ``ext`` score per user from text features and merge it into ``df_results``.

    Features are NRC and LIWC columns, transformed with ``log(x + 1)`` and
    min-max scaled per column (NaN from constant columns becomes 0), then fed
    to the pickled model ``resources/LinearRegression_ext_v2.sav`` (resolved
    against the module-level ``abs_path``).

    Args:
        test_data_path: Directory containing ``Profile/Profile.csv``,
            ``Text/nrc.csv`` and ``Text/liwc.csv``.
        df_results: DataFrame with at least ``userid`` and ``ext`` columns.

    Returns:
        A new DataFrame in which ``ext`` is the predicted value where one is
        available and the value previously in ``df_results`` otherwise.
    """
    model_path = os.path.join(abs_path, "resources", "LinearRegression_ext_v2.sav")

    profile_df = Utils.read_data_to_dataframe(test_data_path + "/Profile/Profile.csv")
    # `axis` must be given by keyword: pandas 2.0 no longer accepts it positionally.
    profile_df = profile_df.drop(
        columns=profile_df.columns.difference(['userid', 'ext']))

    nrc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/nrc.csv")
    liwc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/liwc.csv")
    nrc_df = nrc_df.rename(columns={'userId': 'userid'})
    liwc_df = liwc_df.rename(columns={'userId': 'userid'})

    merged_df = pd.merge(nrc_df, liwc_df, on='userid')
    merged_df = pd.merge(merged_df, profile_df, on='userid')

    # Keep the owner of each feature row: the inner joins may drop users, so
    # predictions must be keyed by these ids rather than assigned positionally
    # onto nrc_df.
    merged_user_ids = merged_df['userid'].values

    # Drop the profile's own 'ext' label first; otherwise the filter below,
    # which lists 'ext', would keep it as a feature.
    merged_df = merged_df.drop(['userid', 'ext'], axis=1)
    merged_df = merged_df.filter([
        'positive', 'negative', 'anger_x', 'anticipation', 'disgust', 'fear',
        'joy', 'sadness', 'surprise', 'trust', 'pronoun', 'ppron', 'i', 'we',
        'you', 'shehe', 'they', 'ipron', 'future', 'affect', 'posemo',
        'negemo', 'anx', 'incl', 'work', 'death', 'assent', 'nonfl', 'Quote',
        'Apostro', 'ext'
    ], axis=1)

    merged_df = np.log(merged_df + 1)
    merged_df = (merged_df - merged_df.min()) / (merged_df.max() - merged_df.min())
    # Columns with max == min divide 0 by 0; replace the resulting NaN with 0.
    merged_df = merged_df.fillna(0)

    model = Utils.read_pickle_from_file(model_path)
    predictions = model.predict(merged_df)
    user_pers_df = pd.DataFrame(
        {'userid': merged_user_ids, 'ext': predictions[:, 0]},
        columns=['userid', 'ext'])
    user_pers_df = pd.merge(profile_df["userid"].to_frame(), user_pers_df,
                            on="userid", how="left")

    df_results = pd.merge(df_results, user_pers_df, on='userid', how="left")
    # ext_x is the value already in df_results, ext_y the new prediction;
    # keep the old value wherever no prediction exists.
    df_results["ext"] = np.where(df_results['ext_y'].isnull(),
                                 df_results['ext_x'], df_results['ext_y'])
    return df_results.drop(['ext_x', 'ext_y'], axis=1)
def get_data(self, labels=None, include_image=False):
    """Assemble the training frame of text (and optionally image) features plus labels.

    Reads the NRC, LIWC and profile CSVs under ``../data/Train`` and
    inner-joins them on ``userid``.

    Args:
        labels: Profile columns to keep. Must include ``'userid'`` because it
            is the join key. Defaults to
            ``['userid', 'ope', 'con', 'ext', 'agr', 'neu']``.
        include_image: When true, also join the frame returned by
            ``self.read_image()``.

    Returns:
        The joined DataFrame with the ``userid`` column removed.
    """
    # A None sentinel replaces the former list default so that no mutable
    # object is shared between calls; the effective default is unchanged.
    if labels is None:
        labels = ['userid', 'ope', 'con', 'ext', 'agr', 'neu']

    util = Utils()
    profile_df = util.read_data_to_dataframe("../data/Train/Profile/Profile.csv")
    profile_df = profile_df.filter(labels, axis=1)

    nrc_df = util.read_data_to_dataframe("../data/Train/Text/nrc.csv")
    liwc_df = util.read_data_to_dataframe("../data/Train/Text/liwc.csv")
    nrc_df = nrc_df.rename(columns={'userId': 'userid'})
    liwc_df = liwc_df.rename(columns={'userId': 'userid'})

    merged_df = pd.merge(nrc_df, liwc_df, on='userid')
    if include_image:
        merged_df = pd.merge(merged_df, self.read_image(), on='userid')
    merged_df = pd.merge(merged_df, profile_df, on='userid')
    return merged_df.drop(['userid'], axis=1)
def compute_personality(test_data_path, df_results):
    """Predict the five personality scores per user and merge them into ``df_results``.

    All NRC and LIWC columns are used as features after ``log(x + 1)`` and
    per-column min-max scaling. The pickled model
    ``resources/LinearReg_Personality.sav`` (resolved against the module-level
    ``abs_path``) is expected to return one column per trait in the order
    ``ope, con, ext, agr, neu``.

    Args:
        test_data_path: Directory containing ``Profile/Profile.csv``,
            ``Text/nrc.csv`` and ``Text/liwc.csv``.
        df_results: DataFrame with ``userid`` and the five trait columns. It is
            not modified; the existing trait columns are dropped from a copy.

    Returns:
        A new DataFrame with the trait columns replaced by predictions (NaN for
        users that have no prediction).
    """
    traits = ['ope', 'con', 'ext', 'agr', 'neu']
    model_path = os.path.join(abs_path, "resources", "LinearReg_Personality.sav")

    profile_df = Utils.read_data_to_dataframe(test_data_path + "/Profile/Profile.csv")
    # `axis` must be given by keyword: pandas 2.0 no longer accepts it positionally.
    profile_df = profile_df.drop(
        columns=profile_df.columns.difference(['userid'] + traits))

    nrc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/nrc.csv")
    liwc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/liwc.csv")
    nrc_df = nrc_df.rename(columns={'userId': 'userid'})
    liwc_df = liwc_df.rename(columns={'userId': 'userid'})

    merged_df = pd.merge(nrc_df, liwc_df, on='userid')
    merged_df = pd.merge(merged_df, profile_df, on='userid')

    # Keep the owner of each feature row: the inner joins may drop users, so
    # predictions must be keyed by these ids rather than assigned positionally
    # onto nrc_df.
    merged_user_ids = merged_df['userid'].values
    merged_df = merged_df.drop(['userid'] + traits, axis=1)

    merged_df = np.log(merged_df + 1)
    merged_df = (merged_df - merged_df.min()) / (merged_df.max() - merged_df.min())
    # Columns with max == min divide 0 by 0; replace the resulting NaN with 0
    # so the model is not fed NaN (same treatment as compute_ext).
    merged_df = merged_df.fillna(0)

    model = Utils.read_pickle_from_file(model_path)
    predictions = model.predict(merged_df)

    prediction_columns = {'userid': merged_user_ids}
    for position, trait in enumerate(traits):
        prediction_columns[trait] = predictions[:, position]
    user_pers_df = pd.DataFrame(prediction_columns, columns=['userid'] + traits)
    user_pers_df = pd.merge(profile_df["userid"].to_frame(), user_pers_df,
                            on="userid", how="left")

    # Non-inplace drop: the caller's frame must not lose its columns as a
    # side effect of this call.
    df_results = df_results.drop(traits, axis=1)
    return pd.merge(df_results, user_pers_df, on='userid', how="left")
def read_liwc(profiles_path="../data/Train/Profile/Profile.csv",
              liwc_path="../data/Train/Text/liwc.csv"):
    """Left-join profiles with their LIWC features and keep a fixed column set.

    Args:
        profiles_path: Path of the profile CSV.
        liwc_path: Path of the LIWC CSV; its ``userId`` column is renamed to
            ``userid`` before joining.

    Returns:
        DataFrame with one row per profile/LIWC match (profiles without LIWC
        data are kept), restricted to ``userid``, the LIWC columns listed
        below and ``age``. Listed columns that are absent are skipped.
    """
    kept_columns = [
        'userid', 'WC', 'WPS', 'Sixltr', 'Dic', 'Numerals', 'funct',
        'pronoun', 'ppron', 'i', 'we', 'you', 'shehe', 'they', 'ipron',
        'article', 'verb', 'auxverb', 'past', 'present', 'future', 'adverb',
        'preps', 'conj', 'negate', 'quant', 'number', 'swear', 'social',
        'family', 'friend', 'humans', 'affect', 'posemo', 'negemo', 'anx',
        'anger', 'sad', 'cogmech', 'insight', 'cause', 'discrep', 'tentat',
        'certain', 'inhib', 'incl', 'excl', 'percept', 'see', 'hear', 'feel',
        'bio', 'body', 'health', 'sexual', 'ingest', 'relativ', 'motion',
        'space', 'time', 'work', 'achieve', 'leisure', 'home', 'money',
        'relig', 'death', 'assent', 'nonfl', 'filler', 'Period', 'Comma',
        'Colon', 'SemiC', 'QMark', 'Exclam', 'Dash', 'Quote', 'Apostro',
        'Parenth', 'OtherP', 'AllPct', 'age',
    ]

    reader = Utils()
    profiles = reader.read_data_to_dataframe(profiles_path)
    liwc = reader.read_data_to_dataframe(liwc_path)
    liwc = liwc.rename(columns={'userId': 'userid'})

    # how='left' keeps every profile row even when no LIWC row matches.
    joined = pd.merge(profiles, liwc, on='userid', how='left')
    return joined.filter(kept_columns, axis=1)
def read_image(profiles_path="../data/Train/Profile/Profile.csv",
               image_path="../data/Train/Image/oxford.csv"):
    """Load per-face image features for users that also appear in the profile file.

    Args:
        profiles_path: Path of the profile CSV.
        image_path: Path of the image-feature CSV; its ``userId`` column is
            renamed to ``userid`` before joining.

    Returns:
        DataFrame with ``userid`` plus the face-geometry, facial-hair and
        head-pose columns listed below, one row per image row whose user is
        present in the profile file. Listed columns that are absent are
        skipped.
    """
    kept_columns = [
        'userid', 'faceRectangle_width', 'faceRectangle_height',
        'faceRectangle_left', 'faceRectangle_top', 'pupilLeft_x',
        'pupilLeft_y', 'pupilRight_x', 'pupilRight_y', 'noseTip_x',
        'noseTip_y', 'mouthLeft_x', 'mouthLeft_y', 'mouthRight_x',
        'mouthRight_y', 'eyebrowLeftOuter_x', 'eyebrowLeftOuter_y',
        'eyebrowLeftInner_x', 'eyebrowLeftInner_y', 'eyeLeftOuter_x',
        'eyeLeftOuter_y', 'eyeLeftTop_x', 'eyeLeftTop_y', 'eyeLeftBottom_x',
        'eyeLeftBottom_y', 'eyeLeftInner_x', 'eyeLeftInner_y',
        'eyebrowRightInner_x', 'eyebrowRightInner_y', 'eyebrowRightOuter_x',
        'eyebrowRightOuter_y', 'eyeRightInner_x', 'eyeRightInner_y',
        'eyeRightTop_x', 'eyeRightTop_y', 'eyeRightBottom_x',
        'eyeRightBottom_y', 'eyeRightOuter_x', 'eyeRightOuter_y',
        'noseRootLeft_x', 'noseRootLeft_y', 'noseRootRight_x',
        'noseRootRight_y', 'noseLeftAlarTop_x', 'noseLeftAlarTop_y',
        'noseRightAlarTop_x', 'noseRightAlarTop_y', 'noseLeftAlarOutTip_x',
        'noseLeftAlarOutTip_y', 'noseRightAlarOutTip_x',
        'noseRightAlarOutTip_y', 'upperLipTop_x', 'upperLipTop_y',
        'upperLipBottom_x', 'upperLipBottom_y', 'underLipTop_x',
        'underLipTop_y', 'underLipBottom_x', 'underLipBottom_y',
        'facialHair_mustache', 'facialHair_beard', 'facialHair_sideburns',
        'headPose_roll', 'headPose_yaw', 'headPose_pitch',
    ]

    util = Utils()
    profile_df = util.read_data_to_dataframe(profiles_path)
    # `axis` must be given by keyword: pandas 2.0 no longer accepts it positionally.
    profile_df = profile_df.drop(
        columns=profile_df.columns.difference(['userid', 'age']))

    image_df = util.read_data_to_dataframe(image_path)
    image_df = image_df.rename(columns={'userId': 'userid'})

    # The inner join drops image rows whose user is not in the profile file.
    merged_df = pd.merge(image_df, profile_df, on='userid')
    return merged_df.filter(kept_columns, axis=1)
def compute_age(test_data_path, df_results):
    """Predict an age bucket per user and add it to ``df_results`` as ``age_group``.

    Features are the image and LIWC columns listed below, taken from the frame
    built by ``CombinedClassifier.merge_images_piwc``; predictions come from
    the pickled model ``resources/LogisticRegressionAge_v2.sav`` (resolved
    against the module-level ``abs_path``).

    Args:
        test_data_path: Directory containing ``Profile/Profile.csv``,
            ``Text/liwc.csv`` and ``Image/oxford.csv``.
        df_results: DataFrame with at least ``userid`` and ``age`` columns.

    Returns:
        A new DataFrame: ``df_results`` inner-joined with the per-user
        ``age_group`` and with the ``age`` column removed. Profile users
        without a prediction default to ``"xx-24"``.
    """
    model_path = os.path.join(abs_path, "resources", "LogisticRegressionAge_v2.sav")

    profile_df = Utils.read_data_to_dataframe(test_data_path + "/Profile/Profile.csv")
    # `axis` must be given by keyword: pandas 2.0 no longer accepts it positionally.
    profile_df = profile_df.drop(
        columns=profile_df.columns.difference(['userid', 'age']))

    image_df = Utils.read_data_to_dataframe(test_data_path + "/Image/oxford.csv")
    image_df = image_df.rename(columns={'userId': 'userid'})

    combined_classifier = CombinedClassifier()
    merged_df = combined_classifier.merge_images_piwc(
        is_train=False,
        profiles_path=test_data_path + "/Profile/Profile.csv",
        liwc_path=test_data_path + "/Text/liwc.csv",
        image_path=test_data_path + "/Image/oxford.csv")
    merged_df = merged_df.drop(['age_x', 'age_y'], axis=1)
    merged_df = merged_df.filter([
        "faceRectangle_left", "faceRectangle_top", "pupilLeft_x",
        "pupilLeft_y", "pupilRight_x", "pupilRight_y", "noseTip_x",
        "noseTip_y", "mouthLeft_x", "mouthLeft_y", "mouthRight_x",
        "mouthRight_y", "eyebrowLeftOuter_x", "eyebrowLeftOuter_y",
        "eyebrowLeftInner_x", "eyebrowLeftInner_y", "eyeLeftOuter_x",
        "eyeLeftOuter_y", "eyeLeftTop_y", "eyeLeftBottom_x",
        "eyeLeftBottom_y", "eyeLeftInner_x", "eyeLeftInner_y",
        "eyebrowRightInner_x", "eyebrowRightInner_y", "eyebrowRightOuter_x",
        "eyebrowRightOuter_y", "eyeRightInner_x", "eyeRightInner_y",
        "eyeRightTop_x", "eyeRightTop_y", "eyeRightBottom_x",
        "eyeRightBottom_y", "eyeRightOuter_x", "eyeRightOuter_y",
        "noseRootLeft_x", "noseRootLeft_y", "noseRootRight_y",
        "noseLeftAlarTop_x", "noseLeftAlarTop_y", "noseRightAlarTop_x",
        "noseRightAlarTop_y", "noseLeftAlarOutTip_x", "noseLeftAlarOutTip_y",
        "noseRightAlarOutTip_x", "noseRightAlarOutTip_y", "upperLipTop_x",
        "upperLipTop_y", "upperLipBottom_x", "upperLipBottom_y",
        "underLipTop_x", "underLipTop_y", "underLipBottom_x",
        "underLipBottom_y", "facialHair_mustache", "facialHair_beard",
        "facialHair_sideburns", "Sixltr", "Dic", "Numerals", "funct",
        "pronoun", "ppron", "i", "we", "shehe", "they", "article", "verb",
        "auxverb", "past", "present", "future", "adverb", "preps", "conj",
        "negate", "quant", "number", "swear", "social", "family", "friend",
        "humans", "affect", "posemo", "negemo", "anx", "anger", "sad",
        "cogmech", "insight", "cause", "discrep", "tentat", "certain",
        "inhib", "incl", "excl", "percept", "see", "hear", "feel", "bio",
        "body", "health", "sexual", "ingest", "work", "achieve", "leisure",
        "home", "money", "relig", "death", "assent", "nonfl", "filler",
        "Period", "Comma", "Colon", "SemiC", "QMark", "Exclam", "Dash",
        "Quote", "Apostro", "Parenth", "OtherP", "AllPct"
    ], axis=1)

    model = Utils.read_pickle_from_file(model_path)
    # NOTE(review): predictions are attached to image_df by position, which is
    # only correct if merge_images_piwc yields exactly one row per image_df
    # row, in the same order — confirm against that helper.
    image_df["age_group"] = model.predict(merged_df)

    predicted_df = pd.merge(profile_df["userid"].to_frame(), image_df,
                            on='userid', how="left")
    user_age_df = predicted_df.filter(["userid", "age_group"])
    # Frame-level fillna replaces the chained in-place fill on a derived
    # column; users without a prediction default to the youngest bucket.
    user_age_df = user_age_df.fillna({"age_group": "xx-24"})
    user_age_df = aggregate_duplicate_ids(user_age_df, 'age_group')

    df_results = pd.merge(df_results, user_age_df, on='userid')
    return df_results.drop(columns='age')