def compute_gender(test_data_path, df_results):
    """Predict gender from face features and write it into ``df_results``.

    Loads ``Profile/Profile.csv`` and ``Image/oxford.csv`` from below
    ``test_data_path``, runs the pickled model stored in
    ``resources/RandomForest_Gender.sav`` on every face row whose user is
    present in the profile table, and replaces the ``gender`` column of
    ``df_results`` with the per-user prediction rendered as ``"male"`` /
    ``"female"``.

    Args:
        test_data_path: Directory containing the ``Profile`` and ``Image``
            sub-directories.
        df_results: Frame with at least ``userid`` and ``gender`` columns.
            It is not modified; a new frame is returned.

    Returns:
        A new frame with the same rows as ``df_results`` whose ``gender``
        column holds the predicted label.
    """
    model_path = os.path.join(abs_path, "resources", "RandomForest_Gender.sav")

    profile_df = Utils.read_data_to_dataframe(test_data_path + "/Profile/Profile.csv")
    # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    profile_df.drop(profile_df.columns.difference(['userid', 'gender']),
                    axis=1, inplace=True)

    image_df = Utils.read_data_to_dataframe(test_data_path + "/Image/oxford.csv")
    image_df.rename(columns={'userId': 'userid'}, inplace=True)

    merged_df = pd.merge(image_df, profile_df, on='userid')
    # Remember which user each feature row belongs to *before* the id column
    # is dropped, so predictions are attached by user id rather than by the
    # row position of image_df (which breaks if the inner merge drops rows).
    row_user_ids = merged_df['userid'].values
    merged_df.drop(['userid', 'faceID', 'gender'], axis=1, inplace=True)

    model = Utils.read_pickle_from_file(model_path)
    face_gender_df = pd.DataFrame({
        'userid': row_user_ids,
        'gender': model.predict(merged_df),
    })

    # One row per (user, face); users without any face row get NaN here.
    user_gender_df = pd.merge(profile_df[["userid"]], face_gender_df,
                              on="userid", how="left")
    # Users without a prediction default to 1, which is rendered as "female"
    # below. Assign the result back instead of a chained in-place fillna.
    user_gender_df["gender"] = user_gender_df["gender"].fillna(1)
    user_gender_df = aggregate_duplicate_ids(user_gender_df, 'gender')

    df_results = pd.merge(df_results, user_gender_df, on='userid', how="left")
    # The merge suffixes the incoming column: keep the prediction (gender_y).
    df_results.drop(['gender_x'], axis=1, inplace=True)
    df_results.rename(columns={"gender_y": "gender"}, inplace=True)

    # Map the numeric classes to labels in one step; this avoids writing
    # strings into a numeric column through .loc.
    df_results["gender"] = df_results["gender"].replace({0: "male", 1: "female"})
    return df_results
def compute_neu(test_data_path, df_results):
    """Predict the ``neu`` score per user and write it into ``df_results``.

    Joins the NRC, LIWC, image and profile tables found below
    ``test_data_path`` on ``userid``, keeps a fixed list of feature columns,
    normalises them with ``Personality.normalize`` and feeds them to the
    pickled model stored in ``resources/RidgeCV_neu.sav``. Where a
    prediction exists it replaces the ``neu`` value in ``df_results``;
    otherwise the existing value is kept.

    Args:
        test_data_path: Directory containing the ``Profile``, ``Text`` and
            ``Image`` sub-directories.
        df_results: Frame with at least ``userid`` and ``neu`` columns. It is
            not modified; a new frame is returned.

    Returns:
        A new frame with the same rows as ``df_results`` and an updated
        ``neu`` column.
    """
    model_path = os.path.join(abs_path, "resources", "RidgeCV_neu.sav")
    profile_path = test_data_path + "/Profile/Profile.csv"

    profile_df = Utils.read_data_to_dataframe(profile_path)
    # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    profile_df.drop(profile_df.columns.difference(['userid', 'neu']),
                    axis=1, inplace=True)

    nrc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/nrc.csv")
    liwc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/liwc.csv")
    personality = Personality()
    image_df = personality.read_image(
        profiles_path=profile_path,
        image_path=test_data_path + "/Image/oxford.csv")

    nrc_df.rename(columns={'userId': 'userid'}, inplace=True)
    liwc_df.rename(columns={'userId': 'userid'}, inplace=True)
    image_df.rename(columns={'userId': 'userid'}, inplace=True)

    merged_df = pd.merge(nrc_df, liwc_df, on='userid')
    merged_df = pd.merge(merged_df, image_df, on='userid')
    merged_df = pd.merge(merged_df, profile_df, on='userid')

    # The merged rows follow nrc_df's key order, not image_df's. Keep the
    # user id of every feature row so predictions are attached to the right
    # user instead of being written onto image_df by position.
    row_user_ids = merged_df['userid'].values

    # filter() keeps only the listed columns that exist; absent ones are
    # silently skipped.
    merged_df = merged_df.filter([
        'positive', 'negative', 'anger_x', 'anticipation', 'disgust', 'fear',
        'joy', 'sadness', 'surprise', 'trust', 'pronoun', 'ppron', 'i', 'we',
        'you', 'shehe', 'they', 'ipron', 'future', 'affect', 'posemo',
        'negemo', 'anx', 'incl', 'work', 'death', 'assent', 'nonfl', 'Quote',
        'Apostro', 'ext'
    ], axis=1)
    # NOTE(review): assumes Personality.normalize keeps row count and order
    # -- confirm against its implementation.
    merged_df = Personality.normalize(merged_df)

    model = Utils.read_pickle_from_file(model_path)
    predictions = model.predict(merged_df)
    row_neu_df = pd.DataFrame({
        'userid': row_user_ids,
        'neu': predictions[:, 0],
    })

    # Users without any feature row get NaN; several rows per user are
    # collapsed by aggregate_duplicate_ids_average.
    user_pers_df = pd.merge(profile_df[["userid"]], row_neu_df,
                            on="userid", how="left")
    user_pers_df = aggregate_duplicate_ids_average(user_pers_df, "neu")

    df_results = pd.merge(df_results, user_pers_df, on='userid', how="left")
    # neu_x is the value already in df_results, neu_y the new prediction;
    # fall back to the existing value where no prediction is available.
    df_results["neu"] = np.where(df_results['neu_y'].isnull(),
                                 df_results['neu_x'],
                                 df_results['neu_y'])
    df_results.drop(['neu_x', 'neu_y'], axis=1, inplace=True)
    return df_results
def compute_ext(test_data_path, df_results):
    """Predict the ``ext`` score per user and write it into ``df_results``.

    Joins the NRC, LIWC and profile tables found below ``test_data_path`` on
    ``userid``, keeps a fixed list of feature columns, applies ``log(x + 1)``
    followed by column-wise min-max scaling, and feeds the result to the
    pickled model stored in ``resources/LinearRegression_ext_v2.sav``. Where
    a prediction exists it replaces the ``ext`` value in ``df_results``;
    otherwise the existing value is kept.

    Args:
        test_data_path: Directory containing the ``Profile`` and ``Text``
            sub-directories.
        df_results: Frame with at least ``userid`` and ``ext`` columns. It is
            not modified; a new frame is returned.

    Returns:
        A new frame with an updated ``ext`` column.
    """
    model_path = os.path.join(abs_path, "resources", "LinearRegression_ext_v2.sav")

    profile_df = Utils.read_data_to_dataframe(test_data_path + "/Profile/Profile.csv")
    # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    profile_df.drop(profile_df.columns.difference(['userid', 'ext']),
                    axis=1, inplace=True)

    nrc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/nrc.csv")
    liwc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/liwc.csv")
    nrc_df.rename(columns={'userId': 'userid'}, inplace=True)
    liwc_df.rename(columns={'userId': 'userid'}, inplace=True)

    merged_df = pd.merge(nrc_df, liwc_df, on='userid')
    merged_df = pd.merge(merged_df, profile_df, on='userid')

    # Keep the user id of every feature row so predictions are attached by
    # id; writing them onto nrc_df by position fails as soon as the inner
    # merges drop a row.
    row_user_ids = merged_df['userid'].values
    merged_df.drop(['userid', 'ext'], axis=1, inplace=True)

    # 'ext' was dropped just above, so its entry in this list selects
    # nothing (filter() skips labels that are absent).
    merged_df = merged_df.filter([
        'positive', 'negative', 'anger_x', 'anticipation', 'disgust', 'fear',
        'joy', 'sadness', 'surprise', 'trust', 'pronoun', 'ppron', 'i', 'we',
        'you', 'shehe', 'they', 'ipron', 'future', 'affect', 'posemo',
        'negemo', 'anx', 'incl', 'work', 'death', 'assent', 'nonfl', 'Quote',
        'Apostro', 'ext'
    ], axis=1)

    merged_df = np.log(merged_df + 1)
    col_min = merged_df.min()
    col_max = merged_df.max()
    merged_df = (merged_df - col_min) / (col_max - col_min)
    # Constant columns divide by zero above and become NaN; feed 0 instead.
    merged_df = merged_df.fillna(0)

    model = Utils.read_pickle_from_file(model_path)
    predictions = model.predict(merged_df)
    row_ext_df = pd.DataFrame({
        'userid': row_user_ids,
        'ext': predictions[:, 0],
    })

    user_pers_df = pd.merge(profile_df[["userid"]], row_ext_df,
                            on="userid", how="left")

    df_results = pd.merge(df_results, user_pers_df, on='userid', how="left")
    # ext_x is the value already in df_results, ext_y the new prediction;
    # fall back to the existing value where no prediction is available.
    df_results["ext"] = np.where(df_results['ext_y'].isnull(),
                                 df_results['ext_x'],
                                 df_results['ext_y'])
    df_results.drop(['ext_x', 'ext_y'], axis=1, inplace=True)
    return df_results
def compute_personality(test_data_path, df_results):
    """Predict the five personality scores and write them into ``df_results``.

    Joins the NRC, LIWC and profile tables found below ``test_data_path`` on
    ``userid``, uses every remaining non-id, non-target column as a feature
    after ``log(x + 1)`` and column-wise min-max scaling, and feeds them to
    the pickled model stored in ``resources/LinearReg_Personality.sav``. The
    model output columns 0..4 are stored as ``ope``, ``con``, ``ext``,
    ``agr`` and ``neu`` and replace the columns of the same name in
    ``df_results``.

    Args:
        test_data_path: Directory containing the ``Profile`` and ``Text``
            sub-directories.
        df_results: Frame with ``userid`` and the five score columns. It is
            not modified; a new frame is returned.

    Returns:
        A new frame in which the five score columns hold the predictions
        (NaN for users without a feature row).
    """
    trait_names = ['ope', 'con', 'ext', 'agr', 'neu']
    model_path = os.path.join(abs_path, "resources", "LinearReg_Personality.sav")

    profile_df = Utils.read_data_to_dataframe(test_data_path + "/Profile/Profile.csv")
    # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    profile_df.drop(profile_df.columns.difference(['userid'] + trait_names),
                    axis=1, inplace=True)

    nrc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/nrc.csv")
    liwc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/liwc.csv")
    nrc_df.rename(columns={'userId': 'userid'}, inplace=True)
    liwc_df.rename(columns={'userId': 'userid'}, inplace=True)

    merged_df = pd.merge(nrc_df, liwc_df, on='userid')
    merged_df = pd.merge(merged_df, profile_df, on='userid')

    # Keep the user id of every feature row so predictions are attached by
    # id; writing them onto nrc_df by position fails as soon as the inner
    # merges drop a row.
    row_pers_df = pd.DataFrame({'userid': merged_df['userid'].values})
    merged_df.drop(['userid'] + trait_names, axis=1, inplace=True)

    merged_df = np.log(merged_df + 1)
    col_min = merged_df.min()
    col_max = merged_df.max()
    merged_df = (merged_df - col_min) / (col_max - col_min)
    # Constant columns divide by zero above and become NaN; feed 0 instead,
    # matching what compute_ext does with the same scaling.
    merged_df = merged_df.fillna(0)

    model = Utils.read_pickle_from_file(model_path)
    predictions = model.predict(merged_df)
    for column_index, trait in enumerate(trait_names):
        row_pers_df[trait] = predictions[:, column_index]

    user_pers_df = pd.merge(profile_df[["userid"]], row_pers_df,
                            on="userid", how="left")

    # Drop the old scores on a new frame: an in-place drop here would mutate
    # the caller's DataFrame.
    df_results = df_results.drop(trait_names, axis=1)
    df_results = pd.merge(df_results, user_pers_df, on='userid', how="left")
    return df_results
def compute_age(test_data_path, df_results):
    """Predict an age group per user and add it to ``df_results``.

    Builds the feature table with ``CombinedClassifier.merge_images_piwc``
    from the profile, LIWC and image files below ``test_data_path``, keeps a
    fixed list of face and LIWC feature columns, and feeds them to the
    pickled model stored in ``resources/LogisticRegressionAge_v2.sav``. The
    per-user result is merged into ``df_results`` as ``age_group`` and the
    ``age`` column is removed.

    Args:
        test_data_path: Directory containing the ``Profile``, ``Text`` and
            ``Image`` sub-directories.
        df_results: Frame with at least ``userid`` and ``age`` columns. It is
            not modified; a new frame is returned.

    Returns:
        A new frame without ``age`` and with an ``age_group`` column.
    """
    model_path = os.path.join(abs_path, "resources", "LogisticRegressionAge_v2.sav")
    profile_path = test_data_path + "/Profile/Profile.csv"

    profile_df = Utils.read_data_to_dataframe(profile_path)
    # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    profile_df.drop(profile_df.columns.difference(['userid', 'age']),
                    axis=1, inplace=True)

    image_df = Utils.read_data_to_dataframe(test_data_path + "/Image/oxford.csv")
    image_df.rename(columns={'userId': 'userid'}, inplace=True)

    combined_classifier = CombinedClassifier()
    merged_df = combined_classifier.merge_images_piwc(
        is_train=False,
        profiles_path=profile_path,
        liwc_path=test_data_path + "/Text/liwc.csv",
        image_path=test_data_path + "/Image/oxford.csv")
    merged_df.drop(['age_x', 'age_y'], axis=1, inplace=True)

    # filter() keeps only the listed columns that exist; absent ones are
    # silently skipped.
    merged_df = merged_df.filter([
        "faceRectangle_left", "faceRectangle_top", "pupilLeft_x",
        "pupilLeft_y", "pupilRight_x", "pupilRight_y", "noseTip_x",
        "noseTip_y", "mouthLeft_x", "mouthLeft_y", "mouthRight_x",
        "mouthRight_y", "eyebrowLeftOuter_x", "eyebrowLeftOuter_y",
        "eyebrowLeftInner_x", "eyebrowLeftInner_y", "eyeLeftOuter_x",
        "eyeLeftOuter_y", "eyeLeftTop_y", "eyeLeftBottom_x",
        "eyeLeftBottom_y", "eyeLeftInner_x", "eyeLeftInner_y",
        "eyebrowRightInner_x", "eyebrowRightInner_y", "eyebrowRightOuter_x",
        "eyebrowRightOuter_y", "eyeRightInner_x", "eyeRightInner_y",
        "eyeRightTop_x", "eyeRightTop_y", "eyeRightBottom_x",
        "eyeRightBottom_y", "eyeRightOuter_x", "eyeRightOuter_y",
        "noseRootLeft_x", "noseRootLeft_y", "noseRootRight_y",
        "noseLeftAlarTop_x", "noseLeftAlarTop_y", "noseRightAlarTop_x",
        "noseRightAlarTop_y", "noseLeftAlarOutTip_x", "noseLeftAlarOutTip_y",
        "noseRightAlarOutTip_x", "noseRightAlarOutTip_y", "upperLipTop_x",
        "upperLipTop_y", "upperLipBottom_x", "upperLipBottom_y",
        "underLipTop_x", "underLipTop_y", "underLipBottom_x",
        "underLipBottom_y", "facialHair_mustache", "facialHair_beard",
        "facialHair_sideburns", "Sixltr", "Dic", "Numerals", "funct",
        "pronoun", "ppron", "i", "we", "shehe", "they", "article", "verb",
        "auxverb", "past", "present", "future", "adverb", "preps", "conj",
        "negate", "quant", "number", "swear", "social", "family", "friend",
        "humans", "affect", "posemo", "negemo", "anx", "anger", "sad",
        "cogmech", "insight", "cause", "discrep", "tentat", "certain",
        "inhib", "incl", "excl", "percept", "see", "hear", "feel", "bio",
        "body", "health", "sexual", "ingest", "work", "achieve", "leisure",
        "home", "money", "relig", "death", "assent", "nonfl", "filler",
        "Period", "Comma", "Colon", "SemiC", "QMark", "Exclam", "Dash",
        "Quote", "Apostro", "Parenth", "OtherP", "AllPct"
    ], axis=1)

    model = Utils.read_pickle_from_file(model_path)
    # NOTE(review): predictions are attached to image_df by row position, so
    # this assumes merge_images_piwc yields exactly one row per oxford.csv
    # row, in the same order -- confirm against CombinedClassifier.
    image_df["age_group"] = model.predict(merged_df)

    predicted_df = pd.merge(profile_df[["userid"]], image_df,
                            on='userid', how="left")
    # Users without a prediction default to the "xx-24" group. Assign the
    # result back instead of a chained in-place fillna on a derived frame.
    predicted_df["age_group"] = predicted_df["age_group"].fillna("xx-24")
    user_age_df = predicted_df.filter(["userid", "age_group"])
    user_age_df = aggregate_duplicate_ids(user_age_df, 'age_group')

    # Inner merge (pandas default), unlike the left merges used by the
    # sibling compute_* functions; kept as in the original.
    df_results = pd.merge(df_results, user_age_df, on='userid')
    return df_results.drop(columns='age')