Пример #1
0
def compute_gender(test_data_path, df_results):
    """Predict user gender from face features and store it in ``df_results``.

    Loads the pickled ``RandomForest_Gender.sav`` model from the ``resources``
    directory under ``abs_path``, runs it on the rows of ``Image/oxford.csv``
    that have a matching row in ``Profile/Profile.csv``, and replaces the
    ``gender`` column of ``df_results`` with the predicted labels.

    Args:
        test_data_path: Directory that contains ``Profile/Profile.csv`` and
            ``Image/oxford.csv``.
        df_results: DataFrame with at least ``userid`` and ``gender`` columns.
            The incoming ``gender`` values are discarded.

    Returns:
        A new DataFrame (the result of a left merge on ``userid``) whose
        ``gender`` column holds ``"male"`` where the prediction is 0 and
        ``"female"`` where it is 1. Users without a prediction are filled
        with 1 before aggregation and therefore end up as ``"female"``.
    """
    model_path = os.path.join(abs_path, "resources", "RandomForest_Gender.sav")

    profile_df = Utils.read_data_to_dataframe(test_data_path +
                                              "/Profile/Profile.csv")
    # ``axis`` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    profile_df.drop(profile_df.columns.difference(['userid', 'gender']),
                    axis=1,
                    inplace=True)

    image_df = Utils.read_data_to_dataframe(test_data_path +
                                            "/Image/oxford.csv")
    image_df.rename(columns={'userId': 'userid'}, inplace=True)

    # Feature matrix: image rows with a known profile, minus ids and target.
    merged_df = pd.merge(image_df, profile_df, on='userid')
    merged_df.drop(['userid', 'faceID', 'gender'], axis=1, inplace=True)
    model = Utils.read_pickle_from_file(model_path)

    # NOTE(review): assumes every oxford.csv row matches exactly one profile
    # row, so the predictions line up with image_df row for row -- confirm.
    image_df["gender"] = model.predict(merged_df)

    predicted_df = profile_df["userid"].to_frame()
    predicted_df = pd.merge(predicted_df, image_df, on="userid", how="left")
    # Users without any image get the default class 1. A non-chained fillna is
    # used so the fill is not lost on a temporary copy.
    user_gender_df = predicted_df.filter(["userid", "gender"]).fillna(
        {"gender": 1})
    user_gender_df = aggregate_duplicate_ids(user_gender_df, 'gender')

    df_results = pd.merge(df_results, user_gender_df, on='userid', how="left")
    df_results.drop(['gender_x'], axis=1, inplace=True)
    df_results.rename(columns={"gender_y": "gender"}, inplace=True)

    # Compute both masks on the numeric codes, then switch the column to
    # object dtype so writing string labels is not an incompatible-dtype set.
    is_male = df_results["gender"] == 0
    is_female = df_results["gender"] == 1
    df_results["gender"] = df_results["gender"].astype(object)
    df_results.loc[is_male, 'gender'] = "male"
    df_results.loc[is_female, 'gender'] = "female"
    return df_results
Пример #2
0
def compute_neu(test_data_path, df_results):
    """Predict the ``neu`` score per user and merge it into ``df_results``.

    Combines the NRC and LIWC text features with the image features returned
    by ``Personality.read_image`` and the profile table, normalises the
    selected feature columns with ``Personality.normalize`` and feeds them to
    the pickled ``RidgeCV_neu.sav`` model from the ``resources`` directory
    under ``abs_path``.

    Args:
        test_data_path: Directory that contains ``Profile/Profile.csv``,
            ``Text/nrc.csv``, ``Text/liwc.csv`` and ``Image/oxford.csv``.
        df_results: DataFrame with at least ``userid`` and ``neu`` columns.

    Returns:
        A new DataFrame in which ``neu`` is the predicted value where one
        exists and the incoming ``df_results`` value otherwise.
    """
    model_path = os.path.join(abs_path, "resources", "RidgeCV_neu.sav")

    profile_df = Utils.read_data_to_dataframe(test_data_path +
                                              "/Profile/Profile.csv")
    # ``axis`` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    profile_df.drop(profile_df.columns.difference(['userid', 'neu']),
                    axis=1,
                    inplace=True)

    nrc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/nrc.csv")
    liwc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/liwc.csv")
    personality = Personality()
    image_df = personality.read_image(
        profiles_path=test_data_path + "/Profile/Profile.csv",
        image_path=test_data_path + "/Image/oxford.csv")

    # Bring every source onto the same key name before joining.
    nrc_df.rename(columns={'userId': 'userid'}, inplace=True)
    liwc_df.rename(columns={'userId': 'userid'}, inplace=True)
    image_df.rename(columns={'userId': 'userid'}, inplace=True)

    merged_df = pd.merge(nrc_df, liwc_df, on='userid')
    merged_df = pd.merge(merged_df, image_df, on='userid')
    merged_df = pd.merge(merged_df, profile_df, on='userid')

    # ``filter`` keeps only the listed columns that are present.
    # NOTE(review): 'ext' is listed as a feature although profile_df was
    # reduced to userid/neu above; it can only come from image_df -- confirm.
    merged_df = merged_df.filter([
        'positive', 'negative', 'anger_x', 'anticipation', 'disgust', 'fear',
        'joy', 'sadness', 'surprise', 'trust', 'pronoun', 'ppron', 'i', 'we',
        'you', 'shehe', 'they', 'ipron', 'future', 'affect', 'posemo',
        'negemo', 'anx', 'incl', 'work', 'death', 'assent', 'nonfl', 'Quote',
        'Apostro', 'ext'
    ],
                                 axis=1)

    merged_df = Personality.normalize(merged_df)

    model = Utils.read_pickle_from_file(model_path)
    predictions = model.predict(merged_df)

    # NOTE(review): assumes the merged rows line up one-to-one with image_df
    # and that the model returns a 2-D array -- confirm.
    image_df['neu'] = predictions[:, 0]

    predicted_df = profile_df["userid"].to_frame()
    predicted_df = pd.merge(predicted_df, image_df, on="userid", how="left")
    user_pers_df = predicted_df.filter(['userid', 'neu'])
    user_pers_df = aggregate_duplicate_ids_average(user_pers_df, "neu")

    # After the merge ``neu_x`` is the incoming value, ``neu_y`` the prediction.
    df_results = pd.merge(df_results, user_pers_df, on='userid', how="left")
    df_results["neu"] = np.where(df_results['neu_y'].isnull(),
                                 df_results['neu_x'], df_results['neu_y'])
    df_results.drop(['neu_x', 'neu_y'], axis=1, inplace=True)

    return df_results
Пример #3
0
def compute_ext(test_data_path, df_results):
    """Predict the ``ext`` score per user and merge it into ``df_results``.

    Joins the NRC and LIWC text features, applies ``log(x + 1)`` followed by
    per-column min-max scaling, and feeds the result to the pickled
    ``LinearRegression_ext_v2.sav`` model from the ``resources`` directory
    under ``abs_path``.

    Args:
        test_data_path: Directory that contains ``Profile/Profile.csv``,
            ``Text/nrc.csv`` and ``Text/liwc.csv``.
        df_results: DataFrame with at least ``userid`` and ``ext`` columns.

    Returns:
        A new DataFrame in which ``ext`` is the predicted value where one
        exists and the incoming ``df_results`` value otherwise.
    """
    model_path = os.path.join(abs_path, "resources",
                              "LinearRegression_ext_v2.sav")

    profile_df = Utils.read_data_to_dataframe(test_data_path +
                                              "/Profile/Profile.csv")
    # ``axis`` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    profile_df.drop(profile_df.columns.difference(['userid', 'ext']),
                    axis=1,
                    inplace=True)

    nrc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/nrc.csv")
    liwc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/liwc.csv")

    nrc_df.rename(columns={'userId': 'userid'}, inplace=True)
    liwc_df.rename(columns={'userId': 'userid'}, inplace=True)

    merged_df = pd.merge(nrc_df, liwc_df, on='userid')
    merged_df = pd.merge(merged_df, profile_df, on='userid')
    merged_df.drop(['userid', 'ext'], axis=1, inplace=True)
    # ``filter`` keeps only the listed columns that are present; 'ext' was
    # dropped on the previous line, so its entry here selects nothing.
    merged_df = merged_df.filter([
        'positive', 'negative', 'anger_x', 'anticipation', 'disgust', 'fear',
        'joy', 'sadness', 'surprise', 'trust', 'pronoun', 'ppron', 'i', 'we',
        'you', 'shehe', 'they', 'ipron', 'future', 'affect', 'posemo',
        'negemo', 'anx', 'incl', 'work', 'death', 'assent', 'nonfl', 'Quote',
        'Apostro', 'ext'
    ],
                                 axis=1)

    # Log-compress, then min-max scale each column.
    merged_df = np.log(merged_df + 1)
    col_min = merged_df.min()
    col_max = merged_df.max()
    merged_df = (merged_df - col_min) / (col_max - col_min)

    # Constant columns give 0/0 above; replace the resulting NaNs with 0.
    merged_df.fillna(0, inplace=True)

    model = Utils.read_pickle_from_file(model_path)
    predictions = model.predict(merged_df)

    # NOTE(review): assumes the merged rows line up one-to-one with nrc_df
    # and that the model returns a 2-D array -- confirm.
    nrc_df['ext'] = predictions[:, 0]

    predicted_df = profile_df["userid"].to_frame()
    predicted_df = pd.merge(predicted_df, nrc_df, on="userid", how="left")
    user_pers_df = predicted_df.filter(['userid', 'ext'])

    # After the merge ``ext_x`` is the incoming value, ``ext_y`` the prediction.
    df_results = pd.merge(df_results, user_pers_df, on='userid', how="left")
    df_results["ext"] = np.where(df_results['ext_y'].isnull(),
                                 df_results['ext_x'], df_results['ext_y'])
    df_results.drop(['ext_x', 'ext_y'], axis=1, inplace=True)

    return df_results
Пример #4
0
def compute_personality(test_data_path, df_results):
    """Predict the five personality scores and store them in ``df_results``.

    Joins the NRC and LIWC text features, applies ``log(x + 1)`` followed by
    per-column min-max scaling, and feeds the result to the pickled
    ``LinearReg_Personality.sav`` model from the ``resources`` directory
    under ``abs_path``. The model's output columns are read in the order
    ``ope``, ``con``, ``ext``, ``agr``, ``neu``.

    Args:
        test_data_path: Directory that contains ``Profile/Profile.csv``,
            ``Text/nrc.csv`` and ``Text/liwc.csv``.
        df_results: DataFrame with ``userid`` and the five trait columns.
            The trait columns are dropped from this object in place.

    Returns:
        A new DataFrame with the five trait columns replaced by the
        predictions (left merge on ``userid``, so users without a prediction
        get missing values).
    """
    traits = ['ope', 'con', 'ext', 'agr', 'neu']
    model_path = os.path.join(abs_path, "resources",
                              "LinearReg_Personality.sav")

    profile_df = Utils.read_data_to_dataframe(test_data_path +
                                              "/Profile/Profile.csv")
    # ``axis`` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    profile_df.drop(profile_df.columns.difference(['userid'] + traits),
                    axis=1,
                    inplace=True)

    nrc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/nrc.csv")
    liwc_df = Utils.read_data_to_dataframe(test_data_path + "/Text/liwc.csv")

    nrc_df.rename(columns={'userId': 'userid'}, inplace=True)
    liwc_df.rename(columns={'userId': 'userid'}, inplace=True)

    merged_df = pd.merge(nrc_df, liwc_df, on='userid')
    merged_df = pd.merge(merged_df, profile_df, on='userid')

    # Everything except the key and the targets is used as a feature.
    merged_df.drop(['userid'] + traits, axis=1, inplace=True)

    # Log-compress, then min-max scale each column.
    # NOTE(review): unlike compute_ext, NaNs produced by constant columns
    # (0/0) are not filled here -- confirm the model tolerates them.
    merged_df = np.log(merged_df + 1)
    col_min = merged_df.min()
    col_max = merged_df.max()
    merged_df = (merged_df - col_min) / (col_max - col_min)

    model = Utils.read_pickle_from_file(model_path)
    predictions = model.predict(merged_df)

    # NOTE(review): assumes the merged rows line up one-to-one with nrc_df
    # -- confirm.
    for column_index, trait in enumerate(traits):
        nrc_df[trait] = predictions[:, column_index]

    predicted_df = profile_df["userid"].to_frame()
    predicted_df = pd.merge(predicted_df, nrc_df, on="userid", how="left")
    user_pers_df = predicted_df.filter(['userid'] + traits)

    # Drop the old trait columns first so the merge does not create _x/_y.
    df_results.drop(traits, axis=1, inplace=True)
    df_results = pd.merge(df_results, user_pers_df, on='userid', how="left")

    return df_results
Пример #5
0
def compute_age(test_data_path, df_results):
    """Predict an age group per user and attach it to ``df_results``.

    Builds the feature matrix with ``CombinedClassifier.merge_images_piwc``,
    keeps the face-landmark and LIWC columns listed below, and feeds them to
    the pickled ``LogisticRegressionAge_v2.sav`` model from the ``resources``
    directory under ``abs_path``.

    Args:
        test_data_path: Directory that contains ``Profile/Profile.csv``,
            ``Text/liwc.csv`` and ``Image/oxford.csv``.
        df_results: DataFrame with at least ``userid`` and ``age`` columns.

    Returns:
        A new DataFrame with an ``age_group`` column added and the ``age``
        column removed. Users without a prediction get ``"xx-24"`` before
        duplicates are aggregated.
    """
    model_path = os.path.join(abs_path, "resources",
                              "LogisticRegressionAge_v2.sav")
    profile_df = Utils.read_data_to_dataframe(test_data_path +
                                              "/Profile/Profile.csv")

    # ``axis`` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    profile_df.drop(profile_df.columns.difference(['userid', 'age']),
                    axis=1,
                    inplace=True)
    image_df = Utils.read_data_to_dataframe(test_data_path +
                                            "/Image/oxford.csv")
    image_df.rename(columns={'userId': 'userid'}, inplace=True)

    combined_classifier = CombinedClassifier()
    merged_df = combined_classifier.merge_images_piwc(
        is_train=False,
        profiles_path=test_data_path + "/Profile/Profile.csv",
        liwc_path=test_data_path + "/Text/liwc.csv",
        image_path=test_data_path + "/Image/oxford.csv")

    merged_df.drop(['age_x', 'age_y'], axis=1, inplace=True)
    # ``filter`` keeps only the listed columns that are present.
    merged_df = merged_df.filter([
        "faceRectangle_left", "faceRectangle_top", "pupilLeft_x",
        "pupilLeft_y", "pupilRight_x", "pupilRight_y", "noseTip_x",
        "noseTip_y", "mouthLeft_x", "mouthLeft_y", "mouthRight_x",
        "mouthRight_y", "eyebrowLeftOuter_x", "eyebrowLeftOuter_y",
        "eyebrowLeftInner_x", "eyebrowLeftInner_y", "eyeLeftOuter_x",
        "eyeLeftOuter_y", "eyeLeftTop_y", "eyeLeftBottom_x", "eyeLeftBottom_y",
        "eyeLeftInner_x", "eyeLeftInner_y", "eyebrowRightInner_x",
        "eyebrowRightInner_y", "eyebrowRightOuter_x", "eyebrowRightOuter_y",
        "eyeRightInner_x", "eyeRightInner_y", "eyeRightTop_x", "eyeRightTop_y",
        "eyeRightBottom_x", "eyeRightBottom_y", "eyeRightOuter_x",
        "eyeRightOuter_y", "noseRootLeft_x", "noseRootLeft_y",
        "noseRootRight_y", "noseLeftAlarTop_x", "noseLeftAlarTop_y",
        "noseRightAlarTop_x", "noseRightAlarTop_y", "noseLeftAlarOutTip_x",
        "noseLeftAlarOutTip_y", "noseRightAlarOutTip_x",
        "noseRightAlarOutTip_y", "upperLipTop_x", "upperLipTop_y",
        "upperLipBottom_x", "upperLipBottom_y", "underLipTop_x",
        "underLipTop_y", "underLipBottom_x", "underLipBottom_y",
        "facialHair_mustache", "facialHair_beard", "facialHair_sideburns",
        "Sixltr", "Dic", "Numerals", "funct", "pronoun", "ppron", "i", "we",
        "shehe", "they", "article", "verb", "auxverb", "past", "present",
        "future", "adverb", "preps", "conj", "negate", "quant", "number",
        "swear", "social", "family", "friend", "humans", "affect", "posemo",
        "negemo", "anx", "anger", "sad", "cogmech", "insight", "cause",
        "discrep", "tentat", "certain", "inhib", "incl", "excl", "percept",
        "see", "hear", "feel", "bio", "body", "health", "sexual", "ingest",
        "work", "achieve", "leisure", "home", "money", "relig", "death",
        "assent", "nonfl", "filler", "Period", "Comma", "Colon", "SemiC",
        "QMark", "Exclam", "Dash", "Quote", "Apostro", "Parenth", "OtherP",
        "AllPct"
    ],
                                 axis=1)

    model = Utils.read_pickle_from_file(model_path)
    # NOTE(review): assumes merge_images_piwc returns one row per oxford.csv
    # row, in the same order, so predictions line up with image_df -- confirm.
    image_df["age_group"] = model.predict(merged_df)

    predicted_df = profile_df["userid"].to_frame()
    predicted_df = pd.merge(predicted_df, image_df, on='userid', how="left")
    # Users without any image get the default group. A non-chained fillna is
    # used so the fill is not lost on a temporary copy.
    user_age_df = predicted_df.filter(["userid", "age_group"]).fillna(
        {"age_group": "xx-24"})
    user_age_df = aggregate_duplicate_ids(user_age_df, 'age_group')
    df_results = pd.merge(df_results, user_age_df, on='userid')
    return df_results.drop(columns='age')