Пример #1
0
def flixster_obfuscated(classifier):
    """Evaluate an L2 logistic regression on plain vs. masked Flixster data.

    Loads the plain Flixster subset and the masked subset stored under file
    index 15, prints a few diagnostics (matrix shapes and the first row of
    each matrix), and passes both matrices together with the labels of the
    plain subset to ``Utils.ROC_cv_obf``.

    Args:
        classifier: Accepted for interface compatibility; the body shown here
            never reads it.

    Note:
        ``np`` and ``Utils`` are not imported in this function and must be
        available at module level.  Class balancing, normalisation and
        feature selection through ``Utils`` were tried previously and are
        not applied here.
    """
    import FlixsterData as FD

    plain, labels, _ = FD.load_flixster_data_subset()
    masked, _, _ = FD.load_flixster_data_subset_masked(file_index=15)
    print(plain.shape, masked.shape)

    # 80/20 split: the leading 80% of the plain matrix for training, the
    # trailing 20% of the masked matrix for testing.  Labels always come
    # from the plain subset.  Only ``train_rows`` is read below; the other
    # slices are kept so the function performs exactly the same operations.
    train_end = int(0.8 * len(plain))
    train_rows = plain[:train_end]
    train_labels = labels[:train_end]
    test_start = int(0.8 * len(masked))
    test_rows = masked[test_start:]
    test_labels = labels[test_start:]

    # Show the first row of each matrix for a quick visual comparison.
    for matrix in (plain, masked):
        print(list(matrix[0, :]))

    print("before", train_rows.shape)
    print(train_rows.shape)

    from sklearn.linear_model import LogisticRegression
    rng = np.random.RandomState(0)
    roc_model = LogisticRegression(random_state=rng, penalty='l2')

    Utils.ROC_cv_obf(plain, masked, labels, roc_model)

    # A second estimator built on the same RandomState instance; nothing in
    # the visible body uses it afterwards.
    model = LogisticRegression(random_state=rng, penalty='l2')
Пример #2
0
def load_real_fake_data_flixster(file_index=-1):
    """Build a shuffled "real vs. masked" data set from the small Flixster subset.

    The first half of the rows of the plain subset is labelled ``1`` (real)
    and the second half of the rows of the masked subset is labelled ``0``
    (fake).  Both halves are stacked into one float matrix and then passed,
    together with the label vector, through ``Utils.shuffle_two_arrays``
    (presumably a joint shuffle of rows and labels -- confirm in ``Utils``).

    Args:
        file_index: Forwarded unchanged to
            ``FlixsterData.load_flixster_data_subset_masked`` to pick the
            masked file.  The meaning of the default ``-1`` is defined by
            that loader.

    Returns:
        tuple: ``(data, labels)`` as returned by ``shuffle_two_arrays``.
        Before shuffling, ``data`` has one row per kept real/fake row and
        ``labels`` holds ``1.0`` for real rows and ``0.0`` for fake rows.

    Note:
        ``np`` is not imported in this function and must be available at
        module level.
    """
    import FlixsterData as FD
    real, _, valid_movies = FD.load_flixster_data_subset(small=True)

    # Keep the first half of the real rows ...
    real = real[:real.shape[0] // 2, :]
    fake = FD.load_flixster_data_subset_masked(file_index=file_index,
                                               small=True,
                                               valid_movies=valid_movies)[0]
    # ... and the second half of the masked rows.
    fake = fake[fake.shape[0] // 2:, :]

    n_real, n_fake = real.shape[0], fake.shape[0]

    # Column count comes from the matrix shape rather than from ``real[0]``,
    # so an empty real half no longer raises IndexError.
    data = np.zeros(shape=(n_real + n_fake, real.shape[1]))
    labels = np.zeros(shape=(n_real + n_fake, ))

    # Block assignment replaces the former per-row Python loops; values are
    # cast into the float buffer exactly as before.
    data[:n_real, :] = real
    data[n_real:, :] = fake
    labels[:n_real] = 1  # fake rows keep the initial 0 label

    from Utils import shuffle_two_arrays
    data, labels = shuffle_two_arrays(data, labels)
    return data, labels