Example #1
def test_recommend_movies(pretrained, inputs, linear_feature_columns,
                          dnn_feature_columns, DEVICE):
    # NOTE: relies on a module-level DataFrame `data` that is not shown in this snippet
    # model = xDeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=DEVICE)
    model = FiBiNET(linear_feature_columns,
                    dnn_feature_columns,
                    task='regression',
                    device=DEVICE)
    model.load_state_dict(torch.load(pretrained, map_location=DEVICE))
    pred_ans = model.predict(inputs, batch_size=256)

    print(f'Predict rating: {pred_ans}')

    pred_movie_list = []
    idx = np.argsort(pred_ans, axis=0)[::-1]
    for i, ans_idx in enumerate(idx[:, 0]):
        if i < 5:
            print(
                f"Predict rating: {pred_ans[ans_idx][0] :.3f}, movie_id: {data.iloc[ans_idx]['movie_id']}, gender: {data.iloc[ans_idx]['gender']}, age: {data.iloc[ans_idx]['age']}, user_id: {data.iloc[ans_idx]['user_id']}"
            )
            pred_movie_list.append(data.iloc[ans_idx]['movie_id'])
            # print(inputs['movie_id'].iloc[i])

    # print('movie_max',data.loc[:, ['movie_id']].max(axis=0)) # 1682

    return pred_movie_list
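This helper assumes a trained checkpoint, a prediction input dict and a module-level `data` DataFrame are already in scope. Below is a minimal setup sketch; the CSV path, the reduced feature list and the checkpoint name are assumptions for illustration and must match whatever was used at training time (see Example #4).

# Hypothetical setup for calling test_recommend_movies(); paths and feature
# choices are assumptions and must match the training run (Example #4).
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder
from deepctr_torch.inputs import SparseFeat, get_feature_names

data = pd.read_csv('./recommend_system/data/movielens_merged.csv')  # hypothetical path
sparse_features = ['movie_id', 'gender', 'age']                     # as in Example #4
for feat in sparse_features:
    data[feat] = LabelEncoder().fit_transform(data[feat])

feature_columns = [SparseFeat(feat, data[feat].nunique()) for feat in sparse_features]
feature_names = get_feature_names(feature_columns + feature_columns)
inputs = {name: data[name] for name in feature_names}

DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
top_movies = test_recommend_movies(
    './recommend_system/save_model/FiBiNET_MSE0.9.h5',  # hypothetical checkpoint
    inputs, feature_columns, feature_columns, DEVICE)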
Example #2
def test_FiBiNET(bilinear_type):
    model_name = "FiBiNET"
    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, 3, 3)

    model = FiBiNET(
        feature_columns,
        feature_columns,
        bilinear_type=bilinear_type,
        dnn_hidden_units=[8, 8],
        dnn_dropout=0.5,
    )
    check_model(model, model_name, x, y)
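Here `SAMPLE_SIZE`, `get_test_data` and `check_model` come from the deepctr-torch test utilities. In that test suite the function is driven by a pytest parametrization over the supported bilinear interaction types; the decorator below is a hedged sketch of that wiring, not part of the snippet above.

import pytest

# Hedged sketch: the decorator sits directly above test_FiBiNET so pytest runs
# it once per bilinear interaction type supported by FiBiNET.
@pytest.mark.parametrize('bilinear_type', ['all', 'each', 'interaction'])
def test_FiBiNET(bilinear_type):
    ...  # body as shown in Example #2 above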
Example #3
def recommend_movies(pretrained, inputs, linear_feature_columns,
                     dnn_feature_columns, DEVICE, df):
    data = df
    # print(data)
    # model = xDeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=DEVICE)
    model = FiBiNET(linear_feature_columns,
                    dnn_feature_columns,
                    task='regression',
                    device=DEVICE)
    model.load_state_dict(torch.load(pretrained, map_location=DEVICE))
    pred_ans = model.predict(inputs, batch_size=256)

    # print(f'Predict rating: {pred_ans}')

    pred_movie_list = []
    pred_movie_genres = []
    pred_rating = []
    idx = np.argsort(pred_ans, axis=0)[::-1]

    # Keep only the one-hot genre columns: user columns such as 'gender' or 'age'
    # would otherwise leak into the genre list whenever their encoded value is 1.
    non_genre_cols = [
        'movie_id', 'movie_title', 'unknown', 'user_id', 'gender', 'age',
        'occupation', 'zip', 'rating'
    ]
    movie_genres = [col for col in data.columns if col not in non_genre_cols]

    for i, ans_idx in enumerate(idx[:, 0]):
        # TODO: Add arguments (rank)
        if i < 2:
            genres = [
                movie_genre for movie_genre in movie_genres
                if data.iloc[ans_idx][movie_genre] == 1
            ]
            print(
                f"Predict rating: {pred_ans[ans_idx][0] :.3f}, movie_id: {data.iloc[ans_idx]['movie_id']}, movie_title: {data.iloc[ans_idx]['movie_title']}, gender: {data.iloc[ans_idx]['gender']}, age: {data.iloc[ans_idx]['age']}, genres: {genres}"
            )
            pred_movie_list.append(data.iloc[ans_idx]['movie_title'])
            pred_movie_genres.append(genres)
            pred_rating.append(pred_ans[ans_idx][0])
            # print(inputs['movie_id'].iloc[i])

    # print('movie_max',data.loc[:, ['movie_id']].max(axis=0)) # 1682

    return pred_movie_list, pred_movie_genres, pred_rating
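A short usage sketch follows, assuming the objects built in Example #4 (the DataFrame, feature columns and input dict) are in scope; the checkpoint path is hypothetical and must match the training run.

# Hypothetical call; `data`, the feature columns and `test_model_input` come
# from a preprocessing step like the one in Example #4.
titles, genres, ratings = recommend_movies(
    pretrained='./recommend_system/save_model/FiBiNET_MSE0.9.h5',  # hypothetical checkpoint
    inputs=test_model_input,
    linear_feature_columns=linear_feature_columns,
    dnn_feature_columns=dnn_feature_columns,
    DEVICE='cuda:0',
    df=data,
)
for title, genre_list, rating in zip(titles, genres, ratings):
    print(f'{title} ({", ".join(genre_list)}): predicted rating {rating:.3f}')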
Example #4
def train_recommend_movies(csv_file, DEVICE):
    """
        Description:
            Train recommend system on: 
                Model: "xDeepFM", 
                Target: "rating",
                Input features: ["movie_id", "gender", "age"],
                Save model to: "save_model/xDeepFM_MSE{}.h5"

        Parameters: 
            csv_file: "path to *.csv"
            DEVICE: "cuda:0"
    """
    data = pd.read_csv(csv_file)
    # sparse_features = ["movie_id", "user_id",
    #                    "gender", "age", "occupation", "zip"]
    sparse_features = ["movie_id", "gender", "age"]
    movie_genres = [
        'Action', 'Adventure', 'Animation', 'Childrens', 'Comedy', 'Crime',
        'Documentary', 'Drama', 'Fantasy', 'Film_Noir', 'Horror', 'Musical',
        'Mystery', 'Romance', 'Sci_Fi', 'Thriller', 'War', 'Western'
    ]
    target = ['rating']

    # 1. Label encoding for the sparse features (dense features would get a simple transformation)
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])

    sparse_features.extend(movie_genres)
    # 2. Count the number of unique values for each sparse field
    fixlen_feature_columns = [
        SparseFeat(feat, data[feat].nunique()) for feat in sparse_features
    ]

    # Resulting feature-column type, e.g.:
    # SparseFeat(name='movie_id', vocabulary_size=187, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='movie_id', group_name='default_group')
    linear_feature_columns = fixlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns
    feature_names = get_feature_names(
        linear_feature_columns + dnn_feature_columns
    )  # movie_id, gender, age and the one-hot genre columns

    # 3. Generate the model input data
    train, test = train_test_split(data, test_size=0.2)
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {
        name: test[name]
        for name in feature_names
    }  # dict mapping each feature name to its column of values

    # 4. Define the model, then train, predict and evaluate
    # model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    model = FiBiNET(linear_feature_columns,
                    dnn_feature_columns,
                    task='regression',
                    device=DEVICE)
    # model = xDeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    model.compile(
        "adam",
        "mse",
        metrics=['mse'],
    )

    history = model.fit(
        train_model_input,
        train[target].values,
        batch_size=256,
        epochs=10,
        verbose=2,
        validation_split=0.2,
    )
    pred_ans = model.predict(test_model_input, batch_size=256)

    print("test MSE",
          round(mean_squared_error(test[target].values, pred_ans), 4))
    print("test MAE",
          round(mean_absolute_error(test[target].values, pred_ans), 4))

    # torch.save(model.state_dict(), './recommend_system/save_model/xDeepFM_MSE{}.h5' .format(round(mean_squared_error(test[target].values, pred_ans), 4)))
    torch.save(
        model.state_dict(),
        './recommend_system/save_model/FiBiNET_MSE{}.h5'.format(
            round(mean_squared_error(test[target].values, pred_ans), 4)))
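A minimal usage sketch for this training entry point; the CSV path is hypothetical and the file must contain the movie_id/gender/age/rating columns plus the one-hot genre columns listed above.

import torch

DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
train_recommend_movies('./recommend_system/data/movielens_merged.csv', DEVICE)  # hypothetical path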
Example #5
    feature_names = get_feature_names(linear_feature_columns +
                                      dnn_feature_columns)

    # 3. Generate the model input data
    train, test = train_test_split(data, test_size=0.2)
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}
    # 4. Define the model, then train, predict and evaluate

    device = 'cpu'
    use_cuda = True
    if use_cuda and torch.cuda.is_available():
        print('cuda ready...')
        device = 'cuda:0'

    model = FiBiNET(linear_feature_columns,
                    dnn_feature_columns,
                    task='binary',
                    device=device)
    model.compile(
        "adam",
        "binary_crossentropy",
        metrics=["binary_crossentropy", "auc"],
    )
    history = model.fit(train_model_input,
                        train[target].values,
                        batch_size=256,
                        epochs=10,
                        verbose=2,
                        validation_split=0.2)
    pred_ans = model.predict(test_model_input, batch_size=256)
    #print(pred_ans)
    #print(" ")
Example #6
    feature_names = get_feature_names(linear_feature_columns +
                                      dnn_feature_columns)

    # 3. Generate the model input data
    train, test = train_test_split(data, test_size=0.2)
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}
    # 4. Define the model, then train, predict and evaluate

    device = 'cpu'
    use_cuda = True
    if use_cuda and torch.cuda.is_available():
        print('cuda ready...')
        device = 'cuda:0'

    model = FiBiNET(linear_feature_columns,
                    dnn_feature_columns,
                    task='regression',
                    device=device)
    model.compile(
        "adam",
        "mse",
        metrics=['mse'],
    )

    history = model.fit(train_model_input,
                        train[target].values,
                        batch_size=256,
                        epochs=87,
                        verbose=2,
                        validation_split=0.2)
    pred_ans = model.predict(test_model_input, batch_size=256)
    print("test MSE",