Example #1
def test_fit(python_dataset_ncf, model_type):
    train, test = python_dataset_ncf
    data = Dataset(train=train, test=test, n_neg=N_NEG, n_neg_test=N_NEG_TEST)
    model = NCF(
        n_users=data.n_users, n_items=data.n_items, model_type=model_type, n_epochs=1
    )
    model.fit(data)
def test_predict(python_dataset_ncf, model_type):
    # test data format
    train, test = python_dataset_ncf
    data = Dataset(train=train, test=test, n_neg=N_NEG, n_neg_test=N_NEG_TEST)
    model = NCF(n_users=data.n_users, n_items=data.n_items, model_type=model_type)
    model.fit(data)

    test_users, test_items = list(test[DEFAULT_USER_COL]), list(test[DEFAULT_ITEM_COL])

    assert type(model.predict(test_users[0], test_items[0])) == float
    
    res = model.predict(test_users, test_items, is_list=True)

    assert type(res) == list
    assert len(res) == len(test)
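The tests above assume a pytest fixture python_dataset_ncf plus the constants N_NEG, N_NEG_TEST, DEFAULT_USER_COL and DEFAULT_ITEM_COL from the surrounding test module. Below is a minimal sketch of that setup; the fixture body, constant values and the commented import paths are illustrative assumptions, not the repository's actual conftest.

# Assumed imports; the package path differs between recommenders versions:
# from recommenders.models.ncf.dataset import Dataset
# from recommenders.models.ncf.ncf_singlenode import NCF
import pandas as pd
import pytest

DEFAULT_USER_COL = "userID"
DEFAULT_ITEM_COL = "itemID"
N_NEG = 5        # negative samples per positive pair for training
N_NEG_TEST = 10  # negative samples per positive pair for evaluation


@pytest.fixture
def python_dataset_ncf():
    # Tiny implicit-feedback split; every test user and item also appears in train
    train = pd.DataFrame(
        {
            DEFAULT_USER_COL: [1, 1, 2, 2, 3, 3],
            DEFAULT_ITEM_COL: [10, 11, 10, 12, 11, 12],
            "rating": [1.0] * 6,
        }
    )
    test = pd.DataFrame(
        {
            DEFAULT_USER_COL: [1, 2, 3],
            DEFAULT_ITEM_COL: [12, 11, 10],
            "rating": [1.0] * 3,
        }
    )
    return train, test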
Example #3
def test_predict(python_dataset_ncf, model_type):
    # test data format
    train, test = python_dataset_ncf
    data = Dataset(train=train, test=test, n_neg=N_NEG, n_neg_test=N_NEG_TEST)
    model = NCF(
        n_users=data.n_users, n_items=data.n_items, model_type=model_type, n_epochs=1
    )
    model.fit(data)

    test_users, test_items = list(test[DEFAULT_USER_COL]), list(test[DEFAULT_ITEM_COL])

    assert type(model.predict(test_users[0], test_items[0])) == float

    res = model.predict(test_users, test_items, is_list=True)

    assert type(res) == list
    assert len(res) == len(test)
def test_fit(python_dataset_ncf, model_type):
    train, test = python_dataset_ncf
    data = Dataset(train=train, test=test, n_neg=N_NEG, n_neg_test=N_NEG_TEST)
    model = NCF(n_users=data.n_users, n_items=data.n_items, model_type=model_type)
    model.fit(data)
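The model_type argument in these tests is typically supplied by parametrizing over the three NCF variants. A sketch, reusing the fixture and constants from the setup after Example #1; the decorator placement and variant list are assumptions about the surrounding test module.

import pytest


@pytest.mark.parametrize("model_type", ["GMF", "MLP", "NeuMF"])
def test_fit_and_predict(python_dataset_ncf, model_type):
    # Exercise each NCF variant end to end with a single training epoch
    train, test = python_dataset_ncf
    data = Dataset(train=train, test=test, n_neg=N_NEG, n_neg_test=N_NEG_TEST)
    model = NCF(
        n_users=data.n_users, n_items=data.n_items, model_type=model_type, n_epochs=1
    )
    model.fit(data)

    users = list(test[DEFAULT_USER_COL])
    items = list(test[DEFAULT_ITEM_COL])
    assert isinstance(model.predict(users[0], items[0]), float)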
Example #5
def ncf_training(params):
    """
    Train NCF using the given hyper-parameters
    """
    logger.debug("Start training...")
    train_data = pd.read_pickle(
        os.path.join(params["datastore"], params["train_datapath"])
    )
    validation_data = pd.read_pickle(
        os.path.join(params["datastore"], params["validation_datapath"])
    )

    data = NCFDataset(train=train_data, test=validation_data, seed=DEFAULT_SEED)

    model = NCF(
        n_users=data.n_users,
        n_items=data.n_items,
        model_type="NeuMF",
        n_factors=params["n_factors"],
        layer_sizes=[16, 8, 4],
        n_epochs=params["n_epochs"],
        learning_rate=params["learning_rate"],
        verbose=params["verbose"],
        seed=DEFAULT_SEED,
    )

    model.fit(data)

    logger.debug("Evaluating...")

    metrics_dict = {}
    rating_metrics = params["rating_metrics"]
    if len(rating_metrics) > 0:
        # Point-wise predictions for the observed validation pairs
        predictions = [
            [row.userID, row.itemID, model.predict(row.userID, row.itemID)]
            for (_, row) in validation_data.iterrows()
        ]

        predictions = pd.DataFrame(
            predictions, columns=["userID", "itemID", "prediction"]
        )
        predictions = predictions.astype(
            {"userID": "int64", "itemID": "int64", "prediction": "float64"}
        )

        for metric in rating_metrics:
            result = getattr(evaluation, metric)(validation_data, predictions)
            metrics_dict = _update_metrics(metrics_dict, metric, params, result)

    ranking_metrics = params["ranking_metrics"]
    if len(ranking_metrics) > 0:
        # Score every (user, item) combination seen in training to build the
        # candidate set for the ranking metrics
        users, items, preds = [], [], []
        item_list = list(train_data.itemID.unique())
        for user in train_data.userID.unique():
            user_list = [user] * len(item_list)
            users.extend(user_list)
            items.extend(item_list)
            preds.extend(list(model.predict(user_list, item_list, is_list=True)))

        all_predictions = pd.DataFrame(
            data={"userID": users, "itemID": items, "prediction": preds}
        )

        # Outer-join against the training interactions and keep only the pairs
        # the model has not seen (rows with no training rating)
        merged = pd.merge(
            train_data, all_predictions, on=["userID", "itemID"], how="outer"
        )
        all_predictions = merged[merged.rating.isnull()].drop("rating", axis=1)
        for metric in ranking_metrics:
            result = getattr(evaluation, metric)(
                validation_data,
                all_predictions,
                col_prediction="prediction",
                k=params["k"],
            )
            metrics_dict = _update_metrics(metrics_dict, metric, params, result)

    if len(ranking_metrics) == 0 and len(rating_metrics) == 0:
        raise ValueError("No metrics were specified.")

    # Report the metrics back to NNI
    nni.report_final_result(metrics_dict)

    # Save the metrics as JSON in the trial's output directory
    # (NNI sets NNI_OUTPUT_DIR for every trial)
    output_dir = os.environ.get("NNI_OUTPUT_DIR")
    with open(os.path.join(output_dir, "metrics.json"), "w") as fp:
        temp_dict = metrics_dict.copy()
        # NNI requires the primary metric under the "default" key; restore its real name here
        temp_dict[params["primary_metric"]] = temp_dict.pop("default")
        json.dump(temp_dict, fp)

    return model
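ncf_training expects a params dictionary that combines fixed settings with the hyper-parameters NNI samples for the current trial. A possible driver is sketched below, assuming the train/validation pickles already exist; all literal values, paths and metric names are placeholders.

import nni

if __name__ == "__main__":
    params = {
        "datastore": "./data",
        "train_datapath": "train.pkl",
        "validation_datapath": "validation.pkl",
        "n_factors": 8,
        "n_epochs": 10,
        "learning_rate": 1e-3,
        "verbose": 1,
        "rating_metrics": [],
        "ranking_metrics": ["ndcg_at_k", "precision_at_k"],
        "k": 10,
        "primary_metric": "ndcg_at_k",
    }
    # Merge in the hyper-parameters NNI sampled for this trial
    params.update(nni.get_next_parameter())
    ncf_training(params)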
Example #7
def train_ncf(params, data):
    model = NCF(n_users=data.n_users, n_items=data.n_items, **params)
    with Timer() as t:
        model.fit(data)
    return model, t
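A possible call site for train_ncf is sketched below. The commented import paths follow the Microsoft Recommenders utilities but vary across versions, and the tiny DataFrames, parameter values and the Timer.interval attribute are assumptions for illustration.

# Assumed imports; module paths differ between recommenders versions:
# from recommenders.models.ncf.dataset import Dataset
# from recommenders.models.ncf.ncf_singlenode import NCF
# from recommenders.utils.timer import Timer
import pandas as pd

train = pd.DataFrame(
    {"userID": [1, 1, 2, 2, 3, 3], "itemID": [10, 11, 10, 12, 11, 12], "rating": [1.0] * 6}
)
test = pd.DataFrame(
    {"userID": [1, 2, 3], "itemID": [12, 11, 10], "rating": [1.0] * 3}
)
data = Dataset(train=train, test=test, n_neg=4, n_neg_test=10)

ncf_params = {
    "model_type": "NeuMF",
    "n_factors": 4,
    "layer_sizes": [16, 8, 4],
    "n_epochs": 5,
    "learning_rate": 1e-3,
    "verbose": 1,
}
model, train_time = train_ncf(ncf_params, data)
print("Training took {:.2f} seconds".format(train_time.interval))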