Exemplo n.º 1
0
def train_val_explicit_sample_data(movielens_explicit_interactions):
    """Return a small ``(train, val)`` pair split from the given interactions.

    ``random_split`` is called with ``val_p=0.05``, ``test_p=0.01`` and a fixed
    ``seed=42``. The first element of its result is discarded; the second and
    third elements are returned, in that order.

    NOTE(review): if ``random_split`` returns ``(train, val, test)``, the pair
    handed back here is really the 5% and 1% splits -- presumably deliberate,
    to keep the sample small. Confirm before relying on it.
    """
    split_kwargs = {
        'interactions': movielens_explicit_interactions,
        'val_p': 0.05,
        'test_p': 0.01,
        'seed': 42,
    }
    # Exactly three splits are expected back; only the last two are used.
    _unused, sample_train, sample_val = random_split(**split_kwargs)

    return sample_train, sample_val
Exemplo n.º 2
0
def test_bad_random_split_HDF5Interactions(hdf5_interactions):
    """``random_split`` must raise ``AssertionError`` for ``hdf5_interactions``."""
    expecting_assertion_error = pytest.raises(AssertionError)
    with expecting_assertion_error:
        random_split(interactions=hdf5_interactions)
Exemplo n.º 3
0
 def test_test_p_negative_random(self, implicit_interactions_to_split):
     """A negative ``test_p`` must make ``random_split`` raise ``ValueError``."""
     negative_test_p = -0.7
     with pytest.raises(ValueError):
         random_split(
             interactions=implicit_interactions_to_split,
             test_p=negative_test_p,
         )
Exemplo n.º 4
0
 def test_test_p_equal_one_random(self, implicit_interactions_to_split):
     """``test_p`` equal to one must make ``random_split`` raise ``ValueError``."""
     whole_dataset_test_p = 1
     with pytest.raises(ValueError):
         random_split(
             interactions=implicit_interactions_to_split,
             test_p=whole_dataset_test_p,
         )
Exemplo n.º 5
0
 def test_val_negative_but_combined_good_random(
         self, implicit_interactions_to_split):
     """A negative ``val_p`` must raise ``ValueError``.

     The two proportions passed here sum to a value between zero and one, so
     the negative ``val_p`` is the only thing wrong with the arguments.
     """
     bad_proportions = {'val_p': -0.1, 'test_p': 0.3}
     with pytest.raises(ValueError):
         random_split(interactions=implicit_interactions_to_split,
                      **bad_proportions)
Exemplo n.º 6
0
 def test_combined_equal_one_random(self, implicit_interactions_to_split):
     """``val_p`` and ``test_p`` summing to one must raise ``ValueError``."""
     bad_proportions = {'val_p': 0.7, 'test_p': 0.3}
     with pytest.raises(ValueError):
         random_split(interactions=implicit_interactions_to_split,
                      **bad_proportions)
Exemplo n.º 7
0
 def test_combined_too_large_random(self, implicit_interactions_to_split):
     """``val_p`` and ``test_p`` summing to more than one must raise ``ValueError``."""
     bad_proportions = {'val_p': 0.9, 'test_p': 0.2}
     with pytest.raises(ValueError):
         random_split(interactions=implicit_interactions_to_split,
                      **bad_proportions)
Exemplo n.º 8
0
def test_random_split(implicit_interactions_to_split,
                      explicit_interactions_to_split, data_type):
    """Check ``random_split`` output against hand-written expected splits.

    With ``val_p=0.1``, ``test_p=0.2`` and ``seed=42``, the train, validation
    and test splits must match the hard-coded expected matrices, agree on
    ``num_users`` and ``num_items``, and all have exactly the interactions
    type selected by ``data_type``.
    """
    if data_type != 'implicit':
        interactions_class = ExplicitInteractions
        interactions_kwargs = {}
        interactions_to_split = explicit_interactions_to_split
    else:
        interactions_class = Interactions
        interactions_kwargs = {'check_num_negative_samples_is_valid': False}
        interactions_to_split = implicit_interactions_to_split

    def build_expected(user_ids, item_ids, ratings):
        """Wrap expected triplets in ``interactions_class``, shaped like the input."""
        expected_df = pd.DataFrame(data={
            'user_id': user_ids,
            'item_id': item_ids,
            'rating': ratings,
        })
        coordinates = (expected_df['user_id'], expected_df['item_id'])
        expected_mat = coo_matrix(
            (expected_df['rating'], coordinates),
            shape=(interactions_to_split.num_users,
                   interactions_to_split.num_items),
        )
        return interactions_class(
            mat=expected_mat,
            allow_missing_ids=True,
            **interactions_kwargs,
        )

    train_expected = build_expected(
        user_ids=[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 4, 4],
        item_ids=[0, 1, 2, 3, 4, 5, 8, 1, 3, 4, 1, 3, 4, 2, 2, 4],
        ratings=[1, 2, 3, 4, 5, 4, 1, 1, 3, 4, 2, 4, 5, 5, 3, 5],
    )
    validate_expected = build_expected(
        user_ids=[3, 4, 4],
        item_ids=[1, 1, 5],
        ratings=[1, 2, 4],
    )
    test_expected = build_expected(
        user_ids=[0, 0, 1, 2, 3],
        item_ids=[6, 7, 2, 2, 4],
        ratings=[3, 2, 2, 3, 4],
    )

    train_actual, validate_actual, test_actual = random_split(
        interactions=interactions_to_split,
        val_p=0.1,
        test_p=0.2,
        seed=42,
    )

    # Compare the dense contents of each split with its expected counterpart.
    actual_expected_pairs = (
        (train_actual, train_expected),
        (validate_actual, validate_expected),
        (test_actual, test_expected),
    )
    for actual, expected in actual_expected_pairs:
        np.testing.assert_array_equal(actual.toarray(), expected.toarray())

    # Same ordering as the pairs above, flattened, for the chained checks.
    all_splits = [
        split for pair in actual_expected_pairs for split in pair
    ]

    user_counts = [split.num_users for split in all_splits]
    assert all(
        left == right for left, right in zip(user_counts, user_counts[1:])
    )

    item_counts = [split.num_items for split in all_splits]
    assert all(
        left == right for left, right in zip(item_counts, item_counts[1:])
    )

    # Every split must be exactly ``interactions_class``, not just compatible.
    split_types = [type(split) for split in all_splits] + [interactions_class]
    assert all(
        left == right for left, right in zip(split_types, split_types[1:])
    )
Exemplo n.º 9
0
def test_random_split_with_user_with_only_one_interaction(
    interactions_to_split_with_a_user_with_only_one_interaction,
):
    """``random_split`` with default proportions should not raise on this data."""
    # unlike for ``stratified_split``, this should work without error
    single_interaction_data = (
        interactions_to_split_with_a_user_with_only_one_interaction
    )
    random_split(interactions=single_interaction_data)