Пример #1
0
    def test_predict(self):
        """Fit ALS on a 3x3 ratings matrix and check user 0's predictions.

        The matrix is wrapped as a sparse ds-array with 1x1 blocks, the
        model is fitted with a fixed seed, and the three values returned
        by ``predict_user(user_id=0)`` are checked against fixed bounds.
        """
        data = np.array([[0, 0, 5], [3, 0, 5], [3, 1, 2]])
        ratings = csr_matrix(data)
        train = ds.array(x=ratings, block_size=(1, 1))
        als = ALS(tol=0.01, random_state=666, n_f=5, verbose=False)
        als.fit(train)
        predictions = als.predict_user(user_id=0)

        # Users 0 and 1 gave the same rating (5) to the third movie, so they
        # share preferences; user 0 is therefore expected to rate the first
        # movie close to the 3 that user 1 gave it.
        # Each bound is asserted on its own so that a failure reports the
        # offending value instead of a bare "False is not true".
        self.assertGreater(predictions[0], 2.75)
        self.assertLess(predictions[0], 3.25)
        self.assertLess(predictions[1], 1)
        self.assertGreater(predictions[2], 4.5)
Пример #2
0
def main():
    """Run the ALS performance measurement on the Netflix libsvm file.

    Loads the dataset as a sparse ds-array split into blocks derived from
    ``n_blocks``, builds an ALS estimator, and hands the estimator and the
    data to ``performance.measure``.
    """
    n_blocks = 384
    n_features = 480189
    n_factors = 100
    dataset_path = (
        "/gpfs/projects/bsc19/COMPSs_DATASETS/dislib/recommendation"
        "/netflix/netflix_data_libsvm.txt"
    )

    # Block shape: each dimension is divided into n_blocks pieces, rounding
    # up. Presumably 17770 is the number of rows in the file -- TODO confirm.
    first_dim = int(ceil(17770 / n_blocks))
    second_dim = int(ceil(n_features / n_blocks))

    # Only the samples are needed; the labels returned alongside are unused.
    x, _ = ds.load_svmlight_file(
        dataset_path,
        block_size=(first_dim, second_dim),
        n_features=n_features,
        store_sparse=True,
    )

    als = ALS(
        tol=0.0001,
        random_state=676,
        n_f=n_factors,
        max_iter=10,
        verbose=False,
    )

    performance.measure("ALS", "Netflix", als, x)
Пример #3
0
    def test_fit(self):
        """Check that ALS reports convergence with and without a test set."""
        train, test = load_movielens()

        settings = dict(tol=0.01, random_state=666, n_f=100, verbose=False,
                        check_convergence=True)
        model = ALS(**settings)

        # First fit: the test split is passed along with the training data.
        model.fit(train, test)
        self.assertTrue(model.converged)

        # Second fit on the same estimator, now with the training data only.
        model.fit(train)
        self.assertTrue(model.converged)
Пример #4
0
    def test_init_params(self):
        """Check that every constructor argument is stored on the estimator."""
        # Constructor keyword -> value; each keyword is also the name of the
        # attribute that is expected to hold the value afterwards.
        expected = {
            "random_state": 666,
            "n_f": 100,
            "lambda_": 0.001,
            "tol": 0.1,
            "max_iter": 10,
            "verbose": True,
            "arity": 12,
        }

        als = ALS(**expected)

        for attr_name, value in expected.items():
            self.assertEqual(getattr(als, attr_name), value)
Пример #5
0
def main():
    """Run the ALS performance measurement on the Netflix libsvm file.

    Loads the dataset as a sparse ds-array split into blocks derived from
    ``n_blocks``, builds an ALS estimator, and hands its ``fit`` method and
    the data to ``performance.measure``.
    """
    n_blocks = 384
    n_features = 480189
    n_factors = 100
    dataset_path = ("/fefs/scratch/bsc19/bsc19029/PERFORMANCE/datasets/"
                    "netflix_data_libsvm.txt")

    # Block shape: each dimension is divided into n_blocks pieces, rounding
    # up. Presumably 17770 is the number of rows in the file -- TODO confirm.
    first_dim = int(ceil(17770 / n_blocks))
    second_dim = int(ceil(n_features / n_blocks))
    shape = (first_dim, second_dim)

    # Only the samples are needed; the labels returned alongside are unused.
    x, _ = ds.load_svmlight_file(dataset_path, block_size=shape,
                                 n_features=n_features, store_sparse=True)

    als = ALS(tol=0.0001, random_state=676, n_f=n_factors, max_iter=10,
              verbose=False)

    performance.measure("ALS", "Netflix", als.fit, x)
Пример #6
0
if __name__ == "__main__":
    # Command-line options: number of subsets, number of latent factors and
    # the directory handed to load_movielens.
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_subsets", default=48, type=int)
    parser.add_argument("--num_factors", default=100, type=int)
    parser.add_argument("--data_path", default="./tests/files/", type=str)
    args = parser.parse_args()

    n_f = args.num_factors
    data_path = args.data_path
    num_subsets = args.num_subsets

    train, test = load_movielens(data_path=data_path)

    # The timed section covers estimator construction and fitting.
    exec_start = time()
    als = ALS(
        tol=0.0001,
        n_f=n_f,
        max_iter=2,
        verbose=True,
    )

    # Fit using the test data to check convergence. To check convergence on
    # the training data instead, call ``als.fit(train)``.
    als.fit(train, test)
    exec_end = time()

    print("Ratings for user 0:\n%s" % als.predict_user(0))
    print("Execution time: %.2f" % (exec_end - exec_start))