def setUp(self):
    """
    Setup method that is called at the beginning of each test.

    Builds a deterministic 10 x 18 (users x documents) ratings matrix, patches
    DataParser.get_ratings_matrix to serve it, trains a CollaborativeFiltering
    model on it and stores the resulting test data and predictions on the
    test case.
    """
    self.documents, self.users = 18, 10
    n_documents, n_users = self.documents, self.users
    self.n_iterations = 15
    self.k_folds = 3
    self.hyperparameters = {'n_factors': 5, '_lambda': 0.01}
    self.options = {'n_iterations': self.n_iterations, 'k_folds': self.k_folds}
    self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iterations)
    self.n_recommendations = 1

    def mock_get_ratings_matrix(self=None):
        # A cell is 1 exactly when (article + user) is a multiple of 3, else 0.
        matrix = []
        for user in range(n_users):
            row = [int((article + user) % 3 == 0) for article in range(n_documents)]
            matrix.append(row)
        return matrix

    self.ratings_matrix = numpy.array(mock_get_ratings_matrix())
    # The patch is installed on the class itself and is not undone in this method.
    DataParser.get_ratings_matrix = mock_get_ratings_matrix

    self.evaluator = Evaluator(self.ratings_matrix)
    self.cf = CollaborativeFiltering(self.initializer, self.evaluator, self.hyperparameters,
                                     self.options, load_matrices=True)
    self.cf.train()
    self.cf.evaluator.k_folds = self.k_folds

    self.test_data = self.cf.test_data
    self.predictions = self.cf.get_predictions()
    self.rounded_predictions = self.cf.rounded_predictions()
def run_collaborative(self):
    """
    Runs collaborative filtering.

    Trains a CollaborativeFiltering model built from this runner's settings and
    prints a one-line summary formatted from the values returned by train().
    """
    recommender = CollaborativeFiltering(self.initializer, self.evaluator, self.hyperparameters,
                                         self.options, self.verbose, self.load_matrices,
                                         self.dump, self.train_more)
    metrics = recommender.train()

    # One placeholder per reported value, consumed positionally from `metrics`.
    summary_fields = [
        'Test sum {:.2f}',
        'Train sum {:.2f}',
        'Final error {:.5f}',
        'train recall {:.5f}',
        'test recall {:.5f}',
        'recall@200 {:.5f}',
        'ratio {:.5f}',
        'mrr@5 {:.5f}',
        'ndcg@5 {:.5f}',
        'mrr@10 {:.5f}',
        'ndcg@10 {:.5f}',
    ]
    summary_template = 'Summary: ' + ', '.join(summary_fields)
    print(summary_template.format(*metrics))
def runTest(self):
    """
    Build a grid search over a collaborative filtering recommender and run
    the key-generator, combinations-generator and grid-search checks on it.
    """
    ratings_evaluator = Evaluator(self.ratings_matrix)
    recommender = CollaborativeFiltering(self.initializer, ratings_evaluator, self.initial_config,
                                         self.options, load_matrices=True)
    search = GridSearch(recommender, self.hyperparameters, False)

    # The checks run in this fixed order against the same GridSearch instance.
    for check_name in ('checkKeyGenerator', 'checkCombinationsGenerator', 'checkGridSearch'):
        getattr(self, check_name)(search)
def run_collaborative(self):
    """
    Runs collaborative filtering.

    Trains the model, requests its evaluation report, then prints the recall
    of the rounded predictions against the ratings followed by recall@1.
    """
    recommender = CollaborativeFiltering(self.initializer, self.evaluator, self.hyperparameters,
                                         self.options, self.verbose, self.load_matrices,
                                         self.dump)
    recommender.train()
    recommender.get_evaluation_report()

    overall_recall = recommender.evaluator.calculate_recall(recommender.ratings,
                                                            recommender.rounded_predictions())
    print(overall_recall)

    recall_at_one = recommender.evaluator.recall_at_x(1,
                                                      recommender.get_predictions(),
                                                      recommender.test_data,
                                                      recommender.rounded_predictions())
    print(recall_at_one)
def run_grid_search(self):
    """
    Runs grid search.

    Searches a fixed grid of `_lambda` and `n_factors` values over a
    collaborative filtering recommender and prints every result returned.
    """
    search_space = {
        '_lambda': [0.00001, 0.01, 0.1, 0.5, 10],
        'n_factors': [100, 200, 300, 400, 500],
    }
    recommender = CollaborativeFiltering(self.initializer, self.evaluator, self.hyperparameters,
                                         self.options, self.verbose, self.load_matrices,
                                         self.dump, self.train_more)
    searcher = GridSearch(recommender, search_space, self.verbose)

    # train() yields (best parameters, all results); only the latter is printed.
    _, every_result = searcher.train()
    for entry in every_result:
        print(entry)
def runTest(self):
    """
    Check a freshly trained collaborative filtering recommender: its
    dimensions, its type, the value range and shape of its ratings and rounded
    predictions, the recall range and the type of a single prediction.
    """
    expected_shape = (self.users, self.documents)
    tolerance = 1e-6

    def check_unit_range_matrix(matrix):
        # Values must lie in [0, 1] up to the tolerance, with the full shape.
        self.assertLessEqual(numpy.amax(matrix), 1 + tolerance)
        self.assertGreaterEqual(numpy.amin(matrix), -tolerance)
        self.assertTrue(matrix.shape == expected_shape)

    cf = CollaborativeFiltering(self.initializer, self.evaluator, self.hyperparameters,
                                self.options, load_matrices=False)
    self.assertEqual(cf.n_factors, self.n_factors)
    self.assertEqual(cf.n_items, self.documents)

    cf.train()
    self.assertEqual(cf.get_predictions().shape, expected_shape)
    self.assertTrue(isinstance(cf, AbstractRecommender))

    ratings = cf.get_ratings()
    check_unit_range_matrix(ratings)
    check_unit_range_matrix(cf.rounded_predictions())

    recall = cf.evaluator.calculate_recall(ratings, cf.get_predictions())
    self.assertTrue(-tolerance <= recall <= 1 + tolerance)

    # Draw the user index first, then the item index.
    user_index = int(numpy.random.random() * self.users)
    item_index = int(numpy.random.random() * self.documents)
    single_prediction = cf.predict(user_index, item_index)
    self.assertTrue(isinstance(single_prediction, numpy.float64))
def __init__(self, initializer=None, abstracts_preprocessor=None, ratings=None, config=None,
             process_parser=False, verbose=False, load_matrices=True, dump_matrices=True,
             train_more=True, random_seed=False, results_file_name='top_recommendations'):
    """
    Constructor of the RecommenderSystem.

    :param ModelInitializer initializer: A model initializer. This argument is not read;
        a new ModelInitializer is always built from the configured hyperparameters.
    :param AbstractsPreprocessor abstracts_preprocessor: A preprocessor of abstracts, if None then queried.
    :param int[][] ratings: Ratings matrix; if None, matrix gets queried from the database.
    :param config: Configuration handed as-is to RecommenderConfiguration.
    :param boolean process_parser: A flag deciding whether to process the dataparser.
    :param boolean verbose: A flag deciding whether to print progress.
    :param boolean load_matrices: A flag forwarded to the recommenders
        (presumably to load previously dumped matrices -- confirm against the recommenders).
    :param boolean dump_matrices: A flag for saving output matrices.
    :param boolean train_more: train_more the collaborative filtering after loading matrices.
    :param boolean random_seed: A flag to determine if we will use random seed or not.
    :param str results_file_name: Top recommendations results' file name ('.dat' is appended).
    :raises NameError: If the configured error metric, content-based method,
        collaborative filtering method, recommender type, or their combination is not supported.
    """
    if process_parser:
        DataParser.process()

    if ratings is None:
        self.ratings = numpy.array(DataParser.get_ratings_matrix())
    else:
        self.ratings = ratings

    if abstracts_preprocessor is None:
        self.abstracts_preprocessor = AbstractsPreprocessor(DataParser.get_abstracts(),
                                                            *DataParser.get_word_distribution())
    else:
        self.abstracts_preprocessor = abstracts_preprocessor

    # Get configurations
    self.config = RecommenderConfiguration(config)

    # Set flags
    self.results_file_name = results_file_name + '.dat'
    self._verbose = verbose
    self._dump_matrices = dump_matrices
    self._load_matrices = load_matrices
    self._train_more = train_more
    self._split_type = 'user'
    self._random_seed = random_seed

    self.set_hyperparameters(self.config.get_hyperparameters())
    self.set_options(self.config.get_options())

    self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iter, self._verbose)

    error_metric = self.config.get_error_metric()
    if error_metric != 'RMS':
        raise NameError("Not a valid error metric %s. Only option is 'RMS'" % error_metric)
    self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor,
                               self._random_seed, self._verbose)

    # Positional arguments shared by every recommender constructed below.
    recommender_args = (self.initializer, self.evaluator, self.hyperparameters, self.options,
                        self._verbose, self._load_matrices, self._dump_matrices)

    content_based = self.config.get_content_based()
    collaborative_filtering = self.config.get_collaborative_filtering()
    recommender_type = self.config.get_recommender()

    # Initialize content based.
    if content_based == 'None':
        self.content_based = ContentBased(*recommender_args)
    elif content_based == 'LDA':
        self.content_based = LDARecommender(*recommender_args)
    elif content_based == 'LDA2Vec':
        self.content_based = LDA2VecRecommender(*recommender_args)
    else:
        raise NameError("Not a valid content based %s. Options are 'None', "
                        "'LDA', 'LDA2Vec'" % content_based)

    # Initialize collaborative filtering.
    if collaborative_filtering == 'ALS':
        is_hybrid = recommender_type == 'hybrid'
        # NOTE(review): this rejects content based 'None' for every ALS setup,
        # although the message only mentions the hybrid recommender. Confirm
        # whether the check should be limited to `is_hybrid`.
        if content_based == 'None':
            raise NameError("Not valid content based 'None' with hybrid recommender")
        self.collaborative_filtering = CollaborativeFiltering(
            *(recommender_args + (self._train_more, is_hybrid)))
    elif collaborative_filtering == 'SDAE':
        # Validate before constructing, so an invalid configuration fails
        # without first building the SDAE recommender.
        if content_based != 'None':
            raise NameError("Not a valid content based %s with SDAE. You can only use 'None'"
                            % content_based)
        self.collaborative_filtering = SDAERecommender(*recommender_args)
    elif collaborative_filtering == 'None':
        if recommender_type != 'itembased':
            raise NameError("None collaborative filtering is only valid with itembased recommender type")
        if content_based == 'None':
            raise NameError("Not valid content based 'None' with item-based recommender")
        self.collaborative_filtering = None
    else:
        raise NameError("Not a valid collaborative filtering %s. "
                        "Only options are 'None', 'ALS', 'SDAE'" % collaborative_filtering)

    # Initialize recommender
    if recommender_type == 'itembased':
        self.recommender = self.content_based
    elif recommender_type == 'userbased':
        self.recommender = self.collaborative_filtering
    elif recommender_type == 'hybrid':
        # The hybrid recommender is this object itself.
        self.recommender = self
    else:
        raise NameError("Invalid recommender type %s. "
                        "Only options are 'userbased','itembased', and 'hybrid'" % recommender_type)
def __init__(self, initializer=None, abstracts_preprocessor=None, ratings=None, config=None,
             process_parser=False, verbose=False, load_matrices=True, dump_matrices=True,
             train_more=True):
    """
    Constructor of the RecommenderSystem.

    :param ModelInitializer initializer: A model initializer (not read here; a new
        ModelInitializer is built from the configured hyperparameters).
    :param AbstractsPreprocessor abstracts_preprocessor: A preprocessor of abstracts, if None then queried.
    :param int[][] ratings: Ratings matrix; if None, matrix gets queried from the database.
    :param config: Configuration handed as-is to RecommenderConfiguration.
    :param boolean process_parser: A flag deciding whether to process the dataparser.
    :param boolean verbose: A flag deciding whether to print progress.
    :param boolean load_matrices: A flag forwarded to the recommenders.
    :param boolean dump_matrices: A flag for saving output matrices.
    :param boolean train_more: train_more the collaborative filtering after loading matrices.
    :raises NameError: If the configured error metric, content-based method,
        collaborative filtering method or recommender type is not supported.
    """
    if process_parser:
        DataParser.process()

    self.ratings = numpy.array(DataParser.get_ratings_matrix()) if ratings is None else ratings

    if abstracts_preprocessor is not None:
        self.abstracts_preprocessor = abstracts_preprocessor
    else:
        self.abstracts_preprocessor = AbstractsPreprocessor(DataParser.get_abstracts(),
                                                            *DataParser.get_word_distribution())

    # Get configurations
    self.config = RecommenderConfiguration(config)
    self.set_hyperparameters(self.config.get_hyperparameters())
    self.set_options(self.config.get_options())

    # Set flags
    self._verbose = verbose
    self._dump_matrices = dump_matrices
    self._load_matrices = load_matrices
    self._train_more = train_more

    self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iter, self._verbose)

    metric_name = self.config.get_error_metric()
    if metric_name != 'RMS':
        raise NameError("Not a valid error metric %s. Only option is 'RMS'" % metric_name)
    self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor)

    # Positional arguments common to every recommender built below.
    shared_args = (self.initializer, self.evaluator, self.hyperparameters, self.options,
                   self._verbose, self._load_matrices, self._dump_matrices)

    # Initialize content based.
    content_name = self.config.get_content_based()
    if content_name == 'None':
        self.content_based = ContentBased(*shared_args)
    elif content_name == 'LDA':
        self.content_based = LDARecommender(*shared_args)
    elif content_name == 'LDA2Vec':
        self.content_based = LDA2VecRecommender(*shared_args)
    else:
        raise NameError("Not a valid content based %s. Options are 'None', "
                        "'LDA', 'LDA2Vec'" % content_name)

    # Initialize collaborative filtering.
    collaborative_name = self.config.get_collaborative_filtering()
    if collaborative_name != 'ALS':
        raise NameError("Not a valid collaborative filtering %s. "
                        "Only option is 'ALS'" % collaborative_name)
    self.collaborative_filtering = CollaborativeFiltering(*(shared_args + (self._train_more,)))

    # Initialize recommender
    recommender_name = self.config.get_recommender()
    if recommender_name == 'itembased':
        self.recommender = self.content_based
    elif recommender_name == 'userbased':
        self.recommender = self.collaborative_filtering
    else:
        raise NameError("Invalid recommender type %s. "
                        "Only options are 'userbased' and 'itembased'" % recommender_name)
class RecommenderSystem(AbstractRecommender):
    """
    A class that will combine the content-based and collaborative-filtering,
    in order to provide the main functionalities of recommendations.
    """

    def __init__(self, initializer=None, abstracts_preprocessor=None, ratings=None, config=None,
                 process_parser=False, verbose=False, load_matrices=True, dump_matrices=True,
                 train_more=True):
        """
        Constructor of the RecommenderSystem.

        :param ModelInitializer initializer: A model initializer (not read here; a new
            ModelInitializer is built from the configured hyperparameters).
        :param AbstractsPreprocessor abstracts_preprocessor: A preprocessor of abstracts, if None then queried.
        :param int[][] ratings: Ratings matrix; if None, matrix gets queried from the database.
        :param config: Configuration handed as-is to RecommenderConfiguration.
        :param boolean process_parser: A flag deciding whether to process the dataparser.
        :param boolean verbose: A flag deciding whether to print progress.
        :param boolean load_matrices: A flag forwarded to the recommenders.
        :param boolean dump_matrices: A flag for saving output matrices.
        :param boolean train_more: train_more the collaborative filtering after loading matrices.
        :raises NameError: If the configured error metric, content-based method,
            collaborative filtering method or recommender type is not supported.
        """
        if process_parser:
            DataParser.process()

        self.ratings = numpy.array(DataParser.get_ratings_matrix()) if ratings is None else ratings

        if abstracts_preprocessor is not None:
            self.abstracts_preprocessor = abstracts_preprocessor
        else:
            self.abstracts_preprocessor = AbstractsPreprocessor(DataParser.get_abstracts(),
                                                                *DataParser.get_word_distribution())

        # Get configurations
        self.config = RecommenderConfiguration(config)
        self.set_hyperparameters(self.config.get_hyperparameters())
        self.set_options(self.config.get_options())

        # Set flags
        self._verbose = verbose
        self._dump_matrices = dump_matrices
        self._load_matrices = load_matrices
        self._train_more = train_more

        self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iter, self._verbose)

        metric_name = self.config.get_error_metric()
        if metric_name != 'RMS':
            raise NameError("Not a valid error metric %s. Only option is 'RMS'" % metric_name)
        self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor)

        # Positional arguments common to every recommender built below.
        shared_args = (self.initializer, self.evaluator, self.hyperparameters, self.options,
                       self._verbose, self._load_matrices, self._dump_matrices)

        # Initialize content based.
        content_name = self.config.get_content_based()
        if content_name == 'None':
            self.content_based = ContentBased(*shared_args)
        elif content_name == 'LDA':
            self.content_based = LDARecommender(*shared_args)
        elif content_name == 'LDA2Vec':
            self.content_based = LDA2VecRecommender(*shared_args)
        else:
            raise NameError("Not a valid content based %s. Options are 'None', "
                            "'LDA', 'LDA2Vec'" % content_name)

        # Initialize collaborative filtering.
        collaborative_name = self.config.get_collaborative_filtering()
        if collaborative_name != 'ALS':
            raise NameError("Not a valid collaborative filtering %s. "
                            "Only option is 'ALS'" % collaborative_name)
        self.collaborative_filtering = CollaborativeFiltering(*(shared_args + (self._train_more,)))

        # Initialize recommender
        recommender_name = self.config.get_recommender()
        if recommender_name == 'itembased':
            self.recommender = self.content_based
        elif recommender_name == 'userbased':
            self.recommender = self.collaborative_filtering
        else:
            raise NameError("Invalid recommender type %s. "
                            "Only options are 'userbased' and 'itembased'" % recommender_name)

    @overrides
    def set_options(self, options):
        """
        Set the options of the recommender.

        Reads the number of iterations from 'n_iterations' and keeps a copy of
        the whole dictionary.

        :param dict options: A dictionary of the options.
        """
        self.n_iter = options['n_iterations']
        self.options = options.copy()

    @overrides
    def get_evaluation_report(self):
        """
        Return the evaluation report of the currently selected recommender.
        """
        return self.recommender.get_evaluation_report()

    @overrides
    def set_hyperparameters(self, hyperparameters):
        """
        Set the hyperparameters of the recommender and keep a copy of them.

        :param dict hyperparameters: hyperparameters of the recommender, contains _lambda and n_factors
        """
        self.n_factors = hyperparameters['n_factors']
        self._lambda = hyperparameters['_lambda']
        self.hyperparameters = hyperparameters.copy()

    @overrides
    def train(self):
        """
        Train the recommender on the given data.

        The content-based recommender is always trained. When the selected
        recommender is the collaborative filtering one, it is then trained with
        a copy of the content-based document-topic distribution. The selected
        recommender's predictions are stored in `self.predictions`; no value
        is returned.
        """
        verbose = self._verbose
        if verbose:
            print("Training content-based %s..." % self.content_based)
        self.content_based.train()

        uses_collaborative = self.recommender == self.collaborative_filtering
        assert uses_collaborative or self.recommender == self.content_based

        if uses_collaborative:
            # Copy so that the content-based distribution is not shared with the trainer.
            theta = self.content_based.get_document_topic_distribution().copy()
            if verbose:
                print("Training collaborative-filtering %s..." % self.collaborative_filtering)
            self.collaborative_filtering.train(theta)

        self.predictions = self.recommender.get_predictions()
        if verbose:
            print("done training...")