def transform(self, ids, features, ratings, users_ratings, users, cv_results_file, images_indexes,
              true_objects_indexes, false_objects_indexes, paths, use_user_data=True, z_score=False):
    """
    Calculates latent matrices and saves ratings predictions

    Unless ``self.selection_algorithm`` is ``'random'``, the ids, features and
    ratings are first replaced by the output of a ``Preselection`` step.

    :param ids: vector of ids, rows indexes corresponding to features and ratings indexes
    :param features: matrix of features/objects as rows
    :param ratings: vector of average ratings
    :param users_ratings: matrix of all users ratings for experiences, rows indexes
        corresponding to users indexes
    :param users: matrix of additional data for users
    :param cv_results_file: file for saving cv scores (forwarded to ``self.factorization``)
    :param images_indexes: not read by this method
    :param true_objects_indexes: forwarded to the ``Preselection`` constructor
    :param false_objects_indexes: forwarded to the ``Preselection`` constructor
    :param paths: forwarded to ``Preselection.transform``
    :param use_user_data: forwarded to ``self.factorization``
    :param z_score: stored on the instance as ``self.z_score``
    """
    run_preselection = self.selection_algorithm != 'random'
    if run_preselection:
        selector = Preselection(true_objects_indexes, false_objects_indexes, self.selection_algorithm)
        ids, features, ratings = selector.transform(paths, ids, ratings, features)

    # Store the (possibly preselected) inputs on the instance before the
    # processing steps below run.
    self.ids, self.features, self.ratings = ids, features, ratings
    self.users_ratings, self.users = users_ratings, users
    self.unique_ratings = list(set(ratings))
    self.unique_ids = list(set(self.ids))
    self.z_score = z_score

    # Processing steps, in the same order as before: index the objects,
    # select objects per id, prepare the factorization data, then factorize.
    self.save_objects_for_ids()
    self.save_ids_to_i()
    self.save_data_for_factorization(self.object_selection())
    self.factorization(cv_results_file, use_user_data)
def _run_preselection_example():
    """Run preselection on the generated data set and write its results file."""
    # Parameters
    data_directory = '../data/generated-data-r-2-n-6-4'
    features_path = '../data/features-generated-data-r-2-n-6-4'
    results_file = '/results-preselection/generated-data-r-2-n-6-4.csv'
    # Alternative split that was tried before: true = [0..7], false = [8, 9].
    true_objects_indexes = [0, 1, 2, 3, 4, 5]
    false_objects_indexes = [6, 7, 8, 9]

    # NOTE(review): Preselection is constructed here with
    # (data_directory, features_path, true, false) and transform() gets a
    # results_file keyword, while the transform() method in this file builds
    # Preselection(true, false, selection_algorithm) and calls
    # transform(paths, ids, ratings, features). Confirm which signature is current.
    preselection = Preselection(data_directory, features_path, true_objects_indexes, false_objects_indexes)
    preselection.transform(results_file=results_file)
    # preselection.evaluate() stays disabled, as in the original script.


def _run_text_example():
    """Run and evaluate the kNN text classification example."""
    # Parameters
    data_directory = '../data/data-real-r-3-text'
    results_file = '/results-text/all_knn.csv'

    classification = ClassificationText(data_directory, algorithm='knn', feature_agglomeration=False,
                                        selection='none')
    classification.transform(results_file=results_file)
    classification.evaluate()


def _run_images_example():
    """Run and evaluate the k-means + kNN image classification example."""
    # Parameters
    data_directory = '../data/data-real-r-3'
    features_path = '../data/features-data-real-r-3'
    results_file = '/results/kmeans_knn.csv'

    classification = Classification(data_directory, features_path, algorithm='knn',
                                    feature_agglomeration=False, selection='kmeans')
    classification.transform(results_file=results_file)
    classification.evaluate()


def main(example='preselection'):
    """
    Run one of the bundled examples and terminate the interpreter.

    Previously the text and images examples sat behind ``exit()`` calls and
    could never run; they are now selectable by name. Calling ``main()``
    without arguments behaves as before: the preselection example runs and
    the process exits.

    :param example: name of the example to run: ``'preselection'`` (default),
        ``'text'`` or ``'images'``
    :raises ValueError: if ``example`` is not one of the known names
    :raises SystemExit: always, once the selected example has finished
    """
    runners = {
        'preselection': _run_preselection_example,
        'text': _run_text_example,
        'images': _run_images_example,
    }
    if example not in runners:
        raise ValueError('Unknown example %r, expected one of: %s'
                         % (example, ', '.join(sorted(runners))))

    runners[example]()
    # The original script stopped the interpreter right after the example via
    # the site-provided exit() helper. Raising SystemExit directly keeps that
    # behaviour without depending on the site module.
    raise SystemExit