def transform(self, ids, features, ratings, users_ratings, users, cv_results_file, images_indexes, true_objects_indexes, false_objects_indexes, paths, use_user_data=True, z_score=False):
        """
        Stores the input data on the instance, selects objects and runs the factorization

        Unless ``self.selection_algorithm`` is ``'random'``, ``ids``, ``features`` and
        ``ratings`` are first replaced by the result of ``Preselection.transform``.

        :param ids: vector of ids, rows indexes corresponding to features and ratings indexes
        :param features: matrix of features/objects as rows
        :param ratings: vector of average ratings
        :param users_ratings: matrix of all users ratings for experiences, rows indexes corresponding to users indexes
        :param users: matrix of additional data for users
        :param cv_results_file: file for saving cv scores
        :param images_indexes: not read by this method
        :param true_objects_indexes: passed to the ``Preselection`` constructor
        :param false_objects_indexes: passed to the ``Preselection`` constructor
        :param paths: passed to ``Preselection.transform``
        :param use_user_data: passed on to ``self.factorization``
        :param z_score: stored as ``self.z_score``
        """
        needs_preselection = self.selection_algorithm != 'random'
        if needs_preselection:
            ids, features, ratings = Preselection(
                true_objects_indexes, false_objects_indexes, self.selection_algorithm
            ).transform(paths, ids, ratings, features)

        # Keep the (possibly preselected) inputs on the instance for the later steps.
        self.ids, self.features, self.ratings = ids, features, ratings
        self.users_ratings, self.users = users_ratings, users
        self.unique_ratings = list(set(ratings))
        self.unique_ids = list(set(self.ids))
        self.z_score = z_score

        # Both helpers run before object selection, in this order.
        self.save_objects_for_ids()
        self.save_ids_to_i()

        # The selection result feeds straight into the factorization input.
        self.save_data_for_factorization(self.object_selection())
        self.factorization(cv_results_file, use_user_data)
# Example #2
0
def _run_preselection_example():
    """Build a ``Preselection`` for the generated-data paths and call its ``transform``."""
    # Parameters
    data_directory = '../data/generated-data-r-2-n-6-4'
    features_path = '../data/features-generated-data-r-2-n-6-4'
    results_file = '/results-preselection/generated-data-r-2-n-6-4.csv'
    # Alternative split kept from the original script:
    #true_objects_indexes = [0, 1, 2, 3, 4, 5, 6, 7]
    #false_objects_indexes = [8, 9]
    true_objects_indexes = [0, 1, 2, 3, 4, 5]
    false_objects_indexes = [6, 7, 8, 9]

    preselection = Preselection(data_directory, features_path, true_objects_indexes, false_objects_indexes)
    preselection.transform(results_file=results_file)
    # Evaluation was disabled in the original script:
    #preselection.evaluate()


def _run_text_example():
    """Build a ``ClassificationText`` (knn) for the text data path, then transform and evaluate."""
    # Parameters
    data_directory = '../data/data-real-r-3-text'
    results_file = '/results-text/all_knn.csv'

    classification = ClassificationText(data_directory, algorithm='knn', feature_agglomeration=False, selection='none')
    classification.transform(results_file=results_file)
    classification.evaluate()


def _run_images_example():
    """Build a ``Classification`` (knn, kmeans selection) for the image data paths, then transform and evaluate."""
    # Parameters
    data_directory = '../data/data-real-r-3'
    features_path = '../data/features-data-real-r-3'
    results_file = '/results/kmeans_knn.csv'

    classification = Classification(data_directory, features_path, algorithm='knn', feature_agglomeration=False,
                                    selection='kmeans')
    classification.transform(results_file=results_file)
    classification.evaluate()


def main():
    """
    Run the preselection example and terminate the interpreter.

    The text and image examples used to sit below an ``exit()`` call and could
    never execute; they now live in ``_run_text_example`` and
    ``_run_images_example``. To run a different example, swap the helper
    called below.

    :raises SystemExit: always, after the example has finished
    """
    # Imported locally so the block does not depend on a file-level import.
    import sys

    _run_preselection_example()

    # sys.exit() instead of the bare exit(): the latter is injected by the
    # ``site`` module for interactive use and is not guaranteed to exist.
    sys.exit()