示例#1
0
    def rating_prediction(self):
        """Read ``self.file_read`` and collect rating-prediction statistics.

        Every non-blank line is split on ``self.space_type``; the first two
        fields are parsed as integer user and item ids and the third as a
        float feedback value. The instance attributes ``number_interactions``,
        ``triple_dataset``, ``dict_users``, ``dict_items``, ``list_users``,
        ``list_items`` and ``mean_feedback`` are updated along the way.

        :return: dict with the keys ``feedback`` (user -> {item: feedback}),
            ``users`` and ``items`` (sorted ids), ``du`` (user -> set of
            items), ``di`` (item -> set of users), ``mean_rates``,
            ``list_feedback`` (sorted triples), ``ni`` (interaction count),
            ``max`` and ``min`` (extreme feedback values read from the file).
        """
        check_error_file(self.file_read)

        ratings_by_user = {}
        seen_values = []

        with open(self.file_read) as source:
            for raw in source:
                if not raw.strip():
                    continue

                fields = raw.split(self.space_type)
                self.number_interactions += 1
                user = int(fields[0])
                item = int(fields[1])
                feedback = float(fields[2])

                ratings_by_user.setdefault(user, {})[item] = feedback
                self.triple_dataset.append((user, item, feedback))
                self.dict_users.setdefault(user, set()).add(item)
                self.dict_items.setdefault(item, set()).add(user)
                self.list_users.add(user)
                self.list_items.add(item)
                self.mean_feedback += feedback
                seen_values.append(feedback)

        self.triple_dataset = sorted(self.triple_dataset)
        # The value accumulated while reading is divided by the line count.
        self.mean_feedback /= float(self.number_interactions)
        self.list_users = sorted(self.list_users)
        self.list_items = sorted(self.list_items)

        return {
            'feedback': ratings_by_user,
            'users': self.list_users,
            'items': self.list_items,
            'du': self.dict_users,
            'di': self.dict_items,
            'mean_rates': self.mean_feedback,
            'list_feedback': self.triple_dataset,
            'ni': self.number_interactions,
            'max': max(seen_values),
            'min': min(seen_values),
        }
    def simple_evaluation(self, file_result, file_test):
        """Compute RMSE and MAE between a prediction file and a test file.

        Both files are loaded with ``ReadFile(...).main_information()``. Only
        the (user, item) pairs of the test set that also appear in the
        predictions are compared; pairs missing from the predictions are
        skipped.

        :param file_result: path of the file holding the predicted ratings.
        :param file_test: path of the file holding the ground-truth ratings.
        :return: tuple ``(rmse, mae)``; ``(0, 0)`` when no pair could be
            compared.
        """
        # Verify that the files are valid
        check_error_file(file_result)
        check_error_file(file_test)

        predict = ReadFile(file_result, space_type=self.space_type)
        predict.main_information()
        test = ReadFile(file_test, space_type=self.space_type)
        test.main_information()

        squared_error_sum = 0
        absolute_error_sum = 0
        count_comp = 0
        for user in test.list_users:
            for item in test.dict_users[user]:
                try:
                    rui_predict = float(predict.dict_users[user][item])
                except KeyError:
                    # No prediction for this test pair: leave it out.
                    continue
                rui_test = float(test.dict_users[user][item])
                squared_error_sum += math.pow((rui_predict - rui_test), 2)
                absolute_error_sum += math.fabs(rui_predict - rui_test)
                count_comp += 1

        if count_comp == 0:
            return 0, 0

        rmse = math.sqrt(float(squared_error_sum) / float(count_comp))
        # MAE is the plain mean of the absolute errors; it takes no square root.
        mae = float(absolute_error_sum) / float(count_comp)

        return rmse, mae
示例#3
0
 def triple_information(self):
     """Append every ``[user, item, feedback]`` row of the file to ``self.triple_dataset``.

     Non-blank lines are split on ``self.space_type``. The first two fields
     are parsed as integers; the third is kept as a string with newline
     characters removed. ``self.number_interactions`` is incremented once per
     non-blank line.
     """
     check_error_file(self.file_read)
     with open(self.file_read) as source:
         for raw in source:
             if not raw.strip():
                 continue
             fields = raw.split(self.space_type)
             self.number_interactions += 1
             record = [int(fields[0]), int(fields[1]), fields[2].replace("\n", "")]
             self.triple_dataset.append(record)
    def all_but_one_evaluation(self, file_result, file_test, n_ranks=list([1, 3, 5, 10])):
        """Evaluate rankings against the first test item of every user.

        Both files are loaded with
        ``ReadFile(...).main_information_item_recommendation()``. For each
        user only the first item listed in the test file is kept as relevant.
        For every cut-off ``n`` in ``n_ranks`` the top-``n`` predicted items
        are intersected with that item, and the per-user scores are summed
        and divided by the number of test users.

        :param file_result: path of the file holding the predicted rankings.
        :param file_test: path of the file holding the test interactions.
        :param n_ranks: cut-off values; each must be >= 1, otherwise an error
            message is printed and the program exits.
        :return: flat list holding, for each ``n`` in order, precision and
            recall (skipped when ``self.only_map`` is true) followed by MAP.
        """
        for path in (file_result, file_test):
            check_error_file(path)

        predict = ReadFile(file_result, space_type=self.space_type)
        predict.main_information_item_recommendation()
        test = ReadFile(file_test, space_type=self.space_type)
        test.main_information_item_recommendation()

        num_user = len(test.list_users)

        # Reduce every user's test list to its first entry.
        for user in test.list_users:
            first_item = test.dict_users[user][0]
            test.dict_users[user] = [first_item]

        final_values = []
        for n in n_ranks:
            if n < 1:
                print('Error: N must >= 1.')
                sys.exit()

            precisions = []
            recalls = []
            avg_precisions = []

            for user in test.list_users:
                relevant = test.dict_users[user]
                n_relevant = len(relevant)

                try:
                    top_n = predict.dict_users[user][:n]
                except KeyError:
                    # A user without predictions adds nothing to the sums.
                    continue

                hits = list(set(top_n).intersection(set(relevant)))
                if not hits:
                    continue

                precisions.append(float(len(hits)) / float(n))
                recalls.append(float(len(hits)) / float(n_relevant))

                avg_prec_sum = 0
                for hit_count, item in enumerate(hits, start=1):
                    avg_prec_sum += float(hit_count) / float(relevant.index(item) + 1)
                avg_precisions.append(float(avg_prec_sum) / float(n_relevant))

            if not self.only_map:
                final_values.append(sum(precisions) / float(num_user))
                final_values.append(sum(recalls) / float(num_user))
            final_values.append(sum(avg_precisions) / float(num_user))

        return final_values
    def simple_evaluation(self, file_result, file_test):
        """Load a ranking file and a test file and pass them to ``default_evaluation``.

        :param file_result: path of the file holding the predicted rankings.
        :param file_test: path of the file holding the test interactions.
        :return: whatever ``self.default_evaluation`` returns for the
            predictions' ``dict_users`` and the loaded test reader.
        """
        # Both paths are checked before either file is read.
        for path in (file_result, file_test):
            check_error_file(path)

        ranking = ReadFile(file_result, space_type=self.space_type)
        ranking.main_information_item_recommendation()

        ground_truth = ReadFile(file_test, space_type=self.space_type)
        ground_truth.main_information_item_recommendation()

        return self.default_evaluation(ranking.dict_users, ground_truth)
示例#6
0
 def read_matrix(self):
     """Load the file as a float matrix, one row per non-blank line.

     Each line is split on ``self.space_type`` and the last resulting field
     is discarded before the remaining fields are converted to float.

     :return: ``np.array`` built from the list of per-line float arrays.
     """
     check_error_file(self.file_read)
     with open(self.file_read) as source:
         rows = [
             # The final field of every line is dropped before conversion.
             np.array(raw.split(self.space_type)[:-1]).astype(float)
             for raw in source
             if raw.strip()
         ]
     return np.array(rows)
示例#7
0
    def read_metadata(self, l_items):
        """Read a three-column file and build a dense matrix over ``l_items``.

        Every non-blank line is split on ``self.space_type``; the first two
        fields are parsed as integers and the third as a float. Matrix rows
        follow ``sorted(l_items)`` and columns follow the sorted second-column
        ids found in the file. Entries of ``l_items`` that never occur in the
        file keep an all-zero row.

        :param l_items: iterable of ids used as the matrix rows.
        :return: dict with the keys ``feedback``, ``items``, ``metadata``,
            ``di``, ``dm``, ``mean_rates``, ``list_feedback``, ``ni``, ``max``,
            ``min`` and ``matrix``.
        """
        check_error_file(self.file_read)

        values_by_row = {}
        all_values = []

        with open(self.file_read) as source:
            for raw in source:
                if not raw.strip():
                    continue

                fields = raw.split(self.space_type)
                self.number_interactions += 1
                row_id = int(fields[0])
                col_id = int(fields[1])
                value = float(fields[2])

                values_by_row.setdefault(row_id, {})[col_id] = value
                self.triple_dataset.append((row_id, col_id, value))
                self.dict_users.setdefault(row_id, set()).add(col_id)
                self.dict_items.setdefault(col_id, set()).add(row_id)
                self.list_items.add(col_id)
                self.mean_feedback += value
                all_values.append(value)

        self.triple_dataset = sorted(self.triple_dataset)
        self.mean_feedback /= float(self.number_interactions)
        # Rows come from the caller-supplied ids, not from the file contents.
        self.list_users = sorted(l_items)
        self.list_items = sorted(self.list_items)

        row_of = {key: pos for pos, key in enumerate(self.list_users)}
        col_of = {key: pos for pos, key in enumerate(self.list_items)}

        matrix = np.zeros((len(self.list_users), len(self.list_items)))
        for key in self.list_users:
            if key not in values_by_row:
                # Id requested by the caller but absent from the file.
                continue
            for col_id, value in values_by_row[key].items():
                matrix[row_of[key], col_of[col_id]] = value

        return {
            'feedback': values_by_row,
            'items': self.list_users,
            'metadata': self.list_items,
            'di': self.dict_users,
            'dm': self.dict_items,
            'mean_rates': self.mean_feedback,
            'list_feedback': self.triple_dataset,
            'ni': self.number_interactions,
            'max': max(all_values),
            'min': min(all_values),
            'matrix': matrix,
        }
示例#8
0
 def read_rankings(self):
     """Read ``user item score`` lines into ``self.dict_users``.

     For every non-blank line the score is stored under
     ``self.dict_users[user][item]``, added to the running total in
     ``self.average_scores[user]`` and counted in
     ``self.num_user_interactions[user]``.

     :return: tuple ``(self.dict_users, scores)`` where ``scores`` lists every
         score in file order.
     """
     check_error_file(self.file_read)
     scores = []
     with open(self.file_read) as source:
         for raw in source:
             if not raw.strip():
                 continue
             fields = raw.split(self.space_type)
             user = int(fields[0])
             item = int(fields[1])
             score = float(fields[2])

             self.dict_users.setdefault(user, {})[item] = score
             scores.append(score)
             # Holds the per-user sum of scores, not a mean.
             self.average_scores[user] = self.average_scores.get(user, 0) + score
             self.num_user_interactions[user] = self.num_user_interactions.get(user, 0) + 1
     return self.dict_users, scores
示例#9
0
 def split_dataset(self):
     """Read each file listed in ``self.file_read`` into its own user dictionary.

     For every path a fresh ``{user: {item: feedback}}`` mapping is assigned
     to ``self.dict_users``, filled from the file and appended to
     ``self.individual_interaction``. Each ``(user, item)`` pair is also
     appended to ``self.triple_dataset`` and ``self.number_interactions`` is
     incremented once per non-blank line, across all files.
     """
     for path in self.file_read:
         per_user = dict()
         self.dict_users = per_user
         check_error_file(path)
         with open(path) as source:
             for raw in source:
                 if not raw.strip():
                     continue
                 fields = raw.split(self.space_type)
                 self.number_interactions += 1
                 user = int(fields[0])
                 item = int(fields[1])
                 value = float(fields[2])
                 self.triple_dataset.append((user, item))
                 per_user.setdefault(user, {})[item] = value
         self.individual_interaction.append(per_user)
示例#10
0
    def return_matrix(self, implicit=False):
        """Read the feedback file and build a dense user x item matrix.

        Rows and columns follow the sorted user and item ids found in the
        file. ``self.dict_items`` is filled with column index -> set of row
        indices.

        :param implicit: when true every observed cell is set to 1, otherwise
            to the feedback value read from the file.
        :return: dict with the keys ``matrix``, ``map_user`` (row index ->
            user id), ``map_item`` (column index -> item id),
            ``number_interactions``, ``di``, ``mu`` (user id -> row index),
            ``users``, ``items`` and ``feedback``.
        """
        check_error_file(self.file_read)
        with open(self.file_read) as source:
            for raw in source:
                if not raw.strip():
                    continue
                fields = raw.split(self.space_type)
                self.number_interactions += 1
                user = int(fields[0])
                item = int(fields[1])
                feedback = float(fields[2])
                self.list_users.add(user)
                self.list_items.add(item)
                self.dict_users.setdefault(user, {})[item] = feedback

        self.list_users = sorted(self.list_users)
        self.list_items = sorted(self.list_items)

        # Forward (id -> index) and reverse (index -> id) lookups.
        map_user = {user: row for row, user in enumerate(self.list_users)}
        map_index_user = dict(enumerate(self.list_users))
        map_item = {item: col for col, item in enumerate(self.list_items)}
        map_index_item = dict(enumerate(self.list_items))

        matrix = np.zeros((len(self.list_users), len(self.list_items)))

        for user in self.list_users:
            row = map_user[user]
            for item, feedback in self.dict_users[user].items():
                col = map_item[item]
                matrix[row, col] = 1 if implicit else feedback
                self.dict_items.setdefault(col, set()).add(row)

        return {
            "matrix": matrix,
            "map_user": map_index_user,
            "map_item": map_index_item,
            "number_interactions": self.number_interactions,
            "di": self.dict_items,
            "mu": map_user,
            "users": self.list_users,
            "items": self.list_items,
            "feedback": self.dict_users,
        }
示例#11
0
    def main_information_item_recommendation(self):
        """Read ``user item`` pairs and fill the item-recommendation attributes.

        Every non-blank line is split on ``self.space_type`` and its first two
        fields are parsed as integers; if that parsing raises ``ValueError``
        an error message is printed and the program exits. Per-user and
        per-item counters, the id collections and ``self.dict_users`` (user ->
        list of items in file order) are updated, and ``list_users`` /
        ``list_items`` end up as sorted lists.
        """
        check_error_file(self.file_read)
        with open(self.file_read) as source:
            for raw in source:
                if not raw.strip():
                    continue

                fields = raw.split(self.space_type)
                self.number_interactions += 1
                try:
                    user = int(fields[0])
                    item = int(fields[1])
                except ValueError:
                    print('Error: Space type is invalid!')
                    sys.exit()

                self.num_user_interactions[user] = self.num_user_interactions.get(user, 0) + 1
                self.num_items_interactions[item] = self.num_items_interactions.get(item, 0) + 1
                self.list_users.add(user)
                self.list_items.add(item)
                self.dict_users.setdefault(user, []).append(item)

        self.list_users = sorted(self.list_users)
        self.list_items = sorted(self.list_items)
示例#12
0
    def return_bprmf(self):
        """Read the feedback file and list, per user, the items never interacted with.

        Every non-blank line is split on ``self.space_type``; the first two
        fields are parsed as integer user and item ids. The third field is
        parsed as a float but its value is not stored.

        :return: dict with the keys ``users`` and ``items`` (sorted ids),
            ``feedback`` (user -> list of items in file order), ``not_seen``
            (user -> list of items absent from that user's feedback) and
            ``number_interactions``.
        """
        check_error_file(self.file_read)
        with open(self.file_read) as infile:
            for line in infile:
                if line.strip():
                    inline = line.split(self.space_type)
                    self.number_interactions += 1
                    user, item = int(inline[0]), int(inline[1])
                    # Parsed only so a malformed third column still raises.
                    float(inline[2])
                    self.list_users.add(user)
                    self.list_items.add(item)
                    self.dict_users.setdefault(user, list()).append(item)

        self.list_users = sorted(self.list_users)
        self.list_items = sorted(self.list_items)

        # Build the full item set once instead of once per user.
        all_items = set(self.list_items)
        not_seen = {user: list(all_items - set(self.dict_users[user])) for user in self.list_users}

        return {"users": self.list_users, "items": self.list_items, "feedback": self.dict_users, "not_seen": not_seen,
                "number_interactions": self.number_interactions}