Exemplos de Pro_estimate.get_pro em Python, exemplos de pro_estimate.Pro_estimate.get_pro em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: my_python.py Projeto: axuanwu/Bayes

 def class_item_hot(self):
     """Fill ``self.like_matrix`` with item-versus-class association scores.

     The matrix has ``top_k + 1`` rows (the last row collects every item
     whose index is ``top_k`` or larger) and ``class_num + 1`` columns.

     Pass 1 walks records ``0 .. record_num`` of ``self.user_item_array``
     and, for each record, adds 1 to ``like_matrix[item_row, class_col]``
     for the class columns currently held in a window of ``num_k`` slots.
     The window is restarted whenever the record's second field is 0,
     which the original comments describe as a user's first item.

     Pass 2 overwrites every count with
     ``Pro_estimate.get_pro(count, column_total)``.

     NOTE(review): ``Pro_estimate`` and ``self.add_circle`` are defined
     elsewhere; ``add_circle`` presumably does wrap-around index
     arithmetic over the ``num_k`` window -- confirm.
     """
     n_rows = self.top_k + 1
     n_cols = self.class_num + 1
     self.like_matrix = np.zeros((n_rows, n_cols), float)

     # ---- Pass 1: raw association counts --------------------------------
     recent_classes = np.array([-1] * self.num_k)  # -1 marks an empty slot
     slot = 0
     for rec_no in xrange(0, self.record_num + 1):
         record = self.user_item_array[rec_no, ]  # (item, time difference)
         if record[1] == 0:
             # First item of a user: forget the previous user's history.
             recent_classes = np.array([-1] * self.num_k)
             slot = 0
         raw_index = self.item_dict.get(record[0], 0)
         class_col = self.class_dict[self.item_class[raw_index]]
         recent_classes[slot] = class_col
         # Items beyond the top_k hottest share the residual (last) row.
         row = min(raw_index, self.top_k)
         # Credit this item to every class still held in the window,
         # newest first, stopping at the first empty slot.
         for back in xrange(0, self.num_k):
             seen_class = recent_classes[self.add_circle(slot, -back)]
             if seen_class == -1:
                 break
             self.like_matrix[row, seen_class] += 1
         slot = self.add_circle(slot, 1)  # advance to the next slot

     # ---- Pass 2: turn counts into probabilities ------------------------
     col_sum = self.like_matrix.sum(0)  # per-column totals
     pes = Pro_estimate()
     last_prior = -1  # prior used for the most recent solve_function call
     total_records = self.record_num + 1
     for row in xrange(0, n_rows):
         # Progress trace every 200 rows.
         if row % 200 == 0:
             print(time.time())
         if row != self.top_k:
             prior = 1.0 * self.item_array[row, 1] / total_records
         else:
             # Residual row: whatever share the top_k items do not cover.
             prior = 1 - 1.0 * sum(
                 self.item_array[0:self.top_k, 1]) / total_records
         if abs(1 - prior / last_prior) > 0.01:
             # The prior moved by more than 1 % relative to the stored
             # one: rebuild the estimator's prior distribution.
             pes.solve_function(prior)
             pes.set_array()
             last_prior = prior
         for col in xrange(0, n_cols):
             self.like_matrix[row, col] = pes.get_pro(
                 self.like_matrix[row, col], col_sum[col])

Exemplo n.º 2

0

Exibir arquivo

Arquivo: my_python.py Projeto: hpplinux/Bayes

 def class_item_hot(self):
     """Fill ``self.like_matrix`` with item-versus-class association scores.

     The matrix has ``top_k + 1`` rows (the last row collects every item
     whose index is ``top_k`` or larger) and ``class_num + 1`` columns.

     Pass 1 walks records ``0 .. record_num`` of ``self.user_item_array``
     and, for each record, adds 1 to ``like_matrix[item_row, class_col]``
     for the class columns currently held in a window of ``num_k`` slots.
     The window is restarted whenever the record's second field is 0,
     which the original comments describe as a user's first item.

     Pass 2 overwrites every count with
     ``Pro_estimate.get_pro(count, column_total)``.

     NOTE(review): ``Pro_estimate`` and ``self.add_circle`` are defined
     elsewhere; ``add_circle`` presumably does wrap-around index
     arithmetic over the ``num_k`` window -- confirm.
     """
     n_rows = self.top_k + 1
     n_cols = self.class_num + 1
     self.like_matrix = np.zeros((n_rows, n_cols), float)

     # ---- Pass 1: raw association counts --------------------------------
     recent_classes = np.array([-1] * self.num_k)  # -1 marks an empty slot
     slot = 0
     for rec_no in xrange(0, self.record_num + 1):
         record = self.user_item_array[rec_no,]  # (item, time difference)
         if record[1] == 0:
             # First item of a user: forget the previous user's history.
             recent_classes = np.array([-1] * self.num_k)
             slot = 0
         raw_index = self.item_dict.get(record[0], 0)
         class_col = self.class_dict[self.item_class[raw_index]]
         recent_classes[slot] = class_col
         # Items beyond the top_k hottest share the residual (last) row.
         row = min(raw_index, self.top_k)
         # Credit this item to every class still held in the window,
         # newest first, stopping at the first empty slot.
         for back in xrange(0, self.num_k):
             seen_class = recent_classes[self.add_circle(slot, -back)]
             if seen_class == -1:
                 break
             self.like_matrix[row, seen_class] += 1
         slot = self.add_circle(slot, 1)  # advance to the next slot

     # ---- Pass 2: turn counts into probabilities ------------------------
     col_sum = self.like_matrix.sum(0)  # per-column totals
     pes = Pro_estimate()
     last_prior = -1  # prior used for the most recent solve_function call
     total_records = self.record_num + 1
     for row in xrange(0, n_rows):
         # Progress trace every 200 rows.
         if row % 200 == 0:
             print(time.time())
         if row != self.top_k:
             prior = 1.0 * self.item_array[row, 1] / total_records
         else:
             # Residual row: whatever share the top_k items do not cover.
             prior = 1 - 1.0 * sum(self.item_array[0:self.top_k, 1]) / total_records
         if abs(1 - prior / last_prior) > 0.01:
             # The prior moved by more than 1 % relative to the stored
             # one: rebuild the estimator's prior distribution.
             pes.solve_function(prior)
             pes.set_array()
             last_prior = prior
         for col in xrange(0, n_cols):
             self.like_matrix[row, col] = pes.get_pro(self.like_matrix[row, col], col_sum[col])

Exemplo n.º 3

0

Exibir arquivo

Arquivo: my_python2.py Projeto: sdutheone/Bayes

    def count_items(self, item_id, user_str):
        """Build the recommendation line for ``item_id``.

        :param item_id: integer item id.
        :param user_str: comma-separated ids of the users who bought
            ``item_id``; a value starting with ``-1`` means "no buyers".
        :return: the string ``"<item_id> <id>,<id>,..."``.

        With no buyers, the line is the manually associated items from
        ``self.peo_exp`` followed by the hottest items.  Otherwise every
        purchase record of every buyer contributes ``order_weight``
        scores to its neighbours (up to ``self.range`` records before and
        after, within ``self.day_diff``); candidates are then scored with
        ``Pro_estimate`` and the best ``r_top_num`` are returned.

        NOTE(review): ``Pro_estimate`` is defined elsewhere; the meaning
        of ``solve_function`` / ``set_array`` / ``get_pro`` is taken from
        how they are called here -- confirm against its definition.
        """
        # Empty buyer list: manual items first, then the hottest items.
        if user_str[0:2] == "-1":
            manual = [str(x) for x in self.peo_exp.associated_items(item_id)]
            my_orders = np.argsort(-self.temp_item_array_hot)
            # At least one hot item is always appended.
            n_hot = max(self.r_top_num - len(manual), 1)
            hot = [str(int(self.item_array[idx, 0])) for idx in my_orders[:n_hot]]
            return str(item_id) + " " + ",".join(manual + hot)

        result_array = np.array([0.0] * (self.item_num + 1))
        for user in user_str.split(","):
            user_index = self.user_dict[int(user)]
            first_rec = self.user_array[user_index, 0]
            end_rec = self.user_array[user_index, 1]  # exclusive bound
            for i_record in xrange(first_rec, end_rec):
                if (i_record > first_rec) and (
                    self.user_item_array[i_record, 0] == self.user_item_array[i_record - 1, 0]
                ):
                    continue  # the same item bought twice in a row
                temp = self.user_item_array[i_record,]  # (item, time difference)

                # Look back (-1), then forward (+1), over up to self.range records.
                for direction in (-1, 1):
                    i_diff = 0
                    while i_diff < self.range:
                        near_ind = i_record + direction * (1 + i_diff)
                        if near_ind < first_rec or near_ind >= end_rec:
                            break  # left this user's record range
                        temp_now = self.user_item_array[near_ind, :]
                        if direction < 0:
                            gap = temp[1] - temp_now[1]
                        else:
                            gap = temp_now[1] - temp[1]
                        if not (gap <= self.day_diff):
                            break  # outside the time window
                        item_index = self.item_dict.get(temp_now[0], -1)
                        if item_index != -1:
                            result_array[item_index] += self.order_weight[i_diff]
                        # Always advance.  The previous code used `continue`
                        # for unknown items, which skipped this increment
                        # and looped forever on the same record.
                        i_diff += 1

        # Convert the accumulated weights into scores.
        array_sum = sum(result_array)
        temp_result_array = np.zeros((600, 2))  # rows of (item id, score)
        result_dict = {}  # item index -> row in temp_result_array
        my_orders1 = np.argsort(-result_array)  # by co-occurrence weight, descending
        my_orders2 = np.argsort(-self.temp_item_array_hot)  # by hotness, descending
        peo_result = self.peo_exp.associated_items(item_id)
        i_temp_result = 0
        # Manual associations go in first, with a score of at least 1.
        for temp_item_id in peo_result:
            # NOTE(review): unknown manual items fall back to index 0, which
            # also marks item 0 as already recorded -- confirm this is intended.
            temp_item_index = self.item_dict.get(temp_item_id, 0)
            # Without any co-occurrence the division yields NaN, and argsort
            # would then sort the manual items last instead of first.
            if array_sum != 0:
                share = result_array[temp_item_index] / array_sum
            else:
                share = 0.0
            temp_result_array[i_temp_result, :] = [temp_item_id, 1 + share]
            result_dict[temp_item_index] = i_temp_result
            i_temp_result += 1
        pes = Pro_estimate()
        # Class of the queried item; candidates of the same class are skipped.
        item_index = self.item_dict.get(item_id, -1)
        if item_index == -1:
            class_id = -1
        else:
            class_id = self.item_class[item_index]
        # Candidates from the r_top_num hottest items.
        for i_order in xrange(0, self.r_top_num):
            temp_item_index = my_orders2[i_order]
            if self.item_class[temp_item_index] == class_id:
                continue  # same class
            if result_dict.get(temp_item_index, -1) != -1:
                continue  # already recorded
            pes.solve_function(self.temp_item_array_hot[temp_item_index])
            pes.set_array()
            temp_pro = pes.get_pro(result_array[temp_item_index], array_sum)
            temp_result_array[i_temp_result, :] = [self.item_array[temp_item_index, 0], temp_pro]
            result_dict[temp_item_index] = i_temp_result
            i_temp_result += 1
        # Candidates by co-occurrence weight, until more than 400 rows are filled.
        for i_order in xrange(0, self.item_num):
            temp_item_index = my_orders1[i_order]
            if self.item_class[temp_item_index] == class_id:
                continue  # same class
            if result_dict.get(temp_item_index, -1) != -1:
                continue  # already recorded
            pes.solve_function(self.temp_item_array_hot[temp_item_index])
            pes.set_array()
            temp_pro = pes.get_pro(result_array[temp_item_index], array_sum)
            temp_result_array[i_temp_result, :] = [self.item_array[temp_item_index, 0], temp_pro]
            result_dict[temp_item_index] = i_temp_result
            i_temp_result += 1
            if i_temp_result > 400:
                break
        temp_result_array = temp_result_array[0:i_temp_result, :]
        temp_order = np.argsort(-temp_result_array[:, 1])  # by score, descending
        # Slicing keeps this safe when fewer than r_top_num candidates exist.
        top_n = max(self.r_top_num, 1)
        best = [str(int(temp_result_array[idx, 0])) for idx in temp_order[:top_n]]
        return str(item_id) + " " + ",".join(best)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: my_python2.py Projeto: sdutheone/Bayes

 def class_item_hot(self):
     """Fill ``self.like_matrix`` with item-versus-class association scores.

     The matrix has ``top_k + 1`` rows (the last row collects every known
     item whose index is ``top_k`` or larger) and ``class_num + 1``
     columns.

     Pass 1 visits each user's records.  For a record whose first field
     maps to a class column, every neighbour up to ``self.range`` records
     before and after it (same user, within ``self.day_diff``) adds
     ``self.order_weight[distance]`` to ``like_matrix[item_row, class]``.

     Pass 2 replaces each accumulated weight: values above 50 become
     ``weight / column_total``; smaller ones go through
     ``Pro_estimate.get_pro(weight, column_total)``.

     NOTE(review): ``Pro_estimate`` is defined elsewhere; confirm the
     contract of ``solve_function`` / ``set_array`` / ``get_pro``.
     """
     self.like_matrix = np.zeros((self.top_k + 1, self.class_num + 1))  # last row = residual
     # ---- Pass 1: weighted neighbour counts -----------------------------
     for i_user in xrange(0, self.user_num + 1):
         first_rec = self.user_array[i_user, 0]
         end_rec = self.user_array[i_user, 1]  # exclusive bound
         for i_record in xrange(first_rec, end_rec):
             if (i_record > first_rec) and (
                 self.user_item_array[i_record, 0] == self.user_item_array[i_record - 1, 0]
             ):
                 continue  # the same item bought twice in a row
             temp = self.user_item_array[i_record,]  # (item, time difference)
             # NOTE(review): class_dict is keyed by the record's first field
             # here -- confirm it maps item ids (not class ids) to columns.
             class_index = self.class_dict.get(temp[0], -1)
             if class_index == -1:
                 continue
             # Look back (-1), then forward (+1), over up to self.range records.
             for direction in (-1, 1):
                 i_diff = 0
                 while i_diff < self.range:
                     near_ind = i_record + direction * (1 + i_diff)
                     if near_ind < first_rec or near_ind >= end_rec:
                         break  # left this user's record range
                     temp_now = self.user_item_array[near_ind, :]
                     if direction < 0:
                         gap = temp[1] - temp_now[1]
                     else:
                         gap = temp_now[1] - temp[1]
                     if not (gap <= self.day_diff):
                         break  # outside the time window
                     item_index = self.item_dict.get(temp_now[0], -1)
                     if item_index != -1:
                         # Items beyond the top_k hottest share the last row.
                         row = min(item_index, self.top_k)
                         self.like_matrix[row, class_index] += self.order_weight[i_diff]
                     # Always advance.  The previous code used `continue`
                     # for unknown items, which skipped this increment and
                     # looped forever on the same record.
                     i_diff += 1
     # ---- Pass 2: turn weights into probabilities -----------------------
     col_sum = self.like_matrix.sum(0)  # per-column totals
     pes = Pro_estimate()
     p_pre_before = -1  # prior used for the most recent solve_function call
     un_set = True  # True while set_array() is still pending for the current prior
     for item_index in xrange(0, self.top_k + 1):
         # Rows 0..top_k-1 are items; the last row is the residual.
         if item_index % 200 == 0:
             print(time.time())  # progress trace
         if item_index == self.top_k:
             p_pre = 1 - sum(self.item_array[0 : self.top_k, 1])
         else:
             p_pre = self.item_array[item_index, 1]
         if abs(1 - p_pre / p_pre_before) > 0.01:
             # The prior moved by more than 1 % relative to the stored one:
             # re-solve, and defer set_array() until it is actually needed.
             pes.solve_function(p_pre)
             un_set = True
             p_pre_before = p_pre
         for class_index in xrange(0, self.class_num + 1):
             if self.like_matrix[item_index, class_index] > 50:
                 # Enough evidence: use the plain relative frequency.
                 self.like_matrix[item_index, class_index] = (
                     self.like_matrix[item_index, class_index] / col_sum[class_index]
                 )
             else:
                 if un_set:
                     un_set = False
                     pes.set_array()
                 self.like_matrix[item_index, class_index] = pes.get_pro(
                     self.like_matrix[item_index, class_index], col_sum[class_index]
                 )

Exemplo n.º 5

0

Exibir arquivo

Arquivo: my_python.py Projeto: axuanwu/Bayes

    def count_items(self, item_id, user_str):
        """Build the recommendation line for ``item_id``.

        :param item_id: integer item id.
        :param user_str: comma-separated ids of the users who bought
            ``item_id``; a value starting with ``-1`` means "no buyers".
        :return: the string ``"<item_id> <id>,<id>,..."``.

        With no buyers, the hottest items are returned.  Otherwise, for
        each buyer, the ``num_k`` records that follow a purchase of
        ``item_id`` are counted by position; each candidate item is then
        scored with ``Pro_estimate`` per position, combined through
        ``self.union_pro``, and the best ``r_top_num`` are returned.

        NOTE(review): ``Pro_estimate`` and ``self.union_pro`` are defined
        elsewhere -- confirm their contracts.
        """
        # Empty buyer list: fall back to the globally hottest items.
        if user_str[0:2] == '-1':
            my_orders = np.argsort(-self.temp_item_array_hot)
            top_n = max(self.r_top_num, 1)  # at least one id is always emitted
            # NOTE(review): ids are emitted with str() and no int() cast here,
            # unlike the main path below -- confirm item_array holds integers.
            return str(item_id) + ' ' + ','.join(
                str(self.item_array[idx, 0]) for idx in my_orders[:top_n])

        k_step = self.num_k  # the k_step items right after item_id count as related
        result_matrix = np.zeros((self.item_num + 1, k_step))
        for user in user_str.split(','):
            user_index = self.user_dict[int(user)]
            i_record = self.user_array[user_index, 0]  # first record of this user
            start = False  # becomes True once item_id has been seen
            k = 0  # position after the latest purchase of item_id
            while i_record < self.user_array[user_index, 1]:
                if self.user_item_array[i_record, 0] == item_id:
                    start = True
                    k = 0
                elif start and k < k_step:
                    temp_item = self.user_item_array[i_record, 0]
                    item_index = self.item_dict[temp_item]
                    result_matrix[item_index, k] += 1
                    k += 1
                i_record += 1

        # Score every candidate item.
        col_sum = result_matrix.sum(0)  # totals per position
        row_sum = result_matrix.sum(1)  # totals per item
        max_results = 20000  # capacity of the result buffer
        temp_result_array = np.zeros((max_results, 2))  # rows of (item id, score)
        temp_result = np.array([0.0] * len(col_sum))
        my_orders = np.argsort(-self.temp_item_array_hot)  # by hotness, descending
        i_temp_result = 0
        pes = Pro_estimate()
        for i_order in xrange(0, self.item_num + 1):
            temp_item_index = my_orders[i_order]
            # Only the hottest r_top_num items and items actually observed
            # after item_id are worth scoring.
            if not ((i_order < self.r_top_num) or (row_sum[temp_item_index] > 0)):
                continue
            try:
                pes.solve_function(self.temp_item_array_hot[temp_item_index])
            except Exception:
                # Best effort, as before: report the offending prior and carry
                # on with whatever state the estimator holds.  Narrowed from a
                # bare `except:` so Ctrl-C and SystemExit still propagate.
                print(self.temp_item_array_hot[temp_item_index])
            pes.set_array()
            for i in xrange(0, len(col_sum)):
                temp_result[i] = pes.get_pro(
                    result_matrix[temp_item_index, i], col_sum[i])
            temp_pro = self.union_pro(temp_result)
            temp_result_array[i_temp_result] = [
                self.item_array[temp_item_index, 0], temp_pro
            ]
            i_temp_result += 1
            if i_temp_result == max_results:  # buffer is full
                break
        temp_result_array = temp_result_array[0:i_temp_result, :]
        temp_order = np.argsort(-temp_result_array[:, 1])  # by score, descending
        # Every id goes through int(): the buffer is a float array, and the
        # first id used to be written as e.g. "123.0" while the rest were ints.
        top_n = max(self.r_top_num, 1)
        return str(item_id) + ' ' + ','.join(
            str(int(temp_result_array[idx, 0])) for idx in temp_order[:top_n])

Exemplo n.º 6

0

Exibir arquivo

Arquivo: my_python2.py Projeto: axuanwu/Bayes

    def count_items(self, item_id, user_str):
        """Build the recommendation line for ``item_id``.

        :param item_id: integer item id.
        :param user_str: comma-separated ids of the users who bought
            ``item_id``; a value starting with ``-1`` means "no buyers".
        :return: the string ``'<item_id> <id>,<id>,...'``.

        With no buyers, the line is the manually associated items from
        ``self.peo_exp`` followed by the hottest items.  Otherwise every
        purchase record of every buyer contributes ``order_weight``
        scores to its neighbours (up to ``self.range`` records before and
        after, within ``self.day_diff``); candidates are then scored with
        ``Pro_estimate`` and the best ``r_top_num`` are returned.

        NOTE(review): ``Pro_estimate`` is defined elsewhere; the meaning
        of ``solve_function`` / ``set_array`` / ``get_pro`` is taken from
        how they are called here -- confirm against its definition.
        """
        # Empty buyer list: manual items first, then the hottest items.
        if user_str[0:2] == '-1':
            manual = [str(x) for x in self.peo_exp.associated_items(item_id)]
            my_orders = np.argsort(-self.temp_item_array_hot)
            # At least one hot item is always appended.
            n_hot = max(self.r_top_num - len(manual), 1)
            hot = [str(int(self.item_array[idx, 0])) for idx in my_orders[:n_hot]]
            return str(item_id) + ' ' + ','.join(manual + hot)

        result_array = np.array([0.0] * (self.item_num + 1))
        for user in user_str.split(','):
            user_index = self.user_dict[int(user)]
            first_rec = self.user_array[user_index, 0]
            end_rec = self.user_array[user_index, 1]  # exclusive bound
            for i_record in xrange(first_rec, end_rec):
                if (i_record > first_rec) and \
                        (self.user_item_array[i_record, 0] == self.user_item_array[i_record - 1, 0]):
                    continue  # the same item bought twice in a row
                temp = self.user_item_array[i_record,]  # (item, time difference)

                # Look back (-1), then forward (+1), over up to self.range records.
                for direction in (-1, 1):
                    i_diff = 0
                    while i_diff < self.range:
                        near_ind = i_record + direction * (1 + i_diff)
                        if near_ind < first_rec or near_ind >= end_rec:
                            break  # left this user's record range
                        temp_now = self.user_item_array[near_ind, :]
                        if direction < 0:
                            gap = temp[1] - temp_now[1]
                        else:
                            gap = temp_now[1] - temp[1]
                        if not (gap <= self.day_diff):
                            break  # outside the time window
                        item_index = self.item_dict.get(temp_now[0], -1)
                        if item_index != -1:
                            result_array[item_index] += self.order_weight[i_diff]
                        # Always advance.  The previous code used `continue`
                        # for unknown items, which skipped this increment
                        # and looped forever on the same record.
                        i_diff += 1

        # Convert the accumulated weights into scores.
        array_sum = sum(result_array)
        temp_result_array = np.zeros((600, 2))  # rows of (item id, score)
        result_dict = {}  # item index -> row in temp_result_array
        my_orders1 = np.argsort(-result_array)  # by co-occurrence weight, descending
        my_orders2 = np.argsort(-self.temp_item_array_hot)  # by hotness, descending
        peo_result = self.peo_exp.associated_items(item_id)
        i_temp_result = 0
        # Manual associations go in first, with a score of at least 1.
        for temp_item_id in peo_result:
            # NOTE(review): unknown manual items fall back to index 0, which
            # also marks item 0 as already recorded -- confirm this is intended.
            temp_item_index = self.item_dict.get(temp_item_id, 0)
            # Without any co-occurrence the division yields NaN, and argsort
            # would then sort the manual items last instead of first.
            if array_sum != 0:
                share = result_array[temp_item_index] / array_sum
            else:
                share = 0.0
            temp_result_array[i_temp_result, :] = [temp_item_id, 1 + share]
            result_dict[temp_item_index] = i_temp_result
            i_temp_result += 1
        pes = Pro_estimate()
        # Class of the queried item; candidates of the same class are skipped.
        item_index = self.item_dict.get(item_id, -1)
        if item_index == -1:
            class_id = -1
        else:
            class_id = self.item_class[item_index]
        # Candidates from the r_top_num hottest items.
        for i_order in xrange(0, self.r_top_num):
            temp_item_index = my_orders2[i_order]
            if self.item_class[temp_item_index] == class_id:
                continue  # same class
            if result_dict.get(temp_item_index, -1) != -1:
                continue  # already recorded
            pes.solve_function(self.temp_item_array_hot[temp_item_index])
            pes.set_array()
            temp_pro = pes.get_pro(result_array[temp_item_index], array_sum)
            temp_result_array[i_temp_result, :] = [self.item_array[temp_item_index, 0], temp_pro]
            result_dict[temp_item_index] = i_temp_result
            i_temp_result += 1
        # Candidates by co-occurrence weight, until more than 400 rows are filled.
        for i_order in xrange(0, self.item_num):
            temp_item_index = my_orders1[i_order]
            if self.item_class[temp_item_index] == class_id:
                continue  # same class
            if result_dict.get(temp_item_index, -1) != -1:
                continue  # already recorded
            pes.solve_function(self.temp_item_array_hot[temp_item_index])
            pes.set_array()
            temp_pro = pes.get_pro(result_array[temp_item_index], array_sum)
            temp_result_array[i_temp_result, :] = [self.item_array[temp_item_index, 0], temp_pro]
            result_dict[temp_item_index] = i_temp_result
            i_temp_result += 1
            if i_temp_result > 400:
                break
        temp_result_array = temp_result_array[0:i_temp_result, :]
        temp_order = np.argsort(-temp_result_array[:, 1])  # by score, descending
        # Slicing keeps this safe when fewer than r_top_num candidates exist.
        top_n = max(self.r_top_num, 1)
        best = [str(int(temp_result_array[idx, 0])) for idx in temp_order[:top_n]]
        return str(item_id) + ' ' + ','.join(best)

Exemplo n.º 7

0

Exibir arquivo

Arquivo: my_python2.py Projeto: axuanwu/Bayes

 def class_item_hot(self):
     """Fill ``self.like_matrix`` with item-versus-class association scores.

     The matrix has ``top_k + 1`` rows (the last row collects every known
     item whose index is ``top_k`` or larger) and ``class_num + 1``
     columns.

     Pass 1 visits each user's records.  For a record whose first field
     maps to a class column, every neighbour up to ``self.range`` records
     before and after it (same user, within ``self.day_diff``) adds
     ``self.order_weight[distance]`` to ``like_matrix[item_row, class]``.

     Pass 2 replaces each accumulated weight: values above 50 become
     ``weight / column_total``; smaller ones go through
     ``Pro_estimate.get_pro(weight, column_total)``.

     NOTE(review): ``Pro_estimate`` is defined elsewhere; confirm the
     contract of ``solve_function`` / ``set_array`` / ``get_pro``.
     """
     self.like_matrix = np.zeros((self.top_k + 1, self.class_num + 1))  # last row = residual
     # ---- Pass 1: weighted neighbour counts -----------------------------
     for i_user in xrange(0, self.user_num + 1):
         first_rec = self.user_array[i_user, 0]
         end_rec = self.user_array[i_user, 1]  # exclusive bound
         for i_record in xrange(first_rec, end_rec):
             if (i_record > first_rec) and \
                     (self.user_item_array[i_record, 0] == self.user_item_array[i_record - 1, 0]):
                 continue  # the same item bought twice in a row
             temp = self.user_item_array[i_record,]  # (item, time difference)
             # NOTE(review): class_dict is keyed by the record's first field
             # here -- confirm it maps item ids (not class ids) to columns.
             class_index = self.class_dict.get(temp[0], -1)
             if class_index == -1:
                 continue
             # Look back (-1), then forward (+1), over up to self.range records.
             for direction in (-1, 1):
                 i_diff = 0
                 while i_diff < self.range:
                     near_ind = i_record + direction * (1 + i_diff)
                     if near_ind < first_rec or near_ind >= end_rec:
                         break  # left this user's record range
                     temp_now = self.user_item_array[near_ind, :]
                     if direction < 0:
                         gap = temp[1] - temp_now[1]
                     else:
                         gap = temp_now[1] - temp[1]
                     if not (gap <= self.day_diff):
                         break  # outside the time window
                     item_index = self.item_dict.get(temp_now[0], -1)
                     if item_index != -1:
                         # Items beyond the top_k hottest share the last row.
                         row = min(item_index, self.top_k)
                         self.like_matrix[row, class_index] += self.order_weight[i_diff]
                     # Always advance.  The previous code used `continue`
                     # for unknown items, which skipped this increment and
                     # looped forever on the same record.
                     i_diff += 1
     # ---- Pass 2: turn weights into probabilities -----------------------
     col_sum = self.like_matrix.sum(0)  # per-column totals
     pes = Pro_estimate()
     p_pre_before = -1  # prior used for the most recent solve_function call
     un_set = True  # True while set_array() is still pending for the current prior
     for item_index in xrange(0, self.top_k + 1):
         # Rows 0..top_k-1 are items; the last row is the residual.
         if item_index % 200 == 0:
             print(time.time())  # progress trace
         if item_index == self.top_k:
             p_pre = 1 - sum(self.item_array[0:self.top_k, 1])
         else:
             p_pre = self.item_array[item_index, 1]
         if abs(1 - p_pre / p_pre_before) > 0.01:
             # The prior moved by more than 1 % relative to the stored one:
             # re-solve, and defer set_array() until it is actually needed.
             pes.solve_function(p_pre)
             un_set = True
             p_pre_before = p_pre
         for class_index in xrange(0, self.class_num + 1):
             if self.like_matrix[item_index, class_index] > 50:
                 # Enough evidence: use the plain relative frequency.
                 self.like_matrix[item_index, class_index] = \
                     self.like_matrix[item_index, class_index] / col_sum[class_index]
             else:
                 if un_set:
                     un_set = False
                     pes.set_array()
                 self.like_matrix[item_index, class_index] = \
                     pes.get_pro(self.like_matrix[item_index, class_index], col_sum[class_index])

Exemplo n.º 8

0

Exibir arquivo

Arquivo: my_python.py Projeto: hpplinux/Bayes

    def count_items(self, item_id, user_str):
        """Build the recommendation line for ``item_id``.

        :param item_id: integer item id.
        :param user_str: comma-separated ids of the users who bought
            ``item_id``; a value starting with ``-1`` means "no buyers".
        :return: the string ``"<item_id> <id>,<id>,..."``.

        With no buyers, the hottest items are returned.  Otherwise, for
        each buyer, the ``num_k`` records that follow a purchase of
        ``item_id`` are counted by position; each candidate item is then
        scored with ``Pro_estimate`` per position, combined through
        ``self.union_pro``, and the best ``r_top_num`` are returned.

        NOTE(review): ``Pro_estimate`` and ``self.union_pro`` are defined
        elsewhere -- confirm their contracts.
        """
        # Empty buyer list: fall back to the globally hottest items.
        if user_str[0:2] == '-1':
            my_orders = np.argsort(-self.temp_item_array_hot)
            top_n = max(self.r_top_num, 1)  # at least one id is always emitted
            # NOTE(review): ids are emitted with str() and no int() cast here,
            # unlike the main path below -- confirm item_array holds integers.
            return str(item_id) + ' ' + ','.join(
                str(self.item_array[idx, 0]) for idx in my_orders[:top_n])

        k_step = self.num_k  # the k_step items right after item_id count as related
        result_matrix = np.zeros((self.item_num + 1, k_step))
        for user in user_str.split(','):
            user_index = self.user_dict[int(user)]
            i_record = self.user_array[user_index, 0]  # first record of this user
            start = False  # becomes True once item_id has been seen
            k = 0  # position after the latest purchase of item_id
            while i_record < self.user_array[user_index, 1]:
                if self.user_item_array[i_record, 0] == item_id:
                    start = True
                    k = 0
                elif start and k < k_step:
                    temp_item = self.user_item_array[i_record, 0]
                    item_index = self.item_dict[temp_item]
                    result_matrix[item_index, k] += 1
                    k += 1
                i_record += 1

        # Score every candidate item.
        col_sum = result_matrix.sum(0)  # totals per position
        row_sum = result_matrix.sum(1)  # totals per item
        max_results = 20000  # capacity of the result buffer
        temp_result_array = np.zeros((max_results, 2))  # rows of (item id, score)
        temp_result = np.array([0.0] * len(col_sum))
        my_orders = np.argsort(-self.temp_item_array_hot)  # by hotness, descending
        i_temp_result = 0
        pes = Pro_estimate()
        for i_order in xrange(0, self.item_num + 1):
            temp_item_index = my_orders[i_order]
            # Only the hottest r_top_num items and items actually observed
            # after item_id are worth scoring.
            if not ((i_order < self.r_top_num) or (row_sum[temp_item_index] > 0)):
                continue
            try:
                pes.solve_function(self.temp_item_array_hot[temp_item_index])
            except Exception:
                # Best effort, as before: report the offending prior and carry
                # on with whatever state the estimator holds.  Narrowed from a
                # bare `except:` so Ctrl-C and SystemExit still propagate.
                print(self.temp_item_array_hot[temp_item_index])
            pes.set_array()
            for i in xrange(0, len(col_sum)):
                temp_result[i] = pes.get_pro(result_matrix[temp_item_index, i], col_sum[i])
            temp_pro = self.union_pro(temp_result)
            temp_result_array[i_temp_result] = [self.item_array[temp_item_index, 0], temp_pro]
            i_temp_result += 1
            if i_temp_result == max_results:  # buffer is full
                break
        temp_result_array = temp_result_array[0:i_temp_result, :]
        temp_order = np.argsort(-temp_result_array[:, 1])  # by score, descending
        # Every id goes through int(): the buffer is a float array, and the
        # first id used to be written as e.g. "123.0" while the rest were ints.
        top_n = max(self.r_top_num, 1)
        return str(item_id) + ' ' + ','.join(
            str(int(temp_result_array[idx, 0])) for idx in temp_order[:top_n])