Exemplo n.º 1
0
    def update_cluster_user_rec_item(self, cluster_user_id_arr):
        """Add cluster-derived recommendation weights for every cluster user.

        The rows of the matrix returned by ``get_data_smat()`` that belong to
        ``cluster_user_id_arr`` are selected and passed to
        ``find_nonzero_indices``.  Each item index it reports is counted, and
        for every user of the cluster the items the user is not yet
        associated with are credited with that count in the user's entry of
        ``self.all_user_rec_item_dict``.

        Args:
            cluster_user_id_arr: ids of the users forming one cluster.  They
                are used to index the data matrix and as keys of
                ``self.all_user_rec_item_dict``.

        Returns:
            None.  The per-user dicts inside ``self.all_user_rec_item_dict``
            are modified in place.

        Raises:
            KeyError: if a reported row index or a cluster user id is missing
                from the corresponding lookup dict.
        """
        data_smat = get_data_smat()
        rec_items_by_user = self.all_user_rec_item_dict

        row_arr, col_arr, _ = find_nonzero_indices(data_smat[cluster_user_id_arr])

        # Every cluster user starts with an empty set of already-known items.
        known_items_by_user = {uid: set() for uid in cluster_user_id_arr}

        cluster_items = set(col_arr)
        occurrences = {}
        # NOTE(review): the row values reported for the row-sliced matrix are
        # used directly as keys of a dict keyed by the original user ids.
        # If find_nonzero_indices returns positions relative to the slice
        # (as scipy.sparse.find would), these keys do not line up -- confirm
        # against the helper's definition.
        for row, item in zip(row_arr, col_arr):
            # How many times the item shows up within the cluster.
            occurrences[item] = occurrences.get(item, 0) + 1
            known_items_by_user[row].add(item)

        for uid in cluster_user_id_arr:
            # Items present in the cluster that this user does not have yet.
            unseen_items = cluster_items - known_items_by_user[uid]
            user_rec = rec_items_by_user[uid]

            for item in unseen_items:
                # Start from zero when the item was never recommended before.
                user_rec[item] = user_rec.get(item, 0) + occurrences[item]
Exemplo n.º 2
0
def gen_toy_dataset(n_user, n_item):
    """Generate a random user-item relationship list for experiments.

    A flat array of ``n_user * n_item`` values is drawn with
    ``random.choice(2, ..., p=[0.7, 0.3])`` (presumably ``numpy.random``, in
    which case each cell is 1 with probability 0.3 -- confirm against the
    file's imports), reshaped to ``(n_user, n_item)`` and converted to a CSR
    matrix.

    Args:
        n_user: number of rows (users) of the generated matrix.
        n_item: number of columns (items) of the generated matrix.

    Returns:
        The result of zipping the row and column index arrays reported by
        ``find_nonzero_indices`` for the generated matrix.
    """
    n_cell = n_user * n_item
    flat = random.choice(2, n_cell, p=[0.7, 0.3])
    dense = flat.reshape((n_user, n_item))
    sparse = csr_matrix(dense)

    # Only the row/column indices matter; the third element is discarded.
    nonzero_rows, nonzero_cols, _ = find_nonzero_indices(sparse)
    return zip(nonzero_rows, nonzero_cols)
Exemplo n.º 3
0
def gen_debug_dataset():
    data = [
        [1,1,1,1,0,0,0,0,0,0],
        [1,1,1,0,0,1,0,0,0,0],
        [1,0,0,1,0,1,1,0,0,0],
        [1,1,0,1,1,0,0,1,0,0],
        [0,0,1,1,1,1,0,0,0,0],
        [0,0,0,1,1,0,0,1,1,0],
        [0,0,0,1,0,1,1,1,0,1],
        [0,0,0,0,1,0,1,1,1,1],
        [0,0,1,0,0,0,1,0,1,1],
        [0,0,0,1,0,1,1,0,1,1],
    ]
    n_user = len(data)
    n_item = len(data[0])

    print n_user, n_item

    smat = csr_matrix(data, shape=(n_user, n_item), dtype=UINT)
    row_index_arr,column_index_arr,_ = find_nonzero_indices(smat)
    relationships = zip(row_index_arr, column_index_arr)
    return n_user, n_item, relationships
def op_sparse_matrix():
    # Scratch routine: builds a 10x10 matrix from random.randint(0, 2, ...),
    # prints it, and converts it to a CSR matrix.
    # NOTE(review): `random` is presumably numpy.random (values would then be
    # 0 or 1) -- confirm against the file's imports.  The function may
    # continue beyond the lines visible here.
    mat = random.randint(0,2,size=(10,10))
    print mat
    smat = csr_matrix(mat)
    # Keep only the second element of the helper's result; the other
    # functions in this file treat that element as the column indices.
    indices = find_nonzero_indices(smat)[1]