Python knn示例，utils.knn Python示例

示例#1

0

显示文件

文件： item_cf.py 项目： shiyuLuo2019/machine_learning_projects

def item_based_cf(datafile, userid, movieid, distance, k, iFlag, numOfUsers,
                  numOfItems):
    '''
    Build an item-based collaborative filter that predicts the rating
    of a user for a movie.
    This function returns the actual rating and the predicted rating
    (in that order).

    Parameters
    ----------
    <datafile> - a fully specified path to a file formatted like the MovieLens100K data file u.data
    <userid> - a userId in the MovieLens100K data
    <movieid> - a movieID in the MovieLens 100K data set
    <distance> - a Boolean. If set to 0, use Pearson's correlation as the distance measure. If 1, use Manhattan distance.
    <k> - The number of nearest neighbors to consider
    <iFlag> - A Boolean value. If set to 0 for user-based collaborative filtering,
    only users that have actual (ie non-0) ratings for the movie are considered in your top K.
    For item-based, use only movies that have actual ratings by the user in your top K.
    If set to 1, simply use the top K regardless of whether the top K contain actual or filled-in ratings.
    <numOfUsers> - the number of users in the dataset
    <numOfItems> - the number of items in the dataset
    (NOTE: use these variables (<numOfUsers>, <numOfItems>) to build user-rating matrix.
    DO NOT USE any CONSTANT NUMBERS when building user-rating matrix. We already set these variables in the main function for you.
    The size of user-rating matrix in the test case for grading could be different from the given dataset. )

    returns
    -------
    trueRating: <userid>'s actual rating for <movieid>
    predictedRating: <userid>'s rating predicted by collaborative filter for <movieid>


    AUTHOR: Shiyu Luo (This is where you put your name)
    '''

    # Read the tab-separated file. Text mode is required by csv.reader on
    # Python 3 (binary mode yields bytes and raises), and the context manager
    # guarantees the handle is closed even if parsing fails.
    with open(datafile, 'r') as handle:
        # Transpose rows into columns while the file is still open.
        columns = list(zip(*csv.reader(handle, delimiter='\t')))

    # `np.int` was only an alias of the builtin `int` and was removed in
    # NumPy 1.24, so the builtin is used directly.
    # column 1: user id
    col1 = np.array(columns[0]).astype(int)
    # column 2: item id
    col2 = np.array(columns[1]).astype(int)
    # column 3: ratings
    col3 = np.array(columns[2]).astype(int)

    # The matrix is indexed [movie, user] with 0-based positions, hence the
    # "- 1" applied to the 1-based ids below.
    mv_mat = movie_matrix(col1, col2, col3, numOfUsers, numOfItems)
    trueRating = mv_mat[movieid - 1, userid - 1]

    neighbors = utils.knn(mat=mv_mat,
                          target_row=movieid - 1,
                          nonzero_col=userid - 1,
                          metric=distance,
                          k=k,
                          iFlag=iFlag)

    # The prediction is the mode of the neighbors' ratings in this user's column.
    ratings = neighbors[:, userid - 1]
    predictedRating = mode(ratings)

    return trueRating, predictedRating

示例#2

0

显示文件

文件： wFM.py 项目： lishixuan001/HignDimSphereCNN_MultiGPU

    def wFM_on_sphere(self, inputs):
        """Apply the weighted-FM layer to ``inputs``.

        ``inputs`` is unpacked as ``(B, N, D, C)``. The points are optionally
        down-sampled along ``N``. For every remaining point the ``self.k``
        entries selected by ``utils.knn`` on the pairwise-distance matrix are
        gathered, weighted with the normalized ``self.w1``, summed over the
        neighbor axis, and multiplied by the normalized ``self.w2``.

        Side effect: the adjacency matrix is printed on every call.

        Returns the weighted tensor, annotated ``[B * N * D * Cout]`` by the
        original author.
        """
        # Get Dimensions of Input
        B, N, D, C = inputs.shape
        v = self.conv(inputs)
        inputs = inputs.contiguous().view(B, N, D * C)

        # Downsampling
        if self.down_sample_rate != 1:
            sampled_n = int(N * self.down_sample_rate)
            inputs = down_sampling(inputs, v.squeeze(), sampled_n)
            N = sampled_n
        inputs = inputs.view(B, N, D, C)

        # Get KNN Matrix
        adj = utils.pairwise_distance(inputs)

        print("---------------------------------\n[Adj Matrix")
        print(adj)
        print("---------------------------------\n")

        # NOTE(review): torch.Tensor(...) and torch.arange(B) are created
        # without an explicit device; confirm this matches where `inputs` lives.
        neighbor_ids = torch.Tensor(
            utils.knn(adj, k=self.k, include_myself=True)).long()

        # Per-batch offsets, needed because the tensor is flattened over
        # (B * N) before indexing; shaped (B, 1, 1) to broadcast onto the ids.
        batch_offsets = (torch.arange(B) * N).view((B, 1, 1))
        flat_ids = neighbor_ids + batch_offsets

        # Flatten the points to [(B*N) * (D*C)] and gather the neighbors.
        flat_points = inputs.view(B * N, D, C)
        flat_points = flat_points.view(B * N, D * C)
        neighbors = flat_points[flat_ids]  # [B * N * K * (D*C)]
        neighbors = neighbors.view(B, N, self.k, D, C)  # [B * N * K * D * C]
        neighbors = neighbors.permute(0, 1, 3, 4, 2)  # [B * N * D * C * K]

        # Weight the neighbors, then collapse the K axis.
        out = neighbors * weight_normalize(self.w1, dim=1)  # [B * N * D * C * K]
        out = torch.sum(out, dim=-1)  # [B * N * D * C]
        out = torch.matmul(out, weight_normalize(self.w2, dim=0))  # [B * N * D * Cout]

        return out

示例#3

0

显示文件

    def forward(self, x):
        """Run the network on ``x`` and return the embedded features.

        After squeezing dim 1 and transposing, ``x`` is treated as
        ``[B, num_dims, num]``. The first three channels are optionally
        rotated by ``self.t_net3d``. The features then pass through the
        point-wise convolutions, one graph-feature stage in feature space, and
        one stage that reuses the neighbors of the initial coordinates.

        Returns a tensor annotated ``[b, emb_dims, num, 1]`` by the original
        author.
        """
        x = torch.squeeze(x, dim=1).transpose(2, 1)  # [B,num_dims,num]
        _, num_dims, _ = x.size()

        # Apply the T-Net rotation to the coordinates only.
        if num_dims > 3 or self.use_mFea:
            # NOTE(review): the split sizes are hard-coded to [3, 5] although
            # the original comment describes the second part as
            # num_dims-3; confirm that inputs always carry 8 channels.
            x, feature = x.transpose(2, 1).split(
                [3, 5], dim=2)  # [B,num,3]  [B,num,num_dims-3]
            xInit3d = x.transpose(2, 1)
            # Optionally rotate the 3D coordinates.
            if self.t3d:
                trans = self.t_net3d(x.transpose(2, 1))
                x = torch.bmm(x, trans)
            # Re-attach the extra features whether or not a rotation happened.
            x = torch.cat([x, feature], dim=2).transpose(2, 1)  # [B,num_dims,num]
        else:
            xInit3d = x
            if self.t3d:
                trans = self.t_net3d(x)
                x = torch.bmm(x.transpose(2, 1), trans).transpose(2, 1)

        x = self.conv2_lpd(self.conv1_lpd(x))

        if self.tfea:
            trans_feat = self.t_net_fea(x)
            x = torch.bmm(x.transpose(2, 1), trans_feat).transpose(2, 1)

        # Serial structure
        # Dynamic Graph CNN for feature space
        x = get_graph_feature_Origin(x, k=self.k)  # [b,64*2,num,20]
        x = self.convDG1(x)  # [b,64,num,20]
        x = self.convDG2(x)  # [b,64,num,20]
        x, _ = x.max(dim=-1, keepdim=True)  # [b,64,num,1]

        # Spatial Neighborhood fusion for cartesian space
        idx = knn(xInit3d, k=self.k)
        x = get_graph_feature_Origin(x, idx=idx, k=self.k,
                                     cat=False)  # [b,64,num,20]
        x = self.convSN1(x)  # [b,64,num,20]
        x = self.convSN2(x)  # [b,64,num,20]
        x, _ = x.max(dim=-1, keepdim=True)
        x = x.squeeze(-1)  # [b,64,num]

        x = self.conv3_lpd(x)  # [b,64,num]
        x = self.conv4_lpd(x)  # [b,128,num]
        x = self.conv5_lpd(x)  # [b,emb_dims,num]

        return x.unsqueeze(-1)  # [b,emb_dims,num,1]

示例#4

0

显示文件

文件： neighborhood.py 项目： joaoms/incRec

    def _neighborhood(self, ident):
        """
        Description
            Compute and return the neighborhood of an element, chosen
            among all known users except the element itself.

        Argument
            :param ident: The element to calculate the neighborhood for.
            :type ident: int
        """
        # The element must not be offered as its own neighbor.
        others = self.users.difference({ident})
        neighborhood = knn(ident, others, self.n_neighbors,
                           self.similarity_between)
        return neighborhood

示例#5

0

显示文件

def main():
    """Print two 20-nearest-neighbor distance lists for the same query.

    A random 50000 x 2 array is indexed with a ``VAFile``. The distances
    returned by ``knn`` are printed first, then the 20 smallest entries of the
    VA-file's ``dst`` array after ``near_optimal_search``.
    """
    neighbor_count = 20
    points = np.random.rand(50000, 2)
    query = points[22000]

    va_file = VAFile(points, 8)
    va_file.near_optimal_search(query, neighbor_count)

    print("Return for 20 nearest neighbors to q on a 50000 by 2 random data array")
    _, exact_dists = knn(points, query, neighbor_count)
    print(exact_dists)

    print("Sorting va_inst.dst")
    order = np.argsort(va_file.dst)
    print("Returning top 20 results from dst array")
    print(va_file.dst[order[:neighbor_count]])

示例#6

0

显示文件

文件： clustering.py 项目： joaoms/incRec

    def _neighborhood(self, ident, candidate_set):
        """
        Description
            Compute and return the neighborhood of an element inside a
            cluster, which is a DynamicArray object. The element itself
            is excluded from the candidates.

        Argument
            :param ident: The element to calculate the neighborhood for.
            :type ident: int
            :param candidate_set: The cluster.
            :type candidate_set: DynamicArray
        """
        # The element must not be offered as its own neighbor.
        others = candidate_set.difference({ident})
        neighborhood = knn(ident, others, self.n_neighbors,
                           self.similarity_between)
        return neighborhood

示例#7

0

显示文件

def calc_laplacian_mat(points, k):
    """Build a sparse (n x n) matrix from the k-nearest-neighbor distances.

    For each row the first neighbor column is dropped and the remaining
    distances are inverted. The row's first value becomes the negated sum of
    those inverses. Values are placed at (row, neighbor index) and returned as
    a reordered ``tf.SparseTensor``.

    NOTE(review): presumably the first neighbor returned by ``utls.knn`` is
    the point itself (distance 0), which would put the negated sum on the
    diagonal. Confirm against ``utls.knn``.
    """
    n = points.shape[0]
    pairwise = utls.pairwise_distance(points, points)
    knn_dist, knn_idx = utls.knn(pairwise, k=k)

    # Inverse distances of every neighbor except the first column.
    inv_dist = 1/tf.cast(knn_dist, dtype=tf.float64)[:, 1:]
    row_total = tf.reduce_sum(inv_dist, axis=1, keepdims=True)
    values = tf.reshape(tf.concat((-row_total, inv_dist), axis=1), [-1])

    # Column coordinates come straight from the neighbor indices; each row
    # coordinate is repeated k times so both lists line up after flattening.
    col_ids = tf.reshape(knn_idx, [-1, 1])
    row_ids = tf.reshape(tf.range(n), [-1, 1])
    row_ids = tf.keras.backend.repeat(row_ids, k)
    row_ids = tf.reshape(row_ids, [-1, 1])
    coords = tf.cast(tf.concat((row_ids, col_ids), axis=1), dtype=tf.int64)

    laplacian = tf.SparseTensor(indices=coords, values=values,
                                dense_shape=(n, n))
    return tf.sparse.reorder(laplacian)

示例#8

0

显示文件

def get_graph_feature(x, k=20, idx=None):
    """Gather the features of each point's k neighbors and attach the point itself.

    Parameters
    ----------
    x : tensor viewed as (batch_size, num_dims, num_points)
    k : number of neighbors per point
    idx : optional precomputed neighbor indices, (batch_size, num_points, k);
        when None they are computed with ``knn(x, k=k)``

    Returns
    -------
    Depends on the module-level flag ``cat_or_stack``:
    * truthy: neighbor features concatenated with the repeated center
      point along the feature axis, (batch_size, num_dims * 2, num_points, k)
    * falsy: the center point appended as one extra "neighbor",
      (batch_size, num_dims, num_points, k + 1)
    """
    batch_size = x.size(0)
    num_points = x.size(2)
    x = x.view(batch_size, -1, num_points)
    if idx is None:
        # (batch_size, num_points, k)
        idx = knn(x, k=k)

    # Build the offsets on the input's own device. The previous hard-coded
    # torch.device('cuda') failed on CPU-only hosts and could disagree with
    # the device that actually holds x.
    # (batch_size, 1, 1) [0, num_points, ..., num_points * (batch_size - 1)]
    idx_base = torch.arange(0, batch_size, device=x.device).view(-1, 1,
                                                                 1) * num_points
    # (batch_size, num_points, k): indices into the flattened point list
    idx = idx + idx_base
    # (batch_size * num_points * k)
    idx = idx.view(-1)
    _, num_dims, _ = x.size()
    # (batch_size, num_points, num_dims)
    x = x.transpose(2, 1).contiguous()
    # (batch_size * num_points * k, num_dims)
    feature = x.view(batch_size * num_points, -1)[idx, :]
    # (batch_size, num_points, k, num_dims):
    # for every batch its points, for every point its k nearest points,
    # for every neighbor its feature dims.
    feature = feature.view(batch_size, num_points, k, num_dims)

    if cat_or_stack:
        # (batch_size, num_points, k, num_dims)
        x = x.view(batch_size, num_points, 1, num_dims).repeat(1, 1, k, 1)
        # (batch_size, num_points, k, num_dims * 2)
        # -> (batch_size, num_dims * 2, num_points, k)
        feature = torch.cat((feature, x), dim=3).permute(0, 3, 1, 2)
    else:
        # (batch_size, num_points, 1, num_dims)
        x = x.view(batch_size, num_points, 1, num_dims)
        # (batch_size, num_points, k + 1, num_dims)
        # -> (batch_size, num_dims, num_points, k + 1)
        feature = torch.cat((feature, x), dim=2).permute(0, 3, 1, 2)
    return feature

示例#9

0

显示文件

文件： rec_slim.py 项目： nondecidibile/keksys

	def get_similarity(self, data):
		"""Train on ``data`` and return the similarity matrix filtered by ``utils.knn``.

		``data`` is converted to CSR, its number of stored entries is recorded
		in ``self.num_interactions``, and a ``BPR`` sampler is built on it. A
		zero-initialized square float32 matrix (one row/column per column of
		``data``) is handed to ``self.train``.

		NOTE(review): the return value of ``self.train`` is ignored, so the
		matrix is presumably filled in place. Confirm in ``train``.
		"""
		interactions = data.tocsr()
		self.num_interactions = interactions.nnz

		interactions = sp.csr_matrix(interactions)
		self.bpr_sampler = BPR(interactions)

		dim = interactions.shape[1]
		similarity = np.zeros((dim, dim), dtype=np.float32)

		self.train(self.lr, self.epochs, interactions, similarity)

		# The transposed matrix is what gets passed to utils.knn.
		return utils.knn(similarity.T, knn=self.knn)

示例#10

0

显示文件

    def get_similarity(self, data):
        """Blend the similarities of ``model1`` and ``model2``.

        Each model's similarity is scaled by its weight (``self.w1`` /
        ``self.w2``) and accumulated into an initially empty square sparse
        matrix with one row/column per column of ``data``. The sum is
        L2-normalized row-wise and passed through ``utils.knn`` with
        ``np.inf`` as the neighbor count.
        """
        n_cols = data.shape[1]
        blended = sparse.csr_matrix((n_cols, n_cols), dtype=np.float32)

        # Each weighted term is a temporary, so it is released as soon as it
        # has been added to the running sum.
        blended += self.model1.get_similarity(data) * self.w1
        blended += self.model2.get_similarity(data) * self.w2

        blended = normalize(blended, norm='l2', axis=1)
        return utils.knn(blended, np.inf)

示例#11

0

显示文件

def cal_testing_image_labels(img_list, crsval_mode):
    """Compute and save the nearest-anchor label for every pixel of each test image.

    The anchor set is ``resources['anchors'][0]`` for ``crsval_mode`` 1 or 2
    and ``resources['anchors'][1]`` for 3 or 4. For each image the result is
    written to ``<name>_label.npy`` under ``directories['sparse_label']``.

    NOTE(review): any other ``crsval_mode`` leaves the anchors undefined and
    fails on the first image. Confirm callers only pass 1-4.
    """
    print("  Preparing Testing Images...")
    cmf = resources['cmf']
    if crsval_mode in [1, 2]:
        rgb_anchors_norm = utils.normc(resources['anchors'][0] @ cmf)
    elif crsval_mode in [3, 4]:
        rgb_anchors_norm = utils.normc(resources['anchors'][1] @ cmf)

    for img_name in img_list:
        # The last character is dropped (presumably a trailing newline from
        # the list file; verify against the caller).
        stem = img_name[:-1]
        print("    ", stem)
        spec_img = load_icvl_data(directories['data'], stem)  # 31 x H x W

        # Only the RGB half of the ground truth is needed for labelling.
        _, rgb = utils.cal_gt_data(spec_img, resources['cmf'])
        rgb_data_norm = utils.normc(rgb)
        labels = utils.knn(rgb_anchors_norm, rgb_data_norm, k=1,
                           batch_size=400000).reshape(-1)

        out_path = os.path.join(directories['sparse_label'],
                                img_name[:-5]+'_label.npy')
        np.save(out_path, labels)

示例#12

0

显示文件

文件： rec_item.py 项目： nondecidibile/keksys

    def get_similarity(self, data):
        """Return the item similarity enriched with artist and album similarity.

        The base cosine similarity of ``data`` uses this instance's
        ``alpha`` / ``asym`` / ``h``. The artist and album similarities use
        fixed settings (alpha=0.5, asym=True, h=0) and are scaled by
        ``self.artist_w`` and ``self.album_w`` before being added. The sum is
        finally filtered with ``utils.knn(similarity, self.knn)``.
        """
        print("Computing Item similarity...")
        similarity = utils.cosine_similarity(data,
                                             alpha=self.alpha,
                                             asym=self.asym,
                                             h=self.h,
                                             dtype=np.float32)

        # Settings shared by the two side-information similarities.
        side_kwargs = dict(alpha=0.5, asym=True, h=0, dtype=np.float32)

        # ARTIST
        artist_sim = utils.cosine_similarity(self.artists_mat, **side_kwargs)
        similarity += artist_sim * self.artist_w

        # ALBUM
        album_sim = utils.cosine_similarity(self.albums_mat, **side_kwargs)
        similarity += album_sim * self.album_w

        return utils.knn(similarity, self.knn)

示例#13

0

显示文件

def cal_validation_data(img_list, crsval_mode):
    """Collect validation ground truth, find its nearest neighbors, and save both.

    The ground truth is pickled to ``sparse_all_data<suffix>.pkl`` and the
    neighbor indices are saved to ``sparse_neighbor_idx<suffix>.npy``, both
    under ``directories['precal']``. The suffix comes from
    ``generate_crsval_suffix`` and gets ``'_aug'`` appended when
    ``param['augmentation']`` is set.

    NOTE(review): a ``crsval_mode`` outside 1-4 leaves the anchors undefined
    and fails at the ``utils.knn`` call. Confirm callers only pass 1-4.
    """
    print("  Preparing Validation Images...")
    gt_data = utils.collect_gt_data(
        directories['data'], img_list, resources['cmf'],
        num_sampling_points=param['num_sampling_points'],
        rand=param['random_shuffle'])
    rgb_data_norm = utils.normc(gt_data['rgb'])

    cmf = resources['cmf']
    if crsval_mode in [1, 2]:
        rgb_anchors_norm = utils.normc(resources['anchors'][0] @ cmf)
    elif crsval_mode in [3, 4]:
        rgb_anchors_norm = utils.normc(resources['anchors'][1] @ cmf)

    neighbor_idx = utils.knn(rgb_data_norm, rgb_anchors_norm,
                             k=param['num_neighbors']//2, batch_size=250)

    # Only the validation suffix is used; the first element is discarded.
    # (A '_rand' suffix for param['random_shuffle'] was disabled upstream.)
    _, val_suffix = generate_crsval_suffix(crsval_mode)
    if param['augmentation']:
        val_suffix += '_aug'

    pkl_path = os.path.join(directories['precal'],
                            'sparse_all_data'+val_suffix+'.pkl')
    with open(pkl_path, 'wb') as handle:
        pickle.dump(gt_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

    npy_path = os.path.join(directories['precal'],
                            'sparse_neighbor_idx'+val_suffix+'.npy')
    np.save(npy_path, neighbor_idx)

示例#14

0

显示文件

def get_graph_feature_Origin(x, k=20, idx=None, cat=True):
    """Gather the features of each point's k neighbors.

    Parameters
    ----------
    x : tensor viewed as (batch_size, num_dims, num_points)
    k : number of neighbors per point
    idx : optional precomputed neighbor indices, (batch_size, num_points, k);
        when None they are computed with ``knn(x, k=k)``
    cat : when True, return ``(x, neighbor - x)`` concatenated along the
        feature axis; when False, return the raw neighbor features

    Returns
    -------
    (batch_size, num_dims * 2, num_points, k) if ``cat`` is True,
    otherwise (batch_size, num_dims, num_points, k).
    """
    batch_size = x.size(0)
    num_points = x.size(2)
    x = x.view(batch_size, -1, num_points)
    if idx is None:
        idx = knn(x, k=k)  # (batch_size, num_points, k)

    # Build the per-batch index offsets on the input's own device. The
    # previous hard-coded torch.device('cuda') failed on CPU-only hosts and
    # could disagree with the device that actually holds x.
    idx_base = torch.arange(0, batch_size, device=x.device).view(
        -1, 1, 1
    ) * num_points  # (batch_size, 1, 1) [0 num_points ... num_points*(B-1)]
    # Shift each batch's indices so they address the flattened point list.
    idx = idx + idx_base  # (batch_size, num_points, k)
    # Flatten to one dimension for the gather below.
    idx = idx.view(-1)  # (batch_size * num_points * k)
    # Feature dimension.
    _, num_dims, _ = x.size()
    x = x.transpose(2, 1).contiguous()  # (batch_size, num_points, num_dims)
    # View x as the list of all points of all batches and index it with the
    # neighbor ids, which yields the features of every neighborhood point.
    feature = x.view(batch_size * num_points,
                     -1)[idx, :]  # (batch_size * num_points * k,num_dims)
    # Restore the per-batch, per-point, per-neighbor layout.
    feature = feature.view(batch_size, num_points, k,
                           num_dims)  # (batch_size, num_points, k, num_dims)
    if cat:
        # Repeat the center point k times so it can be combined with each of
        # its k neighbors.
        x = x.view(batch_size, num_points, 1,
                   num_dims).repeat(1, 1, k, 1)  # [B, num, k, num_dims]
        # Neighborhood representation (x, feature - x): the center point
        # followed by the neighbor offset (see the DGCNN paper).
        feature = torch.cat((x, feature - x),
                            dim=3).permute(0, 3, 1,
                                           2)  # [B, num_dims*2, num, k]
    else:
        feature = feature.permute(0, 3, 1, 2)  # [B, num_dims, num, k]
    return feature

示例#15

0

显示文件

文件： utils_test.py 项目： joaoms/incRec

 def test_knn(self):
     """knn with similarity x**2 - y over candidates 0..5 must return [1, 0] for k=2."""
     def similarity(x, y):
         return x**2 - y

     result = knn(0, [0, 1, 2, 3, 4, 5], 2, similarity)
     self.assertEqual(result, [1, 0])

示例#16

0

显示文件

from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plot

import utils

# Load the splits returned by utils.import_adult; `data` keeps them bundled
# so they can be star-unpacked into every utils.knn call below.
data = utils.import_adult(normalize=True)
x_train, x_test, y_train, y_test = data

# FINDING k
# Try each neighbor count from 18 to 23.
for k in range(18, 24):
    utils.knn(*data, n_neighbors=k)

# INFLUENCE OF THE WEIGHTS
utils.knn(*data, n_neighbors=20, weights='distance')

# INFLUENCE OF THE METRICS
metrics = ['manhattan', 'chebyshev']
for metric in metrics:
    utils.knn(*data, n_neighbors=20, metric=metric)

# BEST MODEL
# From here on the splits come from utils.import_wine instead.
data = utils.import_wine(y_transform=None)
x_train, x_test, y_train, y_test = data

utils.knn(*data, n_neighbors=20)

# LEARNING CURVE

示例#17

0

显示文件

文件： custom_layers.py 项目： samaonline/spatial-transformer-for-3d-point-clouds

 def setup(self, bottom, top):
     """Cache the point cloud from ``bottom[0]`` and its nearest-neighbor indices.

     ``self.param_str`` is parsed as the neighbor count. The number of points
     is read from axis 3 of the input blob. The squeezed blob is transposed
     with ``(0, 2, 1)`` before the pairwise distances and neighbor indices are
     computed. ``top`` is not used here.
     """
     self.numNN = int(self.param_str)
     blob = bottom[0].data
     self.numpts = int(blob.shape[3])
     self.point_cloud = np.squeeze(blob).transpose(0, 2, 1)
     distances = utils.pairwise_distance(self.point_cloud)
     self.nn_idx = utils.knn(distances, k=self.numNN)

示例#18

0

显示文件

文件： rec_user.py 项目： nondecidibile/keksys

 def get_similarity(self, data):
     """Return the cosine similarity of ``data.T`` filtered by ``utils.knn``.

     Uses this instance's ``alpha`` and ``asym`` with ``h`` fixed at 0, and
     ``self.knn`` as the neighbor count.
     """
     print("User similarity...")
     similarity = utils.cosine_similarity(data.T,
                                          alpha=self.alpha,
                                          asym=self.asym,
                                          h=0,
                                          dtype=np.float32)
     return utils.knn(similarity, self.knn)

示例#19

0

显示文件

文件： top_k_terms.py 项目： thiziri/data_processing

    encountred = []
    #encountred_random={}
    print("Collecting neighbors ...")

    for w in toProcess:
        for word in toProcess[w]:
            if word not in encountred:
                encountred.append(word)
                if word in model.vocab:
                    #neighbors=model.most_similar(word, topn=int(args["--n"])) # get most similar words using the word2vec function
                    alpha = (index.maximum_document() - id2df[token2id[word]] +
                             0.5) / (id2df[token2id[word]] + 0.5) + float(
                                 args["--b"])
                    neighbors = knn(
                        word, alpha, model, int(args["--n"])
                    )  # get most similar words using the knn function
                else:
                    #if word not in encountred_random:
                    #randomVect=random_vector(model.layer1_size)
                    #encountred_random[word]=randomVect
                    #else: randomVect=encountred_random[word]
                    #neighbors=w2v.similar_by_vector(randomVect, topn=int(args["--n"]), restrict_vocab=None)
                    neighbors = [(word, 1)]  #just has one neighbor
                #print(neighbors)
                word_neighbors = word
                for t in neighbors:
                    w = t[0]
                    #if (prog.match(w)):
                    #	w=w.replace('.','')
                    w_txt = w.lower(

示例#20

0

显示文件

    def forward(self, x):
        """Run the network on ``x`` and return the embedded features.

        After squeezing dim 1 and transposing, ``x`` is treated as
        (batch_size, num_dims, num_points). The first three channels are
        optionally rotated by ``self.t_net3d``. Two graph-feature stages in
        feature space and one stage that reuses the neighbors of the initial
        coordinates produce ``x1``, ``x2`` and ``x3``, which are concatenated
        and embedded by ``conv3_lpd``.

        Returns a tensor annotated (batch_size, self.emb_dims, num_points, 1)
        by the original author. The shape comments below are the original
        author's notes.
        """
        # (batch_size, num_dims, num_points)
        x = torch.squeeze(x, dim=1).transpose(2, 1)
        _, num_dims, _ = x.size()

        if num_dims > 3 or self.use_mFea:
            # NOTE(review): the split sizes are hard-coded to [3, 5] although
            # the original comment describes the second part as
            # num_dims-3; confirm that inputs always carry 8 channels.
            x, feature = x.transpose(2, 1).split(
                [3, 5], dim=2)  # [B,num,3]  [B,num,num_dims-3]
            # Backup of the initial coordinates,
            # (batch_size, num_dims, num_points).
            xInit3d = x.transpose(2, 1)
            # Optionally rotate the 3D coordinates.
            if self.t3d:
                trans = self.t_net3d(x.transpose(2, 1))
                x = torch.bmm(x, trans)
            # Re-attach the extra features whether or not a rotation happened.
            x = torch.cat([x, feature], dim=2).transpose(2, 1)  # [B,num_dims,num]
        else:
            xInit3d = x
            if self.t3d:
                # (num_dims, num_dims)
                trans = self.t_net3d(x)
                # (batch_size, num_dims, num_points)
                x = torch.bmm(x.transpose(2, 1), trans).transpose(2, 1)

        # conv1 and conv2, with batch norm only when enabled:
        # x becomes (batch_size, 64, num_points).
        if self.useBN:
            x = self.act_f(self.bn1_lpd(self.conv1_lpd(x)))
            x = self.act_f(self.bn2_lpd(self.conv2_lpd(x)))
        else:
            x = self.act_f(self.conv1_lpd(x))
            x = self.act_f(self.conv2_lpd(x))

        # Optional feature-space T-Net:
        # x stays (batch_size, num_dims, num_points) with num_dims = 64.
        if self.tfea:
            trans_feat = self.t_net_fea(x)
            x = torch.bmm(x.transpose(2, 1), trans_feat).transpose(2, 1)

        # Serial structure
        # Dynamic Graph cnn for feature space
        # NOTE(review): both branches make the same call; the resulting shape
        # presumably differs inside get_graph_feature depending on the flag.
        # Confirm against that helper.
        if cat_or_stack:
            # (batch_size, num_dims * 2, num_points, k)
            x = get_graph_feature(x, k=self.k)
        else:
            # (batch_size, num_dims, num_points, k + 1)
            x = get_graph_feature(x, k=self.k)

        # (batch_size, 128, num_points, k)
        x = self.convDG1(x)
        # (batch_size, 128, num_points, 1)
        x1, _ = x.max(dim=-1, keepdim=True)
        # (batch_size, 128, num_points, k)
        x = self.convDG2(x)
        # (batch_size, 128, num_points, 1)
        x2, _ = x.max(dim=-1, keepdim=True)

        # Spatial Neighborhood fusion for cartesian space
        # (batch_size, num_points, k)
        idx = knn(xInit3d, k=self.k)
        # (batch_size, 128 * 2, num_points, k)
        x = get_graph_feature(x2, idx=idx, k=self.k)
        # (batch_size, 256, num_points, k)
        x = self.convSN1(x)
        # (batch_size, 256, num_points, 1)
        x3, _ = x.max(dim=-1, keepdim=True)

        # Concatenate the three stages along the channel axis:
        # (batch_size, 512, num_points)
        x = torch.cat((x1, x2, x3), dim=1).squeeze(-1)

        # (batch_size, self.emb_dims, num_points)
        if self.useBN:
            x = self.act_f(self.bn3_lpd(self.conv3_lpd(x)))
        else:
            x = self.act_f(self.conv3_lpd(x))

        # Output: (batch_size, self.emb_dims, num_points, 1)
        return x.unsqueeze(-1)