def item_based_cf(datafile, userid, movieid, distance, k, iFlag, numOfUsers, numOfItems):
    """Predict a user's rating for a movie with an item-based collaborative filter.

    Parameters
    ----------
    datafile : str
        Fully specified path to a file formatted like the MovieLens100K
        ``u.data`` file (tab separated; the first three columns are read as
        user id, item id and rating).
    userid : int
        A 1-based user id from the data set.
    movieid : int
        A 1-based movie id from the data set.
    distance : int
        0 selects Pearson's correlation as the distance measure, 1 selects
        Manhattan distance. Forwarded unchanged to ``utils.knn`` as ``metric``.
    k : int
        Number of nearest neighbours to consider.
    iFlag : int
        0: only movies that carry an actual (non-zero) rating by ``userid``
        may enter the top K. 1: use the top K regardless of whether the
        ratings are actual or filled in. Forwarded unchanged to ``utils.knn``.
    numOfUsers : int
        Number of users in the data set.
    numOfItems : int
        Number of items in the data set. Together with ``numOfUsers`` this
        sizes the rating matrix; no constants are used, so data sets of other
        sizes work as well.

    Returns
    -------
    trueRating
        ``userid``'s actual rating for ``movieid`` as stored in the matrix.
    predictedRating
        Result of ``mode`` applied to the neighbours' ratings by ``userid``.

    AUTHOR: Shiyu Luo
    """
    # Text mode with newline='' is what the csv module expects; the context
    # manager guarantees the handle is closed even if parsing fails.
    with open(datafile, newline='') as handle:
        rows = list(csv.reader(handle, delimiter='\t'))
    columns = list(zip(*rows))

    # The ``np.int`` alias was removed from NumPy; the builtin ``int`` is the
    # documented drop-in replacement.
    user_ids = np.array(columns[0]).astype(int)
    item_ids = np.array(columns[1]).astype(int)
    ratings_col = np.array(columns[2]).astype(int)

    # Rows are indexed by movie and columns by user (see the lookups below).
    mv_mat = movie_matrix(user_ids, item_ids, ratings_col, numOfUsers, numOfItems)
    trueRating = mv_mat[movieid - 1, userid - 1]

    neighbors = utils.knn(mat=mv_mat, target_row=movieid - 1,
                          nonzero_col=userid - 1, metric=distance,
                          k=k, iFlag=iFlag)
    ratings = neighbors[:, userid - 1]
    predictedRating = mode(ratings)
    return trueRating, predictedRating
def wFM_on_sphere(self, inputs):
    """Aggregate each point's k nearest neighbours with normalised weights.

    ``inputs`` is unpacked as a 4-D tensor ``[B, N, D, C]``. The points are
    optionally down-sampled, a k-nearest-neighbour index is built from the
    pairwise distances, the neighbours are gathered and combined with
    ``weight_normalize(self.w1, dim=1)`` over the neighbour axis, and the
    result is multiplied by ``weight_normalize(self.w2, dim=0)``.

    Returns the weighted tensor; the original author annotates its shape as
    ``[B, N, D, Cout]``.
    """
    batch, n_pts, dim, chan = inputs.shape
    scores = self.conv(inputs)
    flat = inputs.contiguous().view(batch, n_pts, dim * chan)

    # Optional down-sampling driven by the conv output.
    if self.down_sample_rate != 1:
        kept = int(n_pts * self.down_sample_rate)
        flat = down_sampling(flat, scores.squeeze(), kept)
        n_pts = kept
    # NOTE(review): the source indentation was lost; this reshape is assumed
    # to run unconditionally (after the down-sampling branch) -- confirm.
    points = flat.view(batch, n_pts, dim, chan)

    # Build the k-nearest-neighbour index from pairwise distances.
    adj = utils.pairwise_distance(points)
    print("---------------------------------\n[Adj Matrix")
    print(adj)
    print("---------------------------------\n")
    neighbours = torch.Tensor(
        utils.knn(adj, k=self.k, include_myself=True)
    ).long()

    # Per-batch offsets so the indices address the flattened (B*N) point list.
    batch_offsets = (torch.arange(batch) * n_pts).view((batch, 1, 1))
    flat_index = neighbours + batch_offsets

    cloud = points.view(batch * n_pts, dim, chan).view(batch * n_pts, dim * chan)
    gathered = cloud[flat_index]  # [B, N, K, D*C]
    gathered = gathered.view(batch, n_pts, self.k, dim, chan)
    gathered = gathered.permute(0, 1, 3, 4, 2)  # [B, N, D, C, K]

    # Weighted sum over the neighbour axis, then mix channels.
    combined = torch.sum(gathered * weight_normalize(self.w1, dim=1), dim=-1)
    return torch.matmul(combined, weight_normalize(self.w2, dim=0))
def forward(self, x):
    """Embed a point cloud with T-Net alignment and two graph-feature stages.

    ``x`` is squeezed on dim 1 and transposed to ``[B, num_dims, num]``.
    When more than three channels are present (or ``self.use_mFea`` is set)
    the first three channels are treated as coordinates and the remaining
    ``num_dims - 3`` as extra features; only the coordinates go through the
    optional 3-D T-Net (``self.t3d``). The tensor then passes through the
    point-wise convolutions, an optional feature T-Net (``self.tfea``), a
    graph-feature stage whose neighbours are found in feature space, a second
    stage whose neighbours are found on the initial coordinates, and the final
    convolutions.

    Shape notes in the comments are carried over from the original author's
    annotations; they depend on layer definitions that are not visible here.

    Returns the embedding with a trailing singleton axis
    (annotated ``[b, emb_dims, num, 1]``).
    """
    x = torch.squeeze(x, dim=1).transpose(2, 1)  # [B, num_dims, num]
    _, num_dims, _ = x.size()

    # Apply the T-Net rotation to the coordinates only.
    if num_dims > 3 or self.use_mFea:
        # Split into coordinates [B, num, 3] and features [B, num, num_dims-3].
        # The feature width is derived from the input instead of being fixed
        # at 5, so inputs with a channel count other than 8 no longer fail.
        x, feature = x.transpose(2, 1).split([3, num_dims - 3], dim=2)
        xInit3d = x.transpose(2, 1)
        # Optionally rotate the 3-D coordinates.
        if self.t3d:
            trans = self.t_net3d(x.transpose(2, 1))
            x = torch.bmm(x, trans)
        x = torch.cat([x, feature], dim=2).transpose(2, 1)  # [B, num_dims, num]
    else:
        xInit3d = x
        if self.t3d:
            trans = self.t_net3d(x)
            x = torch.bmm(x.transpose(2, 1), trans).transpose(2, 1)

    x = self.conv1_lpd(x)
    x = self.conv2_lpd(x)

    if self.tfea:
        trans_feat = self.t_net_fea(x)
        x = x.transpose(2, 1)
        x = torch.bmm(x, trans_feat)
        x = x.transpose(2, 1)

    # Serial structure
    # Dynamic graph CNN in feature space
    x = get_graph_feature_Origin(x, k=self.k)  # [b, 64*2, num, 20]
    x = self.convDG1(x)  # [b, 64, num, 20]
    x = self.convDG2(x)  # [b, 64, num, 20]
    x = x.max(dim=-1, keepdim=True)[0]  # [b, 64, num, 1]

    # Spatial neighbourhood fusion in Cartesian space
    idx = knn(xInit3d, k=self.k)
    x = get_graph_feature_Origin(x, idx=idx, k=self.k, cat=False)  # [b, 64, num, 20]
    x = self.convSN1(x)  # [b, 64, num, 20]
    x = self.convSN2(x)  # [b, 64, num, 20]
    x = x.max(dim=-1, keepdim=True)[0].squeeze(-1)  # [b, 64, num]

    x = self.conv3_lpd(x)  # [b, 64, num]
    x = self.conv4_lpd(x)  # [b, 128, num]
    x = self.conv5_lpd(x)  # [b, emb_dims, num]
    x = x.unsqueeze(-1)  # [b, emb_dims, num, 1]
    return x
def _neighborhood(self, ident):
    """Compute and return the neighborhood of an element.

    The candidate pool is ``self.users`` with ``ident`` itself removed; the
    selection is delegated to ``knn`` with ``self.n_neighbors`` and
    ``self.similarity_between``.

    :param ident: The element to calculate the neighborhood for.
    :type ident: int
    :return: Whatever ``knn`` returns for these arguments.
    """
    others = self.users.difference({ident})
    neighbours = knn(ident, others, self.n_neighbors, self.similarity_between)
    return neighbours
def main():
    """Print the 20 nearest-neighbour distances from ``knn`` and from a VA-file search.

    A random 50000 x 2 array is generated, row 22000 is used as the query,
    and the distances reported by ``knn`` are printed next to the 20
    smallest entries of ``va_inst.dst`` after ``near_optimal_search``.
    """
    n_rows, n_cols = 50000, 2
    top_n = 20

    data = np.random.rand(n_rows, n_cols)
    q = data[22000]

    va_inst = VAFile(data, 8)
    va_inst.near_optimal_search(q, top_n)

    print ("Return for 20 nearest neighbors to q on a 50000 by 2 random data array")
    _, reference_dists = knn(data, q, top_n)
    print (reference_dists)

    print ("Sorting va_inst.dst")
    order = np.argsort(va_inst.dst)

    print ("Returning top 20 results from dst array")
    print (va_inst.dst[order[:top_n]])
def _neighborhood(self, ident, candidate_set):
    """Compute and return the neighborhood of an element inside a cluster.

    The candidate pool is ``candidate_set`` with ``ident`` itself removed;
    the selection is delegated to ``knn`` with ``self.n_neighbors`` and
    ``self.similarity_between``.

    :param ident: The element to calculate the neighborhood for.
    :type ident: int
    :param candidate_set: The cluster.
    :type candidate_set: DynamicArray
    :return: Whatever ``knn`` returns for these arguments.
    """
    others = candidate_set.difference({ident})
    neighbours = knn(ident, others, self.n_neighbors, self.similarity_between)
    return neighbours
def calc_laplacian_mat(points, k):
    """Build a sparse (N x N) matrix of inverse k-NN distances.

    For every point, the values at neighbour columns 1..k-1 are the inverse
    distances returned by ``utls.knn``; the value stored at the column given
    by the first neighbour index is the negated sum of those inverses.
    Presumably the first neighbour is the point itself, which would put that
    entry on the diagonal -- verify against ``utls.knn``.

    :param points: Point array; ``points.shape[0]`` is the number of points.
    :param k: Number of neighbours requested from ``utls.knn``.
    :return: A reordered ``tf.SparseTensor`` of dense shape (N, N).
    """
    n_points = points.shape[0]
    pairwise = utls.pairwise_distance(points, points)
    distance, indices = utls.knn(pairwise, k=k)

    # Inverse distances of every neighbour except the first column.
    inv_rest = 1 / tf.cast(distance, dtype=tf.float64)[:, 1:]
    neg_row_sum = -tf.reduce_sum(inv_rest, axis=1, keepdims=True)
    values = tf.reshape(tf.concat((neg_row_sum, inv_rest), axis=1), [-1])

    # Each row index is repeated k times to line up with the flattened columns.
    col_ids = tf.reshape(indices, [-1, 1])
    row_ids = tf.reshape(tf.range(n_points), [-1, 1])
    row_ids = tf.reshape(tf.keras.backend.repeat(row_ids, k), [-1, 1])
    coords = tf.cast(tf.concat((row_ids, col_ids), axis=1), dtype=tf.int64)

    sparse_mat = tf.SparseTensor(indices=coords, values=values,
                                 dense_shape=(n_points, n_points))
    return tf.sparse.reorder(sparse_mat)
def get_graph_feature(x, k=20, idx=None):
    """Gather the k-nearest-neighbour features of every point.

    :param x: Tensor whose dim 0 is the batch and dim 2 the points; it is
        viewed as ``(batch_size, num_dims, num_points)``.
    :param k: Number of neighbours per point.
    :param idx: Optional precomputed neighbour indices of shape
        ``(batch_size, num_points, k)``; computed with ``knn(x, k=k)`` when
        omitted.
    :return: If the module-level flag ``cat_or_stack`` is truthy, the
        neighbour features concatenated with the repeated centre point along
        the channel axis: ``(batch_size, num_dims * 2, num_points, k)``.
        Otherwise the centre point is appended as one extra neighbour:
        ``(batch_size, num_dims, num_points, k + 1)``.
    """
    batch_size = x.size(0)
    num_points = x.size(2)
    x = x.view(batch_size, -1, num_points)
    if idx is None:
        # (batch_size, num_points, k)
        idx = knn(x, k=k)

    # Follow the input's device instead of assuming CUDA, so the function
    # also works on CPU-only hosts and on tensors placed on another device.
    device = x.device

    # (batch_size, 1, 1): [0, num_points, ..., num_points * (batch_size - 1)]
    idx_base = torch.arange(0, batch_size, device=device).view(-1, 1, 1) * num_points
    # Offset per batch so the indices address the flattened point list.
    idx = (idx + idx_base).view(-1)  # (batch_size * num_points * k)

    _, num_dims, _ = x.size()
    # (batch_size, num_points, num_dims)
    x = x.transpose(2, 1).contiguous()
    # (batch_size * num_points * k, num_dims)
    feature = x.view(batch_size * num_points, -1)[idx, :]
    # For every batch and point: its k neighbours, each with num_dims values.
    feature = feature.view(batch_size, num_points, k, num_dims)

    if cat_or_stack:
        # (batch_size, num_points, k, num_dims)
        x = x.view(batch_size, num_points, 1, num_dims).repeat(1, 1, k, 1)
        # (batch_size, num_dims * 2, num_points, k)
        feature = torch.cat((feature, x), dim=3).permute(0, 3, 1, 2)
    else:
        # (batch_size, num_points, 1, num_dims)
        x = x.view(batch_size, num_points, 1, num_dims)
        # (batch_size, num_dims, num_points, k + 1)
        feature = torch.cat((feature, x), dim=2).permute(0, 3, 1, 2)
    return feature
def get_similarity(self, data):
    """Train an item-item matrix on ``data`` and return it after ``utils.knn``.

    Side effects: sets ``self.num_interactions`` to the number of stored
    entries of ``data`` and ``self.bpr_sampler`` to a ``BPR`` built from the
    CSR matrix, then calls ``self.train`` with a zero-initialised square
    float32 matrix sized by ``data``'s column count (presumably filled in
    place by ``self.train`` -- verify).

    :param data: Sparse matrix exposing ``tocsr()``.
    :return: ``utils.knn`` applied to the transposed matrix with
        ``knn=self.knn``.
    """
    csr = data.tocsr()
    self.num_interactions = csr.nnz
    urm = sp.csr_matrix(csr)
    self.bpr_sampler = BPR(urm)

    n_items = urm.shape[1]
    weights = np.zeros((n_items, n_items), dtype=np.float32)
    self.train(self.lr, self.epochs, urm, weights)

    return utils.knn(weights.T, knn=self.knn)
def get_similarity(self, data):
    """Blend the similarities of two sub-models into one matrix.

    Each sub-model's similarity is scaled by its weight (``self.w1`` /
    ``self.w2``) and accumulated; the sum is L2-normalised per row and passed
    through ``utils.knn`` with ``np.inf`` as second argument.

    :param data: Matrix whose second dimension sizes the square result.
    :return: The value returned by ``utils.knn``.
    """
    n_items = data.shape[1]
    blended = sparse.csr_matrix((n_items, n_items), dtype=np.float32)

    # Each contribution is a temporary, so it is released as soon as it has
    # been added -- no explicit ``del`` needed.
    blended += self.model1.get_similarity(data) * self.w1
    blended += self.model2.get_similarity(data) * self.w2

    blended = normalize(blended, norm='l2', axis=1)
    return utils.knn(blended, np.inf)
def cal_testing_image_labels(img_list, crsval_mode):
    """Save the nearest-anchor label array for every test image.

    For each image the ground-truth RGB data is column-normalised with
    ``utils.normc``, matched against the normalised RGB anchors with a
    1-nearest-neighbour search, and the flattened result is written to
    ``<sparse_label dir>/<name>_label.npy``.

    :param img_list: Image names; the last character of each entry is dropped
        before loading (presumably a trailing newline -- verify) and the last
        five before building the output file name.
    :param crsval_mode: Modes 1 and 2 use the first anchor set, modes 3 and 4
        the second one.
    """
    print(" Preparing Testing Images...")

    cmf = resources['cmf']
    if crsval_mode in (1, 2):
        rgb_anchors_norm = utils.normc(resources['anchors'][0] @ cmf)
    elif crsval_mode in (3, 4):
        rgb_anchors_norm = utils.normc(resources['anchors'][1] @ cmf)

    for img_name in img_list:
        stem = img_name[:-1]
        print(" ", stem)
        # The original author annotates the loaded image as 31 x H x W.
        spec_img = load_icvl_data(directories['data'], stem)

        _, rgb = utils.cal_gt_data(spec_img, resources['cmf'])
        rgb_data_norm = utils.normc(rgb)

        labels = utils.knn(rgb_anchors_norm, rgb_data_norm,
                           k=1, batch_size=400000)
        out_path = os.path.join(directories['sparse_label'],
                                img_name[:-5]+'_label.npy')
        np.save(out_path, labels.reshape(-1))
def get_similarity(self, data):
    """Combine item, artist and album cosine similarities.

    The item similarity is computed from ``data`` with this model's
    ``alpha`` / ``asym`` / ``h``; the artist and album similarities use fixed
    settings and are scaled by ``self.artist_w`` and ``self.album_w`` before
    being added in place. The sum is passed through ``utils.knn``.

    :param data: Interaction matrix handed to ``utils.cosine_similarity``.
    :return: ``utils.knn(similarity, self.knn)``.
    """
    print("Computing Item similarity...")

    # Settings shared by the two side-information similarities.
    side_opts = dict(alpha=0.5, asym=True, h=0, dtype=np.float32)

    total = utils.cosine_similarity(
        data, alpha=self.alpha, asym=self.asym, h=self.h, dtype=np.float32)
    # Artist contribution
    total += utils.cosine_similarity(self.artists_mat, **side_opts) * self.artist_w
    # Album contribution
    total += utils.cosine_similarity(self.albums_mat, **side_opts) * self.album_w

    return utils.knn(total, self.knn)
def cal_validation_data(img_list, crsval_mode):
    """Precompute and store validation data and its nearest-neighbour indices.

    Ground-truth data is collected for ``img_list``, its normalised RGB part
    is searched against the normalised RGB anchors, and two files are written
    to the ``precal`` directory: a pickle of the ground-truth data
    (``sparse_all_data<suffix>.pkl``) and the neighbour result
    (``sparse_neighbor_idx<suffix>.npy``).

    :param img_list: Images handed to ``utils.collect_gt_data``.
    :param crsval_mode: Modes 1 and 2 use the first anchor set, modes 3 and 4
        the second one; also determines the file suffix.
    """
    print(" Preparing Validation Images...")

    gt_data = utils.collect_gt_data(
        directories['data'],
        img_list,
        resources['cmf'],
        num_sampling_points=param['num_sampling_points'],
        rand=param['random_shuffle'],
    )
    rgb_data_norm = utils.normc(gt_data['rgb'])

    cmf = resources['cmf']
    if crsval_mode in (1, 2):
        rgb_anchors_norm = utils.normc(resources['anchors'][0] @ cmf)
    elif crsval_mode in (3, 4):
        rgb_anchors_norm = utils.normc(resources['anchors'][1] @ cmf)

    # Half of the configured neighbour count is requested per sample.
    nearest_neighbors = utils.knn(rgb_data_norm, rgb_anchors_norm,
                                  k=param['num_neighbors']//2, batch_size=250)

    # The '_rand' suffix for random shuffling is disabled in the original;
    # only augmentation changes the suffix.
    _, val_suffix = generate_crsval_suffix(crsval_mode)
    if param['augmentation']:
        val_suffix = val_suffix + '_aug'

    precal_dir = directories['precal']
    data_path = os.path.join(precal_dir, 'sparse_all_data'+val_suffix+'.pkl')
    with open(data_path, 'wb') as handle:
        pickle.dump(gt_data, handle, protocol=pickle.HIGHEST_PROTOCOL)

    idx_path = os.path.join(precal_dir, 'sparse_neighbor_idx'+val_suffix+'.npy')
    np.save(idx_path, nearest_neighbors)
def get_graph_feature_Origin(x, k=20, idx=None, cat=True):
    """Gather the k-nearest-neighbour features of every point.

    :param x: Tensor whose dim 0 is the batch and dim 2 the points; it is
        viewed as ``(batch_size, num_dims, num_points)``.
    :param k: Number of neighbours per point.
    :param idx: Optional precomputed neighbour indices of shape
        ``(batch_size, num_points, k)``; computed with ``knn(x, k=k)`` when
        omitted.
    :param cat: When true, return the centre point concatenated with the
        neighbour offsets ``(x, feature - x)`` along the channel axis, shape
        ``(batch_size, num_dims * 2, num_points, k)``. When false, return the
        raw neighbour features, shape ``(batch_size, num_dims, num_points, k)``.
    :return: The graph feature tensor described above.
    """
    batch_size = x.size(0)
    num_points = x.size(2)
    x = x.view(batch_size, -1, num_points)
    if idx is None:
        idx = knn(x, k=k)  # (batch_size, num_points, k)

    # Follow the input's device instead of assuming CUDA, so the function
    # also works on CPU-only hosts and on tensors placed on another device.
    device = x.device

    # Per-batch offsets: (batch_size, 1, 1) = [0, num_points, ..., num_points*(B-1)]
    idx_base = torch.arange(0, batch_size, device=device).view(-1, 1, 1) * num_points
    # Add the offset batch by batch, then flatten for plain row indexing.
    idx = (idx + idx_base).view(-1)  # (batch_size * num_points * k)

    # Feature dimensionality
    _, num_dims, _ = x.size()
    x = x.transpose(2, 1).contiguous()  # (batch_size, num_points, num_dims)

    # Rows are all points of all batches; idx selects every neighbour's row.
    feature = x.view(batch_size * num_points, -1)[idx, :]  # (B * num * k, num_dims)
    feature = feature.view(batch_size, num_points, k, num_dims)

    if cat:
        # Repeat the centre point k times so it pairs with each neighbour.
        x = x.view(batch_size, num_points, 1, num_dims).repeat(1, 1, k, 1)
        # Centre point plus neighbour offset -- [B, num_dims*2, num, k].
        feature = torch.cat((x, feature - x), dim=3).permute(0, 3, 1, 2)
    else:
        feature = feature.permute(0, 3, 1, 2)  # [B, num_dims, num, k]
    return feature
def test_knn(self):
    """``knn`` returns ``[1, 0]`` for element 0 with the x**2 - y similarity."""
    candidates = [0, 1, 2, 3, 4, 5]
    similarity = lambda x, y: x**2 - y
    expected = [1, 0]

    result = knn(0, candidates, 2, similarity)

    self.assertEqual(result, expected)
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plot
import utils

# Experiment script: runs utils.knn on the "adult" data with several
# hyper-parameter settings, then once on the "wine" data.

# `data` keeps the whole tuple returned by the loader so it can be splatted
# into utils.knn; the four names on the same line unpack that tuple.
data = x_train, x_test, y_train, y_test = utils.import_adult(normalize=True)

# FINDING k
# Try neighbour counts 18..23 (range end is exclusive).
for k in range(18, 24):
    utils.knn(*data, n_neighbors=k)

# INFLUENCE OF THE WEIGHTS
utils.knn(*data, n_neighbors=20, weights='distance')

# INFLUENCE OF THE METRICS
metrics = ['manhattan', 'chebyshev']
for metric in metrics:
    utils.knn(*data, n_neighbors=20, metric=metric)

# BEST MODEL
# Same configuration (20 neighbours), now on the wine data.
data = x_train, x_test, y_train, y_test = utils.import_wine(y_transform=None)
utils.knn(*data, n_neighbors=20)

# LEARNING CURVE
def setup(self, bottom, top):
    """Cache the point cloud of the first bottom blob and its k-NN index.

    Sets ``self.numNN`` (from ``self.param_str``), ``self.numpts`` (size of
    axis 3 of the blob data), ``self.point_cloud`` (squeezed blob data with
    its last two axes swapped) and ``self.nn_idx`` (``utils.knn`` on the
    pairwise distances of the point cloud).

    :param bottom: Input blobs; only ``bottom[0].data`` is read.
    :param top: Output blobs; not used here.
    """
    self.numNN = int(self.param_str)

    blob = bottom[0].data
    self.numpts = int(blob.shape[3])
    self.point_cloud = np.squeeze(blob).transpose(0, 2, 1)

    distances = utils.pairwise_distance(self.point_cloud)
    self.nn_idx = utils.knn(distances, k=self.numNN)
def get_similarity(self, data):
    """Return the cosine similarity of the transposed ``data`` after ``utils.knn``.

    The similarity is computed on ``data.T`` with this model's ``alpha`` and
    ``asym`` and ``h`` fixed at 0, then passed through ``utils.knn`` with
    ``self.knn``.

    :param data: Interaction matrix; its transpose is compared.
    :return: ``utils.knn(similarity, self.knn)``.
    """
    print("User similarity...")
    similarity = utils.cosine_similarity(
        data.T,
        alpha=self.alpha,
        asym=self.asym,
        h=0,
        dtype=np.float32,
    )
    return utils.knn(similarity, self.knn)
encountred = [] #encountred_random={} print("Collecting neighbors ...") for w in toProcess: for word in toProcess[w]: if word not in encountred: encountred.append(word) if word in model.vocab: #neighbors=model.most_similar(word, topn=int(args["--n"])) # get most similar words using the word2vec function alpha = (index.maximum_document() - id2df[token2id[word]] + 0.5) / (id2df[token2id[word]] + 0.5) + float( args["--b"]) neighbors = knn( word, alpha, model, int(args["--n"]) ) # get most similar words using the knn function else: #if word not in encountred_random: #randomVect=random_vector(model.layer1_size) #encountred_random[word]=randomVect #else: randomVect=encountred_random[word] #neighbors=w2v.similar_by_vector(randomVect, topn=int(args["--n"]), restrict_vocab=None) neighbors = [(word, 1)] #just has one neighbor #print(neighbors) word_neighbors = word for t in neighbors: w = t[0] #if (prog.match(w)): # w=w.replace('.','') w_txt = w.lower(
def forward(self, x):
    """Embed a point cloud with T-Net alignment and multi-scale graph features.

    ``x`` is squeezed on dim 1 and transposed to
    ``(batch_size, num_dims, num_points)``. When more than three channels are
    present (or ``self.use_mFea`` is set) the first three channels are treated
    as coordinates and the remaining ``num_dims - 3`` as extra features; only
    the coordinates go through the optional 3-D T-Net (``self.t3d``). After
    two point-wise convolutions and an optional feature T-Net (``self.tfea``),
    three max-pooled graph features are produced (two with neighbours found
    in feature space, one with neighbours found on the initial coordinates),
    concatenated and passed through the last convolution.

    Shape notes in the comments are carried over from the original author's
    annotations; they depend on layer definitions that are not visible here.

    Returns a tensor annotated as ``(batch_size, self.emb_dims, num_points, 1)``.
    """
    # (batch_size, num_dims, num_points)
    x = torch.squeeze(x, dim=1).transpose(2, 1)
    _, num_dims, _ = x.size()

    if num_dims > 3 or self.use_mFea:
        # Split into coordinates [B, num, 3] and features [B, num, num_dims-3].
        # The feature width is derived from the input instead of being fixed
        # at 5, so inputs with a channel count other than 8 no longer fail.
        x, feature = x.transpose(2, 1).split([3, num_dims - 3], dim=2)
        xInit3d = x.transpose(2, 1)
        # Optionally rotate the 3-D coordinates.
        if self.t3d:
            trans = self.t_net3d(x.transpose(2, 1))
            x = torch.bmm(x, trans)
        x = torch.cat([x, feature], dim=2).transpose(2, 1)  # [B, num_dims, num]
    else:
        xInit3d = x
        if self.t3d:
            # (num_dims, num_dims)
            trans = self.t_net3d(x)
            # (batch_size, num_dims, num_points)
            x = torch.bmm(x.transpose(2, 1), trans).transpose(2, 1)
    # x: T-Net aligned input; xInit3d: backup of the initial coordinates,
    # both (batch_size, num_dims, num_points).

    if self.useBN:
        x = self.act_f(self.bn1_lpd(self.conv1_lpd(x)))
        x = self.act_f(self.bn2_lpd(self.conv2_lpd(x)))
    else:
        x = self.act_f(self.conv1_lpd(x))
        x = self.act_f(self.conv2_lpd(x))
    # x after conv1 and conv2: (batch_size, 64, num_points)

    if self.tfea:
        trans_feat = self.t_net_fea(x)
        x = x.transpose(2, 1)
        x = torch.bmm(x, trans_feat)
        x = x.transpose(2, 1)
    # x after the feature T-Net: (batch_size, num_dims, num_points), num_dims = 64

    # Serial structure
    # Dynamic graph CNN in feature space. The call is the same whatever the
    # module-level cat_or_stack flag is; get_graph_feature reads the flag
    # itself and returns (batch_size, num_dims * 2, num_points, k) or
    # (batch_size, num_dims, num_points, k + 1) accordingly.
    x = get_graph_feature(x, k=self.k)

    # (batch_size, 128, num_points, k)
    x = self.convDG1(x)
    # (batch_size, 128, num_points, 1)
    x1 = x.max(dim=-1, keepdim=True)[0]
    # (batch_size, 128, num_points, k)
    x = self.convDG2(x)
    # (batch_size, 128, num_points, 1)
    x2 = x.max(dim=-1, keepdim=True)[0]

    # Spatial neighbourhood fusion in Cartesian space
    # (batch_size, num_points, k)
    idx = knn(xInit3d, k=self.k)
    # (batch_size, 128 * 2, num_points, k)
    x = get_graph_feature(x2, idx=idx, k=self.k)
    # (batch_size, 256, num_points, k)
    x = self.convSN1(x)
    # (batch_size, 256, num_points, 1)
    x3 = x.max(dim=-1, keepdim=True)[0]

    # Concatenate the three scales and drop the pooled axis:
    # (batch_size, 512, num_points)
    x = torch.cat((x1, x2, x3), dim=1).squeeze(-1)

    # (batch_size, self.emb_dims, num_points)
    if self.useBN:
        x = self.act_f(self.bn3_lpd(self.conv3_lpd(x)))
    else:
        x = self.act_f(self.conv3_lpd(x))

    # (batch_size, self.emb_dims, num_points, 1)
    x = x.unsqueeze(-1)
    return x