def __init__(self,
             train_data,
             test_data,
             interaction_data: sp.csr_matrix,
             n_epochs=100,
             batch_size=256,
             embedding_k=64,
             top_k=10,
             learning_rate=0.0001,
             use_model=True):
    """
    Init function.
    :param train_data: The train data.
    :param test_data: The test data.
    :param interaction_data: The user-track interaction data.
    :param n_epochs: Number of training epochs.
    :param batch_size: Training batch size.
    :param embedding_k: The length of the user/track embedding vectors.
    :param top_k: In top-k recommendation, recommend top_k tracks for each user.
    :param learning_rate: Learning rate used during training.
    :param use_model: If True, try to restore the latest checkpoint from ../cpkt/.
    """
    self.train_data = train_data
    self.test_data = test_data
    self.interaction_data: sp.csr_matrix = interaction_data
    self.n_epochs = n_epochs
    self.batch_size = batch_size
    self.embedding_k = embedding_k
    self.top_k = top_k
    self.learning_rate = learning_rate
    self.num_user = interaction_data.get_shape()[0]
    self.num_item = interaction_data.get_shape()[1]
    # Build the TF graph.
    self.build_model()
    # Create the session.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver()
    if use_model:
        # Directory where the checkpoints live.
        ckpt = tf.train.get_checkpoint_state('../cpkt/')
        if ckpt and ckpt.model_checkpoint_path:
            # Restore the model saved at model_checkpoint_path,
            # which is usually the latest checkpoint.
            self.saver.restore(self.sess, ckpt.model_checkpoint_path)
            print("Model restored...")
        else:
            print('No Model')
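# The constructor above uses the standard TF 1.x checkpoint-restore idiom.
# Below is a minimal self-contained sketch of just that idiom; the variable
# name and the './ckpt_demo/' directory are illustrative, not part of this repo.
import tensorflow as tf

v = tf.get_variable("v", shape=[2], initializer=tf.zeros_initializer())
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state('./ckpt_demo/')
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)  # resume the latest
        print("Model restored...")
    else:
        print('No Model')  # fall back to the freshly initialized variables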
import numpy
from scipy.sparse import csr_matrix


def page_rank(m: csr_matrix, alpha):
    """
    Compute PageRank scores by power iteration over the graph given by the
    sparse adjacency matrix m.
    :param m: The adjacency matrix of the graph.
    :param alpha: The damping factor (probability of following an edge).
    :return: A 1-D array with one PageRank score per node.
    """
    n, _ = m.get_shape()
    # Row-normalize; the mask term avoids division by zero for isolated nodes.
    degree = numpy.sum(m, axis=1)
    weight_mat = m / (degree + numpy.array(degree == 0, dtype=int))
    v = numpy.random.rand(n).reshape((-1, 1))
    last_v = v
    while True:
        # Follow an edge with probability alpha; teleport to a uniformly
        # random node with probability 1 - alpha.
        v = alpha * (weight_mat.T * v) + (1 - alpha) / n
        delta = numpy.sum(abs(v - last_v))
        print(delta)  # monitor convergence
        if delta < 0.0001:
            break
        last_v = v
    return numpy.array(v).flatten()
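# A quick sanity check for page_rank on a 3-node path graph (illustrative
# data, not from this repo). The middle node is the best connected, so it
# should receive the highest score, and the teleport term keeps the total ~1.
import numpy
from scipy.sparse import csr_matrix

adj = csr_matrix(numpy.array([[0., 1., 0.],
                              [1., 0., 1.],
                              [0., 1., 0.]]))
scores = page_rank(adj, alpha=0.85)
print(scores, scores.sum())  # middle entry largest; sum close to 1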
def __init__(self, A: sparse.csr_matrix, L: sparse.csr_matrix, batch_size=1):
    '''
    A trick dataset for graph data: batching is handled inside the dataset
    itself, so the DataLoader used during training should always be created
    with batch_size=1.
    :param A: Sparse matrix; its row count defines the dataset size.
    :param L: Square sparse matrix, sliced along the same row indices as A.
    :param batch_size: The internal batch size used when slicing A and L.
    '''
    self.A = A
    self.L = L
    self.size = A.get_shape()[0]
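# How a wrapper like this is typically consumed (a sketch, not code from this
# repo: the class name GraphBatchDataset and the __getitem__ body are
# assumptions). Because the dataset slices its own batches out of A and L,
# the DataLoader keeps batch_size=1 and each "sample" is already a full batch.
import numpy as np
import torch
from scipy import sparse
from torch.utils.data import DataLoader, Dataset


class GraphBatchDataset(Dataset):
    def __init__(self, A, L, batch_size=64):
        self.A, self.L, self.batch_size = A, L, batch_size
        self.size = A.get_shape()[0]

    def __len__(self):
        # Number of internal batches, not number of rows.
        return (self.size - 1) // self.batch_size + 1

    def __getitem__(self, i):
        index = np.arange(i * self.batch_size,
                          min((i + 1) * self.batch_size, self.size))
        A_batch = torch.tensor(np.asarray(self.A[index, :].todense()))
        L_batch = torch.tensor(np.asarray(self.L[index][:, index].todense()))
        return A_batch, L_batch


A = sparse.random(100, 100, density=0.05, format='csr')
loader = DataLoader(GraphBatchDataset(A, A, batch_size=32), batch_size=1)
for A_batch, L_batch in loader:
    # The DataLoader adds a leading dim of size 1; squeeze it before use.
    A_batch, L_batch = A_batch.squeeze(0), L_batch.squeeze(0)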
import random

import numpy as np
from scipy import sparse
from sklearn.cluster import KMeans


def bisecting_kmeans(points: sparse.csr_matrix, k=2):
    """
    Bisecting k-means: repeatedly split the largest cluster in two with
    2-means until there are k clusters.
    :param points: Sparse matrix with one row per user.
    :param k: The target number of clusters.
    :return: A dict mapping cluster key -> sparse row matrix, and a dict
        mapping user id -> (cluster key, row index within that cluster).
    """
    user_to_cluster = {}
    cluster_to_user = {}
    clusters = {0: points}
    for i in range(points.get_shape()[0]):
        user_to_cluster[i] = (0, i)
        cluster_to_user[(0, i)] = i
    while len(clusters) < k:
        # Pick the cluster with the most rows and remove it from the dict.
        biggest_cluster_key = max(
            clusters, key=lambda key: clusters[key].get_shape()[0])
        biggest_cluster = clusters.pop(biggest_cluster_key)
        kmeans = KMeans(n_clusters=2, max_iter=100).fit(biggest_cluster)
        # Draw fresh keys for the two halves, avoiding collisions.
        key1 = random.randint(1, 1000000)
        while key1 in clusters:
            key1 = random.randint(1, 1000000)
        key2 = random.randint(1, 1000000)
        while key2 in clusters or key2 == key1:
            key2 = random.randint(1, 1000000)
        id1 = 0
        id2 = 0
        clusters_data1 = [[], [], []]  # rows, cols, values in COO form
        clusters_data2 = [[], [], []]
        for i in range(len(kmeans.labels_)):
            row, col = biggest_cluster.getrow(i).nonzero()
            data = np.array(biggest_cluster.getrow(i)[row, col]).flatten()
            if kmeans.labels_[i] == 0:
                for j in range(len(col)):
                    clusters_data1[0].append(id1)
                    clusters_data1[1].append(col[j])
                    clusters_data1[2].append(data[j])
                # Re-point the mappings at the new (cluster, row) position.
                user_id = cluster_to_user[(biggest_cluster_key, i)]
                cluster_to_user[(key1, id1)] = user_id
                user_to_cluster[user_id] = (key1, id1)
                id1 += 1
            else:
                for j in range(len(col)):
                    clusters_data2[0].append(id2)
                    clusters_data2[1].append(col[j])
                    clusters_data2[2].append(data[j])
                user_id = cluster_to_user[(biggest_cluster_key, i)]
                cluster_to_user[(key2, id2)] = user_id
                user_to_cluster[user_id] = (key2, id2)
                id2 += 1
            del cluster_to_user[(biggest_cluster_key, i)]
        clusters[key1] = sparse.csr_matrix(
            (clusters_data1[2], (clusters_data1[0], clusters_data1[1])),
            (id1, biggest_cluster.get_shape()[1]))
        clusters[key2] = sparse.csr_matrix(
            (clusters_data2[2], (clusters_data2[0], clusters_data2[1])),
            (id2, biggest_cluster.get_shape()[1]))
    return clusters, user_to_cluster
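# A small smoke test with illustrative random data: split 60 users into 4
# clusters and check that the returned mapping stays consistent.
import numpy as np
from scipy import sparse

points = sparse.random(60, 20, density=0.2, format='csr', random_state=0)
clusters, user_to_cluster = bisecting_kmeans(points, k=4)
assert len(clusters) == 4
assert sum(c.get_shape()[0] for c in clusters.values()) == 60
key, row = user_to_cluster[0]  # user 0 now lives at this (cluster, row)
print(key, row, clusters[key].get_shape())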