def fit(self, X, y=None, lr=0.001, batch_size=256, num_epochs=10,
        update_interval=1, tol=1e-3):
    '''X: tensor data'''
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.cuda()
    print("=====Training DEC=======")
    # optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.parameters()),
                          lr=lr, momentum=0.9)

    print("Initializing cluster centers with kmeans.")
    kmeans = KMeans(self.n_clusters, n_init=20)
    data, _ = self.forward(X)
    y_pred = kmeans.fit_predict(data.data.cpu().numpy())
    y_pred_last = y_pred
    self.mu.data.copy_(torch.Tensor(kmeans.cluster_centers_))
    if y is not None:
        y = y.cpu().numpy()
        print("Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred),
                                               normalized_mutual_info_score(y, y_pred)))

    self.train()
    num = X.shape[0]
    num_batch = int(math.ceil(1.0 * num / batch_size))
    for epoch in range(num_epochs):
        if epoch % update_interval == 0:
            # update the target distribution p
            _, q = self.forward(X)
            p = self.target_distribution(q).data

            # evaluate the clustering performance
            y_pred = torch.argmax(q, dim=1).data.cpu().numpy()
            if y is not None:
                print("acc: %.5f, nmi: %.5f" % (acc(y, y_pred),
                                                normalized_mutual_info_score(y, y_pred)))

            # check stop criterion
            delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / num
            y_pred_last = y_pred
            if epoch > 0 and delta_label < tol:
                print('delta_label ', delta_label, '< tol ', tol)
                print("Reach tolerance threshold. Stopping training.")
                break

        # train 1 epoch
        train_loss = 0.0
        for batch_idx in range(num_batch):
            xbatch = X[batch_idx * batch_size: min((batch_idx + 1) * batch_size, num)]
            pbatch = p[batch_idx * batch_size: min((batch_idx + 1) * batch_size, num)]
            optimizer.zero_grad()
            inputs = Variable(xbatch)
            target = Variable(pbatch)
            z, qbatch = self.forward(inputs)
            loss = self.loss_function(target, qbatch)
            train_loss += loss.data * len(inputs)
            loss.backward()
            optimizer.step()

        print("#Epoch %3d: Loss: %.4f" % (epoch + 1, train_loss / num))
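# The loop above relies on self.target_distribution and self.loss_function. A
# minimal sketch of both under the standard DEC formulation (Xie et al., 2016);
# placing them as methods of this class is an assumption, not this repo's
# verbatim code.
import torch

def target_distribution(self, q):
    # p_ij = q_ij^2 / f_j, renormalized per sample, with f_j = sum_i q_ij
    weight = q ** 2 / torch.sum(q, dim=0)
    return (weight.t() / torch.sum(weight, dim=1)).t()

def loss_function(self, p, q):
    # KL(p || q) averaged over the batch
    return torch.mean(torch.sum(p * torch.log(p / q), dim=1))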
def predict(self, X, y):
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.cuda()
    latent = self.encodeBatch(X)
    q = self.soft_assign(latent)

    # evaluate the clustering performance
    y_pred = torch.argmax(q, dim=1).data.cpu().numpy()
    y = y.data.cpu().numpy()
    final_acc = acc(y, y_pred)
    final_nmi = normalized_mutual_info_score(y, y_pred)
    print("acc: %.5f, nmi: %.5f" % (final_acc, final_nmi))
    return final_acc, final_nmi
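# predict() depends on self.soft_assign. A sketch of the Student's-t kernel it
# implies, matching the batched computation written out inline in the
# DataLoader-based fit() further below (method name and placement assumed):
import torch

def soft_assign(self, z):
    q = 1.0 / (1.0 + torch.sum((z.unsqueeze(1) - self.mu) ** 2, dim=2) / self.alpha)
    q = q ** ((self.alpha + 1.0) / 2.0)
    return q / torch.sum(q, dim=1, keepdim=True)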
def initialize_cluster(self, loader, init="k-means++"):
    trainX = []
    trainY = []
    for batch_idx, (X, Y) in enumerate(loader):
        trainX.append(self.encodeBatch(X.float()).cpu())
        trainY.append(Y.cpu())
    trainX = torch.cat(tuple(trainX), 0).numpy()
    trainY = torch.cat(tuple(trainY), 0).numpy()
    n_components = self.n_centroids
    km = KMeans(n_clusters=n_components, init=init).fit(trainX)
    y_pred = km.predict(trainX)
    msg = "acc: %.5f, nmi: %.5f" % (acc(trainY, y_pred),
                                    normalized_mutual_info_score(trainY, y_pred))
    print(msg)
    write_log(msg, self.log_dir)
    u_p = km.cluster_centers_
    return u_p, y_pred
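# write_log is not defined in this file; a plausible minimal sketch, assuming
# it appends one line to a file under log_dir (the file name is hypothetical):
import os

def write_log(msg, logpath):
    with open(os.path.join(logpath, "train.log"), "a") as f:
        f.write(msg + "\n")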
def evaluate(self, dataset):
    dataX, dataY = dataset[:]
    X = self.encodeBatch(dataX).cpu().numpy()
    Y = dataY.cpu().numpy()
    y_pred = self.predict(X)
    accuracy = acc(Y, y_pred)
    nmi = normalized_mutual_info_score(Y, y_pred)
    logging.info("acc: %.5f, nmi: %.5f" % (accuracy, nmi))
    return accuracy, nmi
def test(self, X, y):
    kmeans = KMeans(self.n_clusters, n_init=20)
    data, _ = self.forward(X)
    y_pred = kmeans.fit_predict(data.data.cpu().numpy())
    if y is not None:
        y = y.cpu().numpy()
        print(y[0:10], y_pred[0:10])
        print("Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred),
                                               normalized_mutual_info_score(y, y_pred)))
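# acc() is the unsupervised clustering accuracy used throughout this file. The
# standard definition maps cluster ids to labels with the Hungarian algorithm;
# a sketch under that assumption (the repo's own acc may differ in detail):
import numpy as np
from scipy.optimize import linear_sum_assignment

def acc(y_true, y_pred):
    y_true = y_true.astype(np.int64)
    y_pred = y_pred.astype(np.int64)
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    row_ind, col_ind = linear_sum_assignment(w.max() - w)  # maximize matches
    return w[row_ind, col_ind].sum() * 1.0 / y_pred.size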
def learn_gmm(self, dataset, max_iter=200, init=False):
    dataX, dataY = dataset[:]
    X = self.encodeBatch(dataX).cpu().numpy()
    Y = dataY.cpu().numpy()
    self.gmmfit(X, max_iter=max_iter, init=init)
    y_pred = self.predict(X)
    logging.info("acc: %.5f, nmi: %.5f" % (acc(Y, y_pred),
                                           normalized_mutual_info_score(Y, y_pred)))
    log_prob_norm, log_resp = self._e_step(X)
    return log_resp
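# learn_gmm relies on self._e_step returning (log_prob_norm, log_resp). The
# E-step it names computes log responsibilities; a self-contained sketch where
# log_weights (K,) and log_prob (N, K) stand in for the model's mixture
# parameters (hypothetical inputs, not this class's attributes):
import numpy as np
from scipy.special import logsumexp

def e_step_sketch(log_weights, log_prob):
    weighted = log_prob + log_weights             # log pi_k + log N(x_i | theta_k)
    log_prob_norm = logsumexp(weighted, axis=1)   # log p(x_i)
    log_resp = weighted - log_prob_norm[:, None]  # log responsibilities gamma_ik
    return log_prob_norm, log_resp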
def initialize_cluster(self, trainX, trainY, init="k-means++"):
    trainX = self.encodeBatch(trainX)
    trainX = trainX.cpu().numpy()
    trainY = trainY.cpu().numpy()
    n_components = len(np.unique(trainY))
    km = KMeans(n_clusters=n_components, init=init).fit(trainX)
    y_pred = km.predict(trainX)
    print("acc: %.5f, nmi: %.5f" % (acc(trainY, y_pred),
                                    normalized_mutual_info_score(trainY, y_pred)))
    u_p = km.cluster_centers_
    return u_p, y_pred
def fit(self, anchor, positive, negative, ml_ind1, ml_ind2, cl_ind1, cl_ind2,
        mask, use_global, ml_p, cl_p, X, y=None, lr=0.001, batch_size=256,
        num_epochs=10, update_interval=1, tol=1e-3, use_kmeans=True,
        plotting="", clustering_loss_weight=1):
    '''X: tensor data'''
    # save intermediate results for plotting
    intermediate_results = collections.defaultdict(lambda: {})
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.cuda()
    print("=====Training IDEC=======")
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)

    if use_kmeans:
        print("Initializing cluster centers with kmeans.")
        kmeans = KMeans(self.n_clusters, n_init=20)
        data = self.encodeBatch(X)
        y_pred = kmeans.fit_predict(data.data.cpu().numpy())
        y_pred_last = y_pred
        self.mu.data.copy_(torch.Tensor(kmeans.cluster_centers_))
    else:
        # use a single, barely-converged kmeans run to randomly initialize cluster centers
        print("Randomly initializing cluster centers.")
        kmeans = KMeans(self.n_clusters, n_init=1, max_iter=1)
        data = self.encodeBatch(X)
        y_pred = kmeans.fit_predict(data.data.cpu().numpy())
        y_pred_last = y_pred
        self.mu.data.copy_(torch.Tensor(kmeans.cluster_centers_))
    if y is not None:
        y = y.cpu().numpy()
        # print("Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))

    self.train()
    num = X.shape[0]
    num_batch = int(math.ceil(1.0 * num / batch_size))
    ml_num_batch = int(math.ceil(1.0 * ml_ind1.shape[0] / batch_size))
    cl_num_batch = int(math.ceil(1.0 * cl_ind1.shape[0] / batch_size))
    tri_num_batch = int(math.ceil(1.0 * anchor.shape[0] / batch_size))
    cl_num = cl_ind1.shape[0]
    ml_num = ml_ind1.shape[0]
    tri_num = anchor.shape[0]

    final_acc, final_nmi, final_epoch = 0, 0, 0
    update_ml = 1
    update_cl = 1
    update_triplet = 1
    for epoch in range(num_epochs):
        if epoch % update_interval == 0:
            # update the target distribution p
            latent = self.encodeBatch(X)
            q = self.soft_assign(latent)
            p = self.target_distribution(q).data

            # evaluate the clustering performance
            y_pred = torch.argmax(q, dim=1).data.cpu().numpy()

            if use_global:
                y_dict = collections.defaultdict(list)
                for i in range(y_pred.shape[0]):
                    y_dict[y_pred[i]].append(i)
                for key in y_dict.keys():
                    if y is not None:
                        print("predicted class: ", key, " total: ", len(y_dict[key]))
                        # , " mapped index(ground truth): ", np.bincount(y[y_dict[key]]).argmax())

            if y is not None:
                print("acc: %.5f, nmi: %.5f" % (acc(y, y_pred),
                                                normalized_mutual_info_score(y, y_pred)))
                print("satisfied constraints: %.5f" % self.satisfied_constraints(
                    ml_ind1, ml_ind2, cl_ind1, cl_ind2, y_pred))
                final_acc = acc(y, y_pred)
                final_nmi = normalized_mutual_info_score(y, y_pred)
                final_epoch = epoch

                # save model for plotting
                if plotting and (epoch in [10, 20, 30, 40] or epoch % 50 == 0
                                 or epoch == num_epochs - 1):
                    df = pd.DataFrame(latent.cpu().numpy())
                    df["y"] = y
                    df.to_pickle(os.path.join(plotting, "save_model_%d.pkl" % epoch))
                    intermediate_results["acc"][str(epoch)] = acc(y, y_pred)
                    intermediate_results["nmi"][str(epoch)] = normalized_mutual_info_score(y, y_pred)
                    with open(os.path.join(plotting, "intermediate_results.json"), "w") as fp:
                        json.dump(intermediate_results, fp)

            # check stop criterion
            try:
                delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / num
                y_pred_last = y_pred
                if epoch > 0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print("Reach tolerance threshold. Stopping training.")
                    # save model for plotting
                    if plotting:
                        df = pd.DataFrame(latent.cpu().numpy())
                        df["y"] = y
                        df.to_pickle(os.path.join(plotting, "save_model_%d.pkl" % epoch))
                        intermediate_results["acc"][str(epoch)] = acc(y, y_pred)
                        intermediate_results["nmi"][str(epoch)] = normalized_mutual_info_score(y, y_pred)
                        with open(os.path.join(plotting, "intermediate_results.json"), "w") as fp:
                            json.dump(intermediate_results, fp)
                    break
            except Exception:
                pass

        # train 1 epoch for clustering loss
        train_loss = 0.0
        recon_loss_val = 0.0
        cluster_loss_val = 0.0
        instance_constraints_loss_val = 0.0
        global_loss_val = 0.0
        for batch_idx in range(num_batch):
            xbatch = X[batch_idx * batch_size: min((batch_idx + 1) * batch_size, num)]
            pbatch = p[batch_idx * batch_size: min((batch_idx + 1) * batch_size, num)]
            mask_batch = mask[batch_idx * batch_size: min((batch_idx + 1) * batch_size, num)]
            optimizer.zero_grad()
            inputs = Variable(xbatch)
            target = Variable(pbatch)
            # uniform target cluster-size distribution (10 clusters hard-coded),
            # kept on the same device as the inputs
            cons_detail = np.full(10, 0.1)
            global_cons = torch.from_numpy(cons_detail).float().to(inputs.device)
            z, qbatch, xrecon = self.forward(inputs)
            if not use_global:
                cluster_loss = self.cluster_loss(target, qbatch)
                recon_loss = self.recon_loss(inputs, xrecon)
                instance_constraints_loss = self.difficulty_loss(qbatch, mask_batch)
                loss = cluster_loss + recon_loss + instance_constraints_loss
                loss.backward()
                optimizer.step()
                cluster_loss_val += cluster_loss.data * len(inputs)
                recon_loss_val += recon_loss.data * len(inputs)
                instance_constraints_loss_val += instance_constraints_loss.data * len(inputs)
                train_loss = (clustering_loss_weight * cluster_loss_val
                              + recon_loss_val + instance_constraints_loss_val)
            else:
                cluster_loss = self.cluster_loss(target, qbatch)
                recon_loss = self.recon_loss(inputs, xrecon)
                global_loss = self.global_size_loss(qbatch, global_cons)
                loss = cluster_loss + recon_loss + global_loss
                loss.backward()
                optimizer.step()
                cluster_loss_val += cluster_loss.data * len(inputs)
                recon_loss_val += recon_loss.data * len(inputs)
                train_loss = clustering_loss_weight * cluster_loss_val + recon_loss_val

        if instance_constraints_loss_val != 0.0:
            print("#Epoch %3d: Total: %.4f Clustering Loss: %.4f Reconstruction Loss: %.4f Instance Difficulty Loss: %.4f" % (
                epoch + 1, train_loss / num, cluster_loss_val / num,
                recon_loss_val / num, instance_constraints_loss_val / num))
        elif global_loss_val != 0.0 and use_global:
            print("#Epoch %3d: Total: %.4f Clustering Loss: %.4f Reconstruction Loss: %.4f Global Loss: %.4f" % (
                epoch + 1, train_loss / num + global_loss_val / num_batch,
                cluster_loss_val / num, recon_loss_val / num, global_loss_val / num_batch))
        else:
            print("#Epoch %3d: Total: %.4f Clustering Loss: %.4f Reconstruction Loss: %.4f" % (
                epoch + 1, train_loss / num, cluster_loss_val / num, recon_loss_val / num))

        ml_loss = 0.0
        if epoch % update_ml == 0:
            for ml_batch_idx in range(ml_num_batch):
                px1 = X[ml_ind1[ml_batch_idx * batch_size: min(ml_num, (ml_batch_idx + 1) * batch_size)]]
                px2 = X[ml_ind2[ml_batch_idx * batch_size: min(ml_num, (ml_batch_idx + 1) * batch_size)]]
                pbatch1 = p[ml_ind1[ml_batch_idx * batch_size: min(ml_num, (ml_batch_idx + 1) * batch_size)]]
                pbatch2 = p[ml_ind2[ml_batch_idx * batch_size: min(ml_num, (ml_batch_idx + 1) * batch_size)]]
                optimizer.zero_grad()
                inputs1 = Variable(px1)
                inputs2 = Variable(px2)
                target1 = Variable(pbatch1)
                target2 = Variable(pbatch2)
                z1, q1, xr1 = self.forward(inputs1)
                z2, q2, xr2 = self.forward(inputs2)
                # ml_p: 0.1 for mnist/reuters, 1 for fashion; tuned via grid
                # search on a validation set
                loss = (ml_p * self.pairwise_loss(q1, q2, "ML")
                        + self.recon_loss(inputs1, xr1)
                        + self.recon_loss(inputs2, xr2))
                ml_loss += loss.data
                loss.backward()
                optimizer.step()

        cl_loss = 0.0
        if epoch % update_cl == 0:
            for cl_batch_idx in range(cl_num_batch):
                px1 = X[cl_ind1[cl_batch_idx * batch_size: min(cl_num, (cl_batch_idx + 1) * batch_size)]]
                px2 = X[cl_ind2[cl_batch_idx * batch_size: min(cl_num, (cl_batch_idx + 1) * batch_size)]]
                pbatch1 = p[cl_ind1[cl_batch_idx * batch_size: min(cl_num, (cl_batch_idx + 1) * batch_size)]]
                pbatch2 = p[cl_ind2[cl_batch_idx * batch_size: min(cl_num, (cl_batch_idx + 1) * batch_size)]]
                optimizer.zero_grad()
                inputs1 = Variable(px1)
                inputs2 = Variable(px2)
                target1 = Variable(pbatch1)
                target2 = Variable(pbatch2)
                z1, q1, xr1 = self.forward(inputs1)
                z2, q2, xr2 = self.forward(inputs2)
                loss = cl_p * self.pairwise_loss(q1, q2, "CL")
                cl_loss += loss.data
                loss.backward()
                optimizer.step()

        if ml_num_batch > 0 and cl_num_batch > 0:
            print("Pairwise Total:", round(float(ml_loss.cpu()), 2) + float(cl_loss.cpu()),
                  "ML loss", float(ml_loss.cpu()), "CL loss:", float(cl_loss.cpu()))

        triplet_loss = 0.0
        if epoch % update_triplet == 0:
            for tri_batch_idx in range(tri_num_batch):
                px1 = X[anchor[tri_batch_idx * batch_size: min(tri_num, (tri_batch_idx + 1) * batch_size)]]
                px2 = X[positive[tri_batch_idx * batch_size: min(tri_num, (tri_batch_idx + 1) * batch_size)]]
                px3 = X[negative[tri_batch_idx * batch_size: min(tri_num, (tri_batch_idx + 1) * batch_size)]]
                pbatch1 = p[anchor[tri_batch_idx * batch_size: min(tri_num, (tri_batch_idx + 1) * batch_size)]]
                pbatch2 = p[positive[tri_batch_idx * batch_size: min(tri_num, (tri_batch_idx + 1) * batch_size)]]
                pbatch3 = p[negative[tri_batch_idx * batch_size: min(tri_num, (tri_batch_idx + 1) * batch_size)]]
                optimizer.zero_grad()
                inputs1 = Variable(px1)
                inputs2 = Variable(px2)
                inputs3 = Variable(px3)
                target1 = Variable(pbatch1)
                target2 = Variable(pbatch2)
                target3 = Variable(pbatch3)
                z1, q1, xr1 = self.forward(inputs1)
                z2, q2, xr2 = self.forward(inputs2)
                z3, q3, xr3 = self.forward(inputs3)
                loss = self.triplet_loss(q1, q2, q3, 0.1)
                triplet_loss += loss.data
                loss.backward()
                optimizer.step()
        if tri_num_batch > 0:
            print("Triplet Loss:", triplet_loss)

    return final_acc, final_nmi, final_epoch
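# Both constraint loops above call self.pairwise_loss(q1, q2, "ML"/"CL"). A
# hedged sketch of the common formulation for deep constrained clustering
# (assumed form, not necessarily identical to this repo's implementation):
import torch

def pairwise_loss_sketch(q1, q2, constraint_type):
    # probability that the two samples fall into the same cluster
    agreement = torch.sum(q1 * q2, dim=1)
    if constraint_type == "ML":   # must-link: push agreement toward 1
        return -torch.mean(torch.log(agreement + 1e-12))
    else:                         # cannot-link: push agreement toward 0
        return -torch.mean(torch.log(1.0 - agreement + 1e-12))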
def fit(self, trainloader, validloader, lr=0.001, batch_size=128, num_epochs=10,
        visualize=False, anneal=False, optimizer="adam"):
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.cuda()
    if optimizer == "adam":
        optimizer = optim.Adam(self.parameters(), lr=lr)
    elif optimizer == "sgd":
        optimizer = optim.SGD(self.parameters(), lr=lr, momentum=0.9)

    # validate
    self.eval()
    valid_loss = 0.0
    for batch_idx, (inputs, _) in enumerate(validloader):
        inputs = inputs.view(inputs.size(0), -1).float()
        if use_cuda:
            inputs = inputs.cuda()
        inputs = Variable(inputs)
        z, outputs = self.forward(inputs)
        loss = self.loss_function(outputs, inputs)
        valid_loss += loss.data * len(inputs)
    print("#Epoch -1: Valid Loss: %.5f" % (valid_loss / len(validloader.dataset)))

    for epoch in range(num_epochs):
        # train 1 epoch
        self.train()
        if anneal:
            adjust_learning_rate(lr, optimizer, epoch)
        train_loss = 0
        for batch_idx, (inputs, labels) in enumerate(trainloader):
            inputs = inputs.view(inputs.size(0), -1).float()
            if use_cuda:
                inputs = inputs.cuda()
            optimizer.zero_grad()
            inputs = Variable(inputs)
            z, outputs = self.forward(inputs)
            loss = self.loss_function(outputs, inputs)
            train_loss += loss.data * len(inputs)
            loss.backward()
            optimizer.step()

        # validate
        self.eval()
        valid_loss = 0.0
        for batch_idx, (inputs, labels) in enumerate(validloader):
            inputs = inputs.view(inputs.size(0), -1).float()
            if use_cuda:
                inputs = inputs.cuda()
            inputs = Variable(inputs)
            z, outputs = self.forward(inputs)
            loss = self.loss_function(outputs, inputs)
            valid_loss += loss.data * len(inputs)

        print("#Epoch %3d: Train Loss: %.5f, Valid Loss: %.5f" % (
            epoch, train_loss / len(trainloader.dataset),
            valid_loss / len(validloader.dataset)))

        # guard against num_epochs < 10, which would make the modulus zero
        if epoch % max(1, num_epochs // 10) == 0 or epoch == num_epochs - 1:
            trainX, trainY = self.encodeBatch(trainloader, True)
            testX, testY = self.encodeBatch(validloader, True)
            trainX = trainX.cpu().numpy()
            trainY = trainY.cpu().numpy()
            testX = testX.cpu().numpy()
            testY = testY.cpu().numpy()
            n_components = len(np.unique(trainY))
            km = KMeans(n_clusters=n_components, n_init=20).fit(trainX)
            y_pred = km.predict(testX)
            print("acc: %.5f, nmi: %.5f" % (acc(testY, y_pred),
                                            normalized_mutual_info_score(testY, y_pred)))
            gmm = GaussianMixture(n_components=n_components, covariance_type='diag',
                                  means_init=km.cluster_centers_).fit(trainX)
            y_pred = gmm.predict(testX)
            print("acc: %.5f, nmi: %.5f" % (acc(testY, y_pred),
                                            normalized_mutual_info_score(testY, y_pred)))
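# adjust_learning_rate is not defined in this file; a minimal step-decay sketch
# (the decay factor and step size are assumptions):
def adjust_learning_rate(init_lr, optimizer, epoch):
    lr = init_lr * (0.1 ** (epoch // 100))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr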
def fit(self, trainloader, validloader, lr=0.001, batch_size=128, num_epochs=10,
        visualize=False, anneal=False):
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.cuda()
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)

    # validate
    self.eval()
    valid_loss = 0.0
    for batch_idx, (inputs, _) in enumerate(validloader):
        inputs = inputs.view(inputs.size(0), -1).float()
        if use_cuda:
            inputs = inputs.cuda()
        inputs = Variable(inputs)
        z, outputs, mu, logvar = self.forward(inputs)
        loss = self.loss_function(outputs, inputs, z, mu, logvar)
        valid_loss += loss.data * len(inputs)
    print("#Epoch -1: Valid Loss: %.5f" % (valid_loss / len(validloader.dataset)))

    for epoch in range(num_epochs):
        # train 1 epoch
        self.train()
        if anneal:
            adjust_learning_rate(lr, optimizer, epoch)
        train_loss = 0
        for batch_idx, (inputs, _) in enumerate(trainloader):
            inputs = inputs.view(inputs.size(0), -1).float()
            if use_cuda:
                inputs = inputs.cuda()
            optimizer.zero_grad()
            inputs = Variable(inputs)
            z, outputs, mu, logvar = self.forward(inputs)
            loss = self.loss_function(outputs, inputs, z, mu, logvar)
            train_loss += loss.data * len(inputs)
            loss.backward()
            optimizer.step()

        # validate
        self.eval()
        valid_loss = 0.0
        for batch_idx, (inputs, _) in enumerate(validloader):
            inputs = inputs.view(inputs.size(0), -1).float()
            if use_cuda:
                inputs = inputs.cuda()
            inputs = Variable(inputs)
            z, outputs, mu, logvar = self.forward(inputs)
            loss = self.loss_function(outputs, inputs, z, mu, logvar)
            valid_loss += loss.data * len(inputs)

            # view reconstruction
            if visualize and batch_idx == 0:
                n = min(inputs.size(0), 8)
                comparison = torch.cat([
                    inputs.view(-1, 1, 28, 28)[:n],
                    outputs.view(-1, 1, 28, 28)[:n]
                ])
                save_image(comparison.data.cpu(),
                           'results/vae/reconstruct/reconstruction_' + str(epoch) + '.png',
                           nrow=n)

        print("#Epoch %3d: Train Loss: %.5f, Valid Loss: %.5f" % (
            epoch, train_loss / len(trainloader.dataset),
            valid_loss / len(validloader.dataset)))

        # guard against num_epochs < 10, which would make the modulus zero
        if epoch % max(1, num_epochs // 10) == 0 or epoch == num_epochs - 1:
            trainX, trainY = self.encodeBatch(trainloader, True)
            testX, testY = self.encodeBatch(validloader, True)
            trainX = trainX.cpu().numpy()
            trainY = trainY.cpu().numpy()
            testX = testX.cpu().numpy()
            testY = testY.cpu().numpy()
            n_components = len(np.unique(trainY))
            km = KMeans(n_clusters=n_components, n_init=20).fit(trainX)
            y_pred = km.predict(testX)
            print("acc: %.5f, nmi: %.5f" % (acc(testY, y_pred),
                                            normalized_mutual_info_score(testY, y_pred)))
            gmm = GaussianMixture(n_components=n_components, covariance_type='diag',
                                  means_init=km.cluster_centers_).fit(trainX)
            y_pred = gmm.predict(testX)
            print("acc: %.5f, nmi: %.5f" % (acc(testY, y_pred),
                                            normalized_mutual_info_score(testY, y_pred)))

        # view sample
        if visualize:
            sample = Variable(torch.randn(64, self.z_dim))
            if use_cuda:
                sample = sample.cuda()
            sample = self.decode(sample).cpu()
            save_image(sample.data.view(64, 1, 28, 28),
                       'results/vae/sample/sample_' + str(epoch) + '.png')
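# The loop above passes (outputs, inputs, z, mu, logvar) to self.loss_function.
# A hedged sketch of the usual VAE objective that signature suggests
# (reconstruction BCE plus KL to the unit-Gaussian prior); the repo's actual
# weighting may differ, and z is unused in this standard form:
import torch
import torch.nn.functional as F

def vae_loss_sketch(recon_x, x, z, mu, logvar):
    recon = F.binary_cross_entropy(recon_x, x, reduction='sum')
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return (recon + kld) / x.size(0)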
def fit(self, dataloader, lr=0.001, batch_size=256, num_epochs=10,
        update_interval=1, tol=1e-3):
    '''Batched DEC training; X and y are supplied through `dataloader`.'''
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.cuda()
    print("=====Training DEC=======")
    write_log("=====Training DEC=======", self.log_dir)
    # optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.parameters()),
                          lr=lr, momentum=0.9)

    print("Initializing cluster centers with kmeans.")
    write_log("Initializing cluster centers with kmeans.", self.log_dir)
    kmeans = KMeans(self.n_clusters, n_init=20)
    # original code computed `data, _ = self.forward(X)` in memory; here the
    # embedding and q are computed batch by batch, with X and y replaced by the
    # DataLoader
    data = []
    y = []
    for batch_idx, (inputs, yi) in enumerate(dataloader):
        inputs = inputs.view(inputs.size(0), -1).float()
        inputs = inputs.cuda()
        datai, _ = self.forward(inputs)
        data.append(datai.data.cpu())
        y.append(yi.data.cpu())
        del inputs
        torch.cuda.empty_cache()
    data = torch.cat(tuple(data), 0)
    y = torch.cat(tuple(y), 0)
    y_pred = kmeans.fit_predict(data.numpy())
    y_pred_last = y_pred
    self.mu.data.copy_(torch.Tensor(kmeans.cluster_centers_))
    if y is not None:
        y = y.cpu().numpy()
        msg = "Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred),
                                               normalized_mutual_info_score(y, y_pred))
        print(msg)
        write_log(msg, self.log_dir)
    del data, y
    torch.cuda.empty_cache()

    self.train()
    num = len(dataloader.dataset)
    for epoch in range(num_epochs):
        tic = timer()
        if epoch % update_interval == 0:
            # update the target distribution p, computing q batch by batch
            data = []
            y = []
            for batch_idx, (xbatch, yi) in enumerate(dataloader):
                xbatch = xbatch.float().cuda()
                datai, _ = self.forward(xbatch)
                data.append(datai.data.cpu())
                y.append(yi.data.cpu())
                del xbatch, datai
                torch.cuda.empty_cache()
            data = torch.cat(tuple(data), 0)
            y = torch.cat(tuple(y), 0).numpy()
            # Student's-t soft assignment against the current centers;
            # the exponent is (alpha + 1) / 2
            q = 1.0 / (1.0 + torch.sum(
                (data.unsqueeze(1) - self.mu.data.cpu()) ** 2, dim=2) / self.alpha)
            q = q ** ((self.alpha + 1.0) / 2.0)
            q = q / torch.sum(q, dim=1, keepdim=True)
            p = self.target_distribution(q).data
            del data
            torch.cuda.empty_cache()

            # evaluate the clustering performance
            y_pred = torch.argmax(q, dim=1).data.cpu().numpy()
            if y is not None:
                msg = "acc: %.5f, nmi: %.5f" % (acc(y, y_pred),
                                                normalized_mutual_info_score(y, y_pred))
                print(msg)
                write_log(msg, logpath=self.log_dir)
                if self.writer is not None:
                    self.writer.add_scalars('dec', {
                        'acc': acc(y, y_pred),
                        'nmi': normalized_mutual_info_score(y, y_pred)
                    }, epoch)

            # check stop criterion: stop when the fraction of samples whose
            # assignment changed since the previous round falls below tol
            delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / num
            y_pred_last = y_pred
            if epoch > 0 and delta_label < tol:
                print('delta_label ', delta_label, '< tol ', tol)
                print("Reach tolerance threshold. Stopping training.")
                break

        # train 1 epoch; pbatch is sliced by batch index, so the loader must
        # iterate in a fixed order (shuffle=False) with this same batch_size
        train_loss = 0.0
        for batch_idx, (xbatch, _) in enumerate(dataloader):
            pbatch = p[batch_idx * batch_size: min((batch_idx + 1) * batch_size, num)]
            xbatch = xbatch.float().cuda()
            pbatch = pbatch.cuda()
            optimizer.zero_grad()
            inputs = Variable(xbatch)
            target = Variable(pbatch)
            z, qbatch = self.forward(inputs)
            loss = self.loss_function(target, qbatch)
            train_loss += loss.data * len(inputs)
            loss.backward()
            optimizer.step()
            del xbatch, qbatch, inputs, target, loss
            torch.cuda.empty_cache()
        toc = timer()
        print("cost:", toc - tic)
        print("#Epoch %3d: Loss: %.4f" % (epoch + 1, train_loss / num))
        write_log("#Epoch %3d: Loss: %.4f" % (epoch + 1, train_loss / num), self.log_dir)
        if self.writer is not None:
            self.writer.add_scalars('dec', {'loss': train_loss / num}, epoch + 1)
        torch.cuda.empty_cache()
def fit(self, stat, y_pred, ml_list, cl_list, ml_ind1, ml_ind2, cl_ind1, cl_ind2,
        ml_p, cl_p, X, y=None, lr=0.001, batch_size=256, num_epochs=100,
        update_interval=1, tol=1e-3):
    '''X: tensor data'''
    # use_cuda = torch.cuda.is_available()
    # if use_cuda:
    #     self.cuda()
    print("=====Training IDEC=======")
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)

    print("Initializing cluster centers with input assignment.")
    data = self.encodeBatch(X).data.cpu().numpy()
    cluster_centers = []
    y_pred_last = y_pred
    for i in range(self.n_clusters):
        cluster_centers.append(np.full(self.z_dim, 0, dtype=np.float32))
    cluster_size = dict()
    for i in range(len(y_pred)):
        cluster_centers[y_pred[i]] += data[i]
        class_id = int(y_pred[i])
        if class_id not in cluster_size:
            cluster_size[class_id] = 1.0
        else:
            cluster_size[class_id] += 1.0
    print(cluster_size)
    # assumes every cluster id appears in y_pred; an empty cluster would raise KeyError
    for i in range(self.n_clusters):
        cluster_centers[i] /= cluster_size[i]
    cluster_centers = np.asarray(cluster_centers)
    self.mu.data.copy_(torch.Tensor(cluster_centers))
    if y is not None:
        y = y.cpu().numpy()
        # print("Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))

    self.train()
    num = X.shape[0]
    num_batch = int(math.ceil(1.0 * num / batch_size))
    ml_num_batch = int(math.ceil(1.0 * ml_ind1.shape[0] / batch_size))
    cl_num_batch = int(math.ceil(1.0 * cl_ind1.shape[0] / batch_size))
    cl_num = cl_ind1.shape[0]
    ml_num = ml_ind1.shape[0]

    last_acc, final_epoch = 0, 0
    last_nmi = 1.0
    last_delta = 0.0
    update_ml = 1
    update_cl = 1
    arr_acc = []
    for epoch in range(num_epochs):
        if epoch % update_interval == 0:
            # update the target distribution p
            latent = self.encodeBatch(X)
            q = self.soft_assign(latent)
            p = self.target_distribution(q).data

            # evaluate the clustering performance
            y_pred = torch.argmax(q, dim=1).data.cpu().numpy()
            if y is not None:
                # count violated must-link and cannot-link constraints
                num_vcon = 0
                for i in range(len(ml_list[0])):
                    if y_pred[ml_list[0][i]] != y_pred[ml_list[1][i]]:
                        num_vcon += 1
                for i in range(len(cl_list[0])):
                    if y_pred[cl_list[0][i]] == y_pred[cl_list[1][i]]:
                        num_vcon += 1
                last_acc = acc(y, y_pred)
                last_nmi = normalized_mutual_info_score(y, y_pred,
                                                        average_method="arithmetic")
                arr_acc.append(last_acc)
                if len(arr_acc) > 4:
                    arr_acc = arr_acc[1:]
                final_epoch = epoch
                print("NMI - ACC - #violated constraints")
                stat.append((last_nmi, last_acc, num_vcon, last_delta))
                print("%.5f\t%.5f\t%d\n" % (last_nmi, last_acc, num_vcon))

            # check stop criterion
            delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / num
            y_pred_last = y_pred
            if epoch > 0 and delta_label < tol:
                print('delta_label ', delta_label, '< tol ', tol)
                print("Reach tolerance threshold. Stopping training.")
                break
            print("Delta label:", delta_label)

        # train 1 epoch for clustering loss
        train_loss = 0.0
        recon_loss_val = 0.0
        cluster_loss_val = 0.0
        for batch_idx in range(num_batch):
            xbatch = X[batch_idx * batch_size: min((batch_idx + 1) * batch_size, num)]
            pbatch = p[batch_idx * batch_size: min((batch_idx + 1) * batch_size, num)]
            optimizer.zero_grad()
            inputs = Variable(xbatch)
            target = Variable(pbatch)
            z, qbatch, xrecon = self.forward(inputs)
            cluster_loss = self.cluster_loss(target, qbatch)
            recon_loss = self.recon_loss(inputs, xrecon)
            loss = cluster_loss + recon_loss
            loss.backward()
            optimizer.step()
            cluster_loss_val += cluster_loss.data * len(inputs)
            recon_loss_val += recon_loss.data * len(inputs)
            train_loss = cluster_loss_val + recon_loss_val
        print("#Epoch %3d: Total: %.4f Clustering Loss: %.4f Reconstruction Loss: %.4f" % (
            epoch + 1, train_loss / num, cluster_loss_val / num, recon_loss_val / num))

        ml_loss = 0.0
        if epoch % update_ml == 0:
            for ml_batch_idx in range(ml_num_batch):
                px1 = X[ml_ind1[ml_batch_idx * batch_size: min(ml_num, (ml_batch_idx + 1) * batch_size)]]
                px2 = X[ml_ind2[ml_batch_idx * batch_size: min(ml_num, (ml_batch_idx + 1) * batch_size)]]
                # pbatch1 = p[ml_ind1[ml_batch_idx * batch_size: min(ml_num, (ml_batch_idx + 1) * batch_size)]]
                # pbatch2 = p[ml_ind2[ml_batch_idx * batch_size: min(ml_num, (ml_batch_idx + 1) * batch_size)]]
                optimizer.zero_grad()
                inputs1 = Variable(px1)
                inputs2 = Variable(px2)
                z1, q1, xr1 = self.forward(inputs1)
                z2, q2, xr2 = self.forward(inputs2)
                # ml_p: 0.1 for mnist/reuters, 1 for fashion; tuned via grid
                # search on a validation set
                loss = (ml_p * self.pairwise_loss(q1, q2, "ML")
                        + self.recon_loss(inputs1, xr1)
                        + self.recon_loss(inputs2, xr2))
                ml_loss += loss.data
                loss.backward()
                optimizer.step()

        cl_loss = 0.0
        if epoch % update_cl == 0:
            for cl_batch_idx in range(cl_num_batch):
                px1 = X[cl_ind1[cl_batch_idx * batch_size: min(cl_num, (cl_batch_idx + 1) * batch_size)]]
                px2 = X[cl_ind2[cl_batch_idx * batch_size: min(cl_num, (cl_batch_idx + 1) * batch_size)]]
                # pbatch1 = p[cl_ind1[cl_batch_idx * batch_size: min(cl_num, (cl_batch_idx + 1) * batch_size)]]
                # pbatch2 = p[cl_ind2[cl_batch_idx * batch_size: min(cl_num, (cl_batch_idx + 1) * batch_size)]]
                optimizer.zero_grad()
                inputs1 = Variable(px1)
                inputs2 = Variable(px2)
                z1, q1, xr1 = self.forward(inputs1)
                z2, q2, xr2 = self.forward(inputs2)
                loss = cl_p * self.pairwise_loss(q1, q2, "CL")
                cl_loss += loss.data
                loss.backward()
                optimizer.step()

        if ml_num_batch > 0 and cl_num_batch > 0:
            print("Pairwise Total:", round(float(ml_loss.cpu()), 2) + float(cl_loss.cpu()),
                  "ML loss", float(ml_loss.cpu()), "CL loss:", float(cl_loss.cpu()))

    return last_acc, last_nmi, final_epoch
def fit(self, trainloader, validloader, lr=0.001, num_epochs=10, corrupt=0.3,
        loss_type="mse"):
    """
    data_x: FloatTensor
    valid_x: FloatTensor
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.cuda()
    logging.info("=====Stacked Denoising Autoencoding Layer=======")
    # optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.parameters()),
                          lr=lr, momentum=0.9)
    if loss_type == "mse":
        criterion = MSELoss()
    elif loss_type == "cross-entropy":
        criterion = BCELoss()

    # validate
    total_loss = 0.0
    total_num = 0
    for batch_idx, (inputs, _) in enumerate(validloader):
        inputs = inputs.view(inputs.size(0), -1).float()
        if use_cuda:
            inputs = inputs.cuda()
        inputs = Variable(inputs)
        z, outputs = self.forward(inputs)
        valid_recon_loss = criterion(outputs, inputs)
        total_loss += valid_recon_loss.data * len(inputs)
        total_num += inputs.size()[0]
    valid_loss = total_loss / total_num
    logging.info("#Epoch 0: Valid Reconstruct Loss: %.4f" % valid_loss)

    self.train()
    for epoch in range(num_epochs):
        # train 1 epoch
        adjust_learning_rate(lr, optimizer, epoch)
        train_loss = 0.0
        for batch_idx, (inputs, _) in enumerate(trainloader):
            inputs = inputs.view(inputs.size(0), -1).float()
            inputs_corr = masking_noise(inputs, corrupt)
            if use_cuda:
                inputs = inputs.cuda()
                inputs_corr = inputs_corr.cuda()
            optimizer.zero_grad()
            inputs = Variable(inputs)
            inputs_corr = Variable(inputs_corr)
            # reconstruct the clean input from its corrupted version
            z, outputs = self.forward(inputs_corr)
            recon_loss = criterion(outputs, inputs)
            train_loss += recon_loss.data * len(inputs)
            recon_loss.backward()
            optimizer.step()

        # validate
        valid_loss = 0.0
        for batch_idx, (inputs, _) in enumerate(validloader):
            inputs = inputs.view(inputs.size(0), -1).float()
            if use_cuda:
                inputs = inputs.cuda()
            inputs = Variable(inputs)
            z, outputs = self.forward(inputs)
            valid_recon_loss = criterion(outputs, inputs)
            valid_loss += valid_recon_loss.data * len(inputs)

        logging.info("#Epoch %3d: Reconstruct Loss: %.4f, Valid Reconstruct Loss: %.4f" % (
            epoch + 1, train_loss / len(trainloader.dataset),
            valid_loss / len(validloader.dataset)))

        # guard against num_epochs < 10, which would make the modulus zero
        if epoch % max(1, num_epochs // 10) == 0 or epoch == num_epochs - 1:
            trainX, trainY = self.encodeBatch(trainloader, True)
            testX, testY = self.encodeBatch(validloader, True)
            trainX = trainX.cpu().numpy()
            trainY = trainY.cpu().numpy()
            testX = testX.cpu().numpy()
            testY = testY.cpu().numpy()
            n_components = len(np.unique(trainY))
            km = KMeans(n_clusters=n_components, n_init=20).fit(trainX)
            y_pred = km.predict(testX)
            logging.info("acc: %.5f, nmi: %.5f" % (
                acc(testY, y_pred), normalized_mutual_info_score(testY, y_pred)))
            gmm = GaussianMixture(n_components=n_components, covariance_type='diag',
                                  means_init=km.cluster_centers_).fit(trainX)
            y_pred = gmm.predict(testX)
            logging.info("acc: %.5f, nmi: %.5f" % (
                acc(testY, y_pred), normalized_mutual_info_score(testY, y_pred)))
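# masking_noise is not shown here; a minimal sketch of the usual denoising
# corruption (zero out a random fraction `corrupt` of each input; assumed form):
import torch

def masking_noise(x, corrupt):
    mask = (torch.rand_like(x) > corrupt).float()
    return x * mask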
dcn = DeepClusteringNetwork(input_dim=405, z_dim=latent_dim, n_centroids=n_clusters,
                            binary=False, encodeLayer=[500, 500, 2000],
                            decodeLayer=[2000, 500, 500], activation="relu", dropout=0)
dcn.load_model(sdae_savepath)
dcn.fit(X, y, lr=0.001, batch_size=batch_size, num_epochs=10)

# testing
testdata, _ = dcn.forward(Xt)
kmeans = KMeans(n_clusters, n_init=20)
y_pred = kmeans.fit_predict(testdata.detach().cpu().numpy())
print(metrics.normalized_mutual_info_score(yt, y_pred))
print(acc(yt, y_pred))
total_entropy = compute_entropy(n_clusters, y_pred, attr_t, n_bins)
print("Total entropy: %.5f" % total_entropy)
acc_dcn.append(acc(yt, y_pred))
nmi_dcn.append(metrics.normalized_mutual_info_score(yt, y_pred))
entropy_dcn.append(total_entropy)

dec = DEC(input_dim=405, z_dim=latent_dim, n_clusters=n_clusters,
          encodeLayer=[500, 500, 2000], activation="relu", dropout=0)
# print(dec)
# dec.load_model(sdae_savepath)