def train(epoch, train_adj, train_fea, idx_train, val_adj=None, val_fea=None):
    """One training step of the SSL-regularized GCN.

    Uses module-level globals: model, optimizer, scheduler, sampler, args,
    labels, idx_val, adj_knn, ssl_agent, early_stopping, accuracy.

    Returns:
        (loss_train, acc_train, loss_val, acc_val, loss_ssl, loss_total, train_t)
    """
    if val_adj is None:
        # fall back to transductive evaluation on the training graph
        val_adj = train_adj
        val_fea = train_fea

    t = time.time()
    model.train()
    optimizer.zero_grad()
    output, embeddings = model.myforward(train_fea, train_adj, adj_knn, layer=1.5)

    # special for reddit: inductive output already covers only the train nodes
    if sampler.learning_type == "inductive":
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])

    # self-supervised auxiliary loss, weighted by args.lambda_
    loss_ssl = args.lambda_ * ssl_agent.make_loss(embeddings)
    loss_total = loss_train + loss_ssl
    loss_total.backward()
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()

    # We can not apply the fastmode for the reddit dataset.
    # if sampler.learning_type == "inductive" or not args.fastmode:
    #     if args.early_stopping > 0 and sampler.dataset != "reddit":
    #         loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
    #         early_stopping(loss_val, model)
    if not args.fastmode and args.early_stopping > 0:
        # Evaluate validation set performance separately,
        # deactivates dropout during validation run.
        model.eval()
        output = model(val_fea, val_adj, adj_knn)
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        acc_val = accuracy(output[idx_val], labels[idx_val]).item()
        early_stopping(acc_val, model)
    else:
        loss_val = 0
        acc_val = 0

    if args.lradjust:
        scheduler.step()
    val_t = time.time() - val_t

    # FIX: the original used a bare try/except to handle loss_ssl being a plain
    # number (when the SSL agent returns 0); test explicitly instead of
    # swallowing arbitrary exceptions.
    ssl_val = loss_ssl.item() if torch.is_tensor(loss_ssl) else loss_ssl
    return (loss_train.item(), acc_train.item(), loss_val, acc_val,
            ssl_val, loss_total.item(), train_t)
def compute_metric(input, target):
    """Compute top-1 and top-5 accuracy of *input* predictions against *target*.

    Returns a dict keyed by 'accuracy@1' and 'accuracy@5'.
    """
    return {
        'accuracy@1': accuracy(input=input, target=target, topk=1),
        'accuracy@5': accuracy(input=input, target=target, topk=5),
    }
def train(epoch, train_adj, train_fea, idx_train, val_adj=None, val_fea=None):
    """One training step with per-layer gradient/weight-norm debug output.

    Uses module-level globals: model, optimizer, scheduler, sampler, args,
    labels, idx_val, early_stopping, accuracy, get_lr.

    Returns:
        (loss_train, acc_train, loss_val, acc_val, lr, train_t, val_t,
         grads, norms)
    """
    if val_adj is None:
        # fall back to evaluating on the training graph (transductive case)
        val_adj = train_adj
        val_fea = train_fea

    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(train_fea, train_adj)

    # special for reddit: inductive output covers only the train nodes
    if sampler.learning_type == "inductive":
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()

    # We can not apply the fastmode for the reddit dataset.
    # if sampler.learning_type == "inductive" or not args.fastmode:

    # Debug: L2 norms of gradients and weights of the first mid-layer,
    # plus their ratio (norm/grad) as a rough update-magnitude indicator.
    grads = [
        np.linalg.norm(l.grad.cpu().numpy())
        for l in model.midlayer[0].model.weights
    ]
    norms = [
        np.linalg.norm(l.detach().cpu().numpy())
        for l in model.midlayer[0].model.weights
    ]
    print("Grads:", grads)
    print("Norms", norms)
    print(np.array(norms) / np.array(grads))

    if args.early_stopping > 0 and sampler.dataset != "reddit":
        # early stopping on training-graph validation loss (non-reddit)
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        early_stopping(loss_val, model)

    if not args.fastmode:
        # # Evaluate validation set performance separately,
        # # deactivates dropout during validation run.
        model.eval()
        output = model(val_fea, val_adj)
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        acc_val = accuracy(output[idx_val], labels[idx_val]).item()
        if sampler.dataset == "reddit":
            early_stopping(loss_val, model)
    else:
        loss_val = 0
        acc_val = 0

    if args.lradjust:
        scheduler.step()
    val_t = time.time() - val_t
    return (loss_train.item(), acc_train.item(), loss_val, acc_val,
            get_lr(optimizer), train_t, val_t, grads, norms)
def train(epoch, train_adj, train_fea, idx_train, val_adj=None, val_fea=None):
    """One training step of the GCN+VAE model (NC loss + 0.2 * reconstruction).

    Uses module-level globals: model, optimizer, scheduler, sampler, args,
    labels, idx_val, loss_function, early_stopping, accuracy, get_lr.

    Returns:
        (loss_train, acc_train, loss_val, acc_val, lr, train_t, val_t)
    """
    if val_adj is None:
        val_adj = train_adj
        val_fea = train_fea

    t = time.time()
    model.train()
    optimizer.zero_grad()
    recovered, mu, logvar, output = model(train_fea, train_adj)

    # special for reddit: inductive output covers only the train nodes
    if sampler.learning_type == "inductive":
        # FIX: loss_train was commented out in this branch, so the inductive
        # path raised NameError at loss_train.backward(); restore the NC loss
        # (no AE term here, matching the original commented-out line).
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_nc = F.nll_loss(output[idx_train], labels[idx_train])
        ae_loss = loss_function(preds=recovered, labels=train_adj,
                                mu=mu, logvar=logvar,
                                n_nodes=train_adj.size(0))
        # total loss: classification + weighted reconstruction
        loss_train = loss_nc + 0.2 * ae_loss
        acc_train = accuracy(output[idx_train], labels[idx_train])

    loss_train.backward()
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()

    # (A dead, string-commented block that gated validation on fastmode /
    # early_stopping was removed; validation now always runs, as before.)
    model.eval()
    recovered, mu, logvar, output = model(val_fea, val_adj)
    loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
    acc_val = accuracy(output[idx_val], labels[idx_val]).item()
    early_stopping(acc_val, model)

    if args.lradjust:
        scheduler.step()
    val_t = time.time() - val_t
    return (loss_train.item(), acc_train.item(), loss_val, acc_val,
            get_lr(optimizer), train_t, val_t)
def train_full(epoch, train_g, val_g, idx_val, labels):
    """One full-graph training step of the classifier on frozen embeddings.

    Uses module-level globals: unsupervised_model, classifier_model, optimizer,
    scheduler, sampler, args, idx_train, early_stopping, accuracy, get_lr.

    Returns:
        (loss_train, acc_train, loss_val, acc_val, lr, train_t, val_t)
    """
    # embeddings are produced by a frozen encoder; only the classifier trains
    unsupervised_model.eval()
    t = time.time()
    classifier_model.train()
    optimizer.zero_grad()

    # get features for training
    feats = unsupervised_model(train_g.ndata['features'], train_g)
    output = classifier_model(feats)

    # special for inductive, learning type must be inductive
    if sampler.learning_type=='inductive':
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
    # loss_train = F.nll_loss(output, labels[idx_train])
    # acc_train = accuracy(output, labels[idx_train])
    # if sampler.learning_type == "inductive":
    #     loss_train = F.nll_loss(output, labels[idx_train])
    #     acc_train = accuracy(output, labels[idx_train])
    # else:
    #     loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    #     acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()

    # We can not apply the fastmode for the coauthor_phy dataset.
    # if sampler.learning_type == "inductive" or not args.fastmode:
    classifier_model.eval()
    if sampler.dataset in ['coauthor_phy']:
        # coauthor_phy validation graph is too large for GPU memory:
        # temporarily evaluate on CPU, then move everything back.
        unsupervised_model.cpu()
        classifier_model.cpu()
        labels = labels.cpu()

    # get features for validation
    feats = unsupervised_model(val_g.ndata['features'], val_g)
    output = classifier_model(feats)
    loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
    acc_val = accuracy(output[idx_val], labels[idx_val]).item()
    early_stopping(loss_val, classifier_model)

    if sampler.dataset in ['coauthor_phy']:
        unsupervised_model.cuda()
        classifier_model.cuda()
        labels = labels.cuda()

    if args.lradjust:
        scheduler.step()
    val_t = time.time() - val_t
    return (loss_train.item(), acc_train.item(), loss_val, acc_val,
            get_lr(optimizer), train_t, val_t)
def test_accuracy():
    """accuracy() should mark, per sample, whether the target class is in the top-k."""
    logits = torch.tensor([[2., 3., 1.],
                           [5., 7., 0.],
                           [0., 9., -1.]])
    gold = torch.tensor([1, 0, 2])
    top1 = accuracy(input=logits, target=gold, topk=1)
    assert torch.equal(top1, torch.tensor([1., 0., 0.]))
    top2 = accuracy(input=logits, target=gold, topk=2)
    assert torch.equal(top2, torch.tensor([1., 1., 0.]))
def train(epoch, train_adj, train_fea, idx_train, val_adj=None, val_fea=None):
    """One training step of the classifier over frozen unsupervised features.

    Uses module-level globals: unsupervised_model, classifier_model, optimizer,
    scheduler, sampler, args, labels, idx_val, early_stopping, accuracy, get_lr.

    Returns:
        (loss_train, acc_train, loss_val, acc_val, lr, train_t, val_t)
    """
    # encoder is frozen; only the classifier head is optimized
    unsupervised_model.eval()
    if val_adj is None:
        val_adj = train_adj
        val_fea = train_fea

    t = time.time()
    classifier_model.train()
    optimizer.zero_grad()
    feats = unsupervised_model(train_fea, train_adj)
    output = classifier_model(feats)

    # special for reddit: inductive output covers only the train nodes
    if sampler.learning_type == "inductive":
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()

    # We can not apply the fastmode for the reddit dataset.
    # if sampler.learning_type == "inductive" or not args.fastmode:
    classifier_model.eval()
    feats = unsupervised_model(val_fea, val_adj)
    output = classifier_model(feats)

    if args.early_stopping > 0 and sampler.dataset not in ['reddit', 'coauthor_phy']:
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        acc_val = accuracy(output[idx_val], labels[idx_val]).item()
        early_stopping(loss_val, classifier_model)

    if not args.fastmode:
        # # Evaluate validation set performance separately,
        # # deactivates dropout during validation run.
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        acc_val = accuracy(output[idx_val], labels[idx_val]).item()
        if sampler.dataset in ['reddit', 'coauthor_phy']:
            early_stopping(loss_val, classifier_model)
    else:
        # NOTE(review): with fastmode and early_stopping disabled, loss/acc
        # are reported as 0 rather than computed.
        loss_val = 0
        acc_val = 0

    if args.lradjust:
        scheduler.step()
    val_t = time.time() - val_t
    return (loss_train.item(), acc_train.item(), loss_val, acc_val,
            get_lr(optimizer), train_t, val_t)
def train(epoch, train_adj, train_fea, idx_train, val_adj=None, val_fea=None):
    """One training step of the GCN model.

    Uses module-level globals: model, optimizer, scheduler, sampler, args,
    labels, idx_val, early_stopping, accuracy, get_lr.

    Returns:
        (loss_train, acc_train, loss_val, acc_val, lr, train_t, val_t)
    """
    if val_adj is None:
        # transductive case: validate on the training graph
        val_adj = train_adj
        val_fea = train_fea

    t = time.time()
    model.train()
    optimizer.zero_grad()
    #with torch.no_grad():
    output = model(train_fea, train_adj)

    # special for reddit: inductive output covers only the train nodes
    if sampler.learning_type == "inductive":  #yes!
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:  #only reddit yes!
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    #torch.nn.utils.clip_grad_norm(model.parameters(), 0.5)  #
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()

    # We can not apply the fastmode for the reddit dataset.
    # if sampler.learning_type == "inductive" or not args.fastmode:
    if args.early_stopping > 0 and sampler.dataset != "reddit":
        # early stopping on the training-graph validation loss
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        early_stopping(loss_val, model)

    if not args.fastmode:
        # # Evaluate validation set performance separately,
        # # deactivates dropout during validation run.
        model.eval()
        #with torch.no_grad():
        output = model(val_fea, val_adj)
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        acc_val = accuracy(output[idx_val], labels[idx_val]).item()
        if sampler.dataset == "reddit":
            early_stopping(loss_val, model)
    else:
        loss_val = 0
        acc_val = 0

    if args.lradjust:
        scheduler.step()
    val_t = time.time() - val_t
    return (loss_train.item(), acc_train.item(), loss_val, acc_val,
            get_lr(optimizer), train_t, val_t)
def train():
    """Epoch loop: train the model, periodically validate, checkpoint the best.

    Uses module-level globals: model, optimizer, scheduler, criterion, args,
    train_loader/train_label, valid_loader/valid_label, accuracy,
    best_epoch/best_acc.
    """
    global best_epoch, best_acc

    if args.start_epoch:
        # resume from an existing checkpoint
        model.load_state_dict(
            torch.load(
                os.path.join(args.model_path,
                             'model-%d.pkl' % (args.start_epoch))))

    # Training
    for epoch in range(args.start_epoch, args.num_epochs):
        train_loss = 0
        train_acc = 0
        n_train = 0
        model.train()
        for i, x in enumerate(train_loader):
            logit = model(x[0].float())
            target = train_label[i]
            loss = criterion(logit, target.view(1))

            model.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_acc += accuracy(logit, target.view(1))
            n_train += 1

        # FIX: step the LR scheduler after the optimizer updates (required
        # order since PyTorch 1.1), not before the epoch's training loop.
        scheduler.step()

        # FIX: averages previously divided by the last enumerate index `i`
        # (off-by-one; ZeroDivisionError with a single batch) — divide by the
        # actual batch count instead.
        print('[epoch', epoch + 1, '] Train loss:', train_loss / n_train,
              'Train Acc:', train_acc / n_train)

        if (epoch + 1) % args.val_step == 0:
            model.eval()
            val_loss = 0
            val_acc = 0
            n_val = 0
            with torch.no_grad():
                for i, x in enumerate(valid_loader):
                    logit = model(x[0].float())
                    target = valid_label[i]
                    val_loss += criterion(logit, target.view(1)).item()
                    val_acc += accuracy(logit, target.view(1))
                    n_val += 1

            if best_acc <= (val_acc / n_val):
                # new best: remember epoch and persist the weights
                best_epoch = epoch + 1
                best_acc = (val_acc / n_val)
                torch.save(
                    model.state_dict(),
                    os.path.join(args.model_path,
                                 'model-%d.pkl' % (best_epoch)))

            print('Val loss:', val_loss / n_val, 'Val Acc:', val_acc / n_val)
def test(test_adj, test_fea, idx_test, labels):
    """Evaluate the classifier on a test graph built from a sparse adjacency.

    Uses module-level globals: unsupervised_model, classifier_model, sampler,
    args, accuracy, roc_auc_compute_fn.

    Returns:
        (loss_test, acc_test)
    """
    unsupervised_model.eval()
    classifier_model.eval()
    if sampler.learning_type=='inductive':
        # inductive test graphs are evaluated on CPU
        unsupervised_model.cpu()
        classifier_model.cpu()
        labels = labels.cpu()

    # construct g from test adj: sparse-tensor indices -> scipy COO -> DGLGraph
    if sampler.learning_type=='inductive':
        test_edges = test_adj._indices().data.numpy()
    else:
        test_edges = test_adj._indices().data.cpu().numpy()
    test_edges = sp.coo_matrix((np.ones(test_edges.shape[1]),
                                (test_edges[0], test_edges[1])),
                               shape=(test_adj.shape[0], test_adj.shape[0]),
                               dtype=np.float32)
    test_g = nx.from_scipy_sparse_matrix(test_edges, create_using=nx.DiGraph())
    test_g = DGLGraph(test_g)

    feats = unsupervised_model(test_fea, test_g)
    output = classifier_model(feats)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    auc_test = roc_auc_compute_fn(output[idx_test], labels[idx_test])
    if args.debug:
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "auc= {:.4f}".format(auc_test),
              "accuracy= {:.4f}".format(acc_test.item()))
    print("accuracy=%.5f" % (acc_test.item()))
    return (loss_test.item(), acc_test.item())
def infer(test_reader):
    """
    do inference function

    Runs the compiled inference program over *test_reader* batches and
    accumulates cost plus predictions, then scores them with the metrics
    named in the module-level `metric_type` list.

    Returns:
        (mean_cost, metric_res) where metric_res is a list of
        (metric_name, value) pairs.
    """
    total_cost = 0.0
    total_count = 0
    preds, labels = [], []
    for data in test_reader():
        avg_cost, avg_acc, batch_prediction = exe.run(
            inference_program,
            feed=feeder.feed(data),
            fetch_list=fetch_list,
            return_numpy=True)
        # avg_cost is a per-example mean; re-weight by batch size
        total_cost += avg_cost * len(data)
        total_count += len(data)
        preds.append(batch_prediction)
        # label is assumed to be the last field of each sample
        labels.append(np.asarray([x[-1] for x in data], dtype=np.int64))

    y_pred = np.concatenate(preds)
    y_label = np.concatenate(labels)

    metric_res = []
    for metric_name in metric_type:
        if metric_name == 'accuracy_with_threshold':
            metric_res.append(
                (metric_name,
                 metric.accuracy_with_threshold(y_pred, y_label,
                                                threshold=0.3)))
        elif metric_name == 'accuracy':
            metric_res.append(
                (metric_name, metric.accuracy(y_pred, y_label)))
        else:
            # unknown metric name is treated as fatal
            print("Unknown metric type: ", metric_name)
            exit()
    return total_cost / (total_count * 1.0), metric_res
def train(args, model, data, label, train_idx, feature_mask, optimizer, epoch):
    """One training step of the multi-view model.

    Combines per-view reconstruction losses with the node-classification loss;
    for the first 100 epochs only the reconstruction loss is optimized.

    Returns:
        (train_accuracy, semantic)
    """
    model.train()
    optimizer.zero_grad()
    # criterion = nn.CrossEntropyLoss()
    rec_vec, output, semantic = model(feature_mask)
    label = label.long().view(-1, )

    # classification loss
    cls_loss = F.nll_loss(output[train_idx], label[train_idx])
    args.logger.warning("Classfication loss " + str(cls_loss.item()))

    # reconstruction loss: squared error per view, masked to present features
    rec_loss = 0.0
    for v in range(args.view_num):
        # FIX: local was named `sum`, shadowing the builtin — renamed.
        sq_err = torch.sum(torch.pow(torch.sub(rec_vec[v], data[v]), 2.0), 1)
        fea = feature_mask[:, v].double()
        view_loss = torch.sum(sq_err * fea)
        args.logger.warning("View " + str(v) + " loss " + str(view_loss.item()))
        rec_loss += view_loss

    # summary loss: warm up with reconstruction only, then add classification
    if epoch < 100:
        loss = rec_loss
    else:
        loss = cls_loss + rec_loss
    args.logger.warning("Total loss " + str(loss.item()))
    loss.backward()
    optimizer.step()

    acc_train = accuracy(output[train_idx], label[train_idx]).item()
    args.logger.error("Epoch : " + str(epoch) + ' train accuracy : ' + str(acc_train))
    return acc_train, semantic
def validate(self):
    """Classify the held-out data and score it against the gold labels.

    Returns the full metric tuple: accuracy, exact match, micro P/R/F,
    per-label P/R/F, and the raw per-label precision/recall arrays.
    """
    self.pred_labels = self.classify()
    gold, pred = self.true_labels, self.pred_labels

    acc = accuracy(gold, pred)
    ematch = exact_match(gold, pred)
    pre_micro, rec_micro, f_micro = f_score_micro(gold, pred)
    (pre_label, rec_label, f_label,
     prec_result, recall_result) = f_score_by_label(gold, pred,
                                                    len(self.label_dict))

    return (acc, ematch, pre_micro, rec_micro, f_micro,
            pre_label, rec_label, f_label, prec_result, recall_result)
def test(X):
    """Compare clustering accuracy of k-means and energy clustering on X,
    in the original space and under 1D projections (PCA / random).

    NOTE: Python 2 print statements; relies on module-level `z` (true labels).
    """
    # k-means in the original feature space
    zh, score = kmeans(X)
    a = accuracy(z, zh)
    print "k-means original space:", a

    # k-means after projecting to 1D with PCA
    Y = pca_proj(X)
    zh, score = kmeans(Y)
    a = accuracy(z, zh)
    print "k-means/1D PCA:", a

    # 20 random 1D projections, best run kept (per helper semantics — TODO confirm)
    zh = kmeans_random(X, 20)
    a = accuracy(z, zh)
    print "k-means/random projection 1D:", a

    zh = energy_random(X, 20)
    a = accuracy(z, zh)
    print "energy/random projection 1D", a
def train(epoch, train_adj, train_fea, val_adj=None, val_fea=None):
    """One training step of the GCN, with optional validation pass.

    Uses module-level globals: model, optimizer, scheduler, sampler, args,
    labels, idx_train, idx_val, early_stopping, accuracy.

    Returns:
        (loss_train, acc_train, loss_val, acc_val)

    NOTE(review): if `args.fastmode` is set and learning is transductive, the
    validation branch is skipped and loss_val/acc_val are unbound at the
    return statement (NameError) — confirm callers never hit that combination.
    """
    if val_adj is None:
        val_adj = train_adj
        val_fea = train_fea

    t = time.time()
    #adjust lr
    if args.lradjust:
        #scheduler.step(loss_val)
        scheduler.step()

    model.train()
    optimizer.zero_grad()
    output = model(train_fea, train_adj)

    #special for reddit: inductive output covers only the train nodes
    if sampler.learning_type == "inductive":
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    #We can not apply the fastmode for the reddit dataset.
    if sampler.learning_type == "inductive" or not args.fastmode:
        # Evaluate validation set performance separately,
        # deactivates dropout during validation run.
        model.eval()
        output = model(val_fea, val_adj)
        loss_val = F.nll_loss(output[idx_val], labels[idx_val])
        acc_val = accuracy(output[idx_val], labels[idx_val])
        if args.earlystopping > 0:
            early_stopping(loss_val, model)

    if args.debug and epoch % 1 == 0:
        print('Epoch: {:04d}'.format(epoch+1),
              'loss_train: {:.4f}'.format(loss_train.item()),
              'acc_train: {:.4f}'.format(acc_train.item()),
              'loss_val: {:.4f}'.format(loss_val.item()),
              'acc_val: {:.4f}'.format(acc_val.item()),
              'time: {:.4f}s'.format(time.time() - t))
    return (loss_train.item(), acc_train.item(),
            loss_val.item(), acc_val.item())
def test(args, model, data, label, test_idx, feature_mask, epoch=0):
    """Evaluate the multi-view model on the test indices.

    Returns:
        (test_accuracy, semantic)
    """
    model.eval()
    with torch.no_grad():
        _, output, semantic = model(feature_mask)
        loss_test = F.nll_loss(output[test_idx], label[test_idx])
        acc_test = accuracy(output[test_idx], label[test_idx]).item()
        args.logger.error("Epoch : " + str(epoch) + ' test accuracy : ' + str(acc_test))
        return acc_test, semantic
def test(test_adj, test_fea):
    """Score the trained model on the test split.

    Returns:
        (loss_test, acc_test) as plain floats.
    """
    model.eval()
    preds = model(test_fea, test_adj)
    loss_test = F.nll_loss(preds[idx_test], labels[idx_test])
    acc_test = accuracy(preds[idx_test], labels[idx_test])
    if args.debug:
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "accuracy= {:.4f}".format(acc_test.item()))
    return (loss_test.item(), acc_test.item())
def test_accuracy_should_examine_each_sentence(self):
    """Accuracy over multiple sentence pairs is the pooled word-level score."""
    reference = [
        "the dog walks on his 4 legs in the park",
        "bob looked at the stars and saw jupiter"
    ]
    translation = [
        "the dog is on his 4 leg in the stadium",
        "bob look at a star and saw some wood"
    ]
    expected = 103 / 180
    self.assertEqual(expected, metric.accuracy(reference, translation))
def trainGraphConvolutionNetwork(hparam):
    """Train a GCN on the Cora dataset and save the final weights.

    Args:
        hparam: hyper-parameter object exposing gpu, dataPath, nHidden,
            dropout, loss, lr, weight_decay, epoch, savepath.
    """
    from model import GraphConvNetwork
    from data.dataset import CoraDataSet

    device = 'cuda' if hparam.gpu else 'cpu'
    db = CoraDataSet(basepath=hparam.dataPath)
    X, y, A, idx_train, idx_val, idx_test = db.getTorchTensor(device)
    net = GraphConvNetwork(inchannel=db.featureDim,
                           nhidden=hparam.nHidden,
                           outchannel=db.numClass,
                           dropout=hparam.dropout).to(device).train()

    from losses import LossFamily
    lossfunc = LossFamily[hparam.loss]
    import torch.optim as optim
    optimizer = optim.Adam(net.parameters(), lr=hparam.lr,
                           weight_decay=hparam.weight_decay)
    from metric import accuracy

    for epoch in range(hparam.epoch):
        optimizer.zero_grad()
        # forward path
        yhat = net(X, A)
        _loss = lossfunc(yhat[idx_train], y[idx_train])
        # backward
        _loss.backward()
        optimizer.step()

        # epoch statistics
        trainLoss = _loss.item()
        valLoss = lossfunc(yhat[idx_val], y[idx_val]).item()
        trainAcc = accuracy(yhat[idx_train], y[idx_train])
        valAcc = accuracy(yhat[idx_val], y[idx_val])
        # FIX: the format string has three placeholders but only two values
        # were supplied, raising TypeError — pass the epoch as the first.
        print('epoch %d,train accuracy %.2f,val accuracy %.2f'
              % (epoch, trainAcc, valAcc))

    torch.save(net.state_dict(), hparam.savepath)
def _forward(data, model, loss_fn, window, forecast_length, training=True,
             teacher_ratio=1):
    """Run one encoder/decoder pass of the sequence model.

    First replays `window - 1` observed days, then rolls forward
    `forecast_length` days; during training, with probability
    *teacher_ratio* the true previous label is fed instead of the model's
    own output (teacher forcing).

    Returns:
        (loss, outputs, avg_acc) — outputs has one prediction per time step.
    """
    outputs = []
    label_x, feature_x, label_y, feature_y, _, _ = data
    batch_size = label_x.shape[0]
    model.init_hidden(batch_size)

    # concat the true value of day(t-1) and the features of day(t) to forecast day(t)
    inp = torch.cat([
        label_x[:, :-1].reshape(batch_size, window - 1, 1),
        feature_x[:, 1:, :]
    ], dim=2)

    # no need to iterate the first day
    for time_step in range(window - 1):
        output = model(inp[:, time_step:time_step + 1, :])
        outputs.append(output)

    for idx in range(forecast_length):
        if idx == 0:
            # first forecast day is seeded with the last observed label
            inp = torch.cat([
                label_x[:, -1:].reshape([-1, 1, 1]),
                feature_y[:, idx:idx + 1, :]
            ], dim=2)
        else:
            if training:
                if np.random.random() < teacher_ratio:
                    # teacher forcing: feed the ground-truth previous label
                    inp = torch.cat([
                        label_y[:, idx - 1].reshape([-1, 1, 1]),
                        feature_y[:, idx:idx + 1, :]
                    ], dim=2)
                else:
                    # feed the model's own previous prediction
                    inp = torch.cat([
                        output.reshape([-1, 1, 1]),
                        feature_y[:, idx:idx + 1, :]
                    ], dim=2)
            else:
                # inference always rolls forward on its own predictions
                inp = torch.cat(
                    [output.reshape([-1, 1, 1]), feature_y[:, idx:idx + 1, :]],
                    dim=2)
        output = model(inp)
        outputs.append(output)

    outputs = torch.stack(outputs, 1)
    # loss over both the replayed window and the forecast horizon
    loss = loss_fn(outputs, torch.cat([label_x[:, 1:], label_y], 1))
    # accuracy only over the forecast horizon
    avg_acc = accuracy(outputs[:, -forecast_length:], label_y)
    return loss, outputs, avg_acc
def test(test_adj, test_fea):
    """Evaluate the SSL-GCN on the test split and print its accuracy.

    Returns:
        (loss_test, acc_test) as plain floats.
    """
    model.eval()
    # output = model(test_fea, test_adj)
    preds, embeddings = model.myforward(test_fea, test_adj, adj_knn)
    loss_test = F.nll_loss(preds[idx_test], labels[idx_test])
    acc_test = accuracy(preds[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))
    print("accuracy=%.5f" % (acc_test.item()))
    return (loss_test.item(), acc_test.item())
def train(epoch, train_adj, train_fea, idx_train, val_adj=None, val_fea=None):
    """One training step of the GCN with an unconditional validation pass
    for inductive or non-fastmode runs.

    Uses module-level globals: model, optimizer, scheduler, sampler, args,
    labels, idx_val, early_stopping, accuracy, get_lr.

    Returns:
        (loss_train, acc_train, loss_val, acc_val, lr, train_t, val_t)

    NOTE(review): when the validation branch is skipped (fastmode,
    transductive), loss_val/acc_val are unbound at the return — confirm
    callers never hit that combination.
    """
    if val_adj is None:
        val_adj = train_adj
        val_fea = train_fea

    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(train_fea, train_adj)

    #special for reddit: inductive output covers only the train nodes
    if sampler.learning_type == "inductive":
        loss_train = F.nll_loss(output, labels[idx_train])
        acc_train = accuracy(output, labels[idx_train])
    else:
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()

    #We can not apply the fastmode for the reddit dataset.
    if sampler.learning_type == "inductive" or not args.fastmode:
        # # Evaluate validation set performance separately,
        # # deactivates dropout during validation run.
        model.eval()
        output = model(val_fea, val_adj)
        loss_val = F.nll_loss(output[idx_val], labels[idx_val])
        acc_val = accuracy(output[idx_val], labels[idx_val])

    if args.lradjust:
        scheduler.step()
    if args.early_stopping > 0:
        early_stopping(loss_val, model)
    val_t = time.time() - val_t
    return (loss_train.item(), acc_train.item(),
            loss_val.item(), acc_val.item(),
            get_lr(optimizer), train_t, val_t)
def test(test_adj, test_fea):
    """Evaluate the GCN+VAE model on the test split.

    Uses module-level globals: model, labels, idx_test, args, accuracy,
    roc_auc_compute_fn.

    Returns:
        (loss_test, acc_test)
    """
    model.eval()
    recovered, mu, logvar, output = model(test_fea, test_adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    auc_test = roc_auc_compute_fn(output[idx_test], labels[idx_test])
    if args.debug:
        # verbose report disabled; the string literal below is dead code
        '''print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "auc= {:.4f}".format(auc_test),
              "accuracy= {:.4f}".format(acc_test.item()))'''
        print("accuracy=%.5f" % (acc_test.item()))
    return (loss_test.item(), acc_test.item())
def test_sampling(model, test_g, val_batch_size):
    """Evaluate via the model's batched `inference` path, entirely on CPU.

    Uses module-level globals: idx_test, labels, args, accuracy,
    roc_auc_compute_fn.

    Returns:
        (loss_test, acc_test)
    """
    model.eval()
    # batched full-graph inference on CPU (output tensor lives on CPU,
    # hence the .cpu() on indices/labels below)
    output = model.inference(test_g, test_g.ndata['features'],
                             val_batch_size, 'cpu')
    loss_test = F.nll_loss(output[idx_test.cpu()], labels[idx_test].cpu())
    acc_test = accuracy(output[idx_test.cpu()], labels[idx_test].cpu())
    auc_test = roc_auc_compute_fn(output[idx_test.cpu()],
                                  labels[idx_test].cpu())
    if args.debug:
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "auc= {:.4f}".format(auc_test),
              "accuracy= {:.4f}".format(acc_test.item()))
    print("accuracy=%.5f" % (acc_test.item()))
    return (loss_test.item(), acc_test.item())
def validation_class(network, dataset, type='validation'):
    """Mean batch accuracy of *network* over the requested dataset split.

    Args:
        network: model exposing .predict(batch) -> label array.
        dataset: data source exposing get_size(type) and get_minbatch(...).
        type: which split to evaluate ('validation' by default).

    Returns:
        Mean accuracy across all full batches of the split.
    """
    total_acc = 0.
    # FIX: get_size previously hard-coded type='validation', silently ignoring
    # the *type* argument that get_minbatch below honours.
    time_max = dataset.get_size(type=type) // batch_size
    for index in range(0, time_max, 1):
        data_x, data_y = dataset.get_minbatch(batch_size, index, type=type)
        pred_label = network.predict(data_x)
        # FIX: np.int was removed in NumPy >= 1.24; use the builtin int dtype.
        pred_label = pred_label.astype(int)
        # per-sample hit indicator, scored as accuracy against all-ones
        pred_label = np.equal(pred_label, data_y).astype(int)
        data_y = np.ones_like(pred_label)
        total_acc += metric.accuracy(pred_label, data_y)
    return total_acc / time_max
def test(test_adj, test_fea):
    """Evaluate the frozen encoder + classifier head on the test split.

    Returns:
        (loss_test, acc_test) as plain floats.
    """
    unsupervised_model.eval()
    classifier_model.eval()
    feats = unsupervised_model(test_fea, test_adj)
    preds = classifier_model(feats)

    test_logits = preds[idx_test]
    test_labels = labels[idx_test]
    loss_test = F.nll_loss(test_logits, test_labels)
    acc_test = accuracy(test_logits, test_labels)
    auc_test = roc_auc_compute_fn(test_logits, test_labels)

    if args.debug:
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "auc= {:.4f}".format(auc_test),
              "accuracy= {:.4f}".format(acc_test.item()))
    print("accuracy=%.5f" % (acc_test.item()))
    return (loss_test.item(), acc_test.item())
def main():
    """Train Adaboost on the white-wine quality CSV and print test accuracy."""
    data = np.genfromtxt('./winequality-white.csv', delimiter=';',
                         dtype=float, skip_header=1)
    # first 3000 rows train, the rest test; last column is the label
    features, quality = data[:, :-1], data[:, -1]
    train_feature, test_feature = features[:3000], features[3000:]
    train_label, test_label = quality[:3000], quality[3000:]

    adaboost = Adaboost(train_feature, train_label)
    adaboost.train(3)

    predict_label = [adaboost.predict(item) for item in test_feature]
    print("test accuracy: ", accuracy(predict_label, test_label))
def test_sampling(test_g, val_batch_size):
    """Evaluate the classifier over batched-inference encoder features.

    Uses module-level globals: unsupervised_model, classifier_model, idx_test,
    labels, args, accuracy, roc_auc_compute_fn.

    Returns:
        (loss_test, acc_test)
    """
    unsupervised_model.eval()
    classifier_model.eval()
    feats = unsupervised_model.inference(test_g, test_g.ndata['features'],
                                         val_batch_size, 'cpu')
    # `output` contains ONLY the test rows (feats is indexed by idx_test here)
    output = classifier_model(feats[idx_test.cpu()].cuda())
    loss_test = F.nll_loss(output, labels[idx_test])
    acc_test = accuracy(output, labels[idx_test])
    # FIX: output is already restricted to the test nodes, so indexing it
    # again with idx_test (as the original did) selected wrong rows or raised;
    # score the AUC on output directly, matching loss/accuracy above.
    auc_test = roc_auc_compute_fn(output, labels[idx_test])
    if args.debug:
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "auc= {:.4f}".format(auc_test),
              "accuracy= {:.4f}".format(acc_test.item()))
    print("accuracy=%.5f" % (acc_test.item()))
    return (loss_test.item(), acc_test.item())
def test_full(model, test_g, idx_test, labels):
    """Full-graph test evaluation; moves to CPU for oversized datasets.

    Uses module-level globals: sampler, args, accuracy, roc_auc_compute_fn.

    Returns:
        (loss_test, acc_test)
    """
    model.eval()
    if sampler.dataset in ['coauthor_phy']:
        # graph too large for GPU memory — evaluate on CPU
        model.cpu()
        labels = labels.cpu()
    output = model(test_g.ndata['features'], test_g)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    auc_test = roc_auc_compute_fn(output[idx_test], labels[idx_test])
    if args.debug:
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "auc= {:.4f}".format(auc_test),
              "accuracy= {:.4f}".format(acc_test.item()))
    print("accuracy=%.5f" % (acc_test.item()))
    return (loss_test.item(), acc_test.item())
def eval(model, eval_dataloader, device):
    """Evaluate a pairwise BERT-style model: accuracy and F1 over the loader.

    Prints the result dict via pprint; accumulates per-batch correct counts
    from the module-level `accuracy` helper and normalizes by example count.
    NOTE(review): shadows the builtin `eval`; no value is returned.
    """
    model.eval()
    eval_accuracy = 0.
    #eval_map, eval_accuracy, eval_mrr = 0., 0., 0.
    nb_eval_steps, nb_eval_examples = 0, 0
    preds, labels = [], []
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        # lazily move each tensor of the batch to the target device
        batch = (t.to(device) for t in batch)
        input_ids, input_mask, segment_ids, label_ids, x_input_ids, \
            x_input_mask, x_segment_ids, y_input_ids, y_input_mask, \
            y_segment_ids = batch
        with torch.no_grad():
            # tmp_eval_loss = model(input_ids, segment_ids, input_mask, label_ids)
            logits = model(x_input_ids, x_input_mask, x_segment_ids,
                           y_input_ids, y_input_mask, y_segment_ids,
                           input_ids, segment_ids, input_mask)
        logits = logits.detach().cpu().numpy()
        label_ids = label_ids.to('cpu').numpy()
        preds.extend(np.argmax(logits, 1).tolist())
        labels.extend(label_ids.tolist())
        tmp_eval_accuracy = accuracy(logits, label_ids)
        #tmp_eval_map = mean_average_precision(label_ids, logits[:,1])
        #tmp_eval_mrr = mean_reciprocal_rank(label_ids, logits[:, 1])
        # eval_loss += tmp_eval_loss.mean().item()
        eval_accuracy += tmp_eval_accuracy
        #eval_map += tmp_eval_map
        #eval_mrr += tmp_eval_mrr
        nb_eval_examples += input_ids.size(0)
        nb_eval_steps += 1
    # eval_loss = eval_loss / nb_eval_steps
    # normalize by examples (accuracy helper presumably returns a count —
    # TODO confirm against its definition)
    eval_accuracy = eval_accuracy / nb_eval_examples
    eval_f1 = f1_score(np.array(labels), np.array(preds))
    #eval_map = eval_map / nb_eval_examples
    #eval_mrr = eval_mrr / nb_eval_examples
    result = {
        # 'eval_loss': eval_loss,
        'eval_accuracy': eval_accuracy,
        'eval_f1_score': eval_f1
    }
    #'eval_map': eval_map,
    #'eval_mrr': eval_mrr}
    pprint(result)
#fname = "normal_density2.pdf" fname = "lognormal_density2.pdf" ############################################################################### t = PrettyTable(['Method', 'Accuracy']) km = KMeans(k, n_init=5) km.fit(Y) zh_kmeans = km.labels_ x1_kmeans = X[np.where(zh_kmeans==0)][:, np.newaxis] x2_kmeans = X[np.where(zh_kmeans==1)][:, np.newaxis] x1_mu_kmeans, x2_mu_kmeans = km.cluster_centers_ x1_mu_kmeans, x2_mu_kmeans = x1_mu_kmeans[0], x2_mu_kmeans[0] x1_var_kmeans, x2_var_kmeans = np.var(x1_kmeans), np.var(x2_kmeans) acc_kmeans = metric.accuracy(z, zh_kmeans) t.add_row(['k-means', acc_kmeans]) gm = GMM(k, n_init=5, init_params="kmeans") gm.fit(Y) zh_gmm = gm.predict(Y) #x1_gmm = X[np.where(zh_gmm==0)][:, np.newaxis] #x2_gmm = X[np.where(zh_gmm==1)][:, np.newaxis] x1_mu_gmm, x2_mu_gmm = gm.means_ x1_mu_gmm, x2_mu_gmm = x1_mu_gmm[0], x2_mu_gmm[0] x1_var_gmm, x2_var_gmm = gm.covariances_ x1_var_gmm, x2_var_gmm = x1_var_gmm[0][0], x2_var_gmm[0][0] acc_gmm = metric.accuracy(z, zh_gmm) t.add_row(['gmm', acc_gmm]) G = eclust.kernel_matrix(Y, lambda x, y: np.linalg.norm(x-y))
G = kernel_matrix(X, rho) # initialization mu0, z0 = initialization.kmeanspp(k, X, ret='both') Z0 = ztoZ(z0) z1 = initialization.spectral(k, G) Z1 = ztoZ(z1) t = BeautifulTable() t.column_headers = ["Method", "Accuracy", "Objective", "Exec Time"] start = timer() zh = energy_clustering_brute(k, G, Z0) end = timer() Zh = ztoZ(zh) t.append_row(["E-clustering brute", metric.accuracy(z, zh), objective(Zh, G), end-start]) start = timer() zh = energy_hartigan(k, G, Z0) end = timer() Zh = ztoZ(zh) t.append_row(["E-H-clustering++", metric.accuracy(z, zh), objective(Zh, G), end-start]) t.append_row(['Spectral Clustering:', metric.accuracy(z, z1), objective(Z1,G), '-']) start = timer() zh = energy_hartigan(k, G, Z1) end = timer()
import data
from metric import accuracy
import eclust
import initialization

# Repeat the 1D lognormal two-cluster experiment and record accuracy of:
# col 0: exact 1D energy clustering, col 1: Hartigan from k-means++ seed,
# col 2: Hartigan from spectral seed (cols 3-4 filled elsewhere).
num_experiments = 10
table = np.zeros((num_experiments, 5))
for i in range(num_experiments):
    X, z = data.univariate_lognormal([0, -1.5], [0.3, 1.5], [100, 100])
    #X, z = data.univariate_normal([0, 5], [1, 22], [15, 15])
    Y = np.array([[x] for x in X])  # column-vector view for 2D APIs
    k = 2

    # 1D energy clustering
    zh, cost = two_clusters1D(X)
    table[i,0] = accuracy(z, zh)

    # initialization
    z0 = initialization.kmeanspp(k, Y, ret='labels')
    Z0 = eclust.ztoZ(z0)
    rho = lambda x, y: np.linalg.norm(x-y)
    G = eclust.kernel_matrix(Y, rho)
    z1 = initialization.spectral(k, G)
    Z1 = eclust.ztoZ(z1)

    # Hartigan's method
    zh = eclust.energy_hartigan(k, G, Z0)
    table[i,1] = accuracy(z, zh)
    zh = eclust.energy_hartigan(k, G, Z1)
    table[i,2] = accuracy(z, zh)
# Two-cluster 1D lognormal mixture: compare k-means, GMM and kernel k-groups,
# then split the sample into estimated classes for plotting.
# NOTE: Python 2 print statement at the table dump.
k = 2
n = 2000
n1, n2 = np.random.multinomial(n, [0.5, 0.5])
m1 = 0
s1 = 1.5
m2 = 1.5
s2 = 0.3
#X, z = data.univariate_normal([m1, m2], [s1, s2], [n1, n2])
X, z = data.univariate_lognormal([m1, m2], [s1, s2], [n1, n2])
Y = np.array([[x] for x in X])  # column-vector view for 2D APIs

### clustering
t = PrettyTable(['Method', 'Accuracy'])
G = eclust.kernel_matrix(Y, lambda x, y: np.linalg.norm(x-y))

zh_kmeans = wrapper.kmeans(k, Y)
t.add_row(['k-means', metric.accuracy(z, zh_kmeans)])

zh_gmm = wrapper.gmm(k, Y)
t.add_row(['gmm', metric.accuracy(z, zh_gmm)])

zh_kgroups = wrapper.kernel_kgroups(k, Y, G)
t.add_row(['kernel k-groups', metric.accuracy(z, zh_kgroups)])

print t

### estimated classes
x1_true = X[np.where(z==0)]
x2_true = X[np.where(z==1)]

x1_kmeans = X[np.where(zh_kmeans==0)]
x2_kmeans = X[np.where(zh_kmeans==1)]

x1_gmm = X[np.where(zh_gmm==0)]
x2_gmm = X[np.where(zh_gmm==1)]
# Benchmark kernel/spectral/classic clustering on a 10-D two-Gaussian mixture
# with identity covariances and mean separation 0.7 per coordinate.
n = 400
d = 10
n1, n2 = np.random.multinomial(n, [1/2, 1/2])
m1 = np.zeros(d)
m2 = 0.7*np.ones(d)
s1 = s2 = np.eye(d)
X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])

# Euclidean-distance kernel and uniform point weights
G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x-y))
W = np.eye(n)
k = 2

t = PrettyTable(["Method", "Accuracy"])

zh = kernel_kmeans(k, X, G, W, run_times=5, ini="k-means++")
a = metric.accuracy(z, zh)
t.add_row(["Kernel k-means", a])

zh = kernel_kgroups(k, X, G, W, run_times=5, ini="k-means++")
a = metric.accuracy(z, zh)
t.add_row(["Kernel k-groups", a])

zh = spectral(k, X, G, W, run_times=5)
a = metric.accuracy(z, zh)
t.add_row(["Spectral", a])

zh = kmeans(k, X, run_times=5)
a = metric.accuracy(z, zh)
t.add_row(["k-means", a])

zh = gmm(k, X, run_times=5)
import data
import metric

# Compare three GMM implementations (local class, local function, sklearn)
# on a 10-D two-Gaussian mixture. NOTE: Python 2 print statements.
#np.random.seed(12)
D = 10
m1 = np.zeros(D)
s1 = np.eye(D)
m2 = np.ones(D)
s2 = 2*np.eye(D)
X, z = data.multivariate_normal([m1, m2], [s1, s2], [100, 100])
k = 2

# scikit-learn library has a better procedure to estimate the covariance
# matrix.
g = GMM(k)
zh = g.fit_predict(X)
print "GMM class:", metric.accuracy(z, zh)

zh = gmm(k, X)
print "GMM func:", metric.accuracy(z, zh)

sg = sk_GMM(k)
sg.fit(X)
zh = sg.predict(X)
print "GMM sklearn:", metric.accuracy(z, zh)
# Time kernel k-groups vs kernel k-means under two initializations,
# reporting accuracy and the clustering objective for each.
rho = lambda x,y: np.power(np.linalg.norm(x-y), 1)
G = kernel_matrix(X, rho)

# initialization: k-means++ seeds and a spectral partition
z0, mu0 = init.kmeans_plus2(k, X)
Z0 = ztoZ(z0)
z1 = init.spectral(k, G, W)
Z1 = ztoZ(z1)

t = PrettyTable(["Method", "Accuracy", "Objective", "Exec Time"])

start = timer()
zh = kernel_kgroups(k, G, Z0, W)
end = timer()
Zh = ztoZ(zh)
t.add_row(["kernel k-groups (k-means++)", metric.accuracy(z, zh),
           objective(Zh, G, W), end-start])

start = timer()
zh = kernel_kgroups(k, G, Z1, W)
end = timer()
Zh = ztoZ(zh)
t.add_row(["kernel k-groups (spectral)", metric.accuracy(z, zh),
           objective(Zh, G, W), end-start])

start = timer()
zh = kernel_kmeans(k, G, Z0, W)
end = timer()
Zh = ztoZ(zh)
t.add_row(["kernel k-means (k-means++)", metric.accuracy(z, zh),
           objective(Zh, G, W), end-start])
        # (tail of an enclosing method whose definition starts before this
        # chunk): assign each sample to its nearest cluster by kernel distance.
        dist = np.zeros((n_samples, self.n_clusters))
        self._compute_dist(K, dist, self.within_distances_,
                           update_within=False)
        return dist.argmin(axis=1)


###############################################################################
if __name__ == '__main__':
    # Demo: KernelEnergy vs plain KMeans on two 2-D Gaussians.
    # NOTE: Python 2 print statements.
    import energy
    import data
    from metric import accuracy
    from sklearn.cluster import KMeans

    X, z = data.multivariate_normal(
        [[0,0], [2,0]],
        [np.eye(2), np.eye(2)],
        [100, 100]
    )
    kernel = energy.energy_kernel
    km = KernelEnergy(n_clusters=2, max_iter=100, verbose=1,
                      kernel_params={'alpha':.8})
    zh = km.fit_predict(X)
    print accuracy(z, zh)

    km = KMeans(n_clusters=2)
    zh = km.fit_predict(X)
    print accuracy(z, zh)
G = eclust.kernel_matrix(data, rho) #G = eclust.kernel_matrix(data, rho_gauss) #G = eclust.kernel_matrix(data, rho_exp) k = 3 r = [] r.append(wrapper.kmeans(k, data, run_times=5)) r.append(wrapper.gmm(k, data, run_times=5)) r.append(wrapper.spectral_clustering(k, data, G, run_times=5)) r.append(wrapper.spectral(k, data, G, run_times=5)) r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='random')) #r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='k-means++')) #r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='spectral')) r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='random')) #r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='k-means++')) #r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='spectral')) t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand']) algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral', 'kernel k-means', 'kernel k-groups'] for algo, zh in zip(algos, r): t.add_row([algo, metric.accuracy(z, zh), sklearn.metrics.adjusted_rand_score(z, zh) ]) print t
        # (tail of an enclosing function whose definition starts before this
        # chunk): pick the split with minimal cost.
        costs.append(cost)
    costs = np.array(costs)
    min_index = costs.argmin()
    min_cost = costs[min_index]
    return min_cost, min_index


###############################################################################
if __name__ == '__main__':
    # Demo: EClust vs KMeans on two 2-D Gaussians with unequal covariance.
    # NOTE: Python 2 print statements.
    import data
    from metric import accuracy

    m1 = np.array([0,0])
    s1 = np.array([[1,0],[0,1]])
    n1 = 100

    m2 = np.array([3,0])
    s2 = np.array([[1,0],[0,10]])
    n2 = 100

    X, true_labels = data.multivariate_normal([m1,m2], [s1,s2], [n1,n2])

    ec = EClust(n_clusters=2, max_iter=10, init='kmeans++')
    labels = ec.fit_predict(X)
    print accuracy(labels, true_labels)

    km = KMeans(2)
    labels2 = km.fit_predict(X)
    print accuracy(labels2, true_labels)