def test(model, x, y_true, idx, emb=None, layer=0):
    """
    Evaluate a DNN on the train / validation / test splits.

    Args:
        - model: DNN model; called as ``model(x, emb, layer)`` and expected to
          return three values, the second of which holds per-class scores
        - x (num_nodes, num_node_features): Input
        - y_true (num_nodes): True node labels
        - idx: mapping with keys "train_idx", "val_idx" and "test_idx"
        - emb (num_nodes, num_node_features): Embeddings of `layer`-th hidden layer
        - layer (int): Hidden layer whose representation is `emb`

    Returns:
        - acc: dict with keys "train_acc", "val_acc" and "test_acc"
    """
    model.eval()

    outputs = model(x, emb, layer)
    _, scores, _ = outputs
    # max over dim 1 yields (values, indices); the indices are the labels.
    _, y_pred = scores.max(1)

    # Resolve every split index before scoring, as the results are keyed
    # by split name.
    splits = (
        ("train_acc", idx["train_idx"]),
        ("val_acc", idx["val_idx"]),
        ("test_acc", idx["test_idx"]),
    )
    acc = {}
    for key, split in splits:
        acc[key] = accuracy(y_pred[split], y_true[split])
    return acc
def get_link_pred_perfs_by_attention(model, edge_y, layer_idx=-1, metric="roc_auc"):
    """
    Score link prediction using the attention values cached by a SuperGAT layer.

    :param model: GNN model (nn.Module)
    :param edge_y: [E_pred] tensor
    :param layer_idx: layer idx of GNN models
    :param metric: metric for perfs, one of "roc_auc", "average_precision"
        or "accuracy"
    :return: the value of the requested metric
    :raises ValueError: if `metric` is not one of the supported names
    """
    # Fail fast: previously the ValueError was constructed but never raised,
    # so an unsupported metric silently returned None.
    if metric not in ("roc_auc", "average_precision", "accuracy"):
        raise ValueError("Inappropriate metric: {}".format(metric))

    # Collect the cache of every SuperGAT module, in model.modules() order.
    cache_list = [
        m.cache for m in model.modules()
        if m.__class__.__name__ == SuperGAT.__name__
    ]
    cache_of_layer_idx = cache_list[layer_idx]

    att = cache_of_layer_idx["att_with_negatives"]  # [E + neg_E, heads]
    att = att.mean(dim=-1)  # [E + neg_E]

    edge_probs = np_sigmoid(att.cpu().numpy())
    edge_y = edge_y.cpu().numpy()

    if metric == "roc_auc":
        return roc_auc_score(edge_y, edge_probs)
    if metric == "average_precision":
        return average_precision_score(edge_y, edge_probs)
    # Only "accuracy" remains after the validation above.
    return accuracy(edge_probs, edge_y)
def evaluate(model):
    """Run `model` over the TEST dataset and print classification metrics.

    Predictions are the argmax over dim 1 of the model output for each batch.
    Predictions and labels are accumulated as Python scalars and converted to
    tensors before the metric helpers are called.
    """
    model.eval()

    pred, target = [], []
    for data in DataLoader(TEST, batch_size=BATCH_SIZE):
        data = data.to(DEVICE)
        logits = model(data.x, data.edge_index, data.batch)
        predicted = torch.argmax(logits, dim=1)
        pred.extend(p.item() for p in predicted)
        target.extend(y.item() for y in data.y)

    pred = torch.tensor(pred)
    target = torch.tensor(target)

    print("Accuracy: {:.2f}%".format(100 * accuracy(pred, target)))

    # NOTE(review): every helper below receives 1 as its third argument;
    # confirm that this is the intended value for these helpers.
    count_reports = (
        ("True Positive: {}", true_positive),
        ("True Negative: {}", true_negative),
        ("False Positive: {}", false_positive),
        ("False Negative: {}", false_negative),
    )
    for template, metric in count_reports:
        print(template.format(metric(pred, target, 1).item()))

    percent_reports = (
        ("Precision: {:.2f}%", precision),
        ("Recall: {:.2f}%", recall),
        ("F1 score: {:.2f}%", f1_score),
    )
    for template, metric in percent_reports:
        print(template.format(100 * metric(pred, target, 1).item()))
def validation_step(self, data):
    """Compute loss and accuracy (in percent) on the validation mask.

    Returns a dict with 'val_loss' (Python float) and 'val_acc'.
    """
    out = self(data)
    mask = data.val_mask

    masked_out = out[mask]
    masked_y = data.y[mask]

    # Targets equal to -1 are ignored by the loss.
    loss = F.nll_loss(masked_out, masked_y, ignore_index=-1)

    # Selecting rows and then taking the argmax over dim 1 yields the same
    # labels as taking the argmax first and selecting afterwards.
    masked_pred = masked_out.argmax(dim=1)
    acc = 100 * accuracy(pred=masked_pred, target=masked_y)

    metrics = {'val_loss': loss.item(), 'val_acc': acc}
    return metrics
def training_step(self, data):
    """Compute the training loss and accuracy (in percent) on the train mask.

    Returns a tuple ``(loss, {'train_acc': acc})``; the loss is returned as a
    tensor so the caller can back-propagate through it.
    """
    out = self(data)
    mask = data.train_mask

    masked_out = out[mask]
    masked_y = data.y[mask]

    # Targets equal to -1 are ignored by the loss.
    loss = F.nll_loss(masked_out, masked_y, ignore_index=-1)

    # Row selection commutes with the argmax over dim 1.
    masked_pred = masked_out.argmax(dim=1)
    acc = 100 * accuracy(pred=masked_pred, target=masked_y)

    return loss, {'train_acc': acc}
def test_metric():
    """Check the metric helpers on a 4-sample, 2-class example.

    With pred = [0, 0, 1, 1] and target = [0, 1, 0, 1], exactly one sample
    falls in each confusion-matrix cell for both classes.
    """
    pred = torch.tensor([0, 0, 1, 1])
    target = torch.tensor([0, 1, 0, 1])

    assert accuracy(pred, target) == 0.5

    # Every per-class count is 1 for both classes.
    count_metrics = (true_positive, true_negative, false_positive, false_negative)
    for count_fn in count_metrics:
        assert count_fn(pred, target, num_classes=2).tolist() == [1, 1]

    # Every per-class ratio is 0.5 for both classes.
    ratio_metrics = (precision, recall, f1_score)
    for ratio_fn in ratio_metrics:
        assert ratio_fn(pred, target, num_classes=2).tolist() == [0.5, 0.5]
def test(model, x, y, train_idx, val_idx, test_idx, edge_index=None):
    """
    Evaluate a model on the training, validation and test splits.

    Parameters:
        model :
            Model to evaluate; called as ``model(x)`` when `edge_index` is
            None and as ``model(x, edge_index)`` otherwise. It must return
            three values, the second of which holds the per-class scores.
        x : torch.tensor of shape (num_examples, num_dims)
            Input tensor
        y : torch.tensor of shape (num_examples)
            Label tensor
        train_idx : torch.tensor of shape (num_examples)
            Boolean tensor selecting the training examples
        val_idx : torch.tensor of shape (num_examples)
            Boolean tensor selecting the validation examples
        test_idx : torch.tensor of shape (num_examples)
            Boolean tensor selecting the test examples
        edge_index : torch.tensor of shape (2, num_edges), optional
            Edge index, forwarded to the model when given

    Returns:
        train_acc : accuracy on the training set
        val_acc : accuracy on the validation set
        test_acc : accuracy on the test set
    """
    model.eval()

    if edge_index is None:
        outputs = model(x)
    else:
        outputs = model(x, edge_index)
    _, out, _ = outputs

    y_pred = out.argmax(dim=-1)

    # Score the splits in train / val / test order.
    split_masks = (train_idx, val_idx, test_idx)
    scores = [accuracy(y_pred[mask], y[mask]) for mask in split_masks]
    return tuple(scores)
def test_classifier(model, loader, device):
    """Evaluate a graph classifier over every batch of `loader`.

    The model is called as ``model(data.x, data.edge_index, batch=data.batch)``
    and must return a pair whose first element holds the class scores. The
    negative log-likelihood of the log-softmax of those scores is accumulated,
    weighted by the number of graphs in each batch.

    Returns:
        A tuple ``(accuracy, precision, recall, f1, loss_all)`` where
        precision, recall and f1 are averaged over the ``model.num_output``
        classes and ``loss_all`` is the summed (not averaged) loss.
    """
    model.eval()

    y = torch.tensor([]).long().to(device)
    yp = torch.tensor([]).long().to(device)
    loss_all = 0

    for data in loader:
        data = data.to(device)
        pred, _ = model(data.x, data.edge_index, batch=data.batch)
        loss = F.nll_loss(F.log_softmax(pred, dim=-1), data.y)
        pred = pred.max(dim=1)[1]
        y = torch.cat([y, data.y])
        yp = torch.cat([yp, pred])
        loss_all += data.num_graphs * loss.item()

    # The metric helpers take (pred, target, ...). Passing the labels first,
    # as was done before, swaps the reported precision and recall (accuracy
    # and F1 are unaffected by the order).
    num_classes = model.num_output
    return (
        accuracy(yp, y),
        precision(yp, y, num_classes).mean().item(),
        recall(yp, y, num_classes).mean().item(),
        f1_score(yp, y, num_classes).mean().item(),
        loss_all,
    )
def test_step(self, data):
    """Compute accuracy (in percent) on the test mask.

    Returns a dict with the single key 'test_acc'.
    """
    out = self(data)
    mask = data.test_mask

    # Row selection commutes with the argmax over dim 1.
    masked_pred = out[mask].argmax(dim=1)
    masked_y = data.y[mask]

    acc = 100 * accuracy(pred=masked_pred, target=masked_y)
    return {'test_acc': acc}
# Evaluate on the validation and test nodes without tracking gradients.
# NOTE(review): the original indentation of this fragment was lost. If it
# lives inside an epoch loop, the "Best Results" prints at the bottom
# presumably belong after that loop -- confirm against the enclosing code.
with torch.no_grad():
    val_loss, y_valid, _ = model.forward(A, node_features, valid_node,
                                         valid_target)
    val_f1 = torch.mean(
        f1_score(torch.argmax(y_valid, dim=1), valid_target,
                 num_classes=3)).cpu().numpy()
    print('Valid - Loss: {}, Macro_F1: {}'.format(
        val_loss.detach().cpu().numpy(), val_f1))

    test_loss, y_test, W = model.forward(A, node_features, test_node,
                                         test_target)
    test_f1 = torch.mean(
        f1_score(torch.argmax(y_test, dim=1), test_target,
                 num_classes=3)).cpu().numpy()
    test_acc = accuracy(torch.argmax(y_test, dim=1), test_target)
    print('Test - Loss: {}, Macro_F1: {}, Acc: {}\n'.format(
        test_loss.detach().cpu().numpy(), test_f1, test_acc))

# Keep the train / valid / test numbers from the evaluation with the best
# validation macro-F1 seen so far.
if val_f1 > best_val_f1:
    best_val_loss = val_loss.detach().cpu().numpy()
    best_test_loss = test_loss.detach().cpu().numpy()
    best_train_loss = loss.detach().cpu().numpy()
    best_train_f1 = train_f1
    best_val_f1 = val_f1
    best_test_f1 = test_f1
torch.cuda.empty_cache()

print('---------------Best Results--------------------')
# The train line previously printed best_test_loss under the "Train" label;
# report the recorded training loss instead.
print('Train - Loss: {}, Macro_F1: {}'.format(best_train_loss, best_train_f1))
print('Valid - Loss: {}, Macro_F1: {}'.format(best_val_loss, best_val_f1))
def _score_fn(y, pred):
    """Return the accuracy of the argmax (over the last dim) of `pred` against `y`."""
    predicted_labels = pred.argmax(-1)
    return accuracy(y, predicted_labels)
def _score_fn(y, pred):
    """Return the accuracy of the rounded sigmoid of `pred` against `y`."""
    probabilities = pred.sigmoid()
    # Rounding the probabilities turns them into hard 0/1 decisions.
    decisions = probabilities.round()
    return accuracy(y, decisions)
def linear_model(data_path: Path, output_path: Path, mode: Mode,
                 num_input: int, radius: int, num_output: int, epochs: int,
                 lr: float = typer.Argument(1e-3),
                 lambda_: float = typer.Argument(1e-4)):
    """Fit an L1-regularised linear model on fingerprints of the input records.

    The JSON file at `data_path` must hold a list of records, each with an
    'id' (passed to `morgan_count_fingerprint` and `mol_from_smiles`) and a
    'prediction' -> 'output' entry used as the regression target.

    Args:
        data_path: JSON file with the records to fit.
        output_path: Directory that receives ``W.npy``, ``morgan_envs.npy``
            and ``mol-with-indexes.svg``.
        mode: When equal to 'classification', accuracy between the argmax of
            the model output and of the targets is tracked, and training
            stops early once it reaches 1.
        num_input: Fingerprint length, i.e. the number of linear inputs.
        radius: Radius forwarded to `morgan_count_fingerprint`.
        num_output: Number of outputs of the linear layer.
        epochs: Maximum number of SGD steps.
        lr: SGD learning rate.
        lambda_: Weight of the L1 penalty on the layer's parameters.
    """
    # Use a context manager so the input file handle is closed promptly.
    with open(data_path, 'r') as data_file:
        data = json.load(data_file)

    # One dict per record, filled in by the fingerprint helper via bitInfo.
    info = [{} for _ in data]
    X = torch.stack([
        morgan_count_fingerprint(d['id'], num_input, radius,
                                 bitInfo=info[i]).tensor()
        for i, d in enumerate(data)
    ]).float()
    Y = torch.stack([torch.tensor(d['prediction']['output']) for d in data])
    print("X = ", X.numpy())
    print("Y = ",
          Y.numpy() if mode != 'classification' else Y.argmax(dim=-1).numpy())

    create_path(output_path)
    interpretable_model = Sequential(Linear(num_input, num_output))
    optimizer = torch.optim.SGD(interpretable_model.parameters(), lr=lr)

    with tq(total=epochs) as pbar:
        for _ in range(epochs):
            optimizer.zero_grad()
            out = interpretable_model(X).squeeze()
            # L1 penalty over all parameters of the linear layer
            # (weight and bias).
            W = torch.cat(
                [w.flatten() for w in interpretable_model[0].parameters()])
            reg = lambda_ * torch.norm(W, 1)
            loss = F.mse_loss(out, Y) + reg
            description = f"Loss: {loss.item():.4f}"
            if mode == 'classification':
                y1 = F.softmax(out, dim=-1).max(dim=1)[1]
                y2 = F.softmax(Y, dim=-1).max(dim=1)[1]
                acc = accuracy(y1, y2)
                # The description previously ended with a stray ")".
                description = description + f", Accuracy: {acc:.2f}"
                if acc == 1:
                    # Perfect agreement with the targets: stop early.
                    break
            loss.backward()
            optimizer.step()
            pbar.update(1)
            pbar.set_description(description)

    weight = interpretable_model[0].weight
    torch.set_printoptions(precision=2)
    np.set_printoptions(precision=2)
    w_abs = weight.abs().detach().numpy()
    for c in range(num_output):
        print(f"Feature importance for class {c}:")
        print(f"max: {w_abs[c].max()}")
        print(f"mean: {w_abs[c].mean()}")
        print(f"std: {w_abs[c].std()}")

    np.save(f"{output_path}/W.npy", weight.detach().numpy())
    np.save(f"{output_path}/morgan_envs.npy", np.array(info))

    for d in data:
        mol = mol_from_smiles(d['id'])
        d2d = Draw.rdMolDraw2D.MolDraw2DSVG(300, 300)
        d2d.drawOptions().addAtomIndices = True
        d2d.DrawMolecule(mol)
        d2d.FinishDrawing()
        # NOTE(review): every record writes to the same file name, so only
        # the drawing of the last record survives -- confirm this is intended.
        with open(f"{output_path}/mol-with-indexes.svg", "w") as svg_file:
            svg_file.write(d2d.GetDrawingText())