def main(args):
    # create the dataset
    train_dataset, test_dataset = LegacyPPIDataset(mode="train"), LegacyPPIDataset(mode="test")
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    n_features, n_classes = train_dataset.features.shape[1], train_dataset.labels.shape[1]

    # create the model, loss function and optimizer
    device = torch.device("cpu" if args.gpu < 0 else "cuda:" + str(args.gpu))
    # model = BasicGraphModel(g=train_dataset.graph, n_layers=2, input_size=n_features,
    #                         hidden_size=256, output_size=n_classes, nonlinearity=F.elu).to(device)
    model = GAT(g=train_dataset.graph, in_dim=n_features, hidden_dim=256, out_dim=n_classes,
                num_heads=4, n_layers=2).to(device)
    loss_fcn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

    # train and test
    if args.mode == "train":
        train(model, loss_fcn, device, optimizer, train_dataloader, test_dataset)
        torch.save(model.state_dict(), MODEL_STATE_FILE)
    model.load_state_dict(torch.load(MODEL_STATE_FILE))
    return test(model, loss_fcn, device, test_dataloader)
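# The dataloaders above (and in the variants below) pass a `collate_fn` that
# is not defined in these snippets. A minimal sketch, consistent with the
# `load_ppi` helper further down, assuming LegacyPPIDataset yields
# (graph, features, labels) triples:
import numpy as np
import torch
from dgl import batch as dgl_batch

def collate_fn(sample):
    # Merge the per-sample graphs into one batched DGL graph and stack the
    # node-level feature/label arrays so they line up with the batched graph.
    graphs, feats, labels = map(list, zip(*sample))
    graph = dgl_batch(graphs)
    feats = torch.from_numpy(np.concatenate(feats))
    labels = torch.from_numpy(np.concatenate(labels))
    return graph, feats, labels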
def main(args):
    # create the dataset
    train_dataset, test_dataset = LegacyPPIDataset(mode="train"), LegacyPPIDataset(mode="test")
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    n_features, n_classes = train_dataset.features.shape[1], train_dataset.labels.shape[1]

    # create the model, loss function and optimizer
    device = torch.device("cpu" if args.gpu < 0 else "cuda:" + str(args.gpu))
    print(f"Using device: {device}")
    if args.model == 'gat':
        print("Using model GAT")
        print(f"Number of heads: {args.num_heads}")
        print(f"Hidden dim: {args.hidden_dim}")
        print(f"Save location {MODEL_STATE_FILE}")
        model = GAT(g=train_dataset.graph, num_layers=args.num_layers, in_dim=n_features,
                    num_hidden=args.hidden_dim, num_classes=n_classes,
                    heads=[args.num_heads] * args.num_layers, activation=None,
                    feat_drop=args.feat_drop, attn_drop=args.att_drop,
                    negative_slope=0.2, residual=True).to(device)
    else:
        print("Using base model (GCN)")
        model = BasicGraphModel(g=train_dataset.graph, n_layers=2, input_size=n_features,
                                hidden_size=256, output_size=n_classes, nonlinearity=F.elu).to(device)
    loss_fcn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters())

    # train and test
    if args.mode == "train":
        train(model, loss_fcn, device, optimizer, train_dataloader, test_dataset)
        torch.save(model.state_dict(), MODEL_STATE_FILE)
    model.load_state_dict(torch.load(MODEL_STATE_FILE))
    return test(model, loss_fcn, device, test_dataloader)
def main(args):
    # create the dataset
    train_dataset, valid_dataset, test_dataset = (LegacyPPIDataset(mode="train"),
                                                  LegacyPPIDataset(mode="valid"),
                                                  LegacyPPIDataset(mode="test"))
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    valid_dataloader = DataLoader(valid_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    n_features, n_classes = train_dataset.features.shape[1], train_dataset.labels.shape[1]
    print("Number of features: ", n_features, " Number of classes: ", n_classes)

    # create the model, loss function and optimizer
    device = torch.device("cpu" if args.gpu < 0 else "cuda:" + str(args.gpu))
    model = GATModel(graph=train_dataset.graph, n_heads=8, n_layers=3, input_size=n_features,
                     hidden_size=256, output_size=n_classes, nonlinearity=F.leaky_relu).to(device)
    # model = MyGAT(graph=train_dataset.graph, n_layers=3, input_size=n_features, output_size=n_classes).to(device)
    loss_fcn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.5)

    # train and test
    if args.mode == "train":
        train(model, loss_fcn, device, optimizer, scheduler, train_dataloader,
              valid_dataset, epochs=args.epochs)
        torch.save(model.state_dict(), MODEL_STATE_FILE)
    model.load_state_dict(torch.load(MODEL_STATE_FILE))
    return test(model, loss_fcn, device, test_dataloader)
def main(args):
    # create the dataset
    train_dataset, test_dataset = LegacyPPIDataset(mode="train"), LegacyPPIDataset(mode="test")
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    n_features, n_classes = train_dataset.features.shape[1], train_dataset.labels.shape[1]
    print("n_features", n_features)
    print("n_classes", n_classes)

    # create the model, loss function and optimizer
    device = torch.device("cpu" if args.gpu < 0 else "cuda:" + str(args.gpu))
    num_layers = 3
    model = GAT_opti(
        g=train_dataset.graph,
        num_layers=num_layers,
        in_dim=n_features,
        num_hidden=256,
        num_classes=n_classes,
        heads=[5, 5, 5],
        activation=F.elu,
        feat_drop=0,
        attn_drop=0,
        negative_slope=0.2,
        residual=True,
    ).to(device)
    loss_fcn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters())

    # train and test
    if args.mode == "train":
        train(model, loss_fcn, device, optimizer, train_dataloader, test_dataset)
        torch.save(model.state_dict(), MODEL_STATE_FILE)
    model.load_state_dict(torch.load(MODEL_STATE_FILE))
    return test(model, loss_fcn, device, test_dataloader)
# Module-level imports assumed by this helper: namedtuple, numpy, torch.
from collections import namedtuple
import numpy as np
import torch

def load_ppi(batch_size):
    """
    Loads the DGL-hosted PPI dataset.

    :param batch_size: number of samples in each batch
    :return: namedtuple for the PPI graph dataset; attributes:
             [input_embed_len, n_labels, train_graph, train_loader, test_loader]
    """
    from dgl.data.ppi import LegacyPPIDataset
    from torch.utils.data import DataLoader
    from dgl import batch as dgl_batch

    def collate_fn(sample):
        """Helper function for the torch dataloader."""
        graphs, feats, labels = map(list, zip(*sample))
        graph = dgl_batch(graphs)
        feats = torch.from_numpy(np.concatenate(feats))
        labels = torch.from_numpy(np.concatenate(labels))
        return graph, feats, labels
    # enddef

    # create and return the dataset
    train_data = LegacyPPIDataset(mode='train')
    train_loader = DataLoader(train_data, batch_size=batch_size, collate_fn=collate_fn)
    test_loader = DataLoader(LegacyPPIDataset(mode='test'), batch_size=batch_size,
                             collate_fn=collate_fn)
    data_tuple = namedtuple("data_vars", [
        "input_embed_len", "n_labels", "train_graph", "train_loader", "test_loader"
    ])
    return data_tuple(train_data.features.shape[1], train_data.labels.shape[1],
                      train_data.graph, train_loader, test_loader)
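# Hypothetical usage of load_ppi (the printed sizes are the standard PPI
# dimensions: 50 input features, 121 labels):
data = load_ppi(batch_size=2)
print(data.input_embed_len, data.n_labels)  # 50 121
for graph, feats, labels in data.train_loader:
    pass  # one batched DGL graph plus stacked node features/labels per step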
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    batch_size = args.batch_size
    cur_step = 0
    patience = args.patience
    best_score = -1
    best_loss = 10000

    # define loss function
    loss_fcn = torch.nn.BCEWithLogitsLoss()

    # create the dataset
    train_dataset = LegacyPPIDataset(mode='train')
    valid_dataset = LegacyPPIDataset(mode='valid')
    test_dataset = LegacyPPIDataset(mode='test')
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=collate)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, collate_fn=collate)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, collate_fn=collate)
    n_classes = train_dataset.labels.shape[1]
    num_feats = train_dataset.features.shape[1]
    g = train_dataset.graph
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]

    # define the model
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop, args.alpha, args.residual)

    # define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    model = model.to(device)

    for epoch in range(args.epochs):
        model.train()
        loss_list = []
        for batch, data in enumerate(train_dataloader):
            subgraph, feats, labels = data
            feats = feats.to(device)
            labels = labels.to(device)
            model.g = subgraph
            for layer in model.gat_layers:
                layer.g = subgraph
            logits = model(feats.float())
            loss = loss_fcn(logits, labels.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        loss_data = np.array(loss_list).mean()
        print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data))

        if epoch % 5 == 0:
            score_list = []
            val_loss_list = []
            for batch, valid_data in enumerate(valid_dataloader):
                subgraph, feats, labels = valid_data
                feats = feats.to(device)
                labels = labels.to(device)
                score, val_loss = evaluate(feats.float(), model, subgraph, labels.float(), loss_fcn)
                score_list.append(score)
                val_loss_list.append(val_loss)
            mean_score = np.array(score_list).mean()
            mean_val_loss = np.array(val_loss_list).mean()
            print("F1-Score: {:.4f} ".format(mean_score))

            # early stop
            if mean_score > best_score or best_loss > mean_val_loss:
                if mean_score > best_score and best_loss > mean_val_loss:
                    val_early_loss = mean_val_loss
                    val_early_score = mean_score
                best_score = np.max((mean_score, best_score))
                best_loss = np.min((best_loss, mean_val_loss))
                cur_step = 0
            else:
                cur_step += 1
                if cur_step == patience:
                    break

    test_score_list = []
    for batch, test_data in enumerate(test_dataloader):
        subgraph, feats, labels = test_data
        feats = feats.to(device)
        labels = labels.to(device)
        test_score_list.append(evaluate(feats, model, subgraph, labels.float(), loss_fcn)[0])
    print("F1-Score: {:.4f}".format(np.array(test_score_list).mean()))
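# Hedged sketch of the `evaluate` helper the loop above assumes (not shown
# here). It mirrors the DGL GAT/PPI example: threshold the logits at zero
# (equivalent to sigmoid >= 0.5) and report micro-averaged F1 plus the loss.
from sklearn.metrics import f1_score

def evaluate(feats, model, subgraph, labels, loss_fcn):
    model.eval()
    with torch.no_grad():
        model.g = subgraph
        for layer in model.gat_layers:
            layer.g = subgraph
        output = model(feats.float())
        loss_data = loss_fcn(output, labels.float())
        predict = np.where(output.cpu().numpy() >= 0.0, 1, 0)
        score = f1_score(labels.cpu().numpy(), predict, average="micro")
    model.train()
    return score, loss_data.item()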
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    writer = SummaryWriter()
    batch_size = args.batch_size
    # cur_step = 0
    # patience = args.patience
    # best_score = -1
    # best_loss = 10000

    # define loss function
    loss_fcn = torch.nn.BCEWithLogitsLoss()

    # create the dataset
    train_dataset = LegacyPPIDataset(mode='train')
    valid_dataset = LegacyPPIDataset(mode='valid')
    test_dataset = LegacyPPIDataset(mode='test')
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=collate)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, collate_fn=collate)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, collate_fn=collate)
    n_classes = train_dataset.labels.shape[1]
    num_feats = train_dataset.features.shape[1]
    g = train_dataset.graph
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]

    # define the model
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop, args.alpha,
                args.bias, args.residual, args.l0)
    print(model)

    # define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    model = model.to(device)

    best_epoch = 0
    dur = []
    acc = []
    for epoch in range(args.epochs):
        num = 0
        model.train()
        if epoch % 5 == 0:
            t0 = time.time()
        loss_list = []
        for batch, data in enumerate(train_dataloader):
            subgraph, feats, labels = data
            feats = feats.to(device)
            labels = labels.to(device)
            model.g = subgraph
            for layer in model.gat_layers:
                layer.g = subgraph
            logits = model(feats.float())
            loss = loss_fcn(logits, labels.float())
            loss_l0 = args.loss_l0 * (model.gat_layers[0].loss)
            optimizer.zero_grad()
            (loss + loss_l0).backward()
            optimizer.step()
            loss_list.append(loss.item())
            num += model.gat_layers[0].num
        if epoch % 5 == 0:
            dur.append(time.time() - t0)
        loss_data = np.array(loss_list).mean()
        print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data))
        writer.add_scalar('edge_num/0', num, epoch)

        if epoch % 5 == 0:
            score_list = []
            val_loss_list = []
            for batch, valid_data in enumerate(valid_dataloader):
                subgraph, feats, labels = valid_data
                feats = feats.to(device)
                labels = labels.to(device)
                score, val_loss = evaluate(feats.float(), model, subgraph, labels.float(), loss_fcn)
                score_list.append(score)
                val_loss_list.append(val_loss)
            mean_score = np.array(score_list).mean()
            mean_val_loss = np.array(val_loss_list).mean()
            print("val F1-Score: {:.4f} ".format(mean_score))
            writer.add_scalar('loss', mean_val_loss, epoch)
            writer.add_scalar('f1/test_f1_mic', mean_score, epoch)
            acc.append(mean_score)

        # # early stop
        # if mean_score > best_score or best_loss > mean_val_loss:
        #     if mean_score > best_score and best_loss > mean_val_loss:
        #         val_early_loss = mean_val_loss
        #         val_early_score = mean_score
        #         torch.save(model.state_dict(), '{}.pkl'.format('save_rand'))
        #         best_epoch = epoch
        #
        #     best_score = np.max((mean_score, best_score))
        #     best_loss = np.min((best_loss, mean_val_loss))
        #     cur_step = 0
        # else:
        #     cur_step += 1
        #     if cur_step == patience:
        #         break

    test_score_list = []
    for batch, test_data in enumerate(test_dataloader):
        subgraph, feats, labels = test_data
        feats = feats.to(device)
        labels = labels.to(device)
        test_score_list.append(evaluate(feats, model, subgraph, labels.float(), loss_fcn)[0])
    acc = np.array(test_score_list).mean()
    print("test F1-Score: {:.4f}".format(acc))
    writer.close()
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    # batch_size = args.batch_size
    # cur_step = 0
    # patience = args.patience
    # best_score = -1
    # best_loss = 10000
    # # define loss function
    # loss_fcn = torch.nn.BCEWithLogitsLoss()

    # create the dataset
    train_dataset = LegacyPPIDataset(mode='train')
    valid_dataset = LegacyPPIDataset(mode='valid')
    test_dataset = LegacyPPIDataset(mode='test')
    # nxg = valid_dataset.graph.to_networkx().to_undirected()
    # comps = [comp for comp in nx.connected_components(nxg) if len(comp) > 10]
    # print(len(comps))
    # exit()

    # build 5 folds of 4 validation graphs each out of the 20 training graphs
    cross_valid_list = []
    for i in range(5):
        cross_valid_list.append(list(range(4 * i, 4 * (i + 1))))
    cross_train_dataset = copy.copy(train_dataset)
    valid_precision = []
    valid_recall = []
    valid_scores = []
    test_precision = []
    test_recall = []
    test_scores = []
    for ind, valid_list in enumerate(cross_valid_list):
        batch_size = args.batch_size
        cur_step = 0
        patience = args.patience
        best_score = -1
        best_loss = 10000

        # define loss function
        loss_fcn = torch.nn.BCEWithLogitsLoss()

        train_list = [j for j in range(20) if j not in valid_list]
        print('Train List: {}'.format(train_list))
        print('Valid List: {}'.format(valid_list))
        modify(train_dataset, cross_train_dataset, train_list, mode='train', offset=0)
        modify(valid_dataset, cross_train_dataset, valid_list, mode='valid', offset=16)
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=collate)
        valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, collate_fn=collate)
        test_dataloader = DataLoader(test_dataset, batch_size=batch_size, collate_fn=collate)
        n_classes = train_dataset.labels.shape[1]
        num_feats = train_dataset.features.shape[1]
        g = train_dataset.graph
        heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]

        # define the model
        model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                    heads, F.elu, args.in_drop, args.attn_drop, args.alpha, args.residual)

        # define the optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        model = model.to(device)

        for epoch in range(args.epochs):
            model.train()
            loss_list = []
            for batch, data in enumerate(train_dataloader):
                subgraph, feats, labels = data
                feats = feats.to(device)
                labels = labels.to(device)
                model.g = subgraph
                for layer in model.gat_layers:
                    layer.g = subgraph
                logits = model(feats.float())
                loss = loss_fcn(logits, labels.float())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                loss_list.append(loss.item())
            loss_data = np.array(loss_list).mean()
            print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data), end=' ')

            if epoch % 1 == 0:
                score_list = []
                val_loss_list = []
                for batch, valid_data in enumerate(valid_dataloader):
                    subgraph, feats, labels = valid_data
                    feats = feats.to(device)
                    labels = labels.to(device)
                    prec, recall, score, val_loss = evaluate(feats.float(), model, subgraph,
                                                             labels.float(), loss_fcn)
                    score_list.append([prec, recall, score])
                    val_loss_list.append(val_loss)
                mean_score = np.array(score_list).mean(axis=0)
                mean_val_loss = np.array(val_loss_list).mean()
                print("| Valid Precision: {:.4f} | Valid Recall: {:.4f} | Valid F1-Score: {:.4f} ".format(
                    mean_score[0], mean_score[1], mean_score[2]), end=' ')

                test_score_list = []
                for batch, test_data in enumerate(test_dataloader):
                    subgraph, feats, labels = test_data
                    feats = feats.to(device)
                    labels = labels.to(device)
                    test_prec, test_rec, test_score, _ = evaluate(feats, model, subgraph,
                                                                  labels.float(), loss_fcn)
                    test_score_list.append([test_prec, test_rec, test_score])
                mean_test_score = np.array(test_score_list).mean(axis=0)
                print("| Test Precision: {:.4f} | Test Recall: {:.4f} | Test F1-Score: {:.4f}".format(
                    mean_test_score[0], mean_test_score[1], mean_test_score[2]))

                if epoch == args.epochs - 1:
                    valid_precision.append(round(mean_score[0], 4))
                    valid_recall.append(round(mean_score[1], 4))
                    valid_scores.append(round(mean_score[2], 4))
                    test_precision.append(round(mean_test_score[0], 4))
                    test_recall.append(round(mean_test_score[1], 4))
                    test_scores.append(round(mean_test_score[2], 4))

                # early stop
                if mean_score[2] > best_score or best_loss > mean_val_loss:
                    if mean_score[2] > best_score and best_loss > mean_val_loss:
                        val_early_loss = mean_val_loss
                        val_early_score = mean_score[2]
                    best_score = np.max((mean_score[2], best_score))
                    best_loss = np.min((best_loss, mean_val_loss))
                    cur_step = 0
                else:
                    cur_step += 1
                    if cur_step == patience:
                        valid_precision.append(round(mean_score[0], 4))
                        valid_recall.append(round(mean_score[1], 4))
                        valid_scores.append(round(mean_score[2], 4))
                        test_precision.append(round(mean_test_score[0], 4))
                        test_recall.append(round(mean_test_score[1], 4))
                        test_scores.append(round(mean_test_score[2], 4))
                        break

    print('Valid Scores: {}'.format(valid_scores))
    print('Test Scores: {}'.format(test_scores))
    out_matrix = np.stack([valid_precision, valid_recall, valid_scores,
                           test_precision, test_recall, test_scores], axis=1)
    np.savetxt('results.csv', out_matrix, delimiter=',')
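# The cross-validation script above assumes a four-value `evaluate` returning
# (precision, recall, F1, loss); the real helper (like `modify`) is not shown.
# A sketch under the same logit-thresholding assumption as the earlier variant:
from sklearn.metrics import f1_score, precision_score, recall_score

def evaluate(feats, model, subgraph, labels, loss_fcn):
    model.eval()
    with torch.no_grad():
        model.g = subgraph
        for layer in model.gat_layers:
            layer.g = subgraph
        output = model(feats.float())
        loss_data = loss_fcn(output, labels.float())
        predict = np.where(output.cpu().numpy() >= 0.0, 1, 0)  # logits >= 0 <=> sigmoid >= 0.5
        y_true = labels.cpu().numpy()
        prec = precision_score(y_true, predict, average="micro")
        rec = recall_score(y_true, predict, average="micro")
        score = f1_score(y_true, predict, average="micro")
    model.train()
    return prec, rec, score, loss_data.item()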