Example #1
def main(args):
    # create the dataset
    train_dataset, test_dataset = LegacyPPIDataset(
        mode="train"), LegacyPPIDataset(mode="test")
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  collate_fn=collate_fn)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 collate_fn=collate_fn)
    n_features, n_classes = train_dataset.features.shape[
        1], train_dataset.labels.shape[1]

    # create the model, loss function and optimizer
    device = torch.device("cpu" if args.gpu < 0 else "cuda:" + str(args.gpu))
    # model = BasicGraphModel(g=train_dataset.graph, n_layers=2, input_size=n_features,
    #                         hidden_size=256, output_size=n_classes, nonlinearity=F.elu).to(device)

    model = GAT(g=train_dataset.graph,
                in_dim=n_features,
                hidden_dim=256,
                out_dim=n_classes,
                num_heads=4,
                n_layers=2).to(device)

    loss_fcn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

    # train and test
    if args.mode == "train":
        train(model, loss_fcn, device, optimizer, train_dataloader,
              test_dataset)
        torch.save(model.state_dict(), MODEL_STATE_FILE)
    model.load_state_dict(torch.load(MODEL_STATE_FILE))
    return test(model, loss_fcn, device, test_dataloader)
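Examples #1-#4 pass a module-level collate_fn to DataLoader without defining it. A minimal sketch, mirroring the helper defined inside load_ppi in Example #5, would batch the sampled DGL graphs and concatenate their feature and label arrays:

import numpy as np
import torch
from dgl import batch as dgl_batch

# Assumed collate_fn (not shown in Examples #1-#4); mirrors Example #5.
def collate_fn(sample):
    graphs, feats, labels = map(list, zip(*sample))
    graph = dgl_batch(graphs)                          # merge graphs into one batched graph
    feats = torch.from_numpy(np.concatenate(feats))    # stack node feature arrays
    labels = torch.from_numpy(np.concatenate(labels))  # stack node label arrays
    return graph, feats, labels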
Example #2
def main(args):
    # create the dataset
    train_dataset, test_dataset = LegacyPPIDataset(
        mode="train"), LegacyPPIDataset(mode="test")
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  collate_fn=collate_fn)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 collate_fn=collate_fn)
    n_features, n_classes = train_dataset.features.shape[
        1], train_dataset.labels.shape[1]

    # create the model, loss function and optimizer
    device = torch.device("cpu" if args.gpu < 0 else "cuda:" + str(args.gpu))
    print(f"Using device: {device}")

    if args.model == 'gat':
        print("Using model GAT")
        print(f"Number of heads: {args.num_heads}")
        print(f"Hidden dim: {args.hidden_dim}")
        print(f"Save location: {MODEL_STATE_FILE}")

        model = GAT(g=train_dataset.graph,
                    num_layers=args.num_layers,
                    in_dim=n_features,
                    num_hidden=args.hidden_dim,
                    num_classes=n_classes,
                    heads=[args.num_heads] * args.num_layers,
                    activation=None,
                    feat_drop=args.feat_drop,
                    attn_drop=args.att_drop,
                    negative_slope=0.2,
                    residual=True).to(device)
    else:
        print(f"Using base model (GCN)")
        model = BasicGraphModel(g=train_dataset.graph,
                                n_layers=2,
                                input_size=n_features,
                                hidden_size=256,
                                output_size=n_classes,
                                nonlinearity=F.elu).to(device)

    loss_fcn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters())

    # train and test
    if args.mode == "train":
        train(model, loss_fcn, device, optimizer, train_dataloader,
              test_dataset)
        torch.save(model.state_dict(), MODEL_STATE_FILE)
    model.load_state_dict(torch.load(MODEL_STATE_FILE))
    return test(model, loss_fcn, device, test_dataloader)
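Example #2 reads several attributes off args that are not defined here. A hypothetical argparse setup covering them might look as follows (flag names and defaults are illustrative assumptions, not from the source):

import argparse

# Hypothetical parser matching the attributes Example #2 reads from args;
# argparse converts dashes to underscores, so --batch-size -> args.batch_size.
def parse_args():
    parser = argparse.ArgumentParser(description="GAT/GCN on PPI")
    parser.add_argument("--mode", choices=["train", "test"], default="train")
    parser.add_argument("--model", choices=["gat", "gcn"], default="gat")
    parser.add_argument("--gpu", type=int, default=-1)  # <0 selects CPU
    parser.add_argument("--batch-size", type=int, default=2)
    parser.add_argument("--num-layers", type=int, default=2)
    parser.add_argument("--num-heads", type=int, default=4)
    parser.add_argument("--hidden-dim", type=int, default=256)
    parser.add_argument("--feat-drop", type=float, default=0.0)
    parser.add_argument("--att-drop", type=float, default=0.0)
    return parser.parse_args()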
Example #3
def main(args):
    # create the dataset
    train_dataset, valid_dataset, test_dataset = LegacyPPIDataset(
        mode="train"), LegacyPPIDataset(mode="valid"), LegacyPPIDataset(
            mode="test")
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  collate_fn=collate_fn)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=args.batch_size,
                                  collate_fn=collate_fn)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 collate_fn=collate_fn)
    n_features, n_classes = train_dataset.features.shape[
        1], train_dataset.labels.shape[1]
    print("Number of features: ", n_features, " Number of classes: ",
          n_classes)

    # create the model, loss function and optimizer
    device = torch.device("cpu" if args.gpu < 0 else "cuda:" + str(args.gpu))
    model = GATModel(graph=train_dataset.graph,
                     n_heads=8,
                     n_layers=3,
                     input_size=n_features,
                     hidden_size=256,
                     output_size=n_classes,
                     nonlinearity=F.leaky_relu).to(device)
    # model = MyGAT(graph=train_dataset.graph, n_layers=3, input_size=n_features, output_size=n_classes).to(device)

    loss_fcn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=100,
                                                gamma=0.5)

    # train and test
    if args.mode == "train":
        train(model,
              loss_fcn,
              device,
              optimizer,
              scheduler,
              train_dataloader,
              valid_dataset,
              epochs=args.epochs)
        torch.save(model.state_dict(), MODEL_STATE_FILE)
    model.load_state_dict(torch.load(MODEL_STATE_FILE))
    return test(model, loss_fcn, device, test_dataloader)
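The train() called in Example #3 takes a scheduler and an epoch count, unlike the other examples. A minimal sketch consistent with that signature, following the inline training loops of Examples #6-#8 (validation against valid_dataset is omitted for brevity):

# Assumed train() for Example #3; depending on the model, the batched
# subgraph may need to be attached to it each step, as in Examples #6-#8.
def train(model, loss_fcn, device, optimizer, scheduler, train_dataloader,
          valid_dataset, epochs=250):
    for epoch in range(epochs):
        model.train()
        losses = []
        for subgraph, feats, labels in train_dataloader:
            feats, labels = feats.float().to(device), labels.float().to(device)
            loss = loss_fcn(model(feats), labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        scheduler.step()  # with StepLR above: halve the LR every step_size epochs
        print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, np.mean(losses)))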
Example #4
def main(args):
    # create the dataset
    train_dataset, test_dataset = LegacyPPIDataset(
        mode="train"), LegacyPPIDataset(mode="test")
    train_dataloader = DataLoader(
        train_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    test_dataloader = DataLoader(
        test_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    n_features, n_classes = train_dataset.features.shape[1], train_dataset.labels.shape[1]
    print("n_features", n_features)
    print("n_classes", n_classes)

    # create the model, loss function and optimizer
    device = torch.device("cpu" if args.gpu < 0 else "cuda:" + str(args.gpu))

    num_layers = 3
    model = GAT_opti(
        g=train_dataset.graph,
        num_layers=num_layers,
        in_dim=n_features,
        num_hidden=256,
        num_classes=n_classes,
        heads=[5, 5, 5],
        activation=F.elu,
        feat_drop=0,
        attn_drop=0,
        negative_slope=0.2,
        residual=True
    ).to(device)

    loss_fcn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters())

    # train and test
    if args.mode == "train":
        train(model, loss_fcn, device, optimizer,
              train_dataloader, test_dataset)
        torch.save(model.state_dict(), MODEL_STATE_FILE)
    model.load_state_dict(torch.load(MODEL_STATE_FILE))
    return test(model, loss_fcn, device, test_dataloader)
Example #5
def load_ppi(batch_size):
    """
    Loads the DGL-hosted PPI dataset

    :param batch_size: number of samples in each batch
    :return: namedtuple for the PPI graph dataset; attributes:
        [input_embed_len, n_labels, train_graph, train_loader, test_loader]
    """
    from collections import namedtuple

    import numpy as np
    import torch
    from dgl import batch as dgl_batch
    from dgl.data.ppi import LegacyPPIDataset
    from torch.utils.data import DataLoader

    def collate_fn(sample):
        """
        Helper function for the torch dataloader
        """
        graphs, feats, labels = map(list, zip(*sample))
        graph = dgl_batch(graphs)
        feats = torch.from_numpy(np.concatenate(feats))
        labels = torch.from_numpy(np.concatenate(labels))
        return graph, feats, labels

    #enddef

    # create and return the dataset
    train_data = LegacyPPIDataset(mode='train')
    train_loader = DataLoader(train_data,
                              batch_size=batch_size,
                              collate_fn=collate_fn)
    test_loader = DataLoader(LegacyPPIDataset(mode='test'),
                             batch_size=batch_size,
                             collate_fn=collate_fn)
    data_tuple = namedtuple("data_vars", [
        "input_embed_len", "n_labels", "train_graph", "train_loader",
        "test_loader"
    ])
    return data_tuple(train_data.features.shape[1], train_data.labels.shape[1],
                      train_data.graph, train_loader, test_loader)
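A short usage example for load_ppi (the batch size is illustrative):

data = load_ppi(batch_size=2)
print(data.input_embed_len, data.n_labels)      # feature and label dimensions
for graph, feats, labels in data.train_loader:  # one batched training graph
    print(graph.number_of_nodes(), feats.shape, labels.shape)
    break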
Example #6
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    batch_size = args.batch_size
    cur_step = 0
    patience = args.patience
    best_score = -1
    best_loss = 10000
    # define loss function
    loss_fcn = torch.nn.BCEWithLogitsLoss()
    # create the dataset
    train_dataset = LegacyPPIDataset(mode='train')
    valid_dataset = LegacyPPIDataset(mode='valid')
    test_dataset = LegacyPPIDataset(mode='test')
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  collate_fn=collate)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=batch_size,
                                  collate_fn=collate)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 collate_fn=collate)
    n_classes = train_dataset.labels.shape[1]
    num_feats = train_dataset.features.shape[1]
    g = train_dataset.graph
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    # define the model
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop, args.alpha,
                args.residual)
    # define the optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    model = model.to(device)
    for epoch in range(args.epochs):
        model.train()
        loss_list = []
        for batch, data in enumerate(train_dataloader):
            subgraph, feats, labels = data
            feats = feats.to(device)
            labels = labels.to(device)
            model.g = subgraph
            for layer in model.gat_layers:
                layer.g = subgraph
            logits = model(feats.float())
            loss = loss_fcn(logits, labels.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())
        loss_data = np.array(loss_list).mean()
        print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data))
        if epoch % 5 == 0:
            score_list = []
            val_loss_list = []
            for batch, valid_data in enumerate(valid_dataloader):
                subgraph, feats, labels = valid_data
                feats = feats.to(device)
                labels = labels.to(device)
                score, val_loss = evaluate(feats.float(), model, subgraph,
                                           labels.float(), loss_fcn)
                score_list.append(score)
                val_loss_list.append(val_loss)
            mean_score = np.array(score_list).mean()
            mean_val_loss = np.array(val_loss_list).mean()
            print("F1-Score: {:.4f} ".format(mean_score))
            # early stop
            if mean_score > best_score or best_loss > mean_val_loss:
                if mean_score > best_score and best_loss > mean_val_loss:
                    val_early_loss = mean_val_loss
                    val_early_score = mean_score
                best_score = np.max((mean_score, best_score))
                best_loss = np.min((best_loss, mean_val_loss))
                cur_step = 0
            else:
                cur_step += 1
                if cur_step == patience:
                    break
    test_score_list = []
    for batch, test_data in enumerate(test_dataloader):
        subgraph, feats, labels = test_data
        feats = feats.to(device)
        labels = labels.to(device)
        test_score_list.append(
            evaluate(feats.float(), model, subgraph, labels.float(), loss_fcn)[0])
    print("F1-Score: {:.4f}".format(np.array(test_score_list).mean()))
Example #7
def main(args):
    if args.gpu < 0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))
    writer = SummaryWriter()
    batch_size = args.batch_size
    # cur_step = 0
    # patience = args.patience
    # best_score = -1
    # best_loss = 10000
    # define loss function
    loss_fcn = torch.nn.BCEWithLogitsLoss()
    # create the dataset
    train_dataset = LegacyPPIDataset(mode='train')
    valid_dataset = LegacyPPIDataset(mode='valid')
    test_dataset = LegacyPPIDataset(mode='test')
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  collate_fn=collate)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=batch_size,
                                  collate_fn=collate)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 collate_fn=collate)
    n_classes = train_dataset.labels.shape[1]
    num_feats = train_dataset.features.shape[1]
    g = train_dataset.graph
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]

    # define the model
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop, args.alpha,
                args.bias, args.residual, args.l0)
    print(model)
    # define the optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    model = model.to(device)
    best_epoch = 0
    dur = []
    acc = []
    for epoch in range(args.epochs):
        num = 0
        model.train()
        if epoch % 5 == 0:
            t0 = time.time()
        loss_list = []
        for batch, data in enumerate(train_dataloader):
            subgraph, feats, labels = data
            feats = feats.to(device)
            labels = labels.to(device)
            model.g = subgraph
            for layer in model.gat_layers:
                layer.g = subgraph
            logits = model(feats.float())
            loss = loss_fcn(logits, labels.float())
            loss_l0 = args.loss_l0 * (model.gat_layers[0].loss)
            optimizer.zero_grad()
            (loss + loss_l0).backward()
            optimizer.step()
            loss_list.append(loss.item())
            num += model.gat_layers[0].num

        if epoch % 5 == 0:
            dur.append(time.time() - t0)

        loss_data = np.array(loss_list).mean()
        print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data))
        writer.add_scalar('edge_num/0', num, epoch)

        if epoch % 5 == 0:
            score_list = []
            val_loss_list = []
            for batch, valid_data in enumerate(valid_dataloader):
                subgraph, feats, labels = valid_data
                feats = feats.to(device)
                labels = labels.to(device)
                score, val_loss = evaluate(feats.float(), model, subgraph,
                                           labels.float(), loss_fcn)
                score_list.append(score)
                val_loss_list.append(val_loss)

            mean_score = np.array(score_list).mean()
            mean_val_loss = np.array(val_loss_list).mean()
            print("val F1-Score: {:.4f} ".format(mean_score))
            writer.add_scalar('loss', mean_val_loss, epoch)
            writer.add_scalar('f1/test_f1_mic', mean_score, epoch)

            acc.append(mean_score)

            # # early stop
            # if mean_score > best_score or best_loss > mean_val_loss:
            #     if mean_score > best_score and best_loss > mean_val_loss:
            #         val_early_loss = mean_val_loss
            #         val_early_score = mean_score
            #         torch.save(model.state_dict(), '{}.pkl'.format('save_rand'))
            #         best_epoch = epoch
            #
            #     best_score = np.max((mean_score, best_score))
            #     best_loss = np.min((best_loss, mean_val_loss))
            #     cur_step = 0
            # else:
            #     cur_step += 1
            #     if cur_step == patience:
            #         break

    test_score_list = []
    for batch, test_data in enumerate(test_dataloader):
        subgraph, feats, labels = test_data
        feats = feats.to(device)
        labels = labels.to(device)
        test_score_list.append(
            evaluate(feats.float(), model, subgraph, labels.float(), loss_fcn)[0])
    test_score = np.array(test_score_list).mean()  # avoid clobbering the validation-score list `acc`
    print("test F1-Score: {:.4f}".format(test_score))
    writer.close()
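The scalars logged above (edge_num/0, loss, f1/test_f1_mic) can be read back programmatically; the run directory below is a hypothetical placeholder for whatever SummaryWriter created under ./runs:

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

reader = EventAccumulator("runs/<your_run_dir>")  # hypothetical path
reader.Reload()
for event in reader.Scalars("f1/test_f1_mic"):
    print(event.step, event.value)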
Example #8
def main(args):
    if args.gpu<0:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:" + str(args.gpu))

    # batch_size = args.batch_size
    # cur_step = 0
    # patience = args.patience
    # best_score = -1
    # best_loss = 10000
    # # define loss function
    # loss_fcn = torch.nn.BCEWithLogitsLoss()

    # create the dataset
    train_dataset = LegacyPPIDataset(mode='train')
    valid_dataset = LegacyPPIDataset(mode='valid')
    test_dataset = LegacyPPIDataset(mode='test')

    # nxg = valid_dataset.graph.to_networkx().to_undirected()
    # comps = [comp for comp in nx.connected_components(nxg) if len(comp)>10]
    # print(len(comps))
    # exit()

    cross_valid_list = []
    for i in range(5):
        cross_valid_list.append(list(range(4*i, 4*(i + 1))))
    cross_train_dataset = copy.copy(train_dataset)

    valid_precision = []
    valid_recall = []
    valid_scores = []
    test_precision = []
    test_recall = []
    test_scores = []
    for ind, valid_list in enumerate(cross_valid_list):
        batch_size = args.batch_size
        cur_step = 0
        patience = args.patience
        best_score = -1
        best_loss = 10000
        # define loss function
        loss_fcn = torch.nn.BCEWithLogitsLoss()

        train_list = [i for i in range(20) if i not in valid_list]  # avoid shadowing the enumerate index `ind`
        print('Train List: {}'.format(train_list))
        print('Valid List: {}'.format(valid_list))
        modify(train_dataset, cross_train_dataset, train_list, mode='train', offset=0)
        modify(valid_dataset, cross_train_dataset, valid_list, mode='valid', offset=16)

        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=collate)
        valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, collate_fn=collate)
        test_dataloader = DataLoader(test_dataset, batch_size=batch_size, collate_fn=collate)
        n_classes = train_dataset.labels.shape[1]
        num_feats = train_dataset.features.shape[1]
        g = train_dataset.graph
        heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
        # define the model
        model = GAT(g,
                    args.num_layers,
                    num_feats,
                    args.num_hidden,
                    n_classes,
                    heads,
                    F.elu,
                    args.in_drop,
                    args.attn_drop,
                    args.alpha,
                    args.residual)
        # define the optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        model = model.to(device)

        for epoch in range(args.epochs):
            model.train()
            loss_list = []
            for batch, data in enumerate(train_dataloader):
                subgraph, feats, labels = data
                feats = feats.to(device)
                labels = labels.to(device)
                model.g = subgraph
                for layer in model.gat_layers:
                    layer.g = subgraph
                logits = model(feats.float())
                loss = loss_fcn(logits, labels.float())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                loss_list.append(loss.item())
            loss_data = np.array(loss_list).mean()
            print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data), end=' ')
            if epoch % 1 == 0:
                score_list = []
                val_loss_list = []
                for batch, valid_data in enumerate(valid_dataloader):
                    subgraph, feats, labels = valid_data
                    feats = feats.to(device)
                    labels = labels.to(device)
                    prec, recall, score, val_loss = evaluate(feats.float(), model, subgraph, labels.float(), loss_fcn)
                    score_list.append([prec, recall, score])
                    val_loss_list.append(val_loss)
                mean_score = np.array(score_list).mean(axis=0)
                mean_val_loss = np.array(val_loss_list).mean()
                print("| Valid Precision: {:.4f} | Valid Recall: {:.4f} |  Valid F1-Score: {:.4f} ".format(mean_score[0], mean_score[1], mean_score[2]), end = ' ')

                test_score_list = []
                for batch, test_data in enumerate(test_dataloader):
                    subgraph, feats, labels = test_data
                    feats = feats.to(device)
                    labels = labels.to(device)
                    test_prec, test_rec, test_score, _ = evaluate(feats, model, subgraph, labels.float(), loss_fcn)
                    test_score_list.append([test_prec, test_rec, test_score])
                mean_test_score = np.array(test_score_list).mean(axis=0)
                print("| Test Precision: {:.4f} | Test Recall: {:.4f} | Test F1-Score: {:.4f}".format(mean_test_score[0], mean_test_score[1], mean_test_score[2]))

                if epoch == args.epochs - 1:
                    valid_precision.append(round(mean_score[0], 4))
                    valid_recall.append(round(mean_score[1], 4))
                    valid_scores.append(round(mean_score[2], 4))
                    test_precision.append(round(mean_test_score[0], 4))
                    test_recall.append(round(mean_test_score[1], 4))
                    test_scores.append(round(mean_test_score[2], 4))

                # early stop
                if mean_score[2] > best_score or best_loss > mean_val_loss:
                    if mean_score[2] > best_score and best_loss > mean_val_loss:
                        val_early_loss = mean_val_loss
                        val_early_score = mean_score[2]
                    best_score = np.max((mean_score[2], best_score))
                    best_loss = np.min((best_loss, mean_val_loss))
                    cur_step = 0
                else:
                    cur_step += 1
                    if cur_step == patience:
                        valid_precision.append(round(mean_score[0], 4))
                        valid_recall.append(round(mean_score[1], 4))
                        valid_scores.append(round(mean_score[2], 4))
                        test_precision.append(round(mean_test_score[0], 4))
                        test_recall.append(round(mean_test_score[1], 4))
                        test_scores.append(round(mean_test_score[2], 4))
                        break
        print('Valid Scores: {}'.format(valid_scores))
        print('Test Scores: {}'.format(test_scores))
    
    out_matrix = np.stack([valid_precision, valid_recall, valid_scores, test_precision, test_recall, test_scores], axis=1)
    np.savetxt('results.csv', out_matrix, delimiter=',')
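The saved matrix has one row per recorded fold result, with columns in the np.stack order above. For example, to read it back and average over folds:

results = np.loadtxt('results.csv', delimiter=',')
# columns: valid precision, valid recall, valid F1, test precision, test recall, test F1
print(results.mean(axis=0))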