Example #1
def split_dataset_general(dataset, args):
    # diffpool operates on fixed-size batches, so drop the last incomplete one
    droplast = args.model == "diffpool"

    # shuffle graph indices, then carve out train/test (and optionally validation)
    train_size = int(len(dataset) * args.train_ratio)
    test_size = int(len(dataset) * args.test_ratio)
    index = list(range(len(dataset)))
    random.shuffle(index)

    train_index = index[:train_size]
    test_index = index[-test_size:]

    bs = args.batch_size
    train_loader = DataLoader([dataset[i] for i in train_index],
                              batch_size=bs,
                              drop_last=droplast)
    test_loader = DataLoader([dataset[i] for i in test_index],
                             batch_size=bs,
                             drop_last=droplast)
    if args.train_ratio + args.test_ratio < 1:
        val_index = index[train_size:-test_size]
        valid_loader = DataLoader([dataset[i] for i in val_index],
                                  batch_size=bs,
                                  drop_last=droplast)
    else:
        # no validation share left: fall back to validating on the test set
        valid_loader = test_loader
    return train_loader, valid_loader, test_loader
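A hedged usage sketch for the function above (the toy dataset and args namespace are invented for illustration; assumes PyTorch Geometric's Data and DataLoader):

import random
from types import SimpleNamespace

import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

# toy dataset: 100 random two-node graphs with binary labels
dataset = [Data(x=torch.randn(2, 4),
                edge_index=torch.tensor([[0, 1], [1, 0]]),
                y=torch.tensor([i % 2]))
           for i in range(100)]
args = SimpleNamespace(model="gin", train_ratio=0.7,
                       test_ratio=0.1, batch_size=16)
train_loader, valid_loader, test_loader = split_dataset_general(dataset, args)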
Example #2
    def _kfold_train(self):
        y = [x.y for x in self.data]
        kf = StratifiedKFold(n_splits=self.folds,
                             shuffle=True,
                             random_state=self.args.seed)
        acc = []
        for train_index, test_index in kf.split(self.data, y=y):
            # rebuild the model and optimizer from scratch for every fold
            model = build_model(self.args)
            self.model = model.to(self.device)

            droplast = self.args.model == 'diffpool'
            self.train_loader = DataLoader([self.data[i] for i in train_index],
                                           batch_size=self.args.batch_size,
                                           drop_last=droplast)
            self.test_loader = DataLoader([self.data[i] for i in test_index],
                                          batch_size=self.args.batch_size,
                                          drop_last=droplast)
            # no held-out validation fold: validation reuses the test fold
            self.val_loader = DataLoader([self.data[i] for i in test_index],
                                         batch_size=self.args.batch_size,
                                         drop_last=droplast)
            self.optimizer = torch.optim.Adam(
                self.model.parameters(),
                lr=self.args.lr,
                weight_decay=self.args.weight_decay)
            self.scheduler = torch.optim.lr_scheduler.StepLR(
                optimizer=self.optimizer, step_size=50, gamma=0.5)

            res = self._train()
            acc.append(res["Acc"])
        return dict(Acc=sum(acc) / len(acc))  # mean accuracy across folds
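For reference, StratifiedKFold yields index arrays whose test folds preserve the class proportions of the full label set; a minimal standalone sketch with toy labels (not cogdl code):

import numpy as np
from sklearn.model_selection import StratifiedKFold

y = np.array([0] * 8 + [1] * 4)  # imbalanced toy labels, 2:1
kf = StratifiedKFold(n_splits=4, shuffle=True, random_state=0)
for train_idx, test_idx in kf.split(np.zeros(len(y)), y):
    # every test fold keeps the 2:1 class ratio
    print(test_idx, y[test_idx])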
Example #3
def split_dataset(cls, dataset, args):
    if "ModelNet" in args.dataset:
        # ModelNet ships with a fixed train/test split; use point positions as features
        train_data = [Data(x=d.pos, y=d.y) for d in dataset["train"]]
        test_data = [Data(x=d.pos, y=d.y) for d in dataset["test"]]
        train_loader = DataLoader(train_data,
                                  batch_size=args.batch_size,
                                  num_workers=6)
        test_loader = DataLoader(test_data,
                                 batch_size=args.batch_size,
                                 num_workers=6,
                                 shuffle=False)
        # no validation split: reuse the test loader
        return train_loader, test_loader, test_loader
    else:
        random.shuffle(dataset)
        train_size = int(len(dataset) * args.train_ratio)
        test_size = int(len(dataset) * args.test_ratio)
        bs = args.batch_size
        train_loader = DataLoader(dataset[:train_size], batch_size=bs)
        test_loader = DataLoader(dataset[-test_size:], batch_size=bs)
        if args.train_ratio + args.test_ratio < 1:
            valid_loader = DataLoader(dataset[train_size:-test_size],
                                      batch_size=bs)
        else:
            valid_loader = test_loader
        return train_loader, valid_loader, test_loader
Example #4
File: gin.py Project: xs-li/cogdl
    def split_dataset(cls, dataset, args):
        # hold out a random 10% of graphs for testing; use a set for O(1) lookups
        test_index = set(random.sample(range(len(dataset)), len(dataset) // 10))
        train_index = [x for x in range(len(dataset)) if x not in test_index]

        train_dataset = [dataset[i] for i in train_index]
        test_dataset = [dataset[i] for i in test_index]
        train_loader = DataLoader(train_dataset, batch_size=args.batch_size)
        test_loader = DataLoader(test_dataset, batch_size=args.batch_size)
        # no validation split: reuse the test loader
        return train_loader, test_loader, test_loader
Example #5
    def distributed_dataloader(self, dataloader: DataLoader, dataset, rank):
        # TODO: just a toy implementation
        assert isinstance(dataloader, DataLoader)

        # assumes a DataLoader subclass that exposes its constructor
        # arguments via get_parameters()
        args, kwargs = dataloader.get_parameters()
        sampler = torch.utils.data.distributed.DistributedSampler(
            dataset, num_replicas=self.world_size, rank=rank)
        kwargs["sampler"] = sampler
        dataloader = dataloader.__class__(*args, **kwargs)
        return dataloader
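For comparison, the standard PyTorch pattern builds the distributed loader directly instead of reconstructing an existing one; a minimal sketch (make_distributed_loader and its arguments are hypothetical):

from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler

def make_distributed_loader(dataset, world_size, rank, batch_size=32):
    # each replica draws a disjoint shard of the dataset
    sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank)
    # shuffling is handled by the sampler, so the loader itself must not shuffle
    return DataLoader(dataset, batch_size=batch_size, sampler=sampler)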
Example #6
def split_dataset(cls, dataset, args):
    random.shuffle(dataset)
    train_size = int(len(dataset) * args.train_ratio)
    test_size = int(len(dataset) * args.test_ratio)
    bs = args.batch_size
    train_loader = DataLoader(dataset[:train_size], batch_size=bs)
    test_loader = DataLoader(dataset[-test_size:], batch_size=bs)
    if args.train_ratio + args.test_ratio < 1:
        valid_loader = DataLoader(dataset[train_size:-test_size], batch_size=bs)
    else:
        # no validation share left: reuse the test loader
        valid_loader = test_loader
    return train_loader, valid_loader, test_loader
Example #7
File: ogb.py Project: sofyc/cogdl
def get_loader(self, args):
    # OGB datasets ship with an official train/valid/test split
    split_index = self.dataset.get_idx_split()
    train_loader = DataLoader(self.get_subset(split_index["train"]),
                              batch_size=args.batch_size,
                              shuffle=True)
    valid_loader = DataLoader(self.get_subset(split_index["valid"]),
                              batch_size=args.batch_size,
                              shuffle=False)
    test_loader = DataLoader(self.get_subset(split_index["test"]),
                             batch_size=args.batch_size,
                             shuffle=False)
    return train_loader, valid_loader, test_loader
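get_idx_split comes from the OGB dataset API; a minimal sketch of how the official split indices are obtained (assumes the ogb package; the dataset name is just an example):

from ogb.graphproppred import PygGraphPropPredDataset

dataset = PygGraphPropPredDataset(name="ogbg-molhiv")
split_index = dataset.get_idx_split()  # {"train": ..., "valid": ..., "test": ...}
print(len(split_index["train"]), len(split_index["valid"]), len(split_index["test"]))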
Example #8
    def __init__(self, args, dataset=None, model=None):
        super(GraphClassification, self).__init__(args)
        dataset = build_dataset(args) if dataset is None else dataset

        args.max_graph_size = max(ds.num_nodes for ds in dataset)
        args.num_features = dataset.num_features
        args.num_classes = dataset.num_classes
        args.use_unsup = False

        self.args = args
        self.kfold = args.kfold
        self.folds = 10

        self.device = ("cpu" if not torch.cuda.is_available() or args.cpu
                       else args.device_id[0])

        if args.dataset.startswith("ogbg"):
            self.data = dataset.data
            self.train_loader, self.val_loader, self.test_loader = dataset.get_loader(args)
            model = build_model(args) if model is None else model
        else:
            self.data = dataset
            if self.data[0].x is None:
                # featureless graphs: fall back to node degrees as input features
                self.data = node_degree_as_feature(dataset)
                args.num_features = self.data.num_features
            model = build_model(args) if model is None else model
            (
                self.train_dataset,
                self.val_dataset,
                self.test_dataset,
            ) = model.split_dataset(self.data, args)
            self.train_loader = DataLoader(**self.train_dataset)
            self.val_loader = DataLoader(**self.val_dataset)
            self.test_loader = DataLoader(**self.test_dataset)

        self.model = model.to(self.device)

        self.set_loss_fn(dataset)
        self.set_evaluator(dataset)

        self.patience = args.patience
        self.max_epoch = args.max_epoch

        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=args.lr,
                                          weight_decay=args.weight_decay)
        self.scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer=self.optimizer, step_size=50, gamma=0.5)
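The StepLR schedule above halves the learning rate every 50 epochs; a tiny standalone sketch of the resulting decay (dummy parameter and optimizer, only to print the schedule):

import torch

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.Adam([param], lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

for epoch in range(150):
    optimizer.step()      # step the optimizer before the scheduler
    scheduler.step()
    if (epoch + 1) % 50 == 0:
        print(epoch + 1, scheduler.get_last_lr()[0])  # 5e-4, 2.5e-4, 1.25e-4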
Example #9
    def split_dataset(cls, dataset, args):
        if args.dataset == "QM9":
            # fixed QM9 protocol: first 10k graphs for test, next 10k for
            # validation, then `train_num` graphs for training
            test_dataset = dataset[:10000]
            val_dataset = dataset[10000:20000]
            train_dataset = dataset[20000:20000 + args.train_num]
            return (DataLoader(train_dataset, batch_size=args.batch_size),
                    DataLoader(val_dataset, batch_size=args.batch_size),
                    DataLoader(test_dataset, batch_size=args.batch_size))
        else:
            # hold out a random 10% for testing; use a set for O(1) lookups
            test_index = set(random.sample(range(len(dataset)), len(dataset) // 10))
            train_index = [x for x in range(len(dataset)) if x not in test_index]

            train_dataset = [dataset[i] for i in train_index]
            test_dataset = [dataset[i] for i in test_index]
            train_loader = DataLoader(train_dataset, batch_size=args.batch_size)
            test_loader = DataLoader(test_dataset, batch_size=args.batch_size)
            return train_loader, test_loader, test_loader
Example #10
    def __init__(self, args):
        args.data_type = "unsupervised"
        super(InfoMaxTrainer, self).__init__(args)
        self.hidden_size = args.hidden_size
        self.dataloader = DataLoader(self.dataset,
                                     batch_size=args.batch_size,
                                     shuffle=True,
                                     num_workers=args.num_workers)
        self.model = GNN(
            num_layers=args.num_layers,
            hidden_size=args.hidden_size,
            JK=args.JK,
            dropout=args.dropout,
            input_layer=self.opt.get("input_layer", None),
            edge_encode=self.opt.get("edge_encode", None),
            edge_emb=self.opt.get("edge_emb", None),
            num_atom_type=self.opt.get("num_atom_type", None),
            num_chirality_tag=self.opt.get("num_chirality_tag", None),
            concat=self.opt["concat"],
        )

        self.discriminator = Discriminator(args.hidden_size)
        self.loss_fn = nn.BCEWithLogitsLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=args.lr,
                                          weight_decay=args.weight_decay)
Example #11
def split_data(self):
    length = len(self.dataset)
    indices = np.arange(length)
    np.random.shuffle(indices)
    # hard-coded 60% training share; the remaining graphs are unused here
    self.train_ratio = 0.6
    train_index = torch.LongTensor(indices[: int(length * self.train_ratio)])
    dataset = self.dataset[train_index]
    dataloader = DataLoader(dataset=dataset,
                            batch_size=self.batch_size,
                            shuffle=True,
                            num_workers=self.num_workers)
    return dataloader
Example #12
    def split_dataset(self, dataset, args):
        random.shuffle(dataset)
        # process each graph and attach its sampled feature to Data() as attribute `tx`
        for i, data in enumerate(dataset):
            new_feature = get_single_feature(data, args.num_features,
                                             args.num_classes, args.sample,
                                             args.neighbor, args.stride)
            dataset[i].tx = torch.from_numpy(new_feature)

        train_size = int(len(dataset) * args.train_ratio)
        test_size = int(len(dataset) * args.test_ratio)
        bs = args.batch_size
        train_loader = DataLoader(dataset[:train_size], batch_size=bs)
        test_loader = DataLoader(dataset[-test_size:], batch_size=bs)
        if args.train_ratio + args.test_ratio < 1:
            valid_loader = DataLoader(dataset[train_size:-test_size],
                                      batch_size=bs)
        else:
            valid_loader = test_loader
        return train_loader, valid_loader, test_loader
Example #13
    def test_step(self, dataset):
        device = self.device
        # embed every graph with the trained model, then score the embeddings with an SVM
        dataloader = DataLoader(dataset, batch_size=32, shuffle=False)
        preds = []
        with torch.no_grad():
            for batch in dataloader:
                preds.append(self.model(batch.to(device)))
        preds = torch.cat(preds).cpu().numpy()
        labels = np.array([g.y.item() for g in dataset])
        result = evaluate_graph_embeddings_using_svm(preds, labels)

        self.note("test_metric", result["acc"])
        self.note("std", result["std"])
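evaluate_graph_embeddings_using_svm is a cogdl helper; a rough sketch of what such an evaluation typically does (hypothetical reimplementation with scikit-learn, not the library's actual code):

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import LinearSVC

def svm_evaluate(embeddings, labels, folds=10, seed=0):
    # fit a linear SVM on the graph embeddings; report mean/std accuracy over folds
    kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)
    scores = []
    for train_idx, test_idx in kf.split(embeddings, labels):
        clf = LinearSVC(max_iter=10000)
        clf.fit(embeddings[train_idx], labels[train_idx])
        scores.append(accuracy_score(labels[test_idx],
                                     clf.predict(embeddings[test_idx])))
    return {"acc": float(np.mean(scores)), "std": float(np.std(scores))}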
Example #14
    def __init__(self, args, dataset=None, model=None):
        super(UnsupervisedGraphClassification, self).__init__(args)

        self.device = ("cpu" if not torch.cuda.is_available() or args.cpu
                       else args.device_id[0])

        dataset = build_dataset(args) if dataset is None else dataset
        if "gcc" in args.model:
            self.label = dataset.graph_labels[:, 0]
            self.data = dataset.graph_lists
        else:
            self.label = np.array([data.y for data in dataset])
            self.data = [
                Data(x=data.x,
                     y=data.y,
                     edge_index=data.edge_index,
                     edge_attr=data.edge_attr,
                     pos=data.pos).apply(lambda x: x.to(self.device))
                for data in dataset
            ]
        args.num_features = dataset.num_features
        args.num_classes = args.hidden_size
        args.use_unsup = True

        if args.degree_feature:
            self.data = node_degree_as_feature(self.data)
            args.num_features = self.data[0].num_features

        self.num_graphs = len(self.data)
        self.num_classes = dataset.num_classes
        # self.label_matrix = np.zeros((self.num_graphs, self.num_classes))
        # self.label_matrix[range(self.num_graphs), np.array([data.y for data in self.data], dtype=int)] = 1

        self.model = build_model(args) if model is None else model
        self.model = self.model.to(self.device)
        self.model_name = args.model
        self.hidden_size = args.hidden_size
        self.num_shuffle = args.num_shuffle
        self.save_dir = args.save_dir
        self.epoch = args.epoch
        self.use_nn = args.model in ("infograph",)

        if self.use_nn:
            self.optimizer = torch.optim.Adam(self.model.parameters(),
                                              lr=args.lr,
                                              weight_decay=args.weight_decay)
            self.data_loader = DataLoader(self.data,
                                          batch_size=args.batch_size,
                                          shuffle=True)
Example #15
    def __init__(self, args):
        super(UnsupervisedGraphClassification, self).__init__(args)
        dataset = build_dataset(args)
        self.label = np.array([data.y for data in dataset])
        self.data = [
            Data(x=data.x,
                 y=data.y,
                 edge_index=data.edge_index,
                 edge_attr=data.edge_attr,
                 pos=data.pos).apply(lambda x: x.cuda()) for data in dataset
        ]
        args.num_features = dataset.num_features
        args.num_classes = args.hidden_size
        args.use_unsup = True

        if args.degree_feature:
            self.data = node_degree_as_feature(self.data)
            args.num_features = self.data[0].num_features

        self.num_graphs = len(self.data)
        self.num_classes = dataset.num_classes
        # self.label_matrix = np.zeros((self.num_graphs, self.num_classes))
        # self.label_matrix[range(self.num_graphs), np.array([data.y for data in self.data], dtype=int)] = 1

        self.model = build_model(args)
        self.model = self.model.cuda()
        self.model_name = args.model
        self.hidden_size = args.hidden_size
        self.num_shuffle = args.num_shuffle
        self.save_dir = args.save_dir
        self.epochs = args.epochs
        self.use_nn = args.nn

        if args.nn:
            self.optimizer = torch.optim.Adam(self.model.parameters(),
                                              lr=args.lr,
                                              weight_decay=args.weight_decay)
            self.data_loader = DataLoader(self.data,
                                          batch_size=args.batch_size,
                                          shuffle=True)
Example #16
def split_dataset(cls, dataset, args):
    if args.dataset == "qm9":
        # fixed QM9 protocol: first 10k graphs for test, next 10k for
        # validation, then `train_num` graphs for training
        test_dataset = dataset[:10000]
        val_dataset = dataset[10000:20000]
        train_dataset = dataset[20000:20000 + args.train_num]
        return (DataLoader(train_dataset, batch_size=args.batch_size),
                DataLoader(val_dataset, batch_size=args.batch_size),
                DataLoader(test_dataset, batch_size=args.batch_size))
    else:
        random.shuffle(dataset)
        train_size = int(len(dataset) * args.train_ratio)
        test_size = int(len(dataset) * args.test_ratio)
        bs = args.batch_size
        train_loader = DataLoader(dataset[:train_size], batch_size=bs)
        test_loader = DataLoader(dataset[-test_size:], batch_size=bs)
        if args.train_ratio + args.test_ratio < 1:
            valid_loader = DataLoader(dataset[train_size:-test_size], batch_size=bs)
        else:
            # no validation share left: reuse the test loader
            valid_loader = test_loader
        return train_loader, valid_loader, test_loader
Example #17
def test_wrapper(self):
    return DataLoader(self.dataset[self.split_idx[2]],
                      batch_size=self.batch_size,
                      shuffle=False,
                      num_workers=4)
Example #18
def val_wrapper(self):
    if self.split_idx[1] is not None:
        return DataLoader(self.dataset[self.split_idx[1]],
                          batch_size=self.batch_size,
                          shuffle=False,
                          num_workers=4)
Example #19
def train_wrapper(self):
    return DataLoader(self.dataset[self.split_idx[0]],
                      batch_size=self.batch_size,
                      shuffle=True,
                      num_workers=4)