def run(dataset_name):
    """Train HAN on *dataset_name* (heterogeneous node classification).

    Returns the result dict produced by ``experiment``.
    """
    hyperparams = default_parameter().__dict__
    return experiment(
        task="heterogeneous_node_classification",
        dataset=dataset_name,
        model="han",
        **hyperparams,
    )
def train():
    """Grid-search GCN hyper-parameters on Cora.

    Iterates over every parameter combination from ``get_parameters``, runs an
    experiment for each, and tracks the configuration with the best mean
    validation accuracy. Prints progress, the best configuration, and timing.
    """
    args = build_default_args()
    best_parameters = None
    best_result = None
    best_val_acc = 0
    print(f"===== Start At: {get_time()} ===========")
    start = time.time()
    for combo in get_parameters():
        # Overlay this combination onto the default arguments.
        args.update(combo)
        print(f"### -- Parameters: {args}")
        res = experiment(dataset="cora", model="gcn", **args)
        # `res` maps a single key to a list of per-seed result dicts.
        runs = list(res.values())[0]
        val_scores = [r["val_acc"] for r in runs]
        test_scores = [r["test_acc"] for r in runs]
        val_acc = sum(val_scores) / len(val_scores)
        print(f"### Result: {val_acc}")
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # Deep-copy: `args` is mutated on the next iteration.
            best_parameters = copy.deepcopy(args)
            best_result = dict(Acc=sum(test_scores) / len(test_scores), ValAcc=val_acc)
    print(f"Best Parameters: {best_parameters}")
    print(f"Best result: {best_result}")
    end = time.time()
    print(f"===== End At: {get_time()} ===========")
    print("Time cost:", end - start)
def run(dataset_name):
    """Train PTE on *dataset_name* (multiplex node classification).

    Dataset-specific defaults are applied via ``DATASET_REGISTRY`` before
    launching the experiment; returns the experiment's result dict.
    """
    base = default_parameter()
    params = DATASET_REGISTRY[dataset_name](base).__dict__
    return experiment(
        task="multiplex_node_classification",
        dataset=dataset_name,
        model="pte",
        **params,
    )
def run(dataset_name):
    """Train GraRep on *dataset_name* (unsupervised node classification).

    Dataset-specific defaults are applied via ``DATASET_REGISTRY`` before
    launching the experiment; returns the experiment's result dict.
    """
    base = default_parameter()
    params = DATASET_REGISTRY[dataset_name](base).__dict__
    return experiment(
        task="unsupervised_node_classification",
        dataset=dataset_name,
        model="grarep",
        **params,
    )
def run(dataset_name):
    """Train GATNE on *dataset_name* (multiplex link prediction).

    Dataset-specific defaults are applied via ``DATASET_REGISTRY`` before
    launching the experiment; returns the experiment's result dict.
    """
    base = default_parameter()
    params = DATASET_REGISTRY[dataset_name](base).__dict__
    return experiment(
        task="multiplex_link_prediction",
        dataset=dataset_name,
        model="gatne",
        **params,
    )
def run(dataset_name):
    """Train GraphSAGE on *dataset_name*.

    Flip the `unsup` toggle to switch between the supervised model
    ("graphsage") and the unsupervised variant ("unsup_graphsage").
    Returns the experiment's result dict.
    """
    unsup = False
    if not unsup:
        task, model = "node_classification", "graphsage"
        args = default_parameters_sup()
    else:
        task, model = "unsupervised_node_classification", "unsup_graphsage"
        args = default_parameter_unsup()
    # Apply dataset-specific defaults registered for this dataset.
    args = DATASET_REGISTRY[dataset_name](args).__dict__
    return experiment(task=task, dataset=dataset_name, model=model, **args)
        # (tail of a constructor whose header is outside this chunk)
        self.num_layers = num_layers
        self.dropout = dropout
        # NOTE(review): set to 0 here but never read below; `pool_ratios` uses a
        # bare `num_nodes` name (presumably a constructor argument not visible
        # here) rather than this attribute — confirm intent.
        self.num_nodes = 0
        # Graph U-Net, depth 3: first pooling keeps ~2000/num_nodes of the
        # nodes, second keeps half; ELU activations throughout.
        self.unet = GraphUNet(self.in_feats, self.hidden_size, self.out_feats, depth=3, pool_ratios=[2000 / num_nodes, 0.5], act=F.elu)

    def forward(self, x, edge_index):
        """Edge dropout, feature dropout, then the Graph U-Net."""
        # Randomly drop 20% of edges (kept undirected-consistent), training only.
        edge_index, _ = dropout_adj(edge_index, p=0.2, force_undirected=True, num_nodes=x.shape[0], training=self.training)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.unet(x, edge_index)
        return x

    def predict(self, data):
        """Convenience wrapper: run forward on a CogDL data object."""
        return self.forward(data.x, data.edge_index)


if __name__ == "__main__":
    ret = experiment(task="node_classification", dataset="cora", model="pyg_unet")
        # (tail of a constructor whose header is outside this chunk)
        self.num_layers = num_layers
        self.dropout = dropout
        # Layer widths: num_features -> hidden_size x (num_layers - 1) -> num_classes.
        shapes = [num_features] + [hidden_size] * (num_layers - 1) + [num_classes]
        self.convs = nn.ModuleList([
            GCNConv(shapes[layer], shapes[layer + 1], cached=False)
            for layer in range(num_layers)
        ])

    def forward(self, graph):
        """GCN forward pass: ReLU + dropout between layers, log-softmax output."""
        x = graph.x
        # CogDL stores edge_index as a (row, col) pair; stack into a 2 x E tensor.
        edge_index, edge_weight = torch.stack(graph.edge_index), graph.edge_weight
        for conv in self.convs[:-1]:
            x = F.relu(conv(x, edge_index, edge_weight))
            # NOTE(review): original formatting was lost; dropout is assumed to be
            # inside the per-layer loop, as is conventional — confirm.
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, edge_index, edge_weight)
        return F.log_softmax(x, dim=1)


if __name__ == "__main__":
    cora = CoraDataset()
    model = GCN(
        num_features=cora.num_features,
        hidden_size=64,
        num_classes=cora.num_classes,
        num_layers=2,
        dropout=0.5,
    )
    ret = experiment(dataset=cora, model=model)
        # (tail of a dataset-building method whose header is outside this chunk)
        # Split: middle 40% of nodes for validation, last 30% for test.
        val_mask[int(0.3 * num_nodes):int(0.7 * num_nodes)] = True
        test_mask = torch.zeros(num_nodes).bool()
        test_mask[int(0.7 * num_nodes):] = True
        data = Graph(x=x, edge_index=edge_index, y=y, train_mask=train_mask, val_mask=val_mask, test_mask=test_mask)
        return data


if __name__ == "__main__":
    # Train customized dataset via defining a new class
    dataset = MyNodeDataset()
    experiment(dataset=dataset, model="gcn")

    # Train customized dataset via feeding the graph data to NodeDataset
    data = generate_random_graph(num_nodes=100, num_edges=300, num_feats=30)
    dataset = NodeDataset(data=data)
    experiment(dataset=dataset, model="gcn")

# %%
# Dataset for graph_classification
# ---------------------------------
from cogdl.data import Graph
from cogdl.datasets import GraphDataset


class MyGraphDataset(GraphDataset):
    # NOTE(review): this definition continues beyond the visible chunk.
    def __init__(self, path="data.pt"):
    # (body of a dataset-building function whose header is outside this chunk)
    num_edges = 300
    feat_dim = 30
    # load or generate your dataset
    edge_index = torch.randint(0, num_nodes, (2, num_edges))
    x = torch.randn(num_nodes, feat_dim)
    # Binary node labels.
    y = torch.randint(0, 2, (num_nodes,))
    # set train/val/test mask in node_classification task:
    # first 30% train, middle 40% val, last 30% test.
    train_mask = torch.zeros(num_nodes).bool()
    train_mask[0:int(0.3 * num_nodes)] = True
    val_mask = torch.zeros(num_nodes).bool()
    val_mask[int(0.3 * num_nodes):int(0.7 * num_nodes)] = True
    test_mask = torch.zeros(num_nodes).bool()
    test_mask[int(0.7 * num_nodes):] = True
    data = Data(x=x, edge_index=edge_index, y=y, train_mask=train_mask, val_mask=val_mask, test_mask=test_mask)
    # Persist so the "./mydata.pt" run below can load it from disk.
    torch.save(data, "mydata.pt")
    return data


if __name__ == "__main__":
    # Run with self-loaded dataset
    experiment(task="node_classification", dataset="mydataset", model="gcn")
    # Run with a given data path
    experiment(task="node_classification", dataset="./mydata.pt", model="gcn")
from cogdl import options, experiment


def main():
    """Parse CLI arguments and launch a CogDL experiment on a single device."""
    parser = options.get_training_parser()
    args, _ = parser.parse_known_args()
    args = options.parse_args_and_arch(parser, args)
    # This entry point supports exactly one device.
    assert len(args.device_id) == 1
    experiment(task=args.task, dataset=args.dataset, model=args.model, args=args)


if __name__ == "__main__":
    main()
@classmethod def build_model_from_args(cls, args): return cls(args.num_features, args.hidden_size, args.num_classes, args.dropout) def __init__(self, in_feats, hidden_size, out_feats, dropout): super(GCN, self).__init__() self.gc1 = GraphConvolution(in_feats, hidden_size) self.gc2 = GraphConvolution(hidden_size, out_feats) self.dropout = dropout def forward(self, x, edge_index): edge_index, edge_attr = add_remaining_self_loops(edge_index, num_nodes=x.shape[0]) edge_attr = symmetric_normalization(x.shape[0], edge_index, edge_attr) x = F.dropout(x, self.dropout, training=self.training) x = F.relu(self.gc1(x, edge_index, edge_attr)) x = F.dropout(x, self.dropout, training=self.training) x = self.gc2(x, edge_index, edge_attr) return x def predict(self, data): return self.forward(data.x, data.edge_index) if __name__ == "__main__": experiment(task="node_classification", dataset="cora", model="mygcn")
from cogdl import experiment

# Basic usage.
experiment(dataset="cora", model="gcn")

# Override individual hyper-parameters.
experiment(dataset="cora", model="gcn", hidden_size=32, epochs=200)

# Sweep several models across several seeds.
experiment(dataset="cora", model=["gcn", "gat"], seed=[0, 1])

# Evaluate on different predefined splits.
experiment(dataset="chameleon", model="gcn", seed=[0, 1], split=[0, 1])


def search_space(trial):
    """Optuna search space: sample lr, hidden size, and dropout per trial."""
    lr = trial.suggest_categorical("lr", [1e-3, 5e-3, 1e-2])
    hidden = trial.suggest_categorical("hidden_size", [32, 64, 128])
    dropout = trial.suggest_uniform("dropout", 0.5, 0.8)
    return {"lr": lr, "hidden_size": hidden, "dropout": dropout}


# Hyper-parameter search over 3 trials, each evaluated on seeds 1 and 2.
experiment(dataset="cora", model="gcn", seed=[1, 2], search_space=search_space, n_trials=3)
from cogdl import options, experiment


def main():
    """Parse CLI arguments and launch a CogDL experiment."""
    parser = options.get_training_parser()
    args, _ = parser.parse_known_args()
    args = options.parse_args_and_arch(parser, args)
    experiment(dataset=args.dataset, model=args.model, args=args)


if __name__ == "__main__":
    main()
        # (tail of a forward method whose header is outside this chunk)
        # Final GAT layer output; average over attention heads.
        logits, _ = self.gat_layers[-1](self.g, h, self.e_feat, res_attn=None)
        logits = logits.mean(1)
        # This is an equivalent replacement for tf.l2_normalize, see https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/math/l2_normalize for more information.
        logits = logits / (torch.max(torch.norm(logits, dim=1, keepdim=True), self.epsilon))
        y = logits[target_x]
        loss = self.cross_entropy_loss(y, target)
        return loss, y, None

    def loss(self, data):
        """Training loss computed on the train nodes/targets."""
        loss, y, _ = self.forward(data.adj, data.x, data.train_node, data.train_target)
        return loss

    def evaluate(self, data, nodes, targets):
        """Return (loss value, metric) for the given nodes and targets."""
        loss, y, _ = self.forward(data.adj, data.x, nodes, targets)
        # NOTE(review): named f1 but computed with accuracy() — confirm which
        # metric is actually intended here.
        f1 = accuracy(y, targets)
        return loss.item(), f1


if __name__ == "__main__":
    # CUDA_VISIBLE_DEVICES=0 python custom_gcn.py --seed 0 1 2 3 4 -t heterogeneous_node_classification -dt gtn-acm -m simple_hgn --lr 0.001
    parser = options.get_training_parser()
    args, _ = parser.parse_known_args()
    args = options.parse_args_and_arch(parser, args)
    experiment(task="heterogeneous_node_classification", dataset="gtn-acm", model="simple_hgn", args=args)
    # experiment(task="node_classification", dataset="cora", model="mygcn")
import torch
import torch.nn.functional as F

from cogdl import experiment
from cogdl.models import BaseModel
from cogdl.datasets.planetoid_data import CoraDataset

# NOTE(review): the original example used GATConv, torch, and F without
# importing them. torch/F are added above; GATConv's source is not visible
# here (the `heads=` keyword suggests torch_geometric.nn.GATConv) — confirm
# and add the matching import.


class GAT(BaseModel):
    """Two-layer graph attention network.

    The first layer concatenates ``num_heads`` attention heads; the second
    maps the concatenated features to ``out_feats`` classes.
    """

    def __init__(self, in_feats, hidden_size, out_feats, num_heads, dropout):
        super(GAT, self).__init__()
        self.in_feats = in_feats
        self.out_feats = out_feats
        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.dropout = dropout
        self.conv1 = GATConv(in_feats, hidden_size, heads=num_heads, dropout=dropout)
        # Input width is hidden_size * num_heads because conv1 concatenates heads.
        self.conv2 = GATConv(hidden_size * num_heads, out_feats, dropout=dropout)

    def forward(self, graph):
        """Dropout + ELU-activated attention layers over the stacked edge index."""
        x = graph.x
        # CogDL stores edge_index as a (row, col) pair; stack into a 2 x E tensor.
        edge_index = torch.stack(graph.edge_index)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.elu(self.conv2(x, edge_index))
        return x


if __name__ == "__main__":
    cora = CoraDataset()
    model = GAT(in_feats=cora.num_features, hidden_size=64, out_feats=cora.num_classes, num_heads=2, dropout=0.1)
    ret = experiment(dataset=cora, model=model, dw="node_classification_dw", mw="node_classification_mw")
            # (continuation of an nn.Sequential classifier begun above this chunk)
            nn.Dropout(dropout),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.Dropout(dropout),
            nn.Linear(256, out_feats),
        )

    def forward(self, batch):
        """Two conv stages, concatenate their outputs, pool per graph, classify."""
        h = batch.x
        h1 = self.conv1(h, batch.batch)
        h2 = self.conv2(h1, batch.batch)
        # Concatenate both stages' node features before projection.
        h = self.linear(torch.cat([h1, h2], dim=1))
        # One vector per graph: max over each graph's nodes.
        h = global_max_pool(h, batch.batch)
        out = self.final_mlp(h)
        return out


if __name__ == "__main__":
    mutag = MUTAGDataset()
    model = DGCNN(
        in_feats=mutag.num_features,
        hidden_size=64,
        out_feats=mutag.num_classes,
        k=20,
        dropout=0.5,
    )
    ret = experiment(dataset=mutag, model=model, dw="graph_classification_dw", mw="graph_classification_mw")
# (tail of a CLI script; `parser`, `SAGE`, and earlier arguments are defined above this chunk)
parser.add_argument("--eval-step", type=int, default=10)
parser.add_argument("--patience", type=int, default=10)
parser.add_argument("--logger", type=str, default=None)
parser.add_argument("--runs", type=int, default=10)
args = parser.parse_args()

dataset = OGBProductsDataset()
gnn = SAGE(
    in_feats=dataset.num_features,
    hidden_size=args.hidden_size,
    out_feats=dataset.num_classes,
    num_layers=args.num_layers,
    dropout=args.dropout,
)
# One seed per repetition so results are averaged over args.runs runs;
# cluster_dw enables cluster-based mini-batch training.
experiment(
    model=gnn,
    dataset=dataset,
    lr=args.lr,
    weight_decay=args.weight_decay,
    epochs=args.epochs,
    seed=list(range(args.runs)),
    dw="cluster_dw",
    batch_size=args.batch_size,
    n_cluster=args.n_cluster,
    cpu_inference=True,
    eval_step=args.eval_step,
    logger=args.logger,
    patience=args.patience,
)
    # (last statement of a training loop begun above this chunk — confirm indent)
    optimizer.step()

# Evaluate on the held-out test split.
model.eval()
_, pred = model(dataset).max(dim=1)
correct = float(pred[dataset.test_mask].eq(dataset.y[dataset.test_mask]).sum().item())
acc = correct / dataset.test_mask.sum().item()
print(
    'The accuracy rate obtained by running the experiment with the custom training logic: {:.6f}'
    .format(acc))

# %%
# Experiment API
# --------------
# CogDL provides an easier-to-use training API: ``experiment``.
from cogdl import experiment

experiment(model="gcn", dataset="cora")

# Alternatively, create each component yourself and run the process manually
# with CogDL's ``build_dataset`` and ``build_model``.
from cogdl import experiment
from cogdl.datasets import build_dataset
from cogdl.models import build_model
from cogdl.options import get_default_args

args = get_default_args(model="gcn", dataset="cora")
dataset = build_dataset(args)
model = build_model(args)
experiment(model=model, dataset=dataset)

# %%
# How to save a trained model?
# --------------------------
    """
    Args:
        in_feats: int
            Input feature size
        out_feats: int
            Output feature size
    """

    def __init__(self, in_feats, out_feats):
        super(GCNLayer, self).__init__()
        self.fc = torch.nn.Linear(in_feats, out_feats)

    def forward(self, graph, x):
        """Linear projection followed by sparse aggregation over the graph."""
        h = self.fc(x)
        h = spmm(graph, h)
        return h

# %%
# Using the customized GNN model with CogDL
# ------------------------------------
# Now that you have defined your own GNN, you can use the datasets/tasks in
# CogDL to immediately train and evaluate the model's performance.
from cogdl import experiment
from cogdl.datasets import build_dataset_from_name

data = build_dataset_from_name("cora")[0]

# Use the JKNet model as defined above
model = JKNet(data.num_features, data.num_classes, 32, 4)
experiment(model=model, dataset="cora", mw="node_classification_mw", dw="node_classification_dw")
# output projection logits, _ = self.gat_layers[-1](self.g, h, res_attn=None) # This is an equivalent replacement for tf.l2_normalize, see https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/math/l2_normalize for more information. logits = logits / (torch.max(torch.norm(logits, dim=1, keepdim=True), self.epsilon)) return logits if __name__ == "__main__": parser = options.get_training_parser() args, _ = parser.parse_known_args() args.mw = "heterogeneous_gnn_mw" args.dw = "heterogeneous_gnn_dw" args = options.parse_args_and_arch(parser, args) if args.dataset[0] == "gtn-acm": dataset = ACM_GTNDataset() elif args.dataset[0] == "gtn-dblp": dataset = DBLP_GTNDataset() elif args.dataset[0] == "gtn-imdb": dataset = IMDB_GTNDataset() else: raise NotImplementedError hgn = SimpleHGN(in_dims=dataset.num_features, num_classes=dataset.num_classes) experiment(dataset=dataset, model=hgn, dw="heterogeneous_gnn_dw", mw="heterogeneous_gnn_mw", args=args)