def __init__(self,
             graph_wrapper=None,
             buf_size=1000,
             batch_size=128,
             num_workers=1,
             samples=[30, 30],
             shuffle=True,
             phase="train"):
    super(ArxivDataGenerator, self).__init__(
        buf_size=buf_size,
        num_workers=num_workers,
        batch_size=batch_size,
        shuffle=shuffle)
    self.samples = samples
    self.d_name = "ogbn-arxiv"
    self.graph_wrapper = graph_wrapper

    # Load ogbn-arxiv and make the citation graph undirected.
    dataset = PglNodePropPredDataset(name=self.d_name)
    splitted_idx = dataset.get_idx_split()
    self.phase = phase
    graph, label = dataset[0]
    graph = to_undirected(graph)
    self.graph = graph
    self.num_nodes = graph.num_nodes

    # Pick the node ids and labels of the requested split.
    if self.phase == "train":
        nodes_idx = splitted_idx["train"]
        labels = label[nodes_idx]
    elif self.phase == "valid":
        nodes_idx = splitted_idx["valid"]
        labels = label[nodes_idx]
    elif self.phase == "test":
        nodes_idx = splitted_idx["test"]
        labels = label[nodes_idx]
    self.nodes_idx = nodes_idx
    self.labels = labels
    self.sample_based_line_example(nodes_idx, labels)
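# Hypothetical usage sketch of the generator defined above. The `generator()`
# iteration method and the names `gw`, `exe`, `train_prog`, `loss` are
# assumptions borrowed from the surrounding training script, not the exact API;
# it only illustrates how the constructor arguments are meant to be used.
#
#   train_loader = ArxivDataGenerator(
#       graph_wrapper=gw,        # GraphWrapper built for "ogbn-arxiv"
#       batch_size=128,
#       samples=[30, 30],        # neighbour fan-out per hop (two-hop sampling)
#       phase="train")
#   for feed_dict in train_loader.generator():
#       exe.run(train_prog, feed=feed_dict, fetch_list=[loss.name])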
    wf.flush()
    return max_cor_acc


if __name__ == '__main__':
    parser = get_config()
    print('===========args==============')
    print(parser)
    print('=============================')

    startup_prog = F.default_startup_program()
    train_prog = F.default_main_program()
    place = F.CPUPlace() if parser.place < 0 else F.CUDAPlace(parser.place)

    dataset = PglNodePropPredDataset(name="ogbn-arxiv")
    split_idx = dataset.get_idx_split()
    graph, label = dataset[0]
    print(label.shape)
    graph = to_undirected(graph)
    graph = add_self_loop(graph)

    with F.unique_name.guard():
        with F.program_guard(train_prog, startup_prog):
            gw = pgl.graph_wrapper.GraphWrapper(
                name="arxiv",
                node_feat=graph.node_feat_info(),
                place=place)
            if parser.use_label_e:
                model = Arxiv_label_embedding_model(gw, parser.hidden_size,
evaluator = OgbEvaluator()

train_prog = F.Program()
startup_prog = F.Program()
args.num_nodes = evaluator.num_nodes

if args.use_cuda:
    dev_list = F.cuda_places()
    place = dev_list[0]
    dev_count = len(dev_list)
else:
    place = F.CPUPlace()
    dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
assert dev_count == 1, "The program does not support multiple devices yet!"

dataset = PglNodePropPredDataset(name="ogbn-arxiv")
graph, label = dataset[0]
graph = to_undirected(graph)

# Select the model class by name; defaults to the plain BaseGraph model.
if args.model is None:
    Model = BaseGraph
elif args.model.upper() == "MLP":
    Model = MLPModel
elif args.model.upper() == "SAGE":
    Model = SAGEModel
elif args.model.upper() == "GAT":
    Model = GATModel
elif args.model.upper() == "GCN":
    Model = GCNModel
elif args.model.upper() == "GAAN":
    Model = GAANModel
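# The if/elif dispatch above can equivalently be written as a lookup table;
# a small sketch using the same class names (a design alternative, not the
# original code):
MODEL_REGISTRY = {
    "MLP": MLPModel,
    "SAGE": SAGEModel,
    "GAT": GATModel,
    "GCN": GCNModel,
    "GAAN": GAANModel,
}
Model = (BaseGraph if args.model is None
         else MODEL_REGISTRY[args.model.upper()])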
def aggregate_node_features(graph):
    """Scatter edge features onto their destination nodes as node features."""
    efeat = graph.edge_feat["feat"]
    graph.edge_feat["feat"] = efeat
    nfeat = np.zeros((graph.num_nodes, efeat.shape[-1]), dtype="float32")
    edges_dst = graph.edges[:, 1]
    np_scatter(edges_dst, efeat, nfeat)
    graph.node_feat["feat"] = nfeat


if __name__ == '__main__':
    parser = get_config()
    print('===========args==============')
    print(parser)
    print('=============================')

    dataset = PglNodePropPredDataset(name="ogbn-proteins")
    split_idx = dataset.get_idx_split()
    graph, label = dataset[0]
    aggregate_node_features(graph)

    place = F.CPUPlace() if parser.place < 0 else F.CUDAPlace(parser.place)
    startup_prog = F.default_startup_program()
    train_prog = F.default_main_program()

    with F.program_guard(train_prog, startup_prog):
        with F.unique_name.guard():
            gw = pgl.graph_wrapper.GraphWrapper(
                name="proteins",
                node_feat=graph.node_feat_info(),
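# `np_scatter` is used by aggregate_node_features above but is not shown in
# this excerpt. Below is a minimal, self-contained NumPy sketch under the
# assumption that it mean-aggregates edge features into the pre-allocated
# per-node buffer; whether the original helper averages or sums is an
# assumption, not confirmed by this excerpt.
import numpy as np

def np_scatter(index, src, out):
    """Mean-aggregate rows of `src` into `out` at the positions in `index`."""
    counts = np.zeros(out.shape[0], dtype="float32")
    np.add.at(out, index, src)        # sum edge features per destination node
    np.add.at(counts, index, 1.0)     # count incoming edges per node
    out /= np.maximum(counts, 1.0)[:, None]  # average; avoid division by zero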
def main():
    """main
    """
    # Training settings
    parser = argparse.ArgumentParser(description='Graph Dataset')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--dataset', type=str, default="ogbn-proteins",
                        help='dataset name (default: ogbn-proteins)')
    args = parser.parse_args()

    #device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    #place = fluid.CUDAPlace(0)
    place = fluid.CPUPlace()  # Dataset too big to use GPU

    ### automatic dataloading and splitting
    dataset = PglNodePropPredDataset(name=args.dataset)
    splitted_idx = dataset.get_idx_split()

    ### automatic evaluator. takes dataset name as input
    evaluator = Evaluator(args.dataset)

    graph_data, label = dataset[0]

    train_program = fluid.Program()
    startup_program = fluid.Program()
    test_program = fluid.Program()

    # degree normalize
    indegree = graph_data.indegree()
    norm = np.zeros_like(indegree, dtype="float32")
    norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
    graph_data.node_feat["norm"] = np.expand_dims(norm, -1).astype("float32")
    graph_data.node_feat["x"] = np.zeros((len(indegree), 1), dtype="int64")
    graph_data.edge_feat["feat"] = graph_data.edge_feat["feat"].astype(
        "float32")

    model = GNNModel(name="gnn", num_task=dataset.num_tasks,
                     emb_dim=64, num_layers=2)

    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.StaticGraphWrapper("graph", graph_data, place)
        pred = model.forward(gw)
        sigmoid_pred = fluid.layers.sigmoid(pred)

    val_program = train_program.clone(for_test=True)

    initializer = []
    with fluid.program_guard(train_program, startup_program):
        train_node_index, init = paddle_helper.constant(
            "train_node_index", dtype="int64", value=splitted_idx["train"])
        initializer.append(init)
        train_node_label, init = paddle_helper.constant(
            "train_node_label",
            dtype="float32",
            value=label[splitted_idx["train"]].astype("float32"))
        initializer.append(init)

        train_pred_t = fluid.layers.gather(pred, train_node_index)
        train_loss_t = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=train_pred_t, label=train_node_label)
        train_loss_t = fluid.layers.reduce_sum(train_loss_t)
        train_pred_t = fluid.layers.sigmoid(train_pred_t)

        adam = fluid.optimizer.Adam(
            learning_rate=1e-2,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0005))
        adam.minimize(train_loss_t)

    exe = fluid.Executor(place)
    exe.run(startup_program)
    gw.initialize(place)
    for init in initializer:
        init(place)

    for epoch in range(1, args.epochs + 1):
        loss = exe.run(train_program, feed={}, fetch_list=[train_loss_t])
        print("Loss %s" % loss[0])

        print("Evaluating...")
        y_pred = exe.run(val_program, feed={}, fetch_list=[sigmoid_pred])[0]
        result = {}
        input_dict = {
            "y_true": label[splitted_idx["train"]],
            "y_pred": y_pred[splitted_idx["train"]]
        }
        result["train"] = evaluator.eval(input_dict)
        input_dict = {
            "y_true": label[splitted_idx["valid"]],
            "y_pred": y_pred[splitted_idx["valid"]]
        }
        result["valid"] = evaluator.eval(input_dict)
        input_dict = {
            "y_true": label[splitted_idx["test"]],
            "y_pred": y_pred[splitted_idx["test"]]
        }
        result["test"] = evaluator.eval(input_dict)
        print(result)