def calculate_loss(name, np_cached_h, node_label_val, num_classes, args):
    """Build the classification head and its loss for one data split.

    Creates constant tensors for the cached hidden states and node
    labels, applies a fully-connected layer, and computes the mean
    softmax cross-entropy loss. Accuracy is attached only for
    non-train splits.

    Returns:
        dict with keys 'loss', 'acc' (None for train), 'probs' and
        'initializer' (list of constant initializer callables).
    """
    inits = []

    cached_h, h_init = paddle_helper.constant(
        "const_%s_cached_h" % name, dtype='float32', value=np_cached_h)
    inits.append(h_init)

    node_label, label_init = paddle_helper.constant(
        "%s_node_label" % (name), dtype='int64', value=node_label_val)
    inits.append(label_init)

    # The fixed layer name 'fc' is presumably used so that every split
    # shares the same FC parameters — TODO confirm against the caller.
    logits = fluid.layers.fc(
        cached_h, size=num_classes, bias_attr=args.bias, name='fc')
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=node_label, return_softmax=True)
    mean_loss = fluid.layers.mean(ce_loss)

    acc = (fluid.layers.accuracy(input=probs, label=node_label, k=1)
           if name != 'train' else None)

    return {
        'loss': mean_loss,
        'acc': acc,
        'probs': probs,
        'initializer': inits
    }
def pre_gather(features, name_prefix, node_index_val):
    """Select the rows of ``features`` addressed by the given node indices.

    Builds a constant index tensor named ``<name_prefix>_node_index``
    and gathers the corresponding feature rows.

    Returns:
        (gathered_features, index_initializer)
    """
    index_tensor, index_init = paddle_helper.constant(
        "%s_node_index" % (name_prefix), dtype='int32', value=node_index_val)
    gathered = fluid.layers.gather(features, index_tensor)
    return gathered, index_init
def __create_graph_edge_feat(self, edge_feat, collector):
    """Convert edge features into paddlepaddle constant tensors.

    Args:
        edge_feat: dict mapping feature name -> numpy array of values.
        collector: list that receives each constant's initializer so
            the caller can run them later.

    Side effects:
        Stores each created tensor in ``self._edge_feat_tensor_dict``
        under its feature name.
    """
    # Removed an unused local (`edge_feat_shape`) that was assigned but
    # never read.
    for edge_feat_name, edge_feat_value in edge_feat.items():
        edge_feat_dtype = edge_feat_value.dtype
        self._edge_feat_tensor_dict[
            edge_feat_name], init = paddle_helper.constant(
                name=self.__data_name_prefix + '_' + edge_feat_name,
                dtype=edge_feat_dtype,
                value=edge_feat_value)
        collector.append(init)
def __create_graph_node_feat(self, node_feat, collector):
    """Convert node features into paddlepaddle constant tensors.

    Args:
        node_feat: dict mapping feature name -> numpy array of values.
        collector: list that receives each constant's initializer so
            the caller can run them later.

    Side effects:
        Stores each created tensor in ``self._node_feat_tensor_dict``
        under its feature name.
    """
    # Removed an unused local (`node_feat_shape`) that was assigned but
    # never read.
    for node_feat_name, node_feat_value in node_feat.items():
        node_feat_dtype = node_feat_value.dtype
        self._node_feat_tensor_dict[
            node_feat_name], init = paddle_helper.constant(
                name=self.__data_name_prefix + '_' + node_feat_name,
                dtype=node_feat_dtype,
                value=node_feat_value)
        collector.append(init)
def main(args):
    """Train, validate and test a GraphSAGE model on the loaded dataset.

    Builds the train program (constant feature tensor + graph wrapper +
    model), clones it for evaluation before attaching the optimizer,
    then drives multiprocess sample readers for each split.
    """
    data = load_data(args.normalize, args.symmetry)
    log.info("preprocess finish")
    log.info("Train Examples: %s" % len(data["train_index"]))
    log.info("Val Examples: %s" % len(data["val_index"]))
    log.info("Test Examples: %s" % len(data["test_index"]))
    log.info("Num nodes %s" % data["graph"].num_nodes)
    log.info("Num edges %s" % data["graph"].num_edges)
    log.info("Average Degree %s" % np.mean(data["graph"].indegree()))
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()
    # Neighbor fan-out per hop; a sample size <= 0 disables that hop.
    samples = []
    if args.samples_1 > 0:
        samples.append(args.samples_1)
    if args.samples_2 > 0:
        samples.append(args.samples_2)
    with fluid.program_guard(train_program, startup_program):
        feature, feature_init = paddle_helper.constant(
            "feat",
            dtype=data['feature'].dtype,
            value=data['feature'],
            hide_batch_size=False)
        # The graph wrapper is pinned to CPUPlace even when training on
        # GPU — presumably the sampled subgraph data stays host-side;
        # TODO confirm against the reader implementation.
        graph_wrapper = pgl.graph_wrapper.GraphWrapper(
            "sub_graph",
            fluid.CPUPlace(),
            node_feat=data['graph'].node_feat_info())
        model_loss, model_acc = build_graph_model(
            graph_wrapper,
            num_class=data["num_class"],
            feature=feature,
            hidden_size=args.hidden_size,
            graphsage_type=args.graphsage_type,
            k_hop=len(samples))
    # Clone BEFORE the optimizer is attached so the test program carries
    # no backward/optimizer ops.
    test_program = train_program.clone(for_test=True)
    with fluid.program_guard(train_program, startup_program):
        adam = fluid.optimizer.Adam(learning_rate=args.lr)
        adam.minimize(model_loss)
    exe = fluid.Executor(place)
    exe.run(startup_program)
    # Push the constant feature values onto the chosen device.
    feature_init(place)
    train_iter = reader.multiprocess_graph_reader(
        data['graph'],
        graph_wrapper,
        samples=samples,
        num_workers=args.sample_workers,
        batch_size=args.batch_size,
        with_parent_node_index=True,
        node_index=data['train_index'],
        node_label=data["train_label"])
    val_iter = reader.multiprocess_graph_reader(
        data['graph'],
        graph_wrapper,
        samples=samples,
        num_workers=args.sample_workers,
        batch_size=args.batch_size,
        with_parent_node_index=True,
        node_index=data['val_index'],
        node_label=data["val_label"])
    test_iter = reader.multiprocess_graph_reader(
        data['graph'],
        graph_wrapper,
        samples=samples,
        num_workers=args.sample_workers,
        batch_size=args.batch_size,
        with_parent_node_index=True,
        node_index=data['test_index'],
        node_label=data["test_label"])
    for epoch in range(args.epoch):
        run_epoch(
            train_iter,
            program=train_program,
            exe=exe,
            prefix="train",
            model_loss=model_loss,
            model_acc=model_acc,
            epoch=epoch)
        # Val/test reuse the optimizer-free clone of the train program.
        run_epoch(
            val_iter,
            program=test_program,
            exe=exe,
            prefix="val",
            model_loss=model_loss,
            model_acc=model_acc,
            log_per_step=10000,
            epoch=epoch)
        run_epoch(
            test_iter,
            program=test_program,
            prefix="test",
            exe=exe,
            model_loss=model_loss,
            model_acc=model_acc,
            log_per_step=10000,
            epoch=epoch)
def __create_graph_attr(self, graph):
    """Create graph attributes for paddlepaddle.

    Materializes the graph's structural arrays (edges, unique
    destinations, LoD, degrees, node ids) and its node/edge features as
    named constant tensors, appending every constant's initializer to
    ``self._initializers`` for the caller to run later.
    """
    # Edges sorted by destination so that all in-edges of a node are
    # contiguous; `eid` maps back to the original edge order.
    src, dst, eid = graph.sorted_edges(sort_by="dst")
    indegree = graph.indegree()
    nodes = graph.nodes
    uniq_dst = nodes[indegree > 0]
    uniq_dst_count = indegree[indegree > 0]
    # Turn per-node in-degree counts into cumulative offsets with a
    # leading 0, i.e. a LoD-style segment boundary array.
    uniq_dst_count = np.cumsum(uniq_dst_count, dtype='int32')
    uniq_dst_count = np.insert(uniq_dst_count, 0, 0)
    graph_lod = graph.graph_lod
    num_graph = graph.num_graph
    num_edges = len(src)
    if num_edges == 0:
        # Fake Graph: substitute a single self-loop on node 0 so the
        # downstream constant tensors are never empty.
        src = np.array([0], dtype="int64")
        dst = np.array([0], dtype="int64")
        eid = np.array([0], dtype="int64")
        uniq_dst_count = np.array([0, 1], dtype="int32")
        uniq_dst = np.array([0], dtype="int64")
    # Re-index edge features into the dst-sorted edge order.
    edge_feat = {}
    for key, value in graph.edge_feat.items():
        edge_feat[key] = value[eid]
    node_feat = graph.node_feat
    self.__create_graph_node_feat(node_feat, self._initializers)
    self.__create_graph_edge_feat(edge_feat, self._initializers)
    # NOTE: num_edges keeps the REAL edge count even when the fake
    # self-loop edge was substituted above.
    self._num_edges, init = paddle_helper.constant(
        dtype="int64",
        value=np.array([num_edges], dtype="int64"),
        name=self._data_name_prefix + '/num_edges')
    self._initializers.append(init)
    self._num_graph, init = paddle_helper.constant(
        dtype="int64",
        value=np.array([num_graph], dtype="int64"),
        name=self._data_name_prefix + '/num_graph')
    self._initializers.append(init)
    self._edges_src, init = paddle_helper.constant(
        dtype="int64",
        value=src,
        name=self._data_name_prefix + '/edges_src')
    self._initializers.append(init)
    self._edges_dst, init = paddle_helper.constant(
        dtype="int64",
        value=dst,
        name=self._data_name_prefix + '/edges_dst')
    self._initializers.append(init)
    self._num_nodes, init = paddle_helper.constant(
        dtype="int64",
        hide_batch_size=False,
        value=np.array([graph.num_nodes]),
        name=self._data_name_prefix + '/num_nodes')
    self._initializers.append(init)
    self._edge_uniq_dst, init = paddle_helper.constant(
        name=self._data_name_prefix + "/uniq_dst",
        dtype="int64",
        value=uniq_dst)
    self._initializers.append(init)
    self._edge_uniq_dst_count, init = paddle_helper.constant(
        name=self._data_name_prefix + "/uniq_dst_count",
        dtype="int32",
        value=uniq_dst_count)
    self._initializers.append(init)
    self._graph_lod, init = paddle_helper.constant(
        name=self._data_name_prefix + "/graph_lod",
        dtype="int32",
        value=graph_lod)
    self._initializers.append(init)
    node_ids_value = np.arange(0, graph.num_nodes, dtype="int64")
    self._node_ids, init = paddle_helper.constant(
        name=self._data_name_prefix + "/node_ids",
        dtype="int64",
        value=node_ids_value)
    self._initializers.append(init)
    self._indegree, init = paddle_helper.constant(
        name=self._data_name_prefix + "/indegree",
        dtype="int64",
        value=indegree)
    self._initializers.append(init)
def main(args):
    """Train a 2-layer GCN for node classification and report test accuracy.

    Builds one train program plus val/test clones sharing the same
    startup program, runs 200 epochs, and evaluates on the test split
    at the end.
    """
    dataset = load(args.dataset)
    # normalize: symmetric D^{-1/2} scaling on nodes with in-degree > 0.
    indegree = dataset.graph.indegree()
    norm = np.zeros_like(indegree, dtype="float32")
    norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
    dataset.graph.node_feat["norm"] = np.expand_dims(norm, -1)
    # Reshape indices/labels to column vectors for gather / cross-entropy.
    train_index = dataset.train_index
    train_label = np.expand_dims(dataset.y[train_index], -1)
    train_index = np.expand_dims(train_index, -1)
    val_index = dataset.val_index
    val_label = np.expand_dims(dataset.y[val_index], -1)
    val_index = np.expand_dims(val_index, -1)
    test_index = dataset.test_index
    test_label = np.expand_dims(dataset.y[test_index], -1)
    test_index = np.expand_dims(test_index, -1)
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()
    # NOTE(review): this fresh Program is overwritten by the clone of
    # train_program below — the assignment here appears redundant.
    test_program = fluid.Program()
    hidden_size = 16
    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.StaticGraphWrapper(
            name="graph", graph=dataset.graph, place=place)
        output = pgl.layers.gcn(
            gw,
            gw.node_feat["words"],
            hidden_size,
            activation="relu",
            norm=gw.node_feat['norm'],
            name="gcn_layer_1")
        output = fluid.layers.dropout(
            output, 0.5, dropout_implementation='upscale_in_train')
        output = pgl.layers.gcn(
            gw,
            output,
            dataset.num_classes,
            activation=None,
            norm=gw.node_feat['norm'],
            name="gcn_layer_2")
    # Clone before the split-specific heads/optimizer are added.
    val_program = train_program.clone(for_test=True)
    test_program = train_program.clone(for_test=True)
    initializer = []
    with fluid.program_guard(train_program, startup_program):
        train_node_index, init = paddle_helper.constant(
            "train_node_index", dtype="int32", value=train_index)
        initializer.append(init)
        train_node_label, init = paddle_helper.constant(
            "train_node_label", dtype="int64", value=train_label)
        initializer.append(init)
        pred = fluid.layers.gather(output, train_node_index)
        train_loss_t = fluid.layers.softmax_with_cross_entropy(
            logits=pred, label=train_node_label)
        train_loss_t = fluid.layers.reduce_mean(train_loss_t)
        adam = fluid.optimizer.Adam(
            learning_rate=1e-2,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0005))
        adam.minimize(train_loss_t)
    with fluid.program_guard(val_program, startup_program):
        val_node_index, init = paddle_helper.constant(
            "val_node_index", dtype="int32", value=val_index)
        initializer.append(init)
        val_node_label, init = paddle_helper.constant(
            "val_node_label", dtype="int64", value=val_label)
        initializer.append(init)
        pred = fluid.layers.gather(output, val_node_index)
        val_loss_t, pred = fluid.layers.softmax_with_cross_entropy(
            logits=pred, label=val_node_label, return_softmax=True)
        val_acc_t = fluid.layers.accuracy(
            input=pred, label=val_node_label, k=1)
        val_loss_t = fluid.layers.reduce_mean(val_loss_t)
    with fluid.program_guard(test_program, startup_program):
        test_node_index, init = paddle_helper.constant(
            "test_node_index", dtype="int32", value=test_index)
        initializer.append(init)
        test_node_label, init = paddle_helper.constant(
            "test_node_label", dtype="int64", value=test_label)
        initializer.append(init)
        pred = fluid.layers.gather(output, test_node_index)
        test_loss_t, pred = fluid.layers.softmax_with_cross_entropy(
            logits=pred, label=test_node_label, return_softmax=True)
        test_acc_t = fluid.layers.accuracy(
            input=pred, label=test_node_label, k=1)
        test_loss_t = fluid.layers.reduce_mean(test_loss_t)
    exe = fluid.Executor(place)
    exe.run(startup_program)
    gw.initialize(place)
    for init in initializer:
        init(place)
    dur = []
    for epoch in range(200):
        # Skip the first 3 epochs when timing to avoid warm-up noise.
        if epoch >= 3:
            t0 = time.time()
        train_loss = exe.run(train_program,
                             feed={},
                             fetch_list=[train_loss_t],
                             return_numpy=True)
        train_loss = train_loss[0]
        if epoch >= 3:
            time_per_epoch = 1.0 * (time.time() - t0)
            dur.append(time_per_epoch)
        val_loss, val_acc = exe.run(val_program,
                                    feed={},
                                    fetch_list=[val_loss_t, val_acc_t],
                                    return_numpy=True)
        log.info("Epoch %d " % epoch + "(%.5lf sec) " % np.mean(dur) +
                 "Train Loss: %f " % train_loss + "Val Loss: %f " % val_loss +
                 "Val Acc: %f " % val_acc)
    # Final evaluation on the held-out test split after training ends.
    test_loss, test_acc = exe.run(test_program,
                                  feed={},
                                  fetch_list=[test_loss_t, test_acc_t],
                                  return_numpy=True)
    log.info("Accuracy: %f" % test_acc)
# Script fragment: builds the training program for an OGB products/arxiv
# model. Relies on `dataset`, `train_prog`, `startup_prog` and `parser`
# being defined earlier in the file (outside this view).
# NOTE(review): `split_idx` is assigned but not used within this span —
# presumably consumed later in the file; verify.
split_idx = dataset.get_idx_split()
graph, label = dataset[0]
print(label.shape)
with F.program_guard(train_prog, startup_prog):
    with F.unique_name.guard():
        # One graph wrapper per neighbor-sampling level.
        gw_list = []
        for i in range(len(parser.sizes)):
            gw_list.append(pgl.graph_wrapper.GraphWrapper(
                name="product_" + str(i)))
        feature_input, feat_init = paddle_helper.constant(
            name='node_feat_input',
            dtype='float32',
            value=graph.node_feat['feat'])
        if parser.use_label_e:
            model = Products_label_embedding_model(feature_input, gw_list,
                                                   parser.hidden_size,
                                                   parser.num_heads,
                                                   parser.dropout,
                                                   parser.num_layers)
        else:
            # NOTE(review): `gw` is not defined anywhere in this span —
            # this branch likely crashes with a NameError if taken;
            # probably meant `gw_list` or a single wrapper. Confirm.
            model = Arxiv_baseline_model(gw, parser.hidden_size,
                                         parser.num_heads, parser.dropout,
                                         parser.num_layers)
        # test_prog=train_prog.clone(for_test=True)
        model.train_program()
        adam_optimizer = optimizer_func(parser.lr)  # optimizer
        adam_optimizer.minimize(model.avg_cost)
def __create_graph_attr(self, graph):
    """Create graph attributes for paddlepaddle.

    Materializes the graph structure (edges, unique destinations,
    bucketing index, node ids, in-degree) and node/edge features as
    named constant tensors, appending every constant's initializer to
    ``self._initializers`` for the caller to run later.
    """
    # Fix: removed the dead statement
    #   src, dst = list(zip(*graph.edges))
    # whose result was immediately overwritten by sorted_edges() below
    # (and which raised ValueError on graphs with zero edges).
    src, dst, eid = graph.sorted_edges(sort_by="dst")
    indegree = graph.indegree()
    nodes = graph.nodes
    uniq_dst = nodes[indegree > 0]
    uniq_dst_count = indegree[indegree > 0]
    # Re-index edge features into the dst-sorted edge order.
    edge_feat = {}
    for key, value in graph.edge_feat.items():
        edge_feat[key] = value[eid]
    node_feat = graph.node_feat
    self.__create_graph_node_feat(node_feat, self._initializers)
    self.__create_graph_edge_feat(edge_feat, self._initializers)
    self._edges_src, init = paddle_helper.constant(
        dtype="int32",
        value=src,
        name=self.__data_name_prefix + '_edges_src')
    self._initializers.append(init)
    self._edges_dst, init = paddle_helper.constant(
        dtype="int32",
        value=dst,
        name=self.__data_name_prefix + '_edges_dst')
    self._initializers.append(init)
    self._num_nodes, init = paddle_helper.constant(
        dtype="int32",
        hide_batch_size=False,
        value=np.array([graph.num_nodes]),
        name=self.__data_name_prefix + '_num_nodes')
    self._initializers.append(init)
    self._edge_uniq_dst, init = paddle_helper.constant(
        name=self.__data_name_prefix + "_uniq_dst",
        dtype="int32",
        value=uniq_dst)
    self._initializers.append(init)
    self._edge_uniq_dst_count, init = paddle_helper.constant(
        name=self.__data_name_prefix + "_uniq_dst_count",
        dtype="int32",
        value=uniq_dst_count)
    self._initializers.append(init)
    # LoD constant grouping edge positions by destination node.
    bucket_value = np.expand_dims(
        np.arange(0, len(dst), dtype="int32"), -1)
    self._bucketing_index, init = paddle_helper.lod_constant(
        name=self.__data_name_prefix + "_bucketing_index",
        dtype="int32",
        lod=list(uniq_dst_count),
        value=bucket_value)
    self._initializers.append(init)
    node_ids_value = np.arange(0, graph.num_nodes, dtype="int32")
    self._node_ids, init = paddle_helper.constant(
        name=self.__data_name_prefix + "_node_ids",
        dtype="int32",
        value=node_ids_value)
    self._initializers.append(init)
    self._indegree, init = paddle_helper.constant(
        name=self.__data_name_prefix + "_indegree",
        dtype="int32",
        value=indegree)
    self._initializers.append(init)
def main():
    """Train a GNN on an OGB node-property-prediction dataset.

    Builds the train program and an inference clone, trains with
    sigmoid cross-entropy, and reports train/valid/test metrics every
    epoch via the official OGB evaluator.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='Graph Dataset')
    parser.add_argument(
        '--epochs',
        type=int,
        default=100,
        help='number of epochs to train (default: 100)')
    parser.add_argument(
        '--dataset',
        type=str,
        default="ogbn-proteins",
        help='dataset name (default: proteinfunc)')
    args = parser.parse_args()

    place = fluid.CPUPlace()  # Dataset too big to use GPU

    ### automatic dataloading and splitting
    dataset = PglNodePropPredDataset(name=args.dataset)
    splitted_idx = dataset.get_idx_split()

    ### automatic evaluator. takes dataset name as input
    evaluator = Evaluator(args.dataset)

    graph_data, label = dataset[0]

    # Fix: removed an unused `test_program = fluid.Program()` — the
    # inference program is `val_program`, cloned from train below.
    train_program = fluid.Program()
    startup_program = fluid.Program()

    # degree normalize: D^{-1/2} scaling on nodes with in-degree > 0.
    indegree = graph_data.indegree()
    norm = np.zeros_like(indegree, dtype="float32")
    norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
    graph_data.node_feat["norm"] = np.expand_dims(norm, -1).astype("float32")
    graph_data.node_feat["x"] = np.zeros((len(indegree), 1), dtype="int64")
    graph_data.edge_feat["feat"] = graph_data.edge_feat["feat"].astype(
        "float32")

    model = GNNModel(
        name="gnn", num_task=dataset.num_tasks, emb_dim=64, num_layers=2)

    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.StaticGraphWrapper("graph", graph_data, place)
        pred = model.forward(gw)
        sigmoid_pred = fluid.layers.sigmoid(pred)

    # Clone before the loss/optimizer so inference has no backward ops.
    val_program = train_program.clone(for_test=True)

    initializer = []
    with fluid.program_guard(train_program, startup_program):
        train_node_index, init = paddle_helper.constant(
            "train_node_index", dtype="int64", value=splitted_idx["train"])
        initializer.append(init)
        train_node_label, init = paddle_helper.constant(
            "train_node_label",
            dtype="float32",
            value=label[splitted_idx["train"]].astype("float32"))
        initializer.append(init)
        train_pred_t = fluid.layers.gather(pred, train_node_index)
        train_loss_t = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=train_pred_t, label=train_node_label)
        train_loss_t = fluid.layers.reduce_sum(train_loss_t)
        # NOTE(review): this sigmoid of the train predictions is never
        # fetched or used afterwards — kept for behavior parity, but it
        # looks removable.
        train_pred_t = fluid.layers.sigmoid(train_pred_t)

        adam = fluid.optimizer.Adam(
            learning_rate=1e-2,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0005))
        adam.minimize(train_loss_t)

    exe = fluid.Executor(place)
    exe.run(startup_program)
    gw.initialize(place)
    for init in initializer:
        init(place)

    for epoch in range(1, args.epochs + 1):
        loss = exe.run(train_program, feed={}, fetch_list=[train_loss_t])
        print("Loss %s" % loss[0])
        print("Evaluating...")
        # Full-graph inference, then slice per split for the evaluator.
        y_pred = exe.run(val_program, feed={}, fetch_list=[sigmoid_pred])[0]
        result = {}
        input_dict = {
            "y_true": label[splitted_idx["train"]],
            "y_pred": y_pred[splitted_idx["train"]]
        }
        result["train"] = evaluator.eval(input_dict)
        input_dict = {
            "y_true": label[splitted_idx["valid"]],
            "y_pred": y_pred[splitted_idx["valid"]]
        }
        result["valid"] = evaluator.eval(input_dict)
        input_dict = {
            "y_true": label[splitted_idx["test"]],
            "y_pred": y_pred[splitted_idx["test"]]
        }
        result["test"] = evaluator.eval(input_dict)
        print(result)