def test_one_community_benchmarks():
    """Smoke-test the full benchmarking pipeline (rank, average, print) on
    one-community datasets with a small suite of ranking algorithms."""
    pg.load_backend("numpy")
    pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
    # Convergence settings shared by every algorithm under comparison.
    settings = {"preprocessor": pre, "max_iters": 10000, "tol": 1.E-9}
    algorithms = {
        "ppr0.85": pg.PageRank(alpha=0.85, **settings),
        "ppr0.99": pg.PageRank(alpha=0.99, **settings),
        "hk3": pg.HeatKernel(t=3, **settings),
        "hk5": pg.HeatKernel(t=5, **settings),
        "tuned": pg.ParameterTuner(**settings),
    }
    loader = pg.load_datasets_one_community(["graph9", "bigraph"])
    results = pg.benchmark(algorithms, loader, pg.AUC, fraction_of_training=.8)
    pg.benchmark_print(pg.benchmark_average(pg.benchmark_ranks(results)))
def test_gnn_errors():
    # Checks that GNN training/accuracy utilities fail under the wrong backend
    # and succeed once the tensorflow backend is explicitly loaded.
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    # 60/20/20 overall split: carve validation out of the 80% training share.
    training, validation = pg.split(training, 1 - 0.2 / 0.8)
    from tensorflow.keras.layers import Dropout, Dense
    from tensorflow.keras.regularizers import L2

    class APPNP(tf.keras.Sequential):
        # Two-layer MLP whose class predictions are propagated over the graph
        # with a fixed-iteration personalized PageRank (APPNP-style model).
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__([
                Dropout(0.5, input_shape=(num_inputs,)),
                Dense(hidden, activation="relu", kernel_regularizer=L2(1.E-5)),
                Dropout(0.5),
                Dense(num_outputs, activation="relu")])
            self.ranker = pg.PageRank(0.9, renormalize=True, assume_immutability=True,
                                      use_quotient=False, error_type="iters", max_iters=10)  # 10 iterations

        def call(self, features, graph, training=False):
            predict = super().call(features, training=training)
            # Graph dropout is applied only while training.
            propagate = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            return tf.nn.softmax(propagate, axis=1)

    model = APPNP(features.shape[1], labels.shape[1])
    with pytest.raises(Exception):
        # Training before loading the tensorflow backend must raise.
        pg.gnn_train(model, graph, features, labels, training, validation, test=test, epochs=2)
    pg.load_backend('tensorflow')
    # NOTE(review): argument order here is (features, graph) while the failing
    # call above used (graph, features) — confirm which order pg.gnn_train expects.
    pg.gnn_train(model, features, graph, labels, training, validation, test=test, epochs=300, patience=2)
    predictions = model(features, graph)
    pg.load_backend('numpy')
    with pytest.raises(Exception):
        # Accuracy over tensorflow predictions must raise under the numpy backend.
        pg.gnn_accuracy(labels, predictions, test)
def test_appnp_torch():
    # Trains a torch-based APPNP model using pygrank propagation and asserts
    # a loose lower bound on test accuracy.
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    # 60/20/20 overall split: carve validation out of the 80% training share.
    training, validation = pg.split(training, 1 - 0.2 / 0.8)

    class AutotuneAPPNP(torch.nn.Module):
        # Two-layer MLP whose outputs are propagated over the graph with a
        # fixed-iteration personalized PageRank; exposes an L2 penalty in self.loss.
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__()
            self.layer1 = torch.nn.Linear(num_inputs, hidden)
            self.layer2 = torch.nn.Linear(hidden, num_outputs)
            self.activation = torch.nn.ReLU()
            self.dropout = torch.nn.Dropout(0.5)
            self.num_outputs = num_outputs
            self.ranker = pg.PageRank(0.9, renormalize=True, assume_immutability=True,
                                      error_type="iters", max_iters=10)

        def forward(self, inputs, training=False):
            graph, features = inputs
            predict = self.dropout(torch.FloatTensor(features))
            predict = self.dropout(self.activation(self.layer1(predict)))
            predict = self.activation(self.layer2(predict))
            # Graph dropout is applied only while training.
            predict = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            ret = torch.nn.functional.softmax(predict, dim=1)
            # Accumulate an L2 regularization term over the first layer's
            # parameters; presumably read by the training loop — TODO confirm.
            self.loss = 0
            for param in self.layer1.parameters():
                self.loss = self.loss + 1E-5 * torch.norm(param)
            return ret

    def init_weights(m):
        # Xavier-initialize every linear layer; small constant bias.
        if isinstance(m, torch.nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)

    pg.load_backend('pytorch')
    model = AutotuneAPPNP(features.shape[1], labels.shape[1])
    model.apply(init_weights)
    pg.gnn_train(model, graph, features, labels, training, validation, epochs=50, patience=2)
    # TODO: higher numbers fail only on github actions - for local tests it is fine
    assert float(pg.gnn_accuracy(labels, model([graph, features]), test)) >= 0.2
    pg.load_backend('numpy')
def test_backend_load():
    """Verify that known backends can be loaded and invalid names are rejected."""
    for name in ("tensorflow", "matvec", "numpy"):
        pg.load_backend(name)
        assert pg.backend_name() == name
    # An unknown backend must raise and leave the current backend untouched.
    with pytest.raises(Exception):
        pg.load_backend("unknown")
    assert pg.backend_name() == "numpy"
def test_optimization_dict():
    """Check that sharing an optimization_dict across tuner runs both caches
    the expected number of entries and speeds up repeated ranking."""
    pg.load_backend("numpy")
    from timeit import default_timer as time
    graph = next(pg.load_datasets_graph(["bigraph"]))
    seeds = {str(i): 1 for i in range(200)}
    pre = pg.preprocessor(assume_immutability=True)
    pre(graph)  # warm the preprocessor cache so both timings start equal

    def timed_runs(**tuner_kwargs):
        # Run the parameter tuner ten times and return elapsed wall-clock time.
        start = time()
        for _ in range(10):
            pg.ParameterTuner(preprocessor=pre, tol=1.E-9, **tuner_kwargs).rank(graph, seeds)
        return time() - start

    unoptimized = timed_runs()
    optimization = dict()
    optimized = timed_runs(optimization_dict=optimization)
    assert len(optimization) == 20
    assert unoptimized > optimized
def test_appnp_tf():
    # Trains a keras-based APPNP model using pygrank propagation and asserts
    # a lower bound on test accuracy.
    graph, features, labels = pg.load_feature_dataset('synthfeats')
    training, test = pg.split(list(range(len(graph))), 0.8)
    # 60/20/20 overall split: carve validation out of the 80% training share.
    training, validation = pg.split(training, 1 - 0.2 / 0.8)

    class APPNP(tf.keras.Sequential):
        # Two-layer MLP whose class predictions are propagated over the graph
        # with a fixed-iteration personalized PageRank (APPNP-style model).
        def __init__(self, num_inputs, num_outputs, hidden=64):
            super().__init__([
                tf.keras.layers.Dropout(0.5, input_shape=(num_inputs, )),
                tf.keras.layers.Dense(
                    hidden, activation=tf.nn.relu,
                    kernel_regularizer=tf.keras.regularizers.L2(1.E-5)),
                tf.keras.layers.Dropout(0.5),
                tf.keras.layers.Dense(num_outputs, activation=tf.nn.relu),
            ])
            self.ranker = pg.PageRank(0.9, renormalize=True, assume_immutability=True,
                                      error_type="iters", max_iters=10)
            self.input_spec = None  # prevents some versions of tensorflow from checking call inputs

        def call(self, inputs, training=False):
            graph, features = inputs
            predict = super().call(features, training=training)
            # Graph dropout is applied only while training.
            predict = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0)
            return tf.nn.softmax(predict, axis=1)

    pg.load_backend('tensorflow')
    model = APPNP(features.shape[1], labels.shape[1])
    pg.gnn_train(model, graph, features, labels, training, validation, test=test, epochs=50)
    assert float(pg.gnn_accuracy(labels, model([graph, features]), test)) >= 0.5
    pg.load_backend('numpy')
self.num_outputs = num_outputs self.propagate_on_training = propagate_on_training def call(self, inputs, training=False): graph, features = inputs predict = super().call(features, training=training) if not training or self.propagate_on_training: predict = self.ranker.propagate( graph, predict, graph_dropout=self.graph_dropout if training else 0) return tf.nn.softmax(predict, axis=1) pg.load_backend('numpy') graph, features, labels = pg.load_feature_dataset('cora') for seed in range(10): training, test = pg.split(list(range(len(graph))), 0.8, seed=seed) training, validation = pg.split(training, 1 - 0.2 / 0.8, seed=seed) architectures = { "APPNP": APPNP(features.shape[1], labels.shape[1], alpha=0.9), #"LAPPNP": APPNP(features.shape[1], labels.shape[1], alpha=tf.Variable([0.85])), "APFNP": APPNP(features.shape[1], labels.shape[1], alpha="estimated") } pg.load_backend('tensorflow') accs = dict() for architecture, model in architectures.items(): pg.gnn_train(model, graph,
def supported_backends():
    """Load each supported backend in turn and yield its name."""
    backends = ("matvec", "pytorch", "tensorflow", "torch_sparse", "numpy")
    for name in backends:
        pg.load_backend(name)
        yield name
def supported_backends():
    """Load each backend exercised by these tests and yield its name,
    finishing with numpy."""
    for name in ("pytorch", "tensorflow", "numpy"):
        pg.load_backend(name)
        yield name
min_vals=[0.5, 5], measure=pg.Mabs, deviation_tol=0.1, tuning_backend="numpy") def call(self, inputs, training=False): graph, features = inputs predict = super().call(features, training=training) propagate = self.ranker.propagate(graph, predict, graph_dropout=0.5 if training else 0) return tf.nn.softmax(propagate, axis=1) graph, features, labels = pg.load_feature_dataset('citeseer') acc = 0 for _ in range(10): training, test = pg.split(list(range(len(graph))), 0.8) training, validation = pg.split(training, 1 - 0.2 / 0.8) pg.load_backend('tensorflow') # explicitly load the appropriate backend model = APPNP(features.shape[1], labels.shape[1]) pg.gnn_train(model, graph, features, labels, training, validation, optimizer=tf.optimizers.Adam(learning_rate=0.01)) acc += pg.gnn_accuracy(labels, model([graph, features]), test) / 10 print("Accuracy", pg.gnn_accuracy(labels, model([graph, features]), test)) print(acc)
import pygrank as pg

# Benchmark setup script: configures the matvec backend, the datasets, shared
# convergence settings, and the suite of ranking algorithms to compare.
pg.load_backend("matvec")
#datasets = ["amazon", "citeseer", "maven"]
datasets = ["amazon"]
community_size = 500
pre = pg.preprocessor(assume_immutability=True, normalization="symmetric")
# Convergence settings shared by all algorithms below.
convergence = {"tol": 1.E-9, "max_iters": 10000}
#convergence = {"error_type": "iters", "max_iters": 20}
# Hard-coded filter parameters; presumably the output of an earlier tuning
# run (used by the commented-out "exp" filter below) — TODO confirm origin.
tuned = [1]+[10000, 0.7573524630180889, 0.0, 1.0, 0.004950495049504955,
             0.4975492598764827, 0.2573767277717871, 0.0, 0.2549259876482698,
             0.009851975296539583, 0.5, 0.5, 0.5, 0.2549259876482698, 0.5,
             0.5, 0.5, 0.009851975296539583, 0.009851975296539583,
             0.25002450740123516, 0.2524752475247525, 0.0, 0.5, 0.0,
             0.009851975296539583, 0.0, 0.004950495049504955, 0.0, 0.5,
             0.004950495049504955, 0.7475247524752475, 0.004950495049504955,
             0.009851975296539583, 0.0, 0.995049504950495, 0.0, 0.5, 0.0,
             0.004950495049504955, 0.0]
# Ranking algorithms under comparison: personalized PageRank at several
# alpha values and heat kernels at several t values.
algorithms = {
    "ppr0.5": pg.PageRank(alpha=0.5, preprocessor=pre, **convergence),
    "ppr0.85": pg.PageRank(alpha=0.85, preprocessor=pre, **convergence),
    "ppr0.9": pg.PageRank(alpha=0.9, preprocessor=pre, **convergence),
    "ppr0.99": pg.PageRank(alpha=0.99, preprocessor=pre, **convergence),
    "hk2": pg.HeatKernel(t=2, preprocessor=pre, **convergence),
    "hk3": pg.HeatKernel(t=3, preprocessor=pre, **convergence),
    "hk5": pg.HeatKernel(t=5, preprocessor=pre, **convergence),
    "hk7": pg.HeatKernel(t=7, preprocessor=pre, **convergence),
    #"exp": pg.GenericGraphFilter(tuned, preprocessor=pre, error_type="iters", max_iters=41)
}
# Wrap every algorithm with the (identity) postprocessor.
postprocessor = pg.Tautology
algorithms = pg.benchmarks.create_variations(algorithms, postprocessor)
# Benchmark measures running time rather than ranking quality.
measure = pg.Time
optimization = pg.SelfClearDict()