def process() -> tuple:
    """Load the cached graph dataset, blank out node labels and bundle it for classification.

    Returns a ClassificationData holding the converted graphs, the targets,
    and an (estimator, params) pair from task_helper.
    """
    graphs, targets = dataset_helper.get_dataset_cached(graph_cache_file)
    graphs = graph_helper.get_graphs_only(graphs)
    # In-place conversion: nx.Graph objects -> (adjacency, labels) tuples.
    graph_helper.convert_graphs_to_adjs_tuples(graphs)
    # Replace every node label with 0 while keeping the label-list length.
    graphs = [(adj, [0] * len(labels)) for adj, labels in graphs]
    estimator, params = task_helper.get_graph_estimator_and_params(graphs, targets)
    return ClassificationData(graphs, targets, estimator, params)
def transform(self, X, y=None, **fit_params):
    """Convert X between nx.Graph objects and (adjacency, labels) tuples.

    Direction is chosen by self.from_nx_to_tuple; y and fit_params are
    accepted for sklearn-transformer compatibility and ignored.
    """
    if not self.from_nx_to_tuple:
        # Reverse direction: tuples back to graphs, mutated in place.
        graph_helper.convert_adjs_tuples_to_graphs(X)
        return X
    X = graph_helper.get_graphs_only(X)
    graph_helper.convert_graphs_to_adjs_tuples(X)
    return X
def test_convert_graph_datasets(self):
    """Each cached graph dataset converts to (sparse adjacency, label list) tuples."""
    for graph_dataset, dataset_name in self.iterate_graph_cache_datasets():
        X, Y = dataset_helper.get_dataset_cached(graph_dataset)
        self.assertTrue(len(X))
        self.assertTrue(len(Y))
        graph_helper.convert_graphs_to_adjs_tuples(X)
        # Conversion is uniform, so checking the first element suffices
        # (non-emptiness was asserted above).
        first = X[0]
        self.assertTrue(isinstance(first, tuple))
        self.assertTrue(isinstance(first[0], scipy.sparse.spmatrix))
        self.assertTrue(isinstance(first[1], list))
def get_graphs(as_adj=False):
    """Build a minimal train/test fixture: two single-edge graphs.

    With as_adj=True, each graph is returned as an (adjacency, labels)
    tuple instead of an nx.Graph.
    """
    train_graph = nx.Graph()
    train_graph.add_edge('A', 'B')
    test_graph = nx.Graph()
    test_graph.add_edge('A', 'C')
    if as_adj:
        # The helper operates on lists, so wrap each graph and unwrap the result.
        test_graph = graph_helper.convert_graphs_to_adjs_tuples([test_graph], copy=True)[0]
        train_graph = graph_helper.convert_graphs_to_adjs_tuples([train_graph], copy=True)[0]
    return train_graph, test_graph
def get_graph_estimator_and_params(X, Y=None, reduced: bool = False, with_node_weights: bool = False, add_phi_dim=False):
    """Return an (estimator, params) pair for graph classification.

    When add_phi_dim is true, the fast_wl phi dimension in params is sized
    from the total vertex count of X (converting nx.Graph input first).
    """
    assert len(X)
    estimator, params = graph_pipeline.get_params(
        reduced=reduced, with_node_weights=with_node_weights)
    if add_phi_dim:
        converted = X
        if isinstance(X[0], nx.Graph):
            # Convert on a copy so the caller's graphs are left untouched.
            converted = graph_helper.convert_graphs_to_adjs_tuples(X, copy=True)
        assert isinstance(converted[0], tuple) and isinstance(converted[0][1], list)
        # Factor of 5 adds headroom over the exact vertex count — presumably
        # for new labels created by WL iterations; confirm against fast_wl.
        graph_pipeline.add_num_vertices_to_fast_wl_params(
            params, get_num_vertices(converted) * 5)
    return estimator, params
def _retrieve_node_weights_and_convert_graphs(X, node_weight_function=None, same_label=False, use_directed=True, use_nx=True):
    """Compute per-node weight factors, then convert graphs to (adjacency, labels) tuples.

    With use_directed=False the graphs are first rebuilt as undirected
    nx.Graph objects (only supported when use_nx is true). With
    same_label=True every node label is replaced by 'dummy'.
    Returns (converted_graphs, node_weight_factors).
    """
    graphs = graph_helper.get_graphs_only(X)
    if not use_directed and not use_nx:
        raise NotImplementedError(
            '!use_directed and !use_nx not implemented')
    if not use_directed:
        # Rebuilding through nx.Graph drops edge direction.
        graphs = [nx.Graph(g) for g in graphs]
        assert not np.any([g.is_directed() for g in graphs])
    weight_factors = get_node_weight_factors(graphs, metric=node_weight_function)
    converted = graph_helper.convert_graphs_to_adjs_tuples(graphs, copy=True)
    if same_label:
        converted = [(adj, ['dummy'] * len(node_labels)) for adj, node_labels in converted]
    return converted, weight_factors
# Profiling script for the fast_wl transform (chunk of a longer file —
# NOTE(review): scipy, flamegraph and fast_wl are used below but not imported
# here; presumably imported elsewhere in the file — confirm.
from time import time
from utils import dataset_helper, graph_helper, primes, time_utils

# Configuration for this profiling run.
DATASET = 'ng20'
# h passed to fast_wl.transform — presumably the number of WL iterations; confirm.
H = 10
USED_SPARSE_MATRIX = scipy.sparse.lil_matrix
PERF_LOG = './perf.log'

# Warm up primes cache
primes.get_highest_prime_range()

# Get dataset
X, Y = dataset_helper.get_concept_map_for_dataset(DATASET, graphs_only=True)

# Convert to adj
X = graph_helper.convert_graphs_to_adjs_tuples(X, copy=True)

# Same label for all nodes
adj, labels = zip(*X)
labels = [[1] * len(x) for x in labels]
X = list(zip(adj, labels))

# Start profiling
thread = flamegraph.start_profile_thread(fd=open(PERF_LOG, "w"))
start = time()

# Calculate fast_wl
phi_lists, new_label_lookups, new_label_counters = fast_wl.transform(
    X, h=H, used_matrix_type=USED_SPARSE_MATRIX)

# Convert to sparse lil_matrix for "fancy" indexing (needed for other pipeline elements)