# Example script: build a Weisfeiler-Lehman (WL) corpus for the graphs stored
# under data/<dataset> and set up a trainer that learns one embedding per graph.
# NOTE(review): `utils`, `wl_corpus` and `InMemoryTrainer` are expected to be
# imported earlier in the file; the imports are not visible in this fragment.

# Input data paths
dataset = "MUTAG"
corpus_data_dir = "data/" + dataset

# Desired output paths
output_embedding_fh = "Graph2Vec_Embeddings.json"

# Hyper parameters
wl_depth = 2
min_count_patterns = 0  # min number of occurrences to be considered in vocabulary of subgraph patterns

#######
# Step 1
# Create corpus data for the neural language model.
# We keep permanent files for the sake of deeper post studies and testing.
#######
graph_files = utils.get_files(corpus_data_dir, ".gexf", max_files=0)
wl_corpus(graph_files, wl_depth)
extension = ".wld" + str(wl_depth)  # Extension of the graph document

######
# Step 2
# Train a neural language model to learn distributed representations
# of the graphs directly or of their substructures. Here we learn them
# directly; for an example of the latter check out the DGK models.
######
# Instantiate a PV-DBOW trainer to learn distributed reps directly.
trainer = InMemoryTrainer(
    corpus_dir=corpus_data_dir,
    extension=extension,
    max_files=0,
    output_fh=output_embedding_fh,
    emb_dimension=32,
    batch_size=128,
    epochs=250,
    # NOTE(review): the source text is cut off right after `epochs=250,`.
    # Any further keyword arguments (e.g. a learning rate, or a min-count that
    # would consume `min_count_patterns`) and the subsequent training call are
    # not visible here -- restore them from the original script before running.
)
# Example script: decompose the graphs stored under data/<dataset> into WL
# subgraph patterns, then learn an embedding for every subgraph pattern.
# NOTE(review): `utils`, `wl_corpus` and `Trainer` are expected to be imported
# earlier in the file; the imports are not visible in this fragment.

# Input data paths
dataset = "MUTAG"
corpus_data_dir = "data/" + dataset

# Desired output paths for subgraph embeddings
output_embedding_fh = "WL_Subgraph_CBOW_Embeddings.json"

# WL decomposition hyperparameters
wl_depth = 2

############
# Step 1
# Run the decomposition algorithm to get subgraph patterns across the graphs
# of MUTAG.
############
graph_files = utils.get_files(corpus_data_dir, ".gexf", max_files=0)
corpus, vocabulary, prob_map, num_graphs, graph_map = wl_corpus(graph_files, wl_depth)
extension = ".wld" + str(wl_depth)  # Extension of the graph document

############
# Step 2
# Train a CBOW model to learn distributed representations of the subgraph
# patterns.
# NOTE(review): this banner previously said "skipgram (w. Negative Sampling)",
# but the code below reads the embeddings from `trainer.cbow` and writes them to
# a CBOW-named file -- confirm that the imported `Trainer` is the CBOW trainer.
############
trainer = Trainer(
    corpus_dir=corpus_data_dir,
    extension=extension,
    max_files=0,
    window_size=10,
    output_fh=output_embedding_fh,
    emb_dimension=32,
    batch_size=128,
    epochs=25,
    initial_lr=0.001,
    min_count=1,
)
trainer.train()
final_subgraph_embeddings = trainer.cbow.give_target_embeddings()

############
# Step 3
# Create a kernel matrix of the graphs using the embeddings of the substructures