Example #1
dataset = "MUTAG"
corpus_data_dir = "data/" + dataset

# Desired output paths
output_embedding_fh = "Graph2Vec_Embeddings.json"

# Hyperparameters
wl_depth = 2  # number of WL relabeling iterations; each iteration captures neighborhoods one hop larger
min_count_patterns = 0  # minimum number of occurrences for a subgraph pattern to enter the vocabulary

#######
# Step 1: Create corpus data for the neural language model.
# The generated graph documents are kept on disk for later inspection and testing.
#######
graph_files = utils.get_files(corpus_data_dir, ".gexf", max_files=0)
wl_corpus(graph_files, wl_depth)
extension = ".wld" + str(wl_depth)  # Extension of the graph document

######
# Step 2: Train a neural language model to learn distributed representations
#         of the graphs directly, or of their substructures. Here we learn them
#         directly; for an example of the latter, see the DGK models.
######
# Instantiate a PV-DBOW trainer to learn distributed reps directly.
trainer = InMemoryTrainer(corpus_dir=corpus_data_dir,
                          extension=extension,
                          max_files=0,
                          output_fh=output_embedding_fh,
                          emb_dimension=32,
                          batch_size=128,
                          epochs=250,
                          initial_lr=0.001,  # assumed value; the source snippet was truncated here
                          min_count=min_count_patterns)
trainer.train()
# Retrieve the learned graph embeddings; the attribute name follows the
# PV-DBOW trainer convention in geo2dr and may differ in other versions.
final_graph_embeddings = trainer.skipgram.give_target_embeddings()
Example #2
# Imports again assume the geo2dr (geometric2dr) package layout; note that the
# trainer here is the CBOW trainer (the code below reads trainer.cbow).
import geometric2dr.embedding_methods.utils as utils
from geometric2dr.decomposition.weisfeiler_lehman_patterns import wl_corpus
from geometric2dr.embedding_methods.cbow_trainer import Trainer

# Input data paths
dataset = "MUTAG"
corpus_data_dir = "data/" + dataset

# Desired output paths for subgraph embeddings
output_embedding_fh = "WL_Subgraph_CBOW_Embeddings.json"

# WL decomposition hyperparameters
wl_depth = 2

############
# Step 1
# Run the decomposition algorithm to get subgraph patterns across the graphs of MUTAG
############
graph_files = utils.get_files(corpus_data_dir, ".gexf", max_files=0)
corpus, vocabulary, prob_map, num_graphs, graph_map = wl_corpus(graph_files, wl_depth)
extension = ".wld" + str(wl_depth) # Extension of the graph document


############
# Step 2
# Train a CBOW model (with negative sampling) to learn distributed representations of the subgraph patterns
############
trainer = Trainer(corpus_dir=corpus_data_dir, extension=extension, max_files=0, window_size=10,
                  output_fh=output_embedding_fh, emb_dimension=32, batch_size=128, epochs=25,
                  initial_lr=0.001, min_count=1)
trainer.train()
final_subgraph_embeddings = trainer.cbow.give_target_embeddings()

############
# Step 3
# Create a kernel matrix of the graphs using the embeddings of the substructures
############
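# A minimal sketch of one way to build the kernel, in the spirit of Deep Graph
# Kernels: K = P M P^T, where P holds per-graph pattern frequencies and
# M = E E^T measures similarity between pattern embeddings. This assumes
# prob_map maps each graph file to a {pattern: frequency} dict (as returned by
# wl_corpus above) and that final_subgraph_embeddings behaves as a
# {pattern: vector} mapping; adapt the lookups if your versions differ.
import numpy as np

vocab = sorted(vocabulary)
pattern_index = {p: i for i, p in enumerate(vocab)}

# P: num_graphs x |vocab| matrix of per-graph pattern frequencies
P = np.zeros((num_graphs, len(vocab)))
for row, gfile in enumerate(sorted(prob_map)):
    for pattern, freq in prob_map[gfile].items():
        P[row, pattern_index[pattern]] = freq

# E: |vocab| x emb_dimension matrix of subgraph-pattern embeddings
E = np.vstack([final_subgraph_embeddings[p] for p in vocab])

M = E @ E.T      # pattern-pattern similarity
K = P @ M @ P.T  # graph-by-graph kernel matrix, usable with a precomputed-kernel SVM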