Пример #1
0
    def learn_embeddings(self):
        model = GraphFactorization(d=self.dim,
                                   max_iter=10000,
                                   eta=1 * 10**-4,
                                   regu=1.0,
                                   data_set='ds')
        model.learn_embedding(self.graph)

        self.embeddings = model.get_embedding()
Пример #2
0
def main(data_set_name):
    dimensions = 4
    input_file = './graph/' + data_set_name + '.tsv'
    output_file = './emb/' + data_set_name + '.emb'
    # Instatiate the embedding method with hyperparameters
    graph_factorization = GraphFactorization(dimensions, 1000, 1 * 10**-4, 1.0)

    # Load graph
    graph = graph_util.loadGraphFromEdgeListTxt(input_file)

    # Learn embedding - accepts a networkx graph or file with edge list
    embeddings_array, t = graph_factorization.learn_embedding(graph,
                                                              edge_f=None,
                                                              is_weighted=True,
                                                              no_python=True)

    embeddings = pandas.DataFrame(embeddings_array)
    embeddings.to_csv(output_file, sep=' ', na_rep=0.1)
 def graphFactorization(netData, **kwargs):
     d = kwargs.get('d', 2)
     max_iter = kwargs.get('max_iter', 100000)
     eta = kwargs.get('eta', 1 * 10**-4)
     regu = kwargs.get('regu', 1.0)
     from gem.embedding.gf import GraphFactorization
     emb = GraphFactorization(d=d,
                              max_iter=max_iter,
                              eta=eta,
                              regu=regu)
     return attMethods.GEMexport(netData, emb)
Пример #4
0
def main(args):

    # Load edgelist
    G = graph_util.loadGraphFromEdgeListTxt(args.input, directed=args.directed)
    G = G.to_directed()

    # Preprocess the graph
    # G, _ = prep_graph(G)

    if args.method == 'gf':
        # GF takes embedding dimension (d), maximum iterations (max_iter), learning rate (eta),
        # regularization coefficient (regu) as inputs
        model = GraphFactorization(d=args.dimension,
                                   max_iter=args.max_iter,
                                   eta=args.eta,
                                   regu=args.regu)
    elif args.method == 'hope':
        # HOPE takes embedding dimension (d) and decay factor (beta) as inputs
        model = HOPE(d=args.dimension, beta=args.beta)
    elif args.method == 'lap':
        # LE takes embedding dimension (d) as input
        model = LaplacianEigenmaps(d=args.dimension)
    elif args.method == 'lle':
        # LLE takes embedding dimension (d) as input
        model = LocallyLinearEmbedding(d=args.dimension)
    elif args.method == 'sdne':
        encoder_layer_list = ast.literal_eval(args.encoder_list)
        # SDNE takes embedding dimension (d), seen edge reconstruction weight (beta), first order proximity weight
        # (alpha), lasso regularization coefficient (nu1), ridge regreesion coefficient (nu2), number of hidden layers
        # (K), size of each layer (n_units), number of iterations (n_ite), learning rate (xeta), size of batch (n_batch)
        # location of modelfile and weightfile save (modelfile and weightfile) as inputs
        model = SDNE(d=args.dimension,
                     beta=args.beta,
                     alpha=args.alpha,
                     nu1=args.nu1,
                     nu2=args.nu2,
                     K=len(encoder_layer_list),
                     n_units=encoder_layer_list,
                     n_iter=args.max_iter,
                     xeta=args.learning_rate,
                     n_batch=args.bs)
        # , modelfile=['enc_model.json', 'dec_model.json'], weightfile=['enc_weights.hdf5', 'dec_weights.hdf5'])
    else:
        raise ValueError('The requested method does not exist!')

    # Learn the node embeddings
    Y, t = model.learn_embedding(graph=G,
                                 edge_f=None,
                                 is_weighted=args.weighted,
                                 no_python=True)
    Z = np.real_if_close(Y, tol=1000)

    # Save the node embeddings to a file
    np.savetxt(args.output, Z, delimiter=',', fmt='%f')
Пример #5
0
def train_GF_model(graph, dim=2, reg=0.0, max_iter=50000, eta=1 * 10**-4):
    """
    Load and train the Graph Factorization model
    """
    embedding = GraphFactorization(d=dim,
                                   max_iter=max_iter,
                                   eta=eta,
                                   regu=reg)

    Y, t = (embedding
            .learn_embedding(graph=graph,
                             edge_f=None,
                             is_weighted=False,
                             no_python=False))

    return (embedding, Y)
Пример #6
0
#from gem.embedding.sdne     import SDNE

# File that contains the edges. Format: source target
# Optionally, you can add weights as third column: source target weight
edge_f = 'data/karate.edgelist'
# Specify whether the edges are directed
isDirected = True

# Load graph
G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=isDirected)
G = G.to_directed()

models = []
# You can comment out the methods you don't want to run
models.append(
    GraphFactorization(d=2, max_iter=100000, eta=1 * 10**-4, regu=1.0))
models.append(HOPE(d=4, beta=0.01))
models.append(LaplacianEigenmaps(d=2))
models.append(LocallyLinearEmbedding(d=2))
#models.append(node2vec(d=2, max_iter=1, walk_len=80, num_walks=10, con_size=10, ret_p=1, inout_p=1))
#models.append(SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,n_units=[50, 15,], rho=0.3, n_iter=50, xeta=0.01,n_batch=500,
#                modelfile=['./intermediate/enc_model.json', './intermediate/dec_model.json'],
#                weightfile=['./intermediate/enc_weights.hdf5', './intermediate/dec_weights.hdf5']))

for embedding in models:
    print('Num nodes: %d, num edges: %d' %
          (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    Y, t = embedding.learn_embedding(graph=G,
                                     edge_f=None,
Пример #7
0
    # Optionally, you can add weights as third column: source target weight
    edge_f = 'examples/data/karate.edgelist'
    # Specify whether the edges are directed
    isDirected = True

    # Load graph
    G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=isDirected)
    G = G.to_directed()


    print(G.nodes)


    models = []
    # Load the models you want to run
    models.append(GraphFactorization(d=2, max_iter=50000, eta=1 * 10**-4, regu=1.0, data_set='karate'))
    models.append(HOPE(d=4, beta=0.01))
    models.append(LaplacianEigenmaps(d=2))
    models.append(LocallyLinearEmbedding(d=2))
    if run_n2v:
        models.append(
            node2vec(d=2, max_iter=1, walk_len=80, num_walks=10, con_size=10, ret_p=1, inout_p=1)
        )
    models.append(SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,n_units=[50, 15,], rho=0.3, n_iter=50, xeta=0.01,n_batch=100,
                    modelfile=['enc_model.json', 'dec_model.json'],
                    weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))

    # For each model, learn the embedding and evaluate on graph reconstruction and visualization
    for embedding in models:
        print ('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))
        t1 = time()
Пример #8
0
from gem.embedding.node2vec import node2vec
from gem.embedding.sdne import SDNE

# File that contains the edges. Format: source target
# Optionally, you can add weights as third column: source target weight
edge_f = 'data/karate.edgelist'
# Specify whether the edges are directed
isDirected = True

# Load graph
G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=isDirected)
G = G.to_directed()

models = []
# Load the models you want to run
models.append(GraphFactorization(d=2, max_iter=50000, eta=1 * 10**-4,
                                 regu=1.0))
models.append(HOPE(d=4, beta=0.01))
models.append(LaplacianEigenmaps(d=2))
models.append(LocallyLinearEmbedding(d=2))
models.append(
    node2vec(d=2,
             max_iter=1,
             walk_len=80,
             num_walks=10,
             con_size=10,
             ret_p=1,
             inout_p=1))
models.append(
    SDNE(d=2,
         beta=5,
         alpha=1e-5,
Пример #9
0
from gem.embedding.node2vec import node2vec
from gem.embedding.sdne import SDNE

# File that contains the edges. Format: source target
# Optionally, you can add weights as third column: source target weight
edge_f = './gem/data/karate.edgelist'
# Specify whether the edges are directed
isDirected = True

# Load graph
G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=isDirected)
G = G.to_directed()

models = []
# You can comment out the methods you don't want to run
models.append(GraphFactorization(2, 1 * 10**-4, 1.0, 50000))
models.append(HOPE(4, 0.01))
models.append(LaplacianEigenmaps(2))
models.append(LocallyLinearEmbedding(2))
#models.append(node2vec(2, 1, 80, 10, 10, 1, 1))
#models.append(SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,n_units=[50, 15,], rho=0.3, n_iter=50, xeta=0.01,n_batch=500,
#                modelfile=['./intermediate/enc_model.json', './intermediate/dec_model.json'],
#                weightfile=['./intermediate/enc_weights.hdf5', './intermediate/dec_weights.hdf5']))

for embedding in models:
    print('Num nodes: %d, num edges: %d' %
          (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    Y, t = embedding.learn_embedding(graph=G,
                                     edge_f=None,
Пример #10
0
G = nx.read_gpickle(file_prefix)
try:
    node_colors = pickle.load(
        open('data/sbm_node_labels.pickle', 'rb')
    )
except UnicodeDecodeError:
    node_colors = pickle.load(
        open('data/sbm_node_labels.pickle', 'rb'), encoding='latin1'
    )
node_colors_arr = [None] * node_colors.shape[0]
for idx in range(node_colors.shape[0]):
    node_colors_arr[idx] = np.where(node_colors[idx, :].toarray() == 1)[1][0]

models = []
# Load the models you want to run
models.append(GraphFactorization(d=128, max_iter=1000, eta=1 * 10**-4, regu=1.0, data_set='sbm'))
models.append(HOPE(d=256, beta=0.01))
models.append(LaplacianEigenmaps(d=128))
models.append(LocallyLinearEmbedding(d=128))
models.append(node2vec(d=182, max_iter=1, walk_len=80, num_walks=10, con_size=10, ret_p=1, inout_p=1, data_set='sbm'))
models.append(SDNE(d=128, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,n_units=[500, 300,], rho=0.3, n_iter=30, xeta=0.001,n_batch=500,
                modelfile=['enc_model.json', 'dec_model.json'],
                weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))
# For each model, learn the embedding and evaluate on graph reconstruction and visualization
for embedding in models:
    print ('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    Y, t = embedding.learn_embedding(graph=G, edge_f=None, is_weighted=True, no_python=True)
    print (embedding._method_name+':\n\tTraining time: %f' % (time() - t1))
    # Evaluate on graph reconstruction
Пример #11
0
    elif model == 'manela':
        gr = graph.from_networkx(G, undirected=True)
        emb = ds.Distributed(gr)
        emb.setArgs(numUpdates=90,
                    outputPath='temp_emb.embeddings',
                    representSize=128,
                    window=10,
                    numNegSampling=5,
                    ratio=args.ratio)
        emb.process()
        emb_matrix = emb.getEmbeddings()

    elif model == 'gf':
        md = GraphFactorization(d=128,
                                max_iter=1000,
                                eta=1 * 10**-4,
                                regu=1.0,
                                data_set='sbm')
        md.learn_embedding(G)
        emb_matrix = md.get_embedding()

    elif model == 'node2vec':
        gra = node2vec.Graph(G, is_directed=isDirected, p=1, q=1)
        gra.preprocess_transition_probs()
        walks = gra.simulate_walks(num_walks=10, walk_length=80)
        walks = [list(map(str, walk)) for walk in walks]
        md = Word2Vec(walks,
                      size=128,
                      window=10,
                      min_count=0,
                      sg=1,
Пример #12
0
def get_embeddings(graph,
                   embedding_algorithm_enum,
                   dimension_count,
                   hyperparameter,
                   lower=None,
                   higher=None):
    """Generate embeddings. """

    if embedding_algorithm_enum is EmbeddingType.LocallyLinearEmbedding:
        embedding_alg = LocallyLinearEmbedding(d=dimension_count)
    elif embedding_algorithm_enum is EmbeddingType.Hope:
        embedding_alg = HOPE(d=dimension_count, beta=0.01)
    elif embedding_algorithm_enum is EmbeddingType.GF:
        embedding_alg = GraphFactorization(d=dimension_count,
                                           max_iter=100000,
                                           eta=1 * 10**-4,
                                           regu=1.0)
    elif embedding_algorithm_enum is EmbeddingType.LaplacianEigenmaps:
        embedding_alg = LaplacianEigenmaps(d=dimension_count)

    elif embedding_algorithm_enum is EmbeddingType.DegreeNeigDistributionWithout:
        A = np.array([
            np.histogram([graph.degree(neig) for neig in graph.neighbors(i)],
                         bins=dimension_count,
                         density=True,
                         range=(lower, higher))[0] for i in graph.nodes()
        ])
        A = (A - A.mean(axis=0)) / A.std(axis=0)
        return A

    elif embedding_algorithm_enum is EmbeddingType.DegreeNeigDistribution:
        A = np.array([
            np.concatenate([
                np.array([graph.degree(i) / (higher * dimension_count)]),
                np.histogram(
                    [graph.degree(neig) for neig in graph.neighbors(i)],
                    bins=dimension_count - 1,
                    density=True,
                    range=(lower, higher))[0]
            ],
                           axis=0) for i in graph.nodes()
        ])
        A = (A - A.mean(axis=0)) / A.std(axis=0)
        return A

    elif embedding_algorithm_enum is EmbeddingType.DegreeNeigNeigDistribution:
        bin_length = int(dimension_count / 2)

        A = np.array([
            np.concatenate([
                np.array([graph.degree(i) / (higher)]),
                np.histogram(
                    [graph.degree(neig) for neig in graph.neighbors(i)],
                    bins=bin_length,
                    density=True,
                    range=(lower, higher))[0],
                np.histogram([
                    graph.degree(neigneig) for neig in graph.neighbors(i)
                    for neigneig in graph.neighbors(neig)
                ],
                             bins=bin_length,
                             density=True,
                             range=(lower, higher))[0]
            ],
                           axis=0) for i in graph.nodes()
        ])

        A = (A - A.mean(axis=0)) / A.std(axis=0)
        A[:, 0] = A[:, 0]
        A[:, 1:1 + bin_length] = A[:, 1:1 + bin_length]
        A[:, 2 + bin_length:] = A[:, 2 + bin_length:] * hyperparameter
        A = np.nan_to_num(A)
        return A
    else:
        raise NotImplementedError

    A, t = embedding_alg.learn_embedding(graph=graph, no_python=True)

    A = np.dot(A, np.diag(np.sign(np.mean(A, axis=0))))
    A = (A - A.mean(axis=0)) / A.std(axis=0)
    return A
Пример #13
0
        gfile for gfile in os.listdir(dirpath)
        if os.path.splitext(gfile)[1] == '.csv'
    ]

    results = {}

    for gfile in gfiles:
        if gfile != 'soc-pokec-relationships-4096.csv':
            continue

        models = []
        # Load the models you want to run
        models.append(
            GraphFactorization(d=64,
                               max_iter=50000,
                               eta=1 * 10**-4,
                               regu=1.0,
                               data_set=gfile))
        models.append(HOPE(d=64, beta=0.01))
        models.append(LaplacianEigenmaps(d=64))
        models.append(LocallyLinearEmbedding(d=64))
        models.append(
            node2vec(d=64,
                     max_iter=100,
                     walk_len=80,
                     num_walks=10,
                     con_size=10,
                     ret_p=1,
                     inout_p=1,
                     data_set=gfile))
        models.append(
Пример #14
0
from timeit import Timer

import networkx as nx

from gem.embedding.gf import GraphFactorization

sizes = [200, 500, 1000, 2000, 3000, 5000, 10000, 15000, 20000, 35000, 50000]
_DENS = 1e-3

for s in sizes:
    G = nx.gnp_random_graph(s, _DENS, directed=True)
    gf = GraphFactorization(d=128, eta=1e-3, regu=1, max_iter=1000)
    t = Timer('gf.learn_embedding(G)', setup='from __main__ import gf, G')
    n_runs = 3 if s <= 5000 else 1
    exec_times = t.repeat(n_runs, 1)
    print(f'{s}: {exec_times}')

    with open('gf_times.txt', 'a') as f:
        f.write(f'{s}: {exec_times}\n')