示例#1
0
文件: harp.py 项目: schloett/halk
def line(G,
         scale=1,
         iter_count=50,
         sfdp_path=getcwd() + '/SFDP/sfdp_osx',
         representation_size=64,
         window_size=1,
         lr_scheme='default',
         alpha=0.025,
         min_alpha=0.001,
         sg=1,
         hs=0,
         sample=0.001):
    """Call ``skipgram_coarsening_disconnected`` on ``G`` with this preset.

    Every keyword parameter is forwarded unchanged, under the same name, to
    ``graph_coarsening.skipgram_coarsening_disconnected``. No walk-count,
    walk-length or coarsening-scheme argument is passed, so the callee's own
    defaults apply for those.

    Note that the default ``sfdp_path`` is built from ``getcwd()`` once, when
    this function is defined, not on each call.

    Returns:
        Whatever ``skipgram_coarsening_disconnected`` returns.
    """
    forwarded = dict(
        scale=scale,
        iter_count=iter_count,
        sfdp_path=sfdp_path,
        representation_size=representation_size,
        window_size=window_size,
        lr_scheme=lr_scheme,
        alpha=alpha,
        min_alpha=min_alpha,
        sg=sg,
        hs=hs,
        sample=sample,
    )
    return graph_coarsening.skipgram_coarsening_disconnected(G, **forwarded)
示例#2
0
文件: harp.py 项目: schloett/halk
def deepwalk(G,
             scale=-1,
             iter_count=1,
             sfdp_path=getcwd() + '/SFDP/sfdp_osx',
             num_walks=40,
             walk_length=10,
             representation_size=128,
             window_size=10,
             lr_scheme='default',
             alpha=0.025,
             min_alpha=0.001,
             sg=1,
             hs=1,
             coarsening_scheme=2,
             sample=0.1):
    """Call ``skipgram_coarsening_disconnected`` on ``G`` with this preset.

    All keyword parameters are forwarded to
    ``graph_coarsening.skipgram_coarsening_disconnected``. Two of them are
    renamed on the way: ``num_walks`` is passed as ``num_paths`` and
    ``walk_length`` as ``path_length``.

    Note that the default ``sfdp_path`` is built from ``getcwd()`` once, when
    this function is defined, not on each call.

    Returns:
        Whatever ``skipgram_coarsening_disconnected`` returns.
    """
    forwarded = {
        'scale': scale,
        'iter_count': iter_count,
        'sfdp_path': sfdp_path,
        # The callee names the walk parameters "paths".
        'num_paths': num_walks,
        'path_length': walk_length,
        'representation_size': representation_size,
        'window_size': window_size,
        'lr_scheme': lr_scheme,
        'alpha': alpha,
        'min_alpha': min_alpha,
        'sg': sg,
        'hs': hs,
        'coarsening_scheme': coarsening_scheme,
        'sample': sample,
    }
    return graph_coarsening.skipgram_coarsening_disconnected(G, **forwarded)
示例#3
0
文件: harp.py 项目: schloett/halk
def dw(G,
       scale=-1,
       iter_count=1,
       sfdp_path=getcwd() + '/SFDP/sfdp_osx',
       num_walks=40,
       walk_length=10,
       representation_size=128,
       window_size=10,
       lr_scheme='default',
       alpha=0.025,
       min_alpha=0.001,
       sg=1,
       hs=1,
       coarsening_scheme=2,
       sample=0.1,
       outfile='emb.harp.wv',
       workers=1,
       negative=5,
       min_count=0):
    """Compute embeddings for ``G`` and hand them to ``to_w2v``.

    The hyperparameters are forwarded to
    ``graph_coarsening.skipgram_coarsening_disconnected`` (``num_walks`` as
    ``num_paths``, ``walk_length`` as ``path_length``). The result is then
    passed, together with ``outfile``, to ``to_w2v``.
    NOTE(review): ``to_w2v`` presumably writes a word2vec-format file to
    ``outfile`` -- confirm against its definition.

    Note that the default ``sfdp_path`` is built from ``getcwd()`` once, when
    this function is defined, not on each call.

    Returns:
        None. The embeddings are not returned to the caller.
    """
    hyperparams = dict(
        scale=scale,
        iter_count=iter_count,
        sfdp_path=sfdp_path,
        num_paths=num_walks,
        path_length=walk_length,
        min_count=min_count,
        representation_size=representation_size,
        window_size=window_size,
        negative=negative,
        lr_scheme=lr_scheme,
        alpha=alpha,
        min_alpha=min_alpha,
        sg=sg,
        hs=hs,
        coarsening_scheme=coarsening_scheme,
        sample=sample,
        workers=workers,
    )
    vectors = graph_coarsening.skipgram_coarsening_disconnected(
        G, **hyperparams)
    to_w2v(vectors, outfile)
示例#4
0
文件: harp.py 项目: schloett/halk
def test_harp(input_file, output_file, sfdp):
    """Embed the edge-list graph in ``input_file`` and save the result.

    Loads ``input_file`` as an undirected edge list, wraps it in a
    ``DoubleWeightedDiGraph``, prints its node and edge counts, runs
    ``skipgram_coarsening_disconnected`` with a fixed set of hyperparameters,
    and stores the returned embeddings with ``np.save``.

    Args:
        input_file: Path of the edge-list file to load.
        output_file: Destination passed to ``np.save``.
        sfdp: Value forwarded as ``sfdp_path``.
    """
    # A .mat loader was previously sketched here as an alternative:
    # magicgraph.load_matfile(input_file, variable_name='network',
    #                         undirected=True)
    raw_graph = magicgraph.load_edgelist(input_file, undirected=True)
    G = graph_coarsening.DoubleWeightedDiGraph(raw_graph)

    node_total = G.number_of_nodes()
    print('Number of nodes: {}'.format(node_total))
    edge_total = G.number_of_edges()
    print('Number of edges: {}'.format(edge_total))

    # Fixed hyperparameters for this run.
    settings = {
        'scale': -1,
        'iter_count': 1,
        'sfdp_path': sfdp,
        'num_paths': 10,
        'path_length': 80,
        'representation_size': 128,
        'window_size': 10,
        'lr_scheme': 'default',
        'alpha': 0.025,
        'min_alpha': 0.001,
        'sg': 1,
        'hs': 0,
        'coarsening_scheme': 2,
        'sample': 0.1,
    }
    vectors = graph_coarsening.skipgram_coarsening_disconnected(G, **settings)
    np.save(output_file, vectors)
示例#5
0
文件: harp.py 项目: schloett/halk
def main():
    """Command-line entry point: load a graph, learn node embeddings, save them.

    Parses the command line, loads the input graph in the requested format,
    wraps it in a ``DoubleWeightedDiGraph``, runs
    ``graph_coarsening.skipgram_coarsening_disconnected`` with the
    hyperparameters of the selected model, and stores the result with
    ``np.save`` (which appends ``.npy`` to a string path lacking it).

    Raises:
        Exception: If ``--format`` is not one of 'mat', 'adjlist' or
            'edgelist', or if ``--model`` is not one of 'deepwalk',
            'node2vec' or 'line'. Both are checked before any file is read.
    """
    parser = ArgumentParser('harp',
                            formatter_class=ArgumentDefaultsHelpFormatter,
                            conflict_handler='resolve')
    parser.add_argument('--format',
                        default='mat',
                        help='File format of input file')
    parser.add_argument('--input',
                        nargs='?',
                        required=True,
                        help='Input graph file')
    parser.add_argument(
        '--sfdp-path',
        default='./bin/sfdp_osx',
        help=
        'Path to the SFDP binary file which produces graph coarsening results.'
    )
    parser.add_argument(
        '--model',
        default='deepwalk',
        help='Embedding model to use. Could be deepwalk, line or node2vec.')
    parser.add_argument(
        '--matfile-variable-name',
        default='network',
        help='Variable name of adjacency matrix inside a .mat file')
    parser.add_argument('--number-walks',
                        default=40,
                        type=int,
                        help='Number of random walks to start at each node')
    parser.add_argument('--output',
                        required=True,
                        help='Output representation file')
    parser.add_argument(
        '--representation-size',
        default=128,
        type=int,
        help='Number of latent dimensions to learn for each node.')
    parser.add_argument('--walk-length',
                        default=10,
                        type=int,
                        help='Length of the random walk started at each node.')
    parser.add_argument('--window-size',
                        default=10,
                        type=int,
                        help='Window size of the Skip-gram model.')
    # NOTE(review): --workers is parsed but never forwarded to the training
    # call below; confirm whether it should be passed as ``workers=``.
    parser.add_argument('--workers',
                        default=1,
                        type=int,
                        help='Number of parallel processes.')
    args = parser.parse_args()

    # Validate both choices before touching the input file. Previously an
    # unknown --model fell through every branch and crashed at np.save with
    # an UnboundLocalError, after the graph had already been loaded.
    if args.format not in ('mat', 'adjlist', 'edgelist'):
        raise Exception(
            "Unknown file format: '%s'. Valid formats: 'mat', 'adjlist', and 'edgelist'."
            % args.format)
    if args.model not in ('deepwalk', 'node2vec', 'line'):
        raise Exception(
            "Unknown model: '%s'. Valid models: 'deepwalk', 'node2vec', and 'line'."
            % args.model)

    # Process args
    if args.format == 'mat':
        G = magicgraph.load_matfile(args.input,
                                    variable_name=args.matfile_variable_name,
                                    undirected=True)
    elif args.format == 'adjlist':
        G = magicgraph.load_adjacencylist(args.input, undirected=True)
    else:  # 'edgelist' -- the only remaining value after validation
        G = magicgraph.load_edgelist(args.input, undirected=True)
    G = graph_coarsening.DoubleWeightedDiGraph(G)
    print('Number of nodes: {}'.format(G.number_of_nodes()))
    print('Number of edges: {}'.format(G.number_of_edges()))
    print('Underlying network embedding model: {}'.format(args.model))

    if args.model == 'line':
        # The 'line' preset uses fixed sizes and ignores the walk and
        # representation/window command-line options.
        embeddings = graph_coarsening.skipgram_coarsening_disconnected(
            G,
            scale=1,
            iter_count=50,
            sfdp_path=args.sfdp_path,
            representation_size=64,
            window_size=1,
            lr_scheme='default',
            alpha=0.025,
            min_alpha=0.001,
            sg=1,
            hs=0,
            sample=0.001)
    else:
        # 'deepwalk' and 'node2vec' share every setting except hs:
        # deepwalk uses hs=1, node2vec uses hs=0.
        embeddings = graph_coarsening.skipgram_coarsening_disconnected(
            G,
            scale=-1,
            iter_count=1,
            sfdp_path=args.sfdp_path,
            num_paths=args.number_walks,
            path_length=args.walk_length,
            representation_size=args.representation_size,
            window_size=args.window_size,
            lr_scheme='default',
            alpha=0.025,
            min_alpha=0.001,
            sg=1,
            hs=1 if args.model == 'deepwalk' else 0,
            coarsening_scheme=2,
            sample=0.1)
    np.save(args.output, embeddings)