Пример #1
0
def Coarsening(filename, sfdp_path='/home/diana/Benchmark/HARP/bin/sfdp_linux'):
    """Load an undirected edge list and coarsen it with the external SFDP binary.

    Args:
        filename: Path of the edge-list file handed to
            ``magicgraph.load_edgelist``.
        sfdp_path: Path of the SFDP executable passed to
            ``graph_coarsening.external_ec_coarsening``. The default is the
            location that used to be hard-coded in this function, so existing
            single-argument callers behave exactly as before.

    Returns:
        The ``(Gs, mps)`` pair returned by
        ``graph_coarsening.external_ec_coarsening`` (presumably the coarsened
        graphs and the node mappings between levels -- confirm against
        ``graph_coarsening``).
    """
    G = magicgraph.load_edgelist(filename, undirected=True)
    G = graph_coarsening.DoubleWeightedDiGraph(G)

    # Report the size of the input graph before coarsening.
    print('Orginal Graph')
    print('number of nodes', G.number_of_nodes())
    print('number of edges', G.number_of_edges())

    Gs, mps = graph_coarsening.external_ec_coarsening(G, sfdp_path)
    return Gs, mps
Пример #2
0
def main():
    """Command-line entry point for computing HARP graph embeddings.

    Parses the command line, loads the input graph through ``magicgraph``,
    wraps it in ``graph_coarsening.DoubleWeightedDiGraph``, runs
    ``graph_coarsening.skipgram_coarsening_disconnected`` with the
    hyper-parameters of the selected model, and stores the embeddings with
    ``savemat`` (output name ending in ``.mat``) or ``np.save`` (otherwise).

    Raises:
        ValueError: If ``--format`` or ``--model`` is not one of the supported
            values, or if ``--workers`` is negative (other than -1) or larger
            than the number of CPUs reported by ``cpu_count()``.
    """
    parser = ArgumentParser('harp',
                            formatter_class=ArgumentDefaultsHelpFormatter,
                            conflict_handler='resolve')
    parser.add_argument('--format',
                        default='mat',
                        help='File format of input file')
    parser.add_argument('--input',
                        nargs='?',
                        required=True,
                        help='Input graph file')
    parser.add_argument(
        '--sfdp-path',
        default='./bin/sfdp_osx',
        help=
        'Path to the SFDP binary file which produces graph coarsening results.'
    )
    parser.add_argument(
        '--model',
        default='deepwalk',
        help='Embedding model to use. Could be deepwalk, line or node2vec.')
    parser.add_argument(
        '--matfile-variable-name',
        default='network',
        help='Variable name of adjacency matrix inside a .mat file')
    parser.add_argument('--number-walks',
                        default=40,
                        type=int,
                        help='Number of random walks to start at each node')
    parser.add_argument('--output',
                        required=True,
                        help='Output representation file')
    parser.add_argument(
        '--representation-size',
        default=128,
        type=int,
        help='Number of latent dimensions to learn for each node.')
    parser.add_argument('--walk-length',
                        default=10,
                        type=int,
                        help='Length of the random walk started at each node.')
    parser.add_argument('--window-size',
                        default=10,
                        type=int,
                        help='Window size of the Skip-gram model.')
    parser.add_argument(
        '--workers',
        default=1,
        type=int,
        help=
        'Number of parallel processes, -1 to consume all available logical CPUs (harware threads).'
    )
    args = parser.parse_args()

    # Validate every option before touching the input file, so that a typo in
    # --format / --model / --workers fails immediately instead of after a
    # potentially long graph load.
    if args.format not in ('mat', 'adjlist', 'edgelist'):
        raise ValueError(
            "Unknown file format: '{}'. Valid formats: 'mat', 'adjlist', and 'edgelist'."
            .format(args.format))
    # Previously an unknown model fell through every branch and crashed later
    # with a NameError on `embeddings`.
    if args.model not in ('deepwalk', 'node2vec', 'line'):
        raise ValueError(
            "Unknown embedding model: '{}'. Valid models: 'deepwalk', 'node2vec', and 'line'."
            .format(args.model))
    available_cpus = cpu_count()
    if args.workers == -1:
        # -1 is the documented shorthand for "use every logical CPU".
        args.workers = available_cpus
    elif args.workers < 0 or args.workers > available_cpus:
        raise ValueError("Invalid number of workers: {} / {} CPUs".format(
            args.workers, available_cpus))

    # Load the graph with the loader matching the (already validated) format.
    if args.format == 'mat':
        G = magicgraph.load_matfile(args.input,
                                    variable_name=args.matfile_variable_name,
                                    undirected=True)
    elif args.format == 'adjlist':
        G = magicgraph.load_adjacencylist(args.input, undirected=True)
    else:  # 'edgelist'
        G = magicgraph.load_edgelist(args.input, undirected=True)

    G = graph_coarsening.DoubleWeightedDiGraph(G)  # makeContiguous
    print('Number of nodes: {}'.format(G.number_of_nodes()))
    print('Number of edges: {}'.format(G.number_of_edges()))
    print('Underlying network embedding model: {}'.format(args.model))

    # The output path without its extension is forwarded as `output`
    # (original note: retain contiguous raw internal ids for the embeddings).
    outpbase = os.path.splitext(args.output)[0]

    # Keyword arguments shared by all three models.
    train_kwargs = dict(workers=args.workers,
                        output=outpbase,
                        sfdp_path=args.sfdp_path,
                        representation_size=args.representation_size,
                        lr_scheme='default',
                        alpha=0.025,
                        min_alpha=0.001,
                        sg=1)
    if args.model == 'line':
        # LINE does no random walks: a window of 1 and more iterations are
        # used, and no walk or coarsening-scheme options are passed.
        train_kwargs.update(scale=1,
                            iter_count=50,
                            window_size=1,
                            hs=0,
                            sample=0.001)
    else:
        # deepwalk and node2vec share every setting except `hs`
        # (1 for deepwalk, 0 for node2vec).
        train_kwargs.update(scale=-1,
                            iter_count=1,
                            num_paths=args.number_walks,
                            path_length=args.walk_length,
                            window_size=args.window_size,
                            hs=1 if args.model == 'deepwalk' else 0,
                            coarsening_scheme=2,
                            sample=0.1)
    embeddings = graph_coarsening.skipgram_coarsening_disconnected(
        G, **train_kwargs)

    # Pick the serialisation from the output file name.
    if args.output.lower().endswith('.mat'):
        savemat(args.output, mdict={'embs': embeddings})
    else:
        np.save(args.output, embeddings)
def main():
    """Command-line entry point for computing HARP graph embeddings.

    Parses the command line, loads the input graph through ``magicgraph``,
    wraps it in ``graph_coarsening.DoubleWeightedDiGraph``, runs
    ``graph_coarsening.skipgram_coarsening_disconnected`` with the
    hyper-parameters of the selected model, and stores the result with
    ``np.save``.

    Raises:
        ValueError: If ``--format`` or ``--model`` is not one of the supported
            values.
    """
    parser = ArgumentParser('harp',
                            formatter_class=ArgumentDefaultsHelpFormatter,
                            conflict_handler='resolve')
    parser.add_argument('--format', default='mat',
                        help='File format of input file')
    parser.add_argument('--input', nargs='?', required=True,
                        help='Input graph file')
    parser.add_argument('--sfdp-path', default='./bin/sfdp_osx',
                        help='Path to the SFDP binary file which produces graph coarsening results.')
    parser.add_argument('--model', default='deepwalk',
                        help='Embedding model to use. Could be deepwalk, line or node2vec.')
    parser.add_argument('--matfile-variable-name', default='network',
                        help='Variable name of adjacency matrix inside a .mat file')
    parser.add_argument('--number-walks', default=40, type=int,
                        help='Number of random walks to start at each node')
    parser.add_argument('--output', required=True,
                        help='Output representation file')
    parser.add_argument('--representation-size', default=128, type=int,
                        help='Number of latent dimensions to learn for each node.')
    parser.add_argument('--walk-length', default=10, type=int,
                        help='Length of the random walk started at each node.')
    parser.add_argument('--window-size', default=10, type=int,
                        help='Window size of the Skip-gram model.')
    parser.add_argument('--workers', default=1, type=int,
                        help='Number of parallel processes.')
    args = parser.parse_args()

    # Validate the options before loading anything so that a typo fails fast.
    # ValueError is a subclass of the Exception raised previously, so existing
    # handlers keep working.
    if args.format not in ('mat', 'adjlist', 'edgelist'):
        raise ValueError("Unknown file format: '%s'. Valid formats: 'mat', 'adjlist', and 'edgelist'."
                         % args.format)
    # Previously an unknown model fell through every branch and crashed at the
    # final np.save with a NameError on `embeddings`.
    if args.model not in ('deepwalk', 'node2vec', 'line'):
        raise ValueError("Unknown embedding model: '%s'. Valid models: 'deepwalk', 'node2vec', and 'line'."
                         % args.model)

    # Load the graph with the loader matching the (already validated) format.
    if args.format == 'mat':
        G = magicgraph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=True)
    elif args.format == 'adjlist':
        G = magicgraph.load_adjacencylist(args.input, undirected=True)
    else:  # 'edgelist'
        G = magicgraph.load_edgelist(args.input, undirected=True)
    G = graph_coarsening.DoubleWeightedDiGraph(G)

    # Keyword arguments shared by all three models.
    train_kwargs = dict(sfdp_path=args.sfdp_path,
                        lr_scheme='default', alpha=0.025, min_alpha=0.001, sg=1)
    if args.model == 'line':
        # NOTE(review): this branch hard-codes representation_size=64 and thus
        # ignores --representation-size; kept as-is to preserve the output
        # shape -- confirm whether that is intentional.
        train_kwargs.update(scale=1, iter_count=50,
                            representation_size=64, window_size=1,
                            hs=0, sample=0.001)
    else:
        # deepwalk and node2vec share every setting except `hs`
        # (1 for deepwalk, 0 for node2vec).
        train_kwargs.update(scale=-1, iter_count=1,
                            num_paths=args.number_walks, path_length=args.walk_length,
                            representation_size=args.representation_size,
                            window_size=args.window_size,
                            hs=1 if args.model == 'deepwalk' else 0,
                            coarsening_scheme=2, sample=0.1)
    embeddings = graph_coarsening.skipgram_coarsening_disconnected(G, **train_kwargs)
    np.save(args.output, embeddings)