示例#1
0
def main():
    """Run the GraphZoom pipeline end to end from the command line.

    The stages run in order: load the graph, optionally fuse it with the
    node features, coarsen it (``simple`` or ``lamg``), embed the coarsened
    graph, refine the embeddings, save them to ``--embed_path``, evaluate
    them with ``lr`` and print the CPU time of every stage.

    ``-f`` and ``-w`` are ``store_false`` switches: graph fusion and the
    weighted reduced graph are enabled by default and passing the flag
    turns them off.

    Raises:
        NotImplementedError: if ``--coarse`` or ``--embed_method`` names an
            unsupported method (checked before any work is done).
        subprocess.CalledProcessError: if ``run_coarsening.sh`` exits with
            a non-zero status.
    """
    # Local import so the block does not depend on the file's import header.
    import subprocess

    parser = ArgumentParser(description="GraphZoom")
    parser.add_argument("-d", "--dataset", type=str, default="cora",
                        help="input dataset")
    parser.add_argument("-o", "--coarse", type=str, default="simple",
                        help="choose either simple_coarse or lamg_coarse, [simple, lamg]")
    parser.add_argument("-c", "--mcr_dir", type=str, default="/opt/matlab/R2018A/",
                        help="directory of matlab compiler runtime (only required by lamg_coarsen)")
    parser.add_argument("-s", "--search_ratio", type=int, default=12,
                        help="control the search space in graph fusion process (only required by lamg_coarsen)")
    parser.add_argument("-r", "--reduce_ratio", type=int, default=2,
                        help="control graph coarsening levels (only required by lamg_coarsen)")
    parser.add_argument("-v", "--level", type=int, default=1,
                        help="number of coarsening levels (only required by simple_coarsen)")
    parser.add_argument("-n", "--num_neighs", type=int, default=2,
                        help="control k-nearest neighbors in graph fusion process")
    parser.add_argument("-l", "--lda", type=float, default=0.1,
                        help="control self loop in adjacency matrix")
    parser.add_argument("-e", "--embed_path", type=str, default="embed_results/embeddings.npy",
                        help="path of embedding result")
    parser.add_argument("-m", "--embed_method", type=str, default="deepwalk",
                        help="[deepwalk, node2vec, graphsage]")
    parser.add_argument("-f", "--fusion", default=True, action="store_false",
                        help="whether use graph fusion")
    parser.add_argument("-p", "--power", default=False, action="store_true",
                        help="Strong power of graph filter, set True to enhance filter power")
    parser.add_argument("-g", "--sage_model", type=str, default="mean",
                        help="aggregation function in graphsage")
    parser.add_argument("-w", "--sage_weighted", default=True, action="store_false",
                        help="whether consider weighted reduced graph")

    args = parser.parse_args()

    # Fail fast: an unknown method used to surface only after fusion and
    # coarsening had already run (as an unbound ``embed_start``).
    if args.coarse not in ("simple", "lamg"):
        raise NotImplementedError(
            "unsupported coarsening method: {!r}".format(args.coarse))
    if args.embed_method not in ("deepwalk", "node2vec", "graphsage"):
        raise NotImplementedError(
            "unsupported embedding method: {!r}".format(args.embed_method))

    dataset = args.dataset
    feature_path = "dataset/{}/{}-feats.npy".format(dataset, dataset)
    fusion_input_path = "dataset/{}/{}.mtx".format(dataset, dataset)
    reduce_results = "reduction_results/"
    mapping_path = "{}Mapping.mtx".format(reduce_results)

    # The external coarsening script reads the fused graph when fusion is on.
    if args.fusion:
        coarsen_input_path = "dataset/{}/fused_{}.mtx".format(dataset, dataset)
    else:
        coarsen_input_path = "dataset/{}/{}.mtx".format(dataset, dataset)

    ###### Load Data ######
    print("%%%%%% Loading Graph Data %%%%%%")
    laplacian = json2mtx(dataset)

    # Node features are only needed by graph fusion and by graphsage.
    if args.fusion or args.embed_method == "graphsage":
        feature = np.load(feature_path)

    ###### Graph Fusion ######
    if args.fusion:
        print("%%%%%% Starting Graph Fusion %%%%%%")
        fusion_start = time.process_time()
        laplacian = graph_fusion(laplacian, feature, args.num_neighs,
                                 args.mcr_dir, args.coarse, fusion_input_path,
                                 args.search_ratio, reduce_results,
                                 mapping_path, dataset)
        fusion_time = time.process_time() - fusion_start

    ###### Graph Reduction ######
    print("%%%%%% Starting Graph Reduction %%%%%%")
    reduce_start = time.process_time()

    if args.coarse == "simple":
        G, projections, laplacians, level = sim_coarse(laplacian, args.level)
        reduce_time = time.process_time() - reduce_start
    else:  # "lamg" (validated above)
        # An argument list avoids shell word-splitting on paths, and
        # check=True stops the run instead of reading stale result files
        # when the script fails.
        subprocess.run(
            ["./run_coarsening.sh", args.mcr_dir, coarsen_input_path,
             str(args.reduce_ratio), "n", reduce_results],
            check=True)
        # The script runs in a child process, so its CPU time is read from
        # the file it writes rather than measured with process_time().
        reduce_time = read_time("{}CPUtime.txt".format(reduce_results))
        G = mtx2graph("{}Gs.mtx".format(reduce_results))
        level = read_levels("{}NumLevels.txt".format(reduce_results))
        projections, laplacians = construct_proj_laplacian(
            laplacian, level, reduce_results)

    ###### Embed Reduced Graph ######
    print("%%%%%% Starting Graph Embedding %%%%%%")
    if args.embed_method == "deepwalk":
        embed_start = time.process_time()
        embeddings = deepwalk(G)

    elif args.embed_method == "node2vec":
        embed_start = time.process_time()
        embeddings = node2vec(G)

    else:  # "graphsage" (validated above)
        from embed_methods.graphsage.graphsage import graphsage
        nx.set_node_attributes(G, False, "test")
        nx.set_node_attributes(G, False, "val")

        # Obtain the mapping operator between original and coarse nodes.
        if args.coarse == "lamg":
            mapping = normalize(mtx2matrix(mapping_path), norm='l1', axis=1)
        else:
            # Compose the per-level projections into a single operator.
            mapping = identity(feature.shape[0])
            for p in projections:
                mapping = mapping @ p
            mapping = normalize(mapping, norm='l1', axis=1).transpose()

        # Scales the iteration budget passed to graphsage below.
        coarse_ratio = mapping.shape[1] / mapping.shape[0]

        # Map node features onto the coarse graph.
        feats = mapping @ feature

        # Only the graphsage call itself is timed.
        embed_start = time.process_time()
        embeddings = graphsage(G, feats, args.sage_model, args.sage_weighted,
                               int(1000 / coarse_ratio))

    embed_time = time.process_time() - embed_start

    ###### Refinement ######
    print("%%%%%% Starting Graph Refinement %%%%%%")
    refine_start = time.process_time()
    embeddings = refinement(level, projections, laplacians, embeddings,
                            args.lda, args.power)
    refine_time = time.process_time() - refine_start

    ###### Save Embeddings ######
    np.save(args.embed_path, embeddings)

    ###### Evaluation ######
    lr("dataset/{}/".format(dataset), args.embed_path, dataset)

    ###### Report timing information ######
    print("%%%%%% CPU time %%%%%%")
    if args.fusion:
        total_time = fusion_time + reduce_time + embed_time + refine_time
        print(f"Graph Fusion     Time: {fusion_time:.3f}")
    else:
        total_time = reduce_time + embed_time + refine_time
        print("Graph Fusion     Time: 0")
    print(f"Graph Reduction  Time: {reduce_time:.3f}")
    print(f"Graph Embedding  Time: {embed_time:.3f}")
    print(f"Graph Refinement Time: {refine_time:.3f}")
    print(
        f"Total Time = Fusion_time + Reduction_time + Embedding_time + Refinement_time = {total_time:.3f}"
    )
示例#2
0
    # NOTE(review): this is the tail of a function whose header and earlier
    # stages are not visible here; names such as level_path, proj_dir,
    # laplacian, feature, embeddings, lda, args, save_dir, eval_dataset and the
    # *_time variables are expected to be defined above.
    levels = read_levels(level_path)
    projections, coarse_laplacian = construct_proj_laplacian(laplacian, levels, proj_dir)

    ######Refinement######
    print("%%%%%% Starting Graph Refinement %%%%%%")
    # Normalise the first projection (L1, axis=1) and apply it to the node
    # features; the product is handed to refinement() as its last argument.
    feat = normalize(projections[0], norm='l1', axis=1) @ feature
    # Only the refinement() call is timed (CPU time of this process).
    refine_start = time.process_time()
    embeddings = refinement(levels, projections, coarse_laplacian, embeddings, lda, args.power, args.kpower, feat)
    refine_end = time.process_time()
    refine_time = refine_end - refine_start

    ######Save Embeddings######
    np.save(save_dir, embeddings)

    ######Evaluation######
    lr(eval_dataset, save_dir, dataset)

    ######Report timing information######
    print("%%%%%% Single CPU time %%%%%%")
    # fusion_time only contributes (and is only printed) when fusion was on.
    # time_info is assigned but not read anywhere in the visible lines.
    if args.fusion:
        total_time = fusion_time + reduce_time + embed_time + refine_time
        time_info = [fusion_time, reduce_time, embed_time, refine_time, total_time]
        print("Graph Fusion     Time: {}".format(fusion_time))
    else:
        total_time = reduce_time + embed_time + refine_time
        time_info = [reduce_time, embed_time, refine_time, total_time]
    print("Graph Reduction  Time: {}".format(reduce_time))
    print("Graph Embedding  Time: {}".format(embed_time))
    print("Graph Refinement Time: {}".format(refine_time))
    print("Total Time = Fusion_time + Reduction_time + Embedding_time + Refinement_time = {}".format(total_time))
示例#3
0
def main():
    """Run the GraphZoom pipeline with a selectable projection type.

    Stages: load the dataset, reuse a graph already stored at the input
    path or run graph fusion, coarsen with ``run_coarsening.sh`` (skipped
    for the ``border`` and ``one_hot`` projection types), embed the
    coarsened graph, refine the embeddings, save them under
    ``--embed_path``, evaluate them with ``lr`` and print timings.

    Raises:
        NotImplementedError: if ``--embed_method`` is not one of
            ``deepwalk``, ``node2vec``, ``graphsage`` or ``ft`` (checked
            before any work is done).
        subprocess.CalledProcessError: if ``run_coarsening.sh`` exits with
            a non-zero status.
    """
    # Local import so the block does not depend on the file's import header.
    import subprocess

    parser = ArgumentParser(description="GraphZoom")
    parser.add_argument("-d",
                        "--dataset",
                        type=str,
                        default="cora",
                        help="input dataset")
    parser.add_argument("-c",
                        "--mcr_dir",
                        type=str,
                        default="/usr/local/MATLAB/MATLAB_Runtime/v94",
                        help="directory of matlab compiler runtime")
    parser.add_argument(
        "-s",
        "--search_ratio",
        type=int,
        default=12,
        help="control the search space in graph fusion process")
    parser.add_argument(
        "-r",
        "--reduce_ratio",
        type=int,
        default=2,  # NOTE(review): relation to the number of levels is unclear
        help="control graph coarsening levels")
    parser.add_argument(
        "-n",
        "--num_neighs",
        type=int,
        default=2,
        help="control k-nearest neighbors in graph fusion process")
    parser.add_argument("-l",
                        "--lda",
                        type=float,
                        default=0.1,
                        help="control self loop in adjacency matrix")
    parser.add_argument("-e",
                        "--embed_path",
                        type=str,
                        default="embed_results/",
                        help="path of embedding result")
    parser.add_argument("-m",
                        "--embed_method",
                        type=str,
                        default="deepwalk",
                        help="[deepwalk, node2vec, graphsage]")
    parser.add_argument("-pre", "--prefix", type=str, default="")
    parser.add_argument("-pj",
                        "--proj",
                        type=str,
                        default='fusion',
                        help="projection matrix type")
    parser.add_argument("-emb_arch", "--emb_arch", default='GCN')
    parser.add_argument(
        "-p",
        "--power",
        default=False,
        action="store_true",
        help="Strong power of graph filter, set True to enhance filter power")
    parser.add_argument("-g",
                        "--sage_model",
                        type=str,
                        default="mean",
                        help="aggregation function in graphsage")
    parser.add_argument("-w",
                        "--sage_weighted",
                        default=True,
                        action="store_false",
                        help="whether consider weighted reduced graph")
    parser.add_argument("-el", "--embed_level", type=int, default=1)

    args = parser.parse_args()

    # Fail fast: an unknown method used to surface only after fusion and
    # coarsening, as an unbound ``embeddings``.
    if args.embed_method not in ("deepwalk", "node2vec", "graphsage", "ft"):
        raise NotImplementedError(
            "unsupported embedding method: {!r}".format(args.embed_method))

    dataset = args.dataset
    fusion_input_path = "dataset/{}/{}.mtx".format(dataset, dataset)
    proj_matrix_type = args.proj
    reduce_results = f"reduction_results/{dataset}/{proj_matrix_type}/"
    # NOTE(review): this flag only affects the timing report at the end;
    # fusion and coarsening below are not skipped because of it.
    coarsen_flag = True
    if not Path(reduce_results).exists():
        Path(reduce_results).mkdir(parents=True)
    else:
        coarsen_flag = False
        print('skip fusion & coarsen')
    mapping_path = "{}Mapping.mtx".format(reduce_results)

    # Equality, not ``in ('fusion')``: the latter is a substring test on a
    # plain string, so values such as '' or 'f' also selected the fused graph.
    if args.proj == 'fusion':
        input_path = "dataset/{}/fused_{}.mtx".format(dataset, dataset)
    else:
        input_path = "dataset/{}/{}.mtx".format(dataset, dataset)
    print(f'coarse_input_path: {input_path}')

    ###### Load Data ######
    print("%%%%%% Loading Graph Data %%%%%%")
    laplacian, feature = load_dataset(dataset, args.prefix)

    ###### Graph Fusion ######
    fusion_time, reduce_time = 0, 0
    if Path(input_path).exists():
        # Reuse the graph already stored at the coarsening input path.
        laplacian = mmread(input_path)
        print('load previous dataset laplacian')
    else:
        print("%%%%%% Starting Graph Fusion %%%%%%")
        print('start fusion calculation')
        fusion_start = time.process_time()
        laplacian = graph_fusion(
            laplacian, feature, args.num_neighs, args.mcr_dir,
            fusion_input_path, args.search_ratio, reduce_results, mapping_path,
            dataset)
        fusion_time = time.process_time() - fusion_start
        print("Graph Fusion     Time: {}".format(fusion_time))

    ###### Graph Reduction ######
    if args.proj not in ['border', 'one_hot']:
        print("%%%%%% Starting Graph Reduction %%%%%%")
        # An argument list avoids shell word-splitting on paths, and
        # check=True stops the run instead of reading stale result files
        # when the script fails.
        subprocess.run(
            ["./run_coarsening.sh", args.mcr_dir, input_path,
             str(args.reduce_ratio), "n", reduce_results],
            check=True)
        # The script runs in a child process, so its CPU time is read from
        # the file it writes.
        reduce_time = read_time("{}CPUtime.txt".format(reduce_results))
    else:
        # These projection types rely on results already in reduce_results.
        print('load original projections')

    ###### Embed Reduced Graph ######
    G = mtx2graph("{}Gs.mtx".format(reduce_results))

    levels = read_levels("{}NumLevels.txt".format(reduce_results))
    print("%%%%%% Starting Graph Embedding %%%%%%")
    print(G.number_of_edges(), G.number_of_nodes())
    embed_start = time.process_time()
    if args.embed_method == "deepwalk":
        embeddings = deepwalk(G)

    elif args.embed_method == "node2vec":
        embeddings = node2vec(G)

    elif args.embed_method == "graphsage":
        from embed_methods.graphsage.graphsage import graphsage
        nx.set_node_attributes(G, False, "test")
        nx.set_node_attributes(G, False, "val")
        mapping = normalize(mtx2matrix(mapping_path), norm='l1', axis=1)
        # Map node features onto the coarse graph.
        feats = mapping @ feature
        embeddings = graphsage(G, feats, args.sage_model, args.sage_weighted,
                               int(1000 / args.reduce_ratio))
    else:  # "ft" (validated above): load precomputed embeddings from disk
        emb_path = f'../embeddings/{args.emb_arch}_{args.dataset}_emb_level_{levels}_mask.npy'
        print(f'emb_path: {emb_path}')
        embeddings = np.load(emb_path)

    embed_time = time.process_time() - embed_start

    ###### Load Refinement Data ######
    projections, coarse_laplacian = construct_proj_laplacian(
        laplacian, levels, reduce_results)

    ###### Refinement ######
    print("%%%%%% Starting Graph Refinement %%%%%%")
    refine_start = time.process_time()
    embeddings = refinement(levels, projections, coarse_laplacian, embeddings,
                            args.lda, args.power)
    refine_time = time.process_time() - refine_start

    ###### Save Embeddings ######
    embed_path = f'{args.embed_path}/{args.dataset}_level_{levels}.npy'
    print(f'embed_path: {embed_path}')
    # np.save does not create missing directories; make sure the target
    # exists so the results of the whole run are not lost at the last step.
    Path(embed_path).parent.mkdir(parents=True, exist_ok=True)
    np.save(embed_path, embeddings)

    ###### Evaluation ######
    if args.prefix != '':
        lr("{}/dataset/{}/".format(args.prefix, dataset), embed_path, dataset)
    else:
        lr("dataset/{}/".format(dataset), embed_path, dataset)

    ###### Report timing information ######
    print("%%%%%% Single CPU time %%%%%%")
    if coarsen_flag:
        total_time = fusion_time + reduce_time + embed_time + refine_time
        print("Graph Fusion     Time: {}".format(fusion_time))
    else:
        total_time = reduce_time + embed_time + refine_time
        print("Graph Fusion     Time: 0")
    print("Graph Reduction  Time: {}".format(reduce_time))
    print("Graph Embedding  Time: {:.3f} Mins".format(embed_time / 60))
    print("Graph Refinement Time: {}".format(refine_time))
    print(
        "Total Time = Fusion_time + Reduction_time + Embedding_time + Refinement_time = {}"
        .format(total_time))
示例#4
0
def main():
    """Run the GraphZoom pipeline with feature-aware refinement.

    Stages: load the graph and node features, optionally run graph fusion,
    coarsen with ``run_coarsening.sh``, embed the coarsened graph, refine
    the embeddings (``refinement`` also receives a feature matrix built
    from the first projection), save them to ``--embed_path``, evaluate
    them with ``lr`` and print timings.

    Raises:
        NotImplementedError: if ``--embed_method`` is not one of
            ``deepwalk``, ``node2vec``, ``graphsage`` or ``dgi`` (checked
            before any work is done).
        subprocess.CalledProcessError: if ``run_coarsening.sh`` exits with
            a non-zero status.
    """
    # Local import so the block does not depend on the file's import header.
    import subprocess

    parser = ArgumentParser(description="GraphZoom")
    parser.add_argument("-d",
                        "--dataset",
                        type=str,
                        default="cora",
                        help="input dataset")
    parser.add_argument("-c",
                        "--mcr_dir",
                        type=str,
                        default="/opt/matlab/R2018A/",
                        help="directory of matlab compiler runtime")
    parser.add_argument(
        "-s",
        "--search_ratio",
        type=int,
        default=12,
        help="control the search space in graph fusion process")
    parser.add_argument("-k",
                        "--kpower",
                        type=int,
                        default=2,
                        help="control the graph filter power")
    parser.add_argument("-r",
                        "--reduce_ratio",
                        type=int,
                        default=2,
                        help="control graph coarsening levels")
    parser.add_argument(
        "-n",
        "--num_neighs",
        type=int,
        default=2,
        help="control k-nearest neighbors in graph fusion process")
    parser.add_argument("-l",
                        "--lda",
                        type=float,
                        default=0.1,
                        help="control self loop in adjacency matrix")
    parser.add_argument("-e",
                        "--embed_path",
                        type=str,
                        default="embed_results/embeddings.npy",
                        help="path of embedding result")
    parser.add_argument("-m",
                        "--embed_method",
                        type=str,
                        default="dgi",
                        help="[deepwalk, node2vec, graphsage, dgi]")
    parser.add_argument("-f",
                        "--fusion",
                        default=False,
                        action="store_true",
                        help="whether use graph fusion")
    parser.add_argument(
        "-p",
        "--power",
        default=False,
        action="store_true",
        help="Strong power of graph filter, set True to enhance filter power")

    parser.add_argument("-g",
                        "--sage_model",
                        type=str,
                        default="mean",
                        help="aggregation function in graphsage")
    parser.add_argument("-w",
                        "--sage_weighted",
                        default=True,
                        action="store_false",
                        help="whether consider weighted reduced graph")

    args = parser.parse_args()

    # Fail fast: an unknown method used to surface only after fusion and
    # coarsening, as an unbound ``embed_end``.
    if args.embed_method not in ("deepwalk", "node2vec", "graphsage", "dgi"):
        raise NotImplementedError(
            "unsupported embedding method: {!r}".format(args.embed_method))

    dataset = args.dataset
    mcr_dir = args.mcr_dir
    search_ratio = args.search_ratio
    num_neighs = args.num_neighs
    ratio = args.reduce_ratio
    lda = args.lda
    mapping_path = "reduction_results/Mapping.mtx"
    feature_path = "dataset/{}/{}-feats.npy".format(dataset, dataset)
    fusion_input_path = "dataset/{}/{}.mtx".format(dataset, dataset)
    fusion_output_dir = "reduction_results/"
    output_dir = "reduction_results/"
    mtx_path = "reduction_results/Gs.mtx"
    level_path = "reduction_results/NumLevels.txt"
    proj_dir = "reduction_results"
    save_dir = args.embed_path
    eval_dataset = "dataset/{}/".format(dataset)
    cputime_path = "reduction_results/CPUtime.txt"

    # The external coarsening script reads the fused graph when fusion is on.
    if args.fusion:
        input_path = "dataset/{}/fused_{}.mtx".format(dataset, dataset)
    else:
        input_path = "dataset/{}/{}.mtx".format(dataset, dataset)

    ###### Load Data ######
    print("%%%%%% Loading Graph Data %%%%%%")
    # Prefer an existing .mtx file; otherwise build the matrix from JSON.
    if os.path.exists(fusion_input_path):
        laplacian = mmread(fusion_input_path)
    else:
        laplacian = json2mtx(dataset)

    # Features are loaded unconditionally: the refinement stage below always
    # needs them, so loading them only for fusion/graphsage/dgi made
    # deepwalk/node2vec runs without fusion crash with a NameError after
    # all the expensive stages had finished.
    feature = np.load(feature_path)

    ###### Graph Fusion ######
    if args.fusion:
        print("%%%%%% Starting Graph Fusion %%%%%%")
        fusion_start = time.process_time()
        laplacian = graph_fusion(laplacian, feature, num_neighs, mcr_dir,
                                 fusion_input_path, search_ratio,
                                 fusion_output_dir, mapping_path, dataset)
        fusion_end = time.process_time()
        fusion_time = fusion_end - fusion_start

    ###### Graph Reduction ######
    print("%%%%%% Starting Graph Reduction %%%%%%")
    # An argument list avoids shell word-splitting on paths, and check=True
    # stops the run instead of reading stale result files when the script
    # fails.
    subprocess.run(
        ["./run_coarsening.sh", mcr_dir, input_path, str(ratio), "n",
         output_dir],
        check=True)
    # The script runs in a child process, so its CPU time is read from the
    # file it writes.
    reduce_time = read_time(cputime_path)

    ###### Embed Reduced Graph ######
    G = mtx2graph(mtx_path)

    print("%%%%%% Starting Graph Embedding %%%%%%")
    if args.embed_method == "deepwalk":
        embed_start = time.process_time()
        embeddings = deepwalk(G)
        embed_end = time.process_time()

    elif args.embed_method == "node2vec":
        embed_start = time.process_time()
        embeddings = node2vec(G)
        embed_end = time.process_time()

    elif args.embed_method == "graphsage":
        from embed_methods.graphsage.graphsage import graphsage
        nx.set_node_attributes(G, False, "test")
        nx.set_node_attributes(G, False, "val")
        mapping = normalize(mtx2matrix(mapping_path), norm='l1', axis=1)
        # Map node features onto the coarse graph.
        feats = mapping @ feature

        # Only the graphsage call itself is timed.
        embed_start = time.process_time()
        embeddings = graphsage(G, feats, args.sage_model, args.sage_weighted,
                               int(10000 / args.reduce_ratio))
        embed_end = time.process_time()

    else:  # "dgi" (validated above)
        from embed_methods.dgi.execute import dgi
        mapping = normalize(mtx2matrix(mapping_path), norm='l1', axis=1)
        # Map node features onto the coarse graph.
        feats = mapping @ feature
        embed_start = time.process_time()
        embeddings = dgi(G, feats)
        embed_end = time.process_time()

    embed_time = embed_end - embed_start

    ###### Load Refinement Data ######
    levels = read_levels(level_path)
    projections, coarse_laplacian = construct_proj_laplacian(
        laplacian, levels, proj_dir)

    ###### Refinement ######
    print("%%%%%% Starting Graph Refinement %%%%%%")
    # Feature matrix handed to refinement(): normalised first projection
    # applied to the node features.
    feat = normalize(projections[0], norm='l1', axis=1) @ feature
    refine_start = time.process_time()
    embeddings = refinement(levels, projections, coarse_laplacian, embeddings,
                            lda, args.power, args.kpower, feat)
    refine_end = time.process_time()
    refine_time = refine_end - refine_start

    ###### Save Embeddings ######
    np.save(save_dir, embeddings)

    ###### Evaluation ######
    lr(eval_dataset, save_dir, dataset)

    ###### Report timing information ######
    print("%%%%%% Single CPU time %%%%%%")
    if args.fusion:
        total_time = fusion_time + reduce_time + embed_time + refine_time
        print("Graph Fusion     Time: {}".format(fusion_time))
    else:
        total_time = reduce_time + embed_time + refine_time
    print("Graph Reduction  Time: {}".format(reduce_time))
    print("Graph Embedding  Time: {}".format(embed_time))
    print("Graph Refinement Time: {}".format(refine_time))
    print(
        "Total Time = Fusion_time + Reduction_time + Embedding_time + Refinement_time = {}"
        .format(total_time))
示例#5
0
def _str2bool(value):
    """Parse a command-line string into a bool.

    ``type=bool`` treats every non-empty string as True, so ``-w False``
    used to enable the option. Explicit "false" spellings (and the empty
    string) now yield False; any other value stays True as before.
    """
    return value.strip().lower() not in ("", "0", "false", "f", "no", "n", "off")


def main():
    """Embed the original (un-coarsened) graph as a baseline and evaluate it.

    ``graphsage`` and ``dgi`` read the graph from ``<dataset>-G.json`` and
    the node features from ``<dataset>-feats.npy``; ``deepwalk`` and
    ``node2vec`` build the graph from ``<dataset>.mtx``. The embeddings are
    saved to ``--embed_path``, evaluated with ``lr`` and the embedding CPU
    time is printed.

    Raises:
        NotImplementedError: if ``--embed_method`` is not one of
            ``graphsage``, ``dgi``, ``deepwalk`` or ``node2vec``.
        ValueError: if the ``.mtx`` file has no size line.
    """
    parser = ArgumentParser(description="Original")
    parser.add_argument("-d",
                        "--dataset",
                        type=str,
                        default="cora",
                        help="input dataset")
    parser.add_argument("-e",
                        "--embed_path",
                        type=str,
                        default="embed_results/original_embeddings.npy",
                        help="path of embedding result")
    # The default used to be the misspelling "deeepwalk", which matched no
    # branch and crashed a default run at np.save with an unbound
    # ``embeddings``.
    parser.add_argument("-m",
                        "--embed_method",
                        type=str,
                        default="deepwalk",
                        help="specific embedding method")
    # Accepted for command-line compatibility; not read by this script.
    parser.add_argument("-f",
                        "--fusion",
                        default=False,
                        action="store_true",
                        help="whether use graph fusion")

    parser.add_argument("-g",
                        "--sage_model",
                        type=str,
                        default="mean",
                        help="aggregation function in graphsage")
    parser.add_argument("-w",
                        "--sage_weighted",
                        type=_str2bool,
                        default=False,
                        help="whether consider weighted reduced graph")

    args = parser.parse_args()

    dataset = args.dataset
    mtx_path = "dataset/{}/{}.mtx".format(dataset, dataset)
    save_dir = args.embed_path
    eval_dataset = "dataset/{}/".format(dataset)
    embed_method = args.embed_method

    # Fail fast instead of building the graph and then hitting an unbound
    # ``embeddings``.
    if embed_method not in ("graphsage", "dgi", "deepwalk", "node2vec"):
        raise NotImplementedError(
            "unsupported embedding method: {!r}".format(embed_method))

    print("%%%%%% Starting Graph Embedding %%%%%%")
    if embed_method in ("graphsage", "dgi"):
        # Both feature-based methods use the JSON graph and the feature file.
        with open("dataset/{}/{}-G.json".format(dataset, dataset)) as graph_file:
            G_data = json.load(graph_file)
        G = json_graph.node_link_graph(G_data)
        feature = np.load("dataset/{}/{}-feats.npy".format(dataset, dataset))

        # Only the embedding call itself is timed.
        embed_start = time.process_time()
        if embed_method == "graphsage":
            embeddings = graphsage(G, feature, args.sage_model,
                                   args.sage_weighted, 10000)
        else:
            embeddings = dgi(G, feature)
        embed_end = time.process_time()

    else:
        G = nx.Graph()
        num_nodes = None
        with open(mtx_path) as ff:
            for i, line in enumerate(ff):
                # The first two lines are treated as header lines.
                if i < 2:
                    continue
                info = line.split()
                if i == 2:
                    # Size line: its first field is used as the node count.
                    num_nodes = int(info[0])
                elif int(info[0]) != int(info[1]):
                    # Skip diagonal entries; indices in the file are 1-based.
                    G.add_edge(int(info[0]) - 1,
                               int(info[1]) - 1,
                               wgt=abs(float(info[2])))
        if num_nodes is None:
            raise ValueError(
                "{} has no size line; cannot determine the number of nodes"
                .format(mtx_path))
        # Make sure nodes without any edge are present as well.
        G.add_nodes_from(range(num_nodes))

        embed_start = time.process_time()
        if embed_method == "deepwalk":
            embeddings = deepwalk(G)
        else:  # "node2vec" (validated above)
            embeddings = node2vec(G)
        embed_end = time.process_time()
    total_embed_time = embed_end - embed_start

    np.save(save_dir, embeddings)

    lr(eval_dataset, save_dir, dataset)

    print("Total Baseline Embedding Time: {}".format(total_embed_time))