Example #1
def eval_list(real_graphs_filename, pred_graphs_filename, prefix, eval_every):
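    # Index real/predicted graph files by (result id, epoch), then compare the real and
    # predicted graphs at each evaluated epoch with clustering-coefficient MMD
    # (the degree-MMD calls are left commented out in this example).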
    real_graphs_dict = {}
    pred_graphs_dict = {}

    for fname in real_graphs_filename:
        result_id, epochs = extract_result_id_and_epoch(fname, prefix, "real_")
        if epochs % eval_every != 0:
            continue
        if result_id not in real_graphs_dict:
            real_graphs_dict[result_id] = {}
        real_graphs_dict[result_id][epochs] = fname
    for fname in pred_graphs_filename:
        result_id, epochs = extract_result_id_and_epoch(fname, prefix, "pred_")
        if epochs % eval_every != 0:
            continue
        if result_id not in pred_graphs_dict:
            pred_graphs_dict[result_id] = {}
        pred_graphs_dict[result_id][epochs] = fname

    for result_id in real_graphs_dict.keys():
        for epochs in sorted(real_graphs_dict[result_id]):
            real_g_list = utils.load_graph_list(
                real_graphs_dict[result_id][epochs])
            pred_g_list = utils.load_graph_list(
                pred_graphs_dict[result_id][epochs])
            random.shuffle(real_g_list)
            random.shuffle(pred_g_list)
            perturbed_g_list = perturb(real_g_list, 0.05)

            # dist = eval.stats.degree_stats(real_g_list, pred_g_list)
            dist = eval.stats.clustering_stats(real_g_list, pred_g_list)
            print(
                "dist between real and pred (",
                result_id,
                ") at epoch ",
                epochs,
                ": ",
                dist,
            )

            # dist = eval.stats.degree_stats(real_g_list, perturbed_g_list)
            dist = eval.stats.clustering_stats(real_g_list, perturbed_g_list)
            print("dist between real and perturbed: ", dist)

            mid = len(real_g_list) // 2
            # dist = eval.stats.degree_stats(real_g_list[:mid], real_g_list[mid:])
            dist = eval.stats.clustering_stats(real_g_list[:mid],
                                               real_g_list[mid:])
            print("dist among real: ", dist)
Example #2
def load_ground_truth(dir_input, dataset_name, model_name='GraphRNN_RNN'):
    ''' Read ground truth graphs.
    '''
    if 'small' not in dataset_name:
        hidden = 128
    else:
        hidden = 64
    if model_name == 'Internal' or model_name == 'Noise' or model_name == 'B-A' or model_name == 'E-R':
        fname_test = dir_input + 'GraphRNN_MLP' + '_' + dataset_name + '_' + str(
            args.num_layers) + '_' + str(args.num_layers_edge) + '_' + str(
                args.hidden_size_rnn) + '_' + str(
                    args.hidden_size_rnn_output) + '_' + str(
                        args.embedding_size_rnn) + '_' + str(
                            args.embedding_size_rnn_output) + '_test_' + str(
                                0) + '.dat'
    else:
        fname_test = dir_input + model_name + '_' + dataset_name + '_' + str(
            args.num_layers) + '_' + str(args.num_layers_edge) + '_' + str(
                args.hidden_size_rnn) + '_' + str(
                    args.hidden_size_rnn_output) + '_' + str(
                        args.embedding_size_rnn) + '_' + str(
                            args.embedding_size_rnn_output) + '_test_' + str(
                                0) + '.dat'
    try:
        graph_test = utils.load_graph_list(fname_test, is_real=True)
    except:
        print('Not found: ' + fname_test)
        logging.warning('Not found: ' + fname_test)
        return None
    return graph_test
Example #3
def eval_list(real_graphs_filename, pred_graphs_filename, prefix, eval_every):
    real_graphs_dict = {}
    pred_graphs_dict = {}

    for fname in real_graphs_filename:
        result_id, epochs = extract_result_id_and_epoch(fname, prefix, 'real_')
        if epochs % eval_every != 0:
            continue
        if result_id not in real_graphs_dict:
            real_graphs_dict[result_id] = {}
        real_graphs_dict[result_id][epochs] = fname
    for fname in pred_graphs_filename:
        result_id, epochs = extract_result_id_and_epoch(fname, prefix, 'pred_')
        if epochs % eval_every != 0:
            continue
        if result_id not in pred_graphs_dict:
            pred_graphs_dict[result_id] = {}
        pred_graphs_dict[result_id][epochs] = fname
    
    for result_id in real_graphs_dict.keys():
        for epochs in sorted(real_graphs_dict[result_id]):
            real_g_list = utils.load_graph_list(real_graphs_dict[result_id][epochs])
            pred_g_list = utils.load_graph_list(pred_graphs_dict[result_id][epochs])
            shuffle(real_g_list)
            shuffle(pred_g_list)
            perturbed_g_list = perturb(real_g_list, 0.05)

            #dist = eval.stats.degree_stats(real_g_list, pred_g_list)
            dist = eval.stats.clustering_stats(real_g_list, pred_g_list)
            print('dist between real and pred (', result_id, ') at epoch ', epochs, ': ', dist)
    
            #dist = eval.stats.degree_stats(real_g_list, perturbed_g_list)
            dist = eval.stats.clustering_stats(real_g_list, perturbed_g_list)
            print('dist between real and perturbed: ', dist)

            mid = len(real_g_list) // 2
            #dist = eval.stats.degree_stats(real_g_list[:mid], real_g_list[mid:])
            dist = eval.stats.clustering_stats(real_g_list[:mid], real_g_list[mid:])
            print('dist among real: ', dist)
Example #4
def process_kron(kron_dir):
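    # Collect Kronecker generator output from kron_dir: if a pickled .dat graph list is
    # found it is loaded directly; otherwise every .txt file (SNAP edge-list output) is
    # converted to a networkx graph.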
    txt_files = []
    for f in os.listdir(kron_dir):
        filename = os.fsdecode(f)
        if filename.endswith('.txt'):
            txt_files.append(filename)
        elif filename.endswith('.dat'):
            return utils.load_graph_list(os.path.join(kron_dir, filename))
    G_list = []
    for filename in txt_files:
        G_list.append(utils.snap_txt_output_to_nx(os.path.join(kron_dir, filename)))

    return G_list
Example #5
def generate_data_community(fname):
    # num_communities = int(args.graph_type[-1])
    # print('Creating dataset with ', 2, ' communities')
    graphs = []
    # c_sizes = np.random.choice([12, 13, 14, 15, 16, 17], 2)
    # c_sizes = [15] * num_communities
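    # Load community graphs from a hard-coded GraphRNN training split and turn each one into
    # a decision sequence before pickling (the path below is specific to the original setup).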
    graphs_train = utils.load_graph_list(
        "/home/rachneet/PycharmProjects/graph_generation/graphs/GraphRNN_RNN_community2_4_128_train_0.dat"
    )
    for k in range(len(graphs_train)):
        graphs.append(getDecisionSequence(graphs_train[k]))

    with open(fname, 'wb') as f:
        pickle.dump(graphs, f)
Example #6
def load_ground_truth(dir_input, dataset_name, model_name='GraphRNN_RNN'):
    ''' Read ground truth graphs.
    '''
    if 'small' not in dataset_name:
        hidden = 128
    else:
        hidden = 64
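    # Ground-truth test graphs for the baseline models (Internal/Noise/B-A/E-R) are stored
    # under the GraphRNN_MLP naming scheme; other models use their own name.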
    if model_name=='Internal' or model_name=='Noise' or model_name=='B-A' or model_name=='E-R':
        fname_test = dir_input + 'GraphRNN_MLP' + '_' + dataset_name + '_' + str(args.num_layers) + '_' + str(
                hidden) + '_test_' + str(0) + '.dat'
    else:
        fname_test = dir_input + model_name + '_' + dataset_name + '_' + str(args.num_layers) + '_' + str(
                hidden) + '_test_' + str(0) + '.dat'
    try:
        graph_test = utils.load_graph_list(fname_test,is_real=True)
    except:
        print('Not found: ' + fname_test)
        logging.warning('Not found: ' + fname_test)
        return None
    return graph_test
Example #7
def eval_single_list(graphs, dir_input, dataset_name):
    ''' Evaluate a list of graphs by comparing with graphs in directory dir_input.
    Args:
        dir_input: directory where ground truth graph list is stored
        dataset_name: name of the dataset (ground truth)
    '''
    graph_test = utils.load_graph_list(
        '/u/home/r/rlwillia/graph-generation/graphs/ground_truth.pkl'
    )  #load_ground_truth(dir_input, dataset_name)
    graph_test_len = len(graph_test)
    graph_test = graph_test[int(
        0.8 * graph_test_len):]  # test on a hold out test set
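    # Compare the provided graphs against the held-out real test graphs via MMD of the
    # degree, clustering-coefficient, and 4-node-orbit-count distributions.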
    mmd_degree = eval.stats.degree_stats(graph_test, graphs)
    mmd_clustering = eval.stats.clustering_stats(graph_test, graphs)
    try:
        mmd_4orbits = eval.stats.orbit_stats_all(graph_test, graphs)
    except:
        mmd_4orbits = -1
    print('deg: ', mmd_degree)
    print('clustering: ', mmd_clustering)
    print('orbits: ', mmd_4orbits)
Example #8
def load_ground_truth(dir_input, dataset_name, model_name="GraphRNN_RNN"):
    """Read ground truth graphs."""
    if not "small" in dataset_name:
        hidden = 128
    else:
        hidden = 64
    if (model_name == "Internal" or model_name == "Noise"
            or model_name == "B-A" or model_name == "E-R"):
        fname_test = (dir_input + "GraphRNN_MLP" + "_" + dataset_name + "_" +
                      str(args.num_layers) + "_" + str(hidden) + "_test_" +
                      str(0) + ".dat")
    else:
        fname_test = (dir_input + model_name + "_" + dataset_name + "_" +
                      str(args.num_layers) + "_" + str(hidden) + "_test_" +
                      str(0) + ".dat")
    try:
        graph_test = utils.load_graph_list(fname_test, is_real=True)
    except:
        print("Not found: " + fname_test)
        logging.warning("Not found: " + fname_test)
        return None
    return graph_test
Example #9
def eval_list_fname(
    real_graph_filename,
    pred_graphs_filename,
    baselines,
    eval_every,
    epoch_range=None,
    out_file_prefix=None,
):
    """Evaluate list of predicted graphs compared to ground truth, stored in files.
    Args:
        baselines: dict mapping name of the baseline to list of generated graphs.
    """

    if out_file_prefix is not None:
        out_files = {
            "train": open(out_file_prefix + "_train.txt", "w+"),
            "compare": open(out_file_prefix + "_compare.txt", "w+"),
        }

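    # Note: the code below assumes out_file_prefix was provided; out_files is undefined otherwise.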
    out_files["train"].write("degree,clustering,orbits4\n")

    line = "metric,real,ours,perturbed"
    for bl in baselines:
        line += "," + bl
    line += "\n"
    out_files["compare"].write(line)

    results = {
        "deg": {
            "real": 0,
            "ours": 100,  # take min over all training epochs
            "perturbed": 0,
            "kron": 0,
        },
        "clustering": {
            "real": 0,
            "ours": 100,
            "perturbed": 0,
            "kron": 0
        },
        "orbits4": {
            "real": 0,
            "ours": 100,
            "perturbed": 0,
            "kron": 0
        },
    }

    num_evals = len(pred_graphs_filename)
    if epoch_range is None:
        epoch_range = [i * eval_every for i in range(num_evals)]
    for i in range(num_evals):
        real_g_list = utils.load_graph_list(real_graph_filename)
        # pred_g_list = utils.load_graph_list(pred_graphs_filename[i])

        # contains all predicted G
        pred_g_list_raw = utils.load_graph_list(pred_graphs_filename[i])
        if len(real_g_list) > 200:
            real_g_list = real_g_list[0:200]

        random.shuffle(real_g_list)
        random.shuffle(pred_g_list_raw)

        # get length
        real_g_len_list = np.array(
            [len(real_g_list[i]) for i in range(len(real_g_list))])
        pred_g_len_list_raw = np.array(
            [len(pred_g_list_raw[i]) for i in range(len(pred_g_list_raw))])
        # get perturb real
        # perturbed_g_list_001 = perturb(real_g_list, 0.01)
        perturbed_g_list_005 = perturb(real_g_list, 0.05)
        # perturbed_g_list_010 = perturb(real_g_list, 0.10)

        # select pred samples
        # Node counts are sampled from a distribution similar to that of the training set
        pred_g_list = []
        pred_g_len_list = []
        for value in real_g_len_list:
            pred_idx = find_nearest_idx(pred_g_len_list_raw, value)
            pred_g_list.append(pred_g_list_raw[pred_idx])
            pred_g_len_list.append(pred_g_len_list_raw[pred_idx])
            # delete
            pred_g_len_list_raw = np.delete(pred_g_len_list_raw, pred_idx)
            del pred_g_list_raw[pred_idx]
            if len(pred_g_list) == len(real_g_list):
                break
        # pred_g_len_list = np.array(pred_g_len_list)
        print("################## epoch {} ##################".format(
            epoch_range[i]))

        # info about graph size
        print(
            "real average nodes",
            sum([
                real_g_list[i].number_of_nodes()
                for i in range(len(real_g_list))
            ]) / len(real_g_list),
        )
        print(
            "pred average nodes",
            sum([
                pred_g_list[i].number_of_nodes()
                for i in range(len(pred_g_list))
            ]) / len(pred_g_list),
        )
        print("num of real graphs", len(real_g_list))
        print("num of pred graphs", len(pred_g_list))

        # ========================================
        # Evaluation
        # ========================================
        mid = len(real_g_list) // 2
        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list[:mid], real_g_list[mid:])
        # dist_4cycle = eval.stats.motif_stats(real_g_list[:mid], real_g_list[mid:])
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list[:mid],
                                                  real_g_list[mid:])
        print("degree dist among real: ", dist_degree)
        print("clustering dist among real: ", dist_clustering)
        # print('4 cycle dist among real: ', dist_4cycle)
        print("orbits dist among real: ", dist_4orbits)
        results["deg"]["real"] += dist_degree
        results["clustering"]["real"] += dist_clustering
        results["orbits4"]["real"] += dist_4orbits

        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list, pred_g_list)
        # dist_4cycle = eval.stats.motif_stats(real_g_list, pred_g_list)
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list, pred_g_list)
        print(
            "degree dist between real and pred at epoch ",
            epoch_range[i],
            ": ",
            dist_degree,
        )
        print(
            "clustering dist between real and pred at epoch ",
            epoch_range[i],
            ": ",
            dist_clustering,
        )
        # print('4 cycle dist between real and pred at epoch: ', epoch_range[i], dist_4cycle)
        print(
            "orbits dist between real and pred at epoch ",
            epoch_range[i],
            ": ",
            dist_4orbits,
        )
        results["deg"]["ours"] = min(dist_degree, results["deg"]["ours"])
        results["clustering"]["ours"] = min(dist_clustering,
                                            results["clustering"]["ours"])
        results["orbits4"]["ours"] = min(dist_4orbits,
                                         results["orbits4"]["ours"])

        # performance at training time
        out_files["train"].write(str(dist_degree) + ",")
        out_files["train"].write(str(dist_clustering) + ",")
        out_files["train"].write(str(dist_4orbits) + ",")

        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list, perturbed_g_list_005)
        # dist_4cycle = eval.stats.motif_stats(real_g_list, perturbed_g_list_005)
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list,
                                                  perturbed_g_list_005)
        print(
            "degree dist between real and perturbed at epoch ",
            epoch_range[i],
            ": ",
            dist_degree,
        )
        print(
            "clustering dist between real and perturbed at epoch ",
            epoch_range[i],
            ": ",
            dist_clustering,
        )
        # print('4 cycle dist between real and perturbed at epoch: ', epoch_range[i], dist_4cycle)
        print(
            "orbits dist between real and perturbed at epoch ",
            epoch_range[i],
            ": ",
            dist_4orbits,
        )
        results["deg"]["perturbed"] += dist_degree
        results["clustering"]["perturbed"] += dist_clustering
        results["orbits4"]["perturbed"] += dist_4orbits

        if i == 0:
            # Baselines
            for baseline in baselines:
                dist_degree, dist_clustering = compute_basic_stats(
                    real_g_list, baselines[baseline])
                dist_4orbits = eval.stats.orbit_stats_all(
                    real_g_list, baselines[baseline])
                results["deg"][baseline] = dist_degree
                results["clustering"][baseline] = dist_clustering
                results["orbits4"][baseline] = dist_4orbits
                print(
                    "Kron: deg=",
                    dist_degree,
                    ", clustering=",
                    dist_clustering,
                    ", orbits4=",
                    dist_4orbits,
                )

        out_files["train"].write("\n")

    for metric, methods in results.items():
        methods["real"] /= num_evals
        methods["perturbed"] /= num_evals

    # Write results
    for metric, methods in results.items():
        line = (metric + "," + str(methods["real"]) + "," +
                str(methods["ours"]) + "," + str(methods["perturbed"]))
        for baseline in baselines:
            line += "," + str(methods[baseline])
        line += "\n"

        out_files["compare"].write(line)

    for _, out_f in out_files.items():
        out_f.close()
Example #10
def evaluation_epoch(
    dir_input,
    fname_output,
    model_name,
    dataset_name,
    args,
    is_clean=True,
    epoch_start=1000,
    epoch_end=3001,
    epoch_step=100,
):
    with open(fname_output, "w+") as f:
        f.write(
            "sample_time,epoch,degree_validate,clustering_validate,orbits4_validate,degree_test,clustering_test,orbits4_test\n"
        )

        # TODO: Maybe refactor into a separate file/function that specifies THE naming convention
        # across main and evaluate
        if not "small" in dataset_name:
            hidden = 128
        else:
            hidden = 64
        # read real graph
        if (model_name == "Internal" or model_name == "Noise"
                or model_name == "B-A" or model_name == "E-R"):
            fname_test = (dir_input + "GraphRNN_MLP" + "_" + dataset_name +
                          "_" + str(args.num_layers) + "_" + str(hidden) +
                          "_test_" + str(0) + ".dat")
        elif "Baseline" in model_name:
            fname_test = (dir_input + model_name + "_" + dataset_name + "_" +
                          str(64) + "_test_" + str(0) + ".dat")
        else:
            fname_test = (dir_input + model_name + "_" + dataset_name + "_" +
                          str(args.num_layers) + "_" + str(hidden) + "_test_" +
                          str(0) + ".dat")
        try:
            graph_test = utils.load_graph_list(fname_test, is_real=True)
        except:
            print("Not found: " + fname_test)
            logging.warning("Not found: " + fname_test)
            return None

        graph_test_len = len(graph_test)
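        # Split the ground truth: first 80% as train, first 20% as validate (overlapping with
        # train by construction), last 20% as the held-out test set.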
        graph_train = graph_test[0:int(0.8 * graph_test_len)]  # train
        graph_validate = graph_test[0:int(0.2 * graph_test_len)]  # validate
        graph_test = graph_test[int(
            0.8 * graph_test_len):]  # test on a hold out test set

        graph_test_aver = 0
        for graph in graph_test:
            graph_test_aver += graph.number_of_nodes()
        graph_test_aver /= len(graph_test)
        print("test average len", graph_test_aver)

        # get performance for proposed approaches
        if "GraphRNN" in model_name:
            # read test graph
            for epoch in range(epoch_start, epoch_end, epoch_step):
                for sample_time in range(1, 4):
                    # get filename
                    fname_pred = (dir_input + model_name + "_" + dataset_name +
                                  "_" + str(args.num_layers) + "_" +
                                  str(hidden) + "_pred_" + str(epoch) + "_" +
                                  str(sample_time) + ".dat")
                    # load graphs
                    try:
                        graph_pred = utils.load_graph_list(
                            fname_pred, is_real=False)  # default False
                    except:
                        print("Not found: " + fname_pred)
                        logging.warning("Not found: " + fname_pred)
                        continue
                    # clean graphs
                    if is_clean:
                        graph_test, graph_pred = clean_graphs(
                            graph_test, graph_pred)
                    else:
                        random.shuffle(graph_pred)
                        graph_pred = graph_pred[0:len(graph_test)]
                    print("len graph_test", len(graph_test))
                    print("len graph_validate", len(graph_validate))
                    print("len graph_pred", len(graph_pred))

                    graph_pred_aver = 0
                    for graph in graph_pred:
                        graph_pred_aver += graph.number_of_nodes()
                    graph_pred_aver /= len(graph_pred)
                    print("pred average len", graph_pred_aver)

                    # evaluate MMD test
                    mmd_degree = eval.stats.degree_stats(
                        graph_test, graph_pred)
                    mmd_clustering = eval.stats.clustering_stats(
                        graph_test, graph_pred)
                    try:
                        mmd_4orbits = eval.stats.orbit_stats_all(
                            graph_test, graph_pred)
                    except:
                        mmd_4orbits = -1
                    # evaluate MMD validate
                    mmd_degree_validate = eval.stats.degree_stats(
                        graph_validate, graph_pred)
                    mmd_clustering_validate = eval.stats.clustering_stats(
                        graph_validate, graph_pred)
                    try:
                        mmd_4orbits_validate = eval.stats.orbit_stats_all(
                            graph_validate, graph_pred)
                    except:
                        mmd_4orbits_validate = -1
                    # write results
                    f.write(
                        str(sample_time) + "," + str(epoch) + "," +
                        str(mmd_degree_validate) + "," +
                        str(mmd_clustering_validate) + "," +
                        str(mmd_4orbits_validate) + "," + str(mmd_degree) +
                        "," + str(mmd_clustering) + "," + str(mmd_4orbits) +
                        "\n")
                    print(
                        "degree",
                        mmd_degree,
                        "clustering",
                        mmd_clustering,
                        "orbits",
                        mmd_4orbits,
                    )

        # get internal MMD (MMD between ground truth validation and test sets)
        if model_name == "Internal":
            mmd_degree_validate = eval.stats.degree_stats(
                graph_test, graph_validate)
            mmd_clustering_validate = eval.stats.clustering_stats(
                graph_test, graph_validate)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_validate)
            except:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + "," + str(-1) + "," + str(mmd_degree_validate) +
                "," + str(mmd_clustering_validate) + "," +
                str(mmd_4orbits_validate) + "," + str(-1) + "," + str(-1) +
                "," + str(-1) + "\n")

        # get MMD between ground truth and its perturbed graphs
        if model_name == "Noise":
            graph_validate_perturbed = perturb(graph_validate, 0.05)
            mmd_degree_validate = eval.stats.degree_stats(
                graph_test, graph_validate_perturbed)
            mmd_clustering_validate = eval.stats.clustering_stats(
                graph_test, graph_validate_perturbed)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_validate_perturbed)
            except:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + "," + str(-1) + "," + str(mmd_degree_validate) +
                "," + str(mmd_clustering_validate) + "," +
                str(mmd_4orbits_validate) + "," + str(-1) + "," + str(-1) +
                "," + str(-1) + "\n")

        # get E-R MMD
        if model_name == "E-R":
            graph_pred = Graph_generator_baseline(graph_train, generator="Gnp")
            # clean graphs
            if is_clean:
                graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
            print("len graph_test", len(graph_test))
            print("len graph_pred", len(graph_pred))
            mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
            mmd_clustering = eval.stats.clustering_stats(
                graph_test, graph_pred)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_pred)
            except:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + "," + str(-1) + "," + str(-1) + "," + str(-1) + "," +
                str(-1) + "," + str(mmd_degree) + "," + str(mmd_clustering) +
                "," + str(mmd_4orbits_validate) + "\n")

        # get B-A MMD
        if model_name == "B-A":
            graph_pred = Graph_generator_baseline(graph_train, generator="BA")
            # clean graphs
            if is_clean:
                graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
            print("len graph_test", len(graph_test))
            print("len graph_pred", len(graph_pred))
            mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
            mmd_clustering = eval.stats.clustering_stats(
                graph_test, graph_pred)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_pred)
            except:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + "," + str(-1) + "," + str(-1) + "," + str(-1) + "," +
                str(-1) + "," + str(mmd_degree) + "," + str(mmd_clustering) +
                "," + str(mmd_4orbits_validate) + "\n")

        # get performance for baseline approaches
        if "Baseline" in model_name:
            # read test graph
            for epoch in range(epoch_start, epoch_end, epoch_step):
                # get filename
                fname_pred = (dir_input + model_name + "_" + dataset_name +
                              "_" + str(64) + "_pred_" + str(epoch) + ".dat")
                # load graphs
                try:
                    graph_pred = utils.load_graph_list(
                        fname_pred, is_real=True)  # default False
                except:
                    print("Not found: " + fname_pred)
                    logging.warning("Not found: " + fname_pred)
                    continue
                # clean graphs
                if is_clean:
                    graph_test, graph_pred = clean_graphs(
                        graph_test, graph_pred)
                else:
                    random.shuffle(graph_pred)
                    graph_pred = graph_pred[0:len(graph_test)]
                print("len graph_test", len(graph_test))
                print("len graph_validate", len(graph_validate))
                print("len graph_pred", len(graph_pred))

                graph_pred_aver = 0
                for graph in graph_pred:
                    graph_pred_aver += graph.number_of_nodes()
                graph_pred_aver /= len(graph_pred)
                print("pred average len", graph_pred_aver)

                # evaluate MMD test
                mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
                mmd_clustering = eval.stats.clustering_stats(
                    graph_test, graph_pred)
                try:
                    mmd_4orbits = eval.stats.orbit_stats_all(
                        graph_test, graph_pred)
                except:
                    mmd_4orbits = -1
                # evaluate MMD validate
                mmd_degree_validate = eval.stats.degree_stats(
                    graph_validate, graph_pred)
                mmd_clustering_validate = eval.stats.clustering_stats(
                    graph_validate, graph_pred)
                try:
                    mmd_4orbits_validate = eval.stats.orbit_stats_all(
                        graph_validate, graph_pred)
                except:
                    mmd_4orbits_validate = -1
                # write results
                f.write(
                    str(-1) + "," + str(epoch) + "," +
                    str(mmd_degree_validate) + "," +
                    str(mmd_clustering_validate) + "," +
                    str(mmd_4orbits_validate) + "," + str(mmd_degree) + "," +
                    str(mmd_clustering) + "," + str(mmd_4orbits) + "\n")
                print(
                    "degree",
                    mmd_degree,
                    "clustering",
                    mmd_clustering,
                    "orbits",
                    mmd_4orbits,
                )

        return True
Example #11
                graphs.append(nx.grid_2d_graph(i, j))
        utils.export_graphs_to_txt(graphs, output_prefix)
    elif prog_args.graph_type == "caveman":
        graphs = []
        for i in range(2, 3):
            for j in range(30, 81):
                for k in range(10):
                    graphs.append(caveman_special(i, j, p_edge=0.3))
        utils.export_graphs_to_txt(graphs, output_prefix)
    elif prog_args.graph_type == "citeseer":
        graphs = utils.citeseer_ego()
        utils.export_graphs_to_txt(graphs, output_prefix)
    else:
        # load from directory
        input_path = dir_prefix + args.graph_save_path + args.fname_test + "0.dat"
        g_list = utils.load_graph_list(input_path)
        utils.export_graphs_to_txt(g_list, output_prefix)
elif prog_args.kron_dir != "":
    kron_g_list = process_kron(prog_args.kron_dir)
    fname = os.path.join(prog_args.kron_dir, prog_args.graph_type + ".dat")
    print([g.number_of_nodes() for g in kron_g_list])
    utils.save_graph_list(kron_g_list, fname)
elif prog_args.test_file != "":
    # evaluate single .dat file containing list of test graphs (networkx format)
    graphs = utils.load_graph_list(prog_args.test_file)
    eval_single_list(graphs,
                     dir_input=dir_prefix + "graphs/",
                     dataset_name="grid")
## if you don't try kronecker, only the following part is needed
else:
    if not os.path.isdir(dir_prefix + "eval_results"):
Example #12
def eval_list_fname(real_graph_filename,
                    pred_graphs_filename,
                    baselines,
                    eval_every,
                    epoch_range=None,
                    out_file_prefix=None):
    ''' Evaluate list of predicted graphs compared to ground truth, stored in files.
    Args:
        baselines: dict mapping name of the baseline to list of generated graphs.
    '''

    if out_file_prefix is not None:
        out_files = {
            'train': open(out_file_prefix + '_train.txt', 'w+'),
            'compare': open(out_file_prefix + '_compare.txt', 'w+')
        }

    out_files['train'].write('degree,clustering,orbits4\n')

    line = 'metric,real,ours,perturbed'
    for bl in baselines:
        line += ',' + bl
    line += '\n'
    out_files['compare'].write(line)

    results = {
        'deg': {
            'real': 0,
            'ours': 100,  # take min over all training epochs
            'perturbed': 0,
            'kron': 0
        },
        'clustering': {
            'real': 0,
            'ours': 100,
            'perturbed': 0,
            'kron': 0
        },
        'orbits4': {
            'real': 0,
            'ours': 100,
            'perturbed': 0,
            'kron': 0
        }
    }

    num_evals = len(pred_graphs_filename)
    if epoch_range is None:
        epoch_range = [i * eval_every for i in range(num_evals)]
    for i in range(num_evals):
        real_g_list = utils.load_graph_list(real_graph_filename)
        #pred_g_list = utils.load_graph_list(pred_graphs_filename[i])

        # contains all predicted G
        pred_g_list_raw = utils.load_graph_list(pred_graphs_filename[i])
        if len(real_g_list) > 200:
            real_g_list = real_g_list[0:200]

        shuffle(real_g_list)
        shuffle(pred_g_list_raw)

        # get length
        real_g_len_list = np.array(
            [len(real_g_list[i]) for i in range(len(real_g_list))])
        pred_g_len_list_raw = np.array(
            [len(pred_g_list_raw[i]) for i in range(len(pred_g_list_raw))])
        # get perturb real
        #perturbed_g_list_001 = perturb(real_g_list, 0.01)
        perturbed_g_list_005 = perturb(real_g_list, 0.05)
        #perturbed_g_list_010 = perturb(real_g_list, 0.10)

        # select pred samples
        # Node counts are sampled from a distribution similar to that of the training set
        pred_g_list = []
        pred_g_len_list = []
        for value in real_g_len_list:
            pred_idx = find_nearest_idx(pred_g_len_list_raw, value)
            pred_g_list.append(pred_g_list_raw[pred_idx])
            pred_g_len_list.append(pred_g_len_list_raw[pred_idx])
            # delete
            pred_g_len_list_raw = np.delete(pred_g_len_list_raw, pred_idx)
            del pred_g_list_raw[pred_idx]
            if len(pred_g_list) == len(real_g_list):
                break
        # pred_g_len_list = np.array(pred_g_len_list)
        print('################## epoch {} ##################'.format(
            epoch_range[i]))

        # info about graph size
        print(
            'real average nodes',
            sum([
                real_g_list[i].number_of_nodes()
                for i in range(len(real_g_list))
            ]) / len(real_g_list))
        print(
            'pred average nodes',
            sum([
                pred_g_list[i].number_of_nodes()
                for i in range(len(pred_g_list))
            ]) / len(pred_g_list))
        print('num of real graphs', len(real_g_list))
        print('num of pred graphs', len(pred_g_list))

        # ========================================
        # Evaluation
        # ========================================
        mid = len(real_g_list) // 2
        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list[:mid], real_g_list[mid:])
        #dist_4cycle = eval.stats.motif_stats(real_g_list[:mid], real_g_list[mid:])
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list[:mid],
                                                  real_g_list[mid:])
        print('degree dist among real: ', dist_degree)
        print('clustering dist among real: ', dist_clustering)
        #print('4 cycle dist among real: ', dist_4cycle)
        print('orbits dist among real: ', dist_4orbits)
        results['deg']['real'] += dist_degree
        results['clustering']['real'] += dist_clustering
        results['orbits4']['real'] += dist_4orbits

        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list, pred_g_list)
        #dist_4cycle = eval.stats.motif_stats(real_g_list, pred_g_list)
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list, pred_g_list)
        print('degree dist between real and pred at epoch ', epoch_range[i],
              ': ', dist_degree)
        print('clustering dist between real and pred at epoch ',
              epoch_range[i], ': ', dist_clustering)
        #print('4 cycle dist between real and pred at epoch: ', epoch_range[i], dist_4cycle)
        print('orbits dist between real and pred at epoch ', epoch_range[i],
              ': ', dist_4orbits)
        results['deg']['ours'] = min(dist_degree, results['deg']['ours'])
        results['clustering']['ours'] = min(dist_clustering,
                                            results['clustering']['ours'])
        results['orbits4']['ours'] = min(dist_4orbits,
                                         results['orbits4']['ours'])

        # performance at training time
        out_files['train'].write(str(dist_degree) + ',')
        out_files['train'].write(str(dist_clustering) + ',')
        out_files['train'].write(str(dist_4orbits) + ',')

        dist_degree, dist_clustering = compute_basic_stats(
            real_g_list, perturbed_g_list_005)
        #dist_4cycle = eval.stats.motif_stats(real_g_list, perturbed_g_list_005)
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list,
                                                  perturbed_g_list_005)
        print('degree dist between real and perturbed at epoch ',
              epoch_range[i], ': ', dist_degree)
        print('clustering dist between real and perturbed at epoch ',
              epoch_range[i], ': ', dist_clustering)
        #print('4 cycle dist between real and perturbed at epoch: ', epoch_range[i], dist_4cycle)
        print('orbits dist between real and perturbed at epoch ',
              epoch_range[i], ': ', dist_4orbits)
        results['deg']['perturbed'] += dist_degree
        results['clustering']['perturbed'] += dist_clustering
        results['orbits4']['perturbed'] += dist_4orbits

        if i == 0:
            # Baselines
            for baseline in baselines:
                dist_degree, dist_clustering = compute_basic_stats(
                    real_g_list, baselines[baseline])
                dist_4orbits = eval.stats.orbit_stats_all(
                    real_g_list, baselines[baseline])
                results['deg'][baseline] = dist_degree
                results['clustering'][baseline] = dist_clustering
                results['orbits4'][baseline] = dist_4orbits
                print('Kron: deg=', dist_degree, ', clustering=',
                      dist_clustering, ', orbits4=', dist_4orbits)

        out_files['train'].write('\n')

    for metric, methods in results.items():
        methods['real'] /= num_evals
        methods['perturbed'] /= num_evals

    # Write results
    for metric, methods in results.items():
        line = metric+','+ \
                str(methods['real'])+','+ \
                str(methods['ours'])+','+ \
                str(methods['perturbed'])
        for baseline in baselines:
            line += ',' + str(methods[baseline])
        line += '\n'

        out_files['compare'].write(line)

    for _, out_f in out_files.items():
        out_f.close()
Example #13
def evaluation_epoch(
    dir_input,
    fname_output,
    model_name,
    dataset_name,
    args,
    is_clean=True,
    selected_epochs=list(range(
        1000, 3001, 100))):  # epoch_start=1000,epoch_end=3001,epoch_step=100):
    with open(fname_output, 'w+') as f:
        # f.write('sample_time,epoch,degree_validate,clustering_validate,orbits4_validate,degree_test,clustering_test,orbits4_test\n')
        f.write(
            'sample_time,\tepoch,\tdegree_test,\tclustering_test,\torbits4_test\n'
        )

        # TODO: Maybe refactor into a separate file/function that specifies THE naming convention
        # across main and evaluate
        if 'small' not in dataset_name:
            hidden = 128
        else:
            hidden = 64
        # read real graph
        if model_name == 'Internal' or model_name == 'Noise' or model_name == 'B-A' or model_name == 'E-R':
            fname_test = dir_input + 'GraphRNN_MLP' + '_' + dataset_name + '_' + str(
                args.num_layers) + '_' + str(hidden) + '_test_' + str(
                    0) + '.dat'
        elif 'Baseline' in model_name:
            fname_test = dir_input + model_name + '_' + dataset_name + '_' + str(
                64) + '_test_' + str(0) + '.dat'
        else:
            # fname_test = dir_input + model_name + '_' + dataset_name + '_' + str(args.num_layers) + '_' + str(
            fname_test = dir_input + model_name.split(
                '-'
            )[0] + '_' + dataset_name + '_' + args.input_type + '_test_' + str(
                0) + '.dat'
        try:
            graph_test = utils.load_graph_list(fname_test, is_real=True)
        except:
            print('Not found: ' + fname_test)
            logging.warning('Not found: ' + fname_test)
            return None

        graph_test_len = len(graph_test)
        graph_train = graph_test[0:int(0.8 * graph_test_len)]  # train
        graph_validate = graph_test[0:int(0.2 * graph_test_len)]  # validate
        graph_test = graph_test[int(
            0.8 * graph_test_len):]  # test on a hold out test set

        graph_test_aver = 0
        for graph in graph_test:
            graph_test_aver += graph.number_of_nodes()
        graph_test_aver /= len(graph_test)
        print('test average len', graph_test_aver)

        # get performance for proposed approaches
        # if 'GraphRNN' in model_name:
        if model_name.startswith('Gransformer'):
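            # In this fork, prediction files are named with args.input_type rather than the
            # layer/hidden sizes used by the GraphRNN naming scheme (see fname_pred below).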
            # read test graph
            for epoch in selected_epochs:  # range(epoch_start,epoch_end,epoch_step):
                for sample_time in range(1, 2):  # ,4):
                    # get filename
                    # fname_pred = dir_input + model_name + '_' + dataset_name + '_' + str(args.num_layers) + '_' + str(hidden) + '_pred_' + str(epoch) + '_' + str(sample_time) + '.dat'
                    fname_pred = dir_input + model_name + '_' + dataset_name + '_' + args.input_type + '_pred_' + str(
                        epoch) + '_' + str(sample_time) + '.dat'
                    # load graphs
                    try:
                        graph_pred = utils.load_graph_list(
                            fname_pred, is_real=False)  # default False
                    except:
                        print('Not found: ' + fname_pred)
                        logging.warning('Not found: ' + fname_pred)
                        continue
                    # clean graphs
                    if is_clean:
                        graph_test, graph_pred = clean_graphs(
                            graph_test, graph_pred)
                    else:
                        shuffle(graph_pred)
                        graph_pred = graph_pred[0:len(graph_test)]
                    print('len graph_test', len(graph_test))
                    print('len graph_validate', len(graph_validate))
                    print('len graph_pred', len(graph_pred))

                    graph_pred_aver = 0
                    for graph in graph_pred:
                        graph_pred_aver += graph.number_of_nodes()
                    graph_pred_aver /= len(graph_pred)
                    print('pred average len', graph_pred_aver)

                    # evaluate MMD test
                    mmd_degree = eval.stats.degree_stats(
                        graph_test, graph_pred)
                    mmd_clustering = eval.stats.clustering_stats(
                        graph_test, graph_pred)
                    try:
                        mmd_4orbits = eval.stats.orbit_stats_all(
                            graph_test, graph_pred)
                    except:
                        mmd_4orbits = -1
                    # evaluate MMD validate
                    # mmd_degree_validate = eval.stats.degree_stats(graph_validate, graph_pred)
                    # mmd_clustering_validate = eval.stats.clustering_stats(graph_validate, graph_pred)
                    # try:
                    #     mmd_4orbits_validate = eval.stats.orbit_stats_all(graph_validate, graph_pred)
                    # except:
                    #     mmd_4orbits_validate = -1
                    # write results
                    # f.write(str(sample_time)+','+
                    #         str(epoch)+','+
                    #         str(mmd_degree_validate)+','+
                    #         str(mmd_clustering_validate)+','+
                    #         str(mmd_4orbits_validate)+','+
                    #         str(mmd_degree)+','+
                    #         str(mmd_clustering)+','+
                    #         str(mmd_4orbits)+'\n')
                    f.write(
                        str(sample_time) + ',\t' + str(epoch) + ',\t' +
                        str(mmd_degree) + ',\t' + str(mmd_clustering) + ',\t' +
                        str(mmd_4orbits) + '\n')
                    print('degree', mmd_degree, 'clustering', mmd_clustering,
                          'orbits', mmd_4orbits)

        # get internal MMD (MMD between ground truth validation and test sets)
        if model_name == 'Internal':
            mmd_degree_validate = eval.stats.degree_stats(
                graph_test, graph_validate)
            mmd_clustering_validate = eval.stats.clustering_stats(
                graph_test, graph_validate)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_validate)
            except:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + ',' + str(-1) + ',' + str(mmd_degree_validate) +
                ',' + str(mmd_clustering_validate) + ',' +
                str(mmd_4orbits_validate) + ',' + str(-1) + ',' + str(-1) +
                ',' + str(-1) + '\n')

        # get MMD between ground truth and its perturbed graphs
        if model_name == 'Noise':
            graph_validate_perturbed = perturb(graph_validate, 0.05)
            mmd_degree_validate = eval.stats.degree_stats(
                graph_test, graph_validate_perturbed)
            mmd_clustering_validate = eval.stats.clustering_stats(
                graph_test, graph_validate_perturbed)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_validate_perturbed)
            except:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + ',' + str(-1) + ',' + str(mmd_degree_validate) +
                ',' + str(mmd_clustering_validate) + ',' +
                str(mmd_4orbits_validate) + ',' + str(-1) + ',' + str(-1) +
                ',' + str(-1) + '\n')

        # get E-R MMD
        if model_name == 'E-R':
            graph_pred = Graph_generator_baseline(graph_train, generator='Gnp')
            # clean graphs
            if is_clean:
                graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
            print('len graph_test', len(graph_test))
            print('len graph_pred', len(graph_pred))
            mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
            mmd_clustering = eval.stats.clustering_stats(
                graph_test, graph_pred)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_pred)
            except:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + ',' + str(-1) + ',' + str(-1) + ',' + str(-1) + ',' +
                str(-1) + ',' + str(mmd_degree) + ',' + str(mmd_clustering) +
                ',' + str(mmd_4orbits_validate) + '\n')

        # get B-A MMD
        if model_name == 'B-A':
            graph_pred = Graph_generator_baseline(graph_train, generator='BA')
            # clean graphs
            if is_clean:
                graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
            print('len graph_test', len(graph_test))
            print('len graph_pred', len(graph_pred))
            mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
            mmd_clustering = eval.stats.clustering_stats(
                graph_test, graph_pred)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(
                    graph_test, graph_pred)
            except:
                mmd_4orbits_validate = -1
            f.write(
                str(-1) + ',' + str(-1) + ',' + str(-1) + ',' + str(-1) + ',' +
                str(-1) + ',' + str(mmd_degree) + ',' + str(mmd_clustering) +
                ',' + str(mmd_4orbits_validate) + '\n')

        # get performance for baseline approaches
        if 'Baseline' in model_name:
            # read test graph
            for epoch in selected_epochs:  #range(epoch_start, epoch_end, epoch_step):
                # get filename
                fname_pred = dir_input + model_name + '_' + dataset_name + '_' + str(
                    64) + '_pred_' + str(epoch) + '.dat'
                # load graphs
                try:
                    graph_pred = utils.load_graph_list(
                        fname_pred, is_real=True)  # default False
                except:
                    print('Not found: ' + fname_pred)
                    logging.warning('Not found: ' + fname_pred)
                    continue
                # clean graphs
                if is_clean:
                    graph_test, graph_pred = clean_graphs(
                        graph_test, graph_pred)
                else:
                    shuffle(graph_pred)
                    graph_pred = graph_pred[0:len(graph_test)]
                print('len graph_test', len(graph_test))
                print('len graph_validate', len(graph_validate))
                print('len graph_pred', len(graph_pred))

                graph_pred_aver = 0
                for graph in graph_pred:
                    graph_pred_aver += graph.number_of_nodes()
                graph_pred_aver /= len(graph_pred)
                print('pred average len', graph_pred_aver)

                # evaluate MMD test
                mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
                mmd_clustering = eval.stats.clustering_stats(
                    graph_test, graph_pred)
                try:
                    mmd_4orbits = eval.stats.orbit_stats_all(
                        graph_test, graph_pred)
                except:
                    mmd_4orbits = -1
                # evaluate MMD validate
                mmd_degree_validate = eval.stats.degree_stats(
                    graph_validate, graph_pred)
                mmd_clustering_validate = eval.stats.clustering_stats(
                    graph_validate, graph_pred)
                try:
                    mmd_4orbits_validate = eval.stats.orbit_stats_all(
                        graph_validate, graph_pred)
                except:
                    mmd_4orbits_validate = -1
                # write results
                f.write(
                    str(-1) + ',' + str(epoch) + ',' +
                    str(mmd_degree_validate) + ',' +
                    str(mmd_clustering_validate) + ',' +
                    str(mmd_4orbits_validate) + ',' + str(mmd_degree) + ',' +
                    str(mmd_clustering) + ',' + str(mmd_4orbits) + '\n')
                print('degree', mmd_degree, 'clustering', mmd_clustering,
                      'orbits', mmd_4orbits)

        return True
Example #14
              mmd_4orbits)


def save_graph_list(G_list, fname):
    with open(fname, "wb") as f:
        pickle.dump(G_list, f)


if __name__ == "__main__":

    # load test graphs to test against
    test_graphs = []
    test_path = "graph/GraphRNN_RNN_community2_multi_4_128_test_0.dat"
    validate_path = "graph/GraphRNN_RNN_community2_multi_4_128_validate_0.dat"
    # test_path = "graph/GraphRNN_RNN_barabasi_small_4_64_test_0.dat"
    test_graphs = load_graph_list(test_path)
    v_graphs = load_graph_list(validate_path)
    # load predicted graphs and add them to a list
    # path = "sample/*"
    # path = "/home/rachneet/PycharmProjects/graph_generation/baselines/graphvae/graphs/"
    path = "graph/nevae_community_pred.dat"

    #for i in range(2):
    # for fname in sorted(glob.glob(path)):
    #     pred_graphs = []
    #     print(fname)
    #     if "community_vae" in fname:
    #     with open(fname,'rb') as f:
    #         graph = nx.read_edgelist(f, nodetype=int)
    #         pred_graphs.append(graph)
Example #15
def eval_list_fname(real_graph_filename, pred_graphs_filename, baselines,
        eval_every, epoch_range=None, out_file_prefix=None):
    ''' Evaluate list of predicted graphs compared to ground truth, stored in files.
    Args:
        baselines: dict mapping name of the baseline to list of generated graphs.
    '''

    if out_file_prefix is not None:
        out_files = {
                'train': open(out_file_prefix + '_train.txt', 'w+'),
                'compare': open(out_file_prefix + '_compare.txt', 'w+')
        }

    out_files['train'].write('degree,clustering,orbits4\n')
    
    line = 'metric,real,ours,perturbed'
    for bl in baselines:
        line += ',' + bl
    line += '\n'
    out_files['compare'].write(line)

    results = {
            'deg': {
                    'real': 0,
                    'ours': 100, # take min over all training epochs
                    'perturbed': 0,
                    'kron': 0},
            'clustering': {
                    'real': 0,
                    'ours': 100,
                    'perturbed': 0,
                    'kron': 0},
            'orbits4': {
                    'real': 0,
                    'ours': 100,
                    'perturbed': 0,
                    'kron': 0}
    }


    num_evals = len(pred_graphs_filename)
    if epoch_range is None:
        epoch_range = [i * eval_every for i in range(num_evals)] 
    for i in range(num_evals):
        real_g_list = utils.load_graph_list(real_graph_filename)
        #pred_g_list = utils.load_graph_list(pred_graphs_filename[i])

        # contains all predicted G
        pred_g_list_raw = utils.load_graph_list(pred_graphs_filename[i])
        if len(real_g_list)>200:
            real_g_list = real_g_list[0:200]

        shuffle(real_g_list)
        shuffle(pred_g_list_raw)

        # get length
        real_g_len_list = np.array([len(real_g_list[i]) for i in range(len(real_g_list))])
        pred_g_len_list_raw = np.array([len(pred_g_list_raw[i]) for i in range(len(pred_g_list_raw))])
        # get perturb real
        #perturbed_g_list_001 = perturb(real_g_list, 0.01)
        perturbed_g_list_005 = perturb(real_g_list, 0.05)
        #perturbed_g_list_010 = perturb(real_g_list, 0.10)


        # select pred samples
        # Node counts are sampled from a distribution similar to that of the training set
        pred_g_list = []
        pred_g_len_list = []
        for value in real_g_len_list:
            pred_idx = find_nearest_idx(pred_g_len_list_raw, value)
            pred_g_list.append(pred_g_list_raw[pred_idx])
            pred_g_len_list.append(pred_g_len_list_raw[pred_idx])
            # delete
            pred_g_len_list_raw = np.delete(pred_g_len_list_raw, pred_idx)
            del pred_g_list_raw[pred_idx]
            if len(pred_g_list) == len(real_g_list):
                break
        # pred_g_len_list = np.array(pred_g_len_list)
        print('################## epoch {} ##################'.format(epoch_range[i]))

        # info about graph size
        print('real average nodes',
              sum([real_g_list[i].number_of_nodes() for i in range(len(real_g_list))]) / len(real_g_list))
        print('pred average nodes',
              sum([pred_g_list[i].number_of_nodes() for i in range(len(pred_g_list))]) / len(pred_g_list))
        print('num of real graphs', len(real_g_list))
        print('num of pred graphs', len(pred_g_list))

        # ========================================
        # Evaluation
        # ========================================
        mid = len(real_g_list) // 2
        dist_degree, dist_clustering = compute_basic_stats(real_g_list[:mid], real_g_list[mid:])
        #dist_4cycle = eval.stats.motif_stats(real_g_list[:mid], real_g_list[mid:])
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list[:mid], real_g_list[mid:])
        print('degree dist among real: ', dist_degree)
        print('clustering dist among real: ', dist_clustering)
        #print('4 cycle dist among real: ', dist_4cycle)
        print('orbits dist among real: ', dist_4orbits)
        results['deg']['real'] += dist_degree
        results['clustering']['real'] += dist_clustering
        results['orbits4']['real'] += dist_4orbits

        dist_degree, dist_clustering = compute_basic_stats(real_g_list, pred_g_list)
        #dist_4cycle = eval.stats.motif_stats(real_g_list, pred_g_list)
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list, pred_g_list)
        print('degree dist between real and pred at epoch ', epoch_range[i], ': ', dist_degree)
        print('clustering dist between real and pred at epoch ', epoch_range[i], ': ', dist_clustering)
        #print('4 cycle dist between real and pred at epoch: ', epoch_range[i], dist_4cycle)
        print('orbits dist between real and pred at epoch ', epoch_range[i], ': ', dist_4orbits)
        results['deg']['ours'] = min(dist_degree, results['deg']['ours'])
        results['clustering']['ours'] = min(dist_clustering, results['clustering']['ours'])
        results['orbits4']['ours'] = min(dist_4orbits, results['orbits4']['ours'])

        # performance at training time
        out_files['train'].write(str(dist_degree) + ',')
        out_files['train'].write(str(dist_clustering) + ',')
        out_files['train'].write(str(dist_4orbits) + ',')

        dist_degree, dist_clustering = compute_basic_stats(real_g_list, perturbed_g_list_005)
        #dist_4cycle = eval.stats.motif_stats(real_g_list, perturbed_g_list_005)
        dist_4orbits = eval.stats.orbit_stats_all(real_g_list, perturbed_g_list_005)
        print('degree dist between real and perturbed at epoch ', epoch_range[i], ': ', dist_degree)
        print('clustering dist between real and perturbed at epoch ', epoch_range[i], ': ', dist_clustering)
        #print('4 cycle dist between real and perturbed at epoch: ', epoch_range[i], dist_4cycle)
        print('orbits dist between real and perturbed at epoch ', epoch_range[i], ': ', dist_4orbits)
        results['deg']['perturbed'] += dist_degree
        results['clustering']['perturbed'] += dist_clustering
        results['orbits4']['perturbed'] += dist_4orbits

        if i == 0:
            # Baselines
            for baseline in baselines:
                dist_degree, dist_clustering = compute_basic_stats(real_g_list, baselines[baseline])
                dist_4orbits = eval.stats.orbit_stats_all(real_g_list, baselines[baseline])
                results['deg'][baseline] = dist_degree
                results['clustering'][baseline] = dist_clustering
                results['orbits4'][baseline] = dist_4orbits
                print(baseline, ': deg=', dist_degree, ', clustering=', dist_clustering,
                        ', orbits4=', dist_4orbits)

        out_files['train'].write('\n')

    for metric, methods in results.items():
        methods['real'] /= num_evals
        methods['perturbed'] /= num_evals

    # Write results
    for metric, methods in results.items():
        line = metric+','+ \
                str(methods['real'])+','+ \
                str(methods['ours'])+','+ \
                str(methods['perturbed'])
        for baseline in baselines:
            line += ',' + str(methods[baseline])
        line += '\n'

        out_files['compare'].write(line)

    for _, out_f in out_files.items():
        out_f.close()
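
# find_nearest_idx (used in the sample-selection loop above) is not part of this
# excerpt. A minimal sketch, assuming it simply returns the index of the array
# entry closest to a target value:

import numpy as np

def find_nearest_idx(array, value):
    # index of the element of `array` whose value is nearest to `value`
    return int(np.argmin(np.abs(np.asarray(array) - value)))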
Exemplo n.º 16
0
def evaluation_epoch(dir_input, fname_output, model_name, dataset_name, args, is_clean=True, epoch_start=1000,epoch_end=3001,epoch_step=100):
    with open(fname_output, 'w+') as f:
        f.write('sample_time,epoch,degree_validate,clustering_validate,orbits4_validate,degree_test,clustering_test,orbits4_test\n')

        # TODO: Maybe refactor into a separate file/function that specifies THE naming convention
        # across main and evaluate
        if not 'small' in dataset_name:
            hidden = 128
        else:
            hidden = 64
        # read real graph
        if model_name=='Internal' or model_name=='Noise' or model_name=='B-A' or model_name=='E-R':
            fname_test = dir_input + 'GraphRNN_MLP' + '_' + dataset_name + '_' + str(args.num_layers) + '_' + str(
                hidden) + '_test_' + str(0) + '.dat'
        elif 'Baseline' in model_name:
            fname_test = dir_input + model_name + '_' + dataset_name + '_' + str(64) + '_test_' + str(0) + '.dat'
        else:
            fname_test = dir_input + model_name + '_' + dataset_name + '_' + str(args.num_layers) + '_' + str(
                hidden) + '_test_' + str(0) + '.dat'
        try:
            graph_test = utils.load_graph_list(fname_test,is_real=True)
        except:
            print('Not found: ' + fname_test)
            logging.warning('Not found: ' + fname_test)
            return None

        graph_test_len = len(graph_test)
        graph_train = graph_test[0:int(0.8 * graph_test_len)] # train
        graph_validate = graph_test[0:int(0.2 * graph_test_len)] # validate
        graph_test = graph_test[int(0.8 * graph_test_len):] # test on a hold out test set
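        # note: as written, graph_validate (the first 20% of graph_test) overlaps
        # graph_train (the first 80%); only the last 20% is a true hold-out set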

        graph_test_aver = 0
        for graph in graph_test:
            graph_test_aver+=graph.number_of_nodes()
        graph_test_aver /= len(graph_test)
        print('test average len',graph_test_aver)


        # get performance for proposed approaches
        if 'GraphRNN' in model_name:
            # read test graph
            for epoch in range(epoch_start,epoch_end,epoch_step):
                for sample_time in range(1,4):
                    # get filename
                    fname_pred = dir_input + model_name + '_' + dataset_name + '_' + str(args.num_layers) + '_' + str(hidden) + '_pred_' + str(epoch) + '_' + str(sample_time) + '.dat'
                    # load graphs
                    try:
                        graph_pred = utils.load_graph_list(fname_pred,is_real=False) # default False
                    except:
                        print('Not found: '+ fname_pred)
                        logging.warning('Not found: '+ fname_pred)
                        continue
                    # clean graphs
                    if is_clean:
                        graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
                    else:
                        shuffle(graph_pred)
                        graph_pred = graph_pred[0:len(graph_test)]
                    print('len graph_test', len(graph_test))
                    print('len graph_validate', len(graph_validate))
                    print('len graph_pred', len(graph_pred))

                    graph_pred_aver = 0
                    for graph in graph_pred:
                        graph_pred_aver += graph.number_of_nodes()
                    graph_pred_aver /= len(graph_pred)
                    print('pred average len', graph_pred_aver)

                    # evaluate MMD test
                    mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
                    mmd_clustering = eval.stats.clustering_stats(graph_test, graph_pred)
                    try:
                        mmd_4orbits = eval.stats.orbit_stats_all(graph_test, graph_pred)
                    except:
                        mmd_4orbits = -1
                    # evaluate MMD validate
                    mmd_degree_validate = eval.stats.degree_stats(graph_validate, graph_pred)
                    mmd_clustering_validate = eval.stats.clustering_stats(graph_validate, graph_pred)
                    try:
                        mmd_4orbits_validate = eval.stats.orbit_stats_all(graph_validate, graph_pred)
                    except:
                        mmd_4orbits_validate = -1
                    # write results
                    f.write(str(sample_time)+','+
                            str(epoch)+','+
                            str(mmd_degree_validate)+','+
                            str(mmd_clustering_validate)+','+
                            str(mmd_4orbits_validate)+','+ 
                            str(mmd_degree)+','+
                            str(mmd_clustering)+','+
                            str(mmd_4orbits)+'\n')
                    print('degree',mmd_degree,'clustering',mmd_clustering,'orbits',mmd_4orbits)

        # get internal MMD (MMD between ground truth validation and test sets)
        if model_name == 'Internal':
            mmd_degree_validate = eval.stats.degree_stats(graph_test, graph_validate)
            mmd_clustering_validate = eval.stats.clustering_stats(graph_test, graph_validate)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(graph_test, graph_validate)
            except:
                mmd_4orbits_validate = -1
            f.write(str(-1) + ',' + str(-1) + ',' + str(mmd_degree_validate) + ',' + str(
                mmd_clustering_validate) + ',' + str(mmd_4orbits_validate)
                    + ',' + str(-1) + ',' + str(-1) + ',' + str(-1) + '\n')


        # get MMD between ground truth and its perturbed graphs
        if model_name == 'Noise':
            graph_validate_perturbed = perturb(graph_validate, 0.05)
            mmd_degree_validate = eval.stats.degree_stats(graph_test, graph_validate_perturbed)
            mmd_clustering_validate = eval.stats.clustering_stats(graph_test, graph_validate_perturbed)
            try:
                mmd_4orbits_validate = eval.stats.orbit_stats_all(graph_test, graph_validate_perturbed)
            except:
                mmd_4orbits_validate = -1
            f.write(str(-1) + ',' + str(-1) + ',' + str(mmd_degree_validate) + ',' + str(
                mmd_clustering_validate) + ',' + str(mmd_4orbits_validate)
                    + ',' + str(-1) + ',' + str(-1) + ',' + str(-1) + '\n')

        # get E-R MMD
        if model_name == 'E-R':
            graph_pred = Graph_generator_baseline(graph_train,generator='Gnp')
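            # Graph_generator_baseline is not shown here; it is assumed to fit the
            # generator's parameters (e.g. edge probability for G(n,p)) from
            # graph_train and sample one synthetic graph per training graph.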
            # clean graphs
            if is_clean:
                graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
            print('len graph_test', len(graph_test))
            print('len graph_pred', len(graph_pred))
            mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
            mmd_clustering = eval.stats.clustering_stats(graph_test, graph_pred)
            try:
                mmd_4orbits = eval.stats.orbit_stats_all(graph_test, graph_pred)
            except:
                mmd_4orbits = -1
            f.write(str(-1) + ',' + str(-1) + ',' + str(-1) + ',' + str(-1) + ',' + str(-1)
                    + ',' + str(mmd_degree) + ',' + str(mmd_clustering) + ',' + str(mmd_4orbits) + '\n')


        # get B-A MMD
        if model_name == 'B-A':
            graph_pred = Graph_generator_baseline(graph_train, generator='BA')
            # clean graphs
            if is_clean:
                graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
            print('len graph_test', len(graph_test))
            print('len graph_pred', len(graph_pred))
            mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
            mmd_clustering = eval.stats.clustering_stats(graph_test, graph_pred)
            try:
                mmd_4orbits = eval.stats.orbit_stats_all(graph_test, graph_pred)
            except:
                mmd_4orbits = -1
            f.write(str(-1) + ',' + str(-1) + ',' + str(-1) + ',' + str(-1) + ',' + str(-1)
                    + ',' + str(mmd_degree) + ',' + str(mmd_clustering) + ',' + str(mmd_4orbits) + '\n')

        # get performance for baseline approaches
        if 'Baseline' in model_name:
            # read test graph
            for epoch in range(epoch_start, epoch_end, epoch_step):
                # get filename
                fname_pred = dir_input + model_name + '_' + dataset_name + '_' + str(
                    64) + '_pred_' + str(epoch) + '.dat'
                # load graphs
                try:
                    graph_pred = utils.load_graph_list(fname_pred, is_real=True)  # is_real defaults to False; True is used for the baseline outputs
                except:
                    print('Not found: ' + fname_pred)
                    logging.warning('Not found: ' + fname_pred)
                    continue
                # clean graphs
                if is_clean:
                    graph_test, graph_pred = clean_graphs(graph_test, graph_pred)
                else:
                    shuffle(graph_pred)
                    graph_pred = graph_pred[0:len(graph_test)]
                print('len graph_test', len(graph_test))
                print('len graph_validate', len(graph_validate))
                print('len graph_pred', len(graph_pred))

                graph_pred_aver = 0
                for graph in graph_pred:
                    graph_pred_aver += graph.number_of_nodes()
                graph_pred_aver /= len(graph_pred)
                print('pred average len', graph_pred_aver)

                # evaluate MMD test
                mmd_degree = eval.stats.degree_stats(graph_test, graph_pred)
                mmd_clustering = eval.stats.clustering_stats(graph_test, graph_pred)
                try:
                    mmd_4orbits = eval.stats.orbit_stats_all(graph_test, graph_pred)
                except:
                    mmd_4orbits = -1
                # evaluate MMD validate
                mmd_degree_validate = eval.stats.degree_stats(graph_validate, graph_pred)
                mmd_clustering_validate = eval.stats.clustering_stats(graph_validate, graph_pred)
                try:
                    mmd_4orbits_validate = eval.stats.orbit_stats_all(graph_validate, graph_pred)
                except:
                    mmd_4orbits_validate = -1
                # write results
                f.write(str(-1) + ',' + str(epoch) + ',' + str(mmd_degree_validate) + ',' + str(
                    mmd_clustering_validate) + ',' + str(mmd_4orbits_validate)
                        + ',' + str(mmd_degree) + ',' + str(mmd_clustering) + ',' + str(mmd_4orbits) + '\n')
                print('degree', mmd_degree, 'clustering', mmd_clustering, 'orbits', mmd_4orbits)



        return True
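
# eval.stats.degree_stats / clustering_stats / orbit_stats_all compute MMD
# between two sets of per-graph statistics; their implementations are not shown
# in this excerpt. A rough, illustrative sketch of the degree variant
# (fixed-length degree histograms compared with an RBF-kernel MMD; the bin
# count and bandwidth are assumptions, not the project's actual settings):

import numpy as np
import networkx as nx

def degree_hist(g, nbins=100):
    # normalized degree histogram, zero-padded to a fixed length
    counts = np.array(nx.degree_histogram(g), dtype=float)
    h = np.zeros(nbins)
    h[:min(nbins, len(counts))] = counts[:nbins]
    return h / max(h.sum(), 1.0)

def degree_mmd(graphs_a, graphs_b, sigma=1.0, nbins=100):
    # biased MMD^2 estimate between the two histogram sets, Gaussian kernel
    A = [degree_hist(g, nbins) for g in graphs_a]
    B = [degree_hist(g, nbins) for g in graphs_b]
    k = lambda x, y: np.exp(-np.sum((x - y) ** 2) / (2 * sigma ** 2))
    avg = lambda S, T: np.mean([k(x, y) for x in S for y in T])
    return avg(A, A) + avg(B, B) - 2 * avg(A, B)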
Exemplo n.º 17
0
                 graphs.append(nx.grid_2d_graph(i,j))
         utils.export_graphs_to_txt(graphs, output_prefix)
     elif prog_args.graph_type == 'caveman':
         graphs = []
         for i in range(2, 3):
             for j in range(30, 81):
                 for k in range(10):
                     graphs.append(caveman_special(i,j, p_edge=0.3))
         utils.export_graphs_to_txt(graphs, output_prefix)
     elif prog_args.graph_type == 'citeseer':
         graphs = utils.citeseer_ego()
         utils.export_graphs_to_txt(graphs, output_prefix)
     else:
         # load from directory
         input_path = dir_prefix + real_graph_filename
         g_list = utils.load_graph_list(input_path)
         utils.export_graphs_to_txt(g_list, output_prefix)
 elif not prog_args.kron_dir == '':
     kron_g_list = process_kron(prog_args.kron_dir)
     fname = os.path.join(prog_args.kron_dir, prog_args.graph_type + '.dat')
     print([g.number_of_nodes() for g in kron_g_list])
     utils.save_graph_list(kron_g_list, fname)
 elif not prog_args.test_file == '':
     # evaluate single .dat file containing list of test graphs (networkx format)
     graphs = utils.load_graph_list(prog_args.test_file)
     eval_single_list(graphs, dir_input=dir_prefix+'graphs/', dataset_name='grid')
 ## if you are not evaluating the Kronecker baseline, only the following part is needed
 else:
     if not os.path.isdir(dir_prefix+'eval_results'):
         os.makedirs(dir_prefix+'eval_results')
     evaluation(args_evaluate,dir_input=dir_prefix+"graphs/", dir_output=dir_prefix+"eval_results/",
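
# caveman_special (used in the 'caveman' branch above) is not defined in this
# excerpt. An illustrative sketch, assuming it builds c cliques of k nodes and
# adds random inter-clique edges with probability p_edge (the project's own
# generator may differ in detail):

import networkx as nx
import numpy as np

def caveman_special(c, k, p_edge=0.3):
    g = nx.caveman_graph(c, k)  # c disjoint cliques of size k, nodes 0..c*k-1
    for u in range(c * k):
        for v in range(u + 1, c * k):
            if u // k != v // k and np.random.rand() < p_edge:
                g.add_edge(u, v)  # connect nodes from different cliques
    # keep the largest connected component so a single graph is returned
    largest = max(nx.connected_components(g), key=len)
    return g.subgraph(largest).copy()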
Exemplo n.º 18
0
def main():
    args = Args()
    print(args.graph_type, args.note)
    # epoch = 16000
    epoch = 3000
    sample_time = 3

    # for baseline model
    for num_layers in range(4, 5):
        # give file name and figure name
        fname_real = args.graph_save_path + args.fname_real + str(0)
        fname_pred = (args.graph_save_path + args.fname_pred + str(epoch) +
                      "_" + str(sample_time))
        figname = args.figure_save_path + args.fname + str(epoch) + "_" + str(
            sample_time)

        # fname_real = args.graph_save_path + args.note + '_' + args.graph_type + '_' + str(args.graph_node_num) + '_' + \
        #              str(epoch) + '_real_' + str(True) + '_' + str(num_layers)
        # fname_pred = args.graph_save_path + args.note + '_' + args.graph_type + '_' + str(args.graph_node_num) + '_' + \
        #              str(epoch) + '_pred_' + str(True) + '_' + str(num_layers)
        # figname = args.figure_save_path + args.note + '_' + args.graph_type + '_' + str(args.graph_node_num) + '_' + \
        #           str(epoch) + '_' + str(num_layers)
        print(fname_real)
        print(fname_pred)

        # load data
        graph_real_list = load_graph_list(fname_real + ".dat")
        random.shuffle(graph_real_list)
        graph_pred_list_raw = load_graph_list(fname_pred + ".dat")
        graph_real_len_list = np.array(
            [len(graph_real_list[i]) for i in range(len(graph_real_list))])
        graph_pred_len_list_raw = np.array([
            len(graph_pred_list_raw[i])
            for i in range(len(graph_pred_list_raw))
        ])

        graph_pred_list = graph_pred_list_raw
        graph_pred_len_list = graph_pred_len_list_raw

        # # select samples
        # graph_pred_list = []
        # graph_pred_len_list = []
        # for value in graph_real_len_list:
        #     pred_idx = find_nearest_idx(graph_pred_len_list_raw, value)
        #     graph_pred_list.append(graph_pred_list_raw[pred_idx])
        #     graph_pred_len_list.append(graph_pred_len_list_raw[pred_idx])
        #     # delete
        #     graph_pred_len_list_raw=np.delete(graph_pred_len_list_raw, pred_idx)
        #     del graph_pred_list_raw[pred_idx]
        #     if len(graph_pred_list)==200:
        #         break
        # graph_pred_len_list = np.array(graph_pred_len_list)

        # # select pred data within certain range
        # len_min = np.amin(graph_real_len_list)
        # len_max = np.amax(graph_real_len_list)
        # pred_index = np.where((graph_pred_len_list>=len_min)&(graph_pred_len_list<=len_max))
        # # print(pred_index[0])
        # graph_pred_list = [graph_pred_list[i] for i in pred_index[0]]
        # graph_pred_len_list = graph_pred_len_list[pred_index[0]]

        # real_order = np.argsort(graph_real_len_list)
        # pred_order = np.argsort(graph_pred_len_list)
        real_order = np.argsort(graph_real_len_list)[::-1]
        pred_order = np.argsort(graph_pred_len_list)[::-1]
        # print(real_order)
        # print(pred_order)
        graph_real_list = [graph_real_list[i] for i in real_order]
        graph_pred_list = [graph_pred_list[i] for i in pred_order]

        # shuffle(graph_real_list)
        # shuffle(graph_pred_list)
        print(
            "real average nodes",
            sum([
                graph_real_list[i].number_of_nodes()
                for i in range(len(graph_real_list))
            ]) / len(graph_real_list),
        )
        print(
            "pred average nodes",
            sum([
                graph_pred_list[i].number_of_nodes()
                for i in range(len(graph_pred_list))
            ]) / len(graph_pred_list),
        )
        print("num of real graphs", len(graph_real_list))
        print("num of pred graphs", len(graph_pred_list))

        # # draw all graphs
        # for iter in range(8):
        #     print('iter', iter)
        #     graph_list = []
        #     for i in range(8):
        #         index = 8 * iter + i
        #         # graph_real_list[index].remove_nodes_from(list(nx.isolates(graph_real_list[index])))
        #         # graph_pred_list[index].remove_nodes_from(list(nx.isolates(graph_pred_list[index])))
        #         graph_list.append(graph_real_list[index])
        #         graph_list.append(graph_pred_list[index])
        #         print('real', graph_real_list[index].number_of_nodes())
        #         print('pred', graph_pred_list[index].number_of_nodes())
        #
        #     draw_graph_list(graph_list, row=4, col=4, fname=figname + '_' + str(iter))

        # draw all graphs
        for iter in range(8):
            print("iter", iter)
            graph_list = []
            for i in range(8):
                index = 32 * iter + i
                # graph_real_list[index].remove_nodes_from(list(nx.isolates(graph_real_list[index])))
                # graph_pred_list[index].remove_nodes_from(list(nx.isolates(graph_pred_list[index])))
                # graph_list.append(graph_real_list[index])
                graph_list.append(graph_pred_list[index])
                # print('real', graph_real_list[index].number_of_nodes())
                print("pred", graph_pred_list[index].number_of_nodes())

            draw_graph_list(graph_list,
                            row=4,
                            col=4,
                            fname=figname + "_" + str(iter) + "_pred")

        # draw all graphs
        for iter in range(8):
            print("iter", iter)
            graph_list = []
            for i in range(8):
                index = 16 * iter + i
                # graph_real_list[index].remove_nodes_from(list(nx.isolates(graph_real_list[index])))
                # graph_pred_list[index].remove_nodes_from(list(nx.isolates(graph_pred_list[index])))
                graph_list.append(graph_real_list[index])
                # graph_list.append(graph_pred_list[index])
                print("real", graph_real_list[index].number_of_nodes())
                # print('pred', graph_pred_list[index].number_of_nodes())

            draw_graph_list(graph_list,
                            row=4,
                            col=4,
                            fname=figname + "_" + str(iter) + "_real")
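
# draw_graph_list is not shown in this excerpt. A minimal sketch of the assumed
# behavior (draw each graph in a row x col grid with networkx/matplotlib and
# save the figure under fname; layout and styling here are illustrative only):

import matplotlib
matplotlib.use('Agg')  # save figures without a display
import matplotlib.pyplot as plt
import networkx as nx

def draw_graph_list(graph_list, row, col, fname='figure'):
    plt.figure(figsize=(col * 2, row * 2))
    for i, g in enumerate(graph_list[:row * col]):
        plt.subplot(row, col, i + 1)
        nx.draw(g, node_size=10, with_labels=False)
    plt.tight_layout()
    plt.savefig(fname + '.png', dpi=200)
    plt.close()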
Exemplo n.º 19
0
 def __init__(self, file_path, is_real=True, batch_size=None):
     assert batch_size is not None
     self.graph_list = utils.load_graph_list(file_path, is_real=is_real)
     self.curr_index = 0
     self.batch_size = batch_size
     self.curr_graph_list = deepcopy(self.graph_list)