Example #1
def read_data(ctrl, args):
    prefix = "./dataset/" + args.data
    if args.format == "metis":
        input_graph_path = prefix + ".metis"
        graph, mapping = read_graph(ctrl, input_graph_path, metis=True)
    else:
        input_graph_path = prefix + ".edgelist"
        graph, mapping = read_graph(ctrl, input_graph_path, edgelist=True)

    return input_graph_path, graph, mapping
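
Each example below calls its own repository-local read_graph helper, so the signatures differ from example to example. For orientation only, a minimal hypothetical sketch of the edgelist variant assumed by Example #1 (the name, the networkx parsing, and the 0..n-1 relabeling are assumptions, not the repository's actual code):

import networkx as nx

def read_graph_sketch(path, metis=False, edgelist=False):
    if edgelist:
        graph = nx.read_edgelist(path, nodetype=int)
        # relabel original node ids to a contiguous 0..n-1 range
        mapping = {node: i for i, node in enumerate(sorted(graph.nodes()))}
        return nx.relabel_nodes(graph, mapping), mapping
    raise NotImplementedError("METIS parsing is not sketched here")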
Example #2
def main(trial=None, args=None):
    """
    Parsing command line parameters.
    Creating target matrix.
    Fitting an SGCN.
    Predicting edge signs and saving the embedding.
    """

    # fix seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    random.seed(args.seed)
    tab_printer(args)
    # read data
    edges = read_graph(args)
    trainer = SHIGTrainer(args, edges)
    trainer.setup_dataset()
    # training
    trainer.create_and_train_model(trial)

    if trial.should_prune():
        raise optuna.exceptions.TrialPruned()

    if args.metric_to_optimize == 'AUC':
        return trainer.logs["performance"][-1][1]
    elif args.metric_to_optimize == 'F1':
        return trainer.logs["performance"][-1][2]
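
main(trial, args) above is shaped like an Optuna objective. A minimal sketch of how it might be driven (the study direction and trial count are assumptions):

import optuna

def run_search(args, n_trials=50):
    # maximize the AUC/F1 value returned by main(); pruned trials raise TrialPruned inside it
    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: main(trial, args=args), n_trials=n_trials)
    return study.best_value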
Example #3
def main():
    graph, init_states = utils.read_graph(os.getcwd() + ct.EDGE_LIST_PATH,
                                          os.getcwd() + ct.INITIAL_STATE_PATH)

    init_paths = utils.initial_guess(init_states, graph)

    print('Original ')
    for path in init_paths:
        print(path)
    print(utils.collective_cost(graph, init_paths))

    state, cost, hist = tabu_search(graph, init_paths)

    print('Tabu Search')
    for path in state:
        print(path)
    print(cost)

    utils.plot_graph_paths_max(graph,
                               paths=init_paths,
                               title='Individual Planning')
    utils.plot_graph_paths_max(graph,
                               paths=state,
                               title='Tabu Search Cooperative Planning')
    plt.show()
    x = range(len(hist))

    plt.plot(x, hist)

    plt.xlabel("Step")
    plt.ylabel("Cost function")

    plt.show()
Example #4
def main():

    p = argparse.ArgumentParser(
        description=
        'This script runs an experiment solving TSP with TS and SA')
    p.add_argument('-g',
                   '--graph',
                   type=str,
                   help='path to graph csv file',
                   required=True)

    option_args = p.parse_known_args()[0]
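    # parse_known_args() returns (namespace, leftover_args); [0] keeps just the namespace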
    path = option_args.graph

    if not os.path.exists(path):
        print("File not found")
        sys.exit(1)

    graph = read_graph(path)

    tabu_table = experiment(tabu_search, graph)
    sa_table = experiment(simulated_anealing, graph)

    print("## Tabu Search Result")
    print(tabu_table)
    print()

    print("## Simulated Anealing Result")
    print(sa_table)
    print()
Example #5
 def __init__(self, args):
     """
     Initializing the training object.
     :param args: Arguments object.
     """
     self.args = args
     self.graph = read_graph(self.args.edge_path)
     self.initialize_model_and_features()
Example #6
def learn_model(args):
    """
    Method to create adjacency matrix powers, read features, and learn embedding.
    :param args: Arguments object.
    """
    A = read_graph(args.edge_path)
    model = GraRep(A, args)
    model.optimize()
    model.save_embedding()
Example #7
def run_benchmark():
    config = create_tf_config()

    graph_def = read_graph(FLAGS.input_graph)

    tf.import_graph_def(graph_def, name='')

    input_tensor = tf.compat.v1.get_default_graph().get_tensor_by_name('inputs:0')
    output_tensor = tf.compat.v1.get_default_graph().get_tensor_by_name('output_boxes:0')

    dummy_data_shape = list(input_tensor.shape)
    dummy_data_shape[0] = FLAGS.batch_size
    dummy_data = np.random.random(dummy_data_shape).astype(np.float32)

    if not FLAGS.profiling:
        num_warmup = 200
        total_iter = 1000
    else:
        num_warmup = 20
        total_iter = 100

    total_time = 0.0

    with tf.compat.v1.Session(config=config) as sess:
        print("Running warm-up")
        for i in range(num_warmup):
            sess.run(output_tensor, {input_tensor: dummy_data})
        print("Warm-up complete")

        for i in range(1, total_iter + 1):
            start_time = time.time()
            sess.run(output_tensor, {input_tensor: dummy_data})
            end_time = time.time()

            duration = end_time - start_time
            total_time += duration

            if i % 10 == 0:
                print(
                    "Steps = {0}, {1:10.6f} samples/sec".format(i, FLAGS.batch_size / duration))

        if FLAGS.profiling:
            options = tf.compat.v1.RunOptions(
                trace_level=tf.compat.v1.RunOptions.FULL_TRACE)
            run_metadata = tf.compat.v1.RunMetadata()

            sess.run(output_tensor, {input_tensor: dummy_data},
                     options=options, run_metadata=run_metadata)

            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open("timeline_%s.json" % (time.time()), 'w') as f:
                f.write(chrome_trace)

    print("Average Thoughput: %f samples/sec" %
          (total_iter * FLAGS.batch_size / total_time))
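
read_graph(FLAGS.input_graph) here presumably loads a frozen inference graph. A hedged sketch of such a loader (hypothetical helper name; the benchmark's real implementation may differ):

import tensorflow as tf

def read_frozen_graph(pb_path):
    # parse a frozen .pb graph file into a GraphDef
    graph_def = tf.compat.v1.GraphDef()
    with tf.io.gfile.GFile(pb_path, "rb") as f:
        graph_def.ParseFromString(f.read())
    return graph_def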
Example #8
def main():
    """
    Parsing command lines, creating target matrix, fitting BANE and saving the embedding.
    """
    args = parameter_parser()
    tab_printer(args)
    P = read_graph(args)
    X = read_features(args)
    model = BANE(args, P, X)
    model.fit()
    model.save_embedding()
Example #9
 def __init__(self, args):
     """
     Initializing the training object.
     :param args: Arguments parsed from command line.
     """
     self.args = args
     self.graph = read_graph(self.args.edge_path)
     self.features = read_features(self.args.feature_path)
     self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     self.initialize_model()
     self.simulate_walks()
Example #10
def main():
    """
    Parsing command lines, creating target matrix, fitting DANMF and saving the embedding.
    """
    args = parameter_parser()
    tab_printer(args)
    graph = read_graph(args)
    model = DANMF(graph, args)
    model.pre_training()
    model.training()
    if args.calculate_loss:
        loss_printer(model.loss)
Example #11
 def read(self, key, cls):
     if (cls in self.__registry):
         id = self.__registry[cls] % key
         if (id in self.__last_read):
             logging.debug("Preventing reading of %s" % id)
             return self.__last_read[id]
         else:
             path = self.__build_path(key, cls)
             data = read_graph(path)
             self.__last_read = {}  # FIXME: currently this caches only one item; is that the right size?
             self.__last_read[id] = data
             return data
     else:
         return ""
Example #12
    def __init__(self):

        t = time.time()
        print("reading graph...")
        self.n_node, self.n_relation, self.graph = utils.read_graph(
            config.graph_filename)
        self.node_list = list(self.graph.keys())  # range(0, self.n_node)
        print('[%.2f] reading graph finished. #node = %d #relation = %d' % (
            time.time() - t, self.n_node, self.n_relation))

        t = time.time()
        print("read initial embeddings...")
        self.node_embed_init_d = utils.read_embeddings(
            filename=config.pretrain_node_emb_filename_d,
            n_node=self.n_node,
            n_embed=config.n_emb)
        self.node_embed_init_g = utils.read_embeddings(
            filename=config.pretrain_node_emb_filename_g,
            n_node=self.n_node,
            n_embed=config.n_emb)

        #self.rel_embed_init_d = utils.read_embeddings(filename=config.pretrain_rel_emb_filename_d,
        #                                              n_node=self.n_node,
        #                                              n_embed=config.n_emb)
        #self.rel_embed_init_g = utils.read_embeddings(filename=config.pretrain_rel_emb_filename_g,
        #                                              n_node=self.n_node,
        #                                              n_embed=config.n_emb)
        print "[%.2f] read initial embeddings finished." % (time.time() - t)

        print "build GAN model..."
        self.discriminator = None
        self.generator = None
        self.build_generator()
        self.build_discriminator()

        self.latest_checkpoint = tf.train.latest_checkpoint(config.model_log)
        self.saver = tf.train.Saver()

        self.dblp_evaluation = DBLP_evaluation()
        self.yelp_evaluation = Yelp_evaluation()
        self.aminer_evaluation = Aminer_evaluation()

        self.config = tf.ConfigProto()
        self.config.gpu_options.allow_growth = True
        self.init_op = tf.group(tf.global_variables_initializer(),
                                tf.local_variables_initializer())
        self.sess = tf.Session(config=self.config)
        self.sess.run(self.init_op)

        self.show_config()
Example #13
def main():
    """
    Parsing command lines, creating target matrix, fitting an SGCN, predicting edge signs, and saving the embedding.
    """
    args = parameter_parser()
    tab_printer(args)
    edges = read_graph(args)
    trainer = SignedGCNTrainer(args, edges)
    trainer.setup_dataset()
    trainer.create_and_train_model()
    if args.test_size > 0:
        trainer.save_model()
        score_printer(trainer.logs)
        save_logs(args, trainer.logs)
Example #14
def load_graph(dataset, labels_is_onehot=True):
    features = read_feature("./data/" + dataset + ".feature",
                            is_normalize=False)

    if os.path.exists("./data/" + dataset + ".label"):
        labels = read_label("./data/" + dataset + ".label",
                            is_onehot=labels_is_onehot)
    else:
        labels = None

    G = read_graph("./data/" + dataset + '.edgelist')

    graph = Graph(features, G, labels)

    return graph
Example #15
 def read(self, key, cls):
     if (cls in self.__registry):
         id = self.__registry[cls] % key
         if (id in self.__last_read):
             logging.debug("Preventing reading of %s" % id)
             return self.__last_read[id]
         else:
             path = self.__build_path(key, cls)
             data = read_graph(path)
             self.__last_read = {}  # FIXME: currently this caches only one item; is that the right size?
             self.__last_read[id] = data
             return data
     else:
         return ""
Example #16
def main():
    p = argparse.ArgumentParser(
        description='This script solves TSP with simulated annealing')
    p.add_argument('-g', '--graph', type=str,
                   help='path to graph csv file', required=True)

    option_args = p.parse_known_args()[0]
    path = option_args.graph

    if not os.path.exists(path):
        print("File not found")
        sys.exit(1)

    graph = read_graph(path)
    s = simulated_anealing(graph)
    print('Answer')
    print("Path:", s, ", Cost:", get_cost(s, graph))
Example #17
def main():
    """
    Parsing command line parameters.
    Creating target matrix.
    Fitting an SGCN.
    Predicting edge signs and saving the embedding.
    """
    args = parameter_parser()
    avg_auc = []
    avg_f1 = []
    avg_precision = []
    avg_recall = []
    avg_acc = []

    for x in range(int(args.num_runs)):
        print("Iteration: ", x)
        tab_printer(args)
        edges = read_graph(args)
        trainer = SignedGCNTrainer(args, edges)
        trainer.setup_dataset()
        trainer.create_and_train_model()
        if args.test_size > 0:
            trainer.save_model()
            score_printer(trainer.logs)
            save_logs(args, trainer.logs)
            scores = score_printer(trainer.logs, avg='auc')
            avg_auc.append(scores[0])
            print("This run's AUC: ", "%.3f" % scores[0])
            print('-----')
            avg_f1.append(scores[1])
            avg_precision.append(scores[2])
            avg_recall.append(scores[3])
            avg_acc.append(scores[4])

    print('AUC averaged over {} runs: '.format(args.num_runs),
          "%.3f" % np.mean(avg_auc))
    print('F1 averaged over {} runs: '.format(args.num_runs),
          "%.3f" % np.mean(avg_f1))
    print('Precision averaged over {} runs: '.format(args.num_runs),
          "%.3f" % np.mean(avg_precision))
    print('Recall averaged over {} runs: '.format(args.num_runs),
          "%.3f" % np.mean(avg_recall))
    print('Accuracy averaged over {} runs: '.format(args.num_runs),
          "%.3f" % np.mean(avg_acc))
    print('Max AUC: ', "%.3f" % max(avg_auc), 'Max F1: ', "%.3f" % max(avg_f1),
          'Max Precision: ', "%.3f" % max(avg_precision),
          'Max Recall: ', "%.3f" % max(avg_recall),
          'Max Accuracy: ', "%.3f" % max(avg_acc))
Example #18
def main(fun, trials=8):
    """
    :param fun: a function taking 2 arguments:
        1. graph
        2. init_states (i.e. the paths) [[..], [..], ..]

        It should solve the problem and return a tuple
        (solution_paths, cost); see tabu_search for an example.
    :param trials: number of timed runs to perform.
    :return:
    """
    graph, init_states = utils.read_graph(os.getcwd() + ct.EDGE_LIST_PATH,
                                          os.getcwd() + ct.INITIAL_STATE_PATH)

    init_paths = utils.initial_guess(init_states, graph)

    times = []
    costs = []
    # not used right now, but it may be useful later (e.g. for visualization)
    solutions = []

    for _ in range(trials):
        # in case these are changed
        init_graph = dup(graph)
        init_state = dup(init_paths)
        start_time = datetime.now()
        res = fun(init_graph, init_state)
        solution = res[0]
        cost = res[1]
        end_time = datetime.now()
        times.append((end_time - start_time).total_seconds() * 1000)
        costs.append(cost)
        solutions.append(solution)

    print('Avg cost: ' + str(np.mean(costs)))
    print('Var cost: ' + str(np.var(costs)))
    print('Min cost: ' + str(np.min(costs)))

    print('Avg time: ' + str(np.mean(times)))
    print('Var time: ' + str(np.var(times)))
    print('Min time: ' + str(np.min(times)))
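
Any solver passed in as fun only needs to honor the (graph, init_states) -> (solution_paths, cost) contract described in the docstring. A trivial conforming stub (hypothetical, for illustration; it reuses utils.collective_cost from the earlier example):

def identity_solver(graph, init_paths):
    # return the initial guess unchanged, together with its collective cost
    return init_paths, utils.collective_cost(graph, init_paths)

# main(identity_solver, trials=3)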
Example #19
def main():
    startTime = datetime.datetime.now()
    initialPopulationSize = 100  # Determines the initial sample size of the search space.
    numberOfParents = 5  # Determines how many parents are selected to create a new generation.
    iterations = 30  # The number of generations that are created before terminating.

    graph, init_states = utils.read_graph(os.getcwd() + ct.EDGE_LIST_PATH,
                                          os.getcwd() + ct.INITIAL_STATE_PATH)

    populationPaths = create_population(
        init_states, graph,
        initialPopulationSize)  # Creates the initial population
    populationPaths = makeEdgeProblem(
        populationPaths)  # Converts the problem into an edge problem
    savedOriginalPaths = populationPaths  # Saves the best state for comparison later

    # Iterate creating new generations, starting from the randomly sampled initial population
    for i in range(iterations):
        print("Starting iteration " + str(i + 1))
        parents = []
        for _ in range(numberOfParents):
            parent = selection(populationPaths, graph,
                               int(round(initialPopulationSize / 2)))
            parents.append(parent[0])

        populationPaths = next_generation(parents, graph,
                                          initialPopulationSize)

    output = selection(populationPaths, graph, initialPopulationSize)
    endTime = datetime.datetime.now() - startTime

    for i in range(0, len(output[0])):
        print("Agent" + str(i + 1) + '\'s path: ' + str(output[0][i]))
    print("Total cost: " + str(output[1]))
    print("Original cost: " +
          str(selection(savedOriginalPaths, graph, initialPopulationSize)[1]))

    print("Execution time with " + str(iterations) + " iterations: " +
          str(endTime.total_seconds() * 1000) + " ms")
Example #20
    def __init__(self):
        t = time.time()
        print('reading graph...')
        self.graph, self.n_node, self.node_list, self.node_list_s, self.egs = utils.read_graph(config.train_file)
        self.node_emd_shape = [2, self.n_node, config.n_emb]
        print('[%.2f] reading graph finished. #node = %d' % (time.time() - t, self.n_node))

        self.dis_node_embed_init = None
        self.gen_node_embed_init = None
        if config.pretrain_dis_node_emb:
            t = time.time()
            print('reading initial embeddings...')
            dis_node_embed_init = np.array([utils.read_embeddings(filename=x, n_node=self.n_node, n_embed=config.n_emb) \
                                            for x in [config.pretrain_dis_node_emb]])
            gen_node_embed_init = np.array([utils.read_embeddings(filename=x, n_node=self.n_node, n_embed=config.n_emb) \
                                            for x in [config.pretrain_gen_node_emb]])
            print('[%.2f] read initial embeddings finished.' % (time.time() - t))

        print('building DGGAN model...')
        self.discriminator = None
        self.generator = None
        self.build_generator()
        self.build_discriminator()
        if config.experiment == 'link_prediction':
            self.link_prediction = evaluation.LinkPrediction(config)

        self.config = tf.ConfigProto()
        self.config.gpu_options.allow_growth = True
        self.sess = tf.Session(config = self.config)
        self.saver = tf.train.Saver(max_to_keep=0)
        if config.pretrain_ckpt:
            print('restore...')
            pretrain_ckpt = tf.train.latest_checkpoint(config.pretrain_ckpt)
            self.saver.restore(self.sess, pretrain_ckpt)
        else:
            print('initial...')
            self.init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
            self.sess.run(self.init_op)
Example #21
def main():
    """
    Parsing command line parameters.
    Creating target matrix.
    Fitting an SGCN.
    Predicting edge signs and saving the embedding.
    """
    args = parameter_parser()
    tab_printer(args)

    args.edge_path = '../input/bitcoin_otc.csv'
    args.embedding_path = '../output/embedding/bitcoin_otc_sgcn.csv'
    args.features_path = './input/bitcoin_otc.csv'
    args.regression_weights_path = '../output/weights/bitcoin_otc_sgcn.csv'
    args.epochs = 1

    edges = read_graph(args)  # load the training data
    trainer = SignedGCNTrainer(args, edges)
    trainer.setup_dataset()  # compute the features
    trainer.create_and_train_model()
    if args.test_size > 0:
        trainer.save_model()
        score_printer(trainer.logs)
        save_logs(args, trainer.logs)
Example #22
import sys
import time
import utils

if __name__ == "__main__":
    try:
        start1 = time.time()
        filename = sys.argv[1]
        n = int(sys.argv[2])
        strategy = sys.argv[3]

        graph = utils.read_graph(filename)
        start2 = time.time()
        nodes = utils.strategies[strategy](graph, n)
        end2 = time.time()
        print('Computation Time: %0.3f s' % (end2 - start2))
        utils.write_nodes(nodes)
        end1 = time.time()
        print('Total Runtime: %0.3f s' % (end1 - start1))

    except Exception:
        print(
            "   Generates output for the given graph according to the given strategy"
        )
        print(
            "   USAGE:   python gen.py [input graph file] [number of seeds] [strategy]"
        )
        print(
            "   EXAMPLE: python gen.py testgraph1.json 10 closeness_centrality"
        )
        print("   AVAILABLE STRATEGIES:")
Example #23
def main(out, *rdfs):
    g = utils.read_graph(*rdfs)
    generate_void(g, out)
Example #24
    # scores_matrix_one_full = scores_matrix_one.A
    # scores_matrix_two_full = scores_matrix_two.A
    # plot_matrix(matrix = scores_matrix_one_full)
    # plot_matrix(matrix = scores_matrix_two_full)
    scores_matrix_one = sp.csr_matrix(np.triu(scores_matrix_one.A, k=1))  # k=1 excludes the diagonal
    scores_matrix_two = sp.csr_matrix(np.triu(scores_matrix_two.A, k=1))

    # read the train set's binary data
    graph_train_path = get_trainset_path(base_dir=all_file_dir,
                                         graph_name=graph_name,
                                         connected_pattern='undirected',
                                         from_zeros_one='0')
    G = read_graph(weighted=0, input=graph_train_path, directed=0)
    train_binary = sp.csr_matrix(nx.convert_matrix.to_scipy_sparse_matrix(G))
    train_binary = sp.csr_matrix(np.triu(train_binary.A, k=1))
    # train_binary_full = train_binary.A
    # or: train_binary = sp.csr_matrix(np.array(nx.to_numpy_matrix(G)))

    # build the exist and nonexist binary matrices
    exist_binary = sp.csr_matrix(np.triu(train_binary.A, k=1))  # k=1 excludes the diagonal
    nonexist_binary = sp.csr_matrix(np.triu(np.ones(exist_binary.shape), k=1) - exist_binary.A)

    # normalize scores to [0.0, 1.0]
    scores_matrix_one_norm = normalize_matrix(csr_matrix1 = scores_matrix_one)
    scores_matrix_two_norm = normalize_matrix(csr_matrix1 = scores_matrix_two)
    # plot_matrix(scores_matrix_one_norm.A)
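
The np.triu(..., k=1) pattern above keeps only scores strictly above the diagonal, so each undirected pair is counted once. A small worked example:

import numpy as np
import scipy.sparse as sp

m = np.array([[1, 2, 3],
              [2, 1, 4],
              [3, 4, 1]])
upper = sp.csr_matrix(np.triu(m, k=1))
# upper.A == [[0, 2, 3],
#             [0, 0, 4],
#             [0, 0, 0]]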
Example #25

def get_min_edge_cost(source, sink, graph):
    """
    Gets the cost of the edge between the source and the sink.

    Returns inf if there is no direct edge between the two.
    """
    result = [c for s, c in graph[source] if s == sink]
    if result:
        return result[0]
    return math.inf


if __name__ == "__main__":
    graph = utils.read_graph("edges.txt", True)

    # init spanning tree with the first node
    X = [1]  # nodes in the spanning Tree so far
    V_X = graph  # remaining nodes
    del V_X[1]
    TCost = 0  # total cost of spanning tree so far

    # init min-crossing-cost heap (heap contains (key, value) pairs => key = min cost, value = remaining node)
    minCrossingCostHeap = DynamicKeyHeap(
        V_X.keys(),
        lambda remainingNode: get_min_edge_cost(remainingNode, 1, V_X))

    while any(V_X):
        minCost, poppedNode = minCrossingCostHeap.pop_kvp()
        TCost += minCost
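
The adjacency format get_min_edge_cost expects maps each node to a list of (neighbor, cost) pairs. A tiny hypothetical graph for illustration:

toy = {1: [(2, 4), (3, 7)], 2: [(1, 4)], 3: [(1, 7)]}
# get_min_edge_cost(1, 2, toy) -> 4
# get_min_edge_cost(2, 3, toy) -> math.inf (no direct edge from 2 to 3)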
Example #26
def auto_PNR(prex=None,
             graph_name=None,
             emb_method_name1=None,
             emb_method_name2=None):

    print('----------------------------------------------------------')
    time_start = time.time()
    # initialize the train/test set paths
    # prex = 'preprocessing_code2//'  # change here
    all_file_dir = 'D:\hybridrec\dataset\split_train_test//' + prex

    binNum = 50  # change here

    emb_method_name1 = emb_method_name1.lower()  # change here
    emb_method_name2 = emb_method_name2.lower()  # change here
    print("dataset: " + graph_name + '\n' + "baselines:" + emb_method_name1 +
          "," + emb_method_name2)
    conf_method1 = None
    conf_method2 = None
    if emb_method_name1 in all_embedding_methods:
        config_path_method1 = 'conf/' + emb_method_name1 + '.properties'
        config_method1 = configparser.ConfigParser()
        config_method1.read(config_path_method1)
        conf_method1 = dict(config_method1.items("hyperparameters"))
    if emb_method_name2 in all_embedding_methods:
        config_path_method2 = 'conf/' + emb_method_name2 + '.properties'
        config_method2 = configparser.ConfigParser()
        config_method2.read(config_path_method2)
        conf_method2 = dict(config_method2.items("hyperparameters"))

    # initialize the embedding and scores paths
    results_dir = 'D:\hybridrec/results//' + prex
    graph_results_dir = results_dir + graph_name + '//'

    # compute emb method 1
    if not ((emb_method_name1 == 'arope') or
            (emb_method_name1 == 'graph2gauss') or
            (is_heuristic_method(emb_method_name1) == True)):
        graph_train_path = get_trainset_path(
            base_dir=all_file_dir,
            graph_name=graph_name,
            connected_pattern=get_connp(emb_method_name1),
            from_zeros_one=get_from_zeros_one(emb_method_name1))
        graph_results_path = graph_results_dir + graph_name + '_' + emb_method_name1 + '.emb'
        if not os.path.isfile(graph_results_path):
            run_emb_method(input=graph_train_path,
                           output=graph_results_path,
                           emb_method_name=emb_method_name1)

    # compute emb method 2
    if not ((emb_method_name2 == 'arope') or
            (emb_method_name2 == 'graph2gauss') or
            (is_heuristic_method(emb_method_name2) == True)):
        graph_train_path = get_trainset_path(
            base_dir=all_file_dir,
            graph_name=graph_name,
            connected_pattern=get_connp(emb_method_name2),
            from_zeros_one=get_from_zeros_one(emb_method_name2))
        graph_results_path = graph_results_dir + graph_name + '_' + emb_method_name2 + '.emb'
        if not os.path.isfile(graph_results_path):
            run_emb_method(input=graph_train_path,
                           output=graph_results_path,
                           emb_method_name=emb_method_name2)

    # compute scores1
    if conf_method1 is not None:
        embedding_size_method1 = int(conf_method1['embedding_size'])
    if emb_method_name1 == 'splitter':
        scores_matrix_one = inner_product_scores_splitter(
            graph_results_dir=graph_results_dir,
            dataset_name=graph_name,
            emb_method_name=emb_method_name1,
            col_start=0,
            col_end=embedding_size_method1 + 1,
            skiprows=1,
            delimiter=',')
    elif (emb_method_name1 == 'attentionwalk') or (emb_method_name1
                                                   == 'grarep'):
        scores_matrix_one = inner_product_scores(
            graph_results_dir=graph_results_dir,
            dataset_name=graph_name,
            emb_method_name=emb_method_name1,
            col_start=0,
            col_end=embedding_size_method1 + 1,
            skiprows=1,
            delimiter=',')
    elif (emb_method_name1 == 'drne') or (emb_method_name1 == 'prune'):
        scores_matrix_one = inner_product_scores(
            graph_results_dir=graph_results_dir,
            dataset_name=graph_name,
            emb_method_name=emb_method_name1,
            col_start=0,
            col_end=embedding_size_method1,
            skiprows=0,
            delimiter=' ')  # some embedding_size_method values need +1 and some do not
    elif (emb_method_name1 == 'arope'):
        scores_matrix_one = inner_product_scores_arope(
            all_file_dir=all_file_dir,
            graph_name=graph_name,
            graph_results_dir=graph_results_dir)
    elif (emb_method_name1 == 'graph2gauss'):
        scores_matrix_one = energy_kl_scores_graph2gauss(
            all_file_dir=all_file_dir,
            graph_name=graph_name,
            graph_results_dir=graph_results_dir)
    elif is_heuristic_method(emb_method_name1):
        scores_matrix_one = heuristic_scores(
            all_file_dir=all_file_dir,
            graph_name=graph_name,
            graph_results_dir=graph_results_dir,
            heuristic_method=emb_method_name1)
    else:
        scores_matrix_one = inner_product_scores(
            graph_results_dir=graph_results_dir,
            dataset_name=graph_name,
            emb_method_name=emb_method_name1,
            col_start=0,
            col_end=embedding_size_method1 + 1,
            skiprows=1,
            delimiter=' ')

    # compute scores2
    if conf_method2 is not None:
        embedding_size_method2 = int(conf_method2['embedding_size'])
    if emb_method_name2 == 'splitter':
        scores_matrix_two = inner_product_scores_splitter(
            graph_results_dir=graph_results_dir,
            dataset_name=graph_name,
            emb_method_name=emb_method_name2,
            col_start=0,
            col_end=embedding_size_method2 + 1,
            skiprows=1,
            delimiter=',')
    elif (emb_method_name2 == 'attentionwalk') or (emb_method_name2
                                                   == 'grarep'):
        scores_matrix_two = inner_product_scores(
            graph_results_dir=graph_results_dir,
            dataset_name=graph_name,
            emb_method_name=emb_method_name2,
            col_start=0,
            col_end=embedding_size_method2 + 1,
            skiprows=1,
            delimiter=',')
    elif (emb_method_name2 == 'drne') or (emb_method_name2 == 'prune'):
        scores_matrix_two = inner_product_scores(
            graph_results_dir=graph_results_dir,
            dataset_name=graph_name,
            emb_method_name=emb_method_name2,
            col_start=0,
            col_end=embedding_size_method2,
            skiprows=0,
            delimiter=' ')
    elif (emb_method_name2 == 'arope'):
        scores_matrix_two = inner_product_scores_arope(
            all_file_dir=all_file_dir,
            graph_name=graph_name,
            graph_results_dir=graph_results_dir)
    elif (emb_method_name2 == 'graph2gauss'):
        scores_matrix_two = energy_kl_scores_graph2gauss(
            all_file_dir=all_file_dir,
            graph_name=graph_name,
            graph_results_dir=graph_results_dir)
    elif is_heuristic_method(emb_method_name2):
        scores_matrix_two = heuristic_scores(
            all_file_dir=all_file_dir,
            graph_name=graph_name,
            graph_results_dir=graph_results_dir,
            heuristic_method=emb_method_name2)
    else:
        scores_matrix_two = inner_product_scores(
            graph_results_dir=graph_results_dir,
            dataset_name=graph_name,
            emb_method_name=emb_method_name2,
            col_start=0,
            col_end=embedding_size_method2 + 1,
            skiprows=1,
            delimiter=' ')

    # take the upper triangle of the scores (note: 1. earlier steps must leave all scores in the upper triangle or filling the whole matrix; 2. some of the above are upper-triangular and some fill the whole matrix)
    # scores_matrix_one_full = scores_matrix_one.A
    # scores_matrix_two_full = scores_matrix_two.A
    # plot_matrix(matrix = scores_matrix_one_full)
    # plot_matrix(matrix = scores_matrix_two_full)
    scores_matrix_one = sp.csr_matrix(np.triu(scores_matrix_one.A,
                                              k=1))  # k=1 excludes the diagonal
    scores_matrix_two = sp.csr_matrix(np.triu(scores_matrix_two.A, k=1))

    # read the train set's binary data
    graph_train_path = get_trainset_path(base_dir=all_file_dir,
                                         graph_name=graph_name,
                                         connected_pattern='undirected',
                                         from_zeros_one='0')
    G = read_graph(weighted=0, input=graph_train_path, directed=0)
    train_binary = sp.csr_matrix(nx.convert_matrix.to_scipy_sparse_matrix(G))
    train_binary = sp.csr_matrix(np.triu(train_binary.A, k=1))
    # train_binary_full = train_binary.A
    # or: train_binary = sp.csr_matrix(np.array(nx.to_numpy_matrix(G)))

    # build the exist and nonexist binary matrices
    exist_binary = sp.csr_matrix(np.triu(train_binary.A, k=1))  # k=1 excludes the diagonal
    nonexist_binary = sp.csr_matrix(
        np.triu(np.ones(exist_binary.shape), k=1) - exist_binary.A)

    # normalize scores to [0.0, 1.0]
    scores_matrix_one_norm = normalize_matrix(csr_matrix1=scores_matrix_one)
    scores_matrix_two_norm = normalize_matrix(csr_matrix1=scores_matrix_two)
    # plot_matrix(scores_matrix_one_norm.A)
    # plot_matrix(scores_matrix_two_norm.A)

    del scores_matrix_one, scores_matrix_two
    gc.collect()

    # divide into bins
    val_max = 1.0
    val_min = 0.0
    # bin_array = sorted(divide_bin(val_max = val_max, val_min = val_min, binNum = binNum))
    interval = float((val_max - val_min) / binNum)

    # get the scores on exist_binary and nonexist_binary
    exist_scores_one_list = (np.array(scores_matrix_one_norm[exist_binary > 0],
                                      dtype=float))[0]
    nonexist_scores_one_list = (np.array(
        scores_matrix_one_norm[nonexist_binary > 0], dtype=float))[0]
    exist_scores_two_list = (np.array(scores_matrix_two_norm[exist_binary > 0],
                                      dtype=float))[0]
    nonexist_scores_two_list = (np.array(
        scores_matrix_two_norm[nonexist_binary > 0], dtype=float))[0]
    # # convert to sparse matrices
    # exist_scores_one_list_csr = sp.csr_matrix(exist_scores_one_list)
    # nonexist_scores_one_list_csr = sp.csr_matrix(nonexist_scores_one_list)
    # exist_scores_two_list_csr = sp.csr_matrix(exist_scores_two_list)
    # nonexist_scores_two_list_csr = sp.csr_matrix(nonexist_scores_two_list)

    # temp = scores_matrix_one_norm[exist_binary > 0][0]  # in case converting the scores to a list goes wrong

    # initialize two binNum x binNum 2-D raster grids
    exist_raster_grids = np.zeros((binNum, binNum))
    nonexist_raster_grids = np.zeros((binNum, binNum))

    # count the existing links that fall into each cell of exist_raster_grids
    exist_links_num = len(exist_scores_one_list)
    exist_row_col_zero_num = 0  # links whose scores are 0 in both matrices are not counted
    for i in range(exist_links_num):
        # row_index and col_index range from 0 to binNum-1
        if (exist_scores_one_list[i] == 0.0) & (exist_scores_two_list[i]
                                                == 0.0):
            exist_row_col_zero_num = exist_row_col_zero_num + 1
            continue
        row_index = int(
            get_row_col_index(score=exist_scores_one_list[i],
                              interval=interval,
                              binNum=binNum))
        col_index = int(
            get_row_col_index(score=exist_scores_two_list[i],
                              interval=interval,
                              binNum=binNum))
        exist_raster_grids[row_index,
                           col_index] = exist_raster_grids[row_index,
                                                           col_index] + 1

    print("exist_row_col_zero_num:" + str(exist_row_col_zero_num))
    print('sum  exist_raster_grids:' + str(np.sum(exist_raster_grids)))

    # count the non-existing links that fall into each cell of nonexist_raster_grids
    nonexist_links_num = len(nonexist_scores_one_list)
    nonexist_row_col_zero_num = 0  # links whose scores are 0 in both matrices are not counted
    for i in range(nonexist_links_num):
        # row_index and col_index range from 0 to binNum-1
        if (nonexist_scores_one_list[i] <= 0.0) & (nonexist_scores_two_list[i]
                                                   <= 0.0):
            nonexist_row_col_zero_num = nonexist_row_col_zero_num + 1
            continue
        row_index = int(
            get_row_col_index(score=nonexist_scores_one_list[i],
                              interval=interval,
                              binNum=binNum))
        col_index = int(
            get_row_col_index(score=nonexist_scores_two_list[i],
                              interval=interval,
                              binNum=binNum))

        nonexist_raster_grids[row_index,
                              col_index] = nonexist_raster_grids[row_index,
                                                                 col_index] + 1

    print("nonexist_row_col_zero_num:" + str(nonexist_row_col_zero_num))
    print('sum  nonexist_raster_grids:' + str(np.sum(nonexist_raster_grids)))

    # compute the PNR scores
    N = train_binary.shape[0]
    print("Graph size:" + str(N) + '\n')
    L_T = np.sum(train_binary.A)
    O = N * (N - 1) / 2
    coefficient = (O - L_T) / L_T
    PNR1 = coefficient * (exist_raster_grids / (nonexist_raster_grids + 1)
                          )  # add 1 to the denominator to avoid inf/nan; does not affect evaluation but looks nicer
    PNR2 = (exist_raster_grids / nonexist_raster_grids)  # inf and nan are zeroed below
    PNR2[np.isnan(PNR2)] = 0
    PNR2[np.isinf(PNR2)] = 0
    PNR2 = coefficient * PNR2

    # plotting (note: the plot's axes start from the top-left corner, not the bottom-left you might expect)
    # sns.heatmap(PNR1, cmap='Reds')
    # plt.savefig(graph_results_dir + emb_method_name1 +'_'+ emb_method_name2 + '_' +'bin_' + str(binNum) + "_PNR1.jpg")
    # plt.show()
    # sns.heatmap(PNR2, cmap='Reds')
    # plt.savefig(graph_results_dir + emb_method_name1 +'_'+ emb_method_name2 + '_'+ 'bin_' + str(binNum) + "_PNR2.jpg")
    # plt.show()
    # plt.matshow(PNR1) # 好丑
    # plt.show()

    # save exist_raster_grids, nonexist_raster_grids, PNR1 and PNR2
    save_ndarray_to_mat(exist_raster_grids, 'exist_raster_grids',
                        graph_results_dir, graph_name, emb_method_name1,
                        emb_method_name2, binNum)
    save_ndarray_to_mat(nonexist_raster_grids, 'nonexist_raster_grids',
                        graph_results_dir, graph_name, emb_method_name1,
                        emb_method_name2, binNum)
    save_ndarray_to_mat(PNR1, 'PNR1', graph_results_dir, graph_name,
                        emb_method_name1, emb_method_name2, binNum)
    save_ndarray_to_mat(PNR2, 'PNR2', graph_results_dir, graph_name,
                        emb_method_name1, emb_method_name2, binNum)

    # adjust scores with PNR (only the non-existing-link part)
    nonexist_scores_PNR_list = transfer_scores_PNR(
        scores_matrix_one_norm=scores_matrix_one_norm,
        scores_matrix_two_norm=scores_matrix_two_norm,
        train_binary=train_binary,
        PNR=PNR2,
        interval=interval,
        binNum=binNum)

    # weighted hybrid scores: equal 0.5 weights, summed directly
    scores_matrix_hybrid_norm = 0.5 * scores_matrix_one_norm + 0.5 * scores_matrix_two_norm
    nonexist_scores_hybrid_list = (np.array(
        scores_matrix_hybrid_norm[nonexist_binary > 0], dtype=float))[0]

    # evaluation
    graph_test_path = get_testset_path(base_dir=all_file_dir,
                                       graph_name=graph_name)
    test_binary = get_test_matrix_binary(graph_test_path=graph_test_path, N=N)
    L_full = int(np.sum(test_binary))
    L_array = np.array([
        int(L_full / 20),
        int(L_full / 10),
        int(L_full / 5),
        int(L_full / 2), L_full
    ])

    del scores_matrix_one_norm, scores_matrix_two_norm, exist_scores_one_list, exist_scores_two_list, scores_matrix_hybrid_norm
    gc.collect()


    AP_PNR, AUC_PNR, Precision_PNR, Recall_PNR, F1score_PNR=\
        evaluators(train_binary=train_binary,
                   test_binary=test_binary,
                   scores_list=nonexist_scores_PNR_list,
                   L_array=L_array)
    AP_method1, AUC_method1, Precision_method1, Recall_method1, F1score_method1=\
        evaluators(train_binary=train_binary,
                   test_binary=test_binary,
                   scores_list=nonexist_scores_one_list,
                   L_array=L_array)
    AP_method2, AUC_method2, Precision_method2, Recall_method2, F1score_method2=\
        evaluators(train_binary=train_binary,
                   test_binary=test_binary,
                   scores_list=nonexist_scores_two_list,
                   L_array=L_array)
    AP_weighted, AUC_weighted, Precision_weighted, Recall_weighted, F1score_weighted=\
        evaluators(train_binary=train_binary,
                   test_binary=test_binary,
                   scores_list=nonexist_scores_hybrid_list,
                   L_array=L_array)

    print('AP_PNR:  ' + str(AP_PNR))
    print('AP_method1:  ' + str(AP_method1))
    print('AP_method2:  ' + str(AP_method2))
    print('AP_weighted:  ' + str(AP_weighted))
    print('\n')
    print('AUC_PNR:  ' + str(AUC_PNR))
    print('AUC_method1:  ' + str(AUC_method1))
    print('AUC_method2:  ' + str(AUC_method2))
    print('AUC_weighted:  ' + str(AUC_weighted))
    print('\n')
    print('Precision_PNR:  ' + str(Precision_PNR))
    print('Precision_method1:  ' + str(Precision_method1))
    print('Precision_method2:  ' + str(Precision_method2))
    print('Precision_weighted:  ' + str(Precision_weighted))
    print('\n')
    print('Recall_PNR:  ' + str(Recall_PNR))
    print('Recall_method1:  ' + str(Recall_method1))
    print('Recall_method2:  ' + str(Recall_method2))
    print('Recall_weighted:  ' + str(Recall_weighted))
    print('\n')
    print('F1score_PNR:  ' + str(F1score_PNR))
    print('F1score_method1:  ' + str(F1score_method1))
    print('F1score_method2:  ' + str(F1score_method2))
    print('F1score_weighted:  ' + str(F1score_weighted))
    print('\n')

    write_to_excel(graph_name, emb_method_name1, emb_method_name2,
                   Precision_PNR, Precision_method1, Precision_method2,
                   Precision_weighted, Recall_PNR, Recall_method1,
                   Recall_method2, Recall_weighted, F1score_PNR,
                   F1score_method1, F1score_method2, F1score_weighted, AP_PNR,
                   AP_method1, AP_method2, AP_weighted, AUC_PNR, AUC_method1,
                   AUC_method2, AUC_weighted)

    time_end = time.time()
    print("time span:  " + str((time_end - time_start) / 60.00) + "  mins")
    # facebook_combined: bin=5, 1.5 minutes
    # facebook_combined: cn and pearson / aa and cn took 3.5 minutes
    # facebook_combined: graphdistance and cn took 11 minutes
    # facebook_combined: the PNR matrix of graphdistance and cn is all zeros
    # facebook_combined: attentionwalk and prone took 7.5 minutes
    # facebook_combined: everything involving rootedpagerank performs very poorly;
    # arope is slightly better than PNR; SDNE and PRUE are very, very poor; drne and graph2gauss
    # are also extremely poor alone but perform very well after PNR fusion;

    # blogcatalog: aa and ja took 3 hours
    # (path-based katz and graphdistance are very slow; neighbor-based and rank-based are fast)

    # google 15000 nodes: 2.5 hours
    print(
        '--------------------------------------------------------------------------------'
    )
    pass
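
The coefficient (O - L_T) / L_T above rebalances the class skew between the O = N * (N - 1) / 2 possible node pairs and the L_T observed train edges. For a hypothetical graph with N = 1000 and L_T = 5000: O = 499500, so the coefficient is (499500 - 5000) / 5000 = 98.9.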
Example #27
import utils as ut

# Read the graphs
ii = ut.read_graph("ii")
sgi = ut.read_graph("sgi")
ui = ut.read_graph("ui")

# Compute global measures
ut.graph_global_measures(ii, "ii")
ut.graph_global_measures(sgi, "sgi")
ut.graph_global_measures(ui, "ui")

# Compute LCC global measures
ut.graph_global_measures(ii, "ii", True)
ut.graph_global_measures(sgi, "sgi", True)
ut.graph_global_measures(ui, "ui", True)

# Visualize graph
ut.viz_graph(sgi, 'sgi')
ut.viz_graph(ii, 'ii', cc=True)
ut.viz_graph(ui, 'ui', cc=True)
Example #28
import utils as ut

### read data

ii = ut.read_graph("ii")
ui = ut.read_graph("ui")

### Hypergeom Test

ii_lou = map(lambda x: ut.hypergeom_test(ii, x),
             filter(ut.check_length_mod, ut.louvain(ii)))
ii_mcl = map(lambda x: ut.hypergeom_test(ii, x),
             filter(ut.check_length_mod, ut.mcl(ii)))

ui_lou = map(lambda x: ut.hypergeom_test(ui, x),
             filter(ut.check_length_mod, ut.louvain(ui)))
ui_mcl = map(lambda x: ut.hypergeom_test(ui, x),
             filter(ut.check_length_mod, ut.mcl(ui)))
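
# Note: in Python 3, map() and filter() return lazy iterators,
# hence the list(...) calls when the tables are created below.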

# Create tables

ii_mod = ut.create_table("ii_mod", list(ii_lou), list(ii_mcl))
ui_mod = ut.create_table("ui_mod", list(ui_lou), list(ui_mcl))

# Visualize clusters

ut.louvain(ii, 'ii', viz=True)
ut.louvain(ui, 'ui', viz=True)
ut.mcl(ii, viz=True)
ut.mcl(ui, viz=True)
Example #29
def publish(input):
    g = utils.read_graph(input)
    describe_dataset(g)
    write_rdf_files(g)
    write_dump(g)
Example #30
def auto_overlap(prex=None,
                 graph_name=None,
                 emb_method_name1=None,
                 emb_method_name2=None,
                 binNum=None):
    time_start = time.time()
    print('----------------------------------------------------------')
    print("dataset: " + graph_name + '\n' + "baselines:" + emb_method_name1 +
          "," + emb_method_name2)

    results_base_dir = 'D:\hybridrec//results//'
    all_file_dir = 'D:\hybridrec\dataset\split_train_test//' + prex
    results_dir = 'D:\hybridrec/results//' + prex
    graph_results_dir = results_dir + graph_name + '//'

    path_scores_method1 = results_base_dir + prex + graph_name + "//" + graph_name + "_" + emb_method_name1 + "_scores.mat"
    path_scores_method2 = results_base_dir + prex + graph_name + "//" + graph_name + "_" + emb_method_name2 + "_scores.mat"

    if not (os.path.exists(path_scores_method1)
            and os.path.exists(path_scores_method2)):
        print("dataset: " + graph_name + '----' + "baselines:" +
              emb_method_name1 + "," + emb_method_name2 + ': 分数未完全计算')

    if os.path.exists(path_scores_method1) and os.path.exists(
            path_scores_method2):
        # get the normalized scores
        scores_matrix_one_dict = (loadmat(path_scores_method1))
        scores_matrix_two_dict = (loadmat(path_scores_method2))
        scores_matrix_one = scores_matrix_one_dict['scores']
        scores_matrix_two = scores_matrix_two_dict['scores']
        if emb_method_name1 not in all_embedding_methods:
            scores_matrix_one = csr_matrix(np.triu(scores_matrix_one.A,
                                                   k=1))  # k=1 excludes the diagonal
        if emb_method_name2 not in all_embedding_methods:
            scores_matrix_two = csr_matrix(np.triu(scores_matrix_two.A, k=1))
        scores_matrix_one_norm = normalize_matrix(
            csr_matrix1=csr_matrix(scores_matrix_one))  # removing the csr_matrix() around the argument would ...
        scores_matrix_two_norm = normalize_matrix(
            csr_matrix1=csr_matrix(scores_matrix_two))

        # get train_binary and test_binary
        graph_train_path = get_trainset_path(base_dir=all_file_dir,
                                             graph_name=graph_name,
                                             connected_pattern='undirected',
                                             from_zeros_one='0')
        graph_test_path = get_testset_path(base_dir=all_file_dir,
                                           graph_name=graph_name)
        G = read_graph(weighted=0, input=graph_train_path, directed=0)
        train_binary = csr_matrix(nx.convert_matrix.to_scipy_sparse_matrix(G))
        train_binary = csr_matrix(np.triu(train_binary.A, k=1))
        test_binary = get_test_matrix_binary(graph_test_path=graph_test_path,
                                             N=train_binary.shape[0])

        # read the raw (unnormalized) plus scores
        plus_scores_name = 'plus_' + graph_name + '_' + emb_method_name1 + '_' + emb_method_name2 + '_scores.mat'
        plus_scores_path = graph_results_dir + plus_scores_name
        scores_matrix_plus_dict = (loadmat(plus_scores_path))
        scores_matrix_plus = scores_matrix_plus_dict['scores']

        # read the raw (unnormalized) multiply scores
        multiply_scores_name = 'multiply_' + graph_name + '_' + emb_method_name1 + '_' + emb_method_name2 + '_scores.mat'
        multiply_scores_path = graph_results_dir + multiply_scores_name
        scores_matrix_multiply_dict = (loadmat(multiply_scores_path))
        scores_matrix_multiply = scores_matrix_multiply_dict['scores']

        # read the raw (unnormalized) MLP scores
        mlp_scores_name = 'mlp_' + graph_name + '_' + emb_method_name1 + '_' + emb_method_name2 + '_scores.mat'
        mlp_scores_path = graph_results_dir + mlp_scores_name
        scores_matrix_mlp_dict = (loadmat(mlp_scores_path))
        scores_matrix_mlp = scores_matrix_mlp_dict['scores']

        # normalize the hybrid scores
        scores_matrix_plus_norm = normalize_matrix(
            csr_matrix1=scores_matrix_plus)
        scores_matrix_multiply_norm = normalize_matrix(
            csr_matrix1=scores_matrix_multiply)
        scores_matrix_mlp_norm = normalize_matrix(
            csr_matrix1=scores_matrix_mlp)

        # compute the rasterization grids for plus, multiply, mlp and PNR
        mlp_path = results_base_dir + prex + graph_name + "//" + "mlp_" + graph_name + "_" + emb_method_name1 + "_" + emb_method_name2 + "_50_count.mat"
        mlp_dict = (loadmat(mlp_path))
        mlp_raster_grids = mlp_dict["count"]
        multiply_path = results_base_dir + prex + graph_name + "//" + "multiply_" + graph_name + "_" + emb_method_name1 + "_" + emb_method_name2 + "_50_count.mat"
        multiply_dict = (loadmat(multiply_path))
        multiply_raster_grids = multiply_dict["count"]
        plus_path = results_base_dir + prex + graph_name + "//" + "plus_" + graph_name + "_" + emb_method_name1 + "_" + emb_method_name2 + "_50_count.mat"
        plus_dict = (loadmat(plus_path))
        plus_raster_grids = plus_dict["count"]

        # plus_raster_grids = rasterization_grids(binNum=binNum,
        #                                        train_binary=train_binary,
        #                                        scores_matrix_DNN=scores_matrix_plus_norm,
        #                                        scores_matrix_one_norm=scores_matrix_one_norm,
        #                                        scores_matrix_two_norm=scores_matrix_two_norm)
        # multiply_raster_grids = rasterization_grids(binNum=binNum,
        #                                        train_binary=train_binary,
        #                                        scores_matrix_DNN=scores_matrix_multiply_norm,
        #                                        scores_matrix_one_norm=scores_matrix_one_norm,
        #                                        scores_matrix_two_norm=scores_matrix_two_norm)
        # mlp_raster_grids = rasterization_grids(binNum=binNum,
        #                                        train_binary=train_binary,
        #                                        scores_matrix_DNN=scores_matrix_mlp_norm,
        #                                        scores_matrix_one_norm=scores_matrix_one_norm,
        #                                        scores_matrix_two_norm=scores_matrix_two_norm)
        PNR_path = results_base_dir + prex + graph_name + "//" + "PNR2_" + graph_name + "_" + emb_method_name1 + "_" + emb_method_name2 + "_50_count.mat"
        PNR_dict = (loadmat(PNR_path))
        PNR_raster_grids = PNR_dict["count"]

        exist_binary = csr_matrix(np.triu(train_binary.A, k=1))  # k=1 excludes the diagonal
        nonexist_binary = csr_matrix(
            np.triu(np.ones(exist_binary.shape), k=1) - exist_binary.A)
        # get the nonexist_scores_list for plus
        nonexist_scores_plus_list = transfer_scores_PNR(
            scores_matrix_one_norm=scores_matrix_one_norm,
            scores_matrix_two_norm=scores_matrix_two_norm,
            train_binary=train_binary,
            PNR=plus_raster_grids,
            interval=float((1.0 - 0.0) / binNum),
            binNum=binNum)
        # get the nonexist_scores_list for multiply
        nonexist_scores_multiply_list = transfer_scores_PNR(
            scores_matrix_one_norm=scores_matrix_one_norm,
            scores_matrix_two_norm=scores_matrix_two_norm,
            train_binary=train_binary,
            PNR=multiply_raster_grids,
            interval=float((1.0 - 0.0) / binNum),
            binNum=binNum)
        # get the nonexist_scores_list for mlp
        nonexist_scores_mlp_list = transfer_scores_PNR(
            scores_matrix_one_norm=scores_matrix_one_norm,
            scores_matrix_two_norm=scores_matrix_two_norm,
            train_binary=train_binary,
            PNR=mlp_raster_grids,
            interval=float((1.0 - 0.0) / binNum),
            binNum=binNum)
        # get the nonexist_scores_list for PNR
        nonexist_scores_PNR_list = transfer_scores_PNR(
            scores_matrix_one_norm=scores_matrix_one_norm,
            scores_matrix_two_norm=scores_matrix_two_norm,
            train_binary=train_binary,
            PNR=PNR_raster_grids,
            interval=float((1.0 - 0.0) / binNum),
            binNum=binNum)

        # get the thresholds
        E_test = np.sum(test_binary.A)
        thresold_plus = get_list_thresold(nonexist_scores_plus_list, L=E_test)
        thresold_multiply = get_list_thresold(nonexist_scores_multiply_list,
                                              L=E_test)
        thresold_mlp = get_list_thresold(nonexist_scores_mlp_list, L=E_test)
        thresold_PNR = get_list_thresold(nonexist_scores_PNR_list, L=E_test)

        # trick here: L = 1/2 |E_test|
        # thresold_plus = int(thresold_plus*0.5)
        # thresold_multiply = int(thresold_multiply * 0.5)
        # thresold_mlp = int(thresold_mlp * 0.5)
        # thresold_PNR = int(thresold_PNR * 0.5)

        # modify the grids
        plus_raster_grids = plus_raster_grids.A
        multiply_raster_grids = multiply_raster_grids.A
        mlp_raster_grids = mlp_raster_grids.A
        PNR_raster_grids = PNR_raster_grids.A
        # np.where(plus_raster_grids > thresold_plus, plus_raster_grids, 0)
        # np.where(multiply_raster_grids > thresold_multiply, multiply_raster_grids, 0)
        # np.where(mlp_raster_grids > thresold_mlp, mlp_raster_grids, 0)
        # np.where(PNR_raster_grids > thresold_PNR, PNR_raster_grids, 0)
        plus_raster_grids[plus_raster_grids <= thresold_plus] = 0.0
        multiply_raster_grids[multiply_raster_grids <= thresold_multiply] = 0.0
        mlp_raster_grids[mlp_raster_grids <= thresold_mlp] = 0.0
        PNR_raster_grids[PNR_raster_grids <= thresold_PNR] = 0.0

        plus_raster_grids[plus_raster_grids >= thresold_plus] = 1.0
        multiply_raster_grids[multiply_raster_grids >= thresold_multiply] = 1.0
        mlp_raster_grids[mlp_raster_grids >= thresold_mlp] = 1.0
        PNR_raster_grids[PNR_raster_grids >= thresold_PNR] = 1.0

        # plotting
        # colors = ['OrangeRed', 'darkseagreen', 'dodgerblue', 'blueviolet']
        colors = ['Red', 'green', 'blue', 'purple']
        result = np.float32(PNR_raster_grids)
        result = cv2.GaussianBlur(result, (5, 5),
                                  0)  # (5, 5) means the Gaussian kernel is 5x5; sigma is set to 0
        title = graph_name + '-PNR-' + emb_method_name1 + '-' + emb_method_name2
        plot_contourf_overlap(result=result, title=title, color=colors[0])

        result = np.float32(plus_raster_grids)
        result = cv2.GaussianBlur(result, (5, 5),
                                  0)  # (5, 5) means the Gaussian kernel is 5x5; sigma is set to 0
        title = graph_name + '-plus-' + emb_method_name1 + '-' + emb_method_name2
        plot_contourf_overlap(result=result, title=title, color=colors[1])

        result = np.float32(multiply_raster_grids)
        result = cv2.GaussianBlur(result, (5, 5),
                                  0)  # (5, 5) means the Gaussian kernel is 5x5; sigma is set to 0
        title = graph_name + '-multiply-' + emb_method_name1 + '-' + emb_method_name2
        plot_contourf_overlap(result=result, title=title, color=colors[2])

        result = np.float32(mlp_raster_grids)
        result = cv2.GaussianBlur(result, (5, 5),
                                  0)  # (5, 5) means the Gaussian kernel is 5x5; sigma is set to 0
        title = graph_name + '-mlp-' + emb_method_name1 + '-' + emb_method_name2
        plot_contourf_overlap(result=result, title=title, color=colors[3])

        # # compute the rasterization grids for plus
        # plus_raster_grids = rasterization_grids(binNum=plus_binNum,
        #                                        train_binary=train_binary,
        #                                        scores_matrix_DNN=scores_matrix_plus_norm,
        #                                        scores_matrix_one_norm=scores_matrix_one_norm,
        #                                        scores_matrix_two_norm=scores_matrix_two_norm)
        # # plus_raster_grids = np.log10(plus_raster_grids)  # errors out with -inf
        # plus_raster_grids = normalize_matrix_full(csr_matrix1=csr_matrix(plus_raster_grids))
        # plus_raster_grids = better_show_grids(csr_matrix1=plus_raster_grids)
        #
        # source = np.float32(plus_raster_grids.A)
        # result = cv2.GaussianBlur(source, (5, 5), 0)
        # title = graph_name + '-' + 'plus' +'-' + emb_method_name1 + '-' + emb_method_name2
        # plot_contourf(result=result, title=title, binNum=10)
        #

        time_end = time.time()
        print("It takes : " + str((time_end - time_start) / 60.0) + "  mins.")
        pass
Example #31
    # assert that the dataset input is a pair
    assert len(datasets) == 2, "Please input datasets pair!"
    # assert that both datasets in the pair are the same
    assert datasets[0] == datasets[1], "Unknown datasets pair!"
    params.node_num = get_node_num(params)

## Get the initial embedding matrix
if params.profile_feature:
    src_emb = load_embeddings(params, True)
    tgt_emb = load_embeddings(params, False)
else:
    src_emb, tgt_emb = initialize_feature(params)

## Read original graph
G_source_original, G_target_original = read_graph(params)
## Assign the original graph to G_source and G_target as the current graph
G_source = G_source_original
G_target = G_target_original
G_source_edge_num = nx.number_of_edges(G_source)
G_target_edge_num = nx.number_of_edges(G_target)
print(
    "=====> number of source graph edges: %d, number of target graph edges: %d" %
    (G_source_edge_num, G_target_edge_num))
A_source = nx.adjacency_matrix(G_source)
A_target = nx.adjacency_matrix(G_target)
## Get the adjacency matrix of the current graph and normalize it to facilitate graph convolution
A_source_norm, A_target_norm = adjacency_matrix_normalize(
    params, G_source, G_target)
## Build model
model = build_model(params)
Example #32
            # finding the nearest vertex would suffice
            heapq.heappush(h, (dists[v], curr_node, v))

        # queue is empty, exit while loop
        if not h:
            break

    # promote a vertex from the trial tree to the shortest-path tree
    _, _, curr_node = heapq.heappop(h)

    # use the dst only
    return dists[dst]


if __name__ == '__main__':
    g = utils.read_graph()

    # res = run(g, SRC, DST)
    # print 'Your answer:', res
    # print 'Model answer:', nx.shortest_path_length(g, SRC, DST, 'weight')

    n = g.number_of_nodes()
    for _ in range(N_TESTS):
        # make a copy because run() mutates the 'queued' field of the edges
        h = g.copy()
        src = random.randint(0, n - 1)
        dst = random.randint(0, n - 1)
        try:
            res1 = run(h, src, dst)
            res2 = nx.shortest_path_length(h, src, dst, 'weight')
            assert res1 == res2
        except nx.NetworkXNoPath:
            # assumed completion of the truncated snippet: skip node pairs
            # with no connecting path
            pass
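        # A self-contained hedged sketch (not the original code) of the same
        # heap-based Dijkstra pattern the truncated snippet above implements,
        # assuming a networkx graph with 'weight' edge attributes:
        #
        # import heapq
        #
        # def dijkstra(g, src, dst):
        #     dists = {node: float('inf') for node in g.nodes}
        #     dists[src] = 0
        #     h = [(0, src)]                     # (distance, node) heap
        #     visited = set()
        #     while h:
        #         d, u = heapq.heappop(h)
        #         if u in visited:
        #             continue                   # stale heap entry
        #         visited.add(u)
        #         if u == dst:
        #             break                      # distance to dst is final
        #         for v in g.neighbors(u):
        #             w = g[u][v].get('weight', 1)
        #             if d + w < dists[v]:
        #                 dists[v] = d + w
        #                 heapq.heappush(h, (dists[v], v))
        #     return dists[dst]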
Example #33
0
def auto_DNN(prex=None,
             graph_name=None,
             emb_method_name1=None,
             emb_method_name2=None,
             model_name=None,
             DNN_binNum=None):
    print('----------------------------------------------------------')
    print("dataset: " + graph_name + '\n' + "baselines:" + emb_method_name1 +
          "," + emb_method_name2)

    results_base_dir = 'D:/hybridrec/results/'
    all_file_dir = 'D:/hybridrec/dataset/split_train_test/' + prex
    results_dir = 'D:/hybridrec/results/' + prex
    graph_results_dir = results_dir + graph_name + '//'
    # (Pattern for facebook_combined: the smaller the ratio, the higher the
    # prediction accuracy on positive and negative samples, and the less time it takes)
    ratio = 1  # the number of negative samples is ratio times the number of positives  # tune here

    path_scores_method1 = results_base_dir + prex + graph_name + "//" + graph_name + "_" + emb_method_name1 + "_scores.mat"
    path_scores_method2 = results_base_dir + prex + graph_name + "//" + graph_name + "_" + emb_method_name2 + "_scores.mat"

    # Initialize the model; tune here

    # hidden_layer_sizes=(10, 20, 10): three hidden layers with 10, 20, and 10 neurons
    if model_name == "mlp":
        model = MLPClassifier(hidden_layer_sizes=(10, 20),
                              activation='relu',
                              solver='adam',
                              max_iter=200,
                              alpha=0.01,
                              batch_size=256,
                              learning_rate='constant',
                              learning_rate_init=0.001,
                              shuffle=False,
                              random_state=2020,
                              early_stopping=True,
                              validation_fraction=0.2,
                              beta_1=0.9,
                              beta_2=0.999,
                              epsilon=1e-08,
                              n_iter_no_change=10)
    pass

    if model_name == "svm":
        model = SVC(C=5, random_state=42)  # this one caused problems
    pass

    if model_name == "lr":
        model = LogisticRegression(C=5,
                                   penalty='l1',
                                   tol=1e-6,
                                   random_state=42)  # penalty can be 'l1' or 'l2'
    pass

    if model_name == "lgbm":
        model = LGBMClassifier(num_leaves=31,
                               learning_rate=0.1,
                               n_estimators=64,
                               random_state=42,
                               n_jobs=-1)
    pass

    if model_name == "xgb":
        model = XGBClassifier(max_depth=5,
                              learning_rate=0.1,
                              n_jobs=-1,
                              nthread=-1,
                              gamma=0.06,
                              min_child_weight=5,
                              subsample=1,
                              colsample_bytree=0.9,
                              reg_alpha=0,
                              reg_lambda=0.5,
                              random_state=42)
    pass

    if model_name == "ld":
        model = LinearDiscriminantAnalysis(solver='lsqr')
    pass

    if model_name == "rf":
        model = RandomForestClassifier(n_estimators=50,
                                       max_depth=20,
                                       min_samples_split=2,
                                       min_samples_leaf=5,
                                       max_features="log2",
                                       random_state=12)
    pass

    if not (os.path.exists(path_scores_method1)
            and os.path.exists(path_scores_method2)):
        print("dataset: " + graph_name + '----' + "baselines:" +
              emb_method_name1 + "," + emb_method_name2 + ': 分数未完全计算')

    if os.path.exists(path_scores_method1) and os.path.exists(
            path_scores_method2):
        # Load the scores and normalize them
        scores_matrix_one_dict = (loadmat(path_scores_method1))
        scores_matrix_two_dict = (loadmat(path_scores_method2))
        scores_matrix_one = scores_matrix_one_dict['scores']
        scores_matrix_two = scores_matrix_two_dict['scores']
        if emb_method_name1 not in all_embedding_methods:
            scores_matrix_one = csr_matrix(np.triu(scores_matrix_one.A,
                                                   k=1))  # k=1 excludes the diagonal
        if emb_method_name2 not in all_embedding_methods:
            scores_matrix_two = csr_matrix(np.triu(scores_matrix_two.A, k=1))
        scores_matrix_one_norm = normalize_matrix(
            csr_matrix1=csr_matrix(scores_matrix_one))
        scores_matrix_two_norm = normalize_matrix(
            csr_matrix1=csr_matrix(scores_matrix_two))

        # Build train_binary and test_binary
        graph_train_path = get_trainset_path(base_dir=all_file_dir,
                                             graph_name=graph_name,
                                             connected_pattern='undirected',
                                             from_zeros_one='0')
        graph_test_path = get_testset_path(base_dir=all_file_dir,
                                           graph_name=graph_name)
        G = read_graph(weighted=0, input=graph_train_path, directed=0)
        train_binary = csr_matrix(nx.convert_matrix.to_scipy_sparse_matrix(G))
        train_binary = csr_matrix(np.triu(train_binary.A, k=1))
        test_binary = get_test_matrix_binary(graph_test_path=graph_test_path,
                                             N=train_binary.shape[0])

        del scores_matrix_one, scores_matrix_two
        gc.collect()

        # Collect the scores of the positive samples
        exist_binary = csr_matrix(np.triu(train_binary.A, k=1))  # k=1 excludes the diagonal
        exist_scores_one_list = (np.array(
            scores_matrix_one_norm[exist_binary > 0], dtype=float))[0]
        exist_scores_two_list = (np.array(
            scores_matrix_two_norm[exist_binary > 0], dtype=float))[0]

        # Build the training samples (positive + negative)
        X_train_1 = (np.array([exist_scores_one_list,
                               exist_scores_two_list])).T
        X_train_0 = negative_samples(
            train_binary=train_binary,
            test_binary=test_binary,
            scores_matrix_one_norm=scores_matrix_one_norm,
            scores_matrix_two_norm=scores_matrix_two_norm,
            ratio=ratio)
        Y_train_1 = np.ones(X_train_1.shape[0], dtype=int)   # label 1: observed edges
        Y_train_0 = np.zeros(X_train_0.shape[0], dtype=int)  # label 0: sampled non-edges
        X_train = np.vstack((X_train_1, X_train_0))
        Y_train = np.hstack((Y_train_1, Y_train_0))
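        # Hedged sketch (not the project's actual code) of what
        # negative_samples above plausibly does: sample upper-triangle pairs
        # absent from both train and test, and stack their two normalized
        # scores as features.
        #
        # def negative_samples_sketch(train_binary, test_binary,
        #                             scores_one, scores_two, ratio):
        #     n = train_binary.shape[0]
        #     target = int(ratio * train_binary.sum())
        #     rng = np.random.default_rng(2020)
        #     rows, cols = [], []
        #     while len(rows) < target:
        #         i, j = rng.integers(0, n, 2)
        #         if i < j and train_binary[i, j] == 0 and test_binary[i, j] == 0:
        #             rows.append(i)
        #             cols.append(j)
        #     s1 = np.asarray(scores_one[rows, cols]).ravel()
        #     s2 = np.asarray(scores_two[rows, cols]).ravel()
        #     return np.vstack([s1, s2]).T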

        time_start = time.time()

        # Train the model
        model.fit(X_train, Y_train)

        # Predict on the training samples (the printed sums count positive predictions)
        preds_0 = model.predict(X_train_0)
        preds_1 = model.predict(X_train_1)
        print(np.sum(preds_0))
        print(np.sum(preds_1))
        preds_0_proba = model.predict_proba(X_train_0)
        preds_1_proba = model.predict_proba(X_train_1)

        # Predict scores for all candidate node pairs
        scores_matrix_DNN = predicted_scores_DNN(
            model=model,
            train_binary=train_binary,
            test_binary=test_binary,
            scores_matrix_one_norm=scores_matrix_one_norm,
            scores_matrix_two_norm=scores_matrix_two_norm)
        save_DNN_hybrid_scores(scores_matrix_DNN=scores_matrix_DNN,
                               method1=emb_method_name1,
                               method2=emb_method_name2,
                               graph_results_dir=graph_results_dir,
                               dataset_name=graph_name,
                               model_name=model_name)
        scores_matrix_DNN_norm = normalize_matrix(
            csr_matrix1=scores_matrix_DNN)

        # Compute the rasterization grids for the DNN scores
        DNN_raster_grids = rasterization_grids(
            binNum=DNN_binNum,
            train_binary=train_binary,
            scores_matrix_DNN=scores_matrix_DNN_norm,
            scores_matrix_one_norm=scores_matrix_one_norm,
            scores_matrix_two_norm=scores_matrix_two_norm)
        # DNN_raster_grids = np.log10(DNN_raster_grids)  # produces -inf and raises an error
        DNN_raster_grids = normalize_matrix_full(
            csr_matrix1=csr_matrix(DNN_raster_grids))
        DNN_raster_grids = better_show_grids(csr_matrix1=DNN_raster_grids)
        save_DNN_raster_scores(rastser_grids=DNN_raster_grids,
                               method1=emb_method_name1,
                               method2=emb_method_name2,
                               graph_results_dir=graph_results_dir,
                               dataset_name=graph_name,
                               model_name=model_name,
                               DNN_binNum=DNN_binNum)
        source = np.float32(DNN_raster_grids.A)
        result = cv2.GaussianBlur(source, (5, 5), 0)
        title = graph_name + '-' + model_name + '-' + emb_method_name1 + '-' + emb_method_name2
        plot_contourf(result=result, title=title, binNum=10)

        # Load the PNR grids
        PNR_path = results_base_dir + prex + graph_name + "//" + "PNR1_" + graph_name + "_" + emb_method_name1 + "_" + emb_method_name2 + "_50_count.mat"
        if is_excel_file_exist(PNR_path):
            PNR_dict = (loadmat(PNR_path))
            PNR_matrix = PNR_dict["count"]
            PNR_matrix = better_show_grids(csr_matrix1=PNR_matrix)
            source = np.float32(PNR_matrix.A)
            result = cv2.GaussianBlur(source, (5, 5),
                                      0)  # 5x5 Gaussian kernel; sigma 0 is derived from the kernel size
            title = graph_name + '-PNR-' + emb_method_name1 + '-' + emb_method_name2
            plot_contourf(result=result, title=title, binNum=10)

        # Evaluate the DNN scores
        exist_binary = csr_matrix(np.triu(train_binary.A, k=1))  # k=1 excludes the diagonal
        nonexist_binary = csr_matrix(
            np.triu(np.ones(exist_binary.shape), k=1) - exist_binary.A)
        nonexist_scores_DNN_list = (np.array(
            scores_matrix_DNN[nonexist_binary > 0], dtype=float))[0]
        L_full = int(np.sum(test_binary))
        L_array = np.array([
            int(L_full / 20),
            int(L_full / 10),
            int(L_full / 5),
            int(L_full / 2), L_full
        ])
        AP_DNN, AUC_DNN, Precision_DNN, Recall_DNN, F1score_DNN = \
            evaluators(train_binary=train_binary,
                       test_binary=test_binary,
                       scores_list=nonexist_scores_DNN_list,
                       L_array=L_array)
        # print('AP_DNN:  ' + str(AP_DNN))
        # print('\n')
        # print('AUC_DNN:  ' + str(AUC_DNN))
        # print('\n')
        # print('Precision_DNN:  ' + str(Precision_DNN))
        # print('\n')
        # print('Recall_DNN:  ' + str(Recall_DNN))
        # print('\n')
        # print('F1score_DNN:  ' + str(F1score_DNN))
        # print('\n')

        # Write precision, recall, F1-score, and AP to an Excel file
        DNN_write_to_excel(DL_name=model_name,
                           dataset_name=graph_name,
                           method1=emb_method_name1,
                           method2=emb_method_name2,
                           precision_DL=Precision_DNN,
                           recall_DL=Recall_DNN,
                           F1score_DL=F1score_DNN,
                           AP_DL=AP_DNN)

        time_end = time.time()
        print("It takes : " + str((time_end - time_start) / 60.0) + "  mins.")
        pass
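# A hypothetical invocation of auto_DNN (every argument value below is a
# placeholder, not taken from the repo):
#
# auto_DNN(prex='split_train_test_0.8//',
#          graph_name='facebook_combined',
#          emb_method_name1='node2vec',
#          emb_method_name2='deepwalk',
#          model_name='mlp',
#          DNN_binNum=50)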
Example #34
0
# -*- coding:utf-8 -*-
import sys
import os
import networkx as nx
import matplotlib.pyplot as plt

lib = os.path.join(os.path.abspath('.'), 'lib')
sys.path.insert(0, lib)

import utils

# USER:saicologic
user_id = 1502

# G = nx.Graph()
G = utils.read_graph(user_id)

# undirected graph

# degree centrality
utils.show_ranking(G, 'degree_centrality', nx.degree_centrality(G))

# closeness centrality
utils.show_ranking(G, 'closeness_centrality', nx.closeness_centrality(G))

# betweenness centrality
utils.show_ranking(G, 'betweenness_centrality', nx.betweenness_centrality(G))

# edge betweenness centrality
utils.show_ranking(G, 'edge_betweenness_centrality', nx.edge_betweenness_centrality(G))
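# utils.show_ranking is not shown above; a minimal hedged sketch of such a
# helper, assuming it prints the top nodes ordered by centrality score:

def show_ranking_sketch(G, name, scores, top=10):
    # G is accepted only for signature parity with utils.show_ranking
    # sort nodes by score, highest first, and print the leaders
    ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
    print(name)
    for node, score in ranked[:top]:
        print('%s\t%.4f' % (node, score))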
Example #35
0
    # Write the result to a file
    output_file(final_partitioning)

    # THE RESULT OBTAINED WITH METIS
    start_meth = time.time()
    edge_cut, metis_partitioning = metis.part_graph(graphs_history[0], k)
    end_meth = time.time()
    m, s = divmod((end_meth - start_meth), 60)
    elapsed_time = "%d minutes and %f seconds" % (m, s)
    print('the edge cut obtained with METIS is', edge_cut)
    print('the time taken by METIS was', elapsed_time)


parser = argparse.ArgumentParser(description='Partition the vertices of a graph into k roughly '
                                             'equal partitions such that the number of edges connecting vertices in different partitions '
                                             'is minimized')

parser.add_argument("k", help="The number of partitions", type = int)
group = parser.add_mutually_exclusive_group()
group.add_argument("-f", "--file", help="The file containing the graph to elaborate, if not specified a random graph is generated", type = str)
group.add_argument("-r", "--random", nargs = 2, metavar = ('DEGREE','N_NODES'), help="Generate a random graph with degree and number of nodes specified ",
                   type = int,default=[30,10000]) #default value for degree and number of nodes of the random generated graph

args = parser.parse_args()

if args.file is not None:
    g = read_graph(args.file)
else:
    g = random_graph(args.random[0], args.random[1])

k_way_partitioning(args.k, g)
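# Example invocations, assuming this script is saved as partition.py
# (a hypothetical file name):
#
#   python partition.py 4 -f graph.txt
#   python partition.py 8 -r 30 10000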