Пример #1
0
def community_detection(graph, ground_truth_file_address):
    """Partition the network by repeatedly peeling off one community.

    A deep copy of ``graph`` is consumed: on every round the node with the
    highest degree in the remaining copy is used as the seed for
    ``community_search``, and the community that comes back is removed from
    the copy. The loop stops once the copy has no nodes left.

    Args:
        graph: the given network (the original is never modified here).
        ground_truth_file_address: forwarded to ``utils.report_performance``
            together with the detected communities.

    Returns:
        list: the discovered communities, each one as a sorted list.
    """
    started_at = time.time()
    remaining_graph = deepcopy(graph)
    detected = []

    while remaining_graph.number_of_nodes() > 0:
        # max() keeps the first maximal element, so ties are resolved in
        # favour of the earliest node in the graph's iteration order.
        seed = max(list(remaining_graph.nodes()),
                   key=lambda candidate: remaining_graph.degree[candidate])

        members = community_search(remaining_graph, seed, True)
        detected.append(sorted(members))
        remaining_graph.remove_nodes_from(members)

    # NOTE: the partition is reported as found; no amend step is applied.
    report = utils.report_performance(graph, detected,
                                      ground_truth_file_address)
    elapsed = time.time() - started_at
    print(report,
          '\tCommunity Detection Task is done in %.4f seconds.' % elapsed)
    return detected
    (b_proj, b_proj - learning_rate * grad[3]),
]

# Compile the training step: evaluates `cost` for the inputs (X, y_true) and
# applies the parameter `updates` on every call.
f_train = theano.function([X, y_true], cost, updates=updates,
                          allow_input_downcast=True)
# Compile the prediction function: maps X to `y_pred` with no updates.
f_pred = theano.function([X], y_pred,
                         allow_input_downcast=True)


# Run training for 3 epochs with mini-batches of 128.
# NOTE(review): `train_networks` is defined elsewhere; presumably it returns
# one history entry per epoch or batch -- confirm against its definition.
training_history, valid_history = train_networks(f_train, f_pred,
                                        nb_epoch=3, batch_size=128,
                                        X_train=X_train, y_train=y_train,
                                        X_valid=X_valid, y_valid=y_valid)


# Evaluate the compiled predictor on the held-out test split.
report_performance(f_pred, X_test, y_test)

# Plot both histories on one figure (blue: training, red: validation).
# NOTE(review): the two curves are labelled as a cost and an accuracy, which
# would share a single y-axis -- confirm that this is intended.
plt.figure()
plt.plot(training_history, c='b', label="Training cost")
plt.plot(valid_history, c='r', label="Validation accuracy")
plt.legend()
plt.show()

# Fetch the current value of the `W_conv` shared variable and visualise it.
W_ = W_conv.get_value()
plot_weights4D(W_, colormap = "Blues")
plt.show()

# Compile a function exposing `conv_output` and visualise it for the first
# 16 training samples, reusing the 4D weight plotter.
f = theano.function([X], conv_output, allow_input_downcast=True)
X_new = f(X_train[:16])
plot_weights4D(X_new, 'Reds')
plt.show()
def community_detection(graph, ground_truth_file_address):
    """Detect all communities one after another using only local information.

    Each round seeds a new community with the node returned by
    ``utils.find_node_highest_degree`` and greedily grows it: the candidate
    neighbor with the best ``compute_r`` score is added for as long as that
    score does not fall below the score reached so far. Nodes that already
    belong to a previously found community are never used as candidates.

    Args:
        graph: the given network.
        ground_truth_file_address: filename of the ground-truth community
            information, forwarded to ``utils.report_performance``.

    Returns:
        list: all discovered communities, after ``utils.amend_partition``.
    """
    start_time = time.time()
    all_communities = list()
    nodes_discovered = 0

    while nodes_discovered < graph.number_of_nodes():
        intended_node = utils.find_node_highest_degree(graph, all_communities)
        community = [intended_node]
        boundary = [intended_node]
        nodes_discovered += 1
        R_measure = 0.0

        # Nodes claimed by earlier communities. A set gives O(1) membership
        # tests, and the distinct loop names avoid shadowing `community`.
        assigned = {
            member for found in all_communities for member in found
        }
        # Order-preserving filter, so candidates keep the adjacency order.
        neighbors = [
            node for node in graph.neighbors(intended_node)
            if node not in assigned
        ]

        while len(community) < graph.number_of_nodes():

            # compute the improvement caused by any node in the neighborhood
            mod_r = {
                neigh: compute_r(graph, community, boundary, neigh)
                for neigh in neighbors
            }

            # nothing left to try: the community cannot grow any further
            if not mod_r:
                break

            # find the neighbor with the highest improvement to the modularity R
            best_neigh = find_the_best_neighbor(mod_r)

            # continue expanding the community only if modularity R does not
            # decrease (an equal score still extends the community)
            if mod_r[best_neigh] < R_measure:
                break

            # update modularity R and add the best neighbor to the community
            R_measure = mod_r[best_neigh]
            community.append(best_neigh)
            nodes_discovered += 1

            # update the neighborhood list
            neighbors = update_neighbors(graph, community, neighbors,
                                         best_neigh, all_communities)

            # update the boundary list
            boundary = update_boundaries(graph, community, boundary,
                                         best_neigh)

        all_communities.append(community)

    all_communities = utils.amend_partition(graph, all_communities)
    report = utils.report_performance(graph, all_communities,
                                      ground_truth_file_address)
    print(
        report, '\tCommunity Detection Task is done in %.4f seconds.' %
        (time.time() - start_time))
    return all_communities
    # save_results(cvResults, bestPred, modelName)
    # print("")

    # 'Random Forest'
    # modelName = "Random Forest"
    # print("\n", modelName)
    #
    # start   = time.perf_counter()
    # bestPred, cvResults = hyp_random_forest(trainDf, y_train, testDf)
    # elapsed = time.perf_counter() - start
    #
    # metricsTest = report_performance(y_test, bestPred, elapsed=elapsed, model_name=modelName)
    #
    # resultsDf, _ = save_results(cvResults, bestPred, modelName)
    # plot_hyp(resultsDf, modelName)
    # print("")

    'AdaBoost'
    modelName = "AdaBoost"
    print("\n", modelName)

    start   = time.perf_counter()
    bestPred, cvResults = hyp_ada_boost(trainDf, y_train, testDf)
    elapsed = time.perf_counter() - start

    metricsTest = report_performance(y_test, bestPred, elapsed=elapsed, model_name=modelName)

    resultsDf, _ = save_results(cvResults, bestPred, modelName)
    plot_hyp(resultsDf, modelName)
    print("")
Пример #5
0
    # # save_excel(metricsTest, metricsCV)
    #
    # print(metricsCV)

    'Logistic Regression'
    modelName = "Logistic Regression"
    print("\n" + modelName)

    start = time.perf_counter()
    predictions, metricsCV, _ = log_reg(trainDf, y_train, testDf, y_test)

    elapsed = time.perf_counter() - start

    metricsTest, conf_matrix = report_performance(y_test,
                                                  predictions,
                                                  elapsed=elapsed,
                                                  modelName=modelName,
                                                  report=True)

    fig = plot_conf_matrix(y_test, predictions, modelName=modelName)
    # fig = plot_conf_matrix(y_train, predTrain, modelName=modelName+'_train')

    save_excel(metricsTest, metricsCV)

    'Linear Perceptron'
    modelName = "Linear Perceptron"
    print("\n" + modelName)

    start = time.perf_counter()
    predictions, predTrain, _ = perceptron(trainDf, y_train, testDf, y_test)
    elapsed = time.perf_counter() - start
Пример #6
0
def community_detection(graph, ground_truth_file_address):
    """Detect all communities one after another using only local information.

    Each round seeds a new community with the node returned by
    ``utils.find_node_highest_degree`` and grows it from its shell (the
    candidate neighbors): the candidate picked by ``find_the_best_neighbor``
    is accepted as long as the resulting T score is not lower than the
    current one. Nodes that already belong to a previously found community
    are never placed in the shell.

    Args:
        graph: the given network.
        ground_truth_file_address: filename of the ground-truth community
            information, forwarded to ``utils.report_performance``.

    Returns:
        list: all discovered communities, each one as a sorted list.
    """
    start_time = time.time()
    all_communities = list()
    nodes_discovered = 0

    while nodes_discovered < graph.number_of_nodes():
        initial_node = utils.find_node_highest_degree(graph, all_communities)
        community = [initial_node]
        nodes_discovered += 1

        # Nodes claimed by earlier communities. `all_communities` does not
        # change while one community is being grown, so this is built once
        # per community instead of once per expansion step.
        assigned = {
            member for found in all_communities for member in found
        }

        shell_set = list(graph.neighbors(initial_node))
        # a self-loop would list the seed as its own neighbor
        if initial_node in shell_set:
            shell_set.remove(initial_node)
        shell_set = [node for node in shell_set if node not in assigned]

        T_in = update_T_in(graph, initial_node, community=[], T_in_prev=0.0)
        T_ex = update_T_ex(graph, initial_node, community=[], T_ex_prev=0.0)
        curr_T = calc_T(T_in, T_ex)

        while len(community) < graph.number_of_nodes() and shell_set:

            # find the neighbor with the highest improvement to the T score
            best_node, new_T_in, new_T_ex = find_the_best_neighbor(
                graph, community, shell_set, T_in, T_ex)

            # calculate the new T score
            new_T = calc_T(new_T_in, new_T_ex)

            if new_T >= curr_T:
                T_in, T_ex, curr_T = new_T_in, new_T_ex, new_T
                community.append(best_node)
                nodes_discovered += 1

                # neighbors of the accepted node that are neither in the
                # current community nor in an earlier one join the shell
                new_neighbors = [
                    node
                    for node in set(graph.neighbors(best_node)) - set(community)
                    if node not in assigned
                ]

                shell_set.extend(new_neighbors)
                # de-duplicate, then drop the node that was just absorbed
                shell_set = list(set(shell_set))
                shell_set.remove(best_node)

            else:
                # the best candidate would lower the T score: stop growing
                break

        all_communities.append(sorted(community))

    # NOTE: the partition is reported as found; no amend step is applied.
    report = utils.report_performance(graph, all_communities,
                                      ground_truth_file_address)
    print(
        report, '\tCommunity Detection Task is done in %.4f seconds.' %
        (time.time() - start_time))
    return all_communities