import time
from copy import deepcopy

# `utils` and `community_search` are assumed to be defined elsewhere in
# this project.


def community_detection(graph, ground_truth_file_address):
    """Detects all communities by repeatedly growing a community from the
    highest-degree remaining node, then removing it from the working copy."""
    start_time = time.time()
    graph_copy = deepcopy(graph)
    all_communities = list()
    while graph_copy.number_of_nodes() > 0:
        # seed each community from the highest-degree remaining node
        nodes = list(graph_copy.nodes())
        intended_node = nodes[0]
        for i in range(1, len(nodes)):
            if graph_copy.degree[nodes[i]] > graph_copy.degree[intended_node]:
                intended_node = nodes[i]
        community = community_search(graph_copy, intended_node, True)
        all_communities.append(sorted(community))
        graph_copy.remove_nodes_from(community)
    # all_communities = utils.amend_partition(graph, all_communities)
    report = utils.report_performance(graph, all_communities,
                                      ground_truth_file_address)
    print(report,
          '\tCommunity Detection Task is done in %.4f seconds.' %
          (time.time() - start_time))
    return all_communities
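# Note: the seed-selection loop above simply keeps the first node of
# maximum degree; a compact, behavior-preserving alternative (a sketch,
# not part of the original module) is:
def highest_degree_node(g):
    """Equivalent one-liner for the seed-selection loop above."""
    return max(g.nodes(), key=g.degree)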
    (b_proj, b_proj - learning_rate * grad[3]),
]

f_train = theano.function([X, y_true], cost, updates=updates,
                          allow_input_downcast=True)
f_pred = theano.function([X], y_pred, allow_input_downcast=True)

training_history, valid_history = train_networks(f_train, f_pred,
                                                 nb_epoch=3, batch_size=128,
                                                 X_train=X_train,
                                                 y_train=y_train,
                                                 X_valid=X_valid,
                                                 y_valid=y_valid)
report_performance(f_pred, X_test, y_test)

plt.figure()
plt.plot(training_history, c='b', label="Training cost")
plt.plot(valid_history, c='r', label="Validation accuracy")
plt.legend()
plt.show()

# visualize the learned convolutional filters
W_ = W_conv.get_value()
plot_weights4D(W_, colormap="Blues")
plt.show()

# visualize the feature maps produced for the first 16 training images
f = theano.function([X], conv_output, allow_input_downcast=True)
X_new = f(X_train[:16])
plot_weights4D(X_new, 'Reds')
plt.show()
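# The `updates` list closed above follows the standard Theano SGD pattern:
# one (shared_variable, new_expression) pair per parameter. A generic,
# self-contained sketch of that construction (the name `sgd_updates` and
# its arguments are illustrative, not part of this script):
import theano.tensor as TT

def sgd_updates(cost, params, learning_rate=0.01):
    # one symbolic gradient per shared parameter, paired with its descent step
    grads = TT.grad(cost, params)
    return [(p, p - learning_rate * g) for p, g in zip(params, grads)]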
def community_detection(graph, ground_truth_file_address):
    """Detects all communities in the network one after the other,
    using only local information and updates.

    Args:
        graph (nx.Graph): the given network
        ground_truth_file_address (str): filename of the ground-truth
            community information

    Returns:
        list: all discovered communities
    """
    start_time = time.time()
    all_communities = list()
    nodes_discovered = 0
    while nodes_discovered < graph.number_of_nodes():
        intended_node = utils.find_node_highest_degree(graph, all_communities)
        community = list()
        boundary = list()
        neighbors = list()
        R_measure = 0.0
        community.append(intended_node)
        nodes_discovered += 1
        boundary.append(intended_node)
        neighbors = list(graph.neighbors(intended_node))
        # drop neighbors already assigned to previously found communities
        for node in [dummy_node for found_community in all_communities
                     for dummy_node in found_community]:
            if node in neighbors:
                neighbors.remove(node)
        while len(community) < graph.number_of_nodes():
            # compute the improvement caused by any node in the neighborhood
            mod_r = dict()
            for neigh in neighbors:
                mod_r[neigh] = compute_r(graph, community, boundary, neigh)
            if len(mod_r) == 0:
                break
            # find the neighbor with the highest improvement to the modularity R
            best_neigh = find_the_best_neighbor(mod_r)
            # continue expanding the community only if modularity R increases
            if mod_r[best_neigh] < R_measure:
                break
            # update modularity R and add the best neighbor to the community
            R_measure = mod_r[best_neigh]
            community.append(best_neigh)
            nodes_discovered += 1
            # update the neighborhood list
            neighbors = update_neighbors(graph, community, neighbors,
                                         best_neigh, all_communities)
            # update the boundary list
            boundary = update_boundaries(graph, community, boundary,
                                         best_neigh)
        all_communities.append(community)
    all_communities = utils.amend_partition(graph, all_communities)
    report = utils.report_performance(graph, all_communities,
                                      ground_truth_file_address)
    print(report,
          '\tCommunity Detection Task is done in %.4f seconds.' %
          (time.time() - start_time))
    return all_communities
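# `compute_r` is not shown in this excerpt. Assuming it implements a
# Clauset-style local modularity R = I / T (boundary edges lying fully
# inside the community over all edges touching the boundary), a minimal
# sketch might look like the following; the repo's actual helper may
# instead update the passed-in boundary incrementally:
def compute_r_sketch(graph, community, boundary, candidate):
    members = set(community) | {candidate}
    # boundary = members that still have at least one outside neighbor
    bound = {v for v in members
             if any(u not in members for u in graph.neighbors(v))}
    boundary_edges = {frozenset((v, u)) for v in bound
                      for u in graph.neighbors(v)}
    internal = sum(1 for e in boundary_edges
                   if all(w in members for w in e))
    return internal / len(boundary_edges) if boundary_edges else 0.0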
# save_results(cvResults, bestPred, modelName)
# print("")

# 'Random Forest'
# modelName = "Random Forest"
# print("\n", modelName)
#
# start = time.perf_counter()
# bestPred, cvResults = hyp_random_forest(trainDf, y_train, testDf)
# elapsed = time.perf_counter() - start
#
# metricsTest = report_performance(y_test, bestPred, elapsed=elapsed,
#                                  model_name=modelName)
#
# resultsDf, _ = save_results(cvResults, bestPred, modelName)
# plot_hyp(resultsDf, modelName)
# print("")

'AdaBoost'
modelName = "AdaBoost"
print("\n", modelName)

start = time.perf_counter()
bestPred, cvResults = hyp_ada_boost(trainDf, y_train, testDf)
elapsed = time.perf_counter() - start

metricsTest = report_performance(y_test, bestPred, elapsed=elapsed,
                                 model_name=modelName)

resultsDf, _ = save_results(cvResults, bestPred, modelName)
plot_hyp(resultsDf, modelName)
print("")
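# Each model block in this script repeats the same time -> fit -> report
# -> save sequence; a hypothetical wrapper (not part of the repo) that
# captures the pattern using the script's own helpers:
def run_model(name, search_fn, trainDf, y_train, testDf, y_test):
    print("\n", name)
    start = time.perf_counter()
    bestPred, cvResults = search_fn(trainDf, y_train, testDf)
    elapsed = time.perf_counter() - start
    metricsTest = report_performance(y_test, bestPred, elapsed=elapsed,
                                     model_name=name)
    resultsDf, _ = save_results(cvResults, bestPred, name)
    plot_hyp(resultsDf, name)
    print("")
    return metricsTest, resultsDf

# e.g. run_model("AdaBoost", hyp_ada_boost, trainDf, y_train, testDf, y_test)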
# # save_excel(metricsTest, metricsCV)
# # print(metricsCV)

'Logistic Regression'
modelName = "Logistic Regression"
print("\n" + modelName)

start = time.perf_counter()
predictions, metricsCV, _ = log_reg(trainDf, y_train, testDf, y_test)
elapsed = time.perf_counter() - start

metricsTest, conf_matrix = report_performance(y_test, predictions,
                                              elapsed=elapsed,
                                              modelName=modelName,
                                              report=True)
fig = plot_conf_matrix(y_test, predictions, modelName=modelName)
# fig = plot_conf_matrix(y_train, predTrain, modelName=modelName+'_train')
save_excel(metricsTest, metricsCV)

'Linear Perceptron'
modelName = "Linear Perceptron"
print("\n" + modelName)

start = time.perf_counter()
predictions, predTrain, _ = perceptron(trainDf, y_train, testDf, y_test)
elapsed = time.perf_counter() - start
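# The repo's plot_conf_matrix helper is not shown in this excerpt; a
# minimal equivalent using scikit-learn (an assumed stand-in, not the
# repo's actual code):
from sklearn.metrics import ConfusionMatrixDisplay

def plot_conf_matrix_sketch(y_true, y_pred, modelName=""):
    disp = ConfusionMatrixDisplay.from_predictions(y_true, y_pred)
    disp.ax_.set_title(modelName)
    return disp.figure_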
def community_detection(graph, ground_truth_file_address):
    """Detects all communities in the network one after the other,
    using only local information and updates.

    Args:
        graph (nx.Graph): the given network
        ground_truth_file_address (str): filename of the ground-truth
            community information

    Returns:
        list: all discovered communities
    """
    start_time = time.time()
    all_communities = list()
    nodes_discovered = 0
    while nodes_discovered < graph.number_of_nodes():
        initial_node = utils.find_node_highest_degree(graph, all_communities)
        community = [initial_node]
        nodes_discovered += 1
        shell_set = list(graph.neighbors(initial_node))
        if initial_node in shell_set:
            shell_set.remove(initial_node)
        # drop shell nodes already assigned to previously found communities
        for node in [dummy_node for found_community in all_communities
                     for dummy_node in found_community]:
            if node in shell_set:
                shell_set.remove(node)
        T_in = update_T_in(graph, initial_node, community=[], T_in_prev=0.0)
        T_ex = update_T_ex(graph, initial_node, community=[], T_ex_prev=0.0)
        curr_T = calc_T(T_in, T_ex)
        while len(community) < graph.number_of_nodes() and shell_set != []:
            # find the neighbor with the highest improvement to the T score
            best_node, new_T_in, new_T_ex = find_the_best_neighbor(
                graph, community, shell_set, T_in, T_ex)
            # calculate the new T score
            new_T = calc_T(new_T_in, new_T_ex)
            if new_T >= curr_T:
                T_in, T_ex, curr_T = new_T_in, new_T_ex, new_T
                community.append(best_node)
                nodes_discovered += 1
                new_neighbors = list(
                    set(graph.neighbors(best_node)) - set(community))
                for node in [dummy_node for found_community in all_communities
                             for dummy_node in found_community]:
                    if node in new_neighbors:
                        new_neighbors.remove(node)
                shell_set.extend(new_neighbors)
                shell_set = list(set(shell_set))
                shell_set.remove(best_node)
            else:
                break
        all_communities.append(sorted(community))
    # all_communities = utils.amend_partition(graph, all_communities)
    report = utils.report_performance(graph, all_communities,
                                      ground_truth_file_address)
    print(report,
          '\tCommunity Detection Task is done in %.4f seconds.' %
          (time.time() - start_time))
    return all_communities
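# The "drop nodes already assigned to earlier communities" pruning appears
# twice in the function above; a small illustrative helper (names are not
# from the repo) would factor it out, used as e.g.
# shell_set = prune_assigned(shell_set, all_communities):
def prune_assigned(candidates, all_communities):
    """Drop candidates already placed in a previously found community."""
    assigned = {node for comm in all_communities for node in comm}
    return [v for v in candidates if v not in assigned]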