def test_example_1_detail_3_and_4():
    """Check that the known 3- and 4-components of example graph 1 are found.

    Every expected node set must appear among the components that
    ``k_components`` reports for the corresponding ``k``.
    """
    expected = {
        3: [
            {39, 40, 41, 42, 43},
            {25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 42},
            {58, 59, 60, 61, 62},
            {44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 61},
            {77, 78, 79, 80, 81},
            {63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 80},
            {97, 98, 99, 100, 101},
            {82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 94, 95, 96, 100},
        ],
        4: [
            {39, 40, 41, 42, 43},
            {35, 36, 37, 38, 42},
            {58, 59, 60, 61, 62},
            {54, 55, 56, 57, 61},
            {77, 78, 79, 80, 81},
            {73, 74, 75, 76, 80},
            {97, 98, 99, 100, 101},
            {92, 94, 95, 96, 100},
        ],
    }
    found = k_components(graph_example_1())
    for level, node_sets in expected.items():
        for node_set in node_sets:
            assert_true(node_set in found[level])
def calcular_lineas(self, repeticiones, lon_minima):
    """
    Run the line-detection pipeline on the binarised image and publish the
    resulting segments to the window.

    The thinned image is passed ``repeticiones`` times through
    ``pro_hough``; all detected lines are merged into a graph, its
    approximate k-components are computed, and the segments derived from
    them are kept only when their length exceeds ``lon_minima``. The
    surviving segments are painted and stored on ``self.ventana``.

    @param repeticiones: number of times the Hough detection is repeated.
    @param lon_minima: segments must be strictly longer than this to be kept.
    """
    imagen_fina = self.procesado.reducir_grosor(self.img_bin)

    detectadas = []
    pendientes = repeticiones
    while pendientes > 0:
        detectadas.extend(self.procesado.pro_hough(10, 5, 11, imagen_fina))
        pendientes -= 1

    grafo = self.procesado_de_lineas.combina2(4, 8, 4, 1, detectadas, nx.Graph())
    componentes = apxa.k_components(grafo)
    candidatos = self.procesado_de_lineas.segmentos_verdad(componentes, detectadas)

    # Keep only the segments that are strictly longer than the minimum length.
    a_pintar = [
        segmento for segmento in candidatos
        if self.procesado_de_lineas.longitud_linea(
            segmento, self.ref_numeros) > lon_minima
    ]

    self.pintar_imagen_y_segmentos(a_pintar)
    self.ventana.lineas = a_pintar
    self.ventana.tam_segmen_verdad = len(self.ventana.lineas)
    self.ventana.pestannas.anadir_puntos()
    self.ventana.pestannas.button4.setEnabled(True)
    self.ventana.selec_ante = None
def test_example_1_detail_3_and_4():
    """Verify the expected 3- and 4-components of example graph 1.

    Each expected node set has to be one of the components returned by
    ``k_components`` for that connectivity level.
    """
    three_components = [
        set(range(39, 44)),
        set(range(25, 39)) | {42},
        set(range(58, 63)),
        set(range(44, 58)) | {61},
        set(range(77, 82)),
        set(range(63, 77)) | {80},
        set(range(97, 102)),
        set(range(82, 93)) | {94, 95, 96, 100},
    ]
    four_components = [
        set(range(39, 44)),
        set(range(35, 39)) | {42},
        set(range(58, 63)),
        set(range(54, 58)) | {61},
        set(range(77, 82)),
        set(range(73, 77)) | {80},
        set(range(97, 102)),
        {92, 94, 95, 96, 100},
    ]
    solution = {3: three_components, 4: four_components}

    graph = graph_example_1()
    detected = k_components(graph)
    for k in solution:
        for wanted in solution[k]:
            assert_true(wanted in detected[k])
def test_karate_1():
    """Compare the per-node k-number of the karate club graph with known values."""
    # Expected k-number of nodes 0..33, listed in node order.
    expected_by_node = [
        4, 4, 4, 4, 3, 3, 3, 4, 4, 2,
        3, 1, 2, 4, 2, 2, 2, 2, 2, 3,
        2, 2, 2, 3, 3, 3, 2, 3, 3, 3,
        4, 3, 4, 4,
    ]
    karate_k_num = dict(enumerate(expected_by_node))

    components = k_components(nx.karate_club_graph())
    assert_equal(karate_k_num, build_k_number_dict(components))
def _check_connectivity(G):
    """Assert that each k-component (k >= 3) of ``G`` is at least k-connected.

    Components reported for k < 3 are not checked.
    """
    for level, node_sets in k_components(G).items():
        if level >= 3:
            for node_set in node_sets:
                connectivity = nx.node_connectivity(G.subgraph(node_set))
                assert_greater_equal(connectivity, level)
def _check_connectivity(G):
    """Assert that each k-component (k >= 3) of ``G`` has node connectivity >= k.

    Components reported for k < 3 are skipped.
    """
    components_by_k = k_components(G)
    for level in components_by_k:
        if level < 3:
            continue
        for node_set in components_by_k[level]:
            induced = G.subgraph(node_set)
            assert_true(nx.node_connectivity(induced) >= level)
def kcomp():
    """Show the approximate k-component structure of the global graph ``g``.

    One text line is produced per k, listing the components found for that
    k, and the whole report is displayed in an info message box.
    """
    structure = apxa.k_components(g)
    report = "".join(
        str(level) + ": " + ", ".join(str(comp) for comp in comps) + "\n"
        for level, comps in structure.items()
    )
    messagebox.showinfo(
        "Info", "K-component structure of a graph is {}".format(report))
def test_segmentos_verdad(self):
    """Check ``segmentos_verdad`` on a fixed set of nine detected lines.

    The lines are combined into a graph, its approximate k-components are
    computed, and the resulting segments must equal the three expected ones.
    """
    lineas = [
        ((128, 72), (164, 18)),
        ((24, 43), (12, 31)),
        ((55, 71), (31, 49)),
        ((136, 59), (163, 19)),
        ((11, 31), (4, 24)),
        ((34, 51), (25, 43)),
        ((109, 42), (90, 18)),
        ((51, 67), (30, 48)),
        ((116, 50), (92, 20)),
    ]
    esperado = [
        ((164, 18), (128, 72)),
        ((55, 71), (4, 24)),
        ((116, 50), (90, 18)),
    ]

    proc = ProcesadoDeLineas()
    grafo = proc.combina(8, 4, lineas, nx.Graph())
    componentes = apxa.k_components(grafo)

    self.assertEqual(proc.segmentos_verdad(componentes, lineas), esperado)
    print("OK,test_segmentos_verdad")
def test_karate_1():
    """Check the k-number of every karate club node against accepted results.

    Two outcomes are accepted: the reference mapping, and a variant in which
    nodes 24 and 25 are assigned k-number 2 instead of 3.
    """
    karate_k_num = {
        0: 4, 1: 4, 2: 4, 3: 4, 4: 3, 5: 3, 6: 3, 7: 4, 8: 4, 9: 2,
        10: 3, 11: 1, 12: 2, 13: 4, 14: 2, 15: 2, 16: 2, 17: 2, 18: 2,
        19: 3, 20: 2, 21: 2, 22: 2, 23: 3, 24: 3, 25: 3, 26: 2, 27: 3,
        28: 3, 29: 3, 30: 4, 31: 3, 32: 4, 33: 4,
    }
    approx_karate_k_num = dict(karate_k_num)
    approx_karate_k_num.update({24: 2, 25: 2})

    graph = nx.karate_club_graph()
    observed = build_k_number_dict(k_components(graph))
    assert_in(observed, (karate_k_num, approx_karate_k_num))
def test_example_1_detail_3_and_4():
    """Check the number, sizes and connectivity of the components of example graph 1."""
    G = graph_example_1()
    result = k_components(G)

    # Eight 3-components are expected: four with 15 nodes, four with 5 nodes.
    sizes_k3 = [len(nodes) for nodes in result[3]]
    assert_equal(len(sizes_k3), 8)
    assert_equal(sizes_k3.count(15), 4)
    assert_equal(sizes_k3.count(5), 4)

    # Eight 4-components are expected, each with exactly 5 nodes.
    sizes_k4 = [len(nodes) for nodes in result[4]]
    assert_equal(len(sizes_k4), 8)
    assert_true(all(size == 5 for size in sizes_k4))

    # Every reported k-component (k >= 3) must really be k-connected.
    for level, node_sets in result.items():
        if level >= 3:
            for nodes in node_sets:
                connectivity = nx.node_connectivity(G.subgraph(nodes))
                assert_greater_equal(connectivity, level)
def obtencion_lineas(self, no_small2):
    """
    Find the line segments present in the mask passed as parameter.

    Lines are detected with ``probabilistic_hough_line``, merged into a
    graph, and reduced to segments through the graph's approximate
    k-components. Only segments whose ``longitud_linea(segment, 100)``
    exceeds 10 are returned.

    @param no_small2: the mask obtained after the image preprocessing.
    @return The segments obtained by this method.
    """
    self.proce_lines = ProcesadoDeLineas()
    detectadas = probabilistic_hough_line(no_small2, 30, 20, 30)
    grafo = self.proce_lines.combina2(4, 8, 4, 1, detectadas, nx.Graph())
    componentes = apxa.k_components(grafo)
    candidatos = self.proce_lines.segmentos_verdad(componentes, detectadas)
    return [
        segmento for segmento in candidatos
        if self.proce_lines.longitud_linea(segmento, 100) > 10
    ]
def test_directed():
    """Call ``k_components`` on a random directed graph."""
    # NOTE(review): nothing is asserted here; presumably an expected-exception
    # decorator sits above this function, outside this view - confirm.
    digraph = nx.gnp_random_graph(10, 0.4, directed=True)
    k_components(digraph)
def k_components_model(data_processed: list, vocab: list, tokenized_docs: list,
                       test_tokenized_segments: list, data_set_name: str) -> None:
    """
    k_components_model is used to perform topic model on the word embedding graph using k-components algorithm.

    This function uses the k-components approximation function from the Networkx library.
    For every percentile cutoff value a word embedding graph is created, its k-components are
    approximated, and the components for K=1, K=2 and K=3 (with at least 6 words) are treated
    as topics and scored. All scores are handed to ``save_model_scores``; nothing is returned.

    :param data_processed: preprocessed data set used to calculated word embeddings
    :param vocab: vocabulary of the preprocessed data set
    :param tokenized_docs: tokenized version of the training data set
    :param test_tokenized_segments: tokenized version of the test data set;
        when it is None the test scores are recorded as -1000.0
    :param data_set_name: name of the preprocessed data set used
        (it becomes part of the pickled Word2Vec model file name)
    """
    n_words = len([w for d in data_processed for w in d])
    word_weights = get_word_weights(data_processed, vocab, n_words, weight_type='tf')

    # get Word2Vec embeddings
    w2v_model_file = "w2v_model-k_components-" + data_set_name + "-temp.pickle"
    # NOTE(review): the cache lookup is disabled (`if False`), so the model is
    # always re-trained and the pickle below is always overwritten.
    if False:  # Path("data/" + w2v_model_file).is_file():
        print("using pre-calculated w2v model")
        with open("data/" + w2v_model_file, "rb") as myFile:
            w2v_model = pickle.load(myFile)
        vocab_words = [w for w in vocab if w in w2v_model.wv.index2word]
        vocab_embeddings = [
            w2v_model.wv.vectors[w2v_model.wv.index2word.index(w)]
            for w in vocab_words
        ]
    else:
        w2v_params_k_components = {
            "min_c": 50,
            "win": 15,
            "negative": 0,
            "sample": 1e-5,
            "hs": 1,
            "epochs": 400,
            "sg": 1,
            'seed': 42
        }
        vocab_words, vocab_embeddings, w2v_model = get_word_vectors(
            data_processed, vocab, params=w2v_params_k_components)
        with open("data/" + w2v_model_file, "wb") as myFile:
            pickle.dump(w2v_model, myFile)

    # dictionary used to save topic model scores
    # (one list per K; each list gets one entry per percentile cutoff value)
    y_topics = {"K=1": [], "K=2": [], "K=3": []}
    y_c_v_model = {"K=1": [], "K=2": [], "K=3": []}
    y_dbs_model = {"K=1": [], "K=2": [], "K=3": []}
    y_npmi_model = {"K=1": [], "K=2": [], "K=3": []}
    test_y_c_v_model = {"K=1": [], "K=2": [], "K=3": []}
    test_y_npmi_model = {"K=1": [], "K=2": [], "K=3": []}
    y_uMass_model = {"K=1": [], "K=2": [], "K=3": []}
    test_y_uMass_model = {"K=1": [], "K=2": [], "K=3": []}

    # iterate over all x values (percentile cutoff values)
    x = [x for x in range(50, 100, 10)] + [95]
    # x = [80]
    # x = [x for x in range(1, 11, 1)]
    execution_times = []
    number_of_nodes = []
    # switch between the two ways of choosing topic representatives below;
    # it is hard-coded to False, so the topic-vector branch never runs
    topic_vector_flag = False
    for sim in x:
        # create word embedding graph using cutoff threshold
        graph, graph_creation_time = create_networkx_graph(
            vocab_words,
            vocab_embeddings,
            similarity_threshold=0.8,
            percentile_cutoff=sim)
        number_of_nodes.append(graph.number_of_nodes())

        # calculate the k-components
        start_time = time.process_time()
        components_all = apxa.k_components(graph)
        k_components_time = time.process_time() - start_time

        # iterate over all k-components
        for k_component in y_topics.keys():
            # extract k-components
            temp_k_dict = {"K=1": 1, "K=2": 2, "K=3": 3}
            components = components_all[temp_k_dict[k_component]]

            # remove too small topics
            corpus_clusters = []
            clusters_words_embeddings = []
            for comp in components:
                if len(comp) >= 6:
                    corpus_clusters.append(list(comp))
                    clusters_words_embeddings.append(
                        [w2v_model.wv.get_vector(w) for w in comp])

            if topic_vector_flag:
                # perform Topic Vector Similarity
                topic_vectors = [
                    get_topic_vector(c) for c in clusters_words_embeddings
                ]
                # get topics based on topic vectors
                topic_vector_cluster_words = []
                topic_vector_cluster_words_embeddings = []
                for i, t_vector in enumerate(topic_vectors):
                    sim_indices = get_nearest_indices(
                        t_vector, clusters_words_embeddings[i])
                    topic_vector_cluster_words.append(
                        [corpus_clusters[i][i_w] for i_w in sim_indices])
                    topic_vector_cluster_words_embeddings.append([
                        clusters_words_embeddings[i][i_w]
                        for i_w in sim_indices
                    ])
                cluster_words = topic_vector_cluster_words
            else:
                # sort topic representatives by node degree
                # NOTE(review): the actual sort key comes from sort_words_by(),
                # defined elsewhere - confirm it is degree-based.
                cluster_words = [
                    sorted(
                        list(c),
                        key=(lambda w: sort_words_by(graph, w, word_weights)),
                        reverse=True) for c in corpus_clusters
                ]

            if len(cluster_words) <= 2:
                # topic model did not find enough topics
                # -1000.0 is the NaN value used in the charts, these values will not be shown
                # in the charts
                cs_c_v = -1000.0
                dbs = -1000.0
                cs_npmi = -1000.0
                cs_c_v_test = -1000.0
                cs_npmi_test = -1000.0
                cs_u_mass = -1000.0
                cs_u_mass_test = -1000.0
            else:
                cluster_embeddings = [[
                    w2v_model.wv.vectors[w2v_model.wv.index2word.index(w)]
                    for w in words
                ] for words in cluster_words]

                # topic model evaluation
                # intrinsic scores
                cs_c_v = c_v_coherence_score(tokenized_docs, cluster_words)
                dbs = davies_bouldin_index(cluster_embeddings)
                cs_npmi = npmi_coherence_score(data_processed, cluster_words,
                                               len(cluster_words))
                cs_u_mass = c_v_coherence_score(tokenized_docs,
                                                cluster_words,
                                                cs_type='u_mass')

                # extrinsic scores
                if test_tokenized_segments is not None:
                    cs_c_v_test = c_v_coherence_score(test_tokenized_segments,
                                                      cluster_words)
                    cs_npmi_test = npmi_coherence_score(
                        test_tokenized_segments, cluster_words,
                        len(cluster_words))
                    cs_u_mass_test = c_v_coherence_score(
                        test_tokenized_segments,
                        cluster_words,
                        cs_type='u_mass')
                else:
                    cs_c_v_test = -1000.0
                    cs_npmi_test = -1000.0
                    cs_u_mass_test = -1000.0

            y_topics[k_component].append(cluster_words)
            y_c_v_model[k_component].append(cs_c_v)
            y_npmi_model[k_component].append(cs_npmi)
            y_dbs_model[k_component].append(dbs)
            test_y_c_v_model[k_component].append(cs_c_v_test)
            test_y_npmi_model[k_component].append(cs_npmi_test)
            y_uMass_model[k_component].append(cs_u_mass)
            test_y_uMass_model[k_component].append(cs_u_mass_test)

        # save topic model scores
        # (one timing per cutoff value: graph creation plus k-components)
        execution_times.append(k_components_time + graph_creation_time)

    save_model_scores(x_values=x,
                      models=list(y_topics.keys()),
                      model_topics=y_topics,
                      model_c_v_scores=y_c_v_model,
                      model_npmi_scores=y_npmi_model,
                      model_c_v_test_scores=test_y_c_v_model,
                      model_npmi_test_scores=test_y_npmi_model,
                      model_u_mass_scores=y_uMass_model,
                      model_u_mass_test_scores=test_y_uMass_model,
                      execution_time=execution_times,
                      number_of_nodes=number_of_nodes,
                      filename_prefix='k-components',
                      model_dbs_scores=y_dbs_model,
                      x_label="Percentile Cutoff")
import operator

from matplotlib import pyplot as plt
from matplotlib import cm
from mpl_toolkits.axes_grid1 import make_axes_locatable
import networkx as nx
from networkx.algorithms import approximation as apxa
import numpy as np

if __name__ == '__main__':
    # Draw a random geometric graph with every node sized and coloured by the
    # largest k for which it belongs to an (approximate) k-component.
    n, radius, seed = 700, 0.05, 0
    g = nx.random_geometric_graph(n, radius, seed=seed)
    k_components = apxa.k_components(g)

    # Walk the levels in ascending k so that the value left for each node is
    # the highest k of any component containing it.
    result = {}
    for k, comps in sorted(k_components.items(), key=operator.itemgetter(0)):
        for comp in comps:
            for node in comp:
                result[node] = k

    plt.figure(figsize=(10, 10))
    pos = nx.get_node_attributes(g, 'pos')
    _max = max(k_components.keys()) + 1
    # Nodes that are in no k-component get the smallest size and colour 0.
    sizes = [2.0**result[v] + 20 if v in result else 10 for v in g.nodes()]
    cmap = cm.rainbow
    # The colormap needs a real sequence: on Python 3 `map` returns a lazy
    # iterator, which cannot be converted to an array of floats.
    colors = cmap([float(result.get(v, 0)) / _max for v in g.nodes()])

    nx.draw_networkx(g, pos=pos, node_size=1, alpha=0.2, with_labels=False)
    # Outline the highlighted nodes through the collection returned by the
    # draw call instead of relying on its position in the axes' collections.
    highlighted = nx.draw_networkx_nodes(g, pos=pos, node_size=sizes,
                                         node_color=colors)
    highlighted.set_edgecolor('#000000')
def iniciar_test(self):
    """Run the line-detection pipeline on a sample image and check its quality.

    The sample image is binarised by colour distance to the reference colour
    RGB [255, 8, 0], thinned, and passed through Hough detection, graph
    combination and approximate k-components. The resulting segments are
    painted and saved, the painted image is compared with the binarised one,
    and the usual confusion-matrix metrics (TPR, TNR, PPV, NPV, FPR, FDR,
    FNR, ACC, F1, MCC, BM, MK) are asserted against fixed thresholds and
    printed.
    """
    actual = os.getcwd()
    procesado_de_imagen = ProcesadoDeImagen()
    # An instance is required: calling combina2/segmentos_verdad on the class
    # itself would bind the first positional argument as ``self``.
    procesado_de_lineas = ProcesadoDeLineas()
    img = procesado_de_imagen.leer_imagen(
        actual + "/Test/codigo/calidad/imagenesPrueba/1-350-7547 1.jpg")

    # Binarise the image by its mean Lab distance to the reference colour.
    ref_l, ref_a, ref_b = self.pixel_rgb_2lab([255, 8, 0])
    lab = rgb2lab(img)
    distance = abs(lab - [ref_l, ref_a, ref_b]).mean(axis=2)
    im = self.binarizar(distance)
    # tru_positive_inicial,tru_negative_inicial,false_positive_inicial,false_negative_inicial = self.inicial_test(im,img)
    # print(tru_positive_inicial,tru_negative_inicial)
    # print(false_positive_inicial,false_negative_inicial)

    sin_ruido = procesado_de_imagen.reducir_grosor(im)
    lines = procesado_de_imagen.pro_hough(10, 5, 11, sin_ruido)
    G = nx.Graph()
    G = procesado_de_lineas.combina2(4, 8, 4, 1, lines, G)
    k_components = apxa.k_components(G)
    segmentos_de_verdad = procesado_de_lineas.segmentos_verdad(
        k_components, lines)

    pathh = actual + "/Test/codigo/calidad/imagenesPrueba/1-350-7547 1SinPintar.jpg"
    temp = actual + "/Test/codigo/calidad/imagenesPrueba/"
    self.guardar_y_pintar(pathh, temp, segmentos_de_verdad)
    img_pintada = procesado_de_imagen.leer_imagen(
        actual + "/Test/codigo/calidad/imagenesPrueba/calculada.jpg")

    # calcula_medidas returns the counts in the order TP, FN, FP, TN.
    TP, FN, FP, TN = self.calcula_medidas(img_pintada, im)
    P = TP + FN
    N = FP + TN

    TPR = TP / (TP + FN)
    self.assertGreater(TPR, 0.6)
    TNR = TN / (FP + TN)
    self.assertGreater(TNR, 0.6)
    PPV = TP / (TP + FP)
    self.assertGreater(PPV, 0.6)
    NPV = TN / (TN + FN)
    self.assertGreater(NPV, 0.6)
    FPR = 1 - TNR
    self.assertGreater(0.4, FPR)
    FDR = 1 - PPV
    self.assertGreater(0.4, FDR)
    FNR = 1 - TPR
    self.assertGreater(0.4, FNR)
    ACC = (TP + TN) / (P + N)
    self.assertGreater(ACC, 0.6)
    F1 = (2 * TP) / ((2 * TP) + FP + FN)
    self.assertGreater(F1, 0.6)
    # Use the literal 0.5 so the square root does not depend on how the
    # interpreter evaluates the integer division 1 / 2.
    MCC = (TP * TN - FP * FN) / (
        (TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)) ** 0.5
    self.assertGreater(MCC, 0.6)
    BM = TPR + TNR - 1
    self.assertGreater(BM, 0.6)
    MK = PPV + NPV - 1
    self.assertGreater(MK, 0.6)

    print("TP", TP, "TN", TN, "FP", FP, "FN", FN)
    print("TPR", TPR, "TNR", TNR, "PPV ", PPV)
    print("NPV ", NPV, "FPR ", FPR, "FDR ", FDR)
    print("FNR ", FNR, "ACC ", ACC, "F1 ", F1)
    print("N", N, "P", P, "MCC", MCC, "MK", MK, "BM", BM)
    print()
# NOTE(review): the "# In[NN]:" markers suggest this is an exported notebook;
# the bare expressions below then only serve to display a cell's value.

# Pull the next item from `communities_generator`, which is created earlier
# in the file (outside this view).
top_level_communities = next(communities_generator)
top_level_communities

# In[72]:

# Advance the same generator once more to obtain the following item.
next_level_communities = next(communities_generator)
next_level_communities

# In[54]:

# The result is not stored; it is only evaluated (and displayed).
approximation.k_components(G)

# In[55]:

## This crashes the session
approximation.max_clique(G)

# In[86]:

#lab.write_pandas_to_csv_on_gcs(bucket='swe-files' ,data=dfxtest2 ,fileName='swe-files/dfxtest2.csv')
G = nx.karate_club_graph()


######### Get the label of each network node, i.e. the class it belongs to ########
def build_k_number_dict(G_k_components):
    """Map every node to the largest k of any k-component containing it.

    :param G_k_components: mapping from k to an iterable of node collections,
        as returned by ``k_components``.
    :return: dict mapping each node to its k-number.
    """
    k_components_dict = {}
    # Ascending k: a node that also appears at a higher level is overwritten,
    # so the value left for it is the highest k.
    for k, comps in sorted(G_k_components.items()):
        for comp in comps:
            for node in comp:
                k_components_dict[node] = k
    return k_components_dict


G_k_components = k_components(G)
k_components_dict = build_k_number_dict(G_k_components)

######### Node colour settings ########
colors = ['red', 'green', 'blue', 'yellow']
# Build the colour list in the graph's node order, so that entry i belongs to
# the i-th node instead of depending on the insertion order of the dict.
color = [colors[k_components_dict[node] - 1] for node in G.nodes()]

#################### Get the input matrix of the graph and the node feature matrix ####################
########### Adjacency matrix of the graph ###########
# `nx.adj_matrix` was an alias that NetworkX 3.0 removed; `adjacency_matrix`
# is the same function under its canonical name.
# This would also work: A = to_numpy_matrix(G, nodelist=sorted(list(G.nodes())))
adj = nx.adjacency_matrix(G)
nodes = adj.shape[0]

########### Self-loop matrix of the graph = adjacency matrix + self-loop (identity) matrix ###########
def get_components(img_binary):
    """Return the filtered approximate 2-components of a binary image's graph.

    The image is converted to an ``int16`` array, turned into a graph with
    ``creating_graph``, and the components found for k = 2 are passed through
    ``reject_comp``, whose result is returned.
    """
    pixels = np.array(img_binary, dtype='int16')
    components_by_k = apxa.k_components(creating_graph(pixels))
    return reject_comp(components_by_k[2])
def test_directed():
    """``k_components`` must raise ``NetworkXNotImplemented`` for directed graphs."""
    G = nx.gnp_random_graph(10, 0.4, directed=True)
    # Only the call under test sits inside the context manager, so the test
    # cannot pass because graph construction raised instead.
    with pytest.raises(nx.NetworkXNotImplemented):
        k_components(G)