def create_feat_mat_1(graph):
    """Build a 1 x 18 feature row of topological statistics for a single graph."""
    CCs = list(nx_clustering(graph).values())
    DCs = list(nx_average_neighbor_degree(graph).values())
    degrees = [tup[1] for tup in graph.degree()]
    edge_wts = [tup[2] for tup in graph.edges.data('weight')]

    # First three singular values of the adjacency matrix (pad with 0 for tiny graphs)
    A_mat = nx_to_numpy_matrix(graph)
    svs = np_linalg_svd(A_mat, full_matrices=False, compute_uv=False)

    if len(svs) >= 3:
        sv1 = svs[0]
        sv2 = svs[1]
        sv3 = svs[2]
    elif len(svs) >= 2:
        sv1 = svs[0]
        sv2 = svs[1]
        sv3 = 0
    else:
        sv1 = svs[0]
        sv2 = sv3 = 0

    feat_mat = np_vstack(
        (nx_density(graph), nx_number_of_nodes(graph), max(degrees),
         np_mean(degrees), np_median(degrees), np_var(degrees), max(CCs),
         np_mean(CCs), np_var(CCs), np_mean(edge_wts), max(edge_wts),
         np_var(edge_wts), np_mean(DCs), np_var(DCs), max(DCs),
         sv1, sv2, sv3)).T

    return feat_mat

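# --- Hedged usage sketch for create_feat_mat_1 (not part of the original module). ---
# It assumes the module-level aliases follow the obvious naming convention
# (nx_clustering = networkx.clustering, np_mean = numpy.mean, etc.) and that
# networkx still exposes to_numpy_matrix (removed in networkx >= 3.0, where
# to_numpy_array is the drop-in replacement).
def _demo_create_feat_mat_1():
    import networkx as nx

    # A tiny weighted graph: a triangle plus one pendant node.
    g = nx.Graph()
    g.add_weighted_edges_from([
        ('a', 'b', 0.9),
        ('b', 'c', 0.8),
        ('a', 'c', 0.7),
        ('c', 'd', 0.4),
    ])

    feats = create_feat_mat_1(g)
    print(feats.shape)   # expected: (1, 18) - one row of 18 features
    print(feats)

# _demo_create_feat_mat_1()  # uncomment to try
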
def data_handing_after_handing(self, data=None, dt_max=0.013, dt_min=0.006, var_threshold=1000):
    '''
    Process the data after the rough OD calculation and return the real OD.
    :param data: the data to be processed; it contains only one key
    :param dt_max: confidence-interval upper-limit parameter
    :param dt_min: confidence-interval lower-limit parameter
    :param var_threshold: variance threshold
    :return: the real (filtered) OD data
    '''
    if data is None:   # avoid a mutable default argument
        data = {}
    # The default threshold (1000) keeps the original behaviour; any other value of
    # var_threshold switches to the stricter static cut-off of 500.
    static_threshold = 1000 if var_threshold == 1000 else 500
    for i in data:
        var_tem = np_var(data[i])
        if var_tem <= static_threshold:
            # static state
            key = list(data.keys())[0]
            key = self.all_board_address.index(key)
            # print('key=', key)
            # print('ave=', ave)
            # ave holds the per-board means computed by clc_var_ave()
            max_lim = ave[key] + ave[key] * dt_max  # upper limit of the confidence interval
            min_lim = ave[key] - ave[key] * dt_min  # lower limit of the confidence interval
            for i in data:
                # keep only readings inside the confidence interval
                # (building a new list avoids removing items while iterating)
                data[i] = [j for j in data[i] if min_lim <= j <= max_lim]
            return data
        else:
            # state of agitation
            for i in data:
                data[i] = [max(data[i])]
            return data

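# --- Standalone sketch of the confidence-interval filter above (illustrative only). ---
# data_handing_after_handing() needs the instrument object (self.all_board_address) and the
# global `ave` list filled by clc_var_ave(), so this hypothetical helper only shows the core
# idea: in the static state keep readings inside [mean*(1-dt_min), mean*(1+dt_max)]; in the
# agitated state keep only the maximum reading.
def _filter_od_readings(readings, board_mean, dt_max=0.013, dt_min=0.006, var_threshold=500):
    from numpy import var as np_var   # same alias style as the rest of the module
    if np_var(readings) <= var_threshold:          # static state
        max_lim = board_mean * (1 + dt_max)        # upper confidence limit
        min_lim = board_mean * (1 - dt_min)        # lower confidence limit
        return [r for r in readings if min_lim <= r <= max_lim]
    return [max(readings)]                         # state of agitation

# Example: 130.0 falls outside the interval around the mean and is dropped.
# print(_filter_od_readings([100.2, 99.8, 130.0, 100.1], board_mean=100.0))
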
def within_cluster_similarity_statistics(cluster):
    """
    Calculate the pairwise sequence similarities within a cluster.

    Return the similarity matrix, the mean similarity and its variance.
    """
    representations = cluster.seqs
    _representations = cluster.seqs_as_list()
    lenrep = len(_representations)
    similarities = np.ones((lenrep, lenrep, 3))

    for j in range(lenrep):
        for k in range(j + 1, lenrep):
            # calculate once ...
            sim = diff_sequences(_representations[j], _representations[k])
            # ... but fill both triangles of the matrix
            similarities[j, k, :] = [
                representations[j].id, representations[k].id, sim
            ]
            similarities[k, j, :] = [
                representations[k].id, representations[j].id, sim
            ]

    average_rep_sim = np_mean(similarities[:, :, 2])
    var_rep_sim = np_var(similarities[:, :, 2])

    return similarities, average_rep_sim, var_rep_sim

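# --- Standalone illustration of the fill-both-triangles pattern above (hypothetical helper). ---
# It avoids the project's Cluster/diff_sequences objects by using difflib.SequenceMatcher as a
# stand-in similarity, computing each pair once and mirroring it, exactly like the function above.
def _pairwise_similarity_matrix(seqs):
    import numpy as np
    from difflib import SequenceMatcher

    n = len(seqs)
    sims = np.ones((n, n))                    # diagonal stays 1.0 (self-similarity)
    for j in range(n):
        for k in range(j + 1, n):
            sim = SequenceMatcher(None, seqs[j], seqs[k]).ratio()  # compute once
            sims[j, k] = sims[k, j] = sim                          # fill both triangles
    return sims

# m = _pairwise_similarity_matrix(["ACGT", "ACGA", "TCGA"])
# print(m)                    # symmetric 3 x 3 matrix
# print(m.mean(), m.var())    # the same statistics the function above reports
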
def transfer_same_dist(test_list, train_list, com_comp, test_rem):
    if len(test_rem) == 0:
        # nothing to match against; keep the return shape consistent with the main path
        return test_list, train_list

    sizes = [len(line) for line in test_rem]
    mean_test_size = np_mean(sizes)
    sd = sqrt(np_var(sizes))

    if sd != 0:
        # sample train lines with probability proportional to the test size distribution
        test_rem_dist = norm_dist(mean_test_size, sd)
        p_dist = [test_rem_dist.pdf(len(line)) for line in train_list]
        norm_ct = sum(p_dist)
        if norm_ct != 0:
            p_dist = [val / norm_ct for val in p_dist]
        train_rem = rand_choice(train_list, size=com_comp, replace=False, p=p_dist)
    else:
        # degenerate distribution: take lines whose size equals the (common) test size
        train_rem = [
            line for line in train_list if len(line) == mean_test_size
        ][:com_comp]

    # rand_choice returns a numpy array, so convert before concatenating with a list
    test_list = test_list + list(train_rem)
    for line in train_rem:
        train_list.remove(line)

    return test_list, train_list

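# --- Hedged usage sketch for transfer_same_dist (not from the original code). ---
# It assumes the module-level aliases one would expect from the names used above:
# sqrt = math.sqrt, norm_dist = scipy.stats.norm, rand_choice = numpy.random.choice,
# np_mean/np_var = numpy.mean/numpy.var. Lines are modelled as strings so that
# len(line) is their size and numpy can sample them directly.
def _demo_transfer_same_dist():
    import random
    random.seed(0)

    train = ["".join(random.choices("ABCDEFGH", k=random.randint(2, 8))) for _ in range(50)]
    test = ["ABC", "ABCD"]
    test_rem = ["ABC", "ABCD", "ABCDE"]          # sizes 3, 4, 5 define the target distribution

    new_test, new_train = transfer_same_dist(test, train, com_comp=5, test_rem=test_rem)
    print(len(new_test), len(new_train))         # 5 lines moved: 7 and 45
    print([len(line) for line in new_test])      # transferred sizes cluster around 3-5

# _demo_transfer_same_dist()  # uncomment to try
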
def test(data=None, precision_bp=2000, nb_bp=3, taille_fenetre=10, breakp=None, abscisse=None):
    """Parameters"""
    # data
    if data is None:
        data = [
            580.38, 581.86, 580.97, 580.8, 579.79, 580.39, 580.42, 580.82,
            581.4, 581.32, 581.44, 581.68, 581.17, 580.53, 580.01, 579.91,
            579.14, 579.16, 579.55, 579.67, 578.44, 578.24, 579.1, 579.09,
            579.35, 578.82, 579.32, 579.01, 579, 579.8, 579.83, 579.72,
            579.89, 580.01, 579.37, 578.69, 578.19, 578.67, 579.55, 578.92,
            578.09, 579.37, 580.13, 580.14, 579.51, 579.24, 578.66, 578.86,
            578.05, 577.79, 576.75, 576.75, 577.82, 578.64, 580.58, 579.48,
            577.38, 576.9, 576.94, 576.24, 576.84, 576.85, 576.9, 577.79,
            578.18, 577.51, 577.23, 578.42, 579.61, 579.05, 579.26, 579.22,
            579.38, 579.1, 577.95, 578.12, 579.75, 580.85, 580.41, 579.96,
            579.61, 578.76, 578.18, 577.21, 577.13, 579.1, 578.25, 577.91,
            576.89, 575.96, 576.8, 577.68, 578.38, 578.52, 579.74, 579.31,
            579.89, 579.96, 579.96, 579.96
        ]
    # precision_bp: granularity of the split used to find the breakpoints
    # nb_bp: number of breakpoints (> 0)

    # Print the variance and mean of the data
    print("variance = ", np_var(data))
    print("standard deviation = ", np_var(data)**0.5)
    print("mean = ", np_mean(data))

    # Compute the integral of the fitted Gaussian
    mu = np_mean(data)
    sig = np_var(data)
    ecart = (max(data) - min(data))
    integral_g = quad(gaussian, min(data) - ecart, max(data) + ecart,
                      args=(mu, sig))
    print("Gaussian integral", integral_g)
    print(mu, sig, ecart)

    # Call the SAX function
    vector_c, vector_c_fit = sax(data, taille_fenetre)

def precision(PE):
    """
    Calculate precision as the inverse variance of the updated prediction error.

    Return the updated precision.
    """
    with np.errstate(all='raise'):
        try:
            variance = np_var(PE)  # alternatives: np_var(PE, ddof=1), mad(PE)
            # floor the variance at 1e-5 so that 1 / variance stays bounded
            variance = variance if variance > 0.00001 else 0.00001
            pi = np_log(1. / variance)  # log-precision, capped at ln(1e5) by the floor
            # logistic squashing centred at 2.5 keeps the result in (0, 1)
            new_precision = 1 / (1 + np.exp(-(pi - 2.5)))
            return new_precision  # , variance
        except Exception as e:
            raise Exception("RuntimeWarning in precision(PE):", str(e), "PE:", PE) from e

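# --- Hedged usage sketch for precision() (illustrative values; assumes the module aliases
#     np_var = numpy.var and np_log = numpy.log). ---
# Small prediction errors drive the variance towards the 1e-5 floor and the precision towards 1;
# large, noisy errors drive the precision towards 0.
def _demo_precision():
    import numpy as np
    print(precision(np.array([0.001, -0.002, 0.0015, -0.001])))   # close to 1
    print(precision(np.array([5.0, -7.0, 9.0, -11.0])))           # close to 0

# _demo_precision()  # uncomment to try
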
def clc_var_ave(self, file_dir=r'F:\test\----dif.txt', save_='yes'):
    '''
    Calculate the variance and mean of the raw OD readings for every board.
    With a variance threshold of about 450, values above it indicate that the
    fermenter is in the state of agitation.
    :param file_dir: path of the file that the results are appended to
    :param save_: whether to save the file ('yes' to save)
    '''
    global var, ave
    var = []
    ave = []
    for i in self.all_board_address:
        # min_len is the common truncation length of the OD series (defined elsewhere)
        var.append(np_var(self.OD_raw_value_ON_OFF[i][:min_len]))
        ave.append(np_average(self.OD_raw_value_ON_OFF[i][:min_len]))
    if save_ == 'yes':
        with open(file_dir, 'a') as file:
            for i in var:
                file.write(str(i) + '\t')
            file.write('\n')
            for i in ave:
                file.write(str(i) + '\t')
            file.write('\n')

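# --- Illustrative check of the ~450 variance threshold mentioned above (synthetic numbers,
#     not real OD data). ---
# A quiet (static) trace stays far below the threshold, while a stirred trace exceeds it.
def _demo_variance_threshold():
    from numpy import var as np_var, average as np_average
    static_trace = [102.0, 101.5, 102.3, 101.9, 102.1]
    agitated_trace = [60.0, 140.0, 55.0, 150.0, 70.0]
    for name, trace in (("static", static_trace), ("agitated", agitated_trace)):
        print(name, "var =", round(np_var(trace), 1), "mean =", round(np_average(trace), 1))
    # expected: static var ~ 0.1 (well under 450), agitated var ~ 1700 (well over 450)

# _demo_variance_threshold()  # uncomment to try
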
def inter_cluster_similarity_statistics(cluster_a, cluster_b):
    """
    Calculate the similarities between the two clusters' sequences.

    Return the similarity matrix, the mean similarity and its variance.
    """
    seqs_a = cluster_a.seqs_as_list()
    seqs_b = cluster_b.seqs_as_list()
    lenrep_a = len(cluster_a.seqs)
    lenrep_b = len(cluster_b.seqs)
    similarities = np.ones((lenrep_a, lenrep_b, 3))

    for j in range(lenrep_a):
        for k in range(lenrep_b):
            # sadly we have to compare all of them:
            # calculate the similarity between the two sequences
            sim = diff_sequences(seqs_a[j], seqs_b[k])
            similarities[j, k, :] = [
                cluster_a.seqs[j].id, cluster_b.seqs[k].id, sim
            ]

    average_cluster_sim = np_mean(similarities[:, :, 2])
    var_cluster_sim = np_var(similarities[:, :, 2])

    return similarities, average_cluster_sim, var_cluster_sim

def test(data=None, precision_bp=2000, nb_bp=3, taille_fenetre=10, breakp=None, abscisse=None):
    """Parameters"""
    # data
    if data is None:
        data = [
            580.38, 581.86, 580.97, 580.8, 579.79, 580.39, 580.42, 580.82,
            581.4, 581.32, 581.44, 581.68, 581.17, 580.53, 580.01, 579.91,
            579.14, 579.16, 579.55, 579.67, 578.44, 578.24, 579.1, 579.09,
            579.35, 578.82, 579.32, 579.01, 579, 579.8, 579.83, 579.72,
            579.89, 580.01, 579.37, 578.69, 578.19, 578.67, 579.55, 578.92,
            578.09, 579.37, 580.13, 580.14, 579.51, 579.24, 578.66, 578.86,
            578.05, 577.79, 576.75, 576.75, 577.82, 578.64, 580.58, 579.48,
            577.38, 576.9, 576.94, 576.24, 576.84, 576.85, 576.9, 577.79,
            578.18, 577.51, 577.23, 578.42, 579.61, 579.05, 579.26, 579.22,
            579.38, 579.1, 577.95, 578.12, 579.75, 580.85, 580.41, 579.96,
            579.61, 578.76, 578.18, 577.21, 577.13, 579.1, 578.25, 577.91,
            576.89, 575.96, 576.8, 577.68, 578.38, 578.52, 579.74, 579.31,
            579.89, 579.96, 579.96, 579.96
        ]
    # precision_bp: granularity of the split used to find the breakpoints
    # nb_bp: number of breakpoints (> 0)

    fig = plt.figure(figsize=(15, 10))

    # TODO: fix the axes here!!!
    years = mdates.YearLocator()
    yearsFmt = mdates.DateFormatter('%Y')
    months = mdates.MonthLocator()
    days = mdates.DayLocator()
    daysFmt = mdates.DateFormatter('%d')
    hours = mdates.HourLocator()
    minutes = mdates.MinuteLocator()

    gauss = fig.add_subplot(2, 1, 1)
    # day ticks with day-of-month labels, hour minor ticks
    gauss.xaxis.set_major_locator(days)
    gauss.xaxis.set_major_formatter(daysFmt)
    gauss.xaxis.set_minor_locator(hours)
    # plt.ylabel('some numbers')

    # Print the variance and mean of the data
    print("variance = ", np_var(data))
    print("standard deviation = ", np_var(data)**0.5)
    print("mean = ", np_mean(data))

    # Compute the integral of the fitted Gaussian
    mu = np_mean(data)
    sig = np_var(data)
    ecart = (np.amax(data) - np.amin(data))
    integral_g = quad(gaussian,
                      np.amin(data) - ecart,
                      np.amax(data) + ecart,
                      args=(mu, sig))
    print("Gaussian integral", integral_g)

    # Call the SAX function
    vector_c, vector_c_fit = sax(data, taille_fenetre)

    if abscisse is None:
        plt.plot(vector_c)
        plt.plot(data)
    else:
        plt.plot(abscisse, vector_c)
        plt.plot(abscisse, data)

    if breakp is None:
        breakp = breakpoints(integral_g, min(data), np_mean(data), precision_bp,
                             nb_bp, mu, sig)

    for bp in breakp:
        print("threshold: ", bp)
        plt.axhline(bp, c='grey')
        # print("threshold 2: ", bp + 2 * (mu - bp))
        # plt.axhline(bp + 2 * (mu - bp))

    # Plot of the Gaussian
    # fig.add_subplot(2, 1, 2)
    """
    x = np_linspace(min(data) - ecart, max(data) + ecart, 100)
    plt.plot(x, gaussian(x, mu, sig))
    for bp in breakp:
        plt.axvline(bp)
    fig.add_subplot(2, 1, 1)
    """

    tab_classif = [0] * (len(breakp) + 1)
    print(tab_classif)
    """
    for val in vector_c:
        it = 0
        for var in breakp:
            if val < var:
                tab_classif[it] = tab_classif[it] + 1
                break
            it = it + 1
    """

    pos_x = 1
    for val in vector_c_fit:
        it = 0
        test = 0
        int_char = 0
        for var in breakp:
            if val < var:
                tab_classif[it] = tab_classif[it] + 1
                test = 1
                # binary conversion: str(bin(int_char))[2:]
                plt.annotate(
                    str(bin(int_char))[2:],
                    xy=(taille_fenetre * pos_x - taille_fenetre / 2, val),
                    xytext=(taille_fenetre * pos_x - taille_fenetre / 2, val + 2),
                    arrowprops=dict(facecolor='white', shrink=0.05),
                )
                break
            it = it + 1
            int_char = int_char + 1
        if test == 0:
            tab_classif[it] = tab_classif[it] + 1
            # binary conversion: str(bin(int_char))[2:]
            plt.annotate(
                str(bin(int_char))[2:],
                xy=(taille_fenetre * pos_x - taille_fenetre / 2, val),
                xytext=(taille_fenetre * pos_x - taille_fenetre / 2, val + 2),
                arrowprops=dict(facecolor='white', shrink=0.05),
            )
        pos_x = pos_x + 1

    print(tab_classif)
    plt.show()
    return breakp

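# --- Hedged alternative for computing the SAX thresholds used above (this is NOT the
#     project's breakpoints() function, just the textbook shortcut). ---
# Classic SAX picks equiprobable breakpoints, i.e. quantiles of a normal distribution fitted
# to the data; scipy.stats.norm.ppf returns them directly, without integrating the Gaussian.
def _sax_breakpoints(values, nb_bp=3):
    import numpy as np
    from scipy.stats import norm

    mu, sigma = np.mean(values), np.std(values)
    # nb_bp breakpoints split the value range into nb_bp + 1 equiprobable symbols
    quantiles = [(i + 1) / (nb_bp + 1) for i in range(nb_bp)]
    return [norm.ppf(q, loc=mu, scale=sigma) for q in quantiles]

# print(_sax_breakpoints([580.38, 581.86, 580.97, 580.8, 579.79, 580.39], nb_bp=3))
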
def create_feat_mat(graph_list, n_feats):
    dens_pos = [nx_density(graph) for graph in graph_list]
    nodes_pos = [nx_number_of_nodes(graph) for graph in graph_list]

    # CC statistics - mean and max - faster to use a big loop mostly
    CC_mean = []
    CC_mean_append = CC_mean.append
    CC_max = []
    CC_max_append = CC_max.append
    CC_var = []
    CC_var_append = CC_var.append

    # Degree correlation - avg degree of the neighborhood
    DC_mean = []
    DC_mean_append = DC_mean.append
    DC_max = []
    DC_max_append = DC_max.append
    DC_var = []
    DC_var_append = DC_var.append

    # Degree statistics
    degree_mean = []
    degree_mean_append = degree_mean.append
    degree_max = []
    degree_max_append = degree_max.append
    degree_median = []
    degree_median_append = degree_median.append
    degree_var = []
    degree_var_append = degree_var.append

    # Edge weight statistics
    edge_wt_mean = []
    edge_wt_mean_append = edge_wt_mean.append
    edge_wt_max = []
    edge_wt_max_append = edge_wt_max.append
    edge_wt_var = []
    edge_wt_var_append = edge_wt_var.append

    # First 3 singular values
    sv1 = []
    sv1_append = sv1.append
    sv2 = []
    sv2_append = sv2.append
    sv3 = []
    sv3_append = sv3.append

    for graph in graph_list:
        CCs = list(nx_clustering(graph).values())
        CC_max_append(max(CCs))
        CC_mean_append(np_mean(CCs))
        CC_var_append(np_var(CCs))

        DCs = list(nx_average_neighbor_degree(graph).values())
        DC_max_append(max(DCs))
        DC_mean_append(np_mean(DCs))
        DC_var_append(np_var(DCs))

        degrees = [tup[1] for tup in graph.degree()]
        degree_mean_append(np_mean(degrees))
        degree_median_append(np_median(degrees))
        degree_max_append(max(degrees))
        degree_var_append(np_var(degrees))

        edge_wts = [tup[2] for tup in graph.edges.data('weight')]
        edge_wt_mean_append(np_mean(edge_wts))
        edge_wt_var_append(np_var(edge_wts))
        edge_wt_max_append(max(edge_wts))

        A_mat = nx_to_numpy_matrix(graph)
        svs = np_linalg_svd(A_mat, full_matrices=False, compute_uv=False)

        if len(svs) >= 3:
            sv1_append(svs[0])
            sv2_append(svs[1])
            sv3_append(svs[2])
        elif len(svs) >= 2:
            sv1_append(svs[0])
            sv2_append(svs[1])
            sv3_append(0)
        else:
            sv1_append(svs[0])
            sv2_append(0)
            sv3_append(0)

    feat_mat = np_vstack(
        (dens_pos, nodes_pos, degree_max, degree_mean, degree_median,
         degree_var, CC_max, CC_mean, CC_var, edge_wt_mean, edge_wt_max,
         edge_wt_var, DC_mean, DC_var, DC_max, sv1, sv2, sv3)).T

    if n_feats == 1:
        feat_mat = np_array(dens_pos).reshape(-1, 1)

    return feat_mat

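# --- Hedged usage sketch for create_feat_mat (same alias assumptions as for create_feat_mat_1). ---
def _demo_create_feat_mat():
    import networkx as nx

    # Two small weighted graphs: a triangle and a 4-clique with uniform weights.
    g1 = nx.Graph()
    g1.add_weighted_edges_from([('a', 'b', 0.9), ('b', 'c', 0.8), ('a', 'c', 0.7)])
    g2 = nx.Graph()
    g2.add_weighted_edges_from([(u, v, 0.5) for u, v in nx.complete_graph(4).edges()])

    feats = create_feat_mat([g1, g2], n_feats=18)
    print(feats.shape)                                  # expected: (2, 18) - one row per graph

    # With n_feats == 1 only the density column is kept.
    print(create_feat_mat([g1, g2], n_feats=1).shape)   # expected: (2, 1)

# _demo_create_feat_mat()  # uncomment to try
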