def visualization(self):
    """Scatter-plot the first two PCA components, one series per label.

    Reads ``self.x_pca`` (columns 0 and 1) and ``self.y``. Styling is only
    defined for the labels 0 and 1; any other label raises ``KeyError``.
    The figure is saved as ``File/PCA_visualization.png`` below the
    current working directory.
    """
    # label -> (colour, legend text, marker, opacity)
    styles = {
        0: ('red', 'without bug', '*', .3),
        1: ('green', 'bug', 'o', .5),
    }
    first_pc = self.x_pca[:, 0]
    second_pc = self.x_pca[:, 1]
    targets = self.y

    fig, ax = plt.subplots(figsize=(7, 5))
    fig.patch.set_facecolor('white')

    for value in np.unique(targets):
        members = np.where(targets == value)
        colour, caption, shape, opacity = styles[value]
        ax.scatter(
            first_pc[members],
            second_pc[members],
            c=colour,
            s=100,
            label=caption,
            marker=shape,
            alpha=opacity,
        )

    plt.xlabel("First Principal Component", fontsize=14)
    plt.ylabel("Second Principal Component", fontsize=14)
    plt.legend()

    target_file = str(pathlib.Path().absolute()) + "/File/PCA_visualization.png"
    plt.savefig(target_file)
def threshold_search(true, prob, criteria):
    """Pick the probability cut-off that maximises macro F1 and plot the sweep.

    The data is split 80/20 with ``train_test_split`` (fixed seed 1234) and
    only the training part is used. For each of 101 evenly spaced thresholds
    in [0, 1], predictions are 1 where ``prob >= threshold`` and 0 otherwise,
    scored with ``macro_weighted_f1``.

    Args:
        true: Ground-truth labels; must provide ``to_numpy()``.
        prob: Predicted probabilities, aligned with ``true``.
        criteria: Text used as the prefix of the saved plot's file name.

    Returns:
        The threshold with the highest training score. Ties go to the
        smallest threshold.

    Side effects:
        Prints the chosen threshold, saves ``'<criteria> threshold plot.png'``
        and clears the current pyplot figure.
    """
    true = true.to_numpy()
    # The held-out part of the split is not needed for the search itself.
    prob_train, _, true_train, _ = train_test_split(
        prob, true, test_size=0.2, random_state=1234)

    thresholds = np.linspace(0, 1, 101)
    all_f1_train = np.zeros(len(thresholds))
    for j, cutoff in enumerate(thresholds):
        predictions_train = np.ones(len(prob_train))
        predictions_train[prob_train < cutoff] = 0
        all_f1_train[j] = macro_weighted_f1(
            true_train, predictions_train, [0, 1])

    # argmax returns the first maximum, i.e. the smallest best threshold.
    best_index = np.argmax(all_f1_train)
    best_threshold = thresholds[best_index]
    best_f1 = all_f1_train[best_index]

    # Locate the 0.5 baseline by nearest grid point instead of float
    # equality, so axhline always receives a single scalar.
    baseline_f1 = all_f1_train[np.argmin(np.abs(thresholds - 0.5))]

    print("The best threshold for this prediction is: %s" % best_threshold)

    plt.plot(thresholds, all_f1_train, 'b')
    plt.axvline(x=0.5, linestyle=':', color='r')
    plt.axvline(x=best_threshold, linestyle='--', color='g')
    plt.axhline(y=baseline_f1, linestyle=':', color='r')
    plt.axhline(y=best_f1, linestyle='--', color='g')
    plt.xlabel("Threshold")
    plt.ylabel("Macro F1")
    plt.savefig('{0} threshold plot.png'.format(criteria), bbox_inches='tight')
    plt.clf()
    return best_threshold
def get_hist(data, col):
    """Show a 100-bin histogram of day differences and print the date values.

    ``column_and_order_dates(data, col)`` supplies two date collections,
    which are passed to ``days_difference`` together with the constant 31
    (NOTE(review): meaning of 31 is defined by ``days_difference`` --
    confirm). The first result is plotted, the second is printed.
    """
    column_dates, order_dates = column_and_order_dates(data, col)
    day_gaps, date_values = days_difference(column_dates, order_dates, 31)

    plt.hist(x=day_gaps, bins=100)
    plt.xlabel('Day')
    plt.ylabel('Amount')
    plt.show()

    print(date_values)
def create_plot(logbook, name_file):
    """Plot per-generation fitness statistics and save the figure.

    Args:
        logbook: Object whose ``select(*names)`` returns one sequence per
            requested name; the names "max", "avg", "min", "median" and
            "std" are requested.
        name_file: Destination passed to ``plt.savefig``.

    The current pyplot figure is closed after saving.
    """
    # (logbook column, line colour, legend label).
    # NOTE(review): "max" is labelled worst and "min" best, which presumes
    # a minimisation problem -- confirm against the caller.
    series = (
        ("max", 'red', "Worst Fitness"),
        ("avg", 'green', "Mean Fitness"),
        ("min", 'orange', "Best Fitness"),
        # Previously labelled "Avg. Fitness", which duplicated the mean.
        ("median", 'blue', "Median Fitness"),
        ("std", 'pink', "Std. Fitness"),
    )
    columns = logbook.select(*(key for key, _, _ in series))

    for values, (_, colour, caption) in zip(columns, series):
        plt.plot(values, color=colour, label=caption)

    plt.xlabel('Generation')
    plt.ylabel('Max / Average / Min / Median/ Std Fitness')
    plt.title('Max, Average, Min, Median and Std Fitness over Generations')
    plt.legend(loc='lower right')
    plt.savefig(name_file)
    plt.close()
def show_images(images, labels, preds):
    """Display images in a single row with true and predicted class names.

    Each image is converted with ``.numpy()``, moved from channel-first to
    channel-last order, rescaled with fixed per-channel constants
    (presumably undoing an earlier normalisation -- confirm) and clipped to
    [0, 1]. The x-label shows the true class and the y-label the predicted
    class, green when they match and red otherwise. Images are placed on a
    1x6 subplot grid; class names come from the module-level
    ``class_names``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    plt.figure(figsize=(8, 4))
    for idx, tensor in enumerate(images):
        plt.subplot(1, 6, idx + 1, xticks=[], yticks=[])

        pixels = tensor.numpy().transpose((1, 2, 0))
        pixels = np.clip(pixels * channel_std + channel_mean, 0., 1.)
        plt.imshow(pixels)

        label_colour = 'red' if preds[idx] != labels[idx] else 'green'
        plt.xlabel(f'{class_names[int(labels[idx].numpy())]}')
        plt.ylabel(f'{class_names[int(preds[idx].numpy())]}',
                   color=label_colour)

    plt.tight_layout()
    plt.show()
# Read the metric file: the last two whitespace-separated fields of every
# non-blank line are the original-graph value and the supergraph value.
with open(file_graph) as f:
    content = [x.strip() for x in f]

original_cc = []
supergraph_cc = []
for line in content:
    # split() without an argument yields [] for a blank line, so the guard
    # really skips it (split(" ") returned [''] and float('') then failed).
    value = line.split()
    if value:
        supergraph_cc.append(float(value.pop()))
        original_cc.append(float(value.pop()))

# Clustering coefficient of both graphs against k.
plt.clf()
plt.plot(k_array, original_cc, 'r--', k_array, supergraph_cc, 'g-')
plt.ylabel("Clustering Coefficent")
plt.xlabel("k_degree")
plt.legend(('Original Graph', 'Supergraph'), loc='lower center', shadow=True)
plt.title(str(sys.argv[4]))
# File name used when the web dataset is chosen; the original kept
# "metric_cc_socfb.png" as a commented-out alternative.
plt.savefig("metric_cc_web.png")

# Start a fresh figure for the norm-versus-k curve.
plt.clf()
list_norm = [float(i) for i in norm]
list_k_array = [float(i) for i in k_array]
plt.plot(list_k_array, list_norm, 'r--')
# The k values are taken from a file that holds a single line.
if size == 1:
    for line in content:
        k_array = line.split(" ")

# Second command-line argument: file holding the ratio values.
file_graph = str(sys.argv[2])
if os.path.exists(file_graph):
    with open(file_graph) as f:
        content = [x.strip() for x in f]
    # `NULL` is not a Python name; None is the intended "not read yet" value.
    # ratio stays None unless the file holds exactly one line.
    ratio = None
    size = len(content)
    if size == 1:
        ratio = content[0].split(" ")

list_ratio = [float(i) for i in ratio]
list_k_array = [float(i) for i in k_array]

# Ratio against k, saved as a PNG.
plt.clf()
plt.figure(figsize=(16, 10))
plt.ylabel("Ratio")
plt.xlabel("k_degree")
plt.plot(list_k_array, list_ratio, 'r--')
plt.title("Graph friend 1000 10 100")
plt.savefig("ratio_fakedataset.png", dpi=120)
epochs = 5  # train for 5 epochs
model1.summary()  # print the model architecture
model1.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])

# Hold out 20% of the training set for validation.
history = model1.fit(x_train,
                     y_train,
                     batch_size=batch_size,
                     epochs=epochs,
                     validation_split=0.2)

# ---------------- Save the model and visualise it ----------------
# Save the model.
model1.save('model_CNN_text.h5')
# Draw the model graph; the keyword is `show_shapes` (`show_shape` is not
# an accepted argument).
plot_model(model1, to_file='model_CNN_text.png', show_shapes=True)
# Reload the saved model and run it on the first training sample.
model = load_model('model_CNN_text.h5')
y_new = model.predict(x_train[0].reshape(1, 50))

# Plot the training history. Depending on the Keras version the accuracy
# metric is recorded as 'acc' or 'accuracy', so use whichever is present.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Valid'], loc='upper left')
plt.savefig('Valid_acc.png')
plt.show()
# ---------------- Inspect the decoding result ----------------
decoded_imgs = model.predict(x_test)
n = 10
plt.figure(figsize=(20, 6))
for i in range(n):
    # Row 0 shows the original image, row 1 the decoded image; both are
    # drawn as 28x28 grayscale pictures with hidden axes.
    for row, source in enumerate((x_test, decoded_imgs)):
        ax = plt.subplot(3, n, row * n + i + 1)
        plt.imshow(source[i].reshape(28, 28))
        plt.gray()
        for axis in (ax.get_xaxis(), ax.get_yaxis()):
            axis.set_visible(False)
plt.show()

# ---------------- Visualise the training process ----------------
print(history.history.keys())
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
workers=10) #------------------------------------------------------保存模型--------------------------------------- model.summary() #判断路径是否存在,不存在创建 if not os.path.isdir(save_dir): os.makedirs(save_dir) model_path = os.path.join(save_dir, model_name) model.save(model_path) #保存模型 #------------------------------------------------------训练过程可视化----------------------------------- #绘制训练与验证的准确率值 plt.plot(history.history['acc']) plt.plot(history.history['val_acc']) plt.title('Model accuracy') plt.ylabel('Accuracy') plt.xlabel('Epoch') plt.legend(['Train', 'Valid'], loc='upper left') plt.savefig('tradition_cnn_valid_acc.png') plt.show() #绘制训练与验证的损失 plt.plot(history.history['loss']) plt.plot(history.history['val_loss']) plt.title('Model loss') plt.ylabel('Loss') plt.xlabel('Epoch') plt.legend(['Train', 'Valid'], loc='upper left') plt.savefig('tradition_cnn_valid_loss.png') plt.show()