def test_prec_recall():
    """Print precision, recall and interpolated precision for two fixed rankings.

    Each ranking is a list of "R"/"F" markers that is passed to
    ``utils.precision_recall``; the resulting precision list is then passed to
    ``utils.interpolate_p``. Nothing is asserted, the values are only printed.
    """
    rankings = (
        ["R", "F", "R", "F", "F", "F", "F", "F", "R", "R"],
        ["F", "R", "F", "F", "R", "R", "R", "F", "F", "F"],
    )
    for ranking in rankings:
        precision, recall = utils.precision_recall(ranking)
        interpolated_p = utils.interpolate_p(precision)
        report = (
            ("precision = {0}", precision),
            ("recall = {0}", recall),
            ("interpolated_p = {0}", interpolated_p),
        )
        for template, value in report:
            print(template.format(value))
def show_result(sess):
    """Score the generator on random batches and print the averaged metrics.

    For each of ``num_of_vis_batch`` batches the scenes are loaded and cut to
    ``scene_shape``, split along the last axis at ``halfed_scene_shape`` into
    the network input and the ground truth, and run through
    ``ConvNet_class.generator``. Per-class precision/recall (from
    ``utils.precision_recall``) and the two accuracies returned by ``accuFun``
    are accumulated, then their per-batch averages are printed.

    Args:
        sess: session object exposing ``run(fetches, feed_dict=...)``.
    """
    # The message is kept verbatim for output compatibility; this code path
    # only computes metrics and writes no files.
    print("Creating ply files...")

    precision = np.zeros(classes_count)
    recall = np.zeros(classes_count)
    accu1_all, accu2_all = 0.0, 0.0

    for _ in range(num_of_vis_batch):
        # NOTE(review): batches are drawn from train_directory even though the
        # variable is called test_data -- confirm this is intended.
        test_data = utils.fetch_random_batch(train_directory, batch_size)
        batch_arr = [utils.npy_cutter(np.load(test), scene_shape)
                     for test in test_data]
        bs = len(batch_arr)
        batch_arr = np.reshape(
            batch_arr, (bs, scene_shape[0], scene_shape[1], scene_shape[2]))

        # First part of the last axis is the input, the remainder the target.
        trData = batch_arr[:, 0:scene_shape[0], 0:scene_shape[1],
                           0:halfed_scene_shape]
        trLabel = batch_arr[:, 0:scene_shape[0], 0:scene_shape[1],
                            halfed_scene_shape:scene_shape[2]]
        trData = np.reshape(
            trData, (-1, scene_shape[0] * scene_shape[1] * halfed_scene_shape))

        score = sess.run(ConvNet_class.generator,
                         feed_dict={x: trData, keepProb: 1.0, phase: False})
        score = np.reshape(score, (-1, scene_shape[0], scene_shape[1],
                                   halfed_scene_shape, classes_count))
        # Collapse the class axis to the index of the highest score.
        score = np.argmax(score, 4)
        score = np.reshape(
            score, (-1, scene_shape[0], scene_shape[1], halfed_scene_shape))

        # NOTE(review): batch_size is passed here while bs (the number of
        # scenes actually loaded) is used elsewhere -- confirm they agree.
        pre, rec = utils.precision_recall(score, trLabel, batch_size,
                                          classes_count)
        precision += pre
        recall += rec

        accu1, accu2 = accuFun(sess, trData, trLabel, bs)
        accu1_all += accu1
        accu2_all += accu2
        logging.info("A1: %g, A2: %g", accu1, accu2)
        print("A1: %g, A2: %g" % (accu1, accu2))

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print(precision / num_of_vis_batch * 1.0)
    print(recall / num_of_vis_batch * 1.0)
    print(accu1_all / num_of_vis_batch * 1.0)
    print(accu2_all / num_of_vis_batch * 1.0)
def eval(self, query, retrieval, similarity_matrix, query_modal, retrieval_modal, dis_metric, radius=None):
    """Restore the saved encoders and compute retrieval metrics.

    The query and retrieval sets are each encoded with the image encoder when
    their modality is ``'img'`` and with the text encoder otherwise. mAP and
    top-k precision (k = 10, 20, 50, 100, 500) are then computed with
    ``dis_metric``.

    Returns:
        ``(MAP, top_k_precision)``; when ``dis_metric == 'hash'`` the tuple is
        ``(MAP, top_k_precision, MAP_comp, top_k_precision_comp, precision,
        recall)``, where the ``*_comp`` values are recomputed with the
        ``'cosine'`` metric.

    ``radius`` is accepted but not used by this method.
    """

    def encode(session, samples, modal):
        # Select the latent tensor and input placeholder for the modality.
        if modal == 'img':
            latent, placeholder = self.image_latent, self.image_ph
        else:
            latent, placeholder = self.text_latent, self.text_ph
        feed = {
            placeholder: samples,
            self.keep_prob_ph: 1.0,
            self.training_ph: False
        }
        return session.run(latent, feed_dict=feed)

    restorer = tf.train.Saver(
        var_list=self.image_encoder_vars + self.text_encoder_vars)
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as sess:
        restorer.restore(sess, 'saved_model/model.ckpt')
        query_latent = encode(sess, query, query_modal)
        retrieval_latent = encode(sess, retrieval, retrieval_modal)

        MAP = optimized_mAP(query_latent, retrieval_latent,
                            similarity_matrix, dis_metric=dis_metric)
        top_k_precision = precision_top_k(query_latent, retrieval_latent,
                                          similarity_matrix,
                                          [10, 20, 50, 100, 500], dis_metric)

        if dis_metric != 'hash':
            return MAP, top_k_precision

        # Hash codes are additionally scored with the cosine metric.
        MAP_comp = optimized_mAP(query_latent, retrieval_latent,
                                 similarity_matrix, dis_metric='cosine')
        top_k_precision_comp = precision_top_k(query_latent, retrieval_latent,
                                               similarity_matrix,
                                               [10, 20, 50, 100, 500],
                                               dis_metric='cosine')
        precision, recall = precision_recall(query_latent, retrieval_latent,
                                             similarity_matrix)
        return (MAP, top_k_precision, MAP_comp, top_k_precision_comp,
                precision, recall)
def _evaluate_classifier(name, make_classifier, X, y, random_states,
                         show_progress=False):
    """Fit and score one classifier on a series of seeded train/test splits.

    Args:
        name: label printed above the metrics.
        make_classifier: zero-argument callable returning a fresh, unfitted
            estimator; it is called once per split.
        X: feature matrix. Column 0 is removed before fitting and predicting.
        y: target matrix; column 0 is the class label that is learned, and the
            full rows are passed to ``utils.pos_error``.
        random_states: seeds, one 80/20 split per seed.
        show_progress: when true, print the run index before each split.

    Returns:
        ``(errors, overall_precision, top10_precision, top10_recall,
        top10_f)`` where ``errors`` is the list of per-split results from
        ``utils.pos_error`` and the other four are averages over all splits.
    """
    start = datetime.datetime.now()
    errors = []
    overall_pres = []
    top10_pres = []
    top10_recalls = []
    top10_fs = []

    for run, seed in enumerate(random_states):
        if show_progress:
            print(run)
        # Split into training and validation sets.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=seed)
        clf = make_classifier()
        y_pred = clf.fit(np.delete(X_train, 0, axis=1),
                         y_train[:, 0]).predict(np.delete(X_test, 0, axis=1))
        overall_pre, top10_pre, top10_recall, top10_f = utils.precision_recall(
            y_test[:, 0], y_pred)
        overall_pres.append(overall_pre)
        top10_pres.append(top10_pre)
        top10_recalls.append(top10_recall)
        top10_fs.append(top10_f)
        errors.append(utils.pos_error(y_test, y_pred))

    overall_mean = np.mean(np.array(overall_pres))
    top10_pre_mean = np.array(top10_pres).mean(axis=0).mean()
    top10_recall_mean = np.array(top10_recalls).mean(axis=0).mean()
    top10_f_mean = np.array(top10_fs).mean(axis=0).mean()

    print(name)
    print("Overall precision: %.3f" % overall_mean)
    print("Top10 precision: %.3f" % top10_pre_mean)
    print("Top10 recall: %.3f" % top10_recall_mean)
    print("Top10 f-measurement: %.3f" % top10_f_mean)
    print("Median error: {}".format(
        np.percentile(np.array(errors).mean(axis=0), 50)))
    print("Time spend: {}".format(datetime.datetime.now() - start))
    print("****************************")

    return errors, overall_mean, top10_pre_mean, top10_recall_mean, top10_f_mean


def main():
    """Benchmark seven classifiers on the grid-labelled data and plot results.

    The data comes from ``utils.gongcan_to_ll`` / ``utils.ll_to_grid``. Every
    classifier is evaluated on the same ten seeded splits; the collected
    errors and averaged scores are passed to ``utils.cdf_figure`` and
    ``utils.figure``.
    """
    ll_data_2g = utils.gongcan_to_ll()
    train_data = utils.ll_to_grid(ll_data_2g)

    # Remove the original ID columns; they are not used as training features.
    id_columns = [prefix + str(i)
                  for i in range(1, 8)
                  for prefix in ('RNCID_', 'CellID_')]
    train_data.drop(id_columns, axis=1, inplace=True)
    # Fill missing signal strengths with 0.
    train_data = train_data.fillna(0)

    # Features and labels. ``.values`` replaces ``DataFrame.as_matrix()``,
    # which was removed in pandas 1.0.
    X = train_data.drop(
        ['MRTime', 'Longitude', 'Latitude', 'Num_connected', 'grid_num'],
        axis=1, inplace=False).values
    y = train_data[['grid_num', 'Longitude', 'Latitude']].values

    # Fixed seeds so that every classifier sees exactly the same splits.
    random_states = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

    # (printed name, estimator factory, print per-run progress)
    classifiers = [
        ("Gaussian", GaussianNB, False),
        ("KNeighbors", KNeighborsClassifier, False),
        ("DecisionTree", DecisionTreeClassifier, False),
        ("RandomForest",
         lambda: RandomForestClassifier(max_depth=20, random_state=0),
         False),
        ("AdaBoost",
         lambda: AdaBoostClassifier(
             base_estimator=DecisionTreeClassifier(max_depth=20),
             learning_rate=0.01,
             n_estimators=30,
             algorithm='SAMME.R'),
         False),
        ("Bagging", lambda: BaggingClassifier(n_estimators=20), False),
        ("GradientBoosting",
         lambda: GradientBoostingClassifier(n_estimators=60,
                                            learning_rate=0.01),
         True),
    ]

    errors_all = []
    top10_pres_all = []
    top10_recalls_all = []
    top10_fs_all = []
    overall_pres_all = []

    for name, make_classifier, show_progress in classifiers:
        errors, overall_pre, top10_pre, top10_recall, top10_f = (
            _evaluate_classifier(name, make_classifier, X, y, random_states,
                                 show_progress=show_progress))
        errors_all.append(errors)
        overall_pres_all.append(overall_pre)
        top10_pres_all.append(top10_pre)
        top10_recalls_all.append(top10_recall)
        top10_fs_all.append(top10_f)

    utils.cdf_figure(errors_all)
    utils.figure(overall_pres_all, top10_pres_all, top10_recalls_all,
                 top10_fs_all)
model.load_weights(options.model_weights) opt = keras.optimizers.Adam(float(options.lr)) model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy']) raise KeyboardInterrupt print 'Loss and Validations history stored in ', options.outputFile + '_' + curr_time #model.save_weights('../Output/ModelParams/LSTM_params_BiDi_%s.h5'%curr_time) print 'Best Model parameters stored at ../Output/ModelParams/LSTM_BiDi_%s.h5' % curr_time mfalseLength, mactualLength = utils.analyze_false(validData, validDataNumbers, validLabels, model) c1p, c1r, c0p, c0r, acc, c1f, c0f = utils.precision_recall( validDataNumbers, validLabels, model, weightsPath=options.outputWeights) print 'Run %d results :-' % (run + 1) scores['c1_precision'].append(c1p) scores['c1_recall'].append(c1r) scores['c0_precision'].append(c0p) scores['c0_recall'].append(c0r) scores['accuracy'].append(acc) scores['c1_fscore'].append(c1f) scores['c0_fscore'].append(c0f) scores['mean_actual_length'].append(mactualLength) scores['mean_false_length'].append(mfalseLength) scores['sample_info'].append( (len(trainData), pos_train_samples, len(validData), pos_valid_samples))
curr_time = datetime.datetime.strftime(datetime.datetime.now(), '%dth-%H:%M:%S') with open(options.outputFile+'_'+curr_time + '.pkl','w') as f: json.dump(Hist.history,f) elif os.path.exists(options.model_weights): model.load_weights(options.model_weights) opt = keras.optimizers.Adam(float(options.lr)) model.compile(loss = 'binary_crossentropy',optimizer = opt,metrics = ['accuracy'] ) raise KeyboardInterrupt print 'Loss and Validations history stored in ',options.outputFile+'_'+curr_time #model.save_weights('../Output/ModelParams/LSTM_params_BiDi_%s.h5'%curr_time) print 'Best Model parameters stored at ../Output/ModelParams/LSTM_BiDi_%s.h5' %curr_time mfalseLength,mactualLength = utils.analyze_false(validData,validDataNumbers,validLabels,model) c1p,c1r,c0p,c0r,acc,c1f,c0f = utils.precision_recall(validDataNumbers,validLabels,model,weightsPath = options.outputWeights) print 'Run %d results :-' %(run+1) scores['c1_precision'].append(c1p) scores['c1_recall'].append(c1r) scores['c0_precision'].append(c0p) scores['c0_recall'].append(c0r) scores['accuracy'].append(acc) scores['c1_fscore'].append(c1f) scores['c0_fscore'].append(c0f) scores['mean_actual_length'].append(mactualLength) scores['mean_false_length'].append(mfalseLength) scores['sample_info'].append((len(trainData),pos_train_samples,len(validData),pos_valid_samples)) results_info = curr_time + '\tHyperParameters:- \nWord-Index Dictionary : %s \tWordvectors file : %s \tLearning Rate : %f \t split-ratio : %f \tEpochs : %d \tOutput Weights : %s \tResults File : %s\n Neurons : %s \nModel-Layers: %s\nResults averaged over %d runs' %(options.vec_dict, options.pretrained, float(options.lr), float(options.split), int(options.nEpochs),options.outputWeights, options.outputFile,str(options.neurons),str(options.nLayers),int(options.runs)) results += '\nLabel : %s' %options.label
def main():
    """Run the face-comparison pipeline end to end.

    Steps, in order: choose how many people to scan, build the encodings,
    compare them, draw the graphs, compute precision/recall, rank lookalikes
    and dissimilar same-person pairs, render the two image montages and the
    first-name word cloud, and finally write timings and failure details.
    """
    overall_start_time = datetime.datetime.now()

    # Let the user restrict the run to a subset of the faces.
    scan_settings = get_number_faces_to_scan(lfw_path, overall_start_time)
    people_count, attempting_all, prefix, faces_to_scan = scan_settings

    # Build up the encodings dataset.
    built = encodings_builder(lfw_path, people_count, faces_to_scan,
                              IMAGES_TO_EXCLUDE)
    all_encodings, encodings_start_time, lists_of_images = built

    # Compare the encodings.
    compared = encodings_comparer(all_encodings)
    same_df, different_df, comparisons_start_time, comparison_counter = compared

    # Make graphs.
    graph_start_time = all_graphs(same_df, different_df, comparison_counter,
                                  lists_of_images, prefix, doing_graphs,
                                  CUMULATIVE_GRAPHS)

    # Calculate precision and recall.
    precision_recall_start_time = precision_recall(
        same_df, different_df, prefix, doing_precision_recall)

    # Find lookalikes and different-looking images of the same person.
    ranked = output_most_similar_different_people_and_most_different_same_faces(
        different_df, same_df, prefix)
    different_sorted, same_sorted = ranked

    # Montage images: lookalikes first, then dissimilar same-person pairs.
    montages = (
        (different_sorted, "_8_lookalikes.jpg"),
        (same_sorted, "_9_different_looking_same_people.jpg"),
    )
    for frame, suffix in montages:
        combine_face_images(frame, prefix, suffix)

    # First-names word cloud.
    plot_first_names_wordcloud(prefix, lists_of_images)

    # Write out timings and info about images that failed.
    run_outputs(attempting_all, overall_start_time, encodings_start_time,
                comparisons_start_time, graph_start_time,
                precision_recall_start_time, prefix, lists_of_images)
def train(self, tr_X, tr_y, te_X, te_y, batchSize=32, maxIter=50, start=10, period=2, threshold=10,
          earlyStopTol=2, totalStopTol=2):
    """Train with early stopping, save the parameters, then score the test set.

    The first 70% of ``tr_X``/``tr_y`` is used for training and the rest for
    validation. After every epoch the mean training and validation costs are
    sent to the early-stop generator; each time it signals a stop the learning
    rate is divided by 10 and the training function is rebuilt. Training ends
    after ``maxIter`` epochs or once more than ``totalStopTol`` stops have
    occurred. The parameters are then pickled to
    ``dataset_path + 'plain_cnn.pkl'`` and test metrics are printed.

    Args:
        tr_X, tr_y: training samples and labels (split 70/30 internally).
        te_X, te_y: test samples and labels.
        batchSize: mini-batch size.
        maxIter: maximum number of epochs.
        start, period, threshold, earlyStopTol: forwarded unchanged to
            ``earlyStopGen``.
        totalStopTol: number of learning-rate reductions tolerated before
            training is aborted.
    """

    def runPass(fn, X, y, shuffle):
        # One full pass over (X, y); returns (mean cost, predictions, labels)
        # with predictions/labels concatenated in the order batches were seen.
        costSum = 0.
        preds = []
        labels = []
        for Xb, yb in miniBatchGen(X, y, batchSize, shuffle=shuffle):
            cost, pred = fn(Xb, yb)
            costSum += cost
            preds.append(pred)
            labels.append(yb)
        # Number of batches, counting a trailing partial batch.
        nIter = len(X) // batchSize
        if len(X) % batchSize != 0:
            nIter += 1
        # Concatenate once instead of re-copying the arrays on every batch.
        allPred = np.concatenate(preds, axis=0) if preds else None
        allY = np.concatenate(labels) if labels else None
        return costSum / nIter, allPred, allY

    trainfn = self.trainfn
    lr = self.lr
    tr_va_split = int(tr_X.shape[0] * 0.7)
    tr_X, va_X = tr_X[:tr_va_split], tr_X[tr_va_split:]
    tr_y, va_y = tr_y[:tr_va_split], tr_y[tr_va_split:]

    earlyStop = earlyStopGen(start, period, threshold, earlyStopTol)
    next(earlyStop)  # prime the generator so it can receive send()
    totalStopCount = 0

    for epoch in range(maxIter):
        startTime = time.time()

        # Full pass over the training data. Batches are shuffled, so the
        # labels collected alongside the predictions are used for scoring.
        trCostMean, trAllPred, trRandy = runPass(trainfn, tr_X, tr_y, True)
        trAcc = accuracy(trAllPred, trRandy)
        trP, trR = precision_recall(trAllPred, trRandy)

        # Full pass over the validation data (unshuffled).
        vaCostMean, vaAllPred, _ = runPass(self.vatefn, va_X, va_y, False)
        vaAcc = accuracy(vaAllPred, va_y)
        vaP, vaR = precision_recall(vaAllPred, va_y)

        # One print(...) call with a pre-joined string is valid on Python 2
        # and 3 and yields the same line as the former chained print statements.
        print(' '.join([
            'epoch ', str(epoch), ' time: %.3f' % (time.time() - startTime),
            'trcost: %.5f tracc: %.5f trp: %.5f trr: %.5f' % (trCostMean, trAcc, trP, trR),
            'vacost: %.5f vaacc: %.5f vap: %.5f var: %.5f' % (vaCostMean, vaAcc, vaP, vaR),
        ]))

        # Decide whether to early stop.
        if earlyStop.send((trCostMean, vaCostMean)):
            # On an early stop, lower the learning rate and keep iterating.
            lr /= 10
            updatesDict = updates.nesterov_momentum(self.trCost, self.params, lr, self.momentum)
            trainfn = makeFunc([self.X, self.y], [self.trCost, self.yDropProb], updatesDict)
            totalStopCount += 1
            if totalStopCount > totalStopTol:
                # Still stopping after the learning-rate reductions: give up.
                print('stop')
                break
            print('learning rate decreases to ' + ' ' + str(lr))

    self.istrained = True
    params = layers.get_all_param_values(self.outprob)
    # Binary mode plus a context manager: the handle is closed deterministically
    # and the dump also works where pickle output is bytes.
    with open(dataset_path + 'plain_cnn.pkl', 'wb') as paramFile:
        cPickle.dump(params, paramFile)

    # Final pass over the test data.
    teCostMean, teAllPred, _ = runPass(self.vatefn, te_X, te_y, False)
    teAcc = accuracy(teAllPred, te_y)
    teP, teR = precision_recall(teAllPred, te_y)
    print('tecost: %.5f teacc: %.5f tep: %.5f ter: %.5f' % (teCostMean, teAcc, teP, teR))
y_hat = model(x) loss = K.binary_crossentropy(y, y_hat) # Weight the loss pos_weights = y * pos_weight neg_weights = (1.0 - y) * neg_weight loss_weights = pos_weights + neg_weights loss_weighted = tf.reduce_mean(loss * loss_weights) grads = tape.gradient(loss_weighted, model.weights) optimizer.apply_gradients(zip(grads, model.weights)) acc = utils.accuracy(y, y_hat) acc_topk = utils.accuracy_topk(y, y_hat) precision, recall = utils.precision_recall(y, y_hat) auc_metric = tf.keras.metrics.AUC() auc_metric.update_state(y, y_hat) tf.summary.image('Inputs', x, step=step) tf.summary.scalar('ClassLoss', loss_weighted, step=step) tf.summary.scalar('Acc', acc, step=step) tf.summary.scalar('AuC', auc_metric.result(), step=step) tf.summary.scalar('AccTopK', acc_topk, step=step) tf.summary.scalar('Precision', precision, step=step) tf.summary.scalar('Recall', recall, step=step) tf.summary.histogram('Labels', y, step=step) tf.summary.histogram('Predictions', y_hat, step=step) auc_metric.reset_states() print('Step: ', step, acc.numpy() * 100, precision.numpy(), recall.numpy(),
def validation(x_valid, y_valid, val_batch_size, num_classes, sess, model, epoch, start_time, w_plus):
    """Run one validation pass, print a per-class report and log the results.

    The data is shuffled with ``randomize`` and processed in
    ``int(len(x_valid) / val_batch_size)`` full batches; samples beyond the
    last full batch are not evaluated. Per-class accuracy, loss, prediction
    counts, precision and recall are printed, and the precision/recall curve
    and CSV logs are written through the module's helper functions.

    Args:
        x_valid, y_valid: validation inputs and labels; ``y_valid`` is indexed
            as a 2-D array with one column per class.
        val_batch_size: number of samples per batch.
        num_classes: number of label columns.
        sess: session used to evaluate the model tensors.
        model: object providing ``x``, ``y``, ``w_plus``, ``accuracy``,
            ``loss``, ``prediction`` and ``get_logits``; its ``is_train``
            attribute is set to ``False``.
        epoch: zero-based epoch index (printed as ``epoch + 1``).
        start_time: ``time.time()`` value the run time is measured from.
        w_plus: value fed to ``model.w_plus``.

    Returns:
        ``(mean_acc, mean_loss)``: per-class accuracy averaged over batches
        and the mean batch loss.
    """
    loss_batch_all = np.array([])
    # Separate arrays on purpose: a chained assignment would bind all three
    # names to one object.
    acc_batch_all = np.zeros((0, num_classes))
    y_pred_all = np.zeros((0, num_classes))
    logits_all = np.zeros((0, num_classes))

    model.is_train = False
    x_valid, y_valid = randomize(x_valid, y_valid)
    step_count = int(len(x_valid) / val_batch_size)
    for step in range(step_count):
        start = step * val_batch_size
        end = (step + 1) * val_batch_size
        x_batch, y_batch = get_next_batch(x_valid, y_valid, start, end)
        feed_dict_val = {model.x: x_batch, model.y: y_batch, model.w_plus: w_plus}
        acc_valid, loss_valid, y_pred, logits = sess.run(
            [model.accuracy, model.loss, model.prediction, model.get_logits],
            feed_dict=feed_dict_val)
        acc_batch_all = np.concatenate(
            (acc_batch_all, acc_valid.reshape([1, num_classes])))
        y_pred_all = np.concatenate(
            (y_pred_all, y_pred.reshape([val_batch_size, num_classes])))
        logits_all = np.concatenate(
            (logits_all, logits.reshape([val_batch_size, num_classes])))
        loss_batch_all = np.append(loss_batch_all, loss_valid)

    mean_acc = np.mean(acc_batch_all, axis=0)
    mean_loss = np.mean(loss_batch_all)
    num_examples = np.sum(y_valid, axis=0)
    num_preds = np.sum(y_pred_all, axis=0)
    epoch_time = time.time() - start_time

    print('******************************************************************************'
          '********************************************************')
    print('--------------------------------------------------------Validation, Epoch: {}'
          ' -----------------------------------------------------------'.format(epoch + 1))
    print("Atlc\tCrdmg\tEffus\tInflt\tMass\tNodle\tPnum\tPntrx\tConsd"
          "\tEdma\tEmpys\tFbrss\tTkng\tHrna\t|Avg.\t|Loss\t|Run Time")

    # Each report row is assembled into one string and printed with a single
    # print(...) call, which is valid on both Python 2 and Python 3.
    print(''.join('{:.01%}\t'.format(accu) for accu in mean_acc)
          + '|{0:.01%}\t|{1:0.02}\t|{2}'.format(np.mean(mean_acc), mean_loss, epoch_time))
    print(''.join('{:}\t'.format(exm) for exm in num_examples)
          + "Count of pathalogies")
    print(''.join('{:}\t'.format(pred) for pred in num_preds)
          + "Count of recognized pathalogies")

    # Only the samples that went through a full batch have predictions, so the
    # labels are truncated to the same length before scoring.
    y_eval = y_valid[:y_pred_all.shape[0]]
    # P and R must be distinct arrays; sharing one array would overwrite each
    # precision value with the recall assigned right after it.
    P = np.zeros(num_classes)
    R = np.zeros(num_classes)
    for cond in range(num_classes):
        P[cond], R[cond] = precision_recall(y_eval[:, cond], y_pred_all[:, cond])

    print(''.join('{:0.03}\t'.format(p) for p in P) + "Precision")
    print(''.join('{:0.03}\t'.format(r) for r in R) + "Recall")

    plot_precision_recall_curve(y_valid[:logits_all.shape[0], :], logits_all, epoch)
    write_acc_loss_csv(mean_acc, mean_loss, epoch)
    write_precision_recall_csv(P, R, epoch)
    return mean_acc, mean_loss