def analysis_pick_results(pick_results_file, reference_coordinate_dir,
                          reference_coordinate_symbol, particle_size,
                          minimum_distance_rate, score_threshold=0.99):
    """Compare pickled picking results with reference coordinates.

    Loads the picking results, matches every micrograph with its reference
    '.star' file, prints precision/recall for the picks whose score exceeds
    ``score_threshold`` and writes the cumulative statistics (picks sorted by
    descending score) to 'results.txt' next to ``pick_results_file``.

    The pickle is expected to hold a list with one entry per micrograph; each
    entry is a list of picks, and each pick is a list whose items 0 and 1 are
    the coordinate, item 2 the prediction score and item 3 the micrograph
    name.  This function additionally reads item 4 (compared with 1 to detect
    a true positive) and item 5 (accumulated as a distance); presumably
    ``Picker.calculate_tp`` appends both to every pick -- confirm against
    that helper.

    Micrographs without picks or without a reference file are skipped, and
    every ratio whose denominator is zero is reported as 0 instead of
    raising ZeroDivisionError.

    Args:
        pick_results_file: string, path of the pickled picking results.
        reference_coordinate_dir: string, directory holding the reference
            coordinate files.
        reference_coordinate_symbol: string, suffix that replaces '.mrc' in
            the micrograph name (followed by '.star'), like '_manualpick'.
        particle_size: int, the size of the particle.
        minimum_distance_rate: float, ``particle_size * minimum_distance_rate``
            is the distance limit handed to ``Picker.calculate_tp``.
        score_threshold: float, only picks with a score strictly above this
            value count towards the printed precision and recall.  Defaults
            to 0.99, the value that used to be hard-coded.

    Returns:
        None.  Results are printed and written to 'results.txt'.
    """
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    with open(pick_results_file, 'rb') as f:
        coordinate = pickle.load(f)

    tp = 0.
    total_pick = 0
    total_reference = 0
    total_analyse_num = 0
    coordinate_total = []
    max_distance = particle_size * minimum_distance_rate
    print(len(coordinate))
    for picks in coordinate:
        if len(picks) == 0:
            continue
        total_analyse_num += 1
        mrc_filename = os.path.basename(picks[0][3])
        reference_coordinate_file = os.path.join(
            reference_coordinate_dir,
            mrc_filename.replace('.mrc',
                                 reference_coordinate_symbol + '.star'))
        if not os.path.isfile(reference_coordinate_file):
            print("Can not find the reference coordinate:" +
                  reference_coordinate_file)
            continue

        # reference_coordinate: one [x, y] entry per reference particle.
        reference_coordinate = DataLoader.read_coordinate_from_star(
            reference_coordinate_file)
        tp_sigle, average_distance = Picker.calculate_tp(
            picks, reference_coordinate, max_distance)
        print("tp: %s" % (tp_sigle,))
        print("average_distance: %s" % (average_distance,))

        # Recount the true positives of this micrograph, this time keeping
        # only the picks that pass the score threshold.
        tp_sigle = 0.
        total_reference += len(reference_coordinate)
        for pick in picks:
            coordinate_total.append(pick)
            if pick[2] > score_threshold:
                total_pick += 1
                if pick[4] == 1:
                    tp += 1
                    tp_sigle += 1
        if len(reference_coordinate) > 0:
            print(tp_sigle / len(reference_coordinate))
        else:
            print(0.)

    print("tp= %s" % (tp,))
    print("total_pick= %s" % (total_pick,))
    print("total_analyse_num= %s" % (total_analyse_num,))
    precision = tp / total_pick if total_pick else 0.
    recall = tp / total_reference if total_reference else 0.
    # Report the score threshold that was actually applied above.
    print("(threshold %.2f)precision:%f recall:%f" %
          (score_threshold, precision, recall))

    # Sort the picks by prediction score in descending order.
    coordinate_total = sorted(coordinate_total, key=itemgetter(2),
                              reverse=True)
    total_tp = []
    total_recall = []
    total_precision = []
    total_probability = []
    total_average_distance = []
    total_distance = 0.
    tp_tem = 0.
    for rank, pick in enumerate(coordinate_total, 1):
        if pick[4] == 1:
            tp_tem += 1
            total_distance += pick[5]
        total_tp.append(tp_tem)
        total_recall.append(tp_tem / total_reference
                            if total_reference else 0.)
        total_precision.append(tp_tem / rank)
        total_probability.append(pick[2])
        total_average_distance.append(total_distance / tp_tem
                                      if tp_tem else 0)

    # One summary per multiple of the reference count, the last one covering
    # every pick.  Nothing can be summarised without picks.
    if not coordinate_total:
        times_of_manual = 0
    elif total_reference:
        times_of_manual = len(coordinate_total) // total_reference + 1
    else:
        times_of_manual = 1

    total_results_file = os.path.join(os.path.dirname(pick_results_file),
                                      'results.txt')
    with open(total_results_file, 'w') as f:
        for row in (total_tp, total_recall, total_precision,
                    total_probability, total_average_distance):
            f.write(','.join(map(str, row)) + '\n')
        f.write('#total autopick number:%d\n' % len(coordinate_total))
        f.write('#total manual pick number:%d\n' % total_reference)
        f.write('#the first row is number of true positive\n')
        f.write('#the second row is recall\n')
        f.write('#the third row is precision\n')
        f.write('#the fourth row is probability\n')
        f.write('#the fiveth row is distance\n')

        for i in range(times_of_manual):
            title = ('autopick_total sort, take the head number of '
                     'total_manualpick * ratio %d' % (i + 1))
            print(title)
            f.write('#' + title + ' \n')
            if i == times_of_manual - 1:
                index = -1
            else:
                index = (i + 1) * total_reference - 1
            summary = 'precision:%f \trecall:%f' % (total_precision[index],
                                                    total_recall[index])
            print(summary)
            f.write(summary + ' \n')
def analysis_pick_results(pick_results_file, reference_coordinate_dir,
                          reference_coordinate_symbol, particle_size,
                          minimum_distance_rate, score_threshold=0.5):
    """Compare pickled picking results with reference coordinates.

    Loads the picking results, matches every micrograph with its reference
    '.star' file, prints precision/recall for the picks whose score exceeds
    ``score_threshold`` and writes the cumulative statistics (picks sorted by
    descending score) to 'results.txt' next to ``pick_results_file``.

    The pickle is expected to hold a list with one entry per micrograph; each
    entry is a list of picks, and each pick is a list whose items 0 and 1 are
    the coordinate, item 2 the prediction score and item 3 the micrograph
    name.  This function additionally reads item 4 (compared with 1 to detect
    a true positive) and item 5 (accumulated as a distance); presumably
    ``AutoPicker.calculate_tp`` appends both to every pick -- confirm against
    that helper.

    Micrographs without picks or without a reference file are skipped.  All
    ratios use true division (also under Python 2), and a ratio whose
    denominator is zero is reported as 0 instead of raising
    ZeroDivisionError.

    Args:
        pick_results_file: string, path of the pickled picking results.
        reference_coordinate_dir: string, directory holding the reference
            coordinate files.
        reference_coordinate_symbol: string, suffix that replaces '.mrc' in
            the micrograph name (followed by '.star'), like '_manualpick'.
        particle_size: int, the size of the particle.
        minimum_distance_rate: float, ``particle_size * minimum_distance_rate``
            is the distance limit handed to ``AutoPicker.calculate_tp``.
        score_threshold: float, only picks with a score strictly above this
            value count towards the printed precision and recall.  Defaults
            to 0.5, the value that used to be hard-coded.

    Returns:
        None.  Results are printed and written to 'results.txt'.
    """
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    with open(pick_results_file, 'rb') as f:
        coordinate = pickle.load(f)

    tp = 0
    total_pick = 0
    total_reference = 0
    coordinate_total = []
    max_distance = particle_size * minimum_distance_rate
    for picks in coordinate:
        # A micrograph without picks carries no name to look up.
        if len(picks) == 0:
            continue
        mrc_filename = os.path.basename(picks[0][3])
        reference_coordinate_file = os.path.join(
            reference_coordinate_dir,
            mrc_filename.replace('.mrc',
                                 reference_coordinate_symbol + '.star'))
        if not os.path.isfile(reference_coordinate_file):
            print("Can not find the reference coordinate:" +
                  reference_coordinate_file)
            continue

        # reference_coordinate: one [x, y] entry per reference particle.
        reference_coordinate = DataLoader.read_coordinate_from_star(
            reference_coordinate_file)
        # The returned summary is not used here; the call is kept because it
        # presumably annotates every pick in place (see docstring).
        AutoPicker.calculate_tp(picks, reference_coordinate, max_distance)

        # Count the true positives of this micrograph among the picks that
        # pass the score threshold.
        tp_sigle = 0
        total_reference += len(reference_coordinate)
        for pick in picks:
            coordinate_total.append(pick)
            if pick[2] > score_threshold:
                total_pick += 1
                if pick[4] == 1:
                    tp += 1
                    tp_sigle += 1
        if len(reference_coordinate) > 0:
            print(float(tp_sigle) / len(reference_coordinate))
        else:
            print(0.)

    precision = float(tp) / total_pick if total_pick else 0.
    recall = float(tp) / total_reference if total_reference else 0.
    print("(threshold %s)precision:%f recall:%f" %
          (score_threshold, precision, recall))

    # Sort the picks by prediction score in descending order.
    coordinate_total = sorted(coordinate_total, key=itemgetter(2),
                              reverse=True)
    total_tp = []
    total_recall = []
    total_precision = []
    total_probability = []
    total_average_distance = []
    total_distance = 0
    tp_tem = 0
    for rank, pick in enumerate(coordinate_total, 1):
        if pick[4] == 1:
            tp_tem += 1
            total_distance += pick[5]
        total_tp.append(tp_tem)
        total_recall.append(float(tp_tem) / total_reference
                            if total_reference else 0.)
        total_precision.append(float(tp_tem) / rank)
        total_probability.append(pick[2])
        total_average_distance.append(float(total_distance) / tp_tem
                                      if tp_tem else 0)

    # One summary per multiple of the reference count, the last one covering
    # every pick.  Nothing can be summarised without picks.
    if not coordinate_total:
        times_of_manual = 0
    elif total_reference:
        times_of_manual = len(coordinate_total) // total_reference + 1
    else:
        times_of_manual = 1

    total_results_file = os.path.join(os.path.dirname(pick_results_file),
                                      'results.txt')
    with open(total_results_file, 'w') as f:
        for row in (total_tp, total_recall, total_precision,
                    total_probability, total_average_distance):
            f.write(','.join(map(str, row)) + '\n')
        f.write('#total autopick number:%d\n' % len(coordinate_total))
        f.write('#total manual pick number:%d\n' % total_reference)
        f.write('#the first row is number of true positive\n')
        f.write('#the second row is recall\n')
        f.write('#the third row is precision\n')
        f.write('#the fourth row is probability\n')
        f.write('#the fiveth row is distance\n')

        for i in range(times_of_manual):
            title = ('autopick_total sort, take the head number of '
                     'total_manualpick * ratio %d' % (i + 1))
            print(title)
            f.write('#' + title + ' \n')
            if i == times_of_manual - 1:
                index = -1
            else:
                index = (i + 1) * total_reference - 1
            summary = 'precision:%f \trecall:%f' % (total_precision[index],
                                                    total_recall[index])
            print(summary)
            f.write(summary + ' \n')
def pick(self, mrc_filename):
    """Pick particle candidates from one micrograph with a sliding window.

    The micrograph is read through ``DataLoader`` ('.rec' files with
    ``readRecFile``, everything else with ``readMrcFile``), preprocessed with
    ``DataLoader.preprocess_micrograph`` (which also yields the bin size),
    cut into square patches on a regular grid, scored by ``self.deepModel``
    and reduced to peaks by ``self.peak_detection``.  Peak positions are
    converted from grid indices back to the scale of the unbinned
    micrograph, and the micrograph name is appended to every peak.

    Args:
        mrc_filename: string, path of the micrograph to process.

    Returns:
        A tuple ``(list_coordinate, list_coordinate_all)``.  Both lists hold
        the peak entries sorted by descending score (item 2 of an entry);
        the first keeps the entries scoring above ``self.threshold``, the
        second every entry scoring above 0.0.  When the file cannot be read
        (header or body is None) a single empty list is returned instead.
    """
    if mrc_filename.endswith('.rec'):
        header, body = DataLoader.readRecFile(mrc_filename)
    else:
        header, body = DataLoader.readMrcFile(mrc_filename)
    # Identity test: `== None` on an array-like body compares element-wise.
    if header is None or body is None:
        # NOTE(review): the success path returns a 2-tuple, this one a bare
        # list; kept as is because callers may rely on it -- confirm.
        return []

    num_col = header[0]
    num_row = header[1]
    body_2d = np.array(body, dtype=np.float32).reshape(num_row, num_col)
    body_2d, bin_size = DataLoader.preprocess_micrograph(body_2d)

    step_size = 4
    patch_size = int(self.particle_size / bin_size)
    local_window_size = patch_size // step_size

    time1 = time.time()
    # Top-left corners of the sliding windows along both axes.  The grid
    # dimensions are taken straight from these ranges so that the reshape of
    # the predictions below always matches the number of patches.
    col_starts = range(0, body_2d.shape[0] - patch_size, step_size)
    row_starts = range(0, body_2d.shape[1] - patch_size, step_size)
    particle_candidate_all = []
    for col in col_starts:
        for row in row_starts:
            particle_candidate_all.append(
                np.copy(body_2d[col:col + patch_size, row:row + patch_size]))
    num_total_patch = len(particle_candidate_all)
    map_index_col = len(col_starts)
    map_index_row = len(row_starts)

    particle_candidate_all = np.array(particle_candidate_all).reshape(
        num_total_patch, patch_size, patch_size, 1)
    predictions = self.deepModel.evaluation(particle_candidate_all, self.sess)
    # Keep output column 1 only -- presumably the particle-class score.
    predictions = predictions[:, 1:2]
    predictions = predictions.reshape(map_index_col, map_index_row)
    time_cost = time.time() - time1
    if self.verbose:
        print("gpu time: %.1f s" % time_cost)

    list_coordinate = self.peak_detection(predictions, local_window_size)
    # Grid index -> window centre in binned pixels -> unbinned pixels.
    half_patch = patch_size // 2
    for entry in list_coordinate:
        entry.append(mrc_filename)
        entry[0] = (entry[0] * step_size + half_patch) * bin_size
        entry[1] = (entry[1] * step_size + half_patch) * bin_size

    list_coordinate_all = sorted(
        [entry for entry in list_coordinate if entry[2] > 0.0],
        key=lambda x: x[2], reverse=True)
    list_coordinate = sorted(
        [entry for entry in list_coordinate if entry[2] > self.threshold],
        key=lambda x: x[2], reverse=True)
    print("#candidate:%d, #picked:%d" % (num_total_patch,
                                         len(list_coordinate)))

    # NOTE(review): plotting is gated on plot_picking_result here; the
    # flattened source did not show the original nesting -- confirm.
    if self.plot_picking_result:
        # The plot is drawn on the binned micrograph, so scale a copy of the
        # picked coordinates back down.
        plot_list_coordinate = copy.deepcopy(list_coordinate)
        for entry in plot_list_coordinate:
            entry[0] = entry[0] / bin_size
            entry[1] = entry[1] / bin_size
        plot_dir = os.path.join(os.path.abspath(self.output_dir), "plot")
        if self.verbose:
            print("plot_dir >>>>>>>>>>  %s" % (plot_dir,))
        if not os.path.exists(plot_dir):
            os.makedirs(plot_dir)
        display.plot_circle_in_micrograph(
            body_2d, plot_list_coordinate, patch_size,
            os.path.join(plot_dir, "micro_circle_%s.png" %
                         (os.path.basename(mrc_filename))))
    return list_coordinate, list_coordinate_all