def test_process():
    """Export every ROI of the test slides as a JPEG plus a coordinate file.

    For each ``<name>.kfb`` in the test directory the matching
    ``<name>.json`` label list is loaded.  The class of every non-``roi``
    entry is printed; every ``roi`` entry is read from the slide with scale
    argument 20 and written to ``ROI_images/<name>roi<k>.jpg``, with its
    ``x y w h`` line in ``ROI_coord/<name>roi<k>.txt`` (``k`` counts the
    ROIs of one slide starting at 1).
    """
    path = '/home/admin/jupyter/Data/test'
    out_root = '/home/admin/jupyter/tianchi_data/test'
    # endswith() rather than find(): a name that merely contains '.kfb'
    # (e.g. 'a.kfb.bak') must not be sliced as if it ended with it.
    filename = [s[:-4] for s in os.listdir(path) if s.endswith('.kfb')]
    print(filename)
    print(len(filename))
    for name in filename:
        filepath = os.path.join(path, name + '.json')
        image_path = os.path.join(path, name + '.kfb')
        with open(filepath, encoding='utf-8') as f:
            label_dict = json.loads(f.read())

        read = None  # opened lazily, once per slide, on the first ROI
        count = 0
        for dt in label_dict:
            if dt['class'] != 'roi':
                print(dt['class'])
                continue
            count += 1
            if read is None:
                scale_kfb = 20
                read = kfbReader.reader()
                kfbReader.reader.ReadInfo(read, image_path, scale_kfb, True)
                kfbReader.reader.setReadScale(read, scale=20)
            stem = name + 'roi' + str(count)
            save_path = os.path.join(out_root, 'ROI_images/' + stem + '.jpg')
            roi = read.ReadRoi(dt['x'], dt['y'], dt['w'], dt['h'], 20)
            cv.imwrite(save_path, roi)
            coord_path = os.path.join(out_root, 'ROI_coord/' + stem + '.txt')
            with open(coord_path, 'w') as f:
                f.write(str(dt['x']) + ' ' + str(dt['y']) + ' '
                        + str(dt['w']) + ' ' + str(dt['h']) + '\n')
def cell_sampling(label_file, wsi_path, save_path, size):
    """Crop square windows from a slide and write them with their XML labels.

    :param label_file: passed to ``get_labels``; nothing is done when it
        yields no labels
    :param wsi_path: path of the slide file handed to the reader
    :param save_path: directory prefix for the ``.bmp`` crops (also passed
        on to ``Xml``)
    :param size: side length of each square window
    """
    labels = get_labels(label_file)
    if len(labels) == 0:
        return

    print("PROCESSING %s ..." % wsi_path)

    scale = 20
    reader = kr.reader()
    kr.reader.ReadInfo(reader, wsi_path, scale, True)

    points_xy = get_windows_new(labels, size)
    filename, _ = os.path.splitext(os.path.basename(wsi_path))

    # One image file per window, named after the window's top-left corner.
    for i, (x, y) in enumerate(points_xy):
        if i % 100 == 0:
            print(filename, "processed #", i)
        cell = reader.ReadRoi(x, y, size, size, scale)
        image_file_name = (save_path + "/" + filename + "_" + str(x)
                           + "_" + str(y) + ".bmp")
        cv2.imwrite(image_file_name, cell)

    # Label files for the same windows.
    print(filename, "generating xml")
    new_xmls = Xml(filename, save_path, points_xy, labels, size)
    new_xmls.gen_xml()

    print("[INFO]", "processed ", filename)
def save_roi_to_npz(path):
    """Store every labelled ROI of the positive slides as a compressed npz.

    Slides are taken from ``<path>/pos_[0-8]/*.kfb`` and their labels from
    ``<path>/labels/<name>.json``.  For each ``roi`` entry the image is read
    and all ``pos`` boxes whose top-left corner lies strictly inside the ROI
    are converted to ROI-relative ``[x1, y1, x2, y2]`` rows (clipped to the
    ROI).  ROIs without any such box are skipped; the others are written to
    ``cfg.sample_path`` as ``<name>_<n>.npz`` with arrays ``img`` and
    ``label``.
    """
    for pos_path in glob(os.path.join(path, "pos_[0-8]/*.kfb")):
        filename = pos_path.split("/")[-1].split(".")[0]
        json_path = glob(os.path.join(path, "labels", filename + ".json"))[0]
        with open(json_path, "r") as f:
            json_infos = json.loads(f.read())

        r = kfbReader.reader()
        r.ReadInfo(pos_path, 20, True)

        roi_coords = [
            {"x": info["x"], "y": info["y"], "w": info["w"], "h": info["h"]}
            for info in json_infos
            if info["class"] == "roi"
        ]

        roi_cnt = 1  # only advanced for ROIs that are actually saved
        for roi_coord in roi_coords:
            X = roi_coord["x"]
            Y = roi_coord["y"]
            W = roi_coord["w"]
            H = roi_coord["h"]
            img = r.ReadRoi(X, Y, W, H, 20).copy()

            rows = []
            for info in json_infos:
                if info["class"] != "pos":
                    continue
                x, y, w, h = info["x"], info["y"], info["w"], info["h"]
                if X < x < X + W and Y < y < Y + H:
                    rows.append([
                        max(int(x - X), 0),
                        max(int(y - Y), 0),
                        min(int(x - X + w), W),
                        min(int(y - Y + h), H),
                    ])
            if not rows:
                continue
            label = np.array(rows, dtype="int").reshape(-1, 4)

            sample_path = cfg.sample_path
            mkdir(sample_path)
            save_path = os.path.join(sample_path,
                                     filename + "_" + str(roi_cnt) + ".npz")
            np.savez_compressed(save_path, img=img, label=label)
            roi_cnt += 1

        print("Finish: ", filename,
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
def gen_csv(dataIds, pos_path, save_path, idi, topleft, imgid2name):
    """Write a CSV of the labels that fall completely inside each tile.

    :param dataIds: unused; kept for interface compatibility
    :param pos_path: prefix of the ``<dataId>.kfb`` / ``<dataId>.json`` files
    :param save_path: prefix prepended to the tile name in the ``path`` column
    :param idi: number of tiles; image ids ``0 .. idi-1`` are processed
    :param topleft: maps image id to ``(x, y, w, h)`` of the tile
    :param imgid2name: maps image id to the tile file name; the slide id is
        the part before the first ``'_'``
    :return: path of the CSV that was written

    Box coordinates are translated to the tile and divided by
    ``tile_size / 1024``.
    """
    slide = kfbReader.reader()
    rows = []

    with tqdm(total=idi) as pbar:
        pbar.set_description('Checking labels')
        for img_id in range(idi):
            pbar.update(1)
            img_fn = imgid2name[img_id]
            tlx, tly, tlw, tlh = topleft[img_id]
            dataId = img_fn[:img_fn.find('_')]
            scale_x = tlw / 1024
            scale_y = tlh / 1024

            kfbReader.reader.ReadInfo(slide, pos_path + dataId + '.kfb', 20, True)
            # Close the label file promptly; one handle per tile would
            # otherwise stay open until garbage collection.
            with open(pos_path + dataId + '.json', 'r') as label_file:
                labels = json.load(label_file)

            for pos_ii in labels:
                if pos_ii['class'] == 'roi':
                    continue
                pxi = pos_ii['x']
                pyi = pos_ii['y']
                pwi = pos_ii['w']
                phi = pos_ii['h']
                inside = (pxi >= tlx and pyi >= tly
                          and pxi + pwi <= tlx + tlw
                          and pyi + phi <= tly + tlh)
                if not inside:
                    continue
                rows.append([
                    save_path + img_fn,
                    int((pxi - tlx) / scale_x),
                    int((pyi - tly) / scale_y),
                    int((pxi - tlx + pwi) / scale_x),
                    int((pyi - tly + phi) / scale_y),
                    pos_ii['class'],
                    img_id,
                ])

    labels_num = len(rows)
    print(labels_num)
    csv_fn = "/home/admin/jupyter/zxy/label_roival.csv"
    print('Output CSV with ' + str(labels_num) + ' labels in ' + csv_fn)
    col = ['path', 'xmin', 'ymin', 'xmax', 'ymax', 'cls', 'img_id']
    df = pd.DataFrame(rows, columns=col)
    df.to_csv(csv_fn, index=False)
    return csv_fn
def predict_slide(f):
    """Run the multi-process prediction pipeline on one slide.

    Submits 22 tasks to a pool of 22 workers, connected by three bounded
    queues: one ``get_coord`` task, sixteen ``get_img_608`` tasks, four
    ``predict_608`` tasks and one ``get_result`` task.

    :param f: path of the slide file
    :raises Exception: re-raises the first exception raised inside a worker
        (previously such failures were silently discarded)
    """
    q_coord = Manager().Queue(4096)
    q_img = Manager().Queue(4096)
    q_res = Manager().Queue(4096)

    scale = 20
    reader = kr.reader()
    kr.reader.ReadInfo(reader, f, scale, True)
    height = reader.getHeight()
    width = reader.getWidth()

    p = Pool(22)
    tasks = [p.apply_async(func=get_coord, args=(q_coord, height, width))]
    tasks.extend(
        p.apply_async(func=get_img_608, args=(q_img, q_coord, f))
        for _ in range(16)
    )
    tasks.extend(
        p.apply_async(func=predict_608, args=(q_res, q_img, i))
        for i in range(4)
    )
    tasks.append(p.apply_async(func=get_result, args=(q_res, f)))

    p.close()
    p.join()

    # apply_async stores a worker's exception in its AsyncResult; get()
    # re-raises it here instead of letting the failure pass unnoticed.
    for task in tasks:
        task.get()
def __init__(self, imgpath, input_size):
    """Open the slide and initialise the sliding-window state.

    :param imgpath: path of the slide file handed to the reader
    :param input_size: window side length; the stride is 85 % of it
    """
    self.imgpath = imgpath
    self.input_size = input_size
    self.reader = kr.reader()
    self.reader.ReadInfo(imgpath, 20, True)
    self.Width = self.reader.getWidth()
    self.Height = self.reader.getHeight()
    self.stride = int(input_size * 0.85)
    # Was ``self.index_w -= self.stride``, which reads an attribute this
    # initialiser never sets.  Starting one stride before the origin gives
    # the same value when a class-level ``index_w = 0`` exists.
    # NOTE(review): presumably the first advance then lands on 0 - confirm
    # against the method that steps the window.
    self.index_w = -self.stride
    self.index_h = 0
def cut_save_test_roi(kfb_path, json_path, save_dir):
    """Cut every region listed in a JSON file out of a slide and save it.

    Region ``i`` is written to ``<save_dir>/<json stem>_<i>.npz`` with two
    arrays: ``img`` (the pixels returned by the reader) and ``label`` (a
    1x4 array ``[x, y, x + w, y + h]`` in slide coordinates).
    """
    kfb_reader = kr.reader()
    kr.reader.ReadInfo(kfb_reader, kfb_path, 20, False)

    stem = osp.splitext(osp.basename(json_path))[0]
    for idx, region in enumerate(load_json(json_path)):
        x = region['x']
        y = region['y']
        w = region['w']
        h = region['h']
        npz_name = osp.join(save_dir, stem + "_" + str(idx) + ".npz")
        roi_image = kfb_reader.ReadRoi(x, y, w, h, 20)
        corners = np.array([x, y, x + w, y + h])[None, :]
        np.savez_compressed(npz_name, img=roi_image, label=corners)
def get_img_608(q_img, q_coord, f):
    """Worker: turn queued coordinates into 608x608 crops of slide ``f``.

    Reads ``(x, y)`` pairs from ``q_coord`` until a pair whose ``x`` is
    ``None`` arrives, pushing ``(crop, x, y)`` onto ``q_img`` for each one.
    On shutdown it pushes four ``(None, None, None)`` markers onto ``q_img``.
    """
    scale = 20
    reader = kr.reader()
    kr.reader.ReadInfo(reader, f, scale, True)
    size = 608

    while True:
        x, y = q_coord.get()
        if x is None:  # identity test: the sentinel is the None singleton
            break
        cell = reader.ReadRoi(x, y, size, size, scale)
        q_img.put((cell, x, y))

    # NOTE(review): every instance of this worker emits four markers;
    # confirm the consumers expect that when several instances run.
    for _ in range(4):
        q_img.put((None, None, None))
def gen_topleft(dataIds, idi, topleft, imgid2name, w, h):
    """Register every window of every slide under a fresh image id.

    ``get_xylist(dataIds, w, h)`` supplies, per slide id, a list of
    ``(xmin, ymin, xmax, ymax)`` windows.  Each window receives the next id
    (starting at ``idi``); ``topleft[id]`` becomes ``[xmin, ymin, w, h]``
    and ``imgid2name[id]`` becomes ``<dataId>_<xmin>_<ymin>_<w>_<h>.jpg``.

    :return: ``(next free id, topleft, imgid2name)``; both mappings are also
        modified in place
    """
    windows_by_slide = get_xylist(dataIds, w, h)
    for dataId in dataIds:
        slide = kfbReader.reader()
        kfbReader.reader.ReadInfo(slide, test_path + dataId + '.kfb', 20, True)
        for xmin, ymin, xmax, ymax in windows_by_slide[dataId]:
            win_w = xmax - xmin
            win_h = ymax - ymin
            topleft[idi] = [xmin, ymin, win_w, win_h]
            parts = [dataId, str(xmin), str(ymin), str(win_w), str(win_h)]
            imgid2name[idi] = '_'.join(parts) + '.jpg'
            idi += 1
    return idi, topleft, imgid2name
def randomcrop_neg_kfb(kfb_path, save_dir, target_size=(4000, 4000)):
    """Save five random crops of a slide as compressed ``.npz`` files.

    One crop is drawn around the slide centre and one from each of the four
    quadrants.  Every file stores ``img`` (the crop) and ``label`` (a 1x5
    array ``[0, 0, w, h, -1]`` spanning the whole crop).

    :param kfb_path: slide file to crop
    :param save_dir: directory for ``<slide>_<crop name>.npz``
    :param target_size: ``(width, height)`` of each crop
    :raises ValueError: from ``np.random.randint`` when the slide is too
        small for a sampling range to be non-empty

    NOTE(review): only the top-left crop clamps its lower bound at 0; the
    other quadrant offsets are not checked against the slide bounds.
    """
    kfb_reader = kr.reader()
    kr.reader.ReadInfo(kfb_reader, kfb_path, 20, False)
    Width, Height = kfb_reader.getWidth(), kfb_reader.getHeight()
    center_x, center_y = Width // 2, Height // 2
    target_w, target_h = target_size
    basename = osp.splitext(osp.basename(kfb_path))[0]

    # The label box follows the requested crop size; it used to be
    # hard-coded to 4000x4000 regardless of ``target_size``.
    label = (np.array([0, 0, target_w, target_h, -1]))[None, :]

    # (file suffix, x sampling range, y sampling range); the misspelt
    # "buttom" suffixes are existing file names and are kept unchanged.
    crops = (
        ("center_crop",
         (center_x - target_w // 2, center_x),
         (center_y - target_h // 2, center_y)),
        ("top_left_crop",
         (max(0, center_x - target_w * 2), center_x - target_w),
         (max(0, center_y - target_h * 2), center_y - target_h)),
        ("top_right_crop",
         (center_x + target_w, center_x + 2 * target_w),
         (center_y - target_h * 2, center_y - target_h)),
        ("buttom_left_crop",
         (center_x - target_w * 2, center_x - target_w),
         (center_y + target_h, center_y + target_h * 2)),
        ("buttom_right_crop",
         (center_x + target_w, center_x + 2 * target_w),
         (center_y + target_h, center_y + 2 * target_h)),
    )
    for crop_name, x_range, y_range in crops:
        # x is drawn before y, crop by crop, as before, so a seeded run
        # still yields the same offsets.
        offset_x = np.random.randint(*x_range)
        offset_y = np.random.randint(*y_range)
        crop_image = kfb_reader.ReadRoi(offset_x, offset_y,
                                        target_w, target_h, 20)
        npz_name = osp.join(save_dir, basename + "_" + crop_name + ".npz")
        np.savez_compressed(npz_name, img=crop_image, label=label)
def split_data(kfb_path):
    """Tile a whole slide into JPEG files.

    Tiles of ``WIDTH`` x ``HEIGHT`` are read on a grid with pitch
    ``OVERLAP`` in both directions and written to
    ``<test_data_dir>/<slide>_<column>_<row>.jpg``; each file name is
    printed before it is written.
    """
    scale = 20
    reader = kfbReader.reader()
    reader.ReadInfo(kfb_path, scale, False)

    columns = int(reader.getWidth() / OVERLAP) + 1
    rows = int(reader.getHeight() / OVERLAP) + 1
    slide_id = kfb_path.split('/')[-1].split('.')[0]

    for col in range(columns):
        left = OVERLAP * col
        for row in range(rows):
            top = OVERLAP * row
            square = reader.ReadRoi(left, top, WIDTH, HEIGHT, 20)
            file_name = (test_data_dir + '/' + slide_id
                         + '_' + str(col) + '_' + str(row) + ".jpg")
            print(file_name)
            cv2.imwrite(file_name, square)
def gen_pseudo_label(pred_json_root, dst_npz_root,
                     keep_score_thred=0.9,
                     kfb_root="/home/admin/jupyter/Data/test/"):
    """Turn confident predictions into per-class npz patches.

    :param pred_json_root: directory of prediction files ``<slide>.json``,
        each a list of dicts with keys ``x, y, w, h, p, class``
    :param dst_npz_root: output root; patches are written to
        ``<dst_npz_root>/<class>/<slide>_<index>.npz``
    :param keep_score_thred: only predictions with ``p`` strictly above this
        value are kept
    :param kfb_root: directory holding the ``<slide>.kfb`` files
    :return: number of patches written (previously counted but discarded)

    Each npz stores ``img`` (the patch) and ``label`` (1-based index of the
    class in ``get_cls_types()``).
    """
    count = 0
    json_names = os.listdir(pred_json_root)
    CELL_TYPES = get_cls_types()

    for json_name in tqdm(json_names):
        slide_id = json_name.split('.')[0]
        json_dir = osp.join(pred_json_root, json_name)
        kfb_dir = osp.join(kfb_root, '{}.kfb'.format(slide_id))
        with open(json_dir, 'r') as f:
            json_infos = json.load(f)

        r = kfbReader.reader()
        r.ReadInfo(kfb_dir, 20, True)

        for i, json_info in enumerate(json_infos):
            x = json_info['x']
            y = json_info['y']
            w = json_info['w']
            h = json_info['h']
            p = json_info['p']
            c = json_info['class']
            if p > keep_score_thred:
                count += 1
                npz_dir = osp.join(dst_npz_root, c,
                                   '{}_{}.npz'.format(slide_id, i))
                # exist_ok avoids the check-then-create race of
                # ``if not exists: makedirs``.
                os.makedirs(osp.dirname(npz_dir), exist_ok=True)
                img = r.ReadRoi(x, y, w, h, 20).copy()
                label = CELL_TYPES.index(c) + 1  # 1-based
                np.savez_compressed(npz_dir, img=img, label=label)
    return count
def neg_image_segment():
    """Cut every unlabelled training slide into 1024x1024 JPEG tiles.

    A slide is processed when the training directory holds a ``.kfb`` file
    without a ``.json`` of the same stem.  Only whole tiles are produced
    (the remainder at the right/bottom edge is dropped).  Tile ``(i, j)`` is
    numbered ``i * n + j``, ``n`` being the number of tile rows, and an
    empty ``.txt`` label file is created next to every tile image.

    :raises AssertionError: when the number of such slides is not 250
    """
    dir_path = '/home/admin/jupyter/Data/train'
    entries = os.listdir(dir_path)
    # Set lookup keeps the labelled/unlabelled split linear in the number
    # of files; endswith() only accepts real suffix matches.
    labelled = {s[:-5] for s in entries if s.endswith('.json')}
    kfb_files = [s[:-4] for s in entries if s.endswith('.kfb')]
    image_names = [stem + '.kfb' for stem in kfb_files if stem not in labelled]

    print(len(image_names))
    assert len(image_names) == 250, 'error'

    for image_name in image_names:
        stem = image_name[:-4]
        print(stem)
        image_path = os.path.join(dir_path, image_name)

        scale_kfb = 20
        read = kfbReader.reader()
        kfbReader.reader.ReadInfo(read, image_path, scale_kfb, True)
        kfbReader.reader.setReadScale(read, scale=20)

        height = read.getHeight()
        width = read.getWidth()
        print('height:', height)
        print('width:', width)

        n = int(height / 1024.0)
        m = int(width / 1024.0)
        for i in range(m):
            for j in range(n):
                x_tmp = i * 1024
                y_tmp = j * 1024
                roi = read.ReadRoi(x_tmp, y_tmp, 1024, 1024, 20)
                tile_stem = stem + '_segment_' + str(i * n + j)
                img_save_name = tile_stem + '.jpg'
                label_save_name = tile_stem + '.txt'
                cv.imwrite(
                    '/home/admin/jupyter/tianchi_data/train/neg_segment_images/'
                    + img_save_name, roi)
                # The slide has no labels: just create an empty label file.
                with open(
                        '/home/admin/jupyter/tianchi_data/train/neg_segment_labels/'
                        + label_save_name, 'w'):
                    pass
def gen_imgs(name, dataIds, sample_xywh, img_fns):
    """Crop, resize and save the sample windows of every slide.

    :param name: prefix for the progress-bar caption
    :param dataIds: slide ids; ``pos_path + dataId + '.kfb'`` is opened
    :param sample_xywh: maps slide id to a list of ``(x, y, w, h)`` windows
    :param img_fns: maps slide id to the output file names, index-aligned
        with ``sample_xywh``

    Each window is resized to ``(W, H)`` and written into ``img_dir``.
    """
    slide = kfbReader.reader()
    # The context manager closes the progress bar even if a read fails.
    with tqdm(total=len(dataIds)) as bar:
        bar.set_description(name + ' Croping...')
        for dataId in dataIds:
            bar.update(1)
            kfbReader.reader.ReadInfo(slide, pos_path + dataId + '.kfb', 20, True)
            for i, window in enumerate(sample_xywh[dataId]):
                sample_x, sample_y, sample_w, sample_h = window
                img_fn = img_fns[dataId][i]
                scale_sample_img = cv2.resize(
                    slide.ReadRoi(sample_x, sample_y, sample_w, sample_h, 20),
                    (W, H))
                marksave_path = os.path.join(img_dir, img_fn)
                if not cv2.imwrite(marksave_path, scale_sample_img):
                    # A failed write is treated as a stale file in the way:
                    # remove it and try once more.
                    print(marksave_path + ' is existed ! Will be removed...')
                    os.remove(marksave_path)
                    cv2.imwrite(marksave_path, scale_sample_img)
def gen_imgs(name, test_path, dir, imgids, topleft, imgid2name):
    """Crop the listed windows out of their slides and save them as images.

    :param name: prefix for the progress-bar caption
    :param test_path: prefix of the ``<dataId>.kfb`` slide files
    :param dir: output prefix; the tile file name is appended to it
    :param imgids: image ids to process
    :param topleft: maps image id to ``(x, y, w, h)`` of the window
    :param imgid2name: maps image id to the tile file name; the slide id is
        the part before the first ``'_'``

    Windows that are not 1024x1024 are resized to that size.
    """
    slide = kfbReader.reader()
    opened_id = None  # slide currently loaded into ``slide``

    with tqdm(total=len(imgids)) as pbar:
        pbar.set_description(name + ': Croping samples...')
        for imgid in imgids:
            pbar.update(1)
            tlx, tly, rw, rh = topleft[imgid]
            file_name = imgid2name[imgid]
            dataId = file_name[:file_name.find('_')]

            # Consecutive tiles usually come from the same slide; re-read
            # the slide info only when the slide actually changes.
            if dataId != opened_id:
                kfbReader.reader.ReadInfo(slide, test_path + dataId + '.kfb',
                                          20, False)
                opened_id = dataId

            simg = slide.ReadRoi(tlx, tly, rw, rh, 20)
            if rw != 1024 or rh != 1024:
                simg = cv2.resize(simg, (1024, 1024))

            if not cv2.imwrite(dir + file_name, simg):
                # A failed write is treated as a stale file in the way:
                # remove it and try once more.
                print(dir + file_name + ' is existed ! Will be removed...')
                os.remove(dir + file_name)
                cv2.imwrite(dir + file_name, simg)
def get_roi_from_src_img(cfb_dir, json_dir, scale=20):
    """Collect the ROI images of one slide.

    - inputs:
        cfb_dir: path of the slide file.
        json_dir: path of the label file read with ``load_json``.
        scale: scale passed to the reader.
    - outputs:
        res: list(tuple). One entry per label of class 'roi':
             (copy of roi_img, (ltx, lty)).
    """
    json_infos = load_json(json_dir)

    kfb_reader = kr.reader()
    kr.reader.ReadInfo(kfb_reader, kfbPath=cfb_dir, scale=scale, readAll=False)

    res = []
    for entry in json_infos:
        if entry['class'] != 'roi':
            continue
        left, top = entry["x"], entry["y"]
        roi_image = kfb_reader.ReadRoi(left, top, entry["w"], entry["h"], scale)
        res.append((roi_image.copy(), (left, top)))
    return res
def draw_rectangle(labels_filename, corres_json_list, total_time):
    """Save the ROI of one slide as a JPEG and accumulate the time spent.

    :param labels_filename: label file name; dropping its last 10 characters
        and appending ``.kfb`` gives the slide name under ``kfb_image_root``
    :param corres_json_list: label entries; the first one supplies the ROI
        rectangle (keys ``x, y, w, h``)
    :param total_time: running total of seconds spent so far
    :return: ``total_time`` plus the seconds spent in this call

    NOTE: drawing the remaining entries as rectangles on the ROI used to be
    sketched here in commented-out code; it is not performed.
    """
    started = time.time()  # one timing sample per saved image

    roi_entry = corres_json_list[0]
    Roi_x = roi_entry['x']
    Roi_y = roi_entry['y']
    Roi_w = roi_entry['w']
    Roi_h = roi_entry['h']

    # Open the slide that belongs to this label file.
    filename = labels_filename[:-10] + '.kfb'
    path = os.path.join(kfb_image_root, filename)
    image = kfbReader.reader()
    kfbReader.reader.ReadInfo(image, path, Scale, True)

    # Read the ROI at the scale the reader reports; the scale argument maps
    # the rectangle onto that scale, so it matters a lot.
    scale = kfbReader.reader.getReadScale(image)
    draw = image.ReadRoi(Roi_x, Roi_y, Roi_w, Roi_h, scale=scale)

    # Save the image.
    cv.imwrite(
        f"E:/ali_cervical_carcinoma_data/ROI_image/{labels_filename}.jpg",
        draw)

    cost_time = time.time() - started
    total_time = total_time + cost_time
    print(f'The {labels_filename} done,which cost {cost_time}s')
    return total_time
def neg_sample():
    """Draw 30 random 1024x1024 samples from each not-yet-sampled slide.

    Slides whose stem already appears in the list file (first column, with
    the trailing ``_sample...`` part removed) are skipped.  For every other
    slide in ``neg_0`` .. ``neg_5`` thirty random windows are saved as
    ``<slide>_sample_<i>.jpg`` together with an empty ``.txt`` label file.

    :raises ValueError: from ``random.randint`` when a slide is narrower or
        shorter than 1024
    """
    path = '/mnt/C/tianchi/neg_samples/test/test.txt'
    tmp = []
    with open(path, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:  # tolerate blank lines in the list file
                continue
            first = fields[0]
            tmp.append(first[0:first.rfind('_sample')])
    print(tmp)
    already_sampled = set(tmp)  # O(1) membership for the skip test below

    neg_dir = ['neg_0', 'neg_1', 'neg_2', 'neg_3', 'neg_4', 'neg_5']
    data_path = '/mnt/C/tianchi/raw_data/'
    for sub_dir in neg_dir:
        dir_path = data_path + sub_dir
        for image_name in os.listdir(dir_path):
            stem = image_name[:-4]
            print(stem)
            if stem in already_sampled:
                continue

            image_path = os.path.join(dir_path, image_name)
            scale_kfb = 20
            read = kfbReader.reader()
            kfbReader.reader.ReadInfo(read, image_path, scale_kfb, True)
            kfbReader.reader.setReadScale(read, scale=20)
            height = read.getHeight()
            width = read.getWidth()

            for i in range(30):
                x_tmp = random.randint(0, width - 1024)
                y_tmp = random.randint(0, height - 1024)
                roi = read.ReadRoi(x_tmp, y_tmp, 1024, 1024, 20)
                img_save_name = stem + '_sample_' + str(i) + '.jpg'
                label_save_name = stem + '_sample_' + str(i) + '.txt'
                cv.imwrite(
                    '/mnt/C/tianchi/neg_samples/neg_sample_7500_images/'
                    + img_save_name, roi)
                # The sample has no labels: just create an empty label file.
                with open(
                        '/mnt/C/tianchi/neg_samples/neg_sample_7500_labels/'
                        + label_save_name, 'w'):
                    pass
def generate_tile_img(res_df_roi):
    """Cut every ROI listed in a DataFrame into square JPEG tiles.

    :param res_df_roi: DataFrame with columns ``file, x, y, w, h``, indexed
        ``0 .. n-1``.  Its ``file`` column is rewritten in place from
        ``*.json`` to ``*.kfb``.

    Tiles of ``size`` x ``size`` are written to
    ``./data/roi_tiles/<slide>_<counter>.jpg``; the counter restarts at 0
    for every row.  ``scale``, ``size`` and ``overlap`` are module globals.

    NOTE(review): the step is clamped so that the last position ends flush
    with the ROI edge, but the loop test ``pos + size < extent`` rejects
    exactly that position, so the edge-aligned row/column of tiles (and any
    ROI whose side equals ``size``) is never written.  This is left as is
    because changing it renumbers the tiles; confirm whether it is intended.
    """
    # regex=False: '.json' is a literal suffix, not a pattern in which '.'
    # matches any character (the default on older pandas versions).
    res_df_roi['file'] = res_df_roi['file'].str.replace(
        ".json", ".kfb", regex=False)

    for i in trange(res_df_roi.shape[0]):
        file = res_df_roi.loc[i, 'file']
        read = kfbReader.reader()
        read.ReadInfo(f"./data/train_pos/{file}", scale, False)
        roi = read.ReadRoi(int(res_df_roi.loc[i, 'x']),
                           int(res_df_roi.loc[i, 'y']),
                           int(res_df_roi.loc[i, 'w']),
                           int(res_df_roi.loc[i, 'h']), scale)
        roi = np.asarray(roi)  # indexed below as rows (y), columns (x), channels

        x, y = 0, 0
        counter = 0
        while y + size < roi.shape[0]:
            x = 0
            while x + size < roi.shape[1]:
                tile = roi[y:y + size, x:x + size, :]
                cv.imwrite(
                    f"./data/roi_tiles/{file.replace('.kfb', '')}_{counter}.jpg",
                    tile)
                counter += 1
                x += min((size - overlap), roi.shape[1] - (x + size))
            y += min((size - overlap), roi.shape[0] - (y + size))
def get_cells(wsi, label, path):
    """
    Save every non-ROI label box of a slide as its own JPEG.

    :param wsi: kfb file name
    :param label: label file name (JSON list of dicts with x, y, w, h, class)
    :param path: image save path; files are named <slide>_<x>_<y>_<w>_<h>.jpg
    """
    with open(label, 'r') as f:
        entries = json.load(f)

    slide_name = os.path.splitext(os.path.basename(wsi))[0]

    scale = 20
    reader = kr.reader()
    kr.reader.ReadInfo(reader, wsi, scale, True)

    # ROI entries are skipped; the remaining boxes are cut out directly.
    boxes = (entry for entry in entries if entry['class'] != 'roi')
    for box in boxes:
        x, y, w, h = box['x'], box['y'], box['w'], box['h']
        img_name = os.path.join(
            path, '{}_{}_{}_{}_{}.jpg'.format(slide_name, x, y, w, h))
        img = reader.ReadRoi(x, y, w, h, scale)
        cv2.imwrite(img_name, img)
def pos_process():
    """Export every ROI of the labelled training slides with its labels.

    For each ``<name>.json`` in the training directory, every ``roi`` entry
    is read from ``<name>.kfb`` and saved as ``ROI_images/<name>roi<k>.jpg``.
    All non-ROI boxes lying completely inside that ROI are written to
    ``ROI_labels/<name>roi<k>.txt``, one per line, as
    ``<center x> <center y> <w> <h> <class>`` with the centre relative to
    the ROI origin.  The file has no trailing newline.
    """
    path = '/home/admin/jupyter/Data/train'
    out_root = '/home/admin/jupyter/tianchi_data/train'
    # endswith() only accepts real suffix matches before slicing it off.
    filename = [s[:-5] for s in os.listdir(path) if s.endswith('.json')]

    for name in filename:
        print(name)
        filepath = os.path.join(path, name + '.json')
        image_path = os.path.join(path, name + '.kfb')
        with open(filepath, encoding='utf-8') as f:
            label_dict = json.loads(f.read())

        read = None  # opened lazily, once per slide, on the first ROI
        count = 0
        for dt in label_dict:
            if dt['class'] != 'roi':
                continue
            count += 1
            if read is None:
                scale_kfb = 20
                read = kfbReader.reader()
                kfbReader.reader.ReadInfo(read, image_path, scale_kfb, True)
                kfbReader.reader.setReadScale(read, scale=20)

            stem = name + 'roi' + str(count)
            save_path = os.path.join(out_root, 'ROI_images/' + stem + '.jpg')
            roi = read.ReadRoi(dt['x'], dt['y'], dt['w'], dt['h'], 20)
            cv.imwrite(save_path, roi)

            tmp = []
            for dt1 in label_dict:
                if dt1['class'] == 'roi':
                    continue
                inside = (dt1['x'] >= dt['x'] and dt1['y'] >= dt['y']
                          and (dt1['x'] + dt1['w']) <= (dt['x'] + dt['w'])
                          and (dt1['y'] + dt1['h']) <= (dt['y'] + dt['h']))
                if inside:
                    tmp.append(
                        str(dt1['x'] - dt['x'] + dt1['w'] / 2.0) + ' '
                        + str(dt1['y'] - dt['y'] + dt1['h'] / 2.0) + ' '
                        + str(dt1['w']) + ' ' + str(dt1['h']) + ' '
                        + dt1['class'])

            label_path = os.path.join(out_root, 'ROI_labels/' + stem + '.txt')
            with open(label_path, 'w') as f:
                # join() separates by position.  The previous
                # ``s != tmp[-1]`` test compared by value, so a record equal
                # to the last one lost its newline and merged with the next.
                f.write('\n'.join(tmp))
def _confident_detections(output, class_names, conf_thresh_dict):
    """Yield (box, score, class_name) for detections at or above threshold."""
    instances = output["instances"].to(torch.device("cpu"))
    boxes = instances.pred_boxes.tensor.numpy()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()
    for box, score, cls in zip(boxes, scores, classes):
        cls_name = class_names[cls]
        if score < conf_thresh_dict[cls_name]:
            continue
        yield box, score, cls_name


def _format_prediction(x, y, w, h, score, cls_name):
    """Build one result dict with keys in the order x, y, w, h, p, class."""
    pred_result = OrderedDict()
    pred_result['x'] = x
    pred_result['y'] = y
    pred_result['w'] = w
    pred_result['h'] = h
    pred_result['p'] = float(round(score, 5))
    pred_result['class'] = str(cls_name)
    return pred_result


def _unclipped_predictions(output, class_names, conf_thresh_dict, off_x, off_y):
    """Shift tile-relative boxes by the tile origin without any clipping."""
    results = []
    for box, score, cls_name in _confident_detections(
            output, class_names, conf_thresh_dict):
        xmin, ymin, xmax, ymax = box
        width = xmax - xmin + 1
        height = ymax - ymin + 1
        results.append(_format_prediction(
            int(xmin + off_x), int(ymin + off_y),
            int(width), int(height), score, cls_name))
    return results


def _clipped_predictions(output, class_names, conf_thresh_dict,
                         off_x, off_y, win_w, win_h):
    """Shift boxes by the window origin and clip them to the window."""
    results = []
    for box, score, cls_name in _confident_detections(
            output, class_names, conf_thresh_dict):
        # window-based coordinate
        xmin, ymin, xmax, ymax = box
        # convert to kfb-based coordinate
        bbox_xmin = int(xmin + off_x)
        bbox_ymin = int(ymin + off_y)
        bbox_xmax = int(xmax + off_x)
        bbox_ymax = int(ymax + off_y)
        # skip boxes that start beyond the window
        if (bbox_xmin > off_x + win_w - 1) or (bbox_ymin > off_y + win_h - 1):
            continue
        new_bbox_xmin = int(max(bbox_xmin, off_x))
        new_bbox_ymin = int(max(bbox_ymin, off_y))
        new_bbox_xmax = int(min(bbox_xmax, off_x + win_w - 1))
        new_bbox_ymax = int(min(bbox_ymax, off_y + win_h - 1))
        new_bbox_w = int(max(0, new_bbox_xmax - new_bbox_xmin + 1))
        new_bbox_h = int(max(0, new_bbox_ymax - new_bbox_ymin + 1))
        if new_bbox_w * new_bbox_h > 0:
            results.append(_format_prediction(
                new_bbox_xmin, new_bbox_ymin, new_bbox_w, new_bbox_h,
                score, cls_name))
    return results


def prediction_single_roi(kfb_img_path, roi_dict, img_size, stride, kfb_scale,
                          predictor, output_save_path, class_names,
                          FINAL_NMS_SWITCH, FINAL_NMS_THRESH_DICT,
                          CONF_THRESH_DICT):
    '''
    FUNCTION:
        Predict all img_size x img_size tiles split from a ROI. The ROI is
        handled as one of three types, decided by the number of sliding
        steps int((side - img_size) / stride + 1) per direction:
            1. large:  both step counts are non-zero; tiles slide in both
                       directions and boxes are reported unclipped.
            2. medium: exactly one step count is zero; tiles slide along
                       the other direction and boxes are clipped to the
                       tile extent in the sliding direction and to the ROI
                       extent in the other.
            3. small:  both step counts are zero; a single tile is read at
                       the ROI origin and boxes are clipped to the ROI.
    INPUTS:
        1. kfb_img_path (str): a path to an image kfb file
        2. roi_dict (dict): a ROI dict. e.g.,
           {"x": 4670, "y": 20127, "w": 3189, "h": 3174, "class": "roi"}
        3. img_size (int): image size in training process. i.e., 1000
        4. stride (int): the sliding stride. e.g., 1000*(1/2)
        5. kfb_scale (int): the scale of kfb file. e.g., 20
        6. predictor: DefaultPredictor used to get prediction result
        7. output_save_path (str): the output saving path (not used here)
        8. class_names (list): a list of class names
        9. FINAL_NMS_SWITCH (bool): whether to do NMS
        10. FINAL_NMS_THRESH_DICT (dict): NMS threshold per class name
        11. CONF_THRESH_DICT (dict): confidence threshold per class name
    OUTPUTS:
        1. prediction (list): the prediction result, formatted as follows:
           [{"x": 22890, "y": 3877, "w": 96, "h": 55, "p": 0.94135, "class": "ASC-H"},
            {"x": 20411, "y": 2260, "w": 25, "h": 83, "p": 0.67213, "class": "ASC-US"},
            {"x": 26583, "y": 7937, "w": 72, "h": 128, "p": 0.73228, "class": "Candida"}]
    '''
    time_start = time.time()
    im_name = kfb_img_path.split('/')[-1][:-4]

    # read kfb file
    read = kfbReader.reader()
    read.ReadInfo(kfb_img_path, kfb_scale, False)

    # height/width of the ROI and read scale
    H = roi_dict['h']
    W = roi_dict['w']
    S = kfb_scale

    # number of sliding steps per direction
    HOR_MOV_STEP = int((W - img_size) / stride + 1)
    VER_MOV_STEP = int((H - img_size) / stride + 1)

    x = roi_dict['x']
    y = roi_dict['y']
    w = h = img_size

    # number of tiles in the two-directional case (reported in the log line)
    total_img_arr_num = VER_MOV_STEP * HOR_MOV_STEP

    prediction = []
    if VER_MOV_STEP != 0 and HOR_MOV_STEP != 0:
        roi_type = 'large'
        for ver in range(VER_MOV_STEP):
            for hor in range(HOR_MOV_STEP):
                x_new = x + hor * stride
                y_new = y + ver * stride
                img_arr = read.ReadRoi(x_new, y_new, w, h, S)
                prediction.extend(_unclipped_predictions(
                    predictor(img_arr), class_names, CONF_THRESH_DICT,
                    x_new, y_new))
    elif VER_MOV_STEP == 0 and HOR_MOV_STEP == 0:
        roi_type = 'small'
        img_arr = read.ReadRoi(x, y, w, h, S)
        prediction.extend(_clipped_predictions(
            predictor(img_arr), class_names, CONF_THRESH_DICT, x, y, W, H))
    else:
        roi_type = 'medium'
        if VER_MOV_STEP == 0:
            # slide horizontally: clip to tile width and ROI height
            win_w, win_h = img_size, H
            origins = [(x + hor * stride, y) for hor in range(HOR_MOV_STEP)]
        else:
            # slide vertically: clip to ROI width and tile height
            win_w, win_h = W, img_size
            origins = [(x, y + ver * stride) for ver in range(VER_MOV_STEP)]
        for x_new, y_new in origins:
            img_arr = read.ReadRoi(x_new, y_new, w, h, S)
            prediction.extend(_clipped_predictions(
                predictor(img_arr), class_names, CONF_THRESH_DICT,
                x_new, y_new, win_w, win_h))

    before_nms_dets_num = len(prediction)
    if FINAL_NMS_SWITCH:
        pred = prediction
        prediction = []
        for cls_n in class_names:
            # detections of this class, format [{'x','y','w','h','p','class'}, ...]
            PEDS = [p for p in pred if p['class'] == cls_n]
            if not PEDS:
                continue
            # format: [[xmin, ymin, xmax, ymax, p], ...]
            dets2 = [[p['x'], p['y'], p['x'] + p['w'] - 1,
                      p['y'] + p['h'] - 1, p['p']] for p in PEDS]
            keep2 = py_cpu_nms(np.array(dets2), FINAL_NMS_THRESH_DICT[cls_n])
            prediction.extend(PEDS[i] for i in keep2)
    # Assigned outside the NMS branch so the log line below always has it.
    after_nms_dets_num = len(prediction)

    time_end = time.time()
    print('Image %s-%s_roi %s | total_img_arr_num: %d | before_nms_dets_num: %d | after_nms_dets_num: %d | time: %.3f' \
        % (im_name, roi_type, str(roi_dict), total_img_arr_num, before_nms_dets_num, after_nms_dets_num, time_end-time_start))
    return prediction
import glob
import os
import time

import cv2 as cv
import kfbReader
import pandas as pd
from PIL import Image
from tqdm import tqdm

from scripts.utils import load_full_kfb

# Downscale every negative training slide so that its shorter side becomes
# ``size`` pixels (aspect ratio preserved) and save it as a JPEG.
scale = 20
size = 1024

for fp in tqdm(glob.glob("./data/train_neg/*")):
    # basename/splitext instead of replacing the directory prefix: the
    # prefix replace breaks as soon as glob returns a different separator.
    f_name = os.path.splitext(os.path.basename(fp))[0] + '.jpg'

    read = kfbReader.reader()
    read.ReadInfo(fp, scale, False)
    a = load_full_kfb(read)

    # a.shape[0] is used as the height and a.shape[1] as the width.
    if a.shape[0] > a.shape[1]:
        width = size
        height = int(size * a.shape[0] / a.shape[1])
    else:
        height = size
        width = int(size * a.shape[1] / a.shape[0])

    a_small = cv.resize(a, (width, height))
    cv.imwrite(f"./data/train_neg_1024/{f_name}", a_small)

print("finished.")
time.sleep(10)
# Draw every positive box onto its ROI and save the ROI as a JPEG.
# Raw strings: "\d", "\p" and "\l" are invalid escape sequences that newer
# Python versions warn about; the resulting path values are unchanged.
data_path_pos = r"..\data\pos"
label_path = r"..\labels"
tmp_path = "C:\\tmp_cervical"
pos_files = os.listdir(data_path_pos)
save_dir = "."

# some hyper parameters
scale = 20

for ii, name1 in enumerate(pos_files):
    # paths for one slide
    pos_file = os.path.join(data_path_pos, name1)
    pos_file_name = name1.split(".")[0]
    json_file = os.path.join(label_path, pos_file_name + ".json")

    reader = kr.reader()
    reader.ReadInfo(pos_file, scale, False)
    width = reader.getWidth()
    height = reader.getHeight()
    print("processing", pos_file, width, height, json_file)

    rois = get_roi(json_file)
    for idx, roi1 in enumerate(rois):
        roi = reader.ReadRoi(roi1["x"], roi1["y"], roi1["w"], roi1["h"], scale)
        for pos in roi1["poses"]:
            # box position relative to the ROI origin
            rx = pos["x"] - roi1["x"]
            ry = pos["y"] - roi1["y"]
            cv2.rectangle(roi, (rx, ry), (rx + pos["w"], ry + pos["h"]),
                          (0, 255, 0), 4)
        save_name = os.path.join(tmp_path,
                                 pos_file_name + "_roi" + str(idx) + ".jpg")
        cv2.imwrite(save_name, roi)
def save_roi_to_npz(src_dir, dst_dir, update_dir, cell_types):
    """Store every labelled ROI as a compressed npz with class-indexed boxes.

    :param src_dir: directory holding ``<name>.json`` and ``<name>.kfb``
    :param dst_dir: output directory (created when missing)
    :param update_dir: directory of replacement label files; a file here
        with the same name overrides the one in ``src_dir``
    :param cell_types: list of class names; a box's class is stored as its
        index in this list

    For each ``roi`` entry the image is read and every box whose class is in
    ``cell_types`` and whose top-left corner lies strictly inside the ROI
    becomes a ROI-relative row ``[x1, y1, x2, y2, class_index]`` (clipped to
    the ROI; a box reaching beyond the ROI is also printed).  ROIs without
    any such box are skipped; the others are saved as ``<name>_<n>.npz``
    with arrays ``img`` and ``label``.
    """
    json_paths = glob(os.path.join(src_dir, "*.json"))
    update_json_names = set(os.listdir(update_dir))
    os.makedirs(dst_dir, exist_ok=True)

    for json_path in tqdm(json_paths):
        json_name = osp.basename(json_path)
        # use update json
        if json_name in update_json_names:
            json_path = osp.join(update_dir, json_name)

        # basename() instead of split("/") so the stem is also correct with
        # other path separators.
        filename = json_name.split('.')[0]
        pos_path = osp.join(src_dir, filename + '.kfb')
        with open(json_path, 'r') as f:
            json_infos = json.loads(f.read())

        r = kfbReader.reader()
        r.ReadInfo(pos_path, 20, True)

        # select the roi coord.
        roi_coords = [
            {'x': info['x'], 'y': info['y'], 'w': info['w'], 'h': info['h']}
            for info in json_infos
            if info['class'] == 'roi'
        ]

        roi_cnt = 1  # only advanced for ROIs that are actually saved
        for roi_coord in roi_coords:
            X, Y = roi_coord['x'], roi_coord['y']
            W, H = roi_coord['w'], roi_coord['h']
            img = r.ReadRoi(X, Y, W, H, 20).copy()

            rows = []
            for json_info in json_infos:
                if json_info['class'] not in cell_types:
                    continue
                x, y = json_info['x'], json_info['y']
                w, h = json_info['w'], json_info['h']
                if X < x < X + W and Y < y < Y + H:
                    rows.append([
                        max(int(x - X), 0),
                        max(int(y - Y), 0),
                        min(int(x - X + w), W),
                        min(int(y - Y + h), H),
                        cell_types.index(json_info['class']),
                    ])
                    if int(x - X + w) > W or int(y - Y + h) > H:
                        print(json_info)
            if not rows:
                continue
            # Built once from a list instead of np.append per box.
            label = np.array(rows, dtype="int").reshape(-1, 5)

            save_path = osp.join(dst_dir,
                                 filename + "_" + str(roi_cnt) + ".npz")
            np.savez_compressed(save_path, img=img, label=label)
            roi_cnt += 1
def create_pos_data(label_path, kfb_path):
    """Cut randomly shifted 1000x1000 crops around every ``pos`` label.

    For each label whose ``class`` is ``"pos"`` the function draws random
    offsets in ``[-500, 500)`` around the label's top-left corner, clamps the
    resulting window to the slide, and works out how every ``pos`` label of
    the slide falls into that window.  Labels that overlap the window get
    ``new_x``/``new_y``/``new_w``/``new_h`` (window-relative geometry) and a
    ``truncated`` flag.  A window in which any label keeps less than half of
    its width or height is rejected and redrawn.  Accepted windows with at
    least one label are written out through ``store_annotation``,
    ``create_annotations`` and ``cv2.imwrite`` under the name
    ``<slide stem>_<label index>_<count>``, with ``count`` running 3..9.

    Args:
        label_path: path of the JSON label file (a list of dicts with
            ``class``, ``x``, ``y``, ``w``, ``h``).
        kfb_path: path of the matching ``.kfb`` slide.
    """
    with open(label_path, "r") as f:
        labels = json.load(f)

    reader = kfbReader.reader()
    scale = 20
    reader.ReadInfo(kfb_path, scale, False)
    filename = kfb_path.split("/")[-1].split(".")[0]

    pos_labels = [label for label in labels if label["class"] == "pos"]
    WIDTH, HEIGHT, DELTA = 1000, 1000, 500

    # The slide size does not change between attempts; query it once.  The
    # upper bounds are floored at 0 so that a slide smaller than the crop
    # window can never yield a negative origin.
    max_x = max(reader.getWidth() - WIDTH, 0)
    max_y = max(reader.getHeight() - HEIGHT, 0)

    for i, label in enumerate(pos_labels):
        count = 3
        # NOTE(review): rejected windows are redrawn without advancing
        # ``count``, so the number of attempts is unbounded - confirm that
        # this is acceptable for densely annotated slides.
        while count < 10:
            is_truncated_too_small = False
            w_delta = random.randrange(-DELTA, DELTA)
            h_delta = random.randrange(-DELTA, DELTA)

            image = {
                "x": max(0, min(label["x"] + w_delta, max_x)),
                "y": max(0, min(label["y"] + h_delta, max_y)),
                "w": WIDTH,
                "h": HEIGHT,
            }
            right_edge = image["x"] + image["w"]
            bottom_edge = image["y"] + image["h"]

            lbl_json = []
            # For every label:
            # 1. decide whether it overlaps the window (one of its corners,
            #    as judged by the is_*_in_image helpers, is inside),
            # 2. decide whether it is truncated by the window border,
            # 3. record its position and size inside the window next to its
            #    original size.
            # Work on copies so the source labels stay untouched.
            for lbl in deepcopy(pos_labels):
                if is_left_top_in_image(lbl, image):
                    lbl["new_x"] = lbl["x"] - image["x"]
                    lbl["new_y"] = lbl["y"] - image["y"]
                    lbl["new_w"] = min(right_edge - lbl["x"], lbl["w"])
                    lbl["new_h"] = min(bottom_edge - lbl["y"], lbl["h"])
                    lbl["truncated"] = (lbl["new_w"] != lbl["w"]
                                        or lbl["new_h"] != lbl["h"])
                elif is_right_top_in_image(lbl, image):
                    lbl["new_x"] = 0
                    lbl["new_y"] = lbl["y"] - image["y"]
                    lbl["new_w"] = lbl["w"] - (image["x"] - lbl["x"])
                    lbl["new_h"] = min(bottom_edge - lbl["y"], lbl["h"])
                    lbl["truncated"] = True
                elif is_left_down_in_image(lbl, image):
                    lbl["new_x"] = lbl["x"] - image["x"]
                    lbl["new_y"] = 0
                    lbl["new_w"] = min(right_edge - lbl["x"], lbl["w"])
                    lbl["new_h"] = lbl["h"] - (image["y"] - lbl["y"])
                    lbl["truncated"] = True
                elif is_right_down_in_image(lbl, image):
                    lbl["new_x"], lbl["new_y"] = 0, 0
                    lbl["new_w"] = lbl["w"] - (image["x"] - lbl["x"])
                    lbl["new_h"] = lbl["h"] - (image["y"] - lbl["y"])
                    lbl["truncated"] = True

                if "new_w" in lbl:
                    # A clipped label that lost more than half of its width
                    # or height carries too little information: reject the
                    # whole window instead of keeping a misleading box.
                    if lbl["new_w"] < lbl["w"] / 2 or lbl["new_h"] < lbl["h"] / 2:
                        is_truncated_too_small = True
                        break
                    lbl_json.append(lbl)

            # Record image and label
            if is_truncated_too_small:
                continue
            if len(lbl_json) > 0:
                anno_file = filename + "_" + str(i) + "_" + str(count)
                store_annotation(anno_file, lbl_json)
                create_annotations(anno_file, lbl_json)
                # Pixels are only needed for accepted windows, so the read is
                # deferred until here.
                roi = reader.ReadRoi(image["x"], image["y"],
                                     image["w"], image["h"], scale)
                cv2.imwrite(CONSTANT.POS_IMAGE_PATH + anno_file + ".jpg", roi)
            count += 1
def predict(sample_paths, args):
    """Run tiled detection over whole slides and write one JSON per slide.

    Each slide is split into a grid of ``cfg.patch_size`` tiles (grid layout
    from ``calc_split_num``).  Every tile is pushed through the network,
    anchors scoring above 0.05 are kept and shifted into slide coordinates,
    and the detections of all tiles are merged with ``nms`` (threshold 0.5).
    The result, a list of ``{"x", "y", "w", "h", "p"}`` dicts, is written to
    ``cfg.result_path/<slide stem>.json``.  A slide without any detection
    yields an empty list.

    Args:
        sample_paths: iterable of ``.kfb`` slide paths.
        args: namespace providing ``snapshot`` for ``build_network``.
    """
    model, start_epoch = build_network(snapshot=args.snapshot, backend="retinanet")
    model.eval()
    os.makedirs(cfg.result_path, exist_ok=True)
    print(
        "Begin to predict mask: ", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    )

    patch_w, patch_h = cfg.patch_size[0], cfg.patch_size[1]

    for sample_path in sample_paths:
        filename = sample_path.split("/")[-1].split(".")[0]

        read = kfbReader.reader()
        read.ReadInfo(sample_path, 20, False)
        width = read.getWidth()
        height = read.getHeight()
        strides, x_num, y_num = calc_split_num((width, height))

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        transformed_all = []
        classification_all = []

        # Visit the full grid.  Iterating only over half of each axis would
        # cover just the top-left quarter of the slide and would never reach
        # the edge-anchored last row/column handled below.
        for i in range(x_num):
            # The last column is anchored to the right edge of the slide.
            x = strides[0] * i if i < x_num - 1 else width - patch_w
            for j in range(y_num):
                # The last row is anchored to the bottom edge of the slide.
                y = strides[1] * j if j < y_num - 1 else height - patch_h

                img = read.ReadRoi(x, y, patch_w, patch_h, scale=20).copy()
                # HWC uint8 -> 1xCxHxW float32 in [0, 1]
                img = img.transpose((2, 0, 1))[np.newaxis, :, :, :]
                img = torch.from_numpy(img.astype(np.float32) / 255.0).float()

                with torch.no_grad():
                    classification, regression, anchors = model(img.cuda())
                transformed_anchors = regressBoxes(anchors, regression)
                transformed_anchors = clipBoxes(transformed_anchors)

                scores = classification
                scores_over_thresh = (scores > 0.05)[0, :, 0]
                if scores_over_thresh.sum() == 0:
                    continue

                classification = classification[0, scores_over_thresh, :]
                transformed_anchors = transformed_anchors[0, scores_over_thresh, :]
                # Tile-local box corners -> slide coordinates.
                transformed_anchors[:, 0] = transformed_anchors[:, 0] + x
                transformed_anchors[:, 1] = transformed_anchors[:, 1] + y
                transformed_anchors[:, 2] = transformed_anchors[:, 2] + x
                transformed_anchors[:, 3] = transformed_anchors[:, 3] + y
                scores = scores[0, scores_over_thresh, :]

                transformed_all.append(torch.cat([transformed_anchors, scores], dim=1))
                classification_all.append(classification)

        pos_all = []
        # torch.cat raises on an empty list, so only merge when at least one
        # tile produced a detection.
        if transformed_all:
            transformed_all = torch.cat(transformed_all, dim=0)
            classification_all = torch.cat(classification_all, dim=0)

            anchors_num_idx = nms(transformed_all, 0.5)
            scores = classification_all[anchors_num_idx, :].detach().cpu().numpy()
            transformed = transformed_all[anchors_num_idx, :].detach().cpu().numpy()

            for box, score in zip(transformed, scores):
                pos_all.append(
                    {
                        "x": int(box[0]),
                        "y": int(box[1]),
                        # Never emit a degenerate (zero-sized) box.
                        "w": max(int(box[2] - box[0]), 1),
                        "h": max(int(box[3] - box[1]), 1),
                        "p": float(score[0]),
                    }
                )

        with open(os.path.join(cfg.result_path, filename + ".json"), "w") as f:
            json.dump(pos_all, f)

        print(
            "Finish predict mask: ",
            filename,
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
        )
def predict(sample_paths, args):
    """Run tiled detection over whole slides, merging detections in chunks.

    Each slide is split into a grid of ``cfg.patch_size`` tiles (grid layout
    from ``calc_split_num``).  Every tile is pushed through the network,
    anchors scoring above 0.05 are kept and shifted into slide coordinates.
    The collected detections are split into chunks of at most roughly 200000
    rows and each chunk is handed to ``handle_nms``, which accumulates the
    final list.  That list is written to
    ``cfg.result_path/<slide stem>.json``; a slide without any detection
    yields an empty list.

    Args:
        sample_paths: iterable of ``.kfb`` slide paths.
        args: namespace providing ``snapshot`` for ``build_network``.
    """
    model, start_epoch = build_network(snapshot=args.snapshot,
                                       backend='retinanet')
    model.eval()
    os.makedirs(cfg.result_path, exist_ok=True)
    print("Begin to predict mask: ",
          time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    patch_w, patch_h = cfg.patch_size[0], cfg.patch_size[1]
    chunk_rows = 200000

    for sample_path in sample_paths:
        filename = sample_path.split('/')[-1].split('.')[0]

        read = kfbReader.reader()
        read.ReadInfo(sample_path, 20, False)
        width = read.getWidth()
        height = read.getHeight()
        strides, x_num, y_num = calc_split_num((width, height))

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        transformed_all = []
        classification_all = []

        for i in range(x_num):
            # The last column is anchored to the right edge of the slide.
            x = strides[0] * i if i < x_num - 1 else width - patch_w
            for j in range(y_num):
                # The last row is anchored to the bottom edge of the slide.
                y = strides[1] * j if j < y_num - 1 else height - patch_h

                img = read.ReadRoi(x, y, patch_w, patch_h, scale=20).copy()
                # HWC uint8 -> 1xCxHxW float32 in [0, 1]
                img = img.transpose((2, 0, 1))[np.newaxis, :, :, :]
                img = torch.from_numpy(img.astype(np.float32) / 255.0).float()

                with torch.no_grad():
                    classification, regression, anchors = model(img.cuda())
                transformed_anchors = regressBoxes(anchors, regression)
                transformed_anchors = clipBoxes(transformed_anchors)

                scores = classification
                scores_over_thresh = (scores > 0.05)[0, :, 0]
                if scores_over_thresh.sum() == 0:
                    continue

                classification = classification[0, scores_over_thresh, :]
                transformed_anchors = transformed_anchors[
                    0, scores_over_thresh, :]
                # Tile-local box corners -> slide coordinates.
                transformed_anchors[:, 0] = transformed_anchors[:, 0] + x
                transformed_anchors[:, 1] = transformed_anchors[:, 1] + y
                transformed_anchors[:, 2] = transformed_anchors[:, 2] + x
                transformed_anchors[:, 3] = transformed_anchors[:, 3] + y
                scores = scores[0, scores_over_thresh, :]

                transformed_all.append(
                    torch.cat([transformed_anchors, scores], dim=1))
                classification_all.append(classification)

        pos_all = []
        # torch.cat raises on an empty list, so only merge when at least one
        # tile produced a detection.
        if transformed_all:
            transformed_all = torch.cat(transformed_all, dim=0)
            classification_all = torch.cat(classification_all, dim=0)

            # Same chunk count as int((n + 200000) / 200000), computed in
            # integer arithmetic.
            num = transformed_all.size(0) // chunk_rows + 1
            trans = transformed_all.chunk(num, 0)
            classi = classification_all.chunk(num, 0)
            # Iterate over what chunk() actually returned rather than
            # indexing by the requested count.
            for trans_chunk, classi_chunk in zip(trans, classi):
                pos_all = handle_nms(trans_chunk, classi_chunk, pos_all)

        with open(os.path.join(cfg.result_path, filename + ".json"), 'w') as f:
            json.dump(pos_all, f)

        print("Finish predict mask: ", filename,
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
import cv2
import os

# Collect the ids of all labelled slides below ``pos_path`` and, for a random
# ~10% of their ROIs, compute the integer ROI geometry.
fs = []
demo_dir = 'roi_illustrations/'
pos_path = '/home/admin/jupyter/Data/train/'

for root, dirs, files in os.walk(pos_path):
    for file in files:
        # Match the extension explicitly; testing only the last character
        # would also pick up any other file whose name ends in "n".
        if file.endswith('.json'):
            fs.append(file[:-len('.json')])

for dataId in fs:
    rois = []
    poss = []
    # Close the label file deterministically instead of leaking the handle.
    with open(pos_path + dataId + '.json', 'r') as label_file:
        labels = json.load(label_file)

    slide = kfbReader.reader()
    kfbReader.reader.ReadInfo(slide, pos_path + dataId + '.kfb', 20, True)

    # Split the annotations into ROI rectangles and everything else.
    for label in labels:
        if label['class'] == 'roi':
            rois.append(label)
        else:
            poss.append(label)

    for roi in rois:
        # Keep roughly one ROI in ten.
        xx = np.random.uniform(0, 1)
        if xx < 0.9:
            continue
        # NOTE(review): the ROI geometry is computed but nothing in this
        # script consumes it yet - the visualisation step appears to be
        # missing; confirm against the original notebook.
        sx = int(roi['x'])
        sy = int(roi['y'])
        sw = int(roi['w'])
        sh = int(roi['h'])
def gen_csv():
    """Sample training windows around every annotation and write a label CSV.

    For each slide id in the module-level ``dataIds`` the non-``roi``
    annotations are loaded.  Every annotation is used
    ``weight[categories[class]]`` times as the seed of a sample window: a
    scale ratio is chosen from the annotation's longer side, the window size
    is ``W / ratio`` x ``H / ratio``, and the window origin is drawn at
    random so that the seed annotation lies inside the window.  Every
    annotation of the slide that lies completely inside the window
    contributes one CSV row with its window-relative corners multiplied by
    the scale ratio.

    The rows are written to a fixed CSV path with the columns
    ``path, xmin, ymin, xmax, ymax, cls``.

    Returns:
        ``(sample_xywh, img_fns)``: two dicts keyed by slide id holding, for
        every window that contains at least one annotation, its
        ``[x, y, w, h]`` and its image file name respectively.
    """
    img_fns = {}
    sample_xywh = {}
    imgs_num = 0
    labels_num = 0
    slide = kfbReader.reader()
    # One CSV row per annotation: [path, xmin, ymin, xmax, ymax, cls].
    rows = []
    # Counters for the four scale buckets, in the order of the branches below.
    scale_nums = [0, 0, 0, 0]

    for dataId in dataIds:
        sample_xywh[dataId] = []
        img_fns[dataId] = []
        pbar.update(1)
        kfbReader.reader.ReadInfo(slide, pos_path + dataId + '.kfb', 20, True)

        # Close the label file deterministically instead of leaking the handle.
        with open(pos_path + dataId + '.json', 'r') as label_file:
            labels = json.load(label_file)
        poss = [label for label in labels if label['class'] != 'roi']

        for pos_i in poss:
            gx = int(pos_i['x'])
            gy = int(pos_i['y'])
            gw = int(pos_i['w'])
            gh = int(pos_i['h'])
            garea = max(gw, gh)

            for _ in range(weight[categories[pos_i['class']]]):
                # Small objects are magnified, large ones shrunk.
                # NOTE(review): garea == 1000 falls through to the 1.0
                # bucket - confirm whether the ``> 1000`` bound is intended.
                if garea < 100:
                    scale_ratio = np.random.uniform(2, 6)
                    scale_nums[0] += 1
                elif 600 <= garea < 1000:
                    scale_ratio = np.random.uniform(0.2, 0.6)
                    scale_nums[1] += 1
                elif garea > 1000:
                    scale_ratio = 0.1
                    scale_nums[2] += 1
                else:
                    scale_ratio = 1.0
                    scale_nums[3] += 1

                sample_w = int(W / scale_ratio)
                sample_h = int(H / scale_ratio)

                # Origins for which the seed annotation fits in the window.
                # If the annotation is larger than the window the lower bound
                # exceeds the upper one and random.randint would raise; fall
                # back to the annotation's own corner (the containment test
                # below then simply rejects the seed).
                sample_x = random.randint(min(gx + gw - sample_w, gx), gx)
                sample_y = random.randint(min(gy + gh - sample_h, gy), gy)

                img_fn = str(dataId) + '_' + str(sample_x) + '_' + str(
                    sample_y) + '_' + str(scale_ratio) + '.jpg'
                marksave_path = os.path.join(marksave_dir + img_path, img_fn)

                n_labels = 0
                for pos_ii in poss:
                    pxi = pos_ii['x']
                    pyi = pos_ii['y']
                    pwi = pos_ii['w']
                    phi = pos_ii['h']
                    # Keep only annotations lying completely inside the window.
                    if (pxi >= sample_x and pyi >= sample_y
                            and pxi + pwi <= sample_x + sample_w
                            and pyi + phi <= sample_y + sample_h):
                        rows.append([
                            marksave_path,
                            int((pxi - sample_x) * scale_ratio),
                            int((pyi - sample_y) * scale_ratio),
                            int((pxi - sample_x + pwi) * scale_ratio),
                            int((pyi - sample_y + phi) * scale_ratio),
                            pos_ii['class'],
                        ])
                        n_labels += 1

                labels_num += n_labels
                if n_labels != 0:
                    sample_xywh[dataId].append(
                        [sample_x, sample_y, sample_w, sample_h])
                    img_fns[dataId].append(img_fn)
                    imgs_num += 1

    print(len(rows), imgs_num)
    csv_fn = "/home/admin/jupyter/zxy/label_2393&8484.csv"
    print('Output CSV with ' + str(imgs_num) + ' imgs and ' +
          str(labels_num) + ' labels in ' + csv_fn)
    print('2~6 0.2~0.6 0.1 1')
    print(scale_nums)

    col = ['path', 'xmin', 'ymin', 'xmax', 'ymax', 'cls']
    df = pd.DataFrame(rows, columns=col)
    df.to_csv(csv_fn, index=False)
    return sample_xywh, img_fns