def parent_children_lst(parent_path):
    """Collect every .jpg found under *parent_path* as a list of records.

    Each record is ``{'path': <file path>, 'type': <name of the file's
    immediate parent directory>}`` — the parent directory name is used as
    the cell-type label.
    """
    jpg_paths = generate_name_path_dict(parent_path, ['.jpg'])
    return [
        {'path': jpg, 'type': os.path.basename(os.path.dirname(jpg))}
        for jpg in jpg_paths.values()
    ]
def get_and_download(file_path):
    """Ensure every slide listed in the text file *file_path* exists in TIFF_FILES_PATH.

    Each line of the file is a slide name (spaces normalised to dashes).
    Slides already present under TIFF_FILES_PATH are skipped; otherwise the
    file is copied from LOCAL_TIFF_PATH, then from REMOTE_TIFF_PATH as a
    fallback. Names found in neither location are reported at the end.

    Fixes vs. original: the bare ``except:`` around the remote lookup/copy is
    replaced by explicit dict lookups — previously a failing ``shutil.copy``
    (permissions, disk full, …) was silently misreported as "NOT FOUND";
    copy errors now propagate. The redundant ``continue`` is removed.
    """
    tiff_dict = generate_name_path_dict(TIFF_FILES_PATH, ['.kfb', '.tif'])
    local_tiff_dict = generate_name_path_dict(LOCAL_TIFF_PATH, ['.kfb', '.tif'])
    remote_tiff_dict = generate_name_path_dict(REMOTE_TIFF_PATH, ['.kfb', '.tif'])
    print(remote_tiff_dict)
    with open(file_path) as f:
        lines = f.readlines()
    items = [line.replace('\n', '').replace(' ', '-') for line in lines]
    miss_tiff_lst = []
    total = len(items)
    for index, item in enumerate(items):
        print("%s / %s" % (index + 1, total))
        if item in tiff_dict:
            print("%s IS ALREADY EXIST!" % item)
            continue
        # Prefer the local mirror; fall back to the remote one.
        src = local_tiff_dict.get(item) or remote_tiff_dict.get(item)
        if src is None:
            miss_tiff_lst.append(item)
            print("%s NOT FOUND " % item)
        else:
            print("COPY FILE ...\nFROM %s\nTO %s" % (src, TIFF_FILES_PATH))
            shutil.copy(src, TIFF_FILES_PATH)
    print('\n'.join(miss_tiff_lst))
def collect_useful_tiff_by_txt(path):
    # Move every slide named in the text file at *path* from LOCAL_TIFF_PATH
    # into a fixed training-collection directory.
    collect_tiff_path = "/home/cnn/Development/DATA/TRAIN_DATA/TIFFS/TRAIN_TIFF_FOR_20181110/"
    local_tiff_dict = generate_name_path_dict(LOCAL_TIFF_PATH, ['.kfb', '.tif'])
    with open(path) as f:
        lines = f.readlines()
    # One slide name per line; spaces in names are normalised to dashes to
    # match the keys produced by generate_name_path_dict.
    items = [line.replace('\n', '').replace(' ', '-') for line in lines]
    total = len(items)
    for index, item in enumerate(items):
        print("%s / %s %s..." % (index + 1, total, item))
        if item in local_tiff_dict:
            shutil.move(local_tiff_dict[item], collect_tiff_path)
        else:
            print(item)
            # NOTE(review): source formatting was collapsed; exit() appears to
            # abort the whole run on the first missing slide — confirm this
            # placement (vs. after the loop) against the original file.
            exit()
# Directories holding the whole-slide (WSI) tiff/kfb files.
tif_path = [
    '/home/cnn/Development/DATA/TRAIN_DATA/TIFFS/SLIDE_4X/',
]
# Storage location of the auto-labelled cell images.
auto_path = [
    '/home/cnn/Development/DATA/NEW_REQUIREMENT_4X/CELLS',
]
# 1. If the name->path mapping file for the big images does not exist yet,
#    generate it (and the dict); otherwise parse it back into a dict.
tif_images_collections_path = os.path.join(METADATA_FILE_PATH, 'TIFF_IMAGES_PATH_DICT.txt')
if not os.path.exists(tif_images_collections_path):
    print('GENERATE TIFF IMAGES FILES PATH COLLECTIONS FILE...')
    tif_images = generate_name_path_dict(tif_path, ['.tif', '.kfb'], tif_images_collections_path)
else:
    print('LOAD TIFF IMAGES FILES PATH COLLECTIONS...')
    tif_images = {}
    with open(tif_images_collections_path) as f:
        # Each line is "<name>\t<path>"; duplicate names are reported and
        # the first occurrence wins.
        for line in f:
            name, path = line.replace('\n', '').split('\t')
            if name in tif_images:
                print('ERROR')
            else:
                tif_images[name] = path
if not tif_images:
    print("NO TIFF FILES FOUND!")
# do augmentation: flip # do_flip(path_train) t4 = datetime.now() print('[info] time cost for image flipping:', str(t4 - t3)) # generate txt files gen_txt(path_out) t5 = datetime.now() print('[info] time cost for text file generating:', str(t5 - t4)) print('[info] total time cost:', str(t5 - t1)) if __name__ == "__main__": label_files_path = ["../data/labels/"] wsi_files_path = [ "../data/pos_0/", "../data/pos_1", "../data/pos_2", "../data/pos_3", "../data/pos_4", "../data/pos_5", "../data/pos_6", "../data/pos_7", "../data/pos_8", "../data/pos_9" ] label_dict = generate_name_path_dict(label_files_path, ['.json']) wsi_dict = generate_name_path_dict(wsi_files_path, ['.kfb']) print('found {} label files and {} wsi files'.format( len(label_dict), len(wsi_dict))) path_out = "../data/postrain" process(label_dict, wsi_dict, path_out, size=608)
            # --- fragment of a longer function whose definition starts before
            # this chunk; nesting reconstructed from collapsed source —
            # confirm indentation levels against the original file.
            # Skip *item* if it overlaps an existing box with IOU > 0.8
            # (classic for-else dedup: the else runs only if no break fired).
            if cal_IOU((x, y, w, h), (x_, y_, w_, h_)) > 0.8:
                break
        else:
            lst_.append(item)
        write_to_labelme_xml(lst_, os.path.join(save_path, key + '.xml'))
        shutil.copy(image_path, save_path)
    else:
        raise Exception("%s NOT FOUND IN DICT" % file)


if __name__ == '__main__':
    image_608_path = "/home/tsimage/Development/DATA/remark"
    image_608_dict = generate_name_path_dict(image_608_path, ['.jpg'])
    data_save_path = "/home/tsimage/Development/DATA/recheck_xml_and_608"
    # Build name -> {label, path}, where label is the image's immediate
    # parent directory name. (Chunk appears to continue past this view.)
    dict_ = {}
    for key, value in image_608_dict.items():
        parent = os.path.dirname(value)
        label = os.path.basename(parent)
        dict_[key] = {
            "label": label,
            "path": value
        }
# Connect to the server, i.e. the machine running task_master.py.
master_address = '192.168.2.148'
print('Connect to server %s...' % master_address)
# Port and authkey must match the settings in task_master.py exactly.
m = QueueManager(address=(master_address, 5000), authkey=b'abc')
m.connect()
# Grab the shared task and result queues from the manager.
task = m.get_task_queue()
result = m.get_result_queue()
# Basename -> slide-path lookup for resolving incoming task names.
dict_ = generate_name_path_dict(SLIDE_STORAGE_PATH)
resource_save_path = '/home/cnn/Development/DATA/PRODUCTION_FULL_TEST/'
# Pull tasks from the task queue and push outcomes onto the result queue.
while True:
    try:
        obj = task.get(timeout=1)
    except queue.Empty:
        time.sleep(5)
        print('task queue is empty.')
    else:
        basename, _ = os.path.splitext(os.path.basename(obj['name']))
        print('Run Task Image Id = %s...\nPath=%s' % (obj['id'], dict_[basename]))
        slides_diagnose_worker([dict_[basename]], resource_save_path)
        result.put({'id': obj['id'], 'status': 1})
        # --- tail of a longer function whose definition starts before this
        # chunk; indentation reconstructed from collapsed source — confirm
        # against the original file.
        tasks.append(
            executor.submit(cell_sampling, xml_path, tif_path, save_path))
    # Count down submitted jobs as they finish so progress is visible.
    job_count = len(tasks)
    for future in as_completed(tasks):
        # result = future.result()  # get the returning result from calling fuction
        job_count -= 1
        print("One Job Done, Remaining Job Count: %s" % (job_count))


if __name__ == "__main__":
    # generate name mapping
    xml_files_path = '/home/data_samba/DATA/4TRAIN_DATA/20181216_BATCH_6.1/XMLS_CHECKED'
    tif_files_path = '/home/data_samba/DATA/4TRAIN_DATA/20181102/DATA_FOR_TRAIN/TIFFS'
    xml_dict = generate_name_path_dict(xml_files_path, ['.xml'])
    tif_dict = generate_name_path_dict(tif_files_path, ['.tif', '.kfb'])
    # Report annotation files that have no matching slide before cutting.
    count = 0
    for basename in xml_dict:
        if basename not in tif_dict:
            print("xml does not match with tif", basename)
        else:
            count += 1
    print("number of matched files", count)
    save_path = "/home/data_samba/Code_by_yuli/batch6.1_cells_b"
    cut_cells(xml_dict, tif_dict, save_path)
    # # @test cell_sampling