def for_dahui():
    """Rename the scanned ``.tif`` files after the labels from ``read_excel``.

    Labels and files are paired purely by position: the i-th path returned
    by ``scan_files`` is renamed to ``<label i>.tif`` inside the same
    directory. Both counts are printed first so a mismatch is visible.
    """
    labels = read_excel()
    tiff_dir = "D:\\2018-04-24\\tiff"
    tif_files = scan_files(tiff_dir, postfix=".tif")

    label_count = len(labels)
    print(label_count, len(tif_files))

    for index in range(label_count):
        old_path = tif_files[index]
        new_name = str(labels[index]) + ".tif"
        os.rename(old_path, os.path.join(tiff_dir, new_name))
def get_crops(tif_file, image_path, new_image_path, size):
    """Re-crop regions of a slide at positions encoded in existing image names.

    Every path returned by ``scan_files(image_path)`` is expected to end in
    ``_<x>_<y>`` followed by a 4-character extension. For each such (x, y) a
    ``size`` x ``size`` region is read from level 0 of ``tif_file`` and saved
    as ``<name>_<index>.jpg`` in ``new_image_path``.

    Args:
        tif_file: Path of the slide to read; characters ``[-23:-4]`` of this
            path are used as the output name prefix.
        image_path: Location passed to ``scan_files`` to list the source
            images whose names carry the coordinates.
        new_image_path: Directory the new crops are written to.
        size: Edge length, in pixels, of each square crop.

    Raises:
        ValueError: If a file name does not carry integer x / y fields.
    """
    name = tif_file[-23:-4]

    image_xy = []
    for image_name in scan_files(image_path):
        xy_part = image_name[:-4].rsplit("_", 2)
        image_xy.append([int(xy_part[-2]), int(xy_part[-1])])

    # This is the "size is smaller than the original image" variant: the
    # origin is shifted inwards by 16 px on both axes.
    # NOTE(review): the 16 px offset presumably centres the smaller crop
    # inside the original tile -- confirm against the original tile size.
    # A previously disabled variant for larger crops instead shifted the
    # origin outwards by size / 4, clamped at 0.
    slide = openslide.OpenSlide(tif_file)
    try:
        i = 0
        for xy in image_xy:
            x = xy[0] + 16
            y = xy[1] + 16
            cell = slide.read_region((x, y), 0, (size, size))
            cell = cell.convert("RGB")
            scipy.misc.imsave(
                new_image_path + "/" + name + "_" + str(i).zfill(6) + ".jpg",
                cell)
            i += 1
        print("total images in " + name + ": " + str(i))
    finally:
        # Release the slide handle even when reading or saving fails.
        slide.close()
def batch_delete_dots(file_path):
    """Call ``delete_dots`` on each path from ``scan_files(file_path, postfix=".xml")``."""
    for xml_path in scan_files(file_path, postfix=".xml"):
        delete_dots(xml_path)
parent.removeChild(annotation) with open(file, 'w') as newfile: DOMTree.writexml(newfile) def batch_delete_dots(file_path): file_list = scan_files(file_path, postfix=".xml") for file in file_list: delete_dots(file) if __name__ == "__main__": # the file path that contains the images to delete or change file_path = "D:\\data0\\2018-05-05-AGC+VIRUS\\2018-05-05-VIRUS-shenhe\\shandiaode" files_list = scan_files(file_path, postfix=".jpg") # the file path that contains xml files target_path = "D:\\" files_dic = parse_path(files_list, len(file_path)) # delete annotations batch_delete(files_dic, target_path) # # change annotations # batch_change(files_dic, target_path, "#aa0000") # # delete single dots in xml # file_path = "D:\\data7\\15_special" # batch_delete_dots(file_path)
def add_postfix():
    """Rename each scanned ``.jpg`` so its name ends in ``-label.jpg``.

    The last four characters of every path returned by ``scan_files`` are
    dropped and ``-label.jpg`` is appended in their place.
    """
    jpg_dir = "D:\\2018-04-24\\tiff"
    jpg_paths = scan_files(jpg_dir, postfix=".jpg")
    for jpg_path in jpg_paths:
        stem = jpg_path[:-4]
        os.rename(jpg_path, stem + "-label.jpg")
from random import randint

from utils.scan_files import scan_files


def cellSampling(files_list, n, size, save_path):
    """Save ``n`` randomly placed square crops from each slide in ``files_list``.

    For every slide, ``n`` top-left corners are drawn uniformly with
    ``randint(size, dimension - size)`` on each axis, a ``size`` x ``size``
    region is read at level 0, converted to RGB and written to
    ``save_path`` as ``<slide name>_<index>.jpg``.

    Processing is best-effort: a slide that fails at any step is reported
    with a printed message and the loop moves on to the next one. That
    includes a slide smaller than ``2 * size`` on an axis, for which
    ``randint`` raises ``ValueError``.

    Args:
        files_list: Iterable of slide file paths.
        n: Number of crops to take per slide.
        size: Edge length, in pixels, of each crop.
        save_path: Directory the crops are written to.
    """
    for file in files_list:
        # [-1] instead of [1]: a path without a backslash yields the whole
        # stem rather than raising IndexError before the try block.
        filename = os.path.splitext(file)[0].rsplit("\\", 1)[-1]
        slide = None
        try:
            slide = openslide.OpenSlide(file)
            size_x, size_y = slide.dimensions
            for i in range(n):
                x = randint(size, size_x - size)
                y = randint(size, size_y - size)
                cell = slide.read_region((x, y), 0, (size, size))
                cell = cell.convert("RGB")
                scipy.misc.imsave(
                    save_path + "\\" + filename + "_" + str(i).zfill(3) +
                    ".jpg", cell)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt / SystemExit must
            # still be able to stop a long batch run.
            print(filename + " cannot be processed")
        finally:
            # Close the handle on failure as well as on success.
            if slide is not None:
                slide.close()


if __name__ == "__main__":
    file_path = "E:\\2018-04-08-normal\\tiff"
    files_list = scan_files(file_path, postfix=".tif")
    save_path = "D:\\data0\\2018-04-08-normal-crops"
    n = 10  # number of crops per slide
    size = 4096  # size of each crop
    cellSampling(files_list, n, size, save_path)
def replace():
    """Overwrite small annotated regions with random patches and save crops.

    For every ``.xml`` file returned by ``scan_files(tif_path, ...)`` the
    slide with the same base name and a ``.tif`` extension is opened. Each
    ``Annotation`` element with at least three coordinates is reduced to
    its bounding box. When the annotation's ``Color`` is a key of
    ``colors`` and the box is smaller than ``des_image_size`` on both axes,
    a ``des_image_size`` square centred on the box is read from level 0,
    and three images are written (one per destination directory), each
    after pasting a fresh ``get_random_image(x_size, y_size)`` over the box.

    Reads the module-level names ``tif_path``, ``colors``,
    ``des_image_size`` and ``des_image_path1`` .. ``des_image_path3``.
    Prints the number of annotations considered and the number replaced.
    """
    files_list = scan_files(tif_path, postfix=".xml")
    # Output directories; position k (1-based) matches the "_replaced<k>_"
    # part of the written file name.
    des_image_paths = (des_image_path1, des_image_path2, des_image_path3)
    total = 0
    replaced = 0
    for xml_file in files_list:
        # from .xml filename, get .tif filename
        filename = os.path.splitext(xml_file)[0]
        # [-1] so a path without a backslash does not raise IndexError
        name = filename.rsplit("\\", 1)[-1]
        # open .tif file
        tif_file = filename + ".tif"
        slide = openslide.OpenSlide(tif_file)
        try:
            # open .xml file
            DOMTree = xml.dom.minidom.parse(xml_file)
            collection = DOMTree.documentElement
            annotations = collection.getElementsByTagName("Annotation")
            for annotation in annotations:
                coordinates = annotation.getElementsByTagName("Coordinate")
                # read (x, y) coordinates
                x_coords = [float(c.getAttribute("X")) for c in coordinates]
                y_coords = [float(c.getAttribute("Y")) for c in coordinates]
                # fewer than three points cannot outline a region
                if len(x_coords) < 3:
                    continue
                x_min = min(x_coords)
                x_max = max(x_coords)
                y_min = min(y_coords)
                y_max = max(y_coords)
                x_size = int(x_max - x_min)
                y_size = int(y_max - y_min)
                x_min = int(x_min)
                y_min = int(y_min)
                # top-left corner of the crop centred on the bounding box
                x = int((x_min + x_max) / 2 - des_image_size / 2)
                y = int((y_min + y_max) / 2 - des_image_size / 2)
                # marked region is small, can be replaced
                if (annotation.getAttribute("Color") in colors
                        and x_size < des_image_size
                        and y_size < des_image_size):
                    print("marked region size: " + str(x_size) + ", " +
                          str(y_size))
                    cell = slide.read_region(
                        (x, y), 0, (des_image_size, des_image_size))
                    cell = cv2.cvtColor(np.asarray(cell), cv2.COLOR_RGBA2BGR)
                    # offset of the bounding box inside the crop
                    row0 = y_min - y
                    col0 = x_min - x
                    for k, out_dir in enumerate(des_image_paths, start=1):
                        # presumably get_random_image returns a
                        # (y_size, x_size, 3) BGR array -- TODO confirm
                        cell[row0:row0 + y_size,
                             col0:col0 + x_size] = get_random_image(
                                 x_size, y_size)
                        cv2.imwrite(
                            out_dir + "\\" + name + "_" +
                            annotation.getAttribute("Name") + "_replaced" +
                            str(k) + "_tri_0514.jpg", cell)
                    replaced += 1
                total += 1
        finally:
            # release the slide handle even if parsing or writing fails
            slide.close()
    print("# total images: " + str(total))
    print("# replaced: " + str(replaced))
# # test # dest = cv2.imread("../res/temp/minions.jpg") # height, width, channels = dest.shape # print(height, width, channels) # print(dest.dtype) # # src = cv2.imread("../res/temp/minions.jpg") # dest[0:int(height/2), 0:int(width/2)] = src[int(height/2):height, int(width/2):width] # status = cv2.imwrite("../res/temp/minions_1.jpg", dest) # if not status: # print("error") src_image_path = "F:\\data0\\2018-04-08-normal-crops" src_image_size = 4096 src_images = scan_files(src_image_path) tif_path = "F:\\data8\\11_TRI" des_image_path1 = "F:\\0514-data1-8-replace2normal-size256\\replace2normal_1" des_image_path2 = "F:\\0514-data1-8-replace2normal-size256\\replace2normal_2" des_image_path3 = "F:\\0514-data1-8-replace2normal-size256\\replace2normal_3" des_image_size = 256 colors = { "#000000": 0, "#aa0000": 0, "#aa007f": 0, "#aa00ff": 0, "#ff0000": 0, "#005500": 0, "#00557f": 0,
if annotation.getAttribute("Color") in colorCounts: colorCounts[annotation.getAttribute("Color")] += 1 n = 0 for color, count in colorCounts.items(): if count > 0: n += 1 if len(categoryCounts) < 10: categoryCounts.append((file, n)) if len(categoryCounts) == 10: sorted(categoryCounts, key=lambda x: x[1], reverse=True) else: i = 0 for filename, count in categoryCounts: if count > n: i += 1 categoryCounts.insert(i, (file, n)) del categoryCounts[-1] for color in colorCounts: colorCounts[color] = 0 file_path = "E:\\data7" files_list = scan_files(file_path, postfix=".xml") print("# files: " + str(len(files_list))) select_most(files_list) for filename, count in categoryCounts: print(filename, count)