def get_perf(wsi, xml1, xml2, args):
    """Compare a ground-truth annotation XML against a predicted XML over a WSI.

    Both XMLs are rasterized to full-slide masks, binarized (any labeled class
    counts as positive), and pixel-wise confusion statistics are computed.

    Args:
        wsi: path to the whole-slide image (used only to read its dimensions).
        xml1: path to the ground-truth (annotated) XML.
        xml2: path to the predicted XML.
        args: namespace with at least `wsi_ext` (the slide file extension).

    Returns:
        Tuple of (sensitivity, specificity, precision, accuracy) as floats.
        A metric whose denominator is zero defaults to 1.
    """
    # Slide dimensions: openslide for pyramidal formats, PIL for plain .tif.
    if args.wsi_ext != '.tif':
        WSIinfo = getWsi.getWsi(wsi)
        dim_x, dim_y = WSIinfo.dimensions
    else:
        im = Image.open(wsi)
        dim_x, dim_y = im.size
    # BUGFIX: removed `totalPixels = np.float(dim_x * dim_y)` — np.float was
    # removed in NumPy 1.24 (AttributeError) and the value was never used.

    # annotated xml
    mask_gt = xml_to_mask(xml1, (0, 0), (dim_x, dim_y), 1, 0)
    # predicted xml
    mask_pred = xml_to_mask(xml2, (0, 0), (dim_x, dim_y), 1, 0)

    # Binarize: collapse all labeled classes into a single positive class.
    np.place(mask_pred, mask_pred > 0, 1)
    np.place(mask_gt, mask_gt > 0, 1)

    TP = float(np.sum(np.multiply(mask_pred, mask_gt)))
    FP = float(np.sum(mask_pred) - TP)

    # Invert both masks so the same products count the negative class.
    mask_pred = abs(mask_pred - 1)
    mask_gt = abs(mask_gt - 1)
    np.place(mask_pred, mask_pred > 0, 1)
    np.place(mask_gt, mask_gt > 0, 1)

    TN = float(np.sum(np.multiply(mask_pred, mask_gt)))
    FN = float(np.sum(mask_pred) - TN)

    # Guard zero denominators: an absent class defaults its metric to 1.
    precision = 1 if TP + FP == 0 else TP / (TP + FP)
    accuracy = (TP + TN) / (TN + FN + TP + FP)
    specificity = 1 if TN + FP == 0 else TN / (FP + TN)
    sensitivity = 1 if TP + FN == 0 else TP / (TP + FN)
    return sensitivity, specificity, precision, accuracy
def return_region(args, xmlID, wsiID, fileID, yStart, xStart, idxy, idxx, downsampleRate, outdirT, region_size, dirs, chop_regions, cNum):
    """Chop one training region from a WSI, save its image/mask pair, and
    return a [cNum, 1] vector marking which classes appear in the region.

    Regions flagged as background in `chop_regions` are skipped entirely and
    contribute an all-zero count vector.
    """
    # Skipped (background) region: nothing written, no classes counted.
    if chop_regions[idxy, idxx] == 0:
        return np.zeros([cNum, 1])

    uniqID = fileID + str(yStart) + str(xStart)

    if wsiID.split('.')[-1] != 'tif':
        # Pyramidal slide: pull the region straight from openslide level 0.
        slide = getWsi(wsiID)
        region = np.array(slide.read_region((xStart, yStart), 0, (region_size, region_size)))
        region = region[:, :, :3]
    else:
        # Plain tif: crop directly, zero-padding at the image border.
        yEnd = yStart + region_size
        xEnd = xStart + region_size
        region = np.zeros([region_size, region_size, 3], dtype=np.uint8)
        crop = imread(wsiID)[yStart:yEnd, xStart:xEnd, :3]
        region[0:crop.shape[0], 0:crop.shape[1], :] = crop

    mask_annotation = xml_to_mask(xmlID, [xStart, yStart], [region_size, region_size], downsampleRate, 0)

    # Downsample the image by sqrt(rate) in each dimension.
    shape = region.shape
    scale = downsampleRate**.5
    region = resize(region, (int(shape[0] / scale), int(shape[1] / scale)), mode='reflect')

    # imsave warns about low-contrast images; silence it for batch chopping.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        imsave(outdirT + '/regions/' + uniqID + dirs['imExt'], region)
        imsave(outdirT + '/masks/' + uniqID + dirs['maskExt'], mask_annotation)

    # Mark (with a 1) every class label present in this region's mask.
    present = np.unique(mask_annotation)
    counts = np.zeros([cNum, 1])
    for label in range(0, cNum):
        if label in present:
            counts[label] = counts[label] + 1
    return counts
def inspect_mask(yStart, xStart, block_size, annotation_xml, prediction_xml):
    """Compute per-class confusion counts for one square block of a slide.

    Rasterizes the annotation and prediction XMLs over the block and, for each
    of the 5 class labels, counts TP/FP/FN/TN pixels.

    Returns:
        (5, 4) array; row i holds [TP, FP, FN, TN] for class i.
    """
    performance = np.zeros((5, 4))
    xEnd = xStart + block_size
    yEnd = yStart + block_size
    width = xEnd - xStart
    height = yEnd - yStart

    gt_mask = xml_to_mask(annotation_xml, [xStart, yStart], [width, height], 1, 0)
    pred_mask = xml_to_mask(prediction_xml, [xStart, yStart], [width, height], 1, 0)

    for classID in range(5):
        in_gt = gt_mask == classID
        in_pred = pred_mask == classID
        TP = np.sum(in_gt & in_pred)
        FP = np.sum(~in_gt & in_pred)
        FN = np.sum(in_gt & ~in_pred)
        TN = np.sum(~in_gt & ~in_pred)
        performance[classID, :] = [TP, FP, FN, TN]
    return performance
def main():
    """Extract annotated regions from each WSI and save them as JPEGs.

    For every annotation XML (module-level `XMLs`), opens the matching slide
    (`WSIs`), reads each annotated bounding region, optionally whitens the
    background outside the mask, and writes the crop to `save_dir`.

    Relies on module-level configuration: XMLs, WSIs, extract_one_region,
    final_image_size, size_thresh, white_background, save_dir.
    """
    # go though all WSI
    for idx, XML in enumerate(XMLs):
        bounds, masks = get_annotation_bounds(XML, 1)
        basename = os.path.splitext(os.path.basename(XML))[0]

        print('opening: ' + WSIs[idx])
        pas_img = openslide.OpenSlide(WSIs[idx])

        for idxx, bound in enumerate(bounds):
            if extract_one_region:
                mask = masks[idxx]
            else:
                mask = (xml_to_mask(XML, (bound[0], bound[1]),
                                    (final_image_size, final_image_size),
                                    downsample_factor=1, verbose=0))

            # BUGFIX: previously, a region below size_thresh skipped the
            # read_region call but still fell through to imsave, writing the
            # stale PAS from the prior iteration (or raising NameError on the
            # first). Skip such regions entirely.
            if size_thresh is not None and np.sum(mask) < size_thresh:
                continue

            PAS = pas_img.read_region((int(bound[0]), int(bound[1])), 0,
                                      (final_image_size, final_image_size))
            PAS = np.array(PAS)[:, :, 0:3]

            if white_background:
                # Paint everything outside the annotation mask white.
                for channel in range(3):
                    PAS_ = PAS[:, :, channel]
                    PAS_[mask == 0] = 255
                    PAS[:, :, channel] = PAS_

            subdir = '{}/{}/'.format(save_dir, basename)
            make_folder(subdir)
            imsave(subdir + basename + '_' + str(idxx) + '.jpg', PAS)
def IterateTraining(args):
    """Run one human-in-the-loop training iteration.

    Pipeline: (1) chop each annotated WSI into low- and high-resolution
    image/mask tiles in parallel, (2) augment tiles with per-class ratios
    inversely proportional to class frequency, (3) move everything into the
    project's Permanent data folders, (4) build DeepLab training argument
    dicts for both resolutions and train the two networks in parallel
    (one GPU each).

    NOTE(review): original source had collapsed whitespace; block nesting
    below (what sits inside `if args.chop_data == 'True':` vs. the per-WSI
    loop) is a reconstruction — confirm against version control.
    """
    ## calculate low resolution block params
    downsampleLR = int(args.downsampleRateLR**.5) #down sample for each dimension
    region_sizeLR = int(args.boxSizeLR*(downsampleLR)) #Region size before downsampling
    stepLR = int(region_sizeLR*(1-args.overlap_percentLR)) #Step size before downsampling

    ## calculate high resolution block params
    downsampleHR = int(args.downsampleRateHR**.5) #down sample for each dimension
    region_sizeHR = int(args.boxSizeHR*(downsampleHR)) #Region size before downsampling
    stepHR = int(region_sizeHR*(1-args.overlap_percentHR)) #Step size before downsampling

    # Per-class tile counts accumulated across all WSIs (module-level state).
    global classEnumLR,classEnumHR

    # Project directory layout and file-extension conventions.
    dirs = {'imExt': '.jpeg'}
    dirs['basedir'] = args.base_dir
    dirs['maskExt'] = '.png'
    dirs['modeldir'] = '/MODELS/'
    dirs['tempdirLR'] = '/TempLR/'
    dirs['tempdirHR'] = '/TempHR/'
    dirs['pretraindir'] = '/Deeplab_network/'
    dirs['training_data_dir'] = '/TRAINING_data/'
    dirs['model_init'] = 'deeplab_resnet.ckpt'
    dirs['project']= '/' + args.project
    dirs['data_dir_HR'] = args.base_dir +'/' + args.project + '/Permanent/HR/'
    dirs['data_dir_LR'] = args.base_dir +'/' +args.project + '/Permanent/LR/'

    ##All folders created, initiate WSI loading by human
    #raw_input('Please place WSIs in ')

    ##Check iteration session
    currentmodels=os.listdir(dirs['basedir'] + dirs['project'] + dirs['modeldir'])
    currentAnnotationIteration=check_model_generation(dirs)
    print('Current training session is: ' + str(currentAnnotationIteration))

    ##Create objects for storing class distributions
    annotatedXMLs=glob(dirs['basedir'] + dirs['project'] + dirs['training_data_dir'] + str(currentAnnotationIteration) + '/*.xml')
    classes=[]
    if args.classNum == 0:
        # Class count not given: infer it from the annotation XMLs.
        for xml in annotatedXMLs:
            classes.append(get_num_classes(xml))
        classNum_LR = max(classes)
        classNum_HR = max(classes)
    else:
        classNum_LR = args.classNum
        # HR may use its own class count; otherwise mirror LR.
        if args.classNum_HR != 0:
            classNum_HR = args.classNum_HR
        else:
            classNum_HR = classNum_LR
    classEnumLR=np.zeros([classNum_LR,1])
    classEnumHR=np.zeros([classNum_HR,1])

    ##for all WSIs in the initiating directory:
    if args.chop_data == 'True':
        print('Chopping')
        start=time.time()
        for xmlID in annotatedXMLs:
            #Get unique name of WSI
            fileID=xmlID.split('/')[-1].split('.xml')[0]

            #create memory addresses for wsi files
            for ext in [args.wsi_ext]:
                wsiID=dirs['basedir'] + dirs['project']+ dirs['training_data_dir'] + str(currentAnnotationIteration) +'/'+ fileID + ext
                #Stop at the first extension for which the slide file exists
                if os.path.isfile(wsiID)==True:
                    break

            #Warn (but continue) if no slide was found for this XML
            if os.path.isfile(wsiID)==False:
                print('\nError - missing wsi file: ' + wsiID + ' Please provide.\n')

            #Load openslide information about WSI
            if ext != '.tif':
                slide=getWsi(wsiID)
                #WSI level 0 dimensions (largest size)
                dim_x,dim_y=slide.dimensions
            else:
                im = Image.open(wsiID)
                dim_x, dim_y=im.size

            # Rasterize the full-slide annotation mask once per WSI.
            wsi_mask=xml_to_mask(xmlID, [0,0], [dim_x,dim_y])
            print('Loaded mask')

            #Generate iterators for parallel chopping of WSIs in low resolution
            index_yLR=np.array(range(0,dim_y,stepLR))
            index_xLR=np.array(range(0,dim_x,stepLR))
            # Clamp the last window so it ends at the image border.
            index_yLR[-1]=dim_y-stepLR
            index_xLR[-1]=dim_x-stepLR

            #Create memory address for chopped images low resolution
            outdirLR=dirs['basedir'] + dirs['project'] + dirs['tempdirLR']

            #Enumerate cpu core count
            num_cores = multiprocessing.cpu_count()

            #Perform low resolution chopping in parallel and return the number of
            #images in each of the labeled classes
            chop_regions=get_choppable_regions(wsi=wsiID, index_x=index_xLR,index_y=index_yLR,boxSize=region_sizeLR,white_percent=args.white_percent)
            classEnumCLR=Parallel(n_jobs=num_cores)(delayed(return_region)(args=args, wsi_mask=wsi_mask, wsiID=wsiID, fileID=fileID, yStart=j, xStart=i, idxy=idxy, idxx=idxx, downsampleRate=args.downsampleRateLR, outdirT=outdirLR, region_size=region_sizeLR, dirs=dirs, chop_regions=chop_regions,classNum=classNum_LR) for idxx,i in enumerate(index_xLR) for idxy,j in enumerate(index_yLR))
            print('Time for low res WSI chopping: ' + str(time.time()-start))

            #Add number of images in each class to the global count low resolution
            CSLR=(sum(classEnumCLR))
            for c in range(0,CSLR.shape[0]):
                classEnumLR[c]=classEnumLR[c]+CSLR[c]

            #Generate iterators for parallel chopping of WSIs in high resolution
            index_yHR=np.array(range(0,dim_y,stepHR))
            index_xHR=np.array(range(0,dim_x,stepHR))
            index_yHR[-1]=dim_y-stepHR
            index_xHR[-1]=dim_x-stepHR

            #Create memory address for chopped images high resolution
            outdirHR=dirs['basedir'] + dirs['project'] + dirs['tempdirHR']

            #Perform high resolution chopping in parallel and return the number of
            #images in each of the labeled classes
            chop_regions=get_choppable_regions(wsi=wsiID, index_x=index_xHR,index_y=index_yHR,boxSize=region_sizeHR,white_percent=args.white_percent)
            classEnumCHR=Parallel(n_jobs=num_cores)(delayed(return_region)(args=args, wsi_mask=wsi_mask, wsiID=wsiID, fileID=fileID, yStart=j, xStart=i, idxy=idxy, idxx=idxx, downsampleRate=args.downsampleRateHR, outdirT=outdirHR, region_size=region_sizeHR, dirs=dirs, chop_regions=chop_regions,classNum=classNum_HR) for idxx,i in enumerate(index_xHR) for idxy,j in enumerate(index_yHR))
            print('Time for high res WSI chopping: ' + str(time.time()-start))

            #Add number of images in each class to the global count high resolution
            CSHR=(sum(classEnumCHR))
            for c in range(0,CSHR.shape[0]):
                classEnumHR[c]=classEnumHR[c]+CSHR[c]
            #classEnumHR=[float(6334),float(488)]

        #Print enumerations for each class
        print('Time for WSI chopping: ' + str(time.time()-start))

        ##Augment low resolution data
        #Output location for augmented data
        dirs['outDirAI']=dirs['basedir']+dirs['project'] + dirs['tempdirLR'] + '/Augment' + '/regions/'
        dirs['outDirAM']=dirs['basedir']+dirs['project'] + dirs['tempdirLR'] + '/Augment' + '/masks/'

        #Enumerate low resolution class distributions for augmentation ratios
        classDistLR=np.zeros(len(classEnumLR))
        for idx,value in enumerate(classEnumLR):
            classDistLR[idx]=value/sum(classEnumLR)

        #Define number of augmentations per class: rarer classes get more.
        if args.aug_LR > 0:
            augmentOrder=np.argsort(classDistLR)
            classAugs=(np.round(args.aug_LR*(1-classDistLR))+1)
            classAugs=classAugs.astype(int)
            print('Low resolution augmentation distribution:')
            print(classAugs)
            imagesToAugmentLR=dirs['basedir']+dirs['project'] + dirs['tempdirLR'] + 'regions/'
            masksToAugmentLR=dirs['basedir']+dirs['project'] + dirs['tempdirLR'] + 'masks/'
            augmentList=glob(imagesToAugmentLR + '*.jpeg')
            #Parallel iter
            augIter=range(0,len(augmentList))
            auglen=len(augmentList)
            #Augment images in parallel using inverted class distributions for augmentation iterations
            num_cores = multiprocessing.cpu_count()
            start=time.time()
            Parallel(n_jobs=num_cores)(delayed(run_batch)(augmentList,masksToAugmentLR, batchidx,classAugs,args.boxSizeLR,args.hbound,args.lbound, augmentOrder,dirs,classNum_LR,auglen) for batchidx in augIter)

        # Move augmented and freshly chopped LR tiles into the Permanent store.
        moveimages(dirs['outDirAI'], dirs['basedir']+dirs['project'] + '/Permanent/LR/regions/')
        moveimages(dirs['outDirAM'], dirs['basedir']+dirs['project'] + '/Permanent/LR/masks/')
        moveimages(dirs['basedir']+dirs['project'] + dirs['tempdirLR']+ '/regions/', dirs['basedir']+dirs['project'] + '/Permanent/LR/regions/')
        moveimages(dirs['basedir']+dirs['project'] + dirs['tempdirLR']+ '/masks/',dirs['basedir']+dirs['project'] + '/Permanent/LR/masks/')
        end=time.time()-start
        # NOTE(review): totalStart is not defined in this function — presumably
        # a module-level timestamp set by the caller; verify.
        print('Time for low resolution augmenting: ' + str((time.time()-totalStart)/60) + ' minutes.')

        ##High resolution augmentation
        #Enumerate high resolution class distribution
        classDistHR=np.zeros(len(classEnumHR))
        for idx,value in enumerate(classEnumHR):
            classDistHR[idx]=value/sum(classEnumHR)

        #Define number of augmentations per class
        if args.aug_HR >0:
            augmentOrder=np.argsort(classDistHR)
            classAugs=(np.round(args.aug_HR*(1-classDistHR))+1)
            classAugs=classAugs.astype(int)
            print('High resolution augmentation distribution:')
            print(classAugs)
            #High resolution input augmentable data
            imagesToAugmentHR=dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + 'regions/'
            masksToAugmentHR=dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + 'masks/'
            augmentList=glob(imagesToAugmentHR + '*.jpeg')
            #Parallel iterator
            augIter=range(0,len(augmentList))
            auglen=len(augmentList)
            #Output for augmented data
            dirs['outDirAI']=dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/Augment' + '/regions/'
            dirs['outDirAM']=dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/Augment' + '/masks/'
            #Augment in parallel
            num_cores = multiprocessing.cpu_count()
            start=time.time()
            Parallel(n_jobs=num_cores)(delayed(run_batch)(augmentList,masksToAugmentHR, batchidx,classAugs,args.boxSizeHR,args.hbound,args.lbound, augmentOrder,dirs,classNum_HR,auglen) for batchidx in augIter)
        end=time.time()-start
        #augamt=len(glob(dirs['outDirAI'] + '*' + dirs['imExt']))

        # Move augmented and freshly chopped HR tiles into the Permanent store.
        moveimages(dirs['outDirAI'], dirs['basedir']+dirs['project'] + '/Permanent/HR/regions/')
        moveimages(dirs['outDirAM'], dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/')
        moveimages(dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/regions/', dirs['basedir']+dirs['project'] + '/Permanent/HR/regions/')
        moveimages(dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/masks/',dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/')
        #Total time
        print('Time for high resolution augmenting: ' + str((time.time()-totalStart)/60) + ' minutes.')

    #Generate training and validation arguments
    training_args_list = []
    training_args_LR = []
    training_args_HR = []

    ##### LOW REZ ARGS #####
    dirs['outDirAILR']=dirs['basedir']+'/'+dirs['project'] + '/Permanent/LR/regions/'
    dirs['outDirAMLR']=dirs['basedir']+'/'+dirs['project'] + '/Permanent/LR/masks/'
    ########fix this
    trainOutLR=dirs['basedir'] + '/Codes' + '/Deeplab_network/datasetLR/train.txt'
    valOutLR=dirs['basedir'] + '/Codes' + '/Deeplab_network/datasetLR/val.txt'
    generateDatalists(dirs['outDirAILR'],dirs['outDirAMLR'],'/regions/','/masks/',dirs['imExt'],dirs['maskExt'],trainOutLR)
    numImagesLR=len(glob(dirs['outDirAILR'] + '*' + dirs['imExt']))
    # Steps = epochs * images / batch size.
    numStepsLR=int((args.epoch_LR*numImagesLR)/ args.CNNbatch_sizeLR)

    # Pretrained checkpoints and output model dirs for the next iteration.
    pretrain_LR=get_pretrain(currentAnnotationIteration,'/LR/',dirs)
    modeldir_LR =dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration +1) + '/LR/'
    pretrain_HR=get_pretrain(currentAnnotationIteration,'/HR/',dirs)
    modeldir_HR = dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration+1) + '/HR/'

    # assign to dict
    # NOTE(review): np.int was removed in NumPy 1.24 — consider plain int().
    training_args_LR = {
        'numImages': numImagesLR,
        'data_list': trainOutLR,
        'batch_size': args.CNNbatch_sizeLR,
        'num_steps': numStepsLR,
        'save_interval': np.int(round(numStepsLR/args.saveIntervals)),
        'pretrain_file': pretrain_LR,
        'input_height': args.boxSizeLR,
        'input_width': args.boxSizeLR,
        'modeldir': modeldir_LR,
        'num_classes': classNum_LR,
        'gpu': args.gpu,
        'print_color': "\033[3;37;40m",
        'data_dir': dirs['data_dir_LR'],
        'log_file': modeldir_LR + 'log_'+ str(currentAnnotationIteration+1) +'_LR.txt',
        'log_dir': modeldir_LR + 'log/',
        'learning_rate': args.learning_rate_LR,
        'encoder_name':args.encoder_name
        }
    training_args_list.append(training_args_LR)

    ##### HIGH REZ ARGS #####
    dirs['outDirAIHR']=dirs['basedir']+'/'+dirs['project'] + '/Permanent/HR/regions/'
    dirs['outDirAMHR']=dirs['basedir']+'/'+dirs['project'] + '/Permanent/HR/masks/'
    #######Fix this
    trainOutHR=dirs['basedir'] + '/Codes' +'/Deeplab_network/datasetHR/train.txt'
    valOutHR=dirs['basedir'] + '/Codes' + '/Deeplab_network/datasetHR/val.txt'
    generateDatalists(dirs['outDirAIHR'],dirs['outDirAMHR'],'/regions/','/masks/',dirs['imExt'],dirs['maskExt'],trainOutHR)
    numImagesHR=len(glob(dirs['outDirAIHR'] + '*' + dirs['imExt']))
    numStepsHR=int((args.epoch_HR*numImagesHR)/ args.CNNbatch_sizeHR)

    # assign to dict
    training_args_HR={
        'numImages': numImagesHR,
        'data_list': trainOutHR,
        'batch_size': args.CNNbatch_sizeHR,
        'num_steps': numStepsHR,
        'save_interval': np.int(round(numStepsHR/args.saveIntervals)),
        'pretrain_file': pretrain_HR,
        'input_height': args.boxSizeHR,
        'input_width': args.boxSizeHR,
        'modeldir': modeldir_HR,
        'num_classes': classNum_HR,
        # HR trains on the last GPU in the allotted range.
        'gpu': args.gpu + args.gpu_num - 1,
        'data_dir': dirs['data_dir_HR'],
        'print_color': "\033[1;32;40m",
        'log_file': modeldir_HR + 'log_'+ str(currentAnnotationIteration+1) +'_HR.txt',
        'log_dir': modeldir_HR + 'log/',
        'learning_rate': args.learning_rate_HR,
        'encoder_name': args.encoder_name
        }
    training_args_list.append(training_args_HR)

    # train networks in parallel (threading backend: one thread per GPU).
    num_cores = args.gpu_num # GPUs
    Parallel(n_jobs=num_cores, backend='threading')(delayed(train_net)(training_args,dirs) for training_args in training_args_list)

    finish_model_generation(dirs,currentAnnotationIteration)

    print('\n\n\033[92;5mPlease place new wsi file(s) in: \n\t' + dirs['basedir'] + dirs['project']+ dirs['training_data_dir'] + str(currentAnnotationIteration+1))
    print('\nthen run [--option predict]\033[0m\n')
# Quick visual sanity check: rasterize one slide's XML annotations at 16x
# downsample and display the resulting mask with matplotlib.
from xml_to_mask import xml_to_mask
from getWsi import getWsi
from matplotlib import pyplot as plt

wsi = getWsi('/hdd/bg/HAIL2/DeepZoomPrediction/TRAINING_data/0/52483.svs')
width, height = wsi.dimensions
xml_path = '/hdd/bg/HAIL2/DeepZoomPrediction/TRAINING_data/0/52483.xml'
mask = xml_to_mask(xml_path, (0, 0), (width, height), 16, 0)
# Scale labels so small class indices are visible on screen.
plt.imshow(mask * 255)
plt.show()
if x_point<xMin: xMin=x_point if x_point>xMax: xMax=x_point if y_point<yMin: yMin=y_point if y_point>yMax: yMax=y_point # test if points are in bounds region_coords.append([xMin,xMax,yMin,yMax]) for region in region_coords: xMin=region[0] yMin=region[2] xL=region[1]-xMin yL=region[3]-yMin test_im=np.int8(xml_to_mask(xml_path,[xMin,yMin],[xL,yL],1,0)) test_im=test_im-2 test_im = test_im.clip(min=0) rgb_im=np.array(slide.read_region([xMin,yMin],0,[xL,yL]))[:,:,0:3] subIter1=range(0,yL-int(box_size*overlap),step_size) subIter2=range(0,xL-int(box_size*overlap),step_size) num_cores = multiprocessing.cpu_count() Parallel(n_jobs=num_cores, backend='threading')(delayed(subArrayChopper)(boxSize=box_size,xSt=i,ySt=j) for i in subIter1 for j in subIter2)