Example #1
import numpy as np
from PIL import Image

import getWsi  # project-local WSI loader, assumed importable
from xml_to_mask import xml_to_mask  # project-local rasterizer, assumed importable


def get_perf(wsi, xml1, xml2, args):
    if args.wsi_ext != '.tif':
        WSIinfo = getWsi.getWsi(wsi)
        dim_x, dim_y = WSIinfo.dimensions
    else:
        im = Image.open(wsi)
        dim_x, dim_y = im.size

    totalPixels = float(dim_x * dim_y)  # np.float was removed in NumPy 1.24

    # annotated xml
    mask_gt = xml_to_mask(xml1, (0, 0), (dim_x, dim_y), 1, 0)
    # predicted xml
    mask_pred = xml_to_mask(xml2, (0, 0), (dim_x, dim_y), 1, 0)

    # binarize both masks: any nonzero class label counts as foreground
    np.place(mask_pred, mask_pred > 0, 1)
    np.place(mask_gt, mask_gt > 0, 1)

    TP = float(np.sum(np.multiply(mask_pred, mask_gt)))
    FP = float(np.sum(mask_pred) - TP)

    # invert both binary masks so the same product/sum trick yields TN and FN
    mask_pred = abs(mask_pred - 1)
    mask_gt = abs(mask_gt - 1)
    np.place(mask_pred, mask_pred > 0, 1)
    np.place(mask_gt, mask_gt > 0, 1)

    TN = float(np.sum(np.multiply(mask_pred, mask_gt)))
    FN = float(np.sum(mask_pred) - TN)

    if TP + FP == 0:
        precision = 1
    else:
        precision = (TP / (TP + FP))

    accuracy = ((TP + TN) / (TN + FN + TP + FP))

    if TN + FP == 0:
        specificity = 1
    else:
        specificity = (TN / (FP + TN))

    if TP + FN == 0:
        sensitivity = 1
    else:
        sensitivity = (TP / (TP + FN))

    return sensitivity, specificity, precision, accuracy
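A minimal usage sketch (not from the original source): get_perf only reads args.wsi_ext from the args object, so an argparse Namespace stands in, and every path below is a placeholder.

from argparse import Namespace

sens, spec, prec, acc = get_perf(
    wsi='slides/case01.svs',                    # placeholder path
    xml1='annotations/case01_groundtruth.xml',  # placeholder path
    xml2='annotations/case01_prediction.xml',   # placeholder path
    args=Namespace(wsi_ext='.svs'))
print('sensitivity=%.3f specificity=%.3f precision=%.3f accuracy=%.3f'
      % (sens, spec, prec, acc))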
Example #2
import warnings

import numpy as np
from skimage.io import imread, imsave  # assumed I/O backend
from skimage.transform import resize

from getWsi import getWsi
from xml_to_mask import xml_to_mask


def return_region(args, xmlID, wsiID, fileID, yStart, xStart, idxy, idxx,
                  downsampleRate, outdirT, region_size, dirs, chop_regions,
                  cNum):  # chop one region; designed to be run in parallel

    if chop_regions[idxy, idxx] != 0:
        uniqID = fileID + str(yStart) + str(xStart)
        if wsiID.split('.')[-1] != 'tif':
            slide = getWsi(wsiID)
            Im = np.array(
                slide.read_region((xStart, yStart), 0,
                                  (region_size, region_size)))
            Im = Im[:, :, :3]
        else:
            yEnd = yStart + region_size
            xEnd = xStart + region_size
            Im = np.zeros([region_size, region_size, 3], dtype=np.uint8)
            Im_ = imread(wsiID)[yStart:yEnd, xStart:xEnd, :3]
            Im[0:Im_.shape[0], 0:Im_.shape[1], :] = Im_

        mask_annotation = xml_to_mask(xmlID, [xStart, yStart],
                                      [region_size, region_size],
                                      downsampleRate, 0)

        c = Im.shape

        # shrink each image dimension by sqrt(downsampleRate)
        s1 = int(c[0] / (downsampleRate**.5))
        s2 = int(c[1] / (downsampleRate**.5))
        Im = resize(Im, (s1, s2), mode='reflect')
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            imsave(outdirT + '/regions/' + uniqID + dirs['imExt'], Im)
            imsave(outdirT + '/masks/' + uniqID + dirs['maskExt'],
                   mask_annotation)
            '''
            plt.subplot(121)
            plt.imshow(Im)
            plt.subplot(122)
            plt.imshow(mask_annotation)
            plt.show()
            '''

        classespresent = np.unique(mask_annotation)
        classes = range(0, cNum)
        classEnumC = np.zeros([cNum, 1])

        for index, chk in enumerate(classes):
            if chk in classespresent:
                classEnumC[index] = classEnumC[index] + 1
        return classEnumC
    else:
        # region was flagged as not choppable; return an empty per-class count
        return np.zeros([cNum, 1])
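A sketch of how a caller might aggregate return_region's per-class counts with joblib, mirroring the pattern in Example #5; everything other than return_region itself (the tile grids, paths, and parameters) is assumed.

from joblib import Parallel, delayed

results = Parallel(n_jobs=4)(
    delayed(return_region)(args, xmlID, wsiID, fileID, j, i, idxy, idxx,
                           downsampleRate, outdirT, region_size, dirs,
                           chop_regions, cNum)
    for idxx, i in enumerate(index_x) for idxy, j in enumerate(index_y))
class_counts = sum(results)  # elementwise sum of the (cNum, 1) count arrays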
Example #3
import numpy as np

from xml_to_mask import xml_to_mask  # project-local rasterizer, assumed importable


def inspect_mask(yStart, xStart, block_size, annotation_xml,
                 prediction_xml):  # evaluate one block; designed to be run in parallel
    performance = np.zeros((5, 4))
    yEnd = yStart + block_size
    xEnd = xStart + block_size
    xLen = xEnd - xStart
    yLen = yEnd - yStart
    mask_annotation = xml_to_mask(annotation_xml, [xStart, yStart],
                                  [xLen, yLen], 1, 0)
    prediction_annotation = xml_to_mask(prediction_xml, [xStart, yStart],
                                        [xLen, yLen], 1, 0)
    for classID in range(0, 5):
        annotation = mask_annotation == classID
        prediction = prediction_annotation == classID

        TP = (np.sum(np.multiply(annotation, prediction)))
        FP = (np.sum(np.multiply((1 - annotation), (prediction))))
        FN = (np.sum(np.multiply((annotation), (1 - prediction))))
        TN = (np.sum(np.multiply((1 - annotation), (1 - prediction))))
        performance[classID, :] = [TP, FP, FN, TN]

    return performance
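Each row of the returned 5x4 matrix holds [TP, FP, FN, TN] for one class, so per-class metrics follow directly; a sketch with placeholder coordinates and XML paths.

perf = inspect_mask(yStart=0, xStart=0, block_size=2000,
                    annotation_xml='gt.xml', prediction_xml='pred.xml')
for classID, (TP, FP, FN, TN) in enumerate(perf):
    sensitivity = TP / (TP + FN) if TP + FN else 1.0
    specificity = TN / (TN + FP) if TN + FP else 1.0
    print(classID, sensitivity, specificity)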
Example #4
import os

import numpy as np
import openslide

# get_annotation_bounds, xml_to_mask, make_folder, and imsave are assumed
# importable from the surrounding codebase


def main():
    # go though all WSI
    for idx, XML in enumerate(XMLs):
        bounds, masks = get_annotation_bounds(XML, 1)
        basename = os.path.basename(XML)
        basename = os.path.splitext(basename)[0]

        print('opening: ' + WSIs[idx])
        pas_img = openslide.OpenSlide(WSIs[idx])

        for idxx, bound in enumerate(bounds):
            if extract_one_region:
                mask = masks[idxx]
            else:
                mask = (xml_to_mask(XML, (bound[0], bound[1]),
                                    (final_image_size, final_image_size),
                                    downsample_factor=1,
                                    verbose=0))

            if size_thresh is None:
                PAS = pas_img.read_region((int(bound[0]), int(bound[1])), 0,
                                          (final_image_size, final_image_size))
                PAS = np.array(PAS)[:, :, 0:3]

            else:
                size = np.sum(mask)
                if size < size_thresh:
                    continue  # skip regions below the size threshold
                PAS = pas_img.read_region(
                    (int(bound[0]), int(bound[1])), 0,
                    (final_image_size, final_image_size))
                PAS = np.array(PAS)[:, :, 0:3]

            if white_background:
                for channel in range(3):
                    PAS_ = PAS[:, :, channel]
                    PAS_[mask == 0] = 255
                    PAS[:, :, channel] = PAS_

            subdir = '{}/{}/'.format(save_dir, basename)
            make_folder(subdir)
            imsave(subdir + basename + '_' + str(idxx) + '.jpg', PAS)
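main relies on module-level names rather than arguments; a minimal setup sketch with placeholder values for every global it reads (XMLs, WSIs, save_dir, final_image_size, size_thresh, extract_one_region, white_background).

from glob import glob

XMLs = sorted(glob('annotations/*.xml'))  # placeholder layout
WSIs = sorted(glob('slides/*.svs'))       # one WSI per XML, same order
save_dir = 'output'
final_image_size = 512
size_thresh = None                        # or a minimum mask pixel count
extract_one_region = False
white_background = True

main()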
Example #5
import multiprocessing
import os
import time
from glob import glob

import numpy as np
from PIL import Image
from joblib import Parallel, delayed

# project-local helpers (getWsi, xml_to_mask, get_choppable_regions,
# return_region, run_batch, moveimages, generateDatalists, get_pretrain,
# check_model_generation, finish_model_generation, train_net, get_num_classes)
# are assumed importable from the surrounding codebase


def IterateTraining(args):
    ## calculate low resolution block params
    downsampleLR = int(args.downsampleRateLR**.5)  # downsample factor per dimension
    region_sizeLR = int(args.boxSizeLR*(downsampleLR))  # region size before downsampling
    stepLR = int(region_sizeLR*(1-args.overlap_percentLR))  # step size before downsampling
    ## calculate high resolution block params
    downsampleHR = int(args.downsampleRateHR**.5)  # downsample factor per dimension
    region_sizeHR = int(args.boxSizeHR*(downsampleHR))  # region size before downsampling
    stepHR = int(region_sizeHR*(1-args.overlap_percentHR))  # step size before downsampling
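    # Worked example with assumed values: downsampleRateLR = 16 gives a
    # per-dimension factor of sqrt(16) = 4, so boxSizeLR = 250 yields
    # region_sizeLR = 1000, and overlap_percentLR = 0.5 yields stepLR = 500.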


    global classEnumLR,classEnumHR
    dirs = {'imExt': '.jpeg'}
    dirs['basedir'] = args.base_dir
    dirs['maskExt'] = '.png'
    dirs['modeldir'] = '/MODELS/'
    dirs['tempdirLR'] = '/TempLR/'
    dirs['tempdirHR'] = '/TempHR/'
    dirs['pretraindir'] = '/Deeplab_network/'
    dirs['training_data_dir'] = '/TRAINING_data/'
    dirs['model_init'] = 'deeplab_resnet.ckpt'
    dirs['project']= '/' + args.project
    dirs['data_dir_HR'] = args.base_dir +'/' + args.project + '/Permanent/HR/'
    dirs['data_dir_LR'] = args.base_dir +'/' +args.project + '/Permanent/LR/'


    ##All folders created, initiate WSI loading by human
    #raw_input('Please place WSIs in ')

    ##Check iteration session

    currentmodels=os.listdir(dirs['basedir'] + dirs['project'] + dirs['modeldir'])

    currentAnnotationIteration=check_model_generation(dirs)

    print('Current training session is: ' + str(currentAnnotationIteration))

    ##Create objects for storing class distributions
    annotatedXMLs=glob(dirs['basedir'] + dirs['project'] + dirs['training_data_dir'] + str(currentAnnotationIteration) + '/*.xml')
    classes=[]
    if args.classNum == 0:
        for xml in annotatedXMLs:
            classes.append(get_num_classes(xml))

        classNum_LR = max(classes)
        classNum_HR = max(classes)
    else:
        classNum_LR = args.classNum
        if args.classNum_HR != 0:
            classNum_HR = args.classNum_HR
        else:
            classNum_HR = classNum_LR
    classEnumLR=np.zeros([classNum_LR,1])
    classEnumHR=np.zeros([classNum_HR,1])


    ##for all WSIs in the initiating directory:
    if args.chop_data == 'True':  # chop_data is handled as a string flag
        print('Chopping')

        start=time.time()
        for xmlID in annotatedXMLs:

            #Get unique name of WSI
            fileID=xmlID.split('/')[-1].split('.xml')[0]

            #create memory addresses for wsi files
            for ext in [args.wsi_ext]:
                wsiID=dirs['basedir'] + dirs['project']+  dirs['training_data_dir'] + str(currentAnnotationIteration) +'/'+ fileID + ext

                #Ensure the WSI exists for this extension
                if os.path.isfile(wsiID):
                    break

            #Warn if no matching WSI was found
            if not os.path.isfile(wsiID):
                print('\nError - missing wsi file: ' + wsiID + ' Please provide.\n')

            #Load openslide information about WSI
            if ext != '.tif':
                slide=getWsi(wsiID)
                #WSI level 0 dimensions (largest size)
                dim_x,dim_y=slide.dimensions
            else:
                im = Image.open(wsiID)
                dim_x, dim_y=im.size
            wsi_mask=xml_to_mask(xmlID, [0,0], [dim_x,dim_y])
            print('Loaded mask')

            #Generate iterators for parallel chopping of WSIs in low resolution
            index_yLR=np.array(range(0,dim_y,stepLR))
            index_xLR=np.array(range(0,dim_x,stepLR))

            index_yLR[-1]=dim_y-stepLR
            index_xLR[-1]=dim_x-stepLR
            #Create memory address for chopped images low resolution
            outdirLR=dirs['basedir'] + dirs['project'] + dirs['tempdirLR']

            #Enumerate cpu core count
            num_cores = multiprocessing.cpu_count()

            #Perform low resolution chopping in parallel and return the number of
            #images in each of the labeled classes
            chop_regions=get_choppable_regions(wsi=wsiID,
                index_x=index_xLR,index_y=index_yLR,boxSize=region_sizeLR,white_percent=args.white_percent)


            classEnumCLR=Parallel(n_jobs=num_cores)(delayed(return_region)(args=args,
                wsi_mask=wsi_mask, wsiID=wsiID,
                fileID=fileID, yStart=j, xStart=i, idxy=idxy,
                idxx=idxx, downsampleRate=args.downsampleRateLR,
                outdirT=outdirLR, region_size=region_sizeLR,
                dirs=dirs, chop_regions=chop_regions,classNum=classNum_LR) for idxx,i in enumerate(index_xLR) for idxy,j in enumerate(index_yLR))
            print('Time for low res WSI chopping: ' + str(time.time()-start))

            #Add number of images in each class to the global count low resolution
            CSLR=(sum(classEnumCLR))
            for c in range(0,CSLR.shape[0]):
                classEnumLR[c]=classEnumLR[c]+CSLR[c]

            #Print enumerations for each class

            #Generate iterators for parallel chopping of WSIs in high resolution
            index_yHR=np.array(range(0,dim_y,stepHR))
            index_xHR=np.array(range(0,dim_x,stepHR))
            index_yHR[-1]=dim_y-stepHR
            index_xHR[-1]=dim_x-stepHR
            #Create memory address for chopped images high resolution
            outdirHR=dirs['basedir'] + dirs['project'] + dirs['tempdirHR']

            #Perform high resolution chopping in parallel and return the number of
            #images in each of the labeled classes
            chop_regions=get_choppable_regions(wsi=wsiID,
                index_x=index_xHR,index_y=index_yHR,boxSize=region_sizeHR,white_percent=args.white_percent)

            classEnumCHR=Parallel(n_jobs=num_cores)(delayed(return_region)(args=args,
                wsi_mask=wsi_mask, wsiID=wsiID,
                fileID=fileID, yStart=j, xStart=i, idxy=idxy,
                idxx=idxx, downsampleRate=args.downsampleRateHR,
                outdirT=outdirHR, region_size=region_sizeHR,
                dirs=dirs, chop_regions=chop_regions,classNum=classNum_HR) for idxx,i in enumerate(index_xHR) for idxy,j in enumerate(index_yHR))
            print('Time for high res WSI chopping: ' + str(time.time()-start))

            #Add number of images in each class to the global count high resolution
            CSHR=(sum(classEnumCHR))
            for c in range(0,CSHR.shape[0]):
                classEnumHR[c]=classEnumHR[c]+CSHR[c]

            #Print enumerations for each class

        print('Time for WSI chopping: ' + str(time.time()-start))

        ##Augment low resolution data
        #Location of augmentable data

        #Output location for augmented data
        dirs['outDirAI']=dirs['basedir']+dirs['project'] + dirs['tempdirLR'] + '/Augment' + '/regions/'
        dirs['outDirAM']=dirs['basedir']+dirs['project'] + dirs['tempdirLR'] + '/Augment' + '/masks/'

        #Enumerate low resolution class distributions for augmentation ratios
        classDistLR=np.zeros(len(classEnumLR))

        for idx,value in enumerate(classEnumLR):
            classDistLR[idx]=value/sum(classEnumLR)

        #Define number of augmentations per class
        if args.aug_LR > 0:
            augmentOrder=np.argsort(classDistLR)
            classAugs=(np.round(args.aug_LR*(1-classDistLR))+1)
            classAugs=classAugs.astype(int)
            print('Low resolution augmentation distribution:')
            print(classAugs)
            imagesToAugmentLR=dirs['basedir']+dirs['project'] + dirs['tempdirLR'] + 'regions/'
            masksToAugmentLR=dirs['basedir']+dirs['project'] + dirs['tempdirLR'] + 'masks/'
            augmentList=glob(imagesToAugmentLR + '*.jpeg')

            #Parallel iter
            augIter=range(0,len(augmentList))

            auglen=len(augmentList)
            #Augment images in parallel using inverted class distributions for augmentation iterations
            num_cores = multiprocessing.cpu_count()
            start=time.time()

            Parallel(n_jobs=num_cores)(delayed(run_batch)(augmentList,masksToAugmentLR,
                batchidx,classAugs,args.boxSizeLR,args.hbound,args.lbound,
                augmentOrder,dirs,classNum_LR,auglen) for batchidx in augIter)

            moveimages(dirs['outDirAI'], dirs['basedir']+dirs['project'] + '/Permanent/LR/regions/')
            moveimages(dirs['outDirAM'], dirs['basedir']+dirs['project'] + '/Permanent/LR/masks/')

        moveimages(dirs['basedir']+dirs['project'] + dirs['tempdirLR']+ '/regions/', dirs['basedir']+dirs['project'] + '/Permanent/LR/regions/')
        moveimages(dirs['basedir']+dirs['project'] + dirs['tempdirLR']+ '/masks/',dirs['basedir']+dirs['project'] + '/Permanent/LR/masks/')
        print('Time for low resolution augmenting: ' + str((time.time()-start)/60) + ' minutes.')
        ##High resolution augmentation
        #Enumerate high resolution class distribution

        classDistHR=np.zeros(len(classEnumHR))
        for idx,value in enumerate(classEnumHR):
            classDistHR[idx]=value/sum(classEnumHR)


        #Define number of augmentations per class
        if args.aug_HR >0:
            augmentOrder=np.argsort(classDistHR)
            classAugs=(np.round(args.aug_HR*(1-classDistHR))+1)
            classAugs=classAugs.astype(int)
            print('High resolution augmentation distribution:')
            print(classAugs)
            #High resolution input augmentable data
            imagesToAugmentHR=dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + 'regions/'
            masksToAugmentHR=dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + 'masks/'
            augmentList=glob(imagesToAugmentHR + '*.jpeg')

            #Parallel iterator
            augIter=range(0,len(augmentList))
            auglen=len(augmentList)

            #Output for augmented data
            dirs['outDirAI']=dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/Augment' + '/regions/'
            dirs['outDirAM']=dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/Augment' + '/masks/'

            #Augment in parallel
            num_cores = multiprocessing.cpu_count()
            start=time.time()
            Parallel(n_jobs=num_cores)(delayed(run_batch)(augmentList,masksToAugmentHR,
                batchidx,classAugs,args.boxSizeHR,args.hbound,args.lbound,
                augmentOrder,dirs,classNum_HR,auglen) for batchidx in augIter)


            moveimages(dirs['outDirAI'], dirs['basedir']+dirs['project'] + '/Permanent/HR/regions/')
            moveimages(dirs['outDirAM'], dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/')

        moveimages(dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/regions/', dirs['basedir']+dirs['project'] + '/Permanent/HR/regions/')
        moveimages(dirs['basedir']+dirs['project'] + dirs['tempdirHR'] + '/masks/',dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/')


        #Total time
        print('Time for high resolution augmenting: ' + str((time.time()-start)/60) + ' minutes.')

    #Generate training and validation arguments
    training_args_list = []

    ##### LOW REZ ARGS #####
    dirs['outDirAILR']=dirs['basedir']+dirs['project'] + '/Permanent/LR/regions/'
    dirs['outDirAMLR']=dirs['basedir']+dirs['project'] + '/Permanent/LR/masks/'

    # TODO: fix this
    trainOutLR=dirs['basedir'] + '/Codes' + '/Deeplab_network/datasetLR/train.txt'
    valOutLR=dirs['basedir'] + '/Codes' + '/Deeplab_network/datasetLR/val.txt'

    generateDatalists(dirs['outDirAILR'],dirs['outDirAMLR'],'/regions/','/masks/',dirs['imExt'],dirs['maskExt'],trainOutLR)
    numImagesLR=len(glob(dirs['outDirAILR'] + '*' + dirs['imExt']))

    numStepsLR=int((args.epoch_LR*numImagesLR)/ args.CNNbatch_sizeLR)
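    # Worked example with assumed values: epoch_LR = 10, 2000 training
    # images, and CNNbatch_sizeLR = 4 give numStepsLR = 10*2000/4 = 5000.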
    pretrain_LR=get_pretrain(currentAnnotationIteration,'/LR/',dirs)
    modeldir_LR =dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration +1) + '/LR/'



    pretrain_HR=get_pretrain(currentAnnotationIteration,'/HR/',dirs)

    modeldir_HR = dirs['basedir']+dirs['project'] + dirs['modeldir'] + str(currentAnnotationIteration+1) + '/HR/'

    # assign to dict
    training_args_LR = {
        'numImages': numImagesLR,
        'data_list': trainOutLR,
        'batch_size': args.CNNbatch_sizeLR,
        'num_steps': numStepsLR,
        'save_interval': int(round(numStepsLR/args.saveIntervals)),
        'pretrain_file': pretrain_LR,
        'input_height': args.boxSizeLR,
        'input_width': args.boxSizeLR,
        'modeldir': modeldir_LR,
        'num_classes': classNum_LR,
        'gpu': args.gpu,
        'data_dir': dirs['data_dir_LR'],
        'print_color': "\033[3;37;40m",
        'log_file': modeldir_LR + 'log_'+ str(currentAnnotationIteration+1) +'_LR.txt',
        'log_dir': modeldir_LR + 'log/',
        'learning_rate': args.learning_rate_LR,
        'encoder_name':args.encoder_name
        }
    training_args_list.append(training_args_LR)


    ##### HIGH REZ ARGS #####
    dirs['outDirAIHR']=dirs['basedir']+dirs['project'] + '/Permanent/HR/regions/'
    dirs['outDirAMHR']=dirs['basedir']+dirs['project'] + '/Permanent/HR/masks/'

    # TODO: fix this
    trainOutHR=dirs['basedir'] + '/Codes' +'/Deeplab_network/datasetHR/train.txt'
    valOutHR=dirs['basedir'] + '/Codes' + '/Deeplab_network/datasetHR/val.txt'

    generateDatalists(dirs['outDirAIHR'],dirs['outDirAMHR'],'/regions/','/masks/',dirs['imExt'],dirs['maskExt'],trainOutHR)
    numImagesHR=len(glob(dirs['outDirAIHR'] + '*' + dirs['imExt']))

    numStepsHR=int((args.epoch_HR*numImagesHR)/ args.CNNbatch_sizeHR)
    # assign to dict
    training_args_HR={
        'numImages': numImagesHR,
        'data_list': trainOutHR,
        'batch_size': args.CNNbatch_sizeHR,
        'num_steps': numStepsHR,
        'save_interval': int(round(numStepsHR/args.saveIntervals)),
        'pretrain_file': pretrain_HR,
        'input_height': args.boxSizeHR,
        'input_width': args.boxSizeHR,
        'modeldir': modeldir_HR,
        'num_classes': classNum_HR,
        'gpu': args.gpu + args.gpu_num - 1,
        'data_dir': dirs['data_dir_HR'],
        'print_color': "\033[1;32;40m",
        'log_file': modeldir_HR + 'log_'+ str(currentAnnotationIteration+1) +'_HR.txt',
        'log_dir': modeldir_HR + 'log/',
        'learning_rate': args.learning_rate_HR,
        'encoder_name': args.encoder_name
        }
    training_args_list.append(training_args_HR)

    # train networks in parallel
    num_cores = args.gpu_num # GPUs
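    # the 'threading' backend keeps both trainers in one process; each dict in
    # training_args_list carries its own 'gpu' index, so the LR and HR
    # networks train concurrently on separate GPUs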
    Parallel(n_jobs=num_cores, backend='threading')(delayed(train_net)(training_args,dirs) for training_args in training_args_list)



    finish_model_generation(dirs,currentAnnotationIteration)

    print('\n\n\033[92;5mPlease place new wsi file(s) in: \n\t' + dirs['basedir'] + dirs['project']+ dirs['training_data_dir'] + str(currentAnnotationIteration+1))
    print('\nthen run [--option predict]\033[0m\n')
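A minimal invocation sketch; the attribute names below are exactly the ones IterateTraining reads from args, but every value is an illustrative placeholder, and the project-local helpers it calls are assumed importable.

from argparse import Namespace

args = Namespace(
    base_dir='/hdd/bg/HAIL2', project='DeepZoomPrediction', wsi_ext='.svs',
    classNum=0, classNum_HR=0, chop_data='True',
    downsampleRateLR=16, downsampleRateHR=1,
    boxSizeLR=250, boxSizeHR=500,
    overlap_percentLR=0.5, overlap_percentHR=0.0,
    white_percent=0.9, aug_LR=2, aug_HR=2, hbound=1.1, lbound=0.9,
    epoch_LR=10, epoch_HR=10, CNNbatch_sizeLR=4, CNNbatch_sizeHR=2,
    saveIntervals=10, learning_rate_LR=2.5e-4, learning_rate_HR=2.5e-4,
    encoder_name='res101', gpu=0, gpu_num=2)
IterateTraining(args)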
Example #6
from xml_to_mask import xml_to_mask
from getWsi import getWsi
from matplotlib import pyplot as plt

slide = getWsi('/hdd/bg/HAIL2/DeepZoomPrediction/TRAINING_data/0/52483.svs')
[d1, d2] = slide.dimensions
x = '/hdd/bg/HAIL2/DeepZoomPrediction/TRAINING_data/0/52483.xml'
wsiMask = xml_to_mask(x, (0, 0), (d1, d2), 16, 0)

plt.imshow(wsiMask*255)
plt.show()
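
# --- Separate fragment: per-region bounding boxes, then parallel sub-region
# chopping. The enclosing loops that produce x_point/y_point and
# region_coords, plus xml_path, slide, box_size, overlap, step_size,
# subArrayChopper, and the numpy/joblib/multiprocessing imports, come from
# code not shown in this excerpt.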
                    if x_point<xMin:
                        xMin=x_point
                    if x_point>xMax:
                        xMax=x_point
                    if y_point<yMin:
                        yMin=y_point
                    if y_point>yMax:
                        yMax=y_point
                    # test if points are in bounds


                region_coords.append([xMin,xMax,yMin,yMax])
    for region in region_coords:
        xMin=region[0]
        yMin=region[2]
        xL=region[1]-xMin
        yL=region[3]-yMin

        # rasterize the region's annotation, shift labels down by 2, and
        # zero out anything that goes negative
        test_im = np.int8(xml_to_mask(xml_path, [xMin, yMin], [xL, yL], 1, 0))
        test_im = test_im - 2
        test_im = test_im.clip(min=0)

        rgb_im=np.array(slide.read_region([xMin,yMin],0,[xL,yL]))[:,:,0:3]
        subIterX = range(0, xL - int(box_size*overlap), step_size)
        subIterY = range(0, yL - int(box_size*overlap), step_size)


        num_cores = multiprocessing.cpu_count()

        Parallel(n_jobs=num_cores, backend='threading')(delayed(subArrayChopper)(boxSize=box_size, xSt=i, ySt=j) for i in subIterX for j in subIterY)