def calculate_F1(databasefile, result_boxes=None, resfile=None, det_thres=0.5, hotclass=2):
    """Compute the pooled and per-file F1 score of detections against a database.

    Args:
        databasefile: Path handed to ``Database.open``.
        result_boxes: Mapping of slide filename -> list of detection rows; the
            last column of each row is used as the detection score. Loaded
            from ``resfile`` when None.
        resfile: Path of a pickled results file (read through ``bz2`` when the
            name ends in ``bz2``). Only used when ``result_boxes`` is None.
        det_thres: Threshold forwarded to ``nms`` and ``_F1_core``.
        hotclass: ``agreedClass`` value of the annotations used as ground truth.

    Returns:
        ``(sF1, F1dict)`` - the F1 score over the summed TP/FP/FN of all files
        (0.0 when all three sums are zero) and a dict of per-file F1 values.

    Raises:
        ValueError: If neither ``result_boxes`` nor ``resfile`` is given.
    """
    DB = Database()
    DB = DB.open(databasefile)

    if result_boxes is None:
        if resfile is None:
            raise ValueError(
                'At least one of resfile/result_boxes must be given')

        opener = bz2.BZ2File if resfile[-3:] == 'bz2' else open
        # NOTE: pickle can execute arbitrary code - only load trusted files.
        with opener(resfile, 'rb') as f:
            result_boxes = pickle.load(f)

    sTP, sFN, sFP = 0, 0, 0
    F1dict = dict()

    result_boxes = nms(result_boxes, det_thres)

    print('Calculating F1 for test set of %d files' % len(result_boxes))

    for resfile in result_boxes:
        boxes = np.array(result_boxes[resfile])
        TP, FP, FN, F1 = 0, 0, 0, 0

        # Load the ground truth for every file, also for those without any
        # detection, so that their annotations are counted as misses.
        DB.loadIntoMemory(DB.findSlideWithFilename(resfile, ''))
        annoList = [[anno.x1, anno.y1]
                    for anno in DB.annotations.values()
                    if anno.agreedClass == hotclass]
        centers_DB = np.array(annoList)

        if boxes.shape[0] > 0:
            score = boxes[:, -1]
            F1, TP, FP, FN = _F1_core(centers_DB, boxes, score, det_thres)
        else:
            # no detections --> missed all
            FN = centers_DB.shape[0]

        sTP += TP
        sFP += FP
        sFN += FN
        F1dict[resfile] = F1

    print('Overall: ')
    denominator = 2 * sTP + sFP + sFN
    # Without any TP/FP/FN the score is undefined; report 0.0 instead of
    # failing with a division by zero.
    sF1 = 2 * sTP / denominator if denominator > 0 else 0.0
    print('TP:', sTP, 'FP:', sFP, 'FN: ', sFN, 'F1:', sF1)

    return sF1, F1dict
class Plugin(SlideRunnerPlugin.SlideRunnerPlugin):
    """SlideRunner plugin that shows the annotations of a second database file.

    A daemon thread consumes jobs from ``inQueue``. Whenever the configured
    database file or the displayed slide changes, the database is opened, its
    classes are turned into plugin annotation labels, and the annotations of
    the matching slide are published through ``getAnnotations``.
    """
    version = 0.1
    shortName = 'Secondary database visualization'
    inQueue = Queue()
    outQueue = Queue()
    initialOpacity = 1.0
    updateTimer = 0.1
    outputType = SlideRunnerPlugin.PluginOutputType.RGB_IMAGE
    description = 'Visualize secondary SlideRunner database'
    pluginType = SlideRunnerPlugin.PluginTypes.WHOLESLIDE_PLUGIN
    configurationList = list(
        (SlideRunnerPlugin.FilePickerConfigurationEntry(uid='file',
                                                        name='Database file',
                                                        mask='*.sqlite'), ))

    COLORS = [[0, 128, 0, 255], [128, 0, 0, 255], [0, 0, 128, 255],
              [128, 128, 0, 255], [0, 128, 128, 255], [128, 128, 128, 255]]

    def __init__(self, statusQueue: Queue):
        """Store the status queue and start the worker thread."""
        self.statusQueue = statusQueue
        self.annotationLabels = {}
        # Must exist before the first job is processed: getAnnotations() can
        # be called at any time once the plugin is instantiated.
        self.annos = []
        self.secondaryDB = Database()
        self.p = Thread(target=self.queueWorker, daemon=True)
        self.p.start()

    @staticmethod
    def getAnnotationUpdatePolicy():
        """Return the update policy: annotations are refreshed on slide change."""
        return SlideRunnerPlugin.AnnotationUpdatePolicy.UPDATE_ON_SLIDE_CHANGE

    def queueWorker(self):
        """Process jobs from ``inQueue`` until a quit job is received."""
        quitSignal = False
        oldArchive = ''
        oldSlide = ''
        while not quitSignal:
            job = SlideRunnerPlugin.pluginJob(self.inQueue.get())
            print(job)

            if (job.jobDescription ==
                    SlideRunnerPlugin.JobDescription.QUIT_PLUGIN_THREAD):
                # signal to exit this thread
                quitSignal = True
                continue

            # Nothing to do if neither the database file nor the slide changed.
            if (job.configuration['file']
                    == oldArchive) and (job.slideFilename == oldSlide):
                continue

            if not (os.path.exists(job.configuration['file'])):
                continue

            self.sendAnnotationLabelUpdate()

            oldArchive = job.configuration['file']
            oldSlide = job.slideFilename

            self.secondaryDB.open(oldArchive)

            self.annos = list()
            self.annotationLabels = dict()

            # One plugin label per database class, keyed by the class id.
            for label, annoId, col in self.secondaryDB.getAllClasses():
                self.annotationLabels[
                    annoId] = SlideRunnerPlugin.PluginAnnotationLabel(
                        0, '%s' % label, [*hex_to_rgb(col), 0])

            pname, fname = os.path.split(job.slideFilename)
            self.slideUID = self.secondaryDB.findSlideWithFilename(
                fname, pname)
            self.secondaryDB.loadIntoMemory(self.slideUID)

            self.annos = list()
            for annoId in self.secondaryDB.annotations.keys():
                anno = self.secondaryDB.annotations[annoId]
                anno.pluginAnnotationLabel = self.annotationLabels[
                    anno.agreedClass]
                self.annos.append(anno)

            self.sendAnnotationLabelUpdate()
            self.updateAnnotations()
            self.setProgressBar(-1)
            self.setMessage('found %d annotations.' % len(self.annos))

    def getAnnotations(self):
        """Return the annotations loaded for the current slide."""
        return self.annos

    def getAnnotationLabels(self):
        """Return all currently known plugin annotation labels as a list."""
        # sending default annotation labels
        return [self.annotationLabels[k] for k in self.annotationLabels.keys()]
if True:  # placeholder condition; the disabled check was: len(sys.argv)>2
    # Slide uids (as strings) that form the test set of each of the three folds.
    slidelist_test_1 = ['14', '18', '3', '22', '10', '15', '21']
    slidelist_test_2 = ['1', '20', '17', '5', '2', '11', '16']
    slidelist_test_3 = ['13', '7', '19', '8', '6', '9', '12']
    # Alternative split that was left commented out in the original:
    # slidelist_test_1 = ['18','3','22','10','15','21','12']
    # slidelist_test_2 = ['1','20','17','5','2','11','16']
    # slidelist_test_3 = ['14','13','7','19','8','6','9']

    # sys.argv[2] selects the fold. Any other value leaves slidelist_test
    # untouched, exactly like the if/elif chain this replaces.
    fold_to_test_slides = {
        '1': slidelist_test_1,
        '2': slidelist_test_2,
        '3': slidelist_test_3,
    }
    if sys.argv[2] in fold_to_test_slides:
        slidelist_test = fold_to_test_slides[sys.argv[2]]

    DB = Database()
    DB.open('databases/MITOS_WSI_CMC_ODAEL_TR.sqlite')
    DB.execute('SELECT uid, filename FROM Slides')
    slides = DB.fetchall()

    # Translate the uid lists into filename lists. The training list has to be
    # built first because slidelist_test is rebound right afterwards.
    slidelist_train = [y[1] for y in slides if str(y[0]) not in slidelist_test]
    slidelist_test = [y[1] for y in slides if str(y[0]) in slidelist_test]

    print('Training:', slidelist_train)
    print('Test:', slidelist_test)

    # `f` is defined outside this block; it is indexed by slide filename here.
    test = {x: f[x] for x in slidelist_test}
    train = {x: f[x] for x in slidelist_train}

    # Context managers make sure both files are flushed and closed.
    with open('trainval_' + sys.argv[1], 'wb') as trainval_file:
        pickle.dump(train, trainval_file)
    with open('test_' + sys.argv[1], 'wb') as test_file:
        pickle.dump(test, test_file)
def listOfSlides(DB):
    """Return all (uid, filename) rows of the Slides table of ``DB``."""
    DB.execute('SELECT uid,filename from Slides')
    return DB.fetchall()


test_slide_filenames = [
    'be10fa37ad6e88e1f406.svs',
    'f3741e764d39ccc4d114.svs',
    'c86cd41f96331adf3856.svs',
    '552c51bfb88fd3e65ffe.svs',
    '8c9f9618fcaca747b7c3.svs',
    'c91a842257ed2add5134.svs',
    'dd4246ab756f6479c841.svs',
    'f26e9fcef24609b988be.svs',
    '96274538c93980aad8d6.svs',
    'add0a9bbc53d1d9bac4c.svs',
    '1018715d369dd0df2fc0.svs',
]

# Leftover note from the original line: Slides_final_cleaned_checked.sqlite
DB.open('MITOS_WSI_CMCT_HEAEL.sqlite')

for slide, filename in listOfSlides(DB):
    DB.loadIntoMemory(slide)
    # From here on `slide` is the opened whole-slide image, not the uid.
    slide = openslide.open_slide(basepath + filename)

    for k, anno in DB.annotations.items():
        coord_x, coord_y = anno.x1, anno.y1

        # Left-upper corner of a patchSize-wide window centred on the annotation.
        lu_x = int(coord_x - int(patchSize / 2))
        lu_y = int(coord_y - int(patchSize / 2))
def optimize_threshold(databasefile, result_boxes=None, resfile=None, hotclass=2, minthres=0.5):
    """Find the detection threshold that maximises the pooled F1 score.

    Thresholds from ``minthres`` up to (excluding) 0.99 are evaluated in steps
    of 0.01. For each threshold the TP/FP/FN counts of all files are summed
    and the F1 score is computed from the sums.

    Args:
        databasefile: Path handed to ``Database.open``.
        result_boxes: Mapping of slide filename -> list of detection rows; the
            last column of each row is used as the detection score. Loaded
            from ``resfile`` when None.
        resfile: Path of a pickled results file (read through ``bz2`` when the
            name ends in ``bz2``). Only used when ``result_boxes`` is None.
        hotclass: ``agreedClass`` value of the annotations used as ground truth.
        minthres: Lowest threshold evaluated; also passed to ``nms``.

    Returns:
        ``(best_threshold, allF1, thresholds)`` where ``allF1[k]`` is the
        pooled F1 score at ``thresholds[k]`` (0 where no TP/FP/FN exist).

    Raises:
        ValueError: If neither ``result_boxes`` nor ``resfile`` is given, or
            if ``minthres`` leaves no threshold to evaluate.
    """
    DB = Database()
    DB = DB.open(databasefile)

    if result_boxes is None:
        if resfile is None:
            raise ValueError(
                'At least one of resfile/result_boxes must be given')

        opener = bz2.BZ2File if resfile[-3:] == 'bz2' else open
        # NOTE: pickle can execute arbitrary code - only load trusted files.
        with opener(resfile, 'rb') as f:
            result_boxes = pickle.load(f)

    MIN_THR = minthres

    result_boxes = nms(result_boxes, MIN_THR)
    TPd, FPd, FNd = dict(), dict(), dict()
    thresholds = np.arange(MIN_THR, 0.99, 0.01)
    if len(thresholds) == 0:
        # np.max/np.argmax below would fail with a less helpful ValueError.
        raise ValueError('minthres=%s leaves no threshold below 0.99 to evaluate'
                         % minthres)

    print(
        'Optimizing threshold for validation set of %d files: ' %
        len(result_boxes.keys()), ','.join(list(result_boxes.keys())))

    for resfile in result_boxes:
        boxes = np.array(result_boxes[resfile])

        TPd[resfile] = list()
        FPd[resfile] = list()
        FNd[resfile] = list()

        # Ground truth is needed for every file, also for those without any
        # detection, so that their annotations are counted as misses.
        DB.loadIntoMemory(DB.findSlideWithFilename(resfile, ''))
        annoList = [[anno.x1, anno.y1]
                    for anno in DB.annotations.values()
                    if anno.agreedClass == hotclass]
        centers_DB = np.array(annoList)

        if boxes.shape[0] > 0:
            score = boxes[:, -1]
            for det_thres in thresholds:
                F1, TP, FP, FN = _F1_core(centers_DB, boxes, score, det_thres)
                TPd[resfile].append(TP)
                FPd[resfile].append(FP)
                FNd[resfile].append(FN)
        else:
            # no detections --> all annotations are missed at every threshold
            missed = centers_DB.shape[0]
            for det_thres in thresholds:
                TPd[resfile].append(0)
                FPd[resfile].append(0)
                FNd[resfile].append(missed)

    allTP = np.zeros(len(thresholds))
    allFP = np.zeros(len(thresholds))
    allFN = np.zeros(len(thresholds))

    for k in range(len(thresholds)):
        allTP[k] = np.sum([TPd[x][k] for x in result_boxes])
        allFP[k] = np.sum([FPd[x][k] for x in result_boxes])
        allFN[k] = np.sum([FNd[x][k] for x in result_boxes])

    # Guard against 0/0: a NaN entry would otherwise be picked by np.argmax.
    denominator = 2 * allTP + allFP + allFN
    allF1 = np.divide(2 * allTP, denominator,
                      out=np.zeros_like(denominator),
                      where=denominator > 0)

    best = np.argmax(allF1)
    print('Best threshold: F1=', np.max(allF1), 'Threshold=', thresholds[best])

    return thresholds[best], allF1, thresholds
import os
import shutil

from SlideRunner.dataAccess.database import Database

# Comma-separated slide uids that are removed from the full database to obtain
# the reduced variant named in the trailing comment. Entries are aligned
# index-wise with WSI_lists below.
delete_slides = [
    "13,8,4,7,17,22,28,26,23,24,36,29,15,14,19,32,25,12",  # 3 WSI
    "13,8,4,7,17,22,28,26,23,24,36,29,15,14,19",  # 6 WSI
    "13,8,4,7,17,22,28,26,23",  # 12 WSI
]

DB = Database()

WSI_lists = ['3', '6', '12']

for k, slide_ids in zip(WSI_lists, delete_slides):
    target = 'databases/MITOS_WSI_CMCT_ODAEL_%sWSI.sqlite' % k

    # shutil.copyfile raises if the copy fails; the previous os.system('cp ...')
    # call ignored the exit status and continued regardless.
    shutil.copyfile('databases/MITOS_WSI_CMCT_ODAEL.sqlite', target)

    DB.open(target)

    # Delete dependent rows first (labels, coordinates), then the annotations
    # and finally the slides themselves.
    DB.execute(
        'DELETE FROM Annotations_label where annoid in (SELECT uid from Annotations where slide in (%s))'
        % slide_ids)
    DB.execute(
        'DELETE FROM Annotations_coordinates where slide in (%s)' % slide_ids)
    DB.execute('DELETE FROM Annotations where slide in (%s)' % slide_ids)
    DB.execute('DELETE FROM Slides where uid in (%s)' % slide_ids)
    DB.commit()
import sys if (len(sys.argv)<5): print('Syntax: Inference-RetinaNet-var.py ModelSavepath.pth Database.sqlite DatasetName val/test [SlideDir]') exit() fname = 'RetinaNet-ODAEL-export.pth'#RetinaNet-CMCT-ODAEL-export.pth' #FullModel256-Fold1-HardExampSampling-export.pth' if len(sys.argv)>1: fname = sys.argv[1] size=256 path = Path('./') database = Database() database.open(str(sys.argv[2]))#Slides_Mitosis_final_checked_cleaned.sqlite')) slidedir = 'WSI' if len(sys.argv)<5 else sys.argv[4] datasetname= sys.argv[3] size = 256 level = 0 files = [] # In[3]: test_slide_filenames = ['be10fa37ad6e88e1f406.svs',
from tqdm import tqdm
from pathlib import Path
import openslide
import time
import pandas as pd
from random import randint
import random
import openslide
import cv2
from SlideRunner.dataAccess.database import Database

# Machine-specific locations; both paths are hard-coded.
patches_path = Path('D:/ProgProjekte/Python/Results-Exact-Study/Patches')
path = Path('./')

database = Database()
database.open(str('C:/Users/c.marzahl/Downloads/MITOS_WSI_CCMCT_MEL.sqlite'))

# Only slides whose filename is listed here pass the guard in the loop below.
slide_names = [
    'f26e9fcef24609b988be.svs', 'f3741e764d39ccc4d114.svs',
    'fff27b79894fe0157b08.svs'
]

# NOTE(review): presumably slide uids of the test split - not used in the
# visible part of the script; confirm against the code that follows.
slidelist_test = ['27', '30', '31', '6', '18', '20', '1', '2', '3', '9', '11']

nr_target_cells = 350

getslides = """SELECT uid, filename FROM Slides"""
for idx, (currslide, filename) in enumerate(
        tqdm(database.execute(getslides).fetchall(),
             desc='Loading slides .. ')):
    # Skip every slide that is not explicitly selected in slide_names.
    if (str(filename) not in slide_names):
        continue
class Plugin(SlideRunnerPlugin.SlideRunnerPlugin):
    """SlideRunner plugin that renders a heatmap of mitotic figure counts.

    Object positions are accumulated in ``downsampledMap``, a grid in which one
    cell covers ``self.ds`` x ``self.ds`` slide pixels. Depending on the
    ``source`` setting the positions come from the database opened in
    SlideRunner (0), from a results file (1) or from a separately configured
    database file (2). The map is then summed over a sliding window and the
    part matching the current view is returned as overlay image.
    """
    version = 0.1
    shortName = 'Mitosis Heatmap'
    inQueue = Queue()
    outQueue = Queue()
    initialOpacity = 0.6
    updateTimer = 0.1
    outputType = SlideRunnerPlugin.PluginOutputType.BINARY_MASK
    description = 'Show heatmap of mitotic figures in WSI'
    pluginType = SlideRunnerPlugin.PluginTypes.WHOLESLIDE_PLUGIN
    configurationList = list(
        (SlideRunnerPlugin.FilePickerConfigurationEntry(
            uid='file', name='Result file', mask='*.p;;*.txt;;*.p.bz2'),
         SlideRunnerPlugin.FilePickerConfigurationEntry(uid='dbfile',
                                                        name='Database file',
                                                        mask='*.sqlite'),
         SlideRunnerPlugin.PluginConfigurationEntry(uid='threshold',
                                                    name='Detection threshold',
                                                    initValue=0.75,
                                                    minValue=0.0,
                                                    maxValue=1.0),
         SlideRunnerPlugin.ComboboxPluginConfigurationEntry(
             uid='source',
             name='Heatmap shows',
             options=['Primary Database', 'Results', 'SecondaryDatabase'])))

    COLORS = [[0, 128, 0, 255], [128, 0, 0, 255], [0, 0, 128, 255],
              [128, 128, 0, 255], [0, 128, 128, 255], [128, 128, 128, 255]]

    def __init__(self, statusQueue: Queue):
        """Initialise the plugin state and start the worker thread."""
        self.statusQueue = statusQueue
        self.annotationLabels = {
            'Detection':
            SlideRunnerPlugin.PluginAnnotationLabel(0, 'Detection',
                                                    [0, 180, 0, 255]),
        }
        # State used by the worker has to exist before the thread is started.
        self.annos = []
        self.downsampledMap = np.zeros((10, 10))
        self.p = Thread(target=self.queueWorker, daemon=True)
        self.p.start()

    @staticmethod
    def getAnnotationUpdatePolicy():
        """Return the update policy: annotations are refreshed on slide change."""
        return SlideRunnerPlugin.AnnotationUpdatePolicy.UPDATE_ON_SLIDE_CHANGE

    def queueWorker(self):
        """Process jobs from ``inQueue`` until a quit job is received."""
        quitSignal = False
        oldFilename = ''
        oldArchive = ''
        oldSlide = ''
        oldDBfile = ''
        oldCoordinates = [-1, -1, -1, -1]
        oldThres = -1
        oldSource = -1
        self.ds = 32
        while not quitSignal:
            job = SlideRunnerPlugin.pluginJob(self.inQueue.get())
            print(job)
            print(job.configuration)

            if (job.jobDescription ==
                    SlideRunnerPlugin.JobDescription.QUIT_PLUGIN_THREAD):
                # signal to exit this thread
                quitSignal = True
                continue

            sourceChanged = job.configuration['source'] != oldSource

            if (job.configuration['source'] == 0):
                if not hasattr(job.openedDatabase, 'dbfilename'):
                    # DB not open yet
                    continue
                # Source 0 always uses the database opened in SlideRunner.
                job.configuration['dbfile'] = job.openedDatabase.dbfilename

            dbfilechanged = job.configuration['dbfile'] != oldDBfile

            # Skip the job if nothing relevant changed since the last one.
            if not (sourceChanged) and (
                    job.configuration['file'] == oldArchive
            ) and (job.configuration['threshold']
                   == oldThres) and (job.slideFilename == oldSlide) and np.all(
                       job.coordinates == oldCoordinates) and not (
                           dbfilechanged):
                continue

            if not (os.path.exists(job.configuration['file'])) and (
                    job.configuration['source'] == 1):
                continue

            self.sendAnnotationLabelUpdate()

            fileChanged = job.configuration['file'] != oldArchive
            oldDBfile = job.configuration['dbfile']
            slideChanged = job.slideFilename != oldSlide
            thresChanged = job.configuration['threshold'] != oldThres

            oldArchive = job.configuration['file']
            oldThres = job.configuration['threshold']
            oldSlide = job.slideFilename
            oldSource = job.configuration['source']
            oldCoordinates = job.coordinates
            _, self.ext = os.path.splitext(oldArchive)
            self.ext = self.ext.upper()
            self.slideObj = openslide.open_slide(job.slideFilename)

            if (fileChanged):
                if (self.ext == '.P') or (
                        self.ext == '.BZ2'
                ):  # Pickled format - results for many slides
                    # NOTE: pickle can execute arbitrary code - only load
                    # trusted result files.
                    if (self.ext == '.BZ2'):
                        with bz2.BZ2File(oldArchive, 'rb') as archive:
                            self.resultsArchive = pickle.load(archive)
                        print('Opened bz2-compressed results container.')
                    else:
                        with open(oldArchive, 'rb') as archive:
                            self.resultsArchive = pickle.load(archive)

            print('Sourcechanged:', sourceChanged, 'dbfilechanged:',
                  dbfilechanged, (len(job.configuration['dbfile']) > 0))
            if (sourceChanged or dbfilechanged or slideChanged) and (
                (job.configuration['source'] == 2) or
                (job.configuration['source']
                 == 0)) and (len(job.configuration['dbfile']) > 0):
                # Rebuild the map from the annotations of a database.
                self.slideObj = openslide.open_slide(job.slideFilename)
                self.downsampledMap = np.zeros(
                    (int(self.slideObj.dimensions[1] / self.ds),
                     int(self.slideObj.dimensions[0] / self.ds)))
                self.newDB = Database()
                self.newDB.open(job.configuration['dbfile'])
                allClasses = self.newDB.getAllClasses()

                # Use the first class whose name contains "MITO" but not
                # "LOOK" (case-insensitive).
                mitosisClass = -1
                for clsname, clsuid, col in allClasses:
                    if (mitosisClass
                            == -1) and ('MITO' in clsname.upper()) and (
                                'LOOK' not in clsname.upper()):
                        mitosisClass = clsuid

                pname, fname = os.path.split(job.slideFilename)
                uid = self.newDB.findSlideWithFilename(fname, pname)
                self.newDB.loadIntoMemory(uid)
                for anno in self.newDB.annotations:
                    if (self.newDB.annotations[anno].agreedClass ==
                            mitosisClass):
                        annodet = self.newDB.annotations[anno]
                        self.downsampledMap[int(annodet.y1 / self.ds),
                                            int(annodet.x1 / self.ds)] += 1
            else:
                self.setMessage('No DB open.')

            if ((sourceChanged and job.configuration['source'] == 1) or
                (slideChanged) or
                (thresChanged)) and len(job.configuration['file']) > 0:
                pname, fname = os.path.split(job.slideFilename)
                print('Stage 6')
                # NOTE(review): `is not` compares object identity. For a
                # freshly split file name this is practically always True, so
                # this branch also runs when only the threshold changed.
                # Switching to `!=` would stop that re-filtering, hence the
                # comparison is deliberately left untouched.
                if (oldFilename is not fname) or (slideChanged):
                    # process slide
                    self.annos = list()

                    if (fname not in self.resultsArchive):
                        self.setMessage('Slide ' + str(fname) +
                                        ' not found in results file.')
                        print('List of files is: ',
                              self.resultsArchive.keys())
                        continue

                    oldFilename = fname

                    uniqueLabels = np.unique(
                        np.array(self.resultsArchive[fname])[:, 4])

                    self.annotationLabels = dict()
                    for key, label in enumerate(uniqueLabels):
                        self.annotationLabels[
                            label] = SlideRunnerPlugin.PluginAnnotationLabel(
                                0, 'Class %d' % label,
                                self.COLORS[key % len(self.COLORS)])

                    if (job.configuration['source'] == 1):
                        self.downsampledMap = np.zeros(
                            (int(self.slideObj.dimensions[1] / self.ds),
                             int(self.slideObj.dimensions[0] / self.ds)))
                    print('Downsampled image: ', self.downsampledMap.shape)

                    # Rows are indexed as x1, y1, x2, y2, label, score.
                    for idx, row in enumerate(self.resultsArchive[fname]):
                        if (row[5] > job.configuration['threshold']):
                            myanno = annotations.rectangularAnnotation(
                                uid=idx,
                                x1=row[0],
                                x2=row[2],
                                y1=row[1],
                                y2=row[3],
                                text='%.2f' % row[5],
                                pluginAnnotationLabel=self.annotationLabels[
                                    row[4]])
                            if (job.configuration['source'] == 1):
                                # Count the detection at its box centre.
                                self.downsampledMap[
                                    int((row[1] + row[3]) / 2 / self.ds),
                                    int((row[0] + row[2]) / 2 /
                                        self.ds)] += 1

                            self.annos.append(myanno)

                    self.sendAnnotationLabelUpdate()

                elif (self.ext == '.TXT'):  # Assume MS Coco format
                    self.annos = list()
                    self.resultsArchive = np.loadtxt(
                        oldArchive,
                        dtype={
                            'names':
                            ('label', 'confidence', 'x', 'y', 'w', 'h'),
                            'formats': ('U30', 'f4', 'i4', 'i4', 'i4', 'i4')
                        },
                        skiprows=0,
                        delimiter=' ')
                    uniqueLabels = np.unique(self.resultsArchive['label'])

                    self.annotationLabels = dict()
                    for key, label in enumerate(uniqueLabels):
                        self.annotationLabels[
                            label] = SlideRunnerPlugin.PluginAnnotationLabel(
                                0, label, self.COLORS[key % len(self.COLORS)])

                    self.sendAnnotationLabelUpdate()
                    self.slideObj = openslide.open_slide(job.slideFilename)
                    self.ds = 32
                    if (job.configuration['source'] == 1):
                        self.downsampledMap = np.zeros(
                            (int(self.slideObj.dimensions[1] / self.ds),
                             int(self.slideObj.dimensions[0] / self.ds)))
                    print('Downsampled image: ', self.downsampledMap.shape)

                    for idx, row in enumerate(self.resultsArchive):
                        # Compare the confidence field with the threshold.
                        # Positional index 5 addressed the box height 'h'.
                        if (row['confidence'] >
                                job.configuration['threshold']):
                            if (job.configuration['source'] == 1):
                                # The box spans x..x+w and y..y+h (see the
                                # rectangle below), so its centre is at
                                # +w/2 and +h/2.
                                self.downsampledMap[
                                    int((row['y'] + row['h'] / 2) / self.ds),
                                    int((row['x'] + row['w'] / 2) /
                                        self.ds)] += 1

                            # x2/y1 were swapped before (x2 received y and y1
                            # received x + w).
                            myanno = annotations.rectangularAnnotation(
                                uid=idx,
                                x1=row['x'],
                                y1=row['y'],
                                x2=row['x'] + row['w'],
                                y2=row['y'] + row['h'],
                                text='%.2f' % row['confidence'],
                                pluginAnnotationLabel=self.annotationLabels[
                                    row['label']])
                            self.annos.append(myanno)

            print('returning overlay...')
            # Window of area A with an aspect ratio of 4:3, converted from
            # microns to pixels and then to cells of the downsampled map.
            A = 2.37  # mm^2
            W_hpf_microns = np.sqrt(A * 4 / 3) * 1000  # in microns
            H_hpf_microns = np.sqrt(A * 3 / 4) * 1000  # in microns

            micronsPerPixel = self.slideObj.properties[
                openslide.PROPERTY_NAME_MPP_X]

            W_hpf = int(W_hpf_microns / float(micronsPerPixel))
            H_hpf = int(H_hpf_microns / float(micronsPerPixel))

            W_x = int(W_hpf / self.ds)
            W_y = int(H_hpf / self.ds)
            # Filtering with a kernel of ones sums the counts inside the
            # window around every cell.
            kernel = np.ones((W_y, W_x), np.float32)
            mitoticCount = cv2.filter2D(self.downsampledMap, -1, kernel)

            # job.coordinates is used as (x, y, width, height) of the view.
            coords_ds = np.int16(np.array(job.coordinates) / self.ds)

            centerImg = cv2.getRectSubPix(
                np.float32(mitoticCount[:, :, None]),
                patchSize=(coords_ds[2], coords_ds[3]),
                center=(coords_ds[0] + coords_ds[2] * 0.5,
                        coords_ds[1] + coords_ds[3] * 0.5))
            resized = cv2.resize(centerImg,
                                 dsize=(job.currentImage.shape[1],
                                        job.currentImage.shape[0]))

            self.returnImage(resized)

            self.updateAnnotations()
            self.setProgressBar(-1)
            self.setMessage('found %d annotations.' % len(self.annos))

    def getAnnotations(self):
        """Return the annotations created for the current slide."""
        return self.annos

    def getAnnotationLabels(self):
        """Return all currently known plugin annotation labels as a list."""
        # sending default annotation labels
        return [self.annotationLabels[k] for k in self.annotationLabels.keys()]
from SlideRunner.dataAccess.database import Database

DB = Database()

# Slide uids (as strings) that are excluded from every query below.
slidelist_test = ['27', '30', '31', '6', '18', '20', '1', '2', '3', '9', '11']
# Comma-separated form, inserted into the SQL "not in (...)" clauses.
clause = ','.join(slidelist_test)

# Database variants to report on; the last entry is reopened further down.
files = [
    'MITOS_WSI_CCMCT_ODAEL_50HPF.sqlite', 'MITOS_WSI_CCMCT_ODAEL_10HPF.sqlite',
    'MITOS_WSI_CCMCT_ODAEL_5HPF.sqlite', 'MITOS_WSI_CCMCT_ODAEL_12WSI.sqlite',
    'MITOS_WSI_CCMCT_ODAEL_6WSI.sqlite', 'MITOS_WSI_CCMCT_ODAEL_3WSI.sqlite',
    'MITOS_WSI_CCMCT_ODAEL.sqlite'
]

# Print, per database, the number of annotations with agreedClass 2 on slides
# that are not part of slidelist_test.
for f in files:
    DB.open('databases/' + f)
    cnt = DB.execute(
        'SELECT COUNT(*) FROM Annotations where agreedClass==2 and slide not in (%s)'
        % clause).fetchone()[0]
    print('%40s: %d mitotic figures in training set' % (f, cnt))

# NOTE(review): the loop above opens 'databases/' + f, whereas this call opens
# the bare file name relative to the working directory - confirm whether the
# 'databases/' prefix is missing here.
DB.open(files[-1])
# Filenames of the remaining slides, ordered by ascending number of
# agreedClass-2 annotations.
slidelist_train = [
    s[0] for s in DB.execute(
        'SELECT Slides.filename, count(*) as cnt FROM Slides left join Annotations on Annotations.slide == Slides.uid where Slides.uid not in (%s) and Annotations.agreedClass==2 group by slide order by cnt asc'
        % clause).fetchall()
]
# (dict literal continues beyond the visible part of the file)
slidelist_cnt = {
import sqlite3
import numpy as np
from SlideRunner.dataAccess.database import Database
from tqdm import tqdm
from pathlib import Path
import openslide
import time
import pickle
import cv2
from glob import glob
import os

# Dataset root; the annotation database is read from this directory.
path = Path('/data/Datasets/EIPH_WSI/')

database = Database()
database.open(str(path/'EIPH.sqlite'))

size = 1024
level = 0

files = []


class SlideContainer():
    """Bundle an opened whole-slide image with patch size and level information."""

    def __init__(self, file: Path, level: int=0, width: int=256, height: int=256):
        """Open ``file`` with openslide and store the patch geometry.

        Args:
            file: Path of the slide image.
            level: Index into the slide's ``level_downsamples``.
            width: Stored as ``self.width``.
            height: Stored as ``self.height``.
        """
        self.file = file
        self.slide = openslide.open_slide(str(file))
        self.width = width
        self.height = height
        # Read the factor from the slide opened above. The bare name `slide`
        # used before is not defined in this scope and raised a NameError.
        self.down_factor = self.slide.level_downsamples[level]
def listOfSlides(DB):
    """Return all (uid, filename) rows of the Slides table of ``DB``."""
    DB.execute('SELECT uid,filename from Slides')
    return DB.fetchall()


test_slide_filenames = [
    '3369_07_B_1_MCT Mitose 2017.svs',
    '3786_09 A MCT Mitose 2017.svs',
    '1659_08_1_MCT Mitose 2017.svs',
    '28_08_A_1_MCT Mitose 2017.svs',
    '3806_09_B_1_MCT Mitose 2017.svs',
    '2253_06_A_1_MCT Mitose 2017.svs',
    '1410_08_A_1_MCT Mitose 2017.svs',
    '1490_08_1_MCT Mitose 2017.svs',
    '2281_14_A_1_MCT Mitose 2017.svs',
    '221_08 MCT Mitose 2017.svs',
    '5187_11 B MCT Mitose 2017.svs',
]

# The database variant is selected by the first command line argument.
# Leftover note from the original line: Slides_final_cleaned_checked.sqlite
DB.open('../databases/MITOS_WSI_CMCT_ODAEL_%sWSI.sqlite' % sys.argv[1])

for slide, filename in listOfSlides(DB):
    DB.loadIntoMemory(slide)
    # From here on `slide` is the opened whole-slide image, not the uid.
    slide = openslide.open_slide(basepath + filename)

    for k, anno in DB.annotations.items():
        coord_x, coord_y = anno.x1, anno.y1

        # Left-upper corner of a patchSize-wide window centred on the annotation.
        lu_x = int(coord_x - int(patchSize / 2))
        lu_y = int(coord_y - int(patchSize / 2))
DB.execute('SELECT uid,filename from Slides') return DB.fetchall() test_slide_filenames = ['be10fa37ad6e88e1f406.svs', 'f3741e764d39ccc4d114.svs', 'c86cd41f96331adf3856.svs', '552c51bfb88fd3e65ffe.svs', '8c9f9618fcaca747b7c3.svs', 'c91a842257ed2add5134.svs', 'dd4246ab756f6479c841.svs', 'f26e9fcef24609b988be.svs', '96274538c93980aad8d6.svs', 'add0a9bbc53d1d9bac4c.svs', '1018715d369dd0df2fc0.svs'] DB.open('../databases/MITOS_WSI_CCMCT_ODAEL.sqlite') for slide,filename in listOfSlides(DB): DB.loadIntoMemory(slide) slide=openslide.open_slide(basepath+filename) for k in DB.annotations.keys(): anno = DB.annotations[k] coord_x = anno.x1 coord_y = anno.y1 lu_x = int(coord_x - int(patchSize/2))
def calculate_F1_fromCSV(databasefile, csv_directory, hotclass=1, verbose=False):
    """Compute F1 scores for detections stored as CSV files.

    Every leaf directory below ``csv_directory`` must be named with an integer
    slide number. The first file in it is read as CSV with two columns, which
    are unpacked as ``y, x``; each row becomes a detection ``[x, y, 1]`` of
    the slide ``'%02d_test.tif' % slidenr``.

    Args:
        databasefile: Path handed to ``Database.open``.
        csv_directory: Root directory that is walked for CSV files.
        hotclass: ``agreedClass`` value of the annotations used as ground truth.
        verbose: If True, print TP/FP/FN and F1 for every file.

    Returns:
        ``(sF1, F1dict)`` - the F1 score over the summed TP/FP/FN of all files
        (0.0 when all three sums are zero) and a dict of per-file F1 values.
    """
    DB = Database()
    DB = DB.open(databasefile)

    import os
    import csv

    result_boxes = {}
    for root, dirs, files in os.walk(csv_directory):
        if len(dirs) > 0:
            continue  # only leaf directories hold results

        # basename/normpath work with any path separator and with a trailing
        # separator, unlike splitting on '/'.
        slidenr = int(os.path.basename(os.path.normpath(root)))
        detections = result_boxes['%02d_test.tif' % slidenr] = []

        if not files:
            # Directory without a CSV file: keep the slide, without detections.
            continue

        with open(os.path.join(root, files[0]), 'r', newline='') as f:
            for y, x in csv.reader(f, delimiter=','):
                detections.append([int(x), int(y), 1])

    sTP, sFN, sFP = 0, 0, 0
    F1dict = dict()

    print('Calculating F1 for test set of %d files' % len(result_boxes), ':',
          result_boxes.keys())

    mitcount = DB.execute(
        f'SELECT COUNT(*) FROM Annotations where agreedClass={hotclass}'
    ).fetchall()
    print('Official count of mitotic figures in DB: ', mitcount)

    for resfile in result_boxes:
        boxes = np.array(result_boxes[resfile])
        TP, FP, FN, F1 = 0, 0, 0, 0

        slide_id = DB.findSlideWithFilename(resfile, '')
        DB.loadIntoMemory(slide_id)

        annoList = [[anno.x1, anno.y1]
                    for anno in DB.annotations.values()
                    if anno.agreedClass == hotclass]
        centers_DB = np.array(annoList)

        if boxes.shape[0] > 0:
            score = boxes[:, -1]
            # Every CSV detection carries the score 1, hence threshold 0.
            F1, TP, FP, FN = _F1_core(centers_DB, boxes, score, 0)
            if (centers_DB.shape[0] != TP + FN):
                # Consistency check: TP + FN should equal the annotation count.
                print(resfile, centers_DB.shape[0], TP + FN)
        else:
            # no detections --> missed all
            FN = centers_DB.shape[0]

        if (verbose):
            print(f'{resfile}: F1:{F1}, TP:{TP}, FP:{FP}, FN:{FN}')

        sTP += TP
        sFP += FP
        sFN += FN
        F1dict[resfile] = F1

    # Undefined ratios (empty denominator) are reported as 0.0 instead of
    # failing with a division by zero.
    f1_denominator = 2 * sTP + sFP + sFN
    sF1 = 2 * sTP / f1_denominator if f1_denominator > 0 else 0.0
    precision = sTP / (sTP + sFP) if (sTP + sFP) > 0 else 0.0
    recall = sTP / (sTP + sFN) if (sTP + sFN) > 0 else 0.0

    print('Overall: ')
    print('F1: ', sF1)
    print('Precision: %.3f ' % precision)
    print('Recall: %.3f' % recall)

    return sF1, F1dict
def calculate_F1(databasefile, result_boxes=None, resfile=None, det_thres=0.5, hotclass=2, verbose=False):
    """Compute the pooled and per-file F1 score of detections against a database.

    Args:
        databasefile: Path handed to ``Database.open``.
        result_boxes: Mapping of slide filename -> list of detection rows; the
            last column of each row is used as the detection score. Loaded
            from ``resfile`` when None.
        resfile: Path of a pickled results file (read through ``bz2`` when the
            name ends in ``bz2``). Only used when ``result_boxes`` is None.
        det_thres: Threshold forwarded to ``nms`` and ``_F1_core``.
        hotclass: ``agreedClass`` value of the annotations used as ground truth.
        verbose: If True, print TP/FP/FN and F1 for every file.

    Returns:
        ``(sF1, F1dict)`` - the F1 score over the summed TP/FP/FN of all files
        (0.0 when all three sums are zero) and a dict of per-file F1 values.

    Raises:
        ValueError: If neither ``result_boxes`` nor ``resfile`` is given.
    """
    DB = Database()
    DB = DB.open(databasefile)

    if result_boxes is None:
        if resfile is None:
            raise ValueError(
                'At least one of resfile/result_boxes must be given')

        opener = bz2.BZ2File if resfile[-3:] == 'bz2' else open
        # NOTE: pickle can execute arbitrary code - only load trusted files.
        with opener(resfile, 'rb') as f:
            result_boxes = pickle.load(f)

    sTP, sFN, sFP = 0, 0, 0
    F1dict = dict()

    result_boxes = nms(result_boxes, det_thres)

    print('Calculating F1 for test set of %d files' % len(result_boxes), ':',
          result_boxes.keys())

    mitcount = DB.execute(
        f'SELECT COUNT(*) FROM Annotations where agreedClass={hotclass}'
    ).fetchall()
    print('Official count of mitotic figures in DB: ', mitcount)

    for resfile in result_boxes:
        boxes = np.array(result_boxes[resfile])
        TP, FP, FN, F1 = 0, 0, 0, 0

        slide_id = DB.findSlideWithFilename(resfile, '')
        DB.loadIntoMemory(slide_id)

        annoList = [[anno.x1, anno.y1]
                    for anno in DB.annotations.values()
                    if anno.agreedClass == hotclass]
        centers_DB = np.array(annoList)

        if boxes.shape[0] > 0:
            score = boxes[:, -1]
            F1, TP, FP, FN = _F1_core(centers_DB, boxes, score, det_thres)
            if (centers_DB.shape[0] != TP + FN):
                # Consistency check: TP + FN should equal the annotation count.
                print(resfile, centers_DB.shape[0], TP + FN)
        else:
            # no detections --> missed all
            FN = centers_DB.shape[0]

        if (verbose):
            print(f'{resfile}: F1:{F1}, TP:{TP}, FP:{FP}, FN:{FN}')

        sTP += TP
        sFP += FP
        sFN += FN
        F1dict[resfile] = F1

    # Undefined ratios (empty denominator) are reported as 0.0 instead of
    # failing with a division by zero.
    f1_denominator = 2 * sTP + sFP + sFN
    sF1 = 2 * sTP / f1_denominator if f1_denominator > 0 else 0.0
    precision = sTP / (sTP + sFP) if (sTP + sFP) > 0 else 0.0
    recall = sTP / (sTP + sFN) if (sTP + sFN) > 0 else 0.0

    print('Overall: ')
    print('F1: ', sF1)
    print('Precision: %.3f ' % precision)
    print('Recall: %.3f' % recall)

    return sF1, F1dict
from SlideRunner.dataAccess.database import Database
import SlideRunner.dataAccess.annotations as annotations
import openslide
import os
import shutil
import numpy as np
import sys

# The area argument is mandatory. Stop right away instead of continuing with
# an undefined `hpf`.
if len(sys.argv) < 2:
    print('syntax:', sys.argv[0], '<area in WSI>')
    sys.exit(1)

hpf = int(sys.argv[1])

# Portable replacement for `mkdir -p`; raises if the directory cannot be made.
os.makedirs('%dHPF' % hpf, exist_ok=True)

DB = Database()
DB.open('../databases/MITOS_WSI_CCMCT_ODAEL.sqlite')

DBRK = Database()
DBRK.open('databases/HighMCAreas.sqlite')

# Work on a copy of the full database. shutil.copyfile raises on failure,
# whereas the exit status of the former `cp` call was ignored.
new_db_file = 'MITOS_WSI_CCMCT_ODAEL_%dHPF.sqlite' % hpf
shutil.copyfile('../databases/MITOS_WSI_CCMCT_ODAEL.sqlite', new_db_file)

DBnew = Database()
DBnew.open(new_db_file)
# Make the original database available under the schema name `orig`.
DBnew.execute('ATTACH `../databases/MITOS_WSI_CCMCT_ODAEL.sqlite` as orig')

for uid, filename in DB.listOfSlides():
    print(uid, filename)