def test_compute_fsd_features(self):
    """Check ``compute_fsd_features`` against the stored FSD ground truth.

    Computes ``Fs`` Fourier shape descriptors from ``cfg.im_nuclei_seg_mask``,
    runs ``check_fdata_sanity`` on both the computed frame and
    ``cfg.fdata_nuclei`` using the expected ``Shape.FSD<k>`` names, and then
    compares the computed frame with ``Easy1_nuclei_fsd_features.csv``.

    When ``GENERATE_GROUNDTRUTH`` is truthy the computed frame is additionally
    written to the system temporary directory under the same file name.
    """
    Fs = 6
    expected_feature_list = ['Shape.FSD' + str(i + 1) for i in range(Fs)]

    fdata = htk_features.compute_fsd_features(
        cfg.im_nuclei_seg_mask, Fs=Fs)

    check_fdata_sanity(fdata, expected_feature_list)
    check_fdata_sanity(
        cfg.fdata_nuclei, expected_feature_list, match_feature_count=False)

    if GENERATE_GROUNDTRUTH:
        fdata.to_csv(
            os.path.join(tempfile.gettempdir(),
                         'Easy1_nuclei_fsd_features.csv'),
            index=False)

    fdata_gtruth = pd.read_csv(
        utilities.getTestFilePath('Easy1_nuclei_fsd_features.csv'))

    # ``check_less_precise`` was deprecated in pandas 1.1 and removed in 2.0.
    # pandas mapped ``check_less_precise=2`` to rtol = atol = 0.5 * 10 ** -2,
    # so pass those tolerances explicitly to keep the same comparison.
    pd.testing.assert_frame_equal(
        fdata, fdata_gtruth, rtol=0.5e-2, atol=0.5e-2)
def test_compute_morphometry_features(self):
    """Check ``compute_morphometry_features`` against stored ground truth.

    Computes morphometry features from ``cfg.im_nuclei_seg_mask``, runs
    ``check_fdata_sanity`` on both the computed frame and
    ``cfg.fdata_nuclei`` using the expected ``Size.*`` / ``Shape.*`` names,
    and then compares the computed frame with
    ``Easy1_nuclei_morphometry_features.csv``.

    When ``GENERATE_GROUNDTRUTH`` is truthy the computed frame is additionally
    written to the system temporary directory under the same file name.
    """
    expected_feature_list = [
        'Size.Area',
        'Size.MajorAxisLength',
        'Size.MinorAxisLength',
        'Size.Perimeter',
        'Shape.Circularity',
        'Shape.Eccentricity',
        'Shape.EquivalentDiameter',
        'Shape.Extent',
        'Shape.MinorMajorAxisRatio',
        'Shape.Solidity',
    ]

    fdata = htk_features.compute_morphometry_features(
        cfg.im_nuclei_seg_mask)

    check_fdata_sanity(fdata, expected_feature_list)
    check_fdata_sanity(
        cfg.fdata_nuclei, expected_feature_list, match_feature_count=False)

    if GENERATE_GROUNDTRUTH:
        fdata.to_csv(
            os.path.join(tempfile.gettempdir(),
                         'Easy1_nuclei_morphometry_features.csv'),
            index=False)

    fdata_gtruth = pd.read_csv(
        utilities.getTestFilePath('Easy1_nuclei_morphometry_features.csv'))

    # ``check_less_precise`` was deprecated in pandas 1.1 and removed in 2.0.
    # pandas mapped ``check_less_precise=2`` to rtol = atol = 0.5 * 10 ** -2,
    # so pass those tolerances explicitly to keep the same comparison.
    pd.testing.assert_frame_equal(
        fdata, fdata_gtruth, rtol=0.5e-2, atol=0.5e-2)
def test_prep(girderClient):  # noqa
    """Fill the shared ``cfg`` namespace with inputs for the ROI-mask tests.

    Stores the client on ``cfg.gc``, looks up the item matching the slide
    name, loads the ``sample_GTcodes.csv`` table indexed by its ``group``
    column, fetches and rescales the slide annotations, and assembles the
    keyword-argument dictionaries kept on ``cfg`` (``get_roi_mask_kwargs``,
    ``get_contours_kwargs`` and ``get_kwargs``).

    Args:
        girderClient: object exposing ``get``; stored as ``cfg.gc``.
            Presumably supplied as a pytest fixture -- confirm.
    """
    cfg.gc = girderClient
    item_query = {'text': "TCGA-A2-A0YE-01Z-00-DX1"}
    cfg.iteminfo = cfg.gc.get('/item', parameters=item_query)[0]
    slide_id = cfg.iteminfo['_id']

    # GT codes table, keyed by the "group" column
    cfg.GTcodes = read_csv(getTestFilePath('sample_GTcodes.csv'))
    cfg.GTcodes.index = cfg.GTcodes.loc[:, 'group']

    # options for building the ROI mask
    cfg.get_roi_mask_kwargs = {
        'iou_thresh': 0.0,
        'crop_to_roi': True,
        'use_shapely': True,
        'verbose': False
    }

    # options for extracting contours
    cfg.get_contours_kwargs = {
        'groups_to_get': None,
        'roi_group': 'roi',
        'get_roi_contour': True,
        'discard_nonenclosed_background': True,
        'background_group': 'mostly_stroma',
        'MIN_SIZE': 10,
        'MAX_SIZE': None,
        'verbose': False,
        'monitorPrefix': ""
    }

    # Microns-per-pixel / Magnification (either or)
    cfg.MPP = 5.0
    cfg.MAG = None

    # fetch the slide's annotations, then rescale them
    cfg.slide_annotations = cfg.gc.get('/annotation/item/' + slide_id)
    scale_factor, _ = get_scale_factor_and_appendStr(
        gc=cfg.gc, slide_id=slide_id, MPP=cfg.MPP, MAG=cfg.MAG)
    cfg.slide_annotations = scale_slide_annotations(
        cfg.slide_annotations, sf=scale_factor)

    # bounding box information for every annotation element
    cfg.element_infos = get_bboxes_from_slide_annotations(
        cfg.slide_annotations)

    # params for get_image_and_mask_from_slide()
    roi_bounds = {
        'XMIN': 58000, 'XMAX': 63000,
        'YMIN': 35000, 'YMAX': 39000}
    cfg.get_kwargs = {
        'gc': cfg.gc,
        'slide_id': slide_id,
        'GTCodes_dict': cfg.GTcodes.T.to_dict(),
        'bounds': roi_bounds,
        'MPP': cfg.MPP,
        'MAG': cfg.MAG,
        'get_roi_mask_kwargs': cfg.get_roi_mask_kwargs,
        'get_contours_kwargs': cfg.get_contours_kwargs,
        'get_rgb': True,
        'get_contours': True,
        'get_visualization': True,
    }
def test_compute_haralick_features(self):
    """Check ``compute_haralick_features`` against stored ground truth.

    Builds the expected column names by appending ``.Mean`` and ``.Range``
    to each Haralick feature name, computes the features from
    ``cfg.im_nuclei_seg_mask`` and ``cfg.im_nuclei_stain``, runs
    ``check_fdata_sanity`` on the computed frame and on ``cfg.fdata_nuclei``
    (with the ``Nucleus.`` and ``Cytoplasm.`` prefixes), and then compares
    the computed frame with ``Easy1_nuclei_haralick_features.csv``.

    When ``GENERATE_GROUNDTRUTH`` is truthy the computed frame is additionally
    written to the system temporary directory under the same file name.
    """
    base_features = [
        'Haralick.ASM',
        'Haralick.Contrast',
        'Haralick.Correlation',
        'Haralick.SumOfSquares',
        'Haralick.IDM',
        'Haralick.SumAverage',
        'Haralick.SumVariance',
        'Haralick.SumEntropy',
        'Haralick.Entropy',
        'Haralick.DifferenceVariance',
        'Haralick.DifferenceEntropy',
        'Haralick.IMC1',
        'Haralick.IMC2',
    ]

    # Order matters: each feature contributes its ".Mean" column
    # immediately followed by its ".Range" column.
    expected_feature_list = [
        col + suffix
        for col in base_features
        for suffix in ('.Mean', '.Range')
    ]

    fdata = htk_features.compute_haralick_features(
        cfg.im_nuclei_seg_mask, cfg.im_nuclei_stain)

    check_fdata_sanity(fdata, expected_feature_list)
    check_fdata_sanity(
        cfg.fdata_nuclei, expected_feature_list,
        prefix='Nucleus.', match_feature_count=False)
    check_fdata_sanity(
        cfg.fdata_nuclei, expected_feature_list,
        prefix='Cytoplasm.', match_feature_count=False)

    if GENERATE_GROUNDTRUTH:
        fdata.to_csv(
            os.path.join(tempfile.gettempdir(),
                         'Easy1_nuclei_haralick_features.csv'),
            index=False)

    fdata_gtruth = pd.read_csv(
        utilities.getTestFilePath('Easy1_nuclei_haralick_features.csv'))

    # ``check_less_precise`` was deprecated in pandas 1.1 and removed in 2.0.
    # pandas mapped ``check_less_precise=2`` to rtol = atol = 0.5 * 10 ** -2,
    # so pass those tolerances explicitly to keep the same comparison.
    pd.testing.assert_frame_equal(
        fdata, fdata_gtruth, rtol=0.5e-2, atol=0.5e-2)
def test_compute_intensity_features(self):
    """Check ``compute_intensity_features`` against stored ground truth.

    Computes intensity features from ``cfg.im_nuclei_seg_mask`` and
    ``cfg.im_nuclei_stain``, runs ``check_fdata_sanity`` on the computed
    frame and on ``cfg.fdata_nuclei`` (with the ``Nucleus.`` and
    ``Cytoplasm.`` prefixes) using the expected ``Intensity.*`` names, and
    then compares the computed frame with
    ``Easy1_nuclei_intensity_features.csv``.

    When ``GENERATE_GROUNDTRUTH`` is truthy the computed frame is additionally
    written to the system temporary directory under the same file name.
    """
    expected_feature_list = [
        'Intensity.Min',
        'Intensity.Max',
        'Intensity.Mean',
        'Intensity.Median',
        'Intensity.MeanMedianDiff',
        'Intensity.Std',
        'Intensity.IQR',
        'Intensity.MAD',
        'Intensity.Skewness',
        'Intensity.Kurtosis',
        'Intensity.HistEnergy',
        'Intensity.HistEntropy',
    ]

    fdata = htk_features.compute_intensity_features(
        cfg.im_nuclei_seg_mask, cfg.im_nuclei_stain)

    check_fdata_sanity(fdata, expected_feature_list)
    check_fdata_sanity(
        cfg.fdata_nuclei, expected_feature_list,
        prefix='Nucleus.', match_feature_count=False)
    check_fdata_sanity(
        cfg.fdata_nuclei, expected_feature_list,
        prefix='Cytoplasm.', match_feature_count=False)

    if GENERATE_GROUNDTRUTH:
        fdata.to_csv(
            os.path.join(tempfile.gettempdir(),
                         'Easy1_nuclei_intensity_features.csv'),
            index=False)

    fdata_gtruth = pd.read_csv(
        utilities.getTestFilePath('Easy1_nuclei_intensity_features.csv'),
        index_col=None)

    # ``check_less_precise`` was deprecated in pandas 1.1 and removed in 2.0.
    # pandas mapped ``check_less_precise=2`` to rtol = atol = 0.5 * 10 ** -2,
    # so pass those tolerances explicitly to keep the same comparison.
    pd.testing.assert_frame_equal(
        fdata, fdata_gtruth, rtol=0.5e-2, atol=0.5e-2)
def test_prep(girderClient):  # noqa
    """Fill the shared ``cfg`` namespace with inputs for the saliency tests.

    Stores the client on ``cfg.gc``, keeps the first item returned by the
    ``/item`` text search on ``cfg.iteminfo``, and loads
    ``saliency_GTcodes.csv`` into ``cfg.GTcodes``.

    Args:
        girderClient: object exposing ``get``; stored as ``cfg.gc``.
            Presumably supplied as a pytest fixture -- confirm.
    """
    cfg.gc = girderClient

    # first hit of the text search for the slide name
    search_hits = cfg.gc.get(
        '/item', parameters={'text': "TCGA-A2-A0YE-01Z-00-DX1"})
    cfg.iteminfo = search_hits[0]

    gtcodes_csv = getTestFilePath('saliency_GTcodes.csv')
    cfg.GTcodes = read_csv(gtcodes_csv)
def test_prep(girderClient):  # noqa
    """Fill the shared ``cfg`` namespace with inputs for the contour tests.

    Stores the client on ``cfg.gc``, looks up the item matching the slide
    name, loads ``sample_GTcodes.csv`` as a dict keyed by its ``group``
    column, creates a temporary directory with one sub-directory per output
    kind, fetches and rescales the slide annotations, and assembles the
    keyword-argument dictionaries kept on ``cfg``
    (``test_annots_to_contours_kwargs`` and ``get_all_rois_kwargs``).

    Args:
        girderClient: object exposing ``get``; stored as ``cfg.gc``.
            Presumably supplied as a pytest fixture -- confirm.
    """
    cfg.gc = girderClient
    item = cfg.gc.get(
        '/item', parameters={'text': "TCGA-A2-A0YE-01Z-00-DX1"})[0]
    slide_id = item['_id']

    # GT codes table -> dict of row dicts, keyed by the "group" column
    gtcodes_table = read_csv(getTestFilePath('sample_GTcodes.csv'))
    gtcodes_table.index = gtcodes_table.loc[:, 'group']
    gtcodes_by_group = gtcodes_table.to_dict(orient='index')

    # just a temp directory to save masks for now
    cfg.BASE_SAVEPATH = tempfile.mkdtemp()
    subdir_names = (
        ('contours', 'contours'),
        ('rgb', 'rgbs'),
        ('visualization', 'vis'),
        ('mask', 'masks'),
    )
    cfg.SAVEPATHS = {
        kind: os.path.join(cfg.BASE_SAVEPATH, subdir)
        for kind, subdir in subdir_names
    }
    for target_dir in cfg.SAVEPATHS.values():
        if not os.path.exists(target_dir):
            os.mkdir(target_dir)

    # Microns-per-pixel / Magnification (either or)
    cfg.MPP = 5.0
    cfg.MAG = None

    # fetch the slide's annotations, then rescale them
    cfg.slide_annotations = cfg.gc.get('/annotation/item/' + slide_id)
    scale_factor, _ = get_scale_factor_and_appendStr(
        gc=cfg.gc, slide_id=slide_id, MPP=cfg.MPP, MAG=cfg.MAG)
    cfg.slide_annotations = scale_slide_annotations(
        cfg.slide_annotations, sf=scale_factor)

    # bounding box information for every annotation element
    cfg.element_infos = get_bboxes_from_slide_annotations(
        cfg.slide_annotations)

    # common params for annotations_to_contours_no_mask()
    shared_contour_kwargs = {
        'MPP': cfg.MPP,
        'MAG': cfg.MAG,
        'linewidth': 0.2,
        'get_rgb': True,
        'get_visualization': True,
    }

    # params for TESTING annotations_to_contours_no_mask(); built on an
    # independent deep copy so the shared dict above is left untouched
    region_kwargs = copy.deepcopy(shared_contour_kwargs)
    region_kwargs.update({
        'gc': cfg.gc,
        'slide_id': slide_id,
        'bounds': {
            'XMIN': 58000, 'XMAX': 63000,
            'YMIN': 35000, 'YMAX': 39000},
    })
    cfg.test_annots_to_contours_kwargs = region_kwargs

    # params for getting all rois for slide
    cfg.get_all_rois_kwargs = {
        'gc': cfg.gc,
        'slide_id': slide_id,
        'GTCodes_dict': gtcodes_by_group,
        'save_directories': cfg.SAVEPATHS,
        'annotations_to_contours_kwargs': shared_contour_kwargs,
        'slide_name': 'TCGA-A2-A0YE',
        'verbose': False,
        'monitorprefix': 'test',
    }