def test_NewFromDirectory( self ):
    """Build a FeatureSpace from a directory tree of images.

    Case 1: a flat temp directory holding a single tiff; row 0 of the
    resulting data matrix must match the values in the reference .sig file.

    Case 2: a temp directory with one sub-directory per class, created in
    the order C, B, A; class_names is expected to come back as
    ['A', 'B', 'C'] and every row must match the reference values.
    """
    from os import mkdir
    from shutil import copy

    ref_sig_path = 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E-t6x5_5_4-l.sig'
    ref_fv = FeatureVector.NewFromSigFile( pychrm_test_dir + sep + ref_sig_path )

    img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E-t6x5_5_4-l.tiff"
    orig_img_filepath = pychrm_test_dir + sep + img_filename

    # Case 1: flat directory.
    tempdir = mkdtemp()
    try:
        # The copy lives inside the try so the temp dir is removed even if
        # the copy itself fails.
        copy( orig_img_filepath, tempdir )
        fs = FeatureSpace.NewFromDirectory( tempdir, quiet=False )
        self.assertTrue( compare( fs.data_matrix[0], ref_fv.values ) )
        #from numpy.testing import assert_allclose
        #assert_allclose( ref_fv.values, fs.data_matrix[0], rtol=1e-05 )
    finally:
        rmtree( tempdir )

    # Case 2: one sub-directory per class, deliberately created out of order.
    toptempdir = mkdtemp()
    try:
        for letter in 'CBA':
            dirname = toptempdir + sep + letter
            mkdir( dirname )
            copy( orig_img_filepath, dirname )
        fs = FeatureSpace.NewFromDirectory( toptempdir, quiet=False, )
        self.assertEqual( fs.class_names, ['A', 'B', 'C' ] )
        # Every class directory holds the same image, so every row
        # should reproduce the reference features.
        for row_of_features in fs.data_matrix:
            self.assertTrue( compare( row_of_features, ref_fv.values ) )
    finally:
        rmtree( toptempdir )
def test_ParallelTiling( self ):
    """Features from a 6x5-tiled FOF run with n_jobs=True must match the reference sigs.

    Two single-line file-of-files are written: one in a directory that
    already holds the reference .sig files, and one in a directory that
    holds only the tiff. Both are loaded with the same global sampling
    options and the resulting data matrices are compared row by row.
    """
    # NOTE(review): this test name is defined more than once in this file;
    # if the definitions share a class only the last one is kept -- confirm.
    import zipfile
    from shutil import copy
    from tempfile import NamedTemporaryFile
    from wndcharm.utils import compare

    refdir = mkdtemp(prefix='ref')
    targetdir = mkdtemp(prefix='target')

    try:
        reference_feats = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_t6x5_REFERENCE_SIGFILES.zip'
        # Context manager so the archive handle is closed after extraction.
        with zipfile.ZipFile( reference_feats, mode='r' ) as zf:
            zf.extractall( refdir )

        img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
        orig_img_filepath = pychrm_test_dir + sep + img_filename

        # copy the tiff to the tempdir so the .sig files end up there too
        copy( orig_img_filepath, targetdir )
        copy( orig_img_filepath, refdir )

        # delete=False: the FOFs must outlive the with-blocks; they are
        # cleaned up together with their directories in the finally clause.
        with NamedTemporaryFile( mode='w', dir=refdir, prefix='ref', delete=False ) as temp:
            ref_fof = temp.name
            temp.write( 'reference_samp\ttest_class\t{}\t{{}}\n'.format( refdir + sep + img_filename ) )
        with NamedTemporaryFile( mode='w', dir=targetdir, prefix='target', delete=False ) as temp:
            target_fof = temp.name
            temp.write( 'test_samp\ttest_class\t{}\t{{}}\n'.format( targetdir + sep + img_filename ) )

        global_sampling_options = \
            FeatureVector( long=True, tile_num_cols=6, tile_num_rows=5 )

        # Should just load reference sigs
        ref_fs = FeatureSpace.NewFromFileOfFiles( ref_fof, quiet=False,
            global_sampling_options=global_sampling_options )
        # No sigs exist in targetdir. n_jobs=True presumably turns on
        # parallel computation, per the test name -- TODO confirm.
        target_fs = FeatureSpace.NewFromFileOfFiles( target_fof, n_jobs=True,
            quiet=False, global_sampling_options=global_sampling_options )

        #from numpy.testing import assert_allclose
        #self.assertTrue( assert_allclose( ref_fs.data_matrix, target_fs.data_matrix ) )

        # NOTE(review): zip() stops at the shorter matrix, so a target with
        # fewer rows than the reference would not be detected here.
        for row_num, (ref_row, test_row) in enumerate( zip( ref_fs.data_matrix, target_fs.data_matrix ) ):
            retval = compare( ref_row, test_row )
            if not retval:
                # Single-argument print() works under both Python 2 and 3.
                print( "error in sample row {0}".format( row_num ) )
                print( "REF: {0} TARGET: {1}".format(
                    ref_fs._contiguous_sample_names[row_num],
                    target_fs._contiguous_sample_names[row_num] ) )
            self.assertTrue( retval )
    finally:
        rmtree( refdir )
        rmtree( targetdir )
def test_LargeFeatureSetGrayscale( self ):
    """Large feature set, grayscale image"""
    # Expected values come from the pre-computed .sig file on disk.
    expected = FeatureVector.NewFromSigFile(
        self.sig_file_path, image_path=self.test_tif_path )

    # Freshly computed long-feature-set values for the same tiff.
    fresh_vector = FeatureVector( source_filepath=self.test_tif_path, long=True )
    computed = fresh_vector.GenerateFeatures( write_to_disk=False )

    # numpy.testing.assert_allclose was tried here, e.g.
    #   assert_allclose( reference_sample.values, target_sample.values, rtol=1e-3 )
    # but it does not work: the feature ranges are very wide, and a relative
    # tolerance did not help either.
    # The reference values are read in from strings, so only about 6
    # significant figures are available; comparing strings is the better
    # apples-to-apples check, hence the use of compare().
    values_match = compare( computed.values, expected.values )
    self.assertTrue( values_match )
def test_HeatMap_w_FeatureComputationPlan( self ):
    """Features of the first tile from SampleImageTiles must match the reference .sig.

    The reference sig files are unzipped next to a copy of the test tiff,
    the first tile is pulled from a SampleImageTiles iterator, its features
    are computed and then compared against the pre-computed reference.
    """
    # NOTE(review): this test name is defined more than once in this file;
    # if the definitions share a class only the last one is kept -- confirm.

    # chris@NIA-LG-01778617 ~/src/wnd-charm/tests/pywndcharm_tests
    # $ tiffinfo lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif
    # TIFF Directory at offset 0x18ea9c (1632924)
    #   Image Width: 1388 Image Length: 1040
    #   Bits/Sample: 8
    #   Compression Scheme: LZW
    #   Photometric Interpretation: min-is-black
    #   Samples/Pixel: 1
    #   Rows/Strip: 5
    #   Planar Configuration: single image plane

    # 5x6 tiling scheme => tile dims 208 x 231.33 each
    scan_x = 231
    scan_y = 208

    #num_features = 200

    # Inflate the zipped test fit into a temp file
    tempdir = mkdtemp()

    try:
        import zipfile
        from shutil import copy

        reference_sigs = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_REFERENCE_SIGFILES.zip'
        # Context manager so the archive handle is closed after extraction.
        with zipfile.ZipFile( reference_sigs, mode='r' ) as zf:
            zf.extractall( tempdir )

        img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
        orig_img_filepath = pychrm_test_dir + sep + img_filename

        # copy the tiff to the tempdir so the .sig files end up there too
        copy( orig_img_filepath, tempdir )
        input_image_path = tempdir + sep + img_filename

        # create the tile image iterator
        image_iter = SampleImageTiles( input_image_path, scan_x, scan_y, True )
        print( "Number of samples = " + str( image_iter.samples ) )

        # Just grab the first tile. (A leftover pdb.set_trace() used to sit
        # here; it would stall any unattended test run.)
        tile_cropped_px_plane = image_iter.sample()

        # The iterator attributes are read after sample() was called,
        # as in the original statement order.
        kwargs = {
            'name': input_image_path,
            'source_filepath': tile_cropped_px_plane,
            #'feature_names': fw.feature_names,
            #'feature_computation_plan': comp_plan,
            'long': True,
            'tile_num_cols': image_iter.tiles_x,
            'tile_num_rows': image_iter.tiles_y,
            'tiling_scheme': '{0}x{1}'.format( image_iter.tiles_x, image_iter.tiles_y ),
            'tile_col_index': image_iter.current_col,
            'tile_row_index': image_iter.current_row,
            'sample_group_id': 0,
        }

        top_left_tile_feats = FeatureVector( **kwargs ).GenerateFeatures( quiet=False, write_to_disk=False )

        top_left_tile_reference_feats = FeatureVector.NewFromSigFile( tempdir + sep + 'sj-05-3362-R2_001_E-t5x6_0_0-l.sig' )

        # Remember we're reading these values in from strings. and the ranges are so wide
        # you only have 6 sig figs. Better apples to apples comparison is to
        # compare strings.
        self.assertTrue( compare( top_left_tile_feats.values, top_left_tile_reference_feats.values ) )

        # Setting feature_names initiates the feature reduce from
        # the larger set of features that comes back from computation
        #kwargs[ 'feature_names' ] = fw.feature_names
        # if these are set, then the code will try to take a ROI of a ROI:
        #kwargs[ 'x' ] = image_iter.current_x
        #kwargs[ 'y' ] = image_iter.current_y
        #kwargs[ 'w' ] = image_iter.tile_width
        #kwargs[ 'h' ] = image_iter.tile_height
    finally:
        rmtree( tempdir )
def test_FeatureComputationFromROI( self ):
    """Specify bounding box to FeatureVector, calc features, then compare
    with C++ implementation-calculated feats.

    Two ROIs are checked, the top-left and the bottom-right tile of a 6x5
    grid. For each one both the feature names and the feature values must
    match the corresponding reference .sig file.
    """
    # NOTE(review): this test name is defined more than once in this file;
    # if the definitions share a class only the last one is kept -- confirm.

    # orig image lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif
    # has size=1388x1040
    # WND-CHARM command line specifies via -tCxR param
    # where C is columns and R is rows, ergo 5 rows, 6 cols = -t6x5
    # tile dims => w=1388/6 cols = 231.33px wide, h=1040/5 rows = 208 px tall
    ROI_width = 231
    ROI_height = 208

    # Inflate the zipped test fit into a temp file
    tempdir = mkdtemp()

    try:
        import zipfile
        from shutil import copy

        reference_sigs = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_t6x5_REFERENCE_SIGFILES.zip'
        # Context manager so the archive handle is closed after extraction.
        with zipfile.ZipFile( reference_sigs, mode='r' ) as zf:
            zf.extractall( tempdir )

        img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
        orig_img_filepath = pychrm_test_dir + sep + img_filename

        # copy the tiff to the tempdir so the .sig files end up there too
        copy( orig_img_filepath, tempdir )
        input_image_path = tempdir + sep + img_filename

        kwargs = {
            'name': img_filename,
            'source_filepath': input_image_path,
            #'feature_names': fw.feature_names,
            #'feature_computation_plan': comp_plan,
            'long': True,
            'x': 0,
            'y': 0,
            'w': ROI_width,
            'h': ROI_height,
            'sample_group_id': 0,
        }

        # Top-left tile (column 0, row 0).
        top_left_tile_feats = FeatureVector( **kwargs ).GenerateFeatures( quiet=False, write_to_disk=False )
        top_left_tile_reference_feats = FeatureVector.NewFromSigFile( tempdir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E-t6x5_0_0-l.sig' )

        # Remember we're reading these values in from strings. and the ranges are so wide
        # you only have 6 sig figs. Better apples to apples comparison is to
        # compare strings.
        self.assertEqual( top_left_tile_feats.feature_names, top_left_tile_reference_feats.feature_names )
        self.assertTrue( compare( top_left_tile_feats.values, top_left_tile_reference_feats.values ) )

        # Bottom-right tile (column 5, row 4): the origin is five tile
        # widths across and four tile heights down, i.e. x=1155, y=832.
        kwargs[ 'x' ] = 5 * ROI_width
        kwargs[ 'y' ] = 4 * ROI_height
        bot_right_tile_feats = FeatureVector( **kwargs ).GenerateFeatures( quiet=False, write_to_disk=False )
        bot_right_tile_reference_feats = FeatureVector.NewFromSigFile( tempdir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E-t6x5_5_4-l.sig' )

        self.assertEqual( bot_right_tile_feats.feature_names, bot_right_tile_reference_feats.feature_names )
        self.assertTrue( compare( bot_right_tile_feats.values, bot_right_tile_reference_feats.values ) )
    finally:
        rmtree( tempdir )
def test_NewFromFileOfFiles( self ):
    """Pulls in the lymphoma eosin histology 5x6 tiled featureset via .sig files.

    The same features are loaded from a FOF that references .sig files and
    from a FIT file, and compared row by row. The FOF is then shuffled and
    reloaded to check that loading sorts the samples, and finally
    TakeTiles() is exercised on the result.
    """

    # Types of files containing features:
    # FIT: contains an entire FeatureSpace definition including features.
    # FOF: "File Of Files" containing a FeatureSpace structure definition only,
    #      listing paths to files of pre-calculated features (.sig) or the
    #      tiff images themselves so features can be calculated
    # SIG: A text file containing pre-calculated features for a single sample.

    # Test dataset: subset of the IICBU2008 lymphoma dataset. 2 channels (H+E),
    # 3 classes ('CLL', 'FL', 'MCL'), 10 images per class per channel,
    # 5x6 tiling grid = 30 samples per image resulting in
    # 2 x 3 x 10 X 30 = 1800 total samples available

    # Files containing features included in this test suite:
    # 1. lymphoma_iicbu2008_subset_EOSIN_ONLY_t5x6_v3.2features.fit.zip:
    #    A zip archive containing a single FIT file with features pre-calculated.
    # 2. lymphoma_iicbu2008_subset_HE_t5x6_v3.2features_SIGFILES.zip:
    #    Contains 1800 SIG files, plus 4 FOF files (items 2-5 below):
    #      "lymphoma_iicbu2008_subset_EOSIN_ONLY_images.fof.tsv"
    #      "lymphoma_iicbu2008_subset_EOSIN_ONLY_sigfiles_t5x6-l.fof.tsv"
    #      "lymphoma_iicbu2008_subset_2CHAN_HE_images.fof.tsv"
    #      "lymphoma_iicbu2008_subset_2CHAN_HE_sigfiles_t5x6-l.fof.tsv"

    # List of possible feature sources:
    # 1. Single channel FIT (Eosin only)
    # 2. Single channel FOF (Eosin only) referencing to 30 tiffs (requires global sampling options -t5x6 -l to grab sigs)
    # 3. Single channel FOF (Eosin only) referencing 900 sig files
    # 4. Double channel FOF (Eosin+Haemotoxylin) referencing 60 tiffs (requires global sampling options -t5x6 -l to grab sigs)
    # 5. Double channel FOF (Eosin+Haemotoxylin) referencing 1800 sig files.

    #=============================================
    # BEGIN CODE TO CREATE TESTDATA ZIP PACKAGE
    #import zipfile
    #import zlib
    #path = '/Users/chris/src/wnd-charm/tests/pywndcharm_tests/TESTDATA_lymphoma_iicbu2008_subset_HE_t5x6_v3.2features_SIGFILES.zip'
    #zf = zipfile.ZipFile( path, mode='w' )
    #import os
    #classes = 'CLL', 'FL', 'MCL',
    #channels = 'haemotoxylin', 'eosin'
    #from collections import defaultdict
    #sig_tracker = defaultdict(int)
    #samplegroupid_tracker = {}
    #samplegroup_counter = 0
    #
    #eosin_tif_fof = [] # 30 lines
    #eosin_sig_fof = [] # 900 lines
    #double_tif_fof = [] # 30 lines, 2 feature set columns
    #double_sig_fof = [] # 900 lines, 2 feature set columns
    #
    #for _channel in channels:
    #  zf.write( './' + _channel, compress_type=zipfile.ZIP_DEFLATED )
    #  for _class in classes:
    #    zf.write( './' + _channel + '/' + _class, compress_type=zipfile.ZIP_DEFLATED )
    #    for root, dirs, files in os.walk( _channel + '/' + _class ):
    #      for _file in files:
    #        if _file.endswith( '.tif' ):
    #          # Strip off the _H.tif or _E.tif
    #          samplename = _file[:-6]
    #          eosinpath = './eosin/' + _class + '/' + samplename + '_E.tif'
    #          haemopath = './haemotoxylin/' + _class + '/' + samplename + '_H.tif'
    #          if _channel == 'eosin':
    #            eosin_tif_fof.append( eosinpath + '\t' + _class )
    #            double_tif_fof.append( samplename + '\t' + _class + '\t' + eosinpath + '\t{\tchannel\t=\teosin\t}\t' + haemopath + '\t{\tchannel\t=\thaemotoxylin\t}')
    #        elif _file.endswith( '.sig' ):
    #          zf.write( './' + _channel + '/' + _class + '/' + _file, compress_type=zipfile.ZIP_DEFLATED )
    #          if _channel == 'eosin':
    #            # Strip off the _H-t5x6_0_0-l.sig
    #            samplename = _file[:-17] + '.tif'
    #            eosinpath = './eosin/' + _class + '/' + _file
    #            haemopath = './haemotoxylin/' + _class + '/' + _file.replace( '_E-t5x6_', '_H-t5x6_' )
    #            # count samples from 0:
    #            samplesequenceid = str( sig_tracker[ samplename ] )
    #            sig_tracker[ samplename ] += 1
    #            if samplename not in samplegroupid_tracker:
    #              samplegroupid_tracker[ samplename ] = samplegroup_counter
    #              samplegroup_counter += 1
    #            samplegroupid = str( samplegroupid_tracker[ samplename ] )
    #            eosin_sig_fof.append( eosinpath + '\t' + _class )
    #            double_sig_fof.append( samplename + '\t' + _class + '\t' + eosinpath + '\t{\tchannel\t=\teosin\t;\tsamplegroupid\t=\t' + samplegroupid + '\t;\tsamplesequenceid\t=\t' + samplesequenceid + '\t}\t' + haemopath + '\t{\tchannel\t=\thaemotoxylin\t;\tsamplegroupid\t=\t' + samplegroupid + '\t;\tsamplesequenceid\t=\t' + samplesequenceid + '\t}\t')
    #
    #fof_dir = '/Users/chris/src/wnd-charm/tests/pywndcharm_tests/'
    #with open( 'lymphoma_iicbu2008_subset_EOSIN_ONLY_images.fof.tsv', 'w') as out:
    #  for _ in eosin_tif_fof:
    #    out.write( _ + '\n')
    #with open( 'lymphoma_iicbu2008_subset_EOSIN_ONLY_sigfiles_t5x6-l.fof.tsv', 'w') as out:
    #  for _ in eosin_sig_fof:
    #    out.write( _ + '\n')
    #with open( 'lymphoma_iicbu2008_subset_2CHAN_HE_images.fof.tsv', 'w') as out:
    #  for _ in double_tif_fof:
    #    out.write( _ + '\n')
    #with open( 'lymphoma_iicbu2008_subset_2CHAN_HE_sigfiles_t5x6-l.fof.tsv', 'w') as out:
    #  for _ in double_sig_fof:
    #    out.write( _ + '\n')
    #zf.write( './' + 'lymphoma_iicbu2008_subset_EOSIN_ONLY_images.fof.tsv', compress_type=zipfile.ZIP_DEFLATED )
    #zf.write( './' + 'lymphoma_iicbu2008_subset_EOSIN_ONLY_sigfiles_t5x6-l.fof.tsv', compress_type=zipfile.ZIP_DEFLATED )
    #zf.write( './' + 'lymphoma_iicbu2008_subset_2CHAN_HE_images.fof.tsv', compress_type=zipfile.ZIP_DEFLATED )
    #zf.write( './' + 'lymphoma_iicbu2008_subset_2CHAN_HE_sigfiles_t5x6-l.fof.tsv', compress_type=zipfile.ZIP_DEFLATED )
    #zf.printdir()
    #zf.close()
    # END CODE TO CREATE TESTDATA ZIP PACKAGE
    #=============================================

    import zipfile
    from random import shuffle

    def sampling_kwargs( pathname ):
        # Loader keyword arguments shared by every load in this test.
        # sampling opts: -l -t5x6 implies 5 columns and 6 rows ... I know it's weird.
        return {
            'pathname': pathname,
            'quiet': True,
            'long': True,
            'tile_num_rows': 6,
            'tile_num_cols': 5,
        }

    def assert_rows_match( fit_fs, fof_fs ):
        # Compare the two data matrices row by row, printing which samples
        # disagree before the assertion fires.
        for row_num, (fit_row, fof_row) in enumerate( zip( fit_fs.data_matrix, fof_fs.data_matrix ) ):
            retval = compare( fit_row, fof_row )
            if not retval:
                # Single-argument print() works under both Python 2 and 3;
                # the text matches what the old print statements emitted.
                print( "error in sample row {0}".format( row_num ) )
                print( "FIT:  {0} FOF {1}".format(
                    fit_fs._contiguous_sample_names[row_num],
                    fof_fs._contiguous_sample_names[row_num] ) )
            self.assertTrue( retval )

    # Inflate the zipped test fit into a temp file.
    # Extraction happens inside the try so the temp dir is removed even when
    # unzipping fails, and each archive handle is closed once extracted.
    tempdir = mkdtemp()

    try:
        zipped_file_path = pychrm_test_dir + sep + 'lymphoma_iicbu2008_subset_HE_t5x6_v3.2features_SIGFILES.zip'
        with zipfile.ZipFile( zipped_file_path, mode='r' ) as zf1:
            zf1.extractall( tempdir )

        # for comparison:
        with zipfile.ZipFile( pychrm_test_dir + sep + 'lymphoma_iicbu2008_subset_EOSIN_ONLY_t5x6_v3.2features.fit.zip', mode='r' ) as zf2:
            zf2.extractall( tempdir )

        sorted_fof = tempdir + sep + \
            'lymphoma_iicbu2008_subset_EOSIN_ONLY_sigfiles_t5x6-l.fof.tsv'

        kwargs = sampling_kwargs( sorted_fof )
        fs_fof = FeatureSpace.NewFromFileOfFiles( **kwargs )

        # The FIT loader receives the same keyword set, with only the
        # pathname swapped.
        kwargs['pathname'] = tempdir + sep + 'lymphoma_iicbu2008_subset_eosin_t5x6_v3.2features.fit'
        fs_fit = FeatureSpace.NewFromFitFile( **kwargs )

        # Fit file has less significant figures than Signature files, and it's not
        # consistent how many there are. Seems like fit file just lops off numbers
        # at the end. Example: (signatures on top, fit on bottom)
        #
        # Example:
        # -  17.232246,  # sig
        # ?         --
        #
        # +  17.2322,  # fit
        # -  -63.549056,  # sig
        # ?         ^^^
        #
        # +  -63.5491,  # fit
        # ?         ^
        #
        # -  223.786977,  # sig
        # ?         ---
        #
        # +  223.787,  # fit

        # More of the same:
        #(Pdb) fs_fof.data_matrix[0,-5:]
        #array([   0.935442,   14.005003,  -43.562076,  127.394914,    0.628772])
        #(Pdb) fs_fit.data_matrix[0,-5:]
        #array([   0.935442,   14.005   ,  -43.5621  ,  127.395   ,    0.628772])

        # default is rtol=1e-07, atol=0
        #np.testing.assert_allclose( actual=fs_fit.data_matrix, desired=fs_fof.data_matrix,
        #        rtol=1e-03, atol=0 )
        #np.testing.assert_array_almost_equal_nulp( fs_fit.data_matrix, fs_fof.data_matrix )

        assert_rows_match( fs_fit, fs_fof )

        # Test sorting; scramble the FOF then load and check:
        with open( sorted_fof ) as fof:
            lines = fof.readlines()
        shuffle( lines )

        unsorted_fof = tempdir + sep + \
            'lymphoma_iicbu2008_subset_EOSIN_ONLY_sigfiles_t5x6-l_UNSORTED.fof.tsv'
        with open( unsorted_fof, 'w' ) as fof:
            for line in lines:
                fof.write( line )

        fs_fof = FeatureSpace.NewFromFileOfFiles( **sampling_kwargs( unsorted_fof ) )

        # Check again
        assert_rows_match( fs_fit, fs_fof )

        # TESTING TAKE TILES:
        self.assertRaises( ValueError, fs_fof.TakeTiles, tuple() )
        self.assertRaises( ValueError, fs_fof.TakeTiles, (45, 46, 47,) )
        self.assertRaises( TypeError, fs_fof.TakeTiles, 'crap' )

        # take middle 4
        wanted_tiles = ( 14, 15, 20, 21 )
        took = fs_fof.TakeTiles( wanted_tiles, inplace=False )
        num_sample_groups = len( set( fs_fof._contiguous_sample_group_ids ) )
        self.assertEqual( took.num_samples_per_group, len( wanted_tiles ) )
        self.assertEqual( took.num_samples, len( wanted_tiles ) * num_sample_groups )

        # mid4 = 'lymphoma_iicbu2008_subset_EOSIN_ONLY_sigfiles_MIDDLE_4_TILES_t5x6-l.fof.tsv'
        # # fake out wndcharm by putting empty tiffs in the temp dir
        # # we don't need them, the sigs are in there already.
        # with open( mid4) as fof:
        #     lines = fof.readlines()
        # names, classes, paths, opts = zip( *[ _.split('\t') for _ in lines ] )
        # for _path in paths:
        #     with open( tempdir + sep + _path, 'w' ):
        #         pass
        # took_via_fof = FeatureSpace.NewFromFileOfFiles( mid4, num_samples_per_group=4 )
        #
        # for row_num, (fit_row, fof_row) in enumerate( zip( took.data_matrix, took_via_fof.data_matrix )):
        #     retval = compare( fit_row, fof_row )
        #     if retval == False:
        #         print "error in sample row", row_num
        #         print "FIT: ", took._contiguous_sample_names[row_num], "FOF", took_via_fof._contiguous_sample_names[row_num]
        #     self.assertTrue( retval )
    finally:
        rmtree( tempdir )
def test_ParallelTiling(self):
    """Features from a 6x5-tiled FOF run with n_jobs=True must match the reference sigs.

    Two single-line file-of-files are written: one in a directory that
    already holds the reference .sig files, and one in a directory that
    holds only the tiff. Both are loaded with the same global sampling
    options and the resulting data matrices are compared row by row.
    """
    # NOTE(review): this test name is defined more than once in this file;
    # if the definitions share a class only the last one is kept -- confirm.
    import zipfile
    from shutil import copy
    from tempfile import NamedTemporaryFile
    from wndcharm.utils import compare

    refdir = mkdtemp(prefix='ref')
    targetdir = mkdtemp(prefix='target')

    try:
        reference_feats = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_t6x5_REFERENCE_SIGFILES.zip'
        # Context manager so the archive handle is closed after extraction.
        with zipfile.ZipFile(reference_feats, mode='r') as zf:
            zf.extractall(refdir)

        img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
        orig_img_filepath = pychrm_test_dir + sep + img_filename

        # copy the tiff to the tempdir so the .sig files end up there too
        copy(orig_img_filepath, targetdir)
        copy(orig_img_filepath, refdir)

        # delete=False: the FOFs must outlive the with-blocks; they are
        # cleaned up together with their directories in the finally clause.
        with NamedTemporaryFile(mode='w', dir=refdir, prefix='ref',
                                delete=False) as temp:
            ref_fof = temp.name
            temp.write('reference_samp\ttest_class\t{}\t{{}}\n'.format(
                refdir + sep + img_filename))
        with NamedTemporaryFile(mode='w', dir=targetdir, prefix='target',
                                delete=False) as temp:
            target_fof = temp.name
            temp.write('test_samp\ttest_class\t{}\t{{}}\n'.format(
                targetdir + sep + img_filename))

        global_sampling_options = \
            FeatureVector(long=True, tile_num_cols=6, tile_num_rows=5)

        # Should just load reference sigs
        ref_fs = FeatureSpace.NewFromFileOfFiles(
            ref_fof, quiet=False,
            global_sampling_options=global_sampling_options)
        # No sigs exist in targetdir. n_jobs=True presumably turns on
        # parallel computation, per the test name -- TODO confirm.
        target_fs = FeatureSpace.NewFromFileOfFiles(
            target_fof, n_jobs=True, quiet=False,
            global_sampling_options=global_sampling_options)

        #from numpy.testing import assert_allclose
        #self.assertTrue( assert_allclose( ref_fs.data_matrix, target_fs.data_matrix ) )

        # NOTE(review): zip() stops at the shorter matrix, so a target with
        # fewer rows than the reference would not be detected here.
        for row_num, (ref_row, test_row) in enumerate(
                zip(ref_fs.data_matrix, target_fs.data_matrix)):
            retval = compare(ref_row, test_row)
            if not retval:
                # Single-argument print() works under both Python 2 and 3.
                print("error in sample row {0}".format(row_num))
                print("REF: {0} TARGET: {1}".format(
                    ref_fs._contiguous_sample_names[row_num],
                    target_fs._contiguous_sample_names[row_num]))
            self.assertTrue(retval)
    finally:
        rmtree(refdir)
        rmtree(targetdir)
def test_HeatMap_w_FeatureComputationPlan(self):
    """Features of the first tile from SampleImageTiles must match the reference .sig.

    The reference sig files are unzipped next to a copy of the test tiff,
    the first tile is pulled from a SampleImageTiles iterator, its features
    are computed and then compared against the pre-computed reference.
    """
    # NOTE(review): this test name is defined more than once in this file;
    # if the definitions share a class only the last one is kept -- confirm.

    # chris@NIA-LG-01778617 ~/src/wnd-charm/tests/pywndcharm_tests
    # $ tiffinfo lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif
    # TIFF Directory at offset 0x18ea9c (1632924)
    #   Image Width: 1388 Image Length: 1040
    #   Bits/Sample: 8
    #   Compression Scheme: LZW
    #   Photometric Interpretation: min-is-black
    #   Samples/Pixel: 1
    #   Rows/Strip: 5
    #   Planar Configuration: single image plane

    # 5x6 tiling scheme => tile dims 208 x 231.33 each
    scan_x = 231
    scan_y = 208

    #num_features = 200

    # Inflate the zipped test fit into a temp file
    tempdir = mkdtemp()

    try:
        import zipfile
        from shutil import copy

        reference_sigs = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_REFERENCE_SIGFILES.zip'
        # Context manager so the archive handle is closed after extraction.
        with zipfile.ZipFile(reference_sigs, mode='r') as zf:
            zf.extractall(tempdir)

        img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
        orig_img_filepath = pychrm_test_dir + sep + img_filename

        # copy the tiff to the tempdir so the .sig files end up there too
        copy(orig_img_filepath, tempdir)
        input_image_path = tempdir + sep + img_filename

        # create the tile image iterator
        image_iter = SampleImageTiles(input_image_path, scan_x, scan_y, True)
        print("Number of samples = " + str(image_iter.samples))

        # Just grab the first tile. (A leftover pdb.set_trace() used to sit
        # here; it would stall any unattended test run.)
        tile_cropped_px_plane = image_iter.sample()

        # The iterator attributes are read after sample() was called,
        # as in the original statement order.
        kwargs = {
            'name': input_image_path,
            'source_filepath': tile_cropped_px_plane,
            #'feature_names': fw.feature_names,
            #'feature_computation_plan': comp_plan,
            'long': True,
            'tile_num_cols': image_iter.tiles_x,
            'tile_num_rows': image_iter.tiles_y,
            'tiling_scheme': '{0}x{1}'.format(image_iter.tiles_x,
                                              image_iter.tiles_y),
            'tile_col_index': image_iter.current_col,
            'tile_row_index': image_iter.current_row,
            'sample_group_id': 0,
        }

        top_left_tile_feats = FeatureVector(**kwargs).GenerateFeatures(
            quiet=False, write_to_disk=False)

        top_left_tile_reference_feats = FeatureVector.NewFromSigFile(
            tempdir + sep + 'sj-05-3362-R2_001_E-t5x6_0_0-l.sig')

        # Remember we're reading these values in from strings. and the ranges are so wide
        # you only have 6 sig figs. Better apples to apples comparison is to
        # compare strings.
        self.assertTrue(
            compare(top_left_tile_feats.values,
                    top_left_tile_reference_feats.values))

        # Setting feature_names initiates the feature reduce from
        # the larger set of features that comes back from computation
        #kwargs[ 'feature_names' ] = fw.feature_names
        # if these are set, then the code will try to take a ROI of a ROI:
        #kwargs[ 'x' ] = image_iter.current_x
        #kwargs[ 'y' ] = image_iter.current_y
        #kwargs[ 'w' ] = image_iter.tile_width
        #kwargs[ 'h' ] = image_iter.tile_height
    finally:
        rmtree(tempdir)
def test_FeatureComputationFromROI(self):
    """Specify bounding box to FeatureVector, calc features, then compare
    with C++ implementation-calculated feats.

    Two ROIs are checked, the top-left and the bottom-right tile of a 6x5
    grid. For each one both the feature names and the feature values must
    match the corresponding reference .sig file.
    """
    # NOTE(review): this test name is defined more than once in this file;
    # if the definitions share a class only the last one is kept -- confirm.

    # orig image lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif
    # has size=1388x1040
    # WND-CHARM command line specifies via -tCxR param
    # where C is columns and R is rows, ergo 5 rows, 6 cols = -t6x5
    # tile dims => w=1388/6 cols = 231.33px wide, h=1040/5 rows = 208 px tall
    ROI_width = 231
    ROI_height = 208

    # Inflate the zipped test fit into a temp file
    tempdir = mkdtemp()

    try:
        import zipfile
        from shutil import copy

        reference_sigs = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_t6x5_REFERENCE_SIGFILES.zip'
        # Context manager so the archive handle is closed after extraction.
        with zipfile.ZipFile(reference_sigs, mode='r') as zf:
            zf.extractall(tempdir)

        img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
        orig_img_filepath = pychrm_test_dir + sep + img_filename

        # copy the tiff to the tempdir so the .sig files end up there too
        copy(orig_img_filepath, tempdir)
        input_image_path = tempdir + sep + img_filename

        kwargs = {
            'name': img_filename,
            'source_filepath': input_image_path,
            #'feature_names': fw.feature_names,
            #'feature_computation_plan': comp_plan,
            'long': True,
            'x': 0,
            'y': 0,
            'w': ROI_width,
            'h': ROI_height,
            'sample_group_id': 0,
        }

        # Top-left tile (column 0, row 0).
        top_left_tile_feats = FeatureVector(**kwargs).GenerateFeatures(
            quiet=False, write_to_disk=False)
        top_left_tile_reference_feats = FeatureVector.NewFromSigFile(
            tempdir + sep +
            'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E-t6x5_0_0-l.sig'
        )

        # Remember we're reading these values in from strings. and the ranges are so wide
        # you only have 6 sig figs. Better apples to apples comparison is to
        # compare strings.
        self.assertEqual(top_left_tile_feats.feature_names,
                         top_left_tile_reference_feats.feature_names)
        self.assertTrue(
            compare(top_left_tile_feats.values,
                    top_left_tile_reference_feats.values))

        # Bottom-right tile (column 5, row 4): the origin is five tile
        # widths across and four tile heights down, i.e. x=1155, y=832.
        kwargs['x'] = 5 * ROI_width
        kwargs['y'] = 4 * ROI_height
        bot_right_tile_feats = FeatureVector(**kwargs).GenerateFeatures(
            quiet=False, write_to_disk=False)
        bot_right_tile_reference_feats = FeatureVector.NewFromSigFile(
            tempdir + sep +
            'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E-t6x5_5_4-l.sig'
        )

        self.assertEqual(bot_right_tile_feats.feature_names,
                         bot_right_tile_reference_feats.feature_names)
        self.assertTrue(
            compare(bot_right_tile_feats.values,
                    bot_right_tile_reference_feats.values))
    finally:
        rmtree(tempdir)
def test_HeatMap_w_FeatureComputationPlan( self ):
    """Features of the first SlidingWindow position must match the reference .sig.

    A SlidingWindow configured as a 6x5 tiling is created over a copy of
    the test tiff; features for the first sample it yields are computed and
    compared against the pre-computed reference sig for tile (0, 0).
    """
    # NOTE(review): this test name is defined more than once in this file;
    # if the definitions share a class only the last one is kept -- confirm.

    # chris@NIA-LG-01778617 ~/src/wnd-charm/tests/pywndcharm_tests
    # $ tiffinfo lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif
    # TIFF Directory at offset 0x18ea9c (1632924)
    #   Image Width: 1388 Image Length: 1040
    #   Bits/Sample: 8
    #   Compression Scheme: LZW
    #   Photometric Interpretation: min-is-black
    #   Samples/Pixel: 1
    #   Rows/Strip: 5
    #   Planar Configuration: single image plane

    # WND-CHARM command line specifies via -tCxR param
    # where C is columns and R is rows, ergo 5 rows, 6 cols = -t6x5
    # tile dims => w=1388/6 cols = 231.33px wide, h=1040/5 rows = 208 px tall
    #scan_x = 231
    #scan_y = 208

    #num_features = 200

    # Inflate the zipped test fit into a temp file
    sourcedir = mkdtemp()
    targetdir = mkdtemp()

    try:
        import zipfile
        from shutil import copy

        reference_sigs = pychrm_test_dir + sep + 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E_t6x5_REFERENCE_SIGFILES.zip'
        # Context manager so the archive handle is closed after extraction.
        with zipfile.ZipFile( reference_sigs, mode='r' ) as zf:
            zf.extractall( targetdir )

        img_filename = "lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E.tif"
        orig_img_filepath = pychrm_test_dir + sep + img_filename

        # The tiff goes into sourcedir, which holds no .sig files; the
        # reference sigs were extracted into targetdir above.
        copy( orig_img_filepath, sourcedir )
        input_image_path = sourcedir + sep + img_filename

        # Create sliding window that emulates 6x5 tiling:
        kwargs = {
            'source_filepath': input_image_path,
            'tile_num_cols': 6,
            'tile_num_rows': 5,
            'long': True,
        }
        window = SlidingWindow( **kwargs )
        print( "Number of samples = " + str( window.num_positions ) )

        ref_file = 'lymphoma_eosin_channel_MCL_test_img_sj-05-3362-R2_001_E-t6x5_{}_{}-l.sig'

        # top left: only the first sample is checked, hence the break.
        for test_feats in window.sample():
            test_feats.GenerateFeatures( quiet=False, write_to_disk=False, cache=True )
            reference_feats = FeatureVector.NewFromSigFile( targetdir + sep + ref_file.format(0,0) )
            self.assertTrue( compare( test_feats.values, reference_feats.values ) )
            break
        else:
            # Reached only when the loop never ran: without this the test
            # would pass without having compared anything.
            self.fail( "SlidingWindow.sample() yielded no samples" )

        # below top left:
        #window.GenerateFeatures( quiet=False, write_to_disk=False, cache=True )
        #reference_feats = FeatureVector.NewFromSigFile( targetdir + sep + ref_file.format(0,1) )
        #self.assertTrue( compare( window.values, reference_feats.values ) )

        # Setting feature_names initiates the feature reduce from
        # the larger set of features that comes back from computation
        #kwargs[ 'feature_names' ] = fw.feature_names
    finally:
        rmtree( sourcedir )
        rmtree( targetdir )