def spectraSelect(args):
    """Worker task: reduce a random 5x5 pixel patch to a mean spectrum.

    The z (mass) axis is partitioned evenly across the workers; this
    worker fills only its own [zlo, zhi) slab of the shared spectrum.

    :param args: Tuple (filename, numWorkers, mp_arr, xdim, ydim, zdim, id)
                 where mp_arr is a multiprocessing shared array and id is
                 this worker's index.
    """
    filename, numWorkers, mp_arr, xdim, ydim, zdim, workerId = args
    # numpy view onto the shared multiprocessing buffer (no copy is made)
    sharedView = np.frombuffer(mp_arr.get_obj(), dtype='uint16')
    outSpectrum = sharedView.reshape(zdim)
    # Pick a random 5x5 spatial window
    xlo = random.randint(0, xdim - 6)
    xhi = xlo + 5
    ylo = random.randint(0, ydim - 6)
    yhi = ylo + 5
    inFile = omsi_file(filename, 'r')
    msidata = inFile.get_experiment(0).get_msidata(0)
    # Even split of the z axis; the last worker absorbs any remainder
    zstep = zdim / numWorkers
    zlo = zstep * workerId
    zhi = zdim if workerId == (numWorkers - 1) else zstep * (workerId + 1)
    outSpectrum[zlo:zhi] = np.mean(
        np.mean(msidata[xlo:xhi, ylo:yhi, zlo:zhi], axis=0), axis=0)
    inFile.close_file()
    sys.stdout.flush()
def generateBaseTestFile(omsiOutFile, xdim, ydim, zdim): #Create the output HDF5 file try: omsiFile = omsi_file(omsiOutFile) except: print "Unexpected error creating the output file:", sys.exc_info()[0] exit(0) exp = omsiFile.create_experiment(exp_identifier="test") #Create an empty method descrition sample = exp.create_method_info() #Create an empty instrument description mzdata = np.ones(zdim) instrument = exp.create_instrument_info(instrument_name="undefined", mzdata=mzdata) start = time.time() #Allocate space in the HDF5 file for the img data data = exp.create_msidata_full_cube(data_shape=(xdim, ydim, zdim), data_type='uint16', chunks=None) #Write data one spectrum at a time for xi in xrange(0, xdim): sys.stdout.write("[" + str(int(100. * float(xi) / float(xdim))) + "%]" + "\r") sys.stdout.flush() for yi in xrange(0, ydim): #Save the spectrum to the hdf5 file data[xi, yi, :] = (xi * ydim + yi) omsiFile.close_file() return (time.time() - start)
def generateBaseTestFile( omsiOutFile , xdim , ydim, zdim ) : #Create the output HDF5 file try: omsiFile = omsi_file( omsiOutFile ) except: print "Unexpected error creating the output file:", sys.exc_info()[0] exit(0) exp = omsiFile.create_experiment( exp_identifier = "test" ) #Create an empty method descrition sample = exp.create_method_info() #Create an empty instrument description mzdata = np.ones( zdim ) instrument = exp.create_instrument_info(instrumentname="undefined" , mzdata=mzdata ) start = time.time() #Allocate space in the HDF5 file for the img data data = exp.create_msidata(data_shape=( xdim , ydim , zdim ) , data_type = 'uint16' , chunks=None ) #Write data one spectrum at a time for xi in xrange( 0 , xdim ) : sys.stdout.write("[" +str( int( 100.* float(xi)/float(xdim) )) +"%]"+ "\r") sys.stdout.flush() for yi in xrange( 0 , ydim ) : #Save the spectrum to the hdf5 file data[xi,yi,:] = (xi*ydim + yi) omsiFile .close_file() return ( time.time() - start )
def sliceSelect(args):
    """Worker task: per-pixel variance over a random slab of m/z bins.

    The x axis is partitioned evenly across workers; this worker fills
    only its own rows of the shared (xdim, ydim) output image.

    :param args: Tuple (filename, numWorkers, mp_arr, xdim, ydim, zdim, id)
                 where mp_arr is a multiprocessing shared array and id is
                 this worker's index.
    """
    filename, numWorkers, mp_arr, xdim, ydim, zdim, workerId = args
    # numpy view onto the shared multiprocessing buffer (no copy is made)
    shared = np.frombuffer(mp_arr.get_obj(), dtype='uint16')
    image = shared.reshape((xdim, ydim))
    zrange = 25  # 20000
    # Random slab of (zrange + 1) consecutive m/z bins
    zlo = random.randint(0, zdim - zrange - 1)
    zhi = zlo + zrange + 1
    srcFile = omsi_file(filename, 'r')
    cube = srcFile.get_experiment(0).get_msidata(0)
    # Partition the rows across workers; last worker absorbs the remainder
    xstep = xdim / numWorkers
    xlo = xstep * workerId
    xhi = xdim if workerId == (numWorkers - 1) else xstep * (workerId + 1)
    image[xlo:xhi, :] = np.var(cube[xlo:xhi, 0:ydim, zlo:zhi], axis=2)
    srcFile.close_file()
    sys.stdout.flush()
def spectraSelect(args):
    """Worker task: write the mean spectrum of a random 5x5 pixel patch.

    The z (mass) axis is split evenly among the workers; worker ``id``
    only fills its own portion of the shared output spectrum.

    :param args: Tuple (filename, numWorkers, mp_arr, xdim, ydim, zdim, id).
    """
    filename, numWorkers, mp_arr, xdim, ydim, zdim, workerId = args
    # numpy view onto the shared multiprocessing buffer (no copy is made)
    shared = np.frombuffer(mp_arr.get_obj(), dtype='uint16')
    spectrum = shared.reshape(zdim)
    # Random 5x5 spatial window
    x0 = random.randint(0, xdim - 6)
    y0 = random.randint(0, ydim - 6)
    fileHandle = omsi_file(filename, 'r')
    cube = fileHandle.get_experiment(0).get_msidata(0)
    # Even split of the z axis; the last worker takes any remainder
    step = zdim / numWorkers
    z0 = step * workerId
    if workerId == (numWorkers - 1):
        z1 = zdim
    else:
        z1 = step * (workerId + 1)
    patch = cube[x0:(x0 + 5), y0:(y0 + 5), z0:z1]
    spectrum[z0:z1] = np.mean(np.mean(patch, axis=0), axis=0)
    fileHandle.close_file()
    sys.stdout.flush()
def generateTestFile(omsiOutFile, xdim, ydim, zdim, xchunk, ychunk, zchunk): #Create the output HDF5 file try: omsiFile = omsi_file(omsiOutFile) except: print "Unexpected error creating the output file:", sys.exc_info()[0] exit(0) exp = omsiFile.create_experiment(exp_identifier="test") #Create an empty method descrition sample = exp.create_method_info() #Create an empty instrument description mzdata = np.ones(zdim) instrument = exp.create_instrument_info(instrumentname="undefined", mzdata=mzdata) #Allocate space in the HDF5 file for the img data data = exp.create_msidata(data_shape=(xdim, ydim, zdim), data_type='uint16', chunks=(xchunk, ychunk, zchunk)) itertest = 0 numChunksX = int(math.ceil(float(xdim) / float(xchunk))) numChunksY = int(math.ceil(float(ydim) / float(ychunk))) numChunksZ = int(math.ceil(float(zdim) / float(zchunk))) print "NumChunks : " + str(numChunksX) + " " + str(numChunksY) + " " + str( numChunksZ) numChunks = numChunksX * numChunksY * numChunksZ #Write data one spectrum at a time for xi in xrange(0, numChunksX): sys.stdout.write("[" + str(int(100. 
* float(xi) / float(numChunksX))) + "%]" + "\r") sys.stdout.flush() xstart = xi * xchunk xend = min(xstart + xchunk, xdim) for yi in xrange(0, numChunksY): ystart = yi * ychunk yend = min(ystart + ychunk, ydim) #Save the spectrum to the hdf5 file data[xstart:xend, ystart:yend, :] = (xi * ydim + yi) #Write data into all the chunks """for xt in xrange(0, numChunksX ) : xstart = xt*xchunk xend = min( xstart+xchunk , xdim) for yt in xrange(0, numChunksY ) : ystart = yt*ychunk yend = min( ystart+ychunk , ydim ) for zt in xrange(0, numChunksZ ) : zstart = zt*zchunk zend = min( zstart+zchunk , zdim ) #print "Write : "+str(xstart)+" "+str(xend)+" "+str(ystart)+" "+str(yend)+" "+str(zstart)+" "+str(zend) data[xstart:xend , ystart:yend, zstart:zend ] = itertest itertest+=1 sys.stdout.write("Generating Data: [" +str( int( 100.* float(itertest)/float(numChunks) )) +"%]"+ "\r") sys.stdout.flush()""" omsiFile.close_file()
def generateTestFile( omsiOutFile , xdim , ydim, zdim, xchunk, ychunk , zchunk ) : #Create the output HDF5 file try: omsiFile = omsi_file( omsiOutFile ) except: print "Unexpected error creating the output file:", sys.exc_info()[0] exit(0) exp = omsiFile.create_experiment( exp_identifier = "test" ) #Create an empty method descrition sample = exp.create_method_info() #Create an empty instrument description mzdata = np.ones( zdim ) instrument = exp.create_instrument_info(instrumentname="undefined" , mzdata=mzdata ) #Allocate space in the HDF5 file for the img data data = exp.create_msidata(data_shape=( xdim , ydim , zdim ) , data_type = 'uint16' , chunks=(xchunk,ychunk,zchunk)) itertest=0 numChunksX = int( math.ceil( float(xdim)/float(xchunk) ) ) numChunksY = int( math.ceil( float(ydim)/float(ychunk) ) ) numChunksZ = int( math.ceil( float(zdim)/float(zchunk) ) ) print "NumChunks : "+str(numChunksX)+" "+str(numChunksY)+" "+str(numChunksZ) numChunks = numChunksX*numChunksY*numChunksZ #Write data one spectrum at a time for xi in xrange( 0 , numChunksX ) : sys.stdout.write("[" +str( int( 100.* float(xi)/float(numChunksX) )) +"%]"+ "\r") sys.stdout.flush() xstart = xi*xchunk xend = min( xstart+xchunk , xdim) for yi in xrange( 0 , numChunksY ) : ystart = yi*ychunk yend = min( ystart+ychunk , ydim ) #Save the spectrum to the hdf5 file data[xstart:xend , ystart:yend, : ] = (xi*ydim + yi) #Write data into all the chunks """for xt in xrange(0, numChunksX ) : xstart = xt*xchunk xend = min( xstart+xchunk , xdim) for yt in xrange(0, numChunksY ) : ystart = yt*ychunk yend = min( ystart+ychunk , ydim ) for zt in xrange(0, numChunksZ ) : zstart = zt*zchunk zend = min( zstart+zchunk , zdim ) #print "Write : "+str(xstart)+" "+str(xend)+" "+str(ystart)+" "+str(yend)+" "+str(zstart)+" "+str(zend) data[xstart:xend , ystart:yend, zstart:zend ] = itertest itertest+=1 sys.stdout.write("Generating Data: [" +str( int( 100.* float(itertest)/float(numChunks) )) +"%]"+ "\r") sys.stdout.flush()""" 
omsiFile .close_file()
def sliceSelect(args):
    """Worker task: variance image over a random slab of m/z bins.

    Each worker computes its share of the rows (x axis) of the output
    image and writes them into the shared-memory array.

    :param args: Tuple (filename, numWorkers, mp_arr, xdim, ydim, zdim, id).
    """
    filename, numWorkers, mp_arr, xdim, ydim, zdim, workerId = args
    # Shared multiprocessing buffer exposed as a numpy image (no copy)
    buf = np.frombuffer(mp_arr.get_obj(), dtype='uint16')
    outImage = buf.reshape((xdim, ydim))
    zrange = 25  # 20000
    # Choose a random slab of (zrange + 1) consecutive m/z bins
    z0 = random.randint(0, zdim - zrange - 1)
    z1 = z0 + zrange + 1
    dataFile = omsi_file(filename, 'r')
    msidata = dataFile.get_experiment(0).get_msidata(0)
    # Row range handled by this worker; last worker takes the remainder
    step = xdim / numWorkers
    rowStart = step * workerId
    if workerId == (numWorkers - 1):
        rowEnd = xdim
    else:
        rowEnd = step * (workerId + 1)
    outImage[rowStart:rowEnd, :] = np.var(
        msidata[rowStart:rowEnd, 0:ydim, z0:z1], axis=2)
    dataFile.close_file()
    sys.stdout.flush()
def main(argv=None): """Then main function""" import sys from sys import argv,exit if argv is None: argv = sys.argv #Check for correct usage if len(argv) < 2 : printHelp() exit(0) if len(argv) == 8 : infile = argv[1] xmin = int(argv[2]) xmax = int(argv[3]) ymin = int(argv[4]) ymax = int(argv[5]) zmin = int(argv[6]) zmax = int(argv[7]) start = time.time() d = omsi_file(infile, 'r').get_experiment(0).get_msidata(0) loaddata = d[xmin:xmax , ymin:ymax, zmin:zmax] #content = json.dumps( loaddata.tolist() ) stop = (time.time() - start) print stop exit(0) import numpy as np import os import random import subprocess repeats = 50 outfolder = argv[1] if not outfolder.endswith("/") : outfolder = outfolder+"/" #Baseline filelist filelist = [ "/project/projectdirs/openmsi/manuscript_data/baseline/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/baseline/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20110929_Tumor624.h5" , "/project/projectdirs/openmsi/manuscript_data/baseline/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/baseline/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/baseline/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/baseline/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20111207_KBL_Roots_BigChip_SmallRoots.h5" ] #Compressed 4x4x2048 filelist #filelist = [ "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20110929_Tumor624.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/Brain.h5" , 
"/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20111207_KBL_Roots_BigChip_SmallRoots.h5" ] #Uncompressed 4x4x2048 filelist #filelist = [ "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20110929_Tumor624.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20111207_KBL_Roots_BigChip_SmallRoots.h5" ] #Uncompressed autochunking filelist #filelist = [ "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20110929_Tumor624.h5" , 
"/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20111207_KBL_Roots_BigChip_SmallRoots.h5" ] #Compressed autochunking filelist #filelist = [ "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20110929_Tumor624.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20111207_KBL_Roots_BigChip_SmallRoots.h5" ] #filelist = ["/work2/bowen/TEST_DEP3.h5"] data_shapes = {} results = {} for filename in filelist : #Initialze data shape f = omsi_file( filename , 'r' ) d = f.get_experiment(0).get_msidata(0) data_shapes[filename] = d.shape f.close_file() #Initialze output 
storage results[filename] = np.zeros( repeats , dtype=[ ('mz-slice','f') , ('spectrum','f') , ('xyz-cube','f'), ('mz-slice-all','f') , ('spectrum-all','f') , ('xyz-cube-all','f') , ('filesize' , 'f') ] ) results[filename]['filesize'] = os.stat( filename ).st_size #Note: We compute each test seperately so that we have touched enough data from #other files to avoid biases due to already cached data already at the beginning #of the tests. #Note: Depending on the file system, a significant amount of data (and in some #cases complete files) may be cached by the file system itself. This can results #in large variations in the times for data acceses. This is particularly the #case when two consecutive accesses happen to by chance access a similar portion #of the data. This behavior is expected and is what one expects to happen in real #life as well. It is, therefore, often informative to look at the general #variability of results. E.g.,if all data is stored in a single block, then we #may see very slow access times at the beginning and then, once, all data has #been cached access times drop significantly. For well-chunked data, this #variability between access should be much lower. 
#Compute the slice query test for filename in filelist : print filename+" 25 mz-slices" #mz-slice selection 250,000 elements sliceWidthZ = 25 #xdim=100 , ydim=100 for ri in xrange( 0 , repeats ) : xmin = 0 xmax = data_shapes[filename][0] ymin = 0 ymax = data_shapes[filename][1] zmin = random.randint(0, data_shapes[filename][2]-sliceWidthZ-1 ) zmax = zmin + sliceWidthZ callCommand = ["python", "testhdf5_file_read.py" , filename , str(xmin) , str(xmax), str(ymin), str(ymax), str(zmin) , str(zmax) ] start = time.time() p2 = subprocess.Popen(callCommand , stdout = subprocess.PIPE) readTime = float(p2.stdout.read()) stop = (time.time() - start) results[filename]['mz-slice'][ri] = readTime results[filename]['mz-slice-all'][ri] = stop print str(results[filename]['mz-slice'][ri]) + " " +str( results[filename]['mz-slice-all'][ri] )+ " " + str(xmin) + " " + str(xmax) + " " + str(ymin) + " " + str(ymax) + " " + str(zmin) + " " + str(zmax) #Compute the spectra test for filename in filelist : print filename+" 3 x 3 spectra" #mz-slice selection 250,000 elements sliceWidthX = 3 sliceWidthY = 3 for ri in xrange( 0 , repeats ) : xmin = random.randint(0, data_shapes[filename][0]-sliceWidthX-1 ) xmax = xmin + sliceWidthX ymin = random.randint(0, data_shapes[filename][1]-sliceWidthY-1 ) ymax = ymin + sliceWidthY zmin = 0 zmax = data_shapes[filename][2] callCommand = ["python", "testhdf5_file_read.py" , filename , str(xmin) , str(xmax), str(ymin), str(ymax), str(zmin) , str(zmax) ] start = time.time() p2 = subprocess.Popen(callCommand , stdout = subprocess.PIPE) readTime = float(p2.stdout.read()) stop = (time.time() - start) results[filename]['spectrum'][ri] = readTime results[filename]['spectrum-all'][ri] = stop print str(results[filename]['spectrum'][ri]) + " " +str( results[filename]['spectrum-all'][ri] )+ " " + str(xmin) + " " + str(xmax) + " " + str(ymin) + " " + str(ymax) + " " + str(zmin) + " " + str(zmax) #Compte the cube test for filename in filelist : print filename+" 20 x 20 
x 1000 cube" #mz-slice selection 250,000 elements sliceWidthX = 20 sliceWidthY = 20 sliceWidthZ = 1000 for ri in xrange( 0 , repeats ) : xmin = random.randint(0, data_shapes[filename][0]-sliceWidthX-1 ) xmax = xmin + sliceWidthX ymin = random.randint(0, data_shapes[filename][1]-sliceWidthY-1 ) ymax = ymin + sliceWidthY zmin = random.randint(0, data_shapes[filename][2]-sliceWidthZ-1 ) zmax = zmin + sliceWidthZ callCommand = ["python", "testhdf5_file_read.py" , filename , str(xmin) , str(xmax), str(ymin), str(ymax), str(zmin) , str(zmax) ] start = time.time() p2 = subprocess.Popen(callCommand , stdout = subprocess.PIPE) readTime = float(p2.stdout.read()) stop = (time.time() - start) results[filename]['xyz-cube'][ri] = readTime results[filename]['xyz-cube-all'][ri] = stop print str(results[filename]['xyz-cube'][ri]) + " " +str( results[filename]['xyz-cube-all'][ri] )+ " " + str(xmin) + " " + str(xmax) + " " + str(ymin) + " " + str(ymax) + " " + str(zmin) + " " + str(zmax) for filename in filelist : infilename = os.path.split( filename )[1] outfile = outfolder+infilename+"_timings.txt" f = open( outfile , 'w' ) for colName in results[filename].dtype.names : f.write( colName+" " ) f.write("\n") np.savetxt( f , results[filename] ) f.close() exit(0)
def main(argv=None): """Then main function""" import sys from sys import argv,exit if argv is None: argv = sys.argv #Check for correct usage if len(argv) !=3 : printHelp() exit(0) omsiOutFile = argv[1] resultsFile = argv[2] xdim = 100 ydim = 100 zdim = 100000 xtest = [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ] #range(10,50,10) ytest = [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ] #range(10,50,10) ztest = [2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048] #range(100, 10000, 100) allRes = np.zeros( len(xtest) , dtype=[ ('x','int32') , ('y','int32') , ('z','int32') , ('write','f') , ('z_min','f') , ('z_avg','f') , ('z_median','f') , ('z_max','f') , ('xy_min','f') , ('xy_avg','f') , ('xy_median','f') , ('xy_max','f') , ('xyz_min','f') , ('xyz_avg','f') , ('xyz_median','f') , ('xyz_max','f') , ('filesize' , 'f') ] ) numTests = 3 repeats = 50 for tr in xrange( 0 , len(xtest) ) : allRes[tr]['x'] = xtest[tr] allRes[tr]['y'] = ytest[tr] allRes[tr]['z'] = ztest[tr] #XYZ selection 25,000 elements sliceWidthX=5 sliceWidthY=5 sliceWidthZ = 1000 for tr in xrange( 0 , len(xtest) ) : allRes[tr]['x'] = xtest[tr] allRes[tr]['y'] = ytest[tr] allRes[tr]['z'] = ztest[tr] xchunk = xtest[tr] ychunk = ytest[tr] zchunk = ztest[tr] print "Chunking:"+str(xchunk)+":"+str(ychunk)+":"+str(zchunk) start = time.time() generateTestFile( omsiOutFile , xdim , ydim, zdim, xchunk, ychunk , zchunk ) allRes[tr]['write'] = allRes[tr]['write']+(time.time() - start) allRes[tr]['filesize'] = os.stat( omsiOutFile ).st_size omsiFile = omsi_file( omsiOutFile ) data = omsiFile.get_experiment(0).get_msidata(0) print "Time for data write:"+str(allRes[tr]['write'] ) #Select xyz slize resXYZ = np.zeros( repeats , dtype='f' ) for te in xrange( 0 , repeats ) : valX = random.randint(0, xdim-sliceWidthX-1 ) valY = random.randint(0, ydim-sliceWidthY-1 ) valZ = random.randint(0, zdim-sliceWidthZ-1 ) start = time.time() d=np.sum( data[valX:(valX+sliceWidthX),valY:(valY+sliceWidthY),valZ:(valZ+sliceWidthZ)] ) 
resXYZ[te] = (time.time() - start) np.savetxt( "resXYZ_aw_"+str(xchunk)+"_"+str(ychunk)+"_"+str(zchunk)+".txt" , resXYZ ) allRes[tr]['xyz_min'] = np.min( resXYZ ) allRes[tr]['xyz_avg'] = np.average( resXYZ ) allRes[tr]['xyz_median'] = np.median( resXYZ ) allRes[tr]['xyz_max'] = np.max( resXYZ ) print "XYZ-Slicing: "+str(np.max(resXYZ)) +":"+str(np.average( resXYZ ))+":"+str(np.median( resXYZ ))+":"+str(np.min(resXYZ)) omsiFile.close_file() os.remove( omsiOutFile ) #mz-slice selection 250,000 elements sliceWidthZ = 25 #xdim=100 , ydim=100 for tr in xrange( 0 , len(xtest) ) : allRes[tr]['x'] = xtest[tr] allRes[tr]['y'] = ytest[tr] allRes[tr]['z'] = ztest[tr] xchunk = xtest[tr] ychunk = ytest[tr] zchunk = ztest[tr] print "Chunking:"+str(xchunk)+":"+str(ychunk)+":"+str(zchunk) start = time.time() generateTestFile( omsiOutFile , xdim , ydim, zdim, xchunk, ychunk , zchunk ) allRes[tr]['write'] = allRes[tr]['write']+(time.time() - start) omsiFile = omsi_file( omsiOutFile ) data = omsiFile.get_experiment(0).get_msidata(0) print "Time for data write:"+str(allRes[tr]['write'] ) #Perform different types of slicing opterations and keep track of the times #Select 10 slizes in z resZ = np.zeros( repeats , dtype='f' ) for te in xrange( 0 , repeats ) : val = random.randint(0, zdim-sliceWidthZ-1 ) start = time.time() d=np.sum( data[:,:,val:(val+sliceWidthZ)] ) resZ[te] = (time.time() - start) np.savetxt( "resZ_aw_"+str(xchunk)+"_"+str(ychunk)+"_"+str(zchunk)+".txt" , resZ ) allRes[tr]['z_min'] = np.min( resZ ) allRes[tr]['z_avg'] = np.average( resZ ) allRes[tr]['z_median'] = np.median( resZ ) allRes[tr]['z_max'] = np.max( resZ ) print "Z-Slicing: "+str(np.max(resZ)) +":"+str(np.average( resZ ))+":"+str(np.median( resZ ))+":"+str(np.min(resZ)) omsiFile.close_file() os.remove( omsiOutFile ) #XY selection of multiple spectra. 
Selection size= 2,500,000 elements for tr in xrange( 0 , len(xtest) ) : xchunk = xtest[tr] ychunk = ytest[tr] zchunk = ztest[tr] print "Chunking:"+str(xchunk)+":"+str(ychunk)+":"+str(zchunk) start = time.time() generateTestFile( omsiOutFile , xdim , ydim, zdim, xchunk, ychunk , zchunk ) allRes[tr]['write'] = allRes[tr]['write']+(time.time() - start) omsiFile = omsi_file( omsiOutFile ) data = omsiFile.get_experiment(0).get_msidata(0) print "Time for data write:"+str(allRes[tr]['write'] ) #Select x/y resXY = np.zeros( repeats , dtype='f' ) for te in xrange( 0 , repeats ) : valX = random.randint(0, xdim-sliceWidthX-1 ) valY = random.randint(0, ydim-sliceWidthY-1 ) start = time.time() d=np.sum( data[valX:(valX+sliceWidthX),valY:(valY+sliceWidthY),:] ) resXY[te] = (time.time() - start) np.savetxt( "resXY_aw_"+str(xchunk)+"_"+str(ychunk)+"_"+str(zchunk)+".txt" , resXY ) allRes[tr]['xy_min'] = np.min( resXY ) allRes[tr]['xy_avg'] = np.average( resXY ) allRes[tr]['xy_median'] = np.median( resXY ) allRes[tr]['xy_max'] = np.max( resXY ) print "XY-Slicing: "+str(np.max(resXY)) +":"+str(np.average( resXY ))+":"+str(np.median( resXY ))+":"+str(np.min(resXY)) omsiFile.close_file() os.remove( omsiOutFile ) for tr in xrange( 0 , len(xtest) ) : allRes[tr]['write'] = allRes[tr]['write']/float(numTests) f = open( resultsFile , 'w' ) f.write( str(allRes.dtype.names) ) f.write("\n") np.savetxt( f , allRes ) f.close()
def generateChunkedTestFile(omsiOutFile, xdim, ydim, zdim, xchunk, ychunk, zchunk, compress=False, donorFile="/project/projectdirs/openmsi/omsi_data/old/TEST.h5"):
    """Create a (possibly chunked and compressed) test MSI file.

    Data is either synthesized or copied chunk-by-chunk from an existing
    "donor" MSI file (the hard-coded ``useDonorFile = True`` below means
    only the donor path is normally exercised).

    :param omsiOutFile: Path of the HDF5 output file to create.
    :param xdim: Number of pixels in x.
    :param ydim: Number of pixels in y.
    :param zdim: Number of m/z bins per spectrum.
    :param xchunk: Chunk size in x. Chunking is used only if all three
                   chunk sizes are > 0.
    :param ychunk: Chunk size in y.
    :param zchunk: Chunk size in z.
    :param compress: If True, write the data gzip-compressed (level 4).
    :param donorFile: MSI file the data is copied from when the donor
                      path is used.
    :return: Seconds spent allocating and writing the dataset.
    """
    # Internal switches: writeFullSpectra selects the artificial-data
    # write pattern; useDonorFile forces the donor-copy path below.
    writeFullSpectra = False
    useChunking = (xchunk > 0 and ychunk > 0 and zchunk > 0)
    print useChunking
    useDonorFile = True
    if useDonorFile:
        # Load the complete donor cube into memory ([:] makes a copy)
        inFile = omsi_file(donorFile)
        inData = inFile.get_experiment(0).get_msidata(0)[:]
        inFile.close_file()
    #Create the output HDF5 file
    try:
        omsiFile = omsi_file(omsiOutFile)
    except:
        print "Unexpected error creating the output file:", sys.exc_info()[0]
        exit(0)
    exp = omsiFile.create_experiment(exp_identifier="test")
    #Create an empty method descrition
    sample = exp.create_method_info()
    #Create an empty instrument description
    mzdata = np.ones(zdim)
    instrument = exp.create_instrument_info(instrument_name="undefined", mzdata=mzdata)
    #Allocate space in the HDF5 file for the img data
    start = time.time()
    if compress:
        #Use compresion (implies chunking)
        data = exp.create_msidata_full_cube(data_shape=(xdim, ydim, zdim), data_type='uint16', chunks=(xchunk, ychunk, zchunk), compression='gzip', compression_opts=4)
    elif useChunking:
        #Use chunking
        data = exp.create_msidata_full_cube(data_shape=(xdim, ydim, zdim), data_type='uint16', chunks=(xchunk, ychunk, zchunk))  #, compression='gzip' , compression_opts=4 )
    else:
        #Don't use chunking and compression
        data = exp.create_msidata_full_cube(data_shape=(xdim, ydim, zdim), data_type='uint16')
    itertest = 0
    if useChunking:
        # Number of chunks along each axis (last chunk may be partial)
        numChunksX = int(math.ceil(float(xdim) / float(xchunk)))
        numChunksY = int(math.ceil(float(ydim) / float(ychunk)))
        numChunksZ = int(math.ceil(float(zdim) / float(zchunk)))
        print "NumChunks : " + str(numChunksX) + " " + str(numChunksY) + " " + str(numChunksZ)
        numChunks = numChunksX * numChunksY * numChunksZ
    else:
        #Write on spectrum at a time if we want a contiguous data layout
        # NOTE(review): the write loops below still index with
        # xchunk/ychunk/zchunk even in this branch — presumably this
        # path is only used with positive chunk arguments; verify.
        numChunksX = int(math.ceil(float(xdim) / 4.0))
        numChunksY = int(math.ceil(float(ydim) / 4.0))
        numChunksZ = 2
    if not useDonorFile:
        if writeFullSpectra:
            print "Writing mxm spectra at a time (artifical data)"
            #Write data one x/y chunk at a time (i.e., multiple z-chunks are writtent at once)
            for xi in xrange(0, numChunksX):
                sys.stdout.write("[" + str(int(100. * float(xi) / float(numChunksX))) + "%]" + "\r")
                sys.stdout.flush()
                xstart = xi * xchunk
                xend = min(xstart + xchunk, xdim)
                for yi in xrange(0, numChunksY):
                    ystart = yi * ychunk
                    yend = min(ystart + ychunk, ydim)
                    #Save the spectrum to the hdf5 file
                    data[xstart:xend, ystart:yend, :] = (xi * ydim + yi)
        else:
            #Write data one x/y/z chunk at a time
            print "Writing one x/y/z chunk at a time (artifical data)"
            for xt in xrange(0, numChunksX):
                sys.stdout.write("[" + str(int(100. * float(xt) / float(numChunksX))) + "%]" + "\r")
                sys.stdout.flush()
                xstart = xt * xchunk
                xend = min(xstart + xchunk, xdim)
                for yt in xrange(0, numChunksY):
                    ystart = yt * ychunk
                    yend = min(ystart + ychunk, ydim)
                    for zt in xrange(0, numChunksZ):
                        zstart = zt * zchunk
                        zend = min(zstart + zchunk, zdim)
                        # Each chunk gets a distinct constant value
                        data[xstart:xend, ystart:yend, zstart:zend] = (xt * ydim * zdim + yt * zdim + zt)
    else:
        print "Writing one x/y/z chunk at a time (donor dataset)"
        #Write data into all the chunks one x,y,z chunk at a time using the donor file
        for xt in xrange(0, numChunksX):
            sys.stdout.write("[" + str(int(100. * float(xt) / float(numChunksX))) + "%]" + "\r")
            sys.stdout.flush()
            xstart = xt * xchunk
            xend = min(xstart + xchunk, xdim)
            for yt in xrange(0, numChunksY):
                ystart = yt * ychunk
                yend = min(ystart + ychunk, ydim)
                for zt in xrange(0, numChunksZ):
                    zstart = zt * zchunk
                    zend = min(zstart + zchunk, zdim)
                    #print "Write : "+str(xstart)+" "+str(xend)+" "+str(ystart)+" "+str(yend)+" "+str(zstart)+" "+str(zend)
                    diff = inData.shape[2] - zend
                    # If the donor cube is shorter in z than the output,
                    # re-read the tail of the donor so the source window
                    # always has the same width as the target window.
                    # NOTE(review): x/y are assumed to be within the
                    # donor's extent — only z is clamped; verify.
                    myend = zend
                    mystart = zstart
                    if inData.shape[2] < zend:
                        myend = inData.shape[2]
                        mystart = inData.shape[2] - (zend - zstart)
                    a = inData[xstart:xend, ystart:yend, mystart:myend]
                    #print str(zt)+" : "+str(a.shape)+" : "+str(mystart)+" "+str(myend)
                    data[xstart:xend, ystart:yend, zstart:zend] = a
                    itertest += 1
                    #sys.stdout.write("Generating Data: [" +str( int( 100.* float(itertest)/float(numChunks) )) +"%]"+ "\r")
                    #sys.stdout.flush()
    omsiFile.close_file()
    return (time.time() - start)
def main(argv=None): """Then main function""" import sys from sys import argv, exit if argv is None: argv = sys.argv #Check for correct usage if len(argv) < 2: printHelp() exit(0) if len(argv) == 8: infile = argv[1] xmin = int(argv[2]) xmax = int(argv[3]) ymin = int(argv[4]) ymax = int(argv[5]) zmin = int(argv[6]) zmax = int(argv[7]) start = time.time() d = omsi_file(infile, 'r').get_experiment(0).get_msidata(0) loaddata = d[xmin:xmax, ymin:ymax, zmin:zmax] #content = json.dumps( loaddata.tolist() ) stop = (time.time() - start) print stop exit(0) import numpy as np import os import random import subprocess repeats = 50 outfolder = argv[1] if not outfolder.endswith("/"): outfolder = outfolder + "/" #Baseline filelist filelist = [ "/project/projectdirs/openmsi/manuscript_data/baseline/11042008_NIMS.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20110929_Tumor624.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/Brain.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20111012_Tumor458_50micronSS_D.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20111208_KBL_Roots_SmallChip_BigRoot.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20111207_KBL_Roots_BigChip_SmallRoots.h5" ] #Compressed 4x4x2048 filelist #filelist = [ "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20110929_Tumor624.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/Brain.h5" , 
"/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20111207_KBL_Roots_BigChip_SmallRoots.h5" ] #Uncompressed 4x4x2048 filelist #filelist = [ "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20110929_Tumor624.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20111207_KBL_Roots_BigChip_SmallRoots.h5" ] #Uncompressed autochunking filelist #filelist = [ "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20110929_Tumor624.h5" , 
"/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20111207_KBL_Roots_BigChip_SmallRoots.h5" ] #Compressed autochunking filelist #filelist = [ "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20110929_Tumor624.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20111207_KBL_Roots_BigChip_SmallRoots.h5" ] #filelist = ["/work2/bowen/TEST_DEP3.h5"] data_shapes = {} results = {} for filename in filelist: #Initialze data shape f = omsi_file(filename, 'r') d = f.get_experiment(0).get_msidata(0) data_shapes[filename] = d.shape f.close_file() #Initialze output 
storage results[filename] = np.zeros(repeats, dtype=[('mz-slice', 'f'), ('spectrum', 'f'), ('xyz-cube', 'f'), ('mz-slice-all', 'f'), ('spectrum-all', 'f'), ('xyz-cube-all', 'f'), ('filesize', 'f')]) results[filename]['filesize'] = os.stat(filename).st_size #Note: We compute each test seperately so that we have touched enough data from #other files to avoid biases due to already cached data already at the beginning #of the tests. #Note: Depending on the file system, a significant amount of data (and in some #cases complete files) may be cached by the file system itself. This can results #in large variations in the times for data acceses. This is particularly the #case when two consecutive accesses happen to by chance access a similar portion #of the data. This behavior is expected and is what one expects to happen in real #life as well. It is, therefore, often informative to look at the general #variability of results. E.g.,if all data is stored in a single block, then we #may see very slow access times at the beginning and then, once, all data has #been cached access times drop significantly. For well-chunked data, this #variability between access should be much lower. 
#Compute the slice query test for filename in filelist: print filename + " 25 mz-slices" #mz-slice selection 250,000 elements sliceWidthZ = 25 #xdim=100 , ydim=100 for ri in xrange(0, repeats): xmin = 0 xmax = data_shapes[filename][0] ymin = 0 ymax = data_shapes[filename][1] zmin = random.randint(0, data_shapes[filename][2] - sliceWidthZ - 1) zmax = zmin + sliceWidthZ callCommand = [ "python", "testhdf5_file_read.py", filename, str(xmin), str(xmax), str(ymin), str(ymax), str(zmin), str(zmax) ] start = time.time() p2 = subprocess.Popen(callCommand, stdout=subprocess.PIPE) readTime = float(p2.stdout.read()) stop = (time.time() - start) results[filename]['mz-slice'][ri] = readTime results[filename]['mz-slice-all'][ri] = stop print str(results[filename]['mz-slice'][ri]) + " " + str( results[filename]['mz-slice-all'][ri]) + " " + str( xmin) + " " + str(xmax) + " " + str(ymin) + " " + str( ymax) + " " + str(zmin) + " " + str(zmax) #Compute the spectra test for filename in filelist: print filename + " 3 x 3 spectra" #mz-slice selection 250,000 elements sliceWidthX = 3 sliceWidthY = 3 for ri in xrange(0, repeats): xmin = random.randint(0, data_shapes[filename][0] - sliceWidthX - 1) xmax = xmin + sliceWidthX ymin = random.randint(0, data_shapes[filename][1] - sliceWidthY - 1) ymax = ymin + sliceWidthY zmin = 0 zmax = data_shapes[filename][2] callCommand = [ "python", "testhdf5_file_read.py", filename, str(xmin), str(xmax), str(ymin), str(ymax), str(zmin), str(zmax) ] start = time.time() p2 = subprocess.Popen(callCommand, stdout=subprocess.PIPE) readTime = float(p2.stdout.read()) stop = (time.time() - start) results[filename]['spectrum'][ri] = readTime results[filename]['spectrum-all'][ri] = stop print str(results[filename]['spectrum'][ri]) + " " + str( results[filename]['spectrum-all'][ri]) + " " + str( xmin) + " " + str(xmax) + " " + str(ymin) + " " + str( ymax) + " " + str(zmin) + " " + str(zmax) #Compte the cube test for filename in filelist: print filename + " 20 x 20 x 
1000 cube" #mz-slice selection 250,000 elements sliceWidthX = 20 sliceWidthY = 20 sliceWidthZ = 1000 for ri in xrange(0, repeats): xmin = random.randint(0, data_shapes[filename][0] - sliceWidthX - 1) xmax = xmin + sliceWidthX ymin = random.randint(0, data_shapes[filename][1] - sliceWidthY - 1) ymax = ymin + sliceWidthY zmin = random.randint(0, data_shapes[filename][2] - sliceWidthZ - 1) zmax = zmin + sliceWidthZ callCommand = [ "python", "testhdf5_file_read.py", filename, str(xmin), str(xmax), str(ymin), str(ymax), str(zmin), str(zmax) ] start = time.time() p2 = subprocess.Popen(callCommand, stdout=subprocess.PIPE) readTime = float(p2.stdout.read()) stop = (time.time() - start) results[filename]['xyz-cube'][ri] = readTime results[filename]['xyz-cube-all'][ri] = stop print str(results[filename]['xyz-cube'][ri]) + " " + str( results[filename]['xyz-cube-all'][ri]) + " " + str( xmin) + " " + str(xmax) + " " + str(ymin) + " " + str( ymax) + " " + str(zmin) + " " + str(zmax) for filename in filelist: infilename = os.path.split(filename)[1] outfile = outfolder + infilename + "_timings.txt" f = open(outfile, 'w') for colName in results[filename].dtype.names: f.write(colName + " ") f.write("\n") np.savetxt(f, results[filename]) f.close() exit(0)
def main(argv=None): """Then main function""" import sys from sys import argv, exit if argv is None: argv = sys.argv #Check for correct usage if len(argv) != 3: printHelp() exit(0) omsiOutFile = argv[1] resultsFile = argv[2] xdim = 100 ydim = 100 zdim = 100000 xtest = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] #range(10,50,10) ytest = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] #range(10,50,10) ztest = [2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048] #range(100, 10000, 100) allRes = np.zeros(len(xtest), dtype=[('x', 'int32'), ('y', 'int32'), ('z', 'int32'), ('write', 'f'), ('z_min', 'f'), ('z_avg', 'f'), ('z_median', 'f'), ('z_max', 'f'), ('xy_min', 'f'), ('xy_avg', 'f'), ('xy_median', 'f'), ('xy_max', 'f'), ('xyz_min', 'f'), ('xyz_avg', 'f'), ('xyz_median', 'f'), ('xyz_max', 'f'), ('filesize', 'f')]) numTests = 3 repeats = 50 for tr in xrange(0, len(xtest)): allRes[tr]['x'] = xtest[tr] allRes[tr]['y'] = ytest[tr] allRes[tr]['z'] = ztest[tr] #XYZ selection 25,000 elements sliceWidthX = 5 sliceWidthY = 5 sliceWidthZ = 1000 for tr in xrange(0, len(xtest)): allRes[tr]['x'] = xtest[tr] allRes[tr]['y'] = ytest[tr] allRes[tr]['z'] = ztest[tr] xchunk = xtest[tr] ychunk = ytest[tr] zchunk = ztest[tr] print "Chunking:" + str(xchunk) + ":" + str(ychunk) + ":" + str(zchunk) start = time.time() generateTestFile(omsiOutFile, xdim, ydim, zdim, xchunk, ychunk, zchunk) allRes[tr]['write'] = allRes[tr]['write'] + (time.time() - start) allRes[tr]['filesize'] = os.stat(omsiOutFile).st_size omsiFile = omsi_file(omsiOutFile) data = omsiFile.get_experiment(0).get_msidata(0) print "Time for data write:" + str(allRes[tr]['write']) #Select xyz slize resXYZ = np.zeros(repeats, dtype='f') for te in xrange(0, repeats): valX = random.randint(0, xdim - sliceWidthX - 1) valY = random.randint(0, ydim - sliceWidthY - 1) valZ = random.randint(0, zdim - sliceWidthZ - 1) start = time.time() d = np.sum(data[valX:(valX + sliceWidthX), valY:(valY + sliceWidthY), valZ:(valZ + sliceWidthZ)]) resXYZ[te] = 
(time.time() - start) np.savetxt( "resXYZ_aw_" + str(xchunk) + "_" + str(ychunk) + "_" + str(zchunk) + ".txt", resXYZ) allRes[tr]['xyz_min'] = np.min(resXYZ) allRes[tr]['xyz_avg'] = np.average(resXYZ) allRes[tr]['xyz_median'] = np.median(resXYZ) allRes[tr]['xyz_max'] = np.max(resXYZ) print "XYZ-Slicing: " + str(np.max(resXYZ)) + ":" + str( np.average(resXYZ)) + ":" + str(np.median(resXYZ)) + ":" + str( np.min(resXYZ)) omsiFile.close_file() os.remove(omsiOutFile) #mz-slice selection 250,000 elements sliceWidthZ = 25 #xdim=100 , ydim=100 for tr in xrange(0, len(xtest)): allRes[tr]['x'] = xtest[tr] allRes[tr]['y'] = ytest[tr] allRes[tr]['z'] = ztest[tr] xchunk = xtest[tr] ychunk = ytest[tr] zchunk = ztest[tr] print "Chunking:" + str(xchunk) + ":" + str(ychunk) + ":" + str(zchunk) start = time.time() generateTestFile(omsiOutFile, xdim, ydim, zdim, xchunk, ychunk, zchunk) allRes[tr]['write'] = allRes[tr]['write'] + (time.time() - start) omsiFile = omsi_file(omsiOutFile) data = omsiFile.get_experiment(0).get_msidata(0) print "Time for data write:" + str(allRes[tr]['write']) #Perform different types of slicing opterations and keep track of the times #Select 10 slizes in z resZ = np.zeros(repeats, dtype='f') for te in xrange(0, repeats): val = random.randint(0, zdim - sliceWidthZ - 1) start = time.time() d = np.sum(data[:, :, val:(val + sliceWidthZ)]) resZ[te] = (time.time() - start) np.savetxt( "resZ_aw_" + str(xchunk) + "_" + str(ychunk) + "_" + str(zchunk) + ".txt", resZ) allRes[tr]['z_min'] = np.min(resZ) allRes[tr]['z_avg'] = np.average(resZ) allRes[tr]['z_median'] = np.median(resZ) allRes[tr]['z_max'] = np.max(resZ) print "Z-Slicing: " + str(np.max(resZ)) + ":" + str( np.average(resZ)) + ":" + str(np.median(resZ)) + ":" + str( np.min(resZ)) omsiFile.close_file() os.remove(omsiOutFile) #XY selection of multiple spectra. 
Selection size= 2,500,000 elements for tr in xrange(0, len(xtest)): xchunk = xtest[tr] ychunk = ytest[tr] zchunk = ztest[tr] print "Chunking:" + str(xchunk) + ":" + str(ychunk) + ":" + str(zchunk) start = time.time() generateTestFile(omsiOutFile, xdim, ydim, zdim, xchunk, ychunk, zchunk) allRes[tr]['write'] = allRes[tr]['write'] + (time.time() - start) omsiFile = omsi_file(omsiOutFile) data = omsiFile.get_experiment(0).get_msidata(0) print "Time for data write:" + str(allRes[tr]['write']) #Select x/y resXY = np.zeros(repeats, dtype='f') for te in xrange(0, repeats): valX = random.randint(0, xdim - sliceWidthX - 1) valY = random.randint(0, ydim - sliceWidthY - 1) start = time.time() d = np.sum(data[valX:(valX + sliceWidthX), valY:(valY + sliceWidthY), :]) resXY[te] = (time.time() - start) np.savetxt( "resXY_aw_" + str(xchunk) + "_" + str(ychunk) + "_" + str(zchunk) + ".txt", resXY) allRes[tr]['xy_min'] = np.min(resXY) allRes[tr]['xy_avg'] = np.average(resXY) allRes[tr]['xy_median'] = np.median(resXY) allRes[tr]['xy_max'] = np.max(resXY) print "XY-Slicing: " + str(np.max(resXY)) + ":" + str( np.average(resXY)) + ":" + str(np.median(resXY)) + ":" + str( np.min(resXY)) omsiFile.close_file() os.remove(omsiOutFile) for tr in xrange(0, len(xtest)): allRes[tr]['write'] = allRes[tr]['write'] / float(numTests) f = open(resultsFile, 'w') f.write(str(allRes.dtype.names)) f.write("\n") np.savetxt(f, allRes) f.close()
from omsi.dataformat.omsi_file import *

#Open the OpenMSI HDF5 file in read-only mode.
#f is an omsi_file object; for details run: help( omsi_file )
f = omsi_file("/project/projectdirs/openmsi/omsi_data/20120711_Brain.h5" , 'r' )

#How many experiments does the file contain?
num_exp = f.get_num_experiments()

#Fetch the first experiment.
#exp0 is an omsi_file_experiment object; for details run:
#help( omsi_file_experiment )
exp0 = f.get_experiment(0)

#Counts of MSI datasets and derived analyses stored with this experiment.
num_dat = exp0.get_num_msidata()
num_ana = exp0.get_num_analysis()

#Grab the raw MSI dataset, i.e., the API object that manages the dataset.
#It behaves like a 3D cube in (x, y, m/z) holding intensity values, and
#supports standard array slicing. The m/z values are identical for every
#spectrum and live in a separate 1D array. Here we take experiment 0 and
#its MSI dataset 0; an OpenMSI file may hold multiple experiments, each
#with multiple MSI datasets.
#d is an omsi_file_msidata object; for details run:
#help( omsi_file_msidata )
d = exp0.get_msidata(0)

#Number of pixels along x, y, and the m/z axis.
numX = d.shape[0]
numY = d.shape[1]
numMZ = d.shape[2]

#The m/z axis: a 1D array of length d.shape[2].
mz = d.mz
def generateChunkedTestFile( omsiOutFile , xdim , ydim, zdim, xchunk, ychunk , zchunk , compress=False , donorFile = "/project/projectdirs/openmsi/omsi_data/old/TEST.h5" ) : writeFullSpectra=False useChunking = (xchunk>0 and ychunk>0 and zchunk>0 ) print useChunking useDonorFile = True if useDonorFile : inFile = omsi_file( donorFile ) inData = inFile.get_experiment(0).get_msidata(0)[:] inFile.close_file() #Create the output HDF5 file try: omsiFile = omsi_file( omsiOutFile ) except: print "Unexpected error creating the output file:", sys.exc_info()[0] exit(0) exp = omsiFile.create_experiment( exp_identifier = "test" ) #Create an empty method descrition sample = exp.create_method_info() #Create an empty instrument description mzdata = np.ones( zdim ) instrument = exp.create_instrument_info(instrumentname="undefined" , mzdata=mzdata ) #Allocate space in the HDF5 file for the img data start = time.time() if compress : #Use compresion data = exp.create_msidata(data_shape=( xdim , ydim , zdim ) , data_type = 'uint16' , chunks=(xchunk,ychunk,zchunk) , compression='gzip' , compression_opts=4 ) elif useChunking : #Use chunking data = exp.create_msidata(data_shape=( xdim , ydim , zdim ) , data_type = 'uint16' , chunks=(xchunk,ychunk,zchunk) ) #, compression='gzip' , compression_opts=4 ) else : #Don't use chunking and compression data = exp.create_msidata(data_shape=( xdim , ydim , zdim ) , data_type = 'uint16' ) itertest=0 if useChunking : numChunksX = int( math.ceil( float(xdim)/float(xchunk) ) ) numChunksY = int( math.ceil( float(ydim)/float(ychunk) ) ) numChunksZ = int( math.ceil( float(zdim)/float(zchunk) ) ) print "NumChunks : "+str(numChunksX)+" "+str(numChunksY)+" "+str(numChunksZ) numChunks = numChunksX*numChunksY*numChunksZ else : #Write on spectrum at a time if we want a contiguous data layout numChunksX = int( math.ceil( float(xdim)/4.0 ) ) numChunksY = int( math.ceil( float(ydim)/4.0 ) ) numChunksZ = 2 if not useDonorFile : if writeFullSpectra : print "Writing 
mxm spectra at a time (artifical data)" #Write data one x/y chunk at a time (i.e., multiple z-chunks are writtent at once) for xi in xrange( 0 , numChunksX ) : sys.stdout.write("[" +str( int( 100.* float(xi)/float(numChunksX) )) +"%]"+ "\r") sys.stdout.flush() xstart = xi*xchunk xend = min( xstart+xchunk , xdim) for yi in xrange( 0 , numChunksY ) : ystart = yi*ychunk yend = min( ystart+ychunk , ydim ) #Save the spectrum to the hdf5 file data[xstart:xend , ystart:yend, : ] = (xi*ydim + yi) else : #Write data one x/y/z chunk at a time print "Writing one x/y/z chunk at a time (artifical data)" for xt in xrange(0 , numChunksX) : sys.stdout.write("[" +str( int( 100.* float(xt)/float(numChunksX) )) +"%]"+ "\r") sys.stdout.flush() xstart = xt*xchunk xend = min( xstart+xchunk , xdim) for yt in xrange(0, numChunksY ) : ystart = yt*ychunk yend = min( ystart+ychunk , ydim ) for zt in xrange(0, numChunksZ ) : zstart = zt*zchunk zend = min( zstart+zchunk , zdim ) data[xstart:xend , ystart:yend, zstart:zend ] = (xt*ydim*zdim + yt*zdim +zt) else : print "Writing one x/y/z chunk at a time (donor dataset)" #Write data into all the chunks one x,y,z chunk at a time using the donor file for xt in xrange(0, numChunksX ) : sys.stdout.write("[" +str( int( 100.* float(xt)/float(numChunksX) )) +"%]"+ "\r") sys.stdout.flush() xstart = xt*xchunk xend = min( xstart+xchunk , xdim) for yt in xrange(0, numChunksY ) : ystart = yt*ychunk yend = min( ystart+ychunk , ydim ) for zt in xrange(0, numChunksZ ) : zstart = zt*zchunk zend = min( zstart+zchunk , zdim ) #print "Write : "+str(xstart)+" "+str(xend)+" "+str(ystart)+" "+str(yend)+" "+str(zstart)+" "+str(zend) diff = inData.shape[2] - zend myend = zend mystart = zstart if inData.shape[2] < zend : myend = inData.shape[2] mystart = inData.shape[2]-(zend-zstart) a = inData[xstart:xend , ystart:yend, mystart:myend ] #print str(zt)+" : "+str(a.shape)+" : "+str(mystart)+" "+str(myend) data[xstart:xend , ystart:yend, zstart:zend ] = a itertest+=1 
#sys.stdout.write("Generating Data: [" +str( int( 100.* float(itertest)/float(numChunks) )) +"%]"+ "\r") #sys.stdout.flush() omsiFile .close_file() return (time.time() - start)