Пример #1
0
def spectraSelect(args):
    """Worker task: average a random 5x5 pixel patch over this worker's
    share of the m/z axis and write the result into the shared spectrum.

    ``args`` is (filename, numWorkers, mp_arr, xdim, ydim, zdim, id)
    where ``mp_arr`` is a multiprocessing shared array of length zdim
    and ``id`` is this worker's index in [0, numWorkers).
    """
    filename, numWorkers, mp_arr, xdim, ydim, zdim, worker_id = args

    # View the shared multiprocessing buffer as a numpy array (no copy).
    shared = np.frombuffer(mp_arr.get_obj(), dtype='uint16')
    spectrum = shared.reshape((zdim))  # still backed by the same memory

    # Pick a random 5x5 patch in x/y.
    xmin = random.randint(0, xdim - 6)
    xmax = xmin + 5
    ymin = random.randint(0, ydim - 6)
    ymax = ymin + 5

    omsiFile = omsi_file(filename, 'r')
    dataset = omsiFile.get_experiment(0).get_msidata(0)

    # Partition the z (m/z) axis across workers; the last worker picks
    # up any remainder left by the integer division.
    zstep = zdim / numWorkers
    zmin = zstep * worker_id
    zmax = zdim if worker_id == (numWorkers - 1) else zstep * (worker_id + 1)

    # Average the patch over x, then y, for this worker's z-range.
    patch = dataset[xmin:xmax, ymin:ymax, zmin:zmax]
    spectrum[zmin:zmax] = np.mean(np.mean(patch, axis=0), axis=0)

    omsiFile.close_file()
    sys.stdout.flush()
Пример #2
0
def generateBaseTestFile(omsiOutFile, xdim, ydim, zdim):
    """Write a synthetic (xdim, ydim, zdim) uint16 cube to a new OMSI
    file, one spectrum at a time, and return the write time in seconds."""
    #Create the output HDF5 file
    try:
        omsiFile = omsi_file(omsiOutFile)
    except:
        print "Unexpected error creating the output file:", sys.exc_info()[0]
        exit(0)

    exp = omsiFile.create_experiment(exp_identifier="test")
    #Empty method description
    sample = exp.create_method_info()
    #Empty instrument description with a dummy m/z axis
    mzdata = np.ones(zdim)
    instrument = exp.create_instrument_info(instrument_name="undefined",
                                            mzdata=mzdata)
    start = time.time()
    #Reserve the full (unchunked) data cube in the HDF5 file
    data = exp.create_msidata_full_cube(data_shape=(xdim, ydim, zdim),
                                        data_type='uint16',
                                        chunks=None)
    #Fill the cube one spectrum at a time, reporting progress in percent
    for xi in xrange(0, xdim):
        progress = int(100. * float(xi) / float(xdim))
        sys.stdout.write("[" + str(progress) + "%]" + "\r")
        sys.stdout.flush()
        for yi in xrange(0, ydim):
            data[xi, yi, :] = (xi * ydim + yi)

    omsiFile.close_file()
    return (time.time() - start)
Пример #3
0
def generateBaseTestFile(omsiOutFile, xdim, ydim, zdim):
    """Write a synthetic (xdim, ydim, zdim) uint16 cube to a new OMSI
    file one spectrum at a time; return the elapsed write time."""
    #Create the output HDF5 file
    try:
        omsiFile = omsi_file(omsiOutFile)
    except:
        print "Unexpected error creating the output file:", sys.exc_info()[0]
        exit(0)

    exp = omsiFile.create_experiment(exp_identifier="test")
    #Empty method description
    sample = exp.create_method_info()
    #Empty instrument description with a dummy m/z axis
    mzdata = np.ones(zdim)
    instrument = exp.create_instrument_info(instrumentname="undefined",
                                            mzdata=mzdata)
    start = time.time()
    #Reserve space for the image data (no chunking)
    data = exp.create_msidata(data_shape=(xdim, ydim, zdim),
                              data_type='uint16',
                              chunks=None)
    #Write one spectrum at a time, reporting progress in percent
    for xi in xrange(0, xdim):
        sys.stdout.write("[" + str(int(100. * float(xi) / float(xdim))) + "%]" + "\r")
        sys.stdout.flush()
        for yi in xrange(0, ydim):
            data[xi, yi, :] = (xi * ydim + yi)

    omsiFile.close_file()
    return (time.time() - start)
Пример #4
0
def sliceSelect(args):
    """Worker task: compute the per-pixel variance over a random window
    of m/z bins for this worker's share of the x axis, writing into the
    shared (xdim, ydim) output image.

    ``args`` is (filename, numWorkers, mp_arr, xdim, ydim, zdim, id).
    """
    filename, numWorkers, mp_arr, xdim, ydim, zdim, worker_id = args

    # View the shared multiprocessing buffer as a 2D image (no copy).
    shared = np.frombuffer(mp_arr.get_obj(), dtype='uint16')
    image = shared.reshape((xdim, ydim))

    # Random window of zrange+1 m/z bins.
    zrange = 25  # 20000
    zmin = random.randint(0, zdim - zrange - 1)
    zmax = zmin + zrange + 1

    omsiFile = omsi_file(filename, 'r')
    dataset = omsiFile.get_experiment(0).get_msidata(0)

    # Partition the x axis across workers; the last worker takes the
    # remainder left by the integer division.
    xstep = xdim / numWorkers
    xstart = xstep * worker_id
    xend = xdim if worker_id == (numWorkers - 1) else xstep * (worker_id + 1)

    image[xstart:xend, :] = np.var(dataset[xstart:xend, 0:ydim, zmin:zmax],
                                   axis=2)

    omsiFile.close_file()
    sys.stdout.flush()
Пример #5
0
def spectraSelect(args):
    """Worker task: average a random 5x5 pixel patch over this worker's
    z-range and store the result in the shared spectrum array."""
    filename, numWorkers, mp_arr, xdim, ydim, zdim, worker_id = args

    # Numpy view onto the shared multiprocessing buffer (no copy).
    out = np.frombuffer(mp_arr.get_obj(), dtype='uint16').reshape((zdim))

    # Choose a random 5x5 patch in the x/y plane.
    xmin = random.randint(0, xdim - 6)
    ymin = random.randint(0, ydim - 6)
    xmax = xmin + 5
    ymax = ymin + 5

    omsiFile = omsi_file(filename, 'r')
    msidata = omsiFile.get_experiment(0).get_msidata(0)

    # This worker's slice of the z axis; the last worker absorbs the
    # remainder of the integer division.
    zstep = zdim / numWorkers
    zmin = zstep * worker_id
    if worker_id == (numWorkers - 1):
        zmax = zdim
    else:
        zmax = zstep * (worker_id + 1)

    # Reduce the patch over both spatial axes.
    out[zmin:zmax] = np.mean(
        np.mean(msidata[xmin:xmax, ymin:ymax, zmin:zmax], axis=0), axis=0)

    omsiFile.close_file()
    sys.stdout.flush()
Пример #6
0
def generateTestFile(omsiOutFile, xdim, ydim, zdim, xchunk, ychunk, zchunk):
    """Write a synthetic (xdim, ydim, zdim) uint16 cube to a new OMSI
    file using the given HDF5 chunk shape, one x/y block at a time."""
    #Create the output HDF5 file
    try:
        omsiFile = omsi_file(omsiOutFile)
    except:
        print "Unexpected error creating the output file:", sys.exc_info()[0]
        exit(0)

    exp = omsiFile.create_experiment(exp_identifier="test")
    #Empty method description
    sample = exp.create_method_info()
    #Empty instrument description with a dummy m/z axis
    mzdata = np.ones(zdim)
    instrument = exp.create_instrument_info(instrumentname="undefined",
                                            mzdata=mzdata)
    #Reserve chunked storage for the image data
    data = exp.create_msidata(data_shape=(xdim, ydim, zdim),
                              data_type='uint16',
                              chunks=(xchunk, ychunk, zchunk))
    itertest = 0
    #Chunk counts per axis (last chunk may be partial)
    numChunksX = int(math.ceil(float(xdim) / float(xchunk)))
    numChunksY = int(math.ceil(float(ydim) / float(ychunk)))
    numChunksZ = int(math.ceil(float(zdim) / float(zchunk)))
    print "NumChunks : " + str(numChunksX) + " " + str(numChunksY) + " " + str(numChunksZ)
    numChunks = numChunksX * numChunksY * numChunksZ
    #Write the data one x/y block at a time (full z column per write)
    for ci in xrange(0, numChunksX):
        sys.stdout.write("[" + str(int(100. * float(ci) / float(numChunksX))) + "%]" + "\r")
        sys.stdout.flush()
        xstart = ci * xchunk
        xend = min(xstart + xchunk, xdim)
        for cj in xrange(0, numChunksY):
            ystart = cj * ychunk
            yend = min(ystart + ychunk, ydim)
            data[xstart:xend, ystart:yend, :] = (ci * ydim + cj)

    # Disabled variant kept for reference: write one x/y/z chunk at a
    # time instead of a full z column per write.
    #for xt in xrange(0, numChunksX):
    #    xstart = xt * xchunk
    #    xend = min(xstart + xchunk, xdim)
    #    for yt in xrange(0, numChunksY):
    #        ystart = yt * ychunk
    #        yend = min(ystart + ychunk, ydim)
    #        for zt in xrange(0, numChunksZ):
    #            zstart = zt * zchunk
    #            zend = min(zstart + zchunk, zdim)
    #            data[xstart:xend, ystart:yend, zstart:zend] = itertest
    #            itertest += 1
    omsiFile.close_file()
def generateTestFile(omsiOutFile, xdim, ydim, zdim, xchunk, ychunk, zchunk):
    """Create an OMSI test file with the requested chunking and fill it
    with synthetic uint16 data, writing one x/y block at a time."""
    #Create the output HDF5 file
    try:
        omsiFile = omsi_file(omsiOutFile)
    except:
        print "Unexpected error creating the output file:", sys.exc_info()[0]
        exit(0)

    exp = omsiFile.create_experiment(exp_identifier="test")
    #Empty method description
    sample = exp.create_method_info()
    #Empty instrument description with a dummy m/z axis
    instrument = exp.create_instrument_info(instrumentname="undefined",
                                            mzdata=np.ones(zdim))
    #Reserve chunked storage for the image data
    data = exp.create_msidata(data_shape=(xdim, ydim, zdim),
                              data_type='uint16',
                              chunks=(xchunk, ychunk, zchunk))
    itertest = 0
    #Number of chunks along each axis (last chunk may be partial)
    numChunksX = int(math.ceil(float(xdim) / float(xchunk)))
    numChunksY = int(math.ceil(float(ydim) / float(ychunk)))
    numChunksZ = int(math.ceil(float(zdim) / float(zchunk)))
    print "NumChunks : " + str(numChunksX) + " " + str(numChunksY) + " " + str(numChunksZ)
    numChunks = numChunksX * numChunksY * numChunksZ
    #Write the data one x/y block at a time (full z range per write)
    for xi in xrange(0, numChunksX):
        sys.stdout.write("[" + str(int(100. * float(xi) / float(numChunksX))) + "%]" + "\r")
        sys.stdout.flush()
        xstart = xi * xchunk
        xend = min(xstart + xchunk, xdim)
        for yi in xrange(0, numChunksY):
            ystart = yi * ychunk
            yend = min(ystart + ychunk, ydim)
            data[xstart:xend, ystart:yend, :] = (xi * ydim + yi)

    # Disabled variant kept for reference: write one x/y/z chunk at a
    # time instead of a full z column per write.
    #for xt in xrange(0, numChunksX):
    #    xstart = xt * xchunk
    #    xend = min(xstart + xchunk, xdim)
    #    for yt in xrange(0, numChunksY):
    #        ystart = yt * ychunk
    #        yend = min(ystart + ychunk, ydim)
    #        for zt in xrange(0, numChunksZ):
    #            zstart = zt * zchunk
    #            zend = min(zstart + zchunk, zdim)
    #            data[xstart:xend, ystart:yend, zstart:zend] = itertest
    #            itertest += 1
    omsiFile.close_file()
Пример #8
0
def sliceSelect(args):
    """Worker task: write the variance image over a random 26-bin m/z
    window into this worker's rows of the shared (xdim, ydim) array."""
    filename, numWorkers, mp_arr, xdim, ydim, zdim, worker_id = args

    # 2D numpy view onto the shared multiprocessing buffer (no copy).
    out = np.frombuffer(mp_arr.get_obj(), dtype='uint16').reshape((xdim, ydim))

    # Random m/z window of zrange+1 bins.
    zrange = 25  # 20000
    zmin = random.randint(0, zdim - zrange - 1)
    zmax = zmin + zrange + 1

    omsiFile = omsi_file(filename, 'r')
    msidata = omsiFile.get_experiment(0).get_msidata(0)

    # This worker's slice of the x axis; the last worker absorbs the
    # remainder of the integer division.
    xstep = xdim / numWorkers
    xstart = xstep * worker_id
    if worker_id == (numWorkers - 1):
        xend = xdim
    else:
        xend = xstep * (worker_id + 1)

    out[xstart:xend, :] = np.var(msidata[xstart:xend, 0:ydim, zmin:zmax],
                                 axis=2)

    omsiFile.close_file()
    sys.stdout.flush()
Пример #9
0
def main(argv=None):
    """Then main function"""

    import sys
    from sys import argv,exit
    
    if argv is None:
        argv = sys.argv
        
    #Check for correct usage
    if len(argv) < 2 :
        printHelp()
        exit(0)
        
    if len(argv) == 8 :
        
        infile = argv[1]
        xmin = int(argv[2])
        xmax = int(argv[3])
        ymin = int(argv[4])
        ymax = int(argv[5])
        zmin = int(argv[6])
        zmax = int(argv[7])
        start = time.time()
        d = omsi_file(infile, 'r').get_experiment(0).get_msidata(0)
        loaddata = d[xmin:xmax , ymin:ymax, zmin:zmax]
        #content = json.dumps( loaddata.tolist() )
        stop = (time.time() - start) 
        print stop
        exit(0)
    
    import numpy as np
    import os
    import random
    import subprocess

    repeats = 50
    outfolder  = argv[1]
    if not outfolder.endswith("/") :
        outfolder = outfolder+"/"
    
    #Baseline filelist
    filelist =  [ "/project/projectdirs/openmsi/manuscript_data/baseline/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/baseline/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20110929_Tumor624.h5" ,  "/project/projectdirs/openmsi/manuscript_data/baseline/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/baseline/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/baseline/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/baseline/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/baseline/20111207_KBL_Roots_BigChip_SmallRoots.h5" ]
    
    #Compressed 4x4x2048 filelist
    #filelist =  [ "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20110929_Tumor624.h5" ,  "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20111207_KBL_Roots_BigChip_SmallRoots.h5" ]
    
    #Uncompressed 4x4x2048 filelist
    #filelist =  [ "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20110929_Tumor624.h5" ,  "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20111207_KBL_Roots_BigChip_SmallRoots.h5" ]
    
    #Uncompressed autochunking filelist
    #filelist =  [ "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20110929_Tumor624.h5" ,  "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20111207_KBL_Roots_BigChip_SmallRoots.h5" ]
    
    #Compressed autochunking filelist
    #filelist =  [ "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20110929_Tumor624.h5" ,  "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20111207_KBL_Roots_BigChip_SmallRoots.h5" ]
    
    
    #filelist = ["/work2/bowen/TEST_DEP3.h5"]  
    
    data_shapes = {}
    results = {}
    for filename in filelist :

        #Initialze data shape
        f = omsi_file( filename , 'r' )
        d = f.get_experiment(0).get_msidata(0)
        data_shapes[filename] = d.shape
        f.close_file()
        #Initialze output storage
        results[filename] = np.zeros( repeats ,  dtype=[ ('mz-slice','f') , ('spectrum','f') , ('xyz-cube','f'), ('mz-slice-all','f') , ('spectrum-all','f') , ('xyz-cube-all','f')  , ('filesize' , 'f') ] )
        results[filename]['filesize'] = os.stat( filename ).st_size

    #Note: We compute each test seperately so that we have touched enough data from 
    #other files to avoid biases due to already cached data already at the beginning
    #of the tests.
    #Note: Depending on the file system, a significant amount of data (and in some 
    #cases complete files) may be cached by the file system itself. This can results
    #in large variations in the times for data acceses. This is particularly the 
    #case when two consecutive accesses happen to by chance access a similar portion
    #of the data. This behavior is expected and is what one expects to happen in real
    #life as well. It is, therefore, often informative to look at the general 
    #variability of results. E.g.,if all data is stored in a single block, then we
    #may see very slow access times at the beginning and then, once, all data has
    #been cached access times drop significantly. For well-chunked data, this
    #variability between access should be much lower.

    #Compute the slice query test
    for filename in filelist :
        print filename+" 25 mz-slices"
        #mz-slice selection 250,000 elements
        sliceWidthZ = 25 #xdim=100 , ydim=100
        for ri in xrange( 0 , repeats ) :

            xmin = 0
            xmax = data_shapes[filename][0]
            ymin = 0
            ymax = data_shapes[filename][1]
            zmin = random.randint(0, data_shapes[filename][2]-sliceWidthZ-1 )
            zmax = zmin + sliceWidthZ
            callCommand = ["python", "testhdf5_file_read.py" , filename , str(xmin) , str(xmax), str(ymin), str(ymax), str(zmin) , str(zmax) ]
            start = time.time()
            p2 = subprocess.Popen(callCommand , stdout = subprocess.PIPE)
            readTime =  float(p2.stdout.read())
            stop = (time.time() - start)
            results[filename]['mz-slice'][ri] = readTime
            results[filename]['mz-slice-all'][ri] = stop
            print str(results[filename]['mz-slice'][ri]) + "   " +str( results[filename]['mz-slice-all'][ri] )+ " " + str(xmin) + " " + str(xmax) + " " + str(ymin) + " " + str(ymax) + " " + str(zmin)  + " " + str(zmax)

    #Compute the spectra test
    for filename in filelist :
        print filename+" 3 x 3  spectra"
        #mz-slice selection 250,000 elements
        sliceWidthX = 3 
        sliceWidthY = 3
        for ri in xrange( 0 , repeats ) :

            xmin = random.randint(0, data_shapes[filename][0]-sliceWidthX-1 )
            xmax = xmin + sliceWidthX
            ymin = random.randint(0, data_shapes[filename][1]-sliceWidthY-1 )
            ymax = ymin + sliceWidthY
            zmin = 0
            zmax = data_shapes[filename][2]
            callCommand = ["python", "testhdf5_file_read.py" , filename , str(xmin) , str(xmax), str(ymin), str(ymax), str(zmin) , str(zmax) ]
            start = time.time()
            p2 = subprocess.Popen(callCommand , stdout = subprocess.PIPE)
            readTime =  float(p2.stdout.read())
            stop = (time.time() - start)
            results[filename]['spectrum'][ri] = readTime
            results[filename]['spectrum-all'][ri] = stop
            print str(results[filename]['spectrum'][ri]) + "   " +str( results[filename]['spectrum-all'][ri] )+ " " + str(xmin) + " " + str(xmax) + " " + str(ymin) + " " + str(ymax) + " " + str(zmin)  + " " + str(zmax)

    #Compte the cube test 
    for filename in filelist :
        print filename+" 20 x 20 x 1000  cube"
        #mz-slice selection 250,000 elements
        sliceWidthX = 20
        sliceWidthY = 20
        sliceWidthZ = 1000
        for ri in xrange( 0 , repeats ) :

            xmin = random.randint(0, data_shapes[filename][0]-sliceWidthX-1 )
            xmax = xmin + sliceWidthX
            ymin = random.randint(0, data_shapes[filename][1]-sliceWidthY-1 )
            ymax = ymin + sliceWidthY
            zmin = random.randint(0, data_shapes[filename][2]-sliceWidthZ-1 )
            zmax = zmin + sliceWidthZ
            callCommand = ["python", "testhdf5_file_read.py" , filename , str(xmin) , str(xmax), str(ymin), str(ymax), str(zmin) , str(zmax) ]
            start = time.time()
            p2 = subprocess.Popen(callCommand , stdout = subprocess.PIPE)
            readTime =  float(p2.stdout.read())
            stop = (time.time() - start)
            results[filename]['xyz-cube'][ri] = readTime
            results[filename]['xyz-cube-all'][ri] = stop
            print str(results[filename]['xyz-cube'][ri]) + "   " +str( results[filename]['xyz-cube-all'][ri] )+ " " + str(xmin) + " " + str(xmax) + " " + str(ymin) + " " + str(ymax) + " " + str(zmin)  + " " + str(zmax)
    
    for filename in filelist :
        
        infilename = os.path.split( filename )[1]
        outfile = outfolder+infilename+"_timings.txt"
        
        f = open( outfile , 'w' )
        for colName in results[filename].dtype.names :
            f.write( colName+" " )
        f.write("\n")
        np.savetxt( f , results[filename] )
        f.close()

    exit(0)
def main(argv=None):
    """Then main function"""

    import sys
    from sys import argv,exit
    
    if argv is None:
        argv = sys.argv
        
    #Check for correct usage
    if len(argv) !=3 :
        printHelp()
        exit(0)
        
    omsiOutFile  = argv[1]
    resultsFile  = argv[2]
    
    xdim = 100
    ydim = 100
    zdim = 100000
    
    xtest = [1   , 2   , 3   , 4   , 5   , 6   , 7   , 8   , 9   , 10  ]  #range(10,50,10)
    ytest = [1   , 2   , 3   , 4   , 5   , 6   , 7   , 8   , 9   , 10  ]  #range(10,50,10)
    ztest = [2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048]  #range(100, 10000, 100)
    
    allRes = np.zeros( len(xtest) , dtype=[ ('x','int32') , ('y','int32') , ('z','int32') , ('write','f') , ('z_min','f') , ('z_avg','f') , ('z_median','f') , ('z_max','f') , ('xy_min','f') , ('xy_avg','f') , ('xy_median','f') , ('xy_max','f') , ('xyz_min','f') , ('xyz_avg','f') , ('xyz_median','f') , ('xyz_max','f') , ('filesize' , 'f') ]  )
    
    numTests = 3
    repeats = 50

    
    for tr in xrange( 0 , len(xtest) ) :
        
        allRes[tr]['x'] = xtest[tr]
        allRes[tr]['y'] = ytest[tr]
        allRes[tr]['z'] = ztest[tr]

    #XYZ selection 25,000 elements
    sliceWidthX=5
    sliceWidthY=5
    sliceWidthZ = 1000

    for tr in xrange( 0 , len(xtest) ) :
        
        allRes[tr]['x'] = xtest[tr]
        allRes[tr]['y'] = ytest[tr]
        allRes[tr]['z'] = ztest[tr]
        
        xchunk = xtest[tr]
        ychunk = ytest[tr]
        zchunk = ztest[tr]
        print "Chunking:"+str(xchunk)+":"+str(ychunk)+":"+str(zchunk)
        start = time.time()
        generateTestFile( omsiOutFile , xdim , ydim, zdim, xchunk, ychunk , zchunk )
        allRes[tr]['write'] =  allRes[tr]['write']+(time.time() - start)
        allRes[tr]['filesize'] = os.stat( omsiOutFile ).st_size
        omsiFile = omsi_file( omsiOutFile )
        data = omsiFile.get_experiment(0).get_msidata(0)
        print "Time for data write:"+str(allRes[tr]['write'] )
        
        #Select xyz slize
        resXYZ = np.zeros( repeats , dtype='f' )
        for te in xrange( 0 , repeats ) :
            
            valX = random.randint(0, xdim-sliceWidthX-1 )
            valY = random.randint(0, ydim-sliceWidthY-1 )
            valZ = random.randint(0, zdim-sliceWidthZ-1 )
            start = time.time()
            d=np.sum( data[valX:(valX+sliceWidthX),valY:(valY+sliceWidthY),valZ:(valZ+sliceWidthZ)] )
            resXYZ[te] = (time.time() - start)
        
        np.savetxt( "resXYZ_aw_"+str(xchunk)+"_"+str(ychunk)+"_"+str(zchunk)+".txt" , resXYZ )
        allRes[tr]['xyz_min'] = np.min( resXYZ )
        allRes[tr]['xyz_avg'] = np.average( resXYZ )
        allRes[tr]['xyz_median'] = np.median( resXYZ )
        allRes[tr]['xyz_max'] = np.max( resXYZ )
        print "XYZ-Slicing: "+str(np.max(resXYZ)) +":"+str(np.average( resXYZ ))+":"+str(np.median( resXYZ ))+":"+str(np.min(resXYZ))
        
        omsiFile.close_file()
        os.remove( omsiOutFile  )
        
        
    #mz-slice selection 250,000 elements
    sliceWidthZ = 25 #xdim=100 , ydim=100
    for tr in xrange( 0 , len(xtest) ) :
        
        allRes[tr]['x'] = xtest[tr]
        allRes[tr]['y'] = ytest[tr]
        allRes[tr]['z'] = ztest[tr]
        
        xchunk = xtest[tr]
        ychunk = ytest[tr]
        zchunk = ztest[tr]
        print "Chunking:"+str(xchunk)+":"+str(ychunk)+":"+str(zchunk)
        start = time.time()
        generateTestFile( omsiOutFile , xdim , ydim, zdim, xchunk, ychunk , zchunk )
        allRes[tr]['write'] =  allRes[tr]['write']+(time.time() - start)
        omsiFile = omsi_file( omsiOutFile )
        data = omsiFile.get_experiment(0).get_msidata(0)
        print "Time for data write:"+str(allRes[tr]['write'] )

        #Perform different types of slicing opterations and keep track of the times
        #Select 10 slizes in z
        resZ = np.zeros( repeats , dtype='f' )
        for te in xrange( 0 , repeats ) :
            
            val = random.randint(0, zdim-sliceWidthZ-1 )
            start = time.time()
            d=np.sum( data[:,:,val:(val+sliceWidthZ)] )
            resZ[te] = (time.time() - start)
        
        np.savetxt( "resZ_aw_"+str(xchunk)+"_"+str(ychunk)+"_"+str(zchunk)+".txt" , resZ )
        allRes[tr]['z_min'] = np.min( resZ )
        allRes[tr]['z_avg'] = np.average( resZ )
        allRes[tr]['z_median'] = np.median( resZ )
        allRes[tr]['z_max'] = np.max( resZ )
        print "Z-Slicing: "+str(np.max(resZ)) +":"+str(np.average( resZ ))+":"+str(np.median( resZ ))+":"+str(np.min(resZ))
        
        omsiFile.close_file()
        os.remove( omsiOutFile  )
    
    #XY selection of multiple spectra. Selection size= 2,500,000 elements
    for tr in xrange( 0 , len(xtest) ) :
        
        xchunk = xtest[tr]
        ychunk = ytest[tr]
        zchunk = ztest[tr]
        print "Chunking:"+str(xchunk)+":"+str(ychunk)+":"+str(zchunk)
        start = time.time()
        generateTestFile( omsiOutFile , xdim , ydim, zdim, xchunk, ychunk , zchunk )
        allRes[tr]['write'] =  allRes[tr]['write']+(time.time() - start)
        omsiFile = omsi_file( omsiOutFile )
        data = omsiFile.get_experiment(0).get_msidata(0)
        print "Time for data write:"+str(allRes[tr]['write'] )
        
        #Select x/y
        resXY = np.zeros( repeats , dtype='f' )
        for te in xrange( 0 , repeats ) :
            
            valX = random.randint(0, xdim-sliceWidthX-1 )
            valY = random.randint(0, ydim-sliceWidthY-1 )
            start = time.time()
            d=np.sum( data[valX:(valX+sliceWidthX),valY:(valY+sliceWidthY),:] )
            resXY[te] = (time.time() - start)
        
        np.savetxt( "resXY_aw_"+str(xchunk)+"_"+str(ychunk)+"_"+str(zchunk)+".txt" , resXY )
        allRes[tr]['xy_min'] = np.min( resXY )
        allRes[tr]['xy_avg'] = np.average( resXY )
        allRes[tr]['xy_median'] = np.median( resXY )
        allRes[tr]['xy_max'] = np.max( resXY )
        print "XY-Slicing: "+str(np.max(resXY)) +":"+str(np.average( resXY ))+":"+str(np.median( resXY ))+":"+str(np.min(resXY))
        
        omsiFile.close_file()
        os.remove( omsiOutFile  )
    
    
        
    for tr in xrange( 0 , len(xtest) ) :
         allRes[tr]['write'] =  allRes[tr]['write']/float(numTests)
         
    f = open( resultsFile , 'w' )
    f.write( str(allRes.dtype.names) )
    f.write("\n")
    np.savetxt( f , allRes )
    f.close()
Пример #11
0
def generateChunkedTestFile(
        omsiOutFile,
        xdim,
        ydim,
        zdim,
        xchunk,
        ychunk,
        zchunk,
        compress=False,
        donorFile="/project/projectdirs/openmsi/omsi_data/old/TEST.h5"):
    """Create an OMSI test file of shape (xdim, ydim, zdim) with the given
    chunking (and optional gzip compression) and fill it either with data
    copied from ``donorFile`` or with synthetic values; return the time
    spent allocating and writing the data, in seconds.

    A non-positive value in any of xchunk/ychunk/zchunk disables chunking.
    Note that ``useDonorFile`` is hard-coded to True below, so the two
    artificial-data branches are currently dead code.
    """

    # Hard-coded mode switches; only the donor-file branch is active.
    writeFullSpectra = False
    useChunking = (xchunk > 0 and ychunk > 0 and zchunk > 0)
    print useChunking
    useDonorFile = True
    if useDonorFile:
        # Load the complete donor cube into memory up front.
        inFile = omsi_file(donorFile)
        inData = inFile.get_experiment(0).get_msidata(0)[:]
        inFile.close_file()

    #Create the output HDF5 file
    try:
        omsiFile = omsi_file(omsiOutFile)
    except:
        print "Unexpected error creating the output file:", sys.exc_info()[0]
        exit(0)

    exp = omsiFile.create_experiment(exp_identifier="test")
    #Create an empty method descrition
    sample = exp.create_method_info()
    #Create an empty instrument description (dummy m/z axis of ones)
    mzdata = np.ones(zdim)
    instrument = exp.create_instrument_info(instrument_name="undefined",
                                            mzdata=mzdata)

    #Allocate space in the HDF5 file for the img data
    start = time.time()
    if compress:
        #Use compresion (implies chunking with the given chunk shape)
        data = exp.create_msidata_full_cube(data_shape=(xdim, ydim, zdim),
                                            data_type='uint16',
                                            chunks=(xchunk, ychunk, zchunk),
                                            compression='gzip',
                                            compression_opts=4)
    elif useChunking:
        #Use chunking
        data = exp.create_msidata_full_cube(
            data_shape=(xdim, ydim, zdim),
            data_type='uint16',
            chunks=(xchunk, ychunk,
                    zchunk))  #,  compression='gzip' , compression_opts=4 )
    else:
        #Don't use chunking and compression
        data = exp.create_msidata_full_cube(data_shape=(xdim, ydim, zdim),
                                            data_type='uint16')

    itertest = 0
    if useChunking:

        # Number of chunks per axis (last chunk may be partial).
        numChunksX = int(math.ceil(float(xdim) / float(xchunk)))
        numChunksY = int(math.ceil(float(ydim) / float(ychunk)))
        numChunksZ = int(math.ceil(float(zdim) / float(zchunk)))
        print "NumChunks : " + str(numChunksX) + " " + str(
            numChunksY) + " " + str(numChunksZ)
        numChunks = numChunksX * numChunksY * numChunksZ

    else:
        #Write on spectrum at a time if we want a contiguous data layout
        # NOTE(review): with chunking disabled, xchunk/ychunk/zchunk can
        # be <= 0 yet are still used as strides in the write loops below
        # — this path looks broken for the donor branch; confirm.
        numChunksX = int(math.ceil(float(xdim) / 4.0))
        numChunksY = int(math.ceil(float(ydim) / 4.0))
        numChunksZ = 2

    if not useDonorFile:
        if writeFullSpectra:
            print "Writing mxm spectra at a time (artifical data)"
            #Write data one x/y chunk at a time (i.e., multiple z-chunks are writtent at once)
            for xi in xrange(0, numChunksX):
                sys.stdout.write("[" +
                                 str(int(100. * float(xi) /
                                         float(numChunksX))) + "%]" + "\r")
                sys.stdout.flush()
                xstart = xi * xchunk
                xend = min(xstart + xchunk, xdim)
                for yi in xrange(0, numChunksY):
                    ystart = yi * ychunk
                    yend = min(ystart + ychunk, ydim)
                    #Save the spectrum to the hdf5 file
                    data[xstart:xend, ystart:yend, :] = (xi * ydim + yi)

        else:
            #Write data one x/y/z chunk at a time
            print "Writing one x/y/z chunk at a time (artifical data)"
            for xt in xrange(0, numChunksX):
                sys.stdout.write("[" +
                                 str(int(100. * float(xt) /
                                         float(numChunksX))) + "%]" + "\r")
                sys.stdout.flush()
                xstart = xt * xchunk
                xend = min(xstart + xchunk, xdim)
                for yt in xrange(0, numChunksY):
                    ystart = yt * ychunk
                    yend = min(ystart + ychunk, ydim)
                    for zt in xrange(0, numChunksZ):
                        zstart = zt * zchunk
                        zend = min(zstart + zchunk, zdim)
                        data[xstart:xend, ystart:yend,
                             zstart:zend] = (xt * ydim * zdim + yt * zdim + zt)

    else:
        print "Writing one x/y/z chunk at a time (donor dataset)"
        #Write data into all the chunks one x,y,z chunk at a time using the donor file
        for xt in xrange(0, numChunksX):
            sys.stdout.write("[" +
                             str(int(100. * float(xt) / float(numChunksX))) +
                             "%]" + "\r")
            sys.stdout.flush()
            xstart = xt * xchunk
            xend = min(xstart + xchunk, xdim)
            for yt in xrange(0, numChunksY):
                ystart = yt * ychunk
                yend = min(ystart + ychunk, ydim)
                for zt in xrange(0, numChunksZ):
                    zstart = zt * zchunk
                    zend = min(zstart + zchunk, zdim)
                    #print "Write : "+str(xstart)+" "+str(xend)+" "+str(ystart)+" "+str(yend)+" "+str(zstart)+" "+str(zend)
                    # NOTE(review): diff is computed but never used.
                    diff = inData.shape[2] - zend
                    # If the donor cube is shorter in z than the target,
                    # re-read its tail so the source slice has the same
                    # width as the destination slice.
                    myend = zend
                    mystart = zstart
                    if inData.shape[2] < zend:
                        myend = inData.shape[2]
                        mystart = inData.shape[2] - (zend - zstart)
                    a = inData[xstart:xend, ystart:yend, mystart:myend]
                    #print str(zt)+" : "+str(a.shape)+" : "+str(mystart)+" "+str(myend)
                    data[xstart:xend, ystart:yend, zstart:zend] = a

                    itertest += 1
                    #sys.stdout.write("Generating Data: [" +str( int( 100.* float(itertest)/float(numChunks) )) +"%]"+ "\r")
                    #sys.stdout.flush()

    omsiFile.close_file()

    # Elapsed time covers dataset allocation plus all writes.
    return (time.time() - start)
Пример #12
0
def main(argv=None):
    """Then main function"""

    import sys
    from sys import argv, exit

    if argv is None:
        argv = sys.argv

    #Check for correct usage
    if len(argv) < 2:
        printHelp()
        exit(0)

    if len(argv) == 8:

        infile = argv[1]
        xmin = int(argv[2])
        xmax = int(argv[3])
        ymin = int(argv[4])
        ymax = int(argv[5])
        zmin = int(argv[6])
        zmax = int(argv[7])
        start = time.time()
        d = omsi_file(infile, 'r').get_experiment(0).get_msidata(0)
        loaddata = d[xmin:xmax, ymin:ymax, zmin:zmax]
        #content = json.dumps( loaddata.tolist() )
        stop = (time.time() - start)
        print stop
        exit(0)

    import numpy as np
    import os
    import random
    import subprocess

    repeats = 50
    outfolder = argv[1]
    if not outfolder.endswith("/"):
        outfolder = outfolder + "/"

    #Baseline filelist
    filelist = [
        "/project/projectdirs/openmsi/manuscript_data/baseline/11042008_NIMS.h5",
        "/project/projectdirs/openmsi/manuscript_data/baseline/20121012_lipid_extracts.h5",
        "/project/projectdirs/openmsi/manuscript_data/baseline/20110929_Tumor624.h5",
        "/project/projectdirs/openmsi/manuscript_data/baseline/Brain.h5",
        "/project/projectdirs/openmsi/manuscript_data/baseline/20111012_Tumor458_50micronSS_D.h5",
        "/project/projectdirs/openmsi/manuscript_data/baseline/Microbial_Coculture.h5",
        "/project/projectdirs/openmsi/manuscript_data/baseline/20111208_KBL_Roots_SmallChip_BigRoot.h5",
        "/project/projectdirs/openmsi/manuscript_data/baseline/nimzyme.h5",
        "/project/projectdirs/openmsi/manuscript_data/baseline/20120801_metabolite_standards.h5",
        "/project/projectdirs/openmsi/manuscript_data/baseline/20111207_KBL_Roots_BigChip_SmallRoots.h5"
    ]

    #Compressed 4x4x2048 filelist
    #filelist =  [ "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20110929_Tumor624.h5" ,  "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_4_4_2048/20111207_KBL_Roots_BigChip_SmallRoots.h5" ]

    #Uncompressed 4x4x2048 filelist
    #filelist =  [ "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20110929_Tumor624.h5" ,  "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_4_4_2048/20111207_KBL_Roots_BigChip_SmallRoots.h5" ]

    #Uncompressed autochunking filelist
    #filelist =  [ "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20110929_Tumor624.h5" ,  "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/uncompressed_autochunking/20111207_KBL_Roots_BigChip_SmallRoots.h5" ]

    #Compressed autochunking filelist
    #filelist =  [ "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/11042008_NIMS.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20121012_lipid_extracts.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20110929_Tumor624.h5" ,  "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/Brain.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20111012_Tumor458_50micronSS_D.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/Microbial_Coculture.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20111208_KBL_Roots_SmallChip_BigRoot.h5" , "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/nimzyme.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20120801_metabolite_standards.h5", "/project/projectdirs/openmsi/manuscript_data/compressed_autochunking/20111207_KBL_Roots_BigChip_SmallRoots.h5" ]

    #filelist = ["/work2/bowen/TEST_DEP3.h5"]

    data_shapes = {}
    results = {}
    for filename in filelist:

        #Initialze data shape
        f = omsi_file(filename, 'r')
        d = f.get_experiment(0).get_msidata(0)
        data_shapes[filename] = d.shape
        f.close_file()
        #Initialze output storage
        results[filename] = np.zeros(repeats,
                                     dtype=[('mz-slice', 'f'),
                                            ('spectrum', 'f'),
                                            ('xyz-cube', 'f'),
                                            ('mz-slice-all', 'f'),
                                            ('spectrum-all', 'f'),
                                            ('xyz-cube-all', 'f'),
                                            ('filesize', 'f')])
        results[filename]['filesize'] = os.stat(filename).st_size

    #Note: We compute each test seperately so that we have touched enough data from
    #other files to avoid biases due to already cached data already at the beginning
    #of the tests.
    #Note: Depending on the file system, a significant amount of data (and in some
    #cases complete files) may be cached by the file system itself. This can results
    #in large variations in the times for data acceses. This is particularly the
    #case when two consecutive accesses happen to by chance access a similar portion
    #of the data. This behavior is expected and is what one expects to happen in real
    #life as well. It is, therefore, often informative to look at the general
    #variability of results. E.g.,if all data is stored in a single block, then we
    #may see very slow access times at the beginning and then, once, all data has
    #been cached access times drop significantly. For well-chunked data, this
    #variability between access should be much lower.

    #Compute the slice query test
    for filename in filelist:
        print filename + " 25 mz-slices"
        #mz-slice selection 250,000 elements
        sliceWidthZ = 25  #xdim=100 , ydim=100
        for ri in xrange(0, repeats):

            xmin = 0
            xmax = data_shapes[filename][0]
            ymin = 0
            ymax = data_shapes[filename][1]
            zmin = random.randint(0,
                                  data_shapes[filename][2] - sliceWidthZ - 1)
            zmax = zmin + sliceWidthZ
            callCommand = [
                "python", "testhdf5_file_read.py", filename,
                str(xmin),
                str(xmax),
                str(ymin),
                str(ymax),
                str(zmin),
                str(zmax)
            ]
            start = time.time()
            p2 = subprocess.Popen(callCommand, stdout=subprocess.PIPE)
            readTime = float(p2.stdout.read())
            stop = (time.time() - start)
            results[filename]['mz-slice'][ri] = readTime
            results[filename]['mz-slice-all'][ri] = stop
            print str(results[filename]['mz-slice'][ri]) + "   " + str(
                results[filename]['mz-slice-all'][ri]) + " " + str(
                    xmin) + " " + str(xmax) + " " + str(ymin) + " " + str(
                        ymax) + " " + str(zmin) + " " + str(zmax)

    #Compute the spectra test
    for filename in filelist:
        print filename + " 3 x 3  spectra"
        #mz-slice selection 250,000 elements
        sliceWidthX = 3
        sliceWidthY = 3
        for ri in xrange(0, repeats):

            xmin = random.randint(0,
                                  data_shapes[filename][0] - sliceWidthX - 1)
            xmax = xmin + sliceWidthX
            ymin = random.randint(0,
                                  data_shapes[filename][1] - sliceWidthY - 1)
            ymax = ymin + sliceWidthY
            zmin = 0
            zmax = data_shapes[filename][2]
            callCommand = [
                "python", "testhdf5_file_read.py", filename,
                str(xmin),
                str(xmax),
                str(ymin),
                str(ymax),
                str(zmin),
                str(zmax)
            ]
            start = time.time()
            p2 = subprocess.Popen(callCommand, stdout=subprocess.PIPE)
            readTime = float(p2.stdout.read())
            stop = (time.time() - start)
            results[filename]['spectrum'][ri] = readTime
            results[filename]['spectrum-all'][ri] = stop
            print str(results[filename]['spectrum'][ri]) + "   " + str(
                results[filename]['spectrum-all'][ri]) + " " + str(
                    xmin) + " " + str(xmax) + " " + str(ymin) + " " + str(
                        ymax) + " " + str(zmin) + " " + str(zmax)

    #Compte the cube test
    for filename in filelist:
        print filename + " 20 x 20 x 1000  cube"
        #mz-slice selection 250,000 elements
        sliceWidthX = 20
        sliceWidthY = 20
        sliceWidthZ = 1000
        for ri in xrange(0, repeats):

            xmin = random.randint(0,
                                  data_shapes[filename][0] - sliceWidthX - 1)
            xmax = xmin + sliceWidthX
            ymin = random.randint(0,
                                  data_shapes[filename][1] - sliceWidthY - 1)
            ymax = ymin + sliceWidthY
            zmin = random.randint(0,
                                  data_shapes[filename][2] - sliceWidthZ - 1)
            zmax = zmin + sliceWidthZ
            callCommand = [
                "python", "testhdf5_file_read.py", filename,
                str(xmin),
                str(xmax),
                str(ymin),
                str(ymax),
                str(zmin),
                str(zmax)
            ]
            start = time.time()
            p2 = subprocess.Popen(callCommand, stdout=subprocess.PIPE)
            readTime = float(p2.stdout.read())
            stop = (time.time() - start)
            results[filename]['xyz-cube'][ri] = readTime
            results[filename]['xyz-cube-all'][ri] = stop
            print str(results[filename]['xyz-cube'][ri]) + "   " + str(
                results[filename]['xyz-cube-all'][ri]) + " " + str(
                    xmin) + " " + str(xmax) + " " + str(ymin) + " " + str(
                        ymax) + " " + str(zmin) + " " + str(zmax)

    for filename in filelist:

        infilename = os.path.split(filename)[1]
        outfile = outfolder + infilename + "_timings.txt"

        f = open(outfile, 'w')
        for colName in results[filename].dtype.names:
            f.write(colName + " ")
        f.write("\n")
        np.savetxt(f, results[filename])
        f.close()

    exit(0)
Пример #13
0
def main(argv=None):
    """Then main function"""

    import sys
    from sys import argv, exit

    if argv is None:
        argv = sys.argv

    #Check for correct usage
    if len(argv) != 3:
        printHelp()
        exit(0)

    omsiOutFile = argv[1]
    resultsFile = argv[2]

    xdim = 100
    ydim = 100
    zdim = 100000

    xtest = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]  #range(10,50,10)
    ytest = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]  #range(10,50,10)
    ztest = [2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
             2048]  #range(100, 10000, 100)

    allRes = np.zeros(len(xtest),
                      dtype=[('x', 'int32'), ('y', 'int32'), ('z', 'int32'),
                             ('write', 'f'), ('z_min', 'f'), ('z_avg', 'f'),
                             ('z_median', 'f'), ('z_max', 'f'),
                             ('xy_min', 'f'), ('xy_avg', 'f'),
                             ('xy_median', 'f'), ('xy_max', 'f'),
                             ('xyz_min', 'f'), ('xyz_avg', 'f'),
                             ('xyz_median', 'f'), ('xyz_max', 'f'),
                             ('filesize', 'f')])

    numTests = 3
    repeats = 50

    for tr in xrange(0, len(xtest)):

        allRes[tr]['x'] = xtest[tr]
        allRes[tr]['y'] = ytest[tr]
        allRes[tr]['z'] = ztest[tr]

    #XYZ selection 25,000 elements
    sliceWidthX = 5
    sliceWidthY = 5
    sliceWidthZ = 1000

    for tr in xrange(0, len(xtest)):

        allRes[tr]['x'] = xtest[tr]
        allRes[tr]['y'] = ytest[tr]
        allRes[tr]['z'] = ztest[tr]

        xchunk = xtest[tr]
        ychunk = ytest[tr]
        zchunk = ztest[tr]
        print "Chunking:" + str(xchunk) + ":" + str(ychunk) + ":" + str(zchunk)
        start = time.time()
        generateTestFile(omsiOutFile, xdim, ydim, zdim, xchunk, ychunk, zchunk)
        allRes[tr]['write'] = allRes[tr]['write'] + (time.time() - start)
        allRes[tr]['filesize'] = os.stat(omsiOutFile).st_size
        omsiFile = omsi_file(omsiOutFile)
        data = omsiFile.get_experiment(0).get_msidata(0)
        print "Time for data write:" + str(allRes[tr]['write'])

        #Select xyz slize
        resXYZ = np.zeros(repeats, dtype='f')
        for te in xrange(0, repeats):

            valX = random.randint(0, xdim - sliceWidthX - 1)
            valY = random.randint(0, ydim - sliceWidthY - 1)
            valZ = random.randint(0, zdim - sliceWidthZ - 1)
            start = time.time()
            d = np.sum(data[valX:(valX + sliceWidthX),
                            valY:(valY + sliceWidthY),
                            valZ:(valZ + sliceWidthZ)])
            resXYZ[te] = (time.time() - start)

        np.savetxt(
            "resXYZ_aw_" + str(xchunk) + "_" + str(ychunk) + "_" +
            str(zchunk) + ".txt", resXYZ)
        allRes[tr]['xyz_min'] = np.min(resXYZ)
        allRes[tr]['xyz_avg'] = np.average(resXYZ)
        allRes[tr]['xyz_median'] = np.median(resXYZ)
        allRes[tr]['xyz_max'] = np.max(resXYZ)
        print "XYZ-Slicing: " + str(np.max(resXYZ)) + ":" + str(
            np.average(resXYZ)) + ":" + str(np.median(resXYZ)) + ":" + str(
                np.min(resXYZ))

        omsiFile.close_file()
        os.remove(omsiOutFile)

    #mz-slice selection 250,000 elements
    sliceWidthZ = 25  #xdim=100 , ydim=100
    for tr in xrange(0, len(xtest)):

        allRes[tr]['x'] = xtest[tr]
        allRes[tr]['y'] = ytest[tr]
        allRes[tr]['z'] = ztest[tr]

        xchunk = xtest[tr]
        ychunk = ytest[tr]
        zchunk = ztest[tr]
        print "Chunking:" + str(xchunk) + ":" + str(ychunk) + ":" + str(zchunk)
        start = time.time()
        generateTestFile(omsiOutFile, xdim, ydim, zdim, xchunk, ychunk, zchunk)
        allRes[tr]['write'] = allRes[tr]['write'] + (time.time() - start)
        omsiFile = omsi_file(omsiOutFile)
        data = omsiFile.get_experiment(0).get_msidata(0)
        print "Time for data write:" + str(allRes[tr]['write'])

        #Perform different types of slicing opterations and keep track of the times
        #Select 10 slizes in z
        resZ = np.zeros(repeats, dtype='f')
        for te in xrange(0, repeats):

            val = random.randint(0, zdim - sliceWidthZ - 1)
            start = time.time()
            d = np.sum(data[:, :, val:(val + sliceWidthZ)])
            resZ[te] = (time.time() - start)

        np.savetxt(
            "resZ_aw_" + str(xchunk) + "_" + str(ychunk) + "_" + str(zchunk) +
            ".txt", resZ)
        allRes[tr]['z_min'] = np.min(resZ)
        allRes[tr]['z_avg'] = np.average(resZ)
        allRes[tr]['z_median'] = np.median(resZ)
        allRes[tr]['z_max'] = np.max(resZ)
        print "Z-Slicing: " + str(np.max(resZ)) + ":" + str(
            np.average(resZ)) + ":" + str(np.median(resZ)) + ":" + str(
                np.min(resZ))

        omsiFile.close_file()
        os.remove(omsiOutFile)

    #XY selection of multiple spectra. Selection size= 2,500,000 elements
    for tr in xrange(0, len(xtest)):

        xchunk = xtest[tr]
        ychunk = ytest[tr]
        zchunk = ztest[tr]
        print "Chunking:" + str(xchunk) + ":" + str(ychunk) + ":" + str(zchunk)
        start = time.time()
        generateTestFile(omsiOutFile, xdim, ydim, zdim, xchunk, ychunk, zchunk)
        allRes[tr]['write'] = allRes[tr]['write'] + (time.time() - start)
        omsiFile = omsi_file(omsiOutFile)
        data = omsiFile.get_experiment(0).get_msidata(0)
        print "Time for data write:" + str(allRes[tr]['write'])

        #Select x/y
        resXY = np.zeros(repeats, dtype='f')
        for te in xrange(0, repeats):

            valX = random.randint(0, xdim - sliceWidthX - 1)
            valY = random.randint(0, ydim - sliceWidthY - 1)
            start = time.time()
            d = np.sum(data[valX:(valX + sliceWidthX),
                            valY:(valY + sliceWidthY), :])
            resXY[te] = (time.time() - start)

        np.savetxt(
            "resXY_aw_" + str(xchunk) + "_" + str(ychunk) + "_" + str(zchunk) +
            ".txt", resXY)
        allRes[tr]['xy_min'] = np.min(resXY)
        allRes[tr]['xy_avg'] = np.average(resXY)
        allRes[tr]['xy_median'] = np.median(resXY)
        allRes[tr]['xy_max'] = np.max(resXY)
        print "XY-Slicing: " + str(np.max(resXY)) + ":" + str(
            np.average(resXY)) + ":" + str(np.median(resXY)) + ":" + str(
                np.min(resXY))

        omsiFile.close_file()
        os.remove(omsiOutFile)

    for tr in xrange(0, len(xtest)):
        allRes[tr]['write'] = allRes[tr]['write'] / float(numTests)

    f = open(resultsFile, 'w')
    f.write(str(allRes.dtype.names))
    f.write("\n")
    np.savetxt(f, allRes)
    f.close()
Пример #14
0
from omsi.dataformat.omsi_file import *
#Example: open an OpenMSI HDF5 file read-only and inspect its content.
#f is an object of the type omsi_file. For more information execute:
#help( omsi_file )
f = omsi_file("/project/projectdirs/openmsi/omsi_data/20120711_Brain.h5" , 'r' )
#Get the number of experiments stored in the file
num_exp = f.get_num_experiments()
#Get the first experiment.
#exp0 is an object of the type omsi_file_experiment. For more information execute:
#help( omsi_file_experiment )
exp0 = f.get_experiment(0)
#Get the number of available MSI datasets and analyses for this experiment
num_dat = exp0.get_num_msidata()
num_ana = exp0.get_num_analysis()
#Get the raw MSI dataset, i.e., the API object that manages the dataset.
#We can slice into d using standard array syntax. The MSI dataset defines
#a 3D cube in (x,y,m/z) and contains the corresponding intensity values.
#The m/z values are the same for each spectrum and are stored in a
#separate 1D array. In the call below we get the MSI dataset with index 0
#of the experiment retrieved above. An OpenMSI file may contain multiple
#experiments, each of which may have multiple MSI datasets associated
#with it.
#d is an object of the type omsi_file_msidata. For more information execute:
#help( omsi_file_msidata )
d = exp0.get_msidata(0)
#Number of pixels in x and y, and number of m/z bins
numX  = d.shape[0]
numY  = d.shape[1]
numMZ = d.shape[2]
#Get the m/z axis. This is a 1D array of length d.shape[2]
mz = d.mz
Пример #15
0
def generateChunkedTestFile( omsiOutFile , xdim , ydim, zdim, xchunk, ychunk , zchunk  , compress=False , donorFile = "/project/projectdirs/openmsi/omsi_data/old/TEST.h5" ) :
    
    writeFullSpectra=False
    useChunking = (xchunk>0 and ychunk>0 and zchunk>0 ) 
    print useChunking
    useDonorFile = True
    if useDonorFile :
        inFile = omsi_file( donorFile )
        inData = inFile.get_experiment(0).get_msidata(0)[:]
        inFile.close_file()


    #Create the output HDF5 file
    try:
        omsiFile = omsi_file( omsiOutFile )
    except:
        print "Unexpected error creating the output file:", sys.exc_info()[0]
        exit(0)
        
    exp = omsiFile.create_experiment( exp_identifier = "test" )
    #Create an empty method descrition
    sample = exp.create_method_info()
    #Create an empty instrument description
    mzdata = np.ones( zdim )
    instrument = exp.create_instrument_info(instrumentname="undefined" , mzdata=mzdata )

    #Allocate space in the HDF5 file for the img data
    start = time.time()
    if compress : 
        #Use compresion
        data = exp.create_msidata(data_shape=( xdim , ydim , zdim  ) , data_type = 'uint16' ,  chunks=(xchunk,ychunk,zchunk) ,  compression='gzip' , compression_opts=4 )
    elif useChunking :
        #Use chunking
        data = exp.create_msidata(data_shape=( xdim , ydim , zdim  ) , data_type = 'uint16' ,  chunks=(xchunk,ychunk,zchunk) ) #,  compression='gzip' , compression_opts=4 )
    else :
        #Don't use chunking and compression
        data = exp.create_msidata(data_shape=( xdim , ydim , zdim  ) , data_type = 'uint16' )
 
    itertest=0 
    if useChunking :

        numChunksX = int( math.ceil( float(xdim)/float(xchunk) ) )
        numChunksY = int( math.ceil( float(ydim)/float(ychunk) ) )
        numChunksZ = int( math.ceil( float(zdim)/float(zchunk) ) )
        print "NumChunks : "+str(numChunksX)+" "+str(numChunksY)+" "+str(numChunksZ)
        numChunks =  numChunksX*numChunksY*numChunksZ

    else :
       #Write on spectrum at a time if we want a contiguous data layout 
       numChunksX = int( math.ceil( float(xdim)/4.0 ) )
       numChunksY = int( math.ceil( float(ydim)/4.0 ) )
       numChunksZ = 2

    if not useDonorFile :
        if writeFullSpectra :
            print "Writing mxm spectra at a time (artifical data)"
            #Write data one x/y chunk at a time (i.e., multiple z-chunks are writtent at once)
            for xi in xrange( 0 , numChunksX ) :
                sys.stdout.write("[" +str( int( 100.* float(xi)/float(numChunksX) )) +"%]"+ "\r")
                sys.stdout.flush()
                xstart = xi*xchunk
                xend = min(  xstart+xchunk , xdim)
                for yi in xrange( 0 , numChunksY ) :
                    ystart = yi*ychunk
                    yend = min( ystart+ychunk , ydim )
                    #Save the spectrum to the hdf5 file
                    data[xstart:xend , ystart:yend, : ] = (xi*ydim + yi)
    
        else :
            #Write data one x/y/z chunk at a time
            print "Writing one x/y/z chunk at a time (artifical data)"
            for xt in xrange(0 , numChunksX) :
                sys.stdout.write("[" +str( int( 100.* float(xt)/float(numChunksX) )) +"%]"+ "\r")
                sys.stdout.flush()
                xstart = xt*xchunk
                xend = min(  xstart+xchunk , xdim)
                for yt in xrange(0, numChunksY ) :
                    ystart = yt*ychunk
                    yend = min( ystart+ychunk , ydim )
                    for zt in xrange(0, numChunksZ ) :
                        zstart = zt*zchunk
                        zend = min( zstart+zchunk , zdim )
                        data[xstart:xend , ystart:yend, zstart:zend ] =  (xt*ydim*zdim + yt*zdim  +zt)

    else :
        print "Writing one x/y/z chunk at a time (donor dataset)"
        #Write data into all the chunks one x,y,z chunk at a time using the donor file
        for xt in xrange(0, numChunksX ) :
            sys.stdout.write("[" +str( int( 100.* float(xt)/float(numChunksX) )) +"%]"+ "\r")
            sys.stdout.flush()
            xstart = xt*xchunk
            xend = min(  xstart+xchunk , xdim)
            for yt in xrange(0, numChunksY ) :
                ystart = yt*ychunk
                yend = min( ystart+ychunk , ydim )
                for zt in xrange(0, numChunksZ ) :
                    zstart = zt*zchunk
                    zend = min( zstart+zchunk , zdim )
                    #print "Write : "+str(xstart)+" "+str(xend)+" "+str(ystart)+" "+str(yend)+" "+str(zstart)+" "+str(zend)
                    diff = inData.shape[2] - zend
                    myend = zend
                    mystart = zstart
                    if inData.shape[2] < zend :
                        myend = inData.shape[2]
                        mystart = inData.shape[2]-(zend-zstart)
                    a = inData[xstart:xend , ystart:yend, mystart:myend ]
                    #print str(zt)+" : "+str(a.shape)+" : "+str(mystart)+" "+str(myend)
                    data[xstart:xend , ystart:yend, zstart:zend ] = a
        
                    itertest+=1
                    #sys.stdout.write("Generating Data: [" +str( int( 100.* float(itertest)/float(numChunks) )) +"%]"+ "\r")
                    #sys.stdout.flush()

    omsiFile .close_file()
    
    return (time.time() - start)