def main(options, args):
    starttime = Timer.starttimer()
    #Cache thrashing is common when working with large files; set a larger than
    #normal cache (1GB) to help alleviate misses.
    gdal.SetCacheMax(1073741824)

    #Check for input
    if not args:
        print "\nERROR: You must supply an input data set.\n"
        sys.exit(0)

    #Get the stretch type
    stretch = OptParse.get_stretch(options)

    #Get some info about the machine for multiprocessing
    cores = multiprocessing.cpu_count()
    cores *= 2
    print "Processing on %i cores." % cores

    #Load the input dataset using the GdalIO class and get / set the output datatype.
    dataset = GdalIO.GdalIO(args[0])
    raster = dataset.load()

    #Default to the input datatype unless the user specified one
    if options['dtype'] is None:
        dtype = gdal.GetDataTypeName(raster.GetRasterBand(1).DataType)
    else:
        dtype = options['dtype']

    #Create an output if the stretch is written to disk
    xsize, ysize, bands, projection, geotransform = dataset.info(raster)
    output = dataset.create_output("", options['output'], xsize, ysize, bands,
                                   projection, geotransform,
                                   gdal.GetDataTypeByName(dtype))

    #Segment the image to handle either RAM constraints or selective processing
    segments = Segment.segment_image(xsize, ysize, options['vint'], options['hint'])

    for b in xrange(bands):
        band = raster.GetRasterBand(b + 1)
        bandstats = Stats.get_band_stats(band)
        for key in bandstats.iterkeys():
            options[key] = bandstats[key]

        #Get the size of the segments to be manipulated
        piecenumber = 1
        for chunk in segments:
            print "Image segmented. Processing segment %i of %i" % (piecenumber, len(segments))
            piecenumber += 1
            xstart, ystart, intervalx, intervaly = chunk
            array = band.ReadAsArray(xstart, ystart, intervalx, intervaly).astype(numpy.float32)

            #Mask the no-data value; the band-level value takes precedence
            if options['ndv_band'] is not None:
                array = numpy.ma.masked_values(array, options['ndv_band'], copy=False)
            elif options['ndv'] is not None:
                array = numpy.ma.masked_values(array, options['ndv'], copy=False)

            if 'stretch' in stretch.__name__:
                array = Stats.normalize(array, options['bandmin'], options['bandmax'], dtype)

            #If the user wants to calculate stats per segment:
            if options['segment']:
                stats = Stats.get_array_stats(array, stretch)
                for key in stats.iterkeys():
                    options[key] = stats[key]
            #Otherwise use the per-band stats for each segment
            else:
                options['mean'] = options['bandmean']
                options['maximum'] = options['bandmax']
                options['minimum'] = options['bandmin']
                options['standard_deviation'] = options['bandstd']

            y, x = array.shape

            #Calculate the histogram and cdf if we need them, once per segment
            #rather than once per core.
            if options['histequ_stretch']:
                cdf, bins = Stats.gethist_cdf(array, options['num_bins'])
                options['cdf'] = cdf
                options['bins'] = bins

            #Fill the masked values with NaN to get to a shared array
            if options['ndv'] is not None:
                array = array.filled(numpy.nan)

            #Create a ctypes shared-memory array; init() publishes it as the
            #module-global shared_arr used by the worker processes.
            init(ArrayConvert.SharedMemArray(array))

            step = y // cores
            jobs = []
            if step != 0:
                for i in range(0, y, step):
                    p = multiprocessing.Process(target=stretch,
                                                args=(shared_arr, slice(i, i + step)),
                                                kwargs=options)
                    jobs.append(p)
                for job in jobs:
                    job.start()
                    del job
                for job in jobs:
                    job.join()
                    del job

            #Return the array to the proper data range and write it out.
            #Scale if that is what the user wants.
            if options['histequ_stretch'] or options['gamma_stretch']:
                pass
            elif 'filter' in stretch.__name__:
                pass
            else:
                Stats.denorm(shared_arr.asarray(), dtype, kwargs=options)

            if options['scale'] is not None:
                Stats.scale(shared_arr.asarray(), kwargs=options)

            #If there are NaN in the array, replace them with the dataset no-data value
            Stats.setnodata(shared_arr, options['ndv'])

            #Write the output
            output.GetRasterBand(b + 1).WriteArray(shared_arr.asarray(), xstart, ystart)

            #Manually clean up to stop memory leaks.
            del array, jobs, shared_arr.data
            try:
                del stats
            except NameError:
                pass
            del globals()['shared_arr']
            gc.collect()

        if options['ndv'] is not None:
            output.GetRasterBand(b + 1).SetNoDataValue(float(options['ndv']))
        elif options['ndv_band'] is not None:
            output.GetRasterBand(b + 1).SetNoDataValue(float(options['ndv_band']))

    if options['visualize']:
        Plot.show_hist(shared_arr.asarray())

    Timer.totaltime(starttime)

    #Close up
    dataset = None
    output = None
    gc.collect()
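
#------------------------------------------------------------------------------
# Illustrative sketch (not part of the module above): main() computes the
# histogram CDF once per segment so the worker processes do not each recompute
# it.  The helpers below are hypothetical stand-ins for Stats.gethist_cdf and
# for how a histogram-equalization stretch could consume its output; only the
# numpy calls are assumed to exist.
import numpy

def gethist_cdf_sketch(array, num_bins=256):
    """Histogram and normalized CDF of the finite values in a segment."""
    values = array[~numpy.isnan(array)].ravel()
    hist, bins = numpy.histogram(values, bins=num_bins)
    cdf = hist.cumsum().astype(numpy.float64)
    cdf /= cdf[-1]          # normalize so the CDF spans [0, 1]
    return cdf, bins

def histequ_sketch(window, cdf, bins):
    """Remap one slice of the shared array through the precomputed CDF."""
    flat = numpy.interp(window.ravel(), bins[:-1], cdf)
    return flat.reshape(window.shape)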
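
#------------------------------------------------------------------------------
# Illustrative sketch (not part of the module above): the segment loop in
# main() wraps each chunk in a shared-memory array, publishes it as a module
# global via init(), and hands every worker process a row slice to stretch in
# place.  make_shared, _init and _double_slice are hypothetical stand-ins for
# ArrayConvert.SharedMemArray, init and the stretch functions; the sketch
# assumes a fork-based start method so children inherit the global.
import ctypes
import multiprocessing
import numpy

shared_arr = None

def make_shared(arr):
    """Copy a float32 array into a ctypes buffer shared between processes."""
    raw = multiprocessing.RawArray(ctypes.c_float, arr.size)
    view = numpy.frombuffer(raw, dtype=numpy.float32).reshape(arr.shape)
    view[:] = arr
    return view

def _init(view):
    """Publish the shared view as a module global, as init() does in main()."""
    global shared_arr
    shared_arr = view

def _double_slice(rows):
    """Worker: operate on one horizontal band of the shared array in place."""
    shared_arr[rows] *= 2.0

if __name__ == '__main__':
    _init(make_shared(numpy.arange(16, dtype=numpy.float32).reshape(4, 4)))
    step = shared_arr.shape[0] // 2
    jobs = [multiprocessing.Process(target=_double_slice, args=(slice(i, i + step),))
            for i in range(0, shared_arr.shape[0], step)]
    for job in jobs:
        job.start()
    for job in jobs:
        job.join()
    print shared_arr    # every row has been doubled by a separate process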