def _cmp(cls, args):
    pth = str(args[0])
    kwds = args[1]
    image = io.loadWithGDAL(pth)  # load image
    Rikola.correct_image(image, False, **kwds)  # correct image
    io.saveWithGDAL(os.path.splitext(pth)[0] + "_CORRECTED.hdr", image)  # save corrected image
    return os.path.splitext(pth)[0] + "_CORRECTED.hdr"  # return corrected path
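# Hedged usage sketch (not part of the original source): _cmp is shaped like a worker for
# multiprocessing.Pool.map, taking a (path, keyword-dict) tuple and returning the path of the
# corrected copy. The file paths are placeholders, and it is assumed here that _cmp is exposed
# as a classmethod on the Rikola sensor class.
def _example_cmp_pool():
    import multiprocessing as mp

    jobs = [('scene_A.hdr', {}), ('scene_B.hdr', {})]  # hypothetical (path, kwds) pairs
    with mp.Pool(2) as pool:
        corrected_paths = pool.map(Rikola._cmp, jobs)  # each job writes *_CORRECTED.hdr
    print(corrected_paths)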
def _call(func, path, arg, kwd, n):
    """
    This function will be called by each thread. It loads each data chunk from disk, runs the
    operation, then saves the results.
    """
    # print("Spawning thread %d." % n)
    # func, path, arg, kwd = args

    # load data chunk
    if '.ply' in path:
        data = io.loadCloudPLY(path)  # load point cloud
        result = func(data, *arg, **kwd)  # compute results
        assert isinstance(result, HyCloud), "Error - function %s does not return a HyCloud." % func
        io.saveCloudPLY(path, result)  # save point cloud
    else:
        data = io.loadWithGDAL(path)  # load image
        result = func(data, *arg, **kwd)  # compute results
        assert isinstance(result, HyImage), "Error - function %s does not return a HyImage." % func
        io.saveWithGDAL(path, result)  # save result
    return True  # done
def correct_folder(cls, path, **kwds):
    """
    Many sensors use simple/common data structures to store data/headers/dark reference etc. Hence
    it is often easiest to pass an output folder to the sensor for correction.

    *Arguments*:
     - path = a path to the folder containing the sensor specific data.

    *Keywords*:
     - verbose = True if print outputs should be made to update progress. Default is True.
     - other keywords are passed directly to correct_image.

    *Returns*:
     - a HyImage to which all sensor-specific corrections have been applied. Note that this will
       generally not include topographic or atmospheric corrections.
    """
    verbose = kwds.get("verbose", True)
    kwds["verbose"] = verbose

    imgs = [str(p) for p in Path(path).rglob("capture/*.hdr")]  # all image files [including data]
    dark = [str(p) for p in Path(path).rglob("capture/DARKREF*.hdr")]  # dark reference file
    white = [str(p) for p in Path(path).rglob("capture/WHITEREF*.hdr")]  # any white reference data (core scanner)
    refl = [str(p) for p in Path(path).rglob("capture/REFL*.hdr")]  # any processed reflectance data (SiSu Rock)

    # remove reference/processed files from the image list
    for d in dark:
        del imgs[imgs.index(d)]
    for w in white:
        del imgs[imgs.index(w)]
    for r in refl:
        del imgs[imgs.index(r)]

    if len(imgs) > 1 or len(dark) > 1:
        assert False, "Error - multiple scenes found in folder. Double check file path..."
    if len(imgs) == 0 or len(dark) == 0:
        assert False, "Error - no image or dark calibration found in folder. Double check file path... %s" % path

    if verbose:
        print('\nLoading image %s' % imgs[0])

    # load image
    image = io.loadWithGDAL(imgs[0])
    OWL.set_dark_ref(dark[0])
    if len(white) > 0:  # white reference exists
        OWL.set_white_ref(white[0])

    # correct
    OWL.correct_image(image, **kwds)

    # return corrected image
    return image
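# Hedged usage sketch (an assumption, not part of the original source): shows how correct_folder
# might be called on an acquisition folder. The folder path is a placeholder, and it is assumed
# that correct_folder is bound as a classmethod on the OWL sensor class in hylite.sensors.
def _example_correct_folder():
    from hylite import io
    from hylite.sensors import OWL

    image = OWL.correct_folder('/path/to/acquisition_folder', verbose=True)  # applies dark/white refs
    io.saveWithGDAL('/path/to/acquisition_folder_corrected.hdr', image)  # save the corrected scene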
def build_core_template(images, N=5, thresh=40, vb=True):
    """
    Overlay images of core trays from e.g. a drillhole to calculate a template that is used for
    extracting individual core segments and is robust to data quirks (e.g. empty trays). All images
    must have identical dimensions and be properly co-aligned.

    *Arguments*:
     - images = a list of co-aligned images of different core trays to build the template with.
     - N = the number of cores per tray. Default is 5.
     - thresh = percentile used to separate foreground from background. Default is 40. Higher values
                ensure proper separation of cores, but will crop data more closely.
     - vb = True if a tqdm progress bar should be printed.
    """
    # sum valid pixels
    valid = None
    loop = images
    if vb:
        loop = tqdm(images, leave=False, desc="Building template")
    for i in loop:
        if isinstance(i, str):  # load image if need be
            i = io.loadWithGDAL(i)
        if valid is None:  # init valid if need be
            valid = np.zeros(i.data.shape[:-1])
        if i is not None:
            valid += np.isfinite(i.data).all(axis=-1)  # accumulate valid pixels

    # do threshold
    valid = valid > np.percentile(valid, thresh)
    if valid.shape[1] > valid.shape[0]:
        valid = valid.T

    # label connected components
    num_labels, labels_im = cv2.connectedComponents(
        (valid.T > np.percentile(valid, 40)).astype(np.uint8))

    # take top N labels by area
    area = [np.sum(labels_im == i) for i in range(num_labels)]
    thresh = np.sort(area)[::-1][N]  # area of the (N+1)-th largest component
    l = 1
    for i in range(1, num_labels):
        if area[i] >= thresh:
            labels_im[labels_im == i] = l
            l += 1
        else:
            labels_im[labels_im == i] = 0  # background

    # return
    return hylite.HyImage(labels_im.T)
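# Minimal usage sketch (an assumption, not part of the original code): builds a tray template from
# a list of co-aligned tray images and saves it as a label mask. The file names are placeholders.
def _example_build_core_template():
    from hylite import io

    trays = ['tray_01.hdr', 'tray_02.hdr', 'tray_03.hdr']  # hypothetical co-aligned tray images
    template = build_core_template(trays, N=5, thresh=40, vb=True)
    io.saveWithGDAL('core_template.hdr', template)  # labels 1..N mark core segments; 0 = background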
def set_dark_ref(cls, image):
    """
    Sets the dark reference to be used for sensor corrections.
    """
    if isinstance(image, str):
        assert os.path.exists(image), \
            "Error: %s is not a valid file path or hyperspectral image." % image
        image = io.loadWithGDAL(image)
    assert isinstance(image, io.HyImage) or image is None, \
        "Error: dark reference must be an image or None."
    Sensor.dark = image  # store dark reference
def set_white_ref(cls, image):
    """
    Sets the white reference to be used for sensor corrections.

    *Arguments*:
     - image = the white reference image.
    """
    if isinstance(image, str):
        assert os.path.exists(image), \
            "Error: %s is not a valid file path or hyperspectral image." % image
        image = io.loadWithGDAL(image)
    assert isinstance(image, io.HyImage), \
        "Error: white reference must be an image."
    Sensor.white = image  # store white reference
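# Hedged usage sketch (not from the original source): dark/white references can be set from file
# paths or pre-loaded HyImage instances before running a correction. The paths are placeholders,
# and it is assumed that these setters are bound as classmethods on the Sensor base class.
def _example_set_references():
    from hylite.sensors import Sensor

    Sensor.set_dark_ref('capture/DARKREF_scene.hdr')    # loaded via io.loadWithGDAL internally
    Sensor.set_white_ref('capture/WHITEREF_scene.hdr')  # stored on the Sensor class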
def parallel_chunks(function, data, *args, **kwds):
    """
    Run a function that operates per-point or per-pixel on smaller chunks of a point cloud or
    image dataset in parallel. Only use for expensive operations, as otherwise the overheads
    (writing files to cache, spawning threads, loading files from cache) are too costly.

    *Arguments*:
     - function = the function to run on each chunk of the dataset. Must take a HyCloud or HyImage
                  dataset as its first argument and also return a HyCloud or HyImage dataset
                  (cf., mwl(...), get_hull_corrected(...)).
     - data = the HyCloud or HyImage instance to run the function on.
     - args = tuple of arguments to pass to the function.

    *Keywords*:
     - nthreads = the number of threads to spawn. Default is the number of cores - 2. Negative
                  numbers will be subtracted from the number of cores.
     - any other keywords are passed to the function.
    """
    assert isinstance(data, HyCloud) or isinstance(data, HyImage)

    # get number of threads
    if 'nthreads' in kwds:
        nthreads = kwds['nthreads']
        del kwds['nthreads']
    else:
        nthreads = -2
    assert isinstance(nthreads, int), "Error - nthreads must be an integer."
    if nthreads < 1:
        ncpu = os.cpu_count()
        assert ncpu is not None, "Error - could not identify CPU count. Please specify nthreads keyword."
        nthreads = ncpu + nthreads  # negative values are subtracted from the core count
    assert nthreads > 0, "Error - cannot spawn %d threads" % nthreads

    # split data into chunks
    shape = data.data.shape[:-1]  # store shape (important for images)
    chunks = _split(data, nthreads)

    # dump chunks into temp directory
    pth = mkdtemp()  # make temp directory
    print("Writing thread cache to %s:" % pth)

    # dump clouds/images to directory
    paths = []
    for i, c in enumerate(chunks):
        if isinstance(c, HyCloud):
            p = os.path.join(pth, '%d.ply' % i)
            io.saveCloudPLY(p, c)
        else:
            p = os.path.join(pth, '%d.hdr' % i)
            io.saveWithGDAL(p, c)
        paths.append(p)

    # make sure we don't multithread twice when using advanced scipy/numpy functions...
    os.environ['MKL_NUM_THREADS'] = '1'
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['MKL_DYNAMIC'] = 'FALSE'

    # spawn worker processes
    P = [mp.Process(target=_call, args=(function, p, args, kwds, i)) for i, p in enumerate(paths)]
    try:
        for p in P:
            p.start()
        for p in P:
            p.join()

        # success! load data again...
        if isinstance(data, HyCloud):
            chunks = [io.loadCloudPLY(p) for p in paths]
        else:
            chunks = [io.loadWithGDAL(p) for p in paths]

        # remove temp directory
        shutil.rmtree(pth)  # delete temp directory
        print("Process complete (thread cache cleaned successfully).")
    except (KeyboardInterrupt, SystemExit):
        print("Job cancelled. Cleaning temp directory... ", end='')
        shutil.rmtree(pth)  # delete temp directory
        print("Done.")
        assert False, "Multiprocessing job cancelled by KeyboardInterrupt or SystemExit."
    except Exception as e:
        print("Error thrown. Cleaning temp directory... ", end='')
        shutil.rmtree(pth)  # delete temp directory
        print("Done.")
        raise e

    # re-enable scipy/numpy multithreading
    del os.environ['MKL_NUM_THREADS']
    del os.environ['OMP_NUM_THREADS']
    del os.environ['MKL_DYNAMIC']

    # merge chunks back into one dataset
    out = _merge(chunks, shape=shape)
    return out
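# Minimal usage sketch (an assumption): runs a hull correction over an image in parallel chunks.
# get_hull_corrected is the example named in the docstring above, but its import path and the
# file names here are assumptions, not confirmed by the original source.
def _example_parallel_chunks():
    from hylite import io
    from hylite.correct import get_hull_corrected

    image = io.loadWithGDAL('scene.hdr')
    corrected = parallel_chunks(get_hull_corrected, image, nthreads=-2)  # leave two cores free
    io.saveWithGDAL('scene_hull.hdr', corrected)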