import os

import numpy as np
from dask.array.image import imread


def read_data_from_dir(dataDir, extension):
    """Read a stack of images located in subdirectories into a dask array,
    returning X (array of data) and y (array of labels). dataDir must end
    with a path separator; each subdirectory is one label class."""
    # Sort the subdirectories so the image stack and the per-class
    # counts below are built in the same order.
    subdirs = sorted(os.listdir(dataDir))
    X = np.concatenate([
        imread(dataDir + subdir + '/*.' + extension).compute()
        for subdir in subdirs
    ])
    filesdict = {}
    for subdir in subdirs:
        files = next(os.walk(dataDir + subdir))[2]
        filesdict[subdir] = len([fi for fi in files
                                 if fi.endswith('.' + extension)])
    if sum(filesdict.values()) != X.shape[0]:
        raise ValueError('Number of images and labels does not match')
    # Assign one integer label per subdirectory, block-wise.
    y = np.zeros([X.shape[0], 1], dtype=np.uint8)
    offset = 0
    for i, category in enumerate(filesdict):
        z = filesdict[category]
        y[offset:offset + z] = i
        offset += z
    return X, y
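# Usage sketch (not part of the original): assumes a hypothetical
# data/ directory holding one subdirectory of PNGs per class.
X, y = read_data_from_dir('data/', 'png')
print(X.shape)        # (n_images, height, width[, channels])
print(np.unique(y))   # one integer label per subdirectory, e.g. [0 1]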
import os

import numpy as np
import dask.array.image as dai


def pics_to_h5(source, target, name):
    """Write an array of pictures to an HDF5 dataset."""
    arr = dai.imread(source + '/*.png', preprocess=np.transpose)
    # Grayscale stacks come back as (n, h, w); add a channel axis.
    if len(arr.shape) == 3:
        arr = arr.reshape(arr.shape + (1,))
    arr.to_hdf5(target, name)
    # Note: this counts every directory entry, not only the PNGs read.
    return len(os.listdir(source))
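# Usage sketch (hypothetical paths): pack frames/*.png into the
# dataset "images" inside out.h5.
n_files = pics_to_h5('frames', 'out.h5', 'images')
print(n_files)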
import functools as fct

import numpy as np
import xarray as xr
import dask.array.image as daim


def load_images(path, dtype=np.float64):
    # imread = fct.partial(ski.imread, as_gray=True)
    # imread_cv is an OpenCV-based reader defined elsewhere in the project.
    imread = fct.partial(imread_cv, dtype=dtype)
    varr = daim.imread(path, imread)
    varr = xr.DataArray(varr, dims=['frame', 'height', 'width'])
    # Attach an integer coordinate to every dimension.
    for dim, length in varr.sizes.items():
        varr = varr.assign_coords(**{dim: np.arange(length)})
    return varr
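# Usage sketch (hypothetical glob; imread_cv must be importable): the
# labelled coordinates allow xarray-style selection afterwards.
varr = load_images('frames/*.tif', dtype=np.float32)
print(varr.dims)                    # ('frame', 'height', 'width')
first_ten = varr.sel(frame=slice(0, 9))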
import time

from dask import delayed
from dask.array.image import imread
from dask.distributed import Client


def runfun(file_path, inc, m_size, min_size, thresh, outfolder, dress):
    # godfun (the per-block segmentation) and savefun (the per-slice
    # writer) are defined elsewhere and read these globals.
    global increment
    global max_size
    global min_sizes
    global thresholdfactor
    global out
    increment = int(inc)
    max_size = int(m_size)
    min_sizes = int(min_size)
    thresholdfactor = float(thresh)
    out = outfolder
    # Connect to the scheduler for the use of dask.
    client = Client(dress, processes=True)
    start = time.time()
    set_images = imread(file_path)
    # Rechunk the image into dimensions that are large and fit in RAM.
    image = set_images.rechunk((5, 2048, 2048))
    print(image)
    print("computing")
    tup = image.shape
    # Loop through the image set to cut down on the size of the volume
    # handled at once. Dask already avoids computing the entire volume,
    # but splitting it into slabs of 75 slices cuts memory use further;
    # the step can be modified to fit the shape of each volume.
    z = 0
    for x in range(2, tup[0], 75):
        savedFiles = []
        temparray = image[x - 2:x + 75, 0:tup[1], 0:tup[2]]
        newshape = temparray.shape
        workingstep = temparray.map_overlap(
            godfun, depth=(2, 30, 30), trim=True, dtype='uint16')
        for y in range(0, newshape[0]):
            q = delayed(savefun)(workingstep[y], z)
            savedFiles.append(q)
            z += 1
        # Compute inside the loop so every slab's saves actually run,
        # not just the last slab's.
        total = sum(savedFiles)
        total = total.compute()
        del workingstep
    end = time.time()
    print((end - start) / 60)
    client.close()
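# Invocation sketch (hypothetical values): the address must point at a
# running dask scheduler, and godfun/savefun must be defined in scope.
runfun('volume/*.tif', inc=2, m_size=5000, min_size=50,
       thresh=1.5, outfolder='segmented', dress='tcp://127.0.0.1:8786')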
import dask.array.image as dai


def jpgs_to_h5(source, target, name):
    """Convert a directory of images into an HDF5 file that stores the
    images in an array of shape (img_num, height, width, [channels])."""
    dai.imread(source + '*.jpg').to_hdf5(target, name)
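# Usage sketch (hypothetical paths), reading the result back with h5py
# to confirm the (img_num, height, width, [channels]) layout.
import h5py

jpgs_to_h5('photos/', 'photos.h5', 'imgs')
with h5py.File('photos.h5', 'r') as f:
    print(f['imgs'].shape)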
import os

import numpy as np
from dask.array.image import imread
from numba import njit

data_dir = "data"
worldpop_file = "ppp_2020_1km_Aggregated.tif"
reduction_factor = 15

# Read the GeoTIFF lazily, then materialise the single frame.
dask_arr = imread(os.path.join(data_dir, worldpop_file))
X = np.array(dask_arr[0])
# Clip nodata values (negative counts) to zero.
X = np.clip(X, a_min=0, a_max=None)
Xreduce = np.zeros([n // reduction_factor for n in X.shape])


@njit
def reduce_resolution(X, Xreduce, reduction_factor):
    # Sum each reduction_factor x reduction_factor block into one output
    # cell so total population is conserved. Trim the ragged edge so the
    # indices ii, jj never run past Xreduce's bounds.
    N = X.shape[0] // reduction_factor * reduction_factor
    M = X.shape[1] // reduction_factor * reduction_factor
    for i in range(N):
        for j in range(M):
            ii = i // reduction_factor
            jj = j // reduction_factor
            Xreduce[ii, jj] = Xreduce[ii, jj] + X[i, j]
    return Xreduce


Xreduce = reduce_resolution(X, Xreduce, reduction_factor)
out_file = f"popmap_{reduction_factor}.npy"
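# Cross-check sketch (not in the original): the same block sum,
# vectorised with reshape, should match the numba kernel on the
# edge-trimmed region.
f = reduction_factor
Xtrim = X[:X.shape[0] // f * f, :X.shape[1] // f * f]
Xcheck = Xtrim.reshape(Xtrim.shape[0] // f, f,
                       Xtrim.shape[1] // f, f).sum(axis=(1, 3))
assert np.allclose(Xcheck, Xreduce)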