def getplan(self, size, dtype):
    """
    Identify the plan for chunking along each dimension.

    Parameters
    ----------
    size : tuple or number
        If a tuple, the explicit number of chunks for each axis listed in
        ``self.value.axes``. Otherwise a target chunk size; it is multiplied
        by 1000 and compared against byte counts, i.e. it is in kilobytes.

    dtype : anything accepted by ``numpy.dtype``
        Element type, used to obtain the size in bytes of one element.

    Returns
    -------
    plan : ndarray of int
        Number of chunks along every dimension of ``self.value.shape``.
        Dimensions that are not in ``self.value.axes`` keep a single chunk.
    """
    from numpy import dtype as gettype

    # one chunk per dimension unless decided otherwise below
    plan = ones(len(self.value.shape), dtype=int)

    if isinstance(size, tuple):
        plan[self.value.axes] = size
        return plan

    # the factor of 1000 converts kilobytes to bytes
    size *= 1000.0

    # total size in bytes, calculated from the dtype
    element_size = gettype(dtype).itemsize
    nelements = prod(self.value.shape)
    total_size = nelements * element_size
    moving_value_shapes = self.value.shape[self.value.mask]

    if size <= element_size:
        # a chunk cannot hold more than one element: split every moving
        # axis completely, but still return a full-length plan so the
        # result has the same layout as on every other path
        plan[self.value.axes] = moving_value_shapes
        return plan

    remaining_size = 1.0 * total_size
    nchunks = ones(len(moving_value_shapes))
    for (i, s) in enumerate(moving_value_shapes):
        # size of a chunk if this axis were split into single elements
        min_chunk_size = remaining_size / s
        if min_chunk_size >= size:
            # still too large: split this axis fully and move to the next
            nchunks[i] = s
            remaining_size = min_chunk_size
        else:
            # splitting this axis part-way is enough to reach the target
            nchunks[i] = ceil(remaining_size / size)
            break

    plan[self.value.axes] = nchunks
    return plan
def getplan(self, size, dtype):
    """
    Identify the plan for chunking along each dimension.

    Parameters
    ----------
    size : tuple or number
        A tuple gives the number of chunks for each axis in
        ``self.value.axes`` explicitly. Any other value is a target chunk
        size in kilobytes (it is scaled by 1000 and compared with bytes).

    dtype : anything accepted by ``numpy.dtype``
        Element type; its ``itemsize`` gives the bytes per element.

    Returns
    -------
    plan : ndarray of int
        Number of chunks for each dimension of ``self.value.shape``, with
        a single chunk for dimensions outside ``self.value.axes``.
    """
    from numpy import dtype as gettype

    plan = ones(len(self.value.shape), dtype=int)

    if isinstance(size, tuple):
        plan[self.value.axes] = size
    else:
        # convert from kilobytes to bytes
        size *= 1000.0

        # calculate the total size in bytes from the dtype
        element_size = gettype(dtype).itemsize
        nelements = prod(self.value.shape)
        total_size = nelements * element_size
        moving_value_shapes = self.value.shape[self.value.mask]

        if size <= element_size:
            # target is no larger than a single element, so every moving
            # axis is split completely; write that into the full plan
            # instead of returning only the moving axes
            nchunks = moving_value_shapes
        else:
            remaining_size = 1.0 * total_size
            nchunks = ones(len(moving_value_shapes))
            for (i, s) in enumerate(moving_value_shapes):
                # chunk size if axis i were split into single elements
                min_chunk_size = remaining_size / s
                if min_chunk_size >= size:
                    nchunks[i] = s
                    remaining_size = min_chunk_size
                else:
                    # a partial split of this axis reaches the target
                    nchunks[i] = ceil(remaining_size / size)
                    break

        plan[self.value.axes] = nchunks

    return plan
def getplan(self, size="150", axes=None, padding=None):
    """
    Identify a plan for chunking values along each dimension.

    Generates an ndarray with the size (in number of elements) of chunks
    in each dimension. If provided, will estimate chunks for only a
    subset of axes, leaving all others to the full size of the axis.

    Parameters
    ----------
    size : string or tuple
        If str, the average size (in KB) of the chunks in all value
        dimensions. If tuple, an explicit specification of the chunk
        size (in elements) for each chunked dimension.

    axes : tuple, optional, default=None
        One or more axes to estimate chunks for; any other axes will
        use one chunk. If None, all axes are used for a str ``size``
        and the first ``len(size)`` axes for a tuple ``size``.

    padding : tuple or int, optional, default=None
        Size of overlapping padding between chunks in each dimension.
        If tuple, specifies padding along each chunked dimension; if int,
        all chunked dimensions use the same padding; if None, no padding.

    Returns
    -------
    plan : ndarray
        Chunk size, in elements, along every value dimension.

    pad : ndarray
        Padding along every value dimension (zero where not chunked).

    Raises
    ------
    ValueError
        If ``size`` is neither a tuple nor a str.
    """
    from numpy import dtype as gettype

    # initialize with all elements in one chunk; take a copy so the
    # in-place assignments below cannot write through to the array
    # returned by self.vshape
    plan = array(self.vshape)

    # check for subset of axes
    if axes is None:
        if isinstance(size, str):
            axes = arange(len(self.vshape))
        else:
            axes = arange(len(size))
    else:
        axes = asarray(axes, 'int')

    # set padding
    pad = array(len(self.vshape)*[0, ])
    if padding is not None:
        pad[axes] = padding

    # set the plan
    if isinstance(size, tuple):
        plan[axes] = size

    elif isinstance(size, str):
        # convert from kilobytes to bytes
        size = 1000.0 * float(size)

        # calculate from dtype
        elsize = gettype(self.dtype).itemsize
        nelements = prod(self.vshape)
        dims = self.vshape[self.vmask(axes)]

        if size <= elsize:
            # target is at most one element: one element per chunk
            s = ones(len(axes))
        else:
            remsize = 1.0 * nelements * elsize
            s = []
            for (i, d) in enumerate(dims):
                # chunk size in bytes if this axis is cut to one element
                minsize = remsize/d
                if minsize >= size:
                    s.append(1)
                    remsize = minsize
                else:
                    # this axis can keep several elements per chunk
                    s.append(min(d, floor(size/minsize)))
                    # the remaining chunked axes stay whole; slice dims
                    # (not the full shape) so len(s) matches len(axes)
                    s[i+1:] = dims[i+1:]
                    break

        plan[axes] = s

    else:
        raise ValueError("Chunk size not understood, must be tuple or str")

    return plan, pad
def getplan(self, size="150", axes=None, padding=None):
    """
    Identify a plan for chunking values along each dimension.

    Generates an ndarray with the size (in number of elements) of chunks
    in each dimension. If provided, will estimate chunks for only a
    subset of axes, leaving all others to the full size of the axis.

    Parameters
    ----------
    size : string or tuple
        If str, the average size (in KB) of the chunks in all value
        dimensions; the value is multiplied by 1000 to obtain bytes.
        If tuple, an explicit specification of the chunk size (in
        elements) for each chunked dimension.

    axes : tuple, optional, default=None
        One or more axes to estimate chunks for; any other axes will
        use one chunk. If None, all axes are used for a str ``size``
        and the first ``len(size)`` axes for a tuple ``size``.

    padding : tuple or int, optional, default=None
        Size of overlapping padding between chunks in each dimension.
        If tuple, specifies padding along each chunked dimension; if int,
        all chunked dimensions use the same padding; if None, no padding.

    Returns
    -------
    plan : ndarray
        Chunk size, in elements, along every value dimension.

    pad : ndarray
        Padding along every value dimension (zero where not chunked).

    Raises
    ------
    ValueError
        If ``size`` is neither a tuple nor a str.
    """
    from numpy import dtype as gettype

    # start with every axis in a single chunk; copy so that assigning
    # into the plan never modifies the array returned by self.vshape
    plan = array(self.vshape)

    # check for subset of axes
    if axes is not None:
        axes = asarray(axes, 'int')
    elif isinstance(size, str):
        axes = arange(len(self.vshape))
    else:
        axes = arange(len(size))

    # set padding (zero everywhere except the chunked axes)
    pad = array(len(self.vshape) * [0, ])
    if padding is not None:
        pad[axes] = padding

    # set the plan
    if isinstance(size, tuple):
        plan[axes] = size

    elif isinstance(size, str):
        # convert from kilobytes to bytes
        size = 1000.0 * float(size)

        # calculate from dtype
        elsize = gettype(self.dtype).itemsize
        nelements = prod(self.vshape)
        dims = self.vshape[self.vmask(axes)]

        if size <= elsize:
            # target is at most one element: one element per chunk
            s = ones(len(axes))
        else:
            remsize = 1.0 * nelements * elsize
            s = []
            for (i, d) in enumerate(dims):
                # bytes per chunk if this axis is cut down to one element
                minsize = remsize / d
                if minsize >= size:
                    s.append(1)
                    remsize = minsize
                else:
                    # several elements of this axis fit in one chunk
                    s.append(min(d, floor(size / minsize)))
                    # leave the remaining chunked axes whole; use dims
                    # rather than the full shape so that len(s) always
                    # equals len(axes)
                    s[i + 1:] = dims[i + 1:]
                    break

        plan[axes] = s

    else:
        raise ValueError("Chunk size not understood, must be tuple or str")

    return plan, pad