def _validate_arrays(self, arrays):
    """
    Check that arrays are correctly configured.

    Two checks are applied to the ``'shape'`` entry of every array:

    1. Dimensions that have an entry in ``ORDERING_CONSTRAINTS`` must
       appear in non-decreasing rank order.
    2. Source dimensions (``mbu.source_nr_vars()``) may not appear
       together with the 'ntime', 'nbl', 'na' or 'nchan' dimensions.

    :param arrays: iterable of array descriptors, each indexable
        by ``'name'`` and ``'shape'``.
    :raises ValueError: if either check fails for any array.
    """
    src_vars = mbu.source_nr_vars()
    vis_vars = ['ntime', 'nbl', 'na', 'nchan']

    for A in arrays:
        # Rank of each constrained dimension, in shape order.
        # Dimensions without a constraint are ignored.
        order = [ORDERING_CONSTRAINTS[var] for var in A['shape']
                 if var in ORDERING_CONSTRAINTS]

        # Each rank must be >= the rank preceding it
        if not all(b >= a for a, b in zip(order, order[1:])):
            raise ValueError(
                ('Array %s does not follow '
                 'ordering constraints. Shape is %s, but '
                 'this breaks the expected ordering of %s ') % (
                    A['name'], A['shape'], ORDERING_RANK))

        # Orthogonality of source variables and
        # time, baseline, antenna and channel
        nr_src_vars = [v for v in A['shape'] if v in src_vars]
        nr_vis_vars = [v for v in A['shape'] if v in vis_vars]

        if nr_src_vars and nr_vis_vars:
            raise ValueError(
                ('Array %s of shape %s '
                 'has source variables %s mixed with '
                 '%s. This solver does not currently '
                 'support this mix') % (
                    A['name'], A['shape'],
                    nr_src_vars, nr_vis_vars))
def _validate_arrays(self, arrays):
    """
    Check that arrays are correctly configured.

    For the ``'shape'`` of each array this verifies that:

    * dimensions listed in ``ORDERING_CONSTRAINTS`` occur in
      non-decreasing rank order, and
    * source dimensions (``mbu.source_nr_vars()``) are not combined
      with any of 'ntime', 'nbl', 'na' or 'nchan'.

    :param arrays: iterable of array descriptors, each indexable
        by ``'name'`` and ``'shape'``.
    :raises ValueError: if an array violates either rule.
    """
    src_vars = mbu.source_nr_vars()
    vis_vars = ['ntime', 'nbl', 'na', 'nchan']

    for A in arrays:
        # Ensure they match ordering constraints. Only dimensions
        # with a known rank take part in the comparison.
        order = [ORDERING_CONSTRAINTS[var] for var in A['shape']
                 if var in ORDERING_CONSTRAINTS]

        if not all(b >= a for a, b in zip(order, order[1:])):
            raise ValueError(
                ('Array %s does not follow '
                 'ordering constraints. Shape is %s, but '
                 'this breaks the expected ordering of %s ') % (
                    A['name'], A['shape'], ORDERING_RANK))

        # Orthogonality of source variables and
        # time, baseline, antenna and channel
        nr_src_vars = [v for v in A['shape'] if v in src_vars]
        nr_vis_vars = [v for v in A['shape'] if v in vis_vars]

        if nr_src_vars and nr_vis_vars:
            raise ValueError(
                ('Array %s of shape %s '
                 'has source variables %s mixed with '
                 '%s. This solver does not currently '
                 'support this mix') % (
                    A['name'], A['shape'],
                    nr_src_vars, nr_vis_vars))
def _thread_create_solvers(self, subslvr_cfg, P, nsolvers):
    """
    Create this thread's sub-solvers and store them in thread local data.

    Sets ``dirty``, ``nsolvers``, ``solvers`` and ``subslvr_gen`` on
    ``self.thread_local``, and writes the thread's CUDA context into
    ``subslvr_cfg[Options.CONTEXT]`` (the supplied dict is modified).

    :param subslvr_cfg: configuration passed to each ``RimeSolver``.
    :param P: mapping indexed by ``Options.NSRC`` and by each source
        dimension name to obtain sizes.
    :param nsolvers: number of sub-solvers to create in this thread.
    """
    montblanc.log.debug('Creating solvers in thread %s',
                        threading.current_thread())

    # Memory pools shared by all sub-solvers of this thread:
    # a device pool for GPU memory that could not be pre-allocated,
    # and a pinned host pool used for array transfers
    device_pool = pycuda.tools.DeviceMemoryPool()
    pinned_pool = pycuda.tools.PageLockedMemoryPool()
    # Mutex for guarding the memory pools
    pools_mutex = threading.Lock()

    tl = self.thread_local

    # Dirty index, indicating the CPU index of the data currently
    # on the GPU, used for avoiding array transfer.
    # One independent dictionary per solver.
    tl.dirty = [{} for _ in range(nsolvers)]
    # Number of solvers in this thread
    tl.nsolvers = nsolvers
    # Solver slots, filled in by the loop below
    tl.solvers = [None] * nsolvers
    # Initialise the subsolver generator
    tl.subslvr_gen = self._thread_gen_sub_solvers()

    # Set the CUDA context in the configuration to
    # the one associated with this thread
    subslvr_cfg[Options.CONTEXT] = tl.context

    # Create solvers for this context
    for idx in range(nsolvers):
        solver = RimeSolver(subslvr_cfg)

        # Configure the source dimensions of each sub-solver so that
        # the associated arrays have room for NSRC sources, capped at
        # the size of each dimension. Extents start out as [0, size];
        # they are set up properly in _thread_solve_sub
        nsrc = P[Options.NSRC]
        dim_updates = []

        for nr_var in [Options.NSRC] + mbu.source_nr_vars():
            size = nsrc if nsrc < P[nr_var] else P[nr_var]
            dim_updates.append({
                'name': nr_var,
                'local_size': size,
                'lower_extent': 0,
                'upper_extent': size,
            })

        solver.update_dimensions(dim_updates)

        # Give sub solvers access to device and pinned memory pools
        solver.dev_mem_pool = device_pool
        solver.pinned_mem_pool = pinned_pool
        solver.pool_lock = pools_mutex

        tl.solvers[idx] = solver
def _gen_source_slices(self):
    """
    Generate CPU and GPU slice dictionaries for successive source chunks.

    The NSRC extent range is walked in steps of ``self.src_diff``.
    For every chunk a ``(cpu_slice, gpu_slice)`` pair of dictionaries
    is yielded, keyed on ``Options.NSRC`` and on each source dimension
    (nsrc, npsrc, ngsrc, nssrc, ...). GPU slices always start at zero
    and have the same length as the matching CPU slice.

    :raises ValueError: if any source dimension has a
        non-zero lower extent.
    """
    # Source number variables/dimensions and their extents
    names = mbu.source_nr_vars()
    lowers = self.dim_lower_extent(*names)
    uppers = self.dim_upper_extent(*names)

    # Every source dimension is required to start at zero
    non_zero = [pair for pair in zip(names, lowers) if pair[1] != 0]

    if non_zero:
        raise ValueError("The following source dimensions "
                         "have non-zero lower extents [{nzd}]".format(
                             nzd=non_zero))

    # Extent size of each source type/dimension
    counts = {}
    for name, lower, upper in zip(names, lowers, uppers):
        counts[name] = upper - lower

    # Range of sources in the total space that is iterated over
    first, last = self.dim_extents(Options.NSRC)

    # Slice dictionaries used to index dimensions of the CPU and
    # GPU arrays. They are reused between chunks; copies are yielded.
    cpu_slice = {}
    gpu_slice = {}

    for start in xrange(first, last, self.src_diff):
        # The final chunk may be shorter than self.src_diff
        stop = min(start + self.src_diff, last)

        cpu_slice[Options.NSRC] = slice(start, stop, 1)
        gpu_slice[Options.NSRC] = slice(0, stop - start, 1)

        # Slice ranges of each individual source type on the CPU
        cpu_slice.update(mbu.source_range_slices(start, stop, counts))

        # GPU slices are zero based, with matching lengths
        for name in names:
            cpu_var = cpu_slice[name]
            gpu_slice[name] = slice(0, cpu_var.stop - cpu_var.start, 1)

        yield (cpu_slice.copy(), gpu_slice.copy())
def _gen_source_slices(self):
    """
    Generate CPU and GPU slice dictionaries for successive source chunks.

    Walks the NSRC extent range in steps of ``self.src_diff`` and, for
    every chunk, yields a ``(cpu_slice, gpu_slice)`` pair of
    dictionaries keyed on ``Options.NSRC`` and on each source dimension
    (nsrc, npsrc, ngsrc, nssrc, ...). Each GPU slice starts at zero and
    is as long as the corresponding CPU slice.

    :raises ValueError: if any source dimension has a
        non-zero lower extent.
    """
    # Source number variables/dimensions and their extents
    names = mbu.source_nr_vars()
    lowers = self.dim_lower_extent(*names)
    uppers = self.dim_upper_extent(*names)

    # Every source dimension is required to start at zero
    non_zero = [pair for pair in zip(names, lowers) if pair[1] != 0]

    if non_zero:
        raise ValueError("The following source dimensions "
                         "have non-zero lower extents [{nzd}]".format(
                             nzd=non_zero))

    # Extent size of each source type/dimension
    counts = {}
    for name, lower, upper in zip(names, lowers, uppers):
        counts[name] = upper - lower

    # Range of sources in the total space that is iterated over
    first, last = self.dim_extents(Options.NSRC)

    # Slice dictionaries used to index dimensions of the CPU and
    # GPU arrays. They are reused between chunks; copies are yielded.
    cpu_slice = {}
    gpu_slice = {}

    for start in xrange(first, last, self.src_diff):
        # The final chunk may be shorter than self.src_diff
        stop = min(start + self.src_diff, last)

        cpu_slice[Options.NSRC] = slice(start, stop, 1)
        gpu_slice[Options.NSRC] = slice(0, stop - start, 1)

        # Slice ranges of each individual source type on the CPU
        cpu_slice.update(mbu.source_range_slices(start, stop, counts))

        # GPU slices are zero based, with matching lengths
        for name in names:
            cpu_var = cpu_slice[name]
            gpu_slice[name] = slice(0, cpu_var.stop - cpu_var.start, 1)

        yield (cpu_slice.copy(), gpu_slice.copy())
def _construct_tensorflow_feed_data(dfs, cube, iter_dims,
                                    nr_of_input_staging_areas):
    """
    Build the placeholders, staging areas and feed-once variables
    used to feed data into the tensorflow graph.

    :param dfs: mapping from array name to an object exposing ``dtype``;
        also forwarded to ``create_staging_area_wrapper``.
    :param cube: hypercube providing ``properties()`` and ``arrays()``.
    :param iter_dims: forwarded to ``_partition`` together with the
        input arrays to split them into per-source, feed-many and
        feed-once groups.
    :param nr_of_input_staging_areas: number of staging areas created
        for the feed-many data and for each source type.
    :return: an ``AttrDict`` with ``src_ph_vars``, ``property_ph_vars``
        and ``local``. ``local`` holds ``descriptor``, ``feed_many``,
        ``sources``, ``output``, ``feed_once`` and ``input_sources``.
    """
    FD = AttrDict()
    # https://github.com/bcj/AttrDict/issues/34
    FD._setattr('_sequence_type', list)
    # Reference local staging_areas
    FD.local = local = AttrDict()
    # https://github.com/bcj/AttrDict/issues/34
    local._setattr('_sequence_type', list)

    # Create placeholder variables for source counts
    FD.src_ph_vars = AttrDict({
        n: tf.placeholder(dtype=tf.int32, shape=(), name=n)
        for n in ['nsrc'] + mbu.source_nr_vars()})

    # Create placeholder variables for properties
    FD.property_ph_vars = AttrDict({
        n: tf.placeholder(dtype=p.dtype, shape=(), name=n)
        for n, p in cube.properties().iteritems()})

    #========================================================
    # Determine which arrays need feeding once/multiple times
    #========================================================

    # Take all arrays flagged as input
    input_arrays = [a for a in cube.arrays().itervalues()
                    if 'input' in a.tags]

    src_data_sources, feed_many, feed_once = _partition(iter_dims,
                                                        input_arrays)

    #=====================================
    # Descriptor staging area
    #=====================================

    local.descriptor = create_staging_area_wrapper('descriptors',
                                                   ['descriptor'], dfs)

    #===========================================
    # Staging area for multiply fed data sources
    #===========================================

    # Create the staging_area for holding the feed many input
    local.feed_many = [
        create_staging_area_wrapper(
            'feed_many_%d' % i,
            ['descriptor'] + [a.name for a in feed_many], dfs)
        for i in range(nr_of_input_staging_areas)]

    #=================================================
    # Staging areas for each radio source data sources
    #=================================================

    # Create the source array staging areas
    local.sources = {
        src_nr_var: [
            create_staging_area_wrapper(
                '%s_%d' % (src_type, i),
                [a.name for a in src_data_sources[src_nr_var]], dfs)
            for i in range(nr_of_input_staging_areas)]
        for src_type, src_nr_var in source_var_types().iteritems()}

    #======================================
    # The single output staging_area
    #======================================

    local.output = create_staging_area_wrapper(
        'output', ['descriptor', 'model_vis', 'chi_squared'], dfs)

    #=================================================
    # Create tensorflow variables which are
    # fed only once via an assign operation
    #=================================================

    def _make_feed_once_tuple(array):
        dtype = dfs[array.name].dtype

        # Name the placeholder after the array passed in. Using a
        # loop variable from the enclosing function here would name
        # every placeholder after the same, unrelated array.
        ph = tf.placeholder(dtype=dtype,
                            name=array.name + "_placeholder")

        # The variable starts with a dummy shape of (1,); shape
        # validation is disabled so the assign can replace it
        var = tf.Variable(tf.zeros(shape=(1,), dtype=dtype),
                          validate_shape=False,
                          name=array.name)

        op = tf.assign(var, ph, validate_shape=False)
        #op = tf.Print(op, [tf.shape(var), tf.shape(op)],
        #              message="Assigning {}".format(array.name))

        return FeedOnce(ph, var, op)

    # Create placeholders, variables and assign operators
    # for data sources that we will only feed once
    local.feed_once = {a.name: _make_feed_once_tuple(a)
                       for a in feed_once}

    #=======================================================
    # Construct the list of data sources that need feeding
    #=======================================================

    # Data sources from input staging_areas
    src_sa = [q for sq in local.sources.values() for q in sq]
    all_staging_areas = local.feed_many + src_sa
    input_sources = {a for q in all_staging_areas
                     for a in q.fed_arrays}
    # Data sources from feed once variables
    input_sources.update(local.feed_once.keys())

    local.input_sources = input_sources

    return FD
def _apply_source_provider_dim_updates(cube, source_providers, budget_dims):
    """
    Apply the dimension updates suggested by ``source_providers``
    (via ``provider.updated_dimensions()``) to the supplied hypercube.

    For each updated dimension the global size is set to the supplied
    size and the lower extent to 0. The upper extent is taken from
    ``budget_dims`` when a non-zero entry exists there (clamped to the
    global size), otherwise it is the global size. Updates to 'nsrc'
    are ignored; 'nsrc' is instead derived from the source dimensions.

    :param cube: hypercube to update.
    :param source_providers: providers exposing ``updated_dimensions()``
        and ``name()``.
    :param budget_dims: mapping of dimension name to a previously
        budgeted extent size.
    :return: ``cube.bytes_required()`` after any updates.
    :raises ValueError: if providers supply differing sizes
        for the same dimension.
    """
    # Map each dimension to the list of DimensionUpdate
    # entries (size and provider name) suggested for it
    update_map = collections.defaultdict(list)

    for prov in source_providers:
        for name, size in prov.updated_dimensions():
            # Updates on the nsrc dimension are not
            # accepted, it is managed internally
            if name != 'nsrc':
                update_map[name].append(DimensionUpdate(size, prov.name()))

    # No dimensions were updated, quit early
    if not update_map:
        return cube.bytes_required()

    # All sizes received for a dimension must agree.
    # Tell the user when conflicts occur
    update_list = []

    for name, updates in update_map.iteritems():
        agreed_size = updates[0].size

        if not all(agreed_size == du.size for du in updates[1:]):
            raise ValueError("Received conflicting "
                             "global size updates '{u}'"
                             " for dimension '{n}'.".format(
                                 n=name, u=updates))

        update_list.append((name, agreed_size))

    montblanc.log.info("Updating dimensions {} from "
                       "source providers.".format(str(update_list)))

    # Now update our dimensions
    for name, global_size in update_list:
        # Prefer an existing budgeted extent size over the global size
        extent_size = budget_dims.get(name, global_size)

        # A previously zero extent, or one exceeding
        # the global size, falls back to the global size
        if extent_size == 0 or extent_size > global_size:
            extent_size = global_size

        cube.update_dimension(name,
                              global_size=global_size,
                              lower_extent=0,
                              upper_extent=extent_size)

    # The global number of sources is handled differently: it is the
    # combined number of all source types (points, gaussians, sersics)
    global_sizes = cube.dim_global_size(*mbu.source_nr_vars())
    nsrc = sum(global_sizes)

    # The extent size equals whichever source type is currently being
    # iterated over, so take the maximum extent size of the sources
    extent_sizes = cube.dim_extent_size(*mbu.source_nr_vars())
    es = max(extent_sizes)

    cube.update_dimension('nsrc',
                          global_size=nsrc,
                          lower_extent=0,
                          upper_extent=es)

    # Return our cube size
    return cube.bytes_required()
def _budget(cube, slvr_cfg):
    """
    Reduce dimension extents on ``cube`` until it fits the memory budget.

    Candidate reductions are tried in order (an initial time reduction,
    source batch sizes, remaining time reductions, then baselines) and
    applied to the cube for as long as ``cube.bytes_required()``
    exceeds the budget.

    :param cube: hypercube whose dimension extents are reduced in place.
    :param slvr_cfg: solver configuration. 'mem_budget' is optional
        and defaults to ``2*ONE_GB``. 'source_batch_size' is read only
        when the first time reduction is not sufficient.
    :return: tuple ``(applied_reductions, bytes_required)``, where
        ``applied_reductions`` maps dimension name to its reduced
        upper extent.
    """
    # Figure out a viable dimension configuration
    # given the total problem size
    mem_budget = slvr_cfg.get('mem_budget', 2*ONE_GB)
    bytes_required = cube.bytes_required()

    src_dims = mbu.source_nr_vars() + ['nsrc']
    dim_names = ['na', 'nbl', 'ntime'] + src_dims
    global_sizes = cube.dim_global_size(*dim_names)
    na, nbl, ntime = global_sizes[:3]

    # Keep track of original dimension sizes
    # and any reductions that are applied
    original_sizes = dict(zip(dim_names, global_sizes))
    applied_reductions = {}

    def _reduction():
        # Each item yielded is a list of (dimension, size) pairs
        # which are applied together

        # Reduce over time first
        trange = _uniq_log2_range(1, ntime, 5)
        for t in trange[0:1]:
            yield [('ntime', t)]

        # Attempt reduction over source.
        # A batch size of 10 or less gives a single candidate; this
        # must be a list, iterating over a bare int raises TypeError
        sbs = slvr_cfg['source_batch_size']
        srange = _uniq_log2_range(10, sbs, 5) if sbs > 10 else [10]
        src_dim_gs = global_sizes[3:]

        for bs in srange:
            # Never exceed the global size of a source dimension
            yield [(d, bs if bs < gs else gs)
                   for d, gs in zip(src_dims, src_dim_gs)]

        # Try the rest of the timesteps
        for t in trange[1:]:
            yield [('ntime', t)]

        # Reduce by baseline
        for bl in _uniq_log2_range(na, nbl, 5):
            yield [('nbl', bl)]

    for reduction in _reduction():
        # Stop as soon as the problem fits within the budget
        if bytes_required <= mem_budget:
            break

        for dim, size in reduction:
            applied_reductions[dim] = size
            cube.update_dimension(dim, lower_extent=0, upper_extent=size)

        bytes_required = cube.bytes_required()

    # Log some information about the memory_budget
    # and dimension reduction
    montblanc.log.info(("Selected a solver memory budget of {rb} "
                        "given a hard limit of {mb}.").format(
                            rb=mbu.fmt_bytes(bytes_required),
                            mb=mbu.fmt_bytes(mem_budget)))

    if applied_reductions:
        montblanc.log.info("The following dimension reductions "
                           "were applied:")

        for k, v in applied_reductions.iteritems():
            montblanc.log.info('{p}{d}: {id} => {rd}'.format(
                p=' '*4, d=k, id=original_sizes[k], rd=v))
    else:
        montblanc.log.info("No dimension reductions were applied.")

    return applied_reductions, bytes_required
def _thread_budget(self, slvr_cfg, A_sub, props):
    """
    Work out a memory budget and dimension reductions for the CUDA
    device associated with the current thread and context.

    :param slvr_cfg: solver configuration. 'mem_budget' (optional),
        ``Options.NSOLVERS``, ``Options.NA`` and
        ``Options.SOURCE_BATCH_SIZE`` are read from it.
    :param A_sub: array descriptors, each indexable
        by ``'name'`` and ``'shape'``.
    :param props: property dictionary. It is copied, not modified.
    :return: tuple ``(P, modded_dims, required_mem)``: the copy of
        ``props`` updated with the modified dimensions, the modified
        dimensions themselves, and the memory required given ``P``.
    :raises MemoryError: if ``mbu.viable_dim_config`` reports
        that no viable configuration was found.
    """
    montblanc.log.debug('Budgeting in thread %s',
                        threading.current_thread())

    # Query free memory on this context
    free_mem, total_mem = cuda.mem_get_info()

    device = self.thread_local.context.get_device()

    montblanc.log.info('{d}: {t} total {f} free.'.format(
        d=device.name(),
        f=mbu.fmt_bytes(free_mem),
        t=mbu.fmt_bytes(total_mem)))

    # Work with a supplied memory budget, otherwise use
    # free memory less an amount equal to the upper size
    # of an NVIDIA context
    mem_budget = slvr_cfg.get('mem_budget', free_mem - 200*ONE_MB)

    nsolvers = slvr_cfg.get(Options.NSOLVERS)
    na = slvr_cfg.get(Options.NA)
    nsrc = slvr_cfg.get(Options.SOURCE_BATCH_SIZE)

    # Reduction setting every source dimension to the batch size
    src_terms = ['%s=%s' % (nr_var, nsrc)
                 for nr_var in [Options.NSRC] + mbu.source_nr_vars()]
    src_reduction_str = '&'.join(src_terms)

    # Reduction setting ntime to ceil(100 / nsolvers)
    ntime_split = np.int32(np.ceil(100.0 / nsolvers))
    ntime_split_str = 'ntime={n}'.format(n=ntime_split)

    # Candidate reductions handed to viable_dim_config
    reductions = [
        ntime_split_str,
        src_reduction_str,
        'ntime',
        'nbl={na}&na={na}'.format(na=na),
        'nchan=50%',
    ]

    # Figure out a viable dimension configuration
    # given the total problem size
    viable, modded_dims = mbu.viable_dim_config(
        mem_budget, A_sub, props, reductions, nsolvers)

    # Create property dictionary with updated dimensions.
    P = props.copy()
    P.update(modded_dims)

    required_mem = mbu.dict_array_bytes_required(A_sub, P)

    if viable:
        return P, modded_dims, required_mem

    # No viable configuration was found. Report the reductions that
    # were tried, and the arrays ordered from largest to smallest.
    def _nbytes(ary):
        return mbu.dict_array_bytes(ary, P)

    dim_set_str = ', '.join(
        ['%s=%s' % (k, v) for k, v in modded_dims.iteritems()])

    ary_lines = []
    for a in sorted(A_sub, reverse=True, key=_nbytes):
        ary_lines.append('%-*s %-*s %s' % (
            15, a['name'],
            10, mbu.fmt_bytes(mbu.dict_array_bytes(a, P)),
            mbu.shape_from_str_tuple(a['shape'], P)))
    ary_list_str = '\n'.join(ary_lines)

    raise MemoryError(
        "Tried reducing the problem size "
        "by setting '%s' on all arrays, "
        "but the resultant required memory of %s "
        "for each of %d solvers is too big "
        "to fit within the memory budget of %s. "
        "List of biggests offenders:\n%s "
        "\nSplitting the problem along the "
        "channel dimension needs to be "
        "implemented." % (
            dim_set_str,
            mbu.fmt_bytes(required_mem),
            nsolvers,
            mbu.fmt_bytes(mem_budget),
            ary_list_str))
import montblanc.impl.rime.v4.RimeSolver as BSV4mod from montblanc.impl.rime.v5.RimeSolver import RimeSolver ONE_KB = 1024 ONE_MB = ONE_KB**2 ONE_GB = ONE_KB**3 ASYNC_HTOD = 'htod' ASYNC_DTOH = 'dtoh' ALL_SLICE = slice(None, None, 1) EMPTY_SLICE = slice(0, 0, 1) ORDERING_CONSTRAINTS = {nr_var: 1 for nr_var in mbu.source_nr_vars()} ORDERING_CONSTRAINTS.update({ 'nsrc': 1, 'ntime': 2, 'nbl': 3, 'na': 3, 'nchan': 4 }) ORDERING_RANK = [ ' or '.join(['nsrc'] + mbu.source_nr_vars()), 'ntime', ' or '.join(['nbl', 'na']), 'nchan' ] def _update_refs(pool, new_refs):
def _thread_budget(self, slvr_cfg, A_sub, props):
    """
    Work out a memory budget and dimension reductions for the CUDA
    device associated with the current thread and context.

    :param slvr_cfg: solver configuration. 'mem_budget' (optional),
        ``Options.NSOLVERS``, ``Options.NA`` and
        ``Options.SOURCE_BATCH_SIZE`` are read from it.
    :param A_sub: array descriptors, each indexable
        by ``'name'`` and ``'shape'``.
    :param props: property dictionary. It is copied, not modified.
    :return: tuple ``(P, modded_dims, required_mem)``: the copy of
        ``props`` updated with the modified dimensions, the modified
        dimensions themselves, and the memory required given ``P``.
    :raises MemoryError: if ``mbu.viable_dim_config`` reports
        that no viable configuration was found.
    """
    montblanc.log.debug('Budgeting in thread %s',
                        threading.current_thread())

    # Query free memory on this context
    free_mem, total_mem = cuda.mem_get_info()

    device = self.thread_local.context.get_device()

    montblanc.log.info('{d}: {t} total {f} free.'.format(
        d=device.name(),
        f=mbu.fmt_bytes(free_mem),
        t=mbu.fmt_bytes(total_mem)))

    # Work with a supplied memory budget, otherwise use
    # free memory less an amount equal to the upper size
    # of an NVIDIA context
    mem_budget = slvr_cfg.get('mem_budget', free_mem - 200*ONE_MB)

    nsolvers = slvr_cfg.get(Options.NSOLVERS)
    na = slvr_cfg.get(Options.NA)
    nsrc = slvr_cfg.get(Options.SOURCE_BATCH_SIZE)

    # Reduction setting every source dimension to the batch size
    src_terms = ['%s=%s' % (nr_var, nsrc)
                 for nr_var in [Options.NSRC] + mbu.source_nr_vars()]
    src_reduction_str = '&'.join(src_terms)

    # Reduction setting ntime to ceil(100 / nsolvers)
    ntime_split = np.int32(np.ceil(100.0 / nsolvers))
    ntime_split_str = 'ntime={n}'.format(n=ntime_split)

    # Candidate reductions handed to viable_dim_config
    reductions = [
        ntime_split_str,
        src_reduction_str,
        'ntime',
        'nbl={na}&na={na}'.format(na=na),
        'nchan=50%',
    ]

    # Figure out a viable dimension configuration
    # given the total problem size
    viable, modded_dims = mbu.viable_dim_config(
        mem_budget, A_sub, props, reductions, nsolvers)

    # Create property dictionary with updated dimensions.
    P = props.copy()
    P.update(modded_dims)

    required_mem = mbu.dict_array_bytes_required(A_sub, P)

    if viable:
        return P, modded_dims, required_mem

    # No viable configuration was found. Report the reductions that
    # were tried, and the arrays ordered from largest to smallest.
    def _nbytes(ary):
        return mbu.dict_array_bytes(ary, P)

    dim_set_str = ', '.join(
        ['%s=%s' % (k, v) for k, v in modded_dims.iteritems()])

    ary_lines = []
    for a in sorted(A_sub, reverse=True, key=_nbytes):
        ary_lines.append('%-*s %-*s %s' % (
            15, a['name'],
            10, mbu.fmt_bytes(mbu.dict_array_bytes(a, P)),
            mbu.shape_from_str_tuple(a['shape'], P)))
    ary_list_str = '\n'.join(ary_lines)

    raise MemoryError(
        "Tried reducing the problem size "
        "by setting '%s' on all arrays, "
        "but the resultant required memory of %s "
        "for each of %d solvers is too big "
        "to fit within the memory budget of %s. "
        "List of biggests offenders:\n%s "
        "\nSplitting the problem along the "
        "channel dimension needs to be "
        "implemented." % (
            dim_set_str,
            mbu.fmt_bytes(required_mem),
            nsolvers,
            mbu.fmt_bytes(mem_budget),
            ary_list_str))
import montblanc.impl.rime.v4.RimeSolver as BSV4mod from montblanc.impl.rime.v5.RimeSolver import RimeSolver ONE_KB = 1024 ONE_MB = ONE_KB**2 ONE_GB = ONE_KB**3 ASYNC_HTOD = 'htod' ASYNC_DTOH = 'dtoh' ALL_SLICE = slice(None,None,1) EMPTY_SLICE = slice(0,0,1) ORDERING_CONSTRAINTS = { nr_var : 1 for nr_var in mbu.source_nr_vars() } ORDERING_CONSTRAINTS.update({ 'nsrc' : 1, 'ntime': 2, 'nbl': 3, 'na': 3, 'nchan': 4 }) ORDERING_RANK = [' or '.join(['nsrc'] + mbu.source_nr_vars()), 'ntime', ' or '.join(['nbl', 'na']), 'nchan'] def _update_refs(pool, new_refs): for key, value in new_refs.iteritems(): pool[key].extend(value) class CompositeRimeSolver(MontblancNumpySolver): """ Composite solver implementation for RIME. Implements a solver composed of multiple RimeSolvers. The sub-solver