def block_split(self, items, max_block_size=MAX_BLOCK_SIZE):
    """
    Split the given items in blocks, depending on the parameter
    concurrent tasks. Notice that in order to save memory there is a
    maximum block size of `max_block_size` items (MAX_BLOCK_SIZE by
    default).

    :param list items: the items to split in blocks
    :param int max_block_size: maximum number of items per block

    NOTE: the original version built the docstring as
    `"...%d..." % MAX_BLOCK_SIZE`, which makes it a plain expression
    statement (evaluated and discarded at each call), not a docstring;
    it is now a real docstring referencing the parameter instead.
    """
    assert len(items) > 0, 'No items in %s' % items
    num_rlzs = len(self._get_realizations())
    # spread the items evenly among the concurrent tasks and
    # realizations, but never exceed max_block_size per block
    # (memory cap)
    bs = min(ceil(len(items), ceil(self.concurrent_tasks(), num_rlzs)),
             max_block_size)
    logs.LOG.warn('Using block size=%d', bs)
    return block_splitter(items, bs)
def initialize_sources(self):
    """
    Parse source models and validate source logic trees. It also
    filters the sources far away and apply uncertainties to the
    relevant ones. As a side effect it populates the instance
    dictionary `.source_blocks_per_ltpath`. Notice that sources are
    automatically split.

    :returns:
        the list of the created `models.LtSourceModel` instances,
        one per source model path in the logic tree
        (the original docstring wrongly claimed a list of source
        counts was returned)
    """
    logs.LOG.progress("initializing sources")
    smlt_file = self.hc.inputs['source_model_logic_tree']
    self.smlt = logictree.SourceModelLogicTree(
        file(smlt_file).read(), self.hc.base_path, smlt_file)
    sm_paths = list(self.smlt.get_sm_paths())
    # share the concurrent-task budget among the source model paths
    nblocks = ceil(config.get('hazard', 'concurrent_tasks'),
                   len(sm_paths))
    # here we are doing a full enumeration of the source model logic
    # tree; this is not bad since for very large source models there
    # are typically very few realizations; moreover, the filtering
    # will remove most of the sources, so the memory occupation is
    # typically low
    lt_models = []
    # NOTE: the outer index was originally named `i`, shadowed by the
    # inner `enumerate(blocks, 1)` loop below; renamed to `ordinal`
    for ordinal, (sm, path) in enumerate(sm_paths):
        smpath = tuple(path)
        fname = os.path.join(self.hc.base_path, sm)
        # parse, filter by distance to the sites and apply the logic
        # tree uncertainties relevant for this path
        source_collector = source.parse_source_model_smart(
            fname,
            self.hc.sites_affected_by,
            self.smlt.make_apply_uncertainties(path),
            self.hc)
        if not source_collector.source_weights:
            raise RuntimeError(
                'Could not find sources close to the sites in %s '
                '(maximum_distance=%s km)' % (
                    fname, self.hc.maximum_distance))
        lt_model = models.LtSourceModel.objects.create(
            hazard_calculation=self.hc, ordinal=ordinal,
            sm_lt_path=smpath)
        lt_models.append(lt_model)
        for trt, blocks in source_collector.split_blocks(nblocks):
            self.source_blocks_per_ltpath[smpath, trt] = blocks
            n = sum(len(block) for block in blocks)
            logs.LOG.info('Found %d relevant source(s) for %s %s, '
                          'TRT=%s', n, sm, path, trt)
            logs.LOG.info('Splitting in %d blocks', len(blocks))
            for i, block in enumerate(blocks, 1):
                logs.LOG.debug('%s, block %d: %d source(s), weight %s',
                               trt, i, len(block), block.weight)
        # save LtModelInfo objects for each tectonic region type
        for trt in source_collector.sorted_trts():
            models.LtModelInfo.objects.create(
                lt_model=lt_model,
                tectonic_region_type=trt,
                num_sources=len(source_collector.source_weights[trt]),
                num_ruptures=source_collector.num_ruptures[trt],
                min_mag=source_collector.min_mag[trt],
                max_mag=source_collector.max_mag[trt])
    return lt_models
def split_blocks(self, nblocks):
    """
    Yield pairs (tectonic region type, blocks), where the blocks
    group sources of similar weight.

    :param int nblocks: the maximum number of blocks to generate
    """
    # divide the block budget evenly across the tectonic region types
    blocks_per_trt = ceil(nblocks, len(self.source_weights))
    splitter = SequenceSplitter(blocks_per_trt)
    for trt, weights in self.source_weights.iteritems():
        yield trt, splitter.split_on_max_weight(weights)
def initialize_sources(self):
    """
    Parse source models and validate source logic trees. It also
    filters the sources far away and apply uncertainties to the
    relevant ones. As a side effect it populates the instance
    dictionary `.source_blocks_per_ltpath`. Notice that sources are
    automatically split.

    :returns:
        a list with the number of sources for each source model
    """
    logs.LOG.progress("initializing sources")
    smlt_file = self.hc.inputs['source_model_logic_tree']
    self.smlt = logictree.SourceModelLogicTree(
        file(smlt_file).read(), self.hc.base_path, smlt_file)
    sm_paths = list(self.smlt.get_sm_paths())
    # share the concurrent-task budget among the source model paths
    nblocks = ceil(config.get('hazard', 'concurrent_tasks'),
                   len(sm_paths))
    # NOTE(review): a single splitter instance is reused for every
    # source model path below — presumably intentional so that
    # `bs.max_weight` reflects the last split; confirm
    bs = SequenceSplitter(nblocks)
    # here we are doing a full enumeration of the source model logic
    # tree; this is not bad because for very large source models there
    # are typically very few realizations; moreover, the filtering
    # will remove most of the sources, so the memory occupation is
    # typically low
    num_sources = []  # the number of sources per sm_lt_path
    for sm, path in sm_paths:
        smpath = tuple(path)
        # parse, filter by distance to the sites and apply the logic
        # tree uncertainties relevant for this path
        source_weight_pairs = source.parse_source_model_smart(
            os.path.join(self.hc.base_path, sm),
            self.hc.sites_affected_by,
            self.smlt.make_apply_uncertainties(path),
            self.hc)
        # group the (source, weight) pairs in blocks of similar weight
        blocks = bs.split_on_max_weight(list(source_weight_pairs))
        self.source_blocks_per_ltpath[smpath] = blocks
        n = sum(len(block) for block in blocks)
        logs.LOG.info('Found %d relevant source(s) for %s %s',
                      n, sm, path)
        logs.LOG.info('Splitting in blocks with at maximum %d ruptures',
                      bs.max_weight)
        for i, block in enumerate(blocks, 1):
            logs.LOG.info('Block %d: %d sources, %d ruptures',
                          i, len(block), block.weight)
        num_sources.append(n)
    return num_sources