def test_split_in_blocks(self):
    """
    Splitting 9 weighted items with hint=1 gives a single block;
    with hint=2 the greedy splitter produces 3 blocks whose repr
    (including the per-block weights) is checked exactly.
    """
    weights = dict(a=11, b=10, c=100, d=15, e=20, f=5, g=30, h=17, i=25)
    blocks = list(split_in_blocks('abcdefghi', 1, weights.get))
    self.assertEqual(len(blocks), 1)

    blocks = list(split_in_blocks('abcdefghi', 2, weights.get))
    self.assertEqual(len(blocks), 3)
    self.assertEqual(
        repr(blocks),
        "[<WeightedSequence ['a', 'b'], weight=21>, "
        "<WeightedSequence ['c', 'd'], weight=115>, "
        "<WeightedSequence ['e', 'f', 'g', 'h', 'i'], weight=97>]")
def test_split_in_blocks(self):
    """
    Check the number of blocks produced by split_in_blocks for
    hints 1 and 2, and pin down the exact repr of the 3-block case.
    """
    weights = {'a': 11, 'b': 10, 'c': 100, 'd': 15, 'e': 20,
               'f': 5, 'g': 30, 'h': 17, 'i': 25}
    one_block = list(split_in_blocks('abcdefghi', 1, weights.get))
    self.assertEqual(len(one_block), 1)

    three_blocks = list(split_in_blocks('abcdefghi', 2, weights.get))
    self.assertEqual(len(three_blocks), 3)
    expected = ("[<WeightedSequence ['a', 'b'], weight=21>, "
                "<WeightedSequence ['c', 'd'], weight=115>, "
                "<WeightedSequence ['e', 'f', 'g', 'h', 'i'], weight=97>]")
    self.assertEqual(repr(three_blocks), expected)
def apply_reduce(task, task_args, agg=lambda a, x: x, acc=None,
                 concurrent_tasks=CONCURRENT_TASKS,
                 weight=lambda item: 1, key=lambda item: 'Unspecified'):
    """
    Apply a task to a tuple of the form (job_id, data, *args) by
    splitting the data in chunks and reduce the results with an
    aggregation function.

    :param task: an oqtask
    :param task_args: the arguments to be passed to the task function
    :param agg: the aggregation function
    :param acc: initial value of the accumulator
    :param concurrent_tasks: hint about how many tasks to generate
    :param weight: function to extract the weight of an item in data
    :param key: function to extract the kind of an item in data
    """
    job_id, data = task_args[0], task_args[1]
    extra = task_args[2:]
    if not data:
        # nothing to process: return the accumulator unchanged
        return acc
    if len(data) == 1 or not concurrent_tasks:
        # too little data (or tasking disabled): run in-process
        return agg(acc, task.task_func(job_id, data, *extra))
    # fan out one task per block and reduce the partial results
    task_inputs = [(job_id, block) + extra
                   for block in split_in_blocks(
                       data, concurrent_tasks, weight, key)]
    return map_reduce(task, task_inputs, agg, acc)
def split_site_collection(sitecol, num_chunks):
    """
    Split the full site collection in several FilteredSiteCollections,
    one per block of site indices.

    :param sitecol: full site collection
    :param num_chunks: hint for the number of blocks to generate
    """
    return (FilteredSiteCollection(idx_block, sitecol)
            for idx_block in split_in_blocks(sitecol.indices, num_chunks))
def task_arg_gen(self):
    """
    Yield one tuple of the form (job_id, ruptures, sites, gmf_id)
    per block of SESRuptures belonging to this calculation's SES
    collection, where `ruptures` is a WeightedSequence block produced
    by split_in_blocks with hint `self.concurrent_tasks`.

    NOTE(review): the previous docstring described a different tuple
    (job_id, sitecol, rupture_id, gmf_id, task_seed, num_realizations)
    and claimed that only a single task is generated; neither matches
    the code below, which yields one tuple per block — confirm with
    the task consumer before relying on either description.
    """
    # all the SESRuptures of the calculation's SES collection
    ses_ruptures = models.SESRupture.objects.filter(
        rupture__ses_collection=self.ses_coll.id)
    for ruptures in split_in_blocks(ses_ruptures, self.concurrent_tasks):
        yield self.job.id, ruptures, self.sites, self.gmf.id
def split(self, hint):
    """
    Split the sources in a number of blocks close to the given `hint`,
    yielding pairs (trt_model, block).

    :param int hint: hint for the number of blocks
    """
    if not self.sources:
        return
    blocks = split_in_blocks(
        self.sources, hint,
        self.weight.__getitem__, self.trt_model.__getitem__)
    for block in blocks:
        # split_in_blocks groups by trt_model, so the first source
        # in the block determines the trt_model of the whole block
        yield self.trt_model[block[0]], block
def test_split_with_kind(self):
    """
    Check that block_splitter and split_in_blocks honor the `kind`
    callback: sources of different typology never share a block.
    """
    Source = namedtuple('Source', 'typology, weight')
    s1 = Source('point', 1)
    s2 = Source('point', 1)
    s3 = Source('area', 2)
    s4 = Source('area', 4)
    s5 = Source('area', 4)
    blocks = list(
        block_splitter([s1, s2, s3, s4, s5], max_weight=6,
                       weight=attrgetter('weight'),
                       kind=attrgetter('typology')))
    # fix: wrap map() in list() — on Python 3 map returns an iterator,
    # which never compares equal to a list, so the original assertion
    # could only pass on Python 2
    self.assertEqual(list(map(len, blocks)), [2, 2, 1])
    self.assertEqual([b.weight for b in blocks], [2, 6, 4])

    blocks = list(
        split_in_blocks([s1, s2, s3, s4, s5], hint=6,
                        weight=attrgetter('weight'),
                        kind=attrgetter('typology')))
    self.assertEqual(list(map(len, blocks)), [2, 1, 1, 1])
    self.assertEqual([b.weight for b in blocks], [2, 2, 4, 4])
def generate_gmfs(self):
    """
    Generate the GMFs and optionally the hazard curves too.

    Builds one RuptureData per ProbabilisticRupture of the current
    hazard calculation, splits them in blocks and submits one
    compute_and_save_gmfs task per block, then aggregates the results.
    """
    sitecol = self.hc.site_collection
    otm = tasks.OqTaskManager(compute_and_save_gmfs, logs.LOG.progress)
    task_no = 0
    rupture_data = []
    # collect the rupture data ordered by trt_model, so that the
    # blocks produced below (keyed on RuptureData.get_trt) are
    # contiguous runs of the same tectonic region type
    for rupture in models.ProbabilisticRupture.objects.filter(
            trt_model__lt_model__hazard_calculation=self.hc
            ).order_by('trt_model'):
        rdata = RuptureData(
            self.hc.site_collection, rupture,
            # (SESRupture id, seed) pairs for every seismic event set
            # rupture spawned by this probabilistic rupture
            [(r.id, r.seed) for r in rupture.sesrupture_set.all()])
        rupture_data.append(rdata)
    # one task per block, weighted by RuptureData.get_weight and
    # grouped by RuptureData.get_trt
    for rblock in split_in_blocks(
            rupture_data, self.concurrent_tasks,
            RuptureData.get_weight, RuptureData.get_trt):
        otm.submit(self.job.id, sitecol.sids, rblock, task_no)
        task_no += 1
    otm.aggregate_results(self.agg_curves, self.curves)
def parallel_apply(task, task_args,
                   concurrent_tasks=multiprocessing.cpu_count(),
                   weight=lambda item: 1,
                   kind=lambda item: 'Unspecified'):
    """
    Apply a list processing task to a tuple of task_args with the form
    (job_id, data, *args). Return the list of processed data.

    :param task: an oqtask
    :param task_args: the arguments to be passed to the task function
    :param concurrent_tasks: hint about how many tasks to generate
    :param weight: function to extract the weight of an item in data
    :param kind: function to extract the kind of an item in data
    """
    job_id, data = task_args[0], task_args[1]
    extra = task_args[2:]
    if not data:
        # no input data: nothing to process
        return []
    if len(data) == 1:
        # a single item: avoid the tasking overhead entirely
        return task.task_func(job_id, data, *extra)
    # fan out one task per block and concatenate the resulting lists
    task_inputs = [(job_id, block) + extra
                   for block in split_in_blocks(
                       data, concurrent_tasks, weight, kind)]
    return map_reduce(task, task_inputs, list.__add__, [])