def test_transformer_add_replace(exprs, block2, block3):
    """
    Check a Transformer that substitutes one expression and prepends a
    comment line to another within the same visit.
    """
    replaced_marker = '// Replaced expression'
    added_marker = '// Adding a simple line'

    # exprs[0] is swapped out entirely; exprs[1] is kept, preceded by a comment
    mapper = {
        exprs[0]: Block(c.Line(replaced_marker)),
        exprs[1]: Block(c.Line(added_marker), exprs[1]),
    }
    transformer = Transformer(mapper)

    for original in (block2, block3):
        transformed_code = str(transformer.visit(original).ccode)
        lines_before = len(str(original.ccode).split('\n'))
        lines_after = len(transformed_code.split('\n'))

        # At least the extra comment line must show up in the output
        assert lines_after >= lines_before + 1
        assert replaced_marker in transformed_code
        assert added_marker in transformed_code
        # NOTE(review): test_transformer_wrap matches on `a[i]` rather than
        # `a[i0]`; confirm this literal still matches the generated code,
        # otherwise this negative check passes vacuously.
        assert "a[i0] = a[i0] + b[i0] + 5.0F;" not in transformed_code
def test_transformer_replace(exprs, block1, block2, block3):
    """
    Check a Transformer that substitutes a single expression with a
    comment line.
    """
    marker = '// Replaced expression'
    transformer = Transformer({exprs[0]: Block(c.Line(marker))})

    for original in (block1, block2, block3):
        transformed_code = str(transformer.visit(original).ccode)
        lines_before = len(str(original.ccode).split('\n'))
        lines_after = len(transformed_code.split('\n'))

        # The replacement must not shrink the generated code
        assert lines_after >= lines_before
        assert marker in transformed_code
        # NOTE(review): test_transformer_wrap matches on `a[i]` rather than
        # `a[i0]`; confirm this literal still matches the generated code,
        # otherwise this negative check passes vacuously.
        assert "a[i0] = a[i0] + b[i0] + 5.0F;" not in transformed_code
def test_transformer_wrap(exprs, block1, block2, block3):
    """
    Check a Transformer that keeps an expression but surrounds it with an
    opening and a closing comment line.
    """
    opening = '// This is the opening comment'
    closing = '// This is the closing comment'

    wrapped = Block(c.Line(opening), exprs[0], c.Line(closing))
    transformer = Transformer({exprs[0]: wrapped})

    for original in (block1, block2, block3):
        transformed_code = str(transformer.visit(original).ccode)
        lines_before = len(str(original.ccode).split('\n'))
        lines_after = len(transformed_code.split('\n'))

        # Two comment lines were added around the expression
        assert lines_after >= lines_before + 2
        assert opening in transformed_code
        assert closing in transformed_code
        # The wrapped expression itself must survive the transformation
        assert "a[i] = a[i] + b[i] + 5.0F;" in transformed_code
def _ompize(self, state, **kwargs):
    """
    Add OpenMP pragmas to the Iteration/Expression tree to emit parallel code.

    For every node in ``state.nodes``, each Iteration tree is scanned for a
    run of consecutive Iterations that are parallel but not vectorizable.
    The first Iteration of such a run is replaced by a Block whose header is
    ``omplang['par-region']`` and whose body holds the Denormals nodes found
    in ``state.nodes``, the OpenMP loop pragma and the Iteration itself. The
    original Denormals nodes are replaced by a comment.

    Returns a dict ``{'nodes': processed}`` with one transformed tree per
    input node. ``kwargs`` is accepted but not used here.
    """
    # psutil returns None when the physical core count cannot be determined;
    # comparing None with an int raises TypeError on Python 3, so an unknown
    # count is treated as "do not collapse" below. Queried once: the value
    # does not depend on the loop being processed.
    ncores = psutil.cpu_count(logical=False)

    # ``state.nodes`` is not modified in this method, so search it only once
    denormals = FindNodes(Denormals).visit(state.nodes)

    def is_candidate(i):
        return i.is_Parallel and not i.is_Vectorizable

    processed = []
    for node in state.nodes:
        # The denormals flag is re-emitted inside each parallel region, so
        # its original occurrences are turned into comments
        mapper = {
            i: List(c.Comment('DLE: moved denormals flag')) for i in denormals
        }

        # Handle parallelizable loops
        for tree in retrieve_iteration_tree(node):
            # Determine the number of consecutive parallelizable Iterations
            candidates = filter_iterations(tree, key=is_candidate,
                                           stop='consecutive')
            if not candidates:
                continue

            # Heuristic: if at least two parallel loops are available and the
            # physical core count is known and not below
            # self.thresholds['collapse'], then omp-collapse the loops
            nparallel = len(candidates)
            if ncores is None or ncores < self.thresholds['collapse'] or\
                    nparallel < 2:
                parallelism = omplang['for']
            else:
                parallelism = omplang['collapse'](nparallel)

            root = candidates[0]
            mapper[root] = Block(header=omplang['par-region'],
                                 body=denormals + [Element(parallelism), root])

        processed.append(Transformer(mapper).visit(node))

    return {'nodes': processed}
def _ompize(self, state, **kwargs):
    """
    Add OpenMP pragmas to the Iteration/Expression tree to emit parallel code.

    For every node in ``state.nodes``:

    * Iteration trees are scanned for Iterations that are parallel and
      neither elementizable nor vectorizable. Trees with such candidates are
      collected into groups; consecutive trees containing tagged Iterations
      share a group, so that they end up in the same parallel region.
    * The first candidate of each tree is rebuilt with an extra OpenMP loop
      pragma (``for`` or ``collapse``).
    * One parallel-region Block is built per group. Its header carries a
      ``private(...)`` clause naming the symbols found under the group's
      roots that are flagged ``is_TensorFunction`` and ``_mem_stack``; its
      body holds the Denormals nodes followed by the rebuilt Iterations.
    * In the substitution mapper, rebuilt Iterations flagged
      ``is_Remainder`` are mapped to None and the others to the parallel
      region. The original Denormals nodes are mapped to None as well.

    Returns a dict ``{'nodes': processed}`` holding the transformed trees;
    a node whose transformation yields None is left out. ``kwargs`` is
    accepted but not used here.
    """
    # psutil returns None when the physical core count cannot be determined;
    # comparing None with an int raises TypeError on Python 3, so an unknown
    # count is treated as "do not collapse" below. Queried once: the value
    # does not depend on the loop being processed.
    ncores = psutil.cpu_count(logical=False)

    # ``state.nodes`` is not modified in this method, so search it only once
    denormals = FindNodes(Denormals).visit(state.nodes)

    def is_candidate(i):
        return i.is_Parallel and\
            not (i.is_Elementizable or i.is_Vectorizable)

    processed = []
    for node in state.nodes:
        # The denormals flag is re-emitted inside each parallel region
        mapper = OrderedDict([(i, None) for i in denormals])

        # Group by outer loop so that we can embed within the same parallel
        # region
        was_tagged = False
        groups = OrderedDict()
        for tree in retrieve_iteration_tree(node):
            # Determine the parallelizable Iterations
            candidates = filter_iterations(tree, key=is_candidate, stop='asap')
            if not candidates:
                was_tagged = False
                continue
            # Consecutive tagged Iteration go in the same group: when both
            # this tree and the previous one are tagged, the index of the
            # most recently created group is reused
            is_tagged = any(i.tag is not None for i in tree)
            group_id = len(groups) - (is_tagged & was_tagged)
            members = groups.setdefault(group_id, OrderedDict())
            members[candidates[0]] = candidates
            was_tagged = is_tagged

        # Handle parallelizable loops
        for group in groups.values():
            private = []
            for root, tree in group.items():
                # Heuristic: if at least two parallel loops are available and
                # the physical core count is known and not below
                # self.thresholds['collapse'], then omp-collapse the loops
                nparallel = len(tree)
                if ncores is None or ncores < self.thresholds['collapse'] or\
                        nparallel < 2:
                    parallel = omplang['for']
                else:
                    parallel = omplang['collapse'](nparallel)

                mapper[root] = root._rebuild(pragmas=root.pragmas + (parallel, ))

                # Track the thread-private and thread-shared variables
                private.extend([
                    i for i in FindSymbols('symbolics').visit(root)
                    if i.is_TensorFunction and i._mem_stack
                ])

            # Build the parallel region
            private = sorted(set([i.name for i in private]))
            private = ('private(%s)' % ','.join(private)) if private else ''
            rebuilt = [v for k, v in mapper.items() if k in group]
            par_region = Block(header=omplang['par-region'](private),
                               body=denormals + rebuilt)
            # Only this group's entries still hold Iterations at this point:
            # entries of earlier groups were already replaced by a Block or
            # None. Iterate over a copy since the mapper is updated in place.
            for k, v in list(mapper.items()):
                if isinstance(v, Iteration):
                    mapper[k] = None if v.is_Remainder else par_region

        handle = Transformer(mapper).visit(node)
        if handle is not None:
            processed.append(handle)

    return {'nodes': processed}