def test_k_cache_detection_basic():
    """Check that ``KCacheDetection`` reports exactly one k-cache, for ``foo``.

    A forward vertical loop is built whose single horizontal execution
    contains four assignments mixing vertical (k) and horizontal (i/j)
    read offsets.  After the pass, the only cache name must be ``"foo"``
    and every reported cache must be a ``KCache``.
    """
    statements = [
        # foo is written and read at k+1 ...
        AssignStmtFactory(
            left__name="foo",
            right__name="foo",
            right__offset__k=1,
        ),
        # ... and read again at k-1.
        AssignStmtFactory(
            left__name="bar",
            right__name="foo",
            right__offset__k=-1,
        ),
        # baz is read with an additional offset in i.
        AssignStmtFactory(
            left__name="baz",
            right__name="baz",
            right__offset__i=1,
            right__offset__k=1,
        ),
        # baz is read with an additional offset in j.
        AssignStmtFactory(
            left__name="foo",
            right__name="baz",
            right__offset__j=1,
            right__offset__k=-1,
        ),
    ]
    vertical_loop = VerticalLoopFactory(
        loop_order=LoopOrder.FORWARD,
        sections__0__horizontal_executions__0__body=statements,
    )

    result = KCacheDetection().visit(vertical_loop)

    cached_names = {entry.name for entry in result.caches}
    assert cached_names == {"foo"}
    for entry in result.caches:
        assert isinstance(entry, KCache)
def _optimize_oir(self, oir):
    """Apply the OIR optimization passes to ``oir`` and return the result.

    Every pass is instantiated and run through its ``visit`` method in the
    order listed below; the output of one pass is the input of the next.
    """
    # Order matters: each pass consumes the tree produced by the previous one.
    pass_sequence = (
        GreedyMerging,
        AdjacentLoopMerging,
        LocalTemporariesToScalars,
        WriteBeforeReadTemporariesToScalars,
        OnTheFlyMerging,
        NoFieldAccessPruning,
        IJCacheDetection,
        KCacheDetection,
        PruneKCacheFills,
        PruneKCacheFlushes,
    )
    current = oir
    for pass_cls in pass_sequence:
        current = pass_cls().visit(current)
    return current
def _optimize_oir(self, oir):
    """Apply the OIR optimization passes to ``oir`` and return the result.

    The tree is first handed to ``optimize_horizontal_executions`` together
    with ``GraphMerging``; afterwards each visitor pass listed below is
    instantiated and run through ``visit`` in order, feeding its output to
    the next pass.
    """
    current = optimize_horizontal_executions(oir, GraphMerging)

    # Order matters: each pass consumes the tree produced by the previous one.
    visitor_passes = (
        AdjacentLoopMerging,
        LocalTemporariesToScalars,
        WriteBeforeReadTemporariesToScalars,
        OnTheFlyMerging,
        MaskStmtMerging,
        NoFieldAccessPruning,
        IJCacheDetection,
        KCacheDetection,
        PruneKCacheFills,
        PruneKCacheFlushes,
    )
    for pass_cls in visitor_passes:
        current = pass_cls().visit(current)
    return current
def test_k_cache_detection_single_access_point():
    """Check that ``KCacheDetection`` reports no caches for this loop.

    The forward vertical loop holds two horizontal executions: the first
    assigns ``bar`` to ``foo``, the second assigns ``baz`` (read at k+1) to
    ``bar``.  After the pass, ``caches`` must be empty.
    """
    # NOTE(review): judging by the test name, no field qualifies because each
    # is accessed at a single point only -- confirm against KCacheDetection.
    first_execution = HorizontalExecutionFactory(
        body=[AssignStmtFactory(left__name="foo", right__name="bar")],
    )
    second_execution = HorizontalExecutionFactory(
        body=[
            AssignStmtFactory(
                left__name="bar",
                right__name="baz",
                right__offset__k=1,
            ),
        ],
    )
    vertical_loop = VerticalLoopFactory(
        loop_order=LoopOrder.FORWARD,
        sections__0__horizontal_executions=[first_execution, second_execution],
    )

    result = KCacheDetection().visit(vertical_loop)

    assert not result.caches