def reference(self): return scipy.special.softmax(self.numpy(self.data), axis=1) def config(self): return [self.M, self.N] @staticmethod def module(): return "softmax" def memory_workload(self): if self.mode == "fwd": sol_count = 1 + 1 algorithmic_count = 3 + 1 else: sol_count = (1 + 1) + (1 + 1) algorithmic_count = (3 + 1) + (3 + 1) buffer_size = self.M * self.N * 4 return { "sol": buffer_size * sol_count, "algorithmic": buffer_size * algorithmic_count, } @staticmethod def default_configs(): return [[128, 1 << 16]] benchmark.register_benchmark_class(SoftmaxBench)
# NOTE(review): this chunk opens mid-method — the enclosing `def` (presumably a
# memory_workload-style method of the pooling bench base) is outside this view,
# so its tail is preserved verbatim below.
        return {
            "sol": buffer_size * sol_count,
            "algorithmic": buffer_size * algorithmic_count,
        }

    @staticmethod
    def default_configs():
        # Single default problem; presumably [N, C, H, W, K] — confirm against
        # the base class constructor, which is outside this view.
        return [[3, 16, 32, 256, 256]]


class MaxPoolBench(PoolingBench):
    """Max-pooling flavor of the shared PoolingBench harness."""

    def __init__(self, *args):
        super().__init__("maxpool", *args)

    @staticmethod
    def module():
        return "maxpool"


class AvgPoolBench(PoolingBench):
    """Average-pooling flavor of the shared PoolingBench harness."""

    def __init__(self, *args):
        super().__init__("avgpool", *args)

    @staticmethod
    def module():
        return "avgpool"


benchmark.register_benchmark_class(MaxPoolBench)
benchmark.register_benchmark_class(AvgPoolBench)
def module(): return "batchnorm" class InstanceNormBench(NormalizationBench): def forward(self): y = self.instance_norm(self.data) return y @staticmethod def module(): return "instance_norm" def is_supported(self): return tensor_engine.is_supported(self.instance_norm) class LayerNormBench(NormalizationBench): def forward(self): y = self.layer_norm(self.data, [self.H, self.W]) return y @staticmethod def module(): return "layernorm" benchmark.register_benchmark_class(BatchNormBench) benchmark.register_benchmark_class(InstanceNormBench) benchmark.register_benchmark_class(LayerNormBench)
class ReduceRowBench(ReduceBench):
    """Reduction along the row axis of an (M, N, K) problem."""

    def __init__(self, mode, device, M, N, K):
        # Zero-argument super(): Python 3 idiom, consistent with the other
        # bench subclasses in this file (e.g. MaxPoolBench, ConvBench).
        super().__init__(mode, device, "row", M, N, K)

    @staticmethod
    def module():
        return "reduce_row"


class ReduceMidBench(ReduceBench):
    """Reduction along the middle axis of an (M, N, K) problem."""

    def __init__(self, mode, device, M, N, K):
        super().__init__(mode, device, "mid", M, N, K)

    @staticmethod
    def module():
        return "reduce_mid"


class ReduceColBench(ReduceBench):
    """Reduction along the column axis of an (M, N, K) problem."""

    def __init__(self, mode, device, M, N, K):
        super().__init__(mode, device, "col", M, N, K)

    @staticmethod
    def module():
        return "reduce_col"


benchmark.register_benchmark_class(ReduceRowBench)
benchmark.register_benchmark_class(ReduceMidBench)
benchmark.register_benchmark_class(ReduceColBench)
# NOTE(review): chunk opens mid-branch — the `if self.mode == "fwd":` header of
# the enclosing memory_workload-style method is outside this view; its body is
# preserved verbatim below.
            sol_count = 1
            algorithmic_count = 1
        else:
            # Non-fwd (backward) adds an extra pass per gradient.
            sol_count = 1 + 1
            algorithmic_count = 1 + (1 + 1)
        # NOTE(review): the first two terms are identical (B*M*N twice); for a
        # (B,M,N) @ (B,N,K) matmul the output buffer B*M*K looks intended —
        # verify upstream before changing.
        buffer_size = (self.B * self.M * self.N + self.B * self.M * self.N + self.B * self.N * self.K)
        # presumably 4 bytes per element (fp32), matching the *4 in the other
        # benches of this file — confirm.
        buffer_size *= 4
        return {
            "sol": buffer_size * sol_count,
            "algorithmic": buffer_size * algorithmic_count,
        }

    def compute_workload(self):
        """FLOPs per call: 2*B*M*N*K multiply-adds, scaled by the fwd/bwd pass count."""
        if self.mode == "fwd":
            count = 1
        else:
            count = 1 + (1 + 1)
        # 2 ops (one multiply + one add) per inner-product element.
        op_count = 2 * self.B * self.M * self.N * self.K
        return op_count * count

    @staticmethod
    def default_configs():
        # [B, M, N, K]
        return [[128, 64, 128, 256]]


benchmark.register_benchmark_class(MatMulBench)
return op_count * count @staticmethod def default_configs(): return [ [3, 64, 32, 128, 128, 64], ] class ConvBench(ConvImplBench): def __init__(self, *args): super().__init__("conv", *args) @staticmethod def module(): return "conv" class DepthwiseConvBench(ConvImplBench): def __init__(self, *args): super().__init__("depthwise_conv", *args) @staticmethod def module(): return "depthwise_conv" benchmark.register_benchmark_class(ConvBench) benchmark.register_benchmark_class(DepthwiseConvBench)