def verify_mean(data_shape, axis, exclude, keepdims):
    """Check ``relay.mean`` on random float32 data through ``verify_results``.

    Args:
        data_shape: Shape of the Relay input variable and of the sample data.
        axis: Forwarded to ``relay.mean`` as ``axis``.
        exclude: Forwarded to ``relay.mean`` as ``exclude``.
        keepdims: Forwarded to ``relay.mean`` as ``keepdims``.
    """
    element_type = "float32"

    inp = relay.var("x", shape=data_shape, dtype=element_type)
    reduced = relay.mean(inp, axis=axis, keepdims=keepdims, exclude=exclude)
    mean_func = relay.Function([inp], reduced)

    # Uniform samples in [0, 1), cast to the same dtype as the Relay variable.
    sample = np.random.uniform(size=data_shape).astype(element_type)
    verify_results(mean_func, [sample], "test_mean", rtol=1e-5, atol=1e-5)
def test_mean(self):
    """Convert a Relay mean over axis 1 with ``xf_relay`` and check the layers."""
    input_type = relay.TensorType((-1, 4, 1, 1), "float32")
    data = relay.var("data", input_type)
    mean_fn = relay.Function([data], relay.mean(data, axis=1))

    # Type inference runs on the module before it is handed to the converter.
    module = relay.transform.InferType()(tvm.IRModule.from_expr(mean_fn))
    layers = xf_relay.from_relay(module, {}).get_layers()

    assert layers[0].type[0] == "Input"
    mean_layer = layers[1]
    assert mean_layer.type[0] == "Mean"
    assert mean_layer.shapes == [-1, 1, 1]
def _get_model(shape, axis, keepdims, input_zp, input_sc, output_zp, output_sc, dtype):
    """Return a Relay expression: cast to int32 -> mean -> requantize.

    Args:
        shape: Shape of the input variable ``a``.
        axis: Forwarded to ``relay.mean`` as ``axis``.
        keepdims: Forwarded to ``relay.mean`` as ``keepdims``.
        input_zp: Input zero point, wrapped as an int32 constant.
        input_sc: Input scale, wrapped as a float32 constant.
        output_zp: Output zero point, wrapped as an int32 constant.
        output_sc: Output scale, wrapped as a float32 constant.
        dtype: Dtype of the input variable and ``out_dtype`` of the requantize.

    Returns:
        The ``relay.qnn.op.requantize`` call expression.
    """
    inp = relay.var("a", shape=shape, dtype=dtype)
    widened = relay.op.cast(inp, "int32")
    averaged = relay.mean(widened, axis=axis, keepdims=keepdims)

    return relay.qnn.op.requantize(
        averaged,
        input_scale=relay.const(input_sc, "float32"),
        input_zero_point=relay.const(input_zp, "int32"),
        output_scale=relay.const(output_sc, "float32"),
        output_zero_point=relay.const(output_zp, "int32"),
        out_dtype=dtype,
    )
def create_relay_graph_with_same_quantization():
    """Build an IRModule computing cast -> mean -> requantize with identity quantization.

    Reads ``ifm_shape``, ``dtype``, ``axis`` and ``keep_dims`` from the
    enclosing scope. The requantize uses scale 1.0 and zero point 0 on both
    the input and the output side.

    Returns:
        The ``tvm.IRModule`` created from the resulting function.
    """
    inp = relay.var("input", shape=ifm_shape, dtype=dtype)
    widened = relay.cast(inp, dtype="int32")
    averaged = relay.mean(widened, axis=axis, keepdims=keep_dims)

    quant_args = {
        "input_scale": relay.const(1.0, dtype="float32"),
        "input_zero_point": relay.const(0, dtype="int32"),
        "output_scale": relay.const(1.0, dtype="float32"),
        "output_zero_point": relay.const(0, dtype="int32"),
    }
    body = relay.qnn.op.requantize(averaged, **quant_args)

    graph_fn = relay.Function(relay.analysis.free_vars(body), body)
    return tvm.IRModule.from_expr(graph_fn)
def create_mod_from_relay():
    """Build a cast -> mean -> requantize module plus random inputs and reference outputs.

    Reads ``ifm_shape``, ``dtype``, ``axis`` and ``keep_dims`` from the
    enclosing scope.

    Returns:
        A ``(mod, input_data, output_data)`` tuple: the ``tvm.IRModule``, a
        dict mapping ``"input"`` to random integers drawn from [-127, 128),
        and the result of ``generate_ref_data(mod, input_data)``.
    """
    inp = relay.var("input", shape=ifm_shape, dtype=dtype)
    averaged = relay.mean(
        relay.cast(inp, dtype="int32"),
        axis=axis,
        keepdims=keep_dims,
    )
    body = relay.qnn.op.requantize(
        averaged,
        input_scale=relay.const(1.0, dtype="float32"),
        input_zero_point=relay.const(0, dtype="int32"),
        output_scale=relay.const(1.0, dtype="float32"),
        output_zero_point=relay.const(0, dtype="int32"),
    )
    mod = tvm.IRModule.from_expr(
        relay.Function(relay.analysis.free_vars(body), body)
    )

    samples = np.random.randint(low=-127, high=128, size=ifm_shape, dtype=dtype)
    input_data = {"input": samples}
    output_data = generate_ref_data(mod, input_data)
    return mod, input_data, output_data
def get_model():
    """Build a three-output Relay tuple: requantized mean, max-pool and clip.

    Reads ``input_name``, ``input_shape``, ``dtype``, ``dtype_min`` and
    ``dtype_max`` from the enclosing scope. Both the mean branch and the
    max-pool branch consume the same clipped input.

    Returns:
        ``relay.Tuple((mean, max_pool, clip))``.
    """
    scale = 0.0784314
    zero_point = dtype_min + 128

    inp = relay.var(input_name, shape=input_shape, dtype=dtype)
    clipped = relay.op.clip(inp, dtype_min, dtype_max)

    pooled = relay.nn.max_pool2d(clipped, (2, 2), (2, 2), ceil_mode=True, layout="NHWC")

    # Mean branch: widen to int32, reduce over axes 1 and 2, then requantize.
    widened = relay.op.cast(clipped, "int32")
    averaged = relay.mean(widened, axis=[1, 2], keepdims=True)
    requantized = relay.qnn.op.requantize(
        averaged,
        input_scale=relay.const(scale, "float32"),
        input_zero_point=relay.const(zero_point, "int32"),
        output_scale=relay.const(scale, "float32"),
        output_zero_point=relay.const(zero_point, "int32"),
        out_dtype=dtype,
    )

    return relay.Tuple((requantized, pooled, clipped))
def before():
    """Build the input graph: NHWC conv2d + bias add followed by batch_norm.

    The batch_norm mean and variance are computed from the biased conv output
    with ``axis=3, exclude=True``; only element 0 of the batch_norm result is
    kept.

    Returns:
        A ``relay.Function`` over the free variables of that expression.
    """
    data = relay.var("x", shape=(1, 56, 56, 64))
    kernel = relay.var("weight", shape=(3, 3, 64, 16))
    bias = relay.var("bias", shape=(1, 1, 1, 16))

    conv = relay.nn.conv2d(
        data,
        kernel,
        channels=16,
        kernel_size=(3, 3),
        padding=(1, 1),
        data_layout="NHWC",
        kernel_layout="HWIO",
    )
    biased = relay.add(conv, bias)

    moving_mean = relay.mean(biased, axis=3, exclude=True)
    moving_var = relay.variance(biased, axis=3, exclude=True)
    gamma = relay.var("gamma")
    beta = relay.var("beta")

    bn = relay.nn.batch_norm(biased, gamma, beta, moving_mean, moving_var, axis=3)
    result = bn[0]
    return relay.Function(analysis.free_vars(result), result)
def expected():
    """Build the expected graph: NCHW16c conv2d with batch_norm written out explicitly.

    Inputs are moved NHWC -> NCHW -> NCHW16c (the weight HWIO -> OIHW), the
    conv2d and bias add run in NCHW16c, and the normalisation is expressed as
    ``add * denom + numerator`` with both per-channel terms expanded and
    transformed to NCHW16c. The result is transformed back to NHWC.

    Returns:
        A ``relay.Function`` over the free variables of that expression.
    """

    def _to_nchw16c(per_channel):
        # Expand by two axes at position 1, then one axis at position 0,
        # and move the result from NCHW into the blocked NCHW16c layout.
        widened = relay.expand_dims(per_channel, axis=1, num_newaxis=2)
        widened = relay.expand_dims(widened, axis=0)
        return relay.layout_transform(widened, src_layout="NCHW", dst_layout="NCHW16c")

    data = relay.var("x", shape=(1, 56, 56, 64))
    kernel = relay.var("weight", shape=(3, 3, 64, 16))
    bias = relay.var("bias", shape=(1, 1, 1, 16))

    data_nchw = relay.layout_transform(data, src_layout="NHWC", dst_layout="NCHW")
    data_blocked = relay.layout_transform(data_nchw, src_layout="NCHW", dst_layout="NCHW16c")
    kernel_oihw = relay.layout_transform(kernel, src_layout="HWIO", dst_layout="OIHW")
    conv = relay.nn.conv2d(
        data_blocked,
        kernel_oihw,
        channels=16,
        kernel_size=(3, 3),
        padding=(1, 1),
        data_layout="NCHW16c",
    )

    bias_nchw = relay.layout_transform(bias, src_layout="NHWC", dst_layout="NCHW")
    bias_blocked = relay.layout_transform(bias_nchw, src_layout="NCHW", dst_layout="NCHW16c")
    add = relay.add(conv, bias_blocked)

    # Statistics are taken from the NHWC view of the biased conv output.
    add_nchw = relay.layout_transform(add, src_layout="NCHW16c", dst_layout="NCHW")
    add_nhwc = relay.layout_transform(add_nchw, src_layout="NCHW", dst_layout="NHWC")
    mean = relay.mean(add_nhwc, axis=3, exclude=True)
    var = relay.variance(add_nhwc, axis=3, exclude=True)

    inv_std = relay.const(1.0) / relay.sqrt(var + relay.const(1e-05))
    gamma = relay.var("gamma", shape=(16,))
    denom = inv_std * gamma
    scaled = add * _to_nchw16c(denom)

    beta = relay.var("beta", shape=(16,))
    numerator = (-mean) * denom + beta
    shifted = scaled + _to_nchw16c(numerator)

    out_nchw = relay.layout_transform(shifted, src_layout="NCHW16c", dst_layout="NCHW")
    out_nhwc = relay.layout_transform(out_nchw, src_layout="NCHW", dst_layout="NHWC")
    return relay.Function(analysis.free_vars(out_nhwc), out_nhwc)
def _execute(self):
    """Lower ``self.graph`` to a Relay function, build it, and create a runtime.

    Each node in ``self.graph`` is translated according to its ``op_type``
    into a Relay expression and stored in ``self.node_dict`` under the node's
    name. The expression named by ``self.output_node_ids[0]`` becomes the body
    of ``main``, whose parameters are that expression's free variables. The
    module is built for the ``'llvm'`` target at ``opt_level=0`` and the graph
    runtime created on ``tvm.cpu()`` is stored in ``self.m``.

    Side effects:
        Resets ``self.node_dict``, sets ``self.m``, and removes the
        ``'A_shape'`` / ``'O_shape'`` entries from every node's ``attrs``.

    Raises:
        NotImplementedError: If a node's ``op_type`` has no translation.
    """
    self.node_dict = {}

    # Scalar free variables consumed by the 'gelu' expression below.
    gelu_a = relay.var('gelu_a', shape=())
    gelu_b = relay.var('gelu_b', shape=())
    gelu_c = relay.var('gelu_c', shape=())
    gelu_d = relay.var('gelu_d', shape=())
    gelu_e = relay.var('gelu_e', shape=())

    self.node_dict['1'] = relay.var('input.1', shape=(1, 128), dtype='int32')
    self.node_dict['2'] = relay.var('input.2', shape=(1, 128), dtype='int32')

    # Ops called as op(lhs, rhs) with no attributes.
    binary_ops = {
        'add': relay.add,
        'subtract': relay.subtract,
        'multiply': relay.multiply,
        'power': relay.power,
        'nn.batch_matmul': relay.nn.batch_matmul,
    }
    # Ops called as op(x, **attrs).
    attr_forwarding_ops = {
        'strided_slice': relay.strided_slice,
        'transpose': relay.transpose,
        'squeeze': relay.squeeze,
        'nn.softmax': relay.nn.softmax,
    }

    for gnode in self.graph:
        name = gnode['name']
        op_type = gnode['op_type']
        attrs = gnode['attrs']
        # These keys must not be forwarded as Relay keyword arguments. Using
        # pop() with a default keeps the removal in place but makes it safe
        # when the keys are already gone (e.g. on a second _execute() call).
        attrs.pop('A_shape', None)
        attrs.pop('O_shape', None)
        node_inputs = gnode['inputs']

        def operand(index):
            return get_input(self.node_dict, self.params, node_inputs[index])

        if op_type == 'Const':
            zeros = np.zeros(attrs['shape'], dtype=np.int32)
            expr = relay.const(zeros, dtype='int32')
        elif op_type in binary_ops:
            lhs = operand(0)
            rhs = operand(1)
            expr = binary_ops[op_type](lhs, rhs)
        elif op_type in attr_forwarding_ops:
            expr = attr_forwarding_ops[op_type](operand(0), **attrs)
        elif op_type == 'expand_dims':
            expr = relay.expand_dims(operand(0), attrs['axis'], attrs['num_newaxis'])
        elif op_type == 'reshape':
            expr = relay.reshape(operand(0), attrs['newshape'])
        elif op_type == 'take':
            data = operand(0)
            indices = operand(1)
            expr = relay.take(data, indices, axis=attrs['axis'][0], mode=attrs['mode'])
        elif op_type == 'one_hot':
            indices = operand(0)
            on_value = operand(1)
            off_value = operand(2)
            expr = relay.one_hot(indices, on_value, off_value, **attrs)
        elif op_type == 'mean':
            expr = relay.mean(
                operand(0),
                axis=attrs['axis'],
                exclude=attrs['exclude'],
                keepdims=attrs['keepdims'],
            )
        elif op_type == 'nn.dense':
            data = operand(0)
            weight = operand(1)
            expr = relay.nn.dense(data, weight, units=attrs['units'][0])
        elif op_type == 'tanh':
            expr = relay.tanh(operand(0))
        elif op_type == 'gelu':
            x = operand(0)
            expr = x * gelu_a * (
                gelu_b + relay.tanh(gelu_c * (x + gelu_d * relay.power(x, gelu_e)))
            )
        else:
            # Fail loudly instead of dropping into a debugger and then
            # storing a stale (or undefined) expression for this node.
            raise NotImplementedError('not supported op %s ' % op_type)

        self.node_dict[name] = expr

    output_name = self.output_node_ids[0]
    output = self.node_dict[output_name]
    free_inputs = relay.analysis.free_vars(output)
    func = relay.Function(free_inputs, output)

    mod = tvm.IRModule()
    mod['main'] = func
    with relay.build_config(opt_level=0):
        graph, lib, _params = relay.build(mod, 'llvm', params={})
    self.m = graph_runtime.create(graph, lib, tvm.cpu())