def internal_lltm(input, weight_for_gate, bias_for_gate, old_h, old_c):
    """
    input: [batch_size, 28*28]
    old_h & old_c: [batch_size, state_size]
    concatenate -> X: [batch_size, state_size + 28*28]
    weight_for_gate: [3*state_size, state_size + 28*28]
    bias_for_gate: [3*state_size]
    """
    X = topi.concatenate([old_h, input], axis=1)
    gate_weights = topi.nn.dense(X, weight_for_gate, bias_for_gate)
    gates = topi.split(gate_weights, 3, axis=1)

    input_gate = topi.sigmoid(gates[0])
    output_gate = topi.sigmoid(gates[1])
    candidate_cell = elu(gates[2])  # `elu` is a helper defined elsewhere in the source

    new_c = topi.add(old_c, topi.multiply(candidate_cell, input_gate))
    new_h = topi.multiply(topi.tanh(new_c), output_gate)
    return [new_h, new_c]
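For reference, the cell update that this TOPI graph builds can be written as a plain NumPy function. The sketch below is an illustration for checking the math, not part of the original code; `lltm_reference` is a hypothetical name, and it assumes the `elu` helper called above is the standard alpha=1 ELU:

import numpy as np

def lltm_reference(inp, weight_for_gate, bias_for_gate, old_h, old_c):
    # Mirrors internal_lltm step by step on plain arrays.
    X = np.concatenate([old_h, inp], axis=1)              # [batch, state + 28*28]
    gate_weights = X @ weight_for_gate.T + bias_for_gate  # dense == X @ W.T + b
    ig, og, cc = np.split(gate_weights, 3, axis=1)
    input_gate = 1.0 / (1.0 + np.exp(-ig))                # sigmoid
    output_gate = 1.0 / (1.0 + np.exp(-og))
    candidate_cell = np.where(cc > 0, cc, np.exp(cc) - 1.0)  # ELU, alpha = 1 (assumed)
    new_c = old_c + candidate_cell * input_gate
    new_h = np.tanh(new_c) * output_gate
    return new_h, new_c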
def simulated_quantize_compute(attrs, inputs, out_type, target):
    """Compiler for simulated_quantize."""
    assert len(inputs) == 4
    assert attrs.sign
    assert attrs.rounding == "round"

    data, scale, clip_min, clip_max = inputs

    # simulate rounding error
    scaled_data = topi.divide(data, scale)
    clipped_data = topi.maximum(topi.minimum(scaled_data, clip_max), clip_min)
    round_data = topi.round(clipped_data)

    # recover data
    rdata = topi.multiply(round_data, scale)
    return [rdata]
def simulated_quantize_compute(attrs, inputs, out_type):
    """Compiler for simulated_quantize."""
    assert len(inputs) == 4
    assert attrs.sign
    assert attrs.rounding == "round"

    data, scale, clip_min, clip_max = inputs

    if attrs.kind == QAnnotateKind.IDENTITY:
        return [topi.identity(data)]

    # simulate rounding error
    scaled_data = topi.divide(data, scale)
    clipped_data = topi.maximum(topi.minimum(scaled_data, clip_max), clip_min)
    round_data = topi.round(clipped_data)

    # recover data
    rdata = topi.multiply(round_data, scale)
    return [rdata]
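Stripped of the TOPI wrappers, the fake-quantization round trip in both compute functions is just scale, clip, round, rescale. A minimal NumPy illustration (the function name here is an assumption, not part of the original pass):

import numpy as np

def simulated_quantize_ref(data, scale, clip_min, clip_max):
    # Quantize: scale down, clip to the representable range, round to integers...
    scaled = data / scale
    clipped = np.clip(scaled, clip_min, clip_max)
    rounded = np.round(clipped)
    # ...then scale back up, staying in float so the rounding error is visible.
    return rounded * scale

# e.g. an int8-style range with scale 0.1:
print(simulated_quantize_ref(np.array([0.04, 0.26, 99.0]), 0.1, -127, 127))
# -> [ 0.   0.3 12.7]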
def multiply_compute(attrs, inputs, output_type, target):
    assert len(inputs) == 2
    return [topi.multiply(inputs[0], inputs[1])]
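To see how a tensor returned by such a compute function is actually lowered and executed, here is a minimal standalone sketch around topi.multiply. It assumes the TVM >= 0.7 namespace layout (from tvm import te, topi) and is an illustration, not part of the original operator registration:

import numpy as np
import tvm
from tvm import te, topi

a = te.placeholder((4,), name="a")
b = te.placeholder((4,), name="b")
c = topi.multiply(a, b)  # the same op the compute function returns

s = te.create_schedule(c.op)
f = tvm.build(s, [a, b, c], "llvm")

dev = tvm.cpu(0)
a_nd = tvm.nd.array(np.array([1, 2, 3, 4], dtype="float32"), dev)
b_nd = tvm.nd.array(np.array([10, 20, 30, 40], dtype="float32"), dev)
c_nd = tvm.nd.empty((4,), "float32", dev)
f(a_nd, b_nd, c_nd)
print(c_nd)  # [ 10.  40.  90. 160.]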
def _interpolate(im, im_shape, x, y, out_size, dtype):
    num_batch = im_shape[0]
    height = im_shape[1]
    width = im_shape[2]
    channels = im_shape[3]

    out_height = out_size[0]
    out_width = out_size[1]
    max_y = int(im_shape[1] - 1)
    max_x = int(im_shape[2] - 1)

    # [-1, 1] -> [0, width-1]
    x = topi.multiply(topi.add(x, tvm.const(1, dtype=dtype)),
                      width / tvm.const(2, dtype=dtype))
    y = topi.multiply(topi.add(y, tvm.const(1, dtype=dtype)),
                      height / tvm.const(2, dtype=dtype))

    # do sampling
    dim3 = out_height * out_width * num_batch

    x0 = topi.cast(topi.floor(x), 'int32')
    y0 = topi.cast(topi.floor(y), 'int32')
    x1 = topi.add(x0, tvm.const(1, dtype="int32"))
    y1 = topi.add(y0, tvm.const(1, dtype="int32"))

    x0 = topi.clip(x0, 0, max_x)
    x1 = topi.clip(x1, 0, max_x)
    y0 = topi.clip(y0, 0, max_y)
    y1 = topi.clip(y1, 0, max_y)

    dim2 = width
    dim1 = width * height

    base = tvm.compute((dim3,),
                       lambda i: (i // (out_height * out_width)) * dim1,
                       name='base')

    base_y0 = topi.add(base, topi.multiply(y0, dim2))
    base_y1 = topi.add(base, topi.multiply(y1, dim2))

    idx_a = topi.add(base_y0, x0)
    idx_b = topi.add(base_y1, x0)
    idx_c = topi.add(base_y0, x1)
    idx_d = topi.add(base_y1, x1)

    im_flat = topi.reshape(im, (num_batch * height * width, channels))
    im_flat = topi.cast(im_flat, dtype)

    Ia = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_a[i], j], name='Ia')
    Ib = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_b[i], j], name='Ib')
    Ic = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_c[i], j], name='Ic')
    Id = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_d[i], j], name='Id')

    x0_f = topi.cast(x0, dtype)
    x1_f = topi.cast(x1, dtype)
    y0_f = topi.cast(y0, dtype)
    y1_f = topi.cast(y1, dtype)

    wa = topi.expand_dims(topi.multiply(topi.subtract(x1_f, x), topi.subtract(y1_f, y)), 1)
    wb = topi.expand_dims(topi.multiply(topi.subtract(x1_f, x), topi.subtract(y, y0_f)), 1)
    wc = topi.expand_dims(topi.multiply(topi.subtract(x, x0_f), topi.subtract(y1_f, y)), 1)
    wd = topi.expand_dims(topi.multiply(topi.subtract(x, x0_f), topi.subtract(y, y0_f)), 1)

    output = topi.add(
        topi.add(
            topi.add(topi.multiply(wa, Ia), topi.multiply(wb, Ib)),
            topi.multiply(wc, Ic)),
        topi.multiply(wd, Id))
    return output
def _interpolate(im, im_shape, x, y, out_size, dtype):
    num_batch = im_shape[0]
    height = im_shape[1]
    width = im_shape[2]
    channels = im_shape[3]

    out_height = out_size[0]
    out_width = out_size[1]
    max_y = int(im_shape[1] - 1)
    max_x = int(im_shape[2] - 1)

    # [-1, 1] -> [0, width-1]
    x_temp = topi.multiply(topi.add(x, tvm.const(1, dtype=dtype)),
                           width / tvm.const(2, dtype=dtype))
    y_temp = topi.multiply(topi.add(y, tvm.const(1, dtype=dtype)),
                           height / tvm.const(2, dtype=dtype))

    # do sampling
    dim3 = out_height * out_width * num_batch

    x_zero = topi.cast(topi.floor(x_temp), 'int32')
    y_zero = topi.cast(topi.floor(y_temp), 'int32')
    x_one = topi.add(x_zero, tvm.const(1, dtype="int32"))
    y_one = topi.add(y_zero, tvm.const(1, dtype="int32"))

    x_zero = topi.clip(x_zero, 0, max_x)
    x_one = topi.clip(x_one, 0, max_x)
    y_zero = topi.clip(y_zero, 0, max_y)
    y_one = topi.clip(y_one, 0, max_y)

    dim2 = width

    base = tvm.compute((dim3,),
                       lambda i: (i // (out_height * out_width)) * width * height,
                       name='base')

    base_y0 = topi.add(base, topi.multiply(y_zero, dim2))
    base_y1 = topi.add(base, topi.multiply(y_one, dim2))

    idx_a = topi.add(base_y0, x_zero)
    idx_b = topi.add(base_y1, x_zero)
    idx_c = topi.add(base_y0, x_one)
    idx_d = topi.add(base_y1, x_one)

    im_flat = topi.reshape(im, (num_batch * height * width, channels))
    im_flat = topi.cast(im_flat, dtype)

    i_a = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_a[i], j], name='Ia')
    i_b = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_b[i], j], name='Ib')
    i_c = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_c[i], j], name='Ic')
    i_d = tvm.compute((dim3, channels), lambda i, j: im_flat[idx_d[i], j], name='Id')

    x0_f = topi.cast(x_zero, dtype)
    x1_f = topi.cast(x_one, dtype)
    y0_f = topi.cast(y_zero, dtype)
    y1_f = topi.cast(y_one, dtype)

    w_a = topi.expand_dims(
        topi.multiply(topi.subtract(x1_f, x_temp), topi.subtract(y1_f, y_temp)), 1)
    w_b = topi.expand_dims(
        topi.multiply(topi.subtract(x1_f, x_temp), topi.subtract(y_temp, y0_f)), 1)
    w_c = topi.expand_dims(
        topi.multiply(topi.subtract(x_temp, x0_f), topi.subtract(y1_f, y_temp)), 1)
    w_d = topi.expand_dims(
        topi.multiply(topi.subtract(x_temp, x0_f), topi.subtract(y_temp, y0_f)), 1)

    output = topi.add(
        topi.add(
            topi.add(topi.multiply(w_a, i_a), topi.multiply(w_b, i_b)),
            topi.multiply(w_c, i_c)),
        topi.multiply(w_d, i_d))
    return output
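Both versions implement the same bilinear gather; the index and weight arithmetic is easier to follow in a plain NumPy mirror. The sketch below is an illustration only: the name bilinear_sample_ref and the single-image HWC layout are assumptions, not part of either original:

import numpy as np

def bilinear_sample_ref(im, x, y):
    # im: [H, W, C]; x, y: arrays of normalized coordinates in [-1, 1].
    h, w, _ = im.shape
    x = (x + 1) * w / 2  # [-1, 1] -> [0, w-1], as in _interpolate
    y = (y + 1) * h / 2
    x0 = np.clip(np.floor(x).astype(int), 0, w - 1)
    x1 = np.clip(x0 + 1, 0, w - 1)
    y0 = np.clip(np.floor(y).astype(int), 0, h - 1)
    y1 = np.clip(y0 + 1, 0, h - 1)
    # Bilinear weights: each corner weighted by the opposite fractional area.
    wa = (x1 - x) * (y1 - y)
    wb = (x1 - x) * (y - y0)
    wc = (x - x0) * (y1 - y)
    wd = (x - x0) * (y - y0)
    return (wa[..., None] * im[y0, x0] + wb[..., None] * im[y1, x0]
            + wc[..., None] * im[y0, x1] + wd[..., None] * im[y1, x1])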
# https://rufflewind.com/2016-12-30/reverse-mode-automatic-differentiation
import tvm
import topi
import numpy

x = tvm.te.placeholder((3,), name='x')
w = tvm.te.placeholder((3,), name='w')

z1 = topi.multiply(x, w)
z2 = topi.sum(z1)
z3 = topi.multiply(z2, -1)
z4 = topi.exp(z3)
z5 = topi.add(z4, 1)
z6 = topi.divide(1, z5)

[dw] = tvm.te.gradient(z6, w)
s = tvm.te.create_schedule(dw.op)
g = tvm.build(s, [x, w, dw])

# The default tensor type in TVM
dtype = "float32"
target = 'llvm'
ctx = tvm.context(target, 0)

# Randomly generated tensors for testing
x1 = tvm.nd.array(numpy.array([1, 3, 2]).astype(dtype), ctx)
w1 = tvm.nd.array(numpy.array([2, 1, -2]).astype(dtype), ctx)
dw1 = tvm.nd.empty(shape=(3,), dtype='float32', ctx=ctx)

g(x1, w1, dw1)
print("ret=", dw1)
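Since z1 through z6 collapse to sigmoid(x . w), the gradient that tvm.te.gradient produces can be checked analytically: d sigmoid(x . w) / dw = x * sigmoid(x . w) * (1 - sigmoid(x . w)). A quick NumPy cross-check (added here for verification, not part of the original script):

import numpy as np

xv = np.array([1, 3, 2], dtype="float32")
wv = np.array([2, 1, -2], dtype="float32")

s = float(np.dot(xv, wv))        # = 1.0 for these inputs
sig = 1.0 / (1.0 + np.exp(-s))   # z6 = sigmoid(x . w)
expected = xv * sig * (1.0 - sig)
print("expected =", expected)    # ~ [0.1966 0.5898 0.3932], matching dw1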