def quantize(self, op, **kwargs):
    """Quantize a clip operator by scaling its bounds with the input scale.

    The scale of the operator's first child is read from
    ``kwargs['buffers']`` and recorded unchanged for this operator. The
    ``a_min``/``a_max`` attributes are multiplied by that scale and
    truncated to integers, and the output precision is computed with
    ``get_bit_exp`` from the larger of ``abs(a_min)`` and ``a_max``.

    Args:
        op: Symbol of the operator to quantize.
        **kwargs: Must contain ``precs`` and ``buffers``; both are updated
            in place under the operator's name.

    Returns:
        A new ``mx.sym.clip`` symbol over the same input, carrying the
        integer bounds and the original operator name.
    """
    precs, buffers = kwargs['precs'], kwargs['buffers']
    X = op.get_children()[0]
    name, X_name = op.attr('name'), X.attr('name')
    attrs = op.list_attr()

    # `a_max`, `a_min` and the precision should be aligned with CVM-Runtime.
    iscale = buffers[X_name].get()
    # The output buffer is written before the bounds are parsed, exactly as
    # in the original statement order.
    buffers[name] = SBuffer(iscale)
    a_min = int(get_attr(attrs, "a_min") * iscale)
    a_max = int(get_attr(attrs, "a_max") * iscale)
    precs[name][OUT_KEY] = get_bit_exp(max(abs(a_min), a_max))
    return mx.sym.clip(X, a_min=a_min, a_max=a_max, name=name)
def mergefunc(node, params, graph):
    """Rescale the scale-dependent attributes of ``node``.

    Operators whose ``op_name`` is not a key of ``attribute_deps`` are
    returned untouched. Otherwise each listed attribute (a missing one is
    read as 0) is multiplied by the matching ``mrt_oscales`` entry,
    truncated with ``int`` and written back, and the operator is rebuilt
    from its children under its original name.

    ``attribute_deps``, ``mrt_oscales`` and ``name_idx`` are resolved from
    the surrounding scope, which is not visible in this block. ``params``
    and ``graph`` are accepted but not used here.
    """
    op_name = node.attr('op_name')
    if op_name not in attribute_deps:
        return node

    name = node.attr('name')
    childs = sutils.sym_iter(node.get_children())
    attr = node.list_attr()
    for attr_name, dep in attribute_deps[op_name].items():
        raw = sutils.get_attr(attr, attr_name, 0)
        attr[attr_name] = int(raw * mrt_oscales[name_idx[dep]])
    return sutils.get_mxnet_op(op_name)(*childs, **attr, name=name)
def _separate_pad(op, **kwargs):
    """Move a convolution's ``pad`` attribute into an explicit pad operator.

    Any operator other than ``Convolution`` is returned unchanged. For a
    convolution, the ``pad`` attribute (default ``(0, 0)``) is removed and
    the operator is rebuilt under its original name. When either pad value
    is non-zero, the first input is first wrapped in a constant-zero
    ``mx.sym.pad`` with width ``(0, 0, 0, 0, PH, PH, PW, PW)``.

    Raises:
        AssertionError: if a ``layout`` attribute is present and is not
            ``'NCHW'``.
    """
    op_name = op.attr('op_name')
    if op_name != Convolution.op_name:
        return op

    name = op.attr('name')
    attr = op.list_attr()
    childs = sutils.sym_iter(op.get_children())

    # An absent layout is accepted; an explicit one must be NCHW.
    assert attr.get('layout', 'NCHW') == 'NCHW'

    pad_h, pad_w = sutils.get_attr(attr, 'pad', (0, 0))
    attr.pop('pad', None)

    if pad_h != 0 or pad_w != 0:
        widths = (0, 0, 0, 0, pad_h, pad_h, pad_w, pad_w)
        childs[0] = mx.sym.pad(
            childs[0], pad_width=widths, mode='constant',
            constant_value=0, name=N.n('pad'))
    return sutils.get_mxnet_op(op_name)(*childs, **attr, name=name)
def quantize(self, op, **kwargs):
    """Rewrite a softmax operator into a lookup-table based integer graph.

    Steps, as performed by the code below:

    1. Requantize the first child with the quantizer named in
       ``cfg_dict`` (which must be ``USQuantizer``).
    2. Subtract ``max(X) - alpha`` along ``axis`` and apply ``relu``.
    3. Build a table of ``exp(i / xs)`` with ``alpha + 1`` rows, clip and
       round it, and register it as a parameter and a graph variable.
    4. Look the normalized input up through the ``cvm_lut`` custom op, sum
       along ``axis``, scale, add ``half_lut`` and divide by the sum.

    Args:
        op: Symbol of the operator to quantize.
        **kwargs: Reads ``params``, ``graph``, ``buffers``, ``precs``,
            ``features``, ``cfg_dict``, ``op_input_precs`` and
            ``softmax_lambd``; the whole mapping is also forwarded to the
            quantizer. ``params``, ``graph``, ``precs`` and ``buffers``
            are updated in place.

    Returns:
        The realized output symbol, named like the original operator.

    Raises:
        AssertionError: if the child's quantizer type is not
            ``USQuantizer.name`` or the derived output precision is not
            greater than 8.
    """
    params, graph = kwargs['params'], kwargs['graph']
    buffers, precs = kwargs['buffers'], kwargs['precs']
    features, cfg_dict = kwargs['features'], kwargs['cfg_dict']
    name, op_name = op.attr('name'), op.attr('op_name')
    childs, attr = sym_iter(op.get_children()), op.list_attr()
    cns = [c.attr('name') for c in childs] if childs else []

    # Input precision for this operator type and the scale derived from the
    # first child's recorded feature.
    oprec = kwargs['op_input_precs'][op_name]
    th = features[cns[0]].get()
    xs = scale_exp(th, oprec)
    quant_type = cfg_dict[cns[0]]['quant_type']
    assert quant_type == USQuantizer.name
    quant = get_quantizer(quant_type)
    # `xs` is rebound to the scale returned by the quantizer.
    X, xprec, xs = quant.quantize(
        childs[0], oprec, oscale=xs, oname=name, **kwargs)

    axis = get_attr(attr, 'axis', -1)
    lambd = kwargs['softmax_lambd']
    # `alpha` is `lambd` expressed in the quantized scale; it is also the
    # largest index of the lookup table built below.
    alpha = int(lambd * xs)
    var = nd_const(alpha, graph, params)
    max_axis = mx.sym.max(X, axis=axis, keepdims=True)
    offset = mx.sym.broadcast_sub(max_axis, var, name=N.n('softmax_offset'))
    offset = realize(offset, 0, xprec)
    # X - (max - alpha), with negative results zeroed by relu.
    norm = mx.sym.broadcast_sub(X, offset, name=N.n('softmax_normalize'))
    norm = mx.sym.relu(norm, name=N.n('Softmax_filter'))
    norm = realize(norm, 0, xprec)

    # Lookup table: exp(i / xs) over the range produced by nd_arange
    # (presumably i = 0..alpha; the reshape below expects alpha + 1 rows).
    data = sutils.nd_arange(0, alpha + 1)
    table = nd.exp(data / xs)

    tprec = get_bit_exp(math.exp(lambd))
    table = nd.clip(table, a_min=0, a_max=get_range_exp(tprec))
    W_name = N.n('cvm_lut_weight')
    params[W_name] = weight = table.round().reshape(alpha + 1, 1)
    wattr = {'precision': str(tprec)}
    W = graph[W_name] = mx.sym.var(W_name, shape=weight.shape, attr=wattr)
    # The LUT op gets a generated name; the operator's own name is reserved
    # for the final realized output further down.
    lut = mx.sym.Custom(norm, W, in_dim=alpha + 1,
                        name=N.n('softmax_lut'), op_type='cvm_lut')
    sum_lut = mx.sym.sum(lut, axis=axis, keepdims=True,
                         name=N.n("softmax_sum"))

    # Output precision is capped at 15 and bounded by 31 - tprec.
    oprec = min(15, 31 - tprec)
    assert oprec > 8, "operator softmax(%s) lambda(%d) is too large" \
        % (name, lambd)
    oscale = get_range_exp(oprec)
    var_scale = nd_const(oscale, graph, params)
    prob = mx.sym.broadcast_mul(lut, var_scale,
                                name=N.n("softmax_output_scale"))
    # NOTE(review): `half_lut` presumably is half of `sum_lut`, added so the
    # division below rounds instead of truncating - confirm against realize().
    half_lut = realize(sum_lut, 1, 31)
    prob = mx.sym.broadcast_add(prob, half_lut, name=N.n("softmax_round"))
    op = mx.sym.broadcast_div(prob, sum_lut, name=N.n("softmax_prob"))
    # Cast to int32 and back to float32; an earlier commented-out floor()
    # variant was annotated "simulate integer division".
    op = op.astype('int32').astype('float32')
    op = realize(op, 0, oprec, name=name)
    # Precision and scale are recorded under the operator's original name.
    precs[name][OUT_KEY] = oprec
    buffers[name] = SBuffer(oscale)
    logger = logging.getLogger('log.mrt.realize')
    # NOTE(review): the `iscale=` field is filled with the child names
    # (`cns`), not with a scale value - confirm this is intended.
    logger.debug("operator %-20s name=%-40s oscale=%s, iscale=%s",
                 op_name, name, buffers[name].serialize(), cns)
    return op