class SSDPostProcessKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_ssd_parameter`` generator call."""

    def __init__(self, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Record the sizes that ``code`` later forwards to the generator.

        Args:
            cname: Name stored on the kernel and set on the generator control.
            params: Node parameters; ``in_dims``, ``out_dims``,
                ``max_bb_before_nms`` and ``name`` are read.
            qrec: Accepted but discarded immediately.
            gen_ctrl: Existing generator control to reuse; a new ``GenCtrl``
                is created when this is ``None``.
            at_ver: Stored and forwarded to the generator call.
        """
        del qrec
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        # num_boxes x 4
        self.num_anchors = params.in_dims[0].shape[0]
        # num_boxes x num_classes
        self.num_classes = params.in_dims[1].shape[1]
        # out_boxes x 4
        self.out_boxes = params.out_dims[0].shape[0]
        self.max_bb_before_nms = params.max_bb_before_nms

        self.cname = cname
        self.node_name = params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A fresh ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_ssd_parameter(
            block,
            self.cname,
            self.num_anchors,
            self.num_classes,
            self.out_boxes,
            self.max_bb_before_nms,
            at_ver=self.at_ver,
        )
        return block
def __init__(self, node_name, cname, matadd_params, act_params=None, force_relu=True, gen_ctrl=None):
    """Collect the kernel attributes and hand them to the parent initializer.

    Args:
        node_name: Stored in the extra attributes as ``node_name``.
        cname: Stored in the extra attributes and set on the generator control.
        matadd_params: Parameters whose first input dimension is turned into
            three dimensions via ``make_three_dims``.
        act_params: Optional activation parameters; ``"KOP_NONE"`` is used
            as the activation operation when this is ``None``.
        force_relu: Forwarded to ``gen_activation_op``.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
    """
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)

    if act_params is None:
        act_oper = "KOP_NONE"
    else:
        act_oper = gen_activation_op(act_params.activation,
                                     force_relu=force_relu)

    dims = make_three_dims(matadd_params.in_dims[0])

    attrs = {
        'feat': dims[0],
        'width': dims[1],
        'height': dims[2],
        'act_oper': act_oper,
    }
    extra_attrs = {
        'cname': cname,
        'node_name': node_name,
    }
    super().__init__(attrs, extra_attrs, gen_ctrl=gen_ctrl)
def __init__(self, node_name, cname, pool_params, act_params, gen_ctrl=None, at_ver=3, force_relu=True):
    """Set up the global pool kernel state from the pool and activation parameters.

    Args:
        node_name: Stored as ``self.node_name``.
        cname: Stored as ``self.cname`` and set on the generator control.
        pool_params: Pool parameters; ``in_dims`` and ``axis`` are read.
        act_params: Optional activation parameters; ``NO_ACTIVATION`` is
            used when this is ``None``.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
        at_ver: Stored as ``self.at_ver``.
        force_relu: Forwarded to ``gen_active_at_params``.
    """
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)
    self.gen_ctrl = gen_ctrl

    if act_params is None:
        self.at_act_params = NO_ACTIVATION
    else:
        self.at_act_params = gen_active_at_params(act_params,
                                                  force_relu=force_relu)

    self.at_globalpool_params = gen_globalpool_at_params(pool_params)

    in_dim = pool_params.in_dims[0]
    # Product of the sizes of all axes that are not listed in pool_params.axis.
    kept_size = 1
    for axis_idx, axis_len in enumerate(in_dim.shape):
        if axis_idx not in pool_params.axis:
            kept_size = kept_size * axis_len

    self.c = kept_size
    # The remaining element count is split into two factors for h and w.
    self.h, self.w = balanced_divisors(in_dim.size() / kept_size)

    self.cname = cname
    self.node_name = node_name
    self.at_ver = at_ver
def __init__(self, node_name, cname, rnn_params, gen_ctrl=None, at_ver=3):
    """Set up an fp16 RNN/LSTM/GRU stack kernel from ``rnn_params``.

    Args:
        node_name: Stored as ``self.node_name`` and used in error messages.
        cname: Stored as ``self.cname`` and set on the generator control.
        rnn_params: Instance of ``RNNParameters``, ``LSTMParameters`` or
            ``GRUParameters``; selects the kernel name.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
        at_ver: Stored as ``self.at_ver``.

    Raises:
        ValueError: If ``rnn_params`` is of an unknown type, or is a GRU
            with ``linear_before_reset`` unset.
    """
    # Rebind the local so that the option writes further down always target
    # the control that is actually stored on the kernel. Previously they used
    # the raw argument, which is None when no control was passed in.
    if gen_ctrl is None:
        gen_ctrl = GenCtrl(None, cname=cname)
    else:
        gen_ctrl.cname = cname
    self.gen_ctrl = gen_ctrl

    if isinstance(rnn_params, RNNParameters):
        self.kname = "RNN_Stack_fp16"
    elif isinstance(rnn_params, LSTMParameters):
        self.kname = "LSTM_Stack_fp16"
    elif isinstance(rnn_params, GRUParameters):
        self.kname = "GRU_Stack_fp16"
        if not rnn_params.linear_before_reset:
            # gen_ctrl.linear_before_reset = 0
            raise ValueError(
                "In {} linear_before_reset == 0 not supported by the Autotiler kernels"
                .format(node_name))
    else:
        raise ValueError("unknown RNN parameter type")

    self.n_cells = rnn_params.n_cells
    self.n_states = rnn_params.n_states
    self.n_inputs = rnn_params.n_inputs
    self.n_input_cells = rnn_params.n_input_cells
    self.n_output_cells = rnn_params.n_output_cells
    self.revert = rnn_params.revert

    if not rnn_params.hard_act:
        gen_ctrl.rnn_use_hardact = 0
    if not rnn_params.rnn_same_inout_scale:
        gen_ctrl.rnn_same_inout_scale = 0

    self.cname = cname
    self.node_name = node_name
    self.at_ver = at_ver
class SoftmaxKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_softmax`` generator call."""

    def __init__(self, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Record the softmax parameters and input dimension.

        Args:
            cname: Stored as ``self.cname`` and set on the generator control.
            params: Node parameters; ``in_dims`` and ``name`` are read.
            qrec: Accepted but discarded immediately.
            gen_ctrl: Existing generator control to reuse; a new ``GenCtrl``
                is created when this is ``None``.
            at_ver: Stored and forwarded to the generator call.
        """
        del qrec
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        self.at_softmax_params = gen_softmax_at_params(params)
        self.in_dim = params.in_dims[0]

        self.cname = cname
        self.node_name = params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A fresh ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_softmax(
            block,
            self.cname,
            self.in_dim,
            self.at_softmax_params.SoftMaxOper,
            at_ver=self.at_ver,
        )
        return block
def __init__(self, node_name, cname, pool_params, act_params, gen_ctrl=None, at_ver=3):
    """Set up the global pool kernel state, keeping the raw in/out dimensions.

    Args:
        node_name: Stored as ``self.node_name``.
        cname: Stored as ``self.cname`` and set on the generator control.
        pool_params: Pool parameters; ``in_dims`` and ``out_dims`` are read.
        act_params: Optional activation parameters; ``NO_ACTIVATION`` is
            used when this is ``None``. Activation parameters are always
            generated with ``force_relu=True``.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
        at_ver: Stored as ``self.at_ver``.
    """
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)
    self.gen_ctrl = gen_ctrl

    if act_params is None:
        self.at_act_params = NO_ACTIVATION
    else:
        self.at_act_params = gen_active_at_params(act_params,
                                                  force_relu=True)

    self.at_globalpool_params = gen_globalpool_at_params(pool_params)
    self.in_dim = pool_params.in_dims[0]
    self.out_dim = pool_params.out_dims[0]

    self.cname = cname
    self.node_name = node_name
    self.at_ver = at_ver
def __init__(self, node_name, cname, rnn_params, gen_ctrl=None, at_ver=3):
    """Set up an SQ8 RNN/LSTM stack kernel from ``rnn_params``.

    Args:
        node_name: Stored as ``self.node_name``.
        cname: Stored as ``self.cname`` and set on the generator control.
        rnn_params: Instance of ``RNNParameters`` or ``LSTMParameters``;
            selects the kernel name.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
        at_ver: Stored as ``self.at_ver``.

    Raises:
        ValueError: If ``rnn_params`` is of an unknown type.
    """
    # Rebind the local so the option checks below operate on the control that
    # is stored on the kernel. Previously they dereferenced the raw argument,
    # which is None when the caller did not supply a control.
    if gen_ctrl is None:
        gen_ctrl = GenCtrl(None, cname=cname)
    else:
        gen_ctrl.cname = cname
    self.gen_ctrl = gen_ctrl

    if isinstance(rnn_params, RNNParameters):
        self.kname = "RNN_Stack_SQ8"
    elif isinstance(rnn_params, LSTMParameters):
        self.kname = "LSTM_Stack_SQ8"
    else:
        raise ValueError("unknown RNN parameter type")

    self.n_cells = rnn_params.n_cells
    self.n_states = rnn_params.n_states
    self.n_inputs = rnn_params.n_inputs
    self.n_input_cells = rnn_params.n_input_cells
    self.n_output_cells = rnn_params.n_output_cells
    self.revert = rnn_params.revert

    # Only write an option when it has not been set already.
    # NOTE(review): relies on GenCtrl reporting unset options as None - confirm.
    if not rnn_params.hard_act and gen_ctrl.rnn_use_hardact is None:
        gen_ctrl.rnn_use_hardact = 0
    if (not rnn_params.rnn_same_inout_scale
            and gen_ctrl.rnn_same_inout_scale is None):
        gen_ctrl.rnn_same_inout_scale = 0

    self.cname = cname
    self.node_name = node_name
    self.at_ver = at_ver
class TwoDTransposeKernelSq8(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_2d_transpose`` generator call."""

    def __init__(self, cname, params, real_in_shape, real_transpose, qrec, gen_ctrl=None, at_ver=3):
        """Record shapes, dimensions and quantization for the transpose.

        Args:
            cname: Stored as ``self.cname`` and set on the generator control.
            params: Node parameters; ``in_dims``, ``out_dims`` and ``name``
                are read.
            real_in_shape: Stored as ``self.in_shape`` and passed to the
                generator call.
            real_transpose: Stored and shown in the emitted comment.
            qrec: Quantization record; the first input and output entries
                are stored.
            gen_ctrl: Existing generator control to reuse; a new ``GenCtrl``
                is created when this is ``None``.
            at_ver: Stored as ``self.at_ver``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        self.in_q = qrec.in_qs[0]
        self.out_q = qrec.out_qs[0]
        self.in_shape = real_in_shape
        self.in_dim = params.in_dims[0]
        self.out_dim = params.out_dims[0]
        self.real_transpose = real_transpose

        self.cname = cname
        self.node_name = params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A fresh ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)
        block.comment("transpose from {} to {} ({})",
                      self.in_dim, self.out_dim, self.real_transpose)

        ctrl = self.gen_ctrl
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_2d_transpose(block, self.cname, self.in_shape)
        return block
def __init__(self, node_name, cname, params, qrec, gen_ctrl=None, at_ver=3):
    """Set up the global pool kernel state as a (c, h, w) problem.

    Args:
        node_name: Stored as ``self.node_name``.
        cname: Stored as ``self.cname`` and set on the generator control.
        params: Pool parameters; ``in_dims`` and ``axis`` are read.
        qrec: Quantization record; the first input and output entries are
            stored.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
        at_ver: Stored as ``self.at_ver``.
    """
    if gen_ctrl is None:
        self.gen_ctrl = GenCtrl(None, cname=cname)
    else:
        gen_ctrl.cname = cname
        self.gen_ctrl = gen_ctrl

    self.at_globalpool_params = gen_globalpool_at_params(params)

    in_dim = params.in_dims[0]
    # Product of the sizes of all axes that are not listed in params.axis.
    # The initial value 1 keeps reduce() from raising TypeError when every
    # axis is reduced and the generator therefore yields nothing.
    reduce_sz = reduce(
        lambda x, y: x * y,
        (sz for idx, sz in enumerate(in_dim.shape) if idx not in params.axis),
        1)
    self.c = reduce_sz
    # The remaining element count is split into two factors for h and w.
    (self.h, self.w) = balanced_divisors(in_dim.size() / reduce_sz)

    self.in_q = qrec.in_qs[0]
    self.out_q = qrec.out_qs[0]
    self.cname = cname
    self.node_name = node_name
    self.at_ver = at_ver
def __init__(self, cname, params, real_in_shape, real_transpose, qrec, gen_ctrl=None, at_ver=3):
    """Record shapes, dimensions and quantization for a transpose kernel.

    Args:
        cname: Stored as ``self.cname`` and set on the generator control.
        params: Node parameters; ``in_dims``, ``out_dims`` and ``name`` are
            read.
        real_in_shape: Stored as ``self.in_shape``.
        real_transpose: Stored as ``self.real_transpose``.
        qrec: Quantization record; the first input and output entries are
            stored.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
        at_ver: Stored as ``self.at_ver``.
    """
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)
    self.gen_ctrl = gen_ctrl

    # A floating-point output switches the float_dump option on.
    if qrec.out_qs[0].is_floating:
        self.gen_ctrl.float_dump = 1

    self.in_q = qrec.in_qs[0]
    self.out_q = qrec.out_qs[0]
    self.in_shape = real_in_shape
    self.in_dim = params.in_dims[0]
    self.out_dim = params.out_dims[0]
    self.real_transpose = real_transpose

    self.cname = cname
    self.node_name = params.name
    self.at_ver = at_ver
class GlobalPoolKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_globalpool`` generator call."""

    def __init__(self, node_name, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Record pool parameters, dimensions and quantization.

        Args:
            node_name: Stored as ``self.node_name``.
            cname: Stored as ``self.cname`` and set on the generator control.
            params: Pool parameters; ``in_dims`` and ``out_dims`` are read.
            qrec: Quantization record; the first input and output entries
                are stored.
            gen_ctrl: Existing generator control to reuse; a new ``GenCtrl``
                is created when this is ``None``.
            at_ver: Stored and forwarded to the generator call.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        self.at_globalpool_params = gen_globalpool_at_params(params)
        self.in_dim = params.in_dims[0]
        self.out_dim = params.out_dims[0]
        self.in_q = qrec.in_qs[0]
        self.out_q = qrec.out_qs[0]

        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A fresh ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_globalpool(
            block,
            self.cname,
            self.in_q,
            self.out_q,
            self.in_dim,
            self.out_dim,
            self.at_globalpool_params,
            at_ver=self.at_ver,
        )
        return block
def __init__(self, cname, params, real_in_shape, real_transpose, qrec, gen_ctrl=None, at_ver=3):
    """Record the state for a three-dimensional transpose kernel.

    Args:
        cname: Stored as ``self.cname`` and set on the generator control.
        params: Node parameters; ``in_dims``, ``out_dims`` and ``name`` are
            read.
        real_in_shape: Stored as ``self.in_shape``.
        real_transpose: Sequence of indices into ``C``/``H``/``W`` used to
            build the permutation operation name; also stored as-is.
        qrec: Quantization record; the first input and output entries are
            stored.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
        at_ver: Stored as ``self.at_ver``.
    """
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)
    self.gen_ctrl = gen_ctrl

    self.in_shape = real_in_shape

    # Map each index of the transpose to its axis letter to name the operation.
    axis_letters = ('C', 'H', 'W')
    target_order = "".join(axis_letters[i] for i in real_transpose)
    self.permop = "KOP_MATPERM_CHW2{}".format(target_order)
    self.real_transpose = real_transpose

    self.in_q = qrec.in_qs[0]
    self.out_q = qrec.out_qs[0]
    self.in_dim = params.in_dims[0]
    self.out_dim = params.out_dims[0]

    self.cname = cname
    self.node_name = params.name
    self.at_ver = at_ver
class GlobalPoolKernel(AutotilerKernel):
    """Kernel wrapper that emits ``gen_at_globalpool`` for a (c, h, w) problem."""

    def __init__(self, node_name, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Record pool parameters, the derived c/h/w sizes and quantization.

        Args:
            node_name: Stored as ``self.node_name``.
            cname: Stored as ``self.cname`` and set on the generator control.
            params: Pool parameters; ``in_dims`` and ``axis`` are read.
            qrec: Quantization record; the first input and output entries
                are stored.
            gen_ctrl: Existing generator control to reuse; a new ``GenCtrl``
                is created when this is ``None``.
            at_ver: Stored and forwarded to the generator call.
        """
        if gen_ctrl is None:
            self.gen_ctrl = GenCtrl(None, cname=cname)
        else:
            gen_ctrl.cname = cname
            self.gen_ctrl = gen_ctrl

        self.at_globalpool_params = gen_globalpool_at_params(params)

        in_dim = params.in_dims[0]
        # Product of the sizes of all axes that are not listed in params.axis.
        # The initial value 1 keeps reduce() from raising TypeError when every
        # axis is reduced and the generator therefore yields nothing.
        reduce_sz = reduce(
            lambda x, y: x * y,
            (sz for idx, sz in enumerate(in_dim.shape)
             if idx not in params.axis),
            1)
        self.c = reduce_sz
        # The remaining element count is split into two factors for h and w.
        (self.h, self.w) = balanced_divisors(in_dim.size() / reduce_sz)

        self.in_q = qrec.in_qs[0]
        self.out_q = qrec.out_qs[0]
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A fresh ``CodeBlock`` is created when none is supplied.
        """
        if code_block is None:
            code_block = CodeBlock()

        code_block.comment("generator for {}", self.node_name)

        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(code_block)

        gen_at_globalpool(code_block, self.cname, self.in_q, self.out_q,
                          self.c, self.h, self.w, self.at_globalpool_params,
                          at_ver=self.at_ver)
        return code_block
def __init__(self, cname, params, qrec, gen_ctrl=None):
    """Assemble the frame/FFT attribute dictionary and pass it to the parent.

    Args:
        cname: Stored in the extra attributes and set on the generator control.
        params: Node parameters; the frame, FFT, pre-emphasis, window and
            ``magsquared`` settings plus ``name`` are read.
        qrec: Quantization record; the first output entry decides the
            ``float_dump`` option and the ``data_type`` attribute.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
    """
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)

    out_q = qrec.out_qs[0]
    # A floating-point output switches the float_dump option on.
    if out_q.is_floating:
        gen_ctrl.float_dump = 1

    attrs = dict(
        n_frames=params.n_frames,
        frame_size=params.frame_size,
        frame_stride=params.frame_step,
        n_fft=params.n_fft,
        preemp_factor=params.preemp_factor,
        skip_preemp=0,
        no_window=int(params.win_fn is None),
        out_fft=0,
        magsquared=int(params.magsquared),
        data_type=DSP_DTYPE[out_q.dtype],
    )

    # other attributes
    extra_attrs = dict(cname=cname, node_name=params.name)
    super().__init__(attrs, extra_attrs, gen_ctrl=gen_ctrl)
def __init__(self, node_name, cname, params, qrec, gen_ctrl=None):
    """Assemble the RNN kernel attributes and pass them to the parent.

    Args:
        node_name: Stored in the extra attributes as ``node_name``.
        cname: Stored in the extra attributes and set on the generator control.
        params: RNN parameters; cell/state/input counts, ``hard_act``,
            ``revert`` and ``name`` are read.
        qrec: Quantization record; input entries are looked up by their
            position in ``RNNParameters.INPUT_NAMES``.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.

    Raises:
        ValueError: If the ``i_2_i_w`` and ``r_2_i_w`` inputs have different
            bit widths.
    """
    if gen_ctrl is None:
        gen_ctrl = GenCtrl(None, cname=cname)
    else:
        gen_ctrl.cname = cname

    if params.hard_act:
        gen_ctrl.rnn_use_hardact = 1

    # Map each input name to its index in the quantization record.
    names = {val: idx for idx, val in enumerate(RNNParameters.INPUT_NAMES)}
    in_qs = qrec.in_qs

    if in_qs[names['i_2_i_w']].bits != in_qs[names['r_2_i_w']].bits:
        # The exception used to be constructed but never raised, so the
        # mismatch went unnoticed.
        raise ValueError(f'bit width of gates differs in {params.name}')

    attrs = {
        'bias_size': in_qs[names['i_b']].dtype_bits // 8,
        # Negated size, as in the original expression.
        'feat_size': -in_qs[0].dtype_bits // 8,
        'filter_bits': in_qs[names['i_2_i_w']].bits,
        'n_cells': params.n_cells,
        'k0': params.n_input_cells,
        'k1': params.n_output_cells,
        'dim_state': params.n_states,
        'dim_in': params.n_inputs,
        'always_reset': 0,
        'revert': 1 if params.revert else 0,
    }

    extra_attrs = {'cname': cname, 'node_name': node_name}
    super().__init__(attrs, extra_attrs, gen_ctrl=gen_ctrl)
def __init__(self, node_name, cname, matrixadd_params, pad_params, act_params, at_ver=3, gen_ctrl=None, force_relu=True):
    """Assemble the padded matrix-add attributes and pass them to the parent.

    Args:
        node_name: Stored in the extra attributes as ``node_name``.
        cname: Stored in the extra attributes and set on the generator control.
        matrixadd_params: Parameters whose first two input dimensions are
            read.
        pad_params: Parameters whose ``padding[0]`` pair supplies ``padtop``
            and ``padbot``.
        act_params: Optional activation parameters; ``NO_ACTIVATION`` is
            used when this is ``None``.
        at_ver: Accepted but not used by this initializer.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
        force_relu: Forwarded to ``gen_activation_op``.
    """
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)

    if act_params is None:
        act_oper = NO_ACTIVATION
    else:
        act_oper = gen_activation_op(act_params.activation,
                                     force_relu=force_relu)

    pad_top = pad_params.padding[0][0]
    pad_bottom = pad_params.padding[0][1]

    first_in = matrixadd_params.in_dims[0]
    second_in = matrixadd_params.in_dims[1]
    # Index 0 is chosen when the first input is strictly larger, otherwise 1.
    if first_in.size() > second_in.size():
        padded_idx = 0
    else:
        padded_idx = 1

    dims_a = make_three_dims(matrixadd_params.in_dims[0])
    dims_b = make_three_dims(matrixadd_params.in_dims[1])

    attrs = {
        'feat': max(dims_a[0], dims_b[0]),
        'width': dims_a[1],
        'height': dims_a[2],
        'padded_idx': padded_idx,
        'padtop': pad_top,
        'padbot': pad_bottom,
        'act_oper': act_oper,
    }
    extra_attrs = {
        'cname': cname,
        'node_name': node_name,
    }
    super().__init__(attrs, extra_attrs, gen_ctrl=gen_ctrl)
class PoolKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_cnn_pool_act_sq8`` generator call."""

    def __init__(self, node_name, cname, pool_params, act_params, qrec, act_q=None, gen_ctrl=None, at_ver=3, force_relu=True):
        """Record pool/activation parameters and set generator options.

        Args:
            node_name: Stored as ``self.node_name``.
            cname: Stored as ``self.cname`` and set on the generator control.
            pool_params: Pool parameters; ``ker_in_order``, ``in_dims`` and
                ``out_dims`` are read.
            act_params: Optional activation parameters; ``NO_ACTIVATION`` is
                used when this is ``None``.
            qrec: Quantization record; signedness and ``dtype_bits`` of the
                first input and output entries are read.
            act_q: Activation quantization record; its first input
                ``zero_point`` is read whenever ``act_params`` is given.
            gen_ctrl: Existing generator control to reuse; a new ``GenCtrl``
                is created when this is ``None``.
            at_ver: Stored as ``self.at_ver``.
            force_relu: Forwarded to ``gen_active_at_params``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        if pool_params.ker_in_order and pool_params.ker_in_order[0] == ["h", "w", "c"]:
            self.gen_ctrl.hwc = 1

        # Unsigned tensors are flagged with a negated data size.
        out_q = qrec.out_qs[0]
        if not out_q.signed:
            self.gen_ctrl.output_datasize = -qrec.out_qs[0].dtype_bits // 8
        in_q = qrec.in_qs[0]
        if not in_q.signed:
            self.gen_ctrl.input_datasize = -qrec.in_qs[0].dtype_bits // 8

        if act_params is None:
            self.at_act_params = NO_ACTIVATION
        else:
            self.at_act_params = gen_active_at_params(
                act_params,
                force_relu=force_relu,
                asymmetric=act_q.in_qs[0].zero_point != 0)

        pad_compatibilities = []
        self.at_pool_params = gen_pool_at_params(pool_params,
                                                 pad_compatibilities)
        self.in_dim = pool_params.in_dims[0]
        self.out_dim = pool_params.out_dims[0]

        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A fresh ``CodeBlock`` is created when none is supplied. The control
        name passed to the generator is ``"0"`` unless the generator control
        was modified.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        ctrl_ref = "0"
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        gen_cnn_pool_act_sq8(
            block,
            self.cname,
            ctrl_ref,
            self.in_dim.c,
            self.in_dim.w,
            self.in_dim.h,
            self.at_pool_params,
            self.at_act_params.ReLUOper,
        )
        return block
class LinearReluKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_linear_relu`` generator call."""

    def __init__(self, node_name, cname, linear_params, linear_q, act_params, act_q, at_ver=3, gen_ctrl=None):
        """Record linear/activation parameters and quantization.

        Args:
            node_name: Stored as ``self.node_name``.
            cname: Stored as ``self.cname`` and set on the generator control.
            linear_params: Linear parameters; must not be ``None``.
            linear_q: Quantization record of the linear node; inputs 0, 1
                and 2 are taken as input, filter and bias quantization.
            act_params: Optional activation parameters; ``NO_ACTIVATION`` is
                used when this is ``None``.
            act_q: Activation quantization record; its first output replaces
                the output quantization when ``act_params`` is given.
            at_ver: Stored as ``self.at_ver``; also selects how ReluN is set
                on the generator control.
            gen_ctrl: Existing generator control to reuse; a new ``GenCtrl``
                is created when this is ``None``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        assert linear_params is not None, "linear should always be included"
        self.at_linear_params = gen_linear_at_params(linear_params)
        self.in_dim = linear_params.in_dims[0]
        self.out_dim = linear_params.out_dims[0]

        self.filter_q = linear_q.in_qs[1]
        self.in_q = linear_q.in_qs[0]
        result_q = linear_q.out_qs[0]
        self.bias_q = linear_q.in_qs[2]

        if act_params is None:
            act_at_params = NO_ACTIVATION
        else:
            act_at_params = gen_active_at_params(act_params)
            result_q = act_q.out_qs[0]
            if at_ver < 3:
                if act_params.activation == "relu6" and result_q.q != 0:
                    self.gen_ctrl.ReluN = 6 << result_q.q
                    self.gen_ctrl.ReluNNoNorm = 1
            elif act_params.activation == "relun":
                self.gen_ctrl.ReluN = act_params.activation_params

        self.out_q = result_q
        self.at_act_params = act_at_params
        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A fresh ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_linear_relu(
            block,
            self.cname,
            self.in_q,
            self.out_q,
            self.filter_q,
            self.bias_q,
            self.in_dim,
            self.out_dim,
            self.at_linear_params,
            self.at_act_params,
            at_ver=self.at_ver,
            gen_ctrl=self.gen_ctrl,
        )
        return block
class GlobalPoolKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_cnn_globalpool_sq8`` generator call."""

    def __init__(self, node_name, cname, pool_params, act_params, act_q=None, gen_ctrl=None, at_ver=3, force_relu=True):
        """Record pool/activation parameters and the derived c/h/w sizes.

        Args:
            node_name: Stored as ``self.node_name``.
            cname: Stored as ``self.cname`` and set on the generator control.
            pool_params: Pool parameters; ``ker_in_order``, ``in_dims`` and
                ``axis`` are read.
            act_params: Optional activation parameters; ``NO_ACTIVATION`` is
                used when this is ``None``.
            act_q: Activation quantization record; its first input
                ``zero_point`` is read whenever ``act_params`` is given.
            gen_ctrl: Existing generator control to reuse; a new ``GenCtrl``
                is created when this is ``None``.
            at_ver: Stored as ``self.at_ver``.
            force_relu: Forwarded to ``gen_active_at_params``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        if pool_params.ker_in_order and pool_params.ker_in_order[0] == ["h", "w", "c"]:
            self.gen_ctrl.hwc = 1

        if act_params is None:
            self.at_act_params = NO_ACTIVATION
        else:
            self.at_act_params = gen_active_at_params(
                act_params,
                force_relu=force_relu,
                asymmetric=act_q.in_qs[0].zero_point != 0)

        self.at_globalpool_params = gen_globalpool_at_params(pool_params)

        in_dim = pool_params.in_dims[0]
        # Product of the sizes of all axes not listed in pool_params.axis.
        kept_size = 1
        for axis_idx, axis_len in enumerate(in_dim.shape):
            if axis_idx not in pool_params.axis:
                kept_size = kept_size * axis_len

        self.c = kept_size
        # The remaining element count is split into two factors for h and w.
        self.h, self.w = balanced_divisors(in_dim.size() / kept_size)

        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A fresh ``CodeBlock`` is created when none is supplied. The control
        name passed to the generator is ``"0"`` unless the generator control
        was modified.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        ctrl_ref = "0"
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        gen_cnn_globalpool_sq8(
            block,
            self.cname,
            ctrl_ref,
            self.c,
            self.h,
            self.w,
            self.at_globalpool_params.GlobalPoolOper,
            self.at_act_params.ReLUOper,
        )
        return block
def __init__(self, node_name, cname, linear_params, act_params, linear_q, act_q, force_relu, gen_ctrl=None):
    """Assemble the linear kernel attributes and pass them to the parent.

    Args:
        node_name: Stored in the extra attributes as ``node_name``.
        cname: Stored in the extra attributes and set on the generator control.
        linear_params: Linear parameters; first input/output dimensions are
            read.
        act_params: Optional activation parameters; ``"KOP_NONE"`` is used
            as the activation operation when this is ``None``.
        linear_q: Quantization record of the linear node; ``in_qs``,
            ``out_qs`` and ``cache['mul_biases_q']`` are read.
        act_q: Activation quantization record; supplies the output
            quantization and asymmetry flag when ``act_params`` is given.
        force_relu: Forwarded to ``gen_activation_op``.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
    """
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)

    in_dim = linear_params.in_dims[0]
    out_dim = linear_params.out_dims[0]
    in_qs = linear_q.in_qs
    mulbiases_q = linear_q.cache['mul_biases_q']

    if act_params is None:
        act_op = "KOP_NONE"
        out_qs = linear_q.out_qs
    else:
        act_op = gen_activation_op(
            act_params.activation,
            force_relu=force_relu,
            asymmetric=act_q.in_qs[0].zero_point != 0)
        # Fall back to the activation's dimensions when the linear ones are missing.
        if in_dim is None:
            in_dim = act_params.in_dims[0]
        if out_dim is None:
            out_dim = act_params.out_dims[0]
        out_qs = act_q.out_qs

    def byte_size(q_entry):
        # Unsigned entries are reported with a negated size.
        if q_entry.signed:
            return q_entry.dtype_bits // 8
        return -q_entry.dtype_bits // 8

    attrs = {
        'in_size': byte_size(in_qs[0]),
        'out_size': byte_size(out_qs[0]),
        'bias_size': in_qs[2].dtype_bits // 8,
        'scale_size': mulbiases_q.dtype_bits // 8,
        'filter_bits': in_qs[1].bits,
        'in_feat': in_dim.size(),
        'out_feat': out_dim.size(),
        'act_op': act_op,
    }
    extra_attrs = {
        'cname': cname,
        'node_name': node_name,
    }
    super().__init__(attrs, extra_attrs, gen_ctrl=gen_ctrl)
class ThreeDTransposeKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_3d_transpose`` generator call."""

    def __init__(self, cname, params, real_in_shape, real_transpose, qrec, gen_ctrl=None, at_ver=3):
        """Record the state for a three-dimensional transpose.

        Args:
            cname: Stored as ``self.cname`` and set on the generator control.
            params: Node parameters; ``in_dims``, ``out_dims`` and ``name``
                are read.
            real_in_shape: Stored as ``self.in_shape`` and passed to the
                generator call.
            real_transpose: Sequence of indices into ``C``/``H``/``W`` used
                to build the permutation operation name; also stored as-is.
            qrec: Quantization record; the first input and output entries
                are stored.
            gen_ctrl: Existing generator control to reuse; a new ``GenCtrl``
                is created when this is ``None``.
            at_ver: Stored as ``self.at_ver``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        # A floating-point output switches the float_dump option on.
        if qrec.out_qs[0].is_floating:
            self.gen_ctrl.float_dump = 1

        self.in_shape = real_in_shape

        # Map each index of the transpose to its axis letter to name the operation.
        axis_letters = ('C', 'H', 'W')
        target_order = "".join(axis_letters[i] for i in real_transpose)
        self.permop = "KOP_MATPERM_CHW2{}".format(target_order)
        self.real_transpose = real_transpose

        self.in_q = qrec.in_qs[0]
        self.out_q = qrec.out_qs[0]
        self.in_dim = params.in_dims[0]
        self.out_dim = params.out_dims[0]

        self.cname = cname
        self.node_name = params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A fresh ``CodeBlock`` is created when none is supplied. The control
        name passed to the generator is ``"0"`` unless the generator control
        was modified.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)
        block.comment("transpose from {} to {} ({})",
                      self.in_dim, self.out_dim, self.real_transpose)

        ctrl_ref = "0"
        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(block)
            ctrl_ref = self.gen_ctrl.ctrl_name

        gen_at_3d_transpose(
            block,
            self.cname,
            abs(at_bits(self.in_q)),
            self.in_shape,
            self.permop,
            gen_ctrl=ctrl_ref,
        )
        return block
class MatrixScaleKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_matscale`` generator call."""

    def __init__(self, cname, params, qrec, gen_ctrl=None, at_ver=3):
        """Record matscale parameters and pick quantization by fusion type.

        Args:
            cname: Stored as ``self.cname`` and set on the generator control.
            params: Node parameters; ``in_dims``, ``out_dims``,
                ``fusion_type`` and ``name`` are read.
            qrec: Quantization record; which input entries are used depends
                on ``params.fusion_type``.
            gen_ctrl: Existing generator control to reuse; a new ``GenCtrl``
                is created when this is ``None``.
            at_ver: Stored as ``self.at_ver``.

        Raises:
            NotImplementedError: If ``params.fusion_type`` is not one of
                ``"vec_scalar"``, ``"vector"`` or ``"scalar"``.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        matscale_at_params = gen_matscale_at_params(params)
        in_dim = params.in_dims[0]
        out_dim = params.out_dims[0]
        assert in_dim.shape[0] == out_dim.shape[0]

        # Indices into qrec.in_qs for (other, vector, scalar); None means the
        # corresponding quantization is not used for that fusion type.
        # NOTE(review): "vector" reads entries 1 and 2 rather than 0 and 1 -
        # confirm against the caller.
        index_by_fusion = {
            "vec_scalar": (0, 1, 2),
            "vector": (1, 2, None),
            "scalar": (0, None, 1),
        }
        fusion = params.fusion_type
        if fusion not in index_by_fusion:
            raise NotImplementedError("unknown fusion type %s" % params.fusion_type)
        other_idx, vector_idx, scalar_idx = index_by_fusion[fusion]

        self.otherq = qrec.in_qs[other_idx]
        self.vectorq = None if vector_idx is None else qrec.in_qs[vector_idx]
        self.scalarq = None if scalar_idx is None else qrec.in_qs[scalar_idx]

        self.at_matscale_params = matscale_at_params
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.out_q = qrec.out_qs[0]
        self.cname = cname
        self.node_name = params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A fresh ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_matscale(
            block,
            self.cname,
            self.otherq,
            self.vectorq,
            self.scalarq,
            self.out_q,
            self.in_dim,
            self.out_dim,
            self.at_matscale_params,
        )
        return block
class MatrixAddKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_at_matrixadd`` generator call."""

    def __init__(self, cname, matrixadd_params, matrixadd_q, act_params, act_q, gen_ctrl=None, at_ver=3):
        """Record matrix-add/activation parameters and quantization.

        Args:
            cname: Stored as ``self.cname`` and set on the generator control.
            matrixadd_params: Parameters; first input/output dimensions and
                ``name`` are read.
            matrixadd_q: Quantization record; the first two inputs and the
                first output are read.
            act_params: Optional activation parameters; ``NO_ACTIVATION`` is
                used when this is ``None``.
            act_q: Activation quantization record; its first output replaces
                the output quantization when ``act_params`` is given.
            gen_ctrl: Existing generator control to reuse; a new ``GenCtrl``
                is created when this is ``None``.
            at_ver: Stored as ``self.at_ver``; also selects how ReluN is set
                on the generator control.
        """
        if gen_ctrl is not None:
            gen_ctrl.cname = cname
        else:
            gen_ctrl = GenCtrl(None, cname=cname)
        self.gen_ctrl = gen_ctrl

        self.at_matrixadd_params = gen_matrixadd_at_params(matrixadd_params)
        self.in_dim = matrixadd_params.in_dims[0]
        self.out_dim = matrixadd_params.out_dims[0]

        self.in_q1 = matrixadd_q.in_qs[0]
        self.in_q2 = matrixadd_q.in_qs[1]
        result_q = matrixadd_q.out_qs[0]

        if act_params is None:
            act_at_params = NO_ACTIVATION
        else:
            act_at_params = gen_active_at_params(act_params)
            result_q = act_q.out_qs[0]
            if at_ver < 3:
                if act_params.activation == "relu6" and result_q.q != 0:
                    self.gen_ctrl.ReluN = 6 << result_q.q
                    self.gen_ctrl.ReluNNoNorm = 1
            elif act_params.activation == "relun":
                self.gen_ctrl.ReluN = act_params.activation_params

        self.out_q = result_q
        self.at_act_params = act_at_params
        self.cname = cname
        self.node_name = matrixadd_params.name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A fresh ``CodeBlock`` is created when none is supplied.
        """
        block = CodeBlock() if code_block is None else code_block

        block.comment("generator for {}", self.node_name)

        ctrl = self.gen_ctrl
        if not ctrl.is_unmodified:
            ctrl.gen_ctrl_decl(block)

        gen_at_matrixadd(
            block,
            self.cname,
            self.in_q1,
            self.in_q2,
            self.out_q,
            self.in_dim,
            self.out_dim,
            self.at_matrixadd_params,
            at_ver=self.at_ver,
            gen_ctrl=self.gen_ctrl,
        )
        return block
def __init__(self, cname, params, qrec, gen_ctrl=None, at_ver=3):
    """Record the softmax parameters and input dimension.

    Args:
        cname: Stored as ``self.cname`` and set on the generator control.
        params: Node parameters; ``in_dims`` and ``name`` are read.
        qrec: Accepted but discarded immediately.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
        at_ver: Stored as ``self.at_ver``.
    """
    del qrec
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)
    self.gen_ctrl = gen_ctrl

    self.at_softmax_params = gen_softmax_at_params(params)
    self.in_dim = params.in_dims[0]

    self.cname = cname
    self.node_name = params.name
    self.at_ver = at_ver
class RNNKernel(AutotilerKernel):
    """Kernel wrapper that emits the ``gen_rnn_fp16`` generator call."""

    def __init__(self, node_name, cname, rnn_params, gen_ctrl=None, at_ver=3):
        """Set up an fp16 RNN/LSTM/GRU stack kernel from ``rnn_params``.

        Args:
            node_name: Stored as ``self.node_name`` and used in error
                messages.
            cname: Stored as ``self.cname`` and set on the generator control.
            rnn_params: Instance of ``RNNParameters``, ``LSTMParameters`` or
                ``GRUParameters``; selects the kernel name.
            gen_ctrl: Existing generator control to reuse; a new ``GenCtrl``
                is created when this is ``None``.
            at_ver: Stored as ``self.at_ver``.

        Raises:
            ValueError: If ``rnn_params`` is of an unknown type, or is a GRU
                with ``linear_before_reset`` unset.
        """
        # Rebind the local so that the option writes further down always
        # target the control stored on the kernel. Previously they used the
        # raw argument, which is None when no control was passed in.
        if gen_ctrl is None:
            gen_ctrl = GenCtrl(None, cname=cname)
        else:
            gen_ctrl.cname = cname
        self.gen_ctrl = gen_ctrl

        if isinstance(rnn_params, RNNParameters):
            self.kname = "RNN_Stack_fp16"
        elif isinstance(rnn_params, LSTMParameters):
            self.kname = "LSTM_Stack_fp16"
        elif isinstance(rnn_params, GRUParameters):
            self.kname = "GRU_Stack_fp16"
            if not rnn_params.linear_before_reset:
                # gen_ctrl.linear_before_reset = 0
                raise ValueError(
                    "In {} linear_before_reset == 0 not supported by the Autotiler kernels"
                    .format(node_name))
        else:
            raise ValueError("unknown RNN parameter type")

        self.n_cells = rnn_params.n_cells
        self.n_states = rnn_params.n_states
        self.n_inputs = rnn_params.n_inputs
        self.n_input_cells = rnn_params.n_input_cells
        self.n_output_cells = rnn_params.n_output_cells
        self.revert = rnn_params.revert

        if not rnn_params.hard_act:
            gen_ctrl.rnn_use_hardact = 0
        if not rnn_params.rnn_same_inout_scale:
            gen_ctrl.rnn_same_inout_scale = 0

        self.cname = cname
        self.node_name = node_name
        self.at_ver = at_ver

    def code(self, code_block=None):
        """Write the generator code into ``code_block`` and return it.

        A fresh ``CodeBlock`` is created when none is supplied. The control
        name passed to the generator is ``"0"`` unless the generator control
        was modified.
        """
        if code_block is None:
            code_block = CodeBlock()

        code_block.comment("generator for {}", self.node_name)

        if not self.gen_ctrl.is_unmodified:
            self.gen_ctrl.gen_ctrl_decl(code_block)
            gen_ctrl = self.gen_ctrl.ctrl_name
        else:
            gen_ctrl = "0"

        gen_rnn_fp16(code_block, self.kname, self.cname, gen_ctrl,
                     self.n_cells, self.n_input_cells, self.n_output_cells,
                     self.n_states, self.n_inputs,
                     "1" if self.revert else "0")
        return code_block
def __init__(self, cname, params, qrec, gen_ctrl=None):
    """Compute the ``size`` attribute and pass it to the parent initializer.

    The size is the element count of the first output dimension multiplied
    by the whole number of bytes in the first output quantization's ``bits``.

    Args:
        cname: Stored in the extra attributes and set on the generator control.
        params: Node parameters; ``out_dims`` and ``name`` are read.
        qrec: Quantization record; the first output entry's ``bits`` is read.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
    """
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)

    element_count = params.out_dims[0].size()
    bytes_per_element = qrec.out_qs[0].bits // 8
    attrs = {'size': element_count * bytes_per_element}

    # other attributes
    extra_attrs = {
        'cname': cname,
        'node_name': params.name,
    }
    super().__init__(attrs, extra_attrs, gen_ctrl=gen_ctrl)
def __init__(self, node_name, cname, linear_params, linear_q, act_params, act_q, at_ver=3, gen_ctrl=None, force_relu=True):
    """Record linear/activation parameters, sizes and quantization.

    Args:
        node_name: Stored as ``self.node_name``.
        cname: Stored as ``self.cname`` and set on the generator control.
        linear_params: Linear parameters; must not be ``None``.
        linear_q: Quantization record of the linear node; inputs 0, 1 and 2
            are taken as input, filter and bias quantization.
        act_params: Optional activation parameters; ``NO_ACTIVATION`` is
            used when this is ``None``.
        act_q: Activation quantization record; supplies the output
            quantization (and the input one if missing) when ``act_params``
            is given.
        at_ver: Stored as ``self.at_ver``.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
        force_relu: Forwarded to ``gen_active_at_params``.
    """
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)
    self.gen_ctrl = gen_ctrl

    assert linear_params is not None, "linear should always be included"
    self.at_linear_params = gen_linear_at_params(linear_params)

    src_dim = linear_params.in_dims[0]
    dst_dim = linear_params.out_dims[0]
    self.filter_q = linear_q.in_qs[1]
    src_q = linear_q.in_qs[0]
    dst_q = linear_q.out_qs[0]
    self.bias_q = linear_q.in_qs[2]

    if act_params is None:
        act_at_params = NO_ACTIVATION
    else:
        act_at_params = gen_active_at_params(act_params,
                                             force_relu=force_relu)
        # Fall back to the activation's values where the linear ones are missing.
        if src_dim is None:
            src_dim = act_params.in_dims[0]
        if dst_dim is None:
            dst_dim = act_params.out_dims[0]
        if src_q is None:
            src_q = act_q.in_qs[0]
        dst_q = act_q.out_qs[0]

    # Only the total element counts are kept, not the dimension objects.
    self.in_dim = src_dim.size()
    self.out_dim = dst_dim.size()
    self.in_q = src_q
    self.out_q = dst_q
    self.at_act_params = act_at_params
    self.cname = cname
    self.node_name = node_name
    self.at_ver = at_ver
def __init__(self, cname, params, qrec, gen_ctrl=None):
    """Assemble the MFCC-style attribute dictionary and pass it to the parent.

    Args:
        cname: Stored in the extra attributes and set on the generator control.
        params: Node parameters; frame, FFT, mel bank, DCT, pre-emphasis,
            window, log and ``magsquared`` settings plus ``name`` are read.
        qrec: Quantization record; the first output entry decides the
            ``float_dump`` option and the ``data_type`` attribute.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
    """
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)

    # A floating-point output switches the float_dump option on.
    if qrec.out_qs[0].is_floating:
        gen_ctrl.float_dump = 1

    # The log offset is scaled by 2**30 and rounded to an integer.
    if params.log_type is not None and params.log_offset:
        scaled_offset = np.round(params.log_offset * 2**(30))
        gen_ctrl.mfcc_log_offset = int(scaled_offset)

    # 0 when no log type is set, 2 for "db", 1 for any other log type.
    if not params.log_type:
        log_type_code = 0
    elif params.log_type == "db":
        log_type_code = 2
    else:
        log_type_code = 1

    attrs = {
        'n_frames': params.n_frames,
        'frame_size': params.frame_size,
        'frame_stride': params.frame_step,
        'n_fft': params.n_fft,
        'n_melbanks': params.n_fbanks,
        'size_mel_coeff': params.get_melfilter_size()[0],
        'n_dct': params.n_dct,
        'preemp_factor': params.preemp_factor,
        'no_window': int(params.win_fn is None),
        'lift_coeff': 0,
        'magsquared': int(params.magsquared),
        'data_type': DSP_DTYPE[qrec.out_qs[0].dtype],
        'log_type': log_type_code,
        'out_fft': 0,
    }

    # other attributes
    extra_attrs = {
        'cname': cname,
        'node_name': params.name,
    }
    super().__init__(attrs, extra_attrs, gen_ctrl=gen_ctrl)
def __init__(self, node_name, cname, params, qrec, gen_ctrl=None, at_ver=3):
    """Record pool parameters, dimensions and quantization.

    Args:
        node_name: Stored as ``self.node_name``.
        cname: Stored as ``self.cname`` and set on the generator control.
        params: Pool parameters; ``in_dims`` and ``out_dims`` are read.
        qrec: Quantization record; the first input and output entries are
            stored.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
        at_ver: Stored as ``self.at_ver``.
    """
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)
    self.gen_ctrl = gen_ctrl

    self.at_globalpool_params = gen_globalpool_at_params(params)
    self.in_dim = params.in_dims[0]
    self.out_dim = params.out_dims[0]
    self.in_q = qrec.in_qs[0]
    self.out_q = qrec.out_qs[0]

    self.cname = cname
    self.node_name = node_name
    self.at_ver = at_ver
def __init__(self, node_name, cname, linear_params, linear_q, act_params, act_q, at_ver=3, gen_ctrl=None):
    """Record linear/activation parameters and quantization.

    Args:
        node_name: Stored as ``self.node_name``.
        cname: Stored as ``self.cname`` and set on the generator control.
        linear_params: Linear parameters; must not be ``None``.
        linear_q: Quantization record of the linear node; ``weights_q``,
            ``biases_q`` and the first input/output entries are read.
        act_params: Optional activation parameters; ``NO_ACTIVATION`` is
            used when this is ``None``.
        act_q: Activation quantization record; its first output replaces
            the output quantization when ``act_params`` is given.
        at_ver: Stored as ``self.at_ver``; also selects how ReluN is set on
            the generator control.
        gen_ctrl: Existing generator control to reuse; a new ``GenCtrl`` is
            created when this is ``None``.
    """
    if gen_ctrl is not None:
        gen_ctrl.cname = cname
    else:
        gen_ctrl = GenCtrl(None, cname=cname)
    self.gen_ctrl = gen_ctrl

    assert linear_params is not None, "linear should always be included"
    self.at_linear_params = gen_linear_at_params(linear_params)
    self.in_dim = linear_params.in_dims[0]
    self.out_dim = linear_params.out_dims[0]

    self.filter_q = linear_q.weights_q
    self.in_q = linear_q.in_qs[0]
    result_q = linear_q.out_qs[0]
    self.bias_q = linear_q.biases_q

    if act_params is None:
        act_at_params = NO_ACTIVATION
    else:
        act_at_params = gen_active_at_params(act_params)
        result_q = act_q.out_qs[0]
        if at_ver < 3:
            if act_params.activation == "relu6" and result_q.q != 0:
                self.gen_ctrl.ReluN = 6 << result_q.q
                self.gen_ctrl.ReluNNoNorm = 1
        elif act_params.activation == "relun":
            self.gen_ctrl.ReluN = act_params.activation_params

    self.out_q = result_q
    self.at_act_params = act_at_params
    self.cname = cname
    self.node_name = node_name
    self.at_ver = at_ver