def generate_c_code(self, **kwargs): res = '' # include header res += '\n'.join([ c_helper.generate_local_include(h) for h in self.get_c_op_include_header() ]) res += '\n\n' # param type res += self.get_c_param_type() res += '\n\n' # 1 TemplateArrayAddLoop = c_helper.generate_ndim_for_loop( np.ones(self.output_tensor_shapes[0])) # 2 lcoal_var_name = '_output_val' indent = ' ' * 4 c_array = ', '.join([str(v) for v in self.output_tensor_values[0]]) TemplateConstValue = indent + 'int ' + lcoal_var_name + str( self.input_tensor_ndims) + '={' + c_array + '};' mapping = {} TemplateStatements = 'output[i] =' + lcoal_var_name + '[i];' mapping.update({'X': lcoal_var_name}) mapping.update({'Y': self.output_tensor_names[0]}) TemplateFunction = cleandoc(''' void {op_func_name}(void *op_param,{tx} input{XDims} , {ty} output{YDims}, void *inputs_params, void* outputs_params) {{ {statements1} {statements2} }} ''') mappingf = {} mappingf.update({'op_func_name': self.get_func_name()}) mappingf.update({ 'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]) }) mappingf.update({ 'YDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]) }) mappingf.update({'tx': data_type.np2c(self.input_tensor_dtypes[0])}) mappingf.update({'ty': data_type.np2c(self.output_tensor_dtypes[0])}) mappingf.update({'statements1': TemplateConstValue}) mappingf.update({ 'statements2': TemplateArrayAddLoop.replace('[statements]', TemplateStatements.format(**mapping)) }) res += '\n\n' res += TemplateFunction.format(**mappingf) return res
def generate_c_code(self, **kwargs):
    """Emit the C source for Abs: local includes, the param-type block and
    an element-wise loop computing Y = |X| with the type-matched C abs
    function (fabs / fabsf)."""
    res = ''
    # include header
    res += '\n'.join([c_helper.generate_local_include(h)
                      for h in self.get_c_op_include_header()])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    # n-dim for-loop skeleton over the output shape; '[statements]' is
    # replaced with the per-element statement below.
    TemplateArrayAddLoop = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]))
    # per-element statement template
    TemplateStatements = '{Y}{StatementDims} = {c_abs}({X}{StatementDims});'
    mapping = {}
    mapping.update({'X': self.input_tensor_names[0]})
    mapping.update({'Y': self.output_tensor_names[0]})
    # build the [i][j]... index suffix, one loop variable per output axis
    StatementDims = ''
    for _, step in zip_longest(
            self.input_tensor[0].shape[::-1],
            reversed(string.ascii_lowercase[8:8 +
                                            self.output_tensor_ndims[0]])):
        StatementDims = '[{0}]'.format(step) + StatementDims
    mapping.update({'StatementDims': StatementDims})
    out_c_type = data_type.np2c(self.output_tensor_dtypes[0])
    if out_c_type.startswith('double'):
        mapping.update({'c_abs': 'fabs'})
    elif out_c_type.startswith('float'):
        mapping.update({'c_abs': 'fabsf'})
    elif out_c_type.startswith('int'):
        # NOTE(review): integer inputs are routed through fabsf (float abs);
        # large ints can lose precision — consider stdlib 'abs' once the
        # generated includes allow it. Behavior kept as-is here.
        mapping.update({'c_abs': 'fabsf'})
    else:
        raise ValueError('{0} is not supported'.format(out_c_type))
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{t} {X}{Dims} , {t} {Y}{Dims}, void *inputs_params, void* outputs_params) {{
            {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({'X': self.input_tensor_names[0]})
    mappingf.update({'Y': self.output_tensor_names[0]})
    # BUGFIX: 'Dims' was assigned twice — first from the input shape, then
    # immediately overwritten by the output shape. The first assignment was
    # dead code; only the effective (output-shape) assignment is kept.
    mappingf.update({
        'Dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({
        'statements':
        TemplateArrayAddLoop.replace('[statements]',
                                     TemplateStatements.format(**mapping))
    })
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Build the C source for this random-fill op: includes, the
    param-type block and a function filling the output with uniformly
    distributed pseudo-random values in [low, high)."""
    pieces = [
        '\n'.join(
            c_helper.generate_local_include(header)
            for header in self.get_c_op_include_header()),
        '\n\n',
        # param type
        self.get_c_param_type(),
        '\n\n',
    ]
    # loop skeleton over the output shape; '[statements]' replaced below
    loop_template = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]))
    statement_template = '''
    output{dims} = ((high - low) * rand()/RAND_MAX ) - low;
    '''
    index_vars = string.ascii_lowercase[8:8 + self.output_tensor_ndims[0]]
    stmt_fields = {'dims': ''.join('[' + v + ']' for v in index_vars)}
    # 3
    func_template = cleandoc('''
        void {op_func_name}(void *op_param, {t_in} data{dims}, {t_out} output{dims}, void *inputs_params, void* outputs_params) {{
            const float high = {high};
            const float low = {low};
            {statements}
        }}
        ''')
    func_fields = {
        'op_func_name': self.get_func_name(),
        'dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
        't_in': data_type.np2c(self.input_tensor_dtypes[0]),
        't_out': data_type.np2c(self.output_tensor_dtypes[0]),
        'high': self.attrs.get('high', 1.0),
        'low': self.attrs.get('low', 0.0),
        'statements': loop_template.replace(
            '[statements]', statement_template.format(**stmt_fields)),
    }
    pieces.append('\n\n')
    pieces.append(func_template.format(**func_fields))
    return ''.join(pieces)
def gen_param_signature(self, name, value):
    """Return a C parameter signature such as '<type> <name>[d0][d1]...'.

    Args:
        name: C identifier to use for the parameter.
        value: array-like exposing .dtype and .shape (e.g. a numpy array).

    Returns:
        The formatted C signature string.
    """
    # BUGFIX: 'mapping' was never initialized (the assignment had been
    # commented out), so the first .update() raised NameError.
    mapping = {}
    mapping.update({'type': data_type.np2c(value.dtype)})
    mapping.update({'name': name})
    mapping.update({'dim_bracket': c_helper.generate_dim_bracket(value.shape)})
    return '{type} {name}{dim_bracket}'.format(**mapping)
def generate_c_code(self, **kwargs):
    """Emit the C function for Identity: a single memcpy of the whole
    input tensor into the output buffer."""
    TEMPLATE_IDENTITY_FUNC = cleandoc('''
        void {op_func_name}(void *op_param, {t} Input{InputDims}, {t} Output{OutputDims}, void *inputs_params, void* outputs_params){{
            memcpy(Output, Input, sizeof({t}) * {cumdim});
        }}
        ''')
    res = ''
    res += self.get_c_param_type()  # call only once
    res += '\n\n\n'
    # constant function
    mapping = {}
    mapping.update({'op_func_name': self.get_func_name()})
    mapping.update({'t': data_type.np2c(self.input_tensor_dtypes[0])})
    # BUGFIX: np.cumproduct is deprecated (removed in NumPy 2.0);
    # np.prod gives the same total element count directly.
    mapping.update({'cumdim': np.prod(self.input_tensor_shapes[0])})
    mapping.update({'Input': self.input_tensor_names[0]})
    mapping.update({'Output': self.output_tensor_names[0]})
    mapping.update({
        'InputDims':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    # BUGFIX: OutputDims previously reused the *input* shape. For Identity
    # the shapes match, but deriving it from the output shape is correct
    # in general and matches the template's intent.
    mapping.update({
        'OutputDims':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    res += TEMPLATE_IDENTITY_FUNC.format(**mapping)
    return res
def generate_c_code(self, **kwargs):
    """Build the C source for Exp: includes, the param-type block and an
    element-wise Y = exp(X) loop, optionally OpenMP-parallelised."""
    header_block = '\n'.join(
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header())
    # loop skeleton over the output shape; '[statements]' replaced below
    loop_template = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]), pragma=self.OpenMP)
    if self.OpenMP:
        loop_template = loop_template.replace('[pragma]', self.PRAGMA_OMP)
    statement_template = '''
    Y{dims} = exp(X{dims});
    '''
    index_vars = string.ascii_lowercase[8:8 + self.output_tensor_ndims[0]]
    stmt_fields = {'dims': ''.join('[' + v + ']' for v in index_vars)}
    func_template = cleandoc('''
        void {op_func_name}(void *op_param, {t} X{dims}, {t} Y{dims}, void *inputs_params, void* outputs_params) {{
            {statements}
        }}
        ''')
    func_fields = {
        'op_func_name': self.get_func_name(),
        'dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
        't': data_type.np2c(self.output_tensor_dtypes[0]),
        'statements': loop_template.replace(
            '[statements]', statement_template.format(**stmt_fields)),
    }
    res = header_block + '\n\n' + self.get_c_param_type() + '\n\n'
    res += '\n\n' + func_template.format(**func_fields)
    return res
def generate_c_code(self, **kwargs):
    """Build the C source for Transpose: includes, the param-type block
    and a function whose loop body comes from generate_kernel_code()."""
    includes = '\n'.join(
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header())
    # loop skeleton over the output shape, optionally OpenMP-annotated
    loop_template = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]), pragma=self.OpenMP)
    if self.OpenMP:
        loop_template = loop_template.replace('[pragma]', self.PRAGMA_OMP)
    func_template = cleandoc('''
        void {op_func_name}(void *op_param, {t} {X}{XDims}, {t} {C}{CDims}, void *inputs_params, void* outputs_params) {{
            {statements}
        }}
        ''')
    fields = {
        'op_func_name': self.get_func_name(),
        'X': self.input_tensor_names[0],
        'C': self.output_tensor_names[0],
        'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
        'CDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
        't': data_type.np2c(self.output_tensor_dtypes[0]),
        'statements': loop_template.replace(
            '[statements]', self.generate_kernel_code()),
    }
    res = includes + '\n\n' + self.get_c_param_type() + '\n\n'
    res += '\n\n' + func_template.format(**fields)
    return res
def _generate_initializers_c(self):
    """Emit C definitions (with initial values) for every initializer
    listed in self.initializers, separated by blank lines."""
    template = cleandoc("""
        {t} {name}{dim_bracket} = {values};
        """)
    chunks = []
    for init_name in self.initializers:
        tensor = self.tensor_dict[init_name]
        chunks.append(template.format(
            name=init_name,
            t=data_type.np2c(tensor.dtype),
            dim_bracket=c_helper.generate_dim_bracket(tensor.shape),
            values=c_helper.generate_c_array(tensor),
        ))
    return "\n\n".join(chunks)
def generate_param_signature(param_dict, override_param_name=None):
    """Build C parameter signatures for each entry of *param_dict*.

    Args:
        param_dict: mapping of parameter name -> array-like with
            .dtype and .shape attributes.
        override_param_name: if given, this name is used for every
            parameter instead of the dict key.

    Returns:
        list of strings like " <type> <name>[d0][d1]".
    """
    param_signatures = []
    param_signature = " {type} {name}{dim}"
    for param_name, param_val in param_dict.items():
        mapping = {
            "type": data_type.np2c(param_val.dtype),
            "dim": generate_dim_bracket(param_val.shape),
            # BUGFIX/idiom: compare to None with 'is' (PEP 8), not '=='.
            "name": param_name if override_param_name is None
            else override_param_name
        }
        param_signatures.append(param_signature.format(**mapping))
    return param_signatures
def generate_c_code(self, **kwargs):
    """Build the C source for LeakyRelu: includes, the param-type block
    and the element-wise kernel loop with alpha baked in as a constant."""
    includes = '\n'.join(
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header())
    # loop skeleton over the output shape, optionally OpenMP-annotated
    loop_template = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]), pragma=self.OpenMP)
    if self.OpenMP:
        loop_template = loop_template.replace('[pragma]', self.PRAGMA_OMP)
    func_template = cleandoc('''
        void {op_func_name}(void *op_param, {t} {X}{dims}, {t} {Y}{dims}, void *inputs_params, void* outputs_params) {{
            LeakyReluOpParam *param_ptr = (LeakyReluOpParam *)op_param;
            const {t} alpha = {alpha};
            {statements}
        }}
        ''')
    fields = {
        'op_func_name': self.get_func_name(),
        'dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
        't': data_type.np2c(self.output_tensor_dtypes[0]),
        'alpha': self.attrs['alpha'],
        'X': self.input_tensor_names[0],
        'Y': self.output_tensor_names[0],
        # kernel body with alpha emitted as a literal, not a variable
        'statements': loop_template.replace(
            '[statements]',
            self.generate_kernel_code(alpha_variable_value=False)),
    }
    res = includes + '\n\n' + self.get_c_param_type() + '\n\n'
    res += '\n\n' + func_template.format(**fields)
    return res
def _generate_inititializers_def(self):
    """Emit C declarations for every initializer together with its
    matching NUMPY_HEADER slot (vi_* name mapped to nph_*)."""
    template = cleandoc("""
        {t} {name}{dim_bracket};
        NUMPY_HEADER {nph_name};
        """)
    lines = ["// define initializers"]
    for init_name in self.initializers:
        tensor = self.tensor_dict[init_name]
        lines.append(template.format(
            name=init_name,
            nph_name=init_name.replace("vi_", "nph_"),
            t=data_type.np2c(tensor.dtype),
            dim_bracket=c_helper.generate_dim_bracket(tensor.shape),
        ))
    return "\n".join(lines)
def _generate_node_outputs_def(self):
    """Emit C buffer declarations for every output tensor of every node
    in the graph."""
    template = "{t} {name}{shape};"
    decls = ["// Define Outptus"]
    for node in self.graph:
        for idx in range(len(node.op.output_tensor)):
            decls.append(template.format(
                t=data_type.np2c(node.op.output_tensor_dtypes[idx]),
                name=node.output_tensor_names[idx],  # output_tensor_names
                shape=c_helper.generate_dim_bracket(
                    node.op.output_tensor_shapes[idx]),
            ))
    return "\n".join(decls)
def _gen_c_code_value(self):
    """Emit the C definition holding this Constant node's value table."""
    template = cleandoc('''
        {t} {name}{dim_bracket} = {values};
        ''')
    # decode the ONNX 'value' attribute into a numpy array
    value = numpy_helper.to_array(self.attrs['value'])
    return template.format(
        name=self.get_name(),
        t=data_type.np2c(self.output_tensor_dtypes[0]),
        dim_bracket=c_helper.generate_dim_bracket(
            self.output_tensor_shapes[0]),
        values=c_helper.generate_c_array(value),
    )
def generate_c_code(self, **kwargs):
    """Build the C source for this reduce op: includes, the param-type
    block, then a function made of an output-init section followed by
    the reduction loops."""
    includes = '\n'.join(
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header())
    func_template = cleandoc('''
        void {op_func_name}(void *op_param,{t} input{XDims}, {t} output{CDims}, void *inputs_params, void* outputs_params) {{
            {init_statements}\n {main_statements}
        }}
        ''')
    fields = {
        'op_func_name': self.get_func_name(),
        'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]),
        'CDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]),
        't': data_type.np2c(self.output_tensor_dtypes[0]),
        # reduction loops and the code zero/seed-initialising the output
        'main_statements': self.generate_c_code_reduce(),
        'init_statements': self.generate_c_code_init_output(),
    }
    res = includes + '\n\n' + self.get_c_param_type() + '\n\n'
    res += '\n\n' + func_template.format(**fields)
    return res
def generate_c_code(self, **kwargs):
    """Emit the C source for Constant: the param type, the constant value
    table (via _gen_c_code_value) and a function that memcpy's the stored
    value into the output buffer."""
    TEMPALTE_CONSTANCT_FUNC = cleandoc('''
        void {op_func_name}(void *op_param, void *outputs, void* outputs_params){{
            ConstantOpParam *p = (ConstantOpParam *)op_param;
            int ndim;
            int* shape;
            void * value;
            ndim = p->ndim;
            shape = p->shape;
            value =({type} *) p->value;
            int len = 1;
            for(int i=0;i< ndim;i++){{
                len *=shape[i];
            }}
            memcpy(outputs, value, sizeof({type}) * len);
        }}
        ''')
    res = ''
    res += self.get_c_param_type()  # call only once
    res += '\n\n\n'
    # constant value
    res += self._gen_c_code_value()
    res += '\n\n\n'
    # constant function
    mapping = {}
    # NOTE(review): 'op_name' is not referenced by the template above —
    # it looks unused; confirm before removing.
    mapping.update({'op_name': self.get_name()})
    mapping.update({'op_func_name': self.get_func_name()})
    mapping.update({'type': data_type.np2c(self.output_tensor_dtypes[0])})
    res += TEMPALTE_CONSTANCT_FUNC.format(**mapping)
    return res
def generate_c_code(self, **kwargs):
    """Emit the C source for this quantizing op: for each output element
    compute CLAMP(ROUND(X / Y) + Z, lo, hi), with size-1 axes of X/Y/Z
    broadcast via a [0] index."""
    res = ''
    # include header
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    res += self.get_op_variale_def()
    res += '\n\n'
    # 1: loop skeleton over the output shape ('[statements]' filled below)
    TemplateArrayAddLoop = c_helper.generate_ndim_for_loop(
        np.empty(self.output_tensor_shapes[0]), pragma=self.OpenMP)
    if self.OpenMP:
        TemplateArrayAddLoop = TemplateArrayAddLoop.replace(
            '[pragma]', self.PRAGMA_OMP)
    # 2: per-element statement. The commented block below is an earlier
    # two-step (temp then clamp) variant kept for reference.
    mapping = {}
    # TemplateStatements = 'temp_arr{CStatementDims} = ROUND({X}{XStatementDims} / {Y}{YStatementDims}) + {Z}{ZStatementDims};\n'
    #
    # if data_type.np2c(self.output_tensor_dtypes[0]) == 'uint8_t':
    #     TemplateStatements += '    {C}{CStatementDims} = CLAMP(temp_arr{CStatementDims}, 0, 255);\n'
    # else:
    #     TemplateStatements += '    {C}{CStatementDims} = CLAMP(temp_arr{CStatementDims}, -127, 128);\n'
    # NOTE(review): the signed branch clamps to (-127, 128); int8 range is
    # [-128, 127] — verify CLAMP's argument order and intended bounds.
    if data_type.np2c(self.output_tensor_dtypes[0]) == 'uint8_t':
        TemplateStatements = '{C}{CStatementDims} = CLAMP(ROUND({X}{XStatementDims} / {Y}{YStatementDims}) + {Z}{ZStatementDims}, 0, 255);\n'
    else:
        TemplateStatements = '{C}{CStatementDims} = CLAMP(ROUND({X}{XStatementDims} / {Y}{YStatementDims}) + {Z}{ZStatementDims}, -127, 128);\n'
    mapping.update({'X': self.input_tensor_names[0]})
    mapping.update({'Y': self.input_tensor_names[1]})
    mapping.update({'Z': self.input_tensor_names[2]})
    mapping.update({'C': self.output_tensor_names[0]})
    # Build per-tensor index suffixes right-to-left; broadcast (size-1)
    # axes index with [0], others with the loop variable for that axis.
    XStatementDims = ''
    YStatementDims = ''
    ZStatementDims = ''
    CStatementDims = ''
    X, Y, Z = self.input_tensor_values
    for element_num_x, element_num_y, element_num_z, step in zip_longest(
            X.shape[::-1], Y.shape[::-1], Z.shape[::-1],
            reversed(string.ascii_lowercase[8:8 +
                                            self.output_tensor_ndims[0]])):
        if element_num_x is not None:
            if element_num_x == 1:
                XStatementDims = '[0]' + XStatementDims
            else:
                XStatementDims = '[{0}]'.format(step) + XStatementDims
        if element_num_y is not None:
            if element_num_y == 1:
                YStatementDims = '[0]' + YStatementDims
            else:
                YStatementDims = '[{0}]'.format(step) + YStatementDims
        if element_num_z is not None:
            if element_num_z == 1:
                ZStatementDims = '[0]' + ZStatementDims
            else:
                ZStatementDims = '[{0}]'.format(step) + ZStatementDims
        CStatementDims = '[{0}]'.format(step) + CStatementDims
    mapping.update({'XStatementDims': XStatementDims})
    mapping.update({'YStatementDims': YStatementDims})
    mapping.update({'ZStatementDims': ZStatementDims})
    mapping.update({'CStatementDims': CStatementDims})
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{x_type} {X}{XDims} , {y_type} {Y}{YDims}, {z_type} {Z}{ZDims}, {c_type} {C}{CDims}, void *inputs_params, void* outputs_params) {{
            {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({'X': self.input_tensor_names[0]})
    mappingf.update({'Y': self.input_tensor_names[1]})
    mappingf.update({'Z': self.input_tensor_names[2]})
    mappingf.update({'C': self.output_tensor_names[0]})
    mappingf.update({
        'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({
        'YDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[1])
    })
    mappingf.update({
        'ZDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[2])
    })
    mappingf.update({
        'CDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    # NOTE(review): dtypes here come from self.node.input_tensor_values,
    # unlike self.input_tensor_dtypes used elsewhere — confirm equivalence.
    mappingf.update(
        {'x_type': data_type.np2c(self.node.input_tensor_values[0].dtype)})
    mappingf.update(
        {'y_type': data_type.np2c(self.node.input_tensor_values[1].dtype)})
    mappingf.update(
        {'z_type': data_type.np2c(self.node.input_tensor_values[2].dtype)})
    mappingf.update(
        {'c_type': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({
        'statements':
        TemplateArrayAddLoop.replace('[statements]',
                                     TemplateStatements.format(**mapping))
    })
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Emit a C implementation of an LSTM step loop.

    Reads hidden_size from attrs and the remaining dimensions from the
    input tensor shapes, resolves the f/g/h activation functions
    (defaults Sigmoid/Tanh/Tanh with alpha/beta "0"), and instantiates
    one large C template performing the gate computations per time step.
    The memcpy-based Y_h/Y_c writes only support one direction (see the
    inline comments below).
    """
    hidden_size = self.attrs['hidden_size']
    batch_size = self.input_tensor_shapes[0][1]
    input_size = self.input_tensor_shapes[0][-1]
    sequence_lens = self.input_tensor_shapes[0][0]
    num_directions = self.input_tensor_shapes[1][0]
    # B concatenates W-biases then R-biases; the midpoint splits them.
    b_half_index = self.input_tensor_shapes[3][-1] // 2 if 3 < len(
        self.input_tensor) else 0
    HDim = [sequence_lens, num_directions, batch_size, hidden_size]
    res = ''
    # include header
    res += '\n'.join([
        c_helper.generate_std_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n\n'
    res += ""
    # activate func: defaults, then overlay whatever attrs provide
    activations = ['Sigmoid', 'Tanh', 'Tanh']  # default
    activation_alpha = ["0", "0", "0"]  # default
    activation_beta = ["0", "0", "0"]  # default
    func = list(self.attrs.get('activations', []))
    alpha = list(self.attrs.get('activation_alpha', []))
    beta = list(self.attrs.get('activation_beta', []))
    activations[:len(func)] = func
    activation_alpha[:len(alpha)] = alpha
    activation_beta[:len(beta)] = beta
    # emit each distinct activation helper exactly once
    for act in set(activations):
        res += self.get_activate_func(act) + '\n\n'
    res += '\n\n'
    mappingf = {}
    mappingf.update({"signature": self.get_signature()})
    mappingf.update({'input_size': input_size})
    mappingf.update({'batch_size': batch_size})
    mappingf.update({'hidden_size': hidden_size})
    mappingf.update({'sequence_lens': sequence_lens})
    mappingf.update({"PLen": str(3 * hidden_size)})
    # use the peephole input P when all 8 inputs exist, else the local
    # zero-initialised P declared in the template
    mappingf.update({
        "P":
        self.input_tensor_names[7] +
        "[0]" if len(self.input_tensor) == 8 else "P"
    })
    mappingf.update(
        {"HDim_last2": c_helper.generate_dim_bracket(HDim[-2:])})
    mappingf.update({"bias_code": self.gen_c_bias(b_half_index)})
    # activate
    mappingf.update({"act_f": LSTMActivate[activations[0]].value})
    mappingf.update({"act_g": LSTMActivate[activations[1]].value})
    mappingf.update({"act_h": LSTMActivate[activations[2]].value})
    mappingf.update({"act_alpha_f": activation_alpha[0]})
    mappingf.update({"act_alpha_g": activation_alpha[1]})
    mappingf.update({"act_alpha_h": activation_alpha[2]})
    mappingf.update({"act_beta_f": activation_beta[0]})
    mappingf.update({"act_beta_g": activation_beta[1]})
    mappingf.update({"act_beta_h": activation_beta[2]})
    mappingf.update({"Y_set_code": ""})
    mappingf.update({"Y_h_set_code": ""})
    mappingf.update({"Y_c_set_code": ""})
    # Optional outputs: mappingf_index tracks the position within the
    # *present* outputs while index walks the ONNX-defined slots.
    mappingf_index = 0
    for index, _ in enumerate(range(len(
            self.node.outputs))):  # onnx definitive order
        if index == 0 and self.node.outputs[index] != "":  # Y exist
            mappingf.update({
                'yt':
                data_type.np2c(self.output_tensor_dtypes[mappingf_index])
            })
            mappingf.update({'Y': "vi_Y"})
            mappingf.update({
                'YDims':
                c_helper.generate_dim_bracket(
                    self.output_tensor_shapes[mappingf_index])
            })
            mappingf.update(
                {"Y_set_code": "memcpy(&vi_Y[i], &(y), sizeof(y));"})
            mappingf_index += 1
        elif index == 1 and self.node.outputs[index] != "":  # Y_h exist
            mappingf.update({
                'yt':
                data_type.np2c(self.output_tensor_dtypes[mappingf_index])
            })
            mappingf.update({'Y_h': "vi_Y_h"})
            mappingf.update({
                'Y_hDims':
                c_helper.generate_dim_bracket(
                    self.output_tensor_shapes[mappingf_index])
            })
            mappingf.update({
                "Y_h_set_code": "memcpy(&vi_Y_h[0], &(y), sizeof(y));"
            })  # only support OneDirectino
            mappingf_index += 1
        elif index == 2 and self.node.outputs[index] != "":  # Y_c exist:
            mappingf.update({
                'yt':
                data_type.np2c(self.output_tensor_dtypes[mappingf_index])
            })
            mappingf.update({'Y_c': "vi_Y_c"})
            mappingf.update({
                'Y_cDims':
                c_helper.generate_dim_bracket(
                    self.output_tensor_shapes[mappingf_index])
            })
            mappingf.update({
                "Y_c_set_code": "memcpy(&vi_Y_c[0], &(tc), sizeof(tc));"
            })  # only support OneDirectino
    TemplateFunction = cleandoc('''
        {signature}
        {{
            const int hidden_size={hidden_size};
            const {yt} P[{PLen}] ={{0.0}};
            {yt} prevH{HDim_last2} = {{0.0}};
            {yt} prevC{HDim_last2} = {{0.0}};
            for(int i =0;i<{sequence_lens};i++) {{
                float y[{batch_size}][{hidden_size}] = {{0.0}};
                float ti[{batch_size}][{hidden_size}] = {{0.0}};
                float to[{batch_size}][{hidden_size}] = {{0.0}};
                float tf[{batch_size}][{hidden_size}] = {{0.0}};
                float tc[{batch_size}][{hidden_size}] = {{0.0}};
                for(int j=0;j<{batch_size};j++){{
                    for(int k=0;k<{hidden_size};k++){{
                        for(int l=0;l<{input_size};l++){{
                            ti[j][k] +=(vi_X[i][j][l] * vi_W[0][k + hidden_size * 0][l]);
                            to[j][k] +=(vi_X[i][j][l] * vi_W[0][k + hidden_size * 1][l]);
                            tf[j][k] +=(vi_X[i][j][l] * vi_W[0][k + hidden_size * 2][l]);
                            tc[j][k] +=(vi_X[i][j][l] * vi_W[0][k + hidden_size * 3][l]);
                        }}
                        for(int l=0;l<{hidden_size}; l++){{
                            ti[j][k] +=(prevH[j][l] * vi_R[0][k + hidden_size * 0][l]);
                            to[j][k] +=(prevH[j][l] * vi_R[0][k + hidden_size * 1][l]);
                            tf[j][k] +=(prevH[j][l] * vi_R[0][k + hidden_size * 2][l]);
                            tc[j][k] +=(prevH[j][l] * vi_R[0][k + hidden_size * 3][l]);
                        }}
                        {bias_code}
                    }}
                }}
                for(int j=0;j<{batch_size};j++){{
                    for(int k=0;k<{hidden_size};k++){{
                        ti[j][k] = {act_f}(ti[j][k] + {P}[k + 3 * 0] * prevC[j][k], {act_alpha_f}, {act_beta_f});
                        tf[j][k] = {act_f}(tf[j][k] + {P}[k + 3 * 1] * prevC[j][k], {act_alpha_f}, {act_beta_f});
                        tc[j][k] = {act_g}(tc[j][k], {act_alpha_g}, {act_beta_g});
                        tc[j][k] = tf[j][k] * prevC[j][k] + ti[j][k] * tc[j][k];
                        to[j][k] = {act_f}(to[j][k] + {P}[k + 3 * 2]* tc[j][k], {act_alpha_f}, {act_beta_f});
                        y[j][k] = to[j][k] * {act_h}(tc[j][k], {act_alpha_h}, {act_beta_h});
                    }}
                }}
                {Y_set_code}
                {Y_h_set_code}
                {Y_c_set_code}
                memcpy(&prevH, &(y), sizeof(y));
                memcpy(&prevC, &(tc), sizeof(tc));
            }}
        }}
        ''')
    res += TemplateFunction.format(**mappingf)
    return res
def get_signature(self):
    """Build the C function signature for the LSTM op.

    The mandatory X/W/R parameters come first; the optional inputs
    (B, sequence_lens, initial_h, initial_c, P) are appended only when
    present, followed by whichever of the optional outputs (Y, Y_h, Y_c)
    exist, then the trailing inputs_params/outputs_params pointers.
    """
    res = "void {op_func_name}(void *op_param,{xt} {X}{XDims},{wt} {W}{WDims},{rt} {R}{RDims},"
    mapping = {}
    mapping.update({'op_func_name': self.get_func_name()})
    mapping.update({'xt': data_type.np2c(self.input_tensor_dtypes[0])})
    mapping.update({'X': "vi_X"})
    mapping.update({
        'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mapping.update({'wt': data_type.np2c(self.input_tensor_dtypes[1])})
    mapping.update({
        'W': self.input_tensor_names[1].replace('_lstm_', '_')
    })  # vi_lstm_W -> vi_W
    mapping.update({
        'WDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[1])
    })
    mapping.update({'rt': data_type.np2c(self.input_tensor_dtypes[2])})
    mapping.update({'R': self.input_tensor_names[2].replace('_lstm_', '_')})
    mapping.update({
        'RDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[2])
    })
    # optional inputs, appended in ONNX order when present
    if 3 < len(self.input_tensor):
        mapping.update({'bt': data_type.np2c(self.input_tensor_dtypes[3])})
        mapping.update(
            {'B': self.input_tensor_names[3].replace('_lstm_', '_')})
        mapping.update({
            'BDims':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[3])
        })
        res += "{bt} {B}{BDims},"
    if 4 < len(self.input_tensor):
        mapping.update({'slt': data_type.np2c(self.input_tensor_dtypes[4])})
        mapping.update({'sl': self.input_tensor_names[4]})
        mapping.update({
            'slDims':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[4])
        })
        res += "{slt} {sl}{slDims},"
    if 5 < len(self.input_tensor):
        mapping.update(
            {'initial_ht': data_type.np2c(self.input_tensor_dtypes[5])})
        mapping.update({'initial_h': self.input_tensor_names[5]})
        mapping.update({
            'initial_hDims':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[5])
        })
        res += "{initial_ht} {initial_h}{initial_hDims},"
    if 6 < len(self.input_tensor):
        mapping.update(
            {'initial_ct': data_type.np2c(self.input_tensor_dtypes[6])})
        mapping.update({'initial_c': self.input_tensor_names[6]})
        mapping.update({
            'initial_cDims':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[6])
        })
        res += "{initial_ct} {initial_c}{initial_cDims},"
    if 7 < len(self.input_tensor):
        mapping.update({'pt': data_type.np2c(self.input_tensor_dtypes[7])})
        mapping.update({'P': self.input_tensor_names[7]})
        mapping.update({
            'PDims':
            c_helper.generate_dim_bracket(self.input_tensor_shapes[7])
        })
        res += "{pt} {P}{PDims},"
    # output: mapping_index tracks the position among *present* outputs
    # while index walks the ONNX-defined slots.
    mapping_index = 0
    for index, o in enumerate(range(len(
            self.node.outputs))):  # onnx definitive order
        if index == 0 and self.node.outputs[index] != "":  # Y exist
            mapping.update({
                'yt':
                data_type.np2c(self.output_tensor_dtypes[mapping_index])
            })
            mapping.update({'Y': "vi_Y"})
            mapping.update({
                'YDims':
                c_helper.generate_dim_bracket(
                    self.output_tensor_shapes[mapping_index])
            })
            mapping_index += 1
            res += "{yt} {Y}{YDims},"
        elif index == 1 and self.node.outputs[index] != "":  # Y_h exist
            mapping.update({
                'y_ht':
                data_type.np2c(self.output_tensor_dtypes[mapping_index])
            })
            mapping.update({'Y_h': "vi_Y_h"})
            mapping.update({
                'Y_hDims':
                c_helper.generate_dim_bracket(
                    self.output_tensor_shapes[mapping_index])
            })
            mapping_index += 1
            res += "{y_ht} {Y_h}{Y_hDims},"
        elif index == 2 and self.node.outputs[index] != "":  # Y_c exist:
            mapping.update({
                'y_ct':
                data_type.np2c(self.output_tensor_dtypes[mapping_index])
            })
            mapping.update({'Y_c': "vi_Y_c"})
            mapping.update({
                'Y_cDims':
                c_helper.generate_dim_bracket(
                    self.output_tensor_shapes[mapping_index])
            })
            res += "{y_ct} {Y_c}{Y_cDims},"
    res += " void *inputs_params, void* outputs_params)"
    return res.format(**mapping)
def generate_c_code(self, **kwargs):
    """Emit the C source for Xor: includes, the param-type block and an
    element-wise loop computing C = X ^ Y, with size-1 axes of X/Y
    broadcast via a [0] index."""
    res = ''
    # include header
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    # 1: loop skeleton over the output shape ('[statements]' filled below)
    TemplateArrayAddLoop = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]))
    # 2: per-element statement
    mapping = {}
    TemplateStatements = '{C}{CStatementDims} = {X}{XStatementDims} ^ {Y}{YStatementDims};'
    mapping.update({'X': self.input_tensor_names[0]})
    mapping.update({'Y': self.input_tensor_names[1]})
    mapping.update({'C': self.output_tensor_names[0]})
    # Build per-tensor index suffixes right-to-left; broadcast (size-1)
    # axes index with [0], others with the loop variable for that axis.
    XStatementDims = ''
    YStatementDims = ''
    CStatementDims = ''
    X, Y = self.input_tensor_values
    for element_num_x, element_num_y, step in zip_longest(
            X.shape[::-1], Y.shape[::-1],
            reversed(string.ascii_lowercase[8:8 +
                                            self.output_tensor_ndims[0]])):
        if element_num_x is not None:
            if element_num_x == 1:
                XStatementDims = '[0]' + XStatementDims
            else:
                XStatementDims = '[{0}]'.format(step) + XStatementDims
        if element_num_y is not None:
            if element_num_y == 1:
                YStatementDims = '[0]' + YStatementDims
            else:
                YStatementDims = '[{0}]'.format(step) + YStatementDims
        CStatementDims = '[{0}]'.format(step) + CStatementDims
    mapping.update({'XStatementDims': XStatementDims})
    mapping.update({'YStatementDims': YStatementDims})
    mapping.update({'CStatementDims': CStatementDims})
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{t} {X}{XDims} , {t} {Y}{YDims}, {t} {C}{CDims}, void *inputs_params, void* outputs_params) {{
            {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({'X': self.input_tensor_names[0]})
    mappingf.update({'Y': self.input_tensor_names[1]})
    mappingf.update({'C': self.output_tensor_names[0]})
    mappingf.update({
        'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({
        'YDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[1])
    })
    mappingf.update({
        'CDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({
        'statements':
        TemplateArrayAddLoop.replace('[statements]',
                                     TemplateStatements.format(**mapping))
    })
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Generate C source for an integer matrix multiply.

    With 2 inputs the kernel is a plain integer MatMul (A, B -> Y); with
    4 inputs two zero-point tensors are subtracted from A and B first
    (looks like ONNX MatMulInteger -- TODO confirm against caller).
    Returns the C source fragment (includes + param type + function).
    """
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    # Collect the (2 or 4) input shapes; inputs 2/3 are the zero points.
    input_shapes = []
    input_shapes.append(self.input_tensor_shapes[0])
    input_shapes.append(self.input_tensor_shapes[1])
    if (len(self.input_tensor) == 4):
        input_shapes.append(self.input_tensor_shapes[2])
        input_shapes.append(self.input_tensor_shapes[3])
    # Pad every shape up to 5 dims.  *_mod_shapes pad with 1 (used as the
    # actual C array extents); *_org_shapes pad with 0 so the generated
    # kernel can tell padded axes from real ones via its `> 1` tests.
    input_mod_shapes = []
    input_org_shapes = []
    max_dim = 5
    if (len(input_shapes[0]) == 1):
        # 1-D A: prepend all padding axes (row-vector treatment).
        input_mod_shapes.append(((1, ) * (max_dim - 1) + input_shapes[0]))
        input_org_shapes.append(((0, ) * (max_dim - 1) + input_shapes[0]))
    else:
        input_mod_shapes.append((1, ) * (max_dim - len(input_shapes[0])) +
                                input_shapes[0])
        input_org_shapes.append((0, ) * (max_dim - len(input_shapes[0])) +
                                input_shapes[0])
    if (len(input_shapes[1]) == 1):
        # 1-D B: append a trailing 1 axis (column-vector treatment).
        input_mod_shapes.append(
            ((1, ) * (max_dim - 2) + input_shapes[1] + (1, )))
        input_org_shapes.append(
            ((0, ) * (max_dim - 2) + input_shapes[1] + (1, )))
    else:
        input_mod_shapes.append((1, ) * (max_dim - len(input_shapes[1])) +
                                input_shapes[1])
        input_org_shapes.append((0, ) * (max_dim - len(input_shapes[1])) +
                                input_shapes[1])
    outputs_shape = ((1, ) * (max_dim - len(self.output_tensor_shapes[0]))
                     ) + self.output_tensor_shapes[0]
    output_names = self.output_tensor_names[0]  # NOTE(review): unused
    ndim = self.output_tensor_ndims[0]  # NOTE(review): unused
    # Leading extent of each zero-point tensor; [0, 0] when absent.
    # Extent 1 means a scalar zero point broadcast over the axis.
    if (len(self.input_tensor) == 2):
        zero_point_shapes = [0, 0]
    else:
        zero_point_shapes = [
            self.input_tensor_shapes[2][0], self.input_tensor_shapes[3][0]
        ]
    # C prologue: dimension constants, flat pointer aliases, and a buffer
    # BT holding B transposed in its last two axes (filled below).
    TemplateStatements = '''
    const int A_h = {A_d0};
    const int A_i = {A_d1};
    const int A_j = {A_d2};
    const int A_m = {A_d3};
    const int A_k = {A_d4};
    const int B_h = {B_d0};
    const int B_i = {B_d1};
    const int B_j = {B_d2};
    const int B_k = {B_d3};
    const int B_n = {B_d4};
    const int Y_h = {Y_d0};
    const int Y_i = {Y_d1};
    const int Y_j = {Y_d2};
    const int Y_m = {Y_d3};
    const int Y_n = {Y_d4};
    const int A_h_o = {A_d0_o};
    const int A_i_o = {A_d1_o};
    const int A_j_o = {A_d2_o};
    const int B_h_o = {B_d0_o};
    const int B_i_o = {B_d1_o};
    const int B_j_o = {B_d2_o};
    {ta} *_A = ({ta} *)A;
    {tb} *_B = ({tb} *)B;
    int *_Y = (int *)Y;
    int tmpA, tmpB, tmpY;
    {tb} BT [{B_d0}][{B_d1}][{B_d2}][{B_d3}][{B_d4}];
    {tb} *_BT = ({tb} *)BT;
'''
    # Zero-point work buffers, expanded per row (A) / per column (B).
    if (len(self.input_tensor) == 4):
        TemplateStatements += '''
    {taz} a_zero_point_mod[{A_d3}];
    {tbz} b_zero_point_mod[{B_d4}];
'''
    TemplateStatements += '''
    int h, i, j;
    int ah, ai, aj;
    int bh, bi, bj;
    int k;
    int m;
    int n;
    int tmpA_pos_h, tmpA_pos_i, tmpA_pos;
    int tmpB_pos_h, tmpB_pos_i, tmpB_pos;
    int tmpY_pos_h, tmpY_pos_i, tmpY_pos;

    memset( Y, (int)0, sizeof(*_Y)*Y_h*Y_i*Y_j*Y_m*Y_n );
'''
    # Broadcast scalar zero points (extent 1) or copy per-axis vectors.
    if (len(self.input_tensor) == 4):
        if (zero_point_shapes[0] == 1):
            TemplateStatements += '''
    #pragma omp parallel for
    for (m=0; m < A_m; m++) {{
        a_zero_point_mod[m] = a_zero_point[0];
    }}
'''
        else:
            TemplateStatements += '''
    #pragma omp parallel for
    for (m=0; m < A_m; m++) {{
        a_zero_point_mod[m] = a_zero_point[m];
    }}
'''
        if (zero_point_shapes[1] == 1):
            TemplateStatements += '''
    #pragma omp parallel for
    for (n=0; n < B_n; n++) {{
        b_zero_point_mod[n] = b_zero_point[0];
    }}
'''
        else:
            TemplateStatements += '''
    #pragma omp parallel for
    for (n=0; n < B_n; n++) {{
        b_zero_point_mod[n] = b_zero_point[n];
    }}
'''
    # Transpose B's last two axes into BT, then run the batched matmul.
    # The (X_o > 1) ? idx : 0 tests implement broadcasting over the three
    # leading (batch) axes using the 0-padded original shapes.
    TemplateStatements += '''
    for (h=0; h < B_h; h++) {{
        bh = (B_h_o > 1) ? h : 0;
        tmpB_pos_h = bh*(B_i*B_j*B_k*B_n);
        for (i=0; i < B_i; i++) {{
            bi = (B_i_o > 1) ? i : 0;
            tmpB_pos_i = tmpB_pos_h + bi*(B_j*B_k*B_n);
            for (j=0; j < B_j; j++) {{
                bj = (B_j_o > 1) ? j : 0;
                tmpB_pos = tmpB_pos_i + bj*(B_k*B_n);
                #pragma omp parallel for private(n,k)
                for (n=0; n < B_n; n++) {{
                    for (k=0; k < B_k; k++) {{
                        *(_BT + tmpB_pos + n*(B_k) + k) = *(_B + tmpB_pos + k*(B_n) + n);
                    }}
                }}
            }}
        }}
    }}
    for (h=0; h < Y_h; h++) {{
        ah = (A_h_o > 1) ? h : 0;
        bh = (B_h_o > 1) ? h : 0;
        tmpA_pos_h = ah*(A_i*A_j*A_m*A_k);
        tmpB_pos_h = bh*(B_i*B_j*B_k*B_n);
        tmpY_pos_h = h*(Y_i*Y_j*Y_m*Y_n);
        for (i=0; i < Y_i; i++) {{
            ai = (A_i_o > 1) ? i : 0;
            bi = (B_i_o > 1) ? i : 0;
            tmpA_pos_i = tmpA_pos_h + ai*(A_j*A_m*A_k);
            tmpB_pos_i = tmpB_pos_h + bi*(B_j*B_k*B_n);
            tmpY_pos_i = tmpY_pos_h + i*(Y_j*Y_m*Y_n);
            for (j=0; j < Y_j; j++) {{
                aj = (A_j_o > 1) ? j : 0;
                bj = (B_j_o > 1) ? j : 0;
                tmpA_pos = tmpA_pos_i + aj*(A_m*A_k);
                tmpB_pos = tmpB_pos_i + bj*(B_k*B_n);
                tmpY_pos = tmpY_pos_i + j*(Y_m*Y_n);
                #pragma omp parallel for private(m,n,k,tmpA,tmpB) reduction(+:tmpY)
                for (m=0; m < Y_m; m++) {{
                    for (n=0; n < Y_n; n++) {{
                        tmpY = 0;
                        for (k=0; k < B_k; k++) {{
'''
    # Inner product terms: subtract the zero points in the 4-input form.
    if (len(self.input_tensor) == 2):
        TemplateStatements += '''
                            tmpA = *(_A + tmpA_pos + m*(A_k) + k);
                            tmpB = *(_BT + tmpB_pos + n*(B_k) + k);
'''
    else:
        TemplateStatements += '''
                            tmpA = *(_A + tmpA_pos + m*(A_k) + k) - a_zero_point_mod[m];
                            tmpB = *(_BT + tmpB_pos + n*(B_k) + k) - b_zero_point_mod[n];
'''
    TemplateStatements += '''
                            tmpY += tmpA * tmpB;
                        }}
                        *(_Y + tmpY_pos + m*(Y_n) + n) = tmpY;
                    }}
                }}
            }}
        }}
    }}
'''
    mapping = {}
    mapping.update({'A_d0': input_mod_shapes[0][0]})
    mapping.update({'A_d1': input_mod_shapes[0][1]})
    mapping.update({'A_d2': input_mod_shapes[0][2]})
    mapping.update({'A_d3': input_mod_shapes[0][3]})
    mapping.update({'A_d4': input_mod_shapes[0][4]})
    mapping.update({'B_d0': input_mod_shapes[1][0]})
    mapping.update({'B_d1': input_mod_shapes[1][1]})
    mapping.update({'B_d2': input_mod_shapes[1][2]})
    mapping.update({'B_d3': input_mod_shapes[1][3]})
    mapping.update({'B_d4': input_mod_shapes[1][4]})
    mapping.update({'Y_d0': outputs_shape[0]})
    mapping.update({'Y_d1': outputs_shape[1]})
    mapping.update({'Y_d2': outputs_shape[2]})
    mapping.update({'Y_d3': outputs_shape[3]})
    mapping.update({'Y_d4': outputs_shape[4]})
    mapping.update({'ta': data_type.np2c(self.input_tensor_dtypes[0])})
    mapping.update({'tb': data_type.np2c(self.input_tensor_dtypes[1])})
    if (len(self.input_tensor) == 4):
        mapping.update(
            {'taz': data_type.np2c(self.input_tensor_dtypes[2])})
        mapping.update(
            {'tbz': data_type.np2c(self.input_tensor_dtypes[3])})
    # Original (0-padded) leading extents, feeding the broadcast tests.
    mapping.update({'A_d0_o': input_org_shapes[0][0]})
    mapping.update({'A_d1_o': input_org_shapes[0][1]})
    mapping.update({'A_d2_o': input_org_shapes[0][2]})
    mapping.update({'B_d0_o': input_org_shapes[1][0]})
    mapping.update({'B_d1_o': input_org_shapes[1][1]})
    mapping.update({'B_d2_o': input_org_shapes[1][2]})
    # 3
    # Function signature differs: the 4-input form also receives the two
    # zero-point arrays.
    if (len(self.input_tensor) == 4):
        TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {ta} A{dims_A}, {tb} B{dims_B}, {taz} a_zero_point{dims_az}, {tbz} b_zero_point{dims_bz}, int Y{dims}, void *inputs_params, void* outputs_params) {{
        {statements}
        }}
        ''')
    else:
        TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {ta} A{dims_A}, {tb} B{dims_B}, int Y{dims}, void *inputs_params, void* outputs_params) {{
        {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({'A': self.input_tensor_names[0]})
    mappingf.update(
        {'dims_A': c_helper.generate_dim_bracket(input_shapes[0])})
    mappingf.update({'B': self.input_tensor_names[1]})
    mappingf.update(
        {'dims_B': c_helper.generate_dim_bracket(input_shapes[1])})
    mappingf.update({'Y': self.output_tensor_names[0]})
    mappingf.update({
        'dims':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'ta': data_type.np2c(self.input_tensor_dtypes[0])})
    mappingf.update({'tb': data_type.np2c(self.input_tensor_dtypes[1])})
    if (len(self.input_tensor) == 4):
        mappingf.update(
            {'dims_az': c_helper.generate_dim_bracket(input_shapes[2])})
        mappingf.update(
            {'dims_bz': c_helper.generate_dim_bracket(input_shapes[3])})
        mappingf.update(
            {'taz': data_type.np2c(self.input_tensor_dtypes[2])})
        mappingf.update(
            {'tbz': data_type.np2c(self.input_tensor_dtypes[3])})
    mappingf.update({'statements': TemplateStatements.format(**mapping)})
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs): res = '' # include header res += '\n'.join([ c_helper.generate_local_include(h) for h in self.get_c_op_include_header() ]) res += '\n\n' # param type res += self.get_c_param_type() res += '\n\n' ndim = len(self.c_starts) indent = [' ' * 4] * ndim alpha_iter = reversed(string.ascii_lowercase[8:8 + ndim]) TemplateArrayLoop = ''.join(indent) + '[statements]' input_dim = '' output_dim = '' for s, e in zip(self.c_starts[::-1], self.c_ends[::-1]): var = next(alpha_iter) params = {} params.update({'var': var}) params.update({'start': str(0)}) params.update({'end': str(e)}) loop_start = ''.join( indent ) + 'for(int {var}={start};{var}<{end};{var}++ ){{'.format( **params) loop_end = ''.join(indent) + '}' TemplateArrayLoop = loop_start + '\n' + TemplateArrayLoop + '\n' + loop_end input_dim = '[' + str(var) + ('' if s == 0 else '+' + str(s)) + ']' + input_dim output_dim = '[' + str(var) + ']' + output_dim indent.pop() statements = TemplateArrayLoop.replace( '[statements]', ''.join(indent) + 'output' + output_dim + '=' + 'input' + input_dim + ';') TemplateFunction = cleandoc(''' void {op_func_name}(void *op_param,{t} input{XDims}, {t} output{CDims}, void *inputs_params, void* outputs_params) {{ {statements} }} ''') mappingf = {} mappingf.update({'op_func_name': self.get_func_name()}) mappingf.update({ 'XDims': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]) }) mappingf.update({ 'CDims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]) }) mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])}) mappingf.update({'statements': statements}) res += '\n\n' res += TemplateFunction.format(**mappingf) return res
def generate_c_code(self, **kwargs):
    """Generate C source for NonMaxSuppression.

    The emitted kernel, per (batch, class): decodes boxes to
    [ymin, xmin, ymax, xmax] (honoring `center_point_box`), keeps boxes
    scoring >= score_threshold, sorts them by score, greedily suppresses
    boxes whose IoU with an already-selected box exceeds iou_threshold,
    and writes (batch, class, box) triples into `selected_indices`
    (-1 rows for unused slots).  Relies on the C helpers
    `nonmaxsuppression_idx_sort` / `nonmaxsuppression_compute_iou`
    provided via the include headers.
    """
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    TemplateStatements = '''
    const int center_point_box = {center_point_box};
    const int boxes_batch = {boxes_batch};
    const int boxes_spatial = {boxes_spatial};
    const int boxes_params = {boxes_params};
    const int scores_batch = {scores_batch};
    const int scores_class = {scores_class};
    const int scores_spatial = {scores_spatial};
    const long long int max_output_boxes_p_class = max_output_boxes_per_class[0];
    const float iou_th = iou_threshold[0];
    const float score_th = score_threshold[0];
    const int selected_indices_num = {selected_indices_num};
    const int selected_indices_index = {selected_indices_index};

//    printf("max_output: %ld\\n", max_output_boxes_per_class[0]);
//    printf("iou_th: %f\\n", iou_threshold[0]);
//    printf("score_th: %f\\n", score_threshold[0]);
//    printf("max_output: %ld\\n", max_output_boxes_p_class);
//    printf("iou_th: %f\\n", iou_th);
//    printf("score_th: %f\\n", score_th);

    memset( (void *)selected_indices, 0, sizeof({t}) * selected_indices_num * selected_indices_index );
    if (max_output_boxes_p_class == 0) {{ return; }}
    for (int n=0; n<scores_batch; n++) {{
        for (int c=0; c<scores_class; c++) {{
            float decoded_boxes[boxes_spatial][4];
            // decode boxes
            if (center_point_box == 0) {{
                for (int i=0; i<boxes_spatial; i++) {{
                    if (boxes[n][i][0] < boxes[n][i][2]) {{
                        decoded_boxes[i][0] = boxes[n][i][0];  // ymin
                        decoded_boxes[i][2] = boxes[n][i][2];  // ymax
                    }} else {{
                        decoded_boxes[i][0] = boxes[n][i][2];  // ymin
                        decoded_boxes[i][2] = boxes[n][i][0];  // ymax
                    }}
                    if (boxes[n][i][1] < boxes[n][i][3]) {{
                        decoded_boxes[i][1] = boxes[n][i][1];  // xmin
                        decoded_boxes[i][3] = boxes[n][i][3];  // xmax
                    }} else {{
                        decoded_boxes[i][1] = boxes[n][i][3];  // xmin
                        decoded_boxes[i][3] = boxes[n][i][1];  // xmax
                    }}
                }}
            }} else {{
                for (int i=0; i<boxes_spatial; i++) {{
                    decoded_boxes[i][0] = boxes[n][i][1] - boxes[n][i][3]/2;  // ymin
                    decoded_boxes[i][1] = boxes[n][i][0] - boxes[n][i][2]/2;  // xmin
                    decoded_boxes[i][2] = boxes[n][i][1] + boxes[n][i][3]/2;  // ymax
                    decoded_boxes[i][3] = boxes[n][i][0] + boxes[n][i][2]/2;  // xmax
                }}
            }}
            //
            // SelectDetectionsAboveScoreThreshold(scores, non_max_suppression_score_threshold, &keep_scores, &keep_indices);
            //
            float keep_scores[scores_spatial];
            int keep_indices[scores_spatial];
            int num_scores_kept = 0;
            for (int i=0; i<scores_spatial; i++) {{
                keep_scores[i] = 0.0;
                keep_indices[i] = 0;
            }}
            for (int i=0; i<scores_spatial; i++) {{
//                printf("scores(%f)[%d:%d:%d]:th(%f) -> %d\\n", scores[n][c][i], n, c, i, score_th, num_scores_kept);
                if (scores[n][c][i] >= score_th) {{
                    keep_scores[num_scores_kept] = scores[n][c][i];
                    keep_indices[num_scores_kept] = i;
                    num_scores_kept++;
                }}
            }}
//            for (int i=0; i<num_scores_kept; i++) {{
//                printf("keep_indices[%d] = %d\\n", i, keep_indices[i]);
//            }}
            //
            // DecreasingPartialArgSort(keep_scores.data(), num_scores_kept, num_scores_kept, sorted_indices.data());
            //
            int sorted_indices[num_scores_kept];
            for (int i=0; i<num_scores_kept; i++) {{
                sorted_indices[i] = keep_indices[i];
//                printf("keep_score[%d] = %f\\n", sorted_indices[i], keep_scores[i]);
            }}
//            for (int i=0; i<num_scores_kept; i++) {{
//                printf("pre_keep_score[%d] = %f\\n", i, keep_scores[i]);
//            }}
//            for (int i=0; i<num_scores_kept; i++) {{
//                printf("pre_keep_indices[%d] = %d\\n", i, keep_indices[i]);
//            }}
//            for (int i=0; i<num_scores_kept; i++) {{
//                printf("pre_sorted_indices[%d] = %d\\n", i, sorted_indices[i]);
//            }}
//            qsort( keep_scores, num_scores_kept, sizeof(float), nonmaxsuppression_num_cmp );
            nonmaxsuppression_idx_sort( keep_scores, sorted_indices, num_scores_kept);
//            for (int i=0; i<num_scores_kept; i++) {{
//                printf("keep_score[%d] = %f\\n", i, keep_scores[i]);
//            }}
//            for (int i=0; i<num_scores_kept; i++) {{
//                printf("keep_indices[%d] = %d\\n", i, keep_indices[i]);
//            }}
//            for (int i=0; i<num_scores_kept; i++) {{
//                printf("sorted_indices[%d] = %d\\n", i, sorted_indices[i]);
//            }}
            const int num_boxes_kept = num_scores_kept;
            const int output_size = (num_scores_kept < max_output_boxes_p_class) ? num_scores_kept: max_output_boxes_p_class;
            int num_active_candidate = num_boxes_kept;
//            printf("output_size: %d\\n", output_size);
//            printf("selected_indices_num: %d\\n", selected_indices_num);
//            printf("num_active_candidate: %d\\n", num_active_candidate);
            int active_box_candidate[num_scores_kept];
            for (int row=0; row<num_boxes_kept; row++) {{
                active_box_candidate[row] = 1;
            }}
            int selected[scores_spatial];
            int selected_box_cnt = 0;
            for (int i=0; i<num_boxes_kept; i++) {{
                selected[i] = -1;
            }}
            for (int i=0; i<num_boxes_kept; i++) {{
                if (num_active_candidate == 0 || selected_box_cnt >= output_size) break;
                if (active_box_candidate[i] == 1) {{
//                    selected[selected_box_cnt] = keep_indices[sorted_indices[i]];
                    selected[selected_box_cnt] = sorted_indices[i];
//                    printf("%d: selected[%d] = %d\\n", i, selected_box_cnt, selected[selected_box_cnt]);
                    selected_box_cnt++;
                    active_box_candidate[i] = 0;
                    num_active_candidate--;
                }} else {{
                    continue;
                }}
                for (int j = i + 1; j < num_boxes_kept; ++j) {{
                    if (active_box_candidate[j] == 1) {{
//                        float iou = nonmaxsuppression_compute_iou((float *)decoded_boxes, keep_indices[sorted_indices[i]], keep_indices[sorted_indices[j]]);
                        float iou = nonmaxsuppression_compute_iou((float *)decoded_boxes, sorted_indices[i], sorted_indices[j]);
//                        printf("%d:%d: iou = [%f:%f:%f:%f] [%f:%f:%f:%f] %f(th:%f)\\n", i, j,
//                            decoded_boxes[i][0], decoded_boxes[i][1], decoded_boxes[i][2], decoded_boxes[i][3],
//                            decoded_boxes[j][0], decoded_boxes[j][1], decoded_boxes[j][2], decoded_boxes[j][3],
//                            iou, iou_th);
                        if (iou > iou_th) {{
                            active_box_candidate[j] = 0;
                            num_active_candidate--;
                        }}
                    }}
                }}
            }}
//            for (int i=0; i<num_boxes_kept; i++) {{
//                printf("sorted[%d] = %d : keep_indices = %d\\n", i, sorted_indices[i], keep_indices[sorted_indices[i]]);
//            }}
//            for (int i=0; i<num_boxes_kept; i++) {{
//                printf("selected[%d] = %d\\n", i, selected[i]);
//            }}
            int num_batch_elements = selected_indices_num/scores_batch;
            for (int i=0; i<num_batch_elements/scores_class; i++) {{
                if (selected[i] == -1) {{
                    selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][0] = -1;
                    selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][1] = -1;
                    selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][2] = -1;
                }} else {{
                    selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][0] = n;
                    selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][1] = c;
                    selected_indices[n*num_batch_elements+c*num_batch_elements/scores_class+i][2] = selected[i];
                }}
//                printf("NMS_result: %d:%d:%d\\n", n, c, selected[i]);
            }}
        }}
    }}
    '''
    mapping = {}
    mapping.update({'op_func_name': self.get_func_name()})
    mapping.update({'center_point_box': self.attrs['center_point_box']})
    mapping.update({'boxes_batch': self.input_tensor_shapes[0][0]})
    mapping.update({'boxes_spatial': self.input_tensor_shapes[0][1]})
    mapping.update({'boxes_params': self.input_tensor_shapes[0][2]})
    mapping.update({'scores_batch': self.input_tensor_shapes[1][0]})
    mapping.update({'scores_class': self.input_tensor_shapes[1][1]})
    mapping.update({'scores_spatial': self.input_tensor_shapes[1][2]})
    # NOTE(review): the next three entries are never referenced by the
    # templates above (which read the runtime arrays instead).
    mapping.update(
        {'max_output_boxes_per_class': self.input_tensor_shapes[2][0]})
    mapping.update({'iou_threshold': self.input_tensor_shapes[3][0]})
    mapping.update({'score_threshold': self.input_tensor_shapes[4][0]})
    mapping.update(
        {'selected_indices_num': self.output_tensor_shapes[0][0]})
    mapping.update(
        {'selected_indices_index': self.output_tensor_shapes[0][1]})
    mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    # 3
    TemplateFunction = cleandoc('''
    void {op_func_name}(void *op_param, float boxes{dims_boxes}, float scores{dims_scores}, long long int max_output_boxes_per_class[], float iou_threshold[], float score_threshold[], {t} selected_indices{dims_selected_indices}, void *inputs_params, void* outputs_params) {{
    {statements}
    }}
    ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({'boxes': self.input_tensor_names[0]})
    mappingf.update({
        'dims_boxes':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({'scores': self.input_tensor_names[1]})
    mappingf.update({
        'dims_scores':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[1])
    })
    mappingf.update(
        {'max_output_boxes_per_class': self.input_tensor_names[2]})
    mappingf.update({'iou_threshold': self.input_tensor_names[3]})
    mappingf.update({'score_threshold': self.input_tensor_names[4]})
    mappingf.update({'selected_indices': self.output_tensor_names[0]})
    mappingf.update({
        'dims_selected_indices':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'statements': TemplateStatements.format(**mapping)})
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs): res = '' res += '\n'.join([ c_helper.generate_local_include(h) for h in self.get_c_op_include_header() ]) res += '\n\n' # param type res += self.get_c_param_type() res += '\n\n' # 1 # 2 TemplateStatements = ''' {t} *_data = ({t} *)data; {t} *_squeezed = ({t} *)squeezed; int data_elements = {data_elements}; int shape_elements = {shape_elements}; int i; if (data_elements >= shape_elements) {{ for (i=0; i<shape_elements; i++) {{ *(_squeezed +i) = *(_data +i); }} }} else {{ for (i=0; i<data_elements; i++) {{ *(_squeezed +i) = *(_data +i); }} for (; i<shape_elements; i++) {{ *(_squeezed +i) = ({t})0.0; }} }} ''' mapping = {} mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])}) mapping.update({'data_elements': self.input_tensor[0].size}) mapping.update({'shape_elements': self.output_tensor[0].size}) # 3 TemplateFunction = cleandoc(''' void {op_func_name}(void *op_param, {t} data{dims_data}, {t} squeezed{dims_squeezed}, void *inputs_params, void* outputs_params) {{ {statements} }} ''') mappingf = {} mappingf.update({'op_func_name': self.get_func_name()}) mappingf.update({ 'dims_data': c_helper.generate_dim_bracket(self.input_tensor_shapes[0]) }) mappingf.update({ 'dims_squeezed': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]) }) mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])}) mappingf.update({'statements': TemplateStatements.format(**mapping)}) res += '\n\n' res += TemplateFunction.format(**mappingf) return res
def generate_c_code(self, **kwargs): res = '' res += '\n'.join([ c_helper.generate_local_include(h) for h in self.get_c_op_include_header() ]) res += '\n\n' # param type res += self.get_c_param_type() res += '\n\n' ratio = self.attrs.get('ratio', 0.5) # 1 TemplateArrayDropoutLoop = c_helper.generate_ndim_for_loop( np.ones(self.output_tensor_shapes[0])) TemplateStatements = ''' if (random() > RAND_MAX * ratio) {{ output{dims} = data{dims}; }} else {{ output{dims} = 0.0; }} ''' mapping = {} mapping.update({ 'dims': ''.join([ '[' + v + ']' for v in string.ascii_lowercase[8:8 + self.output_tensor_ndims[0]] ]) }) # 3 TemplateFunction = cleandoc(''' void {op_func_name}(void *op_param, {t} data{dims}, {t} output{dims}, void *inputs_params, void* outputs_params) {{ const float ratio = {ratio}; {statements} }} ''') mappingf = {} mappingf.update({'op_func_name': self.get_func_name()}) mappingf.update({ 'dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0]) }) mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])}) mappingf.update({'ratio': ratio}) mappingf.update({ 'statements': TemplateArrayDropoutLoop.replace( '[statements]', TemplateStatements.format(**mapping)) }) res += '\n\n' res += TemplateFunction.format(**mappingf) return res
def generate_c_code(self, **kwargs):
    """Generate the C source for BatchNormalization.

    The emitted kernel normalizes with the supplied running statistics
    (`mean`, `var`) and applies `scale`/`B`, switching per-channel vs.
    per-element parameter indexing on the `spatial` attribute via a C
    preprocessor `#if`.

    Bug fix: the local running average was computed as ``ave /= (h * w)``
    on an *uninitialized* variable in both branches; it now averages the
    accumulated ``sum`` (``ave = sum / (h * w);``).

    NOTE(review): the locally computed `ave`/`sigma2` are never used by
    the normalization itself (which reads `mean`/`var`), and in the
    non-spatial branch `mean[c][h][w]` is read with `h`/`w` already at
    their end-of-loop values -- both look like leftovers; confirm before
    further cleanup.
    """
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    TemplateStatements = '''
    const int Y_n = {d1};
    const int Y_c = {d2};
    const int Y_h = {d3};
    const int Y_w = {d4};
    const double epsilon = {epsilon};
    const double momentum = {momentum};
    const int spatial = {spatial};
    int n;
    int c, h, w;
    double sum;
    double ave;
    double sigma2;
    double norm;
#if {spatial} // spatial is true
    for (n=0; n<Y_n; n++) {{
        for (c=0; c<Y_c; c++) {{
            sum = 0.0;
            for (h=0; h<Y_h; h++) {{
                for (w=0; w<Y_w; w++) {{
                    sum += X[n][c][h][w];
                }}
            }}
            ave = sum / (h * w);
            ave = momentum * ave + (1-momentum) * mean[c];
            sigma2 = 0.0;
            for (h=0; h<Y_h; h++) {{
                for (w=0; w<Y_w; w++) {{
                    sigma2 += pow((X[n][c][h][w] - ave), 2);
                }}
            }}
            sigma2 /= (h * w);
            for (h=0; h<Y_h; h++) {{
                for (w=0; w<Y_w; w++) {{
                    norm = (X[n][c][h][w] - mean[c]) / sqrt(var[c]+epsilon);
                    Y[n][c][h][w] = scale[c] * norm + B[c];
                }}
            }}
        }}
    }}
#else // spatial is false
    for (n=0; n<Y_n; n++) {{
        for (c=0; c<Y_c; c++) {{
            sum = 0.0;
            for (h=0; h<Y_h; h++) {{
                for (w=0; w<Y_w; w++) {{
                    sum += X[n][c][h][w];
                }}
            }}
            ave = sum / (h * w);
            ave = momentum * ave + (1-momentum) * mean[c][h][w];
            sigma2 = 0.0;
            for (h=0; h<Y_h; h++) {{
                for (w=0; w<Y_w; w++) {{
                    sigma2 += pow((X[n][c][h][w] - ave), 2);
                }}
            }}
            sigma2 /= (h * w);
            for (h=0; h<Y_h; h++) {{
                for (w=0; w<Y_w; w++) {{
                    norm = (X[n][c][h][w] - mean[c][h][w]) / sqrt(var[c][h][w]+epsilon);
                    Y[n][c][h][w] = scale[c][h][w] * norm + B[c][h][w];
                }}
            }}
        }}
    }}
#endif // spatial
    '''
    mapping = {}
    mapping.update({'d1': self.input_tensor_shapes[0][0]})
    mapping.update({'d2': self.input_tensor_shapes[0][1]})
    mapping.update({'d3': self.input_tensor_shapes[0][2]})
    mapping.update({'d4': self.input_tensor_shapes[0][3]})
    mapping.update({'epsilon': self.attrs['epsilon']})
    mapping.update({'momentum': self.attrs['momentum']})
    mapping.update({'spatial': self.attrs['spatial']})
    # 3
    TemplateFunction = cleandoc('''
    void {op_func_name}(void *op_param, {t} X{dims_X}, {t} scale{dims_scale}, {t} B{dims_B}, {t} mean{dims_mean}, {t} var{dims_var}, {t} Y{dims}, void *inputs_params, void* outputs_params) {{
    {statements}
    }}
    ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({
        'dims_X':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({
        'dims_scale':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[1])
    })
    mappingf.update({
        'dims_B':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[2])
    })
    mappingf.update({
        'dims_mean':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[3])
    })
    mappingf.update({
        'dims_var':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[4])
    })
    mappingf.update({
        'dims':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'statements': TemplateStatements.format(**mapping)})
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Generate C source for a floating-point MatMul.

    Inputs are shape-padded to 5 dims (3 batch axes + the 2 matmul
    axes); the kernel walks flat pointers with explicitly computed
    strides.  Returns the C source fragment.
    """
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    input_shapes = []
    input_shapes.append(self.input_tensor_shapes[0])
    input_shapes.append(self.input_tensor_shapes[1])
    # Pad both shapes to 5 dims with leading 1s; a 1-D A becomes a row
    # vector, a 1-D B a column vector (trailing 1 appended).
    input_mod_shapes = []
    max_dim = 5
    if (len(input_shapes[0]) == 1):
        input_mod_shapes.append(((1, ) * (max_dim - 1) + input_shapes[0]))
    else:
        input_mod_shapes.append((1, ) * (max_dim - len(input_shapes[0])) +
                                input_shapes[0])
    if (len(input_shapes[1]) == 1):
        input_mod_shapes.append(
            ((1, ) * (max_dim - 2) + input_shapes[1] + (1, )))
    else:
        input_mod_shapes.append((1, ) * (max_dim - len(input_shapes[1])) +
                                input_shapes[1])
    # Output: max of the batch axes, then (m, n) from A and B.
    outputs_shape = (max(input_mod_shapes[0][0], input_mod_shapes[1][0]),
                     max(input_mod_shapes[0][1], input_mod_shapes[1][1]),
                     max(input_mod_shapes[0][2], input_mod_shapes[1][2]),
                     input_mod_shapes[0][3], input_mod_shapes[1][4])
    output_names = self.output_tensor_names[0]  # NOTE(review): unused
    ndim = self.output_tensor_ndims[0]  # NOTE(review): unused
    # Zero the output, then accumulate Y[h][i][j][m][n] += A[...][m][k] *
    # B[...][k][n] over flat pointers with hand-computed strides.
    TemplateStatements = '''
    int A_h = {A_d0};
    int A_i = {A_d1};
    int A_j = {A_d2};
    int A_m = {A_d3};
    int A_k = {A_d4};
    int B_h = {B_d0};
    int B_i = {B_d1};
    int B_j = {B_d2};
    int B_k = {B_d3};
    int B_n = {B_d4};
    int Y_h = {Y_d0};
    int Y_i = {Y_d1};
    int Y_j = {Y_d2};
    int Y_m = {Y_d3};
    int Y_n = {Y_d4};
    {t} *_A = ({t} *)A;
    {t} *_B = ({t} *)B;
    {t} *_Y = ({t} *)Y;
    {t} tmpA, tmpB;
    int h, i, j;
    int k;
    int m;
    int n;

    memset( Y, ({t})0.0, sizeof(*_Y)*Y_h*Y_i*Y_j*Y_m*Y_n );

    for (h=0; h < Y_h; h++) {{
        for (i=0; i < Y_i; i++) {{
            for (j=0; j < Y_j; j++) {{
                for (m=0; m < Y_m; m++) {{
                    for (n=0; n < Y_n; n++) {{
                        for (k=0; k < B_k; k++) {{
                            tmpA = *(_A + h*(Y_i*Y_j*Y_m*B_k) + i*(Y_j*Y_m*B_k) + j*(Y_m*B_k) + m*(B_k) + k);
                            tmpB = *(_B + h*(Y_i*Y_j*B_k*Y_n) + i*(Y_j*B_k*Y_n) + j*(B_k*Y_n) + k*(Y_n) + n);
                            *(_Y + h*(Y_i*Y_j*Y_m*Y_n) + i*(Y_j*Y_m*Y_n) + j*(Y_m*Y_n) + m*(Y_n) + n) += tmpA * tmpB;
                            // Y[h][i][j][m][n] += A[h][i][j][m][k] * B[h][i][j][k][n];
                        }}
                    }}
                }}
            }}
        }}
    }}
    '''
    mapping = {}
    mapping.update({'A_d0': input_mod_shapes[0][0]})
    mapping.update({'A_d1': input_mod_shapes[0][1]})
    mapping.update({'A_d2': input_mod_shapes[0][2]})
    mapping.update({'A_d3': input_mod_shapes[0][3]})
    mapping.update({'A_d4': input_mod_shapes[0][4]})
    mapping.update({'B_d0': input_mod_shapes[1][0]})
    mapping.update({'B_d1': input_mod_shapes[1][1]})
    mapping.update({'B_d2': input_mod_shapes[1][2]})
    mapping.update({'B_d3': input_mod_shapes[1][3]})
    mapping.update({'B_d4': input_mod_shapes[1][4]})
    mapping.update({'Y_d0': outputs_shape[0]})
    mapping.update({'Y_d1': outputs_shape[1]})
    mapping.update({'Y_d2': outputs_shape[2]})
    mapping.update({'Y_d3': outputs_shape[3]})
    mapping.update({'Y_d4': outputs_shape[4]})
    mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    # 3
    TemplateFunction = cleandoc('''
    void {op_func_name}(void *op_param, {t} A{dims_A}, {t} B{dims_B}, {t} Y{dims}, void *inputs_params, void* outputs_params) {{
    {statements}
    }}
    ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({'A': self.input_tensor_names[0]})
    mappingf.update(
        {'dims_A': c_helper.generate_dim_bracket(input_shapes[0])})
    mappingf.update({'B': self.input_tensor_names[1]})
    mappingf.update(
        {'dims_B': c_helper.generate_dim_bracket(input_shapes[1])})
    mappingf.update({'Y': self.output_tensor_names[0]})
    mappingf.update({
        'dims':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'statements': TemplateStatements.format(**mapping)})
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Generate C source for Concat along `self.attrs['axis']`.

    Builds one nested loop over the output shape; inside it, an
    if / else-if chain on the concat-axis index selects which input
    tensor to read from (with the index shifted back by that input's
    start boundary).
    """
    # Cumulative boundaries along the concat axis:
    # (input_name, start, end) per input.
    b_min = 0
    b_max = 0
    input_array_boundary = []
    for (_, d) in enumerate(self.input_tensor_shapes):
        b_max += d[self.attrs['axis']]
        input_array_boundary.append(
            (self.input_tensor_names[_], b_min, b_max))
        b_min = b_max
    # Loop letters i, j, k, ...; the one on the concat axis drives the
    # boundary tests.
    loop_idx = string.ascii_lowercase[8:8 + self.output_tensor_ndims[0]]
    concat_idx = loop_idx[self.attrs['axis']]
    res = ''
    # include header
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    # 1
    TemplateArrayConcatLoop = c_helper.generate_ndim_for_loop(np.ones(
        self.output_tensor_shapes[0]),
                                                              indent=0)
    # 2
    # Element subscript strings ("[i][j]...") per input/output tensor.
    input_vals = OrderedDict({
        k: self._gen_array_element_val(self.output_tensor_ndims[0], v)
        for k, v in self.input_tensor_dict.items()
    })
    output_vals = {
        self.output_tensor_names[0]:
        self._gen_array_element_val(self.output_tensor_ndims[0],
                                    self.output_tensor_values[0])
    }
    Conditions = ''  # NOTE(review): unused
    ConcatStatement = ''
    TemplateConditionElement = cleandoc('''
        {if} ({concat_idx} < {concat_boundary}) {{
        {indent}    {outputVal} = {inputVal};
        {indent}}}
        ''')
    for (i, boundary) in enumerate(input_array_boundary):
        mapping_cond_element = {
            't': data_type.np2c(self.input_tensor_dtypes[0])
        }
        mapping_cond_element.update({'concat_idx': concat_idx})
        mapping_cond_element.update(
            {'indent': ' ' * 4 * (self.output_tensor_ndims[0] + 1)})
        # mapping_cond_element.update({'inputVal': list(input_vals.keys())[i] + list(input_vals.values())[i]})
        # The order of list(val.keys()) did not follow the OrderedDict and
        # was unstable, so this was changed to use the order of the
        # separately-kept `boundary` variable instead.
        input_axis_idx = input_vals[boundary[0]]
        # Shift the concat-axis index into this input's local range.
        input_axis_idx = input_axis_idx.replace(
            concat_idx, concat_idx + '-' + str(boundary[1]))
        mapping_cond_element.update(
            {'inputVal': boundary[0] + input_axis_idx})
        mapping_cond_element.update({
            'outputVal':
            list(output_vals.keys())[0] + list(output_vals.values())[0]
        })
        mapping_cond_element.update({'concat_boundary': boundary[2]})
        # First branch carries the loop-body indentation; later branches
        # chain on as `else if`.
        if (i == 0):
            mapping_cond_element.update(
                {'if': ' ' * 4 * (self.output_tensor_ndims[0] + 1) + 'if'})
        else:
            mapping_cond_element.update({'if': 'else if'})
        ConcatStatement += TemplateConditionElement.format(
            **mapping_cond_element)
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{InputsParamSignature}, {OutputsParamSignature}, void *inputs_params, void* outputs_params) {{
        {statements}
        }}
        ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    input_sigs = []
    for name, value in self.input_tensor_dict.items():
        input_sigs.append(self.gen_param_signature(name, value))
    mappingf.update({'InputsParamSignature': ','.join(input_sigs)})
    mappingf.update({
        'OutputsParamSignature':
        self.gen_param_signature(self.output_tensor_names[0],
                                 self.output_tensor_values[0])
    })
    mappingf.update({
        'statements':
        TemplateArrayConcatLoop.replace('[statements]', ConcatStatement)
    })
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Generate C source for the Gather operator.

    Builds a nested loop over the output shape; each iteration copies
    data[<outer>][indices[<idx>]][<inner>] into the output, where the
    data subscript letters are split around `axis` and the middle
    subscript comes from the `indices` tensor.

    Fix: removed a stray `textwrap.TextWrapper()` call whose result was
    discarded (dead code).

    NOTE(review): negative indices produce 0.0 here, whereas ONNX Gather
    defines them as wrap-around from the end -- confirm intended.
    """
    axis = self.attrs['axis']
    data_ndims = self.input_tensor_ndims[0]
    output_ndims = self.output_tensor_ndims[0]
    indices_ndims = self.input_tensor_ndims[1]
    res = ''
    # include header
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'
    # param type
    res += self.get_c_param_type()
    res += '\n\n'
    # Kernel, indented to sit inside the innermost generated loop.
    TemplateKernel = textwrap.indent("""
    if ({array_indices}>= 0) {{
        {array_output}= {array_data};
    }} else {{
        {array_output} = 0.0;
    }}
    """,
                                     prefix=" " * 4 * output_ndims)
    TemplateLoop = c_helper.generate_ndim_for_loop(
        np.ones(self.output_tensor_shapes[0]))
    # Subscript letters before the gather axis ...
    array_right_data = ''.join(
        ['[' + v + ']' for v in string.ascii_lowercase[8:8 + axis]])
    # ... and after it (skipping the letters consumed by `indices`).
    array_left_data = ''.join([
        '[' + v + ']' for v in string.ascii_lowercase[9 + axis +
                                                      indices_ndims - 1:8 +
                                                      data_ndims +
                                                      indices_ndims - 1]
    ])
    array_indices = "indices" + "".join([
        '[' + v + ']'
        for v in string.ascii_lowercase[8 + axis:8 + axis + indices_ndims]
    ])
    array_data = "data" + array_right_data + '[' + array_indices + ']' + array_left_data
    array_output = "output" + ''.join([
        '[' + v + ']' for v in string.ascii_lowercase[8:8 + output_ndims]
    ])
    mapping_kernel = {}
    mapping_kernel.update({"array_data": array_data})
    mapping_kernel.update({"array_indices": array_indices})
    mapping_kernel.update({"array_output": array_output})
    # 3
    TemplateFunction = cleandoc('''
    void {op_func_name}(void *op_param, {t1} data{dims_data}, {t2} indices{dims_indices}, {t1} output{dims_output}, void *inputs_params, void* outputs_params) {{
    {loop_statements}
    }}
    ''')
    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({
        'dims_data':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({
        'dims_indices':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[1])
    })
    mappingf.update({
        'dims_output':
        c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t1': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'t2': data_type.np2c(self.input_tensor_dtypes[1])})
    mappingf.update({
        'loop_statements':
        TemplateLoop.replace('[statements]',
                             TemplateKernel.format(**mapping_kernel))
    })
    res += '\n\n'
    res += TemplateFunction.format(**mappingf)
    return res
def generate_c_code(self, **kwargs):
    """Emit the C source for this element-wise mean op.

    For every output element the generated kernel accumulates
    `sum += <input_i>` / `num++` once per input tensor, then writes
    `output = sum/num`, all inside an N-dim loop over the output shape.

    Returns:
        str: complete C function text (includes + param type + function).
    """
    res = ''

    # include header
    res += '\n'.join([c_helper.generate_local_include(h)
                      for h in self.get_c_op_include_header()])
    res += '\n\n'

    # param type
    res += self.get_c_param_type()
    res += '\n\n'

    # 1
    TemplateArrayAddLoop = c_helper.generate_ndim_for_loop(
        np.empty(self.output_tensor_shapes[0]), indent=0)

    # 2
    # Per-input element access strings (e.g. "[i][j]"), keyed by tensor name.
    input_vals = OrderedDict(
        {k: self._gen_array_element_val(self.output_tensor_ndims[0], v)
         for k, v in self.input_tensor_dict.items()})
    output_vals = {
        self.output_tensor_names[0]:
        self._gen_array_element_val(self.output_tensor_ndims[0],
                                    self.output_tensor_values[0])
    }

    Conditions = ''
    MeanStatement = ''
    TemplateCondition = cleandoc('''
        {t} sum = 0.0f;
        int num = 0;
        {conditions}
        {indent}{outputVal} = sum/num;
        ''')
    TemplateCompare = cleandoc('''
        {indent}num++;
        {indent}sum += {input};
        ''')

    for k, v in input_vals.items():
        Conditions += TemplateCompare.format(
            **{'input': k + v,
               'indent': ' ' * 4 * (self.input_tensor_ndims[0] + 1)})
        Conditions += '\n'

    # NOTE(fix): this block used to sit in a `for ... else:` clause; the
    # loop has no `break`, so the `else` always ran and was just misleading.
    mapping_cond = {'t': data_type.np2c(self.input_tensor_dtypes[0])}
    mapping_cond.update({'conditions': Conditions})
    mapping_cond.update({
        'outputVal':
        list(output_vals.keys())[0] + list(output_vals.values())[0]
    })
    mapping_cond.update(
        {'indent': ' ' * 4 * (self.output_tensor_ndims[0] + 1)})
    MeanStatement += TemplateCondition.format(**mapping_cond)

    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param,{InputsParamSignature}, {OutputsParamSignature}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')

    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    # one C parameter per input tensor, in dict order
    input_sigs = [self.gen_param_signature(name, value)
                  for name, value in self.input_tensor_dict.items()]
    mappingf.update({'InputsParamSignature': ','.join(input_sigs)})
    mappingf.update({
        'OutputsParamSignature':
        self.gen_param_signature(self.output_tensor_names[0],
                                 self.output_tensor_values[0])
    })
    mappingf.update({
        'statements':
        TemplateArrayAddLoop.replace('[statements]', MeanStatement)
    })

    res += '\n\n'
    res += TemplateFunction.format(**mappingf)

    return res
def generate_c_code(self, **kwargs):
    """Emit the C source for this softmax op.

    Flattens the tensor to (batch_size, num), where `num` is the product
    of the dimensions from `axis` onward, and emits the numerically-stable
    softmax per batch row: subtract the row max, exponentiate, normalize.

    Returns:
        str: complete C function text (includes + param type + function).
    """
    res = ''
    res += '\n'.join([
        c_helper.generate_local_include(h)
        for h in self.get_c_op_include_header()
    ])
    res += '\n\n'

    # param type
    res += self.get_c_param_type()
    res += '\n\n'

    axis = self.attrs['axis']
    if axis < 0:
        # Generalized from the original `axis == -1` special case: any
        # negative axis now counts from the end (numpy/ONNX convention).
        axis += self.input_tensor_ndims[0]

    batch_size = 1
    for d in range(0, axis):
        batch_size *= self.input_tensor_shapes[0][d]
    num = 1
    for d in range(axis, self.input_tensor_ndims[0]):
        num *= self.input_tensor_shapes[0][d]

    TemplateStatements = '''
    {t} *_input = ({t} *)input;
    {t} *_output = ({t} *)output;
    int batch_size = {batch_size};
    int num = {num};
    int i;
    int batch;
    {t} max, sum;

    for (batch=0; batch<batch_size; batch++) {{
        sum = 0.0;
        max = -HUGE_VAL;
        for (i=0; i<num; i++) {{
            if (*(_input + batch*num +i) > max) {{
                max = *(_input + batch*num +i);
            }}
        }}
        for (i=0; i<num; i++) {{
            *(_output + batch*num +i) = {exp}(*(_input + batch*num +i) - max);
            sum += *(_output + batch*num +i);
        }}
        for (i=0; i<num; i++) {{
            *(_output + batch*num +i) /= sum;
        }}
    }}
    '''

    mapping = {}
    mapping.update({'batch_size': batch_size})
    mapping.update({'num': num})
    # NOTE(fix): dropped unused 'd1'/'d2' entries — they read
    # output_tensor_shapes[0][0] and [1], never appeared in the template,
    # and raised IndexError for rank-1 outputs.
    mapping.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    if self.output_tensor_dtypes[0] == 'float64':
        mapping.update({'exp': 'exp'})
    else:
        # float32 and every other dtype fall back to expf, as before.
        # NOTE(review): non-float dtypes also get expf — confirm intended.
        mapping.update({'exp': 'expf'})

    # 3
    TemplateFunction = cleandoc('''
        void {op_func_name}(void *op_param, {t} input{dims_input}, {t} output{dims}, void *inputs_params, void* outputs_params)
        {{
        {statements}
        }}
        ''')

    mappingf = {}
    mappingf.update({'op_func_name': self.get_func_name()})
    mappingf.update({
        'dims_input':
        c_helper.generate_dim_bracket(self.input_tensor_shapes[0])
    })
    mappingf.update({
        'dims': c_helper.generate_dim_bracket(self.output_tensor_shapes[0])
    })
    mappingf.update({'t': data_type.np2c(self.output_tensor_dtypes[0])})
    mappingf.update({'statements': TemplateStatements.format(**mapping)})

    res += '\n\n'
    res += TemplateFunction.format(**mappingf)

    return res