def c_code(self, node, name, inputs, outputs, sub):
    """
    Assemble the C source for the forward cross-map response normalization.

    The method concatenates several C template fragments, appends the
    closing braces they leave pending, and fills the result in with
    ``%``-formatting against ``locals()``.

    Parameters
    ----------
    node, name :
        Not used directly here; they remain visible to the templates
        through ``locals()``.
    inputs : sequence of exactly one item
        The C variable name for ``images``.
    outputs : sequence of exactly two items
        The C variable names for ``targets`` and ``denoms``.
    sub : mapping
        Must provide the ``'fail'`` entry.

    Returns
    -------
    The fully substituted C code string.
    """
    # NOTE: the final substitution uses ``locals()``, and the fragments
    # coming from the helpers / ``self`` attributes may reference any local
    # by name.  Locals that look unused (size_f, class_name_upper, ...) are
    # therefore kept under their exact names on purpose.
    (images,) = inputs
    (targets, denoms) = outputs
    fail = sub['fail']

    size_f = self._size_f
    add_scale = self._add_scale
    pow_scale = self._pow_scale
    # Rendered as a literal inside the generated call below.
    if self._blocked:
        blocked = "true"
    else:
        blocked = "false"

    class_name = self.__class__.__name__
    class_name_upper = class_name.upper()

    basic_setup = self._basic_setup

    # ``num_braces`` tracks how many '}' must be emitted at the end;
    # presumably each setup fragment leaves that many '{' blocks open.
    num_braces = 0

    setup_nv_images = "".join((
        contiguity_check("images"),
        dimension_check("images", 4),
        self._images_setup,
    ))
    num_braces += 2

    setup_nv_targets = output_same_shape('targets', 'images')
    num_braces += 1

    setup_nv_denoms = output_same_shape('denoms', 'images')
    num_braces += 1

    # The actual normalization call, executed once all NVMatrix wrappers
    # set up by the fragments above are in scope.
    do_normalize = (
        " convResponseNormCrossMap(nv_images, nv_denoms, nv_targets, "
        "numFilters, sizeF, addScale, powScale, blocked); "
    )

    braces = '}' * num_braces + "\n"

    rval = "".join((
        basic_setup,
        setup_nv_images,
        setup_nv_targets,
        setup_nv_denoms,
        do_normalize,
        braces,
    ))
    rval = rval % locals()
    return rval
def c_code(self, node, name, inputs, outputs, sub):
    """
    Assemble the C source that undoes cross-map response normalization.

    Several C template fragments are built, joined with newlines, closed
    with the pending braces, and finally filled in with ``%``-formatting
    against ``locals()``.

    The inline C templates are line-sensitive: they contain ``//``
    comments and ``#if`` / ``#else`` / ``#endif`` directives.  Each
    comment and directive therefore sits on its own line, so that a
    comment cannot swallow the declaration that follows it and the
    preprocessor recognises the directives.

    Parameters
    ----------
    node, name :
        Not used directly here; they remain visible to the templates
        through ``locals()``.
    inputs : sequence of exactly four items
        C variable names for ``images``, ``acts``, ``denoms``, ``dout``.
    outputs : sequence of exactly two items
        C variable names for ``targets`` and ``out_acts``.
    sub : mapping
        Must provide the ``'fail'`` entry.

    Returns
    -------
    The fully substituted C code string.
    """
    # NOTE: the final substitution uses ``locals()``, and the fragments
    # coming from the helpers / ``self`` attributes may reference any local
    # by name, so locals that look unused must keep their exact names.
    images, acts, denoms, dout = inputs
    targets, out_acts = outputs
    fail = sub['fail']
    num_braces = 0
    size_f = self._size_f
    add_scale = self._add_scale
    pow_scale = self._pow_scale
    # Rendered as literals: ``blocked`` as a call argument, ``inplace`` as
    # the ``#if`` condition (presumably compiled as C++/CUDA, where
    # ``true``/``false`` are valid in ``#if`` -- TODO confirm).
    blocked = "true" if self._blocked else "false"
    inplace = "true" if self._inplace else "false"
    # NOTE(review): int() truncates any fractional scale before it is
    # assigned to a C ``float`` -- confirm the scales are always integral.
    scale_targets = int(self._scale_targets)
    scale_outputs = int(self._scale_outputs)

    class_name = self.__class__.__name__
    class_name_upper = class_name.upper()

    basic_setup = self._basic_setup
    scaling_setup = """
    float scaleTargets = %(scale_targets)s;
    float scaleOutput = %(scale_outputs)s;
    """

    # ``num_braces`` counts the '{' blocks left open by each fragment so
    # that the matching '}' can be appended at the very end.
    setup_nv_images = (
        contiguity_check("images") +
        dimension_check("images", 4) +
        self._images_setup
    )
    num_braces += 2

    # The two '{' opened explicitly below are the two braces counted here.
    setup_acts = (
        contiguity_check("acts") +
        dimension_check("acts", 4) +
        """
    { //setup_nv_images brace 1
    const int * acts_dims = CudaNdarray_HOST_DIMS(%(acts)s);
    """ +
        ensure_same_shape('acts', 'images') +
        """
    { // setup_nv_acts brace 2
    """
    )
    num_braces += 2

    # NOTE(review): unlike acts/dout, ``denoms_dims`` aliases
    # ``images_dims`` instead of reading the dims of ``denoms`` -- confirm
    # this is intended.
    setup_nv_denoms = (
        contiguity_check("denoms") +
        dimension_check("denoms", 4) +
        """
    {
    const int *denoms_dims = images_dims;
    """ +
        ensure_same_shape("denoms", "images") +
        nv_matrix_create("denoms")
    )
    num_braces += 2

    setup_nv_dout = (
        contiguity_check("dout") +
        dimension_check("dout", 4) +
        """
    { // setup_nv_dout brace
    const int *dout_dims = CudaNdarray_HOST_DIMS(%(dout)s);
    """ +
        ensure_same_shape("dout", "images") +
        nv_matrix_create("dout")
    )
    num_braces += 2

    setup_nv_targets = output_same_shape('targets', 'images')
    num_braces += 1

    # In-place mode makes ``out_acts`` a new reference to ``acts``;
    # otherwise ``out_acts`` is (re)allocated and ``acts`` is copied into
    # it.  On failure the generated code drops the already prepared
    # outputs before running the ``fail`` code.
    setup_nv_out_acts = (
        """
    const int *out_acts_dims = images_dims;

    #if %(inplace)s // XXX: is this right?
    Py_XDECREF(%(out_acts)s);
    %(out_acts)s = %(acts)s;
    Py_INCREF(%(out_acts)s);
    #else
    if (CudaNdarray_prep_output(& %(out_acts)s, 4, out_acts_dims)) {
        Py_DECREF(%(targets)s);
        %(fail)s;
    }
    if (CudaNdarray_CopyFromCudaNdarray(%(out_acts)s, %(acts)s)) {
        Py_DECREF(%(targets)s);
        Py_DECREF(%(out_acts)s);
        %(fail)s;
    }
    #endif
    """ +
        nv_matrix_create("out_acts")
    )
    num_braces += 1

    undo_normalize = """
    convResponseNormCrossMapUndo(nv_dout, nv_denoms, nv_images,
                                 nv_out_acts, nv_targets, numFilters,
                                 sizeF, addScale, powScale, blocked,
                                 scaleTargets, scaleOutput);
    """

    rval = "\n".join((basic_setup,
                      scaling_setup,
                      setup_nv_images,
                      setup_acts,
                      setup_nv_denoms,
                      setup_nv_dout,
                      setup_nv_targets,
                      setup_nv_out_acts,
                      undo_normalize,
                      "}" * num_braces))
    return rval % locals()