def rgb565_rgb888(input_tensor: np.ndarray, in_dim: Dim, out_dim: Dim):
    '''Expand a single-channel packed RGB565 image into three 8-bit channels.

    The input is moved to (h, w, c) order and its single channel is repeated
    three times. Each packed value is then split into red (bits 15-11),
    green (bits 10-5) and blue (bits 4-0), with every field shifted so that
    it occupies the top bits of an 8-bit value. The result is cast to uint8
    and transposed with out_dim.transpose_from_order(("h", "w", "c")).
    '''
    assert in_dim.is_named and in_dim.c == 1 and out_dim.is_named and out_dim.c == 3
    hwc_order = ("h", "w", "c")
    packed_hwc = input_tensor.transpose(in_dim.transpose_to_order(hwc_order))
    # One copy of the packed pixel per output channel; each is unpacked in place
    # so the intermediate values keep the input's dtype.
    rgb = np.repeat(packed_hwc, 3, axis=2)
    # Snapshot the packed value so the channel writes do not depend on order.
    packed = rgb[:, :, 0].copy()
    rgb[:, :, 0] = (packed & (31 << 11)) >> 8
    rgb[:, :, 1] = (packed & (63 << 5)) >> 3
    rgb[:, :, 2] = (packed & 31) << 3
    return rgb.astype(np.uint8).transpose(out_dim.transpose_from_order(hwc_order))
def prepare_acc(biases: np.array, out_dims: Dim, qrec: FilterQuantizationRecord):
    '''Create the initial accumulator tensor, preloaded with the biases.

    Without biases the accumulator is whatever zeros(out_dims.shape, qrec,
    'acc_q') returns. With biases it is created in (c, h, w) order, every
    channel is filled with that channel's bias, and the tensor is then
    transposed with out_dims.transpose_from_order(('c', 'h', 'w')).

    When qrec is truthy and its acc_q differs from its biases_q, the biases
    are first converted with qrec.acc_q.expand_from.
    '''
    if biases is None:
        return zeros(out_dims.shape, qrec, 'acc_q')

    num_channels = out_dims.c
    acc_chw = zeros((num_channels, out_dims.h, out_dims.w), qrec, 'acc_q')
    if qrec and qrec.acc_q != qrec.biases_q:
        biases = qrec.acc_q.expand_from(biases, qrec.biases_q)
    # Fill each channel's whole (h, w) plane with that channel's bias.
    for channel in range(num_channels):
        acc_chw[channel, :] = biases[channel]
    return acc_chw.transpose(out_dims.transpose_from_order(('c', 'h', 'w')))
def from_hwc(input_tensor: np.ndarray, in_dim: Dim, out_dim: Dim):
    '''Cast a tensor to uint8 and reorder it from (h, w, c) for out_dim.

    in_dim is ignored; the parameter mirrors the signature of
    rgb565_rgb888. The axis permutation comes from
    out_dim.transpose_from_order(("h", "w", "c")).
    '''
    del in_dim  # deliberately unused
    as_uint8 = input_tensor.astype(np.uint8)
    target_axes = out_dim.transpose_from_order(("h", "w", "c"))
    return as_uint8.transpose(target_axes)
def faster_conv_quantized(params,
                          qrec: FilterQuantizationRecord,
                          in_dims: Dim,
                          out_dims: Dim,
                          in_tensor: np.ndarray,
                          weights: np.ndarray,
                          biases: np.ndarray,
                          details,
                          detect_overflow=True):
    '''Quantized 3D convolution by sub-matrix summing.

    For every filter tap (cur_h, cur_w) the matching window of the padded
    input is multiplied by that tap's weights, summed over the input
    channels and added to the accumulator of each output channel. The
    convolution is evaluated at stride 1 and subsampled afterwards.

    Args:
        params: Exposes padding (h, w, t, b, l, r), filter (h, w, out_c,
            transpose_to_order) and stride (h, w, size()).
        qrec: Quantization record exposing calc_q, acc_q, biases_q and
            out_qs[0]. calc_q.dtype and acc_q.dtype are used as numpy dtypes.
        in_dims: Input dimensions (h, w, transpose_to_order).
        out_dims: Output dimensions (c, transpose_from_order).
        in_tensor: Input activations; transposed here to (h, w, c).
        weights: Filter weights; transposed here to (out_c, h, w, in_c).
        biases: Per output channel biases, or None for a zero accumulator.
        details: Optional dict. When given it receives 'min_acc' and
            'max_acc' (extremes of the accumulator seen while accumulating)
            plus the 'overflow_dot' and 'overflow_acc' counters.
        detect_overflow: When true, every step is recomputed in int64 and
            compared with the native-width result to count overflows. The
            counters are only reported through details, so detection is
            skipped when details is None.

    Returns:
        The accumulator, converted with out_qs[0].reduce_from when out_qs[0]
        differs from acc_q, transposed with
        out_dims.transpose_from_order(['c', 'h', 'w']).
    '''
    # Overflow statistics live in details only; without it there is nowhere
    # to record them (the counters used to raise TypeError on None).
    detect_overflow = detect_overflow and details is not None

    if details is not None:
        details['min_acc'] = float("Infinity")
        details['max_acc'] = float("-Infinity")
        details['overflow_dot'] = 0
        details['overflow_acc'] = 0

    in_tensor = in_tensor.transpose(in_dims.transpose_to_order(['h', 'w', 'c']))
    if params.padding.h + params.padding.w > 0:
        # Pad only the two spatial axes; any trailing axes are left untouched.
        pad_width = ([params.padding.t, params.padding.b],
                     [params.padding.l, params.padding.r]) \
            + ([0, 0], ) * (np.ndim(in_tensor) - 2)
        in_tensor = np.pad(in_tensor,
                           pad_width,
                           mode='constant',
                           constant_values=0)
        pad_w = params.padding.w
        pad_h = params.padding.h
    else:
        pad_w = pad_h = 0

    in_tensor = in_tensor.astype(qrec.calc_q.dtype)

    weights = weights.transpose(
        params.filter.transpose_to_order(['out_c', 'h', 'w', 'in_c']))

    filt_w = params.filter.w
    filt_h = params.filter.h

    in_w = in_dims.w
    in_h = in_dims.h
    out_c = params.filter.out_c

    # Stride-1 output size; also the extent of the window each tap multiplies.
    out_w = in_w - filt_w + pad_w + 1
    out_h = in_h - filt_h + pad_h + 1

    if biases is None:
        result = np.zeros((out_c, out_h, out_w), dtype=qrec.acc_q.dtype)
    else:
        if qrec.acc_q != qrec.biases_q:
            biases = qrec.acc_q.expand_from(biases, qrec.biases_q)
        result = np.ones(
            (out_c, out_h, out_w),
            dtype=qrec.acc_q.dtype) * biases.reshape(out_c, 1, 1)

    # Loop-invariant: whether products must be brought from calc_q to acc_q.
    needs_reduction = qrec.calc_q != qrec.acc_q

    if detect_overflow:
        # int64 references against which the native-width math is compared.
        result64 = result.astype(np.int64)
        in_tensor64 = in_tensor.astype(np.int64)

    for out_c_i in range(out_dims.c):
        for cur_h in range(filt_h):
            for cur_w in range(filt_w):
                # selects all elements that the filter element needs to multiply
                window = (slice(cur_h, out_h + cur_h),
                          slice(cur_w, out_w + cur_w),
                          Ellipsis)
                tap_weights = weights[out_c_i, cur_h, cur_w]

                if detect_overflow:
                    slabhw64 = in_tensor64[window] * tap_weights
                    if needs_reduction:
                        # reduce the accumulator
                        slabhwpost = qrec.acc_q.round_normalize_clip(
                            slabhw64, qrec.calc_q, change_type=False)
                    else:
                        slabhwpost = slabhw64
                    # add depthwise
                    slabhw64sum = slabhwpost.sum(axis=-1)
                    # add to the previous filter elements
                    result64[out_c_i] += slabhw64sum

                slabhw = in_tensor[window] * tap_weights

                if detect_overflow:
                    # Any difference from the int64 product is an overflow:
                    # a wrapped negative product ends up larger, not smaller.
                    if np.any(slabhw != slabhw64):
                        details['overflow_dot'] += 1

                if needs_reduction:
                    # reduce the accumulator
                    slabhw = qrec.acc_q.reduce_from(slabhw, qrec.calc_q)

                # add depthwise
                slabhw = slabhw.sum(axis=-1)

                if detect_overflow:
                    acc_overflow_detected = False
                    if np.any(slabhw != slabhw64sum):
                        details['overflow_acc'] += 1
                        acc_overflow_detected = True

                # add to the previous filter elements
                result[out_c_i] += slabhw

                # Count an accumulator overflow at most once per filter tap.
                if detect_overflow and not acc_overflow_detected:
                    if np.any(result[out_c_i] != result64[out_c_i]):
                        details['overflow_acc'] += 1

                if details is not None:
                    details['min_acc'] = min(np.min(result[out_c_i]), details['min_acc'])
                    details['max_acc'] = max(np.max(result[out_c_i]), details['max_acc'])

    if params.stride.size() > 1:
        # The convolution was computed at stride 1; keep only the strided outputs.
        result = result[:, ::params.stride.h, ::params.stride.w, ...]

    if qrec.out_qs[0] != qrec.acc_q:
        result = qrec.out_qs[0].reduce_from(result, qrec.acc_q)

    return result.transpose(out_dims.transpose_from_order(['c', 'h', 'w']))
def faster_conv(params, in_dims: Dim, out_dims: Dim, in_tensor: np.ndarray,
                weights: np.ndarray, biases: np.ndarray, details):
    '''Unquantized 3D convolution computed by summing shifted input windows.

    The input is moved to (h, w, c) order and zero padded on its two spatial
    axes when params.padding asks for it. The weights are moved to
    (out_c, h, w, in_c) order. For each output channel and each filter tap,
    the window of the input that the tap touches is multiplied by the tap's
    weights, summed over the last axis and accumulated. The convolution is
    computed at stride 1 and subsampled afterwards when
    params.stride.size() > 1.

    When details is not None it receives 'min_acc' and 'max_acc', the
    extremes of the accumulator observed while accumulating.

    Returns the accumulator transposed with
    out_dims.transpose_from_order(['c', 'h', 'w']).
    '''
    track_range = details is not None
    if track_range:
        details['min_acc'] = float("Infinity")
        details['max_acc'] = float("-Infinity")

    padding = params.padding
    image = in_tensor.transpose(in_dims.transpose_to_order(['h', 'w', 'c']))
    if padding.h + padding.w > 0:
        spatial_pad = ([padding.t, padding.b], [padding.l, padding.r])
        # Axes after the two spatial ones are never padded.
        untouched_pad = ([0, 0], ) * (np.ndim(image) - 2)
        image = np.pad(image,
                       spatial_pad + untouched_pad,
                       mode='constant',
                       constant_values=0)
        pad_h, pad_w = padding.h, padding.w
    else:
        pad_h, pad_w = 0, 0

    kernel = params.filter
    weights = weights.transpose(
        kernel.transpose_to_order(['out_c', 'h', 'w', 'in_c']))
    taps_h, taps_w = kernel.h, kernel.w
    num_filters = kernel.out_c

    # Stride-1 output extent, which is also the size of each tap's window.
    rows = in_dims.h - taps_h + pad_h + 1
    cols = in_dims.w - taps_w + pad_w + 1

    acc_shape = (num_filters, rows, cols)
    if biases is None:
        result = np.zeros(acc_shape)
    else:
        result = np.ones(acc_shape) * biases.reshape(num_filters, 1, 1)

    for channel in range(out_dims.c):
        # View into result, so in-place adds update the accumulator directly.
        channel_acc = result[channel]
        for row_off in range(taps_h):
            for col_off in range(taps_w):
                window = image[row_off:row_off + rows, col_off:col_off + cols, ...]
                weighted = window * weights[channel, row_off, col_off]
                # Collapse the last axis, then add to the earlier taps.
                channel_acc += weighted.sum(axis=-1)

                if track_range:
                    details['min_acc'] = min(np.min(channel_acc), details['min_acc'])
                    details['max_acc'] = max(np.max(channel_acc), details['max_acc'])

    if params.stride.size() > 1:
        result = result[:, ::params.stride.h, ::params.stride.w, ...]

    return result.transpose(out_dims.transpose_from_order(['c', 'h', 'w']))