def forward(ctx, input, weight, bias=None, conc_weight=None, use_kernel=False,
            use_cuda=True, rounding='deterministic', shift_range=(-15, 0)):
    fraction_bits = 16
    integer_bits = 16
    shift, sign = utils.get_shift_and_sign(weight, rounding)
    if use_kernel:  # the kernel path appears to support inference only
        input_fixed_point = (input * (2 ** fraction_bits)).int()
        if bias is not None:
            bias_fixed_point = (bias * (2 ** fraction_bits)).int()
        else:
            bias_fixed_point = None
        out = fcos_core.deepshift.kernels.linear(input_fixed_point, shift, sign,
                                                 bias_fixed_point, conc_weight,
                                                 use_cuda)
        out = out.float()
        out = out / (2 ** fraction_bits)
    else:
        input.data = utils.round_to_fixed(input.data, fraction_bits, integer_bits)
        if bias is not None:
            bias.data = utils.round_to_fixed(bias.data, fraction_bits, integer_bits)
        weight_s = (2.0 ** shift) * sign
        # out = input * weight_s + bias. During training, gradients are still
        # back-propagated through the original weights; since those have to be
        # computed anyway, training simply proceeds this way. One can pretrain
        # a normal model first, then fine-tune it as a DeepShift model.
        out = input.mm(weight_s.t())
        if bias is not None:
            out += bias.unsqueeze(0).expand_as(out)
        ctx.save_for_backward(input, weight_s, bias)
    return out
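# Illustration (an assumption, not repo code): utils.get_shift_and_sign is
# taken here to decompose each weight into a sign and an integer power-of-two
# exponent, so that weight ~= sign * 2**shift. A minimal sketch:
def _get_shift_and_sign_sketch(w):
    sign = torch.sign(w)
    shift = torch.round(torch.log2(torch.abs(w)))  # 'deterministic' rounding
    return shift, sign
# e.g. w = 0.26 -> shift = -2, sign = +1, reconstructed as 2**-2 = 0.25;
#      w = -0.11 -> shift = -3, sign = -1, reconstructed as -2**-3 = -0.125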
def round_shift_weights(model, clone=False):
    if clone:
        model = copy.deepcopy(model)
    for name, module in reversed(model._modules.items()):
        if len(list(module.children())) > 0:
            # recurse into submodules
            model._modules[name] = round_shift_weights(model=module)
        if type(module) == fcos_core.deepshift.modules.LinearShift or \
                type(module) == fcos_core.deepshift.modules.Conv2dShift:
            module.shift.data = module.shift.round()
            module.sign.data = module.sign.round().sign()
            if module.bias is not None:
                module.bias.data = utils.round_to_fixed(module.bias,
                                                        fraction=16, integer=16)
        elif type(module) == fcos_core.deepshift.modules_q.LinearShiftQ or \
                type(module) == fcos_core.deepshift.modules_q.Conv2dShiftQ:
            module.weight.data = utils.round_power_of_2(module.weight)
            if module.bias is not None:
                module.bias.data = utils.round_to_fixed(module.bias,
                                                        fraction=16, integer=16)
    return model
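# Usage sketch (assumes a converted `model` is already in scope): snap the
# learned shift/sign parameters to integers on a deep copy, so the trainable
# model is left untouched for further fine-tuning.
rounded_model = round_shift_weights(model, clone=True)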
def forward(ctx, input, shift, sign, bias=None, conc_weight=None, stride=1,
            padding=0, dilation=1, groups=1, use_kernel=False, use_cuda=False,
            rounding='deterministic', shift_range=(-15, 0)):
    fraction_bits = 16
    integer_bits = 16
    if use_kernel:
        input_fixed_point = (input * (2 ** fraction_bits)).int()
        if bias is not None:
            bias_fixed_point = (bias * (2 ** fraction_bits)).int()
        else:
            bias_fixed_point = None
        out = fcos_core.deepshift.kernels.conv2d(input_fixed_point, shift, sign,
                                                 bias_fixed_point, conc_weight,
                                                 stride, padding, dilation,
                                                 groups, use_cuda)
        out = out.float()
        out = out / (2 ** fraction_bits)
    else:
        shift = shift.clamp(*shift_range)
        sign = sign.clamp(-1, 1)
        input.data = utils.round_to_fixed(input.data, fraction_bits, integer_bits)
        if bias is not None:
            bias.data = utils.round_to_fixed(bias.data, fraction_bits, integer_bits)
        shift_rounded = utils.round(shift, stochastic=False)
        sign_rounded_signed = torch.sign(utils.round(sign, stochastic=False))
        v = 2 ** shift_rounded * sign_rounded_signed
        out = F.conv2d(input, v, bias, stride, padding, dilation, groups)
        ctx.save_for_backward(input, shift, sign, bias, v)
        ctx.stride = stride
        ctx.padding = padding
        ctx.dilation = dilation
        ctx.groups = groups
    return out
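# Sanity check (illustrative, not repo code): multiplying by v = sign * 2**shift
# is exact in fixed point, which is what lets the kernel path replace each
# multiply with a bit shift. With 16 fractional bits and shift = -3:
x = torch.tensor([0.75])
x_fixed = (x * 2 ** 16).int()                  # fixed-point encoding
shifted = (x_fixed >> 3).float() / 2 ** 16     # right shift by |shift| = 3
assert torch.allclose(shifted, x * 2.0 ** -3)  # same as multiplying by 2**-3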
def forward(ctx, input, shift, sign, bias=None, conc_weight=None,
            use_kernel=False, use_cuda=True, rounding='deterministic',
            shift_range=(-15, 0)):
    fraction_bits = 16
    integer_bits = 16
    if use_kernel:
        input_fixed_point = (input * (2 ** fraction_bits)).int()
        if bias is not None:
            bias_fixed_point = (bias * (2 ** fraction_bits)).int()
        else:
            bias_fixed_point = None
        out = fcos_core.deepshift.kernels.linear(input_fixed_point, shift, sign,
                                                 bias_fixed_point, conc_weight,
                                                 use_cuda)
        out = out.float()
        out = out / (2 ** fraction_bits)
    else:
        sign = sign.clamp(-1, 1)
        shift = shift.clamp(*shift_range)
        input.data = utils.round_to_fixed(input.data, fraction_bits, integer_bits)
        if bias is not None:
            bias.data = utils.round_to_fixed(bias.data, fraction_bits, integer_bits)
        v = 2 ** shift.round() * sign.round().sign()
        out = input.mm(v.t())
        if bias is not None:
            out += bias.unsqueeze(0).expand_as(out)
        ctx.save_for_backward(input, shift, sign, bias, v)
    return out
def forward(ctx, input):
    return utils.round_to_fixed(input)
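# A plausible reading of utils.round_to_fixed (an assumption; the repo's own
# version may differ in clamping details): quantize to a signed fixed-point
# grid with `fraction` fractional bits and `integer` integer bits.
def _round_to_fixed_sketch(x, fraction=16, integer=16):
    delta = 2.0 ** -fraction                   # grid step, e.g. 2**-16
    bound = 2.0 ** (integer - 1)               # half the representable range
    return torch.clamp(torch.floor(x / delta) * delta, -bound, bound - delta)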
def convert_to_shift(model, shift_depth, shift_type, convert_all_linear=True,
                     convert_weights=False, freeze_sign=False, use_kernel=False,
                     use_cuda=True, rounding='deterministic',
                     shift_range=(-14, 0)):
    conversion_count = 0
    for name, module in reversed(model._modules.items()):
        if len(list(module.children())) > 0:
            # recurse into submodules
            model._modules[name], num_converted = convert_to_shift(
                model=module,
                shift_depth=shift_depth - conversion_count,
                shift_type=shift_type,
                convert_all_linear=convert_all_linear,
                convert_weights=convert_weights,
                freeze_sign=freeze_sign,
                use_kernel=use_kernel,
                use_cuda=use_cuda,
                rounding=rounding,
                shift_range=shift_range)
            conversion_count += num_converted
        if type(module) == nn.Linear and (convert_all_linear
                                          or conversion_count < shift_depth):
            linear = module
            if shift_type == 'Q':
                shift_linear = fcos_core.deepshift.modules_q.LinearShiftQ(
                    module.in_features, module.out_features,
                    module.bias is not None, use_kernel=use_kernel,
                    use_cuda=use_cuda, rounding=rounding,
                    shift_range=shift_range)
                shift_linear.weight = linear.weight
                if linear.bias is not None:
                    shift_linear.bias.data = utils.round_to_fixed(
                        linear.bias, fraction=16, integer=16)
                if use_cuda and use_kernel:
                    shift_linear.conc_weight = utils.compress_bits(
                        *utils.get_shift_and_sign(linear.weight))
            elif shift_type == 'PS':
                shift_linear = fcos_core.deepshift.modules.LinearShift(
                    module.in_features, module.out_features,
                    module.bias is not None, freeze_sign=freeze_sign,
                    use_kernel=use_kernel, use_cuda=use_cuda,
                    rounding=rounding, shift_range=shift_range)
                if convert_weights:
                    shift_linear.shift.data, shift_linear.sign.data = \
                        utils.get_shift_and_sign(linear.weight)
                    shift_linear.bias = linear.bias
                if use_cuda and use_kernel:
                    shift_linear.conc_weight = utils.compress_bits(
                        shift_linear.shift.data, shift_linear.sign.data)
            else:
                raise ValueError('Unsupported shift_type argument: %s'
                                 % shift_type)
            model._modules[name] = shift_linear
            if not convert_all_linear:
                conversion_count += 1
        if (type(module) == nn.Conv2d
                or type(module) == fConv2d) and conversion_count < shift_depth:
            conv2d = module
            if shift_type == 'Q':
                shift_conv2d = fcos_core.deepshift.modules_q.Conv2dShiftQ(
                    module.in_channels, module.out_channels, module.kernel_size,
                    module.stride, module.padding, module.dilation,
                    module.groups, module.bias is not None,
                    module.padding_mode, use_kernel=use_kernel,
                    use_cuda=use_cuda, rounding=rounding,
                    shift_range=shift_range)
                shift_conv2d.weight = conv2d.weight
                if conv2d.bias is not None:
                    shift_conv2d.bias.data = utils.round_to_fixed(
                        conv2d.bias, fraction=16, integer=16)
                if use_cuda and use_kernel:
                    shift_conv2d.conc_weight = utils.compress_bits(
                        *utils.get_shift_and_sign(conv2d.weight))
            elif shift_type == 'PS':
                shift_conv2d = fcos_core.deepshift.modules.Conv2dShift(
                    module.in_channels, module.out_channels, module.kernel_size,
                    module.stride, module.padding, module.dilation,
                    module.groups, module.bias is not None,
                    module.padding_mode, freeze_sign=freeze_sign,
                    use_kernel=use_kernel, use_cuda=use_cuda,
                    rounding=rounding, shift_range=shift_range)
                if convert_weights:
                    shift_conv2d.shift.data, shift_conv2d.sign.data = \
                        utils.get_shift_and_sign(conv2d.weight)
                    shift_conv2d.bias = conv2d.bias
                if use_cuda and use_kernel:
                    shift_conv2d.conc_weight = utils.compress_bits(
                        shift_conv2d.shift.data, shift_conv2d.sign.data)
            model._modules[name] = shift_conv2d
            conversion_count += 1
    return model, conversion_count
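# End-to-end usage sketch (argument values are illustrative, not prescribed by
# the repo): convert up to 20 conv/linear layers of a pretrained `model` to
# shift layers, initialize shift/sign from the trained weights, then round
# the shift parameters before deployment.
model, num_converted = convert_to_shift(model, shift_depth=20, shift_type='PS',
                                        convert_weights=True)
model = round_shift_weights(model)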