        return self.quantize(input_, self.scale, self.num_bits)


if __name__ == '__main__':
    per_tensor_scale_shape = (1,)
    for input_name, input_size, gpu_runs in TEST_PARAMS_STRUCT:
        weight_per_channel_scale_shape = get_per_channel_scale_shape(
            input_size, is_weights=True)
        act_per_channel_scale_shape = get_per_channel_scale_shape(
            input_size, is_weights=False)
        print("CUDA " + input_name)
        print("------------------------------------------------")
        print("Pytorch Symmetric (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(
            ReferenceQuantize(NBITS).cuda(),
            input_size, 'cuda', gpu_runs)
        print()
        print("Custom Symmetric (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(
            SymmetricQuantizer(
                DefaultedPTQuantizerSpec(scale_shape=per_tensor_scale_shape,
                                         num_bits=NBITS)).cuda(),
            input_size, 'cuda', gpu_runs)
        print()
        print("Pytorch Symmetric Per Weight Channel (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(
            ReferenceQuantize(NBITS,
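# The per-channel benchmarks above call a get_per_channel_scale_shape() helper that is
# defined elsewhere in the repo. The function below is only an illustrative sketch of
# what such a helper could compute, assuming the weight channel axis is 0 and the
# activation channel axis is 1; it is not the repo's actual implementation.
def get_per_channel_scale_shape_sketch(input_shape, is_weights):
    # One scale value per channel, broadcastable over the remaining dimensions.
    scale_shape = [1] * len(input_shape)
    channel_dim = 0 if is_weights else 1
    scale_shape[channel_dim] = input_shape[channel_dim]
    return scale_shape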
        self.quantize = ReferenceQuantizeSymmetric.apply

    def get_scale(self):
        return self.scale

    def forward(self, input_):
        return self.quantize(input_, self.scale, self.num_bits)


if __name__ == '__main__':
    for input_name, input_size, gpu_runs in TEST_PARAMS_STRUCT:
        print("CUDA " + input_name)
        print("------------------------------------------------")
        print("Pytorch Symmetric (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(
            ReferenceQuantize(NBITS).cuda(),
            input_size, 'cuda', gpu_runs)
        print()
        print("Custom Symmetric (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(
            SymmetricQuantizer(QuantizerConfig(
                QuantizationParams(bits=NBITS))).cuda(),
            input_size, 'cuda', gpu_runs)
        print()
        print("Pytorch Symmetric Per Weight Channel (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(
            ReferenceQuantize(NBITS, input_shape=input_size,
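# All of the benchmarks in these scripts go through a run_profile(module, input_size,
# device, runs, forward_only=False) helper defined elsewhere in the tools. Below is a
# minimal sketch of such a timing loop for reference; the warm-up and synchronization
# policy here are assumptions, not the repo's actual code.
import time

import torch


def run_profile_sketch(module, input_size, device, runs, forward_only=False):
    input_ = torch.randn(input_size, device=device, requires_grad=not forward_only)
    # Warm-up pass so CUDA kernel compilation/caching does not skew the timing.
    output = module(input_)
    if not forward_only:
        output.sum().backward()
    if device == 'cuda':
        torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(runs):
        output = module(input_)
        if not forward_only:
            output.sum().backward()
    if device == 'cuda':
        torch.cuda.synchronize()
    print("time per run: {:.6f} s".format((time.perf_counter() - start) / runs))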
    def forward(self, input_):
        return ReferenceActivationBinarize.apply(input_, self.scale, self.threshold)


if __name__ == '__main__':
    for input_name, input_size, gpu_runs in TEST_PARAMS_STRUCT:
        print()
        print("CUDA " + input_name)
        print("------------------------------------------------")
        print("Pytorch XNOR weight binarization (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(ReferenceWeightBinarizationModule('xnor').cuda(),
                    input_size, 'cuda', gpu_runs, forward_only=True)
        print()
        print("Custom XNOR weight binarization (cuda 0) impl:")
        print("input size: {0}".format(input_size))
        run_profile(XNORBinarize(enabled=True).cuda(),
                    input_size, 'cuda', gpu_runs, forward_only=True)
        print()
        print("Pytorch DoReFa weight binarization (cuda 0) impl:")
        print("input size: {0}".format(input_size))
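# For context, XNOR-style weight binarization (as in XNOR-Net) replaces each
# per-output-channel weight slice with its sign scaled by the mean absolute value of
# that slice. The function below is an illustrative sketch of that reference
# computation; it is not the code of ReferenceWeightBinarizationModule or XNORBinarize.
import torch


def xnor_binarize_weights_sketch(weight: torch.Tensor) -> torch.Tensor:
    # Per-output-channel scale: mean(|w|) over every dimension except dim 0.
    scale = weight.abs().mean(dim=list(range(1, weight.dim())), keepdim=True)
    # sign() maps exact zeros to zero; real kernels may map them to +1 or -1 instead.
    return weight.sign() * scale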