Python quantize_tensor示例，myutils.pytorch.tensor_util.quantize_tensor Python示例

示例#1

0

显示文件

文件： base.py 项目： yoshitomo-matsubara/head-network-distillation

    def compute_ae_bottleneck_size(self, x, print_info=False):
        """Measure the smallest intermediate output produced inside the autoencoder.

        ``x`` is passed through ``self.head_model`` and then, one module at a
        time, through the modules that
        ``module_util.extract_decomposable_modules`` collects from
        ``self.autoencoder``. The earliest module output with the lowest element
        count relative to ``x`` is treated as the bottleneck.

        Args:
            x: Input given to ``self.head_model``; it must expose ``size()``
                and ``device``.
            print_info: When true, print the three measurements.

        Returns:
            A tuple ``(min_rate, output_data_size, quantized_output_data_size)``:
            the bottleneck element count divided by the element count of ``x``,
            and the ``sys.getsizeof`` of the pickled bottleneck output and of
            its pickled quantized form, both divided by 1024. ``min_rate``
            stays ``None`` when no module was collected.
        """
        hidden = self.head_model(x)
        collected = list()
        module_util.extract_decomposable_modules(self.autoencoder, hidden, collected)
        stages = [stage.to(x.device) for stage in collected]
        input_numel = np.prod(x.size())
        min_rate = None
        pickled_raw = None
        pickled_quantized = None
        for stage in stages:
            # Linear layers receive a (batch, features) view of the activation.
            if isinstance(stage, nn.Linear):
                hidden = hidden.view(hidden.size(0), -1)

            hidden = stage(hidden)
            rate = np.prod(hidden.size()) / input_numel
            # Only a strictly smaller output replaces the current bottleneck.
            if min_rate is not None and not rate < min_rate:
                continue

            min_rate = rate
            pickled_raw = pickle.dumps(hidden)
            pickled_quantized = pickle.dumps(tensor_util.quantize_tensor(hidden))

        # sys.getsizeof measures the bytes object itself, i.e. payload plus
        # the interpreter's per-object overhead.
        raw_kb = sys.getsizeof(pickled_raw) / 1024
        quantized_kb = sys.getsizeof(pickled_quantized) / 1024
        if print_info:
            template = (
                '[Autoencoder bottleneck]\tScaled output size: {} [%]\tOutput data size: {} [KB]'
                '\tQuantized output data size: {} [KB]'
            )
            print(template.format(min_rate * 100.0, raw_kb, quantized_kb))
        # Scaled bottleneck size, bottleneck data size [KB], Quantized bottleneck data size [KB]
        return min_rate, raw_kb, quantized_kb

示例#2

0

显示文件

    def __call__(self, z, target):
        """Record size statistics for ``z`` and return ``(z, target)`` unchanged.

        One entry is appended to each of ``self.data_size_list``,
        ``self.fp16_data_size_list``, ``self.quantized_data_size_list`` and
        ``self.tensor_shape_list`` per call.

        Args:
            z: Object to measure. ``None`` records zero sizes and a
                ``[0, 0, 0]`` shape. Otherwise ``z.shape`` is indexed at
                positions 1, 2 and 3.
            target: Passed through untouched.

        Returns:
            The tuple ``(z, target)``.
        """
        if z is None:
            raw_size = 0.0
            fp16_size = 0.0
            quantized_size = 0.0
        else:
            raw_size = file_util.get_binary_object_size(z)
            if isinstance(z, torch.Tensor):
                # NOTE(review): ``short()`` casts to int16, whereas the value
                # is stored as an "fp16" size -- confirm ``half()`` was not
                # the intended cast.
                fp16_size = file_util.get_binary_object_size(z.short())
                quantized = tensor_util.quantize_tensor(z, num_bits=self.num_bits4quant)
                quantized_size = file_util.get_binary_object_size(quantized)
            else:
                # The 16-bit and quantized sizes are only defined for tensors.
                fp16_size = None
                quantized_size = None

        self.data_size_list.append(raw_size)
        self.fp16_data_size_list.append(fp16_size)
        self.quantized_data_size_list.append(quantized_size)
        if z is None:
            shape_entry = [0, 0, 0]
        else:
            shape_entry = [z.shape[1], z.shape[2], z.shape[3]]
        self.tensor_shape_list.append(shape_entry)
        return z, target

示例#3

0

显示文件

文件： deployment_helper.py 项目： yoshitomo-matsubara/head-network-distillation

def test_split_model(model, head_network, tail_network, sensor_device,
                     edge_device, spbit, config):
    """Compare a head/tail split of a model with the unsplit model on the test set.

    For every test batch the head network runs on ``sensor_device``, its output
    is optionally compressed, and the tail network runs on ``edge_device``.
    The unsplit ``model`` is evaluated on the same batch for reference.
    Accuracy, loss, the size of the data at the split point and both
    processing times are printed at the end.

    Args:
        model: Unsplit reference model.
        head_network: First part of the split model.
        tail_network: Second part of the split model.
        sensor_device: Device on which ``head_network`` runs.
        edge_device: Device on which ``tail_network`` runs.
        spbit: ``'8bits'`` quantizes and dequantizes the head output,
            ``'16bits'`` casts it to half precision and back, and any other
            value leaves it untouched.
        config: Mapping read at ``['dataset']``, ``['train']``, ``['test']``
            and ``['input_shape']``.
    """
    dataset_config = config['dataset']
    train_config = config['train']
    _, _, test_loader = dataset_util.get_data_loaders(
        dataset_config,
        batch_size=train_config['batch_size'],
        rough_size=train_config['rough_size'],
        reshape_size=tuple(config['input_shape'][1:3]),
        test_batch_size=config['test']['batch_size'],
        jpeg_quality=-1,
    )
    print('Testing..')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if device.type == 'cuda':
        cudnn.benchmark = True

    head_network = module_util.use_multiple_gpus_if_available(
        head_network, sensor_device)
    tail_network = module_util.use_multiple_gpus_if_available(
        tail_network, edge_device)
    model.to(device)
    head_network.to(sensor_device)
    tail_network.to(edge_device)
    head_network.eval()
    tail_network.eval()
    model.eval()

    split_correct_count, split_test_loss = 0, 0
    org_correct_count, org_test_loss = 0, 0
    total = 0
    split_point_sizes = list()
    head_durations = list()
    tail_durations = list()
    with torch.no_grad():
        for inputs, targets in test_loader:
            total += targets.size(0)
            inputs, targets = inputs.to(sensor_device), targets.to(edge_device)
            head_start_time = time.time()
            zs = head_network(inputs)
            # The head timer stops after compression but before the size
            # measurement; everything after that is charged to the tail.
            if spbit == '8bits':
                # Quantization and dequantization
                qzs = tensor_util.quantize_tensor(zs)
                head_end_time = time.time()
                split_point_sizes.append(
                    file_util.get_binary_object_size(qzs))
                zs = tensor_util.dequantize_tensor(qzs)
            elif spbit == '16bits':
                # Casting and recasting
                zs = zs.half()
                head_end_time = time.time()
                split_point_sizes.append(file_util.get_binary_object_size(zs))
                zs = zs.float()
            else:
                head_end_time = time.time()
                split_point_sizes.append(file_util.get_binary_object_size(zs))

            preds = tail_network(zs.to(edge_device))
            tail_end_time = time.time()
            batch_correct, batch_loss = predict(preds, targets)
            split_correct_count += batch_correct
            split_test_loss += batch_loss

            # Reference pass through the unsplit model on the same batch.
            inputs, targets = inputs.to(device), targets.to(device)
            preds = model(inputs)
            batch_correct, batch_loss = predict(preds, targets)
            org_correct_count += batch_correct
            org_test_loss += batch_loss
            head_durations.append(head_end_time - head_start_time)
            tail_durations.append(tail_end_time - head_end_time)

    org_acc = 100.0 * org_correct_count / total
    org_report = '[Before splitting]\tAverage Loss: {:.4f}, Accuracy: {}/{} [{:.4f}%]\n'
    print(org_report.format(org_test_loss / total, org_correct_count, total, org_acc))
    split_acc = 100.0 * split_correct_count / total
    split_report = '[After splitting]\tAverage Loss: {:.4f}, Accuracy: {}/{} [{:.4f}%]\n'
    print(split_report.format(split_test_loss / total, split_correct_count, total, split_acc))
    print('Output file size at splitting point [KB]: {} +- {}'.format(
        np.average(split_point_sizes), np.std(split_point_sizes)))
    print('Local processing time [sec]: {} +- {}'.format(
        np.average(head_durations), np.std(head_durations)))
    print('Edge processing time [sec]: {} +- {}'.format(
        np.average(tail_durations), np.std(tail_durations)))

示例#4

0

显示文件

 def save_image(self, z, output_file_path):
     """Quantize ``z``, write it to ``output_file_path`` as a JPEG, and return the quantized object.

     The JPEG is encoded with ``self.jpeg_quality``. The returned value is
     whatever ``tensor_util.quantize_tensor`` produced, not the saved image.
     """
     quantized = tensor_util.quantize_tensor(z)
     # Reorder the three axes before handing the data to PIL
     # (assumes ``z`` is channel-first, i.e. (C, H, W) -- TODO confirm).
     pixels = quantized.tensor.permute(1, 2, 0).cpu().numpy()
     image = Image.fromarray(pixels)
     image.save(output_file_path, format='jpeg', quality=self.jpeg_quality)
     return quantized

示例#5

0

显示文件

    def __call__(self, z, target):
        """Reduce the precision of ``z`` and return it together with ``target``.

        When ``self.num_bits`` is 16, ``z`` is cast with ``half()``; for any
        other value it is passed to ``tensor_util.quantize_tensor`` with that
        bit width. ``target`` is returned untouched.
        """
        if self.num_bits != 16:
            return tensor_util.quantize_tensor(z, num_bits=self.num_bits), target

        return z.half(), target