def _add_constant_tensor(self, tensor: onnx.TensorProto):
    """Register an ONNX constant tensor on ``self.sdfg`` and store its value.

    A tensor with no dims is added through ``self.sdfg.add_scalar``; any
    other tensor is added through ``self.sdfg.add_array`` unless an entry
    with the same cleaned name is already in ``self.sdfg.arrays``, in which
    case the existing entry must agree in dtype and shape.  The tensor's
    value, converted with ``numpy_helper.to_array``, is stored in
    ``self.weights`` under the original (uncleaned) ``tensor.name``.

    Args:
        tensor: the ONNX tensor to register.

    Raises:
        ValueError: if the tensor has no name or no data type, or if an
            existing array of the same name has a different dtype or
            different dimensions.
    """
    if not tensor.HasField("name"):
        raise ValueError("Got tensor without name")
    if not tensor.HasField("data_type"):
        raise ValueError(
            "Initializer tensor '{}' has no type".format(tensor.name))

    name = clean_onnx_name(tensor.name)
    dtype = onnx_tensor_type_to_typeclass(tensor.data_type)
    dims = list(tensor.dims)

    if not dims:
        # no dimensions: this is a scalar
        self.sdfg.add_scalar(name, dtype)
    elif name in self.sdfg.arrays:
        # The name is already registered; it must describe the same data.
        existing_arr = self.sdfg.arrays[name]
        if existing_arr.dtype != dtype:
            raise ValueError(
                "Invalid ONNX model; found two values with name '{}', but different dtypes ({} and {})"
                .format(name, existing_arr.dtype, dtype))
        if tuple(existing_arr.shape) != tuple(dims):
            raise ValueError(
                "Invalid ONNX model; found two values with name '{}', but different dimensions ({} and {})"
                .format(name, existing_arr.shape, dims))
    else:
        self.sdfg.add_array(name, dims, dtype)

    # Note: weights are keyed by the original ONNX name, not the cleaned one.
    self.weights[tensor.name] = numpy_helper.to_array(tensor)
def to_array(tensor: TensorProto, base_dir: str = "") -> np.ndarray:
    """Convert a TensorProto into a numpy array.

    Args:
        tensor: the TensorProto to convert.
        base_dir: directory passed to ``load_external_data_for_tensor`` when
            the tensor uses external data.

    Returns:
        The tensor contents as a numpy array reshaped to ``tensor.dims``.

    Raises:
        ValueError: if the tensor has a ``segment`` field.
        TypeError: if the tensor's data type is ``TensorProto.UNDEFINED``.

    NOTE(review): ``load_external_data_for_tensor`` and ``convert_endian``
    are handed the tensor itself, so they presumably modify it in place --
    confirm against their definitions.
    """
    if tensor.HasField("segment"):
        raise ValueError("Currently not supporting loading segments.")
    if tensor.data_type == TensorProto.UNDEFINED:
        raise TypeError("The element type in the input tensor is not defined.")

    elem_type = tensor.data_type
    shape = tensor.dims

    # Resolve the target numpy dtype and the proto field/dtype used for
    # storage when the values are not kept in raw_data.
    target_dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[elem_type]
    storage_type = mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[elem_type]
    storage_dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[storage_type]
    field_name = mapping.STORAGE_TENSOR_TYPE_TO_FIELD[storage_type]

    if elem_type == TensorProto.STRING:
        decoded = [raw.decode('utf-8') for raw in getattr(tensor, field_name)]
        return np.asarray(decoded).astype(target_dtype).reshape(shape)

    # Pull in the data of an external tensor, if there is one.
    if uses_external_data(tensor):
        load_external_data_for_tensor(tensor, base_dir)

    if tensor.HasField("raw_data"):
        # Raw bytes are decoded with frombuffer.
        if sys.byteorder == 'big':
            # Convert endian from little to big
            convert_endian(tensor)

        if elem_type == TensorProto.BFLOAT16:
            # numpy has no bf16 type, so it is converted by hand
            bits = np.frombuffer(tensor.raw_data, dtype=np.int16)
            return bfloat16_to_float32(bits, shape)

        return np.frombuffer(tensor.raw_data, dtype=target_dtype).reshape(shape)

    # No raw_data: the values live in one of the typed repeated fields.
    if elem_type == TensorProto.FLOAT16:
        # float16 is stored as int32 (uint16 type); a view recovers the
        # original value
        as_uint16 = np.asarray(tensor.int32_data, dtype=np.uint16)
        return as_uint16.reshape(shape).view(np.float16)

    if elem_type == TensorProto.BFLOAT16:
        # bfloat16 is stored as int32 (uint16 type); numpy has no bf16 type
        bits = np.asarray(tensor.int32_data, dtype=np.int32)
        return bfloat16_to_float32(bits, shape)

    values = getattr(tensor, field_name)
    if elem_type in (TensorProto.COMPLEX64, TensorProto.COMPLEX128):
        values = combine_pairs_to_complex(values)

    stored = np.asarray(values, dtype=storage_dtype)
    return stored.astype(target_dtype).reshape(shape)
def fill_zeros_for_external_data(tensor: TensorProto):
    """Populate ``tensor.raw_data`` in place when it is not already set.

    A tensor that already has ``raw_data`` is treated as loaded and left
    untouched.  Otherwise the value returned by
    ``NumpyHelper.to_array(tensor, fill_zeros=True)`` is serialized with
    ``numpy_helper.from_array`` and its ``raw_data`` is copied onto the
    tensor.

    NOTE(review): going by the function name and the ``fill_zeros`` flag,
    the array is presumably a zero-filled placeholder for external data --
    confirm against ``NumpyHelper.to_array``.

    Args:
        tensor: the TensorProto to update.
    """
    if not tensor.HasField("raw_data"):
        placeholder = NumpyHelper.to_array(tensor, fill_zeros=True)
        tensor.raw_data = numpy_helper.from_array(
            placeholder, name=tensor.name).raw_data
def has_same_value(tensor1: TensorProto, tensor2: TensorProto) -> bool:
    """Return True when two tensors have the same value.

    Names are not compared, so they may differ.

    Args:
        tensor1 (TensorProto): initializer 1
        tensor2 (TensorProto): initializer 2

    Returns:
        bool: True when the two initializers have the same data type, the
        same dims and the same contents.
    """
    if tensor1.data_type != tensor2.data_type or tensor1.dims != tensor2.dims:
        return False

    # Fast path: both payloads are raw bytes, so compare them directly.
    if tensor1.HasField("raw_data") and tensor2.HasField("raw_data"):
        return tensor1.raw_data == tensor2.raw_data

    # `==` on numpy arrays is elementwise and yields an array; reduce it to
    # a single plain bool so callers can use the result in a condition.
    elementwise = numpy_helper.to_array(tensor1) == numpy_helper.to_array(tensor2)
    return bool(elementwise.all())
def _add_constant_tensor(self, tensor: onnx.TensorProto, parent_pt_model):
    """Register an ONNX constant tensor on ``self.sdfg`` and store its weight.

    A tensor with no dims is added through ``self.sdfg.add_scalar``; any
    other tensor is added through ``self.sdfg.add_array`` unless an entry
    with the same cleaned name is already in ``self.sdfg.arrays``, in which
    case the existing entry must agree in dtype and shape.

    The weight stored in ``self.weights`` (keyed by the original
    ``tensor.name``) is the ``.data`` of the parameter of that name in
    ``parent_pt_model`` when there is one; otherwise it is a torch tensor
    built from a copy of the ONNX tensor's numpy value.

    Args:
        tensor: the ONNX tensor to register.
        parent_pt_model: object exposing ``named_parameters()``, or None.

    Raises:
        ValueError: if the tensor has no name or no data type, or if an
            existing array of the same name has a different dtype or
            different dimensions.
    """
    if not tensor.HasField("name"):
        raise ValueError("Got tensor without name")
    if not tensor.HasField("data_type"):
        raise ValueError(
            "Initializer tensor '{}' has no type".format(tensor.name))

    name = clean_onnx_name(tensor.name)
    dtype = onnx_tensor_type_to_typeclass(tensor.data_type)
    dims = list(tensor.dims)

    if not dims:
        # no dimensions: this is a scalar
        self.sdfg.add_scalar(name, dtype)
    elif name in self.sdfg.arrays:
        # The name is already registered; it must describe the same data.
        existing_arr = self.sdfg.arrays[name]
        if existing_arr.dtype != dtype:
            raise ValueError(
                "Invalid ONNX model; found two values with name '{}', but different dtypes ({} and {})"
                .format(name, existing_arr.dtype, dtype))
        if tuple(existing_arr.shape) != tuple(dims):
            raise ValueError(
                "Invalid ONNX model; found two values with name '{}', but different dimensions ({} and {})"
                .format(name, existing_arr.shape, dims))
    else:
        self.sdfg.add_array(name, dims, dtype)

    weight_arr = numpy_helper.to_array(tensor)

    # Without a parent model there are no parameters to look up.
    parent_parameters = ({} if parent_pt_model is None else dict(
        parent_pt_model.named_parameters()))

    if tensor.name in parent_parameters:
        self.weights[tensor.name] = parent_parameters[tensor.name].data
    else:
        # a copy is needed here because the weight_arr array is not writable
        self.weights[tensor.name] = torch.from_numpy(weight_arr.copy())
def _strip_raw_data(tensor: onnx.TensorProto) -> onnx.TensorProto:
    """Replace a tensor's payload with summary metadata, in place.

    The tensor's values are read once to compute their mean and variance.
    These, together with the marker ``"type": "stripped"``, are JSON-encoded
    and stored as the external-data ``location``; the tensor is flagged as
    ``EXTERNAL`` and every payload field is cleared.

    Args:
        tensor: the tensor to strip; it is modified in place.

    Returns:
        The same ``tensor`` object, now without payload data.
    """
    arr = onnx.numpy_helper.to_array(tensor)
    meta_dict = {
        'type': "stripped",
        'average': float(arr.mean()),
        'variance': float(arr.var()),
    }

    # NOTE(review): raw_data is populated before set_external_data is called,
    # presumably because that helper expects it to be present -- confirm
    # against the onnx implementation.
    if not tensor.HasField("raw_data"):
        tensor.raw_data = onnx.numpy_helper.from_array(arr, tensor.name).raw_data
    onnx.external_data_helper.set_external_data(
        tensor, location=json.dumps(meta_dict), length=arr.nbytes)
    tensor.data_location = onnx.TensorProto.EXTERNAL

    # Clear every payload field, not only raw_data/float_data: a tensor whose
    # values were stored in another typed field (e.g. int64_data) would
    # otherwise keep its full contents after being "stripped".
    for payload_field in ('raw_data', 'float_data', 'int32_data',
                          'string_data', 'int64_data', 'double_data',
                          'uint64_data'):
        tensor.ClearField(payload_field)
    return tensor