def read_dataset(fp, is_implicit_VR, is_little_endian, bytelength=None,
                 stop_when=None, defer_size=None,
                 parent_encoding=default_encoding, specific_tags=None):
    """Return a Dataset instance containing the next dataset in the file.

    Parameters
    ----------
    fp : an opened file object
    is_implicit_VR : boolean
        True if file transfer syntax is implicit VR.
    is_little_endian : boolean
        True if file has little endian transfer syntax.
    bytelength : int, None, optional
        None to read until end of file or ItemDelimiterTag, else a fixed
        number of bytes to read
    stop_when : None, optional
        Optional callback function which can terminate reading.
        See help for data_element_generator for details
    defer_size : int, None, optional
        Size to avoid loading large elements in memory. See ``dcmread`` for
        more parameter info.
    parent_encoding : optional
        Encoding to use as a default in case a Specific Character Set
        (0008,0005) isn't specified
    specific_tags : list or None
        See ``dcmread`` for parameter info.

    Returns
    -------
    a Dataset instance

    See Also
    --------
    pydicom.dataset.Dataset
        A collection (dictionary) of DICOM `DataElement` instances.
    """
    raw_data_elements = dict()
    fp_start = fp.tell()
    de_gen = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when, defer_size, parent_encoding,
                                    specific_tags)
    try:
        while (bytelength is None) or (fp.tell() - fp_start < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == (0xFFFE, 0xE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        # XXX is this error visible enough to user code with just logging?
        logger.error(str(details) + " in file " +
                     getattr(fp, "name", "<no filename>"))
    except NotImplementedError as details:
        logger.error(details)

    return Dataset(raw_data_elements)
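# Illustrative usage sketch (not part of the library): feed read_dataset a
# raw implicit VR little endian stream. Assumes pydicom is installed and
# exposes read_dataset from pydicom.filereader. The byte string below is a
# hand-encoded (0010,0010) PatientName element: 4-byte tag, 4-byte length,
# then the 4-byte value.
from io import BytesIO
from pydicom.filereader import read_dataset

raw = b"\x10\x00\x10\x00\x04\x00\x00\x00Test"
ds = read_dataset(BytesIO(raw), is_implicit_VR=True, is_little_endian=True)
print(ds.PatientName)  # -> Test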
def read_dataset(fp, is_implicit_VR, is_little_endian, bytelength=None,
                 stop_when=None, defer_size=None,
                 parent_encoding=default_encoding, specific_tags=None):
    """Return a Dataset instance containing the next dataset in the file.

    Parameters
    ----------
    fp : an opened file object
    is_implicit_VR : boolean
        True if file transfer syntax is implicit VR.
    is_little_endian : boolean
        True if file has little endian transfer syntax.
    bytelength : int, None, optional
        None to read until end of file or ItemDelimiterTag, else a fixed
        number of bytes to read
    stop_when : None, optional
        Optional callback function which can terminate reading.
        See help for data_element_generator for details
    defer_size : int, None, optional
        Size to avoid loading large elements in memory. See ``read_file``
        for more parameter info.
    parent_encoding : optional
        Encoding to use as a default in case a Specific Character Set
        (0008,0005) isn't specified
    specific_tags : list or None
        See ``read_file`` for parameter info.

    Returns
    -------
    a Dataset instance

    See Also
    --------
    pydicom.dataset.Dataset
        A collection (dictionary) of DICOM `DataElement` instances.
    """
    raw_data_elements = dict()
    fp_start = fp.tell()
    de_gen = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when, defer_size, parent_encoding,
                                    specific_tags)
    try:
        while (bytelength is None) or (fp.tell() - fp_start < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == (0xFFFE, 0xE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        # XXX is this error visible enough to user code with just logging?
        logger.error(str(details) + " in file " +
                     getattr(fp, "name", "<no filename>"))
    except NotImplementedError as details:
        logger.error(details)

    return Dataset(raw_data_elements)
def read_dataset(fp, is_implicit_VR, is_little_endian, bytelength=None,
                 stop_when=None, defer_size=None,
                 parent_encoding=default_encoding, specific_tags=None,
                 at_top_level=True):
    """Return a :class:`~pydicom.dataset.Dataset` instance containing the
    next dataset in the file.

    Parameters
    ----------
    fp : file-like
        An opened file-like object.
    is_implicit_VR : bool
        ``True`` if file transfer syntax is implicit VR.
    is_little_endian : bool
        ``True`` if file has little endian transfer syntax.
    bytelength : int, None, optional
        ``None`` to read until end of file or ItemDelimiterTag, else a fixed
        number of bytes to read
    stop_when : None, optional
        Optional callback function which can terminate reading.
        See help for :func:`data_element_generator` for details
    defer_size : int, None, optional
        Size to avoid loading large elements in memory.
        See :func:`dcmread` for more parameter info.
    parent_encoding : optional
        Encoding to use as a default in case (0008,0005)
        *Specific Character Set* isn't specified.
    specific_tags : list or None
        See :func:`dcmread` for parameter info.
    at_top_level : bool
        If dataset is top level (not within a sequence).
        Used to turn off the explicit VR heuristic within sequences.

    Returns
    -------
    dataset.Dataset
        A Dataset instance.

    See Also
    --------
    :class:`~pydicom.dataset.Dataset`
        A collection (dictionary) of DICOM
        :class:`~pydicom.dataelem.DataElement` instances.
    """
    raw_data_elements = dict()
    fp_start = fp.tell()
    if at_top_level:
        is_implicit_VR = _is_implicit_vr(
            fp, is_implicit_VR, is_little_endian, stop_when)
        fp.seek(fp_start)
    de_gen = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when, defer_size, parent_encoding,
                                    specific_tags)
    try:
        while (bytelength is None) or (fp.tell() - fp_start < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == BaseTag(0xFFFEE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        if config.enforce_valid_values:
            raise
        msg = str(details) + " in file " + getattr(fp, "name", "<no filename>")
        warnings.warn(msg, UserWarning)
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(raw_data_elements)
    if 0x00080005 in raw_data_elements:
        # use the decoded element *value*, not the DataElement itself
        char_set = DataElement_from_raw(raw_data_elements[0x00080005]).value
        encoding = convert_encodings(char_set)
    else:
        encoding = parent_encoding
    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
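# Sketch of the encoding bookkeeping above (an illustrative example, not
# library code; assumes a pydicom release where read_dataset records the
# original encoding via set_original_encoding, exposing read_encoding and
# read_implicit_vr on the dataset). When (0008,0005) Specific Character Set
# is present, its value is run through convert_encodings.
from io import BytesIO
from pydicom.filereader import read_dataset

# (0008,0005) Specific Character Set = 'ISO_IR 100', implicit VR little endian
raw = b"\x08\x00\x05\x00\x0a\x00\x00\x00ISO_IR 100"
ds = read_dataset(BytesIO(raw), True, True)
print(ds.read_encoding)     # e.g. ['latin_1'] after convert_encodings
print(ds.read_implicit_vr)  # True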
def read_undefined_length_value(fp, is_little_endian, delimiter_tag,
                                defer_size=None, read_size=1024 * 8):
    """Read until the delimiter tag is found and return the value;
    ignore the delimiter.

    On completion, the file will be set to the first byte after the delimiter
    and its following four zero bytes.

    Parameters
    ----------
    fp : a file-like object
    is_little_endian : boolean
        True if file transfer syntax is little endian, else False.
    delimiter_tag : BaseTag
        Tag used as end marker for reading
    defer_size : int, None, optional
        Size to avoid loading large elements in memory. See
        ``filereader.dcmread`` for more parameter info.
    read_size : int
        Number of bytes to read at one time.

    Returns
    -------
    value : bytes or None
        The value read up to (but not including) the delimiter, or None if
        `defer_size` was exceeded.

    Raises
    ------
    EOFError
        If EOF is reached before delimiter found.
    """
    data_start = fp.tell()
    search_rewind = 3

    if is_little_endian:
        bytes_format = b"<HH"
    else:
        bytes_format = b">HH"
    bytes_to_find = pack(bytes_format, delimiter_tag.group, delimiter_tag.elem)

    found = False
    eof = False
    value_chunks = []
    defer_size = size_in_bytes(defer_size)
    byte_count = 0  # for defer_size checks
    while not found:
        chunk_start = fp.tell()
        bytes_read = fp.read(read_size)
        if len(bytes_read) < read_size:
            # try again - if still don't get required amount,
            # this is the last block
            new_bytes = fp.read(read_size - len(bytes_read))
            bytes_read += new_bytes
            if len(bytes_read) < read_size:
                eof = True  # but will still check whatever we did get
        index = bytes_read.find(bytes_to_find)
        if index != -1:
            found = True
            new_bytes = bytes_read[:index]
            byte_count += len(new_bytes)
            if defer_size is None or byte_count < defer_size:
                value_chunks.append(new_bytes)
            fp.seek(chunk_start + index + 4)  # rewind to end of delimiter
            length = fp.read(4)
            if length != b"\0\0\0\0":
                msg = ("Expected 4 zero bytes after undefined length delimiter"
                       " at pos {0:04x}")
                logger.error(msg.format(fp.tell() - 4))
        elif eof:
            fp.seek(data_start)
            raise EOFError(
                "End of file reached before delimiter {0!r} found".format(
                    delimiter_tag))
        else:
            # rewind a bit in case delimiter crossed read_size boundary
            fp.seek(fp.tell() - search_rewind)
            # accumulate the bytes read (not including the rewind)
            new_bytes = bytes_read[:-search_rewind]
            byte_count += len(new_bytes)
            if defer_size is None or byte_count < defer_size:
                value_chunks.append(new_bytes)
    # if get here then have found the byte string
    if defer_size is not None and byte_count >= defer_size:
        return None
    else:
        return b"".join(value_chunks)
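# The delimiter scan in miniature (standalone sketch, no pydicom needed):
# the delimiter tag is packed to 4 bytes and searched for in each chunk.
# Rewinding search_rewind = 3 bytes between chunks guarantees a 4-byte
# delimiter straddling a chunk boundary is seen whole in the next read.
from struct import pack

bytes_to_find = pack("<HH", 0xFFFE, 0xE00D)  # ItemDelimiterTag, little endian
buf = b"\x01\x02\x03\x04" + bytes_to_find + b"\x00\x00\x00\x00"
index = buf.find(bytes_to_find)
value = buf[:index]                                     # b'\x01\x02\x03\x04'
assert buf[index + 4:index + 8] == b"\x00\x00\x00\x00"  # 4-byte zero length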
def from_json(cls, dataset_class, tag, vr, value, value_key,
              bulk_data_uri_handler=None, encodings=None):
    """Create a DataElement from a JSON data element representation.

    Parameters
    ----------
    dataset_class : Dataset derived class
        Class used to create sequence items.
    tag : pydicom.tag.Tag
        Data element tag.
    vr : str
        Data element value representation.
    value : list
        Data element value(s).
    value_key : Union[str, None]
        Key of the data element that contains the value
        (options: ``{"Value", "InlineBinary", "BulkDataURI"}``)
    bulk_data_uri_handler : Union[Callable, None]
        Callable that accepts the "BulkDataURI" of the JSON representation
        of a data element and returns the actual value of that data element
        (retrieved via DICOMweb WADO-RS).

    Returns
    -------
    pydicom.dataelem.DataElement
    """
    # TODO: test wado-rs retrieve wrapper
    try:
        vm = dictionary_VM(tag)
    except KeyError:
        # Private tag
        vm = str(len(value))
    if value_key == 'Value':
        if not isinstance(value, list):
            fmt = '"{}" of data element "{}" must be a list.'
            raise TypeError(fmt.format(value_key, tag))
    elif value_key in {'InlineBinary', 'BulkDataURI'}:
        if isinstance(value, list):
            fmt = '"{}" of data element "{}" must be a {}.'
            expected_type = ('string' if value_key == 'BulkDataURI'
                             else 'bytes-like object')
            raise TypeError(fmt.format(value_key, tag, expected_type))
    if vr == 'SQ':
        elem_value = []
        for value_item in value:
            ds = dataset_class()
            if value_item:
                for key, val in value_item.items():
                    if 'vr' not in val:
                        fmt = 'Data element "{}" must have key "vr".'
                        raise KeyError(fmt.format(tag))
                    unique_value_keys = tuple(
                        set(val.keys()) & set(jsonrep.JSON_VALUE_KEYS))
                    if len(unique_value_keys) == 0:
                        logger.debug(
                            'data element has neither key "{}".'.format(
                                '" nor "'.join(jsonrep.JSON_VALUE_KEYS)))
                        elem = DataElement(tag=tag, value='', VR=vr)
                    else:
                        value_key = unique_value_keys[0]
                        elem = cls.from_json(dataset_class, key,
                                             val['vr'], val[value_key],
                                             value_key)
                    ds.add(elem)
            elem_value.append(ds)
    elif vr == 'PN':
        # Special case, see DICOM Part 18 Annex F2.2
        elem_value = []
        for v in value:
            if not isinstance(v, dict):
                # Some DICOMweb services get this wrong, so we
                # work around the issue and warn the user
                # rather than raising an error.
                logger.error(
                    'value of data element "{}" with VR Person Name (PN) '
                    'is not formatted correctly'.format(tag))
                elem_value.append(v)
            else:
                elem_value.extend(list(v.values()))
        if vm == '1':
            try:
                elem_value = elem_value[0]
            except IndexError:
                elem_value = ''
    else:
        if vm == '1':
            if value_key == 'InlineBinary':
                elem_value = base64.b64decode(value)
            elif value_key == 'BulkDataURI':
                if bulk_data_uri_handler is None:
                    logger.warning(
                        'no bulk data URI handler provided for retrieval '
                        'of value of data element "{}"'.format(tag))
                    elem_value = b''
                else:
                    elem_value = bulk_data_uri_handler(value)
            else:
                if value:
                    elem_value = value[0]
                else:
                    elem_value = value
        else:
            elem_value = value
    if elem_value is None:
        logger.warning('missing value for data element "{}"'.format(tag))
        elem_value = ''

    elem_value = jsonrep.convert_to_python_number(elem_value, vr)

    try:
        if compat.in_py2 and vr == "PN":
            elem_value = PersonNameUnicode(elem_value, 'UTF8')
        return DataElement(tag=tag, value=elem_value, VR=vr)
    except Exception:
        raise ValueError(
            'Data element "{}" could not be loaded from JSON: {}'.format(
                tag, elem_value))
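# Illustrative call (a sketch, assuming from_json is exposed as the
# classmethod pydicom.dataelem.DataElement.from_json, as in pydicom >= 1.3).
# Per the DICOM JSON Model (PS3.18 Annex F), PN values are objects keyed by
# "Alphabetic"/"Ideographic"/"Phonetic", which the PN branch above unpacks.
from pydicom.dataelem import DataElement
from pydicom.dataset import Dataset

elem = DataElement.from_json(
    Dataset, '00100010', 'PN', [{'Alphabetic': 'Yamada^Tarou'}], 'Value')
print(elem.value)  # -> Yamada^Tarou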
def read_undefined_length_value(fp, is_little_endian, delimiter_tag,
                                defer_size=None, read_size=1024 * 8):
    """Read until the delimiter tag is found and return the value;
    ignore the delimiter.

    On completion, the file will be set to the first byte after the delimiter
    and its following four zero bytes.

    Parameters
    ----------
    fp : a file-like object
    is_little_endian : boolean
        True if file transfer syntax is little endian, else False.
    delimiter_tag : BaseTag
        Tag used as end marker for reading
    defer_size : int, None, optional
        Size to avoid loading large elements in memory. See
        ``filereader.read_file`` for more parameter info.
    read_size : int
        Number of bytes to read at one time.

    Returns
    -------
    value : bytes or None
        The value read up to (but not including) the delimiter, or None if
        `defer_size` was exceeded.

    Raises
    ------
    EOFError
        If EOF is reached before delimiter found.
    """
    data_start = fp.tell()
    search_rewind = 3

    if is_little_endian:
        bytes_format = b"<HH"
    else:
        bytes_format = b">HH"
    bytes_to_find = pack(bytes_format, delimiter_tag.group, delimiter_tag.elem)

    found = False
    eof = False
    value_chunks = []
    defer_size = size_in_bytes(defer_size)
    byte_count = 0  # for defer_size checks
    while not found:
        chunk_start = fp.tell()
        bytes_read = fp.read(read_size)
        if len(bytes_read) < read_size:
            # try again - if still don't get required amount,
            # this is the last block
            new_bytes = fp.read(read_size - len(bytes_read))
            bytes_read += new_bytes
            if len(bytes_read) < read_size:
                eof = True  # but will still check whatever we did get
        index = bytes_read.find(bytes_to_find)
        if index != -1:
            found = True
            new_bytes = bytes_read[:index]
            byte_count += len(new_bytes)
            if defer_size is None or byte_count < defer_size:
                value_chunks.append(new_bytes)
            fp.seek(chunk_start + index + 4)  # rewind to end of delimiter
            length = fp.read(4)
            if length != b"\0\0\0\0":
                msg = ("Expected 4 zero bytes after undefined length delimiter"
                       " at pos {0:04x}")
                logger.error(msg.format(fp.tell() - 4))
        elif eof:
            fp.seek(data_start)
            raise EOFError(
                "End of file reached before delimiter {0!r} found".format(
                    delimiter_tag))
        else:
            # rewind a bit in case delimiter crossed read_size boundary
            fp.seek(fp.tell() - search_rewind)
            # accumulate the bytes read (not including the rewind)
            new_bytes = bytes_read[:-search_rewind]
            byte_count += len(new_bytes)
            if defer_size is None or byte_count < defer_size:
                value_chunks.append(new_bytes)
    # if get here then have found the byte string
    if defer_size is not None and byte_count >= defer_size:
        return None
    else:
        return b"".join(value_chunks)
def read_dataset(fp: BinaryIO, is_implicit_VR: bool, is_little_endian: bool,
                 bytelength: Optional[int] = None,
                 stop_when: Optional[Callable[[BaseTag, Optional[str], int],
                                              bool]] = None,
                 defer_size: Optional[Union[str, int, float]] = None,
                 parent_encoding: Union[
                     str, MutableSequence[str]] = default_encoding,
                 specific_tags: Optional[List[BaseTag]] = None,
                 at_top_level: bool = True) -> Dataset:
    """Return a :class:`~pydicom.dataset.Dataset` instance containing the
    next dataset in the file.

    Parameters
    ----------
    fp : file-like
        An opened file-like object.
    is_implicit_VR : bool
        ``True`` if file transfer syntax is implicit VR.
    is_little_endian : bool
        ``True`` if file has little endian transfer syntax.
    bytelength : int, None, optional
        ``None`` to read until end of file or ItemDelimiterTag, else a fixed
        number of bytes to read
    stop_when : None, optional
        Optional callback function which can terminate reading.
        See help for :func:`data_element_generator` for details
    defer_size : int, str or float, optional
        Size to avoid loading large elements in memory.
        See :func:`dcmread` for more parameter info.
    parent_encoding : str or List[str]
        Optional encoding to use as a default in case (0008,0005)
        *Specific Character Set* isn't specified.
    specific_tags : list of BaseTag, optional
        See :func:`dcmread` for parameter info.
    at_top_level : bool
        If dataset is top level (not within a sequence).
        Used to turn off the explicit VR heuristic within sequences.

    Returns
    -------
    dataset.Dataset
        A Dataset instance.

    See Also
    --------
    :class:`~pydicom.dataset.Dataset`
        A collection (dictionary) of DICOM
        :class:`~pydicom.dataelem.DataElement` instances.
    """
    raw_data_elements: Dict[BaseTag, Union[RawDataElement, DataElement]] = {}
    fp_start = fp.tell()
    is_implicit_VR = _is_implicit_vr(
        fp, is_implicit_VR, is_little_endian, stop_when,
        is_sequence=not at_top_level)
    fp.seek(fp_start)
    de_gen = data_element_generator(
        fp,
        is_implicit_VR,
        is_little_endian,
        stop_when,
        defer_size,
        parent_encoding,
        specific_tags,
    )
    try:
        while (bytelength is None) or (fp.tell() - fp_start < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == BaseTag(0xFFFEE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        if config.settings.reading_validation_mode == config.RAISE:
            raise
        msg = str(details) + " in file " + getattr(fp, "name", "<no filename>")
        warnings.warn(msg, UserWarning)
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(raw_data_elements)

    encoding: Union[str, MutableSequence[str]]
    if 0x00080005 in raw_data_elements:
        elem = cast(RawDataElement, raw_data_elements[BaseTag(0x00080005)])
        char_set = cast(
            Optional[Union[str, MutableSequence[str]]],
            DataElement_from_raw(elem).value
        )
        encoding = convert_encodings(char_set)  # -> List[str]
    else:
        encoding = parent_encoding  # -> Union[str, MutableSequence[str]]

    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)

    return ds
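# Sketch of the stop_when callback (stop_at_pixel_data is a hypothetical
# name; the assumption, matching the type hint above, is that the generator
# calls stop_when(tag, vr, length) with vr None under implicit VR). Reading
# halts before the matched element, here (7FE0,0010) PixelData.
from io import BytesIO
from pydicom.filereader import read_dataset

def stop_at_pixel_data(tag, vr, length):
    return tag == 0x7FE00010

raw = (b"\x10\x00\x10\x00\x04\x00\x00\x00Test"               # (0010,0010)
       b"\xe0\x7f\x10\x00\x04\x00\x00\x00\x00\x00\x00\x00")  # (7FE0,0010)
ds = read_dataset(BytesIO(raw), True, True, stop_when=stop_at_pixel_data)
assert 0x7FE00010 not in ds and ds.PatientName == "Test"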
def read_undefined_length_value(fp: BinaryIO,
                                is_little_endian: bool,
                                delimiter_tag: BaseTag,
                                defer_size: Optional[Union[int, float]] = None,
                                read_size: int = 1024 * 8) -> Optional[bytes]:
    """Read until `delimiter_tag` and return the value up to that point.

    On completion, the file will be set to the first byte after the delimiter
    and its following four zero bytes.

    Parameters
    ----------
    fp : file-like
        The file-like to read.
    is_little_endian : bool
        ``True`` if file transfer syntax is little endian, else ``False``.
    delimiter_tag : BaseTag
        Tag used as end marker for reading
    defer_size : int or None, optional
        Size to avoid loading large elements in memory. See
        :func:`~pydicom.filereader.dcmread` for more parameter info.
    read_size : int, optional
        Number of bytes to read at one time.

    Returns
    -------
    bytes or None
        The value read up to (but not including) the delimiter, or ``None``
        if `defer_size` was exceeded.

    Raises
    ------
    EOFError
        If EOF is reached before delimiter found.
    """
    data_start = fp.tell()
    defer_size = size_in_bytes(defer_size)

    # It's common for an undefined length value item to be an
    # encapsulated pixel data as defined in PS3.5 section A.4.
    # Attempt to parse the data under that assumption, since the method
    #  1. is proof against coincidental embedded sequence delimiter tags
    #  2. avoids accumulating any data in memory if the element is large
    #     enough to be deferred
    #  3. does not double-accumulate data (in chunks and then joined)
    #
    # Unfortunately, some implementations deviate from the standard and the
    # encapsulated pixel data-parsing algorithm fails. In that case, we fall
    # back to a method of scanning the entire element value for the
    # sequence delimiter, as was done historically.
    if delimiter_tag == SequenceDelimiterTag:
        was_value_found, value = _try_read_encapsulated_pixel_data(
            fp, is_little_endian, defer_size)
        if was_value_found:
            return value

    search_rewind = 3

    if is_little_endian:
        bytes_format = b"<HH"
    else:
        bytes_format = b">HH"
    bytes_to_find = pack(bytes_format, delimiter_tag.group, delimiter_tag.elem)

    found = False
    eof = False
    value_chunks = []
    byte_count = 0  # for defer_size checks
    while not found:
        chunk_start = fp.tell()
        bytes_read = fp.read(read_size)
        if len(bytes_read) < read_size:
            # try again - if still don't get required amount,
            # this is the last block
            new_bytes = fp.read(read_size - len(bytes_read))
            bytes_read += new_bytes
            if len(bytes_read) < read_size:
                eof = True  # but will still check whatever we did get
        index = bytes_read.find(bytes_to_find)
        if index != -1:
            found = True
            new_bytes = bytes_read[:index]
            byte_count += len(new_bytes)
            if defer_size is None or byte_count < defer_size:
                value_chunks.append(new_bytes)
            fp.seek(chunk_start + index + 4)  # rewind to end of delimiter
            length = fp.read(4)
            if length != b"\0\0\0\0":
                msg = ("Expected 4 zero bytes after undefined length delimiter"
                       " at pos {0:04x}")
                logger.error(msg.format(fp.tell() - 4))
        elif eof:
            fp.seek(data_start)
            raise EOFError(
                "End of file reached before delimiter {0!r} found".format(
                    delimiter_tag))
        else:
            # rewind a bit in case delimiter crossed read_size boundary
            fp.seek(fp.tell() - search_rewind)
            # accumulate the bytes read (not including the rewind)
            new_bytes = bytes_read[:-search_rewind]
            byte_count += len(new_bytes)
            if defer_size is None or byte_count < defer_size:
                value_chunks.append(new_bytes)
    # if get here then have found the byte string
    if defer_size is not None and byte_count >= defer_size:
        return None
    else:
        return b"".join(value_chunks)
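# End-to-end sketch through the public function (assumes it lives in
# pydicom.fileutil, its home in current pydicom). A plain BytesIO suffices
# since only read/seek/tell are used. Using ItemDelimiterTag rather than
# SequenceDelimiterTag keeps the example on the generic scanning path
# instead of the encapsulated pixel data fast path above.
from io import BytesIO
from struct import pack
from pydicom.fileutil import read_undefined_length_value
from pydicom.tag import ItemDelimiterTag

payload = b"\x01\x02\x03\x04"
delim = pack(b"<HH", ItemDelimiterTag.group, ItemDelimiterTag.elem)
fp = BytesIO(payload + delim + b"\x00\x00\x00\x00")
assert read_undefined_length_value(fp, True, ItemDelimiterTag) == payload
assert fp.tell() == len(payload) + 8  # past delimiter and its zero length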