def convert_value(VR, raw_data_element, encoding=default_encoding):
    """Return the converted value (from raw bytes) for the given VR.

    Parameters
    ----------
    VR : str
        The Value Representation to convert the raw bytes with.
    raw_data_element : RawDataElement
        The raw element whose ``value`` bytes are converted.
    encoding : str or list of str, optional
        The character encoding(s) used for text VRs.

    Raises
    ------
    NotImplementedError
        If `VR` has no registered converter.
    """
    if VR not in converters:
        raise NotImplementedError(
            "Unknown Value Representation '{0}'".format(VR))

    # Look up the function to convert that VR.  Numeric converters are
    # stored as (converter, struct format string) tuples.
    if isinstance(converters[VR], tuple):
        converter, num_format = converters[VR]
    else:
        converter = converters[VR]
        num_format = None

    # Ensure that encoding is in the proper 3-element format
    if isinstance(encoding, compat.string_types):
        encoding = [encoding, ] * 3

    byte_string = raw_data_element.value
    is_little_endian = raw_data_element.is_little_endian
    is_implicit_VR = raw_data_element.is_implicit_VR

    # Not only two cases. Also need extra info if is a raw sequence.
    # Pass the encoding to the converter if it is a specific VR.
    try:
        if VR == 'PN':
            value = converter(byte_string, is_little_endian,
                              encoding=encoding)
        elif VR in text_VRs:
            # Text VRs use the 2nd specified encoding
            value = converter(byte_string, is_little_endian,
                              encoding=encoding[1])
        elif VR != "SQ":
            value = converter(byte_string, is_little_endian, num_format)
        else:
            value = convert_SQ(byte_string, is_implicit_VR,
                               is_little_endian, encoding,
                               raw_data_element.value_tell)
    except ValueError:
        if config.enforce_valid_values:
            # The user really wants an exception here
            raise
        logger.debug('unable to translate tag %s with VR %s'
                     % (raw_data_element.tag, VR))
        # Try the other VRs in order until one converts cleanly.
        for vr in convert_retry_VR_order:
            if vr == VR:
                continue
            try:
                value = convert_value(vr, raw_data_element, encoding)
                # FIX: success is now logged on the branch that actually
                # converted; previously the "converted" message was in the
                # for-else, i.e. logged only when NO retry VR succeeded.
                logger.debug('converted tag %s with VR %s'
                             % (raw_data_element.tag, vr))
                break
            except Exception:
                pass
        else:
            # All retry VRs failed -- fall back to the raw bytes.
            logger.debug('could not convert tag %s with any VR in the '
                         'convert_retry_VR_order list'
                         % raw_data_element.tag)
            value = raw_data_element.value
    return value
def read_item(fp):
    """Read and return a single Item in the fragmented data stream.

    Returns ``None`` at the Sequence Delimiter or at end of data,
    otherwise the Item's raw bytes.

    Raises
    ------
    ValueError
        If an encapsulated fragment has Undefined Length.
    """
    try:
        tag = fp.read_tag()
    except EOFError:
        # Delimiter was already consumed upstream, so running out of
        # data here is the normal termination condition.
        return None
    if tag == SequenceDelimiterTag:  # No more items, time for sequence to stop reading
        length = fp.read_UL()
        logger.debug("%04x: Sequence Delimiter, length 0x%x",
                     fp.tell() - 8, length)
        if length != 0:
            logger.warning(
                "Expected 0x00000000 after delimiter, found 0x%x, "
                "at data position 0x%x", length, fp.tell() - 4)
        return None
    if tag != ItemTag:
        logger.warning("Expected Item with tag %s at data position 0x%x",
                       ItemTag, fp.tell() - 4)
        length = fp.read_UL()
    else:
        length = fp.read_UL()
        logger.debug("%04x: Item, length 0x%x", fp.tell() - 8, length)
    if length == 0xFFFFFFFF:
        # FIX: the position must be parenthesized -- '%' binds tighter
        # than '-', so the original "... % fp.tell() - 4" raised
        # TypeError (str - int) instead of the intended ValueError.
        raise ValueError(
            "Encapsulated data fragment had Undefined Length at data "
            "position 0x%x" % (fp.tell() - 4))
    item_data = fp.read(length)
    return item_data
def __setitem__(self, key, value):
    """Operator for dataset[key]=value. Check consistency, and deal with private tags"""
    # Subclasses (e.g. DeferredDataElement) are acceptable as well.
    if not isinstance(value, (DataElement, RawDataElement)):
        raise TypeError(
            "Dataset contents must be DataElement instances.\n"
            "To set a data_element value use data_element.value=val")
    tag = Tag(value.tag)
    if tag != key:
        raise ValueError("data_element.tag must match the dictionary key")
    elem = value
    if tag.is_private:
        # See PS 3.5-2008 section 7.8.1 (p. 44) for how blocks are reserved
        logger.debug("Setting private tag %r" % tag)
        block = tag.elem >> 8
        creator_tag = Tag(tag.group, block)
        if creator_tag in self and tag != creator_tag:
            # Resolve the raw element so private_creator can be attached.
            if isinstance(elem, RawDataElement):
                elem = DataElement_from_raw(elem, self._character_set)
            elem.private_creator = self[creator_tag].value
    dict.__setitem__(self, tag, elem)
def __ne__(self, other):
    # Two Series differ when their SeriesInstanceUIDs differ; any
    # failure to compare is treated as "not equal".
    try:
        mine = self.dicom_dataset.SeriesInstanceUID
        theirs = other.dicom_dataset.SeriesInstanceUID
        return mine != theirs
    except Exception as e:
        logger.debug("trouble comparing two Series", exc_info=e)
        return True
def __repr__(self):
    # Render the SOP Instance UID; fall back to a "None" line when the
    # dataset cannot be read.
    try:
        uid = self.dicom_dataset.SOPInstanceUID
        return "\t\t\tSOPInstanceUID = %s:\n" % (uid, )
    except Exception as e:
        logger.debug("trouble getting Series data", exc_info=e)
        return "\t\t\tSOPInstanceUID = None\n"
def __eq__(self, other):
    # Series equality is defined by SeriesInstanceUID; comparison
    # failures count as "not equal".
    try:
        mine = self.dicom_dataset.SeriesInstanceUID()
        theirs = other.dicom_dataset.SeriesInstanceUID()
        return mine == theirs
    except Exception as e:
        logger.debug("trouble comparing two Series", exc_info=e)
        return False
def __ne__(self, other):
    # Studies differ when their StudyInstanceUIDs differ; comparison
    # failures count as "not equal".
    try:
        mine = self.dicom_dataset.StudyInstanceUID()
        theirs = other.dicom_dataset.StudyInstanceUID()
        return mine != theirs
    except Exception as e:
        logger.debug("trouble comparing two Studies", exc_info=e)
        return True
def __ne__(self, other):
    # Patients differ when their PatientIDs differ; comparison failures
    # count as "not equal".
    try:
        mine = self.dicom_dataset.PatientID()
        theirs = other.dicom_dataset.PatientID()
        return mine != theirs
    except Exception as e:
        logger.debug("trouble comparing two patients", exc_info=e)
        return True
def __str__(self):
    # One-line patient summary: sex, name, birth date.
    try:
        ds = self.dicom_dataset
        return "Patient: [{0}] {1} ({2})\n".format(
            ds.PatientSex(), ds.PatientName(), ds.PatientBirthDate())
    except Exception as e:
        logger.debug("trouble getting image PatientID", exc_info=e)
        return "None"
def __repr__(self):
    # Study header followed by the repr of every contained series.
    try:
        pieces = [f"\tStudyIUID = {self.dicom_dataset.StudyInstanceUID}:\n"]
        pieces.extend(repr(series) for series in self.series)
        return "".join(pieces)
    except Exception as e:
        logger.debug("trouble getting Study data", exc_info=e)
        return "\tStudyIUID = None\n"
def __repr__(self):
    # Patient header followed by the repr of every contained study.
    try:
        pieces = ["PatientID = %s:\n" % (self.dicom_dataset.PatientID, )]
        pieces.extend(repr(study) for study in self.studies)
        return "".join(pieces)
    except Exception as e:
        logger.debug("trouble getting Patient data", exc_info=e)
        return "PatientID = None\n"
def __init__(self, fp, stop_when=None, force=False):
    """Read the preamble and meta info and prepare iterator for
    remainder of file.

    Parameters
    ----------
    fp : an open DicomFileLike object, at start of file
    stop_when : callable or None
        Passed through; stored for use when iterating the dataset.
    force : boolean
        Force reading of data. See ``read_file`` for more parameter
        info.

    Adds flags to fp: Big/Little-endian and Implicit/Explicit VR
    """
    self.fp = fp
    self.stop_when = stop_when
    # read_preamble returns None when no 'DICM' header was found
    # (only possible when force=True).
    self.preamble = preamble = read_preamble(fp, force)
    self.has_header = has_header = (preamble is not None)
    self.file_meta_info = Dataset()
    if has_header:
        self.file_meta_info = file_meta_info = _read_file_meta_info(fp)
        # The transfer syntax decides endianness and implicit/explicit
        # VR for everything that follows the file meta information.
        transfer_syntax = file_meta_info.TransferSyntaxUID
        if transfer_syntax == pydicom.uid.ExplicitVRLittleEndian:
            self._is_implicit_VR = False
            self._is_little_endian = True
        elif transfer_syntax == pydicom.uid.ImplicitVRLittleEndian:
            self._is_implicit_VR = True
            self._is_little_endian = True
        elif transfer_syntax == pydicom.uid.ExplicitVRBigEndian:
            self._is_implicit_VR = False
            self._is_little_endian = False
        elif transfer_syntax == pydicom.uid.DeflatedExplicitVRLittleEndian:
            # See PS3.6-2008 A.5 (p 71) -- when written, the entire dataset
            # following the file metadata was prepared the normal way,
            # then "deflate" compression applied.
            # All that is needed here is to decompress and then
            # use as normal in a file-like object
            zipped = fp.read()
            # -MAX_WBITS part is from comp.lang.python answer:
            # groups.google.com/group/comp.lang.python/msg/e95b3b38a71e6799
            unzipped = zlib.decompress(zipped, -zlib.MAX_WBITS)
            fp = BytesIO(unzipped)  # a file-like object
            self.fp = fp  # point to new object
            self._is_implicit_VR = False
            self._is_little_endian = True
        else:
            # Any other syntax should be Explicit VR Little Endian,
            # e.g. all Encapsulated (JPEG etc) are ExplVR-LE
            # by Standard PS 3.5-2008 A.4 (p63)
            self._is_implicit_VR = False
            self._is_little_endian = True
    else:  # no header -- make assumptions
        # NOTE(review): this sets TransferSyntaxUID on the file object,
        # not on self.file_meta_info -- looks intentional here but worth
        # confirming against downstream readers of fp.
        fp.TransferSyntaxUID = pydicom.uid.ImplicitVRLittleEndian
        self._is_little_endian = True
        self._is_implicit_VR = True

    impl_expl = ("Explicit", "Implicit")[self._is_implicit_VR]
    big_little = ("Big", "Little")[self._is_little_endian]
    logger.debug("Using {0:s} VR, {1:s} Endian transfer syntax".format(
        impl_expl, big_little))
def __str__(self):
    # One-line study summary: ID, description, date.
    try:
        ds = self.dicom_dataset
        return "\tStudy: [{0}] {1} ({2})\n".format(
            ds.StudyID(), ds.StudyDescription(), ds.StudyDate(),
        )
    except Exception as e:
        logger.debug("trouble getting image StudyInstanceUID", exc_info=e)
        return "None"
def convert_pixel_data(self): """Convert the Pixel Data to a numpy array internally. Returns ------- None Converted pixel data is stored internally in the dataset. If a compressed image format, the image is decompressed, and any related data elements are changed accordingly. """ # Check if already have converted to a NumPy array # Also check if self.PixelData has changed. If so, get new NumPy array already_have = True if not hasattr(self, "_pixel_array"): already_have = False elif self._pixel_id != id(self.PixelData): already_have = False if not already_have: last_exception = None successfully_read_pixel_data = False for x in [ h for h in pydicom.config.image_handlers if h and h.supports_transfer_syntax(self) ]: try: pixel_array = x.get_pixeldata(self) self._pixel_array = self._reshape_pixel_array(pixel_array) if x.needs_to_convert_to_RGB(self): self._pixel_array = self._convert_YBR_to_RGB( self._pixel_array) successfully_read_pixel_data = True break except Exception as e: logger.debug("Trouble with", exc_info=e) last_exception = e continue if not successfully_read_pixel_data: handlers_tried = " ".join( [str(x) for x in pydicom.config.image_handlers]) logger.info("%s did not support this transfer syntax", handlers_tried) self._pixel_array = None self._pixel_id = None if last_exception: raise last_exception else: msg = ("No available image handler could " "decode this transfer syntax {}".format( self.file_meta.TransferSyntaxUID.name)) raise NotImplementedError(msg) # is this guaranteed to work if memory is re-used?? self._pixel_id = id(self.PixelData)
def __str__(self):
    """Return a one-line series summary (modality, protocol,
    description, body part, position), or "None" on failure."""
    try:
        return "\t\tSeries: [{0} {1}] {2} ({3} {4})\n".format(
            self.dicom_dataset.Modality(),
            # FIX: attribute names were misspelled ('ProtoclName' and
            # 'BodyPasrtExamined'), so every call raised AttributeError
            # and this method always returned "None".  The sibling
            # __repr__ spells them correctly.
            self.dicom_dataset.ProtocolName(),
            self.dicom_dataset.SeriesDescription(),
            self.dicom_dataset.BodyPartExamined(),
            self.dicom_dataset.PatientPosition())
    except Exception as e:
        logger.debug("trouble getting image SeriesInstanceUID", exc_info=e)
        return "None"
def __str__(self):
    """Return a one-line image summary (location, position,
    orientation, shape, spacing, voxel size), or "None" on failure."""
    try:
        return "\t\t\tImage:[{0} {1} {2}] {3} ({4} {5})\n".format(
            self.dicom_dataset.SliceLocation(),
            self.dicom_dataset.ImagePositionPatient(),
            self.dicom_dataset.ImageOrientationPatient(),
            self.dicom_dataset.Shape(),
            self.dicom_dataset.ImageSpacing(),
            # FIX: the format string has six fields ({0}-{5}) but only
            # five arguments were supplied, so str.format raised
            # IndexError and this always returned "None".  The sibling
            # __repr__ uses VoxelSize() as the sixth value.
            self.dicom_dataset.VoxelSize(),
        )
    except Exception as e:
        logger.debug("trouble getting image SOPInstanceUID", exc_info=e)
        return "None"
def convert_value(VR, raw_data_element, encoding=default_encoding):
    """Return the converted value (from raw bytes) for the given VR.

    Parameters
    ----------
    VR : str
        The Value Representation to convert the raw bytes with.
    raw_data_element : RawDataElement
        The raw element whose ``value`` bytes are converted.
    encoding : str or list of str, optional
        The character encoding(s) used for text VRs.

    Raises
    ------
    NotImplementedError
        If `VR` has no registered converter.
    """
    if VR not in converters:
        raise NotImplementedError(
            "Unknown Value Representation '{0}'".format(VR))

    # Look up the function to convert that VR.  Numeric converters are
    # stored as (converter, struct format string) tuples.
    if isinstance(converters[VR], tuple):
        converter, num_format = converters[VR]
    else:
        converter = converters[VR]
        num_format = None

    # Ensure that encoding is in the proper 3-element format
    if isinstance(encoding, compat.string_types):
        encoding = [encoding, ] * 3

    byte_string = raw_data_element.value
    is_little_endian = raw_data_element.is_little_endian
    is_implicit_VR = raw_data_element.is_implicit_VR

    # Not only two cases. Also need extra info if is a raw sequence.
    # Pass the encoding to the converter if it is a specific VR.
    try:
        if VR == 'PN':
            value = converter(byte_string, is_little_endian,
                              encoding=encoding)
        elif VR in text_VRs:
            # Text VRs use the 2nd specified encoding
            value = converter(byte_string, is_little_endian,
                              encoding=encoding[1])
        elif VR != "SQ":
            value = converter(byte_string, is_little_endian, num_format)
        else:
            value = convert_SQ(byte_string, is_implicit_VR,
                               is_little_endian, encoding,
                               raw_data_element.value_tell)
    except ValueError:
        if config.enforce_valid_values:
            # The user really wants an exception here
            raise
        logger.debug('unable to translate tag %s with VR %s'
                     % (raw_data_element.tag, VR))
        # Try the other VRs in order until one converts cleanly.
        for vr in convert_retry_VR_order:
            if vr == VR:
                continue
            try:
                value = convert_value(vr, raw_data_element, encoding)
                # FIX: success is now logged on the branch that actually
                # converted; previously the "converted" message was in the
                # for-else, i.e. logged only when NO retry VR succeeded.
                logger.debug('converted tag %s with VR %s'
                             % (raw_data_element.tag, vr))
                break
            except Exception:
                pass
        else:
            # All retry VRs failed -- fall back to the raw bytes.
            logger.debug('could not convert tag %s with any VR in the '
                         'convert_retry_VR_order list'
                         % raw_data_element.tag)
            value = raw_data_element.value
    return value
def __repr__(self):
    # Patient header followed by the repr of every contained study.
    try:
        ds = self.dicom_dataset
        pieces = ["Patient: [{0}] {1} ({2})\n".format(
            ds.PatientSex(), ds.PatientName(), ds.PatientBirthDate())]
        pieces.extend(repr(study) for study in self.studies)
        return "".join(pieces)
    except Exception as e:
        logger.debug("trouble getting Patient data", exc_info=e)
        return "Patient: N/A\n"
def convert_pixel_data(self): """Convert the Pixel Data to a numpy array internally. Returns ------- None Converted pixel data is stored internally in the dataset. If a compressed image format, the image is decompressed, and any related data elements are changed accordingly. """ # Check if already have converted to a NumPy array # Also check if self.PixelData has changed. If so, get new NumPy array already_have = True if not hasattr(self, "_pixel_array"): already_have = False elif self._pixel_id != id(self.PixelData): already_have = False if not already_have: last_exception = None successfully_read_pixel_data = False for x in [h for h in pydicom.config.image_handlers if h and h.supports_transfer_syntax(self)]: try: pixel_array = x.get_pixeldata(self) self._pixel_array = self._reshape_pixel_array(pixel_array) if x.needs_to_convert_to_RGB(self): self._pixel_array = self._convert_YBR_to_RGB( self._pixel_array ) successfully_read_pixel_data = True break except Exception as e: logger.debug("Trouble with", exc_info=e) last_exception = e continue if not successfully_read_pixel_data: handlers_tried = " ".join( [str(x) for x in pydicom.config.image_handlers]) logger.info("%s did not support this transfer syntax", handlers_tried) self._pixel_array = None self._pixel_id = None if last_exception: raise last_exception else: msg = ("No available image handler could " "decode this transfer syntax {}".format( self.file_meta.TransferSyntaxUID)) raise NotImplementedError(msg) # is this guaranteed to work if memory is re-used?? self._pixel_id = id(self.PixelData)
def __repr__(self):
    # Study header followed by the repr of every contained series.
    try:
        ds = self.dicom_dataset
        pieces = ["\tStudy: [{0}] {1} ({2})\n".format(
            ds.StudyID(), ds.StudyDescription(), ds.StudyDate(),
        )]
        pieces.extend(repr(series) for series in self.series)
        return "".join(pieces)
    except Exception as e:
        logger.debug("trouble getting Study data", exc_info=e)
        return "\tStudy: N/A\n"
def __repr__(self):
    # One-line image summary: location, position, orientation, shape,
    # spacing, voxel size.
    try:
        ds = self.dicom_dataset
        return "\t\t\tImage:[{0}] {1} {2} {3} ({4} {5})\n".format(
            ds.SliceLocation(),
            ds.ImagePositionPatient(),
            ds.ImageOrientationPatient(),
            ds.Shape(),
            ds.ImageSpacing(),
            ds.VoxelSize(),
        )
    except Exception as e:
        logger.debug("trouble getting Series data", exc_info=e)
        return "\t\t\tImage: N/A\n"
def __repr__(self):
    # Series header followed by the repr of every contained image.
    try:
        ds = self.dicom_dataset
        pieces = ["\t\tSeries: [{0} {1}] {2} ({3} {4})\n".format(
            ds.Modality(), ds.ProtocolName(), ds.SeriesDescription(),
            ds.BodyPartExamined(), ds.PatientPosition())]
        pieces.extend(repr(image) for image in self.images)
        return "".join(pieces)
    except Exception as e:
        logger.debug("trouble getting Series data", exc_info=e)
        return "\t\tSeries: N/A\n"
def read_preamble(fp: BinaryIO, force: bool) -> Optional[bytes]:
    """Return the 128-byte DICOM preamble in `fp` if present.

    `fp` should be positioned at the start of the file-like. If the
    preamble and prefix are found then after reading `fp` will be
    positioned at the first byte after the prefix. If either are missing
    and `force` is ``True`` then after reading `fp` will be rewound to
    the start of the file-like.

    Parameters
    ----------
    fp : file-like object
        The file-like to read the preamble from.
    force : bool
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : bytes or None
        The 128-byte DICOM preamble will be returned if the appropriate
        prefix ('DICM') is found at byte offset 128. Returns ``None`` if
        the 'DICM' prefix is not found and `force` is ``True``.

    Raises
    ------
    InvalidDicomError
        If `force` is ``False`` and no appropriate header information
        found.

    Notes
    -----
    Also reads past the 'DICM' marker. Rewinds file to the beginning if
    no header found.
    """
    logger.debug("Reading File Meta Information preamble...")
    preamble = fp.read(128)
    if config.debugging:
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug(f"{fp.tell() - 128:08x}: {sample}")

    logger.debug("Reading File Meta Information prefix...")
    magic = fp.read(4)
    # Well-formed header: report and hand back the preamble.
    if magic == b"DICM":
        logger.debug(f"{fp.tell() - 4:08x}: 'DICM' prefix found")
        return preamble

    # No 'DICM' prefix: either refuse outright or rewind and carry on.
    if not force:
        raise InvalidDicomError(
            "File is missing DICOM File Meta Information header or the 'DICM' "
            "prefix is missing from the header. Use force=True to force "
            "reading.")
    logger.info(
        "File is not conformant with the DICOM File Format: 'DICM' "
        "prefix is missing from the File Meta Information header "
        "or the header itself is missing. Assuming no header and "
        "continuing.")
    fp.seek(0)
    return None
def add_dataset(self, dataset):
    """Add an image dataset to this series; raise KeyError when the
    SeriesInstanceUID does not match."""
    try:
        # Mismatched UID: raise, which the except below logs and
        # re-raises as the same KeyError.
        if self.dicom_dataset.SeriesInstanceUID != dataset.SeriesInstanceUID:
            raise KeyError("Not the same SeriesInstanceUIDs")
        if any(img.SOPInstanceUID == dataset.SOPInstanceUID
               for img in self.images):
            logger.debug("Image is already part of this series")
        else:
            self.images.append(Image(dicom_dataset=dataset))
    except Exception as e:
        logger.debug("trouble adding image to series", exc_info=e)
        raise KeyError("Not the same SeriesInstanceUIDs")
def read_preamble(fp, force):
    """Return the 128-byte DICOM preamble in `fp` if present.

    `fp` should be positioned at the start of the file-like. If the
    preamble and prefix are found then after reading `fp` will be
    positioned at the first byte after the prefix. If either are missing
    and `force` is True then after reading `fp` will be rewound to the
    start of the file-like.

    Parameters
    ----------
    fp : file-like object
        The file-like to read the preamble from.
    force : bool
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : str/bytes or None
        The 128-byte DICOM preamble will be returned if the appropriate
        prefix ('DICM') is found at byte offset 128. Returns None if the
        'DICM' prefix is not found and `force` is True.

    Raises
    ------
    InvalidDicomError
        If `force` is False and no appropriate header information found.

    Notes
    -----
    Also reads past the 'DICM' marker. Rewinds file to the beginning if
    no header found.
    """
    logger.debug("Reading File Meta Information preamble...")
    preamble = fp.read(128)
    if config.debugging:
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug("{0:08x}: {1}".format(fp.tell() - 128, sample))

    logger.debug("Reading File Meta Information prefix...")
    magic = fp.read(4)
    # Well-formed header: report and hand back the preamble.
    if magic == b"DICM":
        logger.debug("{0:08x}: 'DICM' prefix found".format(fp.tell() - 4))
        return preamble

    # No 'DICM' prefix: either refuse outright or rewind and carry on.
    if not force:
        raise InvalidDicomError("File is missing DICOM File Meta Information "
                                "header or the 'DICM' prefix is missing from "
                                "the header. Use force=True to force reading.")
    logger.info(
        "File is not conformant with the DICOM File Format: 'DICM' "
        "prefix is missing from the File Meta Information header "
        "or the header itself is missing. Assuming no header and "
        "continuing.")
    fp.seek(0)
    return None
def __setitem__(self, key, value):
    """Operator for Dataset[key] = value.

    Check consistency, and deal with private tags.

    Parameters
    ----------
    key : int
        The tag for the element to be added to the Dataset.
    value : pydicom.dataelem.DataElement or pydicom.dataelem.RawDataElement
        The element to add to the Dataset.

    Raises
    ------
    NotImplementedError
        If `key` is a slice.
    ValueError
        If the `key` value doesn't match DataElement.tag.
    """
    if isinstance(key, slice):
        raise NotImplementedError('Slicing is not supported for setting '
                                  'Dataset elements.')

    # Subclasses such as DeferredDataElement are fine too.
    if not isinstance(value, (DataElement, RawDataElement)):
        raise TypeError("Dataset contents must be DataElement instances.")

    # Reuse an existing BaseTag, otherwise build one from the raw value.
    tag = value.tag if isinstance(value.tag, BaseTag) else Tag(value.tag)
    if tag != key:
        raise ValueError("DataElement.tag must match the dictionary key")

    elem = value
    if tag.is_private:
        # See PS 3.5-2008 section 7.8.1 (p. 44) for how blocks are reserved
        logger.debug("Setting private tag %r" % tag)
        block = tag.elem >> 8
        creator_tag = Tag(tag.group, block)
        if creator_tag in self and tag != creator_tag:
            if isinstance(elem, RawDataElement):
                elem = DataElement_from_raw(elem, self._character_set)
            elem.private_creator = self[creator_tag].value
    dict.__setitem__(self, tag, elem)
def read_deferred_data_element(fileobj_type, filename, timestamp,
                               raw_data_elem):
    """Read the previously deferred value from the file into memory
    and return a raw data element"""
    logger.debug("Reading deferred element %r" % str(raw_data_elem.tag))

    # Guard clauses: the element must have come from a file that still
    # exists on disk.
    if filename is None:
        raise IOError("Deferred read -- original filename not stored. "
                      "Cannot re-open")
    if not os.path.exists(filename):
        raise IOError(u"Deferred read -- original file "
                      "{0:s} is missing".format(filename))

    # Warn (do not fail) if the file changed since the original read.
    if timestamp is not None:
        if os.stat(filename).st_mtime != timestamp:
            warnings.warn("Deferred read warning -- file modification time "
                          "has changed.")

    # Re-open the file and seek back to the element's header.
    fileobj = fileobj_type(filename, 'rb')
    is_implicit_VR = raw_data_elem.is_implicit_VR
    is_little_endian = raw_data_elem.is_little_endian
    offset = data_element_offset_to_value(is_implicit_VR, raw_data_elem.VR)
    fileobj.seek(raw_data_elem.value_tell - offset)
    elem_gen = data_element_generator(fileobj, is_implicit_VR,
                                      is_little_endian, defer_size=None)

    # Re-read the element and confirm it matches what was stored before.
    data_elem = next(elem_gen)
    fileobj.close()
    if data_elem.VR != raw_data_elem.VR:
        raise ValueError("Deferred read VR {0:s} does not match "
                         "original {1:s}".format(data_elem.VR,
                                                 raw_data_elem.VR))
    if data_elem.tag != raw_data_elem.tag:
        raise ValueError("Deferred read tag {0!r} does not match "
                         "original {1!r}".format(data_elem.tag,
                                                 raw_data_elem.tag))

    # Everything is ok, now this object should act like usual DataElement
    return data_elem
def __setitem__(self, key, value):
    """Operator for Dataset[key] = value.

    Check consistency, and deal with private tags.

    Parameters
    ----------
    key : int
        The tag for the element to be added to the Dataset.
    value : pydicom.dataelem.DataElement or pydicom.dataelem.RawDataElement
        The element to add to the Dataset.

    Raises
    ------
    NotImplementedError
        If `key` is a slice.
    ValueError
        If the `key` value doesn't match DataElement.tag.
    """
    if isinstance(key, slice):
        raise NotImplementedError('Slicing is not supported for setting '
                                  'Dataset elements.')

    # Subclasses such as DeferredDataElement are fine too.
    if not isinstance(value, (DataElement, RawDataElement)):
        raise TypeError("Dataset contents must be DataElement instances.")

    tag = Tag(value.tag)
    if tag != key:
        raise ValueError("DataElement.tag must match the dictionary key")

    elem = value
    if tag.is_private:
        # See PS 3.5-2008 section 7.8.1 (p. 44) for how blocks are reserved
        logger.debug("Setting private tag %r" % tag)
        block = tag.elem >> 8
        creator_tag = Tag(tag.group, block)
        if creator_tag in self and tag != creator_tag:
            if isinstance(elem, RawDataElement):
                elem = DataElement_from_raw(elem, self._character_set)
            elem.private_creator = self[creator_tag].value
    dict.__setitem__(self, tag, elem)
def __setitem__(self, key, value):
    """Operator for dataset[key]=value. Check consistency, and deal with private tags"""
    # Subclasses (e.g. DeferredDataElement) pass this check as well.
    if not isinstance(value, (DataElement, RawDataElement)):
        raise TypeError("Dataset contents must be DataElement instances.\n"
                        "To set a data_element value use data_element.value=val")
    tag = Tag(value.tag)
    if tag != key:
        raise ValueError("data_element.tag must match the dictionary key")
    elem = value
    if tag.is_private:
        # See PS 3.5-2008 section 7.8.1 (p. 44) for how blocks are reserved
        logger.debug("Setting private tag %r" % tag)
        block = tag.elem >> 8
        creator_tag = Tag(tag.group, block)
        if creator_tag in self and tag != creator_tag:
            # Resolve the raw element so private_creator can be attached.
            if isinstance(elem, RawDataElement):
                elem = DataElement_from_raw(elem, self._character_set)
            elem.private_creator = self[creator_tag].value
    dict.__setitem__(self, tag, elem)
def absorb_delimiter_item(
    fp: BinaryIO, is_little_endian: bool, delimiter: BaseTag
) -> None:
    """Read (and ignore) undefined length sequence or item terminators.

    If the tag read is not the expected `delimiter`, the stream is
    rewound 8 bytes so the caller can re-read it.
    """
    if is_little_endian:
        struct_format = "<HHL"
    else:
        struct_format = ">HHL"
    group, elem, length = unpack(struct_format, fp.read(8))
    tag = TupleTag((group, elem))
    if tag != delimiter:
        # FIX: Logger.warn() is a deprecated alias -- use warning(),
        # consistent with the rest of the module.
        logger.warning(
            "Did not find expected delimiter "
            f"'{dictionary_description(delimiter)}', instead found "
            f"{tag} at file position 0x{fp.tell() - 8:X}"
        )
        # Put the 8 bytes back for the caller to re-read.
        fp.seek(fp.tell() - 8)
        return

    logger.debug("%04x: Found Delimiter '%s'", fp.tell() - 8,
                 dictionary_description(delimiter))

    if length == 0:
        logger.debug("%04x: Read 0 bytes after delimiter", fp.tell() - 4)
    else:
        logger.debug("%04x: Expected 0x00000000 after delimiter, found 0x%x",
                     fp.tell() - 4, length)
def read_sequence_item(fp: BinaryIO,
                       is_implicit_VR: bool,
                       is_little_endian: bool,
                       encoding: Union[str, MutableSequence[str]],
                       offset: int = 0) -> Optional[Dataset]:
    """Read and return a single :class:`~pydicom.sequence.Sequence` item,
    i.e. a :class:`~pydicom.dataset.Dataset`.

    Returns ``None`` when the Sequence Delimiter is reached.  `offset`
    is added to ``fp.tell()`` values used in logging and stored in
    ``seq_item_tell``.
    """
    # Remember where this item starts (absolute position in the source).
    seq_item_tell = fp.tell() + offset
    if is_little_endian:
        tag_length_format = "<HHL"
    else:
        tag_length_format = ">HHL"
    try:
        bytes_read = fp.read(8)
        group, element, length = unpack(tag_length_format, bytes_read)
    except BaseException:
        raise IOError(
            f"No tag to read at file position {fp.tell() + offset:X}")
    tag = (group, element)
    if tag == SequenceDelimiterTag:  # No more items, time to stop reading
        logger.debug(f"{fp.tell() - 8 + offset:08x}: End of Sequence")
        if length != 0:
            logger.warning(
                f"Expected 0x00000000 after delimiter, found 0x{length:X}, "
                f"at position 0x{fp.tell() - 4 + offset:X}")
        return None
    if tag != ItemTag:
        # Unexpected tag: warn but continue and read the item anyway.
        logger.warning(
            f"Expected sequence item with tag {ItemTag} at file position "
            f"0x{fp.tell() - 4 + offset:X}")
    else:
        logger.debug(f"{fp.tell() - 4 + offset:08x}: {bytes2hex(bytes_read)} "
                     "Found Item tag (start of item)")
    if length == 0xFFFFFFFF:
        # Undefined length: read until the Item Delimitation Item.
        ds = read_dataset(fp, is_implicit_VR, is_little_endian,
                          bytelength=None, parent_encoding=encoding,
                          at_top_level=False)
        ds.is_undefined_length_sequence_item = True
    else:
        # Defined length: read exactly `length` bytes of dataset.
        ds = read_dataset(fp, is_implicit_VR, is_little_endian, length,
                          parent_encoding=encoding, at_top_level=False)
        ds.is_undefined_length_sequence_item = False
    logger.debug(f"{fp.tell() + offset:08X}: Finished sequence item")
    ds.seq_item_tell = seq_item_tell
    return ds
def read_item(fp):
    """Read and return a single Item in the fragmented data stream.

    Returns ``None`` at the Sequence Delimiter or at end of data,
    otherwise the Item's raw bytes.

    Raises
    ------
    ValueError
        If an encapsulated fragment has Undefined Length.
    """
    try:
        tag = fp.read_tag()
    except EOFError:
        # Delimiter was already consumed upstream, so running out of
        # data here is the normal termination condition.
        return None
    if tag == SequenceDelimiterTag:  # No more items, time for sequence to stop reading
        length = fp.read_UL()
        logger.debug("%04x: Sequence Delimiter, length 0x%x",
                     fp.tell() - 8, length)
        if length != 0:
            logger.warning(
                "Expected 0x00000000 after delimiter, found 0x%x, "
                "at data position 0x%x", length, fp.tell() - 4)
        return None
    if tag != ItemTag:
        logger.warning("Expected Item with tag %s at data position 0x%x",
                       ItemTag, fp.tell() - 4)
        length = fp.read_UL()
    else:
        length = fp.read_UL()
        logger.debug("%04x: Item, length 0x%x", fp.tell() - 8, length)
    if length == 0xFFFFFFFF:
        # FIX: the position must be parenthesized -- '%' binds tighter
        # than '-', so the original "... % fp.tell() - 4" raised
        # TypeError (str - int) instead of the intended ValueError.
        raise ValueError(
            "Encapsulated data fragment had Undefined Length at data "
            "position 0x%x" % (fp.tell() - 4))
    item_data = fp.read(length)
    return item_data
def convert_value(VR, raw_data_element, encodings=None):
    """Return the converted value (from raw bytes) for the given VR.

    Falls back through ``convert_retry_VR_order`` when the given `VR`
    cannot decode the bytes (unless ``config.enforce_valid_values``).
    """
    if VR not in converters:
        message = "Unknown Value Representation '{0}'".format(VR)
        raise NotImplementedError(message)

    # Look up the function to convert that VR
    # Dispatch two cases: a plain converter,
    # or a number one which needs a format string
    if isinstance(converters[VR], tuple):
        converter, num_format = converters[VR]
    else:
        converter = converters[VR]
        num_format = None

    # Ensure that encodings is a list
    encodings = encodings or [default_encoding]
    if isinstance(encodings, compat.string_types):
        encodings = [encodings]

    byte_string = raw_data_element.value
    is_little_endian = raw_data_element.is_little_endian
    is_implicit_VR = raw_data_element.is_implicit_VR

    # Not only two cases. Also need extra info if is a raw sequence
    # Pass all encodings to the converter if needed
    try:
        if VR in text_VRs or VR == 'PN':
            value = converter(byte_string, encodings=encodings)
        elif VR != "SQ":
            value = converter(byte_string, is_little_endian, num_format)
        else:
            value = convert_SQ(byte_string, is_implicit_VR,
                               is_little_endian, encodings,
                               raw_data_element.value_tell)
    except ValueError:
        if config.enforce_valid_values:
            # The user really wants an exception here
            raise
        logger.debug('unable to translate tag %s with VR %s' %
                     (raw_data_element.tag, VR))
        # Retry with each fallback VR in order; first success wins.
        for vr in convert_retry_VR_order:
            if vr == VR:
                continue
            try:
                value = convert_value(vr, raw_data_element, encodings)
                logger.debug('converted value for tag %s with VR %s' %
                             (raw_data_element.tag, vr))
                break
            except Exception:
                pass
        else:
            # No fallback VR worked -- keep the raw bytes as the value.
            logger.debug('Could not convert value for tag %s with any VR '
                         'in the convert_retry_VR_order list' %
                         raw_data_element.tag)
            value = raw_data_element.value
    return value
def read_sequence_item(fp, is_implicit_VR, is_little_endian, encoding,
                       offset=0):
    """Read and return a single :class:`~pydicom.sequence.Sequence` item,
    i.e. a :class:`~pydicom.dataset.Dataset`.

    Returns ``None`` when the Sequence Delimiter is reached.  `offset`
    is added to ``fp.tell()`` values used in logging and stored in
    ``seq_item_tell``.
    """
    # Remember where this item starts (absolute position in the source).
    seq_item_tell = fp.tell() + offset
    if is_little_endian:
        tag_length_format = "<HHL"
    else:
        tag_length_format = ">HHL"
    try:
        bytes_read = fp.read(8)
        group, element, length = unpack(tag_length_format, bytes_read)
    except BaseException:
        raise IOError("No tag to read at file position "
                      "{0:05x}".format(fp.tell() + offset))
    tag = (group, element)
    if tag == SequenceDelimiterTag:  # No more items, time to stop reading
        logger.debug("{0:08x}: {1}".format(fp.tell() - 8 + offset,
                                           "End of Sequence"))
        if length != 0:
            logger.warning("Expected 0x00000000 after delimiter, found 0x%x, "
                           "at position 0x%x" %
                           (length, fp.tell() - 4 + offset))
        return None
    if tag != ItemTag:
        # Unexpected tag: warn but continue and read the item anyway.
        logger.warning("Expected sequence item with tag %s at file position "
                       "0x%x" % (ItemTag, fp.tell() - 4 + offset))
    else:
        logger.debug("{0:08x}: {1} Found Item tag (start of item)".format(
            fp.tell() - 4 + offset, bytes2hex(bytes_read)))
    if length == 0xFFFFFFFF:
        # Undefined length: read until the Item Delimitation Item.
        ds = read_dataset(fp, is_implicit_VR, is_little_endian,
                          bytelength=None, parent_encoding=encoding,
                          at_top_level=False)
        ds.is_undefined_length_sequence_item = True
    else:
        # Defined length: read exactly `length` bytes of dataset.
        ds = read_dataset(fp, is_implicit_VR, is_little_endian, length,
                          parent_encoding=encoding, at_top_level=False)
        ds.is_undefined_length_sequence_item = False
    logger.debug("%08x: Finished sequence item" % (fp.tell() + offset, ))
    ds.seq_item_tell = seq_item_tell
    return ds
def read_preamble(fp, force):
    """Return the 128-byte DICOM preamble in `fp` if present.

    Parameters
    ----------
    fp : file-like object
        The file-like to read the preamble from.
    force : bool
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : str/bytes or None
        The 128-byte DICOM preamble will be returned if the appropriate
        prefix ('DICM') is found at byte offset 128. Returns None if the
        'DICM' prefix is not found and `force` is True.

    Raises
    ------
    InvalidDicomError
        If `force` is False and no appropriate header information found.

    Notes
    -----
    Also reads past the 'DICM' marker. Rewinds file to the beginning if
    no header found.
    """
    logger.debug("Reading preamble...")
    preamble = fp.read(0x80)
    if config.debugging:
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug("{0:08x}: {1}".format(fp.tell() - 0x80, sample))
    magic = fp.read(4)
    # Well-formed header: report and hand back the preamble.
    if magic == b"DICM":
        logger.debug("{0:08x}: 'DICM' prefix found".format(fp.tell() - 4))
        return preamble
    # No 'DICM' prefix: either refuse outright or rewind and carry on.
    if not force:
        raise InvalidDicomError("File is missing DICOM header or 'DICM' "
                                "prefix is missing from the header. Use "
                                "force=True to force reading.")
    logger.info(
        "File is not a conformant DICOM file; 'DICM' prefix is "
        "missing from the file header or the header is "
        "missing. Assuming no header and continuing.")
    fp.seek(0)
    return None
def add_dataset(self, dataset):
    """Add `dataset` to this study, filing it into a matching series.

    Tries each existing series in turn (a series signals a mismatch by
    raising from its own ``add_dataset``); if none accepts the dataset a
    new Series is created for it.

    Raises
    ------
    KeyError
        If `dataset` does not share this study's StudyInstanceUID (or any
        other failure occurs while filing it).
    """
    try:
        if self.dicom_dataset.StudyInstanceUID == dataset.StudyInstanceUID:
            for existing_series in self.series:
                try:
                    existing_series.add_dataset(dataset)
                    logger.debug("Part of this series")
                    break
                except Exception:
                    # Mismatch is signalled by an exception; record the
                    # traceback at debug level instead of discarding it,
                    # then try the next series.
                    logger.debug("Not part of this series", exc_info=True)
            else:
                # No existing series accepted the dataset: start a new one
                self.series.append(Series(dicom_dataset=dataset))
        else:
            raise KeyError("Not the same StudyInstanceUIDs")
    except Exception as e:
        logger.debug("trouble adding series to study", exc_info=e)
        raise KeyError("Not the same StudyInstanceUIDs")
def read_preamble(fp, force):
    """Read and return the DICOM preamble.

    Parameters
    ----------
    fp : file-like object
    force : boolean
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : DICOM preamble, None
        The DICOM preamble when the 'DICM' header is found; None when it is
        absent and `force` is True (file rewound to the beginning).

    Raises
    ------
    InvalidDicomError
        If force flag is false and no appropriate header information found.

    Notes
    -----
    Also reads past the 'DICM' marker.
    """
    logger.debug("Reading preamble...")
    preamble = fp.read(0x80)
    if config.debugging:
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug("{0:08x}: {1}".format(fp.tell() - 0x80, sample))

    magic = fp.read(4)
    if magic == b"DICM":
        logger.debug("{0:08x}: 'DICM' marker found".format(fp.tell() - 4))
        return preamble

    # Marker absent: raise unless the caller explicitly forced reading
    if not force:
        raise InvalidDicomError("File is missing 'DICM' marker. "
                                "Use force=True to force reading")
    logger.info("File is not a standard DICOM file; 'DICM' header is "
                "missing. Assuming no header and continuing")
    fp.seek(0)
    return None
def absorb_delimiter_item(fp, is_little_endian, delimiter):
    """Read (and ignore) undefined length sequence or item terminators.

    If the tag read is not the expected `delimiter`, a warning is logged and
    the file position is rewound so the caller can re-read the element.
    """
    if is_little_endian:
        struct_format = "<HHL"
    else:
        struct_format = ">HHL"
    group, elem, length = unpack(struct_format, fp.read(8))
    tag = TupleTag((group, elem))
    if tag != delimiter:
        msg = "Did not find expected delimiter '%s'" % dictionary_description(delimiter)
        msg += ", instead found %s at file position 0x%x" % (str(tag), fp.tell() - 8)
        # Logger.warn() has been deprecated in favor of warning()
        logger.warning(msg)
        # Rewind past the 8 header bytes just consumed
        fp.seek(fp.tell() - 8)
        return
    logger.debug("%04x: Found Delimiter '%s'", fp.tell() - 8,
                 dictionary_description(delimiter))
    if length == 0:
        logger.debug("%04x: Read 0 bytes after delimiter", fp.tell() - 4)
    else:
        # Delimiters should carry a zero length per the DICOM standard
        logger.debug("%04x: Expected 0x00000000 after delimiter, found 0x%x",
                     fp.tell() - 4, length)
def read_sequence_item(fp, is_implicit_VR, is_little_endian, encoding, offset=0):
    """Read and return a single sequence item, i.e. a Dataset.

    Returns None when the Sequence Delimiter item is encountered; otherwise
    the Dataset read, with ``is_undefined_length_sequence_item`` and
    ``seq_item_tell`` set on it.
    """
    seq_item_tell = fp.tell() + offset
    # Item header is (group, element, 32-bit length), endian-dependent
    if is_little_endian:
        tag_length_format = "<HHL"
    else:
        tag_length_format = ">HHL"
    try:
        bytes_read = fp.read(8)
        group, element, length = unpack(tag_length_format, bytes_read)
    except Exception:
        # Was a bare `except:`, which would also swallow KeyboardInterrupt
        # and SystemExit; only convert read/unpack failures into IOError.
        raise IOError("No tag to read at file position "
                      "{0:05x}".format(fp.tell() + offset))
    tag = (group, element)
    if tag == SequenceDelimiterTag:  # No more items, time to stop reading
        logger.debug("{0:08x}: {1}".format(fp.tell() - 8 + offset,
                                           "End of Sequence"))
        if length != 0:
            logger.warning(
                "Expected 0x00000000 after delimiter, found 0x%x, "
                "at position 0x%x" % (length, fp.tell() - 4 + offset)
            )
        return None
    if tag != ItemTag:
        logger.warning(
            "Expected sequence item with tag %s at file position "
            "0x%x" % (ItemTag, fp.tell() - 4 + offset)
        )
    else:
        logger.debug(
            "{0:08x}: {1}  Found Item tag (start of item)".format(
                fp.tell() - 4 + offset, bytes2hex(bytes_read))
        )
    if length == 0xFFFFFFFF:
        # Undefined length item: read until the Item Delimiter tag
        ds = read_dataset(
            fp, is_implicit_VR, is_little_endian,
            bytelength=None, parent_encoding=encoding,
            value_tell_offset=offset
        )
        ds.is_undefined_length_sequence_item = True
    else:
        # Defined length item: read exactly `length` bytes
        ds = read_dataset(
            fp, is_implicit_VR, is_little_endian, length,
            parent_encoding=encoding, value_tell_offset=offset
        )
        ds.is_undefined_length_sequence_item = False
    logger.debug("%08x: Finished sequence item" % (fp.tell() + offset,))
    # Remember the item's start position for later in-place rewriting
    ds.seq_item_tell = seq_item_tell
    return ds
def keyword_for_tag(tag):
    """Return the DICOM keyword for the given tag.

    Will return GroupLength for group length tags,
    and returns empty string ("") if the tag doesn't exist in the dictionary.
    """
    try:
        return dictionary_keyword(tag)
    except KeyError:
        # Tag not present in the dictionary
        return ""


# Provide for the 'reverse' lookup. Given the keyword, what is the tag?
logger.debug("Reversing DICOM dictionary so can look up tag from a keyword...")
# Dict comprehension instead of dict([...]) -- same mapping without building
# an intermediate list of tuples.
keyword_dict = {dictionary_keyword(tag): tag for tag in DicomDictionary}


def tag_for_keyword(keyword):
    """Return the dicom tag corresponding to keyword, or None if none exist."""
    return keyword_dict.get(keyword)


def repeater_has_tag(tag):
    """Return True if the DICOM repeaters dictionary has an entry for `tag`."""
    return (mask_match(tag) in RepeatersDictionary)
def dcmread(fp, defer_size=None, stop_before_pixels=False, force=False,
            specific_tags=None):
    """Read and parse a DICOM dataset stored in the DICOM File Format.

    Reads a dataset stored in accordance with the DICOM File Format (DICOM
    Standard Part 10 Section 7). For files not stored in accordance with the
    File Format (missing preamble/prefix, missing required Type 1 File Meta
    Information Group elements, or missing File Meta Information entirely),
    set `force` to True.

    Parameters
    ----------
    fp : str or file-like
        Either a file-like object, or a string containing the file name. If a
        file-like object, the caller is responsible for closing it.
    defer_size : int or str or None
        If None (default), all elements read into memory. If specified, then
        if a data element's stored value is larger than `defer_size`, the
        value is not read into memory until it is accessed in code. Specify an
        integer (bytes), or a string value with units, e.g. "512 KB", "2 MB".
    stop_before_pixels : bool
        If False (default), the full file will be read and parsed. Set True
        to stop before reading (7FE0,0010) 'Pixel Data' (and all subsequent
        elements).
    force : bool
        If False (default), raises an InvalidDicomError if the file is
        missing the File Meta Information header. Set to True to force
        reading even if no File Meta Information header is found.
    specific_tags : list or None
        If not None, only the tags in the list are returned. The list
        elements can be tags or tag names. The tag Specific Character Set is
        always returned if present, to ensure correct decoding of text.

    Returns
    -------
    FileDataset
        An instance of FileDataset that represents a parsed DICOM file.

    Raises
    ------
    InvalidDicomError
        If `force` is True and the file is not a valid DICOM file.

    See Also
    --------
    pydicom.dataset.FileDataset
        Data class that is returned.
    pydicom.filereader.read_partial
        Only read part of a DICOM file, stopping on given conditions.

    Examples
    --------
    >>> ds = pydicom.dcmread("rtplan.dcm")
    >>> ds.PatientName
    """
    # A string argument is a path: we open the file and must close it later.
    caller_owns_file = not isinstance(fp, compat.string_types)
    if not caller_owns_file:
        try:
            # Unicode-safe debug on Python 2; fall back to the str form
            logger.debug(u"Reading file '{0}'".format(fp))
        except Exception:
            logger.debug("Reading file '{0}'".format(fp))
        fp = open(fp, 'rb')

    if config.debugging:
        logger.debug("\n" + "-" * 80)
        logger.debug("Call to dcmread()")
        msg = ("filename:'%s', defer_size='%s', "
               "stop_before_pixels=%s, force=%s, specific_tags=%s")
        logger.debug(msg % (fp.name, defer_size, stop_before_pixels,
                            force, specific_tags))
        logger.debug("Caller passed file object" if caller_owns_file
                     else "Caller passed file name")
        logger.debug("-" * 80)

    # Normalize "512 KB"-style sizes to a byte count
    defer_size = size_in_bytes(defer_size)

    # Read the full file, or stop at Pixel Data when requested
    stop_when = _at_pixel_data if stop_before_pixels else None
    try:
        dataset = read_partial(fp, stop_when, defer_size=defer_size,
                               force=force, specific_tags=specific_tags)
    finally:
        # Close only a handle we opened ourselves
        if not caller_owns_file:
            fp.close()
    # XXX need to store transfer syntax etc.
    return dataset
def _read_file_meta_info(fp): """Return the file meta information. fp must be set after the 128 byte preamble and 'DICM' marker """ # File meta info always LittleEndian, Explicit VR. After will change these # to the transfer syntax values set in the meta info # Get group length data element, whose value is the length of the meta_info fp_save = fp.tell() # in case need to rewind debugging = config.debugging if debugging: logger.debug("Try to read group length info...") bytes_read = fp.read(8) group, elem, VR, length = unpack("<HH2sH", bytes_read) if debugging: debug_msg = "{0:08x}: {1}".format(fp.tell() - 8, bytes2hex(bytes_read)) if not in_py2: VR = VR.decode(default_encoding) if VR in extra_length_VRs: bytes_read = fp.read(4) length = unpack("<L", bytes_read)[0] if debugging: debug_msg += " " + bytes2hex(bytes_read) if debugging: debug_msg = "{0:<47s} ({1:04x}, {2:04x}) {3:2s} Length: {4:d}".format( debug_msg, group, elem, VR, length) logger.debug(debug_msg) # Store meta group length if it exists, then read until not group 2 if group == 2 and elem == 0: bytes_read = fp.read(length) if debugging: logger.debug("{0:08x}: {1}".format(fp.tell() - length, bytes2hex(bytes_read))) group_length = unpack("<L", bytes_read)[0] expected_ds_start = fp.tell() + group_length if debugging: msg = "value (group length) = {0:d}".format(group_length) msg += " regular dataset should start at {0:08x}".format( expected_ds_start) logger.debug(" " * 10 + msg) else: expected_ds_start = None if debugging: logger.debug(" " * 10 + "(0002,0000) Group length not found.") # Changed in pydicom 0.9.7 -- don't trust the group length, just read # until no longer group 2 data elements. But check the length and # give a warning if group 2 ends at different location. 
# Rewind to read the first data element as part of the file_meta dataset if debugging: logger.debug("Rewinding and reading whole dataset " "including this first data element") fp.seek(fp_save) file_meta = read_dataset(fp, is_implicit_VR=False, is_little_endian=True, stop_when=not_group2) fp_now = fp.tell() if expected_ds_start and fp_now != expected_ds_start: logger.info("*** Group length for file meta dataset " "did not match end of group 2 data ***") else: if debugging: logger.debug("--- End of file meta data found " "as expected ---------") return file_meta
def read_file(fp, defer_size=None, stop_before_pixels=False, force=False,
              toad=True):
    """Read and parse a DICOM file.

    Parameters
    ----------
    fp : file-like object, str
        Either a file-like object, or a string containing the file name. If a
        file-like object, the caller is responsible for closing it.
    defer_size : int, str, None, optional
        If None (default), all elements read into memory. If specified, if a
        data element value is larger than defer_size, then the value is not
        read into memory until it is accessed in code. Specify an integer
        (bytes), or a string value with units, e.g. "512 KB", "2 MB".
    stop_before_pixels : boolean, optional
        If False (default), the full file will be read and parsed. Set True
        to stop before reading pixels (and anything after them).
    force : boolean, optional
        If False (default), raises an InvalidDicomError if the file is not
        valid DICOM. Set to True to force reading even if no header is found.
    toad : boolean, optional
        Deprecated and ignored. Previously caused `stop_before_pixels` to
        stop at tag (0020,0018) instead of Pixel Data; kept only for
        backward compatibility of the call signature.

    Returns
    -------
    FileDataset
        An instance of FileDataset that represents a parsed DICOM file.

    Raises
    ------
    InvalidDicomError
        If the force flag is True and the file is not a valid DICOM file.

    See Also
    --------
    pydicom.dataset.FileDataset
        Data class that is returned.
    pydicom.filereader.read_partial
        Only read part of a DICOM file, stopping on given conditions.

    Examples
    --------
    Read file and return file dataset:

    >>> rtplan = pydicom.read_file("rtplan.dcm")
    >>> rtplan.PatientName
    """
    # Open file if not already a file object
    caller_owns_file = True
    if isinstance(fp, compat.string_types):
        # caller provided a file name; we own the file handle
        caller_owns_file = False
        logger.debug(u"Reading file '{0}'".format(fp))
        fp = open(fp, 'rb')

    if config.debugging:
        logger.debug("\n" + "-" * 80)
        logger.debug("Call to read_file()")
        msg = ("filename:'%s', defer_size='%s', "
               "stop_before_pixels=%s, force=%s")
        logger.debug(msg % (fp.name, defer_size, stop_before_pixels, force))
        if caller_owns_file:
            logger.debug("Caller passed file object")
        else:
            logger.debug("Caller passed file name")
        logger.debug("-" * 80)

    # Convert "512 KB"-style sizes to bytes (consistent with dcmread)
    defer_size = size_in_bytes(defer_size)

    # Iterate through all items and store them --include file meta if present
    # Fix: the `toad` flag previously hijacked stop_before_pixels to stop at
    # tag (0020,0018) rather than Pixel Data, contradicting the docstring.
    # Always stop at Pixel Data when stop_before_pixels is requested.
    stop_when = None
    if stop_before_pixels:
        stop_when = _at_pixel_data
    try:
        dataset = read_partial(fp, stop_when, defer_size=defer_size,
                               force=force)
    finally:
        # Close only a handle we opened ourselves
        if not caller_owns_file:
            fp.close()
    # XXX need to store transfer syntax etc.
    return dataset