def read_dicomdir(filename="DICOMDIR"):
    """Open a DICOMDIR file and hand back the resulting DicomDir instance.

    Thin convenience wrapper over dcmread that supplies a default file name
    and verifies the type of the object that was read.

    Parameters
    ----------
    filename : str, optional
        Full path and name of the DICOMDIR file to open.

    Returns
    -------
    DicomDir

    Raises
    ------
    InvalidDicomError
        Raised if the object read from `filename` is not a DicomDir.
    """
    # dcmread hands back a DicomDir instance when the file is one.
    dicomdir = dcmread(filename)
    if isinstance(dicomdir, DicomDir):
        return dicomdir

    # Anything else means the file is not a media storage directory.
    raise InvalidDicomError(
        u"File '{0}' is not a Media Storage Directory file".format(filename)
    )
def __init__(self, filename_or_obj, dataset, preamble=None, file_meta=None,
             is_implicit_VR=True, is_little_endian=True):
    """Initialize a DICOMDIR dataset read from a DICOM file

    Carries forward all the initialization from FileDataset class

    :param filename_or_obj: full path and filename to the file.
        Use None if is a BytesIO.
    :param dataset: some form of dictionary, usually a Dataset
        from read_dataset()
    :param preamble: the 128-byte DICOM preamble
    :param file_meta: the file meta info dataset, as returned by
        _read_file_meta, or an empty dataset if no file meta
        information is in the file
    :param is_implicit_VR: True if implicit VR transfer syntax used;
        False if explicit VR. Default is True.
    :param is_little_endian: True if little-endian transfer syntax used;
        False if big-endian. Default is True.
    :raises InvalidDicomError: if `file_meta` is given and its
        MediaStorageSOPClassUID is not "Media Storage Directory Storage"
    """
    # Usually this class is created through filereader.read_partial,
    # and it checks class SOP, but in case of direct creation,
    # check here also
    if file_meta:
        class_uid = file_meta.MediaStorageSOPClassUID
        if not class_uid == "Media Storage Directory Storage":
            msg = "SOP Class is not Media Storage Directory (DICOMDIR)"
            raise InvalidDicomError(msg)

    # Forward the caller's transfer syntax flags; previously both were
    # hard-coded to True, silently discarding the values passed in.
    FileDataset.__init__(self, filename_or_obj, dataset, preamble, file_meta,
                         is_implicit_VR=is_implicit_VR,
                         is_little_endian=is_little_endian)
    self.parse_records()
def read_dicomdir(filename: PathType = "DICOMDIR") -> DicomDir:
    """Read a DICOMDIR file and return a :class:`~pydicom.dicomdir.DicomDir`.

    Convenience wrapper around :func:`dcmread` that supplies a default file
    name and checks the type of the dataset that was read.

    .. deprecated:: 2.1

        ``read_dicomdir()`` is deprecated and will be removed in v3.0. Use
        :func:`~pydicom.filereader.dcmread` instead.

    Parameters
    ----------
    filename : str, optional
        Full path and name to DICOMDIR file to open

    Returns
    -------
    DicomDir

    Raises
    ------
    InvalidDicomError
        Raised if filename is not a DICOMDIR file.
    """
    warnings.warn(
        "'read_dicomdir()' is deprecated and will be removed in v3.0, use "
        "'dcmread()' instead",
        DeprecationWarning
    )

    dicomdir = dcmread(path_from_pathlike(filename))
    if isinstance(dicomdir, DicomDir):
        return dicomdir

    # The dataset was read, but it is not a media storage directory.
    raise InvalidDicomError(
        f"File '{filename!r}' is not a Media Storage Directory file"
    )
def read_preamble(fp: BinaryIO, force: bool) -> Optional[bytes]:
    """Return the 128-byte DICOM preamble in `fp` if present.

    Reads 128 bytes followed by a 4-byte prefix from the current position of
    `fp`. When the prefix is ``b'DICM'`` the file-like is left positioned
    immediately after the prefix. When the prefix does not match and `force`
    is ``True``, `fp` is rewound to offset 0.

    Parameters
    ----------
    fp : file-like object
        The file-like to read the preamble from; expected to be positioned
        at the start of the file.
    force : bool
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : bytes or None
        The 128 bytes read before the prefix if the ``b'DICM'`` prefix
        follows them, otherwise ``None`` (only when `force` is ``True``).

    Raises
    ------
    InvalidDicomError
        If the ``b'DICM'`` prefix is not found and `force` is ``False``.
    """
    logger.debug("Reading File Meta Information preamble...")
    preamble = fp.read(128)
    if config.debugging:
        # Log only the first and last 8 bytes of the preamble
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug(f"{fp.tell() - 128:08x}: {sample}")

    logger.debug("Reading File Meta Information prefix...")
    prefix = fp.read(4)

    # Conformant file: keep the position just past the prefix
    if prefix == b"DICM":
        logger.debug(f"{fp.tell() - 4:08x}: 'DICM' prefix found")
        return preamble

    if not force:
        raise InvalidDicomError(
            "File is missing DICOM File Meta Information header or the 'DICM' "
            "prefix is missing from the header. Use force=True to force "
            "reading.")

    # Forced read of a headerless file: start over from the beginning
    logger.info(
        "File is not conformant with the DICOM File Format: 'DICM' "
        "prefix is missing from the File Meta Information header "
        "or the header itself is missing. Assuming no header and "
        "continuing.")
    fp.seek(0)
    return None
def getattr_required(dataset, name):
    '''
    Return the attribute `name` of `dataset`, which is required to exist.

    Helper function that should be used when accessing a required DICOM
    attribute, which should raise our standard exception upon a failure.

    :param dataset: object to read the attribute from
    :param name: name of the required attribute
    :return: the value of ``getattr(dataset, name)``
    :raises InvalidDicomError: if the attribute lookup raises AttributeError;
        the original AttributeError is attached as the cause
    '''
    try:
        return getattr(dataset, name)
    except AttributeError as err:
        # Chain explicitly so the traceback shows the failed lookup as the
        # direct cause rather than an incidental error during handling.
        raise InvalidDicomError(
            f"Missing required DICOM attribute {name}") from err
def __init__(self, filename_or_obj, dataset, preamble=None, file_meta=None,
             is_implicit_VR=True, is_little_endian=True):
    """Initialize a DICOMDIR dataset read from a DICOM file.

    Delegates the actual initialization to
    :class:`~pydicom.dataset.FileDataset` and then builds the record
    hierarchy via ``parse_records()``.

    Parameters
    ----------
    filename_or_obj : str or None
        Full path and filename to the file or ``None`` if
        :class:`io.BytesIO`.
    dataset : dataset.Dataset
        Some form of dictionary, usually a
        :class:`~pydicom.dataset.FileDataset` from
        :func:`~pydicom.filereader.dcmread`.
    preamble : bytes
        The 128-byte DICOM preamble.
    file_meta : dataset.Dataset
        The file meta :class:`~pydicom.dataset.Dataset`, such as
        the one returned by
        :func:`~pydicom.filereader.read_file_meta_info`, or an empty
        :class:`~pydicom.dataset.Dataset` if no file meta information is
        in the file.
    is_implicit_VR : bool
        ``True`` if implicit VR transfer syntax used (default); ``False``
        if explicit VR.
    is_little_endian : bool
        ``True`` if little endian transfer syntax used (default); ``False``
        if big endian.

    Raises
    ------
    InvalidDicomError
        If `file_meta` is given and the name of its
        *Media Storage SOP Class UID* is not
        "Media Storage Directory Storage".
    """
    # Normally filereader.read_partial creates this class and has already
    # checked the SOP class; repeat the check to cover direct construction.
    if file_meta:
        sop_class = file_meta.MediaStorageSOPClassUID
        if sop_class.name != "Media Storage Directory Storage":
            raise InvalidDicomError(
                "SOP Class is not Media Storage Directory (DICOMDIR)")

    FileDataset.__init__(
        self, filename_or_obj, dataset, preamble, file_meta,
        is_implicit_VR=is_implicit_VR,
        is_little_endian=is_little_endian,
    )
    self.parse_records()
def _is_implicit_vr(fp, implicit_vr_is_assumed, is_little_endian, stop_when):
    """Check if the real VR is explicit or implicit.

    Reads 4 tag bytes and 2 candidate VR bytes from `fp`; the file position
    is not restored here.

    Parameters
    ----------
    fp : an opened file object
    implicit_vr_is_assumed : bool
        True if implicit VR is assumed.
        If this does not match with the real transfer syntax, a user warning
        will be issued (or an exception raised, see Raises).
    is_little_endian : bool
        True if file has little endian transfer syntax.
        Needed to interpret the first tag.
    stop_when : None, optional
        Optional call_back function which can terminate reading.
        Needed to check if the next tag still belongs to the read dataset.

    Returns
    -------
    True if implicit VR is used, False otherwise.

    Raises
    ------
    InvalidDicomError
        If the detected VR encoding differs from the assumed one and
        ``config.enforce_valid_values`` is set.
    """
    tag_bytes = fp.read(4)
    vr = fp.read(2)
    # Not enough data to decide - stick with the assumption
    if len(vr) < 2:
        return implicit_vr_is_assumed

    # it is sufficient to check if the VR is in valid ASCII range, as it is
    # extremely unlikely that the tag length accidentally has such a
    # representation - this would need the first tag to be longer than 16kB
    # (e.g. it should be > 0x4141 = 16705 bytes)
    if in_py2:
        first, second = ord(vr[0]), ord(vr[1])
    else:
        first, second = vr[0], vr[1]
    looks_explicit = 0x40 < first < 0x5B and 0x40 < second < 0x5B
    found_implicit = not looks_explicit

    if found_implicit == implicit_vr_is_assumed:
        return found_implicit

    # first check if the tag still belongs to the dataset if stop_when
    # is given - if not, the dataset is empty and we just return
    byte_order = "<" if is_little_endian else ">"
    tag = TupleTag(unpack(byte_order + "HH", tag_bytes))
    if stop_when is not None and stop_when(tag, vr, 0):
        return found_implicit

    # got to the real problem - warn or raise depending on config
    if found_implicit:
        found_vr, expected_vr = 'implicit', 'explicit'
    else:
        found_vr, expected_vr = 'explicit', 'implicit'
    message = ('Expected {0} VR, but found {1} VR - using {1} VR for '
               'reading'.format(expected_vr, found_vr))
    if config.enforce_valid_values:
        raise InvalidDicomError(message)
    warnings.warn(message, UserWarning)
    return found_implicit
def read_preamble(fp, force):
    """Return the 128-byte DICOM preamble in `fp` if present.

    Parameters
    ----------
    fp : file-like object
        The file-like to read the preamble from.
    force : bool
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : str/bytes or None
        The 128 bytes read before the prefix if the 'DICM' prefix follows
        them. Returns None if the 'DICM' prefix is not found and `force`
        is True.

    Raises
    ------
    InvalidDicomError
        If `force` is False and the 'DICM' prefix is not found.

    Notes
    -----
    Also reads past the 'DICM' marker. Rewinds file to the beginning if
    no header found.
    """
    logger.debug("Reading preamble...")
    preamble = fp.read(0x80)
    if config.debugging:
        # Log only the first and last 8 bytes of the preamble
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug("{0:08x}: {1}".format(fp.tell() - 0x80, sample))

    prefix = fp.read(4)
    if prefix == b"DICM":
        logger.debug("{0:08x}: 'DICM' prefix found".format(fp.tell() - 4))
        return preamble

    if not force:
        raise InvalidDicomError("File is missing DICOM header or 'DICM' "
                                "prefix is missing from the header. Use "
                                "force=True to force reading.")

    # Forced read without a header: rewind and report no preamble
    logger.info(
        "File is not a conformant DICOM file; 'DICM' prefix is "
        "missing from the file header or the header is "
        "missing. Assuming no header and continuing.")
    fp.seek(0)
    return None
def parse_records(self):
    """Link the directory records into a parent/children hierarchy.

    Every record in ``self.DirectoryRecordSequence`` receives a ``children``
    list, and the records whose type is 'PATIENT' are collected in
    ``self.patient_records``.

    This is intended for initial read of file only, it will not
    reorganize correctly if records are changed.

    Raises
    ------
    InvalidDicomError
        If no record of type 'PATIENT' is present.
    """
    records = self.DirectoryRecordSequence

    # Index each record by the file offset stored in its seq_item_tell
    by_offset = {record.seq_item_tell: record for record in records}

    def sibling_chain(first):
        """Return `first` followed by all records linked after it."""
        chain = [first]
        node = first
        while True:
            if 'OffsetOfTheNextDirectoryRecord' not in node:
                break
            next_offset = node.OffsetOfTheNextDirectoryRecord
            # A falsy offset terminates the chain
            if not next_offset:
                break
            node = by_offset[next_offset]
            chain.append(node)
        return chain

    # Attach to each record the chain starting at its first lower-level record
    for record in records:
        record.children = []
        if 'OffsetOfReferencedLowerLevelDirectoryEntity' not in record:
            continue
        lower_offset = record.OffsetOfReferencedLowerLevelDirectoryEntity
        if lower_offset:
            record.children = sibling_chain(by_offset[lower_offset])

    self.patient_records = [
        record for record in records
        if record.DirectoryRecordType == 'PATIENT'
    ]
    if not self.patient_records:
        raise InvalidDicomError('Missing PATIENT record(s) in DICOMDIR')
def read_preamble(fp, force):
    """Read and return the DICOM preamble.

    Parameters
    ----------
    fp : file-like object
    force : boolean
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : DICOM preamble, None
        The 128 bytes read before the marker if the 'DICM' marker follows
        them. Returns None if the marker is not found and `force` is set.

    Raises
    ------
    InvalidDicomError
        If force flag is false and the 'DICM' marker is not found.

    Notes
    -----
    Also reads past the 'DICM' marker. Rewinds file to the beginning if
    no header found.
    """
    logger.debug("Reading preamble...")
    preamble = fp.read(0x80)
    if config.debugging:
        # Log only the first and last 8 bytes of the preamble
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug("{0:08x}: {1}".format(fp.tell() - 0x80, sample))

    marker = fp.read(4)
    if marker == b"DICM":
        logger.debug("{0:08x}: 'DICM' marker found".format(fp.tell() - 4))
        return preamble

    if not force:
        raise InvalidDicomError("File is missing 'DICM' marker. "
                                "Use force=True to force reading")

    # Forced read without a header: rewind and report no preamble
    logger.info("File is not a standard DICOM file; 'DICM' header is "
                "missing. Assuming no header and continuing")
    fp.seek(0)
    return None
def read_dicomdir(filename="DICOMDIR"):
    """Read a DICOMDIR file and return a :class:`~pydicom.dicomdir.DicomDir`.

    Convenience wrapper around :func:`dcmread` that supplies a default file
    name and checks the type of the dataset that was read.

    .. deprecated:: 2.1

        ``read_dicomdir()`` is deprecated and will be removed in v3.0. Use
        :func:`~pydicom.filereader.dcmread` instead.

    Parameters
    ----------
    filename : str, optional
        Full path and name to DICOMDIR file to open

    Returns
    -------
    DicomDir

    Raises
    ------
    InvalidDicomError
        Raised if filename is not a DICOMDIR file.
    """
    warnings.warn(
        "'read_dicomdir()' is deprecated and will be removed in v3.0, use "
        "'dcmread()' instead",
        DeprecationWarning
    )

    # dcmread hands back a DicomDir instance when the file is one.
    dicomdir = dcmread(filename)
    if isinstance(dicomdir, DicomDir):
        return dicomdir

    # The dataset was read, but it is not a media storage directory.
    raise InvalidDicomError(
        "File '{0}' is not a Media Storage Directory file".format(filename)
    )
def _test():
    """Raise InvalidDicomError unconditionally with a fixed message."""
    message = 'test msg'
    raise InvalidDicomError(message)
def __init__(
    self,
    filename_or_obj: Union[str, os.PathLike, BinaryIO],
    dataset: Dataset,
    preamble: Optional[bytes] = None,
    file_meta: Optional[FileMetaDataset] = None,
    is_implicit_VR: bool = True,
    is_little_endian: bool = True,
) -> None:
    """Initialize a DICOMDIR dataset read from a DICOM file.

    Delegates the actual initialization to
    :class:`~pydicom.dataset.FileDataset` and then builds the record
    hierarchy via ``parse_records()``.

    Parameters
    ----------
    filename_or_obj : str or PathLike or file-like or None
        Full path and filename to the file or ``None`` if
        :class:`io.BytesIO`.
    dataset : dataset.Dataset
        Some form of dictionary, usually a
        :class:`~pydicom.dataset.FileDataset` from
        :func:`~pydicom.filereader.dcmread`.
    preamble : bytes
        The 128-byte DICOM preamble.
    file_meta : dataset.Dataset
        The file meta :class:`~pydicom.dataset.Dataset`, such as
        the one returned by
        :func:`~pydicom.filereader.read_file_meta_info`, or an empty
        :class:`~pydicom.dataset.Dataset` if no file meta information is
        in the file.
    is_implicit_VR : bool
        ``True`` if implicit VR transfer syntax used (default); ``False``
        if explicit VR.
    is_little_endian : bool
        ``True`` if little endian transfer syntax used (default); ``False``
        if big endian.

    Raises
    ------
    InvalidDicomError
        If `file_meta` is given and the name of its
        *Media Storage SOP Class UID* is not
        "Media Storage Directory Storage", or if the transfer syntax is
        not Little Endian Explicit and
        :func:`enforce_valid_values<pydicom.config.enforce_valid_values>`
        is ``True``.
    """
    # Normally filereader.read_partial creates this class and has already
    # checked the SOP class; repeat the check to cover direct construction.
    if file_meta:
        sop_class = file_meta.MediaStorageSOPClassUID
        if sop_class.name != "Media Storage Directory Storage":
            raise InvalidDicomError(
                "SOP Class is not Media Storage Directory (DICOMDIR)")

    # Anything other than explicit VR little endian is reported:
    # raise when enforcing valid values, otherwise just warn.
    explicit_little_endian = not is_implicit_VR and is_little_endian
    if not explicit_little_endian:
        syntax_msg = ('Invalid transfer syntax for DICOMDIR - '
                      'Explicit Little Endian expected.')
        if config.enforce_valid_values:
            raise InvalidDicomError(syntax_msg)
        warnings.warn(syntax_msg, UserWarning)

    FileDataset.__init__(
        self, filename_or_obj, dataset, preamble, file_meta,
        is_implicit_VR=is_implicit_VR,
        is_little_endian=is_little_endian,
    )

    # Create the attribute before parse_records() runs
    self.patient_records: List[Dataset] = []
    self.parse_records()
def _is_implicit_vr(
    fp: BinaryIO,
    implicit_vr_is_assumed: bool,
    is_little_endian: bool,
    stop_when: Optional[Callable[[BaseTag, Optional[str], int], bool]],
    is_sequence: bool
) -> bool:
    """Check if the real VR is explicit or implicit.

    Unless the early sequence shortcut applies, reads 4 tag bytes and
    2 candidate VR bytes from `fp`; the file position is not restored here.

    Parameters
    ----------
    fp : an opened file object
    implicit_vr_is_assumed : bool
        True if implicit VR is assumed.
        If this does not match with the real transfer syntax, a user warning
        will be issued (or an exception raised, see Raises).
    is_little_endian : bool
        True if file has little endian transfer syntax.
        Needed to interpret the first tag.
    stop_when : None, optional
        Optional call_back function which can terminate reading.
        Needed to check if the next tag still belongs to the read dataset.
    is_sequence : bool
        True if called for a sequence, False for a top-level dataset.

    Returns
    -------
    True if implicit VR is used, False otherwise.

    Raises
    ------
    InvalidDicomError
        If the detected VR encoding differs from the assumed one, no
        exemption applies, and ``config.enforce_valid_values`` is set.
    """
    # sequences do not switch from implicit to explicit encoding,
    # but they are allowed to use implicit encoding if the dataset
    # is encoded as explicit VR
    if is_sequence and implicit_vr_is_assumed:
        return True

    tag_bytes = fp.read(4)
    raw_vr = fp.read(2)
    # Not enough data to decide - stick with the assumption
    if len(raw_vr) < 2:
        return implicit_vr_is_assumed

    # it is sufficient to check if the VR is in valid ASCII range, as it is
    # extremely unlikely that the tag length accidentally has such a
    # representation - this would need the first tag to be longer than 16kB
    # (e.g. it should be > 0x4141 = 16705 bytes)
    first, second = raw_vr[0], raw_vr[1]
    looks_explicit = 0x40 < first < 0x5B and 0x40 < second < 0x5B
    found_implicit = not looks_explicit

    if found_implicit == implicit_vr_is_assumed:
        return found_implicit

    # first check if the tag still belongs to the dataset if stop_when
    # is given - if not, the dataset is empty and we just return
    byte_order = "<" if is_little_endian else ">"
    tag = _unpack_tag(tag_bytes, byte_order)
    vr = raw_vr.decode(default_encoding)
    if stop_when is not None and stop_when(tag, vr, 0):
        return found_implicit

    # sequences with undefined length can be encoded in implicit VR,
    # see PS 3.5, section 6.2.2
    if found_implicit and is_sequence:
        return True

    # got to the real problem - warn or raise depending on config
    if found_implicit:
        found_vr, expected_vr = 'implicit', 'explicit'
    else:
        found_vr, expected_vr = 'explicit', 'implicit'
    msg = f"Expected {expected_vr} VR, but found {found_vr} VR"
    if config.enforce_valid_values:
        raise InvalidDicomError(msg)

    warnings.warn(msg + f" - using {found_vr} VR for reading", UserWarning)
    return found_implicit
def ingest(self, input_file, skip_contours=False):
    '''
    Load an RT Struct DICOM file and convert its ROIs to an intermediate format

    :param input_file: Path to the dicom rt-struct file
    :param skip_contours: When true, the per-contour point data
        (the 'sequence' entry) is not extracted
    :return: list with one dict per ROI contour entry that yielded data.
        Possible keys: 'name', 'roi_number', 'referenced_frame',
        'display_color' and 'sequence' (a list of dicts holding the contour
        'type' and its 'points' split into 'x', 'y' and 'z' lists)
    :raises InvalidFileFormatException: if input_file is a directory, is
        rejected with InvalidDicomError, or has no StructureSetROISequence
    '''

    def axis_values(contour, offset):
        # ContourData is a flat x, y, z, x, y, z, ... array; pick every
        # third value starting at the given offset.
        if not hasattr(contour, 'ContourData'):
            return None
        data = contour.ContourData
        return [data[start + offset] for start in range(0, len(data), 3)]

    def describe(contour):
        # Intermediate representation of a single contour
        if hasattr(contour, 'ContourGeometricType'):
            geometric_type = contour.ContourGeometricType
        else:
            geometric_type = 'unknown'
        return {
            'type': geometric_type,
            'points': {
                'x': axis_values(contour, 0),
                'y': axis_values(contour, 1),
                'z': axis_values(contour, 2),
            },
        }

    try:
        rt_struct_image = pydicom.read_file(input_file)

        if not hasattr(rt_struct_image, 'StructureSetROISequence'):
            raise InvalidDicomError()
    except (IsADirectoryError, InvalidDicomError):
        raise InvalidFileFormatException(
            'File {} is not an rt-struct dicom'.format(input_file))

    # Map ROI numbers to their metadata so that a ReferencedROINumber can
    # be traced back easily.
    metadata_mappings = {
        roi_metadata.ROINumber: roi_metadata
        for roi_metadata in rt_struct_image.StructureSetROISequence
    }

    # (output key, metadata attribute) pairs, copied only when present
    metadata_fields = (
        ('name', 'ROIName'),
        ('roi_number', 'ROINumber'),
        ('referenced_frame', 'ReferencedFrameOfReferenceUID'),
    )

    contours = []
    for contour_sequence in rt_struct_image.ROIContourSequence:
        contour_data = {}
        metadata = metadata_mappings[contour_sequence.ReferencedROINumber]

        # These attributes are not assumed to always be present, so each
        # one is checked before it is copied.
        for key, attribute in metadata_fields:
            if hasattr(metadata, attribute):
                contour_data[key] = getattr(metadata, attribute)

        has_color = (hasattr(contour_sequence, 'ROIDisplayColor')
                     and len(contour_sequence.ROIDisplayColor) > 0)
        if has_color:
            contour_data['display_color'] = contour_sequence.ROIDisplayColor

        wants_points = (not skip_contours
                        and hasattr(contour_sequence, 'ContourSequence')
                        and len(contour_sequence.ContourSequence) > 0)
        if wants_points:
            contour_data['sequence'] = [
                describe(contour)
                for contour in contour_sequence.ContourSequence
            ]

        # only add contour if we successfully extracted (some) data
        if contour_data:
            contours.append(contour_data)

    return contours