示例#1
0
def _write_file_meta_info(fp, meta_dataset):
    """Write the dicom group 2 dicom storage File Meta Information to the file.

    The file should already be positioned past the 128 byte preamble.
    Raises ValueError if the required data_elements (elements 2,3,0x10,0x12)
    are not in the dataset. If the dataset came from a file read with
    read_file(), then the required data_elements should already be there.
    """
    fp.write(b'DICM')

    # File meta info is always LittleEndian, Explicit VR. After will change these
    #    to the transfer syntax values set in the meta info
    fp.is_little_endian = True
    fp.is_implicit_VR = False

    if Tag((2, 1)) not in meta_dataset:
        meta_dataset.add_new((2, 1), 'OB',
                             b"\0\1")  # file meta information version

    # Now check that required meta info tags are present:
    missing = []
    for element in [2, 3, 0x10, 0x12]:
        if Tag((2, element)) not in meta_dataset:
            missing.append(Tag((2, element)))
    if missing:
        raise ValueError(
            "Missing required tags {0} for file meta information".format(
                str(missing)))

    # Put in temp number for required group length, save current location to come back
    meta_dataset[(2, 0)] = DataElement((2, 0), 'UL', 0)  # put 0 to start
    group_length_data_element_size = 12  # !based on DICOM std ExplVR
    group_length_tell = fp.tell()

    # Write the file meta datset, including temp group length
    length = write_dataset(fp, meta_dataset)
    group_length = length - group_length_data_element_size  # counts from end of that

    # Save end of file meta to go back to
    end_of_file_meta = fp.tell()

    # Go back and write the actual group length
    fp.seek(group_length_tell)
    group_length_data_element = DataElement((2, 0), 'UL', group_length)
    write_data_element(fp, group_length_data_element)

    # Return to end of file meta, ready to write remainder of the file
    fp.seek(end_of_file_meta)
 def test_matching_tags(self):
     """Dataset: key and data_element.tag mismatch raises ValueError....."""
     def set_wrong_tag():
         ds[0x10, 0x10] = data_element
     ds = Dataset()
     data_element = DataElement((0x300a, 0x00b2), "SH", "unit001")
     self.assertRaises(ValueError, set_wrong_tag)
示例#3
0
def read_sequence_item(fp, is_implicit_VR, is_little_endian):
    """Read and return a single sequence item, i.e. a Dataset"""
    if is_little_endian:
        tag_length_format = "<HHL"
    else:
        tag_length_format = ">HHL"
    try:
        bytes_read = fp.read(8)
        group, element, length = unpack(tag_length_format, bytes_read)
    except:
        raise IOError, "No tag to read at file position %05x" % fp.tell()

    tag = (group, element)
    if tag == SequenceDelimiterTag: # No more items, time to stop reading
        data_element = DataElement(tag, None, None, fp.tell()-4)
        logger.debug("%08x: %s" % (fp.tell()-8, "End of Sequence"))
        if length != 0:
            logger.warning("Expected 0x00000000 after delimiter, found 0x%x, at position 0x%x" % (length, fp.tell()-4))
        return None
    if tag != ItemTag:
        logger.warning("Expected sequence item with tag %s at file position 0x%x" % (ItemTag, fp.tell()-4))
    else:
        logger.debug("%08x: %s  Found Item tag (start of item)" % (fp.tell()-4,
                          bytes2hex(bytes_read)))
    is_undefined_length = False
    if length == 0xFFFFFFFFL:
        ds = read_dataset(fp, is_implicit_VR, is_little_endian, bytelength=None)
        ds.is_undefined_length_sequence_item = True
    else:
        ds = read_dataset(fp, is_implicit_VR, is_little_endian, length)
    logger.debug("%08x: Finished sequence item" % fp.tell())
    return ds
示例#4
0
 def testUpdate(self):
     """Dataset: update() method works with tag or name..................."""
     ds = self.dummy_dataset()
     pat_data_element = DataElement((0x10, 0x12), 'PN', 'Johnny')
     ds.update({'PatientName': 'John', (0x10, 0x12): pat_data_element})
     self.assertEqual(ds[0x10, 0x10].value, 'John',
                      "named data_element not set")
     self.assertEqual(ds[0x10, 0x12].value, 'Johnny', "set by tag failed")
示例#5
0
 def test_empty_AT(self):
     """Write empty AT correctly.........."""
     # Was issue 74
     data_elem = DataElement(0x00280009, "AT", [])
     expected = hex2bytes((
         " 28 00 09 00"  # (0028,0009) Frame Increment Pointer
         " 00 00 00 00"  # length 0
     ))
     write_data_element(self.f1, data_elem)
     got = self.f1.parent.getvalue()
     msg = ("Did not write zero-length AT value correctly. "
            "Expected %r, got %r") % (bytes2hex(expected), bytes2hex(got))
     msg = "%r %r" % (type(expected), type(got))
     msg = "'%r' '%r'" % (expected, got)
     self.assertEqual(expected, got, msg)
示例#6
0
    def __setattr__(self, name, value):
        """Intercept any attempts to set a value for an instance attribute.

        If name is a dicom descriptive string (cleaned with CleanName),
        then set the corresponding tag and data_element.
        Else, set an instance (python) attribute as any other class would do.

        """
        tag = tag_for_name(name)
        if tag is not None:  # successfully mapped name to a tag
            if tag not in self:  # don't have this tag yet->create the data_element instance
                VR = dictionaryVR(tag)
                data_element = DataElement(tag, VR, value)
            else:  # already have this data_element, just changing its value
                data_element = self[tag]
                data_element.value = value
            # Now have data_element - store it in this dict
            self[tag] = data_element
        else:  # name not in dicom dictionary - setting a non-dicom instance attribute
            # XXX note if user mis-spells a dicom data_element - no error!!!
            self.__dict__[name] = value
示例#7
0
    def run(self, ident_dir, clean_dir):
        # Get first date for tags set in relative_dates
        date_adjust = None
        audit_date_correct = None
        if self.relative_dates is not None:
            date_adjust = {
                tag: first_date - datetime(1970, 1, 1)
                for tag, first_date in self.get_first_date(
                    ident_dir, self.relative_dates).items()
            }
        for root, _, files in os.walk(ident_dir):
            for filename in files:
                if filename.startswith('.'):
                    continue
                source_path = os.path.join(root, filename)
                try:
                    ds = dicom.read_file(source_path)
                except IOError:
                    logger.error('Error reading file %s' % source_path)
                    self.close_all()
                    return False
                except InvalidDicomError:  # DICOM formatting error
                    self.quarantine_file(source_path, ident_dir,
                                         'Could not read DICOM file.')
                    continue

                move, reason = self.check_quarantine(ds)

                if move:
                    self.quarantine_file(source_path, ident_dir, reason)
                    continue

                # Store adjusted dates for recovery
                obfusc_dates = None
                if self.relative_dates is not None:
                    obfusc_dates = {
                        tag: datetime.strptime(ds[tag].value, '%Y%m%d') -
                        date_adjust[tag]
                        for tag in self.relative_dates
                    }

                # Keep CSA Headers
                csa_headers = dict()
                if self.keep_csa_headers and (0x29, 0x10) in ds:
                    csa_headers[(0x29, 0x10)] = ds[(0x29, 0x10)]
                    for offset in [0x10, 0x20]:
                        elno = (0x10 * 0x0100) + offset
                        csa_headers[(0x29, elno)] = ds[(0x29, elno)]

                destination_dir = self.destination(source_path, clean_dir,
                                                   ident_dir)
                if not os.path.exists(destination_dir):
                    os.makedirs(destination_dir)
                try:
                    ds, study_pk = self.anonymize(ds)
                except ValueError, e:
                    self.quarantine_file(
                        source_path, ident_dir,
                        'Error running anonymize function. There may be a '
                        'DICOM element value that does not match the specified'
                        ' Value Representation (VR). Error was: %s' % e)
                    continue

                # Recover relative dates
                if self.relative_dates is not None:
                    for tag in self.relative_dates:
                        if audit_date_correct != study_pk and tag in AUDIT.keys(
                        ):
                            self.audit.update(
                                ds[tag], obfusc_dates[tag].strftime('%Y%m%d'),
                                study_pk)
                        ds[tag].value = obfusc_dates[tag].strftime('%Y%m%d')
                    audit_date_correct = study_pk

                # Restore CSA Header
                if len(csa_headers) > 0:
                    for tag in csa_headers:
                        ds[tag] = csa_headers[tag]

                # Set Patient Identity Removed to YES
                t = Tag((0x12, 0x62))
                ds[t] = DataElement(t, 'CS', 'YES')

                # Set the De-identification method code sequence
                method_ds = Dataset()
                t = dicom.tag.Tag((0x8, 0x102))
                if self.profile == 'clean':
                    method_ds[t] = DataElement(
                        t, 'DS', MultiValue(DS, ['113100', '113105']))
                else:
                    method_ds[t] = DataElement(t, 'DS',
                                               MultiValue(DS, ['113100']))
                t = dicom.tag.Tag((0x12, 0x64))
                ds[t] = DataElement(t, 'SQ', Sequence([method_ds]))

                out_filename = ds[
                    SOP_INSTANCE_UID].value if self.rename else filename
                clean_name = os.path.join(destination_dir, out_filename)
                try:
                    ds.save_as(clean_name)
                except IOError:
                    logger.error('Error writing file %s' % clean_name)
                    self.close_all()
                    return False
 def testBackslash(self):
     """DataElement: String with '\\' sets multi-valued data_element."""
     data_element = DataElement((1, 2), "DS", r"42.1\42.2\42.3")
     self.assertEqual(data_element.VM, 3, "Did not get a mult-valued value")
 def setUp(self):
     self.data_elementSH = DataElement((1, 2), "SH", "hello")
     self.data_elementIS = DataElement((1, 2), "IS", "42")
     self.data_elementDS = DataElement((1, 2), "DS", "42.00001")
     self.data_elementMulti = DataElement((1, 2), "DS",
                                          ['42.1', '42.2', '42.3'])
示例#10
0
 def add_new(self, tag, VR, value):
     """Create a new DataElement instance and add it to this Dataset."""
     data_element = DataElement(tag, VR, value)
     # use data_element.tag since DataElement verified it
     self[data_element.tag] = data_element
示例#11
0
def data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when=None, defer_size=None, encoding=default_encoding):
    """Create a generator to efficiently return the raw data elements
    Returns (VR, length, raw_bytes, value_tell, is_little_endian),
    where:
    VR -- None if implicit VR, otherwise the VR read from the file
    length -- the length as in the DICOM data element (could be
        DICOM "undefined length" 0xffffffffL),
    value_bytes -- the raw bytes from the DICOM file
                    (not parsed into python types)
    is_little_endian -- True if transfer syntax is little endian; else False
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #       The 4-byte length can be FFFFFFFF (undefined length)*
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = dicom.debugging
    element_struct_unpack = element_struct.unpack

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            raise StopIteration  # at end of file
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                             bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if in_py3:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFFL:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                raise StopIteration

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFFL:
            if defer_size is not None and length > defer_size:
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded."
                            "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = fp_read(length)
                if debugging:
                    dotdot = "   "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" % (value_tell,
                           bytes2hex(value[:12]), dotdot, value[:12], dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == (0x08, 0x05):
                from dicom.values import convert_string
                encoding = convert_string(value, is_little_endian, encoding=default_encoding)
                # Store the encoding value in the generator for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionaryVR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items and is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                seq = read_sequence(fp, is_implicit_VR,
                                                    is_little_endian, length, encoding)
                yield DataElement(tag, VR, seq, value_tell,
                                                    is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set, then store it
                if tag == (0x08, 0x05):
                    from dicom.values import convert_string
                    encoding = convert_string(value, is_little_endian, encoding=default_encoding)
                    # Store the encoding value in the generator for use with future elements (SQs)
                    encoding = convert_encodings(encoding)

                yield RawDataElement(tag, VR, length, value, value_tell,
                                is_implicit_VR, is_little_endian)
示例#12
0
 def testBackslash(self):
     """DataElement: Passing string with '\\' sets multi-valued data_element."""
     data_element = DataElement((1,2), "DS", r"42.1\42.2\42.3") # note r" to avoid \ as escape chr
     self.assertEqual(data_element.VM, 3, "Did not get a mult-valued value")
示例#13
0
def data_element_generator(fp, is_implicit_VR, is_little_endian, stop_when=None, 
                            defer_size=None):
    """Create a generator to efficiently return the raw data elements
    Specifically, returns (VR, length, raw_bytes, value_tell, is_little_endian),
    where:
    VR -- None if implicit VR, otherwise the VR read from the file
    length -- the length as in the DICOM data element (could be
        DICOM "undefined length" 0xffffffffL),
    value_bytes -- the raw bytes from the DICOM file (not parsed into python types)
    is_little_endian -- True if transfer syntax is little endian; else False
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #       The 4-byte length can be FFFFFFFF (undefined length)*
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length
    
    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = dicom.debugging
    
    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        unpack_format = endian_chr + "HHL" # XXX in python >=2.5, can do struct.Struct to save time
    else: # Explicit VR
        unpack_format = endian_chr + "HH2sH" # tag, VR, 2-byte length (or 0 if special VRs)
        extra_length_format = endian_chr + "L"  # for special VRs
        
    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)        
        if len(bytes_read) < 8:
            raise StopIteration # at end of file
        if debugging: debug_msg = "%08x: %s" % (fp.tell()-8, bytes2hex(bytes_read))
        
        if is_implicit_VR:
            VR = None # must reset each time -- may have looked up on last iteration (e.g. SQ)
            group, elem, length = unpack(unpack_format, bytes_read)
        else: # explicit VR
            group, elem, VR, length = unpack(unpack_format, bytes_read)
            if VR in ('OB','OW','OF','SQ','UN', 'UT'):
                bytes_read = fp_read(4)
                length = unpack(extra_length_format, bytes_read)[0]
                if debugging: debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR: debug_msg += " %s " % VR
            if length != 0xFFFFFFFFL:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Now are positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        # logger.debug("%08x: start of value of length %d" % (value_tell, length))
        tag = TupleTag((group, elem))
        if stop_when is not None:
            if stop_when(tag, VR, length): # XXX VR may be None here!! Should stop_when just take tag?
                if debugging: 
                    logger_debug("Reading ended by stop_when callback. Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in ('OB','OW','OF','SQ','UN', 'UT'): 
                    rewind_length += 4
                fp.seek(value_tell-rewind_length)
                raise StopIteration

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFFL:
            if defer_size is not None and length > defer_size:
                # Flag as deferred read by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. Skipping forward to next data element.")
                fp.seek(fp_tell()+length)
            else:
                value = fp_read(length)
                if debugging:
                    dotdot = "   "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" % (value_tell, 
                              bytes2hex(value[:12]), dotdot, value[:12], dotdot))
            yield RawDataElement(tag, VR, length, value, value_tell, 
                                     is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        #  ... unless is SQ type, in which case is easier to parse it, because
        #      undefined length SQs and items of undefined lengths can be nested
        #      and it would be error-prone to read to the correct outer delimiter 
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes
            if VR is None:
                try:
                    VR = dictionaryVR(tag)
                except KeyError: 
                    pass
            if VR == 'SQ':
                if debugging:
                    logger_debug("%08x: Reading and parsing undefined length sequence"
                                % fp_tell())
                seq = read_sequence(fp, is_implicit_VR, is_little_endian, length)
                yield DataElement(tag, VR, seq, value_tell, is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian, delimiter,
                                        defer_size)
                yield RawDataElement(tag, VR, length, value, value_tell,
                                is_implicit_VR, is_little_endian)