示例#1
0
    def test_replace_identifiers(self):
        """Check that replace_identifiers blanks header fields.

        The first dicom file is read before and after replacement, and the
        number of fields whose value is not the empty string is compared
        against the counts expected for the test dataset (28 before, 9 after).
        """
        print("Testing deid.dicom replace_identifiers")
        from deid.dicom import replace_identifiers
        from deid.dicom import get_identifiers

        from pydicom import read_file

        dicom_files = get_dicom(self.dataset, return_dir=True)
        ids = get_identifiers(dicom_files)

        # Before blanking, 28 fields don't have blanks
        notblanked = read_file(dicom_files[0])
        notblanked_fields = [
            x for x in notblanked.dir() if notblanked.get(x) != ''
        ]
        # assertEqual reports both values on failure, unlike assertTrue(a == b)
        self.assertEqual(len(notblanked_fields), 28)

        updated_files = replace_identifiers(dicom_files,
                                            ids,
                                            output_folder=self.tmpdir)

        # After replacing only 9 don't have blanks
        blanked = read_file(updated_files[0])
        blanked_fields = [x for x in blanked.dir() if blanked.get(x) != '']
        self.assertEqual(len(blanked_fields), 9)
示例#2
0
 def test_get_identifiers(self):
     """Check the shape of the lookup returned by get_identifiers.

     The result for the test dataset is expected to be a dict with a single
     entry, and the 'cookie-47' entry is expected to have length 7.
     """
     print("Testing deid.dicom get_identifiers")
     from deid.dicom import get_identifiers
     dicom_files = get_dicom(self.dataset, return_dir=True)
     ids = get_identifiers(dicom_files)
     # Dedicated assertions give informative messages on failure
     self.assertEqual(len(ids), 1)
     self.assertIsInstance(ids, dict)
     self.assertEqual(len(ids['cookie-47']), 7)
示例#3
0
    def test_extract_groups(self):
        """Exercise extract_values_list / extract_fields_list and a replace run.

        Values and fields extracted from the test dicom are added to the
        identifiers lookup, then replace_identifiers is run with self.deid and
        the resulting PatientID and OperatorsName values are checked.
        """
        print("Test deid.dicom.groups extract_values_list")
        from deid.dicom.groups import extract_values_list, extract_fields_list

        dicom = get_dicom(self.dataset)
        fields = get_fields(dicom)  # removes empty / null

        # Test split action
        actions = [{
            "action": "SPLIT",
            "field": "PatientID",
            "value": 'by="^";minlength=4'
        }]
        expected_names = dicom.get("PatientID").split("^")
        actual = extract_values_list(dicom, actions)
        self.assertEqual(actual, expected_names)

        # Test field action
        actions = [{"action": "FIELD", "field": "startswith:Operator"}]
        expected_operator = [
            dicom.get(x) for x in fields if x.startswith("Operator")
        ]
        actual = extract_values_list(dicom, actions)
        self.assertEqual(actual, expected_operator)

        print("Test deid.dicom.groups extract_fields_list")
        actions = [{"action": "FIELD", "field": "contains:Instance"}]
        expected = [x for x in fields if "Instance" in x]
        actual = extract_fields_list(dicom, actions)
        self.assertEqual(actual, expected)

        # Get identifiers for file
        ids = get_identifiers(dicom)
        self.assertIsInstance(ids, dict)

        # Add keys to be used for replace to ids - these first are for values
        ids[dicom.filename]["cookie_names"] = expected_names
        ids[dicom.filename]["operator_names"] = expected_operator

        # This is for fields
        ids[dicom.filename]["instance_fields"] = expected
        ids[dicom.filename]["id"] = "new-cookie-id"
        ids[dicom.filename]["source_id"] = "new-operator-id"

        replaced = replace_identifiers(dicom,
                                       ids=ids,
                                       save=False,
                                       deid=self.deid)
        cleaned = replaced.pop()
        self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
        self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")

        # Currently we don't well handle tag types, so we convert to string
        for field in expected_operator:
            self.assertNotIn(str(field), cleaned)
示例#4
0
文件: epr.py 项目: ydiazn/dhdicom
 def _anonimize(self, image):
     '''
     self._anonimize(image) => file path. Anonymizes the image passed as
     a parameter according to the rules defined in self.recipe and returns
     the first path produced by replace_identifiers (described by the
     original author as a temporary file holding those modifications).
     '''
     dicom_paths = [image.path]
     identifiers = get_identifiers(dicom_paths)
     anonymized = replace_identifiers(
         dicom_files=dicom_paths,
         deid=self.recipe,
         ids=identifiers,
     )
     # Only one path went in, so only the first result is of interest
     return anonymized[0]
示例#5
0
def main():
    """De-identify the files of an input folder into an output folder.

    Reads exactly two command-line arguments from sys.argv: the input folder
    and the output folder. Exits with status 1 when the argument count is
    wrong. Identifiers are extracted from every entry of the input folder,
    re-keyed by file basename, and passed to replace_identifiers together
    with a recipe that additionally removes InstitutionName.
    """
    # Compare by value: `is not` tests object identity, which is not a
    # reliable way to compare integers (SyntaxWarning on Python 3.8+).
    if len(sys.argv) != 3:
        print("argv")
        sys.exit(1)

    input_folder = sys.argv[1]
    output_folder = sys.argv[2]

    dicom_files = [
        join(input_folder, dicom_file) for dicom_file in listdir(input_folder)
    ]
    ids = get_identifiers(dicom_files)

    # or use default conf, and then keep AccessionNumber
    #recipe = DeidRecipe('deid.conf')
    recipe = DeidRecipe()
    #recipe.deid['header'].remove({'action': 'REMOVE', 'field': 'AccessionNumber'})
    recipe.deid['header'].append({
        'action': 'REMOVE',
        'field': 'InstitutionName'
    })

    # Re-key the identifiers by file basename instead of the joined path
    updated_ids = {}
    for image, fields in ids.items():
        #fields['id'] = 'cookiemonster'
        #fields['source_id'] = "cookiemonster-image-%s" %(count)
        updated_ids[basename(image)] = fields

    if not exists(output_folder):
        try:
            makedirs(output_folder)
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    # The returned list of cleaned files is not used further here
    replace_identifiers(dicom_files=dicom_files,
                        deid=recipe,
                        ids=updated_ids,
                        output_folder=output_folder)
    def test_nested_replace(self):
        """
        A REPLACE action using a contains: expression is expected to update
        the matching field both at the top level of the dataset and inside a
        nested sequence. This test applies the recipe below and checks both
        locations.

        %header

        REPLACE contains:StudyInstanceUID var:new_val
        """
        print("Test nested_replace")
        dicom_file = get_file(self.dataset)

        replace_action = {
            "action": "REPLACE",
            "field": "contains:StudyInstanceUID",
            "value": "var:new_val",
        }
        recipe = create_recipe([replace_action])

        # Provide the variable referenced by the recipe for every entry
        items = get_identifiers([dicom_file])
        for entry in items.values():
            entry["new_val"] = "modified"

        result = replace_identifiers(dicom_files=dicom_file,
                                     ids=items,
                                     deid=recipe,
                                     save=False)
        self.assertEqual(1, len(result))

        updated = result[0]
        self.assertEqual(updated.StudyInstanceUID, "modified")
        nested = updated.RequestAttributesSequence[0]
        self.assertEqual(nested.StudyInstanceUID, "modified")
示例#7
0
from deid.utils import get_installdir
from deid.data import get_dataset
import os

# This is a complete example of doing de-identification. For details, see our docs
# https://pydicom.github.io/deid

# Fetch the example set of cookie dicoms
base = get_dataset('dicom-cookies')

# Build the complete list of files up front
# todo : consider using generator functionality
dicom_files = list(get_files(base))

# get_identifiers is the function used to extract identifiers
from deid.dicom import get_identifiers

ids = get_identifiers(dicom_files)

#**
# Here you might save them in your special (IRB approved) places
# And then provide replacement anonymous ids to put back in the data
# A cookie tumor example is below
#**

################################################################################
# The Deid Recipe
#
# The process of flagging images comes down to writing a set of filters to
# check if each image meets some criteria of interest. For example, I might
# create a filter called "xray" that is triggered when the Modality is CT or XR.
# We specify these filters in a simple text file called a "deid recipe." When
# you work with the functions, you have the choice to instantiate the object
示例#8
0
def main(args, parser):
    """Run the GET and/or PUT identifier actions selected by args.action.

    Args:
        args: parsed command-line arguments. This function reads outfolder,
            deid, format, input, action, ids, do_print and overwrite.
        parser: the argument parser; not used by this function.

    Exits with status 1 when the deid format does not match args.format, or
    when the "put" action is requested without args.ids.
    """

    # Global output folder
    output_folder = args.outfolder
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    if args.deid is not None:
        params = load_deid(args.deid)
        if params['format'] != args.format:
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting." %
                (params['format'], args.format))
            # The message announces an exit; actually stop, as the "put"
            # branch below does after reporting its own error.
            sys.exit(1)

    # Get list of dicom files
    base = args.input
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset('dicom-cookies')
    basename = os.path.basename(base)
    dicom_files = list(
        get_files(base))  # todo : consider using generator functionality

    do_get = False
    do_put = False
    ids = None
    if args.action == "all":
        bot.info("GET and PUT identifiers from %s" % (basename))
        do_get = True
        do_put = True

    elif args.action == "get":
        do_get = True
        # Only GET is performed for this action, so say so in the log
        bot.info("GET identifiers from %s" % (basename))

    elif args.action == "put":
        bot.info("PUT identifiers from %s" % (basename))
        do_put = True
        if args.ids is None:
            bot.error(
                "To PUT without GET you must provide a json file with ids.")
            sys.exit(1)
        ids = args.ids

    # GET identifiers
    if do_get:
        ids = get_identifiers(dicom_files)
        if args.do_print is True:
            print(ids)
        else:
            save_identifiers(ids, output_folder)

    # PUT identifiers
    if do_put:
        cleaned_files = replace_identifiers(dicom_files=dicom_files,
                                            ids=ids,
                                            deid=args.deid,
                                            overwrite=args.overwrite,
                                            output_folder=output_folder)

        bot.info("%s %s files at %s" %
                 (len(cleaned_files), args.format, output_folder))
示例#9
0
        for rootPatient, directoriesPatient, filenamesPatient in os.walk(
                os.path.join(src, directory)):
            directoriesPatient.sort()
            count_patientDir = 1
            for directorySequence in directoriesPatient:
                for rootImage, directoriesImage, filenamesImage in os.walk(
                        os.path.join(src, directory, directorySequence)):
                    if len(filenamesImage) > 2:
                        for filename in filenamesImage:
                            possibleFilename = os.path.join(
                                rootImage, filename)

                            if filename.startswith('I'):
                                ids = get_identifiers(
                                    possibleFilename
                                )  #  deid function: gets identifiers from a dicom file
                                print(possibleFilename)

                                for image, fields in ids.items():

                                    series_description = fields[
                                        'SeriesDescription']
                                    series_description = series_description.lower(
                                    )

                                    #count_patientDir = 1

                                    if series_description.startswith(
                                            "dif"
                                    ) or series_description.startswith(
示例#10
0
    def test_extract_groups(self):
        """Exercise value/field extraction, DicomParser and replace_identifiers.

        First checks extract_values_list and extract_fields_list against
        values computed directly from the parsed fields, then defines the
        extracted groups on a DicomParser and verifies the parsed result,
        and finally repeats the replacement through replace_identifiers.
        """
        print("Test deid.dicom.groups extract_values_list")
        from deid.dicom.groups import extract_values_list, extract_fields_list

        dicom = get_dicom(self.dataset)
        fields = get_fields(dicom)

        # Test split action
        actions = [{
            "action": "SPLIT",
            "field": "PatientID",
            "value": 'by="^";minlength=4'
        }]
        expected_names = dicom.get("PatientID").split("^")
        actual = extract_values_list(dicom, actions)
        self.assertEqual(actual, expected_names)

        # Test field action
        actions = [{"action": "FIELD", "field": "startswith:Operator"}]
        expected_operator = [
            x.element.value for x in fields.values()
            if x.element.keyword.startswith("Operator")
        ]
        actual = extract_values_list(dicom, actions)
        self.assertEqual(actual, expected_operator)

        print("Test deid.dicom.groups extract_fields_list")
        actions = [{"action": "FIELD", "field": "contains:Instance"}]
        expected = {
            uid: x
            for uid, x in fields.items() if "Instance" in x.element.keyword
        }
        actual = extract_fields_list(dicom, actions)
        # unittest assertions are used throughout: bare assert statements
        # are stripped when Python runs with -O
        for uid in expected:
            self.assertIn(uid, actual)

        # Get identifiers for file
        ids = get_identifiers(dicom)
        self.assertIsInstance(ids, dict)

        # Add keys to be used for replace to ids - these first are for values
        parser = DicomParser(dicom, recipe=self.deid)
        parser.define("cookie_names", expected_names)
        parser.define("operator_names", expected_operator)

        # This is for fields
        parser.define("instance_fields", expected)
        parser.define("id", "new-cookie-id")
        parser.define("source_id", "new-operator-id")
        parser.parse()

        # Were the changes made?
        self.assertEqual(parser.dicom.get("PatientID"), "new-cookie-id")
        self.assertEqual(parser.dicom.get("OperatorsName"), "new-operator-id")

        # Instance fields should be removed based on recipe
        for field in parser.lookup["instance_fields"].values():
            self.assertNotIn(field.element.keyword, parser.dicom)

        # Start over
        dicom = get_dicom(self.dataset)

        # We need to provide ids with variables "id" and "source_id"
        ids = {
            dicom.filename: {
                "id": "new-cookie-id",
                "source_id": "new-operator-id"
            }
        }

        # Returns list of updated dicom, since save is False
        replaced = replace_identifiers(dicom,
                                       save=False,
                                       deid=self.deid,
                                       ids=ids)
        cleaned = replaced.pop()

        self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
        self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")
示例#11
0
# Let's use the default dicom_schema
from som.api.google.bigquery.schema import dicom_schema

table = client.get_or_create_table(dataset=dataset,
                                   table_name='dicomCookies',
                                   schema=dicom_schema)

# Let's load some dummy data from deid
from deid.data import get_dataset
from deid.dicom import get_files

# Materialize the listing: dicom_files is iterated three times below, which
# would silently yield nothing after the first pass if get_files returns a
# one-shot iterator.
dicom_files = list(get_files(get_dataset('dicom-cookies')))

# Now de-identify to get clean files
from deid.dicom import get_identifiers, replace_identifiers
metadata = get_identifiers(dicom_files)
updated_files = replace_identifiers(dicom_files=dicom_files, ids=metadata)

# Define some metadata for each entity and item
updates = {
    "item_id": "cookieTumorDatabase",
    "entity_id": "cookie-47",
    "Modality": "cookie"
}

for image_file in dicom_files:
    if image_file in metadata:
        metadata[image_file].update(updates)
    else:
        # Store a copy so separate entries never alias the same dict object
        metadata[image_file] = dict(updates)
示例#12
0
        for rootPatient, directoriesPatient, filenamesPatient in os.walk(
                os.path.join(src, directory)):

            for directorySequence in directoriesPatient:  #for each sequence folder
                createFolder(os.path.join(dst, caseID, directorySequence))
                #enters each Sequence folder
                for rootImage, directoriesImage, filenamesImage in os.walk(
                        os.path.join(src, directory, directorySequence)):

                    for filename in filenamesImage:
                        possibleFilename = os.path.join(rootImage, filename)
                        if filename.startswith('I'):
                            #if 'DIRFILE' not in possibleFilename and 'dirty' not in possibleFilename and '.DS_Store' not in possibleFilename and '.bmp' not in possibleFilename and '._I10' not in possibleFilename and '._I11' not in possibleFilename and '._I00' not in possibleFilename and '._I200' not in possibleFilename and '._I460' not in possibleFilename and '._I880' not in possibleFilename:
                            #print(possibleFilename)
                            ids = get_identifiers(
                                possibleFilename
                            )  #deid function to get identifiers from a dicom file

                            recipe.deid  #changing header values

                            #print(recipe.deid)
                            #print(recipe.get_actions()) # check the actions that are defined

                            updated_ids = dict()
                            count = 0
                            for image, fields in ids.items():
                                #save these items to put into .csv
                                patientName = fields['PatientName']
                                study_date = fields['StudyDate']
                                #institution_name = fields['InstitutionName']
                                #patient_age = fields['PatientAge']