Example #1
def find_dicom_paths(root_path: str, yield_directories: bool = False) -> Iterator[str]:
    """Find DICOM file paths in the specified root directory file tree.

    Parameters
    ----------
    root_path
        Path to the root directory of the file tree to search.

    yield_directories, optional
        Whether to yield the directories that contain DICOM files
        instead of each DICOM file path individually (the default).

    Yields
    ------
    The paths to DICOM files or DICOM-containing directories (if
    `yield_directories` is True).

    """
    # TODO add some filtering options
    for root, _, files in os.walk(root_path):
        if yield_directories:
            if any(is_dicom(pathlib.Path(root, f).as_posix()) for f in files):
                yield root
        else:
            for f in files:
                fpath = pathlib.Path(root, f).as_posix()
                if is_dicom(fpath):
                    yield fpath
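
A minimal usage sketch for the generator above, assuming os, pathlib, and the project's is_dicom helper are importable; the scan root is hypothetical:

# Hypothetical scan root; each DICOM file path under it is yielded lazily.
for dicom_path in find_dicom_paths('/data/scans'):
    print(dicom_path)

# With yield_directories=True, only directories containing at least one DICOM file are yielded.
for dicom_dir in find_dicom_paths('/data/scans', yield_directories=True):
    print(dicom_dir)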
Example #2
    def test_is_dicom(self):
        """Test the is_dicom function."""
        invalid_file = test_file.replace('CT_', 'CT')  # invalid file
        notdicom_file = osp.abspath(__file__)  # use own file

        # valid file returns True
        self.assertTrue(is_dicom(test_file))

        # returns False for a real file that is not DICOM
        self.assertFalse(is_dicom(notdicom_file))

        # test invalid path
        self.assertRaises(IOError, is_dicom, invalid_file)
Example #4
    def test_is_dicom(self):
        """Test the is_dicom function."""
        notdicom_file = os.path.abspath(__file__)  # use own file

        # valid file returns True
        assert is_dicom(test_file)

        # returns False for a real file that is not DICOM
        assert not is_dicom(notdicom_file)

        # test invalid path
        with pytest.raises(IOError):
            is_dicom('xxxx.dcm')

        # Test no meta prefix/preamble fails
        assert not is_dicom(no_meta_file)
Example #5
    def test_is_dicom(self):
        """Test the is_dicom function."""
        invalid_file = test_file.replace('CT_', 'CT')  # invalid file
        notdicom_file = os.path.abspath(__file__)  # use own file

        # valid file returns True
        assert is_dicom(test_file)

        # returns False for a real file that is not DICOM
        assert not is_dicom(notdicom_file)

        # test invalid path
        with pytest.raises(IOError):
            is_dicom(invalid_file)

        # Test no meta prefix/preamble fails
        assert not is_dicom(no_meta_file)
Example #6
def scan_dcm(path):
    """Recursively collect all DICOM files under ``path`` and return them as a list."""
    fl = []
    pathToGlob = Path(path)
    if pathToGlob.exists():
        for i in pathToGlob.rglob("*"):
            if i.is_file() and is_dicom(i):
                fl.append(i)
    return fl
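
A short usage sketch, assuming Path and is_dicom are in scope; the study directory is hypothetical:

dicom_files = scan_dcm('/data/study_001')        # hypothetical study directory
print('%d DICOM files found' % len(dicom_files))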
Example #7
    def __init__(self, source_directory):
        self.source_directory = source_directory
        os.chdir(self.source_directory)
        print('In ReadDICOMFiles...')
        for file in glob.glob("*.dcm"):
            print(file)
            if is_dicom(file):
                print("It is indeed DICOM!")
                dcm_file = pydicom.dcmread(file)
            else:
                print("It's probably not DICOM")
Example #8
def find_dicom_files(directory,
                     pattern="*",
                     directory_exclude_pattern='',
                     recursive=True):
    """
    search a root directory for all files matching a given pattern (in Glob format - *.dcm etc)
    and that have the "DICM" magic number
    returns a full path name
    """
    for root, dirs, files in os.walk(directory):
        if not recursive:
            # prune in place so os.walk does not descend into subdirectories
            dirs[:] = []
        if directory_exclude_pattern:
            # filter in place; removing entries while iterating over dirs would skip items
            dirs[:] = [d for d in dirs
                       if not fnmatch.fnmatch(d, directory_exclude_pattern)]
        for basename in files:
            if fnmatch.fnmatch(basename, pattern):
                filename = os.path.join(root, basename)
                if is_dicom(filename):
                    yield filename
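
A usage sketch for the generator above, assuming os, fnmatch, and is_dicom are in scope; the archive root and patterns are illustrative:

# Hypothetical archive root: yield every *.dcm file, skipping any "backup*" directories.
for path in find_dicom_files('/data/archive',
                             pattern='*.dcm',
                             directory_exclude_pattern='backup*'):
    print(path)
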
Example #9
    def retrieve(self):
        #global global_fifo_q
        os.chdir(self.source_directory)
        for file in glob.glob("*.dcm"):
            print(file)
            if is_dicom(file):
                print("It is indeed DICOM!")
                dcm_file = pydicom.dcmread(file)
                print(dcm_file)

                parsed_result = {}
                testType = dcm_file.StudyDescription
                studyDate = dcm_file.StudyDate
                studyTime = dcm_file.StudyTime
                deviceSerialNumber = dcm_file.DeviceSerialNumber
                institutionName = dcm_file.InstitutionName
                manufacturer = dcm_file.Manufacturer
                manufacturerModelName = dcm_file.ManufacturerModelName
                entranceDose = dcm_file.EntranceDoseInmGy
                studyInstanceUID = dcm_file.StudyInstanceUID
                seriesInstanceUID = dcm_file.SeriesInstanceUID
                parsed_result["testInfo"] = {
                    "testType": testType,
                    "studyDate": studyDate,
                    "studyTime": studyTime,
                    "deviceSerialNumber": deviceSerialNumber,
                    "institutuionName": institutionName,
                    "manufacturerModelName": manufacturerModelName,
                    "manufacturer": manufacturer,
                    "entranceDoseinmGy": entranceDose,
                    "studyInstanceUID": studyInstanceUID,
                    "seriesInstanceUID": seriesInstanceUID
                }
                user_firstName = dcm_file.PatientName.given_name
                user_lastName = dcm_file.PatientName.family_name
                ethnic_group = dcm_file.EthnicGroup
                user_birthdate = dcm_file.PatientBirthDate
                user_sex = dcm_file.PatientSex
                user_id = dcm_file.PatientID
                if user_id == '':
                    user_id = str(user_firstName) + "." + str(
                        user_lastName) + "." + str(
                            user_birthdate) + "@noemail.unk"
                user_age = dcm_file.PatientAge
                user_Size = dcm_file.PatientSize
                user_Weight = dcm_file.PatientWeight
                parsed_result["userInfo"] = {
                    "firstName": user_firstName,
                    "lastName": user_lastName,
                    "email": user_id,
                    "ethnicGroup": ethnic_group,
                    "birthDate": user_birthdate,
                    "userSex": user_sex,
                    "userAge": user_age,
                    "userSize": user_Size,
                    "userWeight": user_Weight
                }

                xml_string = dcm_file.ImageComments
                xml_root = etree.fromstring(xml_string)

                parsed_result["bodyComposition"] = {}
                for leaf in xml_root.iter('COMP_ROI'):
                    regionName = lowerCamelCase(leaf.attrib['region'])
                    parsed_result["bodyComposition"][regionName] = {}
                    for reading in leaf.iter():
                        # skip attributes that don't have a value
                        if 'units' not in reading.attrib:
                            continue
                        # Normalize the value (% or lbs)
                        key = lowerCamelCase(reading.tag)
                        units = reading.attrib['units'].strip()
                        value = None
                        if units == '%':
                            value = normalizePercentageValue(
                                float(reading.text))
                        else:
                            value = normalizeWeightValue(
                                float(reading.text), units)
                        # save the reading
                        parsed_result["bodyComposition"][regionName][
                            key] = value

                parsed_result["BMD"] = {}

                #print ("XML_ROOT", xml_root)

                for leaf in xml_root.iter('ROI'):
                    regionName = lowerCamelCase(leaf.attrib['region'])
                    parsed_result["BMD"][regionName] = {}
                    for reading in leaf.iter():
                        if reading.text is None or reading.text == '-':
                            continue
                        key = lowerCamelCase(reading.tag)
                        # units = reading.attrib['units'].strip()
                        value = float(reading.text)
                        parsed_result["BMD"][regionName][key] = value

                parsed_result["visceralFat"] = {}
                for leaf in xml_root.iter('VAT_MASS'):
                    regionName = lowerCamelCase(
                        'Estimated Visceral Adipose Tissue')
                    parsed_result["visceralFat"][regionName] = {}
                    for reading in leaf.iter():
                        # skip attributes that don't have a value
                        if 'units' not in reading.attrib:
                            continue
                        # Normalize the value (% or lbs)
                        key = lowerCamelCase(reading.tag)
                        units = reading.attrib['units'].strip()
                        value = None
                        if units == '%':
                            value = normalizePercentageValue(
                                float(reading.text))
                        else:
                            value = normalizeWeightValue(
                                float(reading.text), units)
                        # save the reading
                        parsed_result["visceralFat"][regionName][key] = value

                # convert it all to JSON and Save
                self.json_result = json.dumps(parsed_result)
                print(self.json_result)

                # Add data to queue
                #data_to_process = (user_id, json_result, studyDate, studyTime, file)
                #global_fifo_q.put(data_to_process)

                data_to_process = (user_id, self.json_result, studyDate,
                                   studyTime, file, user_firstName,
                                   user_lastName, user_birthdate,
                                   studyInstanceUID)
                self.ins_var_global_fifo_q.put(data_to_process)

            else:
                print("It's probably not DICOM")
                print("Trying to move NON-DICOM file to:" +
                      self.error_directory)
                #Handle Error and send notifiction
                #Move to error directory
                #shutil.move(file, self.error_directory)
                try:
                    shutil.move(file, self.error_directory)
                except shutil.Error as e:
                    # e.g. the source or destination doesn't exist
                    print('Error: %s' % e)
                    os.remove(file)
                except IOError as e:
                    print('Error: %s' % e.strerror)
                    os.remove(file)
Example #10
    return result


# Normalize percentage values into preferred values [0, 1]
def normalizePercentageValue(value):
    result = float(value) / float(100)
    return result

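A quick illustration of the helper above, using inputs whose floating-point results are exact:

print(normalizePercentageValue(50.0))    # 50 %   -> 0.5
print(normalizePercentageValue(12.5))    # 12.5 % -> 0.125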

'''
JSON Parse Execution
'''
# Change to os.walk path
#dcm_file = pydicom.read_file('c:/temp/pending_dexafit_files/1.2.840.113619.2.110.500342.20180111131347.3.1.12.1.dcm')

if is_dicom('c:/temp/pending_dexafit_files/1.2.840.113619.2.110.500342.20180111131347.3.1.12.1.dcm'):
    print("It is indeed DICOM!")
    dcm_file = pydicom.dcmread('c:/temp/pending_dexafit_files/1.2.840.113619.2.110.500342.20180111131347.3.1.12.1.dcm')
else:
    print("It's probably not DICOM")


parsed_result = {}
testType = dcm_file.StudyDescription
studyDate = dcm_file.StudyDate
studyTime = dcm_file.StudyTime
deviceSerialNumber = dcm_file.DeviceSerialNumber
institutionName = dcm_file.InstitutionName
manufacturer = dcm_file.Manufacturer
#manufacturerModelName = dcm_file.ManufacturersModelName
#entranceDose = dcm_file.EntranceDoseinmGy
Example #11
def dump_series2json(dcm_root, mode='one_per_dir', series_file_pattern='00000001.dcm'):
    """
    :param dcm_root:
    :param mode:        'one_per_dir'
    :param series_file_pattern:
    :return:
    """
    if not os.path.exists(dcm_root): return {}
    series = {'01_PatientName':[],
              '02_PatientID':[],
              '03_StudyDate':[],
              '04_AcquisitionDateTime':[],
              '05_SeriesDescription':[],
              '06_NumberofSlices':[],
              '07_Load':[],
              '08_SeriesRoot':[],
              '09_SeriesFiles':[]}
    for subdir, _, files in os.walk(dcm_root):
        if len(files) <= 0: continue
        print('working on %s' % (subdir))
        # find right patterned files
        re_pattern = series_file_pattern.replace('*', '.*')
        r_files = []
        if mode != 'one_per_dir': r_files = files
        else:
            for file in files: r_files += re.findall(re_pattern, file)
            r_files.sort()
            # keep a single representative file, if any matched the pattern
            r_files = r_files[:1]
        # iterate all found files
        pnames = []
        pids = []
        sdates = []
        sdecrps = []
        acqdatetimes = []
        subdirs = []
        slices = {}
        series_keys = []
        for file in r_files:
            dcm_file = os.path.join(subdir, file)
            if not is_dicom(dcm_file): continue
            ds = pydicom.dcmread(dcm_file)
            if hasattr(ds, 'AcquisitionDate'): acqdate = str(ds.AcquisitionDate)
            else: acqdate = 'NA'
            if hasattr(ds, 'AcquisitionTime'): acqtime = str(ds.AcquisitionTime)
            else: acqtime = 'NA'
            if hasattr(ds, 'PatientName'): pname = str(ds.PatientName)
            else: pname = 'NA'
            if hasattr(ds, 'PatientID'): pid = str(ds.PatientID)
            else: pid = 'NA'
            if hasattr(ds, 'StudyDate'): sdate = str(ds.StudyDate)
            else: sdate = 'NA'
            if hasattr(ds, 'SeriesDescription'): sdecrp = str(ds.SeriesDescription)
            else: sdecrp = 'NA'
            series_key = str(sdecrp + subdir)
            if series_key not in series_keys:
                series_keys.append(series_key)
                slices[series_key] = []
                pnames.append(pname)
                pids.append(pid)
                sdates.append(sdate)
                sdecrps.append(sdecrp)
                acqdatetimes.append(acqdate + acqtime)
                subdirs.append(subdir)
            slices[series_key].append(file)
        # append to series
        series['01_PatientName'] += pnames
        series['02_PatientID'] += pids
        series['03_StudyDate'] += sdates
        series['04_AcquisitionDateTime'] += acqdatetimes
        series['05_SeriesDescription'] += sdecrps
        series['07_Load'] += [False] * len(pnames)
        series['08_SeriesRoot'] += subdirs
        for series_key in series_keys:
            series['06_NumberofSlices'].append(len(slices[series_key]))
            series['09_SeriesFiles'].append(slices[series_key])
    return series
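
A usage sketch, assuming os, re, pydicom, and is_dicom are importable alongside the function above; the scan root and output file name are hypothetical:

import json

# Build the per-series index for a (hypothetical) DICOM tree and write it out as JSON.
series = dump_series2json('/data/dicom_root', mode='one_per_dir')
with open('series_index.json', 'w') as f:
    json.dump(series, f, indent=2)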
Example #12
def Main():
    '''
    JSON Parse Execution
    '''
    # Change to os.walk path
    # dcm_file = pydicom.read_file('c:/temp/pending_dexafit_files/1.2.840.113619.2.110.500342.20180111131347.3.1.12.1.dcm')

    if is_dicom('c:/temp/pending_dexafit_files/1.2.840.113619.2.110.500342.20180111131347.3.1.12.1.dcm'):
        print("It is indeed DICOM!")
        dcm_file = pydicom.dcmread(
            'c:/temp/pending_dexafit_files/1.2.840.113619.2.110.500342.20180111131347.3.1.12.1.dcm')
    else:
        print("It's probably not DICOM")
        return

    parsed_result = {}
    testType = dcm_file.StudyDescription
    studyDate = dcm_file.StudyDate
    studyTime = dcm_file.StudyTime
    deviceSerialNumber = dcm_file.DeviceSerialNumber
    institutionName = dcm_file.InstitutionName
    manufacturer = dcm_file.Manufacturer
    # manufacturerModelName = dcm_file.ManufacturersModelName
    # entranceDose = dcm_file.EntranceDoseinmGy
    studyInstanceUID = dcm_file.StudyInstanceUID
    seriesInstanceUID = dcm_file.SeriesInstanceUID
    parsed_result["testInfo"] = {
        "testType": testType,
        "studyDate": studyDate,
        "studyTime": studyTime,
        "deviceSerialNumber": deviceSerialNumber,
        "institutuionName": institutionName,
        # "manufacturerModelName": manufacturerModelName,
        "manufacturer": manufacturer,
        # "entranceDoseinmGy": entranceDose,
        "studyInstanceUID": studyInstanceUID,
        "seriesInstanceUID": seriesInstanceUID
    }
    # user_firstName = dcm_file.PatientsName.given_name
    # user_lastName = dcm_file.PatientsName.family_name
    user_id = dcm_file.PatientID
    ethnic_group = dcm_file.EthnicGroup
    # user_birthdate = dcm_file.PatientsBirthDate
    # user_sex = dcm_file.PatientsSex
    # user_age = dcm_file.PatientsAge
    # user_Size = dcm_file.PatientsSize
    # user_Weight = dcm_file.PatientsWeight
    parsed_result["userInfo"] = {
        # "firstName": user_firstName,
        # "lastName": user_lastName,
        "email": user_id,
        "ethnicGroup": ethnic_group,
        # "birthDate": user_birthdate,
        # "userSex": user_sex,
        # "userAge": user_age,
        # "userSize": user_Size,
        # "userWeight": user_Weight
    }

    xml_string = dcm_file.ImageComments
    xml_root = etree.fromstring(xml_string)

    parsed_result["bodyComposition"] = {}
    for leaf in xml_root.iter('COMP_ROI'):
        regionName = lowerCamelCase(leaf.attrib['region'])
        parsed_result["bodyComposition"][regionName] = {}
        for reading in leaf.iter():
            # skip attributes that don't have a value
            if 'units' not in reading.attrib:
                continue
            # Normalize the value (% or lbs)
            key = lowerCamelCase(reading.tag)
            units = reading.attrib['units'].strip()
            value = None
            if units == '%':
                value = normalizePercentageValue(float(reading.text))
            else:
                value = normalizeWeightValue(float(reading.text), units)
            # save the reading
            parsed_result["bodyComposition"][regionName][key] = value

    parsed_result["BMD"] = {}
    for leaf in xml_root.iter('ROI'):
        regionName = lowerCamelCase(leaf.attrib['region'])
        parsed_result["BMD"][regionName] = {}
        for reading in leaf.iter():
            if reading.text is None or reading.text == '-':
                continue
            key = lowerCamelCase(reading.tag)
            # units = reading.attrib['units'].strip()
            value = float(reading.text)
            parsed_result["BMD"][regionName][key] = value

    parsed_result["visceralFat"] = {}
    for leaf in xml_root.iter('VAT_MASS'):
        regionName = lowerCamelCase('Estimated Visceral Adipose Tissue')
        parsed_result["visceralFat"][regionName] = {}
        for reading in leaf.iter():
            # skip attributes that don't have a value
            if 'units' not in reading.attrib:
                continue
            # Normalize the value (% or lbs)
            key = lowerCamelCase(reading.tag)
            units = reading.attrib['units'].strip()
            value = None
            if units == '%':
                value = normalizePercentageValue(float(reading.text))
            else:
                value = normalizeWeightValue(float(reading.text), units)
            # save the reading
            parsed_result["visceralFat"][regionName][key] = value

    # convert it all to JSON and Save
    json_result = json.dumps(parsed_result)
    print(json_result)

    # Add data to queue
    data_to_process = (user_id, json_result, studyDate, studyTime)
    global_fifo_q.put(data_to_process)

    # When ready to save as JSON file.
    # with open('DXAParse.json','w') as outfile:
    #     outfile.write(json_result)

    # DICOM File has been parsed to JSON and saved as a new file.
    source_directory = 'c:/temp/pending_dexafit_files/'
    f = ReadDICOMFiles(source_directory)

    #Store in DB
    # Database Connection Details
    dsn_database = "dexafitpostgres"  # e.g. "compose"
    dsn_hostname = "dexafit-postgres-instance.cnhfhogvgcm2.us-east-2.rds.amazonaws.com"  # e.g.: "aws-us-east-1-portal.4.dblayer.com"
    dsn_port = "5432"  # e.g. 11101
    dsn_uid = "aws_dexafit"  # e.g. "admin"
    dsn_pwd = "$$dicomaws$$"  # e.g. "xxx"


    s = StoreData(dsn_hostname, dsn_port, dsn_database, dsn_uid, dsn_pwd)
    st = threading.Thread(target=s.retrieve_and_store, name='Retrieve Q Data', args=())
    st.start()
    st.join()
Example #13
def dump_series2json(dcm_root, mode='one_per_dir', series_file_pattern='00000001.dcm'):
    """
    :param dcm_root:
    :param mode:        'one_per_dir'
    :param series_file_pattern:
    :return:
    """
    if not os.path.exists(dcm_root): return {}
    series = {'01_PatientName':[],
              '02_PatientID':[],
              '03_StudyDate':[],
              '04_AcquisitionDateTime':[],
              '05_SeriesDescription':[],
              '06_NumberofSlices':[],
              '07_Load':[],
              '08_SeriesRoot':[],
              '09_SeriesFiles':[],
              '10_Type':[],
              '11_Func':[],
              '12_Task':[]}
    for subdir, _, files in os.walk(dcm_root):
        if len(files) <= 0: continue
        print('working on %s' % (subdir))
        # find right patterned files
        #TODO: verify whether re_pattern is doing anything
        re_pattern = series_file_pattern.replace('*', '.*')
        r_files = []
        if mode != 'one_per_dir': r_files = files
        else:
            for file in files: r_files += re.findall(re_pattern, file)
            r_files.sort()
            # keep a single representative file, if any matched the pattern
            r_files = r_files[:1]
        # iterate all found files
        pnames = []
        pids = []
        stypes = []
        sdates = []
        sdecrps = []
        acqdatetimes = []
        subdirs = []
        slices = {}
        series_keys = []
        tasks = []
        funcs = []
        for file in r_files:
            dcm_file = os.path.join(subdir, file)
            if not is_dicom(dcm_file): continue
            ds = pydicom.dcmread(dcm_file)
            if hasattr(ds,'Modality'): stype = str(ds.Modality)
            else: stype = 'NA'
            if hasattr(ds, 'AcquisitionDate'): acqdate = str(ds.AcquisitionDate)
            else: acqdate = 'NA'
            if hasattr(ds, 'AcquisitionTime'): acqtime = str(ds.AcquisitionTime)
            else: acqtime = 'NA'
            if hasattr(ds, 'PatientName'): pname = str(ds.PatientName)
            else: pname = 'NA'
            if hasattr(ds, 'PatientID'): pid = str(ds.PatientID)
            else: pid = 'NA'
            if hasattr(ds, 'StudyDate'): sdate = str(ds.StudyDate)
            else: sdate = 'NA'
            try: func = ds[0x0065, 0x102b].value.split('\\')[1]
            except Exception: func = 'NA'
            try: task = ds[0x0065, 0x100c].value
            except Exception: task = 'NA'
            if hasattr(ds, 'SeriesDescription'): sdecrp = str(ds.SeriesDescription)
            else: sdecrp = 'NA'
            series_key = str(sdecrp + subdir)
            if series_key not in series_keys:
                series_keys.append(series_key)
                slices[series_key] = []
                pnames.append(pname)
                pids.append(pid)
                sdates.append(sdate)
                sdecrps.append(sdecrp)
                acqdatetimes.append(acqdate + acqtime)
                subdirs.append(subdir)
                stypes.append(stype)
                funcs.append(func)
                tasks.append(task)
            slices[series_key].append(file)
        # append to series
        series['01_PatientName'] += pnames
        series['02_PatientID'] += pids
        series['03_StudyDate'] += sdates
        series['04_AcquisitionDateTime'] += acqdatetimes
        series['05_SeriesDescription'] += sdecrps
        series['07_Load'] += [False] * len(pnames)
        series['08_SeriesRoot'] += subdirs
        series['10_Type'] += stypes
        series['11_Func'] += funcs
        series['12_Task'] += tasks
        for series_key in series_keys:
            series['06_NumberofSlices'].append(len(slices[series_key]))
            series['09_SeriesFiles'].append(slices[series_key])
    return series