def test_get_dataset(self):
    """Test the different functions to get lists of data files."""
    # Test base locations
    charbase = os.path.join(DATA_ROOT, 'charset_files')
    self.assertTrue(os.path.exists(charbase))
    testbase = os.path.join(DATA_ROOT, 'test_files')
    self.assertTrue(os.path.exists(testbase))
    # Test file get
    chardata = get_charset_files()
    self.assertTrue(len(chardata) > 15)
    # Test that top level file is included
    bases = [basename(x) for x in chardata]
    # Test that subdirectory files included
    testdata = get_testdata_files()
    bases = [basename(x) for x in testdata]
    self.assertTrue('2693' in bases)
    self.assertTrue(len(testdata) > 70)
    # The files should be from their respective bases.  Use plain for
    # loops here: a list comprehension run only for its side effects
    # builds a throwaway list and obscures the intent.
    for x in testdata:
        self.assertTrue(testbase in x)
    for x in chardata:
        self.assertTrue(charbase in x)
def test_get_dataset(self):
    """Test the different functions to get lists of data files."""
    # Both data directories must be present in the installed package.
    charbase = os.path.join(DATA_ROOT, 'charset_files')
    assert os.path.exists(charbase)
    testbase = os.path.join(DATA_ROOT, 'test_files')
    assert os.path.exists(testbase)

    # Retrieve the charset file list and sanity-check its size.
    chardata = get_charset_files()
    assert len(chardata) > 15

    # Test that top level file is included
    bases = [basename(x) for x in chardata]

    # Test that subdirectory files are included
    testdata = get_testdata_files()
    bases = [basename(x) for x in testdata]
    assert '2693' in bases
    assert len(testdata) > 70

    # Every returned path must live under its respective base directory.
    for path in testdata:
        assert testbase in path
    for path in chardata:
        assert charbase in path
def test_japanese_multi_byte_personname(self):
    """Test japanese person name which has multi byte strings are
    correctly encoded."""
    file_path = get_charset_files('chrH32.dcm')[0]
    ds = dcmread(file_path)
    ds.decode()

    if hasattr(ds.PatientName, 'original_string'):
        original_string = ds.PatientName.original_string
        # Drop the cached raw bytes so that save_as() must re-encode
        # the name instead of writing the cached value back out.
        ds.PatientName.original_string = None
        fp = DicomBytesIO()
        fp.is_implicit_VR = False
        fp.is_little_endian = True
        ds.save_as(fp, write_like_original=False)
        fp.seek(0)
        ds_out = dcmread(fp)
        # The freshly encoded bytes must round-trip to the original
        # raw value.
        assert original_string == ds_out.PatientName.original_string

    # Encode a multi-component Japanese person name using the
    # ISO 2022 escape-sequence encodings and compare against the
    # expected byte sequence (ESC sequences switch character sets).
    japanese_pn = PersonName(u"Mori^Ogai=森^鷗外=もり^おうがい")
    pyencs = pydicom.charset.convert_encodings(
        ["ISO 2022 IR 6", "ISO 2022 IR 87", "ISO 2022 IR 159"])
    actual_encoded = bytes(japanese_pn.encode(pyencs))
    expect_encoded = (
        b"\x4d\x6f\x72\x69\x5e\x4f\x67\x61\x69\x3d\x1b\x24\x42\x3f"
        b"\x39\x1b\x28\x42\x5e\x1b\x24\x28\x44\x6c\x3f\x1b\x24\x42"
        b"\x33\x30\x1b\x28\x42\x3d\x1b\x24\x42\x24\x62\x24\x6a\x1b"
        b"\x28\x42\x5e\x1b\x24\x42\x24\x2a\x24\x26\x24\x2c\x24\x24"
        b"\x1b\x28\x42")
    assert expect_encoded == actual_encoded
def test_charset_patient_names(self, filename, patient_name):
    """Test patient names are correctly decoded and encoded."""
    # Read the file, decode it and check the expected name.
    path = get_charset_files(filename + '.dcm')[0]
    ds = dcmread(path)
    ds.decode()
    assert ds.PatientName == patient_name

    # Write the dataset back out and re-read it: the decoded name
    # must survive the round trip.
    buffer = DicomBytesIO()
    buffer.is_implicit_VR = False
    buffer.is_little_endian = True
    ds.save_as(buffer, write_like_original=False)
    buffer.seek(0)
    ds = dcmread(buffer)
    assert ds.PatientName == patient_name

    # Repeat the round trip without the cached original byte string
    # (PersonName3 only), forcing a fresh encode on write.
    if hasattr(ds.PatientName, 'original_string'):
        ds.PatientName.original_string = None
        buffer = DicomBytesIO()
        buffer.is_implicit_VR = False
        buffer.is_little_endian = True
        ds.save_as(buffer, write_like_original=False)
        buffer.seek(0)
        ds = dcmread(buffer)
        assert ds.PatientName == patient_name
def test_decoding_with_specific_tags(self):
    """Decoding is correctly applied even if Specific Character Set
    is not in specific tags..."""
    path = get_charset_files("chrRuss.dcm")[0]
    ds = dcmread(path, specific_tags=['PatientName'])
    ds.decode()
    # Specific Character Set is always read regardless of the tag
    # filter, so the dataset holds it plus PatientName.
    assert len(ds) == 2
    assert ds.PatientName == u'Люкceмбypг'
def test_get_dataset_pattern(self):
    """Test that pattern is working properly."""
    # A glob-style pattern selects matching test data files.
    matches = get_testdata_files('CT_small*')
    assert matches[0].endswith('CT_small.dcm')

    # The same mechanism works for the charset files.
    matches = get_charset_files('chrX1*')
    assert matches[0].endswith('chrX1.dcm')
def test_get_dataset_pattern(self):
    """Test that pattern is working properly."""
    # A name pattern selects matching test data files.
    matches = get_testdata_files('CT_small')
    self.assertTrue(matches[0].endswith('CT_small.dcm'))

    # The same mechanism works for the charset files.
    matches = get_charset_files('chrX1')
    self.assertTrue(matches[0].endswith('chrX1.dcm'))
def test_inherited_character_set_in_sequence(self):
    """charset: can read and decode SQ with parent encoding............."""
    ds = dcmread(get_charset_files('chrSQEncoding1.dcm')[0])
    ds.decode()
    # The items in this sequence carry no character set of their own,
    # so they must be decoded using the parent dataset's encoding.
    item = ds[0x32, 0x1064][0]
    assert item._character_set == ['shift_jis', 'iso2022_jp']
    assert item.PatientName == u'ヤマダ^タロウ=山田^太郎=やまだ^たろう'
def test_changed_character_set(self):
    # Regression test for #629
    from pydicom.filebase import DicomBytesIO

    path = get_charset_files("chrFrenMulti.dcm")[0]
    ds = dcmread(path)  # is Latin-1
    # Switch the declared character set to UTF-8 before writing.
    ds.SpecificCharacterSet = 'ISO_IR 192'
    buffer = DicomBytesIO()
    ds.save_as(buffer, write_like_original=False)
    buffer.seek(0)
    ds_out = dcmread(buffer)
    # we expect UTF-8 encoding here
    assert ds_out.get_item(0x00100010).value == b'Buc^J\xc3\xa9r\xc3\xb4me'
def test_nested_character_sets(self):
    """charset: can read and decode SQ with different encodings........."""
    ds = dcmread(get_charset_files("chrSQEncoding.dcm")[0])
    ds.decode()
    # The items in this sequence cannot be decoded with either the
    # default encoding or the parent dataset's encoding; their own
    # (0008,0005) value must be honoured instead.
    item = ds[0x32, 0x1064][0]
    assert item._character_set == ['shift_jis', 'iso2022_jp']
    assert item.PatientName == u'ヤマダ^タロウ=山田^太郎=やまだ^たろう'
def test_japanese_multi_byte_personname(self):
    """Test japanese person name which has multi byte strings are
    correctly encoded."""
    ds = dcmread(get_charset_files('chrH32.dcm')[0])
    ds.decode()

    if hasattr(ds.PatientName, 'original_string'):
        original_string = ds.PatientName.original_string
        # Drop the cached raw bytes so writing must re-encode the name.
        ds.PatientName.original_string = None
        buffer = DicomBytesIO()
        buffer.is_implicit_VR = False
        buffer.is_little_endian = True
        ds.save_as(buffer, write_like_original=False)
        buffer.seek(0)
        ds_out = dcmread(buffer)
        # The freshly encoded bytes must equal the original raw value.
        assert ds_out.PatientName.original_string == original_string
def test_get_dataset(self): """Test the different functions to get lists of data files.""" # The cached files downloaded from the pydicom-data repo cached_data_test_files = str(get_data_dir()) # If pydicom-data is available locally ext_path = None if 'pydicom-data' in external_data_sources(): ext_path = os.fspath( external_data_sources()['pydicom-data'].data_path) # Test base locations charbase = os.path.join(DATA_ROOT, 'charset_files') assert os.path.exists(charbase) testbase = os.path.join(DATA_ROOT, 'test_files') assert os.path.exists(testbase) # Test file get chardata = get_charset_files() assert 15 < len(chardata) # Test that top level file is included bases = [basename(x) for x in chardata] # Test that subdirectory files included testdata = get_testdata_files() bases = [basename(x) for x in testdata] assert '2693' in bases assert 70 < len(testdata) # The files should be from their respective bases for x in testdata: # Don't check files from external sources other than pydicom-data if (testbase not in x and cached_data_test_files not in x and (ext_path not in x if ext_path else True)): continue assert (testbase in x or cached_data_test_files in x or (ext_path in x if ext_path else False)) for x in chardata: assert charbase in x
def test_charset_patient_names(self, filename, patient_name):
    """Test that patient names are correctly decoded.

    Parameters
    ----------
    filename : str
        Base name (without the '.dcm' suffix) of the charset test
        file to read.
    patient_name : str
        The expected decoded Patient Name value.
    """
    # Docstring fixed: it previously described an unrelated
    # pixel-data test ("Test pixel_array for big endian...").
    file_path = get_charset_files(filename + '.dcm')[0]
    ds = dcmread(file_path)
    ds.decode()
    assert patient_name == ds.PatientName
# Copyright 2008-2018 pydicom authors. See LICENSE file for details. """unittest cases for pydicom.charset module""" import unittest from pydicom.data import get_charset_files from pydicom.data import get_testdata_files import pydicom.charset from pydicom.dataelem import DataElement from pydicom import dcmread latin1_file = get_charset_files("chrFren.dcm")[0] jp_file = get_charset_files("chrH31.dcm")[0] multiPN_file = get_charset_files("chrFrenMulti.dcm")[0] sq_encoding_file = get_charset_files("chrSQEncoding.dcm")[0] sq_encoding1_file = get_charset_files("chrSQEncoding1.dcm")[0] explicit_ir6_file = get_charset_files("chrJapMultiExplicitIR6.dcm")[0] normal_file = get_testdata_files("CT_small.dcm")[0] class CharsetTests(unittest.TestCase): def test_latin1(self): """charset: can read and decode latin_1 file........................""" ds = dcmread(latin1_file) ds.decode() # Make sure don't get unicode encode error on converting to string expected = u'Buc^J\xe9r\xf4me' got = ds.PatientName self.assertEqual(expected, got, "Expected %r, got %r" % (expected, got))
"""List summary info for the test files in the charset directory""" from glob import glob import logging import os import pydicom from pydicom.data import get_charset_files if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format='%(message)s') logger = logging.getLogger('charlist') # Get list of all DICOM files names = get_charset_files("*.dcm") # Collect summary information from the files files_info = [] for name in names: ds = pydicom.dcmread(name) ds.decode() fname = os.path.basename(name) try: files_info.append((fname, ds.SpecificCharacterSet, ds.PatientName)) except Exception: try: requested_seq = ds.RequestedProcedureCodeSequence[0] spec_charset = requested_seq.SpecificCharacterSet patient_name = requested_seq.PatientName files_info.append((fname,
# -*- coding: utf-8 -*- # Copyright 2008-2018 pydicom authors. See LICENSE file for details. """unittest cases for pydicom.charset module""" import unittest import pytest from pydicom.data import get_charset_files from pydicom.data import get_testdata_files import pydicom.charset from pydicom.dataelem import DataElement, RawDataElement, DataElement_from_raw from pydicom import dcmread, Dataset latin1_file = get_charset_files("chrFren.dcm")[0] jp_file = get_charset_files("chrH31.dcm")[0] multiPN_file = get_charset_files("chrFrenMulti.dcm")[0] sq_encoding_file = get_charset_files("chrSQEncoding.dcm")[0] sq_encoding1_file = get_charset_files("chrSQEncoding1.dcm")[0] explicit_ir6_file = get_charset_files("chrJapMultiExplicitIR6.dcm")[0] normal_file = get_testdata_files("CT_small.dcm")[0] class CharsetTests(unittest.TestCase): def test_latin1(self): """charset: can read and decode latin_1 file........................""" ds = dcmread(latin1_file) ds.decode() # Make sure don't get unicode encode error on converting to string expected = u'Buc^Jérôme' got = ds.PatientName