Example 1
# Assumed module-level imports in the original heudiconv module (not shown on
# this page); heudiconv aliases pydicom as `dcm`.
from datetime import datetime
from random import sample

import pydicom as dcm


def get_formatted_scans_key_row(item):
    """
    Parameters
    ----------
    item : list or tuple
        Its last element is expected to be a list of DICOM filenames; the
        first of those files is read for the metadata.

    Returns
    -------
    row: list
        [ISO acquisition time, performing physician name, random string]

    """
    dcm_fn = item[-1][0]
    from heudiconv.external.dcmstack import ds
    mw = ds.wrapper_from_data(
        dcm.read_file(dcm_fn, stop_before_pixels=True, force=True))
    # we need to store filenames and acquisition times
    # parse date and time and get it into isoformat
    date = mw.dcm_data.ContentDate
    time = mw.dcm_data.ContentTime.split('.')[0]
    td = time + date
    acq_time = datetime.strptime(td, '%H%M%S%Y%m%d').isoformat()
    # add random string
    randstr = ''.join(map(chr, sample(k=8, population=range(33, 127))))
    try:
        perfphys = mw.dcm_data.PerformingPhysicianName
    except AttributeError:
        perfphys = ''
    row = [acq_time, perfphys, randstr]
    # empty entries should be 'n/a'
    # https://github.com/dartmouth-pbs/heudiconv/issues/32
    row = ['n/a' if not str(e) else e for e in row]
    return row
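A minimal standalone sketch of the timestamp step above (values are made up): ContentTime has its fractional part dropped, is concatenated with ContentDate, and the result is parsed with a single strptime format.

from datetime import datetime

date = '20240131'                     # ContentDate, YYYYMMDD
time = '142530.123456'.split('.')[0]  # ContentTime, HHMMSS (fraction dropped)
acq_time = datetime.strptime(time + date, '%H%M%S%Y%m%d').isoformat()
print(acq_time)                       # 2024-01-31T14:25:30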
Example 2
def test_grouping(tmpdir, subject):
    dicoms = [
        op.join(TESTS_DATA_PATH, fl) for fl in ['axasc35.dcm', 'phantom.dcm']
    ]
    # ensure DICOMs are different studies
    studyuids = {
        dcm.read_file(fl, stop_before_pixels=True).StudyInstanceUID
        for fl in dicoms
    }
    assert len(studyuids) == len(dicoms)
    # symlink to common location
    outdir = tmpdir.mkdir('out')
    datadir = tmpdir.mkdir(subject)
    for fl in dicoms:
        os.symlink(fl, (datadir / op.basename(fl)).strpath)

    template = "{subject}/*.dcm"
    hargs = gen_heudiconv_args(tmpdir.strpath,
                               outdir.strpath,
                               subject,
                               'convertall.py',
                               template=template)

    with pytest.raises(AssertionError):
        runner(hargs)

    # group all found DICOMs under subject, despite conflicts
    hargs += ["-g", "all"]
    runner(hargs)
    assert len([fl for fl in outdir.visit(fil='run0*')]) == 4
    tsv = (outdir / 'participants.tsv')
    assert tsv.check()
    lines = tsv.open().readlines()
    assert len(lines) == 2
    assert lines[1].split('\t')[0] == 'sub-{}'.format(subject)
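The test drives heudiconv through gen_heudiconv_args and runner. A hedged sketch of what an equivalent direct invocation might look like (the import path, heuristic name, and all paths here are assumptions, not taken from the test):

from heudiconv.cli.run import main as runner  # assumed CLI entry point

runner([
    '-d', '/tmp/dicoms/{subject}/*.dcm',  # DICOM template (placeholder path)
    '-s', 'qc',                           # subject label (placeholder)
    '-f', 'convertall',                   # built-in heuristic
    '-c', 'none',                         # no conversion, just grouping
    '-o', '/tmp/out',                     # output directory (placeholder)
    '-g', 'all',                          # group all DICOMs despite study conflicts
])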
Example 3
def test_private_csa_header(tmpdir):
    dcm_file = op.join(TESTS_DATA_PATH, 'axasc35.dcm')
    dcm_data = dcm.read_file(dcm_file, stop_before_pixels=True)
    for pub, priv in DICOM_FIELDS_TO_TEST.items():
        # ensure missing public tag
        with pytest.raises(AttributeError):
            getattr(dcm_data, pub)
        # ensure private tag is found
        assert parse_private_csa_header(dcm_data, pub, priv) != ''
        # and quickly run heudiconv with no conversion
        runner(['--files', dcm_file, '-c', 'none', '-f', 'reproin'])
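parse_private_csa_header falls back to Siemens private CSA elements when the public attribute is absent. A rough, hedged sketch of addressing such a private element directly with pydicom (the tag below is the conventional Siemens CSA image-header location, an assumption rather than something taken from DICOM_FIELDS_TO_TEST):

ds = dcm.read_file(op.join(TESTS_DATA_PATH, 'axasc35.dcm'),
                   stop_before_pixels=True)
csa = ds.get((0x0029, 0x1010))  # CSA Image Header Info (Siemens private block)
if csa is not None:
    print('%d bytes of CSA image header' % len(csa.value))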
Example 4
# Assumed module-level import (not shown on this page); heudiconv aliases
# pydicom as `dcm`.
import pydicom as dcm


def get_dicom_series_time(dicom_list):
    """Get time in seconds since epoch from dicom series date and time
    Primarily to be used for reproducible time stamping
    """
    import time
    import calendar

    dicom = dcm.read_file(dicom_list[0], stop_before_pixels=True, force=True)
    dcm_date = dicom.SeriesDate  # YYYYMMDD
    dcm_time = dicom.SeriesTime  # HHMMSS.MICROSEC
    dicom_time_str = dcm_date + dcm_time.split('.', 1)[0]  # YYYYMMDDHHMMSS
    # convert to epoch
    return calendar.timegm(time.strptime(dicom_time_str, '%Y%m%d%H%M%S'))
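calendar.timegm interprets the parsed struct_time as UTC, so the resulting epoch value does not depend on the timezone of the machine doing the conversion. A quick standalone check with fixed values:

import calendar
import time

stamp = calendar.timegm(time.strptime('20240131' + '142530', '%Y%m%d%H%M%S'))
print(stamp)  # 1706711130, regardless of the local timezone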
Example 5
# Assumed module-level imports (not shown on this page); heudiconv aliases
# pydicom as `dcm` and uses `lgr` as the module logger.
import hashlib
import logging
from datetime import datetime

import pydicom as dcm

lgr = logging.getLogger(__name__)


def get_formatted_scans_key_row(dcm_fn):
    """
    Parameters
    ----------
    dcm_fn : str
        Path to the DICOM file to read the metadata from.

    Returns
    -------
    row: list
        [ISO acquisition time, performing physician name, random string]

    """
    dcm_data = dcm.read_file(dcm_fn, stop_before_pixels=True, force=True)
    # we need to store filenames and acquisition times
    # parse date and time and get it into isoformat
    try:
        date = dcm_data.ContentDate
        time = dcm_data.ContentTime.split('.')[0]
        td = time + date
        acq_time = datetime.strptime(td, '%H%M%S%Y%m%d').isoformat()
    except AttributeError as exc:
        lgr.warning("Failed to get date/time for the content: %s", str(exc))
        acq_time = None
    # add random string
    # But let's make it reproducible by using all UIDs
    # (might change across versions?)
    randcontent = u''.join([
        getattr(dcm_data, f) or '' for f in sorted(dir(dcm_data))
        if f.endswith('UID')
    ])
    randstr = hashlib.md5(randcontent.encode()).hexdigest()[:8]
    try:
        perfphys = dcm_data.PerformingPhysicianName
    except AttributeError:
        perfphys = ''
    row = [acq_time, perfphys, randstr]
    # empty entries should be 'n/a'
    # https://github.com/dartmouth-pbs/heudiconv/issues/32
    row = ['n/a' if not str(e) else e for e in row]
    return row
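Unlike the purely random suffix in the other variants, this one hashes the concatenation of every *UID attribute, so the same DICOM always yields the same 8-character token. A standalone sketch of that idea with made-up UID strings:

import hashlib

uids = ['1.2.840.113619.2.55.3', '1.3.12.2.1107.5.2.43.67026']  # made-up UIDs
randstr = hashlib.md5(''.join(sorted(uids)).encode()).hexdigest()[:8]
print(randstr)  # identical input always gives the same token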
Example 6
# Assumed module-level imports (not shown on this page); heudiconv aliases
# pydicom as `dcm` and uses `lgr` as the module logger.
import logging
from datetime import datetime
from random import sample

import pydicom as dcm

lgr = logging.getLogger(__name__)


def get_formatted_scans_key_row(item):
    """
    Parameters
    ----------
    item : list or tuple
        Its last element is expected to be a list of DICOM filenames; the
        first of those files is read for the metadata.

    Returns
    -------
    row: list
        [ISO acquisition time, performing physician name, random string]

    """
    dcm_fn = item[-1][0]
    from heudiconv.external.dcmstack import ds
    mw = ds.wrapper_from_data(dcm.read_file(dcm_fn,
                                            stop_before_pixels=True,
                                            force=True))
    # we need to store filenames and acquisition times
    # parse date and time and get it into isoformat
    try:
        date = mw.dcm_data.ContentDate
        time = mw.dcm_data.ContentTime.split('.')[0]
        td = time + date
        acq_time = datetime.strptime(td, '%H%M%S%Y%m%d').isoformat()
    except AttributeError as exc:
        lgr.warning("Failed to get date/time for the content: %s", str(exc))
        acq_time = None
    # add random string
    randstr = ''.join(map(chr, sample(k=8, population=range(33, 127))))
    try:
        perfphys = mw.dcm_data.PerformingPhysicianName
    except AttributeError:
        perfphys = ''
    row = [acq_time, perfphys, randstr]
    # empty entries should be 'n/a'
    # https://github.com/dartmouth-pbs/heudiconv/issues/32
    row = ['n/a' if not str(e) else e for e in row]
    return row
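Here the suffix is eight distinct printable ASCII characters (codes 33 through 126), so, unlike the UID-hashing variant in the previous example, the value changes on every run. The line in isolation:

from random import sample

randstr = ''.join(map(chr, sample(population=range(33, 127), k=8)))
print(randstr)  # e.g. 'Q}7f!xA2' -- different every run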
Example 7
def group_dicoms_into_seqinfos(files, file_filter, dcmfilter, grouping):
    """Process list of dicoms and return seqinfo and file group
    `seqinfo` contains per-sequence extract of fields from DICOMs which
    will be later provided into heuristics to decide on filenames
    Parameters
    ----------
    files : list of str
      List of files to consider
    file_filter : callable, optional
      Applied to each item of filenames. Should return True if file needs to be
      kept, False otherwise.
    dcmfilter : callable, optional
      If called on dcm_data and returns True, it is used to set series_id
    grouping : {'studyUID', 'accession_number', None}, optional
        what to group by: studyUID or accession_number
    Returns
    -------
    seqinfo : list of list
      `seqinfo` is a list of info entries per each sequence (some entry
      there defines a key for `filegrp`)
    filegrp : dict
      `filegrp` is a dictionary with files groupped per each sequence
    """
    allowed_groupings = ['studyUID', 'accession_number', None]
    if grouping not in allowed_groupings:
        raise ValueError('I do not know how to group by {0}'.format(grouping))
    per_studyUID = grouping == 'studyUID'
    per_accession_number = grouping == 'accession_number'
    lgr.info("Analyzing %d dicoms", len(files))

    groups = [[], []]
    mwgroup = []

    studyUID = None
    # for sanity check that all DICOMs came from the same
    # "study".  If not -- what is the use-case? (interrupted acquisition?)
    # and how would then we deal with series numbers
    # which would differ already
    if file_filter:
        nfl_before = len(files)
        files = list(filter(file_filter, files))
        nfl_after = len(files)
        lgr.info('Filtering out {0} dicoms based on their filename'.format(
            nfl_before-nfl_after))
    for fidx, filename in enumerate(files):
        from heudiconv.external.dcmstack import ds
        # TODO after getting a regression test check if the same behavior
        #      with stop_before_pixels=True
        mw = ds.wrapper_from_data(dcm.read_file(filename, force=True))

        for sig in ('iop', 'ICE_Dims', 'SequenceName'):
            try:
                del mw.series_signature[sig]
            except KeyError:
                pass

        try:
            file_studyUID = mw.dcm_data.StudyInstanceUID
        except AttributeError:
            lgr.info("File {} is missing any StudyInstanceUID".format(filename))
            file_studyUID = None

        try:
            series_id = (int(mw.dcm_data.SeriesNumber),
                         mw.dcm_data.ProtocolName)
            file_studyUID = mw.dcm_data.StudyInstanceUID

            if not per_studyUID:
                # verify that we are working with a single study
                if studyUID is None:
                    studyUID = file_studyUID
                elif not per_accession_number:
                    assert studyUID == file_studyUID, (
                        "Conflicting study identifiers found [{}, {}].".format(
                            studyUID, file_studyUID))
        except AttributeError as exc:
            lgr.warning('Ignoring %s since not quite a "normal" DICOM: %s',
                        filename, exc)
            series_id = (-1, 'none')
            file_studyUID = None

        if not series_id[0] < 0:
            if dcmfilter is not None and dcmfilter(mw.dcm_data):
                series_id = (-1, mw.dcm_data.ProtocolName)

        # filter out unwanted non-image-data DICOMs by assigning
        # a series number < 0 (see test below)
        if not series_id[0] < 0 and mw.dcm_data[0x0008, 0x0016].repval in (
                'Raw Data Storage',
                'GrayscaleSoftcopyPresentationStateStorage'):
            series_id = (-1, mw.dcm_data.ProtocolName)

        if per_studyUID:
            series_id = series_id + (file_studyUID,)

        ingrp = False
        for idx in range(len(mwgroup)):
            # same = mw.is_same_series(mwgroup[idx])
            if mw.is_same_series(mwgroup[idx]):
                # the same series should have the same study uuid
                assert (mwgroup[idx].dcm_data.get('StudyInstanceUID', None)
                        == file_studyUID)
                ingrp = True
                if series_id[0] >= 0:
                    series_id = (mwgroup[idx].dcm_data.SeriesNumber,
                                 mwgroup[idx].dcm_data.ProtocolName)
                    if per_studyUID:
                        series_id = series_id + (file_studyUID,)
                groups[0].append(series_id)
                groups[1].append(idx)

        if not ingrp:
            mwgroup.append(mw)
            groups[0].append(series_id)
            groups[1].append(len(mwgroup) - 1)

    group_map = dict(zip(groups[0], groups[1]))

    total = 0
    seqinfo = OrderedDict()

    # for the next line to make any sense the series_id needs to
    # be sortable in a way that preserves the series order
    for series_id, mwidx in sorted(group_map.items()):
        if series_id[0] < 0:
            # skip our fake series with unwanted files
            continue
        mw = mwgroup[mwidx]
        if mw.image_shape is None:
            # this whole thing has no image data (maybe just PSg DICOMs)
            # nothing to see here, just move on
            continue
        dcminfo = mw.dcm_data
        series_files = [files[i] for i, s in enumerate(groups[0])
                        if s == series_id]
        # turn the series_id into a human-readable string -- string is needed
        # for JSON storage later on
        if per_studyUID:
            studyUID = series_id[2]
            series_id = series_id[:2]
        accession_number = dcminfo.get('AccessionNumber')

        series_id = '-'.join(map(str, series_id))

        size = list(mw.image_shape) + [len(series_files)]
        total += size[-1]
        if len(size) < 4:
            size.append(1)

        # MG - refactor into util function
        try:
            TR = float(dcminfo.RepetitionTime) / 1000.
        except (AttributeError, ValueError):
            TR = -1
        try:
            TE = float(dcminfo.EchoTime)
        except (AttributeError, ValueError):
            TE = -1
        try:
            refphys = str(dcminfo.ReferringPhysicianName)
        except AttributeError:
            refphys = ''
        try:
            image_type = tuple(dcminfo.ImageType)
        except AttributeError:
            image_type = ''
        try:
            series_desc = dcminfo.SeriesDescription
        except AttributeError:
            series_desc = ''

        motion_corrected = 'MOCO' in image_type

        if dcminfo.get([0x18,0x24], None):
            # GE and Philips scanners
            sequence_name = dcminfo[0x18,0x24].value
        elif dcminfo.get([0x19, 0x109c], None):
            # Siemens scanners
            sequence_name = dcminfo[0x19, 0x109c].value
        else:
            sequence_name = 'Not found'

        info = SeqInfo(
            total,
            op.split(series_files[0])[1],
            series_id,
            op.basename(op.dirname(series_files[0])),
            '-', '-',
            size[0], size[1], size[2], size[3],
            TR, TE,
            dcminfo.ProtocolName,
            motion_corrected,
            'derived' in [x.lower() for x in dcminfo.get('ImageType', [])],
            dcminfo.get('PatientID'),
            dcminfo.get('StudyDescription'),
            refphys,
            dcminfo.get('SeriesDescription'),
            sequence_name,
            image_type,
            accession_number,
            # For demographics to populate BIDS participants.tsv
            dcminfo.get('PatientAge'),
            dcminfo.get('PatientSex'),
            dcminfo.get('AcquisitionDate'),
        )
        # candidates
        # dcminfo.AccessionNumber
        #   len(dcminfo.ReferencedImageSequence)
        #   len(dcminfo.SourceImageSequence)
        # FOR demographics
        if per_studyUID:
            key = studyUID.split('.')[-1]
        elif per_accession_number:
            key = accession_number
        else:
            key = ''
        lgr.debug("%30s %30s %27s %27s %5s nref=%-2d nsrc=%-2d %s" % (
            key,
            info.series_id,
            dcminfo.SeriesDescription,
            dcminfo.ProtocolName,
            info.is_derived,
            len(dcminfo.get('ReferencedImageSequence', '')),
            len(dcminfo.get('SourceImageSequence', '')),
            info.image_type
        ))
        if per_studyUID:
            if studyUID not in seqinfo:
                seqinfo[studyUID] = OrderedDict()
            seqinfo[studyUID][info] = series_files
        elif per_accession_number:
            if accession_number not in seqinfo:
                seqinfo[accession_number] = OrderedDict()
            seqinfo[accession_number][info] = series_files
        else:
            seqinfo[info] = series_files

    if per_studyUID:
        lgr.info("Generated sequence info for %d studies with %d entries total",
                 len(seqinfo), sum(map(len, seqinfo.values())))
    elif per_accession_number:
        lgr.info("Generated sequence info for %d accession numbers with %d "
                 "entries total", len(seqinfo), sum(map(len, seqinfo.values())))
    else:
        lgr.info("Generated sequence info with %d entries", len(seqinfo))
    return seqinfo
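A hedged usage sketch (every path below is a placeholder): grouping a directory of DICOMs by StudyInstanceUID and listing the series found per study. With grouping='studyUID' the returned mapping is nested by study UID.

from glob import glob

files = sorted(glob('/data/dicoms/**/*.dcm', recursive=True))  # placeholder
seqinfos = group_dicoms_into_seqinfos(files, file_filter=None,
                                      dcmfilter=None, grouping='studyUID')
for study_uid, series in seqinfos.items():
    for info, series_files in series.items():
        print(study_uid, info.series_id, len(series_files))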
Example 8
def group_dicoms_into_seqinfos(files, file_filter, dcmfilter, grouping):
    """Process list of dicoms and return seqinfo and file group
    `seqinfo` contains per-sequence extract of fields from DICOMs which
    will be later provided into heuristics to decide on filenames
    Parameters
    ----------
    files : list of str
      List of files to consider
    file_filter : callable, optional
      Applied to each item of filenames. Should return True if file needs to be
      kept, False otherwise.
    dcmfilter : callable, optional
      If called on dcm_data and returns True, it is used to set series_id
    grouping : {'studyUID', 'accession_number', None}, optional
        what to group by: studyUID or accession_number
    Returns
    -------
    seqinfo : list of list
      `seqinfo` is a list of info entries per each sequence (some entry
      there defines a key for `filegrp`)
    filegrp : dict
      `filegrp` is a dictionary with files groupped per each sequence
    """
    lgr.info("FFFFFFFFFFFFFFFFFFFFFFFFFf")
    allowed_groupings = ['studyUID', 'accession_number', None]
    if grouping not in allowed_groupings:
        raise ValueError('I do not know how to group by {0}'.format(grouping))
    per_studyUID = grouping == 'studyUID'
    per_accession_number = grouping == 'accession_number'
    lgr.info("Analyzing %d dicoms", len(files))
    groups = [[], []]
    mwgroup = []

    studyUID = None
    # for sanity check that all DICOMs came from the same
    # "study".  If not -- what is the use-case? (interrupted acquisition?)
    # and how would then we deal with series numbers
    # which would differ already
    if file_filter:
        nfl_before = len(files)
        files = list(filter(file_filter, files))
        nfl_after = len(files)
        lgr.info('Filtering out {0} dicoms based on their filename'.format(
            nfl_before - nfl_after))
    for fidx, filename in enumerate(files):
        from heudiconv.external.dcmstack import ds
        # TODO after getting a regression test check if the same behavior
        #      with stop_before_pixels=True
        mw = ds.wrapper_from_data(dcm.read_file(filename, force=True))
        lgr.info("WWWWWWWWWWWWWWWWWWWWWWw")
        lgr.info("dicoms.py>group_dicoms_into_seqinfos:vars(mw) var: %s",
                 vars(mw))

        for sig in ('iop', 'ICE_Dims', 'SequenceName'):
            try:
                del mw.series_signature[sig]
            except KeyError:
                pass

        try:
            file_studyUID = mw.dcm_data.StudyInstanceUID
        except AttributeError:
            lgr.info("dicoms.py-group_dicoms_into_seqinfo var: %s", vars(mw))
            lgr.info(
                "File {} is missing any StudyInstanceUID".format(filename))
            file_studyUID = None

        try:
            series_id = (int(mw.dcm_data.SeriesNumber),
                         mw.dcm_data.ProtocolName)
            file_studyUID = mw.dcm_data.StudyInstanceUID

            if not per_studyUID:
                # verify that we are working with a single study
                if studyUID is None:
                    studyUID = file_studyUID
                elif not per_accession_number:
                    assert studyUID == file_studyUID, (
                        "Conflicting study identifiers found [{}, {}].".format(
                            studyUID, file_studyUID))
        except AttributeError as exc:
            lgr.warning('Ignoring %s since not quite a "normal" DICOM: %s',
                        filename, exc)
            series_id = (-1, 'none')
            file_studyUID = None

        if not series_id[0] < 0:
            if dcmfilter is not None and dcmfilter(mw.dcm_data):
                series_id = (-1, mw.dcm_data.ProtocolName)

        # filter out unwanted non-image-data DICOMs by assigning
        # a series number < 0 (see test below)
        if not series_id[0] < 0 and mw.dcm_data[0x0008, 0x0016].repval in (
                'Raw Data Storage',
                'GrayscaleSoftcopyPresentationStateStorage'):
            series_id = (-1, mw.dcm_data.ProtocolName)

        if per_studyUID:
            series_id = series_id + (file_studyUID, )

        ingrp = False
        for idx in range(len(mwgroup)):
            # same = mw.is_same_series(mwgroup[idx])
            if mw.is_same_series(mwgroup[idx]):
                # the same series should have the same study uuid
                assert (mwgroup[idx].dcm_data.get('StudyInstanceUID',
                                                  None) == file_studyUID)
                ingrp = True
                if series_id[0] >= 0:
                    series_id = (mwgroup[idx].dcm_data.SeriesNumber,
                                 mwgroup[idx].dcm_data.ProtocolName)
                    if per_studyUID:
                        series_id = series_id + (file_studyUID, )
                groups[0].append(series_id)
                groups[1].append(idx)

        if not ingrp:
            mwgroup.append(mw)
            groups[0].append(series_id)
            groups[1].append(len(mwgroup) - 1)

    group_map = dict(zip(groups[0], groups[1]))

    total = 0
    seqinfo = OrderedDict()

    # for the next line to make any sense the series_id needs to
    # be sortable in a way that preserves the series order
    for series_id, mwidx in sorted(group_map.items()):
        if series_id[0] < 0:
            # skip our fake series with unwanted files
            continue
        mw = mwgroup[mwidx]
        if mw.image_shape is None:
            # this whole thing has no image data (maybe just PSg DICOMs)
            # nothing to see here, just move on
            continue
        dcminfo = mw.dcm_data
        series_files = [
            files[i] for i, s in enumerate(groups[0]) if s == series_id
        ]
        # turn the series_id into a human-readable string -- string is needed
        # for JSON storage later on
        if per_studyUID:
            studyUID = series_id[2]
            series_id = series_id[:2]
        accession_number = dcminfo.get('AccessionNumber')

        series_id = '-'.join(map(str, series_id))

        size = list(mw.image_shape) + [len(series_files)]
        total += size[-1]
        if len(size) < 4:
            size.append(1)

        # MG - refactor into util function
        try:
            TR = float(dcminfo.RepetitionTime) / 1000.
        except (AttributeError, ValueError):
            TR = -1
        try:
            TE = float(dcminfo.EchoTime)
        except (AttributeError, ValueError):
            TE = -1
        try:
            refphys = str(dcminfo.ReferringPhysicianName)
        except AttributeError:
            refphys = ''
        try:
            image_type = tuple(dcminfo.ImageType)
        except AttributeError:
            image_type = ''
        try:
            series_desc = dcminfo.SeriesDescription
        except AttributeError:
            series_desc = ''

        motion_corrected = 'MOCO' in image_type

        if dcminfo.get([0x18, 0x24], None):
            # GE and Philips scanners
            sequence_name = dcminfo[0x18, 0x24].value
        elif dcminfo.get([0x19, 0x109c], None):
            # Siemens scanners
            sequence_name = dcminfo[0x19, 0x109c].value
        else:
            sequence_name = 'Not found'

        info = SeqInfo(
            total,
            op.split(series_files[0])[1],
            series_id,
            op.basename(op.dirname(series_files[0])),
            '-',
            '-',
            size[0],
            size[1],
            size[2],
            size[3],
            TR,
            TE,
            dcminfo.ProtocolName,
            motion_corrected,
            'derived' in [x.lower() for x in dcminfo.get('ImageType', [])],
            dcminfo.get('PatientID'),
            dcminfo.get('StudyDescription'),
            refphys,
            dcminfo.get('SeriesDescription'),
            sequence_name,
            image_type,
            accession_number,
            # For demographics to populate BIDS participants.tsv
            dcminfo.get('PatientAge'),
            dcminfo.get('PatientSex'),
            dcminfo.get('AcquisitionDate'),
        )
        # candidates
        # dcminfo.AccessionNumber
        #   len(dcminfo.ReferencedImageSequence)
        #   len(dcminfo.SourceImageSequence)
        # FOR demographics
        if per_studyUID:
            key = studyUID.split('.')[-1]
        elif per_accession_number:
            key = accession_number
        else:
            key = ''
        lgr.debug(
            "%30s %30s %27s %27s %5s nref=%-2d nsrc=%-2d %s" %
            (key, info.series_id, dcminfo.SeriesDescription,
             dcminfo.ProtocolName, info.is_derived,
             len(dcminfo.get('ReferencedImageSequence', '')),
             len(dcminfo.get('SourceImageSequence', '')), info.image_type))
        if per_studyUID:
            if studyUID not in seqinfo:
                seqinfo[studyUID] = OrderedDict()
            seqinfo[studyUID][info] = series_files
        elif per_accession_number:
            if accession_number not in seqinfo:
                seqinfo[accession_number] = OrderedDict()
            seqinfo[accession_number][info] = series_files
        else:
            seqinfo[info] = series_files

    if per_studyUID:
        lgr.info(
            "Generated sequence info for %d studies with %d entries total",
            len(seqinfo), sum(map(len, seqinfo.values())))
    elif per_accession_number:
        lgr.info(
            "Generated sequence info for %d accession numbers with %d "
            "entries total", len(seqinfo), sum(map(len, seqinfo.values())))
    else:
        lgr.info("Generated sequence info with %d entries", len(seqinfo))
    return seqinfo
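The sequence-name lookup near the end of the loop tries the public (0018,0024) SequenceName element first and then falls back to the Siemens private (0019,109C) element. The same fallback applied to a single file (the path is a placeholder):

ds = dcm.read_file('/data/dicoms/example.dcm',  # placeholder path
                   stop_before_pixels=True, force=True)
if ds.get((0x0018, 0x0024), None):
    sequence_name = ds[0x0018, 0x0024].value    # public SequenceName
elif ds.get((0x0019, 0x109c), None):
    sequence_name = ds[0x0019, 0x109c].value    # Siemens private element
else:
    sequence_name = 'Not found'
print(sequence_name)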