Example #1
import matplotlib.pyplot as plt
import pylidc as pl
import seaborn as sns
nodule_ids = []  # renamed from "id", which shadows a built-in
diameter = []
surface_area = []
volume = []
Sphericity = []
for i in range(1, 200):
    s = str(i).zfill(4)
    pid = 'LIDC-IDRI-{}'.format(s)
    scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == pid).first()
    if scan is None:
        continue
    nodules = scan.cluster_annotations()
    if not nodules:
        continue
    for idx, nodule in enumerate(nodules):
        nodule_ids.append(pid[-4:] + '_' + str(idx))
        diameter.append(nodule[0].diameter)
        surface_area.append(nodule[0].surface_area)
        volume.append(nodule[0].volume)
        Sphericity.append(nodule[0].Sphericity)
    print(".", end='')
print()

data_dic = {}
# data_dic['id'] = nodule_ids
data_dic['diameter'] = diameter
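
The matplotlib and seaborn imports above are otherwise unused; here is a minimal sketch of how the collected features might be plotted once the loop has run (the pandas usage and remaining dict keys are additions, not part of the original snippet):

import pandas as pd

# Hypothetical continuation: collect the remaining features and plot one.
data_dic['surface_area'] = surface_area
data_dic['volume'] = volume
df_features = pd.DataFrame(data_dic)

sns.histplot(df_features['diameter'], bins=30)  # distribution of nodule diameters
plt.xlabel('nodule diameter (mm)')
plt.show()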
Example #2
import pylidc as pl

for subjectID in range(1, 1013):
    s = 'LIDC-IDRI-%04i' % subjectID
    scans = pl.query(pl.Scan).filter(pl.Scan.patient_id == s)
    if scans.count() > 1:
        print("%s has %d scans" % (s, scans.count()))
        for i, scan in enumerate(scans):
            print("  Scan %d has %d annotations" %
                  (i + 1, len(scan.annotations)))
Example #3
    # errors in LIDC-IDRI-0052, LIDC-IDRI-0065, LIDC-IDRI-0068
    #     if idx<2:continue
    #     if idx == 5: break
    #     if idx<=15:continue
    #     if idx>20:continue
    name_original = k
    k = k.split('_block')[0]
    df_patient = df.loc[df['patientid'] == int(k[-4:])]
    pid = k

    # query the LIDC images with patient_id = pid
    # HERE WE JUST USE THE FIRST ONE!!
    idx_scan = 0

    # get the scan object for this scan
    scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == pid)[idx_scan]

    # here we can reject according to any criteria we like
    thickSlice = (scan.slice_thickness > 3) | (scan.slice_spacing > 3)
    missingSlices = len(np.unique(np.round(
        100 * np.diff(scan.slice_zvals)))) != 1
    if thickSlice:
        # we want to reject this scan/patient
        print('Undesirable slice characteristics, rejecting')
        listOfRejectedPatients.append(pid)
        #continue
        raise ValueError('Undesirable slice characteristics, rejecting')
    elif missingSlices:
        print('Missing slices, rejecting')
        listOfRejectedPatients.append(pid)
        #continue
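
A self-contained version of the rejection logic above might look like this (the function name and the 3 mm threshold are assumptions carried over from the fragment):

def reject_scan(scan, max_mm=3.0):
    # Reject scans with thick or widely spaced slices, or with gaps in the
    # z positions of the slices.
    thick = (scan.slice_thickness > max_mm) or (scan.slice_spacing > max_mm)
    gaps = len(np.unique(np.round(100 * np.diff(scan.slice_zvals)))) != 1
    return thick or gaps

example_scan = pl.query(pl.Scan).first()
print(reject_scan(example_scan))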
Example #4
with open('error.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['short_name', 'cluster'])

with open('nodule_size.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['short_name', 'cluster', 'size'])

with open('scan_size.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['short_name', 'size'])

# Generate mask from the four annotations for each nodule.
# Only nodules with four annotations available are included.
# Only voxels marked by three or four radiologists are regarded as nodule voxels.
all_scans = pl.query(pl.Scan)
config['patch_size'] = 64
savepath = config['savepath']
start = time.perf_counter()  # time.clock() was removed in Python 3.8
num_scans = all_scans.count()
#num = 2
all_anns = pl.query(pl.Annotation).join(pl.Scan)
for scan_id in range(num_scans):
    scan = all_scans[scan_id]
    # Annotation.scan_id holds the (1-based) database id, so filter on
    # scan.id rather than on the 0-based loop index.
    anns = all_anns.filter(pl.Annotation.scan_id == scan.id)
    shortname_scan = str(scan_id).zfill(4)

    try:
        path = scan.get_path_to_dicom_files()
    except AssertionError:
        print(shortname_scan, 'does not exist')
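
The comment block above describes a 3-of-4 voting rule; pylidc ships a consensus helper that implements this kind of mask combination. A minimal sketch (the 0.75 consensus level and zero padding are assumptions):

from pylidc.utils import consensus

scan = pl.query(pl.Scan).first()
nodules = scan.cluster_annotations()
if nodules:
    anns = nodules[0]
    # clevel=0.75 keeps voxels marked by at least 3 of 4 readers
    cmask, cbbox, masks = consensus(anns, clevel=0.75, pad=[(0, 0)] * 3)
    print(cmask.shape)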
Example #5
    def get_texture(self, texture_val):
        return pl.query(pl.Annotation).filter(pl.Annotation.texture <= texture_val).all()
Example #6
    def get_contour(self, annotation):
        return pl.query(pl.Contour).filter(pl.Contour.annotation_id == annotation.id).all()
Example #7
def get_texture_1():
    texture_1 = pl.query(pl.Annotation).filter(pl.Annotation.texture == 1)
    return texture_1.all()
Example #8
TEST_MODE = False
print(f"test mode: {TEST_MODE}")

file = open(LOG_FILE, "w+")

# The LIDC database contains annotations of up to 4 radiologist per nodule.
# We need to combine these annotations. Luckily, the pylidc module provides a way to cluster annotations from overlapping nodules
# It turns out that 'nodule_id' does not refer to a nodule at all, they do not overlap.
# Luckily, pylidc has functionality built in to determine which nodules belong together
#
# Extract annotations to dataframe (note: using pd.read_sql_table might be better but I couldn't figure out which connection to use)
# ## Load scans with pylidc
# Create dataframe with scan information

scans = pl.query(pl.Scan).all()
scan_dict = {}
for scan in scans:
    patient_id = scan.patient_id[-4:]
    if patient_id in scan_dict.keys():
        print(f"patient with multiple scans: {patient_id}; ", end="")
        patient_id = str(int(patient_id) + 2000)
        print(f"new id: {patient_id}")
    scan_dict[patient_id] = scan
assert len(scan_dict.keys()) == 1018
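
As the comments above note, nodule_id does not group overlapping annotations; cluster_annotations() is the supported way to gather the annotations that belong to one physical nodule. A minimal sketch:

scan = pl.query(pl.Scan).first()
nodules = scan.cluster_annotations()  # list of annotation lists, one per nodule
print([len(anns) for anns in nodules])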

if not (RESOURCES_DIR / "scan_df.csv").exists():
    scan_df_dict = {}
    print("preparing scan dataframe")
    for patient_id, scan in tqdm(scan_dict.items()):  # TODO add scan-id here
        scan_df_dict[patient_id] = {
Example #9
def check_nodule_intersections(patch_size=144, res='Legacy'):
    recluster_using_cliques = False
    pat_with_nod = 0
    pat_without_nod = 0
    nodule_count = 0
    max_size = 0
    min_size = 999999
    min_dist = 999999
    outliers = []
    size_list = []
    global_size_list = []

    pause = 0
    for scan in pl.query(pl.Scan).all()[:]:
        if len(scan.annotations) == 0:
            continue
        # cluster by intersection
        tol = 0.95
        nods, D = scan.cluster_annotations(metric='jaccard',
                                           tol=tol,
                                           return_distance_matrix=True)
        if len(nods) == 0:
            pat_without_nod += 1
            continue
        pat_with_nod += 1

        if recluster_using_cliques:
            adjacency = D <= tol
            if adjacency.shape[0] > 1:
                clusters = cluster_by_cliques(adjacency, None)
                print(
                    "Study ({}), Series({}) of patient {}: {} connected components. {} cliques"
                    .format(scan.study_instance_uid, scan.series_instance_uid,
                            scan.patient_id, len(nods), len(clusters)))
                nodule_count += len(nods)
                if len(nods) != len(clusters):
                    # [[n.id for n in anns] for anns in nods]
                    pause = pause + 1
                    mds(scan=scan, clusters=clusters, distance_matrix=D)
                # re-cluster nodules by cliques
                nods = [[scan.annotations[i] for i in ids] for ids in clusters]
            else:
                clusters = [[0]]
        else:
            id_0 = np.min([ann.id for ann in scan.annotations])
            clusters = [[ann.id - id_0 for ann in cluster] for cluster in nods]

        centers = []
        boxes = []
        for cluster in clusters:
            nod = [scan.annotations[ann_id] for ann_id in cluster]
            print("Nodule of patient {} with {} annotations.".format(
                scan.patient_id, len(nod)))
            min_ = reduce((lambda x, y: np.minimum(x, y)),
                          [ann.bbox()[:, 0] for ann in nod])
            max_ = reduce((lambda x, y: np.maximum(x, y)),
                          [ann.bbox()[:, 1] for ann in nod])
            size = scan.pixel_spacing * (max_ - min_ + 1)
            size_list.append(size)
            if np.max(size) >= 64:
                print("\tNodule Size = {:.1f} x {:.1f} x {:.1f}".format(
                    size[0], size[1], size[2]))
            if size[2] == 1:
                print("\t\tNodule BB = {}".format(
                    [ann.bbox()[:, 0] for ann in nod]))
            max_size = np.maximum(max_size, size)
            min_size = np.minimum(min_size, size)

            centers.append(scan.pixel_spacing * min_ + size // 2)
            boxes.append(
                np.vstack(
                    [scan.pixel_spacing * min_, scan.pixel_spacing * max_]))

        cluster_candidates = []
        for i, nod_i in enumerate(nods):
            j_outs = []
            for j, nod_j in enumerate(nods):
                if i == j:
                    continue
                #if centers[i][2] < boxes[j][0][2]: # ignore if cross-section of i doesn't contain j
                #    continue
                #if centers[i][2] > boxes[j][1][2]: # ignore if cross-section of i doesn't contain j
                #    continue
                dist = np.abs(centers[i] - boxes[j])
                dist = np.min(dist, axis=0)
                dist = np.max(dist)
                min_dist = np.minimum(min_dist, dist)
                if dist > 32:
                    continue
                if dist > 10:
                    stop = 1  # no-op; leftover debugger anchor
                print("\tDist = {}".format(dist))
                min_ = np.minimum(boxes[i][0], boxes[j][0])
                max_ = np.maximum(boxes[i][1], boxes[j][1])
                size = (max_ - min_ + 1)
                print("\t\tMerged ({}, {}) Size = {:.1f} x {:.1f} x {:.1f}".
                      format(i, j, size[0], size[1], size[2]))
                j_outs.append(j)
                outliers.append((dist, np.max(size)))
            if len(j_outs) > 1:
                boxes = np.array(boxes)
                min_ = reduce((lambda x, y: np.minimum(x, y)),
                              [bb[0, :] for bb in boxes[j_outs + [i]]])
                max_ = reduce((lambda x, y: np.maximum(x, y)),
                              [bb[1, :] for bb in boxes[j_outs + [i]]])
                size = (max_ - min_ + 1)
                if np.any(size > 60):
                    stop = 1  # no-op; leftover debugger anchor
                print(
                    "\t\t Global Merged ({}, {}) Size = {:.1f} x {:.1f} x {:.1f}"
                    .format(i, j_outs, size[0], size[1], size[2]))
                global_size_list.append(np.max(size))

    print("=" * 30)
    print("Prepared {} entries".format(nodule_count))
    print("{} patients with nodules, {} patients without nodules".format(
        pat_with_nod, pat_without_nod))
    print("\tMax Size = {:.1f} x {:.1f} x {:.1f}".format(
        max_size[0], max_size[1], max_size[2]))
    print("\tMin Size = {:.1f} x {:.1f} x {:.1f}".format(
        min_size[0], min_size[1], min_size[2]))
    print("\tMin Dist = {}".format(min_dist))
    print("== Number of cluster breaks = {} ==".format(pause))

    x_dist = [o[0] for o in outliers]
    y_size = [o[1] for o in outliers]

    plt.figure()

    plt.subplot(311)
    plt.title('Nodule (cluster) Size')
    plt.xlabel('size')
    plt.ylabel('hist')
    plt.hist(np.max(size_list, axis=1), 50)

    plt.subplot(312)
    plt.title('Pairwise-Merges')
    plt.xlabel('dist')
    plt.ylabel('merged size')
    plt.scatter(
        np.array(x_dist).astype('uint'),
        np.array(y_size).astype('uint'))

    plt.subplot(313)
    plt.title('Total Size')
    plt.xlabel('size')
    plt.ylabel('hist')
    plt.hist(global_size_list, 50)

    plt.show()
Example #10
def get_scan():
    # join Annotation so the texture filter actually constrains the scans
    scan = pl.query(pl.Scan).join(pl.Annotation).filter(pl.Annotation.texture == 1)
    return scan.all()
Example #11
def extract(patch_size=144, res='Legacy', dump=True):

    filename = 'NodulePatches{}-{}.p'.format(patch_size, res)

    dataset = []
    nodSize = []
    pat_with_nod = 0
    pat_without_nod = 0
    patient_nodules = {}

    if dump is False:
        print("Running without dump")

    for scan in pl.query(pl.Scan).all()[:]:
        # cycle 1018 scans
        #
        # Example for debugging:
        #   scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == 'LIDC-IDRI-0004').first()
        #
        nods = scan.cluster_annotations(metric='jaccard',
                                        tol=0.95,
                                        tol_limit=0.7)
        if len(nods) > 0:
            pat_with_nod += 1
            print("Study ({}), Series({}) of patient {}: {} nodules.".format(
                scan.study_instance_uid, scan.series_instance_uid,
                scan.patient_id, len(nods)))
            patient_nodules[scan.patient_id] = len(nods)  # key by the actual id, not the literal string
            dicom = scan.load_all_dicom_images(verbose=False)

            for nod in nods:
                print("Nodule of patient {} with {} annotations.".format(
                    scan.patient_id, len(nod)))
                largestSliceA = [getLargestSliceInBB(ann)[0]
                                 for ann in nod]  # largest slice within the annotated bb
                annID = np.argmax(
                    largestSliceA)  # which annotation has the largest slice

                largestSliceZ = [getLargestSliceInBB(ann)[1]
                                 for ann in nod]  # index within the mask
                z = interpolateZfromBBidx(
                    nod[annID],
                    largestSliceZ[annID])  # just for the entry data
                # possible mismatch between the retrieved z and largestSliceZ[annID] due to missing dicom files
                #
                if res == 'Legacy':  # compare by value; identity check on a str literal is fragile
                    di_slice = getSlice(dicom, z, rescale=True)
                    mask = get_full_size_mask(nod[annID], di_slice.shape)
                    patch = cropSlice(di_slice, nod[annID].centroid(),
                                      patch_size)
                    mask = cropSlice(mask, nod[annID].centroid(), patch_size)
                else:
                    vol0, seg0 = nod[annID].uniform_cubic_resample(
                        side_length=(patch_size - 1),
                        resolution=res,
                        verbose=0)
                    largestSliceZ = np.argmax(
                        np.sum(seg0.astype('float32'), axis=(0, 1)))
                    patch = rescale_im_to_hu(vol0[:, :, largestSliceZ],
                                             dicom[0].RescaleIntercept,
                                             dicom[0].RescaleSlope)
                    mask = seg0[:, :, largestSliceZ]

                entry = {
                    'patch':
                    patch.astype(np.int16),
                    'info': (scan.patient_id, scan.study_instance_uid,
                             scan.series_instance_uid, nod[annID]._nodule_id),
                    'nod_ids': [n._nodule_id for n in nod],
                    'rating':
                    np.array([ann.feature_vals() for ann in nod]),
                    'mask':
                    mask.astype(np.int16),
                    'z':
                    z,
                    'size':
                    getNoduleSize(nod)
                }
                dataset.append(entry)

                #gc.collect()
        else:
            pat_without_nod += 1

    print("Prepared {} entries".format(len(dataset)))
    print("{} patients with nodules, {} patients without nodules".format(
        pat_with_nod, pat_without_nod))

    if dump:
        pickle.dump(dataset, open(filename, 'wb'))
        print("Dumpted to {}".format(filename))
    else:
        print("No Dump")
Example #12
def extract_from_cluster_map(cluster_map, patch_size=144, res='Legacy'):

    dataset = []

    for scan in pl.query(pl.Scan).all()[:]:
        # cycle 1018 scans
        #
        # Example for debugging:
        #   scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == 'LIDC-IDRI-0004').first()
        #
        try:
            nods, cluster_indices = cluster_map[scan.id]
        except KeyError:  # scan not present in the cluster map
            continue
        print("Study ({}), Series({}) of patient {}:".format(
            scan.study_instance_uid, scan.series_instance_uid,
            scan.patient_id))
        dicom = scan.load_all_dicom_images(verbose=False)

        for indices in cluster_indices:
            assert len(nods) > 0
            nodules_in_cluster = np.concatenate([nods[i] for i in indices])
            print("\tCluster with {} nodules.".format(len(nodules_in_cluster)))

            z_range = get_z_range(nodules_in_cluster)
            img_zs = [float(img.ImagePositionPatient[-1]) for img in dicom]
            assert (len(np.unique(img_zs)) == len(img_zs))
            for z in filter(lambda x: (x <= z_range[1]) & (x >= z_range[0]),
                            img_zs):
                image = getSlice(dicom, z, rescale=True)
                full_mask = np.zeros(image.shape).astype('bool')
                weights = []
                ratings = []
                nodule_ids = []
                annotation_size = []
                for nod in nodules_in_cluster:
                    mask, bb, w = getMask(z, nod, img_zs, scan)
                    if mask is None or 0 == w:  # skip annotation
                        continue
                    full_mask[int(bb[0][0]):int(bb[0][1] + 1),
                              int(bb[1][0]):int(bb[1][1] + 1)] |= mask
                    nodule_ids += [nod._nodule_id]
                    ratings += [nod.feature_vals()]
                    assert (len(np.flatnonzero(mask)) > 0)
                    annotation_size += [
                        calc_mask_size(mask, mm_per_px=scan.pixel_spacing)
                    ]
                    weights += [w]
                if 0 == np.count_nonzero(full_mask):  # skips slice
                    continue
                mask_size = calc_mask_size(full_mask,
                                           mm_per_px=scan.pixel_spacing)
                if isinstance(res, float):
                    new_shape = tuple(
                        (np.array(image.shape) *
                         (scan.pixel_spacing / res)).astype('int'))
                    image = transform.resize(image,
                                             output_shape=new_shape,
                                             order=1,
                                             preserve_range=True,
                                             mode='constant')
                    full_mask = transform.resize(full_mask,
                                                 output_shape=new_shape,
                                                 order=0,
                                                 preserve_range=True,
                                                 mode='constant')
                    if 0 == np.count_nonzero(full_mask):
                        # sometimes the mask is pixel-wide, so after resize nothing is left
                        # would've anyhow been filtered in later stages
                        continue
                patch, mask = crop(image,
                                   full_mask,
                                   fix_size=patch_size,
                                   stdev=0)

                if np.abs(mask_size -
                          calc_mask_size(mask, mm_per_px=res)) > res:
                    print("{}, {}:\n\tfull mask size = {}\n\tmask size = {}".
                          format(scan.patient_id, z, mask_size,
                                 calc_mask_size(mask, mm_per_px=res)))
                assert (patch.shape == (patch_size, patch_size))
                assert (mask.shape == (patch_size, patch_size))

                entry = {
                    'patch':
                    patch.astype(np.int16),
                    'info': (scan.patient_id, scan.study_instance_uid,
                             scan.series_instance_uid, nodule_ids),
                    'nod_ids':
                    nodule_ids,
                    'rating':
                    np.array(ratings),
                    'ann_size':
                    np.array(annotation_size),
                    'weights':
                    np.array(weights),
                    'mask':
                    mask.astype(bool),  # np.bool was removed in NumPy 1.24
                    'z':
                    z,
                    'size':
                    mask_size
                }
                dataset.append(entry)

    print("Prepared {} entries".format(len(dataset)))

    return dataset
Example #13
import pylidc as pl
import numpy as np

# test
ann = pl.query(pl.Annotation).filter(pl.Annotation.texture == 1)[1]
mask = ann.boolean_mask()
vol = ann.scan.to_volume()

# print (mask[363][343])
# print (mask.shape)
print(vol[363][343][0])
print(vol.shape)
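
A related pattern from the pylidc API: Annotation.bbox() returns a tuple of slice objects, so the cropped CT region and the boolean mask line up directly. A sketch continuing from the annotation above:

bbox = ann.bbox()                 # tuple of slices into the scan volume
region = vol[bbox]                # CT region around the annotation
print(region.shape, mask.shape)   # the two shapes match
print(region[mask].mean())        # mean HU inside the annotated contour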
Example #14
import pylidc as pl
import dicom  # pre-1.0 pydicom; on current installs this module is "pydicom"
import pylab
from numpy import shape
import os
import numpy

query = pl.query(pl.Contour)
print('Total Contours = ' + str(query.count()))


def strip_leading_zeros(file_name):
    length = len(file_name)
    i = 0
    for i in range(length):
        if file_name[i] == '0':
            continue
        else:
            break
    return file_name[i:]
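
Aside: the helper above is essentially str.lstrip. A one-line equivalent (it differs only for an all-zeros name, where lstrip returns an empty string instead of "0"):

def strip_leading_zeros_short(file_name):
    # removes leading '0' characters only
    return file_name.lstrip('0')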


qann = pl.query(pl.Annotation)
for ann in qann:
    scan = ann.scan
    contours = ann.contours
    base_path = scan.get_path_to_dicom_files(checkpath=False)

    z_to_file_mapping = {}
    for filename in os.listdir(base_path):
Example #15
import numpy as np
import pylidc as pl


scans = pl.query(pl.Scan)
nscans = scans.count()
for i, scan in enumerate(scans):
    print(i + 1, "/", nscans)  # Python 2 print statement converted to a call

    images = scan.load_all_dicom_images(verbose=0)
    img_zs = [float(img.ImagePositionPatient[-1]) for img in images]
    img_zs = np.unique(img_zs)

    for zval in img_zs:
        z = pl.Zval()
        z.val = float(zval)
        z.scan = scan

pl._session.commit()
Example #16
import numpy as np
import pylidc

#scan = pylidc.query(pylidc.Scan).filter(pylidc.Scan.patient_id == 'LIDC-IDRI-0340').first()
# should be [4,4]

scan = pylidc.query(pylidc.Scan).filter(pylidc.Scan.patient_id == 'LIDC-IDRI-0867').first()


#print([len(a) for a in scan.cluster_annotations()])
print([len(a) for a in scan.cluster_annotations(metric='jaccard', tol=0.95, tol_limit=0.7)])

print(np.vstack([a.bbox()[0] for a in [scan.annotations[i] for i in [0,3,4,7]] ]))
print(np.vstack([a.bbox()[1] for a in [scan.annotations[i] for i in [0,3,4,7]] ]))
print(np.vstack([a.bbox()[2] for a in [scan.annotations[i] for i in [0,3,4,7]] ]))

print('-'*10)

print(np.vstack([a.bbox()[0] for a in [scan.annotations[i] for i in [1,2,5,6]] ]))
print(np.vstack([a.bbox()[1] for a in [scan.annotations[i] for i in [1,2,5,6]] ]))
print(np.vstack([a.bbox()[2] for a in [scan.annotations[i] for i in [1,2,5,6]] ]))
Example #17
def get_mid_contours(contours):
    # NOTE: hypothetical reconstruction; this fragment arrived without its
    # signature. It returns the middle three contours of an annotation.
    length = len(contours)
    if length == 3:
        return contours

    mid_point = int(math.floor(length / 2))
    mid_contours = []

    for i in range(mid_point - 1, mid_point + 2):
        mid_contours.append(contours[i])

    return mid_contours


from sqlalchemy import and_

# Dead code: this narrowed query is immediately overwritten below.
annotations = pl.query(pl.Annotation).filter(
    and_(pl.Annotation.id >= 39, pl.Annotation.id <= 39))
# Fetch and process all the annotation data there is in the system
annotations = pl.query(pl.Annotation)
#  annotations_count = annotations.count()

qualified_ann_count = 0

max_xrange = 0
max_yrange = 0
min_xrange = 100000
min_yrange = 100000

training_data = []
target_data = []

for ann in annotations:
Example #18
        # (fragment — the opening of this save call is missing from the scrape)
            f'{out_path}{subset_series_ids[jj]}/lungs_segmented/lungs_segmented.npz',
            numpyImage_segmented)

        # go through all candidates that are in this image
        # sort to make sure we have all the trues (for prototyping only)
        curr_cands = curr_cands.sort_values(
            'class', ascending=False).reset_index(drop=True)

        # Added in v2
        one_segmentation_consensus = np.zeros_like(numpyImage)
        one_segmentation_maxvol = np.zeros_like(numpyImage)
        labelledNods = np.zeros_like(numpyImage)

        # query the LIDC images HERE WE JUST USE THE FIRST ONE!!
        idx_scan = 0
        scan = pl.query(pl.Scan).filter(
            pl.Scan.series_instance_uid == subset_series_ids[jj])[idx_scan]
        nods = scan.cluster_annotations(
        )  # get the annotations for all nodules in this scan
        #print(np.shape(nods))

        #Get all the nodules (class==1)
        curr_cands_class1 = curr_cands.loc[curr_cands['class'] == 1]

        for i_curr_cand in range(len(curr_cands_class1)):
            curr_cand = curr_cands_class1.iloc[i_curr_cand]
            # first need to find the corresponding column in the annotations
            # csv (assuming it's the closest nodule to the current candidate)
            # extract the annotations for the scan id of our current candidate
            annotations_scan_df = annotations_df.loc[
                annotations_df['seriesuid'] == curr_cand['seriesuid']]
Example #19
import pylidc as pl
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import
from sklearn.cluster import SpectralClustering
from sklearn.decomposition import PCA
from scipy.spatial.distance import pdist, cdist, squareform

ratings = np.array([ann.feature_vals() for ann in pl.query(pl.Annotation).all()])
projection_3d = PCA(n_components=3).fit_transform(ratings)

distance_matrix = squareform(pdist(ratings, 'euclidean'))
affinity_matrix = np.exp(- distance_matrix / distance_matrix.std())

##
# SELECT N OF CLUSTERS
##

n_clusters = np.arange(16, 1025, 16)
n_clusters_scores = list()
for k in n_clusters:
    # use the loop's k; the original hard-coded 64 here, which defeats the sweep
    sc = SpectralClustering(k, affinity='precomputed', assign_labels='kmeans', n_init=100)
    clusters = sc.fit_predict(affinity_matrix)
    cluster_score = list()
    for label in np.unique(clusters):
        cluster_mask = (label == clusters)
        out_of_cluster_mask = np.logical_not(cluster_mask)

        interclass_scores = cdist(projection_3d[cluster_mask, :], projection_3d[out_of_cluster_mask, :], 'euclidean').min(axis=1).mean()
        inclass_scores = pdist(projection_3d[cluster_mask, :], 'euclidean').mean()
        if inclass_scores < 1e-3:
Example #20
    def get_scan(self, annotation):
        return (pl.query(pl.Scan).filter(pl.Scan.id == annotation.scan_id)).all()
Example #21
        # (fragment: same mid-contour helper as in Example #17, signature missing)
        return contours

    mid_point = int(math.floor(length / 2))
    mid_contours = []

    for i in range(mid_point - 1, mid_point + 2):
        mid_contours.append(contours[i])

    return mid_contours


from sqlalchemy import and_

# annotations = pl.query(pl.Annotation).filter(and_(pl.Annotation.id >= 4640, pl.Annotation.id <= 4641))
# Fetch and process all the annotation data there is in the system
annotations = pl.query(pl.Annotation)
annotations_count = annotations.count()

qualified_ann_count = 0

max_xrange = 0
max_yrange = 0
min_xrange = 100000
min_yrange = 100000

training_data = []
target_data = []

for ann in annotations:
    ann_id = str(ann.id)
    ann_id = ann_id.rjust(8, ' ')
Example #22
    def makeCompositeObjects(self, subjectID):

        # convert all segmentations and measurements into composite objects
        # 1. find all segmentations
        # 2. read all, append metadata
        # 3. find all measurements
        # 4. read all, append metadata
        import re
        s = 'LIDC-IDRI-%04i' % subjectID
        self.logger.info("Making composite objects for " + s)

        scans = pl.query(pl.Scan).filter(pl.Scan.patient_id == s)
        self.logger.info(" Found %d scans" % (scans.count()))

        # cannot just take all segmentation files in a folder, since

        for scan in scans:
            studyUID = scan.study_instance_uid
            seriesUID = scan.series_instance_uid
            seriesDir = os.path.join(self.rootDir, s, studyUID, seriesUID)
            if not os.path.exists(seriesDir):
                self.logger.error("Files not found for subject " + s)
                return

            dcmFiles = glob.glob(os.path.join(seriesDir, "*.dcm"))
            if not len(dcmFiles):
                self.logger.error("No DICOM files found for subject " + s)
                return

            firstFile = os.path.join(seriesDir, dcmFiles[0])

            try:
                ctDCM = pydicom.read_file(firstFile)
            except Exception:
                self.logger.error("Failed to read input file " + firstFile)
                return

            self.instanceCount = 1000

            subjectScanTempDir = os.path.join(self.tempDir, s, studyUID,
                                              seriesUID)
            allSegmentations = glob.glob(
                os.path.join(subjectScanTempDir, 'Nodule*Annotation*.nrrd'))

            if not len(allSegmentations):
                continue

            segMetadata = {}
            nrrdSegFileList = ""
            srMetadata = {}

            for segID, seg in enumerate(allSegmentations):

                prefix = seg[:-5]
                matches = re.match(r'Nodule (\d+) - Annotation (.+)\.',
                                   os.path.split(seg)[1])
                print("Nodule: " + matches.group(1) + " Annotation: " +
                      matches.group(2))

                if not segMetadata:
                    segMetadata = json.load(open(prefix + ".json"))
                else:
                    thisSegMetadata = json.load(open(prefix + ".json"))
                    segMetadata["segmentAttributes"].append(
                        thisSegMetadata["segmentAttributes"][0])

                if not srMetadata:
                    srMetadata = json.load(open(prefix + " measurements.json"))
                else:
                    thisSRMetadata = json.load(
                        open(prefix + " measurements.json"))
                    thisSRMetadata["Measurements"][0][
                        "ReferencedSegment"] = segID + 1
                    srMetadata["Measurements"].append(
                        thisSRMetadata["Measurements"][0])

                nrrdSegFileList = nrrdSegFileList + seg + ","

            segMetadata[
                "ContentDescription"] = "Lung nodule segmentation - all"
            segMetadata["SeriesDescription"] = "Segmentations of all nodules"
            segMetadata["SeriesNumber"] = str(
                int(ctDCM.SeriesNumber if ctDCM.SeriesNumber else 0) +
                self.instanceCount)
            self.instanceCount = self.instanceCount + 1

            # run SEG converter

            allSegsJSON = os.path.join(subjectScanTempDir,
                                       "all_segmentations.json")
            with open(allSegsJSON, "w") as f:
                json.dump(segMetadata, f, indent=2)

            compositeSEGFileName = os.path.join(subjectScanTempDir,
                                                "all_segmentations.dcm")
            nrrdSegFileList = nrrdSegFileList[:-1]

            converterCmd = [
                'itkimage2segimage', "--inputImageList", nrrdSegFileList,
                "--inputDICOMDirectory", seriesDir, "--inputMetadata",
                allSegsJSON, "--outputDICOM", compositeSEGFileName
            ]
            if self.args.skip:
                converterCmd.append('--skip')
            self.logger.info("Converting to DICOM SEG with " +
                             str(converterCmd))

            sp = subprocess.Popen(converterCmd,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
            (stdout, stderr) = sp.communicate()
            self.logger.info("itkimage2segimage stdout: " +
                             stdout.decode('ascii'))
            self.logger.warning("itkimage2segimage stderr: " +
                                stderr.decode('ascii'))

            if not os.path.exists(compositeSEGFileName):
                self.logger.error(
                    "Failed to access output composite SEG file for " + s)

            # populate composite SR JSON
            # need the SEG SOPInstanceUID for that purpose
            segDcm = pydicom.read_file(compositeSEGFileName)
            segUID = segDcm.SOPInstanceUID
            ctSeriesUID = segDcm.ReferencedSeriesSequence[0].SeriesInstanceUID

            for mItem in range(len(srMetadata["Measurements"])):
                srMetadata["Measurements"][mItem][
                    "segmentationSOPInstanceUID"] = segUID

            srMetadata["compositeContext"] = [
                os.path.split(compositeSEGFileName)[1]
            ]

            srMetadata["ContentDescription"] = "Lung nodule measurements - all"
            srMetadata["SeriesDescription"] = "Evaluations for all nodules"
            srMetadata["SeriesNumber"] = str(
                int(ctDCM.SeriesNumber) + self.instanceCount)
            self.instanceCount = self.instanceCount + 1

            allSrsJSON = os.path.join(subjectScanTempDir,
                                      "all_measurements.json")
            with open(allSrsJSON, "w") as f:
                json.dump(srMetadata, f, indent=2)

            compositeSRFileName = os.path.join(subjectScanTempDir,
                                               "all_measurements.dcm")
            # (the trailing comma was already stripped above; repeating the
            # [:-1] here would truncate the last filename)

            converterCmd = [
                'tid1500writer', "--inputMetadata", allSrsJSON,
                "--inputImageLibraryDirectory", seriesDir,
                "--inputCompositeContextDirectory", subjectScanTempDir,
                "--outputDICOM", compositeSRFileName
            ]
            self.logger.info("Converting to DICOM SR with " +
                             str(converterCmd))

            sp = subprocess.Popen(converterCmd,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
            (stdout, stderr) = sp.communicate()
            self.logger.info("tid1500writer stdout: " + stdout.decode('ascii'))
            self.logger.warning("tid1500writer stderr: " +
                                stderr.decode('ascii'))

            if not os.path.exists(compositeSRFileName):
                self.logger.error(
                    "Failed to access output composite SR file for " + s)
Example #23
def get_scan_from_ann(ann):
    scan_ann = pl.query(pl.Scan).filter(pl.Scan.id == ann.scan_id)
    return scan_ann
Example #24
    def convertForSubject(self, subjectID):
        s = 'LIDC-IDRI-%04i' % subjectID
        self.logger.info("Processing subject %s" % (s))
        scans = pl.query(pl.Scan).filter(pl.Scan.patient_id == s)
        self.logger.info(" Found %d scans" % (scans.count()))

        for scan in scans:
            studyUID = scan.study_instance_uid
            seriesUID = scan.series_instance_uid
            seriesDir = os.path.join(self.rootDir, s, studyUID, seriesUID)
            if not os.path.exists(seriesDir):
                self.logger.error("Files not found for subject " + s)
                return

            dcmFiles = glob.glob(os.path.join(seriesDir, "*.dcm"))
            if not len(dcmFiles):
                self.logger.error("No DICOM files found for subject " + s)
                return

            firstFile = os.path.join(seriesDir, dcmFiles[0])

            try:
                ctDCM = pydicom.read_file(firstFile)
            except Exception:
                self.logger.error("Failed to read input file " + firstFile)
                return

            ok = lidc_helpers.checkSeriesGeometry(seriesDir)
            if not ok:
                self.logger.warning("Geometry inconsistent for subject %s" %
                                    (s))

            self.tempSubjectDir = os.path.join(self.tempDir, s)
            reconTempDir = os.path.join(self.tempSubjectDir, "dicom2nrrd")
            os.makedirs(reconTempDir, exist_ok=True)

            scanNRRDFile = os.path.join(self.tempSubjectDir, s + '_CT.nrrd')
            if not os.path.exists(scanNRRDFile):
                # convert
                # tempDir = tempfile.mkdtemp()
                plastimatchCmd = [
                    '/Users/fedorov/build/plastimatch/plastimatch', 'convert',
                    '--input', seriesDir, '--output-img', scanNRRDFile
                ]
                self.logger.info("Running plastimatch with " +
                                 str(plastimatchCmd))

                sp = subprocess.Popen(plastimatchCmd,
                                      stderr=subprocess.PIPE,
                                      stdout=subprocess.PIPE)
                (stdout, stderr) = sp.communicate()
                self.logger.info("plastimatch stdout: " +
                                 stdout.decode('ascii'))
                self.logger.warning("plastimatch stderr: " +
                                    stderr.decode('ascii'))

                self.logger.info('plastimatch completed')
                self.logger.info("Conversion of CT volume OK - result in " +
                                 scanNRRDFile)
            else:
                self.logger.info(
                    scanNRRDFile +
                    " exists. Not rerunning volume reconstruction.")

            reader = itk.ImageFileReader[itk.Image[itk.SS, 3]].New()
            reader.SetFileName(scanNRRDFile)
            reader.Update()
            volume = reader.GetOutput()

            #logger.info(volume.GetLargestPossibleRegion().GetSize())

            # now iterate over all nodules available for this subject
            anns = scan.annotations
            self.logger.info("Have %d annotations for subject %s" %
                             (len(anns), s))

            self.instanceCount = 0

            clusteredAnnotationIDs = []

            for nCount, nodule in enumerate(scan.cluster_annotations()):

                noduleUID = pydicom.uid.generate_uid(
                    prefix=None)  # by default, pydicom uses 2.25 root

                for aCount, a in enumerate(nodule):

                    clusteredAnnotationIDs.append(a.id)
                    self.convertSingleAnnotation(nCount, aCount, a, ctDCM,
                                                 noduleUID, volume, seriesDir)

            if len(clusteredAnnotationIDs) != len(anns):
                self.logger.warning("%d annotations unaccounted for!" %
                                    (len(anns) - len(clusteredAnnotationIDs)))

            for ua in anns:
                if ua.id not in clusteredAnnotationIDs:
                    aCount = aCount + 1
                    nCount = nCount + 1
                    noduleUID = pydicom.uid.generate_uid(prefix=None)
                    self.convertSingleAnnotation(nCount, aCount, ua, ctDCM,
                                                 noduleUID, volume, seriesDir)

            self.cleanUpTempDir(self.tempSubjectDir)
Example #25
def get_ann_from_scan():
    # Note: the return inside the loop means only the first scan's
    # clustered annotations are ever returned.
    for i in range(len(pl.query(pl.Scan).all())):
        scan = pl.query(pl.Scan)[i]
        nodules = scan.cluster_annotations()
        print(nodules)
        return nodules
Example #26
import pylidc as pl

ann = pl.query(pl.Annotation).first()
print(ann.scan.patient_id)

anns = pl.query(pl.Annotation).filter(pl.Annotation.spiculation == 5,
                                      pl.Annotation.malignancy == 5)
# print(anns)

ann = pl.query(pl.Annotation)\
    .filter(pl.Annotation.malignancy==4).first()
print(ann.malignancy, ann.Malignancy)
print(ann.margin, ann.Margin)

ann.print_formatted_feature_table()
Example #27
def get_visual(i):
    ann = pl.query(pl.Annotation).filter(pl.Annotation.texture == 1)[i]
    # the id filter matches a single scan, so take .first() rather than
    # indexing with i (which raises for i > 0)
    scan = pl.query(pl.Scan).filter(pl.Scan.id == ann.scan_id).first()
    scan.visualize(annotation_groups=scan.cluster_annotations())
Example #28
def get_pixels_hu(slices):
    # Hypothetical reconstruction: the fragment below arrived without its
    # signature; it converts a list of pydicom slices to a HU volume.
    image = np.stack([s.pixel_array for s in slices])
    image = image.astype(np.int16)
    image[image == -2000] = 0

    for slice_number in range(len(slices)):
        intercept = slices[slice_number].RescaleIntercept
        slope = slices[slice_number].RescaleSlope
        if slope != 1:
            image[slice_number] = slope * image[slice_number].astype(
                np.float64)
            image[slice_number] = image[slice_number].astype(np.int16)
        image[slice_number] += np.int16(intercept)
    return np.array(image, dtype=np.int16)
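
A sketch of how the conversion above might be driven from pylidc (load_all_dicom_images returns the sorted pydicom datasets for a scan; get_pixels_hu is the name assumed in the reconstruction above):

scan_hu = pl.query(pl.Scan).first()
slices = scan_hu.load_all_dicom_images(verbose=False)
hu_volume = get_pixels_hu(slices)
print(hu_volume.shape, hu_volume.min(), hu_volume.max())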


all_scans = pl.query(pl.Scan)
num_scans = all_scans.count()


def save_slices(save_path,
                id_range=[0, num_scans],
                norm_range=np.array([[-1000, 200], [-250, 200], [-1000,
                                                                 -745]])):
    # Save slices as png with 3 channels

    # Create a table to save the corresponding origin path of the saved npys.
    with open('names.csv', 'w') as f:
        writer = csv.writer(f)
        writer.writerow(['short_name', 'origin_path'])

    with open('miss.csv', 'w') as f:
Example #29
def get_scan_data():
    # Note: returns on the first iteration, so only the first scan is fetched.
    for i in range(len(pl.query(pl.Scan).all())):
        scans = pl.query(pl.Scan)[i]
        return scans
Example #30
    def prepare_dataset(self):
        # This is to name each image and mask
        prefix = [str(x).zfill(3) for x in range(1000)]

        # Make directory
        if not os.path.exists(self.img_path):
            os.makedirs(self.img_path)
        if not os.path.exists(self.mask_path):
            os.makedirs(self.mask_path)
        if not os.path.exists(self.clean_path_img):
            os.makedirs(self.clean_path_img)
        if not os.path.exists(self.clean_path_mask):
            os.makedirs(self.clean_path_mask)
        if not os.path.exists(self.meta_path):
            os.makedirs(self.meta_path)

        IMAGE_DIR = Path(self.img_path)
        MASK_DIR = Path(self.mask_path)
        CLEAN_DIR_IMAGE = Path(self.clean_path_img)
        CLEAN_DIR_MASK = Path(self.clean_path_mask)

        for patient in tqdm(self.IDRI_list):
            pid = patient  #LIDC-IDRI-0001~
            scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == pid).first()
            nodules_annotation = scan.cluster_annotations()
            vol = scan.to_volume()
            print(
                "Patient ID: {} Dicom Shape: {} Number of Annotated Nodules: {}"
                .format(pid, vol.shape, len(nodules_annotation)))

            patient_image_dir = IMAGE_DIR / pid
            patient_mask_dir = MASK_DIR / pid
            Path(patient_image_dir).mkdir(parents=True, exist_ok=True)
            Path(patient_mask_dir).mkdir(parents=True, exist_ok=True)

            if len(nodules_annotation) > 0:
                # Patients with nodules
                for nodule_idx, nodule in enumerate(nodules_annotation):
                    # Each nodule carries at most 4 annotations (one per
                    # radiologist). This loop iterates over the nodules of a
                    # single patient.
                    mask, cbbox, masks = consensus(nodule, self.c_level,
                                                   self.padding)
                    lung_np_array = vol[cbbox]

                    # We calculate the malignancy information
                    malignancy, cancer_label = self.calculate_malignancy(
                        nodule)

                    for nodule_slice in range(mask.shape[2]):
                        # This inner loop iterates over the slices of a single
                        # nodule; masks below the threshold are skipped because
                        # they are too small to help training.
                        if np.sum(mask[:, :,
                                       nodule_slice]) <= self.mask_threshold:
                            continue
                        # Segment Lung part only
                        lung_segmented_np_array = segment_lung(
                            lung_np_array[:, :, nodule_slice])
                        # Some values come out as -0.0, which may trigger a
                        # datatype error in PyTorch training; normalize to 0.
                        lung_segmented_np_array[lung_segmented_np_array ==
                                                -0] = 0
                        # This iterates through the slices of a single nodule.
                        # Naming of each file: NI = Nodule Image, MA = Mask Original
                        nodule_name = "{}_NI{}_slice{}".format(
                            pid[-4:], prefix[nodule_idx], prefix[nodule_slice])
                        mask_name = "{}_MA{}_slice{}".format(
                            pid[-4:], prefix[nodule_idx], prefix[nodule_slice])
                        meta_list = [
                            pid[-4:], nodule_idx, prefix[nodule_slice],
                            nodule_name, mask_name, malignancy, cancer_label,
                            False
                        ]

                        self.save_meta(meta_list)
                        np.save(patient_image_dir / nodule_name,
                                lung_segmented_np_array)
                        np.save(patient_mask_dir / mask_name,
                                mask[:, :, nodule_slice])
            else:
                print("Clean Dataset", pid)
                patient_clean_dir_image = CLEAN_DIR_IMAGE / pid
                patient_clean_dir_mask = CLEAN_DIR_MASK / pid
                Path(patient_clean_dir_image).mkdir(parents=True,
                                                    exist_ok=True)
                Path(patient_clean_dir_mask).mkdir(parents=True, exist_ok=True)
                # Some patients have no nodules at all; these form the clean
                # dataset, which we use for validation.
                for slice in range(vol.shape[2]):  # note: shadows built-in slice()
                    if slice > 50:
                        break
                    lung_segmented_np_array = segment_lung(vol[:, :, slice])
                    lung_segmented_np_array[lung_segmented_np_array == -0] = 0
                    lung_mask = np.zeros_like(lung_segmented_np_array)

                    #CN= CleanNodule, CM = CleanMask
                    nodule_name = "{}/{}_CN001_slice{}".format(
                        pid, pid[-4:], prefix[slice])
                    mask_name = "{}/{}_CM001_slice{}".format(
                        pid, pid[-4:], prefix[slice])
                    meta_list = [
                        pid[-4:], slice, prefix[slice], nodule_name, mask_name,
                        0, False, True
                    ]
                    self.save_meta(meta_list)
                    np.save(patient_clean_dir_image / nodule_name,
                            lung_segmented_np_array)
                    np.save(patient_clean_dir_mask / mask_name, lung_mask)

        print("Saved Meta data")
        self.meta.to_csv(self.meta_path + 'meta_info.csv', index=False)
Example #31
def get_scan(name):
    scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == name).first()

    return scan