def genTextures():
    """Write per-slice T1 and T2 ROI texture-map CSV files.

    Walks ``rootDir/<patient folder>/<slice folder>``. For every slice folder
    whose slice number is listed for the patient in ``biopsycoordinatefile``:

    * the DICOM file of each contrast in ``dicomnames`` is located by
      filename pattern,
    * the T1 and T2 ROI XML files are located by filename pattern and turned
      into window points with ``ParseXMLDrawROI``,
    * two CSV files, ``<patient>_<slice>_T1_ROI_Texture_Map.csv`` and the
      ``T2`` counterpart, are written to ``outputDir``.

    Each CSV row describes one window point on one contrast image: contrast
    name, DICOM filename, X, Y, boundary flag, biopsy flag, followed by the
    GLCM, LBP, Gabor and raw mean/std features of the 8x8 window
    ``[y-4:y+4, x-4:x+4]``.

    A slice folder that lacks a T1 or a T2 ROI XML file is reported and
    skipped. (Previously the filename found for an earlier slice was silently
    reused, or a NameError was raised on the first slice.)

    Relies on module-level names defined outside this function: ``rootDir``,
    ``outputDir``, ``biopsycoordinatefile``, ``haralick_labels``,
    ``Read2DImage``, ``ParseXMLDrawROI``, ``GrayScaleNormalization``,
    ``GLCMFeatures``, ``ExtendLBPFeatures`` and ``ExtendGaborFeatures``.

    Raises:
        KeyError: if a processed slice folder has no DICOM file for one of
            the contrasts in ``dicomnames``, or if the patient is missing
            from ``biopsycoordinatefile``.
    """
    # ---- algorithm parameters ------------------------------------------
    GLCMAngleList = ['Avg']
    glcmFeatureNames = haralick_labels[:-1]

    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'

    Gaborsigma_range = (0.6, 1.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']
    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])

    # ---- CSV header: must stay in the same order as the row built below --
    featureTitle = ['Image Contrast', 'Image Filename', 'X', 'Y',
                    'Boundary (1) or not (inside: 0), (outside:2)',
                    'Biopsy(1) or not (0)']
    featureTitle += [name + '_' + angle
                     for angle in GLCMAngleList
                     for name in glcmFeatureNames]
    featureTitle += ['LBP_%02d' % x for x in range(LBPnPoints + 1)]
    featureTitle.append('LBP_Other')
    featureTitle += [name + '_' + str(sigma) + '_' + str(freq)
                     for sigma in Gaborsigma_range
                     for freq in Gaborfreq_range
                     for name in GaborFeatureList]
    featureTitle += ['Raw_Mean', 'Raw_Std']

    dicomnames = ['EPI', 'P', 'Q', 'RCBV', 'SPGRC', 'T2']

    # Filename patterns identifying each contrast. A file may match several
    # entries; the last matching file in os.listdir order wins per contrast.
    dicomPatterns = (
        ('SPGRC', ('*C*SPGR*', '*+C*T1*', '*T1*+C*')),
        ('T2', ('*T2*',)),
        ('Q', ('*q*',)),
        ('P', ('*p*',)),
        ('RCBV', ('*rCBV*',)),
        ('EPI', ('*EPI*+C*', '*+C*EPI*')),
    )

    def matchesAny(filename, patterns):
        """Return True if filename matches at least one fnmatch pattern."""
        return any(fnmatch.fnmatch(filename, pattern) for pattern in patterns)

    def collectDicoms(sliceDir):
        """Map contrast name -> DICOM filename for the files in sliceDir."""
        found = dict()
        for dcmfile in os.listdir(sliceDir):
            if dcmfile.startswith('.'):
                continue
            if not fnmatch.fnmatch(dcmfile, '*dcm*'):
                continue
            if fnmatch.fnmatch(dcmfile, '*precontrast*'):
                continue
            for contrast, patterns in dicomPatterns:
                if matchesAny(dcmfile, patterns):
                    found[contrast] = dcmfile
        return found

    def findROIXml(sliceDir):
        """Return (T1 xml filename, T2 xml filename); None where not found."""
        t1xml = None
        t2xml = None
        for xmlfile in os.listdir(sliceDir):
            if not fnmatch.fnmatch(xmlfile, '*.xml'):
                continue
            if matchesAny(xmlfile, ('*NECROSIS*', '*necrosis*')):
                continue
            if matchesAny(xmlfile, ('*C*SPGR*', '*+C*T1*', '*T1*+C*')):
                t1xml = xmlfile
            if fnmatch.fnmatch(xmlfile, '*T2*'):
                t2xml = xmlfile
        return t1xml, t2xml

    def windowFeatures(dicomImage, xcoord, ycoord):
        """Return GLCM + LBP + Gabor + raw mean/std of the window at (x, y)."""
        subImage = dicomImage[ycoord - 4:ycoord + 4, xcoord - 4:xcoord + 4]

        # GLCM on the window, normalised with the window's own gray range.
        subImageGLCM = GrayScaleNormalization(subImage, subImage.max(),
                                              subImage.min())
        glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
        GLCM = [glcmFeatures[angle][name]
                for angle in GLCMAngleList
                for name in glcmFeatureNames]

        # Raw mean and std of the un-normalised window.
        meanstd = [numpy.mean(subImage), numpy.std(subImage)]

        # LBP uses a window extended by LBPRadius on every side, but it is
        # still normalised with the gray range of the inner 8x8 window.
        subImageLBP = dicomImage[ycoord - 4 - LBPRadius:ycoord + 4 + LBPRadius,
                                 xcoord - 4 - LBPRadius:xcoord + 4 + LBPRadius]
        extendsubImageLBP = GrayScaleNormalization(subImageLBP,
                                                   subImage.max(),
                                                   subImage.min())
        LBP = list(ExtendLBPFeatures.calcFeatures(
            extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod))

        # Gabor receives the whole image plus the window origin and size
        # (width = height = 8).
        GaborFeatures = ExtendGaborFeatures.calcFeatures(
            dicomImage, xcoord - 4, ycoord - 4, 8, 8, Gaborkernel_bank,
            subImage.max(), subImage.min())
        Gabor = [value
                 for gaborfeature in GaborFeatures
                 for value in gaborfeature]

        return GLCM + LBP + Gabor + meanstd

    def writeTextureMap(csvPath, windowptlist, sliceDir, dcmfiledict):
        """Write one CSV: a row per (contrast, window point) combination."""
        # 'wb' is what the Python 2 csv module requires.
        with open(csvPath, 'wb') as featureCSVFile:
            featureWriter = csv.writer(featureCSVFile, dialect='excel')
            featureWriter.writerow(featureTitle)

            for eachdcm in dicomnames:
                dicomfile = dcmfiledict[eachdcm]
                dicomImage = Read2DImage(os.path.join(sliceDir, dicomfile))

                for eachpt in windowptlist:
                    xcoord = int(eachpt[0])
                    ycoord = int(eachpt[1])
                    boundaryornot = int(eachpt[2])
                    biopsyornot = int(eachpt[3])

                    aFeature = [eachdcm, dicomfile, xcoord, ycoord,
                                boundaryornot, biopsyornot]
                    featureWriter.writerow(
                        aFeature + windowFeatures(dicomImage, xcoord, ycoord))

    # ---- walk patients and slices ----------------------------------------
    for texturemapfile in os.listdir(rootDir):
        if texturemapfile.startswith('.'):
            continue

        print(texturemapfile)

        # The patient key is the folder prefix with an 'FSL' marker, or
        # otherwise every 'h' character, stripped.
        patientname = texturemapfile.split('_')[0]
        if fnmatch.fnmatch(patientname, "*FSL*"):
            newpatientname = patientname.replace("FSL", "")
        elif fnmatch.fnmatch(patientname, "*h*"):
            newpatientname = patientname.replace("h", "")
        else:
            newpatientname = patientname
        print(newpatientname)

        slicepathfile = os.path.join(rootDir, texturemapfile)

        for slicefile in os.listdir(slicepathfile):
            if slicefile.startswith('.'):
                continue

            print(slicefile)

            slicenum = int(slicefile.replace('slice', ''))
            dcmxmlfilepath = os.path.join(slicepathfile, slicefile)

            dcmfiledict = collectDicoms(dcmxmlfilepath)
            T1xmlfile, T2xmlfile = findROIXml(dcmxmlfilepath)

            print('\n')

            if slicenum not in biopsycoordinatefile[newpatientname]:
                continue
            biopsycoordinatelist = biopsycoordinatefile[newpatientname][slicenum]

            if T1xmlfile is None or T2xmlfile is None:
                print('Missing T1 or T2 ROI XML for %s %s; slice skipped.' %
                      (newpatientname, slicefile))
                continue

            T1windowptlist = ParseXMLDrawROI(
                os.path.join(dcmxmlfilepath, T1xmlfile), 'T1',
                biopsycoordinatelist)
            T2windowptlist = ParseXMLDrawROI(
                os.path.join(dcmxmlfilepath, T2xmlfile), 'T2',
                biopsycoordinatelist)

            featuresOutFn = 'ROI_Texture_Map.csv'
            for sequence, windowptlist in (('T1', T1windowptlist),
                                           ('T2', T2windowptlist)):
                outName = (newpatientname + '_' + slicefile + '_' + sequence +
                           '_' + featuresOutFn)
                writeTextureMap(os.path.join(outputDir, outName),
                                windowptlist, dcmxmlfilepath, dcmfiledict)
def genFeatures():
    """Write one CSV of texture features for every lesion ROI under rootDir.

    For each case folder in ``rootDir`` (hidden entries and ``*Icon*`` entries
    are ignored) every sub-folder whose name contains ``Lesion`` or ``lesion``
    is processed:

    * the ``*rec.csv`` file in the lesion folder lists ROI rectangles
      (columns ``X``, ``Y``, ``W``, ``H``),
    * the gray levels in column 3 of the ``;``-separated ``roiCoordsFn`` file
      give the range passed to ``GrayScaleNormalization``,
    * for each ROI, GLCM, LBP, Gabor and largest-box mean/std features are
      computed on ``lesionDicomFn`` and appended as one row to
      ``outputDir/featuresOutFn``.

    ROI rows with ``X == 1 and Y == 1`` are reported as invalid and skipped,
    as are ROIs whose normalised sub-image is entirely zero. A lesion folder
    without a ``*rec.csv`` file is reported and skipped. (Previously the ROI
    file of the preceding lesion was silently reused, or a NameError was
    raised.)

    Relies on module-level names defined outside this function: ``rootDir``,
    ``outputDir``, ``featuresOutFn``, ``lesionDicomFn``, ``roiCoordsFn``,
    ``haralick_labels``, ``Read2DImage``, ``GrayScaleNormalization``,
    ``Mean_Std_LargestBox2``, ``GLCMFeatures``, ``ExtendLBPFeatures`` and
    ``ExtendGaborFeatures``.

    Raises:
        IndexError: if a case folder name has fewer than three
            ``_``-separated parts.
    """
    # ---- algorithm parameters --------------------------------------------
    GLCMAngleList = ['0', '45', '90', '135', 'Avg']
    glcmFeatureNames = haralick_labels[:-1]

    LBPRadius = 3
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'

    Gaborsigma_range = [0.6]
    Gaborfreq_range = (0.1, 0.3, 0.5)
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']
    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])

    # ---- CSV header: must stay in the same order as the row built below --
    featureTitle = [
        'PatientID', 'Phase', 'LesionName', 'ROI_Y', 'ROI_X', 'Width', 'Height'
    ]
    featureTitle += [name + '_' + angle
                     for angle in GLCMAngleList
                     for name in glcmFeatureNames]
    featureTitle += ['LBP_%02d' % x for x in range(LBPnPoints + 1)]
    featureTitle.append('LBP_Other')
    featureTitle += [name + '_' + str(sigma) + '_' + str(freq)
                     for sigma in Gaborsigma_range
                     for freq in Gaborfreq_range
                     for name in GaborFeatureList]
    featureTitle += ['LargestBox_Mean', 'LargestBox_Std']

    # All cases go into a single CSV; 'wb' is what the Python 2 csv module
    # requires.
    featuresCSVFn = os.path.join(outputDir, featuresOutFn)
    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for casefile in os.listdir(rootDir):
            if casefile.startswith('.'):
                continue
            if fnmatch.fnmatch(casefile, '*Icon*'):
                continue

            print(casefile)

            # Folder name: parts 1 and 2 form the patient id, the remaining
            # parts form the phase name (each part prefixed with a space, so
            # the phase name keeps its leading space).
            splitlist = casefile.split('_')
            patientid = splitlist[1] + splitlist[2]
            phasename = ''.join(' ' + item for item in splitlist[3:])

            print(patientid)
            print(phasename)

            phasefilepath = os.path.join(rootDir, casefile)

            lesionfolder = [
                phasefile for phasefile in os.listdir(phasefilepath)
                if not phasefile.startswith('.')
                and not fnmatch.fnmatch(phasefile, '*Icon*')
                and (fnmatch.fnmatch(phasefile, '*Lesion*')
                     or fnmatch.fnmatch(phasefile, '*lesion*'))
            ]

            print(lesionfolder)

            for lesionfolderfile in lesionfolder:
                lesionPath = os.path.join(rootDir, casefile, lesionfolderfile)

                # Reset per lesion so a folder without a rectangle file can
                # never inherit the file found for the previous lesion.
                lesionROIRectFn = None
                for roifile in os.listdir(lesionPath):
                    if roifile.startswith('.'):
                        continue
                    if fnmatch.fnmatch(roifile, '*Icon*'):
                        continue
                    if fnmatch.fnmatch(roifile, '*rec.csv'):
                        # Largest rectangle file name with absolute path
                        lesionROIRectFn = os.path.join(lesionPath, roifile)

                if lesionROIRectFn is None:
                    print('No *rec.csv ROI file in %s; lesion skipped.' %
                          lesionPath)
                    continue

                # DICOM file name with absolute path
                lesionDicom = os.path.join(lesionPath, lesionDicomFn)

                # ROI coordinate file name with absolute path
                normalROICoords = os.path.join(lesionPath, roiCoordsFn)

                # Gather the gray levels (third column) in one pass; float
                # dtype matches what repeated numpy.append on an empty array
                # produced before.
                with open(normalROICoords, 'r') as roiCoordsFile:
                    grayLevels = [
                        int(row[2])
                        for row in csv.reader(roiCoordsFile, delimiter=';')
                    ]
                dualROIGrayLevels = numpy.array(grayLevels, dtype=float)

                with open(lesionROIRectFn, 'r') as roiFile:
                    for aROI in csv.DictReader(roiFile, dialect='excel'):
                        roiY = int(aROI['Y'])
                        roiX = int(aROI['X'])
                        if roiY == 1 and roiX == 1:
                            print('Invalid ROI for %s @ %s.' %
                                  (patientid, phasename))
                            continue

                        # The image is re-read for every ROI on purpose: it
                        # is not visible here whether the normalisation
                        # helper modifies the pixel data it is given.
                        dicomImage = Read2DImage(lesionDicom)

                        roiH = int(aROI['H'])
                        roiW = int(aROI['W'])

                        subImage = dicomImage[roiY:roiY + roiH,
                                              roiX:roiX + roiW]

                        # LBP works on the ROI extended by LBPRadius on every
                        # side.
                        subImageLBP = dicomImage[
                            roiY - LBPRadius:roiY + roiH + LBPRadius,
                            roiX - LBPRadius:roiX + roiW + LBPRadius]

                        mean_LargBox, std_LargBox = Mean_Std_LargestBox2(
                            dicomImage, roiX, roiY, roiW, roiH)

                        # numpy.ptp() instead of the ndarray.ptp() method,
                        # which no longer exists in NumPy 2.
                        grayRange = numpy.ptp(dualROIGrayLevels)
                        subImage = GrayScaleNormalization(subImage, grayRange)
                        extendsubImageLBP = GrayScaleNormalization(
                            subImageLBP, grayRange)

                        if numpy.all(subImage == 0):
                            print('%s @ %s is all zero.' %
                                  (patientid, phasename))
                            continue

                        aFeature = [
                            patientid, phasename, lesionfolderfile, aROI['Y'],
                            aROI['X'], aROI['W'], aROI['H']
                        ]

                        # GLCM
                        glcmFeatures = GLCMFeatures.calcFeatures(subImage)
                        aFeature.extend(glcmFeatures[angle][name]
                                        for angle in GLCMAngleList
                                        for name in glcmFeatureNames)

                        # LBP
                        lbpFeatures = ExtendLBPFeatures.calcFeatures(
                            extendsubImageLBP, LBPnPoints, LBPRadius,
                            LBPMethod)
                        aFeature.extend(lbpFeatures.tolist())

                        # Gabor
                        GaborFeatures = ExtendGaborFeatures.calcFeatures(
                            subImage, roiW, Gaborkernel_bank)
                        for gaborfeature in GaborFeatures:
                            aFeature.extend(gaborfeature.tolist())

                        aFeature.extend([mean_LargBox, std_LargBox])

                        featureWriter.writerow(aFeature)
    print('Done.')
# Example #3
# 0
def genTAfeatures(patientID, phasename, lesionDicom, lesionROIRectFn,
                  Gaborkernel_bank):
    """Return the texture-feature row of the first usable ROI in an ROI file.

    Rows of ``lesionROIRectFn`` (CSV with columns ``X``, ``Y``, ``W``, ``H``)
    are read in order. A row with ``X == 1 and Y == 1`` is reported as invalid
    and skipped; a row whose ROI is entirely zero in the image is reported and
    skipped. The function returns as soon as one row has been processed, so
    later rows are never looked at.

    Args:
        patientID: identifier copied into the first column of the result.
        phasename: phase label copied into the second column of the result.
        lesionDicom: path handed to ``Read2DImage``.
        lesionROIRectFn: path of the ROI rectangle CSV file.
        Gaborkernel_bank: kernel bank passed through to
            ``ExtendGaborFeatures.calcFeatures``.

    Returns:
        list: ``[patientID, phasename, Y, X, W, H]`` (coordinates as the
        strings read from the CSV), followed by the GLCM features for angles
        0/45/90/135/Avg, the LBP features, the Gabor features and the raw
        mean and standard deviation of the ROI. ``None`` when the file
        contains no usable ROI.

    Relies on module-level names defined outside this function:
    ``haralick_labels``, ``Read2DImage``, ``GrayScaleNormalization``,
    ``GLCMFeatures``, ``ExtendLBPFeatures`` and ``ExtendGaborFeatures``.
    """
    GLCMAngleList = ['0', '45', '90', '135', 'Avg']

    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'

    with open(lesionROIRectFn, 'r') as roiFile:
        for aROI in csv.DictReader(roiFile, dialect='excel'):
            if (int(aROI['Y']) == 1) and (int(aROI['X']) == 1):
                print('Invalid ROI for %s @ %s.' % (patientID, phasename))
                continue

            dicomImage = Read2DImage(lesionDicom)

            # Coordinates are used as-is (0-based), no "-1" shift applied.
            xcoord = int(aROI['X'])
            ycoord = int(aROI['Y'])
            width = int(aROI['W'])
            height = int(aROI['H'])

            # Rows span the height, columns span the width.
            subImage = dicomImage[ycoord:ycoord + height,
                                  xcoord:xcoord + width]

            # Reject an empty-signal ROI before any normalisation: its
            # max equals its min, so there is no gray range to normalise to.
            if numpy.all(subImage == 0):
                print('%s @ %s is all zero.' % (patientID, phasename))
                continue

            # LBP works on the ROI extended by LBPRadius on every side.
            subImageLBP = dicomImage[
                ycoord - LBPRadius:ycoord + height + LBPRadius,
                xcoord - LBPRadius:xcoord + width + LBPRadius]

            # Mean and standard deviation of the raw (un-normalised) ROI.
            mean_LargBox = numpy.mean(subImage)
            std_LargBox = numpy.std(subImage)

            subImageGLCM = GrayScaleNormalization(subImage, subImage.max(),
                                                  subImage.min())

            # The extended LBP window is still normalised with the gray range
            # of the inner ROI, not with its own range.
            extendsubImageLBP = GrayScaleNormalization(subImageLBP,
                                                       subImage.max(),
                                                       subImage.min())

            aFeature = [
                patientID, phasename, aROI['Y'], aROI['X'], aROI['W'],
                aROI['H']
            ]

            # GLCM: computed on the ROI itself, no extension needed.
            glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
            aFeature.extend(glcmFeatures[angle][name]
                            for angle in GLCMAngleList
                            for name in haralick_labels[:-1])

            # LBP: computed on the extended ROI.
            lbpFeatures = ExtendLBPFeatures.calcFeatures(
                extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod)
            aFeature.extend(lbpFeatures.tolist())

            # Gabor: receives the whole image plus the ROI origin and size.
            GaborFeatures = ExtendGaborFeatures.calcFeatures(
                dicomImage, xcoord, ycoord, width, height, Gaborkernel_bank,
                subImage.max(), subImage.min())
            for gaborfeature in GaborFeatures:
                aFeature.extend(gaborfeature.tolist())

            aFeature.extend([mean_LargBox, std_LargBox])

            # Only the first usable ROI is reported.
            return aFeature

    # No usable ROI row in the file.
    return None
# Example #4
# 0
def genTextures():
    """Write one CSV row of multi-contrast texture features per biopsy point.

    For every patient key in ``ptnamelist`` the points stored under that key
    in the dicts of ``ptcoordlist`` are processed. Each point is indexed as
    ``[x, y, slice number]`` and is looked up in
    ``rootDir/<folderdict[patient]>/slice<slice number>``. For each of the six
    contrasts in ``dicomnames`` the 8x8 window ``[y-4:y+4, x-4:x+4]`` yields
    GLCM, LBP, Gabor and normalised mean/std features. All contrasts of one
    point go into a single row of ``outputDir/featuresOutFn``, grouped by
    feature family (all GLCM columns, then all LBP, then all Gabor, then all
    mean/std).

    When the window of a contrast is entirely zero, its GLCM features are not
    computed; the corresponding cells are written empty so that the remaining
    columns stay aligned with the header. (Previously nothing was written for
    them and every later column of the row shifted left.)

    Relies on module-level names defined outside this function: ``rootDir``,
    ``outputDir``, ``featuresOutFn``, ``ptnamelist``, ``ptcoordlist``,
    ``ptlist``, ``folderdict``, ``haralick_labels``, ``Read2DImage``,
    ``GrayScaleNormalization``, ``Norm_Mean_Std_LargestBox``,
    ``GLCMFeatures``, ``ExtendLBPFeatures`` and ``ExtendGaborFeatures``.

    Raises:
        KeyError: if a slice folder has no DICOM file for one of the
            contrasts in ``dicomnames``.
    """
    dicomnames = ['EPI', 'P', 'Q', 'RCBV', 'SPGRC', 'T2']

    # ---- algorithm parameters --------------------------------------------
    GLCMAngleList = ['Avg']
    glcmFeatureNames = haralick_labels[:-1]

    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'

    Gaborsigma_range = (0.6, 1.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']
    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])

    # ---- CSV header: must stay in the same order as the row built below --
    featureTitle = ['Patient', 'ID', 'slice number', 'X', 'Y']
    featureTitle += [dicom + '-' + name + '_' + angle
                     for angle in GLCMAngleList
                     for dicom in dicomnames
                     for name in glcmFeatureNames]
    for dicom in dicomnames:
        featureTitle += [dicom + '-' + 'LBP_%02d' % x
                         for x in range(LBPnPoints + 1)]
        featureTitle.append(dicom + '-' + 'LBP_Other')
    featureTitle += [dicom + '-' + name + '_' + str(sigma) + '_' + str(freq)
                     for dicom in dicomnames
                     for sigma in Gaborsigma_range
                     for freq in Gaborfreq_range
                     for name in GaborFeatureList]
    for dicom in dicomnames:
        featureTitle.append(dicom + '-' + 'Raw_Mean')
        featureTitle.append(dicom + '-' + 'Raw_Std')

    # Filename patterns identifying each contrast. A file may match several
    # entries; the last matching file in os.listdir order wins per contrast.
    dicomPatterns = (
        ('SPGRC', ('*C*SPGR*', '*+C*T1*', '*T1*+C*')),
        ('T2', ('*T2*',)),
        ('Q', ('*q*',)),
        ('P', ('*p*',)),
        ('RCBV', ('*rCBV*',)),
        ('EPI', ('*EPI*+C*', '*+C*EPI*')),
    )

    # 'wb' is what the Python 2 csv module requires.
    featuresCSVFn = os.path.join(outputDir, featuresOutFn)
    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        # ptlist is consumed sequentially: one entry per processed point,
        # across all patients.
        ptindex = 0
        for pt in ptnamelist:
            ptslice = [coordtuple[pt]
                       for coordtuple in ptcoordlist
                       if pt in coordtuple]
            ptfolderpath = os.path.join(rootDir, folderdict[pt])

            for slicelist in ptslice:
                patientid = ptlist[ptindex]
                ptindex += 1
                slicenum = slicelist[2]
                xcoord = slicelist[0]
                ycoord = slicelist[1]

                slicefolderpath = os.path.join(ptfolderpath,
                                               'slice' + str(slicenum))

                dcmfiledict = dict()
                for dcmfile in os.listdir(slicefolderpath):
                    if dcmfile.startswith('.'):
                        continue
                    if not fnmatch.fnmatch(dcmfile, '*dcm*'):
                        continue
                    for contrast, patterns in dicomPatterns:
                        if any(fnmatch.fnmatch(dcmfile, pattern)
                               for pattern in patterns):
                            dcmfiledict[contrast] = dcmfile

                print('%s %s %s %s %s' % (pt, patientid, slicenum,
                                          len(dcmfiledict), dcmfiledict))

                aFeature = [pt, patientid, slicenum, xcoord, ycoord]

                meanstd = list()
                GLCM = list()
                LBP = list()
                Gabor = list()

                # GLCM columns: grouped by angle, then by contrast.
                for GLCMAngle in GLCMAngleList:
                    for eachdcm in dicomnames:
                        dicomfilepath = os.path.join(slicefolderpath,
                                                     dcmfiledict[eachdcm])
                        dicomImage = Read2DImage(dicomfilepath)
                        subImage = dicomImage[ycoord - 4:ycoord + 4,
                                              xcoord - 4:xcoord + 4]

                        if numpy.all(subImage == 0):
                            print('%s @ %s is all zero.' %
                                  (patientid, slicenum))
                            # Keep the row aligned with the header: one empty
                            # cell per GLCM feature that was not computed.
                            GLCM.extend([''] * len(glcmFeatureNames))
                            continue

                        subImageGLCM = GrayScaleNormalization(
                            subImage, subImage.max(), subImage.min())
                        glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
                        GLCM.extend(glcmFeatures[GLCMAngle][name]
                                    for name in glcmFeatureNames)

                # LBP, Gabor and mean/std columns: grouped by contrast. The
                # image is read again here rather than reused from the GLCM
                # pass.
                for eachdcm in dicomnames:
                    dicomfilepath = os.path.join(slicefolderpath,
                                                 dcmfiledict[eachdcm])
                    dicomImage = Read2DImage(dicomfilepath)
                    subImage = dicomImage[ycoord - 4:ycoord + 4,
                                          xcoord - 4:xcoord + 4]

                    # Mean and standard deviation as returned by the
                    # normalising helper for the window's own gray range.
                    dicommean, dicomstd = Norm_Mean_Std_LargestBox(
                        subImage, subImage.max(), subImage.min())
                    meanstd.append(dicommean)
                    meanstd.append(dicomstd)

                    # LBP uses the window extended by LBPRadius on every
                    # side, normalised with the inner window's gray range.
                    subImageLBP = dicomImage[
                        ycoord - 4 - LBPRadius:ycoord + 4 + LBPRadius,
                        xcoord - 4 - LBPRadius:xcoord + 4 + LBPRadius]
                    extendsubImageLBP = GrayScaleNormalization(
                        subImageLBP, subImage.max(), subImage.min())
                    LBPs = ExtendLBPFeatures.calcFeatures(
                        extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod)
                    LBP.extend(LBPs)

                    # Gabor receives the whole image plus the window origin
                    # and size (width = height = 8).
                    GaborFeatures = ExtendGaborFeatures.calcFeatures(
                        dicomImage, xcoord - 4, ycoord - 4, 8, 8,
                        Gaborkernel_bank, subImage.max(), subImage.min())
                    for gaborfeature in GaborFeatures:
                        Gabor.extend(gaborfeature)

                aFeature = aFeature + GLCM + LBP + Gabor + meanstd
                featureWriter.writerow(aFeature)
def genTextures():
    """Write the texture features of one fixed 8x8 window to a CSV file.

    The image is read from ``rootDir`` (used here as a file path) and the
    window is centred on the hard-coded point x=151, y=83. The output file
    ``outputDir/featuresOutFn`` receives a header row and a single data row:
    X, Y, the 'Avg' GLCM features, the LBP features, the Gabor features and
    the raw mean and standard deviation of the window.

    Relies on module-level names defined outside this function: ``rootDir``,
    ``outputDir``, ``featuresOutFn``, ``haralick_labels``, ``Read2DImage``,
    ``GrayScaleNormalization``, ``GLCMFeatures``, ``ExtendLBPFeatures`` and
    ``ExtendGaborFeatures``.
    """
    # ---- algorithm parameters --------------------------------------------
    angles = ['Avg']

    lbpRadius = 1
    lbpPoints = 8 * lbpRadius
    lbpMethod = 'uniform'

    sigmas = (0.6, 1.0)
    freqs = (0.1, 0.3, 0.5)
    gaborStats = ['Gabor_Mean', 'Gabor_Std']

    # ---- header row ------------------------------------------------------
    header = ['X', 'Y']
    header.extend(label + '_' + angle
                  for angle in angles
                  for label in haralick_labels[:-1])
    header.extend('LBP_%02d' % idx for idx in range(0, lbpPoints + 1))
    header.append('LBP_Other')
    header.extend(stat + '_' + str(sigma) + '_' + str(freq)
                  for sigma in sigmas
                  for freq in freqs
                  for stat in gaborStats)

    gaborBank = ExtendGaborFeatures.genKernelBank(sigmas, freqs, [])

    header.extend(['Raw_Mean', 'Raw_Std'])

    csvPath = os.path.join(outputDir, featuresOutFn)

    # 'wb' is what the Python 2 csv module requires.
    with open(csvPath, 'wb') as outFile:
        writer = csv.writer(outFile, dialect='excel')
        writer.writerow(header)

        col = 151
        row = 83
        image = Read2DImage(rootDir)

        record = [col, row]

        window = image[row - 4:row + 4, col - 4:col + 4]

        # GLCM on the window normalised with its own gray range.
        normWindow = GrayScaleNormalization(window, window.max(),
                                            window.min())
        glcm = GLCMFeatures.calcFeatures(normWindow)
        for angle in angles:
            record.extend(glcm[angle][label]
                          for label in haralick_labels[:-1])

        # Raw mean and std of the un-normalised window; written last.
        rawStats = [numpy.mean(window), numpy.std(window)]

        # LBP uses the window extended by lbpRadius on every side, still
        # normalised with the gray range of the inner window.
        lbpWindow = image[row - 4 - lbpRadius:row + 4 + lbpRadius,
                          col - 4 - lbpRadius:col + 4 + lbpRadius]
        normLbpWindow = GrayScaleNormalization(lbpWindow, window.max(),
                                               window.min())
        lbpValues = ExtendLBPFeatures.calcFeatures(normLbpWindow, lbpPoints,
                                                   lbpRadius, lbpMethod)
        record = record + lbpValues.tolist()

        # Gabor receives the whole image plus the window origin and size
        # (width = height = 8).
        gaborValues = ExtendGaborFeatures.calcFeatures(
            image, col - 4, row - 4, 8, 8, gaborBank,
            window.max(), window.min())
        for perKernel in gaborValues:
            record = record + perKernel.tolist()

        writer.writerow(record + rawStats)
# Example #6
# 0
def genFeatures():
    """Write one CSV row of texture features per patient lesion rectangle.

    Every entry of ``rootDir`` is treated as a patient folder that should
    contain a ``*Lesion*`` and a ``*Control*`` sub-folder.  The rectangle is
    read from ``recfile`` in the lesion folder (columns ``X``, ``Y``, ``W``,
    ``H``); when that file holds several rows, the last one is used.  The
    same rectangle is cut out of the lesion and the control image so that
    both contribute to the gray-level range used for normalization.

    For each patient the row holds the identifying columns, the GLCM values
    for every angle in ``GLCMAngleList``, the LBP values, the Gabor values and
    the raw mean / standard deviation of the lesion rectangle.  Output goes to
    ``outputDir/featuresOutFn``.

    Patients without a lesion folder, without a control folder, or with an
    empty rectangle file are skipped.

    Relies on module-level configuration (``rootDir``, ``outputDir``,
    ``featuresOutFn``, ``recfile``, ``dcmfile``) and on helpers defined
    elsewhere in the file.
    """
    GLCMAngleList = ['Avg']

    LBPRadius = 3
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'

    LBPFeatureList = ['LBP_%02d' % x for x in range(LBPnPoints + 1)]
    LBPFeatureList.append('LBP_Other')

    Gaborsigma_range = (1.0, 3.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']

    # Column titles, in the same order the feature values are appended below.
    featureTitle = ['PatientID', 'Phase', 'ROI_X', 'ROI_Y', 'Width', 'Height']
    featureTitle += [featureName + '_' + GLCMAngle
                     for GLCMAngle in GLCMAngleList
                     for featureName in haralick_labels[:-1]]
    featureTitle += LBPFeatureList
    featureTitle += [
        featureName + '_' + str(GaborSigma) + '_' + str(GaborFreq)
        for GaborSigma in Gaborsigma_range
        for GaborFreq in Gaborfreq_range
        for featureName in GaborFeatureList
    ]
    featureTitle += ['LargestBox_Raw_Mean', 'LargestBox_Raw_Std']

    featuresCSVFn = os.path.join(outputDir, featuresOutFn)

    # 'wb' is the mode the Python 2 csv module expects for output files.
    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for casefile in os.listdir(rootDir):
            # Skip hidden entries and names containing 'Icon' or 'xlsx'.
            if (casefile.startswith('.')
                    or fnmatch.fnmatch(casefile, '*Icon*')
                    or fnmatch.fnmatch(casefile, '*xlsx*')):
                continue

            print(casefile)

            # NOTE(review): the separator is underscore + space; confirm it
            # is not a typo for a plain '_'.
            patientID = casefile.split('_ ')[0]
            print(patientID)

            patientfilepath = os.path.join(rootDir, casefile)

            # Locate the lesion and control sub-folders of this patient.
            phaseDir = dict()
            for patientPhaseDir in os.listdir(patientfilepath):
                if (patientPhaseDir.startswith('.')
                        or fnmatch.fnmatch(patientPhaseDir, '*Icon*')):
                    continue

                if fnmatch.fnmatch(patientPhaseDir, '*Lesion*'):
                    phaseDir['lesion'] = patientPhaseDir
                    print(patientPhaseDir)

                if fnmatch.fnmatch(patientPhaseDir, '*Control*'):
                    phaseDir['control'] = patientPhaseDir
                    print(patientPhaseDir)

            if 'lesion' not in phaseDir:
                continue
            if 'control' not in phaseDir:
                # The control image is required for the normalization range.
                print('Missing control folder for %s, skipped.' % patientID)
                continue

            phasename = phaseDir['lesion']

            lesionfile = os.path.join(patientfilepath, phaseDir['lesion'])
            lesionlargestrecpath = os.path.join(lesionfile, recfile)
            lesionDicom = os.path.join(lesionfile, dcmfile)

            normalfile = os.path.join(patientfilepath, phaseDir['control'])
            normalDicom = os.path.join(normalfile, dcmfile)

            with open(lesionlargestrecpath, 'r') as roiFile:
                roiRows = list(csv.DictReader(roiFile, dialect='excel'))

            if not roiRows:
                # Without this guard the rectangle of the previous patient
                # would be reused (or a NameError raised on the first one).
                print('No ROI found for %s, skipped.' % patientID)
                continue

            # Only the last rectangle listed in the file is processed.
            aROI = roiRows[-1]
            xcoord = int(aROI['X'])
            ycoord = int(aROI['Y'])
            width = int(aROI['W'])
            height = int(aROI['H'])

            print('%d %d %d %d' % (xcoord, ycoord, width, height))

            lesiondicomImage = Read2DImage(lesionDicom)
            normaldicomImage = Read2DImage(normalDicom)

            lesionsubImage = lesiondicomImage[ycoord:ycoord + height,
                                              xcoord:xcoord + width]
            normalsubImage = normaldicomImage[ycoord:ycoord + height,
                                              xcoord:xcoord + width]

            mean_LargBox = numpy.mean(lesionsubImage)
            std_LargBox = numpy.std(lesionsubImage)

            # Normalization range spans both the lesion and control windows.
            subImageMax = max(normalsubImage.max(), lesionsubImage.max())
            subImageMin = min(normalsubImage.min(), lesionsubImage.min())

            subImageGLCM = GrayScaleNormalization(lesionsubImage,
                                                  subImageMax, subImageMin)

            # LBP uses the rectangle grown by LBPRadius on every side.
            # NOTE(review): a rectangle closer than LBPRadius to the top/left
            # edge gives a negative start index, which numpy interprets
            # relative to the end of the axis -- confirm ROIs never sit there.
            subImageLBP = lesiondicomImage[
                ycoord - LBPRadius:ycoord + height + LBPRadius,
                xcoord - LBPRadius:xcoord + width + LBPRadius]

            aFeature = [
                patientID, phasename, aROI['X'], aROI['Y'], aROI['W'],
                aROI['H']
            ]

            # GLCM
            glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
            for GLCMAngle in GLCMAngleList:
                for featureName in haralick_labels[:-1]:
                    aFeature.append(glcmFeatures[GLCMAngle][featureName])

            # LBP
            extendsubImageLBP = GrayScaleNormalization(
                subImageLBP, subImageMax, subImageMin)
            lbpFeatures = ExtendLBPFeatures.calcFeatures(
                extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod)
            aFeature = aFeature + lbpFeatures.tolist()

            # Gabor
            GaborFeatures = ExtendGaborFeatures.calcFeatures(
                lesiondicomImage, xcoord, ycoord, width, height,
                Gaborkernel_bank, subImageMax, subImageMin)
            for gaborfeature in GaborFeatures:
                aFeature = aFeature + gaborfeature.tolist()

            aFeature = aFeature + [mean_LargBox, std_LargBox]

            featureWriter.writerow(aFeature)
Пример #7
0
def genTextures():
    """Compute texture features for every image found directly in ``rootDir``.

    Each directory entry (except hidden ones and names containing ``Icon``)
    is read with ``Read2DImage`` and used as-is, without cropping or
    gray-level normalization.  One CSV row per image is written to
    ``outputDir/filename``: the case name (file name up to the first dot),
    the GLCM values for each configured angle, the LBP values, the Gabor
    values and the raw mean / standard deviation of the image.

    Relies on module-level configuration (``rootDir``, ``outputDir``,
    ``filename``) and on helpers defined elsewhere in the file.
    """
    glcm_angles = ['0', '45', '90', '135', 'Avg']

    lbp_radius = 1
    lbp_points = 8 * lbp_radius
    lbp_method = 'uniform'

    gabor_sigmas = (1.0, 2.0)
    gabor_freqs = (0.1, 0.3, 0.5)
    gabor_bank = ExtendGaborFeatures.genKernelBank(
        gabor_sigmas, gabor_freqs, [])

    # Header row; the column order mirrors the order values are added below.
    header = ['PatientID']
    header.extend(label + '_' + angle
                  for angle in glcm_angles
                  for label in haralick_labels[:-1])
    header.extend('LBP_%02d' % idx for idx in range(lbp_points + 1))
    header.append('LBP_Other')
    header.extend(stat + '_' + str(sigma) + '_' + str(freq)
                  for sigma in gabor_sigmas
                  for freq in gabor_freqs
                  for stat in ('Gabor_Mean', 'Gabor_Std'))
    header.extend(['Raw_Mean', 'Raw_Std'])

    csv_path = os.path.join(outputDir, filename)
    with open(csv_path, 'wb') as out_file:
        writer = csv.writer(out_file, dialect='excel')
        writer.writerow(header)

        for entry in os.listdir(rootDir):
            # Hidden entries and 'Icon' files are not images.
            if entry.startswith('.') or fnmatch.fnmatch(entry, '*Icon*'):
                continue

            # e.g. '20586908_0.jpg' -> '20586908_0'
            case_name = entry.split('.')[0]
            print(case_name)

            image = Read2DImage(os.path.join(rootDir, entry))

            # Raw gray-level statistics of the whole image.
            raw_mean = numpy.mean(image)
            raw_std = numpy.std(image)

            row = [case_name]

            # GLCM
            glcm = GLCMFeatures.calcFeatures(image)
            row.extend(glcm[angle][label]
                       for angle in glcm_angles
                       for label in haralick_labels[:-1])

            # LBP
            lbp = ExtendLBPFeatures.calcFeatures(
                image, lbp_points, lbp_radius, lbp_method)
            row.extend(lbp.tolist())

            # Gabor
            for gabor in KRAS_GaborFeatures.calcFeatures(image, gabor_bank):
                row.extend(gabor.tolist())

            row.extend([raw_mean, raw_std])
            writer.writerow(row)
Пример #8
0
def genFeatures():
    """Write sliding-window texture features for one hard-coded patient case.

    The case folder is ``rootDir/<twofolder>/<casefile>`` (currently
    ``malignant/Pt9``).  Its directory listing is scanned for one DICOM file
    per phase (``DES-CC``, ``LE-CC``, ``DES-MLO``, ``LE-MLO``) and for the CC
    and MLO XML ROI files.  The XML files are parsed with ``ParseXMLDrawROI``
    and ``chooseinoutcoord`` turns each result into a list of window points.

    For every phase and every point an 8x8 window (4 pixels before to 4
    pixels after the point on each axis) is cut from the phase image; windows
    that are not exactly 8x8 are skipped.  Each remaining window produces one
    CSV row: identifying columns, GLCM values, LBP values, Gabor values and
    the mean / standard deviation returned by ``Norm_Mean_Std_LargestBox``.
    CC phases use the CC points, all other phases use the MLO points.

    Output goes to ``outputDir/<patient>_<twofolder>_ROI_Texture_Map.csv``.

    Raises:
        IndexError: if no CC or MLO XML file is found in the case folder.
        KeyError: if the DICOM file of one of the four phases is missing.
    """
    # Parameters and feature list of each algorithm
    GLCMAngleList = ['0', '45', '90', '135', 'Avg']

    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'

    LBPFeatureList = ['LBP_%02d' % x for x in range(LBPnPoints + 1)]
    LBPFeatureList.append('LBP_Other')

    Gaborsigma_range = (0.6, 1.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']

    # Column titles, in the same order the feature values are joined below.
    featureTitle = [
        'PatientID', 'Dicom Image Filename', 'Xml Filename', 'Phase Name', 'X',
        'Y'
    ]
    featureTitle += [featureName + '_' + GLCMAngle
                     for GLCMAngle in GLCMAngleList
                     for featureName in haralick_labels[:-1]]
    featureTitle += LBPFeatureList
    featureTitle += [
        featureName + '_' + str(GaborSigma) + '_' + str(GaborFreq)
        for GaborSigma in Gaborsigma_range
        for GaborFreq in Gaborfreq_range
        for featureName in GaborFeatureList
    ]
    featureTitle += ['Raw_Mean', 'Raw_Std']

    # The group folder and the case are fixed; edit these to process another.
    twofolder = 'malignant'
    casefile = 'Pt9'
    rootDir2 = os.path.join(rootDir, twofolder)
    lesionpath = os.path.join(rootDir2, casefile)

    # Phase name -> file-name pattern of its DICOM file.
    dicomPatterns = (
        ('DES-CC', '*DES*CC*dcm'),
        ('LE-CC', '*LE*CC*dcm'),
        ('DES-MLO', '*DES*MLO*dcm'),
        ('LE-MLO', '*LE*MLO*dcm'),
    )

    roiDicomfile = dict()
    roiCCxmlfile = list()
    roiMLOxmlfile = list()

    for lesionfile in os.listdir(lesionpath):
        # Skip hidden entries and names containing 'Icon' or 'texture'.
        if (lesionfile.startswith('.')
                or fnmatch.fnmatch(lesionfile, '*Icon*')
                or fnmatch.fnmatch(lesionfile, '*texture*')):
            continue

        # A later matching file overrides an earlier one for the same phase.
        for phase, pattern in dicomPatterns:
            if fnmatch.fnmatch(lesionfile, pattern):
                roiDicomfile[phase] = lesionfile

        if fnmatch.fnmatch(lesionfile, '*CC*xml'):
            roiCCxmlfile.append(lesionfile)

        if fnmatch.fnmatch(lesionfile, '*MLO*xml'):
            roiMLOxmlfile.append(lesionfile)

    patientID = casefile
    phasenames = ['DES-CC', 'LE-CC', 'DES-MLO', 'LE-MLO']

    # Pt45 takes its second MLO XML file; every other case takes the first.
    roiccxml = roiCCxmlfile[0]
    roimloxml = roiMLOxmlfile[1 if casefile == 'Pt45' else 0]

    roiccxmlpath = os.path.join(lesionpath, roiccxml)
    roimloxmlpath = os.path.join(lesionpath, roimloxml)

    CCxmin, CCxmax, CCymin, CCymax, CCxycoord = ParseXMLDrawROI(roiccxmlpath)
    MLOxmin, MLOxmax, MLOymin, MLOymax, MLOxycoord = ParseXMLDrawROI(
        roimloxmlpath)

    # Window points selected for each view from the parsed ROI.
    CCwindowptlist = chooseinoutcoord(CCxmin, CCxmax, CCymin, CCymax,
                                      CCxycoord)
    MLOwindowptlist = chooseinoutcoord(MLOxmin, MLOxmax, MLOymin, MLOymax,
                                       MLOxycoord)

    featuresOutFn = patientID + '_' + twofolder + '_' + 'ROI_Texture_Map.csv'
    featuresCSVFn = os.path.join(outputDir, featuresOutFn)

    # 'wb' is the mode the Python 2 csv module expects for output files.
    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for phase in phasenames:

            print(phase)
            lesionDicomFn = roiDicomfile[phase]
            dicomfilepath = os.path.join(lesionpath, lesionDicomFn)
            dicomImage = Read2DImage(dicomfilepath)

            # CC phases share the CC ROI; the remaining phases use the MLO ROI.
            if fnmatch.fnmatch(phase, '*CC'):
                windowptlist, roixml = CCwindowptlist, roiccxml
            else:
                windowptlist, roixml = MLOwindowptlist, roimloxml

            for eachpt in windowptlist:

                xcoord = int(eachpt[0])
                ycoord = int(eachpt[1])

                subImage = dicomImage[ycoord - 4:ycoord + 4,
                                      xcoord - 4:xcoord + 4]

                # Near the image border the slice cannot form a full 8x8 box.
                subshape = numpy.shape(subImage)
                if subshape[0] != 8 or subshape[1] != 8:
                    continue

                subImageGLCM = GrayScaleNormalization(
                    subImage, subImage.max(), subImage.min())

                # Mean and standard deviation from the project helper, given
                # the window and its own gray-level range.
                Raw_mean, Raw_std = Norm_Mean_Std_LargestBox(
                    subImage, subImage.max(), subImage.min())
                meanstd = [Raw_mean, Raw_std]

                # GLCM
                glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
                GLCM = [glcmFeatures[GLCMAngle][featureName]
                        for GLCMAngle in GLCMAngleList
                        for featureName in haralick_labels[:-1]]

                # LBP works on the window grown by LBPRadius on every side
                # but is normalized with the range of the 8x8 window.
                # NOTE(review): the 8x8 guard above does not cover this
                # larger slice; a point within LBPRadius of the border gives
                # a clipped slice or a negative start index, which numpy
                # interprets relative to the end of the axis -- confirm
                # ExtendLBPFeatures copes with that.
                subImageLBP = dicomImage[
                    ycoord - 4 - LBPRadius:ycoord + 4 + LBPRadius,
                    xcoord - 4 - LBPRadius:xcoord + 4 + LBPRadius]

                extendsubImageLBP = GrayScaleNormalization(
                    subImageLBP, subImage.max(), subImage.min())

                LBPs = ExtendLBPFeatures.calcFeatures(
                    extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod)
                LBP = list(LBPs)

                # Gabor on the 8x8 window, addressed by its top-left corner.
                GaborFeatures = ExtendGaborFeatures.calcFeatures(
                    dicomImage, xcoord - 4, ycoord - 4, 8, 8,
                    Gaborkernel_bank, subImage.max(), subImage.min())
                Gabor = [eachg
                         for gaborfeature in GaborFeatures
                         for eachg in gaborfeature]

                aFeature = [
                    patientID, lesionDicomFn, roixml, phase, xcoord, ycoord
                ]
                aFeature = aFeature + GLCM + LBP + Gabor + meanstd
                featureWriter.writerow(aFeature)
def genFeatures(dualRescaleOption=True):
    """Write GLCM and LBP features of each lesion rectangle to a CSV file.

    Every sub-directory of ``rootDir`` is one patient/phase folder holding a
    lesion folder and a normal folder.  The gray levels listed in the ROI
    coordinate files (third ``;``-separated column) define the range passed
    to ``GrayScaleNormalization``.  Each row of the lesion rectangle file
    (columns ``X``, ``Y``, ``W``, ``H``) yields one output row with the
    identifying columns, the GLCM values, the LBP values and the mean /
    standard deviation returned by ``Mean_Std_LargestBox2``.  Gabor features
    are not computed in this variant.

    Folders with any required input file missing are reported and skipped,
    as are rectangles with ``X == Y == 1`` and rectangles that are all zero
    after normalization.

    Args:
        dualRescaleOption: when true (the default) the gray levels of both
            the lesion and the normal ROI feed the normalization range;
            when false only the lesion ROI is used.

    Relies on module-level configuration (``rootDir``, ``outputDir``,
    ``featuresOutFn``, ``lesionFolder``, ``normalFolder``, ``lesionDicomFn``,
    ``normalDicomFn``, ``roiCoordsFn``, ``lesionroiFn``) and on helpers
    defined elsewhere in the file.
    """
    # Parameters and feature list of each algorithm
    GLCMAngleList = ['0', '45', '90', '135', 'Avg']

    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'

    LBPFeatureList = ['LBP_%02d' % x for x in range(LBPnPoints + 1)]
    LBPFeatureList.append('LBP_Other')

    # Column titles, in the same order the feature values are appended below.
    featureTitle = ['PatientID', 'Phase', 'ROI_Y', 'ROI_X', 'Width', 'Height']
    featureTitle += [featureName + '_' + GLCMAngle
                     for GLCMAngle in GLCMAngleList
                     for featureName in haralick_labels[:-1]]
    featureTitle += LBPFeatureList
    featureTitle += ['LargestBox_Mean', 'LargestBox_Std']

    featuresCSVFn = os.path.join(outputDir, featuresOutFn)

    # 'wb' is the mode the Python 2 csv module expects for output files.
    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for patientPhaseDir in os.listdir(rootDir):
            # Only non-hidden directories are patient/phase folders.
            if patientPhaseDir.startswith('.') or \
                    os.path.isfile(os.path.join(rootDir, patientPhaseDir)):
                continue

            print(patientPhaseDir)

            # One folder carries a fixed ID; otherwise the ID is the third
            # underscore-separated token of the folder name.
            if fnmatch.fnmatch(patientPhaseDir, '*20160212*'):
                patientID = '20160212'
            else:
                patientID = patientPhaseDir.split('_')[2]

            if fnmatch.fnmatch(patientPhaseDir, '*ADC*'):
                phasename = 'ADC'
            elif fnmatch.fnmatch(patientPhaseDir, '*TRACEW*'):
                phasename = 'TRACEW'
            else:
                phasename = 'FOV'

            lesionPath = os.path.join(rootDir, patientPhaseDir, lesionFolder)
            normalPath = os.path.join(rootDir, patientPhaseDir, normalFolder)

            # DICOM file name with absolute path
            lesionDicom = os.path.join(lesionPath, lesionDicomFn)
            normalDicom = os.path.join(normalPath, normalDicomFn)

            # ROI file name with absolute path
            lesionROICoords = os.path.join(lesionPath, roiCoordsFn)
            normalROICoords = os.path.join(normalPath, roiCoordsFn)

            # Largest rectangle file name with absolute path
            lesionROIRectFn = os.path.join(lesionPath, lesionroiFn)

            # Skip the folder if any input is missing.  The rectangle file is
            # part of the check because it is opened unconditionally below.
            requiredFiles = (lesionDicom, normalDicom, lesionROICoords,
                             normalROICoords, lesionROIRectFn)
            if not all(os.path.isfile(fn) for fn in requiredFiles):
                print('Missing File for %s @ %s.' % (patientID, phasename))
                continue

            # Gray levels that define the normalization range.
            roiCoordsFiles = [lesionROICoords]
            if dualRescaleOption:
                roiCoordsFiles.append(normalROICoords)

            grayLevels = []
            for coordsFn in roiCoordsFiles:
                with open(coordsFn, 'r') as roiCoordsFile:
                    for row in csv.reader(roiCoordsFile, delimiter=';'):
                        grayLevels.append(int(row[2]))
            # Float array, so the range below is a float as well.
            dualROIGrayLevels = numpy.array(grayLevels, dtype=numpy.float64)

            with open(lesionROIRectFn, 'r') as roiFile:
                for aROI in csv.DictReader(roiFile, dialect='excel'):
                    roiY = int(aROI['Y'])
                    roiX = int(aROI['X'])
                    if roiY == 1 and roiX == 1:
                        print('Invalid ROI for %s @ %s.' %
                              (patientID, phasename))
                        continue
                    roiW = int(aROI['W'])
                    roiH = int(aROI['H'])

                    dicomImage = Read2DImage(lesionDicom)

                    subImage = dicomImage[roiY:roiY + roiH,
                                          roiX:roiX + roiW]

                    # LBP uses the rectangle grown by LBPRadius on each side.
                    # NOTE(review): a rectangle closer than LBPRadius to the
                    # top/left edge gives a negative start index, which numpy
                    # interprets relative to the end of the axis -- confirm
                    # ROIs never sit there.
                    subImageLBP = dicomImage[
                        roiY - LBPRadius:roiY + roiH + LBPRadius,
                        roiX - LBPRadius:roiX + roiW + LBPRadius]

                    mean_LargBox, std_LargBox = Mean_Std_LargestBox2(
                        dicomImage, roiX, roiY, roiW, roiH)

                    # Peak-to-peak (max - min) of the collected gray levels.
                    grayLevelRange = numpy.ptp(dualROIGrayLevels)

                    subImage = GrayScaleNormalization(subImage,
                                                      grayLevelRange)
                    extendsubImageLBP = GrayScaleNormalization(
                        subImageLBP, grayLevelRange)

                    if numpy.all(subImage == 0):
                        print('%s @ %s is all zero.' % (patientID, phasename))
                        continue

                    aFeature = [patientID, phasename, aROI['Y'], aROI['X'],
                                aROI['W'], aROI['H']]

                    # GLCM
                    glcmFeatures = GLCMFeatures.calcFeatures(subImage)
                    for GLCMAngle in GLCMAngleList:
                        for featureName in haralick_labels[:-1]:
                            aFeature.append(
                                glcmFeatures[GLCMAngle][featureName])

                    # LBP
                    lbpFeatures = ExtendLBPFeatures.calcFeatures(
                        extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod)
                    aFeature = aFeature + lbpFeatures.tolist()

                    aFeature = aFeature + [mean_LargBox, std_LargBox]

                    featureWriter.writerow(aFeature)
    print('Done.')
Пример #10
0
def genFeatures(dualRescaleOption=True):
    """Write one row of texture features per lesion ROI to a CSV file.

    Every non-hidden sub-directory of ``rootDir`` is treated as one
    patient/phase case.  For each row of the case's ROI rectangle file the
    matching sub-image is cut out of the lesion DICOM, and GLCM, LBP, Gabor
    and raw mean/std features are written to ``featuresOutFn`` inside
    ``outputDir``.

    Args:
        dualRescaleOption: if true (default), the gray-level range used for
            normalization spans both the lesion ROI and the same rectangle
            in the normal image; if false, only the lesion ROI range is
            used.
    """
    # Parameters and feature list of each algorithm
    # GLCMAngleList = ['0', '45', '90', '135', 'Avg']
    GLCMAngleList = ['Avg']

    LBPRadius = 3
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'

    # One column title per LBP value 0..LBPnPoints, plus a catch-all column.
    LBPFeatureList = ['LBP_%02d' % x for x in xrange(0, LBPnPoints + 1)]
    LBPFeatureList.append('LBP_Other')

    Gaborsigma_range = (1.0, 3.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    kernel_bank = []
    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, kernel_bank)
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']

    # Header row: identification columns, then GLCM, LBP, Gabor and raw
    # statistics, in the same order the values are appended below.
    featureTitle = ['PatientID', 'Phase', 'ROI_Y', 'ROI_X', 'Width', 'Height']
    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)

    featureTitle = featureTitle + LBPFeatureList

    for GaborSigma in Gaborsigma_range:
        for GaborFreq in Gaborfreq_range:
            for featureName in GaborFeatureList:
                featureTitle.append(featureName + '_' + str(GaborSigma) + '_' +
                                    str(GaborFreq))

    featureTitle = featureTitle + ['LargestBox_Mean', 'LargestBox_Std']

    # All cases share a single output CSV.
    featuresCSVFn = os.path.join(outputDir, featuresOutFn)
    # 'wb': the Python 2 csv module expects a binary-mode file object.
    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for patientPhaseDir in os.listdir(rootDir):
            # Skip hidden entries and plain files; only case folders count.
            if patientPhaseDir.startswith('.') or \
                    os.path.isfile(os.path.join(rootDir, patientPhaseDir)):
                continue

            # Only a simple parser, not always precise
            patientID = patientPhaseDir.split('_ ')[0]
            phaseName = patientPhaseDir.split('_ ')[1].split('_')[1]

            print('Processing %s @ %s ...' % (patientID, phaseName))

            lesionPath = os.path.join(rootDir, patientPhaseDir, lesionFolder)
            normalPath = os.path.join(rootDir, patientPhaseDir, normalFolder)

            # DICOM file name with absolute path
            lesionDicom = os.path.join(lesionPath, lesionDicomFn)
            normalDicom = os.path.join(normalPath, normalDicomFn)

            # ROI file name with absolute path
            lesionROICoords = os.path.join(lesionPath, roiCoordsFn)
            normalROICoords = os.path.join(normalPath, roiCoordsFn)

            # Largest rectangle file name with absolute path
            lesionROIRectFn = os.path.join(lesionPath, roiFn)

            # If any of the files is missing, skip the case.  The rectangle
            # file is included because it is opened right below.
            requiredFiles = (lesionDicom, normalDicom, lesionROICoords,
                             normalROICoords, lesionROIRectFn)
            if not all(os.path.isfile(fn) for fn in requiredFiles):
                print('Missing File for %s @ %s.' % (patientID, phaseName))
                continue

            with open(lesionROIRectFn, 'r') as roiFile:
                roiList = csv.DictReader(roiFile, dialect='excel')
                for aROI in roiList:
                    # A rectangle anchored at (1, 1) is treated as invalid.
                    if (int(aROI['Y']) == 1) and (int(aROI['X']) == 1):
                        print('Invalid ROI for %s @ %s.' %
                              (patientID, phaseName))
                        continue

                    xcoord = int(aROI['X'])
                    ycoord = int(aROI['Y'])
                    width = int(aROI['W'])
                    height = int(aROI['H'])

                    lesiondicomImage = Read2DImage(lesionDicom)
                    lesionsubImage = lesiondicomImage[ycoord:(ycoord + height),
                                                      xcoord:(xcoord + width)]

                    mean_LargBox = numpy.mean(lesionsubImage)
                    std_LargBox = numpy.std(lesionsubImage)

                    # Gray-level range used for normalization.
                    if dualRescaleOption:
                        # Same rectangle taken from the normal image; the
                        # range is the union of both sub-images.
                        normaldicomImage = Read2DImage(normalDicom)
                        normalsubImage = normaldicomImage[
                            ycoord:(ycoord + height),
                            xcoord:(xcoord + width)]
                        subImageMax = max(normalsubImage.max(),
                                          lesionsubImage.max())
                        subImageMin = min(normalsubImage.min(),
                                          lesionsubImage.min())
                    else:
                        subImageMax = lesionsubImage.max()
                        subImageMin = lesionsubImage.min()

                    # LBP uses the rectangle grown by LBPRadius on each side.
                    # NOTE(review): a rectangle closer than LBPRadius to the
                    # top/left border yields a negative slice start here --
                    # confirm such ROIs cannot occur.
                    subImageLBP = lesiondicomImage[
                        ycoord - LBPRadius:(ycoord + height) + LBPRadius,
                        xcoord - LBPRadius:(xcoord + width) + LBPRadius]

                    subImageGLCM = GrayScaleNormalization(
                        lesionsubImage, subImageMax, subImageMin)

                    if numpy.all(lesionsubImage == 0):
                        print('%s @ %s is all zero.' % (patientID, phaseName))
                        continue

                    aFeature = [
                        patientID, phaseName, aROI['Y'], aROI['X'], aROI['W'],
                        aROI['H']
                    ]

                    # GLCM
                    glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
                    for GLCMAngle in GLCMAngleList:
                        for featureName in haralick_labels[:-1]:
                            aFeature.append(
                                glcmFeatures[GLCMAngle][featureName])

                    # LBP
                    extendsubImageLBP = GrayScaleNormalization(
                        subImageLBP, subImageMax, subImageMin)
                    lbpFeatures = ExtendLBPFeatures.calcFeatures(
                        extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod)
                    aFeature = aFeature + lbpFeatures.tolist()

                    # Gabor
                    GaborFeatures = ExtendGaborFeatures.calcFeatures(
                        lesiondicomImage, xcoord, ycoord, width, height,
                        Gaborkernel_bank, subImageMax, subImageMin)
                    for gaborfeature in GaborFeatures:
                        aFeature = aFeature + gaborfeature.tolist()

                    aFeature = aFeature + [mean_LargBox, std_LargBox]

                    featureWriter.writerow(aFeature)
    print('Done.')
Пример #11
0
def genFeatures():
    """Write one row of texture features per ACR lesion folder to a CSV file.

    Each non-hidden folder in ``rootDir`` contains lesion folders, and each
    lesion folder holds the first four time points named in
    ``timepointlist``.  For every lesion the ROI rectangle of each time
    point is cut from that time point's DICOM, the four sub-images are
    combined by ``MinMaxSubImageGen``, and GLCM, LBP, Gabor and raw
    mean/std/kurtosis/skewness features of the minimum sub-image are written
    to ``featuresOutFn`` inside ``outputDir``.
    """
    # Parameters and feature list of each algorithm
    GLCMAngleList = ['Avg']

    LBPRadius = 3
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'

    # One column title per LBP value 0..LBPnPoints, plus a catch-all column.
    LBPFeatureList = ['LBP_%02d' % x for x in xrange(0, LBPnPoints + 1)]
    LBPFeatureList.append('LBP_Other')

    Gaborsigma_range = (0.6, 1.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    kernel_bank = []
    Gaborkernel_bank = MinACRExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, kernel_bank)
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std', 'Gabor_Kurtosis',
                        'Gabor_Skewness']

    # Generate full list of features combined with parameters
    featureTitle = ['PatientID', 'ROI_Y', 'ROI_X', 'Width', 'Height']

    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)

    featureTitle = featureTitle + LBPFeatureList

    for GaborSigma in Gaborsigma_range:
        for GaborFreq in Gaborfreq_range:
            for featureName in GaborFeatureList:
                featureTitle.append(featureName + '_' + str(GaborSigma) +
                                    '_' + str(GaborFreq))

    featureTitle = featureTitle + ['LargestBox_Mean', 'LargestBox_Std',
                                   'LargestBox_Kurtosis',
                                   'LargestBox_Skewness']

    def _readLastROI(roiPath):
        """Return (x, y, w, h) from the last row of an ROI CSV, or None."""
        box = None
        with open(roiPath, 'r') as roiFile:
            for aROI in csv.DictReader(roiFile, dialect='excel'):
                box = (int(aROI['X']), int(aROI['Y']),
                       int(aROI['W']), int(aROI['H']))
        return box

    # All lesions share a single output CSV.
    featuresCSVFn = os.path.join(outputDir, featuresOutFn)

    # 'wb': the Python 2 csv module expects a binary-mode file object.
    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for ACRfolder in os.listdir(rootDir):
            if ACRfolder.startswith('.'):
                continue

            patientid = ACRfolder.split('_')[0] + ACRfolder.split('_')[1]

            ACRpath = os.path.join(rootDir, ACRfolder)

            for lesionfolder in os.listdir(ACRpath):
                if lesionfolder.startswith('.'):
                    continue

                ACRlesionpath = os.path.join(ACRpath, lesionfolder)

                # Folder, DICOM image and largest-box ROI of the four
                # time points, always in timepointlist order.
                timepointpaths = [os.path.join(ACRlesionpath, timepointlist[i])
                                  for i in range(4)]
                dicomImages = [Read2DImage(os.path.join(tpPath, dicomfile))
                               for tpPath in timepointpaths]
                roiBoxes = [_readLastROI(os.path.join(tpPath, largestboxfile))
                            for tpPath in timepointpaths]

                # An ROI file without rows would otherwise leave the
                # coordinates undefined or carried over from the previous
                # lesion folder.
                if any(box is None for box in roiBoxes):
                    print('No ROI found for %s @ %s.' %
                          (patientid, lesionfolder))
                    continue

                # Plain sub-image and LBPRadius-extended sub-image of each
                # time point.
                subImages = []
                subImagesLBP = []
                for dicomImage, (x, y, w, h) in zip(dicomImages, roiBoxes):
                    subImages.append(dicomImage[y:y + h, x:x + w])
                    subImagesLBP.append(
                        dicomImage[y - LBPRadius:(y + h) + LBPRadius,
                                   x - LBPRadius:(x + w) + LBPRadius])

                xcoord1, ycoord1, width1, height1 = roiBoxes[0]
                xcoord2, ycoord2 = roiBoxes[1][0], roiBoxes[1][1]
                xcoord3, ycoord3 = roiBoxes[2][0], roiBoxes[2][1]
                xcoord4, ycoord4 = roiBoxes[3][0], roiBoxes[3][1]

                # generate min/max image matrix; the width and height of the
                # first time point are passed as the common size
                MinSubImage, MaxSubImage = MinMaxSubImageGen(
                    subImages[0], subImages[1], subImages[2], subImages[3],
                    height1, width1)

                # get extended LBP height, width
                LBPheight = height1 + 2 * LBPRadius
                LBPwidth = width1 + 2 * LBPRadius

                # get min/max LBP subimage from 4 LBP subimages
                MinLBPSubImage, MaxLBPSubImage = MinMaxSubImageGen(
                    subImagesLBP[0], subImagesLBP[1], subImagesLBP[2],
                    subImagesLBP[3], LBPheight, LBPwidth)

                # get raw mean/std from min subimage
                mean_LargBox = numpy.mean(MinSubImage)
                std_LargBox = numpy.std(MinSubImage)

                # Kurtosis and skewness are computed over the flattened
                # pixel values of the min subimage.
                MinSubImagelist = [value
                                   for row in MinSubImage.tolist()
                                   for value in row]
                Kurtosis_LargBox = kurtosis(MinSubImagelist)
                Skewness_LargBox = skew(MinSubImagelist)

                # normalized original subimage, GLCM can use this
                subImageGLCM = GrayScaleNormalization(
                    MinSubImage, MinSubImage.max(), MinSubImage.min())

                # The extended LBP subimage is normalized with the gray-level
                # range of the un-extended min subimage.
                extendsubImageLBP = GrayScaleNormalization(
                    MinLBPSubImage, MinSubImage.max(), MinSubImage.min())

                if numpy.all(MinSubImage == 0):
                    print('%s @ %s is all zero.' % (patientid, lesionfolder))
                    continue

                aFeature = [patientid, ycoord1, xcoord1, width1, height1]

                # GLCM
                glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)

                for GLCMAngle in GLCMAngleList:
                    for featureName in haralick_labels[:-1]:
                        aFeature.append(glcmFeatures[GLCMAngle][featureName])

                # LBP
                # need to use extended ROI
                lbpFeatures = ExtendLBPFeatures.calcFeatures(
                    extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod)
                aFeature = aFeature + lbpFeatures.tolist()

                # Gabor
                GaborFeatures = MinACRExtendGaborFeatures.calcFeatures(
                    dicomImages[0], dicomImages[1], dicomImages[2],
                    dicomImages[3], xcoord1, ycoord1, xcoord2, ycoord2,
                    xcoord3, ycoord3, xcoord4, ycoord4, width1, height1,
                    Gaborkernel_bank, MinSubImage.max(), MinSubImage.min())

                for gaborfeature in GaborFeatures:
                    aFeature = aFeature + gaborfeature.tolist()

                aFeature = aFeature + [mean_LargBox, std_LargBox,
                                       Kurtosis_LargBox, Skewness_LargBox]
                featureWriter.writerow(aFeature)
Пример #12
0
def genFeatures():
    """Write one row of texture and shape features per ROI to a CSV file.

    Walks ``rootDir`` three levels deep (case folder / patient folder /
    phase folder).  In each phase folder it looks for a largest-rectangle
    CSV, a contour CSV and a DICOM file, computes the shape descriptors of
    the contour, and for every rectangle row writes GLCM, LBP, Gabor, raw
    mean/std and shape features to ``featuresOutFn`` inside ``outputDir``.
    """
    # Parameters and feature list of each algorithm
    GLCMAngleList = ['0', '45', '90', '135', 'Avg']

    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'

    # One column title per LBP value 0..LBPnPoints, plus a catch-all column.
    LBPFeatureList = ['LBP_%02d' % x for x in xrange(0, LBPnPoints + 1)]
    LBPFeatureList.append('LBP_Other')

    Gaborsigma_range = (1.0, 2.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    kernel_bank = []
    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, kernel_bank)
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']

    # Generate full list of features combined with parameters
    featureTitle = ['PatientID', 'Phase', 'ROI_Y', 'ROI_X', 'Width', 'Height']

    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)

    featureTitle = featureTitle + LBPFeatureList

    for GaborSigma in Gaborsigma_range:
        for GaborFreq in Gaborfreq_range:
            for featureName in GaborFeatureList:
                featureTitle.append(featureName + '_' + str(GaborSigma) + '_' +
                                    str(GaborFreq))

    featureTitle = featureTitle + ['LargestBox_Mean', 'LargestBox_Std']
    featureTitle = featureTitle + [
        'compactness', 'entropy', 'bending energy', 'ratio(min/max)'
    ]

    # First matching pattern wins; anything unmatched is labelled 'LM LE'.
    phasePatterns = (
        ('*CC*DES*', 'CC DES'),
        ('*CC*LE*', 'CC LE'),
        ('*MLO*DES*', 'MLO DES'),
        ('*MLO*LE*', 'MLO LE'),
        ('*LM*DES*', 'LM DES'),
    )

    # All cases share a single output CSV.
    featuresCSVFn = os.path.join(outputDir, featuresOutFn)

    # 'wb': the Python 2 csv module expects a binary-mode file object.
    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for casefile in os.listdir(rootDir):
            if casefile.startswith('.'):
                continue
            if fnmatch.fnmatch(casefile, '*Icon*'):
                continue
            # These two cases are explicitly excluded.
            if casefile in ('M24 - M24', 'M5 - M5'):
                continue
            print('\n')
            print(casefile)

            patientid = casefile.split('-')[0]

            patientfolderpath = os.path.join(rootDir, casefile)

            for patientfolder in os.listdir(patientfolderpath):
                if patientfolder.startswith('.'):
                    continue
                if fnmatch.fnmatch(patientfolder, '*Icon*'):
                    continue
                if fnmatch.fnmatch(patientfolder, '*roi*'):
                    continue

                Dfolderpath = os.path.join(patientfolderpath, patientfolder)

                for phasefolder in os.listdir(Dfolderpath):

                    phasefolderpath = os.path.join(Dfolderpath, phasefolder)
                    if phasefolder.startswith('.'):
                        continue
                    # Glob match, as at the other directory levels.
                    if fnmatch.fnmatch(phasefolder, '*Icon*'):
                        continue
                    if os.path.isfile(phasefolderpath):
                        continue

                    print(phasefolder)

                    phase1 = phasefolder.split('-')[0].replace(' ', '')
                    phasename = 'LM LE'
                    for pattern, label in phasePatterns:
                        if fnmatch.fnmatch(phase1, pattern):
                            phasename = label
                            break

                    # Locate the rectangle CSV, contour CSV and DICOM file;
                    # when several entries match, the last one listed wins.
                    rectfile = ''
                    contourfile = ''
                    dicomFn = ''
                    for entry in os.listdir(phasefolderpath):

                        if fnmatch.fnmatch(entry, '*texture*'):
                            continue
                        if fnmatch.fnmatch(entry, '*(1)*'):
                            continue

                        if fnmatch.fnmatch(entry, '*largest_rec*'):
                            rectfile = entry
                        elif fnmatch.fnmatch(entry, '*csv*'):
                            # Any other csv is taken as the contour file.
                            contourfile = entry

                        if fnmatch.fnmatch(entry, '*dcm*'):
                            dicomFn = entry

                    # An empty name would make the joined path point at the
                    # folder itself and fail when opened.
                    if not (rectfile and contourfile and dicomFn):
                        print('Missing File for %s @ %s.' %
                              (patientid, phasename))
                        continue

                    recpath = os.path.join(phasefolderpath, rectfile)
                    contourpath = os.path.join(phasefolderpath, contourfile)
                    dicompath = os.path.join(phasefolderpath, dicomFn)

                    shapedescriptors = ROI_ShapeAnalysis_92.genShapefeatures(
                        contourpath)

                    with open(recpath, 'r') as roiFile:
                        roiList = csv.DictReader(roiFile, dialect='excel')
                        for aROI in roiList:
                            # A rectangle anchored at (1, 1) is treated as
                            # invalid.
                            if (int(aROI['Y']) == 1) and (int(aROI['X']) == 1):
                                print('Invalid ROI for %s @ %s.' %
                                      (patientid, phasename))
                                continue

                            xcoord = int(aROI['X'])
                            ycoord = int(aROI['Y'])
                            width = int(aROI['W'])
                            height = int(aROI['H'])

                            dicomImage = Read2DImage(dicompath)
                            subImage = dicomImage[ycoord:(ycoord + height),
                                                  xcoord:(xcoord + width)]

                            # LBP uses the rectangle grown by LBPRadius on
                            # each side.
                            subImageLBP = dicomImage[
                                ycoord - LBPRadius:(ycoord + height) +
                                LBPRadius,
                                xcoord - LBPRadius:(xcoord + width) +
                                LBPRadius]

                            mean_LargBox = numpy.mean(subImage)
                            std_LargBox = numpy.std(subImage)

                            subImageGLCM = GrayScaleNormalization(
                                subImage, subImage.max(), subImage.min())

                            # The extended sub-image is normalized with the
                            # gray-level range of the un-extended one.
                            extendsubImageLBP = GrayScaleNormalization(
                                subImageLBP, subImage.max(), subImage.min())

                            if numpy.all(subImage == 0):
                                print('%s @ %s is all zero.' %
                                      (patientid, phasename))
                                continue

                            aFeature = [
                                patientid, phasename, aROI['Y'], aROI['X'],
                                aROI['W'], aROI['H']
                            ]

                            # GLCM
                            # dont need to extended ROI
                            glcmFeatures = GLCMFeatures.calcFeatures(
                                subImageGLCM)

                            for GLCMAngle in GLCMAngleList:
                                for featureName in haralick_labels[:-1]:
                                    aFeature.append(
                                        glcmFeatures[GLCMAngle][featureName])

                            # LBP
                            # need to use extended ROI
                            lbpFeatures = ExtendLBPFeatures.calcFeatures(
                                extendsubImageLBP, LBPnPoints, LBPRadius,
                                LBPMethod)
                            aFeature = aFeature + lbpFeatures.tolist()

                            # Gabor
                            GaborFeatures = ExtendGaborFeatures.calcFeatures(
                                dicomImage, xcoord, ycoord, width,
                                height, Gaborkernel_bank, subImage.max(),
                                subImage.min())
                            for gaborfeature in GaborFeatures:
                                aFeature = aFeature + gaborfeature.tolist()

                            aFeature = aFeature + [mean_LargBox, std_LargBox]

                            aFeature = aFeature + shapedescriptors

                            featureWriter.writerow(aFeature)
def genFeatures():
    # Parameters and feature list of each algorithm
    GLCMAngleList = ['Avg']

    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'

    LBPFeatureList = []
    for x in xrange(0, LBPnPoints + 1):
        LBPFeatureList.append('LBP_%02d' % x)
    LBPFeatureList.append('LBP_Other')

    Gaborsigma_range = (0.6, 1.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    kernel_bank = []

    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, kernel_bank)

    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']

    # Generate full list of features combined with parameters
    featureTitle = [
        'PatientID', 'Dicom Image Filename', 'Xml Filename', 'Phase Name', 'X',
        'Y', 'Boundary (1) or not (inside: 0)'
    ]

    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)

    featureTitle = featureTitle + LBPFeatureList

    for GaborSigma in Gaborsigma_range:
        for GaborFreq in Gaborfreq_range:
            for featureName in GaborFeatureList:
                featureTitle.append(featureName + '_' + str(GaborSigma) + '_' +
                                    str(GaborFreq))

    MeanStdLBfeaturelist = ['Raw_Mean', 'Raw_Std']
    featureTitle = featureTitle + MeanStdLBfeaturelist + ['Ylabel']

    for eachptfolder in os.listdir(rootDir):
        if eachptfolder.startswith('.'):
            continue
        if eachptfolder.startswith('..'):
            continue
        if fnmatch.fnmatch(eachptfolder, '*Icon*'):
            continue
        if eachptfolder == 'M24 - M24':
            continue
        if eachptfolder == 'M5 - M5':
            continue

        # ptnum+=1
        print('\n')
        # print eachptfolder

        patientID = eachptfolder.split('-')[0]
        print patientID

        if fnmatch.fnmatch(eachptfolder, '*B*'):
            twofolder = 'benign'
        else:
            twofolder = 'malignant'

        rootDir2 = os.path.join(rootDir, eachptfolder)

        for folder2 in os.listdir(rootDir2):
            if folder2.startswith('.'):
                continue
            if folder2.startswith('..'):
                continue
            if fnmatch.fnmatch(folder2, '*Icon*'):
                continue
            if fnmatch.fnmatch(folder2, '*roi*'):
                continue

            # print folder2

            rootDir3 = os.path.join(rootDir2, folder2)

            ccxmlfile = list()
            mloxmlfile = list()
            roiDicomfile = dict()
            roiDicomfolder = dict()
            phasefolderpath = ''

            for xmlcasefolder in os.listdir(rootDir3):
                phasefolderpath = os.path.join(rootDir3, xmlcasefolder)

                if xmlcasefolder.startswith('.'):
                    continue
                if xmlcasefolder.startswith('..'):
                    continue
                if fnmatch.fnmatch(xmlcasefolder, '*Icon*'):
                    continue

                if fnmatch.fnmatch(xmlcasefolder, '*CC*xml'):
                    ccxmlfile.append(xmlcasefolder)
                    print ccxmlfile
                    # xmlnum+=1

                if fnmatch.fnmatch(xmlcasefolder, '*MLO*xml'):
                    mloxmlfile.append(xmlcasefolder)
                    print mloxmlfile
                    # xmlnum+=1

                if os.path.isdir(phasefolderpath):

                    roiDicomfile, roiDicomfolder = finddcmfile(
                        phasefolderpath, xmlcasefolder, roiDicomfile,
                        roiDicomfolder)

            roiccxmlpath = os.path.join(rootDir3, ccxmlfile[0])
            roimloxmlpath = os.path.join(rootDir3, mloxmlfile[0])

            CCxmin, CCxmax, CCymin, CCymax, CCxycoord = ParseXMLDrawROI(
                roiccxmlpath)
            MLOxmin, MLOxmax, MLOymin, MLOymax, MLOxycoord = ParseXMLDrawROI(
                roimloxmlpath)

            # check if coords inside boundary or outside boundary
            CCwindowptlist = chooseinoutcoord(CCxmin, CCxmax, CCymin, CCymax,
                                              CCxycoord)
            MLOwindowptlist = chooseinoutcoord(MLOxmin, MLOxmax, MLOymin,
                                               MLOymax, MLOxycoord)

            featuresOutFn = 'ROI_Texture_Map.csv'

            # start to do T2
            featuresOutFn = patientID + '_' + twofolder + '_' + featuresOutFn
            featuresCSVFn = os.path.join(outputDir, featuresOutFn)

            phasenames = ['DES-CC', 'LE-CC', 'DES-MLO', 'LE-MLO']

            with open(featuresCSVFn, 'wb') as featureCSVFile:
                featureWriter = csv.writer(featureCSVFile, dialect='excel')
                featureWriter.writerow(featureTitle)

                for phase in phasenames:

                    print phase
                    lesionDicomFn = roiDicomfile[phase]

                    lesionDicomFolder = roiDicomfolder[phase]

                    lesionDicomFolderpath = os.path.join(
                        rootDir3, lesionDicomFolder)
                    dicomfilepath = os.path.join(lesionDicomFolderpath,
                                                 lesionDicomFn)

                    dicomImage = Read2DImage(dicomfilepath)

                    if fnmatch.fnmatch(phase, '*CC'):

                        for eachpt in CCwindowptlist:

                            meanstd = list()
                            GLCM = list()
                            LBP = list()
                            Gabor = list()

                            xcoord = int(eachpt[0])
                            ycoord = int(eachpt[1])
                            boundaryornot = int(eachpt[2])

                            aFeature = [
                                patientID, lesionDicomFn, ccxmlfile[0], phase,
                                xcoord, ycoord, boundaryornot
                            ]

                            subImage = dicomImage[ycoord - 4:ycoord + 4,
                                                  xcoord - 4:xcoord + 4]

                            subshape = numpy.shape(subImage)
                            if subshape[0] != 8 or subshape[1] != 8:
                                continue

                            subImageGLCM = GrayScaleNormalization(
                                subImage, subImage.max(), subImage.min())

                            # get normalized to 0, 255: raw mean and standard deviation
                            Raw_mean, Raw_std = Norm_Mean_Std_LargestBox(
                                subImage, subImage.max(), subImage.min())
                            meanstd.append(Raw_mean)
                            meanstd.append(Raw_std)

                            # GLCM
                            glcmFeatures = GLCMFeatures.calcFeatures(
                                subImageGLCM)

                            for GLCMAngle in GLCMAngleList:
                                for featureName in haralick_labels[:-1]:
                                    GLCM.append(
                                        glcmFeatures[GLCMAngle][featureName])

                            # LBP subimage
                            subImageLBP = dicomImage[ycoord - 4 -
                                                     LBPRadius:ycoord + 4 +
                                                     LBPRadius, xcoord - 4 -
                                                     LBPRadius:xcoord + 4 +
                                                     LBPRadius]

                            extendsubImageLBP = GrayScaleNormalization(
                                subImageLBP, subImage.max(), subImage.min())

                            # LBP is computed on the window extended by LBPRadius pixels on each side
                            LBPs = ExtendLBPFeatures.calcFeatures(
                                extendsubImageLBP, LBPnPoints, LBPRadius,
                                LBPMethod)
                            for lbp in LBPs:
                                LBP.append(lbp)

                            # Gabor, width = 8
                            # use extended ROI
                            GaborFeatures = ExtendGaborFeatures.calcFeatures(
                                dicomImage, xcoord - 4, ycoord - 4, 8,
                                8, Gaborkernel_bank, subImage.max(),
                                subImage.min())

                            for gaborfeature in GaborFeatures:
                                for eachg in gaborfeature:
                                    Gabor.append(eachg)

                            TAfeatures = GLCM + LBP + Gabor + meanstd

                            if TAfeatures == None:
                                continue

                            aFeature = aFeature + TAfeatures
                            featureWriter.writerow(aFeature)

                    else:

                        for eachpt in MLOwindowptlist:

                            meanstd = list()
                            GLCM = list()
                            LBP = list()
                            Gabor = list()

                            xcoord = int(eachpt[0])
                            ycoord = int(eachpt[1])
                            boundaryornot = int(eachpt[2])

                            aFeature = [
                                patientID, lesionDicomFn, mloxmlfile[0], phase,
                                xcoord, ycoord, boundaryornot
                            ]

                            subImage = dicomImage[ycoord - 4:ycoord + 4,
                                                  xcoord - 4:xcoord + 4]

                            subshape = numpy.shape(subImage)
                            # print subshape
                            # Some windows lie too close to the image boundary (e.g. Pt36) to yield a full 8x8 box; skip them
                            if subshape[0] != 8 or subshape[1] != 8:
                                continue

                            subImageGLCM = GrayScaleNormalization(
                                subImage, subImage.max(), subImage.min())

                            # get normalized to 0, 255: raw mean and standard deviation
                            Raw_mean, Raw_std = Norm_Mean_Std_LargestBox(
                                subImage, subImage.max(), subImage.min())
                            meanstd.append(Raw_mean)
                            meanstd.append(Raw_std)

                            # GLCM
                            glcmFeatures = GLCMFeatures.calcFeatures(
                                subImageGLCM)

                            for GLCMAngle in GLCMAngleList:
                                for featureName in haralick_labels[:-1]:
                                    GLCM.append(
                                        glcmFeatures[GLCMAngle][featureName])

                            # LBP subimage
                            subImageLBP = dicomImage[ycoord - 4 -
                                                     LBPRadius:ycoord + 4 +
                                                     LBPRadius, xcoord - 4 -
                                                     LBPRadius:xcoord + 4 +
                                                     LBPRadius]

                            extendsubImageLBP = GrayScaleNormalization(
                                subImageLBP, subImage.max(), subImage.min())

                            # LBP is computed on the window extended by LBPRadius pixels on each side
                            LBPs = ExtendLBPFeatures.calcFeatures(
                                extendsubImageLBP, LBPnPoints, LBPRadius,
                                LBPMethod)
                            for lbp in LBPs:
                                LBP.append(lbp)

                            # Gabor, width = 8
                            # use extended ROI
                            GaborFeatures = ExtendGaborFeatures.calcFeatures(
                                dicomImage, xcoord - 4, ycoord - 4, 8,
                                8, Gaborkernel_bank, subImage.max(),
                                subImage.min())

                            for gaborfeature in GaborFeatures:
                                for eachg in gaborfeature:
                                    Gabor.append(eachg)

                            TAfeatures = GLCM + LBP + Gabor + meanstd

                            if TAfeatures == None:
                                continue

                            aFeature = aFeature + TAfeatures
                            featureWriter.writerow(aFeature)