def genTextures():
    """Write T1 and T2 texture-map CSV files for every patient slice folder.

    Walks ``rootDir/<patient folder>/<slice folder>``.  In each slice folder
    one DICOM file per contrast listed in ``dicomnames`` is selected by
    filename pattern, and the sampling points are obtained from the T1 and T2
    ROI xml files through ``ParseXMLDrawROI``.  For each of the two point
    lists a CSV named ``<patient>_<slice folder>_<T1|T2>_ROI_Texture_Map.csv``
    is written to ``outputDir`` with one row per (contrast, point) holding
    GLCM, LBP, Gabor and raw mean/std values of the 8x8 window around the
    point.

    Slices whose number is not listed in ``biopsycoordinatefile`` for the
    patient are skipped, and so are slices where no T1 or T2 xml was found
    (the original reused the file name left over from the previous slice, or
    raised NameError on the first slice).

    Uses names defined outside this function: ``rootDir``, ``outputDir``,
    ``biopsycoordinatefile``, ``haralick_labels``, ``ParseXMLDrawROI``,
    ``Read2DImage``, ``GrayScaleNormalization``, ``GLCMFeatures``,
    ``ExtendLBPFeatures`` and ``ExtendGaborFeatures``.

    Raises:
        KeyError: if a slice folder lacks a DICOM file for one of the
            contrasts in ``dicomnames``.
    """
    GLCMAngleList = ['Avg']
    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'
    Gaborsigma_range = (0.6, 1.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']
    dicomnames = ['EPI', 'P', 'Q', 'RCBV', 'SPGRC', 'T2']
    featuresOutFn = 'ROI_Texture_Map.csv'

    # Filename patterns that select the DICOM file of each contrast.  The
    # checks are independent: one file may be registered under several keys.
    dicomPatterns = (
        ('SPGRC', ('*C*SPGR*', '*+C*T1*', '*T1*+C*')),
        ('T2', ('*T2*',)),
        ('Q', ('*q*',)),
        ('P', ('*p*',)),
        ('RCBV', ('*rCBV*',)),
        ('EPI', ('*EPI*+C*', '*+C*EPI*')),
    )
    T1xmlPatterns = ('*C*SPGR*', '*+C*T1*', '*T1*+C*')

    # Header row: column order must match the rows built in _pointFeatureRow.
    featureTitle = ['Image Contrast', 'Image Filename', 'X', 'Y',
                    'Boundary (1) or not (inside: 0), (outside:2)',
                    'Biopsy(1) or not (0)']
    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)
    featureTitle.extend('LBP_%02d' % x for x in range(LBPnPoints + 1))
    featureTitle.append('LBP_Other')
    for GaborSigma in Gaborsigma_range:
        for GaborFreq in Gaborfreq_range:
            for featureName in GaborFeatureList:
                featureTitle.append(featureName + '_' + str(GaborSigma) +
                                    '_' + str(GaborFreq))
    featureTitle.extend(['Raw_Mean', 'Raw_Std'])

    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])

    def _matchesAny(name, patterns):
        # True when `name` matches at least one fnmatch pattern.
        return any(fnmatch.fnmatch(name, pattern) for pattern in patterns)

    def _pointFeatureRow(eachdcm, dicomfile, dicomImage, eachpt):
        # Build one CSV row for one sampling point on one contrast image.
        xcoord = int(eachpt[0])
        ycoord = int(eachpt[1])
        boundaryornot = int(eachpt[2])
        biopsyornot = int(eachpt[3])
        aFeature = [eachdcm, dicomfile, xcoord, ycoord,
                    boundaryornot, biopsyornot]

        subImage = dicomImage[ycoord - 4:ycoord + 4, xcoord - 4:xcoord + 4]
        subImageGLCM = GrayScaleNormalization(subImage, subImage.max(),
                                              subImage.min())

        # GLCM
        glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
        for GLCMAngle in GLCMAngleList:
            for featureName in haralick_labels[:-1]:
                aFeature.append(glcmFeatures[GLCMAngle][featureName])

        # Raw mean and std of the window (written last in the row).
        meanstd = [numpy.mean(subImage), numpy.std(subImage)]

        # LBP works on the window extended by LBPRadius on each side, but is
        # normalized with the gray-level range of the 8x8 window.
        subImageLBP = dicomImage[ycoord - 4 - LBPRadius:ycoord + 4 + LBPRadius,
                                 xcoord - 4 - LBPRadius:xcoord + 4 + LBPRadius]
        extendsubImageLBP = GrayScaleNormalization(subImageLBP, subImage.max(),
                                                   subImage.min())
        aFeature.extend(ExtendLBPFeatures.calcFeatures(
            extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod))

        # Gabor, width = 8; receives the full image plus the window origin.
        GaborFeatures = ExtendGaborFeatures.calcFeatures(
            dicomImage, xcoord - 4, ycoord - 4, 8, 8, Gaborkernel_bank,
            subImage.max(), subImage.min())
        for gaborfeature in GaborFeatures:
            aFeature.extend(gaborfeature)

        return aFeature + meanstd

    def _writeModalityCSV(modality, windowptlist, newpatientname, slicefile,
                          dcmxmlfilepath, dcmfiledict):
        # Write the texture map of one point list ('T1' or 'T2') of a slice.
        outFn = (newpatientname + '_' + slicefile + '_' + modality + '_' +
                 featuresOutFn)
        featuresCSVFn = os.path.join(outputDir, outFn)
        with open(featuresCSVFn, 'wb') as featureCSVFile:
            featureWriter = csv.writer(featureCSVFile, dialect='excel')
            featureWriter.writerow(featureTitle)
            for eachdcm in dicomnames:
                dicomfile = dcmfiledict[eachdcm]
                dicomImage = Read2DImage(
                    os.path.join(dcmxmlfilepath, dicomfile))
                for eachpt in windowptlist:
                    featureWriter.writerow(_pointFeatureRow(
                        eachdcm, dicomfile, dicomImage, eachpt))

    for texturemapfile in os.listdir(rootDir):
        if texturemapfile.startswith('.'):
            continue
        print(texturemapfile)

        # Patient key: folder-name prefix with the 'FSL' or 'h' marker removed.
        patientname = texturemapfile.split('_')[0]
        if fnmatch.fnmatch(patientname, "*FSL*"):
            newpatientname = patientname.replace("FSL", "")
        elif fnmatch.fnmatch(patientname, "*h*"):
            newpatientname = patientname.replace("h", "")
        else:
            newpatientname = patientname
        print(newpatientname)

        slicepathfile = os.path.join(rootDir, texturemapfile)
        for slicefile in os.listdir(slicepathfile):
            if slicefile.startswith('.'):
                continue
            print(slicefile)
            slicenum = int(slicefile.replace('slice', ''))
            dcmxmlfilepath = os.path.join(slicepathfile, slicefile)

            dcmfiledict = dict()
            for dcmfile in os.listdir(dcmxmlfilepath):
                if dcmfile.startswith('.'):
                    continue
                if not fnmatch.fnmatch(dcmfile, '*dcm*'):
                    continue
                if fnmatch.fnmatch(dcmfile, '*precontrast*'):
                    continue
                for key, patterns in dicomPatterns:
                    if _matchesAny(dcmfile, patterns):
                        dcmfiledict[key] = dcmfile

            # Reset per slice so a missing xml is detected instead of
            # silently reusing the name found in an earlier slice folder.
            T1xmlfile = None
            T2xmlfile = None
            for xmlfile in os.listdir(dcmxmlfilepath):
                if not fnmatch.fnmatch(xmlfile, '*.xml'):
                    continue
                if _matchesAny(xmlfile, ('*NECROSIS*', '*necrosis*')):
                    continue
                if _matchesAny(xmlfile, T1xmlPatterns):
                    T1xmlfile = xmlfile
                if fnmatch.fnmatch(xmlfile, '*T2*'):
                    T2xmlfile = xmlfile

            print('\n')

            if slicenum not in biopsycoordinatefile[newpatientname]:
                continue
            biopsycoordinatelist = biopsycoordinatefile[newpatientname][slicenum]

            if T1xmlfile is None or T2xmlfile is None:
                print('Missing T1 or T2 ROI xml in %s; slice skipped.'
                      % dcmxmlfilepath)
                continue

            T1windowptlist = ParseXMLDrawROI(
                os.path.join(dcmxmlfilepath, T1xmlfile), 'T1',
                biopsycoordinatelist)
            T2windowptlist = ParseXMLDrawROI(
                os.path.join(dcmxmlfilepath, T2xmlfile), 'T2',
                biopsycoordinatelist)

            _writeModalityCSV('T1', T1windowptlist, newpatientname, slicefile,
                              dcmxmlfilepath, dcmfiledict)
            _writeModalityCSV('T2', T2windowptlist, newpatientname, slicefile,
                              dcmxmlfilepath, dcmfiledict)
def genFeatures():
    """Write one CSV of texture features for the lesion ROIs of every case.

    For each case folder in ``rootDir`` every sub-folder whose name contains
    ``Lesion``/``lesion`` is processed: the ``*rec.csv`` file in it lists the
    ROI rectangles (columns ``X``, ``Y``, ``W``, ``H``), ``lesionDicomFn`` is
    the image, and ``roiCoordsFn`` is a ';'-separated file whose third column
    supplies the gray levels used for normalization (their peak-to-peak range
    is passed to ``GrayScaleNormalization``).  One row per ROI is written to
    ``outputDir/featuresOutFn`` with GLCM, LBP, Gabor and largest-box
    mean/std values.

    ROIs with X == 1 and Y == 1 are reported as invalid and skipped, as are
    ROIs whose normalized window is all zero.  A lesion folder without a
    ``*rec.csv`` file is reported and skipped (the original reused the name
    found in a previous folder, or raised NameError).

    Uses names defined outside this function: ``rootDir``, ``outputDir``,
    ``featuresOutFn``, ``lesionDicomFn``, ``roiCoordsFn``,
    ``haralick_labels``, ``Read2DImage``, ``GrayScaleNormalization``,
    ``Mean_Std_LargestBox2``, ``GLCMFeatures``, ``ExtendLBPFeatures`` and
    ``ExtendGaborFeatures``.
    """
    # Parameters and feature list of each algorithm
    GLCMAngleList = ['0', '45', '90', '135', 'Avg']

    LBPRadius = 3
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'
    LBPFeatureList = ['LBP_%02d' % x for x in range(LBPnPoints + 1)]
    LBPFeatureList.append('LBP_Other')

    Gaborsigma_range = [0.6]
    Gaborfreq_range = (0.1, 0.3, 0.5)
    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']

    # Generate full list of features combined with parameters
    featureTitle = [
        'PatientID', 'Phase', 'LesionName', 'ROI_Y', 'ROI_X', 'Width',
        'Height'
    ]
    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)
    featureTitle = featureTitle + LBPFeatureList
    for GaborSigma in Gaborsigma_range:
        for GaborFreq in Gaborfreq_range:
            for featureName in GaborFeatureList:
                featureTitle.append(featureName + '_' + str(GaborSigma) +
                                    '_' + str(GaborFreq))
    featureTitle = featureTitle + ['LargestBox_Mean', 'LargestBox_Std']

    def _skipEntry(name):
        # Hidden entries and 'Icon' files are never case/lesion/ROI data.
        return name.startswith('.') or fnmatch.fnmatch(name, '*Icon*')

    featuresCSVFn = os.path.join(outputDir, featuresOutFn)
    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for casefile in os.listdir(rootDir):
            if _skipEntry(casefile):
                continue
            print(casefile)

            # Folder name: <prefix>_<id part 1>_<id part 2>_<phase words...>
            splitlist = casefile.split('_')
            patientid = splitlist[1] + splitlist[2]
            # Every phase word is prefixed with a space (leading space kept).
            phasename = ''.join(' ' + item for item in splitlist[3:])
            print(patientid)
            print(phasename)

            phasefilepath = os.path.join(rootDir, casefile)
            lesionfolder = []
            for phasefile in os.listdir(phasefilepath):
                if _skipEntry(phasefile):
                    continue
                if fnmatch.fnmatch(phasefile, '*Lesion*') or fnmatch.fnmatch(
                        phasefile, '*lesion*'):
                    lesionfolder.append(phasefile)
            print(lesionfolder)

            for lesionfolderfile in lesionfolder:
                lesionPath = os.path.join(rootDir, casefile, lesionfolderfile)

                # Reset per folder so a missing file is detected.
                lesionroiFn = None
                for roifile in os.listdir(lesionPath):
                    if _skipEntry(roifile):
                        continue
                    if fnmatch.fnmatch(roifile, '*rec.csv'):
                        lesionroiFn = roifile
                if lesionroiFn is None:
                    print('No *rec.csv ROI file in %s; folder skipped.'
                          % lesionPath)
                    continue

                # Largest rectangle file name with absolute path
                lesionROIRectFn = os.path.join(lesionPath, lesionroiFn)
                # DICOM file name with absolute path
                lesionDicom = os.path.join(lesionPath, lesionDicomFn)
                # ROI file name with absolute path
                normalROICoords = os.path.join(lesionPath, roiCoordsFn)

                # Collect the gray levels in one pass; numpy.append in a loop
                # copies the whole array for every row.  dtype=float matches
                # the array the original grew from numpy.array([]).
                with open(normalROICoords, 'r') as roiCoordsFile:
                    roiCoordsList = csv.reader(roiCoordsFile, delimiter=';')
                    dualROIGrayLevels = numpy.array(
                        [int(row[2]) for row in roiCoordsList], dtype=float)

                with open(lesionROIRectFn, 'r') as roiFile:
                    roiList = csv.DictReader(roiFile, dialect='excel')
                    for aROI in roiList:
                        roiY = int(aROI['Y'])
                        roiX = int(aROI['X'])
                        if roiY == 1 and roiX == 1:
                            print('Invalid ROI for %s @ %s.' %
                                  (patientid, phasename))
                            continue
                        roiW = int(aROI['W'])
                        roiH = int(aROI['H'])

                        dicomImage = Read2DImage(lesionDicom)
                        subImage = dicomImage[roiY:roiY + roiH,
                                              roiX:roiX + roiW]
                        # LBP uses the ROI extended by LBPRadius on each side.
                        subImageLBP = dicomImage[
                            roiY - LBPRadius:roiY + roiH + LBPRadius,
                            roiX - LBPRadius:roiX + roiW + LBPRadius]

                        mean_LargBox, std_LargBox = Mean_Std_LargestBox2(
                            dicomImage, roiX, roiY, roiW, roiH)

                        subImage = GrayScaleNormalization(
                            subImage, dualROIGrayLevels.ptp())
                        extendsubImageLBP = GrayScaleNormalization(
                            subImageLBP, dualROIGrayLevels.ptp())

                        if numpy.all(subImage == 0):
                            print('%s @ %s is all zero.' %
                                  (patientid, phasename))
                            continue

                        aFeature = [
                            patientid, phasename, lesionfolderfile,
                            aROI['Y'], aROI['X'], aROI['W'], aROI['H']
                        ]

                        # GLCM
                        glcmFeatures = GLCMFeatures.calcFeatures(subImage)
                        for GLCMAngle in GLCMAngleList:
                            for featureName in haralick_labels[:-1]:
                                aFeature.append(
                                    glcmFeatures[GLCMAngle][featureName])

                        # LBP
                        lbpFeatures = ExtendLBPFeatures.calcFeatures(
                            extendsubImageLBP, LBPnPoints, LBPRadius,
                            LBPMethod)
                        aFeature = aFeature + lbpFeatures.tolist()

                        # Gabor
                        GaborFeatures = ExtendGaborFeatures.calcFeatures(
                            subImage, roiW, Gaborkernel_bank)
                        for gaborfeature in GaborFeatures:
                            aFeature = aFeature + gaborfeature.tolist()

                        aFeature = aFeature + [mean_LargBox, std_LargBox]
                        featureWriter.writerow(aFeature)

    print('Done.')
def genTAfeatures(patientID, phasename, lesionDicom, lesionROIRectFn,
                  Gaborkernel_bank):
    """Compute the texture-feature row for a lesion ROI.

    Args:
        patientID: identifier stored in the first column of the row.
        phasename: phase label stored in the second column of the row.
        lesionDicom: path handed to ``Read2DImage`` to load the image.
        lesionROIRectFn: path of a CSV file with ``X``, ``Y``, ``W`` and
            ``H`` columns describing the ROI rectangle(s).
        Gaborkernel_bank: kernel bank forwarded to
            ``ExtendGaborFeatures.calcFeatures``.

    Returns:
        ``[patientID, phasename, Y, X, W, H]`` followed by the GLCM values
        (angles 0/45/90/135/Avg), the LBP values, the Gabor values and the
        raw mean and std of the ROI, for the first ROI row that is neither
        invalid (X == 1 and Y == 1) nor all zero.  ``None`` when the file
        has no such row.
    """
    angles = ['0', '45', '90', '135', 'Avg']
    lbpRadius = 1
    lbpPoints = 8 * lbpRadius
    lbpMethod = 'uniform'

    with open(lesionROIRectFn, 'r') as roiFile:
        for roi in csv.DictReader(roiFile, dialect='excel'):
            if int(roi['Y']) == 1 and int(roi['X']) == 1:
                print('Invalid ROI for %s @ %s.' % (patientID, phasename))
                continue

            image = Read2DImage(lesionDicom)

            # Coordinates are used as 0-based indices without adjustment.
            left = int(roi['X'])
            top = int(roi['Y'])
            boxW = int(roi['W'])
            boxH = int(roi['H'])

            box = image[top:(top + boxH), left:(left + boxW)]
            # Same rectangle grown by the LBP radius on every side.
            paddedBox = image[top - lbpRadius:(top + boxH) + lbpRadius,
                              left - lbpRadius:(left + boxW) + lbpRadius]

            # Raw statistics are taken before any normalization.
            boxMean = numpy.mean(box)
            boxStd = numpy.std(box)

            glcmInput = GrayScaleNormalization(box, box.max(), box.min())
            # The extended box is normalized with the gray-level range of
            # the un-extended ROI.
            lbpInput = GrayScaleNormalization(paddedBox, box.max(),
                                              box.min())

            if numpy.all(box == 0):
                print('%s @ %s is all zero.' % (patientID, phasename))
                continue

            row = [patientID, phasename,
                   roi['Y'], roi['X'], roi['W'], roi['H']]

            # GLCM on the un-extended ROI.
            glcm = GLCMFeatures.calcFeatures(glcmInput)
            for angle in angles:
                row.extend(glcm[angle][label]
                           for label in haralick_labels[:-1])

            # LBP on the extended ROI.
            lbp = ExtendLBPFeatures.calcFeatures(lbpInput, lbpPoints,
                                                 lbpRadius, lbpMethod)
            row.extend(lbp.tolist())

            # Gabor receives the whole image plus the ROI geometry.
            gabor = ExtendGaborFeatures.calcFeatures(
                image, left, top, boxW, boxH, Gaborkernel_bank,
                box.max(), box.min())
            for response in gabor:
                row.extend(response.tolist())

            row.extend([boxMean, boxStd])
            return row
def genTextures():
    """Write one CSV row of multi-contrast texture features per biopsy point.

    For every patient in ``ptnamelist`` the points stored for that patient in
    ``ptcoordlist`` (``[x, y, slice number]``) are processed.  In the folder
    ``rootDir/<folderdict[pt]>/slice<slice number>`` one DICOM file per
    contrast in ``dicomnames`` is selected by filename pattern, and GLCM,
    LBP, Gabor and normalized mean/std values of the 8x8 window around the
    point are computed on each contrast.  All contrasts of one point share a
    single row of ``outputDir/featuresOutFn``; the columns are grouped by
    feature family, then by contrast.

    When the window of a contrast is all zero the GLCM columns of that
    contrast are filled with NaN.  The original appended nothing in that
    case, which shifted every later value of the row under the wrong header.

    Uses names defined outside this function: ``rootDir``, ``outputDir``,
    ``featuresOutFn``, ``ptnamelist``, ``ptcoordlist``, ``ptlist``,
    ``folderdict``, ``haralick_labels``, ``Read2DImage``,
    ``GrayScaleNormalization``, ``Norm_Mean_Std_LargestBox``,
    ``GLCMFeatures``, ``ExtendLBPFeatures`` and ``ExtendGaborFeatures``.

    Raises:
        KeyError: if a slice folder lacks a DICOM file for one of the
            contrasts in ``dicomnames``.
    """
    dicomnames = ['EPI', 'P', 'Q', 'RCBV', 'SPGRC', 'T2']
    GLCMAngleList = ['Avg']
    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'
    Gaborsigma_range = (0.6, 1.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']

    # Filename patterns that select the DICOM file of each contrast.  The
    # checks are independent: one file may be registered under several keys.
    dicomPatterns = (
        ('SPGRC', ('*C*SPGR*', '*+C*T1*', '*T1*+C*')),
        ('T2', ('*T2*',)),
        ('Q', ('*q*',)),
        ('P', ('*p*',)),
        ('RCBV', ('*rCBV*',)),
        ('EPI', ('*EPI*+C*', '*+C*EPI*')),
    )

    # Header row: GLCM, LBP, Gabor, mean/std - each repeated per contrast.
    featureTitle = ['Patient', 'ID', 'slice number', 'X', 'Y']
    for GLCMAngle in GLCMAngleList:
        for dicom in dicomnames:
            for featureName in haralick_labels[:-1]:
                featureTitle.append(dicom + '-' + featureName + '_' +
                                    GLCMAngle)
    for dicom in dicomnames:
        for x in range(LBPnPoints + 1):
            featureTitle.append(dicom + '-' + 'LBP_%02d' % x)
        featureTitle.append(dicom + '-' + 'LBP_Other')
    for dicom in dicomnames:
        for GaborSigma in Gaborsigma_range:
            for GaborFreq in Gaborfreq_range:
                for featureName in GaborFeatureList:
                    featureTitle.append(dicom + '-' + featureName + '_' +
                                        str(GaborSigma) + '_' +
                                        str(GaborFreq))
    for dicom in dicomnames:
        featureTitle.append(dicom + '-' + 'Raw_Mean')
        featureTitle.append(dicom + '-' + 'Raw_Std')

    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])

    featuresCSVFn = os.path.join(outputDir, featuresOutFn)
    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        # ptlist is consumed in step with the points, across all patients.
        ptindex = 0
        for pt in ptnamelist:
            # Points of this patient, in the order they appear in ptcoordlist.
            ptslice = [coordtuple[pt] for coordtuple in ptcoordlist
                       if pt in coordtuple]
            ptfolderpath = os.path.join(rootDir, folderdict[pt])

            for slicelist in ptslice:
                patientid = ptlist[ptindex]
                ptindex += 1

                xcoord = slicelist[0]
                ycoord = slicelist[1]
                slicenum = slicelist[2]
                slicefolderpath = os.path.join(ptfolderpath,
                                               'slice' + str(slicenum))

                dcmfiledict = dict()
                for dcmfile in os.listdir(slicefolderpath):
                    if dcmfile.startswith('.'):
                        continue
                    if not fnmatch.fnmatch(dcmfile, '*dcm*'):
                        continue
                    for key, patterns in dicomPatterns:
                        if any(fnmatch.fnmatch(dcmfile, pattern)
                               for pattern in patterns):
                            dcmfiledict[key] = dcmfile

                print('%s %s %s %s %s' % (pt, patientid, slicenum,
                                          len(dcmfiledict), dcmfiledict))

                aFeature = [pt, patientid, slicenum, xcoord, ycoord]
                meanstd = list()
                GLCM = list()
                LBP = list()
                Gabor = list()

                # GLCM of every contrast, grouped by angle.
                for GLCMAngle in GLCMAngleList:
                    for eachdcm in dicomnames:
                        dicomfile = dcmfiledict[eachdcm]
                        dicomfilepath = os.path.join(slicefolderpath,
                                                     dicomfile)
                        dicomImage = Read2DImage(dicomfilepath)

                        subImage = dicomImage[ycoord - 4:ycoord + 4,
                                              xcoord - 4:xcoord + 4]
                        subImageGLCM = GrayScaleNormalization(
                            subImage, subImage.max(), subImage.min())

                        glcmLabels = haralick_labels[:-1]
                        if numpy.all(subImage == 0):
                            print('%s @ %s is all zero.' %
                                  (patientid, slicenum))
                            # Keep the row aligned with the header.
                            GLCM.extend([numpy.nan] * len(glcmLabels))
                            continue

                        glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
                        for featureName in glcmLabels:
                            GLCM.append(glcmFeatures[GLCMAngle][featureName])

                # Mean/std, LBP and Gabor of every contrast.
                for eachdcm in dicomnames:
                    dicomfile = dcmfiledict[eachdcm]
                    dicomfilepath = os.path.join(slicefolderpath, dicomfile)
                    dicomImage = Read2DImage(dicomfilepath)

                    subImage = dicomImage[ycoord - 4:ycoord + 4,
                                          xcoord - 4:xcoord + 4]

                    # Mean and std as returned by Norm_Mean_Std_LargestBox
                    # for the window and its own gray-level range.
                    dicommean, dicomstd = Norm_Mean_Std_LargestBox(
                        subImage, subImage.max(), subImage.min())
                    meanstd.append(dicommean)
                    meanstd.append(dicomstd)

                    # LBP works on the window extended by LBPRadius, but is
                    # normalized with the range of the 8x8 window.
                    subImageLBP = dicomImage[
                        ycoord - 4 - LBPRadius:ycoord + 4 + LBPRadius,
                        xcoord - 4 - LBPRadius:xcoord + 4 + LBPRadius]
                    extendsubImageLBP = GrayScaleNormalization(
                        subImageLBP, subImage.max(), subImage.min())
                    LBPs = ExtendLBPFeatures.calcFeatures(
                        extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod)
                    LBP.extend(LBPs)

                    # Gabor, width = 8; receives the full image plus the
                    # window origin.
                    GaborFeatures = ExtendGaborFeatures.calcFeatures(
                        dicomImage, xcoord - 4, ycoord - 4, 8, 8,
                        Gaborkernel_bank, subImage.max(), subImage.min())
                    for gaborfeature in GaborFeatures:
                        Gabor.extend(gaborfeature)

                aFeature = aFeature + GLCM + LBP + Gabor + meanstd
                featureWriter.writerow(aFeature)
def genTextures(xcoord=151, ycoord=83):
    """Write the texture features of one 8x8 window of a single image.

    The image is loaded with ``Read2DImage(rootDir)`` (``rootDir`` is used as
    the image path here) and a header plus one data row are written to
    ``outputDir/featuresOutFn``: the point, the GLCM ('Avg' angle), LBP and
    Gabor values, and the raw mean and std of the window.

    Args:
        xcoord: column index of the window centre.  Defaults to 151, the
            value that was previously hard-coded.
        ycoord: row index of the window centre.  Defaults to 83, the value
            that was previously hard-coded.

    Uses names defined outside this function: ``rootDir``, ``outputDir``,
    ``featuresOutFn``, ``haralick_labels``, ``Read2DImage``,
    ``GrayScaleNormalization``, ``GLCMFeatures``, ``ExtendLBPFeatures`` and
    ``ExtendGaborFeatures``.
    """
    GLCMAngleList = ['Avg']
    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'
    Gaborsigma_range = (0.6, 1.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']

    # Header row: column order must match the data row built below.
    featureTitle = ['X', 'Y']
    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)
    featureTitle.extend('LBP_%02d' % x for x in range(LBPnPoints + 1))
    featureTitle.append('LBP_Other')
    for GaborSigma in Gaborsigma_range:
        for GaborFreq in Gaborfreq_range:
            for featureName in GaborFeatureList:
                featureTitle.append(featureName + '_' + str(GaborSigma) +
                                    '_' + str(GaborFreq))
    featureTitle.extend(['Raw_Mean', 'Raw_Std'])

    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])

    featuresCSVFn = os.path.join(outputDir, featuresOutFn)
    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        dicomImage = Read2DImage(rootDir)
        aFeature = [xcoord, ycoord]

        subImage = dicomImage[ycoord - 4:ycoord + 4, xcoord - 4:xcoord + 4]
        subImageGLCM = GrayScaleNormalization(subImage, subImage.max(),
                                              subImage.min())

        # GLCM
        glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
        for GLCMAngle in GLCMAngleList:
            for featureName in haralick_labels[:-1]:
                aFeature.append(glcmFeatures[GLCMAngle][featureName])

        # Raw mean and std of the window (written last in the row).
        meanstd = [numpy.mean(subImage), numpy.std(subImage)]

        # LBP works on the window extended by LBPRadius on each side, but is
        # normalized with the gray-level range of the 8x8 window.
        subImageLBP = dicomImage[ycoord - 4 - LBPRadius:ycoord + 4 + LBPRadius,
                                 xcoord - 4 - LBPRadius:xcoord + 4 + LBPRadius]
        extendsubImageLBP = GrayScaleNormalization(subImageLBP, subImage.max(),
                                                   subImage.min())
        LBPs = ExtendLBPFeatures.calcFeatures(extendsubImageLBP, LBPnPoints,
                                              LBPRadius, LBPMethod)
        aFeature = aFeature + LBPs.tolist()

        # Gabor, width = 8; receives the full image plus the window origin.
        GaborFeatures = ExtendGaborFeatures.calcFeatures(
            dicomImage, xcoord - 4, ycoord - 4, 8, 8, Gaborkernel_bank,
            subImage.max(), subImage.min())
        for gaborfeature in GaborFeatures:
            aFeature = aFeature + gaborfeature.tolist()

        aFeature = aFeature + meanstd
        featureWriter.writerow(aFeature)
def genFeatures():
    """Write lesion texture features normalized over lesion and control ROIs.

    Every case folder in ``rootDir`` is expected to hold a ``*Lesion*`` and a
    ``*Control*`` sub-folder, each containing ``recfile`` (ROI rectangles
    with ``X``, ``Y``, ``W``, ``H`` columns) and ``dcmfile`` (the image).
    For each ROI row of the lesion rectangle file the same rectangle is cut
    from the lesion and from the control image; the larger maximum and the
    smaller minimum of the two cut-outs form the gray-level range used to
    normalize the lesion ROI.  GLCM ('Avg' angle), LBP, Gabor and raw
    mean/std values of the lesion ROI are written as one row of
    ``outputDir/featuresOutFn``.

    Cases without a lesion folder are skipped.  Cases with a lesion folder
    but no control folder are reported and skipped as well (the original
    raised KeyError on them).

    Uses names defined outside this function: ``rootDir``, ``outputDir``,
    ``featuresOutFn``, ``recfile``, ``dcmfile``, ``haralick_labels``,
    ``Read2DImage``, ``GrayScaleNormalization``, ``GLCMFeatures``,
    ``ExtendLBPFeatures`` and ``ExtendGaborFeatures``.
    """
    GLCMAngleList = ['Avg']

    LBPRadius = 3
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'
    LBPFeatureList = ['LBP_%02d' % x for x in range(LBPnPoints + 1)]
    LBPFeatureList.append('LBP_Other')

    Gaborsigma_range = (1.0, 3.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']

    # Generate full list of features combined with parameters
    featureTitle = ['PatientID', 'Phase', 'ROI_X', 'ROI_Y', 'Width', 'Height']
    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)
    featureTitle = featureTitle + LBPFeatureList
    for GaborSigma in Gaborsigma_range:
        for GaborFreq in Gaborfreq_range:
            for featureName in GaborFeatureList:
                featureTitle.append(featureName + '_' + str(GaborSigma) +
                                    '_' + str(GaborFreq))
    featureTitle = featureTitle + ['LargestBox_Raw_Mean',
                                   'LargestBox_Raw_Std']

    featuresCSVFn = os.path.join(outputDir, featuresOutFn)
    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for casefile in os.listdir(rootDir):
            if casefile.startswith('.'):
                continue
            if fnmatch.fnmatch(casefile, '*Icon*'):
                continue
            if fnmatch.fnmatch(casefile, '*xlsx*'):
                continue
            print(casefile)

            # NOTE(review): the separator is underscore + space; confirm this
            # matches the folder naming, other scripts split on '_' alone.
            patientID = casefile.split('_ ')[0]
            print(patientID)

            patientfilepath = os.path.join(rootDir, casefile)
            phaseDir = dict()
            for patientPhaseDir in os.listdir(patientfilepath):
                if patientPhaseDir.startswith('.'):
                    continue
                if fnmatch.fnmatch(patientPhaseDir, '*Icon*'):
                    continue
                if fnmatch.fnmatch(patientPhaseDir, '*Lesion*'):
                    phaseDir['lesion'] = patientPhaseDir
                    phasename = patientPhaseDir
                    print(patientPhaseDir)
                if fnmatch.fnmatch(patientPhaseDir, '*Control*'):
                    phaseDir['control'] = patientPhaseDir
                    print(patientPhaseDir)

            if 'lesion' not in phaseDir:
                continue
            if 'control' not in phaseDir:
                print('No control folder for %s; case skipped.' % casefile)
                continue

            lesionfile = os.path.join(patientfilepath, phaseDir['lesion'])
            lesionlargestrecpath = os.path.join(lesionfile, recfile)
            lesionDicom = os.path.join(lesionfile, dcmfile)

            normalfile = os.path.join(patientfilepath, phaseDir['control'])
            # Built for symmetry with the lesion path; not read below.
            normallargestrecpath = os.path.join(normalfile, recfile)
            normalDicom = os.path.join(normalfile, dcmfile)

            with open(lesionlargestrecpath, 'r') as roiFile:
                roiList = csv.DictReader(roiFile, dialect='excel')
                for aROI in roiList:
                    xcoord = int(aROI['X'])
                    ycoord = int(aROI['Y'])
                    width = int(aROI['W'])
                    height = int(aROI['H'])
                    print('%s %s %s %s' % (xcoord, ycoord, width, height))

                    lesiondicomImage = Read2DImage(lesionDicom)
                    normaldicomImage = Read2DImage(normalDicom)

                    # The lesion rectangle is applied to both images.
                    lesionsubImage = lesiondicomImage[
                        ycoord:(ycoord + height), xcoord:(xcoord + width)]
                    normalsubImage = normaldicomImage[
                        ycoord:(ycoord + height), xcoord:(xcoord + width)]

                    mean_LargBox = numpy.mean(lesionsubImage)
                    std_LargBox = numpy.std(lesionsubImage)

                    # Joint gray-level range of lesion and control ROI.  The
                    # control value comes first so that ties resolve to it,
                    # as in the original comparison.
                    subImageMax = max(normalsubImage.max(),
                                      lesionsubImage.max())
                    subImageMin = min(normalsubImage.min(),
                                      lesionsubImage.min())

                    subImageGLCM = GrayScaleNormalization(
                        lesionsubImage, subImageMax, subImageMin)

                    # LBP uses the ROI extended by LBPRadius on each side.
                    subImageLBP = lesiondicomImage[
                        ycoord - LBPRadius:(ycoord + height) + LBPRadius,
                        xcoord - LBPRadius:(xcoord + width) + LBPRadius]

                    aFeature = [
                        patientID, phasename, aROI['X'], aROI['Y'],
                        aROI['W'], aROI['H']
                    ]

                    # GLCM
                    glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
                    for GLCMAngle in GLCMAngleList:
                        for featureName in haralick_labels[:-1]:
                            aFeature.append(
                                glcmFeatures[GLCMAngle][featureName])

                    # LBP
                    extendsubImageLBP = GrayScaleNormalization(
                        subImageLBP, subImageMax, subImageMin)
                    lbpFeatures = ExtendLBPFeatures.calcFeatures(
                        extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod)
                    aFeature = aFeature + lbpFeatures.tolist()

                    # Gabor receives the whole image plus the ROI geometry.
                    GaborFeatures = ExtendGaborFeatures.calcFeatures(
                        lesiondicomImage, xcoord, ycoord, width, height,
                        Gaborkernel_bank, subImageMax, subImageMin)
                    for gaborfeature in GaborFeatures:
                        aFeature = aFeature + gaborfeature.tolist()

                    aFeature = aFeature + [mean_LargBox, std_LargBox]
                    featureWriter.writerow(aFeature)
def genTextures():
    """Write whole-image texture features for every image file in rootDir.

    Each non-hidden, non-'Icon' entry of ``rootDir`` is loaded with
    ``Read2DImage`` and used unnormalized and uncropped.  One row per image
    is written to ``outputDir/filename``: the file name up to its first
    '.', the GLCM values (angles 0/45/90/135/Avg), the LBP values, the
    Gabor values and the raw mean and std of the image.

    Uses names defined outside this function: ``rootDir``, ``outputDir``,
    ``filename``, ``haralick_labels``, ``Read2DImage``, ``GLCMFeatures``,
    ``ExtendLBPFeatures``, ``ExtendGaborFeatures`` and
    ``KRAS_GaborFeatures``.
    """
    angles = ['0', '45', '90', '135', 'Avg']

    lbpRadius = 1
    lbpPoints = 8 * lbpRadius
    lbpMethod = 'uniform'

    sigmas = (1.0, 2.0)
    freqs = (0.1, 0.3, 0.5)
    kernels = ExtendGaborFeatures.genKernelBank(sigmas, freqs, [])
    gaborStats = ['Gabor_Mean', 'Gabor_Std']

    # Header row: column order must match the data rows built below.
    header = ['PatientID']
    for angle in angles:
        header.extend(label + '_' + angle for label in haralick_labels[:-1])
    header.extend('LBP_%02d' % idx for idx in range(0, lbpPoints + 1))
    header.append('LBP_Other')
    for sigma in sigmas:
        for freq in freqs:
            header.extend(stat + '_' + str(sigma) + '_' + str(freq)
                          for stat in gaborStats)
    header.extend(['Raw_Mean', 'Raw_Std'])

    outPath = os.path.join(outputDir, filename)
    with open(outPath, 'wb') as outFile:
        writer = csv.writer(outFile, dialect='excel')
        writer.writerow(header)

        for entry in os.listdir(rootDir):
            # Hidden entries and 'Icon' files are not images.
            if entry.startswith('.') or fnmatch.fnmatch(entry, '*Icon*'):
                continue

            caseName = entry.split('.')[0]
            print(caseName)

            image = Read2DImage(os.path.join(rootDir, entry))
            row = [caseName]

            # Raw statistics are taken before any feature extractor runs.
            rawMean = numpy.mean(image)
            rawStd = numpy.std(image)

            # GLCM
            glcm = GLCMFeatures.calcFeatures(image)
            for angle in angles:
                row.extend(glcm[angle][label]
                           for label in haralick_labels[:-1])

            # LBP
            lbp = ExtendLBPFeatures.calcFeatures(image, lbpPoints, lbpRadius,
                                                 lbpMethod)
            row.extend(lbp.tolist())

            # Gabor
            gabor = KRAS_GaborFeatures.calcFeatures(image, kernels)
            for response in gabor:
                row.extend(response.tolist())

            row.extend([rawMean, rawStd])
            writer.writerow(row)
def genFeatures(twofolder='malignant', casefile='Pt9'):
    """Write a per-window texture map for one patient case.

    For each of the four phases (DES-CC, LE-CC, DES-MLO, LE-MLO) the phase
    image is read with ``Read2DImage`` and an 8x8 window is cut around every
    point returned by ``chooseinoutcoord`` for the XML ROI of the matching
    view (CC or MLO).  GLCM, LBP, Gabor and normalized mean/std features of
    each window form one row of
    ``outputDir/<casefile>_<twofolder>_ROI_Texture_Map.csv``.

    Args:
        twofolder: sub-folder of ``rootDir`` that holds the case; it is also
            part of the output file name.
        casefile: case folder inside ``twofolder``; written as PatientID.

    Windows that cannot be cut as a full 8x8 box are skipped.  ``rootDir``
    and ``outputDir`` are read from module scope.
    """
    # Parameters and feature list of each algorithm
    GLCMAngleList = ['0', '45', '90', '135', 'Avg']
    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'
    Gaborsigma_range = (0.6, 1.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']
    boxSize = 8
    halfBox = 4

    # Full list of column titles, in the order values are written per row.
    featureTitle = [
        'PatientID', 'Dicom Image Filename', 'Xml Filename', 'Phase Name',
        'X', 'Y'
    ]
    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)
    featureTitle += ['LBP_%02d' % x for x in xrange(0, LBPnPoints + 1)]
    featureTitle.append('LBP_Other')
    for GaborSigma in Gaborsigma_range:
        for GaborFreq in Gaborfreq_range:
            for featureName in GaborFeatureList:
                featureTitle.append(featureName + '_' + str(GaborSigma) +
                                    '_' + str(GaborFreq))
    featureTitle += ['Raw_Mean', 'Raw_Std']

    # Collect the image file of each phase and the XML ROI files per view.
    dicomPatterns = [
        ('*DES*CC*dcm', 'DES-CC'),
        ('*LE*CC*dcm', 'LE-CC'),
        ('*DES*MLO*dcm', 'DES-MLO'),
        ('*LE*MLO*dcm', 'LE-MLO'),
    ]
    roiDicomfile = dict()
    roiCCxmlfile = list()
    roiMLOxmlfile = list()
    lesionpath = os.path.join(rootDir, twofolder, casefile)
    for lesionfile in os.listdir(lesionpath):
        if lesionfile.startswith('.'):
            continue
        if fnmatch.fnmatch(lesionfile, '*Icon*') or \
                fnmatch.fnmatch(lesionfile, '*texture*'):
            continue
        for pattern, phase in dicomPatterns:
            if fnmatch.fnmatch(lesionfile, pattern):
                roiDicomfile[phase] = lesionfile
        if fnmatch.fnmatch(lesionfile, '*CC*xml'):
            roiCCxmlfile.append(lesionfile)
        if fnmatch.fnmatch(lesionfile, '*MLO*xml'):
            roiMLOxmlfile.append(lesionfile)

    patientID = casefile
    phasenames = ['DES-CC', 'LE-CC', 'DES-MLO', 'LE-MLO']

    # Pt45 uses its second MLO xml file; every other case uses the first.
    # NOTE(review): the reason for the Pt45 exception is not visible here.
    roiccxml = roiCCxmlfile[0]
    roimloxml = roiMLOxmlfile[1 if casefile == 'Pt45' else 0]

    CCxmin, CCxmax, CCymin, CCymax, CCxycoord = ParseXMLDrawROI(
        os.path.join(lesionpath, roiccxml))
    MLOxmin, MLOxmax, MLOymin, MLOymax, MLOxycoord = ParseXMLDrawROI(
        os.path.join(lesionpath, roimloxml))

    # check if coords inside boundary or outside boundary
    CCwindowptlist = chooseinoutcoord(CCxmin, CCxmax, CCymin, CCymax,
                                      CCxycoord)
    MLOwindowptlist = chooseinoutcoord(MLOxmin, MLOxmax, MLOymin, MLOymax,
                                       MLOxycoord)

    featuresOutFn = patientID + '_' + twofolder + '_' + 'ROI_Texture_Map.csv'
    featuresCSVFn = os.path.join(outputDir, featuresOutFn)

    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for phase in phasenames:
            print(phase)

            lesionDicomFn = roiDicomfile[phase]
            dicomImage = Read2DImage(os.path.join(lesionpath, lesionDicomFn))

            # CC and MLO phases differ only in the point list and the xml
            # file name recorded in the row.
            if fnmatch.fnmatch(phase, '*CC'):
                windowptlist, roixml = CCwindowptlist, roiccxml
            else:
                windowptlist, roixml = MLOwindowptlist, roimloxml

            for eachpt in windowptlist:
                xcoord = int(eachpt[0])
                ycoord = int(eachpt[1])

                subImage = dicomImage[ycoord - halfBox:ycoord + halfBox,
                                      xcoord - halfBox:xcoord + halfBox]
                # Near the image border a full box cannot be cut; skip it.
                subshape = numpy.shape(subImage)
                if subshape[0] != boxSize or subshape[1] != boxSize:
                    continue

                subImageGLCM = GrayScaleNormalization(
                    subImage, subImage.max(), subImage.min())

                # raw mean and standard deviation from the normalizing helper
                Raw_mean, Raw_std = Norm_Mean_Std_LargestBox(
                    subImage, subImage.max(), subImage.min())

                aFeature = [
                    patientID, lesionDicomFn, roixml, phase, xcoord, ycoord
                ]

                # GLCM
                glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
                for GLCMAngle in GLCMAngleList:
                    for featureName in haralick_labels[:-1]:
                        aFeature.append(glcmFeatures[GLCMAngle][featureName])

                # LBP: the window is widened by LBPRadius on every side but
                # is normalized with the max/min of the 8x8 box.
                # NOTE(review): this widened slice is not shape-checked, so a
                # box touching the image border yields a short or empty
                # window here -- confirm ExtendLBPFeatures tolerates that.
                subImageLBP = dicomImage[
                    ycoord - halfBox - LBPRadius:ycoord + halfBox + LBPRadius,
                    xcoord - halfBox - LBPRadius:xcoord + halfBox + LBPRadius]
                extendsubImageLBP = GrayScaleNormalization(
                    subImageLBP, subImage.max(), subImage.min())
                aFeature.extend(ExtendLBPFeatures.calcFeatures(
                    extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod))

                # Gabor: receives the whole image plus the box origin/size.
                GaborFeatures = ExtendGaborFeatures.calcFeatures(
                    dicomImage, xcoord - halfBox, ycoord - halfBox,
                    boxSize, boxSize, Gaborkernel_bank,
                    subImage.max(), subImage.min())
                for gaborfeature in GaborFeatures:
                    aFeature.extend(gaborfeature)

                aFeature.extend([Raw_mean, Raw_std])
                featureWriter.writerow(aFeature)
def genFeatures(dualRescaleOption=True):
    """Write GLCM, LBP and largest-box mean/std features per lesion ROI.

    Every directory in ``rootDir`` is treated as one patient/phase.  The
    ROI rectangles listed in the lesion's rectangle CSV are cut out of the
    lesion image, normalized with the gray-level range collected from the
    ROI coordinate files, and written as rows of
    ``outputDir/featuresOutFn``.  Gabor features are not produced by this
    variant.

    Args:
        dualRescaleOption: when true, values from the normal ROI coordinate
            file are added to those of the lesion ROI before the range
            (max - min) used for normalization is taken; when false only
            the lesion values are used.

    Folder and file names (``lesionFolder``, ``normalFolder``,
    ``lesionDicomFn``, ``normalDicomFn``, ``roiCoordsFn``, ``lesionroiFn``)
    are read from module scope.
    """
    # Parameters and feature list of each algorithm
    GLCMAngleList = ['0', '45', '90', '135', 'Avg']
    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'

    # Full list of column titles, in the order values are written per row.
    featureTitle = ['PatientID', 'Phase', 'ROI_Y', 'ROI_X', 'Width', 'Height']
    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)
    featureTitle += ['LBP_%02d' % x for x in xrange(0, LBPnPoints + 1)]
    featureTitle.append('LBP_Other')
    featureTitle += ['LargestBox_Mean', 'LargestBox_Std']

    featuresCSVFn = os.path.join(outputDir, featuresOutFn)

    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for patientPhaseDir in os.listdir(rootDir):
            if patientPhaseDir.startswith('.') or \
                    os.path.isfile(os.path.join(rootDir, patientPhaseDir)):
                continue

            print(patientPhaseDir)

            if fnmatch.fnmatch(patientPhaseDir, '*20160212*'):
                patientID = '20160212'
            else:
                patientID = patientPhaseDir.split('_')[2]

            if fnmatch.fnmatch(patientPhaseDir, '*ADC*'):
                phasename = 'ADC'
            elif fnmatch.fnmatch(patientPhaseDir, '*TRACEW*'):
                phasename = 'TRACEW'
            else:
                phasename = 'FOV'

            lesionPath = os.path.join(rootDir, patientPhaseDir, lesionFolder)
            normalPath = os.path.join(rootDir, patientPhaseDir, normalFolder)

            # DICOM file name with absolute path
            lesionDicom = os.path.join(lesionPath, lesionDicomFn)
            normalDicom = os.path.join(normalPath, normalDicomFn)

            # ROI file name with absolute path
            lesionROICoords = os.path.join(lesionPath, roiCoordsFn)
            normalROICoords = os.path.join(normalPath, roiCoordsFn)

            # Largest rectangle file name with absolute path
            lesionROIRectFn = os.path.join(lesionPath, lesionroiFn)

            # If any of the files is missing, skip.  The rectangle file is
            # part of the check because it is opened below.
            requiredFiles = (lesionDicom, normalDicom, lesionROICoords,
                             normalROICoords, lesionROIRectFn)
            if not all(os.path.isfile(path) for path in requiredFiles):
                print('Missing File for %s @ %s.' % (patientID, phasename))
                continue

            # Third ';'-separated field of every coordinate row
            # (presumably the pixel gray level -- confirm against the
            # tool that writes roiCoordsFn).
            coordsFiles = [lesionROICoords]
            if dualRescaleOption:
                coordsFiles.append(normalROICoords)
            grayLevels = []
            for coordsFn in coordsFiles:
                with open(coordsFn, 'r') as roiCoordsFile:
                    for row in csv.reader(roiCoordsFile, delimiter=';'):
                        grayLevels.append(int(row[2]))
            dualROIGrayLevels = numpy.array(grayLevels, dtype=float)

            with open(lesionROIRectFn, 'r') as roiFile:
                roiList = csv.DictReader(roiFile, dialect='excel')
                for aROI in roiList:
                    if (int(aROI['Y']) == 1) and (int(aROI['X']) == 1):
                        print('Invalid ROI for %s @ %s.' %
                              (patientID, phasename))
                        continue

                    roiX = int(aROI['X'])
                    roiY = int(aROI['Y'])
                    roiW = int(aROI['W'])
                    roiH = int(aROI['H'])

                    # Read per ROI: the normalization helper may work on
                    # views of this array, so it is not shared between rows.
                    dicomImage = Read2DImage(lesionDicom)

                    subImage = dicomImage[roiY:(roiY + roiH),
                                          roiX:(roiX + roiW)]
                    # LBP window: the ROI widened by LBPRadius on every side.
                    subImageLBP = dicomImage[
                        roiY - LBPRadius:(roiY + roiH) + LBPRadius,
                        roiX - LBPRadius:(roiX + roiW) + LBPRadius]

                    mean_LargBox, std_LargBox = Mean_Std_LargestBox2(
                        dicomImage, roiX, roiY, roiW, roiH)

                    subImage = GrayScaleNormalization(
                        subImage, numpy.ptp(dualROIGrayLevels))
                    extendsubImageLBP = GrayScaleNormalization(
                        subImageLBP, numpy.ptp(dualROIGrayLevels))

                    if numpy.all(subImage == 0):
                        print('%s @ %s is all zero.' % (patientID, phasename))
                        continue

                    aFeature = [patientID, phasename, aROI['Y'], aROI['X'],
                                aROI['W'], aROI['H']]

                    # GLCM
                    glcmFeatures = GLCMFeatures.calcFeatures(subImage)
                    for GLCMAngle in GLCMAngleList:
                        for featureName in haralick_labels[:-1]:
                            aFeature.append(
                                glcmFeatures[GLCMAngle][featureName])

                    # LBP
                    lbpFeatures = ExtendLBPFeatures.calcFeatures(
                        extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod)
                    aFeature = aFeature + lbpFeatures.tolist()

                    aFeature = aFeature + [mean_LargBox, std_LargBox]
                    featureWriter.writerow(aFeature)

    print('Done.')
def genFeatures(dualRescaleOption=True):
    """Write GLCM, LBP, Gabor and largest-box mean/std features per ROI.

    Every directory in ``rootDir`` is treated as one patient/phase.  Each
    rectangle of the lesion's rectangle CSV is cut from both the lesion and
    the normal image; the combined max/min of the two cut-outs is the gray
    range passed to the normalization and Gabor helpers.  Rows are written
    to ``outputDir/featuresOutFn``.

    Args:
        dualRescaleOption: accepted for compatibility with callers; this
            implementation does not read it and always uses both the lesion
            and the normal cut-out for the gray range.

    Folder and file names (``lesionFolder``, ``normalFolder``,
    ``lesionDicomFn``, ``normalDicomFn``, ``roiCoordsFn``, ``roiFn``) are
    read from module scope.
    """
    # Parameters and feature list of each algorithm
    GLCMAngleList = ['Avg']
    LBPRadius = 3
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'
    Gaborsigma_range = (1.0, 3.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']

    # Full list of column titles, in the order values are written per row.
    featureTitle = ['PatientID', 'Phase', 'ROI_Y', 'ROI_X', 'Width', 'Height']
    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)
    featureTitle += ['LBP_%02d' % x for x in xrange(0, LBPnPoints + 1)]
    featureTitle.append('LBP_Other')
    for GaborSigma in Gaborsigma_range:
        for GaborFreq in Gaborfreq_range:
            for featureName in GaborFeatureList:
                featureTitle.append(featureName + '_' + str(GaborSigma) +
                                    '_' + str(GaborFreq))
    featureTitle += ['LargestBox_Mean', 'LargestBox_Std']

    featuresCSVFn = os.path.join(outputDir, featuresOutFn)

    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for patientPhaseDir in os.listdir(rootDir):
            if patientPhaseDir.startswith('.') or \
                    os.path.isfile(os.path.join(rootDir, patientPhaseDir)):
                continue

            # Only a simple parser, not always precise
            patientID = patientPhaseDir.split('_ ')[0]
            phaseName = patientPhaseDir.split('_ ')[1].split('_')[1]
            print('Processing %s @ %s ...' % (patientID, phaseName))

            lesionPath = os.path.join(rootDir, patientPhaseDir, lesionFolder)
            normalPath = os.path.join(rootDir, patientPhaseDir, normalFolder)

            # DICOM file name with absolute path
            lesionDicom = os.path.join(lesionPath, lesionDicomFn)
            normalDicom = os.path.join(normalPath, normalDicomFn)

            # ROI file name with absolute path
            lesionROICoords = os.path.join(lesionPath, roiCoordsFn)
            normalROICoords = os.path.join(normalPath, roiCoordsFn)

            # Largest rectangle file name with absolute path
            lesionROIRectFn = os.path.join(lesionPath, roiFn)

            # If any of the files is missing, skip.  The rectangle file is
            # part of the check because it is opened below.
            requiredFiles = (lesionDicom, normalDicom, lesionROICoords,
                             normalROICoords, lesionROIRectFn)
            if not all(os.path.isfile(path) for path in requiredFiles):
                print('Missing File for %s @ %s.' % (patientID, phaseName))
                continue

            with open(lesionROIRectFn, 'r') as roiFile:
                roiList = csv.DictReader(roiFile, dialect='excel')
                for aROI in roiList:
                    if (int(aROI['Y']) == 1) and (int(aROI['X']) == 1):
                        print('Invalid ROI for %s @ %s.' %
                              (patientID, phaseName))
                        continue

                    lesiondicomImage = Read2DImage(lesionDicom)
                    normaldicomImage = Read2DImage(normalDicom)

                    xcoord = int(aROI['X'])
                    ycoord = int(aROI['Y'])
                    width = int(aROI['W'])
                    height = int(aROI['H'])

                    # The same rectangle is cut from both images.
                    lesionsubImage = lesiondicomImage[
                        ycoord:(ycoord + height), xcoord:(xcoord + width)]
                    normalsubImage = normaldicomImage[
                        ycoord:(ycoord + height), xcoord:(xcoord + width)]

                    mean_LargBox = numpy.mean(lesionsubImage)
                    std_LargBox = numpy.std(lesionsubImage)

                    # Gray range over both cut-outs.  The lesion value is
                    # the second argument so that it wins only on a strict
                    # comparison and the normal value is kept on a tie.
                    subImageMax = max(normalsubImage.max(),
                                      lesionsubImage.max())
                    subImageMin = min(normalsubImage.min(),
                                      lesionsubImage.min())

                    # LBP window: the ROI widened by LBPRadius on every side.
                    subImageLBP = lesiondicomImage[
                        ycoord - LBPRadius:(ycoord + height) + LBPRadius,
                        xcoord - LBPRadius:(xcoord + width) + LBPRadius]

                    subImageGLCM = GrayScaleNormalization(
                        lesionsubImage, subImageMax, subImageMin)

                    if numpy.all(lesionsubImage == 0):
                        print('%s @ %s is all zero.' % (patientID, phaseName))
                        continue

                    aFeature = [
                        patientID, phaseName, aROI['Y'], aROI['X'],
                        aROI['W'], aROI['H']
                    ]

                    # GLCM
                    glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
                    for GLCMAngle in GLCMAngleList:
                        for featureName in haralick_labels[:-1]:
                            aFeature.append(
                                glcmFeatures[GLCMAngle][featureName])

                    # LBP
                    extendsubImageLBP = GrayScaleNormalization(
                        subImageLBP, subImageMax, subImageMin)
                    lbpFeatures = ExtendLBPFeatures.calcFeatures(
                        extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod)
                    aFeature = aFeature + lbpFeatures.tolist()

                    # Gabor: receives the whole image plus the ROI rectangle.
                    GaborFeatures = ExtendGaborFeatures.calcFeatures(
                        lesiondicomImage, xcoord, ycoord, width, height,
                        Gaborkernel_bank, subImageMax, subImageMin)
                    for gaborfeature in GaborFeatures:
                        aFeature = aFeature + gaborfeature.tolist()

                    aFeature = aFeature + [mean_LargBox, std_LargBox]
                    featureWriter.writerow(aFeature)

    print('Done.')
def genFeatures():
    """Write texture features of the min-image over four time points.

    For every lesion folder below every folder of ``rootDir`` the image and
    the largest-box rectangle of the first four entries of ``timepointlist``
    are read.  ``MinMaxSubImageGen`` combines the four cut-outs; GLCM, LBP,
    Gabor and mean/std/kurtosis/skewness features of the resulting min
    image are written as one row of ``outputDir/featuresOutFn``.

    ``rootDir``, ``outputDir``, ``featuresOutFn``, ``timepointlist``,
    ``dicomfile`` and ``largestboxfile`` are read from module scope.
    """
    # Parameters and feature list of each algorithm
    GLCMAngleList = ['Avg']
    LBPRadius = 3
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'
    Gaborsigma_range = (0.6, 1.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    Gaborkernel_bank = MinACRExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std', 'Gabor_Kurtosis',
                        'Gabor_Skewness']

    # Full list of column titles, in the order values are written per row.
    featureTitle = ['PatientID', 'ROI_Y', 'ROI_X', 'Width', 'Height']
    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)
    featureTitle += ['LBP_%02d' % x for x in xrange(0, LBPnPoints + 1)]
    featureTitle.append('LBP_Other')
    for GaborSigma in Gaborsigma_range:
        for GaborFreq in Gaborfreq_range:
            for featureName in GaborFeatureList:
                featureTitle.append(featureName + '_' + str(GaborSigma) +
                                    '_' + str(GaborFreq))
    featureTitle += ['LargestBox_Mean', 'LargestBox_Std',
                     'LargestBox_Kurtosis', 'LargestBox_Skewness']

    featuresCSVFn = os.path.join(outputDir, featuresOutFn)

    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for ACRfolder in os.listdir(rootDir):
            if ACRfolder.startswith('.'):
                continue

            nameParts = ACRfolder.split('_')
            patientid = nameParts[0] + nameParts[1]
            ACRpath = os.path.join(rootDir, ACRfolder)

            for lesionfolder in os.listdir(ACRpath):
                if lesionfolder.startswith('.'):
                    continue

                ACRlesionpath = os.path.join(ACRpath, lesionfolder)

                # Folder, then image, of each of the four time points.
                timepointpaths = [
                    os.path.join(ACRlesionpath, timepointlist[index])
                    for index in range(4)
                ]
                dicomImages = [
                    Read2DImage(os.path.join(timepointpath, dicomfile))
                    for timepointpath in timepointpaths
                ]

                # Largest-box rectangle (x, y, w, h) of each time point;
                # when a file lists several rows the last one is used.
                rois = []
                for timepointpath in timepointpaths:
                    roi = None
                    rectFn = os.path.join(timepointpath, largestboxfile)
                    with open(rectFn, 'r') as roiFile:
                        for aROI in csv.DictReader(roiFile, dialect='excel'):
                            roi = (int(aROI['X']), int(aROI['Y']),
                                   int(aROI['W']), int(aROI['H']))
                    rois.append(roi)

                # A rectangle file without rows leaves nothing to cut out.
                if None in rois:
                    print('%s has a largest-box file without ROI rows.'
                          % patientid)
                    continue

                subImages = [
                    image[y:y + h, x:x + w]
                    for image, (x, y, w, h) in zip(dicomImages, rois)
                ]
                # LBP windows: each ROI widened by LBPRadius on every side.
                subImagesLBP = [
                    image[y - LBPRadius:(y + h) + LBPRadius,
                          x - LBPRadius:(x + w) + LBPRadius]
                    for image, (x, y, w, h) in zip(dicomImages, rois)
                ]

                # The size of the first time point is used for all four.
                xcoord1, ycoord1, width1, height1 = rois[0]

                # min/max image over the four cut-outs
                MinSubImage, MaxSubImage = MinMaxSubImageGen(
                    subImages[0], subImages[1], subImages[2], subImages[3],
                    height1, width1)

                # min/max image over the four widened LBP windows
                LBPheight = height1 + 2 * LBPRadius
                LBPwidth = width1 + 2 * LBPRadius
                MinLBPSubImage, MaxLBPSubImage = MinMaxSubImageGen(
                    subImagesLBP[0], subImagesLBP[1], subImagesLBP[2],
                    subImagesLBP[3], LBPheight, LBPwidth)

                # raw statistics of the min image
                mean_LargBox = numpy.mean(MinSubImage)
                std_LargBox = numpy.std(MinSubImage)
                MinSubImagelist = [
                    value
                    for smalllist in MinSubImage.tolist()
                    for value in smalllist
                ]
                Kurtosis_LargBox = kurtosis(MinSubImagelist)
                Skewness_LargBox = skew(MinSubImagelist)

                # Both the plain and the widened window are normalized with
                # the max/min of the plain min image.
                subImageGLCM = GrayScaleNormalization(
                    MinSubImage, MinSubImage.max(), MinSubImage.min())
                extendsubImageLBP = GrayScaleNormalization(
                    MinLBPSubImage, MinSubImage.max(), MinSubImage.min())

                if numpy.all(MinSubImage == 0):
                    print('%s is all zero.' % patientid)
                    continue

                aFeature = [patientid, ycoord1, xcoord1, width1, height1]

                # GLCM
                glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
                for GLCMAngle in GLCMAngleList:
                    for featureName in haralick_labels[:-1]:
                        aFeature.append(glcmFeatures[GLCMAngle][featureName])

                # LBP (uses the widened window)
                lbpFeatures = ExtendLBPFeatures.calcFeatures(
                    extendsubImageLBP, LBPnPoints, LBPRadius, LBPMethod)
                aFeature = aFeature + lbpFeatures.tolist()

                # Gabor: receives the four whole images and ROI origins.
                GaborFeatures = MinACRExtendGaborFeatures.calcFeatures(
                    dicomImages[0], dicomImages[1], dicomImages[2],
                    dicomImages[3],
                    rois[0][0], rois[0][1], rois[1][0], rois[1][1],
                    rois[2][0], rois[2][1], rois[3][0], rois[3][1],
                    width1, height1, Gaborkernel_bank,
                    MinSubImage.max(), MinSubImage.min())
                for gaborfeature in GaborFeatures:
                    aFeature = aFeature + gaborfeature.tolist()

                aFeature = aFeature + [mean_LargBox, std_LargBox,
                                       Kurtosis_LargBox, Skewness_LargBox]
                featureWriter.writerow(aFeature)
def genFeatures():
    """Write texture and shape features for the largest box of each phase.

    Walks ``rootDir`` as case / sub-folder / phase folder.  In each phase
    folder the largest-rectangle CSV, the contour CSV and the image file
    are located by name; shape descriptors come from the contour file and
    GLCM, LBP, Gabor and mean/std features from every rectangle listed in
    the rectangle CSV.  Rows are written to ``outputDir/featuresOutFn``.

    ``rootDir``, ``outputDir`` and ``featuresOutFn`` are read from module
    scope.
    """
    # Parameters and feature list of each algorithm
    GLCMAngleList = ['0', '45', '90', '135', 'Avg']
    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'
    Gaborsigma_range = (1.0, 2.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, [])
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']

    # Full list of column titles, in the order values are written per row.
    featureTitle = ['PatientID', 'Phase', 'ROI_Y', 'ROI_X', 'Width', 'Height']
    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)
    featureTitle += ['LBP_%02d' % x for x in xrange(0, LBPnPoints + 1)]
    featureTitle.append('LBP_Other')
    for GaborSigma in Gaborsigma_range:
        for GaborFreq in Gaborfreq_range:
            for featureName in GaborFeatureList:
                featureTitle.append(featureName + '_' + str(GaborSigma) +
                                    '_' + str(GaborFreq))
    featureTitle += ['LargestBox_Mean', 'LargestBox_Std']
    featureTitle += [
        'compactness', 'entropy', 'bending energy', 'ratio(min/max)'
    ]

    # Phase folder patterns, tested in this order; anything else is 'LM LE'.
    phasePatterns = [
        ('*CC*DES*', 'CC DES'),
        ('*CC*LE*', 'CC LE'),
        ('*MLO*DES*', 'MLO DES'),
        ('*MLO*LE*', 'MLO LE'),
        ('*LM*DES*', 'LM DES'),
    ]

    featuresCSVFn = os.path.join(outputDir, featuresOutFn)

    with open(featuresCSVFn, 'wb') as featureCSVFile:
        featureWriter = csv.writer(featureCSVFile, dialect='excel')
        featureWriter.writerow(featureTitle)

        for casefile in os.listdir(rootDir):
            if casefile.startswith('.') or \
                    fnmatch.fnmatch(casefile, '*Icon*'):
                continue
            # NOTE(review): these two cases are excluded by name; the
            # reason is not visible here.
            if casefile in ('M24 - M24', 'M5 - M5'):
                continue

            print('\n')
            print(casefile)

            patientid = casefile.split('-')[0]
            patientfolderpath = os.path.join(rootDir, casefile)

            for patientfolder in os.listdir(patientfolderpath):
                if patientfolder.startswith('.'):
                    continue
                if fnmatch.fnmatch(patientfolder, '*Icon*') or \
                        fnmatch.fnmatch(patientfolder, '*roi*'):
                    continue

                Dfolderpath = os.path.join(patientfolderpath, patientfolder)

                for phasefolder in os.listdir(Dfolderpath):
                    phasefolderpath = os.path.join(Dfolderpath, phasefolder)

                    if phasefolder.startswith('.'):
                        continue
                    # '*Icon*' is a glob pattern, so it needs fnmatch.
                    if fnmatch.fnmatch(phasefolder, '*Icon*'):
                        continue
                    if os.path.isfile(phasefolderpath):
                        continue

                    print(phasefolder)

                    phase1 = phasefolder.split('-')[0].replace(' ', '')
                    phasename = 'LM LE'
                    for pattern, label in phasePatterns:
                        if fnmatch.fnmatch(phase1, pattern):
                            phasename = label
                            break

                    # Locate the three input files of this phase by name.
                    rectfile = ''
                    contourfile = ''
                    dicomfile = ''
                    for entry in os.listdir(phasefolderpath):
                        if fnmatch.fnmatch(entry, '*texture*'):
                            continue
                        if fnmatch.fnmatch(entry, '*(1)*'):
                            continue
                        isRect = fnmatch.fnmatch(entry, '*largest_rec*')
                        if isRect:
                            rectfile = entry
                        # any other csv is taken as the contour file
                        if fnmatch.fnmatch(entry, '*csv*') and not isRect:
                            contourfile = entry
                        if fnmatch.fnmatch(entry, '*dcm*'):
                            dicomfile = entry

                    recpath = os.path.join(phasefolderpath, rectfile)
                    contourpath = os.path.join(phasefolderpath, contourfile)
                    dicompath = os.path.join(phasefolderpath, dicomfile)

                    shapedescriptors = ROI_ShapeAnalysis_92.genShapefeatures(
                        contourpath)

                    with open(recpath, 'r') as roiFile:
                        roiList = csv.DictReader(roiFile, dialect='excel')
                        for aROI in roiList:
                            if (int(aROI['Y']) == 1) and \
                                    (int(aROI['X']) == 1):
                                print('Invalid ROI for %s @ %s.' %
                                      (patientid, phasename))
                                continue

                            xcoord = int(aROI['X'])
                            ycoord = int(aROI['Y'])
                            width = int(aROI['W'])
                            height = int(aROI['H'])

                            dicomImage = Read2DImage(dicompath)

                            subImage = dicomImage[ycoord:(ycoord + height),
                                                  xcoord:(xcoord + width)]
                            # LBP window: the ROI widened by LBPRadius on
                            # every side.
                            subImageLBP = dicomImage[
                                ycoord - LBPRadius:
                                (ycoord + height) + LBPRadius,
                                xcoord - LBPRadius:
                                (xcoord + width) + LBPRadius]

                            mean_LargBox = numpy.mean(subImage)
                            std_LargBox = numpy.std(subImage)

                            # Both windows are normalized with the max/min
                            # of the plain ROI.
                            subImageGLCM = GrayScaleNormalization(
                                subImage, subImage.max(), subImage.min())
                            extendsubImageLBP = GrayScaleNormalization(
                                subImageLBP, subImage.max(), subImage.min())

                            if numpy.all(subImage == 0):
                                print('%s @ %s is all zero.' %
                                      (patientid, phasename))
                                continue

                            aFeature = [
                                patientid, phasename, aROI['Y'], aROI['X'],
                                aROI['W'], aROI['H']
                            ]

                            # GLCM (plain ROI)
                            glcmFeatures = GLCMFeatures.calcFeatures(
                                subImageGLCM)
                            for GLCMAngle in GLCMAngleList:
                                for featureName in haralick_labels[:-1]:
                                    aFeature.append(
                                        glcmFeatures[GLCMAngle][featureName])

                            # LBP (widened window)
                            lbpFeatures = ExtendLBPFeatures.calcFeatures(
                                extendsubImageLBP, LBPnPoints, LBPRadius,
                                LBPMethod)
                            aFeature = aFeature + lbpFeatures.tolist()

                            # Gabor: receives the whole image plus the ROI
                            # rectangle.
                            GaborFeatures = ExtendGaborFeatures.calcFeatures(
                                dicomImage, xcoord, ycoord, width, height,
                                Gaborkernel_bank, subImage.max(),
                                subImage.min())
                            for gaborfeature in GaborFeatures:
                                aFeature = aFeature + gaborfeature.tolist()

                            aFeature = aFeature + [mean_LargBox, std_LargBox]
                            aFeature = aFeature + shapedescriptors
                            featureWriter.writerow(aFeature)
def _skipEntry(name, patterns=('*Icon*',)):
    """Return True when a directory entry should be ignored.

    Hidden entries (leading '.') are always ignored; any entry matching one
    of the fnmatch `patterns` is ignored as well.
    """
    if name.startswith('.'):
        return True
    return any(fnmatch.fnmatch(name, pattern) for pattern in patterns)


def _windowTextureFeatures(dicomImage, xcoord, ycoord, GLCMAngleList,
                           LBPnPoints, LBPRadius, LBPMethod,
                           Gaborkernel_bank, halfWidth=4):
    """Compute the texture features of the square window centred on a point.

    The window spans [coord - halfWidth, coord + halfWidth) on both axes
    (8 x 8 with the default).  The returned list is ordered as
    GLCM + LBP + Gabor + [Raw_mean, Raw_std], matching the header built in
    genFeatures().

    Returns None when the window cannot be cut out of `dicomImage` in full,
    or when the LBP window (the same window grown by `LBPRadius`) would start
    at a negative index.
    """
    width = 2 * halfWidth
    top = ycoord - halfWidth
    left = xcoord - halfWidth

    subImage = dicomImage[top:top + width, left:left + width]
    subshape = numpy.shape(subImage)
    # Near the image border the slice comes back smaller than width x width.
    if subshape[0] != width or subshape[1] != width:
        return None

    # A negative start index does not clip: it wraps around to the far side
    # of the array, so the extended LBP slice would be empty or wrong.
    if top - LBPRadius < 0 or left - LBPRadius < 0:
        return None

    # max()/min() are re-evaluated at every call, exactly as the original
    # code did, rather than cached.
    subImageGLCM = GrayScaleNormalization(subImage, subImage.max(),
                                          subImage.min())

    # Raw mean and standard deviation of the window.
    Raw_mean, Raw_std = Norm_Mean_Std_LargestBox(subImage, subImage.max(),
                                                 subImage.min())

    # GLCM: computed on the window itself, no extension needed.
    glcmFeatures = GLCMFeatures.calcFeatures(subImageGLCM)
    GLCM = [glcmFeatures[GLCMAngle][featureName]
            for GLCMAngle in GLCMAngleList
            for featureName in haralick_labels[:-1]]

    # LBP: uses the window extended by LBPRadius on every side, normalized
    # with the min/max of the un-extended window.
    subImageLBP = dicomImage[top - LBPRadius:top + width + LBPRadius,
                             left - LBPRadius:left + width + LBPRadius]
    extendsubImageLBP = GrayScaleNormalization(subImageLBP, subImage.max(),
                                               subImage.min())
    LBP = list(ExtendLBPFeatures.calcFeatures(extendsubImageLBP, LBPnPoints,
                                              LBPRadius, LBPMethod))

    # Gabor: receives the whole image plus the window origin and size.
    GaborFeatures = ExtendGaborFeatures.calcFeatures(
        dicomImage, left, top, width, width, Gaborkernel_bank,
        subImage.max(), subImage.min())
    Gabor = [eachg for gaborfeature in GaborFeatures for eachg in gaborfeature]

    return GLCM + LBP + Gabor + [Raw_mean, Raw_std]


def genFeatures():
    """Write one texture-map CSV per patient sub-folder found under rootDir.

    For every patient folder in `rootDir` and every sub-folder in it, the CC
    and MLO xml annotations are parsed into lists of window centre points.
    For each of the phases 'DES-CC', 'LE-CC', 'DES-MLO' and 'LE-MLO' the
    matching DICOM image is read and, for each point, a row of GLCM, LBP,
    Gabor and raw mean/std features is written to
    `<patientID>_<benign|malignant>_ROI_Texture_Map.csv` in `outputDir`.

    Points whose window does not fit in the image are skipped; sub-folders
    lacking a CC or MLO xml annotation are skipped with a printed message.
    """
    # Parameters and feature list of each algorithm
    GLCMAngleList = ['Avg']

    LBPRadius = 1
    LBPnPoints = 8 * LBPRadius
    LBPMethod = 'uniform'
    LBPFeatureList = ['LBP_%02d' % x for x in range(0, LBPnPoints + 1)]
    LBPFeatureList.append('LBP_Other')

    Gaborsigma_range = (0.6, 1.0)
    Gaborfreq_range = (0.1, 0.3, 0.5)
    kernel_bank = []
    Gaborkernel_bank = ExtendGaborFeatures.genKernelBank(
        Gaborsigma_range, Gaborfreq_range, kernel_bank)
    GaborFeatureList = ['Gabor_Mean', 'Gabor_Std']

    # Generate full list of features combined with parameters
    featureTitle = [
        'PatientID', 'Dicom Image Filename', 'Xml Filename', 'Phase Name',
        'X', 'Y', 'Boundary (1) or not (inside: 0)'
    ]
    for GLCMAngle in GLCMAngleList:
        for featureName in haralick_labels[:-1]:
            featureTitle.append(featureName + '_' + GLCMAngle)
    featureTitle = featureTitle + LBPFeatureList
    for GaborSigma in Gaborsigma_range:
        for GaborFreq in Gaborfreq_range:
            for featureName in GaborFeatureList:
                featureTitle.append(featureName + '_' + str(GaborSigma) +
                                    '_' + str(GaborFreq))
    MeanStdLBfeaturelist = ['Raw_Mean', 'Raw_Std']
    # NOTE(review): the header ends with 'Ylabel' but no row written below
    # carries a value for it -- confirm whether it is filled in elsewhere.
    featureTitle = featureTitle + MeanStdLBfeaturelist + ['Ylabel']

    phasenames = ['DES-CC', 'LE-CC', 'DES-MLO', 'LE-MLO']

    for eachptfolder in os.listdir(rootDir):
        if _skipEntry(eachptfolder):
            continue
        # These two cases are explicitly excluded.
        if eachptfolder in ('M24 - M24', 'M5 - M5'):
            continue

        print('\n')
        patientID = eachptfolder.split('-')[0]
        print(patientID)

        if fnmatch.fnmatch(eachptfolder, '*B*'):
            twofolder = 'benign'
        else:
            twofolder = 'malignant'

        rootDir2 = os.path.join(rootDir, eachptfolder)
        for folder2 in os.listdir(rootDir2):
            if _skipEntry(folder2, ('*Icon*', '*roi*')):
                continue

            rootDir3 = os.path.join(rootDir2, folder2)

            ccxmlfile = []
            mloxmlfile = []
            roiDicomfile = {}
            roiDicomfolder = {}

            # Collect the xml annotations and the DICOM files of each phase.
            for xmlcasefolder in os.listdir(rootDir3):
                if _skipEntry(xmlcasefolder):
                    continue
                phasefolderpath = os.path.join(rootDir3, xmlcasefolder)

                if fnmatch.fnmatch(xmlcasefolder, '*CC*xml'):
                    ccxmlfile.append(xmlcasefolder)
                    print(ccxmlfile)
                if fnmatch.fnmatch(xmlcasefolder, '*MLO*xml'):
                    mloxmlfile.append(xmlcasefolder)
                    print(mloxmlfile)
                if os.path.isdir(phasefolderpath):
                    roiDicomfile, roiDicomfolder = finddcmfile(
                        phasefolderpath, xmlcasefolder, roiDicomfile,
                        roiDicomfolder)

            # Without both annotations there is nothing to sample; report it
            # instead of failing with an IndexError on [0] below.
            if not ccxmlfile or not mloxmlfile:
                print('Missing CC or MLO xml annotation in %s, skipped.' %
                      rootDir3)
                continue

            roiccxmlpath = os.path.join(rootDir3, ccxmlfile[0])
            roimloxmlpath = os.path.join(rootDir3, mloxmlfile[0])

            CCxmin, CCxmax, CCymin, CCymax, CCxycoord = ParseXMLDrawROI(
                roiccxmlpath)
            MLOxmin, MLOxmax, MLOymin, MLOymax, MLOxycoord = ParseXMLDrawROI(
                roimloxmlpath)

            # check if coords inside boundary or outside boundary
            CCwindowptlist = chooseinoutcoord(CCxmin, CCxmax, CCymin, CCymax,
                                              CCxycoord)
            MLOwindowptlist = chooseinoutcoord(MLOxmin, MLOxmax, MLOymin,
                                               MLOymax, MLOxycoord)

            # NOTE(review): the file name depends only on patientID and
            # twofolder, so a second sub-folder of the same patient
            # overwrites the first one's output -- confirm this is intended.
            featuresOutFn = (patientID + '_' + twofolder + '_' +
                             'ROI_Texture_Map.csv')
            featuresCSVFn = os.path.join(outputDir, featuresOutFn)

            with open(featuresCSVFn, 'wb') as featureCSVFile:
                featureWriter = csv.writer(featureCSVFile, dialect='excel')
                featureWriter.writerow(featureTitle)

                for phase in phasenames:
                    print(phase)

                    lesionDicomFn = roiDicomfile[phase]
                    lesionDicomFolder = roiDicomfolder[phase]
                    dicomfilepath = os.path.join(rootDir3, lesionDicomFolder,
                                                 lesionDicomFn)
                    dicomImage = Read2DImage(dicomfilepath)

                    # Phases ending in 'CC' use the CC annotation, all
                    # others use the MLO one.
                    if fnmatch.fnmatch(phase, '*CC'):
                        windowptlist = CCwindowptlist
                        xmlname = ccxmlfile[0]
                    else:
                        windowptlist = MLOwindowptlist
                        xmlname = mloxmlfile[0]

                    for eachpt in windowptlist:
                        xcoord = int(eachpt[0])
                        ycoord = int(eachpt[1])
                        boundaryornot = int(eachpt[2])

                        TAfeatures = _windowTextureFeatures(
                            dicomImage, xcoord, ycoord, GLCMAngleList,
                            LBPnPoints, LBPRadius, LBPMethod,
                            Gaborkernel_bank)
                        if TAfeatures is None:
                            continue

                        aFeature = [
                            patientID, lesionDicomFn, xmlname, phase, xcoord,
                            ycoord, boundaryornot
                        ]
                        featureWriter.writerow(aFeature + TAfeatures)