Example #1
def getFold(fold=0,
            fname_in='trainFolds.csv',
            fnames=['CTs.csv', 'Fleischner.csv', 'Nodules.csv'],
            prefix_in='train',
            prefix_out='',
            excludeFold=True):

    if not prefix_out:
        prefix_out = 'fold{}'.format(fold)  # e.g. fold0

    # Get the LNDbIDs assigned to this fold
    nodules = readCsv(fname_in)
    header = nodules[0]
    lines = nodules[1:]

    foldind = header.index('Fold{}'.format(fold))  # column index of this fold in the folds file
    foldlnd = [l[foldind] for l in lines
               if len(l) > foldind]  # LNDbIDs in this fold, skipping rows with missing data

    for fname in fnames:  # loop through the file types
        lines = readCsv(prefix_in + fname)
        header = lines[0]
        lines = lines[1:]

        lndind = header.index('LNDbID')
        if not excludeFold:
            lines = [l for l in lines if l[lndind] in foldlnd]
        else:
            lines = [l for l in lines if not l[lndind] in foldlnd]

        #Save to csv
        writeCsv(prefix_out + fname, [header] + lines)
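The snippets in this collection call readCsv and writeCsv without showing them. A minimal sketch of such helpers, assuming they simply read and write lists of rows via the standard csv module (the names match the snippets, but these bodies are an assumption, not any project's actual utils):

import csv

def readCsv(filename):
    # Return the CSV contents as a list of rows; row 0 is the header.
    with open(filename, newline='') as f:
        return list(csv.reader(f))

def writeCsv(filename, rows):
    # Write a list of rows (header first) back out as CSV.
    with open(filename, 'w', newline='') as f:
        csv.writer(f).writerows(rows)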
Example #2
def getFold(fold=0, fname_in='trainFolds.csv',
            fnames=['CTs.csv', 'Fleischner.csv', 'Nodules.csv'],
            prefix_in='train', prefix_out='',
            excludeFold=False):
    if not prefix_out:
        prefix_out = 'fold{}'.format(fold)

    # Get fold lnds
    nodules = readCsv(fname_in)
    header = nodules[0]
    lines = nodules[1:]

    foldind = header.index('Fold{}'.format(fold))
    foldlnd = [l[foldind] for l in lines if len(l) > foldind]

    for fname in fnames:
        lines = readCsv(prefix_in + fname)
        header = lines[0]
        lines = lines[1:]

        lndind = header.index('LNDbID')
        if not excludeFold:
            lines = [l for l in lines if l[lndind] in foldlnd]
        else:
            lines = [l for l in lines if not l[lndind] in foldlnd]

        # Save to csv
        writeCsv(prefix_out + fname, [header] + lines)
Example #3
    def assessWithoutDist(self):
        print "reading data..."
        pictures = readCsv(self.dataBaseUrl + "data", self.totalNum)

        dao = ImageDao()
        imgs = dao.getAll()
        typeDict = {}
        for img in imgs:
            typeDict[img.imgId] = img.imgType

        print "training..."
        trainSet = pictures[:self.trainNum]
        self.knn.train(trainSet)

        testSet = pictures[self.trainNum:self.totalNum]

        accuracyList = []
        heads = ['distance', 'accuracy', 'averageK']

        print "predicting..."
        for d in range(2000, 4000, 20):
            accuracy, avgK = self.knn.predictForManyWithDist(
                testSet, self.trainNum, d, typeDict)

            item = [d, accuracy, avgK]
            accuracyList.append(item)

            print "distance:%d     accuracy:%f%%       averageK:%f" % (
                d, accuracy * 100, avgK)

        saveCsv(self.resultBasePath + 'assessDist_Radius10_5000-1000.csv',
                heads, accuracyList)
Example #4
def pair(fileName):
    try:
        datas = readCsv(fileName)
    except FileNotFoundError as e:
        print(e)
        exit()

    groupedBy = datas.groupby('Hogwarts House')

    fig, axes = plt.subplots(13, 13, figsize=(15, 10))
    fig.subplots_adjust(hspace=0.1, wspace=0.1)
    for ax in axes:
        for a in ax:
            a.set_xticklabels([])
            a.set_yticklabels([])
            a.tick_params(axis='both', width=0)

    for i in range(len(courses)):
        groupedBy[courses[i]].plot(kind='hist', alpha=0.5, ax=axes[i][i])
        axes[i][i].set_xlabel(courses[i].replace(' ', '\n'), fontsize=8)
        axes[i][i].set_ylabel(courses[i].replace(' ', '\n'), fontsize=8)

    for i, xi in enumerate(courses):
        for j, yi in enumerate(courses):
            if j < i:
                axes[i][j].remove()
                continue
            if xi == yi:
                continue
            ravenclowX = groupedBy.get_group('Ravenclaw')[xi]
            slytherinX = groupedBy.get_group('Slytherin')[xi]
            gryffindorX = groupedBy.get_group('Gryffindor')[xi]
            hufflepuffX = groupedBy.get_group('Hufflepuff')[xi]

            ravenclowY = groupedBy.get_group('Ravenclaw')[yi]
            slytherinY = groupedBy.get_group('Slytherin')[yi]
            gryffindorY = groupedBy.get_group('Gryffindor')[yi]
            hufflepuffY = groupedBy.get_group('Hufflepuff')[yi]

            axes[i][j].plot(ravenclowX,
                            ravenclowY,
                            'v',
                            markersize=.5,
                            color='blue')
            axes[i][j].plot(slytherinX,
                            slytherinY,
                            '^',
                            markersize=.5,
                            color='green')
            axes[i][j].plot(gryffindorX,
                            gryffindorY,
                            'o',
                            markersize=.5,
                            color='red')
            axes[i][j].plot(hufflepuffX,
                            hufflepuffY,
                            's',
                            markersize=.5,
                            color='yellow')
    plt.show()
Example #5
    def assess(self, k=None):
        if not k:
            k = self.k
        print "reading data..."
        pictures = readCsv(self.dataBaseUrl + "data", self.totalNum)

        dao = ImageDao()
        imgs = dao.getAll()
        typeDict = {}
        for img in imgs:
            typeDict[img.imgId] = img.imgType

        print "training..."
        trainSet = pictures[:self.trainNum]
        self.knn.train(trainSet)

        testSet = pictures[self.trainNum:self.totalNum]

        zerNp = np.zeros([k, self.testNum])
        testLabel = np.arange(self.trainNum, self.totalNum)
        for i in range(len(testLabel)):
            testLabel[i] = typeDict[str(testLabel[i]).zfill(5)]
        testLabel = (zerNp + testLabel).astype('int').T

        print "predicting..."
        accuracy, avgCriDist = self.knn.predictForManyWithK(
            testSet, testLabel, k, typeDict)

        print "accuracy:%f%%       averageCriticalDist:%f" % (accuracy * 100,
                                                              avgCriDist)
Example #6
def getTextures(count):
    csvLines = utils.readCsv("../trainset_csv/trainNodules_gt.csv")
    textures = [row[-1] for row in csvLines]
    # delete the 1st element (the header row)
    del textures[0]

    if count != -1:
        del textures[count:]

    return textures
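A hypothetical call, assuming the relative CSV path above exists; count=-1 keeps every texture rating, while a positive count truncates the list:

all_textures = getTextures(-1)  # every nodule's texture rating
first_ten = getTextures(10)     # only the first 10 ratings
print(len(all_textures), first_ten)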
Example #7
def getMaskVolumes():

    csvLines = utils.readCsv("../trainset_csv/trainNodules_gt.csv")
    last_ID = 0
    scan = 0
    spacing = 0
    origin = 0

    maskVolumesList = []

    # ignore header
    for line in csvLines[1:]:

        # get image of this patient only one time (there are repeated patient ids)
        current_ID = line[0]
        if last_ID != current_ID:
            print(getFileID(current_ID))
            scan, spacing, origin, _ = utils.readMhd(
                '../LNDb dataset/dataset/LNDb-' + getFileID(current_ID) +
                '.mhd')
            spacing = [float(spacing[i]) for i in range(3)]

        # find the coordinates of the current nodule (it is done for every line of the csv)
        finding_coords = line[4:7]

        nodule_x = (float(finding_coords[0]) - float(origin[0])) / float(
            spacing[0])
        nodule_y = (float(finding_coords[1]) - float(origin[1])) / float(
            spacing[1])
        nodule_z = (float(finding_coords[2]) - float(origin[2])) / float(
            spacing[2])
        real_coords = [nodule_x, nodule_y, nodule_z]

        # get a mask for the image of this patient (from one of the radiologists that found the current nodule)
        # list of radiologists that found the current nodule
        radiologists = line[1]
        # always choose the mask from the first radiologist in the list
        radId = str(radiologists[0])
        mask, _, _, _ = utils.readMhd('../LNDb dataset/masks/LNDb-' +
                                      getFileID(current_ID) + '_rad' + radId +
                                      '.mhd')

        # extract mini cube of the current nodule on the masked scan
        mask_volume = utils.extractCube(mask,
                                        spacing,
                                        real_coords,
                                        cube_size=80)

        # add mask volumes to the list
        maskVolumesList.append(mask_volume)

        last_ID = current_ID

    return maskVolumesList
Example #8
    def assessFeaWithBlurWithoutK(self):
        print "reading neaFeaData..."
        netFea = readCsv(self.dataBaseUrl + "netFea", self.totalNum) * 256 * 40
        print "reading data..."
        pictures = readCsv(self.dataBaseUrl + "data", self.totalNum)
        pictures = np.append(pictures, netFea, axis=1)
        print pictures.shape
        dao = ImageDao()
        imgs = dao.getAll()
        typeDict = {}
        for img in imgs:
            typeDict[img.imgId] = img.imgType

        print "training..."
        trainSet = pictures[:self.trainNum]
        self.knn.train(trainSet)

        testSet = pictures[self.trainNum:self.totalNum]

        accuracyList = []
        heads = ['k', 'accuracy', 'averageCriticalDist']

        print "predicting..."
        for k in range(1, 101):
            zerNp = np.zeros([k, self.testNum])
            testLabel = np.arange(self.trainNum, self.totalNum)
            for i in range(len(testLabel)):
                testLabel[i] = typeDict[str(testLabel[i]).zfill(5)]
            testLabel = (zerNp + testLabel).astype('int').T

            accuracy, avgCriDist = self.knn.predictForManyWithK(
                testSet, testLabel, k, typeDict)

            item = [k, accuracy, avgCriDist]
            accuracyList.append(item)

            print "k:%d     accuracy:%f%%       averageCriticalDist:%f" % (
                k, accuracy * 100, avgCriDist)

        saveCsv(self.resultBasePath + 'assessFeaWithBlurWithoutK.csv', heads,
                accuracyList)
Example #9
def createCube(cubeSize=80):
    csvLines = readCsv("csv/trainSet.csv")
    header = csvLines[0]
    nodules = csvLines[1:]

    for i, n in tqdm(enumerate(nodules[:1])):
        x = int(n[header.index("x")])
        y = int(n[header.index("y")])
        z = int(n[header.index("z")])

        lnd = int(n[header.index("LNDbID")])

        ctr = np.array([x, y, z])
        [scan, spacing, _, _] = readMhd('data/LNDb-{:04}.mhd'.format(lnd))
        cube = extractCube(scan, spacing, ctr, cubeSize)

        np.save("cube/{:0}.npy".format(i), cube)
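A hypothetical call; note that the loop above iterates over nodules[:1], so only the first nodule's cube is saved per run:

# Extract 64-voxel cubes instead of the default 80 (illustrative only).
createCube(cubeSize=64)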
Example #10
    def assessWithoutRadius(self, k=None):
        if not k:
            k = self.k
        accuracyList = []
        heads = ['radius', 'accuracy', 'averageCriticalDist']

        for radius in range(5, 15):
            print radius, ':'
            print "blurring ..."
            data = MyData()
            data.saveCsvWithGaussianBlur(radius=radius)

            dao = ImageDao()
            imgs = dao.getAll()
            typeDict = {}
            for img in imgs:
                typeDict[img.imgId] = img.imgType

            zerNp = np.zeros([k, self.testNum])
            testLabel = np.arange(self.trainNum, self.totalNum)
            for i in range(len(testLabel)):
                testLabel[i] = typeDict[str(testLabel[i]).zfill(5)]
            testLabel = (zerNp + testLabel).astype('int').T

            pictures = readCsv(self.dataBaseUrl + "data", self.totalNum)

            print "training..."
            trainSet = pictures[:self.trainNum]
            self.knn.train(trainSet)

            testSet = pictures[self.trainNum:self.totalNum]

            print "predicting..."
            accuracy, avgCriDist = self.knn.predictForManyWithK(
                testSet, testLabel, k, typeDict)

            item = [radius, accuracy, avgCriDist]
            accuracyList.append(item)

            print "k:%d     radius:%f    accuracy:%f%%   averageCriticalDist:%f" % (
                k, radius, accuracy * 100, avgCriDist)

        saveCsv(self.resultBasePath + 'assessRadiusK' + str(k) + '.csv', heads,
                accuracyList)
Example #11
def scatter(fileName):
	try:
		datas = readCsv(fileName)
	except FileNotFoundError as e:
		print(e)
		exit()

	groupedBy = datas.groupby('Hogwarts House')

	fig, axes = plt.subplots(13, 13, figsize=(15, 10))
	fig.subplots_adjust(hspace=0, wspace=0)
	i = 0
	for ax, course in zip(axes, courses):
		ax[i].text(0.5, 0.5, course.replace(' ', '\n'), ha='center', va='center', fontsize=8)
		for a in ax:
			a.set_xticklabels([])
			a.set_yticklabels([])
			a.tick_params(axis='both', width=0)
		i += 1


	for i, xi in enumerate(courses):
		for j, yi in enumerate(courses):
			if xi == yi :
				continue

			ravenclowX = groupedBy.get_group('Ravenclaw')[xi]
			slytherinX = groupedBy.get_group('Slytherin')[xi]
			gryffindorX = groupedBy.get_group('Gryffindor')[xi]
			hufflepuffX = groupedBy.get_group('Hufflepuff')[xi]


			ravenclowY = groupedBy.get_group('Ravenclaw')[yi]
			slytherinY = groupedBy.get_group('Slytherin')[yi]
			gryffindorY = groupedBy.get_group('Gryffindor')[yi]
			hufflepuffY = groupedBy.get_group('Hufflepuff')[yi]

			axes[i][j].plot(ravenclowX, ravenclowY, 'v', markersize=.5, color='blue')
			axes[i][j].plot(slytherinX, slytherinY, '^', markersize=.5, color='green')
			axes[i][j].plot(gryffindorX, gryffindorY, 'o', markersize=.5, color='red')
			axes[i][j].plot(hufflepuffX, hufflepuffY, 's', markersize=.5, color='yellow')
	
	plt.show()
Example #12
File: histogram.py  Project: ppicavez/dslr
def histogram(fileName):

    try:
        datas = readCsv(fileName)
    except FileNotFoundError as e:
        print(e)
        exit(1)
    plt.figure(figsize=(8, 6))
    groupedBy = datas.groupby('Hogwarts House')
    for course in courses:
        groupedBy[course].plot(kind='hist', alpha=0.5)
        plt.title("Histogram of notes frequency by House  \n for " + course +
                  " course")
        plt.legend(loc='upper left')
        print("Notes frequency by House for " + course)
        print(
            "Close graphical windows to see next histogram  or press Ctrl + W \n"
        )
        plt.show()
Example #13
File: describe.py  Project: ppicavez/dslr
def describe(fileName):
    
    datas = readCsv(fileName)
    datas = dropColumns(datas, ["Index", "Hogwarts House", "First Name",
                                "Last Name", "Birthday", "Best Hand"])
    result = pd.DataFrame(
        columns=datas.columns,
        index=["Count", "Mean", "Std", "Min", "25%", "50%", "75%", "Max"]
    )

    result.iloc[0] = count(datas)
    result.iloc[1] = mean(datas)
    result.iloc[2] = std(datas)
    result.iloc[3] = minimum(datas)
    result.iloc[4] = quantile(datas, 0.25)
    result.iloc[5] = quantile(datas, 0.50)
    result.iloc[6] = quantile(datas, 0.75)
    result.iloc[7] = maximum(datas)

    print(result)
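describe depends on helpers such as count, mean, std, minimum, quantile, and maximum that are not shown here. A minimal sketch of mean and quantile over the numeric DataFrame, assuming NaN entries should be skipped (the project's real implementations may differ):

import numpy as np
import pandas as pd

def mean(datas: pd.DataFrame) -> list:
    # Column-wise mean, ignoring NaN entries.
    out = []
    for col in datas.columns:
        vals = [v for v in datas[col] if not np.isnan(v)]
        out.append(sum(vals) / len(vals) if vals else float('nan'))
    return out

def quantile(datas: pd.DataFrame, q: float) -> list:
    # Column-wise quantile with linear interpolation between sorted values.
    out = []
    for col in datas.columns:
        vals = sorted(v for v in datas[col] if not np.isnan(v))
        if not vals:
            out.append(float('nan'))
            continue
        pos = (len(vals) - 1) * q
        lo = int(pos)
        hi = min(lo + 1, len(vals) - 1)
        out.append(vals[lo] + (vals[hi] - vals[lo]) * (pos - lo))
    return out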
Example #14
File: load.py  Project: kpprasa/mimia_proj
def generate_data(data_file, fold, data_path='../data/', is_train=True):

    lines = readCsv(data_file)
    header = lines[0]
    lines = lines[1:]
    consolidate_centers = {}
    consolidate_labels = {}

    # loop through lines, collate labels, centers by images
    for line in lines:
        process_line(header, line, consolidate_centers, consolidate_labels)

    # load and save examples (subimages) and labels (textures)
    examples = []
    labels = []
    for Id, centers in consolidate_centers.items():
        img_name = 'LNDb-{:04}.mhd'.format(int(Id))
        examples += load_image(os.path.join(data_path, img_name), centers)
        labels += consolidate_labels[Id]

    examples = np.array(examples)
    labels = np.array(labels)

    # now the labels and examples should be in order
    # so we save as a pickle file (serialized file) to be loaded by ML model
    data = ('train' if is_train else 'val') + \
        '_data_except_{}.npy'.format(fold)
    texture_file = ('train' if is_train else 'val') + \
        '_labels_except_{}.npy'.format(fold)

    DATAFILE = open(data, mode='wb')
    pickle.dump(examples, DATAFILE)
    DATAFILE.close()

    LABELFILE = open(texture_file, mode='wb')
    pickle.dump(labels, LABELFILE)
    LABELFILE.close()
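A hypothetical call, assuming a fold CSV produced by a getFold-style split as in Examples #1 and #2 (the file name is illustrative):

# Build the fold-0 training pickle from a hypothetical fold CSV.
generate_data('fold0Nodules.csv', fold=0, data_path='../data/', is_train=True)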
Example #15
def getCubes(cubeSize):
    csvLines = utils.readCsv("../trainset_csv/trainNodules_gt.csv")
    last_ID = 0
    scan = 0
    spacing = 0
    origin = 0
    
    cubeList = []    
    textures = [row[-1] for row in csvLines]

    # delete 1st element (header)
    del textures[0]

    # ignore header
    for line in csvLines[1:]:
                
        current_ID = line[0]
        if last_ID != current_ID:
            print(getFileID(current_ID))
            scan,spacing,origin,_ = utils.readMhd('../LNDb dataset/dataset/LNDb-' + getFileID(current_ID) + '.mhd')
            spacing = [float(spacing[i]) for i in range(3)]

        finding_coords = line[4:7]
        
        nodule_x = (float(finding_coords[0]) - float(origin[0])) / float(spacing[0])
        nodule_y = (float(finding_coords[1]) - float(origin[1])) / float(spacing[1])
        nodule_z = (float(finding_coords[2]) - float(origin[2])) / float(spacing[2])
        real_coords = [nodule_x, nodule_y, nodule_z]

        scan_cube = utils.extractCube(scan, spacing, real_coords, cube_size=cubeSize)
        
        cubeList.append(scan_cube)

        # nodule_coords
        last_ID = current_ID
    
    return cubeList, textures
Example #16
import numpy as np
import copy
from matplotlib import pyplot as plt
from utils import readMhd, readCsv, getImgWorldTransfMats, convertToImgCoord, extractCube
from readNoduleList import nodEqDiam

dispFlag = False

# Read nodules csv
csvlines = readCsv('trainNodules_gt.csv')
header = csvlines[0]
nodules = csvlines[1:]

lndloaded = -1
for n in nodules:
    vol = float(n[header.index('Volume')])
    if nodEqDiam(vol) > 3:  # only get nodule cubes for nodules > 3 mm
        ctr = np.array([
            float(n[header.index('x')]),
            float(n[header.index('y')]),
            float(n[header.index('z')])
        ])
        lnd = int(n[header.index('LNDbID')])
        rads = list(map(int, list(n[header.index('RadID')].split(','))))
        radfindings = list(
            map(int, list(n[header.index('RadFindingID')].split(','))))
        finding = int(n[header.index('FindingID')])

        print(lnd, finding, rads, radfindings)

        # Read scan
Example #17
import numpy as np
from sklearn import svm
from sklearn.metrics import mean_squared_error

import utils

# read csv data
X, y = utils.readCsv()

# train svm
clf = svm.SVR(C=100000000, gamma=0.01, epsilon=200000, kernel='rbf')
clf.fit(X, y)

# evaluate fit on training data
y_pred = clf.predict(X)
score = np.sqrt(mean_squared_error(y, y_pred))
print "RMSE: %s" % score

# # Output
# RMSE: 545770.525711

# store svm
utils.writeSVM(clf)
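This snippet (and the cross-validated variant in Example #24) assumes utils.readCsv returns a NumPy feature matrix X and target vector y. A minimal sketch of such a loader, assuming a comma-separated file whose last column is the regression target (path and layout are assumptions):

import numpy as np

def readCsv(path='data/train.csv'):
    # Skip the header row; treat the last column as the target.
    raw = np.genfromtxt(path, delimiter=',', skip_header=1)
    return raw[:, :-1], raw[:, -1]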
Example #18
                tex = 0
            nodules.append([
                int(n[lndind][0]),
                ','.join([str(int(r)) for r in n[radind]]),  # comma-separated radiologist IDs
                ','.join([str(int(f)) for f in n[fndind]]),  # corresponding radiologist finding IDs
                ind + 1,  # new finding ID
                np.mean(n[xind]),  # centroid is the average of the radiologists' centroids
                np.mean(n[yind]),
                np.mean(n[zind]),
                agrlvl,  # number of radiologists that annotated the finding (0 if non-nodule)
                nod,
                vol,
                tex
            ])
    if verb:
        for n in nodules:
            print(n)
    return nodules


if __name__ == "__main__":
    # Merge nodules from train set
    prefix = 'train'
    fname_gtNodulesFleischner = '{}Nodules.csv'.format(prefix)
    gtNodules = readCsv(fname_gtNodulesFleischner)
    for line in gtNodules:
        print(line)
    gtNodules = joinNodules(gtNodules)
    writeCsv('{}Nodules_gt.csv'.format(prefix), gtNodules)  #write to csv
Example #19
            a[1][i].imshow(np.reshape(encode_decode[i], (50, 50, 3)))
        f.show()
        plt.draw()
        plt.show()

    def predict(self):
        saver = tf.train.Saver()
        saver.restore(self.sess, "network/model/model.ckpt")
        encode = self.sess.run(self.encoder_op,
                               feed_dict={self.X: self.dataSet})

        size = encode.shape[0]
        i = 1
        while (i * 1000) < size:
            np.savetxt('DataSet/netFea' + str(i - 1) + '.csv',
                       encode[(i - 1) * 1000:i * 1000],
                       delimiter=',')
            i = i + 1
        np.savetxt('DataSet/netFea' + str(i - 1) + '.csv',
                   encode[(i - 1) * 1000:size],
                   delimiter=',')


if __name__ == '__main__':
    print "reading data..."
    dataSet = readCsv("DataSetRaw/data", -1)
    model = AutoEnc_Tensor(dataSet=dataSet)
    # model.train()
    # model.test()
    model.predict()
Example #20
import numpy as np
import copy
from matplotlib import pyplot as plt
from utils import readMhd, readCsv, getImgWorldTransfMats, convertToImgCoord, extractCube
from readNoduleList import nodEqDiam
import cv2 as cv

dispFlag = False

# Read nodules csv
csvlines = readCsv('../trainset_csv/trainNodules_gt.csv')
header = csvlines[0]
nodules = csvlines[1:]

# Descriptors Algorithm
orb = cv.ORB_create()


def computeDescriptors(scan_cube):
    descriptors3d = []
    for s in scan_cube:
        # convert to grayscale
        img = s.astype(np.float64)
        if img.max() != img.min():
            img = (img - img.min()) * (255.0 / (img.max() - img.min()))
        img = img.astype(np.uint8)

        # compute descriptors
        kp, des = orb.detectAndCompute(img, None)
        if des is not None:
            for d in des:
Example #21
print "initializing..."
imgBaseURL = "img/"
dataBaseUrl = "DataSet/"

upperK = 50
select = -1
k = 0

optimalDist = 2200
optimalLeastK = 12
dataNum = -1
radius = 10

print "reading data..."
data = MyData()
pictures = readCsv(dataBaseUrl + "data", dataNum)

print "training..."
knn = KNearestNeighbor()
knn.train(pictures)

dao = ImageDao()
imgs = dao.getAll()
typeDict = {}
for img in imgs:
    typeDict[img.imgId] = img.imgUrl

# server.config['UPLOAD_FOLDER'] = os.getcwd()


@server.route('/', methods=['GET', 'POST'])
Example #22
from model import NestedUNet
from sklearn.model_selection import KFold
from sklearn.metrics import jaccard_score


def count_params(model):  # count the trainable network parameters
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


if __name__ == "__main__":
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    torch.manual_seed(10)

    reader = readCsv('./train_valid_CTs.csv')
    data_list = readCsv('./data_list_train_valid.csv')
    reader = reader[1:]
    data_list = np.array(data_list[1:])

    tr_data = list()
    tr_label = list()
    for number in range(len(reader)):  # read the files and build the training set [patient, slices, 3, Y, X]
        [scan, spacing, origin, transfmat
         ] = readMhd('traindata/LNDb-{:04}.mhd'.format(int(reader[number][0])))
        [mask, spacing, origin, transfmat] = readMhd(
            'trainlabel_1c1t/LNDb-{:04}_w_Text_merged-mask.mhd'.format(
                int(reader[number][0])))
        for z in range(scan.shape[0]):
            tr_data.append(scan[z])
            tr_label.append(mask[z])
Example #23
                    help="Possible values: hz, power, dur")
parser.add_argument('-out',
                    dest="OUTPUT_FILE",
                    default="../data/output/bird_sort_hz_mix.txt",
                    help="Output txt file")
args = parser.parse_args()

INPUT_SAMPLE_FILE = args.INPUT_SAMPLE_FILE
SORT_BY = args.SORT_BY
MIN_VALUE = args.MIN_VALUE
MAX_VALUE = args.MAX_VALUE
OUTPUT_FILE = args.OUTPUT_FILE
OVERLAP = 0.25

print("Reading data file...")
sampleData = readCsv(INPUT_SAMPLE_FILE)
print("Found %s rows in %s" % (len(sampleData), INPUT_SAMPLE_FILE))

# filter values
sampleData = [s for s in sampleData if MIN_VALUE <= s[SORT_BY] <= MAX_VALUE]

# sort values
sampleData = sorted(sampleData, key=lambda k: k[SORT_BY])
values = [s[SORT_BY] for s in sampleData]
print("%s range: [%s - %s]" % (SORT_BY, min(values), max(values)))

totalDur = sum([int(s["dur"] * (1.0 - OVERLAP)) for s in sampleData])
print("Total time: %s" %
      time.strftime('%H:%M:%S', time.gmtime(totalDur / 1000)))

instructions = []
Example #24
import numpy as np
from sklearn import svm
from sklearn.cross_validation import KFold
from sklearn.metrics import mean_squared_error

import utils

# read csv data
X, y = utils.readCsv()

# 10 folds cross validation svm training
rmse = []
kf = KFold(len(X), n_folds=10)
for train, test in kf:
    clf = svm.SVR(C=100000000, gamma=0.01, epsilon=200000, kernel='rbf')
    clf.fit(X[train], y[train])
    y_pred = clf.predict(X[test])
    score = np.sqrt(mean_squared_error(y[test], y_pred))
    rmse.append(score)
    print "RMSE: %s" % score

print "Min RMSE: %s\nMax RMSE: %s\nMean RMSE: %s" % (
	np.min(rmse), np.max(rmse), np.mean(rmse))

# # Output
# RMSE: 464307.13222
# RMSE: 580966.456102
# RMSE: 519024.052988
# RMSE: 608102.489362
# RMSE: 606586.000127
# RMSE: 600282.364792
Example #25
            numpy.mean(numpy.sqrt(c_array[1]))
        ]
        costs.append(cost)
        print 'Training epoch %d, reconstruction cost ' % cost[0], numpy.mean(
            cost[1]), ' jacobian norm ', cost[2]

    heads = ['epoch', 'cost', 'jacobian']
    saveCsv(fileName="layer_1Cost.csv", heads=heads, datas=costs)

    end_time = timeit.default_timer()

    training_time = (end_time - start_time)

    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((training_time) / 60.))

    image = Image.fromarray(
        tile_raster_images(X=ca.W.get_value(borrow=True).T,
                           img_shape=(50, 50 * 3),
                           tile_shape=(10, 10),
                           tile_spacing=(1, 1)))

    image.save('cae_filters.png')
    numpy.savetxt('layer_1W.csv', ca.W.get_value(borrow=True), delimiter=',')
    os.chdir('../')


if __name__ == '__main__':
    print "reading data..."
    train_set_x = readCsv("DataSetRaw/data", 1000)
    test_cA(train_set_x=train_set_x, n_hidden=100)
Example #26

def calcNodTexClass(nodtex):
    # Compute texture class from texture rating nodtex
    texthr = [7 / 3, 11 / 3]
    if isinstance(nodtex, float) or isinstance(nodtex, int):
        if nodtex >= texthr[0] and nodtex <= texthr[1]:
            vclass = 1
        elif nodtex > texthr[1]:
            vclass = 2
        else:
            vclass = 0
    else:  # numpy array
        vclass = np.zeros(nodtex.shape)
        vclass[np.bitwise_and(nodtex >= texthr[0], nodtex < texthr[1])] = 1
        vclass[nodtex >= texthr[1]] = 2
    return vclass


if __name__ == "__main__":
    # Compute Fleischner score for all trainset nodules
    fname_gtNodulesFleischner = 'trainNodules.csv'
    gtNodules = readCsv(fname_gtNodulesFleischner)
    gtNodules = joinNodules(gtNodules)
    pdFleischner = calcFleischner(gtNodules)

    # Compute Fleischner score for all predicted nodules (given volume and texture rating/class/probabilities)
    fname_pdNodulesFleischner = 'predictedNodules.csv'
    pdNodules = readCsv('input/' + fname_pdNodulesFleischner)
    pdFleischner = calcFleischner(pdNodules)
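A short usage check: with texthr = [7/3, 11/3], ratings below 7/3 map to class 0, ratings between 7/3 and 11/3 to class 1, and ratings above 11/3 to class 2, for both scalar and NumPy-array inputs:

import numpy as np

print(calcNodTexClass(2.0))                        # 0
print(calcNodTexClass(3.0))                        # 1
print(calcNodTexClass(np.array([2.0, 3.0, 4.5])))  # [0. 1. 2.]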
Example #27
                    val = [(epoch + 1), i, c]
                    costs.append(val)
        # saver.save(self.sess,'network/model/model.ckpt')
        heads = ['epoch', 'cost']
        # saveCsv(fileName="network/netCost.csv", heads=heads, datas=costs)

        print("Optimization Finished!")

    def test(self):
        saver = tf.train.Saver()
        saver.restore(self.sess, "network/model/model.ckpt")
        # Applying encode and decode over test set
        encode_decode = self.sess.run(
            self.decoder_op,
            feed_dict={self.X: self.dataSet[:self.examples_to_show]})
        # Compare original images with their reconstructions
        f, a = plt.subplots(2, 10, figsize=(10, 2))
        for i in range(self.examples_to_show):
            a[0][i].imshow(np.reshape(self.dataSet[i], (50, 50, 3)))
            a[1][i].imshow(np.reshape(encode_decode[i], (50, 50, 3)))
        f.show()
        plt.draw()
        plt.show()


if __name__ == '__main__':
    print "reading data..."
    dataSet = readCsv("DataSetRawNoDb/data", 1000)
    model = AutoEnc_Tensor(dataSet=dataSet)
    a = model.train()
    # model.test()
Example #28
INPUT_PHRASE_STATS_FILE = args.INPUT_PHRASE_STATS_FILE
INPUT_CHORDS_FILE = args.INPUT_CHORDS_FILE
BPM = args.BPM
SEEDS = [int(s) for s in args.SEEDS.split(",")]
OUTPUT_FILE = args.OUTPUT_FILE

MS_PER_BEAT = int(round(60.0 / BPM * 1000))
DIVISIONS_PER_BEAT = 4  # e.g. 4 = quarter notes, 8 = eighth notes, etc
BEATS_PER_PHRASE = 16
ROUND_TO_NEAREST = int(round(MS_PER_BEAT / DIVISIONS_PER_BEAT))
VARIANCE_MS = 10  # +/- milliseconds an instrument note should be off by to give it a little more "natural" feel

seedPos = 0

print("Reading data file...")
metaData = readCsv(INPUT_META_FILE)
sampleData = readCsv(INPUT_SAMPLE_FILE)
phraseStats = readCsv(INPUT_PHRASE_STATS_FILE)
chordData = readCsv(INPUT_CHORDS_FILE)

# add note-octave to sampleData and chordData
for i, d in enumerate(sampleData):
    sampleData[i]["noteOctave"] = "%s%s" % (d["note"], d["octave"])
for i, d in enumerate(chordData):
    chordData[i]["noteOctave"] = "%s%s" % (d["note"], d["octave"])

print("Found %s rows in %s" % (len(metaData), INPUT_META_FILE))
print("Found %s rows in %s" % (len(sampleData), INPUT_SAMPLE_FILE))
print("Found %s rows in %s" % (len(phraseStats), INPUT_PHRASE_STATS_FILE))
print("Found %s rows in %s" % (len(chordData), INPUT_CHORDS_FILE))
Example #29
    (88, 164, 172),
    (115, 181, 128),
    (149, 189, 94),
    (185, 189, 74),
    (214, 177, 62),
    (229, 146, 53),
    (229, 94, 43),
    (217, 33, 32)
]
random.seed(3)
random.shuffle(colors)
colorCount = len(colors)

# Read files
print("Reading data file...")
data = readCsv(INPUT_FILE)
rowCount = len(data)
print("Found %s rows in %s" % (rowCount, INPUT_FILE))

groups = list(set(d["group"] for d in data))

# Make sure output dirs exist
outDirs = [os.path.dirname(OUTPUT_FILE)]
for outDir in outDirs:
    if not os.path.exists(outDir):
        os.makedirs(outDir)

im = Image.new(mode="RGB", size=(WIDTH, HEIGHT), color=(0, 0, 0))

draw = ImageDraw.Draw(im)
for i, d in enumerate(data):
Example #30
import numpy as np
import copy
from matplotlib import pyplot as plt
from utils import readMhd, readCsv, getImgWorldTransfMats, convertToImgCoord, extractCube
from readNoduleList import nodEqDiam

dispFlag = False

# Read nodules csv
csvlines = readCsv('/media/tungthanhlee/SSD/grand_challenge/dataset/LNDB_segmentation/trainset_csv/trainNodules.csv')
# csvlines = readCsv('/home/ad/LungCancer/dataset/trainset_csv/trainNodules.csv')

header = csvlines[0]
nodules = csvlines[1:]

lndloaded = -1
for n in nodules:
    vol = float(n[header.index('Volume')])
    if nodEqDiam(vol) > 3:  # only get nodule cubes for nodules > 3 mm
        ctr = np.array([float(n[header.index('x')]),
                        float(n[header.index('y')]),
                        float(n[header.index('z')])])
        lnd = int(n[header.index('LNDbID')])

        rad = int(n[header.index('RadID')])
        # rads = list(map(int, list(n[header.index('RadID')].split(','))))
        # radfindings = list(map(int, list(n[header.index('RadFindingID')].split(','))))
        finding = int(n[header.index('FindingID')])

        # print(lnd, finding, rads, radfindings)
        print(lnd, finding, rad, finding)
        # Read scan
        if lnd != lndloaded:
Example #31

def calcNodTexClass(nodtex):
    # Compute texture class from texture rating nodtex
    texthr = [7 / 3, 11 / 3]
    if isinstance(nodtex, float) or isinstance(nodtex, int):
        if nodtex >= texthr[0] and nodtex <= texthr[1]:
            vclass = 1
        elif nodtex > texthr[1]:
            vclass = 2
        else:
            vclass = 0
    else:  #numpy array
        vclass = np.zeros(nodtex.shape)
        vclass[np.bitwise_and(nodtex >= texthr[0], nodtex < texthr[1])] = 1
        vclass[nodtex >= texthr[1]] = 2
    return vclass


if __name__ == "__main__":
    # Compute Fleischner score for all trainset nodules
    fname_gtNodulesFleischner = '../trainset_csv/trainNodules.csv'
    gtNodules = readCsv(fname_gtNodulesFleischner)
    gtNodules = joinNodules(gtNodules)
    pdFleischner = calcFleischner(gtNodules)

    # Compute Fleischner score for all predicted nodules (given volume and texture rating/class/probabilities)
    fname_pdNodulesFleischner = 'predictedNodulesC.csv'
    pdNodules = readCsv('../submission/' + fname_pdNodulesFleischner)
    pdFleischner = calcFleischner(pdNodules)