Example #1
0
def get_areal_features(root, features_path, masks_dir, n_bins = 100):
    prep_out_path(features_path)
    files = os.listdir(root)

    df = pd.DataFrame(columns = range(n_bins * 2) + ['name', 'level'])
    names = pd.read_csv(labels_file)
    print "Starting extraction: ", time_now_str()

    for j, f in enumerate(files):
        label = names.loc[names['image'] == path.splitext(f)[0]]
        start = time.time()
        imr = ImageReader(root, f, masks_dir, gray_scale = True)

        drusen = get_predicted_region(imr.image, Labels.Drusen)
        blood = get_predicted_region(imr.image, Labels.Haemorage)

        Bc = np.ones((5, 5))
        labels_drusen, n_drusen = mh.label(drusen, Bc)
        labels_blood, n_blood = mh.label(blood, Bc)

        area = float(cv2.countNonZero(imr.mask))

        outp = np.array([], dtype = np.int)

        # sizes excluding background
        sizes_drusen = mhl.labeled_size(labels_drusen)[1:] / area
        sizes_blood = mhl.labeled_size(labels_blood)[1:] / area

        hist_druzen, _ = np.histogram(sizes_drusen, n_bins, (0, 1e-3))
        hist_blood, _ = np.histogram(sizes_blood, n_bins, (0, 1e-3))


        outp = np.r_[outp, hist_druzen]
        outp = np.r_[outp, hist_blood]
        outp = np.r_[outp, label.values[0]]
        df.loc[j] = outp
        print "Extracted: {0}, took {1:02.2f} sec ".format(f, time.time() - start)
      
    # write out the csv
    df.to_csv(path.join(features_path, prefix + ".csv"), index = False, header=True)    
    print "Extracted: ", prefix, "@", time_now_str()
Example #2
0
import cv2
import time

# Input: preprocessed (labelled) images, eye masks, and the original splits.
preprocessed = '/kaggle/retina/train/labelled'
masks = '/kaggle/retina/train/masks'
orig = '/kaggle/retina/train/sample/split'
# Output directory for the extracted feature files.
output = '/kaggle/retina/train/sample/features'

# Number of histogram bins per lesion type.
n_bins = 100

prep_out_path(output)

# One pass per disease level 0..4; images are grouped in subdirs by level.
for i in range(0, 5):
    prefix = str(i)

    print "Starting extraction @ ", time_now_str()
    files = os.listdir(path.join(preprocessed, prefix))
    
    # intermediate output will be stored here
    # we will save all the files first then join them into one csv file
    # (2 * n_bins feature columns plus one label column)
    df = pd.DataFrame(columns = range(n_bins * 2 + 1))
    j = 0

    for f in files:
        start = time.time()
        
        # path of the image relative to `preprocessed`, e.g. "3/12_left.jpeg"
        im_file = path.join(prefix, f)

        # NOTE(review): the loop body continues beyond this excerpt.
        extractor = DarkBrightDetector(preprocessed, orig, im_file, masks, is_debug = False)
        labels = extractor.find_bright_regions()
Example #3
0
df = pd.read_csv(sample_file)
n_bins = 100

feats = df.ix[:, :n_bins * 2].values.astype(np.float)
levels = df['level'].values
names = df['name'].values

X_train, X_test, Y_train, Y_test = train_test_split(feats,
                                                    levels,
                                                    test_size=0.2)

print "Read, train: {:d}, test: {:d}".format(X_train.shape[0], X_test.shape[0])

inner_estimator = DecisionTreeClassifier(max_depth=15, class_weight='auto')
abc = AdaBoostClassifier(base_estimator=inner_estimator, n_estimators=175)

print "Instantiated classifier"
print "Starting: ", time_now_str()

#scaler = prep.StandardScaler().fit(X_train)
#X_train_scaled = scaler.transform(X_train)
#X_test_scaled = scaler.transform(X_test)

abc.fit(X_train, Y_train)
a_train = metrics.accuracy_score(Y_train, abc.predict(X_train))
a_test = metrics.accuracy_score(Y_test, abc.predict(X_test))

print "Finished: ", time_now_str()

print "Accuracy: \n\tTrain: {:2.5f}\n\tTest: {:2.5f}".format(a_train, a_test)
Example #4
0
n_bins = 100

X_train = df.ix[:, :n_bins * 2].values.astype(np.float)
Y_train = df['level'].values

X_test = df_test.ix[:, :n_bins * 2].values.astype(np.float)
Y_test = np.array([])

images = df_test['name'].values

print "Read, train: {:d}, test: {:d}".format(X_train.shape[0], X_test.shape[0])

rf = SKSupervisedLearning(SVC, X_train, Y_train, X_test, Y_test)

# parameters tuned from the above
#rf.train_params = {'n_estimators' : 1000, 'max_features': 'sqrt', 'class_weight': 'auto'}
rf.train_params = {'C': 100, 'gamma' : 0.001, 'probability' : True, 'class_weight': 'auto'}
rf.scoring = "accuracy"
print "Instantiated classifier"

print "Starting: ", time_now_str()

rf.fit_standard_scaler()
rf.fit_and_validate()
Y_test = rf.clf.predict(X_test)
pred = pd.DataFrame(np.array([images, Y_test]).transpose(), columns = ['image', 'level'])
pred.to_csv(path.join(pred_dir, "prediction2.csv"), index = False)

print "Finished: ", time_now_str()
Example #5
0
import cv2
import time

# Input: preprocessed (labelled) images, eye masks, and the original splits.
preprocessed = '/kaggle/retina/train/labelled'
masks = '/kaggle/retina/train/masks'
orig = '/kaggle/retina/train/sample/split'
# Output directory for the extracted feature files.
output = '/kaggle/retina/train/sample/features'

# Number of histogram bins per lesion type.
n_bins = 100

prep_out_path(output)

# One pass per disease level 0..4; images are grouped in subdirs by level.
for i in range(0, 5):
    prefix = str(i)

    print "Starting extraction @ ", time_now_str()
    files = os.listdir(path.join(preprocessed, prefix))

    # intermediate output will be stored here
    # we will save all the files first then join them into one csv file
    # (2 * n_bins feature columns plus one label column)
    df = pd.DataFrame(columns=range(n_bins * 2 + 1))
    j = 0

    for f in files:
        start = time.time()

        # path of the image relative to `preprocessed`, e.g. "3/12_left.jpeg"
        im_file = path.join(prefix, f)

        # NOTE(review): the constructor call is cut off at the end of
        # this excerpt; remaining arguments are outside the visible range.
        extractor = DarkBrightDetector(preprocessed,
                                       orig,
                                       im_file,
Example #6
0
# Final classifier layers: 768 inputs -> 447-way softmax.
# NOTE(review): Dense(input_dim, output_dim) is the old Keras 0.x signature;
# `model` is built before this excerpt.
model.add(Dense(768, 447))
model.add(Activation('softmax'))

# NOTE(review): `sgd` is constructed but the model is compiled with Adagrad
# below -- either pass `sgd` to compile() or remove it; confirm intent.
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=Adagrad())

#grapher = Grapher()
#grapher.plot(model, "/temp/graph.png")

# Training hyper-parameters.
nb_epoch = 2
batch_size = 300
nb_samples = 30

from kobra.tr_utils import time_now_str
print("Start time: " + time_now_str())

#x_val, y_val = BatchGenerator(train_path, labels_map, batch_size).get_val()

# Standardize the training set with statistics from the image generator.
dsl = DataSetLoader(train_path, labels_file, labels_map)
imgen = ImageDataGenerator()
imgen.fit(dsl.X_train)

X_train = dsl.X_train - imgen.mean
X_train = X_train / imgen.std

# nb_epoch now uses the constant defined above (same value, 2).
# NOTE(review): batch_size=30 here disagrees with batch_size=300 above --
# kept as-is to preserve behavior; confirm which was intended.
model.fit(X_train, dsl.Y_train, batch_size=30, nb_epoch=nb_epoch, validation_split=0.1)

#for e in range(nb_epoch):
#    print("Epoch %d" % e)
#    batches = BatchGenerator(train_path, labels_file, labels_map, batch_size)