def get_areal_features(root, features_path, masks_dir, n_bins = 100): prep_out_path(features_path) files = os.listdir(root) df = pd.DataFrame(columns = range(n_bins * 2) + ['name', 'level']) names = pd.read_csv(labels_file) print "Starting extraction: ", time_now_str() for j, f in enumerate(files): label = names.loc[names['image'] == path.splitext(f)[0]] start = time.time() imr = ImageReader(root, f, masks_dir, gray_scale = True) drusen = get_predicted_region(imr.image, Labels.Drusen) blood = get_predicted_region(imr.image, Labels.Haemorage) Bc = np.ones((5, 5)) labels_drusen, n_drusen = mh.label(drusen, Bc) labels_blood, n_blood = mh.label(blood, Bc) area = float(cv2.countNonZero(imr.mask)) outp = np.array([], dtype = np.int) # sizes excluding background sizes_drusen = mhl.labeled_size(labels_drusen)[1:] / area sizes_blood = mhl.labeled_size(labels_blood)[1:] / area hist_druzen, _ = np.histogram(sizes_drusen, n_bins, (0, 1e-3)) hist_blood, _ = np.histogram(sizes_blood, n_bins, (0, 1e-3)) outp = np.r_[outp, hist_druzen] outp = np.r_[outp, hist_blood] outp = np.r_[outp, label.values[0]] df.loc[j] = outp print "Extracted: {0}, took {1:02.2f} sec ".format(f, time.time() - start) # write out the csv df.to_csv(path.join(features_path, prefix + ".csv"), index = False, header=True) print "Extracted: ", prefix, "@", time_now_str()
# Feature-extraction driver: walk the five label-prefixed sub-directories
# of pre-processed images and run DarkBrightDetector on every image.
import cv2
import time

preprocessed = '/kaggle/retina/train/labelled'
masks = '/kaggle/retina/train/masks'
orig = '/kaggle/retina/train/sample/split'
output = '/kaggle/retina/train/sample/features'
n_bins = 100
prep_out_path(output)
# One sub-directory per class label 0..4.
for i in range(0, 5):
    prefix = str(i)
    print "Starting extraction @ ", time_now_str()
    files = os.listdir(path.join(preprocessed, prefix))
    # intermediate output will be stored here
    # we will save all the files first then join them into one csv file
    df = pd.DataFrame(columns = range(n_bins * 2 + 1))
    j = 0
    for f in files:
        start = time.time()
        im_file = path.join(prefix, f)
        extractor = DarkBrightDetector(preprocessed, orig, im_file, masks, is_debug = False)
        labels = extractor.find_bright_regions()
        # NOTE(review): this cell appears truncated -- 'df' is never filled,
        # 'j' is never incremented, and nothing is written to 'output'.
        # The remainder of the loop body is presumably missing from this view.
df = pd.read_csv(sample_file) n_bins = 100 feats = df.ix[:, :n_bins * 2].values.astype(np.float) levels = df['level'].values names = df['name'].values X_train, X_test, Y_train, Y_test = train_test_split(feats, levels, test_size=0.2) print "Read, train: {:d}, test: {:d}".format(X_train.shape[0], X_test.shape[0]) inner_estimator = DecisionTreeClassifier(max_depth=15, class_weight='auto') abc = AdaBoostClassifier(base_estimator=inner_estimator, n_estimators=175) print "Instantiated classifier" print "Starting: ", time_now_str() #scaler = prep.StandardScaler().fit(X_train) #X_train_scaled = scaler.transform(X_train) #X_test_scaled = scaler.transform(X_test) abc.fit(X_train, Y_train) a_train = metrics.accuracy_score(Y_train, abc.predict(X_train)) a_test = metrics.accuracy_score(Y_test, abc.predict(X_test)) print "Finished: ", time_now_str() print "Accuracy: \n\tTrain: {:2.5f}\n\tTest: {:2.5f}".format(a_train, a_test)
n_bins = 100 X_train = df.ix[:, :n_bins * 2].values.astype(np.float) Y_train = df['level'].values X_test = df_test.ix[:, :n_bins * 2].values.astype(np.float) Y_test = np.array([]) images = df_test['name'].values print "Read, train: {:d}, test: {:d}".format(X_train.shape[0], X_test.shape[0]) rf = SKSupervisedLearning(SVC, X_train, Y_train, X_test, Y_test) # parameters tuned from the above #rf.train_params = {'n_estimators' : 1000, 'max_features': 'sqrt', 'class_weight': 'auto'} rf.train_params = {'C': 100, 'gamma' : 0.001, 'probability' : True, 'class_weight': 'auto'} rf.scoring = "accuracy" print "Instantiated classifier" print "Starting: ", time_now_str() rf.fit_standard_scaler() rf.fit_and_validate() Y_test = rf.clf.predict(X_test) pred = pd.DataFrame(np.array([images, Y_test]).transpose(), columns = ['image', 'level']) pred.to_csv(path.join(pred_dir, "prediction2.csv"), index = False) print "Finished: ", time_now_str()
# NOTE(review): near-duplicate of the earlier extraction-driver cell, and
# truncated mid-statement -- the DarkBrightDetector(...) call at the end is
# cut off, so this cell cannot run as-is. Preserved verbatim with notes only.
import cv2
import time

preprocessed = '/kaggle/retina/train/labelled'
masks = '/kaggle/retina/train/masks'
orig = '/kaggle/retina/train/sample/split'
output = '/kaggle/retina/train/sample/features'
n_bins = 100
prep_out_path(output)
for i in range(0, 5):
    prefix = str(i)
    print "Starting extraction @ ", time_now_str()
    files = os.listdir(path.join(preprocessed, prefix))
    # intermediate output will be stored here
    # we will save all the files first then join them into one csv file
    df = pd.DataFrame(columns=range(n_bins * 2 + 1))
    j = 0
    for f in files:
        start = time.time()
        im_file = path.join(prefix, f)
        # NOTE(review): statement truncated here in the source.
        extractor = DarkBrightDetector(preprocessed, orig, im_file,
# Final layers and training run for the Keras retina classifier.
# NOTE(review): 'model', 'train_path', 'labels_file', 'labels_map' and the
# imported layer/optimizer classes come from earlier cells.
model.add(Dense(768, 447))  # old Keras 0.x API: Dense(input_dim, output_dim)
model.add(Activation('softmax'))
# NOTE(review): 'sgd' is constructed but never used -- compile() below uses
# Adagrad() instead. Confirm which optimizer was intended.
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=Adagrad())
#grapher = Grapher()
#grapher.plot(model, "/temp/graph.png")
nb_epoch = 2
batch_size = 300
nb_samples = 30
from kobra.tr_utils import time_now_str
print("Start time: " + time_now_str())
#x_val, y_val = BatchGenerator(train_path, labels_map, batch_size).get_val()
dsl = DataSetLoader(train_path, labels_file, labels_map)
# Standardize training inputs with statistics from ImageDataGenerator.
imgen = ImageDataGenerator()
imgen.fit(dsl.X_train)
X_train = dsl.X_train - imgen.mean
X_train = X_train / imgen.std
# NOTE(review): fit() hard-codes batch_size=30 / nb_epoch=2 instead of
# using the 'batch_size' (300) and 'nb_epoch' variables defined above --
# confirm which batch size is intended.
model.fit(X_train, dsl.Y_train, batch_size=30, nb_epoch=2, validation_split=0.1)
#for e in range(nb_epoch):
#    print("Epoch %d" % e)
#    batches = BatchGenerator(train_path, labels_file, labels_map, batch_size)