Пример #1
0
def main(in_folder, train_folder, val_folder, labels_file,
         scale, crop_shape, random_draws, valsplit):
    """Run ``main_proc`` over every "jpeg" file listed for ``in_folder``.

    The labels CSV is read with columns ``image`` (used as the index) and
    ``label``; ``sample_train_val_split`` then returns the table from which
    the ``val`` column is read for each file.  Files whose ``val`` entry is 0
    are sent to ``train_folder``, all others to ``val_folder``.

    While the jobs run, ``sys.stderr`` is redirected to ``err.log``.  The
    previous stream is always restored and the log file always closed, even
    when a job raises.  After a successful run every ``*.log`` file in the
    current working directory (including ``err.log``) is deleted.

    Args:
        in_folder: Folder handed to ``parse_folder`` to list the input files.
        train_folder: Output folder passed to ``main_proc`` when ``val == 0``.
        val_folder: Output folder passed to ``main_proc`` otherwise.
        labels_file: Path of the CSV file with the image labels.
        scale: Forwarded unchanged to ``main_proc``.
        crop_shape: Forwarded unchanged to ``main_proc``.
        random_draws: Forwarded unchanged to ``main_proc``.
        valsplit: Forwarded to ``sample_train_val_split``.
    """
    names = parse_folder(in_folder, "jpeg")
    # Single-argument parenthesised form prints the same text on Python 2
    # and Python 3.
    print("Total number of files in %s: %d" % (in_folder, len(names)))
    pdlab = read_csv(labels_file,
                     names=['image', 'label'], index_col='image', header=0)
    # Determine the train and val split
    pdlab = sample_train_val_split(names, pdlab, valsplit)

    def _jobs():
        """Yield one delayed ``main_proc`` call per input file."""
        for i, name in enumerate(names):
            # Look the row up once per file instead of three times.  ``.loc``
            # is the label-based accessor; ``.ix`` is deprecated and was
            # removed in pandas 1.0.
            row = pdlab.loc[extract_filename_in_path(name)]
            is_train = bool(row['val'] == 0)
            yield delayed(main_proc)(
                name,
                row['label'],
                train_folder if is_train else val_folder,
                scale,
                crop_shape,
                random_draws,
                is_train,
                False,  # literal flag; its meaning is defined by main_proc
                i)

    # Redirect stderr to a log file only for the duration of the run.
    saved_stderr = sys.stderr
    with open('err.log', 'w') as errf:
        sys.stderr = errf
        try:
            # Create a parallel pool
            with Parallel(n_jobs=8) as parallel:
                rets = parallel(_jobs())
            print("Done. A total of %d files processed." % len(rets))
        finally:
            # Never leave sys.stderr pointing at a closed file.
            sys.stderr = saved_stderr

    # Reached only on success, after the pool and err.log are released, so
    # no file is unlinked while it is still open.
    for f in glob.glob("*.log"):
        os.unlink(f)
Пример #2
0
import numpy as np
import matplotlib.pyplot as plt
import cv2

# get_ipython().magic(u'matplotlib inline')


# In[2]:

from joblib import Parallel, delayed


# In[3]:

# Folder whose "jpeg" entries are listed; parse_folder is defined outside
# this part of the file.
in_folder = "/media/shared/dr/DiabeticRetinopathy/train_orig/"
names = np.asarray(
    parse_folder(in_folder, 'jpeg')
)


# In[4]:

# Settings.  Only num_samples is consumed below; resize_size and train_split
# are not used in this part of the file (presumably read by later cells).
num_samples = 20000
resize_size = (25, 25)
train_split = 0.9


# In[5]:

# Fixed-seed shuffle of the row indices, then keep the first num_samples
# entries of the shuffled order (fewer if less are available).
rng = np.random.RandomState(1234)
idx = np.arange(names.shape[0])
rng.shuffle(idx)
X_names = np.take(names, idx[:num_samples], axis=0)