def split_dataset_into_train_test(split_input_path, split_output_path, config, paths):
    """Split a dataset folder into train/test(/val) sets, recycle the val
    images back into the shared input pool, and remove the now-useless
    source and val directories.

    Args:
        split_input_path: folder of class subfolders to split.
        split_output_path: destination root for the train/test/val split.
        config: object exposing `train_test_valid_ratio` (tuple of floats).
        paths: dict with an 'input_path' entry — where val images are moved.
    """
    split_folders.ratio(split_input_path, split_output_path, seed=1337,
                        ratio=config.train_test_valid_ratio)
    # The val split is not used downstream: move its images back into the
    # shared input pool. Best-effort — a missing val folder is not an error.
    try:
        for class_dir in os.listdir(f"{split_output_path}/val/"):
            for image in os.listdir(f"{split_output_path}/val/{class_dir}/"):
                # Plain shutil.move: the entries are files, so the previous
                # copy_function=shutil.copytree was wrong (copytree expects
                # directories) and would fail on a cross-device move.
                shutil.move(f"{split_output_path}/val/{class_dir}/{image}",
                            f"{paths['input_path']}")
    except OSError:
        pass
    # Remove the source folder and the emptied val folder. rmtree handles
    # non-empty trees; fall back to rmdir for the (unlikely) case rmtree
    # is unavailable/fails but the directories are already empty.
    try:
        shutil.rmtree(split_input_path)
        shutil.rmtree(f"{split_output_path}/val")
    except OSError:
        try:
            os.rmdir(split_input_path)
            os.rmdir(f"{split_output_path}/val")
        except OSError:
            pass
def organiseFiles(self):
    """Organises the resulted folder into Training / Test Sets (0.8 / 0.1 / 0.1)"""
    def _resolve(relative):
        # Resolve a path two levels above this source file.
        return path.abspath(path.join(__file__, relative))

    source_dir = _resolve("../../saved/")
    destination_dir = _resolve("../../output/")
    split_folders.ratio(source_dir, output=destination_dir, seed=1337, ratio=(.8, .1, .1))
def preprocess(self):
    """Split the raw images 70/30 into ./data/{train,val} and build the
    rescaled (and, for training, augmented) directory iterators."""
    # Partition ./img_data/ into train and val folders with a fixed seed.
    split_folders.ratio('./img_data/', output="./data", seed=1337, ratio=(.7, .3))

    augmenting_gen = ImageDataGenerator(rescale=1. / 255,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True)
    plain_gen = ImageDataGenerator(rescale=1. / 255)

    # Both iterators share the same geometry/batching; shuffle is off so
    # sample order stays deterministic.
    common = dict(target_size=(64, 64), batch_size=32,
                  class_mode='categorical', shuffle=False)
    self.training_set = augmenting_gen.flow_from_directory('./data/train', **common)
    self.test_set = plain_gen.flow_from_directory('./data/val', **common)
def split_to_train_test():
    """Split data/image_sort into train/val folders (80/20) under data/image_train."""
    # NOTE(review): the original comment here described `split_folders.fixed`
    # (fixed item counts per set); this call actually splits by *ratio* and,
    # with a 2-tuple, produces only train and val sets.
    split_folders.ratio('data/image_sort', output="data/image_train", seed=1337, ratio=(.8, .2))  # default values
def split_directory_into_train_val_test_sets(indir, outdir, ratios):
    """Partition a dataset directory into train, validation, and test directories.

    Arguments:
        indir {/yourpath} -- path to dataset directory
        outdir {'/yourpath'} -- path to new directory
        ratios {tuple of floats that sum to 1} -- (train_size, val_size, test_size)
    """
    # Fixed seed keeps the partition reproducible across runs.
    seed = 1337
    split_folders.ratio(indir, output=outdir, seed=seed, ratio=ratios)
def split_into_tvt(inPath, proportions, outPath=None):
    """Split `inPath` into train/val/test subfolders.

    proportions should be a list [a, b, c] where a+b+c = 1.
    When `outPath` is omitted the split is written next to the input.
    """
    destination = inPath if outPath is None else outPath
    split_folders.ratio(input=inPath, output=destination, seed=22, ratio=proportions)
def split_data(self, data_path, out_path, ratio=(.35, .35, .3)):
    """Split `data_path` into train/val/test subfolders under `out_path`.

    NOTE(review): the default ratio is (.35, .35, .3) — 35% train / 35% val /
    30% test — not 80/10/10 as the original comment claimed.
    """
    #TODO Add dependency in readme
    import split_folders
    # Random seed for shuffling is fixed at 1337 for reproducible splits.
    split_folders.ratio(input=data_path, output=out_path, seed=1337, ratio=ratio)
def split_images(input_directory, output_directory, type, value):
    """Split an image folder into train/val(/test) sets.

    `type` selects the strategy:
      * "ratio" -- `value` is a tuple of floats, e.g. (.8, .2) for train/val only.
      * "fixed" -- `value` is a per-set item count; a single number yields
        only train/val.
    Anything else prints an error.
    """
    if type == "ratio":
        split_folders.ratio(input_directory, output_directory, seed=1337, ratio=value)
        return
    if type == "fixed":
        split_folders.fixed(input_directory, output_directory, seed=1337,
                            fixed=value, oversample=False)
        return
    print("Invalid input")
def split_folder(self):
    """
    Split the raw dataset directory into train/val/test subsets using the
    paths, ratios and random seed taken from the global config `cfg`.
    (To split into only train/val, `ratio` would take a 2-tuple such as (.8, .2).)
    :return: None
    """
    split_folders.ratio(cfg.DATASETS.RAW_PATH,
                        output=cfg.DATASETS.PATH,
                        seed=cfg.CONST.RANDOM_SEED,
                        ratio=(cfg.DATASETS.TRAIN_RATIO,
                               cfg.DATASETS.VAL_RATIO,
                               cfg.DATASETS.TEST_RATIO))  # default values
def test_split_ratio():
    """A default ratio split must neither lose nor duplicate any jpg."""
    base = os.path.dirname(__file__)
    src = os.path.join(base, "imgs")
    dst = os.path.join(base, "output")
    # Start from a clean output directory.
    shutil.rmtree(dst, ignore_errors=True)
    split_folders.ratio(src, dst)
    count_before = sum(1 for _ in pathlib.Path(src).glob("**/*.jpg"))
    count_after = sum(1 for _ in pathlib.Path(dst).glob("**/*.jpg"))
    assert count_before == count_after
def test_split_ratio_no_test():
    """A two-way (.8, .2) split must neither lose nor duplicate any jpg."""
    base = os.path.dirname(__file__)
    src = os.path.join(base, 'imgs')
    dst = os.path.join(base, 'output')
    # Start from a clean output directory.
    shutil.rmtree(dst, ignore_errors=True)
    split_folders.ratio(src, dst, ratio=(.8, .2))
    count_before = sum(1 for _ in pathlib.Path(src).glob('**/*.jpg'))
    count_after = sum(1 for _ in pathlib.Path(dst).glob('**/*.jpg'))
    assert count_before == count_after
def main():
    """Split the dataset, build augmented/plain loaders, then train and
    evaluate FieldRoadNet on the held-out test split."""
    # Whether to dump augmented samples to disk for inspection. Replaces the
    # original unreachable `if False:` block with an explicit, named switch.
    save_transformed_samples = False

    # Split data into train/test folders.
    split_folders.ratio(DATA_FOLDER, output=SPLIT_DATA_FOLDER,
                        ratio=(1 - TEST_SPLIT, TEST_SPLIT))

    # Training uses random crop/flip augmentation; test only resizes.
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(size=(50, 50), scale=(0.5, 1)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])
    test_transform = transforms.Compose(
        [transforms.Resize(size=(50, 50)),
         transforms.ToTensor()])

    # Produce train and test data loaders.
    train_folder = os.path.join(SPLIT_DATA_FOLDER, TRAIN_FOLDER)
    train_loader = produce_data_loader(data_folder=train_folder, transform=train_transform)
    test_folder = os.path.join(SPLIT_DATA_FOLDER, TEST_FOLDER)
    test_loader = produce_data_loader(data_folder=test_folder, transform=test_transform)

    # Optionally save some transformed images that were created from originals.
    if save_transformed_samples:
        save_images_from_loader(data_loader=train_loader,
                                folder=os.path.join(TRANSFORMS_FOLDER, 'train'))
        save_images_from_loader(data_loader=test_loader,
                                folder=os.path.join(TRANSFORMS_FOLDER, 'test'))

    # Initialize objects for the net.
    device = get_device()
    print(f'Using device: {device}')
    field_road_net = FieldRoadNet().to(device)
    optimizer = torch.optim.Adam(params=field_road_net.parameters())
    criterion = torch.nn.CrossEntropyLoss()

    # Train the network.
    field_road_net = train_network(net=field_road_net, optimizer=optimizer,
                                   criterion=criterion, loader=train_loader)

    # Evaluate accuracy on the test split.
    test_network(net=field_road_net, loader=test_loader, save_images=True)
def split_Data_folder(
        self,
        folder_to_split="",
        ratio=(.8, .1, .1),
        seed=89,
):
    """Split `folder_to_split` into train/val/test under the data directory,
    unless the train directory already exists (then just report it)."""
    # Guard clause: never overwrite an existing split.
    if os.path.isdir(self.__train_dir):
        print("le dossier {} existe déjà".format(self.__train_dir))
        return
    print(os.getcwd())
    split_folders.ratio(folder_to_split,
                        output=self.__data_dir,
                        seed=seed,
                        ratio=ratio)
def divideDataIntoTrainValTestSets(data_root, train=.6, val=.2, test=.2):
    """Distribute data from `data_root`/all/all into train, validation, and
    test sets, each nested one level deep (e.g. <root>/train/train/...).

    Args:
        data_root: dataset root containing an all/all folder of class subfolders.
        train, val, test: split fractions; must sum to 1.

    Raises:
        ValueError: if the fractions do not sum to 1.
    """
    import math

    all_root = os.path.join(data_root, "all/all")
    # Float-safe check: e.g. .7 + .2 + .1 != 1.0 exactly in binary floating
    # point, so the old `assert train + val + test == 1` could spuriously
    # fail — and asserts vanish under `python -O`. Raise explicitly instead.
    if not math.isclose(train + val + test, 1.0):
        raise ValueError(
            "train + val + test must equal 1, got {}".format(train + val + test))
    split_folders.ratio(all_root, output=data_root, seed=1337,
                        ratio=(train, val, test))  # default values
    os.rename(os.path.join(data_root, "val"), os.path.join(data_root, "validation"))
    # Nest each split one level deeper via a temp name:
    # <root>/train -> <root>/trainTemp/train -> rename back to <root>/train.
    temp = "Temp"
    for item in ["train", "validation", "test"]:
        os.mkdir(os.path.join(data_root, item + temp))
    for item in ["train", "validation", "test"]:
        shutil.move(os.path.join(data_root, item),
                    os.path.join(data_root, item + temp))
        os.rename(os.path.join(data_root, item + temp),
                  os.path.join(data_root, item))
def splitratio(tr, tes):
    # Split the local `bbcsport` corpus into train/val folders under ./output
    # using the (tr, tes) ratio; any previous ./output is removed first so the
    # split starts clean.
    # NOTE(review): separators are hard-coded as "\\" — this only works on
    # Windows as written.
    path = os.getcwd()
    if os.path.isdir(path + "\\output"):
        shutil.rmtree(path + "\\output")
    path += "\\bbcsport"
    split_folders.ratio(path, output="output", seed=1337, ratio=(tr, tes))  # default values
# One-off script: split the number_data image folders into train/val/test.
# The third ratio of 0 produces an (empty) test folder alongside train and val.
import split_folders

split_folders.ratio("C:\\Users\\Reddy\\Desktop\\Proj\\number_data",
                    output="C:\\Users\\Reddy\\Desktop\\Proj\\new_data",
                    seed=1337,
                    ratio=(.7, .3, 0))
# Scan every dataset subfolder, delete JPGs whose EXIF data cannot be read
# (and stray Thumbs.db files), then split the cleaned dataset 80/10/10.
# NOTE(review): `data_set`, `listdir`, `Image` and `platform` are defined
# outside this fragment.

# Path separator for the current platform (paths are concatenated by hand).
slash = "/"
if platform == "win32":
    slash = "\\"
PATH = os.getcwd()
# Define data path
data_path = PATH + slash + data_set
data_dir_list = listdir(data_path)
for dataset in data_dir_list:
    print(dataset)
    img_list = listdir(data_path + slash + dataset)
    print('Loaded the images of dataset-' + '{}\n'.format(dataset))
    for filename in img_list:
        file_path = data_path + slash + dataset + slash + filename
        if filename.endswith('.JPG'):
            try:
                img = Image.open(file_path)  # open the image file
                print(img._getexif())
            except (IndexError, AttributeError, OSError) as e:
                print("DELETED FILE")
                # os.remove is portable and handles paths containing spaces,
                # unlike the old `os.system('rm ' + path)` shell-out (which
                # also could never work on the win32 branch above).
                os.remove(file_path)
        if filename.endswith('.db'):
            print("DELETED FILE")
            os.remove(file_path)

# Split the cleaned dataset into train/val/test (80/10/10).
split_folders.ratio(data_set, output="Dataset", seed=1337, ratio=(.8, .1, .1))  # default values
# NOTE(review): truncated fragment — the enclosing function's `def` line and
# the earlier branches of this if/elif chain (other emotion codes) are outside
# this view; indentation below is reconstructed.
            # Write the frame into the sadness class folder.
            cv2.imwrite(
                "/content/drive/My Drive/Datasets/kdef_akdef/KDEF_front/sadness/" + file,
                img)
            count += 1
        elif substr == 'SU':
            # 'SU' files are surprise images: reload and write into surprise/.
            img = cv2.imread(path + "/" + file, -1)
            cv2.imwrite(
                "/content/drive/My Drive/Datasets/kdef_akdef/KDEF_front/surprise/" + file,
                img)
            count += 1
    # Number of images written — presumably used by the caller for reporting.
    return count


# Split each KDEF variant into train/val/test (70/15/15) with a fixed seed,
# writing the result next to the source folder.
split_folders.ratio(
    '/content/drive/My Drive/Datasets/kdef_akdef/KDEF_front',
    output=
    '/content/drive/My Drive/Datasets/kdef_akdef/KDEF_front(with train-test-val split)',
    seed=1337,
    ratio=(.7, .15, .15))
split_folders.ratio(
    '/content/drive/My Drive/Datasets/kdef_akdef/KDEF_front_half',
    output=
    '/content/drive/My Drive/Datasets/kdef_akdef/KDEF_front_half(with train-test-val split)',
    seed=1337,
    ratio=(.7, .15, .15))
split_folders.ratio(
    '/content/drive/My Drive/Datasets/kdef_akdef/KDEF_front_half_full',
    output=
    '/content/drive/My Drive/Datasets/kdef_akdef/KDEF_front_half_full(with train-test-val split)',
    seed=1337,
    ratio=(.7, .15, .15))
# Render one spectrogram image per 5-second clip for each music genre, then
# split the resulting image folders into train/val sets and build Keras
# directory iterators over them.
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    pathlib.Path(f'img_data/{g}').mkdir(parents=True, exist_ok=True)
    for filename in os.listdir(f'./drive/My Drive/genres/{g}'):
        # NOTE(review): this path looks garbled — it presumably should embed
        # {filename}; as written every iteration loads the same path.
        songname = f'./drive/My Drive/genres/{g}/(unknown)'
        y, sr = librosa.load(songname, mono=True, duration=5)
        print(y.shape)
        plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap,
                     sides='default', mode='default', scale='dB')
        plt.axis('off')
        # Drop the extension and any dots so the saved name is a clean .png.
        plt.savefig(f'img_data/{g}/{filename[:-3].replace(".", "")}.png')
        plt.clf()

# To only split into training and validation set, set a tuple to `ratio`, i.e, `(.8, .2)`.
split_folders.ratio('./img_data/', output="./data", seed=1337, ratio=(.8, .2))  # default values

train_datagen = ImageDataGenerator(
    # rescale all pixel values from 0-255, so after this step all pixel values are in range (0,1)
    rescale=1./255,
    shear_range=0.2,  # to apply some random transformations
    zoom_range=0.2,  # to apply zoom
    horizontal_flip=True)  # image will be flipped horizontally
test_datagen = ImageDataGenerator(rescale=1./255)

# NOTE(review): fragment is truncated here — the flow_from_directory call
# continues beyond this view.
training_set = train_datagen.flow_from_directory(
    './data/train',
    target_size=(64, 64),
    batch_size=32,
    class_mode='categorical',
# Following is the ink to download the dataset # http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar # download the image data and update the path variable to the downloaded dataset # this code will take a single folder dataset ad will split it into train and validation datset # ratio can be changed to .8,.1,.1 to split the data into train validation and test instead import split_folders split_folders.ratio('Path_to_Dataset', output="Output_path", seed=1337, ratio=(.8, .2)) # default values
# -*- coding: utf-8 -*- # @Time : 10/26/19 6:53 PM # @Author : luolu # @Email : [email protected] # @File : split_data_folders.py # @Software: PyCharm import split_folders # Split with a ratio. # To only split into training and validation set, # set a tuple to `ratio`, i.e, `(.8, .2)`. split_folders.ratio( '/home/xkjs/Downloads/bdai/PycharmProjects/DataUtils/img/image/', output="/home/xkjs/PycharmProjects/Semantic-Segmentation-Suite/Dataset_bp", seed=1337, ratio=(.8, .1, .1)) # default values # Split val/test with a fixed number of items e.g. # 100 for each set. # To only split into training and validation set, # use a single number to `fixed`, i.e., `10`. # split_folders.fixed('input_folder', # output="output", # seed=1337, # fixed=(100, 100), # oversample=False) # default values
# Split the DataNew image folders 80/20 into train/val sets under ./Data.
import split_folders

split_folders.ratio("DataNew", output="Data", seed=1337, ratio=(.8, .2))
# Functions that may be helpful in Deep Learning process # USAGE: install relevant library in Python # 1. pip install split-folders # 2. vis # 1. split images in a whole folder into train/validation/test set import split_folders # Split with a ratio. # To only split into training and validation (/test) set, set a tuple to `ratio`, i.e, `(.8, .2)`. split_folders.ratio('input_folder', output="output", seed=1337, ratio=(.8, .1, .1)) # default values # 2. filter_visualization # visualize the activation maps of the network so we could tell what exactly it learned from keras import models #https://www.analyticsvidhya.com/blog/2018/03/essentials-of-deep-learning-visualizing-convolutional-neural-networks/ model = models.load_model("models/adr_cnn1.h5") model.summary() from vis.visualization import visualize_activation from vis.utils import utils from matplotlib import pyplot as plt #matplotlib inline plt.rcParams['figure.figsize'] = (18, 6) # Utility to search for layer index by name. # Alternatively we can specify this as -1 since it corresponds to the last layer. layer_idx = utils.find_layer_idx(model, 'dense_2')
3.tensorflow
'''

# NOTE(review): the triple-quoted text above is the tail of a string whose
# opening ''' is outside this view.
import split_folders
from keras.models import Sequential
from keras import layers
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.optimizers import nadam
from keras.preprocessing.image import ImageDataGenerator

# Change source and destination to your local dataset paths.
source = "C:/Users/Excalibur/Desktop/Big_data/flowers"
destination = "C:/Users/Excalibur/Desktop/Big_data/dest"

# split_folders splits the data into test, validation and train per the ratio below.
split_folders.ratio(source, output=destination, seed=1337, ratio=(.6, .2, .2))  # default values

# Folder destinations of the test/val/train splits produced above.
train_folder = "C:/Users/Excalibur/Desktop/Big_data/dest/train"
test_folder = "C:/Users/Excalibur/Desktop/Big_data/dest/test"
val_folder = "C:/Users/Excalibur/Desktop/Big_data/dest/val"

# Sequential CNN over 128x128 RGB inputs.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
#this part mount google drive to colab import os import tarfile from google.colab import drive drive.mount('/content/gdrive/') import os os.mkdir('dataset') #%% #train test split imgpath = '/Users/byronleung/Downloads/postgraduate study/下学期/机器学习/归档/Images' output_ = '/Users/byronleung/Downloads/postgraduate study/下学期/机器学习/dataset' import split_folders #%% # Split with a ratio. split_folders.ratio(imgpath, output=output_, seed=1337, ratio=(.80, .1, .1))#ratio of train,validation and test #%% from keras.preprocessing.image import ImageDataGenerator train_dir = 'gdrive/My Drive/dataset/train' test_dir = 'gdrive/My Drive/dataset/test' vali_dir = 'gdrive/My Drive/dataset/val' train_datagen=ImageDataGenerator(rescale=1./255,rotation_range=45 ,width_shift_range=0.2 ,height_shift_range=0.2,shear_range=0.2 ,zoom_range=0.2,horizontal_flip=True,vertical_flip=True,fill_mode="nearest") test_datagen=ImageDataGenerator(rescale=1./255) vali_datagen = ImageDataGenerator(rescale=1./255) train_generator=train_datagen.flow_from_directory(train_dir,target_size=(299,299),batch_size=20 ,class_mode="sparse")#return 2D one-hot label
# NOTE(review): truncated fragment — the figure/axes setup for `ax` and `img`
# is outside this view; original nesting of the first two lines is unknown.
ax.imshow(img)
ax.axis('off')
plt.tight_layout()
plt.show()

# Directory with all images, one subdirectory per class.
data_dir = os.path.join(os.getcwd(), 'images')
subdirs = [x[1] for x in os.walk(data_dir)]
# All classes to classify an image (the subfolder names of the top-level dir).
classes = subdirs[0]

# Split data into train/val/test; the 0 test ratio yields an empty test split.
split_folders.ratio('images', output='split_set', seed=1337, ratio=(0.6, 0.4, 0))

# Per-split directories produced by the split above.
data_dir = os.path.join(os.getcwd(), 'split_set')
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'val')
test_dir = os.path.join(data_dir, 'test')

total_train = 0
total_val = 0
# Count the training samples per class; the val count is truncated below.
for c in classes:
    temp_url = os.path.join(train_dir, c)
    total_train += len(os.listdir(temp_url))
    temp_url = os.path.join(val_dir, c)
# Split a raw dataset 80/10/10, then normalise every image to 512x512 RGB JPEG.
# NOTE(review): fragment is truncated inside the innermost try block.
import warnings
import os
from tqdm import tqdm
from PIL import Image
import split_folders

# Target (width, height) for every image after resizing.
size = 512, 512
warnings.filterwarnings("ignore")

split_folders.ratio('./dataset_old', output="./dataset", seed=1337, ratio=(.8, .1, .1))  # default values

# Walk <split>/<class>/<file> and resize/convert each image in place.
for folder in os.listdir('./dataset'):
    for directories in tqdm(os.listdir('./dataset/' + folder)):
        for file in os.listdir('./dataset/' + folder + '/' + directories):
            img = './dataset/' + folder + '/' + directories + '/' + file
            # print(img)
            im = Image.open(img)
            try:
                rgb_im = im.convert('RGB')
                if im.size != size:
                    # print(im.size)
                    im_resized = im.resize(size, Image.ANTIALIAS)
                    rgb_im = im_resized.convert('RGB')
                    try:
                        rgb_im.save('./dataset/' + folder + '/' + directories + '/' + file[:-4] + '.jpg', dpi=(72, 72))
# cv2.imshow('img', img) # cv2.waitKey(0) # cv2.destroyAllWindows() # zip image and labels together data = zip(labels, test) # create directory to save images if not os.path.exists('./images'): os.mkdir('./images') # save images as per emotion cnt = 0 for d in data: #print(d) dir = Path(f'./images/{d[0]}') if not os.path.exists(dir): os.mkdir(dir) cv2.imwrite(f'{dir}/img{cnt}.jpg', d[1]) # img = cv2.imread(f'{dir}/img{cnt}.jpg') # img_gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # cv2.imwrite(f'{dir}/gryimg{cnt}.jpg', img_gry) # print(img_gry.shape) cnt += 1 # split dataset into test train and validation split_folders.ratio('./images/', output='./images/DATA', seed=12345, ratio=(0.8, 0.1, 0.1))
def main():
    """Crop PlantDoc images to their CSV bounding boxes, then merge each
    dataset's test split back into train and re-split 60/20/20, dropping the
    'Tomato two spotted spider mites leaf' class — for both the cropped copy
    and the original dataset.
    """
    # Start from a clean `cropped` tree mirroring the PlantDoc-Dataset layout.
    if os.path.exists('cropped'):
        shutil.rmtree('cropped')
    os.mkdir('cropped')
    for subdir, dirs, files in os.walk('PlantDoc-Dataset'):
        os.mkdir(os.path.join('cropped', subdir))
    # Crop every jpg to each of its bounding boxes listed in the split's CSV;
    # a file with k boxes yields k crops, suffixed 0..k-1.
    for fol in ['train', 'test']:
        df = pd.read_csv(fol + '_labels.csv')
        for subdir, dirs, files in os.walk('PlantDoc-Dataset/' + fol):
            for file in files:
                filepath = os.path.join(subdir, file)
                if filepath.endswith('.jpg'):
                    img = cv2.imread(filepath)
                    relevant = df[df['filename'] == file]
                    xmin = relevant['xmin'].values
                    xmax = relevant['xmax'].values
                    ymin = relevant['ymin'].values
                    ymax = relevant['ymax'].values
                    for i in range(len(xmin)):
                        # CSV coordinates appear 1-based, hence the -1 offsets.
                        cropped = img[ymin[i]-1:ymax[i]-1, xmin[i]-1:xmax[i]-1, :]
                        try:
                            cv2.imwrite(os.path.join('cropped', filepath[:-4] + str(i) + '.jpg'), cropped)
                        except:
                            # Deliberate best-effort: log the offending box and move on.
                            print(filepath)
                            print(img.shape)
                            print(xmin[i]-1, xmax[i]-1, ymin[i]-1, ymax[i]-1)
    # Merge the cropped test images into train, then re-split 60/20/20.
    for subdir, dirs, files in os.walk('cropped/PlantDoc-Dataset/test'):
        for file in files:
            filepath = os.path.join(subdir, file)
            if filepath.endswith('.jpg'):
                shutil.move(filepath, filepath.replace('test', 'train'))
    shutil.rmtree('cropped/PlantDoc-Dataset/test')
    shutil.move('cropped/PlantDoc-Dataset/train', 'cropped/PlantDoc-Dataset/all')
    split_folders.ratio('cropped/PlantDoc-Dataset/all', output='cropped/PlantDoc-Dataset/splitted', seed=1337, ratio=(0.6, 0.2, 0.2))
    shutil.rmtree('cropped/PlantDoc-Dataset/all')
    shutil.move('cropped/PlantDoc-Dataset/splitted/val', 'cropped/PlantDoc-Dataset/val')
    shutil.move('cropped/PlantDoc-Dataset/splitted/train', 'cropped/PlantDoc-Dataset/train')
    shutil.move('cropped/PlantDoc-Dataset/splitted/test', 'cropped/PlantDoc-Dataset/test')
    shutil.rmtree('cropped/PlantDoc-Dataset/splitted')
    # Drop this class from every split of the cropped dataset.
    shutil.rmtree('cropped/PlantDoc-Dataset/train/Tomato two spotted spider mites leaf')
    shutil.rmtree('cropped/PlantDoc-Dataset/test/Tomato two spotted spider mites leaf')
    shutil.rmtree('cropped/PlantDoc-Dataset/val/Tomato two spotted spider mites leaf')
    # Repeat the merge/re-split/class-drop on the original (uncropped) dataset.
    for subdir, dirs, files in os.walk('PlantDoc-Dataset/test'):
        for file in files:
            filepath = os.path.join(subdir, file)
            if filepath.endswith('.jpg'):
                shutil.move(filepath, filepath.replace('test', 'train'))
    shutil.rmtree('PlantDoc-Dataset/test')
    shutil.move('PlantDoc-Dataset/train', 'PlantDoc-Dataset/all')
    split_folders.ratio('PlantDoc-Dataset/all', output='PlantDoc-Dataset/splitted', seed=1337, ratio=(0.6, 0.2, 0.2))
    shutil.rmtree('PlantDoc-Dataset/all')
    shutil.move('PlantDoc-Dataset/splitted/val', 'PlantDoc-Dataset/val')
    shutil.move('PlantDoc-Dataset/splitted/train', 'PlantDoc-Dataset/train')
    shutil.move('PlantDoc-Dataset/splitted/test', 'PlantDoc-Dataset/test')
    shutil.rmtree('PlantDoc-Dataset/splitted')
    shutil.rmtree('PlantDoc-Dataset/train/Tomato two spotted spider mites leaf')
    shutil.rmtree('PlantDoc-Dataset/test/Tomato two spotted spider mites leaf')
    shutil.rmtree('PlantDoc-Dataset/val/Tomato two spotted spider mites leaf')
#Splitting the dataset into Training and Validation import split_folders split_folders.ratio('flowers/flowers/flowers', output="output", seed=1337, ratio=(.8, .2)) #importing the libraries required import numpy as np import tensorflow as tf from tensorflow import keras from keras.models import Sequential from keras.layers import Conv2D from keras.layers import MaxPooling2D from keras.layers import Dense from keras.layers import Flatten from keras.layers import Dropout import matplotlib.pyplot as plt from PIL import Image # Creating the Model model = Sequential() # First-Layer model.add(Conv2D(32, (5, 5), input_shape=(250, 250, 3), activation='relu')) model.add(MaxPooling2D((2, 2))) #Second-Layer model.add(Conv2D(64, (5, 5), activation='relu')) model.add(MaxPooling2D((2, 2))) #Third-Layer