def __init__(self, data_path):
    """Wire up the data source, network model and image helpers for one drive folder.

    data_path is the drive-data folder; one trailing '/' is dropped.
    The csv file is looked up inside that folder and is named after the
    folder itself: ``<data_path>/<folder name><const.DATA_EXT>``.
    """
    if data_path[-1] == '/':
        data_path = data_path[:-1]

    # Last path component; rpartition returns the whole string when the
    # path contains no '/' at all.
    folder_name = data_path.rpartition('/')[2]

    # the folder name doubles as the csv file name
    self.csv_path = data_path + '/' + folder_name + const.DATA_EXT

    # filled in later by the training steps
    self.train_generator = None
    self.valid_generator = None
    self.train_hist = None
    self.drive = None

    self.data_path = data_path

    self.drive = DriveData(self.csv_path)
    self.net_model = NetModel(data_path)
    self.image_process = ImageProcess()
    self.data_aug = DataAugmentation()
def train(Model, epochs, generator, discriminator):
    """Train on every rotated sample, one sample per step, for ``epochs`` epochs.

    Args:
        Model: forwarded unchanged to ``train_each_step``.
        epochs: number of passes over the data.
        generator: forwarded unchanged to ``train_each_step``.
        discriminator: forwarded unchanged to ``train_each_step``.

    The module-level flag ``make_noisy`` decides whether the input samples
    are passed through ``make_data_noisy`` before training.
    """
    dataloader = DA()
    noise, original = dataloader.rotated_data()
    if make_noisy:
        noise = make_data_noisy(noise)
    length = noise.shape[0]

    # The progress bar is always 50 characters wide; fill level and
    # percentage are scaled by `epochs` so they are correct for any epoch
    # count (output is unchanged for the former implicit case epochs == 50).
    bar_width = 50
    time_sum = 0.
    for epoch in range(epochs):
        finished = epoch + 1
        filled = finished * bar_width // epochs
        print("epoch {} is working" .format(finished))
        print("Progress|", "*" * filled, " " * (bar_width - filled),
              "|{}%".format(100 * finished // epochs))

        start_time = time.time()
        for idx in range(length):
            train_each_step(Model=Model,
                            noise=noise[idx],
                            original=original[idx],
                            generator=generator,
                            discriminator=discriminator)
        display.clear_output(wait=True)

        time_tmp = time.time() - start_time
        time_sum += time_tmp
        print("Time for epoch {} is {} sec" .format(finished, time_tmp))

    display.clear_output(wait=True)
    print("================================================")
    print("Training complete!!")
    print("Time for all epoch({}) is {} sec" .format(epochs, time_sum))
    print("================================================")
def train():
    """Run the whole pipeline: sentence extraction, tokenizer training,
    augmentation, filtering, tokenized-corpus preparation, data generation
    and finally model training."""
    corpus = './../data/taiyaku.tsv'

    # prepare data for sentencepiece training (one sentence file per language)
    sentence_files = (
        ('./../data/ja_sentences.tsv', 'ja'),
        ('./../data/en_sentences.tsv', 'en'),
    )
    for out_path, lang in sentence_files:
        PrepareData.extract_each_sentence(corpus, out_path, lang)

    # train sentencepiece model
    Tokenizer.train_sentencepiece(32000, 'ja')

    # data augment
    augmenter = DataAugmentation()
    augmenter.add_aux_corpus()

    # data filtering process
    corpus_filter = Filter(corpus)
    corpus_filter.one_multi_filter()
    print('1st done')
    corpus_filter.src_equal_trg_filter()
    print('second done')
    corpus_filter.non_alphabet_filter(0.5)
    print('')
    corpus_filter.correct_lang_filter()
    corpus_filter.update_df()

    # prepare data for feeding to a model
    PrepareData.prepare_tokenized_taiyaku('./../data/tokenized_taiyaku.tsv')

    trainer = PyT2T()
    trainer.data_gen()  # data generate
    trainer.train()     # train phase
def __init__(self, path, shape, batch=1, shuffle=False, augment=False):
    """Store the generator settings, load the sample index and prepare the
    first epoch.

    ``self.augment`` ends up as a ``DataAugmentation`` instance when
    ``augment`` is truthy and as ``False`` otherwise.
    """
    self.path = path
    self.shape = shape
    self.batch = batch
    self.shuffle = shuffle

    loaded = self.load()
    self.list_ids, self.n_samples, self.n_classes = loaded
    self.on_epoch_end()

    self.augment = DataAugmentation() if augment else False
def __init__(self):
    """Load ``data.npy`` / ``labels.npy`` from ``FLAGS.dataset_data`` and cut
    them, in order, into train (first 70%), validation (next 10%) and test
    (remainder) partitions."""
    data_dir = os.path.abspath(FLAGS.dataset_data)
    all_data = np.load(os.path.join(data_dir, 'data.npy'))
    all_labels = np.load(os.path.join(data_dir, 'labels.npy'))

    total = len(all_data)
    train_end = int(total * 0.7)
    val_end = train_end + int(total * 0.1)

    self.val_data = all_data[train_end:val_end]
    self.val_labels = all_labels[train_end:val_end]
    self.test_data = all_data[val_end:]
    self.test_labels = all_labels[val_end:]
    self.data = all_data[:train_end]
    self.labels = all_labels[:train_end]

    # partition lookup by name
    self.d = {
        'train': (self.data, self.labels),
        'val': (self.val_data, self.val_labels),
        'test': (self.test_data, self.test_labels)
    }

    self.iters = self.get_iters_count()
    self.val_iters = self.get_val_iters_count()
    self.test_iters = self.get_test_iters_count()

    # read positions inside each partition
    self.index = 0
    self.val_index = 0
    self.test_index = 0

    self.data_augmentation = DataAugmentation()
def __data_generation(self, list_IDs_temp):
    """Generate one batch of samples for the given IDs.

    Args:
        list_IDs_temp: IDs of the samples to load; each ID names a file
            ``<hypes["dataset_dir"]><ID>.npy`` and a key of ``self.labels``.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_samples, *dim, n_channels)``
        and ``y`` holds the class of every sample (one-hot encoded unless
        ``hypes["loss"]`` is ``"scc"``). With online augmentation enabled
        the augmented samples are appended after the original ones.
    """
    # Size the buffers by the IDs actually passed in: np.empty does not
    # initialise memory, so sizing by self.batch_size would leave garbage
    # rows/labels in the result whenever fewer IDs are supplied.
    n_samples = len(list_IDs_temp)
    X = np.empty((n_samples, *self.dim, self.n_channels))
    y = np.empty((n_samples), dtype=int)

    # Generate data
    for i, ID in enumerate(list_IDs_temp):
        # Store sample
        X[i, ] = np.load(self.hypes["dataset_dir"] + ID + '.npy')
        # Store class
        y[i] = self.labels[ID]

    # apply data augmentation (online only, and only while training)
    online_augmentation = (self.training == True
                           and self.hypes["data_augmentation"] == True
                           and self.hypes["offline_augmentation"] == False)
    if online_augmentation:
        X_augmented, y_augmented = DataAugmentation.apply_augmentation(
            list(X), list(y), self.hypes)
        X = np.concatenate((X, X_augmented))
        y = np.concatenate((y, y_augmented))

    if self.hypes["loss"] != "scc":
        y = keras.utils.to_categorical(y, num_classes=self.n_classes)
    return X, y
def __init__(self, data_path):
    """Wire up the data source, network model and image helpers for one drive folder.

    Args:
        data_path: drive-data folder, with or without one trailing '/'.
            The csv file is expected inside that folder and named after
            it: ``<data_path>/<folder name>.csv``.
    """
    # Normalise first: slicing from rfind('/') yields an empty folder name
    # for 'dir/' and only the last character for a path without any '/'.
    if data_path.endswith('/'):
        data_path = data_path[:-1]
    # last path component (the whole string when there is no '/')
    model_name = data_path.rpartition('/')[2]

    # the folder name doubles as the csv file name
    self.csv_path = data_path + '/' + model_name + '.csv'

    # filled in later by the training steps
    self.train_generator = None
    self.valid_generator = None
    self.train_hist = None
    self.drive = None

    self.config = Config()

    self.data_path = data_path

    self.drive = DriveData(self.csv_path)
    self.net_model = NetModel(data_path)
    self.image_process = ImageProcess()
    self.data_aug = DataAugmentation()
def preprocess(self, samples, augment_data=False):
    """Format the features and labels as necessary for processing.

    Every row of ``samples`` contributes its ``emotion`` (as int label) and
    its space-separated ``pixels`` (as float array). Returns the pixel
    arrays, their HOG feature vectors and the labels, each as an ndarray.
    """
    labels = []
    pixel_rows = []
    for _, row in samples.iterrows():
        labels.append(int(row.emotion))
        values = list(map(float, row.pixels.split(" ")))
        pixel_rows.append(np.asarray(values))

    # Augmenting dataset for training
    if augment_data:
        augmenter = DataAugmentation()
        pixel_rows, labels = augmenter.augment_dataset(pixel_rows, labels)

    # Extracting HOG Features
    hog_vectors = []
    for pixels in pixel_rows:
        descriptor = Hog_descriptor(pixels, cell_size=2, bin_size=8)
        vector, _ = descriptor.extract()
        hog_vectors.append(vector)

    return np.asarray(pixel_rows), np.asarray(hog_vectors), np.asarray(labels)
class DataGenerator(Sequence):
    """Batch generator driven by a comma-separated index file.

    Every line of the index file is split on ','; field 1 is the image file
    name and fields 2.. are that image's labels (multi-label). Images are
    read from the directory that contains the index file.
    """

    def __init__(self, path, shape, batch=1, shuffle=False, augment=False):
        """
        Args:
            path: path of the index file (``self.path`` is replaced by the
                file's directory once the index has been loaded).
            shape: shape of one image inside a batch; ``shape[0]`` and
                ``shape[1]`` are the first two array axes, ``shape[2]`` the
                channels.
            batch: number of samples per batch.
            shuffle: reshuffle the sample order at every epoch end.
            augment: when truthy, run every image through
                ``DataAugmentation.augment_one``.
        """
        self.path = path
        self.shape = shape
        self.batch = batch
        self.shuffle = shuffle
        self.list_ids, self.n_samples, self.n_classes = self.load()
        self.on_epoch_end()
        self.augment = DataAugmentation() if augment else False

    def __len__(self):
        """Number of complete batches per epoch."""
        return int(self.n_samples / self.batch)

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X, y)``."""
        batch_indexes = self.indexes[index * self.batch:(index + 1) * self.batch]
        X = np.empty((self.batch, self.shape[0], self.shape[1], self.shape[2]))
        y = np.empty((self.batch, self.n_classes))
        for i, sample_index in enumerate(batch_indexes):
            X[i, ], y[i, ] = self.get_sample(sample_index)
        return X, y

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when requested."""
        self.indexes = np.arange(self.n_samples)
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def preprocess(self, image, expand=False):
        """Resize, optionally augment and scale ``image`` to [0, 1] floats.

        With ``expand`` a leading batch axis of size 1 is added.
        """
        # cv2.resize expects dsize as (width, height), i.e. (cols, rows),
        # while the batch buffer is laid out as (shape[0], shape[1], ...).
        # Passing shape[:2] directly only works for square shapes.
        image = cv2.resize(image, (self.shape[1], self.shape[0]))
        if self.augment:
            image = self.augment.augment_one(image)
        image = image.astype('float32') / 255.
        if expand:
            image = np.expand_dims(image, axis=0)
        return image

    def get_sample(self, index):
        """Load sample ``index`` and return ``(image, one_hot_label)``.

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        fname, label = self.list_ids[index]
        fpath = os.path.join(self.path, fname)
        image = cv2.imread(fpath)
        if image is None:
            # cv2.imread signals failure by returning None; fail here with
            # the offending path instead of deep inside cv2.resize.
            raise FileNotFoundError('Cannot read image: ' + fpath)
        return self.preprocess(image), self.one_hot(label)

    def one_hot(self, labels):
        """Multi-hot encode ``labels`` against ``self.classes``."""
        oh_label = np.zeros((self.n_classes))
        for lab in labels:
            if len(lab) > 0:
                oh_label[self.classes.index(lab)] = 1.
        return oh_label

    def load(self):
        """Parse the index file.

        Returns:
            ``(list_ids, n_samples, n_classes)`` where ``list_ids`` holds
            ``[file name, labels]`` pairs. A missing index file yields
            ``([], 0, 0)``.
        """
        # Always define these so later attribute access cannot fail when
        # the index file does not exist.
        self.class_weight = defaultdict(int)
        self.classes = []
        if not os.path.exists(self.path):
            return [], 0, 0

        index_dir = os.path.dirname(self.path)
        list_ids = []
        with open(self.path) as fp:
            for line in fp:
                fields = line.strip().split(',')
                if len(fields) < 2:
                    # blank or malformed line: no file name field
                    continue
                fname, labels = fields[1], fields[2:]
                list_ids.append([fname, labels])
                for lab in labels:
                    if len(lab) > 0:
                        self.class_weight[lab] += 1

        # from now on self.path is the directory the images live in
        self.path = index_dir
        self.classes = list(self.class_weight.keys())
        print('classes:', self.classes, self.class_weight)
        return list_ids, len(list_ids), len(self.classes)
def main():
    """Train (or load) three one-vs-all models and write a submission csv.

    Behaviour is driven by module-level settings (``prepareData``,
    ``keepAspectRatio``, ``useAditional``, ``loadPreviousModel``,
    ``saveNetArchImage``, ``onlyEvaluate``, ...). One model is handled per
    target array; the class-1 probabilities of the three models become the
    ``Type_1`` .. ``Type_3`` columns of the submission.
    """
    if prepareData:
        imgUtils = ImageUtils(imgSize,
                              useAditional=useAditional,
                              keepAspectRatio=keepAspectRatio,
                              useKaggleData=useKaggleData)
        imgUtils.dataPreparationOVA()

    K.set_image_data_format('channels_first')
    K.set_floatx('float32')
    np.random.seed(17)

    print("\nLoading train data...\n" + SEPARATOR)
    # Data file names differ only by the "Extra" marker and the
    # aspect-ratio suffix; target file names only by the "Extra" marker.
    aspect_suffix = '_OrigAspectRatio' if keepAspectRatio else ''
    if useAditional:
        train_data = np.load('saved_data/trainExtra' + str(imgSize) + aspect_suffix + '.npy')
        target_files = ['saved_data/trainExtraOVA1_target.npy',
                        'saved_data/trainExtraOVA2_target.npy',
                        'saved_data/trainExtraOVA3_target.npy']
    else:
        train_data = np.load('saved_data/train' + str(imgSize) + aspect_suffix + '.npy')
        target_files = ['saved_data/train_targetOVA1.npy',
                        'saved_data/train_targetOVA2.npy',
                        'saved_data/train_targetOVA3.npy']
    train_target = [np.load(target_file) for target_file in target_files]

    print("\nMaking data augmentation...\n" + SEPARATOR)
    datagen = da.prepareDataAugmentation(train_data=train_data)

    model = []
    currentDate = datetime.today()
    timeStamp = currentDate.strftime("%d-%m-%Y_%H-%M")

    for i, target in enumerate(train_target):
        x_train, x_val_train, y_train, y_val_train = train_test_split(
            train_data, target,
            test_size=percentTrainForValidation,
            random_state=17)

        print("\nCreating model " + str(i + 1) + "...\n" + SEPARATOR)
        if loadPreviousModel:
            model.append(load_model(pathToPreviousModel[i]))
            print("Loaded model from: " + pathToPreviousModel[i])
            model[i].summary()
        else:
            model.append(create_model())

        print("\nTraining Set shape (num Instances, RGB chanels, width, height): " + str(x_train.shape)
              + "\nTraining labels: " + str(y_train.shape)
              + "\nValidating set shape: " + str(x_val_train.shape)
              + "\nValidating set labels: " + str(y_val_train.shape)
              + "\n" + SEPARATOR)

        if saveNetArchImage:
            # One image per model: a shared file name would be overwritten
            # on every iteration and only the last model would survive.
            plot_model(model[i],
                       to_file='saved_data/model_' + str(i + 1) + '_' + timeStamp + '.png')

        if onlyEvaluate:
            print("\nEvaluating Model " + str(i + 1) + "...\n" + SEPARATOR)
            evaluateModel(model[i], x_val_train, y_val_train)
        else:
            print("\nFitting model " + str(i + 1) + "...\n" + SEPARATOR)
            checkPoint = ModelCheckpoint(
                "saved_data/OVA_model" + str(i + 1) + "_ep{epoch:02d}_" + timeStamp + ".hdf5",
                save_best_only=True)
            model[i].fit_generator(
                datagen.flow(x_train, y_train, batch_size=batchSize, shuffle=True),
                steps_per_epoch=10,
                epochs=NumEpoch,
                validation_data=(x_val_train, y_val_train),
                callbacks=[checkPoint])  # , verbose=2)

    print("\nLoading test data...\n" + SEPARATOR)
    test_data = np.load('saved_data/test' + str(imgSize) + aspect_suffix + '.npy')
    test_id = np.load('saved_data/test_id.npy')

    pred = []
    for i, net in enumerate(model):
        print("\nPredicting with model " + str(i + 1) + "...\n" + SEPARATOR)
        pred.append(net.predict_proba(test_data))

    # column 1 of every prediction is used as that model's class probability
    predictions = np.transpose(np.vstack((pred[0][:, 1], pred[1][:, 1], pred[2][:, 1])))
    df = pd.DataFrame(predictions, columns=['Type_1', 'Type_2', 'Type_3'])
    df['image_name'] = test_id
    df.to_csv("../submission/OVA_" + timeStamp + ".csv", index=False)
class DriveTrain:
    """Trains the steering network on one recorded drive-data folder.

    Settings are read from ``Config.config``; file-name pieces come from
    ``const``.
    """

    ###########################################################################
    # data_path = 'path_to_drive_data'  e.g. ../data/2017-09-22-10-12-34-56/'
    def __init__(self, data_path):
        """
        Args:
            data_path: drive-data folder, with or without one trailing '/'.
                The csv file is expected at
                ``<data_path>/<folder name><const.DATA_EXT>``.
        """
        # endswith() also copes with an empty path (indexing [-1] would not)
        if data_path.endswith('/'):
            data_path = data_path[:-1]
        # last path component (the whole string when there is no '/')
        model_name = data_path.rpartition('/')[2]

        # the folder name doubles as the csv file name
        self.csv_path = data_path + '/' + model_name + const.DATA_EXT

        self.train_generator = None
        self.valid_generator = None
        self.train_hist = None
        self.drive = None

        self.data_path = data_path

        self.drive = DriveData(self.csv_path)
        self.net_model = NetModel(data_path)
        self.image_process = ImageProcess()
        self.data_aug = DataAugmentation()

    ###########################################################################
    #
    def _prepare_data(self):
        """Read the drive data and split it into train / validation samples."""
        self.drive.read()

        from sklearn.model_selection import train_test_split

        samples = list(zip(self.drive.image_names, self.drive.measurements))
        self.train_data, self.valid_data = train_test_split(
            samples, test_size=Config.config['validation_rate'])

        self.num_train_samples = len(self.train_data)
        self.num_valid_samples = len(self.valid_data)

        print('Train samples: ', self.num_train_samples)
        print('Valid samples: ', self.num_valid_samples)

    ###########################################################################
    #
    def _build_model(self, show_summary=True):
        """Create the train / validation batch generators.

        Args:
            show_summary: print the network summary when truthy.
        """

        def _generator(samples, batch_size=Config.config['batch_size']):
            """Endlessly yield ``(X, y)`` batches built from ``samples``.

            Each sample contributes its processed image plus whichever
            augmented copies (flip / brightness / shift) are enabled.
            """
            num_samples = len(samples)
            while True:  # Loop forever so the generator never terminates
                if Config.config['lstm'] is False:
                    samples = sklearn.utils.shuffle(samples)

                for offset in range(0, num_samples, batch_size):
                    batch_samples = samples[offset:offset + batch_size]

                    images = []
                    measurements = []
                    for image_name, measurement in batch_samples:
                        image_path = self.data_path + '/' + image_name
                        image = cv2.imread(image_path)
                        if image is None:
                            # cv2.imread returns None on failure; report
                            # the path instead of failing inside cv2.resize
                            raise FileNotFoundError(
                                'Cannot read image: ' + image_path)
                        image = cv2.resize(
                            image,
                            (Config.config['input_image_width'],
                             Config.config['input_image_height']))
                        image = self.image_process.process(image)
                        images.append(image)

                        steering_angle, throttle = measurement
                        tolerance = Config.config['steering_angle_jitter_tolerance']
                        scale = Config.config['steering_angle_scale']

                        # treat jitter around straight-ahead as exactly 0
                        if abs(steering_angle) < tolerance:
                            steering_angle = 0
                        measurements.append(steering_angle * scale)

                        if Config.config['data_aug_flip'] is True:
                            # Flipping the image
                            flip_image, flip_steering = self.data_aug.flipping(
                                image, steering_angle)
                            images.append(flip_image)
                            measurements.append(flip_steering * scale)

                        if Config.config['data_aug_bright'] is True:
                            # Changing the brightness of image
                            # (only for samples that actually steer)
                            if steering_angle > tolerance or \
                                    steering_angle < -tolerance:
                                bright_image = self.data_aug.brightness(image)
                                images.append(bright_image)
                                measurements.append(steering_angle * scale)

                        if Config.config['data_aug_shift'] is True:
                            # Shifting the image
                            shift_image, shift_steering = self.data_aug.shift(
                                image, steering_angle)
                            images.append(shift_image)
                            measurements.append(shift_steering * scale)

                    X_train = np.array(images)
                    y_train = np.array(measurements)

                    if Config.config['lstm'] is True:
                        # add a time axis of length 1 for the LSTM input
                        X_train = X_train.reshape(
                            -1, 1,
                            Config.config['input_image_height'],
                            Config.config['input_image_width'],
                            Config.config['input_image_depth'])
                        y_train = y_train.reshape(-1, 1, 1)

                    if Config.config['lstm'] is False:
                        yield sklearn.utils.shuffle(X_train, y_train)
                    else:
                        yield X_train, y_train

        self.train_generator = _generator(self.train_data)
        self.valid_generator = _generator(self.valid_data)

        if show_summary:
            self.net_model.model.summary()

    ###########################################################################
    #
    def _start_training(self):
        """Fit the network with checkpointing and early stopping.

        Raises:
            NameError: if the generators have not been built yet.
        """
        if self.train_generator is None:
            raise NameError('Generators are not ready.')

        ######################################################################
        # callbacks
        from keras.callbacks import ModelCheckpoint, EarlyStopping

        callbacks = []

        # checkpoint
        weight_filename = self.data_path + '_' + const.CONFIG_YAML + '_ckpt'
        checkpoint = ModelCheckpoint(weight_filename + '.h5',
                                     monitor='val_loss',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='min')
        callbacks.append(checkpoint)

        # early stopping
        earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=3,
                                  verbose=1, mode='min')
        callbacks.append(earlystop)

        batch_size = Config.config['batch_size']
        # At least one step each, so a data set smaller than one batch
        # still trains/validates instead of requesting zero steps.
        train_steps = max(1, self.num_train_samples // batch_size)
        valid_steps = max(1, self.num_valid_samples // batch_size)

        self.train_hist = self.net_model.model.fit_generator(
            self.train_generator,
            steps_per_epoch=train_steps,
            epochs=Config.config['num_epochs'],
            validation_data=self.valid_generator,
            validation_steps=valid_steps,
            verbose=1,
            callbacks=callbacks)

    ###########################################################################
    #
    def _plot_training_history(self):
        """Plot the training and validation loss for each epoch."""
        print(self.train_hist.history.keys())

        plt.plot(self.train_hist.history['loss'])
        plt.plot(self.train_hist.history['val_loss'])
        plt.ylabel('mse loss')
        plt.xlabel('epoch')
        plt.legend(['training set', 'validatation set'], loc='upper right')
        plt.show()

    ###########################################################################
    #
    def train(self, show_summary=True):
        """Run the full cycle: prepare data, build generators, train, save,
        plot the history and print the configuration summary."""
        self._prepare_data()
        self._build_model(show_summary)
        self._start_training()
        self.net_model.save()
        self._plot_training_history()
        Config.summary()
img = cv2.imread('../input/train/{}.jpg'.format(f)) x_train.append(cv2.resize(img, (im_size, im_size))) y_train.append(labels[i]) # Using the stratify parameter on treain_test_split the split should be equally distributed per classes. # Try a small percentage of dataset for validation (5%) x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.05, random_state=42, stratify=y_train) # Data argumentation data_aug = DataAugmentation(x_train, options={ 'horizontal_flips': True, 'rotation': True, 'rotation_config': [(20, 1.3)] }) for i, images in enumerate(tqdm(data_aug)): for image in images: x_train.append(image) y_train.append(y_train[i]) print('Train set become', len(x_train)) # build np array and normalise them X_train = np.array(x_train, np.float32) / 255. Y_train = np.array(y_train, np.uint8) X_valid = np.array(x_valid, np.float32) / 255. Y_valid = np.array(y_valid, np.uint8) print('shape X_train', X_train.shape)
class DriveTrain:
    """Trains the steering network on one recorded drive-data folder,
    using the settings held by a ``Config`` instance."""

    ###########################################################################
    # data_path = 'path_to_drive_data'  e.g. ../data/2017-09-22-10-12-34-56'
    def __init__(self, data_path):
        """
        Args:
            data_path: drive-data folder, with or without one trailing '/'.
                The csv file is expected at
                ``<data_path>/<folder name>.csv``.
        """
        # Normalise first: slicing from rfind('/') yields an empty folder
        # name for 'dir/' and only the last character for a path that
        # contains no '/' at all.
        if data_path.endswith('/'):
            data_path = data_path[:-1]
        # last path component (the whole string when there is no '/')
        model_name = data_path.rpartition('/')[2]

        # the folder name doubles as the csv file name
        self.csv_path = data_path + '/' + model_name + '.csv'

        self.train_generator = None
        self.valid_generator = None
        self.train_hist = None
        self.drive = None

        self.config = Config()

        self.data_path = data_path

        self.drive = DriveData(self.csv_path)
        self.net_model = NetModel(data_path)
        self.image_process = ImageProcess()
        self.data_aug = DataAugmentation()

    ###########################################################################
    #
    def _prepare_data(self):
        """Read the drive data and split it into train / validation samples."""
        self.drive.read()

        from sklearn.model_selection import train_test_split

        samples = list(zip(self.drive.image_names, self.drive.measurements))
        self.train_data, self.valid_data = train_test_split(
            samples, test_size=self.config.valid_rate)

        self.num_train_samples = len(self.train_data)
        self.num_valid_samples = len(self.valid_data)

        print('Train samples: ', self.num_train_samples)
        print('Valid samples: ', self.num_valid_samples)

    ###########################################################################
    #
    def _build_model(self, show_summary=True):
        """Create the train / validation batch generators.

        Args:
            show_summary: print the network summary when truthy.
        """

        def _generator(samples, batch_size=self.config.batch_size):
            """Endlessly yield shuffled ``(X, y)`` batches.

            Every sample contributes its processed image, a flipped copy,
            a shifted copy and - for angles outside (-0.015, 0.01) - a
            brightness-changed copy.
            """
            num_samples = len(samples)
            while True:  # Loop forever so the generator never terminates
                samples = sklearn.utils.shuffle(samples)

                for offset in range(0, num_samples, batch_size):
                    batch_samples = samples[offset:offset + batch_size]

                    images = []
                    measurements = []
                    for image_name, measurement in batch_samples:
                        image_path = self.data_path + '/' + image_name + \
                            self.config.fname_ext
                        image = cv2.imread(image_path)
                        if image is None:
                            # cv2.imread returns None on failure; report
                            # the path instead of failing inside cv2.resize
                            raise FileNotFoundError(
                                'Cannot read image: ' + image_path)
                        image = cv2.resize(image,
                                           (self.config.image_size[0],
                                            self.config.image_size[1]))
                        image = self.image_process.process(image)
                        images.append(image)

                        steering_angle, throttle = measurement
                        measurements.append(steering_angle)

                        ###------------------Flipping the image------------------###
                        flip_image, flip_steering = self.data_aug.flipping(
                            image, steering_angle)
                        images.append(flip_image)
                        measurements.append(flip_steering)

                        ###-----------Changing the brightness of image-----------###
                        if steering_angle > 0.01 or steering_angle < -0.015:
                            bright_image = self.data_aug.brightness(image)
                            images.append(bright_image)
                            measurements.append(steering_angle)

                        ###------------------Shifting the image------------------###
                        shift_image, shift_steering = self.data_aug.shift(
                            image, steering_angle)
                        images.append(shift_image)
                        measurements.append(shift_steering)

                    X_train = np.array(images)
                    y_train = np.array(measurements)

                    if self.config.typeofModel == 4 or self.config.typeofModel == 5:
                        # these model types get an extra axis of length 1
                        X_train = X_train.reshape(
                            -1, 1,
                            self.config.image_size[1],
                            self.config.image_size[0],
                            self.config.image_size[2])
                        y_train = y_train.reshape(-1, 1, 1)

                    yield sklearn.utils.shuffle(X_train, y_train)

        self.train_generator = _generator(self.train_data)
        self.valid_generator = _generator(self.valid_data)

        if show_summary:
            self.net_model.model.summary()

    ###########################################################################
    #
    def _start_training(self):
        """Fit the network with checkpointing and early stopping.

        Raises:
            NameError: if the generators have not been built yet.
        """
        if self.train_generator is None:
            raise NameError('Generators are not ready.')

        ######################################################################
        # callbacks
        from keras.callbacks import ModelCheckpoint, EarlyStopping

        callbacks = []

        # checkpoint
        checkpoint = ModelCheckpoint(self.net_model.name + '.h5',
                                     monitor='val_loss',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='min')
        callbacks.append(checkpoint)

        # early stopping
        earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=0,
                                  verbose=1, mode='min')
        callbacks.append(earlystop)

        batch_size = self.config.batch_size
        # At least one step each, so a data set smaller than one batch
        # still trains/validates instead of requesting zero steps.
        train_steps = max(1, self.num_train_samples // batch_size)
        valid_steps = max(1, self.num_valid_samples // batch_size)

        self.train_hist = self.net_model.model.fit_generator(
            self.train_generator,
            steps_per_epoch=train_steps,
            epochs=self.config.num_epochs,
            validation_data=self.valid_generator,
            validation_steps=valid_steps,
            verbose=1,
            callbacks=callbacks)

    ###########################################################################
    #
    def _plot_training_history(self):
        """Plot the training and validation loss for each epoch."""
        print(self.train_hist.history.keys())

        plt.plot(self.train_hist.history['loss'])
        plt.plot(self.train_hist.history['val_loss'])
        plt.ylabel('mse loss')
        plt.xlabel('epoch')
        plt.legend(['training set', 'validatation set'], loc='upper right')
        plt.show()

    ###########################################################################
    #
    def train(self, show_summary=True):
        """Run the full cycle: prepare data, build generators, train, save
        and plot the history."""
        self._prepare_data()
        self._build_model(show_summary)
        self._start_training()
        self.net_model.save()
        self._plot_training_history()
@author: ninad """ import os import cv2 import pandas as pd import sys import numpy as np from progressbar import ProgressBar from data_augmentation import DataAugmentation from image_process import ImageProcess image_process = ImageProcess() data_aug = DataAugmentation() csv_fname = '/home/mir-lab/Ninad_Thesis/Test/Test.csv' csv_header = ['image_fname', 'steering_angle'] df = pd.read_csv(csv_fname, names=csv_header, index_col=False) num_data = len(df) text = open('/home/mir-lab/Ninad_Thesis/Test/Shift/Shift.txt', 'w+') bar = ProgressBar() for i in bar(range(num_data)): image_name = df.loc[i]['image_fname'] steering = df.loc[i]['steering_angle'] image_path = '/home/mir-lab/Ninad_Thesis/Test/' + image_name + '.jpg' image = cv2.imread(image_path) image = cv2.resize(image, (160, 70)) image = image_process.process(image) shift_image, shift_steering = data_aug.shift(image, steering)
def main():
    """Train, grid-search or load a single model, then evaluate it or
    predict with it.

    Behaviour is driven by module-level settings (``prepareData``,
    ``keepAspectRatio``, ``useAditional``, ``loadPreviousModel``,
    ``hiperParamOpt``, ``saveNetArchImage``, ``onlyEvaluate``, ...).
    """
    if prepareData:
        imgUtils = ImageUtils(imgSize,
                              useAditional=useAditional,
                              keepAspectRatio=keepAspectRatio,
                              useKaggleData=useKaggleData)
        imgUtils.dataPreparation()

    K.set_image_data_format('channels_first')
    K.set_floatx('float32')
    np.random.seed(seed)

    print("\nLoading train data...\n" + SEPARATOR)
    # Data file names differ only by the "Extra" marker and the
    # aspect-ratio suffix; the target file only by the "Extra" marker.
    aspect_suffix = '_OrigAspectRatio' if keepAspectRatio else ''
    if useAditional:
        train_data = np.load('saved_data/trainExtra' + str(imgSize) + aspect_suffix + '.npy')
        train_target = np.load('saved_data/trainExtra_target.npy')
    else:
        train_data = np.load('saved_data/train' + str(imgSize) + aspect_suffix + '.npy')
        train_target = np.load('saved_data/train_target.npy')

    x_train, x_val_train, y_train, y_val_train = train_test_split(
        train_data, train_target,
        test_size=percentTrainForValidation,
        random_state=17)
    print("\nTraining Set shape (num Instances, RGB chanels, width, height): " + str(x_train.shape)
          + "\nTraining labels: " + str(y_train.shape)
          + "\nValidating set shape: " + str(x_val_train.shape)
          + "\nValidating set labels: " + str(y_val_train.shape)
          + "\n" + SEPARATOR)

    print("\nMaking data augmentation...\n" + SEPARATOR)
    datagen = da.prepareDataAugmentation(train_data=train_data)

    currentDate = datetime.today()
    timeStamp = currentDate.strftime("%d-%m-%Y_%H-%M")

    print("\nCreating model...\n" + SEPARATOR)
    # Stays None unless the grid search really ran, so later code never
    # touches an undefined name when a previous model was loaded instead.
    grid_result = None
    if loadPreviousModel:
        model = load_model(pathToPreviousModel)
        print("Loaded model from: " + pathToPreviousModel)
        model.summary()
    elif hiperParamOpt:
        print("\nHyperparameter optimization...\n" + SEPARATOR)
        model = KerasClassifier(build_fn=create_model,
                                epochs=NumEpoch,
                                batch_size=batchSize,
                                validation_split=percentTrainForValidation)
        grid_result = hiperParametersOptimization(model, x_train, y_train)

        # summarize results
        print("Best score: %f using parameters %s" %
              (grid_result.best_score_, grid_result.best_params_))
        means = grid_result.cv_results_['mean_test_score']
        stds = grid_result.cv_results_['std_test_score']
        params = grid_result.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))

        grid_result.best_estimator_.model.save(
            "saved_data/GridCV_Best_estimator" + timeStamp + ".h5")
        model = grid_result
    else:
        model = create_model()

    if saveNetArchImage:
        if grid_result is not None:
            # the fitted search exposes the winner as `best_estimator_`;
            # its underlying network is `.model` (as used for saving above)
            plot_model(grid_result.best_estimator_.model,
                       to_file='saved_data/model_' + timeStamp + '.png')
        else:
            plot_model(model,
                       to_file='saved_data/model_' + timeStamp + '.png')

    if onlyEvaluate:
        print("\nEvaluating Model...\n" + SEPARATOR)
        evaluateModel(model, x_val_train, y_val_train)
    else:
        # a grid-searched model is already fitted
        if hiperParamOpt is False:
            fitKerasModel(datagen, model, timeStamp,
                          x_train, x_val_train, y_train, y_val_train)
        makePrediction(model, timeStamp)
class DriveTrain:
    """Train a network model from one folder of recorded driving data.

    The folder is expected to contain the images plus a CSV file named after
    the folder itself (``<folder>/<folder><const.DATA_EXT>``). Training
    settings are read from the module-level ``config`` mapping and from
    ``Config``.
    """

    ###########################################################################
    # data_path = 'path_to_drive_data'  e.g. ../data/2017-09-22-10-12-34-56/'
    def __init__(self, data_path):
        """Resolve the file names and create the helper objects.

        Args:
            data_path: folder holding the drive data; one trailing '/' is
                tolerated.
        """
        if data_path.endswith('/'):
            data_path = data_path[:-1]

        loc_slash = data_path.rfind('/')
        if loc_slash != -1:  # there is '/' in the data path
            model_name = data_path[loc_slash + 1:]  # get folder name
        else:
            model_name = data_path

        # the csv file carries the same name as its folder
        self.csv_path = data_path + '/' + model_name + const.DATA_EXT

        self.train_generator = None
        self.valid_generator = None
        self.train_hist = None

        self.data_path = data_path
        self.model_name = data_path + '_' + Config.neural_net_yaml_name \
            + '_N' + str(config['network_type'])
        self.model_ckpt_name = self.model_name + '_ckpt'

        self.data = DriveData(self.csv_path)
        self.net_model = NetModel(data_path)
        self.image_process = ImageProcess()
        self.data_aug = DataAugmentation()

    ###########################################################################
    #
    def _prepare_data(self):
        """Read the CSV and split the samples into training and validation."""
        self.data.read()

        # put velocities regardless we use them or not for simplicity.
        samples = list(
            zip(self.data.image_names, self.data.velocities,
                self.data.measurements))

        if config['lstm'] is True:
            self.train_data, self.valid_data = self._prepare_lstm_data(samples)
        else:
            self.train_data, self.valid_data = train_test_split(
                samples, test_size=config['validation_rate'])

        self.num_train_samples = len(self.train_data)
        self.num_valid_samples = len(self.valid_data)

        print('Train samples: ', self.num_train_samples)
        print('Valid samples: ', self.num_valid_samples)

    ###########################################################################
    # group the samples by the number of timesteps
    def _prepare_lstm_data(self, samples):
        """Group consecutive samples into windows of ``lstm_timestep`` items.

        Args:
            samples: list of (image_name, velocity, measurement) triples in
                recording order.

        Returns:
            The ``train_test_split`` result over the windows, unshuffled so
            the temporal order is kept. Each window is a triple of lists
            (image_names, velocities, measurements).
        """
        num_samples = len(samples)
        timestep = config['lstm_timestep']

        # get the last index number
        # NOTE(review): this bound leaves out the final full window (the one
        # starting at num_samples - timestep); kept as in the original.
        steps = 1
        last_index = (num_samples - timestep) // steps

        image_names = []
        velocities = []
        measurements = []

        for start in range(0, last_index, steps):
            window = samples[start:start + timestep]
            image_names.append([name for name, _, _ in window])
            velocities.append([velocity for _, velocity, _ in window])
            measurements.append([measure for _, _, measure in window])

        windows = list(zip(image_names, velocities, measurements))
        return train_test_split(windows,
                                test_size=config['validation_rate'],
                                shuffle=False)

    ###########################################################################
    #
    def _load_image(self, image_name):
        """Read one image from the data folder and prepare it for the net.

        The image is cropped here only when the collected data was not
        already cropped, then resized to the configured input size and
        passed through ``self.image_process.process``.
        """
        image = cv2.imread(self.data_path + '/' + image_name)

        # if collected data is not cropped then crop here
        # otherwise do not crop.
        if Config.data_collection['crop'] is not True:
            crop = Config.data_collection
            image = image[crop['image_crop_y1']:crop['image_crop_y2'],
                          crop['image_crop_x1']:crop['image_crop_x2']]

        image = cv2.resize(
            image, (config['input_image_width'], config['input_image_height']))
        return self.image_process.process(image)

    ###########################################################################
    #
    def _build_model(self, show_summary=True):
        """Create the training and validation batch generators.

        Args:
            show_summary: when true, print the network summary afterwards.
        """

        def _make_target(steering_angle, throttle):
            # One training target: (scaled steering, throttle) for two
            # outputs, otherwise the scaled steering angle alone.
            scaled = steering_angle * config['steering_angle_scale']
            if config['num_outputs'] == 2:
                return (scaled, throttle)
            return scaled

        def _clamp_jitter(steering_angle):
            # Angles inside the tolerance band count as driving straight.
            if abs(steering_angle) < config['steering_angle_jitter_tolerance']:
                return 0
            return steering_angle

        def _data_augmentation(image, steering_angle):
            # Always returns (append, image, steering_angle) because the
            # caller unpacks exactly three values. The flip and shift
            # helpers are assumed to return an (image, steering_angle)
            # pair - TODO confirm against DataAugmentation.
            if config['data_aug_flip'] is True:
                # Flipping the image
                image, steering_angle = self.data_aug.flipping(
                    image, steering_angle)
                return True, image, steering_angle

            if config['data_aug_bright'] is True:
                # Changing the brightness of image
                tolerance = config['steering_angle_jitter_tolerance']
                if steering_angle > tolerance or steering_angle < -tolerance:
                    image = self.data_aug.brightness(image)
                return True, image, steering_angle

            if config['data_aug_shift'] is True:
                # Shifting the image
                image, steering_angle = self.data_aug.shift(
                    image, steering_angle)
                return True, image, steering_angle

            return False, image, steering_angle

        def _prepare_batch_samples(batch_samples):
            images = []
            velocities = []
            measurements = []

            for image_name, velocity, measurement in batch_samples:
                image = self._load_image(image_name)
                images.append(image)
                velocities.append(velocity)

                # if no brake data in collected data, brake values are dummy
                steering_angle, throttle, _brake = measurement
                steering_angle = _clamp_jitter(steering_angle)
                measurements.append(_make_target(steering_angle, throttle))

                # data augmentation
                append, image, steering_angle = _data_augmentation(
                    image, steering_angle)
                if append is True:
                    images.append(image)
                    velocities.append(velocity)
                    measurements.append(
                        _make_target(steering_angle, throttle))

            return images, velocities, measurements

        def _prepare_lstm_batch_samples(batch_samples):
            images = []
            velocities = []
            measurements = []
            timestep = config['lstm_timestep']

            for names, sample_velocities, sample_measures in batch_samples:
                images_timestep = []
                velocities_timestep = []
                measurements_timestep = []

                for j in range(timestep):
                    images_timestep.append(self._load_image(names[j]))
                    velocities_timestep.append(sample_velocities[j])

                    # only the last step of a window provides the target;
                    # compare by value, not by identity
                    if j == timestep - 1:
                        # if no brake data in collected data, brake values
                        # are dummy
                        steering_angle, throttle, _brake = sample_measures[j]
                        steering_angle = _clamp_jitter(steering_angle)
                        measurements_timestep.append(
                            _make_target(steering_angle, throttle))

                # no data augmentation is applied to LSTM windows
                images.append(images_timestep)
                velocities.append(velocities_timestep)
                measurements.append(measurements_timestep)

            return images, velocities, measurements

        def _generator(samples, batch_size=config['batch_size']):
            num_samples = len(samples)
            if config['lstm'] is True and num_samples < batch_size:
                # Without a single full batch the loop below would spin
                # forever without ever yielding.
                raise ValueError(
                    'Not enough samples (%d) for one batch of %d.' %
                    (num_samples, batch_size))

            while True:  # Loop forever so the generator never terminates
                if config['lstm'] is True:
                    # only full batches are produced in LSTM mode
                    full_span = (num_samples // batch_size) * batch_size
                    for offset in range(0, full_span, batch_size):
                        batch_samples = samples[offset:offset + batch_size]

                        images, velocities, measurements = \
                            _prepare_lstm_batch_samples(batch_samples)

                        X_train = np.array(images)
                        y_train = np.array(measurements)

                        if config['num_inputs'] == 2:
                            X_train_vel = np.array(velocities).reshape(
                                -1, config['lstm_timestep'], 1)
                            X_train = [X_train, X_train_vel]
                        if config['num_outputs'] == 2:
                            y_train = np.stack(measurements).reshape(
                                -1, config['num_outputs'])

                        yield X_train, y_train
                else:
                    samples = sklearn.utils.shuffle(samples)

                    for offset in range(0, num_samples, batch_size):
                        batch_samples = samples[offset:offset + batch_size]

                        images, velocities, measurements = \
                            _prepare_batch_samples(batch_samples)

                        X_train = np.array(images).reshape(
                            -1, config['input_image_height'],
                            config['input_image_width'],
                            config['input_image_depth'])

                        # one row per sample: two columns when the target is
                        # (steering, throttle), otherwise a single column
                        output_width = 2 if config['num_outputs'] == 2 else 1
                        y_train = np.array(measurements).reshape(
                            -1, output_width)

                        if config['num_inputs'] == 2:
                            X_train_vel = np.array(velocities).reshape(-1, 1)
                            X_train = [X_train, X_train_vel]

                        yield X_train, y_train

        self.train_generator = _generator(self.train_data)
        self.valid_generator = _generator(self.valid_data)

        if show_summary:
            self.net_model.model.summary()

    ###########################################################################
    #
    def _start_training(self):
        """Fit the model with checkpoint, early-stop and TensorBoard callbacks.

        Raises:
            NameError: if the generators have not been built yet.
        """
        if self.train_generator is None:
            raise NameError('Generators are not ready.')

        ######################################################################
        # callbacks
        from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard

        # checkpoint: keep only the weights that improve the validation loss
        checkpoint = ModelCheckpoint(self.model_ckpt_name +
                                     '.{epoch:02d}-{val_loss:.2f}.h5',
                                     monitor='val_loss',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='min')

        # early stopping
        earlystop = EarlyStopping(monitor='val_loss',
                                  min_delta=0,
                                  patience=config['early_stopping_patience'],
                                  verbose=1,
                                  mode='min')

        # tensor board
        logdir = config['tensorboard_log_dir'] + datetime.now().strftime(
            "%Y%m%d-%H%M%S")
        tensorboard = TensorBoard(log_dir=logdir)

        callbacks = [checkpoint, earlystop, tensorboard]

        self.train_hist = self.net_model.model.fit_generator(
            self.train_generator,
            steps_per_epoch=self.num_train_samples // config['batch_size'],
            epochs=config['num_epochs'],
            validation_data=self.valid_generator,
            validation_steps=self.num_valid_samples // config['batch_size'],
            verbose=1,
            callbacks=callbacks,
            use_multiprocessing=True)

    ###########################################################################
    #
    def _plot_training_history(self):
        """Save the loss curves as ``<model_name>_model.png`` and ``.pdf``."""
        print(self.train_hist.history.keys())

        plt.figure()  # new figure window
        ### plot the training and validation loss for each epoch
        # the first epoch is left out of both curves
        plt.plot(self.train_hist.history['loss'][1:])
        plt.plot(self.train_hist.history['val_loss'][1:])
        plt.ylabel('mse loss')
        plt.xlabel('epoch')
        plt.legend(['training set', 'validatation set'], loc='upper right')
        plt.tight_layout()
        plt.savefig(self.model_name + '_model.png', dpi=150)
        plt.savefig(self.model_name + '_model.pdf', dpi=150)

    ###########################################################################
    #
    def train(self, show_summary=True):
        """Run the whole pipeline: data, generators, fit, save and plot.

        Args:
            show_summary: forwarded to ``_build_model``.
        """
        self._prepare_data()
        self._build_model(show_summary)
        self._start_training()
        self.net_model.save(self.model_name)
        self._plot_training_history()
        Config.summary()
def _feature_file_stems(use_additional, keep_aspect_ratio, data_augmentation):
    """Return the (train, valid, test) file-name stems for saved features.

    The stems reproduce the original file names exactly, including the
    double underscore in the augmented ``__NAR_test`` stem, so files keep
    the names they were written under before.
    """
    prefix = 'feaExt' if use_additional else 'fea'
    if data_augmentation:
        if keep_aspect_ratio:
            parts = ('_DATrain', '_DAValid', '_test')
        else:
            parts = ('_DA_NAR_Train', '_DA_NAR_Valid', '__NAR_test')
    elif keep_aspect_ratio:
        parts = ('_Train', '_Valid', '_test')
    else:
        parts = ('_NAR_Train', '_NAR_Valid', '_NAR_test')
    return tuple(prefix + part for part in parts)


def create_feature_extractor():
    """Extract network features for the train, validation and test sets.

    Loads the cached ``.npy`` data, builds a feature-extraction model (a
    truncated previously saved model or an ImageNet-pretrained VGG16 /
    InceptionV3 without its top), runs the data through it and stores the
    features and their labels under ``saved_data/``. Everything is driven
    by module-level settings.

    Raises:
        ValueError: if ``ftModel`` is neither "VGG16" nor "IV3".
    """
    if prepareData:
        imgUtils = ImageUtils(imgSize,
                              useAditional=useAditional,
                              keepAspectRatio=keepAspectRatio,
                              useKaggleData=useKaggleData)
        # Called once, like the sibling training entry point does; the
        # original invoked the return value a second time.
        imgUtils.dataPreparation()

    K.set_image_data_format('channels_first')
    K.set_floatx('float32')

    np.random.seed(RDM)

    print("\nLoading train data...\n" + SEPARATOR)

    # The cached file names differ only in the data-set name and in an
    # optional aspect-ratio suffix.
    set_name = 'trainExtra' if useAditional else 'train'
    ratio_suffix = '_OrigAspectRatio' if keepAspectRatio else ''
    train_data = np.load('saved_data/' + set_name + str(imgSize) +
                         ratio_suffix + '.npy')
    train_target = np.load('saved_data/' + set_name + '_target.npy')

    x_train, x_val_train, y_train, y_val_train = train_test_split(
        train_data,
        train_target,
        test_size=percentTrainForValidation,
        random_state=RDM)

    print("\nLoading test data...\n" + SEPARATOR)
    test_data = np.load('saved_data/test' + str(imgSize) + ratio_suffix +
                        '.npy')

    if dataAugmentation:
        print("\nMaking data augmentation...\n" + SEPARATOR)
        datagen = da.prepareDataAugmentation(train_data=train_data)

    print("\nCreating model...\n" + SEPARATOR)

    # Fail early with a clear message instead of hitting an unbound `model`
    # later on.
    if ftModel not in ("VGG16", "IV3"):
        raise ValueError("Unsupported ftModel: %r (expected 'VGG16' or 'IV3')"
                         % (ftModel,))

    if loadPreviousModel:
        baseModel = load_model(pathToPreviousModel)
        print("Loaded model from: " + pathToPreviousModel)

        # Cut the loaded network at the last layer before its classifier.
        last_layer = "block5_pool" if ftModel == "VGG16" else "mixed10"
        model = Model(inputs=baseModel.input,
                      outputs=baseModel.get_layer(last_layer).output)
    elif ftModel == "VGG16":
        # loading VGG16 model weights
        model = VGG16(weights='imagenet',
                      include_top=False,
                      input_shape=(3, imgSize, imgSize))
    else:
        model = InceptionV3(weights='imagenet',
                            include_top=False,
                            input_shape=(3, imgSize, imgSize))

    def _augmented_features(data, labels, stop_after):
        # Run augmented batches through the model until `stop_after`
        # batches were processed; the flow generator never ends by itself.
        features = []
        targets = []
        batches = 0
        for x_batch, y_batch in datagen.flow(data,
                                             labels,
                                             batch_size=batchSize,
                                             shuffle=True):
            features.append(model.predict_on_batch(x_batch))
            targets.append(y_batch)
            batches += 1
            print("Batches: " + str(batches) + '/' + str(len(data)))
            if batches >= stop_after:
                # we need to break the loop by hand because
                # the generator loops indefinitely
                break
        return features, targets

    # Extracting features from the datasets using the pre-trained model
    print("\nGenerating features...\n" + SEPARATOR)

    print("\nTraining features...\n")

    if dataAugmentation:
        # TODO: provide more images
        # NOTE(review): the training loop stops after len(x_train) batches
        # while the validation loop stops after len // batchSize; kept as in
        # the original - confirm whether the oversampling is intended.
        features_train, train_targets = _augmented_features(
            x_train, y_train, len(x_train))

        print("\nValidation features...\n")
        features_valid, valid_targets = _augmented_features(
            x_val_train, y_val_train, len(x_val_train) // batchSize)

        print("\nTest features...\n")
        features_test = model.predict(test_data,
                                      batch_size=batchSize,
                                      verbose=1)
    else:
        features_train = model.predict(x_train,
                                       batch_size=batchSize,
                                       verbose=1)
        train_targets = y_train

        print("\nValidation features...\n")
        features_valid = model.predict(x_val_train,
                                       batch_size=batchSize,
                                       verbose=1)
        valid_targets = y_val_train

        print("\nTest features...\n")
        features_test = model.predict(test_data,
                                      batch_size=batchSize,
                                      verbose=1)

    train_stem, valid_stem, test_stem = _feature_file_stems(
        useAditional, keepAspectRatio, dataAugmentation)

    def _save(stem, suffix, array):
        # NOTE(review): with augmentation `array` is a list of per-batch
        # arrays; presumably a shorter final batch makes it ragged - verify
        # np.save handles it on the numpy version in use.
        np.save('saved_data/' + stem + str(imgSize) + suffix,
                array,
                allow_pickle=True,
                fix_imports=True)

    # Same files, in the same order, as the original branch-per-case code.
    _save(train_stem, '.npy', features_train)
    _save(train_stem, '_target.npy', train_targets)
    _save(valid_stem, '.npy', features_valid)
    _save(valid_stem, '_target.npy', valid_targets)
    _save(test_stem, '.npy', features_test)