def get_predictions(data_dir, preprocessing_function=lambda x: x, model=model):
    """Score every image under ``data_dir`` and return the results as a table.

    Args:
        data_dir: Directory handed to ``flow_from_directory``.
        preprocessing_function: Either a callable applied by the generator,
            or one of the names ``'fliplr'``, ``'identity'`` / ``'orig'``.
        model: Object exposing ``predict_generator``; defaults to the
            module-level ``model`` captured when this function was defined.

    Returns:
        A ``pandas.DataFrame`` with one ``scores_<k>`` column per column of
        the prediction array, plus ``files`` and ``label`` columns taken from
        the directory iterator.

    Raises:
        ValueError: If ``preprocessing_function`` is an unrecognised string.
    """
    # Resolve the string shorthands into actual callables.
    if isinstance(preprocessing_function, str):
        requested = preprocessing_function
        if requested == 'fliplr':
            # Reverses the second-to-last axis
            # (presumably width for channels-last images - TODO confirm).
            preprocessing_function = lambda x: x[..., ::-1, :]
        elif requested in ('identity', 'orig'):
            preprocessing_function = lambda x: x
        else:
            raise ValueError('unknown preprocessing_function:\t%s' % requested)

    # The preprocessing hook is attached after construction, not via kwargs.
    generator = ImageDataGenerator(**norm_params)
    generator.preprocessing_function = preprocessing_function

    # shuffle=False keeps predictions aligned with .filenames / .classes.
    batches = generator.flow_from_directory(data_dir,
                                            shuffle=False,
                                            **flowfromdir_params)
    scores = model.predict_generator(batches, steps=len(batches), verbose=1)

    columns = {}
    for class_idx, class_scores in enumerate(scores.T):
        columns["scores_%d" % class_idx] = class_scores
    columns["files"] = batches.filenames
    columns["label"] = batches.classes
    return pd.DataFrame(columns)
def save_bottlebeck_features():
    """Cache headless-VGG16 outputs for training and validation images.

    Does nothing when ``bottleneck_features_train.npy`` already exists,
    unless the first command-line argument is ``--force``.

    Writes two archives with ``np.savez``:

    * ``bottleneck_features_train.npy`` with ``features`` and ``labels``
    * ``bottleneck_features_validation.npy`` with ``features``, ``labels``
      and the raw input batches under ``data``

    Relies on the module-level names ``train_dir``, ``validation_dir``,
    ``config`` and ``nb_validation_samples``.
    """
    force = len(sys.argv) > 1 and sys.argv[1] == "--force"
    if os.path.exists('bottleneck_features_train.npy') and not force:
        print("Using saved features, pass --force to save new features")
        return

    datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
    train_generator = datagen.flow_from_directory(
        train_dir,
        target_size=(config.img_width, config.img_height),
        batch_size=config.batch_size,
        class_mode="binary")
    val_generator = datagen.flow_from_directory(
        validation_dir,
        target_size=(config.img_width, config.img_height),
        batch_size=config.batch_size,
        class_mode="binary")

    # build the VGG16 network
    model = VGG16(include_top=False, weights='imagenet')

    print("Predicting bottleneck training features")
    training_labels = []
    training_features = []
    # NOTE(review): only 5 batches are processed; the original trailing
    # comment suggests `nb_train_samples // config.batch_size` was intended.
    # Left unchanged - confirm before widening.
    for _ in range(5):
        data, labels = next(train_generator)
        training_labels.append(labels)
        training_features.append(model.predict(data))
    training_labels = np.concatenate(training_labels)
    training_features = np.concatenate(training_features)
    # Use a context manager so the handle is flushed and closed
    # deterministically (it was previously left open).
    with open('bottleneck_features_train.npy', 'wb') as train_file:
        np.savez(train_file,
                 features=training_features,
                 labels=training_labels)

    print("Predicting bottleneck validation features")
    validation_labels = []
    validation_features = []
    validation_data = []
    for _ in range(nb_validation_samples // config.batch_size):
        data, labels = next(val_generator)
        validation_features.append(model.predict(data))
        validation_labels.append(labels)
        validation_data.append(data)
    validation_labels = np.concatenate(validation_labels)
    validation_features = np.concatenate(validation_features)
    validation_data = np.concatenate(validation_data)
    with open('bottleneck_features_validation.npy', 'wb') as val_file:
        np.savez(val_file,
                 features=validation_features,
                 labels=validation_labels,
                 data=validation_data)
def _augment_to_dir(path, image_save_dir, aug_number, tag, **datagen_kwargs):
    """Write randomly augmented grayscale variants of one image to a folder.

    Args:
        path: Path of the source image.
        image_save_dir: Directory passed to ``flow`` as ``save_to_dir``.
        aug_number: Number of batches (of one image each) to draw from the
            generator. Values ``<= 0`` produce nothing.
        tag: Short label appended to the saved-file prefix.
        **datagen_kwargs: Augmentation settings forwarded unchanged to
            ``ImageDataGenerator``.
    """
    # Nothing requested. The previous counter-based loop still drew (and
    # therefore saved) one image in this case.
    if aug_number <= 0:
        return

    datagen = ImageDataGenerator(**datagen_kwargs)
    img = load_img(path, grayscale=True)  # PIL image
    x = img_to_array(img)  # PIL image -> numpy array
    x = x.reshape((1, ) + x.shape)  # prepend a batch axis of size 1

    # Text before the first dot of the file name ("a.b.jpg" -> "a").
    prefix = os.path.basename(path).split(".")[0]

    flow = datagen.flow(x,
                        batch_size=1,
                        save_to_dir=image_save_dir,
                        save_prefix=prefix + "_aug_" + tag,
                        save_format='jpg')
    # The generator keeps yielding, so stop explicitly once enough batches
    # have been drawn.
    for drawn, _ in enumerate(flow, start=1):
        if drawn >= aug_number:
            break


def Scaling(path, image_save_dir, aug_number):
    """Save ``aug_number`` randomly zoomed copies of the image at ``path``.

    Uses ``zoom_range=0.3`` with ``fill_mode='constant'``; files are
    prefixed ``<name>_aug_Scal``.
    """
    _augment_to_dir(path, image_save_dir, aug_number, 'Scal',
                    zoom_range=0.3, fill_mode='constant')


def Random_affine_transform(path, image_save_dir, aug_number):
    """Save ``aug_number`` randomly sheared copies of the image at ``path``.

    Uses ``shear_range=3`` (an angle range); files are prefixed
    ``<name>_aug_aff``.
    """
    _augment_to_dir(path, image_save_dir, aug_number, 'aff',
                    shear_range=3)


def Random_translation(path, image_save_dir, aug_number):
    """Save ``aug_number`` randomly shifted copies of the image at ``path``.

    Uses ``width_shift_range=0.05`` and ``height_shift_range=0.05``; files
    are prefixed ``<name>_aug_trans``.
    """
    _augment_to_dir(path, image_save_dir, aug_number, 'trans',
                    width_shift_range=0.05, height_shift_range=0.05)
def data_generator(self, indeces=True, channel_mode="channels_last"):
    """Create the augmenting ``ImageDataGenerator`` used by this object.

    Args:
        indeces: Forwarded as ``get_normal_also`` (per the original note:
            also get indices for the unaugmented data).
        channel_mode: Forwarded as ``data_format``.

    Returns:
        The configured ``ImageDataGenerator`` instance.
    """
    augmentation_options = dict(
        # Normalisation and whitening are all switched off.
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        # Random geometric perturbations.
        rotation_range=15,        # degrees
        width_shift_range=0.15,   # fraction of total width
        height_shift_range=0.15,  # fraction of total height
        horizontal_flip=True,
        vertical_flip=True,
        data_format=channel_mode,
        # Not a stock Keras argument - presumably handled by a customised
        # ImageDataGenerator; verify against the import.
        get_normal_also=indeces,
    )
    return ImageDataGenerator(**augmentation_options)
pairs = np.array(Parallel(n_jobs=8)(delayed(loadValidationData)(fname, shape) for fname in filelist)) val_car = pairs[:,0,:,:] val_car_mask = pairs[:,1,:,:] val_car = val_car.reshape(val_car.shape[0], shape[0], shape[1], 1) val_car_mask = val_car_mask.reshape(val_car.shape[0], shape[0], shape[1], 1) del pairs print "Time elapsed:", (time.time()-t)/60 else: print 'Creating validation imageDataGenerators' val_data_gen_args = dict(rescale = 1./255) val_image_datagen = ImageDataGenerator(**val_data_gen_args) val_mask_datagen = ImageDataGenerator(**val_data_gen_args) val_dir = 'data/full_' if full else 'data/val_' val_dir += str(shape)+rgb_suffix # Provide the same seed and keyword arguments to the fit and flow methods seed = 1 val_image_generator = val_image_datagen.flow_from_directory( 'data/'+train_prefix+str(shape)+rgb_suffix, target_size=shape, color_mode = color_mode, class_mode = None, batch_size = batch_size, seed = seed)
# Step 3 - Flattening classifier.add(Flatten()) # Step 4 - Fully Connected Layer classifier.add(Dense(units=128, activation='relu')) classifier.add(Dense(units=6, activation='softmax')) # Step 5 - Compiling classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) from image import ImageDataGenerator train_datagen = ImageDataGenerator(rescale=1. / 255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True) dev_datagen = ImageDataGenerator(rescale=1. / 255) test_datagen = ImageDataGenerator(rescale=1. / 255) training_set = train_datagen.flow_from_directory( '/home/osama/Documents/datasets/textile/dataset/training_set/', target_size=(128, 128), batch_size=1) dev_set = dev_datagen.flow_from_directory( '/home/osama/Documents/datasets/textile/dataset/dev_set', target_size=(128, 128), batch_size=32, class_mode='categorical')
from keras.applications import InceptionResNetV2, Xception from keras.models import Model, Sequential, load_model from keras.layers import Dense, GlobalAveragePooling2D from keras import backend as K import matplotlib.pyplot as plt from constants import train_data_dir, validation_data_dir batch_size = 64 image_size = 299 train_data_path = train_data_dir validation_data_path = validation_data_dir train_data_generator = ImageDataGenerator(rescale=1. / 255, contrast_stretching=False, logarithmic=False, gamma=False, equalization=False, adaptive_equalization=False) validation_data_generator = ImageDataGenerator(rescale=1. / 255) train_generator = train_data_generator.flow_from_directory( train_data_path, target_size=(image_size, image_size), batch_size=64, class_mode='categorical') validation_generator = validation_data_generator.flow_from_directory( validation_data_path, shuffle=False, target_size=(image_size, image_size),
nb_epoch=nb_epoch, validation_data=(X_test, Y_test), shuffle=True) else: print('Using real-time data augmentation.') # this will do preprocessing and realtime data augmentation datagen = ImageDataGenerator( featurewise_center=False, # set input mean to 0 over the dataset samplewise_center=False, # set each sample mean to 0 featurewise_std_normalization= False, # divide inputs by std of the dataset samplewise_std_normalization=False, # divide each input by its std zca_whitening=False, # apply ZCA whitening rotation_range= 45.0, # randomly rotate images in the range (degrees, 0 to 180) width_shift_range= 0.1, # randomly shift images horizontally (fraction of total width) height_shift_range= 0.1, # randomly shift images vertically (fraction of total height) horizontal_flip=True, # randomly flip images vertical_flip=False, zerosquare=True, zerosquareh=noises, zerosquarew=noises, zerosquareintern=0.0) # randomly flip images # compute quantities required for featurewise normalization # (std, mean, and principal components if ZCA whitening is applied) datagen.fit(X_train) # fit the model on the batches generated by datagen.flow() if weighted:
# Normalise the image-id column of every split to uint32.
datas = {'Train': train, 'Test': test, 'Validation': validation}
for data in datas.values():
    data['imageId'] = data['imageId'].astype(np.uint32)
# Fit the multi-label binarizer on the training labels; it is reused below
# to map rounded predictions back to label sets.
mlb = MultiLabelBinarizer()
train_label = mlb.fit_transform(train['labelId'])
# All-zero placeholder targets for the test set, and 1-based ids 1..39706.
y_test = np.zeros((39706, 228))
x_test = np.arange(y_test.shape[0]) + 1
width = 224
model_name = 'Xception'
# NOTE(review): the model-loading code below is commented out, so `model`
# must already be defined elsewhere before this runs - confirm.
# with CustomObjectScope({'f1_loss': f1_loss, 'f1_score': f1_score, 'precision': precision, 'recall': recall}):
#     model = load_model(f'../models/{model_name}_f1.h5')
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
# NOTE(review): this `flow` call takes (ids, directory, width, labels), which
# is not the stock Keras signature - presumably a customised generator.
# shuffle=False keeps predictions in id order.
y_pred_test = model.predict_generator(test_datagen.flow(x_test,
                                                        '../data/test_data',
                                                        width,
                                                        y_test,
                                                        batch_size=1,
                                                        shuffle=False),
                                      verbose=1)
# Persist the raw scores before thresholding.
np.save(f'../data/json/y_pred_{model_name}', y_pred_test)
# y_pred_test_xe = y_pred_test.copy()
# y_pred_test = (y_pred_test_xe + y_pred_test_in) / 2
# Round scores to 0/1 and convert them back to label sets.
y_pred_test1 = np.round(y_pred_test)
where_1 = mlb.inverse_transform(y_pred_test1)
def save_bottleneck_features(train_data_dir,
                             val_data_dir,
                             weights_path=WEIGHTS_PATH_NO_TOP,
                             overwrite=False):
    """Compute bottom-model features and store them in ``bottleneck_features.h5``.

    Args:
        train_data_dir: Directory of training images for
            ``flow_from_directory``.
        val_data_dir: Directory of validation images for
            ``flow_from_directory``.
        weights_path: Weights file handed to ``bottom_model``.
        overwrite: When the output file already exists, replace it if true;
            otherwise print a notice and return without doing anything.

    The HDF5 file receives four datasets: ``train_labels``, ``val_labels``,
    ``val`` and ``training``. Relies on the module-level ``batch_size``.
    """
    bottleneck_features_name = "bottleneck_features.h5"
    if os.path.isfile(bottleneck_features_name):
        if overwrite:
            print("Overwriting bottleneck_features.h5")
            # Previously `os.remove` was only referenced, never called.
            os.remove(bottleneck_features_name)
        else:
            print(
                "bottleneck_features.h5 exists, use overwrite=True to overwrite."
            )
            return
    print(bottleneck_features_name +
          " is being created...~80GB for ImageNet200")

    img_width, img_height = 224, 224
    nb_train_samples = 100000
    nb_val_samples = 10000

    state = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    model = bottom_model(state, weights_path=weights_path)

    seed = 0
    val_datagen = ImageDataGenerator(rescale=1. / 255)
    val_generator = val_datagen.flow_from_directory(val_data_dir,
                                                    target_size=(img_height,
                                                                 img_width),
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    seed=seed)
    train_datagen = ImageDataGenerator(rescale=1. / 255)
    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        shuffle=True,
        seed=seed)

    # Rebuild sample orderings with the same seed so the stored labels can
    # be reordered. NOTE(review): this presumably mirrors the generators'
    # internal shuffle for the first pass - confirm against the Keras
    # version in use.
    np.random.seed(seed)
    train_index_array = np.random.permutation(100000)[:nb_train_samples]
    np.random.seed(seed)
    val_index_array = np.random.permutation(10000)[:nb_val_samples]

    # Features are predicted and written in parts of roughly `chunk` batches.
    chunk = 25
    train_parts = (nb_train_samples // batch_size) // chunk
    train_samples = nb_train_samples // train_parts
    val_parts = (nb_val_samples // batch_size) // chunk
    val_samples = nb_val_samples // val_parts

    with h5py.File(bottleneck_features_name, 'w') as hf:
        hf.create_dataset("train_labels",
                          data=np.take(
                              train_generator.classes[:nb_train_samples],
                              train_index_array))
        hf.create_dataset("val_labels",
                          data=np.take(
                              val_generator.classes[:nb_val_samples],
                              val_index_array))

        val = hf.create_dataset("val", (nb_val_samples, 14, 14, 1024),
                                chunks=(64, 14, 14, 1024))
        for i in range(val_parts):
            print("Val done: " + str(100 * i / val_parts) + "%")
            max_q_size = 1
            val[i * val_samples:(i + 1) *
                val_samples, :, :, :] = model.predict_generator(
                    val_generator,
                    val_samples // batch_size,
                    max_q_size=max_q_size)
            # recorrect for the over-calling of predict_generator by
            # max_q_size
            val_generator.batch_index -= max_q_size

        train = hf.create_dataset("training",
                                  (nb_train_samples, 14, 14, 1024),
                                  chunks=(64, 14, 14, 1024))
        for i in range(train_parts):
            print("Train done: " + str(100 * i / train_parts) + "%")
            max_q_size = 1
            train[i * train_samples:(i + 1) *
                  train_samples, :, :, :] = model.predict_generator(
                      train_generator,
                      train_samples // batch_size,
                      max_q_size=max_q_size)
            # recorrect for the over-calling of predict_generator by
            # max_q_size
            train_generator.batch_index -= max_q_size
class_mode=prms.class_mode, classes=prms.classes, seed=prms.seed) norm_params = dict( #rescale=prms.scaleup, samplewise_center=prms.samplewise_center, samplewise_std_normalization=prms.samplewise_center, featurewise_center=False, featurewise_std_normalization=False, zca_whitening=False, ) # In[23]: train_datagen = ImageDataGenerator(**norm_params) train_datagen.preprocessing_function = lambda x: x[..., ::-1, :] #*2**-8 datagen_train_output = train_datagen.flow_from_directory( prms.data_train, #stratify = prms.oversampling, #sampling_factor=prms.sampling_factor, #oversampling=prms.oversampling, shuffle=False, **flowfromdir_params) SAMPLES_PER_EPOCH = len(datagen_train_output.filenames) STEPS_PER_EPOCH = int(np.ceil(SAMPLES_PER_EPOCH / prms.batch_size)) ########################################## def get_predictions(data_dir, preprocessing_function=lambda x: x, model=model):
except: last_lr = 0.001 print('\nLast learning Rate = {}'.format(last_lr)) ### Compile ### optimizer = Adam(lr=last_lr, clipnorm=5) #1e-3 model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy'], optimizer=optimizer, metrics=['accuracy'], loss_weights=[1.0, 0.4]) ### AUGMENTATION ### # This will do preprocessing and realtime data augmentation: # data generator for train set train_datagen = ImageDataGenerator(#horizontal_flip=True, #brightness_range=[1.2,1.2], # 0.5>val<=2 is not working preprocessing_function=preprocess_input #preprocessing_function=imgaug_steroids ) # data generator for test set validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input) ### class_weights ### label_wts = np.load('nasnet_data/label_wts.npy') label_wts = np.ndarray.tolist(label_wts) ### loading file ### # Link training data scratch_dir = sys.argv[1] print ('\nSCRATCH_DIR: {}\n'.format(scratch_dir))
def data_generator(self, indeces=True, channel_mode="channels_last"):
    """Create an ``ImageDataGenerator`` with no augmentation options set.

    Args:
        indeces: Forwarded as ``get_normal_also`` (per the original note:
            also get indices for the unaugmented data).
        channel_mode: Forwarded as ``data_format``.

    Returns:
        The configured ``ImageDataGenerator`` instance.
    """
    # `get_normal_also` is not a stock Keras argument - presumably handled
    # by a customised ImageDataGenerator; verify against the import.
    generator_options = {
        "data_format": channel_mode,
        "get_normal_also": indeces,
    }
    return ImageDataGenerator(**generator_options)
# model_name = 'InceptionResNetV2' batch_size, MODEL = batch_size_model[model_name] # model = build_model(MODEL, width, n_class, model_name, batch_size) model_name = 'Xception' with CustomObjectScope({ 'binary_crossentropy_weight': binary_crossentropy_weight, 'f1_loss': f1_loss, 'f1_score': f1_score, 'precision': precision, 'recall': recall }): model = load_model(f'../models/{model_name}_bcw.h5') # Load weights datagen = ImageDataGenerator(preprocessing_function=preprocess_input) val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input) losses = { 'bcw': binary_crossentropy_weight, 'f1': f1_loss, 'bc': 'binary_crossentropy' } configs = [('bcw', Adam(lr=1e-5)), ('f1', Adam(lr=1e-5)), ('f1', SGD(lr=5e-5, momentum=0.9, nesterov=True))] for i, config in enumerate(configs): print(f'{i + 1} trial') # loss_name, opt = ('bcw', Adam(lr=3e-6)) loss_name, opt = config reduce_lr_patience = 2
batch_y = y_val for i, j in enumerate(tqdm(index_array)): s_img = cv2.imread(f'../data/val_data/{j+1}.jpg') b, g, r = cv2.split(s_img) # get b,g,r rgb_img = cv2.merge([r, g, b]) # switch it to rgb x = resizeAndPad(rgb_img, (width, width)) batch_x[i] = x model_names = ['Xception_f1_59', 'Xception_f1_5945'] for model_name in model_names: with CustomObjectScope({ 'f1_loss': f1_loss, 'f1_score': f1_score, 'precision': precision, 'recall': recall }): model = load_model(f'../models/{model_name}.h5') # y_pred_val = model.predict(batch_x, verbose=1) # print(model_name, f1_score(y_val, y_pred_val)) val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input) y_pred_val = model.predict_generator(val_datagen.flow(x_val, '../data/val_data', width, y_val, batch_size=3, shuffle=False), verbose=1) print(model_name, f1_score_np(y_val, y_pred_val))
############################################################################### # Compiling the network classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) ############################################################################### # Preparing the image generator training_set_path = "/textileconvnets/training_set/" dev_set_path = "/textileconvnets/dev_set" #test_set_path = "/home/osama/Documents/datasets/textile/dataset/test_set" train_datagen = ImageDataGenerator(rescale=1. / 255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True, vertical_flip=True, rotation_range=45, width_shift_range=0.2, height_shift_range=0.2, preprocessing_function=image_preprocessing) dev_datagen = ImageDataGenerator(rescale=1. / 255, preprocessing_function=image_preprocessing) #test_datagen = ImageDataGenerator(rescale=1./255) training_set = train_datagen.flow_from_directory(training_set_path, target_size=(128, 128), batch_size=27) dev_set = dev_datagen.flow_from_directory(dev_set_path, target_size=(128, 128),
elif prms.preprocessing_function == 'm1p1': preprocessing_function = lambda x: x / 128.0 - 1 else: raise ValueError("unknown preprocessing_function") else: preprocessing_function = lambda x: x if prms.data_augmentation: print('Using real-time data augmentation.') train_datagen = ImageDataGenerator( zoom_range=prms.zoom_range, fill_mode=prms.fill_mode, rotation_range=prms.rotation_range, width_shift_range=prms.width_shift_range, height_shift_range=prms.height_shift_range, horizontal_flip=prms.horizontal_flip, vertical_flip=prms.vertical_flip, contrast=prms.contrast if "contrast" in prms else None, truncate_quantile=prms.truncate_quantile, #histeq_alpha=prms.histeq_alpha, **norm_params) else: train_datagen = ImageDataGenerator(**norm_params) val_datagen = ImageDataGenerator(**norm_params) datagen_train_output = train_datagen.flow_from_directory( prms.data_train, stratify=prms.oversampling, sampling_factor=prms.sampling_factor if prms.oversampling else None, oversampling=prms.oversampling,
wc=(W-w)/2 Xin=np.squeeze(Xin) Xin=np.transpose(Xin,(1,2,0)) Xout=np.zeros(Xorig.shape,dtype=Xorig.dtype) Xout[hc:hc+h,wc:wc+w,:]=Xin return Xout # random data generator datagen = ImageDataGenerator(featurewise_center=False, samplewise_center=False, featurewise_std_normalization=False, samplewise_std_normalization=False, zca_whitening=False, rotation_range=75, width_shift_range=0.1, height_shift_range=0.1, shear_range=0.05, zoom_range=0.01, channel_shift_range=0.0, fill_mode='nearest', cval=0.0, horizontal_flip=True, vertical_flip=True, dim_ordering='th') def iterate_minibatches(inputs1 , targets, batchsize, shuffle=True, augment=True): assert len(inputs1) == len(targets) if shuffle: indices = np.arange(len(inputs1)) np.random.shuffle(indices) for start_idx in range(0, len(inputs1) - batchsize + 1, batchsize):
'syringe', 't-shirt', 'table', 'tablelamp', 'teacup', 'teapot', 'teddy-bear', 'telephone', 'tennis-racket', 'tent', 'tiger', 'tire', 'toilet', 'tomato', 'tooth', 'toothbrush', 'tractor', 'traffic light', 'train', 'tree', 'trombone', 'trousers', 'truck', 'trumpet', 'tv', 'umbrella', 'van', 'vase', 'violin', 'walkie talkie', 'wheel', 'wheelbarrow', 'windmill', 'wine-bottle', 'wineglass', 'wrist-watch', 'zebra' ] datagen = ImageDataGenerator( featurewise_center=True, # set input mean to 0 over the dataset samplewise_center=True, # set each sample mean to 0 featurewise_std_normalization=True, # divide inputs by std of the dataset samplewise_std_normalization=False, # divide each input by its std zca_whitening=False, # apply ZCA whitening rotation_range=20, # randomly rotate images in the range (degrees, 0 to 180) width_shift_range= 0.2, # randomly shift images horizontally (fraction of total width) height_shift_range= 0.2, # randomly shift images vertically (fraction of total height) horizontal_flip=True, # randomly flip images vertical_flip=False) # randomly flip images def load_dataset_old(): # We first define some helper functions for supporting both Python 2 and 3. if sys.version_info[0] == 2: from urllib import urlretrieve import cPickle as pickle def pickle_load(f, encoding):