def test_multiprocessing_predict_error():
    """Ask `predict_generator` for more steps than the generator can supply.

    The generator yields `n_good` batches and then raises `RuntimeError`.
    Each scenario requests more steps than that and expects
    `StopIteration` to reach the caller.
    """
    n_good = 3
    n_workers = 4

    def custom_generator():
        """Raises an exception after a few good batches"""
        produced = 0
        while produced < n_good:
            inputs = np.random.randint(1, 256, size=(2, 5))
            targets = np.random.randint(1, 256, size=(2, 5))
            yield (inputs, targets)
            produced += 1
        raise RuntimeError

    model = Sequential()
    model.add(Dense(1, input_shape=(5,)))
    model.compile(loss='mse', optimizer='adadelta')

    # (number of steps requested, keyword options) for each scenario.
    scenarios = (
        (n_good * n_workers + 1,
         {'workers': n_workers, 'use_multiprocessing': True}),
        (n_good + 1,
         {'use_multiprocessing': False}),
    )
    for steps, options in scenarios:
        with pytest.raises(StopIteration):
            model.predict_generator(custom_generator(), steps, 1, **options)
def test_multiprocessing_predicting():
    """Smoke-test `predict_generator` with and without `pickle_safe`."""
    arr_data = np.random.randint(0, 256, (500, 2))

    def myGenerator():
        """Yield random contiguous windows of `arr_data` forever."""
        batch_size = 32
        n_samples = 500
        while True:
            first = np.random.randint(0, n_samples - batch_size)
            yield arr_data[first:first + batch_size]

    # Build a NN
    model = Sequential()
    model.add(Dense(1, input_shape=(2, )))
    model.compile(loss='mse', optimizer='adadelta')

    for options in ({'nb_worker': 2, 'pickle_safe': True},
                    {'pickle_safe': False}):
        model.predict_generator(myGenerator(),
                                val_samples=320,
                                max_q_size=10,
                                **options)

    # Only reached when neither call raised.
    reached_end = True
    assert reached_end
def test_multiprocessing_predict_error():
    """Check that a generator failing after some good batches is reported.

    The generator yields `good_batches` batches of `batch_size` rows and
    then raises `RuntimeError`.  Each call requests more samples than the
    good batches can cover, so the failure has to be reached.

    The previous version drew targets with
    `np.random.randint(batch_size, 2, 500)`; `low >= high` makes numpy
    raise `ValueError` on the very first batch, so the test passed without
    ever producing a good batch.
    """
    batch_size = 32
    good_batches = 5
    nb_worker = 4

    def myGenerator():
        """Raises an exception after a few good batches"""
        for _ in range(good_batches):
            yield (np.random.randint(0, 256, (batch_size, 2)),
                   np.random.randint(0, 2, batch_size))
        raise RuntimeError

    model = Sequential()
    model.add(Dense(1, input_shape=(2, )))
    model.compile(loss='mse', optimizer='adadelta')

    # NOTE(review): assumes every worker process iterates its own copy of
    # the generator, so up to good_batches * nb_worker good batches may be
    # produced before the failure — confirm against the enqueuer in use.
    multiprocess_samples = batch_size * (good_batches * nb_worker + 1)
    with pytest.raises(Exception):
        model.predict_generator(
            myGenerator(), multiprocess_samples, 1,
            nb_worker=nb_worker, pickle_safe=True,
        )

    single_samples = batch_size * (good_batches + 1)
    with pytest.raises(Exception):
        model.predict_generator(
            myGenerator(), single_samples, 1,
            pickle_safe=False,
        )
class MLP(BaseEstimator):
    """Single-hidden-layer Keras network behind a scikit-learn style API.

    Parameters
    ----------
    verbose : passed to `fit_generator`.
    model : an existing Keras model to train; when `None`, `fit` builds one.
    final_activation : activation of the output layer built by `fit`.
    """

    def __init__(self, verbose=0, model=None, final_activation='sigmoid'):
        self.verbose = verbose
        self.model = model
        self.final_activation = final_activation

    def fit(self, X, y):
        """Build the network if needed, train it, and return `self`.

        The input width is taken from `X.shape[1]` and the output width
        from `y.shape[1]`.  Returning `self` follows the scikit-learn
        estimator convention (the method used to return `None`).
        """
        if self.model is None:
            self.model = Sequential()
            self.model.add(Dense(1000, input_dim=X.shape[1]))
            self.model.add(Activation('relu'))
            self.model.add(Dropout(0.5))
            self.model.add(Dense(y.shape[1]))
            self.model.add(Activation(self.final_activation))
            self.model.compile(loss='categorical_crossentropy',
                               optimizer=Adam(lr=0.01))
        self.model.fit_generator(
            generator=_batch_generator(X, y, 256, True),
            samples_per_epoch=X.shape[0],
            nb_epoch=20,
            verbose=self.verbose)
        return self

    def predict(self, X):
        """Return a sparse boolean matrix of scores above the 0.2 cut."""
        pred = self.predict_proba(X)
        return sparse.csr_matrix(pred > 0.2)

    def predict_proba(self, X):
        """Return the raw network outputs for `X`, predicted batch-wise."""
        pred = self.model.predict_generator(
            generator=_batch_generatorp(X, 512),
            val_samples=X.shape[0])
        return pred
def test_multiprocessing_predicting():
    """Smoke-test `predict_generator` under three worker configurations."""
    arr_data = np.random.randint(0, 256, (50, 2))

    def custom_generator():
        """Yield random contiguous windows of `arr_data` forever."""
        batch_size = 10
        n_samples = 50
        while True:
            first = np.random.randint(0, n_samples - batch_size)
            yield arr_data[first:first + batch_size]

    # Build a NN
    model = Sequential()
    model.add(Dense(1, input_shape=(2, )))
    model.compile(loss='mse', optimizer='adadelta')

    configurations = (
        {'workers': 2, 'use_multiprocessing': True},
        {'use_multiprocessing': False},
        {'workers': 0},
    )
    for options in configurations:
        model.predict_generator(custom_generator(),
                                steps=5,
                                max_queue_size=10,
                                **options)
def test_sequential_fit_generator_finite_length():
    """Exercise the generator APIs with a generator of finite length.

    Trains, evaluates and predicts from a generator that yields exactly
    the requested number of batches.  It then checks that `fit_generator`
    raises when the generator runs dry or raises itself.

    The failure checks used to be `try: ...; assert(False)` followed by a
    bare `except: pass`.  That swallowed the `AssertionError` too, so the
    checks could never fail.
    """
    (X_train, y_train), (X_test, y_test) = _get_test_data(1000, 200)

    def data_generator(train, nbatches):
        """Yield `nbatches` consecutive (X, y) batches of the chosen split."""
        X, y = (X_train, y_train) if train else (X_test, y_test)
        for i in range(nbatches):
            rows = slice(i * batch_size, (i + 1) * batch_size)
            yield (X[rows], y[rows])

    model = Sequential()
    model.add(Dense(nb_hidden, input_shape=(input_dim,), activation='relu'))
    model.add(Dense(nb_class, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    # Largest sample count that is a whole number of batches.
    nbatches = len(X_train) // batch_size
    nsamples = nbatches * batch_size
    model.fit_generator(data_generator(True, nbatches), nsamples, nb_epoch)

    loss = model.evaluate(X_train, y_train)
    assert(loss < 3.0)

    eval_results = model.evaluate_generator(
        data_generator(True, nbatches), nsamples, nb_epoch)
    assert(eval_results < 3.0)

    predict_results = model.predict_generator(
        data_generator(True, nbatches), nsamples, nb_epoch)
    assert(predict_results.shape == (nsamples, 4))

    def assert_fit_fails(generator, reason):
        """Fail the test unless `fit_generator` raises for `generator`."""
        try:
            model.fit_generator(generator, nsamples + 1, nb_epoch)
        except Exception:
            return
        raise AssertionError('fit_generator did not fail: ' + reason)

    # should fail because not enough samples
    assert_fit_fails(data_generator(True, nbatches),
                     'generator has too few samples')

    # should fail because generator throws exception
    def bad_generator(gen):
        for i in range(0, 20):
            yield next(gen)
        raise Exception("Generator raised an exception")

    assert_fit_fails(bad_generator(data_generator(True, nbatches)),
                     'generator raised an exception')
def test_multithreading_predict_error():
    """`RuntimeError` from the generator must reach the caller (threads)."""
    arr_data = np.random.randint(0, 256, (50, 2))
    good_batches = 3

    @threadsafe_generator
    def custom_generator():
        """Raises an exception after a few good batches"""
        batch_size = 10
        n_samples = 50
        for _ in range(good_batches):
            first = np.random.randint(0, n_samples - batch_size)
            yield arr_data[first:first + batch_size]
        raise RuntimeError

    model = Sequential()
    model.add(Dense(1, input_shape=(2,)))
    model.compile(loss='mse', optimizer='adadelta')

    # Each entry is (steps, workers); `RuntimeError` must bubble up in all.
    scenarios = (
        # - Produce data on 4 worker threads, consume on main thread:
        #   - All worker threads share the SAME generator
        (good_batches * WORKERS + 1, WORKERS),
        # - Produce data on 1 worker thread, consume on main thread:
        #   - Worker thread is the only thread running the generator
        (good_batches + 1, 1),
        # - Produce and consume data without a queue on main thread
        #   - Make sure the value of `use_multiprocessing` is ignored
        (good_batches + 1, 0),
    )
    for n_steps, n_workers in scenarios:
        with pytest.raises(RuntimeError):
            model.predict_generator(custom_generator(),
                                    steps=n_steps,
                                    max_queue_size=10,
                                    workers=n_workers,
                                    use_multiprocessing=False)
def test_multithreading_predicting():
    """Smoke-test threaded `predict_generator` with 4, 1 and 0 workers."""
    arr_data = np.random.randint(0, 256, (50, 2))

    @threadsafe_generator
    def custom_generator():
        """Yield random contiguous windows of `arr_data` forever."""
        batch_size = 10
        n_samples = 50
        while True:
            first = np.random.randint(0, n_samples - batch_size)
            yield arr_data[first:first + batch_size]

    # Build a NN
    model = Sequential()
    model.add(Dense(1, input_shape=(2,)))
    model.compile(loss='mse', optimizer='adadelta')

    worker_counts = (
        # - Produce data on 4 worker threads, consume on main thread:
        #   - All worker threads share the SAME generator
        WORKERS,
        # - Produce data on 1 worker thread, consume on main thread:
        #   - Worker thread is the only thread running the generator
        1,
        # - Main thread runs the generator without a queue
        #   - Make sure the value of `use_multiprocessing` is ignored
        0,
    )
    for n_workers in worker_counts:
        model.predict_generator(custom_generator(),
                                steps=STEPS,
                                max_queue_size=10,
                                workers=n_workers,
                                use_multiprocessing=False)
# Add both accuracies and losses into the history DataFrame.
historydf = pd.DataFrame(history.history, index=history.epoch)
utils_Distracted_Driver.plot_loss_accuracy(history)

# Now apply the model to the test data.
# `1. / 255` stays a float even where `/` is integer division (Python 2),
# where `1 / 255` would be 0 and blank out every image.
test_datagen = ImageDataGenerator(rescale=1. / 255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)

# `steps` counts batches and must be a whole number; round up so the final
# partial batch is predicted and every filename gets a probability row.
test_steps = (nb_test_samples + batch_size - 1) // batch_size
probabilities = model.predict_generator(test_generator, test_steps)

headers = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']
df = pd.DataFrame(probabilities, columns=headers)

# Numeric image id parsed from each file name (`<prefix>_<id>.<ext>`),
# in generator order.
mapper = [int(name.split('_')[1].split('.')[0])
          for name in test_generator.filenames]
# The original loop counted the files in `i`; kept in case later code reads it.
i = len(mapper)
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    test_df,
    "D:/input/testset/test/",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
    shuffle=False
)

# Predict
# The result is the probability that each image is a dog.
# `np.ceil` returns a float; `steps` is a batch count, so pass an int.
predict_steps = int(np.ceil(nb_samples / batch_size))
predict = classifier.predict_generator(test_generator, steps=predict_steps)

# Threshold 0.5: a predicted value above 50% is labelled dog (1),
# anything else cat (0).
threshold = 0.5
test_df['probability'] = predict
test_df['category'] = np.where(test_df['probability'] > threshold, 1, 0)

# Visualize the result
test_df['category'].value_counts().plot.bar()

# Predicted result with images
sample_test = test_df.head(18)
sample_test.head()
plt.plot(epochs, acc_train, 'b', label='Training Accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')
plt.title('Accuracy: Training and Validation')
plt.xlabel('Epochs')
plt.ylim(0, 1)
plt.legend()
plt.savefig('accuracy_two_classes.jpg', dpi=300)
plt.show()

# # calculate test scores
# test_loss, test_acc = model.evaluate_generator(test_ds, steps=50)
# print('Test Loss:', test_loss)
# print('Test Accuracy:', test_acc)

# Confusion Matrix and Classification Report - with test data set
Y_pred = model.predict_generator(test_ds)  # , num_of_test_samples // batch_size+1)
y_pred = np.argmax(Y_pred, axis=1)
cm = confusion_matrix(test_ds.classes, y_pred)
print('Confusion Matrix - Test Data Set')
print(cm)

index = ['flat', 'trill']
columns = ['flat', 'trill']
# `pd.DataFrame(cm, columns, index)` filled (index, columns) positionally;
# spell the keywords out so rows and columns cannot be confused.
cm_df = pd.DataFrame(cm, index=columns, columns=index)
# NOTE(review): `np.sum` on a DataFrame appears to reduce per column here,
# so the percentages would be normalised per predicted class, not over the
# whole matrix — confirm that is intended.
sns.heatmap(cm_df/np.sum(cm_df), annot=True, fmt='.2%', cmap='Blues', cbar=False)  # fmt=d gives no. of calls as integers
plt.title('Confusion Matrix')
# `confusion_matrix(y_true, y_pred)` puts true classes on the rows and
# predicted classes on the columns; the heatmap draws rows along y and
# columns along x.  The two labels used to be swapped.
plt.xlabel('Predicted Classes')
plt.ylabel('True Classes')
plt.savefig('cnn_confusion_matrix.jpg', dpi=300)
plt.show()

print('Classification Report - Test Data Set')
print(classification_report(test_ds.classes, y_pred, target_names=columns))
nb_val_samples=150) classification_json = classification.to_json() with open("cnn_model_3.json", "w") as json_file: json_file.write(classification_json) classification.save_weights("cnn_model_3.h5") #89.3 val accuracy #mv test/*.jpg test/unknown/ test_data_gen = ImageDataGenerator(rescale=1. / 255) test_gen = test_data_gen.flow_from_directory('test', target_size=(128, 128), batch_size=25, class_mode='binary') prediction = classification.predict_generator(test_gen, 1531) result = [] filenames = test_gen.filenames for i in range(len(filenames)): result.append( (int(filenames[i].split("/")[1].split(".")[0]), prediction[i][0])) result.sort(key=lambda tup: tup[0]) with open("submission3.csv", "w") as output: output.write("name,invasive\n") for i in range(0, len(result)): output.write(str(result[i][0]) + "," + str(result[i][1]) + "\n")
# Load label names to use in prediction results
label_list_path = 'datasets/cifar-100-python/meta'

keras_dir = os.path.expanduser(os.path.join('~', '.keras'))
datadir_base = os.path.expanduser(keras_dir)
if not os.access(datadir_base, os.W_OK):
    # Fall back to /tmp when the home cache is not writable.
    datadir_base = os.path.join('/tmp', '.keras')
label_list_path = os.path.join(datadir_base, label_list_path)

# NOTE(review): `pickle.load` must only be used on trusted files; this
# reads the dataset's metadata from the local Keras cache.
with open(label_list_path, mode='rb') as f:
    labels = pickle.load(f)

# Evaluate model with test data set and share sample prediction results
evaluation = model.evaluate_generator(
    datagen.flow(x_test, y_test, batch_size=batch_size),
    steps=x_test.shape[0] // batch_size)

print('Model Accuracy = %.2f' % (evaluation[1]))

# `flow` shuffles by default.  The loop below pairs prediction `k` with
# `y_test[k]`, so the samples must be fed in their stored order.
predict_gen = model.predict_generator(
    datagen.flow(x_test, y_test, batch_size=batch_size, shuffle=False),
    steps=x_test.shape[0] // batch_size)

for predict_index, predicted_y in enumerate(predict_gen):
    actual_label = labels['fine_label_names'][np.argmax(y_test[predict_index])]
    predicted_label = labels['fine_label_names'][np.argmax(predicted_y)]
    print('Actual Label = %s vs. Predicted Label = %s' % (actual_label,
                                                          predicted_label))
    if predict_index == num_predictions:
        break
def save_bottlebeck_features():
    """Run the VGG16 convolutional base over the data and save its outputs.

    Builds the convolutional part of VGG16 and loads the matching layers
    from the weight file at `weights_path`.  It then writes the network's
    outputs for the training and validation directories to
    `bottleneck_features_train.npy` and
    `bottleneck_features_validation.npy`.
    """
    datagen = ImageDataGenerator(rescale=1./255)

    # build the VGG16 network
    # (number of filters, number of conv layers) for each of the five blocks;
    # layers are added in the same order and with the same names as before.
    vgg_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    model = Sequential()
    for block_no, (n_filters, n_convs) in enumerate(vgg_blocks, start=1):
        for conv_no in range(1, n_convs + 1):
            if block_no == 1 and conv_no == 1:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D((1, 1),
                                        input_shape=(3, img_width, img_height)))
            else:
                model.add(ZeroPadding2D((1, 1)))
            model.add(Convolution2D(
                n_filters, 3, 3, activation='relu',
                name='conv{}_{}'.format(block_no, conv_no)))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # load the weights of the VGG16 networks
    # (trained on ImageNet, won the ILSVRC competition in 2014)
    # note: when there is a complete match between your model definition
    # and your weight savefile, you can simply call model.load_weights(filename)
    assert os.path.exists(weights_path), 'Model weights not found (see "weights_path" variable in script).'
    # The context manager closes the file even if a layer fails to load.
    with h5py.File(weights_path, 'r') as f:
        for k in range(f.attrs['nb_layers']):
            if k >= len(model.layers):
                # we don't look at the last (fully-connected) layers in the savefile
                break
            g = f['layer_{}'.format(k)]
            weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
            model.layers[k].set_weights(weights)
    print('Model loaded.')

    generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=16,
        class_mode=None,
        shuffle=False)
    bottleneck_features_train = model.predict_generator(generator, nb_train_samples)
    # `.npy` is a binary format: open in 'wb' and close the handle promptly.
    with open('bottleneck_features_train.npy', 'wb') as out_file:
        np.save(out_file, bottleneck_features_train)

    generator = datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=16,
        class_mode=None,
        shuffle=False)
    print('Generating features')
    bottleneck_features_validation = model.predict_generator(generator, nb_validation_samples)
    print('Features generated!')
    with open('bottleneck_features_validation.npy', 'wb') as out_file:
        np.save(out_file, bottleneck_features_validation)
# Finish and save the accuracy figure.
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('acc.png')

# summarize history for loss
plt.cla()
for curve in ('loss', 'val_loss'):
    plt.plot(epoch_count, train_history.history[curve])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('loss.png')

predict = classifier.predict_generator(testing_set)

from sklearn.metrics import confusion_matrix
import numpy as np

# Ground truth is hard-coded: 300 samples of class 0, then 300 of class 1.
y_true = np.repeat([0, 1], 300)
y_pred = np.greater(predict, 0.5)
cm = confusion_matrix(y_true, y_pred)
print(cm)

plt.cla()
fig = plt.figure()
plt.matshow(cm)
plt.title('Confusion Matrix')
plt.colorbar()
# NOTE(review): the next two statements are the tail of a plotting helper
# whose header lies above this excerpt; restore their indentation on merge.
sp.set_title(titles[i], fontsize=16)
plt.imshow(ims[i], interpolation=None if interp else 'none')

from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix

print("NOW WE SEE CLASS 0: \n")
print("******************************************************")
test_imgs, test_labels = next(test_batches)
plots(test_imgs, titles=test_labels)
print("test_labels : \n", test_labels)
test_labels = test_labels[:,0]
print("test_labels[:,0] : \n", test_labels)

# Predict on the batch that was just drawn.  `predict_generator(test_batches,
# steps=1)` pulls a further batch from the iterator, so its predictions
# were compared against labels of a different batch.
predictions = model.predict(test_imgs, verbose=0)
cm = confusion_matrix(test_labels, np.round(predictions[:,0]))
print("predictions: \n",predictions)
print("predictions[:,0] : \n",predictions[:,0])
print("np.round(predictions[:,0] : \n",np.round(predictions[:,0]))
cm_plots_labels = ["Normal","Mild","Moderate","Severe"]
plt.figure()
#plot_confusion_matrix(cm, cm_plots_labels)

print("******************************************************")
print("NOW WE SEE CLASS 1: ")
print("******************************************************")
test_imgs, test_labels = next(test_batches)
plots(test_imgs, titles=test_labels)
test_labels = test_labels[:,1]
print("test_labels: \n", test_labels)
# Same pairing rule as above: predict on the images whose labels we hold.
predictions = model.predict(test_imgs, verbose=0)
model.add(Dense(6, activation='sigmoid'))
model.compile(optimizers.rmsprop(lr=0.0001, decay=1e-6),
              loss="binary_crossentropy",
              metrics=["accuracy"])

STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size
# Round up for prediction: floor division drops the final partial batch and
# leaves the trailing test samples without a prediction.
STEP_SIZE_TEST = ((test_generator.n + test_generator.batch_size - 1)
                  // test_generator.batch_size)

model.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=10)

test_generator.reset()
pred = model.predict_generator(test_generator,
                               steps=STEP_SIZE_TEST,
                               verbose=1)
pred_bool = (pred > 0.5)

final = []
predictions = pred_bool.astype(int)
count = 0
# Label reported for each output column, in column order.  Only the first
# flagged column contributes, exactly as the former if/elif chain did.
column_labels = ['apples', 'bananas', 'cans', 'cardboard', 'oranges']
for flags in predictions:
    for column, label in enumerate(column_labels):
        if flags[column] == 1:
            final.append([label])
            break
plt.subplot(1, 2, 2)
plt.ylabel('Accuracy', fontsize=16)
plt.plot(model.history.history['acc'], label='Training Accuracy')
plt.plot(model.history.history['val_acc'], label='Validation Accuracy')
plt.legend(loc='lower right')
plt.show()


# ## Finally computing the predictions and plotting the actual vs predicted labels in a confusion matrix

# In[17]:


# compute predictions
# `steps` is a number of batches: `nb_samples/batch_size` is a float (or a
# floored int under Python 2).  Round up to a whole number of batches so the
# final partial batch is included.
prediction_steps = (nb_samples + batch_size - 1) // batch_size
predictions = model.predict_generator(generator=validation_generator,
                                      steps=prediction_steps)
y_pred = [np.argmax(probas) for probas in predictions]
y_test = validation_generator.classes
class_names = validation_generator.class_indices.keys()

from sklearn.metrics import confusion_matrix
import itertools


def plot_confusion_matrix(cm, classes, title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw `cm`, normalised so that each row sums to 1, as an image."""
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    plt.figure(figsize=(10,10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
model.fit_generator(training_set, samples_per_epoch=10000, epochs=6)

import pickle

with open('DogsVsCats', 'wb') as f:
    pickle.dump(model, f)
# Round-trip check only: the reloaded object is discarded.
with open('DogsVsCats', 'rb') as f:
    pickle.load(f)

m = training_set.class_indices

# Predict over every batch of the iterator.  `y_pred` used to be overwritten
# by a `steps=1` call (a single batch), and the reshape to the full dataset
# size below then failed.
y_pred = model.predict_generator(training_set, steps=len(training_set))
y_pred2 = model.predict_generator(training_set, steps=2)

m = y_pred > 0.5
# Flatten without hard-coding the sample count (25000 in the original).
df = pd.Series(data=m.reshape(-1))
# NOTE(review): confirm against `training_set.class_indices` that an output
# above 0.5 really means 'cat', and that `training_set` is not shuffled —
# otherwise these labels do not line up with `dataframe['Labels']`.
mp = df.map({True: 'cat', False: 'dog'})

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(dataframe['Labels'], mp)

'''Predictions'''
from keras.preprocessing import image
test_image = image.load_img('./train/cat.45.jpg', target_size=(128, 128))
test_image = image.img_to_array(test_image)
history = model.fit_generator(train_gen,
                              epochs=epochs,
                              validation_data=validate_gen,
                              validation_steps=total_validate//batch_size,
                              steps_per_epoch=total_train//batch_size,
                              callbacks=callbacks)
# fit_generator: Fits the model on data yielded batch-by-batch by a Python generator. (deprecated)

# TEST DATA PREP
test_dir = 'C:\\Users\\Tejas Chaturvedi\\Downloads\\dogs-vs-cats\\test1'
test_filename = os.listdir(test_dir)
test_df = pd.DataFrame({'filenames': test_filename})
nb_samples = test_df.shape[0]

test_datagen = ImageDataGenerator(rescale=1./255)
# The test set has no labels (`y_col=None`), so the generator yields images
# only (`class_mode=None`).  Shuffling is off so that prediction `k` belongs
# to row `k` of `test_df`.
test_gen = test_datagen.flow_from_dataframe(test_df,
                                            test_dir,
                                            x_col='filenames',
                                            y_col=None,
                                            target_size=Image_size,
                                            class_mode=None,
                                            batch_size=batch_size,
                                            shuffle=False)

# MAKE CATEGORICAL PREDICTION
# `steps` is the number of batches to draw from the generator; round up so
# the last partial batch is included, and pass an int rather than a float.
predict = model.predict_generator(test_gen,
                                  steps=int(np.ceil(nb_samples / batch_size)))

# CONVERT LABELS TO CATEGORIES
# np.argmax returns the indices of the maximum values along an axis.
test_df['category'] = np.argmax(predict, axis=-1)
label = dict((v, k) for k, v in train_gen.class_indices.items())
test_df['category'] = test_df['category'].replace(label)
test_df['category'] = test_df['category'].replace({'dog': 1, 'cat': 0})

# VISUALIZING THE RESULTS
sample_test = test_df.head(18)
sample_test.head()
plt.figure(figsize=(12, 24))
for index, row in sample_test.iterrows():
    # The column is named 'filenames' (see `test_df` above); reading
    # `row['filename']` raised KeyError.
    filename = row['filenames']
    category = row['category']
class_mode='categorical') # 3. model fit history = model.fit_generator(train_set, # 모델은 위에서 미리 선언 steps_per_epoch=30, epochs=30, validation_data=val_set, validation_steps=50) # 4. 시각화 import matplotlib.pyplot as plt plt.plot(history.history['loss']) plt.plot(history.history['val_loss']) plt.plot(history.history['accuracy']) plt.plot(history.history['val_accuracy']) plt.title('CNN model loss, acc') plt.xlabel('epoch') plt.ylabel('loss, acc') plt.legend(['train loss', 'test loss', 'train acc', 'test acc']) plt.show() # 5. predict pred = model.predict_generator(test_set) # 6. model 저장 model.save('C:/Users/User/Desktop/save/CNN_example(categorical).h5') print('모델 저장이 완료되었습니다.')
# NOTE(review): `steps_per_epoch` / `validation_steps` count batches, but
# they are set to image counts here (3044 / 870 per the original comments);
# confirm whether images // batch size was intended.
fit_options = dict(
    steps_per_epoch=num_of_training_images,    # No of images in training set 3044
    epochs=num_of_epochs,
    validation_data=test_set,
    validation_steps=num_of_testing_images,    # No of images in test set 870
)
H = classifier.fit_generator(training_set, **fit_options)

generator = test_datagen.flow_from_directory(
    'data/test',
    target_size=(64, 64),
    batch_size=batch_size,
    color_mode='grayscale',
    class_mode=None,  # only data, no labels
    shuffle=False,    # keep data in same order as labels
)

# Class with the highest probability for every sample.
probabilities = classifier.predict_generator(generator)
y_pred = np.argmax(probabilities, axis=1)

print('Confusion Matrix')
print(confusion_matrix(generator.classes, y_pred))

# classification report for precision, recall f1-score and accuracy
target_names = [
    'Double click',
    'Index',
    'Left-click',
    'Right-click',
    'Screenshot',
    'Scroll-down',
    'Scroll-up',
]
matrix = classification_report(generator.classes,
                               y_pred,
                               target_names=target_names)
print('Classification report : \n', matrix)

# Saving the model
def main(hdf5_paths, iptagger, n_train, n_test, n_validate):
    '''
    Train (or resume) the network, predict on the test files and plot
    background rejection at 70% efficiency in bins of jet pT.

    Args:
        hdf5_paths: file paths; split into train / test / validate sets by
            whether the path contains 'train', 'test' or 'validate'.
        iptagger: tagger name; selects the `<iptagger>_vars` columns and is
            used in the weight-file name and plot labels.
        n_train: samples per training epoch.
        n_test: number of test samples to predict.
        n_validate: number of validation samples per epoch.
    '''
    train_paths = [f for f in hdf5_paths if 'train' in f]
    test_paths = [f for f in hdf5_paths if 'test' in f]
    validate_paths = [f for f in hdf5_paths if 'validate' in f]

    def batch(paths, iptagger, batch_size, random=True):
        '''Yield (X, y, w) batches from `paths` forever.

        With `random` set, the file order (in place) and the rows within
        each file are shuffled on every pass.
        '''
        while True:
            if random:
                np.random.shuffle(paths)
            for fp in paths:
                d = io.load(fp)
                X = np.concatenate([d['X'], d[iptagger + '_vars']], axis=1)
                le = LabelEncoder()
                y = le.fit_transform(d['y'])
                w = d['w']
                if random:
                    # `range` is not shuffleable in place on Python 3;
                    # materialise the indices first.
                    ix = list(range(X.shape[0]))
                    np.random.shuffle(ix)
                    X, y, w = X[ix], y[ix], w[ix]
                n_batches = int(np.ceil(X.shape[0] / float(batch_size)))
                for i in range(n_batches):
                    rows = slice(i * batch_size, (i + 1) * batch_size)
                    yield X[rows], y[rows], w[rows]

    def get_n_vars(train_paths, iptagger):
        '''Return the number of input columns, read from the first file.'''
        d = io.load(train_paths[0])
        return np.concatenate([d['X'], d[iptagger + '_vars']], axis=1).shape[1]

    net = Sequential()
    net.add(Dense(50, input_shape=(get_n_vars(train_paths, iptagger), ), activation='relu'))
    net.add(Dropout(0.3))
    net.add(Dense(40, activation='relu'))
    net.add(Dropout(0.2))
    net.add(Dense(16, activation='relu'))
    net.add(Dropout(0.1))
    net.add(Dense(16, activation='relu'))
    net.add(Dropout(0.1))
    net.add(Dense(4, activation='softmax'))

    net.summary()
    net.compile('adam', 'sparse_categorical_crossentropy')

    weights_path = './' + iptagger + '-' + MODEL_NAME + '-progress.h5'
    try:
        print('Trying to load weights from ' + weights_path)
        net.load_weights(weights_path)
        print('Weights found and loaded from ' + weights_path)
    except IOError:
        print('Could not find weight in ' + weights_path)

    # -- train
    try:
        net.fit_generator(
            batch(train_paths, iptagger, 256, random=True),
            samples_per_epoch=n_train,
            verbose=True,
            callbacks=[
                EarlyStopping(verbose=True, patience=100, monitor='val_loss'),
                ModelCheckpoint(weights_path, monitor='val_loss', verbose=True, save_best_only=True)
            ],
            nb_epoch=200,
            validation_data=batch(validate_paths, iptagger, 64, random=False),
            nb_val_samples=n_validate
        )
    except KeyboardInterrupt:
        # Ctrl-C ends training early; evaluation continues with the best
        # checkpoint saved so far.
        print('\n Stopping early.')

    # -- load in best network
    print('Loading best network...')
    net.load_weights(weights_path)

    print('Extracting...')
    test = [extract(filepath, ['pt', 'y', 'mv2c10']) for filepath in test_paths]

    # -- test
    print('Testing...')
    yhat = net.predict_generator(batch(test_paths, iptagger, 2048, random=False), val_samples=n_test)

    # Concatenate the per-file dicts key by key, in file order (replaces the
    # Python-2-only `reduce` / `iteritems` pair).
    merged = test[0]
    for part in test[1:]:
        merged = {k: np.concatenate((v, part[k])) for k, v in merged.items()}
    test = merged

    print('Plotting...')
    _ = performance(yhat, test['y'], test['mv2c10'], iptagger)

    # -- Performance by pT
    print('Plotting performance in bins of pT...')
    pt_bins = [10000, 50000, 100000, 150000, 200000, 300000, 500000, max(test['pt'])+1]
    bn = np.digitize(test['pt'], pt_bins)
    from collections import OrderedDict
    rej_at_70 = OrderedDict()
    for b in np.unique(bn):
        rej_at_70.update(
            performance(
                yhat[bn == b],
                test['y'][bn == b],
                test['mv2c10'][bn == b],
                iptagger,
                '{}-{}GeV'.format(pt_bins[b-1]/1000, pt_bins[b]/1000)
            )
        )

    # -- find center of each bin:
    bins_mean = [(pt_bins[i]+pt_bins[i+1])/2 for i in range(len(pt_bins)-1)]
    # -- horizontal error bars of length = half the bin width.  Computed as
    #    (upper edge - centre) so the values are non-negative; the old
    #    (centre - upper edge) form produced negative error bars.
    xerr = [pt_bins[i+1]-bins_mean[i] for i in range(len(bins_mean))]

    def _plot_rejection(flavour, title, filename):
        '''Plot DL1 vs MV2c10 rejection per pT bin for one flavour pair.'''
        plt.clf()
        curves = (
            ('DL1_70_' + flavour, 'green', 'DL1' + iptagger),
            ('MV2_70_' + flavour, 'red', 'MV2c10'),
        )
        for key, color, label in curves:
            _ = plt.errorbar(
                bins_mean,
                [rej_at_70[k][key] for k in rej_at_70.keys()],
                xerr=xerr,
                fmt='o', capsize=0, color=color, label=label, alpha=0.7)
        plt.legend()
        plt.title(title)
        plt.yscale('log')
        plt.xlabel(r'$p_{T, \mathrm{jet}} \ \mathrm{MeV}$')
        plt.ylabel('Background rejection at 70% efficiency')
        plt.xlim(xmax=1000000)
        plt.savefig(filename)

    _plot_rejection('bl', 'b vs. l rejection at 70% efficiency in pT bins', 'pt_bl.pdf')
    _plot_rejection('bc', 'b vs. c rejection at 70% efficiency in pT bins', 'pt_bc.pdf')
class deepmased(object):
    """
    Implements a convolutional network for chimera prediction.

    `config` supplies the hyper-parameters read in `__init__`
    (`max_len`, `filters`, `n_conv`, `n_features`, `pool_window`,
    `dropout`, `lr_init`, `mode`, `n_fc`, `n_hid`).  `config.mode` selects
    the head: 'chimera' / 'extensive' build a sigmoid classifier, 'edit'
    builds a linear regressor.

    Raises:
        ValueError: if `config.mode` is not one of the supported modes.
    """

    _SUPPORTED_MODES = ('chimera', 'extensive', 'edit')

    def __init__(self, config):
        mode = config.mode
        # Validate before building anything.  The old code ended with
        # `raise ('...')`, which raises a string and therefore fails with
        # an unrelated TypeError.
        if mode not in self._SUPPORTED_MODES:
            raise ValueError('Training mode not supported.')

        max_len = config.max_len
        filters = config.filters
        n_conv = config.n_conv
        n_features = config.n_features
        pool_window = config.pool_window
        dropout = config.dropout
        lr_init = config.lr_init
        n_fc = config.n_fc
        n_hid = config.n_hid

        self.net = Sequential()

        # First convolution spans the full feature axis.
        self.net.add(
            Conv2D(filters,
                   kernel_size=(2, n_features),
                   input_shape=(max_len, n_features, 1),
                   activation='relu',
                   padding='valid'))
        self.net.add(BatchNormalization(axis=-1))

        # Each further convolution doubles the filter count.
        for i in range(1, n_conv):
            self.net.add(
                Conv2D(2**i * filters,
                       kernel_size=(2, 1),
                       strides=2,
                       input_shape=(max_len, 1, 2**(i - 1) * filters),
                       activation='relu'))
            self.net.add(BatchNormalization(axis=-1))

        self.net.add(AveragePooling2D((pool_window, 1)))
        self.net.add(Flatten())

        optimizer = keras.optimizers.adam(lr=lr_init)

        if mode in ['chimera', 'extensive']:
            for _ in range(n_fc - 1):
                self.net.add(Dense(n_hid, activation='relu'))
                self.net.add(Dropout(rate=dropout))

            self.net.add(Dense(1, activation='sigmoid'))
            # NOTE(review): dropout is applied after the output layer here;
            # kept as-is so the layer stack is unchanged — confirm intent.
            self.net.add(Dropout(rate=dropout))

            recall_0 = utils.class_recall(0)
            recall_1 = utils.class_recall(1)
            self.net.compile(loss='binary_crossentropy',
                             optimizer=optimizer,
                             metrics=[recall_0, recall_1])
        else:
            # mode == 'edit'
            self.net.add(Dense(20, activation='relu'))
            self.net.add(Dropout(rate=dropout))
            self.net.add(Dense(20, activation='relu'))
            self.net.add(Dropout(rate=dropout))
            self.net.add(Dense(1, activation='linear'))
            self.net.compile(loss='mean_absolute_error',
                             optimizer=optimizer,
                             metrics=[utils.explained_var])

        # Halve the learning rate when `val_loss` stalls for 5 epochs,
        # down to 1% of the initial rate.
        self.reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                           factor=0.5,
                                                           patience=5,
                                                           min_lr=0.01 * lr_init)

    def predict(self, x):
        """Return the network's predictions for `x`."""
        return self.net.predict(x)

    def predict_generator(self, x):
        """Return the network's predictions for the generator `x`."""
        return self.net.predict_generator(x)

    def print_summary(self):
        """Print the Keras model summary."""
        print(self.net.summary())

    def save(self, path):
        """Save the underlying Keras model to `path`."""
        self.net.save(path)
# Training
logger.info("Training model, {}".format(model.to_json()))

# Hold out 2% of the training rows for validation.
X_train, X_val, y_train, y_val = \
    train_test_split(Xtrain, dummy_y, test_size=0.02, random_state=42)

num_epoch = 15
batch_gen = batch_generator(X_train, y_train, 32, True)
fit = model.fit_generator(generator=batch_gen,
                          nb_epoch=num_epoch,
                          samples_per_epoch=69984,
                          validation_data=(X_val.todense(), y_val),
                          verbose=2)

# Evaluate the model
scores_val = model.predict_generator(
    generator=batch_predict_generator(X_val, 32, False),
    val_samples=X_val.shape[0])
scores = model.predict_generator(
    generator=batch_predict_generator(Xtest, 32, False),
    val_samples=Xtest.shape[0])

logger.info("logloss val {}".format(log_loss(y_val, scores_val)))

# Get the predicted probabilities and prepare the file for submission.
# (The frame used to be wrapped in an identical `pd.DataFrame(...)` a
# second time, which changed nothing.)
pred = pd.DataFrame(scores, index=test.index, columns=y_enc.classes_)

ts = time.strftime("%a_%d%b%Y_%H%M%S")
name_prefix = "sparse_keras_v2_{}epoch_".format(num_epoch)
file_path = os.path.join("submissions", "%s%s.csv" % (name_prefix, ts))
pred.to_csv(file_path, index=True)
u.gzip_file(file_path)
class r08522721_ThreeLayerCNN(classification):
    """Convolutional image classifier configured through ``self.param``.

    Reads the keys ``hidden_neuron``, ``DropOut``, ``shape_size``,
    ``hidden_kernel_size``, ``hidden_activation``, ``optimizer``,
    ``batch_size`` and ``epochs`` from ``self.param``.
    """

    def trainAlgo(self):
        """Build, compile and fit the CNN on ``inputData['X']`` / ``outputData['Y']``."""
        self.model = Sequential()
        n_filters = int(self.param['hidden_neuron'])
        # A dropout rate is a fraction; the previous int() cast (and the
        # floor division below) truncated it to 0 and disabled dropout.
        dropout_rate = float(self.param['DropOut'])
        SZ = int(self.param['shape_size'])
        kernel = (self.param['hidden_kernel_size'],
                  self.param['hidden_kernel_size'])

        self.model.add(
            Conv2D(n_filters // 8,
                   kernel,
                   input_shape=(SZ, SZ, 3),
                   data_format='channels_last',
                   activation=self.param['hidden_activation'],
                   padding='same'))
        self.model.add(
            Conv2D(n_filters // 4, kernel, padding='same', activation='relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        self.model.add(
            Conv2D(n_filters // 2, kernel, padding='same', activation='relu'))
        self.model.add(
            Conv2D(n_filters, kernel, padding='same', activation='relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))
        self.model.add(Dropout(dropout_rate / 2))

        self.model.add(
            Conv2D(n_filters, kernel, padding='same', activation='relu'))
        self.model.add(
            Conv2D(n_filters, kernel, padding='same', activation='relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        self.model.add(Flatten())
        self.model.add(Dense(500, activation='relu'))
        self.model.add(Dropout(dropout_rate))
        # One softmax unit per column of the target matrix.
        self.model.add(
            Dense(self.outputData['Y'].shape[1], activation='softmax'))

        self.model.compile(loss='categorical_crossentropy',
                           optimizer=self.param['optimizer'])
        self.model.fit_generator(
            XYdataGenerator(self.inputData['X'], self.outputData['Y'], SZ, SZ,
                            self.param['batch_size']),
            steps_per_epoch=int(
                ceil((len(self.inputData['X']) / self.param['batch_size']))),
            epochs=self.param['epochs'])

    def predictAlgo(self):
        """Predict on ``inputData['X']`` and store the output in ``result['Y']``."""
        SZ = int(self.param['shape_size'])
        r = self.model.predict_generator(
            XdataGenerator(self.inputData['X'], SZ, SZ,
                           self.param['batch_size']),
            steps=int(
                ceil((len(self.inputData['X']) / self.param['batch_size']))))
        self.result['Y'] = r
def test_sequential():
    """End-to-end check of a small Sequential model.

    Covers fit variants, generator-based predict/evaluate agreeing with
    ``evaluate``, a weight save/load round trip, and config/JSON/YAML
    serialization.
    """
    (X_train, y_train), (X_test, y_test) = _get_test_data()

    # TODO: factor out
    def data_generator(x, y, batch_size=50):
        """Yield ``(x_batch, y_batch)`` slices of *x*, *y* in order, forever."""
        index_array = np.arange(len(x))
        while 1:
            # Size the batches from the generator's own input; this used to
            # read the enclosing X_test and ignore the `x` argument.
            batches = make_batches(len(x), batch_size)
            for batch_start, batch_end in batches:
                batch_ids = index_array[batch_start:batch_end]
                x_batch = x[batch_ids]
                y_batch = y[batch_ids]
                yield (x_batch, y_batch)

    model = Sequential()
    model.add(Dense(nb_hidden, input_shape=(input_dim,)))
    model.add(Activation("relu"))
    model.add(Dense(nb_class))
    model.add(Activation("softmax"))
    model.compile(loss="categorical_crossentropy", optimizer="rmsprop")

    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=1, validation_data=(X_test, y_test))
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=2, validation_split=0.1)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=0)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=1, shuffle=False)

    model.train_on_batch(X_train[:32], y_train[:32])

    loss = model.evaluate(X_test, y_test)

    # The generator-based paths must reproduce the in-memory loss.
    prediction = model.predict_generator(data_generator(X_test, y_test),
                                         X_test.shape[0], max_q_size=2)
    gen_loss = model.evaluate_generator(data_generator(X_test, y_test, 50),
                                        X_test.shape[0], max_q_size=2)
    pred_loss = K.eval(K.mean(objectives.get(model.loss)(
        K.variable(y_test), K.variable(prediction))))

    assert np.isclose(pred_loss, loss)
    assert np.isclose(gen_loss, loss)

    model.predict(X_test, verbose=0)
    model.predict_classes(X_test, verbose=0)
    model.predict_proba(X_test, verbose=0)

    # Weight round trip: a fresh model loaded from disk must give the same loss.
    fname = "test_sequential_temp.h5"
    model.save_weights(fname, overwrite=True)
    model = Sequential()
    model.add(Dense(nb_hidden, input_shape=(input_dim,)))
    model.add(Activation("relu"))
    model.add(Dense(nb_class))
    model.add(Activation("softmax"))
    model.compile(loss="categorical_crossentropy", optimizer="rmsprop")
    model.load_weights(fname)
    os.remove(fname)

    nloss = model.evaluate(X_test, y_test, verbose=0)
    assert loss == nloss

    # test serialization
    config = model.get_config()
    new_model = Sequential.from_config(config)

    model.summary()
    json_str = model.to_json()
    new_model = model_from_json(json_str)

    yaml_str = model.to_yaml()
    new_model = model_from_yaml(yaml_str)
def save_test_bottlebeck_features():
    """Run the test images through the VGG16 conv stack and save the output.

    Builds the convolutional part of VGG16, loads its weights layer by layer
    from the HDF5 file at ``weights_path``, predicts on the images under
    ``test_data_dir`` and writes the result to
    ``bottleneck_features_test.npy``.
    """
    datagen = ImageDataGenerator(rescale=1. / 255)

    # build the VGG16 network: five blocks of (filters, number of convs),
    # each conv preceded by 1-pixel zero padding, each block ended by a
    # 2x2 max-pool. Layer order and names match the explicit listing.
    model = Sequential()
    vgg_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
    for block_no, (n_filters, n_convs) in enumerate(vgg_blocks, start=1):
        for conv_no in range(1, n_convs + 1):
            if block_no == 1 and conv_no == 1:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D(
                    (1, 1), input_shape=(3, img_width, img_height)))
            else:
                model.add(ZeroPadding2D((1, 1)))
            model.add(Convolution2D(
                n_filters, 3, 3, activation='relu',
                name='conv{}_{}'.format(block_no, conv_no)))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # load the weights of the VGG16 networks
    # (trained on ImageNet, won the ILSVRC competition in 2014)
    # note: when there is a complete match between your model definition
    # and your weight savefile, you can simply call model.load_weights(filename)
    assert os.path.exists(
        weights_path
    ), 'Model weights not found (see "weights_path" variable in script).'
    # Open read-only and via a context manager so the file is closed even
    # if copying a layer's weights fails.
    with h5py.File(weights_path, 'r') as f:
        for k in range(f.attrs['nb_layers']):
            if k >= len(model.layers):
                # we don't look at the last (fully-connected) layers in the savefile
                break
            g = f['layer_{}'.format(k)]
            weights = [
                g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])
            ]
            model.layers[k].set_weights(weights)
    print('Model loaded.')

    # shuffle=False keeps the features in directory order.
    generator = datagen.flow_from_directory(test_data_dir,
                                            batch_size=32,
                                            target_size=(img_width,
                                                         img_height),
                                            class_mode=None,
                                            shuffle=False)
    bottleneck_features_test = model.predict_generator(generator,
                                                       nb_test_samples)
    with open('bottleneck_features_test.npy', 'wb') as out_file:
        np.save(out_file, bottleneck_features_test)
###################################################################################################
# Storing the model to output
###################################################################################################
print("[INFO] Storing trained model....")
MODEL.save("./trained_model.hdf5")
MODEL.save_weights("./trained_weights.hdf5")

###################################################################################################
# Evaluate the model and store the report and history log
###################################################################################################
print("[INFO] Evaluating the model....")
PREDICTIONS = MODEL.predict_generator(generator=TEST_DATA,
                                      steps=NUM_OF_TEST_SAMPLES,
                                      verbose=VERBOSITY)
# Predicted class = index of the highest score in each row.
Y_PREDICTIONS = np.argmax(PREDICTIONS, axis=1)
# (A stray bare `TEST_DATA` expression statement stood here; it had no effect.)
TESTSET["Expected"] = Y_PREDICTIONS
TESTSET.to_csv("Submission.csv", index=False)

# Final-epoch metrics from the training history.
ACCURACY = HISTORY.history["acc"][-1] * 100
VALIDATION_ACCURACY = HISTORY.history["val_acc"][-1] * 100
LOSS = HISTORY.history["loss"][-1]
COHEN_KAPPA = HISTORY.history["cohen_kappa"][-1]
VALIDATION_LOSS = HISTORY.history["val_loss"][-1]
VALIDATION_COHEN_KAPPA = HISTORY.history["val_cohen_kappa"][-1]
def main(unused_argv):
    """Train a small binary image CNN and write its predictions to a text file.

    Trains on the first-round directory, validates on the second-round
    directory, then predicts on the check directory and writes one
    ``filename:prediction`` line per picture to
    ``<model_num>_predictions.txt``.

    Args:
        unused_argv: ignored.
    """
    sess = tf.Session()
    K.set_session(sess)

    model = Sequential()
    # Keras layers can be called on TensorFlow tensors:
    model.add(
        Conv2D(16,
               kernel_size=(3, 3),
               activation='relu',
               input_shape=(img_size, img_size, 3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
    model.add(GlobalMaxPooling2D())
    model.add(Dropout(.4))
    model.add(
        Dense(128, activation='relu',
              kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dropout(.4))
    model.add(
        Dense(64, activation='relu',
              kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dropout(.4))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adadelta',
                  metrics=['accuracy'])

    # Augmentation configuration used for training.
    train_datagen = ImageDataGenerator(rescale=1. / 255,
                                       shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True)

    # this is the augmentation configuration we will use for testing:
    # only rescaling
    test_datagen = ImageDataGenerator(rescale=1. / 255)

    # this is a generator that will read pictures found in subfolders of
    # the training directory and indefinitely generate batches of
    # augmented image data
    train_generator = train_datagen.flow_from_directory(
        'C:\\Users\\User\\Documents\\FirstRoundTraining',  # target directory
        target_size=(img_size, img_size),  # images resized to img_size x img_size
        batch_size=batch_size,
        class_mode='binary'
    )  # since we use binary_crossentropy loss, we need binary labels

    validation_generator = test_datagen.flow_from_directory(
        'C:\\Users\\User\\Documents\\SecondRoundTraining',
        target_size=(img_size, img_size),
        batch_size=batch_size,
        class_mode='binary')

    model.fit_generator(
        train_generator,
        steps_per_epoch=NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN // batch_size,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=NUM_EXAMPLES_PER_EPOCH_FOR_EVAL // batch_size,
        callbacks=[
            TensorBoard(log_dir='SnekChecker' + model_num),
            ModelCheckpoint(model_num + 'try.hdf5',
                            save_best_only=True,
                            mode='min')
        ])

    predict_datagen = ImageDataGenerator(rescale=1. / 255)
    predict_generator = predict_datagen.flow_from_directory(
        'C:\\Users\\User\\Documents\\CheckSneks',  # target directory
        target_size=(img_size, img_size),  # images resized to img_size x img_size
        batch_size=batch_size,
        shuffle=False,
        class_mode='binary')

    # One extra step covers a final partial batch; surplus predictions are
    # trimmed with [:num_pics] below.
    predictions = model.predict_generator(predict_generator,
                                          steps=num_pics // batch_size + 1)

    # NOTE(review): pairing relies on sorted(os.listdir(...)) matching the
    # generator's file order -- confirm against predict_generator.filenames.
    files = sorted(os.listdir(directory + "\\Snakes"))
    with open(model_num + "_predictions.txt", 'w+') as f:
        # Plain loop instead of a list comprehension used for side effects.
        for file_name, prediction in zip(files, predictions[:num_pics]):
            f.write(str(file_name) + ':' + str(prediction) + '\n')
# Shrink the learning rate by sqrt(0.1) once val_loss stops improving
# for 5 epochs, down to a floor of 1e-8.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=math.sqrt(0.1),
    patience=5,
    min_lr=1e-8,
    verbose=1,
)

# fit model
model.fit_generator(
    train_it,
    steps_per_epoch=4511 // batch_size,
    validation_data=val_it,
    validation_steps=1288 // batch_size,
    epochs=50,
    callbacks=[reduce_lr],
    verbose=1,
)

# Confusion matrix on the validation data, one sample per step.
val_it = val_generator(val_X, val_Y, 1)
Y_pred = model.predict_generator(val_it, 1288)
y_pred = Y_pred.argmax(axis=1)
print('Confusion Matrix')
print(confusion_matrix(val_data[:, 8193], y_pred))

# Confusion matrix on the test data, one sample per step.
test_it = val_generator(test_X, test_Y, 1)
Y_pred = model.predict_generator(test_it, len(test_X))
y_pred = Y_pred.argmax(axis=1)
print('Confusion Matrix')
print(confusion_matrix(test_data[:, 8193], y_pred))

#model.fit(train_X, train_Y, epochs=300, verbose=0, batch_size=16)
# connect the encoder LSTM as the output layer
plt.plot(epochs, acc_train, "b", label="Training Accuracy")
plt.plot(epochs, val_acc, "r", label="Validation Accuracy")
plt.title("Accuracy: Training and Validation")
plt.xlabel("Epochs")
plt.ylim(0, 1)
plt.legend()
plt.savefig("accuracy.png", dpi=300)
plt.show()

# Calculate test scores
test_loss, test_acc = model.evaluate_generator(test_ds, steps=50)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

# Confusion Matrix
# NOTE(review): comparing with test_ds.classes assumes test_ds yields
# samples in unshuffled order -- confirm where test_ds is created.
Y_pred = model.predict_generator(test_ds)
y_pred = np.argmax(Y_pred, axis=1)
cm = confusion_matrix(test_ds.classes, y_pred)
print("Confusion Matrix - Test Data Set")
print(cm)

index = ["flat", "trill"]
columns = ["flat", "trill"]
# Pass index/columns by keyword; positionally they landed in each other's
# slot (harmless only because both lists are identical).
cm_df = pd.DataFrame(cm, index=index, columns=columns)
# NOTE(review): np.sum on a DataFrame has reduced per column or over the
# whole frame depending on the pandas version -- confirm the intended
# normalisation.
sns.heatmap(cm_df / np.sum(cm_df),
            annot=True,
            fmt=".2%",
            cmap="Blues",
            cbar=False)
plt.title("Confusion Matrix")
# confusion_matrix(y_true, y_pred) puts true classes on rows (heatmap
# y-axis) and predicted classes on columns (x-axis); the two axis labels
# were swapped.
plt.xlabel("Predicted Classes")
plt.ylabel("True Classes")
# Persist the trained weights.
model.save_weights("weights.hdf5", overwrite=True)

# Persist the architecture itself as JSON.
model_json = model.to_json()
with open("model.json", "w") as model_file:
    model_file.write(model_json)
print("Model has been saved.")

# Try the model on one image from the test set.
img = load_img('Dataset/test_set/stop/stop26.jpg', target_size=(200, 200))
x = array(img)
img = cv2.cvtColor(x, cv2.COLOR_RGB2GRAY)
# Add a leading batch axis and a trailing channel axis in one reshape.
img = img.reshape((1,) + img.shape + (1,))
test_datagen = ImageDataGenerator(rescale=1. / 255)
m = test_datagen.flow(img, batch_size=1)
y_pred = model.predict_generator(m, 1)

# Render the model schema to a picture.
plot_model(model, to_file='model.png', show_shapes=True)
class Resnet50():
    """The Resnet 50 Imagenet model"""

    def __init__(self,
                 size=(224, 224),
                 n_classes=2,
                 lr=0.001,
                 batch_size=64,
                 dropout=0.2):
        """Store the hyper-parameters; no model is built until ``build*``.

        Args:
            size: ``(height, width)`` of the input images.
            n_classes: number of softmax outputs.
            lr: Adam learning rate.
            batch_size: batch size for the directory generators.
            dropout: dropout rate applied before the final Dense layer.
        """
        self.weights_file = 'resnet_nt.h5'  # download from: http://www.platform.ai/models/
        self.size = size
        self.n_classes = n_classes
        self.lr = lr
        self.batch_size = batch_size
        self.dropout = dropout

    def get_base(self):
        """Gets base architecture of Resnet 50 Model"""
        input_shape = (3, ) + self.size
        img_input = Input(shape=input_shape)
        bn_axis = 1

        x = Lambda(preprocess)(img_input)
        x = ZeroPadding2D((3, 3))(x)
        x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1')(x)
        x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((3, 3), strides=(2, 2))(x)

        x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
        x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
        x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

        x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
        for n in ['b', 'c', 'd']:
            x = identity_block(x, 3, [128, 128, 512], stage=3, block=n)

        x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
        for n in ['b', 'c', 'd', 'e', 'f']:
            x = identity_block(x, 3, [256, 256, 1024], stage=4, block=n)

        x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
        x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
        x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

        self.img_input = img_input
        self.model = Model(self.img_input, x)
        convert_all_kernels_in_model(self.model)
        self.model.load_weights(self.weights_file)

    def build(self):
        """Builds the model and stacks global average pooling layer on top"""
        self.get_base()
        self.model.layers.pop()
        # Freeze the base layers; only the new head is trainable.
        for layer in self.model.layers:
            layer.trainable = False

        m = GlobalAveragePooling2D()(self.model.layers[-1].output)
        m = Dropout(self.dropout)(m)
        # (A stray, unused Dense(...) instantiation was removed here.)
        m = Dense(self.n_classes, activation='softmax')(m)

        self.model = Model(self.model.input, m)
        self.model.compile(optimizer=optimizers.Adam(lr=self.lr),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])
        return self.model

    def build_precomputed(self):
        """
        Builds the model based on output of last layer of base architecture;
        Used for training with bottleneck features;
        """
        self.get_base()
        self.model = Sequential([
            GlobalAveragePooling2D(
                input_shape=self.model.layers[-1].output_shape[1:]),
            Dropout(self.dropout),
            Dense(self.n_classes, activation="softmax"),
        ])
        self.model.compile(optimizer=optimizers.Adam(lr=self.lr),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])
        return self.model

    def get_datagen(self, aug=False):
        """Return an ImageDataGenerator, with augmentation when *aug* is true."""
        if aug:
            return ImageDataGenerator(rotation_range=10,
                                      width_shift_range=0.05,
                                      zoom_range=0.05,
                                      channel_shift_range=10,
                                      height_shift_range=0.05,
                                      shear_range=0.05,
                                      horizontal_flip=True)
        return ImageDataGenerator()

    def fit_val(self,
                trn_path,
                val_path,
                nb_trn_samples,
                nb_val_samples,
                nb_epoch=1,
                callbacks=None,
                aug=False):
        """Custom fit method for training with validation data and option for data augmentation"""
        # None replaces the former mutable default `[]`, which was shared
        # between calls.
        callbacks = [] if callbacks is None else callbacks
        train_datagen = self.get_datagen(aug=aug)
        val_datagen = self.get_datagen(aug=False)

        trn_gen = train_datagen.flow_from_directory(trn_path,
                                                    target_size=self.size,
                                                    batch_size=self.batch_size,
                                                    class_mode='categorical',
                                                    shuffle=True)
        val_gen = val_datagen.flow_from_directory(val_path,
                                                  target_size=self.size,
                                                  batch_size=self.batch_size,
                                                  class_mode='categorical',
                                                  shuffle=True)
        self.model.fit_generator(trn_gen,
                                 samples_per_epoch=nb_trn_samples,
                                 nb_epoch=nb_epoch,
                                 verbose=2,
                                 validation_data=val_gen,
                                 nb_val_samples=nb_val_samples,
                                 callbacks=callbacks)

    def fit_full(self,
                 trn_path,
                 nb_trn_samples,
                 nb_epoch=1,
                 callbacks=None,
                 aug=False):
        """Custom fit method for training without validation data and option for data augmentation"""
        # None replaces the former mutable default `[]`.
        callbacks = [] if callbacks is None else callbacks
        train_datagen = self.get_datagen(aug=aug)

        trn_gen = train_datagen.flow_from_directory(trn_path,
                                                    target_size=self.size,
                                                    batch_size=self.batch_size,
                                                    class_mode='categorical',
                                                    shuffle=True)
        self.model.fit_generator(trn_gen,
                                 samples_per_epoch=nb_trn_samples,
                                 nb_epoch=nb_epoch,
                                 verbose=2,
                                 callbacks=callbacks)

    def test(self, test_path, nb_test_samples, aug=False):
        """Custom prediction method with option for data augmentation

        Returns:
            A ``(predictions, filenames)`` tuple; the generator is created
            with ``shuffle=False``.
        """
        test_datagen = self.get_datagen(aug=aug)
        test_gen = test_datagen.flow_from_directory(test_path,
                                                    target_size=self.size,
                                                    batch_size=self.batch_size,
                                                    class_mode=None,
                                                    shuffle=False)
        return self.model.predict_generator(
            test_gen, val_samples=nb_test_samples), test_gen.filenames
m = model.fit_generator(img,
                        epochs=10,
                        steps_per_epoch=500,
                        validation_data=img_test,
                        validation_steps=640)
scores = model.evaluate_generator(img_test, steps=1000, verbose=1)
print("Accuracy is %s" % (scores[1] * 100))

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn import metrics

# Predict over exactly one pass of the test iterator so the number of
# predictions matches img_test.classes; the fixed steps=5 only lined up
# when the iterator happened to hold exactly five batches.
# NOTE(review): the comparison below also assumes img_test was created
# with shuffle=False -- confirm where img_test is built.
Y_pred = model.predict_generator(img_test, steps=len(img_test))
y_pred = np.argmax(Y_pred, axis=1)
print("Confusion Metrix")
print(metrics.confusion_matrix(img_test.classes, y_pred))
print("Classification Report")
print(metrics.classification_report(img_test.classes, y_pred))

from keras.models import model_from_yaml

# Persist the architecture as YAML and the weights as HDF5.
model_yaml = model.to_yaml()
with open("model.yaml", "w") as yaml_file:
    yaml_file.write(model_yaml)
model.save_weights("model.h5")
print("Saved model")
def test_multiprocessing_predicting():
    """Run ``predict_generator`` with multiprocessing over several worker counts.

    On Windows (``os.name == 'nt'``) the multiprocessing calls are expected
    to raise ``ValueError``; elsewhere they must simply complete.
    """
    arr_data = np.random.randint(0, 256, (50, 2))

    @threadsafe_generator
    def custom_generator():
        """Yield random contiguous 10-row slices of ``arr_data`` forever."""
        batch_size = 10
        n_samples = 50

        while True:
            batch_index = np.random.randint(0, n_samples - batch_size)
            start = batch_index
            end = start + batch_size
            X = arr_data[start: end]
            yield X

    # Build a NN
    model = Sequential()
    model.add(Dense(1, input_shape=(2,)))
    model.compile(loss='mse', optimizer='adadelta')

    # String equality, not identity: `os.name is 'nt'` depends on string
    # interning and triggers a SyntaxWarning on Python 3.8+.
    on_windows = os.name == 'nt'

    # - Produce data on 4 worker processes, consume on main process:
    #   - Each worker process runs OWN copy of generator
    #   - BUT on Windows, `multiprocessing` won't marshall generators across
    #     process boundaries -> make sure `predict_generator()` raises ValueError
    #     exception and does not attempt to run the generator.
    if on_windows:
        with pytest.raises(ValueError):
            model.predict_generator(custom_generator(),
                                    steps=STEPS,
                                    max_queue_size=10,
                                    workers=WORKERS,
                                    use_multiprocessing=True)
    else:
        model.predict_generator(custom_generator(),
                                steps=STEPS,
                                max_queue_size=10,
                                workers=WORKERS,
                                use_multiprocessing=True)

    # - Produce data on 1 worker process, consume on main process:
    #   - Worker process runs generator
    #   - BUT on Windows, `multiprocessing` won't marshall generators across
    #     process boundaries -> make sure `predict_generator()` raises ValueError
    #     exception and does not attempt to run the generator.
    if on_windows:
        with pytest.raises(ValueError):
            model.predict_generator(custom_generator(),
                                    steps=STEPS,
                                    max_queue_size=10,
                                    workers=1,
                                    use_multiprocessing=True)
    else:
        model.predict_generator(custom_generator(),
                                steps=STEPS,
                                max_queue_size=10,
                                workers=1,
                                use_multiprocessing=True)

    # - Main thread runs the generator without a queue
    #   - Make sure the value of `use_multiprocessing` is ignored
    model.predict_generator(custom_generator(),
                            steps=STEPS,
                            max_queue_size=10,
                            workers=0,
                            use_multiprocessing=True)
# Iterator over the test directory: 100x100 images, 3 per batch,
# binary labels.
test_flow_options = dict(target_size=(100, 100),
                         batch_size=3,
                         class_mode='binary')
test_generator = data_generator.flow_from_directory(
    'D:/Study/mini_proj_testdata/', **test_flow_options)
print(len(test_generator))

images.append(test_generator)
print(images)

# Train, validating against the test iterator.
classifier.fit_generator(
    train_generator,
    steps_per_epoch=300,
    epochs=2,
    validation_data=test_generator,
    validation_steps=5,
)

# Predict on 5 test batches, then score on 5 training batches.
predict = classifier.predict_generator(test_generator, steps=5)
loss, acc = classifier.evaluate_generator(train_generator, steps=5)
# print(train_generator.class_indices)

print("loss : ", loss)
print("acc : ", acc)
print(predict)

# train_generator = np.array(train_generator)
# print(train_generator)
# test_generator = np.array(test_generator)
# print(test_generator.shape)
# x_train, y_train = train_generator.next()
# print(x_train.shape)
# print(y_train.shape)
# x_train = x_train.reshape(100,100,3)
def test_multiprocessing_predict_error():
    """Check that a generator failure surfaces from ``predict_generator``.

    The generator yields ``good_batches`` batches and then raises
    ``RuntimeError``; each call requests one step more than the good batches
    available.
    """
    arr_data = np.random.randint(0, 256, (50, 2))
    good_batches = 3

    @threadsafe_generator
    def custom_generator():
        """Raises an exception after a few good batches"""
        batch_size = 10
        n_samples = 50

        for i in range(good_batches):
            batch_index = np.random.randint(0, n_samples - batch_size)
            start = batch_index
            end = start + batch_size
            X = arr_data[start: end]
            yield X
        raise RuntimeError

    model = Sequential()
    model.add(Dense(1, input_shape=(2,)))
    model.compile(loss='mse', optimizer='adadelta')

    # - Produce data on 4 worker processes, consume on main process:
    #   - Each worker process runs OWN copy of generator
    #   - On Windows this test expects `StopIteration`
    #     NOTE(review): the sibling non-error test expects `ValueError` on
    #     Windows for the same setup -- confirm which one is right.
    #   - On other platforms, make sure `RuntimeError` exception bubbles up
    # String equality, not identity: `os.name is 'nt'` depends on string
    # interning and triggers a SyntaxWarning on Python 3.8+.
    if os.name == 'nt':
        with pytest.raises(StopIteration):
            model.predict_generator(custom_generator(),
                                    steps=good_batches * WORKERS + 1,
                                    max_queue_size=10,
                                    workers=WORKERS,
                                    use_multiprocessing=True)
    else:
        with pytest.raises(RuntimeError):
            model.predict_generator(custom_generator(),
                                    steps=good_batches * WORKERS + 1,
                                    max_queue_size=10,
                                    workers=WORKERS,
                                    use_multiprocessing=True)

    # - Produce data on 1 worker process, consume on main process:
    #   - Worker process runs generator
    #   - `RuntimeError` is expected on every platform; the former Windows
    #     and non-Windows branches were byte-identical and are merged.
    with pytest.raises(RuntimeError):
        model.predict_generator(custom_generator(),
                                steps=good_batches + 1,
                                max_queue_size=10,
                                workers=1,
                                use_multiprocessing=True)

    # - Produce and consume data without a queue on main thread
    #   - Make sure the value of `use_multiprocessing` is ignored
    #   - Make sure `RuntimeError` exception bubbles up
    with pytest.raises(RuntimeError):
        model.predict_generator(custom_generator(),
                                steps=good_batches + 1,
                                max_queue_size=10,
                                workers=0,
                                use_multiprocessing=True)
Conv2D(256, kernel_size=8, strides=6, padding='same', activation='relu'), Dropout(0.2), MaxPool2D(pool_size=4), Flatten(), Dense(1, activation='linear', kernel_initializer='random_normal', bias_initializer='zeros') ]) model.compile(Adam(lr=0.001, beta_1=0.97, beta_2=0.998), loss='mse') model.summary() model.fit_generator(train_data, steps_per_epoch=25, epochs=25) predict = model.predict_generator(test_data, steps=24) #mse = mean_squared_error(test_y_label, predict) test_data_df['price'] *= scaler predict *= scaler print(predict) print(test_data_df['price']) print('Qt prediction {}'.format(predict.shape[0])) print('Mean in prediction: {}, mean in labels: {}'.format( predict.mean(), np.mean(test_data_df['price']))) model.save(r'C:\Users\Kojimba\PycharmProjects\DeepEval\_trained_model\CNN.h5') print('Model saved to disk') with open( r'C:\Users\Kojimba\PycharmProjects\DeepEval\_trained_model\CNN_scaler.bin',
# Locate the CIFAR-10 label-name file inside the Keras cache directory,
# falling back to /tmp/.keras when the home cache is not writable.
label_list_path = 'datasets/cifar-10-batches-py/batches.meta'

keras_dir = os.path.expanduser(os.path.join('~', '.keras'))
datadir_base = os.path.expanduser(keras_dir)
if not os.access(datadir_base, os.W_OK):
    datadir_base = os.path.join('/tmp', '.keras')
label_list_path = os.path.join(datadir_base, label_list_path)

# NOTE: pickle.load executes arbitrary code from the file; only safe
# because this file comes from the local dataset cache.
with open(label_list_path, mode='rb') as f:
    labels = pickle.load(f)

# Evaluate model with test data set and share sample prediction results
evaluation = model.evaluate_generator(datagen.flow(x_test, y_test,
                                                   batch_size=batch_size),
                                      steps=x_test.shape[0] // batch_size)

print('Model Accuracy = %.2f' % (evaluation[1]))

# shuffle=False is required here: predictions are compared with
# y_test[predict_index] by position below, and flow() shuffles by default,
# which made the printed actual/predicted pairs refer to different images.
predict_gen = model.predict_generator(datagen.flow(x_test, y_test,
                                                   batch_size=batch_size,
                                                   shuffle=False),
                                      steps=x_test.shape[0] // batch_size)

for predict_index, predicted_y in enumerate(predict_gen):
    actual_label = labels['label_names'][np.argmax(y_test[predict_index])]
    predicted_label = labels['label_names'][np.argmax(predicted_y)]
    print('Actual Label = %s vs. Predicted Label = %s' % (actual_label,
                                                          predicted_label))
    if predict_index == num_predictions:
        break
def test_multiprocessing_predicting():
    """Run ``predict_generator`` over process, thread and queue-less setups.

    On Windows (``os.name == 'nt'``) the multiprocessing calls are expected
    to raise ``ValueError``; all other calls must simply complete.
    """
    arr_data = np.random.randint(0, 256, (50, 2))

    def custom_generator():
        """Yield random contiguous 10-row slices of ``arr_data`` forever."""
        batch_size = 10
        n_samples = 50

        while True:
            batch_index = np.random.randint(0, n_samples - batch_size)
            start = batch_index
            end = start + batch_size
            X = arr_data[start:end]
            yield X

    # Build a NN
    model = Sequential()
    model.add(Dense(1, input_shape=(2, )))
    model.compile(loss='mse', optimizer='adadelta')

    # String equality, not identity: `os.name is 'nt'` depends on string
    # interning and triggers a SyntaxWarning on Python 3.8+.
    on_windows = os.name == 'nt'

    # - Produce data on 4 worker processes, consume on main process:
    #   - Each worker process runs OWN copy of generator
    #   - BUT on Windows, `multiprocessing` won't marshall generators across
    #     process boundaries -> make sure `predict_generator()` raises ValueError
    #     exception and does not attempt to run the generator.
    if on_windows:
        with pytest.raises(ValueError):
            model.predict_generator(custom_generator(),
                                    steps=STEPS,
                                    max_queue_size=10,
                                    workers=WORKERS,
                                    use_multiprocessing=True)
    else:
        model.predict_generator(custom_generator(),
                                steps=STEPS,
                                max_queue_size=10,
                                workers=WORKERS,
                                use_multiprocessing=True)

    # - Produce data on 4 worker threads, consume on main thread:
    #   - All worker threads share the SAME generator
    model.predict_generator(custom_generator(),
                            steps=STEPS,
                            max_queue_size=10,
                            workers=WORKERS,
                            use_multiprocessing=False)

    # - Produce data on 1 worker process, consume on main process:
    #   - Worker process runs generator
    #   - BUT on Windows, `multiprocessing` won't marshall generators across
    #     process boundaries -> make sure `predict_generator()` raises ValueError
    #     exception and does not attempt to run the generator.
    if on_windows:
        with pytest.raises(ValueError):
            model.predict_generator(custom_generator(),
                                    steps=STEPS,
                                    max_queue_size=10,
                                    workers=1,
                                    use_multiprocessing=True)
    else:
        model.predict_generator(custom_generator(),
                                steps=STEPS,
                                max_queue_size=10,
                                workers=1,
                                use_multiprocessing=True)

    # - Produce data on 1 worker thread, consume on main thread:
    #   - Worker thread is the only thread running the generator
    model.predict_generator(custom_generator(),
                            steps=STEPS,
                            max_queue_size=10,
                            workers=1,
                            use_multiprocessing=False)

    # - Main thread runs the generator without a queue
    #   - Make sure the value of `use_multiprocessing` is ignored
    model.predict_generator(custom_generator(),
                            steps=STEPS,
                            max_queue_size=10,
                            workers=0,
                            use_multiprocessing=True)
    model.predict_generator(custom_generator(),
                            steps=STEPS,
                            max_queue_size=10,
                            workers=0,
                            use_multiprocessing=False)
def save_bottlebeck_features(train_generator, nb_train_samples,
                             validation_generator, nb_validation_samples):
    """Save VGG16 conv-stack outputs for the training and validation data.

    Builds the convolutional part of VGG16, loads its weights layer by layer
    from an HDF5 file, then writes the predictions for each generator to
    ``bottleneck_features_train.npy`` and
    ``bottleneck_features_validation.npy``.

    Args:
        train_generator: batch generator over the training images.
        nb_train_samples: second positional argument passed to
            ``predict_generator`` for the training generator.
        validation_generator: batch generator over the validation images.
        nb_validation_samples: second positional argument passed to
            ``predict_generator`` for the validation generator.
    """
    # build the VGG16 network: five blocks of (filters, number of convs),
    # each conv preceded by 1-pixel zero padding, each block ended by a
    # 2x2 max-pool. Layer order and names match the explicit listing.
    model = Sequential()
    vgg_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
    for block_no, (n_filters, n_convs) in enumerate(vgg_blocks, start=1):
        for conv_no in range(1, n_convs + 1):
            if block_no == 1 and conv_no == 1:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D(
                    (1, 1), input_shape=(3, img_rows, img_cols)))
            else:
                model.add(ZeroPadding2D((1, 1)))
            model.add(Convolution2D(
                n_filters, 3, 3, activation='relu',
                name='conv{}_{}'.format(block_no, conv_no)))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # load the weights of the VGG16 networks
    # (trained on ImageNet, won the ILSVRC competition in 2014)
    # note: when there is a complete match between your model definition
    # and your weight savefile, you can simply call model.load_weights(filename)
    # NOTE(review): the check uses `weights_path` but the file opened below
    # is a hard-coded path -- confirm they are meant to be the same file.
    assert os.path.exists(weights_path), 'Model weights not found (see "weights_path" variable in script).'
    # Open read-only and via a context manager so the file is always closed.
    with h5py.File('../input/vgg16_weights.h5', 'r') as f:
        for k in range(f.attrs['nb_layers']):
            if k >= len(model.layers):
                # we don't look at the last (fully-connected) layers in the savefile
                break
            g = f['layer_{}'.format(k)]
            weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
            model.layers[k].set_weights(weights)
    print('Model loaded.')

    model.compile(optimizer='adam', loss='categorical_crossentropy')

    # .npy is a binary format: the files must be opened in 'wb' mode (text
    # mode 'w' fails on Python 3), and `with` closes them.
    bottleneck_features_train = model.predict_generator(train_generator, nb_train_samples)
    with open('bottleneck_features_train.npy', 'wb') as train_file:
        np.save(train_file, bottleneck_features_train)

    bottleneck_features_validation = model.predict_generator(validation_generator, nb_validation_samples)
    with open('bottleneck_features_validation.npy', 'wb') as validation_file:
        np.save(validation_file, bottleneck_features_validation)
def fit():
    """Train a small CNN on PNG files listed by index and score it on the test set.

    Labels are read from ``labels_train.csv`` and ``labels_test.csv``; the
    image file names are derived from the label counts
    (``train/train<i>.png`` and ``test/test<i>.png``). Batches for both
    training and prediction come from ``myGenerator``, which is defined
    elsewhere in this file.

    Returns:
        tuple: ``(pred, Y_test)`` -- the ``predict_generator`` output for the
        test files and the one-hot encoded test labels.
    """
    batch_size = 128
    nb_epoch = 1
    chunk_size = 15000

    # input image dimensions
    img_rows, img_cols = 28, 28
    # number of convolutional filters to use
    nb_filters = 32
    # size of pooling area for max pooling
    nb_pool = 2
    # convolution kernel size
    nb_conv = 3

    # load all the labels for the train and test sets
    y_train = np.loadtxt('labels_train.csv')
    y_test = np.loadtxt('labels_test.csv')

    # `range` instead of the Python-2-only `xrange` so the function runs on
    # both interpreter lines.
    fnames_train = ['train/train'+str(i)+'.png' for i in range(len(y_train))]
    fnames_test = ['test/test'+str(i)+'.png' for i in range(len(y_test))]

    nb_classes = len(np.unique(y_train))

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train.astype(int), nb_classes)
    Y_test = np_utils.to_categorical(y_test.astype(int), nb_classes)

    model = Sequential()
    model.add(Convolution2D(nb_filters, nb_conv, nb_conv,
                            border_mode='valid',
                            input_shape=(1, img_rows, img_cols)))
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adadelta',
                  metrics=['accuracy'])

    model.fit_generator(
        myGenerator(Y_train, chunk_size, batch_size, fnames_train),
        samples_per_epoch=y_train.shape[0],
        nb_epoch=nb_epoch,
        verbose=2,
        callbacks=[],
        validation_data=None,
        class_weight=None)

    # No labels are passed for prediction; note the batch size of 100 here
    # differs from the training batch size.
    pred = model.predict_generator(
        myGenerator(None, chunk_size, 100, fnames_test), len(fnames_test))

    print('Test accuracy:',
          np.mean(np.argmax(pred, axis=1) == np.argmax(Y_test, axis=1)))

    return pred, Y_test
# only rescaling
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='binary')

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='binary')

model.fit_generator(
    train_generator,
    samples_per_epoch=nb_train_samples,
    nb_epoch=nb_epoch,
    validation_data=validation_generator,
    nb_val_samples=nb_validation_samples)

# Keep the test images in directory order: with the default shuffle=True the
# rows of `predictions` cannot be matched back to the files they came from.
test_generator = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='binary',
    shuffle=False)

predictions = model.predict_generator(test_generator, nb_test_samples)
print(predictions)
# Locate the label list under ~/.keras, falling back to /tmp/.keras when the
# home location is not writable.
keras_dir = os.path.expanduser(os.path.join('~', '.keras'))
datadir_base = keras_dir
if not os.access(datadir_base, os.W_OK):
    datadir_base = os.path.join('/tmp', '.keras')
label_list_path = os.path.join(datadir_base, label_list_path)
# NOTE(review): pickle.load executes arbitrary code from the file -- only
# point this at a dataset file you trust.
with open(label_list_path, mode='rb') as f:
    labels = pickle.load(f)

# Evaluate model with test data set and share sample prediction results
evaluation = model.evaluate_generator(
    datagen.flow(x_test, y_test, batch_size=batch_size, shuffle=False),
    steps=x_test.shape[0] // batch_size,
    workers=4)
print('Model Accuracy = %.2f' % (evaluation[1]))

# shuffle=False keeps row i of `predict_gen` aligned with `y_test[i]`.
predict_gen = model.predict_generator(
    datagen.flow(x_test, y_test, batch_size=batch_size, shuffle=False),
    steps=x_test.shape[0] // batch_size,
    workers=4)

# Show exactly `num_predictions` rows; the previous "break when the index
# equals num_predictions" check ran after printing and so showed one extra.
for predict_index, predicted_y in enumerate(predict_gen[:num_predictions]):
    actual_label = labels['label_names'][np.argmax(y_test[predict_index])]
    predicted_label = labels['label_names'][np.argmax(predicted_y)]
    print('Actual Label = %s vs. Predicted Label = %s' % (actual_label,
                                                          predicted_label))
def save_bottlebeck_features():
    """Build the pretrained VGG16 model and run it on the training and validation sets.

    The convolutional stack mirrors the VGG16 architecture so the pretrained
    weights in ``weights_path`` can be copied in layer by layer. The
    ``predict_generator`` outputs are written to
    ``bottleneck_features_train.npy`` and
    ``bottleneck_features_validation.npy`` in the current directory.

    Relies on the module-level names ``img_width``, ``img_height``,
    ``weights_path``, ``train_data_dir``, ``validation_data_dir``,
    ``nb_train_samples`` and ``nb_validation_samples``.
    """
    datagen = ImageDataGenerator(rescale=1./255)

    # (number of filters, number of conv layers) for each VGG16 block.
    vgg16_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    # match the vgg16 architecture so we can load the pretrained weights
    # into this model
    model = Sequential()
    first_layer = True
    for block_no, (nb_filter, nb_conv) in enumerate(vgg16_blocks, start=1):
        for conv_no in range(1, nb_conv + 1):
            if first_layer:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D(
                    (1, 1), input_shape=(3, img_width, img_height)))
                first_layer = False
            else:
                model.add(ZeroPadding2D((1, 1)))
            model.add(Convolution2D(
                nb_filter, 3, 3, activation='relu',
                name='conv{}_{}'.format(block_no, conv_no)))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # load VGG16 weights; the savefile may hold more layers than this model,
    # so stop once every layer of the model has been filled
    with h5py.File(weights_path, 'r') as f:
        for k in range(f.attrs['nb_layers']):
            if k >= len(model.layers):
                break
            g = f['layer_{}'.format(k)]
            weights = [g['param_{}'.format(p)]
                       for p in range(g.attrs['nb_params'])]
            model.layers[k].set_weights(weights)
    # Function-call form prints the same text on Python 2 and Python 3.
    print('Model loaded.')

    # class_mode=None yields images only; shuffle=False keeps the saved rows
    # in directory order.
    generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=32,
        class_mode=None,
        shuffle=False)
    bottleneck_features_train = model.predict_generator(
        generator, nb_train_samples)
    with open('bottleneck_features_train.npy', 'wb') as out:
        np.save(out, bottleneck_features_train)

    generator = datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=32,
        class_mode=None,
        shuffle=False)
    bottleneck_features_validation = model.predict_generator(
        generator, nb_validation_samples)
    with open('bottleneck_features_validation.npy', 'wb') as out:
        np.save(out, bottleneck_features_validation)
def test_multiprocessing_predict_error():
    """Check that generator failures surface from ``predict_generator``.

    A generator that raises ``RuntimeError`` after ``good_batches`` batches is
    consumed with one more step than it can serve, under every combination of
    worker count and ``use_multiprocessing`` exercised below. Uses the
    module-level constant ``WORKERS``.
    """
    arr_data = np.random.randint(0, 256, (50, 2))
    good_batches = 3

    def custom_generator():
        """Raises an exception after a few good batches"""
        batch_size = 10
        n_samples = 50

        for _ in range(good_batches):
            start = np.random.randint(0, n_samples - batch_size)
            yield arr_data[start:start + batch_size]
        raise RuntimeError

    model = Sequential()
    model.add(Dense(1, input_shape=(2, )))
    model.compile(loss='mse', optimizer='adadelta')

    def predict(steps, workers, use_multiprocessing):
        """Run ``predict_generator`` on a fresh generator with the given setup."""
        model.predict_generator(custom_generator(),
                                steps=steps,
                                max_queue_size=10,
                                workers=workers,
                                use_multiprocessing=use_multiprocessing)

    # On Windows, `multiprocessing` won't marshall generators across process
    # boundaries -> `predict_generator()` must raise ValueError and must not
    # attempt to run the generator. On other platforms the generator's
    # `RuntimeError` must bubble up.
    # Compared with `==`: `is` tests object identity, which is not guaranteed
    # for equal strings.
    multiprocessing_error = ValueError if os.name == 'nt' else RuntimeError

    # - Produce data on 4 worker processes, consume on main process:
    #   - Each worker process runs OWN copy of generator
    with pytest.raises(multiprocessing_error):
        predict(good_batches * WORKERS + 1, WORKERS, True)

    # - Produce data on 4 worker threads, consume on main thread:
    #   - All worker threads share the SAME generator
    #   - Make sure `RuntimeError` exception bubbles up
    with pytest.raises(RuntimeError):
        predict(good_batches * WORKERS + 1, WORKERS, False)

    # - Produce data on 1 worker process, consume on main process:
    #   - Worker process runs generator
    with pytest.raises(multiprocessing_error):
        predict(good_batches + 1, 1, True)

    # - Produce data on 1 worker thread, consume on main thread:
    #   - Worker thread is the only thread running the generator
    #   - Make sure `RuntimeError` exception bubbles up
    with pytest.raises(RuntimeError):
        predict(good_batches + 1, 1, False)

    # - Produce and consume data without a queue on main thread
    #   - Make sure the value of `use_multiprocessing` is ignored
    #   - Make sure `RuntimeError` exception bubbles up
    with pytest.raises(RuntimeError):
        predict(good_batches + 1, 0, True)

    with pytest.raises(RuntimeError):
        predict(good_batches + 1, 0, False)
class Model:
    """Builds, trains and saves a small convolutional image classifier.

    Relies on the module-level names ``img_size``, ``num_classes``,
    ``root_path``, ``batch_siz`` and ``nb_epoch``.
    """

    def __init__(self):
        # Set by build_model(); the other methods expect it to be populated.
        self.model = None

    def build_model(self):
        """Create the network, store it on ``self.model`` and print its summary."""
        net = Sequential()

        net.add(Conv2D(32, (1, 1), strides=1, padding='same',
                       input_shape=(img_size, img_size, 1)))
        net.add(Activation('relu'))
        net.add(Conv2D(32, (5, 5), padding='same'))
        net.add(Activation('relu'))
        net.add(MaxPooling2D(pool_size=(2, 2)))

        net.add(Conv2D(32, (3, 3), padding='same'))
        net.add(Activation('relu'))
        net.add(MaxPooling2D(pool_size=(2, 2)))

        net.add(Conv2D(64, (5, 5), padding='same'))
        net.add(Activation('relu'))
        net.add(MaxPooling2D(pool_size=(2, 2)))

        net.add(Flatten())
        net.add(Dense(2048))
        net.add(Activation('relu'))
        net.add(Dropout(0.5))
        net.add(Dense(1024))
        net.add(Activation('relu'))
        net.add(Dropout(0.5))
        net.add(Dense(num_classes))
        net.add(Activation('softmax'))

        self.model = net
        self.model.summary()

    def train_model(self):
        """Compile and train ``self.model``, then log the fit history and predictions.

        Images are read from the ``train``, ``val`` and ``test``
        sub-directories of ``root_path``. The training history is written to
        ``model_fit_log`` and the ``predict_generator`` output for the test
        images to ``model_predict_log``, both under ``root_path``.
        """
        sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        self.model.compile(loss='categorical_crossentropy',
                           optimizer=sgd,
                           metrics=['accuracy'])

        # Augment the training samples on the fly.
        train_datagen = ImageDataGenerator(
            rescale=1./255,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True)
        # Validation and test images are only rescaled.
        val_datagen = ImageDataGenerator(rescale=1./255)
        eval_datagen = ImageDataGenerator(rescale=1./255)

        # Labels are taken from the class sub-directory names.
        train_generator = train_datagen.flow_from_directory(
            root_path+'/train',
            target_size=(img_size, img_size),
            color_mode='grayscale',
            batch_size=batch_siz,
            class_mode='categorical')
        val_generator = val_datagen.flow_from_directory(
            root_path+'/val',
            target_size=(img_size, img_size),
            color_mode='grayscale',
            batch_size=batch_siz,
            class_mode='categorical')
        eval_generator = eval_datagen.flow_from_directory(
            root_path+'/test',
            target_size=(img_size, img_size),
            color_mode='grayscale',
            batch_size=batch_siz,
            class_mode='categorical')

        # `epochs` is the Keras 2 name matching `steps_per_epoch` and
        # `validation_steps`; the value still comes from the module-level
        # `nb_epoch`.
        # NOTE(review): the original carried a bare "28709" next to
        # steps_per_epoch -- presumably the training-set size; confirm.
        history_fit = self.model.fit_generator(
            train_generator,
            steps_per_epoch=800/(batch_siz/32),
            epochs=nb_epoch,
            validation_data=val_generator,
            validation_steps=2000)

        history_predict = self.model.predict_generator(
            eval_generator,
            steps=2000)

        with open(root_path+'/model_fit_log', 'w') as f:
            f.write(str(history_fit.history))
        with open(root_path+'/model_predict_log', 'w') as f:
            f.write(str(history_predict))
        print('model trained')

    def save_model(self):
        """Write the architecture (JSON), the weights and the full model under ``root_path``."""
        model_json = self.model.to_json()
        with open(root_path+"/model_json.json", "w") as json_file:
            json_file.write(model_json)
        self.model.save_weights(root_path+'/model_weight.h5')
        self.model.save(root_path+'/model.h5')
        print('model saved')
model.load_weights('./model/model_v10.h5')
print('Model imported')

print('Importing class list')
with open('./model/class_list_v10.txt') as f:
    class_list = f.readlines()
class_list = [x.strip() for x in class_list]
print('Class list imported')

predict_img = ImageDataGenerator()

# Interactive loop: each key press predicts one batch drawn from ./test/ and
# prints the class name for the first image of that batch.
while True:
    input('Press to test')
    os.system('clear')
    print(class_list)
    predict_gen = predict_img.flow_from_directory(
        './test/',
        target_size=(img_width, img_height),
        color_mode='grayscale',
        batch_size=batch_size,
        class_mode='sparse'
    )
    output_raw = model.predict_generator(predict_gen, steps=1, verbose=0)
    onehot = output_raw.tolist()
    # Pick the highest-scoring class. Searching for an exact 1 with
    # `.index(1)` raises ValueError whenever no score is exactly 1.
    first_scores = onehot[0]
    best_index = max(range(len(first_scores)), key=first_scores.__getitem__)
    print()
    print(class_list[best_index])
    print()
train_generator, steps_per_epoch=train_generator.samples // batch_size, epochs=epochs_fine_tuned, validation_data=validation_generator, validation_steps=validation_generator.samples // batch_size, callbacks=[checkpointer, tensorboard]) # predictions test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input) test_generator = test_datagen.flow_from_directory( test_data_dir, batch_size=1, target_size=(img_width, img_height), class_mode=None, shuffle=False) features_test = model.predict_generator(test_generator, steps=test_generator.samples, verbose=1) #print(features_test.shape) #np.save(open(features_test_filepath, 'wb'), features_test) # create csv file label_strings = ['bobcat', 'chihuahua', 'collie', 'dalmatian', 'german_shepherd', 'leopard', 'lion', 'persian_cat', 'siamese_cat', 'tiger', 'wolf'] with open(prediction_filepath, 'w') as outfile: csvwriter = csv.writer(outfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) row_to_write = ['Id'] + [label for label in label_strings] csvwriter.writerow(row_to_write) for idx, prediction in enumerate(features_test): assert len(prediction) == len(label_strings) csvwriter.writerow([str(idx + 1)] + ["%.18f" % p for p in prediction])
batch_size=batch_size, class_mode='binary') model.fit_generator( train_generator, steps_per_epoch=nb_train_samples // batch_size, epochs=epochs, validation_data=validation_generator, validation_steps=nb_validation_samples // batch_size) # DISPLAY THE CLASS NAME AND INDEX USED FOR TRAINING print "Class : Index" print train_generator.class_indices # THE FOLLOWING CODE WILL FEED THE TEST DATA TO YOUR MODEL NAMED model test_datagen = ImageDataGenerator(rescale=1. / 255) validation_generator = test_datagen.flow_from_directory( test_data_dir, target_size=(img_width, img_height), batch_size=batch_size, class_mode='binary') predict= model.predict_generator( validation_generator, nb_validation_samples // batch_size) # DISPLAY THE PREDICTED CLASS FOR EACH SAMPLE print predict
print("Accuracy: %.2f%%" % (scores[1] * 100))

# Locate the CIFAR-10 label list under ~/.keras, falling back to /tmp/.keras
# when the home location is not writable.
label_list_path = 'datasets/cifar-10-batches-py/batches.meta'
keras_dir = os.path.expanduser(os.path.join('~', '.keras'))
datadir_base = os.path.expanduser(keras_dir)
if not os.access(datadir_base, os.W_OK):
    # Was `os.spath.join`, which raises AttributeError on this fallback path.
    datadir_base = os.path.join('/tmp', '.keras')
label_list_path = os.path.join(datadir_base, label_list_path)
# NOTE(review): pickle.load executes arbitrary code from the file -- only
# point this at a dataset file you trust.
with open(label_list_path, mode='rb') as f:
    labels = pickle.load(f)
print("Load label names %s" % label_list_path)

# Evaluate with test dataset and share same prediction results
evaluation = model.evaluate_generator(
    datagen.flow(X_test, y_test, batch_size=32),
    steps=X_test.shape[0] // 32)
print('Model accuracy = %.2f' % evaluation[1])

# shuffle=False is required here: the loop below pairs row i of the
# predictions with y_test[i], which only holds when the data is not reordered.
predict_gen = model.predict_generator(
    datagen.flow(X_test, y_test, batch_size=32, shuffle=False),
    steps=X_test.shape[0] // 32)

for predict_index, predicted_y in enumerate(predict_gen):
    actual_label = labels['label_names'][numpy.argmax(y_test[predict_index])]
    predicted_label = labels['label_names'][numpy.argmax(predicted_y)]
    print('Actual label = %s vs. Predicted label = %s' % (actual_label,
                                                          predicted_label))
def test_sequential(in_tmpdir):
    """Exercise fit/evaluate/predict, weight round-tripping and serialization of ``Sequential``.

    Uses the module-level names ``num_hidden``, ``input_dim``,
    ``num_classes``, ``batch_size`` and ``epochs`` together with the
    ``_get_test_data`` helper.

    Args:
        in_tmpdir: Not used in the body; presumably a pytest fixture that
            runs the test inside a temporary directory -- confirm.
    """
    (x_train, y_train), (x_test, y_test) = _get_test_data()

    # TODO: factor out
    def data_generator(x, y, batch_size=50):
        """Yield ``(x, y)`` batches forever, cycling over the data in order."""
        index_array = np.arange(len(x))
        while 1:
            # Batch over the generator's own data, not the enclosing
            # `x_test`, so the helper is correct for any input.
            batches = make_batches(len(x), batch_size)
            for batch_start, batch_end in batches:
                batch_ids = index_array[batch_start:batch_end]
                yield (x[batch_ids], y[batch_ids])

    model = Sequential()
    model.add(Dense(num_hidden, input_shape=(input_dim,)))
    model.add(Activation('relu'))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              verbose=1, validation_data=(x_test, y_test))
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              verbose=2, validation_split=0.1)
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              verbose=0)
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              verbose=1, shuffle=False)

    model.train_on_batch(x_train[:32], y_train[:32])

    loss = model.evaluate(x_test, y_test)

    # The generator-based paths must agree with the array-based loss.
    prediction = model.predict_generator(data_generator(x_test, y_test), 1,
                                         max_queue_size=2, verbose=1)
    gen_loss = model.evaluate_generator(data_generator(x_test, y_test, 50), 1,
                                        max_queue_size=2)
    pred_loss = K.eval(K.mean(losses.get(model.loss)(K.variable(y_test),
                                                     K.variable(prediction))))

    assert np.isclose(pred_loss, loss)
    assert np.isclose(gen_loss, loss)

    model.predict(x_test, verbose=0)
    model.predict_classes(x_test, verbose=0)
    model.predict_proba(x_test, verbose=0)

    # Save the weights, rebuild an identical model, reload, and check that
    # the loss is unchanged.
    fname = 'test_sequential_temp.h5'
    model.save_weights(fname, overwrite=True)
    model = Sequential()
    model.add(Dense(num_hidden, input_shape=(input_dim,)))
    model.add(Activation('relu'))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.load_weights(fname)
    os.remove(fname)

    nloss = model.evaluate(x_test, y_test, verbose=0)
    assert loss == nloss

    # Test serialization
    config = model.get_config()
    assert 'name' in config
    new_model = Sequential.from_config(config)
    assert new_model.weights  # Model should be built.

    model.summary()
    json_str = model.to_json()
    model_from_json(json_str)

    yaml_str = model.to_yaml()
    model_from_yaml(yaml_str)
class Resnet50():
    """The Resnet 50 Imagenet model"""

    def __init__(self, size=(224, 224), n_classes=2, lr=0.001, batch_size=64, dropout=0.2):
        """Store the hyper-parameters; no network is built until ``build*`` is called.

        Args:
            size: Image size tuple; appended to ``(3,)`` to form the input
                shape and used as ``target_size`` for the directory iterators.
            n_classes: Number of units in the final softmax layer.
            lr: Learning rate handed to the Adam optimizer.
            batch_size: Batch size for the directory iterators.
            dropout: Dropout rate applied before the final dense layer.
        """
        self.weights_file = 'resnet_nt.h5'  # download from: http://www.platform.ai/models/
        self.size = size
        self.n_classes = n_classes
        self.lr = lr
        self.batch_size = batch_size
        self.dropout = dropout

    def get_base(self):
        """Gets base architecture of Resnet 50 Model"""
        input_shape = (3,) + self.size
        img_input = Input(shape=input_shape)
        bn_axis = 1

        x = Lambda(preprocess)(img_input)
        x = ZeroPadding2D((3, 3))(x)
        x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1')(x)
        x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((3, 3), strides=(2, 2))(x)

        x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
        x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
        x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

        x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
        for n in ['b', 'c', 'd']:
            x = identity_block(x, 3, [128, 128, 512], stage=3, block=n)

        x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
        for n in ['b', 'c', 'd', 'e', 'f']:
            x = identity_block(x, 3, [256, 256, 1024], stage=4, block=n)

        x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
        x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
        x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

        self.img_input = img_input
        self.model = Model(self.img_input, x)
        convert_all_kernels_in_model(self.model)
        self.model.load_weights(self.weights_file)

    def build(self):
        """Builds the model and stacks global average pooling layer on top

        Every layer of the base is frozen; only the new head is trainable.

        Returns:
            The compiled model, also stored on ``self.model``.
        """
        self.get_base()
        self.model.layers.pop()
        for layer in self.model.layers:
            layer.trainable = False

        m = GlobalAveragePooling2D()(self.model.layers[-1].output)
        m = Dropout(self.dropout)(m)
        m = Dense(self.n_classes, activation='softmax')(m)

        self.model = Model(self.model.input, m)
        self.model.compile(optimizer=optimizers.Adam(lr=self.lr),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])
        return self.model

    def build_precomputed(self):
        """
        Builds the model based on output of last layer of base architecture;
        Used for training with bottleneck features;

        Returns:
            The compiled head-only model, also stored on ``self.model``.
        """
        self.get_base()
        # The base is only needed for the shape of its final output.
        head_input_shape = self.model.layers[-1].output_shape[1:]
        self.model = Sequential([
            GlobalAveragePooling2D(input_shape=head_input_shape),
            Dropout(self.dropout),
            Dense(self.n_classes, activation="softmax"),
        ])

        self.model.compile(optimizer=optimizers.Adam(lr=self.lr),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])
        return self.model

    def get_datagen(self, aug=False):
        """Return an ``ImageDataGenerator``, with augmentation settings when ``aug`` is true."""
        if aug:
            return ImageDataGenerator(rotation_range=10,
                                      width_shift_range=0.05,
                                      zoom_range=0.05,
                                      channel_shift_range=10,
                                      height_shift_range=0.05,
                                      shear_range=0.05,
                                      horizontal_flip=True)
        return ImageDataGenerator()

    def fit_val(self, trn_path, val_path, nb_trn_samples, nb_val_samples, nb_epoch=1, callbacks=None, aug=False):
        """Custom fit method for training with validation data and option for data augmentation

        Args:
            trn_path: Directory of training images.
            val_path: Directory of validation images.
            nb_trn_samples: Passed to ``fit_generator`` as ``samples_per_epoch``.
            nb_val_samples: Passed to ``fit_generator`` as ``nb_val_samples``.
            nb_epoch: Number of epochs.
            callbacks: Optional list of callbacks; ``None`` means no callbacks.
            aug: Whether to augment the training images (validation images
                are never augmented).
        """
        # A fresh list per call avoids sharing one mutable default between
        # calls.
        callbacks = [] if callbacks is None else callbacks

        train_datagen = self.get_datagen(aug=aug)
        val_datagen = self.get_datagen(aug=False)

        trn_gen = train_datagen.flow_from_directory(trn_path,
                                                    target_size=self.size,
                                                    batch_size=self.batch_size,
                                                    class_mode='categorical',
                                                    shuffle=True)
        val_gen = val_datagen.flow_from_directory(val_path,
                                                  target_size=self.size,
                                                  batch_size=self.batch_size,
                                                  class_mode='categorical',
                                                  shuffle=True)

        self.model.fit_generator(trn_gen,
                                 samples_per_epoch=nb_trn_samples,
                                 nb_epoch=nb_epoch,
                                 verbose=2,
                                 validation_data=val_gen,
                                 nb_val_samples=nb_val_samples,
                                 callbacks=callbacks)

    def fit_full(self, trn_path, nb_trn_samples, nb_epoch=1, callbacks=None, aug=False):
        """Custom fit method for training without validation data and option for data augmentation

        Args:
            trn_path: Directory of training images.
            nb_trn_samples: Passed to ``fit_generator`` as ``samples_per_epoch``.
            nb_epoch: Number of epochs.
            callbacks: Optional list of callbacks; ``None`` means no callbacks.
            aug: Whether to augment the training images.
        """
        callbacks = [] if callbacks is None else callbacks

        train_datagen = self.get_datagen(aug=aug)
        trn_gen = train_datagen.flow_from_directory(trn_path,
                                                    target_size=self.size,
                                                    batch_size=self.batch_size,
                                                    class_mode='categorical',
                                                    shuffle=True)

        self.model.fit_generator(trn_gen,
                                 samples_per_epoch=nb_trn_samples,
                                 nb_epoch=nb_epoch,
                                 verbose=2,
                                 callbacks=callbacks)

    def test(self, test_path, nb_test_samples, aug=False):
        """Custom prediction method with option for data augmentation

        Returns:
            tuple: ``(predictions, filenames)`` -- the ``predict_generator``
            output and the iterator's ``filenames``; the images are read
            unshuffled.
        """
        test_datagen = self.get_datagen(aug=aug)
        test_gen = test_datagen.flow_from_directory(test_path,
                                                    target_size=self.size,
                                                    batch_size=self.batch_size,
                                                    class_mode=None,
                                                    shuffle=False)
        predictions = self.model.predict_generator(
            test_gen, val_samples=nb_test_samples)
        return predictions, test_gen.filenames