batch_size=batch_size, class_mode='binary') test_set = test_datagen.flow_from_directory(test_set_path, target_size=input_size, batch_size=batch_size, class_mode='binary') # Create a loss history history = LossHistory() # train model classifier.fit_generator(training_set, steps_per_epoch=8000 / batch_size, epochs=90, validation_data=test_set, validation_steps=2000 / batch_size, workers=12, max_q_size=100, callbacks=[history]) # Serialize Model ModelSerializer.serialize_model_json(classifier, 'loss_history', 'loss_history_weights') # Predict single cases test_image_1 = image.load_img('dataset/single_prediction/cat_or_dog_1.jpg', target_size=input_size) test_image_2 = image.load_img('dataset/single_prediction/cat_or_dog_2.jpg', target_size=input_size) test_image_1 = image.img_to_array(test_image_1)
class KerasModel:
    """Multi-label CNN classifier (sigmoid head over `output_size` classes).

    The network is a 5-stage VGG-style stack (32->512 filters) with
    BatchNormalization on the input and dropout after every pooling stage.
    """

    def __init__(self, img_size, img_channels=3, output_size=17):
        self.losses = []
        self.model = Sequential()
        self.model.add(
            BatchNormalization(input_shape=(img_size[0], img_size[1],
                                            img_channels)))
        self.model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
        self.model.add(Conv2D(32, (3, 3), activation='relu'))
        self.model.add(MaxPooling2D(pool_size=2))
        self.model.add(Dropout(0.3))
        self.model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
        self.model.add(Conv2D(64, (3, 3), activation='relu'))
        self.model.add(MaxPooling2D(pool_size=2))
        self.model.add(Dropout(0.3))
        self.model.add(Conv2D(128, (3, 3), padding='same', activation='relu'))
        self.model.add(Conv2D(128, (3, 3), activation='relu'))
        self.model.add(MaxPooling2D(pool_size=2))
        self.model.add(Dropout(0.3))
        self.model.add(Conv2D(256, (3, 3), padding='same', activation='relu'))
        self.model.add(Conv2D(256, (3, 3), activation='relu'))
        self.model.add(MaxPooling2D(pool_size=2))
        self.model.add(Dropout(0.3))
        self.model.add(Conv2D(512, (3, 3), padding='same', activation='relu'))
        self.model.add(Conv2D(512, (3, 3), activation='relu'))
        self.model.add(MaxPooling2D(pool_size=2))
        self.model.add(Dropout(0.3))
        self.model.add(Flatten())
        self.model.add(Dense(512, activation='relu'))
        self.model.add(BatchNormalization())
        self.model.add(Dropout(0.5))
        self.model.add(Dense(output_size, activation='sigmoid'))

    def get_fbeta_score(self, validation_data, verbose=True):
        """Return (F2 score, per-class thresholds) on `validation_data`.

        `validation_data` is an (X, y) pair; thresholds are optimised per
        class via `optimise_f2_thresholds` before scoring.
        """
        p_valid = self.model.predict(validation_data[0])
        thresholds = optimise_f2_thresholds(validation_data[1],
                                            p_valid,
                                            verbose=verbose)
        return fbeta_score(validation_data[1],
                           np.array(p_valid) > thresholds,
                           beta=2,
                           average='samples'), thresholds

    def fit(self, flow, epochs, lr, validation_data, train_callbacks=None,
            batches=300):
        """Train from a Keras data generator `flow`.

        Returns [fbeta history, train losses, val losses, final F2 score,
        thresholds].  BUG FIX: `train_callbacks` previously defaulted to a
        mutable `[]`, which is shared across calls.
        """
        history = LossHistory()
        fbeta = Fbeta(validation_data)
        opt = Adam(lr=lr)
        self.model.compile(loss='binary_crossentropy',
                           optimizer=opt,
                           metrics=['accuracy'])
        # stop early if validation loss has not improved for 3 epochs
        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      verbose=0,
                                      mode='auto')
        extra_callbacks = list(train_callbacks) if train_callbacks else []
        self.model.fit_generator(flow,
                                 steps_per_epoch=batches,
                                 epochs=epochs,
                                 callbacks=[history, earlyStopping, fbeta] +
                                 extra_callbacks,
                                 validation_data=validation_data)
        fb_score, thresholds = self.get_fbeta_score(validation_data,
                                                    verbose=False)
        return [
            fbeta.fbeta, history.train_losses, history.val_losses, fb_score,
            thresholds
        ]

    def save_weights(self, weight_file_path):
        """Persist model weights to `weight_file_path`."""
        self.model.save_weights(weight_file_path)

    def load_weights(self, weight_file_path):
        """Load model weights from `weight_file_path`."""
        self.model.load_weights(weight_file_path)

    def predict_image(self, image):
        """Predict with 4-way test-time augmentation (identity, ±90°, 180°).

        `image` is assumed to be a float array scaled to [0, 1] — it is
        re-quantised to uint8 for PIL rotation, then re-normalised.
        """
        img = Image.fromarray(np.uint8(image * 255))
        # NOTE: renamed inner loop variable — it previously shadowed `image`.
        rotations = [img.copy().rotate(angle) for angle in [-90, 90, 180]]
        rotations.append(img)
        batch = np.asarray([
            np.asarray(rotated.convert("RGB"), dtype=np.float32) / 255
            for rotated in rotations
        ])
        # average the predictions over all augmented views
        return sum(self.model.predict(batch)) / len(rotations)

    def predict(self, x_test):
        """Predict every image in `x_test` (with TTA); returns a list."""
        return [self.predict_image(img) for img in tqdm(x_test)]

    def map_predictions(self, predictions, labels_map, thresholds):
        """Map raw per-class scores to label names using per-class thresholds."""
        predictions_labels = []
        for prediction in predictions:
            labels = [
                labels_map[i] for i, value in enumerate(prediction)
                if value > thresholds[i]
            ]
            predictions_labels.append(labels)
        return predictions_labels

    def close(self):
        """Release the Keras/TensorFlow session state."""
        backend.clear_session()
class AmazonKerasClassifier:
    """Multi-label image classifier with selectable backbones
    (VGG/SqueezeNet/ResNet/Inception/DenseNet/AlexNet or a small custom CNN).

    Also provides generator-based data loading from CSV + image folders and
    per-class threshold search for the F2 score.
    """

    def __init__(self):
        self.losses = []
        self.classifier = Sequential()
        self.x_vail = []   # validation images (set by train_model)
        self.y_vail = []   # validation targets (set by train_model)
        self.train_filepath = ''
        self.train_img_filepath = ''
        self.valid_filepath = ''
        self.valid_img_filepath = ''
        self.test_img_filepath = ''
        self.test_addition_img_filepath = ''
        self.test_img_name_list = ''
        self.y_map = {}

    # --- simple accessors (kept for backward compatibility) ---
    def setTrainFilePath(self, value):
        self.train_filepath = value

    def getTrainFilePath(self):
        return self.train_filepath

    def setValidFilePath(self, value):
        self.valid_filepath = value

    def getValidFilePath(self):
        return self.valid_filepath

    def setTrainImgFilePath(self, value):
        self.train_img_filepath = value

    def getTrainImgFilePath(self):
        return self.train_img_filepath

    def setValidImgFilePath(self, value):
        self.valid_img_filepath = value

    def getValidImgFilePath(self):
        return self.valid_img_filepath

    def setTestImgFilePath(self, value):
        self.test_img_filepath = value

    def getTestImgFilePath(self):
        return self.test_img_filepath

    def setTestAdditionImgFilePath(self, value):
        self.test_addition_img_filepath = value

    def getTestAdditionImgFilePath(self):
        return self.test_addition_img_filepath

    def getTestImgNameList(self):
        return self.test_img_name_list

    def getYMap(self):
        return self.y_map

    def vgg(self, type=16, bn=False, img_size=(224, 224), img_channels=3,
            output_size=1000):
        """Build a VGG-11/13/16/19 classifier (optionally batch-normalised)."""
        if type == 16 and bn == False:
            layer_list = vgg.vgg16(num_classes=output_size)
        elif type == 16 and bn == True:
            layer_list = vgg.vgg16_bn(num_classes=output_size)
        elif type == 11 and bn == False:
            layer_list = vgg.vgg11(num_classes=output_size)
        elif type == 11 and bn == True:
            layer_list = vgg.vgg11_bn(num_classes=output_size)
        elif type == 13 and bn == False:
            layer_list = vgg.vgg13(num_classes=output_size)
        elif type == 13 and bn == True:
            layer_list = vgg.vgg13_bn(num_classes=output_size)
        elif type == 19 and bn == False:
            layer_list = vgg.vgg19(num_classes=output_size)
        elif type == 19 and bn == True:
            layer_list = vgg.vgg19_bn(num_classes=output_size)
        else:
            print("请输入11,13,16,19这四个数字中的一个!")
            # BUG FIX: previously fell through and raised NameError on
            # `layer_list` below; now bail out like resnet()/densenet() do.
            return
        self.classifier.add(
            BatchNormalization(input_shape=(*img_size, img_channels)))
        for i, value in enumerate(layer_list):
            # NOTE(review): eval() on layer descriptions — safe only because
            # `layer_list` comes from the local vgg module, never user input.
            self.classifier.add(eval(value))

    def squeezenet(self, type, img_size=(64, 64), img_channels=3,
                   output_size=1000):
        """Build SqueezeNet 1.0 (`type == 1`) or 1.1 (`type == 1.1`)."""
        input_shape = Input(shape=(*img_size, img_channels))
        if type == 1:
            x = squeezenet.squeezenet1_0(input_shape, num_classes=output_size)
        elif type == 1.1:
            x = squeezenet.squeezenet1_1(input_shape, num_classes=output_size)
        else:
            print("请输入1,1.0这两个数字中的一个!")
            # BUG FIX: previously fell through with `x` undefined.
            return
        model = Model(inputs=input_shape, outputs=x)
        self.classifier = model

    def resnet(self, type, img_size=(64, 64), img_channels=3,
               output_size=1000):
        """Build a ResNet-18/34/50/101/152 classifier."""
        input_shape = Input(shape=(*img_size, img_channels))
        if type == 18:
            x = resnet.resnet18(input_shape, num_classes=output_size)
        elif type == 34:
            x = resnet.resnet34(input_shape, num_classes=output_size)
        elif type == 50:
            x = resnet.resnet50(input_shape, num_classes=output_size)
        elif type == 101:
            x = resnet.resnet101(input_shape, num_classes=output_size)
        elif type == 152:
            x = resnet.resnet152(input_shape, num_classes=output_size)
        else:
            print("请输入18,34,50,101,152这五个数字中的一个!")
            return
        model = Model(inputs=input_shape, outputs=x)
        self.classifier = model

    def inception(self, img_size=(299, 299), img_channels=3,
                  output_size=1000):
        """Build an Inception-v3 classifier (with auxiliary logits)."""
        input_shape = Input(shape=(*img_size, img_channels))
        x = inception.inception_v3(input_shape,
                                   num_classes=output_size,
                                   aux_logits=True,
                                   transform_input=False)
        model = Model(inputs=input_shape, outputs=x)
        self.classifier = model

    def densenet(self, type, img_size=(299, 299), img_channels=3,
                 output_size=1000):
        """Build a DenseNet-121/161/169/201 classifier."""
        input_shape = Input(shape=(*img_size, img_channels))
        if type == 161:
            x = densenet.densenet161(input_shape, num_classes=output_size)
        elif type == 121:
            x = densenet.densenet121(input_shape, num_classes=output_size)
        elif type == 169:
            x = densenet.densenet169(input_shape, num_classes=output_size)
        elif type == 201:
            x = densenet.densenet201(input_shape, num_classes=output_size)
        else:
            print("请输入161,121,169,201这四个数字中的一个!")
            return
        model = Model(inputs=input_shape, outputs=x)
        self.classifier = model

    def alexnet(self, img_size=(299, 299), img_channels=3, output_size=1000):
        """Build an AlexNet classifier."""
        input_shape = Input(shape=(*img_size, img_channels))
        x = alexnet.alexnet(input_shape, num_classes=output_size)
        model = Model(inputs=input_shape, outputs=x)
        self.classifier = model

    def add_conv_layer(self, img_size=(32, 32), img_channels=3):
        """Append the small built-in 4-stage CNN (32->256 filters)."""
        self.classifier.add(
            BatchNormalization(input_shape=(*img_size, img_channels)))
        self.classifier.add(
            Conv2D(32, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(32, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(
            Conv2D(64, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(64, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(
            Conv2D(128, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(128, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(
            Conv2D(256, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(256, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))

    def add_flatten_layer(self):
        self.classifier.add(Flatten())

    def add_ann_layer(self, output_size):
        """Append the dense sigmoid head for `output_size` labels."""
        self.classifier.add(Dense(512, activation='relu'))
        self.classifier.add(BatchNormalization())
        self.classifier.add(Dropout(0.5))
        self.classifier.add(Dense(output_size, activation='sigmoid'))

    def _get_fbeta_score2(self, classifier, X_valid, y_valid):
        """Return (best per-class thresholds, best F2 score) via grid search."""
        p_valid = classifier.predict(X_valid)
        result_threshold_list_final, score_result = \
            self.grid_search_best_threshold(y_valid, np.array(p_valid))
        return result_threshold_list_final, score_result

    def _get_fbeta_score(self, classifier, X_valid, y_valid):
        """Return the F2 score at a fixed 0.2 threshold."""
        p_valid = classifier.predict(X_valid)
        return fbeta_score(y_valid,
                           np.array(p_valid) > 0.2,
                           beta=2,
                           average='samples')

    def grid_search_best_threshold(self, y_valid, p_valid):
        """Greedy coordinate search over 17 per-class thresholds in 0.1..0.9."""
        threshold_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        result_threshold_list_temp = [0.2] * 17
        result_threshold_list_final = [0.2] * 17
        for i in range(17):
            score_result = 0
            for j in range(9):
                result_threshold_list_temp[i] = threshold_list[j]
                score_temp = fbeta_score(y_valid,
                                         p_valid > result_threshold_list_temp,
                                         beta=2,
                                         average='samples')
                if score_result < score_temp:
                    score_result = score_temp
                    result_threshold_list_final[i] = threshold_list[j]
            result_threshold_list_temp[i] = result_threshold_list_final[i]
        return result_threshold_list_final, score_result

    def train_model(self, x_train, y_train, learn_rate=0.001, epoch=5,
                    batch_size=128, validation_split_size=0.2,
                    train_callbacks=()):
        """Train on in-memory arrays; returns [train losses, val losses, F2]."""
        history = LossHistory()
        X_train, X_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=validation_split_size)
        self.x_vail = X_valid
        self.y_vail = y_valid
        opt = Adam(lr=learn_rate)
        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=opt,
                                metrics=['accuracy'])
        # stop early if validation loss has not improved for 3 epochs
        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      verbose=0,
                                      mode='auto')
        self.classifier.fit(
            X_train,
            y_train,
            batch_size=batch_size,
            epochs=epoch,
            verbose=1,
            validation_data=(X_valid, y_valid),
            callbacks=[history, *train_callbacks, earlyStopping])
        # renamed local: previously shadowed the imported `fbeta_score`
        fb_score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
        return [history.train_losses, history.val_losses, fb_score]

    def train_model_generator(self, generator_train, generator_valid,
                              learn_rate=0.001, epoch=5, batchSize=128,
                              steps=32383, validation_steps=8096,
                              train_callbacks=()):
        """Train from generators; returns [train losses, val losses, F2].

        Requires `self.x_vail`/`self.y_vail` to be populated (e.g. by a prior
        `train_model` call) for the final F2 evaluation.
        """
        history = LossHistory()
        # valid 8096 32383
        opt = Adam(lr=learn_rate)
        # NOTE(review): the `+ 1 - 9` adjustment looks dataset-specific
        # (drops 9 trailing steps) — confirm against the data pipeline.
        steps = steps / batchSize + 1 - 9
        validation_steps = validation_steps / batchSize + 1
        if steps % batchSize == 0:
            steps = steps / batchSize - 9
        if validation_steps % batchSize == 0:
            validation_steps = validation_steps / batchSize
        print(steps, validation_steps)
        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=opt,
                                metrics=['accuracy'])
        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      verbose=0,
                                      mode='auto')
        self.classifier.fit_generator(
            generator_train,
            steps_per_epoch=steps,
            epochs=epoch,
            verbose=1,
            validation_data=generator_valid,
            validation_steps=validation_steps,
            callbacks=[history, *train_callbacks, earlyStopping])
        # BUG FIX: previously referenced undefined locals X_valid/y_valid
        # (NameError); use the stored validation split instead.
        fb_score = self._get_fbeta_score(self.classifier, self.x_vail,
                                         self.y_vail)
        return [history.train_losses, history.val_losses, fb_score]

    def generate_trainOrValid_img_from_file(self, train_set_folder,
                                            train_csv_file,
                                            img_resize=(32, 32),
                                            batchSize=128,
                                            process_count=cpu_count()):
        """Yield (images, targets) batches decoded in a thread pool.

        Labels come from the CSV's space-separated `tags` column; the final
        partial batch is dropped.
        """
        labels_df = pd.read_csv(train_csv_file)
        labels = sorted(
            set(
                chain.from_iterable(
                    [tags.split(" ") for tags in labels_df['tags'].values])))
        labels_map = {l: i for i, l in enumerate(labels)}
        files_path = []
        tags_list = []
        for file_name, tags in labels_df.values:
            files_path.append('{}/{}.jpg'.format(train_set_folder, file_name))
            tags_list.append(tags)
        X = []
        Y = []
        iter_num = 1
        self.y_map = {v: k for k, v in labels_map.items()}
        with ThreadPoolExecutor(process_count) as pool:
            for img_array, targets in tqdm(pool.map(
                    self._train_transform_to_matrices,
                    [(file_path, tag, labels_map, img_resize)
                     for file_path, tag in zip(files_path, tags_list)]),
                                           total=len(files_path)):
                if iter_num % batchSize == 0:
                    # start a fresh batch buffer
                    X = []
                    Y = []
                    iter_num = 0
                X.append(img_array)
                Y.append(targets)
                iter_num += 1
                if iter_num == batchSize:
                    print(iter_num)
                    yield (np.array(X), np.array(Y))

    def _train_transform_to_matrices(self, *args):
        """Decode one (file_path, tags, labels_map, img_resize) work item.

        Returns (image array scaled to [0, 1], multi-hot target vector).
        """
        file_path, tags, labels_map, img_resize = list(args[0])
        img = Image.open(file_path)
        img.thumbnail(img_resize)
        img_array = np.asarray(img.convert("RGB"), dtype=np.float32) / 255
        targets = np.zeros(len(labels_map))
        for t in tags.split(' '):
            targets[labels_map[t]] = 1
        return img_array, targets

    def generate_test_img_from_file(self, test_set_folder, img_resize=(32, 32),
                                    batchSize=128,
                                    process_count=cpu_count()):
        """Yield (images, file_names) batches for the test folder.

        Also records the full file-name list in `self.test_img_name_list`.
        """
        x_test = []
        x_test_filename = []
        files_name = os.listdir(test_set_folder)
        X = []
        Y = []
        iter_num = 1
        with ThreadPoolExecutor(process_count) as pool:
            # BUG FIX: `_test_transform_to_matrices` was called unbound
            # (NameError) — it is an instance method.
            for img_array, file_name in tqdm(pool.map(
                    self._test_transform_to_matrices,
                    [(test_set_folder, file_name, img_resize)
                     for file_name in files_name]),
                                             total=len(files_name)):
                x_test.append(img_array)
                x_test_filename.append(file_name)
                self.test_img_name_list = x_test_filename
                if iter_num % batchSize == 0:
                    X = []
                    Y = []
                    iter_num = 0
                X.append(img_array)
                # BUG FIX: previously appended undefined `targets`; there are
                # no labels at test time, so pair images with file names.
                Y.append(file_name)
                iter_num += 1
                if iter_num == batchSize:
                    print(iter_num)
                    yield (np.array(X), np.array(Y))

    def _test_transform_to_matrices(self, *args):
        """Decode one (test_set_folder, file_name, img_resize) work item."""
        test_set_folder, file_name, img_resize = list(args[0])
        img = Image.open('{}/{}'.format(test_set_folder, file_name))
        img.thumbnail(img_resize)
        # Convert to RGB and normalize
        img_array = np.array(img.convert("RGB"), dtype=np.float32) / 255
        return img_array, file_name

    def save_weights(self, weight_file_path):
        self.classifier.save_weights(weight_file_path)

    def load_weights(self, weight_file_path):
        self.classifier.load_weights(weight_file_path)

    def setBestThreshold(self):
        """Grid-search and return the best per-class thresholds on the
        stored validation split."""
        result_threshold_list_final, score_result = self._get_fbeta_score2(
            self.classifier, self.x_vail, self.y_vail)
        print('最好得分:{}'.format(score_result))
        print('最好的阈值:{}'.format(result_threshold_list_final))
        return result_threshold_list_final

    def predict(self, x_test):
        predictions = self.classifier.predict(x_test)
        return predictions

    def predict_generator(self, generator):
        # BUG FIX: was `predcit_generator` (typo -> AttributeError).
        predictions = self.classifier.predict_generator(generator)
        return predictions

    def map_predictions(self, predictions, labels_map, thresholds):
        """Map raw per-class scores to label names using per-class thresholds."""
        predictions_labels = []
        for prediction in predictions:
            labels = [
                labels_map[i] for i, value in enumerate(prediction)
                if value > thresholds[i]
            ]
            predictions_labels.append(labels)
        return predictions_labels

    def close(self):
        """Release the Keras/TensorFlow session state."""
        backend.clear_session()
def main_fun(args, ctx):
    """TensorFlowOnSpark worker entry point: train an MNIST MLP.

    `ctx` describes this node's role in the cluster ("ps" or "worker").
    Parameter servers block in `server.join()`; workers build and train a
    3-layer dense softmax model, checkpointing and exporting a SavedModel
    from worker 0.  Input comes either from in-memory Keras MNIST arrays
    (`args.input_mode == 'tf'`) or from a Spark RDD feed ('spark').
    """
    # Imports are deliberately local: this function is shipped to executors,
    # which must import TF/Keras in their own process.
    import numpy
    import os
    import tensorflow as tf
    import tensorflow.contrib.keras as keras
    from tensorflow.contrib.keras.api.keras import backend as K
    from tensorflow.contrib.keras.api.keras.models import Sequential, load_model, save_model
    from tensorflow.contrib.keras.api.keras.layers import Dense, Dropout
    from tensorflow.contrib.keras.api.keras.optimizers import RMSprop
    from tensorflow.contrib.keras.python.keras.callbacks import LambdaCallback, TensorBoard
    from tensorflow.python.saved_model import builder as saved_model_builder
    from tensorflow.python.saved_model import tag_constants
    from tensorflow.python.saved_model.signature_def_utils_impl import predict_signature_def
    from tensorflowonspark import TFNode

    cluster, server = TFNode.start_cluster_server(ctx)

    if ctx.job_name == "ps":
        # parameter servers just serve variables until the job is killed
        server.join()
    elif ctx.job_name == "worker":

        def generate_rdd_data(tf_feed, batch_size):
            """Yield (images, labels) batches pulled from the Spark RDD feed."""
            print("generate_rdd_data invoked")
            while True:
                batch = tf_feed.next_batch(batch_size)
                imgs = []
                lbls = []
                for item in batch:
                    imgs.append(item[0])
                    lbls.append(item[1])
                # normalize pixel values to [0, 1]
                images = numpy.array(imgs).astype('float32') / 255
                labels = numpy.array(lbls).astype('float32')
                yield (images, labels)

        # pin variables to the PS and ops to this worker task
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % ctx.task_index,
                    cluster=cluster)):
            IMAGE_PIXELS = 28
            batch_size = 100
            num_classes = 10

            # the data, shuffled and split between train and test sets
            if args.input_mode == 'tf':
                from tensorflow.contrib.keras.api.keras.datasets import mnist
                (x_train, y_train), (x_test, y_test) = mnist.load_data()
                x_train = x_train.reshape(60000, 784)
                x_test = x_test.reshape(10000, 784)
                x_train = x_train.astype('float32') / 255
                x_test = x_test.astype('float32') / 255

                # convert class vectors to binary class matrices
                y_train = keras.utils.to_categorical(y_train, num_classes)
                y_test = keras.utils.to_categorical(y_test, num_classes)
            else:  # args.mode == 'spark'
                # placeholders only; actual data arrives via generate_rdd_data
                x_train = tf.placeholder(tf.float32,
                                         [None, IMAGE_PIXELS * IMAGE_PIXELS],
                                         name="x_train")
                y_train = tf.placeholder(tf.float32, [None, 10],
                                         name="y_train")

            # 784 -> 512 -> 512 -> 10 softmax MLP with dropout
            model = Sequential()
            model.add(Dense(512, activation='relu', input_shape=(784, )))
            model.add(Dropout(0.2))
            model.add(Dense(512, activation='relu'))
            model.add(Dropout(0.2))
            model.add(Dense(10, activation='softmax'))
            model.summary()
            model.compile(loss='categorical_crossentropy',
                          optimizer=RMSprop(),
                          metrics=['accuracy'])

        saver = tf.train.Saver()

        with tf.Session(server.target) as sess:
            K.set_session(sess)

            def save_checkpoint(epoch, logs=None):
                """Per-epoch callback: dump the graph once, checkpoint weights."""
                if epoch == 1:
                    tf.train.write_graph(sess.graph.as_graph_def(),
                                         args.model_dir, 'graph.pbtxt')
                saver.save(sess,
                           os.path.join(args.model_dir, 'model.ckpt'),
                           global_step=epoch * args.steps_per_epoch)

            ckpt_callback = LambdaCallback(on_epoch_end=save_checkpoint)
            tb_callback = TensorBoard(log_dir=args.model_dir,
                                      histogram_freq=1,
                                      write_graph=True,
                                      write_images=True)

            # add callbacks to save model checkpoint and tensorboard events (on worker:0 only)
            callbacks = [ckpt_callback, tb_callback
                         ] if ctx.task_index == 0 else None

            if args.input_mode == 'tf':
                # train & validate on in-memory data
                history = model.fit(x_train,
                                    y_train,
                                    batch_size=batch_size,
                                    epochs=args.epochs,
                                    verbose=1,
                                    validation_data=(x_test, y_test),
                                    callbacks=callbacks)
            else:  # args.input_mode == 'spark':
                # train on data read from a generator which is producing data from a Spark RDD
                tf_feed = TFNode.DataFeed(ctx.mgr)
                history = model.fit_generator(
                    generator=generate_rdd_data(tf_feed, batch_size),
                    steps_per_epoch=args.steps_per_epoch,
                    epochs=args.epochs,
                    verbose=1,
                    callbacks=callbacks)

            if args.export_dir and ctx.job_name == 'worker' and ctx.task_index == 0:
                # save a local Keras model, so we can reload it with an inferencing learning_phase
                save_model(model, "tmp_model")

                # reload the model
                K.set_learning_phase(False)
                new_model = load_model("tmp_model")

                # export a saved_model for inferencing
                builder = saved_model_builder.SavedModelBuilder(
                    args.export_dir)
                signature = predict_signature_def(
                    inputs={'images': new_model.input},
                    outputs={'scores': new_model.output})
                builder.add_meta_graph_and_variables(
                    sess=sess,
                    tags=[tag_constants.SERVING],
                    signature_def_map={'predict': signature},
                    clear_devices=True)
                builder.save()

            if args.input_mode == 'spark':
                # unblock the Spark feed so the job can finish
                tf_feed.terminate()
class AmazonKerasClassifier:
    """Small multi-label CNN (3 conv stages) trained with on-the-fly image
    augmentation and best-weights checkpointing."""

    def __init__(self):
        self.losses = []
        self.classifier = Sequential()

    def add_conv_layer(self, img_size=(32, 32), img_channels=3):
        """Append the convolutional trunk for `img_size` RGB-like inputs."""
        self.classifier.add(
            BatchNormalization(input_shape=(*img_size, img_channels)))
        self.classifier.add(Conv2D(32, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=(2, 2)))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(Conv2D(64, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=(2, 2)))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(Conv2D(16, (2, 2), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=(2, 2)))
        self.classifier.add(Dropout(0.25))

    def add_flatten_layer(self):
        self.classifier.add(Flatten())

    def add_ann_layer(self, output_size):
        """Append the dense sigmoid head for `output_size` labels."""
        self.classifier.add(Dense(256, activation='relu'))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(Dense(128, activation='relu'))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(Dense(output_size, activation='sigmoid'))

    def _get_fbeta_score(self, classifier, X_valid, y_valid):
        """Return the F2 score at a fixed 0.2 threshold."""
        p_valid = classifier.predict(X_valid)
        return fbeta_score(y_valid,
                           np.array(p_valid) > 0.2,
                           beta=2,
                           average='samples')

    def train_model(self, x_train, y_train, epoch=5, batch_size=128,
                    validation_split_size=0.2, train_callbacks=()):
        """Train with ImageDataGenerator augmentation.

        Returns [train losses, val losses, F2 score]; best weights are
        checkpointed to "weights.best.hdf5".
        """
        history = LossHistory()
        X_train, X_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=validation_split_size)
        # NOTE: removed unused Adam/RMSprop instances — the compile below
        # uses the string 'adam', i.e. Keras's default Adam configuration.
        self.classifier.compile(loss='binary_crossentropy',
                                optimizer='adam',
                                metrics=['accuracy'])
        print('X_train.shape[0]')
        print(X_train.shape[0])
        # keep only the weights with the best validation loss
        checkpointer = ModelCheckpoint(filepath="weights.best.hdf5",
                                       verbose=1,
                                       save_best_only=True)
        datagen = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by std of the dataset
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=False,  # apply ZCA whitening
            rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,  # randomly flip images
            vertical_flip=False)  # randomly flip images
        datagen.fit(X_train)
        self.classifier.fit_generator(
            datagen.flow(X_train, y_train, batch_size=batch_size),
            steps_per_epoch=X_train.shape[0] // batch_size,
            epochs=epoch,
            validation_data=(X_valid, y_valid),
            callbacks=[history, *train_callbacks, checkpointer])
        # renamed local: previously shadowed the imported `fbeta_score`
        fb_score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
        print(fb_score)
        return [history.train_losses, history.val_losses, fb_score]

    def load_weight(self):
        """Load the checkpointed best weights."""
        self.classifier.load_weights("weights.best.hdf5")

    def predict(self, x_test):
        predictions = self.classifier.predict(x_test)
        return predictions

    def map_predictions(self, predictions, labels_map, thresholds):
        """
        Return the predictions mapped to their labels
        :param predictions: the predictions from the predict() method
        :param labels_map: the map
        :param thresholds: The threshold of each class to be considered as
            existing or not existing
        :return: the predictions list mapped to their labels
        """
        predictions_labels = []
        for prediction in predictions:
            labels = [
                labels_map[i] for i, value in enumerate(prediction)
                if value > thresholds[i]
            ]
            predictions_labels.append(labels)
        return predictions_labels

    def close(self):
        """Release the Keras/TensorFlow session state."""
        backend.clear_session()
class Model(object):
    """Binary CNN classifier (sigmoid output) over IMAGE_SIZE x IMAGE_SIZE
    RGB images; relies on module-level IMAGE_SIZE, batch_size, epochs,
    nb_train_samples, nb_validation_samples and FILE_PATH."""

    def __init__(self):
        self.model = Sequential()
        self.model.add(
            Conv2D(32, (3, 3), input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))
        self.model.add(Conv2D(32, (3, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))
        self.model.add(Conv2D(64, (3, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))
        self.model.add(Flatten())
        self.model.add(Dense(64))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(1))
        self.model.add(Activation('sigmoid'))

    def train(self, dataset, batch_size=batch_size, nb_epoch=epochs):
        """Train on `dataset.train`, validating on `dataset.valid`."""
        self.model.compile(loss='binary_crossentropy',
                           optimizer='adam',
                           metrics=['accuracy'])
        self.model.fit_generator(
            dataset.train,
            steps_per_epoch=nb_train_samples // batch_size,
            # BUG FIX: previously passed the module-level `epochs`,
            # silently ignoring the `nb_epoch` parameter.
            epochs=nb_epoch,
            validation_data=dataset.valid,
            validation_steps=nb_validation_samples // batch_size)

    def save(self, file_path=FILE_PATH):
        """Persist model weights to `file_path`."""
        print('Model Saved.')
        self.model.save_weights(file_path)

    def load(self, file_path=FILE_PATH):
        """Load model weights from `file_path`."""
        print('Model Loaded.')
        self.model.load_weights(file_path)

    def predict(self, image):
        """Predict the class (0/1) of a single image array.

        BUG FIX: the original called `image.resize(...)` (which for a numpy
        array resizes in place and returns None) and then discarded the
        result with `img = image.astype(...)`, so the single-sample batch
        shape was never applied.  Reshape explicitly instead.
        NOTE(review): assumes `image` is an ndarray with
        IMAGE_SIZE*IMAGE_SIZE*3 elements — confirm against callers.
        """
        img = image.reshape((1, IMAGE_SIZE, IMAGE_SIZE, 3))
        img = img.astype('float32')
        img /= 255  # normalize to [0, 1]
        result = self.model.predict(img)
        print(result)  # probability
        result = self.model.predict_classes(img)
        print(result)  # 0/1
        return result[0]

    def evaluate(self, dataset):
        """Print accuracy on the validation generator (2 steps)."""
        score = self.model.evaluate_generator(dataset.valid, steps=2)
        print("样本准确率%s: %.2f%%" %
              (self.model.metrics_names[1], score[1] * 100))
batch_size=batch_size, class_mode='binary') #Importing my test set images : test_set = test_datagen.flow_from_directory('dataset/test_set', target_size=input_size, batch_size=batch_size, class_mode='binary') # Creating a loss history class object : history = LossHistory() # Fitting the model i.e. training it : classifier.fit_generator(training_set, steps_per_epoch=8000/batch_size, #Amount of batches to be completed before declaring an epoch to be finished. epochs=90, validation_data=test_set, validation_steps=2000/batch_size, workers=12, #adjusted workers and maxQsize for my personal GPU performance. max_queue_size=100, callbacks=[history]) #recording training stats into history class object. # PART 3 - MAKING PREDICTIONS, SAVING MODEL, SAVING LOSS HISTORY TO FILE. # Saving model : model_path = 'dataset/cat_or_dog_model.h5' classifier.save(model_path) print("Model saved to", model_path) # Saving loss history to file : lossLog_path = 'dataset/loss_history.log'
class AmazonKerasClassifier:
    """Multi-label CNN (4 conv stages, sigmoid head) with plain and
    augmented training paths, both F2-scored on a held-out split."""

    def __init__(self):
        self.losses = []
        self.classifier = Sequential()

    def add_conv_layer(self, img_size=(32, 32), img_channels=3):
        """Append the convolutional trunk (32->256 filters)."""
        self.classifier.add(
            BatchNormalization(input_shape=(*img_size, img_channels)))
        self.classifier.add(
            Conv2D(32, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(32, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(
            Conv2D(64, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(64, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(
            Conv2D(128, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(128, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(
            Conv2D(256, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(256, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))

    def add_flatten_layer(self):
        self.classifier.add(Flatten())

    def add_ann_layer(self, output_size):
        """Append the dense sigmoid head for `output_size` labels."""
        self.classifier.add(Dense(512, activation='relu'))
        self.classifier.add(BatchNormalization())
        self.classifier.add(Dropout(0.5))
        self.classifier.add(Dense(output_size, activation='sigmoid'))

    def _get_fbeta_score(self, classifier, X_valid, y_valid):
        """Return the F2 score at a fixed 0.2 threshold."""
        p_valid = classifier.predict(X_valid)
        return fbeta_score(y_valid,
                           np.array(p_valid) > 0.2,
                           beta=2,
                           average='samples')

    def train_model(self, x_train, y_train, learn_rate=0.001, epoch=5,
                    batch_size=128, validation_split_size=0.2,
                    train_callbacks=()):
        """Train on in-memory arrays; returns [train losses, val losses, F2]."""
        history = LossHistory()
        X_train, X_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=validation_split_size)
        opt = Adam(lr=learn_rate)
        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=opt,
                                metrics=['accuracy'])
        # early stopping will auto-stop training process if model stops
        # learning after 3 epochs
        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      verbose=0,
                                      mode='auto')
        self.classifier.fit(
            X_train,
            y_train,
            batch_size=batch_size,
            epochs=epoch,
            verbose=1,
            validation_data=(X_valid, y_valid),
            callbacks=[history, *train_callbacks, earlyStopping])
        # renamed local: previously shadowed the imported `fbeta_score`
        fb_score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
        return [history.train_losses, history.val_losses, fb_score]

    def train_model_aug(self, x_train, y_train, learn_rate=0.001, epoch=5,
                        batch_size=128, validation_split_size=0.15,
                        train_callbacks=()):
        """Train with on-the-fly augmentation.

        Returns [train losses, val losses, F2 score].
        BUG FIXES: the augmenter was fitted on the full `x_train` (leaking
        validation statistics), the generator flowed with a hard-coded
        batch size of 32 (ignoring the `batch_size` parameter), and
        `steps_per_epoch` was computed from the unsplit `x_train`.
        """
        history = LossHistory()
        X_train, X_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=validation_split_size)
        opt = Adam(lr=learn_rate)
        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=opt,
                                metrics=['accuracy'])
        # early stopping will auto-stop training process if model stops
        # learning after 3 epochs
        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      verbose=0,
                                      mode='auto')
        datagen = ImageDataGenerator(rotation_range=10,
                                     width_shift_range=0.2,
                                     height_shift_range=0.2,
                                     zoom_range=0.1,
                                     horizontal_flip=True)
        datagen.fit(X_train)
        self.classifier.fit_generator(
            datagen.flow(X_train, y_train, batch_size=batch_size),
            steps_per_epoch=len(X_train) / batch_size,
            epochs=epoch,
            verbose=1,
            validation_data=(X_valid, y_valid),
            callbacks=[history, *train_callbacks, earlyStopping])
        fb_score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
        return [history.train_losses, history.val_losses, fb_score]

    def save_weights(self, weight_file_path):
        self.classifier.save_weights(weight_file_path)

    def load_weights(self, weight_file_path):
        self.classifier.load_weights(weight_file_path)

    def predict(self, x_test):
        predictions = self.classifier.predict(x_test)
        return predictions

    def map_predictions(self, predictions, labels_map, thresholds):
        """
        Return the predictions mapped to their labels
        :param predictions: the predictions from the predict() method
        :param labels_map: the map
        :param thresholds: The threshold of each class to be considered as
            existing or not existing
        :return: the predictions list mapped to their labels
        """
        predictions_labels = []
        for prediction in predictions:
            labels = [
                labels_map[i] for i, value in enumerate(prediction)
                if value > thresholds[i]
            ]
            predictions_labels.append(labels)
        return predictions_labels

    def close(self):
        """Release the Keras/TensorFlow session state."""
        backend.clear_session()